1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
;; Mode iterators.  A pattern written with one of these names is
;; instantiated once for every mode in the bracketed list.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
;; (the six 128-bit modes of SSEMODE followed by the six 256-bit modes
;; of AVX256MODE).
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mix-and-match subsets of the 128-bit integer modes.  The digits in
;; each name appear to list the element sizes in bytes (1 = QI, 2 = HI,
;; 4 = SI, 8 = DI), which matches the modes each iterator enumerates.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point subsets: scalar plus 128-bit packed, 256-bit packed
;; only, and 128-bit packed only.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
53 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX subsets: 256-bit modes on their own, or paired with the 128-bit
;; mode of the same element type.
55 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
56 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
57 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
58 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
59 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
60 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
61 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
62 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE(review): the next two names suggest the cvtdq2ps / cvtps2dq
;; conversions; the patterns that use them are outside this excerpt.
63 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
64 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
66 ;; Int-float size matches
;; Each pairs the float and integer vector modes that share an element
;; count and element width (4 x 32-bit, 2 x 64-bit).
67 (define_mode_iterator SSEMODE4S [V4SF V4SI])
68 (define_mode_iterator SSEMODE2D [V2DF V2DI])
70 ;; Modes handled by integer vcond pattern
;; V2DI is only enabled when TARGET_SSE4_2 holds.
71 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
72 (V2DI "TARGET_SSE4_2")])
74 ;; Modes handled by vec_extract_even/odd pattern.
;; Every entry carries its own enabling condition (the quoted string).
;; NOTE(review): the opening entries of this list are not visible in
;; this excerpt.
75 (define_mode_iterator SSEMODE_EO
78 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
79 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
80 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
82 ;; Mapping from float mode to required SSE level
;; Mode attributes.  <name> inside a pattern is replaced by the string
;; listed for the mode currently being instantiated.
;; <sse> is used below as a pattern-name prefix, e.g. "<sse>_movup...".
83 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
85 ;; Mapping from integer vector mode to mnemonic suffix
86 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
88 ;; Mapping of the fma4 suffix
89 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar-form suffix ("ss"/"sd") for both scalar and 128-bit packed
;; float modes.
90 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
91 (V4SF "ss") (V2DF "sd")])
93 ;; Mapping of the avx suffix
94 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
95 (V4SF "ps") (V2DF "pd")])
;; Single precision letter ("s" or "d") for the 128-bit packed float
;; modes; the templates below append it to stems such as "movup",
;; "addp" and "adds".
97 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
99 ;; Mapping of the max integer size for xop rotate immediate constraint
;; (element width in bits minus one).
100 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
102 ;; Mapping of vector modes back to the scalar modes
103 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
104 (V16QI "QI") (V8HI "HI")
105 (V4SI "SI") (V2DI "DI")])
107 ;; Mapping of vector modes to a vector mode of double size
;; (same element type, twice the element count).
108 (define_mode_attr ssedoublesizemode
109 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
110 (V8HI "V16HI") (V16QI "V32QI")
111 (V4DF "V8DF") (V8SF "V16SF")
112 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
114 ;; Number of scalar elements in each vector type
115 (define_mode_attr ssescalarnum
116 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
117 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mode attributes used by the AVX patterns.
;; avxvecmode: value fed to the "mode" insn attribute -- TI / OI for
;; 128- / 256-bit integer vectors, the mode itself for float vectors.
120 (define_mode_attr avxvecmode
121 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
122 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
123 (V8SF "V8SF") (V4DF "V4DF")])
;; avxvecpsmode: integer vector -> single-precision float vector of the
;; same total width.
124 (define_mode_attr avxvecpsmode
125 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
126 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element type, half the element count.
127 (define_mode_attr avxhalfvecmode
128 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
129 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; avxscalarmode: element mode of the vector.
130 (define_mode_attr avxscalarmode
131 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
132 (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between the SI and SF vector of equal length.
133 (define_mode_attr avxcvtvecmode
134 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: float vector -> integer vector with elements of the
;; same width and count.
135 (define_mode_attr avxpermvecmode
136 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixf2c: precision letter for 128- and 256-bit packed floats.
137 (define_mode_attr avxmodesuffixf2c
138 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; avxmodesuffixp: "pd" / "ps" / "si" suffix.
;; NOTE(review): the closing line of this list is not visible in this
;; excerpt.
139 (define_mode_attr avxmodesuffixp
140 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; avxmodesuffix: empty for 128-bit modes, "256" for 256-bit modes; it is
;; appended to pattern names such as "avx_movdqu<avxmodesuffix>".
142 (define_mode_attr avxmodesuffix
143 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
144 (V8SI "256") (V8SF "256") (V4DF "256")])
146 ;; Mapping of immediate bits for blend instructions
147 (define_mode_attr blendbits
148 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
150 ;; Mapping of immediate bits for pinsr instructions
151 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
153 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
155 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for every 256-bit AVX vector mode.  All of the work is
;; delegated to ix86_expand_vector_move.
161 (define_expand "mov<mode>"
162 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
163 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
166 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move for all 128- and 256-bit modes.  Alternatives:
;;   0: SSE constant (constraint C) -> register, opcode chosen by
;;      standard_sse_constant_opcode;
;;   1: register or memory -> register;
;;   2: register -> memory.
;; At least one operand must be a register.  For alternatives 1 and 2
;; the mnemonic (vmovaps / vmovapd / vmovdqa) is selected from the insn's
;; "mode" attribute, which comes from <avxvecmode>.
170 (define_insn "*avx_mov<mode>_internal"
171 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
172 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 return standard_sse_constant_opcode (insn, operands[1]);
183 switch (get_attr_mode (insn))
187 return "vmovaps\t{%1, %0|%0, %1}";
190 return "vmovapd\t{%1, %0|%0, %1}";
192 return "vmovdqa\t{%1, %0|%0, %1}";
198 [(set_attr "type" "sselog1,ssemov,ssemov")
199 (set_attr "prefix" "vex")
200 (set_attr "mode" "<avxvecmode>")])
202 ;; All of these patterns are enabled for SSE1 as well as SSE2.
203 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every 128-bit SSE vector mode.  All of the work is
;; delegated to ix86_expand_vector_move.
205 (define_expand "mov<mode>"
206 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
207 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
210 ix86_expand_vector_move (<MODE>mode, operands);
;; SSE vector move for all 128-bit modes.  Alternatives:
;;   0: SSE constant (constraint C) -> register, opcode chosen by
;;      standard_sse_constant_opcode;
;;   1: register or memory -> register;
;;   2: register -> memory.
;; At least one operand must be a register.  The mnemonic (movaps /
;; movapd / movdqa) follows the "mode" attribute, which is computed as:
;;   - V4SF when optimizing the function for size, when SSE2 is not
;;     available, or for the store alternative when
;;     TARGET_SSE_TYPELESS_STORES is set;
;;   - otherwise V4SF or V2DF when that is the operand mode;
;;   - otherwise TI.
214 (define_insn "*mov<mode>_internal"
215 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
216 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
218 && (register_operand (operands[0], <MODE>mode)
219 || register_operand (operands[1], <MODE>mode))"
221 switch (which_alternative)
224 return standard_sse_constant_opcode (insn, operands[1]);
227 switch (get_attr_mode (insn))
230 return "movaps\t{%1, %0|%0, %1}";
232 return "movapd\t{%1, %0|%0, %1}";
234 return "movdqa\t{%1, %0|%0, %1}";
240 [(set_attr "type" "sselog1,ssemov,ssemov")
242 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
243 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
244 (and (eq_attr "alternative" "2")
245 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
247 (const_string "V4SF")
248 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
249 (const_string "V4SF")
250 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
251 (const_string "V2DF")
253 (const_string "TI")))])
255 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
256 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
257 ;; from memory, we'd prefer to load the memory directly into the %xmm
258 ;; register. To facilitate this happy circumstance, this pattern won't
259 ;; split until after register allocation. If the 64-bit value didn't
260 ;; come from memory, this is the best we can do. This is much better
261 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; 32-bit only (!TARGET_64BIT), needs SSE2 and TARGET_INTER_UNIT_MOVES.
;; Operand 1 is a DImode value in integer registers (alternative 0) or
;; in memory (alternative 1); operand 2 is an early-clobber SSE scratch
;; that only the register alternative needs.  The split runs once
;; reload_completed holds:
;;   - register source: each SImode half (subreg bytes 0 and 4) is
;;     loaded with sse2_loadld, the high half into the scratch, and the
;;     two are combined with vec_interleave_lowv4si;
;;   - memory source: a single vec_concatv2di of the DImode value with
;;     const0_rtx, written through the V2DI low part of operand 0.
264 (define_insn_and_split "movdi_to_sse"
266 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
267 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
268 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
269 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
271 "&& reload_completed"
274 if (register_operand (operands[1], DImode))
276 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
277 Assemble the 64-bit DImode value in an xmm register. */
278 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
279 gen_rtx_SUBREG (SImode, operands[1], 0)));
280 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
281 gen_rtx_SUBREG (SImode, operands[1], 4)));
282 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
285 else if (memory_operand (operands[1], DImode))
286 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
287 operands[1], const0_rtx));
;; After reload, rewrite a V4SF load that matches
;; zero_extended_scalar_load_operand as a scalar load: operand 1 is
;; narrowed to SFmode and operand 2 becomes the V4SF zero vector.
293 [(set (match_operand:V4SF 0 "register_operand" "")
294 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
295 "TARGET_SSE && reload_completed"
298 (vec_duplicate:V4SF (match_dup 1))
302 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
303 operands[2] = CONST0_RTX (V4SFmode);
;; The V2DF counterpart (SSE2): operand 1 is narrowed to DFmode and
;; concatenated with a DFmode zero in operand 2.
307 [(set (match_operand:V2DF 0 "register_operand" "")
308 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
309 "TARGET_SSE2 && reload_completed"
310 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
312 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
313 operands[2] = CONST0_RTX (DFmode);
;; Push expanders for a vector register operand, one for the 256-bit AVX
;; modes and one for the 128-bit SSE modes.  Both call ix86_expand_push.
316 (define_expand "push<mode>1"
317 [(match_operand:AVX256MODE 0 "register_operand" "")]
320 ix86_expand_push (<MODE>mode, operands[0]);
324 (define_expand "push<mode>1"
325 [(match_operand:SSEMODE 0 "register_operand" "")]
328 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned vector move expanders, one for the 256-bit AVX modes and
;; one for the 128-bit SSE modes.  Both call
;; ix86_expand_vector_move_misalign.
332 (define_expand "movmisalign<mode>"
333 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
334 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
337 ix86_expand_vector_move_misalign (<MODE>mode, operands);
341 (define_expand "movmisalign<mode>"
342 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
343 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
346 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX vmovups / vmovupd for 128- and 256-bit packed floats.  Two
;; alternatives: register-or-memory to register, and register to memory;
;; the condition also rejects memory-to-memory.
350 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
351 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
353 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
355 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
356 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
357 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
358 [(set_attr "type" "ssemov")
359 (set_attr "movu" "1")
360 (set_attr "prefix" "vex")
361 (set_attr "mode" "<MODE>")])
;; movq into an SSE register from an SSE register or memory, selecting
;; element 0 of the V2DI source.
;; NOTE(review): the RTL lines wrapping the select are not visible in
;; this excerpt, so what the upper element becomes cannot be confirmed
;; here.  The "%v" template prefix goes together with the "maybe_vex"
;; prefix attribute.
363 (define_insn "sse2_movq128"
364 [(set (match_operand:V2DI 0 "register_operand" "=x")
367 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
368 (parallel [(const_int 0)]))
371 "%vmovq\t{%1, %0|%0, %1}"
372 [(set_attr "type" "ssemov")
373 (set_attr "prefix" "maybe_vex")
374 (set_attr "mode" "TI")])
;; SSE movups / movupd for 128-bit packed floats (named sse_movups and
;; sse2_movupd through <sse>).  Two alternatives: register-or-memory to
;; register, and register to memory; memory-to-memory is rejected.
376 (define_insn "<sse>_movup<ssemodesuffixf2c>"
377 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
379 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
381 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
382 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
383 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
384 [(set_attr "type" "ssemov")
385 (set_attr "movu" "1")
386 (set_attr "mode" "<MODE>")])
;; AVX vmovdqu on the QI vector modes (V16QI and V32QI).  Two
;; alternatives: register-or-memory to register, and register to memory;
;; memory-to-memory is rejected.
388 (define_insn "avx_movdqu<avxmodesuffix>"
389 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
391 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
393 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
394 "vmovdqu\t{%1, %0|%0, %1}"
395 [(set_attr "type" "ssemov")
396 (set_attr "movu" "1")
397 (set_attr "prefix" "vex")
398 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movdqu on V16QI.  Two alternatives: register-or-memory to
;; register, and register to memory; memory-to-memory is rejected.
400 (define_insn "sse2_movdqu"
401 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
402 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
404 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
405 "movdqu\t{%1, %0|%0, %1}"
406 [(set_attr "type" "ssemov")
407 (set_attr "movu" "1")
408 (set_attr "prefix_data16" "1")
409 (set_attr "mode" "TI")])
;; AVX vmovntps / vmovntpd: store a packed-float register to memory.
;; The only form is register source, memory destination.
411 (define_insn "avx_movnt<mode>"
412 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
414 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
416 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
417 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "prefix" "vex")
420 (set_attr "mode" "<MODE>")])
;; SSE movntps / movntpd: store a 128-bit packed-float register to
;; memory.  The only form is register source, memory destination.
422 (define_insn "<sse>_movnt<mode>"
423 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
425 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
427 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
428 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssemov")
430 (set_attr "mode" "<MODE>")])
;; AVX vmovntdq: store a V2DI / V4DI register to memory.
;; NOTE(review): the type here is "ssecvt", whereas sse2_movntv2di and
;; the float avx_movnt pattern use "ssemov" -- confirm this is intended.
432 (define_insn "avx_movnt<mode>"
433 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
435 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
438 "vmovntdq\t{%1, %0|%0, %1}"
439 [(set_attr "type" "ssecvt")
440 (set_attr "prefix" "vex")
441 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movntdq: store a V2DI register to memory.
443 (define_insn "sse2_movntv2di"
444 [(set (match_operand:V2DI 0 "memory_operand" "=m")
445 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
448 "movntdq\t{%1, %0|%0, %1}"
449 [(set_attr "type" "ssemov")
450 (set_attr "prefix_data16" "1")
451 (set_attr "mode" "TI")])
;; movnti: store an SImode general register (constraint r) to memory.
;; NOTE(review): the "mode" attribute is "V2DF" although the operands are
;; SImode -- confirm whether "SI" was intended.
453 (define_insn "sse2_movntsi"
454 [(set (match_operand:SI 0 "memory_operand" "=m")
455 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
458 "movnti\t{%1, %0|%0, %1}"
459 [(set_attr "type" "ssemov")
460 (set_attr "prefix_data16" "0")
461 (set_attr "mode" "V2DF")])
;; AVX vlddqu: load V16QI / V32QI from memory into a register.  The
;; source must be memory.
;; NOTE(review): the type here is "ssecvt", whereas sse3_lddqu uses
;; "ssemov" -- confirm this is intended.
463 (define_insn "avx_lddqu<avxmodesuffix>"
464 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
466 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
469 "vlddqu\t{%1, %0|%0, %1}"
470 [(set_attr "type" "ssecvt")
471 (set_attr "movu" "1")
472 (set_attr "prefix" "vex")
473 (set_attr "mode" "<avxvecmode>")])
;; SSE3 lddqu: load V16QI from memory into a register.  The source must
;; be memory.  The encoding attributes mark a rep prefix and no data16
;; prefix.
475 (define_insn "sse3_lddqu"
476 [(set (match_operand:V16QI 0 "register_operand" "=x")
477 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
480 "lddqu\t{%1, %0|%0, %1}"
481 [(set_attr "type" "ssemov")
482 (set_attr "movu" "1")
483 (set_attr "prefix_data16" "0")
484 (set_attr "prefix_rep" "1")
485 (set_attr "mode" "TI")])
487 ; Expand patterns for non-temporal stores. At the moment, only those
488 ; that directly map to insns are defined; it would be possible to
489 ; define patterns for other modes that would expand to several insns.
;; storent<mode> for the 128-bit packed float modes: register source,
;; memory destination, the same operand shape as <sse>_movnt<mode>.
491 (define_expand "storent<mode>"
492 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
494 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
496 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent<mode> for the MODEF iterator (defined outside this excerpt).
;; NOTE(review): the enabling condition is not visible here.
499 (define_expand "storent<mode>"
500 [(set (match_operand:MODEF 0 "memory_operand" "")
502 [(match_operand:MODEF 1 "register_operand" "")]
;; storentv2di: same operand shape as sse2_movntv2di.
507 (define_expand "storentv2di"
508 [(set (match_operand:V2DI 0 "memory_operand" "")
509 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storentsi: same operand shape as sse2_movntsi.
514 (define_expand "storentsi"
515 [(set (match_operand:SI 0 "memory_operand" "")
516 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
521 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
523 ;; Parallel floating point arithmetic
525 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on the 128-bit packed float modes.  It hands the rtx
;; code and mode to ix86_expand_fp_absneg_operator and finishes with
;; DONE, so the RTL template itself is never emitted.
527 (define_expand "<code><mode>2"
528 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
530 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
531 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
532 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add / subtract expander for the 256-bit packed float modes.  The
;; operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the template
;; is emitted afterwards.
534 (define_expand "<plusminus_insn><mode>3"
535 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
536 (plusminus:AVX256MODEF2P
537 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
538 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
539 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed add / subtract in three-operand form: the destination is
;; independent of both sources, and operand 2 may be memory.  <comm>
;; supplies the commutative marker for operand 1's constraint where the
;; operation allows it (it is defined outside this excerpt).
542 (define_insn "*avx_<plusminus_insn><mode>3"
543 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
544 (plusminus:AVXMODEF2P
545 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
546 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
547 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
549 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
550 [(set_attr "type" "sseadd")
551 (set_attr "prefix" "vex")
552 (set_attr "mode" "<avxvecmode>")])
;; Add / subtract expander for the 128-bit packed float modes.  The
;; operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
554 (define_expand "<plusminus_insn><mode>3"
555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
556 (plusminus:SSEMODEF2P
557 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
559 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed add / subtract in two-operand form: operand 1 is tied to
;; the destination (constraint "0") and only operand 2 appears in the
;; template as a source.
562 (define_insn "*<plusminus_insn><mode>3"
563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
564 (plusminus:SSEMODEF2P
565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
567 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
568 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
569 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
570 [(set_attr "type" "sseadd")
571 (set_attr "mode" "<MODE>")])
;; AVX scalar add / subtract (v...ss / v...sd) written as a vec_merge of
;; the full-vector operation; the "mode" attribute is the scalar mode.
;; NOTE(review): the remaining vec_merge operands are not visible in
;; this excerpt.
573 (define_insn "*avx_vm<plusminus_insn><mode>3"
574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 (vec_merge:SSEMODEF2P
576 (plusminus:SSEMODEF2P
577 (match_operand:SSEMODEF2P 1 "register_operand" "x")
578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
581 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
582 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
583 [(set_attr "type" "sseadd")
584 (set_attr "prefix" "vex")
585 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar add / subtract (...ss / ...sd) written as a vec_merge of
;; the full-vector operation.  Operand 1 is tied to the destination; the
;; "mode" attribute is the scalar mode.
;; NOTE(review): the remaining vec_merge operands are not visible in
;; this excerpt.
587 (define_insn "<sse>_vm<plusminus_insn><mode>3"
588 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
589 (vec_merge:SSEMODEF2P
590 (plusminus:SSEMODEF2P
591 (match_operand:SSEMODEF2P 1 "register_operand" "0")
592 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
595 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
596 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
597 [(set_attr "type" "sseadd")
598 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 256-bit packed float modes.  The operands
;; are legitimized in place by ix86_fixup_binary_operands_no_copy.
600 (define_expand "mul<mode>3"
601 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
603 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
604 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
605 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
606 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed multiply (vmulps / vmulpd) in three-operand form.  Operand
;; 1 carries the commutative marker "%"; operand 2 may be memory.
608 (define_insn "*avx_mul<mode>3"
609 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
611 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
612 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
613 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
614 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
615 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
616 [(set_attr "type" "ssemul")
617 (set_attr "prefix" "vex")
618 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 128-bit packed float modes.  The operands
;; are legitimized in place by ix86_fixup_binary_operands_no_copy.
620 (define_expand "mul<mode>3"
621 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
623 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
624 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
625 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
626 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; SSE packed multiply (mulps / mulpd) in two-operand form.  Operand 1
;; is commutative ("%") and tied to the destination ("0").
628 (define_insn "*mul<mode>3"
629 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
631 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
632 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
633 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
634 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
635 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
636 [(set_attr "type" "ssemul")
637 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply (vmulss / vmulsd) written as a vec_merge; the
;; "mode" attribute is the scalar mode.
;; NOTE(review): the condition here is AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vm add/sub and div patterns use AVX128_VEC_FLOAT_MODE_P.
;; The iterator only yields 128-bit modes, so presumably the two are
;; equivalent here -- confirm.
639 (define_insn "*avx_vmmul<mode>3"
640 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
641 (vec_merge:SSEMODEF2P
643 (match_operand:SSEMODEF2P 1 "register_operand" "x")
644 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
647 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
648 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
649 [(set_attr "type" "ssemul")
650 (set_attr "prefix" "vex")
651 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar multiply (mulss / mulsd) written as a vec_merge.  Operand
;; 1 is tied to the destination; the "mode" attribute is the scalar mode.
653 (define_insn "<sse>_vmmul<mode>3"
654 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
655 (vec_merge:SSEMODEF2P
657 (match_operand:SSEMODEF2P 1 "register_operand" "0")
658 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
661 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
662 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
663 [(set_attr "type" "ssemul")
664 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  It first fixes up the operands.  Then, when
;; SSE math and TARGET_RECIP are in effect, the insn is not being
;; optimized for size, and -ffinite-math-only, -fno-trapping-math and
;; -funsafe-math-optimizations all hold, it emits the software division
;; sequence from ix86_emit_swdivsf.
666 (define_expand "divv8sf3"
667 [(set (match_operand:V8SF 0 "register_operand" "")
668 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
669 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
672 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
674 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
675 && flag_finite_math_only && !flag_trapping_math
676 && flag_unsafe_math_optimizations)
678 ix86_emit_swdivsf (operands[0], operands[1],
679 operands[2], V8SFmode);
;; V4DF division expander: it only fixes up the operands, with no
;; reciprocal shortcut.
684 (define_expand "divv4df3"
685 [(set (match_operand:V4DF 0 "register_operand" "")
686 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
687 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
689 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed divide (vdivps / vdivpd) in three-operand form.  The
;; dividend must be a register; the divisor may be memory.
691 (define_insn "avx_div<mode>3"
692 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
694 (match_operand:AVXMODEF2P 1 "register_operand" "x")
695 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
696 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
697 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
698 [(set_attr "type" "ssediv")
699 (set_attr "prefix" "vex")
700 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When SSE math and TARGET_RECIP are in
;; effect, the insn is being optimized for speed, and
;; -ffinite-math-only, -fno-trapping-math and
;; -funsafe-math-optimizations all hold, it emits the software division
;; sequence from ix86_emit_swdivsf.
702 (define_expand "divv4sf3"
703 [(set (match_operand:V4SF 0 "register_operand" "")
704 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
705 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
708 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
709 && flag_finite_math_only && !flag_trapping_math
710 && flag_unsafe_math_optimizations)
712 ix86_emit_swdivsf (operands[0], operands[1],
713 operands[2], V4SFmode);
;; V2DF division expander.
;; NOTE(review): the condition and any preparation code are not visible
;; in this excerpt.
718 (define_expand "divv2df3"
719 [(set (match_operand:V2DF 0 "register_operand" "")
720 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
721 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX-encoded packed divide restricted to the 128-bit float modes, in
;; three-operand form.
725 (define_insn "*avx_div<mode>3"
726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (match_operand:SSEMODEF2P 1 "register_operand" "x")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
730 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
731 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
732 [(set_attr "type" "ssediv")
733 (set_attr "prefix" "vex")
734 (set_attr "mode" "<MODE>")])
;; SSE packed divide (divps / divpd) in two-operand form.  The dividend
;; is tied to the destination.
736 (define_insn "<sse>_div<mode>3"
737 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
739 (match_operand:SSEMODEF2P 1 "register_operand" "0")
740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
741 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
742 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
743 [(set_attr "type" "ssediv")
744 (set_attr "mode" "<MODE>")])
;; AVX scalar divide (vdivss / vdivsd) written as a vec_merge; the
;; "mode" attribute is the scalar mode.
746 (define_insn "*avx_vmdiv<mode>3"
747 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
748 (vec_merge:SSEMODEF2P
750 (match_operand:SSEMODEF2P 1 "register_operand" "x")
751 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
754 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
755 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
756 [(set_attr "type" "ssediv")
757 (set_attr "prefix" "vex")
758 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar divide (divss / divsd) written as a vec_merge.  The
;; dividend is tied to the destination; the "mode" attribute is the
;; scalar mode.
760 (define_insn "<sse>_vmdiv<mode>3"
761 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
762 (vec_merge:SSEMODEF2P
764 (match_operand:SSEMODEF2P 1 "register_operand" "0")
765 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
768 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
769 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
770 [(set_attr "type" "ssediv")
771 (set_attr "mode" "<ssescalarmode>")])
;; vrcpps on V8SF, represented as UNSPEC_RCP of the source, which may be
;; a register or memory.
773 (define_insn "avx_rcpv8sf2"
774 [(set (match_operand:V8SF 0 "register_operand" "=x")
776 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
778 "vrcpps\t{%1, %0|%0, %1}"
779 [(set_attr "type" "sse")
780 (set_attr "prefix" "vex")
781 (set_attr "mode" "V8SF")])
;; rcpps on V4SF, represented as UNSPEC_RCP of the source.  The "%v"
;; template prefix goes together with the "maybe_vex" prefix attribute.
783 (define_insn "sse_rcpv4sf2"
784 [(set (match_operand:V4SF 0 "register_operand" "=x")
786 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
788 "%vrcpps\t{%1, %0|%0, %1}"
789 [(set_attr "type" "sse")
790 (set_attr "atom_sse_attr" "rcp")
791 (set_attr "prefix" "maybe_vex")
792 (set_attr "mode" "V4SF")])
;; vrcpss in three-operand form: operand 1 is the value the unspec is
;; applied to, operand 2 is a second register operand.  In AT&T order
;; the template emits operand 1 first and operand 2 second.
;; NOTE(review): the enclosing vec_merge lines are not visible in this
;; excerpt.
794 (define_insn "*avx_vmrcpv4sf2"
795 [(set (match_operand:V4SF 0 "register_operand" "=x")
797 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
799 (match_operand:V4SF 2 "register_operand" "x")
802 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
803 [(set_attr "type" "sse")
804 (set_attr "prefix" "vex")
805 (set_attr "mode" "SF")])
;; rcpss in two-operand form: operand 2 is tied to the destination
;; (constraint "0"), so only operand 1 appears in the template.
;; NOTE(review): the enclosing vec_merge lines are not visible in this
;; excerpt.
807 (define_insn "sse_vmrcpv4sf2"
808 [(set (match_operand:V4SF 0 "register_operand" "=x")
810 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
812 (match_operand:V4SF 2 "register_operand" "0")
815 "rcpss\t{%1, %0|%0, %1}"
816 [(set_attr "type" "sse")
817 (set_attr "atom_sse_attr" "rcp")
818 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When SSE math and TARGET_RECIP are in
;; effect, the insn is not being optimized for size, and
;; -ffinite-math-only, -fno-trapping-math and
;; -funsafe-math-optimizations all hold, it emits the software sequence
;; from ix86_emit_swsqrtsf with a final argument of 0.
820 (define_expand "sqrtv8sf2"
821 [(set (match_operand:V8SF 0 "register_operand" "")
822 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
825 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
826 && flag_finite_math_only && !flag_trapping_math
827 && flag_unsafe_math_optimizations)
829 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; vsqrtps on V8SF; the source may be a register or memory.
834 (define_insn "avx_sqrtv8sf2"
835 [(set (match_operand:V8SF 0 "register_operand" "=x")
836 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
838 "vsqrtps\t{%1, %0|%0, %1}"
839 [(set_attr "type" "sse")
840 (set_attr "prefix" "vex")
841 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When SSE math and TARGET_RECIP are in
;; effect, the insn is being optimized for speed, and
;; -ffinite-math-only, -fno-trapping-math and
;; -funsafe-math-optimizations all hold, it emits the software sequence
;; from ix86_emit_swsqrtsf with a final argument of 0.
843 (define_expand "sqrtv4sf2"
844 [(set (match_operand:V4SF 0 "register_operand" "")
845 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
848 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
849 && flag_finite_math_only && !flag_trapping_math
850 && flag_unsafe_math_optimizations)
852 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF; the source may be a register or memory.  The "%v"
;; template prefix goes together with the "maybe_vex" prefix attribute.
857 (define_insn "sse_sqrtv4sf2"
858 [(set (match_operand:V4SF 0 "register_operand" "=x")
859 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
861 "%vsqrtps\t{%1, %0|%0, %1}"
862 [(set_attr "type" "sse")
863 (set_attr "atom_sse_attr" "sqrt")
864 (set_attr "prefix" "maybe_vex")
865 (set_attr "mode" "V4SF")])
;; vsqrtpd on V4DF.  The pattern carries the standard name directly, so
;; no separate expander is involved.
867 (define_insn "sqrtv4df2"
868 [(set (match_operand:V4DF 0 "register_operand" "=x")
869 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
871 "vsqrtpd\t{%1, %0|%0, %1}"
872 [(set_attr "type" "sse")
873 (set_attr "prefix" "vex")
874 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF.  The pattern carries the standard name directly; the
;; "%v" template prefix goes together with the "maybe_vex" attribute.
876 (define_insn "sqrtv2df2"
877 [(set (match_operand:V2DF 0 "register_operand" "=x")
878 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
880 "%vsqrtpd\t{%1, %0|%0, %1}"
881 [(set_attr "type" "sse")
882 (set_attr "prefix" "maybe_vex")
883 (set_attr "mode" "V2DF")])
;; AVX scalar square root (vsqrtss / vsqrtsd) written as a vec_merge,
;; with operand 2 as the second vec_merge input.
;; NOTE(review): the condition here is AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vm add/sub and div patterns use AVX128_VEC_FLOAT_MODE_P.
;; The iterator only yields 128-bit modes, so presumably the two are
;; equivalent here -- confirm.
885 (define_insn "*avx_vmsqrt<mode>2"
886 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
887 (vec_merge:SSEMODEF2P
889 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
890 (match_operand:SSEMODEF2P 2 "register_operand" "x")
892 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
893 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
894 [(set_attr "type" "sse")
895 (set_attr "prefix" "vex")
896 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar square root (sqrtss / sqrtsd) written as a vec_merge.
;; Operand 2 is tied to the destination, so only operand 1 appears in the
;; template.
898 (define_insn "<sse>_vmsqrt<mode>2"
899 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
900 (vec_merge:SSEMODEF2P
902 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
903 (match_operand:SSEMODEF2P 2 "register_operand" "0")
905 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
906 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "atom_sse_attr" "sqrt")
909 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander, enabled for AVX with SSE math.
;; It calls ix86_emit_swsqrtsf with a final argument of 1, where the
;; sqrt expanders pass 0.
;; NOTE(review): presumably that argument selects the reciprocal form --
;; confirm against ix86_emit_swsqrtsf.
911 (define_expand "rsqrtv8sf2"
912 [(set (match_operand:V8SF 0 "register_operand" "")
914 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
915 "TARGET_AVX && TARGET_SSE_MATH"
917 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; vrsqrtps on V8SF, represented as UNSPEC_RSQRT of the source, which
;; may be a register or memory.
921 (define_insn "avx_rsqrtv8sf2"
922 [(set (match_operand:V8SF 0 "register_operand" "=x")
924 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
926 "vrsqrtps\t{%1, %0|%0, %1}"
927 [(set_attr "type" "sse")
928 (set_attr "prefix" "vex")
929 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander.  It calls ix86_emit_swsqrtsf
;; with a final argument of 1, where the sqrt expanders pass 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
931 (define_expand "rsqrtv4sf2"
932 [(set (match_operand:V4SF 0 "register_operand" "")
934 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
937 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF, represented as UNSPEC_RSQRT of the source.  The "%v"
;; template prefix goes together with the "maybe_vex" prefix attribute.
941 (define_insn "sse_rsqrtv4sf2"
942 [(set (match_operand:V4SF 0 "register_operand" "=x")
944 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
946 "%vrsqrtps\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sse")
948 (set_attr "prefix" "maybe_vex")
949 (set_attr "mode" "V4SF")])
;; vrsqrtss in three-operand form: operand 1 is the value the unspec is
;; applied to, operand 2 is a second register operand.
;; NOTE(review): the enclosing vec_merge lines are not visible in this
;; excerpt.
951 (define_insn "*avx_vmrsqrtv4sf2"
952 [(set (match_operand:V4SF 0 "register_operand" "=x")
954 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
956 (match_operand:V4SF 2 "register_operand" "x")
959 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
960 [(set_attr "type" "sse")
961 (set_attr "prefix" "vex")
962 (set_attr "mode" "SF")])
;; rsqrtss in two-operand form: operand 2 is tied to the destination
;; (constraint "0"), so only operand 1 appears in the template.
;; NOTE(review): the enclosing vec_merge lines are not visible in this
;; excerpt.
964 (define_insn "sse_vmrsqrtv4sf2"
965 [(set (match_operand:V4SF 0 "register_operand" "=x")
967 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
969 (match_operand:V4SF 2 "register_operand" "0")
972 "rsqrtss\t{%1, %0|%0, %1}"
973 [(set_attr "type" "sse")
974 (set_attr "mode" "SF")])
976 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
977 ;; isn't really correct, as those rtl operators aren't defined when
978 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smax / smin expander for the 256-bit packed float modes.  Unless
;; -ffinite-math-only is in effect, operand 1 is forced into a register
;; before the usual operand fix-up.
980 (define_expand "<code><mode>3"
981 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
982 (smaxmin:AVX256MODEF2P
983 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
984 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
985 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
987 if (!flag_finite_math_only)
988 operands[1] = force_reg (<MODE>mode, operands[1]);
989 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; smax / smin expander for the 128-bit packed float modes.  Unless
;; -ffinite-math-only is in effect, operand 1 is forced into a register
;; before the usual operand fix-up.
992 (define_expand "<code><mode>3"
993 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
995 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
996 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
997 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
999 if (!flag_finite_math_only)
1000 operands[1] = force_reg (<MODE>mode, operands[1]);
1001 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max, finite-math variant.  Only enabled when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.  The "%" in operand 1's constraint marks operands 1 and 2 as
;; commutative.  Three-operand VEX template.
1004 (define_insn "*avx_<code><mode>3_finite"
1005 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1007 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1008 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1009 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1010 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1011 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1012 [(set_attr "type" "sseadd")
1013 (set_attr "prefix" "vex")
1014 (set_attr "mode" "<MODE>")])
;; SSE packed min/max, finite-math variant.  Enabled only with
;; flag_finite_math_only and when ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is commutative ("%") and tied to the destination
;; ("0"), hence the two-operand template.
1016 (define_insn "*<code><mode>3_finite"
1017 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1019 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1020 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1021 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1022 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1023 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1024 [(set_attr "type" "sseadd")
1025 (set_attr "mode" "<MODE>")])
;; AVX packed min/max for the general (not finite-math-only) case.
;; Operand 1 is a plain register with no "%" commutative marker, matching
;; the SSE counterpart "*<code><mode>3": the hardware min/max result depends
;; on operand order when NaNs or signed zeros are involved, so the operands
;; must not be swapped here.  The commutative form is only valid under
;; flag_finite_math_only and is provided by "*avx_<code><mode>3_finite".
1027 (define_insn "*avx_<code><mode>3"
1028 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1030 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1031 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1032 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1033 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1034 [(set_attr "type" "sseadd")
1035 (set_attr "prefix" "vex")
1036 (set_attr "mode" "<avxvecmode>")])
;; SSE packed min/max for the general (not finite-math-only) case.
;; Operand 1 must be a register tied to the destination ("0") and carries
;; no commutative marker, so the operand order is preserved.
1038 (define_insn "*<code><mode>3"
1039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1041 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1042 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1043 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1044 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1045 [(set_attr "type" "sseadd")
1046 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max: the operation on operands 1 and 2 is wrapped in a
;; vec_merge, and the template uses the scalar "s" mnemonic form with three
;; operands.  Enabled for AVX128_VEC_FLOAT_MODE_P modes.
;; NOTE(review): "type" is "sse" here but "sseadd" in the non-AVX
;; "<sse>_vm<code><mode>3" pattern -- confirm whether that is intentional.
1048 (define_insn "*avx_vm<code><mode>3"
1049 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1050 (vec_merge:SSEMODEF2P
1052 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1053 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1056 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1057 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1058 [(set_attr "type" "sse")
1059 (set_attr "prefix" "vex")
1060 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar min/max: same vec_merge shape as the AVX variant, but operand
;; 1 is tied to the destination ("0") and the template is two-operand.
1062 (define_insn "<sse>_vm<code><mode>3"
1063 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1064 (vec_merge:SSEMODEF2P
1066 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1067 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1070 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1071 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1072 [(set_attr "type" "sseadd")
1073 (set_attr "mode" "<ssescalarmode>")])
1075 ;; These versions of the min/max patterns implement exactly the operations
1076 ;; min = (op1 < op2 ? op1 : op2)
1077 ;; max = (!(op1 < op2) ? op1 : op2)
1078 ;; Their operands are not commutative, and thus they may be used in the
1079 ;; presence of -0.0 and NaN.
;; AVX packed minimum with fixed operand order (see the comment block
;; above: min = op1 < op2 ? op1 : op2).  Operand 1 is a register, operand 2
;; register or memory; no commutative marker is used.
1081 (define_insn "*avx_ieee_smin<mode>3"
1082 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1084 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1085 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1087 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1088 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1089 [(set_attr "type" "sseadd")
1090 (set_attr "prefix" "vex")
1091 (set_attr "mode" "<avxvecmode>")])
;; AVX packed maximum with fixed operand order (see the comment block
;; above: max = !(op1 < op2) ? op1 : op2).  Same operand layout as the
;; AVX ieee minimum pattern.
1093 (define_insn "*avx_ieee_smax<mode>3"
1094 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1096 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1097 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1099 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1100 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1101 [(set_attr "type" "sseadd")
1102 (set_attr "prefix" "vex")
1103 (set_attr "mode" "<avxvecmode>")])
;; SSE packed minimum with fixed operand order.  Operand 1 is tied to the
;; destination ("0"); the template is the two-operand minp form.
1105 (define_insn "*ieee_smin<mode>3"
1106 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1108 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1109 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1111 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1112 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1113 [(set_attr "type" "sseadd")
1114 (set_attr "mode" "<MODE>")])
;; SSE packed maximum with fixed operand order.  Operand 1 is tied to the
;; destination ("0"); the template is the two-operand maxp form.
1116 (define_insn "*ieee_smax<mode>3"
1117 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1119 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1120 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1122 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1123 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1124 [(set_attr "type" "sseadd")
1125 (set_attr "mode" "<MODE>")])
;; AVX add/subtract on V8SF.  The RTL reuses operands 1 and 2 through
;; match_dup for the subtraction half; the template is the three-operand
;; vaddsubps.
1127 (define_insn "avx_addsubv8sf3"
1128 [(set (match_operand:V8SF 0 "register_operand" "=x")
1131 (match_operand:V8SF 1 "register_operand" "x")
1132 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1133 (minus:V8SF (match_dup 1) (match_dup 2))
1136 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1137 [(set_attr "type" "sseadd")
1138 (set_attr "prefix" "vex")
1139 (set_attr "mode" "V8SF")])
;; AVX add/subtract on V4DF; same shape as the V8SF pattern, emitting the
;; three-operand vaddsubpd.
1141 (define_insn "avx_addsubv4df3"
1142 [(set (match_operand:V4DF 0 "register_operand" "=x")
1145 (match_operand:V4DF 1 "register_operand" "x")
1146 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1147 (minus:V4DF (match_dup 1) (match_dup 2))
1150 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1151 [(set_attr "type" "sseadd")
1152 (set_attr "prefix" "vex")
1153 (set_attr "mode" "V4DF")])
;; VEX-encoded add/subtract on the 128-bit V4SF mode (three-operand
;; vaddsubps); operand 1 is an independent register input.
1155 (define_insn "*avx_addsubv4sf3"
1156 [(set (match_operand:V4SF 0 "register_operand" "=x")
1159 (match_operand:V4SF 1 "register_operand" "x")
1160 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1161 (minus:V4SF (match_dup 1) (match_dup 2))
1164 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1165 [(set_attr "type" "sseadd")
1166 (set_attr "prefix" "vex")
1167 (set_attr "mode" "V4SF")])
;; SSE3 add/subtract on V4SF.  Operand 1 is tied to the destination ("0"),
;; giving the two-operand addsubps template.  The insn sets the
;; "prefix_rep" attribute to 1.
1169 (define_insn "sse3_addsubv4sf3"
1170 [(set (match_operand:V4SF 0 "register_operand" "=x")
1173 (match_operand:V4SF 1 "register_operand" "0")
1174 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1175 (minus:V4SF (match_dup 1) (match_dup 2))
1178 "addsubps\t{%2, %0|%0, %2}"
1179 [(set_attr "type" "sseadd")
1180 (set_attr "prefix_rep" "1")
1181 (set_attr "mode" "V4SF")])
;; VEX-encoded add/subtract on the 128-bit V2DF mode (three-operand
;; vaddsubpd); operand 1 is an independent register input.
1183 (define_insn "*avx_addsubv2df3"
1184 [(set (match_operand:V2DF 0 "register_operand" "=x")
1187 (match_operand:V2DF 1 "register_operand" "x")
1188 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1189 (minus:V2DF (match_dup 1) (match_dup 2))
1192 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1193 [(set_attr "type" "sseadd")
1194 (set_attr "prefix" "vex")
1195 (set_attr "mode" "V2DF")])
;; SSE3 add/subtract on V2DF.  Operand 1 is tied to the destination ("0"),
;; giving the two-operand addsubpd template.  Marked with atom_unit
;; "complex".
1197 (define_insn "sse3_addsubv2df3"
1198 [(set (match_operand:V2DF 0 "register_operand" "=x")
1201 (match_operand:V2DF 1 "register_operand" "0")
1202 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1203 (minus:V2DF (match_dup 1) (match_dup 2))
1206 "addsubpd\t{%2, %0|%0, %2}"
1207 [(set_attr "type" "sseadd")
1208 (set_attr "atom_unit" "complex")
1209 (set_attr "mode" "V2DF")])
;; AVX horizontal add/subtract on V4DF (vhaddpd / vhsubpd via the
;; <plusminus_mnemonic> substitution).  The vec_selects pick element pairs
;; (0,1) and (2,3) of operand 1, followed by the same pairs of operand 2.
;; NOTE(review): the pairs are listed as op1(0,1), op1(2,3), op2(0,1),
;; op2(2,3).  Confirm this element order against the per-128-bit-lane
;; result layout of the 256-bit vhaddpd/vhsubpd instruction.
1211 (define_insn "avx_h<plusminus_insn>v4df3"
1212 [(set (match_operand:V4DF 0 "register_operand" "=x")
1217 (match_operand:V4DF 1 "register_operand" "x")
1218 (parallel [(const_int 0)]))
1219 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1221 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1222 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1226 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1227 (parallel [(const_int 0)]))
1228 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1230 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1231 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1233 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1234 [(set_attr "type" "sseadd")
1235 (set_attr "prefix" "vex")
1236 (set_attr "mode" "V4DF")])
;; AVX horizontal add/subtract on V8SF (vhaddps / vhsubps).  The
;; vec_selects walk the inputs in this order: operand 1 elements 0-3,
;; operand 2 elements 0-3, then operand 1 elements 4-7 and operand 2
;; elements 4-7, always as adjacent pairs.
1238 (define_insn "avx_h<plusminus_insn>v8sf3"
1239 [(set (match_operand:V8SF 0 "register_operand" "=x")
1245 (match_operand:V8SF 1 "register_operand" "x")
1246 (parallel [(const_int 0)]))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1249 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1250 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1254 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1255 (parallel [(const_int 0)]))
1256 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1258 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1259 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1263 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1264 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1266 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1267 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1270 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1273 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1274 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1276 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1277 [(set_attr "type" "sseadd")
1278 (set_attr "prefix" "vex")
1279 (set_attr "mode" "V8SF")])
;; VEX-encoded horizontal add/subtract on V4SF: adjacent pairs (0,1) and
;; (2,3) of operand 1, followed by the same pairs of operand 2.
;; Three-operand template.
1281 (define_insn "*avx_h<plusminus_insn>v4sf3"
1282 [(set (match_operand:V4SF 0 "register_operand" "=x")
1287 (match_operand:V4SF 1 "register_operand" "x")
1288 (parallel [(const_int 0)]))
1289 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1291 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1292 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1297 (parallel [(const_int 0)]))
1298 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1300 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1301 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1303 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1304 [(set_attr "type" "sseadd")
1305 (set_attr "prefix" "vex")
1306 (set_attr "mode" "V4SF")])
;; SSE3 horizontal add/subtract on V4SF.  Same element selection as the
;; AVX V4SF pattern, but operand 1 is tied to the destination ("0") and the
;; template is two-operand.  Sets atom_unit "complex" and prefix_rep 1.
1308 (define_insn "sse3_h<plusminus_insn>v4sf3"
1309 [(set (match_operand:V4SF 0 "register_operand" "=x")
1314 (match_operand:V4SF 1 "register_operand" "0")
1315 (parallel [(const_int 0)]))
1316 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1318 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1319 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1323 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1324 (parallel [(const_int 0)]))
1325 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1327 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1328 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1330 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1331 [(set_attr "type" "sseadd")
1332 (set_attr "atom_unit" "complex")
1333 (set_attr "prefix_rep" "1")
1334 (set_attr "mode" "V4SF")])
;; VEX-encoded horizontal add/subtract on V2DF: elements 0 and 1 of
;; operand 1, then elements 0 and 1 of operand 2.  Three-operand template.
1336 (define_insn "*avx_h<plusminus_insn>v2df3"
1337 [(set (match_operand:V2DF 0 "register_operand" "=x")
1341 (match_operand:V2DF 1 "register_operand" "x")
1342 (parallel [(const_int 0)]))
1343 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1346 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1347 (parallel [(const_int 0)]))
1348 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1350 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1351 [(set_attr "type" "sseadd")
1352 (set_attr "prefix" "vex")
1353 (set_attr "mode" "V2DF")])
;; SSE3 horizontal add/subtract on V2DF.  Operand 1 is tied to the
;; destination ("0"); two-operand template.
1355 (define_insn "sse3_h<plusminus_insn>v2df3"
1356 [(set (match_operand:V2DF 0 "register_operand" "=x")
1360 (match_operand:V2DF 1 "register_operand" "0")
1361 (parallel [(const_int 0)]))
1362 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1365 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1366 (parallel [(const_int 0)]))
1367 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1369 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1370 [(set_attr "type" "sseadd")
1371 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF register (operand 1) into operand 0.
;; Two expansion strategies appear in the body: two chained
;; gen_sse3_haddv4sf3 calls through a fresh temporary, or the generic
;; ix86_expand_reduc_v4sf helper driven by gen_addv4sf3 (presumably chosen
;; by SSE3 availability -- confirm against the selecting condition).
1373 (define_expand "reduc_splus_v4sf"
1374 [(match_operand:V4SF 0 "register_operand" "")
1375 (match_operand:V4SF 1 "register_operand" "")]
1380 rtx tmp = gen_reg_rtx (V4SFmode);
1381 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1382 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1385 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF register: a single gen_sse3_haddv2df3 with
;; operand 1 supplied as both inputs.
1389 (define_expand "reduc_splus_v2df"
1390 [(match_operand:V2DF 0 "register_operand" "")
1391 (match_operand:V2DF 1 "register_operand" "")]
1394 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF register, delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
1398 (define_expand "reduc_smax_v4sf"
1399 [(match_operand:V4SF 0 "register_operand" "")
1400 (match_operand:V4SF 1 "register_operand" "")]
1403 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register, delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
1407 (define_expand "reduc_smin_v4sf"
1408 [(match_operand:V4SF 0 "register_operand" "")
1409 (match_operand:V4SF 1 "register_operand" "")]
1412 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1418 ;; Parallel floating point comparisons
1420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare with an immediate predicate.
;; Operand 3 is an SImode constant accepted by const_0_to_31_operand and is
;; printed as the first AT&T-syntax operand of vcmpp<suffix>.
1422 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1423 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1425 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1426 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1427 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1430 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1431 [(set_attr "type" "ssecmp")
1432 (set_attr "length_immediate" "1")
1433 (set_attr "prefix" "vex")
1434 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare with an immediate predicate
;; (operand 3, accepted by const_0_to_31_operand), wrapped in a vec_merge.
;; Operand 0 carries the "=x" output constraint so that it is treated as a
;; written SSE register, like the destination of every other insn in this
;; group; an empty constraint string gives the register allocator no
;; class or output information for the destination.
1436 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1437 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1438 (vec_merge:SSEMODEF2P
1440 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1441 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1442 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1447 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1448 [(set_attr "type" "ssecmp")
1449 (set_attr "length_immediate" "1")
1450 (set_attr "prefix" "vex")
1451 (set_attr "mode" "<ssescalarmode>")])
1453 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1454 ;; may generate 256bit vector compare instructions.
;; AVX packed compare where the comparison itself is operand 3, matched by
;; avx_comparison_float_operator.  The template prints the predicate with
;; the %D3 operand modifier inside the vcmp...p mnemonic.
1455 (define_insn "*avx_maskcmp<mode>3"
1456 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1457 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1458 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1459 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1460 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1461 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1462 [(set_attr "type" "ssecmp")
1463 (set_attr "prefix" "vex")
1464 (set_attr "length_immediate" "1")
1465 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over the SSEMODEF4 iterator (scalar SF/DF and packed
;; V4SF/V2DF).  The comparison is operand 3 (sse_comparison_operator);
;; operand 1 is tied to the destination.  The condition accepts either
;; SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P modes.
1467 (define_insn "<sse>_maskcmp<mode>3"
1468 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1469 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1470 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1471 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1473 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1474 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1475 [(set_attr "type" "ssecmp")
1476 (set_attr "length_immediate" "1")
1477 (set_attr "mode" "<MODE>")])
;; SSE scalar mask compare: the comparison (operand 3) is wrapped in a
;; vec_merge and emitted with the scalar "s" mnemonic form.  Operand 1 is
;; tied to the destination.
1479 (define_insn "<sse>_vmmaskcmp<mode>3"
1480 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1481 (vec_merge:SSEMODEF2P
1482 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1483 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1484 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1487 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1488 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1489 [(set_attr "type" "ssecmp")
1490 (set_attr "length_immediate" "1")
1491 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Both inputs are
;; element 0 of a <ssevecmode> vector operand.  The prefix_data16 attribute
;; is computed from the "mode" attribute (DF versus everything else).
1493 (define_insn "<sse>_comi"
1494 [(set (reg:CCFP FLAGS_REG)
1497 (match_operand:<ssevecmode> 0 "register_operand" "x")
1498 (parallel [(const_int 0)]))
1500 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1501 (parallel [(const_int 0)]))))]
1502 "SSE_FLOAT_MODE_P (<MODE>mode)"
1503 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1504 [(set_attr "type" "ssecomi")
1505 (set_attr "prefix" "maybe_vex")
1506 (set_attr "prefix_rep" "0")
1507 (set (attr "prefix_data16")
1508 (if_then_else (eq_attr "mode" "DF")
1510 (const_string "0")))
1511 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but the flags register is set in CCFPU
;; mode and the mnemonic is ucomis<suffix>.
1513 (define_insn "<sse>_ucomi"
1514 [(set (reg:CCFPU FLAGS_REG)
1517 (match_operand:<ssevecmode> 0 "register_operand" "x")
1518 (parallel [(const_int 0)]))
1520 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1521 (parallel [(const_int 0)]))))]
1522 "SSE_FLOAT_MODE_P (<MODE>mode)"
1523 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1524 [(set_attr "type" "ssecomi")
1525 (set_attr "prefix" "maybe_vex")
1526 (set_attr "prefix_rep" "0")
1527 (set (attr "prefix_data16")
1528 (if_then_else (eq_attr "mode" "DF")
1530 (const_string "0")))
1531 (set_attr "mode" "<MODE>")])
;; Vector conditional select for SSE float vector modes.
;;   operand 0    destination register
;;   operands 1,2 the two values selected between (general_operand)
;;   operand 3    the comparison operator (no predicate restriction)
;;   operands 4,5 the comparison inputs
;; Expansion is delegated to ix86_expand_fp_vcond; its boolean result is
;; captured in "ok".
1533 (define_expand "vcond<mode>"
1534 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1535 (if_then_else:SSEMODEF2P
1536 (match_operator 3 ""
1537 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1538 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1539 (match_operand:SSEMODEF2P 1 "general_operand" "")
1540 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1541 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1543 bool ok = ix86_expand_fp_vcond (operands);
1548 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1550 ;; Parallel floating point logical operations
1552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not for float vector modes: operand 1 (register) and
;; operand 2 (register or memory), emitted as three-operand vandnp<suffix>.
1554 (define_insn "avx_andnot<mode>3"
1555 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1558 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1559 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1560 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1561 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1562 [(set_attr "type" "sselog")
1563 (set_attr "prefix" "vex")
1564 (set_attr "mode" "<avxvecmode>")])
;; SSE packed and-not for float vector modes.  Operand 1 is tied to the
;; destination ("0"); two-operand andnp<suffix> template.
1566 (define_insn "<sse>_andnot<mode>3"
1567 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1570 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1571 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1572 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1573 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sselog")
1575 (set_attr "mode" "<MODE>")])
;; Expander for the plogic codes on 256-bit AVX float vector modes.  The
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1577 (define_expand "<code><mode>3"
1578 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1579 (plogic:AVX256MODEF2P
1580 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1581 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1582 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1583 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical operation on float vector modes.  Operands 1 and 2
;; are commutative ("%"); the insn additionally requires
;; ix86_binary_operator_ok.  Three-operand v<plogicprefix>p<suffix>.
1585 (define_insn "*avx_<code><mode>3"
1586 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1588 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1589 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1590 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1591 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1592 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1593 [(set_attr "type" "sselog")
1594 (set_attr "prefix" "vex")
1595 (set_attr "mode" "<avxvecmode>")])
;; Expander for logical operations on 128-bit SSE float vector modes; the
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1597 (define_expand "<code><mode>3"
1598 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1600 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1601 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1602 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1603 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed logical operation on float vector modes.  Operand 1 is
;; commutative and tied to the destination ("%0"); the insn also requires
;; ix86_binary_operator_ok.  Two-operand <plogicprefix>p<suffix>.
1605 (define_insn "*<code><mode>3"
1606 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1608 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1609 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1610 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1611 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1612 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1613 [(set_attr "type" "sselog")
1614 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  The result (operand 0) is the IOR of two
;; temporaries, operands 4 and 5.  The terms combine the complement of
;; mask operand 3 with operand 1, and mask operand 3 itself with operand 2.
1616 (define_expand "copysign<mode>3"
1619 (not:SSEMODEF2P (match_dup 3))
1620 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1622 (and:SSEMODEF2P (match_dup 3)
1623 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1624 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1625 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1626 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Operand 3 is the mask built by ix86_build_signbit_mask for the scalar
;; element mode; operands 4 and 5 are fresh pseudo registers.
1628 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1630 operands[4] = gen_reg_rtx (<MODE>mode);
1631 operands[5] = gen_reg_rtx (<MODE>mode);
1634 ;; Also define scalar versions. These are used for abs, neg, and
1635 ;; conditional move. Using subregs into vector modes causes register
1636 ;; allocation lossage. These patterns do not allow memory operands
1637 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not under AVX.  All operands are registers; the insn
;; is emitted with the packed vandnp mnemonic and its "mode" attribute is
;; the corresponding vector mode (<ssevecmode>).
1639 (define_insn "*avx_andnot<mode>3"
1640 [(set (match_operand:MODEF 0 "register_operand" "=x")
1643 (match_operand:MODEF 1 "register_operand" "x"))
1644 (match_operand:MODEF 2 "register_operand" "x")))]
1645 "AVX_FLOAT_MODE_P (<MODE>mode)"
1646 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1647 [(set_attr "type" "sselog")
1648 (set_attr "prefix" "vex")
1649 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not for SSE.  Register-only operands, with operand 1
;; tied to the destination; emitted as the packed andnp instruction.
1651 (define_insn "*andnot<mode>3"
1652 [(set (match_operand:MODEF 0 "register_operand" "=x")
1655 (match_operand:MODEF 1 "register_operand" "0"))
1656 (match_operand:MODEF 2 "register_operand" "x")))]
1657 "SSE_FLOAT_MODE_P (<MODE>mode)"
1658 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "sselog")
1660 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation under AVX.  Register-only operands,
;; emitted with the packed v<plogicprefix>p mnemonic in three-operand form.
1662 (define_insn "*avx_<code><mode>3"
1663 [(set (match_operand:MODEF 0 "register_operand" "=x")
1665 (match_operand:MODEF 1 "register_operand" "x")
1666 (match_operand:MODEF 2 "register_operand" "x")))]
1667 "AVX_FLOAT_MODE_P (<MODE>mode)"
1668 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1669 [(set_attr "type" "sselog")
1670 (set_attr "prefix" "vex")
1671 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation for SSE.  Register-only operands, with
;; operand 1 tied to the destination; two-operand packed mnemonic.
1673 (define_insn "*<code><mode>3"
1674 [(set (match_operand:MODEF 0 "register_operand" "=x")
1676 (match_operand:MODEF 1 "register_operand" "0")
1677 (match_operand:MODEF 2 "register_operand" "x")))]
1678 "SSE_FLOAT_MODE_P (<MODE>mode)"
1679 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1680 [(set_attr "type" "sselog")
1681 (set_attr "mode" "<ssevecmode>")])
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; FMA4 floating point multiply/accumulate instructions.  This includes the
1686 ;; scalar version of the instructions as well as the vector versions.
1688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1690 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1691 ;; combine to generate a multiply/add with two memory references. We then
1692 ;; split this insn, into loading up the destination register with one of the
1693 ;; memory operations. If we don't manage to split the insn, reload will
1694 ;; generate the appropriate moves. The reason this is needed, is that combine
1695 ;; has already folded one of the memory references into both the multiply and
1696 ;; add insns, and it can't generate a new pseudo. I.e.:
1697 ;; (set (reg1) (mem (addr1)))
1698 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1699 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; 256-bit FMA4 multiply-add over the FMA4MODEF4 modes.  There are three
;; constraint alternatives; each one permits memory for a different one of
;; operands 1-3.  The insn condition additionally requires
;; ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true).
1701 (define_insn "fma4_fmadd<mode>4256"
1702 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1705 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1706 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1707 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1709 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1710 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1711 [(set_attr "type" "ssemuladd")
1712 (set_attr "mode" "<MODE>")])
1714 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when ix86_fma4_valid_op_p rejects the operands with
;; fifth argument 1 but accepts them with 2 (presumably the permitted
;; number of memory operands -- confirm), and operand 0 is not mentioned in
;; operands 1-3.  It calls ix86_expand_fma4_multiple_memory and then
;; re-emits the 256-bit fmadd insn.
1716 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1719 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1720 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1721 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1723 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1724 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1725 && !reg_mentioned_p (operands[0], operands[1])
1726 && !reg_mentioned_p (operands[0], operands[2])
1727 && !reg_mentioned_p (operands[0], operands[3])"
1730 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1731 emit_insn (gen_fma4_fmadd<mode>4256 (operands[0], operands[1],
1732 operands[2], operands[3]));
1736 ;; Floating multiply and subtract
1737 ;; Allow two memory operands the same as fmadd
;; Same operand layout and constraint alternatives as the 256-bit fmadd
;; pattern; emits vfmsub<suffix>.
1738 (define_insn "fma4_fmsub<mode>4256"
1739 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1742 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1743 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1744 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1746 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1747 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1748 [(set_attr "type" "ssemuladd")
1749 (set_attr "mode" "<MODE>")])
1751 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Same guard as the fmadd split (invalid with fifth argument 1, valid with
;; 2, operand 0 not mentioned in operands 1-3); re-emits the 256-bit fmsub
;; insn after ix86_expand_fma4_multiple_memory.
1753 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1756 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1757 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1758 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1760 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1761 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1762 && !reg_mentioned_p (operands[0], operands[1])
1763 && !reg_mentioned_p (operands[0], operands[2])
1764 && !reg_mentioned_p (operands[0], operands[3])"
1767 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1768 emit_insn (gen_fma4_fmsub<mode>4256 (operands[0], operands[1],
1769 operands[2], operands[3]));
1773 ;; Floating point negative multiply and add
1774 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1775 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1776 ;; Allow two memory operands to help in optimizing.
;; Operand 3 appears first in the RTL, ahead of operands 1 and 2; the
;; output template still prints the operands as %3, %2, %1, %0.
1777 (define_insn "fma4_fnmadd<mode>4256"
1778 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1780 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")
1782 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1783 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1785 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1786 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1787 [(set_attr "type" "ssemuladd")
1788 (set_attr "mode" "<MODE>")])
1790 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Mirrors the fnmadd insn's operand order (operand 3 first).  Same guard
;; as the other FMA4 splits; re-emits the 256-bit fnmadd insn after
;; ix86_expand_fma4_multiple_memory.
1792 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1794 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")
1796 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1797 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))]
1799 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1800 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1801 && !reg_mentioned_p (operands[0], operands[1])
1802 && !reg_mentioned_p (operands[0], operands[2])
1803 && !reg_mentioned_p (operands[0], operands[3])"
1806 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1807 emit_insn (gen_fma4_fnmadd<mode>4256 (operands[0], operands[1],
1808 operands[2], operands[3]));
1812 ;; Floating point negative multiply and subtract
1813 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1814 ;; Allow 2 memory operands to help with optimization
;; Only two constraint alternatives here: operand 1 is register-only in
;; both, and memory is allowed for operand 3 or operand 2 respectively.
;; The validity check passes "false" as its last argument, unlike the
;; fmadd/fmsub/fnmadd patterns.
1815 (define_insn "fma4_fnmsub<mode>4256"
1816 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1820 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
1821 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
1822 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1824 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
1825 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1826 [(set_attr "type" "ssemuladd")
1827 (set_attr "mode" "<MODE>")])
1829 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Guard: invalid with fifth argument 1 but valid with 2 (last argument
;; "false", as in the fnmsub insn), and operand 0 not mentioned in
;; operands 1-3.  Re-emits the 256-bit fnmsub insn after
;; ix86_expand_fma4_multiple_memory.
1831 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1835 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" ""))
1836 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1837 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1839 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
1840 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
1841 && !reg_mentioned_p (operands[0], operands[1])
1842 && !reg_mentioned_p (operands[0], operands[2])
1843 && !reg_mentioned_p (operands[0], operands[3])"
1846 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1847 emit_insn (gen_fma4_fnmsub<mode>4256 (operands[0], operands[1],
1848 operands[2], operands[3]));
1852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA4 multiply-add over the SSEMODEF4 iterator (SF, DF, V4SF, V2DF).
;; Same three constraint alternatives as the 256-bit variant: each permits
;; memory for a different one of operands 1-3.
1853 (define_insn "fma4_fmadd<mode>4"
1854 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1857 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1858 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1859 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1861 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1862 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1863 [(set_attr "type" "ssemuladd")
1864 (set_attr "mode" "<MODE>")])
1866 ;; Split fmadd with two memory operands into a load and the fmadd.
;; SSEMODEF4 counterpart of the 256-bit fmadd split: same guard, then
;; ix86_expand_fma4_multiple_memory followed by gen_fma4_fmadd<mode>4.
1868 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1871 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1872 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1873 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1875 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1876 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1877 && !reg_mentioned_p (operands[0], operands[1])
1878 && !reg_mentioned_p (operands[0], operands[2])
1879 && !reg_mentioned_p (operands[0], operands[3])"
1882 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1883 emit_insn (gen_fma4_fmadd<mode>4 (operands[0], operands[1],
1884 operands[2], operands[3]));
1888 ;; For the scalar operations, use operand1 for the upper words that aren't
1889 ;; modified, so restrict the forms that are generated.
1890 ;; Scalar version of fmadd
;; Two constraint alternatives: operand 1 is register-only in both, and
;; memory is allowed for operand 3 or operand 2 respectively.  The validity
;; check uses fifth argument 1 (versus 2 in the non-scalar patterns).
1891 (define_insn "fma4_vmfmadd<mode>4"
1892 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1893 (vec_merge:SSEMODEF2P
1896 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1897 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1898 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1902 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
1903 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1904 [(set_attr "type" "ssemuladd")
1905 (set_attr "mode" "<MODE>")])
1907 ;; Floating multiply and subtract
1908 ;; Allow two memory operands the same as fmadd
;; SSEMODEF4 variant with the same three constraint alternatives as
;; fma4_fmadd<mode>4; emits vfmsub<suffix>.
1909 (define_insn "fma4_fmsub<mode>4"
1910 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1913 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1914 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1915 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1917 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1918 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1919 [(set_attr "type" "ssemuladd")
1920 (set_attr "mode" "<MODE>")])
1922 ;; Split fmsub with two memory operands into a load and the fmsub.
;; SSEMODEF4 counterpart of the 256-bit fmsub split: same guard, then
;; ix86_expand_fma4_multiple_memory followed by gen_fma4_fmsub<mode>4.
1924 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1927 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1928 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1929 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1931 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1932 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1933 && !reg_mentioned_p (operands[0], operands[1])
1934 && !reg_mentioned_p (operands[0], operands[2])
1935 && !reg_mentioned_p (operands[0], operands[3])"
1938 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1939 emit_insn (gen_fma4_fmsub<mode>4 (operands[0], operands[1],
1940 operands[2], operands[3]));
1944 ;; For the scalar operations, use operand1 for the upper words that aren't
1945 ;; modified, so restrict the forms that are generated.
1946 ;; Scalar version of fmsub
;; Emits vfmsub<ssemodesuffixf2s>.  Operands 0 and 1 are constrained to SSE
;; registers ("x"); at most one of operands 2 and 3 is memory ("xm").
;; NOTE(review): the last argument of the validity check is false here but
;; true in fma4_vmfmadd<mode>4 -- confirm the difference is intended.
1947 (define_insn "fma4_vmfmsub<mode>4"
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1949 (vec_merge:SSEMODEF2P
1952 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1953 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1954 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1958 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
1959 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1960 [(set_attr "type" "ssemuladd")
1961 (set_attr "mode" "<MODE>")])
1963 ;; Floating point negative multiply and add
1964 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1965 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1966 ;; Allow two memory operands to help in optimizing.
;; Emits vfnmadd<ssemodesuffixf4>.  Operand 3 appears first in the RTL
;; pattern, ahead of operands 1 and 2; the output template still prints the
;; operands in numeric order.  Each of the three alternatives lets a
;; different one of operands 1-3 be memory ("xm").
1967 (define_insn "fma4_fnmadd<mode>4"
1968 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1970 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")
1972 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1973 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1975 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1976 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1977 [(set_attr "type" "ssemuladd")
1978 (set_attr "mode" "<MODE>")])
1980 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The split applies when the operands fail ix86_fma4_valid_op_p with fifth
;; argument 1 but pass it with fifth argument 2, and operand 0 is not
;; mentioned in operands 1-3.  It calls ix86_expand_fma4_multiple_memory on
;; the operands and then emits fma4_fnmadd<mode>4 again.
1982 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1984 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1986 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1987 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1989 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1990 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1991 && !reg_mentioned_p (operands[0], operands[1])
1992 && !reg_mentioned_p (operands[0], operands[2])
1993 && !reg_mentioned_p (operands[0], operands[3])"
1996 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1997 emit_insn (gen_fma4_fnmadd<mode>4 (operands[0], operands[1],
1998 operands[2], operands[3]));
2002 ;; For the scalar operations, use operand1 for the upper words that aren't
2003 ;; modified, so restrict the forms that are generated.
2004 ;; Scalar version of fnmadd
;; Emits vfnmadd<ssemodesuffixf2s>.  Operand 3 is listed first in the RTL
;; pattern.  Operands 0 and 1 are constrained to SSE registers ("x"); at
;; most one of operands 2 and 3 is memory ("xm").
2005 (define_insn "fma4_vmfnmadd<mode>4"
2006 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2007 (vec_merge:SSEMODEF2P
2009 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2011 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2012 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2016 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2017 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2018 [(set_attr "type" "ssemuladd")
2019 (set_attr "mode" "<MODE>")])
2021 ;; Floating point negative multiply and subtract
2022 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
2023 ;; Allow 2 memory operands to help with optimization
;; Emits vfnmsub<ssemodesuffixf4>.  Only two alternatives are provided:
;; operand 1 is always an SSE register ("x") and at most one of operands 2
;; and 3 is memory ("xm").  The validity check is called with fifth
;; argument 2 and last argument false.
2024 (define_insn "fma4_fnmsub<mode>4"
2025 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
2029 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x"))
2030 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
2031 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
2033 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2034 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2035 [(set_attr "type" "ssemuladd")
2036 (set_attr "mode" "<MODE>")])
2038 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split applies when the operands fail ix86_fma4_valid_op_p with fifth
;; argument 1 but pass it with fifth argument 2 (last argument false in
;; both calls), and operand 0 is not mentioned in operands 1-3.  It calls
;; ix86_expand_fma4_multiple_memory and then emits fma4_fnmsub<mode>4 again.
2040 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
2044 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
2045 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
2046 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
2048 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
2049 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
2050 && !reg_mentioned_p (operands[0], operands[1])
2051 && !reg_mentioned_p (operands[0], operands[2])
2052 && !reg_mentioned_p (operands[0], operands[3])"
2055 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
2056 emit_insn (gen_fma4_fnmsub<mode>4 (operands[0], operands[1],
2057 operands[2], operands[3]));
2061 ;; For the scalar operations, use operand1 for the upper words that aren't
2062 ;; modified, so restrict the forms that are generated.
2063 ;; Scalar version of fnmsub
;; Emits vfnmsub<ssemodesuffixf2s>.  Operands 0 and 1 are constrained to SSE
;; registers ("x"); at most one of operands 2 and 3 is memory ("xm").
;; NOTE(review): the validity check passes 2 as its fifth argument here,
;; while fma4_vmfmadd, fma4_vmfmsub and fma4_vmfnmadd pass 1, and neither
;; alternative has more than one "xm" operand -- confirm this is intended.
2064 (define_insn "fma4_vmfnmsub<mode>4"
2065 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2066 (vec_merge:SSEMODEF2P
2070 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2071 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2072 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2076 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2077 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2078 [(set_attr "type" "ssemuladd")
2079 (set_attr "mode" "<MODE>")])
2081 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic form for the FMA4MODEF4 modes (V8SF, V4DF): the pattern is
;; wrapped in UNSPEC_FMA4_INTRINSIC and is enabled by TARGET_FMA4 plus the
;; operand validity check.  Emits vfmadd<fma4modesuffixf4>.
2083 (define_insn "fma4i_fmadd<mode>4256"
2084 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2088 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2089 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2090 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2091 UNSPEC_FMA4_INTRINSIC))]
2092 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2093 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2094 [(set_attr "type" "ssemuladd")
2095 (set_attr "mode" "<MODE>")])
;; Intrinsic form for the FMA4MODEF4 modes (V8SF, V4DF), wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfmsub<fma4modesuffixf4>; at most one of
;; operands 2 and 3 is memory ("xm").
2097 (define_insn "fma4i_fmsub<mode>4256"
2098 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2102 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2103 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2104 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2105 UNSPEC_FMA4_INTRINSIC))]
2106 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2107 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2108 [(set_attr "type" "ssemuladd")
2109 (set_attr "mode" "<MODE>")])
;; Intrinsic form for the FMA4MODEF4 modes (V8SF, V4DF), wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Operand 3 is listed first in the RTL pattern.
;; Emits vfnmadd<fma4modesuffixf4>.
2111 (define_insn "fma4i_fnmadd<mode>4256"
2112 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2115 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
2117 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2118 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")))]
2119 UNSPEC_FMA4_INTRINSIC))]
2120 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2121 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2122 [(set_attr "type" "ssemuladd")
2123 (set_attr "mode" "<MODE>")])
;; Intrinsic form for the FMA4MODEF4 modes (V8SF, V4DF), wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfnmsub<fma4modesuffixf4>.  The validity
;; check is called with last argument false, unlike the other 256-bit
;; intrinsic patterns, which pass true.
2125 (define_insn "fma4i_fnmsub<mode>4256"
2126 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2131 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
2132 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2133 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2134 UNSPEC_FMA4_INTRINSIC))]
2135 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2136 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2137 [(set_attr "type" "ssemuladd")
2138 (set_attr "mode" "<MODE>")])
2139 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic form over the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC and enabled by TARGET_FMA4 plus the operand
;; validity check.  Emits vfmadd<ssemodesuffixf4>.
2141 (define_insn "fma4i_fmadd<mode>4"
2142 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2146 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2147 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2148 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2149 UNSPEC_FMA4_INTRINSIC))]
2150 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2151 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2152 [(set_attr "type" "ssemuladd")
2153 (set_attr "mode" "<MODE>")])
;; Intrinsic form over the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfmsub<ssemodesuffixf4>; at most one of
;; operands 2 and 3 is memory ("xm").
2155 (define_insn "fma4i_fmsub<mode>4"
2156 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2160 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2161 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2162 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2163 UNSPEC_FMA4_INTRINSIC))]
2164 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2165 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2166 [(set_attr "type" "ssemuladd")
2167 (set_attr "mode" "<MODE>")])
;; Intrinsic form over the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Operand 3 is listed first in the RTL pattern.
;; Emits vfnmadd<ssemodesuffixf4>.
2169 (define_insn "fma4i_fnmadd<mode>4"
2170 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2173 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2175 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2176 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))]
2177 UNSPEC_FMA4_INTRINSIC))]
2178 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2179 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2180 [(set_attr "type" "ssemuladd")
2181 (set_attr "mode" "<MODE>")])
;; Intrinsic form over the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfnmsub<ssemodesuffixf4>.  The validity
;; check is called with last argument false.
2183 (define_insn "fma4i_fnmsub<mode>4"
2184 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2189 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2190 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2191 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2192 UNSPEC_FMA4_INTRINSIC))]
2193 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2194 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2195 [(set_attr "type" "ssemuladd")
2196 (set_attr "mode" "<MODE>")])
2198 ;; For the scalar operations, use operand1 for the upper words that aren't
2199 ;; modified, so restrict the forms that are accepted.
;; Scalar intrinsic form: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC.
;; Operand 1 must be a register (register_operand, "x").  Emits
;; vfmadd<ssemodesuffixf2s>; the "mode" attribute is <ssescalarmode>.
2200 (define_insn "fma4i_vmfmadd<mode>4"
2201 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2203 [(vec_merge:SSEMODEF2P
2206 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2207 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2208 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2211 UNSPEC_FMA4_INTRINSIC))]
2212 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2213 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2214 [(set_attr "type" "ssemuladd")
2215 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic form: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC.
;; Operand 1 must be a register (register_operand, "x").  Emits
;; vfmsub<ssemodesuffixf2s>; the "mode" attribute is <ssescalarmode>.
2217 (define_insn "fma4i_vmfmsub<mode>4"
2218 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2220 [(vec_merge:SSEMODEF2P
2223 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2224 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2225 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2228 UNSPEC_FMA4_INTRINSIC))]
2229 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2230 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2231 [(set_attr "type" "ssemuladd")
2232 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic form: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC, with
;; operand 3 listed first.  Emits vfnmadd<ssemodesuffixf2s>.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand here, whereas
;; fma4i_vmfmadd, fma4i_vmfmsub and fma4i_vmfnmsub use register_operand; the
;; last argument of the validity check is also true here and false there.
;; Confirm both differences are intended.
2234 (define_insn "fma4i_vmfnmadd<mode>4"
2235 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2237 [(vec_merge:SSEMODEF2P
2239 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2241 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2242 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2245 UNSPEC_FMA4_INTRINSIC))]
2246 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2247 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2248 [(set_attr "type" "ssemuladd")
2249 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic form: a vec_merge wrapped in UNSPEC_FMA4_INTRINSIC.
;; Operand 1 must be a register (register_operand, "x").  Emits
;; vfnmsub<ssemodesuffixf2s>; the "mode" attribute is <ssescalarmode>.
2251 (define_insn "fma4i_vmfnmsub<mode>4"
2252 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2254 [(vec_merge:SSEMODEF2P
2258 (match_operand:SSEMODEF2P 1 "register_operand" "x,x"))
2259 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2260 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2263 UNSPEC_FMA4_INTRINSIC))]
2264 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2265 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2266 [(set_attr "type" "ssemuladd")
2267 (set_attr "mode" "<ssescalarmode>")])
2269 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2271 ;; FMA4 Parallel floating point multiply addsub and subadd operations
2273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF form; emits vfmaddsubps.  At most one of operands 2 and 3 is memory
;; ("xm").
;; NOTE(review): the validity check passes 2 as its fifth argument although
;; no alternative has more than one "xm" operand -- confirm intended.
2275 (define_insn "fma4_fmaddsubv8sf4"
2276 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2280 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2281 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2282 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2290 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2291 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2292 [(set_attr "type" "ssemuladd")
2293 (set_attr "mode" "V8SF")])
;; V4DF form; emits vfmaddsubpd.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2295 (define_insn "fma4_fmaddsubv4df4"
2296 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2300 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2301 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2302 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2310 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2311 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2312 [(set_attr "type" "ssemuladd")
2313 (set_attr "mode" "V4DF")])
;; V4SF form; emits vfmaddsubps.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2315 (define_insn "fma4_fmaddsubv4sf4"
2316 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2320 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2321 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2322 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2330 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2331 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2332 [(set_attr "type" "ssemuladd")
2333 (set_attr "mode" "V4SF")])
;; V2DF form; emits vfmaddsubpd.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2335 (define_insn "fma4_fmaddsubv2df4"
2336 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2340 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2341 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2342 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2350 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2351 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2352 [(set_attr "type" "ssemuladd")
2353 (set_attr "mode" "V2DF")])
;; V8SF form; emits vfmsubaddps.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2355 (define_insn "fma4_fmsubaddv8sf4"
2356 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2360 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2361 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2362 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2370 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2371 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2372 [(set_attr "type" "ssemuladd")
2373 (set_attr "mode" "V8SF")])
;; V4DF form; emits vfmsubaddpd.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2375 (define_insn "fma4_fmsubaddv4df4"
2376 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2380 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2381 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2382 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2390 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2391 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2392 [(set_attr "type" "ssemuladd")
2393 (set_attr "mode" "V4DF")])
;; V4SF form; emits vfmsubaddps.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2395 (define_insn "fma4_fmsubaddv4sf4"
2396 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2400 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2401 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2402 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2410 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2411 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2412 [(set_attr "type" "ssemuladd")
2413 (set_attr "mode" "V4SF")])
;; V2DF form; emits vfmsubaddpd.  Operands 0 and 1 are SSE registers ("x");
;; at most one of operands 2 and 3 is memory ("xm").
2415 (define_insn "fma4_fmsubaddv2df4"
2416 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2420 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2421 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2422 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2430 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2431 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2432 [(set_attr "type" "ssemuladd")
2433 (set_attr "mode" "V2DF")])
2435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic V8SF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.
;; At most one of operands 2 and 3 is memory ("xm").
2437 (define_insn "fma4i_fmaddsubv8sf4"
2438 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2443 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2444 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2445 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2452 UNSPEC_FMA4_INTRINSIC))]
2454 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2455 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2456 [(set_attr "type" "ssemuladd")
2457 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.
;; At most one of operands 2 and 3 is memory ("xm").
2459 (define_insn "fma4i_fmaddsubv4df4"
2460 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2465 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2466 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2467 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2474 UNSPEC_FMA4_INTRINSIC))]
2476 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2477 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2478 [(set_attr "type" "ssemuladd")
2479 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.
;; At most one of operands 2 and 3 is memory ("xm").
2481 (define_insn "fma4i_fmaddsubv4sf4"
2482 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2487 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2488 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2489 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2496 UNSPEC_FMA4_INTRINSIC))]
2498 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2499 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2500 [(set_attr "type" "ssemuladd")
2501 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.
;; At most one of operands 2 and 3 is memory ("xm").
2503 (define_insn "fma4i_fmaddsubv2df4"
2504 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2509 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2510 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2511 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2518 UNSPEC_FMA4_INTRINSIC))]
2520 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2521 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2522 [(set_attr "type" "ssemuladd")
2523 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.
;; At most one of operands 2 and 3 is memory ("xm").
2525 (define_insn "fma4i_fmsubaddv8sf4"
2526 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2531 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2532 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2533 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2540 UNSPEC_FMA4_INTRINSIC))]
2542 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2543 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2544 [(set_attr "type" "ssemuladd")
2545 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.
;; At most one of operands 2 and 3 is memory ("xm").
2547 (define_insn "fma4i_fmsubaddv4df4"
2548 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2553 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2554 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2555 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2562 UNSPEC_FMA4_INTRINSIC))]
2564 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2565 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2566 [(set_attr "type" "ssemuladd")
2567 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.
;; At most one of operands 2 and 3 is memory ("xm").
2569 (define_insn "fma4i_fmsubaddv4sf4"
2570 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2575 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2576 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2577 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2584 UNSPEC_FMA4_INTRINSIC))]
2586 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2587 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2588 [(set_attr "type" "ssemuladd")
2589 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF form wrapped in UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.
;; At most one of operands 2 and 3 is memory ("xm").
2591 (define_insn "fma4i_fmsubaddv2df4"
2592 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2597 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2598 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2599 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2606 UNSPEC_FMA4_INTRINSIC))]
2608 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2609 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2610 [(set_attr "type" "ssemuladd")
2611 (set_attr "mode" "V2DF")])
2613 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2615 ;; Parallel single-precision floating point conversion operations
2617 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits cvtpi2ps.  Operand 2 (V2SI, constraint "ym") is converted with
;; float:V2SF; operand 1 is tied to the output register by constraint "0".
2619 (define_insn "sse_cvtpi2ps"
2620 [(set (match_operand:V4SF 0 "register_operand" "=x")
2623 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2624 (match_operand:V4SF 1 "register_operand" "0")
2627 "cvtpi2ps\t{%2, %0|%0, %2}"
2628 [(set_attr "type" "ssecvt")
2629 (set_attr "mode" "V4SF")])
;; Emits cvtps2pi.  The V4SF source (constraint "xm") goes through an
;; unspec:V4SI, elements 0 and 1 are selected, and the V2SI result is
;; written to a "y" register.  The "unit" attribute is "mmx".
2631 (define_insn "sse_cvtps2pi"
2632 [(set (match_operand:V2SI 0 "register_operand" "=y")
2634 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2636 (parallel [(const_int 0) (const_int 1)])))]
2638 "cvtps2pi\t{%1, %0|%0, %1}"
2639 [(set_attr "type" "ssecvt")
2640 (set_attr "unit" "mmx")
2641 (set_attr "mode" "DI")])
;; Emits cvttps2pi.  The V4SF source is converted with fix:V4SI, elements 0
;; and 1 are selected, and the V2SI result is written to a "y" register.
;; NOTE(review): the "mode" attribute is "SF" here but "DI" in sse_cvtps2pi
;; -- confirm the difference is intended.
2643 (define_insn "sse_cvttps2pi"
2644 [(set (match_operand:V2SI 0 "register_operand" "=y")
2646 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2647 (parallel [(const_int 0) (const_int 1)])))]
2649 "cvttps2pi\t{%1, %0|%0, %1}"
2650 [(set_attr "type" "ssecvt")
2651 (set_attr "unit" "mmx")
2652 (set_attr "prefix_rep" "0")
2653 (set_attr "mode" "SF")])
;; Three-operand VEX form; emits vcvtsi2ss.  Operand 2 (SI, constraint "rm")
;; is converted with float:SF; operand 1 is a separate "x" register rather
;; than being tied to the output.
2655 (define_insn "*avx_cvtsi2ss"
2656 [(set (match_operand:V4SF 0 "register_operand" "=x")
2659 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2660 (match_operand:V4SF 1 "register_operand" "x")
2663 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2664 [(set_attr "type" "sseicvt")
2665 (set_attr "prefix" "vex")
2666 (set_attr "mode" "SF")])
;; Emits cvtsi2ss.  Operand 1 is tied to the output (constraint "0").  The
;; two alternatives separate a register source ("r") from a memory source
;; ("m") so the decode attributes can differ per alternative.
2668 (define_insn "sse_cvtsi2ss"
2669 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2672 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2673 (match_operand:V4SF 1 "register_operand" "0,0")
2676 "cvtsi2ss\t{%2, %0|%0, %2}"
2677 [(set_attr "type" "sseicvt")
2678 (set_attr "athlon_decode" "vector,double")
2679 (set_attr "amdfam10_decode" "vector,double")
2680 (set_attr "mode" "SF")])
;; 64-bit-source VEX form, enabled by TARGET_AVX && TARGET_64BIT; emits
;; vcvtsi2ssq.  Operand 2 is DImode (constraint "rm").
2682 (define_insn "*avx_cvtsi2ssq"
2683 [(set (match_operand:V4SF 0 "register_operand" "=x")
2686 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2687 (match_operand:V4SF 1 "register_operand" "x")
2689 "TARGET_AVX && TARGET_64BIT"
2690 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2691 [(set_attr "type" "sseicvt")
2692 (set_attr "length_vex" "4")
2693 (set_attr "prefix" "vex")
2694 (set_attr "mode" "SF")])
;; 64-bit-source form, enabled by TARGET_SSE && TARGET_64BIT; emits
;; cvtsi2ssq.  Operand 1 is tied to the output (constraint "0").
;; NOTE(review): the second alternative of operand 2 is "rm" here, whereas
;; sse_cvtsi2ss uses "r,m"; since the decode attributes are given per
;; alternative, confirm that "rm" (rather than "m") is intended.
2696 (define_insn "sse_cvtsi2ssq"
2697 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2700 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2701 (match_operand:V4SF 1 "register_operand" "0,0")
2703 "TARGET_SSE && TARGET_64BIT"
2704 "cvtsi2ssq\t{%2, %0|%0, %2}"
2705 [(set_attr "type" "sseicvt")
2706 (set_attr "prefix_rex" "1")
2707 (set_attr "athlon_decode" "vector,double")
2708 (set_attr "amdfam10_decode" "vector,double")
2709 (set_attr "mode" "SF")])
;; Emits %vcvtss2si.  Element 0 of the V4SF source is selected and wrapped
;; in UNSPEC_FIX_NOTRUNC; the SI result goes to a general register ("r").
;; NOTE(review): unlike sse_cvtss2si_2, no "amdfam10_decode" attribute is
;; set here -- confirm that is intended.
2711 (define_insn "sse_cvtss2si"
2712 [(set (match_operand:SI 0 "register_operand" "=r,r")
2715 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2716 (parallel [(const_int 0)]))]
2717 UNSPEC_FIX_NOTRUNC))]
2719 "%vcvtss2si\t{%1, %0|%0, %1}"
2720 [(set_attr "type" "sseicvt")
2721 (set_attr "athlon_decode" "double,vector")
2722 (set_attr "prefix_rep" "1")
2723 (set_attr "prefix" "maybe_vex")
2724 (set_attr "mode" "SI")])
;; Variant of the cvtss2si pattern whose source is a plain SF operand
;; (register "x" or memory "m") inside UNSPEC_FIX_NOTRUNC.  Emits
;; %vcvtss2si into an SI general register.
2726 (define_insn "sse_cvtss2si_2"
2727 [(set (match_operand:SI 0 "register_operand" "=r,r")
2728 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2729 UNSPEC_FIX_NOTRUNC))]
2731 "%vcvtss2si\t{%1, %0|%0, %1}"
2732 [(set_attr "type" "sseicvt")
2733 (set_attr "athlon_decode" "double,vector")
2734 (set_attr "amdfam10_decode" "double,double")
2735 (set_attr "prefix_rep" "1")
2736 (set_attr "prefix" "maybe_vex")
2737 (set_attr "mode" "SI")])
;; DImode-result form, enabled by TARGET_SSE && TARGET_64BIT.  Element 0 of
;; the V4SF source is selected and wrapped in UNSPEC_FIX_NOTRUNC.  The
;; template is %vcvtss2si{q}.
;; NOTE(review): no "amdfam10_decode" attribute is set, unlike
;; sse_cvtss2siq_2 -- confirm intended.
2739 (define_insn "sse_cvtss2siq"
2740 [(set (match_operand:DI 0 "register_operand" "=r,r")
2743 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2744 (parallel [(const_int 0)]))]
2745 UNSPEC_FIX_NOTRUNC))]
2746 "TARGET_SSE && TARGET_64BIT"
2747 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2748 [(set_attr "type" "sseicvt")
2749 (set_attr "athlon_decode" "double,vector")
2750 (set_attr "prefix_rep" "1")
2751 (set_attr "prefix" "maybe_vex")
2752 (set_attr "mode" "DI")])
;; DImode-result variant whose source is a plain SF operand inside
;; UNSPEC_FIX_NOTRUNC; enabled by TARGET_SSE && TARGET_64BIT.  The template
;; is %vcvtss2si{q}.
2754 (define_insn "sse_cvtss2siq_2"
2755 [(set (match_operand:DI 0 "register_operand" "=r,r")
2756 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2757 UNSPEC_FIX_NOTRUNC))]
2758 "TARGET_SSE && TARGET_64BIT"
2759 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2760 [(set_attr "type" "sseicvt")
2761 (set_attr "athlon_decode" "double,vector")
2762 (set_attr "amdfam10_decode" "double,double")
2763 (set_attr "prefix_rep" "1")
2764 (set_attr "prefix" "maybe_vex")
2765 (set_attr "mode" "DI")])
;; Emits %vcvttss2si.  Element 0 of the V4SF source (register "x" or memory
;; "m") is selected; the SI result goes to a general register ("r").
2767 (define_insn "sse_cvttss2si"
2768 [(set (match_operand:SI 0 "register_operand" "=r,r")
2771 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2772 (parallel [(const_int 0)]))))]
2774 "%vcvttss2si\t{%1, %0|%0, %1}"
2775 [(set_attr "type" "sseicvt")
2776 (set_attr "athlon_decode" "double,vector")
2777 (set_attr "amdfam10_decode" "double,double")
2778 (set_attr "prefix_rep" "1")
2779 (set_attr "prefix" "maybe_vex")
2780 (set_attr "mode" "SI")])
;; DImode-result form of the cvttss2si pattern, enabled by
;; TARGET_SSE && TARGET_64BIT.  The template is %vcvttss2si{q}.
2782 (define_insn "sse_cvttss2siq"
2783 [(set (match_operand:DI 0 "register_operand" "=r,r")
2786 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2787 (parallel [(const_int 0)]))))]
2788 "TARGET_SSE && TARGET_64BIT"
2789 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2790 [(set_attr "type" "sseicvt")
2791 (set_attr "athlon_decode" "double,vector")
2792 (set_attr "amdfam10_decode" "double,double")
2793 (set_attr "prefix_rep" "1")
2794 (set_attr "prefix" "maybe_vex")
2795 (set_attr "mode" "DI")])
;; VEX form iterated over AVXMODEDCVTDQ2PS; emits vcvtdq2ps.  The source
;; operand has mode <avxcvtvecmode> and may be a register or memory ("xm").
2797 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2798 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2799 (float:AVXMODEDCVTDQ2PS
2800 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2802 "vcvtdq2ps\t{%1, %0|%0, %1}"
2803 [(set_attr "type" "ssecvt")
2804 (set_attr "prefix" "vex")
2805 (set_attr "mode" "<avxvecmode>")])
;; Emits cvtdq2ps: float:V4SF of a V4SI source (register or memory, "xm")
;; into an SSE register.
2807 (define_insn "sse2_cvtdq2ps"
2808 [(set (match_operand:V4SF 0 "register_operand" "=x")
2809 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2811 "cvtdq2ps\t{%1, %0|%0, %1}"
2812 [(set_attr "type" "ssecvt")
2813 (set_attr "mode" "V4SF")])
;; Expander built from several sets: a float:V4SF conversion of operand 1,
;; an lt comparison of operand 5 against operand 3, an AND of operand 6 with
;; operand 4, and a final plus of operands 5 and 7 into operand 0.
;; The preparation code sets operand 3 to a zero V4SF vector, sets operand 4
;; to a V4SF constant vector built from TWO32r (real_ldexp of dconst1 by
;; 32), and allocates fresh V4SF pseudo registers for operands 5-7.
2815 (define_expand "sse2_cvtudq2ps"
2817 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2819 (lt:V4SF (match_dup 5) (match_dup 3)))
2821 (and:V4SF (match_dup 6) (match_dup 4)))
2822 (set (match_operand:V4SF 0 "register_operand" "")
2823 (plus:V4SF (match_dup 5) (match_dup 7)))]
2826 REAL_VALUE_TYPE TWO32r;
2830 real_ldexp (&TWO32r, &dconst1, 32);
2831 x = const_double_from_real_value (TWO32r, SFmode);
2833 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2834 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2836 for (i = 5; i < 8; i++)
2837 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX form iterated over AVXMODEDCVTPS2DQ; emits vcvtps2dq.  The conversion
;; is expressed as UNSPEC_FIX_NOTRUNC of the <avxcvtvecmode> source.
2840 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2841 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2842 (unspec:AVXMODEDCVTPS2DQ
2843 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2844 UNSPEC_FIX_NOTRUNC))]
2846 "vcvtps2dq\t{%1, %0|%0, %1}"
2847 [(set_attr "type" "ssecvt")
2848 (set_attr "prefix" "vex")
2849 (set_attr "mode" "<avxvecmode>")])
;; Emits cvtps2dq: UNSPEC_FIX_NOTRUNC of a V4SF source ("xm") producing
;; V4SI.  Sets "prefix_data16" to "1".
2851 (define_insn "sse2_cvtps2dq"
2852 [(set (match_operand:V4SI 0 "register_operand" "=x")
2853 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2854 UNSPEC_FIX_NOTRUNC))]
2856 "cvtps2dq\t{%1, %0|%0, %1}"
2857 [(set_attr "type" "ssecvt")
2858 (set_attr "prefix_data16" "1")
2859 (set_attr "mode" "TI")])
;; VEX form iterated over AVXMODEDCVTPS2DQ; emits vcvttps2dq.  The
;; conversion is expressed with the fix RTL code rather than an unspec.
2861 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2862 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2863 (fix:AVXMODEDCVTPS2DQ
2864 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2866 "vcvttps2dq\t{%1, %0|%0, %1}"
2867 [(set_attr "type" "ssecvt")
2868 (set_attr "prefix" "vex")
2869 (set_attr "mode" "<avxvecmode>")])
;; Emits cvttps2dq: fix:V4SI of a V4SF source ("xm").  Sets "prefix_rep" to
;; "1" and "prefix_data16" to "0".
2871 (define_insn "sse2_cvttps2dq"
2872 [(set (match_operand:V4SI 0 "register_operand" "=x")
2873 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2875 "cvttps2dq\t{%1, %0|%0, %1}"
2876 [(set_attr "type" "ssecvt")
2877 (set_attr "prefix_rep" "1")
2878 (set_attr "prefix_data16" "0")
2879 (set_attr "mode" "TI")])
2881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2883 ;; Parallel double-precision floating point conversion operations
2885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits cvtpi2pd: float:V2DF of a V2SI source.  The two alternatives take
;; the source from a "y" register or from memory ("m"); "unit" and
;; "prefix_data16" are set only for the first alternative ("*" otherwise).
2887 (define_insn "sse2_cvtpi2pd"
2888 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2889 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2891 "cvtpi2pd\t{%1, %0|%0, %1}"
2892 [(set_attr "type" "ssecvt")
2893 (set_attr "unit" "mmx,*")
2894 (set_attr "prefix_data16" "1,*")
2895 (set_attr "mode" "V2DF")])
;; Emits cvtpd2pi: UNSPEC_FIX_NOTRUNC of a V2DF source ("xm") producing a
;; V2SI result in a "y" register.  The "unit" attribute is "mmx".
2897 (define_insn "sse2_cvtpd2pi"
2898 [(set (match_operand:V2SI 0 "register_operand" "=y")
2899 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2900 UNSPEC_FIX_NOTRUNC))]
2902 "cvtpd2pi\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "unit" "mmx")
2905 (set_attr "prefix_data16" "1")
2906 (set_attr "mode" "DI")])
;; Emits cvttpd2pi: fix:V2SI of a V2DF source ("xm") producing a V2SI result
;; in a "y" register.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" in
;; sse2_cvtpd2pi -- confirm the difference is intended.
2908 (define_insn "sse2_cvttpd2pi"
2909 [(set (match_operand:V2SI 0 "register_operand" "=y")
2910 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2912 "cvttpd2pi\t{%1, %0|%0, %1}"
2913 [(set_attr "type" "ssecvt")
2914 (set_attr "unit" "mmx")
2915 (set_attr "prefix_data16" "1")
2916 (set_attr "mode" "TI")])
;; Three-operand VEX form; emits vcvtsi2sd.  Operand 2 (SI, constraint "rm")
;; is converted with float:DF; operand 1 is a separate "x" register.
2918 (define_insn "*avx_cvtsi2sd"
2919 [(set (match_operand:V2DF 0 "register_operand" "=x")
2922 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2923 (match_operand:V2DF 1 "register_operand" "x")
2926 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2927 [(set_attr "type" "sseicvt")
2928 (set_attr "prefix" "vex")
2929 (set_attr "mode" "DF")])
;; Emits cvtsi2sd.  Operand 1 is tied to the output (constraint "0").  The
;; register ("r") and memory ("m") sources are separate alternatives with
;; different decode attributes.
2931 (define_insn "sse2_cvtsi2sd"
2932 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2935 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2936 (match_operand:V2DF 1 "register_operand" "0,0")
2939 "cvtsi2sd\t{%2, %0|%0, %2}"
2940 [(set_attr "type" "sseicvt")
2941 (set_attr "mode" "DF")
2942 (set_attr "athlon_decode" "double,direct")
2943 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit-source VEX form, enabled by TARGET_AVX && TARGET_64BIT; emits
;; vcvtsi2sdq.  Operand 2 is DImode (constraint "rm").
2945 (define_insn "*avx_cvtsi2sdq"
2946 [(set (match_operand:V2DF 0 "register_operand" "=x")
2949 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2950 (match_operand:V2DF 1 "register_operand" "x")
2952 "TARGET_AVX && TARGET_64BIT"
2953 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2954 [(set_attr "type" "sseicvt")
2955 (set_attr "length_vex" "4")
2956 (set_attr "prefix" "vex")
2957 (set_attr "mode" "DF")])
;; 64-bit-source form, enabled by TARGET_SSE2 && TARGET_64BIT; emits
;; cvtsi2sdq.  Operand 1 is tied to the output (constraint "0"); operand 2
;; is DImode with separate register ("r") and memory ("m") alternatives.
2959 (define_insn "sse2_cvtsi2sdq"
2960 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2963 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2964 (match_operand:V2DF 1 "register_operand" "0,0")
2966 "TARGET_SSE2 && TARGET_64BIT"
2967 "cvtsi2sdq\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseicvt")
2969 (set_attr "prefix_rex" "1")
2970 (set_attr "mode" "DF")
2971 (set_attr "athlon_decode" "double,direct")
2972 (set_attr "amdfam10_decode" "vector,double")])
;; Emits %vcvtsd2si.  Element 0 of the V2DF source is selected and wrapped
;; in UNSPEC_FIX_NOTRUNC; the SI result goes to a general register ("r").
;; NOTE(review): no "amdfam10_decode" attribute is set, unlike
;; sse2_cvtsd2si_2 -- confirm intended.
2974 (define_insn "sse2_cvtsd2si"
2975 [(set (match_operand:SI 0 "register_operand" "=r,r")
2978 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2979 (parallel [(const_int 0)]))]
2980 UNSPEC_FIX_NOTRUNC))]
2982 "%vcvtsd2si\t{%1, %0|%0, %1}"
2983 [(set_attr "type" "sseicvt")
2984 (set_attr "athlon_decode" "double,vector")
2985 (set_attr "prefix_rep" "1")
2986 (set_attr "prefix" "maybe_vex")
2987 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a scalar DF operand rather than
;; an element selected from a V2DF vector; emits the same cvtsd2si insn.
2989 (define_insn "sse2_cvtsd2si_2"
2990 [(set (match_operand:SI 0 "register_operand" "=r,r")
2991 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2992 UNSPEC_FIX_NOTRUNC))]
2994 "%vcvtsd2si\t{%1, %0|%0, %1}"
2995 [(set_attr "type" "sseicvt")
2996 (set_attr "athlon_decode" "double,vector")
2997 (set_attr "amdfam10_decode" "double,double")
2998 (set_attr "prefix_rep" "1")
2999 (set_attr "prefix" "maybe_vex")
3000 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: DImode result from element 0 of V2DF operand 1, expressed
;; with UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and TARGET_64BIT.
3002 (define_insn "sse2_cvtsd2siq"
3003 [(set (match_operand:DI 0 "register_operand" "=r,r")
3006 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3007 (parallel [(const_int 0)]))]
3008 UNSPEC_FIX_NOTRUNC))]
3009 "TARGET_SSE2 && TARGET_64BIT"
3010 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3011 [(set_attr "type" "sseicvt")
3012 (set_attr "athlon_decode" "double,vector")
3013 (set_attr "prefix_rep" "1")
3014 (set_attr "prefix" "maybe_vex")
3015 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a scalar DF operand; same
;; TARGET_SSE2 && TARGET_64BIT condition and same emitted insn.
3017 (define_insn "sse2_cvtsd2siq_2"
3018 [(set (match_operand:DI 0 "register_operand" "=r,r")
3019 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
3020 UNSPEC_FIX_NOTRUNC))]
3021 "TARGET_SSE2 && TARGET_64BIT"
3022 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3023 [(set_attr "type" "sseicvt")
3024 (set_attr "athlon_decode" "double,vector")
3025 (set_attr "amdfam10_decode" "double,double")
3026 (set_attr "prefix_rep" "1")
3027 (set_attr "prefix" "maybe_vex")
3028 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result from element 0 of V2DF operand 1 (register or
;; memory).  Emitted with the %v prefix so the VEX form is used when enabled.
3030 (define_insn "sse2_cvttsd2si"
3031 [(set (match_operand:SI 0 "register_operand" "=r,r")
3034 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3035 (parallel [(const_int 0)]))))]
3037 "%vcvttsd2si\t{%1, %0|%0, %1}"
3038 [(set_attr "type" "sseicvt")
3039 (set_attr "prefix_rep" "1")
3040 (set_attr "prefix" "maybe_vex")
3041 (set_attr "mode" "SI")
3042 (set_attr "athlon_decode" "double,vector")
3043 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit cvttsd2si: DImode result from element 0 of V2DF operand 1.
;; Requires TARGET_SSE2 and TARGET_64BIT.
3045 (define_insn "sse2_cvttsd2siq"
3046 [(set (match_operand:DI 0 "register_operand" "=r,r")
3049 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3050 (parallel [(const_int 0)]))))]
3051 "TARGET_SSE2 && TARGET_64BIT"
3052 "%vcvttsd2siq\t{%1, %0|%0, %1}"
3053 [(set_attr "type" "sseicvt")
3054 (set_attr "prefix_rep" "1")
3055 (set_attr "prefix" "maybe_vex")
3056 (set_attr "mode" "DI")
3057 (set_attr "athlon_decode" "double,vector")
3058 (set_attr "amdfam10_decode" "double,double")])
;; 256-bit int->double conversion: all four elements of V4SI operand 1 are
;; converted (float:V4DF) into V4DF operand 0 with vcvtdq2pd.
3060 (define_insn "avx_cvtdq2pd256"
3061 [(set (match_operand:V4DF 0 "register_operand" "=x")
3062 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
3064 "vcvtdq2pd\t{%1, %0|%0, %1}"
3065 [(set_attr "type" "ssecvt")
3066 (set_attr "prefix" "vex")
3067 (set_attr "mode" "V4DF")])
;; 128-bit cvtdq2pd: only elements 0 and 1 of V4SI operand 1 take part in
;; the conversion that produces V2DF operand 0.
3069 (define_insn "sse2_cvtdq2pd"
3070 [(set (match_operand:V2DF 0 "register_operand" "=x")
3073 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3074 (parallel [(const_int 0) (const_int 1)]))))]
3076 "%vcvtdq2pd\t{%1, %0|%0, %1}"
3077 [(set_attr "type" "ssecvt")
3078 (set_attr "prefix" "maybe_vex")
3079 (set_attr "mode" "V2DF")])
;; 256-bit double->int conversion: V4DF operand 1 to V4SI operand 0,
;; expressed with UNSPEC_FIX_NOTRUNC and emitted as vcvtpd2dq{y}.
3081 (define_insn "avx_cvtpd2dq256"
3082 [(set (match_operand:V4SI 0 "register_operand" "=x")
3083 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3084 UNSPEC_FIX_NOTRUNC))]
3086 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3087 [(set_attr "type" "ssecvt")
3088 (set_attr "prefix" "vex")
3089 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: the V2SI conversion result of V2DF operand 1 is
;; placed in V4SI operand 0; the preparation statement supplies an all-zero
;; V2SI constant as operand 2.
3091 (define_expand "sse2_cvtpd2dq"
3092 [(set (match_operand:V4SI 0 "register_operand" "")
3094 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
3098 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the RTL produced by the sse2_cvtpd2dq expander; operand 2 must
;; be a zero V2SI constant.  The C output fragment picks vcvtpd2dq{x} when
;; TARGET_AVX is set and plain cvtpd2dq otherwise.
3100 (define_insn "*sse2_cvtpd2dq"
3101 [(set (match_operand:V4SI 0 "register_operand" "=x")
3103 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3105 (match_operand:V2SI 2 "const0_operand" "")))]
3107 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
3108 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
3109 [(set_attr "type" "ssecvt")
3110 (set_attr "prefix_rep" "1")
3111 (set_attr "prefix_data16" "0")
3112 (set_attr "prefix" "maybe_vex")
3113 (set_attr "mode" "TI")
3114 (set_attr "amdfam10_decode" "double")])
;; 256-bit truncating double->int conversion: (fix:V4SI) of V4DF operand 1
;; into V4SI operand 0, emitted as vcvttpd2dq{y}.
3116 (define_insn "avx_cvttpd2dq256"
3117 [(set (match_operand:V4SI 0 "register_operand" "=x")
3118 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3120 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3121 [(set_attr "type" "ssecvt")
3122 (set_attr "prefix" "vex")
3123 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: the (fix:V2SI) result of V2DF operand 1 is placed
;; in V4SI operand 0; the preparation statement supplies an all-zero V2SI
;; constant as operand 2.
3125 (define_expand "sse2_cvttpd2dq"
3126 [(set (match_operand:V4SI 0 "register_operand" "")
3128 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
3131 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the RTL produced by the sse2_cvttpd2dq expander; operand 2
;; must be a zero V2SI constant.  Emits vcvttpd2dq{x} when TARGET_AVX is set
;; and cvttpd2dq otherwise.
3133 (define_insn "*sse2_cvttpd2dq"
3134 [(set (match_operand:V4SI 0 "register_operand" "=x")
3136 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3137 (match_operand:V2SI 2 "const0_operand" "")))]
3139 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
3140 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
3141 [(set_attr "type" "ssecvt")
3142 (set_attr "prefix" "maybe_vex")
3143 (set_attr "mode" "TI")
3144 (set_attr "amdfam10_decode" "double")])
;; AVX cvtsd2ss: V2DF operand 2 is narrowed with float_truncate:V2SF; V4SF
;; operand 1 is the extra source of the three-operand vcvtsd2ss and V4SF
;; operand 0 is the destination.
3146 (define_insn "*avx_cvtsd2ss"
3147 [(set (match_operand:V4SF 0 "register_operand" "=x")
3150 (float_truncate:V2SF
3151 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
3152 (match_operand:V4SF 1 "register_operand" "x")
3155 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
3156 [(set_attr "type" "ssecvt")
3157 (set_attr "prefix" "vex")
3158 (set_attr "mode" "SF")])
;; SSE2 cvtsd2ss: same RTL shape as *avx_cvtsd2ss, but operand 1 is tied to
;; the destination (constraint "0") for the two-operand encoding.
3160 (define_insn "sse2_cvtsd2ss"
3161 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3164 (float_truncate:V2SF
3165 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
3166 (match_operand:V4SF 1 "register_operand" "0,0")
3169 "cvtsd2ss\t{%2, %0|%0, %2}"
3170 [(set_attr "type" "ssecvt")
3171 (set_attr "athlon_decode" "vector,double")
3172 (set_attr "amdfam10_decode" "vector,double")
3173 (set_attr "mode" "SF")])
;; AVX cvtss2sd: elements 0 and 1 of V4SF operand 2 feed the conversion;
;; V2DF operand 1 is the extra source of the three-operand vcvtss2sd and
;; V2DF operand 0 is the destination.
3175 (define_insn "*avx_cvtss2sd"
3176 [(set (match_operand:V2DF 0 "register_operand" "=x")
3180 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3181 (parallel [(const_int 0) (const_int 1)])))
3182 (match_operand:V2DF 1 "register_operand" "x")
3185 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3186 [(set_attr "type" "ssecvt")
3187 (set_attr "prefix" "vex")
3188 (set_attr "mode" "DF")])
;; SSE2 cvtss2sd: same RTL shape as *avx_cvtss2sd, but operand 1 is tied to
;; the destination (constraint "0") for the two-operand encoding.
3190 (define_insn "sse2_cvtss2sd"
3191 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3195 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3196 (parallel [(const_int 0) (const_int 1)])))
3197 (match_operand:V2DF 1 "register_operand" "0,0")
3200 "cvtss2sd\t{%2, %0|%0, %2}"
3201 [(set_attr "type" "ssecvt")
3202 (set_attr "amdfam10_decode" "vector,double")
3203 (set_attr "mode" "DF")])
;; 256-bit double->float narrowing: float_truncate of V4DF operand 1 into
;; V4SF operand 0, emitted as vcvtpd2ps{y}.
3205 (define_insn "avx_cvtpd2ps256"
3206 [(set (match_operand:V4SF 0 "register_operand" "=x")
3207 (float_truncate:V4SF
3208 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3210 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3211 [(set_attr "type" "ssecvt")
3212 (set_attr "prefix" "vex")
3213 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: the float_truncate:V2SF result of V2DF operand 1 is
;; placed in V4SF operand 0; the preparation statement supplies an all-zero
;; V2SF constant as operand 2.
3215 (define_expand "sse2_cvtpd2ps"
3216 [(set (match_operand:V4SF 0 "register_operand" "")
3218 (float_truncate:V2SF
3219 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3222 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the RTL produced by the sse2_cvtpd2ps expander; operand 2 must
;; be a zero V2SF constant.  Emits vcvtpd2ps{x} when TARGET_AVX is set and
;; cvtpd2ps otherwise.
3224 (define_insn "*sse2_cvtpd2ps"
3225 [(set (match_operand:V4SF 0 "register_operand" "=x")
3227 (float_truncate:V2SF
3228 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3229 (match_operand:V2SF 2 "const0_operand" "")))]
3231 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3232 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3233 [(set_attr "type" "ssecvt")
3234 (set_attr "prefix_data16" "1")
3235 (set_attr "prefix" "maybe_vex")
3236 (set_attr "mode" "V4SF")
3237 (set_attr "amdfam10_decode" "double")])
;; 256-bit float->double widening: V4SF operand 1 (register or memory) to
;; V4DF operand 0 with vcvtps2pd.
3239 (define_insn "avx_cvtps2pd256"
3240 [(set (match_operand:V4DF 0 "register_operand" "=x")
3242 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3244 "vcvtps2pd\t{%1, %0|%0, %1}"
3245 [(set_attr "type" "ssecvt")
3246 (set_attr "prefix" "vex")
3247 (set_attr "mode" "V4DF")])
;; 128-bit cvtps2pd: only elements 0 and 1 of V4SF operand 1 take part in the
;; conversion that produces V2DF operand 0.
3249 (define_insn "sse2_cvtps2pd"
3250 [(set (match_operand:V2DF 0 "register_operand" "=x")
3253 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3254 (parallel [(const_int 0) (const_int 1)]))))]
3256 "%vcvtps2pd\t{%1, %0|%0, %1}"
3257 [(set_attr "type" "ssecvt")
3258 (set_attr "prefix" "maybe_vex")
3259 (set_attr "mode" "V2DF")
3260 (set_attr "prefix_data16" "0")
3261 (set_attr "amdfam10_decode" "direct")])
;; Expander that widens V4SF operand 1 into V2DF operand 0.  The preparation
;; code allocates a fresh V4SF pseudo in operands[2]; the first selection
;; starts at index 6 and the final set converts elements 0 and 1.
3263 (define_expand "vec_unpacks_hi_v4sf"
3268 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3269 (parallel [(const_int 6)
3273 (set (match_operand:V2DF 0 "register_operand" "")
3277 (parallel [(const_int 0) (const_int 1)]))))]
3280 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander that widens elements 0 and 1 of V4SF operand 1 into V2DF
;; operand 0.
3283 (define_expand "vec_unpacks_lo_v4sf"
3284 [(set (match_operand:V2DF 0 "register_operand" "")
3287 (match_operand:V4SF 1 "nonimmediate_operand" "")
3288 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF expander: operand 1 is first unpacked into a V4SI temporary
;; with gen_vec_unpacks_hi_v8hi, and that temporary is then converted into
;; V4SF operand 0 with gen_sse2_cvtdq2ps.
3291 (define_expand "vec_unpacks_float_hi_v8hi"
3292 [(match_operand:V4SF 0 "register_operand" "")
3293 (match_operand:V8HI 1 "register_operand" "")]
3296 rtx tmp = gen_reg_rtx (V4SImode);
3298 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3299 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: operand 1 is first unpacked into a V4SI temporary
;; with gen_vec_unpacks_lo_v8hi, and that temporary is then converted into
;; V4SF operand 0 with gen_sse2_cvtdq2ps.
3303 (define_expand "vec_unpacks_float_lo_v8hi"
3304 [(match_operand:V4SF 0 "register_operand" "")
3305 (match_operand:V8HI 1 "register_operand" "")]
3308 rtx tmp = gen_reg_rtx (V4SImode);
3310 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3311 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: operand 1 is first unpacked into a V4SI temporary
;; with gen_vec_unpacku_hi_v8hi, and that temporary is then converted into
;; V4SF operand 0 with gen_sse2_cvtdq2ps.
3315 (define_expand "vec_unpacku_float_hi_v8hi"
3316 [(match_operand:V4SF 0 "register_operand" "")
3317 (match_operand:V8HI 1 "register_operand" "")]
3320 rtx tmp = gen_reg_rtx (V4SImode);
3322 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3323 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: operand 1 is first unpacked into a V4SI temporary
;; with gen_vec_unpacku_lo_v8hi, and that temporary is then converted into
;; V4SF operand 0 with gen_sse2_cvtdq2ps.
3327 (define_expand "vec_unpacku_float_lo_v8hi"
3328 [(match_operand:V4SF 0 "register_operand" "")
3329 (match_operand:V8HI 1 "register_operand" "")]
3332 rtx tmp = gen_reg_rtx (V4SImode);
3334 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3335 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V4SI -> V2DF expander.  The preparation statement allocates a V4SI pseudo
;; in operands[2]; the first selection on operand 1 starts at index 2, and
;; the final set converts elements 0 and 1 into V2DF operand 0.
3339 (define_expand "vec_unpacks_float_hi_v4si"
3342 (match_operand:V4SI 1 "nonimmediate_operand" "")
3343 (parallel [(const_int 2)
3347 (set (match_operand:V2DF 0 "register_operand" "")
3351 (parallel [(const_int 0) (const_int 1)]))))]
3353 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4SI -> V2DF expander: elements 0 and 1 of V4SI operand 1 are converted
;; into V2DF operand 0.
3355 (define_expand "vec_unpacks_float_lo_v4si"
3356 [(set (match_operand:V2DF 0 "register_operand" "")
3359 (match_operand:V4SI 1 "nonimmediate_operand" "")
3360 (parallel [(const_int 0) (const_int 1)]))))]
;; Unsigned V4SI -> V2DF expander.  The visible RTL compares the converted
;; value (operand 6) against operand 3 with lt, ANDs operand 7 with operand 4
;; and finally adds operand 8 to operand 6 to form V2DF operand 0.
;; The preparation code sets operand 3 to a zero V2DF register and operand 4
;; to a V2DF constant built from TWO32r, which real_ldexp derives from
;; dconst1 with exponent 32.  Operand 5 is a V4SI scratch and operands 6-8
;; are V2DF scratches.
;; NOTE(review): presumably operand 7 holds the lt mask and operand 8 the
;; masked correction; the sets that define them are not shown here - confirm.
3363 (define_expand "vec_unpacku_float_hi_v4si"
3366 (match_operand:V4SI 1 "nonimmediate_operand" "")
3367 (parallel [(const_int 2)
3375 (parallel [(const_int 0) (const_int 1)]))))
3377 (lt:V2DF (match_dup 6) (match_dup 3)))
3379 (and:V2DF (match_dup 7) (match_dup 4)))
3380 (set (match_operand:V2DF 0 "register_operand" "")
3381 (plus:V2DF (match_dup 6) (match_dup 8)))]
3384 REAL_VALUE_TYPE TWO32r;
3388 real_ldexp (&TWO32r, &dconst1, 32);
3389 x = const_double_from_real_value (TWO32r, DFmode);
3391 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3392 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3394 operands[5] = gen_reg_rtx (V4SImode);
3396 for (i = 6; i < 9; i++)
3397 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI -> V2DF expander working on elements 0 and 1 of operand 1.
;; The visible RTL compares the converted value (operand 5) against operand 3
;; with lt, ANDs operand 6 with operand 4 and finally adds operand 7 to
;; operand 5 to form V2DF operand 0.
;; The preparation code sets operand 3 to a zero V2DF register and operand 4
;; to a V2DF constant built from TWO32r, which real_ldexp derives from
;; dconst1 with exponent 32.  Operands 5-7 are V2DF scratches.
;; NOTE(review): presumably operand 6 holds the lt mask and operand 7 the
;; masked correction; the sets that define them are not shown here - confirm.
3400 (define_expand "vec_unpacku_float_lo_v4si"
3404 (match_operand:V4SI 1 "nonimmediate_operand" "")
3405 (parallel [(const_int 0) (const_int 1)]))))
3407 (lt:V2DF (match_dup 5) (match_dup 3)))
3409 (and:V2DF (match_dup 6) (match_dup 4)))
3410 (set (match_operand:V2DF 0 "register_operand" "")
3411 (plus:V2DF (match_dup 5) (match_dup 7)))]
3414 REAL_VALUE_TYPE TWO32r;
3418 real_ldexp (&TWO32r, &dconst1, 32);
3419 x = const_double_from_real_value (TWO32r, DFmode);
3421 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3422 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3424 for (i = 5; i < 8; i++)
3425 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V2DF inputs into one V4SF result: each input is narrowed into
;; its own V4SF temporary with gen_sse2_cvtpd2ps, and the two temporaries are
;; then combined into operand 0 with gen_sse_movlhps.
3428 (define_expand "vec_pack_trunc_v2df"
3429 [(match_operand:V4SF 0 "register_operand" "")
3430 (match_operand:V2DF 1 "nonimmediate_operand" "")
3431 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3436 r1 = gen_reg_rtx (V4SFmode);
3437 r2 = gen_reg_rtx (V4SFmode);
3439 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3440 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3441 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result using the truncating
;; conversion: each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary, and the temporaries are merged with gen_vec_interleave_lowv2di
;; operating on V2DI lowpart views of the destination and both temporaries.
3445 (define_expand "vec_pack_sfix_trunc_v2df"
3446 [(match_operand:V4SI 0 "register_operand" "")
3447 (match_operand:V2DF 1 "nonimmediate_operand" "")
3448 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3453 r1 = gen_reg_rtx (V4SImode);
3454 r2 = gen_reg_rtx (V4SImode);
3456 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3457 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3458 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3459 gen_lowpart (V2DImode, r1),
3460 gen_lowpart (V2DImode, r2)));
;; Same structure as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with gen_sse2_cvtpd2dq instead of the cvttpd2dq generator
;; before the two V4SI temporaries are merged via gen_vec_interleave_lowv2di.
3464 (define_expand "vec_pack_sfix_v2df"
3465 [(match_operand:V4SI 0 "register_operand" "")
3466 (match_operand:V2DF 1 "nonimmediate_operand" "")
3467 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3472 r1 = gen_reg_rtx (V4SImode);
3473 r2 = gen_reg_rtx (V4SImode);
3475 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3476 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3477 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3478 gen_lowpart (V2DImode, r1),
3479 gen_lowpart (V2DImode, r2)));
3483 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3485 ;; Parallel single-precision floating point element swizzling
3487 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns: accepts any nonimmediate
;; V4SF operands and legitimizes them through ix86_fixup_binary_operands
;; before the RTL (selection starting at index 6) is generated.
3489 (define_expand "sse_movhlps_exp"
3490 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3493 (match_operand:V4SF 1 "nonimmediate_operand" "")
3494 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3495 (parallel [(const_int 6)
3500 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps matcher; rejected when operands 1 and 2 are both in memory.
;; Alternatives: register sources -> vmovhlps; offsettable-memory operand 2
;; -> vmovlps reading %H2; memory destination tied to operand 1 -> vmovhps.
3502 (define_insn "*avx_movhlps"
3503 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3506 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3507 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3508 (parallel [(const_int 6)
3512 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3514 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3515 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3516 vmovhps\t{%2, %0|%0, %2}"
3517 [(set_attr "type" "ssemov")
3518 (set_attr "prefix" "vex")
3519 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movhlps matcher; rejected when operands 1 and 2 are both in memory.
;; Operand 1 is tied to the destination in every alternative.  Alternatives
;; emit movhlps (registers), movlps from %H2 (offsettable memory source) and
;; movhps (memory destination).
3521 (define_insn "sse_movhlps"
3522 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3525 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3526 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3527 (parallel [(const_int 6)
3531 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3533 movhlps\t{%2, %0|%0, %2}
3534 movlps\t{%H2, %0|%0, %H2}
3535 movhps\t{%2, %0|%0, %2}"
3536 [(set_attr "type" "ssemov")
3537 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns: accepts any nonimmediate
;; V4SF operands and legitimizes them through ix86_fixup_binary_operands
;; before the RTL (selection starting at index 0) is generated.
3539 (define_expand "sse_movlhps_exp"
3540 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3543 (match_operand:V4SF 1 "nonimmediate_operand" "")
3544 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3545 (parallel [(const_int 0)
3550 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps matcher, valid when ix86_binary_operator_ok accepts the
;; operands.  Alternatives: register sources -> vmovlhps; memory operand 2
;; -> vmovhps load; offsettable-memory destination tied to operand 1 ->
;; vmovlps store to %H0.
3552 (define_insn "*avx_movlhps"
3553 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3556 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3557 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3558 (parallel [(const_int 0)
3562 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3564 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3565 vmovhps\t{%2, %1, %0|%0, %1, %2}
3566 vmovlps\t{%2, %H0|%H0, %2}"
3567 [(set_attr "type" "ssemov")
3568 (set_attr "prefix" "vex")
3569 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movlhps matcher, valid when ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination in every alternative.
;; Alternatives emit movlhps (registers), movhps (memory source) and movlps
;; to %H0 (offsettable-memory destination).
3571 (define_insn "sse_movlhps"
3572 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3575 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3576 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3577 (parallel [(const_int 0)
3581 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3583 movlhps\t{%2, %0|%0, %2}
3584 movhps\t{%2, %0|%0, %2}
3585 movlps\t{%2, %H0|%H0, %2}"
3586 [(set_attr "type" "ssemov")
3587 (set_attr "mode" "V4SF,V2SF,V2SF")])
3589 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: the selector picks indices 2,10,3,11,6,14,7,15 from
;; the combination of operand 1 (register) and operand 2 (register/memory).
3590 (define_insn "avx_unpckhps256"
3591 [(set (match_operand:V8SF 0 "register_operand" "=x")
3594 (match_operand:V8SF 1 "register_operand" "x")
3595 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3596 (parallel [(const_int 2) (const_int 10)
3597 (const_int 3) (const_int 11)
3598 (const_int 6) (const_int 14)
3599 (const_int 7) (const_int 15)])))]
3601 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3602 [(set_attr "type" "sselog")
3603 (set_attr "prefix" "vex")
3604 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpckhps on V4SF: the selector picks indices 2,6,3,7
;; from the combination of operands 1 and 2.
3606 (define_insn "*avx_interleave_highv4sf"
3607 [(set (match_operand:V4SF 0 "register_operand" "=x")
3610 (match_operand:V4SF 1 "register_operand" "x")
3611 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3612 (parallel [(const_int 2) (const_int 6)
3613 (const_int 3) (const_int 7)])))]
3615 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3616 [(set_attr "type" "sselog")
3617 (set_attr "prefix" "vex")
3618 (set_attr "mode" "V4SF")])
;; SSE unpckhps on V4SF: same selector (2,6,3,7) as the AVX pattern, with
;; operand 1 tied to the destination for the two-operand encoding.
3620 (define_insn "vec_interleave_highv4sf"
3621 [(set (match_operand:V4SF 0 "register_operand" "=x")
3624 (match_operand:V4SF 1 "register_operand" "0")
3625 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3626 (parallel [(const_int 2) (const_int 6)
3627 (const_int 3) (const_int 7)])))]
3629 "unpckhps\t{%2, %0|%0, %2}"
3630 [(set_attr "type" "sselog")
3631 (set_attr "mode" "V4SF")])
3633 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: the selector picks indices 0,8,1,9,4,12,5,13 from the
;; combination of operand 1 (register) and operand 2 (register/memory).
3634 (define_insn "avx_unpcklps256"
3635 [(set (match_operand:V8SF 0 "register_operand" "=x")
3638 (match_operand:V8SF 1 "register_operand" "x")
3639 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3640 (parallel [(const_int 0) (const_int 8)
3641 (const_int 1) (const_int 9)
3642 (const_int 4) (const_int 12)
3643 (const_int 5) (const_int 13)])))]
3645 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3646 [(set_attr "type" "sselog")
3647 (set_attr "prefix" "vex")
3648 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpcklps on V4SF: the selector picks indices 0,4,1,5
;; from the combination of operands 1 and 2.
3650 (define_insn "*avx_interleave_lowv4sf"
3651 [(set (match_operand:V4SF 0 "register_operand" "=x")
3654 (match_operand:V4SF 1 "register_operand" "x")
3655 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3656 (parallel [(const_int 0) (const_int 4)
3657 (const_int 1) (const_int 5)])))]
3659 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3660 [(set_attr "type" "sselog")
3661 (set_attr "prefix" "vex")
3662 (set_attr "mode" "V4SF")])
;; SSE unpcklps on V4SF: same selector (0,4,1,5) as the AVX pattern, with
;; operand 1 tied to the destination for the two-operand encoding.
3664 (define_insn "vec_interleave_lowv4sf"
3665 [(set (match_operand:V4SF 0 "register_operand" "=x")
3668 (match_operand:V4SF 1 "register_operand" "0")
3669 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3670 (parallel [(const_int 0) (const_int 4)
3671 (const_int 1) (const_int 5)])))]
3673 "unpcklps\t{%2, %0|%0, %2}"
3674 [(set_attr "type" "sselog")
3675 (set_attr "mode" "V4SF")])
3677 ;; These are modeled with the same vec_concat as the others so that we
3678 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selector 1,1,3,3,5,5,7,7 duplicates each
;; odd-indexed element of operand 1 into the adjacent even slot.
3679 (define_insn "avx_movshdup256"
3680 [(set (match_operand:V8SF 0 "register_operand" "=x")
3683 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3685 (parallel [(const_int 1) (const_int 1)
3686 (const_int 3) (const_int 3)
3687 (const_int 5) (const_int 5)
3688 (const_int 7) (const_int 7)])))]
3690 "vmovshdup\t{%1, %0|%0, %1}"
3691 [(set_attr "type" "sse")
3692 (set_attr "prefix" "vex")
3693 (set_attr "mode" "V8SF")])
;; movshdup on V4SF operand 1 (register or memory) into V4SF operand 0; the
;; selector starts at index 1.  Emitted with the %v prefix so the VEX form
;; is used when enabled.
3695 (define_insn "sse3_movshdup"
3696 [(set (match_operand:V4SF 0 "register_operand" "=x")
3699 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3701 (parallel [(const_int 1)
3706 "%vmovshdup\t{%1, %0|%0, %1}"
3707 [(set_attr "type" "sse")
3708 (set_attr "prefix_rep" "1")
3709 (set_attr "prefix" "maybe_vex")
3710 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selector 0,0,2,2,4,4,6,6 duplicates each
;; even-indexed element of operand 1 into the adjacent odd slot.
3712 (define_insn "avx_movsldup256"
3713 [(set (match_operand:V8SF 0 "register_operand" "=x")
3716 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3718 (parallel [(const_int 0) (const_int 0)
3719 (const_int 2) (const_int 2)
3720 (const_int 4) (const_int 4)
3721 (const_int 6) (const_int 6)])))]
3723 "vmovsldup\t{%1, %0|%0, %1}"
3724 [(set_attr "type" "sse")
3725 (set_attr "prefix" "vex")
3726 (set_attr "mode" "V8SF")])
;; movsldup on V4SF operand 1 (register or memory) into V4SF operand 0; the
;; selector starts at index 0.  Emitted with the %v prefix so the VEX form
;; is used when enabled.
3728 (define_insn "sse3_movsldup"
3729 [(set (match_operand:V4SF 0 "register_operand" "=x")
3732 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3734 (parallel [(const_int 0)
3739 "%vmovsldup\t{%1, %0|%0, %1}"
3740 [(set_attr "type" "sse")
3741 (set_attr "prefix_rep" "1")
3742 (set_attr "prefix" "maybe_vex")
3743 (set_attr "mode" "V4SF")])
;; Expander for the 256-bit shufps builtin form: operand 3 is the 8-bit
;; immediate.  Its four 2-bit fields are decoded into eight explicit selector
;; constants for avx_shufps256_1: fields 0-1 as-is, fields 2-3 offset by 8,
;; then the same four values again offset by 4 (i.e. +4, +4, +12, +12).
3745 (define_expand "avx_shufps256"
3746 [(match_operand:V8SF 0 "register_operand" "")
3747 (match_operand:V8SF 1 "register_operand" "")
3748 (match_operand:V8SF 2 "nonimmediate_operand" "")
3749 (match_operand:SI 3 "const_int_operand" "")]
3752 int mask = INTVAL (operands[3]);
3753 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3754 GEN_INT ((mask >> 0) & 3),
3755 GEN_INT ((mask >> 2) & 3),
3756 GEN_INT (((mask >> 4) & 3) + 8),
3757 GEN_INT (((mask >> 6) & 3) + 8),
3758 GEN_INT (((mask >> 0) & 3) + 4),
3759 GEN_INT (((mask >> 2) & 3) + 4),
3760 GEN_INT (((mask >> 4) & 3) + 12),
3761 GEN_INT (((mask >> 6) & 3) + 12)));
3765 ;; Each 2-bit field of the immediate selects 2 elements: the selectors
;; for the second group (operands 7-10) must equal the selectors for the
;; first group (operands 3-6) plus 4, as the insn condition checks.
;; The output code rebuilds the 8-bit immediate from operands 3-6 (removing
;; the +8 bias from operands 5 and 6) and stores it back into operands[3]
;; so that %3 prints the immediate.
3766 (define_insn "avx_shufps256_1"
3767 [(set (match_operand:V8SF 0 "register_operand" "=x")
3770 (match_operand:V8SF 1 "register_operand" "x")
3771 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3772 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3773 (match_operand 4 "const_0_to_3_operand" "")
3774 (match_operand 5 "const_8_to_11_operand" "")
3775 (match_operand 6 "const_8_to_11_operand" "")
3776 (match_operand 7 "const_4_to_7_operand" "")
3777 (match_operand 8 "const_4_to_7_operand" "")
3778 (match_operand 9 "const_12_to_15_operand" "")
3779 (match_operand 10 "const_12_to_15_operand" "")])))]
3781 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3782 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3783 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3784 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3787 mask = INTVAL (operands[3]);
3788 mask |= INTVAL (operands[4]) << 2;
3789 mask |= (INTVAL (operands[5]) - 8) << 4;
3790 mask |= (INTVAL (operands[6]) - 8) << 6;
3791 operands[3] = GEN_INT (mask);
3793 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3795 [(set_attr "type" "sselog")
3796 (set_attr "length_immediate" "1")
3797 (set_attr "prefix" "vex")
3798 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit shufps builtin form: operand 3 is the 8-bit
;; immediate.  Its four 2-bit fields become the four selector constants for
;; sse_shufps_v4sf; the upper two are offset by 4.
3800 (define_expand "sse_shufps"
3801 [(match_operand:V4SF 0 "register_operand" "")
3802 (match_operand:V4SF 1 "register_operand" "")
3803 (match_operand:V4SF 2 "nonimmediate_operand" "")
3804 (match_operand:SI 3 "const_int_operand" "")]
3807 int mask = INTVAL (operands[3]);
3808 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3809 GEN_INT ((mask >> 0) & 3),
3810 GEN_INT ((mask >> 2) & 3),
3811 GEN_INT (((mask >> 4) & 3) + 4),
3812 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps for every mode in the SSEMODE4S iterator.
;; Operands 3-4 select from indices 0-3 and operands 5-6 from indices 4-7 of
;; the vec_concat of operands 1 and 2.  The output code packs the four
;; selectors into the 8-bit immediate (removing the +4 bias from operands 5
;; and 6) and stores it back into operands[3] for %3.
3816 (define_insn "*avx_shufps_<mode>"
3817 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3818 (vec_select:SSEMODE4S
3819 (vec_concat:<ssedoublesizemode>
3820 (match_operand:SSEMODE4S 1 "register_operand" "x")
3821 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3822 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3823 (match_operand 4 "const_0_to_3_operand" "")
3824 (match_operand 5 "const_4_to_7_operand" "")
3825 (match_operand 6 "const_4_to_7_operand" "")])))]
3829 mask |= INTVAL (operands[3]) << 0;
3830 mask |= INTVAL (operands[4]) << 2;
3831 mask |= (INTVAL (operands[5]) - 4) << 4;
3832 mask |= (INTVAL (operands[6]) - 4) << 6;
3833 operands[3] = GEN_INT (mask);
3835 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3837 [(set_attr "type" "sselog")
3838 (set_attr "length_immediate" "1")
3839 (set_attr "prefix" "vex")
3840 (set_attr "mode" "V4SF")])
;; SSE two-operand shufps for every mode in the SSEMODE4S iterator; operand 1
;; is tied to the destination.  Selector operands and immediate packing are
;; the same as in the AVX pattern: operands 3-6 are folded into an 8-bit
;; mask that replaces operands[3] before the template is printed.
3842 (define_insn "sse_shufps_<mode>"
3843 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3844 (vec_select:SSEMODE4S
3845 (vec_concat:<ssedoublesizemode>
3846 (match_operand:SSEMODE4S 1 "register_operand" "0")
3847 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3848 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3849 (match_operand 4 "const_0_to_3_operand" "")
3850 (match_operand 5 "const_4_to_7_operand" "")
3851 (match_operand 6 "const_4_to_7_operand" "")])))]
3855 mask |= INTVAL (operands[3]) << 0;
3856 mask |= INTVAL (operands[4]) << 2;
3857 mask |= (INTVAL (operands[5]) - 4) << 4;
3858 mask |= (INTVAL (operands[6]) - 4) << 6;
3859 operands[3] = GEN_INT (mask);
3861 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3863 [(set_attr "type" "sselog")
3864 (set_attr "length_immediate" "1")
3865 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory (movhps), register to register (movhlps),
;; and load from offsettable memory via %H1 (movlps).  All use the %v prefix.
3867 (define_insn "sse_storehps"
3868 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3870 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3871 (parallel [(const_int 2) (const_int 3)])))]
3874 %vmovhps\t{%1, %0|%0, %1}
3875 %vmovhlps\t{%1, %d0|%d0, %1}
3876 %vmovlps\t{%H1, %d0|%d0, %H1}"
3877 [(set_attr "type" "ssemov")
3878 (set_attr "prefix" "maybe_vex")
3879 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: combines elements 0 and 1 of
;; V4SF operand 1 with V2SF operand 2, after legitimizing the operands with
;; ix86_fixup_binary_operands.
3881 (define_expand "sse_loadhps_exp"
3882 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3885 (match_operand:V4SF 1 "nonimmediate_operand" "")
3886 (parallel [(const_int 0) (const_int 1)]))
3887 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3889 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps matcher: elements 0 and 1 of V4SF operand 1 combined with
;; V2SF operand 2.  Alternatives: memory operand 2 -> vmovhps; register
;; operand 2 -> vmovlhps; offsettable-memory destination tied to operand 1
;; -> vmovlps store to %H0.
3891 (define_insn "*avx_loadhps"
3892 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3895 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3896 (parallel [(const_int 0) (const_int 1)]))
3897 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3900 vmovhps\t{%2, %1, %0|%0, %1, %2}
3901 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3902 vmovlps\t{%2, %H0|%H0, %2}"
3903 [(set_attr "type" "ssemov")
3904 (set_attr "prefix" "vex")
3905 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE loadhps matcher: same RTL shape as *avx_loadhps with operand 1 tied
;; to the destination in every alternative.  Emits movhps (memory source),
;; movlhps (register source) or movlps to %H0 (memory destination).
3907 (define_insn "sse_loadhps"
3908 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3911 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3912 (parallel [(const_int 0) (const_int 1)]))
3913 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3916 movhps\t{%2, %0|%0, %2}
3917 movlhps\t{%2, %0|%0, %2}
3918 movlps\t{%2, %H0|%H0, %2}"
3919 [(set_attr "type" "ssemov")
3920 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX extraction of elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory (vmovlps), register copy (vmovaps), and
;; load from memory (three-operand vmovlps using %0 as the second source).
3922 (define_insn "*avx_storelps"
3923 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3925 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3926 (parallel [(const_int 0) (const_int 1)])))]
3929 vmovlps\t{%1, %0|%0, %1}
3930 vmovaps\t{%1, %0|%0, %1}
3931 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3932 [(set_attr "type" "ssemov")
3933 (set_attr "prefix" "vex")
3934 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; SSE extraction of elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory (movlps), register copy (movaps), and load
;; from memory (movlps).
3936 (define_insn "sse_storelps"
3937 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3939 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3940 (parallel [(const_int 0) (const_int 1)])))]
3943 movlps\t{%1, %0|%0, %1}
3944 movaps\t{%1, %0|%0, %1}
3945 movlps\t{%1, %0|%0, %1}"
3946 [(set_attr "type" "ssemov")
3947 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: combines V2SF operand 2 with
;; elements 2 and 3 of V4SF operand 1, after legitimizing the operands with
;; ix86_fixup_binary_operands.
3949 (define_expand "sse_loadlps_exp"
3950 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3952 (match_operand:V2SF 2 "nonimmediate_operand" "")
3954 (match_operand:V4SF 1 "nonimmediate_operand" "")
3955 (parallel [(const_int 2) (const_int 3)]))))]
3957 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps matcher: V2SF operand 2 combined with elements 2 and 3 of
;; V4SF operand 1.  Alternatives: register operand 2 -> vshufps with
;; immediate 0xe4; memory operand 2 -> three-operand vmovlps load;
;; memory destination tied to operand 1 -> vmovlps store.
;; The first alternative must use the VEX mnemonic vshufps: it prints
;; three register operands plus the immediate and the insn is marked
;; prefix "vex", which the legacy shufps mnemonic cannot encode.
3959 (define_insn "*avx_loadlps"
3960 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3962 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3964 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3965 (parallel [(const_int 2) (const_int 3)]))))]
3968 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3969 vmovlps\t{%2, %1, %0|%0, %1, %2}
3970 vmovlps\t{%2, %0|%0, %2}"
3971 [(set_attr "type" "sselog,ssemov,ssemov")
3972 (set_attr "length_immediate" "1,*,*")
3973 (set_attr "prefix" "vex")
3974 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE loadlps matcher: V2SF operand 2 combined with elements 2 and 3 of
;; V4SF operand 1.  Alternatives: operand 2 tied to the destination ->
;; shufps with immediate 0xe4 against operand 1; memory operand 2 -> movlps
;; load; memory destination -> movlps store.
3976 (define_insn "sse_loadlps"
3977 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3979 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3981 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3982 (parallel [(const_int 2) (const_int 3)]))))]
3985 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3986 movlps\t{%2, %0|%0, %2}
3987 movlps\t{%2, %0|%0, %2}"
3988 [(set_attr "type" "sselog,ssemov,ssemov")
3989 (set_attr "length_immediate" "1,*,*")
3990 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss on V4SF registers: operands 1 and 2 are both
;; register sources and operand 0 is the destination.
3992 (define_insn "*avx_movss"
3993 [(set (match_operand:V4SF 0 "register_operand" "=x")
3995 (match_operand:V4SF 2 "register_operand" "x")
3996 (match_operand:V4SF 1 "register_operand" "x")
3999 "vmovss\t{%2, %1, %0|%0, %1, %2}"
4000 [(set_attr "type" "ssemov")
4001 (set_attr "prefix" "vex")
4002 (set_attr "mode" "SF")])
;; SSE two-operand movss on V4SF registers: operand 1 is tied to the
;; destination and operand 2 is the register source.
4004 (define_insn "sse_movss"
4005 [(set (match_operand:V4SF 0 "register_operand" "=x")
4007 (match_operand:V4SF 2 "register_operand" "x")
4008 (match_operand:V4SF 1 "register_operand" "0")
4011 "movss\t{%2, %0|%0, %2}"
4012 [(set_attr "type" "ssemov")
4013 (set_attr "mode" "SF")])
;; AVX V4SF result built from SF register operand 1: emits vshufps with
;; immediate 0, using operand 1 as both source operands.
4015 (define_insn "*vec_dupv4sf_avx"
4016 [(set (match_operand:V4SF 0 "register_operand" "=x")
4018 (match_operand:SF 1 "register_operand" "x")))]
4020 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
4021 [(set_attr "type" "sselog1")
4022 (set_attr "length_immediate" "1")
4023 (set_attr "prefix" "vex")
4024 (set_attr "mode" "V4SF")])
;; SSE V4SF result built from SF operand 1, which is tied to the destination:
;; emits shufps with immediate 0 of the destination register with itself.
4026 (define_insn "*vec_dupv4sf"
4027 [(set (match_operand:V4SF 0 "register_operand" "=x")
4029 (match_operand:SF 1 "register_operand" "0")))]
4031 "shufps\t{$0, %0, %0|%0, %0, 0}"
4032 [(set_attr "type" "sselog1")
4033 (set_attr "length_immediate" "1")
4034 (set_attr "mode" "V4SF")])
;; AVX pattern building a V2SF value from SF operands 1 and 2.  Five
;; alternatives: vunpcklps (both in registers), vinsertps (operand 2 in
;; memory), vmovss (operand 1 in memory, operand 2 constraint "C"), and two
;; MMX-register alternatives (punpckldq, movd).  The MMX alternatives
;; (3 and 4) keep prefix "orig"; the others are "vex".
4036 (define_insn "*vec_concatv2sf_avx"
4037 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4039 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
4040 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4043 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4044 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4045 vmovss\t{%1, %0|%0, %1}
4046 punpckldq\t{%2, %0|%0, %2}
4047 movd\t{%1, %0|%0, %1}"
4048 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4049 (set_attr "length_immediate" "*,1,*,*,*")
4050 (set_attr "prefix_extra" "*,1,*,*,*")
4051 (set (attr "prefix")
4052 (if_then_else (eq_attr "alternative" "3,4")
4053 (const_string "orig")
4054 (const_string "vex")))
4055 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4057 ;; Although insertps takes register source, we prefer
4058 ;; unpcklps with register source since it is shorter.
;; Non-AVX pattern building a V2SF value from SF operands 1 and 2, with an
;; insertps alternative for a memory operand 2.  Register forms tie
;; operand 1 to the destination.  Alternatives 3 and 4 use MMX registers.
4059 (define_insn "*vec_concatv2sf_sse4_1"
4060 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4062 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
4063 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4066 unpcklps\t{%2, %0|%0, %2}
4067 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4068 movss\t{%1, %0|%0, %1}
4069 punpckldq\t{%2, %0|%0, %2}
4070 movd\t{%1, %0|%0, %1}"
4071 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4072 (set_attr "prefix_data16" "*,1,*,*,*")
4073 (set_attr "prefix_extra" "*,1,*,*,*")
4074 (set_attr "length_immediate" "*,1,*,*,*")
4075 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4077 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4078 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4079 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pattern building a V2SF value from SF operands 1 and 2 without an
;; insertps alternative: operand 2 is limited to a register or zero
;; (reg_or_0_operand).  Alternatives: unpcklps, movss, and two MMX-register
;; forms (punpckldq, movd).
4080 (define_insn "*vec_concatv2sf_sse"
4081 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4083 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4084 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4087 unpcklps\t{%2, %0|%0, %2}
4088 movss\t{%1, %0|%0, %1}
4089 punpckldq\t{%2, %0|%0, %2}
4090 movd\t{%1, %0|%0, %1}"
4091 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4092 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX form of building a V4SF from two V2SF halves.  Emits vmovlhps when
;; operand 2 is a register and vmovhps when it is in memory.
4094 (define_insn "*vec_concatv4sf_avx"
4095 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4097 (match_operand:V2SF 1 "register_operand" " x,x")
4098 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4101 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4102 vmovhps\t{%2, %1, %0|%0, %1, %2}"
4103 [(set_attr "type" "ssemov")
4104 (set_attr "prefix" "vex")
4105 (set_attr "mode" "V4SF,V2SF")])
;; SSE form of building a V4SF from two V2SF halves.  Operand 1 is tied to
;; the destination; emits movlhps for a register operand 2 and movhps for
;; a memory operand 2.
4107 (define_insn "*vec_concatv4sf_sse"
4108 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4110 (match_operand:V2SF 1 "register_operand" " 0,0")
4111 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4114 movlhps\t{%2, %0|%0, %2}
4115 movhps\t{%2, %0|%0, %2}"
4116 [(set_attr "type" "ssemov")
4117 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation expander for every mode in SSEMODE.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument, the destination (operand 0) and the initialiser (operand 1).
4119 (define_expand "vec_init<mode>"
4120 [(match_operand:SSEMODE 0 "register_operand" "")
4121 (match_operand 1 "" "")]
4124 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern combining SF operand 2 with V4SF operand 1 into operand 0.
;; The visible templates are the three-operand vmovss (register source),
;; the two-operand vmovss (memory source, operand 1 is the constant "C")
;; and vmovd (general-register source).  The last alternative has a
;; memory destination tied to operand 1.
4128 (define_insn "*vec_setv4sf_0_avx"
4129 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
4132 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4133 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
4137 vmovss\t{%2, %1, %0|%0, %1, %2}
4138 vmovss\t{%2, %0|%0, %2}
4139 vmovd\t{%2, %0|%0, %2}
4141 [(set_attr "type" "ssemov")
4142 (set_attr "prefix" "vex")
4143 (set_attr "mode" "SF")])
;; Non-AVX counterpart of *vec_setv4sf_0_avx.  Operand 1 is tied to the
;; destination in alternative 0; the movd alternative requires a "Y2"
;; destination.  The visible templates are movss (register or memory
;; source) and movd (general-register source).
4145 (define_insn "vec_setv4sf_0"
4146 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
4149 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4150 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
4154 movss\t{%2, %0|%0, %2}
4155 movss\t{%2, %0|%0, %2}
4156 movd\t{%2, %0|%0, %2}
4158 [(set_attr "type" "ssemov")
4159 (set_attr "mode" "SF")])
4161 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 into V4SF operand 1 via vinsertps.
;; Operand 3 arrives as a power of two in 1..8; the output code rewrites
;; it to exact_log2 (operand 3) << 4 before printing it as the immediate.
4162 (define_insn "*vec_setv4sf_avx"
4163 [(set (match_operand:V4SF 0 "register_operand" "=x")
4166 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4167 (match_operand:V4SF 1 "register_operand" "x")
4168 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4171 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4172 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4174 [(set_attr "type" "sselog")
4175 (set_attr "prefix_extra" "1")
4176 (set_attr "length_immediate" "1")
4177 (set_attr "prefix" "vex")
4178 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SF operand 2 into V4SF operand 1 (tied to the
;; destination) via insertps.  Operand 3 arrives as a power of two in
;; 1..8 and is rewritten to exact_log2 (operand 3) << 4 for the immediate.
4180 (define_insn "*vec_setv4sf_sse4_1"
4181 [(set (match_operand:V4SF 0 "register_operand" "=x")
4184 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4185 (match_operand:V4SF 1 "register_operand" "0")
4186 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4189 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4190 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4192 [(set_attr "type" "sselog")
4193 (set_attr "prefix_data16" "1")
4194 (set_attr "prefix_extra" "1")
4195 (set_attr "length_immediate" "1")
4196 (set_attr "mode" "V4SF")])
;; AVX insertps expressed as an unspec: operand 2 (register or memory),
;; operand 1 (register) and an 8-bit immediate in operand 3 are passed
;; straight through to vinsertps.
4198 (define_insn "*avx_insertps"
4199 [(set (match_operand:V4SF 0 "register_operand" "=x")
4200 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4201 (match_operand:V4SF 1 "register_operand" "x")
4202 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4205 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4206 [(set_attr "type" "sselog")
4207 (set_attr "prefix" "vex")
4208 (set_attr "prefix_extra" "1")
4209 (set_attr "length_immediate" "1")
4210 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps expressed as an unspec.  Operand 1 is tied to the
;; destination, operand 2 must be a register, and operand 3 is the 8-bit
;; immediate handed unchanged to insertps.
4212 (define_insn "sse4_1_insertps"
4213 [(set (match_operand:V4SF 0 "register_operand" "=x")
4214 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4215 (match_operand:V4SF 1 "register_operand" "0")
4216 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4219 "insertps\t{%3, %2, %0|%0, %2, %3}";
4220 [(set_attr "type" "sselog")
4221 (set_attr "prefix_data16" "1")
4222 (set_attr "prefix_extra" "1")
4223 (set_attr "length_immediate" "1")
4224 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (TARGET_SSE): when the V4SF destination is in
;; memory and the SF source is not, emit a plain SFmode move of operand 1
;; into byte offset 0 of that memory.
4227 [(set (match_operand:V4SF 0 "memory_operand" "")
4230 (match_operand:SF 1 "nonmemory_operand" ""))
4233 "TARGET_SSE && reload_completed"
4236 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element-set expander for every mode in SSEMODE.  Operand 1 is the new
;; scalar and operand 2 a constant integer; everything is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
4240 (define_expand "vec_set<mode>"
4241 [(match_operand:SSEMODE 0 "register_operand" "")
4242 (match_operand:<ssescalarmode> 1 "register_operand" "")
4243 (match_operand 2 "const_int_operand" "")]
4246 ix86_expand_vector_set (false, operands[0], operands[1],
4247 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF; memory-to-memory is rejected by the
;; insn condition.  After reload it is split into an ordinary SFmode move
;; whose source is operand 1 re-expressed in SFmode, built either with
;; gen_rtx_REG on its register number or with gen_lowpart.
4251 (define_insn_and_split "*vec_extractv4sf_0"
4252 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4254 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4255 (parallel [(const_int 0)])))]
4256 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4258 "&& reload_completed"
4261 rtx op1 = operands[1];
4263 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4265 op1 = gen_lowpart (SFmode, op1);
4266 emit_move_insn (operands[0], op1);
;; Expander for extracting one half of a 256-bit vector.  It switches on
;; the constant in operand 2 (restricted to 0 or 1) and emits either
;; vec_extract_lo_<mode> or vec_extract_hi_<mode>.
4270 (define_expand "avx_vextractf128<mode>"
4271 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4272 (match_operand:AVX256MODE 1 "register_operand" "")
4273 (match_operand:SI 2 "const_0_to_1_operand" "")]
4276 switch (INTVAL (operands[2]))
4279 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4282 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Selects elements 0 and 1 of an AVX256MODE4P register into a register or
;; memory destination.  Emitted as vextractf128 with immediate 0x0.
4290 (define_insn "vec_extract_lo_<mode>"
4291 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4292 (vec_select:<avxhalfvecmode>
4293 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4294 (parallel [(const_int 0) (const_int 1)])))]
4296 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4297 [(set_attr "type" "sselog")
4298 (set_attr "prefix_extra" "1")
4299 (set_attr "length_immediate" "1")
4300 (set_attr "memory" "none,store")
4301 (set_attr "prefix" "vex")
4302 (set_attr "mode" "V8SF")])
;; Selects elements 2 and 3 of an AVX256MODE4P register into a register or
;; memory destination.  Emitted as vextractf128 with immediate 0x1.
4304 (define_insn "vec_extract_hi_<mode>"
4305 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4306 (vec_select:<avxhalfvecmode>
4307 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4308 (parallel [(const_int 2) (const_int 3)])))]
4310 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4311 [(set_attr "type" "sselog")
4312 (set_attr "prefix_extra" "1")
4313 (set_attr "length_immediate" "1")
4314 (set_attr "memory" "none,store")
4315 (set_attr "prefix" "vex")
4316 (set_attr "mode" "V8SF")])
;; Selects elements 0-3 of an AVX256MODE8P register, i.e. the low half,
;; into a register or memory destination.
;; The vextractf128 immediate must therefore be 0x0: immediate 0x1 pulls
;; out the upper 128 bits, which is what vec_extract_hi_<mode> emits.
4318 (define_insn "vec_extract_lo_<mode>"
4319 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4320 (vec_select:<avxhalfvecmode>
4321 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4322 (parallel [(const_int 0) (const_int 1)
4323 (const_int 2) (const_int 3)])))]
4325 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4326 [(set_attr "type" "sselog")
4327 (set_attr "prefix_extra" "1")
4328 (set_attr "length_immediate" "1")
4329 (set_attr "memory" "none,store")
4330 (set_attr "prefix" "vex")
4331 (set_attr "mode" "V8SF")])
;; Selects elements 4-7 of an AVX256MODE8P register into a register or
;; memory destination.  Emitted as vextractf128 with immediate 0x1.
4333 (define_insn "vec_extract_hi_<mode>"
4334 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4335 (vec_select:<avxhalfvecmode>
4336 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4337 (parallel [(const_int 4) (const_int 5)
4338 (const_int 6) (const_int 7)])))]
4340 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4341 [(set_attr "type" "sselog")
4342 (set_attr "prefix_extra" "1")
4343 (set_attr "length_immediate" "1")
4344 (set_attr "memory" "none,store")
4345 (set_attr "prefix" "vex")
4346 (set_attr "mode" "V8SF")])
;; Selects elements 0-7 of a V16HI register, i.e. the low half, into a
;; V8HI register or memory destination.
;; The vextractf128 immediate must therefore be 0x0: immediate 0x1 pulls
;; out the upper 128 bits, which is what vec_extract_hi_v16hi emits.
4348 (define_insn "vec_extract_lo_v16hi"
4349 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4351 (match_operand:V16HI 1 "register_operand" "x,x")
4352 (parallel [(const_int 0) (const_int 1)
4353 (const_int 2) (const_int 3)
4354 (const_int 4) (const_int 5)
4355 (const_int 6) (const_int 7)])))]
4357 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4358 [(set_attr "type" "sselog")
4359 (set_attr "prefix_extra" "1")
4360 (set_attr "length_immediate" "1")
4361 (set_attr "memory" "none,store")
4362 (set_attr "prefix" "vex")
4363 (set_attr "mode" "V8SF")])
;; Selects elements 8-15 of a V16HI register into a V8HI register or
;; memory destination.  Emitted as vextractf128 with immediate 0x1.
4365 (define_insn "vec_extract_hi_v16hi"
4366 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4368 (match_operand:V16HI 1 "register_operand" "x,x")
4369 (parallel [(const_int 8) (const_int 9)
4370 (const_int 10) (const_int 11)
4371 (const_int 12) (const_int 13)
4372 (const_int 14) (const_int 15)])))]
4374 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4375 [(set_attr "type" "sselog")
4376 (set_attr "prefix_extra" "1")
4377 (set_attr "length_immediate" "1")
4378 (set_attr "memory" "none,store")
4379 (set_attr "prefix" "vex")
4380 (set_attr "mode" "V8SF")])
;; Selects elements 0-15 of a V32QI register, i.e. the low half, into a
;; V16QI register or memory destination.
;; The vextractf128 immediate must therefore be 0x0: immediate 0x1 pulls
;; out the upper 128 bits, which is what vec_extract_hi_v32qi emits.
4382 (define_insn "vec_extract_lo_v32qi"
4383 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4385 (match_operand:V32QI 1 "register_operand" "x,x")
4386 (parallel [(const_int 0) (const_int 1)
4387 (const_int 2) (const_int 3)
4388 (const_int 4) (const_int 5)
4389 (const_int 6) (const_int 7)
4390 (const_int 8) (const_int 9)
4391 (const_int 10) (const_int 11)
4392 (const_int 12) (const_int 13)
4393 (const_int 14) (const_int 15)])))]
4395 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4396 [(set_attr "type" "sselog")
4397 (set_attr "prefix_extra" "1")
4398 (set_attr "length_immediate" "1")
4399 (set_attr "memory" "none,store")
4400 (set_attr "prefix" "vex")
4401 (set_attr "mode" "V8SF")])
;; Selects elements 16-31 of a V32QI register into a V16QI register or
;; memory destination.  Emitted as vextractf128 with immediate 0x1.
4403 (define_insn "vec_extract_hi_v32qi"
4404 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4406 (match_operand:V32QI 1 "register_operand" "x,x")
4407 (parallel [(const_int 16) (const_int 17)
4408 (const_int 18) (const_int 19)
4409 (const_int 20) (const_int 21)
4410 (const_int 22) (const_int 23)
4411 (const_int 24) (const_int 25)
4412 (const_int 26) (const_int 27)
4413 (const_int 28) (const_int 29)
4414 (const_int 30) (const_int 31)])))]
4416 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4417 [(set_attr "type" "sselog")
4418 (set_attr "prefix_extra" "1")
4419 (set_attr "length_immediate" "1")
4420 (set_attr "memory" "none,store")
4421 (set_attr "prefix" "vex")
4422 (set_attr "mode" "V8SF")])
;; Extracts the V4SF element indexed by constant operand 2 (0..3) into a
;; general register or memory via extractps.  The %v in the template and
;; the "maybe_vex" prefix attribute cover both encodings.
4424 (define_insn "*sse4_1_extractps"
4425 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4427 (match_operand:V4SF 1 "register_operand" "x")
4428 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4430 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4431 [(set_attr "type" "sselog")
4432 (set_attr "prefix_data16" "1")
4433 (set_attr "prefix_extra" "1")
4434 (set_attr "length_immediate" "1")
4435 (set_attr "prefix" "maybe_vex")
4436 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF held in offsettable
;; memory.  The split replaces it with an SFmode load from byte offset
;; 4 * index within operand 1.
4438 (define_insn_and_split "*vec_extract_v4sf_mem"
4439 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4441 (match_operand:V4SF 1 "memory_operand" "o")
4442 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4448 int i = INTVAL (operands[2]);
4450 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element-extract expander for every mode in SSEMODE.  Operand 2 is a
;; constant integer; everything is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
4454 (define_expand "vec_extract<mode>"
4455 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4456 (match_operand:SSEMODE 1 "register_operand" "")
4457 (match_operand 2 "const_int_operand" "")]
4460 ix86_expand_vector_extract (false, operands[0], operands[1],
4461 INTVAL (operands[2]));
4465 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4467 ;; Parallel double-precision floating point element swizzling
4469 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4471 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the selector picks elements 1, 5, 3, 7 of operands 1
;; and 2 taken together, i.e. the odd element of each operand, 128-bit
;; lane by lane.
4472 (define_insn "avx_unpckhpd256"
4473 [(set (match_operand:V4DF 0 "register_operand" "=x")
4476 (match_operand:V4DF 1 "register_operand" "x")
4477 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4478 (parallel [(const_int 1) (const_int 5)
4479 (const_int 3) (const_int 7)])))]
4481 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4482 [(set_attr "type" "sselog")
4483 (set_attr "prefix" "vex")
4484 (set_attr "mode" "V4DF")])
;; Expander for the V2DF high interleave.  Its only preparation step is a
;; call to ix86_fixup_binary_operands with code UNKNOWN and V2DFmode.
4486 (define_expand "vec_interleave_highv2df"
4487 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4490 (match_operand:V2DF 1 "nonimmediate_operand" "")
4491 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4492 (parallel [(const_int 1)
4495 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX V2DF high interleave; at most one of operands 1 and 2 may be in
;; memory.  Alternative 0 is vunpckhpd on registers, alternative 1 loads
;; the high half of an offsettable memory operand 1 with vmovlpd (%H1),
;; and alternative 2 stores to memory with vmovhpd.
4497 (define_insn "*avx_interleave_highv2df"
4498 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4501 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
4502 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
4503 (parallel [(const_int 1)
4505 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4507 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4508 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4509 vmovhpd\t{%1, %0|%0, %1}"
4510 [(set_attr "type" "sselog,ssemov,ssemov")
4511 (set_attr "prefix" "vex")
4512 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 V2DF high interleave; at most one of operands 1 and 2 may be in
;; memory.  Alternative 0 is unpckhpd with operand 1 tied to the
;; destination, alternative 1 loads the high half of an offsettable memory
;; operand 1 with movlpd (%H1), and alternative 2 stores with movhpd.
4514 (define_insn "*sse2_interleave_highv2df"
4515 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4518 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4519 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4520 (parallel [(const_int 1)
4522 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4524 unpckhpd\t{%2, %0|%0, %2}
4525 movlpd\t{%H1, %0|%0, %H1}
4526 movhpd\t{%1, %0|%0, %1}"
4527 [(set_attr "type" "sselog,ssemov,ssemov")
4528 (set_attr "prefix_data16" "*,1,1")
4529 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup of operand 1 (register or memory).  The visible
;; selector indices are 0, 2, 4, 6.
4531 (define_insn "avx_movddup256"
4532 [(set (match_operand:V4DF 0 "register_operand" "=x")
4535 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
4537 (parallel [(const_int 0) (const_int 2)
4538 (const_int 4) (const_int 6)])))]
4540 "vmovddup\t{%1, %0|%0, %1}"
4541 [(set_attr "type" "sselog1")
4542 (set_attr "prefix" "vex")
4543 (set_attr "mode" "V4DF")])
;; AVX 128-bit movddup; memory-to-memory is rejected by the condition.
;; The register-destination alternative emits vmovddup; the second
;; alternative has an offsettable memory destination and a register
;; source.
4545 (define_insn "*avx_movddup"
4546 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4549 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4551 (parallel [(const_int 0)
4553 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4555 vmovddup\t{%1, %0|%0, %1}
4557 [(set_attr "type" "sselog1,ssemov")
4558 (set_attr "prefix" "vex")
4559 (set_attr "mode" "V2DF")])
;; SSE3 movddup; memory-to-memory is rejected by the condition.  The
;; register-destination alternative emits movddup; the second alternative
;; has an offsettable memory destination and a register source.
4561 (define_insn "*sse3_movddup"
4562 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4565 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4567 (parallel [(const_int 0)
4569 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4571 movddup\t{%1, %0|%0, %1}
4573 [(set_attr "type" "sselog1,ssemov")
4574 (set_attr "mode" "V2DF")])
;; Post-reload rewrite (TARGET_SSE3) for a V2DF memory destination fed
;; from a register: the register is viewed as DFmode and stored twice, at
;; byte offsets 0 and 8 of the destination.
4577 [(set (match_operand:V2DF 0 "memory_operand" "")
4580 (match_operand:V2DF 1 "register_operand" "")
4582 (parallel [(const_int 0)
4584 "TARGET_SSE3 && reload_completed"
4587 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4588 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4589 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4593 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklpd: the selector picks elements 0, 4, 2, 6 of operands 1
;; and 2 taken together, i.e. the even element of each operand, 128-bit
;; lane by lane.
4594 (define_insn "avx_unpcklpd256"
4595 [(set (match_operand:V4DF 0 "register_operand" "=x")
4598 (match_operand:V4DF 1 "register_operand" "x")
4599 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4600 (parallel [(const_int 0) (const_int 4)
4601 (const_int 2) (const_int 6)])))]
4603 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4604 [(set_attr "type" "sselog")
4605 (set_attr "prefix" "vex")
4606 (set_attr "mode" "V4DF")])
;; Expander for the V2DF low interleave.  Its only preparation step is a
;; call to ix86_fixup_binary_operands with code UNKNOWN and V2DFmode.
4608 (define_expand "vec_interleave_lowv2df"
4609 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4612 (match_operand:V2DF 1 "nonimmediate_operand" "")
4613 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4614 (parallel [(const_int 0)
4617 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX V2DF low interleave; at most one of operands 1 and 2 may be in
;; memory.  Alternative 0 is vunpcklpd on registers, alternative 1 uses
;; vmovhpd with a memory operand 2, and alternative 2 writes operand 2
;; into the high half (%H0) of an offsettable memory destination.
4619 (define_insn "*avx_interleave_lowv2df"
4620 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4623 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4624 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4625 (parallel [(const_int 0)
4627 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4629 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4630 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4631 vmovlpd\t{%2, %H0|%H0, %2}"
4632 [(set_attr "type" "sselog,ssemov,ssemov")
4633 (set_attr "prefix" "vex")
4634 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 V2DF low interleave; operand 1 is tied to the destination in every
;; alternative.  Alternative 0 is unpcklpd, alternative 1 uses movhpd with
;; a memory operand 2, and alternative 2 writes operand 2 into the high
;; half (%H0) of an offsettable memory destination.
4636 (define_insn "*sse2_interleave_lowv2df"
4637 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4640 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4641 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4642 (parallel [(const_int 0)
4644 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4646 unpcklpd\t{%2, %0|%0, %2}
4647 movhpd\t{%2, %0|%0, %2}
4648 movlpd\t{%2, %H0|%H0, %2}"
4649 [(set_attr "type" "sselog,ssemov,ssemov")
4650 (set_attr "prefix_data16" "*,1,1")
4651 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander that decodes the shuffle immediate in operand 3 into
;; per-element selector constants for avx_shufpd256_1.  In the visible
;; arguments, mask bit 1 chooses 5 or 4, bit 2 chooses 3 or 2, and bit 3
;; chooses 7 or 6.
4653 (define_expand "avx_shufpd256"
4654 [(match_operand:V4DF 0 "register_operand" "")
4655 (match_operand:V4DF 1 "register_operand" "")
4656 (match_operand:V4DF 2 "nonimmediate_operand" "")
4657 (match_operand:SI 3 "const_int_operand" "")]
4660 int mask = INTVAL (operands[3]);
4661 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4663 GEN_INT (mask & 2 ? 5 : 4),
4664 GEN_INT (mask & 4 ? 3 : 2),
4665 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection spelled out as four constant
;; operands.  The output code folds them back into the instruction
;; immediate: operand 3 supplies bit 0, and operands 4, 5 and 6 (less
;; their bases 4, 2 and 6) supply bits 1, 2 and 3.  The result overwrites
;; operands[3] for printing.
4669 (define_insn "avx_shufpd256_1"
4670 [(set (match_operand:V4DF 0 "register_operand" "=x")
4673 (match_operand:V4DF 1 "register_operand" "x")
4674 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4675 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4676 (match_operand 4 "const_4_to_5_operand" "")
4677 (match_operand 5 "const_2_to_3_operand" "")
4678 (match_operand 6 "const_6_to_7_operand" "")])))]
4682 mask = INTVAL (operands[3]);
4683 mask |= (INTVAL (operands[4]) - 4) << 1;
4684 mask |= (INTVAL (operands[5]) - 2) << 2;
4685 mask |= (INTVAL (operands[6]) - 6) << 3;
4686 operands[3] = GEN_INT (mask);
4688 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4690 [(set_attr "type" "sselog")
4691 (set_attr "length_immediate" "1")
4692 (set_attr "prefix" "vex")
4693 (set_attr "mode" "V4DF")])
;; Expander that decodes the shufpd immediate in operand 3 into selector
;; constants for sse2_shufpd_v2df.  In the visible argument, mask bit 1
;; chooses element 3 or 2.
4695 (define_expand "sse2_shufpd"
4696 [(match_operand:V2DF 0 "register_operand" "")
4697 (match_operand:V2DF 1 "register_operand" "")
4698 (match_operand:V2DF 2 "nonimmediate_operand" "")
4699 (match_operand:SI 3 "const_int_operand" "")]
4702 int mask = INTVAL (operands[3]);
4703 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4705 GEN_INT (mask & 2 ? 3 : 2)));
;; Even-element extraction expander for modes in SSEMODE_EO.  Delegates to
;; ix86_expand_vec_extract_even_odd with 0 as the final argument.
4709 (define_expand "vec_extract_even<mode>"
4710 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4711 (match_operand:SSEMODE_EO 1 "register_operand" "")
4712 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4715 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Odd-element extraction expander for modes in SSEMODE_EO.  Delegates to
;; ix86_expand_vec_extract_even_odd with 1 as the final argument.
4719 (define_expand "vec_extract_odd<mode>"
4720 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4721 (match_operand:SSEMODE_EO 1 "register_operand" "")
4722 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4725 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4729 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX V2DI high interleave, emitted as the three-operand vpunpckhqdq.
;; Operand 2 may be a register or memory.
4730 (define_insn "*avx_interleave_highv2di"
4731 [(set (match_operand:V2DI 0 "register_operand" "=x")
4734 (match_operand:V2DI 1 "register_operand" "x")
4735 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4736 (parallel [(const_int 1)
4739 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4740 [(set_attr "type" "sselog")
4741 (set_attr "prefix" "vex")
4742 (set_attr "mode" "TI")])
;; V2DI high interleave, emitted as punpckhqdq with operand 1 tied to the
;; destination.  Operand 2 may be a register or memory.
4744 (define_insn "vec_interleave_highv2di"
4745 [(set (match_operand:V2DI 0 "register_operand" "=x")
4748 (match_operand:V2DI 1 "register_operand" "0")
4749 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4750 (parallel [(const_int 1)
4753 "punpckhqdq\t{%2, %0|%0, %2}"
4754 [(set_attr "type" "sselog")
4755 (set_attr "prefix_data16" "1")
4756 (set_attr "mode" "TI")])
;; AVX V2DI low interleave, emitted as the three-operand vpunpcklqdq.
;; Operand 2 may be a register or memory.
4758 (define_insn "*avx_interleave_lowv2di"
4759 [(set (match_operand:V2DI 0 "register_operand" "=x")
4762 (match_operand:V2DI 1 "register_operand" "x")
4763 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4764 (parallel [(const_int 0)
4767 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4768 [(set_attr "type" "sselog")
4769 (set_attr "prefix" "vex")
4770 (set_attr "mode" "TI")])
;; V2DI low interleave, emitted as punpcklqdq with operand 1 tied to the
;; destination.  Operand 2 may be a register or memory.
4772 (define_insn "vec_interleave_lowv2di"
4773 [(set (match_operand:V2DI 0 "register_operand" "=x")
4776 (match_operand:V2DI 1 "register_operand" "0")
4777 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4778 (parallel [(const_int 0)
4781 "punpcklqdq\t{%2, %0|%0, %2}"
4782 [(set_attr "type" "sselog")
4783 (set_attr "prefix_data16" "1")
4784 (set_attr "mode" "TI")])
;; AVX two-element shuffle for modes in SSEMODE2D, emitted as vshufpd.
;; The output code rebuilds the immediate from the selectors: operand 3
;; (0 or 1) is bit 0 and operand 4 minus 2 is bit 1.  The result
;; overwrites operands[3] for printing.
4786 (define_insn "*avx_shufpd_<mode>"
4787 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4788 (vec_select:SSEMODE2D
4789 (vec_concat:<ssedoublesizemode>
4790 (match_operand:SSEMODE2D 1 "register_operand" "x")
4791 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4792 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4793 (match_operand 4 "const_2_to_3_operand" "")])))]
4797 mask = INTVAL (operands[3]);
4798 mask |= (INTVAL (operands[4]) - 2) << 1;
4799 operands[3] = GEN_INT (mask);
4801 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4803 [(set_attr "type" "sselog")
4804 (set_attr "length_immediate" "1")
4805 (set_attr "prefix" "vex")
4806 (set_attr "mode" "V2DF")])
;; SSE2 two-element shuffle for modes in SSEMODE2D, emitted as shufpd with
;; operand 1 tied to the destination.  The output code rebuilds the
;; immediate from the selectors: operand 3 (0 or 1) is bit 0 and operand 4
;; minus 2 is bit 1.  The result overwrites operands[3] for printing.
4808 (define_insn "sse2_shufpd_<mode>"
4809 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4810 (vec_select:SSEMODE2D
4811 (vec_concat:<ssedoublesizemode>
4812 (match_operand:SSEMODE2D 1 "register_operand" "0")
4813 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4814 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4815 (match_operand 4 "const_2_to_3_operand" "")])))]
4819 mask = INTVAL (operands[3]);
4820 mask |= (INTVAL (operands[4]) - 2) << 1;
4821 operands[3] = GEN_INT (mask);
4823 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4825 [(set_attr "type" "sselog")
4826 (set_attr "length_immediate" "1")
4827 (set_attr "mode" "V2DF")])
4829 ;; Avoid combining registers from different units in a single alternative,
4830 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination;
;; memory-to-memory is rejected by the condition.  The visible templates
;; are vmovhpd (store to memory) and vunpckhpd (register to register).
;; The remaining alternatives read operand 1 from offsettable memory.
4831 (define_insn "*avx_storehpd"
4832 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4834 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4835 (parallel [(const_int 1)])))]
4836 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4838 vmovhpd\t{%1, %0|%0, %1}
4839 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4843 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4844 (set_attr "prefix" "vex")
4845 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination;
;; memory-to-memory is rejected by the condition.  The visible template is
;; movhpd for the store-to-memory alternative.  In the register-to-register
;; alternative operand 1 is tied to the destination, and the remaining
;; alternatives read operand 1 from offsettable memory.
4847 (define_insn "sse2_storehpd"
4848 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4850 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4851 (parallel [(const_int 1)])))]
4852 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4854 movhpd\t{%1, %0|%0, %1}
4859 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4860 (set_attr "prefix_data16" "1,*,*,*,*")
4861 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): extracting element 1 of a V2DF held
;; in memory into a register becomes a plain DFmode load from byte offset 8
;; of that memory.
4864 [(set (match_operand:DF 0 "register_operand" "")
4866 (match_operand:V2DF 1 "memory_operand" "")
4867 (parallel [(const_int 1)])))]
4868 "TARGET_SSE2 && reload_completed"
4869 [(set (match_dup 0) (match_dup 1))]
4871 operands[1] = adjust_address (operands[1], DFmode, 8);
4874 ;; Avoid combining registers from different units in a single alternative,
4875 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination;
;; memory-to-memory is rejected by the condition.  The visible template is
;; movlpd for the store-to-memory alternative; %v and the "maybe_vex"
;; prefix attribute cover both encodings.
4876 (define_insn "sse2_storelpd"
4877 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4879 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4880 (parallel [(const_int 0)])))]
4881 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4883 %vmovlpd\t{%1, %0|%0, %1}
4888 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4889 (set_attr "prefix_data16" "1,*,*,*,*")
4890 (set_attr "prefix" "maybe_vex")
4891 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): extracting element 0 of a V2DF into
;; a register becomes a plain DFmode move whose source is operand 1
;; re-expressed in DFmode, built either with gen_rtx_REG on its register
;; number or with gen_lowpart.
4894 [(set (match_operand:DF 0 "register_operand" "")
4896 (match_operand:V2DF 1 "nonimmediate_operand" "")
4897 (parallel [(const_int 0)])))]
4898 "TARGET_SSE2 && reload_completed"
4901 rtx op1 = operands[1];
4903 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4905 op1 = gen_lowpart (DFmode, op1);
4906 emit_move_insn (operands[0], op1);
;; Expander for loadhpd: element 0 of operand 1 is paired with DF operand
;; 2.  Its only preparation step is a call to ix86_fixup_binary_operands
;; with code UNKNOWN and V2DFmode.
4910 (define_expand "sse2_loadhpd_exp"
4911 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4914 (match_operand:V2DF 1 "nonimmediate_operand" "")
4915 (parallel [(const_int 0)]))
4916 (match_operand:DF 2 "nonimmediate_operand" "")))]
4918 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4920 ;; Avoid combining registers from different units in a single alternative,
4921 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX loadhpd: element 0 of operand 1 is paired with DF operand 2; at
;; most one of operands 1 and 2 may be in memory.  The visible templates
;; are vmovhpd (operand 2 in memory) and vunpcklpd (operand 2 in a
;; register).  The remaining alternatives have an offsettable memory
;; destination tied to operand 1.
4922 (define_insn "*avx_loadhpd"
4923 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4926 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4927 (parallel [(const_int 0)]))
4928 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4929 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4931 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4932 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4936 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4937 (set_attr "prefix" "vex")
4938 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 loadhpd: element 0 of operand 1 is paired with DF operand 2; at
;; most one of operands 1 and 2 may be in memory.  The visible templates
;; are movhpd (operand 2 in memory), unpcklpd (operand 2 in a register)
;; and shufpd $1 (operand 2 tied to the destination).  The remaining
;; alternatives have an offsettable memory destination tied to operand 1.
4940 (define_insn "sse2_loadhpd"
4941 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4944 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4945 (parallel [(const_int 0)]))
4946 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4947 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4949 movhpd\t{%2, %0|%0, %2}
4950 unpcklpd\t{%2, %0|%0, %2}
4951 shufpd\t{$1, %1, %0|%0, %1, 1}
4955 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4956 (set_attr "prefix_data16" "1,*,*,*,*,*")
4957 (set_attr "length_immediate" "*,*,1,*,*,*")
4958 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): a V2DF memory destination keeps its
;; own element 0 and takes register operand 1 as the other element, so the
;; whole operation becomes a plain DFmode store of operand 1 at byte
;; offset 8.
4961 [(set (match_operand:V2DF 0 "memory_operand" "")
4963 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4964 (match_operand:DF 1 "register_operand" "")))]
4965 "TARGET_SSE2 && reload_completed"
4966 [(set (match_dup 0) (match_dup 1))]
4968 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander for loadlpd: DF operand 2 is paired with element 1 of operand
;; 1.  Its only preparation step is a call to ix86_fixup_binary_operands
;; with code UNKNOWN and V2DFmode.
4971 (define_expand "sse2_loadlpd_exp"
4972 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4974 (match_operand:DF 2 "nonimmediate_operand" "")
4976 (match_operand:V2DF 1 "nonimmediate_operand" "")
4977 (parallel [(const_int 1)]))))]
4979 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4981 ;; Avoid combining registers from different units in a single alternative,
4982 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX loadlpd: DF operand 2 is paired with element 1 of operand 1; at
;; most one of operands 1 and 2 may be in memory.  The visible templates
;; are, in order: vmovsd (operand 1 is the constant "C"), vmovlpd (operand
;; 2 in memory), three-operand vmovsd (both in registers) and vmovhpd
;; reading the high half of an offsettable memory operand 1 (%H1).  The
;; remaining alternatives have a memory destination tied to operand 1.
4983 (define_insn "*avx_loadlpd"
4984 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4986 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4988 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4989 (parallel [(const_int 1)]))))]
4990 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4992 vmovsd\t{%2, %0|%0, %2}
4993 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4994 vmovsd\t{%2, %1, %0|%0, %1, %2}
4995 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4999 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5000 (set_attr "prefix" "vex")
5001 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 loadlpd: DF operand 2 is paired with element 1 of operand 1; at
;; most one of operands 1 and 2 may be in memory.  The visible templates
;; are, in order: movsd (operand 1 is the constant "C"), movlpd (operand 2
;; in memory), movsd (register operand 2), shufpd $2 (operand 2 tied to
;; the destination) and movhpd reading the high half of an offsettable
;; memory operand 1 (%H1).  The remaining alternatives have a memory
;; destination tied to operand 1.
5003 (define_insn "sse2_loadlpd"
5004 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5006 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5008 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5009 (parallel [(const_int 1)]))))]
5010 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5012 movsd\t{%2, %0|%0, %2}
5013 movlpd\t{%2, %0|%0, %2}
5014 movsd\t{%2, %0|%0, %2}
5015 shufpd\t{$2, %2, %0|%0, %2, 2}
5016 movhpd\t{%H1, %0|%0, %H1}
5020 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5021 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5022 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5023 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): a V2DF memory destination takes
;; register operand 1 as its new first element while keeping its own
;; element 1, so the whole operation becomes a plain DFmode store of
;; operand 1.
;; The store must target byte offset 0 (element 0).  Offset 8 would
;; overwrite the element the pattern says is preserved and leave the low
;; element stale.
5026 [(set (match_operand:V2DF 0 "memory_operand" "")
5028 (match_operand:DF 1 "register_operand" "")
5029 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5030 "TARGET_SSE2 && reload_completed"
5031 [(set (match_dup 0) (match_dup 1))]
5033 operands[0] = adjust_address (operands[0], DFmode, 0);
5036 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extraction of element 1 of a V2DF when SSE is available but SSE2 is
;; not; memory-to-memory is rejected.  Uses movhps to store to memory,
;; movhlps between registers, and movlps from the high half of an
;; offsettable memory source (%H1).
5038 (define_insn "*vec_extractv2df_1_sse"
5039 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5041 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5042 (parallel [(const_int 1)])))]
5043 "!TARGET_SSE2 && TARGET_SSE
5044 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5046 movhps\t{%1, %0|%0, %1}
5047 movhlps\t{%1, %0|%0, %1}
5048 movlps\t{%H1, %0|%0, %H1}"
5049 [(set_attr "type" "ssemov")
5050 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF when SSE is available but SSE2 is
;; not; memory-to-memory is rejected.  Uses movlps to store to memory,
;; movaps between registers, and movlps to load from memory.
5052 (define_insn "*vec_extractv2df_0_sse"
5053 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5055 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5056 (parallel [(const_int 0)])))]
5057 "!TARGET_SSE2 && TARGET_SSE
5058 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5060 movlps\t{%1, %0|%0, %1}
5061 movaps\t{%1, %0|%0, %1}
5062 movlps\t{%1, %0|%0, %1}"
5063 [(set_attr "type" "ssemov")
5064 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style combination of V2DF operands 2 and 1.  Alternatives in
;; order: vmovsd on registers; vmovlpd with operand 2 in memory; vmovlpd
;; storing operand 2 to a memory destination tied to operand 1; vmovhps
;; loading the high half of an offsettable memory operand 1 (%H1); and
;; vmovhps storing operand 1 into the high half of the destination (%H0).
5066 (define_insn "*avx_movsd"
5067 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5069 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5070 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5074 vmovsd\t{%2, %1, %0|%0, %1, %2}
5075 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5076 vmovlpd\t{%2, %0|%0, %2}
5077 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5078 vmovhps\t{%1, %H0|%H0, %1}"
5079 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5080 (set_attr "prefix" "vex")
5081 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style combination of V2DF operands 2 and 1.  Alternatives in
;; order: movsd on registers; movlpd with operand 2 in memory; movlpd
;; storing operand 2 to memory; shufpd $2 with operand 2 tied to the
;; destination; movhps loading the high half of an offsettable memory
;; operand 1 (%H1); and movhps storing operand 1 into the high half of the
;; destination (%H0).
5083 (define_insn "sse2_movsd"
5084 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5086 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5087 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5091 movsd\t{%2, %0|%0, %2}
5092 movlpd\t{%2, %0|%0, %2}
5093 movlpd\t{%2, %0|%0, %2}
5094 shufpd\t{$2, %2, %0|%0, %2, 2}
5095 movhps\t{%H1, %0|%0, %H1}
5096 movhps\t{%1, %H0|%H0, %1}"
5097 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5098 (set_attr "prefix_data16" "*,1,1,*,*,*")
5099 (set_attr "length_immediate" "*,*,*,1,*,*")
5100 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF operand (register or memory) with
;; movddup; %v and the "maybe_vex" prefix attribute cover both encodings.
5102 (define_insn "*vec_dupv2df_sse3"
5103 [(set (match_operand:V2DF 0 "register_operand" "=x")
5105 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5107 "%vmovddup\t{%1, %0|%0, %1}"
5108 [(set_attr "type" "sselog1")
5109 (set_attr "prefix" "maybe_vex")
5110 (set_attr "mode" "DF")])
;; Builds a V2DF from a single DF register operand that is tied to the
;; destination.  The output template is not visible in this excerpt.
5112 (define_insn "vec_dupv2df"
5113 [(set (match_operand:V2DF 0 "register_operand" "=x")
5115 (match_operand:DF 1 "register_operand" "0")))]
5118 [(set_attr "type" "sselog1")
5119 (set_attr "mode" "V2DF")])
;; V2DF concatenation pattern that is emitted as a single movddup of DF
;; operand 1 (register or memory); %v and the "maybe_vex" prefix attribute
;; cover both encodings.
5121 (define_insn "*vec_concatv2df_sse3"
5122 [(set (match_operand:V2DF 0 "register_operand" "=x")
5124 (match_operand:DF 1 "nonimmediate_operand" "xm")
5127 "%vmovddup\t{%1, %0|%0, %1}"
5128 [(set_attr "type" "sselog1")
5129 (set_attr "prefix" "maybe_vex")
5130 (set_attr "mode" "DF")])
;; AVX form of building a V2DF from two DF operands.  Uses vunpcklpd when
;; both are registers, vmovhpd when operand 2 is in memory, and vmovsd
;; when operand 1 is in memory and operand 2 is the constant "C".
5132 (define_insn "*vec_concatv2df_avx"
5133 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5135 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5136 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5139 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5140 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5141 vmovsd\t{%1, %0|%0, %1}"
5142 [(set_attr "type" "ssemov")
5143 (set_attr "prefix" "vex")
5144 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX form of building a V2DF from two DF operands.  The "Y2"
;; alternatives emit unpcklpd / movhpd / movsd, and the plain "x"
;; alternatives emit movlhps / movhps.  Operand 1 is tied to the
;; destination except in the movsd alternative, which loads it from
;; memory.
5146 (define_insn "*vec_concatv2df"
5147 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5149 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5150 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5153 unpcklpd\t{%2, %0|%0, %2}
5154 movhpd\t{%2, %0|%0, %2}
5155 movsd\t{%1, %0|%0, %1}
5156 movlhps\t{%2, %0|%0, %2}
5157 movhps\t{%2, %0|%0, %2}"
5158 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5159 (set_attr "prefix_data16" "*,1,*,*,*")
5160 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5164 ;; Parallel integral arithmetic
5166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every 16-byte integer vector mode in SSEMODEI.
;; The preparation statement creates operand 2 as a register holding the
;; all-zero vector of the same mode, for use by the expansion pattern.
5168 (define_expand "neg<mode>2"
5169 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5172 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5174 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for the <plusminus_insn> operations on SSEMODEI vectors.
;; Both inputs may be registers or memory; the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5176 (define_expand "<plusminus_insn><mode>3"
5177 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5179 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5180 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5182 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of the SSEMODEI <plusminus_insn> operation: three-operand,
;; VEX-encoded, destination independent of both sources.  Operand 2 may
;; be memory.  Requires TARGET_AVX and ix86_binary_operator_ok.
;; NOTE(review): <comm> is presumably the commutativity marker ("%") for
;; commutative codes -- confirm against its definition.
5184 (define_insn "*avx_<plusminus_insn><mode>3"
5185 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5187 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5188 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5189 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5190 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5191 [(set_attr "type" "sseiadd")
5192 (set_attr "prefix" "vex")
5193 (set_attr "mode" "TI")])
;; SSE2 form of the SSEMODEI <plusminus_insn> operation: two-operand, with
;; operand 1 tied to the destination (constraint "0") and operand 2 in a
;; register or memory.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
5195 (define_insn "*<plusminus_insn><mode>3"
5196 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5198 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5199 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5200 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5201 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5202 [(set_attr "type" "sseiadd")
5203 (set_attr "prefix_data16" "1")
5204 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus (saturating add/subtract) codes on the
;; byte and word vector modes in SSEMODE12 (V16QI, V8HI).  Operands are
;; passed through ix86_fixup_binary_operands_no_copy before emission.
5206 (define_expand "sse2_<plusminus_insn><mode>3"
5207 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5208 (sat_plusminus:SSEMODE12
5209 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5210 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5212 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX (three-operand, VEX-encoded) form of the saturating add/subtract
;; on V16QI / V8HI.  Operand 2 may be memory.  Requires TARGET_AVX and
;; ix86_binary_operator_ok.
5214 (define_insn "*avx_<plusminus_insn><mode>3"
5215 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5216 (sat_plusminus:SSEMODE12
5217 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5218 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5219 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5220 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5221 [(set_attr "type" "sseiadd")
5222 (set_attr "prefix" "vex")
5223 (set_attr "mode" "TI")])
;; SSE2 (two-operand) form of the saturating add/subtract on V16QI / V8HI.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
5225 (define_insn "*sse2_<plusminus_insn><mode>3"
5226 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5227 (sat_plusminus:SSEMODE12
5228 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5229 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5230 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5231 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5232 [(set_attr "type" "sseiadd")
5233 (set_attr "prefix_data16" "1")
5234 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split into word multiplies:
;;   1. interleave each input with itself (high and low halves) so every
;;      16-bit word carries one source byte in its low byte;
;;   2. multiply the widened halves with two V8HI multiplies;
;;   3. extract the even bytes of both products into operand 0.
;; Six V16QI temporaries are created, which is why the condition requires
;; can_create_pseudo_p ().  Both inputs must be registers.
5236 (define_insn_and_split "mulv16qi3"
5237 [(set (match_operand:V16QI 0 "register_operand" "")
5238 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5239 (match_operand:V16QI 2 "register_operand" "")))]
5241 && can_create_pseudo_p ()"
5249 for (i = 0; i < 6; ++i)
5250 t[i] = gen_reg_rtx (V16QImode);
5252 /* Unpack data such that we've got a source byte in each low byte of
5253 each word. We don't care what goes into the high byte of each word.
5254 Rather than trying to get zero in there, most convenient is to let
5255 it be a copy of the low byte. */
5256 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5257 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5258 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5259 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5261 /* Multiply words. The end-of-line annotations here give a picture of what
5262 the output of that instruction looks like. Dot means don't care; the
5263 letters are the bytes of the result with A being the most significant. */
5264 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5265 gen_lowpart (V8HImode, t[0]),
5266 gen_lowpart (V8HImode, t[1])));
5267 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5268 gen_lowpart (V8HImode, t[2]),
5269 gen_lowpart (V8HImode, t[3])));
5271 /* Extract the even bytes and merge them back together. */
5272 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Named expander for the V8HI multiply.  Inputs may be registers or
;; memory; they are passed through ix86_fixup_binary_operands_no_copy.
5276 (define_expand "mulv8hi3"
5277 [(set (match_operand:V8HI 0 "register_operand" "")
5278 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5279 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5281 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: three-operand vpmullw.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.
5283 (define_insn "*avx_mulv8hi3"
5284 [(set (match_operand:V8HI 0 "register_operand" "=x")
5285 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5286 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5287 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5288 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5289 [(set_attr "type" "sseimul")
5290 (set_attr "prefix" "vex")
5291 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination.  The "%" marks operands 1 and 2 as commutative.
5293 (define_insn "*mulv8hi3"
5294 [(set (match_operand:V8HI 0 "register_operand" "=x")
5295 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5296 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5297 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5298 "pmullw\t{%2, %0|%0, %2}"
5299 [(set_attr "type" "sseimul")
5300 (set_attr "prefix_data16" "1")
5301 (set_attr "mode" "TI")])
;; Named expander for the signed V8HI high-part multiply.  Inputs may be
;; registers or memory; they are passed through
;; ix86_fixup_binary_operands_no_copy as a MULT in V8HImode.
5303 (define_expand "smulv8hi3_highpart"
5304 [(set (match_operand:V8HI 0 "register_operand" "")
5309 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5311 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5314 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the signed V8HI high-part multiply: three-operand vpmulhw.
;; The "%" on operand 1 marks operands 1 and 2 as commutative; operand 2
;; may be memory.  Requires TARGET_AVX and ix86_binary_operator_ok.
;; The name follows the same scheme as the sibling patterns
;; "*smulv8hi3_highpart" and "*avx_umulv8hi3_highpart"; being a "*" name
;; it is used only for identification in dumps.
5316 (define_insn "*avx_smulv8hi3_highpart"
5317 [(set (match_operand:V8HI 0 "register_operand" "=x")
5322 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5324 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5326 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5327 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5328 [(set_attr "type" "sseimul")
5329 (set_attr "prefix" "vex")
5330 (set_attr "mode" "TI")])
;; SSE2 form of the signed V8HI high-part multiply: two-operand pmulhw
;; with operand 1 tied to the destination; operand 2 may be memory.
5332 (define_insn "*smulv8hi3_highpart"
5333 [(set (match_operand:V8HI 0 "register_operand" "=x")
5338 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5340 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5342 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5343 "pmulhw\t{%2, %0|%0, %2}"
5344 [(set_attr "type" "sseimul")
5345 (set_attr "prefix_data16" "1")
5346 (set_attr "mode" "TI")])
;; Named expander for the unsigned V8HI high-part multiply.  Inputs may be
;; registers or memory; they are passed through
;; ix86_fixup_binary_operands_no_copy as a MULT in V8HImode.
5348 (define_expand "umulv8hi3_highpart"
5349 [(set (match_operand:V8HI 0 "register_operand" "")
5354 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5356 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5359 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the unsigned V8HI high-part multiply: three-operand
;; vpmulhuw.  Operands 1 and 2 are commutative ("%"); operand 2 may be
;; memory.
5361 (define_insn "*avx_umulv8hi3_highpart"
5362 [(set (match_operand:V8HI 0 "register_operand" "=x")
5367 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5369 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5371 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5372 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5373 [(set_attr "type" "sseimul")
5374 (set_attr "prefix" "vex")
5375 (set_attr "mode" "TI")])
;; SSE2 form of the unsigned V8HI high-part multiply: two-operand pmulhuw
;; with operand 1 tied to the destination; operand 2 may be memory.
5377 (define_insn "*umulv8hi3_highpart"
5378 [(set (match_operand:V8HI 0 "register_operand" "=x")
5383 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5385 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5387 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5388 "pmulhuw\t{%2, %0|%0, %2}"
5389 [(set_attr "type" "sseimul")
5390 (set_attr "prefix_data16" "1")
5391 (set_attr "mode" "TI")])
;; Expander for the widening multiply that produces a V2DI result from
;; elements 0 and 2 (the selecting parallels) of two V4SI inputs.  The
;; inputs are passed through ix86_fixup_binary_operands_no_copy as a MULT
;; in V4SImode.
5393 (define_expand "sse2_umulv2siv2di3"
5394 [(set (match_operand:V2DI 0 "register_operand" "")
5398 (match_operand:V4SI 1 "nonimmediate_operand" "")
5399 (parallel [(const_int 0) (const_int 2)])))
5402 (match_operand:V4SI 2 "nonimmediate_operand" "")
5403 (parallel [(const_int 0) (const_int 2)])))))]
5405 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 widening multiply: three-operand
;; vpmuludq.  Operands 1 and 2 are commutative ("%"); operand 2 may be
;; memory.
5407 (define_insn "*avx_umulv2siv2di3"
5408 [(set (match_operand:V2DI 0 "register_operand" "=x")
5412 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5413 (parallel [(const_int 0) (const_int 2)])))
5416 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5417 (parallel [(const_int 0) (const_int 2)])))))]
5418 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5419 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5420 [(set_attr "type" "sseimul")
5421 (set_attr "prefix" "vex")
5422 (set_attr "mode" "TI")])
;; SSE2 form of the elements-0-and-2 widening multiply: two-operand
;; pmuludq with operand 1 tied to the destination; operand 2 may be memory.
5424 (define_insn "*sse2_umulv2siv2di3"
5425 [(set (match_operand:V2DI 0 "register_operand" "=x")
5429 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5430 (parallel [(const_int 0) (const_int 2)])))
5433 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5434 (parallel [(const_int 0) (const_int 2)])))))]
5435 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5436 "pmuludq\t{%2, %0|%0, %2}"
5437 [(set_attr "type" "sseimul")
5438 (set_attr "prefix_data16" "1")
5439 (set_attr "mode" "TI")])
;; Expander for the SSE4.1-named widening multiply that produces a V2DI
;; result from elements 0 and 2 of two V4SI inputs.  The inputs are passed
;; through ix86_fixup_binary_operands_no_copy as a MULT in V4SImode.
5441 (define_expand "sse4_1_mulv2siv2di3"
5442 [(set (match_operand:V2DI 0 "register_operand" "")
5446 (match_operand:V4SI 1 "nonimmediate_operand" "")
5447 (parallel [(const_int 0) (const_int 2)])))
5450 (match_operand:V4SI 2 "nonimmediate_operand" "")
5451 (parallel [(const_int 0) (const_int 2)])))))]
5453 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 widening multiply emitted as
;; three-operand vpmuldq.  Carries prefix_extra in addition to the VEX
;; prefix attribute.
5455 (define_insn "*avx_mulv2siv2di3"
5456 [(set (match_operand:V2DI 0 "register_operand" "=x")
5460 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5461 (parallel [(const_int 0) (const_int 2)])))
5464 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5465 (parallel [(const_int 0) (const_int 2)])))))]
5466 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5467 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5468 [(set_attr "type" "sseimul")
5469 (set_attr "prefix_extra" "1")
5470 (set_attr "prefix" "vex")
5471 (set_attr "mode" "TI")])
;; SSE4.1 form of the elements-0-and-2 widening multiply: two-operand
;; pmuldq with operand 1 tied to the destination; operand 2 may be memory.
5473 (define_insn "*sse4_1_mulv2siv2di3"
5474 [(set (match_operand:V2DI 0 "register_operand" "=x")
5478 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5479 (parallel [(const_int 0) (const_int 2)])))
5482 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5483 (parallel [(const_int 0) (const_int 2)])))))]
5484 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5485 "pmuldq\t{%2, %0|%0, %2}"
5486 [(set_attr "type" "sseimul")
5487 (set_attr "prefix_extra" "1")
5488 (set_attr "mode" "TI")])
;; Expander for the multiply-and-add of two V8HI inputs into a V4SI
;; result.  The pattern selects V4HI sub-vectors of each input twice:
;; once starting at element 0 and once starting at element 1, with the
;; second pair of selections reusing operands 1 and 2 via match_dup.
;; The inputs are passed through ix86_fixup_binary_operands_no_copy.
5490 (define_expand "sse2_pmaddwd"
5491 [(set (match_operand:V4SI 0 "register_operand" "")
5496 (match_operand:V8HI 1 "nonimmediate_operand" "")
5497 (parallel [(const_int 0)
5503 (match_operand:V8HI 2 "nonimmediate_operand" "")
5504 (parallel [(const_int 0)
5510 (vec_select:V4HI (match_dup 1)
5511 (parallel [(const_int 1)
5516 (vec_select:V4HI (match_dup 2)
5517 (parallel [(const_int 1)
5520 (const_int 7)]))))))]
5522 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI -> V4SI multiply-and-add: three-operand vpmaddwd.
;; Operands 1 and 2 are commutative ("%"); operand 2 may be memory.
5524 (define_insn "*avx_pmaddwd"
5525 [(set (match_operand:V4SI 0 "register_operand" "=x")
5530 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5531 (parallel [(const_int 0)
5537 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5538 (parallel [(const_int 0)
5544 (vec_select:V4HI (match_dup 1)
5545 (parallel [(const_int 1)
5550 (vec_select:V4HI (match_dup 2)
5551 (parallel [(const_int 1)
5554 (const_int 7)]))))))]
5555 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5556 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5557 [(set_attr "type" "sseiadd")
5558 (set_attr "prefix" "vex")
5559 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI -> V4SI multiply-and-add: two-operand pmaddwd
;; with operand 1 tied to the destination.  Unlike the AVX variant this
;; pattern also sets the "atom_unit" scheduling attribute to "simul".
5561 (define_insn "*sse2_pmaddwd"
5562 [(set (match_operand:V4SI 0 "register_operand" "=x")
5567 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5568 (parallel [(const_int 0)
5574 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5575 (parallel [(const_int 0)
5581 (vec_select:V4HI (match_dup 1)
5582 (parallel [(const_int 1)
5587 (vec_select:V4HI (match_dup 2)
5588 (parallel [(const_int 1)
5591 (const_int 7)]))))))]
5592 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5593 "pmaddwd\t{%2, %0|%0, %2}"
5594 [(set_attr "type" "sseiadd")
5595 (set_attr "atom_unit" "simul")
5596 (set_attr "prefix_data16" "1")
5597 (set_attr "mode" "TI")])
;; Named expander for the V4SI multiply.  Both inputs must be registers.
;; Operand fix-up via ix86_fixup_binary_operands_no_copy is done only when
;; SSE4.1 or AVX is available, i.e. when one of the direct multiply insns
;; below can match.
5599 (define_expand "mulv4si3"
5600 [(set (match_operand:V4SI 0 "register_operand" "")
5601 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5602 (match_operand:V4SI 2 "register_operand" "")))]
5605 if (TARGET_SSE4_1 || TARGET_AVX)
5606 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: three-operand vpmulld.  Operands 1 and 2
;; are commutative ("%"); operand 2 may be memory.
5609 (define_insn "*avx_mulv4si3"
5610 [(set (match_operand:V4SI 0 "register_operand" "=x")
5611 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5612 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5613 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5614 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5615 [(set_attr "type" "sseimul")
5616 (set_attr "prefix_extra" "1")
5617 (set_attr "prefix" "vex")
5618 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1
;; tied to the destination; operand 2 may be memory.
5620 (define_insn "*sse4_1_mulv4si3"
5621 [(set (match_operand:V4SI 0 "register_operand" "=x")
5622 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5623 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5624 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5625 "pmulld\t{%2, %0|%0, %2}"
5626 [(set_attr "type" "sseimul")
5627 (set_attr "prefix_extra" "1")
5628 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2-only targets (neither SSE4.1 nor AVX), done as
;; a split while new pseudos can still be created:
;;   - widening-multiply elements 0 and 2;
;;   - shift both inputs right as whole TImode values so elements 1 and 3
;;     land in slots 0 and 2, then widening-multiply again;
;;   - shuffle each product so the wanted results sit in elements 0 and 1;
;;   - interleave the two shuffled vectors into operand 0.
5630 (define_insn_and_split "*sse2_mulv4si3"
5631 [(set (match_operand:V4SI 0 "register_operand" "")
5632 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5633 (match_operand:V4SI 2 "register_operand" "")))]
5634 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5635 && can_create_pseudo_p ()"
5640 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5646 t1 = gen_reg_rtx (V4SImode);
5647 t2 = gen_reg_rtx (V4SImode);
5648 t3 = gen_reg_rtx (V4SImode);
5649 t4 = gen_reg_rtx (V4SImode);
5650 t5 = gen_reg_rtx (V4SImode);
5651 t6 = gen_reg_rtx (V4SImode);
5652 thirtytwo = GEN_INT (32);
5654 /* Multiply elements 2 and 0. */
5655 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5658 /* Shift both input vectors down one element, so that elements 3
5659 and 1 are now in the slots for elements 2 and 0. For K8, at
5660 least, this is faster than using a shuffle. */
5661 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5662 gen_lowpart (TImode, op1),
5664 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5665 gen_lowpart (TImode, op2),
5667 /* Multiply elements 3 and 1. */
5668 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5671 /* Move the results in element 2 down to element 1; we don't care
5672 what goes in elements 2 and 3. */
5673 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5674 const0_rtx, const0_rtx));
5675 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5676 const0_rtx, const0_rtx));
5678 /* Merge the parts back together. */
5679 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, implemented as a split while new pseudos can still be
;; created.  The body contains two instruction sequences:
;;   - one built from XOP helpers: shuffle op1, V4SI multiply of the cross
;;     terms, horizontal add (gen_xop_phadddq), shift left by 32, then a
;;     multiply-accumulate (gen_xop_pmacsdql) that adds the low product;
;;   - one built from SSE2 helpers: low*low plus the two (high*low)
;;     products shifted left by 32, using three widening multiplies.
5683 (define_insn_and_split "mulv2di3"
5684 [(set (match_operand:V2DI 0 "register_operand" "")
5685 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5686 (match_operand:V2DI 2 "register_operand" "")))]
5688 && can_create_pseudo_p ()"
5693 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5702 /* op1: A,B,C,D, op2: E,F,G,H */
5703 op1 = gen_lowpart (V4SImode, op1);
5704 op2 = gen_lowpart (V4SImode, op2);
5706 t1 = gen_reg_rtx (V4SImode);
5707 t2 = gen_reg_rtx (V4SImode);
5708 t3 = gen_reg_rtx (V2DImode);
5709 t4 = gen_reg_rtx (V2DImode);
5712 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5718 /* t2: (B*E),(A*F),(D*G),(C*H) */
5719 emit_insn (gen_mulv4si3 (t2, t1, op2));
5721 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5722 emit_insn (gen_xop_phadddq (t3, t2));
5724 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5725 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5727 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5728 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5732 t1 = gen_reg_rtx (V2DImode);
5733 t2 = gen_reg_rtx (V2DImode);
5734 t3 = gen_reg_rtx (V2DImode);
5735 t4 = gen_reg_rtx (V2DImode);
5736 t5 = gen_reg_rtx (V2DImode);
5737 t6 = gen_reg_rtx (V2DImode);
5738 thirtytwo = GEN_INT (32);
5740 /* Multiply low parts. */
5741 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5742 gen_lowpart (V4SImode, op2)));
5744 /* Shift input vectors right 32 bits so we can multiply high parts. */
5745 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5746 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5748 /* Multiply high parts by low parts. */
5749 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5750 gen_lowpart (V4SImode, t3)));
5751 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5752 gen_lowpart (V4SImode, t2)));
5754 /* Shift them back. */
5755 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5756 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5758 /* Add the three parts together. */
5759 emit_insn (gen_addv2di3 (t6, t1, t4));
5760 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.
;; Computes the low 16 bits (mulv8hi3) and the signed high 16 bits
;; (smulv8hi3_highpart) of every product, then interleaves the high
;; halves of those two vectors into operand 0 viewed as V8HI.
5765 (define_expand "vec_widen_smult_hi_v8hi"
5766 [(match_operand:V4SI 0 "register_operand" "")
5767 (match_operand:V8HI 1 "register_operand" "")
5768 (match_operand:V8HI 2 "register_operand" "")]
5771 rtx op1, op2, t1, t2, dest;
5775 t1 = gen_reg_rtx (V8HImode);
5776 t2 = gen_reg_rtx (V8HImode);
5777 dest = gen_lowpart (V8HImode, operands[0]);
5779 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5780 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5781 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the "hi" variant (low-part and signed high-part
;; multiplies), but the low halves of the two product vectors are
;; interleaved into operand 0 viewed as V8HI.
5785 (define_expand "vec_widen_smult_lo_v8hi"
5786 [(match_operand:V4SI 0 "register_operand" "")
5787 (match_operand:V8HI 1 "register_operand" "")
5788 (match_operand:V8HI 2 "register_operand" "")]
5791 rtx op1, op2, t1, t2, dest;
5795 t1 = gen_reg_rtx (V8HImode);
5796 t2 = gen_reg_rtx (V8HImode);
5797 dest = gen_lowpart (V8HImode, operands[0]);
5799 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5800 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5801 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.
;; Computes the low 16 bits (mulv8hi3) and the unsigned high 16 bits
;; (umulv8hi3_highpart) of every product, then interleaves the high
;; halves of those two vectors into operand 0 viewed as V8HI.
5805 (define_expand "vec_widen_umult_hi_v8hi"
5806 [(match_operand:V4SI 0 "register_operand" "")
5807 (match_operand:V8HI 1 "register_operand" "")
5808 (match_operand:V8HI 2 "register_operand" "")]
5811 rtx op1, op2, t1, t2, dest;
5815 t1 = gen_reg_rtx (V8HImode);
5816 t2 = gen_reg_rtx (V8HImode);
5817 dest = gen_lowpart (V8HImode, operands[0]);
5819 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5820 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5821 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the "hi" variant (low-part and unsigned high-part
;; multiplies), but the low halves of the two product vectors are
;; interleaved into operand 0 viewed as V8HI.
5825 (define_expand "vec_widen_umult_lo_v8hi"
5826 [(match_operand:V4SI 0 "register_operand" "")
5827 (match_operand:V8HI 1 "register_operand" "")
5828 (match_operand:V8HI 2 "register_operand" "")]
5831 rtx op1, op2, t1, t2, dest;
5835 t1 = gen_reg_rtx (V8HImode);
5836 t2 = gen_reg_rtx (V8HImode);
5837 dest = gen_lowpart (V8HImode, operands[0]);
5839 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5840 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5841 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is first shuffled into a fresh V4SI temporary with
;; gen_sse2_pshufd_1, then the XOP helper gen_xop_mulv2div2di3_high
;; produces the V2DI result in operand 0.
5845 (define_expand "vec_widen_smult_hi_v4si"
5846 [(match_operand:V2DI 0 "register_operand" "")
5847 (match_operand:V4SI 1 "register_operand" "")
5848 (match_operand:V4SI 2 "register_operand" "")]
5853 t1 = gen_reg_rtx (V4SImode);
5854 t2 = gen_reg_rtx (V4SImode);
5856 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5861 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5866 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is first shuffled into a fresh V4SI temporary with
;; gen_sse2_pshufd_1, then the XOP helper gen_xop_mulv2div2di3_low
;; produces the V2DI result in operand 0.
5870 (define_expand "vec_widen_smult_lo_v4si"
5871 [(match_operand:V2DI 0 "register_operand" "")
5872 (match_operand:V4SI 1 "register_operand" "")
5873 (match_operand:V4SI 2 "register_operand" "")]
5878 t1 = gen_reg_rtx (V4SImode);
5879 t2 = gen_reg_rtx (V4SImode);
5881 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5886 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5891 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.
;; Interleaving the high half of each input with itself places the two
;; upper source elements in positions 0 and 2, which are the positions
;; the sse2_umulv2siv2di3 pattern multiplies.
5895 (define_expand "vec_widen_umult_hi_v4si"
5896 [(match_operand:V2DI 0 "register_operand" "")
5897 (match_operand:V4SI 1 "register_operand" "")
5898 (match_operand:V4SI 2 "register_operand" "")]
5901 rtx op1, op2, t1, t2;
5905 t1 = gen_reg_rtx (V4SImode);
5906 t2 = gen_reg_rtx (V4SImode);
5908 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5909 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5910 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.
;; Interleaving the low half of each input with itself places the two
;; lower source elements in positions 0 and 2, which are the positions
;; the sse2_umulv2siv2di3 pattern multiplies.
5914 (define_expand "vec_widen_umult_lo_v4si"
5915 [(match_operand:V2DI 0 "register_operand" "")
5916 (match_operand:V4SI 1 "register_operand" "")
5917 (match_operand:V4SI 2 "register_operand" "")]
5920 rtx op1, op2, t1, t2;
5924 t1 = gen_reg_rtx (V4SImode);
5925 t2 = gen_reg_rtx (V4SImode);
5927 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5928 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5929 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 (V4SI) = operand 3 (V4SI
;; accumulator) + pmaddwd (operand 1, operand 2), where operands 1 and 2
;; are V8HI.  The multiply-add result goes through a fresh V4SI temporary.
5933 (define_expand "sdot_prodv8hi"
5934 [(match_operand:V4SI 0 "register_operand" "")
5935 (match_operand:V8HI 1 "register_operand" "")
5936 (match_operand:V8HI 2 "register_operand" "")
5937 (match_operand:V4SI 3 "register_operand" "")]
5940 rtx t = gen_reg_rtx (V4SImode);
5941 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5942 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step: V4SI x V4SI accumulated into V2DI.
;;   t1 = widening multiply of operands 1 and 2, plus accumulator operand 3
;;   t2, t3 = operands 1 and 2 shifted right as whole TImode values
;;   t4 = widening multiply of the shifted copies
;;   operand 0 = t1 + t4
5946 (define_expand "udot_prodv4si"
5947 [(match_operand:V2DI 0 "register_operand" "")
5948 (match_operand:V4SI 1 "register_operand" "")
5949 (match_operand:V4SI 2 "register_operand" "")
5950 (match_operand:V2DI 3 "register_operand" "")]
5955 t1 = gen_reg_rtx (V2DImode);
5956 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5957 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5959 t2 = gen_reg_rtx (V4SImode);
5960 t3 = gen_reg_rtx (V4SImode);
5961 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5962 gen_lowpart (TImode, operands[1]),
5964 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5965 gen_lowpart (TImode, operands[2]),
5968 t4 = gen_reg_rtx (V2DImode);
5969 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5971 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX form of the element-wise vpsra shift for V8HI / V4SI (SSEMODE24):
;; three-operand, source in any SSE register.  The count (operand 2, SI)
;; is either an SSE register or a constant accepted by constraint "N".
;; length_immediate is computed from whether operand 2 is a const_int.
5975 (define_insn "*avx_ashr<mode>3"
5976 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5978 (match_operand:SSEMODE24 1 "register_operand" "x")
5979 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5981 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5982 [(set_attr "type" "sseishft")
5983 (set_attr "prefix" "vex")
5984 (set (attr "length_immediate")
5985 (if_then_else (match_operand 2 "const_int_operand" "")
5987 (const_string "0")))
5988 (set_attr "mode" "TI")])
;; Named two-operand psra shift for V8HI / V4SI (SSEMODE24).  Operand 1 is
;; tied to the destination; the count (operand 2, SI) is an SSE register
;; or a constant accepted by constraint "N".  length_immediate is computed
;; from whether operand 2 is a const_int.
5990 (define_insn "ashr<mode>3"
5991 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5993 (match_operand:SSEMODE24 1 "register_operand" "0")
5994 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5996 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5997 [(set_attr "type" "sseishft")
5998 (set_attr "prefix_data16" "1")
5999 (set (attr "length_immediate")
6000 (if_then_else (match_operand 2 "const_int_operand" "")
6002 (const_string "0")))
6003 (set_attr "mode" "TI")])
;; AVX form of the element-wise logical right shift (lshiftrt) for
;; V8HI / V4SI / V2DI (SSEMODE248): three-operand vpsrl.  The count is an
;; SSE register or a constant accepted by constraint "N".
6005 (define_insn "*avx_lshr<mode>3"
6006 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6007 (lshiftrt:SSEMODE248
6008 (match_operand:SSEMODE248 1 "register_operand" "x")
6009 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6011 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6012 [(set_attr "type" "sseishft")
6013 (set_attr "prefix" "vex")
6014 (set (attr "length_immediate")
6015 (if_then_else (match_operand 2 "const_int_operand" "")
6017 (const_string "0")))
6018 (set_attr "mode" "TI")])
;; Named two-operand logical right shift (lshiftrt) for V8HI / V4SI / V2DI
;; (SSEMODE248), emitted as psrl.  Operand 1 is tied to the destination;
;; the count is an SSE register or a constant accepted by constraint "N".
6020 (define_insn "lshr<mode>3"
6021 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6022 (lshiftrt:SSEMODE248
6023 (match_operand:SSEMODE248 1 "register_operand" "0")
6024 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6026 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6027 [(set_attr "type" "sseishft")
6028 (set_attr "prefix_data16" "1")
6029 (set (attr "length_immediate")
6030 (if_then_else (match_operand 2 "const_int_operand" "")
6032 (const_string "0")))
6033 (set_attr "mode" "TI")])
;; AVX form of the element-wise vpsll shift for V8HI / V4SI / V2DI
;; (SSEMODE248): three-operand, source in any SSE register.  The count is
;; an SSE register or a constant accepted by constraint "N".
6035 (define_insn "*avx_ashl<mode>3"
6036 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6038 (match_operand:SSEMODE248 1 "register_operand" "x")
6039 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6041 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6042 [(set_attr "type" "sseishft")
6043 (set_attr "prefix" "vex")
6044 (set (attr "length_immediate")
6045 (if_then_else (match_operand 2 "const_int_operand" "")
6047 (const_string "0")))
6048 (set_attr "mode" "TI")])
;; Named two-operand psll shift for V8HI / V4SI / V2DI (SSEMODE248).
;; Operand 1 is tied to the destination; the count is an SSE register or
;; a constant accepted by constraint "N".
6050 (define_insn "ashl<mode>3"
6051 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6053 (match_operand:SSEMODE248 1 "register_operand" "0")
6054 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6056 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6057 [(set_attr "type" "sseishft")
6058 (set_attr "prefix_data16" "1")
6059 (set (attr "length_immediate")
6060 (if_then_else (match_operand 2 "const_int_operand" "")
6062 (const_string "0")))
6063 (set_attr "mode" "TI")])
;; Whole-register left shift for every SSEMODEI vector mode, expressed as
;; an ashift in TImode.  Operands 0 and 1 are re-viewed as TImode with
;; gen_lowpart; the count must satisfy const_0_to_255_mul_8_operand.
6065 (define_expand "vec_shl_<mode>"
6066 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6067 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
6068 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6071 operands[0] = gen_lowpart (TImode, operands[0]);
6072 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for every SSEMODEI vector mode,
;; expressed as an lshiftrt in TImode.  Operands 0 and 1 are re-viewed as
;; TImode with gen_lowpart; the count must satisfy
;; const_0_to_255_mul_8_operand.
6075 (define_expand "vec_shr_<mode>"
6076 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6077 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
6078 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6081 operands[0] = gen_lowpart (TImode, operands[0]);
6082 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX form of the integer min/max operations on V16QI / V8HI / V4SI
;; (SSEMODE124): three-operand vp<maxminiprefix><ssevecsize>.
;; prefix_extra is computed per instantiation: the symbol_ref tests
;; whether the mode differs from V8HImode for SMAX/SMIN, or from
;; V16QImode for the other codes.
6085 (define_insn "*avx_<code><mode>3"
6086 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6088 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6089 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6090 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6091 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6092 [(set_attr "type" "sseiadd")
6093 (set (attr "prefix_extra")
6095 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6098 (const_string "0")))
6099 (set_attr "prefix" "vex")
6100 (set_attr "mode" "TI")])
;; Named expander for the V16QI min/max codes covered by <code>.  Inputs
;; may be registers or memory; they are passed through
;; ix86_fixup_binary_operands_no_copy.
6102 (define_expand "<code>v16qi3"
6103 [(set (match_operand:V16QI 0 "register_operand" "")
6105 (match_operand:V16QI 1 "nonimmediate_operand" "")
6106 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6108 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 two-operand V16QI min/max: p<maxminiprefix>b with operand 1 tied
;; to the destination.  Operands 1 and 2 are commutative ("%").
6110 (define_insn "*<code>v16qi3"
6111 [(set (match_operand:V16QI 0 "register_operand" "=x")
6113 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6114 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6115 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6116 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
6117 [(set_attr "type" "sseiadd")
6118 (set_attr "prefix_data16" "1")
6119 (set_attr "mode" "TI")])
;; Named expander for the V8HI min/max codes covered by <code>.  Inputs
;; may be registers or memory; they are passed through
;; ix86_fixup_binary_operands_no_copy.
6121 (define_expand "<code>v8hi3"
6122 [(set (match_operand:V8HI 0 "register_operand" "")
6124 (match_operand:V8HI 1 "nonimmediate_operand" "")
6125 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6127 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 two-operand V8HI min/max: p<maxminiprefix>w with operand 1 tied
;; to the destination.  Operands 1 and 2 are commutative ("%").
6129 (define_insn "*<code>v8hi3"
6130 [(set (match_operand:V8HI 0 "register_operand" "=x")
6132 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6133 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6134 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6135 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
6136 [(set_attr "type" "sseiadd")
6137 (set_attr "prefix_data16" "1")
6138 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  The body shows two strategies: fixing up the
;; operands so the umax pattern itself can be emitted, and a synthesized
;; sequence umax (a, b) = (a -us b) + b using the unsigned saturating
;; subtract followed by an add.  In the synthesized sequence a fresh
;; temporary receives the subtract result when operand 0 is the same rtx
;; as operand 2, so that operand 2 is still intact for the add.
6140 (define_expand "umaxv8hi3"
6141 [(set (match_operand:V8HI 0 "register_operand" "")
6142 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6143 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6147 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6150 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6151 if (rtx_equal_p (op3, op2))
6152 op3 = gen_reg_rtx (V8HImode);
6153 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6154 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for V16QI / V4SI (SSEMODE14).  The body shows two
;; strategies: fixing up the operands for the smax pattern itself, and a
;; fallback that builds a six-element operand array for
;; ix86_expand_int_vcond: destination, the two candidate values
;; (operand 1 then operand 2), a GT comparison of operand 1 against
;; operand 2, and the two compared operands.
;; NOTE(review): presumably the vcond selects xops[1] where the comparison
;; holds and xops[2] otherwise -- confirm against ix86_expand_int_vcond.
6159 (define_expand "smax<mode>3"
6160 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6161 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6162 (match_operand:SSEMODE14 2 "register_operand" "")))]
6166 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6172 xops[0] = operands[0];
6173 xops[1] = operands[1];
6174 xops[2] = operands[2];
6175 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6176 xops[4] = operands[1];
6177 xops[5] = operands[2];
6178 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max on V16QI / V4SI (SSEMODE14):
;; p<maxminiprefix><ssevecsize> with operand 1 tied to the destination.
;; Operands 1 and 2 are commutative ("%"); operand 2 may be memory.
6184 (define_insn "*sse4_1_<code><mode>3"
6185 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6187 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6188 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6189 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6190 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6191 [(set_attr "type" "sseiadd")
6192 (set_attr "prefix_extra" "1")
6193 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  The body shows two strategies: fixing up the
;; operands for the umax pattern itself, and a fallback through
;; ix86_expand_int_vcond with candidate values operand 1 / operand 2 and
;; an unsigned GTU comparison of operand 1 against operand 2.
;; NOTE(review): presumably the vcond selects xops[1] where the comparison
;; holds and xops[2] otherwise -- confirm against ix86_expand_int_vcond.
6195 (define_expand "umaxv4si3"
6196 [(set (match_operand:V4SI 0 "register_operand" "")
6197 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6198 (match_operand:V4SI 2 "register_operand" "")))]
6202 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6208 xops[0] = operands[0];
6209 xops[1] = operands[1];
6210 xops[2] = operands[2];
6211 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6212 xops[4] = operands[1];
6213 xops[5] = operands[2];
6214 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max on V8HI / V4SI (SSEMODE24):
;; p<maxminiprefix><ssevecsize> with operand 1 tied to the destination.
;; Operands 1 and 2 are commutative ("%"); operand 2 may be memory.
6220 (define_insn "*sse4_1_<code><mode>3"
6221 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6223 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6224 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6225 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6226 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6227 [(set_attr "type" "sseiadd")
6228 (set_attr "prefix_extra" "1")
6229 (set_attr "mode" "TI")])
;; Signed minimum for V16QI / V4SI (SSEMODE14).  Mirrors smax<mode>3: the
;; fallback uses the same GT comparison of operand 1 against operand 2,
;; but passes the candidate values to ix86_expand_int_vcond in swapped
;; order (operand 2 first, then operand 1).
;; NOTE(review): presumably the vcond selects xops[1] where the comparison
;; holds and xops[2] otherwise -- confirm against ix86_expand_int_vcond.
6231 (define_expand "smin<mode>3"
6232 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6233 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6234 (match_operand:SSEMODE14 2 "register_operand" "")))]
6238 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6244 xops[0] = operands[0];
6245 xops[1] = operands[2];
6246 xops[2] = operands[1];
6247 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6248 xops[4] = operands[1];
6249 xops[5] = operands[2];
6250 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI / V4SI (SSEMODE24).  The fallback uses an
;; unsigned GTU comparison of operand 1 against operand 2 and passes the
;; candidate values to ix86_expand_int_vcond in swapped order (operand 2
;; first, then operand 1).
;; NOTE(review): presumably the vcond selects xops[1] where the comparison
;; holds and xops[2] otherwise -- confirm against ix86_expand_int_vcond.
6256 (define_expand "umin<mode>3"
6257 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6258 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6259 (match_operand:SSEMODE24 2 "register_operand" "")))]
6263 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6269 xops[0] = operands[0];
6270 xops[1] = operands[2];
6271 xops[2] = operands[1];
6272 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6273 xops[4] = operands[1];
6274 xops[5] = operands[2];
6275 ok = ix86_expand_int_vcond (xops);
6281 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6283 ;; Parallel integral comparisons
6285 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the SSE2 equality compare over SSEMODE124 (V16QI, V8HI, V4SI).
;; Enabled for SSE2 targets without XOP.  The preparation statement only
;; canonicalizes the two nonimmediate inputs for the EQ operation via
;; ix86_fixup_binary_operands_no_copy.
6287 (define_expand "sse2_eq<mode>3"
6288 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6290 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6291 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6292 "TARGET_SSE2 && !TARGET_XOP "
6293 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality compare over SSEMODE1248 (V16QI, V8HI, V4SI, V2DI); emits the
;; non-destructive three-operand vpcmpeq<ssevecsize>.
;; Operand 1 is commutative ("%x"); operand 2 is an SSE register or memory.
;; The prefix_extra attribute is computed per mode: a V2DI destination is
;; singled out, every other mode uses "*".
6295 (define_insn "*avx_eq<mode>3"
6296 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6298 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6299 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6300 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6301 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6302 [(set_attr "type" "ssecmp")
6303 (set (attr "prefix_extra")
6304 (if_then_else (match_operand:V2DI 0 "" "")
6306 (const_string "*")))
6307 (set_attr "prefix" "vex")
6308 (set_attr "mode" "TI")])
;; SSE2 equality compare over SSEMODE124 (V16QI, V8HI, V4SI); emits the
;; destructive two-operand pcmpeq<ssevecsize>.
;; Not used when XOP is available.  Operand 1 is commutative and tied to the
;; destination ("%0"); operand 2 is an SSE register or memory.
6310 (define_insn "*sse2_eq<mode>3"
6311 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6313 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6314 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6315 "TARGET_SSE2 && !TARGET_XOP
6316 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6317 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6318 [(set_attr "type" "ssecmp")
6319 (set_attr "prefix_data16" "1")
6320 (set_attr "mode" "TI")])
;; Expander for the V2DI equality compare.  The preparation statement only
;; canonicalizes the two nonimmediate inputs for EQ in V2DImode via
;; ix86_fixup_binary_operands_no_copy.
6322 (define_expand "sse4_1_eqv2di3"
6323 [(set (match_operand:V2DI 0 "register_operand" "")
6325 (match_operand:V2DI 1 "nonimmediate_operand" "")
6326 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6328 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality compare; emits the destructive two-operand pcmpeqq.
;; Operand 1 is commutative and tied to the destination ("%0"); operand 2 is
;; an SSE register or memory.
6330 (define_insn "*sse4_1_eqv2di3"
6331 [(set (match_operand:V2DI 0 "register_operand" "=x")
6333 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6334 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6335 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6336 "pcmpeqq\t{%2, %0|%0, %2}"
6337 [(set_attr "type" "ssecmp")
6338 (set_attr "prefix_extra" "1")
6339 (set_attr "mode" "TI")])
;; AVX greater-than compare over SSEMODE1248 (V16QI, V8HI, V4SI, V2DI); emits
;; the three-operand vpcmpgt<ssevecsize>.
;; Unlike the equality patterns, operand 1 carries no "%" marker and must be a
;; register, so the operands are not interchangeable.
;; The prefix_extra attribute singles out a V2DI destination; every other
;; mode uses "*".
6341 (define_insn "*avx_gt<mode>3"
6342 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6344 (match_operand:SSEMODE1248 1 "register_operand" "x")
6345 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6347 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6348 [(set_attr "type" "ssecmp")
6349 (set (attr "prefix_extra")
6350 (if_then_else (match_operand:V2DI 0 "" "")
6352 (const_string "*")))
6353 (set_attr "prefix" "vex")
6354 (set_attr "mode" "TI")])
;; SSE2 greater-than compare over SSEMODE124 (V16QI, V8HI, V4SI); emits the
;; destructive two-operand pcmpgt<ssevecsize>.
;; Enabled for SSE2 targets without XOP.  Operand 1 must be the destination
;; register ("0") and is not commutative; operand 2 is a register or memory.
6356 (define_insn "sse2_gt<mode>3"
6357 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6359 (match_operand:SSEMODE124 1 "register_operand" "0")
6360 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6361 "TARGET_SSE2 && !TARGET_XOP"
6362 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6363 [(set_attr "type" "ssecmp")
6364 (set_attr "prefix_data16" "1")
6365 (set_attr "mode" "TI")])
;; V2DI greater-than compare; emits the destructive two-operand pcmpgtq.
;; Operand 1 must be the destination register ("0"); operand 2 is an SSE
;; register or memory.
6367 (define_insn "sse4_2_gtv2di3"
6368 [(set (match_operand:V2DI 0 "register_operand" "=x")
6370 (match_operand:V2DI 1 "register_operand" "0")
6371 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6373 "pcmpgtq\t{%2, %0|%0, %2}"
6374 [(set_attr "type" "ssecmp")
6375 (set_attr "prefix_extra" "1")
6376 (set_attr "mode" "TI")])
;; Vector conditional-select expander over the SSEMODE124C8 modes.
;; Operand layout: 0 = destination, 3 = comparison operator applied to
;; operands 4 and 5, 1 = value selected when the comparison holds, 2 = value
;; selected otherwise.  The whole operand array is handed to
;; ix86_expand_int_vcond, whose boolean result is captured in `ok'.
6378 (define_expand "vcond<mode>"
6379 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6380 (if_then_else:SSEMODE124C8
6381 (match_operator 3 ""
6382 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6383 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6384 (match_operand:SSEMODE124C8 1 "general_operand" "")
6385 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6388 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the "vcondu" name, over the same
;; SSEMODE124C8 modes and with the same operand layout: 0 = destination,
;; 3 = comparison operator on operands 4 and 5, 1/2 = then/else values.
;; The visible expansion is identical: the operand array is handed to
;; ix86_expand_int_vcond.
;; NOTE(review): presumably this is the unsigned-comparison variant and the
;; signedness is carried by the operator in operand 3 -- confirm.
6393 (define_expand "vcondu<mode>"
6394 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6395 (if_then_else:SSEMODE124C8
6396 (match_operator 3 ""
6397 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6398 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6399 (match_operand:SSEMODE124C8 1 "general_operand" "")
6400 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6403 bool ok = ix86_expand_int_vcond (operands);
6408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6410 ;; Parallel bitwise logical operations
6412 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander over SSEMODEI (V16QI, V8HI, V4SI, V2DI),
;; expressed as an xor involving operand 1.
;; The preparation code builds operands[2]: a constant vector with
;; GET_MODE_NUNITS (<MODE>mode) elements, each set to constm1_rtx (all bits
;; set), forced into a register.
6414 (define_expand "one_cmpl<mode>2"
6415 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6416 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6420 int i, n = GET_MODE_NUNITS (<MODE>mode);
6421 rtvec v = rtvec_alloc (n);
6423 for (i = 0; i < n; ++i)
6424 RTVEC_ELT (v, i) = constm1_rtx;
6426 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; 256-bit AVX and-not over AVX256MODEI (V32QI, V16HI, V8SI, V4DI): operand 1
;; is the complemented (not) input, operand 2 the plain input.
;; Emitted as the three-operand floating-point form vandnps, with the insn
;; mode taken from <avxvecpsmode>.
6429 (define_insn "*avx_andnot<mode>3"
6430 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6432 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6433 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6435 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6436 [(set_attr "type" "sselog")
6437 (set_attr "prefix" "vex")
6438 (set_attr "mode" "<avxvecpsmode>")])
;; And-not over SSEMODEI for targets that have SSE but not SSE2.  Emitted as
;; the floating-point instruction andnps (insn mode V4SF).
;; Operand 1 is the complemented input and is tied to the destination ("0").
6440 (define_insn "*sse_andnot<mode>3"
6441 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6443 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6444 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6445 "(TARGET_SSE && !TARGET_SSE2)"
6446 "andnps\t{%2, %0|%0, %2}"
6447 [(set_attr "type" "sselog")
6448 (set_attr "mode" "V4SF")])
;; 128-bit AVX and-not over SSEMODEI (V16QI, V8HI, V4SI, V2DI); emits the
;; three-operand vpandn.  Operand 1 is the complemented input and may be any
;; SSE register; operand 2 is an SSE register or memory.
6450 (define_insn "*avx_andnot<mode>3"
6451 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6453 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6454 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6456 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6457 [(set_attr "type" "sselog")
6458 (set_attr "prefix" "vex")
6459 (set_attr "mode" "TI")])
;; And-not over SSEMODEI (V16QI, V8HI, V4SI, V2DI); emits the destructive
;; two-operand pandn.  Operand 1 is the complemented input and is tied to the
;; destination ("0"); operand 2 is an SSE register or memory.
6461 (define_insn "sse2_andnot<mode>3"
6462 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6464 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6465 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6467 "pandn\t{%2, %0|%0, %2}"
6468 [(set_attr "type" "sselog")
6469 (set_attr "prefix_data16" "1")
6470 (set_attr "mode" "TI")])
;; And-not on a TFmode value held in an SSE register; emits the integer
;; instruction pandn (insn mode TI).  Operand 1 is the complemented input and
;; is tied to the destination ("0").
6472 (define_insn "*andnottf3"
6473 [(set (match_operand:TF 0 "register_operand" "=x")
6475 (not:TF (match_operand:TF 1 "register_operand" "0"))
6476 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6478 "pandn\t{%2, %0|%0, %2}"
6479 [(set_attr "type" "sselog")
6480 (set_attr "prefix_data16" "1")
6481 (set_attr "mode" "TI")])
;; Expander for the bitwise logical operation chosen by the <code> iterator
;; over SSEMODEI (V16QI, V8HI, V4SI, V2DI).  The preparation statement only
;; canonicalizes the two nonimmediate inputs via
;; ix86_fixup_binary_operands_no_copy.
6483 (define_expand "<code><mode>3"
6484 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6486 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6487 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6489 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 256-bit AVX bitwise logical insn over AVX256MODEI (V32QI, V16HI, V8SI,
;; V4DI) for the operation chosen by <code>.  Emitted as the three-operand
;; floating-point form v<plogicprefix>ps, with the insn mode taken from
;; <avxvecpsmode>.  Operand 1 is commutative ("%x").
6491 (define_insn "*avx_<code><mode>3"
6492 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6494 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6495 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6497 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6498 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6499 [(set_attr "type" "sselog")
6500 (set_attr "prefix" "vex")
6501 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logical insn over SSEMODEI for targets that have SSE but not SSE2.
;; Emitted as the floating-point form <plogicprefix>ps (insn mode V4SF).
;; Operand 1 is commutative and tied to the destination ("%0").
6503 (define_insn "*sse_<code><mode>3"
6504 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6506 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6507 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6508 "(TARGET_SSE && !TARGET_SSE2)
6509 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6510 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6511 [(set_attr "type" "sselog")
6512 (set_attr "mode" "V4SF")])
;; 128-bit AVX bitwise logical insn over SSEMODEI (V16QI, V8HI, V4SI, V2DI)
;; for the operation chosen by <code>; emits the three-operand integer form
;; vp<plogicprefix>.  Operand 1 is commutative ("%x").
6514 (define_insn "*avx_<code><mode>3"
6515 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6517 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6518 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6520 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6521 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6522 [(set_attr "type" "sselog")
6523 (set_attr "prefix" "vex")
6524 (set_attr "mode" "TI")])
;; SSE2 bitwise logical insn over SSEMODEI (V16QI, V8HI, V4SI, V2DI) for the
;; operation chosen by <code>; emits the destructive two-operand integer form
;; p<plogicprefix>.  Operand 1 is commutative and tied to the destination
;; ("%0"); operand 2 is an SSE register or memory.
6526 (define_insn "*sse2_<code><mode>3"
6527 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6529 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6530 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6531 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6532 "p<plogicprefix>\t{%2, %0|%0, %2}"
6533 [(set_attr "type" "sselog")
6534 (set_attr "prefix_data16" "1")
6535 (set_attr "mode" "TI")])
;; Expander for the bitwise logical operation chosen by <code> on TFmode
;; values.  The preparation statement only canonicalizes the two nonimmediate
;; inputs via ix86_fixup_binary_operands_no_copy.
6537 (define_expand "<code>tf3"
6538 [(set (match_operand:TF 0 "register_operand" "")
6540 (match_operand:TF 1 "nonimmediate_operand" "")
6541 (match_operand:TF 2 "nonimmediate_operand" "")))]
6543 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 bitwise logical insn on a TFmode value held in an SSE register; emits
;; the integer form p<plogicprefix> (insn mode TI).  Operand 1 is commutative
;; and tied to the destination ("%0"); operand 2 is a register or memory.
6545 (define_insn "*<code>tf3"
6546 [(set (match_operand:TF 0 "register_operand" "=x")
6548 (match_operand:TF 1 "nonimmediate_operand" "%0")
6549 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6550 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6551 "p<plogicprefix>\t{%2, %0|%0, %2}"
6552 [(set_attr "type" "sselog")
6553 (set_attr "prefix_data16" "1")
6554 (set_attr "mode" "TI")])
6556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6558 ;; Parallel integral element swizzling
6560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander that packs two V8HI inputs (operands 1 and 2) into one V16QI
;; result (operand 0).  Both inputs are reinterpreted as V16QI with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; elements, i.e. the low byte of each halfword -- confirm against the helper.
6562 (define_expand "vec_pack_trunc_v8hi"
6563 [(match_operand:V16QI 0 "register_operand" "")
6564 (match_operand:V8HI 1 "register_operand" "")
6565 (match_operand:V8HI 2 "register_operand" "")]
6568 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6569 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6570 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Expander that packs two V4SI inputs (operands 1 and 2) into one V8HI
;; result (operand 0).  Both inputs are reinterpreted as V8HI with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; elements, i.e. the low halfword of each word -- confirm against the helper.
6574 (define_expand "vec_pack_trunc_v4si"
6575 [(match_operand:V8HI 0 "register_operand" "")
6576 (match_operand:V4SI 1 "register_operand" "")
6577 (match_operand:V4SI 2 "register_operand" "")]
6580 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6581 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6582 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Expander that packs two V2DI inputs (operands 1 and 2) into one V4SI
;; result (operand 0).  Both inputs are reinterpreted as V4SI with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; elements, i.e. the low word of each doubleword -- confirm against the helper.
6586 (define_expand "vec_pack_trunc_v2di"
6587 [(match_operand:V4SI 0 "register_operand" "")
6588 (match_operand:V2DI 1 "register_operand" "")
6589 (match_operand:V2DI 2 "register_operand" "")]
6592 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6593 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6594 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; AVX pack of two V8HI inputs into one V16QI result; emits the three-operand
;; vpacksswb.  Operand 1 must be an SSE register; operand 2 may be a register
;; or memory.
6598 (define_insn "*avx_packsswb"
6599 [(set (match_operand:V16QI 0 "register_operand" "=x")
6602 (match_operand:V8HI 1 "register_operand" "x"))
6604 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6606 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6607 [(set_attr "type" "sselog")
6608 (set_attr "prefix" "vex")
6609 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs into one V16QI result; emits the destructive
;; two-operand packsswb.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
6611 (define_insn "sse2_packsswb"
6612 [(set (match_operand:V16QI 0 "register_operand" "=x")
6615 (match_operand:V8HI 1 "register_operand" "0"))
6617 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6619 "packsswb\t{%2, %0|%0, %2}"
6620 [(set_attr "type" "sselog")
6621 (set_attr "prefix_data16" "1")
6622 (set_attr "mode" "TI")])
;; AVX pack of two V4SI inputs into one V8HI result; emits the three-operand
;; vpackssdw.  Operand 1 must be an SSE register; operand 2 may be a register
;; or memory.
6624 (define_insn "*avx_packssdw"
6625 [(set (match_operand:V8HI 0 "register_operand" "=x")
6628 (match_operand:V4SI 1 "register_operand" "x"))
6630 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6632 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6633 [(set_attr "type" "sselog")
6634 (set_attr "prefix" "vex")
6635 (set_attr "mode" "TI")])
;; Pack of two V4SI inputs into one V8HI result; emits the destructive
;; two-operand packssdw.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
6637 (define_insn "sse2_packssdw"
6638 [(set (match_operand:V8HI 0 "register_operand" "=x")
6641 (match_operand:V4SI 1 "register_operand" "0"))
6643 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6645 "packssdw\t{%2, %0|%0, %2}"
6646 [(set_attr "type" "sselog")
6647 (set_attr "prefix_data16" "1")
6648 (set_attr "mode" "TI")])
;; AVX pack of two V8HI inputs into one V16QI result; emits the three-operand
;; vpackuswb.  Operand 1 must be an SSE register; operand 2 may be a register
;; or memory.
6650 (define_insn "*avx_packuswb"
6651 [(set (match_operand:V16QI 0 "register_operand" "=x")
6654 (match_operand:V8HI 1 "register_operand" "x"))
6656 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6658 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6659 [(set_attr "type" "sselog")
6660 (set_attr "prefix" "vex")
6661 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs into one V16QI result; emits the destructive
;; two-operand packuswb.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
6663 (define_insn "sse2_packuswb"
6664 [(set (match_operand:V16QI 0 "register_operand" "=x")
6667 (match_operand:V8HI 1 "register_operand" "0"))
6669 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6671 "packuswb\t{%2, %0|%0, %2}"
6672 [(set_attr "type" "sselog")
6673 (set_attr "prefix_data16" "1")
6674 (set_attr "mode" "TI")])
;; AVX high-half byte interleave of two V16QI inputs; emits the three-operand
;; vpunpckhbw.  The selector pairs index k with k+16 for k = 8..15.
;; NOTE(review): the indices presumably address the 32-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6676 (define_insn "*avx_interleave_highv16qi"
6677 [(set (match_operand:V16QI 0 "register_operand" "=x")
6680 (match_operand:V16QI 1 "register_operand" "x")
6681 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6682 (parallel [(const_int 8) (const_int 24)
6683 (const_int 9) (const_int 25)
6684 (const_int 10) (const_int 26)
6685 (const_int 11) (const_int 27)
6686 (const_int 12) (const_int 28)
6687 (const_int 13) (const_int 29)
6688 (const_int 14) (const_int 30)
6689 (const_int 15) (const_int 31)])))]
6691 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6692 [(set_attr "type" "sselog")
6693 (set_attr "prefix" "vex")
6694 (set_attr "mode" "TI")])
;; High-half byte interleave of two V16QI inputs; emits the destructive
;; two-operand punpckhbw with operand 1 tied to the destination ("0").
;; The selector pairs index k with k+16 for k = 8..15.
;; NOTE(review): the indices presumably address the 32-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6696 (define_insn "vec_interleave_highv16qi"
6697 [(set (match_operand:V16QI 0 "register_operand" "=x")
6700 (match_operand:V16QI 1 "register_operand" "0")
6701 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6702 (parallel [(const_int 8) (const_int 24)
6703 (const_int 9) (const_int 25)
6704 (const_int 10) (const_int 26)
6705 (const_int 11) (const_int 27)
6706 (const_int 12) (const_int 28)
6707 (const_int 13) (const_int 29)
6708 (const_int 14) (const_int 30)
6709 (const_int 15) (const_int 31)])))]
6711 "punpckhbw\t{%2, %0|%0, %2}"
6712 [(set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1")
6714 (set_attr "mode" "TI")])
;; AVX low-half byte interleave of two V16QI inputs; emits the three-operand
;; vpunpcklbw.  The selector pairs index k with k+16 for k = 0..7.
;; NOTE(review): the indices presumably address the 32-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6716 (define_insn "*avx_interleave_lowv16qi"
6717 [(set (match_operand:V16QI 0 "register_operand" "=x")
6720 (match_operand:V16QI 1 "register_operand" "x")
6721 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6722 (parallel [(const_int 0) (const_int 16)
6723 (const_int 1) (const_int 17)
6724 (const_int 2) (const_int 18)
6725 (const_int 3) (const_int 19)
6726 (const_int 4) (const_int 20)
6727 (const_int 5) (const_int 21)
6728 (const_int 6) (const_int 22)
6729 (const_int 7) (const_int 23)])))]
6731 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6732 [(set_attr "type" "sselog")
6733 (set_attr "prefix" "vex")
6734 (set_attr "mode" "TI")])
;; Low-half byte interleave of two V16QI inputs; emits the destructive
;; two-operand punpcklbw with operand 1 tied to the destination ("0").
;; The selector pairs index k with k+16 for k = 0..7.
;; NOTE(review): the indices presumably address the 32-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6736 (define_insn "vec_interleave_lowv16qi"
6737 [(set (match_operand:V16QI 0 "register_operand" "=x")
6740 (match_operand:V16QI 1 "register_operand" "0")
6741 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6742 (parallel [(const_int 0) (const_int 16)
6743 (const_int 1) (const_int 17)
6744 (const_int 2) (const_int 18)
6745 (const_int 3) (const_int 19)
6746 (const_int 4) (const_int 20)
6747 (const_int 5) (const_int 21)
6748 (const_int 6) (const_int 22)
6749 (const_int 7) (const_int 23)])))]
6751 "punpcklbw\t{%2, %0|%0, %2}"
6752 [(set_attr "type" "sselog")
6753 (set_attr "prefix_data16" "1")
6754 (set_attr "mode" "TI")])
;; AVX high-half halfword interleave of two V8HI inputs; emits the
;; three-operand vpunpckhwd.  The selector pairs index k with k+8 for
;; k = 4..7.
;; NOTE(review): the indices presumably address the 16-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6756 (define_insn "*avx_interleave_highv8hi"
6757 [(set (match_operand:V8HI 0 "register_operand" "=x")
6760 (match_operand:V8HI 1 "register_operand" "x")
6761 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6762 (parallel [(const_int 4) (const_int 12)
6763 (const_int 5) (const_int 13)
6764 (const_int 6) (const_int 14)
6765 (const_int 7) (const_int 15)])))]
6767 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6768 [(set_attr "type" "sselog")
6769 (set_attr "prefix" "vex")
6770 (set_attr "mode" "TI")])
;; High-half halfword interleave of two V8HI inputs; emits the destructive
;; two-operand punpckhwd with operand 1 tied to the destination ("0").
;; The selector pairs index k with k+8 for k = 4..7.
;; NOTE(review): the indices presumably address the 16-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6772 (define_insn "vec_interleave_highv8hi"
6773 [(set (match_operand:V8HI 0 "register_operand" "=x")
6776 (match_operand:V8HI 1 "register_operand" "0")
6777 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6778 (parallel [(const_int 4) (const_int 12)
6779 (const_int 5) (const_int 13)
6780 (const_int 6) (const_int 14)
6781 (const_int 7) (const_int 15)])))]
6783 "punpckhwd\t{%2, %0|%0, %2}"
6784 [(set_attr "type" "sselog")
6785 (set_attr "prefix_data16" "1")
6786 (set_attr "mode" "TI")])
;; AVX low-half halfword interleave of two V8HI inputs; emits the
;; three-operand vpunpcklwd.  The selector pairs index k with k+8 for
;; k = 0..3.
;; NOTE(review): the indices presumably address the 16-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6788 (define_insn "*avx_interleave_lowv8hi"
6789 [(set (match_operand:V8HI 0 "register_operand" "=x")
6792 (match_operand:V8HI 1 "register_operand" "x")
6793 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6794 (parallel [(const_int 0) (const_int 8)
6795 (const_int 1) (const_int 9)
6796 (const_int 2) (const_int 10)
6797 (const_int 3) (const_int 11)])))]
6799 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6800 [(set_attr "type" "sselog")
6801 (set_attr "prefix" "vex")
6802 (set_attr "mode" "TI")])
;; Low-half halfword interleave of two V8HI inputs; emits the destructive
;; two-operand punpcklwd with operand 1 tied to the destination ("0").
;; The selector pairs index k with k+8 for k = 0..3.
;; NOTE(review): the indices presumably address the 16-element concatenation
;; of operands 1 and 2, so each pair is (op1[k], op2[k]) -- confirm.
6804 (define_insn "vec_interleave_lowv8hi"
6805 [(set (match_operand:V8HI 0 "register_operand" "=x")
6808 (match_operand:V8HI 1 "register_operand" "0")
6809 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6810 (parallel [(const_int 0) (const_int 8)
6811 (const_int 1) (const_int 9)
6812 (const_int 2) (const_int 10)
6813 (const_int 3) (const_int 11)])))]
6815 "punpcklwd\t{%2, %0|%0, %2}"
6816 [(set_attr "type" "sselog")
6817 (set_attr "prefix_data16" "1")
6818 (set_attr "mode" "TI")])
;; AVX high-half word interleave of two V4SI inputs; emits the three-operand
;; vpunpckhdq.  The selector is (2, 6, 3, 7).
;; NOTE(review): the indices presumably address the 8-element concatenation
;; of operands 1 and 2, giving (op1[2], op2[2], op1[3], op2[3]) -- confirm.
6820 (define_insn "*avx_interleave_highv4si"
6821 [(set (match_operand:V4SI 0 "register_operand" "=x")
6824 (match_operand:V4SI 1 "register_operand" "x")
6825 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6826 (parallel [(const_int 2) (const_int 6)
6827 (const_int 3) (const_int 7)])))]
6829 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6830 [(set_attr "type" "sselog")
6831 (set_attr "prefix" "vex")
6832 (set_attr "mode" "TI")])
;; High-half word interleave of two V4SI inputs; emits the destructive
;; two-operand punpckhdq with operand 1 tied to the destination ("0").
;; The selector is (2, 6, 3, 7).
;; NOTE(review): the indices presumably address the 8-element concatenation
;; of operands 1 and 2, giving (op1[2], op2[2], op1[3], op2[3]) -- confirm.
6834 (define_insn "vec_interleave_highv4si"
6835 [(set (match_operand:V4SI 0 "register_operand" "=x")
6838 (match_operand:V4SI 1 "register_operand" "0")
6839 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6840 (parallel [(const_int 2) (const_int 6)
6841 (const_int 3) (const_int 7)])))]
6843 "punpckhdq\t{%2, %0|%0, %2}"
6844 [(set_attr "type" "sselog")
6845 (set_attr "prefix_data16" "1")
6846 (set_attr "mode" "TI")])
;; AVX low-half word interleave of two V4SI inputs; emits the three-operand
;; vpunpckldq.  The selector is (0, 4, 1, 5).
;; NOTE(review): the indices presumably address the 8-element concatenation
;; of operands 1 and 2, giving (op1[0], op2[0], op1[1], op2[1]) -- confirm.
6848 (define_insn "*avx_interleave_lowv4si"
6849 [(set (match_operand:V4SI 0 "register_operand" "=x")
6852 (match_operand:V4SI 1 "register_operand" "x")
6853 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6854 (parallel [(const_int 0) (const_int 4)
6855 (const_int 1) (const_int 5)])))]
6857 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6858 [(set_attr "type" "sselog")
6859 (set_attr "prefix" "vex")
6860 (set_attr "mode" "TI")])
;; Low-half word interleave of two V4SI inputs; emits the destructive
;; two-operand punpckldq with operand 1 tied to the destination ("0").
;; The selector is (0, 4, 1, 5).
;; NOTE(review): the indices presumably address the 8-element concatenation
;; of operands 1 and 2, giving (op1[0], op2[0], op1[1], op2[1]) -- confirm.
6862 (define_insn "vec_interleave_lowv4si"
6863 [(set (match_operand:V4SI 0 "register_operand" "=x")
6866 (match_operand:V4SI 1 "register_operand" "0")
6867 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6868 (parallel [(const_int 0) (const_int 4)
6869 (const_int 1) (const_int 5)])))]
6871 "punpckldq\t{%2, %0|%0, %2}"
6872 [(set_attr "type" "sselog")
6873 (set_attr "prefix_data16" "1")
6874 (set_attr "mode" "TI")])
;; AVX element insert over SSEMODE124 (V16QI, V8HI, V4SI): the scalar in
;; operand 2 (general register or memory) is broadcast and merged into the
;; vector in operand 1 under the one-hot mask in operand 3.
;; The output code converts that power-of-two mask into an element index with
;; exact_log2 before printing it as the immediate.  A register source is
;; printed with the %k modifier; a memory source is printed as is.
;; prefix_extra is computed per mode: a V8HI destination is singled out,
;; every other mode uses "1".
6876 (define_insn "*avx_pinsr<ssevecsize>"
6877 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6878 (vec_merge:SSEMODE124
6879 (vec_duplicate:SSEMODE124
6880 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6881 (match_operand:SSEMODE124 1 "register_operand" "x")
6882 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6885 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6886 if (MEM_P (operands[2]))
6887 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6889 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6891 [(set_attr "type" "sselog")
6892 (set (attr "prefix_extra")
6893 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6895 (const_string "1")))
6896 (set_attr "length_immediate" "1")
6897 (set_attr "prefix" "vex")
6898 (set_attr "mode" "TI")])
;; Byte insert into a V16QI vector: the QImode scalar in operand 2 (general
;; register or memory) is broadcast and merged into operand 1, which is tied
;; to the destination ("0"), under the one-hot mask in operand 3.
;; The output code converts that power-of-two mask (1..32768) into an element
;; index with exact_log2.  A register source is printed with the %k modifier;
;; a memory source is printed as is.
6900 (define_insn "*sse4_1_pinsrb"
6901 [(set (match_operand:V16QI 0 "register_operand" "=x")
6903 (vec_duplicate:V16QI
6904 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6905 (match_operand:V16QI 1 "register_operand" "0")
6906 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6909 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6910 if (MEM_P (operands[2]))
6911 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6913 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6915 [(set_attr "type" "sselog")
6916 (set_attr "prefix_extra" "1")
6917 (set_attr "length_immediate" "1")
6918 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI vector: the HImode scalar in operand 2
;; (general register or memory) is merged into operand 1, which is tied to
;; the destination ("0"), under the one-hot mask in operand 3.
;; The output code converts that power-of-two mask (1..128) into an element
;; index with exact_log2.  A register source is printed with the %k modifier;
;; a memory source is printed as is.
6920 (define_insn "*sse2_pinsrw"
6921 [(set (match_operand:V8HI 0 "register_operand" "=x")
6924 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6925 (match_operand:V8HI 1 "register_operand" "0")
6926 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6929 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6930 if (MEM_P (operands[2]))
6931 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6933 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6935 [(set_attr "type" "sselog")
6936 (set_attr "prefix_data16" "1")
6937 (set_attr "length_immediate" "1")
6938 (set_attr "mode" "TI")])
6940 ;; This pattern must come before sse2_loadld, since it is preferred.
;; Word insert into a V4SI vector: the SImode scalar in operand 2 (general
;; register or memory) is merged into operand 1, which is tied to the
;; destination ("0"), under the one-hot mask in operand 3.
;; The output code converts that power-of-two mask (1..8) into an element
;; index with exact_log2 before emitting pinsrd.
6941 (define_insn "*sse4_1_pinsrd"
6942 [(set (match_operand:V4SI 0 "register_operand" "=x")
6945 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6946 (match_operand:V4SI 1 "register_operand" "0")
6947 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6950 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6951 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6953 [(set_attr "type" "sselog")
6954 (set_attr "prefix_extra" "1")
6955 (set_attr "length_immediate" "1")
6956 (set_attr "mode" "TI")])
;; AVX doubleword insert into a V2DI vector, available only for 64-bit AVX
;; targets: the DImode scalar in operand 2 (general register or memory) is
;; merged into operand 1 under the one-hot mask in operand 3.
;; The output code converts that power-of-two mask (1..2) into an element
;; index with exact_log2 before emitting the three-operand vpinsrq.
6958 (define_insn "*avx_pinsrq"
6959 [(set (match_operand:V2DI 0 "register_operand" "=x")
6962 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6963 (match_operand:V2DI 1 "register_operand" "x")
6964 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6965 "TARGET_AVX && TARGET_64BIT"
6967 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6968 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6970 [(set_attr "type" "sselog")
6971 (set_attr "prefix_extra" "1")
6972 (set_attr "length_immediate" "1")
6973 (set_attr "prefix" "vex")
6974 (set_attr "mode" "TI")])
;; SSE4.1 doubleword insert into a V2DI vector, available only for 64-bit
;; targets: the DImode scalar in operand 2 (general register or memory) is
;; merged into operand 1, which is tied to the destination ("0"), under the
;; one-hot mask in operand 3.
;; The output code converts that power-of-two mask (1..2) into an element
;; index with exact_log2 before emitting pinsrq.
6976 (define_insn "*sse4_1_pinsrq"
6977 [(set (match_operand:V2DI 0 "register_operand" "=x")
6980 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6981 (match_operand:V2DI 1 "register_operand" "0")
6982 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6983 "TARGET_SSE4_1 && TARGET_64BIT"
6985 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6986 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6988 [(set_attr "type" "sselog")
6989 (set_attr "prefix_rex" "1")
6990 (set_attr "prefix_extra" "1")
6991 (set_attr "length_immediate" "1")
6992 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into an SImode general register; the
;; element index (operand 2) is an immediate in the range 0..15.
;; The template starts with %v and the prefix attribute is "maybe_vex".
;; NOTE(review): presumably %v lets one pattern serve both the legacy and the
;; VEX-encoded mnemonic -- confirm.
6994 (define_insn "*sse4_1_pextrb"
6995 [(set (match_operand:SI 0 "register_operand" "=r")
6998 (match_operand:V16QI 1 "register_operand" "x")
6999 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7001 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7002 [(set_attr "type" "sselog")
7003 (set_attr "prefix_extra" "1")
7004 (set_attr "length_immediate" "1")
7005 (set_attr "prefix" "maybe_vex")
7006 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QImode memory
;; destination; the element index (operand 2) is an immediate in 0..15.
;; Uses the same %vpextrb template as the register-destination pattern.
7008 (define_insn "*sse4_1_pextrb_memory"
7009 [(set (match_operand:QI 0 "memory_operand" "=m")
7011 (match_operand:V16QI 1 "register_operand" "x")
7012 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7014 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7015 [(set_attr "type" "sselog")
7016 (set_attr "prefix_extra" "1")
7017 (set_attr "length_immediate" "1")
7018 (set_attr "prefix" "maybe_vex")
7019 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register into an SImode general register; the
;; element index (operand 2) is an immediate in the range 0..7.
;; Emits %vpextrw with the prefix_data16 attribute set.
7021 (define_insn "*sse2_pextrw"
7022 [(set (match_operand:SI 0 "register_operand" "=r")
7025 (match_operand:V8HI 1 "register_operand" "x")
7026 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7028 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7029 [(set_attr "type" "sselog")
7030 (set_attr "prefix_data16" "1")
7031 (set_attr "length_immediate" "1")
7032 (set_attr "prefix" "maybe_vex")
7033 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register stored directly to an HImode memory
;; destination; the element index (operand 2) is an immediate in 0..7.
;; Unlike *sse2_pextrw, this form sets prefix_extra rather than
;; prefix_data16.
7035 (define_insn "*sse4_1_pextrw_memory"
7036 [(set (match_operand:HI 0 "memory_operand" "=m")
7038 (match_operand:V8HI 1 "register_operand" "x")
7039 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7041 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7042 [(set_attr "type" "sselog")
7043 (set_attr "prefix_extra" "1")
7044 (set_attr "length_immediate" "1")
7045 (set_attr "prefix" "maybe_vex")
7046 (set_attr "mode" "TI")])
;; Word extract from a V4SI register into an SImode general register or
;; memory ("=rm"); the element index (operand 2) is an immediate in 0..3.
;; Emits %vpextrd.
7048 (define_insn "*sse4_1_pextrd"
7049 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7051 (match_operand:V4SI 1 "register_operand" "x")
7052 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7054 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7055 [(set_attr "type" "sselog")
7056 (set_attr "prefix_extra" "1")
7057 (set_attr "length_immediate" "1")
7058 (set_attr "prefix" "maybe_vex")
7059 (set_attr "mode" "TI")])
7061 ;; This pattern must come before *vec_extractv2di_1_sse, since it is preferred.
;; Doubleword extract from a V2DI register into a DImode general register or
;; memory ("=rm"), available only for 64-bit SSE4.1 targets; the element
;; index (operand 2) is an immediate in 0..1.  Emits %vpextrq with the
;; prefix_rex attribute set.
7062 (define_insn "*sse4_1_pextrq"
7063 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7065 (match_operand:V2DI 1 "register_operand" "x")
7066 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7067 "TARGET_SSE4_1 && TARGET_64BIT"
7068 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7069 [(set_attr "type" "sselog")
7070 (set_attr "prefix_rex" "1")
7071 (set_attr "prefix_extra" "1")
7072 (set_attr "length_immediate" "1")
7073 (set_attr "prefix" "maybe_vex")
7074 (set_attr "mode" "TI")])
;; Expander taking the shuffle selector as one packed integer (operand 2).
;; It splits the mask into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate element indices to gen_sse2_pshufd_1.
7076 (define_expand "sse2_pshufd"
7077 [(match_operand:V4SI 0 "register_operand" "")
7078 (match_operand:V4SI 1 "nonimmediate_operand" "")
7079 (match_operand:SI 2 "const_int_operand" "")]
7082 int mask = INTVAL (operands[2]);
7083 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7084 GEN_INT ((mask >> 0) & 3),
7085 GEN_INT ((mask >> 2) & 3),
7086 GEN_INT ((mask >> 4) & 3),
7087 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with the four source element indices (each 0..3) as separate
;; operands 2..5.  The output code packs them back into one immediate, two
;; bits per index with operand 2 in the lowest bits, stores it in
;; operands[2], and emits %vpshufd.  The source may be a register or memory.
7091 (define_insn "sse2_pshufd_1"
7092 [(set (match_operand:V4SI 0 "register_operand" "=x")
7094 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7095 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7096 (match_operand 3 "const_0_to_3_operand" "")
7097 (match_operand 4 "const_0_to_3_operand" "")
7098 (match_operand 5 "const_0_to_3_operand" "")])))]
7102 mask |= INTVAL (operands[2]) << 0;
7103 mask |= INTVAL (operands[3]) << 2;
7104 mask |= INTVAL (operands[4]) << 4;
7105 mask |= INTVAL (operands[5]) << 6;
7106 operands[2] = GEN_INT (mask);
7108 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7110 [(set_attr "type" "sselog1")
7111 (set_attr "prefix_data16" "1")
7112 (set_attr "prefix" "maybe_vex")
7113 (set_attr "length_immediate" "1")
7114 (set_attr "mode" "TI")])
;; Expander taking the shuffle selector as one packed integer (operand 2).
;; It splits the mask into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate element indices to gen_sse2_pshuflw_1.
7116 (define_expand "sse2_pshuflw"
7117 [(match_operand:V8HI 0 "register_operand" "")
7118 (match_operand:V8HI 1 "nonimmediate_operand" "")
7119 (match_operand:SI 2 "const_int_operand" "")]
7122 int mask = INTVAL (operands[2]);
7123 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7124 GEN_INT ((mask >> 0) & 3),
7125 GEN_INT ((mask >> 2) & 3),
7126 GEN_INT ((mask >> 4) & 3),
7127 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selector entries are operands 2..5, each in
;; the range 0..3.  The output code packs them into one immediate, two bits
;; per index with operand 2 in the lowest bits, stores it in operands[2], and
;; emits %vpshuflw.
;; The attributes request a rep prefix and no data16 prefix.
7131 (define_insn "sse2_pshuflw_1"
7132 [(set (match_operand:V8HI 0 "register_operand" "=x")
7134 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7135 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7136 (match_operand 3 "const_0_to_3_operand" "")
7137 (match_operand 4 "const_0_to_3_operand" "")
7138 (match_operand 5 "const_0_to_3_operand" "")
7146 mask |= INTVAL (operands[2]) << 0;
7147 mask |= INTVAL (operands[3]) << 2;
7148 mask |= INTVAL (operands[4]) << 4;
7149 mask |= INTVAL (operands[5]) << 6;
7150 operands[2] = GEN_INT (mask);
7152 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7154 [(set_attr "type" "sselog")
7155 (set_attr "prefix_data16" "0")
7156 (set_attr "prefix_rep" "1")
7157 (set_attr "prefix" "maybe_vex")
7158 (set_attr "length_immediate" "1")
7159 (set_attr "mode" "TI")])
;; Expander taking the shuffle selector as one packed integer (operand 2).
;; It splits the mask into four 2-bit fields and adds 4 to each, so the
;; indices passed to gen_sse2_pshufhw_1 fall in the range 4..7 that the insn
;; pattern's const_4_to_7_operand predicates require.
7161 (define_expand "sse2_pshufhw"
7162 [(match_operand:V8HI 0 "register_operand" "")
7163 (match_operand:V8HI 1 "nonimmediate_operand" "")
7164 (match_operand:SI 2 "const_int_operand" "")]
7167 int mask = INTVAL (operands[2]);
7168 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7169 GEN_INT (((mask >> 0) & 3) + 4),
7170 GEN_INT (((mask >> 2) & 3) + 4),
7171 GEN_INT (((mask >> 4) & 3) + 4),
7172 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose last four selector entries are operands 2..5, each in
;; the range 4..7; the selector list begins with the fixed index 0.
;; The output code subtracts 4 from each index, packs the results into one
;; immediate (two bits per index, operand 2 in the lowest bits), stores it in
;; operands[2], and emits %vpshufhw.
;; The attributes request a rep prefix and no data16 prefix.
7176 (define_insn "sse2_pshufhw_1"
7177 [(set (match_operand:V8HI 0 "register_operand" "=x")
7179 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7180 (parallel [(const_int 0)
7184 (match_operand 2 "const_4_to_7_operand" "")
7185 (match_operand 3 "const_4_to_7_operand" "")
7186 (match_operand 4 "const_4_to_7_operand" "")
7187 (match_operand 5 "const_4_to_7_operand" "")])))]
7191 mask |= (INTVAL (operands[2]) - 4) << 0;
7192 mask |= (INTVAL (operands[3]) - 4) << 2;
7193 mask |= (INTVAL (operands[4]) - 4) << 4;
7194 mask |= (INTVAL (operands[5]) - 4) << 6;
7195 operands[2] = GEN_INT (mask);
7197 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7199 [(set_attr "type" "sselog")
7200 (set_attr "prefix_rep" "1")
7201 (set_attr "prefix_data16" "0")
7202 (set_attr "prefix" "maybe_vex")
7203 (set_attr "length_immediate" "1")
7204 (set_attr "mode" "TI")])
;; Expander that loads an SImode scalar (operand 1, register or memory) into
;; a V4SI register.  The preparation statement sets operands[2] to the
;; all-zero V4SI constant.
;; NOTE(review): presumably operands[2] supplies the remaining vector
;; elements -- confirm against the full pattern.
7206 (define_expand "sse2_loadd"
7207 [(set (match_operand:V4SI 0 "register_operand" "")
7210 (match_operand:SI 1 "nonimmediate_operand" ""))
7214 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX insert of an SImode scalar (operand 2) into a V4SI value whose other
;; input (operand 1) is a register or zero.  Three alternatives:
;;   0: scalar from memory, operand 1 constrained "C"  -> vmovd
;;   1: scalar from a general register, operand 1 "C"  -> vmovd
;;   2: scalar and operand 1 both in SSE registers     -> three-operand vmovss
;; The insn mode is TI for the vmovd alternatives and V4SF for vmovss.
7216 (define_insn "*avx_loadld"
7217 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7220 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7221 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7225 vmovd\t{%2, %0|%0, %2}
7226 vmovd\t{%2, %0|%0, %2}
7227 vmovss\t{%2, %1, %0|%0, %1, %2}"
7228 [(set_attr "type" "ssemov")
7229 (set_attr "prefix" "vex")
7230 (set_attr "mode" "TI,TI,V4SF")])
;; Insert of an SImode scalar (operand 2) into a V4SI value whose other input
;; (operand 1) is a register or zero.  Four alternatives:
;;   0: scalar from memory, operand 1 constrained "C"            -> movd
;;   1: scalar from a general register, operand 1 "C"            -> movd
;;   2: scalar from memory, operand 1 "C"                        -> movss
;;   3: scalar in an SSE register, operand 1 tied to destination -> movss
;; The insn modes are TI, TI, V4SF and SF respectively.
7232 (define_insn "sse2_loadld"
7233 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7236 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7237 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7241 movd\t{%2, %0|%0, %2}
7242 movd\t{%2, %0|%0, %2}
7243 movss\t{%2, %0|%0, %2}
7244 movss\t{%2, %0|%0, %2}"
7245 [(set_attr "type" "ssemov")
7246 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract of element 0 of a V4SI register into an SImode destination
;; (memory, SSE register, or general register).
;; After reload the insn is split into a plain SImode move, provided that
;; inter-unit moves are enabled, or the destination is memory, or the
;; destination is not a general register.  The split code rewrites operand 1
;; as an SImode REG with the same register number.
7248 (define_insn_and_split "sse2_stored"
7249 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7251 (match_operand:V4SI 1 "register_operand" "x,Yi")
7252 (parallel [(const_int 0)])))]
7255 "&& reload_completed
7256 && (TARGET_INTER_UNIT_MOVES
7257 || MEM_P (operands [0])
7258 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7259 [(set (match_dup 0) (match_dup 1))]
7261 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract of one element (index 0..3, operand 2) from a V4SI value that
;; lives in offsettable memory ("o") into an SImode general register.
;; The split code replaces it with a scalar SImode load from the vector's
;; address adjusted by 4 bytes per element index.
7264 (define_insn_and_split "*vec_ext_v4si_mem"
7265 [(set (match_operand:SI 0 "register_operand" "=r")
7267 (match_operand:V4SI 1 "memory_operand" "o")
7268 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7274 int i = INTVAL (operands[2]);
7276 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for extracting element 0 of a V2DI register into a DImode
;; register or memory destination.
7280 (define_expand "sse_storeq"
7281 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7283 (match_operand:V2DI 1 "register_operand" "")
7284 (parallel [(const_int 0)])))]
;; 64-bit-target extract of element 0 of a V2DI value into a DImode
;; destination; a memory-to-memory combination is rejected by the condition.
;; Three alternatives: destination "mx" from an SSE register, destination
;; "*r" from a "Yi" register, and destination "r" from offsettable memory.
;; Only the third alternative has concrete attributes (type imov, mode DI)
;; and the visible %vmov{q} template; the first two are marked "*".
7288 (define_insn "*sse2_storeq_rex64"
7289 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7291 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7292 (parallel [(const_int 0)])))]
7293 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7297 %vmov{q}\t{%1, %0|%0, %1}"
7298 [(set_attr "type" "*,*,imov")
7299 (set_attr "prefix" "*,*,maybe_vex")
7300 (set_attr "mode" "*,*,DI")])
;; Extract of element 0 of a V2DI SSE register into a DImode destination
;; (memory or SSE register), followed by a second pattern of the same shape
;; that is rewritten into a plain DImode move.
;; That rewrite applies when inter-unit moves are enabled, or the destination
;; is memory, or the destination is not a general register; its preparation
;; code rewrites operand 1 as a DImode REG with the same register number.
7302 (define_insn "*sse2_storeq"
7303 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7305 (match_operand:V2DI 1 "register_operand" "x")
7306 (parallel [(const_int 0)])))]
7311 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7313 (match_operand:V2DI 1 "register_operand" "")
7314 (parallel [(const_int 0)])))]
7317 && (TARGET_INTER_UNIT_MOVES
7318 || MEM_P (operands [0])
7319 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7320 [(set (match_dup 0) (match_dup 1))]
7322 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX, 64-bit: DImode result taken with the (const_int 1) selector from a
;; V2DI operand.  Alternatives, in order:
;;   0: SSE reg -> memory via vmovhps
;;   1: SSE reg -> SSE reg via vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE reg via vmovq on %H1
;;   3: offsettable memory -> general reg via vmov{q} on %H1
;; Operands 0 and 1 may not both be memory.
;; NOTE(review): part of the insn condition is not visible in this excerpt.
7325 (define_insn "*vec_extractv2di_1_rex64_avx"
7326 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7328 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7329 (parallel [(const_int 1)])))]
7332 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7334 vmovhps\t{%1, %0|%0, %1}
7335 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7336 vmovq\t{%H1, %0|%0, %H1}
7337 vmov{q}\t{%H1, %0|%0, %H1}"
7338 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7339 (set_attr "length_immediate" "*,1,*,*")
7340 (set_attr "memory" "*,none,*,*")
7341 (set_attr "prefix" "vex")
7342 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Non-VEX, 64-bit counterpart of the pattern above: DImode result taken
;; with the (const_int 1) selector from a V2DI operand.  Alternative 1 ties
;; operand 1 to operand 0 ("0") because psrldq shifts its register in
;; place; the other alternatives use movhps, movq and mov{q} on %H1.
;; Operands 0 and 1 may not both be memory.
7344 (define_insn "*vec_extractv2di_1_rex64"
7345 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7347 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7348 (parallel [(const_int 1)])))]
7349 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7351 movhps\t{%1, %0|%0, %1}
7352 psrldq\t{$8, %0|%0, 8}
7353 movq\t{%H1, %0|%0, %H1}
7354 mov{q}\t{%H1, %0|%0, %H1}"
7355 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7356 (set_attr "length_immediate" "*,1,*,*")
7357 (set_attr "atom_unit" "*,sishuf,*,*")
7358 (set_attr "memory" "*,none,*,*")
7359 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX form without a general-register alternative: DImode result taken
;; with the (const_int 1) selector from a V2DI operand, using vmovhps
;; (reg -> memory), vpsrldq by 8 (reg -> reg) or vmovq on %H1
;; (offsettable memory -> reg).  Operands 0 and 1 may not both be memory.
;; NOTE(review): part of the insn condition is not visible in this excerpt.
7361 (define_insn "*vec_extractv2di_1_avx"
7362 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7364 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7365 (parallel [(const_int 1)])))]
7368 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7370 vmovhps\t{%1, %0|%0, %1}
7371 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7372 vmovq\t{%H1, %0|%0, %H1}"
7373 [(set_attr "type" "ssemov,sseishft,ssemov")
7374 (set_attr "length_immediate" "*,1,*")
7375 (set_attr "memory" "*,none,*")
7376 (set_attr "prefix" "vex")
7377 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 form: DImode result taken with the (const_int 1) selector from a
;; V2DI operand, using movhps (reg -> memory), an in-place psrldq by 8
;; (operand 1 tied to operand 0) or movq on %H1 (offsettable memory -> reg).
;; Requires TARGET_SSE2 and that operands 0 and 1 are not both memory.
;; NOTE(review): the first line of the insn condition is not visible in
;; this excerpt.
7379 (define_insn "*vec_extractv2di_1_sse2"
7380 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7382 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7383 (parallel [(const_int 1)])))]
7385 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7387 movhps\t{%1, %0|%0, %1}
7388 psrldq\t{$8, %0|%0, 8}
7389 movq\t{%H1, %0|%0, %H1}"
7390 [(set_attr "type" "ssemov,sseishft,ssemov")
7391 (set_attr "length_immediate" "*,1,*")
7392 (set_attr "atom_unit" "*,sishuf,*")
7393 (set_attr "memory" "*,none,*")
7394 (set_attr "mode" "V2SF,TI,TI")])
7396 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 form: DImode result taken with the (const_int 1)
;; selector from a V2DI operand, using only single-precision move
;; instructions: movhps (reg -> memory), movhlps (reg -> reg) and movlps on
;; %H1 (offsettable memory -> reg).  Operands 0 and 1 may not both be memory.
7397 (define_insn "*vec_extractv2di_1_sse"
7398 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7400 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7401 (parallel [(const_int 1)])))]
7402 "!TARGET_SSE2 && TARGET_SSE
7403 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7405 movhps\t{%1, %0|%0, %1}
7406 movhlps\t{%1, %0|%0, %1}
7407 movlps\t{%H1, %0|%0, %H1}"
7408 [(set_attr "type" "ssemov")
7409 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SI register (operand 0) from an SImode register (operand 1).
;; Alternative 0 emits %vpshufd with immediate 0; alternative 1 ties
;; operand 1 to operand 0 and emits shufps with immediate 0 on that register.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7411 (define_insn "*vec_dupv4si"
7412 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7414 (match_operand:SI 1 "register_operand" " Y2,0")))]
7417 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7418 shufps\t{$0, %0, %0|%0, %0, 0}"
7419 [(set_attr "type" "sselog1")
7420 (set_attr "prefix" "maybe_vex,orig")
7421 (set_attr "length_immediate" "1")
7422 (set_attr "mode" "TI,V4SF")])
;; AVX: build a V2DI register (operand 0) from a DImode SSE register
;; (operand 1) by emitting vpunpcklqdq with operand 1 as both sources.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7424 (define_insn "*vec_dupv2di_avx"
7425 [(set (match_operand:V2DI 0 "register_operand" "=x")
7427 (match_operand:DI 1 "register_operand" "x")))]
7429 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7430 [(set_attr "type" "sselog1")
7431 (set_attr "prefix" "vex")
7432 (set_attr "mode" "TI")])
;; Non-AVX: build a V2DI register (operand 0) from a DImode register
;; (operand 1) that is tied to the destination in both alternatives.
;; Alternative 0 is typed sselog1/TI, alternative 1 ssemov/V4SF.
;; NOTE(review): the wrapping RTL operator, the insn condition and both
;; output templates are not visible in this excerpt.
7434 (define_insn "*vec_dupv2di"
7435 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7437 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7442 [(set_attr "type" "sselog1,ssemov")
7443 (set_attr "mode" "TI,V4SF")])
;; AVX: form a V2SI register (operand 0) from two SImode operands.
;; Alternatives, in order: vpinsrd (second element from reg/mem),
;; vpunpckldq (both in SSE regs), vmovd (operand 2 matches "C"), and two
;; MMX alternatives (punpckldq, movd) on "y" registers.  The MMX
;; alternatives (3 and 4) keep the "orig" prefix; the rest are VEX encoded.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7445 (define_insn "*vec_concatv2si_avx"
7446 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7448 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7449 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7452 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7453 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7454 vmovd\t{%1, %0|%0, %1}
7455 punpckldq\t{%2, %0|%0, %2}
7456 movd\t{%1, %0|%0, %1}"
7457 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7458 (set_attr "prefix_extra" "1,*,*,*,*")
7459 (set_attr "length_immediate" "1,*,*,*,*")
7460 (set (attr "prefix")
7461 (if_then_else (eq_attr "alternative" "3,4")
7462 (const_string "orig")
7463 (const_string "vex")))
7464 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Non-VEX counterpart using pinsrd: form a V2SI register from two SImode
;; operands.  The two-operand SSE alternatives (0 and 1) tie operand 1 to
;; the destination; alternative 2 is a movd with operand 2 matching "C";
;; alternatives 3 and 4 are the MMX forms on "y" registers.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7466 (define_insn "*vec_concatv2si_sse4_1"
7467 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7469 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7470 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7473 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7474 punpckldq\t{%2, %0|%0, %2}
7475 movd\t{%1, %0|%0, %1}
7476 punpckldq\t{%2, %0|%0, %2}
7477 movd\t{%1, %0|%0, %1}"
7478 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7479 (set_attr "prefix_extra" "1,*,*,*,*")
7480 (set_attr "length_immediate" "1,*,*,*,*")
7481 (set_attr "mode" "TI,TI,TI,DI,DI")])
7483 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7484 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7485 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SI register from two SImode operands; operand 2 is restricted
;; to a register or zero.  Alternatives: punpckldq on SSE regs, movd with
;; operand 2 matching "C", then the same two instructions on MMX "y"
;; registers.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7486 (define_insn "*vec_concatv2si_sse2"
7487 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7489 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7490 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7493 punpckldq\t{%2, %0|%0, %2}
7494 movd\t{%1, %0|%0, %1}
7495 punpckldq\t{%2, %0|%0, %2}
7496 movd\t{%1, %0|%0, %1}"
7497 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7498 (set_attr "mode" "TI,TI,DI,DI")])
;; Form a V2SI register from two SImode operands using single-precision
;; instructions on the SSE side (unpcklps, and movss from memory when
;; operand 2 matches "C"), with punpckldq/movd as the MMX alternatives.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7500 (define_insn "*vec_concatv2si_sse"
7501 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7503 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7504 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7507 unpcklps\t{%2, %0|%0, %2}
7508 movss\t{%1, %0|%0, %1}
7509 punpckldq\t{%2, %0|%0, %2}
7510 movd\t{%1, %0|%0, %1}"
7511 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7512 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX: form a V4SI register from two V2SI operands.  When operand 2 is a
;; register the insn emits vpunpcklqdq; when it is memory it emits vmovhps.
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7514 (define_insn "*vec_concatv4si_1_avx"
7515 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7517 (match_operand:V2SI 1 "register_operand" " x,x")
7518 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7521 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7522 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7523 [(set_attr "type" "sselog,ssemov")
7524 (set_attr "prefix" "vex")
7525 (set_attr "mode" "TI,V2SF")])
;; Non-VEX: form a V4SI register from two V2SI operands, with operand 1
;; tied to the destination in every alternative.  Emits punpcklqdq ("Y2"
;; registers), movlhps (register operand 2) or movhps (memory operand 2).
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
7527 (define_insn "*vec_concatv4si_1"
7528 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7530 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7531 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7534 punpcklqdq\t{%2, %0|%0, %2}
7535 movlhps\t{%2, %0|%0, %2}
7536 movhps\t{%2, %0|%0, %2}"
7537 [(set_attr "type" "sselog,ssemov,ssemov")
7538 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX: form a V2DI register from two DImode operands.
;; Alternatives: vmovq from memory and movq2dq from an MMX register (both
;; with operand 2 matching "C"), vpunpcklqdq of two SSE registers, and
;; vmovhps with a memory operand 2.  Only the movq2dq alternative (1) keeps
;; the "orig" prefix.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7540 (define_insn "*vec_concatv2di_avx"
7541 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7543 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7544 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7545 "!TARGET_64BIT && TARGET_AVX"
7547 vmovq\t{%1, %0|%0, %1}
7548 movq2dq\t{%1, %0|%0, %1}
7549 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7550 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7551 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7552 (set (attr "prefix")
7553 (if_then_else (eq_attr "alternative" "1")
7554 (const_string "orig")
7555 (const_string "vex")))
7556 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE: form a V2DI register from two DImode operands.
;; Alternatives: movq and movq2dq (operand 2 matching "C"), then
;; punpcklqdq, movlhps and movhps with operand 1 tied to the destination.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7558 (define_insn "vec_concatv2di"
7559 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7561 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7562 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7563 "!TARGET_64BIT && TARGET_SSE"
7565 movq\t{%1, %0|%0, %1}
7566 movq2dq\t{%1, %0|%0, %1}
7567 punpcklqdq\t{%2, %0|%0, %2}
7568 movlhps\t{%2, %0|%0, %2}
7569 movhps\t{%2, %0|%0, %2}"
7570 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7571 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX: form a V2DI register from two DImode operands.
;; Alternatives: vpinsrq (operand 2 from general reg/mem), vmovq from
;; memory, vmovq from a general register, movq2dq from an MMX register,
;; vpunpcklqdq of two SSE registers, and vmovhps with memory operand 2.
;; Only the movq2dq alternative (3) keeps the "orig" prefix.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7573 (define_insn "*vec_concatv2di_rex64_avx"
7574 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7576 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7577 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7578 "TARGET_64BIT && TARGET_AVX"
7580 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7581 vmovq\t{%1, %0|%0, %1}
7582 vmovq\t{%1, %0|%0, %1}
7583 movq2dq\t{%1, %0|%0, %1}
7584 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7585 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7586 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7587 (set_attr "prefix_extra" "1,*,*,*,*,*")
7588 (set_attr "length_immediate" "1,*,*,*,*,*")
7589 (set (attr "prefix")
7590 (if_then_else (eq_attr "alternative" "3")
7591 (const_string "orig")
7592 (const_string "vex")))
7593 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1: form a V2DI register from two DImode operands.  Seven
;; alternatives: pinsrq, two movq forms and movq2dq (operand 2 matching
;; "C" for the last three), then punpcklqdq, movlhps and movhps with
;; operand 1 tied to the destination.  prefix_rex is forced to 1 for the
;; pinsrq alternative and for the movq-from-general-register alternative.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7595 (define_insn "*vec_concatv2di_rex64_sse4_1"
7596 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7598 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7599 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7600 "TARGET_64BIT && TARGET_SSE4_1"
7602 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7603 movq\t{%1, %0|%0, %1}
7604 movq\t{%1, %0|%0, %1}
7605 movq2dq\t{%1, %0|%0, %1}
7606 punpcklqdq\t{%2, %0|%0, %2}
7607 movlhps\t{%2, %0|%0, %2}
7608 movhps\t{%2, %0|%0, %2}"
7609 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7610 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7611 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7612 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7613 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE (no pinsrq): form a V2DI register from two DImode operands.
;; Six alternatives: two movq forms and movq2dq (operand 2 matching "C"),
;; then punpcklqdq, movlhps and movhps with operand 1 tied to the
;; destination.  prefix_rex is forced to 1 for the movq-from-general-
;; register alternative.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7615 (define_insn "*vec_concatv2di_rex64_sse"
7616 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7618 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7619 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7620 "TARGET_64BIT && TARGET_SSE"
7622 movq\t{%1, %0|%0, %1}
7623 movq\t{%1, %0|%0, %1}
7624 movq2dq\t{%1, %0|%0, %1}
7625 punpcklqdq\t{%2, %0|%0, %2}
7626 movlhps\t{%2, %0|%0, %2}
7627 movhps\t{%2, %0|%0, %2}"
7628 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7629 (set_attr "prefix_rex" "*,1,*,*,*,*")
7630 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, true, true).
;; NOTE(review): by the pattern name the two flags select unsigned / high
;; half; the condition choosing between the helpers is not visible here.
7632 (define_expand "vec_unpacku_hi_v16qi"
7633 [(match_operand:V8HI 0 "register_operand" "")
7634 (match_operand:V16QI 1 "register_operand" "")]
7638 ix86_expand_sse4_unpack (operands, true, true);
7640 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, false, true).
;; NOTE(review): by the pattern name the two flags select signed / high
;; half; the condition choosing between the helpers is not visible here.
7644 (define_expand "vec_unpacks_hi_v16qi"
7645 [(match_operand:V8HI 0 "register_operand" "")
7646 (match_operand:V16QI 1 "register_operand" "")]
7650 ix86_expand_sse4_unpack (operands, false, true);
7652 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, true, false).
;; NOTE(review): by the pattern name the two flags select unsigned / low
;; half; the condition choosing between the helpers is not visible here.
7656 (define_expand "vec_unpacku_lo_v16qi"
7657 [(match_operand:V8HI 0 "register_operand" "")
7658 (match_operand:V16QI 1 "register_operand" "")]
7662 ix86_expand_sse4_unpack (operands, true, false);
7664 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, false, false).
;; NOTE(review): by the pattern name the two flags select signed / low
;; half; the condition choosing between the helpers is not visible here.
7668 (define_expand "vec_unpacks_lo_v16qi"
7669 [(match_operand:V8HI 0 "register_operand" "")
7670 (match_operand:V16QI 1 "register_operand" "")]
7674 ix86_expand_sse4_unpack (operands, false, false);
7676 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, true, true).
;; NOTE(review): by the pattern name the two flags select unsigned / high
;; half; the condition choosing between the helpers is not visible here.
7680 (define_expand "vec_unpacku_hi_v8hi"
7681 [(match_operand:V4SI 0 "register_operand" "")
7682 (match_operand:V8HI 1 "register_operand" "")]
7686 ix86_expand_sse4_unpack (operands, true, true);
7688 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, false, true).
;; NOTE(review): by the pattern name the two flags select signed / high
;; half; the condition choosing between the helpers is not visible here.
7692 (define_expand "vec_unpacks_hi_v8hi"
7693 [(match_operand:V4SI 0 "register_operand" "")
7694 (match_operand:V8HI 1 "register_operand" "")]
7698 ix86_expand_sse4_unpack (operands, false, true);
7700 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, true, false).
;; NOTE(review): by the pattern name the two flags select unsigned / low
;; half; the condition choosing between the helpers is not visible here.
7704 (define_expand "vec_unpacku_lo_v8hi"
7705 [(match_operand:V4SI 0 "register_operand" "")
7706 (match_operand:V8HI 1 "register_operand" "")]
7710 ix86_expand_sse4_unpack (operands, true, false);
7712 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, false, false).
;; NOTE(review): by the pattern name the two flags select signed / low
;; half; the condition choosing between the helpers is not visible here.
7716 (define_expand "vec_unpacks_lo_v8hi"
7717 [(match_operand:V4SI 0 "register_operand" "")
7718 (match_operand:V8HI 1 "register_operand" "")]
7722 ix86_expand_sse4_unpack (operands, false, false);
7724 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, true, true).
;; NOTE(review): by the pattern name the two flags select unsigned / high
;; half; the condition choosing between the helpers is not visible here.
7728 (define_expand "vec_unpacku_hi_v4si"
7729 [(match_operand:V2DI 0 "register_operand" "")
7730 (match_operand:V4SI 1 "register_operand" "")]
7734 ix86_expand_sse4_unpack (operands, true, true);
7736 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, false, true).
;; NOTE(review): by the pattern name the two flags select signed / high
;; half; the condition choosing between the helpers is not visible here.
7740 (define_expand "vec_unpacks_hi_v4si"
7741 [(match_operand:V2DI 0 "register_operand" "")
7742 (match_operand:V4SI 1 "register_operand" "")]
7746 ix86_expand_sse4_unpack (operands, false, true);
7748 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, true, false).
;; NOTE(review): by the pattern name the two flags select unsigned / low
;; half; the condition choosing between the helpers is not visible here.
7752 (define_expand "vec_unpacku_lo_v4si"
7753 [(match_operand:V2DI 0 "register_operand" "")
7754 (match_operand:V4SI 1 "register_operand" "")]
7758 ix86_expand_sse4_unpack (operands, true, false);
7760 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with arguments (operands, false, false).
;; NOTE(review): by the pattern name the two flags select signed / low
;; half; the condition choosing between the helpers is not visible here.
7764 (define_expand "vec_unpacks_lo_v4si"
7765 [(match_operand:V2DI 0 "register_operand" "")
7766 (match_operand:V4SI 1 "register_operand" "")]
7770 ix86_expand_sse4_unpack (operands, false, false);
7772 ix86_expand_sse_unpack (operands, false, false);
7776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI pattern that combines operands 1 and 2 with a
;; constant vector of sixteen 1s.  Before expansion the operands are
;; normalised with ix86_fixup_binary_operands_no_copy for PLUS/V16QImode.
;; NOTE(review): presumably the rounding unsigned average
;; (a + b + 1) >> 1 -- the arithmetic RTL lines and the condition are not
;; visible in this excerpt; confirm.
7782 (define_expand "sse2_uavgv16qi3"
7783 [(set (match_operand:V16QI 0 "register_operand" "")
7789 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7791 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7792 (const_vector:V16QI [(const_int 1) (const_int 1)
7793 (const_int 1) (const_int 1)
7794 (const_int 1) (const_int 1)
7795 (const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)
7798 (const_int 1) (const_int 1)
7799 (const_int 1) (const_int 1)]))
7802 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX matcher for the V16QI pattern with the all-ones constant vector;
;; emits the three-operand vpavgb.  Operand 1 is marked commutative ("%")
;; and operand 2 may be memory.  Requires TARGET_AVX and that
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL lines are not visible in this excerpt.
7804 (define_insn "*avx_uavgv16qi3"
7805 [(set (match_operand:V16QI 0 "register_operand" "=x")
7811 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7813 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7814 (const_vector:V16QI [(const_int 1) (const_int 1)
7815 (const_int 1) (const_int 1)
7816 (const_int 1) (const_int 1)
7817 (const_int 1) (const_int 1)
7818 (const_int 1) (const_int 1)
7819 (const_int 1) (const_int 1)
7820 (const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)]))
7823 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7824 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7825 [(set_attr "type" "sseiadd")
7826 (set_attr "prefix" "vex")
7827 (set_attr "mode" "TI")])
;; SSE2 matcher for the V16QI pattern with the all-ones constant vector;
;; emits the two-operand pavgb, so operand 1 is tied to the destination
;; ("%0", commutative).  Requires TARGET_SSE2 and that
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL lines are not visible in this excerpt.
7829 (define_insn "*sse2_uavgv16qi3"
7830 [(set (match_operand:V16QI 0 "register_operand" "=x")
7836 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7838 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7839 (const_vector:V16QI [(const_int 1) (const_int 1)
7840 (const_int 1) (const_int 1)
7841 (const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)]))
7848 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7849 "pavgb\t{%2, %0|%0, %2}"
7850 [(set_attr "type" "sseiadd")
7851 (set_attr "prefix_data16" "1")
7852 (set_attr "mode" "TI")])
;; Expander for the V8HI pattern that combines operands 1 and 2 with a
;; constant vector of eight 1s.  Before expansion the operands are
;; normalised with ix86_fixup_binary_operands_no_copy for PLUS/V8HImode.
;; NOTE(review): presumably the rounding unsigned average
;; (a + b + 1) >> 1 -- the arithmetic RTL lines and the condition are not
;; visible in this excerpt; confirm.
7854 (define_expand "sse2_uavgv8hi3"
7855 [(set (match_operand:V8HI 0 "register_operand" "")
7861 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7863 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7864 (const_vector:V8HI [(const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)]))
7870 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX matcher for the V8HI pattern with the all-ones constant vector;
;; emits the three-operand vpavgw.  Requires TARGET_AVX and that
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL lines are not visible in this excerpt.
7872 (define_insn "*avx_uavgv8hi3"
7873 [(set (match_operand:V8HI 0 "register_operand" "=x")
7879 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7881 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7882 (const_vector:V8HI [(const_int 1) (const_int 1)
7883 (const_int 1) (const_int 1)
7884 (const_int 1) (const_int 1)
7885 (const_int 1) (const_int 1)]))
7887 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7888 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7889 [(set_attr "type" "sseiadd")
7890 (set_attr "prefix" "vex")
7891 (set_attr "mode" "TI")])
;; SSE2 matcher for the V8HI pattern with the all-ones constant vector;
;; emits the two-operand pavgw with operand 1 tied to the destination.
;; Requires TARGET_SSE2 and that ix86_binary_operator_ok accepts the
;; operands for PLUS.
;; NOTE(review): the arithmetic RTL lines are not visible in this excerpt.
7893 (define_insn "*sse2_uavgv8hi3"
7894 [(set (match_operand:V8HI 0 "register_operand" "=x")
7900 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7902 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7903 (const_vector:V8HI [(const_int 1) (const_int 1)
7904 (const_int 1) (const_int 1)
7905 (const_int 1) (const_int 1)
7906 (const_int 1) (const_int 1)]))
7908 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7909 "pavgw\t{%2, %0|%0, %2}"
7910 [(set_attr "type" "sseiadd")
7911 (set_attr "prefix_data16" "1")
7912 (set_attr "mode" "TI")])
7914 ;; The correct representation for this is absolutely enormous, and
7915 ;; surely not generally useful.
;; AVX vpsadbw, modelled as an opaque unspec: two V16QI inputs (operand 2
;; may be memory) produce a V2DI result.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
7916 (define_insn "*avx_psadbw"
7917 [(set (match_operand:V2DI 0 "register_operand" "=x")
7918 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7919 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7922 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7923 [(set_attr "type" "sseiadd")
7924 (set_attr "prefix" "vex")
7925 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an opaque unspec: two V16QI inputs produce a
;; V2DI result.  Operand 1 is tied to the destination because the
;; instruction is two-operand; operand 2 may be memory.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
7927 (define_insn "sse2_psadbw"
7928 [(set (match_operand:V2DI 0 "register_operand" "=x")
7929 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7930 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7933 "psadbw\t{%2, %0|%0, %2}"
7934 [(set_attr "type" "sseiadd")
7935 (set_attr "atom_unit" "simul")
7936 (set_attr "prefix_data16" "1")
7937 (set_attr "mode" "TI")])
;; 256-bit AVX vmovmskps/vmovmskpd: an AVX256MODEF2P register (operand 1)
;; yields an SImode general register (operand 0).  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the iterated mode.
;; NOTE(review): this pattern is typed "ssecvt" while the 128-bit
;; <sse>_movmskp pattern is typed "ssemov" -- confirm the difference is
;; intentional.  The unspec wrapper lines are not visible in this excerpt.
7939 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7940 [(set (match_operand:SI 0 "register_operand" "=r")
7942 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7944 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7945 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7946 [(set_attr "type" "ssecvt")
7947 (set_attr "prefix" "vex")
7948 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd (VEX-prefixed when the "%v" template prefix
;; applies): an SSEMODEF2P register (operand 1) yields an SImode general
;; register (operand 0).  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; iterated mode.
;; NOTE(review): the unspec wrapper lines are not visible in this excerpt.
7950 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7951 [(set (match_operand:SI 0 "register_operand" "=r")
7953 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7955 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7956 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7957 [(set_attr "type" "ssemov")
7958 (set_attr "prefix" "maybe_vex")
7959 (set_attr "mode" "<MODE>")])
;; pmovmskb: a V16QI SSE register (operand 1) yields an SImode general
;; register (operand 0); modelled as an unspec.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
7961 (define_insn "sse2_pmovmskb"
7962 [(set (match_operand:SI 0 "register_operand" "=r")
7963 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7966 "%vpmovmskb\t{%1, %0|%0, %1}"
7967 [(set_attr "type" "ssemov")
7968 (set_attr "prefix_data16" "1")
7969 (set_attr "prefix" "maybe_vex")
7970 (set_attr "mode" "SI")])
;; Expander for the masked byte store: a V16QI memory destination
;; (operand 0) is set from an unspec of two V16QI registers (operands 1
;; and 2).
;; NOTE(review): the remaining unspec elements, the unspec code and the
;; condition are not visible in this excerpt.
7972 (define_expand "sse2_maskmovdqu"
7973 [(set (match_operand:V16QI 0 "memory_operand" "")
7974 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7975 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The store address is an SImode register constrained
;; to "D"; it is implicit in the instruction, so the template prints only
;; operands 1 and 2.  The old memory contents appear as an unspec input via
;; (mem (match_dup 0)).  length_vex is fixed at 3.
;; NOTE(review): the in-body comment names %rdi although operand 0 is
;; SImode here (presumably %edi -- confirm).  The unspec code is not
;; visible in this excerpt.
7981 (define_insn "*sse2_maskmovdqu"
7982 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7983 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7984 (match_operand:V16QI 2 "register_operand" "x")
7985 (mem:V16QI (match_dup 0))]
7987 "TARGET_SSE2 && !TARGET_64BIT"
7988 ;; @@@ check ordering of operands in intel/nonintel syntax
7989 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7990 [(set_attr "type" "ssemov")
7991 (set_attr "prefix_data16" "1")
7992 ;; The implicit %rdi operand confuses default length_vex computation.
7993 (set_attr "length_vex" "3")
7994 (set_attr "prefix" "maybe_vex")
7995 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern, but the implicit
;; store address is a DImode register constrained to "D".  length_vex is
;; computed at run time: 3 + 1 when operand 2 is a REX SSE register
;; (REGNO >= FIRST_REX_SSE_REG), otherwise 2 + 1.
;; NOTE(review): the unspec code is not visible in this excerpt.
7997 (define_insn "*sse2_maskmovdqu_rex64"
7998 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7999 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8000 (match_operand:V16QI 2 "register_operand" "x")
8001 (mem:V16QI (match_dup 0))]
8003 "TARGET_SSE2 && TARGET_64BIT"
8004 ;; @@@ check ordering of operands in intel/nonintel syntax
8005 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8006 [(set_attr "type" "ssemov")
8007 (set_attr "prefix_data16" "1")
8008 ;; The implicit %rdi operand confuses default length_vex computation.
8009 (set (attr "length_vex")
8010 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8011 (set_attr "prefix" "maybe_vex")
8012 (set_attr "mode" "TI")])
;; Volatile unspec taking an SImode memory operand; attributed as an SSE
;; insn that loads from memory and touches MXCSR (atom_sse_attr "mxcsr").
;; NOTE(review): the unspec code, insn condition and output template are
;; not visible in this excerpt.
8014 (define_insn "sse_ldmxcsr"
8015 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8019 [(set_attr "type" "sse")
8020 (set_attr "atom_sse_attr" "mxcsr")
8021 (set_attr "prefix" "maybe_vex")
8022 (set_attr "memory" "load")])
;; Store to an SImode memory operand the result of the volatile unspec
;; UNSPECV_STMXCSR; attributed as an SSE insn that stores to memory and
;; touches MXCSR (atom_sse_attr "mxcsr").
;; NOTE(review): the insn condition and output template are not visible in
;; this excerpt.
8024 (define_insn "sse_stmxcsr"
8025 [(set (match_operand:SI 0 "memory_operand" "=m")
8026 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8029 [(set_attr "type" "sse")
8030 (set_attr "atom_sse_attr" "mxcsr")
8031 (set_attr "prefix" "maybe_vex")
8032 (set_attr "memory" "store")])
;; Expander for the store fence (UNSPEC_SFENCE), available with TARGET_SSE
;; or TARGET_3DNOW_A.  The preparation code creates operand 0 as a BLKmode
;; MEM at a scratch address and marks it volatile; the unspec both reads
;; and sets that MEM.
;; NOTE(review): the leading "(set (match_dup 0)" line is not visible in
;; this excerpt.
8034 (define_expand "sse_sfence"
8036 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8037 "TARGET_SSE || TARGET_3DNOW_A"
8039 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8040 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the store fence: a BLKmode operand set from UNSPEC_SFENCE of
;; itself.  Attributes: zero address length, Atom "fence" class, and
;; "unknown" memory behaviour.
;; NOTE(review): the output template is not visible in this excerpt.
8043 (define_insn "*sse_sfence"
8044 [(set (match_operand:BLK 0 "" "")
8045 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8046 "TARGET_SSE || TARGET_3DNOW_A"
8048 [(set_attr "type" "sse")
8049 (set_attr "length_address" "0")
8050 (set_attr "atom_sse_attr" "fence")
8051 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand (constraint "p"); attributed
;; as an SSE insn in the Atom "fence" class with "unknown" memory behaviour.
;; NOTE(review): the unspec code, insn condition and output template are
;; not visible in this excerpt.
8053 (define_insn "sse2_clflush"
8054 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8058 [(set_attr "type" "sse")
8059 (set_attr "atom_sse_attr" "fence")
8060 (set_attr "memory" "unknown")])
;; Expander for the full memory fence (UNSPEC_MFENCE).  The preparation
;; code creates operand 0 as a BLKmode MEM at a scratch address and marks
;; it volatile; the unspec both reads and sets that MEM.
;; NOTE(review): the leading "(set (match_dup 0)" line and the condition
;; are not visible in this excerpt.
8062 (define_expand "sse2_mfence"
8064 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8067 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8068 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the full memory fence: a BLKmode operand set from
;; UNSPEC_MFENCE of itself.  Enabled for TARGET_64BIT or TARGET_SSE2.
;; Attributes: zero address length, Atom "fence" class, "unknown" memory.
;; NOTE(review): the output template is not visible in this excerpt.
8071 (define_insn "*sse2_mfence"
8072 [(set (match_operand:BLK 0 "" "")
8073 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8074 "TARGET_64BIT || TARGET_SSE2"
8076 [(set_attr "type" "sse")
8077 (set_attr "length_address" "0")
8078 (set_attr "atom_sse_attr" "fence")
8079 (set_attr "memory" "unknown")])
;; Expander for the load fence (UNSPEC_LFENCE).  The preparation code
;; creates operand 0 as a BLKmode MEM at a scratch address and marks it
;; volatile; the unspec both reads and sets that MEM.
;; NOTE(review): the leading "(set (match_dup 0)" line and the condition
;; are not visible in this excerpt.
8081 (define_expand "sse2_lfence"
8083 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8086 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8087 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the load fence: a BLKmode operand set from UNSPEC_LFENCE of
;; itself.  Unlike the sfence/mfence matchers its Atom attribute is
;; "lfence" rather than "fence".
;; NOTE(review): the insn condition and output template are not visible in
;; this excerpt.
8090 (define_insn "*sse2_lfence"
8091 [(set (match_operand:BLK 0 "" "")
8092 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8095 [(set_attr "type" "sse")
8096 (set_attr "length_address" "0")
8097 (set_attr "atom_sse_attr" "lfence")
8098 (set_attr "memory" "unknown")])
;; Volatile unspec with two SImode register operands pinned by the "a" and
;; "c" constraints.  The same pattern serves 64-bit code, as explained in
;; the comment inside the pattern.  Encoded length is fixed at 3 bytes.
;; NOTE(review): the unspec code, insn condition and output template are
;; not visible in this excerpt.
8100 (define_insn "sse3_mwait"
8101 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8102 (match_operand:SI 1 "register_operand" "c")]
8105 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8106 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8107 ;; we only need to set up 32bit registers.
8109 [(set_attr "length" "3")])
;; 32-bit monitor: volatile unspec with three SImode register operands
;; pinned by the "a", "c" and "d" constraints.  Enabled for TARGET_SSE3 in
;; non-64-bit mode; the template prints all three operands.  Encoded
;; length is fixed at 3 bytes.
;; NOTE(review): the unspec code is not visible in this excerpt.
8111 (define_insn "sse3_monitor"
8112 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8113 (match_operand:SI 1 "register_operand" "c")
8114 (match_operand:SI 2 "register_operand" "d")]
8116 "TARGET_SSE3 && !TARGET_64BIT"
8117 "monitor\t%0, %1, %2"
8118 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is DImode (constraint "a"), while operands 1
;; and 2 stay SImode (constraints "c" and "d") for the reason given in the
;; comment inside the pattern.  Enabled for TARGET_SSE3 in 64-bit mode.
;; Encoded length is fixed at 3 bytes.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
8120 (define_insn "sse3_monitor64"
8121 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8122 (match_operand:SI 1 "register_operand" "c")
8123 (match_operand:SI 2 "register_operand" "d")]
8125 "TARGET_SSE3 && TARGET_64BIT"
8126 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8127 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8128 ;; zero extended to 64bit, we only need to set up 32bit registers.
8130 [(set_attr "length" "3")])
8132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8134 ;; SSSE3 instructions
8136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The RTL selects the HImode elements of operand 1
;; in adjacent pairs (0,1) (2,3) (4,5) (6,7), then the same pairs of
;; operand 2 (which may be memory), and emits the three-operand vphaddw.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt.
8138 (define_insn "*avx_phaddwv8hi3"
8139 [(set (match_operand:V8HI 0 "register_operand" "=x")
8145 (match_operand:V8HI 1 "register_operand" "x")
8146 (parallel [(const_int 0)]))
8147 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8149 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8150 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8153 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8154 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8156 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8157 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8162 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8163 (parallel [(const_int 0)]))
8164 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8167 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8170 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8171 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8173 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8174 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8176 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8177 [(set_attr "type" "sseiadd")
8178 (set_attr "prefix_extra" "1")
8179 (set_attr "prefix" "vex")
8180 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI (XMM registers).  Same element pairing as the AVX
;; pattern, but operand 1 is tied to the destination because the
;; instruction is two-operand.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt.
8182 (define_insn "ssse3_phaddwv8hi3"
8183 [(set (match_operand:V8HI 0 "register_operand" "=x")
8189 (match_operand:V8HI 1 "register_operand" "0")
8190 (parallel [(const_int 0)]))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8193 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8194 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8197 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8198 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8200 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8201 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8206 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8207 (parallel [(const_int 0)]))
8208 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8210 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8211 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8215 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8218 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8220 "phaddw\t{%2, %0|%0, %2}"
8221 [(set_attr "type" "sseiadd")
8222 (set_attr "atom_unit" "complex")
8223 (set_attr "prefix_data16" "1")
8224 (set_attr "prefix_extra" "1")
8225 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V4HI (MMX "y" registers).  Selects the HImode elements
;; of operand 1 in pairs (0,1) (2,3), then the same pairs of operand 2;
;; operand 1 is tied to the destination.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt.
8227 (define_insn "ssse3_phaddwv4hi3"
8228 [(set (match_operand:V4HI 0 "register_operand" "=y")
8233 (match_operand:V4HI 1 "register_operand" "0")
8234 (parallel [(const_int 0)]))
8235 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8237 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8238 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8242 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8243 (parallel [(const_int 0)]))
8244 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8246 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8247 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8249 "phaddw\t{%2, %0|%0, %2}"
8250 [(set_attr "type" "sseiadd")
8251 (set_attr "atom_unit" "complex")
8252 (set_attr "prefix_extra" "1")
8253 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8254 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  Selects the SImode elements of operand 1 in pairs
;; (0,1) (2,3), then the same pairs of operand 2 (which may be memory),
;; and emits the three-operand vphaddd.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt.
8256 (define_insn "*avx_phadddv4si3"
8257 [(set (match_operand:V4SI 0 "register_operand" "=x")
8262 (match_operand:V4SI 1 "register_operand" "x")
8263 (parallel [(const_int 0)]))
8264 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8266 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8267 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8271 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8272 (parallel [(const_int 0)]))
8273 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8275 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8276 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8278 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8279 [(set_attr "type" "sseiadd")
8280 (set_attr "prefix_extra" "1")
8281 (set_attr "prefix" "vex")
8282 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V4SI (XMM registers).  Same element pairing as the AVX
;; pattern, but operand 1 is tied to the destination because the
;; instruction is two-operand.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt.
8284 (define_insn "ssse3_phadddv4si3"
8285 [(set (match_operand:V4SI 0 "register_operand" "=x")
8290 (match_operand:V4SI 1 "register_operand" "0")
8291 (parallel [(const_int 0)]))
8292 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8294 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8295 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8299 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8300 (parallel [(const_int 0)]))
8301 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8303 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8304 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8306 "phaddd\t{%2, %0|%0, %2}"
8307 [(set_attr "type" "sseiadd")
8308 (set_attr "atom_unit" "complex")
8309 (set_attr "prefix_data16" "1")
8310 (set_attr "prefix_extra" "1")
8311 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V2SI (MMX "y" registers).  Selects elements 0 and 1 of
;; operand 1, then elements 0 and 1 of operand 2; operand 1 is tied to the
;; destination.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt.
8313 (define_insn "ssse3_phadddv2si3"
8314 [(set (match_operand:V2SI 0 "register_operand" "=y")
8318 (match_operand:V2SI 1 "register_operand" "0")
8319 (parallel [(const_int 0)]))
8320 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8323 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8324 (parallel [(const_int 0)]))
8325 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8327 "phaddd\t{%2, %0|%0, %2}"
8328 [(set_attr "type" "sseiadd")
8329 (set_attr "atom_unit" "complex")
8330 (set_attr "prefix_extra" "1")
8331 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8332 (set_attr "mode" "DI")])
;; AVX vphaddsw on V8HI.  Selects the HImode elements of operand 1 in
;; adjacent pairs (0,1) (2,3) (4,5) (6,7), then the same pairs of operand 2
;; (which may be memory), and emits the three-operand vphaddsw.
;; NOTE(review): the operators that combine each pair (presumably a
;; saturating add, per the mnemonic -- confirm) and the insn condition are
;; not visible in this excerpt.
8334 (define_insn "*avx_phaddswv8hi3"
8335 [(set (match_operand:V8HI 0 "register_operand" "=x")
8341 (match_operand:V8HI 1 "register_operand" "x")
8342 (parallel [(const_int 0)]))
8343 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8345 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8346 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8349 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8350 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8352 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8353 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8358 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8359 (parallel [(const_int 0)]))
8360 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8362 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8363 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8366 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8367 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8369 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8370 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8372 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8373 [(set_attr "type" "sseiadd")
8374 (set_attr "prefix_extra" "1")
8375 (set_attr "prefix" "vex")
8376 (set_attr "mode" "TI")])
;; SSSE3 phaddsw on V8HI (XMM registers).  Same element pairing as the AVX
;; pattern, but operand 1 is tied to the destination because the
;; instruction is two-operand.
;; NOTE(review): the operators that combine each pair (presumably a
;; saturating add, per the mnemonic -- confirm) and the insn condition are
;; not visible in this excerpt.
8378 (define_insn "ssse3_phaddswv8hi3"
8379 [(set (match_operand:V8HI 0 "register_operand" "=x")
8385 (match_operand:V8HI 1 "register_operand" "0")
8386 (parallel [(const_int 0)]))
8387 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8389 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8390 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8393 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8394 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8396 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8397 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8402 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8403 (parallel [(const_int 0)]))
8404 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8406 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8407 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8410 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8413 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8416 "phaddsw\t{%2, %0|%0, %2}"
8417 [(set_attr "type" "sseiadd")
8418 (set_attr "atom_unit" "complex")
8419 (set_attr "prefix_data16" "1")
8420 (set_attr "prefix_extra" "1")
8421 (set_attr "mode" "TI")])
;; V4HI variant of phaddsw using the "y" register constraint (MMX
;; registers in the i386 port).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  HI elements 0..3 of each source
;; are selected as adjacent pairs (0/1, 2/3).
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
;; NOTE(review): combining operator and insn condition are not visible here.
8423 (define_insn "ssse3_phaddswv4hi3"
8424 [(set (match_operand:V4HI 0 "register_operand" "=y")
8429 (match_operand:V4HI 1 "register_operand" "0")
8430 (parallel [(const_int 0)]))
8431 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8434 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8438 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8439 (parallel [(const_int 0)]))
8440 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8442 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8445 "phaddsw\t{%2, %0|%0, %2}"
8446 [(set_attr "type" "sseiadd")
8447 (set_attr "atom_unit" "complex")
8448 (set_attr "prefix_extra" "1")
8449 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8450 (set_attr "mode" "DI")])
;; AVX three-operand pattern that emits vphsubw with a V8HI destination.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; HI elements 0..7 of each source are selected as adjacent pairs.
;; NOTE(review): the combining operator (presumably a subtraction) and
;; the insn condition are not visible in this excerpt; confirm.
8452 (define_insn "*avx_phsubwv8hi3"
8453 [(set (match_operand:V8HI 0 "register_operand" "=x")
8459 (match_operand:V8HI 1 "register_operand" "x")
8460 (parallel [(const_int 0)]))
8461 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8463 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8464 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8467 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8468 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8471 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8476 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8477 (parallel [(const_int 0)]))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8485 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8490 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8491 [(set_attr "type" "sseiadd")
8492 (set_attr "prefix_extra" "1")
8493 (set_attr "prefix" "vex")
8494 (set_attr "mode" "TI")])
;; Two-operand pattern that emits phsubw on V8HI.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; HI elements 0..7 of each source are selected as adjacent pairs.
;; NOTE(review): combining operator and insn condition are not visible here.
8496 (define_insn "ssse3_phsubwv8hi3"
8497 [(set (match_operand:V8HI 0 "register_operand" "=x")
8503 (match_operand:V8HI 1 "register_operand" "0")
8504 (parallel [(const_int 0)]))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8507 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8512 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8520 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8521 (parallel [(const_int 0)]))
8522 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8524 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8525 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8529 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8534 "phsubw\t{%2, %0|%0, %2}"
8535 [(set_attr "type" "sseiadd")
8536 (set_attr "atom_unit" "complex")
8537 (set_attr "prefix_data16" "1")
8538 (set_attr "prefix_extra" "1")
8539 (set_attr "mode" "TI")])
;; V4HI variant of phsubw using the "y" register constraint (MMX registers
;; in the i386 port).  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.  HI elements 0..3 of each source are selected
;; as adjacent pairs.  prefix_rex is computed per insn.
;; NOTE(review): combining operator and insn condition are not visible here.
8541 (define_insn "ssse3_phsubwv4hi3"
8542 [(set (match_operand:V4HI 0 "register_operand" "=y")
8547 (match_operand:V4HI 1 "register_operand" "0")
8548 (parallel [(const_int 0)]))
8549 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8551 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8552 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8556 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8557 (parallel [(const_int 0)]))
8558 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8560 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8561 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8563 "phsubw\t{%2, %0|%0, %2}"
8564 [(set_attr "type" "sseiadd")
8565 (set_attr "atom_unit" "complex")
8566 (set_attr "prefix_extra" "1")
8567 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8568 (set_attr "mode" "DI")])
;; AVX three-operand pattern that emits vphsubd with a V4SI destination.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; SI elements 0..3 of each source are selected as adjacent pairs (0/1, 2/3).
;; NOTE(review): combining operator and insn condition are not visible here.
8570 (define_insn "*avx_phsubdv4si3"
8571 [(set (match_operand:V4SI 0 "register_operand" "=x")
8576 (match_operand:V4SI 1 "register_operand" "x")
8577 (parallel [(const_int 0)]))
8578 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8580 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8581 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8585 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8586 (parallel [(const_int 0)]))
8587 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8589 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8590 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8592 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8593 [(set_attr "type" "sseiadd")
8594 (set_attr "prefix_extra" "1")
8595 (set_attr "prefix" "vex")
8596 (set_attr "mode" "TI")])
;; Two-operand pattern that emits phsubd on V4SI.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; SI elements 0..3 of each source are selected as adjacent pairs.
;; NOTE(review): combining operator and insn condition are not visible here.
8598 (define_insn "ssse3_phsubdv4si3"
8599 [(set (match_operand:V4SI 0 "register_operand" "=x")
8604 (match_operand:V4SI 1 "register_operand" "0")
8605 (parallel [(const_int 0)]))
8606 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8608 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8609 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8613 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8614 (parallel [(const_int 0)]))
8615 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8617 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8618 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8620 "phsubd\t{%2, %0|%0, %2}"
8621 [(set_attr "type" "sseiadd")
8622 (set_attr "atom_unit" "complex")
8623 (set_attr "prefix_data16" "1")
8624 (set_attr "prefix_extra" "1")
8625 (set_attr "mode" "TI")])
;; V2SI variant of phsubd using the "y" register constraint (MMX registers
;; in the i386 port).  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.  SI elements 0 and 1 of each source form one
;; pair.  prefix_rex is computed per insn.
;; NOTE(review): combining operator and insn condition are not visible here.
8627 (define_insn "ssse3_phsubdv2si3"
8628 [(set (match_operand:V2SI 0 "register_operand" "=y")
8632 (match_operand:V2SI 1 "register_operand" "0")
8633 (parallel [(const_int 0)]))
8634 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8637 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8638 (parallel [(const_int 0)]))
8639 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8641 "phsubd\t{%2, %0|%0, %2}"
8642 [(set_attr "type" "sseiadd")
8643 (set_attr "atom_unit" "complex")
8644 (set_attr "prefix_extra" "1")
8645 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8646 (set_attr "mode" "DI")])
;; AVX three-operand pattern that emits vphsubsw with a V8HI destination.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; HI elements 0..7 of each source are selected as adjacent pairs.
;; NOTE(review): the combining operator (presumably a saturating
;; subtraction) and the insn condition are not visible here; confirm.
8648 (define_insn "*avx_phsubswv8hi3"
8649 [(set (match_operand:V8HI 0 "register_operand" "=x")
8655 (match_operand:V8HI 1 "register_operand" "x")
8656 (parallel [(const_int 0)]))
8657 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8664 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8667 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8672 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8673 (parallel [(const_int 0)]))
8674 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8676 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8677 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8680 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8683 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8684 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8686 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8687 [(set_attr "type" "sseiadd")
8688 (set_attr "prefix_extra" "1")
8689 (set_attr "prefix" "vex")
8690 (set_attr "mode" "TI")])
;; Two-operand pattern that emits phsubsw on V8HI.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; HI elements 0..7 of each source are selected as adjacent pairs.
;; NOTE(review): combining operator and insn condition are not visible here.
8692 (define_insn "ssse3_phsubswv8hi3"
8693 [(set (match_operand:V8HI 0 "register_operand" "=x")
8699 (match_operand:V8HI 1 "register_operand" "0")
8700 (parallel [(const_int 0)]))
8701 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8703 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8704 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8707 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8708 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8710 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8711 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8716 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8717 (parallel [(const_int 0)]))
8718 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8720 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8721 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8724 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8725 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8727 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8728 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8730 "phsubsw\t{%2, %0|%0, %2}"
8731 [(set_attr "type" "sseiadd")
8732 (set_attr "atom_unit" "complex")
8733 (set_attr "prefix_data16" "1")
8734 (set_attr "prefix_extra" "1")
8735 (set_attr "mode" "TI")])
;; V4HI variant of phsubsw using the "y" register constraint (MMX
;; registers in the i386 port).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  HI elements 0..3 of each source
;; are selected as adjacent pairs.  prefix_rex is computed per insn.
;; NOTE(review): combining operator and insn condition are not visible here.
8737 (define_insn "ssse3_phsubswv4hi3"
8738 [(set (match_operand:V4HI 0 "register_operand" "=y")
8743 (match_operand:V4HI 1 "register_operand" "0")
8744 (parallel [(const_int 0)]))
8745 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8747 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8748 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8752 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8753 (parallel [(const_int 0)]))
8754 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8756 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8757 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8759 "phsubsw\t{%2, %0|%0, %2}"
8760 [(set_attr "type" "sseiadd")
8761 (set_attr "atom_unit" "complex")
8762 (set_attr "prefix_extra" "1")
8763 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8764 (set_attr "mode" "DI")])
;; AVX three-operand pattern that emits vpmaddubsw: V16QI sources,
;; V8HI destination.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  Each source is selected twice, once with a
;; parallel starting at element 0 and once with one starting at element 1
;; and ending at 15.
;; NOTE(review): the intermediate indices, the extension/multiply/add
;; operators and the insn condition are not visible in this excerpt.
8766 (define_insn "*avx_pmaddubsw128"
8767 [(set (match_operand:V8HI 0 "register_operand" "=x")
8772 (match_operand:V16QI 1 "register_operand" "x")
8773 (parallel [(const_int 0)
8783 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8784 (parallel [(const_int 0)
8794 (vec_select:V16QI (match_dup 1)
8795 (parallel [(const_int 1)
8804 (vec_select:V16QI (match_dup 2)
8805 (parallel [(const_int 1)
8812 (const_int 15)]))))))]
8814 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8815 [(set_attr "type" "sseiadd")
8816 (set_attr "prefix_extra" "1")
8817 (set_attr "prefix" "vex")
8818 (set_attr "mode" "TI")])
;; Two-operand pattern that emits pmaddubsw: V16QI sources, V8HI
;; destination.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  Each source is selected twice, with parallels
;; starting at element 0 and at element 1 (ending at 15).
;; NOTE(review): the intermediate indices, the arithmetic operators and
;; the insn condition are not visible in this excerpt.
8820 (define_insn "ssse3_pmaddubsw128"
8821 [(set (match_operand:V8HI 0 "register_operand" "=x")
8826 (match_operand:V16QI 1 "register_operand" "0")
8827 (parallel [(const_int 0)
8837 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8838 (parallel [(const_int 0)
8848 (vec_select:V16QI (match_dup 1)
8849 (parallel [(const_int 1)
8858 (vec_select:V16QI (match_dup 2)
8859 (parallel [(const_int 1)
8866 (const_int 15)]))))))]
8868 "pmaddubsw\t{%2, %0|%0, %2}"
8869 [(set_attr "type" "sseiadd")
8870 (set_attr "atom_unit" "simul")
8871 (set_attr "prefix_data16" "1")
8872 (set_attr "prefix_extra" "1")
8873 (set_attr "mode" "TI")])
;; 64-bit variant of pmaddubsw: V8QI sources, V4HI destination, using the
;; "y" register constraint (MMX registers in the i386 port).  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  Each
;; source is selected twice, with parallels starting at element 0 and at
;; element 1 (ending at 7).  prefix_rex is computed per insn.
;; NOTE(review): the intermediate indices, the arithmetic operators and
;; the insn condition are not visible in this excerpt.
8875 (define_insn "ssse3_pmaddubsw"
8876 [(set (match_operand:V4HI 0 "register_operand" "=y")
8881 (match_operand:V8QI 1 "register_operand" "0")
8882 (parallel [(const_int 0)
8888 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8889 (parallel [(const_int 0)
8895 (vec_select:V8QI (match_dup 1)
8896 (parallel [(const_int 1)
8901 (vec_select:V8QI (match_dup 2)
8902 (parallel [(const_int 1)
8905 (const_int 7)]))))))]
8907 "pmaddubsw\t{%2, %0|%0, %2}"
8908 [(set_attr "type" "sseiadd")
8909 (set_attr "atom_unit" "simul")
8910 (set_attr "prefix_extra" "1")
8911 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8912 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw operation.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) before the
;; RTL is generated.  The pattern includes an all-ones V8HI const_vector.
;; NOTE(review): the surrounding arithmetic operators and the expander
;; condition are not visible in this excerpt.
8914 (define_expand "ssse3_pmulhrswv8hi3"
8915 [(set (match_operand:V8HI 0 "register_operand" "")
8922 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8924 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8926 (const_vector:V8HI [(const_int 1) (const_int 1)
8927 (const_int 1) (const_int 1)
8928 (const_int 1) (const_int 1)
8929 (const_int 1) (const_int 1)]))
8932 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand pattern that emits vpmulhrsw on V8HI.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.  Enabled when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands.
;; The pattern includes an all-ones V8HI const_vector.
;; NOTE(review): the surrounding arithmetic operators are not visible in
;; this excerpt.
8934 (define_insn "*avx_pmulhrswv8hi3"
8935 [(set (match_operand:V8HI 0 "register_operand" "=x")
8942 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8944 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8946 (const_vector:V8HI [(const_int 1) (const_int 1)
8947 (const_int 1) (const_int 1)
8948 (const_int 1) (const_int 1)
8949 (const_int 1) (const_int 1)]))
8951 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8952 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8953 [(set_attr "type" "sseimul")
8954 (set_attr "prefix_extra" "1")
8955 (set_attr "prefix" "vex")
8956 (set_attr "mode" "TI")])
;; Two-operand pattern that emits pmulhrsw on V8HI.  Operand 1 is tied to
;; the destination and marked commutative with operand 2 ("%0").  Enabled
;; when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the surrounding arithmetic operators are not visible in
;; this excerpt.
8958 (define_insn "*ssse3_pmulhrswv8hi3"
8959 [(set (match_operand:V8HI 0 "register_operand" "=x")
8966 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8968 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8970 (const_vector:V8HI [(const_int 1) (const_int 1)
8971 (const_int 1) (const_int 1)
8972 (const_int 1) (const_int 1)
8973 (const_int 1) (const_int 1)]))
8975 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8976 "pmulhrsw\t{%2, %0|%0, %2}"
8977 [(set_attr "type" "sseimul")
8978 (set_attr "prefix_data16" "1")
8979 (set_attr "prefix_extra" "1")
8980 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw operation.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands).
;; The pattern includes an all-ones V4HI const_vector.
;; NOTE(review): the surrounding arithmetic operators and the expander
;; condition are not visible in this excerpt.
8982 (define_expand "ssse3_pmulhrswv4hi3"
8983 [(set (match_operand:V4HI 0 "register_operand" "")
8990 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8992 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8994 (const_vector:V4HI [(const_int 1) (const_int 1)
8995 (const_int 1) (const_int 1)]))
8998 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; V4HI pattern that emits pmulhrsw using the "y" register constraint (MMX
;; registers in the i386 port).  Operand 1 is tied to the destination and
;; marked commutative with operand 2 ("%0").  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands.  prefix_rex is
;; computed per insn.
;; NOTE(review): the surrounding arithmetic operators are not visible here.
9000 (define_insn "*ssse3_pmulhrswv4hi3"
9001 [(set (match_operand:V4HI 0 "register_operand" "=y")
9008 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9010 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9012 (const_vector:V4HI [(const_int 1) (const_int 1)
9013 (const_int 1) (const_int 1)]))
9015 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9016 "pmulhrsw\t{%2, %0|%0, %2}"
9017 [(set_attr "type" "sseimul")
9018 (set_attr "prefix_extra" "1")
9019 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9020 (set_attr "mode" "DI")])
;; AVX three-operand pattern that emits vpshufb on V16QI, modelled as an
;; unspec of operand 1 (register) and operand 2 (register or memory).
;; The ";" after the output template begins an (empty) comment.
;; NOTE(review): the unspec code and insn condition are not visible here.
9022 (define_insn "*avx_pshufbv16qi3"
9023 [(set (match_operand:V16QI 0 "register_operand" "=x")
9024 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9025 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9028 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9029 [(set_attr "type" "sselog1")
9030 (set_attr "prefix_extra" "1")
9031 (set_attr "prefix" "vex")
9032 (set_attr "mode" "TI")])
;; Two-operand pattern that emits pshufb on V16QI, modelled as an unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.  The ";" after the output template begins an (empty) comment.
;; NOTE(review): the unspec code and insn condition are not visible here.
9034 (define_insn "ssse3_pshufbv16qi3"
9035 [(set (match_operand:V16QI 0 "register_operand" "=x")
9036 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9037 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9040 "pshufb\t{%2, %0|%0, %2}";
9041 [(set_attr "type" "sselog1")
9042 (set_attr "prefix_data16" "1")
9043 (set_attr "prefix_extra" "1")
9044 (set_attr "mode" "TI")])
;; V8QI variant of pshufb using the "y" register constraint (MMX registers
;; in the i386 port), modelled as an unspec.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  prefix_rex is
;; computed per insn.
;; NOTE(review): the unspec code and insn condition are not visible here.
9046 (define_insn "ssse3_pshufbv8qi3"
9047 [(set (match_operand:V8QI 0 "register_operand" "=y")
9048 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9049 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9052 "pshufb\t{%2, %0|%0, %2}";
9053 [(set_attr "type" "sselog1")
9054 (set_attr "prefix_extra" "1")
9055 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9056 (set_attr "mode" "DI")])
;; AVX three-operand vpsign pattern, instantiated for every mode in the
;; SSEMODE124 iterator (V16QI, V8HI, V4SI); <ssevecsize> supplies the
;; mnemonic suffix.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
;; NOTE(review): the unspec wrapper and insn condition are not visible here.
9058 (define_insn "*avx_psign<mode>3"
9059 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9061 [(match_operand:SSEMODE124 1 "register_operand" "x")
9062 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9065 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9066 [(set_attr "type" "sselog1")
9067 (set_attr "prefix_extra" "1")
9068 (set_attr "prefix" "vex")
9069 (set_attr "mode" "TI")])
;; Two-operand psign pattern, instantiated for every mode in the
;; SSEMODE124 iterator (V16QI, V8HI, V4SI).  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; NOTE(review): the unspec wrapper and insn condition are not visible here.
9071 (define_insn "ssse3_psign<mode>3"
9072 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9074 [(match_operand:SSEMODE124 1 "register_operand" "0")
9075 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9078 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9079 [(set_attr "type" "sselog1")
9080 (set_attr "prefix_data16" "1")
9081 (set_attr "prefix_extra" "1")
9082 (set_attr "mode" "TI")])
;; psign pattern for the MMXMODEI iterator, using the "y" register
;; constraint (MMX registers in the i386 port); <mmxvecsize> supplies the
;; mnemonic suffix.  Operand 1 is tied to the destination; operand 2 may be
;; a register or memory.  prefix_rex is computed per insn.
;; NOTE(review): MMXMODEI is defined outside this excerpt; the unspec
;; wrapper and insn condition are not visible here.
9084 (define_insn "ssse3_psign<mode>3"
9085 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9087 [(match_operand:MMXMODEI 1 "register_operand" "0")
9088 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9091 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9092 [(set_attr "type" "sselog1")
9093 (set_attr "prefix_extra" "1")
9094 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9095 (set_attr "mode" "DI")])
;; AVX three-operand pattern that emits vpalignr on TImode values.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the value placed in the assembly
;; is one eighth of the RTL constant (presumably bits -> bytes).
;; NOTE(review): the unspec code and insn condition are not visible here.
9097 (define_insn "*avx_palignrti"
9098 [(set (match_operand:TI 0 "register_operand" "=x")
9099 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9100 (match_operand:TI 2 "nonimmediate_operand" "xm")
9101 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9105 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9106 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9108 [(set_attr "type" "sseishft")
9109 (set_attr "prefix_extra" "1")
9110 (set_attr "length_immediate" "1")
9111 (set_attr "prefix" "vex")
9112 (set_attr "mode" "TI")])
;; Two-operand pattern that emits palignr on TImode values.  Operand 1 is
;; tied to the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably bits -> bytes).
;; NOTE(review): the unspec code and insn condition are not visible here.
9114 (define_insn "ssse3_palignrti"
9115 [(set (match_operand:TI 0 "register_operand" "=x")
9116 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9117 (match_operand:TI 2 "nonimmediate_operand" "xm")
9118 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9122 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9123 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9125 [(set_attr "type" "sseishft")
9126 (set_attr "atom_unit" "sishuf")
9127 (set_attr "prefix_data16" "1")
9128 (set_attr "prefix_extra" "1")
9129 (set_attr "length_immediate" "1")
9130 (set_attr "mode" "TI")])
;; DImode variant of palignr using the "y" register constraint (MMX
;; registers in the i386 port).  Operand 1 is tied to the destination.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing (presumably bits -> bytes).
;; prefix_rex is computed per insn.
;; NOTE(review): the unspec code and insn condition are not visible here.
9132 (define_insn "ssse3_palignrdi"
9133 [(set (match_operand:DI 0 "register_operand" "=y")
9134 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9135 (match_operand:DI 2 "nonimmediate_operand" "ym")
9136 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9140 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9141 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9143 [(set_attr "type" "sseishft")
9144 (set_attr "atom_unit" "sishuf")
9145 (set_attr "prefix_extra" "1")
9146 (set_attr "length_immediate" "1")
9147 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9148 (set_attr "mode" "DI")])
;; Vector absolute value for every mode in SSEMODE124 (V16QI, V8HI, V4SI),
;; expressed with the generic abs RTL code.  The source may be a register
;; or memory.  The template starts with "%v" and the prefix attribute is
;; "maybe_vex", so one pattern serves both the legacy and VEX encodings.
;; NOTE(review): the insn condition is not visible in this excerpt.
9150 (define_insn "abs<mode>2"
9151 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9152 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9154 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9155 [(set_attr "type" "sselog1")
9156 (set_attr "prefix_data16" "1")
9157 (set_attr "prefix_extra" "1")
9158 (set_attr "prefix" "maybe_vex")
9159 (set_attr "mode" "TI")])
;; Vector absolute value for the MMXMODEI iterator, using the "y" register
;; constraint (MMX registers in the i386 port).  The source may be a
;; register or memory.  prefix_rep is forced to 0 and prefix_rex is
;; computed per insn.
;; NOTE(review): MMXMODEI is defined outside this excerpt and the insn
;; condition is not visible here.
9161 (define_insn "abs<mode>2"
9162 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9163 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9165 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9166 [(set_attr "type" "sselog1")
9167 (set_attr "prefix_rep" "0")
9168 (set_attr "prefix_extra" "1")
9169 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9170 (set_attr "mode" "DI")])
9172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9174 ;; AMD SSE4A instructions
9176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Scalar store pattern that emits movnts<suffix> for each mode in the
;; MODEF iterator: the destination must be memory, the source an "x"
;; register.
;; NOTE(review): MODEF is defined outside this excerpt; the unspec code
;; and insn condition are not visible here.
9178 (define_insn "sse4a_movnt<mode>"
9179 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9181 [(match_operand:MODEF 1 "register_operand" "x")]
9184 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9185 [(set_attr "type" "ssemov")
9186 (set_attr "mode" "<MODE>")])
;; Vector-source form of the movnts store: element 0 of the SSEMODEF2P
;; vector in operand 1 is selected and stored to the scalar-mode memory
;; destination.
;; NOTE(review): SSEMODEF2P is defined outside this excerpt; the unspec
;; code and insn condition are not visible here.
9188 (define_insn "sse4a_vmmovnt<mode>"
9189 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9190 (unspec:<ssescalarmode>
9191 [(vec_select:<ssescalarmode>
9192 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9193 (parallel [(const_int 0)]))]
9196 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9197 [(set_attr "type" "ssemov")
9198 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operands 2 and 3 are integer constants printed as two immediates, which
;; is why length_immediate is 2.
;; NOTE(review): the unspec code and insn condition are not visible here.
9200 (define_insn "sse4a_extrqi"
9201 [(set (match_operand:V2DI 0 "register_operand" "=x")
9202 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9203 (match_operand 2 "const_int_operand" "")
9204 (match_operand 3 "const_int_operand" "")]
9207 "extrq\t{%3, %2, %0|%0, %2, %3}"
9208 [(set_attr "type" "sse")
9209 (set_attr "prefix_data16" "1")
9210 (set_attr "length_immediate" "2")
9211 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 is a V16QI register.
;; NOTE(review): the unspec code and insn condition are not visible here.
9213 (define_insn "sse4a_extrq"
9214 [(set (match_operand:V2DI 0 "register_operand" "=x")
9215 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9216 (match_operand:V16QI 2 "register_operand" "x")]
9219 "extrq\t{%2, %0|%0, %2}"
9220 [(set_attr "type" "sse")
9221 (set_attr "prefix_data16" "1")
9222 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI.  Operand 1 is tied to the
;; destination, operand 2 is a register, and operands 3 and 4 are integer
;; constants printed as two immediates (length_immediate 2).
;; prefix_data16 is forced to 0 and prefix_rep to 1.
;; NOTE(review): the unspec code and insn condition are not visible here.
9224 (define_insn "sse4a_insertqi"
9225 [(set (match_operand:V2DI 0 "register_operand" "=x")
9226 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9227 (match_operand:V2DI 2 "register_operand" "x")
9228 (match_operand 3 "const_int_operand" "")
9229 (match_operand 4 "const_int_operand" "")]
9232 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9233 [(set_attr "type" "sseins")
9234 (set_attr "prefix_data16" "0")
9235 (set_attr "prefix_rep" "1")
9236 (set_attr "length_immediate" "2")
9237 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI.  Operand 1 is tied to the
;; destination; operand 2 is a register.  prefix_data16 is forced to 0 and
;; prefix_rep to 1.
;; NOTE(review): the unspec code and insn condition are not visible here.
9239 (define_insn "sse4a_insertq"
9240 [(set (match_operand:V2DI 0 "register_operand" "=x")
9241 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9242 (match_operand:V2DI 2 "register_operand" "x")]
9245 "insertq\t{%2, %0|%0, %2}"
9246 [(set_attr "type" "sseins")
9247 (set_attr "prefix_data16" "0")
9248 (set_attr "prefix_rep" "1")
9249 (set_attr "mode" "TI")])
9251 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9253 ;; Intel SSE4.1 instructions
9255 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend (vblendp<suffix>) for the AVXMODEF2P iterator,
;; expressed as a vec_merge.  Note the operand order inside vec_merge:
;; operand 2 comes first and operand 1 second, with operand 3 as the
;; constant mask (range limited by const_0_to_<blendbits>_operand).
;; NOTE(review): AVXMODEF2P is defined outside this excerpt and the insn
;; condition is not visible here.
9257 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9258 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9259 (vec_merge:AVXMODEF2P
9260 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9261 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9262 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9264 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9265 [(set_attr "type" "ssemov")
9266 (set_attr "prefix_extra" "1")
9267 (set_attr "length_immediate" "1")
9268 (set_attr "prefix" "vex")
9269 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend (vblendvp<suffix>) for the AVXMODEF2P iterator.
;; All three inputs are explicit: operand 1 (register), operand 2
;; (register or memory) and operand 3 (register, the selector).
;; length_immediate is 1 even though no constant operand appears in the
;; pattern -- presumably because the fourth register is encoded in an
;; immediate byte; confirm against the ISA reference.
;; NOTE(review): the unspec code and insn condition are not visible here.
9271 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9272 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9274 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9275 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9276 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9279 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9280 [(set_attr "type" "ssemov")
9281 (set_attr "prefix_extra" "1")
9282 (set_attr "length_immediate" "1")
9283 (set_attr "prefix" "vex")
9284 (set_attr "mode" "<avxvecmode>")])
;; Two-operand immediate blend (blendp<suffix>) for the SSEMODEF2P
;; iterator, expressed as a vec_merge with operand 2 first and operand 1
;; (tied to the destination) second; operand 3 is the constant mask.
;; NOTE(review): SSEMODEF2P is defined outside this excerpt and the insn
;; condition is not visible here.
9286 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9287 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9288 (vec_merge:SSEMODEF2P
9289 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9290 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9291 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9293 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9294 [(set_attr "type" "ssemov")
9295 (set_attr "prefix_data16" "1")
9296 (set_attr "prefix_extra" "1")
9297 (set_attr "length_immediate" "1")
9298 (set_attr "mode" "<MODE>")])
;; Two-operand variable blend (blendvp<suffix>) for the SSEMODEF2P
;; iterator.  Operands 0-2 use predicates that exclude xmm0
;; (reg_not_xmm0_operand / nonimm_not_xmm0_operand), while the selector,
;; operand 3, uses the "Yz" constraint -- presumably the xmm0-only class;
;; confirm in the i386 constraint definitions.  Operand 1 is tied to the
;; destination.
;; NOTE(review): the unspec code and insn condition are not visible here.
9300 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9301 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9303 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9304 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9305 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9308 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9309 [(set_attr "type" "ssemov")
9310 (set_attr "prefix_data16" "1")
9311 (set_attr "prefix_extra" "1")
9312 (set_attr "mode" "<MODE>")])
;; AVX three-operand vdpp<suffix> pattern for the AVXMODEF2P iterator.
;; Operands 1 and 2 are marked commutative ("%"); operand 3 is an 8-bit
;; constant (const_0_to_255_operand).
;; NOTE(review): the unspec code and insn condition are not visible here.
9314 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9315 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9317 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9318 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9319 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9322 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9323 [(set_attr "type" "ssemul")
9324 (set_attr "prefix" "vex")
9325 (set_attr "prefix_extra" "1")
9326 (set_attr "length_immediate" "1")
9327 (set_attr "mode" "<avxvecmode>")])
;; Two-operand dpp<suffix> pattern for the SSEMODEF2P iterator.
;; Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0"); operand 3 is an 8-bit constant.
;; NOTE(review): the unspec code and insn condition are not visible here.
9329 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9330 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9332 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9333 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9334 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9337 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9338 [(set_attr "type" "ssemul")
9339 (set_attr "prefix_data16" "1")
9340 (set_attr "prefix_extra" "1")
9341 (set_attr "length_immediate" "1")
9342 (set_attr "mode" "<MODE>")])
;; Load pattern that emits movntdqa: V2DI register destination, memory-only
;; source, modelled as an unspec.  The "%v" template prefix together with
;; prefix "maybe_vex" lets the same pattern serve legacy and VEX encodings.
;; NOTE(review): the unspec code and insn condition are not visible here.
9344 (define_insn "sse4_1_movntdqa"
9345 [(set (match_operand:V2DI 0 "register_operand" "=x")
9346 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9349 "%vmovntdqa\t{%1, %0|%0, %1}"
9350 [(set_attr "type" "ssemov")
9351 (set_attr "prefix_extra" "1")
9352 (set_attr "prefix" "maybe_vex")
9353 (set_attr "mode" "TI")])
;; AVX three-operand pattern that emits vmpsadbw on V16QI, modelled as an
;; unspec of operand 1 (register), operand 2 (register or memory) and an
;; 8-bit constant in operand 3.
;; NOTE(review): the unspec code and insn condition are not visible here.
9355 (define_insn "*avx_mpsadbw"
9356 [(set (match_operand:V16QI 0 "register_operand" "=x")
9357 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9358 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9359 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9362 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9363 [(set_attr "type" "sselog1")
9364 (set_attr "prefix" "vex")
9365 (set_attr "prefix_extra" "1")
9366 (set_attr "length_immediate" "1")
9367 (set_attr "mode" "TI")])
;; Two-operand pattern that emits mpsadbw on V16QI, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory; operand 3 is an 8-bit constant.
;; NOTE(review): the unspec code and insn condition are not visible here.
9369 (define_insn "sse4_1_mpsadbw"
9370 [(set (match_operand:V16QI 0 "register_operand" "=x")
9371 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9372 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9373 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9376 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9377 [(set_attr "type" "sselog1")
9378 (set_attr "prefix_extra" "1")
9379 (set_attr "length_immediate" "1")
9380 (set_attr "mode" "TI")])
;; AVX three-operand pattern that emits vpackusdw: two V4SI sources
;; (operand 1 register, operand 2 register or memory) producing a V8HI
;; destination.
;; NOTE(review): the narrowing/concatenation operators and the insn
;; condition are not visible in this excerpt.
9382 (define_insn "*avx_packusdw"
9383 [(set (match_operand:V8HI 0 "register_operand" "=x")
9386 (match_operand:V4SI 1 "register_operand" "x"))
9388 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9390 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9391 [(set_attr "type" "sselog")
9392 (set_attr "prefix_extra" "1")
9393 (set_attr "prefix" "vex")
9394 (set_attr "mode" "TI")])
;; Two-operand pattern that emits packusdw: two V4SI sources producing a
;; V8HI destination.  Operand 1 is tied to the destination ("0").
;; NOTE(review): the narrowing/concatenation operators and the insn
;; condition are not visible in this excerpt.
9396 (define_insn "sse4_1_packusdw"
9397 [(set (match_operand:V8HI 0 "register_operand" "=x")
9400 (match_operand:V4SI 1 "register_operand" "0"))
9402 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9404 "packusdw\t{%2, %0|%0, %2}"
9405 [(set_attr "type" "sselog")
9406 (set_attr "prefix_extra" "1")
9407 (set_attr "mode" "TI")])
;; AVX variable byte blend (vpblendvb) on V16QI, modelled as an unspec of
;; operand 1 (register), operand 2 (register or memory) and operand 3
;; (register, the selector).  length_immediate is 1 although no constant
;; operand appears -- presumably the fourth register is encoded in an
;; immediate byte; confirm against the ISA reference.
;; NOTE(review): the unspec code and insn condition are not visible here.
9409 (define_insn "*avx_pblendvb"
9410 [(set (match_operand:V16QI 0 "register_operand" "=x")
9411 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9412 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9413 (match_operand:V16QI 3 "register_operand" "x")]
9416 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9417 [(set_attr "type" "ssemov")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "length_immediate" "1")
9420 (set_attr "prefix" "vex")
9421 (set_attr "mode" "TI")])
;; Two-operand variable byte blend (pblendvb) on V16QI.  Operands 0-2 use
;; predicates that exclude xmm0, while the selector, operand 3, uses the
;; "Yz" constraint -- presumably the xmm0-only class; confirm in the i386
;; constraint definitions.  Operand 1 is tied to the destination.
;; NOTE(review): the unspec code and insn condition are not visible here.
9423 (define_insn "sse4_1_pblendvb"
9424 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9425 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9426 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9427 (match_operand:V16QI 3 "register_operand" "Yz")]
9430 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9431 [(set_attr "type" "ssemov")
9432 (set_attr "prefix_extra" "1")
9433 (set_attr "mode" "TI")])
;; AVX three-operand immediate word blend (vpblendw) on V8HI.  In the
;; pattern operand 2 is listed before operand 1, followed by the 8-bit
;; constant mask in operand 3.
;; NOTE(review): the enclosing operator (presumably vec_merge, as in the
;; blendp patterns) and the insn condition are not visible here.
9435 (define_insn "*avx_pblendw"
9436 [(set (match_operand:V8HI 0 "register_operand" "=x")
9438 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9439 (match_operand:V8HI 1 "register_operand" "x")
9440 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9442 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9443 [(set_attr "type" "ssemov")
9444 (set_attr "prefix" "vex")
9445 (set_attr "prefix_extra" "1")
9446 (set_attr "length_immediate" "1")
9447 (set_attr "mode" "TI")])
;; Two-operand immediate word blend (pblendw) on V8HI.  Operand 2 is
;; listed before operand 1 (which is tied to the destination), followed by
;; the 8-bit constant mask in operand 3.
;; NOTE(review): the enclosing operator (presumably vec_merge) and the
;; insn condition are not visible here.
9449 (define_insn "sse4_1_pblendw"
9450 [(set (match_operand:V8HI 0 "register_operand" "=x")
9452 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9453 (match_operand:V8HI 1 "register_operand" "0")
9454 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9456 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9457 [(set_attr "type" "ssemov")
9458 (set_attr "prefix_extra" "1")
9459 (set_attr "length_immediate" "1")
9460 (set_attr "mode" "TI")])
;; Unary pattern that emits phminposuw on V8HI, modelled as
;; UNSPEC_PHMINPOSUW.  The source may be a register or memory.  "%v" plus
;; prefix "maybe_vex" lets one pattern serve legacy and VEX encodings.
;; NOTE(review): the insn condition is not visible in this excerpt.
9462 (define_insn "sse4_1_phminposuw"
9463 [(set (match_operand:V8HI 0 "register_operand" "=x")
9464 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9465 UNSPEC_PHMINPOSUW))]
9467 "%vphminposuw\t{%1, %0|%0, %1}"
9468 [(set_attr "type" "sselog1")
9469 (set_attr "prefix_extra" "1")
9470 (set_attr "prefix" "maybe_vex")
9471 (set_attr "mode" "TI")])
;; Register-source pattern that emits pmovsxbw: elements are selected from
;; a V16QI register starting at index 0 and produce a V8HI result.
;; NOTE(review): the remaining indices, the extension operator and the
;; insn condition are not visible in this excerpt.
9473 (define_insn "sse4_1_extendv8qiv8hi2"
9474 [(set (match_operand:V8HI 0 "register_operand" "=x")
9477 (match_operand:V16QI 1 "register_operand" "x")
9478 (parallel [(const_int 0)
9487 "%vpmovsxbw\t{%1, %0|%0, %1}"
9488 [(set_attr "type" "ssemov")
9489 (set_attr "prefix_extra" "1")
9490 (set_attr "prefix" "maybe_vex")
9491 (set_attr "mode" "TI")])
;; Variant of the pmovsxbw pattern whose source is a V8QI register or
;; memory operand, wrapped in vec_duplicate:V16QI so it fits the same
;; select shape as the register-source pattern.
;; NOTE(review): the remaining indices, the extension operator and the
;; insn condition are not visible in this excerpt.
9493 (define_insn "*sse4_1_extendv8qiv8hi2"
9494 [(set (match_operand:V8HI 0 "register_operand" "=x")
9497 (vec_duplicate:V16QI
9498 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9499 (parallel [(const_int 0)
9508 "%vpmovsxbw\t{%1, %0|%0, %1}"
9509 [(set_attr "type" "ssemov")
9510 (set_attr "prefix_extra" "1")
9511 (set_attr "prefix" "maybe_vex")
9512 (set_attr "mode" "TI")])
;; Register-source pattern that emits pmovsxbd: elements are selected from
;; a V16QI register starting at index 0 and produce a V4SI result.
;; NOTE(review): the remaining indices, the extension operator and the
;; insn condition are not visible in this excerpt.
9514 (define_insn "sse4_1_extendv4qiv4si2"
9515 [(set (match_operand:V4SI 0 "register_operand" "=x")
9518 (match_operand:V16QI 1 "register_operand" "x")
9519 (parallel [(const_int 0)
9524 "%vpmovsxbd\t{%1, %0|%0, %1}"
9525 [(set_attr "type" "ssemov")
9526 (set_attr "prefix_extra" "1")
9527 (set_attr "prefix" "maybe_vex")
9528 (set_attr "mode" "TI")])
;; Variant of the pmovsxbd pattern whose source is a V4QI register or
;; memory operand, wrapped in vec_duplicate:V16QI.
;; NOTE(review): the remaining indices, the extension operator and the
;; insn condition are not visible in this excerpt.
9530 (define_insn "*sse4_1_extendv4qiv4si2"
9531 [(set (match_operand:V4SI 0 "register_operand" "=x")
9534 (vec_duplicate:V16QI
9535 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9536 (parallel [(const_int 0)
9541 "%vpmovsxbd\t{%1, %0|%0, %1}"
9542 [(set_attr "type" "ssemov")
9543 (set_attr "prefix_extra" "1")
9544 (set_attr "prefix" "maybe_vex")
9545 (set_attr "mode" "TI")])
;; (v)pmovsxbq, register-source form: a V16QI XMM register in, a V2DI
;; register out.  The selection list starts at element 0.
9547 (define_insn "sse4_1_extendv2qiv2di2"
9548 [(set (match_operand:V2DI 0 "register_operand" "=x")
9551 (match_operand:V16QI 1 "register_operand" "x")
9552 (parallel [(const_int 0)
9555 "%vpmovsxbq\t{%1, %0|%0, %1}"
9556 [(set_attr "type" "ssemov")
9557 (set_attr "prefix_extra" "1")
9558 (set_attr "prefix" "maybe_vex")
9559 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_extendv2qiv2di2 for a V2QI source that may
;; be in memory, wrapped in vec_duplicate:V16QI.  Emits (v)pmovsxbq.
9561 (define_insn "*sse4_1_extendv2qiv2di2"
9562 [(set (match_operand:V2DI 0 "register_operand" "=x")
9565 (vec_duplicate:V16QI
9566 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9567 (parallel [(const_int 0)
9570 "%vpmovsxbq\t{%1, %0|%0, %1}"
9571 [(set_attr "type" "ssemov")
9572 (set_attr "prefix_extra" "1")
9573 (set_attr "prefix" "maybe_vex")
9574 (set_attr "mode" "TI")])
;; (v)pmovsxwd, register-source form: a V8HI XMM register in, a V4SI
;; register out.  The selection list starts at element 0.
9576 (define_insn "sse4_1_extendv4hiv4si2"
9577 [(set (match_operand:V4SI 0 "register_operand" "=x")
9580 (match_operand:V8HI 1 "register_operand" "x")
9581 (parallel [(const_int 0)
9586 "%vpmovsxwd\t{%1, %0|%0, %1}"
9587 [(set_attr "type" "ssemov")
9588 (set_attr "prefix_extra" "1")
9589 (set_attr "prefix" "maybe_vex")
9590 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_extendv4hiv4si2 for a source that may be in
;; memory.  Emits (v)pmovsxwd.
;; Operand 1 is V4HI: the pattern widens four HImode elements into a V4SI
;; result, as its name (v4hi -> v4si) says and as the zero-extending twin
;; *sse4_1_zero_extendv4hiv4si2 already declares.  A V2HI operand would
;; describe only two source elements, half of what the four-element
;; selection list needs.
9592 (define_insn "*sse4_1_extendv4hiv4si2"
9593 [(set (match_operand:V4SI 0 "register_operand" "=x")
9597 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9598 (parallel [(const_int 0)
9603 "%vpmovsxwd\t{%1, %0|%0, %1}"
9604 [(set_attr "type" "ssemov")
9605 (set_attr "prefix_extra" "1")
9606 (set_attr "prefix" "maybe_vex")
9607 (set_attr "mode" "TI")])
;; (v)pmovsxwq, register-source form: a V8HI XMM register in, a V2DI
;; register out.  The selection list starts at element 0.
9609 (define_insn "sse4_1_extendv2hiv2di2"
9610 [(set (match_operand:V2DI 0 "register_operand" "=x")
9613 (match_operand:V8HI 1 "register_operand" "x")
9614 (parallel [(const_int 0)
9617 "%vpmovsxwq\t{%1, %0|%0, %1}"
9618 [(set_attr "type" "ssemov")
9619 (set_attr "prefix_extra" "1")
9620 (set_attr "prefix" "maybe_vex")
9621 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_extendv2hiv2di2 for a source that may be in
;; memory.  Emits (v)pmovsxwq.
;; Operand 1 is V2HI: the pattern widens two HImode elements into a V2DI
;; result, as its name (v2hi -> v2di) says and as the zero-extending twin
;; *sse4_1_zero_extendv2hiv2di2 already declares.  A V8HI operand would
;; make the memory alternative a full 16-byte reference, and would not be
;; a narrower source than the register-only pattern above it.
9623 (define_insn "*sse4_1_extendv2hiv2di2"
9624 [(set (match_operand:V2DI 0 "register_operand" "=x")
9628 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9629 (parallel [(const_int 0)
9632 "%vpmovsxwq\t{%1, %0|%0, %1}"
9633 [(set_attr "type" "ssemov")
9634 (set_attr "prefix_extra" "1")
9635 (set_attr "prefix" "maybe_vex")
9636 (set_attr "mode" "TI")])
;; (v)pmovsxdq, register-source form: a V4SI XMM register in, a V2DI
;; register out.  The selection list starts at element 0.
9638 (define_insn "sse4_1_extendv2siv2di2"
9639 [(set (match_operand:V2DI 0 "register_operand" "=x")
9642 (match_operand:V4SI 1 "register_operand" "x")
9643 (parallel [(const_int 0)
9646 "%vpmovsxdq\t{%1, %0|%0, %1}"
9647 [(set_attr "type" "ssemov")
9648 (set_attr "prefix_extra" "1")
9649 (set_attr "prefix" "maybe_vex")
9650 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_extendv2siv2di2 for a V2SI source that may
;; be in memory.  Emits (v)pmovsxdq.
9652 (define_insn "*sse4_1_extendv2siv2di2"
9653 [(set (match_operand:V2DI 0 "register_operand" "=x")
9657 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9658 (parallel [(const_int 0)
9661 "%vpmovsxdq\t{%1, %0|%0, %1}"
9662 [(set_attr "type" "ssemov")
9663 (set_attr "prefix_extra" "1")
9664 (set_attr "prefix" "maybe_vex")
9665 (set_attr "mode" "TI")])
;; (v)pmovzxbw, register-source form: the zero-extending counterpart of
;; sse4_1_extendv8qiv8hi2.  A V16QI XMM register in, a V8HI register out;
;; the selection list starts at element 0.
9667 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9668 [(set (match_operand:V8HI 0 "register_operand" "=x")
9671 (match_operand:V16QI 1 "register_operand" "x")
9672 (parallel [(const_int 0)
9681 "%vpmovzxbw\t{%1, %0|%0, %1}"
9682 [(set_attr "type" "ssemov")
9683 (set_attr "prefix_extra" "1")
9684 (set_attr "prefix" "maybe_vex")
9685 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_zero_extendv8qiv8hi2 for a V8QI source that
;; may be in memory, wrapped in vec_duplicate:V16QI.  Emits (v)pmovzxbw.
9687 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9688 [(set (match_operand:V8HI 0 "register_operand" "=x")
9691 (vec_duplicate:V16QI
9692 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9693 (parallel [(const_int 0)
9702 "%vpmovzxbw\t{%1, %0|%0, %1}"
9703 [(set_attr "type" "ssemov")
9704 (set_attr "prefix_extra" "1")
9705 (set_attr "prefix" "maybe_vex")
9706 (set_attr "mode" "TI")])
;; (v)pmovzxbd, register-source form: a V16QI XMM register in, a V4SI
;; register out.  The selection list starts at element 0.
9708 (define_insn "sse4_1_zero_extendv4qiv4si2"
9709 [(set (match_operand:V4SI 0 "register_operand" "=x")
9712 (match_operand:V16QI 1 "register_operand" "x")
9713 (parallel [(const_int 0)
9718 "%vpmovzxbd\t{%1, %0|%0, %1}"
9719 [(set_attr "type" "ssemov")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "prefix" "maybe_vex")
9722 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_zero_extendv4qiv4si2 for a V4QI source that
;; may be in memory, wrapped in vec_duplicate:V16QI.  Emits (v)pmovzxbd.
9724 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9725 [(set (match_operand:V4SI 0 "register_operand" "=x")
9728 (vec_duplicate:V16QI
9729 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9730 (parallel [(const_int 0)
9735 "%vpmovzxbd\t{%1, %0|%0, %1}"
9736 [(set_attr "type" "ssemov")
9737 (set_attr "prefix_extra" "1")
9738 (set_attr "prefix" "maybe_vex")
9739 (set_attr "mode" "TI")])
;; (v)pmovzxbq, register-source form: a V16QI XMM register in, a V2DI
;; register out.  The selection list starts at element 0.
9741 (define_insn "sse4_1_zero_extendv2qiv2di2"
9742 [(set (match_operand:V2DI 0 "register_operand" "=x")
9745 (match_operand:V16QI 1 "register_operand" "x")
9746 (parallel [(const_int 0)
9749 "%vpmovzxbq\t{%1, %0|%0, %1}"
9750 [(set_attr "type" "ssemov")
9751 (set_attr "prefix_extra" "1")
9752 (set_attr "prefix" "maybe_vex")
9753 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_zero_extendv2qiv2di2 for a V2QI source that
;; may be in memory, wrapped in vec_duplicate:V16QI.  Emits (v)pmovzxbq.
9755 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9756 [(set (match_operand:V2DI 0 "register_operand" "=x")
9759 (vec_duplicate:V16QI
9760 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9761 (parallel [(const_int 0)
9764 "%vpmovzxbq\t{%1, %0|%0, %1}"
9765 [(set_attr "type" "ssemov")
9766 (set_attr "prefix_extra" "1")
9767 (set_attr "prefix" "maybe_vex")
9768 (set_attr "mode" "TI")])
;; (v)pmovzxwd, register-source form: a V8HI XMM register in, a V4SI
;; register out.  The selection list starts at element 0.
9770 (define_insn "sse4_1_zero_extendv4hiv4si2"
9771 [(set (match_operand:V4SI 0 "register_operand" "=x")
9774 (match_operand:V8HI 1 "register_operand" "x")
9775 (parallel [(const_int 0)
9780 "%vpmovzxwd\t{%1, %0|%0, %1}"
9781 [(set_attr "type" "ssemov")
9782 (set_attr "prefix_extra" "1")
9783 (set_attr "prefix" "maybe_vex")
9784 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_zero_extendv4hiv4si2 for a V4HI source that
;; may be in memory.  Emits (v)pmovzxwd.
9786 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9787 [(set (match_operand:V4SI 0 "register_operand" "=x")
9791 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9792 (parallel [(const_int 0)
9797 "%vpmovzxwd\t{%1, %0|%0, %1}"
9798 [(set_attr "type" "ssemov")
9799 (set_attr "prefix_extra" "1")
9800 (set_attr "prefix" "maybe_vex")
9801 (set_attr "mode" "TI")])
;; (v)pmovzxwq, register-source form: a V8HI XMM register in, a V2DI
;; register out.  The selection list starts at element 0.
9803 (define_insn "sse4_1_zero_extendv2hiv2di2"
9804 [(set (match_operand:V2DI 0 "register_operand" "=x")
9807 (match_operand:V8HI 1 "register_operand" "x")
9808 (parallel [(const_int 0)
9811 "%vpmovzxwq\t{%1, %0|%0, %1}"
9812 [(set_attr "type" "ssemov")
9813 (set_attr "prefix_extra" "1")
9814 (set_attr "prefix" "maybe_vex")
9815 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_zero_extendv2hiv2di2 for a V2HI source that
;; may be in memory.  Emits (v)pmovzxwq.
9817 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9818 [(set (match_operand:V2DI 0 "register_operand" "=x")
9822 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9823 (parallel [(const_int 0)
9826 "%vpmovzxwq\t{%1, %0|%0, %1}"
9827 [(set_attr "type" "ssemov")
9828 (set_attr "prefix_extra" "1")
9829 (set_attr "prefix" "maybe_vex")
9830 (set_attr "mode" "TI")])
;; (v)pmovzxdq, register-source form: a V4SI XMM register in, a V2DI
;; register out.  The selection list starts at element 0.
9832 (define_insn "sse4_1_zero_extendv2siv2di2"
9833 [(set (match_operand:V2DI 0 "register_operand" "=x")
9836 (match_operand:V4SI 1 "register_operand" "x")
9837 (parallel [(const_int 0)
9840 "%vpmovzxdq\t{%1, %0|%0, %1}"
9841 [(set_attr "type" "ssemov")
9842 (set_attr "prefix_extra" "1")
9843 (set_attr "prefix" "maybe_vex")
9844 (set_attr "mode" "TI")])
;; Unnamed companion of sse4_1_zero_extendv2siv2di2 for a V2SI source that
;; may be in memory.  Emits (v)pmovzxdq.
9846 (define_insn "*sse4_1_zero_extendv2siv2di2"
9847 [(set (match_operand:V2DI 0 "register_operand" "=x")
9851 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9852 (parallel [(const_int 0)
9855 "%vpmovzxdq\t{%1, %0|%0, %1}"
9856 [(set_attr "type" "ssemov")
9857 (set_attr "prefix_extra" "1")
9858 (set_attr "prefix" "maybe_vex")
9859 (set_attr "mode" "TI")])
9861 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9862 ;; setting FLAGS_REG. But they are not really compare instructions.
;; The only destination visible in this pattern is FLAGS_REG in CC mode,
;; computed by an unspec over two AVXMODEF2P operands.  Operand 0 must be
;; a register; operand 1 may be a register or memory.  The mnemonic is
;; vtestp plus the per-mode suffix, and the encoding is always VEX.
9863 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9864 [(set (reg:CC FLAGS_REG)
9865 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9866 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9869 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9870 [(set_attr "type" "ssecomi")
9871 (set_attr "prefix_extra" "1")
9872 (set_attr "prefix" "vex")
9873 (set_attr "mode" "<MODE>")])
9875 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9876 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from an unspec over two V4DI
;; operands.  Operand 0 must be a register, operand 1 may be memory.
;; Always VEX-encoded; insn mode is OI.
9877 (define_insn "avx_ptest256"
9878 [(set (reg:CC FLAGS_REG)
9879 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9880 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9883 "vptest\t{%1, %0|%0, %1}"
9884 [(set_attr "type" "ssecomi")
9885 (set_attr "prefix_extra" "1")
9886 (set_attr "prefix" "vex")
9887 (set_attr "mode" "OI")])
;; 128-bit (v)ptest: sets FLAGS_REG (CC mode) from an unspec over two V2DI
;; operands.  Operand 0 must be a register, operand 1 may be memory.
;; "%v" with prefix "maybe_vex" covers both the legacy and the VEX form.
9889 (define_insn "sse4_1_ptest"
9890 [(set (reg:CC FLAGS_REG)
9891 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9892 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9895 "%vptest\t{%1, %0|%0, %1}"
9896 [(set_attr "type" "ssecomi")
9897 (set_attr "prefix_extra" "1")
9898 (set_attr "prefix" "maybe_vex")
9899 (set_attr "mode" "TI")])
;; 256-bit vroundps/vroundpd (suffix chosen per mode of AVX256MODEF2P),
;; expressed as an unspec of the source vector (register or memory) and
;; operand 2, an immediate restricted to 0..15 by its predicate.
;; Always VEX-encoded, with one immediate byte.
9901 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9902 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9903 (unspec:AVX256MODEF2P
9904 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9905 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9908 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9909 [(set_attr "type" "ssecvt")
9910 (set_attr "prefix_extra" "1")
9911 (set_attr "length_immediate" "1")
9912 (set_attr "prefix" "vex")
9913 (set_attr "mode" "<MODE>")])
;; 128-bit (v)roundps/(v)roundpd over SSEMODEF2P: source vector in a
;; register or memory, operand 2 an immediate restricted to 0..15 by its
;; predicate.  Unlike the 256-bit AVX pattern this one also sets
;; prefix_data16 and uses prefix "maybe_vex".
9915 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9916 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9918 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9919 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9922 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9923 [(set_attr "type" "ssecvt")
9924 (set_attr "prefix_data16" "1")
9925 (set_attr "prefix_extra" "1")
9926 (set_attr "length_immediate" "1")
9927 (set_attr "prefix" "maybe_vex")
9928 (set_attr "mode" "<MODE>")])
;; Unnamed VEX-only vroundss/vroundsd.  The result is a vec_merge of the
;; rounded operand 2 (immediate operand 3 limited to 0..15 by its
;; predicate) with operand 1.  All three vector operands are independent
;; registers, so the template names %0, %1 and %2 separately.
9930 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9931 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9932 (vec_merge:SSEMODEF2P
9934 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9935 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9937 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9940 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9941 [(set_attr "type" "ssecvt")
9942 (set_attr "prefix_extra" "1")
9943 (set_attr "length_immediate" "1")
9944 (set_attr "prefix" "vex")
9945 (set_attr "mode" "<MODE>")])
;; Legacy-encoded roundss/roundsd.  Same vec_merge shape as the AVX
;; pattern, but operand 1 is tied to the destination ("0" constraint), so
;; the template only names %0, %2 and the immediate %3.  No "prefix"
;; attribute is set here and the mnemonic carries no "%v".
9947 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9949 (vec_merge:SSEMODEF2P
9951 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9952 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9954 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9957 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9958 [(set_attr "type" "ssecvt")
9959 (set_attr "prefix_data16" "1")
9960 (set_attr "prefix_extra" "1")
9961 (set_attr "length_immediate" "1")
9962 (set_attr "mode" "<MODE>")])
9964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9966 ;; Intel SSE4.2 string/text processing instructions
9968 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: operand 0 (SI, "c"
;; constraint), operand 1 (V16QI, "Yz" constraint) and FLAGS_REG.  Inputs
;; are two V16QI vectors (operands 2 and 4, both excluded from xmm0 by
;; their predicates), two SI registers pinned by the "a" and "d"
;; constraints, and an immediate in 0..255.
;; The split is only attempted while pseudos can still be created.  Its
;; code reads the REG_UNUSED notes on curr_insn to learn which of the three
;; results are live (locals ecx, xmm0, flags) and emits pcmpestri and/or
;; pcmpestrm accordingly, or the _cconly form when the flags are live but
;; neither register result is.
9970 (define_insn_and_split "sse4_2_pcmpestr"
9971 [(set (match_operand:SI 0 "register_operand" "=c,c")
9973 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9974 (match_operand:SI 3 "register_operand" "a,a")
9975 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9976 (match_operand:SI 5 "register_operand" "d,d")
9977 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9979 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9987 (set (reg:CC FLAGS_REG)
9996 && can_create_pseudo_p ()"
10001 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10002 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10003 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10006 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10007 operands[3], operands[4],
10008 operands[5], operands[6]));
10010 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10011 operands[3], operands[4],
10012 operands[5], operands[6]));
10013 if (flags && !(ecx || xmm0))
10014 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10015 operands[2], operands[3],
10016 operands[4], operands[5],
10020 [(set_attr "type" "sselog")
10021 (set_attr "prefix_data16" "1")
10022 (set_attr "prefix_extra" "1")
10023 (set_attr "length_immediate" "1")
10024 (set_attr "memory" "none,load")
10025 (set_attr "mode" "TI")])
;; (v)pcmpestri: produces an SI result in the register selected by the "c"
;; constraint and also sets FLAGS_REG.  The template prints only the
;; immediate (%5) and the two vector operands (%3, %1); the SI inputs held
;; by the "a" and "d" constraints are implicit.  The second alternative
;; takes operand 3 from memory, hence memory "none,load".
10027 (define_insn "sse4_2_pcmpestri"
10028 [(set (match_operand:SI 0 "register_operand" "=c,c")
10030 [(match_operand:V16QI 1 "register_operand" "x,x")
10031 (match_operand:SI 2 "register_operand" "a,a")
10032 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10033 (match_operand:SI 4 "register_operand" "d,d")
10034 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10036 (set (reg:CC FLAGS_REG)
10045 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10046 [(set_attr "type" "sselog")
10047 (set_attr "prefix_data16" "1")
10048 (set_attr "prefix_extra" "1")
10049 (set_attr "prefix" "maybe_vex")
10050 (set_attr "length_immediate" "1")
10051 (set_attr "memory" "none,load")
10052 (set_attr "mode" "TI")])
;; (v)pcmpestrm: produces a V16QI result in the register selected by the
;; "Yz" constraint and also sets FLAGS_REG.  As with pcmpestri, the
;; destination and the "a"/"d" SI inputs do not appear in the template;
;; only the immediate and the two vector sources are printed.
10054 (define_insn "sse4_2_pcmpestrm"
10055 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10057 [(match_operand:V16QI 1 "register_operand" "x,x")
10058 (match_operand:SI 2 "register_operand" "a,a")
10059 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10060 (match_operand:SI 4 "register_operand" "d,d")
10061 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10063 (set (reg:CC FLAGS_REG)
10072 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10073 [(set_attr "type" "sselog")
10074 (set_attr "prefix_data16" "1")
10075 (set_attr "prefix_extra" "1")
10076 (set_attr "length_immediate" "1")
10077 (set_attr "prefix" "maybe_vex")
10078 (set_attr "memory" "none,load")
10079 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the sole set is FLAGS_REG; the register results
;; are declared as clobbered scratches.  Four alternatives: the first two
;; clobber a V16QI scratch through "Yz" and emit (v)pcmpestrm, the last
;; two clobber an SI scratch through "c" and emit (v)pcmpestri.  "X"
;; marks the scratch a given alternative does not need.  Within each pair
;; operand 4 is a register, then memory (memory "none,load,none,load").
10081 (define_insn "sse4_2_pcmpestr_cconly"
10082 [(set (reg:CC FLAGS_REG)
10084 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10085 (match_operand:SI 3 "register_operand" "a,a,a,a")
10086 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10087 (match_operand:SI 5 "register_operand" "d,d,d,d")
10088 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10090 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10091 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10094 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10095 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10096 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10097 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10098 [(set_attr "type" "sselog")
10099 (set_attr "prefix_data16" "1")
10100 (set_attr "prefix_extra" "1")
10101 (set_attr "length_immediate" "1")
10102 (set_attr "memory" "none,load,none,load")
10103 (set_attr "prefix" "maybe_vex")
10104 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern, the counterpart of sse4_2_pcmpestr without
;; the two SI register inputs.  Results: operand 0 (SI, "c"), operand 1
;; (V16QI, "Yz") and FLAGS_REG; inputs: two V16QI vectors that must not be
;; xmm0 (per their predicates) and an immediate in 0..255.
;; The split runs while pseudos can still be created.  It reads the
;; REG_UNUSED notes on curr_insn to find the live results and emits
;; pcmpistri and/or pcmpistrm, or the _cconly form when only the flags
;; are live.
10106 (define_insn_and_split "sse4_2_pcmpistr"
10107 [(set (match_operand:SI 0 "register_operand" "=c,c")
10109 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10110 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10111 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10113 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10119 (set (reg:CC FLAGS_REG)
10126 && can_create_pseudo_p ()"
10131 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10132 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10133 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10136 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10137 operands[3], operands[4]));
10139 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10140 operands[3], operands[4]));
10141 if (flags && !(ecx || xmm0))
10142 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10143 operands[2], operands[3],
10147 [(set_attr "type" "sselog")
10148 (set_attr "prefix_data16" "1")
10149 (set_attr "prefix_extra" "1")
10150 (set_attr "length_immediate" "1")
10151 (set_attr "memory" "none,load")
10152 (set_attr "mode" "TI")])
;; (v)pcmpistri: SI result in the register selected by the "c" constraint,
;; plus FLAGS_REG.  Sources are two V16QI vectors (the second may be in
;; memory) and an immediate in 0..255; the destination is implicit in the
;; template.
10154 (define_insn "sse4_2_pcmpistri"
10155 [(set (match_operand:SI 0 "register_operand" "=c,c")
10157 [(match_operand:V16QI 1 "register_operand" "x,x")
10158 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10159 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10161 (set (reg:CC FLAGS_REG)
10168 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10169 [(set_attr "type" "sselog")
10170 (set_attr "prefix_data16" "1")
10171 (set_attr "prefix_extra" "1")
10172 (set_attr "length_immediate" "1")
10173 (set_attr "prefix" "maybe_vex")
10174 (set_attr "memory" "none,load")
10175 (set_attr "mode" "TI")])
;; (v)pcmpistrm: V16QI result in the register selected by the "Yz"
;; constraint, plus FLAGS_REG.  Sources are two V16QI vectors (the second
;; may be in memory) and an immediate in 0..255; the destination is
;; implicit in the template.
10177 (define_insn "sse4_2_pcmpistrm"
10178 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10180 [(match_operand:V16QI 1 "register_operand" "x,x")
10181 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10182 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10184 (set (reg:CC FLAGS_REG)
10191 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10192 [(set_attr "type" "sselog")
10193 (set_attr "prefix_data16" "1")
10194 (set_attr "prefix_extra" "1")
10195 (set_attr "length_immediate" "1")
10196 (set_attr "prefix" "maybe_vex")
10197 (set_attr "memory" "none,load")
10198 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the sole set is FLAGS_REG; the register results
;; are clobbered scratches.  Alternatives 1-2 clobber a V16QI scratch
;; through "Yz" and emit (v)pcmpistrm; alternatives 3-4 clobber an SI
;; scratch through "c" and emit (v)pcmpistri.  "X" marks the scratch an
;; alternative does not need.  Operand 3 is a register, then memory,
;; within each pair.
10200 (define_insn "sse4_2_pcmpistr_cconly"
10201 [(set (reg:CC FLAGS_REG)
10203 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10204 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10205 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10207 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10208 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10211 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10212 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10213 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10214 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10215 [(set_attr "type" "sselog")
10216 (set_attr "prefix_data16" "1")
10217 (set_attr "prefix_extra" "1")
10218 (set_attr "length_immediate" "1")
10219 (set_attr "memory" "none,load,none,load")
10220 (set_attr "prefix" "maybe_vex")
10221 (set_attr "mode" "TI")])
10223 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10225 ;; XOP instructions
10227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10229 ;; XOP parallel integer multiply/add instructions.
10230 ;; Note the instruction does not allow the value being added to be a memory
10231 ;; operand. However, pretending via the nonimmediate_operand predicate
10232 ;; that it does, and splitting it later, allows the following to be recognized:
10233 ;; a[i] = b[i] * c[i] + d[i];
;; vpmacsww on V8HI.  Operands 1 and 2 are the commutative pair ("%" on
;; operand 1) and operand 3 is a register.  The two alternatives differ
;; in which of operands 1/2 may be in memory; the second template swaps
;; %1 and %2 so the memory operand lands in the same assembler slot.
;; NOTE(review): the fifth argument to ix86_fma4_valid_op_p is 2 here but
;; 1 in xop_pmacssww; it presumably bounds the number of memory operands
;; so the split that follows can match - confirm against that helper.
10234 (define_insn "xop_pmacsww"
10235 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10238 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10239 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10240 (match_operand:V8HI 3 "register_operand" "x,x")))]
10241 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10243 vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10244 vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10245 [(set_attr "type" "ssemuladd")
10246 (set_attr "mode" "TI")])
10248 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Operand 3 is accepted as nonimmediate_operand here.  The condition
;; requires that the operands fail ix86_fma4_valid_op_p with limit 1 but
;; pass it with limit 2, and that the destination register is mentioned
;; in none of operands 1-3.  The body calls
;; ix86_expand_fma4_multiple_memory on the operands and then emits
;; xop_pmacsww.
10250 [(set (match_operand:V8HI 0 "register_operand" "")
10252 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10253 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10254 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10256 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10257 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10258 && !reg_mentioned_p (operands[0], operands[1])
10259 && !reg_mentioned_p (operands[0], operands[2])
10260 && !reg_mentioned_p (operands[0], operands[3])"
10263 ix86_expand_fma4_multiple_memory (operands, 4, V8HImode);
10264 emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
;; vpmacssww on V8HI: a mult:V8HI of the commutative operands 1 and 2,
;; combined with register operand 3.  Same two-alternative layout as
;; xop_pmacsww, but ix86_fma4_valid_op_p is called with 1 as its fifth
;; argument.
10269 (define_insn "xop_pmacssww"
10270 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10272 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10273 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10274 (match_operand:V8HI 3 "register_operand" "x,x")))]
10275 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10277 vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10278 vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10279 [(set_attr "type" "ssemuladd")
10280 (set_attr "mode" "TI")])
10282 ;; Note the instruction does not allow the value being added to be a memory
10283 ;; operand. However, pretending via the nonimmediate_operand predicate
10284 ;; that it does, and splitting it later, allows the following to be recognized:
10285 ;; a[i] = b[i] * c[i] + d[i];
;; vpmacsdd on V4SI.  Operands 1 and 2 are the commutative pair and
;; operand 3 is a register.  The second alternative allows operand 1 in
;; memory and swaps %1/%2 in the template.  ix86_fma4_valid_op_p is
;; called with 2 as its fifth argument, as in xop_pmacsww.
10286 (define_insn "xop_pmacsdd"
10287 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10290 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10291 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10292 (match_operand:V4SI 3 "register_operand" "x,x")))]
10293 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10295 vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10296 vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10297 [(set_attr "type" "ssemuladd")
10298 (set_attr "mode" "TI")])
10300 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; Mirrors the pmacsww split in V4SI: operand 3 is accepted as
;; nonimmediate_operand, the operands must fail ix86_fma4_valid_op_p with
;; limit 1 but pass with limit 2, and the destination must not be
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_fma4_multiple_memory and then emits xop_pmacsdd.
10302 [(set (match_operand:V4SI 0 "register_operand" "")
10304 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10305 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10306 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10308 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10309 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10310 && !reg_mentioned_p (operands[0], operands[1])
10311 && !reg_mentioned_p (operands[0], operands[2])
10312 && !reg_mentioned_p (operands[0], operands[3])"
10315 ix86_expand_fma4_multiple_memory (operands, 4, V4SImode);
10316 emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
;; vpmacssdd on V4SI: a mult:V4SI of the commutative operands 1 and 2,
;; combined with register operand 3.  ix86_fma4_valid_op_p is called
;; with 1 as its fifth argument.
10321 (define_insn "xop_pmacssdd"
10322 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10324 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10325 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10326 (match_operand:V4SI 3 "register_operand" "x,x")))]
10327 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10329 vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10330 vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10331 [(set_attr "type" "ssemuladd")
10332 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI sources (operands 1 and 2, commutative) with element
;; selection lists that begin at index 1, a V2DI register operand 3, and
;; a V2DI result.
10334 (define_insn "xop_pmacssdql"
10335 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10340 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10341 (parallel [(const_int 1)
10344 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10345 (parallel [(const_int 1)
10347 (match_operand:V2DI 3 "register_operand" "x,x")))]
10348 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10350 vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10351 vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10352 [(set_attr "type" "ssemuladd")
10353 (set_attr "mode" "TI")])
;; vpmacssdqh: same shape as xop_pmacssdql, but the element selection
;; lists on the V4SI sources begin at index 0.
10355 (define_insn "xop_pmacssdqh"
10356 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10361 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10362 (parallel [(const_int 0)
10366 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10367 (parallel [(const_int 0)
10369 (match_operand:V2DI 3 "register_operand" "x,x")))]
10370 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10372 vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10373 vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10374 [(set_attr "type" "ssemuladd")
10375 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI sources (operands 1 and 2, commutative) with element
;; selection lists that begin at index 1, a V2DI register operand 3, and
;; a V2DI result.
10377 (define_insn "xop_pmacsdql"
10378 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10383 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10384 (parallel [(const_int 1)
10388 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10389 (parallel [(const_int 1)
10391 (match_operand:V2DI 3 "register_operand" "x,x")))]
10392 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10394 vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10395 vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10396 [(set_attr "type" "ssemuladd")
10397 (set_attr "mode" "TI")])
;; Unnamed variant of xop_pmacsdql in which operand 3 must be a memory
;; operand.  The destination is early-clobber ("=&x").  The pattern is
;; split once reload has completed, and the replacement sequence starts
;; with a set of operand 0.  ix86_fma4_valid_op_p is called with -1 as
;; its fifth argument here.
10399 (define_insn_and_split "*xop_pmacsdql_mem"
10400 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10405 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10406 (parallel [(const_int 1)
10410 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10411 (parallel [(const_int 1)
10413 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10414 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10416 "&& reload_completed"
10417 [(set (match_dup 0)
10425 (parallel [(const_int 1)
10430 (parallel [(const_int 1)
10434 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10435 ;; fake it with a multiply/add. In general, we expect the define_split to
10436 ;; occur before register allocation, so we have to handle the corner case where
10437 ;; the target is the same as operands 1/2
;; Multiply of the V4SI elements selected by the lists (1 3) from
;; operands 1 and 2, giving a V2DI result in an early-clobber register.
;; The pattern itself has no operand 3.  After reload it is split, and
;; the preparation code sets operands[3] to the V2DI zero constant for
;; use by the replacement sequence.
10438 (define_insn_and_split "xop_mulv2div2di3_low"
10439 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10443 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10444 (parallel [(const_int 1)
10448 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10449 (parallel [(const_int 1)
10450 (const_int 3)])))))]
10453 "&& reload_completed"
10454 [(set (match_dup 0)
10462 (parallel [(const_int 1)
10467 (parallel [(const_int 1)
10471 operands[3] = CONST0_RTX (V2DImode);
10473 [(set_attr "type" "ssemuladd")
10474 (set_attr "mode" "TI")])
;; vpmacsdqh: same shape as xop_pmacsdql, but the element selection lists
;; on the V4SI sources begin at index 0.
10476 (define_insn "xop_pmacsdqh"
10477 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10482 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10483 (parallel [(const_int 0)
10487 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10488 (parallel [(const_int 0)
10490 (match_operand:V2DI 3 "register_operand" "x,x")))]
10491 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10493 vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10494 vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10495 [(set_attr "type" "ssemuladd")
10496 (set_attr "mode" "TI")])
;; Unnamed variant of xop_pmacsdqh in which operand 3 must be a memory
;; operand.  The destination is early-clobber ("=&x").  The pattern is
;; split once reload has completed, and the replacement sequence starts
;; with a set of operand 0.
10498 (define_insn_and_split "*xop_pmacsdqh_mem"
10499 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10504 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10505 (parallel [(const_int 0)
10509 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10510 (parallel [(const_int 0)
10512 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10513 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10515 "&& reload_completed"
10516 [(set (match_dup 0)
10524 (parallel [(const_int 0)
10529 (parallel [(const_int 0)
10533 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10534 ;; fake it with a multiply/add. In general, we expect the define_split to
10535 ;; occur before register allocation, so we have to handle the corner case where
10536 ;; the target is the same as either operands[1] or operands[2]
;; Multiply of the V4SI elements selected by the lists (0 2) from
;; operands 1 and 2, giving a V2DI result in an early-clobber register.
;; The pattern itself has no operand 3.  After reload it is split, and
;; the preparation code sets operands[3] to the V2DI zero constant for
;; use by the replacement sequence.
10537 (define_insn_and_split "xop_mulv2div2di3_high"
10538 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10542 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10543 (parallel [(const_int 0)
10547 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10548 (parallel [(const_int 0)
10549 (const_int 2)])))))]
10552 "&& reload_completed"
10553 [(set (match_dup 0)
10561 (parallel [(const_int 0)
10566 (parallel [(const_int 0)
10570 operands[3] = CONST0_RTX (V2DImode);
10572 [(set_attr "type" "ssemuladd")
10573 (set_attr "mode" "TI")])
10575 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI sources (operands 1 and 2, commutative) with element
;; selection lists that begin at index 1, a V4SI register operand 3, and
;; a V4SI result.
10576 (define_insn "xop_pmacsswd"
10577 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10582 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10583 (parallel [(const_int 1)
10589 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10590 (parallel [(const_int 1)
10594 (match_operand:V4SI 3 "register_operand" "x,x")))]
10595 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10597 vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10598 vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10599 [(set_attr "type" "ssemuladd")
10600 (set_attr "mode" "TI")])
;; vpmacswd: same operand layout as xop_pmacsswd (V8HI sources selected
;; from index 1, V4SI register operand 3, V4SI result), with a different
;; mnemonic.
10602 (define_insn "xop_pmacswd"
10603 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10608 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10609 (parallel [(const_int 1)
10615 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10616 (parallel [(const_int 1)
10620 (match_operand:V4SI 3 "register_operand" "x,x")))]
10621 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10623 vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10624 vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10625 [(set_attr "type" "ssemuladd")
10626 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI sources (operands 1 and 2, commutative) are used with
;; two sets of element selection lists, one beginning at index 0 and one
;; beginning at index 1 (ending at 7).  Operand 3 is a V4SI register and
;; the result is V4SI.
10628 (define_insn "xop_pmadcsswd"
10629 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10635 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10636 (parallel [(const_int 0)
10642 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10643 (parallel [(const_int 0)
10651 (parallel [(const_int 1)
10658 (parallel [(const_int 1)
10661 (const_int 7)])))))
10662 (match_operand:V4SI 3 "register_operand" "x,x")))]
10663 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10665 vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10666 vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10667 [(set_attr "type" "ssemuladd")
10668 (set_attr "mode" "TI")])
;; vpmadcswd: same operand layout as xop_pmadcsswd (selection lists from
;; index 0 and from index 1 on the V8HI sources, V4SI register operand 3,
;; V4SI result), with a different mnemonic.
10670 (define_insn "xop_pmadcswd"
10671 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10677 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10678 (parallel [(const_int 0)
10684 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10685 (parallel [(const_int 0)
10693 (parallel [(const_int 1)
10700 (parallel [(const_int 1)
10703 (const_int 7)])))))
10704 (match_operand:V4SI 3 "register_operand" "x,x")))]
10705 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10707 vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10708 vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10709 [(set_attr "type" "ssemuladd")
10710 (set_attr "mode" "TI")])
10712 ;; XOP parallel XMM conditional moves
;; vpcmov for every 16-byte mode in SSEMODE, written as if_then_else with
;; operand 3 as the selector and operands 1/2 as the two arms.  Each of
;; the three alternatives permits at most one of the three inputs in
;; memory.  Only the "type" attribute is set.
10713 (define_insn "xop_pcmov_<mode>"
10714 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
10715 (if_then_else:SSEMODE
10716 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,m")
10717 (match_operand:SSEMODE 1 "vector_move_operand" "x,m,x")
10718 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
10719 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10720 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10721 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov for every mode in AVX256MODE.  Same if_then_else shape,
;; constraints and condition as the 128-bit xop_pcmov_<mode> pattern.
10723 (define_insn "xop_pcmov_<mode>256"
10724 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
10725 (if_then_else:AVX256MODE
10726 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,m")
10727 (match_operand:AVX256MODE 1 "vector_move_operand" "x,m,x")
10728 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
10729 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10730 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10731 [(set_attr "type" "sse4arg")])
10733 ;; XOP horizontal add/subtract instructions
;; vphaddbw: one V16QI source (register or memory) and a V8HI result.
;; The pattern uses two element selection lists on the source, one
;; beginning at index 0 and one beginning at index 1 (ending at 15).
10734 (define_insn "xop_phaddbw"
10735 [(set (match_operand:V8HI 0 "register_operand" "=x")
10739 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10740 (parallel [(const_int 0)
10751 (parallel [(const_int 1)
10758 (const_int 15)])))))]
10760 "vphaddbw\t{%1, %0|%0, %1}"
10761 [(set_attr "type" "sseiadd1")])
;; vphaddbd: one V16QI source (register or memory) and a V4SI result.
;; Four element selection lists are applied to the source, beginning at
;; indices 0, 1, 2 and 3.
10763 (define_insn "xop_phaddbd"
10764 [(set (match_operand:V4SI 0 "register_operand" "=x")
10769 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10770 (parallel [(const_int 0)
10777 (parallel [(const_int 1)
10780 (const_int 13)]))))
10785 (parallel [(const_int 2)
10792 (parallel [(const_int 3)
10795 (const_int 15)]))))))]
10797 "vphaddbd\t{%1, %0|%0, %1}"
10798 [(set_attr "type" "sseiadd1")])
;; vphaddbq: one V16QI source (register or memory) and a V2DI result.
;; Eight element selection lists are applied to the source, beginning at
;; indices 0-3 and 8-11.
10800 (define_insn "xop_phaddbq"
10801 [(set (match_operand:V2DI 0 "register_operand" "=x")
10807 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10808 (parallel [(const_int 0)
10813 (parallel [(const_int 1)
10819 (parallel [(const_int 2)
10824 (parallel [(const_int 3)
10825 (const_int 7)])))))
10831 (parallel [(const_int 8)
10836 (parallel [(const_int 9)
10837 (const_int 13)]))))
10842 (parallel [(const_int 10)
10847 (parallel [(const_int 11)
10848 (const_int 15)])))))))]
10850 "vphaddbq\t{%1, %0|%0, %1}"
10851 [(set_attr "type" "sseiadd1")])
;; vphaddwd: operand 1 is a V8HI register or memory source, operand 0 the
;; V4SI result.  The two element selections start at index 0 and index 1.
10853 (define_insn "xop_phaddwd"
10854 [(set (match_operand:V4SI 0 "register_operand" "=x")
10858 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10859 (parallel [(const_int 0)
10866 (parallel [(const_int 1)
10869 (const_int 7)])))))]
10871 "vphaddwd\t{%1, %0|%0, %1}"
10872 [(set_attr "type" "sseiadd1")])
;; vphaddwq: operand 1 is a V8HI register or memory source, operand 0 the
;; V2DI result.  Four element selections are used, starting at indices
;; 0, 1, 2 and 3.
10874 (define_insn "xop_phaddwq"
10875 [(set (match_operand:V2DI 0 "register_operand" "=x")
10880 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10881 (parallel [(const_int 0)
10886 (parallel [(const_int 1)
10892 (parallel [(const_int 2)
10897 (parallel [(const_int 3)
10898 (const_int 7)]))))))]
10900 "vphaddwq\t{%1, %0|%0, %1}"
10901 [(set_attr "type" "sseiadd1")])
;; vphadddq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI result.  The two element selections start at index 0 and index 1.
10903 (define_insn "xop_phadddq"
10904 [(set (match_operand:V2DI 0 "register_operand" "=x")
10908 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10909 (parallel [(const_int 0)
10914 (parallel [(const_int 1)
10915 (const_int 3)])))))]
10917 "vphadddq\t{%1, %0|%0, %1}"
10918 [(set_attr "type" "sseiadd1")])
;; vphaddubw: operand 1 is a V16QI register or memory source, operand 0 the
;; V8HI result.  The two element selections start at index 0 and index 1.
10920 (define_insn "xop_phaddubw"
10921 [(set (match_operand:V8HI 0 "register_operand" "=x")
10925 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10926 (parallel [(const_int 0)
10937 (parallel [(const_int 1)
10944 (const_int 15)])))))]
10946 "vphaddubw\t{%1, %0|%0, %1}"
10947 [(set_attr "type" "sseiadd1")])
;; vphaddubd: operand 1 is a V16QI register or memory source, operand 0 the
;; V4SI result.  Four element selections are used, starting at indices
;; 0, 1, 2 and 3.
10949 (define_insn "xop_phaddubd"
10950 [(set (match_operand:V4SI 0 "register_operand" "=x")
10955 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10956 (parallel [(const_int 0)
10963 (parallel [(const_int 1)
10966 (const_int 13)]))))
10971 (parallel [(const_int 2)
10978 (parallel [(const_int 3)
10981 (const_int 15)]))))))]
10983 "vphaddubd\t{%1, %0|%0, %1}"
10984 [(set_attr "type" "sseiadd1")])
;; vphaddubq: operand 1 is a V16QI register or memory source, operand 0 the
;; V2DI result.  Eight element selections are used, starting at indices
;; 0-3 and 8-11.
10986 (define_insn "xop_phaddubq"
10987 [(set (match_operand:V2DI 0 "register_operand" "=x")
10993 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10994 (parallel [(const_int 0)
10999 (parallel [(const_int 1)
11005 (parallel [(const_int 2)
11010 (parallel [(const_int 3)
11011 (const_int 7)])))))
11017 (parallel [(const_int 8)
11022 (parallel [(const_int 9)
11023 (const_int 13)]))))
11028 (parallel [(const_int 10)
11033 (parallel [(const_int 11)
11034 (const_int 15)])))))))]
11036 "vphaddubq\t{%1, %0|%0, %1}"
11037 [(set_attr "type" "sseiadd1")])
;; vphadduwd: operand 1 is a V8HI register or memory source, operand 0 the
;; V4SI result.  The two element selections start at index 0 and index 1.
11039 (define_insn "xop_phadduwd"
11040 [(set (match_operand:V4SI 0 "register_operand" "=x")
11044 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11045 (parallel [(const_int 0)
11052 (parallel [(const_int 1)
11055 (const_int 7)])))))]
11057 "vphadduwd\t{%1, %0|%0, %1}"
11058 [(set_attr "type" "sseiadd1")])
;; vphadduwq: operand 1 is a V8HI register or memory source, operand 0 the
;; V2DI result.  Four element selections are used, starting at indices
;; 0, 1, 2 and 3.
11060 (define_insn "xop_phadduwq"
11061 [(set (match_operand:V2DI 0 "register_operand" "=x")
11066 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11067 (parallel [(const_int 0)
11072 (parallel [(const_int 1)
11078 (parallel [(const_int 2)
11083 (parallel [(const_int 3)
11084 (const_int 7)]))))))]
11086 "vphadduwq\t{%1, %0|%0, %1}"
11087 [(set_attr "type" "sseiadd1")])
;; vphaddudq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI result.  The two element selections start at index 0 and index 1.
11089 (define_insn "xop_phaddudq"
11090 [(set (match_operand:V2DI 0 "register_operand" "=x")
11094 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11095 (parallel [(const_int 0)
11100 (parallel [(const_int 1)
11101 (const_int 3)])))))]
11103 "vphaddudq\t{%1, %0|%0, %1}"
11104 [(set_attr "type" "sseiadd1")])
;; vphsubbw: operand 1 is a V16QI register or memory source, operand 0 the
;; V8HI result.  The two element selections start at index 0 and index 1.
11106 (define_insn "xop_phsubbw"
11107 [(set (match_operand:V8HI 0 "register_operand" "=x")
11111 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11112 (parallel [(const_int 0)
11123 (parallel [(const_int 1)
11130 (const_int 15)])))))]
11132 "vphsubbw\t{%1, %0|%0, %1}"
11133 [(set_attr "type" "sseiadd1")])
;; vphsubwd: operand 1 is a V8HI register or memory source, operand 0 the
;; V4SI result.  The two element selections start at index 0 and index 1.
11135 (define_insn "xop_phsubwd"
11136 [(set (match_operand:V4SI 0 "register_operand" "=x")
11140 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11141 (parallel [(const_int 0)
11148 (parallel [(const_int 1)
11151 (const_int 7)])))))]
11153 "vphsubwd\t{%1, %0|%0, %1}"
11154 [(set_attr "type" "sseiadd1")])
;; vphsubdq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI result.  The two element selections start at index 0 and index 1.
11156 (define_insn "xop_phsubdq"
11157 [(set (match_operand:V2DI 0 "register_operand" "=x")
11161 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11162 (parallel [(const_int 0)
11167 (parallel [(const_int 1)
11168 (const_int 3)])))))]
11170 "vphsubdq\t{%1, %0|%0, %1}"
11171 [(set_attr "type" "sseiadd1")])
11173 ;; XOP permute instructions
;; vpperm as an opaque UNSPEC_XOP_PERMUTE over three V16QI inputs
;; (operands 1-3).  Each constraint alternative allows at most one memory
;; operand; ix86_fma4_valid_op_p in the condition checks the combination.
11174 (define_insn "xop_pperm"
11175 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11177 [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,m")
11178 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x")
11179 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
11180 UNSPEC_XOP_PERMUTE))]
11181 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11182 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11183 [(set_attr "type" "sse4arg")
11184 (set_attr "mode" "TI")])
11186 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is a V16QI value that appears in the RTL only through
;; a `use', and is passed to the instruction as its last source operand.
11187 (define_insn "xop_pperm_pack_v2di_v4si"
11188 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
11191 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,m"))
11193 (match_operand:V2DI 2 "nonimmediate_operand" "x,m,x"))))
11194 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11195 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11196 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11197 [(set_attr "type" "sse4arg")
11198 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is a V16QI value that appears in the RTL only through
;; a `use', and is passed to the instruction as its last source operand.
11200 (define_insn "xop_pperm_pack_v4si_v8hi"
11201 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
11204 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m"))
11206 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))))
11207 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11208 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11209 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11210 [(set_attr "type" "sse4arg")
11211 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is a V16QI value that appears in the RTL only through
;; a `use', and is passed to the instruction as its last source operand.
11213 (define_insn "xop_pperm_pack_v8hi_v16qi"
11214 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11217 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m"))
11219 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))))
11220 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11221 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11222 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11223 [(set_attr "type" "sse4arg")
11224 (set_attr "mode" "TI")])
11226 ;; XOP packed rotate instructions
;; Rotate-left expander for the integer vector modes of SSEMODE1248 with a
;; scalar SImode count.  When the count is not accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode
;; if necessary, replicated into every element of a fresh vector register
;; via vec_init<mode>, and the rotate is emitted through xop_vrotl<mode>3.
11227 (define_expand "rotl<mode>3"
11228 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11229 (rotate:SSEMODE1248
11230 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11231 (match_operand:SI 2 "general_operand")))]
11234 /* If we were given a scalar, convert it to parallel */
11235 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11237 rtvec vs = rtvec_alloc (<ssescalarnum>);
11238 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11239 rtx reg = gen_reg_rtx (<MODE>mode);
11240 rtx op2 = operands[2];
11243 if (GET_MODE (op2) != <ssescalarmode>mode)
11245 op2 = gen_reg_rtx (<ssescalarmode>mode);
11246 convert_move (op2, operands[2], false);
11249 for (i = 0; i < <ssescalarnum>; i++)
11250 RTVEC_ELT (vs, i) = op2;
11252 emit_insn (gen_vec_init<mode> (reg, par));
11253 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the integer vector modes of SSEMODE1248 with a
;; scalar SImode count.  When the count is not accepted by
;; const_0_to_<sserotatemax>_operand, it is replicated into a vector as in
;; rotl<mode>3, the vector is negated with neg<mode>2, and the negated
;; counts are handed to xop_vrotl<mode>3.
11258 (define_expand "rotr<mode>3"
11259 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11260 (rotatert:SSEMODE1248
11261 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11262 (match_operand:SI 2 "general_operand")))]
11265 /* If we were given a scalar, convert it to parallel */
11266 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11268 rtvec vs = rtvec_alloc (<ssescalarnum>);
11269 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11270 rtx neg = gen_reg_rtx (<MODE>mode);
11271 rtx reg = gen_reg_rtx (<MODE>mode);
11272 rtx op2 = operands[2];
11275 if (GET_MODE (op2) != <ssescalarmode>mode)
11277 op2 = gen_reg_rtx (<ssescalarmode>mode);
11278 convert_move (op2, operands[2], false);
11281 for (i = 0; i < <ssescalarnum>; i++)
11282 RTVEC_ELT (vs, i) = op2;
11284 emit_insn (gen_vec_init<mode> (reg, par));
11285 emit_insn (gen_neg<mode>2 (neg, reg));
11286 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the
;; vprot<ssevecsize> count.  Operand 1 may be a register or memory.
11291 (define_insn "xop_rotl<mode>3"
11292 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11293 (rotate:SSEMODE1248
11294 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11295 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11297 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11298 [(set_attr "type" "sseishft")
11299 (set_attr "length_immediate" "1")
11300 (set_attr "mode" "TI")])
;; Rotate right by an immediate (operand 2, accepted by
;; const_0_to_<sserotatemax>_operand).  The insn is emitted as a vprot
;; rotate LEFT by (element width in bits - count), with that value placed
;; in operands[3] for the output template.
;;
;; The element width is taken from GET_MODE_BITSIZE (<ssescalarmode>mode).
;; It must not be derived from <ssescalarnum>, which is the number of
;; elements in the vector: <ssescalarnum> * 8 yields 128/64/32/16 for
;; V16QI/V8HI/V4SI/V2DI, which equals the element width only for V4SI and
;; gives a wrong rotation for V2DI.
11302 (define_insn "xop_rotr<mode>3"
11303 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11304 (rotatert:SSEMODE1248
11305 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11306 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11309 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11310 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11312 [(set_attr "type" "sseishft")
11313 (set_attr "length_immediate" "1")
11314 (set_attr "mode" "TI")])
;; Rotate right with a per-element count vector (operand 2): the counts are
;; negated into a fresh register with neg<mode>2 and the result is passed
;; to xop_vrotl<mode>3.
11316 (define_expand "vrotr<mode>3"
11317 [(match_operand:SSEMODE1248 0 "register_operand" "")
11318 (match_operand:SSEMODE1248 1 "register_operand" "")
11319 (match_operand:SSEMODE1248 2 "register_operand" "")]
11322 rtx reg = gen_reg_rtx (<MODE>mode);
11323 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11324 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left with a per-element count vector (operand 2): forwards the
;; three operands unchanged to xop_vrotl<mode>3.
11328 (define_expand "vrotl<mode>3"
11329 [(match_operand:SSEMODE1248 0 "register_operand" "")
11330 (match_operand:SSEMODE1248 1 "register_operand" "")
11331 (match_operand:SSEMODE1248 2 "register_operand" "")]
11334 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate with a vector of counts (operand 2).  The RTL is an
;; if_then_else whose arms are a `rotate' of operand 1 and a `rotatert'
;; using the negation of operand 2; both cases are emitted as a single
;; vprot<ssevecsize>.  Either operand 1 or operand 2 may be in memory, but
;; not both.
11338 (define_insn "xop_vrotl<mode>3"
11339 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11340 (if_then_else:SSEMODE1248
11342 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11344 (rotate:SSEMODE1248
11345 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11347 (rotatert:SSEMODE1248
11349 (neg:SSEMODE1248 (match_dup 2)))))]
11350 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11351 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11352 [(set_attr "type" "sseishft")
11353 (set_attr "prefix_data16" "0")
11354 (set_attr "prefix_extra" "2")
11355 (set_attr "mode" "TI")])
11357 ;; XOP packed shift instructions.
11358 ;; FIXME: add V2DI back in
;; Logical shift right with a per-element count vector (operand 2), for the
;; modes of SSEMODE124: the counts are negated with neg<mode>2 and passed
;; to xop_lshl<mode>3.
11359 (define_expand "vlshr<mode>3"
11360 [(match_operand:SSEMODE124 0 "register_operand" "")
11361 (match_operand:SSEMODE124 1 "register_operand" "")
11362 (match_operand:SSEMODE124 2 "register_operand" "")]
11365 rtx neg = gen_reg_rtx (<MODE>mode);
11366 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11367 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right with a per-element count vector (operand 2), for
;; the modes of SSEMODE124: the counts are negated with neg<mode>2 and
;; passed to xop_ashl<mode>3.
11371 (define_expand "vashr<mode>3"
11372 [(match_operand:SSEMODE124 0 "register_operand" "")
11373 (match_operand:SSEMODE124 1 "register_operand" "")
11374 (match_operand:SSEMODE124 2 "register_operand" "")]
11377 rtx neg = gen_reg_rtx (<MODE>mode);
11378 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11379 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left with a per-element count vector (operand 2), for the modes of
;; SSEMODE124: forwards the three operands unchanged to xop_ashl<mode>3.
11383 (define_expand "vashl<mode>3"
11384 [(match_operand:SSEMODE124 0 "register_operand" "")
11385 (match_operand:SSEMODE124 1 "register_operand" "")
11386 (match_operand:SSEMODE124 2 "register_operand" "")]
11389 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift with a vector of counts (operand 2).  The RTL
;; is an if_then_else whose arms are an `ashift' of operand 1 and an
;; `ashiftrt' using the negation of operand 2; both are emitted as
;; vpsha<ssevecsize>.  Either operand 1 or operand 2 may be in memory, but
;; not both.
11393 (define_insn "xop_ashl<mode>3"
11394 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11395 (if_then_else:SSEMODE1248
11397 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11399 (ashift:SSEMODE1248
11400 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11402 (ashiftrt:SSEMODE1248
11404 (neg:SSEMODE1248 (match_dup 2)))))]
11405 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11406 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11407 [(set_attr "type" "sseishft")
11408 (set_attr "prefix_data16" "0")
11409 (set_attr "prefix_extra" "2")
11410 (set_attr "mode" "TI")])
;; Variable logical shift with a vector of counts (operand 2).  The RTL is
;; an if_then_else whose arms are an `ashift' of operand 1 and a `lshiftrt'
;; using the negation of operand 2; both are emitted as vpshl<ssevecsize>.
;; Either operand 1 or operand 2 may be in memory, but not both.
11412 (define_insn "xop_lshl<mode>3"
11413 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11414 (if_then_else:SSEMODE1248
11416 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11418 (ashift:SSEMODE1248
11419 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11421 (lshiftrt:SSEMODE1248
11423 (neg:SSEMODE1248 (match_dup 2)))))]
11424 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11425 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11426 [(set_attr "type" "sseishft")
11427 (set_attr "prefix_data16" "0")
11428 (set_attr "prefix_extra" "2")
11429 (set_attr "mode" "TI")])
11431 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count (operand 2): the count is placed in
;; all 16 elements of a PARALLEL, loaded into a V16QI register with
;; vec_initv16qi, and the shift is emitted through xop_ashlv16qi3.
11432 (define_expand "ashlv16qi3"
11433 [(match_operand:V16QI 0 "register_operand" "")
11434 (match_operand:V16QI 1 "register_operand" "")
11435 (match_operand:SI 2 "nonmemory_operand" "")]
11438 rtvec vs = rtvec_alloc (16);
11439 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11440 rtx reg = gen_reg_rtx (V16QImode);
11442 for (i = 0; i < 16; i++)
11443 RTVEC_ELT (vs, i) = operands[2];
11445 emit_insn (gen_vec_initv16qi (reg, par));
11446 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar count (operand 2) through the logical-shift
;; pattern: the count is placed in all 16 elements of a PARALLEL, loaded
;; into a V16QI register with vec_initv16qi, and passed to xop_lshlv16qi3.
11450 (define_expand "lshlv16qi3"
11451 [(match_operand:V16QI 0 "register_operand" "")
11452 (match_operand:V16QI 1 "register_operand" "")
11453 (match_operand:SI 2 "nonmemory_operand" "")]
11456 rtvec vs = rtvec_alloc (16);
11457 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11458 rtx reg = gen_reg_rtx (V16QImode);
11460 for (i = 0; i < 16; i++)
11461 RTVEC_ELT (vs, i) = operands[2];
11463 emit_insn (gen_vec_initv16qi (reg, par));
11464 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count (operand 2), implemented
;; as xop_ashlv16qi3 with negated counts.  A CONST_INT count is negated at
;; expand time before being replicated into the 16-element vector; for any
;; other count the replicated vector is negated at run time with negv16qi2.
11468 (define_expand "ashrv16qi3"
11469 [(match_operand:V16QI 0 "register_operand" "")
11470 (match_operand:V16QI 1 "register_operand" "")
11471 (match_operand:SI 2 "nonmemory_operand" "")]
11474 rtvec vs = rtvec_alloc (16);
11475 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11476 rtx reg = gen_reg_rtx (V16QImode);
11478 rtx ele = ((CONST_INT_P (operands[2]))
11479 ? GEN_INT (- INTVAL (operands[2]))
11482 for (i = 0; i < 16; i++)
11483 RTVEC_ELT (vs, i) = ele;
11485 emit_insn (gen_vec_initv16qi (reg, par));
11487 if (!CONST_INT_P (operands[2]))
11489 rtx neg = gen_reg_rtx (V16QImode);
11490 emit_insn (gen_negv16qi2 (neg, reg));
11491 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11494 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count (operand 2),
;; implemented as xop_ashlv2di3 with a negated count.  A CONST_INT count is
;; negated at expand time; otherwise it is negated with negdi2, after a
;; convert_move to DImode when its mode is not already DImode.  The negated
;; count is placed in both elements and loaded with vec_initv2di.
11499 (define_expand "ashrv2di3"
11500 [(match_operand:V2DI 0 "register_operand" "")
11501 (match_operand:V2DI 1 "register_operand" "")
11502 (match_operand:DI 2 "nonmemory_operand" "")]
11505 rtvec vs = rtvec_alloc (2);
11506 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11507 rtx reg = gen_reg_rtx (V2DImode);
11510 if (CONST_INT_P (operands[2]))
11511 ele = GEN_INT (- INTVAL (operands[2]));
11512 else if (GET_MODE (operands[2]) != DImode)
11514 rtx move = gen_reg_rtx (DImode);
11515 ele = gen_reg_rtx (DImode);
11516 convert_move (move, operands[2], false);
11517 emit_insn (gen_negdi2 (ele, move));
11521 ele = gen_reg_rtx (DImode);
11522 emit_insn (gen_negdi2 (ele, operands[2]));
11525 RTVEC_ELT (vs, 0) = ele;
11526 RTVEC_ELT (vs, 1) = ele;
11527 emit_insn (gen_vec_initv2di (reg, par));
11528 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11532 ;; XOP FRCZ support
;; vfrcz on the modes of SSEMODEF2P: one register-or-memory source
;; (operand 1), register destination (operand 0).
11534 (define_insn "xop_frcz<mode>2"
11535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11537 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11540 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11541 [(set_attr "type" "ssecvt1")
11542 (set_attr "mode" "<MODE>")])
;; vec_merge form of vfrcz: the result of the operation on operand 2 is
;; merged with operand 1, which is tied to the destination register by the
;; "0" constraint.  Only operand 2 is printed as the source.
11545 (define_insn "xop_vmfrcz<mode>2"
11546 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11547 (vec_merge:SSEMODEF2P
11549 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11551 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11554 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11555 [(set_attr "type" "ssecvt1")
11556 (set_attr "mode" "<MODE>")])
;; vfrcz on the 256-bit float modes of FMA4MODEF4 (V8SF and V4DF): one
;; register-or-memory source (operand 1), register destination.
11558 (define_insn "xop_frcz<mode>2256"
11559 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11561 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11564 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11565 [(set_attr "type" "ssecvt1")
11566 (set_attr "mode" "<MODE>")])
;; vpcom comparison for operators accepted by ix86_comparison_int_operator.
;; The comparison itself is operand 1 (a match_operator); its code is
;; printed into the mnemonic through the %Y1 modifier.  Operand 2 must be a
;; register, operand 3 may be a register or memory.
11568 (define_insn "xop_maskcmp<mode>3"
11569 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11570 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11571 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11572 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11574 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11575 [(set_attr "type" "sse4arg")
11576 (set_attr "prefix_data16" "0")
11577 (set_attr "prefix_rep" "0")
11578 (set_attr "prefix_extra" "2")
11579 (set_attr "length_immediate" "1")
11580 (set_attr "mode" "TI")])
;; vpcom...u comparison for operators accepted by
;; ix86_comparison_uns_operator.  Same operand layout as xop_maskcmp<mode>3;
;; the mnemonic carries a `u' after the condition printed by %Y1.
11582 (define_insn "xop_maskcmp_uns<mode>3"
11583 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11584 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11585 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11586 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11588 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11589 [(set_attr "type" "ssecmp")
11590 (set_attr "prefix_data16" "0")
11591 (set_attr "prefix_rep" "0")
11592 (set_attr "prefix_extra" "2")
11593 (set_attr "length_immediate" "1")
11594 (set_attr "mode" "TI")])
11596 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11597 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11598 ;; the exact instruction generated for the intrinsic.
;; Same output as xop_maskcmp_uns<mode>3, but the comparison is wrapped in
;; an UNSPEC_XOP_UNSIGNED_CMP unspec instead of appearing as a bare
;; comparison operator in the RTL.
11599 (define_insn "xop_maskcmp_uns2<mode>3"
11600 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11601 (unspec:SSEMODE1248
11602 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11603 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11604 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11605 UNSPEC_XOP_UNSIGNED_CMP))]
11607 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11608 [(set_attr "type" "ssecmp")
11609 (set_attr "prefix_data16" "0")
11610 (set_attr "prefix_extra" "2")
11611 (set_attr "length_immediate" "1")
11612 (set_attr "mode" "TI")])
11614 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11615 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern: the constant operand 3 picks the mnemonic
;; at output time, vpcomtrue when it is non-zero and vpcomfalse when it is
;; zero.  Operands 1 and 2 are passed through as the two sources.
11616 (define_insn "xop_pcom_tf<mode>3"
11617 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11618 (unspec:SSEMODE1248
11619 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11620 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11621 (match_operand:SI 3 "const_int_operand" "n")]
11622 UNSPEC_XOP_TRUEFALSE))]
11625 return ((INTVAL (operands[3]) != 0)
11626 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11627 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11629 [(set_attr "type" "ssecmp")
11630 (set_attr "prefix_data16" "0")
11631 (set_attr "prefix_extra" "2")
11632 (set_attr "length_immediate" "1")
11633 (set_attr "mode" "TI")])
11635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vaesenc, enabled by TARGET_AES && TARGET_AVX.  Three-operand
;; form: operand 1 is a separate source register and is not tied to the
;; destination.
11636 (define_insn "*avx_aesenc"
11637 [(set (match_operand:V2DI 0 "register_operand" "=x")
11638 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11639 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11641 "TARGET_AES && TARGET_AVX"
11642 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11643 [(set_attr "type" "sselog1")
11644 (set_attr "prefix_extra" "1")
11645 (set_attr "prefix" "vex")
11646 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination register by the
;; "0" constraint, so only operand 2 is printed as a source.
11648 (define_insn "aesenc"
11649 [(set (match_operand:V2DI 0 "register_operand" "=x")
11650 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11651 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11654 "aesenc\t{%2, %0|%0, %2}"
11655 [(set_attr "type" "sselog1")
11656 (set_attr "prefix_extra" "1")
11657 (set_attr "mode" "TI")])
;; VEX-encoded vaesenclast (UNSPEC_AESENCLAST), enabled by
;; TARGET_AES && TARGET_AVX.  Three-operand form with operand 1 not tied to
;; the destination.
11659 (define_insn "*avx_aesenclast"
11660 [(set (match_operand:V2DI 0 "register_operand" "=x")
11661 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11662 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11663 UNSPEC_AESENCLAST))]
11664 "TARGET_AES && TARGET_AVX"
11665 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11666 [(set_attr "type" "sselog1")
11667 (set_attr "prefix_extra" "1")
11668 (set_attr "prefix" "vex")
11669 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination register by the "0" constraint.
11671 (define_insn "aesenclast"
11672 [(set (match_operand:V2DI 0 "register_operand" "=x")
11673 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11674 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11675 UNSPEC_AESENCLAST))]
11677 "aesenclast\t{%2, %0|%0, %2}"
11678 [(set_attr "type" "sselog1")
11679 (set_attr "prefix_extra" "1")
11680 (set_attr "mode" "TI")])
;; VEX-encoded vaesdec, enabled by TARGET_AES && TARGET_AVX.  Three-operand
;; form with operand 1 not tied to the destination.
11682 (define_insn "*avx_aesdec"
11683 [(set (match_operand:V2DI 0 "register_operand" "=x")
11684 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11685 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11687 "TARGET_AES && TARGET_AVX"
11688 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11689 [(set_attr "type" "sselog1")
11690 (set_attr "prefix_extra" "1")
11691 (set_attr "prefix" "vex")
11692 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination register by the
;; "0" constraint, so only operand 2 is printed as a source.
11694 (define_insn "aesdec"
11695 [(set (match_operand:V2DI 0 "register_operand" "=x")
11696 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11697 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11700 "aesdec\t{%2, %0|%0, %2}"
11701 [(set_attr "type" "sselog1")
11702 (set_attr "prefix_extra" "1")
11703 (set_attr "mode" "TI")])
;; VEX-encoded vaesdeclast (UNSPEC_AESDECLAST), enabled by
;; TARGET_AES && TARGET_AVX.  Three-operand form with operand 1 not tied to
;; the destination.
11705 (define_insn "*avx_aesdeclast"
11706 [(set (match_operand:V2DI 0 "register_operand" "=x")
11707 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11708 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11709 UNSPEC_AESDECLAST))]
11710 "TARGET_AES && TARGET_AVX"
11711 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11712 [(set_attr "type" "sselog1")
11713 (set_attr "prefix_extra" "1")
11714 (set_attr "prefix" "vex")
11715 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination register by the "0" constraint.
11717 (define_insn "aesdeclast"
11718 [(set (match_operand:V2DI 0 "register_operand" "=x")
11719 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11720 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11721 UNSPEC_AESDECLAST))]
11723 "aesdeclast\t{%2, %0|%0, %2}"
11724 [(set_attr "type" "sselog1")
11725 (set_attr "prefix_extra" "1")
11726 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  One pattern serves both
;; encodings: the template starts with %v and the prefix attribute is
;; maybe_vex.
11728 (define_insn "aesimc"
11729 [(set (match_operand:V2DI 0 "register_operand" "=x")
11730 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11733 "%vaesimc\t{%1, %0|%0, %1}"
11734 [(set_attr "type" "sselog1")
11735 (set_attr "prefix_extra" "1")
11736 (set_attr "prefix" "maybe_vex")
11737 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): register-or-memory source in
;; operand 1 plus an immediate in operand 2 restricted to 0..255.  One
;; pattern serves both encodings (%v template, maybe_vex prefix).
11739 (define_insn "aeskeygenassist"
11740 [(set (match_operand:V2DI 0 "register_operand" "=x")
11741 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11742 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11743 UNSPEC_AESKEYGENASSIST))]
11745 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11746 [(set_attr "type" "sselog1")
11747 (set_attr "prefix_extra" "1")
11748 (set_attr "length_immediate" "1")
11749 (set_attr "prefix" "maybe_vex")
11750 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq, enabled by TARGET_PCLMUL && TARGET_AVX.  Two
;; V2DI sources (operand 1 not tied to the destination) and an immediate in
;; operand 3 restricted to 0..255.
11752 (define_insn "*vpclmulqdq"
11753 [(set (match_operand:V2DI 0 "register_operand" "=x")
11754 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11755 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11756 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11758 "TARGET_PCLMUL && TARGET_AVX"
11759 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11760 [(set_attr "type" "sselog1")
11761 (set_attr "prefix_extra" "1")
11762 (set_attr "length_immediate" "1")
11763 (set_attr "prefix" "vex")
11764 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination register by
;; the "0" constraint; operand 3 is an immediate restricted to 0..255.
11766 (define_insn "pclmulqdq"
11767 [(set (match_operand:V2DI 0 "register_operand" "=x")
11768 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11769 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11770 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11773 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11774 [(set_attr "type" "sselog1")
11775 (set_attr "prefix_extra" "1")
11776 (set_attr "length_immediate" "1")
11777 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; unspec_volatile; elements 1..nregs are SETs that assign
;; CONST0_RTX (V8SImode) to each SSE register viewed in V8SImode.  nregs is
;; 16 when TARGET_64BIT and 8 otherwise.
11779 (define_expand "avx_vzeroall"
11780 [(match_par_dup 0 [(const_int 0)])]
11783 int nregs = TARGET_64BIT ? 16 : 8;
11786 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11788 XVECEXP (operands[0], 0, 0)
11789 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11792 for (regno = 0; regno < nregs; regno++)
11793 XVECEXP (operands[0], 0, regno + 1)
11794 = gen_rtx_SET (VOIDmode,
11795 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11796 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.
11799 (define_insn "*avx_vzeroall"
11800 [(match_parallel 0 "vzeroall_operation"
11801 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11804 [(set_attr "type" "sse")
11805 (set_attr "modrm" "0")
11806 (set_attr "memory" "none")
11807 (set_attr "prefix" "vex")
11808 (set_attr "mode" "OI")])
11810 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; Builds the PARALLEL for vzeroupper by hand.  Element 0 is the
;; UNSPECV_VZEROUPPER unspec_volatile; elements 1..nregs are CLOBBERs of
;; each SSE register viewed in V8SImode.  nregs is 16 when TARGET_64BIT and
;; 8 otherwise.
11811 (define_expand "avx_vzeroupper"
11812 [(match_par_dup 0 [(const_int 0)])]
11815 int nregs = TARGET_64BIT ? 16 : 8;
11818 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11820 XVECEXP (operands[0], 0, 0)
11821 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11822 UNSPECV_VZEROUPPER);
11824 for (regno = 0; regno < nregs; regno++)
11825 XVECEXP (operands[0], 0, regno + 1)
11826 = gen_rtx_CLOBBER (VOIDmode,
11827 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matches a PARALLEL accepted by the vzeroupper_operation predicate whose
;; first element is the UNSPECV_VZEROUPPER unspec_volatile.
11830 (define_insn "*avx_vzeroupper"
11831 [(match_parallel 0 "vzeroupper_operation"
11832 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11835 [(set_attr "type" "sse")
11836 (set_attr "modrm" "0")
11837 (set_attr "memory" "none")
11838 (set_attr "prefix" "vex")
11839 (set_attr "mode" "OI")])
;; vpermil expander for the AVXMODEFDP modes.  The 0..255 immediate in
;; operand 2 is decoded into an explicit PARALLEL of element indices for
;; the vec_select: one mask bit per element.  For V4DFmode, elements 2 and
;; 3 use mask bits 2 and 3 and have 2 added to the selected index.
11841 (define_expand "avx_vpermil<mode>"
11842 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11843 (vec_select:AVXMODEFDP
11844 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11845 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11848 int mask = INTVAL (operands[2]);
11849 rtx perm[<ssescalarnum>];
11851 perm[0] = GEN_INT (mask & 1);
11852 perm[1] = GEN_INT ((mask >> 1) & 1);
11853 if (<MODE>mode == V4DFmode)
11855 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11856 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11860 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the AVXMODEFSP modes.  The 0..255 immediate in
;; operand 2 is decoded into an explicit PARALLEL of element indices for
;; the vec_select: four 2-bit fields give elements 0-3.  For V8SFmode the
;; same four fields are reused for elements 4-7 with 4 added to each index.
11863 (define_expand "avx_vpermil<mode>"
11864 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11865 (vec_select:AVXMODEFSP
11866 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11867 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11870 int mask = INTVAL (operands[2]);
11871 rtx perm[<ssescalarnum>];
11873 perm[0] = GEN_INT (mask & 3);
11874 perm[1] = GEN_INT ((mask >> 2) & 3);
11875 perm[2] = GEN_INT ((mask >> 4) & 3);
11876 perm[3] = GEN_INT ((mask >> 6) & 3);
11877 if (<MODE>mode == V8SFmode)
11879 perm[4] = GEN_INT ((mask & 3) + 4);
11880 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11881 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11882 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11886 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate form of vpermilp, matched on a vec_select whose index PARALLEL
;; satisfies avx_vpermilp_<mode>_operand.  At output time the PARALLEL is
;; turned back into an immediate: avx_vpermilp_parallel's return value
;; minus one replaces operands[2] before the template is printed.
11889 (define_insn "*avx_vpermilp<mode>"
11890 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11891 (vec_select:AVXMODEF2P
11892 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11893 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11894 [(match_operand 3 "const_int_operand" "")])))]
11897 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11898 operands[2] = GEN_INT (mask);
11899 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
11901 [(set_attr "type" "sselog")
11902 (set_attr "prefix_extra" "1")
11903 (set_attr "length_immediate" "1")
11904 (set_attr "prefix" "vex")
11905 (set_attr "mode" "<MODE>")])
;; Variable form of vpermilp: operand 1 is the data register and operand 2
;; is a register-or-memory control vector in mode <avxpermvecmode>.
11907 (define_insn "avx_vpermilvar<mode>3"
11908 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11910 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11911 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11914 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11915 [(set_attr "type" "sselog")
11916 (set_attr "prefix_extra" "1")
11917 (set_attr "prefix" "vex")
11918 (set_attr "mode" "<MODE>")])
;; vperm2f128 as an opaque UNSPEC_VPERMIL2F128: two 256-bit sources
;; (operand 2 may be in memory) and an immediate in operand 3 restricted to
;; 0..255.
11920 (define_insn "avx_vperm2f128<mode>3"
11921 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11922 (unspec:AVX256MODE2P
11923 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11924 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11925 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11926 UNSPEC_VPERMIL2F128))]
11928 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11929 [(set_attr "type" "sselog")
11930 (set_attr "prefix_extra" "1")
11931 (set_attr "length_immediate" "1")
11932 (set_attr "prefix" "vex")
11933 (set_attr "mode" "V8SF")])
;; vbroadcasts{s,d} from a scalar memory operand (operand 1, mode
;; <avxscalarmode>).  The result is expressed in RTL as nested vec_concats
;; of half-vector mode <avxhalfvecmode>.
11935 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11936 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11937 (vec_concat:AVXMODEF4P
11938 (vec_concat:<avxhalfvecmode>
11939 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11941 (vec_concat:<avxhalfvecmode>
11945 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11946 [(set_attr "type" "ssemov")
11947 (set_attr "prefix_extra" "1")
11948 (set_attr "prefix" "vex")
11949 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss with a V8SF destination; the source (operand 1) must be an
;; SFmode memory operand.
11951 (define_insn "avx_vbroadcastss256"
11952 [(set (match_operand:V8SF 0 "register_operand" "=x")
11956 (match_operand:SF 1 "memory_operand" "m")
11969 "vbroadcastss\t{%1, %0|%0, %1}"
11970 [(set_attr "type" "ssemov")
11971 (set_attr "prefix_extra" "1")
11972 (set_attr "prefix" "vex")
11973 (set_attr "mode" "SF")])
;; vbroadcastf128: the source (operand 1) is a memory operand in the
;; half-vector mode <avxhalfvecmode>, and the 256-bit result is written as
;; a vec_concat.
11975 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11976 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11977 (vec_concat:AVX256MODEF2P
11978 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11981 "vbroadcastf128\t{%1, %0|%0, %1}"
11982 [(set_attr "type" "ssemov")
11983 (set_attr "prefix_extra" "1")
11984 (set_attr "prefix" "vex")
11985 (set_attr "mode" "V4SF")])
;; vinsertf128 expander for all AVX256MODE modes.  Operand 3 is a 0..1
;; immediate; the switch on its value emits either vec_set_lo_<mode> or
;; vec_set_hi_<mode>, and any other value reaches gcc_unreachable.
11987 (define_expand "avx_vinsertf128<mode>"
11988 [(match_operand:AVX256MODE 0 "register_operand" "")
11989 (match_operand:AVX256MODE 1 "register_operand" "")
11990 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11991 (match_operand:SI 3 "const_0_to_1_operand" "")]
11994 switch (INTVAL (operands[3]))
11997 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12001 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12005 gcc_unreachable ();
;; Replace the low half of a four-element 256-bit vector (AVX256MODE4P):
;; the result is operand 2 concatenated with elements 2 and 3 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
12010 (define_insn "vec_set_lo_<mode>"
12011 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12012 (vec_concat:AVX256MODE4P
12013 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12014 (vec_select:<avxhalfvecmode>
12015 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12016 (parallel [(const_int 2) (const_int 3)]))))]
12018 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12019 [(set_attr "type" "sselog")
12020 (set_attr "prefix_extra" "1")
12021 (set_attr "length_immediate" "1")
12022 (set_attr "prefix" "vex")
12023 (set_attr "mode" "V8SF")])
;; Replace the high half of a four-element 256-bit vector (AVX256MODE4P):
;; the result is elements 0 and 1 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
12025 (define_insn "vec_set_hi_<mode>"
12026 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12027 (vec_concat:AVX256MODE4P
12028 (vec_select:<avxhalfvecmode>
12029 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12030 (parallel [(const_int 0) (const_int 1)]))
12031 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12033 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12034 [(set_attr "type" "sselog")
12035 (set_attr "prefix_extra" "1")
12036 (set_attr "length_immediate" "1")
12037 (set_attr "prefix" "vex")
12038 (set_attr "mode" "V8SF")])
;; Insn "vec_set_lo_<mode>" for the AVX256MODE8P modes.
;; The result (operand 0) is a vec_concat whose first arm is operand 2
;; (<avxhalfvecmode>, register or memory) and whose second arm is elements
;; 4 through 7 selected from register operand 1.  Emitted as "vinsertf128"
;; with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, one immediate byte, VEX prefix,
;; mode V8SF.
;; NOTE(review): the insn condition is not visible in this excerpt.
12040 (define_insn "vec_set_lo_<mode>"
12041 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12042 (vec_concat:AVX256MODE8P
12043 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12044 (vec_select:<avxhalfvecmode>
12045 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12046 (parallel [(const_int 4) (const_int 5)
12047 (const_int 6) (const_int 7)]))))]
12049 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12050 [(set_attr "type" "sselog")
12051 (set_attr "prefix_extra" "1")
12052 (set_attr "length_immediate" "1")
12053 (set_attr "prefix" "vex")
12054 (set_attr "mode" "V8SF")])
;; Insn "vec_set_hi_<mode>" for the AVX256MODE8P modes.
;; The result (operand 0) is a vec_concat whose first arm is elements 0
;; through 3 selected from register operand 1 and whose second arm is
;; operand 2 (<avxhalfvecmode>, register or memory).  Emitted as
;; "vinsertf128" with immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, one immediate byte, VEX prefix,
;; mode V8SF.
;; NOTE(review): the insn condition is not visible in this excerpt.
12056 (define_insn "vec_set_hi_<mode>"
12057 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12058 (vec_concat:AVX256MODE8P
12059 (vec_select:<avxhalfvecmode>
12060 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12061 (parallel [(const_int 0) (const_int 1)
12062 (const_int 2) (const_int 3)]))
12063 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12065 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12066 [(set_attr "type" "sselog")
12067 (set_attr "prefix_extra" "1")
12068 (set_attr "length_immediate" "1")
12069 (set_attr "prefix" "vex")
12070 (set_attr "mode" "V8SF")])
;; Insn "vec_set_lo_v16hi": V16HI register destination (operand 0), V16HI
;; register source (operand 1) and a V8HI register-or-memory operand 2.
;; The visible index list picks elements 8 through 15 of operand 1, and the
;; insn is emitted as "vinsertf128" with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, one immediate byte, VEX prefix,
;; mode V8SF.
;; NOTE(review): the RTL operators wrapping operands 2 and 1 and the insn
;; condition are not visible in this excerpt; presumably vec_concat and
;; vec_select -- confirm against the complete pattern.
12072 (define_insn "vec_set_lo_v16hi"
12073 [(set (match_operand:V16HI 0 "register_operand" "=x")
12075 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12077 (match_operand:V16HI 1 "register_operand" "x")
12078 (parallel [(const_int 8) (const_int 9)
12079 (const_int 10) (const_int 11)
12080 (const_int 12) (const_int 13)
12081 (const_int 14) (const_int 15)]))))]
12083 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12084 [(set_attr "type" "sselog")
12085 (set_attr "prefix_extra" "1")
12086 (set_attr "length_immediate" "1")
12087 (set_attr "prefix" "vex")
12088 (set_attr "mode" "V8SF")])
;; Insn "vec_set_hi_v16hi": V16HI register destination (operand 0), V16HI
;; register source (operand 1) and a V8HI register-or-memory operand 2.
;; The visible index list picks elements 0 through 7 of operand 1, with
;; operand 2 following it in the pattern.  Emitted as "vinsertf128" with
;; immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, one immediate byte, VEX prefix,
;; mode V8SF.
;; NOTE(review): the RTL operators wrapping operand 1 and the insn condition
;; are not visible in this excerpt; presumably vec_concat and vec_select --
;; confirm against the complete pattern.
12090 (define_insn "vec_set_hi_v16hi"
12091 [(set (match_operand:V16HI 0 "register_operand" "=x")
12094 (match_operand:V16HI 1 "register_operand" "x")
12095 (parallel [(const_int 0) (const_int 1)
12096 (const_int 2) (const_int 3)
12097 (const_int 4) (const_int 5)
12098 (const_int 6) (const_int 7)]))
12099 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12101 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12102 [(set_attr "type" "sselog")
12103 (set_attr "prefix_extra" "1")
12104 (set_attr "length_immediate" "1")
12105 (set_attr "prefix" "vex")
12106 (set_attr "mode" "V8SF")])
;; Insn "vec_set_lo_v32qi": V32QI register destination (operand 0), V32QI
;; register source (operand 1) and a V16QI register-or-memory operand 2.
;; The visible index list picks elements 16 through 31 of operand 1, and the
;; insn is emitted as "vinsertf128" with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, one immediate byte, VEX prefix,
;; mode V8SF.
;; NOTE(review): the RTL operators wrapping operands 2 and 1 and the insn
;; condition are not visible in this excerpt; presumably vec_concat and
;; vec_select -- confirm against the complete pattern.
12108 (define_insn "vec_set_lo_v32qi"
12109 [(set (match_operand:V32QI 0 "register_operand" "=x")
12111 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12113 (match_operand:V32QI 1 "register_operand" "x")
12114 (parallel [(const_int 16) (const_int 17)
12115 (const_int 18) (const_int 19)
12116 (const_int 20) (const_int 21)
12117 (const_int 22) (const_int 23)
12118 (const_int 24) (const_int 25)
12119 (const_int 26) (const_int 27)
12120 (const_int 28) (const_int 29)
12121 (const_int 30) (const_int 31)]))))]
12123 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12124 [(set_attr "type" "sselog")
12125 (set_attr "prefix_extra" "1")
12126 (set_attr "length_immediate" "1")
12127 (set_attr "prefix" "vex")
12128 (set_attr "mode" "V8SF")])
;; Insn "vec_set_hi_v32qi": V32QI register destination (operand 0), V32QI
;; register source (operand 1) and a V16QI register-or-memory operand 2.
;; The visible index list picks elements 0 through 15 of operand 1, with
;; operand 2 following it in the pattern.  Emitted as "vinsertf128" with
;; immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, one immediate byte, VEX prefix,
;; mode V8SF.
;; NOTE(review): the RTL operators wrapping operand 1 and the insn condition
;; are not visible in this excerpt; presumably vec_concat and vec_select --
;; confirm against the complete pattern.
12130 (define_insn "vec_set_hi_v32qi"
12131 [(set (match_operand:V32QI 0 "register_operand" "=x")
12134 (match_operand:V32QI 1 "register_operand" "x")
12135 (parallel [(const_int 0) (const_int 1)
12136 (const_int 2) (const_int 3)
12137 (const_int 4) (const_int 5)
12138 (const_int 6) (const_int 7)
12139 (const_int 8) (const_int 9)
12140 (const_int 10) (const_int 11)
12141 (const_int 12) (const_int 13)
12142 (const_int 14) (const_int 15)]))
12143 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12145 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12146 [(set_attr "type" "sselog")
12147 (set_attr "prefix_extra" "1")
12148 (set_attr "length_immediate" "1")
12149 (set_attr "prefix" "vex")
12150 (set_attr "mode" "V8SF")])
;; Insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>" for the AVXMODEF2P
;; modes.  Operand 0 is the destination register, operand 1 a memory operand
;; and operand 2 a register, all in the iterated mode.  Emitted as
;; "vmaskmovp<avxmodesuffixf2c>" with operand 1 as the memory source and
;; operand 2 as the second source ({%1, %2, %0|%0, %2, %1}).
;; Attributes: type sselog1, prefix_extra 1, VEX prefix, mode <MODE>.
;; NOTE(review): the operator that wraps operands 1 and 2 (presumably an
;; unspec) and the insn condition are not visible in this excerpt -- confirm.
12152 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12153 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12155 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12156 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12160 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12161 [(set_attr "type" "sselog1")
12162 (set_attr "prefix_extra" "1")
12163 (set_attr "prefix" "vex")
12164 (set_attr "mode" "<MODE>")])
;; Insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>" for the AVXMODEF2P
;; modes.  Operand 0 is the memory destination; operands 1 and 2 are
;; registers in the iterated mode and appear inside an operand vector tagged
;; UNSPEC_MASKSTORE.  Emitted as "vmaskmovp<avxmodesuffixf2c>" with operand 0
;; as the memory destination ({%2, %1, %0|%0, %1, %2}).
;; Attributes: type sselog1, prefix_extra 1, VEX prefix, mode <MODE>.
;; NOTE(review): the opening of the unspec, any further elements of its
;; operand vector, and the insn condition are not visible in this excerpt.
12166 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12167 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12169 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12170 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12172 UNSPEC_MASKSTORE))]
12174 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12175 [(set_attr "type" "sselog1")
12176 (set_attr "prefix_extra" "1")
12177 (set_attr "prefix" "vex")
12178 (set_attr "mode" "<MODE>")])
;; Insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>" for the
;; AVX256MODE2P modes: an unspec that takes operand 1 in <avxhalfvecmode>
;; and produces the full-width register operand 0.
;; Two alternatives:
;;   0: operand 1 is tied to operand 0 (constraint "0")
;;   1: operand 1 is a register or memory ("xm")
;; The C output code switches on which_alternative and then on
;; get_attr_mode (insn), returning a vmovaps / vmovapd / vmovdqa into %x0.
;; The "length" attribute is set separately for alternative 0; other
;; alternatives use "*".
;; NOTE(review): the unspec code, the case labels, the value returned for
;; alternative 0, the length given to alternative 0 and the insn condition
;; are not visible in this excerpt.  %x0 presumably prints the 128-bit name
;; of the register -- confirm.
12180 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12181 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12182 (unspec:AVX256MODE2P
12183 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12187 switch (which_alternative)
12192 switch (get_attr_mode (insn))
12195 return "vmovaps\t{%1, %x0|%x0, %1}";
12197 return "vmovapd\t{%1, %x0|%x0, %1}";
12199 return "vmovdqa\t{%1, %x0|%x0, %1}";
12206 gcc_unreachable ();
12208 [(set_attr "type" "ssemov")
12209 (set_attr "prefix" "vex")
12210 (set_attr "mode" "<avxvecmode>")
12211 (set (attr "length")
12212 (if_then_else (eq_attr "alternative" "0")
12214 (const_string "*")))])
;; Insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>" for the
;; AVX256MODE2P modes: an unspec that takes the full-width operand 1 and
;; produces a <avxhalfvecmode> register operand 0.
;; Two alternatives:
;;   0: operand 1 is tied to operand 0 (constraint "0")
;;   1: operand 1 is a register or memory ("xm")
;; The C output code switches on which_alternative and then on
;; get_attr_mode (insn), returning a vmovaps / vmovapd / vmovdqa that reads
;; %x1 and writes %0.
;; The "length" attribute is set separately for alternative 0; other
;; alternatives use "*".
;; NOTE(review): the unspec code, the case labels, the value returned for
;; alternative 0, the length given to alternative 0 and the insn condition
;; are not visible in this excerpt.  %x1 presumably prints the 128-bit name
;; of the register -- confirm.
12216 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12217 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12218 (unspec:<avxhalfvecmode>
12219 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12223 switch (which_alternative)
12228 switch (get_attr_mode (insn))
12231 return "vmovaps\t{%x1, %0|%0, %x1}";
12233 return "vmovapd\t{%x1, %0|%0, %x1}";
12235 return "vmovdqa\t{%x1, %0|%0, %x1}";
12242 gcc_unreachable ();
12244 [(set_attr "type" "ssemov")
12245 (set_attr "prefix" "vex")
12246 (set_attr "mode" "<avxvecmode>")
12247 (set (attr "length")
12248 (if_then_else (eq_attr "alternative" "0")
12250 (const_string "*")))])
;; Expander "vec_init<mode>" for every mode in AVX256MODE.
;;   operand 0: destination register in the iterated mode
;;   operand 1: initializer; no mode or predicate is imposed here
;; All work is delegated to
;; ix86_expand_vector_init (false, operands[0], operands[1]).
;; NOTE(review): the expander condition and the statement that ends the
;; preparation code are not visible in this excerpt.
12252 (define_expand "vec_init<mode>"
12253 [(match_operand:AVX256MODE 0 "register_operand" "")
12254 (match_operand 1 "" "")]
12257 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insn "*vec_concat<mode>_avx" for every mode in AVX256MODE: operand 0 is
;; the vec_concat of two <avxhalfvecmode> values, register operand 1 and
;; operand 2 (vector_move_operand).
;; Two alternatives, told apart by operand 2's constraint:
;;   0: "xm" -- attributes type sselog, prefix_extra 1, length_immediate 1
;;   1: "C"  -- attributes type ssemov, no extra prefix or immediate
;; The C output code switches on which_alternative.  One arm returns
;; "vinsertf128" with immediate 0x1, using %t1 for operand 1; the other
;; switches on get_attr_mode (insn) and returns a vmovaps / vmovapd /
;; vmovdqa of operand 1 into %x0.
;; NOTE(review): the case labels and the insn condition are not visible in
;; this excerpt.  The per-alternative attributes suggest that alternative 0
;; is the vinsertf128 form and alternative 1 the plain move; %t1 and %x0
;; presumably print the 256-bit and 128-bit register names -- confirm.
12261 (define_insn "*vec_concat<mode>_avx"
12262 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12263 (vec_concat:AVX256MODE
12264 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12265 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12268 switch (which_alternative)
12271 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12273 switch (get_attr_mode (insn))
12276 return "vmovaps\t{%1, %x0|%x0, %1}";
12278 return "vmovapd\t{%1, %x0|%x0, %1}";
12280 return "vmovdqa\t{%1, %x0|%x0, %1}";
12283 gcc_unreachable ();
12286 [(set_attr "type" "sselog,ssemov")
12287 (set_attr "prefix_extra" "1,*")
12288 (set_attr "length_immediate" "1,*")
12289 (set_attr "prefix" "vex")
12290 (set_attr "mode" "<avxvecmode>")])