1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte integer vector modes.  The digits in each name
;; appear to encode the element widths in bytes of the listed modes
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF4 lists the two scalar modes plus the two
;; 16-byte vector modes; SSEMODEF2P lists the 16-byte vector modes only.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX-specific subsets.  AVX256MODEF2P is the 32-byte float vector pair and
;; AVXMODEF2P is the union of the 16-byte and 32-byte float vector modes.
;; The remaining iterators are ad-hoc mode lists; their users are not all
;; visible in this part of the file.
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Int-float size matches
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
67 ;; Mapping from float mode to required SSE level
68 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
70 ;; Mapping from integer vector mode to mnemonic suffix
71 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
73 ;; Mapping of the sse5 suffix
74 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
75 (V4SF "ps") (V2DF "pd")])
76 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
77 (V4SF "ss") (V2DF "sd")])
78 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
80 ;; Mapping of the max integer size for sse5 rotate immediate constraint
81 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
83 ;; Mapping of vector modes back to the scalar modes
84 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
85 (V16QI "QI") (V8HI "HI")
86 (V4SI "SI") (V2DI "DI")])
88 ;; Mapping of vector modes to a vector mode of double size
89 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
90 (V4SF "V8SF") (V4SI "V8SI")])
92 ;; Number of scalar elements in each vector type
93 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
94 (V16QI "16") (V8HI "8")
95 (V4SI "4") (V2DI "2")])
;; Value substituted into the insn "mode" attribute by the AVX patterns:
;; integer vector modes map to TI (16-byte) or OI (32-byte), float vector
;; modes map to themselves.
98 (define_mode_attr avxvecmode
99 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
100 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
101 (V8SF "V8SF") (V4DF "V4DF")])
;; Integer vector mode -> single-precision float vector mode of the same
;; total size.
102 (define_mode_attr avxvecpsmode
103 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
104 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode -> vector mode with the same element type and half as many
;; elements.
105 (define_mode_attr avxhalfvecmode
106 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
107 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; Vector mode -> mode of one element.
108 (define_mode_attr avxscalarmode
109 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
110 (V8SF "SF") (V4DF "DF")])
;; Pairs each SF vector mode with the SI vector mode of the same element
;; count, in both directions.
111 (define_mode_attr avxcvtvecmode
112 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Float vector mode -> integer vector mode with the same element count
;; and element width.
113 (define_mode_attr avxpermvecmode
114 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Single-letter precision suffix ("s" or "d") used to build mnemonics such
;; as "vmovup<avxmodesuffixf2c>".
115 (define_mode_attr avxmodesuffixf2c
116 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; Two-letter suffix ("pd", "ps" or "si") per mode.
;; NOTE(review): in this copy the list below is not closed before the next
;; definition starts; a trailing line appears to be missing -- confirm
;; against the upstream file.
117 (define_mode_attr avxmodesuffixp
118 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix: empty for 16-byte modes, "256" for 32-byte modes
;; (e.g. "avx_movdqu<avxmodesuffix>").
120 (define_mode_attr avxmodesuffix
121 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
122 (V8SI "256") (V8SF "256") (V4DF "256")])
124 ;; Mapping of immediate bits for blend instructions
125 (define_mode_attr blendbits
126 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
128 ;; Mapping of immediate bits for vpermil instructions
129 (define_mode_attr vpermilbits
130 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
132 ;; Mapping of immediate bits for pinsr instructions
133 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
135 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX vector modes.  Both operands are
;; nonimmediate_operand; the expansion is handed to ix86_expand_vector_move.
143 (define_expand "mov<mode>"
144 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
145 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
148 ix86_expand_vector_move (<MODE>mode, operands);
;; Aligned vector move for every AVX vector mode.  Three alternatives:
;; constant ("C") to register, register/memory to register, and register
;; to memory.  The visible part of the condition requires at least one
;; operand to be a register.  The constant case goes through
;; standard_sse_constant_opcode (presumably alternative 0, which has type
;; "sselog1"); otherwise the mnemonic (vmovaps, vmovapd or vmovdqa) is
;; chosen from the insn "mode" attribute, which is <avxvecmode>.
152 (define_insn "*avx_mov<mode>_internal"
153 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
154 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
156 && (register_operand (operands[0], <MODE>mode)
157 || register_operand (operands[1], <MODE>mode))"
159 switch (which_alternative)
162 return standard_sse_constant_opcode (insn, operands[1]);
165 switch (get_attr_mode (insn))
169 return "vmovaps\t{%1, %0|%0, %1}";
172 return "vmovapd\t{%1, %0|%0, %1}";
174 return "vmovdqa\t{%1, %0|%0, %1}";
180 [(set_attr "type" "sselog1,ssemov,ssemov")
181 (set_attr "prefix" "vex")
182 (set_attr "mode" "<avxvecmode>")])
184 ;; All of these patterns are enabled for SSE1 as well as SSE2.
185 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte SSE vector modes; the expansion is handed
;; to ix86_expand_vector_move.
187 (define_expand "mov<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move (<MODE>mode, operands);
;; Aligned vector move for the 16-byte SSE modes, with the same three
;; alternatives as the AVX variant (constant, reg/mem -> reg, reg -> mem)
;; and the same requirement that at least one operand is a register.
;; The mnemonic (movaps, movapd or movdqa) follows the computed "mode"
;; attribute:
;;   - V4SF when the function is optimized for size, when SSE2 is not
;;     available, or for the store alternative (2) when
;;     TARGET_SSE_TYPELESS_STORES is set;
;;   - otherwise V4SF / V2DF for those two modes;
;;   - otherwise TI.
196 (define_insn "*mov<mode>_internal"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
198 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
200 && (register_operand (operands[0], <MODE>mode)
201 || register_operand (operands[1], <MODE>mode))"
203 switch (which_alternative)
206 return standard_sse_constant_opcode (insn, operands[1]);
209 switch (get_attr_mode (insn))
212 return "movaps\t{%1, %0|%0, %1}";
214 return "movapd\t{%1, %0|%0, %1}";
216 return "movdqa\t{%1, %0|%0, %1}";
222 [(set_attr "type" "sselog1,ssemov,ssemov")
224 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
225 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
226 (and (eq_attr "alternative" "2")
227 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
229 (const_string "V4SF")
230 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
231 (const_string "V4SF")
232 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
233 (const_string "V2DF")
235 (const_string "TI")))])
237 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
238 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
239 ;; from memory, we'd prefer to load the memory directly into the %xmm
240 ;; register. To facilitate this happy circumstance, this pattern won't
241 ;; split until after register allocation. If the 64-bit value didn't
242 ;; come from memory, this is the best we can do. This is much better
243 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;;
;; Operand 0 is the V4SI destination, operand 1 the DI source (integer
;; registers in alternative 0, memory in alternative 1) and operand 2 an
;; early-clobbered xmm scratch that only alternative 0 needs.  After
;; reload the register case loads each 32-bit half with sse2_loadld (the
;; low half into operand 0, the half at byte offset 4 into the scratch)
;; and interleaves them with sse2_punpckldq; the memory case emits
;; vec_concatv2di of the source with const0_rtx into the V2DI low part of
;; operand 0.
246 (define_insn_and_split "movdi_to_sse"
248 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
249 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
250 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
251 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
253 "&& reload_completed"
256 if (register_operand (operands[1], DImode))
258 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
259 Assemble the 64-bit DImode value in an xmm register. */
260 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
261 gen_rtx_SUBREG (SImode, operands[1], 0)));
262 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
263 gen_rtx_SUBREG (SImode, operands[1], 4)));
264 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
266 else if (memory_operand (operands[1], DImode))
267 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload split (requires TARGET_SSE) of a V4SF move whose source is
;; a zero_extended_scalar_load_operand.  The source is narrowed to its
;; SFmode subreg at offset 0, and operand 2 is set to the V4SF zero vector
;; for use in the replacement pattern, which duplicates the scalar.
;; NOTE(review): the line opening this definition and part of the
;; replacement pattern are not visible in this copy.
273 [(set (match_operand:V4SF 0 "register_operand" "")
274 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
275 "TARGET_SSE && reload_completed"
278 (vec_duplicate:V4SF (match_dup 1))
282 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
283 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload split (requires TARGET_SSE2) of a V2DF move whose source is
;; a zero_extended_scalar_load_operand.  The move becomes a vec_concat of
;; the DFmode subreg at offset 0 of the source with a DFmode zero.
;; NOTE(review): the line opening this definition is not visible in this
;; copy.
287 [(set (match_operand:V2DF 0 "register_operand" "")
288 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
289 "TARGET_SSE2 && reload_completed"
290 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
292 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
293 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 32-byte AVX vector register; delegates to
;; ix86_expand_push.
296 (define_expand "push<mode>1"
297 [(match_operand:AVX256MODE 0 "register_operand" "")]
300 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 16-byte SSE vector register; delegates to
;; ix86_expand_push.
304 (define_expand "push<mode>1"
305 [(match_operand:SSEMODE 0 "register_operand" "")]
308 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for the 32-byte AVX vector modes; delegates to
;; ix86_expand_vector_move_misalign.
312 (define_expand "movmisalign<mode>"
313 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
314 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
317 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned move expander for the 16-byte SSE vector modes; delegates to
;; ix86_expand_vector_move_misalign.
321 (define_expand "movmisalign<mode>"
322 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
323 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
326 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX unaligned float vector move (vmovups / vmovupd) for both 16-byte and
;; 32-byte float modes.  Either a register/memory -> register load or a
;; register -> memory store; memory-to-memory is rejected by the condition.
;; Tagged with the "movu" attribute.
330 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
331 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
333 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
335 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
336 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
337 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
338 [(set_attr "type" "ssemov")
339 (set_attr "movu" "1")
340 (set_attr "prefix" "vex")
341 (set_attr "mode" "<MODE>")])
;; movq between xmm operands: the pattern selects element 0 of the V2DI
;; source (register or memory) into a V2DI register destination.  The
;; template begins with "%v" and the prefix attribute is "maybe_vex".
;; NOTE(review): the rtx wrapped around the element selection is not
;; visible in this copy.
343 (define_insn "sse2_movq128"
344 [(set (match_operand:V2DI 0 "register_operand" "=x")
347 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
348 (parallel [(const_int 0)]))
351 "%vmovq\t{%1, %0|%0, %1}"
352 [(set_attr "type" "ssemov")
353 (set_attr "prefix" "maybe_vex")
354 (set_attr "mode" "TI")])
;; SSE unaligned float vector move (movups / movupd) for V4SF and V2DF.
;; Load or store form; memory-to-memory is rejected by the condition.
;; Tagged with the "movu" attribute.
356 (define_insn "<sse>_movup<ssemodesuffixf2c>"
357 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
359 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
361 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
362 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
363 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
364 [(set_attr "type" "ssemov")
365 (set_attr "movu" "1")
366 (set_attr "mode" "<MODE>")])
;; AVX unaligned integer vector move (vmovdqu) on the QI vector modes
;; (V32QI, V16QI).  Requires TARGET_AVX; memory-to-memory is rejected.
368 (define_insn "avx_movdqu<avxmodesuffix>"
369 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
371 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
373 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
374 "vmovdqu\t{%1, %0|%0, %1}"
375 [(set_attr "type" "ssemov")
376 (set_attr "movu" "1")
377 (set_attr "prefix" "vex")
378 (set_attr "mode" "<avxvecmode>")])
;; SSE2 unaligned integer vector move (movdqu) on V16QI.  Requires
;; TARGET_SSE2; memory-to-memory is rejected.
380 (define_insn "sse2_movdqu"
381 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
382 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
384 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
385 "movdqu\t{%1, %0|%0, %1}"
386 [(set_attr "type" "ssemov")
387 (set_attr "movu" "1")
388 (set_attr "prefix_data16" "1")
389 (set_attr "mode" "TI")])
;; AVX store of a float vector register to memory using vmovntps /
;; vmovntpd.  Register source, memory destination only.
391 (define_insn "avx_movnt<mode>"
392 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
394 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
396 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
397 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
398 [(set_attr "type" "ssemov")
399 (set_attr "prefix" "vex")
400 (set_attr "mode" "<MODE>")])
;; SSE store of a V4SF / V2DF register to memory using movntps / movntpd.
;; Register source, memory destination only.
402 (define_insn "<sse>_movnt<mode>"
403 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
405 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
407 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
408 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
409 [(set_attr "type" "ssemov")
410 (set_attr "mode" "<MODE>")])
;; AVX store of a DI vector register (V4DI, V2DI) to memory using vmovntdq.
;; NOTE(review): the type attribute here is "ssecvt", while the other
;; movnt patterns in this file use "ssemov" -- confirm this is intended.
412 (define_insn "avx_movnt<mode>"
413 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
415 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
418 "vmovntdq\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssecvt")
420 (set_attr "prefix" "vex")
421 (set_attr "mode" "<avxvecmode>")])
;; SSE2 store of a V2DI register to memory using movntdq.
423 (define_insn "sse2_movntv2di"
424 [(set (match_operand:V2DI 0 "memory_operand" "=m")
425 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
428 "movntdq\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssemov")
430 (set_attr "prefix_data16" "1")
431 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint "r") to memory using
;; movnti.
;; NOTE(review): the mode attribute is "V2DF" although the operands are
;; SImode -- confirm this is intended.
433 (define_insn "sse2_movntsi"
434 [(set (match_operand:SI 0 "memory_operand" "=m")
435 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
438 "movnti\t{%1, %0|%0, %1}"
439 [(set_attr "type" "ssemov")
440 (set_attr "mode" "V2DF")])
;; AVX load of a QI vector (V32QI, V16QI) from memory into a register
;; using vlddqu.  Memory source only.
;; NOTE(review): the type attribute is "ssecvt" here but "ssemov" in
;; sse3_lddqu -- confirm this is intended.
442 (define_insn "avx_lddqu<avxmodesuffix>"
443 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
445 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
448 "vlddqu\t{%1, %0|%0, %1}"
449 [(set_attr "type" "ssecvt")
450 (set_attr "movu" "1")
451 (set_attr "prefix" "vex")
452 (set_attr "mode" "<avxvecmode>")])
;; Load of a V16QI vector from memory into a register using lddqu.
;; Memory source only.
454 (define_insn "sse3_lddqu"
455 [(set (match_operand:V16QI 0 "register_operand" "=x")
456 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
459 "lddqu\t{%1, %0|%0, %1}"
460 [(set_attr "type" "ssemov")
461 (set_attr "movu" "1")
462 (set_attr "prefix_rep" "1")
463 (set_attr "mode" "TI")])
465 ; Expand patterns for non-temporal stores. At the moment, only those
466 ; that directly map to insns are defined; it would be possible to
467 ; define patterns for other modes that would expand to several insns.
;; storent expander for V4SF / V2DF: register operand 1 is stored to
;; memory operand 0.
469 (define_expand "storent<mode>"
470 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
472 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
474 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander over the MODEF iterator (defined outside this part of
;; the file): register operand 1 is stored to memory operand 0.
477 (define_expand "storent<mode>"
478 [(set (match_operand:MODEF 0 "memory_operand" "")
480 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: register operand 1 is stored to memory
;; operand 0 through a V2DI unspec.
485 (define_expand "storentv2di"
486 [(set (match_operand:V2DI 0 "memory_operand" "")
487 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SI: register operand 1 is stored to memory
;; operand 0 through an SI unspec.
492 (define_expand "storentsi"
493 [(set (match_operand:SI 0 "memory_operand" "")
494 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 ;; Parallel floating point arithmetic
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float vector expander for V4SF / V2DF.  All work is done by
;; ix86_expand_fp_absneg_operator, after which expansion finishes with
;; DONE.
;; NOTE(review): the rtx code being iterated is not visible in this copy.
505 (define_expand "<code><mode>2"
506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
508 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
509 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
510 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 32-byte float vector modes (V8SF, V4DF).
;; The operands are adjusted by ix86_fixup_binary_operands_no_copy.
512 (define_expand "<plusminus_insn><mode>3"
513 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
514 (plusminus:AVX256MODEF2P
515 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
516 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
517 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
518 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand VEX packed float add/subtract for all AVX float vector
;; modes: operand 0 = operand 1 <op> operand 2, where operand 2 may be in
;; memory.  Operand 1's constraint is "<comm>x".  Must also pass
;; ix86_binary_operator_ok.
520 (define_insn "*avx_<plusminus_insn><mode>3"
521 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
522 (plusminus:AVXMODEF2P
523 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
524 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
525 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
526 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
527 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
528 [(set_attr "type" "sseadd")
529 (set_attr "prefix" "vex")
530 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte float vector modes (V4SF, V2DF).
;; The operands are adjusted by ix86_fixup_binary_operands_no_copy.
532 (define_expand "<plusminus_insn><mode>3"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
534 (plusminus:SSEMODEF2P
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
538 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand SSE packed float add/subtract: operand 1 is tied to the
;; destination (constraint "<comm>0"), so only operands 0 and 2 appear in
;; the template.  Operand 2 may be in memory.
540 (define_insn "*<plusminus_insn><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
542 (plusminus:SSEMODEF2P
543 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
546 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
547 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
548 [(set_attr "type" "sseadd")
549 (set_attr "mode" "<MODE>")])
;; VEX scalar float add/subtract (v{add,sub}ss / sd) expressed as a
;; vec_merge of the vector operation; the insn mode is the scalar element
;; mode.  Operand 1 must be a register, operand 2 may be in memory.
;; NOTE(review): the remaining vec_merge operands are not visible in this
;; copy.
551 (define_insn "*avx_vm<plusminus_insn><mode>3"
552 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
553 (vec_merge:SSEMODEF2P
554 (plusminus:SSEMODEF2P
555 (match_operand:SSEMODEF2P 1 "register_operand" "x")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
559 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
560 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar float add/subtract ({add,sub}ss / sd) expressed as a
;; vec_merge of the vector operation.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.
;; NOTE(review): the remaining vec_merge operands are not visible in this
;; copy.
565 (define_insn "<sse>_vm<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
567 (vec_merge:SSEMODEF2P
568 (plusminus:SSEMODEF2P
569 (match_operand:SSEMODEF2P 1 "register_operand" "0")
570 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
573 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
574 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
575 [(set_attr "type" "sseadd")
576 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float vector modes; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
578 (define_expand "mul<mode>3"
579 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
581 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
582 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
583 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
584 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand VEX packed float multiply (vmulps / vmulpd) for all AVX
;; float vector modes.  Operand 1 carries the commutative marker ("%x");
;; operand 2 may be in memory.
586 (define_insn "*avx_mul<mode>3"
587 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
589 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
590 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
591 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
592 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
593 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "ssemul")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte float vector modes; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
598 (define_expand "mul<mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
601 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
602 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
603 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
604 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand SSE packed float multiply (mulps / mulpd).  Operand 1 is
;; tied to the destination and marked commutative ("%0"); operand 2 may be
;; in memory.
606 (define_insn "*mul<mode>3"
607 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
609 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
610 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
611 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
612 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
613 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
614 [(set_attr "type" "ssemul")
615 (set_attr "mode" "<MODE>")])
;; VEX scalar float multiply (vmulss / vmulsd) expressed as a vec_merge;
;; the insn mode is the scalar element mode.
;; NOTE(review): the condition tests AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling "*avx_vm..." add/sub and div patterns test
;; AVX128_VEC_FLOAT_MODE_P.  The iterator only contains 16-byte modes, so
;; this is probably equivalent -- confirm.
617 (define_insn "*avx_vmmul<mode>3"
618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
619 (vec_merge:SSEMODEF2P
621 (match_operand:SSEMODEF2P 1 "register_operand" "x")
622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
625 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
626 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar float multiply (mulss / mulsd) expressed as a vec_merge.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
631 (define_insn "<sse>_vmmul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
633 (vec_merge:SSEMODEF2P
635 (match_operand:SSEMODEF2P 1 "register_operand" "0")
636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
639 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
640 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
641 [(set_attr "type" "ssemul")
642 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  When SSE math and TARGET_RECIP are
;; enabled, the insn is being optimized for speed, and the finite-math /
;; no-trapping / unsafe-math flags allow it, the division is expanded in
;; software by ix86_emit_swdivsf instead.
;; The speed test uses optimize_insn_for_speed_p (), matching divv4sf3,
;; rather than the global !optimize_size.
644 (define_expand "divv8sf3"
645 [(set (match_operand:V8SF 0 "register_operand" "")
646 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
647 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
650 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
652 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
653 && flag_finite_math_only && !flag_trapping_math
654 && flag_unsafe_math_optimizations)
656 ix86_emit_swdivsf (operands[0], operands[1],
657 operands[2], V8SFmode);
;; V4DF division expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with code DIV.
662 (define_expand "divv4df3"
663 [(set (match_operand:V4DF 0 "register_operand" "")
664 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
665 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
667 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Three-operand VEX packed float divide (vdivps / vdivpd) over all AVX
;; float vector modes.  Operand 1 must be a register; operand 2 may be in
;; memory.
669 (define_insn "avx_div<mode>3"
670 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
672 (match_operand:AVXMODEF2P 1 "register_operand" "x")
673 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
674 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
675 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
676 [(set_attr "type" "ssediv")
677 (set_attr "prefix" "vex")
678 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is optimized for speed, and the finite-math / no-trapping /
;; unsafe-math flags allow it, the division is expanded in software by
;; ix86_emit_swdivsf.
680 (define_expand "divv4sf3"
681 [(set (match_operand:V4SF 0 "register_operand" "")
682 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
683 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
686 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
687 && flag_finite_math_only && !flag_trapping_math
688 && flag_unsafe_math_optimizations)
690 ix86_emit_swdivsf (operands[0], operands[1],
691 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
;; NOTE(review): the condition and preparation statements are not visible
;; in this copy.
696 (define_expand "divv2df3"
697 [(set (match_operand:V2DF 0 "register_operand" "")
698 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Unnamed three-operand VEX packed float divide restricted to the 16-byte
;; float vector modes (condition AVX128_VEC_FLOAT_MODE_P).
703 (define_insn "*avx_div<mode>3"
704 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
706 (match_operand:SSEMODEF2P 1 "register_operand" "x")
707 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
708 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
710 [(set_attr "type" "ssediv")
711 (set_attr "prefix" "vex")
712 (set_attr "mode" "<MODE>")])
;; Two-operand SSE packed float divide (divps / divpd).  Operand 1 is tied
;; to the destination; operand 2 may be in memory.
714 (define_insn "<sse>_div<mode>3"
715 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
717 (match_operand:SSEMODEF2P 1 "register_operand" "0")
718 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
719 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
721 [(set_attr "type" "ssediv")
722 (set_attr "mode" "<MODE>")])
;; VEX scalar float divide (vdivss / vdivsd) expressed as a vec_merge; the
;; insn mode is the scalar element mode.
724 (define_insn "*avx_vmdiv<mode>3"
725 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
726 (vec_merge:SSEMODEF2P
728 (match_operand:SSEMODEF2P 1 "register_operand" "x")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
732 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
733 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
734 [(set_attr "type" "ssediv")
735 (set_attr "prefix" "vex")
736 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar float divide (divss / divsd) expressed as a vec_merge.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
738 (define_insn "<sse>_vmdiv<mode>3"
739 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
740 (vec_merge:SSEMODEF2P
742 (match_operand:SSEMODEF2P 1 "register_operand" "0")
743 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
746 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
747 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
748 [(set_attr "type" "ssediv")
749 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal (vrcpps), modelled as UNSPEC_RCP of a register or
;; memory source.
751 (define_insn "avx_rcpv8sf2"
752 [(set (match_operand:V8SF 0 "register_operand" "=x")
754 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
756 "vrcpps\t{%1, %0|%0, %1}"
757 [(set_attr "type" "sse")
758 (set_attr "prefix" "vex")
759 (set_attr "mode" "V8SF")])
;; V4SF reciprocal (rcpps), modelled as UNSPEC_RCP.  The template begins
;; with "%v" and the prefix attribute is "maybe_vex".
761 (define_insn "sse_rcpv4sf2"
762 [(set (match_operand:V4SF 0 "register_operand" "=x")
764 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
766 "%vrcpps\t{%1, %0|%0, %1}"
767 [(set_attr "type" "sse")
768 (set_attr "atom_sse_attr" "rcp")
769 (set_attr "prefix" "maybe_vex")
770 (set_attr "mode" "V4SF")])
;; VEX scalar reciprocal (vrcpss).  Operand 1 is the source of the unspec
;; (register or memory) and operand 2 is a separate register operand that
;; appears as the middle operand in the template.
;; NOTE(review): the enclosing merge rtx and the unspec code are not
;; visible in this copy.
772 (define_insn "*avx_vmrcpv4sf2"
773 [(set (match_operand:V4SF 0 "register_operand" "=x")
775 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
777 (match_operand:V4SF 2 "register_operand" "x")
780 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
781 [(set_attr "type" "sse")
782 (set_attr "prefix" "vex")
783 (set_attr "mode" "SF")])
;; SSE scalar reciprocal (rcpss).  Operand 2 is tied to the destination
;; (constraint "0"), so the template only names operands 0 and 1.
785 (define_insn "sse_vmrcpv4sf2"
786 [(set (match_operand:V4SF 0 "register_operand" "=x")
788 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
790 (match_operand:V4SF 2 "register_operand" "0")
793 "rcpss\t{%1, %0|%0, %1}"
794 [(set_attr "type" "sse")
795 (set_attr "atom_sse_attr" "rcp")
796 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is being optimized for speed, and the finite-math /
;; no-trapping / unsafe-math flags allow it, the square root is expanded
;; in software by ix86_emit_swsqrtsf (last argument 0).
;; The speed test uses optimize_insn_for_speed_p (), matching sqrtv4sf2,
;; rather than the global !optimize_size.
798 (define_expand "sqrtv8sf2"
799 [(set (match_operand:V8SF 0 "register_operand" "")
800 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
803 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
804 && flag_finite_math_only && !flag_trapping_math
805 && flag_unsafe_math_optimizations)
807 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; V8SF square root (vsqrtps) of a register or memory source.
812 (define_insn "avx_sqrtv8sf2"
813 [(set (match_operand:V8SF 0 "register_operand" "=x")
814 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
816 "vsqrtps\t{%1, %0|%0, %1}"
817 [(set_attr "type" "sse")
818 (set_attr "prefix" "vex")
819 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is optimized for speed, and the finite-math / no-trapping /
;; unsafe-math flags allow it, the square root is expanded in software by
;; ix86_emit_swsqrtsf (last argument 0).
821 (define_expand "sqrtv4sf2"
822 [(set (match_operand:V4SF 0 "register_operand" "")
823 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
826 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
827 && flag_finite_math_only && !flag_trapping_math
828 && flag_unsafe_math_optimizations)
830 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square root (sqrtps) of a register or memory source.  The template
;; begins with "%v" and the prefix attribute is "maybe_vex".
835 (define_insn "sse_sqrtv4sf2"
836 [(set (match_operand:V4SF 0 "register_operand" "=x")
837 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
839 "%vsqrtps\t{%1, %0|%0, %1}"
840 [(set_attr "type" "sse")
841 (set_attr "atom_sse_attr" "sqrt")
842 (set_attr "prefix" "maybe_vex")
843 (set_attr "mode" "V4SF")])
;; V4DF square root (vsqrtpd) of a register or memory source.
845 (define_insn "sqrtv4df2"
846 [(set (match_operand:V4DF 0 "register_operand" "=x")
847 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
849 "vsqrtpd\t{%1, %0|%0, %1}"
850 [(set_attr "type" "sse")
851 (set_attr "prefix" "vex")
852 (set_attr "mode" "V4DF")])
;; V2DF square root (sqrtpd) of a register or memory source.  The template
;; begins with "%v" and the prefix attribute is "maybe_vex".
854 (define_insn "sqrtv2df2"
855 [(set (match_operand:V2DF 0 "register_operand" "=x")
856 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
858 "%vsqrtpd\t{%1, %0|%0, %1}"
859 [(set_attr "type" "sse")
860 (set_attr "prefix" "maybe_vex")
861 (set_attr "mode" "V2DF")])
;; VEX scalar square root (vsqrtss / vsqrtsd) expressed as a vec_merge.
;; Operand 1 is the source (register or memory); operand 2 is a separate
;; register that appears as the middle operand in the template.
;; NOTE(review): the condition tests AVX_VEC_FLOAT_MODE_P, whereas other
;; "*avx_vm..." patterns test AVX128_VEC_FLOAT_MODE_P.  The iterator only
;; contains 16-byte modes, so this is probably equivalent -- confirm.
863 (define_insn "*avx_vmsqrt<mode>2"
864 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
865 (vec_merge:SSEMODEF2P
867 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
868 (match_operand:SSEMODEF2P 2 "register_operand" "x")
870 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
871 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
872 [(set_attr "type" "sse")
873 (set_attr "prefix" "vex")
874 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar square root (sqrtss / sqrtsd) expressed as a vec_merge.
;; Operand 2 is tied to the destination, so the template only names
;; operands 0 and 1.
876 (define_insn "<sse>_vmsqrt<mode>2"
877 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
878 (vec_merge:SSEMODEF2P
880 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
881 (match_operand:SSEMODEF2P 2 "register_operand" "0")
883 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
884 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
885 [(set_attr "type" "sse")
886 (set_attr "atom_sse_attr" "sqrt")
887 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander, enabled for TARGET_AVX with SSE
;; math.  Calls ix86_emit_swsqrtsf with last argument 1.
889 (define_expand "rsqrtv8sf2"
890 [(set (match_operand:V8SF 0 "register_operand" "")
892 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
893 "TARGET_AVX && TARGET_SSE_MATH"
895 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; V8SF reciprocal square root (vrsqrtps), modelled as UNSPEC_RSQRT of a
;; register or memory source.
899 (define_insn "avx_rsqrtv8sf2"
900 [(set (match_operand:V8SF 0 "register_operand" "=x")
902 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
904 "vrsqrtps\t{%1, %0|%0, %1}"
905 [(set_attr "type" "sse")
906 (set_attr "prefix" "vex")
907 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander.  Calls ix86_emit_swsqrtsf with
;; last argument 1.
;; NOTE(review): the enabling condition is not visible in this copy.
909 (define_expand "rsqrtv4sf2"
910 [(set (match_operand:V4SF 0 "register_operand" "")
912 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
915 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF reciprocal square root (rsqrtps), modelled as UNSPEC_RSQRT.  The
;; template begins with "%v" and the prefix attribute is "maybe_vex".
919 (define_insn "sse_rsqrtv4sf2"
920 [(set (match_operand:V4SF 0 "register_operand" "=x")
922 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
924 "%vrsqrtps\t{%1, %0|%0, %1}"
925 [(set_attr "type" "sse")
926 (set_attr "prefix" "maybe_vex")
927 (set_attr "mode" "V4SF")])
;; VEX scalar reciprocal square root (vrsqrtss).  Operand 1 is the source
;; of the unspec; operand 2 is a separate register that appears as the
;; middle operand in the template.
;; NOTE(review): the enclosing merge rtx and the unspec code are not
;; visible in this copy.
929 (define_insn "*avx_vmrsqrtv4sf2"
930 [(set (match_operand:V4SF 0 "register_operand" "=x")
932 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
934 (match_operand:V4SF 2 "register_operand" "x")
937 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
938 [(set_attr "type" "sse")
939 (set_attr "prefix" "vex")
940 (set_attr "mode" "SF")])
;; SSE scalar reciprocal square root (rsqrtss).  Operand 2 is tied to the
;; destination, so the template only names operands 0 and 1.
942 (define_insn "sse_vmrsqrtv4sf2"
943 [(set (match_operand:V4SF 0 "register_operand" "=x")
945 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
947 (match_operand:V4SF 2 "register_operand" "0")
950 "rsqrtss\t{%1, %0|%0, %1}"
951 [(set_attr "type" "sse")
952 (set_attr "mode" "SF")])
954 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
955 ;; isn't really correct, as those rtl operators aren't defined when
956 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander for the 32-byte float vector modes.  Without
;; -ffinite-math-only, operand 1 is forced into a register before the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
958 (define_expand "<code><mode>3"
959 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
960 (smaxmin:AVX256MODEF2P
961 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
962 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
963 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
965 if (!flag_finite_math_only)
966 operands[1] = force_reg (<MODE>mode, operands[1]);
967 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max expander for the 16-byte float vector modes.  Without
;; -ffinite-math-only, operand 1 is forced into a register before the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
970 (define_expand "<code><mode>3"
971 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
973 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
974 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
975 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
977 if (!flag_finite_math_only)
978 operands[1] = force_reg (<MODE>mode, operands[1]);
979 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; VEX packed float min/max, available only under flag_finite_math_only.
;; In that case the operation is treated as commutative: operand 1 carries
;; the "%" marker and may be a memory operand before reload.
982 (define_insn "*avx_<code><mode>3_finite"
983 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
985 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
986 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
987 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
988 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
989 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
990 [(set_attr "type" "sseadd")
991 (set_attr "prefix" "vex")
992 (set_attr "mode" "<MODE>")])
;; SSE two-operand form used only when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is commutative
;; ("%") and tied to the destination ("0"); the template prints only
;; operands 2 and 0.
994 (define_insn "*<code><mode>3_finite"
995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
997 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
998 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
999 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1000 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1001 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1002 [(set_attr "type" "sseadd")
1003 (set_attr "mode" "<MODE>")])
;; AVX three-operand form that is not restricted to flag_finite_math_only.
;; Operand 1 must be a register and deliberately carries no "%"
;; (commutative) modifier: the result depends on operand order when NaNs or
;; signed zeros are involved, so the operands must not be swapped.  This
;; matches the operand-1 predicate used by the SSE "*<code><mode>3" pattern.
1005 (define_insn "*avx_<code><mode>3"
1006 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1008 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1009 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1010 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1011 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1012 [(set_attr "type" "sseadd")
1013 (set_attr "prefix" "vex")
1014 (set_attr "mode" "<avxvecmode>")])
;; SSE two-operand form that is not restricted to flag_finite_math_only.
;; Operand 1 must be a register tied to the destination ("0") and has no
;; commutative modifier, so operand order is preserved.
1016 (define_insn "*<code><mode>3"
1017 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1019 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1020 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1021 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1022 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1023 [(set_attr "type" "sseadd")
1024 (set_attr "mode" "<MODE>")])
;; AVX vec_merge pattern enabled by AVX128_VEC_FLOAT_MODE_P.  Emits the
;; "s"-suffixed three-operand VEX instruction; the insn mode attribute is
;; <ssescalarmode>.
1026 (define_insn "*avx_vm<code><mode>3"
1027 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1028 (vec_merge:SSEMODEF2P
1030 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1031 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1034 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1035 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1036 [(set_attr "type" "sse")
1037 (set_attr "prefix" "vex")
1038 (set_attr "mode" "<ssescalarmode>")])
;; SSE vec_merge pattern enabled by SSE_VEC_FLOAT_MODE_P.  Emits the
;; "s"-suffixed two-operand instruction with operand 1 tied to the
;; destination; the insn mode attribute is <ssescalarmode>.
1040 (define_insn "<sse>_vm<code><mode>3"
1041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1042 (vec_merge:SSEMODEF2P
1044 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1045 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1048 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1049 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1050 [(set_attr "type" "sseadd")
1051 (set_attr "mode" "<ssescalarmode>")])
1053 ;; These versions of the min/max patterns implement exactly the operations
1054 ;; min = (op1 < op2 ? op1 : op2)
1055 ;; max = (!(op1 < op2) ? op1 : op2)
1056 ;; Their operands are not commutative, and thus they may be used in the
1057 ;; presence of -0.0 and NaN.
;; AVX vminp<suffix> with a fixed operand order: operand 1 is a register,
;; operand 2 a register or memory, and neither carries a commutative
;; modifier.
1059 (define_insn "*avx_ieee_smin<mode>3"
1060 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1062 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1063 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1065 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1066 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1067 [(set_attr "type" "sseadd")
1068 (set_attr "prefix" "vex")
1069 (set_attr "mode" "<avxvecmode>")])
;; AVX vmaxp<suffix> with a fixed operand order: operand 1 is a register,
;; operand 2 a register or memory, and neither carries a commutative
;; modifier.
1071 (define_insn "*avx_ieee_smax<mode>3"
1072 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1074 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1075 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1077 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "prefix" "vex")
1081 (set_attr "mode" "<avxvecmode>")])
;; SSE minp<suffix> with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 a register or memory; no commutative
;; modifier is present.
1083 (define_insn "*ieee_smin<mode>3"
1084 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1086 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1087 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1089 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1090 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1091 [(set_attr "type" "sseadd")
1092 (set_attr "mode" "<MODE>")])
;; SSE maxp<suffix> with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 a register or memory; no commutative
;; modifier is present.
1094 (define_insn "*ieee_smax<mode>3"
1095 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1097 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1098 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1100 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1101 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1102 [(set_attr "type" "sseadd")
1103 (set_attr "mode" "<MODE>")])
;; V8SF pattern emitting vaddsubps (three-operand VEX form).  The RTL
;; reuses operands 1 and 2 through match_dup in a V8SF minus expression.
1105 (define_insn "avx_addsubv8sf3"
1106 [(set (match_operand:V8SF 0 "register_operand" "=x")
1109 (match_operand:V8SF 1 "register_operand" "x")
1110 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1111 (minus:V8SF (match_dup 1) (match_dup 2))
1114 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1115 [(set_attr "type" "sseadd")
1116 (set_attr "prefix" "vex")
1117 (set_attr "mode" "V8SF")])
;; V4DF pattern emitting vaddsubpd (three-operand VEX form).  The RTL
;; reuses operands 1 and 2 through match_dup in a V4DF minus expression.
1119 (define_insn "avx_addsubv4df3"
1120 [(set (match_operand:V4DF 0 "register_operand" "=x")
1123 (match_operand:V4DF 1 "register_operand" "x")
1124 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1125 (minus:V4DF (match_dup 1) (match_dup 2))
1128 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1129 [(set_attr "type" "sseadd")
1130 (set_attr "prefix" "vex")
1131 (set_attr "mode" "V4DF")])
;; V4SF pattern emitting the VEX-encoded vaddsubps (three-operand form).
;; The RTL reuses operands 1 and 2 through match_dup in a V4SF minus.
1133 (define_insn "*avx_addsubv4sf3"
1134 [(set (match_operand:V4SF 0 "register_operand" "=x")
1137 (match_operand:V4SF 1 "register_operand" "x")
1138 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1139 (minus:V4SF (match_dup 1) (match_dup 2))
1142 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1143 [(set_attr "type" "sseadd")
1144 (set_attr "prefix" "vex")
1145 (set_attr "mode" "V4SF")])
;; V4SF pattern emitting addsubps (two-operand form, operand 1 tied to the
;; destination).  Sets the prefix_rep attribute to "1".
1147 (define_insn "sse3_addsubv4sf3"
1148 [(set (match_operand:V4SF 0 "register_operand" "=x")
1151 (match_operand:V4SF 1 "register_operand" "0")
1152 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1153 (minus:V4SF (match_dup 1) (match_dup 2))
1156 "addsubps\t{%2, %0|%0, %2}"
1157 [(set_attr "type" "sseadd")
1158 (set_attr "prefix_rep" "1")
1159 (set_attr "mode" "V4SF")])
;; V2DF pattern emitting the VEX-encoded vaddsubpd (three-operand form).
;; The RTL reuses operands 1 and 2 through match_dup in a V2DF minus.
1161 (define_insn "*avx_addsubv2df3"
1162 [(set (match_operand:V2DF 0 "register_operand" "=x")
1165 (match_operand:V2DF 1 "register_operand" "x")
1166 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1167 (minus:V2DF (match_dup 1) (match_dup 2))
1170 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1171 [(set_attr "type" "sseadd")
1172 (set_attr "prefix" "vex")
1173 (set_attr "mode" "V2DF")])
;; V2DF pattern emitting addsubpd (two-operand form, operand 1 tied to the
;; destination).  Sets the atom_unit attribute to "complex".
1175 (define_insn "sse3_addsubv2df3"
1176 [(set (match_operand:V2DF 0 "register_operand" "=x")
1179 (match_operand:V2DF 1 "register_operand" "0")
1180 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1181 (minus:V2DF (match_dup 1) (match_dup 2))
1184 "addsubpd\t{%2, %0|%0, %2}"
1185 [(set_attr "type" "sseadd")
1186 (set_attr "atom_unit" "complex")
1187 (set_attr "mode" "V2DF")])
;; V4DF pattern emitting vh<plusminus_mnemonic>pd.  The RTL selects DF
;; elements 0..3 of operand 1 and of operand 2 with vec_select, pairing
;; elements (0,1) and (2,3) of each source.
1189 (define_insn "avx_h<plusminus_insn>v4df3"
1190 [(set (match_operand:V4DF 0 "register_operand" "=x")
1195 (match_operand:V4DF 1 "register_operand" "x")
1196 (parallel [(const_int 0)]))
1197 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1199 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1200 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1204 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1205 (parallel [(const_int 0)]))
1206 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1208 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1209 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1211 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1212 [(set_attr "type" "sseadd")
1213 (set_attr "prefix" "vex")
1214 (set_attr "mode" "V4DF")])
;; V8SF pattern emitting vh<plusminus_mnemonic>ps.  The RTL selects SF
;; elements with vec_select: elements 0..3 of operand 1, then 0..3 of
;; operand 2, then 4..7 of operand 1, then 4..7 of operand 2.
1216 (define_insn "avx_h<plusminus_insn>v8sf3"
1217 [(set (match_operand:V8SF 0 "register_operand" "=x")
1223 (match_operand:V8SF 1 "register_operand" "x")
1224 (parallel [(const_int 0)]))
1225 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1227 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1228 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1232 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1233 (parallel [(const_int 0)]))
1234 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1236 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1237 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1241 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1242 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1244 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1245 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1248 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1249 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1251 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1252 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1254 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1255 [(set_attr "type" "sseadd")
1256 (set_attr "prefix" "vex")
1257 (set_attr "mode" "V8SF")])
;; V4SF pattern emitting the VEX-encoded vh<plusminus_mnemonic>ps.  The RTL
;; selects SF elements 0..3 of operand 1 and then of operand 2 with
;; vec_select.
1259 (define_insn "*avx_h<plusminus_insn>v4sf3"
1260 [(set (match_operand:V4SF 0 "register_operand" "=x")
1265 (match_operand:V4SF 1 "register_operand" "x")
1266 (parallel [(const_int 0)]))
1267 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1270 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1274 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1275 (parallel [(const_int 0)]))
1276 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1279 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1281 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1282 [(set_attr "type" "sseadd")
1283 (set_attr "prefix" "vex")
1284 (set_attr "mode" "V4SF")])
;; V4SF pattern emitting h<plusminus_mnemonic>ps (two-operand form, operand
;; 1 tied to the destination).  The RTL selects SF elements 0..3 of operand
;; 1 and then of operand 2.  Sets atom_unit "complex" and prefix_rep "1".
1286 (define_insn "sse3_h<plusminus_insn>v4sf3"
1287 [(set (match_operand:V4SF 0 "register_operand" "=x")
1292 (match_operand:V4SF 1 "register_operand" "0")
1293 (parallel [(const_int 0)]))
1294 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1297 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1301 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1302 (parallel [(const_int 0)]))
1303 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1306 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1308 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1309 [(set_attr "type" "sseadd")
1310 (set_attr "atom_unit" "complex")
1311 (set_attr "prefix_rep" "1")
1312 (set_attr "mode" "V4SF")])
;; V2DF pattern emitting the VEX-encoded vh<plusminus_mnemonic>pd.  The RTL
;; selects DF elements 0 and 1 of operand 1 and then of operand 2.
1314 (define_insn "*avx_h<plusminus_insn>v2df3"
1315 [(set (match_operand:V2DF 0 "register_operand" "=x")
1319 (match_operand:V2DF 1 "register_operand" "x")
1320 (parallel [(const_int 0)]))
1321 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1324 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1325 (parallel [(const_int 0)]))
1326 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1328 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1329 [(set_attr "type" "sseadd")
1330 (set_attr "prefix" "vex")
1331 (set_attr "mode" "V2DF")])
;; V2DF pattern emitting h<plusminus_mnemonic>pd (two-operand form, operand
;; 1 tied to the destination).  The RTL selects DF elements 0 and 1 of
;; operand 1 and then of operand 2.
1333 (define_insn "sse3_h<plusminus_insn>v2df3"
1334 [(set (match_operand:V2DF 0 "register_operand" "=x")
1338 (match_operand:V2DF 1 "register_operand" "0")
1339 (parallel [(const_int 0)]))
1340 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1343 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1344 (parallel [(const_int 0)]))
1345 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1347 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1348 [(set_attr "type" "sseadd")
1349 (set_attr "mode" "V2DF")])
;; V4SF sum-reduction expander (operand 0 = result, operand 1 = input).
;; Two strategies appear in the body: two chained gen_sse3_haddv4sf3 calls
;; through a fresh temporary register, and a call to ix86_expand_reduc_v4sf
;; with gen_addv4sf3.
;; NOTE(review): the test selecting between the two is not shown in this
;; listing -- presumably an SSE3 availability check; confirm.
1351 (define_expand "reduc_splus_v4sf"
1352 [(match_operand:V4SF 0 "register_operand" "")
1353 (match_operand:V4SF 1 "register_operand" "")]
1358 rtx tmp = gen_reg_rtx (V4SFmode);
1359 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1360 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1363 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum-reduction expander: emits a single gen_sse3_haddv2df3 with
;; operand 1 supplied as both sources and operand 0 as the destination.
1367 (define_expand "reduc_splus_v2df"
1368 [(match_operand:V2DF 0 "register_operand" "")
1369 (match_operand:V2DF 1 "register_operand" "")]
1372 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF max-reduction expander: delegates to ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining generator.
1376 (define_expand "reduc_smax_v4sf"
1377 [(match_operand:V4SF 0 "register_operand" "")
1378 (match_operand:V4SF 1 "register_operand" "")]
1381 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF min-reduction expander: delegates to ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining generator.
1385 (define_expand "reduc_smin_v4sf"
1386 [(match_operand:V4SF 0 "register_operand" "")
1387 (match_operand:V4SF 1 "register_operand" "")]
1390 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1396 ;; Parallel floating point comparisons
1398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare: emits vcmpp<suffix> with operand 3 as the immediate
;; predicate, which must satisfy const_0_to_31_operand.
1400 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1401 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1403 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1404 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1405 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1408 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1409 [(set_attr "type" "ssecmp")
1410 (set_attr "prefix" "vex")
1411 (set_attr "mode" "<MODE>")])
;; AVX vec_merge compare: emits vcmps<suffix> with operand 3 as the
;; immediate predicate, which must satisfy const_0_to_31_operand.
;; Operand 0 is the output and therefore carries the "=x" constraint, like
;; every other define_insn destination in this file; an empty constraint
;; string would leave the destination without a register-class constraint.
1413 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1414 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1415 (vec_merge:SSEMODEF2P
1417 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1418 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1419 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1424 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1425 [(set_attr "type" "ssecmp")
1426 (set_attr "prefix" "vex")
1427 (set_attr "mode" "<ssescalarmode>")])
1429 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1430 ;; may generate 256-bit vector compare instructions.
;; AVX mask compare: operand 3 is a match_operator accepted by
;; avx_comparison_float_operator and is printed with the %D3 modifier to
;; form the vcmp<cond>p<suffix> mnemonic.
1431 (define_insn "*avx_maskcmp<mode>3"
1432 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1433 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1434 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1435 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1436 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1437 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "prefix" "vex")
1440 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over SSEMODEF4 (scalar and vector float modes).
;; Operand 3 is a match_operator accepted by sse_comparison_operator and is
;; printed with %D3; operand 1 is tied to the destination.
1442 (define_insn "<sse>_maskcmp<mode>3"
1443 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1444 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1445 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1446 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1447 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1449 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1450 [(set_attr "type" "ssecmp")
1451 (set_attr "mode" "<MODE>")])
;; SSE vec_merge mask compare, enabled for SSE vector float modes only when
;; TARGET_SSE5 is off.  Operand 3 is a match_operator accepted by
;; sse_comparison_operator and is printed with %D3.
1453 (define_insn "<sse>_vmmaskcmp<mode>3"
1454 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1455 (vec_merge:SSEMODEF2P
1456 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1457 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1458 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1461 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1462 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1463 [(set_attr "type" "ssecmp")
1464 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 and element 0 of
;; operand 1 (both of <ssevecmode>).  Emits comis<suffix>; the "%v" in the
;; template goes with the "maybe_vex" prefix attribute.
1466 (define_insn "<sse>_comi"
1467 [(set (reg:CCFP FLAGS_REG)
1470 (match_operand:<ssevecmode> 0 "register_operand" "x")
1471 (parallel [(const_int 0)]))
1473 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1474 (parallel [(const_int 0)]))))]
1475 "SSE_FLOAT_MODE_P (<MODE>mode)"
1476 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1477 [(set_attr "type" "ssecomi")
1478 (set_attr "prefix" "maybe_vex")
1479 (set_attr "mode" "<MODE>")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and element 0
;; of operand 1 (both of <ssevecmode>).  Emits ucomis<suffix>; the "%v" in
;; the template goes with the "maybe_vex" prefix attribute.
1481 (define_insn "<sse>_ucomi"
1482 [(set (reg:CCFPU FLAGS_REG)
1485 (match_operand:<ssevecmode> 0 "register_operand" "x")
1486 (parallel [(const_int 0)]))
1488 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1489 (parallel [(const_int 0)]))))]
1490 "SSE_FLOAT_MODE_P (<MODE>mode)"
1491 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1492 [(set_attr "type" "ssecomi")
1493 (set_attr "prefix" "maybe_vex")
1494 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: operand 3 is the comparison operator
;; applied to operands 4 and 5, and operands 1 and 2 are the two arms of the
;; if_then_else.  The work is delegated to ix86_expand_fp_vcond, whose
;; return value is tested.
1496 (define_expand "vcond<mode>"
1497 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1498 (if_then_else:SSEMODEF2P
1499 (match_operator 3 ""
1500 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1501 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1502 (match_operand:SSEMODEF2P 1 "general_operand" "")
1503 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1504 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1506 if (ix86_expand_fp_vcond (operands))
1512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1514 ;; Parallel floating point logical operations
1516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vector and-not: emits vandnp<suffix> in three-operand VEX form, with
;; operand 1 in a register and operand 2 in a register or memory.
1518 (define_insn "avx_andnot<mode>3"
1519 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1522 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1523 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1524 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1525 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1526 [(set_attr "type" "sselog")
1527 (set_attr "prefix" "vex")
1528 (set_attr "mode" "<avxvecmode>")])
;; SSE vector and-not: emits andnp<suffix> in two-operand form, with
;; operand 1 tied to the destination and operand 2 in a register or memory.
1530 (define_insn "<sse>_andnot<mode>3"
1531 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1534 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1535 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1536 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1537 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1538 [(set_attr "type" "sselog")
1539 (set_attr "mode" "<MODE>")])
;; Expander for the plogic operators on the 256-bit AVX256MODEF2P modes.
;; Its only preparation step is ix86_fixup_binary_operands_no_copy.
1541 (define_expand "<code><mode>3"
1542 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1543 (plogic:AVX256MODEF2P
1544 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1545 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1546 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1547 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX vector logic insn: emits v<plogicprefix>p<suffix> in three-operand
;; VEX form.  Operand 1 is commutative ("%"); the pattern additionally
;; requires ix86_binary_operator_ok to accept the operands.
1549 (define_insn "*avx_<code><mode>3"
1550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1552 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1554 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1555 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1556 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1557 [(set_attr "type" "sselog")
1558 (set_attr "prefix" "vex")
1559 (set_attr "mode" "<avxvecmode>")])
;; Expander over the SSEMODEF2P modes, enabled by SSE_VEC_FLOAT_MODE_P.
;; Its only preparation step is ix86_fixup_binary_operands_no_copy.
1561 (define_expand "<code><mode>3"
1562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1564 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1565 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1566 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE vector logic insn: emits <plogicprefix>p<suffix> in two-operand
;; form.  Operand 1 is commutative ("%") and tied to the destination; the
;; pattern additionally requires ix86_binary_operator_ok.
1569 (define_insn "*<code><mode>3"
1570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1572 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1574 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1576 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1577 [(set_attr "type" "sselog")
1578 (set_attr "mode" "<MODE>")])
1580 ;; Also define scalar versions. These are used for abs, neg, and
1581 ;; conditional move. Using subregs into vector modes causes register
1582 ;; allocation lossage. These patterns do not allow memory operands
1583 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not using the AVX vector instruction
;; vandnp<suffix>.  All operands are registers only; the insn mode
;; attribute is <ssevecmode>.
1585 (define_insn "*avx_andnot<mode>3"
1586 [(set (match_operand:MODEF 0 "register_operand" "=x")
1589 (match_operand:MODEF 1 "register_operand" "x"))
1590 (match_operand:MODEF 2 "register_operand" "x")))]
1591 "AVX_FLOAT_MODE_P (<MODE>mode)"
1592 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1593 [(set_attr "type" "sselog")
1594 (set_attr "prefix" "vex")
1595 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not using the SSE vector instruction andnp<suffix>.
;; All operands are registers only, with operand 1 tied to the
;; destination; the insn mode attribute is <ssevecmode>.
1597 (define_insn "*andnot<mode>3"
1598 [(set (match_operand:MODEF 0 "register_operand" "=x")
1601 (match_operand:MODEF 1 "register_operand" "0"))
1602 (match_operand:MODEF 2 "register_operand" "x")))]
1603 "SSE_FLOAT_MODE_P (<MODE>mode)"
1604 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1605 [(set_attr "type" "sselog")
1606 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic operation using the AVX vector instruction
;; v<plogicprefix>p<suffix>.  All operands are registers only; the insn
;; mode attribute is <ssevecmode>.
1608 (define_insn "*avx_<code><mode>3"
1609 [(set (match_operand:MODEF 0 "register_operand" "=x")
1611 (match_operand:MODEF 1 "register_operand" "x")
1612 (match_operand:MODEF 2 "register_operand" "x")))]
1613 "AVX_FLOAT_MODE_P (<MODE>mode)"
1614 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1615 [(set_attr "type" "sselog")
1616 (set_attr "prefix" "vex")
1617 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic operation using the SSE vector instruction
;; <plogicprefix>p<suffix>.  All operands are registers only, with operand
;; 1 tied to the destination; the insn mode attribute is <ssevecmode>.
1619 (define_insn "*<code><mode>3"
1620 [(set (match_operand:MODEF 0 "register_operand" "=x")
1622 (match_operand:MODEF 1 "register_operand" "0")
1623 (match_operand:MODEF 2 "register_operand" "x")))]
1624 "SSE_FLOAT_MODE_P (<MODE>mode)"
1625 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1626 [(set_attr "type" "sselog")
1627 (set_attr "mode" "<ssevecmode>")])
1629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1631 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1632 ;; scalar version of the instructions as well as the vector.
1634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1636 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1637 ;; combine to generate a multiply/add with two memory references. We then
1638 ;; split this insn, into loading up the destination register with one of the
1639 ;; memory operations. If we don't manage to split the insn, reload will
1640 ;; generate the appropriate moves. The reason this is needed, is that combine
1641 ;; has already folded one of the memory references into both the multiply and
1642 ;; add insns, and it can't generate a new pseudo. I.e.:
1643 ;; (set (reg1) (mem (addr1)))
1644 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1645 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fmadd over SSEMODEF4 with four constraint alternatives.  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p called with
;; arguments (4, true, 2, true).
1647 (define_insn "sse5_fmadd<mode>4"
1648 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1651 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1652 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1653 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1654 "TARGET_SSE5 && TARGET_FUSED_MADD
1655 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1656 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1657 [(set_attr "type" "ssemuladd")
1658 (set_attr "mode" "<MODE>")])
1660 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with (4, true, 1,
;; true) but pass it with (4, true, 2, true), and operand 0 is not
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fmadd<mode>4.
1662 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1665 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1666 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1667 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1669 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1670 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1671 && !reg_mentioned_p (operands[0], operands[1])
1672 && !reg_mentioned_p (operands[0], operands[2])
1673 && !reg_mentioned_p (operands[0], operands[3])"
1676 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1677 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1678 operands[2], operands[3]));
1682 ;; For the scalar operations, use operand1 for the upper words that aren't
1683 ;; modified, so restrict the forms that are generated.
1684 ;; Scalar version of fmadd
;; vec_merge fmadd with two constraint alternatives; operand 1 is tied to
;; the destination in both.  Requires TARGET_SSE5 and TARGET_FUSED_MADD,
;; and ix86_sse5_valid_op_p called with (4, true, 1, true).
1685 (define_insn "sse5_vmfmadd<mode>4"
1686 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1687 (vec_merge:SSEMODEF2P
1690 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1691 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1692 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1695 "TARGET_SSE5 && TARGET_FUSED_MADD
1696 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1697 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1698 [(set_attr "type" "ssemuladd")
1699 (set_attr "mode" "<MODE>")])
1701 ;; Floating multiply and subtract
1702 ;; Allow two memory operands the same as fmadd
;; SSE5 fmsub over SSEMODEF4 with four constraint alternatives.  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p called with
;; arguments (4, true, 2, true).
1703 (define_insn "sse5_fmsub<mode>4"
1704 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1707 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1708 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1709 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1710 "TARGET_SSE5 && TARGET_FUSED_MADD
1711 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1712 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1713 [(set_attr "type" "ssemuladd")
1714 (set_attr "mode" "<MODE>")])
1716 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with (4, true, 1,
;; true) but pass it with (4, true, 2, true), and operand 0 is not
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fmsub<mode>4.
1718 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1721 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1722 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1723 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1725 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1726 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1727 && !reg_mentioned_p (operands[0], operands[1])
1728 && !reg_mentioned_p (operands[0], operands[2])
1729 && !reg_mentioned_p (operands[0], operands[3])"
1732 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1733 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1734 operands[2], operands[3]));
1738 ;; For the scalar operations, use operand1 for the upper words that aren't
1739 ;; modified, so restrict the forms that are generated.
1740 ;; Scalar version of fmsub
;; vec_merge fmsub with two constraint alternatives; operand 1 is tied to
;; the destination in both.  Requires TARGET_SSE5 and TARGET_FUSED_MADD,
;; and ix86_sse5_valid_op_p called with (4, true, 1, false).
;; NOTE(review): the last argument is "false" here but "true" in
;; sse5_vmfmadd<mode>4 -- confirm this difference is intended.
1741 (define_insn "sse5_vmfmsub<mode>4"
1742 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1743 (vec_merge:SSEMODEF2P
1746 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1747 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1748 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1751 "TARGET_SSE5 && TARGET_FUSED_MADD
1752 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1753 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1754 [(set_attr "type" "ssemuladd")
1755 (set_attr "mode" "<MODE>")])
1757 ;; Floating point negative multiply and add
1758 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1759 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1760 ;; Allow two memory operands to help in optimizing.
;; SSE5 fnmadd over SSEMODEF4 with four constraint alternatives.  Operand 3
;; appears first in the RTL, ahead of operands 1 and 2.  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p called with
;; (4, true, 2, true).
1761 (define_insn "sse5_fnmadd<mode>4"
1762 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1764 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1766 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1767 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1768 "TARGET_SSE5 && TARGET_FUSED_MADD
1769 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1770 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
1774 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with (4, true, 1,
;; true) but pass it with (4, true, 2, true), and operand 0 is not
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmadd<mode>4.
1776 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1778 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1780 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1781 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1783 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1784 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1785 && !reg_mentioned_p (operands[0], operands[1])
1786 && !reg_mentioned_p (operands[0], operands[2])
1787 && !reg_mentioned_p (operands[0], operands[3])"
1790 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1791 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1792 operands[2], operands[3]));
1796 ;; For the scalar operations, use operand1 for the upper words that aren't
1797 ;; modified, so restrict the forms that are generated.
1798 ;; Scalar version of fnmadd
;; vec_merge fnmadd with two constraint alternatives; operand 1 is tied to
;; the destination in both, and operand 3 appears first in the RTL.
;; Requires TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p
;; called with (4, true, 1, true).
1799 (define_insn "sse5_vmfnmadd<mode>4"
1800 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1801 (vec_merge:SSEMODEF2P
1803 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1805 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1806 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1809 "TARGET_SSE5 && TARGET_FUSED_MADD
1810 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1811 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1812 [(set_attr "type" "ssemuladd")
1813 (set_attr "mode" "<MODE>")])
1815 ;; Floating point negative multiply and subtract
1816 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1817 ;; Allow 2 memory operands to help with optimization
;; SSE5 fnmsub over SSEMODEF4 with two constraint alternatives; operand 1
;; is tied to the destination in both.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD, and ix86_sse5_valid_op_p called with
;; (4, true, 2, false).
1818 (define_insn "sse5_fnmsub<mode>4"
1819 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1823 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1824 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1825 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1826 "TARGET_SSE5 && TARGET_FUSED_MADD
1827 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1828 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1829 [(set_attr "type" "ssemuladd")
1830 (set_attr "mode" "<MODE>")])
1832 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with (4, true, 1,
;; false) but pass it with (4, true, 2, false), and operand 0 is not
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmsub<mode>4.
1834 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1838 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1839 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1840 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1842 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1843 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1844 && !reg_mentioned_p (operands[0], operands[1])
1845 && !reg_mentioned_p (operands[0], operands[2])
1846 && !reg_mentioned_p (operands[0], operands[3])"
1849 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1850 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1851 operands[2], operands[3]));
1855 ;; For the scalar operations, use operand1 for the upper words that aren't
1856 ;; modified, so restrict the forms that are generated.
1857 ;; Scalar version of fnmsub
;; vec_merge fnmsub with two constraint alternatives; operand 1 is tied to
;; the destination in both.  Requires TARGET_SSE5 and TARGET_FUSED_MADD,
;; and ix86_sse5_valid_op_p called with (4, true, 2, false).
;; NOTE(review): the other sse5_vm* patterns pass 1 as the fifth argument;
;; confirm that 2 is intended here.
1858 (define_insn "sse5_vmfnmsub<mode>4"
1859 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1860 (vec_merge:SSEMODEF2P
1864 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1865 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1866 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1869 "TARGET_SSE5 && TARGET_FUSED_MADD
1870 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1871 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1872 [(set_attr "type" "ssemuladd")
1873 (set_attr "mode" "<MODE>")])
1875 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1876 ;; even if the user used -mno-fused-madd
1877 ;; Parallel instructions. During instruction generation, just default
1878 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic fmadd expander: all operands are registers and the RTL is
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the
;; body emits gen_sse5_fmadd<mode>4 with the same operands instead.
1879 (define_expand "sse5i_fmadd<mode>4"
1880 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1884 (match_operand:SSEMODEF2P 1 "register_operand" "")
1885 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1886 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1887 UNSPEC_SSE5_INTRINSIC))]
1890 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1891 if (TARGET_FUSED_MADD)
1893 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1894 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmadd with four constraint alternatives.
;; Requires only TARGET_SSE5 (no TARGET_FUSED_MADD test) plus
;; ix86_sse5_valid_op_p called with (4, true, 1, true).
1899 (define_insn "*sse5i_fmadd<mode>4"
1900 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1904 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1905 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1906 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1907 UNSPEC_SSE5_INTRINSIC))]
1908 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1909 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1910 [(set_attr "type" "ssemuladd")
1911 (set_attr "mode" "<MODE>")])
;; Intrinsic fmsub expander: all operands are registers and the RTL is
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the
;; body emits gen_sse5_fmsub<mode>4 with the same operands instead.
1913 (define_expand "sse5i_fmsub<mode>4"
1914 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1918 (match_operand:SSEMODEF2P 1 "register_operand" "")
1919 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1920 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1921 UNSPEC_SSE5_INTRINSIC))]
1924 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1925 if (TARGET_FUSED_MADD)
1927 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1928 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmsub.  Same operand
;; constraints as *sse5i_fmadd<mode>4; operand 1 carries the '%' marker.
;; Requires TARGET_SSE5 plus ix86_sse5_valid_op_p.
1933 (define_insn "*sse5i_fmsub<mode>4"
1934 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1938 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1939 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1940 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1941 UNSPEC_SSE5_INTRINSIC))]
1942 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1943 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1944 [(set_attr "type" "ssemuladd")
1945 (set_attr "mode" "<MODE>")])
1947 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1948 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the fnmadd intrinsic, wrapped in UNSPEC_SSE5_INTRINSIC.
;; Operand 3 is listed in the pattern before operands 1 and 2.  When
;; TARGET_FUSED_MADD is set, gen_sse5_fnmadd<mode>4 is emitted with the
;; operands passed in numeric order (0, 1, 2, 3).
1949 (define_expand "sse5i_fnmadd<mode>4"
1950 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1953 (match_operand:SSEMODEF2P 3 "register_operand" "")
1955 (match_operand:SSEMODEF2P 1 "register_operand" "")
1956 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1957 UNSPEC_SSE5_INTRINSIC))]
1960 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1961 if (TARGET_FUSED_MADD)
1963 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1964 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fnmadd.  Operand 3 is
;; listed before operands 1 and 2 in the pattern; the output template still
;; prints the operands in numeric order.  Operand 1 carries the '%' marker.
1969 (define_insn "*sse5i_fnmadd<mode>4"
1970 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1973 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1975 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1976 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1977 UNSPEC_SSE5_INTRINSIC))]
1978 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1979 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1980 [(set_attr "type" "ssemuladd")
1981 (set_attr "mode" "<MODE>")])
1983 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the fnmsub intrinsic, wrapped in UNSPEC_SSE5_INTRINSIC.
;; When TARGET_FUSED_MADD is set, gen_sse5_fnmsub<mode>4 is emitted on the
;; same four operands.
1984 (define_expand "sse5i_fnmsub<mode>4"
1985 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1990 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1991 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1992 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1993 UNSPEC_SSE5_INTRINSIC))]
1996 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1997 if (TARGET_FUSED_MADD)
1999 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
2000 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fnmsub.  Unlike the
;; fmadd/fmsub/fnmadd matchers, operand 1 has no '%' marker here and the
;; last argument to ix86_sse5_valid_op_p is false.
2005 (define_insn "*sse5i_fnmsub<mode>4"
2006 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2011 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2012 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2013 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2014 UNSPEC_SSE5_INTRINSIC))]
2015 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2016 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<MODE>")])
2020 ;; Scalar instructions
;; Expander for the vec_merge (scalar) fmadd intrinsic, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set,
;; gen_sse5_vmfmadd<mode>4 is emitted on the same four operands.
2021 (define_expand "sse5i_vmfmadd<mode>4"
2022 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2024 [(vec_merge:SSEMODEF2P
2027 (match_operand:SSEMODEF2P 1 "register_operand" "")
2028 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2029 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2032 UNSPEC_SSE5_INTRINSIC))]
2035 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2036 if (TARGET_FUSED_MADD)
2038 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2039 operands[2], operands[3]));
2044 ;; For the scalar operations, use operand1 for the upper words that aren't
2045 ;; modified, so restrict the forms that are accepted.
;; Insn matching the UNSPEC vec_merge form of fmadd.  Operand 1 must be a
;; register and is tied to the destination in both alternatives; the "mode"
;; attribute is the scalar mode (<ssescalarmode>).
2046 (define_insn "*sse5i_vmfmadd<mode>4"
2047 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2049 [(vec_merge:SSEMODEF2P
2052 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2053 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2054 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2057 UNSPEC_SSE5_INTRINSIC))]
2058 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2059 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2060 [(set_attr "type" "ssemuladd")
2061 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the vec_merge (scalar) fmsub intrinsic, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set,
;; gen_sse5_vmfmsub<mode>4 is emitted on the same four operands.
2063 (define_expand "sse5i_vmfmsub<mode>4"
2064 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2066 [(vec_merge:SSEMODEF2P
2069 (match_operand:SSEMODEF2P 1 "register_operand" "")
2070 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2071 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2074 UNSPEC_SSE5_INTRINSIC))]
2077 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2078 if (TARGET_FUSED_MADD)
2080 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2081 operands[2], operands[3]));
;; Insn matching the UNSPEC vec_merge form of fmsub.  Operand 1 must be a
;; register and is tied to the destination in both alternatives; the "mode"
;; attribute is the scalar mode (<ssescalarmode>).
2086 (define_insn "*sse5i_vmfmsub<mode>4"
2087 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2089 [(vec_merge:SSEMODEF2P
2092 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2093 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2094 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2097 UNSPEC_SSE5_INTRINSIC))]
2098 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2099 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2100 [(set_attr "type" "ssemuladd")
2101 (set_attr "mode" "<ssescalarmode>")])
2103 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the vec_merge (scalar) fnmadd intrinsic, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  Operand 3 is listed before operands 1 and 2.
;; When TARGET_FUSED_MADD is set, gen_sse5_vmfnmadd<mode>4 is emitted with
;; the operands passed in numeric order.
2104 (define_expand "sse5i_vmfnmadd<mode>4"
2105 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2107 [(vec_merge:SSEMODEF2P
2109 (match_operand:SSEMODEF2P 3 "register_operand" "")
2111 (match_operand:SSEMODEF2P 1 "register_operand" "")
2112 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2115 UNSPEC_SSE5_INTRINSIC))]
2118 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2119 if (TARGET_FUSED_MADD)
2121 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2122 operands[2], operands[3]));
;; Insn matching the UNSPEC vec_merge form of fnmadd.  Here operand 1 is a
;; nonimmediate_operand with the '%' marker ("%0,0"), and the last argument
;; to ix86_sse5_valid_op_p is true, unlike the other *sse5i_vm* matchers.
2127 (define_insn "*sse5i_vmfnmadd<mode>4"
2128 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2130 [(vec_merge:SSEMODEF2P
2132 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2134 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2135 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2138 UNSPEC_SSE5_INTRINSIC))]
2139 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2140 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2141 [(set_attr "type" "ssemuladd")
2142 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the vec_merge (scalar) fnmsub intrinsic, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set,
;; gen_sse5_vmfnmsub<mode>4 is emitted on the same four operands.
2144 (define_expand "sse5i_vmfnmsub<mode>4"
2145 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2147 [(vec_merge:SSEMODEF2P
2151 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2152 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2153 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2156 UNSPEC_SSE5_INTRINSIC))]
2159 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2160 if (TARGET_FUSED_MADD)
2162 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2163 operands[2], operands[3]));
;; Insn matching the UNSPEC vec_merge form of fnmsub.  Operand 1 must be a
;; register and is tied to the destination in both alternatives; the "mode"
;; attribute is the scalar mode (<ssescalarmode>).
2168 (define_insn "*sse5i_vmfnmsub<mode>4"
2169 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2171 [(vec_merge:SSEMODEF2P
2175 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2176 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2177 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2180 UNSPEC_SSE5_INTRINSIC))]
2181 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2182 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2183 [(set_attr "type" "ssemuladd")
2184 (set_attr "mode" "<ssescalarmode>")])
2186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2188 ;; Parallel single-precision floating point conversion operations
2190 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: applies float:V2SF to the V2SI operand 2 (constraint "ym") and
;; writes into the V4SF destination; operand 1 is tied to the destination.
2192 (define_insn "sse_cvtpi2ps"
2193 [(set (match_operand:V4SF 0 "register_operand" "=x")
2196 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2197 (match_operand:V4SF 1 "register_operand" "0")
2200 "cvtpi2ps\t{%2, %0|%0, %2}"
2201 [(set_attr "type" "ssecvt")
2202 (set_attr "mode" "V4SF")])
;; cvtps2pi: unspec conversion of the V4SF operand 1 to V4SI, of which
;; elements 0 and 1 form the V2SI destination (constraint "y").  Tagged
;; with unit "mmx".
2204 (define_insn "sse_cvtps2pi"
2205 [(set (match_operand:V2SI 0 "register_operand" "=y")
2207 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2209 (parallel [(const_int 0) (const_int 1)])))]
2211 "cvtps2pi\t{%1, %0|%0, %1}"
2212 [(set_attr "type" "ssecvt")
2213 (set_attr "unit" "mmx")
2214 (set_attr "mode" "DI")])
;; cvttps2pi: fix:V4SI of the V4SF operand 1, of which elements 0 and 1
;; form the V2SI destination (constraint "y").  Tagged with unit "mmx".
;; NOTE(review): "mode" is "SF" here but "DI" in sse_cvtps2pi -- confirm
;; which is intended.
2216 (define_insn "sse_cvttps2pi"
2217 [(set (match_operand:V2SI 0 "register_operand" "=y")
2219 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2220 (parallel [(const_int 0) (const_int 1)])))]
2222 "cvttps2pi\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "ssecvt")
2224 (set_attr "unit" "mmx")
2225 (set_attr "mode" "SF")])
;; VEX-encoded vcvtsi2ss: float:SF of the SImode operand 2 into the V4SF
;; destination.  Three-operand form, so operand 1 is an independent "x"
;; register rather than being tied to the destination.
2227 (define_insn "*avx_cvtsi2ss"
2228 [(set (match_operand:V4SF 0 "register_operand" "=x")
2231 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2232 (match_operand:V4SF 1 "register_operand" "x")
2235 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2236 [(set_attr "type" "sseicvt")
2237 (set_attr "prefix" "vex")
2238 (set_attr "mode" "SF")])
;; cvtsi2ss: float:SF of the SImode operand 2 into the V4SF destination,
;; with operand 1 tied to the destination.  The two alternatives ("r" and
;; "m") pair with the per-alternative athlon/amdfam10 decode attributes.
2240 (define_insn "sse_cvtsi2ss"
2241 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2244 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2245 (match_operand:V4SF 1 "register_operand" "0,0")
2248 "cvtsi2ss\t{%2, %0|%0, %2}"
2249 [(set_attr "type" "sseicvt")
2250 (set_attr "athlon_decode" "vector,double")
2251 (set_attr "amdfam10_decode" "vector,double")
2252 (set_attr "mode" "SF")])
;; VEX-encoded vcvtsi2ssq: float:SF of the DImode operand 2 into the V4SF
;; destination; operand 1 is an independent "x" register.  Requires
;; TARGET_AVX and TARGET_64BIT.
2254 (define_insn "*avx_cvtsi2ssq"
2255 [(set (match_operand:V4SF 0 "register_operand" "=x")
2258 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2259 (match_operand:V4SF 1 "register_operand" "x")
2261 "TARGET_AVX && TARGET_64BIT"
2262 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2263 [(set_attr "type" "sseicvt")
2264 (set_attr "prefix" "vex")
2265 (set_attr "mode" "SF")])
;; cvtsi2ssq: float:SF of the DImode operand 2 into the V4SF destination,
;; with operand 1 tied to the destination.  Requires TARGET_SSE and
;; TARGET_64BIT.  Operand 2's alternatives are register ("r") and memory
;; ("m") so that each lines up with exactly one entry of the per-alternative
;; athlon/amdfam10 decode attributes below, as in sse_cvtsi2ss.
2267 (define_insn "sse_cvtsi2ssq"
2268 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2271 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2272 (match_operand:V4SF 1 "register_operand" "0,0")
2274 "TARGET_SSE && TARGET_64BIT"
2275 "cvtsi2ssq\t{%2, %0|%0, %2}"
2276 [(set_attr "type" "sseicvt")
2277 (set_attr "athlon_decode" "vector,double")
2278 (set_attr "amdfam10_decode" "vector,double")
2279 (set_attr "mode" "SF")])
;; cvtss2si (optionally VEX-prefixed via %v): UNSPEC_FIX_NOTRUNC conversion
;; of element 0 of the V4SF operand 1 into an SImode register.
2281 (define_insn "sse_cvtss2si"
2282 [(set (match_operand:SI 0 "register_operand" "=r,r")
2285 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2286 (parallel [(const_int 0)]))]
2287 UNSPEC_FIX_NOTRUNC))]
2289 "%vcvtss2si\t{%1, %0|%0, %1}"
2290 [(set_attr "type" "sseicvt")
2291 (set_attr "athlon_decode" "double,vector")
2292 (set_attr "prefix_rep" "1")
2293 (set_attr "prefix" "maybe_vex")
2294 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a plain SFmode operand rather than
;; an element selected from a V4SF vector; same UNSPEC_FIX_NOTRUNC.
2296 (define_insn "sse_cvtss2si_2"
2297 [(set (match_operand:SI 0 "register_operand" "=r,r")
2298 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2299 UNSPEC_FIX_NOTRUNC))]
2301 "%vcvtss2si\t{%1, %0|%0, %1}"
2302 [(set_attr "type" "sseicvt")
2303 (set_attr "athlon_decode" "double,vector")
2304 (set_attr "amdfam10_decode" "double,double")
2305 (set_attr "prefix_rep" "1")
2306 (set_attr "prefix" "maybe_vex")
2307 (set_attr "mode" "SI")])
;; cvtss2siq: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF
;; operand 1 into a DImode register.  Requires TARGET_SSE and TARGET_64BIT.
2309 (define_insn "sse_cvtss2siq"
2310 [(set (match_operand:DI 0 "register_operand" "=r,r")
2313 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2314 (parallel [(const_int 0)]))]
2315 UNSPEC_FIX_NOTRUNC))]
2316 "TARGET_SSE && TARGET_64BIT"
2317 "%vcvtss2siq\t{%1, %0|%0, %1}"
2318 [(set_attr "type" "sseicvt")
2319 (set_attr "athlon_decode" "double,vector")
2320 (set_attr "prefix_rep" "1")
2321 (set_attr "prefix" "maybe_vex")
2322 (set_attr "mode" "DI")])
;; Variant of cvtss2siq whose source is a plain SFmode operand; DImode
;; result.  Requires TARGET_SSE and TARGET_64BIT.
2324 (define_insn "sse_cvtss2siq_2"
2325 [(set (match_operand:DI 0 "register_operand" "=r,r")
2326 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2327 UNSPEC_FIX_NOTRUNC))]
2328 "TARGET_SSE && TARGET_64BIT"
2329 "%vcvtss2siq\t{%1, %0|%0, %1}"
2330 [(set_attr "type" "sseicvt")
2331 (set_attr "athlon_decode" "double,vector")
2332 (set_attr "amdfam10_decode" "double,double")
2333 (set_attr "prefix_rep" "1")
2334 (set_attr "prefix" "maybe_vex")
2335 (set_attr "mode" "DI")])
;; cvttss2si (optionally VEX-prefixed via %v): converts element 0 of the
;; V4SF operand 1 into an SImode register.
2337 (define_insn "sse_cvttss2si"
2338 [(set (match_operand:SI 0 "register_operand" "=r,r")
2341 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2342 (parallel [(const_int 0)]))))]
2344 "%vcvttss2si\t{%1, %0|%0, %1}"
2345 [(set_attr "type" "sseicvt")
2346 (set_attr "athlon_decode" "double,vector")
2347 (set_attr "amdfam10_decode" "double,double")
2348 (set_attr "prefix_rep" "1")
2349 (set_attr "prefix" "maybe_vex")
2350 (set_attr "mode" "SI")])
;; cvttss2siq: converts element 0 of the V4SF operand 1 into a DImode
;; register.  Requires TARGET_SSE and TARGET_64BIT.
2352 (define_insn "sse_cvttss2siq"
2353 [(set (match_operand:DI 0 "register_operand" "=r,r")
2356 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2357 (parallel [(const_int 0)]))))]
2358 "TARGET_SSE && TARGET_64BIT"
2359 "%vcvttss2siq\t{%1, %0|%0, %1}"
2360 [(set_attr "type" "sseicvt")
2361 (set_attr "athlon_decode" "double,vector")
2362 (set_attr "amdfam10_decode" "double,double")
2363 (set_attr "prefix_rep" "1")
2364 (set_attr "prefix" "maybe_vex")
2365 (set_attr "mode" "DI")])
;; VEX-encoded vcvtdq2ps, iterated over AVXMODEDCVTDQ2PS: float conversion
;; of the <avxcvtvecmode> operand 1 into the destination.
2367 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2368 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2369 (float:AVXMODEDCVTDQ2PS
2370 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2372 "vcvtdq2ps\t{%1, %0|%0, %1}"
2373 [(set_attr "type" "ssecvt")
2374 (set_attr "prefix" "vex")
2375 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: float:V4SF conversion of the V4SI operand 1 (register or
;; memory) into a V4SF register.
2377 (define_insn "sse2_cvtdq2ps"
2378 [(set (match_operand:V4SF 0 "register_operand" "=x")
2379 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2381 "cvtdq2ps\t{%1, %0|%0, %1}"
2382 [(set_attr "type" "ssecvt")
2383 (set_attr "mode" "V4SF")])
;; VEX-encoded vcvtps2dq, iterated over AVXMODEDCVTPS2DQ: conversion of the
;; <avxcvtvecmode> operand 1, expressed as UNSPEC_FIX_NOTRUNC.
2385 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2386 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2387 (unspec:AVXMODEDCVTPS2DQ
2388 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2389 UNSPEC_FIX_NOTRUNC))]
2391 "vcvtps2dq\t{%1, %0|%0, %1}"
2392 [(set_attr "type" "ssecvt")
2393 (set_attr "prefix" "vex")
2394 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: conversion of the V4SF operand 1 into a V4SI register,
;; expressed as UNSPEC_FIX_NOTRUNC.
2396 (define_insn "sse2_cvtps2dq"
2397 [(set (match_operand:V4SI 0 "register_operand" "=x")
2398 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2399 UNSPEC_FIX_NOTRUNC))]
2401 "cvtps2dq\t{%1, %0|%0, %1}"
2402 [(set_attr "type" "ssecvt")
2403 (set_attr "prefix_data16" "1")
2404 (set_attr "mode" "TI")])
;; VEX-encoded vcvttps2dq, iterated over AVXMODEDCVTPS2DQ: fix conversion
;; of the <avxcvtvecmode> operand 1 into the destination.
2406 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2407 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2408 (fix:AVXMODEDCVTPS2DQ
2409 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2411 "vcvttps2dq\t{%1, %0|%0, %1}"
2412 [(set_attr "type" "ssecvt")
2413 (set_attr "prefix" "vex")
2414 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: fix:V4SI conversion of the V4SF operand 1 (register or
;; memory) into a V4SI register.
2416 (define_insn "sse2_cvttps2dq"
2417 [(set (match_operand:V4SI 0 "register_operand" "=x")
2418 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2420 "cvttps2dq\t{%1, %0|%0, %1}"
2421 [(set_attr "type" "ssecvt")
2422 (set_attr "prefix_rep" "1")
2423 (set_attr "mode" "TI")])
2425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2427 ;; Parallel double-precision floating point conversion operations
2429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF conversion of the V2SI operand 1.  Alternative 0
;; takes the source with constraint "y" and is tagged unit "mmx";
;; alternative 1 takes it from memory and leaves the unit at its default.
2431 (define_insn "sse2_cvtpi2pd"
2432 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2433 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2435 "cvtpi2pd\t{%1, %0|%0, %1}"
2436 [(set_attr "type" "ssecvt")
2437 (set_attr "unit" "mmx,*")
2438 (set_attr "mode" "V2DF")])
;; cvtpd2pi: conversion of the V2DF operand 1 into a V2SI destination
;; (constraint "y"), expressed as UNSPEC_FIX_NOTRUNC.  Tagged unit "mmx".
2440 (define_insn "sse2_cvtpd2pi"
2441 [(set (match_operand:V2SI 0 "register_operand" "=y")
2442 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2443 UNSPEC_FIX_NOTRUNC))]
2445 "cvtpd2pi\t{%1, %0|%0, %1}"
2446 [(set_attr "type" "ssecvt")
2447 (set_attr "unit" "mmx")
2448 (set_attr "prefix_data16" "1")
2449 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI conversion of the V2DF operand 1 into a V2SI
;; destination (constraint "y").  Tagged unit "mmx".
2451 (define_insn "sse2_cvttpd2pi"
2452 [(set (match_operand:V2SI 0 "register_operand" "=y")
2453 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2455 "cvttpd2pi\t{%1, %0|%0, %1}"
2456 [(set_attr "type" "ssecvt")
2457 (set_attr "unit" "mmx")
2458 (set_attr "prefix_data16" "1")
2459 (set_attr "mode" "TI")])
;; VEX-encoded vcvtsi2sd: float:DF of the SImode operand 2 into the V2DF
;; destination; operand 1 is an independent "x" register.
2461 (define_insn "*avx_cvtsi2sd"
2462 [(set (match_operand:V2DF 0 "register_operand" "=x")
2465 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2466 (match_operand:V2DF 1 "register_operand" "x")
2469 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2470 [(set_attr "type" "sseicvt")
2471 (set_attr "prefix" "vex")
2472 (set_attr "mode" "DF")])
;; cvtsi2sd: float:DF of the SImode operand 2 into the V2DF destination,
;; with operand 1 tied to the destination.  The "r" and "m" alternatives
;; carry separate athlon/amdfam10 decode attributes.
2474 (define_insn "sse2_cvtsi2sd"
2475 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2478 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2479 (match_operand:V2DF 1 "register_operand" "0,0")
2482 "cvtsi2sd\t{%2, %0|%0, %2}"
2483 [(set_attr "type" "sseicvt")
2484 (set_attr "mode" "DF")
2485 (set_attr "athlon_decode" "double,direct")
2486 (set_attr "amdfam10_decode" "vector,double")])
;; VEX-encoded vcvtsi2sdq: float:DF of the DImode operand 2 into the V2DF
;; destination; operand 1 is an independent "x" register.  Requires
;; TARGET_AVX and TARGET_64BIT.
2488 (define_insn "*avx_cvtsi2sdq"
2489 [(set (match_operand:V2DF 0 "register_operand" "=x")
2492 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2493 (match_operand:V2DF 1 "register_operand" "x")
2495 "TARGET_AVX && TARGET_64BIT"
2496 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2497 [(set_attr "type" "sseicvt")
2498 (set_attr "prefix" "vex")
2499 (set_attr "mode" "DF")])
;; cvtsi2sdq: float:DF of the DImode operand 2 into the V2DF destination,
;; with operand 1 tied to the destination.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
2501 (define_insn "sse2_cvtsi2sdq"
2502 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2505 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2506 (match_operand:V2DF 1 "register_operand" "0,0")
2508 "TARGET_SSE2 && TARGET_64BIT"
2509 "cvtsi2sdq\t{%2, %0|%0, %2}"
2510 [(set_attr "type" "sseicvt")
2511 (set_attr "mode" "DF")
2512 (set_attr "athlon_decode" "double,direct")
2513 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si (optionally VEX-prefixed via %v): UNSPEC_FIX_NOTRUNC conversion
;; of element 0 of the V2DF operand 1 into an SImode register.
2515 (define_insn "sse2_cvtsd2si"
2516 [(set (match_operand:SI 0 "register_operand" "=r,r")
2519 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2520 (parallel [(const_int 0)]))]
2521 UNSPEC_FIX_NOTRUNC))]
2523 "%vcvtsd2si\t{%1, %0|%0, %1}"
2524 [(set_attr "type" "sseicvt")
2525 (set_attr "athlon_decode" "double,vector")
2526 (set_attr "prefix_rep" "1")
2527 (set_attr "prefix" "maybe_vex")
2528 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a plain DFmode operand rather than
;; an element selected from a V2DF vector; same UNSPEC_FIX_NOTRUNC.
2530 (define_insn "sse2_cvtsd2si_2"
2531 [(set (match_operand:SI 0 "register_operand" "=r,r")
2532 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2533 UNSPEC_FIX_NOTRUNC))]
2535 "%vcvtsd2si\t{%1, %0|%0, %1}"
2536 [(set_attr "type" "sseicvt")
2537 (set_attr "athlon_decode" "double,vector")
2538 (set_attr "amdfam10_decode" "double,double")
2539 (set_attr "prefix_rep" "1")
2540 (set_attr "prefix" "maybe_vex")
2541 (set_attr "mode" "SI")])
;; cvtsd2siq: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V2DF
;; operand 1 into a DImode register.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
2543 (define_insn "sse2_cvtsd2siq"
2544 [(set (match_operand:DI 0 "register_operand" "=r,r")
2547 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2548 (parallel [(const_int 0)]))]
2549 UNSPEC_FIX_NOTRUNC))]
2550 "TARGET_SSE2 && TARGET_64BIT"
2551 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2552 [(set_attr "type" "sseicvt")
2553 (set_attr "athlon_decode" "double,vector")
2554 (set_attr "prefix_rep" "1")
2555 (set_attr "prefix" "maybe_vex")
2556 (set_attr "mode" "DI")])
;; Variant of cvtsd2siq whose source is a plain DFmode operand; DImode
;; result.  Requires TARGET_SSE2 and TARGET_64BIT.
2558 (define_insn "sse2_cvtsd2siq_2"
2559 [(set (match_operand:DI 0 "register_operand" "=r,r")
2560 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2561 UNSPEC_FIX_NOTRUNC))]
2562 "TARGET_SSE2 && TARGET_64BIT"
2563 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2564 [(set_attr "type" "sseicvt")
2565 (set_attr "athlon_decode" "double,vector")
2566 (set_attr "amdfam10_decode" "double,double")
2567 (set_attr "prefix_rep" "1")
2568 (set_attr "prefix" "maybe_vex")
2569 (set_attr "mode" "DI")])
;; cvttsd2si (optionally VEX-prefixed via %v): converts element 0 of the
;; V2DF operand 1 into an SImode register.
2571 (define_insn "sse2_cvttsd2si"
2572 [(set (match_operand:SI 0 "register_operand" "=r,r")
2575 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2576 (parallel [(const_int 0)]))))]
2578 "%vcvttsd2si\t{%1, %0|%0, %1}"
2579 [(set_attr "type" "sseicvt")
2580 (set_attr "prefix_rep" "1")
2581 (set_attr "prefix" "maybe_vex")
2582 (set_attr "mode" "SI")
2583 (set_attr "athlon_decode" "double,vector")
2584 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: converts element 0 of the V2DF operand 1 into a DImode
;; register.  Requires TARGET_SSE2 and TARGET_64BIT.
2586 (define_insn "sse2_cvttsd2siq"
2587 [(set (match_operand:DI 0 "register_operand" "=r,r")
2590 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2591 (parallel [(const_int 0)]))))]
2592 "TARGET_SSE2 && TARGET_64BIT"
2593 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2594 [(set_attr "type" "sseicvt")
2595 (set_attr "prefix_rep" "1")
2596 (set_attr "prefix" "maybe_vex")
2597 (set_attr "mode" "DI")
2598 (set_attr "athlon_decode" "double,vector")
2599 (set_attr "amdfam10_decode" "double,double")])
;; VEX-encoded vcvtdq2pd, 256-bit result: float:V4DF conversion of the
;; V4SI operand 1 into a V4DF register.
2601 (define_insn "avx_cvtdq2pd256"
2602 [(set (match_operand:V4DF 0 "register_operand" "=x")
2603 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2605 "vcvtdq2pd\t{%1, %0|%0, %1}"
2606 [(set_attr "type" "ssecvt")
2607 (set_attr "prefix" "vex")
2608 (set_attr "mode" "V4DF")])
;; cvtdq2pd (optionally VEX-prefixed via %v): converts elements 0 and 1 of
;; the V4SI operand 1 into a V2DF register.
2610 (define_insn "sse2_cvtdq2pd"
2611 [(set (match_operand:V2DF 0 "register_operand" "=x")
2614 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2615 (parallel [(const_int 0) (const_int 1)]))))]
2617 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2618 [(set_attr "type" "ssecvt")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "mode" "V2DF")])
;; VEX-encoded vcvtpd2dq with the {y} suffix in AT&T syntax: conversion of
;; the V4DF operand 1 into a V4SI register, expressed as
;; UNSPEC_FIX_NOTRUNC.
2622 (define_insn "avx_cvtpd2dq256"
2623 [(set (match_operand:V4SI 0 "register_operand" "=x")
2624 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2625 UNSPEC_FIX_NOTRUNC))]
2627 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2628 [(set_attr "type" "ssecvt")
2629 (set_attr "prefix" "vex")
2630 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted via an unspec:V2SI
;; into the V4SI destination.  The preparation statement sets operands[2]
;; to the V2SImode zero constant.
2632 (define_expand "sse2_cvtpd2dq"
2633 [(set (match_operand:V4SI 0 "register_operand" "")
2635 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2639 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: unspec:V2SI of the V2DF operand 1 combined with the
;; const0 operand 2 into a V4SI destination.  The output code picks the
;; VEX mnemonic (with {x} suffix) when TARGET_AVX, else the legacy one.
2641 (define_insn "*sse2_cvtpd2dq"
2642 [(set (match_operand:V4SI 0 "register_operand" "=x")
2644 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2646 (match_operand:V2SI 2 "const0_operand" "")))]
2648 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2649 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2650 [(set_attr "type" "ssecvt")
2651 (set_attr "prefix_rep" "1")
2652 (set_attr "prefix" "maybe_vex")
2653 (set_attr "mode" "TI")
2654 (set_attr "amdfam10_decode" "double")])
;; VEX-encoded vcvttpd2dq with the {y} suffix in AT&T syntax: fix:V4SI
;; conversion of the V4DF operand 1 into a V4SI register.
2656 (define_insn "avx_cvttpd2dq256"
2657 [(set (match_operand:V4SI 0 "register_operand" "=x")
2658 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2660 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "ssecvt")
2662 (set_attr "prefix" "vex")
2663 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: fix:V2SI of the V2DF operand 1 into the V4SI
;; destination.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
2665 (define_expand "sse2_cvttpd2dq"
2666 [(set (match_operand:V4SI 0 "register_operand" "")
2668 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2671 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: fix:V2SI of the V2DF operand 1 combined with the
;; const0 operand 2 into a V4SI destination.  The output code picks the
;; VEX mnemonic (with {x} suffix) when TARGET_AVX, else the legacy one.
2673 (define_insn "*sse2_cvttpd2dq"
2674 [(set (match_operand:V4SI 0 "register_operand" "=x")
2676 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2677 (match_operand:V2SI 2 "const0_operand" "")))]
2679 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2680 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2681 [(set_attr "type" "ssecvt")
2682 (set_attr "prefix_rep" "1")
2683 (set_attr "prefix" "maybe_vex")
2684 (set_attr "mode" "TI")
2685 (set_attr "amdfam10_decode" "double")])
;; VEX-encoded vcvtsd2ss: float_truncate:V2SF of the V2DF operand 2 into
;; the V4SF destination; operand 1 is an independent "x" register.
2687 (define_insn "*avx_cvtsd2ss"
2688 [(set (match_operand:V4SF 0 "register_operand" "=x")
2691 (float_truncate:V2SF
2692 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2693 (match_operand:V4SF 1 "register_operand" "x")
2696 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2697 [(set_attr "type" "ssecvt")
2698 (set_attr "prefix" "vex")
2699 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate:V2SF of the V2DF operand 2 into the V4SF
;; destination, with operand 1 tied to the destination.  The "x" and "m"
;; alternatives carry separate athlon/amdfam10 decode attributes.
2701 (define_insn "sse2_cvtsd2ss"
2702 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2705 (float_truncate:V2SF
2706 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2707 (match_operand:V4SF 1 "register_operand" "0,0")
2710 "cvtsd2ss\t{%2, %0|%0, %2}"
2711 [(set_attr "type" "ssecvt")
2712 (set_attr "athlon_decode" "vector,double")
2713 (set_attr "amdfam10_decode" "vector,double")
2714 (set_attr "mode" "SF")])
;; VEX-encoded vcvtss2sd: converts from elements 0 and 1 of the V4SF
;; operand 2 into the V2DF destination; operand 1 is an independent "x"
;; register.
2716 (define_insn "*avx_cvtss2sd"
2717 [(set (match_operand:V2DF 0 "register_operand" "=x")
2721 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2722 (parallel [(const_int 0) (const_int 1)])))
2723 (match_operand:V2DF 1 "register_operand" "x")
2726 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2727 [(set_attr "type" "ssecvt")
2728 (set_attr "prefix" "vex")
2729 (set_attr "mode" "DF")])
;; cvtss2sd: converts from elements 0 and 1 of the V4SF operand 2 into the
;; V2DF destination, with operand 1 tied to the destination.
2731 (define_insn "sse2_cvtss2sd"
2732 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2736 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2737 (parallel [(const_int 0) (const_int 1)])))
2738 (match_operand:V2DF 1 "register_operand" "0,0")
2741 "cvtss2sd\t{%2, %0|%0, %2}"
2742 [(set_attr "type" "ssecvt")
2743 (set_attr "amdfam10_decode" "vector,double")
2744 (set_attr "mode" "DF")])
;; VEX-encoded vcvtpd2ps with the {y} suffix in AT&T syntax:
;; float_truncate:V4SF of the V4DF operand 1 into a V4SF register.
2746 (define_insn "avx_cvtpd2ps256"
2747 [(set (match_operand:V4SF 0 "register_operand" "=x")
2748 (float_truncate:V4SF
2749 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2751 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2752 [(set_attr "type" "ssecvt")
2753 (set_attr "prefix" "vex")
2754 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate:V2SF of the V2DF operand 1 into
;; the V4SF destination.  The preparation statement sets operands[2] to
;; the V2SFmode zero constant.
2756 (define_expand "sse2_cvtpd2ps"
2757 [(set (match_operand:V4SF 0 "register_operand" "")
2759 (float_truncate:V2SF
2760 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2763 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: float_truncate:V2SF of the V2DF operand 1 combined
;; with the const0 operand 2 into a V4SF destination.  The output code
;; picks the VEX mnemonic (with {x} suffix) when TARGET_AVX, else the
;; legacy one.
2765 (define_insn "*sse2_cvtpd2ps"
2766 [(set (match_operand:V4SF 0 "register_operand" "=x")
2768 (float_truncate:V2SF
2769 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2770 (match_operand:V2SF 2 "const0_operand" "")))]
2772 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2773 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2774 [(set_attr "type" "ssecvt")
2775 (set_attr "prefix_data16" "1")
2776 (set_attr "prefix" "maybe_vex")
2777 (set_attr "mode" "V4SF")
2778 (set_attr "amdfam10_decode" "double")])
;; VEX-encoded vcvtps2pd, 256-bit result: converts the V4SF operand 1 into
;; a V4DF register.
2780 (define_insn "avx_cvtps2pd256"
2781 [(set (match_operand:V4DF 0 "register_operand" "=x")
2783 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2785 "vcvtps2pd\t{%1, %0|%0, %1}"
2786 [(set_attr "type" "ssecvt")
2787 (set_attr "prefix" "vex")
2788 (set_attr "mode" "V4DF")])
;; cvtps2pd (optionally VEX-prefixed via %v): converts elements 0 and 1 of
;; the V4SF operand 1 into a V2DF register.
2790 (define_insn "sse2_cvtps2pd"
2791 [(set (match_operand:V2DF 0 "register_operand" "=x")
2794 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2795 (parallel [(const_int 0) (const_int 1)]))))]
2797 "%vcvtps2pd\t{%1, %0|%0, %1}"
2798 [(set_attr "type" "ssecvt")
2799 (set_attr "prefix" "maybe_vex")
2800 (set_attr "mode" "V2DF")
2801 (set_attr "amdfam10_decode" "direct")])
;; Expander producing the V2DF operand 0 from the V4SF operand 1 in two
;; sets, going through a fresh V4SFmode pseudo allocated into operands[2]
;; by the preparation code.
2803 (define_expand "vec_unpacks_hi_v4sf"
2808 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2809 (parallel [(const_int 6)
2813 (set (match_operand:V2DF 0 "register_operand" "")
2817 (parallel [(const_int 0) (const_int 1)]))))]
2820 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing the V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1.
2823 (define_expand "vec_unpacks_lo_v4sf"
2824 [(set (match_operand:V2DF 0 "register_operand" "")
2827 (match_operand:V4SF 1 "nonimmediate_operand" "")
2828 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: widens the V8HI operand 1 into a V4SImode temporary with
;; gen_vec_unpacks_hi_v8hi, then converts that temporary into the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2831 (define_expand "vec_unpacks_float_hi_v8hi"
2832 [(match_operand:V4SF 0 "register_operand" "")
2833 (match_operand:V8HI 1 "register_operand" "")]
2836 rtx tmp = gen_reg_rtx (V4SImode);
2838 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2839 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: widens the V8HI operand 1 into a V4SImode temporary with
;; gen_vec_unpacks_lo_v8hi, then converts that temporary into the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2843 (define_expand "vec_unpacks_float_lo_v8hi"
2844 [(match_operand:V4SF 0 "register_operand" "")
2845 (match_operand:V8HI 1 "register_operand" "")]
2848 rtx tmp = gen_reg_rtx (V4SImode);
2850 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2851 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: widens the V8HI operand 1 into a V4SImode temporary with
;; gen_vec_unpacku_hi_v8hi, then converts that temporary into the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2855 (define_expand "vec_unpacku_float_hi_v8hi"
2856 [(match_operand:V4SF 0 "register_operand" "")
2857 (match_operand:V8HI 1 "register_operand" "")]
2860 rtx tmp = gen_reg_rtx (V4SImode);
2862 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2863 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: widens the V8HI operand 1 into a V4SImode temporary with
;; gen_vec_unpacku_lo_v8hi, then converts that temporary into the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2867 (define_expand "vec_unpacku_float_lo_v8hi"
2868 [(match_operand:V4SF 0 "register_operand" "")
2869 (match_operand:V8HI 1 "register_operand" "")]
2872 rtx tmp = gen_reg_rtx (V4SImode);
2874 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2875 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing the V2DF operand 0 from the V4SI operand 1 in two
;; sets, going through a fresh V4SImode pseudo allocated into operands[2]
;; by the preparation code.
2879 (define_expand "vec_unpacks_float_hi_v4si"
2882 (match_operand:V4SI 1 "nonimmediate_operand" "")
2883 (parallel [(const_int 2)
2887 (set (match_operand:V2DF 0 "register_operand" "")
2891 (parallel [(const_int 0) (const_int 1)]))))]
2894 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing the V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1.
2897 (define_expand "vec_unpacks_float_lo_v4si"
2898 [(set (match_operand:V2DF 0 "register_operand" "")
2901 (match_operand:V4SI 1 "nonimmediate_operand" "")
2902 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: converts each V2DF input (operands 1 and 2) into its own
;; V4SFmode temporary with gen_sse2_cvtpd2ps, then combines the two
;; temporaries into the V4SF operand 0 with gen_sse_movlhps.
2905 (define_expand "vec_pack_trunc_v2df"
2906 [(match_operand:V4SF 0 "register_operand" "")
2907 (match_operand:V2DF 1 "nonimmediate_operand" "")
2908 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2913 r1 = gen_reg_rtx (V4SFmode);
2914 r2 = gen_reg_rtx (V4SFmode);
2916 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2917 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2918 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: converts each V2DF input (operands 1 and 2) into its own
;; V4SImode temporary with gen_sse2_cvttpd2dq, then combines the two
;; temporaries into operand 0 with gen_sse2_punpcklqdq, viewing all three
;; as V2DImode via gen_lowpart.
2922 (define_expand "vec_pack_sfix_trunc_v2df"
2923 [(match_operand:V4SI 0 "register_operand" "")
2924 (match_operand:V2DF 1 "nonimmediate_operand" "")
2925 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2930 r1 = gen_reg_rtx (V4SImode);
2931 r2 = gen_reg_rtx (V4SImode);
2933 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2934 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2935 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2936 gen_lowpart (V2DImode, r1),
2937 gen_lowpart (V2DImode, r2)));
;; Expander: same shape as vec_pack_sfix_trunc_v2df but using
;; gen_sse2_cvtpd2dq for the two conversions; the V4SImode temporaries are
;; then combined into operand 0 with gen_sse2_punpcklqdq on V2DImode views.
2941 (define_expand "vec_pack_sfix_v2df"
2942 [(match_operand:V4SI 0 "register_operand" "")
2943 (match_operand:V2DF 1 "nonimmediate_operand" "")
2944 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2949 r1 = gen_reg_rtx (V4SImode);
2950 r2 = gen_reg_rtx (V4SImode);
2952 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2953 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2954 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2955 gen_lowpart (V2DImode, r1),
2956 gen_lowpart (V2DImode, r2)));
2960 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2962 ;; Parallel single-precision floating point element swizzling
2964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for movhlps on V4SF operands.  All three operands may be
;; memory at this point; the preparation statement calls
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands) before the
;; pattern is emitted.
2966 (define_expand "sse_movhlps_exp"
2967 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2970 (match_operand:V4SF 1 "nonimmediate_operand" "")
2971 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2972 (parallel [(const_int 6)
2977 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps insn with three alternatives: register form (vmovhlps),
;; offsettable-memory source read through %H2 (vmovlps), and a memory
;; destination (vmovhps).  The condition rejects operands 1 and 2 both
;; being memory.
2979 (define_insn "*avx_movhlps"
2980 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2983 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
2984 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2985 (parallel [(const_int 6)
2989 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2991 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2992 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2993 vmovhps\t{%2, %0|%0, %2}"
2994 [(set_attr "type" "ssemov")
2995 (set_attr "prefix" "vex")
2996 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movhlps insn with three alternatives (movhlps, movlps via %H2,
;; movhps to memory).  Operand 1 is tied to the destination in every
;; alternative.  The condition rejects operands 1 and 2 both being memory.
2998 (define_insn "sse_movhlps"
2999 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3002 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3003 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3004 (parallel [(const_int 6)
3008 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3010 movhlps\t{%2, %0|%0, %2}
3011 movlps\t{%H2, %0|%0, %H2}
3012 movhps\t{%2, %0|%0, %2}"
3013 [(set_attr "type" "ssemov")
3014 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps selection on V4SF operands.  The preparation
;; statement hands the operands to ix86_fixup_binary_operands before the
;; pattern is emitted.
3016 (define_expand "sse_movlhps_exp"
3017 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3020 (match_operand:V4SF 1 "nonimmediate_operand" "")
3021 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3022 (parallel [(const_int 0)
3027 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (three-operand) form of movlhps.  Alternatives: register-register
;; vmovlhps; vmovhps loading operand 2 from memory; vmovlps storing
;; operand 2 at offset 8 (%H0) of an offsettable memory destination.
3029 (define_insn "*avx_movlhps"
3030 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3033 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3034 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3035 (parallel [(const_int 0)
3039 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3041 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3042 vmovhps\t{%2, %1, %0|%0, %1, %2}
3043 vmovlps\t{%2, %H0|%H0, %2}"
3044 [(set_attr "type" "ssemov")
3045 (set_attr "prefix" "vex")
3046 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE (two-operand) form of movlhps; operand 1 is tied to the
;; destination.  Alternatives: movlhps from a register, movhps from
;; memory, movlps store at offset 8 (%H0) of the memory destination.
3048 (define_insn "sse_movlhps"
3049 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3052 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3053 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3054 (parallel [(const_int 0)
3058 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3060 movlhps\t{%2, %0|%0, %2}
3061 movhps\t{%2, %0|%0, %2}
3062 movlps\t{%2, %H0|%H0, %2}"
3063 [(set_attr "type" "ssemov")
3064 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps: interleaves elements of operands 1 and 2 using the
;; selection 2,10,3,11,6,14,7,15, i.e. the upper two floats of each
;; 128-bit lane of both inputs.
3066 (define_insn "avx_unpckhps256"
3067 [(set (match_operand:V8SF 0 "register_operand" "=x")
3070 (match_operand:V8SF 1 "register_operand" "x")
3071 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3072 (parallel [(const_int 2) (const_int 10)
3073 (const_int 3) (const_int 11)
3074 (const_int 6) (const_int 14)
3075 (const_int 7) (const_int 15)])))]
3077 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3078 [(set_attr "type" "sselog")
3079 (set_attr "prefix" "vex")
3080 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps: interleaves the upper two floats of operands 1
;; and 2 (selection 2,6,3,7) into a separate destination register.
3082 (define_insn "*avx_unpckhps"
3083 [(set (match_operand:V4SF 0 "register_operand" "=x")
3086 (match_operand:V4SF 1 "register_operand" "x")
3087 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3088 (parallel [(const_int 2) (const_int 6)
3089 (const_int 3) (const_int 7)])))]
3091 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3092 [(set_attr "type" "sselog")
3093 (set_attr "prefix" "vex")
3094 (set_attr "mode" "V4SF")])
;; Legacy SSE unpckhps: same selection as the AVX form (2,6,3,7) but
;; operand 1 is tied to the destination register.
3096 (define_insn "sse_unpckhps"
3097 [(set (match_operand:V4SF 0 "register_operand" "=x")
3100 (match_operand:V4SF 1 "register_operand" "0")
3101 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3102 (parallel [(const_int 2) (const_int 6)
3103 (const_int 3) (const_int 7)])))]
3105 "unpckhps\t{%2, %0|%0, %2}"
3106 [(set_attr "type" "sselog")
3107 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps: interleaves elements of operands 1 and 2 using the
;; selection 0,8,1,9,4,12,5,13, i.e. the lower two floats of each
;; 128-bit lane of both inputs.
3109 (define_insn "avx_unpcklps256"
3110 [(set (match_operand:V8SF 0 "register_operand" "=x")
3113 (match_operand:V8SF 1 "register_operand" "x")
3114 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3115 (parallel [(const_int 0) (const_int 8)
3116 (const_int 1) (const_int 9)
3117 (const_int 4) (const_int 12)
3118 (const_int 5) (const_int 13)])))]
3120 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3121 [(set_attr "type" "sselog")
3122 (set_attr "prefix" "vex")
3123 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps: interleaves the lower two floats of operands 1
;; and 2 (selection 0,4,1,5) into a separate destination register.
3125 (define_insn "*avx_unpcklps"
3126 [(set (match_operand:V4SF 0 "register_operand" "=x")
3129 (match_operand:V4SF 1 "register_operand" "x")
3130 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3131 (parallel [(const_int 0) (const_int 4)
3132 (const_int 1) (const_int 5)])))]
3134 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3135 [(set_attr "type" "sselog")
3136 (set_attr "prefix" "vex")
3137 (set_attr "mode" "V4SF")])
;; Legacy SSE unpcklps: same selection as the AVX form (0,4,1,5) but
;; operand 1 is tied to the destination register.
3139 (define_insn "sse_unpcklps"
3140 [(set (match_operand:V4SF 0 "register_operand" "=x")
3143 (match_operand:V4SF 1 "register_operand" "0")
3144 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3145 (parallel [(const_int 0) (const_int 4)
3146 (const_int 1) (const_int 5)])))]
3148 "unpcklps\t{%2, %0|%0, %2}"
3149 [(set_attr "type" "sselog")
3150 (set_attr "mode" "V4SF")])
3152 ;; These are modeled with the same vec_concat as the others so that we
3153 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: duplicates each odd-indexed float of operand 1
;; (selection 1,1,3,3,5,5,7,7).
3154 (define_insn "avx_movshdup256"
3155 [(set (match_operand:V8SF 0 "register_operand" "=x")
3158 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3160 (parallel [(const_int 1) (const_int 1)
3161 (const_int 3) (const_int 3)
3162 (const_int 5) (const_int 5)
3163 (const_int 7) (const_int 7)])))]
3165 "vmovshdup\t{%1, %0|%0, %1}"
3166 [(set_attr "type" "sse")
3167 (set_attr "prefix" "vex")
3168 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  The %v template prefix lets one pattern
;; serve both the legacy and the VEX encoding (prefix attribute
;; "maybe_vex"); the legacy encoding carries a REP prefix.
3170 (define_insn "sse3_movshdup"
3171 [(set (match_operand:V4SF 0 "register_operand" "=x")
3174 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3176 (parallel [(const_int 1)
3181 "%vmovshdup\t{%1, %0|%0, %1}"
3182 [(set_attr "type" "sse")
3183 (set_attr "prefix_rep" "1")
3184 (set_attr "prefix" "maybe_vex")
3185 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: duplicates each even-indexed float of operand 1
;; (selection 0,0,2,2,4,4,6,6).
3187 (define_insn "avx_movsldup256"
3188 [(set (match_operand:V8SF 0 "register_operand" "=x")
3191 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3193 (parallel [(const_int 0) (const_int 0)
3194 (const_int 2) (const_int 2)
3195 (const_int 4) (const_int 4)
3196 (const_int 6) (const_int 6)])))]
3198 "vmovsldup\t{%1, %0|%0, %1}"
3199 [(set_attr "type" "sse")
3200 (set_attr "prefix" "vex")
3201 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  The %v template prefix lets one pattern
;; serve both the legacy and the VEX encoding (prefix attribute
;; "maybe_vex"); the legacy encoding carries a REP prefix.
3203 (define_insn "sse3_movsldup"
3204 [(set (match_operand:V4SF 0 "register_operand" "=x")
3207 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3209 (parallel [(const_int 0)
3214 "%vmovsldup\t{%1, %0|%0, %1}"
3215 [(set_attr "type" "sse")
3216 (set_attr "prefix_rep" "1")
3217 (set_attr "prefix" "maybe_vex")
3218 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit vshufps immediate (operand 3) and turning it
;; into eight explicit element indices for avx_shufps256_1.  Each 2-bit
;; field is used twice: once for the low lane (offsets 0 and 8) and once
;; for the high lane (offsets 4 and 12).
3220 (define_expand "avx_shufps256"
3221 [(match_operand:V8SF 0 "register_operand" "")
3222 (match_operand:V8SF 1 "register_operand" "")
3223 (match_operand:V8SF 2 "nonimmediate_operand" "")
3224 (match_operand:SI 3 "const_int_operand" "")]
3227 int mask = INTVAL (operands[3]);
3228 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3229 GEN_INT ((mask >> 0) & 3),
3230 GEN_INT ((mask >> 2) & 3),
3231 GEN_INT (((mask >> 4) & 3) + 8),
3232 GEN_INT (((mask >> 6) & 3) + 8),
3233 GEN_INT (((mask >> 0) & 3) + 4),
3234 GEN_INT (((mask >> 2) & 3) + 4),
3235 GEN_INT (((mask >> 4) & 3) + 12),
3236 GEN_INT (((mask >> 6) & 3) + 12)));
3240 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the selection spelled out as eight indices
;; (operands 3-10).  The condition requires the high-lane indices
;; (operands 7-10) to equal the low-lane ones (operands 3-6) plus 4, since
;; one immediate drives both lanes.  The output code rebuilds that
;; immediate from operands 3-6 and stores it back into operands[3].
3241 (define_insn "avx_shufps256_1"
3242 [(set (match_operand:V8SF 0 "register_operand" "=x")
3245 (match_operand:V8SF 1 "register_operand" "x")
3246 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3247 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3248 (match_operand 4 "const_0_to_3_operand" "")
3249 (match_operand 5 "const_8_to_11_operand" "")
3250 (match_operand 6 "const_8_to_11_operand" "")
3251 (match_operand 7 "const_4_to_7_operand" "")
3252 (match_operand 8 "const_4_to_7_operand" "")
3253 (match_operand 9 "const_12_to_15_operand" "")
3254 (match_operand 10 "const_12_to_15_operand" "")])))]
3256 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3257 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3258 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3259 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3262 mask = INTVAL (operands[3]);
3263 mask |= INTVAL (operands[4]) << 2;
3264 mask |= (INTVAL (operands[5]) - 8) << 4;
3265 mask |= (INTVAL (operands[6]) - 8) << 6;
3266 operands[3] = GEN_INT (mask);
3268 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3270 [(set_attr "type" "sselog")
3271 (set_attr "prefix" "vex")
3272 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shufps immediate (operand 3) and turning it
;; into four explicit element indices for sse_shufps_v4sf: two in 0-3
;; (taken from operand 1) and two in 4-7 (taken from operand 2).
3274 (define_expand "sse_shufps"
3275 [(match_operand:V4SF 0 "register_operand" "")
3276 (match_operand:V4SF 1 "register_operand" "")
3277 (match_operand:V4SF 2 "nonimmediate_operand" "")
3278 (match_operand:SI 3 "const_int_operand" "")]
3281 int mask = INTVAL (operands[3]);
3282 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3283 GEN_INT ((mask >> 0) & 3),
3284 GEN_INT ((mask >> 2) & 3),
3285 GEN_INT (((mask >> 4) & 3) + 4),
3286 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX vshufps over the SSEMODE4S modes, with the selection given as four
;; indices (operands 3-6).  The output code packs them back into the
;; 8-bit immediate (two bits each, indices 4-7 rebased to 0-3) and stores
;; it in operands[3] for printing.
3290 (define_insn "*avx_shufps_<mode>"
3291 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3292 (vec_select:SSEMODE4S
3293 (vec_concat:<ssedoublesizemode>
3294 (match_operand:SSEMODE4S 1 "register_operand" "x")
3295 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3296 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3297 (match_operand 4 "const_0_to_3_operand" "")
3298 (match_operand 5 "const_4_to_7_operand" "")
3299 (match_operand 6 "const_4_to_7_operand" "")])))]
3303 mask |= INTVAL (operands[3]) << 0;
3304 mask |= INTVAL (operands[4]) << 2;
3305 mask |= (INTVAL (operands[5]) - 4) << 4;
3306 mask |= (INTVAL (operands[6]) - 4) << 6;
3307 operands[3] = GEN_INT (mask);
3309 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3311 [(set_attr "type" "sselog")
3312 (set_attr "prefix" "vex")
3313 (set_attr "mode" "V4SF")])
;; Legacy SSE shufps over the SSEMODE4S modes; operand 1 is tied to the
;; destination.  The output code packs index operands 3-6 back into the
;; 8-bit immediate (two bits each, indices 4-7 rebased to 0-3) and stores
;; it in operands[3] for printing.
3315 (define_insn "sse_shufps_<mode>"
3316 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3317 (vec_select:SSEMODE4S
3318 (vec_concat:<ssedoublesizemode>
3319 (match_operand:SSEMODE4S 1 "register_operand" "0")
3320 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3321 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3322 (match_operand 4 "const_0_to_3_operand" "")
3323 (match_operand 5 "const_4_to_7_operand" "")
3324 (match_operand 6 "const_4_to_7_operand" "")])))]
3328 mask |= INTVAL (operands[3]) << 0;
3329 mask |= INTVAL (operands[4]) << 2;
3330 mask |= (INTVAL (operands[5]) - 4) << 4;
3331 mask |= (INTVAL (operands[6]) - 4) << 6;
3332 operands[3] = GEN_INT (mask);
3334 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3336 [(set_attr "type" "sselog")
3337 (set_attr "mode" "V4SF")])
;; Extract the upper two floats (elements 2 and 3) of a V4SF as a V2SF.
;; Alternatives: movhps store to memory; movhlps register to register;
;; movlps load from the upper 8 bytes (%H1) of an offsettable memory
;; source.  %v selects the VEX form when AVX is enabled.
3339 (define_insn "sse_storehps"
3340 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3342 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3343 (parallel [(const_int 2) (const_int 3)])))]
3346 %vmovhps\t{%1, %0|%0, %1}
3347 %vmovhlps\t{%1, %d0|%d0, %1}
3348 %vmovlps\t{%H1, %d0|%d0, %H1}"
3349 [(set_attr "type" "ssemov")
3350 (set_attr "prefix" "maybe_vex")
3351 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadhps: keeps elements 0 and 1 of V4SF operand 1 and
;; combines them with V2SF operand 2.  The preparation statement hands the
;; operands to ix86_fixup_binary_operands.
3353 (define_expand "sse_loadhps_exp"
3354 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3357 (match_operand:V4SF 1 "nonimmediate_operand" "")
3358 (parallel [(const_int 0) (const_int 1)]))
3359 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3361 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: low half from operand 1 (elements 0 and 1), high half
;; from V2SF operand 2.  Alternatives: vmovhps from memory; vmovlhps from
;; a register; vmovlps store at offset 8 (%H0) of a memory destination.
3363 (define_insn "*avx_loadhps"
3364 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3367 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3368 (parallel [(const_int 0) (const_int 1)]))
3369 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3372 vmovhps\t{%2, %1, %0|%0, %1, %2}
3373 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3374 vmovlps\t{%2, %H0|%H0, %2}"
3375 [(set_attr "type" "ssemov")
3376 (set_attr "prefix" "vex")
3377 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps; operand 1 is tied to the destination.
;; Alternatives: movhps from memory; movlhps from a register; movlps
;; store at offset 8 (%H0) of a memory destination.
3379 (define_insn "sse_loadhps"
3380 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3383 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3384 (parallel [(const_int 0) (const_int 1)]))
3385 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3388 movhps\t{%2, %0|%0, %2}
3389 movlhps\t{%2, %0|%0, %2}
3390 movlps\t{%2, %H0|%H0, %2}"
3391 [(set_attr "type" "ssemov")
3392 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX storelps: extract the lower two floats (elements 0 and 1) of a
;; V4SF as a V2SF.  Alternatives: vmovlps store to memory; vmovaps
;; register copy; vmovlps load from memory into the destination.
;; The register-copy alternative is a packed-single move, so its mode
;; attribute is V4SF, matching the same alternative of sse_storelps.
3394 (define_insn "*avx_storelps"
3395 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3397 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3398 (parallel [(const_int 0) (const_int 1)])))]
3401 vmovlps\t{%1, %0|%0, %1}
3402 vmovaps\t{%1, %0|%0, %1}
3403 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3404 [(set_attr "type" "ssemov")
3405 (set_attr "prefix" "vex")
3406 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE storelps: extract the lower two floats (elements 0 and 1)
;; of a V4SF as a V2SF.  Alternatives: movlps store to memory; movaps
;; register copy; movlps load from memory.
3408 (define_insn "sse_storelps"
3409 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3411 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3412 (parallel [(const_int 0) (const_int 1)])))]
3415 movlps\t{%1, %0|%0, %1}
3416 movaps\t{%1, %0|%0, %1}
3417 movlps\t{%1, %0|%0, %1}"
3418 [(set_attr "type" "ssemov")
3419 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadlps: V2SF operand 2 supplies the low half and
;; elements 2 and 3 of V4SF operand 1 supply the high half.  The
;; preparation statement hands the operands to ix86_fixup_binary_operands.
3421 (define_expand "sse_loadlps_exp"
3422 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3424 (match_operand:V2SF 2 "nonimmediate_operand" "")
3426 (match_operand:V4SF 1 "nonimmediate_operand" "")
3427 (parallel [(const_int 2) (const_int 3)]))))]
3429 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: low half from V2SF operand 2, high half from elements 2
;; and 3 of operand 1.  Alternatives: register-register vshufps with
;; immediate 0xe4 (low two floats from operand 2, high two from operand
;; 1); vmovlps load from memory; vmovlps store to a memory destination.
;; Every alternative must use the VEX mnemonic: the pattern's prefix
;; attribute is "vex" and the first template has three register operands,
;; which the legacy shufps encoding cannot express.
3431 (define_insn "*avx_loadlps"
3432 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3434 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3436 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3437 (parallel [(const_int 2) (const_int 3)]))))]
3440 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3441 vmovlps\t{%2, %1, %0|%0, %1, %2}
3442 vmovlps\t{%2, %0|%0, %2}"
3443 [(set_attr "type" "sselog,ssemov,ssemov")
3444 (set_attr "prefix" "vex")
3445 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps.  Alternatives: shufps with immediate 0xe4 when
;; operand 2 is already in the destination register; movlps load from
;; memory with operand 1 tied to the destination; movlps store to a
;; memory destination.
3447 (define_insn "sse_loadlps"
3448 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3450 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3452 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3453 (parallel [(const_int 2) (const_int 3)]))))]
3456 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3457 movlps\t{%2, %0|%0, %2}
3458 movlps\t{%2, %0|%0, %2}"
3459 [(set_attr "type" "sselog,ssemov,ssemov")
3460 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss between registers: operand 2 is the scalar
;; source, operand 1 the other V4SF source.
3462 (define_insn "*avx_movss"
3463 [(set (match_operand:V4SF 0 "register_operand" "=x")
3465 (match_operand:V4SF 2 "register_operand" "x")
3466 (match_operand:V4SF 1 "register_operand" "x")
3469 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3470 [(set_attr "type" "ssemov")
3471 (set_attr "prefix" "vex")
3472 (set_attr "mode" "SF")])
;; Legacy SSE movss between registers; operand 1 is tied to the
;; destination and operand 2 is the scalar source.
3474 (define_insn "sse_movss"
3475 [(set (match_operand:V4SF 0 "register_operand" "=x")
3477 (match_operand:V4SF 2 "register_operand" "x")
3478 (match_operand:V4SF 1 "register_operand" "0")
3481 "movss\t{%2, %0|%0, %2}"
3482 [(set_attr "type" "ssemov")
3483 (set_attr "mode" "SF")])
;; Broadcast an SF register value to all four lanes of a V4SF using
;; vshufps with immediate 0 and operand 1 as both sources.
3485 (define_insn "*vec_dupv4sf_avx"
3486 [(set (match_operand:V4SF 0 "register_operand" "=x")
3488 (match_operand:SF 1 "register_operand" "x")))]
3490 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3491 [(set_attr "type" "sselog1")
3492 (set_attr "prefix" "vex")
3493 (set_attr "mode" "V4SF")])
;; Broadcast an SF value to all four lanes of a V4SF with a legacy shufps
;; of the destination against itself (immediate 0); the input is tied to
;; the destination register.
3495 (define_insn "*vec_dupv4sf"
3496 [(set (match_operand:V4SF 0 "register_operand" "=x")
3498 (match_operand:SF 1 "register_operand" "0")))]
3500 "shufps\t{$0, %0, %0|%0, %0, 0}"
3501 [(set_attr "type" "sselog1")
3502 (set_attr "mode" "V4SF")])
;; Build a V2SF from two SF values when AVX is available.  The three SSE
;; register alternatives use vunpcklps, vinsertps (memory second element)
;; and vmovss (second element is the constant "C").  The two MMX register
;; alternatives (3 and 4) keep their legacy encodings, hence the "orig"
;; prefix attribute for them.
3504 (define_insn "*vec_concatv2sf_avx"
3505 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3507 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3508 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3511 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3512 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3513 vmovss\t{%1, %0|%0, %1}
3514 punpckldq\t{%2, %0|%0, %2}
3515 movd\t{%1, %0|%0, %1}"
3516 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3517 (set (attr "prefix")
3518 (if_then_else (eq_attr "alternative" "3,4")
3519 (const_string "orig")
3520 (const_string "vex")))
3521 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3523 ;; Although insertps takes register source, we prefer
3524 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF values with SSE4.1.  Alternatives: unpcklps
;; (register), insertps (memory second element; the only alternative with
;; prefix_extra set), movss (second element is the constant "C"), plus
;; two MMX register alternatives using punpckldq and movd.
3525 (define_insn "*vec_concatv2sf_sse4_1"
3526 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3528 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3529 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3532 unpcklps\t{%2, %0|%0, %2}
3533 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3534 movss\t{%1, %0|%0, %1}
3535 punpckldq\t{%2, %0|%0, %2}
3536 movd\t{%1, %0|%0, %1}"
3537 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3538 (set_attr "prefix_extra" "*,1,*,*,*")
3539 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3541 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3542 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3543 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values with plain SSE.  Operand 2 is limited
;; to a register or zero; alternatives are unpcklps and movss for SSE
;; registers, punpckldq and movd for MMX registers.
3544 (define_insn "*vec_concatv2sf_sse"
3545 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3547 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3548 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3551 unpcklps\t{%2, %0|%0, %2}
3552 movss\t{%1, %0|%0, %1}
3553 punpckldq\t{%2, %0|%0, %2}
3554 movd\t{%1, %0|%0, %1}"
3555 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3556 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves with AVX: vmovlhps when the second
;; half is in a register, vmovhps when it is in memory.
3558 (define_insn "*vec_concatv4sf_avx"
3559 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3561 (match_operand:V2SF 1 "register_operand" " x,x")
3562 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3565 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3566 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3567 [(set_attr "type" "ssemov")
3568 (set_attr "prefix" "vex")
3569 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF from two V2SF halves with legacy SSE; operand 1 is tied
;; to the destination.  movlhps when the second half is in a register,
;; movhps when it is in memory.
3571 (define_insn "*vec_concatv4sf_sse"
3572 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3574 (match_operand:V2SF 1 "register_operand" " 0,0")
3575 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3578 movlhps\t{%2, %0|%0, %2}
3579 movhps\t{%2, %0|%0, %2}"
3580 [(set_attr "type" "ssemov")
3581 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialization expander for every SSEMODE mode; all the work is
;; delegated to ix86_expand_vector_init with its first argument false.
3583 (define_expand "vec_init<mode>"
3584 [(match_operand:SSEMODE 0 "register_operand" "")
3585 (match_operand 1 "" "")]
3588 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern setting element 0 of a V4SF from SF operand 2.  Visible
;; templates: three-operand vmovss from a register, vmovss load from
;; memory, and vmovd from an integer register (the latter two with
;; operand 1 being the constant "C").  The fourth alternative (memory
;; destination) has no template line visible here.
3592 (define_insn "*vec_setv4sf_0_avx"
3593 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3596 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3597 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3601 vmovss\t{%2, %1, %0|%0, %1, %2}
3602 vmovss\t{%2, %0|%0, %2}
3603 vmovd\t{%2, %0|%0, %2}
3605 [(set_attr "type" "ssemov")
3606 (set_attr "prefix" "vex")
3607 (set_attr "mode" "SF")])
;; Legacy SSE pattern setting element 0 of a V4SF from SF operand 2.
;; Visible templates: movss from a register (operand 1 tied to the
;; destination), movss load from memory, and movd from an integer
;; register (destination constraint Y2).  The fourth alternative (memory
;; destination) has no template line visible here.
3609 (define_insn "vec_setv4sf_0"
3610 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3613 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3614 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3618 movss\t{%2, %0|%0, %2}
3619 movss\t{%2, %0|%0, %2}
3620 movd\t{%2, %0|%0, %2}
3622 [(set_attr "type" "ssemov")
3623 (set_attr "mode" "SF")])
3625 ;; A subset is vec_setv4sf.
;; AVX element insert into a V4SF via vinsertps.  Operand 3 is a
;; one-hot lane mask (1, 2, 4 or 8); the output code converts it to the
;; lane number with exact_log2 and shifts it left by 4 to form the
;; immediate that is printed.
3626 (define_insn "*vec_setv4sf_avx"
3627 [(set (match_operand:V4SF 0 "register_operand" "=x")
3630 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3631 (match_operand:V4SF 1 "register_operand" "x")
3632 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3635 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3636 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3638 [(set_attr "type" "sselog")
3639 (set_attr "prefix" "vex")
3640 (set_attr "mode" "V4SF")])
;; SSE4.1 element insert into a V4SF via insertps; operand 1 is tied to
;; the destination.  Operand 3 is a one-hot lane mask (1, 2, 4 or 8); the
;; output code converts it to the lane number with exact_log2 and shifts
;; it left by 4 to form the immediate that is printed.
3642 (define_insn "*vec_setv4sf_sse4_1"
3643 [(set (match_operand:V4SF 0 "register_operand" "=x")
3646 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3647 (match_operand:V4SF 1 "register_operand" "0")
3648 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3651 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3652 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3654 [(set_attr "type" "sselog")
3655 (set_attr "prefix_extra" "1")
3656 (set_attr "mode" "V4SF")])
;; AVX vinsertps modelled as an unspec: operand 2 is the source vector
;; (register or memory), operand 1 the vector inserted into, operand 3
;; the raw 8-bit immediate passed through unchanged.
3658 (define_insn "*avx_insertps"
3659 [(set (match_operand:V4SF 0 "register_operand" "=x")
3660 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3661 (match_operand:V4SF 1 "register_operand" "x")
3662 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3665 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3666 [(set_attr "type" "sselog")
3667 (set_attr "prefix" "vex")
3668 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps modelled as an unspec: operand 2 is the source
;; register, operand 1 is tied to the destination, operand 3 is the raw
;; 8-bit immediate passed through unchanged.
3670 (define_insn "sse4_1_insertps"
3671 [(set (match_operand:V4SF 0 "register_operand" "=x")
3672 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3673 (match_operand:V4SF 1 "register_operand" "0")
3674 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3677 "insertps\t{%3, %2, %0|%0, %2, %3}";
3678 [(set_attr "type" "sselog")
3679 (set_attr "prefix_extra" "1")
3680 (set_attr "mode" "V4SF")])
3683 [(set (match_operand:V4SF 0 "memory_operand" "")
3686 (match_operand:SF 1 "nonmemory_operand" ""))
3689 "TARGET_SSE && reload_completed"
3692 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element-set expander for every SSEMODE mode: stores scalar operand 1
;; into lane INTVAL (operands[2]) of vector operand 0 by delegating to
;; ix86_expand_vector_set with its first argument false.
3696 (define_expand "vec_set<mode>"
3697 [(match_operand:SSEMODE 0 "register_operand" "")
3698 (match_operand:<ssescalarmode> 1 "register_operand" "")
3699 (match_operand 2 "const_int_operand" "")]
3702 ix86_expand_vector_set (false, operands[0], operands[1],
3703 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination (SSE register,
;; memory, x87 register or integer register).  After reload the insn is
;; split into an ordinary SFmode move whose source is the SFmode view of
;; operand 1 (a same-numbered SFmode REG, or its SFmode low part).
3707 (define_insn_and_split "*vec_extractv4sf_0"
3708 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3710 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3711 (parallel [(const_int 0)])))]
3712 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3714 "&& reload_completed"
3717 rtx op1 = operands[1];
3719 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3721 op1 = gen_lowpart (SFmode, op1);
3722 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 on any AVX256MODE vector: dispatches on the
;; 0/1 immediate in operand 2 to the vec_extract_lo_<mode> or
;; vec_extract_hi_<mode> pattern.
3726 (define_expand "avx_vextractf128<mode>"
3727 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3728 (match_operand:AVX256MODE 1 "register_operand" "")
3729 (match_operand:SI 2 "const_0_to_1_operand" "")]
3732 switch (INTVAL (operands[2]))
3735 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3738 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Extract the low 128 bits (elements 0 and 1) of a four-element 256-bit
;; vector into a register or memory; vextractf128 immediate 0.
3746 (define_insn "vec_extract_lo_<mode>"
3747 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3748 (vec_select:<avxhalfvecmode>
3749 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3750 (parallel [(const_int 0) (const_int 1)])))]
3752 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3753 [(set_attr "type" "sselog")
3754 (set_attr "memory" "none,store")
3755 (set_attr "prefix" "vex")
3756 (set_attr "mode" "V8SF")])
;; Extract the high 128 bits (elements 2 and 3) of a four-element 256-bit
;; vector into a register or memory; vextractf128 immediate 1.
3758 (define_insn "vec_extract_hi_<mode>"
3759 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3760 (vec_select:<avxhalfvecmode>
3761 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3762 (parallel [(const_int 2) (const_int 3)])))]
3764 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3765 [(set_attr "type" "sselog")
3766 (set_attr "memory" "none,store")
3767 (set_attr "prefix" "vex")
3768 (set_attr "mode" "V8SF")])
;; Extract the low 128 bits (elements 0-3) of an eight-element 256-bit
;; vector into a register or memory.  vextractf128 selects the low lane
;; with immediate 0; immediate 1 would return elements 4-7 instead and
;; contradict the selection written in the pattern.
3770 (define_insn "vec_extract_lo_<mode>"
3771 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3772 (vec_select:<avxhalfvecmode>
3773 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3774 (parallel [(const_int 0) (const_int 1)
3775 (const_int 2) (const_int 3)])))]
3777 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3778 [(set_attr "type" "sselog")
3779 (set_attr "memory" "none,store")
3780 (set_attr "prefix" "vex")
3781 (set_attr "mode" "V8SF")])
;; Extract the high 128 bits (elements 4-7) of an eight-element 256-bit
;; vector into a register or memory; vextractf128 immediate 1.
3783 (define_insn "vec_extract_hi_<mode>"
3784 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3785 (vec_select:<avxhalfvecmode>
3786 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3787 (parallel [(const_int 4) (const_int 5)
3788 (const_int 6) (const_int 7)])))]
3790 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3791 [(set_attr "type" "sselog")
3792 (set_attr "memory" "none,store")
3793 (set_attr "prefix" "vex")
3794 (set_attr "mode" "V8SF")])
;; Extract the low 128 bits (elements 0-7) of a V16HI into a V8HI
;; register or memory.  vextractf128 selects the low lane with immediate
;; 0; immediate 1 would return elements 8-15 instead and contradict the
;; selection written in the pattern.
3796 (define_insn "vec_extract_lo_v16hi"
3797 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3799 (match_operand:V16HI 1 "register_operand" "x,x")
3800 (parallel [(const_int 0) (const_int 1)
3801 (const_int 2) (const_int 3)
3802 (const_int 4) (const_int 5)
3803 (const_int 6) (const_int 7)])))]
3805 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3806 [(set_attr "type" "sselog")
3807 (set_attr "memory" "none,store")
3808 (set_attr "prefix" "vex")
3809 (set_attr "mode" "V8SF")])
;; Extract the high 128 bits (elements 8-15) of a V16HI into a V8HI
;; register or memory; vextractf128 immediate 1.
3811 (define_insn "vec_extract_hi_v16hi"
3812 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3814 (match_operand:V16HI 1 "register_operand" "x,x")
3815 (parallel [(const_int 8) (const_int 9)
3816 (const_int 10) (const_int 11)
3817 (const_int 12) (const_int 13)
3818 (const_int 14) (const_int 15)])))]
3820 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3821 [(set_attr "type" "sselog")
3822 (set_attr "memory" "none,store")
3823 (set_attr "prefix" "vex")
3824 (set_attr "mode" "V8SF")])
;; Extract the low 128 bits (elements 0-15) of a V32QI into a V16QI
;; register or memory.  vextractf128 selects the low lane with immediate
;; 0; immediate 1 would return elements 16-31 instead and contradict the
;; selection written in the pattern.
3826 (define_insn "vec_extract_lo_v32qi"
3827 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3829 (match_operand:V32QI 1 "register_operand" "x,x")
3830 (parallel [(const_int 0) (const_int 1)
3831 (const_int 2) (const_int 3)
3832 (const_int 4) (const_int 5)
3833 (const_int 6) (const_int 7)
3834 (const_int 8) (const_int 9)
3835 (const_int 10) (const_int 11)
3836 (const_int 12) (const_int 13)
3837 (const_int 14) (const_int 15)])))]
3839 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3840 [(set_attr "type" "sselog")
3841 (set_attr "memory" "none,store")
3842 (set_attr "prefix" "vex")
3843 (set_attr "mode" "V8SF")])
;; Extract the high 128 bits (elements 16-31) of a V32QI into a V16QI
;; register or memory; vextractf128 immediate 1.
3845 (define_insn "vec_extract_hi_v32qi"
3846 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3848 (match_operand:V32QI 1 "register_operand" "x,x")
3849 (parallel [(const_int 16) (const_int 17)
3850 (const_int 18) (const_int 19)
3851 (const_int 20) (const_int 21)
3852 (const_int 22) (const_int 23)
3853 (const_int 24) (const_int 25)
3854 (const_int 26) (const_int 27)
3855 (const_int 28) (const_int 29)
3856 (const_int 30) (const_int 31)])))]
3858 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3859 [(set_attr "type" "sselog")
3860 (set_attr "memory" "none,store")
3861 (set_attr "prefix" "vex")
3862 (set_attr "mode" "V8SF")])
;; extractps: copy the V4SF element selected by operand 2 (0-3) into an
;; integer register or memory.  %v selects the VEX form when AVX is
;; enabled.
3864 (define_insn "*sse4_1_extractps"
3865 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3867 (match_operand:V4SF 1 "register_operand" "x")
3868 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3870 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3871 [(set_attr "type" "sselog")
3872 (set_attr "prefix_extra" "1")
3873 (set_attr "prefix" "maybe_vex")
3874 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0-3) from a V4SF that lives in offsettable
;; memory.  The split replaces it with a scalar SFmode load from byte
;; offset i*4 of the vector's address.
3876 (define_insn_and_split "*vec_extract_v4sf_mem"
3877 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3879 (match_operand:V4SF 1 "memory_operand" "o")
3880 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3886 int i = INTVAL (operands[2]);
3888 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element-extract expander for every SSEMODE mode: reads lane
;; INTVAL (operands[2]) of vector operand 1 into scalar operand 0 by
;; delegating to ix86_expand_vector_extract with its first argument false.
3892 (define_expand "vec_extract<mode>"
3893 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3894 (match_operand:SSEMODE 1 "register_operand" "")
3895 (match_operand 2 "const_int_operand" "")]
3898 ix86_expand_vector_extract (false, operands[0], operands[1],
3899 INTVAL (operands[2]));
3903 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3905 ;; Parallel double-precision floating point element swizzling
3907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd: selection 1,5,3,7 over operands 1 and 2, i.e. the
;; upper double of each 128-bit lane of both inputs.
3909 (define_insn "avx_unpckhpd256"
3910 [(set (match_operand:V4DF 0 "register_operand" "=x")
3913 (match_operand:V4DF 1 "register_operand" "x")
3914 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3915 (parallel [(const_int 1) (const_int 5)
3916 (const_int 3) (const_int 7)])))]
3918 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3919 [(set_attr "type" "sselog")
3920 (set_attr "prefix" "vex")
3921 (set_attr "mode" "V4DF")])
;; Expander for unpckhpd on V2DF operands.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands before the pattern
;; is emitted.
3923 (define_expand "sse2_unpckhpd_exp"
3924 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
3927 (match_operand:V2DF 1 "nonimmediate_operand" "")
3928 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3929 (parallel [(const_int 1)
3932 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX unpckhpd on V2DF.  Alternatives: register-register vunpckhpd;
;; vmovlpd loading the upper 8 bytes (%H1) of memory operand 1 into the
;; low half; vmovhpd storing operand 1 to a memory destination.
;; The condition rejects operands 1 and 2 both being memory.
3934 (define_insn "*avx_unpckhpd"
3935 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3938 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
3939 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
3940 (parallel [(const_int 1)
3942 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3944 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3945 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3946 vmovhpd\t{%1, %0|%0, %1}"
3947 [(set_attr "type" "sselog,ssemov,ssemov")
3948 (set_attr "prefix" "vex")
3949 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy SSE2 unpckhpd on V2DF.  Alternatives: unpckhpd with operand 1
;; tied to the destination; movlpd loading the upper 8 bytes (%H1) of
;; memory operand 1 with operand 2 tied to the destination; movhpd
;; storing operand 1 to a memory destination.
3951 (define_insn "sse2_unpckhpd"
3952 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3955 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3956 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3957 (parallel [(const_int 1)
3959 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3961 unpckhpd\t{%2, %0|%0, %2}
3962 movlpd\t{%H1, %0|%0, %H1}
3963 movhpd\t{%1, %0|%0, %1}"
3964 [(set_attr "type" "sselog,ssemov,ssemov")
3965 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup: duplicates the even-indexed doubles of operand 1,
;; producing {a0, a0, a2, a2}.
;; NOTE(review): the indices below assume the selection is taken from
;; operand 1 concatenated with itself (eight elements, so index 4 is a0
;; and index 6 is a2) -- confirm against the vec_concat in this pattern.
;; Under that layout the order 0,2,4,6 would describe {a0, a2, a0, a2},
;; which is not what the instruction computes.
3967 (define_insn "avx_movddup256"
3968 [(set (match_operand:V4DF 0 "register_operand" "=x")
3971 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
3973 (parallel [(const_int 0) (const_int 4)
3974 (const_int 2) (const_int 6)])))]
3976 "vmovddup\t{%1, %0|%0, %1}"
3977 [(set_attr "type" "sselog1")
3978 (set_attr "prefix" "vex")
3979 (set_attr "mode" "V4DF")])
;; 128-bit AVX movddup on V2DF.  The register-destination alternative
;; emits vmovddup; the memory-destination alternative has no template
;; line visible here.  The condition rejects a memory-to-memory form.
3981 (define_insn "*avx_movddup"
3982 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3985 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3987 (parallel [(const_int 0)
3989 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3991 vmovddup\t{%1, %0|%0, %1}
3993 [(set_attr "type" "sselog1,ssemov")
3994 (set_attr "prefix" "vex")
3995 (set_attr "mode" "V2DF")])
;; SSE3 movddup on V2DF.  The register-destination alternative emits
;; movddup; the memory-destination alternative has no template line
;; visible here.  The condition rejects a memory-to-memory form.
3997 (define_insn "*sse3_movddup"
3998 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4001 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4003 (parallel [(const_int 0)
4005 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4007 movddup\t{%1, %0|%0, %1}
4009 [(set_attr "type" "sselog1,ssemov")
4010 (set_attr "mode" "V2DF")])
4013 [(set (match_operand:V2DF 0 "memory_operand" "")
4016 (match_operand:V2DF 1 "register_operand" "")
4018 (parallel [(const_int 0)
4020 "TARGET_SSE3 && reload_completed"
4023 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4024 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4025 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd: selection 0,4,2,6 over operands 1 and 2, i.e. the
;; lower double of each 128-bit lane of both inputs.
4029 (define_insn "avx_unpcklpd256"
4030 [(set (match_operand:V4DF 0 "register_operand" "=x")
4033 (match_operand:V4DF 1 "register_operand" "x")
4034 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4035 (parallel [(const_int 0) (const_int 4)
4036 (const_int 2) (const_int 6)])))]
4038 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4039 [(set_attr "type" "sselog")
4040 (set_attr "prefix" "vex")
4041 (set_attr "mode" "V4DF")])
;; Expander for unpcklpd on V2DF operands.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands before the pattern
;; is emitted.
4043 (define_expand "sse2_unpcklpd_exp"
4044 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4047 (match_operand:V2DF 1 "nonimmediate_operand" "")
4048 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4049 (parallel [(const_int 0)
4052 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX unpcklpd on V2DF.  Alternatives: register-register vunpcklpd;
;; vmovhpd loading operand 2 from memory; vmovlpd storing operand 2 at
;; offset 8 (%H0) of an offsettable memory destination.
;; The condition rejects operands 1 and 2 both being memory.
4054 (define_insn "*avx_unpcklpd"
4055 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4058 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4059 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4060 (parallel [(const_int 0)
4062 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4064 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4065 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4066 vmovlpd\t{%2, %H0|%H0, %2}"
4067 [(set_attr "type" "sselog,ssemov,ssemov")
4068 (set_attr "prefix" "vex")
4069 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy SSE2 unpcklpd on V2DF; operand 1 is tied to the destination.
;; Alternatives: unpcklpd from a register; movhpd from memory; movlpd
;; store at offset 8 (%H0) of an offsettable memory destination.
4071 (define_insn "sse2_unpcklpd"
4072 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4075 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4076 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4077 (parallel [(const_int 0)
4079 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4081 unpcklpd\t{%2, %0|%0, %2}
4082 movhpd\t{%2, %0|%0, %2}
4083 movlpd\t{%2, %H0|%H0, %2}"
4084 [(set_attr "type" "sselog,ssemov,ssemov")
4085 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shuffle immediate as operand 3.  Each mask bit is
;; decoded into an explicit element index handed to avx_shufpd256_1:
;; bit 1 -> 5 or 4, bit 2 -> 3 or 2, bit 3 -> 7 or 6.
4087 (define_expand "avx_shufpd256"
4088 [(match_operand:V4DF 0 "register_operand" "")
4089 (match_operand:V4DF 1 "register_operand" "")
4090 (match_operand:V4DF 2 "nonimmediate_operand" "")
4091 (match_operand:SI 3 "const_int_operand" "")]
4094 int mask = INTVAL (operands[3]);
4095 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4097 GEN_INT (mask & 2 ? 5 : 4),
4098 GEN_INT (mask & 4 ? 3 : 2),
4099 GEN_INT (mask & 8 ? 7 : 6)));
;; Insn matched with the four selector indices as separate operands 3..6,
;; each restricted to a two-value range by its predicate.  The output code
;; folds them back into the 4-bit immediate (one bit per index, after
;; subtracting the base of each range) and prints vshufpd with it.
4103 (define_insn "avx_shufpd256_1"
4104 [(set (match_operand:V4DF 0 "register_operand" "=x")
4107 (match_operand:V4DF 1 "register_operand" "x")
4108 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4109 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4110 (match_operand 4 "const_4_to_5_operand" "")
4111 (match_operand 5 "const_2_to_3_operand" "")
4112 (match_operand 6 "const_6_to_7_operand" "")])))]
4116 mask = INTVAL (operands[3]);
4117 mask |= (INTVAL (operands[4]) - 4) << 1;
4118 mask |= (INTVAL (operands[5]) - 2) << 2;
4119 mask |= (INTVAL (operands[6]) - 6) << 3;
4120 operands[3] = GEN_INT (mask);
4122 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4124 [(set_attr "type" "sselog")
4125 (set_attr "prefix" "vex")
4126 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate as operand 3 and forwarding to
;; gen_sse2_shufpd_v2df with explicit element indices; bit 1 of the mask
;; selects index 3 or 2 for the last argument.
4128 (define_expand "sse2_shufpd"
4129 [(match_operand:V2DF 0 "register_operand" "")
4130 (match_operand:V2DF 1 "register_operand" "")
4131 (match_operand:V2DF 2 "nonimmediate_operand" "")
4132 (match_operand:SI 3 "const_int_operand" "")]
4135 int mask = INTVAL (operands[3]);
4136 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4138 GEN_INT (mask & 2 ? 3 : 2)));
;; Even-element extraction for the SSEMODE4S modes: a vec_select over the
;; vec_concat of operands 1 and 2 whose selector starts at element 0.
4142 (define_expand "vec_extract_even<mode>"
4143 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4144 (vec_select:SSEMODE4S
4145 (vec_concat:<ssedoublesizemode>
4146 (match_operand:SSEMODE4S 1 "register_operand" "")
4147 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4148 (parallel [(const_int 0)
;; Odd-element extraction for the SSEMODE4S modes: a vec_select over the
;; vec_concat of operands 1 and 2 whose selector starts at element 1.
4154 (define_expand "vec_extract_odd<mode>"
4155 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4156 (vec_select:SSEMODE4S
4157 (vec_concat:<ssedoublesizemode>
4158 (match_operand:SSEMODE4S 1 "register_operand" "")
4159 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4160 (parallel [(const_int 1)
;; Even-element extraction for the SSEMODE2D modes: a vec_select over the
;; vec_concat of operands 1 and 2 whose selector starts at element 0.
4166 (define_expand "vec_extract_even<mode>"
4167 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4168 (vec_select:SSEMODE2D
4169 (vec_concat:<ssedoublesizemode>
4170 (match_operand:SSEMODE2D 1 "register_operand" "")
4171 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4172 (parallel [(const_int 0)
;; Odd-element extraction for the SSEMODE2D modes: a vec_select over the
;; vec_concat of operands 1 and 2 whose selector starts at element 1.
4176 (define_expand "vec_extract_odd<mode>"
4177 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4178 (vec_select:SSEMODE2D
4179 (vec_concat:<ssedoublesizemode>
4180 (match_operand:SSEMODE2D 1 "register_operand" "")
4181 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4182 (parallel [(const_int 1)
4186 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand form on V2DI: emits vpunpckhqdq; the selector starts at
;; element 1.  Operand 2 may be a register or memory.
4187 (define_insn "*avx_punpckhqdq"
4188 [(set (match_operand:V2DI 0 "register_operand" "=x")
4191 (match_operand:V2DI 1 "register_operand" "x")
4192 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4193 (parallel [(const_int 1)
4196 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4197 [(set_attr "type" "sselog")
4198 (set_attr "prefix" "vex")
4199 (set_attr "mode" "TI")])
;; SSE2 two-operand form on V2DI: emits punpckhqdq with operand 1 tied to the
;; destination; the selector starts at element 1.
4201 (define_insn "sse2_punpckhqdq"
4202 [(set (match_operand:V2DI 0 "register_operand" "=x")
4205 (match_operand:V2DI 1 "register_operand" "0")
4206 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4207 (parallel [(const_int 1)
4210 "punpckhqdq\t{%2, %0|%0, %2}"
4211 [(set_attr "type" "sselog")
4212 (set_attr "prefix_data16" "1")
4213 (set_attr "mode" "TI")])
;; AVX three-operand form on V2DI: emits vpunpcklqdq; the selector starts at
;; element 0.  Operand 2 may be a register or memory.
4215 (define_insn "*avx_punpcklqdq"
4216 [(set (match_operand:V2DI 0 "register_operand" "=x")
4219 (match_operand:V2DI 1 "register_operand" "x")
4220 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4221 (parallel [(const_int 0)
4224 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4225 [(set_attr "type" "sselog")
4226 (set_attr "prefix" "vex")
4227 (set_attr "mode" "TI")])
;; SSE2 two-operand form on V2DI: emits punpcklqdq with operand 1 tied to the
;; destination; the selector starts at element 0.
4229 (define_insn "sse2_punpcklqdq"
4230 [(set (match_operand:V2DI 0 "register_operand" "=x")
4233 (match_operand:V2DI 1 "register_operand" "0")
4234 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4235 (parallel [(const_int 0)
4238 "punpcklqdq\t{%2, %0|%0, %2}"
4239 [(set_attr "type" "sselog")
4240 (set_attr "prefix_data16" "1")
4241 (set_attr "mode" "TI")])
;; AVX two-element shuffle for the SSEMODE2D modes.  Operand 3 (0..1) picks
;; the first result element, operand 4 (2..3) the second.  The output code
;; rebuilds the 2-bit immediate as op3 | ((op4 - 2) << 1) and emits vshufpd.
4243 (define_insn "*avx_shufpd_<mode>"
4244 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4245 (vec_select:SSEMODE2D
4246 (vec_concat:<ssedoublesizemode>
4247 (match_operand:SSEMODE2D 1 "register_operand" "x")
4248 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4249 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4250 (match_operand 4 "const_2_to_3_operand" "")])))]
4254 mask = INTVAL (operands[3]);
4255 mask |= (INTVAL (operands[4]) - 2) << 1;
4256 operands[3] = GEN_INT (mask);
4258 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4260 [(set_attr "type" "sselog")
4261 (set_attr "prefix" "vex")
4262 (set_attr "mode" "V2DF")])
;; SSE2 two-element shuffle for the SSEMODE2D modes, with operand 1 tied to
;; the destination.  Operand 3 (0..1) picks the first result element,
;; operand 4 (2..3) the second.  The output code rebuilds the 2-bit immediate
;; as op3 | ((op4 - 2) << 1) and emits shufpd.
4264 (define_insn "sse2_shufpd_<mode>"
4265 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4266 (vec_select:SSEMODE2D
4267 (vec_concat:<ssedoublesizemode>
4268 (match_operand:SSEMODE2D 1 "register_operand" "0")
4269 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4270 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4271 (match_operand 4 "const_2_to_3_operand" "")])))]
4275 mask = INTVAL (operands[3]);
4276 mask |= (INTVAL (operands[4]) - 2) << 1;
4277 operands[3] = GEN_INT (mask);
4279 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4281 [(set_attr "type" "sselog")
4282 (set_attr "mode" "V2DF")])
4284 ;; Avoid combining registers from different units in a single alternative,
4285 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination.  Five
;; alternatives with destinations m, x, x, *f and r; the first stores with
;; vmovhpd and the second uses vunpckhpd with operand 1 as both sources.
;; Source and destination may not both be memory.
4286 (define_insn "*avx_storehpd"
4287 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4289 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4290 (parallel [(const_int 1)])))]
4291 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4293 vmovhpd\t{%1, %0|%0, %1}
4294 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4298 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4299 (set_attr "prefix" "vex")
4300 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination.  Five
;; alternatives with destinations m, x, x, *f and r; the memory-destination
;; alternative stores with movhpd, and the register-to-register alternative
;; requires the source to be tied to the destination.
;; Source and destination may not both be memory.
4302 (define_insn "sse2_storehpd"
4303 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4305 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4306 (parallel [(const_int 1)])))]
4307 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4309 movhpd\t{%1, %0|%0, %1}
4314 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4315 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF that lives in memory into a
;; register is rewritten as a plain DF move from byte offset 8 of that memory.
4318 [(set (match_operand:DF 0 "register_operand" "")
4320 (match_operand:V2DF 1 "memory_operand" "")
4321 (parallel [(const_int 1)])))]
4322 "TARGET_SSE2 && reload_completed"
4323 [(set (match_dup 0) (match_dup 1))]
4325 operands[1] = adjust_address (operands[1], DFmode, 8);
4328 ;; Avoid combining registers from different units in a single alternative,
4329 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination, shared between
;; SSE2 and AVX (the %v prefix in the template and the maybe_vex attribute).
;; Five alternatives with destinations m, x, x, *f and r; the
;; memory-destination alternative stores with (v)movlpd.
4330 (define_insn "sse2_storelpd"
4331 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4333 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4334 (parallel [(const_int 0)])))]
4335 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4337 %vmovlpd\t{%1, %0|%0, %1}
4342 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4343 (set_attr "prefix" "maybe_vex")
4344 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF becomes a plain DF move.
;; The source is re-expressed in DFmode: either as a DFmode REG with the same
;; register number, or via gen_lowpart (which of the two applies is decided
;; by a test not visible here -- presumably whether operand 1 is a register).
4347 [(set (match_operand:DF 0 "register_operand" "")
4349 (match_operand:V2DF 1 "nonimmediate_operand" "")
4350 (parallel [(const_int 0)])))]
4351 "TARGET_SSE2 && reload_completed"
4354 rtx op1 = operands[1];
4356 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4358 op1 = gen_lowpart (DFmode, op1);
4359 emit_move_insn (operands[0], op1);
;; Named expander for the "load high" V2DF patterns: element 0 of operand 1
;; combined with DF operand 2.  All operands are accepted as
;; nonimmediate_operand; the preparation statement passes them through
;; ix86_fixup_binary_operands.
4363 (define_expand "sse2_loadhpd_exp"
4364 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4367 (match_operand:V2DF 1 "nonimmediate_operand" "")
4368 (parallel [(const_int 0)]))
4369 (match_operand:DF 2 "nonimmediate_operand" "")))]
4371 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4373 ;; Avoid combining registers from different units in a single alternative,
4374 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX "load high": element 0 of operand 1 combined with DF operand 2.
;; Register-destination alternatives emit vmovhpd (operand 2 in memory) or
;; vunpcklpd (operand 2 in a register).  The remaining three alternatives
;; have an offsettable memory destination tied to operand 1 and take
;; operand 2 from an x, *f or r register.
4375 (define_insn "*avx_loadhpd"
4376 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4379 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4380 (parallel [(const_int 0)]))
4381 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4382 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4384 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4385 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4389 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4390 (set_attr "prefix" "vex")
4391 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 "load high": the result keeps element 0 of operand 1 and takes
;; element 1 from DF operand 2.  Operand 1 is tied to the destination in
;; every alternative:
;;   0: operand 2 in memory   -> movhpd
;;   1: operand 2 in an xmm   -> unpcklpd
;;   2-4: offsettable memory destination, operand 2 in an x, *f or r register.
;; There is deliberately no alternative with operand 2 tied to the
;; destination: SHUFPD always takes its low result element from the
;; destination register, so it cannot place element 0 of operand 1 there,
;; and a "shufpd $1, %1, %0" template for that case produces a wrong low
;; element.
4393 (define_insn "sse2_loadhpd"
4394 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4397 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4398 (parallel [(const_int 0)]))
4399 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4400 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4402 movhpd\t{%2, %0|%0, %2}
4403 unpcklpd\t{%2, %0|%0, %2}
4408 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4409 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, replacing the high element of a V2DF in memory (element 0 is
;; re-read from the same memory, so it is unchanged) is just a DF store of
;; operand 1 at byte offset 8.
4412 [(set (match_operand:V2DF 0 "memory_operand" "")
4414 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4415 (match_operand:DF 1 "register_operand" "")))]
4416 "TARGET_SSE2 && reload_completed"
4417 [(set (match_dup 0) (match_dup 1))]
4419 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Named expander for the "load low" V2DF patterns: DF operand 2 combined
;; with element 1 of operand 1.  All operands are accepted as
;; nonimmediate_operand; the preparation statement passes them through
;; ix86_fixup_binary_operands.
4422 (define_expand "sse2_loadlpd_exp"
4423 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4425 (match_operand:DF 2 "nonimmediate_operand" "")
4427 (match_operand:V2DF 1 "nonimmediate_operand" "")
4428 (parallel [(const_int 1)]))))]
4430 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4432 ;; Avoid combining registers from different units in a single alternative,
4433 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX "load low": DF operand 2 combined with element 1 of operand 1.
;; Register-destination alternatives:
;;   0: operand 2 in memory, operand 1 constraint C  -> two-operand vmovsd
;;   1: operand 2 in memory, operand 1 in a register -> vmovlpd
;;   2: both in registers                            -> three-operand vmovsd
;;   3: operand 1 in offsettable memory              -> vmovhpd from %H1
;; The last three alternatives have a memory destination tied to operand 1
;; and take operand 2 from an x, *f or r register.
4434 (define_insn "*avx_loadlpd"
4435 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4437 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4439 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4440 (parallel [(const_int 1)]))))]
4441 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4443 vmovsd\t{%2, %0|%0, %2}
4444 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4445 vmovsd\t{%2, %1, %0|%0, %1, %2}
4446 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4450 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4451 (set_attr "prefix" "vex")
4452 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 "load low": the result takes element 0 from DF operand 2 and keeps
;; element 1 of operand 1.  Register-destination alternatives:
;;   0: operand 2 in memory, operand 1 constraint C    -> movsd load
;;   1: operand 2 in memory, operand 1 tied to dest    -> movlpd
;;   2: operand 2 in an xmm, operand 1 tied to dest    -> movsd
;;   3: operand 2 tied to dest, operand 1 in an xmm    -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 in memory    -> movhpd from %H1
;; The last three alternatives have a memory destination tied to operand 1.
;; In alternative 3 the destination already holds operand 2 in its low
;; element, so the shufpd source must be operand 1 (%1): immediate 2 keeps
;; the destination's low element and takes the high element from the source.
;; Naming %2 there would shuffle the destination with itself and never read
;; operand 1.
4454 (define_insn "sse2_loadlpd"
4455 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4457 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4459 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4460 (parallel [(const_int 1)]))))]
4461 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4463 movsd\t{%2, %0|%0, %2}
4464 movlpd\t{%2, %0|%0, %2}
4465 movsd\t{%2, %0|%0, %2}
4466 shufpd\t{$2, %1, %0|%0, %1, 2}
4467 movhpd\t{%H1, %0|%0, %H1}
4471 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4472 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, replacing the low element of a V2DF in memory is just a DF
;; store of operand 1.  The pattern re-reads element 1 from the same memory,
;; so the high element must stay untouched: the store therefore goes to byte
;; offset 0 (the low element), not offset 8.
4475 [(set (match_operand:V2DF 0 "memory_operand" "")
4477 (match_operand:DF 1 "register_operand" "")
4478 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4479 "TARGET_SSE2 && reload_completed"
4480 [(set (match_dup 0) (match_dup 1))]
4482 operands[0] = adjust_address (operands[0], DFmode, 0);
4485 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF using single-precision moves: movhps to memory, movhlps between
;; registers, or movlps from %H1 when the source is offsettable memory.
4487 (define_insn "*vec_extractv2df_1_sse"
4488 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4490 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4491 (parallel [(const_int 1)])))]
4492 "!TARGET_SSE2 && TARGET_SSE
4493 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4495 movhps\t{%1, %0|%0, %1}
4496 movhlps\t{%1, %0|%0, %1}
4497 movlps\t{%H1, %0|%0, %H1}"
4498 [(set_attr "type" "ssemov")
4499 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF using single-precision moves: movlps to or from memory, movaps
;; between registers.
4501 (define_insn "*vec_extractv2df_0_sse"
4502 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4504 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4505 (parallel [(const_int 0)])))]
4506 "!TARGET_SSE2 && TARGET_SSE
4507 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4509 movlps\t{%1, %0|%0, %1}
4510 movaps\t{%1, %0|%0, %1}
4511 movlps\t{%1, %0|%0, %1}"
4512 [(set_attr "type" "ssemov")
4513 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX V2DF pattern combining operands 2 and 1 (note operand 2 is listed
;; first).  Alternatives by template:
;;   0: vmovsd, all registers
;;   1: vmovlpd, operand 2 in memory
;;   2: vmovlpd store of operand 2 to a memory destination tied to operand 1
;;   3: vmovhps reading %H1 from offsettable memory operand 1
;;   4: vmovhps store of operand 1 to %H0, destination tied to operand 2
4515 (define_insn "*avx_movsd"
4516 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4518 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4519 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4523 vmovsd\t{%2, %1, %0|%0, %1, %2}
4524 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4525 vmovlpd\t{%2, %0|%0, %2}
4526 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4527 vmovhps\t{%1, %H0|%H0, %1}"
4528 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4529 (set_attr "prefix" "vex")
4530 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF pattern combining operands 2 and 1 (note operand 2 is listed
;; first).  Alternatives by template:
;;   0: movsd, operand 1 tied to the destination
;;   1: movlpd, operand 2 in memory
;;   2: movlpd store of operand 2 to a memory destination tied to operand 1
;;   3: shufpd $2, operand 2 tied to the destination, operand 1 in an xmm
;;   4: movhps reading %H1 from offsettable memory operand 1
;;   5: movhps store of operand 1 to %H0, destination tied to operand 2
;; In alternative 3 operand 2 is the destination register, so the shufpd
;; source must be operand 1 (%1).  Naming %2 would shuffle the destination
;; with itself and never read operand 1, unlike the neighbouring movhps
;; alternative, which does read operand 1's high half.
4532 (define_insn "sse2_movsd"
4533 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4535 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4536 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4540 movsd\t{%2, %0|%0, %2}
4541 movlpd\t{%2, %0|%0, %2}
4542 movlpd\t{%2, %0|%0, %2}
4543 shufpd\t{$2, %1, %0|%0, %1, 2}
4544 movhps\t{%H1, %0|%0, %H1}
4545 movhps\t{%1, %H0|%H0, %1}"
4546 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4547 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF operand (register or memory) with
;; (v)movddup; the %v prefix and maybe_vex attribute cover both encodings.
4549 (define_insn "*vec_dupv2df_sse3"
4550 [(set (match_operand:V2DF 0 "register_operand" "=x")
4552 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4554 "%vmovddup\t{%1, %0|%0, %1}"
4555 [(set_attr "type" "sselog1")
4556 (set_attr "prefix" "maybe_vex")
4557 (set_attr "mode" "DF")])
;; Named V2DF pattern built from a DF register operand that must already be
;; in the destination register (constraint "0").
4559 (define_insn "vec_dupv2df"
4560 [(set (match_operand:V2DF 0 "register_operand" "=x")
4562 (match_operand:DF 1 "register_operand" "0")))]
4565 [(set_attr "type" "sselog1")
4566 (set_attr "mode" "V2DF")])
;; V2DF concatenation form that is emitted as (v)movddup of operand 1
;; (register or memory).
4568 (define_insn "*vec_concatv2df_sse3"
4569 [(set (match_operand:V2DF 0 "register_operand" "=x")
4571 (match_operand:DF 1 "nonimmediate_operand" "xm")
4574 "%vmovddup\t{%1, %0|%0, %1}"
4575 [(set_attr "type" "sselog1")
4576 (set_attr "prefix" "maybe_vex")
4577 (set_attr "mode" "DF")])
;; AVX concatenation of two DF operands into a V2DF.  Alternatives:
;;   0: both in registers                           -> vunpcklpd
;;   1: operand 2 in memory                         -> vmovhpd
;;   2: operand 1 in memory, operand 2 constraint C -> vmovsd load of operand 1
4579 (define_insn "*vec_concatv2df_avx"
4580 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4582 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4583 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4586 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4587 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4588 vmovsd\t{%1, %0|%0, %1}"
4589 [(set_attr "type" "ssemov")
4590 (set_attr "prefix" "vex")
4591 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX concatenation of two DF operands into a V2DF.  The first three
;; alternatives use the Y2 register class with double-precision instructions
;; (unpcklpd, movhpd, movsd); the last two use the x class with
;; single-precision moves (movlhps, movhps).
4593 (define_insn "*vec_concatv2df"
4594 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4596 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4597 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4600 unpcklpd\t{%2, %0|%0, %2}
4601 movhpd\t{%2, %0|%0, %2}
4602 movsd\t{%1, %0|%0, %1}
4603 movlhps\t{%2, %0|%0, %2}
4604 movhps\t{%2, %0|%0, %2}"
4605 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4606 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4608 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4610 ;; Parallel integral arithmetic
4612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the SSEMODEI integer vector modes.  The preparation
;; statement creates operand 2 as a register holding the all-zero vector of
;; the same mode (presumably so negation is expressed as 0 - operand 1).
4614 (define_expand "neg<mode>2"
4615 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4618 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4620 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the SSEMODEI integer vector modes.  Both inputs
;; may be memory here; ix86_fixup_binary_operands_no_copy is called with the
;; iterator's rtx code to adjust them.
4622 (define_expand "<plusminus_insn><mode>3"
4623 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4625 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4626 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4628 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer add/subtract for SSEMODEI.  The mnemonic is
;; assembled from the iterator attributes as vp<op><size>; <comm> supplies
;; the commutativity marker for operand 1.
4630 (define_insn "*avx_<plusminus_insn><mode>3"
4631 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4633 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4634 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4635 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4636 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4637 [(set_attr "type" "sseiadd")
4638 (set_attr "prefix" "vex")
4639 (set_attr "mode" "TI")])
;; SSE2 two-operand integer add/subtract for SSEMODEI, with operand 1 tied to
;; the destination.  The mnemonic is assembled as p<op><size>.
4641 (define_insn "*<plusminus_insn><mode>3"
4642 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4644 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4645 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4646 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4647 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4648 [(set_attr "type" "sseiadd")
4649 (set_attr "prefix_data16" "1")
4650 (set_attr "mode" "TI")])
;; Expander over the sat_plusminus code iterator (defined elsewhere;
;; presumably the saturating add/subtract codes) for the byte and word
;; vector modes in SSEMODE12.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
4652 (define_expand "sse2_<plusminus_insn><mode>3"
4653 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4654 (sat_plusminus:SSEMODE12
4655 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4656 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4658 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand insn for the sat_plusminus codes on SSEMODE12; the
;; mnemonic is assembled as vp<op><size>.
4660 (define_insn "*avx_<plusminus_insn><mode>3"
4661 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4662 (sat_plusminus:SSEMODE12
4663 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4664 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4665 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4666 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4667 [(set_attr "type" "sseiadd")
4668 (set_attr "prefix" "vex")
4669 (set_attr "mode" "TI")])
;; SSE2 two-operand insn for the sat_plusminus codes on SSEMODE12, with
;; operand 1 tied to the destination; the mnemonic is assembled as
;; p<op><size>.
4671 (define_insn "*sse2_<plusminus_insn><mode>3"
4672 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4673 (sat_plusminus:SSEMODE12
4674 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4675 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4676 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4677 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4678 [(set_attr "type" "sseiadd")
4679 (set_attr "prefix_data16" "1")
4680 (set_attr "mode" "TI")])
;; V16QI multiply, which has no direct instruction; it is split before reload
;; into word multiplies.  Two strategies appear in the split code:
;;  - an SSE5 path that unpacks bytes to words with ix86_expand_sse5_unpack,
;;    multiplies with mulv8hi3 and repacks with ix86_expand_sse5_pack;
;;  - a generic path that widens with punpck{h,l}bw, multiplies with
;;    mulv8hi3, and merges the low result bytes back together through a
;;    sequence of punpck{h,l}bw steps (see the byte-layout annotations).
4682 (define_insn_and_split "mulv16qi3"
4683 [(set (match_operand:V16QI 0 "register_operand" "")
4684 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4685 (match_operand:V16QI 2 "register_operand" "")))]
4687 && !(reload_completed || reload_in_progress)"
4692 rtx t[12], op0, op[3];
4697 /* On SSE5, we can take advantage of the pperm instruction to pack and
4698 unpack the bytes. Unpack data such that we've got a source byte in
4699 each low byte of each word. We don't care what goes into the high
4700 byte, so put 0 there. */
4701 for (i = 0; i < 6; ++i)
4702 t[i] = gen_reg_rtx (V8HImode);
4704 for (i = 0; i < 2; i++)
4707 op[1] = operands[i+1];
4708 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4711 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4714 /* Multiply words. */
4715 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4716 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4718 /* Pack the low byte of each word back into a single xmm */
4719 op[0] = operands[0];
4722 ix86_expand_sse5_pack (op);
4726 for (i = 0; i < 12; ++i)
4727 t[i] = gen_reg_rtx (V16QImode);
4729 /* Unpack data such that we've got a source byte in each low byte of
4730 each word. We don't care what goes into the high byte of each word.
4731 Rather than trying to get zero in there, most convenient is to let
4732 it be a copy of the low byte. */
4733 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4734 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4735 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4736 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4738 /* Multiply words. The end-of-line annotations here give a picture of what
4739 the output of that instruction looks like. Dot means don't care; the
4740 letters are the bytes of the result with A being the most significant. */
4741 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4742 gen_lowpart (V8HImode, t[0]),
4743 gen_lowpart (V8HImode, t[1])));
4744 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4745 gen_lowpart (V8HImode, t[2]),
4746 gen_lowpart (V8HImode, t[3])));
4748 /* Extract the relevant bytes and merge them back together. */
4749 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4750 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4751 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4752 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4753 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4754 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4757 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching the insns below.
4761 (define_expand "mulv8hi3"
4762 [(set (match_operand:V8HI 0 "register_operand" "")
4763 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4764 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4766 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply: three-operand vpmullw, operands 1 and 2 commutative
;; (the "%" constraint modifier).
4768 (define_insn "*avx_mulv8hi3"
4769 [(set (match_operand:V8HI 0 "register_operand" "=x")
4770 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4771 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4772 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4773 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4774 [(set_attr "type" "sseimul")
4775 (set_attr "prefix" "vex")
4776 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply: two-operand pmullw with operand 1 tied to the
;; destination; operands 1 and 2 are commutative.
4778 (define_insn "*mulv8hi3"
4779 [(set (match_operand:V8HI 0 "register_operand" "=x")
4780 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4781 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4782 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4783 "pmullw\t{%2, %0|%0, %2}"
4784 [(set_attr "type" "sseimul")
4785 (set_attr "prefix_data16" "1")
4786 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (matched by the pmulhw
;; insns); operands are adjusted by ix86_fixup_binary_operands_no_copy.
4788 (define_expand "smulv8hi3_highpart"
4789 [(set (match_operand:V8HI 0 "register_operand" "")
4794 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4796 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4799 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX signed V8HI high-part multiply: three-operand vpmulhw, operands 1 and
;; 2 commutative.  The pattern name follows its siblings
;; (*smulv8hi3_highpart, *avx_umulv8hi3_highpart); being a starred name it
;; only shows up in RTL dumps and generates no gen_* function.
4801 (define_insn "*avx_smulv8hi3_highpart"
4802 [(set (match_operand:V8HI 0 "register_operand" "=x")
4807 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4809 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4811 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4812 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4813 [(set_attr "type" "sseimul")
4814 (set_attr "prefix" "vex")
4815 (set_attr "mode" "TI")])
;; SSE2 signed V8HI high-part multiply: two-operand pmulhw with operand 1
;; tied to the destination; operands 1 and 2 are commutative.
4817 (define_insn "*smulv8hi3_highpart"
4818 [(set (match_operand:V8HI 0 "register_operand" "=x")
4823 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4825 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4827 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4828 "pmulhw\t{%2, %0|%0, %2}"
4829 [(set_attr "type" "sseimul")
4830 (set_attr "prefix_data16" "1")
4831 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (matched by the pmulhuw
;; insns); operands are adjusted by ix86_fixup_binary_operands_no_copy.
4833 (define_expand "umulv8hi3_highpart"
4834 [(set (match_operand:V8HI 0 "register_operand" "")
4839 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4841 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4844 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX unsigned V8HI high-part multiply: three-operand vpmulhuw, operands 1
;; and 2 commutative.
4846 (define_insn "*avx_umulv8hi3_highpart"
4847 [(set (match_operand:V8HI 0 "register_operand" "=x")
4852 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4854 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4856 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4857 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4858 [(set_attr "type" "sseimul")
4859 (set_attr "prefix" "vex")
4860 (set_attr "mode" "TI")])
;; SSE2 unsigned V8HI high-part multiply: two-operand pmulhuw with operand 1
;; tied to the destination; operands 1 and 2 are commutative.
4862 (define_insn "*umulv8hi3_highpart"
4863 [(set (match_operand:V8HI 0 "register_operand" "=x")
4868 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4870 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4872 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4873 "pmulhuw\t{%2, %0|%0, %2}"
4874 [(set_attr "type" "sseimul")
4875 (set_attr "prefix_data16" "1")
4876 (set_attr "mode" "TI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result (matched by the pmuludq insns); operands are
;; adjusted by ix86_fixup_binary_operands_no_copy.
4878 (define_expand "sse2_umulv2siv2di3"
4879 [(set (match_operand:V2DI 0 "register_operand" "")
4883 (match_operand:V4SI 1 "nonimmediate_operand" "")
4884 (parallel [(const_int 0) (const_int 2)])))
4887 (match_operand:V4SI 2 "nonimmediate_operand" "")
4888 (parallel [(const_int 0) (const_int 2)])))))]
4890 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX widening multiply of elements 0 and 2 of two V4SI operands into V2DI:
;; three-operand vpmuludq, operands 1 and 2 commutative.
4892 (define_insn "*avx_umulv2siv2di3"
4893 [(set (match_operand:V2DI 0 "register_operand" "=x")
4897 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4898 (parallel [(const_int 0) (const_int 2)])))
4901 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4902 (parallel [(const_int 0) (const_int 2)])))))]
4903 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4904 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4905 [(set_attr "type" "sseimul")
4906 (set_attr "prefix" "vex")
4907 (set_attr "mode" "TI")])
;; SSE2 widening multiply of elements 0 and 2 of two V4SI operands into V2DI:
;; two-operand pmuludq with operand 1 tied to the destination.
4909 (define_insn "*sse2_umulv2siv2di3"
4910 [(set (match_operand:V2DI 0 "register_operand" "=x")
4914 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4915 (parallel [(const_int 0) (const_int 2)])))
4918 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4919 (parallel [(const_int 0) (const_int 2)])))))]
4920 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4921 "pmuludq\t{%2, %0|%0, %2}"
4922 [(set_attr "type" "sseimul")
4923 (set_attr "prefix_data16" "1")
4924 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 widening multiply of elements 0 and 2 of two V4SI
;; operands into V2DI (matched by the pmuldq insns); operands are adjusted
;; by ix86_fixup_binary_operands_no_copy.
4926 (define_expand "sse4_1_mulv2siv2di3"
4927 [(set (match_operand:V2DI 0 "register_operand" "")
4931 (match_operand:V4SI 1 "nonimmediate_operand" "")
4932 (parallel [(const_int 0) (const_int 2)])))
4935 (match_operand:V4SI 2 "nonimmediate_operand" "")
4936 (parallel [(const_int 0) (const_int 2)])))))]
4938 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX widening multiply of elements 0 and 2 of two V4SI operands into V2DI:
;; three-operand vpmuldq, operands 1 and 2 commutative.
4940 (define_insn "*avx_mulv2siv2di3"
4941 [(set (match_operand:V2DI 0 "register_operand" "=x")
4945 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4946 (parallel [(const_int 0) (const_int 2)])))
4949 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4950 (parallel [(const_int 0) (const_int 2)])))))]
4951 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4952 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4953 [(set_attr "type" "sseimul")
4954 (set_attr "prefix" "vex")
4955 (set_attr "mode" "TI")])
;; SSE4.1 widening multiply of elements 0 and 2 of two V4SI operands into
;; V2DI: two-operand pmuldq with operand 1 tied to the destination.
4957 (define_insn "*sse4_1_mulv2siv2di3"
4958 [(set (match_operand:V2DI 0 "register_operand" "=x")
4962 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4963 (parallel [(const_int 0) (const_int 2)])))
4966 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4967 (parallel [(const_int 0) (const_int 2)])))))]
4968 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4969 "pmuldq\t{%2, %0|%0, %2}"
4970 [(set_attr "type" "sseimul")
4971 (set_attr "prefix_extra" "1")
4972 (set_attr "mode" "TI")])
;; Expander for the pmaddwd pattern: V8HI operands 1 and 2 produce a V4SI
;; result.  Each operand is referenced twice, through V4HI selections
;; starting at element 0 and at element 1.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
4974 (define_expand "sse2_pmaddwd"
4975 [(set (match_operand:V4SI 0 "register_operand" "")
4980 (match_operand:V8HI 1 "nonimmediate_operand" "")
4981 (parallel [(const_int 0)
4987 (match_operand:V8HI 2 "nonimmediate_operand" "")
4988 (parallel [(const_int 0)
4994 (vec_select:V4HI (match_dup 1)
4995 (parallel [(const_int 1)
5000 (vec_select:V4HI (match_dup 2)
5001 (parallel [(const_int 1)
5004 (const_int 7)]))))))]
5006 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the pmaddwd pattern: three-operand vpmaddwd on V8HI inputs
;; with a V4SI result; operands 1 and 2 are commutative.
5008 (define_insn "*avx_pmaddwd"
5009 [(set (match_operand:V4SI 0 "register_operand" "=x")
5014 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5015 (parallel [(const_int 0)
5021 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5022 (parallel [(const_int 0)
5028 (vec_select:V4HI (match_dup 1)
5029 (parallel [(const_int 1)
5034 (vec_select:V4HI (match_dup 2)
5035 (parallel [(const_int 1)
5038 (const_int 7)]))))))]
5039 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5040 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5041 [(set_attr "type" "sseiadd")
5042 (set_attr "prefix" "vex")
5043 (set_attr "mode" "TI")])
;; SSE2 form of the pmaddwd pattern: two-operand pmaddwd with operand 1 tied
;; to the destination.  Also carries an atom_unit attribute of "simul".
5045 (define_insn "*sse2_pmaddwd"
5046 [(set (match_operand:V4SI 0 "register_operand" "=x")
5051 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5052 (parallel [(const_int 0)
5058 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5059 (parallel [(const_int 0)
5065 (vec_select:V4HI (match_dup 1)
5066 (parallel [(const_int 1)
5071 (vec_select:V4HI (match_dup 2)
5072 (parallel [(const_int 1)
5075 (const_int 7)]))))))]
5076 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5077 "pmaddwd\t{%2, %0|%0, %2}"
5078 [(set_attr "type" "sseiadd")
5079 (set_attr "atom_unit" "simul")
5080 (set_attr "prefix_data16" "1")
5081 (set_attr "mode" "TI")])
;; V4SI multiply expander.  Operand fix-up through
;; ix86_fixup_binary_operands_no_copy is only performed when SSE4.1 or SSE5
;; is available; other targets rely on the *sse2_mulv4si3 splitter.
5083 (define_expand "mulv4si3"
5084 [(set (match_operand:V4SI 0 "register_operand" "")
5085 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5086 (match_operand:V4SI 2 "register_operand" "")))]
5089 if (TARGET_SSE4_1 || TARGET_SSE5)
5090 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply: three-operand vpmulld, operands 1 and 2 commutative.
5093 (define_insn "*avx_mulv4si3"
5094 [(set (match_operand:V4SI 0 "register_operand" "=x")
5095 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5096 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5097 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5098 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5099 [(set_attr "type" "sseimul")
5100 (set_attr "prefix" "vex")
5101 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply: two-operand pmulld with operand 1 tied to the
;; destination; operands 1 and 2 are commutative.
5103 (define_insn "*sse4_1_mulv4si3"
5104 [(set (match_operand:V4SI 0 "register_operand" "=x")
5105 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5106 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5107 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5108 "pmulld\t{%2, %0|%0, %2}"
5109 [(set_attr "type" "sseimul")
5110 (set_attr "prefix_extra" "1")
5111 (set_attr "mode" "TI")])
5113 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5114 ;; multiply/add. In general, we expect the define_split to occur before
5115 ;; register allocation, so we have to handle the corner case where the target
5116 ;; is the same as one of the inputs.
;; The destination is earlyclobber ("=&x").  The split fires after reload, or
;; earlier when the destination register is not mentioned in either input;
;; it rewrites the multiply as a multiply-add whose addend (operand 3) is the
;; V4SI zero constant.
5117 (define_insn_and_split "*sse5_mulv4si3"
5118 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5119 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5120 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5123 "&& (reload_completed
5124 || (!reg_mentioned_p (operands[0], operands[1])
5125 && !reg_mentioned_p (operands[0], operands[2])))"
5129 (plus:V4SI (mult:V4SI (match_dup 1)
5133 operands[3] = CONST0_RTX (V4SImode);
5135 [(set_attr "type" "ssemuladd")
5136 (set_attr "mode" "TI")])
;; V4SI multiply for plain SSE2 (no SSE4.1, no SSE5), split before reload.
;; The split multiplies elements 0/2 with the widening pmuludq pattern,
;; shifts both inputs right as TImode values to bring elements 1/3 into the
;; 0/2 slots and multiplies those too, then gathers the low halves of the
;; products with pshufd and interleaves them with punpckldq.
5138 (define_insn_and_split "*sse2_mulv4si3"
5139 [(set (match_operand:V4SI 0 "register_operand" "")
5140 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5141 (match_operand:V4SI 2 "register_operand" "")))]
5142 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5143 && !(reload_completed || reload_in_progress)"
5148 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5154 t1 = gen_reg_rtx (V4SImode);
5155 t2 = gen_reg_rtx (V4SImode);
5156 t3 = gen_reg_rtx (V4SImode);
5157 t4 = gen_reg_rtx (V4SImode);
5158 t5 = gen_reg_rtx (V4SImode);
5159 t6 = gen_reg_rtx (V4SImode);
5160 thirtytwo = GEN_INT (32);
5162 /* Multiply elements 2 and 0. */
5163 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5166 /* Shift both input vectors down one element, so that elements 3
5167 and 1 are now in the slots for elements 2 and 0. For K8, at
5168 least, this is faster than using a shuffle. */
5169 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5170 gen_lowpart (TImode, op1),
5172 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5173 gen_lowpart (TImode, op2),
5175 /* Multiply elements 3 and 1. */
5176 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5179 /* Move the results in element 2 down to element 1; we don't care
5180 what goes in elements 2 and 3. */
5181 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5182 const0_rtx, const0_rtx));
5183 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5184 const0_rtx, const0_rtx));
5186 /* Merge the parts back together. */
5187 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, expanded before reload into 32-bit partial products.
;; Two sequences are visible below: one built from the SSE5 generators
;; (pmacsdd, phadddq, pmacsdql) and one built from sse2_umulv2siv2di3,
;; 32-bit V2DI shifts and V2DI adds.
;; NOTE(review): the test that selects between the two sequences is not
;; visible in this excerpt -- confirm against the full file.
5191 (define_insn_and_split "mulv2di3"
5192 [(set (match_operand:V2DI 0 "register_operand" "")
5193 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5194 (match_operand:V2DI 2 "register_operand" "")))]
5196 && !(reload_completed || reload_in_progress)"
5201 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5206 /* op1: A,B,C,D, op2: E,F,G,H */
5208 op1 = gen_lowpart (V4SImode, operands[1]);
5209 op2 = gen_lowpart (V4SImode, operands[2]);
5210 t1 = gen_reg_rtx (V4SImode);
5211 t2 = gen_reg_rtx (V4SImode);
5212 t3 = gen_reg_rtx (V4SImode);
5213 t4 = gen_reg_rtx (V2DImode);
5214 t5 = gen_reg_rtx (V2DImode);
5217 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5224 emit_move_insn (t2, CONST0_RTX (V4SImode));
5226 /* t3: (B*E),(A*F),(D*G),(C*H) */
5227 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5229 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5230 emit_insn (gen_sse5_phadddq (t4, t3));
5232 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5233 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5235 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5236 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5243 t1 = gen_reg_rtx (V2DImode);
5244 t2 = gen_reg_rtx (V2DImode);
5245 t3 = gen_reg_rtx (V2DImode);
5246 t4 = gen_reg_rtx (V2DImode);
5247 t5 = gen_reg_rtx (V2DImode);
5248 t6 = gen_reg_rtx (V2DImode);
5249 thirtytwo = GEN_INT (32);
5251 /* Multiply low parts. */
5252 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5253 gen_lowpart (V4SImode, op2)));
5255 /* Shift input vectors right 32 bits so we can multiply high parts. */
5256 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5257 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5259 /* Multiply high parts by low parts. */
5260 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5261 gen_lowpart (V4SImode, t3)));
5262 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5263 gen_lowpart (V4SImode, t2)));
5265 /* Shift them back. */
5266 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5267 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5269 /* Add the three parts together. */
5270 emit_insn (gen_addv2di3 (t6, t1, t4));
5271 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed V8HI multiply producing V4SI: computes mulv8hi3 and
;; smulv8hi3_highpart of the inputs into temporaries and combines them
;; with vec_interleave_highv8hi into operand 0 viewed as V8HI.
5275 (define_expand "vec_widen_smult_hi_v8hi"
5276 [(match_operand:V4SI 0 "register_operand" "")
5277 (match_operand:V8HI 1 "register_operand" "")
5278 (match_operand:V8HI 2 "register_operand" "")]
5281 rtx op1, op2, t1, t2, dest;
5285 t1 = gen_reg_rtx (V8HImode);
5286 t2 = gen_reg_rtx (V8HImode);
5287 dest = gen_lowpart (V8HImode, operands[0]);
5289 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5290 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5291 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed V8HI multiply producing V4SI: computes mulv8hi3 and
;; smulv8hi3_highpart of the inputs into temporaries and combines them
;; with vec_interleave_lowv8hi into operand 0 viewed as V8HI.
5295 (define_expand "vec_widen_smult_lo_v8hi"
5296 [(match_operand:V4SI 0 "register_operand" "")
5297 (match_operand:V8HI 1 "register_operand" "")
5298 (match_operand:V8HI 2 "register_operand" "")]
5301 rtx op1, op2, t1, t2, dest;
5305 t1 = gen_reg_rtx (V8HImode);
5306 t2 = gen_reg_rtx (V8HImode);
5307 dest = gen_lowpart (V8HImode, operands[0]);
5309 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5310 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5311 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply producing V4SI: computes mulv8hi3 and
;; umulv8hi3_highpart of the inputs into temporaries and combines them
;; with vec_interleave_highv8hi into operand 0 viewed as V8HI.
5315 (define_expand "vec_widen_umult_hi_v8hi"
5316 [(match_operand:V4SI 0 "register_operand" "")
5317 (match_operand:V8HI 1 "register_operand" "")
5318 (match_operand:V8HI 2 "register_operand" "")]
5321 rtx op1, op2, t1, t2, dest;
5325 t1 = gen_reg_rtx (V8HImode);
5326 t2 = gen_reg_rtx (V8HImode);
5327 dest = gen_lowpart (V8HImode, operands[0]);
5329 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5330 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5331 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply producing V4SI: computes mulv8hi3 and
;; umulv8hi3_highpart of the inputs into temporaries and combines them
;; with vec_interleave_lowv8hi into operand 0 viewed as V8HI.
5335 (define_expand "vec_widen_umult_lo_v8hi"
5336 [(match_operand:V4SI 0 "register_operand" "")
5337 (match_operand:V8HI 1 "register_operand" "")
5338 (match_operand:V8HI 2 "register_operand" "")]
5341 rtx op1, op2, t1, t2, dest;
5345 t1 = gen_reg_rtx (V8HImode);
5346 t2 = gen_reg_rtx (V8HImode);
5347 dest = gen_lowpart (V8HImode, operands[0]);
5349 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5350 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5351 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed V4SI multiply producing V2DI: shuffles each input with
;; sse2_pshufd_1 into a fresh V4SI temporary, then emits
;; sse5_mulv2div2di3_high on the two temporaries.
;; NOTE(review): the shuffle selectors and the enabling condition are not
;; visible in this excerpt.
5355 (define_expand "vec_widen_smult_hi_v4si"
5356 [(match_operand:V2DI 0 "register_operand" "")
5357 (match_operand:V4SI 1 "register_operand" "")
5358 (match_operand:V4SI 2 "register_operand" "")]
5363 t1 = gen_reg_rtx (V4SImode);
5364 t2 = gen_reg_rtx (V4SImode);
5366 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5371 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5376 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed V4SI multiply producing V2DI: shuffles each input with
;; sse2_pshufd_1 into a fresh V4SI temporary, then emits
;; sse5_mulv2div2di3_low on the two temporaries.
;; NOTE(review): the shuffle selectors and the enabling condition are not
;; visible in this excerpt.
5380 (define_expand "vec_widen_smult_lo_v4si"
5381 [(match_operand:V2DI 0 "register_operand" "")
5382 (match_operand:V4SI 1 "register_operand" "")
5383 (match_operand:V4SI 2 "register_operand" "")]
5388 t1 = gen_reg_rtx (V4SImode);
5389 t2 = gen_reg_rtx (V4SImode);
5391 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5396 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5401 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned V4SI multiply producing V2DI: interleaves each input
;; with itself using vec_interleave_highv4si, then multiplies the two
;; temporaries with sse2_umulv2siv2di3.
5406 (define_expand "vec_widen_umult_hi_v4si"
5407 [(match_operand:V2DI 0 "register_operand" "")
5408 (match_operand:V4SI 1 "register_operand" "")
5409 (match_operand:V4SI 2 "register_operand" "")]
5412 rtx op1, op2, t1, t2;
5416 t1 = gen_reg_rtx (V4SImode);
5417 t2 = gen_reg_rtx (V4SImode);
5419 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5420 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5421 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply producing V2DI: interleaves each input
;; with itself using vec_interleave_lowv4si, then multiplies the two
;; temporaries with sse2_umulv2siv2di3.
5425 (define_expand "vec_widen_umult_lo_v4si"
5426 [(match_operand:V2DI 0 "register_operand" "")
5427 (match_operand:V4SI 1 "register_operand" "")
5428 (match_operand:V4SI 2 "register_operand" "")]
5431 rtx op1, op2, t1, t2;
5435 t1 = gen_reg_rtx (V4SImode);
5436 t2 = gen_reg_rtx (V4SImode);
5438 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5439 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5440 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product of two V8HI vectors accumulated into V4SI:
;; sse2_pmaddwd of operands 1 and 2 goes to a temporary, which is then
;; added to operand 3 to form operand 0.
5444 (define_expand "sdot_prodv8hi"
5445 [(match_operand:V4SI 0 "register_operand" "")
5446 (match_operand:V8HI 1 "register_operand" "")
5447 (match_operand:V8HI 2 "register_operand" "")
5448 (match_operand:V4SI 3 "register_operand" "")]
5451 rtx t = gen_reg_rtx (V4SImode);
5452 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5453 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI vectors accumulated into V2DI.
;; First product: sse2_umulv2siv2di3 of the inputs, plus accumulator
;; operand 3.  Second product: both inputs are shifted as TImode values
;; with sse2_lshrti3 and multiplied again.  The two sums are added into
;; operand 0.
;; NOTE(review): the shift count passed to sse2_lshrti3 is not visible in
;; this excerpt.
5457 (define_expand "udot_prodv4si"
5458 [(match_operand:V2DI 0 "register_operand" "")
5459 (match_operand:V4SI 1 "register_operand" "")
5460 (match_operand:V4SI 2 "register_operand" "")
5461 (match_operand:V2DI 3 "register_operand" "")]
5466 t1 = gen_reg_rtx (V2DImode);
5467 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5468 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5470 t2 = gen_reg_rtx (V4SImode);
5471 t3 = gen_reg_rtx (V4SImode);
5472 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5473 gen_lowpart (TImode, operands[1]),
5475 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5476 gen_lowpart (TImode, operands[2]),
5479 t4 = gen_reg_rtx (V2DImode);
5480 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5482 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX three-operand vpsra for the SSEMODE24 modes (V8HI, V4SI).  The
;; count, operand 2, is an SImode nonmemory operand with constraints "xN".
5486 (define_insn "*avx_ashr<mode>3"
5487 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5489 (match_operand:SSEMODE24 1 "register_operand" "x")
5490 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5492 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5493 [(set_attr "type" "sseishft")
5494 (set_attr "prefix" "vex")
5495 (set_attr "mode" "TI")])
;; Two-operand psra for the SSEMODE24 modes (V8HI, V4SI); operand 1 is
;; tied to the destination.  The count, operand 2, is an SImode nonmemory
;; operand with constraints "xN".
5497 (define_insn "ashr<mode>3"
5498 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5500 (match_operand:SSEMODE24 1 "register_operand" "0")
5501 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5503 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5504 [(set_attr "type" "sseishft")
5505 (set_attr "prefix_data16" "1")
5506 (set_attr "mode" "TI")])
;; AVX three-operand logical right shift (vpsrl) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI).  The count, operand 2, is an SImode nonmemory
;; operand with constraints "xN".
5508 (define_insn "*avx_lshr<mode>3"
5509 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5510 (lshiftrt:SSEMODE248
5511 (match_operand:SSEMODE248 1 "register_operand" "x")
5512 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5514 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5515 [(set_attr "type" "sseishft")
5516 (set_attr "prefix" "vex")
5517 (set_attr "mode" "TI")])
;; Two-operand logical right shift (psrl) for the SSEMODE248 modes (V8HI,
;; V4SI, V2DI); operand 1 is tied to the destination.  The count, operand
;; 2, is an SImode nonmemory operand with constraints "xN".
5519 (define_insn "lshr<mode>3"
5520 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5521 (lshiftrt:SSEMODE248
5522 (match_operand:SSEMODE248 1 "register_operand" "0")
5523 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5525 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5526 [(set_attr "type" "sseishft")
5527 (set_attr "prefix_data16" "1")
5528 (set_attr "mode" "TI")])
;; AVX three-operand vpsll for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; The count, operand 2, is an SImode nonmemory operand with constraints
;; "xN".
5530 (define_insn "*avx_ashl<mode>3"
5531 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5533 (match_operand:SSEMODE248 1 "register_operand" "x")
5534 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5536 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5537 [(set_attr "type" "sseishft")
5538 (set_attr "prefix" "vex")
5539 (set_attr "mode" "TI")])
;; Two-operand psll for the SSEMODE248 modes (V8HI, V4SI, V2DI); operand 1
;; is tied to the destination.  The count, operand 2, is an SImode
;; nonmemory operand with constraints "xN".
5541 (define_insn "ashl<mode>3"
5542 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5544 (match_operand:SSEMODE248 1 "register_operand" "0")
5545 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5547 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5548 [(set_attr "type" "sseishft")
5549 (set_attr "prefix_data16" "1")
5550 (set_attr "mode" "TI")])
;; Whole-register left shift for the SSEMODEI modes, expressed as a TImode
;; ashift: operands 0 and 1 are re-viewed as TImode with gen_lowpart, and
;; the count must satisfy const_0_to_255_mul_8_operand.
5552 (define_expand "vec_shl_<mode>"
5553 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5554 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5555 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5558 operands[0] = gen_lowpart (TImode, operands[0]);
5559 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for the SSEMODEI modes, expressed as
;; a TImode lshiftrt: operands 0 and 1 are re-viewed as TImode with
;; gen_lowpart, and the count must satisfy const_0_to_255_mul_8_operand.
5562 (define_expand "vec_shr_<mode>"
5563 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5564 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5565 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5568 operands[0] = gen_lowpart (TImode, operands[0]);
5569 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX three-operand form of the <code> binary operation on the SSEMODE124
;; modes (V16QI, V8HI, V4SI); the mnemonic is assembled from
;; <maxminiprefix> and <ssevecsize>.
5572 (define_insn "*avx_<code><mode>3"
5573 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5575 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5576 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5577 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5578 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5579 [(set_attr "type" "sseiadd")
5580 (set_attr "prefix" "vex")
5581 (set_attr "mode" "TI")])
;; Expander for the <code> binary operation on V16QI; its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the operands.
5583 (define_expand "<code>v16qi3"
5584 [(set (match_operand:V16QI 0 "register_operand" "")
5586 (match_operand:V16QI 1 "nonimmediate_operand" "")
5587 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5589 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 two-operand p<maxminiprefix>b on V16QI; operand 1 is tied to the
;; destination.
5591 (define_insn "*<code>v16qi3"
5592 [(set (match_operand:V16QI 0 "register_operand" "=x")
5594 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5595 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5596 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5597 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5598 [(set_attr "type" "sseiadd")
5599 (set_attr "prefix_data16" "1")
5600 (set_attr "mode" "TI")])
;; Expander for the <code> binary operation on V8HI; its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the operands.
5602 (define_expand "<code>v8hi3"
5603 [(set (match_operand:V8HI 0 "register_operand" "")
5605 (match_operand:V8HI 1 "nonimmediate_operand" "")
5606 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5608 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 two-operand p<maxminiprefix>w on V8HI; operand 1 is tied to the
;; destination.
5610 (define_insn "*<code>v8hi3"
5611 [(set (match_operand:V8HI 0 "register_operand" "=x")
5613 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5614 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5615 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5616 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5617 [(set_attr "type" "sseiadd")
5618 (set_attr "prefix_data16" "1")
5619 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two paths are visible: one only fixes up the
;; operands for UMAX; the other computes the result as an unsigned
;; saturating subtract (sse2_ussubv8hi3 of operands 1 and 2) followed by
;; adding operand 2 back, using a fresh temporary when operand 0 equals
;; operand 2.
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
5621 (define_expand "umaxv8hi3"
5622 [(set (match_operand:V8HI 0 "register_operand" "")
5623 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5624 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5628 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5631 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5632 if (rtx_equal_p (op3, op2))
5633 op3 = gen_reg_rtx (V8HImode);
5634 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5635 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible: one only fixes up the operands for SMAX; the other goes
;; through ix86_expand_int_vcond with the comparison operand 1 GT operand
;; 2, passing operand 1 as xops[1] and operand 2 as xops[2].
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
5640 (define_expand "smax<mode>3"
5641 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5642 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5643 (match_operand:SSEMODE14 2 "register_operand" "")))]
5647 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5653 xops[0] = operands[0];
5654 xops[1] = operands[1];
5655 xops[2] = operands[2];
5656 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5657 xops[4] = operands[1];
5658 xops[5] = operands[2];
5659 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand form of the <code> binary operation on the SSEMODE14
;; modes (V16QI, V4SI); operand 1 is tied to the destination.
5665 (define_insn "*sse4_1_<code><mode>3"
5666 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5668 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5669 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5670 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5671 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5672 [(set_attr "type" "sseiadd")
5673 (set_attr "prefix_extra" "1")
5674 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  Two paths are visible: one only fixes up the
;; operands for UMAX; the other goes through ix86_expand_int_vcond with
;; the comparison operand 1 GTU operand 2, passing operand 1 as xops[1]
;; and operand 2 as xops[2].
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
5676 (define_expand "umaxv4si3"
5677 [(set (match_operand:V4SI 0 "register_operand" "")
5678 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5679 (match_operand:V4SI 2 "register_operand" "")))]
5683 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5689 xops[0] = operands[0];
5690 xops[1] = operands[1];
5691 xops[2] = operands[2];
5692 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5693 xops[4] = operands[1];
5694 xops[5] = operands[2];
5695 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand form of the <code> binary operation on the SSEMODE24
;; modes (V8HI, V4SI); operand 1 is tied to the destination.
5701 (define_insn "*sse4_1_<code><mode>3"
5702 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5704 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5705 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5706 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5707 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5708 [(set_attr "type" "sseiadd")
5709 (set_attr "prefix_extra" "1")
5710 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible: one only fixes up the operands for SMIN; the other goes
;; through ix86_expand_int_vcond with the comparison operand 1 GT operand
;; 2.  Relative to the smax expander the value operands are swapped:
;; xops[1] is operand 2 and xops[2] is operand 1.
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
5712 (define_expand "smin<mode>3"
5713 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5714 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5715 (match_operand:SSEMODE14 2 "register_operand" "")))]
5719 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5725 xops[0] = operands[0];
5726 xops[1] = operands[2];
5727 xops[2] = operands[1];
5728 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5729 xops[4] = operands[1];
5730 xops[5] = operands[2];
5731 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  Two paths are
;; visible: one only fixes up the operands for UMIN; the other goes
;; through ix86_expand_int_vcond with the comparison operand 1 GTU operand
;; 2, passing operand 2 as xops[1] and operand 1 as xops[2].
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
5737 (define_expand "umin<mode>3"
5738 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5739 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5740 (match_operand:SSEMODE24 2 "register_operand" "")))]
5744 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5750 xops[0] = operands[0];
5751 xops[1] = operands[2];
5752 xops[2] = operands[1];
5753 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5754 xops[4] = operands[1];
5755 xops[5] = operands[2];
5756 ok = ix86_expand_int_vcond (xops);
5762 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5764 ;; Parallel integral comparisons
5766 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the element-wise equality compare on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI), enabled for SSE2 without SSE5; its only
;; preparation step is ix86_fixup_binary_operands_no_copy for EQ.
5768 (define_expand "sse2_eq<mode>3"
5769 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5771 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5772 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5773 "TARGET_SSE2 && !TARGET_SSE5"
5774 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX three-operand vpcmpeq for the SSEMODE1248 modes (V16QI, V8HI, V4SI,
;; V2DI).
5776 (define_insn "*avx_eq<mode>3"
5777 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5779 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5780 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5781 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5782 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5783 [(set_attr "type" "ssecmp")
5784 (set_attr "prefix" "vex")
5785 (set_attr "mode" "TI")])
;; SSE2 two-operand pcmpeq for the SSEMODE124 modes (V16QI, V8HI, V4SI),
;; disabled when SSE5 is enabled; operand 1 is tied to the destination.
5787 (define_insn "*sse2_eq<mode>3"
5788 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5790 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5791 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5792 "TARGET_SSE2 && !TARGET_SSE5
5793 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5794 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5795 [(set_attr "type" "ssecmp")
5796 (set_attr "prefix_data16" "1")
5797 (set_attr "mode" "TI")])
;; Expander for the V2DI equality compare; its only preparation step is
;; ix86_fixup_binary_operands_no_copy for EQ.
5799 (define_expand "sse4_1_eqv2di3"
5800 [(set (match_operand:V2DI 0 "register_operand" "")
5802 (match_operand:V2DI 1 "nonimmediate_operand" "")
5803 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5805 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand pcmpeqq on V2DI; operand 1 is tied to the
;; destination.
5807 (define_insn "*sse4_1_eqv2di3"
5808 [(set (match_operand:V2DI 0 "register_operand" "=x")
5810 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5811 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5812 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5813 "pcmpeqq\t{%2, %0|%0, %2}"
5814 [(set_attr "type" "ssecmp")
5815 (set_attr "prefix_extra" "1")
5816 (set_attr "mode" "TI")])
;; AVX three-operand vpcmpgt for the SSEMODE1248 modes (V16QI, V8HI, V4SI,
;; V2DI).  Operand 1 must be a register; there is no commutative marker.
5818 (define_insn "*avx_gt<mode>3"
5819 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5821 (match_operand:SSEMODE1248 1 "register_operand" "x")
5822 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5824 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5825 [(set_attr "type" "ssecmp")
5826 (set_attr "prefix" "vex")
5827 (set_attr "mode" "TI")])
;; SSE2 two-operand pcmpgt for the SSEMODE124 modes (V16QI, V8HI, V4SI),
;; disabled when SSE5 is enabled; operand 1 is tied to the destination.
5829 (define_insn "sse2_gt<mode>3"
5830 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5832 (match_operand:SSEMODE124 1 "register_operand" "0")
5833 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5834 "TARGET_SSE2 && !TARGET_SSE5"
5835 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5836 [(set_attr "type" "ssecmp")
5837 (set_attr "prefix_data16" "1")
5838 (set_attr "mode" "TI")])
;; Two-operand pcmpgtq on V2DI; operand 1 is tied to the destination.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5840 (define_insn "sse4_2_gtv2di3"
5841 [(set (match_operand:V2DI 0 "register_operand" "=x")
5843 (match_operand:V2DI 1 "register_operand" "0")
5844 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5846 "pcmpgtq\t{%2, %0|%0, %2}"
5847 [(set_attr "type" "ssecmp")
5848 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODEI modes: operand 3 is the
;; comparison of operands 4 and 5, and operands 1 and 2 are the two
;; values selected between.  Expansion is delegated to
;; ix86_expand_int_vcond.
5850 (define_expand "vcond<mode>"
5851 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5852 (if_then_else:SSEMODEI
5853 (match_operator 3 ""
5854 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
5855 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
5856 (match_operand:SSEMODEI 1 "general_operand" "")
5857 (match_operand:SSEMODEI 2 "general_operand" "")))]
5860 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for the SSEMODEI modes under the vcondu
;; name; the visible pattern and the delegation to ix86_expand_int_vcond
;; are the same as in the vcond expander.
5866 (define_expand "vcondu<mode>"
5867 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5868 (if_then_else:SSEMODEI
5869 (match_operator 3 ""
5870 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
5871 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
5872 (match_operand:SSEMODEI 1 "general_operand" "")
5873 (match_operand:SSEMODEI 2 "general_operand" "")))]
5876 if (ix86_expand_int_vcond (operands))
5882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5884 ;; Parallel bitwise logical operations
5886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an XOR of operand
;; 1 with operands[2].  The preparation code builds operands[2] as a
;; CONST_VECTOR whose every element is constm1_rtx and forces it into a
;; register.
5888 (define_expand "one_cmpl<mode>2"
5889 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5890 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5894 int i, n = GET_MODE_NUNITS (<MODE>mode);
5895 rtvec v = rtvec_alloc (n);
5897 for (i = 0; i < n; ++i)
5898 RTVEC_ELT (v, i) = constm1_rtx;
5900 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 256-bit integer modes (AVX256MODEI): operand 1 is
;; complemented (not) and combined with operand 2 using vandnps.
5903 (define_insn "*avx_andnot<mode>3"
5904 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5906 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5907 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5909 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5910 [(set_attr "type" "sselog")
5911 (set_attr "prefix" "vex")
5912 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the SSEMODEI modes on targets with SSE but not SSE2: uses
;; the two-operand andnps, with operand 1 tied to the destination.
5914 (define_insn "*sse_andnot<mode>3"
5915 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5917 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5918 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5919 "(TARGET_SSE && !TARGET_SSE2)"
5920 "andnps\t{%2, %0|%0, %2}"
5921 [(set_attr "type" "sselog")
5922 (set_attr "mode" "V4SF")])
;; AVX and-not for the 128-bit integer modes (SSEMODEI): operand 1 is
;; complemented (not) and combined with operand 2 using the three-operand
;; vpandn.
5924 (define_insn "*avx_andnot<mode>3"
5925 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5927 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5928 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5930 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5931 [(set_attr "type" "sselog")
5932 (set_attr "prefix" "vex")
5933 (set_attr "mode" "TI")])
;; And-not for the SSEMODEI modes using the two-operand pandn; operand 1
;; (the complemented input) is tied to the destination.
5935 (define_insn "sse2_andnot<mode>3"
5936 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5938 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5939 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5941 "pandn\t{%2, %0|%0, %2}"
5942 [(set_attr "type" "sselog")
5943 (set_attr "prefix_data16" "1")
5944 (set_attr "mode" "TI")])
;; And-not on TFmode values using the two-operand pandn; operand 1 (the
;; complemented input) is tied to the destination.
5946 (define_insn "*andnottf3"
5947 [(set (match_operand:TF 0 "register_operand" "=x")
5949 (not:TF (match_operand:TF 1 "register_operand" "0"))
5950 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5952 "pandn\t{%2, %0|%0, %2}"
5953 [(set_attr "type" "sselog")
5954 (set_attr "prefix_data16" "1")
5955 (set_attr "mode" "TI")])
;; Expander for the <code> binary operation on the SSEMODEI modes; its
;; only preparation step is ix86_fixup_binary_operands_no_copy.
5957 (define_expand "<code><mode>3"
5958 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5960 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5961 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5963 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX <code> operation on the 256-bit integer modes (AVX256MODEI),
;; emitted as the three-operand v<plogicprefix>ps.
5965 (define_insn "*avx_<code><mode>3"
5966 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5968 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5969 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5971 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5972 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5973 [(set_attr "type" "sselog")
5974 (set_attr "prefix" "vex")
5975 (set_attr "mode" "<avxvecpsmode>")])
;; <code> operation on the SSEMODEI modes for targets with SSE but not
;; SSE2: emitted as the two-operand <plogicprefix>ps, with operand 1 tied
;; to the destination.
5977 (define_insn "*sse_<code><mode>3"
5978 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5980 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5981 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5982 "(TARGET_SSE && !TARGET_SSE2)
5983 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5984 "<plogicprefix>ps\t{%2, %0|%0, %2}"
5985 [(set_attr "type" "sselog")
5986 (set_attr "mode" "V4SF")])
;; AVX <code> operation on the 128-bit integer modes (SSEMODEI), emitted
;; as the three-operand vp<plogicprefix>.
5988 (define_insn "*avx_<code><mode>3"
5989 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5991 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5992 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5994 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5995 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
5996 [(set_attr "type" "sselog")
5997 (set_attr "prefix" "vex")
5998 (set_attr "mode" "TI")])
;; SSE2 <code> operation on the SSEMODEI modes, emitted as the two-operand
;; p<plogicprefix>; operand 1 is tied to the destination.
6000 (define_insn "*sse2_<code><mode>3"
6001 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6003 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6004 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6005 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6006 "p<plogicprefix>\t{%2, %0|%0, %2}"
6007 [(set_attr "type" "sselog")
6008 (set_attr "prefix_data16" "1")
6009 (set_attr "mode" "TI")])
;; Expander for the <code> binary operation on TFmode; its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
6011 (define_expand "<code>tf3"
6012 [(set (match_operand:TF 0 "register_operand" "")
6014 (match_operand:TF 1 "nonimmediate_operand" "")
6015 (match_operand:TF 2 "nonimmediate_operand" "")))]
6017 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 <code> operation on TFmode, emitted as the two-operand
;; p<plogicprefix>; operand 1 is tied to the destination.
6019 (define_insn "*<code>tf3"
6020 [(set (match_operand:TF 0 "register_operand" "=x")
6022 (match_operand:TF 1 "nonimmediate_operand" "%0")
6023 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6024 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6025 "p<plogicprefix>\t{%2, %0|%0, %2}"
6026 [(set_attr "type" "sselog")
6027 (set_attr "prefix_data16" "1")
6028 (set_attr "mode" "TI")])
6030 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6032 ;; Parallel integral element swizzling
6034 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6037 ;; op1 = abcdefghijklmnop
6038 ;; op2 = qrstuvwxyz012345
6039 ;; h1 = aqbrcsdteufvgwhx
6040 ;; l1 = iyjzk0l1m2n3o4p5
6041 ;; h2 = aiqybjrzcks0dlt1
6042 ;; l2 = emu2fnv3gow4hpx5
6043 ;; h3 = aeimquy2bfjnrvz3
6044 ;; l3 = cgkosw04dhlptx15
6045 ;; result = bdfhjlnprtvxz135
;; Truncating pack of two V8HI vectors into one V16QI.  Two paths are
;; visible: a call to ix86_expand_sse5_pack, and a sequence of seven byte
;; interleaves (three high/low rounds plus a final low interleave) applied
;; to the inputs viewed as V16QI.
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
6046 (define_expand "vec_pack_trunc_v8hi"
6047 [(match_operand:V16QI 0 "register_operand" "")
6048 (match_operand:V8HI 1 "register_operand" "")
6049 (match_operand:V8HI 2 "register_operand" "")]
6052 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6056 ix86_expand_sse5_pack (operands);
6060 op1 = gen_lowpart (V16QImode, operands[1]);
6061 op2 = gen_lowpart (V16QImode, operands[2]);
6062 h1 = gen_reg_rtx (V16QImode);
6063 l1 = gen_reg_rtx (V16QImode);
6064 h2 = gen_reg_rtx (V16QImode);
6065 l2 = gen_reg_rtx (V16QImode);
6066 h3 = gen_reg_rtx (V16QImode);
6067 l3 = gen_reg_rtx (V16QImode);
6069 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6070 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6071 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6072 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6073 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6074 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6075 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6086 ;; result = bdfhjlnp
;; Truncating pack of two V4SI vectors into one V8HI.  Two paths are
;; visible: a call to ix86_expand_sse5_pack, and a sequence of five
;; halfword interleaves (two high/low rounds plus a final low interleave)
;; applied to the inputs viewed as V8HI.
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
6087 (define_expand "vec_pack_trunc_v4si"
6088 [(match_operand:V8HI 0 "register_operand" "")
6089 (match_operand:V4SI 1 "register_operand" "")
6090 (match_operand:V4SI 2 "register_operand" "")]
6093 rtx op1, op2, h1, l1, h2, l2;
6097 ix86_expand_sse5_pack (operands);
6101 op1 = gen_lowpart (V8HImode, operands[1]);
6102 op2 = gen_lowpart (V8HImode, operands[2]);
6103 h1 = gen_reg_rtx (V8HImode);
6104 l1 = gen_reg_rtx (V8HImode);
6105 h2 = gen_reg_rtx (V8HImode);
6106 l2 = gen_reg_rtx (V8HImode);
6108 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6109 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6110 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6111 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6112 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI.  Two paths are
;; visible: a call to ix86_expand_sse5_pack, and a sequence of three
;; doubleword interleaves (one high/low round plus a final low
;; interleave) applied to the inputs viewed as V4SI.
;; NOTE(review): the test that selects between the paths is not visible
;; in this excerpt.
6122 (define_expand "vec_pack_trunc_v2di"
6123 [(match_operand:V4SI 0 "register_operand" "")
6124 (match_operand:V2DI 1 "register_operand" "")
6125 (match_operand:V2DI 2 "register_operand" "")]
6128 rtx op1, op2, h1, l1;
6132 ix86_expand_sse5_pack (operands);
6136 op1 = gen_lowpart (V4SImode, operands[1]);
6137 op2 = gen_lowpart (V4SImode, operands[2]);
6138 h1 = gen_reg_rtx (V4SImode);
6139 l1 = gen_reg_rtx (V4SImode);
6141 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6142 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6143 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; High interleave of two V16QI vectors: the selection list alternates
;; indices 8..15 with indices 24..31.  The expander emits
;; sse2_punpckhbw on the operands.
6147 (define_expand "vec_interleave_highv16qi"
6148 [(set (match_operand:V16QI 0 "register_operand" "")
6151 (match_operand:V16QI 1 "register_operand" "")
6152 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6153 (parallel [(const_int 8) (const_int 24)
6154 (const_int 9) (const_int 25)
6155 (const_int 10) (const_int 26)
6156 (const_int 11) (const_int 27)
6157 (const_int 12) (const_int 28)
6158 (const_int 13) (const_int 29)
6159 (const_int 14) (const_int 30)
6160 (const_int 15) (const_int 31)])))]
6163 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Low interleave of two V16QI vectors: the selection list alternates
;; indices 0..7 with indices 16..23.  The expander emits
;; sse2_punpcklbw on the operands.
6167 (define_expand "vec_interleave_lowv16qi"
6168 [(set (match_operand:V16QI 0 "register_operand" "")
6171 (match_operand:V16QI 1 "register_operand" "")
6172 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6173 (parallel [(const_int 0) (const_int 16)
6174 (const_int 1) (const_int 17)
6175 (const_int 2) (const_int 18)
6176 (const_int 3) (const_int 19)
6177 (const_int 4) (const_int 20)
6178 (const_int 5) (const_int 21)
6179 (const_int 6) (const_int 22)
6180 (const_int 7) (const_int 23)])))]
6183 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; High interleave of two V8HI vectors: the selection list alternates
;; indices 4..7 with indices 12..15.  The expander emits
;; sse2_punpckhwd on the operands.  Operand 0 uses an empty constraint
;; string, matching the other vec_interleave expanders.
6187 (define_expand "vec_interleave_highv8hi"
6188 [(set (match_operand:V8HI 0 "register_operand" "")
6191 (match_operand:V8HI 1 "register_operand" "")
6192 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6193 (parallel [(const_int 4) (const_int 12)
6194 (const_int 5) (const_int 13)
6195 (const_int 6) (const_int 14)
6196 (const_int 7) (const_int 15)])))]
6199 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Low interleave of two V8HI vectors: the selection list alternates
;; indices 0..3 with indices 8..11.  The expander emits
;; sse2_punpcklwd on the operands.
6203 (define_expand "vec_interleave_lowv8hi"
6204 [(set (match_operand:V8HI 0 "register_operand" "")
6207 (match_operand:V8HI 1 "register_operand" "")
6208 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6209 (parallel [(const_int 0) (const_int 8)
6210 (const_int 1) (const_int 9)
6211 (const_int 2) (const_int 10)
6212 (const_int 3) (const_int 11)])))]
6215 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; High interleave of two V4SI vectors: selection indices 2, 6, 3, 7.
;; The expander emits sse2_punpckhdq on the operands.
6219 (define_expand "vec_interleave_highv4si"
6220 [(set (match_operand:V4SI 0 "register_operand" "")
6223 (match_operand:V4SI 1 "register_operand" "")
6224 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6225 (parallel [(const_int 2) (const_int 6)
6226 (const_int 3) (const_int 7)])))]
6229 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Low interleave of two V4SI vectors: selection indices 0, 4, 1, 5.
;; The expander emits sse2_punpckldq on the operands.
6233 (define_expand "vec_interleave_lowv4si"
6234 [(set (match_operand:V4SI 0 "register_operand" "")
6237 (match_operand:V4SI 1 "register_operand" "")
6238 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6239 (parallel [(const_int 0) (const_int 4)
6240 (const_int 1) (const_int 5)])))]
6243 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; High interleave of two V2DI vectors; the expander emits
;; sse2_punpckhqdq on the operands.
;; NOTE(review): only the first selection index (1) is visible in this
;; excerpt.
6247 (define_expand "vec_interleave_highv2di"
6248 [(set (match_operand:V2DI 0 "register_operand" "")
6251 (match_operand:V2DI 1 "register_operand" "")
6252 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6253 (parallel [(const_int 1)
6257 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Low interleave of two V2DI vectors; the expander emits
;; sse2_punpcklqdq on the operands.
;; NOTE(review): only the first selection index (0) is visible in this
;; excerpt.
6261 (define_expand "vec_interleave_lowv2di"
6262 [(set (match_operand:V2DI 0 "register_operand" "")
6265 (match_operand:V2DI 1 "register_operand" "")
6266 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6267 (parallel [(const_int 0)
6271 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; High interleave of two V4SF vectors: selection indices 2, 6, 3, 7.
;; No preparation code is visible for this expander.
6275 (define_expand "vec_interleave_highv4sf"
6276 [(set (match_operand:V4SF 0 "register_operand" "")
6279 (match_operand:V4SF 1 "register_operand" "")
6280 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6281 (parallel [(const_int 2) (const_int 6)
6282 (const_int 3) (const_int 7)])))]
;; Expander vec_interleave_lowv4sf: V4SF operands, selection indices
;; 0 4 1 5.  No preparation statements are visible for this expander.
6285 (define_expand "vec_interleave_lowv4sf"
6286 [(set (match_operand:V4SF 0 "register_operand" "")
6289 (match_operand:V4SF 1 "register_operand" "")
6290 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6291 (parallel [(const_int 0) (const_int 4)
6292 (const_int 1) (const_int 5)])))]
;; Expander vec_interleave_highv2df: V2DF operands.
;; NOTE(review): only the first selection index (1) is visible here; the
;; rest of the pattern is missing from this listing.
6295 (define_expand "vec_interleave_highv2df"
6296 [(set (match_operand:V2DF 0 "register_operand" "")
6299 (match_operand:V2DF 1 "register_operand" "")
6300 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6301 (parallel [(const_int 1)
;; Expander vec_interleave_lowv2df: V2DF operands.
;; NOTE(review): only the first selection index (0) is visible here; the
;; rest of the pattern is missing from this listing.
6305 (define_expand "vec_interleave_lowv2df"
6306 [(set (match_operand:V2DF 0 "register_operand" "")
6309 (match_operand:V2DF 1 "register_operand" "")
6310 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6311 (parallel [(const_int 0)
;; Three-operand vpacksswb: two V8HI sources (operand 2 may be memory)
;; produce V16QI operand 0.  The destination is independent of operand 1.
6315 (define_insn "*avx_packsswb"
6316 [(set (match_operand:V16QI 0 "register_operand" "=x")
6319 (match_operand:V8HI 1 "register_operand" "x"))
6321 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6323 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6324 [(set_attr "type" "sselog")
6325 (set_attr "prefix" "vex")
6326 (set_attr "mode" "TI")])
;; Two-operand packsswb: operand 1 is tied to the destination (constraint
;; "0"), so only %0 and %2 appear in the template.
6328 (define_insn "sse2_packsswb"
6329 [(set (match_operand:V16QI 0 "register_operand" "=x")
6332 (match_operand:V8HI 1 "register_operand" "0"))
6334 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6336 "packsswb\t{%2, %0|%0, %2}"
6337 [(set_attr "type" "sselog")
6338 (set_attr "prefix_data16" "1")
6339 (set_attr "mode" "TI")])
;; Three-operand vpackssdw: two V4SI sources (operand 2 may be memory)
;; produce V8HI operand 0.
6341 (define_insn "*avx_packssdw"
6342 [(set (match_operand:V8HI 0 "register_operand" "=x")
6345 (match_operand:V4SI 1 "register_operand" "x"))
6347 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6349 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6350 [(set_attr "type" "sselog")
6351 (set_attr "prefix" "vex")
6352 (set_attr "mode" "TI")])
;; Two-operand packssdw: operand 1 is tied to the destination (constraint
;; "0"); V4SI sources, V8HI result.
6354 (define_insn "sse2_packssdw"
6355 [(set (match_operand:V8HI 0 "register_operand" "=x")
6358 (match_operand:V4SI 1 "register_operand" "0"))
6360 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6362 "packssdw\t{%2, %0|%0, %2}"
6363 [(set_attr "type" "sselog")
6364 (set_attr "prefix_data16" "1")
6365 (set_attr "mode" "TI")])
;; Three-operand vpackuswb: two V8HI sources (operand 2 may be memory)
;; produce V16QI operand 0.
6367 (define_insn "*avx_packuswb"
6368 [(set (match_operand:V16QI 0 "register_operand" "=x")
6371 (match_operand:V8HI 1 "register_operand" "x"))
6373 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6375 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6376 [(set_attr "type" "sselog")
6377 (set_attr "prefix" "vex")
6378 (set_attr "mode" "TI")])
;; Two-operand packuswb: operand 1 is tied to the destination (constraint
;; "0"); V8HI sources, V16QI result.
6380 (define_insn "sse2_packuswb"
6381 [(set (match_operand:V16QI 0 "register_operand" "=x")
6384 (match_operand:V8HI 1 "register_operand" "0"))
6386 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6388 "packuswb\t{%2, %0|%0, %2}"
6389 [(set_attr "type" "sselog")
6390 (set_attr "prefix_data16" "1")
6391 (set_attr "mode" "TI")])
;; Three-operand vpunpckhbw on V16QI: the selection list pairs index i
;; with index i+16 for i = 8..15.
6393 (define_insn "*avx_punpckhbw"
6394 [(set (match_operand:V16QI 0 "register_operand" "=x")
6397 (match_operand:V16QI 1 "register_operand" "x")
6398 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6399 (parallel [(const_int 8) (const_int 24)
6400 (const_int 9) (const_int 25)
6401 (const_int 10) (const_int 26)
6402 (const_int 11) (const_int 27)
6403 (const_int 12) (const_int 28)
6404 (const_int 13) (const_int 29)
6405 (const_int 14) (const_int 30)
6406 (const_int 15) (const_int 31)])))]
6408 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6409 [(set_attr "type" "sselog")
6410 (set_attr "prefix" "vex")
6411 (set_attr "mode" "TI")])
;; Two-operand punpckhbw on V16QI: same selection list as the AVX form
;; (i paired with i+16, i = 8..15), with operand 1 tied to operand 0.
6413 (define_insn "sse2_punpckhbw"
6414 [(set (match_operand:V16QI 0 "register_operand" "=x")
6417 (match_operand:V16QI 1 "register_operand" "0")
6418 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6419 (parallel [(const_int 8) (const_int 24)
6420 (const_int 9) (const_int 25)
6421 (const_int 10) (const_int 26)
6422 (const_int 11) (const_int 27)
6423 (const_int 12) (const_int 28)
6424 (const_int 13) (const_int 29)
6425 (const_int 14) (const_int 30)
6426 (const_int 15) (const_int 31)])))]
6428 "punpckhbw\t{%2, %0|%0, %2}"
6429 [(set_attr "type" "sselog")
6430 (set_attr "prefix_data16" "1")
6431 (set_attr "mode" "TI")])
;; Three-operand vpunpcklbw on V16QI: the selection list pairs index i
;; with index i+16 for i = 0..7.
6433 (define_insn "*avx_punpcklbw"
6434 [(set (match_operand:V16QI 0 "register_operand" "=x")
6437 (match_operand:V16QI 1 "register_operand" "x")
6438 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6439 (parallel [(const_int 0) (const_int 16)
6440 (const_int 1) (const_int 17)
6441 (const_int 2) (const_int 18)
6442 (const_int 3) (const_int 19)
6443 (const_int 4) (const_int 20)
6444 (const_int 5) (const_int 21)
6445 (const_int 6) (const_int 22)
6446 (const_int 7) (const_int 23)])))]
6448 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6449 [(set_attr "type" "sselog")
6450 (set_attr "prefix" "vex")
6451 (set_attr "mode" "TI")])
;; Two-operand punpcklbw on V16QI: same selection list as the AVX form
;; (i paired with i+16, i = 0..7), with operand 1 tied to operand 0.
6453 (define_insn "sse2_punpcklbw"
6454 [(set (match_operand:V16QI 0 "register_operand" "=x")
6457 (match_operand:V16QI 1 "register_operand" "0")
6458 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6459 (parallel [(const_int 0) (const_int 16)
6460 (const_int 1) (const_int 17)
6461 (const_int 2) (const_int 18)
6462 (const_int 3) (const_int 19)
6463 (const_int 4) (const_int 20)
6464 (const_int 5) (const_int 21)
6465 (const_int 6) (const_int 22)
6466 (const_int 7) (const_int 23)])))]
6468 "punpcklbw\t{%2, %0|%0, %2}"
6469 [(set_attr "type" "sselog")
6470 (set_attr "prefix_data16" "1")
6471 (set_attr "mode" "TI")])
;; Three-operand vpunpckhwd on V8HI: the selection list pairs index i
;; with index i+8 for i = 4..7.
6473 (define_insn "*avx_punpckhwd"
6474 [(set (match_operand:V8HI 0 "register_operand" "=x")
6477 (match_operand:V8HI 1 "register_operand" "x")
6478 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6479 (parallel [(const_int 4) (const_int 12)
6480 (const_int 5) (const_int 13)
6481 (const_int 6) (const_int 14)
6482 (const_int 7) (const_int 15)])))]
6484 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6485 [(set_attr "type" "sselog")
6486 (set_attr "prefix" "vex")
6487 (set_attr "mode" "TI")])
;; Two-operand punpckhwd on V8HI (i paired with i+8, i = 4..7), with
;; operand 1 tied to operand 0.
6489 (define_insn "sse2_punpckhwd"
6490 [(set (match_operand:V8HI 0 "register_operand" "=x")
6493 (match_operand:V8HI 1 "register_operand" "0")
6494 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6495 (parallel [(const_int 4) (const_int 12)
6496 (const_int 5) (const_int 13)
6497 (const_int 6) (const_int 14)
6498 (const_int 7) (const_int 15)])))]
6500 "punpckhwd\t{%2, %0|%0, %2}"
6501 [(set_attr "type" "sselog")
6502 (set_attr "prefix_data16" "1")
6503 (set_attr "mode" "TI")])
;; Three-operand vpunpcklwd on V8HI: the selection list pairs index i
;; with index i+8 for i = 0..3.
6505 (define_insn "*avx_punpcklwd"
6506 [(set (match_operand:V8HI 0 "register_operand" "=x")
6509 (match_operand:V8HI 1 "register_operand" "x")
6510 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6511 (parallel [(const_int 0) (const_int 8)
6512 (const_int 1) (const_int 9)
6513 (const_int 2) (const_int 10)
6514 (const_int 3) (const_int 11)])))]
6516 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6517 [(set_attr "type" "sselog")
6518 (set_attr "prefix" "vex")
6519 (set_attr "mode" "TI")])
;; Two-operand punpcklwd on V8HI (i paired with i+8, i = 0..3), with
;; operand 1 tied to operand 0.  The vec_interleave_lowv8hi expander in
;; this file emits this pattern through gen_sse2_punpcklwd.
6521 (define_insn "sse2_punpcklwd"
6522 [(set (match_operand:V8HI 0 "register_operand" "=x")
6525 (match_operand:V8HI 1 "register_operand" "0")
6526 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6527 (parallel [(const_int 0) (const_int 8)
6528 (const_int 1) (const_int 9)
6529 (const_int 2) (const_int 10)
6530 (const_int 3) (const_int 11)])))]
6532 "punpcklwd\t{%2, %0|%0, %2}"
6533 [(set_attr "type" "sselog")
6534 (set_attr "prefix_data16" "1")
6535 (set_attr "mode" "TI")])
;; Three-operand vpunpckhdq on V4SI: selection indices 2 6 3 7.
6537 (define_insn "*avx_punpckhdq"
6538 [(set (match_operand:V4SI 0 "register_operand" "=x")
6541 (match_operand:V4SI 1 "register_operand" "x")
6542 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6543 (parallel [(const_int 2) (const_int 6)
6544 (const_int 3) (const_int 7)])))]
6546 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6547 [(set_attr "type" "sselog")
6548 (set_attr "prefix" "vex")
6549 (set_attr "mode" "TI")])
;; Two-operand punpckhdq on V4SI (selection indices 2 6 3 7), with
;; operand 1 tied to operand 0.  Emitted by the vec_interleave_highv4si
;; expander in this file.
6551 (define_insn "sse2_punpckhdq"
6552 [(set (match_operand:V4SI 0 "register_operand" "=x")
6555 (match_operand:V4SI 1 "register_operand" "0")
6556 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6557 (parallel [(const_int 2) (const_int 6)
6558 (const_int 3) (const_int 7)])))]
6560 "punpckhdq\t{%2, %0|%0, %2}"
6561 [(set_attr "type" "sselog")
6562 (set_attr "prefix_data16" "1")
6563 (set_attr "mode" "TI")])
;; Three-operand vpunpckldq on V4SI: selection indices 0 4 1 5.
6565 (define_insn "*avx_punpckldq"
6566 [(set (match_operand:V4SI 0 "register_operand" "=x")
6569 (match_operand:V4SI 1 "register_operand" "x")
6570 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6571 (parallel [(const_int 0) (const_int 4)
6572 (const_int 1) (const_int 5)])))]
6574 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6575 [(set_attr "type" "sselog")
6576 (set_attr "prefix" "vex")
6577 (set_attr "mode" "TI")])
;; Two-operand punpckldq on V4SI (selection indices 0 4 1 5), with
;; operand 1 tied to operand 0.  Emitted by the vec_interleave_lowv4si
;; expander in this file.
6579 (define_insn "sse2_punpckldq"
6580 [(set (match_operand:V4SI 0 "register_operand" "=x")
6583 (match_operand:V4SI 1 "register_operand" "0")
6584 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6585 (parallel [(const_int 0) (const_int 4)
6586 (const_int 1) (const_int 5)])))]
6588 "punpckldq\t{%2, %0|%0, %2}"
6589 [(set_attr "type" "sselog")
6590 (set_attr "prefix_data16" "1")
6591 (set_attr "mode" "TI")])
;; AVX element insert, iterated over SSEMODE124 (V16QI, V8HI, V4SI).
;; Scalar operand 2 is duplicated and merged into vector operand 1 under
;; the single-bit mask in operand 3.  The output code converts that mask to
;; an element index with exact_log2 before printing the vpinsr template.
6593 (define_insn "*avx_pinsr<ssevecsize>"
6594 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6595 (vec_merge:SSEMODE124
6596 (vec_duplicate:SSEMODE124
6597 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6598 (match_operand:SSEMODE124 1 "register_operand" "x")
6599 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6602 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6603 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6605 [(set_attr "type" "sselog")
6606 (set_attr "prefix" "vex")
6607 (set_attr "mode" "TI")])
;; pinsrb: insert QImode operand 2 into V16QI operand 1 (tied to the
;; destination).  Operand 3 is a power of two up to 32768; the output code
;; turns it into an element index with exact_log2.
6609 (define_insn "*sse4_1_pinsrb"
6610 [(set (match_operand:V16QI 0 "register_operand" "=x")
6612 (vec_duplicate:V16QI
6613 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6614 (match_operand:V16QI 1 "register_operand" "0")
6615 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6618 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6619 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6621 [(set_attr "type" "sselog")
6622 (set_attr "prefix_extra" "1")
6623 (set_attr "mode" "TI")])
;; pinsrw: insert HImode operand 2 into V8HI operand 1 (tied to the
;; destination).  Operand 3 is a power of two up to 128; the output code
;; turns it into an element index with exact_log2.
6625 (define_insn "*sse2_pinsrw"
6626 [(set (match_operand:V8HI 0 "register_operand" "=x")
6629 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6630 (match_operand:V8HI 1 "register_operand" "0")
6631 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6634 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6635 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6637 [(set_attr "type" "sselog")
6638 (set_attr "prefix_data16" "1")
6639 (set_attr "mode" "TI")])
;; pinsrd: insert SImode operand 2 into V4SI operand 1 (tied to the
;; destination).  Operand 3 is a power of two up to 8; the output code
;; turns it into an element index with exact_log2.
6641 ;; It must come before sse2_loadld since it is preferred.
6642 (define_insn "*sse4_1_pinsrd"
6643 [(set (match_operand:V4SI 0 "register_operand" "=x")
6646 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6647 (match_operand:V4SI 1 "register_operand" "0")
6648 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6651 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6652 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6654 [(set_attr "type" "sselog")
6655 (set_attr "prefix_extra" "1")
6656 (set_attr "mode" "TI")])
;; vpinsrq: insert DImode operand 2 into V2DI operand 1; enabled only for
;; TARGET_AVX && TARGET_64BIT.  Operand 3 is 1 or 2 and is converted to an
;; element index with exact_log2.
6658 (define_insn "*avx_pinsrq"
6659 [(set (match_operand:V2DI 0 "register_operand" "=x")
6662 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6663 (match_operand:V2DI 1 "register_operand" "x")
6664 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6665 "TARGET_AVX && TARGET_64BIT"
6667 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6668 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix" "vex")
6672 (set_attr "mode" "TI")])
;; pinsrq: two-operand counterpart of *avx_pinsrq (operand 1 tied to the
;; destination); enabled only for TARGET_SSE4_1 && TARGET_64BIT.
6674 (define_insn "*sse4_1_pinsrq"
6675 [(set (match_operand:V2DI 0 "register_operand" "=x")
6678 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6679 (match_operand:V2DI 1 "register_operand" "0")
6680 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6681 "TARGET_SSE4_1 && TARGET_64BIT"
6683 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6684 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix_extra" "1")
6688 (set_attr "mode" "TI")])
;; pextrb to a general register: selects the element of V16QI operand 1
;; given by operand 2 (0..15) into SImode register operand 0.
;; The template uses the %v form, matching the maybe_vex prefix attribute.
6690 (define_insn "*sse4_1_pextrb"
6691 [(set (match_operand:SI 0 "register_operand" "=r")
6694 (match_operand:V16QI 1 "register_operand" "x")
6695 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6697 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6698 [(set_attr "type" "sselog")
6699 (set_attr "prefix_extra" "1")
6700 (set_attr "prefix" "maybe_vex")
6701 (set_attr "mode" "TI")])
;; pextrb to memory: stores the element of V16QI operand 1 selected by
;; operand 2 (0..15) into QImode memory operand 0.
6703 (define_insn "*sse4_1_pextrb_memory"
6704 [(set (match_operand:QI 0 "memory_operand" "=m")
6706 (match_operand:V16QI 1 "register_operand" "x")
6707 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6709 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6710 [(set_attr "type" "sselog")
6711 (set_attr "prefix_extra" "1")
6712 (set_attr "prefix" "maybe_vex")
6713 (set_attr "mode" "TI")])
;; pextrw to a general register: selects the element of V8HI operand 1
;; given by operand 2 (0..7) into SImode register operand 0.
6715 (define_insn "*sse2_pextrw"
6716 [(set (match_operand:SI 0 "register_operand" "=r")
6719 (match_operand:V8HI 1 "register_operand" "x")
6720 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6722 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6723 [(set_attr "type" "sselog")
6724 (set_attr "prefix_data16" "1")
6725 (set_attr "prefix" "maybe_vex")
6726 (set_attr "mode" "TI")])
;; pextrw to memory: stores the element of V8HI operand 1 selected by
;; operand 2 (0..7) into HImode memory operand 0.
6728 (define_insn "*sse4_1_pextrw_memory"
6729 [(set (match_operand:HI 0 "memory_operand" "=m")
6731 (match_operand:V8HI 1 "register_operand" "x")
6732 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6734 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6735 [(set_attr "type" "sselog")
6736 (set_attr "prefix_extra" "1")
6737 (set_attr "prefix" "maybe_vex")
6738 (set_attr "mode" "TI")])
;; pextrd: extracts the element of V4SI operand 1 selected by operand 2
;; (0..3) into SImode operand 0, which may be a register or memory.
6740 (define_insn "*sse4_1_pextrd"
6741 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6743 (match_operand:V4SI 1 "register_operand" "x")
6744 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6746 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6747 [(set_attr "type" "sselog")
6748 (set_attr "prefix_extra" "1")
6749 (set_attr "prefix" "maybe_vex")
6750 (set_attr "mode" "TI")])
;; pextrq: extracts the element of V2DI operand 1 selected by operand 2
;; (0..1) into DImode operand 0 (register or memory); enabled only for
;; TARGET_SSE4_1 && TARGET_64BIT.
6752 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
6753 (define_insn "*sse4_1_pextrq"
6754 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6756 (match_operand:V2DI 1 "register_operand" "x")
6757 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6758 "TARGET_SSE4_1 && TARGET_64BIT"
6759 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6760 [(set_attr "type" "sselog")
6761 (set_attr "prefix_extra" "1")
6762 (set_attr "prefix" "maybe_vex")
6763 (set_attr "mode" "TI")])
;; Expander for pshufd with a single immediate: the mask in operand 2 is
;; split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), which are passed
;; as separate selectors to gen_sse2_pshufd_1.
6765 (define_expand "sse2_pshufd"
6766 [(match_operand:V4SI 0 "register_operand" "")
6767 (match_operand:V4SI 1 "nonimmediate_operand" "")
6768 (match_operand:SI 2 "const_int_operand" "")]
6771 int mask = INTVAL (operands[2]);
6772 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6773 GEN_INT ((mask >> 0) & 3),
6774 GEN_INT ((mask >> 2) & 3),
6775 GEN_INT ((mask >> 4) & 3),
6776 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four element selectors.  Operands
;; 2-5 are each in 0..3; the output code packs them two bits apiece
;; (shifts 0, 2, 4, 6) into a single immediate stored back in operands[2].
;; The template starts with %v, so the mnemonic is only conditionally
;; VEX-encoded; the prefix attribute is therefore "maybe_vex", as for
;; sse2_pshuflw_1 and sse2_pshufhw_1, rather than an unconditional "vex".
6780 (define_insn "sse2_pshufd_1"
6781 [(set (match_operand:V4SI 0 "register_operand" "=x")
6783 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6784 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6785 (match_operand 3 "const_0_to_3_operand" "")
6786 (match_operand 4 "const_0_to_3_operand" "")
6787 (match_operand 5 "const_0_to_3_operand" "")])))]
6791 mask |= INTVAL (operands[2]) << 0;
6792 mask |= INTVAL (operands[3]) << 2;
6793 mask |= INTVAL (operands[4]) << 4;
6794 mask |= INTVAL (operands[5]) << 6;
6795 operands[2] = GEN_INT (mask);
6797 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6799 [(set_attr "type" "sselog1")
6800 (set_attr "prefix_data16" "1")
6801 (set_attr "prefix" "maybe_vex")
6802 (set_attr "mode" "TI")])
;; Expander for pshuflw with a single immediate: the mask in operand 2 is
;; split into four 2-bit fields, which are passed as separate selectors to
;; gen_sse2_pshuflw_1.
6804 (define_expand "sse2_pshuflw"
6805 [(match_operand:V8HI 0 "register_operand" "")
6806 (match_operand:V8HI 1 "nonimmediate_operand" "")
6807 (match_operand:SI 2 "const_int_operand" "")]
6810 int mask = INTVAL (operands[2]);
6811 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6812 GEN_INT ((mask >> 0) & 3),
6813 GEN_INT ((mask >> 2) & 3),
6814 GEN_INT ((mask >> 4) & 3),
6815 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with explicit selectors.  Operands 2-5 are each in 0..3 and are
;; packed two bits apiece into the immediate stored back in operands[2].
;; NOTE(review): the remainder of the selection list after operand 5 is
;; not visible in this listing.
6819 (define_insn "sse2_pshuflw_1"
6820 [(set (match_operand:V8HI 0 "register_operand" "=x")
6822 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6823 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6824 (match_operand 3 "const_0_to_3_operand" "")
6825 (match_operand 4 "const_0_to_3_operand" "")
6826 (match_operand 5 "const_0_to_3_operand" "")
6834 mask |= INTVAL (operands[2]) << 0;
6835 mask |= INTVAL (operands[3]) << 2;
6836 mask |= INTVAL (operands[4]) << 4;
6837 mask |= INTVAL (operands[5]) << 6;
6838 operands[2] = GEN_INT (mask);
6840 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6842 [(set_attr "type" "sselog")
6843 (set_attr "prefix_rep" "1")
6844 (set_attr "prefix" "maybe_vex")
6845 (set_attr "mode" "TI")])
;; Expander for pshufhw with a single immediate: each 2-bit field of the
;; mask in operand 2 is biased by 4 before being passed to
;; gen_sse2_pshufhw_1, whose selectors are constrained to 4..7.
6847 (define_expand "sse2_pshufhw"
6848 [(match_operand:V8HI 0 "register_operand" "")
6849 (match_operand:V8HI 1 "nonimmediate_operand" "")
6850 (match_operand:SI 2 "const_int_operand" "")]
6853 int mask = INTVAL (operands[2]);
6854 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6855 GEN_INT (((mask >> 0) & 3) + 4),
6856 GEN_INT (((mask >> 2) & 3) + 4),
6857 GEN_INT (((mask >> 4) & 3) + 4),
6858 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with explicit selectors.  Operands 2-5 are each in 4..7; the
;; output code subtracts the bias of 4 and packs them two bits apiece into
;; the immediate stored back in operands[2].
;; NOTE(review): the fixed selection indices between (const_int 0) and
;; operand 2 are not visible in this listing.
6862 (define_insn "sse2_pshufhw_1"
6863 [(set (match_operand:V8HI 0 "register_operand" "=x")
6865 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6866 (parallel [(const_int 0)
6870 (match_operand 2 "const_4_to_7_operand" "")
6871 (match_operand 3 "const_4_to_7_operand" "")
6872 (match_operand 4 "const_4_to_7_operand" "")
6873 (match_operand 5 "const_4_to_7_operand" "")])))]
6877 mask |= (INTVAL (operands[2]) - 4) << 0;
6878 mask |= (INTVAL (operands[3]) - 4) << 2;
6879 mask |= (INTVAL (operands[4]) - 4) << 4;
6880 mask |= (INTVAL (operands[5]) - 4) << 6;
6881 operands[2] = GEN_INT (mask);
6883 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6885 [(set_attr "type" "sselog")
6886 (set_attr "prefix_rep" "1")
6887 (set_attr "prefix" "maybe_vex")
6888 (set_attr "mode" "TI")])
;; Expander sse2_loadd: SImode operand 1 into V4SI operand 0.  The
;; preparation statement sets operands[2] to the V4SI zero constant.
;; NOTE(review): the RTL lines that use operand 2 are not visible here.
6890 (define_expand "sse2_loadd"
6891 [(set (match_operand:V4SI 0 "register_operand" "")
6894 (match_operand:SI 1 "nonimmediate_operand" ""))
6898 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX load of an SImode value into a V4SI register.  Alternatives:
;;   0: vmovd from memory,           operand 1 constrained to "C"
;;   1: vmovd from a general register, operand 1 constrained to "C"
;;   2: vmovss from an SSE register, merged with register operand 1
6900 (define_insn "*avx_loadld"
6901 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6904 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6905 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6909 vmovd\t{%2, %0|%0, %2}
6910 vmovd\t{%2, %0|%0, %2}
6911 vmovss\t{%2, %1, %0|%0, %1, %2}"
6912 [(set_attr "type" "ssemov")
6913 (set_attr "prefix" "vex")
6914 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX load of an SImode value into a V4SI register.  Alternatives:
;;   0: movd from memory,            operand 1 constrained to "C"
;;   1: movd from a general register, operand 1 constrained to "C"
;;   2: movss from memory,           operand 1 constrained to "C"
;;   3: movss from an SSE register,  operand 1 tied to the destination
6916 (define_insn "sse2_loadld"
6917 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6920 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6921 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6925 movd\t{%2, %0|%0, %2}
6926 movd\t{%2, %0|%0, %2}
6927 movss\t{%2, %0|%0, %2}
6928 movss\t{%2, %0|%0, %2}"
6929 [(set_attr "type" "ssemov")
6930 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of V4SI operand 1 into SImode operand 0.  After
;; reload, when inter-unit moves are allowed, or the destination is memory,
;; or the destination is not a general register, the insn is split into a
;; plain SImode move: operand 1 is rewritten as the SImode register with
;; the same register number.
6932 (define_insn_and_split "sse2_stored"
6933 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6935 (match_operand:V4SI 1 "register_operand" "x,Yi")
6936 (parallel [(const_int 0)])))]
6939 "&& reload_completed
6940 && (TARGET_INTER_UNIT_MOVES
6941 || MEM_P (operands [0])
6942 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6943 [(set (match_dup 0) (match_dup 1))]
6945 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (0..3) of a V4SI memory operand into an SImode
;; register.  The split code emits a plain SImode move from the memory
;; operand re-addressed at byte offset i*4, where i is operand 2.
6948 (define_insn_and_split "*vec_ext_v4si_mem"
6949 [(set (match_operand:SI 0 "register_operand" "=r")
6951 (match_operand:V4SI 1 "memory_operand" "o")
6952 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6958 int i = INTVAL (operands[2]);
6960 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander sse_storeq: element 0 of V2DI register operand 1 into DImode
;; operand 0 (register or memory).
6964 (define_expand "sse_storeq"
6965 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6967 (match_operand:V2DI 1 "register_operand" "")
6968 (parallel [(const_int 0)])))]
;; 64-bit extraction of element 0 of V2DI operand 1 into DImode operand 0;
;; memory-to-memory is excluded by the condition.  The attribute lists
;; describe three alternatives, of which only the last has a visible
;; template: a general register loaded from offsettable memory.  That is a
;; plain integer move (type imov, mode DI), so it must print mov{q} with no
;; VEX prefix.  A %v-prefixed mov{q} would print "vmovq mem, reg64" in AT&T
;; syntax and "vmov" in Intel syntax under AVX, neither of which exists.
6972 (define_insn "*sse2_storeq_rex64"
6973 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6975 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6976 (parallel [(const_int 0)])))]
6977 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6981 mov{q}\t{%1, %0|%0, %1}"
6982 [(set_attr "type" "*,*,imov")
6983 (set_attr "prefix" "*,*,orig")
6984 (set_attr "mode" "*,*,DI")])
;; *sse2_storeq: element 0 of V2DI register operand 1 into DImode operand 0
;; (memory or SSE register).
;; NOTE(review): the condition and template of this insn are not visible in
;; this listing.  The second pattern below, with empty constraints, appears
;; to belong to a separate splitter whose opening line is also not visible.
;; It replaces the extraction by a plain DImode move, rewriting operand 1
;; as the DImode register with the same register number.
6986 (define_insn "*sse2_storeq"
6987 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
6989 (match_operand:V2DI 1 "register_operand" "x")
6990 (parallel [(const_int 0)])))]
6995 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6997 (match_operand:V2DI 1 "register_operand" "")
6998 (parallel [(const_int 0)])))]
7001 && (TARGET_INTER_UNIT_MOVES
7002 || MEM_P (operands [0])
7003 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7004 [(set (match_dup 0) (match_dup 1))]
7006 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX extraction of element 1 of V2DI operand 1 into DImode operand 0.
;; Alternatives:
;;   0: vmovhps  SSE register -> memory
;;   1: vpsrldq  SSE register -> SSE register, byte shift by 8
;;   2: vmovq    memory (via %H1) -> SSE register
;;   3: mov{q}   memory (via %H1) -> general register
;; Alternative 3 is a plain integer load (type imov, mode DI), so it uses
;; the unprefixed mov{q} mnemonic, as the non-AVX *vec_extractv2di_1_rex64
;; pattern does, and its prefix attribute is "orig" instead of "vex".
7009 (define_insn "*vec_extractv2di_1_rex64_avx"
7010 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7012 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7013 (parallel [(const_int 1)])))]
7016 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7018 vmovhps\t{%1, %0|%0, %1}
7019 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7020 vmovq\t{%H1, %0|%0, %H1}
7021 mov{q}\t{%H1, %0|%0, %H1}"
7022 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7023 (set_attr "memory" "*,none,*,*")
7024 (set_attr "prefix" "vex,vex,vex,orig")
7025 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit, non-AVX extraction of element 1 of V2DI operand 1.
;; Alternatives:
;;   0: movhps  SSE register -> memory
;;   1: psrldq  in place (operand 1 tied to the destination), shift by 8
;;   2: movq    memory (via %H1) -> SSE register
;;   3: mov{q}  memory (via %H1) -> general register
7027 (define_insn "*vec_extractv2di_1_rex64"
7028 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7030 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7031 (parallel [(const_int 1)])))]
7032 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7034 movhps\t{%1, %0|%0, %1}
7035 psrldq\t{$8, %0|%0, 8}
7036 movq\t{%H1, %0|%0, %H1}
7037 mov{q}\t{%H1, %0|%0, %H1}"
7038 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7039 (set_attr "atom_unit" "*,sishuf,*,*")
7040 (set_attr "memory" "*,none,*,*")
7041 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX extraction of element 1 of V2DI operand 1, without a general-register
;; alternative: vmovhps to memory, vpsrldq register to register, or vmovq
;; from memory via %H1.
;; NOTE(review): the first lines of the condition are not visible here.
7043 (define_insn "*vec_extractv2di_1_avx"
7044 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7046 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7047 (parallel [(const_int 1)])))]
7050 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7052 vmovhps\t{%1, %0|%0, %1}
7053 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7054 vmovq\t{%H1, %0|%0, %H1}"
7055 [(set_attr "type" "ssemov,sseishft,ssemov")
7056 (set_attr "memory" "*,none,*")
7057 (set_attr "prefix" "vex")
7058 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extraction of element 1 of V2DI operand 1: movhps to memory, psrldq
;; in place (operand 1 tied to the destination), or movq from memory via
;; %H1.  The visible part of the condition requires TARGET_SSE2 and
;; excludes memory-to-memory.
7060 (define_insn "*vec_extractv2di_1_sse2"
7061 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7063 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7064 (parallel [(const_int 1)])))]
7066 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7068 movhps\t{%1, %0|%0, %1}
7069 psrldq\t{$8, %0|%0, 8}
7070 movq\t{%H1, %0|%0, %H1}"
7071 [(set_attr "type" "ssemov,sseishft,ssemov")
7072 (set_attr "atom_unit" "*,sishuf,*")
7073 (set_attr "memory" "*,none,*")
7074 (set_attr "mode" "V2SF,TI,TI")])
;; SSE-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of
;; V2DI operand 1, using single-precision moves: movhps to memory, movhlps
;; register to register, or movlps from memory via %H1.
7076 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7077 (define_insn "*vec_extractv2di_1_sse"
7078 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7080 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7081 (parallel [(const_int 1)])))]
7082 "!TARGET_SSE2 && TARGET_SSE
7083 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7085 movhps\t{%1, %0|%0, %1}
7086 movhlps\t{%1, %0|%0, %1}
7087 movlps\t{%H1, %0|%0, %H1}"
7088 [(set_attr "type" "ssemov")
7089 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast SImode register operand 1 into V4SI operand 0.  Alternative 0
;; uses pshufd with immediate 0 (conditionally VEX-prefixed via %v);
;; alternative 1 uses shufps in place and keeps the legacy encoding
;; (prefix "orig").
7091 (define_insn "*vec_dupv4si"
7092 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7094 (match_operand:SI 1 "register_operand" " Y2,0")))]
7097 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7098 shufps\t{$0, %0, %0|%0, %0, 0}"
7099 [(set_attr "type" "sselog1")
7100 (set_attr "prefix" "maybe_vex,orig")
7101 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of DImode register operand 1 into V2DI operand 0, emitted
;; as vpunpcklqdq with operand 1 used as both sources.
7103 (define_insn "*vec_dupv2di_avx"
7104 [(set (match_operand:V2DI 0 "register_operand" "=x")
7106 (match_operand:DI 1 "register_operand" "x")))]
7108 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7109 [(set_attr "type" "sselog1")
7110 (set_attr "prefix" "vex")
7111 (set_attr "mode" "TI")])
;; Non-AVX broadcast of DImode operand 1 (tied to the destination in both
;; alternatives) into V2DI operand 0.
;; NOTE(review): the condition and the two output templates are not visible
;; in this listing; the attributes give types sselog1 and ssemov.
7113 (define_insn "*vec_dupv2di"
7114 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7116 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7121 [(set_attr "type" "sselog1,ssemov")
7122 (set_attr "mode" "TI,V4SF")])
;; AVX concatenation of two SImode values into a V2SI register.
;; Alternatives 0-2 target SSE registers (vpinsrd, vpunpckldq, vmovd) and
;; are VEX-encoded; alternatives 3-4 target MMX registers (punpckldq, movd)
;; and keep the legacy encoding, as selected by the prefix attribute.
7124 (define_insn "*vec_concatv2si_avx"
7125 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7127 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7128 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7131 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7132 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7133 vmovd\t{%1, %0|%0, %1}
7134 punpckldq\t{%2, %0|%0, %2}
7135 movd\t{%1, %0|%0, %1}"
7136 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7137 (set (attr "prefix")
7138 (if_then_else (eq_attr "alternative" "3,4")
7139 (const_string "orig")
7140 (const_string "vex")))
7141 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 concatenation of two SImode values into a V2SI register: pinsrd
;; (the only alternative with prefix_extra set), punpckldq or movd for SSE
;; registers, then punpckldq or movd for MMX registers.
7143 (define_insn "*vec_concatv2si_sse4_1"
7144 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7146 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7147 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7150 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7151 punpckldq\t{%2, %0|%0, %2}
7152 movd\t{%1, %0|%0, %1}
7153 punpckldq\t{%2, %0|%0, %2}
7154 movd\t{%1, %0|%0, %1}"
7155 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7156 (set_attr "prefix_extra" "1,*,*,*,*")
7157 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE2 concatenation of two SImode values into a V2SI register.  Operand 2
;; is restricted to a register or zero: punpckldq or movd for SSE
;; registers, then the same pair for MMX registers.
7159 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7160 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7161 ;; alternatives pretty much forces the MMX alternative to be chosen.
7162 (define_insn "*vec_concatv2si_sse2"
7163 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7165 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7166 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7169 punpckldq\t{%2, %0|%0, %2}
7170 movd\t{%1, %0|%0, %1}
7171 punpckldq\t{%2, %0|%0, %2}
7172 movd\t{%1, %0|%0, %1}"
7173 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7174 (set_attr "mode" "TI,TI,DI,DI")])
;; Concatenation of two SImode values into a V2SI register using
;; single-precision instructions for the SSE alternatives (unpcklps,
;; movss) and punpckldq or movd for the MMX alternatives.
7176 (define_insn "*vec_concatv2si_sse"
7177 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7179 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7180 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7183 unpcklps\t{%2, %0|%0, %2}
7184 movss\t{%1, %0|%0, %1}
7185 punpckldq\t{%2, %0|%0, %2}
7186 movd\t{%1, %0|%0, %1}"
7187 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7188 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX concatenation of two V2SI values into a V4SI register: vpunpcklqdq
;; when operand 2 is a register, vmovhps when it is in memory.
7190 (define_insn "*vec_concatv4si_1_avx"
7191 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7193 (match_operand:V2SI 1 "register_operand" " x,x")
7194 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7197 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7198 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7199 [(set_attr "type" "sselog,ssemov")
7200 (set_attr "prefix" "vex")
7201 (set_attr "mode" "TI,V2SF")])
;; Non-AVX concatenation of two V2SI values into a V4SI register, with
;; operand 1 tied to the destination: punpcklqdq, movlhps (register
;; operand 2) or movhps (memory operand 2).
7203 (define_insn "*vec_concatv4si_1"
7204 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7206 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7207 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7210 punpcklqdq\t{%2, %0|%0, %2}
7211 movlhps\t{%2, %0|%0, %2}
7212 movhps\t{%2, %0|%0, %2}"
7213 [(set_attr "type" "sselog,ssemov,ssemov")
7214 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX concatenation of two DImode values into a V2DI register.
;; Alternatives: vmovq from memory, movq2dq from an MMX register (legacy
;; encoding, prefix "orig"), vpunpcklqdq of two registers, vmovhps with a
;; memory operand 2.
7216 (define_insn "*vec_concatv2di_avx"
7217 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7219 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7220 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7221 "!TARGET_64BIT && TARGET_AVX"
7223 vmovq\t{%1, %0|%0, %1}
7224 movq2dq\t{%1, %0|%0, %1}
7225 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7226 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7227 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7228 (set (attr "prefix")
7229 (if_then_else (eq_attr "alternative" "1")
7230 (const_string "orig")
7231 (const_string "vex")))
7232 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE concatenation of two DImode values into a V2DI register.
;; Alternatives: movq, movq2dq from an MMX register, then punpcklqdq,
;; movlhps or movhps with operand 1 tied to the destination.
7234 (define_insn "vec_concatv2di"
7235 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7237 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7238 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7239 "!TARGET_64BIT && TARGET_SSE"
7241 movq\t{%1, %0|%0, %1}
7242 movq2dq\t{%1, %0|%0, %1}
7243 punpcklqdq\t{%2, %0|%0, %2}
7244 movlhps\t{%2, %0|%0, %2}
7245 movhps\t{%2, %0|%0, %2}"
7246 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7247 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX concatenation of two DImode values into a V2DI register.
;; Alternatives: vpinsrq, vmovq from memory, vmovq from a general register,
;; movq2dq from an MMX register (legacy encoding, prefix "orig"),
;; vpunpcklqdq of two registers, vmovhps with a memory operand 2.
7249 (define_insn "*vec_concatv2di_rex64_avx"
7250 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7252 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7253 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7254 "TARGET_64BIT && TARGET_AVX"
7256 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7257 vmovq\t{%1, %0|%0, %1}
7258 vmovq\t{%1, %0|%0, %1}
7259 movq2dq\t{%1, %0|%0, %1}
7260 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7261 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7262 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7263 (set (attr "prefix")
7264 (if_then_else (eq_attr "alternative" "3")
7265 (const_string "orig")
7266 (const_string "vex")))
7267 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 concatenation of two DImode values into a V2DI register.
;; Alternatives: pinsrq (the only one with prefix_extra set), movq from
;; memory or an SSE register, movq from a general register, movq2dq from
;; an MMX register, then punpcklqdq, movlhps or movhps with operand 1 tied
;; to the destination.
7269 (define_insn "*vec_concatv2di_rex64_sse4_1"
7270 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7272 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7273 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7274 "TARGET_64BIT && TARGET_SSE4_1"
7276 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7277 movq\t{%1, %0|%0, %1}
7278 movq\t{%1, %0|%0, %1}
7279 movq2dq\t{%1, %0|%0, %1}
7280 punpcklqdq\t{%2, %0|%0, %2}
7281 movlhps\t{%2, %0|%0, %2}
7282 movhps\t{%2, %0|%0, %2}"
7283 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7284 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7285 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE concatenation of two DImode values into a V2DI register,
;; without pinsrq.  Alternatives: movq from memory or an SSE register, movq
;; from a general register, movq2dq from an MMX register, then punpcklqdq,
;; movlhps or movhps with operand 1 tied to the destination.
7287 (define_insn "*vec_concatv2di_rex64_sse"
7288 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7290 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7291 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7292 "TARGET_64BIT && TARGET_SSE"
7294 movq\t{%1, %0|%0, %1}
7295 movq\t{%1, %0|%0, %1}
7296 movq2dq\t{%1, %0|%0, %1}
7297 punpcklqdq\t{%2, %0|%0, %2}
7298 movlhps\t{%2, %0|%0, %2}
7299 movhps\t{%2, %0|%0, %2}"
7300 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7301 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander vec_unpacku_hi_v16qi: V16QI operand 1 to V8HI operand 0.  It
;; dispatches to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack or
;; ix86_expand_sse_unpack with flags (true, true).  In this file the first
;; flag is true for the "unpacku" expanders and the second for the "_hi"
;; ones.
;; NOTE(review): the first "if" condition and the final "else" are not
;; visible in this listing.
7303 (define_expand "vec_unpacku_hi_v16qi"
7304 [(match_operand:V8HI 0 "register_operand" "")
7305 (match_operand:V16QI 1 "register_operand" "")]
7309 ix86_expand_sse4_unpack (operands, true, true);
7310 else if (TARGET_SSE5)
7311 ix86_expand_sse5_unpack (operands, true, true);
7313 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI operand 1 to V8HI operand 0, calling
;; the SSE4 / SSE5 / generic SSE unpack helper with flags (false, true).
7317 (define_expand "vec_unpacks_hi_v16qi"
7318 [(match_operand:V8HI 0 "register_operand" "")
7319 (match_operand:V16QI 1 "register_operand" "")]
7323 ix86_expand_sse4_unpack (operands, false, true);
7324 else if (TARGET_SSE5)
7325 ix86_expand_sse5_unpack (operands, false, true);
7327 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v16qi: V16QI operand 1 to V8HI operand 0, calling
;; the SSE4 / SSE5 / generic SSE unpack helper with flags (true, false).
7331 (define_expand "vec_unpacku_lo_v16qi"
7332 [(match_operand:V8HI 0 "register_operand" "")
7333 (match_operand:V16QI 1 "register_operand" "")]
7337 ix86_expand_sse4_unpack (operands, true, false);
7338 else if (TARGET_SSE5)
7339 ix86_expand_sse5_unpack (operands, true, false);
7341 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: operand 0 is a V8HI register, operand 1 a
;; V16QI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, false).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7345 (define_expand "vec_unpacks_lo_v16qi"
7346 [(match_operand:V8HI 0 "register_operand" "")
7347 (match_operand:V16QI 1 "register_operand" "")]
7351 ix86_expand_sse4_unpack (operands, false, false);
7352 else if (TARGET_SSE5)
7353 ix86_expand_sse5_unpack (operands, false, false);
7355 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (true, true).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7359 (define_expand "vec_unpacku_hi_v8hi"
7360 [(match_operand:V4SI 0 "register_operand" "")
7361 (match_operand:V8HI 1 "register_operand" "")]
7365 ix86_expand_sse4_unpack (operands, true, true);
7366 else if (TARGET_SSE5)
7367 ix86_expand_sse5_unpack (operands, true, true);
7369 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, true).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7373 (define_expand "vec_unpacks_hi_v8hi"
7374 [(match_operand:V4SI 0 "register_operand" "")
7375 (match_operand:V8HI 1 "register_operand" "")]
7379 ix86_expand_sse4_unpack (operands, false, true);
7380 else if (TARGET_SSE5)
7381 ix86_expand_sse5_unpack (operands, false, true);
7383 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (true, false).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7387 (define_expand "vec_unpacku_lo_v8hi"
7388 [(match_operand:V4SI 0 "register_operand" "")
7389 (match_operand:V8HI 1 "register_operand" "")]
7393 ix86_expand_sse4_unpack (operands, true, false);
7394 else if (TARGET_SSE5)
7395 ix86_expand_sse5_unpack (operands, true, false);
7397 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, false).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7401 (define_expand "vec_unpacks_lo_v8hi"
7402 [(match_operand:V4SI 0 "register_operand" "")
7403 (match_operand:V8HI 1 "register_operand" "")]
7407 ix86_expand_sse4_unpack (operands, false, false);
7408 else if (TARGET_SSE5)
7409 ix86_expand_sse5_unpack (operands, false, false);
7411 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (true, true).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7415 (define_expand "vec_unpacku_hi_v4si"
7416 [(match_operand:V2DI 0 "register_operand" "")
7417 (match_operand:V4SI 1 "register_operand" "")]
7421 ix86_expand_sse4_unpack (operands, true, true);
7422 else if (TARGET_SSE5)
7423 ix86_expand_sse5_unpack (operands, true, true);
7425 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, true).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7429 (define_expand "vec_unpacks_hi_v4si"
7430 [(match_operand:V2DI 0 "register_operand" "")
7431 (match_operand:V4SI 1 "register_operand" "")]
7435 ix86_expand_sse4_unpack (operands, false, true);
7436 else if (TARGET_SSE5)
7437 ix86_expand_sse5_unpack (operands, false, true);
7439 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (true, false).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7443 (define_expand "vec_unpacku_lo_v4si"
7444 [(match_operand:V2DI 0 "register_operand" "")
7445 (match_operand:V4SI 1 "register_operand" "")]
7449 ix86_expand_sse4_unpack (operands, true, false);
7450 else if (TARGET_SSE5)
7451 ix86_expand_sse5_unpack (operands, true, false);
7453 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The preparation code passes the operands to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, false).
;; NOTE(review): the enabling condition and the test guarding the first call
;; are not visible in this excerpt; the flags presumably mean
;; (unsigned, high half) -- confirm against the helpers' definitions.
7457 (define_expand "vec_unpacks_lo_v4si"
7458 [(match_operand:V2DI 0 "register_operand" "")
7459 (match_operand:V4SI 1 "register_operand" "")]
7463 ix86_expand_sse4_unpack (operands, false, false);
7464 else if (TARGET_SSE5)
7465 ix86_expand_sse5_unpack (operands, false, false);
7467 ix86_expand_sse_unpack (operands, false, false);
7471 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander sse2_uavgv16qi3: V16QI register destination, two V16QI
;; nonimmediate sources, plus a const_vector of sixteen 1s.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
;; NOTE(review): the arithmetic operators wrapping the operands and the
;; enabling condition are on lines not visible in this excerpt; presumably
;; this is the rounding unsigned byte average -- confirm against the full file.
7477 (define_expand "sse2_uavgv16qi3"
7478 [(set (match_operand:V16QI 0 "register_operand" "")
7484 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7486 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7487 (const_vector:V16QI [(const_int 1) (const_int 1)
7488 (const_int 1) (const_int 1)
7489 (const_int 1) (const_int 1)
7490 (const_int 1) (const_int 1)
7491 (const_int 1) (const_int 1)
7492 (const_int 1) (const_int 1)
7493 (const_int 1) (const_int 1)
7494 (const_int 1) (const_int 1)]))
7497 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI uavg pattern: emits the three-operand vpavgb when
;; TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands).
;; Operand 1 carries the '%' (commutative) constraint modifier; operand 2 may
;; be a register or memory.
;; NOTE(review): the operators wrapping the operands are not visible in this
;; excerpt.
7499 (define_insn "*avx_uavgv16qi3"
7500 [(set (match_operand:V16QI 0 "register_operand" "=x")
7506 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7508 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7509 (const_vector:V16QI [(const_int 1) (const_int 1)
7510 (const_int 1) (const_int 1)
7511 (const_int 1) (const_int 1)
7512 (const_int 1) (const_int 1)
7513 (const_int 1) (const_int 1)
7514 (const_int 1) (const_int 1)
7515 (const_int 1) (const_int 1)
7516 (const_int 1) (const_int 1)]))
7518 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7519 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7520 [(set_attr "type" "sseiadd")
7521 (set_attr "prefix" "vex")
7522 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI uavg pattern: emits the two-operand pavgb when
;; TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands).
;; Operand 1 is tied to the destination ("%0", commutative); operand 2 may be
;; a register or memory.
;; NOTE(review): the operators wrapping the operands are not visible in this
;; excerpt.
7524 (define_insn "*sse2_uavgv16qi3"
7525 [(set (match_operand:V16QI 0 "register_operand" "=x")
7531 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7533 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7534 (const_vector:V16QI [(const_int 1) (const_int 1)
7535 (const_int 1) (const_int 1)
7536 (const_int 1) (const_int 1)
7537 (const_int 1) (const_int 1)
7538 (const_int 1) (const_int 1)
7539 (const_int 1) (const_int 1)
7540 (const_int 1) (const_int 1)
7541 (const_int 1) (const_int 1)]))
7543 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7544 "pavgb\t{%2, %0|%0, %2}"
7545 [(set_attr "type" "sseiadd")
7546 (set_attr "prefix_data16" "1")
7547 (set_attr "mode" "TI")])
;; Expander sse2_uavgv8hi3: V8HI register destination, two V8HI nonimmediate
;; sources, plus a const_vector of eight 1s.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
;; NOTE(review): the arithmetic operators wrapping the operands and the
;; enabling condition are on lines not visible in this excerpt; presumably
;; this is the rounding unsigned word average -- confirm against the full file.
7549 (define_expand "sse2_uavgv8hi3"
7550 [(set (match_operand:V8HI 0 "register_operand" "")
7556 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7558 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7559 (const_vector:V8HI [(const_int 1) (const_int 1)
7560 (const_int 1) (const_int 1)
7561 (const_int 1) (const_int 1)
7562 (const_int 1) (const_int 1)]))
7565 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI uavg pattern: emits the three-operand vpavgw when
;; TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands).
;; Operand 1 carries the '%' (commutative) constraint modifier; operand 2 may
;; be a register or memory.
;; NOTE(review): the operators wrapping the operands are not visible in this
;; excerpt.
7567 (define_insn "*avx_uavgv8hi3"
7568 [(set (match_operand:V8HI 0 "register_operand" "=x")
7574 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7576 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7577 (const_vector:V8HI [(const_int 1) (const_int 1)
7578 (const_int 1) (const_int 1)
7579 (const_int 1) (const_int 1)
7580 (const_int 1) (const_int 1)]))
7582 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7583 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7584 [(set_attr "type" "sseiadd")
7585 (set_attr "prefix" "vex")
7586 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI uavg pattern: emits the two-operand pavgw when
;; TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands).
;; Operand 1 is tied to the destination ("%0", commutative); operand 2 may be
;; a register or memory.
;; NOTE(review): the operators wrapping the operands are not visible in this
;; excerpt.
7588 (define_insn "*sse2_uavgv8hi3"
7589 [(set (match_operand:V8HI 0 "register_operand" "=x")
7595 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7597 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7598 (const_vector:V8HI [(const_int 1) (const_int 1)
7599 (const_int 1) (const_int 1)
7600 (const_int 1) (const_int 1)
7601 (const_int 1) (const_int 1)]))
7603 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7604 "pavgw\t{%2, %0|%0, %2}"
7605 [(set_attr "type" "sseiadd")
7606 (set_attr "prefix_data16" "1")
7607 (set_attr "mode" "TI")])
7609 ;; The correct representation for this is absolutely enormous, and
7610 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec producing V2DI from two
;; V16QI inputs.  This AVX variant emits the three-operand vpsadbw; operand 1
;; must be a register, operand 2 may be a register or memory.
;; NOTE(review): the unspec code and the enabling condition are on lines not
;; visible in this excerpt.
7611 (define_insn "*avx_psadbw"
7612 [(set (match_operand:V2DI 0 "register_operand" "=x")
7613 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7614 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7617 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7618 [(set_attr "type" "sseiadd")
7619 (set_attr "prefix" "vex")
7620 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an unspec producing V2DI from two V16QI inputs.
;; Emits the two-operand psadbw: operand 1 is tied to the destination ("0"),
;; operand 2 may be a register or memory.
;; NOTE(review): the unspec code and the enabling condition are on lines not
;; visible in this excerpt.
7622 (define_insn "sse2_psadbw"
7623 [(set (match_operand:V2DI 0 "register_operand" "=x")
7624 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7625 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7628 "psadbw\t{%2, %0|%0, %2}"
7629 [(set_attr "type" "sseiadd")
7630 (set_attr "atom_unit" "simul")
7631 (set_attr "prefix_data16" "1")
7632 (set_attr "mode" "TI")])
;; 256-bit AVX movmsk: sets an SI general register from an AVX256MODEF2P
;; vector register, enabled when AVX256_VEC_FLOAT_MODE_P (<MODE>mode).  Emits
;; vmovmskp<avxmodesuffixf2c>.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
;; The "type" here is "ssecvt" whereas the <sse>_movmskp pattern in this file
;; uses "ssemov" -- confirm whether the difference is intentional.
7634 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7635 [(set (match_operand:SI 0 "register_operand" "=r")
7637 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7639 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7640 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7641 [(set_attr "type" "ssecvt")
7642 (set_attr "prefix" "vex")
7643 (set_attr "mode" "<MODE>")])
;; 128-bit movmsk: sets an SI general register from an SSEMODEF2P vector
;; register, enabled when SSE_VEC_FLOAT_MODE_P (<MODE>mode).  The template
;; starts with "%v" and the prefix attribute is "maybe_vex", so one pattern
;; serves both the legacy and the VEX encoding.
;; NOTE(review): the wrapping RTL operator is not visible in this excerpt.
7645 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7646 [(set (match_operand:SI 0 "register_operand" "=r")
7648 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7650 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7651 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7652 [(set_attr "type" "ssemov")
7653 (set_attr "prefix" "maybe_vex")
7654 (set_attr "mode" "<MODE>")])
;; pmovmskb: sets an SI general register from a V16QI vector register via an
;; unspec.  The "%v" template prefix and "maybe_vex" attribute cover both the
;; legacy and the VEX encoding.
;; NOTE(review): the unspec code and the enabling condition are on lines not
;; visible in this excerpt.
7656 (define_insn "sse2_pmovmskb"
7657 [(set (match_operand:SI 0 "register_operand" "=r")
7658 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7661 "%vpmovmskb\t{%1, %0|%0, %1}"
7662 [(set_attr "type" "ssemov")
7663 (set_attr "prefix_data16" "1")
7664 (set_attr "prefix" "maybe_vex")
7665 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu: stores into a V16QI memory operand 0 an unspec
;; built from the V16QI registers 1 and 2.
;; NOTE(review): the rest of the unspec vector, the unspec code and the
;; enabling condition are on lines not visible in this excerpt.
7667 (define_expand "sse2_maskmovdqu"
7668 [(set (match_operand:V16QI 0 "memory_operand" "")
7669 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7670 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The destination is the
;; V16QI memory addressed by the SI register operand 0 (constraint "D"); the
;; unspec reads operands 1 and 2 and also the previous contents of that same
;; memory (match_dup 0).  The output template names only operands 1 and 2;
;; presumably the address register is implicit in the instruction -- confirm.
;; NOTE(review): the unspec code is on a line not visible in this excerpt.
7676 (define_insn "*sse2_maskmovdqu"
7677 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7678 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7679 (match_operand:V16QI 2 "register_operand" "x")
7680 (mem:V16QI (match_dup 0))]
7682 "TARGET_SSE2 && !TARGET_64BIT"
7683 ;; @@@ check ordering of operands in intel/nonintel syntax
7684 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7685 [(set_attr "type" "ssemov")
7686 (set_attr "prefix_data16" "1")
7687 (set_attr "prefix" "maybe_vex")
7688 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the 32-bit
;; pattern except that the address register operand 0 is DImode.  The
;; destination is the V16QI memory addressed by operand 0 (constraint "D");
;; the unspec reads operands 1 and 2 and the previous contents of that memory.
;; NOTE(review): the unspec code is on a line not visible in this excerpt.
7690 (define_insn "*sse2_maskmovdqu_rex64"
7691 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7692 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7693 (match_operand:V16QI 2 "register_operand" "x")
7694 (mem:V16QI (match_dup 0))]
7696 "TARGET_SSE2 && TARGET_64BIT"
7697 ;; @@@ check ordering of operands in intel/nonintel syntax
7698 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7699 [(set_attr "type" "ssemov")
7700 (set_attr "prefix_data16" "1")
7701 (set_attr "prefix" "maybe_vex")
7702 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile whose only input is an SI memory operand;
;; the "memory" attribute is "load" and "atom_sse_attr" is "mxcsr".
;; NOTE(review): the unspec code, enabling condition and output template are
;; on lines not visible in this excerpt.
7704 (define_insn "sse_ldmxcsr"
7705 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7709 [(set_attr "type" "sse")
7710 (set_attr "atom_sse_attr" "mxcsr")
7711 (set_attr "prefix" "maybe_vex")
7712 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SI memory operand from the volatile unspec
;; UNSPECV_STMXCSR; the "memory" attribute is "store" and "atom_sse_attr" is
;; "mxcsr".
;; NOTE(review): the enabling condition and output template are on lines not
;; visible in this excerpt.
7714 (define_insn "sse_stmxcsr"
7715 [(set (match_operand:SI 0 "memory_operand" "=m")
7716 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7719 [(set_attr "type" "sse")
7720 (set_attr "atom_sse_attr" "mxcsr")
7721 (set_attr "prefix" "maybe_vex")
7722 (set_attr "memory" "store")])
;; Expander sse_sfence, available when TARGET_SSE || TARGET_3DNOW_A.  The
;; preparation code makes operand 0 a BLKmode MEM at a SCRATCH address and
;; marks it volatile; the pattern uses it inside an UNSPEC_SFENCE unspec.
;; NOTE(review): the opening of the RTL template is on a line not visible in
;; this excerpt.
7724 (define_expand "sse_sfence"
7726 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7727 "TARGET_SSE || TARGET_3DNOW_A"
7729 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7730 MEM_VOLATILE_P (operands[0]) = 1;
;; Matches a BLKmode operand 0 set from an UNSPEC_SFENCE unspec of itself,
;; when TARGET_SSE || TARGET_3DNOW_A.  Attributes: atom_sse_attr "fence",
;; memory "unknown".
;; NOTE(review): the output template is on a line not visible in this excerpt.
7733 (define_insn "*sse_sfence"
7734 [(set (match_operand:BLK 0 "" "")
7735 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7736 "TARGET_SSE || TARGET_3DNOW_A"
7738 [(set_attr "type" "sse")
7739 (set_attr "atom_sse_attr" "fence")
7740 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile over an address operand (predicate
;; address_operand, constraint "p").  Attributes: atom_sse_attr "fence",
;; memory "unknown".
;; NOTE(review): the unspec code, enabling condition and output template are
;; on lines not visible in this excerpt.
7742 (define_insn "sse2_clflush"
7743 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7747 [(set_attr "type" "sse")
7748 (set_attr "atom_sse_attr" "fence")
7749 (set_attr "memory" "unknown")])
;; Expander sse2_mfence.  The preparation code makes operand 0 a BLKmode MEM
;; at a SCRATCH address and marks it volatile; the pattern uses it inside an
;; UNSPEC_MFENCE unspec.
;; NOTE(review): the opening of the RTL template and the enabling condition
;; are on lines not visible in this excerpt.
7751 (define_expand "sse2_mfence"
7753 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7756 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7757 MEM_VOLATILE_P (operands[0]) = 1;
;; Matches a BLKmode operand 0 set from an UNSPEC_MFENCE unspec of itself,
;; when TARGET_64BIT || TARGET_SSE2.  Attributes: atom_sse_attr "fence",
;; memory "unknown".
;; NOTE(review): the output template is on a line not visible in this excerpt.
7760 (define_insn "*sse2_mfence"
7761 [(set (match_operand:BLK 0 "" "")
7762 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7763 "TARGET_64BIT || TARGET_SSE2"
7765 [(set_attr "type" "sse")
7766 (set_attr "atom_sse_attr" "fence")
7767 (set_attr "memory" "unknown")])
;; Expander sse2_lfence.  The preparation code makes operand 0 a BLKmode MEM
;; at a SCRATCH address and marks it volatile; the pattern uses it inside an
;; UNSPEC_LFENCE unspec.
;; NOTE(review): the opening of the RTL template and the enabling condition
;; are on lines not visible in this excerpt.
7769 (define_expand "sse2_lfence"
7771 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7774 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7775 MEM_VOLATILE_P (operands[0]) = 1;
;; Matches a BLKmode operand 0 set from an UNSPEC_LFENCE unspec of itself.
;; Attributes: atom_sse_attr "lfence", memory "unknown".
;; NOTE(review): the enabling condition and output template are on lines not
;; visible in this excerpt.
7778 (define_insn "*sse2_lfence"
7779 [(set (match_operand:BLK 0 "" "")
7780 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7783 [(set_attr "type" "sse")
7784 (set_attr "atom_sse_attr" "lfence")
7785 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile taking two SI registers, operand 0 with
;; constraint "a" and operand 1 with constraint "c".  The length attribute is
;; fixed at 3.
;; NOTE(review): the unspec code, enabling condition and output template are
;; on lines not visible in this excerpt.
7787 (define_insn "sse3_mwait"
7788 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7789 (match_operand:SI 1 "register_operand" "c")]
7792 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7793 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7794 ;; we only need to set up 32bit registers.
7796 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile taking
;; three SI registers with constraints "a", "c" and "d", printed explicitly as
;; the operands of "monitor".  The length attribute is fixed at 3.
;; NOTE(review): the unspec code is on a line not visible in this excerpt.
7798 (define_insn "sse3_monitor"
7799 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7800 (match_operand:SI 1 "register_operand" "c")
7801 (match_operand:SI 2 "register_operand" "d")]
7803 "TARGET_SSE3 && !TARGET_64BIT"
7804 "monitor\t%0, %1, %2"
7805 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): operand 0 is a DI register
;; (constraint "a"); operands 1 and 2 stay SImode (constraints "c" and "d")
;; for the reason given in the comment below.  The length attribute is 3.
;; NOTE(review): the unspec code and the output template are on lines not
;; visible in this excerpt.
7807 (define_insn "sse3_monitor64"
7808 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7809 (match_operand:SI 1 "register_operand" "c")
7810 (match_operand:SI 2 "register_operand" "d")]
7812 "TARGET_SSE3 && TARGET_64BIT"
7813 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7814 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7815 ;; zero extended to 64bit, we only need to set up 32bit registers.
7817 [(set_attr "length" "3")])
7819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7821 ;; SSSE3 instructions
7823 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX V8HI phaddw: emits the three-operand vphaddw (operand 1 a register,
;; operand 2 a register or memory).  The visible RTL selects HI elements 0-7
;; of operand 1 and then of operand 2, one element per vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
7825 (define_insn "*avx_phaddwv8hi3"
7826 [(set (match_operand:V8HI 0 "register_operand" "=x")
7832 (match_operand:V8HI 1 "register_operand" "x")
7833 (parallel [(const_int 0)]))
7834 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7836 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7837 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7840 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7841 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7843 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7844 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7849 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7850 (parallel [(const_int 0)]))
7851 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7853 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7854 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7857 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7858 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7860 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7861 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7863 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7864 [(set_attr "type" "sseiadd")
7865 (set_attr "prefix" "vex")
7866 (set_attr "mode" "TI")])
;; SSSE3 V8HI phaddw: emits the two-operand phaddw with operand 1 tied to the
;; destination ("0"); operand 2 may be a register or memory.  The visible RTL
;; selects HI elements 0-7 of operand 1 and then of operand 2, one per
;; vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
7868 (define_insn "ssse3_phaddwv8hi3"
7869 [(set (match_operand:V8HI 0 "register_operand" "=x")
7875 (match_operand:V8HI 1 "register_operand" "0")
7876 (parallel [(const_int 0)]))
7877 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7879 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7880 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7883 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7884 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7886 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7887 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7892 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7893 (parallel [(const_int 0)]))
7894 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7896 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7897 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7900 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7901 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7903 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7904 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7906 "phaddw\t{%2, %0|%0, %2}"
7907 [(set_attr "type" "sseiadd")
7908 (set_attr "atom_unit" "complex")
7909 (set_attr "prefix_data16" "1")
7910 (set_attr "prefix_extra" "1")
7911 (set_attr "mode" "TI")])
;; V4HI phaddw on "y"-constrained registers: emits the two-operand phaddw with
;; operand 1 tied to the destination; operand 2 may be a register or memory.
;; The visible RTL selects HI elements 0-3 of operand 1 and then of operand 2.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
7913 (define_insn "ssse3_phaddwv4hi3"
7914 [(set (match_operand:V4HI 0 "register_operand" "=y")
7919 (match_operand:V4HI 1 "register_operand" "0")
7920 (parallel [(const_int 0)]))
7921 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7923 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7924 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7928 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7929 (parallel [(const_int 0)]))
7930 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7932 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7933 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7935 "phaddw\t{%2, %0|%0, %2}"
7936 [(set_attr "type" "sseiadd")
7937 (set_attr "atom_unit" "complex")
7938 (set_attr "prefix_extra" "1")
7939 (set_attr "mode" "DI")])
;; AVX V4SI phaddd: emits the three-operand vphaddd (operand 1 a register,
;; operand 2 a register or memory).  The visible RTL selects SI elements 0-3
;; of operand 1 and then of operand 2, one element per vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
7941 (define_insn "*avx_phadddv4si3"
7942 [(set (match_operand:V4SI 0 "register_operand" "=x")
7947 (match_operand:V4SI 1 "register_operand" "x")
7948 (parallel [(const_int 0)]))
7949 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7951 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7952 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7956 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7957 (parallel [(const_int 0)]))
7958 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7960 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7961 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7963 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7964 [(set_attr "type" "sseiadd")
7965 (set_attr "prefix" "vex")
7966 (set_attr "mode" "TI")])
;; SSSE3 V4SI phaddd: emits the two-operand phaddd with operand 1 tied to the
;; destination ("0"); operand 2 may be a register or memory.  The visible RTL
;; selects SI elements 0-3 of operand 1 and then of operand 2.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
7968 (define_insn "ssse3_phadddv4si3"
7969 [(set (match_operand:V4SI 0 "register_operand" "=x")
7974 (match_operand:V4SI 1 "register_operand" "0")
7975 (parallel [(const_int 0)]))
7976 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7978 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7979 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7983 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7984 (parallel [(const_int 0)]))
7985 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7987 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7988 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7990 "phaddd\t{%2, %0|%0, %2}"
7991 [(set_attr "type" "sseiadd")
7992 (set_attr "atom_unit" "complex")
7993 (set_attr "prefix_data16" "1")
7994 (set_attr "prefix_extra" "1")
7995 (set_attr "mode" "TI")])
;; V2SI phaddd on "y"-constrained registers: emits the two-operand phaddd with
;; operand 1 tied to the destination; operand 2 may be a register or memory.
;; The visible RTL selects SI elements 0 and 1 of each source operand.
;; NOTE(review): the operators that combine the selected elements, and the
;; enabling condition, are on lines not visible in this excerpt.
7997 (define_insn "ssse3_phadddv2si3"
7998 [(set (match_operand:V2SI 0 "register_operand" "=y")
8002 (match_operand:V2SI 1 "register_operand" "0")
8003 (parallel [(const_int 0)]))
8004 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8007 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8008 (parallel [(const_int 0)]))
8009 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8011 "phaddd\t{%2, %0|%0, %2}"
8012 [(set_attr "type" "sseiadd")
8013 (set_attr "atom_unit" "complex")
8014 (set_attr "prefix_extra" "1")
8015 (set_attr "mode" "DI")])
;; AVX V8HI phaddsw: emits the three-operand vphaddsw (operand 1 a register,
;; operand 2 a register or memory).  The visible RTL selects HI elements 0-7
;; of operand 1 and then of operand 2, one element per vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements
;; (presumably a saturating add, going by the mnemonic) and the enabling
;; condition are on lines not visible in this excerpt.
8017 (define_insn "*avx_phaddswv8hi3"
8018 [(set (match_operand:V8HI 0 "register_operand" "=x")
8024 (match_operand:V8HI 1 "register_operand" "x")
8025 (parallel [(const_int 0)]))
8026 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8028 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8029 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8032 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8033 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8035 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8036 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8041 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8042 (parallel [(const_int 0)]))
8043 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8045 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8046 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8049 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8050 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8052 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8053 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8055 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8056 [(set_attr "type" "sseiadd")
8057 (set_attr "prefix" "vex")
8058 (set_attr "mode" "TI")])
;; SSSE3 V8HI phaddsw: emits the two-operand phaddsw with operand 1 tied to
;; the destination ("0"); operand 2 may be a register or memory.  The visible
;; RTL selects HI elements 0-7 of operand 1 and then of operand 2.
;; NOTE(review): the operators that pair up and combine the selected elements
;; (presumably a saturating add, going by the mnemonic) and the enabling
;; condition are on lines not visible in this excerpt.
8060 (define_insn "ssse3_phaddswv8hi3"
8061 [(set (match_operand:V8HI 0 "register_operand" "=x")
8067 (match_operand:V8HI 1 "register_operand" "0")
8068 (parallel [(const_int 0)]))
8069 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8071 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8072 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8075 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8076 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8078 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8079 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8084 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8085 (parallel [(const_int 0)]))
8086 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8088 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8089 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8092 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8093 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8095 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8096 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8098 "phaddsw\t{%2, %0|%0, %2}"
8099 [(set_attr "type" "sseiadd")
8100 (set_attr "atom_unit" "complex")
8101 (set_attr "prefix_data16" "1")
8102 (set_attr "prefix_extra" "1")
8103 (set_attr "mode" "TI")])
;; V4HI phaddsw on "y"-constrained registers: emits the two-operand phaddsw
;; with operand 1 tied to the destination; operand 2 may be a register or
;; memory.  The visible RTL selects HI elements 0-3 of each source operand.
;; NOTE(review): the operators that pair up and combine the selected elements
;; (presumably a saturating add, going by the mnemonic) and the enabling
;; condition are on lines not visible in this excerpt.
8105 (define_insn "ssse3_phaddswv4hi3"
8106 [(set (match_operand:V4HI 0 "register_operand" "=y")
8111 (match_operand:V4HI 1 "register_operand" "0")
8112 (parallel [(const_int 0)]))
8113 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8115 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8116 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8120 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8121 (parallel [(const_int 0)]))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8124 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8125 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8127 "phaddsw\t{%2, %0|%0, %2}"
8128 [(set_attr "type" "sseiadd")
8129 (set_attr "atom_unit" "complex")
8130 (set_attr "prefix_extra" "1")
8131 (set_attr "mode" "DI")])
;; AVX V8HI phsubw: emits the three-operand vphsubw (operand 1 a register,
;; operand 2 a register or memory).  The visible RTL selects HI elements 0-7
;; of operand 1 and then of operand 2, one element per vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
8133 (define_insn "*avx_phsubwv8hi3"
8134 [(set (match_operand:V8HI 0 "register_operand" "=x")
8140 (match_operand:V8HI 1 "register_operand" "x")
8141 (parallel [(const_int 0)]))
8142 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8144 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8145 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8148 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8149 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8151 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8152 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8157 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8158 (parallel [(const_int 0)]))
8159 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8161 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8165 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8168 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8169 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8171 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8172 [(set_attr "type" "sseiadd")
8173 (set_attr "prefix" "vex")
8174 (set_attr "mode" "TI")])
;; SSSE3 V8HI phsubw: emits the two-operand phsubw with operand 1 tied to the
;; destination ("0"); operand 2 may be a register or memory.  The visible RTL
;; selects HI elements 0-7 of operand 1 and then of operand 2.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
8176 (define_insn "ssse3_phsubwv8hi3"
8177 [(set (match_operand:V8HI 0 "register_operand" "=x")
8183 (match_operand:V8HI 1 "register_operand" "0")
8184 (parallel [(const_int 0)]))
8185 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8187 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8188 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8194 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8200 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8201 (parallel [(const_int 0)]))
8202 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8204 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8205 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8208 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8209 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8211 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8212 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8214 "phsubw\t{%2, %0|%0, %2}"
8215 [(set_attr "type" "sseiadd")
8216 (set_attr "atom_unit" "complex")
8217 (set_attr "prefix_data16" "1")
8218 (set_attr "prefix_extra" "1")
8219 (set_attr "mode" "TI")])
;; V4HI phsubw on "y"-constrained registers: emits the two-operand phsubw with
;; operand 1 tied to the destination; operand 2 may be a register or memory.
;; The visible RTL selects HI elements 0-3 of each source operand.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
8221 (define_insn "ssse3_phsubwv4hi3"
8222 [(set (match_operand:V4HI 0 "register_operand" "=y")
8227 (match_operand:V4HI 1 "register_operand" "0")
8228 (parallel [(const_int 0)]))
8229 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8231 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8232 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8236 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8237 (parallel [(const_int 0)]))
8238 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8240 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8241 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8243 "phsubw\t{%2, %0|%0, %2}"
8244 [(set_attr "type" "sseiadd")
8245 (set_attr "atom_unit" "complex")
8246 (set_attr "prefix_extra" "1")
8247 (set_attr "mode" "DI")])
;; AVX V4SI phsubd: emits the three-operand vphsubd (operand 1 a register,
;; operand 2 a register or memory).  The visible RTL selects SI elements 0-3
;; of operand 1 and then of operand 2, one element per vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
8249 (define_insn "*avx_phsubdv4si3"
8250 [(set (match_operand:V4SI 0 "register_operand" "=x")
8255 (match_operand:V4SI 1 "register_operand" "x")
8256 (parallel [(const_int 0)]))
8257 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8259 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8260 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8264 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8265 (parallel [(const_int 0)]))
8266 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8268 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8269 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8271 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8272 [(set_attr "type" "sseiadd")
8273 (set_attr "prefix" "vex")
8274 (set_attr "mode" "TI")])
;; SSSE3 V4SI phsubd: emits the two-operand phsubd with operand 1 tied to the
;; destination ("0"); operand 2 may be a register or memory.  The visible RTL
;; selects SI elements 0-3 of operand 1 and then of operand 2.
;; NOTE(review): the operators that pair up and combine the selected elements,
;; and the enabling condition, are on lines not visible in this excerpt.
8276 (define_insn "ssse3_phsubdv4si3"
8277 [(set (match_operand:V4SI 0 "register_operand" "=x")
8282 (match_operand:V4SI 1 "register_operand" "0")
8283 (parallel [(const_int 0)]))
8284 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8286 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8287 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8291 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8292 (parallel [(const_int 0)]))
8293 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8295 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8296 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8298 "phsubd\t{%2, %0|%0, %2}"
8299 [(set_attr "type" "sseiadd")
8300 (set_attr "atom_unit" "complex")
8301 (set_attr "prefix_data16" "1")
8302 (set_attr "prefix_extra" "1")
8303 (set_attr "mode" "TI")])
;; V2SI phsubd on "y"-constrained registers: emits the two-operand phsubd with
;; operand 1 tied to the destination; operand 2 may be a register or memory.
;; The visible RTL selects SI elements 0 and 1 of each source operand.
;; NOTE(review): the operators that combine the selected elements, and the
;; enabling condition, are on lines not visible in this excerpt.
8305 (define_insn "ssse3_phsubdv2si3"
8306 [(set (match_operand:V2SI 0 "register_operand" "=y")
8310 (match_operand:V2SI 1 "register_operand" "0")
8311 (parallel [(const_int 0)]))
8312 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8315 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8316 (parallel [(const_int 0)]))
8317 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8319 "phsubd\t{%2, %0|%0, %2}"
8320 [(set_attr "type" "sseiadd")
8321 (set_attr "atom_unit" "complex")
8322 (set_attr "prefix_extra" "1")
8323 (set_attr "mode" "DI")])
;; AVX V8HI phsubsw: emits the three-operand vphsubsw (operand 1 a register,
;; operand 2 a register or memory).  The visible RTL selects HI elements 0-7
;; of operand 1 and then of operand 2, one element per vec_select.
;; NOTE(review): the operators that pair up and combine the selected elements
;; (presumably a saturating subtract, going by the mnemonic) and the enabling
;; condition are on lines not visible in this excerpt.
8325 (define_insn "*avx_phsubswv8hi3"
8326 [(set (match_operand:V8HI 0 "register_operand" "=x")
8332 (match_operand:V8HI 1 "register_operand" "x")
8333 (parallel [(const_int 0)]))
8334 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8336 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8337 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8340 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8341 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8343 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8344 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8349 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8350 (parallel [(const_int 0)]))
8351 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8353 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8354 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8357 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8358 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8360 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8361 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8363 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8364 [(set_attr "type" "sseiadd")
8365 (set_attr "prefix" "vex")
8366 (set_attr "mode" "TI")])
;; SSSE3 V8HI phsubsw: emits the two-operand phsubsw with operand 1 tied to
;; the destination ("0"); operand 2 may be a register or memory.  The visible
;; RTL selects HI elements 0-7 of operand 1 and then of operand 2.
;; NOTE(review): the operators that pair up and combine the selected elements
;; (presumably a saturating subtract, going by the mnemonic) and the enabling
;; condition are on lines not visible in this excerpt.
8368 (define_insn "ssse3_phsubswv8hi3"
8369 [(set (match_operand:V8HI 0 "register_operand" "=x")
8375 (match_operand:V8HI 1 "register_operand" "0")
8376 (parallel [(const_int 0)]))
8377 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8379 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8380 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8383 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8386 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8387 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8392 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8393 (parallel [(const_int 0)]))
8394 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8396 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8397 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8400 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8403 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8404 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8406 "phsubsw\t{%2, %0|%0, %2}"
8407 [(set_attr "type" "sseiadd")
8408 (set_attr "atom_unit" "complex")
8409 (set_attr "prefix_data16" "1")
8410 (set_attr "prefix_extra" "1")
8411 (set_attr "mode" "TI")])
;; V4HI phsubsw on "y"-constrained registers: emits the two-operand phsubsw
;; with operand 1 tied to the destination; operand 2 may be a register or
;; memory.  The visible RTL selects HI elements 0-3 of each source operand.
;; NOTE(review): the operators that pair up and combine the selected elements
;; (presumably a saturating subtract, going by the mnemonic) and the enabling
;; condition are on lines not visible in this excerpt.
8413 (define_insn "ssse3_phsubswv4hi3"
8414 [(set (match_operand:V4HI 0 "register_operand" "=y")
8419 (match_operand:V4HI 1 "register_operand" "0")
8420 (parallel [(const_int 0)]))
8421 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8423 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8424 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8428 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8429 (parallel [(const_int 0)]))
8430 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8432 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8433 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8435 "phsubsw\t{%2, %0|%0, %2}"
8436 [(set_attr "type" "sseiadd")
8437 (set_attr "atom_unit" "complex")
8438 (set_attr "prefix_extra" "1")
8439 (set_attr "mode" "DI")])
;; AVX 128-bit pmaddubsw: V8HI destination from two V16QI sources; emits the
;; three-operand vpmaddubsw (operand 1 a register, operand 2 a register or
;; memory).  The visible RTL applies element-selecting parallels to both
;; operands, one pair starting at index 0 and another starting at index 1.
;; NOTE(review): most of the selector lists, the arithmetic operators and the
;; enabling condition are on lines not visible in this excerpt.
8441 (define_insn "*avx_pmaddubsw128"
8442 [(set (match_operand:V8HI 0 "register_operand" "=x")
8447 (match_operand:V16QI 1 "register_operand" "x")
8448 (parallel [(const_int 0)
8458 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8459 (parallel [(const_int 0)
8469 (vec_select:V16QI (match_dup 1)
8470 (parallel [(const_int 1)
8479 (vec_select:V16QI (match_dup 2)
8480 (parallel [(const_int 1)
8487 (const_int 15)]))))))]
8489 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8490 [(set_attr "type" "sseiadd")
8491 (set_attr "prefix" "vex")
8492 (set_attr "mode" "TI")])
;; Legacy two-operand pmaddubsw on SSE registers: V16QI inputs, V8HI
;; result.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; a register or memory ("xm").
8494 (define_insn "ssse3_pmaddubsw128"
8495 [(set (match_operand:V8HI 0 "register_operand" "=x")
8500 (match_operand:V16QI 1 "register_operand" "0")
8501 (parallel [(const_int 0)
8511 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8512 (parallel [(const_int 0)
8522 (vec_select:V16QI (match_dup 1)
8523 (parallel [(const_int 1)
8532 (vec_select:V16QI (match_dup 2)
8533 (parallel [(const_int 1)
8540 (const_int 15)]))))))]
8542 "pmaddubsw\t{%2, %0|%0, %2}"
8543 [(set_attr "type" "sseiadd")
8544 (set_attr "atom_unit" "simul")
8545 (set_attr "prefix_data16" "1")
8546 (set_attr "prefix_extra" "1")
8547 (set_attr "mode" "TI")])
;; MMX-register form of pmaddubsw: V8QI inputs, V4HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory ("ym").
8549 (define_insn "ssse3_pmaddubsw"
8550 [(set (match_operand:V4HI 0 "register_operand" "=y")
8555 (match_operand:V8QI 1 "register_operand" "0")
8556 (parallel [(const_int 0)
8562 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8563 (parallel [(const_int 0)
8569 (vec_select:V8QI (match_dup 1)
8570 (parallel [(const_int 1)
8575 (vec_select:V8QI (match_dup 2)
8576 (parallel [(const_int 1)
8579 (const_int 7)]))))))]
8581 "pmaddubsw\t{%2, %0|%0, %2}"
8582 [(set_attr "type" "sseiadd")
8583 (set_attr "atom_unit" "simul")
8584 (set_attr "prefix_extra" "1")
8585 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement passes them through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands),
;; presumably so they satisfy the matching insns' operand checks.
;; The template contains a V8HI constant vector of ones.
8587 (define_expand "ssse3_pmulhrswv8hi3"
8588 [(set (match_operand:V8HI 0 "register_operand" "")
8595 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8597 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8599 (const_vector:V8HI [(const_int 1) (const_int 1)
8600 (const_int 1) (const_int 1)
8601 (const_int 1) (const_int 1)
8602 (const_int 1) (const_int 1)]))
8605 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded three-operand vpmulhrsw on V8HI.  Enabled for TARGET_AVX
;; when ix86_binary_operator_ok accepts the operands.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative; operand 2 may be
;; memory.
8607 (define_insn "*avx_pmulhrswv8hi3"
8608 [(set (match_operand:V8HI 0 "register_operand" "=x")
8615 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8617 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8619 (const_vector:V8HI [(const_int 1) (const_int 1)
8620 (const_int 1) (const_int 1)
8621 (const_int 1) (const_int 1)
8622 (const_int 1) (const_int 1)]))
8624 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8625 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8626 [(set_attr "type" "sseimul")
8627 (set_attr "prefix" "vex")
8628 (set_attr "mode" "TI")])
;; Legacy two-operand pmulhrsw on V8HI in SSE registers.  Enabled for
;; TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination and commutative with operand 2
;; ("%0"); operand 2 may be memory.
8630 (define_insn "*ssse3_pmulhrswv8hi3"
8631 [(set (match_operand:V8HI 0 "register_operand" "=x")
8638 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8640 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8642 (const_vector:V8HI [(const_int 1) (const_int 1)
8643 (const_int 1) (const_int 1)
8644 (const_int 1) (const_int 1)
8645 (const_int 1) (const_int 1)]))
8647 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8648 "pmulhrsw\t{%2, %0|%0, %2}"
8649 [(set_attr "type" "sseimul")
8650 (set_attr "prefix_data16" "1")
8651 (set_attr "prefix_extra" "1")
8652 (set_attr "mode" "TI")])
;; Expander for the V4HI (MMX-sized) pmulhrsw pattern.  The preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands),
;; presumably so they satisfy the matching insn's operand checks.
;; The template contains a V4HI constant vector of ones.
8654 (define_expand "ssse3_pmulhrswv4hi3"
8655 [(set (match_operand:V4HI 0 "register_operand" "")
8662 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8664 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8666 (const_vector:V4HI [(const_int 1) (const_int 1)
8667 (const_int 1) (const_int 1)]))
8670 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on V4HI in MMX registers ("y").  Enabled for TARGET_SSSE3
;; when ix86_binary_operator_ok accepts the operands.  Operand 1 is tied
;; to the destination and commutative with operand 2 ("%0"); operand 2
;; may be memory.
8672 (define_insn "*ssse3_pmulhrswv4hi3"
8673 [(set (match_operand:V4HI 0 "register_operand" "=y")
8680 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8682 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8684 (const_vector:V4HI [(const_int 1) (const_int 1)
8685 (const_int 1) (const_int 1)]))
8687 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8688 "pmulhrsw\t{%2, %0|%0, %2}"
8689 [(set_attr "type" "sseimul")
8690 (set_attr "prefix_extra" "1")
8691 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpshufb on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 2 may be a register or memory.
;; The ';' following the output template starts an empty comment.
8693 (define_insn "*avx_pshufbv16qi3"
8694 [(set (match_operand:V16QI 0 "register_operand" "=x")
8695 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8696 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8699 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8700 [(set_attr "type" "sselog1")
8701 (set_attr "prefix" "vex")
8702 (set_attr "mode" "TI")])
;; Legacy two-operand pshufb on V16QI in SSE registers, modelled as an
;; unspec.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; a register or memory.
;; The ';' following the output template starts an empty comment.
8704 (define_insn "ssse3_pshufbv16qi3"
8705 [(set (match_operand:V16QI 0 "register_operand" "=x")
8706 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8707 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8710 "pshufb\t{%2, %0|%0, %2}";
8711 [(set_attr "type" "sselog1")
8712 (set_attr "prefix_data16" "1")
8713 (set_attr "prefix_extra" "1")
8714 (set_attr "mode" "TI")])
;; pshufb on V8QI in MMX registers ("y"), modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be an MMX
;; register or memory.
;; The ';' following the output template starts an empty comment.
8716 (define_insn "ssse3_pshufbv8qi3"
8717 [(set (match_operand:V8QI 0 "register_operand" "=y")
8718 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8719 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8722 "pshufb\t{%2, %0|%0, %2}";
8723 [(set_attr "type" "sselog1")
8724 (set_attr "prefix_extra" "1")
8725 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpsign{b,w,d}, iterated over SSEMODE124
;; (V16QI, V8HI, V4SI).  The mnemonic suffix comes from the
;; <ssevecsize> mode attribute.  Operand 2 may be a register or memory.
;; The ';' following the output template starts an empty comment.
8727 (define_insn "*avx_psign<mode>3"
8728 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8730 [(match_operand:SSEMODE124 1 "register_operand" "x")
8731 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8734 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8735 [(set_attr "type" "sselog1")
8736 (set_attr "prefix" "vex")
8737 (set_attr "mode" "TI")])
;; Legacy two-operand psign{b,w,d} on SSE registers, iterated over
;; SSEMODE124 (V16QI, V8HI, V4SI).  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; The ';' following the output template starts an empty comment.
8739 (define_insn "ssse3_psign<mode>3"
8740 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8742 [(match_operand:SSEMODE124 1 "register_operand" "0")
8743 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8746 "psign<ssevecsize>\t{%2, %0|%0, %2}";
8747 [(set_attr "type" "sselog1")
8748 (set_attr "prefix_data16" "1")
8749 (set_attr "prefix_extra" "1")
8750 (set_attr "mode" "TI")])
;; psign on MMX registers ("y"), iterated over MMXMODEI.  The mnemonic
;; suffix comes from the <mmxvecsize> mode attribute.  Operand 1 is tied
;; to the destination; operand 2 may be an MMX register or memory.
;; The ';' following the output template starts an empty comment.
8752 (define_insn "ssse3_psign<mode>3"
8753 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8755 [(match_operand:MMXMODEI 1 "register_operand" "0")
8756 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8759 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8760 [(set_attr "type" "sselog1")
8761 (set_attr "prefix_extra" "1")
8762 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpalignr on TImode values.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing it as the immediate (presumably
;; converting a bit count into the byte count the instruction takes).
8764 (define_insn "*avx_palignrti"
8765 [(set (match_operand:TI 0 "register_operand" "=x")
8766 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8767 (match_operand:TI 2 "nonimmediate_operand" "xm")
8768 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8772 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8773 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8775 [(set_attr "type" "sseishft")
8776 (set_attr "prefix" "vex")
8777 (set_attr "mode" "TI")])
;; Legacy two-operand palignr on TImode values in SSE registers;
;; operand 1 is tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing it as the immediate (presumably
;; converting a bit count into the byte count the instruction takes).
8779 (define_insn "ssse3_palignrti"
8780 [(set (match_operand:TI 0 "register_operand" "=x")
8781 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8782 (match_operand:TI 2 "nonimmediate_operand" "xm")
8783 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8787 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8788 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8790 [(set_attr "type" "sseishft")
8791 (set_attr "atom_unit" "sishuf")
8792 (set_attr "prefix_data16" "1")
8793 (set_attr "prefix_extra" "1")
8794 (set_attr "mode" "TI")])
;; palignr on DImode values in MMX registers ("y"); operand 1 is tied
;; to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing it as the immediate (presumably
;; converting a bit count into the byte count the instruction takes).
8796 (define_insn "ssse3_palignrdi"
8797 [(set (match_operand:DI 0 "register_operand" "=y")
8798 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8799 (match_operand:DI 2 "nonimmediate_operand" "ym")
8800 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8804 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8805 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8807 [(set_attr "type" "sseishft")
8808 (set_attr "atom_unit" "sishuf")
8809 (set_attr "prefix_extra" "1")
8810 (set_attr "mode" "DI")])
;; Standard-named abs<mode>2 pattern for SSEMODE124 (V16QI, V8HI, V4SI)
;; implemented with pabs{b,w,d}.  The source may be a register or
;; memory.  The "%v" template prefix with prefix=maybe_vex lets this one
;; pattern cover both the legacy and the VEX-encoded mnemonic.
8812 (define_insn "abs<mode>2"
8813 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8814 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8816 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8817 [(set_attr "type" "sselog1")
8818 (set_attr "prefix_data16" "1")
8819 (set_attr "prefix_extra" "1")
8820 (set_attr "prefix" "maybe_vex")
8821 (set_attr "mode" "TI")])
;; Standard-named abs<mode>2 pattern for the MMXMODEI modes, implemented
;; with pabs on MMX registers ("y").  The source may be a register or
;; memory.
;; The ';' following the output template starts an empty comment.
8823 (define_insn "abs<mode>2"
8824 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8825 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8827 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8828 [(set_attr "type" "sselog1")
8829 (set_attr "prefix_extra" "1")
8830 (set_attr "mode" "DI")])
8832 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8834 ;; AMD SSE4A instructions
8836 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntss / movntsd: stores a scalar MODEF value from an SSE register
;; (operand 1) to memory (operand 0).  The mnemonic suffix comes from
;; <ssemodefsuffix>.
8838 (define_insn "sse4a_movnt<mode>"
8839 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8841 [(match_operand:MODEF 1 "register_operand" "x")]
8844 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8845 [(set_attr "type" "ssemov")
8846 (set_attr "mode" "<MODE>")])
;; Vector-source form of movntss / movntsd: element 0 of a V4SF/V2DF
;; register (operand 1) is selected and stored to a scalar-mode memory
;; operand (operand 0).
8848 (define_insn "sse4a_vmmovnt<mode>"
8849 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8850 (unspec:<ssescalarmode>
8851 [(vec_select:<ssescalarmode>
8852 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8853 (parallel [(const_int 0)]))]
8856 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8857 [(set_attr "type" "ssemov")
8858 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: operand 1 is tied to the destination and
;; operands 2 and 3 are integer constants printed as the two immediates.
8860 (define_insn "sse4a_extrqi"
8861 [(set (match_operand:V2DI 0 "register_operand" "=x")
8862 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8863 (match_operand 2 "const_int_operand" "")
8864 (match_operand 3 "const_int_operand" "")]
8867 "extrq\t{%3, %2, %0|%0, %2, %3}"
8868 [(set_attr "type" "sse")
8869 (set_attr "prefix_data16" "1")
8870 (set_attr "mode" "TI")])
;; Register form of extrq: operand 1 is tied to the destination and
;; operand 2 is a V16QI SSE register.
8872 (define_insn "sse4a_extrq"
8873 [(set (match_operand:V2DI 0 "register_operand" "=x")
8874 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8875 (match_operand:V16QI 2 "register_operand" "x")]
8878 "extrq\t{%2, %0|%0, %2}"
8879 [(set_attr "type" "sse")
8880 (set_attr "prefix_data16" "1")
8881 (set_attr "mode" "TI")])
;; Immediate form of insertq: operand 1 is tied to the destination,
;; operand 2 is a second V2DI register, and operands 3 and 4 are integer
;; constants printed as the two immediates.
8883 (define_insn "sse4a_insertqi"
8884 [(set (match_operand:V2DI 0 "register_operand" "=x")
8885 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8886 (match_operand:V2DI 2 "register_operand" "x")
8887 (match_operand 3 "const_int_operand" "")
8888 (match_operand 4 "const_int_operand" "")]
8891 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8892 [(set_attr "type" "sseins")
8893 (set_attr "prefix_rep" "1")
8894 (set_attr "mode" "TI")])
;; Register form of insertq: operand 1 is tied to the destination and
;; operand 2 is a second V2DI SSE register.
8896 (define_insn "sse4a_insertq"
8897 [(set (match_operand:V2DI 0 "register_operand" "=x")
8898 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8899 (match_operand:V2DI 2 "register_operand" "x")]
8902 "insertq\t{%2, %0|%0, %2}"
8903 [(set_attr "type" "sseins")
8904 (set_attr "prefix_rep" "1")
8905 (set_attr "mode" "TI")])
8907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8909 ;; Intel SSE4.1 instructions
8911 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vblendps / vblendpd over AVXMODEF2P, expressed as a
;; vec_merge.  Note the operand order inside vec_merge: operand 2 comes
;; first and operand 1 second.  Operand 3 is the immediate selector,
;; bounded by the const_0_to_<blendbits>_operand predicate.
8913 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
8914 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8915 (vec_merge:AVXMODEF2P
8916 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8917 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8918 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8920 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8921 [(set_attr "type" "ssemov")
8922 (set_attr "prefix" "vex")
8923 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded vblendvps / vblendvpd over AVXMODEF2P.  All of the
;; selector (operand 3) and the two data inputs are explicit operands;
;; operand 3 may be any SSE register ("x"), operand 2 may be memory.
8925 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8926 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8928 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8929 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8930 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8933 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8934 [(set_attr "type" "ssemov")
8935 (set_attr "prefix" "vex")
8936 (set_attr "mode" "<avxvecmode>")])
;; Legacy two-operand blendps / blendpd over V4SF/V2DF, expressed as a
;; vec_merge with operand 2 listed first and operand 1 (tied to the
;; destination, "0") second.  Operand 3 is the immediate selector,
;; bounded by the const_0_to_<blendbits>_operand predicate.
8938 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8939 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8940 (vec_merge:SSEMODEF2P
8941 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8942 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8943 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8945 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8946 [(set_attr "type" "ssemov")
8947 (set_attr "prefix_extra" "1")
8948 (set_attr "mode" "<MODE>")])
;; Legacy blendvps / blendvpd.  The selector (operand 3) uses the "Yz"
;; constraint (the first SSE register, xmm0), while operands 0, 1 and 2
;; use the *_not_xmm0 predicates so they cannot be allocated to it.
;; Operand 1 is tied to the destination.
8950 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8951 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8953 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8954 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8955 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8958 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8959 [(set_attr "type" "ssemov")
8960 (set_attr "prefix_extra" "1")
8961 (set_attr "mode" "<MODE>")])
;; VEX-encoded vdpps / vdppd over AVXMODEF2P.  Operands 1 and 2 are
;; marked commutative ("%"); operand 2 may be memory.  Operand 3 is an
;; 8-bit immediate (const_0_to_255_operand).
8963 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8964 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8966 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8967 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8968 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8971 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8972 [(set_attr "type" "ssemul")
8973 (set_attr "prefix" "vex")
8974 (set_attr "mode" "<avxvecmode>")])
;; Legacy two-operand dpps / dppd over V4SF/V2DF.  Operand 1 is tied to
;; the destination and commutative with operand 2 ("%0"); operand 2 may
;; be memory.  Operand 3 is an 8-bit immediate.
8976 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8979 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8980 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8981 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8984 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8985 [(set_attr "type" "ssemul")
8986 (set_attr "prefix_extra" "1")
8987 (set_attr "mode" "<MODE>")])
;; movntdqa: loads a V2DI value from memory (operand 1 must be a memory
;; operand) into an SSE register, modelled as an unspec.  The "%v"
;; prefix with prefix=maybe_vex covers the legacy and VEX mnemonics.
8989 (define_insn "sse4_1_movntdqa"
8990 [(set (match_operand:V2DI 0 "register_operand" "=x")
8991 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8994 "%vmovntdqa\t{%1, %0|%0, %1}"
8995 [(set_attr "type" "ssemov")
8996 (set_attr "prefix_extra" "1")
8997 (set_attr "prefix" "maybe_vex")
8998 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vmpsadbw on V16QI, modelled as an unspec.
;; Operand 2 may be memory; operand 3 is an 8-bit immediate.
9000 (define_insn "*avx_mpsadbw"
9001 [(set (match_operand:V16QI 0 "register_operand" "=x")
9002 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9003 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9004 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9007 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9008 [(set_attr "type" "sselog1")
9009 (set_attr "prefix" "vex")
9010 (set_attr "mode" "TI")])
;; Legacy two-operand mpsadbw on V16QI, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be memory;
;; operand 3 is an 8-bit immediate.
9012 (define_insn "sse4_1_mpsadbw"
9013 [(set (match_operand:V16QI 0 "register_operand" "=x")
9014 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9015 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9016 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9019 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9020 [(set_attr "type" "sselog1")
9021 (set_attr "prefix_extra" "1")
9022 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpackusdw: two V4SI inputs packed into one
;; V8HI result.  Operand 2 may be a register or memory.
9024 (define_insn "*avx_packusdw"
9025 [(set (match_operand:V8HI 0 "register_operand" "=x")
9028 (match_operand:V4SI 1 "register_operand" "x"))
9030 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9032 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9033 [(set_attr "type" "sselog")
9034 (set_attr "prefix" "vex")
9035 (set_attr "mode" "TI")])
;; Legacy two-operand packusdw: two V4SI inputs packed into one V8HI
;; result.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
9037 (define_insn "sse4_1_packusdw"
9038 [(set (match_operand:V8HI 0 "register_operand" "=x")
9041 (match_operand:V4SI 1 "register_operand" "0"))
9043 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9045 "packusdw\t{%2, %0|%0, %2}"
9046 [(set_attr "type" "sselog")
9047 (set_attr "prefix_extra" "1")
9048 (set_attr "mode" "TI")])
;; VEX-encoded vpblendvb on V16QI, modelled as an unspec.  The selector
;; (operand 3) is an explicit operand and may be any SSE register;
;; operand 2 may be memory.
9050 (define_insn "*avx_pblendvb"
9051 [(set (match_operand:V16QI 0 "register_operand" "=x")
9052 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9053 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9054 (match_operand:V16QI 3 "register_operand" "x")]
9057 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9058 [(set_attr "type" "ssemov")
9059 (set_attr "prefix" "vex")
9060 (set_attr "mode" "TI")])
;; Legacy pblendvb on V16QI.  The selector (operand 3) uses the "Yz"
;; constraint (the first SSE register, xmm0), while operands 0, 1 and 2
;; use the *_not_xmm0 predicates so they cannot be allocated to it.
;; Operand 1 is tied to the destination.
9062 (define_insn "sse4_1_pblendvb"
9063 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9064 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9065 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9066 (match_operand:V16QI 3 "register_operand" "Yz")]
9069 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9070 [(set_attr "type" "ssemov")
9071 (set_attr "prefix_extra" "1")
9072 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpblendw on V8HI.  Operand 2 is listed
;; before operand 1 in the RTL; operand 3 is the 8-bit immediate
;; selector.
9074 (define_insn "*avx_pblendw"
9075 [(set (match_operand:V8HI 0 "register_operand" "=x")
9077 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9078 (match_operand:V8HI 1 "register_operand" "x")
9079 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9081 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9082 [(set_attr "type" "ssemov")
9083 (set_attr "prefix" "vex")
9084 (set_attr "mode" "TI")])
;; Legacy two-operand pblendw on V8HI.  Operand 2 is listed before
;; operand 1 in the RTL; operand 1 is tied to the destination ("0") and
;; operand 3 is the 8-bit immediate selector.
9086 (define_insn "sse4_1_pblendw"
9087 [(set (match_operand:V8HI 0 "register_operand" "=x")
9089 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9090 (match_operand:V8HI 1 "register_operand" "0")
9091 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9093 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9094 [(set_attr "type" "ssemov")
9095 (set_attr "prefix_extra" "1")
9096 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source
;; that may be a register or memory.  The "%v" prefix with
;; prefix=maybe_vex covers the legacy and VEX mnemonics.
9098 (define_insn "sse4_1_phminposuw"
9099 [(set (match_operand:V8HI 0 "register_operand" "=x")
9100 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9101 UNSPEC_PHMINPOSUW))]
9103 "%vphminposuw\t{%1, %0|%0, %1}"
9104 [(set_attr "type" "sselog1")
9105 (set_attr "prefix_extra" "1")
9106 (set_attr "prefix" "maybe_vex")
9107 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: operand 1 is a full V16QI SSE
;; register from which elements are selected; the result is V8HI.
9109 (define_insn "sse4_1_extendv8qiv8hi2"
9110 [(set (match_operand:V8HI 0 "register_operand" "=x")
9113 (match_operand:V16QI 1 "register_operand" "x")
9114 (parallel [(const_int 0)
9123 "%vpmovsxbw\t{%1, %0|%0, %1}"
9124 [(set_attr "type" "ssemov")
9125 (set_attr "prefix_extra" "1")
9126 (set_attr "prefix" "maybe_vex")
9127 (set_attr "mode" "TI")])
;; pmovsxbw, narrow-source form: operand 1 is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is V8HI.
9129 (define_insn "*sse4_1_extendv8qiv8hi2"
9130 [(set (match_operand:V8HI 0 "register_operand" "=x")
9133 (vec_duplicate:V16QI
9134 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9135 (parallel [(const_int 0)
9144 "%vpmovsxbw\t{%1, %0|%0, %1}"
9145 [(set_attr "type" "ssemov")
9146 (set_attr "prefix_extra" "1")
9147 (set_attr "prefix" "maybe_vex")
9148 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: operand 1 is a full V16QI SSE
;; register from which elements are selected; the result is V4SI.
9150 (define_insn "sse4_1_extendv4qiv4si2"
9151 [(set (match_operand:V4SI 0 "register_operand" "=x")
9154 (match_operand:V16QI 1 "register_operand" "x")
9155 (parallel [(const_int 0)
9160 "%vpmovsxbd\t{%1, %0|%0, %1}"
9161 [(set_attr "type" "ssemov")
9162 (set_attr "prefix_extra" "1")
9163 (set_attr "prefix" "maybe_vex")
9164 (set_attr "mode" "TI")])
;; pmovsxbd, narrow-source form: operand 1 is a V4QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is V4SI.
9166 (define_insn "*sse4_1_extendv4qiv4si2"
9167 [(set (match_operand:V4SI 0 "register_operand" "=x")
9170 (vec_duplicate:V16QI
9171 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9172 (parallel [(const_int 0)
9177 "%vpmovsxbd\t{%1, %0|%0, %1}"
9178 [(set_attr "type" "ssemov")
9179 (set_attr "prefix_extra" "1")
9180 (set_attr "prefix" "maybe_vex")
9181 (set_attr "mode" "TI")])
;; pmovsxbq, register-source form: operand 1 is a full V16QI SSE
;; register from which elements are selected; the result is V2DI.
9183 (define_insn "sse4_1_extendv2qiv2di2"
9184 [(set (match_operand:V2DI 0 "register_operand" "=x")
9187 (match_operand:V16QI 1 "register_operand" "x")
9188 (parallel [(const_int 0)
9191 "%vpmovsxbq\t{%1, %0|%0, %1}"
9192 [(set_attr "type" "ssemov")
9193 (set_attr "prefix_extra" "1")
9194 (set_attr "prefix" "maybe_vex")
9195 (set_attr "mode" "TI")])
;; pmovsxbq, narrow-source form: operand 1 is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is V2DI.
9197 (define_insn "*sse4_1_extendv2qiv2di2"
9198 [(set (match_operand:V2DI 0 "register_operand" "=x")
9201 (vec_duplicate:V16QI
9202 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9203 (parallel [(const_int 0)
9206 "%vpmovsxbq\t{%1, %0|%0, %1}"
9207 [(set_attr "type" "ssemov")
9208 (set_attr "prefix_extra" "1")
9209 (set_attr "prefix" "maybe_vex")
9210 (set_attr "mode" "TI")])
;; pmovsxwd, register-source form: operand 1 is a full V8HI SSE
;; register from which elements are selected; the result is V4SI.
9212 (define_insn "sse4_1_extendv4hiv4si2"
9213 [(set (match_operand:V4SI 0 "register_operand" "=x")
9216 (match_operand:V8HI 1 "register_operand" "x")
9217 (parallel [(const_int 0)
9222 "%vpmovsxwd\t{%1, %0|%0, %1}"
9223 [(set_attr "type" "ssemov")
9224 (set_attr "prefix_extra" "1")
9225 (set_attr "prefix" "maybe_vex")
9226 (set_attr "mode" "TI")])
;; pmovsxwd, narrow-source form: operand 1 is a register or memory
;; operand holding the four source words; the result is V4SI.
;; Operand 1 is V4HI: the instruction consumes four 16-bit elements, and
;; this matches the zero-extending counterpart of this pattern.  It was
;; previously declared V2HI, which describes only half of the data read.
9228 (define_insn "*sse4_1_extendv4hiv4si2"
9229 [(set (match_operand:V4SI 0 "register_operand" "=x")
9233 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9234 (parallel [(const_int 0)
9239 "%vpmovsxwd\t{%1, %0|%0, %1}"
9240 [(set_attr "type" "ssemov")
9241 (set_attr "prefix_extra" "1")
9242 (set_attr "prefix" "maybe_vex")
9243 (set_attr "mode" "TI")])
;; pmovsxwq, register-source form: operand 1 is a full V8HI SSE
;; register from which elements are selected; the result is V2DI.
9245 (define_insn "sse4_1_extendv2hiv2di2"
9246 [(set (match_operand:V2DI 0 "register_operand" "=x")
9249 (match_operand:V8HI 1 "register_operand" "x")
9250 (parallel [(const_int 0)
9253 "%vpmovsxwq\t{%1, %0|%0, %1}"
9254 [(set_attr "type" "ssemov")
9255 (set_attr "prefix_extra" "1")
9256 (set_attr "prefix" "maybe_vex")
9257 (set_attr "mode" "TI")])
;; pmovsxwq, narrow-source form: operand 1 is a register or memory
;; operand holding the two source words; the result is V2DI.
;; Operand 1 is V2HI: the instruction consumes two 16-bit elements, and
;; this matches the zero-extending counterpart of this pattern.  It was
;; previously declared V8HI, i.e. a full 128-bit operand.
9259 (define_insn "*sse4_1_extendv2hiv2di2"
9260 [(set (match_operand:V2DI 0 "register_operand" "=x")
9264 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9265 (parallel [(const_int 0)
9268 "%vpmovsxwq\t{%1, %0|%0, %1}"
9269 [(set_attr "type" "ssemov")
9270 (set_attr "prefix_extra" "1")
9271 (set_attr "prefix" "maybe_vex")
9272 (set_attr "mode" "TI")])
;; pmovsxdq, register-source form: operand 1 is a full V4SI SSE
;; register from which elements are selected; the result is V2DI.
9274 (define_insn "sse4_1_extendv2siv2di2"
9275 [(set (match_operand:V2DI 0 "register_operand" "=x")
9278 (match_operand:V4SI 1 "register_operand" "x")
9279 (parallel [(const_int 0)
9282 "%vpmovsxdq\t{%1, %0|%0, %1}"
9283 [(set_attr "type" "ssemov")
9284 (set_attr "prefix_extra" "1")
9285 (set_attr "prefix" "maybe_vex")
9286 (set_attr "mode" "TI")])
;; pmovsxdq, narrow-source form: operand 1 is a V2SI register or memory
;; operand; the result is V2DI.
9288 (define_insn "*sse4_1_extendv2siv2di2"
9289 [(set (match_operand:V2DI 0 "register_operand" "=x")
9293 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9294 (parallel [(const_int 0)
9297 "%vpmovsxdq\t{%1, %0|%0, %1}"
9298 [(set_attr "type" "ssemov")
9299 (set_attr "prefix_extra" "1")
9300 (set_attr "prefix" "maybe_vex")
9301 (set_attr "mode" "TI")])
;; pmovzxbw, register-source form: operand 1 is a full V16QI SSE
;; register from which elements are selected; the result is V8HI.
9303 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9304 [(set (match_operand:V8HI 0 "register_operand" "=x")
9307 (match_operand:V16QI 1 "register_operand" "x")
9308 (parallel [(const_int 0)
9317 "%vpmovzxbw\t{%1, %0|%0, %1}"
9318 [(set_attr "type" "ssemov")
9319 (set_attr "prefix_extra" "1")
9320 (set_attr "prefix" "maybe_vex")
9321 (set_attr "mode" "TI")])
;; pmovzxbw, narrow-source form: operand 1 is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is V8HI.
9323 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9324 [(set (match_operand:V8HI 0 "register_operand" "=x")
9327 (vec_duplicate:V16QI
9328 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9329 (parallel [(const_int 0)
9338 "%vpmovzxbw\t{%1, %0|%0, %1}"
9339 [(set_attr "type" "ssemov")
9340 (set_attr "prefix_extra" "1")
9341 (set_attr "prefix" "maybe_vex")
9342 (set_attr "mode" "TI")])
;; pmovzxbd, register-source form: operand 1 is a full V16QI SSE
;; register from which elements are selected; the result is V4SI.
9344 (define_insn "sse4_1_zero_extendv4qiv4si2"
9345 [(set (match_operand:V4SI 0 "register_operand" "=x")
9348 (match_operand:V16QI 1 "register_operand" "x")
9349 (parallel [(const_int 0)
9354 "%vpmovzxbd\t{%1, %0|%0, %1}"
9355 [(set_attr "type" "ssemov")
9356 (set_attr "prefix_extra" "1")
9357 (set_attr "prefix" "maybe_vex")
9358 (set_attr "mode" "TI")])
;; pmovzxbd, narrow-source form: operand 1 is a V4QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is V4SI.
9360 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9361 [(set (match_operand:V4SI 0 "register_operand" "=x")
9364 (vec_duplicate:V16QI
9365 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9366 (parallel [(const_int 0)
9371 "%vpmovzxbd\t{%1, %0|%0, %1}"
9372 [(set_attr "type" "ssemov")
9373 (set_attr "prefix_extra" "1")
9374 (set_attr "prefix" "maybe_vex")
9375 (set_attr "mode" "TI")])
;; pmovzxbq, register-source form: operand 1 is a full V16QI SSE
;; register from which elements are selected; the result is V2DI.
9377 (define_insn "sse4_1_zero_extendv2qiv2di2"
9378 [(set (match_operand:V2DI 0 "register_operand" "=x")
9381 (match_operand:V16QI 1 "register_operand" "x")
9382 (parallel [(const_int 0)
9385 "%vpmovzxbq\t{%1, %0|%0, %1}"
9386 [(set_attr "type" "ssemov")
9387 (set_attr "prefix_extra" "1")
9388 (set_attr "prefix" "maybe_vex")
9389 (set_attr "mode" "TI")])
;; pmovzxbq, narrow-source form: operand 1 is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI; the result is V2DI.
9391 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9392 [(set (match_operand:V2DI 0 "register_operand" "=x")
9395 (vec_duplicate:V16QI
9396 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9397 (parallel [(const_int 0)
9400 "%vpmovzxbq\t{%1, %0|%0, %1}"
9401 [(set_attr "type" "ssemov")
9402 (set_attr "prefix_extra" "1")
9403 (set_attr "prefix" "maybe_vex")
9404 (set_attr "mode" "TI")])
;; pmovzxwd, register-source form: operand 1 is a full V8HI SSE
;; register from which elements are selected; the result is V4SI.
9406 (define_insn "sse4_1_zero_extendv4hiv4si2"
9407 [(set (match_operand:V4SI 0 "register_operand" "=x")
9410 (match_operand:V8HI 1 "register_operand" "x")
9411 (parallel [(const_int 0)
9416 "%vpmovzxwd\t{%1, %0|%0, %1}"
9417 [(set_attr "type" "ssemov")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "prefix" "maybe_vex")
9420 (set_attr "mode" "TI")])
;; pmovzxwd, narrow-source form: operand 1 is a V4HI register or memory
;; operand; the result is V4SI.
9422 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9423 [(set (match_operand:V4SI 0 "register_operand" "=x")
9427 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9428 (parallel [(const_int 0)
9433 "%vpmovzxwd\t{%1, %0|%0, %1}"
9434 [(set_attr "type" "ssemov")
9435 (set_attr "prefix_extra" "1")
9436 (set_attr "prefix" "maybe_vex")
9437 (set_attr "mode" "TI")])
;; pmovzxwq, register-source form: operand 1 is a full V8HI SSE
;; register from which elements are selected; the result is V2DI.
9439 (define_insn "sse4_1_zero_extendv2hiv2di2"
9440 [(set (match_operand:V2DI 0 "register_operand" "=x")
9443 (match_operand:V8HI 1 "register_operand" "x")
9444 (parallel [(const_int 0)
9447 "%vpmovzxwq\t{%1, %0|%0, %1}"
9448 [(set_attr "type" "ssemov")
9449 (set_attr "prefix_extra" "1")
9450 (set_attr "prefix" "maybe_vex")
9451 (set_attr "mode" "TI")])
;; pmovzxwq, narrow-source form: operand 1 is a V2HI register or memory
;; operand; the result is V2DI.
9453 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9454 [(set (match_operand:V2DI 0 "register_operand" "=x")
9458 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9459 (parallel [(const_int 0)
9462 "%vpmovzxwq\t{%1, %0|%0, %1}"
9463 [(set_attr "type" "ssemov")
9464 (set_attr "prefix_extra" "1")
9465 (set_attr "prefix" "maybe_vex")
9466 (set_attr "mode" "TI")])
;; pmovzxdq, register-source form: operand 1 is a full V4SI SSE
;; register from which elements are selected; the result is V2DI.
9468 (define_insn "sse4_1_zero_extendv2siv2di2"
9469 [(set (match_operand:V2DI 0 "register_operand" "=x")
9472 (match_operand:V4SI 1 "register_operand" "x")
9473 (parallel [(const_int 0)
9476 "%vpmovzxdq\t{%1, %0|%0, %1}"
9477 [(set_attr "type" "ssemov")
9478 (set_attr "prefix_extra" "1")
9479 (set_attr "prefix" "maybe_vex")
9480 (set_attr "mode" "TI")])
;; pmovzxdq, narrow-source form: operand 1 is a V2SI register or memory
;; operand; the result is V2DI.
9482 (define_insn "*sse4_1_zero_extendv2siv2di2"
9483 [(set (match_operand:V2DI 0 "register_operand" "=x")
9487 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9488 (parallel [(const_int 0)
9491 "%vpmovzxdq\t{%1, %0|%0, %1}"
9492 [(set_attr "type" "ssemov")
9493 (set_attr "prefix_extra" "1")
9494 (set_attr "prefix" "maybe_vex")
9495 (set_attr "mode" "TI")])
9497 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9498 ;; setting FLAGS_REG. But they are not really compare instructions.
;; vtestps / vtestpd over AVXMODEF2P.  The only output is FLAGS_REG (in
;; CC mode), set from an unspec of operand 0 (register) and operand 1
;; (register or memory); no vector register is written.
9499 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9500 [(set (reg:CC FLAGS_REG)
9501 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9502 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9505 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9506 [(set_attr "type" "ssecomi")
9507 (set_attr "prefix" "vex")
9508 (set_attr "mode" "<MODE>")])
9510 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9511 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI operands.  The only output is FLAGS_REG (in
;; CC mode), set from an unspec of operand 0 (register) and operand 1
;; (register or memory).
9512 (define_insn "avx_ptest256"
9513 [(set (reg:CC FLAGS_REG)
9514 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9515 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9518 "vptest\t{%1, %0|%0, %1}"
9519 [(set_attr "type" "ssecomi")
9520 (set_attr "prefix" "vex")
9521 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands.  The only output is FLAGS_REG (in CC
;; mode), set from an unspec of operand 0 (register) and operand 1
;; (register or memory).  The "%v" prefix with prefix=maybe_vex covers
;; the legacy and VEX mnemonics.
9523 (define_insn "sse4_1_ptest"
9524 [(set (reg:CC FLAGS_REG)
9525 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9526 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9529 "%vptest\t{%1, %0|%0, %1}"
9530 [(set_attr "type" "ssecomi")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "prefix" "maybe_vex")
9533 (set_attr "mode" "TI")])
;; 256-bit vroundps / vroundpd over AVX256MODEF2P, modelled as an
;; unspec.  Operand 1 may be a register or memory; operand 2 is an
;; immediate restricted by const_0_to_15_operand.
9535 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9536 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9537 (unspec:AVX256MODEF2P
9538 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9539 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9542 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9543 [(set_attr "type" "ssecvt")
9544 (set_attr "prefix" "vex")
9545 (set_attr "mode" "<MODE>")])
;; 128-bit roundps / roundpd over V4SF/V2DF.  Operand 1 may be a
;; register or memory; operand 2 is an immediate restricted by
;; const_0_to_15_operand.  The "%v" prefix with prefix=maybe_vex covers
;; the legacy and VEX mnemonics.
9547 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9548 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9550 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9551 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9554 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9555 [(set_attr "type" "ssecvt")
9556 (set_attr "prefix_extra" "1")
9557 (set_attr "prefix" "maybe_vex")
9558 (set_attr "mode" "<MODE>")])
;; VEX-encoded three-operand vroundss / vroundsd, expressed as a
;; vec_merge: the rounded value is derived from operand 2 with
;; immediate operand 3, and merged with operand 1.  Both vector inputs
;; must be SSE registers.
9560 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9561 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9562 (vec_merge:SSEMODEF2P
9564 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9565 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9567 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9570 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9571 [(set_attr "type" "ssecvt")
9572 (set_attr "prefix" "vex")
9573 (set_attr "mode" "<MODE>")])
;; Legacy two-operand roundss / roundsd, expressed as a vec_merge: the
;; rounded value is derived from operand 2 with immediate operand 3, and
;; merged with operand 1, which is tied to the destination ("0").
9575 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9576 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9577 (vec_merge:SSEMODEF2P
9579 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9580 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9582 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9585 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9586 [(set_attr "type" "ssecvt")
9587 (set_attr "prefix_extra" "1")
9588 (set_attr "mode" "<MODE>")])
9590 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9592 ;; Intel SSE4.2 string/text processing instructions
9594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in ecx
;; (operand 0, "c"), a V16QI value in xmm0 (operand 1, "Yz") and
;; FLAGS_REG.  Inputs are two vectors (operands 2 and 4, kept out of
;; xmm0 by their predicates), their lengths in eax (operand 3, "a") and
;; edx (operand 5, "d"), and an 8-bit immediate (operand 6).
;; The pattern is only valid before reload.  The split code looks for
;; REG_UNUSED notes on curr_insn to learn which of the three results are
;; live, then emits sse4_2_pcmpestri and/or sse4_2_pcmpestrm
;; (presumably guarded by the ecx / xmm0 liveness flags), and emits the
;; flags-only sse4_2_pcmpestr_cconly when only FLAGS_REG is live.
9596 (define_insn_and_split "sse4_2_pcmpestr"
9597 [(set (match_operand:SI 0 "register_operand" "=c,c")
9599 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9600 (match_operand:SI 3 "register_operand" "a,a")
9601 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9602 (match_operand:SI 5 "register_operand" "d,d")
9603 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9605 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9613 (set (reg:CC FLAGS_REG)
9622 && !(reload_completed || reload_in_progress)"
9627 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9628 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9629 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9632 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9633 operands[3], operands[4],
9634 operands[5], operands[6]));
9636 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9637 operands[3], operands[4],
9638 operands[5], operands[6]));
9639 if (flags && !(ecx || xmm0))
9640 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9641 operands[2], operands[3],
9642 operands[4], operands[5],
9646 [(set_attr "type" "sselog")
9647 (set_attr "prefix_data16" "1")
9648 (set_attr "prefix_extra" "1")
9649 (set_attr "memory" "none,load")
9650 (set_attr "mode" "TI")])
;; pcmpestri: produces an SI result in ecx (operand 0, "c") and sets
;; FLAGS_REG.  Inputs are two V16QI vectors (operands 1 and 3), their
;; lengths in eax (operand 2) and edx (operand 4), and an 8-bit
;; immediate (operand 5).  The two alternatives differ only in whether
;; operand 3 is a register or memory (see the "memory" attribute).
9652 (define_insn "sse4_2_pcmpestri"
9653 [(set (match_operand:SI 0 "register_operand" "=c,c")
9655 [(match_operand:V16QI 1 "register_operand" "x,x")
9656 (match_operand:SI 2 "register_operand" "a,a")
9657 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9658 (match_operand:SI 4 "register_operand" "d,d")
9659 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9661 (set (reg:CC FLAGS_REG)
9670 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9671 [(set_attr "type" "sselog")
9672 (set_attr "prefix_data16" "1")
9673 (set_attr "prefix_extra" "1")
9674 (set_attr "prefix" "maybe_vex")
9675 (set_attr "memory" "none,load")
9676 (set_attr "mode" "TI")])
;; pcmpestrm: produces a V16QI result in xmm0 (operand 0, "Yz") and
;; sets FLAGS_REG.  Inputs are two V16QI vectors (operands 1 and 3),
;; their lengths in eax (operand 2) and edx (operand 4), and an 8-bit
;; immediate (operand 5).  The two alternatives differ only in whether
;; operand 3 is a register or memory (see the "memory" attribute).
9678 (define_insn "sse4_2_pcmpestrm"
9679 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9681 [(match_operand:V16QI 1 "register_operand" "x,x")
9682 (match_operand:SI 2 "register_operand" "a,a")
9683 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9684 (match_operand:SI 4 "register_operand" "d,d")
9685 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9687 (set (reg:CC FLAGS_REG)
9696 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9697 [(set_attr "type" "sselog")
9698 (set_attr "prefix_data16" "1")
9699 (set_attr "prefix_extra" "1")
9700 (set_attr "prefix" "maybe_vex")
9701 (set_attr "memory" "none,load")
9702 (set_attr "mode" "TI")])
;; Flags-only form of the explicit-length string compare: the only value
;; set is FLAGS_REG; operands 0 and 1 are clobbered scratches.
;; Alternatives 0-1 emit pcmpestrm (V16QI scratch constrained to "Yz"),
;; alternatives 2-3 emit pcmpestri (SI scratch constrained to "c").
;; Within each pair, the second alternative takes operand 4 from memory.
9704 (define_insn "sse4_2_pcmpestr_cconly"
9705 [(set (reg:CC FLAGS_REG)
9707 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9708 (match_operand:SI 3 "register_operand" "a,a,a,a")
9709 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9710 (match_operand:SI 5 "register_operand" "d,d,d,d")
9711 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9713 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9714 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9717 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9718 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9719 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9720 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9721 [(set_attr "type" "sselog")
9722 (set_attr "prefix_data16" "1")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "memory" "none,load,none,load")
9725 (set_attr "prefix" "maybe_vex")
9726 (set_attr "mode" "TI")])
;; Combined implicit-length string compare producing an SI result
;; (operand 0, "c"), a V16QI result (operand 1, "Yz") and FLAGS_REG.
;; It is split only before reload (the split condition rejects
;; reload_in_progress / reload_completed).  The split code reads the
;; REG_UNUSED notes on curr_insn for operand 0, operand 1 and FLAGS_REG,
;; then emits the pcmpistri and/or pcmpistrm forms.  The flags-only form
;; is emitted when the flags are live and neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible in this view -- presumably `ecx' and `xmm0'; confirm.
9728 (define_insn_and_split "sse4_2_pcmpistr"
9729 [(set (match_operand:SI 0 "register_operand" "=c,c")
9731 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9732 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9733 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9735 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9741 (set (reg:CC FLAGS_REG)
9748 && !(reload_completed || reload_in_progress)"
9753 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9754 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9755 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9758 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9759 operands[3], operands[4]));
9761 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9762 operands[3], operands[4]));
9763 if (flags && !(ecx || xmm0))
9764 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9765 operands[2], operands[3],
9769 [(set_attr "type" "sselog")
9770 (set_attr "prefix_data16" "1")
9771 (set_attr "prefix_extra" "1")
9772 (set_attr "memory" "none,load")
9773 (set_attr "mode" "TI")])
;; PCMPISTRI: implicit-length packed string compare, index-result form.
;; Operand 0 (SI) uses the "c" constraint; operand 3 is an immediate in
;; 0..255; the pattern also sets FLAGS_REG.  Alternative 1 takes
;; operand 2 from memory.
9775 (define_insn "sse4_2_pcmpistri"
9776 [(set (match_operand:SI 0 "register_operand" "=c,c")
9778 [(match_operand:V16QI 1 "register_operand" "x,x")
9779 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9780 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9782 (set (reg:CC FLAGS_REG)
9789 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9790 [(set_attr "type" "sselog")
9791 (set_attr "prefix_data16" "1")
9792 (set_attr "prefix_extra" "1")
9793 (set_attr "prefix" "maybe_vex")
9794 (set_attr "memory" "none,load")
9795 (set_attr "mode" "TI")])
;; PCMPISTRM: implicit-length packed string compare, mask-result form.
;; Operand 0 (V16QI) uses the "Yz" constraint; operand 3 is an immediate
;; in 0..255; the pattern also sets FLAGS_REG.  Alternative 1 takes
;; operand 2 from memory.
9797 (define_insn "sse4_2_pcmpistrm"
9798 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9800 [(match_operand:V16QI 1 "register_operand" "x,x")
9801 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9802 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9804 (set (reg:CC FLAGS_REG)
9811 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9812 [(set_attr "type" "sselog")
9813 (set_attr "prefix_data16" "1")
9814 (set_attr "prefix_extra" "1")
9815 (set_attr "prefix" "maybe_vex")
9816 (set_attr "memory" "none,load")
9817 (set_attr "mode" "TI")])
;; Flags-only form of the implicit-length string compare: the only value
;; set is FLAGS_REG; operands 0 and 1 are clobbered scratches.
;; Alternatives 0-1 emit pcmpistrm (V16QI scratch constrained to "Yz"),
;; alternatives 2-3 emit pcmpistri (SI scratch constrained to "c").
;; Within each pair, the second alternative takes operand 3 from memory.
9819 (define_insn "sse4_2_pcmpistr_cconly"
9820 [(set (reg:CC FLAGS_REG)
9822 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9823 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9824 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9826 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9827 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9830 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9831 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9832 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9833 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9834 [(set_attr "type" "sselog")
9835 (set_attr "prefix_data16" "1")
9836 (set_attr "prefix_extra" "1")
9837 (set_attr "memory" "none,load,none,load")
9838 (set_attr "prefix" "maybe_vex")
9839 (set_attr "mode" "TI")])
9841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9843 ;; SSE5 instructions
9845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9847 ;; SSE5 parallel integer multiply/add instructions.
9848 ;; Note the instruction does not allow the value being added to be a memory
9849 ;; operation. However by pretending via the nonimmediate_operand predicate
9850 ;; that it does and splitting it later allows the following to be recognized:
9851 ;; a[i] = b[i] * c[i] + d[i];
;; V8HI multiply/accumulate (type "ssemuladd").  Operands 1 and 2 are the
;; commutative ("%") multiplicands; operand 3 is the addend and is tied
;; to the destination by the "0" constraint.  In the third alternative
;; the output template swaps %1 and %2.  Operand validity is delegated to
;; ix86_sse5_valid_op_p, which is defined outside this file section.
9852 (define_insn "sse5_pmacsww"
9853 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9856 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
9857 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
9858 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9859 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9861 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9862 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9863 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9864 [(set_attr "type" "ssemuladd")
9865 (set_attr "mode" "TI")])
9867 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with
;; its fifth argument 1 but accepts them with 2, and when operand 0 is not
;; mentioned in any of operands 1-3.  The preparation code calls
;; ix86_expand_sse5_multiple_memory on the four V8HI operands and then
;; emits sse5_pmacsww.
9869 [(set (match_operand:V8HI 0 "register_operand" "")
9871 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9872 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9873 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9875 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9876 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9877 && !reg_mentioned_p (operands[0], operands[1])
9878 && !reg_mentioned_p (operands[0], operands[2])
9879 && !reg_mentioned_p (operands[0], operands[3])"
9882 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9883 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; V8HI multiply/accumulate emitted as pmacssww (type "ssemuladd").
;; Operands 1 and 2 are the commutative multiplicands, at most one of
;; them in memory per alternative; operand 3 is the addend, tied to the
;; destination.  The third alternative's template swaps %1 and %2.
9888 (define_insn "sse5_pmacssww"
9889 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9891 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9892 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
9893 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9894 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9896 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9897 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9898 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9899 [(set_attr "type" "ssemuladd")
9900 (set_attr "mode" "TI")])
9902 ;; Note the instruction does not allow the value being added to be a memory
9903 ;; operation. However by pretending via the nonimmediate_operand predicate
9904 ;; that it does and splitting it later allows the following to be recognized:
9905 ;; a[i] = b[i] * c[i] + d[i];
;; V4SI multiply/accumulate (type "ssemuladd").  Operands 1 and 2 are the
;; commutative multiplicands, at most one of them in memory per
;; alternative; operand 3 is the addend, tied to the destination.  The
;; third alternative's template swaps %1 and %2.
9906 (define_insn "sse5_pmacsdd"
9907 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9910 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9911 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9912 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9913 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9915 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9916 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9917 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9918 [(set_attr "type" "ssemuladd")
9919 (set_attr "mode" "TI")])
9921 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with
;; its fifth argument 1 but accepts them with 2, and when operand 0 is not
;; mentioned in any of operands 1-3.  The preparation code calls
;; ix86_expand_sse5_multiple_memory on the four V4SI operands and then
;; emits sse5_pmacsdd.
9923 [(set (match_operand:V4SI 0 "register_operand" "")
9925 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9926 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9927 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9929 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9930 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9931 && !reg_mentioned_p (operands[0], operands[1])
9932 && !reg_mentioned_p (operands[0], operands[2])
9933 && !reg_mentioned_p (operands[0], operands[3])"
9936 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9937 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; V4SI multiply/accumulate emitted as pmacssdd (type "ssemuladd").
;; Operands 1 and 2 are the commutative multiplicands, at most one of
;; them in memory per alternative; operand 3 is the addend, tied to the
;; destination.  The third alternative's template swaps %1 and %2.
9942 (define_insn "sse5_pmacssdd"
9943 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9945 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9946 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9947 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9948 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9950 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9951 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9952 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9953 [(set_attr "type" "ssemuladd")
9954 (set_attr "mode" "TI")])
;; pmacssdql: multiply/accumulate from V4SI multiplicands into a V2DI
;; accumulator (operand 3, tied to the destination).  Each multiplicand
;; goes through an element-selection parallel whose first index is 1.
;; NOTE(review): the rest of each parallel is not visible in this view.
9956 (define_insn "sse5_pmacssdql"
9957 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9962 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9963 (parallel [(const_int 1)
9966 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9967 (parallel [(const_int 1)
9969 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9970 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9972 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9973 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9974 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9975 [(set_attr "type" "ssemuladd")
9976 (set_attr "mode" "TI")])
;; pmacssdqh: multiply/accumulate from V4SI multiplicands into a V2DI
;; accumulator (operand 3, tied to the destination).  Each multiplicand
;; goes through an element-selection parallel whose first index is 0.
;; NOTE(review): the rest of each parallel is not visible in this view.
9978 (define_insn "sse5_pmacssdqh"
9979 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9984 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9985 (parallel [(const_int 0)
9989 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9990 (parallel [(const_int 0)
9992 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9993 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9995 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9996 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9997 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9998 [(set_attr "type" "ssemuladd")
9999 (set_attr "mode" "TI")])
;; pmacsdql: multiply/accumulate from V4SI multiplicands into a V2DI
;; accumulator (operand 3, a register tied to the destination).  Each
;; multiplicand goes through an element-selection parallel whose first
;; index is 1.
10001 (define_insn "sse5_pmacsdql"
10002 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10007 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10008 (parallel [(const_int 1)
10012 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10013 (parallel [(const_int 1)
10015 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10016 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10018 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10019 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10020 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10021 [(set_attr "type" "ssemuladd")
10022 (set_attr "mode" "TI")])
;; Variant of the pmacsdql pattern whose accumulator (operand 3) is a
;; memory operand.  The destination is early-clobber ("=&x").  The split
;; fires after reload, or earlier when operand 0 is mentioned in neither
;; operand 1 nor operand 2.
;; NOTE(review): only fragments of the replacement pattern are visible.
10024 (define_insn_and_split "*sse5_pmacsdql_mem"
10025 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10030 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10031 (parallel [(const_int 1)
10035 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10036 (parallel [(const_int 1)
10038 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10039 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10041 "&& (reload_completed
10042 || (!reg_mentioned_p (operands[0], operands[1])
10043 && !reg_mentioned_p (operands[0], operands[2])))"
10044 [(set (match_dup 0)
10052 (parallel [(const_int 1)
10057 (parallel [(const_int 1)
10061 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10062 ;; fake it with a multiply/add. In general, we expect the define_split to
10063 ;; occur before register allocation, so we have to handle the corner case where
10064 ;; the target is the same as operands 1/2
;; Widening multiply of V4SI elements 1 and 3 into a V2DI result.  The
;; destination is early-clobber.  The split fires after reload, or
;; earlier when operand 0 is mentioned in neither input.  The preparation
;; code sets operands[3] to the V2DI zero constant.
10065 (define_insn_and_split "sse5_mulv2div2di3_low"
10066 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10070 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10071 (parallel [(const_int 1)
10075 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10076 (parallel [(const_int 1)
10077 (const_int 3)])))))]
10080 "&& (reload_completed
10081 || (!reg_mentioned_p (operands[0], operands[1])
10082 && !reg_mentioned_p (operands[0], operands[2])))"
10083 [(set (match_dup 0)
10091 (parallel [(const_int 1)
10096 (parallel [(const_int 1)
10100 operands[3] = CONST0_RTX (V2DImode);
10102 [(set_attr "type" "ssemuladd")
10103 (set_attr "mode" "TI")])
;; pmacsdqh: multiply/accumulate from V4SI multiplicands into a V2DI
;; accumulator (operand 3, a register tied to the destination).  Each
;; multiplicand goes through an element-selection parallel whose first
;; index is 0.
10105 (define_insn "sse5_pmacsdqh"
10106 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10111 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10112 (parallel [(const_int 0)
10116 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10117 (parallel [(const_int 0)
10119 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10120 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10122 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10123 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10124 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10125 [(set_attr "type" "ssemuladd")
10126 (set_attr "mode" "TI")])
;; Variant of the pmacsdqh pattern whose accumulator (operand 3) is a
;; memory operand.  The destination is early-clobber ("=&x").  The split
;; fires after reload, or earlier when operand 0 is mentioned in neither
;; operand 1 nor operand 2.
;; NOTE(review): only fragments of the replacement pattern are visible.
10128 (define_insn_and_split "*sse5_pmacsdqh_mem"
10129 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10134 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10135 (parallel [(const_int 0)
10139 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10140 (parallel [(const_int 0)
10142 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10143 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10145 "&& (reload_completed
10146 || (!reg_mentioned_p (operands[0], operands[1])
10147 && !reg_mentioned_p (operands[0], operands[2])))"
10148 [(set (match_dup 0)
10156 (parallel [(const_int 0)
10161 (parallel [(const_int 0)
10165 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10166 ;; fake it with a multiply/add. In general, we expect the define_split to
10167 ;; occur before register allocation, so we have to handle the corner case where
10168 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of V4SI elements 0 and 2 into a V2DI result.  The
;; destination is early-clobber.  The split fires after reload, or
;; earlier when operand 0 is mentioned in neither input.  The preparation
;; code sets operands[3] to the V2DI zero constant.
10169 (define_insn_and_split "sse5_mulv2div2di3_high"
10170 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10174 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10175 (parallel [(const_int 0)
10179 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10180 (parallel [(const_int 0)
10181 (const_int 2)])))))]
10184 "&& (reload_completed
10185 || (!reg_mentioned_p (operands[0], operands[1])
10186 && !reg_mentioned_p (operands[0], operands[2])))"
10187 [(set (match_dup 0)
10195 (parallel [(const_int 0)
10200 (parallel [(const_int 0)
10204 operands[3] = CONST0_RTX (V2DImode);
10206 [(set_attr "type" "ssemuladd")
10207 (set_attr "mode" "TI")])
10209 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: multiply/accumulate from V8HI multiplicands into a V4SI
;; accumulator (operand 3, tied to the destination).  Each multiplicand
;; goes through an element-selection parallel whose first index is 1.
;; NOTE(review): the rest of each parallel is not visible in this view.
10210 (define_insn "sse5_pmacsswd"
10211 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10216 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10217 (parallel [(const_int 1)
10223 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10224 (parallel [(const_int 1)
10228 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10229 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10231 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10232 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10233 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10234 [(set_attr "type" "ssemuladd")
10235 (set_attr "mode" "TI")])
;; pmacswd: multiply/accumulate from V8HI multiplicands into a V4SI
;; accumulator (operand 3, tied to the destination).  Each multiplicand
;; goes through an element-selection parallel whose first index is 1.
;; NOTE(review): the rest of each parallel is not visible in this view.
10237 (define_insn "sse5_pmacswd"
10238 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10243 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10244 (parallel [(const_int 1)
10250 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10251 (parallel [(const_int 1)
10255 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10256 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10258 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10259 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10260 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10261 [(set_attr "type" "ssemuladd")
10262 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI multiplicands, V4SI accumulator (operand 3, tied to the
;; destination).  The pattern uses two groups of element selections: one
;; beginning at index 0 and one beginning at index 1 and ending at 7.
;; NOTE(review): the operators combining the groups are not visible here.
10264 (define_insn "sse5_pmadcsswd"
10265 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10271 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10272 (parallel [(const_int 0)
10278 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10279 (parallel [(const_int 0)
10287 (parallel [(const_int 1)
10294 (parallel [(const_int 1)
10297 (const_int 7)])))))
10298 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10299 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10301 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10302 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10303 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10304 [(set_attr "type" "ssemuladd")
10305 (set_attr "mode" "TI")])
;; pmadcswd: V8HI multiplicands, V4SI accumulator (operand 3, tied to the
;; destination).  The pattern uses two groups of element selections: one
;; beginning at index 0 and one beginning at index 1 and ending at 7.
;; NOTE(review): the operators combining the groups are not visible here.
10307 (define_insn "sse5_pmadcswd"
10308 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10314 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10315 (parallel [(const_int 0)
10321 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10322 (parallel [(const_int 0)
10330 (parallel [(const_int 1)
10337 (parallel [(const_int 1)
10340 (const_int 7)])))))
10341 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10342 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10344 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10345 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10346 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10347 [(set_attr "type" "ssemuladd")
10348 (set_attr "mode" "TI")])
10350 ;; SSE5 parallel XMM conditional moves
;; Conditional move over every SSEMODE vector mode, written as an
;; if_then_else with operand 3 as the selector, operand 1 as the "then"
;; value and operand 2 as the "else" value.  Each of the four
;; alternatives ties one input to the destination ("0") and allows
;; memory in one other input; all four emit the same pcmov template.
10351 (define_insn "sse5_pcmov_<mode>"
10352 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10353 (if_then_else:SSEMODE
10354 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10355 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10356 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10357 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10359 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10360 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10361 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10362 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10363 [(set_attr "type" "sse4arg")])
10365 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: V16QI source (register or memory) to V8HI destination, built
;; from two element selections starting at indices 0 and 1 (the second
;; ending at 15).
10366 (define_insn "sse5_phaddbw"
10367 [(set (match_operand:V8HI 0 "register_operand" "=x")
10371 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10372 (parallel [(const_int 0)
10383 (parallel [(const_int 1)
10390 (const_int 15)])))))]
10392 "phaddbw\t{%1, %0|%0, %1}"
10393 [(set_attr "type" "sseiadd1")])
;; phaddbd: V16QI source (register or memory) to V4SI destination, built
;; from four element selections starting at indices 0, 1, 2 and 3.
10395 (define_insn "sse5_phaddbd"
10396 [(set (match_operand:V4SI 0 "register_operand" "=x")
10401 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10402 (parallel [(const_int 0)
10409 (parallel [(const_int 1)
10412 (const_int 13)]))))
10417 (parallel [(const_int 2)
10424 (parallel [(const_int 3)
10427 (const_int 15)]))))))]
10429 "phaddbd\t{%1, %0|%0, %1}"
10430 [(set_attr "type" "sseiadd1")])
;; phaddbq: V16QI source (register or memory) to V2DI destination, built
;; from eight element selections; the visible starting indices are
;; 0, 1, 2, 3, 8, 9, 10 and 11.
10432 (define_insn "sse5_phaddbq"
10433 [(set (match_operand:V2DI 0 "register_operand" "=x")
10439 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10440 (parallel [(const_int 0)
10445 (parallel [(const_int 1)
10451 (parallel [(const_int 2)
10456 (parallel [(const_int 3)
10457 (const_int 7)])))))
10463 (parallel [(const_int 8)
10468 (parallel [(const_int 9)
10469 (const_int 13)]))))
10474 (parallel [(const_int 10)
10479 (parallel [(const_int 11)
10480 (const_int 15)])))))))]
10482 "phaddbq\t{%1, %0|%0, %1}"
10483 [(set_attr "type" "sseiadd1")])
;; phaddwd: V8HI source (register or memory) to V4SI destination, built
;; from two element selections starting at indices 0 and 1 (the second
;; ending at 7).
10485 (define_insn "sse5_phaddwd"
10486 [(set (match_operand:V4SI 0 "register_operand" "=x")
10490 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10491 (parallel [(const_int 0)
10498 (parallel [(const_int 1)
10501 (const_int 7)])))))]
10503 "phaddwd\t{%1, %0|%0, %1}"
10504 [(set_attr "type" "sseiadd1")])
;; phaddwq: V8HI source (register or memory) to V2DI destination, built
;; from four element selections starting at indices 0, 1, 2 and 3.
10506 (define_insn "sse5_phaddwq"
10507 [(set (match_operand:V2DI 0 "register_operand" "=x")
10512 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10513 (parallel [(const_int 0)
10518 (parallel [(const_int 1)
10524 (parallel [(const_int 2)
10529 (parallel [(const_int 3)
10530 (const_int 7)]))))))]
10532 "phaddwq\t{%1, %0|%0, %1}"
10533 [(set_attr "type" "sseiadd1")])
;; phadddq: V4SI source (register or memory) to V2DI destination, built
;; from two element selections starting at indices 0 and 1 (the second
;; ending at 3).
10535 (define_insn "sse5_phadddq"
10536 [(set (match_operand:V2DI 0 "register_operand" "=x")
10540 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10541 (parallel [(const_int 0)
10546 (parallel [(const_int 1)
10547 (const_int 3)])))))]
10549 "phadddq\t{%1, %0|%0, %1}"
10550 [(set_attr "type" "sseiadd1")])
;; phaddubw: V16QI source (register or memory) to V8HI destination, built
;; from two element selections starting at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of phaddbw; the
;; extension operator is not visible in this view.
10552 (define_insn "sse5_phaddubw"
10553 [(set (match_operand:V8HI 0 "register_operand" "=x")
10557 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10558 (parallel [(const_int 0)
10569 (parallel [(const_int 1)
10576 (const_int 15)])))))]
10578 "phaddubw\t{%1, %0|%0, %1}"
10579 [(set_attr "type" "sseiadd1")])
;; phaddubd: V16QI source (register or memory) to V4SI destination, built
;; from four element selections starting at indices 0, 1, 2 and 3.
;; NOTE(review): presumably the unsigned counterpart of phaddbd; the
;; extension operator is not visible in this view.
10581 (define_insn "sse5_phaddubd"
10582 [(set (match_operand:V4SI 0 "register_operand" "=x")
10587 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10588 (parallel [(const_int 0)
10595 (parallel [(const_int 1)
10598 (const_int 13)]))))
10603 (parallel [(const_int 2)
10610 (parallel [(const_int 3)
10613 (const_int 15)]))))))]
10615 "phaddubd\t{%1, %0|%0, %1}"
10616 [(set_attr "type" "sseiadd1")])
;; phaddubq: V16QI source (register or memory) to V2DI destination, built
;; from eight element selections; the visible starting indices are
;; 0, 1, 2, 3, 8, 9, 10 and 11.
;; NOTE(review): presumably the unsigned counterpart of phaddbq; the
;; extension operator is not visible in this view.
10618 (define_insn "sse5_phaddubq"
10619 [(set (match_operand:V2DI 0 "register_operand" "=x")
10625 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10626 (parallel [(const_int 0)
10631 (parallel [(const_int 1)
10637 (parallel [(const_int 2)
10642 (parallel [(const_int 3)
10643 (const_int 7)])))))
10649 (parallel [(const_int 8)
10654 (parallel [(const_int 9)
10655 (const_int 13)]))))
10660 (parallel [(const_int 10)
10665 (parallel [(const_int 11)
10666 (const_int 15)])))))))]
10668 "phaddubq\t{%1, %0|%0, %1}"
10669 [(set_attr "type" "sseiadd1")])
;; phadduwd: V8HI source (register or memory) to V4SI destination, built
;; from two element selections starting at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of phaddwd; the
;; extension operator is not visible in this view.
10671 (define_insn "sse5_phadduwd"
10672 [(set (match_operand:V4SI 0 "register_operand" "=x")
10676 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10677 (parallel [(const_int 0)
10684 (parallel [(const_int 1)
10687 (const_int 7)])))))]
10689 "phadduwd\t{%1, %0|%0, %1}"
10690 [(set_attr "type" "sseiadd1")])
;; phadduwq: V8HI source (register or memory) to V2DI destination, built
;; from four element selections starting at indices 0, 1, 2 and 3.
;; NOTE(review): presumably the unsigned counterpart of phaddwq; the
;; extension operator is not visible in this view.
10692 (define_insn "sse5_phadduwq"
10693 [(set (match_operand:V2DI 0 "register_operand" "=x")
10698 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10699 (parallel [(const_int 0)
10704 (parallel [(const_int 1)
10710 (parallel [(const_int 2)
10715 (parallel [(const_int 3)
10716 (const_int 7)]))))))]
10718 "phadduwq\t{%1, %0|%0, %1}"
10719 [(set_attr "type" "sseiadd1")])
;; phaddudq: V4SI source (register or memory) to V2DI destination, built
;; from two element selections starting at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of phadddq; the
;; extension operator is not visible in this view.
10721 (define_insn "sse5_phaddudq"
10722 [(set (match_operand:V2DI 0 "register_operand" "=x")
10726 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10727 (parallel [(const_int 0)
10732 (parallel [(const_int 1)
10733 (const_int 3)])))))]
10735 "phaddudq\t{%1, %0|%0, %1}"
10736 [(set_attr "type" "sseiadd1")])
;; phsubbw: V16QI source (register or memory) to V8HI destination, built
;; from two element selections starting at indices 0 and 1 (the second
;; ending at 15).
10738 (define_insn "sse5_phsubbw"
10739 [(set (match_operand:V8HI 0 "register_operand" "=x")
10743 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10744 (parallel [(const_int 0)
10755 (parallel [(const_int 1)
10762 (const_int 15)])))))]
10764 "phsubbw\t{%1, %0|%0, %1}"
10765 [(set_attr "type" "sseiadd1")])
;; phsubwd: V8HI source (register or memory) to V4SI destination, built
;; from two element selections starting at indices 0 and 1 (the second
;; ending at 7).
10767 (define_insn "sse5_phsubwd"
10768 [(set (match_operand:V4SI 0 "register_operand" "=x")
10772 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10773 (parallel [(const_int 0)
10780 (parallel [(const_int 1)
10783 (const_int 7)])))))]
10785 "phsubwd\t{%1, %0|%0, %1}"
10786 [(set_attr "type" "sseiadd1")])
;; phsubdq: V4SI source (register or memory) to V2DI destination, built
;; from two element selections starting at indices 0 and 1 (the second
;; ending at 3).
10788 (define_insn "sse5_phsubdq"
10789 [(set (match_operand:V2DI 0 "register_operand" "=x")
10793 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10794 (parallel [(const_int 0)
10799 (parallel [(const_int 1)
10800 (const_int 3)])))))]
10802 "phsubdq\t{%1, %0|%0, %1}"
10803 [(set_attr "type" "sseiadd1")])
10805 ;; SSE5 permute instructions
;; Generic pperm: an UNSPEC_SSE5_PERMUTE over three V16QI inputs.  Each of
;; the four alternatives ties operand 1 or operand 3 to the destination
;; ("0") and allows memory in one of the other two inputs.
10806 (define_insn "sse5_pperm"
10807 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10809 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
10810 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
10811 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10812 UNSPEC_SSE5_PERMUTE))]
10813 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10814 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10815 [(set_attr "type" "sse4arg")
10816 (set_attr "mode" "TI")])
10818 ;; The following are for the various unpack insns, which don't need the first
10819 ;; source operand, so we can just use the output operand for the first operand.
10820 ;; This allows either of the other two operands to be a memory operand. We
10821 ;; can't just use the first operand as an argument to the normal pperm because
10822 ;; then an output-only argument suddenly becomes an input operand.
;; pperm used as an unpack from V16QI to V8HI.  Operand 1 is the V16QI
;; source, operand 2 is the element-selection parallel of const_ints and
;; operand 3 (under `use') is a V16QI value printed as the last source.
;; The output template names %0 for both the destination and the first
;; source.
;; Either operand 1 or operand 3 may be memory, but not both, so the insn
;; condition requires at least one of them to be a register.  The check
;; is made on operand 3 rather than operand 2: operand 2 is the parallel
;; and can never satisfy register_operand in V16QImode.
10823 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10824 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10827 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10828 (match_operand 2 "" "")))) ;; parallel with const_int's
10829 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10831 && (register_operand (operands[1], V16QImode)
10832 || register_operand (operands[3], V16QImode))"
10833 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10834 [(set_attr "type" "sseadd")
10835 (set_attr "mode" "TI")])
;; pperm used as an unpack from V16QI to V8HI (the "sign" variant).
;; Operand 1 is the V16QI source, operand 2 is the element-selection
;; parallel of const_ints and operand 3 (under `use') is a V16QI value
;; printed as the last source.  The output template names %0 for both the
;; destination and the first source.
;; Either operand 1 or operand 3 may be memory, but not both, so the insn
;; condition requires at least one of them to be a register.  The check
;; is made on operand 3 rather than operand 2: operand 2 is the parallel
;; and can never satisfy register_operand in V16QImode.
10837 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10838 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10841 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10842 (match_operand 2 "" "")))) ;; parallel with const_int's
10843 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10845 && (register_operand (operands[1], V16QImode)
10846 || register_operand (operands[3], V16QImode))"
10847 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10848 [(set_attr "type" "sseadd")
10849 (set_attr "mode" "TI")])
;; pperm used as an unpack from V8HI to V4SI.  Operand 1 is the V8HI
;; source, operand 2 is the element-selection parallel of const_ints and
;; operand 3 (under `use') is a V16QI value printed as the last source.
;; The output template names %0 for both the destination and the first
;; source.
;; Either operand 1 or operand 3 may be memory, but not both, so the insn
;; condition requires at least one of them to be a register.  The check
;; is made on operand 3 rather than operand 2: operand 2 is the parallel
;; and can never satisfy register_operand in V16QImode.
10851 (define_insn "sse5_pperm_zero_v8hi_v4si"
10852 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10855 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10856 (match_operand 2 "" "")))) ;; parallel with const_int's
10857 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10859 && (register_operand (operands[1], V8HImode)
10860 || register_operand (operands[3], V16QImode))"
10861 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10862 [(set_attr "type" "sseadd")
10863 (set_attr "mode" "TI")])
;; pperm used as an unpack from V8HI to V4SI (the "sign" variant).
;; Operand 1 is the V8HI source, operand 2 is the element-selection
;; parallel of const_ints and operand 3 (under `use') is a V16QI value
;; printed as the last source.  The output template names %0 for both the
;; destination and the first source.
;; Either operand 1 or operand 3 may be memory, but not both, so the insn
;; condition requires at least one of them to be a register.  The check
;; is made on operand 3 rather than operand 2: operand 2 is the parallel
;; and can never satisfy register_operand in V16QImode.
10865 (define_insn "sse5_pperm_sign_v8hi_v4si"
10866 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10869 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10870 (match_operand 2 "" "")))) ;; parallel with const_int's
10871 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10873 && (register_operand (operands[1], V8HImode)
10874 || register_operand (operands[3], V16QImode))"
10875 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10876 [(set_attr "type" "sseadd")
10877 (set_attr "mode" "TI")])
;; pperm used as an unpack from V4SI to V2DI.  Operand 1 is the V4SI
;; source, operand 2 is the element-selection parallel of const_ints and
;; operand 3 (under `use') is a V16QI value printed as the last source.
;; The output template names %0 for both the destination and the first
;; source.
;; Either operand 1 or operand 3 may be memory, but not both, so the insn
;; condition requires at least one of them to be a register.  The check
;; is made on operand 3 rather than operand 2: operand 2 is the parallel
;; and can never satisfy register_operand in V16QImode.
10879 (define_insn "sse5_pperm_zero_v4si_v2di"
10880 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10883 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10884 (match_operand 2 "" "")))) ;; parallel with const_int's
10885 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10887 && (register_operand (operands[1], V4SImode)
10888 || register_operand (operands[3], V16QImode))"
10889 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10890 [(set_attr "type" "sseadd")
10891 (set_attr "mode" "TI")])
;; pperm used as an unpack from V4SI to V2DI (the "sign" variant).
;; Operand 1 is the V4SI source, operand 2 is the element-selection
;; parallel of const_ints and operand 3 (under `use') is a V16QI value
;; printed as the last source.  The output template names %0 for both the
;; destination and the first source.
;; Either operand 1 or operand 3 may be memory, but not both, so the insn
;; condition requires at least one of them to be a register.  The check
;; is made on operand 3 rather than operand 2: operand 2 is the parallel
;; and can never satisfy register_operand in V16QImode.
10893 (define_insn "sse5_pperm_sign_v4si_v2di"
10894 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10897 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10898 (match_operand 2 "" "")))) ;; parallel with const_int's
10899 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10901 && (register_operand (operands[1], V4SImode)
10902 || register_operand (operands[3], V16QImode))"
10903 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10904 [(set_attr "type" "sseadd")
10905 (set_attr "mode" "TI")])
10907 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; pperm used to pack two V2DI inputs (operands 1 and 2) into one V4SI
;; result.  Operand 3 (V16QI, under `use') is printed as the last pperm
;; source.  Each of the four alternatives ties operand 1 or operand 3 to
;; the destination and allows memory in one other input.
10908 (define_insn "sse5_pperm_pack_v2di_v4si"
10909 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10912 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
10914 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10915 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10916 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10917 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10918 [(set_attr "type" "sse4arg")
10919 (set_attr "mode" "TI")])
;; pperm used to pack two V4SI inputs (operands 1 and 2) into one V8HI
;; result.  Operand 3 (V16QI, under `use') is printed as the last pperm
;; source.  Each of the four alternatives ties operand 1 or operand 3 to
;; the destination and allows memory in one other input.
10921 (define_insn "sse5_pperm_pack_v4si_v8hi"
10922 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
10925 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
10927 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10928 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10929 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10930 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10931 [(set_attr "type" "sse4arg")
10932 (set_attr "mode" "TI")])
;; pperm used to pack two V8HI inputs (operands 1 and 2) into one V16QI
;; result.  Operand 3 (V16QI, under `use') is printed as the last pperm
;; source.  Each of the four alternatives ties operand 1 or operand 3 to
;; the destination and allows memory in one other input.
10934 (define_insn "sse5_pperm_pack_v8hi_v16qi"
10935 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10938 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
10940 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10941 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10942 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10943 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10944 [(set_attr "type" "sse4arg")
10945 (set_attr "mode" "TI")])
10947 ;; Floating point permutation (permps, permpd)
;; Floating-point permute over SSEMODEF2P (V4SF and V2DF): an
;; UNSPEC_SSE5_PERMUTE of two vector inputs in the iterated mode plus a
;; V16QI operand 3.  The mnemonic is "perm" followed by the
;; <ssemodesuffixf4> mode attribute.
10948 (define_insn "sse5_perm<mode>"
10949 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10951 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10952 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10953 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10954 UNSPEC_SSE5_PERMUTE))]
10955 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10956 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10957 [(set_attr "type" "sse4arg")
10958 (set_attr "mode" "<MODE>")])
10960 ;; SSE5 packed rotate instructions
;; Rotate-left expander for the SSEMODE1248 integer vector modes.  If
;; operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is first moved into a fresh <ssescalarmode> register when its
;; mode differs.  It is then replicated <ssescalarnum> times into a
;; PARALLEL, built into a vector with vec_init<mode>, and passed to
;; sse5_vrotl<mode>3.
10961 (define_expand "rotl<mode>3"
10962 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10963 (rotate:SSEMODE1248
10964 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10965 (match_operand:SI 2 "general_operand")))]
10968 /* If we were given a scalar, convert it to parallel */
10969 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10971 rtvec vs = rtvec_alloc (<ssescalarnum>);
10972 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10973 rtx reg = gen_reg_rtx (<MODE>mode);
10974 rtx op2 = operands[2];
10977 if (GET_MODE (op2) != <ssescalarmode>mode)
10979 op2 = gen_reg_rtx (<ssescalarmode>mode);
10980 convert_move (op2, operands[2], false);
10983 for (i = 0; i < <ssescalarnum>; i++)
10984 RTVEC_ELT (vs, i) = op2;
10986 emit_insn (gen_vec_init<mode> (reg, par));
10987 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the SSEMODE1248 integer vector modes.  If
;; operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is replicated into a vector exactly as in the rotate-left
;; expander.  That vector is then negated with neg<mode>2 and passed to
;; sse5_vrotl<mode>3, so a right rotate becomes a left rotate by the
;; negated count.
10992 (define_expand "rotr<mode>3"
10993 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10994 (rotatert:SSEMODE1248
10995 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10996 (match_operand:SI 2 "general_operand")))]
10999 /* If we were given a scalar, convert it to parallel */
11000 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11002 rtvec vs = rtvec_alloc (<ssescalarnum>);
11003 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11004 rtx neg = gen_reg_rtx (<MODE>mode);
11005 rtx reg = gen_reg_rtx (<MODE>mode);
11006 rtx op2 = operands[2];
11009 if (GET_MODE (op2) != <ssescalarmode>mode)
11011 op2 = gen_reg_rtx (<ssescalarmode>mode);
11012 convert_move (op2, operands[2], false);
11015 for (i = 0; i < <ssescalarnum>; i++)
11016 RTVEC_ELT (vs, i) = op2;
11018 emit_insn (gen_vec_init<mode> (reg, par));
11019 emit_insn (gen_neg<mode>2 (neg, reg));
11020 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate count.  Operand 1 may be a register or
;; memory; operand 2 must satisfy const_0_to_<sserotatemax>_operand.
;; The mnemonic is "prot" followed by the <ssevecsize> mode attribute.
11025 (define_insn "sse5_rotl<mode>3"
11026 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11027 (rotate:SSEMODE1248
11028 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11029 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11031 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11032 [(set_attr "type" "sseishft")
11033 (set_attr "mode" "TI")])
;; Rotate right by an immediate count, emitted as a left rotate
;; (prot<ssevecsize>) by the complementary count stored in operands[3].
;; The complement is taken against the width in bits of one vector
;; element, GET_MODE_BITSIZE (<ssescalarmode>mode).  <ssescalarnum> is
;; the number of elements in the vector, so (<ssescalarnum> * 8) equals
;; the element width only for V4SI and is wrong for the other modes.
11035 (define_insn "sse5_rotr<mode>3"
11036 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11037 (rotatert:SSEMODE1248
11038 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11039 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11042 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11043 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11045 [(set_attr "type" "sseishft")
11046 (set_attr "mode" "TI")])
;; Rotate right with a per-element vector count (operand 2): the count
;; vector is negated into a fresh register and sse5_vrotl<mode>3 is emitted
;; with the negated count.
11048 (define_expand "vrotr<mode>3"
11049 [(match_operand:SSEMODE1248 0 "register_operand" "")
11050 (match_operand:SSEMODE1248 1 "register_operand" "")
11051 (match_operand:SSEMODE1248 2 "register_operand" "")]
11054 rtx reg = gen_reg_rtx (<MODE>mode);
11055 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11056 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left with a per-element vector count: all three operands are
;; forwarded unchanged to sse5_vrotl<mode>3.
11060 (define_expand "vrotl<mode>3"
11061 [(match_operand:SSEMODE1248 0 "register_operand" "")
11062 (match_operand:SSEMODE1248 1 "register_operand" "")
11063 (match_operand:SSEMODE1248 2 "register_operand" "")]
11066 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate emitted as prot<ssevecsize>.  The RTL is an if_then_else
;; whose arms are a rotate of operand 1 and a rotatert by the negation of
;; operand 2.  NOTE(review): presumably the arm is chosen by the sign of the
;; count in operand 2 -- confirm against the full condition.
;; The two alternatives let either operand 2 or operand 1 be in memory;
;; ix86_sse5_valid_op_p is part of the insn condition.
11070 (define_insn "sse5_vrotl<mode>3"
11071 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11072 (if_then_else:SSEMODE1248
11074 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11076 (rotate:SSEMODE1248
11077 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11079 (rotatert:SSEMODE1248
11081 (neg:SSEMODE1248 (match_dup 2)))))]
11082 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11083 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11084 [(set_attr "type" "sseishft")
11085 (set_attr "mode" "TI")])
11087 ;; SSE5 packed shift instructions.
11088 ;; FIXME: add V2DI back in
;; Logical shift right by a vector count: operand 2 is negated and the
;; result is used as the count for sse5_lshl<mode>3.
11089 (define_expand "vlshr<mode>3"
11090 [(match_operand:SSEMODE124 0 "register_operand" "")
11091 (match_operand:SSEMODE124 1 "register_operand" "")
11092 (match_operand:SSEMODE124 2 "register_operand" "")]
11095 rtx neg = gen_reg_rtx (<MODE>mode);
11096 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11097 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right by a vector count: operand 2 is negated and the
;; result is used as the count for sse5_ashl<mode>3.
11101 (define_expand "vashr<mode>3"
11102 [(match_operand:SSEMODE124 0 "register_operand" "")
11103 (match_operand:SSEMODE124 1 "register_operand" "")
11104 (match_operand:SSEMODE124 2 "register_operand" "")]
11107 rtx neg = gen_reg_rtx (<MODE>mode);
11108 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11109 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left by a vector count: all three operands are forwarded unchanged
;; to sse5_ashl<mode>3.
11113 (define_expand "vashl<mode>3"
11114 [(match_operand:SSEMODE124 0 "register_operand" "")
11115 (match_operand:SSEMODE124 1 "register_operand" "")
11116 (match_operand:SSEMODE124 2 "register_operand" "")]
11119 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift emitted as psha<ssevecsize>.  The RTL is an
;; if_then_else whose arms are an ashift of operand 1 and an ashiftrt by the
;; negation of operand 2.  NOTE(review): presumably selected on the sign of
;; the count in operand 2 -- confirm against the full condition.
;; The two alternatives let either operand 2 or operand 1 be in memory.
11123 (define_insn "sse5_ashl<mode>3"
11124 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11125 (if_then_else:SSEMODE1248
11127 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11129 (ashift:SSEMODE1248
11130 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11132 (ashiftrt:SSEMODE1248
11134 (neg:SSEMODE1248 (match_dup 2)))))]
11135 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11136 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11137 [(set_attr "type" "sseishft")
11138 (set_attr "mode" "TI")])
;; Variable logical shift emitted as pshl<ssevecsize>.  Same shape as
;; sse5_ashl<mode>3, except that the right-shift arm is an lshiftrt by the
;; negation of operand 2.  NOTE(review): presumably selected on the sign of
;; the count in operand 2 -- confirm against the full condition.
11140 (define_insn "sse5_lshl<mode>3"
11141 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11142 (if_then_else:SSEMODE1248
11144 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11146 (ashift:SSEMODE1248
11147 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11149 (lshiftrt:SSEMODE1248
11151 (neg:SSEMODE1248 (match_dup 2)))))]
11152 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11153 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11154 [(set_attr "type" "sseishft")
11155 (set_attr "mode" "TI")])
11157 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar count: operand 2 is stored in all 16 slots
;; of a PARALLEL, built into a V16QI register with vec_initv16qi, and used
;; as the count vector for sse5_ashlv16qi3.
11158 (define_expand "ashlv16qi3"
11159 [(match_operand:V16QI 0 "register_operand" "")
11160 (match_operand:V16QI 1 "register_operand" "")
11161 (match_operand:SI 2 "nonmemory_operand" "")]
11164 rtvec vs = rtvec_alloc (16);
11165 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11166 rtx reg = gen_reg_rtx (V16QImode);
11168 for (i = 0; i < 16; i++)
11169 RTVEC_ELT (vs, i) = operands[2];
11171 emit_insn (gen_vec_initv16qi (reg, par));
11172 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar count: operand 2 is stored in all 16
;; slots of a PARALLEL, built into a V16QI register with vec_initv16qi, and
;; used as the count vector for sse5_lshlv16qi3.
11176 (define_expand "lshlv16qi3"
11177 [(match_operand:V16QI 0 "register_operand" "")
11178 (match_operand:V16QI 1 "register_operand" "")
11179 (match_operand:SI 2 "nonmemory_operand" "")]
11182 rtvec vs = rtvec_alloc (16);
11183 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11184 rtx reg = gen_reg_rtx (V16QImode);
11186 for (i = 0; i < 16; i++)
11187 RTVEC_ELT (vs, i) = operands[2];
11189 emit_insn (gen_vec_initv16qi (reg, par));
11190 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count, implemented through
;; sse5_ashlv16qi3 with a negated count.  A CONST_INT count is negated at
;; expand time before being replicated into the vector; for any other count
;; the replicated vector is negated with negv16qi2 before the shift.
11194 (define_expand "ashrv16qi3"
11195 [(match_operand:V16QI 0 "register_operand" "")
11196 (match_operand:V16QI 1 "register_operand" "")
11197 (match_operand:SI 2 "nonmemory_operand" "")]
11200 rtvec vs = rtvec_alloc (16);
11201 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11202 rtx reg = gen_reg_rtx (V16QImode);
11204 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
11205 ? GEN_INT (- INTVAL (operands[2]))
11208 for (i = 0; i < 16; i++)
11209 RTVEC_ELT (vs, i) = ele;
11211 emit_insn (gen_vec_initv16qi (reg, par));
11213 if (GET_CODE (operands[2]) != CONST_INT)
11215 rtx neg = gen_reg_rtx (V16QImode);
11216 emit_insn (gen_negv16qi2 (neg, reg));
11217 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11220 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count, implemented through
;; sse5_ashlv2di3 with a negated count.  The count is negated at expand time
;; when it is a CONST_INT; otherwise it is negated with negdi2, after a
;; convert_move to DImode when its mode is not already DImode.  The negated
;; scalar is placed in both vector elements via vec_initv2di.
11225 (define_expand "ashrv2di3"
11226 [(match_operand:V2DI 0 "register_operand" "")
11227 (match_operand:V2DI 1 "register_operand" "")
11228 (match_operand:DI 2 "nonmemory_operand" "")]
11231 rtvec vs = rtvec_alloc (2);
11232 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11233 rtx reg = gen_reg_rtx (V2DImode);
11236 if (GET_CODE (operands[2]) == CONST_INT)
11237 ele = GEN_INT (- INTVAL (operands[2]));
11238 else if (GET_MODE (operands[2]) != DImode)
11240 rtx move = gen_reg_rtx (DImode);
11241 ele = gen_reg_rtx (DImode);
11242 convert_move (move, operands[2], false);
11243 emit_insn (gen_negdi2 (ele, move));
11247 ele = gen_reg_rtx (DImode);
11248 emit_insn (gen_negdi2 (ele, operands[2]));
11251 RTVEC_ELT (vs, 0) = ele;
11252 RTVEC_ELT (vs, 1) = ele;
11253 emit_insn (gen_vec_initv2di (reg, par));
11254 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11258 ;; SSE5 FRCZ support
;; Packed frcz<ssemodesuffixf4>: one register-or-memory source (operand 1)
;; and a register destination.
11260 (define_insn "sse5_frcz<mode>2"
11261 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11263 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11266 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11267 [(set_attr "type" "ssecvt1")
11268 (set_attr "prefix_extra" "1")
11269 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector frcz<ssemodesuffixf2s>: a vec_merge of the operation on
;; operand 2 with operand 1, where operand 1 is tied to the destination by
;; the "0" constraint.  Only operands 0 and 2 appear in the template.
11272 (define_insn "sse5_vmfrcz<mode>2"
11273 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11274 (vec_merge:SSEMODEF2P
11276 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11278 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11281 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11282 [(set_attr "type" "ssecvt1")
11283 (set_attr "prefix_extra" "1")
11284 (set_attr "mode" "<MODE>")])
;; cvtph2ps: V4HI source (register or memory) to a V4SF register, modelled
;; as an unspec.
11286 (define_insn "sse5_cvtph2ps"
11287 [(set (match_operand:V4SF 0 "register_operand" "=x")
11288 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11291 "cvtph2ps\t{%1, %0|%0, %1}"
11292 [(set_attr "type" "ssecvt")
11293 (set_attr "mode" "V4SF")])
;; cvtps2ph: V4SF register source to a V4HI destination that may be a
;; register or memory, modelled as an unspec.
11295 (define_insn "sse5_cvtps2ph"
11296 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11297 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11300 "cvtps2ph\t{%1, %0|%0, %1}"
11301 [(set_attr "type" "ssecvt")
11302 (set_attr "mode" "V4SF")])
11304 ;; Scalar versions of the com instructions that use vector types that are
11305 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11306 ;; com instructions fill in 0's in the upper bits instead of leaving them
11307 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar-in-vector com compare.  The preparation code
;; supplies operand 4 as the zero vector CONST0_RTX (<MODE>mode).
11308 (define_expand "sse5_vmmaskcmp<mode>3"
11309 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11310 (vec_merge:SSEMODEF2P
11311 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11312 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11313 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11318 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn form of the scalar-in-vector compare: emits
;; com%Y1<ssemodesuffixf2s>, where operand 1 is the comparison operator and
;; operands 2 and 3 are its inputs.  Operand 4 is matched with an empty
;; predicate and is not printed.
11321 (define_insn "*sse5_vmmaskcmp<mode>3"
11322 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11323 (vec_merge:SSEMODEF2P
11324 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11325 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11326 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11327 (match_operand:SSEMODEF2P 4 "")
11330 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11331 [(set_attr "type" "sse4arg")
11332 (set_attr "mode" "<ssescalarmode>")])
11334 ;; We don't have a comparison operator that always returns true/false, so
11335 ;; handle comfalse and comtrue specially.
;; comfalse/comtrue for floating-point vectors, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  The constant in operand 3 selects one of four
;; mnemonics through the switch below: the scalar and packed "false" forms
;; first, then the scalar and packed "true" forms.  Any other value reaches
;; gcc_unreachable.
;; NOTE(review): the third and fourth arms are taken to be the "true"
;; selectors and therefore emit comtrues/comtruep -- confirm against the
;; case labels.
11336 (define_insn "sse5_com_tf<mode>3"
11337 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11339 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11340 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11341 (match_operand:SI 3 "const_int_operand" "n")]
11342 UNSPEC_SSE5_TRUEFALSE))]
11345 const char *ret = NULL;
11347 switch (INTVAL (operands[3]))
11350 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11354 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11358 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11362 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11366 gcc_unreachable ();
11371 [(set_attr "type" "ssecmp")
11372 (set_attr "mode" "<MODE>")])
;; Packed floating-point compare: emits com%Y1<ssemodesuffixf4>.  Operand 1
;; is the comparison, matched by sse5_comparison_float_operator; operand 3
;; may be a register or memory.
11374 (define_insn "sse5_maskcmp<mode>3"
11375 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11376 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11377 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11378 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11380 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11381 [(set_attr "type" "ssecmp")
11382 (set_attr "mode" "<MODE>")])
;; Packed integer compare: emits pcom%Y1<ssevecsize>.  Operand 1 is the
;; comparison, matched by ix86_comparison_int_operator; operand 3 may be a
;; register or memory.
11384 (define_insn "sse5_maskcmp<mode>3"
11385 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11386 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11387 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11388 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11390 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11391 [(set_attr "type" "sse4arg")
11392 (set_attr "mode" "TI")])
;; Packed unsigned integer compare: emits pcom%Y1u<ssevecsize>.  Operand 1
;; is the comparison, matched by ix86_comparison_uns_operator.
11394 (define_insn "sse5_maskcmp_uns<mode>3"
11395 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11396 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11397 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11398 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11400 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11401 [(set_attr "type" "ssecmp")
11402 (set_attr "mode" "TI")])
11404 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11405 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11406 ;; the exact instruction generated for the intrinsic.
;; Same template as sse5_maskcmp_uns<mode>3, but the comparison is wrapped
;; in UNSPEC_SSE5_UNSIGNED_CMP rather than appearing as a bare operator.
11407 (define_insn "sse5_maskcmp_uns2<mode>3"
11408 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11409 (unspec:SSEMODE1248
11410 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11411 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11412 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11413 UNSPEC_SSE5_UNSIGNED_CMP))]
11415 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11416 [(set_attr "type" "ssecmp")
11417 (set_attr "mode" "TI")])
11419 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11420 ;; being added here to be complete.
;; Integer pcomtrue/pcomfalse, modelled as UNSPEC_SSE5_TRUEFALSE.  A nonzero
;; constant in operand 3 selects pcomtrue<ssevecsize>; zero selects
;; pcomfalse<ssevecsize>.
11421 (define_insn "sse5_pcom_tf<mode>3"
11422 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11423 (unspec:SSEMODE1248
11424 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11425 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11426 (match_operand:SI 3 "const_int_operand" "n")]
11427 UNSPEC_SSE5_TRUEFALSE))]
11430 return ((INTVAL (operands[3]) != 0)
11431 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11432 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11434 [(set_attr "type" "ssecmp")
11435 (set_attr "mode" "TI")])
;; VEX-encoded vaesenc, enabled by TARGET_AES && TARGET_AVX.  Three-operand
;; form: the destination is independent of source operand 1.
11437 (define_insn "*avx_aesenc"
11438 [(set (match_operand:V2DI 0 "register_operand" "=x")
11439 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11440 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11442 "TARGET_AES && TARGET_AVX"
11443 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11444 [(set_attr "type" "sselog1")
11445 (set_attr "prefix" "vex")
11446 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination by the "0"
;; constraint, so only operands 0 and 2 appear in the template.
11448 (define_insn "aesenc"
11449 [(set (match_operand:V2DI 0 "register_operand" "=x")
11450 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11451 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11454 "aesenc\t{%2, %0|%0, %2}"
11455 [(set_attr "type" "sselog1")
11456 (set_attr "prefix_extra" "1")
11457 (set_attr "mode" "TI")])
;; VEX-encoded vaesenclast (UNSPEC_AESENCLAST), enabled by
;; TARGET_AES && TARGET_AVX.  Three-operand form.
11459 (define_insn "*avx_aesenclast"
11460 [(set (match_operand:V2DI 0 "register_operand" "=x")
11461 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11462 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11463 UNSPEC_AESENCLAST))]
11464 "TARGET_AES && TARGET_AVX"
11465 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11466 [(set_attr "type" "sselog1")
11467 (set_attr "prefix" "vex")
11468 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination by the "0" constraint.
11470 (define_insn "aesenclast"
11471 [(set (match_operand:V2DI 0 "register_operand" "=x")
11472 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11473 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11474 UNSPEC_AESENCLAST))]
11476 "aesenclast\t{%2, %0|%0, %2}"
11477 [(set_attr "type" "sselog1")
11478 (set_attr "prefix_extra" "1")
11479 (set_attr "mode" "TI")])
;; VEX-encoded vaesdec, enabled by TARGET_AES && TARGET_AVX.  Three-operand
;; form: the destination is independent of source operand 1.
11481 (define_insn "*avx_aesdec"
11482 [(set (match_operand:V2DI 0 "register_operand" "=x")
11483 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11484 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11486 "TARGET_AES && TARGET_AVX"
11487 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11488 [(set_attr "type" "sselog1")
11489 (set_attr "prefix" "vex")
11490 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination by the "0"
;; constraint, so only operands 0 and 2 appear in the template.
11492 (define_insn "aesdec"
11493 [(set (match_operand:V2DI 0 "register_operand" "=x")
11494 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11495 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11498 "aesdec\t{%2, %0|%0, %2}"
11499 [(set_attr "type" "sselog1")
11500 (set_attr "prefix_extra" "1")
11501 (set_attr "mode" "TI")])
;; VEX-encoded vaesdeclast (UNSPEC_AESDECLAST), enabled by
;; TARGET_AES && TARGET_AVX.  Three-operand form.
11503 (define_insn "*avx_aesdeclast"
11504 [(set (match_operand:V2DI 0 "register_operand" "=x")
11505 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11506 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11507 UNSPEC_AESDECLAST))]
11508 "TARGET_AES && TARGET_AVX"
11509 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11510 [(set_attr "type" "sselog1")
11511 (set_attr "prefix" "vex")
11512 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination by the "0" constraint.
11514 (define_insn "aesdeclast"
11515 [(set (match_operand:V2DI 0 "register_operand" "=x")
11516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11517 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11518 UNSPEC_AESDECLAST))]
11520 "aesdeclast\t{%2, %0|%0, %2}"
11521 [(set_attr "type" "sselog1")
11522 (set_attr "prefix_extra" "1")
11523 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  The mnemonic is written
;; with the %v modifier and the prefix attribute is maybe_vex.
;; NOTE(review): presumably this selects the VEX form when AVX is enabled --
;; confirm against the %v operand-printing code.
11525 (define_insn "aesimc"
11526 [(set (match_operand:V2DI 0 "register_operand" "=x")
11527 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11530 "%vaesimc\t{%1, %0|%0, %1}"
11531 [(set_attr "type" "sselog1")
11532 (set_attr "prefix_extra" "1")
11533 (set_attr "prefix" "maybe_vex")
11534 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): register-or-memory source in
;; operand 1 and an immediate in operand 2 restricted by
;; const_0_to_255_operand.  Uses the %v modifier and maybe_vex prefix.
11536 (define_insn "aeskeygenassist"
11537 [(set (match_operand:V2DI 0 "register_operand" "=x")
11538 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11539 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11540 UNSPEC_AESKEYGENASSIST))]
11542 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11543 [(set_attr "type" "sselog1")
11544 (set_attr "prefix_extra" "1")
11545 (set_attr "prefix" "maybe_vex")
11546 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq, enabled by TARGET_PCLMUL && TARGET_AVX.  Two
;; vector sources plus an immediate in operand 3 restricted by
;; const_0_to_255_operand; the destination is independent of operand 1.
11548 (define_insn "*vpclmulqdq"
11549 [(set (match_operand:V2DI 0 "register_operand" "=x")
11550 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11551 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11552 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11554 "TARGET_PCLMUL && TARGET_AVX"
11555 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11556 [(set_attr "type" "sselog1")
11557 (set_attr "prefix" "vex")
11558 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination by the "0"
;; constraint; operand 3 is an immediate restricted by
;; const_0_to_255_operand.
11560 (define_insn "pclmulqdq"
11561 [(set (match_operand:V2DI 0 "register_operand" "=x")
11562 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11563 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11564 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11567 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11568 [(set_attr "type" "sselog1")
11569 (set_attr "prefix_extra" "1")
11570 (set_attr "mode" "TI")])
;; Expander that builds operand 0 as a PARALLEL of nregs + 1 elements, where
;; nregs is 16 for TARGET_64BIT and 8 otherwise.  Element 0 is an
;; UNSPEC_VOLATILE; elements 1..nregs each SET one SSE register, viewed in
;; V8SImode, to the V8SI zero vector.
11572 (define_expand "avx_vzeroall"
11573 [(match_par_dup 0 [(const_int 0)])]
11576 int nregs = TARGET_64BIT ? 16 : 8;
11579 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11581 XVECEXP (operands[0], 0, 0)
11582 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11585 for (regno = 0; regno < nregs; regno++)
11586 XVECEXP (operands[0], 0, regno + 1)
11587 = gen_rtx_SET (VOIDmode,
11588 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11589 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile, followed by a SET
;; of a register to a const0_operand.
11592 (define_insn "*avx_vzeroall"
11593 [(match_parallel 0 "vzeroall_operation"
11594 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11595 (set (match_operand 1 "register_operand" "=x")
11596 (match_operand 2 "const0_operand" "X"))])]
11599 [(set_attr "type" "sse")
11600 (set_attr "memory" "none")
11601 (set_attr "prefix" "vex")
11602 (set_attr "mode" "OI")])
11604 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; 32-bit variant (TARGET_AVX && !TARGET_64BIT): an UNSPECV_VZEROUPPER
;; unspec_volatile plus explicit V8SI clobbers of XMM0 through XMM7.
11605 (define_insn "avx_vzeroupper"
11606 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11607 (clobber (reg:V8SI XMM0_REG))
11608 (clobber (reg:V8SI XMM1_REG))
11609 (clobber (reg:V8SI XMM2_REG))
11610 (clobber (reg:V8SI XMM3_REG))
11611 (clobber (reg:V8SI XMM4_REG))
11612 (clobber (reg:V8SI XMM5_REG))
11613 (clobber (reg:V8SI XMM6_REG))
11614 (clobber (reg:V8SI XMM7_REG))]
11615 "TARGET_AVX && !TARGET_64BIT"
11617 [(set_attr "type" "sse")
11618 (set_attr "memory" "none")
11619 (set_attr "prefix" "vex")
11620 (set_attr "mode" "OI")])
;; 64-bit variant (TARGET_AVX && TARGET_64BIT): an UNSPECV_VZEROUPPER
;; unspec_volatile plus explicit V8SI clobbers of XMM0 through XMM15.
11622 (define_insn "avx_vzeroupper_rex64"
11623 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11624 (clobber (reg:V8SI XMM0_REG))
11625 (clobber (reg:V8SI XMM1_REG))
11626 (clobber (reg:V8SI XMM2_REG))
11627 (clobber (reg:V8SI XMM3_REG))
11628 (clobber (reg:V8SI XMM4_REG))
11629 (clobber (reg:V8SI XMM5_REG))
11630 (clobber (reg:V8SI XMM6_REG))
11631 (clobber (reg:V8SI XMM7_REG))
11632 (clobber (reg:V8SI XMM8_REG))
11633 (clobber (reg:V8SI XMM9_REG))
11634 (clobber (reg:V8SI XMM10_REG))
11635 (clobber (reg:V8SI XMM11_REG))
11636 (clobber (reg:V8SI XMM12_REG))
11637 (clobber (reg:V8SI XMM13_REG))
11638 (clobber (reg:V8SI XMM14_REG))
11639 (clobber (reg:V8SI XMM15_REG))]
11640 "TARGET_AVX && TARGET_64BIT"
11642 [(set_attr "type" "sse")
11643 (set_attr "memory" "none")
11644 (set_attr "prefix" "vex")
11645 (set_attr "mode" "OI")])
;; vpermilp<avxmodesuffixf2c> with an immediate control in operand 2,
;; restricted by const_0_to_<vpermilbits>_operand.  Operand 1 uses the
;; nonimmediate_operand predicate so that it agrees with its "xm"
;; constraint: a memory source is accepted directly instead of first being
;; forced into a register.
11647 (define_insn "avx_vpermil<mode>"
11648 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11650 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11651 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11654 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11655 [(set_attr "type" "sselog")
11656 (set_attr "prefix" "vex")
11657 (set_attr "mode" "<MODE>")])
;; vpermilp<avxmodesuffixf2c> with a variable control: operand 2 has mode
;; <avxpermvecmode> and may be a register or memory; operand 1 must be a
;; register.
11659 (define_insn "avx_vpermilvar<mode>3"
11660 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11662 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11663 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11666 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11667 [(set_attr "type" "sselog")
11668 (set_attr "prefix" "vex")
11669 (set_attr "mode" "<MODE>")])
;; vperm2f128 (UNSPEC_VPERMIL2F128): two 256-bit sources, the second of
;; which may be in memory, plus an immediate in operand 3 restricted by
;; const_0_to_255_operand.
11671 (define_insn "avx_vperm2f128<mode>3"
11672 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11673 (unspec:AVX256MODE2P
11674 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11675 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11676 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11677 UNSPEC_VPERMIL2F128))]
11679 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11680 [(set_attr "type" "sselog")
11681 (set_attr "prefix" "vex")
11682 (set_attr "mode" "V8SF")])
;; vbroadcasts<avxmodesuffixf2c>: operand 1 is a scalar of mode
;; <avxscalarmode> that must be in memory.  The result is expressed as
;; nested vec_concat operations built on that operand.
11684 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11685 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11686 (vec_concat:AVXMODEF4P
11687 (vec_concat:<avxhalfvecmode>
11688 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11690 (vec_concat:<avxhalfvecmode>
11694 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11695 [(set_attr "type" "ssemov")
11696 (set_attr "prefix" "vex")
11697 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss into a V8SF register from an SF operand that must be in
;; memory.
11699 (define_insn "avx_vbroadcastss256"
11700 [(set (match_operand:V8SF 0 "register_operand" "=x")
11704 (match_operand:SF 1 "memory_operand" "m")
11717 "vbroadcastss\t{%1, %0|%0, %1}"
11718 [(set_attr "type" "ssemov")
11719 (set_attr "prefix" "vex")
11720 (set_attr "mode" "SF")])
;; vbroadcastf128: operand 1 is a half-width vector (<avxhalfvecmode>) that
;; must be in memory; the 256-bit result is a vec_concat built from it.
11722 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11723 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11724 (vec_concat:AVX256MODEF2P
11725 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11728 "vbroadcastf128\t{%1, %0|%0, %1}"
11729 [(set_attr "type" "ssemov")
11730 (set_attr "prefix" "vex")
11731 (set_attr "mode" "V4SF")])
;; Expander for inserting a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  Operand 3, restricted by const_0_to_1_operand,
;; selects between vec_set_lo_<mode> and vec_set_hi_<mode>; any other value
;; reaches gcc_unreachable.
11733 (define_expand "avx_vinsertf128<mode>"
11734 [(match_operand:AVX256MODE 0 "register_operand" "")
11735 (match_operand:AVX256MODE 1 "register_operand" "")
11736 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11737 (match_operand:SI 3 "const_0_to_1_operand" "")]
11740 switch (INTVAL (operands[3]))
11743 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11747 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11751 gcc_unreachable ();
;; Four-element 256-bit modes: the result is operand 2 concatenated with
;; elements 2..3 of operand 1.  Emitted as vinsertf128 with immediate 0x0.
11756 (define_insn "vec_set_lo_<mode>"
11757 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11758 (vec_concat:AVX256MODE4P
11759 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11760 (vec_select:<avxhalfvecmode>
11761 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11762 (parallel [(const_int 2) (const_int 3)]))))]
11764 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "prefix" "vex")
11767 (set_attr "mode" "V8SF")])
;; Four-element 256-bit modes: the result is elements 0..1 of operand 1
;; concatenated with operand 2.  Emitted as vinsertf128 with immediate 0x1.
11769 (define_insn "vec_set_hi_<mode>"
11770 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11771 (vec_concat:AVX256MODE4P
11772 (vec_select:<avxhalfvecmode>
11773 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11774 (parallel [(const_int 0) (const_int 1)]))
11775 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11777 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11778 [(set_attr "type" "sselog")
11779 (set_attr "prefix" "vex")
11780 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: the result is operand 2 concatenated with
;; elements 4..7 of operand 1.  Emitted as vinsertf128 with immediate 0x0.
11782 (define_insn "vec_set_lo_<mode>"
11783 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11784 (vec_concat:AVX256MODE8P
11785 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11786 (vec_select:<avxhalfvecmode>
11787 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11788 (parallel [(const_int 4) (const_int 5)
11789 (const_int 6) (const_int 7)]))))]
11791 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11792 [(set_attr "type" "sselog")
11793 (set_attr "prefix" "vex")
11794 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: the result is elements 0..3 of operand 1
;; concatenated with operand 2.  Emitted as vinsertf128 with immediate 0x1.
11796 (define_insn "vec_set_hi_<mode>"
11797 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11798 (vec_concat:AVX256MODE8P
11799 (vec_select:<avxhalfvecmode>
11800 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11801 (parallel [(const_int 0) (const_int 1)
11802 (const_int 2) (const_int 3)]))
11803 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11805 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11806 [(set_attr "type" "sselog")
11807 (set_attr "prefix" "vex")
11808 (set_attr "mode" "V8SF")])
;; V16HI variant: the V8HI operand 2 comes first and the index parallel
;; lists elements 8..15 of operand 1.  Emitted as vinsertf128 with
;; immediate 0x0.
11810 (define_insn "vec_set_lo_v16hi"
11811 [(set (match_operand:V16HI 0 "register_operand" "=x")
11813 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11815 (match_operand:V16HI 1 "register_operand" "x")
11816 (parallel [(const_int 8) (const_int 9)
11817 (const_int 10) (const_int 11)
11818 (const_int 12) (const_int 13)
11819 (const_int 14) (const_int 15)]))))]
11821 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11822 [(set_attr "type" "sselog")
11823 (set_attr "prefix" "vex")
11824 (set_attr "mode" "V8SF")])
;; V16HI variant: the index parallel lists elements 0..7 of operand 1,
;; followed by the V8HI operand 2.  Emitted as vinsertf128 with
;; immediate 0x1.
11826 (define_insn "vec_set_hi_v16hi"
11827 [(set (match_operand:V16HI 0 "register_operand" "=x")
11830 (match_operand:V16HI 1 "register_operand" "x")
11831 (parallel [(const_int 0) (const_int 1)
11832 (const_int 2) (const_int 3)
11833 (const_int 4) (const_int 5)
11834 (const_int 6) (const_int 7)]))
11835 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11837 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11838 [(set_attr "type" "sselog")
11839 (set_attr "prefix" "vex")
11840 (set_attr "mode" "V8SF")])
;; V32QI variant: the V16QI operand 2 comes first and the index parallel
;; lists elements 16..31 of operand 1.  Emitted as vinsertf128 with
;; immediate 0x0.
11842 (define_insn "vec_set_lo_v32qi"
11843 [(set (match_operand:V32QI 0 "register_operand" "=x")
11845 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11847 (match_operand:V32QI 1 "register_operand" "x")
11848 (parallel [(const_int 16) (const_int 17)
11849 (const_int 18) (const_int 19)
11850 (const_int 20) (const_int 21)
11851 (const_int 22) (const_int 23)
11852 (const_int 24) (const_int 25)
11853 (const_int 26) (const_int 27)
11854 (const_int 28) (const_int 29)
11855 (const_int 30) (const_int 31)]))))]
11857 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11858 [(set_attr "type" "sselog")
11859 (set_attr "prefix" "vex")
11860 (set_attr "mode" "V8SF")])
;; V32QI variant: the index parallel lists elements 0..15 of operand 1,
;; followed by the V16QI operand 2.  Emitted as vinsertf128 with
;; immediate 0x1.
11862 (define_insn "vec_set_hi_v32qi"
11863 [(set (match_operand:V32QI 0 "register_operand" "=x")
11866 (match_operand:V32QI 1 "register_operand" "x")
11867 (parallel [(const_int 0) (const_int 1)
11868 (const_int 2) (const_int 3)
11869 (const_int 4) (const_int 5)
11870 (const_int 6) (const_int 7)
11871 (const_int 8) (const_int 9)
11872 (const_int 10) (const_int 11)
11873 (const_int 12) (const_int 13)
11874 (const_int 14) (const_int 15)]))
11875 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11877 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11878 [(set_attr "type" "sselog")
11879 (set_attr "prefix" "vex")
11880 (set_attr "mode" "V8SF")])
;; vmaskmovp<avxmodesuffixf2c> load form: operand 1 must be in memory,
;; operand 2 is a register, and the destination is a register.
;; NOTE(review): operand 2 is presumably the element mask -- confirm.
11882 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11883 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11885 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11886 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11890 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11891 [(set_attr "type" "sselog1")
11892 (set_attr "prefix" "vex")
11893 (set_attr "mode" "<MODE>")])
;; vmaskmovp<avxmodesuffixf2c> store form (UNSPEC_MASKSTORE): the
;; destination must be in memory and operands 1 and 2 are registers.
;; NOTE(review): which register operand is the mask is not evident from the
;; pattern alone -- confirm against the instruction reference.
11895 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11896 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11898 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11899 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11901 UNSPEC_MASKSTORE))]
11903 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11904 [(set_attr "type" "sselog1")
11905 (set_attr "prefix" "vex")
11906 (set_attr "mode" "<MODE>")])
;; Produces a 256-bit register from a half-width source, modelled as an
;; unspec.  In alternative 0 the source is tied to the destination ("0").
;; The other alternative emits vmovaps, vmovapd or vmovdqa, chosen by the
;; insn's mode attribute, writing the %x0 view of the destination.  The
;; length attribute is special-cased for alternative 0.
11908 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11909 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11910 (unspec:AVX256MODE2P
11911 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
11915 switch (which_alternative)
11920 switch (get_attr_mode (insn))
11923 return "vmovaps\t{%1, %x0|%x0, %1}";
11925 return "vmovapd\t{%1, %x0|%x0, %1}";
11927 return "vmovdqa\t{%1, %x0|%x0, %1}";
11934 gcc_unreachable ();
11936 [(set_attr "type" "ssemov")
11937 (set_attr "prefix" "vex")
11938 (set_attr "mode" "<avxvecmode>")
11939 (set (attr "length")
11940 (if_then_else (eq_attr "alternative" "0")
11942 (const_string "*")))])
;; Produces a half-width register from a 256-bit source, modelled as an
;; unspec.  In alternative 0 the source is tied to the destination ("0").
;; The other alternative emits vmovaps, vmovapd or vmovdqa, chosen by the
;; insn's mode attribute, reading the %x1 view of the source.  The length
;; attribute is special-cased for alternative 0.
11944 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
11945 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
11946 (unspec:<avxhalfvecmode>
11947 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
11951 switch (which_alternative)
11956 switch (get_attr_mode (insn))
11959 return "vmovaps\t{%x1, %0|%0, %x1}";
11961 return "vmovapd\t{%x1, %0|%0, %x1}";
11963 return "vmovdqa\t{%x1, %0|%0, %x1}";
11970 gcc_unreachable ();
11972 [(set_attr "type" "ssemov")
11973 (set_attr "prefix" "vex")
11974 (set_attr "mode" "<avxvecmode>")
11975 (set (attr "length")
11976 (if_then_else (eq_attr "alternative" "0")
11978 (const_string "*")))])
;; Vector initialisation for the 256-bit AVX modes: delegates to
;; ix86_expand_vector_init with operands 0 and 1 and a first argument of
;; false.
11980 (define_expand "vec_init<mode>"
11981 [(match_operand:AVX256MODE 0 "register_operand" "")
11982 (match_operand 1 "" "")]
11985 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenates two half-width vectors into a 256-bit register.  The first
;; alternative (type sselog, operand 2 in a register or memory) emits
;; vinsertf128 with immediate 0x1 using the %t1 view of operand 1.  The
;; second alternative (type ssemov, operand 2 matching the "C" constraint)
;; emits vmovaps, vmovapd or vmovdqa, chosen by the insn's mode attribute,
;; from operand 1 into the %x0 view of the destination.
11989 (define_insn "*vec_concat<mode>_avx"
11990 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11991 (vec_concat:AVX256MODE
11992 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11993 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11996 switch (which_alternative)
11999 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12001 switch (get_attr_mode (insn))
12004 return "vmovaps\t{%1, %x0|%x0, %1}";
12006 return "vmovapd\t{%1, %x0|%x0, %1}";
12008 return "vmovdqa\t{%1, %x0|%x0, %1}";
12011 gcc_unreachable ();
12014 [(set_attr "type" "sselog,ssemov")
12015 (set_attr "prefix" "vex")
12016 (set_attr "mode" "<avxvecmode>")])