1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16-byte integer vector modes handled by SSE.  TImode is deliberately
23 ;; left out: it gets special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE: the four integer modes of
;; SSEMODEI followed by the packed float modes V4SF and V2DF.
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32-byte integer vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX: the integer modes of
;; AVX256MODEI followed by the packed float modes V8SF and V4DF.
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QImode-element vector modes handled by AVX (32-byte and 16-byte)
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DImode-element vector modes handled by AVX (32-byte and 16-byte)
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX: the six 16-byte modes first, then the
;; six 32-byte modes.
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mix-and-match subsets of the 16-byte modes.  The digits in each
;; SSEMODE<digits> name list the element widths, in bytes, of the integer
;; vector modes the iterator contains:
;;   1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point subsets: SSEMODEF4 covers the scalar SF/DF modes as well
;; as the packed V4SF/V2DF modes; SSEMODEF2P covers only the packed ones.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX subsets of the vector modes:
;;   AVX256MODEF2P  both 32-byte packed float modes
;;   AVX256MODE2P   V8SI plus both 32-byte packed float modes
;;   AVX256MODE4P   the 32-byte modes with four elements
;;   AVX256MODE8P   the 32-byte modes with eight elements
;;   AVXMODEF2P     the packed float modes of both vector sizes
;;   AVXMODEF4P     the four-element packed float modes
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; NOTE(review): judging by their names these two iterate over the float
;; and the integer side of the cvtdq2ps / cvtps2dq conversions; the patterns
;; that use them are not in this part of the file -- confirm.
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Int-float size matches: each iterator pairs the float vector mode with
;; the integer vector mode that has the same element count (4 or 2).
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
67 ;; Mapping from float mode to required SSE level.  The value is spliced
;; into pattern names such as "<sse>_movup<ssemodesuffixf2c>", so the
;; single-precision modes yield "sse_..." names and the double-precision
;; modes yield "sse2_..." names.
68 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
70 ;; Mapping from integer vector mode to the mnemonic suffix letter for its
;; element size: b(yte), w(ord), d(oubleword), q(uadword).
71 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
73 ;; Mapping of the sse5 suffix
;;   ssemodesuffixf4   scalar modes -> "ss"/"sd", packed modes -> "ps"/"pd"
;;   ssemodesuffixf2s  always the scalar suffix, even for the vector modes
;;   ssemodesuffixf2c  only the precision letter ("s" or "d"); appended to
;;                     mnemonic stems such as "movup" and "movntp"
74 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
75 (V4SF "ps") (V2DF "pd")])
76 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
77 (V4SF "ss") (V2DF "sd")])
78 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
80 ;; Mapping of the max integer size for sse5 rotate immediate constraint.
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1, 64-1).
81 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
83 ;; Mapping of vector modes back to the scalar modes, i.e. the mode of a
;; single element.  Used below as the insn "mode" attribute of the
;; scalar-in-vector ("vm") patterns.
84 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
85 (V16QI "QI") (V8HI "HI")
86 (V4SI "SI") (V2DI "DI")])
88 ;; Mapping of vector modes to a vector mode of double size: same element
;; mode, twice as many elements.  Only the 4- and 8-byte element modes are
;; listed.
89 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
90 (V4SF "V8SF") (V4SI "V8SI")])
92 ;; Number of scalar elements in each vector type
93 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
94 (V16QI "16") (V8HI "8")
95 (V4SI "4") (V2DI "2")])
;; Mode attributes used by the AVX patterns.

;; Value for the insn "mode" attribute: every integer vector mode collapses
;; to the integer mode of the same total size (TI for 16 bytes, OI for
;; 32 bytes); float vector modes map to themselves.
98 (define_mode_attr avxvecmode
99 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
100 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
101 (V8SF "V8SF") (V4DF "V4DF")])
;; Integer vector mode -> single-precision float vector mode of the same
;; total size (V4SF for 16-byte modes, V8SF for 32-byte modes).
102 (define_mode_attr avxvecpsmode
103 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
104 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode with the same element mode but half as many elements.
105 (define_mode_attr avxhalfvecmode
106 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
107 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; Element (scalar) mode of the vector mode.  Note that not every AVX
;; vector mode has an entry here.
108 (define_mode_attr avxscalarmode
109 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
110 (V8SF "SF") (V4DF "DF")])
;; Counterpart mode across an SI <-> SF conversion: same element count,
;; integer elements become float elements and vice versa.
111 (define_mode_attr avxcvtvecmode
112 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Float vector mode -> integer vector mode with the same element count
;; and element width.
;; NOTE(review): presumably the mode of the selector operand of the permute
;; patterns; those patterns are not in this part of the file -- confirm.
113 (define_mode_attr avxpermvecmode
114 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Precision letter ("s" single, "d" double) for both vector sizes;
;; appended to mnemonic stems such as "vmovup" and "vmovntp".
115 (define_mode_attr avxmodesuffixf2c
116 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
117 (define_mode_attr avxmodesuffixp
118 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix that distinguishes vector sizes: empty for the
;; 16-byte modes, "256" for the 32-byte modes.  For example
;; "avx_movdqu<avxmodesuffix>" yields avx_movdqu and avx_movdqu256.
120 (define_mode_attr avxmodesuffix
121 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
122 (V8SI "256") (V8SF "256") (V4DF "256")])
124 ;; Mapping of immediate bits for blend instructions.  Each value equals
;; (1 << number-of-elements) - 1, i.e. one bit per vector element.
125 (define_mode_attr blendbits
126 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
128 ;; Mapping of immediate bits for vpermil instructions.
;; NOTE(review): the values are consistent with an 8-bit immediate for the
;; single-precision modes and one bit per element for the double-precision
;; modes -- confirm against the vpermil patterns, which are not in this
;; part of the file.
129 (define_mode_attr vpermilbits
130 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
132 ;; Mapping of immediate bits for pinsr instructions.  Each value equals
;; 1 << (number-of-elements - 1), i.e. the one-hot selector of the last
;; element.
133 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
135 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
143 (define_expand "mov<mode>"
144 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
145 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
148 ix86_expand_vector_move (<MODE>mode, operands);
152 (define_insn "*avx_mov<mode>_internal"
153 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
154 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
156 && (register_operand (operands[0], <MODE>mode)
157 || register_operand (operands[1], <MODE>mode))"
159 switch (which_alternative)
162 return standard_sse_constant_opcode (insn, operands[1]);
165 switch (get_attr_mode (insn))
169 return "vmovaps\t{%1, %0|%0, %1}";
172 return "vmovapd\t{%1, %0|%0, %1}";
174 return "vmovdqa\t{%1, %0|%0, %1}";
180 [(set_attr "type" "sselog1,ssemov,ssemov")
181 (set_attr "prefix" "vex")
182 (set_attr "mode" "<avxvecmode>")])
184 ;; All of these patterns are enabled for SSE1 as well as SSE2.
185 ;; This is essential for maintaining stable calling conventions.
187 (define_expand "mov<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move (<MODE>mode, operands);
196 (define_insn "*mov<mode>_internal"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
198 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
200 && (register_operand (operands[0], <MODE>mode)
201 || register_operand (operands[1], <MODE>mode))"
203 switch (which_alternative)
206 return standard_sse_constant_opcode (insn, operands[1]);
209 switch (get_attr_mode (insn))
212 return "movaps\t{%1, %0|%0, %1}";
214 return "movapd\t{%1, %0|%0, %1}";
216 return "movdqa\t{%1, %0|%0, %1}";
222 [(set_attr "type" "sselog1,ssemov,ssemov")
224 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
225 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
226 (and (eq_attr "alternative" "2")
227 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
229 (const_string "V4SF")
230 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
231 (const_string "V4SF")
232 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
233 (const_string "V2DF")
235 (const_string "TI")))])
237 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
238 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
239 ;; from memory, we'd prefer to load the memory directly into the %xmm
240 ;; register. To facilitate this happy circumstance, this pattern won't
241 ;; split until after register allocation. If the 64-bit value didn't
242 ;; come from memory, this is the best we can do. This is much better
243 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
246 (define_insn_and_split "movdi_to_sse"
248 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
249 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
250 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
251 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
253 "&& reload_completed"
256 if (register_operand (operands[1], DImode))
258 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
259 Assemble the 64-bit DImode value in an xmm register. */
260 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
261 gen_rtx_SUBREG (SImode, operands[1], 0)));
262 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
263 gen_rtx_SUBREG (SImode, operands[1], 4)));
264 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
266 else if (memory_operand (operands[1], DImode))
267 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
273 [(set (match_operand:V4SF 0 "register_operand" "")
274 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
275 "TARGET_SSE && reload_completed"
278 (vec_duplicate:V4SF (match_dup 1))
282 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
283 operands[2] = CONST0_RTX (V4SFmode);
287 [(set (match_operand:V2DF 0 "register_operand" "")
288 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
289 "TARGET_SSE2 && reload_completed"
290 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
292 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
293 operands[2] = CONST0_RTX (DFmode);
296 (define_expand "push<mode>1"
297 [(match_operand:AVX256MODE 0 "register_operand" "")]
300 ix86_expand_push (<MODE>mode, operands[0]);
304 (define_expand "push<mode>1"
305 [(match_operand:SSEMODE 0 "register_operand" "")]
308 ix86_expand_push (<MODE>mode, operands[0]);
312 (define_expand "movmisalign<mode>"
313 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
314 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
317 ix86_expand_vector_move_misalign (<MODE>mode, operands);
321 (define_expand "movmisalign<mode>"
322 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
323 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
326 ix86_expand_vector_move_misalign (<MODE>mode, operands);
330 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
331 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
333 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
335 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
336 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
337 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
338 [(set_attr "type" "ssemov")
339 (set_attr "movu" "1")
340 (set_attr "prefix" "vex")
341 (set_attr "mode" "<MODE>")])
343 (define_insn "sse2_movq128"
344 [(set (match_operand:V2DI 0 "register_operand" "=x")
347 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
348 (parallel [(const_int 0)]))
351 "%vmovq\t{%1, %0|%0, %1}"
352 [(set_attr "type" "ssemov")
353 (set_attr "prefix" "maybe_vex")
354 (set_attr "mode" "TI")])
356 (define_insn "<sse>_movup<ssemodesuffixf2c>"
357 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
359 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
361 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
362 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
363 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
364 [(set_attr "type" "ssemov")
365 (set_attr "movu" "1")
366 (set_attr "mode" "<MODE>")])
368 (define_insn "avx_movdqu<avxmodesuffix>"
369 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
371 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
373 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
374 "vmovdqu\t{%1, %0|%0, %1}"
375 [(set_attr "type" "ssemov")
376 (set_attr "movu" "1")
377 (set_attr "prefix" "vex")
378 (set_attr "mode" "<avxvecmode>")])
380 (define_insn "sse2_movdqu"
381 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
382 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
384 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
385 "movdqu\t{%1, %0|%0, %1}"
386 [(set_attr "type" "ssemov")
387 (set_attr "movu" "1")
388 (set_attr "prefix_data16" "1")
389 (set_attr "mode" "TI")])
391 (define_insn "avx_movnt<mode>"
392 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
394 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
396 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
397 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
398 [(set_attr "type" "ssemov")
399 (set_attr "prefix" "vex")
400 (set_attr "mode" "<MODE>")])
402 (define_insn "<sse>_movnt<mode>"
403 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
405 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
407 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
408 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
409 [(set_attr "type" "ssemov")
410 (set_attr "mode" "<MODE>")])
412 (define_insn "avx_movnt<mode>"
413 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
415 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
418 "vmovntdq\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssecvt")
420 (set_attr "prefix" "vex")
421 (set_attr "mode" "<avxvecmode>")])
423 (define_insn "sse2_movntv2di"
424 [(set (match_operand:V2DI 0 "memory_operand" "=m")
425 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
428 "movntdq\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssemov")
430 (set_attr "prefix_data16" "1")
431 (set_attr "mode" "TI")])
433 (define_insn "sse2_movntsi"
434 [(set (match_operand:SI 0 "memory_operand" "=m")
435 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
438 "movnti\t{%1, %0|%0, %1}"
439 [(set_attr "type" "ssemov")
440 (set_attr "prefix_data16" "0")
441 (set_attr "mode" "V2DF")])
443 (define_insn "avx_lddqu<avxmodesuffix>"
444 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
446 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
449 "vlddqu\t{%1, %0|%0, %1}"
450 [(set_attr "type" "ssecvt")
451 (set_attr "movu" "1")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
455 (define_insn "sse3_lddqu"
456 [(set (match_operand:V16QI 0 "register_operand" "=x")
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
460 "lddqu\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssemov")
462 (set_attr "movu" "1")
463 (set_attr "prefix_data16" "0")
464 (set_attr "prefix_rep" "1")
465 (set_attr "mode" "TI")])
467 ; Expand patterns for non-temporal stores. At the moment, only those
468 ; that directly map to insns are defined; it would be possible to
469 ; define patterns for other modes that would expand to several insns.
471 (define_expand "storent<mode>"
472 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
474 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
476 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
479 (define_expand "storent<mode>"
480 [(set (match_operand:MODEF 0 "memory_operand" "")
482 [(match_operand:MODEF 1 "register_operand" "")]
487 (define_expand "storentv2di"
488 [(set (match_operand:V2DI 0 "memory_operand" "")
489 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
494 (define_expand "storentsi"
495 [(set (match_operand:SI 0 "memory_operand" "")
496 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
503 ;; Parallel floating point arithmetic
505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
507 (define_expand "<code><mode>2"
508 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
510 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
512 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for packed floating-point add/subtract on the 32-byte AVX
;; float modes (V8SF, V4DF).  Enabled when AVX256_VEC_FLOAT_MODE_P holds
;; for the mode.
;; NOTE(review): the plusminus iterator and <plusminus_insn> are defined
;; outside this part of the file; presumably they produce the standard
;; add<mode>3 / sub<mode>3 names -- confirm.
514 (define_expand "<plusminus_insn><mode>3"
515 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
516 (plusminus:AVX256MODEF2P
517 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
518 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
519 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
;; The preparation code only adjusts the operands in place; there is no
;; DONE, so the RTL template above is what gets emitted.
520 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed floating-point add/subtract for every mode in AVXMODEF2P
;; (16-byte and 32-byte).  Non-destructive three-operand VEX form: the
;; destination (operand 0) is independent of both sources.  Operand 1 must
;; end up in an SSE register; operand 2 may be a register or memory.
;; NOTE(review): <comm> is defined outside this part of the file;
;; presumably it expands to "%" (commutative) for plus and to nothing for
;; minus -- confirm.
522 (define_insn "*avx_<plusminus_insn><mode>3"
523 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
524 (plusminus:AVXMODEF2P
525 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
526 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
527 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
528 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
;; Output template: AT&T operand order before the "|", Intel order after.
529 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
530 [(set_attr "type" "sseadd")
531 (set_attr "prefix" "vex")
532 (set_attr "mode" "<avxvecmode>")])
;; Expander for packed floating-point add/subtract on the 16-byte float
;; modes (V4SF, V2DF).  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; mode.
;; NOTE(review): the plusminus iterator and <plusminus_insn> are defined
;; outside this part of the file; presumably they produce the standard
;; add<mode>3 / sub<mode>3 names -- confirm.
534 (define_expand "<plusminus_insn><mode>3"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
536 (plusminus:SSEMODEF2P
537 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
538 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
539 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; The preparation code only adjusts the operands in place; there is no
;; DONE, so the RTL template above is what gets emitted.
540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed floating-point add/subtract for V4SF and V2DF.  Destructive
;; two-operand form: the "0" constraint ties operand 1 to the destination
;; register, so the output template prints only operands 2 and 0.
;; Operand 2 may be a register or memory.
;; NOTE(review): <comm> is defined outside this part of the file;
;; presumably it expands to "%" (commutative) for plus and to nothing for
;; minus -- confirm.
542 (define_insn "*<plusminus_insn><mode>3"
543 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
544 (plusminus:SSEMODEF2P
545 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
546 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
547 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
;; Output template: AT&T operand order before the "|", Intel order after.
549 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
550 [(set_attr "type" "sseadd")
551 (set_attr "mode" "<MODE>")])
553 (define_insn "*avx_vm<plusminus_insn><mode>3"
554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
555 (vec_merge:SSEMODEF2P
556 (plusminus:SSEMODEF2P
557 (match_operand:SSEMODEF2P 1 "register_operand" "x")
558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
561 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
562 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
563 [(set_attr "type" "sseadd")
564 (set_attr "prefix" "vex")
565 (set_attr "mode" "<ssescalarmode>")])
567 (define_insn "<sse>_vm<plusminus_insn><mode>3"
568 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
569 (vec_merge:SSEMODEF2P
570 (plusminus:SSEMODEF2P
571 (match_operand:SSEMODEF2P 1 "register_operand" "0")
572 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
575 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
576 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<ssescalarmode>")])
580 (define_expand "mul<mode>3"
581 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
583 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
584 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
585 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
586 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
588 (define_insn "*avx_mul<mode>3"
589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
591 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
592 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
593 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
594 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
595 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
596 [(set_attr "type" "ssemul")
597 (set_attr "prefix" "vex")
598 (set_attr "mode" "<avxvecmode>")])
600 (define_expand "mul<mode>3"
601 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
603 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
604 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
605 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
606 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
608 (define_insn "*mul<mode>3"
609 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
611 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
612 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
613 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
614 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
615 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
616 [(set_attr "type" "ssemul")
617 (set_attr "mode" "<MODE>")])
619 (define_insn "*avx_vmmul<mode>3"
620 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
621 (vec_merge:SSEMODEF2P
623 (match_operand:SSEMODEF2P 1 "register_operand" "x")
624 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
627 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
628 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
629 [(set_attr "type" "ssemul")
630 (set_attr "prefix" "vex")
631 (set_attr "mode" "<ssescalarmode>")])
633 (define_insn "<sse>_vmmul<mode>3"
634 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
635 (vec_merge:SSEMODEF2P
637 (match_operand:SSEMODEF2P 1 "register_operand" "0")
638 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
641 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
642 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<ssescalarmode>")])
646 (define_expand "divv8sf3"
647 [(set (match_operand:V8SF 0 "register_operand" "")
648 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
649 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
652 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
654 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
655 && flag_finite_math_only && !flag_trapping_math
656 && flag_unsafe_math_optimizations)
658 ix86_emit_swdivsf (operands[0], operands[1],
659 operands[2], V8SFmode);
664 (define_expand "divv4df3"
665 [(set (match_operand:V4DF 0 "register_operand" "")
666 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
667 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
669 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
671 (define_insn "avx_div<mode>3"
672 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
674 (match_operand:AVXMODEF2P 1 "register_operand" "x")
675 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
676 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
677 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
678 [(set_attr "type" "ssediv")
679 (set_attr "prefix" "vex")
680 (set_attr "mode" "<MODE>")])
682 (define_expand "divv4sf3"
683 [(set (match_operand:V4SF 0 "register_operand" "")
684 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
685 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
688 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
689 && flag_finite_math_only && !flag_trapping_math
690 && flag_unsafe_math_optimizations)
692 ix86_emit_swdivsf (operands[0], operands[1],
693 operands[2], V4SFmode);
698 (define_expand "divv2df3"
699 [(set (match_operand:V2DF 0 "register_operand" "")
700 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
701 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
705 (define_insn "*avx_div<mode>3"
706 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
708 (match_operand:SSEMODEF2P 1 "register_operand" "x")
709 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
710 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
711 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
712 [(set_attr "type" "ssediv")
713 (set_attr "prefix" "vex")
714 (set_attr "mode" "<MODE>")])
716 (define_insn "<sse>_div<mode>3"
717 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
719 (match_operand:SSEMODEF2P 1 "register_operand" "0")
720 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
721 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
722 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
723 [(set_attr "type" "ssediv")
724 (set_attr "mode" "<MODE>")])
726 (define_insn "*avx_vmdiv<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (vec_merge:SSEMODEF2P
730 (match_operand:SSEMODEF2P 1 "register_operand" "x")
731 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
734 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
735 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
736 [(set_attr "type" "ssediv")
737 (set_attr "prefix" "vex")
738 (set_attr "mode" "<ssescalarmode>")])
740 (define_insn "<sse>_vmdiv<mode>3"
741 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
742 (vec_merge:SSEMODEF2P
744 (match_operand:SSEMODEF2P 1 "register_operand" "0")
745 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<ssescalarmode>")])
753 (define_insn "avx_rcpv8sf2"
754 [(set (match_operand:V8SF 0 "register_operand" "=x")
756 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
758 "vrcpps\t{%1, %0|%0, %1}"
759 [(set_attr "type" "sse")
760 (set_attr "prefix" "vex")
761 (set_attr "mode" "V8SF")])
763 (define_insn "sse_rcpv4sf2"
764 [(set (match_operand:V4SF 0 "register_operand" "=x")
766 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
768 "%vrcpps\t{%1, %0|%0, %1}"
769 [(set_attr "type" "sse")
770 (set_attr "atom_sse_attr" "rcp")
771 (set_attr "prefix" "maybe_vex")
772 (set_attr "mode" "V4SF")])
774 (define_insn "*avx_vmrcpv4sf2"
775 [(set (match_operand:V4SF 0 "register_operand" "=x")
777 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
779 (match_operand:V4SF 2 "register_operand" "x")
782 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
783 [(set_attr "type" "sse")
784 (set_attr "prefix" "vex")
785 (set_attr "mode" "SF")])
787 (define_insn "sse_vmrcpv4sf2"
788 [(set (match_operand:V4SF 0 "register_operand" "=x")
790 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
792 (match_operand:V4SF 2 "register_operand" "0")
795 "rcpss\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "mode" "SF")])
800 (define_expand "sqrtv8sf2"
801 [(set (match_operand:V8SF 0 "register_operand" "")
802 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
805 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
806 && flag_finite_math_only && !flag_trapping_math
807 && flag_unsafe_math_optimizations)
809 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
814 (define_insn "avx_sqrtv8sf2"
815 [(set (match_operand:V8SF 0 "register_operand" "=x")
816 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
818 "vsqrtps\t{%1, %0|%0, %1}"
819 [(set_attr "type" "sse")
820 (set_attr "prefix" "vex")
821 (set_attr "mode" "V8SF")])
823 (define_expand "sqrtv4sf2"
824 [(set (match_operand:V4SF 0 "register_operand" "")
825 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
828 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
829 && flag_finite_math_only && !flag_trapping_math
830 && flag_unsafe_math_optimizations)
832 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
837 (define_insn "sse_sqrtv4sf2"
838 [(set (match_operand:V4SF 0 "register_operand" "=x")
839 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
841 "%vsqrtps\t{%1, %0|%0, %1}"
842 [(set_attr "type" "sse")
843 (set_attr "atom_sse_attr" "sqrt")
844 (set_attr "prefix" "maybe_vex")
845 (set_attr "mode" "V4SF")])
847 (define_insn "sqrtv4df2"
848 [(set (match_operand:V4DF 0 "register_operand" "=x")
849 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
851 "vsqrtpd\t{%1, %0|%0, %1}"
852 [(set_attr "type" "sse")
853 (set_attr "prefix" "vex")
854 (set_attr "mode" "V4DF")])
856 (define_insn "sqrtv2df2"
857 [(set (match_operand:V2DF 0 "register_operand" "=x")
858 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
860 "%vsqrtpd\t{%1, %0|%0, %1}"
861 [(set_attr "type" "sse")
862 (set_attr "prefix" "maybe_vex")
863 (set_attr "mode" "V2DF")])
865 (define_insn "*avx_vmsqrt<mode>2"
866 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
867 (vec_merge:SSEMODEF2P
869 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
870 (match_operand:SSEMODEF2P 2 "register_operand" "x")
872 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
873 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
874 [(set_attr "type" "sse")
875 (set_attr "prefix" "vex")
876 (set_attr "mode" "<ssescalarmode>")])
878 (define_insn "<sse>_vmsqrt<mode>2"
879 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
880 (vec_merge:SSEMODEF2P
882 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
883 (match_operand:SSEMODEF2P 2 "register_operand" "0")
885 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
886 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
887 [(set_attr "type" "sse")
888 (set_attr "atom_sse_attr" "sqrt")
889 (set_attr "mode" "<ssescalarmode>")])
891 (define_expand "rsqrtv8sf2"
892 [(set (match_operand:V8SF 0 "register_operand" "")
894 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
895 "TARGET_AVX && TARGET_SSE_MATH"
897 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
901 (define_insn "avx_rsqrtv8sf2"
902 [(set (match_operand:V8SF 0 "register_operand" "=x")
904 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
906 "vrsqrtps\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "prefix" "vex")
909 (set_attr "mode" "V8SF")])
911 (define_expand "rsqrtv4sf2"
912 [(set (match_operand:V4SF 0 "register_operand" "")
914 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
917 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
921 (define_insn "sse_rsqrtv4sf2"
922 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
926 "%vrsqrtps\t{%1, %0|%0, %1}"
927 [(set_attr "type" "sse")
928 (set_attr "prefix" "maybe_vex")
929 (set_attr "mode" "V4SF")])
931 (define_insn "*avx_vmrsqrtv4sf2"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
934 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
936 (match_operand:V4SF 2 "register_operand" "x")
939 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
940 [(set_attr "type" "sse")
941 (set_attr "prefix" "vex")
942 (set_attr "mode" "SF")])
944 (define_insn "sse_vmrsqrtv4sf2"
945 [(set (match_operand:V4SF 0 "register_operand" "=x")
947 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
949 (match_operand:V4SF 2 "register_operand" "0")
952 "rsqrtss\t{%1, %0|%0, %1}"
953 [(set_attr "type" "sse")
954 (set_attr "mode" "SF")])
956 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
957 ;; isn't really correct, as those rtl operators aren't defined when
958 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
960 (define_expand "<code><mode>3"
961 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
962 (smaxmin:AVX256MODEF2P
963 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
964 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
965 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
967 if (!flag_finite_math_only)
968 operands[1] = force_reg (<MODE>mode, operands[1]);
969 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
972 (define_expand "<code><mode>3"
973 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
975 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
976 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
977 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
979 if (!flag_finite_math_only)
980 operands[1] = force_reg (<MODE>mode, operands[1]);
981 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX three-operand packed min/max, matched only when flag_finite_math_only
;; is set and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; marked commutative ("%x"), so it may be exchanged with operand 2.
984 (define_insn "*avx_<code><mode>3_finite"
985 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
987 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
988 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
989 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
990 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
991 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
992 [(set_attr "type" "sseadd")
993 (set_attr "prefix" "vex")
994 (set_attr "mode" "<MODE>")])
;; SSE two-operand packed min/max, matched only when flag_finite_math_only
;; is set and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; tied to the destination and marked commutative ("%0").
996 (define_insn "*<code><mode>3_finite"
997 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
999 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1000 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1001 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1002 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1003 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1004 [(set_attr "type" "sseadd")
1005 (set_attr "mode" "<MODE>")])
;; AVX three-operand packed min/max for the general case (no
;; flag_finite_math_only requirement in the condition).  Operand 1 must be
;; a register and carries no "%" commutative modifier: the operands of
;; min/max must not be exchanged when NaNs or -0.0 may be present, which
;; matches the operand 1 handling of the SSE "*<code><mode>3" pattern.
1007 (define_insn "*avx_<code><mode>3"
1008 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1010 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1011 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1012 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1013 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1014 [(set_attr "type" "sseadd")
1015 (set_attr "prefix" "vex")
1016 (set_attr "mode" "<avxvecmode>")])
;; SSE two-operand packed min/max for the general case.  Operand 1 is a
;; register tied to the destination ("0") and has no commutative modifier.
1018 (define_insn "*<code><mode>3"
1019 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1021 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1023 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1024 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1025 [(set_attr "type" "sseadd")
1026 (set_attr "mode" "<MODE>")])
;; AVX scalar ("s" suffix) min/max expressed as a vec_merge on the 128-bit
;; SSEMODEF2P modes; three-operand VEX form.
;; NOTE(review): "type" is "sse" here while <sse>_vm<code><mode>3 uses
;; "sseadd" -- confirm the difference is intended.
1028 (define_insn "*avx_vm<code><mode>3"
1029 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1030 (vec_merge:SSEMODEF2P
1032 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1033 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1036 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1037 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1038 [(set_attr "type" "sse")
1039 (set_attr "prefix" "vex")
1040 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar ("s" suffix) min/max expressed as a vec_merge; two-operand
;; form with operand 1 tied to the destination ("0").
1042 (define_insn "<sse>_vm<code><mode>3"
1043 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1044 (vec_merge:SSEMODEF2P
1046 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1047 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<ssescalarmode>")])
1055 ;; These versions of the min/max patterns implement exactly the operations
1056 ;; min = (op1 < op2 ? op1 : op2)
1057 ;; max = (!(op1 < op2) ? op1 : op2)
1058 ;; Their operands are not commutative, and thus they may be used in the
1059 ;; presence of -0.0 and NaN.
;; AVX form of the non-commutative min: emits vminp<suffix> with operand 1
;; restricted to a register and operand 2 a register or memory.
1061 (define_insn "*avx_ieee_smin<mode>3"
1062 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1064 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1065 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1067 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1068 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1069 [(set_attr "type" "sseadd")
1070 (set_attr "prefix" "vex")
1071 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the non-commutative max: emits vmaxp<suffix> with operand 1
;; restricted to a register and operand 2 a register or memory.
1073 (define_insn "*avx_ieee_smax<mode>3"
1074 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1076 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1077 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1079 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1080 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "type" "sseadd")
1082 (set_attr "prefix" "vex")
1083 (set_attr "mode" "<avxvecmode>")])
;; SSE form of the non-commutative min: emits minp<suffix>; operand 1 is
;; tied to the destination register ("0").
1085 (define_insn "*ieee_smin<mode>3"
1086 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1088 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1089 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1091 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1092 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1093 [(set_attr "type" "sseadd")
1094 (set_attr "mode" "<MODE>")])
;; SSE form of the non-commutative max: emits maxp<suffix>; operand 1 is
;; tied to the destination register ("0").
1096 (define_insn "*ieee_smax<mode>3"
1097 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1099 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1100 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1102 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1103 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1104 [(set_attr "type" "sseadd")
1105 (set_attr "mode" "<MODE>")])
;; Emits vaddsubps on V8SF (three-operand VEX form).  The RTL reuses
;; operands 1 and 2 through match_dup in its (minus ...) arm.
1107 (define_insn "avx_addsubv8sf3"
1108 [(set (match_operand:V8SF 0 "register_operand" "=x")
1111 (match_operand:V8SF 1 "register_operand" "x")
1112 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1113 (minus:V8SF (match_dup 1) (match_dup 2))
1116 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1117 [(set_attr "type" "sseadd")
1118 (set_attr "prefix" "vex")
1119 (set_attr "mode" "V8SF")])
;; Emits vaddsubpd on V4DF (three-operand VEX form).  The RTL reuses
;; operands 1 and 2 through match_dup in its (minus ...) arm.
1121 (define_insn "avx_addsubv4df3"
1122 [(set (match_operand:V4DF 0 "register_operand" "=x")
1125 (match_operand:V4DF 1 "register_operand" "x")
1126 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1127 (minus:V4DF (match_dup 1) (match_dup 2))
1130 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "prefix" "vex")
1133 (set_attr "mode" "V4DF")])
;; 128-bit AVX variant: emits vaddsubps on V4SF with a separate destination
;; register (operand 1 uses "x" rather than a matching constraint).
1135 (define_insn "*avx_addsubv4sf3"
1136 [(set (match_operand:V4SF 0 "register_operand" "=x")
1139 (match_operand:V4SF 1 "register_operand" "x")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1141 (minus:V4SF (match_dup 1) (match_dup 2))
1144 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1145 [(set_attr "type" "sseadd")
1146 (set_attr "prefix" "vex")
1147 (set_attr "mode" "V4SF")])
;; SSE3 variant: emits two-operand addsubps on V4SF with operand 1 tied to
;; the destination ("0").  Sets the prefix_rep attribute to "1".
1149 (define_insn "sse3_addsubv4sf3"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x")
1153 (match_operand:V4SF 1 "register_operand" "0")
1154 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1155 (minus:V4SF (match_dup 1) (match_dup 2))
1158 "addsubps\t{%2, %0|%0, %2}"
1159 [(set_attr "type" "sseadd")
1160 (set_attr "prefix_rep" "1")
1161 (set_attr "mode" "V4SF")])
;; 128-bit AVX variant: emits vaddsubpd on V2DF with a separate destination
;; register (operand 1 uses "x" rather than a matching constraint).
1163 (define_insn "*avx_addsubv2df3"
1164 [(set (match_operand:V2DF 0 "register_operand" "=x")
1167 (match_operand:V2DF 1 "register_operand" "x")
1168 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1169 (minus:V2DF (match_dup 1) (match_dup 2))
1172 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1173 [(set_attr "type" "sseadd")
1174 (set_attr "prefix" "vex")
1175 (set_attr "mode" "V2DF")])
;; SSE3 variant: emits two-operand addsubpd on V2DF with operand 1 tied to
;; the destination ("0").  Sets the atom_unit attribute to "complex".
1177 (define_insn "sse3_addsubv2df3"
1178 [(set (match_operand:V2DF 0 "register_operand" "=x")
1181 (match_operand:V2DF 1 "register_operand" "0")
1182 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1183 (minus:V2DF (match_dup 1) (match_dup 2))
1186 "addsubpd\t{%2, %0|%0, %2}"
1187 [(set_attr "type" "sseadd")
1188 (set_attr "atom_unit" "complex")
1189 (set_attr "mode" "V2DF")])
;; 256-bit horizontal add/sub on V4DF: emits vh<plusminus_mnemonic>pd.
;; The RTL picks DF elements 0..3 of operand 1 and of operand 2 with
;; vec_select, pairing element 0 with 1 and element 2 with 3.
1191 (define_insn "avx_h<plusminus_insn>v4df3"
1192 [(set (match_operand:V4DF 0 "register_operand" "=x")
1197 (match_operand:V4DF 1 "register_operand" "x")
1198 (parallel [(const_int 0)]))
1199 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1201 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1202 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1206 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1207 (parallel [(const_int 0)]))
1208 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1210 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1211 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1213 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "prefix" "vex")
1216 (set_attr "mode" "V4DF")])
;; 256-bit horizontal add/sub on V8SF: emits vh<plusminus_mnemonic>ps.
;; The RTL lists SF elements 0..3 of operand 1, then 0..3 of operand 2,
;; then 4..7 of operand 1, then 4..7 of operand 2, each taken pairwise
;; through vec_select.
1218 (define_insn "avx_h<plusminus_insn>v8sf3"
1219 [(set (match_operand:V8SF 0 "register_operand" "=x")
1225 (match_operand:V8SF 1 "register_operand" "x")
1226 (parallel [(const_int 0)]))
1227 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1229 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1230 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1234 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1235 (parallel [(const_int 0)]))
1236 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1238 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1239 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1243 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1250 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1251 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1253 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1254 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1256 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1257 [(set_attr "type" "sseadd")
1258 (set_attr "prefix" "vex")
1259 (set_attr "mode" "V8SF")])
;; 128-bit AVX horizontal add/sub on V4SF: emits vh<plusminus_mnemonic>ps
;; with a separate destination.  SF elements 0..3 of operand 1 and of
;; operand 2 are selected pairwise with vec_select.
1261 (define_insn "*avx_h<plusminus_insn>v4sf3"
1262 [(set (match_operand:V4SF 0 "register_operand" "=x")
1267 (match_operand:V4SF 1 "register_operand" "x")
1268 (parallel [(const_int 0)]))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1272 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1276 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1277 (parallel [(const_int 0)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V4SF")])
;; SSE3 horizontal add/sub on V4SF: emits two-operand
;; h<plusminus_mnemonic>ps with operand 1 tied to the destination ("0").
;; Sets atom_unit to "complex" and prefix_rep to "1".
1288 (define_insn "sse3_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "0")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "atom_unit" "complex")
1313 (set_attr "prefix_rep" "1")
1314 (set_attr "mode" "V4SF")])
;; 128-bit AVX horizontal add/sub on V2DF: emits vh<plusminus_mnemonic>pd.
;; DF elements 0 and 1 of operand 1, and of operand 2, are selected with
;; vec_select.
1316 (define_insn "*avx_h<plusminus_insn>v2df3"
1317 [(set (match_operand:V2DF 0 "register_operand" "=x")
1321 (match_operand:V2DF 1 "register_operand" "x")
1322 (parallel [(const_int 0)]))
1323 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1326 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1327 (parallel [(const_int 0)]))
1328 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1330 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1331 [(set_attr "type" "sseadd")
1332 (set_attr "prefix" "vex")
1333 (set_attr "mode" "V2DF")])
;; SSE3 horizontal add/sub on V2DF: emits two-operand
;; h<plusminus_mnemonic>pd with operand 1 tied to the destination ("0").
1335 (define_insn "sse3_h<plusminus_insn>v2df3"
1336 [(set (match_operand:V2DF 0 "register_operand" "=x")
1340 (match_operand:V2DF 1 "register_operand" "0")
1341 (parallel [(const_int 0)]))
1342 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1345 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1346 (parallel [(const_int 0)]))
1347 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1349 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1350 [(set_attr "type" "sseadd")
1351 (set_attr "mode" "V2DF")])
;; Expander for a V4SF sum reduction of operand 1 into operand 0.  Two
;; strategies appear below: two chained gen_sse3_haddv4sf3 calls through a
;; fresh temporary, or ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the test that selects between them is not shown here --
;; presumably an SSE3 availability check; confirm.
1353 (define_expand "reduc_splus_v4sf"
1354 [(match_operand:V4SF 0 "register_operand" "")
1355 (match_operand:V4SF 1 "register_operand" "")]
1360 rtx tmp = gen_reg_rtx (V4SFmode);
1361 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1362 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1365 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for a V2DF sum reduction: a single gen_sse3_haddv2df3 with
;; operand 1 supplied as both sources.
1369 (define_expand "reduc_splus_v2df"
1370 [(match_operand:V2DF 0 "register_operand" "")
1371 (match_operand:V2DF 1 "register_operand" "")]
1374 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for a V4SF max reduction: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
1378 (define_expand "reduc_smax_v4sf"
1379 [(match_operand:V4SF 0 "register_operand" "")
1380 (match_operand:V4SF 1 "register_operand" "")]
1383 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for a V4SF min reduction: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
1387 (define_expand "reduc_smin_v4sf"
1388 [(match_operand:V4SF 0 "register_operand" "")
1389 (match_operand:V4SF 1 "register_operand" "")]
1392 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1398 ;; Parallel floating point comparisons
1400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate immediate: emits
;; vcmpp<suffix> with operand 3 (an SImode const_0_to_31_operand) as the
;; immediate; length_immediate is 1.
1402 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1403 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1405 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1406 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1407 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1410 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1411 [(set_attr "type" "ssecmp")
1412 (set_attr "length_immediate" "1")
1413 (set_attr "prefix" "vex")
1414 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate immediate, expressed as a
;; vec_merge on the 128-bit SSEMODEF2P modes: emits vcmps<suffix> with
;; operand 3 (an SImode const_0_to_31_operand) as the immediate.
;; Operand 0 is an output and therefore needs the "=x" constraint, as in
;; the packed avx_cmpp pattern; an empty constraint string gives the
;; register allocator no class or output marker for the destination.
1416 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1417 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1418 (vec_merge:SSEMODEF2P
1420 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1421 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1422 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1427 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1428 [(set_attr "type" "ssecmp")
1429 (set_attr "length_immediate" "1")
1430 (set_attr "prefix" "vex")
1431 (set_attr "mode" "<ssescalarmode>")])
1433 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1434 ;; may generate 256bit vector compare instructions.
;; AVX packed mask compare: operator 3 must satisfy
;; avx_comparison_float_operator and is printed through the %D3 modifier
;; inside the vcmp...p<suffix> mnemonic.
1435 (define_insn "*avx_maskcmp<mode>3"
1436 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1437 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1438 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1439 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1440 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1441 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1442 [(set_attr "type" "ssecmp")
1443 (set_attr "prefix" "vex")
1444 (set_attr "length_immediate" "1")
1445 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over SSEMODEF4 (SF, DF, V4SF, V2DF): operator 3 must
;; satisfy sse_comparison_operator and is printed through %D3.  Operand 1
;; is tied to the destination ("0").  The condition accepts either a scalar
;; or a vector SSE float mode.
1447 (define_insn "<sse>_maskcmp<mode>3"
1448 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1449 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1450 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1451 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1452 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1454 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "mode" "<MODE>")])
;; SSE scalar mask compare expressed as a vec_merge: emits cmp<cond>s<suffix>
;; with operand 1 tied to the destination.  Disabled when TARGET_SSE5 is set.
1459 (define_insn "<sse>_vmmaskcmp<mode>3"
1460 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1461 (vec_merge:SSEMODEF2P
1462 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1463 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1464 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1467 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1468 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "length_immediate" "1")
1471 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 and element 0 of
;; operand 1 (each selected with (parallel [(const_int 0)])).  Emits
;; %vcomis<suffix>; the "prefix" attribute is "maybe_vex" and prefix_data16
;; is derived from whether the mode attribute is DF.
1473 (define_insn "<sse>_comi"
1474 [(set (reg:CCFP FLAGS_REG)
1477 (match_operand:<ssevecmode> 0 "register_operand" "x")
1478 (parallel [(const_int 0)]))
1480 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1481 (parallel [(const_int 0)]))))]
1482 "SSE_FLOAT_MODE_P (<MODE>mode)"
1483 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "ssecomi")
1485 (set_attr "prefix" "maybe_vex")
1486 (set_attr "prefix_rep" "0")
1487 (set (attr "prefix_data16")
1488 (if_then_else (eq_attr "mode" "DF")
1490 (const_string "0")))
1491 (set_attr "mode" "<MODE>")])
;; Same operand shape as <sse>_comi, but sets FLAGS_REG in CCFPU mode and
;; emits %vucomis<suffix>.  prefix_data16 is again derived from whether the
;; mode attribute is DF.
1493 (define_insn "<sse>_ucomi"
1494 [(set (reg:CCFPU FLAGS_REG)
1497 (match_operand:<ssevecmode> 0 "register_operand" "x")
1498 (parallel [(const_int 0)]))
1500 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1501 (parallel [(const_int 0)]))))]
1502 "SSE_FLOAT_MODE_P (<MODE>mode)"
1503 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1504 [(set_attr "type" "ssecomi")
1505 (set_attr "prefix" "maybe_vex")
1506 (set_attr "prefix_rep" "0")
1507 (set (attr "prefix_data16")
1508 (if_then_else (eq_attr "mode" "DF")
1510 (const_string "0")))
1511 (set_attr "mode" "<MODE>")])
;; Vector conditional select expander for SSEMODEF2P: operand 3 is the
;; comparison (matched with an empty predicate) applied to operands 4 and 5;
;; operands 1 and 2 are the two values selected between.  Expansion is
;; delegated to ix86_expand_fp_vcond, whose result is tested below.
1513 (define_expand "vcond<mode>"
1514 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1515 (if_then_else:SSEMODEF2P
1516 (match_operator 3 ""
1517 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1518 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1519 (match_operand:SSEMODEF2P 1 "general_operand" "")
1520 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1521 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1523 if (ix86_expand_fp_vcond (operands))
1529 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1531 ;; Parallel floating point logical operations
1533 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not on float vector modes: emits vandnp<suffix> in
;; three-operand VEX form; operand 2 may be memory.
1535 (define_insn "avx_andnot<mode>3"
1536 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1539 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1540 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1541 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1542 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1543 [(set_attr "type" "sselog")
1544 (set_attr "prefix" "vex")
1545 (set_attr "mode" "<avxvecmode>")])
;; SSE packed and-not on float vector modes: emits two-operand
;; andnp<suffix> with operand 1 tied to the destination ("0").
1547 (define_insn "<sse>_andnot<mode>3"
1548 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1551 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1552 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1553 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1554 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sselog")
1556 (set_attr "mode" "<MODE>")])
;; Expander for the plogic code iterator on the 256-bit float vector modes;
;; its only preparation step is ix86_fixup_binary_operands_no_copy.
1558 (define_expand "<code><mode>3"
1559 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1560 (plogic:AVX256MODEF2P
1561 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1562 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1563 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1564 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical operation on float vector modes: emits
;; v<plogicprefix>p<suffix>.  Operand 1 is commutative ("%x") and the
;; pattern additionally requires ix86_binary_operator_ok.
1566 (define_insn "*avx_<code><mode>3"
1567 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1569 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1570 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1571 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1572 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1573 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1574 [(set_attr "type" "sselog")
1575 (set_attr "prefix" "vex")
1576 (set_attr "mode" "<avxvecmode>")])
;; Expander on the 128-bit SSEMODEF2P modes, enabled by
;; SSE_VEC_FLOAT_MODE_P; its only preparation step is
;; ix86_fixup_binary_operands_no_copy.
1578 (define_expand "<code><mode>3"
1579 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1581 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1582 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1583 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1584 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed logical operation on float vector modes: emits two-operand
;; <plogicprefix>p<suffix>.  Operand 1 is tied to the destination and
;; commutative ("%0"); the pattern requires ix86_binary_operator_ok.
1586 (define_insn "*<code><mode>3"
1587 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1589 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1590 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1591 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1592 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1593 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1594 [(set_attr "type" "sselog")
1595 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is a mask built by
;; ix86_build_signbit_mask for the scalar element mode; operands 4 and 5 are
;; fresh temporaries.  Operand 1 is paired with (not mask), operand 2 is
;; ANDed with mask, and operand 0 is the IOR of the two temporaries.
1597 (define_expand "copysign<mode>3"
1600 (not:SSEMODEF2P (match_dup 3))
1601 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1603 (and:SSEMODEF2P (match_dup 3)
1604 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1605 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1606 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1607 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1609 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1611 operands[4] = gen_reg_rtx (<MODE>mode);
1612 operands[5] = gen_reg_rtx (<MODE>mode);
1615 ;; Also define scalar versions. These are used for abs, neg, and
1616 ;; conditional move. Using subregs into vector modes causes register
1617 ;; allocation lossage. These patterns do not allow memory operands
1618 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not for AVX: emits the packed vandnp<suffix>
;; instruction on register-only operands; the "mode" attribute is the
;; corresponding vector mode (<ssevecmode>).
1620 (define_insn "*avx_andnot<mode>3"
1621 [(set (match_operand:MODEF 0 "register_operand" "=x")
1624 (match_operand:MODEF 1 "register_operand" "x"))
1625 (match_operand:MODEF 2 "register_operand" "x")))]
1626 "AVX_FLOAT_MODE_P (<MODE>mode)"
1627 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1628 [(set_attr "type" "sselog")
1629 (set_attr "prefix" "vex")
1630 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not for SSE: emits andnp<suffix> on register-only
;; operands with operand 1 tied to the destination ("0").
1632 (define_insn "*andnot<mode>3"
1633 [(set (match_operand:MODEF 0 "register_operand" "=x")
1636 (match_operand:MODEF 1 "register_operand" "0"))
1637 (match_operand:MODEF 2 "register_operand" "x")))]
1638 "SSE_FLOAT_MODE_P (<MODE>mode)"
1639 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1640 [(set_attr "type" "sselog")
1641 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation for AVX: emits the packed
;; v<plogicprefix>p<suffix> instruction on register-only operands.
1643 (define_insn "*avx_<code><mode>3"
1644 [(set (match_operand:MODEF 0 "register_operand" "=x")
1646 (match_operand:MODEF 1 "register_operand" "x")
1647 (match_operand:MODEF 2 "register_operand" "x")))]
1648 "AVX_FLOAT_MODE_P (<MODE>mode)"
1649 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1650 [(set_attr "type" "sselog")
1651 (set_attr "prefix" "vex")
1652 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation for SSE: emits <plogicprefix>p<suffix>
;; on register-only operands with operand 1 tied to the destination ("0").
1654 (define_insn "*<code><mode>3"
1655 [(set (match_operand:MODEF 0 "register_operand" "=x")
1657 (match_operand:MODEF 1 "register_operand" "0")
1658 (match_operand:MODEF 2 "register_operand" "x")))]
1659 "SSE_FLOAT_MODE_P (<MODE>mode)"
1660 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1661 [(set_attr "type" "sselog")
1662 (set_attr "mode" "<ssevecmode>")])
1664 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1666 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1667 ;; scalar versions of the instructions as well as the vector versions.
1669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1671 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1672 ;; combine to generate a multiply/add with two memory references. We then
1673 ;; split this insn, into loading up the destination register with one of the
1674 ;; memory operations. If we don't manage to split the insn, reload will
1675 ;; generate the appropriate moves. The reason this is needed, is that combine
1676 ;; has already folded one of the memory references into both the multiply and
1677 ;; add insns, and it can't generate a new pseudo. I.e.:
1678 ;; (set (reg1) (mem (addr1)))
1679 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1680 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add over SSEMODEF4 (SF, DF, V4SF, V2DF), with four
;; constraint alternatives.  Operands 1 and 2 are the multiplicands
;; (operand 1 is commutative) and operand 3 is the addend.  Requires
;; TARGET_SSE5, TARGET_FUSED_MADD and ix86_sse5_valid_op_p called with
;; (4, true, 2, true).
1682 (define_insn "sse5_fmadd<mode>4"
1683 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1686 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1687 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1688 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1689 "TARGET_SSE5 && TARGET_FUSED_MADD
1690 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1691 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1692 [(set_attr "type" "ssemuladd")
1693 (set_attr "mode" "<MODE>")])
1695 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split fires only when ix86_sse5_valid_op_p rejects the operands with
;; argument 1 but accepts them with argument 2, and operand 0 is not
;; mentioned in operands 1-3.  It calls ix86_expand_sse5_multiple_memory and
;; then re-emits the insn through gen_sse5_fmadd<mode>4.
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1704 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1705 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1706 && !reg_mentioned_p (operands[0], operands[1])
1707 && !reg_mentioned_p (operands[0], operands[2])
1708 && !reg_mentioned_p (operands[0], operands[3])"
1711 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1712 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1713 operands[2], operands[3]));
1717 ;; For the scalar operations, use operand1 for the upper words that aren't
1718 ;; modified, so restrict the forms that are generated.
1719 ;; Scalar version of fmadd
;; Two alternatives only; operand 1 is tied to the destination in both
;; ("0,0").  The condition calls ix86_sse5_valid_op_p with (4, true, 1, true).
1720 (define_insn "sse5_vmfmadd<mode>4"
1721 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1722 (vec_merge:SSEMODEF2P
1725 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1726 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1727 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1730 "TARGET_SSE5 && TARGET_FUSED_MADD
1731 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1732 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1733 [(set_attr "type" "ssemuladd")
1734 (set_attr "mode" "<MODE>")])
1736 ;; Floating multiply and subtract
1737 ;; Allow two memory operands the same as fmadd
;; Same operand layout, constraint alternatives and condition as
;; sse5_fmadd<mode>4; emits fmsub<suffix> instead.
1738 (define_insn "sse5_fmsub<mode>4"
1739 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1742 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1743 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1744 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1745 "TARGET_SSE5 && TARGET_FUSED_MADD
1746 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1747 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1748 [(set_attr "type" "ssemuladd")
1749 (set_attr "mode" "<MODE>")])
1751 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split fires only when ix86_sse5_valid_op_p rejects the operands with
;; argument 1 but accepts them with argument 2, and operand 0 is not
;; mentioned in operands 1-3.  It calls ix86_expand_sse5_multiple_memory and
;; then re-emits the insn through gen_sse5_fmsub<mode>4.
1753 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1756 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1757 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1758 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1760 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1761 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1762 && !reg_mentioned_p (operands[0], operands[1])
1763 && !reg_mentioned_p (operands[0], operands[2])
1764 && !reg_mentioned_p (operands[0], operands[3])"
1767 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1768 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1769 operands[2], operands[3]));
1773 ;; For the scalar operations, use operand1 for the upper words that aren't
1774 ;; modified, so restrict the forms that are generated.
1775 ;; Scalar version of fmsub
;; Two alternatives; operand 1 is tied to the destination in both.
;; NOTE(review): the last argument to ix86_sse5_valid_op_p is false here
;; but true in sse5_vmfmadd<mode>4 -- confirm the difference is intended.
1776 (define_insn "sse5_vmfmsub<mode>4"
1777 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1778 (vec_merge:SSEMODEF2P
1781 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1782 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1783 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1786 "TARGET_SSE5 && TARGET_FUSED_MADD
1787 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1788 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1789 [(set_attr "type" "ssemuladd")
1790 (set_attr "mode" "<MODE>")])
1792 ;; Floating point negative multiply and add
1793 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1794 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1795 ;; Allow two memory operands to help in optimizing.
;; Operand 3 appears first in the RTL, followed by the product operands 1
;; and 2; the output template still prints them in numeric order.  Four
;; constraint alternatives, same condition as sse5_fmadd<mode>4.
1796 (define_insn "sse5_fnmadd<mode>4"
1797 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1799 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1801 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1802 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1803 "TARGET_SSE5 && TARGET_FUSED_MADD
1804 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1805 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1806 [(set_attr "type" "ssemuladd")
1807 (set_attr "mode" "<MODE>")])
1809 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The split fires only when ix86_sse5_valid_op_p rejects the operands with
;; argument 1 but accepts them with argument 2, and operand 0 is not
;; mentioned in operands 1-3.  It calls ix86_expand_sse5_multiple_memory and
;; then re-emits the insn through gen_sse5_fnmadd<mode>4.
1811 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1813 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1815 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1816 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1818 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1819 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1820 && !reg_mentioned_p (operands[0], operands[1])
1821 && !reg_mentioned_p (operands[0], operands[2])
1822 && !reg_mentioned_p (operands[0], operands[3])"
1825 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1826 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1827 operands[2], operands[3]));
1831 ;; For the scalar operations, use operand1 for the upper words that aren't
1832 ;; modified, so restrict the forms that are generated.
1833 ;; Scalar version of fnmadd
;; Two alternatives; operand 3 appears first in the RTL and operand 1 is
;; tied to the destination in both alternatives.  The condition calls
;; ix86_sse5_valid_op_p with (4, true, 1, true).
1834 (define_insn "sse5_vmfnmadd<mode>4"
1835 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1836 (vec_merge:SSEMODEF2P
1838 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1840 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1841 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1844 "TARGET_SSE5 && TARGET_FUSED_MADD
1845 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1846 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
1850 ;; Floating point negative multiply and subtract
1851 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1852 ;; Allow 2 memory operands to help with optimization
;; Only two constraint alternatives, with operand 1 tied to the destination
;; in both and no commutative modifier.  The condition calls
;; ix86_sse5_valid_op_p with (4, true, 2, false).
1853 (define_insn "sse5_fnmsub<mode>4"
1854 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1858 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1859 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1860 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1861 "TARGET_SSE5 && TARGET_FUSED_MADD
1862 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1863 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1864 [(set_attr "type" "ssemuladd")
1865 (set_attr "mode" "<MODE>")])
1867 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split fires only when ix86_sse5_valid_op_p rejects the operands with
;; argument 1 but accepts them with argument 2 (final argument false in
;; both calls), and operand 0 is not mentioned in operands 1-3.  It calls
;; ix86_expand_sse5_multiple_memory and then re-emits the insn through
;; gen_sse5_fnmsub<mode>4.
1869 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1873 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1874 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1875 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1877 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1878 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1879 && !reg_mentioned_p (operands[0], operands[1])
1880 && !reg_mentioned_p (operands[0], operands[2])
1881 && !reg_mentioned_p (operands[0], operands[3])"
1884 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1885 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1886 operands[2], operands[3]));
1890 ;; For the scalar operations, use operand1 for the upper words that aren't
1891 ;; modified, so restrict the forms that are generated.
1892 ;; Scalar version of fnmsub
;; Two alternatives; operand 1 is tied to the destination in both.
;; NOTE(review): this condition passes 2 to ix86_sse5_valid_op_p, whereas
;; the other scalar (vm) patterns pass 1 -- confirm this is intended.
1893 (define_insn "sse5_vmfnmsub<mode>4"
1894 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1895 (vec_merge:SSEMODEF2P
1899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1900 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1901 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1904 "TARGET_SSE5 && TARGET_FUSED_MADD
1905 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1906 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1907 [(set_attr "type" "ssemuladd")
1908 (set_attr "mode" "<MODE>")])
1910 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1911 ;; even if the user used -mno-fused-madd
1912 ;; Parallel instructions. During instruction generation, just default
1913 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for fmadd on SSEMODEF2P with register-only operands,
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set it
;; emits gen_sse5_fmadd<mode>4 instead of the UNSPEC form.
1914 (define_expand "sse5i_fmadd<mode>4"
1915 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1919 (match_operand:SSEMODEF2P 1 "register_operand" "")
1920 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1921 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1922 UNSPEC_SSE5_INTRINSIC))]
1925 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1926 if (TARGET_FUSED_MADD)
1928 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1929 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC packed fmadd.  Four constraint
;; alternatives: the destination is tied to operand 1 (alternatives 0-1) or
;; to operand 3 (alternatives 2-3); operand 1 is marked commutative ("%").
;; Needs only TARGET_SSE5 plus an ix86_sse5_valid_op_p check.
1934 (define_insn "*sse5i_fmadd<mode>4"
1935 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1939 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1940 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1941 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1942 UNSPEC_SSE5_INTRINSIC))]
1943 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1944 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1945 [(set_attr "type" "ssemuladd")
1946 (set_attr "mode" "<MODE>")])
;; Expander for the intrinsic (UNSPEC_SSE5_INTRINSIC) packed fmsub.  All
;; operands are registers at expand time.  When TARGET_FUSED_MADD is set the
;; preparation code emits sse5_fmsub<mode>4 on the same operands.
1948 (define_expand "sse5i_fmsub<mode>4"
1949 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1953 (match_operand:SSEMODEF2P 1 "register_operand" "")
1954 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1955 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1956 UNSPEC_SSE5_INTRINSIC))]
1959 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1960 if (TARGET_FUSED_MADD)
1962 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1963 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC packed fmsub.  Uses the same
;; four constraint alternatives as *sse5i_fmadd<mode>4 (destination tied to
;; operand 1 or operand 3, operand 1 commutative).  Needs only TARGET_SSE5
;; plus an ix86_sse5_valid_op_p check.
1968 (define_insn "*sse5i_fmsub<mode>4"
1969 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1973 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1974 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1975 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1976 UNSPEC_SSE5_INTRINSIC))]
1977 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1978 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1979 [(set_attr "type" "ssemuladd")
1980 (set_attr "mode" "<MODE>")])
1982 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1983 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the intrinsic (UNSPEC_SSE5_INTRINSIC) packed fnmadd.  The
;; pattern lists operand 3 before operands 1 and 2.  All operands are
;; registers at expand time; when TARGET_FUSED_MADD is set the preparation
;; code emits sse5_fnmadd<mode>4 with the operands in numeric order.
1984 (define_expand "sse5i_fnmadd<mode>4"
1985 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1988 (match_operand:SSEMODEF2P 3 "register_operand" "")
1990 (match_operand:SSEMODEF2P 1 "register_operand" "")
1991 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1992 UNSPEC_SSE5_INTRINSIC))]
1995 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1996 if (TARGET_FUSED_MADD)
1998 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1999 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC packed fnmadd.  Operand 3
;; appears first in the RTL, followed by operands 1 and 2; the assembler
;; template still prints the operands in numeric order.  Operand 1 is marked
;; commutative ("%").
2004 (define_insn "*sse5i_fnmadd<mode>4"
2005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2008 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
2010 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
2011 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
2012 UNSPEC_SSE5_INTRINSIC))]
2013 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2014 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2015 [(set_attr "type" "ssemuladd")
2016 (set_attr "mode" "<MODE>")])
2018 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the intrinsic (UNSPEC_SSE5_INTRINSIC) packed fnmsub.  All
;; operands are registers at expand time.  When TARGET_FUSED_MADD is set the
;; preparation code emits sse5_fnmsub<mode>4 on the same operands.
2019 (define_expand "sse5i_fnmsub<mode>4"
2020 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2025 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2026 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2027 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
2028 UNSPEC_SSE5_INTRINSIC))]
2031 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2032 if (TARGET_FUSED_MADD)
2034 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
2035 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC packed fnmsub.  Here operand 1
;; carries no "%" commutative marker and the final ix86_sse5_valid_op_p
;; argument is false, in contrast to the fmadd/fmsub/fnmadd intrinsic insns,
;; which use "%" and pass true.
2040 (define_insn "*sse5i_fnmsub<mode>4"
2041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2046 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2047 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2048 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2049 UNSPEC_SSE5_INTRINSIC))]
2050 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2051 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2052 [(set_attr "type" "ssemuladd")
2053 (set_attr "mode" "<MODE>")])
2055 ;; Scalar instructions
;; Expander for the intrinsic scalar (vec_merge) fmadd wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time; when
;; TARGET_FUSED_MADD is set it emits sse5_vmfmadd<mode>4 on the same operands.
2056 (define_expand "sse5i_vmfmadd<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2059 [(vec_merge:SSEMODEF2P
2062 (match_operand:SSEMODEF2P 1 "register_operand" "")
2063 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2064 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2067 UNSPEC_SSE5_INTRINSIC))]
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2073 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
2079 ;; For the scalar operations, use operand1 for the upper words that aren't
2080 ;; modified, so restrict the forms that are accepted.
;; Matching insn for the intrinsic scalar fmadd.  Operand 1 must be a
;; register tied to the destination; at most one of operands 2 and 3 may be
;; memory per alternative.  The "mode" attribute is the scalar element mode.
2081 (define_insn "*sse5i_vmfmadd<mode>4"
2082 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2084 [(vec_merge:SSEMODEF2P
2087 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2088 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2089 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2092 UNSPEC_SSE5_INTRINSIC))]
2093 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2094 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2095 [(set_attr "type" "ssemuladd")
2096 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the intrinsic scalar (vec_merge) fmsub wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time; when
;; TARGET_FUSED_MADD is set it emits sse5_vmfmsub<mode>4 on the same operands.
2098 (define_expand "sse5i_vmfmsub<mode>4"
2099 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2101 [(vec_merge:SSEMODEF2P
2104 (match_operand:SSEMODEF2P 1 "register_operand" "")
2105 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2106 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2109 UNSPEC_SSE5_INTRINSIC))]
2112 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2113 if (TARGET_FUSED_MADD)
2115 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2116 operands[2], operands[3]));
;; Matching insn for the intrinsic scalar fmsub.  Operand 1 must be a
;; register tied to the destination; at most one of operands 2 and 3 may be
;; memory per alternative.  The "mode" attribute is the scalar element mode.
2121 (define_insn "*sse5i_vmfmsub<mode>4"
2122 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2124 [(vec_merge:SSEMODEF2P
2127 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2129 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2132 UNSPEC_SSE5_INTRINSIC))]
2133 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2134 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "<ssescalarmode>")])
2138 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the intrinsic scalar (vec_merge) fnmadd wrapped in
;; UNSPEC_SSE5_INTRINSIC.  The pattern lists operand 3 before operands 1 and
;; 2.  When TARGET_FUSED_MADD is set it emits sse5_vmfnmadd<mode>4 with the
;; operands in numeric order.
2139 (define_expand "sse5i_vmfnmadd<mode>4"
2140 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2142 [(vec_merge:SSEMODEF2P
2144 (match_operand:SSEMODEF2P 3 "register_operand" "")
2146 (match_operand:SSEMODEF2P 1 "register_operand" "")
2147 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2150 UNSPEC_SSE5_INTRINSIC))]
2153 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2154 if (TARGET_FUSED_MADD)
2156 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2157 operands[2], operands[3]));
;; Matching insn for the intrinsic scalar fnmadd.  Operand 1 is tied to the
;; destination in both alternatives.
;; NOTE(review): unlike the other *sse5i_vm* insns, operand 1 here uses
;; nonimmediate_operand with a "%" commutative marker rather than
;; register_operand "0,0" -- confirm this is intended for a scalar form whose
;; upper elements come from operand 1.
2162 (define_insn "*sse5i_vmfnmadd<mode>4"
2163 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2165 [(vec_merge:SSEMODEF2P
2167 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2169 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2170 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2173 UNSPEC_SSE5_INTRINSIC))]
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2175 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2176 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the intrinsic scalar (vec_merge) fnmsub wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time; when
;; TARGET_FUSED_MADD is set it emits sse5_vmfnmsub<mode>4 on the same
;; operands.
2179 (define_expand "sse5i_vmfnmsub<mode>4"
2180 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2182 [(vec_merge:SSEMODEF2P
2186 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2187 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2188 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2191 UNSPEC_SSE5_INTRINSIC))]
2194 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2195 if (TARGET_FUSED_MADD)
2197 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2198 operands[2], operands[3]));
;; Matching insn for the intrinsic scalar fnmsub.  Operand 1 must be a
;; register tied to the destination; at most one of operands 2 and 3 may be
;; memory per alternative.  The "mode" attribute is the scalar element mode.
2203 (define_insn "*sse5i_vmfnmsub<mode>4"
2204 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2206 [(vec_merge:SSEMODEF2P
2210 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2211 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2212 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2215 UNSPEC_SSE5_INTRINSIC))]
2216 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2217 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2218 [(set_attr "type" "ssemuladd")
2219 (set_attr "mode" "<ssescalarmode>")])
2221 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2223 ;; Parallel single-precision floating point conversion operations
2225 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF.
;; Operand 1 is a V4SF register tied to the destination, so the template
;; names only operands 2 and 0.
2227 (define_insn "sse_cvtpi2ps"
2228 [(set (match_operand:V4SF 0 "register_operand" "=x")
2231 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2232 (match_operand:V4SF 1 "register_operand" "0")
2235 "cvtpi2ps\t{%2, %0|%0, %2}"
2236 [(set_attr "type" "ssecvt")
2237 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (register or memory) converted through an unspec in
;; V4SI mode, of which elements 0 and 1 are selected into the V2SI
;; destination (constraint "y").  Tagged with unit "mmx".
2239 (define_insn "sse_cvtps2pi"
2240 [(set (match_operand:V2SI 0 "register_operand" "=y")
2242 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2244 (parallel [(const_int 0) (const_int 1)])))]
2246 "cvtps2pi\t{%1, %0|%0, %1}"
2247 [(set_attr "type" "ssecvt")
2248 (set_attr "unit" "mmx")
2249 (set_attr "mode" "DI")])
;; cvttps2pi: same shape as sse_cvtps2pi but expressed with the RTL "fix"
;; operator instead of an unspec; elements 0 and 1 of the V4SI result are
;; selected.  The rep prefix attribute is explicitly forced to 0.
2251 (define_insn "sse_cvttps2pi"
2252 [(set (match_operand:V2SI 0 "register_operand" "=y")
2254 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2255 (parallel [(const_int 0) (const_int 1)])))]
2257 "cvttps2pi\t{%1, %0|%0, %1}"
2258 [(set_attr "type" "ssecvt")
2259 (set_attr "unit" "mmx")
2260 (set_attr "prefix_rep" "0")
2261 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ss: converts the SImode operand 2 (register or memory)
;; to SF.  Operand 1 is an independent source register (constraint "x", not
;; tied to the destination), so the template takes three operands.
2263 (define_insn "*avx_cvtsi2ss"
2264 [(set (match_operand:V4SF 0 "register_operand" "=x")
2267 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2268 (match_operand:V4SF 1 "register_operand" "x")
2271 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2272 [(set_attr "type" "sseicvt")
2273 (set_attr "prefix" "vex")
2274 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF; operand 1 is tied to the
;; destination.  The two alternatives separate the register ("r") and memory
;; ("m") sources so the athlon/amdfam10 decode attributes can differ.
2276 (define_insn "sse_cvtsi2ss"
2277 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2280 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2281 (match_operand:V4SF 1 "register_operand" "0,0")
2284 "cvtsi2ss\t{%2, %0|%0, %2}"
2285 [(set_attr "type" "sseicvt")
2286 (set_attr "athlon_decode" "vector,double")
2287 (set_attr "amdfam10_decode" "vector,double")
2288 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ssq: converts the DImode operand 2 to SF, with operand 1
;; as an independent source register.  Only available when both TARGET_AVX
;; and TARGET_64BIT hold.
2290 (define_insn "*avx_cvtsi2ssq"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x")
2294 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2295 (match_operand:V4SF 1 "register_operand" "x")
2297 "TARGET_AVX && TARGET_64BIT"
2298 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2299 [(set_attr "type" "sseicvt")
2300 (set_attr "length_vex" "4")
2301 (set_attr "prefix" "vex")
2302 (set_attr "mode" "SF")])
;; cvtsi2ssq: converts the DImode operand 2 to SF; operand 1 is tied to the
;; destination.  Requires TARGET_SSE && TARGET_64BIT and sets prefix_rex.
;; NOTE(review): the second alternative of operand 2 is "rm", whereas
;; sse_cvtsi2ss and sse2_cvtsi2sdq use "m"; the per-alternative decode
;; attributes ("vector,double") suggest a register/memory split was intended
;; -- confirm.
2304 (define_insn "sse_cvtsi2ssq"
2305 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2308 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2309 (match_operand:V4SF 1 "register_operand" "0,0")
2311 "TARGET_SSE && TARGET_64BIT"
2312 "cvtsi2ssq\t{%2, %0|%0, %2}"
2313 [(set_attr "type" "sseicvt")
2314 (set_attr "prefix_rex" "1")
2315 (set_attr "athlon_decode" "vector,double")
2316 (set_attr "amdfam10_decode" "vector,double")
2317 (set_attr "mode" "SF")])
;; cvtss2si on a V4SF source: element 0 of operand 1 is converted to SImode
;; through UNSPEC_FIX_NOTRUNC.  The "%v" prefix in the template and
;; prefix "maybe_vex" let the same pattern serve the VEX encoding.
2319 (define_insn "sse_cvtss2si"
2320 [(set (match_operand:SI 0 "register_operand" "=r,r")
2323 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2324 (parallel [(const_int 0)]))]
2325 UNSPEC_FIX_NOTRUNC))]
2327 "%vcvtss2si\t{%1, %0|%0, %1}"
2328 [(set_attr "type" "sseicvt")
2329 (set_attr "athlon_decode" "double,vector")
2330 (set_attr "prefix_rep" "1")
2331 (set_attr "prefix" "maybe_vex")
2332 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SFmode operand rather than
;; element 0 of a V4SF vector; emits the same cvtss2si template.
2334 (define_insn "sse_cvtss2si_2"
2335 [(set (match_operand:SI 0 "register_operand" "=r,r")
2336 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2337 UNSPEC_FIX_NOTRUNC))]
2339 "%vcvtss2si\t{%1, %0|%0, %1}"
2340 [(set_attr "type" "sseicvt")
2341 (set_attr "athlon_decode" "double,vector")
2342 (set_attr "amdfam10_decode" "double,double")
2343 (set_attr "prefix_rep" "1")
2344 (set_attr "prefix" "maybe_vex")
2345 (set_attr "mode" "SI")])
;; 64-bit cvtss2siq on a V4SF source: element 0 of operand 1 is converted to
;; DImode through UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
2347 (define_insn "sse_cvtss2siq"
2348 [(set (match_operand:DI 0 "register_operand" "=r,r")
2351 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2352 (parallel [(const_int 0)]))]
2353 UNSPEC_FIX_NOTRUNC))]
2354 "TARGET_SSE && TARGET_64BIT"
2355 "%vcvtss2siq\t{%1, %0|%0, %1}"
2356 [(set_attr "type" "sseicvt")
2357 (set_attr "athlon_decode" "double,vector")
2358 (set_attr "prefix_rep" "1")
2359 (set_attr "prefix" "maybe_vex")
2360 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source is a plain SFmode operand; the
;; result is DImode.  Requires TARGET_SSE && TARGET_64BIT.
2362 (define_insn "sse_cvtss2siq_2"
2363 [(set (match_operand:DI 0 "register_operand" "=r,r")
2364 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2365 UNSPEC_FIX_NOTRUNC))]
2366 "TARGET_SSE && TARGET_64BIT"
2367 "%vcvtss2siq\t{%1, %0|%0, %1}"
2368 [(set_attr "type" "sseicvt")
2369 (set_attr "athlon_decode" "double,vector")
2370 (set_attr "amdfam10_decode" "double,double")
2371 (set_attr "prefix_rep" "1")
2372 (set_attr "prefix" "maybe_vex")
2373 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of the V4SF operand 1 converted to SImode; emits the
;; "cvtt" mnemonic (optionally VEX-prefixed via "%v").
2375 (define_insn "sse_cvttss2si"
2376 [(set (match_operand:SI 0 "register_operand" "=r,r")
2379 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2380 (parallel [(const_int 0)]))))]
2382 "%vcvttss2si\t{%1, %0|%0, %1}"
2383 [(set_attr "type" "sseicvt")
2384 (set_attr "athlon_decode" "double,vector")
2385 (set_attr "amdfam10_decode" "double,double")
2386 (set_attr "prefix_rep" "1")
2387 (set_attr "prefix" "maybe_vex")
2388 (set_attr "mode" "SI")])
;; 64-bit cvttss2siq: element 0 of the V4SF operand 1 converted to DImode.
;; Requires TARGET_SSE && TARGET_64BIT.
2390 (define_insn "sse_cvttss2siq"
2391 [(set (match_operand:DI 0 "register_operand" "=r,r")
2394 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2395 (parallel [(const_int 0)]))))]
2396 "TARGET_SSE && TARGET_64BIT"
2397 "%vcvttss2siq\t{%1, %0|%0, %1}"
2398 [(set_attr "type" "sseicvt")
2399 (set_attr "athlon_decode" "double,vector")
2400 (set_attr "amdfam10_decode" "double,double")
2401 (set_attr "prefix_rep" "1")
2402 (set_attr "prefix" "maybe_vex")
2403 (set_attr "mode" "DI")])
;; VEX vcvtdq2ps, iterated over AVXMODEDCVTDQ2PS: integer-to-float conversion
;; of operand 1, whose mode is the iterator's <avxcvtvecmode>.
2405 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2406 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2407 (float:AVXMODEDCVTDQ2PS
2408 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2410 "vcvtdq2ps\t{%1, %0|%0, %1}"
2411 [(set_attr "type" "ssecvt")
2412 (set_attr "prefix" "vex")
2413 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: V4SI (register or memory) to V4SF conversion.
2415 (define_insn "sse2_cvtdq2ps"
2416 [(set (match_operand:V4SF 0 "register_operand" "=x")
2417 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2419 "cvtdq2ps\t{%1, %0|%0, %1}"
2420 [(set_attr "type" "ssecvt")
2421 (set_attr "mode" "V4SF")])
;; Expander for a V4SI -> V4SF conversion built from several steps: a
;; "float" of operand 1, an "lt" of operand 5 against operand 3, an "and" of
;; operand 6 with operand 4, and a final "plus" of operands 5 and 7 into
;; operand 0.  The preparation code sets operand 3 to a zero vector register,
;; operand 4 to a register holding a vector built from the constant
;; 1.0 * 2^32 (via real_ldexp), and operands 5-7 to fresh V4SF pseudos.
;; Presumably this corrects the signed conversion for inputs with the top
;; bit set (the name suggests an unsigned conversion) -- TODO confirm.
2423 (define_expand "sse2_cvtudq2ps"
2425 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2427 (lt:V4SF (match_dup 5) (match_dup 3)))
2429 (and:V4SF (match_dup 6) (match_dup 4)))
2430 (set (match_operand:V4SF 0 "register_operand" "")
2431 (plus:V4SF (match_dup 5) (match_dup 7)))]
2434 REAL_VALUE_TYPE TWO32r;
2438 real_ldexp (&TWO32r, &dconst1, 32);
2439 x = const_double_from_real_value (TWO32r, SFmode);
2441 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2442 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2444 for (i = 5; i < 8; i++)
2445 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX vcvtps2dq, iterated over AVXMODEDCVTPS2DQ: float-to-integer conversion
;; of operand 1 expressed as UNSPEC_FIX_NOTRUNC.
2448 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2449 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2450 (unspec:AVXMODEDCVTPS2DQ
2451 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2452 UNSPEC_FIX_NOTRUNC))]
2454 "vcvtps2dq\t{%1, %0|%0, %1}"
2455 [(set_attr "type" "ssecvt")
2456 (set_attr "prefix" "vex")
2457 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF to V4SI conversion expressed as UNSPEC_FIX_NOTRUNC; marked
;; as needing the data16 prefix.
2459 (define_insn "sse2_cvtps2dq"
2460 [(set (match_operand:V4SI 0 "register_operand" "=x")
2461 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2462 UNSPEC_FIX_NOTRUNC))]
2464 "cvtps2dq\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "ssecvt")
2466 (set_attr "prefix_data16" "1")
2467 (set_attr "mode" "TI")])
;; VEX vcvttps2dq, iterated over AVXMODEDCVTPS2DQ: float-to-integer
;; conversion of operand 1 expressed with the RTL "fix" operator.
2469 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2470 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2471 (fix:AVXMODEDCVTPS2DQ
2472 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2474 "vcvttps2dq\t{%1, %0|%0, %1}"
2475 [(set_attr "type" "ssecvt")
2476 (set_attr "prefix" "vex")
2477 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: V4SF to V4SI conversion via the RTL "fix" operator; uses the
;; rep prefix and explicitly no data16 prefix.
2479 (define_insn "sse2_cvttps2dq"
2480 [(set (match_operand:V4SI 0 "register_operand" "=x")
2481 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2483 "cvttps2dq\t{%1, %0|%0, %1}"
2484 [(set_attr "type" "ssecvt")
2485 (set_attr "prefix_rep" "1")
2486 (set_attr "prefix_data16" "0")
2487 (set_attr "mode" "TI")])
2489 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2491 ;; Parallel double-precision floating point conversion operations
2493 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF conversion.  Alternative 0 takes the source from a
;; "y" register and is tagged unit "mmx" with a data16 prefix; alternative 1
;; takes it from memory and leaves both attributes at their defaults ("*").
2495 (define_insn "sse2_cvtpi2pd"
2496 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2497 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2499 "cvtpi2pd\t{%1, %0|%0, %1}"
2500 [(set_attr "type" "ssecvt")
2501 (set_attr "unit" "mmx,*")
2502 (set_attr "prefix_data16" "1,*")
2503 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF to V2SI conversion expressed as UNSPEC_FIX_NOTRUNC, with
;; the result in a "y" register; tagged unit "mmx".
2505 (define_insn "sse2_cvtpd2pi"
2506 [(set (match_operand:V2SI 0 "register_operand" "=y")
2507 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2508 UNSPEC_FIX_NOTRUNC))]
2510 "cvtpd2pi\t{%1, %0|%0, %1}"
2511 [(set_attr "type" "ssecvt")
2512 (set_attr "unit" "mmx")
2513 (set_attr "prefix_data16" "1")
2514 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF to V2SI conversion via the RTL "fix" operator, with the
;; result in a "y" register; tagged unit "mmx".
;; NOTE(review): mode attribute is "TI" here but "DI" on sse2_cvtpd2pi --
;; confirm whether the difference is intentional.
2516 (define_insn "sse2_cvttpd2pi"
2517 [(set (match_operand:V2SI 0 "register_operand" "=y")
2518 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2520 "cvttpd2pi\t{%1, %0|%0, %1}"
2521 [(set_attr "type" "ssecvt")
2522 (set_attr "unit" "mmx")
2523 (set_attr "prefix_data16" "1")
2524 (set_attr "mode" "TI")])
;; VEX form of cvtsi2sd: converts the SImode operand 2 to DF, with operand 1
;; as an independent V2DF source register (three-operand template).
2526 (define_insn "*avx_cvtsi2sd"
2527 [(set (match_operand:V2DF 0 "register_operand" "=x")
2530 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2531 (match_operand:V2DF 1 "register_operand" "x")
2534 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2535 [(set_attr "type" "sseicvt")
2536 (set_attr "prefix" "vex")
2537 (set_attr "mode" "DF")])
;; cvtsi2sd: converts the SImode operand 2 to DF; operand 1 is tied to the
;; destination.  Register and memory sources are separate alternatives so the
;; decode attributes can differ.
2539 (define_insn "sse2_cvtsi2sd"
2540 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2543 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2544 (match_operand:V2DF 1 "register_operand" "0,0")
2547 "cvtsi2sd\t{%2, %0|%0, %2}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "mode" "DF")
2550 (set_attr "athlon_decode" "double,direct")
2551 (set_attr "amdfam10_decode" "vector,double")])
;; VEX form of cvtsi2sdq: converts the DImode operand 2 to DF, with operand 1
;; as an independent source register.  Requires TARGET_AVX && TARGET_64BIT.
2553 (define_insn "*avx_cvtsi2sdq"
2554 [(set (match_operand:V2DF 0 "register_operand" "=x")
2557 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2558 (match_operand:V2DF 1 "register_operand" "x")
2560 "TARGET_AVX && TARGET_64BIT"
2561 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2562 [(set_attr "type" "sseicvt")
2563 (set_attr "length_vex" "4")
2564 (set_attr "prefix" "vex")
2565 (set_attr "mode" "DF")])
;; cvtsi2sdq: converts the DImode operand 2 to DF; operand 1 is tied to the
;; destination.  Requires TARGET_SSE2 && TARGET_64BIT and sets prefix_rex.
2567 (define_insn "sse2_cvtsi2sdq"
2568 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2571 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2572 (match_operand:V2DF 1 "register_operand" "0,0")
2574 "TARGET_SSE2 && TARGET_64BIT"
2575 "cvtsi2sdq\t{%2, %0|%0, %2}"
2576 [(set_attr "type" "sseicvt")
2577 (set_attr "prefix_rex" "1")
2578 (set_attr "mode" "DF")
2579 (set_attr "athlon_decode" "double,direct")
2580 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on a V2DF source: element 0 of operand 1 is converted to SImode
;; through UNSPEC_FIX_NOTRUNC; "%v"/maybe_vex allow the VEX encoding.
2582 (define_insn "sse2_cvtsd2si"
2583 [(set (match_operand:SI 0 "register_operand" "=r,r")
2586 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2587 (parallel [(const_int 0)]))]
2588 UNSPEC_FIX_NOTRUNC))]
2590 "%vcvtsd2si\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "sseicvt")
2592 (set_attr "athlon_decode" "double,vector")
2593 (set_attr "prefix_rep" "1")
2594 (set_attr "prefix" "maybe_vex")
2595 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand rather
;; than element 0 of a V2DF vector; emits the same cvtsd2si template.
2597 (define_insn "sse2_cvtsd2si_2"
2598 [(set (match_operand:SI 0 "register_operand" "=r,r")
2599 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2600 UNSPEC_FIX_NOTRUNC))]
2602 "%vcvtsd2si\t{%1, %0|%0, %1}"
2603 [(set_attr "type" "sseicvt")
2604 (set_attr "athlon_decode" "double,vector")
2605 (set_attr "amdfam10_decode" "double,double")
2606 (set_attr "prefix_rep" "1")
2607 (set_attr "prefix" "maybe_vex")
2608 (set_attr "mode" "SI")])
;; 64-bit cvtsd2siq on a V2DF source: element 0 of operand 1 is converted to
;; DImode through UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
2610 (define_insn "sse2_cvtsd2siq"
2611 [(set (match_operand:DI 0 "register_operand" "=r,r")
2614 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2615 (parallel [(const_int 0)]))]
2616 UNSPEC_FIX_NOTRUNC))]
2617 "TARGET_SSE2 && TARGET_64BIT"
2618 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2619 [(set_attr "type" "sseicvt")
2620 (set_attr "athlon_decode" "double,vector")
2621 (set_attr "prefix_rep" "1")
2622 (set_attr "prefix" "maybe_vex")
2623 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a plain DFmode operand; the
;; result is DImode.  Requires TARGET_SSE2 && TARGET_64BIT.
2625 (define_insn "sse2_cvtsd2siq_2"
2626 [(set (match_operand:DI 0 "register_operand" "=r,r")
2627 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2628 UNSPEC_FIX_NOTRUNC))]
2629 "TARGET_SSE2 && TARGET_64BIT"
2630 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "sseicvt")
2632 (set_attr "athlon_decode" "double,vector")
2633 (set_attr "amdfam10_decode" "double,double")
2634 (set_attr "prefix_rep" "1")
2635 (set_attr "prefix" "maybe_vex")
2636 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of the V2DF operand 1 converted to SImode; emits the
;; "cvtt" mnemonic (optionally VEX-prefixed via "%v").
2638 (define_insn "sse2_cvttsd2si"
2639 [(set (match_operand:SI 0 "register_operand" "=r,r")
2642 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2643 (parallel [(const_int 0)]))))]
2645 "%vcvttsd2si\t{%1, %0|%0, %1}"
2646 [(set_attr "type" "sseicvt")
2647 (set_attr "prefix_rep" "1")
2648 (set_attr "prefix" "maybe_vex")
2649 (set_attr "mode" "SI")
2650 (set_attr "athlon_decode" "double,vector")
2651 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit cvttsd2siq: element 0 of the V2DF operand 1 converted to DImode.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2653 (define_insn "sse2_cvttsd2siq"
2654 [(set (match_operand:DI 0 "register_operand" "=r,r")
2657 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2658 (parallel [(const_int 0)]))))]
2659 "TARGET_SSE2 && TARGET_64BIT"
2660 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "sseicvt")
2662 (set_attr "prefix_rep" "1")
2663 (set_attr "prefix" "maybe_vex")
2664 (set_attr "mode" "DI")
2665 (set_attr "athlon_decode" "double,vector")
2666 (set_attr "amdfam10_decode" "double,double")])
;; 256-bit VEX vcvtdq2pd: converts all four elements of the V4SI operand 1
;; (register or memory) to a V4DF result.
2668 (define_insn "avx_cvtdq2pd256"
2669 [(set (match_operand:V4DF 0 "register_operand" "=x")
2670 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2672 "vcvtdq2pd\t{%1, %0|%0, %1}"
2673 [(set_attr "type" "ssecvt")
2674 (set_attr "prefix" "vex")
2675 (set_attr "mode" "V4DF")])
;; cvtdq2pd: elements 0 and 1 of the V4SI operand 1 are selected and
;; converted to a V2DF result; "%v"/maybe_vex allow the VEX encoding.
2677 (define_insn "sse2_cvtdq2pd"
2678 [(set (match_operand:V2DF 0 "register_operand" "=x")
2681 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2682 (parallel [(const_int 0) (const_int 1)]))))]
2684 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "maybe_vex")
2687 (set_attr "mode" "V2DF")])
;; 256-bit VEX vcvtpd2dq: V4DF to V4SI conversion expressed as
;; UNSPEC_FIX_NOTRUNC.  The "{y}" in the template is a dialect-conditional
;; mnemonic suffix.
2689 (define_insn "avx_cvtpd2dq256"
2690 [(set (match_operand:V4SI 0 "register_operand" "=x")
2691 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2692 UNSPEC_FIX_NOTRUNC))]
2694 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "prefix" "vex")
2697 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: the V2DF source is converted to V2SI through an
;; unspec and placed in a V4SI destination; the preparation statement sets
;; operand 2 to the V2SI zero constant.
2699 (define_expand "sse2_cvtpd2dq"
2700 [(set (match_operand:V4SI 0 "register_operand" "")
2702 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2706 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvtpd2dq: operand 2 must be a V2SI zero constant.  The
;; output code picks the VEX mnemonic "vcvtpd2dq{x}" when TARGET_AVX is set
;; and plain "cvtpd2dq" otherwise.
2708 (define_insn "*sse2_cvtpd2dq"
2709 [(set (match_operand:V4SI 0 "register_operand" "=x")
2711 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2713 (match_operand:V2SI 2 "const0_operand" "")))]
2715 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2716 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2717 [(set_attr "type" "ssecvt")
2718 (set_attr "prefix_rep" "1")
2719 (set_attr "prefix_data16" "0")
2720 (set_attr "prefix" "maybe_vex")
2721 (set_attr "mode" "TI")
2722 (set_attr "amdfam10_decode" "double")])
;; 256-bit VEX vcvttpd2dq: V4DF to V4SI conversion via the RTL "fix"
;; operator.
2724 (define_insn "avx_cvttpd2dq256"
2725 [(set (match_operand:V4SI 0 "register_operand" "=x")
2726 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2728 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "prefix" "vex")
2731 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: the V2DF source is converted to V2SI with "fix"
;; and placed in a V4SI destination; the preparation statement sets operand 2
;; to the V2SI zero constant.
2733 (define_expand "sse2_cvttpd2dq"
2734 [(set (match_operand:V4SI 0 "register_operand" "")
2736 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2739 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvttpd2dq: operand 2 must be a V2SI zero constant.  The
;; output code picks "vcvttpd2dq{x}" when TARGET_AVX is set and plain
;; "cvttpd2dq" otherwise.
2741 (define_insn "*sse2_cvttpd2dq"
2742 [(set (match_operand:V4SI 0 "register_operand" "=x")
2744 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2745 (match_operand:V2SI 2 "const0_operand" "")))]
2747 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2748 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2749 [(set_attr "type" "ssecvt")
2750 (set_attr "prefix" "maybe_vex")
2751 (set_attr "mode" "TI")
2752 (set_attr "amdfam10_decode" "double")])
;; VEX form of cvtsd2ss: float_truncate of the V2DF operand 2 to V2SF, with
;; operand 1 as an independent V4SF source register (three-operand template).
2754 (define_insn "*avx_cvtsd2ss"
2755 [(set (match_operand:V4SF 0 "register_operand" "=x")
2758 (float_truncate:V2SF
2759 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2760 (match_operand:V4SF 1 "register_operand" "x")
2763 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2764 [(set_attr "type" "ssecvt")
2765 (set_attr "prefix" "vex")
2766 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate of the V2DF operand 2 to V2SF; operand 1 is tied
;; to the destination.  Register and memory sources are separate alternatives
;; so the decode attributes can differ.
2768 (define_insn "sse2_cvtsd2ss"
2769 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2772 (float_truncate:V2SF
2773 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2774 (match_operand:V4SF 1 "register_operand" "0,0")
2777 "cvtsd2ss\t{%2, %0|%0, %2}"
2778 [(set_attr "type" "ssecvt")
2779 (set_attr "athlon_decode" "vector,double")
2780 (set_attr "amdfam10_decode" "vector,double")
2781 (set_attr "mode" "SF")])
;; VEX form of cvtss2sd: elements 0 and 1 of the V4SF operand 2 are selected
;; as the conversion source; operand 1 is an independent V2DF source
;; register (three-operand template).
2783 (define_insn "*avx_cvtss2sd"
2784 [(set (match_operand:V2DF 0 "register_operand" "=x")
2788 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2789 (parallel [(const_int 0) (const_int 1)])))
2790 (match_operand:V2DF 1 "register_operand" "x")
2793 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2794 [(set_attr "type" "ssecvt")
2795 (set_attr "prefix" "vex")
2796 (set_attr "mode" "DF")])
;; cvtss2sd: elements 0 and 1 of the V4SF operand 2 are selected as the
;; conversion source; operand 1 is tied to the destination.
2798 (define_insn "sse2_cvtss2sd"
2799 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2803 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2804 (parallel [(const_int 0) (const_int 1)])))
2805 (match_operand:V2DF 1 "register_operand" "0,0")
2808 "cvtss2sd\t{%2, %0|%0, %2}"
2809 [(set_attr "type" "ssecvt")
2810 (set_attr "amdfam10_decode" "vector,double")
2811 (set_attr "mode" "DF")])
;; 256-bit VEX vcvtpd2ps: float_truncate of the V4DF operand 1 to a V4SF
;; result.
2813 (define_insn "avx_cvtpd2ps256"
2814 [(set (match_operand:V4SF 0 "register_operand" "=x")
2815 (float_truncate:V4SF
2816 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2818 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2819 [(set_attr "type" "ssecvt")
2820 (set_attr "prefix" "vex")
2821 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate of the V2DF source to V2SF, placed
;; in a V4SF destination; the preparation statement sets operand 2 to the
;; V2SF zero constant.
2823 (define_expand "sse2_cvtpd2ps"
2824 [(set (match_operand:V4SF 0 "register_operand" "")
2826 (float_truncate:V2SF
2827 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2830 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for cvtpd2ps: operand 2 must be a V2SF zero constant.  The
;; output code picks "vcvtpd2ps{x}" when TARGET_AVX is set and plain
;; "cvtpd2ps" otherwise.
2832 (define_insn "*sse2_cvtpd2ps"
2833 [(set (match_operand:V4SF 0 "register_operand" "=x")
2835 (float_truncate:V2SF
2836 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2837 (match_operand:V2SF 2 "const0_operand" "")))]
2839 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2840 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2841 [(set_attr "type" "ssecvt")
2842 (set_attr "prefix_data16" "1")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "mode" "V4SF")
2845 (set_attr "amdfam10_decode" "double")])
;; 256-bit VEX vcvtps2pd: the V4SF operand 1 (register or memory) is
;; converted to a V4DF result.
2847 (define_insn "avx_cvtps2pd256"
2848 [(set (match_operand:V4DF 0 "register_operand" "=x")
2850 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2852 "vcvtps2pd\t{%1, %0|%0, %1}"
2853 [(set_attr "type" "ssecvt")
2854 (set_attr "prefix" "vex")
2855 (set_attr "mode" "V4DF")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand 1 are selected and
;; converted to a V2DF result; "%v"/maybe_vex allow the VEX encoding.
2857 (define_insn "sse2_cvtps2pd"
2858 [(set (match_operand:V2DF 0 "register_operand" "=x")
2861 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2862 (parallel [(const_int 0) (const_int 1)]))))]
2864 "%vcvtps2pd\t{%1, %0|%0, %1}"
2865 [(set_attr "type" "ssecvt")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "V2DF")
2868 (set_attr "prefix_data16" "0")
2869 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF result from a V4SF input in two steps: first a
;; selection (beginning at index 6) involving operand 1 is made, then
;; elements 0 and 1 of the intermediate are converted into operand 0.  The
;; preparation code allocates a fresh V4SF pseudo as operand 2.
2871 (define_expand "vec_unpacks_hi_v4sf"
2876 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2877 (parallel [(const_int 6)
2881 (set (match_operand:V2DF 0 "register_operand" "")
2885 (parallel [(const_int 0) (const_int 1)]))))]
2888 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result from elements 0 and 1 of the V4SF
;; operand 1.
2891 (define_expand "vec_unpacks_lo_v4sf"
2892 [(set (match_operand:V2DF 0 "register_operand" "")
2895 (match_operand:V4SF 1 "nonimmediate_operand" "")
2896 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI -> V4SF.  Emits vec_unpacks_hi_v8hi into a V4SI temporary,
;; then sse2_cvtdq2ps from that temporary into operand 0.
2899 (define_expand "vec_unpacks_float_hi_v8hi"
2900 [(match_operand:V4SF 0 "register_operand" "")
2901 (match_operand:V8HI 1 "register_operand" "")]
2904 rtx tmp = gen_reg_rtx (V4SImode);
2906 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2907 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF.  Emits vec_unpacks_lo_v8hi into a V4SI temporary,
;; then sse2_cvtdq2ps from that temporary into operand 0.
2911 (define_expand "vec_unpacks_float_lo_v8hi"
2912 [(match_operand:V4SF 0 "register_operand" "")
2913 (match_operand:V8HI 1 "register_operand" "")]
2916 rtx tmp = gen_reg_rtx (V4SImode);
2918 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2919 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF.  Emits vec_unpacku_hi_v8hi into a V4SI temporary,
;; then sse2_cvtdq2ps from that temporary into operand 0.
2923 (define_expand "vec_unpacku_float_hi_v8hi"
2924 [(match_operand:V4SF 0 "register_operand" "")
2925 (match_operand:V8HI 1 "register_operand" "")]
2928 rtx tmp = gen_reg_rtx (V4SImode);
2930 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2931 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF.  Emits vec_unpacku_lo_v8hi into a V4SI temporary,
;; then sse2_cvtdq2ps from that temporary into operand 0.
2935 (define_expand "vec_unpacku_float_lo_v8hi"
2936 [(match_operand:V4SF 0 "register_operand" "")
2937 (match_operand:V8HI 1 "register_operand" "")]
2940 rtx tmp = gen_reg_rtx (V4SImode);
2942 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2943 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF result from a V4SI input in two steps: first a
;; selection (beginning at index 2) of operand 1 is made, then elements 0 and
;; 1 of the intermediate are converted into operand 0.  The preparation code
;; allocates a fresh V4SI pseudo as operand 2.
2947 (define_expand "vec_unpacks_float_hi_v4si"
2950 (match_operand:V4SI 1 "nonimmediate_operand" "")
2951 (parallel [(const_int 2)
2955 (set (match_operand:V2DF 0 "register_operand" "")
2959 (parallel [(const_int 0) (const_int 1)]))))]
2962 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF result from elements 0 and 1 of the V4SI
;; operand 1.
2965 (define_expand "vec_unpacks_float_lo_v4si"
2966 [(set (match_operand:V2DF 0 "register_operand" "")
2969 (match_operand:V4SI 1 "nonimmediate_operand" "")
2970 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: two V2DF inputs -> one V4SF result.  Each input is converted
;; with sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are combined into operand 0 with sse_movlhps.
2973 (define_expand "vec_pack_trunc_v2df"
2974 [(match_operand:V4SF 0 "register_operand" "")
2975 (match_operand:V2DF 1 "nonimmediate_operand" "")
2976 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2981 r1 = gen_reg_rtx (V4SFmode);
2982 r2 = gen_reg_rtx (V4SFmode);
2984 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2985 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2986 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: two V2DF inputs -> one V4SI result.  Each input is converted
;; with sse2_cvttpd2dq into its own V4SI temporary (r1, r2); the temporaries
;; are then combined with sse2_punpcklqdq, with all three operands viewed as
;; V2DI through gen_lowpart.
2990 (define_expand "vec_pack_sfix_trunc_v2df"
2991 [(match_operand:V4SI 0 "register_operand" "")
2992 (match_operand:V2DF 1 "nonimmediate_operand" "")
2993 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2998 r1 = gen_reg_rtx (V4SImode);
2999 r2 = gen_reg_rtx (V4SImode);
3001 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3002 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3003 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3004 gen_lowpart (V2DImode, r1),
3005 gen_lowpart (V2DImode, r2)));
;; Expander: same shape as vec_pack_sfix_trunc_v2df, but the two V2DF
;; inputs are converted with gen_sse2_cvtpd2dq instead of
;; gen_sse2_cvttpd2dq before being combined with gen_sse2_punpcklqdq
;; on V2DI views.
3009 (define_expand "vec_pack_sfix_v2df"
3010 [(match_operand:V4SI 0 "register_operand" "")
3011 (match_operand:V2DF 1 "nonimmediate_operand" "")
3012 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3017 r1 = gen_reg_rtx (V4SImode);
3018 r2 = gen_reg_rtx (V4SImode);
3020 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3021 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3022 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3023 gen_lowpart (V2DImode, r1),
3024 gen_lowpart (V2DImode, r2)));
3028 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3030 ;; Parallel single-precision floating point element swizzling
3032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the movhlps shuffle on V4SF operands.  All three
;; operands may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands on them before the pattern is emitted.
3034 (define_expand "sse_movhlps_exp"
3035 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3038 (match_operand:V4SF 1 "nonimmediate_operand" "")
3039 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3040 (parallel [(const_int 6)
3045 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (VEX-encoded) form of movhlps.  Requires TARGET_AVX and that
;; operands 1 and 2 are not both memory.  Three alternatives:
;;   0: register/register        -> vmovhlps (three-operand form)
;;   1: operand 2 in offsettable memory -> vmovlps using %H2
;;   2: destination in memory (tied to operand 1) -> vmovhps store
3047 (define_insn "*avx_movhlps"
3048 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3051 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3052 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3053 (parallel [(const_int 6)
3057 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3059 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3060 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3061 vmovhps\t{%2, %0|%0, %2}"
3062 [(set_attr "type" "ssemov")
3063 (set_attr "prefix" "vex")
3064 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form of movhlps.  Operand 1 is tied to the destination in
;; every alternative; operands 1 and 2 may not both be memory.
;; Alternatives: register source (movhlps), offsettable memory source
;; (movlps using %H2), memory destination (movhps store).
3066 (define_insn "sse_movhlps"
3067 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3070 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3071 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3072 (parallel [(const_int 6)
3076 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3078 movhlps\t{%2, %0|%0, %2}
3079 movlps\t{%H2, %0|%0, %H2}
3080 movhps\t{%2, %0|%0, %2}"
3081 [(set_attr "type" "ssemov")
3082 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps shuffle on V4SF operands.  All three
;; operands may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands on them before the pattern is emitted.
3084 (define_expand "sse_movlhps_exp"
3085 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3088 (match_operand:V4SF 1 "nonimmediate_operand" "")
3089 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3090 (parallel [(const_int 0)
3095 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (VEX-encoded) form of movlhps, valid when TARGET_AVX and
;; ix86_binary_operator_ok accept the operands.  Alternatives:
;;   0: register/register         -> vmovlhps
;;   1: operand 2 in memory       -> vmovhps load
;;   2: offsettable memory destination (tied to operand 1)
;;      -> vmovlps store to %H0
3097 (define_insn "*avx_movlhps"
3098 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3101 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3102 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3103 (parallel [(const_int 0)
3107 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3109 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3110 vmovhps\t{%2, %1, %0|%0, %1, %2}
3111 vmovlps\t{%2, %H0|%H0, %2}"
3112 [(set_attr "type" "ssemov")
3113 (set_attr "prefix" "vex")
3114 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form of movlhps.  Operand 1 is tied to the destination in
;; every alternative.  Alternatives: register source (movlhps), memory
;; source (movhps load), offsettable memory destination (movlps store
;; to %H0).
3116 (define_insn "sse_movlhps"
3117 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3120 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3121 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3122 (parallel [(const_int 0)
3126 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3128 movlhps\t{%2, %0|%0, %2}
3129 movhps\t{%2, %0|%0, %2}
3130 movlps\t{%2, %H0|%H0, %2}"
3131 [(set_attr "type" "ssemov")
3132 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps on V8SF.  The selector picks indices
;; 2,10,3,11 and 6,14,7,15 from the two V8SF inputs; operand 2 may be
;; a register or memory.
3134 (define_insn "avx_unpckhps256"
3135 [(set (match_operand:V8SF 0 "register_operand" "=x")
3138 (match_operand:V8SF 1 "register_operand" "x")
3139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3140 (parallel [(const_int 2) (const_int 10)
3141 (const_int 3) (const_int 11)
3142 (const_int 6) (const_int 14)
3143 (const_int 7) (const_int 15)])))]
3145 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3146 [(set_attr "type" "sselog")
3147 (set_attr "prefix" "vex")
3148 (set_attr "mode" "V8SF")])
;; 128-bit VEX-encoded vunpckhps on V4SF: selector indices 2,6,3,7.
;; Three-operand form, so the destination is not tied to operand 1.
3150 (define_insn "*avx_unpckhps"
3151 [(set (match_operand:V4SF 0 "register_operand" "=x")
3154 (match_operand:V4SF 1 "register_operand" "x")
3155 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3156 (parallel [(const_int 2) (const_int 6)
3157 (const_int 3) (const_int 7)])))]
3159 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3160 [(set_attr "type" "sselog")
3161 (set_attr "prefix" "vex")
3162 (set_attr "mode" "V4SF")])
;; Legacy SSE unpckhps on V4SF: selector indices 2,6,3,7.  Operand 1 is
;; tied to the destination (constraint "0").
3164 (define_insn "sse_unpckhps"
3165 [(set (match_operand:V4SF 0 "register_operand" "=x")
3168 (match_operand:V4SF 1 "register_operand" "0")
3169 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3170 (parallel [(const_int 2) (const_int 6)
3171 (const_int 3) (const_int 7)])))]
3173 "unpckhps\t{%2, %0|%0, %2}"
3174 [(set_attr "type" "sselog")
3175 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps on V8SF.  The selector picks indices
;; 0,8,1,9 and 4,12,5,13 from the two V8SF inputs; operand 2 may be a
;; register or memory.
3177 (define_insn "avx_unpcklps256"
3178 [(set (match_operand:V8SF 0 "register_operand" "=x")
3181 (match_operand:V8SF 1 "register_operand" "x")
3182 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3183 (parallel [(const_int 0) (const_int 8)
3184 (const_int 1) (const_int 9)
3185 (const_int 4) (const_int 12)
3186 (const_int 5) (const_int 13)])))]
3188 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3189 [(set_attr "type" "sselog")
3190 (set_attr "prefix" "vex")
3191 (set_attr "mode" "V8SF")])
;; 128-bit VEX-encoded vunpcklps on V4SF: selector indices 0,4,1,5.
;; Three-operand form, so the destination is not tied to operand 1.
3193 (define_insn "*avx_unpcklps"
3194 [(set (match_operand:V4SF 0 "register_operand" "=x")
3197 (match_operand:V4SF 1 "register_operand" "x")
3198 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3199 (parallel [(const_int 0) (const_int 4)
3200 (const_int 1) (const_int 5)])))]
3202 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3203 [(set_attr "type" "sselog")
3204 (set_attr "prefix" "vex")
3205 (set_attr "mode" "V4SF")])
;; Legacy SSE unpcklps on V4SF: selector indices 0,4,1,5.  Operand 1 is
;; tied to the destination (constraint "0").
3207 (define_insn "sse_unpcklps"
3208 [(set (match_operand:V4SF 0 "register_operand" "=x")
3211 (match_operand:V4SF 1 "register_operand" "0")
3212 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3213 (parallel [(const_int 0) (const_int 4)
3214 (const_int 1) (const_int 5)])))]
3216 "unpcklps\t{%2, %0|%0, %2}"
3217 [(set_attr "type" "sselog")
3218 (set_attr "mode" "V4SF")])
3220 ;; These are modeled with the same vec_concat as the others so that we
3221 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup on V8SF: single source operand (register or
;; memory); the selector lists each odd index twice (1,1,3,3,5,5,7,7).
3222 (define_insn "avx_movshdup256"
3223 [(set (match_operand:V8SF 0 "register_operand" "=x")
3226 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3228 (parallel [(const_int 1) (const_int 1)
3229 (const_int 3) (const_int 3)
3230 (const_int 5) (const_int 5)
3231 (const_int 7) (const_int 7)])))]
3233 "vmovshdup\t{%1, %0|%0, %1}"
3234 [(set_attr "type" "sse")
3235 (set_attr "prefix" "vex")
3236 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF with a single register-or-memory source.
;; The "%v" in the template and prefix "maybe_vex" let one pattern
;; serve both the legacy and the VEX encoding.
3238 (define_insn "sse3_movshdup"
3239 [(set (match_operand:V4SF 0 "register_operand" "=x")
3242 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3244 (parallel [(const_int 1)
3249 "%vmovshdup\t{%1, %0|%0, %1}"
3250 [(set_attr "type" "sse")
3251 (set_attr "prefix_rep" "1")
3252 (set_attr "prefix" "maybe_vex")
3253 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup on V8SF: single source operand (register or
;; memory); the selector lists each even index twice (0,0,2,2,4,4,6,6).
3255 (define_insn "avx_movsldup256"
3256 [(set (match_operand:V8SF 0 "register_operand" "=x")
3259 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3261 (parallel [(const_int 0) (const_int 0)
3262 (const_int 2) (const_int 2)
3263 (const_int 4) (const_int 4)
3264 (const_int 6) (const_int 6)])))]
3266 "vmovsldup\t{%1, %0|%0, %1}"
3267 [(set_attr "type" "sse")
3268 (set_attr "prefix" "vex")
3269 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF with a single register-or-memory source.
;; The "%v" in the template and prefix "maybe_vex" let one pattern
;; serve both the legacy and the VEX encoding.
3271 (define_insn "sse3_movsldup"
3272 [(set (match_operand:V4SF 0 "register_operand" "=x")
3275 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3277 (parallel [(const_int 0)
3282 "%vmovsldup\t{%1, %0|%0, %1}"
3283 [(set_attr "type" "sse")
3284 (set_attr "prefix_rep" "1")
3285 (set_attr "prefix" "maybe_vex")
3286 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3 and
;; decoding it into the eight explicit element indices expected by
;; avx_shufps256_1.  Each 2-bit field of the mask is used twice: once
;; with base 0 or 8, and once more with 4 added (bases 4 and 12).
3288 (define_expand "avx_shufps256"
3289 [(match_operand:V8SF 0 "register_operand" "")
3290 (match_operand:V8SF 1 "register_operand" "")
3291 (match_operand:V8SF 2 "nonimmediate_operand" "")
3292 (match_operand:SI 3 "const_int_operand" "")]
3295 int mask = INTVAL (operands[3]);
3296 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3297 GEN_INT ((mask >> 0) & 3),
3298 GEN_INT ((mask >> 2) & 3),
3299 GEN_INT (((mask >> 4) & 3) + 8),
3300 GEN_INT (((mask >> 6) & 3) + 8),
3301 GEN_INT (((mask >> 0) & 3) + 4),
3302 GEN_INT (((mask >> 2) & 3) + 4),
3303 GEN_INT (((mask >> 4) & 3) + 12),
3304 GEN_INT (((mask >> 6) & 3) + 12)));
3308 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the selection spelled out as eight element
;; indices (operands 3-10).  The insn condition requires operands 7-10
;; to equal operands 3-6 plus 4, so only operands 3-6 are needed to
;; rebuild the immediate: the output code packs them into an 8-bit
;; mask (subtracting the base 8 from operands 5 and 6) and overwrites
;; operands[3] with it for printing.
3309 (define_insn "avx_shufps256_1"
3310 [(set (match_operand:V8SF 0 "register_operand" "=x")
3313 (match_operand:V8SF 1 "register_operand" "x")
3314 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3315 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3316 (match_operand 4 "const_0_to_3_operand" "")
3317 (match_operand 5 "const_8_to_11_operand" "")
3318 (match_operand 6 "const_8_to_11_operand" "")
3319 (match_operand 7 "const_4_to_7_operand" "")
3320 (match_operand 8 "const_4_to_7_operand" "")
3321 (match_operand 9 "const_12_to_15_operand" "")
3322 (match_operand 10 "const_12_to_15_operand" "")])))]
3324 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3325 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3326 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3327 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3330 mask = INTVAL (operands[3]);
3331 mask |= INTVAL (operands[4]) << 2;
3332 mask |= (INTVAL (operands[5]) - 8) << 4;
3333 mask |= (INTVAL (operands[6]) - 8) << 6;
3334 operands[3] = GEN_INT (mask);
3336 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3338 [(set_attr "type" "sselog")
3339 (set_attr "length_immediate" "1")
3340 (set_attr "prefix" "vex")
3341 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shufps immediate as operand 3 and decoding
;; it into four explicit element indices for gen_sse_shufps_v4sf: the
;; two low 2-bit fields are passed as-is, the two high fields get 4
;; added.
3343 (define_expand "sse_shufps"
3344 [(match_operand:V4SF 0 "register_operand" "")
3345 (match_operand:V4SF 1 "register_operand" "")
3346 (match_operand:V4SF 2 "nonimmediate_operand" "")
3347 (match_operand:SI 3 "const_int_operand" "")]
3350 int mask = INTVAL (operands[3]);
3351 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3352 GEN_INT ((mask >> 0) & 3),
3353 GEN_INT ((mask >> 2) & 3),
3354 GEN_INT (((mask >> 4) & 3) + 4),
3355 GEN_INT (((mask >> 6) & 3) + 4)));
;; VEX-encoded vshufps for the SSEMODE4S modes, with the selection
;; given as four element indices (operands 3-6) into the concatenation
;; of operands 1 and 2.  The output code packs those indices into the
;; 8-bit immediate (subtracting the base 4 from operands 5 and 6) and
;; overwrites operands[3] with it for printing.
3359 (define_insn "*avx_shufps_<mode>"
3360 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3361 (vec_select:SSEMODE4S
3362 (vec_concat:<ssedoublesizemode>
3363 (match_operand:SSEMODE4S 1 "register_operand" "x")
3364 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3365 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3366 (match_operand 4 "const_0_to_3_operand" "")
3367 (match_operand 5 "const_4_to_7_operand" "")
3368 (match_operand 6 "const_4_to_7_operand" "")])))]
3372 mask |= INTVAL (operands[3]) << 0;
3373 mask |= INTVAL (operands[4]) << 2;
3374 mask |= (INTVAL (operands[5]) - 4) << 4;
3375 mask |= (INTVAL (operands[6]) - 4) << 6;
3376 operands[3] = GEN_INT (mask);
3378 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3380 [(set_attr "type" "sselog")
3381 (set_attr "length_immediate" "1")
3382 (set_attr "prefix" "vex")
3383 (set_attr "mode" "V4SF")])
;; Legacy SSE shufps for the SSEMODE4S modes; operand 1 is tied to the
;; destination.  The selection is given as four element indices
;; (operands 3-6); the output code packs them into the 8-bit immediate
;; (subtracting the base 4 from operands 5 and 6) and overwrites
;; operands[3] with it for printing.
3385 (define_insn "sse_shufps_<mode>"
3386 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3387 (vec_select:SSEMODE4S
3388 (vec_concat:<ssedoublesizemode>
3389 (match_operand:SSEMODE4S 1 "register_operand" "0")
3390 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3391 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3392 (match_operand 4 "const_0_to_3_operand" "")
3393 (match_operand 5 "const_4_to_7_operand" "")
3394 (match_operand 6 "const_4_to_7_operand" "")])))]
3398 mask |= INTVAL (operands[3]) << 0;
3399 mask |= INTVAL (operands[4]) << 2;
3400 mask |= (INTVAL (operands[5]) - 4) << 4;
3401 mask |= (INTVAL (operands[6]) - 4) << 6;
3402 operands[3] = GEN_INT (mask);
3404 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3406 [(set_attr "type" "sselog")
3407 (set_attr "length_immediate" "1")
3408 (set_attr "mode" "V4SF")])
;; Moves elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory (movhps), register to register
;; (movhlps), load from offsettable memory (movlps using %H1).  The
;; "%v" prefix makes each template usable for both legacy and VEX
;; encodings.
3410 (define_insn "sse_storehps"
3411 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3413 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3414 (parallel [(const_int 2) (const_int 3)])))]
3417 %vmovhps\t{%1, %0|%0, %1}
3418 %vmovhlps\t{%1, %d0|%d0, %1}
3419 %vmovlps\t{%H1, %d0|%d0, %H1}"
3420 [(set_attr "type" "ssemov")
3421 (set_attr "prefix" "maybe_vex")
3422 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining elements 0 and 1 of V4SF operand 1 with V2SF
;; operand 2 into V4SF operand 0.  The preparation statement calls
;; ix86_fixup_binary_operands on the operands first.
3424 (define_expand "sse_loadhps_exp"
3425 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3428 (match_operand:V4SF 1 "nonimmediate_operand" "")
3429 (parallel [(const_int 0) (const_int 1)]))
3430 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3432 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded loadhps: keeps elements 0 and 1 of operand 1 and takes
;; the other half from V2SF operand 2.  Alternatives: operand 2 in
;; memory (vmovhps), operand 2 in a register (vmovlhps), offsettable
;; memory destination tied to operand 1 (vmovlps store to %H0).
3434 (define_insn "*avx_loadhps"
3435 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3438 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3439 (parallel [(const_int 0) (const_int 1)]))
3440 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3443 vmovhps\t{%2, %1, %0|%0, %1, %2}
3444 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3445 vmovlps\t{%2, %H0|%H0, %2}"
3446 [(set_attr "type" "ssemov")
3447 (set_attr "prefix" "vex")
3448 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps; operand 1 is tied to the destination in every
;; alternative.  Alternatives: operand 2 in memory (movhps), operand 2
;; in a register (movlhps), offsettable memory destination (movlps
;; store to %H0).
3450 (define_insn "sse_loadhps"
3451 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3454 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3455 (parallel [(const_int 0) (const_int 1)]))
3456 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3459 movhps\t{%2, %0|%0, %2}
3460 movlhps\t{%2, %0|%0, %2}
3461 movlps\t{%2, %H0|%H0, %2}"
3462 [(set_attr "type" "ssemov")
3463 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded storelps: moves elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.  Alternatives: store to memory (vmovlps), register
;; copy (vmovaps), load from memory (three-operand vmovlps with %0 as
;; the pass-through source).
;; NOTE(review): the "mode" attribute for the vmovaps alternative is
;; V2DF here but V4SF in sse_storelps -- confirm which is intended.
3465 (define_insn "*avx_storelps"
3466 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3468 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3469 (parallel [(const_int 0) (const_int 1)])))]
3472 vmovlps\t{%1, %0|%0, %1}
3473 vmovaps\t{%1, %0|%0, %1}
3474 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3475 [(set_attr "type" "ssemov")
3476 (set_attr "prefix" "vex")
3477 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE storelps: moves elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.  Alternatives: store to memory (movlps), register
;; copy (movaps), load from memory (movlps).
3479 (define_insn "sse_storelps"
3480 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3482 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3483 (parallel [(const_int 0) (const_int 1)])))]
3486 movlps\t{%1, %0|%0, %1}
3487 movaps\t{%1, %0|%0, %1}
3488 movlps\t{%1, %0|%0, %1}"
3489 [(set_attr "type" "ssemov")
3490 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1 into V4SF operand 0.  The preparation statement calls
;; ix86_fixup_binary_operands on the operands first.
3492 (define_expand "sse_loadlps_exp"
3493 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3495 (match_operand:V2SF 2 "nonimmediate_operand" "")
3497 (match_operand:V4SF 1 "nonimmediate_operand" "")
3498 (parallel [(const_int 2) (const_int 3)]))))]
3500 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded loadlps: V2SF operand 2 supplies one half of the result
;; and elements 2 and 3 of V4SF operand 1 supply the other.
;; Alternatives:
;;   0: all registers -> vshufps with immediate 0xe4
;;   1: operand 2 in memory -> vmovlps load
;;   2: memory destination tied to operand 1 -> vmovlps store
;; Alternative 0 must use the VEX mnemonic "vshufps": the template is
;; the three-register form and the insn is marked prefix "vex"; the
;; legacy "shufps" mnemonic only accepts two register operands.
3502 (define_insn "*avx_loadlps"
3503 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3505 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3507 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3508 (parallel [(const_int 2) (const_int 3)]))))]
3511 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3512 vmovlps\t{%2, %1, %0|%0, %1, %2}
3513 vmovlps\t{%2, %0|%0, %2}"
3514 [(set_attr "type" "sselog,ssemov,ssemov")
3515 (set_attr "length_immediate" "1,*,*")
3516 (set_attr "prefix" "vex")
3517 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied to destination -> movlps
;;   2: memory destination tied to operand 1 -> movlps store
3519 (define_insn "sse_loadlps"
3520 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3522 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3524 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3525 (parallel [(const_int 2) (const_int 3)]))))]
3528 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3529 movlps\t{%2, %0|%0, %2}
3530 movlps\t{%2, %0|%0, %2}"
3531 [(set_attr "type" "sselog,ssemov,ssemov")
3532 (set_attr "length_immediate" "1,*,*")
3533 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; VEX-encoded three-operand vmovss on V4SF registers; operand 2
;; appears first in the RTL and operand 1 second.
3535 (define_insn "*avx_movss"
3536 [(set (match_operand:V4SF 0 "register_operand" "=x")
3538 (match_operand:V4SF 2 "register_operand" "x")
3539 (match_operand:V4SF 1 "register_operand" "x")
3542 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3543 [(set_attr "type" "ssemov")
3544 (set_attr "prefix" "vex")
3545 (set_attr "mode" "SF")])
;; Legacy SSE movss on V4SF registers; operand 1 is tied to the
;; destination and operand 2 is the register source.
3547 (define_insn "sse_movss"
3548 [(set (match_operand:V4SF 0 "register_operand" "=x")
3550 (match_operand:V4SF 2 "register_operand" "x")
3551 (match_operand:V4SF 1 "register_operand" "0")
3554 "movss\t{%2, %0|%0, %2}"
3555 [(set_attr "type" "ssemov")
3556 (set_attr "mode" "SF")])
;; VEX-encoded V4SF result built from SF register operand 1 using
;; vshufps with immediate 0 and operand 1 as both sources.
3558 (define_insn "*vec_dupv4sf_avx"
3559 [(set (match_operand:V4SF 0 "register_operand" "=x")
3561 (match_operand:SF 1 "register_operand" "x")))]
3563 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3564 [(set_attr "type" "sselog1")
3565 (set_attr "length_immediate" "1")
3566 (set_attr "prefix" "vex")
3567 (set_attr "mode" "V4SF")])
;; Legacy SSE V4SF result built from SF operand 1, which is tied to the
;; destination; emitted as shufps with immediate 0 on %0 itself.
3569 (define_insn "*vec_dupv4sf"
3570 [(set (match_operand:V4SF 0 "register_operand" "=x")
3572 (match_operand:SF 1 "register_operand" "0")))]
3574 "shufps\t{$0, %0, %0|%0, %0, 0}"
3575 [(set_attr "type" "sselog1")
3576 (set_attr "length_immediate" "1")
3577 (set_attr "mode" "V4SF")])
;; Builds a V2SF from two SF operands, AVX variant.  Five alternatives:
;;   0: both in SSE registers          -> vunpcklps
;;   1: operand 2 in memory            -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 constraint "C" -> vmovss load
;;   3: MMX register destination       -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
;; The two MMX alternatives (3 and 4) keep the original encoding
;; (prefix "orig"); the others are VEX-encoded.
3579 (define_insn "*vec_concatv2sf_avx"
3580 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3582 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3583 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3586 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3587 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3588 vmovss\t{%1, %0|%0, %1}
3589 punpckldq\t{%2, %0|%0, %2}
3590 movd\t{%1, %0|%0, %1}"
3591 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3592 (set_attr "length_immediate" "*,1,*,*,*")
3593 (set_attr "prefix_extra" "*,1,*,*,*")
3594 (set (attr "prefix")
3595 (if_then_else (eq_attr "alternative" "3,4")
3596 (const_string "orig")
3597 (const_string "vex")))
3598 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3600 ;; Although insertps takes register source, we prefer
3601 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands, SSE4.1 variant (two-operand
;; encodings, operand 1 tied to the destination where it is a
;; register).  Alternatives: unpcklps (register operand 2), insertps
;; with immediate 0x10 (memory operand 2), movss load, and the two MMX
;; forms punpckldq and movd.
3602 (define_insn "*vec_concatv2sf_sse4_1"
3603 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3605 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3606 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3609 unpcklps\t{%2, %0|%0, %2}
3610 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3611 movss\t{%1, %0|%0, %1}
3612 punpckldq\t{%2, %0|%0, %2}
3613 movd\t{%1, %0|%0, %1}"
3614 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3615 (set_attr "prefix_data16" "*,1,*,*,*")
3616 (set_attr "prefix_extra" "*,1,*,*,*")
3617 (set_attr "length_immediate" "*,1,*,*,*")
3618 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3620 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3621 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3622 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands, plain SSE variant.  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand), so there is no
;; memory alternative for it.  Alternatives: unpcklps, movss load,
;; and the MMX forms punpckldq and movd.
3623 (define_insn "*vec_concatv2sf_sse"
3624 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3626 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3627 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3630 unpcklps\t{%2, %0|%0, %2}
3631 movss\t{%1, %0|%0, %1}
3632 punpckldq\t{%2, %0|%0, %2}
3633 movd\t{%1, %0|%0, %1}"
3634 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3635 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands, VEX-encoded.  Alternatives:
;; operand 2 in a register (vmovlhps) or in memory (vmovhps).
3637 (define_insn "*vec_concatv4sf_avx"
3638 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3640 (match_operand:V2SF 1 "register_operand" " x,x")
3641 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3644 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3645 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3646 [(set_attr "type" "ssemov")
3647 (set_attr "prefix" "vex")
3648 (set_attr "mode" "V4SF,V2SF")])
;; Builds a V4SF from two V2SF operands, legacy SSE encoding with
;; operand 1 tied to the destination.  Alternatives: operand 2 in a
;; register (movlhps) or in memory (movhps).
3650 (define_insn "*vec_concatv4sf_sse"
3651 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3653 (match_operand:V2SF 1 "register_operand" " 0,0")
3654 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3657 movlhps\t{%2, %0|%0, %2}
3658 movhps\t{%2, %0|%0, %2}"
3659 [(set_attr "type" "ssemov")
3660 (set_attr "mode" "V4SF,V2SF")])
;; Expander for every SSEMODE mode; delegates entirely to
;; ix86_expand_vector_init with operand 0 as the target and operand 1
;; as the initializer (first argument false).
3662 (define_expand "vec_init<mode>"
3663 [(match_operand:SSEMODE 0 "register_operand" "")
3664 (match_operand 1 "" "")]
3667 ix86_expand_vector_init (false, operands[0], operands[1]);
;; VEX-encoded pattern combining SF operand 2 with V4SF operand 1 into
;; V4SF operand 0.  Four constraint alternatives are declared; the
;; visible templates are three-operand vmovss (register source),
;; two-operand vmovss (memory source, operand 1 "C") and vmovd
;; (general-register source, operand 1 "C").
;; NOTE(review): the template for the fourth (memory destination)
;; alternative is not visible in this view.
3671 (define_insn "*vec_setv4sf_0_avx"
3672 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3675 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3676 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3680 vmovss\t{%2, %1, %0|%0, %1, %2}
3681 vmovss\t{%2, %0|%0, %2}
3682 vmovd\t{%2, %0|%0, %2}
3684 [(set_attr "type" "ssemov")
3685 (set_attr "prefix" "vex")
3686 (set_attr "mode" "SF")])
;; Legacy SSE counterpart of *vec_setv4sf_0_avx.  Four constraint
;; alternatives are declared; the visible templates are movss (register
;; source, operand 1 tied to the destination), movss (memory source,
;; operand 1 "C") and movd (general-register source, destination class
;; Y2).
;; NOTE(review): the template for the fourth (memory destination)
;; alternative is not visible in this view.
3688 (define_insn "vec_setv4sf_0"
3689 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3692 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3693 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3697 movss\t{%2, %0|%0, %2}
3698 movss\t{%2, %0|%0, %2}
3699 movd\t{%2, %0|%0, %2}
3701 [(set_attr "type" "ssemov")
3702 (set_attr "mode" "SF")])
3704 ;; A subset is vec_setv4sf.
;; VEX-encoded insertion of SF operand 2 into V4SF operand 1, with the
;; position given by operand 3 as a power of two in 1..8.  The output
;; code rewrites operand 3 to exact_log2 (value) << 4 before printing
;; it as the vinsertps immediate.
3705 (define_insn "*vec_setv4sf_avx"
3706 [(set (match_operand:V4SF 0 "register_operand" "=x")
3709 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3710 (match_operand:V4SF 1 "register_operand" "x")
3711 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3714 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3715 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3717 [(set_attr "type" "sselog")
3718 (set_attr "prefix_extra" "1")
3719 (set_attr "length_immediate" "1")
3720 (set_attr "prefix" "vex")
3721 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SF operand 2 into V4SF operand 1 (tied to the
;; destination), with the position given by operand 3 as a power of two
;; in 1..8.  The output code rewrites operand 3 to
;; exact_log2 (value) << 4 before printing it as the insertps
;; immediate.
3723 (define_insn "*vec_setv4sf_sse4_1"
3724 [(set (match_operand:V4SF 0 "register_operand" "=x")
3727 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3728 (match_operand:V4SF 1 "register_operand" "0")
3729 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3732 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3733 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3735 [(set_attr "type" "sselog")
3736 (set_attr "prefix_data16" "1")
3737 (set_attr "prefix_extra" "1")
3738 (set_attr "length_immediate" "1")
3739 (set_attr "mode" "V4SF")])
;; VEX-encoded vinsertps modelled as an unspec: operand 2 (register or
;; memory) and operand 1 (register) are V4SF, operand 3 is the raw
;; 0..255 immediate passed through unchanged.
3741 (define_insn "*avx_insertps"
3742 [(set (match_operand:V4SF 0 "register_operand" "=x")
3743 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3744 (match_operand:V4SF 1 "register_operand" "x")
3745 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3748 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3749 [(set_attr "type" "sselog")
3750 (set_attr "prefix" "vex")
3751 (set_attr "prefix_extra" "1")
3752 (set_attr "length_immediate" "1")
3753 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps modelled as an unspec: operand 1 is tied to the
;; destination, operand 2 must be a register, and operand 3 is the raw
;; 0..255 immediate passed through unchanged.
3755 (define_insn "sse4_1_insertps"
3756 [(set (match_operand:V4SF 0 "register_operand" "=x")
3757 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3758 (match_operand:V4SF 1 "register_operand" "0")
3759 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3762 "insertps\t{%3, %2, %0|%0, %2, %3}";
3763 [(set_attr "type" "sselog")
3764 (set_attr "prefix_data16" "1")
3765 (set_attr "prefix_extra" "1")
3766 (set_attr "length_immediate" "1")
3767 (set_attr "mode" "V4SF")])
;; NOTE(review): presumably the body of an anonymous define_split whose
;; header line is not visible in this view.  Applies after reload on
;; TARGET_SSE when the V4SF destination is in memory and SF operand 1
;; is not: it is replaced by a plain SFmode move of operand 1 to
;; offset 0 of the destination.
3770 [(set (match_operand:V4SF 0 "memory_operand" "")
3773 (match_operand:SF 1 "nonmemory_operand" ""))
3776 "TARGET_SSE && reload_completed"
3779 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander for every SSEMODE mode; delegates to
;; ix86_expand_vector_set with the vector (operand 0), the scalar
;; (operand 1) and the constant element index taken from operand 2.
3783 (define_expand "vec_set<mode>"
3784 [(match_operand:SSEMODE 0 "register_operand" "")
3785 (match_operand:<ssescalarmode> 1 "register_operand" "")
3786 (match_operand 2 "const_int_operand" "")]
3789 ix86_expand_vector_set (false, operands[0], operands[1],
3790 INTVAL (operands[2]));
;; Extracts element 0 of V4SF operand 1 into SF operand 0 (not both in
;; memory).  After reload the insn is split into an ordinary SFmode
;; move: operand 1 is re-expressed in SFmode, either as a hard register
;; with the same REGNO or through gen_lowpart.
;; NOTE(review): the condition choosing between the two forms is not
;; visible in this view.
3794 (define_insn_and_split "*vec_extractv4sf_0"
3795 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3797 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3798 (parallel [(const_int 0)])))]
3799 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3801 "&& reload_completed"
3804 rtx op1 = operands[1];
3806 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3808 op1 = gen_lowpart (SFmode, op1);
3809 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 on any AVX256MODE vector.  Operand 2 is a
;; constant 0 or 1; the switch on its value emits either
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
3813 (define_expand "avx_vextractf128<mode>"
3814 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3815 (match_operand:AVX256MODE 1 "register_operand" "")
3816 (match_operand:SI 2 "const_0_to_1_operand" "")]
3819 switch (INTVAL (operands[2]))
3822 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3825 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Extracts elements 0 and 1 of a 4-element 256-bit vector
;; (AVX256MODE4P) into a register or memory, using vextractf128 with
;; immediate 0.
3833 (define_insn "vec_extract_lo_<mode>"
3834 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3835 (vec_select:<avxhalfvecmode>
3836 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3837 (parallel [(const_int 0) (const_int 1)])))]
3839 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3840 [(set_attr "type" "sselog")
3841 (set_attr "prefix_extra" "1")
3842 (set_attr "length_immediate" "1")
3843 (set_attr "memory" "none,store")
3844 (set_attr "prefix" "vex")
3845 (set_attr "mode" "V8SF")])
;; Extracts elements 2 and 3 of a 4-element 256-bit vector
;; (AVX256MODE4P) into a register or memory, using vextractf128 with
;; immediate 1.
3847 (define_insn "vec_extract_hi_<mode>"
3848 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3849 (vec_select:<avxhalfvecmode>
3850 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3851 (parallel [(const_int 2) (const_int 3)])))]
3853 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3854 [(set_attr "type" "sselog")
3855 (set_attr "prefix_extra" "1")
3856 (set_attr "length_immediate" "1")
3857 (set_attr "memory" "none,store")
3858 (set_attr "prefix" "vex")
3859 (set_attr "mode" "V8SF")])
;; Extracts elements 0-3 (the low 128 bits) of an 8-element 256-bit
;; vector (AVX256MODE8P) into a register or memory.  The vextractf128
;; immediate must be 0 to select the low lane; immediate 1 would
;; return elements 4-7 and contradict the vec_select indices.
3861 (define_insn "vec_extract_lo_<mode>"
3862 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3863 (vec_select:<avxhalfvecmode>
3864 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3865 (parallel [(const_int 0) (const_int 1)
3866 (const_int 2) (const_int 3)])))]
3868 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3869 [(set_attr "type" "sselog")
3870 (set_attr "prefix_extra" "1")
3871 (set_attr "length_immediate" "1")
3872 (set_attr "memory" "none,store")
3873 (set_attr "prefix" "vex")
3874 (set_attr "mode" "V8SF")])
;; Extracts elements 4-7 of an 8-element 256-bit vector (AVX256MODE8P)
;; into a register or memory, using vextractf128 with immediate 1.
3876 (define_insn "vec_extract_hi_<mode>"
3877 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3878 (vec_select:<avxhalfvecmode>
3879 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3880 (parallel [(const_int 4) (const_int 5)
3881 (const_int 6) (const_int 7)])))]
3883 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3884 [(set_attr "type" "sselog")
3885 (set_attr "prefix_extra" "1")
3886 (set_attr "length_immediate" "1")
3887 (set_attr "memory" "none,store")
3888 (set_attr "prefix" "vex")
3889 (set_attr "mode" "V8SF")])
;; Extracts elements 0-7 (the low 128 bits) of a V16HI vector into a
;; V8HI register or memory.  The vextractf128 immediate must be 0 to
;; select the low lane; immediate 1 would return elements 8-15 and
;; contradict the selector indices.
3891 (define_insn "vec_extract_lo_v16hi"
3892 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3894 (match_operand:V16HI 1 "register_operand" "x,x")
3895 (parallel [(const_int 0) (const_int 1)
3896 (const_int 2) (const_int 3)
3897 (const_int 4) (const_int 5)
3898 (const_int 6) (const_int 7)])))]
3900 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3901 [(set_attr "type" "sselog")
3902 (set_attr "prefix_extra" "1")
3903 (set_attr "length_immediate" "1")
3904 (set_attr "memory" "none,store")
3905 (set_attr "prefix" "vex")
3906 (set_attr "mode" "V8SF")])
;; Extracts elements 8-15 of a V16HI vector into a V8HI register or
;; memory, using vextractf128 with immediate 1.
3908 (define_insn "vec_extract_hi_v16hi"
3909 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3911 (match_operand:V16HI 1 "register_operand" "x,x")
3912 (parallel [(const_int 8) (const_int 9)
3913 (const_int 10) (const_int 11)
3914 (const_int 12) (const_int 13)
3915 (const_int 14) (const_int 15)])))]
3917 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3918 [(set_attr "type" "sselog")
3919 (set_attr "prefix_extra" "1")
3920 (set_attr "length_immediate" "1")
3921 (set_attr "memory" "none,store")
3922 (set_attr "prefix" "vex")
3923 (set_attr "mode" "V8SF")])
;; Extracts elements 0-15 (the low 128 bits) of a V32QI vector into a
;; V16QI register or memory.  The vextractf128 immediate must be 0 to
;; select the low lane; immediate 1 would return elements 16-31 and
;; contradict the selector indices.
3925 (define_insn "vec_extract_lo_v32qi"
3926 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3928 (match_operand:V32QI 1 "register_operand" "x,x")
3929 (parallel [(const_int 0) (const_int 1)
3930 (const_int 2) (const_int 3)
3931 (const_int 4) (const_int 5)
3932 (const_int 6) (const_int 7)
3933 (const_int 8) (const_int 9)
3934 (const_int 10) (const_int 11)
3935 (const_int 12) (const_int 13)
3936 (const_int 14) (const_int 15)])))]
3938 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3939 [(set_attr "type" "sselog")
3940 (set_attr "prefix_extra" "1")
3941 (set_attr "length_immediate" "1")
3942 (set_attr "memory" "none,store")
3943 (set_attr "prefix" "vex")
3944 (set_attr "mode" "V8SF")])
;; Extracts elements 16-31 of a V32QI vector into a V16QI register or
;; memory, using vextractf128 with immediate 1.
3946 (define_insn "vec_extract_hi_v32qi"
3947 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3949 (match_operand:V32QI 1 "register_operand" "x,x")
3950 (parallel [(const_int 16) (const_int 17)
3951 (const_int 18) (const_int 19)
3952 (const_int 20) (const_int 21)
3953 (const_int 22) (const_int 23)
3954 (const_int 24) (const_int 25)
3955 (const_int 26) (const_int 27)
3956 (const_int 28) (const_int 29)
3957 (const_int 30) (const_int 31)])))]
3959 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3960 [(set_attr "type" "sselog")
3961 (set_attr "prefix_extra" "1")
3962 (set_attr "length_immediate" "1")
3963 (set_attr "memory" "none,store")
3964 (set_attr "prefix" "vex")
3965 (set_attr "mode" "V8SF")])
;; extractps: moves the element of V4SF register operand 1 selected by
;; the constant index in operand 2 (0..3) into SF operand 0, which is a
;; general register or memory ("rm").  "%v" / "maybe_vex" cover both
;; legacy and VEX encodings.
3967 (define_insn "*sse4_1_extractps"
3968 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3970 (match_operand:V4SF 1 "register_operand" "x")
3971 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3973 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3974 [(set_attr "type" "sselog")
3975 (set_attr "prefix_data16" "1")
3976 (set_attr "prefix_extra" "1")
3977 (set_attr "length_immediate" "1")
3978 (set_attr "prefix" "maybe_vex")
3979 (set_attr "mode" "V4SF")])
;; Extracts element operand 2 (constant 0..3) of a V4SF in offsettable
;; memory into an SF register.  The split replaces it with a plain
;; SFmode load from the source address plus i*4 bytes.
3981 (define_insn_and_split "*vec_extract_v4sf_mem"
3982 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3984 (match_operand:V4SF 1 "memory_operand" "o")
3985 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3991 int i = INTVAL (operands[2]);
3993 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for every SSEMODE mode; delegates to
;; ix86_expand_vector_extract with the scalar destination (operand 0),
;; the source vector (operand 1) and the constant element index taken
;; from operand 2.
3997 (define_expand "vec_extract<mode>"
3998 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3999 (match_operand:SSEMODE 1 "register_operand" "")
4000 (match_operand 2 "const_int_operand" "")]
4003 ix86_expand_vector_extract (false, operands[0], operands[1],
4004 INTVAL (operands[2]));
4008 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4010 ;; Parallel double-precision floating point element swizzling
4012 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd on V4DF: selector indices 1,5,3,7 from the two
;; V4DF inputs; operand 2 may be a register or memory.
4014 (define_insn "avx_unpckhpd256"
4015 [(set (match_operand:V4DF 0 "register_operand" "=x")
4018 (match_operand:V4DF 1 "register_operand" "x")
4019 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4020 (parallel [(const_int 1) (const_int 5)
4021 (const_int 3) (const_int 7)])))]
4023 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4024 [(set_attr "type" "sselog")
4025 (set_attr "prefix" "vex")
4026 (set_attr "mode" "V4DF")])
;; Expander for the unpckhpd shuffle on V2DF operands.  All three
;; operands may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands on them before the pattern is emitted.
4028 (define_expand "sse2_unpckhpd_exp"
4029 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4032 (match_operand:V2DF 1 "nonimmediate_operand" "")
4033 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4034 (parallel [(const_int 1)
4037 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; VEX-encoded unpckhpd on V2DF; operands 1 and 2 may not both be
;; memory.  Alternatives:
;;   0: register/register              -> vunpckhpd
;;   1: operand 1 in offsettable memory -> vmovlpd using %H1
;;   2: memory destination tied to operand 2 -> vmovhpd store
4039 (define_insn "*avx_unpckhpd"
4040 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4043 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
4044 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
4045 (parallel [(const_int 1)
4047 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4049 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4050 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4051 vmovhpd\t{%1, %0|%0, %1}"
4052 [(set_attr "type" "sselog,ssemov,ssemov")
4053 (set_attr "prefix" "vex")
4054 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy SSE2 unpckhpd on V2DF; operands 1 and 2 may not both be
;; memory.  Alternatives:
;;   0: operand 1 tied to destination, register operand 2 -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied to destination
;;      -> movlpd using %H1
;;   2: memory destination tied to operand 2 -> movhpd store
4056 (define_insn "sse2_unpckhpd"
4057 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4060 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4061 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4062 (parallel [(const_int 1)
4064 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4066 unpckhpd\t{%2, %0|%0, %2}
4067 movlpd\t{%H1, %0|%0, %H1}
4068 movhpd\t{%1, %0|%0, %1}"
4069 [(set_attr "type" "sselog,ssemov,ssemov")
4070 (set_attr "prefix_data16" "*,1,1")
4071 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup: single V4DF source (register or memory), selector
;; lists elements 0, 2, 4 and 6.
4073 (define_insn "avx_movddup256"
4074 [(set (match_operand:V4DF 0 "register_operand" "=x")
4077 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
4079 (parallel [(const_int 0) (const_int 2)
4080 (const_int 4) (const_int 6)])))]
4082 "vmovddup\t{%1, %0|%0, %1}"
4083 [(set_attr "type" "sselog1")
4084 (set_attr "prefix" "vex")
4085 (set_attr "mode" "V4DF")])
;; AVX V2DF movddup.  Alternative 0 (register destination, register or
;; memory source) emits vmovddup; alternative 1 has an offsettable-memory
;; destination fed from a register and is typed ssemov.  The condition
;; rejects memory-to-memory.
4087 (define_insn "*avx_movddup"
4088 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4091 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4093 (parallel [(const_int 0)
4095 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4097 vmovddup\t{%1, %0|%0, %1}
4099 [(set_attr "type" "sselog1,ssemov")
4100 (set_attr "prefix" "vex")
4101 (set_attr "mode" "V2DF")])
;; SSE3 V2DF movddup.  Same operand layout as the AVX variant: alternative 0
;; emits movddup into a register, alternative 1 has an offsettable-memory
;; destination fed from a register (type ssemov).  Memory-to-memory is
;; rejected by the condition.
4103 (define_insn "*sse3_movddup"
4104 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4107 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4109 (parallel [(const_int 0)
4111 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4113 movddup\t{%1, %0|%0, %1}
4115 [(set_attr "type" "sselog1,ssemov")
4116 (set_attr "mode" "V2DF")])
;; Split, valid once reload has completed (TARGET_SSE3): a register source
;; duplicated into a V2DF memory destination becomes two DFmode moves of the
;; same DFmode view of the source register, to byte offsets 0 and 8 of the
;; destination.
4119 [(set (match_operand:V2DF 0 "memory_operand" "")
4122 (match_operand:V2DF 1 "register_operand" "")
4124 (parallel [(const_int 0)
4126 "TARGET_SSE3 && reload_completed"
4129 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4130 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4131 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit low unpack of doubles: emits vunpcklpd on V4DF operands
;; (operand 2 may be memory).  The selector lists elements 0, 4, 2 and 6.
4135 (define_insn "avx_unpcklpd256"
4136 [(set (match_operand:V4DF 0 "register_operand" "=x")
4139 (match_operand:V4DF 1 "register_operand" "x")
4140 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4141 (parallel [(const_int 0) (const_int 4)
4142 (const_int 2) (const_int 6)])))]
4144 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4145 [(set_attr "type" "sselog")
4146 (set_attr "prefix" "vex")
4147 (set_attr "mode" "V4DF")])
;; Expander for the V2DF low unpack.  All three operands are accepted as
;; nonimmediate; the preparation code runs ix86_fixup_binary_operands over
;; them with code UNKNOWN.
4149 (define_expand "sse2_unpcklpd_exp"
4150 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4153 (match_operand:V2DF 1 "nonimmediate_operand" "")
4154 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4155 (parallel [(const_int 0)
4158 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX V2DF low unpack.  Alternatives, in order:
;;   0: all registers                                   -> vunpcklpd
;;   1: operand 2 in memory                             -> vmovhpd
;;   2: offsettable-memory destination tied to operand 1 -> vmovlpd of
;;      operand 2 to %H0
;; The insn condition rejects operands 1 and 2 both being memory.
4160 (define_insn "*avx_unpcklpd"
4161 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4164 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4165 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4166 (parallel [(const_int 0)
4168 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4170 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4171 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4172 vmovlpd\t{%2, %H0|%H0, %2}"
4173 [(set_attr "type" "sselog,ssemov,ssemov")
4174 (set_attr "prefix" "vex")
4175 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 (two-operand) V2DF low unpack; operand 1 is tied to the destination
;; in every alternative.  Alternatives, in order:
;;   0: register operand 2            -> unpcklpd
;;   1: memory operand 2              -> movhpd
;;   2: offsettable-memory destination -> movlpd of operand 2 to %H0
;; prefix_data16 is set to 1 for the two move alternatives.
4177 (define_insn "sse2_unpcklpd"
4178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4181 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4182 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4183 (parallel [(const_int 0)
4185 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4187 unpcklpd\t{%2, %0|%0, %2}
4188 movhpd\t{%2, %0|%0, %2}
4189 movlpd\t{%2, %H0|%H0, %2}"
4190 [(set_attr "type" "sselog,ssemov,ssemov")
4191 (set_attr "prefix_data16" "*,1,1")
4192 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shuffle immediate as SImode constant operand 3 and
;; converting its bits into explicit element selectors passed to
;; gen_avx_shufpd256_1: bit 1 picks 5 or 4, bit 2 picks 3 or 2, and bit 3
;; picks 7 or 6.
4194 (define_expand "avx_shufpd256"
4195 [(match_operand:V4DF 0 "register_operand" "")
4196 (match_operand:V4DF 1 "register_operand" "")
4197 (match_operand:V4DF 2 "nonimmediate_operand" "")
4198 (match_operand:SI 3 "const_int_operand" "")]
4201 int mask = INTVAL (operands[3]);
4202 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4204 GEN_INT (mask & 2 ? 5 : 4),
4205 GEN_INT (mask & 4 ? 3 : 2),
4206 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection spelled out as four constant selector
;; operands (3..6, restricted to 0-1, 4-5, 2-3 and 6-7 respectively).  The
;; output code folds them back into a 4-bit immediate: operand 3 supplies
;; bit 0, and operands 4, 5 and 6 (minus their bases 4, 2 and 6) supply
;; bits 1, 2 and 3.  Operand 3 is then overwritten with that immediate.
4210 (define_insn "avx_shufpd256_1"
4211 [(set (match_operand:V4DF 0 "register_operand" "=x")
4214 (match_operand:V4DF 1 "register_operand" "x")
4215 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4216 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4217 (match_operand 4 "const_4_to_5_operand" "")
4218 (match_operand 5 "const_2_to_3_operand" "")
4219 (match_operand 6 "const_6_to_7_operand" "")])))]
4223 mask = INTVAL (operands[3]);
4224 mask |= (INTVAL (operands[4]) - 4) << 1;
4225 mask |= (INTVAL (operands[5]) - 2) << 2;
4226 mask |= (INTVAL (operands[6]) - 6) << 3;
4227 operands[3] = GEN_INT (mask);
4229 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4231 [(set_attr "type" "sselog")
4232 (set_attr "length_immediate" "1")
4233 (set_attr "prefix" "vex")
4234 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate as SImode constant operand 3 and
;; forwarding explicit selectors to gen_sse2_shufpd_v2df; bit 1 of the mask
;; picks selector 3 or 2.
4236 (define_expand "sse2_shufpd"
4237 [(match_operand:V2DF 0 "register_operand" "")
4238 (match_operand:V2DF 1 "register_operand" "")
4239 (match_operand:V2DF 2 "nonimmediate_operand" "")
4240 (match_operand:SI 3 "const_int_operand" "")]
4243 int mask = INTVAL (operands[3]);
4244 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4246 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander over the SSEMODE4S modes: a vec_select from the concatenation of
;; operands 1 and 2 (mode <ssedoublesizemode>) whose selector starts at
;; element 0.
4250 (define_expand "vec_extract_even<mode>"
4251 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4252 (vec_select:SSEMODE4S
4253 (vec_concat:<ssedoublesizemode>
4254 (match_operand:SSEMODE4S 1 "register_operand" "")
4255 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4256 (parallel [(const_int 0)
;; Expander over the SSEMODE4S modes: a vec_select from the concatenation of
;; operands 1 and 2 (mode <ssedoublesizemode>) whose selector starts at
;; element 1.
4262 (define_expand "vec_extract_odd<mode>"
4263 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4264 (vec_select:SSEMODE4S
4265 (vec_concat:<ssedoublesizemode>
4266 (match_operand:SSEMODE4S 1 "register_operand" "")
4267 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4268 (parallel [(const_int 1)
;; Expander over the SSEMODE2D modes: a vec_select from the concatenation of
;; operands 1 and 2 (mode <ssedoublesizemode>) whose selector starts at
;; element 0.
4274 (define_expand "vec_extract_even<mode>"
4275 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4276 (vec_select:SSEMODE2D
4277 (vec_concat:<ssedoublesizemode>
4278 (match_operand:SSEMODE2D 1 "register_operand" "")
4279 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4280 (parallel [(const_int 0)
;; Expander over the SSEMODE2D modes: a vec_select from the concatenation of
;; operands 1 and 2 (mode <ssedoublesizemode>) whose selector starts at
;; element 1.
4284 (define_expand "vec_extract_odd<mode>"
4285 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4286 (vec_select:SSEMODE2D
4287 (vec_concat:<ssedoublesizemode>
4288 (match_operand:SSEMODE2D 1 "register_operand" "")
4289 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4290 (parallel [(const_int 1)
4294 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI; the selector starts at element 1
;; and operand 2 may be memory.
4295 (define_insn "*avx_punpckhqdq"
4296 [(set (match_operand:V2DI 0 "register_operand" "=x")
4299 (match_operand:V2DI 1 "register_operand" "x")
4300 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4301 (parallel [(const_int 1)
4304 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4305 [(set_attr "type" "sselog")
4306 (set_attr "prefix" "vex")
4307 (set_attr "mode" "TI")])
;; SSE2 two-operand punpckhqdq on V2DI; operand 1 is tied to the
;; destination, the selector starts at element 1.
4309 (define_insn "sse2_punpckhqdq"
4310 [(set (match_operand:V2DI 0 "register_operand" "=x")
4313 (match_operand:V2DI 1 "register_operand" "0")
4314 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4315 (parallel [(const_int 1)
4318 "punpckhqdq\t{%2, %0|%0, %2}"
4319 [(set_attr "type" "sselog")
4320 (set_attr "prefix_data16" "1")
4321 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI; the selector starts at element 0
;; and operand 2 may be memory.
4323 (define_insn "*avx_punpcklqdq"
4324 [(set (match_operand:V2DI 0 "register_operand" "=x")
4327 (match_operand:V2DI 1 "register_operand" "x")
4328 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4329 (parallel [(const_int 0)
4332 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4333 [(set_attr "type" "sselog")
4334 (set_attr "prefix" "vex")
4335 (set_attr "mode" "TI")])
;; SSE2 two-operand punpcklqdq on V2DI; operand 1 is tied to the
;; destination, the selector starts at element 0.
4337 (define_insn "sse2_punpcklqdq"
4338 [(set (match_operand:V2DI 0 "register_operand" "=x")
4341 (match_operand:V2DI 1 "register_operand" "0")
4342 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4343 (parallel [(const_int 0)
4346 "punpcklqdq\t{%2, %0|%0, %2}"
4347 [(set_attr "type" "sselog")
4348 (set_attr "prefix_data16" "1")
4349 (set_attr "mode" "TI")])
;; AVX vshufpd over the SSEMODE2D modes.  Selector operand 3 (0 or 1)
;; becomes bit 0 of the immediate and selector operand 4 (2 or 3, minus 2)
;; becomes bit 1; operand 3 is overwritten with the combined immediate
;; before the template is returned.
4351 (define_insn "*avx_shufpd_<mode>"
4352 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4353 (vec_select:SSEMODE2D
4354 (vec_concat:<ssedoublesizemode>
4355 (match_operand:SSEMODE2D 1 "register_operand" "x")
4356 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4357 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4358 (match_operand 4 "const_2_to_3_operand" "")])))]
4362 mask = INTVAL (operands[3]);
4363 mask |= (INTVAL (operands[4]) - 2) << 1;
4364 operands[3] = GEN_INT (mask);
4366 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4368 [(set_attr "type" "sselog")
4369 (set_attr "length_immediate" "1")
4370 (set_attr "prefix" "vex")
4371 (set_attr "mode" "V2DF")])
;; SSE2 two-operand shufpd over the SSEMODE2D modes (operand 1 tied to the
;; destination).  The immediate is rebuilt the same way as in the AVX
;; variant: operand 3 gives bit 0, operand 4 minus 2 gives bit 1, and the
;; result replaces operand 3.
4373 (define_insn "sse2_shufpd_<mode>"
4374 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4375 (vec_select:SSEMODE2D
4376 (vec_concat:<ssedoublesizemode>
4377 (match_operand:SSEMODE2D 1 "register_operand" "0")
4378 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4379 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4380 (match_operand 4 "const_2_to_3_operand" "")])))]
4384 mask = INTVAL (operands[3]);
4385 mask |= (INTVAL (operands[4]) - 2) << 1;
4386 operands[3] = GEN_INT (mask);
4388 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4390 [(set_attr "type" "sselog")
4391 (set_attr "length_immediate" "1")
4392 (set_attr "mode" "V2DF")])
4394 ;; Avoid combining registers from different units in a single alternative,
4395 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination.  Five
;; alternatives: a vmovhpd store to memory, a register-to-register
;; vunpckhpd, and three alternatives that read operand 1 from offsettable
;; memory into an SSE, x87 (*f) or general register (types ssemov, fmov,
;; imov).  Memory-to-memory is rejected by the condition.
4396 (define_insn "*avx_storehpd"
4397 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4399 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4400 (parallel [(const_int 1)])))]
4401 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4403 vmovhpd\t{%1, %0|%0, %1}
4404 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4408 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4409 (set_attr "prefix" "vex")
4410 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination.  Same five
;; alternatives as the AVX variant, except that the register-to-register
;; alternative ties operand 1 to the destination.  Alternative 0 is a
;; movhpd store and is the only one with prefix_data16 set.
4412 (define_insn "sse2_storehpd"
4413 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4415 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4416 (parallel [(const_int 1)])))]
4417 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4419 movhpd\t{%1, %0|%0, %1}
4424 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4425 (set_attr "prefix_data16" "1,*,*,*,*")
4426 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Split, valid once reload has completed (TARGET_SSE2): extracting element
;; 1 of a V2DF memory operand into a register becomes a plain DFmode move
;; from the source address plus 8 bytes.
4429 [(set (match_operand:DF 0 "register_operand" "")
4431 (match_operand:V2DF 1 "memory_operand" "")
4432 (parallel [(const_int 1)])))]
4433 "TARGET_SSE2 && reload_completed"
4434 [(set (match_dup 0) (match_dup 1))]
4436 operands[1] = adjust_address (operands[1], DFmode, 8);
4439 ;; Avoid combining registers from different units in a single alternative,
4440 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination, shared between
;; SSE2 and AVX (the template uses the %v prefix and the prefix attribute is
;; maybe_vex).  Alternative 0 is a movlpd store to memory; the other four
;; alternatives target SSE, x87 (*f) or general registers.
4441 (define_insn "sse2_storelpd"
4442 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4444 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4445 (parallel [(const_int 0)])))]
4446 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4448 %vmovlpd\t{%1, %0|%0, %1}
4453 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4454 (set_attr "prefix_data16" "1,*,*,*,*")
4455 (set_attr "prefix" "maybe_vex")
4456 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split, valid once reload has completed (TARGET_SSE2): extracting element
;; 0 of a V2DF into a register becomes a DFmode move.  The source is
;; re-expressed in DFmode, either as the same hard register
;; (gen_rtx_REG on its register number) or via gen_lowpart.
4459 [(set (match_operand:DF 0 "register_operand" "")
4461 (match_operand:V2DF 1 "nonimmediate_operand" "")
4462 (parallel [(const_int 0)])))]
4463 "TARGET_SSE2 && reload_completed"
4466 rtx op1 = operands[1];
4468 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4470 op1 = gen_lowpart (DFmode, op1);
4471 emit_move_insn (operands[0], op1);
;; Expander combining element 0 of V2DF operand 1 with DF operand 2.  All
;; operands are accepted as nonimmediate; the preparation code runs
;; ix86_fixup_binary_operands over them with code UNKNOWN.
4475 (define_expand "sse2_loadhpd_exp"
4476 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4479 (match_operand:V2DF 1 "nonimmediate_operand" "")
4480 (parallel [(const_int 0)]))
4481 (match_operand:DF 2 "nonimmediate_operand" "")))]
4483 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4485 ;; Avoid combining registers from different units in a single alternative,
4486 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: element 0 of operand 1 combined with DF operand 2.  Five
;; alternatives: vmovhpd with operand 2 in memory, vunpcklpd with operand 2
;; in a register, and three alternatives with an offsettable-memory
;; destination tied to operand 1 whose operand 2 comes from an SSE, x87 (*f)
;; or general register.
4487 (define_insn "*avx_loadhpd"
4488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4491 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4492 (parallel [(const_int 0)]))
4493 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4494 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4496 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4497 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4501 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4502 (set_attr "prefix" "vex")
4503 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 form: element 0 of operand 1 combined with DF operand 2.  Six
;; alternatives: movhpd (operand 2 in memory), unpcklpd (operand 2 in a
;; register), shufpd with immediate 1 (operand 2 tied to the destination),
;; and three alternatives with an offsettable-memory destination tied to
;; operand 1.  length_immediate is set only for the shufpd alternative.
4505 (define_insn "sse2_loadhpd"
4506 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4509 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4510 (parallel [(const_int 0)]))
4511 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4512 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4514 movhpd\t{%2, %0|%0, %2}
4515 unpcklpd\t{%2, %0|%0, %2}
4516 shufpd\t{$1, %1, %0|%0, %1, 1}
4520 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4521 (set_attr "prefix_data16" "1,*,*,*,*,*")
4522 (set_attr "length_immediate" "*,*,1,*,*,*")
4523 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Split, valid once reload has completed (TARGET_SSE2): when the V2DF
;; destination is memory and keeps its own element 0, only DF operand 1
;; needs storing, as a DFmode move to the destination address plus 8 bytes.
4526 [(set (match_operand:V2DF 0 "memory_operand" "")
4528 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4529 (match_operand:DF 1 "register_operand" "")))]
4530 "TARGET_SSE2 && reload_completed"
4531 [(set (match_dup 0) (match_dup 1))]
4533 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander combining DF operand 2 with element 1 of V2DF operand 1.  All
;; operands are accepted as nonimmediate; the preparation code runs
;; ix86_fixup_binary_operands over them with code UNKNOWN.
4536 (define_expand "sse2_loadlpd_exp"
4537 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4539 (match_operand:DF 2 "nonimmediate_operand" "")
4541 (match_operand:V2DF 1 "nonimmediate_operand" "")
4542 (parallel [(const_int 1)]))))]
4544 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4546 ;; Avoid combining registers from different units in a single alternative,
4547 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: DF operand 2 combined with element 1 of operand 1.  Seven
;; alternatives; the four register-destination ones emit vmovsd (operand 1
;; is the constant C, operand 2 in memory), vmovlpd (operand 2 in memory),
;; vmovsd (all registers) and vmovhpd using %H1 (operand 1 in offsettable
;; memory).  The last three have a memory destination tied to operand 1
;; and take operand 2 from an SSE, x87 (*f) or general register.
4548 (define_insn "*avx_loadlpd"
4549 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4551 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4553 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4554 (parallel [(const_int 1)]))))]
4555 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4557 vmovsd\t{%2, %0|%0, %2}
4558 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4559 vmovsd\t{%2, %1, %0|%0, %1, %2}
4560 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4564 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4565 (set_attr "prefix" "vex")
4566 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 form: DF operand 2 combined with element 1 of operand 1.  Eight
;; alternatives; the five register-destination ones emit movsd, movlpd,
;; movsd, shufpd with immediate 2, and movhpd using %H1.  The last three
;; have a memory destination tied to operand 1 and take operand 2 from an
;; SSE, x87 (*f) or general register.  length_immediate is set only for the
;; shufpd alternative.
4568 (define_insn "sse2_loadlpd"
4569 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4571 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4573 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4574 (parallel [(const_int 1)]))))]
4575 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4577 movsd\t{%2, %0|%0, %2}
4578 movlpd\t{%2, %0|%0, %2}
4579 movsd\t{%2, %0|%0, %2}
4580 shufpd\t{$2, %2, %0|%0, %2, 2}
4581 movhpd\t{%H1, %0|%0, %H1}
4585 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4586 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4587 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4588 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Split, valid once reload has completed (TARGET_SSE2): when the V2DF
;; destination is memory and keeps its own element 1, only DF operand 1
;; needs storing.  Operand 1 is the first (low) element of the result, so
;; the DFmode store goes to byte offset 0 of the destination; offset 8
;; would clobber the element that the pattern preserves.
4591 [(set (match_operand:V2DF 0 "memory_operand" "")
4593 (match_operand:DF 1 "register_operand" "")
4594 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4595 "TARGET_SSE2 && reload_completed"
4596 [(set (match_dup 0) (match_dup 1))]
4598 operands[0] = adjust_address (operands[0], DFmode, 0);
4601 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extraction of element 1 of a V2DF for targets with SSE but without SSE2.
;; Alternatives: movhps store to memory, movhlps between registers, and
;; movlps from the %H1 form of an offsettable-memory source.
4603 (define_insn "*vec_extractv2df_1_sse"
4604 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4606 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4607 (parallel [(const_int 1)])))]
4608 "!TARGET_SSE2 && TARGET_SSE
4609 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4611 movhps\t{%1, %0|%0, %1}
4612 movhlps\t{%1, %0|%0, %1}
4613 movlps\t{%H1, %0|%0, %H1}"
4614 [(set_attr "type" "ssemov")
4615 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF for targets with SSE but without SSE2.
;; Alternatives: movlps store to memory, movaps between registers, and
;; movlps load from memory.
4617 (define_insn "*vec_extractv2df_0_sse"
4618 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4620 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4621 (parallel [(const_int 0)])))]
4622 "!TARGET_SSE2 && TARGET_SSE
4623 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4625 movlps\t{%1, %0|%0, %1}
4626 movaps\t{%1, %0|%0, %1}
4627 movlps\t{%1, %0|%0, %1}"
4628 [(set_attr "type" "ssemov")
4629 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX V2DF merge of operands 2 and 1.  Five alternatives, in order:
;; vmovsd (registers), vmovlpd (operand 2 in memory), vmovlpd store
;; (memory destination tied to operand 1), vmovhps using %H1 (operand 1 in
;; offsettable memory), and vmovhps store of operand 1 to %H0 (offsettable
;; memory destination tied to operand 2).
4631 (define_insn "*avx_movsd"
4632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4634 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4635 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4639 vmovsd\t{%2, %1, %0|%0, %1, %2}
4640 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4641 vmovlpd\t{%2, %0|%0, %2}
4642 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4643 vmovhps\t{%1, %H0|%H0, %1}"
4644 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4645 (set_attr "prefix" "vex")
4646 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF merge of operands 2 and 1.  Six alternatives, in order: movsd,
;; movlpd load, movlpd store, shufpd with immediate 2, movhps using %H1,
;; and movhps store of operand 1 to %H0.  In the first three, operand 1 is
;; tied to the destination; in the last three, operand 2 is.
4648 (define_insn "sse2_movsd"
4649 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4651 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4652 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4656 movsd\t{%2, %0|%0, %2}
4657 movlpd\t{%2, %0|%0, %2}
4658 movlpd\t{%2, %0|%0, %2}
4659 shufpd\t{$2, %2, %0|%0, %2, 2}
4660 movhps\t{%H1, %0|%0, %H1}
4661 movhps\t{%1, %H0|%H0, %1}"
4662 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4663 (set_attr "prefix_data16" "*,1,1,*,*,*")
4664 (set_attr "length_immediate" "*,*,*,1,*,*")
4665 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF operand (register or memory) with
;; movddup; the %v template prefix and maybe_vex attribute cover both the
;; legacy and VEX encodings.
4667 (define_insn "*vec_dupv2df_sse3"
4668 [(set (match_operand:V2DF 0 "register_operand" "=x")
4670 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4672 "%vmovddup\t{%1, %0|%0, %1}"
4673 [(set_attr "type" "sselog1")
4674 (set_attr "prefix" "maybe_vex")
4675 (set_attr "mode" "DF")])
;; Builds a V2DF from a single DF register operand that is tied to the
;; destination register (constraint "0").
4677 (define_insn "vec_dupv2df"
4678 [(set (match_operand:V2DF 0 "register_operand" "=x")
4680 (match_operand:DF 1 "register_operand" "0")))]
4683 [(set_attr "type" "sselog1")
4684 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 (register or memory) using movddup, with
;; the %v prefix / maybe_vex attribute selecting the encoding.
4686 (define_insn "*vec_concatv2df_sse3"
4687 [(set (match_operand:V2DF 0 "register_operand" "=x")
4689 (match_operand:DF 1 "nonimmediate_operand" "xm")
4692 "%vmovddup\t{%1, %0|%0, %1}"
4693 [(set_attr "type" "sselog1")
4694 (set_attr "prefix" "maybe_vex")
4695 (set_attr "mode" "DF")])
;; AVX V2DF built from DF operands 1 and 2.  Alternatives, in order:
;;   0: both in registers                 -> vunpcklpd
;;   1: operand 2 in memory               -> vmovhpd
;;   2: operand 1 in memory, operand 2 = C -> vmovsd load of operand 1
4697 (define_insn "*vec_concatv2df_avx"
4698 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4700 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4701 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4704 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4705 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4706 vmovsd\t{%1, %0|%0, %1}"
4707 [(set_attr "type" "ssemov")
4708 (set_attr "prefix" "vex")
4709 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX V2DF built from DF operands 1 and 2.  The first three
;; alternatives use the Y2 register constraint and emit unpcklpd, movhpd
;; and movsd; the last two use the plain x constraint and emit movlhps and
;; movhps.  Except for the movsd load, operand 1 is tied to the destination.
4711 (define_insn "*vec_concatv2df"
4712 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4714 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4715 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4718 unpcklpd\t{%2, %0|%0, %2}
4719 movhpd\t{%2, %0|%0, %2}
4720 movsd\t{%1, %0|%0, %1}
4721 movlhps\t{%2, %0|%0, %2}
4722 movhps\t{%2, %0|%0, %2}"
4723 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4724 (set_attr "prefix_data16" "*,1,*,*,*")
4725 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4727 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4729 ;; Parallel integral arithmetic
4731 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander over SSEMODEI.  The preparation code
;; sets operand 2 to a register holding the zero vector of the mode.
4733 (define_expand "neg<mode>2"
4734 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4737 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4739 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander over SSEMODEI; operands are
;; canonicalised by ix86_fixup_binary_operands_no_copy for the iterated
;; <CODE>.
4741 (define_expand "<plusminus_insn><mode>3"
4742 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4744 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4745 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4747 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer add/subtract over SSEMODEI; the mnemonic is
;; assembled as "vp" + <plusminus_mnemonic> + <ssevecsize>.  Accepted only
;; when ix86_binary_operator_ok approves the operands.
4749 (define_insn "*avx_<plusminus_insn><mode>3"
4750 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4752 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4753 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4754 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4755 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4756 [(set_attr "type" "sseiadd")
4757 (set_attr "prefix" "vex")
4758 (set_attr "mode" "TI")])
;; SSE2 two-operand integer add/subtract over SSEMODEI (operand 1 tied to
;; the destination); the mnemonic is "p" + <plusminus_mnemonic> +
;; <ssevecsize>.
4760 (define_insn "*<plusminus_insn><mode>3"
4761 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4763 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4764 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4765 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4766 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4767 [(set_attr "type" "sseiadd")
4768 (set_attr "prefix_data16" "1")
4769 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on the SSEMODE12 modes
;; (V16QI, V8HI); operands are canonicalised by
;; ix86_fixup_binary_operands_no_copy.
4771 (define_expand "sse2_<plusminus_insn><mode>3"
4772 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4773 (sat_plusminus:SSEMODE12
4774 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4775 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4777 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus operations on SSEMODE12;
;; accepted only when ix86_binary_operator_ok approves the operands.
4779 (define_insn "*avx_<plusminus_insn><mode>3"
4780 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4781 (sat_plusminus:SSEMODE12
4782 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4783 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4784 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4785 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4786 [(set_attr "type" "sseiadd")
4787 (set_attr "prefix" "vex")
4788 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus operations on SSEMODE12
;; (operand 1 tied to the destination).
4790 (define_insn "*sse2_<plusminus_insn><mode>3"
4791 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4792 (sat_plusminus:SSEMODE12
4793 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4794 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4795 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4796 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4797 [(set_attr "type" "sseiadd")
4798 (set_attr "prefix_data16" "1")
4799 (set_attr "mode" "TI")])
;; V16QI multiply, written as an insn-and-split whose condition requires
;; can_create_pseudo_p ().  Both code paths below follow the same plan:
;; spread the source bytes into the low byte of each 16-bit word, multiply
;; the words with gen_mulv8hi3, then gather the low byte of every product
;; back into one register.  The first path uses the SSE5 unpack/pack
;; helpers; the second uses punpck{h,l}bw interleaves throughout.
4801 (define_insn_and_split "mulv16qi3"
4802 [(set (match_operand:V16QI 0 "register_operand" "")
4803 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4804 (match_operand:V16QI 2 "register_operand" "")))]
4806 && can_create_pseudo_p ()"
4811 rtx t[12], op0, op[3];
4816 /* On SSE5, we can take advantage of the pperm instruction to pack and
4817 unpack the bytes. Unpack data such that we've got a source byte in
4818 each low byte of each word. We don't care what goes into the high
4819 byte, so put 0 there. */
4820 for (i = 0; i < 6; ++i)
4821 t[i] = gen_reg_rtx (V8HImode);
4823 for (i = 0; i < 2; i++)
4826 op[1] = operands[i+1];
4827 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4830 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4833 /* Multiply words. */
4834 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4835 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4837 /* Pack the low byte of each word back into a single xmm */
4838 op[0] = operands[0];
4841 ix86_expand_sse5_pack (op);
4845 for (i = 0; i < 12; ++i)
4846 t[i] = gen_reg_rtx (V16QImode);
4848 /* Unpack data such that we've got a source byte in each low byte of
4849 each word. We don't care what goes into the high byte of each word.
4850 Rather than trying to get zero in there, most convenient is to let
4851 it be a copy of the low byte. */
4852 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4853 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4854 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4855 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4857 /* Multiply words. The end-of-line annotations here give a picture of what
4858 the output of that instruction looks like. Dot means don't care; the
4859 letters are the bytes of the result with A being the most significant. */
4860 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4861 gen_lowpart (V8HImode, t[0]),
4862 gen_lowpart (V8HImode, t[1])));
4863 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4864 gen_lowpart (V8HImode, t[2]),
4865 gen_lowpart (V8HImode, t[3])));
4867 /* Extract the relevant bytes and merge them back together. */
4868 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4869 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4870 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4871 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4872 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4873 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4876 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are canonicalised by
;; ix86_fixup_binary_operands_no_copy for MULT.
4880 (define_expand "mulv8hi3"
4881 [(set (match_operand:V8HI 0 "register_operand" "")
4882 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4883 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4885 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply (vpmullw); operands 1 and 2 are marked
;; commutative with "%".
4887 (define_insn "*avx_mulv8hi3"
4888 [(set (match_operand:V8HI 0 "register_operand" "=x")
4889 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4890 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4891 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4892 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4893 [(set_attr "type" "sseimul")
4894 (set_attr "prefix" "vex")
4895 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply (pmullw); operand 1 is tied to the
;; destination and commutative with operand 2.
4897 (define_insn "*mulv8hi3"
4898 [(set (match_operand:V8HI 0 "register_operand" "=x")
4899 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4900 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4901 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4902 "pmullw\t{%2, %0|%0, %2}"
4903 [(set_attr "type" "sseimul")
4904 (set_attr "prefix_data16" "1")
4905 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply; operands are
;; canonicalised by ix86_fixup_binary_operands_no_copy for MULT.
4907 (define_expand "smulv8hi3_highpart"
4908 [(set (match_operand:V8HI 0 "register_operand" "")
4913 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4915 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4918 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI high-part multiply emitting vpmulhw.  The pattern
;; name now carries the "_smul" part so that it lines up with its siblings
;; *avx_umulv8hi3_highpart and *smulv8hi3_highpart; the name starts with
;; "*", so it is used for debugging output only.
4920 (define_insn "*avx_smulv8hi3_highpart"
4921 [(set (match_operand:V8HI 0 "register_operand" "=x")
4926 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4928 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4930 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4931 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4932 [(set_attr "type" "sseimul")
4933 (set_attr "prefix" "vex")
4934 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI high-part multiply emitting pmulhw; operand 1 is
;; tied to the destination and commutative with operand 2.
4936 (define_insn "*smulv8hi3_highpart"
4937 [(set (match_operand:V8HI 0 "register_operand" "=x")
4942 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4944 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4946 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4947 "pmulhw\t{%2, %0|%0, %2}"
4948 [(set_attr "type" "sseimul")
4949 (set_attr "prefix_data16" "1")
4950 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply; operands are
;; canonicalised by ix86_fixup_binary_operands_no_copy for MULT.
4952 (define_expand "umulv8hi3_highpart"
4953 [(set (match_operand:V8HI 0 "register_operand" "")
4958 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4960 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4963 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI high-part multiply emitting vpmulhuw.
4965 (define_insn "*avx_umulv8hi3_highpart"
4966 [(set (match_operand:V8HI 0 "register_operand" "=x")
4971 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4973 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4975 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4976 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4977 [(set_attr "type" "sseimul")
4978 (set_attr "prefix" "vex")
4979 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI high-part multiply emitting pmulhuw; operand 1 is
;; tied to the destination and commutative with operand 2.
4981 (define_insn "*umulv8hi3_highpart"
4982 [(set (match_operand:V8HI 0 "register_operand" "=x")
4987 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4989 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4991 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4992 "pmulhuw\t{%2, %0|%0, %2}"
4993 [(set_attr "type" "sseimul")
4994 (set_attr "prefix_data16" "1")
4995 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; operands; operands are canonicalised by
;; ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
4997 (define_expand "sse2_umulv2siv2di3"
4998 [(set (match_operand:V2DI 0 "register_operand" "")
5002 (match_operand:V4SI 1 "nonimmediate_operand" "")
5003 (parallel [(const_int 0) (const_int 2)])))
5006 (match_operand:V4SI 2 "nonimmediate_operand" "")
5007 (parallel [(const_int 0) (const_int 2)])))))]
5009 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuludq: V2DI result from elements 0 and 2 of each
;; V4SI operand.
5011 (define_insn "*avx_umulv2siv2di3"
5012 [(set (match_operand:V2DI 0 "register_operand" "=x")
5016 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5017 (parallel [(const_int 0) (const_int 2)])))
5020 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5021 (parallel [(const_int 0) (const_int 2)])))))]
5022 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5023 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5024 [(set_attr "type" "sseimul")
5025 (set_attr "prefix" "vex")
5026 (set_attr "mode" "TI")])
;; SSE2 two-operand pmuludq: V2DI result from elements 0 and 2 of each V4SI
;; operand; operand 1 is tied to the destination.
5028 (define_insn "*sse2_umulv2siv2di3"
5029 [(set (match_operand:V2DI 0 "register_operand" "=x")
5033 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5034 (parallel [(const_int 0) (const_int 2)])))
5037 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5038 (parallel [(const_int 0) (const_int 2)])))))]
5039 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5040 "pmuludq\t{%2, %0|%0, %2}"
5041 [(set_attr "type" "sseimul")
5042 (set_attr "prefix_data16" "1")
5043 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; operands (the SSE4.1 counterpart of sse2_umulv2siv2di3); operands are
;; canonicalised by ix86_fixup_binary_operands_no_copy.
5045 (define_expand "sse4_1_mulv2siv2di3"
5046 [(set (match_operand:V2DI 0 "register_operand" "")
5050 (match_operand:V4SI 1 "nonimmediate_operand" "")
5051 (parallel [(const_int 0) (const_int 2)])))
5054 (match_operand:V4SI 2 "nonimmediate_operand" "")
5055 (parallel [(const_int 0) (const_int 2)])))))]
5057 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuldq: V2DI result from elements 0 and 2 of each
;; V4SI operand.  prefix_extra is set to 1 for this encoding.
5059 (define_insn "*avx_mulv2siv2di3"
5060 [(set (match_operand:V2DI 0 "register_operand" "=x")
5064 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5065 (parallel [(const_int 0) (const_int 2)])))
5068 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5069 (parallel [(const_int 0) (const_int 2)])))))]
5070 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5071 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5072 [(set_attr "type" "sseimul")
5073 (set_attr "prefix_extra" "1")
5074 (set_attr "prefix" "vex")
5075 (set_attr "mode" "TI")])
;; SSE4.1 two-operand pmuldq: V2DI result from elements 0 and 2 of each
;; V4SI operand; operand 1 is tied to the destination.
5077 (define_insn "*sse4_1_mulv2siv2di3"
5078 [(set (match_operand:V2DI 0 "register_operand" "=x")
5082 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5083 (parallel [(const_int 0) (const_int 2)])))
5086 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5087 (parallel [(const_int 0) (const_int 2)])))))]
5088 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5089 "pmuldq\t{%2, %0|%0, %2}"
5090 [(set_attr "type" "sseimul")
5091 (set_attr "prefix_extra" "1")
5092 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V4SI result built from V4HI selections of the two
;; V8HI operands.  Each operand is selected twice, once with a selector
;; starting at element 0 and once with a selector running from element 1 to
;; element 7.  Operands are canonicalised by
;; ix86_fixup_binary_operands_no_copy.
5094 (define_expand "sse2_pmaddwd"
5095 [(set (match_operand:V4SI 0 "register_operand" "")
5100 (match_operand:V8HI 1 "nonimmediate_operand" "")
5101 (parallel [(const_int 0)
5107 (match_operand:V8HI 2 "nonimmediate_operand" "")
5108 (parallel [(const_int 0)
5114 (vec_select:V4HI (match_dup 1)
5115 (parallel [(const_int 1)
5120 (vec_select:V4HI (match_dup 2)
5121 (parallel [(const_int 1)
5124 (const_int 7)]))))))]
5126 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: emits the three-operand vpmaddwd with a VEX
;; prefix.  Operand 1 is an SSE register ("%x"), operand 2 a register or
;; memory ("xm").  Requires TARGET_AVX and ix86_binary_operator_ok for
;; MULT in V8HImode.
5128 (define_insn "*avx_pmaddwd"
5129 [(set (match_operand:V4SI 0 "register_operand" "=x")
5134 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5135 (parallel [(const_int 0)
5141 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5142 (parallel [(const_int 0)
5148 (vec_select:V4HI (match_dup 1)
5149 (parallel [(const_int 1)
5154 (vec_select:V4HI (match_dup 2)
5155 (parallel [(const_int 1)
5158 (const_int 7)]))))))]
5159 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5160 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5161 [(set_attr "type" "sseiadd")
5162 (set_attr "prefix" "vex")
5163 (set_attr "mode" "TI")])
;; SSE2 form of pmaddwd: two-operand encoding, so operand 1 is tied to
;; the destination ("%0").  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for MULT in V8HImode.  Attributes mark it as
;; type sseiadd, atom_unit simul, with a data16 prefix.
5165 (define_insn "*sse2_pmaddwd"
5166 [(set (match_operand:V4SI 0 "register_operand" "=x")
5171 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5172 (parallel [(const_int 0)
5178 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5179 (parallel [(const_int 0)
5185 (vec_select:V4HI (match_dup 1)
5186 (parallel [(const_int 1)
5191 (vec_select:V4HI (match_dup 2)
5192 (parallel [(const_int 1)
5195 (const_int 7)]))))))]
5196 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5197 "pmaddwd\t{%2, %0|%0, %2}"
5198 [(set_attr "type" "sseiadd")
5199 (set_attr "atom_unit" "simul")
5200 (set_attr "prefix_data16" "1")
5201 (set_attr "mode" "TI")])
;; Expander for V4SI element-wise multiplication; all three operands are
;; registers.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy only when TARGET_SSE4_1 or
;; TARGET_SSE5 is set; other targets are left to the *sse2_mulv4si3
;; splitter.
5203 (define_expand "mulv4si3"
5204 [(set (match_operand:V4SI 0 "register_operand" "")
5205 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5206 (match_operand:V4SI 2 "register_operand" "")))]
5209 if (TARGET_SSE4_1 || TARGET_SSE5)
5210 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply: emits the three-operand vpmulld with a VEX prefix.
;; Operand 1 is an SSE register ("%x"); operand 2 may be a register or
;; memory ("xm").  Requires TARGET_AVX and ix86_binary_operator_ok.
5213 (define_insn "*avx_mulv4si3"
5214 [(set (match_operand:V4SI 0 "register_operand" "=x")
5215 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5216 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5217 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5218 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5219 [(set_attr "type" "sseimul")
5220 (set_attr "prefix_extra" "1")
5221 (set_attr "prefix" "vex")
5222 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply: emits the two-operand pmulld, with operand 1
;; tied to the destination ("%0").  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.
5224 (define_insn "*sse4_1_mulv4si3"
5225 [(set (match_operand:V4SI 0 "register_operand" "=x")
5226 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5227 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5229 "pmulld\t{%2, %0|%0, %2}"
5230 [(set_attr "type" "sseimul")
5231 (set_attr "prefix_extra" "1")
5232 (set_attr "mode" "TI")])
5234 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5235 ;; multiply/add. In general, we expect the define_split to occur before
5236 ;; register allocation, so we have to handle the corner case where the target
5237 ;; is the same as one of the inputs.
;; The destination is earlyclobber ("=&x").  The split condition allows
;; splitting once reload has completed, or earlier provided operand 0 is
;; not mentioned in operand 1 or operand 2.  The preparation code sets
;; operands[3] to the V4SImode zero constant, which serves as the addend
;; of the multiply/add.
5238 (define_insn_and_split "*sse5_mulv4si3"
5239 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5240 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5241 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5244 "&& (reload_completed
5245 || (!reg_mentioned_p (operands[0], operands[1])
5246 && !reg_mentioned_p (operands[0], operands[2])))"
5250 (plus:V4SI (mult:V4SI (match_dup 1)
5254 operands[3] = CONST0_RTX (V4SImode);
5256 [(set_attr "type" "ssemuladd")
5257 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5, valid only
;; while new pseudos can still be created.  The split code builds the
;; result from two gen_sse2_umulv2siv2di3 products: one on the inputs as
;; given (elements 0 and 2) and one on copies shifted with
;; gen_sse2_lshrti3 (elements 1 and 3).  Each product is compacted with
;; gen_sse2_pshufd_1 and the two are merged into operand 0 with
;; gen_sse2_punpckldq.
5259 (define_insn_and_split "*sse2_mulv4si3"
5260 [(set (match_operand:V4SI 0 "register_operand" "")
5261 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5262 (match_operand:V4SI 2 "register_operand" "")))]
5263 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5264 && can_create_pseudo_p ()"
5269 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5275 t1 = gen_reg_rtx (V4SImode);
5276 t2 = gen_reg_rtx (V4SImode);
5277 t3 = gen_reg_rtx (V4SImode);
5278 t4 = gen_reg_rtx (V4SImode);
5279 t5 = gen_reg_rtx (V4SImode);
5280 t6 = gen_reg_rtx (V4SImode);
5281 thirtytwo = GEN_INT (32);
5283 /* Multiply elements 2 and 0. */
5284 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5287 /* Shift both input vectors down one element, so that elements 3
5288 and 1 are now in the slots for elements 2 and 0. For K8, at
5289 least, this is faster than using a shuffle. */
5290 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5291 gen_lowpart (TImode, op1),
5293 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5294 gen_lowpart (TImode, op2),
5296 /* Multiply elements 3 and 1. */
5297 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5300 /* Move the results in element 2 down to element 1; we don't care
5301 what goes in elements 2 and 3. */
5302 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5303 const0_rtx, const0_rtx));
5304 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5305 const0_rtx, const0_rtx));
5307 /* Merge the parts back together. */
5308 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI element-wise multiply, split while new pseudos can still be
;; created.  Two expansion sequences appear in the split code:
;;  - one using SSE5 helpers (pshufd, pmacsdd with a zero addend,
;;    phadddq, a left shift by 32, then pmacsdql);
;;  - one that sums three gen_sse2_umulv2siv2di3 partial products:
;;    low*low plus the two high*low cross products, each cross product
;;    shifted left by 32.
;; NOTE(review): the condition selecting between the two sequences is
;; presumably TARGET_SSE5 -- confirm against the full pattern.
5312 (define_insn_and_split "mulv2di3"
5313 [(set (match_operand:V2DI 0 "register_operand" "")
5314 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5315 (match_operand:V2DI 2 "register_operand" "")))]
5317 && can_create_pseudo_p ()"
5322 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5327 /* op1: A,B,C,D, op2: E,F,G,H */
5329 op1 = gen_lowpart (V4SImode, operands[1]);
5330 op2 = gen_lowpart (V4SImode, operands[2]);
5331 t1 = gen_reg_rtx (V4SImode);
5332 t2 = gen_reg_rtx (V4SImode);
5333 t3 = gen_reg_rtx (V4SImode);
5334 t4 = gen_reg_rtx (V2DImode);
5335 t5 = gen_reg_rtx (V2DImode);
5338 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5345 emit_move_insn (t2, CONST0_RTX (V4SImode));
5347 /* t3: (B*E),(A*F),(D*G),(C*H) */
5348 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5350 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5351 emit_insn (gen_sse5_phadddq (t4, t3));
5353 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5354 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5356 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5357 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5364 t1 = gen_reg_rtx (V2DImode);
5365 t2 = gen_reg_rtx (V2DImode);
5366 t3 = gen_reg_rtx (V2DImode);
5367 t4 = gen_reg_rtx (V2DImode);
5368 t5 = gen_reg_rtx (V2DImode);
5369 t6 = gen_reg_rtx (V2DImode);
5370 thirtytwo = GEN_INT (32);
5372 /* Multiply low parts. */
5373 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5374 gen_lowpart (V4SImode, op2)));
5376 /* Shift input vectors right 32 bits so we can multiply high parts. */
5377 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5378 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5380 /* Multiply high parts by low parts. */
5381 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5382 gen_lowpart (V4SImode, t3)));
5383 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5384 gen_lowpart (V4SImode, t2)));
5386 /* Shift them back. */
5387 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5388 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5390 /* Add the three parts together. */
5391 emit_insn (gen_addv2di3 (t6, t1, t4));
5392 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI -> V4SI multiply, "hi" variant, using the signed
;; high-part helper.  t1 receives gen_mulv8hi3 of the inputs, t2
;; receives gen_smulv8hi3_highpart, and gen_vec_interleave_highv8hi
;; combines t1 and t2 into operand 0 viewed as V8HI.
5396 (define_expand "vec_widen_smult_hi_v8hi"
5397 [(match_operand:V4SI 0 "register_operand" "")
5398 (match_operand:V8HI 1 "register_operand" "")
5399 (match_operand:V8HI 2 "register_operand" "")]
5402 rtx op1, op2, t1, t2, dest;
5406 t1 = gen_reg_rtx (V8HImode);
5407 t2 = gen_reg_rtx (V8HImode);
5408 dest = gen_lowpart (V8HImode, operands[0]);
5410 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5411 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5412 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI -> V4SI multiply, "lo" variant, using the signed
;; high-part helper.  Same sequence as the "hi" variant except that the
;; final step uses gen_vec_interleave_lowv8hi to combine t1
;; (gen_mulv8hi3) and t2 (gen_smulv8hi3_highpart).
5416 (define_expand "vec_widen_smult_lo_v8hi"
5417 [(match_operand:V4SI 0 "register_operand" "")
5418 (match_operand:V8HI 1 "register_operand" "")
5419 (match_operand:V8HI 2 "register_operand" "")]
5422 rtx op1, op2, t1, t2, dest;
5426 t1 = gen_reg_rtx (V8HImode);
5427 t2 = gen_reg_rtx (V8HImode);
5428 dest = gen_lowpart (V8HImode, operands[0]);
5430 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5431 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5432 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V8HI -> V4SI multiply, "hi" variant, using the unsigned
;; high-part helper.  t1 receives gen_mulv8hi3 of the inputs, t2
;; receives gen_umulv8hi3_highpart, and gen_vec_interleave_highv8hi
;; combines them into operand 0 viewed as V8HI.
5436 (define_expand "vec_widen_umult_hi_v8hi"
5437 [(match_operand:V4SI 0 "register_operand" "")
5438 (match_operand:V8HI 1 "register_operand" "")
5439 (match_operand:V8HI 2 "register_operand" "")]
5442 rtx op1, op2, t1, t2, dest;
5446 t1 = gen_reg_rtx (V8HImode);
5447 t2 = gen_reg_rtx (V8HImode);
5448 dest = gen_lowpart (V8HImode, operands[0]);
5450 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5451 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5452 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI -> V4SI multiply, "lo" variant, using the unsigned
;; high-part helper.  Same sequence as the unsigned "hi" variant except
;; that the final step uses gen_vec_interleave_lowv8hi.
5456 (define_expand "vec_widen_umult_lo_v8hi"
5457 [(match_operand:V4SI 0 "register_operand" "")
5458 (match_operand:V8HI 1 "register_operand" "")
5459 (match_operand:V8HI 2 "register_operand" "")]
5462 rtx op1, op2, t1, t2, dest;
5466 t1 = gen_reg_rtx (V8HImode);
5467 t2 = gen_reg_rtx (V8HImode);
5468 dest = gen_lowpart (V8HImode, operands[0]);
5470 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5471 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5472 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI -> V2DI multiply, "hi" variant.  Both inputs are first
;; shuffled into fresh V4SI temporaries with gen_sse2_pshufd_1, then
;; gen_sse5_mulv2div2di3_high writes the result into operand 0.
5476 (define_expand "vec_widen_smult_hi_v4si"
5477 [(match_operand:V2DI 0 "register_operand" "")
5478 (match_operand:V4SI 1 "register_operand" "")
5479 (match_operand:V4SI 2 "register_operand" "")]
5484 t1 = gen_reg_rtx (V4SImode);
5485 t2 = gen_reg_rtx (V4SImode);
5487 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5492 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5497 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening V4SI -> V2DI multiply, "lo" variant.  Both inputs are first
;; shuffled into fresh V4SI temporaries with gen_sse2_pshufd_1, then
;; gen_sse5_mulv2div2di3_low writes the result into operand 0.
5501 (define_expand "vec_widen_smult_lo_v4si"
5502 [(match_operand:V2DI 0 "register_operand" "")
5503 (match_operand:V4SI 1 "register_operand" "")
5504 (match_operand:V4SI 2 "register_operand" "")]
5509 t1 = gen_reg_rtx (V4SImode);
5510 t2 = gen_reg_rtx (V4SImode);
5512 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5517 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5522 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening V4SI -> V2DI multiply, "hi" variant.  Each input is
;; interleaved with itself via gen_vec_interleave_highv4si, and
;; gen_sse2_umulv2siv2di3 then multiplies the two interleaved
;; temporaries into operand 0.
5527 (define_expand "vec_widen_umult_hi_v4si"
5528 [(match_operand:V2DI 0 "register_operand" "")
5529 (match_operand:V4SI 1 "register_operand" "")
5530 (match_operand:V4SI 2 "register_operand" "")]
5533 rtx op1, op2, t1, t2;
5537 t1 = gen_reg_rtx (V4SImode);
5538 t2 = gen_reg_rtx (V4SImode);
5540 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5541 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5542 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI -> V2DI multiply, "lo" variant.  Each input is
;; interleaved with itself via gen_vec_interleave_lowv4si, and
;; gen_sse2_umulv2siv2di3 then multiplies the two interleaved
;; temporaries into operand 0.
5546 (define_expand "vec_widen_umult_lo_v4si"
5547 [(match_operand:V2DI 0 "register_operand" "")
5548 (match_operand:V4SI 1 "register_operand" "")
5549 (match_operand:V4SI 2 "register_operand" "")]
5552 rtx op1, op2, t1, t2;
5556 t1 = gen_reg_rtx (V4SImode);
5557 t2 = gen_reg_rtx (V4SImode);
5559 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5560 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5561 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander: operand 0 (V4SI) = operand 3 (V4SI accumulator)
;; plus the gen_sse2_pmaddwd result of the two V8HI inputs.  The pmaddwd
;; result goes through a fresh V4SI temporary.
5565 (define_expand "sdot_prodv8hi"
5566 [(match_operand:V4SI 0 "register_operand" "")
5567 (match_operand:V8HI 1 "register_operand" "")
5568 (match_operand:V8HI 2 "register_operand" "")
5569 (match_operand:V4SI 3 "register_operand" "")]
5572 rtx t = gen_reg_rtx (V4SImode);
5573 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5574 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander with V4SI inputs and a V2DI accumulator
;; (operand 3).  Sequence:
;;   t1 = gen_sse2_umulv2siv2di3 (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted as TImode values with gen_sse2_lshrti3
;;   t4 = gen_sse2_umulv2siv2di3 (t2, t3)
;;   op0 = t1 + t4
5578 (define_expand "udot_prodv4si"
5579 [(match_operand:V2DI 0 "register_operand" "")
5580 (match_operand:V4SI 1 "register_operand" "")
5581 (match_operand:V4SI 2 "register_operand" "")
5582 (match_operand:V2DI 3 "register_operand" "")]
5587 t1 = gen_reg_rtx (V2DImode);
5588 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5589 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5591 t2 = gen_reg_rtx (V4SImode);
5592 t3 = gen_reg_rtx (V4SImode);
5593 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5594 gen_lowpart (TImode, operands[1]),
5596 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5597 gen_lowpart (TImode, operands[2]),
5600 t4 = gen_reg_rtx (V2DImode);
5601 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5603 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX right shift (vpsra) for the SSEMODE24 modes (V8HI, V4SI), in
;; three-operand VEX form.  Operand 2 is an SImode count, either in an
;; SSE register or an immediate ("xN").  The length_immediate attribute
;; is chosen by testing whether operand 2 is a const_int.
5607 (define_insn "*avx_ashr<mode>3"
5608 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5610 (match_operand:SSEMODE24 1 "register_operand" "x")
5611 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5613 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5614 [(set_attr "type" "sseishft")
5615 (set_attr "prefix" "vex")
5616 (set (attr "length_immediate")
5617 (if_then_else (match_operand 2 "const_int_operand" "")
5619 (const_string "0")))
5620 (set_attr "mode" "TI")])
;; Right shift (psra) for the SSEMODE24 modes (V8HI, V4SI), in
;; two-operand form: operand 1 is tied to the destination ("0").
;; Operand 2 is an SImode count in an SSE register or an immediate
;; ("xN"); length_immediate depends on whether it is a const_int.
5622 (define_insn "ashr<mode>3"
5623 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5625 (match_operand:SSEMODE24 1 "register_operand" "0")
5626 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5628 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5629 [(set_attr "type" "sseishft")
5630 (set_attr "prefix_data16" "1")
5631 (set (attr "length_immediate")
5632 (if_then_else (match_operand 2 "const_int_operand" "")
5634 (const_string "0")))
5635 (set_attr "mode" "TI")])
;; AVX logical right shift (lshiftrt, vpsrl) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI), in three-operand VEX form.  Operand 2 is an
;; SImode count in an SSE register or an immediate ("xN");
;; length_immediate depends on whether it is a const_int.
5637 (define_insn "*avx_lshr<mode>3"
5638 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5639 (lshiftrt:SSEMODE248
5640 (match_operand:SSEMODE248 1 "register_operand" "x")
5641 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5643 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5644 [(set_attr "type" "sseishft")
5645 (set_attr "prefix" "vex")
5646 (set (attr "length_immediate")
5647 (if_then_else (match_operand 2 "const_int_operand" "")
5649 (const_string "0")))
5650 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt, psrl) for the SSEMODE248 modes (V8HI,
;; V4SI, V2DI), in two-operand form: operand 1 is tied to the
;; destination ("0").  Operand 2 is an SImode count in an SSE register
;; or an immediate ("xN"); length_immediate depends on whether it is a
;; const_int.
5652 (define_insn "lshr<mode>3"
5653 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5654 (lshiftrt:SSEMODE248
5655 (match_operand:SSEMODE248 1 "register_operand" "0")
5656 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5658 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5659 [(set_attr "type" "sseishft")
5660 (set_attr "prefix_data16" "1")
5661 (set (attr "length_immediate")
5662 (if_then_else (match_operand 2 "const_int_operand" "")
5664 (const_string "0")))
5665 (set_attr "mode" "TI")])
;; AVX left shift (vpsll) for the SSEMODE248 modes (V8HI, V4SI, V2DI),
;; in three-operand VEX form.  Operand 2 is an SImode count in an SSE
;; register or an immediate ("xN"); length_immediate depends on whether
;; it is a const_int.
5667 (define_insn "*avx_ashl<mode>3"
5668 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5670 (match_operand:SSEMODE248 1 "register_operand" "x")
5671 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5673 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5674 [(set_attr "type" "sseishft")
5675 (set_attr "prefix" "vex")
5676 (set (attr "length_immediate")
5677 (if_then_else (match_operand 2 "const_int_operand" "")
5679 (const_string "0")))
5680 (set_attr "mode" "TI")])
;; Left shift (psll) for the SSEMODE248 modes (V8HI, V4SI, V2DI), in
;; two-operand form: operand 1 is tied to the destination ("0").
;; Operand 2 is an SImode count in an SSE register or an immediate
;; ("xN"); length_immediate depends on whether it is a const_int.
5682 (define_insn "ashl<mode>3"
5683 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5685 (match_operand:SSEMODE248 1 "register_operand" "0")
5686 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5688 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5689 [(set_attr "type" "sseishft")
5690 (set_attr "prefix_data16" "1")
5691 (set (attr "length_immediate")
5692 (if_then_else (match_operand 2 "const_int_operand" "")
5694 (const_string "0")))
5695 (set_attr "mode" "TI")])
;; Whole-register left shift for every SSEMODEI mode, expressed as an
;; ashift in TImode.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands
;; 0 and 1 with their TImode lowparts so they match the TImode shift.
5697 (define_expand "vec_shl_<mode>"
5698 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5699 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5700 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5703 operands[0] = gen_lowpart (TImode, operands[0]);
5704 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for every SSEMODEI mode, expressed
;; as an lshiftrt in TImode.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands
;; 0 and 1 with their TImode lowparts so they match the TImode shift.
5707 (define_expand "vec_shr_<mode>"
5708 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5709 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5710 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5713 operands[0] = gen_lowpart (TImode, operands[0]);
5714 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX integer min/max for the SSEMODE124 modes (V16QI, V8HI, V4SI):
;; emits vp<maxminiprefix><ssevecsize> in three-operand VEX form.
;; prefix_extra is computed by comparing <MODE>mode with V8HImode when
;; <CODE> is SMAX or SMIN, and with V16QImode otherwise.
5717 (define_insn "*avx_<code><mode>3"
5718 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5720 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5721 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5722 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5723 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5724 [(set_attr "type" "sseiadd")
5725 (set (attr "prefix_extra")
5727 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
5730 (const_string "0")))
5731 (set_attr "prefix" "vex")
5732 (set_attr "mode" "TI")])
;; Expander for the V16QI operations selected by the <code> iterator
;; (matched by the p<maxminiprefix>b insn below).  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy.
5734 (define_expand "<code>v16qi3"
5735 [(set (match_operand:V16QI 0 "register_operand" "")
5737 (match_operand:V16QI 1 "nonimmediate_operand" "")
5738 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5740 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI min/max: emits the two-operand p<maxminiprefix>b, with
;; operand 1 tied to the destination ("%0").  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
5742 (define_insn "*<code>v16qi3"
5743 [(set (match_operand:V16QI 0 "register_operand" "=x")
5745 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5746 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5747 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5748 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5749 [(set_attr "type" "sseiadd")
5750 (set_attr "prefix_data16" "1")
5751 (set_attr "mode" "TI")])
;; Expander for the V8HI operations selected by the <code> iterator
;; (matched by the p<maxminiprefix>w insn below).  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy.
5753 (define_expand "<code>v8hi3"
5754 [(set (match_operand:V8HI 0 "register_operand" "")
5756 (match_operand:V8HI 1 "nonimmediate_operand" "")
5757 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5759 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI min/max: emits the two-operand p<maxminiprefix>w, with
;; operand 1 tied to the destination ("%0").  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
5761 (define_insn "*<code>v8hi3"
5762 [(set (match_operand:V8HI 0 "register_operand" "=x")
5764 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5765 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5766 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5767 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5768 [(set_attr "type" "sseiadd")
5769 (set_attr "prefix_data16" "1")
5770 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two strategies appear in the preparation
;; code:
;;  - canonicalize the operands with ix86_fixup_binary_operands_no_copy
;;    for UMAX;
;;  - otherwise compute gen_sse2_ussubv8hi3 (op1, op2) into op3 and then
;;    add op2 back with gen_addv8hi3.  op3 is normally operand 0, but a
;;    fresh register is used when operand 0 and operand 2 are the same
;;    rtx, so that op2 is not overwritten before the add.
;; NOTE(review): the first path is presumably taken for TARGET_SSE4_1;
;; confirm against the full pattern.
5772 (define_expand "umaxv8hi3"
5773 [(set (match_operand:V8HI 0 "register_operand" "")
5774 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5775 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5779 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5782 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5783 if (rtx_equal_p (op3, op2))
5784 op3 = gen_reg_rtx (V8HImode);
5785 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5786 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  One path
;; canonicalizes the operands for SMAX with
;; ix86_fixup_binary_operands_no_copy.  The other builds a vcond-style
;; operand array for ix86_expand_int_vcond: destination operand 0,
;; comparison GT (op1, op2), value op1 when the comparison holds and
;; op2 otherwise.
5791 (define_expand "smax<mode>3"
5792 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5793 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5794 (match_operand:SSEMODE14 2 "register_operand" "")))]
5798 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5804 xops[0] = operands[0];
5805 xops[1] = operands[1];
5806 xops[2] = operands[2];
5807 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5808 xops[4] = operands[1];
5809 xops[5] = operands[2];
5810 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max for the SSEMODE14 modes (V16QI, V4SI): emits the
;; two-operand p<maxminiprefix><ssevecsize>, with operand 1 tied to the
;; destination ("%0").  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.
5816 (define_insn "*sse4_1_<code><mode>3"
5817 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5819 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5820 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5821 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5822 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5823 [(set_attr "type" "sseiadd")
5824 (set_attr "prefix_extra" "1")
5825 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  One path canonicalizes the operands for UMAX
;; with ix86_fixup_binary_operands_no_copy.  The other builds a
;; vcond-style operand array for ix86_expand_int_vcond: destination
;; operand 0, comparison GTU (op1, op2), value op1 when the comparison
;; holds and op2 otherwise.
5827 (define_expand "umaxv4si3"
5828 [(set (match_operand:V4SI 0 "register_operand" "")
5829 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5830 (match_operand:V4SI 2 "register_operand" "")))]
5834 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5840 xops[0] = operands[0];
5841 xops[1] = operands[1];
5842 xops[2] = operands[2];
5843 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5844 xops[4] = operands[1];
5845 xops[5] = operands[2];
5846 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max for the SSEMODE24 modes (V8HI, V4SI): emits the
;; two-operand p<maxminiprefix><ssevecsize>, with operand 1 tied to the
;; destination ("%0").  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.
5852 (define_insn "*sse4_1_<code><mode>3"
5853 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5855 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5856 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5857 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5858 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5859 [(set_attr "type" "sseiadd")
5860 (set_attr "prefix_extra" "1")
5861 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  One path
;; canonicalizes the operands for SMIN with
;; ix86_fixup_binary_operands_no_copy.  The other reuses the GT
;; comparison with the two result values swapped (xops[1] = op2,
;; xops[2] = op1), so ix86_expand_int_vcond yields op2 when op1 > op2
;; and op1 otherwise.
5863 (define_expand "smin<mode>3"
5864 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5865 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5866 (match_operand:SSEMODE14 2 "register_operand" "")))]
5870 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5876 xops[0] = operands[0];
5877 xops[1] = operands[2];
5878 xops[2] = operands[1];
5879 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5880 xops[4] = operands[1];
5881 xops[5] = operands[2];
5882 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  One path
;; canonicalizes the operands for UMIN with
;; ix86_fixup_binary_operands_no_copy.  The other reuses the GTU
;; comparison with the two result values swapped (xops[1] = op2,
;; xops[2] = op1), so ix86_expand_int_vcond yields op2 when op1 > op2
;; (unsigned) and op1 otherwise.
5888 (define_expand "umin<mode>3"
5889 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5890 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5891 (match_operand:SSEMODE24 2 "register_operand" "")))]
5895 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5901 xops[0] = operands[0];
5902 xops[1] = operands[2];
5903 xops[2] = operands[1];
5904 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5905 xops[4] = operands[1];
5906 xops[5] = operands[2];
5907 ok = ix86_expand_int_vcond (xops);
5913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5915 ;; Parallel integral comparisons
5917 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality comparison on the SSEMODE124
;; modes (V16QI, V8HI, V4SI).  Enabled for TARGET_SSE2 without
;; TARGET_SSE5.  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with EQ.
5919 (define_expand "sse2_eq<mode>3"
5920 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5922 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5923 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5924 "TARGET_SSE2 && !TARGET_SSE5"
5925 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality comparison for the SSEMODE1248 modes (V16QI, V8HI, V4SI,
;; V2DI): emits vpcmpeq<ssevecsize> in three-operand VEX form.
;; prefix_extra is chosen by testing whether operand 0 is V2DI.
5927 (define_insn "*avx_eq<mode>3"
5928 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5930 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5931 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5932 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5933 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5934 [(set_attr "type" "ssecmp")
5935 (set (attr "prefix_extra")
5936 (if_then_else (match_operand:V2DI 0 "" "")
5938 (const_string "*")))
5939 (set_attr "prefix" "vex")
5940 (set_attr "mode" "TI")])
;; SSE2 equality comparison for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI): emits the two-operand pcmpeq<ssevecsize>, with operand 1 tied
;; to the destination ("%0").  Disabled when TARGET_SSE5 is set.
5942 (define_insn "*sse2_eq<mode>3"
5943 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5945 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5946 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5947 "TARGET_SSE2 && !TARGET_SSE5
5948 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5949 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5950 [(set_attr "type" "ssecmp")
5951 (set_attr "prefix_data16" "1")
5952 (set_attr "mode" "TI")])
;; Expander for V2DI equality comparison (matched by the pcmpeqq insn
;; below).  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with EQ and V2DImode.
5954 (define_expand "sse4_1_eqv2di3"
5955 [(set (match_operand:V2DI 0 "register_operand" "")
5957 (match_operand:V2DI 1 "nonimmediate_operand" "")
5958 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5960 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality comparison: emits the two-operand pcmpeqq, with
;; operand 1 tied to the destination ("%0").  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.
5962 (define_insn "*sse4_1_eqv2di3"
5963 [(set (match_operand:V2DI 0 "register_operand" "=x")
5965 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5966 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5967 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5968 "pcmpeqq\t{%2, %0|%0, %2}"
5969 [(set_attr "type" "ssecmp")
5970 (set_attr "prefix_extra" "1")
5971 (set_attr "mode" "TI")])
;; AVX greater-than comparison for the SSEMODE1248 modes (V16QI, V8HI,
;; V4SI, V2DI): emits vpcmpgt<ssevecsize> in three-operand VEX form.
;; Unlike the equality patterns, operand 1 must be a register and
;; carries no "%" commutativity marker.  prefix_extra is chosen by
;; testing whether operand 0 is V2DI.
5973 (define_insn "*avx_gt<mode>3"
5974 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5976 (match_operand:SSEMODE1248 1 "register_operand" "x")
5977 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5979 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5980 [(set_attr "type" "ssecmp")
5981 (set (attr "prefix_extra")
5982 (if_then_else (match_operand:V2DI 0 "" "")
5984 (const_string "*")))
5985 (set_attr "prefix" "vex")
5986 (set_attr "mode" "TI")])
;; SSE2 greater-than comparison for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI): emits the two-operand pcmpgt<ssevecsize>, with operand 1 tied
;; to the destination ("0").  Enabled for TARGET_SSE2 without
;; TARGET_SSE5.
5988 (define_insn "sse2_gt<mode>3"
5989 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5991 (match_operand:SSEMODE124 1 "register_operand" "0")
5992 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5993 "TARGET_SSE2 && !TARGET_SSE5"
5994 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5995 [(set_attr "type" "ssecmp")
5996 (set_attr "prefix_data16" "1")
5997 (set_attr "mode" "TI")])
;; V2DI greater-than comparison: emits the two-operand pcmpgtq, with
;; operand 1 tied to the destination ("0") and operand 2 a register or
;; memory ("xm").
5999 (define_insn "sse4_2_gtv2di3"
6000 [(set (match_operand:V2DI 0 "register_operand" "=x")
6002 (match_operand:V2DI 1 "register_operand" "0")
6003 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6005 "pcmpgtq\t{%2, %0|%0, %2}"
6006 [(set_attr "type" "ssecmp")
6007 (set_attr "prefix_extra" "1")
6008 (set_attr "mode" "TI")])
;; Vector conditional select for every SSEMODEI mode.  Operand 3 is a
;; comparison operator applied to operands 4 and 5; operand 1 is the
;; value used where it holds and operand 2 the value used otherwise.
;; The actual expansion is delegated to ix86_expand_int_vcond, whose
;; return value is tested by the preparation code.
6010 (define_expand "vcond<mode>"
6011 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6012 (if_then_else:SSEMODEI
6013 (match_operator 3 ""
6014 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
6015 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
6016 (match_operand:SSEMODEI 1 "general_operand" "")
6017 (match_operand:SSEMODEI 2 "general_operand" "")))]
6020 if (ix86_expand_int_vcond (operands))
;; Same shape as vcond<mode> above, under the vcondu name: operand 3 is
;; a comparison operator applied to operands 4 and 5, and operands 1 and
;; 2 are the two candidate values.  Expansion is delegated to
;; ix86_expand_int_vcond, whose return value is tested by the
;; preparation code.
6026 (define_expand "vcondu<mode>"
6027 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6028 (if_then_else:SSEMODEI
6029 (match_operator 3 ""
6030 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
6031 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
6032 (match_operand:SSEMODEI 1 "general_operand" "")
6033 (match_operand:SSEMODEI 2 "general_operand" "")))]
6036 if (ix86_expand_int_vcond (operands))
6042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6044 ;; Parallel bitwise logical operations
6046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for every SSEMODEI mode, expressed as an XOR of
;; operand 1 with an all-ones vector.  The preparation code builds a
;; CONST_VECTOR with GET_MODE_NUNITS (<MODE>mode) elements, each
;; constm1_rtx, forces it into a register and stores it in operands[2].
6048 (define_expand "one_cmpl<mode>2"
6049 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6050 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6054 int i, n = GET_MODE_NUNITS (<MODE>mode);
6055 rtvec v = rtvec_alloc (n);
6057 for (i = 0; i < n; ++i)
6058 RTVEC_ELT (v, i) = constm1_rtx;
6060 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 256-bit integer modes (AVX256MODEI): operand 1 is the
;; complemented input (not:), operand 2 the other input.  Emitted as the
;; packed-single instruction vandnps in three-operand VEX form; the mode
;; attribute is <avxvecpsmode>.
6063 (define_insn "*avx_andnot<mode>3"
6064 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6066 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6067 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6069 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6070 [(set_attr "type" "sselog")
6071 (set_attr "prefix" "vex")
6072 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the SSEMODEI modes on targets with SSE but not SSE2.
;; Operand 1 (tied to the destination) is the complemented input.
;; Emitted as the packed-single instruction andnps, with mode attribute
;; V4SF.
6074 (define_insn "*sse_andnot<mode>3"
6075 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6077 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6078 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6079 "(TARGET_SSE && !TARGET_SSE2)"
6080 "andnps\t{%2, %0|%0, %2}"
6081 [(set_attr "type" "sselog")
6082 (set_attr "mode" "V4SF")])
;; And-not for the 128-bit SSEMODEI modes under AVX: operand 1 is the
;; complemented input (not:).  Emitted as the integer instruction vpandn
;; in three-operand VEX form.
6084 (define_insn "*avx_andnot<mode>3"
6085 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6087 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6088 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6090 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6091 [(set_attr "type" "sselog")
6092 (set_attr "prefix" "vex")
6093 (set_attr "mode" "TI")])
;; And-not for the SSEMODEI modes: operand 1 (tied to the destination)
;; is the complemented input.  Emitted as the two-operand pandn with a
;; data16 prefix.
6095 (define_insn "sse2_andnot<mode>3"
6096 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6098 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6099 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6101 "pandn\t{%2, %0|%0, %2}"
6102 [(set_attr "type" "sselog")
6103 (set_attr "prefix_data16" "1")
6104 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers: operand 1 (tied to
;; the destination) is the complemented input.  Emitted as pandn, the
;; same instruction as the integer vector pattern.
6106 (define_insn "*andnottf3"
6107 [(set (match_operand:TF 0 "register_operand" "=x")
6109 (not:TF (match_operand:TF 1 "register_operand" "0"))
6110 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6112 "pandn\t{%2, %0|%0, %2}"
6113 [(set_attr "type" "sselog")
6114 (set_attr "prefix_data16" "1")
6115 (set_attr "mode" "TI")])
;; Expander for the bitwise logical operations selected by the <code>
;; iterator, over every SSEMODEI mode (matched by the <plogicprefix>
;; insns below).  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy.
6117 (define_expand "<code><mode>3"
6118 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6120 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6121 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6123 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logical operation for the 256-bit integer modes
;; (AVX256MODEI).  Emitted as the packed-single instruction
;; v<plogicprefix>ps in three-operand VEX form; the mode attribute is
;; <avxvecpsmode>.
6125 (define_insn "*avx_<code><mode>3"
6126 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6128 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6129 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6131 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6132 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6133 [(set_attr "type" "sselog")
6134 (set_attr "prefix" "vex")
6135 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logical operation for the SSEMODEI modes on targets with SSE
;; but not SSE2.  Emitted as the packed-single instruction
;; <plogicprefix>ps, with operand 1 tied to the destination ("%0") and
;; mode attribute V4SF.
6137 (define_insn "*sse_<code><mode>3"
6138 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6140 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6141 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6142 "(TARGET_SSE && !TARGET_SSE2)
6143 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6144 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6145 [(set_attr "type" "sselog")
6146 (set_attr "mode" "V4SF")])
;; Bitwise logical operation for the 128-bit SSEMODEI modes under AVX.
;; Emitted as the integer instruction vp<plogicprefix> in three-operand
;; VEX form.
6148 (define_insn "*avx_<code><mode>3"
6149 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6151 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6152 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6154 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6155 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6156 [(set_attr "type" "sselog")
6157 (set_attr "prefix" "vex")
6158 (set_attr "mode" "TI")])
;; SSE2 bitwise logical operation for the SSEMODEI modes.  Emitted as
;; the two-operand integer instruction p<plogicprefix>, with operand 1
;; tied to the destination ("%0").  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
6160 (define_insn "*sse2_<code><mode>3"
6161 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6163 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6164 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6165 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6166 "p<plogicprefix>\t{%2, %0|%0, %2}"
6167 [(set_attr "type" "sselog")
6168 (set_attr "prefix_data16" "1")
6169 (set_attr "mode" "TI")])
;; Expander for the bitwise logical operations selected by the <code>
;; iterator, on TFmode values.  The preparation statement passes the
;; operands to ix86_fixup_binary_operands_no_copy with TFmode.
6171 (define_expand "<code>tf3"
6172 [(set (match_operand:TF 0 "register_operand" "")
6174 (match_operand:TF 1 "nonimmediate_operand" "")
6175 (match_operand:TF 2 "nonimmediate_operand" "")))]
6177 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logical operation on TFmode values held in SSE registers.
;; Emitted as the two-operand integer instruction p<plogicprefix>, with
;; operand 1 tied to the destination ("%0").  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
6179 (define_insn "*<code>tf3"
6180 [(set (match_operand:TF 0 "register_operand" "=x")
6182 (match_operand:TF 1 "nonimmediate_operand" "%0")
6183 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6184 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6185 "p<plogicprefix>\t{%2, %0|%0, %2}"
6186 [(set_attr "type" "sselog")
6187 (set_attr "prefix_data16" "1")
6188 (set_attr "mode" "TI")])
6190 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6192 ;; Parallel integral element swizzling
6194 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI inputs into one V16QI result.  One path hands the
;; operands to ix86_expand_sse5_pack.  The other views the inputs as
;; V16QI and runs three rounds of high/low byte interleaves (h1/l1,
;; h2/l2, h3/l3), followed by a final low interleave of l3 and h3 into
;; operand 0.  The letter diagram below traces the byte positions
;; through each round.
6197 ;; op1 = abcdefghijklmnop
6198 ;; op2 = qrstuvwxyz012345
6199 ;; h1 = aqbrcsdteufvgwhx
6200 ;; l1 = iyjzk0l1m2n3o4p5
6201 ;; h2 = aiqybjrzcks0dlt1
6202 ;; l2 = emu2fnv3gow4hpx5
6203 ;; h3 = aeimquy2bfjnrvz3
6204 ;; l3 = cgkosw04dhlptx15
6205 ;; result = bdfhjlnprtvxz135
6206 (define_expand "vec_pack_trunc_v8hi"
6207 [(match_operand:V16QI 0 "register_operand" "")
6208 (match_operand:V8HI 1 "register_operand" "")
6209 (match_operand:V8HI 2 "register_operand" "")]
6212 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6216 ix86_expand_sse5_pack (operands);
6220 op1 = gen_lowpart (V16QImode, operands[1]);
6221 op2 = gen_lowpart (V16QImode, operands[2]);
6222 h1 = gen_reg_rtx (V16QImode);
6223 l1 = gen_reg_rtx (V16QImode);
6224 h2 = gen_reg_rtx (V16QImode);
6225 l2 = gen_reg_rtx (V16QImode);
6226 h3 = gen_reg_rtx (V16QImode);
6227 l3 = gen_reg_rtx (V16QImode);
6229 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6230 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6231 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6232 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6233 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6234 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6235 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6246 ;; result = bdfhjlnp
;; Expander that packs two V4SI inputs (operands 1 and 2) into one V8HI
;; result (operand 0).  A call to ix86_expand_sse5_pack (operands) comes
;; first in the body.  The generic sequence views both inputs as V8HI,
;; allocates four V8HI temporaries, and performs two rounds of high/low
;; halfword interleaves (h1/l1, h2/l2) followed by a final low interleave
;; of l2 and h2 into operand 0.
6247 (define_expand "vec_pack_trunc_v4si"
6248 [(match_operand:V8HI 0 "register_operand" "")
6249 (match_operand:V4SI 1 "register_operand" "")
6250 (match_operand:V4SI 2 "register_operand" "")]
6253 rtx op1, op2, h1, l1, h2, l2;
6257 ix86_expand_sse5_pack (operands);
6261 op1 = gen_lowpart (V8HImode, operands[1]);
6262 op2 = gen_lowpart (V8HImode, operands[2]);
6263 h1 = gen_reg_rtx (V8HImode);
6264 l1 = gen_reg_rtx (V8HImode);
6265 h2 = gen_reg_rtx (V8HImode);
6266 l2 = gen_reg_rtx (V8HImode);
6268 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6269 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6270 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6271 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6272 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander that packs two V2DI inputs (operands 1 and 2) into one V4SI
;; result (operand 0).  A call to ix86_expand_sse5_pack (operands) comes
;; first in the body.  The generic sequence views both inputs as V4SI,
;; interleaves their high and low halves into h1 and l1, and finishes
;; with a low interleave of l1 and h1 into operand 0.
6282 (define_expand "vec_pack_trunc_v2di"
6283 [(match_operand:V4SI 0 "register_operand" "")
6284 (match_operand:V2DI 1 "register_operand" "")
6285 (match_operand:V2DI 2 "register_operand" "")]
6288 rtx op1, op2, h1, l1;
6292 ix86_expand_sse5_pack (operands);
6296 op1 = gen_lowpart (V4SImode, operands[1]);
6297 op2 = gen_lowpart (V4SImode, operands[2]);
6298 h1 = gen_reg_rtx (V4SImode);
6299 l1 = gen_reg_rtx (V4SImode);
6301 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6302 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6303 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander for the high-half interleave of two V16QI vectors.  The
;; selector alternates indices 8-15 with indices 24-31.  Expansion is done
;; by emitting sse2_punpckhbw on operands 0, 1 and 2.
6307 (define_expand "vec_interleave_highv16qi"
6308 [(set (match_operand:V16QI 0 "register_operand" "")
6311 (match_operand:V16QI 1 "register_operand" "")
6312 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6313 (parallel [(const_int 8) (const_int 24)
6314 (const_int 9) (const_int 25)
6315 (const_int 10) (const_int 26)
6316 (const_int 11) (const_int 27)
6317 (const_int 12) (const_int 28)
6318 (const_int 13) (const_int 29)
6319 (const_int 14) (const_int 30)
6320 (const_int 15) (const_int 31)])))]
6323 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander for the low-half interleave of two V16QI vectors.  The
;; selector alternates indices 0-7 with indices 16-23.  Expansion is done
;; by emitting sse2_punpcklbw on operands 0, 1 and 2.
6327 (define_expand "vec_interleave_lowv16qi"
6328 [(set (match_operand:V16QI 0 "register_operand" "")
6331 (match_operand:V16QI 1 "register_operand" "")
6332 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6333 (parallel [(const_int 0) (const_int 16)
6334 (const_int 1) (const_int 17)
6335 (const_int 2) (const_int 18)
6336 (const_int 3) (const_int 19)
6337 (const_int 4) (const_int 20)
6338 (const_int 5) (const_int 21)
6339 (const_int 6) (const_int 22)
6340 (const_int 7) (const_int 23)])))]
6343 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander for the high-half interleave of two V8HI vectors.  The
;; selector alternates indices 4-7 with indices 12-15.  Expansion is done
;; by emitting sse2_punpckhwd on operands 0, 1 and 2.  As in the sibling
;; vec_interleave_* expanders, the operand constraint strings are left
;; empty.
6347 (define_expand "vec_interleave_highv8hi"
6348 [(set (match_operand:V8HI 0 "register_operand" "")
6351 (match_operand:V8HI 1 "register_operand" "")
6352 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6353 (parallel [(const_int 4) (const_int 12)
6354 (const_int 5) (const_int 13)
6355 (const_int 6) (const_int 14)
6356 (const_int 7) (const_int 15)])))]
6359 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander for the low-half interleave of two V8HI vectors.  The
;; selector alternates indices 0-3 with indices 8-11.  Expansion is done
;; by emitting sse2_punpcklwd on operands 0, 1 and 2.
6363 (define_expand "vec_interleave_lowv8hi"
6364 [(set (match_operand:V8HI 0 "register_operand" "")
6367 (match_operand:V8HI 1 "register_operand" "")
6368 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6369 (parallel [(const_int 0) (const_int 8)
6370 (const_int 1) (const_int 9)
6371 (const_int 2) (const_int 10)
6372 (const_int 3) (const_int 11)])))]
6375 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander for the high-half interleave of two V4SI vectors (selector
;; 2, 6, 3, 7).  Expansion is done by emitting sse2_punpckhdq.
6379 (define_expand "vec_interleave_highv4si"
6380 [(set (match_operand:V4SI 0 "register_operand" "")
6383 (match_operand:V4SI 1 "register_operand" "")
6384 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6385 (parallel [(const_int 2) (const_int 6)
6386 (const_int 3) (const_int 7)])))]
6389 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander for the low-half interleave of two V4SI vectors (selector
;; 0, 4, 1, 5).  Expansion is done by emitting sse2_punpckldq.
6393 (define_expand "vec_interleave_lowv4si"
6394 [(set (match_operand:V4SI 0 "register_operand" "")
6397 (match_operand:V4SI 1 "register_operand" "")
6398 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6399 (parallel [(const_int 0) (const_int 4)
6400 (const_int 1) (const_int 5)])))]
6403 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander for the high interleave of two V2DI vectors; the selector
;; begins with index 1.  Expansion is done by emitting sse2_punpckhqdq.
6407 (define_expand "vec_interleave_highv2di"
6408 [(set (match_operand:V2DI 0 "register_operand" "")
6411 (match_operand:V2DI 1 "register_operand" "")
6412 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6413 (parallel [(const_int 1)
6417 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander for the low interleave of two V2DI vectors; the selector
;; begins with index 0.  Expansion is done by emitting sse2_punpcklqdq.
6421 (define_expand "vec_interleave_lowv2di"
6422 [(set (match_operand:V2DI 0 "register_operand" "")
6425 (match_operand:V2DI 1 "register_operand" "")
6426 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6427 (parallel [(const_int 0)
6431 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Expander for the high-half interleave of two V4SF vectors (selector
;; 2, 6, 3, 7).  Unlike the integer variants, it emits no named insn
;; explicitly; the RTL template itself is what gets generated.
6435 (define_expand "vec_interleave_highv4sf"
6436 [(set (match_operand:V4SF 0 "register_operand" "")
6439 (match_operand:V4SF 1 "register_operand" "")
6440 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6441 (parallel [(const_int 2) (const_int 6)
6442 (const_int 3) (const_int 7)])))]
;; Expander for the low-half interleave of two V4SF vectors (selector
;; 0, 4, 1, 5).  Unlike the integer variants, it emits no named insn
;; explicitly; the RTL template itself is what gets generated.
6445 (define_expand "vec_interleave_lowv4sf"
6446 [(set (match_operand:V4SF 0 "register_operand" "")
6449 (match_operand:V4SF 1 "register_operand" "")
6450 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6451 (parallel [(const_int 0) (const_int 4)
6452 (const_int 1) (const_int 5)])))]
;; Expander for the high interleave of two V2DF vectors; the selector
;; begins with index 1.  It emits no named insn explicitly; the RTL
;; template itself is what gets generated.
6455 (define_expand "vec_interleave_highv2df"
6456 [(set (match_operand:V2DF 0 "register_operand" "")
6459 (match_operand:V2DF 1 "register_operand" "")
6460 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6461 (parallel [(const_int 1)
;; Expander for the low interleave of two V2DF vectors; the selector
;; begins with index 0.  It emits no named insn explicitly; the RTL
;; template itself is what gets generated.
6465 (define_expand "vec_interleave_lowv2df"
6466 [(set (match_operand:V2DF 0 "register_operand" "")
6469 (match_operand:V2DF 1 "register_operand" "")
6470 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6471 (parallel [(const_int 0)
;; VEX three-operand form of packsswb: V8HI operand 1 (register) and
;; V8HI operand 2 (register or memory) produce a V16QI result in a
;; separate destination register.
6475 (define_insn "*avx_packsswb"
6476 [(set (match_operand:V16QI 0 "register_operand" "=x")
6479 (match_operand:V8HI 1 "register_operand" "x"))
6481 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6483 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6484 [(set_attr "type" "sselog")
6485 (set_attr "prefix" "vex")
6486 (set_attr "mode" "TI")])
;; Two-operand packsswb: V8HI operand 1 must occupy the destination
;; register (constraint "0"); V8HI operand 2 is a register or memory.
;; The result is V16QI.
6488 (define_insn "sse2_packsswb"
6489 [(set (match_operand:V16QI 0 "register_operand" "=x")
6492 (match_operand:V8HI 1 "register_operand" "0"))
6494 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6496 "packsswb\t{%2, %0|%0, %2}"
6497 [(set_attr "type" "sselog")
6498 (set_attr "prefix_data16" "1")
6499 (set_attr "mode" "TI")])
;; VEX three-operand form of packssdw: V4SI operand 1 (register) and
;; V4SI operand 2 (register or memory) produce a V8HI result in a
;; separate destination register.
6501 (define_insn "*avx_packssdw"
6502 [(set (match_operand:V8HI 0 "register_operand" "=x")
6505 (match_operand:V4SI 1 "register_operand" "x"))
6507 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6509 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6510 [(set_attr "type" "sselog")
6511 (set_attr "prefix" "vex")
6512 (set_attr "mode" "TI")])
;; Two-operand packssdw: V4SI operand 1 must occupy the destination
;; register (constraint "0"); V4SI operand 2 is a register or memory.
;; The result is V8HI.
6514 (define_insn "sse2_packssdw"
6515 [(set (match_operand:V8HI 0 "register_operand" "=x")
6518 (match_operand:V4SI 1 "register_operand" "0"))
6520 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6522 "packssdw\t{%2, %0|%0, %2}"
6523 [(set_attr "type" "sselog")
6524 (set_attr "prefix_data16" "1")
6525 (set_attr "mode" "TI")])
;; VEX three-operand form of packuswb: V8HI operand 1 (register) and
;; V8HI operand 2 (register or memory) produce a V16QI result in a
;; separate destination register.
6527 (define_insn "*avx_packuswb"
6528 [(set (match_operand:V16QI 0 "register_operand" "=x")
6531 (match_operand:V8HI 1 "register_operand" "x"))
6533 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6535 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6536 [(set_attr "type" "sselog")
6537 (set_attr "prefix" "vex")
6538 (set_attr "mode" "TI")])
;; Two-operand packuswb: V8HI operand 1 must occupy the destination
;; register (constraint "0"); V8HI operand 2 is a register or memory.
;; The result is V16QI.
6540 (define_insn "sse2_packuswb"
6541 [(set (match_operand:V16QI 0 "register_operand" "=x")
6544 (match_operand:V8HI 1 "register_operand" "0"))
6546 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6548 "packuswb\t{%2, %0|%0, %2}"
6549 [(set_attr "type" "sselog")
6550 (set_attr "prefix_data16" "1")
6551 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckhbw on V16QI.  The selector alternates
;; indices 8-15 with 24-31; operand 1 is a register, operand 2 a register
;; or memory, and the destination is independent of both.
6553 (define_insn "*avx_punpckhbw"
6554 [(set (match_operand:V16QI 0 "register_operand" "=x")
6557 (match_operand:V16QI 1 "register_operand" "x")
6558 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6559 (parallel [(const_int 8) (const_int 24)
6560 (const_int 9) (const_int 25)
6561 (const_int 10) (const_int 26)
6562 (const_int 11) (const_int 27)
6563 (const_int 12) (const_int 28)
6564 (const_int 13) (const_int 29)
6565 (const_int 14) (const_int 30)
6566 (const_int 15) (const_int 31)])))]
6568 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6569 [(set_attr "type" "sselog")
6570 (set_attr "prefix" "vex")
6571 (set_attr "mode" "TI")])
;; Two-operand punpckhbw on V16QI.  The selector alternates indices 8-15
;; with 24-31; operand 1 is tied to the destination (constraint "0") and
;; operand 2 is a register or memory.  This is the insn emitted by the
;; vec_interleave_highv16qi expander.
6573 (define_insn "sse2_punpckhbw"
6574 [(set (match_operand:V16QI 0 "register_operand" "=x")
6577 (match_operand:V16QI 1 "register_operand" "0")
6578 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6579 (parallel [(const_int 8) (const_int 24)
6580 (const_int 9) (const_int 25)
6581 (const_int 10) (const_int 26)
6582 (const_int 11) (const_int 27)
6583 (const_int 12) (const_int 28)
6584 (const_int 13) (const_int 29)
6585 (const_int 14) (const_int 30)
6586 (const_int 15) (const_int 31)])))]
6588 "punpckhbw\t{%2, %0|%0, %2}"
6589 [(set_attr "type" "sselog")
6590 (set_attr "prefix_data16" "1")
6591 (set_attr "mode" "TI")])
;; VEX three-operand vpunpcklbw on V16QI.  The selector alternates
;; indices 0-7 with 16-23; operand 1 is a register, operand 2 a register
;; or memory, and the destination is independent of both.
6593 (define_insn "*avx_punpcklbw"
6594 [(set (match_operand:V16QI 0 "register_operand" "=x")
6597 (match_operand:V16QI 1 "register_operand" "x")
6598 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6599 (parallel [(const_int 0) (const_int 16)
6600 (const_int 1) (const_int 17)
6601 (const_int 2) (const_int 18)
6602 (const_int 3) (const_int 19)
6603 (const_int 4) (const_int 20)
6604 (const_int 5) (const_int 21)
6605 (const_int 6) (const_int 22)
6606 (const_int 7) (const_int 23)])))]
6608 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6609 [(set_attr "type" "sselog")
6610 (set_attr "prefix" "vex")
6611 (set_attr "mode" "TI")])
;; Two-operand punpcklbw on V16QI.  The selector alternates indices 0-7
;; with 16-23; operand 1 is tied to the destination (constraint "0") and
;; operand 2 is a register or memory.  This is the insn emitted by the
;; vec_interleave_lowv16qi expander.
6613 (define_insn "sse2_punpcklbw"
6614 [(set (match_operand:V16QI 0 "register_operand" "=x")
6617 (match_operand:V16QI 1 "register_operand" "0")
6618 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6619 (parallel [(const_int 0) (const_int 16)
6620 (const_int 1) (const_int 17)
6621 (const_int 2) (const_int 18)
6622 (const_int 3) (const_int 19)
6623 (const_int 4) (const_int 20)
6624 (const_int 5) (const_int 21)
6625 (const_int 6) (const_int 22)
6626 (const_int 7) (const_int 23)])))]
6628 "punpcklbw\t{%2, %0|%0, %2}"
6629 [(set_attr "type" "sselog")
6630 (set_attr "prefix_data16" "1")
6631 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckhwd on V8HI.  The selector alternates
;; indices 4-7 with 12-15; operand 1 is a register, operand 2 a register
;; or memory, and the destination is independent of both.
6633 (define_insn "*avx_punpckhwd"
6634 [(set (match_operand:V8HI 0 "register_operand" "=x")
6637 (match_operand:V8HI 1 "register_operand" "x")
6638 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6639 (parallel [(const_int 4) (const_int 12)
6640 (const_int 5) (const_int 13)
6641 (const_int 6) (const_int 14)
6642 (const_int 7) (const_int 15)])))]
6644 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6645 [(set_attr "type" "sselog")
6646 (set_attr "prefix" "vex")
6647 (set_attr "mode" "TI")])
;; Two-operand punpckhwd on V8HI.  The selector alternates indices 4-7
;; with 12-15; operand 1 is tied to the destination (constraint "0") and
;; operand 2 is a register or memory.  This is the insn emitted by the
;; vec_interleave_highv8hi expander.
6649 (define_insn "sse2_punpckhwd"
6650 [(set (match_operand:V8HI 0 "register_operand" "=x")
6653 (match_operand:V8HI 1 "register_operand" "0")
6654 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6655 (parallel [(const_int 4) (const_int 12)
6656 (const_int 5) (const_int 13)
6657 (const_int 6) (const_int 14)
6658 (const_int 7) (const_int 15)])))]
6660 "punpckhwd\t{%2, %0|%0, %2}"
6661 [(set_attr "type" "sselog")
6662 (set_attr "prefix_data16" "1")
6663 (set_attr "mode" "TI")])
;; VEX three-operand vpunpcklwd on V8HI.  The selector alternates
;; indices 0-3 with 8-11; operand 1 is a register, operand 2 a register
;; or memory, and the destination is independent of both.
6665 (define_insn "*avx_punpcklwd"
6666 [(set (match_operand:V8HI 0 "register_operand" "=x")
6669 (match_operand:V8HI 1 "register_operand" "x")
6670 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6671 (parallel [(const_int 0) (const_int 8)
6672 (const_int 1) (const_int 9)
6673 (const_int 2) (const_int 10)
6674 (const_int 3) (const_int 11)])))]
6676 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6677 [(set_attr "type" "sselog")
6678 (set_attr "prefix" "vex")
6679 (set_attr "mode" "TI")])
;; Two-operand punpcklwd on V8HI.  The selector alternates indices 0-3
;; with 8-11; operand 1 is tied to the destination (constraint "0") and
;; operand 2 is a register or memory.  This is the insn emitted by the
;; vec_interleave_lowv8hi expander.
6681 (define_insn "sse2_punpcklwd"
6682 [(set (match_operand:V8HI 0 "register_operand" "=x")
6685 (match_operand:V8HI 1 "register_operand" "0")
6686 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6687 (parallel [(const_int 0) (const_int 8)
6688 (const_int 1) (const_int 9)
6689 (const_int 2) (const_int 10)
6690 (const_int 3) (const_int 11)])))]
6692 "punpcklwd\t{%2, %0|%0, %2}"
6693 [(set_attr "type" "sselog")
6694 (set_attr "prefix_data16" "1")
6695 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckhdq on V4SI (selector 2, 6, 3, 7).  Operand 1
;; is a register, operand 2 a register or memory, and the destination is
;; independent of both.
6697 (define_insn "*avx_punpckhdq"
6698 [(set (match_operand:V4SI 0 "register_operand" "=x")
6701 (match_operand:V4SI 1 "register_operand" "x")
6702 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6703 (parallel [(const_int 2) (const_int 6)
6704 (const_int 3) (const_int 7)])))]
6706 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6707 [(set_attr "type" "sselog")
6708 (set_attr "prefix" "vex")
6709 (set_attr "mode" "TI")])
;; Two-operand punpckhdq on V4SI (selector 2, 6, 3, 7).  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 is a register or
;; memory.  This is the insn emitted by the vec_interleave_highv4si
;; expander.
6711 (define_insn "sse2_punpckhdq"
6712 [(set (match_operand:V4SI 0 "register_operand" "=x")
6715 (match_operand:V4SI 1 "register_operand" "0")
6716 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6717 (parallel [(const_int 2) (const_int 6)
6718 (const_int 3) (const_int 7)])))]
6720 "punpckhdq\t{%2, %0|%0, %2}"
6721 [(set_attr "type" "sselog")
6722 (set_attr "prefix_data16" "1")
6723 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckldq on V4SI (selector 0, 4, 1, 5).  Operand 1
;; is a register, operand 2 a register or memory, and the destination is
;; independent of both.
6725 (define_insn "*avx_punpckldq"
6726 [(set (match_operand:V4SI 0 "register_operand" "=x")
6729 (match_operand:V4SI 1 "register_operand" "x")
6730 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6731 (parallel [(const_int 0) (const_int 4)
6732 (const_int 1) (const_int 5)])))]
6734 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6735 [(set_attr "type" "sselog")
6736 (set_attr "prefix" "vex")
6737 (set_attr "mode" "TI")])
;; Two-operand punpckldq on V4SI (selector 0, 4, 1, 5).  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 is a register or
;; memory.  This is the insn emitted by the vec_interleave_lowv4si
;; expander.
6739 (define_insn "sse2_punpckldq"
6740 [(set (match_operand:V4SI 0 "register_operand" "=x")
6743 (match_operand:V4SI 1 "register_operand" "0")
6744 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6745 (parallel [(const_int 0) (const_int 4)
6746 (const_int 1) (const_int 5)])))]
6748 "punpckldq\t{%2, %0|%0, %2}"
6749 [(set_attr "type" "sselog")
6750 (set_attr "prefix_data16" "1")
6751 (set_attr "mode" "TI")])
;; VEX element insert for every mode in SSEMODE124.  The scalar in
;; operand 2 (general register or memory) is duplicated and merged into
;; the vector in operand 1 under the single-bit mask in operand 3.
;; The output code replaces that mask by its exact_log2, i.e. the element
;; index the instruction expects as its immediate.  "%k2" prints operand 2
;; in its 32-bit register form.  The prefix_extra attribute is selected by
;; testing whether operand 0 is a V8HI register.
6753 (define_insn "*avx_pinsr<ssevecsize>"
6754 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6755 (vec_merge:SSEMODE124
6756 (vec_duplicate:SSEMODE124
6757 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6758 (match_operand:SSEMODE124 1 "register_operand" "x")
6759 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6762 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6763 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6765 [(set_attr "type" "sselog")
6766 (set (attr "prefix_extra")
6767 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6769 (const_string "1")))
6770 (set_attr "length_immediate" "1")
6771 (set_attr "prefix" "vex")
6772 (set_attr "mode" "TI")])
;; Two-operand pinsrb: inserts the QImode scalar in operand 2 (general
;; register or memory) into the V16QI vector in operand 1, which is tied
;; to the destination.  Operand 3 is a single-bit mask in the range
;; 1..32768; the output code turns it into the element index with
;; exact_log2 before printing.
6774 (define_insn "*sse4_1_pinsrb"
6775 [(set (match_operand:V16QI 0 "register_operand" "=x")
6777 (vec_duplicate:V16QI
6778 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6779 (match_operand:V16QI 1 "register_operand" "0")
6780 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6783 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6784 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6786 [(set_attr "type" "sselog")
6787 (set_attr "prefix_extra" "1")
6788 (set_attr "length_immediate" "1")
6789 (set_attr "mode" "TI")])
;; Two-operand pinsrw: inserts the HImode scalar in operand 2 (general
;; register or memory) into the V8HI vector in operand 1, which is tied
;; to the destination.  Operand 3 is a single-bit mask in the range
;; 1..128; the output code turns it into the element index with
;; exact_log2 before printing.
6791 (define_insn "*sse2_pinsrw"
6792 [(set (match_operand:V8HI 0 "register_operand" "=x")
6795 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6796 (match_operand:V8HI 1 "register_operand" "0")
6797 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6800 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6801 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6803 [(set_attr "type" "sselog")
6804 (set_attr "prefix_data16" "1")
6805 (set_attr "length_immediate" "1")
6806 (set_attr "mode" "TI")])
6808 ;; It must come before sse2_loadld since it is preferred.
;; Two-operand pinsrd: inserts the SImode scalar in operand 2 (general
;; register or memory) into the V4SI vector in operand 1, which is tied
;; to the destination.  Operand 3 is a single-bit mask in the range 1..8;
;; the output code turns it into the element index with exact_log2.
6809 (define_insn "*sse4_1_pinsrd"
6810 [(set (match_operand:V4SI 0 "register_operand" "=x")
6813 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6814 (match_operand:V4SI 1 "register_operand" "0")
6815 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6818 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6819 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6821 [(set_attr "type" "sselog")
6822 (set_attr "prefix_extra" "1")
6823 (set_attr "length_immediate" "1")
6824 (set_attr "mode" "TI")])
;; VEX three-operand vpinsrq, available only with AVX on 64-bit targets.
;; Inserts the DImode scalar in operand 2 (general register or memory)
;; into the V2DI vector in operand 1.  Operand 3 is a single-bit mask
;; (1 or 2); the output code turns it into the element index with
;; exact_log2.
6826 (define_insn "*avx_pinsrq"
6827 [(set (match_operand:V2DI 0 "register_operand" "=x")
6830 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6831 (match_operand:V2DI 1 "register_operand" "x")
6832 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6833 "TARGET_AVX && TARGET_64BIT"
6835 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6836 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6838 [(set_attr "type" "sselog")
6839 (set_attr "prefix_extra" "1")
6840 (set_attr "length_immediate" "1")
6841 (set_attr "prefix" "vex")
6842 (set_attr "mode" "TI")])
;; Two-operand pinsrq, available only with SSE4.1 on 64-bit targets.
;; Inserts the DImode scalar in operand 2 (general register or memory)
;; into the V2DI vector in operand 1, which is tied to the destination.
;; Operand 3 is a single-bit mask (1 or 2); the output code turns it into
;; the element index with exact_log2.  prefix_rex is set because the
;; instruction always needs a REX prefix.
6844 (define_insn "*sse4_1_pinsrq"
6845 [(set (match_operand:V2DI 0 "register_operand" "=x")
6848 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6849 (match_operand:V2DI 1 "register_operand" "0")
6850 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6851 "TARGET_SSE4_1 && TARGET_64BIT"
6853 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6854 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6856 [(set_attr "type" "sselog")
6857 (set_attr "prefix_rex" "1")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "length_immediate" "1")
6860 (set_attr "mode" "TI")])
;; Extracts byte element N (operand 2, 0..15) of the V16QI register in
;; operand 1 into an SImode general register.  The leading "%v" in the
;; template, together with prefix "maybe_vex", lets the same pattern
;; print the VEX-encoded mnemonic when AVX is in use.
6862 (define_insn "*sse4_1_pextrb"
6863 [(set (match_operand:SI 0 "register_operand" "=r")
6866 (match_operand:V16QI 1 "register_operand" "x")
6867 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6869 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6870 [(set_attr "type" "sselog")
6871 (set_attr "prefix_extra" "1")
6872 (set_attr "length_immediate" "1")
6873 (set_attr "prefix" "maybe_vex")
6874 (set_attr "mode" "TI")])
;; Memory-destination variant of pextrb: stores byte element N
;; (operand 2, 0..15) of the V16QI register in operand 1 directly to a
;; QImode memory location.
6876 (define_insn "*sse4_1_pextrb_memory"
6877 [(set (match_operand:QI 0 "memory_operand" "=m")
6879 (match_operand:V16QI 1 "register_operand" "x")
6880 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6882 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6883 [(set_attr "type" "sselog")
6884 (set_attr "prefix_extra" "1")
6885 (set_attr "length_immediate" "1")
6886 (set_attr "prefix" "maybe_vex")
6887 (set_attr "mode" "TI")])
;; Extracts halfword element N (operand 2, 0..7) of the V8HI register in
;; operand 1 into an SImode general register.
6889 (define_insn "*sse2_pextrw"
6890 [(set (match_operand:SI 0 "register_operand" "=r")
6893 (match_operand:V8HI 1 "register_operand" "x")
6894 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6896 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6897 [(set_attr "type" "sselog")
6898 (set_attr "prefix_data16" "1")
6899 (set_attr "length_immediate" "1")
6900 (set_attr "prefix" "maybe_vex")
6901 (set_attr "mode" "TI")])
;; Memory-destination variant of pextrw: stores halfword element N
;; (operand 2, 0..7) of the V8HI register in operand 1 directly to an
;; HImode memory location.  Unlike *sse2_pextrw it is marked
;; prefix_extra rather than prefix_data16.
6903 (define_insn "*sse4_1_pextrw_memory"
6904 [(set (match_operand:HI 0 "memory_operand" "=m")
6906 (match_operand:V8HI 1 "register_operand" "x")
6907 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6909 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6910 [(set_attr "type" "sselog")
6911 (set_attr "prefix_extra" "1")
6912 (set_attr "length_immediate" "1")
6913 (set_attr "prefix" "maybe_vex")
6914 (set_attr "mode" "TI")])
;; Extracts SImode element N (operand 2, 0..3) of the V4SI register in
;; operand 1 into a general register or memory ("=rm").
6916 (define_insn "*sse4_1_pextrd"
6917 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6919 (match_operand:V4SI 1 "register_operand" "x")
6920 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6922 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6923 [(set_attr "type" "sselog")
6924 (set_attr "prefix_extra" "1")
6925 (set_attr "length_immediate" "1")
6926 (set_attr "prefix" "maybe_vex")
6927 (set_attr "mode" "TI")])
6929 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts DImode element N (operand 2, 0 or 1) of the V2DI register in
;; operand 1 into a general register or memory.  Only available with
;; SSE4.1 on 64-bit targets; prefix_rex is set accordingly.
6930 (define_insn "*sse4_1_pextrq"
6931 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6933 (match_operand:V2DI 1 "register_operand" "x")
6934 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6935 "TARGET_SSE4_1 && TARGET_64BIT"
6936 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6937 [(set_attr "type" "sselog")
6938 (set_attr "prefix_rex" "1")
6939 (set_attr "prefix_extra" "1")
6940 (set_attr "length_immediate" "1")
6941 (set_attr "prefix" "maybe_vex")
6942 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  It
;; splits that mask into four 2-bit selectors, bits 0-1, 2-3, 4-5 and
;; 6-7, and passes them as separate constants to sse2_pshufd_1.
6944 (define_expand "sse2_pshufd"
6945 [(match_operand:V4SI 0 "register_operand" "")
6946 (match_operand:V4SI 1 "nonimmediate_operand" "")
6947 (match_operand:SI 2 "const_int_operand" "")]
6950 int mask = INTVAL (operands[2]);
6951 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6952 GEN_INT ((mask >> 0) & 3),
6953 GEN_INT ((mask >> 2) & 3),
6954 GEN_INT ((mask >> 4) & 3),
6955 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the four element selectors as separate operands 2-5, each
;; in the range 0..3.  The output code packs them back into one immediate
;; (two bits per selector, operand 2 in the low bits), stores it in
;; operands[2] and prints the instruction.  The source vector may be a
;; register or memory.
6959 (define_insn "sse2_pshufd_1"
6960 [(set (match_operand:V4SI 0 "register_operand" "=x")
6962 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6963 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6964 (match_operand 3 "const_0_to_3_operand" "")
6965 (match_operand 4 "const_0_to_3_operand" "")
6966 (match_operand 5 "const_0_to_3_operand" "")])))]
6970 mask |= INTVAL (operands[2]) << 0;
6971 mask |= INTVAL (operands[3]) << 2;
6972 mask |= INTVAL (operands[4]) << 4;
6973 mask |= INTVAL (operands[5]) << 6;
6974 operands[2] = GEN_INT (mask);
6976 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6978 [(set_attr "type" "sselog1")
6979 (set_attr "prefix_data16" "1")
6980 (set_attr "prefix" "maybe_vex")
6981 (set_attr "length_immediate" "1")
6982 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  It
;; splits that mask into four 2-bit selectors and passes them as separate
;; constants to sse2_pshuflw_1.
6984 (define_expand "sse2_pshuflw"
6985 [(match_operand:V8HI 0 "register_operand" "")
6986 (match_operand:V8HI 1 "nonimmediate_operand" "")
6987 (match_operand:SI 2 "const_int_operand" "")]
6990 int mask = INTVAL (operands[2]);
6991 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6992 GEN_INT ((mask >> 0) & 3),
6993 GEN_INT ((mask >> 2) & 3),
6994 GEN_INT ((mask >> 4) & 3),
6995 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four selectors as separate operands 2-5, each in the
;; range 0..3.  The output code packs them into one immediate (two bits
;; per selector, operand 2 in the low bits), stores it in operands[2] and
;; prints the instruction.  The attributes declare a rep prefix and no
;; data16 prefix.
6999 (define_insn "sse2_pshuflw_1"
7000 [(set (match_operand:V8HI 0 "register_operand" "=x")
7002 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7003 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7004 (match_operand 3 "const_0_to_3_operand" "")
7005 (match_operand 4 "const_0_to_3_operand" "")
7006 (match_operand 5 "const_0_to_3_operand" "")
7014 mask |= INTVAL (operands[2]) << 0;
7015 mask |= INTVAL (operands[3]) << 2;
7016 mask |= INTVAL (operands[4]) << 4;
7017 mask |= INTVAL (operands[5]) << 6;
7018 operands[2] = GEN_INT (mask);
7020 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7022 [(set_attr "type" "sselog")
7023 (set_attr "prefix_data16" "0")
7024 (set_attr "prefix_rep" "1")
7025 (set_attr "prefix" "maybe_vex")
7026 (set_attr "length_immediate" "1")
7027 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  It
;; splits that mask into four 2-bit selectors and adds 4 to each, so the
;; constants passed to sse2_pshufhw_1 are element numbers in the range
;; 4..7.
7029 (define_expand "sse2_pshufhw"
7030 [(match_operand:V8HI 0 "register_operand" "")
7031 (match_operand:V8HI 1 "nonimmediate_operand" "")
7032 (match_operand:SI 2 "const_int_operand" "")]
7035 int mask = INTVAL (operands[2]);
7036 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7037 GEN_INT (((mask >> 0) & 3) + 4),
7038 GEN_INT (((mask >> 2) & 3) + 4),
7039 GEN_INT (((mask >> 4) & 3) + 4),
7040 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four selectors as separate operands 2-5, each an element
;; number in the range 4..7.  The output code subtracts 4 from each,
;; packs the results into one immediate (two bits per selector, operand 2
;; in the low bits), stores it in operands[2] and prints the instruction.
;; The attributes declare a rep prefix and no data16 prefix.
7044 (define_insn "sse2_pshufhw_1"
7045 [(set (match_operand:V8HI 0 "register_operand" "=x")
7047 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7048 (parallel [(const_int 0)
7052 (match_operand 2 "const_4_to_7_operand" "")
7053 (match_operand 3 "const_4_to_7_operand" "")
7054 (match_operand 4 "const_4_to_7_operand" "")
7055 (match_operand 5 "const_4_to_7_operand" "")])))]
7059 mask |= (INTVAL (operands[2]) - 4) << 0;
7060 mask |= (INTVAL (operands[3]) - 4) << 2;
7061 mask |= (INTVAL (operands[4]) - 4) << 4;
7062 mask |= (INTVAL (operands[5]) - 4) << 6;
7063 operands[2] = GEN_INT (mask);
7065 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7067 [(set_attr "type" "sselog")
7068 (set_attr "prefix_rep" "1")
7069 (set_attr "prefix_data16" "0")
7070 (set_attr "prefix" "maybe_vex")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1, register or memory)
;; into a V4SI register.  The preparation statement supplies operand 2 as
;; the all-zero V4SI constant.
7074 (define_expand "sse2_loadd"
7075 [(set (match_operand:V4SI 0 "register_operand" "")
7078 (match_operand:SI 1 "nonimmediate_operand" ""))
7082 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX form of the SImode load into a V4SI register.  Three alternatives:
;;   0: scalar in memory, operand 1 is the zero constant "C" -> vmovd
;;   1: scalar in a general register, operand 1 is "C"       -> vmovd
;;   2: scalar in an SSE register, operand 1 is a register   -> vmovss,
;;      which takes operand 1 as an additional source.
7084 (define_insn "*avx_loadld"
7085 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7088 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7089 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7093 vmovd\t{%2, %0|%0, %2}
7094 vmovd\t{%2, %0|%0, %2}
7095 vmovss\t{%2, %1, %0|%0, %1, %2}"
7096 [(set_attr "type" "ssemov")
7097 (set_attr "prefix" "vex")
7098 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX form of the SImode load into a V4SI register.  Four
;; alternatives:
;;   0: scalar in memory, operand 1 is the zero constant "C" -> movd
;;   1: scalar in a general register, operand 1 is "C"       -> movd
;;   2: scalar in memory, operand 1 is "C"                   -> movss
;;   3: scalar in an SSE register, operand 1 tied to dest    -> movss
7100 (define_insn "sse2_loadld"
7101 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7104 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7105 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7109 movd\t{%2, %0|%0, %2}
7110 movd\t{%2, %0|%0, %2}
7111 movss\t{%2, %0|%0, %2}
7112 movss\t{%2, %0|%0, %2}"
7113 [(set_attr "type" "ssemov")
7114 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Stores element 0 of a V4SI register as an SImode value to memory, an
;; SSE register or a general register.  After reload it is split into a
;; plain SImode move, provided inter-unit moves are enabled or the
;; destination is memory or is not a general register.  The split code
;; re-expresses operand 1 as an SImode REG with the same register number.
7116 (define_insn_and_split "sse2_stored"
7117 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7119 (match_operand:V4SI 1 "register_operand" "x,Yi")
7120 (parallel [(const_int 0)])))]
7123 "&& reload_completed
7124 && (TARGET_INTER_UNIT_MOVES
7125 || MEM_P (operands [0])
7126 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7127 [(set (match_dup 0) (match_dup 1))]
7129 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extraction of element N (operand 2, 0..3) from a V4SI that lives in
;; offsettable memory.  The split code turns it into an ordinary SImode
;; move from the vector's address adjusted by 4*N bytes into the general
;; register destination.
7132 (define_insn_and_split "*vec_ext_v4si_mem"
7133 [(set (match_operand:SI 0 "register_operand" "=r")
7135 (match_operand:V4SI 1 "memory_operand" "o")
7136 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7142 int i = INTVAL (operands[2]);
7144 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register as a DImode value
;; into a register or memory destination.
7148 (define_expand "sse_storeq"
7149 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7151 (match_operand:V2DI 1 "register_operand" "")
7152 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DImode store from a V2DI.  Memory to
;; memory is rejected by the condition.  There are three alternatives;
;; the last one reads the value from offsettable memory into a general
;; register and is typed as an integer move (imov).  The first two carry
;; "*" for type, prefix and mode.
7156 (define_insn "*sse2_storeq_rex64"
7157 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7159 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7160 (parallel [(const_int 0)])))]
7161 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7165 %vmov{q}\t{%1, %0|%0, %1}"
7166 [(set_attr "type" "*,*,imov")
7167 (set_attr "prefix" "*,*,maybe_vex")
7168 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode store from a V2DI SSE register into memory or another
;; SSE register ("=mx").
7170 (define_insn "*sse2_storeq"
7171 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7173 (match_operand:V2DI 1 "register_operand" "x")
7174 (parallel [(const_int 0)])))]
;; NOTE(review): this looks like the splitter for the element-0 DImode
;; store patterns above -- confirm against the defining keyword line.
;; When inter-unit moves are enabled, or the destination is memory or is
;; not a general register, the extraction is replaced by a plain DImode
;; move with operand 1 re-expressed as a DImode REG of the same register
;; number.
7179 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7181 (match_operand:V2DI 1 "register_operand" "")
7182 (parallel [(const_int 0)])))]
7185 && (TARGET_INTER_UNIT_MOVES
7186 || MEM_P (operands [0])
7187 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7188 [(set (match_dup 0) (match_dup 1))]
7190 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX extraction of element 1 of a V2DI, with a general-register
;; alternative.  Memory-to-memory is rejected.  Alternatives:
;;   0: register -> memory            vmovhps
;;   1: register -> SSE register      vpsrldq by 8 bytes
;;   2: memory   -> SSE register      vmovq from the address printed by %H1
;;   3: memory   -> general register  mov{q} from the address printed by %H1
7193 (define_insn "*vec_extractv2di_1_rex64_avx"
7194 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7196 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7197 (parallel [(const_int 1)])))]
7200 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7202 vmovhps\t{%1, %0|%0, %1}
7203 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7204 vmovq\t{%H1, %0|%0, %H1}
7205 vmov{q}\t{%H1, %0|%0, %H1}"
7206 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7207 (set_attr "length_immediate" "*,1,*,*")
7208 (set_attr "memory" "*,none,*,*")
7209 (set_attr "prefix" "vex")
7210 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit non-AVX extraction of element 1 of a V2DI.  Memory-to-memory is
;; rejected.  Alternatives:
;;   0: register -> memory            movhps
;;   1: in place (operand 1 tied)     psrldq by 8 bytes
;;   2: memory   -> SSE register      movq from the address printed by %H1
;;   3: memory   -> general register  mov{q} from the address printed by %H1
7212 (define_insn "*vec_extractv2di_1_rex64"
7213 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7215 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7216 (parallel [(const_int 1)])))]
7217 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7219 movhps\t{%1, %0|%0, %1}
7220 psrldq\t{$8, %0|%0, 8}
7221 movq\t{%H1, %0|%0, %H1}
7222 mov{q}\t{%H1, %0|%0, %H1}"
7223 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7224 (set_attr "length_immediate" "*,1,*,*")
7225 (set_attr "atom_unit" "*,sishuf,*,*")
7226 (set_attr "memory" "*,none,*,*")
7227 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX extraction of element 1 of a V2DI without a general-register
;; alternative.  Memory-to-memory is rejected.  Alternatives:
;;   0: register -> memory        vmovhps
;;   1: register -> SSE register  vpsrldq by 8 bytes
;;   2: memory   -> SSE register  vmovq from the address printed by %H1
7229 (define_insn "*vec_extractv2di_1_avx"
7230 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7232 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7233 (parallel [(const_int 1)])))]
7236 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7238 vmovhps\t{%1, %0|%0, %1}
7239 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7240 vmovq\t{%H1, %0|%0, %H1}"
7241 [(set_attr "type" "ssemov,sseishft,ssemov")
7242 (set_attr "length_immediate" "*,1,*")
7243 (set_attr "memory" "*,none,*")
7244 (set_attr "prefix" "vex")
7245 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extraction of element 1 of a V2DI without a general-register
;; alternative.  Memory-to-memory is rejected.  Alternatives:
;;   0: register -> memory         movhps
;;   1: in place (operand 1 tied)  psrldq by 8 bytes
;;   2: memory   -> SSE register   movq from the address printed by %H1
7247 (define_insn "*vec_extractv2di_1_sse2"
7248 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7250 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7251 (parallel [(const_int 1)])))]
7253 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7255 movhps\t{%1, %0|%0, %1}
7256 psrldq\t{$8, %0|%0, 8}
7257 movq\t{%H1, %0|%0, %H1}"
7258 [(set_attr "type" "ssemov,sseishft,ssemov")
7259 (set_attr "length_immediate" "*,1,*")
7260 (set_attr "atom_unit" "*,sishuf,*")
7261 (set_attr "memory" "*,none,*")
7262 (set_attr "mode" "V2SF,TI,TI")])
7264 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-only (no SSE2) extraction of element 1 of a V2DI, built from
;; single-precision moves.  Memory-to-memory is rejected.  Alternatives:
;;   0: register -> memory        movhps
;;   1: register -> SSE register  movhlps
;;   2: memory   -> SSE register  movlps from the address printed by %H1
7265 (define_insn "*vec_extractv2di_1_sse"
7266 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7268 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7269 (parallel [(const_int 1)])))]
7270 "!TARGET_SSE2 && TARGET_SSE
7271 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7273 movhps\t{%1, %0|%0, %1}
7274 movhlps\t{%1, %0|%0, %1}
7275 movlps\t{%H1, %0|%0, %H1}"
7276 [(set_attr "type" "ssemov")
7277 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcasts an SImode value held in an SSE register to all four V4SI
;; elements.  Alternative 0 uses pshufd with immediate 0 (optionally
;; VEX-encoded); alternative 1 ties the source to the destination and
;; uses shufps with immediate 0 in the original encoding.
7279 (define_insn "*vec_dupv4si"
7280 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7282 (match_operand:SI 1 "register_operand" " Y2,0")))]
7285 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7286 shufps\t{$0, %0, %0|%0, %0, 0}"
7287 [(set_attr "type" "sselog1")
7288 (set_attr "prefix" "maybe_vex,orig")
7289 (set_attr "length_immediate" "1")
7290 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of a DImode value held in an SSE register to both V2DI
;; elements, done by vpunpcklqdq with operand 1 as both sources.
7292 (define_insn "*vec_dupv2di_avx"
7293 [(set (match_operand:V2DI 0 "register_operand" "=x")
7295 (match_operand:DI 1 "register_operand" "x")))]
7297 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7298 [(set_attr "type" "sselog1")
7299 (set_attr "prefix" "vex")
7300 (set_attr "mode" "TI")])
;; Non-AVX broadcast of a DImode value to both V2DI elements.  Both
;; alternatives tie the source to the destination register; the first is
;; typed sselog1 in TI mode, the second ssemov in V4SF mode.
7302 (define_insn "*vec_dupv2di"
7303 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7305 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7310 [(set_attr "type" "sselog1,ssemov")
7311 (set_attr "mode" "TI,V4SF")])
;; AVX concatenation of two SImode values into a V2SI.  Alternatives:
;;   0: SSE reg + reg/mem second element  vpinsrd with index 1
;;   1: two SSE registers                 vpunpckldq
;;   2: second element is constant "C"    vmovd of the first element
;;   3: MMX registers                     punpckldq (original encoding)
;;   4: MMX destination, second is "C"    movd (original encoding)
;; The prefix attribute is "orig" for the two MMX alternatives and "vex"
;; otherwise.
7313 (define_insn "*vec_concatv2si_avx"
7314 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7316 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7317 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7320 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7321 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7322 vmovd\t{%1, %0|%0, %1}
7323 punpckldq\t{%2, %0|%0, %2}
7324 movd\t{%1, %0|%0, %1}"
7325 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7326 (set_attr "prefix_extra" "1,*,*,*,*")
7327 (set_attr "length_immediate" "1,*,*,*,*")
7328 (set (attr "prefix")
7329 (if_then_else (eq_attr "alternative" "3,4")
7330 (const_string "orig")
7331 (const_string "vex")))
7332 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 concatenation of two SImode values into a V2SI.  Alternatives:
;;   0: first tied to dest, second reg/mem  pinsrd with index 1
;;   1: first tied to dest, second SSE reg  punpckldq
;;   2: second element is constant "C"      movd of the first element
;;   3: MMX registers                       punpckldq
;;   4: MMX destination, second is "C"      movd
7334 (define_insn "*vec_concatv2si_sse4_1"
7335 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7337 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7338 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7341 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7342 punpckldq\t{%2, %0|%0, %2}
7343 movd\t{%1, %0|%0, %1}
7344 punpckldq\t{%2, %0|%0, %2}
7345 movd\t{%1, %0|%0, %1}"
7346 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7347 (set_attr "prefix_extra" "1,*,*,*,*")
7348 (set_attr "length_immediate" "1,*,*,*,*")
7349 (set_attr "mode" "TI,TI,TI,DI,DI")])
7351 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7352 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7353 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concat with operand 2 restricted to a register or zero
;; (reg_or_0_operand).  Alternatives 0/1 use SSE registers (punpckldq, movd),
;; alternatives 2/3 use MMX registers with the same two mnemonics.  The movd
;; alternatives only move operand 1; operand 2 satisfies constraint C there.
7354 (define_insn "*vec_concatv2si_sse2"
7355 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7357 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7358 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7361 punpckldq\t{%2, %0|%0, %2}
7362 movd\t{%1, %0|%0, %1}
7363 punpckldq\t{%2, %0|%0, %2}
7364 movd\t{%1, %0|%0, %1}"
7365 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7366 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI concat using single-precision SSE moves for the SSE-register
;; alternatives: unpcklps merges operand 2 into the tied operand 1, and movss
;; loads operand 1 from memory when operand 2 satisfies constraint C.
;; Alternatives 2/3 are the MMX forms (punpckldq, movd).
7368 (define_insn "*vec_concatv2si_sse"
7369 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7371 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7372 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7375 unpcklps\t{%2, %0|%0, %2}
7376 movss\t{%1, %0|%0, %1}
7377 punpckldq\t{%2, %0|%0, %2}
7378 movd\t{%1, %0|%0, %1}"
7379 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7380 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX-encoded pattern forming a V4SI register (operand 0) from two V2SI
;; operands.  With operand 2 in a register vpunpcklqdq is used; with operand 2
;; in memory vmovhps is used.  Both are three-operand forms, so operand 1 need
;; not be tied to the destination.
7382 (define_insn "*vec_concatv4si_1_avx"
7383 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7385 (match_operand:V2SI 1 "register_operand" " x,x")
7386 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7389 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7390 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7391 [(set_attr "type" "sselog,ssemov")
7392 (set_attr "prefix" "vex")
7393 (set_attr "mode" "TI,V2SF")])
;; Two-operand (non-VEX) pattern forming a V4SI register from two V2SI
;; operands; operand 1 is tied to operand 0 in every alternative.
;;   0: punpcklqdq (Y2 register class)
;;   1: movlhps, register source
;;   2: movhps, memory source
7395 (define_insn "*vec_concatv4si_1"
7396 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7398 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7399 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7402 punpcklqdq\t{%2, %0|%0, %2}
7403 movlhps\t{%2, %0|%0, %2}
7404 movhps\t{%2, %0|%0, %2}"
7405 [(set_attr "type" "sselog,ssemov,ssemov")
7406 (set_attr "mode" "TI,V4SF,V2SF")])
;; V2DI concat of two DI operands for AVX on 32-bit targets
;; (!TARGET_64BIT && TARGET_AVX).
;;   0: vmovq from memory (operand 2 satisfies constraint C)
;;   1: movq2dq from an MMX register (operand 2 satisfies constraint C);
;;      this is the only alternative that keeps the legacy encoding
;;   2: vpunpcklqdq of two SSE registers
;;   3: vmovhps with operand 2 in memory
7408 (define_insn "*vec_concatv2di_avx"
7409 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7411 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7412 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7413 "!TARGET_64BIT && TARGET_AVX"
7415 vmovq\t{%1, %0|%0, %1}
7416 movq2dq\t{%1, %0|%0, %1}
7417 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7418 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7419 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7420 (set (attr "prefix")
7421 (if_then_else (eq_attr "alternative" "1")
7422 (const_string "orig")
7423 (const_string "vex")))
7424 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Named V2DI concat pattern for 32-bit SSE targets
;; (!TARGET_64BIT && TARGET_SSE).
;;   0: movq of operand 1 (memory or Y2 register; operand 2 satisfies C)
;;   1: movq2dq from an MMX register (operand 2 satisfies C)
;;   2: punpcklqdq, operand 1 tied to operand 0
;;   3: movlhps, operand 1 tied to operand 0
;;   4: movhps with operand 2 in memory, operand 1 tied to operand 0
7426 (define_insn "vec_concatv2di"
7427 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7429 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7430 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7431 "!TARGET_64BIT && TARGET_SSE"
7433 movq\t{%1, %0|%0, %1}
7434 movq2dq\t{%1, %0|%0, %1}
7435 punpcklqdq\t{%2, %0|%0, %2}
7436 movlhps\t{%2, %0|%0, %2}
7437 movhps\t{%2, %0|%0, %2}"
7438 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7439 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; V2DI concat for 64-bit AVX targets (TARGET_64BIT && TARGET_AVX).
;;   0: vpinsrq $1 with operand 2 from a general register or memory
;;   1: vmovq from memory (operand 2 satisfies constraint C)
;;   2: vmovq from a general register (operand 2 satisfies constraint C)
;;   3: movq2dq from an MMX register; the only legacy-encoded alternative
;;   4: vpunpcklqdq of two SSE registers
;;   5: vmovhps with operand 2 in memory
7441 (define_insn "*vec_concatv2di_rex64_avx"
7442 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7444 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7445 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7446 "TARGET_64BIT && TARGET_AVX"
7448 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7449 vmovq\t{%1, %0|%0, %1}
7450 vmovq\t{%1, %0|%0, %1}
7451 movq2dq\t{%1, %0|%0, %1}
7452 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7453 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7454 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7455 (set_attr "prefix_extra" "1,*,*,*,*,*")
7456 (set_attr "length_immediate" "1,*,*,*,*,*")
7457 (set (attr "prefix")
7458 (if_then_else (eq_attr "alternative" "3")
7459 (const_string "orig")
7460 (const_string "vex")))
7461 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; V2DI concat for 64-bit SSE4.1 targets (TARGET_64BIT && TARGET_SSE4_1),
;; using two-operand forms with operand 1 tied to operand 0 where operand 2
;; is merged in.
;;   0: pinsrq $1 (REX prefix, extra prefix byte, immediate byte)
;;   1: movq from memory or an SSE register
;;   2: movq from a general register (REX prefix)
;;   3: movq2dq from an MMX register
;;   4: punpcklqdq   5: movlhps   6: movhps from memory
7463 (define_insn "*vec_concatv2di_rex64_sse4_1"
7464 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7466 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7467 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7468 "TARGET_64BIT && TARGET_SSE4_1"
7470 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7471 movq\t{%1, %0|%0, %1}
7472 movq\t{%1, %0|%0, %1}
7473 movq2dq\t{%1, %0|%0, %1}
7474 punpcklqdq\t{%2, %0|%0, %2}
7475 movlhps\t{%2, %0|%0, %2}
7476 movhps\t{%2, %0|%0, %2}"
7477 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7478 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7479 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7480 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7481 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; V2DI concat for 64-bit SSE targets (TARGET_64BIT && TARGET_SSE); there is
;; no pinsrq alternative here.
;;   0: movq from memory or a Y2 register
;;   1: movq from a general register (REX prefix)
;;   2: movq2dq from an MMX register
;;   3: punpcklqdq   4: movlhps   5: movhps from memory
;; Alternatives 3-5 tie operand 1 to operand 0.
7483 (define_insn "*vec_concatv2di_rex64_sse"
7484 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7486 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7487 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7488 "TARGET_64BIT && TARGET_SSE"
7490 movq\t{%1, %0|%0, %1}
7491 movq\t{%1, %0|%0, %1}
7492 movq2dq\t{%1, %0|%0, %1}
7493 punpcklqdq\t{%2, %0|%0, %2}
7494 movlhps\t{%2, %0|%0, %2}
7495 movhps\t{%2, %0|%0, %2}"
7496 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7497 (set_attr "prefix_rex" "*,1,*,*,*,*")
7498 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander taking a V16QI register (operand 1) and producing a V8HI register
;; (operand 0).  All work is delegated to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, true).
;; NOTE(review): per the pattern name the flags presumably mean
;; "unsigned" and "high half" -- confirm against the helpers.
7500 (define_expand "vec_unpacku_hi_v16qi"
7501 [(match_operand:V8HI 0 "register_operand" "")
7502 (match_operand:V16QI 1 "register_operand" "")]
7506 ix86_expand_sse4_unpack (operands, true, true);
7507 else if (TARGET_SSE5)
7508 ix86_expand_sse5_unpack (operands, true, true);
7510 ix86_expand_sse_unpack (operands, true, true);
;; Expander taking a V16QI register (operand 1) and producing a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, true).
;; NOTE(review): per the pattern name the flags presumably mean
;; "signed" and "high half" -- confirm against the helpers.
7514 (define_expand "vec_unpacks_hi_v16qi"
7515 [(match_operand:V8HI 0 "register_operand" "")
7516 (match_operand:V16QI 1 "register_operand" "")]
7520 ix86_expand_sse4_unpack (operands, false, true);
7521 else if (TARGET_SSE5)
7522 ix86_expand_sse5_unpack (operands, false, true);
7524 ix86_expand_sse_unpack (operands, false, true);
;; Expander taking a V16QI register (operand 1) and producing a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, false).
;; NOTE(review): per the pattern name the flags presumably mean
;; "unsigned" and "low half" -- confirm against the helpers.
7528 (define_expand "vec_unpacku_lo_v16qi"
7529 [(match_operand:V8HI 0 "register_operand" "")
7530 (match_operand:V16QI 1 "register_operand" "")]
7534 ix86_expand_sse4_unpack (operands, true, false);
7535 else if (TARGET_SSE5)
7536 ix86_expand_sse5_unpack (operands, true, false);
7538 ix86_expand_sse_unpack (operands, true, false);
;; Expander taking a V16QI register (operand 1) and producing a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, false).
;; NOTE(review): per the pattern name the flags presumably mean
;; "signed" and "low half" -- confirm against the helpers.
7542 (define_expand "vec_unpacks_lo_v16qi"
7543 [(match_operand:V8HI 0 "register_operand" "")
7544 (match_operand:V16QI 1 "register_operand" "")]
7548 ix86_expand_sse4_unpack (operands, false, false);
7549 else if (TARGET_SSE5)
7550 ix86_expand_sse5_unpack (operands, false, false);
7552 ix86_expand_sse_unpack (operands, false, false);
;; Expander taking a V8HI register (operand 1) and producing a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, true).
;; NOTE(review): per the pattern name the flags presumably mean
;; "unsigned" and "high half" -- confirm against the helpers.
7556 (define_expand "vec_unpacku_hi_v8hi"
7557 [(match_operand:V4SI 0 "register_operand" "")
7558 (match_operand:V8HI 1 "register_operand" "")]
7562 ix86_expand_sse4_unpack (operands, true, true);
7563 else if (TARGET_SSE5)
7564 ix86_expand_sse5_unpack (operands, true, true);
7566 ix86_expand_sse_unpack (operands, true, true);
;; Expander taking a V8HI register (operand 1) and producing a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, true).
;; NOTE(review): per the pattern name the flags presumably mean
;; "signed" and "high half" -- confirm against the helpers.
7570 (define_expand "vec_unpacks_hi_v8hi"
7571 [(match_operand:V4SI 0 "register_operand" "")
7572 (match_operand:V8HI 1 "register_operand" "")]
7576 ix86_expand_sse4_unpack (operands, false, true);
7577 else if (TARGET_SSE5)
7578 ix86_expand_sse5_unpack (operands, false, true);
7580 ix86_expand_sse_unpack (operands, false, true);
;; Expander taking a V8HI register (operand 1) and producing a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, false).
;; NOTE(review): per the pattern name the flags presumably mean
;; "unsigned" and "low half" -- confirm against the helpers.
7584 (define_expand "vec_unpacku_lo_v8hi"
7585 [(match_operand:V4SI 0 "register_operand" "")
7586 (match_operand:V8HI 1 "register_operand" "")]
7590 ix86_expand_sse4_unpack (operands, true, false);
7591 else if (TARGET_SSE5)
7592 ix86_expand_sse5_unpack (operands, true, false);
7594 ix86_expand_sse_unpack (operands, true, false);
;; Expander taking a V8HI register (operand 1) and producing a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, false).
;; NOTE(review): per the pattern name the flags presumably mean
;; "signed" and "low half" -- confirm against the helpers.
7598 (define_expand "vec_unpacks_lo_v8hi"
7599 [(match_operand:V4SI 0 "register_operand" "")
7600 (match_operand:V8HI 1 "register_operand" "")]
7604 ix86_expand_sse4_unpack (operands, false, false);
7605 else if (TARGET_SSE5)
7606 ix86_expand_sse5_unpack (operands, false, false);
7608 ix86_expand_sse_unpack (operands, false, false);
;; Expander taking a V4SI register (operand 1) and producing a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, true).
;; NOTE(review): per the pattern name the flags presumably mean
;; "unsigned" and "high half" -- confirm against the helpers.
7612 (define_expand "vec_unpacku_hi_v4si"
7613 [(match_operand:V2DI 0 "register_operand" "")
7614 (match_operand:V4SI 1 "register_operand" "")]
7618 ix86_expand_sse4_unpack (operands, true, true);
7619 else if (TARGET_SSE5)
7620 ix86_expand_sse5_unpack (operands, true, true);
7622 ix86_expand_sse_unpack (operands, true, true);
;; Expander taking a V4SI register (operand 1) and producing a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, true).
;; NOTE(review): per the pattern name the flags presumably mean
;; "signed" and "high half" -- confirm against the helpers.
7626 (define_expand "vec_unpacks_hi_v4si"
7627 [(match_operand:V2DI 0 "register_operand" "")
7628 (match_operand:V4SI 1 "register_operand" "")]
7632 ix86_expand_sse4_unpack (operands, false, true);
7633 else if (TARGET_SSE5)
7634 ix86_expand_sse5_unpack (operands, false, true);
7636 ix86_expand_sse_unpack (operands, false, true);
;; Expander taking a V4SI register (operand 1) and producing a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, false).
;; NOTE(review): per the pattern name the flags presumably mean
;; "unsigned" and "low half" -- confirm against the helpers.
7640 (define_expand "vec_unpacku_lo_v4si"
7641 [(match_operand:V2DI 0 "register_operand" "")
7642 (match_operand:V4SI 1 "register_operand" "")]
7646 ix86_expand_sse4_unpack (operands, true, false);
7647 else if (TARGET_SSE5)
7648 ix86_expand_sse5_unpack (operands, true, false);
7650 ix86_expand_sse_unpack (operands, true, false);
;; Expander taking a V4SI register (operand 1) and producing a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, false).
;; NOTE(review): per the pattern name the flags presumably mean
;; "signed" and "low half" -- confirm against the helpers.
7654 (define_expand "vec_unpacks_lo_v4si"
7655 [(match_operand:V2DI 0 "register_operand" "")
7656 (match_operand:V4SI 1 "register_operand" "")]
7660 ix86_expand_sse4_unpack (operands, false, false);
7661 else if (TARGET_SSE5)
7662 ix86_expand_sse5_unpack (operands, false, false);
7664 ix86_expand_sse_unpack (operands, false, false);
7668 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7672 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" operation on two nonimmediate V16QI
;; operands.  The RTL includes a const_vector of sixteen 1s.  Before the
;; pattern is emitted, ix86_fixup_binary_operands_no_copy is run on the
;; operands, treating the operation as a PLUS in V16QImode.
7674 (define_expand "sse2_uavgv16qi3"
7675 [(set (match_operand:V16QI 0 "register_operand" "")
7681 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7683 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7684 (const_vector:V16QI [(const_int 1) (const_int 1)
7685 (const_int 1) (const_int 1)
7686 (const_int 1) (const_int 1)
7687 (const_int 1) (const_int 1)
7688 (const_int 1) (const_int 1)
7689 (const_int 1) (const_int 1)
7690 (const_int 1) (const_int 1)
7691 (const_int 1) (const_int 1)]))
7694 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX matcher for the V16QI "uavg" RTL; emits three-operand vpavgb.
;; Operands 1 and 2 are commutative ("%" on operand 1) and the insn is only
;; valid when ix86_binary_operator_ok accepts them as a PLUS.  Operand 2 may
;; be a register or memory.
7696 (define_insn "*avx_uavgv16qi3"
7697 [(set (match_operand:V16QI 0 "register_operand" "=x")
7703 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7705 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7706 (const_vector:V16QI [(const_int 1) (const_int 1)
7707 (const_int 1) (const_int 1)
7708 (const_int 1) (const_int 1)
7709 (const_int 1) (const_int 1)
7710 (const_int 1) (const_int 1)
7711 (const_int 1) (const_int 1)
7712 (const_int 1) (const_int 1)
7713 (const_int 1) (const_int 1)]))
7715 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7716 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7717 [(set_attr "type" "sseiadd")
7718 (set_attr "prefix" "vex")
7719 (set_attr "mode" "TI")])
;; SSE2 matcher for the V16QI "uavg" RTL; emits two-operand pavgb, so
;; operand 1 is tied to the destination ("%0", commutative with operand 2).
;; Valid only when ix86_binary_operator_ok accepts the operands as a PLUS.
;; The encoding carries a data16 (0x66) prefix.
7721 (define_insn "*sse2_uavgv16qi3"
7722 [(set (match_operand:V16QI 0 "register_operand" "=x")
7728 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7730 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7731 (const_vector:V16QI [(const_int 1) (const_int 1)
7732 (const_int 1) (const_int 1)
7733 (const_int 1) (const_int 1)
7734 (const_int 1) (const_int 1)
7735 (const_int 1) (const_int 1)
7736 (const_int 1) (const_int 1)
7737 (const_int 1) (const_int 1)
7738 (const_int 1) (const_int 1)]))
7740 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7741 "pavgb\t{%2, %0|%0, %2}"
7742 [(set_attr "type" "sseiadd")
7743 (set_attr "prefix_data16" "1")
7744 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" operation on two nonimmediate V8HI operands.
;; The RTL includes a const_vector of eight 1s.  Before the pattern is
;; emitted, ix86_fixup_binary_operands_no_copy is run on the operands,
;; treating the operation as a PLUS in V8HImode.
7746 (define_expand "sse2_uavgv8hi3"
7747 [(set (match_operand:V8HI 0 "register_operand" "")
7753 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7755 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7756 (const_vector:V8HI [(const_int 1) (const_int 1)
7757 (const_int 1) (const_int 1)
7758 (const_int 1) (const_int 1)
7759 (const_int 1) (const_int 1)]))
7762 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX matcher for the V8HI "uavg" RTL; emits three-operand vpavgw.
;; Operands 1 and 2 are commutative ("%" on operand 1) and must pass
;; ix86_binary_operator_ok as a PLUS.  Operand 2 may be a register or memory.
7764 (define_insn "*avx_uavgv8hi3"
7765 [(set (match_operand:V8HI 0 "register_operand" "=x")
7771 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7773 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7774 (const_vector:V8HI [(const_int 1) (const_int 1)
7775 (const_int 1) (const_int 1)
7776 (const_int 1) (const_int 1)
7777 (const_int 1) (const_int 1)]))
7779 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7780 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7781 [(set_attr "type" "sseiadd")
7782 (set_attr "prefix" "vex")
7783 (set_attr "mode" "TI")])
;; SSE2 matcher for the V8HI "uavg" RTL; emits two-operand pavgw, so
;; operand 1 is tied to the destination ("%0", commutative with operand 2).
;; Valid only when ix86_binary_operator_ok accepts the operands as a PLUS.
;; The encoding carries a data16 (0x66) prefix.
7785 (define_insn "*sse2_uavgv8hi3"
7786 [(set (match_operand:V8HI 0 "register_operand" "=x")
7792 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7794 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7795 (const_vector:V8HI [(const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)
7798 (const_int 1) (const_int 1)]))
7800 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7801 "pavgw\t{%2, %0|%0, %2}"
7802 [(set_attr "type" "sseiadd")
7803 (set_attr "prefix_data16" "1")
7804 (set_attr "mode" "TI")])
7806 ;; The correct representation for this is absolutely enormous, and
7807 ;; surely not generally useful.
;; VEX-encoded vpsadbw: two V16QI inputs (operand 2 may be memory) wrapped in
;; an opaque unspec yielding a V2DI result.  Three-operand form, so operand 1
;; is not tied to the destination.
7808 (define_insn "*avx_psadbw"
7809 [(set (match_operand:V2DI 0 "register_operand" "=x")
7810 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7811 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7814 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7815 [(set_attr "type" "sseiadd")
7816 (set_attr "prefix" "vex")
7817 (set_attr "mode" "TI")])
;; Named two-operand psadbw pattern: V16QI inputs (operand 1 tied to the
;; destination, operand 2 register or memory) wrapped in an opaque unspec
;; yielding a V2DI result.  Carries a data16 prefix and is scheduled on the
;; Atom "simul" unit.
7819 (define_insn "sse2_psadbw"
7820 [(set (match_operand:V2DI 0 "register_operand" "=x")
7821 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7822 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7825 "psadbw\t{%2, %0|%0, %2}"
7826 [(set_attr "type" "sseiadd")
7827 (set_attr "atom_unit" "simul")
7828 (set_attr "prefix_data16" "1")
7829 (set_attr "mode" "TI")])
;; 256-bit vmovmskps/vmovmskpd (suffix chosen by <avxmodesuffixf2c>): reads an
;; AVX256MODEF2P register (operand 1) and writes an SI general register
;; (operand 0).  Enabled when AVX256_VEC_FLOAT_MODE_P holds for the mode.
;; NOTE(review): this insn is typed "ssecvt" whereas the 128-bit
;; <sse>_movmskp pattern is typed "ssemov" -- confirm the difference is
;; intentional.
7831 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7832 [(set (match_operand:SI 0 "register_operand" "=r")
7834 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7836 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7837 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7838 [(set_attr "type" "ssecvt")
7839 (set_attr "prefix" "vex")
7840 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd (suffix chosen by <ssemodesuffixf2c>): reads an
;; SSEMODEF2P register (operand 1) and writes an SI general register
;; (operand 0).  Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode; the
;; %v marker and "maybe_vex" prefix allow the VEX form.
7842 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7843 [(set (match_operand:SI 0 "register_operand" "=r")
7845 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7847 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7848 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7849 [(set_attr "type" "ssemov")
7850 (set_attr "prefix" "maybe_vex")
7851 (set_attr "mode" "<MODE>")])
;; pmovmskb: reads a V16QI register (operand 1) through an opaque unspec and
;; writes an SI general register (operand 0).  The %v marker and "maybe_vex"
;; prefix allow the VEX form; the legacy encoding carries a data16 prefix.
7853 (define_insn "sse2_pmovmskb"
7854 [(set (match_operand:SI 0 "register_operand" "=r")
7855 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7858 "%vpmovmskb\t{%1, %0|%0, %1}"
7859 [(set_attr "type" "ssemov")
7860 (set_attr "prefix_data16" "1")
7861 (set_attr "prefix" "maybe_vex")
7862 (set_attr "mode" "SI")])
;; Expander for the maskmovdqu store: operand 0 is the V16QI memory
;; destination; operands 1 and 2 are V16QI registers inside an unspec.
7864 (define_expand "sse2_maskmovdqu"
7865 [(set (match_operand:V16QI 0 "memory_operand" "")
7866 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7867 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The destination is the
;; V16QI memory addressed by SI operand 0, which constraint "D" pins to the
;; DI register; it is an implicit operand and does not appear in the
;; template.  The old memory contents are also an input of the unspec
;; ((mem (match_dup 0))), alongside register operands 1 and 2.
;; length_vex is fixed at 3 because the implicit address operand would
;; otherwise skew the default computation.
7873 (define_insn "*sse2_maskmovdqu"
7874 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7875 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7876 (match_operand:V16QI 2 "register_operand" "x")
7877 (mem:V16QI (match_dup 0))]
7879 "TARGET_SSE2 && !TARGET_64BIT"
7880 ;; @@@ check ordering of operands in intel/nonintel syntax
7881 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7882 [(set_attr "type" "ssemov")
7883 (set_attr "prefix_data16" "1")
7884 ;; The implicit %rdi operand confuses default length_vex computation.
7885 (set_attr "length_vex" "3")
7886 (set_attr "prefix" "maybe_vex")
7887 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the 32-bit
;; pattern but the implicit address (operand 0, constraint "D") is DImode.
;; The old memory contents are an input of the unspec alongside register
;; operands 1 and 2.  length_vex is computed by hand: 3 + 1 when operand 2 is
;; one of the REX-only SSE registers (REGNO >= FIRST_REX_SSE_REG), otherwise
;; 2 + 1.
7889 (define_insn "*sse2_maskmovdqu_rex64"
7890 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7891 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7892 (match_operand:V16QI 2 "register_operand" "x")
7893 (mem:V16QI (match_dup 0))]
7895 "TARGET_SSE2 && TARGET_64BIT"
7896 ;; @@@ check ordering of operands in intel/nonintel syntax
7897 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7898 [(set_attr "type" "ssemov")
7899 (set_attr "prefix_data16" "1")
7900 ;; The implicit %rdi operand confuses default length_vex computation.
7901 (set (attr "length_vex")
7902 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
7903 (set_attr "prefix" "maybe_vex")
7904 (set_attr "mode" "TI")])
;; Named ldmxcsr pattern: an unspec_volatile whose only operand is an SI
;; memory location.  Marked as a memory load, with the Atom "mxcsr"
;; scheduling attribute; the VEX form is allowed ("maybe_vex").
7906 (define_insn "sse_ldmxcsr"
7907 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7911 [(set_attr "type" "sse")
7912 (set_attr "atom_sse_attr" "mxcsr")
7913 (set_attr "prefix" "maybe_vex")
7914 (set_attr "memory" "load")])
;; Named stmxcsr pattern: sets an SI memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR) with no real inputs.  Marked as a memory store, with
;; the Atom "mxcsr" scheduling attribute; the VEX form is allowed.
7916 (define_insn "sse_stmxcsr"
7917 [(set (match_operand:SI 0 "memory_operand" "=m")
7918 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7921 [(set_attr "type" "sse")
7922 (set_attr "atom_sse_attr" "mxcsr")
7923 (set_attr "prefix" "maybe_vex")
7924 (set_attr "memory" "store")])
;; Expander for sfence (TARGET_SSE || TARGET_3DNOW_A).  The fence is modelled
;; as a BLKmode memory being set from an UNSPEC_SFENCE of itself.  The
;; preparation code creates that memory as operand 0: a BLKmode MEM with a
;; SCRATCH address, flagged volatile.
7926 (define_expand "sse_sfence"
7928 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7929 "TARGET_SSE || TARGET_3DNOW_A"
7931 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7932 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence RTL: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_SFENCE of itself.  The insn encodes no
;; address bytes (length_address 0) and its memory effect is "unknown".
7935 (define_insn "*sse_sfence"
7936 [(set (match_operand:BLK 0 "" "")
7937 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7938 "TARGET_SSE || TARGET_3DNOW_A"
7940 [(set_attr "type" "sse")
7941 (set_attr "length_address" "0")
7942 (set_attr "atom_sse_attr" "fence")
7943 (set_attr "memory" "unknown")])
;; Named clflush pattern: an unspec_volatile over a single address operand
;; (address_operand, constraint "p").  Treated like a fence for Atom
;; scheduling, with "unknown" memory effect.
7945 (define_insn "sse2_clflush"
7946 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7950 [(set_attr "type" "sse")
7951 (set_attr "atom_sse_attr" "fence")
7952 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is modelled as a BLKmode memory being set
;; from an UNSPEC_MFENCE of itself.  The preparation code creates that memory
;; as operand 0: a BLKmode MEM with a SCRATCH address, flagged volatile.
7954 (define_expand "sse2_mfence"
7956 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7959 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7960 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence RTL: a BLKmode operand set from an UNSPEC_MFENCE of
;; itself.  Available on any 64-bit target or with SSE2.  Encodes no address
;; bytes (length_address 0); memory effect is "unknown".
7963 (define_insn "*sse2_mfence"
7964 [(set (match_operand:BLK 0 "" "")
7965 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7966 "TARGET_64BIT || TARGET_SSE2"
7968 [(set_attr "type" "sse")
7969 (set_attr "length_address" "0")
7970 (set_attr "atom_sse_attr" "fence")
7971 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is modelled as a BLKmode memory being set
;; from an UNSPEC_LFENCE of itself.  The preparation code creates that memory
;; as operand 0: a BLKmode MEM with a SCRATCH address, flagged volatile.
7973 (define_expand "sse2_lfence"
7975 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7978 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7979 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence RTL: a BLKmode operand set from an UNSPEC_LFENCE of
;; itself.  Encodes no address bytes (length_address 0); memory effect is
;; "unknown".  Unlike the sfence/mfence patterns, the Atom scheduling
;; attribute here is the dedicated "lfence" value rather than "fence".
7982 (define_insn "*sse2_lfence"
7983 [(set (match_operand:BLK 0 "" "")
7984 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7987 [(set_attr "type" "sse")
7988 (set_attr "length_address" "0")
7989 (set_attr "atom_sse_attr" "lfence")
7990 (set_attr "memory" "unknown")])
;; Named mwait pattern: an unspec_volatile over two SI registers pinned by
;; constraint to the A register (operand 0) and C register (operand 1).
;; The same SImode pattern serves 64-bit code, as explained below.  The
;; instruction length is fixed at 3 bytes.
7992 (define_insn "sse3_mwait"
7993 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7994 (match_operand:SI 1 "register_operand" "c")]
7997 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7998 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7999 ;; we only need to set up 32bit registers.
8001 [(set_attr "length" "3")])
;; 32-bit monitor pattern (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile
;; over three SI registers pinned by constraint to the A, C and D registers,
;; printed explicitly as "monitor %0, %1, %2".  Length is fixed at 3 bytes.
8003 (define_insn "sse3_monitor"
8004 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8005 (match_operand:SI 1 "register_operand" "c")
8006 (match_operand:SI 2 "register_operand" "d")]
8008 "TARGET_SSE3 && !TARGET_64BIT"
8009 "monitor\t%0, %1, %2"
8010 [(set_attr "length" "3")])
;; 64-bit monitor pattern (TARGET_SSE3 && TARGET_64BIT).  Operand 0 is a
;; DImode value in the A register; operands 1 and 2 stay SImode in the C and
;; D registers, for the reason given in the comment below.  Length is fixed
;; at 3 bytes.
8012 (define_insn "sse3_monitor64"
8013 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8014 (match_operand:SI 1 "register_operand" "c")
8015 (match_operand:SI 2 "register_operand" "d")]
8017 "TARGET_SSE3 && TARGET_64BIT"
8018 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8019 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8020 ;; zero extended to 64bit, we only need to set up 32bit registers.
8022 [(set_attr "length" "3")])
8024 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8026 ;; SSSE3 instructions
8028 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vphaddw on V8HI.  The RTL spells the operation out element by
;; element: HI elements are selected in adjacent pairs (0,1), (2,3), (4,5),
;; (6,7), first from operand 1 and then from operand 2 (register or memory).
;; Three-operand form, so operand 1 is not tied to the destination.
8030 (define_insn "*avx_phaddwv8hi3"
8031 [(set (match_operand:V8HI 0 "register_operand" "=x")
8037 (match_operand:V8HI 1 "register_operand" "x")
8038 (parallel [(const_int 0)]))
8039 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8041 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8042 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8045 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8046 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8048 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8049 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8054 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8055 (parallel [(const_int 0)]))
8056 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8058 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8059 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8062 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8063 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8065 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8066 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8068 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8069 [(set_attr "type" "sseiadd")
8070 (set_attr "prefix_extra" "1")
8071 (set_attr "prefix" "vex")
8072 (set_attr "mode" "TI")])
;; Named two-operand phaddw on V8HI (SSE registers).  HI elements are
;; selected in adjacent pairs (0,1) ... (6,7), first from operand 1 (tied to
;; the destination) and then from operand 2 (register or memory).  Encoded
;; with a data16 prefix plus one extra opcode-prefix byte; Atom unit
;; "complex".
8074 (define_insn "ssse3_phaddwv8hi3"
8075 [(set (match_operand:V8HI 0 "register_operand" "=x")
8081 (match_operand:V8HI 1 "register_operand" "0")
8082 (parallel [(const_int 0)]))
8083 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8085 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8086 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8089 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8090 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8092 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8093 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8098 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8099 (parallel [(const_int 0)]))
8100 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8102 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8103 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8106 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8107 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8109 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8110 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8112 "phaddw\t{%2, %0|%0, %2}"
8113 [(set_attr "type" "sseiadd")
8114 (set_attr "atom_unit" "complex")
8115 (set_attr "prefix_data16" "1")
8116 (set_attr "prefix_extra" "1")
8117 (set_attr "mode" "TI")])
;; Named phaddw on V4HI in MMX registers (constraint "y").  HI elements are
;; selected in adjacent pairs (0,1), (2,3), first from operand 1 (tied to the
;; destination) and then from operand 2 (register or memory).  The REX
;; prefix attribute is computed per insn by x86_extended_reg_mentioned_p.
8119 (define_insn "ssse3_phaddwv4hi3"
8120 [(set (match_operand:V4HI 0 "register_operand" "=y")
8125 (match_operand:V4HI 1 "register_operand" "0")
8126 (parallel [(const_int 0)]))
8127 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8129 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8130 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8134 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8135 (parallel [(const_int 0)]))
8136 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8138 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8139 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8141 "phaddw\t{%2, %0|%0, %2}"
8142 [(set_attr "type" "sseiadd")
8143 (set_attr "atom_unit" "complex")
8144 (set_attr "prefix_extra" "1")
8145 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8146 (set_attr "mode" "DI")])
;; VEX-encoded vphaddd on V4SI.  SI elements are selected in adjacent pairs
;; (0,1), (2,3), first from operand 1 and then from operand 2 (register or
;; memory).  Three-operand form, so operand 1 is not tied to the destination.
8148 (define_insn "*avx_phadddv4si3"
8149 [(set (match_operand:V4SI 0 "register_operand" "=x")
8154 (match_operand:V4SI 1 "register_operand" "x")
8155 (parallel [(const_int 0)]))
8156 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8158 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8159 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8163 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8164 (parallel [(const_int 0)]))
8165 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8167 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8168 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8170 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8171 [(set_attr "type" "sseiadd")
8172 (set_attr "prefix_extra" "1")
8173 (set_attr "prefix" "vex")
8174 (set_attr "mode" "TI")])
;; Named two-operand phaddd on V4SI (SSE registers).  SI elements are
;; selected in adjacent pairs (0,1), (2,3), first from operand 1 (tied to the
;; destination) and then from operand 2 (register or memory).  Encoded with a
;; data16 prefix plus one extra opcode-prefix byte; Atom unit "complex".
8176 (define_insn "ssse3_phadddv4si3"
8177 [(set (match_operand:V4SI 0 "register_operand" "=x")
8182 (match_operand:V4SI 1 "register_operand" "0")
8183 (parallel [(const_int 0)]))
8184 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8186 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8187 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8191 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8192 (parallel [(const_int 0)]))
8193 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8195 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8196 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8198 "phaddd\t{%2, %0|%0, %2}"
8199 [(set_attr "type" "sseiadd")
8200 (set_attr "atom_unit" "complex")
8201 (set_attr "prefix_data16" "1")
8202 (set_attr "prefix_extra" "1")
8203 (set_attr "mode" "TI")])
;; Named phaddd on V2SI in MMX registers (constraint "y").  Elements 0 and 1
;; of operand 1 (tied to the destination) form one pair and elements 0 and 1
;; of operand 2 (register or memory) form the other.  The REX prefix
;; attribute is computed per insn by x86_extended_reg_mentioned_p.
8205 (define_insn "ssse3_phadddv2si3"
8206 [(set (match_operand:V2SI 0 "register_operand" "=y")
8210 (match_operand:V2SI 1 "register_operand" "0")
8211 (parallel [(const_int 0)]))
8212 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8215 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8216 (parallel [(const_int 0)]))
8217 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8219 "phaddd\t{%2, %0|%0, %2}"
8220 [(set_attr "type" "sseiadd")
8221 (set_attr "atom_unit" "complex")
8222 (set_attr "prefix_extra" "1")
8223 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8224 (set_attr "mode" "DI")])
;; VEX-encoded vphaddsw on V8HI.  HI elements are selected in adjacent pairs
;; (0,1) ... (6,7), first from operand 1 and then from operand 2 (register or
;; memory).  Three-operand form, so operand 1 is not tied to the destination.
8226 (define_insn "*avx_phaddswv8hi3"
8227 [(set (match_operand:V8HI 0 "register_operand" "=x")
8233 (match_operand:V8HI 1 "register_operand" "x")
8234 (parallel [(const_int 0)]))
8235 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8237 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8238 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8241 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8242 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8244 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8245 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8250 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8251 (parallel [(const_int 0)]))
8252 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8254 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8255 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8258 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8259 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8261 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8262 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8264 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8265 [(set_attr "type" "sseiadd")
8266 (set_attr "prefix_extra" "1")
8267 (set_attr "prefix" "vex")
8268 (set_attr "mode" "TI")])
;; Named two-operand phaddsw on V8HI (SSE registers).  HI elements are
;; selected in adjacent pairs (0,1) ... (6,7), first from operand 1 (tied to
;; the destination) and then from operand 2 (register or memory).  Encoded
;; with a data16 prefix plus one extra opcode-prefix byte; Atom unit
;; "complex".
8270 (define_insn "ssse3_phaddswv8hi3"
8271 [(set (match_operand:V8HI 0 "register_operand" "=x")
8277 (match_operand:V8HI 1 "register_operand" "0")
8278 (parallel [(const_int 0)]))
8279 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8281 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8282 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8285 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8288 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8289 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8294 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8295 (parallel [(const_int 0)]))
8296 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8298 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8299 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8302 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8303 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8305 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8306 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8308 "phaddsw\t{%2, %0|%0, %2}"
8309 [(set_attr "type" "sseiadd")
8310 (set_attr "atom_unit" "complex")
8311 (set_attr "prefix_data16" "1")
8312 (set_attr "prefix_extra" "1")
8313 (set_attr "mode" "TI")])
;; Named phaddsw on V4HI in MMX registers (constraint "y").  HI elements are
;; selected in adjacent pairs (0,1), (2,3), first from operand 1 (tied to the
;; destination) and then from operand 2 (register or memory).  The REX
;; prefix attribute is computed per insn by x86_extended_reg_mentioned_p.
8315 (define_insn "ssse3_phaddswv4hi3"
8316 [(set (match_operand:V4HI 0 "register_operand" "=y")
8321 (match_operand:V4HI 1 "register_operand" "0")
8322 (parallel [(const_int 0)]))
8323 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8325 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8326 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8330 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8331 (parallel [(const_int 0)]))
8332 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8334 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8335 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8337 "phaddsw\t{%2, %0|%0, %2}"
8338 [(set_attr "type" "sseiadd")
8339 (set_attr "atom_unit" "complex")
8340 (set_attr "prefix_extra" "1")
8341 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8342 (set_attr "mode" "DI")])
;; VEX-encoded vphsubw on V8HI.  HI elements are selected in adjacent pairs
;; (0,1) ... (6,7), first from operand 1 and then from operand 2 (register or
;; memory).  Three-operand form, so operand 1 is not tied to the destination.
8344 (define_insn "*avx_phsubwv8hi3"
8345 [(set (match_operand:V8HI 0 "register_operand" "=x")
8351 (match_operand:V8HI 1 "register_operand" "x")
8352 (parallel [(const_int 0)]))
8353 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8355 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8359 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8360 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8362 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8368 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8369 (parallel [(const_int 0)]))
8370 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8372 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8376 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8377 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8379 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8382 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8383 [(set_attr "type" "sseiadd")
8384 (set_attr "prefix_extra" "1")
8385 (set_attr "prefix" "vex")
8386 (set_attr "mode" "TI")])
;; Named two-operand phsubw on V8HI (SSE registers).  HI elements are
;; selected in adjacent pairs (0,1) ... (6,7), first from operand 1 (tied to
;; the destination) and then from operand 2 (register or memory).  Encoded
;; with a data16 prefix plus one extra opcode-prefix byte; Atom unit
;; "complex".
8388 (define_insn "ssse3_phsubwv8hi3"
8389 [(set (match_operand:V8HI 0 "register_operand" "=x")
8395 (match_operand:V8HI 1 "register_operand" "0")
8396 (parallel [(const_int 0)]))
8397 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8399 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8400 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8403 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8404 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8406 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8407 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8412 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8413 (parallel [(const_int 0)]))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8416 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8417 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8420 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8421 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8423 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8424 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8426 "phsubw\t{%2, %0|%0, %2}"
8427 [(set_attr "type" "sseiadd")
8428 (set_attr "atom_unit" "complex")
8429 (set_attr "prefix_data16" "1")
8430 (set_attr "prefix_extra" "1")
8431 (set_attr "mode" "TI")])
;; phsubw on V4HI operands in "y"-class registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
8433 (define_insn "ssse3_phsubwv4hi3"
8434 [(set (match_operand:V4HI 0 "register_operand" "=y")
8439 (match_operand:V4HI 1 "register_operand" "0")
8440 (parallel [(const_int 0)]))
8441 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8448 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8449 (parallel [(const_int 0)]))
8450 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8455 "phsubw\t{%2, %0|%0, %2}"
8456 [(set_attr "type" "sseiadd")
8457 (set_attr "atom_unit" "complex")
8458 (set_attr "prefix_extra" "1")
8459 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8460 (set_attr "mode" "DI")])
;; vphsubd on V4SI operands, three-operand VEX form: operand 1 is an
;; independent register source (constraint "x"), operand 2 may be a
;; register or memory.
8462 (define_insn "*avx_phsubdv4si3"
8463 [(set (match_operand:V4SI 0 "register_operand" "=x")
8468 (match_operand:V4SI 1 "register_operand" "x")
8469 (parallel [(const_int 0)]))
8470 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8472 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8473 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8477 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8478 (parallel [(const_int 0)]))
8479 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8481 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8482 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8484 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8485 [(set_attr "type" "sseiadd")
8486 (set_attr "prefix_extra" "1")
8487 (set_attr "prefix" "vex")
8488 (set_attr "mode" "TI")])
;; phsubd on V4SI operands, two-operand form.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a register or memory.
8490 (define_insn "ssse3_phsubdv4si3"
8491 [(set (match_operand:V4SI 0 "register_operand" "=x")
8496 (match_operand:V4SI 1 "register_operand" "0")
8497 (parallel [(const_int 0)]))
8498 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8500 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8501 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8505 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8506 (parallel [(const_int 0)]))
8507 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8509 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8510 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8512 "phsubd\t{%2, %0|%0, %2}"
8513 [(set_attr "type" "sseiadd")
8514 (set_attr "atom_unit" "complex")
8515 (set_attr "prefix_data16" "1")
8516 (set_attr "prefix_extra" "1")
8517 (set_attr "mode" "TI")])
;; phsubd on V2SI operands in "y"-class registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
8519 (define_insn "ssse3_phsubdv2si3"
8520 [(set (match_operand:V2SI 0 "register_operand" "=y")
8524 (match_operand:V2SI 1 "register_operand" "0")
8525 (parallel [(const_int 0)]))
8526 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8529 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8530 (parallel [(const_int 0)]))
8531 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8533 "phsubd\t{%2, %0|%0, %2}"
8534 [(set_attr "type" "sseiadd")
8535 (set_attr "atom_unit" "complex")
8536 (set_attr "prefix_extra" "1")
8537 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8538 (set_attr "mode" "DI")])
;; vphsubsw on V8HI operands, three-operand VEX form: operand 1 is an
;; independent register source, operand 2 may be a register or memory.
8540 (define_insn "*avx_phsubswv8hi3"
8541 [(set (match_operand:V8HI 0 "register_operand" "=x")
8547 (match_operand:V8HI 1 "register_operand" "x")
8548 (parallel [(const_int 0)]))
8549 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8551 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8552 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8555 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8558 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8564 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8565 (parallel [(const_int 0)]))
8566 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8569 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8572 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8573 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8575 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8576 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8578 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8579 [(set_attr "type" "sseiadd")
8580 (set_attr "prefix_extra" "1")
8581 (set_attr "prefix" "vex")
8582 (set_attr "mode" "TI")])
;; phsubsw on V8HI operands, two-operand form.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a register or memory.
8584 (define_insn "ssse3_phsubswv8hi3"
8585 [(set (match_operand:V8HI 0 "register_operand" "=x")
8591 (match_operand:V8HI 1 "register_operand" "0")
8592 (parallel [(const_int 0)]))
8593 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8595 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8596 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8599 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8600 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8602 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8603 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8608 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8609 (parallel [(const_int 0)]))
8610 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8612 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8613 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8616 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8617 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8619 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8622 "phsubsw\t{%2, %0|%0, %2}"
8623 [(set_attr "type" "sseiadd")
8624 (set_attr "atom_unit" "complex")
8625 (set_attr "prefix_data16" "1")
8626 (set_attr "prefix_extra" "1")
8627 (set_attr "mode" "TI")])
;; phsubsw on V4HI operands in "y"-class registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
8629 (define_insn "ssse3_phsubswv4hi3"
8630 [(set (match_operand:V4HI 0 "register_operand" "=y")
8635 (match_operand:V4HI 1 "register_operand" "0")
8636 (parallel [(const_int 0)]))
8637 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8639 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8640 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8644 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8645 (parallel [(const_int 0)]))
8646 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8648 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8649 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8651 "phsubsw\t{%2, %0|%0, %2}"
8652 [(set_attr "type" "sseiadd")
8653 (set_attr "atom_unit" "complex")
8654 (set_attr "prefix_extra" "1")
8655 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8656 (set_attr "mode" "DI")])
;; vpmaddubsw: V16QI inputs, V8HI result, three-operand VEX form.
;; Both inputs are selected twice: once starting at element 0 and once
;; starting at element 1 (the second selection ends at element 15).
8658 (define_insn "*avx_pmaddubsw128"
8659 [(set (match_operand:V8HI 0 "register_operand" "=x")
8664 (match_operand:V16QI 1 "register_operand" "x")
8665 (parallel [(const_int 0)
8675 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8676 (parallel [(const_int 0)
8686 (vec_select:V16QI (match_dup 1)
8687 (parallel [(const_int 1)
8696 (vec_select:V16QI (match_dup 2)
8697 (parallel [(const_int 1)
8704 (const_int 15)]))))))]
8706 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8707 [(set_attr "type" "sseiadd")
8708 (set_attr "prefix_extra" "1")
8709 (set_attr "prefix" "vex")
8710 (set_attr "mode" "TI")])
;; pmaddubsw: V16QI inputs, V8HI result, two-operand form with operand 1
;; tied to the destination.  Operand 2 may be a register or memory.
8712 (define_insn "ssse3_pmaddubsw128"
8713 [(set (match_operand:V8HI 0 "register_operand" "=x")
8718 (match_operand:V16QI 1 "register_operand" "0")
8719 (parallel [(const_int 0)
8729 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8730 (parallel [(const_int 0)
8740 (vec_select:V16QI (match_dup 1)
8741 (parallel [(const_int 1)
8750 (vec_select:V16QI (match_dup 2)
8751 (parallel [(const_int 1)
8758 (const_int 15)]))))))]
8760 "pmaddubsw\t{%2, %0|%0, %2}"
8761 [(set_attr "type" "sseiadd")
8762 (set_attr "atom_unit" "simul")
8763 (set_attr "prefix_data16" "1")
8764 (set_attr "prefix_extra" "1")
8765 (set_attr "mode" "TI")])
;; pmaddubsw: V8QI inputs, V4HI result, in "y"-class registers.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
8767 (define_insn "ssse3_pmaddubsw"
8768 [(set (match_operand:V4HI 0 "register_operand" "=y")
8773 (match_operand:V8QI 1 "register_operand" "0")
8774 (parallel [(const_int 0)
8780 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8781 (parallel [(const_int 0)
8787 (vec_select:V8QI (match_dup 1)
8788 (parallel [(const_int 1)
8793 (vec_select:V8QI (match_dup 2)
8794 (parallel [(const_int 1)
8797 (const_int 7)]))))))]
8799 "pmaddubsw\t{%2, %0|%0, %2}"
8800 [(set_attr "type" "sseiadd")
8801 (set_attr "atom_unit" "simul")
8802 (set_attr "prefix_extra" "1")
8803 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8804 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate_operand; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
;; The pattern uses an all-ones V8HI constant vector.
8806 (define_expand "ssse3_pmulhrswv8hi3"
8807 [(set (match_operand:V8HI 0 "register_operand" "")
8814 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8816 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8818 (const_vector:V8HI [(const_int 1) (const_int 1)
8819 (const_int 1) (const_int 1)
8820 (const_int 1) (const_int 1)
8821 (const_int 1) (const_int 1)]))
8824 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; vpmulhrsw on V8HI, three-operand VEX form.  Operand 1 carries the "%"
;; commutative marker.  Enabled under TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands for MULT.
8826 (define_insn "*avx_pmulhrswv8hi3"
8827 [(set (match_operand:V8HI 0 "register_operand" "=x")
8834 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8836 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8838 (const_vector:V8HI [(const_int 1) (const_int 1)
8839 (const_int 1) (const_int 1)
8840 (const_int 1) (const_int 1)
8841 (const_int 1) (const_int 1)]))
8843 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8844 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8845 [(set_attr "type" "sseimul")
8846 (set_attr "prefix_extra" "1")
8847 (set_attr "prefix" "vex")
8848 (set_attr "mode" "TI")])
;; pmulhrsw on V8HI, two-operand form.  Operand 1 is commutative ("%")
;; and tied to the destination.  Enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.
8850 (define_insn "*ssse3_pmulhrswv8hi3"
8851 [(set (match_operand:V8HI 0 "register_operand" "=x")
8858 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8860 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8862 (const_vector:V8HI [(const_int 1) (const_int 1)
8863 (const_int 1) (const_int 1)
8864 (const_int 1) (const_int 1)
8865 (const_int 1) (const_int 1)]))
8867 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8868 "pmulhrsw\t{%2, %0|%0, %2}"
8869 [(set_attr "type" "sseimul")
8870 (set_attr "prefix_data16" "1")
8871 (set_attr "prefix_extra" "1")
8872 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy for
;; MULT in V4HImode.  The pattern uses an all-ones V4HI constant vector.
8874 (define_expand "ssse3_pmulhrswv4hi3"
8875 [(set (match_operand:V4HI 0 "register_operand" "")
8882 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8884 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8886 (const_vector:V4HI [(const_int 1) (const_int 1)
8887 (const_int 1) (const_int 1)]))
8890 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on V4HI in "y"-class registers.  Operand 1 is commutative
;; ("%") and tied to the destination.  Enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.
8892 (define_insn "*ssse3_pmulhrswv4hi3"
8893 [(set (match_operand:V4HI 0 "register_operand" "=y")
8900 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8902 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8904 (const_vector:V4HI [(const_int 1) (const_int 1)
8905 (const_int 1) (const_int 1)]))
8907 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8908 "pmulhrsw\t{%2, %0|%0, %2}"
8909 [(set_attr "type" "sseimul")
8910 (set_attr "prefix_extra" "1")
8911 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8912 (set_attr "mode" "DI")])
;; vpshufb on V16QI, modelled as an unspec of operands 1 and 2.
;; Three-operand VEX form; operand 2 may be a register or memory.
;; NOTE(review): the ';' after the output template starts an empty
;; comment in .md syntax, so it looks harmless but redundant.
8914 (define_insn "*avx_pshufbv16qi3"
8915 [(set (match_operand:V16QI 0 "register_operand" "=x")
8916 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8917 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8920 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8921 [(set_attr "type" "sselog1")
8922 (set_attr "prefix_extra" "1")
8923 (set_attr "prefix" "vex")
8924 (set_attr "mode" "TI")])
;; pshufb on V16QI, modelled as an unspec of operands 1 and 2.
;; Two-operand form with operand 1 tied to the destination.
8926 (define_insn "ssse3_pshufbv16qi3"
8927 [(set (match_operand:V16QI 0 "register_operand" "=x")
8928 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8929 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8932 "pshufb\t{%2, %0|%0, %2}";
8933 [(set_attr "type" "sselog1")
8934 (set_attr "prefix_data16" "1")
8935 (set_attr "prefix_extra" "1")
8936 (set_attr "mode" "TI")])
;; pshufb on V8QI in "y"-class registers, modelled as an unspec.
;; Operand 1 is tied to the destination; prefix_rex is computed per insn.
8938 (define_insn "ssse3_pshufbv8qi3"
8939 [(set (match_operand:V8QI 0 "register_operand" "=y")
8940 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8941 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8944 "pshufb\t{%2, %0|%0, %2}";
8945 [(set_attr "type" "sselog1")
8946 (set_attr "prefix_extra" "1")
8947 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8948 (set_attr "mode" "DI")])
;; vpsign<ssevecsize> for each mode of the SSEMODE124 iterator
;; (V16QI, V8HI, V4SI).  Three-operand VEX form.
8950 (define_insn "*avx_psign<mode>3"
8951 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8953 [(match_operand:SSEMODE124 1 "register_operand" "x")
8954 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8957 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8958 [(set_attr "type" "sselog1")
8959 (set_attr "prefix_extra" "1")
8960 (set_attr "prefix" "vex")
8961 (set_attr "mode" "TI")])
;; psign<ssevecsize> for each mode of the SSEMODE124 iterator
;; (V16QI, V8HI, V4SI).  Two-operand form; operand 1 is tied to the
;; destination.
8963 (define_insn "ssse3_psign<mode>3"
8964 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8966 [(match_operand:SSEMODE124 1 "register_operand" "0")
8967 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8970 "psign<ssevecsize>\t{%2, %0|%0, %2}";
8971 [(set_attr "type" "sselog1")
8972 (set_attr "prefix_data16" "1")
8973 (set_attr "prefix_extra" "1")
8974 (set_attr "mode" "TI")])
;; psign<mmxvecsize> for each mode of the MMXMODEI iterator, in "y"-class
;; registers.  Shares its name template with the SSEMODE124 pattern; the
;; <mode> substitution keeps the generated names distinct.
8976 (define_insn "ssse3_psign<mode>3"
8977 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8979 [(match_operand:MMXMODEI 1 "register_operand" "0")
8980 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8983 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8984 [(set_attr "type" "sselog1")
8985 (set_attr "prefix_extra" "1")
8986 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8987 (set_attr "mode" "DI")])
;; vpalignr on TImode values, three-operand VEX form.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing (presumably converting a bit count to the byte count
;; the instruction takes -- confirm against the predicate).
8989 (define_insn "*avx_palignrti"
8990 [(set (match_operand:TI 0 "register_operand" "=x")
8991 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8992 (match_operand:TI 2 "nonimmediate_operand" "xm")
8993 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8997 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8998 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9000 [(set_attr "type" "sseishft")
9001 (set_attr "prefix_extra" "1")
9002 (set_attr "length_immediate" "1")
9003 (set_attr "prefix" "vex")
9004 (set_attr "mode" "TI")])
;; palignr on TImode values, two-operand form with operand 1 tied to the
;; destination.  Operand 3 (a multiple of 8 per its predicate) is divided
;; by 8 in the output code before the template is returned.
9006 (define_insn "ssse3_palignrti"
9007 [(set (match_operand:TI 0 "register_operand" "=x")
9008 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9009 (match_operand:TI 2 "nonimmediate_operand" "xm")
9010 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9014 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9015 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9017 [(set_attr "type" "sseishft")
9018 (set_attr "atom_unit" "sishuf")
9019 (set_attr "prefix_data16" "1")
9020 (set_attr "prefix_extra" "1")
9021 (set_attr "length_immediate" "1")
9022 (set_attr "mode" "TI")])
;; palignr on DImode values in "y"-class registers.  Operand 1 is tied to
;; the destination; operand 3 is divided by 8 in the output code before
;; the template is returned.
9024 (define_insn "ssse3_palignrdi"
9025 [(set (match_operand:DI 0 "register_operand" "=y")
9026 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9027 (match_operand:DI 2 "nonimmediate_operand" "ym")
9028 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9032 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9033 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9035 [(set_attr "type" "sseishft")
9036 (set_attr "atom_unit" "sishuf")
9037 (set_attr "prefix_extra" "1")
9038 (set_attr "length_immediate" "1")
9039 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9040 (set_attr "mode" "DI")])
;; Vector absolute value (abs RTL code) for the SSEMODE124 modes, emitted
;; as pabs<ssevecsize>.  The template carries the %v marker and the
;; prefix attribute is "maybe_vex", so one pattern serves both encodings.
9042 (define_insn "abs<mode>2"
9043 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9044 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9046 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9047 [(set_attr "type" "sselog1")
9048 (set_attr "prefix_data16" "1")
9049 (set_attr "prefix_extra" "1")
9050 (set_attr "prefix" "maybe_vex")
9051 (set_attr "mode" "TI")])
;; Vector absolute value (abs RTL code) for the MMXMODEI modes in
;; "y"-class registers, emitted as pabs<mmxvecsize>.  prefix_rep is
;; explicitly set to 0 here.
9053 (define_insn "abs<mode>2"
9054 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9055 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9057 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9058 [(set_attr "type" "sselog1")
9059 (set_attr "prefix_rep" "0")
9060 (set_attr "prefix_extra" "1")
9061 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9062 (set_attr "mode" "DI")])
9064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9066 ;; AMD SSE4A instructions
9068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnts<ssemodefsuffix>: store of a MODEF register operand to a memory
;; destination.  The insn "mode" attribute follows the iterator mode.
9070 (define_insn "sse4a_movnt<mode>"
9071 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9073 [(match_operand:MODEF 1 "register_operand" "x")]
9076 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9077 [(set_attr "type" "ssemov")
9078 (set_attr "mode" "<MODE>")])
;; movnts<ssemodesuffixf2c> taking a vector register (SSEMODEF2P: V4SF or
;; V2DF): element 0 of operand 1 is selected and stored to a scalar-mode
;; memory destination.
9080 (define_insn "sse4a_vmmovnt<mode>"
9081 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9082 (unspec:<ssescalarmode>
9083 [(vec_select:<ssescalarmode>
9084 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9085 (parallel [(const_int 0)]))]
9088 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9089 [(set_attr "type" "ssemov")
9090 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form: operands 2 and 3 are integer constants (hence
;; length_immediate 2) and operand 1 is tied to the destination.
9092 (define_insn "sse4a_extrqi"
9093 [(set (match_operand:V2DI 0 "register_operand" "=x")
9094 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9095 (match_operand 2 "const_int_operand" "")
9096 (match_operand 3 "const_int_operand" "")]
9099 "extrq\t{%3, %2, %0|%0, %2, %3}"
9100 [(set_attr "type" "sse")
9101 (set_attr "prefix_data16" "1")
9102 (set_attr "length_immediate" "2")
9103 (set_attr "mode" "TI")])
;; extrq, register form: operand 2 is a V16QI register and operand 1 is
;; tied to the destination.
9105 (define_insn "sse4a_extrq"
9106 [(set (match_operand:V2DI 0 "register_operand" "=x")
9107 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9108 (match_operand:V16QI 2 "register_operand" "x")]
9111 "extrq\t{%2, %0|%0, %2}"
9112 [(set_attr "type" "sse")
9113 (set_attr "prefix_data16" "1")
9114 (set_attr "mode" "TI")])
;; insertq, immediate form: operands 3 and 4 are integer constants (hence
;; length_immediate 2); operand 1 is tied to the destination.  The
;; attributes select a rep prefix and explicitly no data16 prefix.
9116 (define_insn "sse4a_insertqi"
9117 [(set (match_operand:V2DI 0 "register_operand" "=x")
9118 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9119 (match_operand:V2DI 2 "register_operand" "x")
9120 (match_operand 3 "const_int_operand" "")
9121 (match_operand 4 "const_int_operand" "")]
9124 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9125 [(set_attr "type" "sseins")
9126 (set_attr "prefix_data16" "0")
9127 (set_attr "prefix_rep" "1")
9128 (set_attr "length_immediate" "2")
9129 (set_attr "mode" "TI")])
;; insertq, register form: both inputs are V2DI registers and operand 1
;; is tied to the destination.
9131 (define_insn "sse4a_insertq"
9132 [(set (match_operand:V2DI 0 "register_operand" "=x")
9133 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9134 (match_operand:V2DI 2 "register_operand" "x")]
9137 "insertq\t{%2, %0|%0, %2}"
9138 [(set_attr "type" "sseins")
9139 (set_attr "prefix_data16" "0")
9140 (set_attr "prefix_rep" "1")
9141 (set_attr "mode" "TI")])
9143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9145 ;; Intel SSE4.1 instructions
9147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vblendp<suffix> with an immediate mask, expressed as vec_merge.
;; Note the RTL operand order: operand 2 is the first vec_merge input and
;; operand 1 the second; operand 3 is the constant selector.
9149 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9150 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9151 (vec_merge:AVXMODEF2P
9152 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9153 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9154 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9156 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9157 [(set_attr "type" "ssemov")
9158 (set_attr "prefix_extra" "1")
9159 (set_attr "length_immediate" "1")
9160 (set_attr "prefix" "vex")
9161 (set_attr "mode" "<avxvecmode>")])
;; vblendvp<suffix>: variable blend whose selector (operand 3) is a
;; register rather than a constant.  length_immediate is still 1 here
;; (presumably because the fourth register is encoded in an immediate
;; byte -- confirm against the VEX encoding).
9163 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9164 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9166 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9167 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9168 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9171 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9172 [(set_attr "type" "ssemov")
9173 (set_attr "prefix_extra" "1")
9174 (set_attr "length_immediate" "1")
9175 (set_attr "prefix" "vex")
9176 (set_attr "mode" "<avxvecmode>")])
;; blendp<suffix> with an immediate mask for V4SF/V2DF, expressed as
;; vec_merge.  Operand 1 (the second vec_merge input) is tied to the
;; destination; operand 3 is the constant selector.
9178 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9179 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9180 (vec_merge:SSEMODEF2P
9181 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9182 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9183 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9185 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9186 [(set_attr "type" "ssemov")
9187 (set_attr "prefix_data16" "1")
9188 (set_attr "prefix_extra" "1")
9189 (set_attr "length_immediate" "1")
9190 (set_attr "mode" "<MODE>")])
;; blendvp<suffix>: variable blend for V4SF/V2DF.  The selector
;; (operand 3) uses the "Yz" constraint while operands 0-2 use the
;; *_not_xmm0 predicates, keeping them out of the selector's register
;; (presumably xmm0, going by the predicate names).
9192 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9193 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9195 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9196 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9197 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9200 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9201 [(set_attr "type" "ssemov")
9202 (set_attr "prefix_data16" "1")
9203 (set_attr "prefix_extra" "1")
9204 (set_attr "mode" "<MODE>")])
;; vdpp<suffix> with an 8-bit immediate (operand 3, 0..255).  Operand 1
;; carries the "%" commutative marker; three-operand VEX form.
9206 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9207 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9209 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9210 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9211 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9214 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9215 [(set_attr "type" "ssemul")
9216 (set_attr "prefix" "vex")
9217 (set_attr "prefix_extra" "1")
9218 (set_attr "length_immediate" "1")
9219 (set_attr "mode" "<avxvecmode>")])
;; dpp<suffix> for V4SF/V2DF with an 8-bit immediate (operand 3).
;; Operand 1 is commutative ("%") and tied to the destination.
9221 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9222 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9224 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9225 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9226 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9229 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9230 [(set_attr "type" "ssemul")
9231 (set_attr "prefix_data16" "1")
9232 (set_attr "prefix_extra" "1")
9233 (set_attr "length_immediate" "1")
9234 (set_attr "mode" "<MODE>")])
;; movntdqa: V2DI load whose source must be memory (memory_operand, "m"),
;; modelled as an unspec.  The template carries %v and prefix is
;; "maybe_vex".
9236 (define_insn "sse4_1_movntdqa"
9237 [(set (match_operand:V2DI 0 "register_operand" "=x")
9238 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9241 "%vmovntdqa\t{%1, %0|%0, %1}"
9242 [(set_attr "type" "ssemov")
9243 (set_attr "prefix_extra" "1")
9244 (set_attr "prefix" "maybe_vex")
9245 (set_attr "mode" "TI")])
;; vmpsadbw on V16QI with an 8-bit immediate (operand 3), modelled as an
;; unspec.  Three-operand VEX form.
9247 (define_insn "*avx_mpsadbw"
9248 [(set (match_operand:V16QI 0 "register_operand" "=x")
9249 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9250 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9251 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9254 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9255 [(set_attr "type" "sselog1")
9256 (set_attr "prefix" "vex")
9257 (set_attr "prefix_extra" "1")
9258 (set_attr "length_immediate" "1")
9259 (set_attr "mode" "TI")])
;; mpsadbw on V16QI with an 8-bit immediate (operand 3), modelled as an
;; unspec.  Operand 1 is tied to the destination.
9261 (define_insn "sse4_1_mpsadbw"
9262 [(set (match_operand:V16QI 0 "register_operand" "=x")
9263 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9264 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9265 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9268 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9269 [(set_attr "type" "sselog1")
9270 (set_attr "prefix_extra" "1")
9271 (set_attr "length_immediate" "1")
9272 (set_attr "mode" "TI")])
;; vpackusdw: two V4SI inputs produce one V8HI result.  Three-operand
;; VEX form; operand 2 may be a register or memory.
9274 (define_insn "*avx_packusdw"
9275 [(set (match_operand:V8HI 0 "register_operand" "=x")
9278 (match_operand:V4SI 1 "register_operand" "x"))
9280 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9282 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9283 [(set_attr "type" "sselog")
9284 (set_attr "prefix_extra" "1")
9285 (set_attr "prefix" "vex")
9286 (set_attr "mode" "TI")])
;; packusdw: two V4SI inputs produce one V8HI result.  Operand 1 is tied
;; to the destination; operand 2 may be a register or memory.
9288 (define_insn "sse4_1_packusdw"
9289 [(set (match_operand:V8HI 0 "register_operand" "=x")
9292 (match_operand:V4SI 1 "register_operand" "0"))
9294 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9296 "packusdw\t{%2, %0|%0, %2}"
9297 [(set_attr "type" "sselog")
9298 (set_attr "prefix_extra" "1")
9299 (set_attr "mode" "TI")])
;; vpblendvb on V16QI: variable blend whose selector (operand 3) is a
;; register.  Modelled as an unspec; length_immediate is 1 even though no
;; constant operand appears (presumably the fourth register is encoded in
;; an immediate byte -- confirm against the VEX encoding).
9301 (define_insn "*avx_pblendvb"
9302 [(set (match_operand:V16QI 0 "register_operand" "=x")
9303 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9304 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9305 (match_operand:V16QI 3 "register_operand" "x")]
9308 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9309 [(set_attr "type" "ssemov")
9310 (set_attr "prefix_extra" "1")
9311 (set_attr "length_immediate" "1")
9312 (set_attr "prefix" "vex")
9313 (set_attr "mode" "TI")])
;; pblendvb on V16QI: variable blend.  The selector (operand 3) uses the
;; "Yz" constraint while operands 0-2 use the *_not_xmm0 predicates
;; (presumably the selector lives in xmm0, going by the predicate names).
9315 (define_insn "sse4_1_pblendvb"
9316 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9317 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9318 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9319 (match_operand:V16QI 3 "register_operand" "Yz")]
9322 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9323 [(set_attr "type" "ssemov")
9324 (set_attr "prefix_extra" "1")
9325 (set_attr "mode" "TI")])
;; vpblendw on V8HI with an 8-bit immediate selector (operand 3).
;; In the RTL, operand 2 is listed before operand 1; the template prints
;; them in the usual three-operand VEX order.
9327 (define_insn "*avx_pblendw"
9328 [(set (match_operand:V8HI 0 "register_operand" "=x")
9330 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9331 (match_operand:V8HI 1 "register_operand" "x")
9332 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9334 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9335 [(set_attr "type" "ssemov")
9336 (set_attr "prefix" "vex")
9337 (set_attr "prefix_extra" "1")
9338 (set_attr "length_immediate" "1")
9339 (set_attr "mode" "TI")])
;; pblendw on V8HI with an 8-bit immediate selector (operand 3).
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
9341 (define_insn "sse4_1_pblendw"
9342 [(set (match_operand:V8HI 0 "register_operand" "=x")
9344 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9345 (match_operand:V8HI 1 "register_operand" "0")
9346 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9348 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9349 [(set_attr "type" "ssemov")
9350 (set_attr "prefix_extra" "1")
9351 (set_attr "length_immediate" "1")
9352 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single
;; register-or-memory input.  Template carries %v; prefix is "maybe_vex".
9354 (define_insn "sse4_1_phminposuw"
9355 [(set (match_operand:V8HI 0 "register_operand" "=x")
9356 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9357 UNSPEC_PHMINPOSUW))]
9359 "%vphminposuw\t{%1, %0|%0, %1}"
9360 [(set_attr "type" "sselog1")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "prefix" "maybe_vex")
9363 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: V8HI result built from a selection
;; (starting at element 0) of a full V16QI register.
9365 (define_insn "sse4_1_extendv8qiv8hi2"
9366 [(set (match_operand:V8HI 0 "register_operand" "=x")
9369 (match_operand:V16QI 1 "register_operand" "x")
9370 (parallel [(const_int 0)
9379 "%vpmovsxbw\t{%1, %0|%0, %1}"
9380 [(set_attr "type" "ssemov")
9381 (set_attr "prefix_extra" "1")
9382 (set_attr "prefix" "maybe_vex")
9383 (set_attr "mode" "TI")])
;; pmovsxbw, narrow-source form: the input is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI before the selection.
9385 (define_insn "*sse4_1_extendv8qiv8hi2"
9386 [(set (match_operand:V8HI 0 "register_operand" "=x")
9389 (vec_duplicate:V16QI
9390 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9391 (parallel [(const_int 0)
9400 "%vpmovsxbw\t{%1, %0|%0, %1}"
9401 [(set_attr "type" "ssemov")
9402 (set_attr "prefix_extra" "1")
9403 (set_attr "prefix" "maybe_vex")
9404 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: V4SI result built from a selection
;; (starting at element 0) of a full V16QI register.
9406 (define_insn "sse4_1_extendv4qiv4si2"
9407 [(set (match_operand:V4SI 0 "register_operand" "=x")
9410 (match_operand:V16QI 1 "register_operand" "x")
9411 (parallel [(const_int 0)
9416 "%vpmovsxbd\t{%1, %0|%0, %1}"
9417 [(set_attr "type" "ssemov")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "prefix" "maybe_vex")
9420 (set_attr "mode" "TI")])
;; pmovsxbd, narrow-source form: the input is a V4QI register or memory
;; operand wrapped in vec_duplicate:V16QI before the selection.
9422 (define_insn "*sse4_1_extendv4qiv4si2"
9423 [(set (match_operand:V4SI 0 "register_operand" "=x")
9426 (vec_duplicate:V16QI
9427 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9428 (parallel [(const_int 0)
9433 "%vpmovsxbd\t{%1, %0|%0, %1}"
9434 [(set_attr "type" "ssemov")
9435 (set_attr "prefix_extra" "1")
9436 (set_attr "prefix" "maybe_vex")
9437 (set_attr "mode" "TI")])
;; pmovsxbq, register-source form: V2DI result built from a selection
;; (starting at element 0) of a full V16QI register.
9439 (define_insn "sse4_1_extendv2qiv2di2"
9440 [(set (match_operand:V2DI 0 "register_operand" "=x")
9443 (match_operand:V16QI 1 "register_operand" "x")
9444 (parallel [(const_int 0)
9447 "%vpmovsxbq\t{%1, %0|%0, %1}"
9448 [(set_attr "type" "ssemov")
9449 (set_attr "prefix_extra" "1")
9450 (set_attr "prefix" "maybe_vex")
9451 (set_attr "mode" "TI")])
;; pmovsxbq, narrow-source form: the input is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI before the selection.
9453 (define_insn "*sse4_1_extendv2qiv2di2"
9454 [(set (match_operand:V2DI 0 "register_operand" "=x")
9457 (vec_duplicate:V16QI
9458 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9459 (parallel [(const_int 0)
9462 "%vpmovsxbq\t{%1, %0|%0, %1}"
9463 [(set_attr "type" "ssemov")
9464 (set_attr "prefix_extra" "1")
9465 (set_attr "prefix" "maybe_vex")
9466 (set_attr "mode" "TI")])
;; pmovsxwd, register-source form: V4SI result built from a selection
;; (starting at element 0) of a full V8HI register.
9468 (define_insn "sse4_1_extendv4hiv4si2"
9469 [(set (match_operand:V4SI 0 "register_operand" "=x")
9472 (match_operand:V8HI 1 "register_operand" "x")
9473 (parallel [(const_int 0)
9478 "%vpmovsxwd\t{%1, %0|%0, %1}"
9479 [(set_attr "type" "ssemov")
9480 (set_attr "prefix_extra" "1")
9481 (set_attr "prefix" "maybe_vex")
9482 (set_attr "mode" "TI")])
;; pmovsxwd, narrow-source form: the input is a register or memory
;; operand holding just the words that get extended.  The destination has
;; four SI lanes, so the source must be V4HI (four words); this keeps the
;; pattern in line with the byte and dword siblings, whose narrow operand
;; always has as many elements as the result (V8QI->V8HI, V4QI->V4SI,
;; V2QI->V2DI, V2SI->V2DI).
9484 (define_insn "*sse4_1_extendv4hiv4si2"
9485 [(set (match_operand:V4SI 0 "register_operand" "=x")
9489 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9490 (parallel [(const_int 0)
9495 "%vpmovsxwd\t{%1, %0|%0, %1}"
9496 [(set_attr "type" "ssemov")
9497 (set_attr "prefix_extra" "1")
9498 (set_attr "prefix" "maybe_vex")
9499 (set_attr "mode" "TI")])
;; pmovsxwq, register-source form: V2DI result built from a selection
;; (starting at element 0) of a full V8HI register.
9501 (define_insn "sse4_1_extendv2hiv2di2"
9502 [(set (match_operand:V2DI 0 "register_operand" "=x")
9505 (match_operand:V8HI 1 "register_operand" "x")
9506 (parallel [(const_int 0)
9509 "%vpmovsxwq\t{%1, %0|%0, %1}"
9510 [(set_attr "type" "ssemov")
9511 (set_attr "prefix_extra" "1")
9512 (set_attr "prefix" "maybe_vex")
9513 (set_attr "mode" "TI")])
;; pmovsxwq, narrow-source form: the input is a register or memory
;; operand holding just the words that get extended.  The destination has
;; two DI lanes, so the source must be V2HI (two words), not a full V8HI
;; vector; a V8HI memory operand would describe a 128-bit access where
;; only two words are consumed.  This matches the byte and dword
;; siblings, whose narrow operand always has as many elements as the
;; result (V8QI->V8HI, V4QI->V4SI, V2QI->V2DI, V2SI->V2DI).
9515 (define_insn "*sse4_1_extendv2hiv2di2"
9516 [(set (match_operand:V2DI 0 "register_operand" "=x")
9520 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9521 (parallel [(const_int 0)
9524 "%vpmovsxwq\t{%1, %0|%0, %1}"
9525 [(set_attr "type" "ssemov")
9526 (set_attr "prefix_extra" "1")
9527 (set_attr "prefix" "maybe_vex")
9528 (set_attr "mode" "TI")])
;; pmovsxdq, register-source form: V2DI result built from a selection
;; (starting at element 0) of a full V4SI register.
9530 (define_insn "sse4_1_extendv2siv2di2"
9531 [(set (match_operand:V2DI 0 "register_operand" "=x")
9534 (match_operand:V4SI 1 "register_operand" "x")
9535 (parallel [(const_int 0)
9538 "%vpmovsxdq\t{%1, %0|%0, %1}"
9539 [(set_attr "type" "ssemov")
9540 (set_attr "prefix_extra" "1")
9541 (set_attr "prefix" "maybe_vex")
9542 (set_attr "mode" "TI")])
;; pmovsxdq, narrow-source form: the input is a V2SI register or memory
;; operand (two elements, matching the two DI lanes of the result).
9544 (define_insn "*sse4_1_extendv2siv2di2"
9545 [(set (match_operand:V2DI 0 "register_operand" "=x")
9549 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9550 (parallel [(const_int 0)
9553 "%vpmovsxdq\t{%1, %0|%0, %1}"
9554 [(set_attr "type" "ssemov")
9555 (set_attr "prefix_extra" "1")
9556 (set_attr "prefix" "maybe_vex")
9557 (set_attr "mode" "TI")])
;; pmovzxbw, register-source form: V8HI result built from a selection
;; (starting at element 0) of a full V16QI register.
9559 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9560 [(set (match_operand:V8HI 0 "register_operand" "=x")
9563 (match_operand:V16QI 1 "register_operand" "x")
9564 (parallel [(const_int 0)
9573 "%vpmovzxbw\t{%1, %0|%0, %1}"
9574 [(set_attr "type" "ssemov")
9575 (set_attr "prefix_extra" "1")
9576 (set_attr "prefix" "maybe_vex")
9577 (set_attr "mode" "TI")])
;; pmovzxbw, narrow-source form: the input is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI before the selection.
9579 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9580 [(set (match_operand:V8HI 0 "register_operand" "=x")
9583 (vec_duplicate:V16QI
9584 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9585 (parallel [(const_int 0)
9594 "%vpmovzxbw\t{%1, %0|%0, %1}"
9595 [(set_attr "type" "ssemov")
9596 (set_attr "prefix_extra" "1")
9597 (set_attr "prefix" "maybe_vex")
9598 (set_attr "mode" "TI")])
;; pmovzxbd, register-source form.  Operand 1 is a whole V16QI register and
;; the result is V4SI in an SSE register.
9600 (define_insn "sse4_1_zero_extendv4qiv4si2"
9601 [(set (match_operand:V4SI 0 "register_operand" "=x")
9604 (match_operand:V16QI 1 "register_operand" "x")
9605 (parallel [(const_int 0)
9610 "%vpmovzxbd\t{%1, %0|%0, %1}"
9611 [(set_attr "type" "ssemov")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "prefix" "maybe_vex")
9614 (set_attr "mode" "TI")])
;; Unnamed ("*") pmovzxbd variant.  Operand 1 is a V4QI value (register or
;; memory, "xm") wrapped in vec_duplicate:V16QI.
9616 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9617 [(set (match_operand:V4SI 0 "register_operand" "=x")
9620 (vec_duplicate:V16QI
9621 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9622 (parallel [(const_int 0)
9627 "%vpmovzxbd\t{%1, %0|%0, %1}"
9628 [(set_attr "type" "ssemov")
9629 (set_attr "prefix_extra" "1")
9630 (set_attr "prefix" "maybe_vex")
9631 (set_attr "mode" "TI")])
;; pmovzxbq, register-source form.  Operand 1 is a whole V16QI register and
;; the result is V2DI in an SSE register.
9633 (define_insn "sse4_1_zero_extendv2qiv2di2"
9634 [(set (match_operand:V2DI 0 "register_operand" "=x")
9637 (match_operand:V16QI 1 "register_operand" "x")
9638 (parallel [(const_int 0)
9641 "%vpmovzxbq\t{%1, %0|%0, %1}"
9642 [(set_attr "type" "ssemov")
9643 (set_attr "prefix_extra" "1")
9644 (set_attr "prefix" "maybe_vex")
9645 (set_attr "mode" "TI")])
;; Unnamed ("*") pmovzxbq variant.  Operand 1 is a V2QI value (register or
;; memory, "xm") wrapped in vec_duplicate:V16QI.
9647 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9648 [(set (match_operand:V2DI 0 "register_operand" "=x")
9651 (vec_duplicate:V16QI
9652 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9653 (parallel [(const_int 0)
9656 "%vpmovzxbq\t{%1, %0|%0, %1}"
9657 [(set_attr "type" "ssemov")
9658 (set_attr "prefix_extra" "1")
9659 (set_attr "prefix" "maybe_vex")
9660 (set_attr "mode" "TI")])
;; pmovzxwd, register-source form.  Operand 1 is a whole V8HI register and
;; the result is V4SI in an SSE register.
9662 (define_insn "sse4_1_zero_extendv4hiv4si2"
9663 [(set (match_operand:V4SI 0 "register_operand" "=x")
9666 (match_operand:V8HI 1 "register_operand" "x")
9667 (parallel [(const_int 0)
9672 "%vpmovzxwd\t{%1, %0|%0, %1}"
9673 [(set_attr "type" "ssemov")
9674 (set_attr "prefix_extra" "1")
9675 (set_attr "prefix" "maybe_vex")
9676 (set_attr "mode" "TI")])
;; Unnamed ("*") pmovzxwd variant.  Operand 1 is a V4HI value that may be a
;; register or memory ("xm").
9678 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9679 [(set (match_operand:V4SI 0 "register_operand" "=x")
9683 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9684 (parallel [(const_int 0)
9689 "%vpmovzxwd\t{%1, %0|%0, %1}"
9690 [(set_attr "type" "ssemov")
9691 (set_attr "prefix_extra" "1")
9692 (set_attr "prefix" "maybe_vex")
9693 (set_attr "mode" "TI")])
;; pmovzxwq, register-source form.  Operand 1 is a whole V8HI register and
;; the result is V2DI in an SSE register.
9695 (define_insn "sse4_1_zero_extendv2hiv2di2"
9696 [(set (match_operand:V2DI 0 "register_operand" "=x")
9699 (match_operand:V8HI 1 "register_operand" "x")
9700 (parallel [(const_int 0)
9703 "%vpmovzxwq\t{%1, %0|%0, %1}"
9704 [(set_attr "type" "ssemov")
9705 (set_attr "prefix_extra" "1")
9706 (set_attr "prefix" "maybe_vex")
9707 (set_attr "mode" "TI")])
;; Unnamed ("*") pmovzxwq variant.  Operand 1 is a V2HI value that may be a
;; register or memory ("xm").
9709 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9710 [(set (match_operand:V2DI 0 "register_operand" "=x")
9714 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9715 (parallel [(const_int 0)
9718 "%vpmovzxwq\t{%1, %0|%0, %1}"
9719 [(set_attr "type" "ssemov")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "prefix" "maybe_vex")
9722 (set_attr "mode" "TI")])
;; pmovzxdq, register-source form.  Operand 1 is a whole V4SI register and
;; the result is V2DI in an SSE register.
9724 (define_insn "sse4_1_zero_extendv2siv2di2"
9725 [(set (match_operand:V2DI 0 "register_operand" "=x")
9728 (match_operand:V4SI 1 "register_operand" "x")
9729 (parallel [(const_int 0)
9732 "%vpmovzxdq\t{%1, %0|%0, %1}"
9733 [(set_attr "type" "ssemov")
9734 (set_attr "prefix_extra" "1")
9735 (set_attr "prefix" "maybe_vex")
9736 (set_attr "mode" "TI")])
;; Unnamed ("*") pmovzxdq variant.  Operand 1 is a V2SI value that may be a
;; register or memory ("xm").
9738 (define_insn "*sse4_1_zero_extendv2siv2di2"
9739 [(set (match_operand:V2DI 0 "register_operand" "=x")
9743 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9744 (parallel [(const_int 0)
9747 "%vpmovzxdq\t{%1, %0|%0, %1}"
9748 [(set_attr "type" "ssemov")
9749 (set_attr "prefix_extra" "1")
9750 (set_attr "prefix" "maybe_vex")
9751 (set_attr "mode" "TI")])
9753 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9754 ;; setting FLAGS_REG. But they are not really compare instructions.
;; vtestps/vtestpd over the AVXMODEF2P iterator.  The only result is
;; FLAGS_REG in CC mode, produced by an unspec of operand 0 (register) and
;; operand 1 (register or memory).  VEX-only: the template has no "%v" and
;; the prefix attribute is "vex".
9755 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9756 [(set (reg:CC FLAGS_REG)
9757 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9758 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9761 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9762 [(set_attr "type" "ssecomi")
9763 (set_attr "prefix_extra" "1")
9764 (set_attr "prefix" "vex")
9765 (set_attr "mode" "<MODE>")])
9767 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9768 ;; But it is not really a compare instruction.
;; 256-bit vptest.  Sets FLAGS_REG (CC mode) from an unspec of two V4DI
;; operands: operand 0 must be a register, operand 1 may be memory.
;; VEX-only; the insn mode attribute is OI.
9769 (define_insn "avx_ptest256"
9770 [(set (reg:CC FLAGS_REG)
9771 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9772 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9775 "vptest\t{%1, %0|%0, %1}"
9776 [(set_attr "type" "ssecomi")
9777 (set_attr "prefix_extra" "1")
9778 (set_attr "prefix" "vex")
9779 (set_attr "mode" "OI")])
;; 128-bit ptest.  Sets FLAGS_REG (CC mode) from an unspec of two V2DI
;; operands: operand 0 must be a register, operand 1 may be memory.  "%v"
;; plus maybe_vex allows the VEX-encoded vptest.
9781 (define_insn "sse4_1_ptest"
9782 [(set (reg:CC FLAGS_REG)
9783 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9784 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9787 "%vptest\t{%1, %0|%0, %1}"
9788 [(set_attr "type" "ssecomi")
9789 (set_attr "prefix_extra" "1")
9790 (set_attr "prefix" "maybe_vex")
9791 (set_attr "mode" "TI")])
;; 256-bit vroundps/vroundpd over AVX256MODEF2P.  Operand 1 (register or
;; memory) is the source; operand 2 is an immediate restricted to 0..15
;; (NOTE(review): presumably the rounding-control byte -- confirm).
;; Modelled as an unspec; VEX-only.
9793 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9794 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9795 (unspec:AVX256MODEF2P
9796 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9797 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9800 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9801 [(set_attr "type" "ssecvt")
9802 (set_attr "prefix_extra" "1")
9803 (set_attr "length_immediate" "1")
9804 (set_attr "prefix" "vex")
9805 (set_attr "mode" "<MODE>")])
;; 128-bit roundps/roundpd over SSEMODEF2P.  Operand 1 (register or memory)
;; is the source and operand 2 is an immediate in 0..15.  "%v" plus
;; maybe_vex allows the VEX-encoded form.
9807 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9808 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9810 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9811 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9814 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9815 [(set_attr "type" "ssecvt")
9816 (set_attr "prefix_data16" "1")
9817 (set_attr "prefix_extra" "1")
9818 (set_attr "length_immediate" "1")
9819 (set_attr "prefix" "maybe_vex")
9820 (set_attr "mode" "<MODE>")])
;; AVX scalar round (vroundss/vroundsd), unnamed pattern.  A vec_merge
;; combines the rounded value of operand 2 (immediate in operand 3) with
;; operand 1.  All vector operands are registers; operand 1 is a separate
;; source rather than being tied to the destination.
9822 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9823 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9824 (vec_merge:SSEMODEF2P
9826 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9827 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9829 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9832 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9833 [(set_attr "type" "ssecvt")
9834 (set_attr "prefix_extra" "1")
9835 (set_attr "length_immediate" "1")
9836 (set_attr "prefix" "vex")
9837 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar round (roundss/roundsd).  Same vec_merge shape as the AVX
;; pattern, but operand 1 is tied to the destination ("0"), so the template
;; prints only %3, %2 and %0.
9839 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9840 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9841 (vec_merge:SSEMODEF2P
9843 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9844 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9846 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9849 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9850 [(set_attr "type" "ssecvt")
9851 (set_attr "prefix_data16" "1")
9852 (set_attr "prefix_extra" "1")
9853 (set_attr "length_immediate" "1")
9854 (set_attr "mode" "<MODE>")])
9856 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9858 ;; Intel SSE4.2 string/text processing instructions
9860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: operand 0 (SI, constraint
;; "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.  Inputs are the
;; two V16QI strings (operands 2 and 4), SI operands 3 and 5 pinned to "a"
;; and "d", and an 8-bit immediate (operand 6).
;; The split requires can_create_pseudo_p ().  Its preparation code looks
;; up REG_UNUSED notes on the current insn to learn which results are live
;; and emits pcmpestri, pcmpestrm and/or the flags-only pattern.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible in this view; presumably they test "ecx" and "xmm0" -- confirm.
9862 (define_insn_and_split "sse4_2_pcmpestr"
9863 [(set (match_operand:SI 0 "register_operand" "=c,c")
9865 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9866 (match_operand:SI 3 "register_operand" "a,a")
9867 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9868 (match_operand:SI 5 "register_operand" "d,d")
9869 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9871 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9879 (set (reg:CC FLAGS_REG)
9888 && can_create_pseudo_p ()"
9893 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9894 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9895 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9898 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9899 operands[3], operands[4],
9900 operands[5], operands[6]));
9902 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9903 operands[3], operands[4],
9904 operands[5], operands[6]));
9905 if (flags && !(ecx || xmm0))
9906 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9907 operands[2], operands[3],
9908 operands[4], operands[5],
9912 [(set_attr "type" "sselog")
9913 (set_attr "prefix_data16" "1")
9914 (set_attr "prefix_extra" "1")
9915 (set_attr "length_immediate" "1")
9916 (set_attr "memory" "none,load")
9917 (set_attr "mode" "TI")])
;; pcmpestri: index-producing form.  The SI result is constrained to "c";
;; FLAGS_REG is set as well.  Operands 2 and 4 are pinned to "a" and "d".
;; Two alternatives: second string (operand 3) in a register or in memory,
;; mirrored by the "memory" attribute (none,load).
9919 (define_insn "sse4_2_pcmpestri"
9920 [(set (match_operand:SI 0 "register_operand" "=c,c")
9922 [(match_operand:V16QI 1 "register_operand" "x,x")
9923 (match_operand:SI 2 "register_operand" "a,a")
9924 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9925 (match_operand:SI 4 "register_operand" "d,d")
9926 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9928 (set (reg:CC FLAGS_REG)
9937 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9938 [(set_attr "type" "sselog")
9939 (set_attr "prefix_data16" "1")
9940 (set_attr "prefix_extra" "1")
9941 (set_attr "prefix" "maybe_vex")
9942 (set_attr "length_immediate" "1")
9943 (set_attr "memory" "none,load")
9944 (set_attr "mode" "TI")])
;; pcmpestrm: mask-producing form.  The V16QI result is constrained to
;; "Yz"; FLAGS_REG is set as well.  Input operands and alternatives are the
;; same as for the pcmpestri pattern.
9946 (define_insn "sse4_2_pcmpestrm"
9947 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9949 [(match_operand:V16QI 1 "register_operand" "x,x")
9950 (match_operand:SI 2 "register_operand" "a,a")
9951 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9952 (match_operand:SI 4 "register_operand" "d,d")
9953 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9955 (set (reg:CC FLAGS_REG)
9964 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9965 [(set_attr "type" "sselog")
9966 (set_attr "prefix_data16" "1")
9967 (set_attr "prefix_extra" "1")
9968 (set_attr "length_immediate" "1")
9969 (set_attr "prefix" "maybe_vex")
9970 (set_attr "memory" "none,load")
9971 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only real result is FLAGS_REG.  Operands 0 and
;; 1 are scratch clobbers.  Alternatives 0-1 clobber a "Yz" vector scratch
;; and emit pcmpestrm; alternatives 2-3 clobber a "c" integer scratch and
;; emit pcmpestri.  Within each pair, operand 4 is a register or memory.
9973 (define_insn "sse4_2_pcmpestr_cconly"
9974 [(set (reg:CC FLAGS_REG)
9976 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9977 (match_operand:SI 3 "register_operand" "a,a,a,a")
9978 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9979 (match_operand:SI 5 "register_operand" "d,d,d,d")
9980 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9982 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9983 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9986 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9987 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9988 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9989 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9990 [(set_attr "type" "sselog")
9991 (set_attr "prefix_data16" "1")
9992 (set_attr "prefix_extra" "1")
9993 (set_attr "length_immediate" "1")
9994 (set_attr "memory" "none,load,none,load")
9995 (set_attr "prefix" "maybe_vex")
9996 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: operand 0 (SI, "c"),
;; operand 1 (V16QI, "Yz") and FLAGS_REG.  Inputs are the two V16QI strings
;; (operands 2 and 3) and an 8-bit immediate (operand 4); there are no
;; separate length operands.
;; The split requires can_create_pseudo_p ().  Its preparation code checks
;; REG_UNUSED notes to learn which results are live and emits pcmpistri,
;; pcmpistrm and/or the flags-only pattern.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible in this view; presumably they test "ecx" and "xmm0" -- confirm.
9998 (define_insn_and_split "sse4_2_pcmpistr"
9999 [(set (match_operand:SI 0 "register_operand" "=c,c")
10001 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10002 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10003 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10005 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10011 (set (reg:CC FLAGS_REG)
10018 && can_create_pseudo_p ()"
10023 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10024 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10025 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10028 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10029 operands[3], operands[4]));
10031 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10032 operands[3], operands[4]));
10033 if (flags && !(ecx || xmm0))
10034 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10035 operands[2], operands[3],
10039 [(set_attr "type" "sselog")
10040 (set_attr "prefix_data16" "1")
10041 (set_attr "prefix_extra" "1")
10042 (set_attr "length_immediate" "1")
10043 (set_attr "memory" "none,load")
10044 (set_attr "mode" "TI")])
;; pcmpistri: index-producing form.  The SI result is constrained to "c";
;; FLAGS_REG is set as well.  Two alternatives: second string (operand 2)
;; in a register or in memory.
10046 (define_insn "sse4_2_pcmpistri"
10047 [(set (match_operand:SI 0 "register_operand" "=c,c")
10049 [(match_operand:V16QI 1 "register_operand" "x,x")
10050 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10051 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10053 (set (reg:CC FLAGS_REG)
10060 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10061 [(set_attr "type" "sselog")
10062 (set_attr "prefix_data16" "1")
10063 (set_attr "prefix_extra" "1")
10064 (set_attr "length_immediate" "1")
10065 (set_attr "prefix" "maybe_vex")
10066 (set_attr "memory" "none,load")
10067 (set_attr "mode" "TI")])
;; pcmpistrm: mask-producing form.  The V16QI result is constrained to
;; "Yz"; FLAGS_REG is set as well.  Inputs and alternatives are the same as
;; for the pcmpistri pattern.
10069 (define_insn "sse4_2_pcmpistrm"
10070 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10072 [(match_operand:V16QI 1 "register_operand" "x,x")
10073 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10074 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10076 (set (reg:CC FLAGS_REG)
10083 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10084 [(set_attr "type" "sselog")
10085 (set_attr "prefix_data16" "1")
10086 (set_attr "prefix_extra" "1")
10087 (set_attr "length_immediate" "1")
10088 (set_attr "prefix" "maybe_vex")
10089 (set_attr "memory" "none,load")
10090 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the only real result is FLAGS_REG.  Operands 0 and
;; 1 are scratch clobbers.  Alternatives 0-1 clobber a "Yz" vector scratch
;; and emit pcmpistrm; alternatives 2-3 clobber a "c" integer scratch and
;; emit pcmpistri.  Within each pair, operand 3 is a register or memory.
10092 (define_insn "sse4_2_pcmpistr_cconly"
10093 [(set (reg:CC FLAGS_REG)
10095 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10096 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10097 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10099 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10100 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10103 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10104 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10105 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10106 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10107 [(set_attr "type" "sselog")
10108 (set_attr "prefix_data16" "1")
10109 (set_attr "prefix_extra" "1")
10110 (set_attr "length_immediate" "1")
10111 (set_attr "memory" "none,load,none,load")
10112 (set_attr "prefix" "maybe_vex")
10113 (set_attr "mode" "TI")])
10115 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10117 ;; SSE5 instructions
10119 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10121 ;; SSE5 parallel integer multiply/add instructions.
10122 ;; Note the instruction does not allow the value being added to be a memory
10123 ;; operand. However by pretending via the nonimmediate_operand predicate
10124 ;; that it does and splitting it later allows the following to be recognized:
10125 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI: operands 1 and 2 are the multiplicands (commutative,
;; "%"), operand 3 is the addend and is tied to the destination ("0").
;; The insn condition calls ix86_sse5_valid_op_p with 2 as its fifth
;; argument, whereas the pmacssww pattern passes 1.
;; The third alternative prints %1 and %2 swapped, for the case where
;; operand 1 is the memory operand.
10126 (define_insn "sse5_pmacsww"
10127 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
10130 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
10131 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
10132 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
10133 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
10135 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10136 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10137 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10138 [(set_attr "type" "ssemuladd")
10139 (set_attr "mode" "TI")])
10141 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Applies only when the operands fail ix86_sse5_valid_op_p with a limit of
;; 1 but pass it with a limit of 2, and when operand 0 is not mentioned in
;; any of operands 1-3.  The preparation code calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_pmacsww.
;; NOTE(review): the define_split header line and the end of the emit call
;; are not visible in this view.
10143 [(set (match_operand:V8HI 0 "register_operand" "")
10145 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10146 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10147 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10149 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
10150 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
10151 && !reg_mentioned_p (operands[0], operands[1])
10152 && !reg_mentioned_p (operands[0], operands[2])
10153 && !reg_mentioned_p (operands[0], operands[3])"
10156 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
10157 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI: same operand layout as pmacsww (multiplicands in
;; operands 1 and 2, addend in operand 3 tied to the destination).  The
;; insn condition passes 1 as the fifth argument of ix86_sse5_valid_op_p.
;; The third alternative prints %1 and %2 swapped.
10162 (define_insn "sse5_pmacssww"
10163 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
10165 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10166 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
10167 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
10168 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10170 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10171 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10172 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10173 [(set_attr "type" "ssemuladd")
10174 (set_attr "mode" "TI")])
10176 ;; Note the instruction does not allow the value being added to be a memory
10177 ;; operand. However by pretending via the nonimmediate_operand predicate
10178 ;; that it does and splitting it later allows the following to be recognized:
10179 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI: operands 1 and 2 are the multiplicands (commutative,
;; "%"), operand 3 is the addend tied to the destination ("0").  The insn
;; condition passes 2 as the fifth argument of ix86_sse5_valid_op_p.
;; The third alternative prints %1 and %2 swapped.
10180 (define_insn "sse5_pmacsdd"
10181 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10184 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10185 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
10186 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10187 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
10189 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10190 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10191 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10192 [(set_attr "type" "ssemuladd")
10193 (set_attr "mode" "TI")])
10195 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; Applies only when the operands fail ix86_sse5_valid_op_p with a limit of
;; 1 but pass it with a limit of 2, and when operand 0 is not mentioned in
;; any of operands 1-3.  The preparation code calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_pmacsdd.
;; NOTE(review): the define_split header line and the end of the emit call
;; are not visible in this view.
10197 [(set (match_operand:V4SI 0 "register_operand" "")
10199 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10200 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10201 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10203 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
10204 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
10205 && !reg_mentioned_p (operands[0], operands[1])
10206 && !reg_mentioned_p (operands[0], operands[2])
10207 && !reg_mentioned_p (operands[0], operands[3])"
10210 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
10211 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI: multiplicands in operands 1 and 2, addend in operand 3
;; tied to the destination.  The insn condition passes 1 as the fifth
;; argument of ix86_sse5_valid_op_p.  The third alternative prints %1 and
;; %2 swapped.
10216 (define_insn "sse5_pmacssdd"
10217 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10219 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10220 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
10221 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10222 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10224 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10225 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10226 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10227 [(set_attr "type" "ssemuladd")
10228 (set_attr "mode" "TI")])
;; pmacssdql: V4SI multiplicands (operands 1 and 2), V2DI addend (operand
;; 3, tied to the destination).  The element selections on both
;; multiplicands start at index 1.
;; NOTE(review): the remaining selector entries and RTL wrappers are not
;; visible in this view.
10230 (define_insn "sse5_pmacssdql"
10231 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10236 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10237 (parallel [(const_int 1)
10240 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10241 (parallel [(const_int 1)
10243 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10244 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10246 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10247 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10248 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10249 [(set_attr "type" "ssemuladd")
10250 (set_attr "mode" "TI")])
;; pmacssdqh: same operand layout as pmacssdql, but the element selections
;; on both multiplicands start at index 0.
;; NOTE(review): the remaining selector entries and RTL wrappers are not
;; visible in this view.
10252 (define_insn "sse5_pmacssdqh"
10253 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10258 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10259 (parallel [(const_int 0)
10263 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10264 (parallel [(const_int 0)
10266 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10267 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10269 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10270 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10271 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10272 [(set_attr "type" "ssemuladd")
10273 (set_attr "mode" "TI")])
;; pmacsdql: V4SI multiplicands (operands 1 and 2), V2DI addend (operand 3,
;; tied to the destination).  The element selections on both multiplicands
;; start at index 1.  The third alternative prints %1 and %2 swapped.
10275 (define_insn "sse5_pmacsdql"
10276 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10281 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10282 (parallel [(const_int 1)
10286 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10287 (parallel [(const_int 1)
10289 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10290 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10292 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10293 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10294 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10295 [(set_attr "type" "ssemuladd")
10296 (set_attr "mode" "TI")])
;; pmacsdql variant whose addend (operand 3) is a memory operand.  The
;; destination is early-clobber ("=&x").  The split fires once reload has
;; completed, or earlier if operand 0 is not mentioned in operands 1 or 2.
;; NOTE(review): most of the replacement RTL is not visible in this view;
;; presumably it loads operand 3 into operand 0 and then applies the
;; register-addend pmacsdql -- confirm.
10298 (define_insn_and_split "*sse5_pmacsdql_mem"
10299 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10304 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10305 (parallel [(const_int 1)
10309 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10310 (parallel [(const_int 1)
10312 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10313 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10315 "&& (reload_completed
10316 || (!reg_mentioned_p (operands[0], operands[1])
10317 && !reg_mentioned_p (operands[0], operands[2])))"
10318 [(set (match_dup 0)
10326 (parallel [(const_int 1)
10331 (parallel [(const_int 1)
10335 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10336 ;; fake it with a multiply/add. In general, we expect the define_split to
10337 ;; occur before register allocation, so we have to handle the corner case where
10338 ;; the target is the same as operands 1/2
;; Widening multiply built from a multiply/add with a zero addend.  The
;; pattern selects elements 1 and 3 of operand 2 (the selector on operand 1
;; also starts at 1); the split's preparation code sets operands[3] to
;; CONST0_RTX (V2DImode).  The destination is early-clobber ("=&x") and the
;; split fires after reload, or earlier if operand 0 is not mentioned in
;; operands 1 or 2.
10339 (define_insn_and_split "sse5_mulv2div2di3_low"
10340 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10344 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10345 (parallel [(const_int 1)
10349 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10350 (parallel [(const_int 1)
10351 (const_int 3)])))))]
10354 "&& (reload_completed
10355 || (!reg_mentioned_p (operands[0], operands[1])
10356 && !reg_mentioned_p (operands[0], operands[2])))"
10357 [(set (match_dup 0)
10365 (parallel [(const_int 1)
10370 (parallel [(const_int 1)
10374 operands[3] = CONST0_RTX (V2DImode);
10376 [(set_attr "type" "ssemuladd")
10377 (set_attr "mode" "TI")])
;; pmacsdqh: V4SI multiplicands (operands 1 and 2), V2DI addend (operand 3,
;; tied to the destination).  The element selections on both multiplicands
;; start at index 0.  The third alternative prints %1 and %2 swapped.
10379 (define_insn "sse5_pmacsdqh"
10380 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10385 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10386 (parallel [(const_int 0)
10390 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10391 (parallel [(const_int 0)
10393 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10394 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10396 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10397 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10398 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10399 [(set_attr "type" "ssemuladd")
10400 (set_attr "mode" "TI")])
;; pmacsdqh variant whose addend (operand 3) is a memory operand.  The
;; destination is early-clobber ("=&x").  The split fires once reload has
;; completed, or earlier if operand 0 is not mentioned in operands 1 or 2.
;; NOTE(review): most of the replacement RTL is not visible in this view;
;; presumably it loads operand 3 into operand 0 and then applies the
;; register-addend pmacsdqh -- confirm.
10402 (define_insn_and_split "*sse5_pmacsdqh_mem"
10403 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10408 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10409 (parallel [(const_int 0)
10413 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10414 (parallel [(const_int 0)
10416 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10417 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10419 "&& (reload_completed
10420 || (!reg_mentioned_p (operands[0], operands[1])
10421 && !reg_mentioned_p (operands[0], operands[2])))"
10422 [(set (match_dup 0)
10430 (parallel [(const_int 0)
10435 (parallel [(const_int 0)
10439 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10440 ;; fake it with a multiply/add. In general, we expect the define_split to
10441 ;; occur before register allocation, so we have to handle the corner case where
10442 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply built from a multiply/add with a zero addend.  The
;; pattern selects elements 0 and 2 of operand 2 (the selector on operand 1
;; also starts at 0); the split's preparation code sets operands[3] to
;; CONST0_RTX (V2DImode).  The destination is early-clobber ("=&x") and the
;; split fires after reload, or earlier if operand 0 is not mentioned in
;; operands 1 or 2.
10443 (define_insn_and_split "sse5_mulv2div2di3_high"
10444 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10448 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10449 (parallel [(const_int 0)
10453 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10454 (parallel [(const_int 0)
10455 (const_int 2)])))))]
10458 "&& (reload_completed
10459 || (!reg_mentioned_p (operands[0], operands[1])
10460 && !reg_mentioned_p (operands[0], operands[2])))"
10461 [(set (match_dup 0)
10469 (parallel [(const_int 0)
10474 (parallel [(const_int 0)
10478 operands[3] = CONST0_RTX (V2DImode);
10480 [(set_attr "type" "ssemuladd")
10481 (set_attr "mode" "TI")])
10483 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V8HI multiplicands (operands 1 and 2), V4SI addend (operand 3,
;; tied to the destination).  The element selections on both multiplicands
;; start at index 1.  The third alternative prints %1 and %2 swapped.
10484 (define_insn "sse5_pmacsswd"
10485 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10490 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10491 (parallel [(const_int 1)
10497 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10498 (parallel [(const_int 1)
10502 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10503 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10505 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10506 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10507 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10508 [(set_attr "type" "ssemuladd")
10509 (set_attr "mode" "TI")])
;; pmacswd: same operand layout as pmacsswd (V8HI multiplicands selected
;; from index 1, V4SI addend tied to the destination).  The third
;; alternative prints %1 and %2 swapped.
10511 (define_insn "sse5_pmacswd"
10512 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10517 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10518 (parallel [(const_int 1)
10524 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10525 (parallel [(const_int 1)
10529 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10530 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10532 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10533 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10534 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10535 [(set_attr "type" "ssemuladd")
10536 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI inputs (operands 1 and 2), V4SI addend (operand 3, tied
;; to the destination).  The pattern has two groups of element selections,
;; one starting at index 0 and one starting at index 1 and ending at 7.
;; NOTE(review): the second group's match_dup lines are not visible here.
10538 (define_insn "sse5_pmadcsswd"
10539 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10545 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10546 (parallel [(const_int 0)
10552 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10553 (parallel [(const_int 0)
10561 (parallel [(const_int 1)
10568 (parallel [(const_int 1)
10571 (const_int 7)])))))
10572 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10573 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10575 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10576 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10577 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10578 [(set_attr "type" "ssemuladd")
10579 (set_attr "mode" "TI")])
;; pmadcswd: same operand layout as pmadcsswd -- V8HI inputs with one group
;; of selections starting at index 0 and one starting at index 1, plus a
;; V4SI addend tied to the destination.
;; NOTE(review): the second group's match_dup lines are not visible here.
10581 (define_insn "sse5_pmadcswd"
10582 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10588 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10589 (parallel [(const_int 0)
10595 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10596 (parallel [(const_int 0)
10604 (parallel [(const_int 1)
10611 (parallel [(const_int 1)
10614 (const_int 7)])))))
10615 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10616 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10618 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10619 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10620 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10621 [(set_attr "type" "ssemuladd")
10622 (set_attr "mode" "TI")])
10624 ;; SSE5 parallel XMM conditional moves
;; pcmov for every SSEMODE vector mode, written as an if_then_else:
;; operand 3 is the selector, operand 1 the "then" value and operand 2 the
;; "else" value.  In each of the four alternatives exactly one input is
;; tied to the destination ("0") -- operand 3 in the first two, operand 1
;; in the last two -- and at most one input may be memory.
10625 (define_insn "sse5_pcmov_<mode>"
10626 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10627 (if_then_else:SSEMODE
10628 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10629 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10630 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10631 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10633 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10634 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10635 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10636 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10637 [(set_attr "type" "sse4arg")])
10639 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: single V16QI source (register or memory), V8HI result.  The
;; pattern uses two element selections, starting at indices 0 and 1.
;; NOTE(review): extension and plus wrappers are not visible in this view.
10640 (define_insn "sse5_phaddbw"
10641 [(set (match_operand:V8HI 0 "register_operand" "=x")
10645 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10646 (parallel [(const_int 0)
10657 (parallel [(const_int 1)
10664 (const_int 15)])))))]
10666 "phaddbw\t{%1, %0|%0, %1}"
10667 [(set_attr "type" "sseiadd1")])
;; phaddbd: single V16QI source (register or memory), V4SI result.  The
;; pattern uses four element selections, starting at indices 0, 1, 2 and 3.
;; NOTE(review): extension and plus wrappers are not visible in this view.
10669 (define_insn "sse5_phaddbd"
10670 [(set (match_operand:V4SI 0 "register_operand" "=x")
10675 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10676 (parallel [(const_int 0)
10683 (parallel [(const_int 1)
10686 (const_int 13)]))))
10691 (parallel [(const_int 2)
10698 (parallel [(const_int 3)
10701 (const_int 15)]))))))]
10703 "phaddbd\t{%1, %0|%0, %1}"
10704 [(set_attr "type" "sseiadd1")])
;; phaddbq: single V16QI source (register or memory), V2DI result.  The
;; pattern uses eight element selections, starting at indices 0-3 and 8-11.
;; NOTE(review): extension and plus wrappers are not visible in this view.
10706 (define_insn "sse5_phaddbq"
10707 [(set (match_operand:V2DI 0 "register_operand" "=x")
10713 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10714 (parallel [(const_int 0)
10719 (parallel [(const_int 1)
10725 (parallel [(const_int 2)
10730 (parallel [(const_int 3)
10731 (const_int 7)])))))
10737 (parallel [(const_int 8)
10742 (parallel [(const_int 9)
10743 (const_int 13)]))))
10748 (parallel [(const_int 10)
10753 (parallel [(const_int 11)
10754 (const_int 15)])))))))]
10756 "phaddbq\t{%1, %0|%0, %1}"
10757 [(set_attr "type" "sseiadd1")])
;; phaddwd: single V8HI source (register or memory), V4SI result.  The
;; pattern uses two element selections, starting at indices 0 and 1.
;; NOTE(review): extension and plus wrappers are not visible in this view.
10759 (define_insn "sse5_phaddwd"
10760 [(set (match_operand:V4SI 0 "register_operand" "=x")
10764 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10765 (parallel [(const_int 0)
10772 (parallel [(const_int 1)
10775 (const_int 7)])))))]
10777 "phaddwd\t{%1, %0|%0, %1}"
10778 [(set_attr "type" "sseiadd1")])
;; phaddwq: single V8HI source (register or memory), V2DI result.  The
;; pattern uses four element selections, starting at indices 0, 1, 2 and 3.
;; NOTE(review): extension and plus wrappers are not visible in this view.
10780 (define_insn "sse5_phaddwq"
10781 [(set (match_operand:V2DI 0 "register_operand" "=x")
10786 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10787 (parallel [(const_int 0)
10792 (parallel [(const_int 1)
10798 (parallel [(const_int 2)
10803 (parallel [(const_int 3)
10804 (const_int 7)]))))))]
10806 "phaddwq\t{%1, %0|%0, %1}"
10807 [(set_attr "type" "sseiadd1")])
;; phadddq: single V4SI source (register or memory), V2DI result.  The
;; pattern uses two element selections, starting at indices 0 and 1.
;; NOTE(review): extension and plus wrappers are not visible in this view.
10809 (define_insn "sse5_phadddq"
10810 [(set (match_operand:V2DI 0 "register_operand" "=x")
10814 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10815 (parallel [(const_int 0)
10820 (parallel [(const_int 1)
10821 (const_int 3)])))))]
10823 "phadddq\t{%1, %0|%0, %1}"
10824 [(set_attr "type" "sseiadd1")])
;; phaddubw: single V16QI source (register or memory), V8HI result.  The
;; visible operand layout is the same as phaddbw's.
;; NOTE(review): presumably the unsigned (zero-extending) counterpart --
;; the extension wrappers are not visible in this view.
10826 (define_insn "sse5_phaddubw"
10827 [(set (match_operand:V8HI 0 "register_operand" "=x")
10831 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10832 (parallel [(const_int 0)
10843 (parallel [(const_int 1)
10850 (const_int 15)])))))]
10852 "phaddubw\t{%1, %0|%0, %1}"
10853 [(set_attr "type" "sseiadd1")])
;; phaddubd: single V16QI source (register or memory), V4SI result.  The
;; visible operand layout is the same as phaddbd's.
;; NOTE(review): presumably the unsigned (zero-extending) counterpart --
;; the extension wrappers are not visible in this view.
10855 (define_insn "sse5_phaddubd"
10856 [(set (match_operand:V4SI 0 "register_operand" "=x")
10861 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10862 (parallel [(const_int 0)
10869 (parallel [(const_int 1)
10872 (const_int 13)]))))
10877 (parallel [(const_int 2)
10884 (parallel [(const_int 3)
10887 (const_int 15)]))))))]
10889 "phaddubd\t{%1, %0|%0, %1}"
10890 [(set_attr "type" "sseiadd1")])
;; phaddubq: single V16QI source (register or memory), V2DI result.  The
;; visible operand layout is the same as phaddbq's.
;; NOTE(review): presumably the unsigned (zero-extending) counterpart --
;; the extension wrappers are not visible in this view.
10892 (define_insn "sse5_phaddubq"
10893 [(set (match_operand:V2DI 0 "register_operand" "=x")
10899 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10900 (parallel [(const_int 0)
10905 (parallel [(const_int 1)
10911 (parallel [(const_int 2)
10916 (parallel [(const_int 3)
10917 (const_int 7)])))))
10923 (parallel [(const_int 8)
10928 (parallel [(const_int 9)
10929 (const_int 13)]))))
10934 (parallel [(const_int 10)
10939 (parallel [(const_int 11)
10940 (const_int 15)])))))))]
10942 "phaddubq\t{%1, %0|%0, %1}"
10943 [(set_attr "type" "sseiadd1")])
;; phadduwd: single V8HI source (register or memory), V4SI result.  The
;; visible operand layout is the same as phaddwd's.
;; NOTE(review): presumably the unsigned (zero-extending) counterpart --
;; the extension wrappers are not visible in this view.
10945 (define_insn "sse5_phadduwd"
10946 [(set (match_operand:V4SI 0 "register_operand" "=x")
10950 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10951 (parallel [(const_int 0)
10958 (parallel [(const_int 1)
10961 (const_int 7)])))))]
10963 "phadduwd\t{%1, %0|%0, %1}"
10964 [(set_attr "type" "sseiadd1")])
;; sse5_phadduwq: emits phadduwq with a V8HI register-or-memory source and a
;; V2DI register destination.  Four selectors start at words 0, 1, 2 and 3
;; (the last ends at 7), consistent with combining each group of four
;; adjacent words into one quadword.
;; NOTE(review): presumably the unsigned (zero-extending) variant; confirm.
10966 (define_insn "sse5_phadduwq"
10967 [(set (match_operand:V2DI 0 "register_operand" "=x")
10972 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10973 (parallel [(const_int 0)
10978 (parallel [(const_int 1)
10984 (parallel [(const_int 2)
10989 (parallel [(const_int 3)
10990 (const_int 7)]))))))]
10992 "phadduwq\t{%1, %0|%0, %1}"
10993 [(set_attr "type" "sseiadd1")])
;; sse5_phaddudq: emits phaddudq with a V4SI register-or-memory source and a
;; V2DI register destination.  The selectors start at dword 0 and at dword 1
;; (ending at 3), i.e. they split each adjacent pair of dwords.
;; NOTE(review): presumably the unsigned (zero-extending) variant; confirm.
10995 (define_insn "sse5_phaddudq"
10996 [(set (match_operand:V2DI 0 "register_operand" "=x")
11000 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11001 (parallel [(const_int 0)
11006 (parallel [(const_int 1)
11007 (const_int 3)])))))]
11009 "phaddudq\t{%1, %0|%0, %1}"
11010 [(set_attr "type" "sseiadd1")])
;; sse5_phsubbw: emits phsubbw with a V16QI register-or-memory source and a
;; V8HI register destination.  The selectors start at byte 0 and at byte 1
;; (ending at 15), i.e. they split each adjacent pair of bytes.
;; NOTE(review): presumably a widening subtract of the two halves; the
;; arithmetic operator must be confirmed against the complete RTL template.
11012 (define_insn "sse5_phsubbw"
11013 [(set (match_operand:V8HI 0 "register_operand" "=x")
11017 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11018 (parallel [(const_int 0)
11029 (parallel [(const_int 1)
11036 (const_int 15)])))))]
11038 "phsubbw\t{%1, %0|%0, %1}"
11039 [(set_attr "type" "sseiadd1")])
;; sse5_phsubwd: emits phsubwd with a V8HI register-or-memory source and a
;; V4SI register destination.  The selectors start at word 0 and at word 1
;; (ending at 7), i.e. they split each adjacent pair of words.
;; NOTE(review): presumably a widening subtract of the two halves; confirm.
11041 (define_insn "sse5_phsubwd"
11042 [(set (match_operand:V4SI 0 "register_operand" "=x")
11046 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11047 (parallel [(const_int 0)
11054 (parallel [(const_int 1)
11057 (const_int 7)])))))]
11059 "phsubwd\t{%1, %0|%0, %1}"
11060 [(set_attr "type" "sseiadd1")])
;; sse5_phsubdq: emits phsubdq with a V4SI register-or-memory source and a
;; V2DI register destination.  The selectors start at dword 0 and at dword 1
;; (ending at 3), i.e. they split each adjacent pair of dwords.
;; NOTE(review): presumably a widening subtract of the two halves; confirm.
11062 (define_insn "sse5_phsubdq"
11063 [(set (match_operand:V2DI 0 "register_operand" "=x")
11067 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11068 (parallel [(const_int 0)
11073 (parallel [(const_int 1)
11074 (const_int 3)])))))]
11076 "phsubdq\t{%1, %0|%0, %1}"
11077 [(set_attr "type" "sseiadd1")])
11079 ;; SSE5 permute instructions
;; sse5_pperm: generic four-operand pperm on V16QI values, modelled as an
;; UNSPEC_SSE5_PERMUTE of operands 1, 2 and 3.  The four alternatives tie the
;; destination to operand 1 (first two) or to operand 3 (last two) and allow
;; at most one of the remaining inputs in memory.  The insn condition also
;; defers to ix86_sse5_valid_op_p, which is defined outside this file.
11080 (define_insn "sse5_pperm"
11081 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
11083 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
11084 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
11085 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
11086 UNSPEC_SSE5_PERMUTE))]
11087 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11088 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11089 [(set_attr "type" "sse4arg")
11090 (set_attr "mode" "TI")])
11092 ;; The following are for the various unpack insns which don't need the first
11093 ;; source operand, so we can just use the output operand for the first operand.
11094 ;; This allows either of the other two operands to be a memory operand. We
11095 ;; can't just use the first operand as an argument to the normal pperm because
11096 ;; then an output-only argument suddenly becomes an input operand.
;; sse5_pperm_zero_v16qi_v8hi: pperm-based unpack of V16QI elements into a
;; V8HI result (presumably zero-extending; confirm against the full RTL).
;; Operand 2 is the PARALLEL of const_ints selecting the source elements and
;; operand 3 is the V16QI selector handed to pperm.  The output register is
;; reused as the first pperm source, as the template shows.
;; The two alternatives allow operand 1 or operand 3 in memory, but not both,
;; so the insn condition tests operands 1 and 3.  It used to test operand 2,
;; which is never a register, and so rejected every form with operand 1 in
;; memory.
11097 (define_insn "sse5_pperm_zero_v16qi_v8hi"
11098 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11101 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
11102 (match_operand 2 "" "")))) ;; parallel with const_int's
11103 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11105 && (register_operand (operands[1], V16QImode)
11106 || register_operand (operands[3], V16QImode))"
11107 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11108 [(set_attr "type" "sseadd")
11109 (set_attr "prefix_data16" "0")
11110 (set_attr "prefix_extra" "2")
11111 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v16qi_v8hi: pperm-based unpack of V16QI elements into a
;; V8HI result (presumably sign-extending; confirm against the full RTL).
;; Operand 2 is the PARALLEL of const_ints selecting the source elements and
;; operand 3 is the V16QI selector handed to pperm.  The output register is
;; reused as the first pperm source, as the template shows.
;; The two alternatives allow operand 1 or operand 3 in memory, but not both,
;; so the insn condition tests operands 1 and 3.  It used to test operand 2,
;; which is never a register, and so rejected every form with operand 1 in
;; memory.
11113 (define_insn "sse5_pperm_sign_v16qi_v8hi"
11114 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11117 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
11118 (match_operand 2 "" "")))) ;; parallel with const_int's
11119 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11121 && (register_operand (operands[1], V16QImode)
11122 || register_operand (operands[3], V16QImode))"
11123 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11124 [(set_attr "type" "sseadd")
11125 (set_attr "prefix_data16" "0")
11126 (set_attr "prefix_extra" "2")
11127 (set_attr "mode" "TI")])
;; sse5_pperm_zero_v8hi_v4si: pperm-based unpack of V8HI elements into a
;; V4SI result (presumably zero-extending; confirm against the full RTL).
;; Operand 2 is the PARALLEL of const_ints selecting the source elements and
;; operand 3 is the V16QI selector handed to pperm.  The output register is
;; reused as the first pperm source, as the template shows.
;; The two alternatives allow operand 1 or operand 3 in memory, but not both,
;; so the insn condition tests operands 1 and 3.  It used to test operand 2,
;; which is never a register, and so rejected every form with operand 1 in
;; memory.
11129 (define_insn "sse5_pperm_zero_v8hi_v4si"
11130 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11133 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
11134 (match_operand 2 "" "")))) ;; parallel with const_int's
11135 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11137 && (register_operand (operands[1], V8HImode)
11138 || register_operand (operands[3], V16QImode))"
11139 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11140 [(set_attr "type" "sseadd")
11141 (set_attr "prefix_data16" "0")
11142 (set_attr "prefix_extra" "2")
11143 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v8hi_v4si: pperm-based unpack of V8HI elements into a
;; V4SI result (presumably sign-extending; confirm against the full RTL).
;; Operand 2 is the PARALLEL of const_ints selecting the source elements and
;; operand 3 is the V16QI selector handed to pperm.  The output register is
;; reused as the first pperm source, as the template shows.
;; The two alternatives allow operand 1 or operand 3 in memory, but not both,
;; so the insn condition tests operands 1 and 3.  It used to test operand 2,
;; which is never a register, and so rejected every form with operand 1 in
;; memory.
11145 (define_insn "sse5_pperm_sign_v8hi_v4si"
11146 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11149 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
11150 (match_operand 2 "" "")))) ;; parallel with const_int's
11151 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11153 && (register_operand (operands[1], V8HImode)
11154 || register_operand (operands[3], V16QImode))"
11155 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11156 [(set_attr "type" "sseadd")
11157 (set_attr "prefix_data16" "0")
11158 (set_attr "prefix_extra" "2")
11159 (set_attr "mode" "TI")])
;; sse5_pperm_zero_v4si_v2di: pperm-based unpack of V4SI elements into a
;; V2DI result (presumably zero-extending; confirm against the full RTL).
;; Operand 2 is the PARALLEL of const_ints selecting the source elements and
;; operand 3 is the V16QI selector handed to pperm.  The output register is
;; reused as the first pperm source, as the template shows.
;; The two alternatives allow operand 1 or operand 3 in memory, but not both,
;; so the insn condition tests operands 1 and 3.  It used to test operand 2,
;; which is never a register, and so rejected every form with operand 1 in
;; memory.
11161 (define_insn "sse5_pperm_zero_v4si_v2di"
11162 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11165 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
11166 (match_operand 2 "" "")))) ;; parallel with const_int's
11167 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11169 && (register_operand (operands[1], V4SImode)
11170 || register_operand (operands[3], V16QImode))"
11171 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11172 [(set_attr "type" "sseadd")
11173 (set_attr "prefix_data16" "0")
11174 (set_attr "prefix_extra" "2")
11175 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v4si_v2di: pperm-based unpack of V4SI elements into a
;; V2DI result (presumably sign-extending; confirm against the full RTL).
;; Operand 2 is the PARALLEL of const_ints selecting the source elements and
;; operand 3 is the V16QI selector handed to pperm.  The output register is
;; reused as the first pperm source, as the template shows.
;; The two alternatives allow operand 1 or operand 3 in memory, but not both,
;; so the insn condition tests operands 1 and 3.  It used to test operand 2,
;; which is never a register, and so rejected every form with operand 1 in
;; memory.
11177 (define_insn "sse5_pperm_sign_v4si_v2di"
11178 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11181 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
11182 (match_operand 2 "" "")))) ;; parallel with const_int's
11183 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11185 && (register_operand (operands[1], V4SImode)
11186 || register_operand (operands[3], V16QImode))"
11187 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11188 [(set_attr "type" "sseadd")
11189 (set_attr "prefix_data16" "0")
11190 (set_attr "prefix_extra" "2")
11191 (set_attr "mode" "TI")])
11193 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; sse5_pperm_pack_v2di_v4si: pperm taking two V2DI inputs (operands 1 and 2)
;; and producing a V4SI result; operand 3 is the V16QI selector, recorded
;; with a (use ...).  The alternatives mirror sse5_pperm: the destination is
;; tied to operand 1 or to operand 3, and ix86_sse5_valid_op_p (defined
;; elsewhere) is consulted in the insn condition.
11194 (define_insn "sse5_pperm_pack_v2di_v4si"
11195 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
11198 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
11200 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11201 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11202 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11203 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11204 [(set_attr "type" "sse4arg")
11205 (set_attr "mode" "TI")])
;; sse5_pperm_pack_v4si_v8hi: pperm taking two V4SI inputs (operands 1 and 2)
;; and producing a V8HI result; operand 3 is the V16QI selector, recorded
;; with a (use ...).  The destination is tied to operand 1 or to operand 3,
;; and ix86_sse5_valid_op_p (defined elsewhere) is consulted in the insn
;; condition.
11207 (define_insn "sse5_pperm_pack_v4si_v8hi"
11208 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
11211 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
11213 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11214 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11215 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11216 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11217 [(set_attr "type" "sse4arg")
11218 (set_attr "mode" "TI")])
;; sse5_pperm_pack_v8hi_v16qi: pperm taking two V8HI inputs (operands 1 and
;; 2) and producing a V16QI result; operand 3 is the V16QI selector, recorded
;; with a (use ...).  The destination is tied to operand 1 or to operand 3,
;; and ix86_sse5_valid_op_p (defined elsewhere) is consulted in the insn
;; condition.
11220 (define_insn "sse5_pperm_pack_v8hi_v16qi"
11221 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
11224 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
11226 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11227 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11228 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11229 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11230 [(set_attr "type" "sse4arg")
11231 (set_attr "mode" "TI")])
11233 ;; Floating point permutation (permps, permpd)
;; sse5_perm<mode>: four-operand permute for the SSEMODEF2P modes (V4SF and
;; V2DF), modelled as UNSPEC_SSE5_PERMUTE.  Operands 1 and 2 are the data
;; inputs in the insn mode; operand 3 is a V16QI selector.  The constraint
;; alternatives follow sse5_pperm (destination tied to operand 1 or 3).
11234 (define_insn "sse5_perm<mode>"
11235 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
11237 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
11238 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
11239 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
11240 UNSPEC_SSE5_PERMUTE))]
11241 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11242 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11243 [(set_attr "type" "sse4arg")
11244 (set_attr "mode" "<MODE>")])
11246 ;; SSE5 packed rotate instructions
;; rotl<mode>3: rotate-left expander for the SSEMODE1248 integer vector modes
;; with an SImode count (operand 2).  When the count is not an immediate
;; accepted by const_0_to_<sserotatemax>_operand, it is converted to
;; <ssescalarmode> if necessary, replicated <ssescalarnum> times into a
;; vector register with vec_init<mode>, and the per-element rotate
;; sse5_vrotl<mode>3 is emitted.  Otherwise the expander presumably falls
;; through to the (rotate ...) pattern above -- TODO confirm.
11247 (define_expand "rotl<mode>3"
11248 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11249 (rotate:SSEMODE1248
11250 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11251 (match_operand:SI 2 "general_operand")))]
11254 /* If we were given a scalar, convert it to parallel */
11255 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11257 rtvec vs = rtvec_alloc (<ssescalarnum>);
11258 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11259 rtx reg = gen_reg_rtx (<MODE>mode);
11260 rtx op2 = operands[2];
11263 if (GET_MODE (op2) != <ssescalarmode>mode)
11265 op2 = gen_reg_rtx (<ssescalarmode>mode);
11266 convert_move (op2, operands[2], false);
11269 for (i = 0; i < <ssescalarnum>; i++)
11270 RTVEC_ELT (vs, i) = op2;
11272 emit_insn (gen_vec_init<mode> (reg, par));
11273 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; rotr<mode>3: rotate-right expander for the SSEMODE1248 integer vector
;; modes with an SImode count (operand 2).  When the count is not an
;; immediate accepted by const_0_to_<sserotatemax>_operand, it is converted
;; to <ssescalarmode> if necessary, replicated <ssescalarnum> times with
;; vec_init<mode>, negated with neg<mode>2, and passed to sse5_vrotl<mode>3.
;; A right rotate is thus expressed as a left rotate by the negated
;; per-element count.
11278 (define_expand "rotr<mode>3"
11279 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11280 (rotatert:SSEMODE1248
11281 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11282 (match_operand:SI 2 "general_operand")))]
11285 /* If we were given a scalar, convert it to parallel */
11286 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11288 rtvec vs = rtvec_alloc (<ssescalarnum>);
11289 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11290 rtx neg = gen_reg_rtx (<MODE>mode);
11291 rtx reg = gen_reg_rtx (<MODE>mode);
11292 rtx op2 = operands[2];
11295 if (GET_MODE (op2) != <ssescalarmode>mode)
11297 op2 = gen_reg_rtx (<ssescalarmode>mode);
11298 convert_move (op2, operands[2], false);
11301 for (i = 0; i < <ssescalarnum>; i++)
11302 RTVEC_ELT (vs, i) = op2;
11304 emit_insn (gen_vec_init<mode> (reg, par));
11305 emit_insn (gen_neg<mode>2 (neg, reg));
11306 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; sse5_rotl<mode>3: rotate-left of every element of operand 1 by the same
;; immediate count (operand 2, limited by const_0_to_<sserotatemax>_operand),
;; emitted as prot<ssevecsize> with a one-byte immediate.
11311 (define_insn "sse5_rotl<mode>3"
11312 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11313 (rotate:SSEMODE1248
11314 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11315 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11317 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11318 [(set_attr "type" "sseishft")
11319 (set_attr "length_immediate" "1")
11320 (set_attr "mode" "TI")])
;; sse5_rotr<mode>3: rotate-right of every element of operand 1 by an
;; immediate count (operand 2).  It is emitted as a prot<ssevecsize> left
;; rotate by the complementary count, computed into operands[3] as
;; (element width in bits) - count.
;; The element width is taken from GET_MODE_BITSIZE of the scalar mode.  The
;; former expression <ssescalarnum> * 8 multiplied the number of elements by
;; 8, which equals the element width only for V4SI and produced a wrong
;; immediate for V16QI, V8HI and V2DI.
11322 (define_insn "sse5_rotr<mode>3"
11323 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11324 (rotatert:SSEMODE1248
11325 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11326 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11329 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11330 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11332 [(set_attr "type" "sseishft")
11333 (set_attr "length_immediate" "1")
11334 (set_attr "mode" "TI")])
;; vrotr<mode>3: per-element rotate-right with a vector of counts (operand
;; 2).  The counts are negated into a fresh register with neg<mode>2 and the
;; result is fed to sse5_vrotl<mode>3.
11336 (define_expand "vrotr<mode>3"
11337 [(match_operand:SSEMODE1248 0 "register_operand" "")
11338 (match_operand:SSEMODE1248 1 "register_operand" "")
11339 (match_operand:SSEMODE1248 2 "register_operand" "")]
11342 rtx reg = gen_reg_rtx (<MODE>mode);
11343 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11344 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3: per-element rotate-left with a vector of counts (operand 2);
;; forwards its three operands unchanged to sse5_vrotl<mode>3.
11348 (define_expand "vrotl<mode>3"
11349 [(match_operand:SSEMODE1248 0 "register_operand" "")
11350 (match_operand:SSEMODE1248 1 "register_operand" "")
11351 (match_operand:SSEMODE1248 2 "register_operand" "")]
11354 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; sse5_vrotl<mode>3: variable per-element rotate, emitted as
;; prot<ssevecsize>.  The RTL is an if_then_else on operand 2 (the count
;; vector) that selects between a left rotate of operand 1 by operand 2 and a
;; right rotate by the negated operand 2.
;; NOTE(review): the selecting comparison is presumably on the sign of the
;; count; confirm against the full template.
;; Either operand 1 or operand 2 may be in memory, per the two alternatives.
11358 (define_insn "sse5_vrotl<mode>3"
11359 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11360 (if_then_else:SSEMODE1248
11362 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11364 (rotate:SSEMODE1248
11365 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11367 (rotatert:SSEMODE1248
11369 (neg:SSEMODE1248 (match_dup 2)))))]
11370 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11371 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11372 [(set_attr "type" "sseishft")
11373 (set_attr "prefix_data16" "0")
11374 (set_attr "prefix_extra" "2")
11375 (set_attr "mode" "TI")])
11377 ;; SSE5 packed shift instructions.
11378 ;; FIXME: add V2DI back in
;; vlshr<mode>3: per-element logical shift right with a vector of counts, for
;; the SSEMODE124 modes (V16QI, V8HI, V4SI).  The counts are negated with
;; neg<mode>2 and passed to sse5_lshl<mode>3.
11379 (define_expand "vlshr<mode>3"
11380 [(match_operand:SSEMODE124 0 "register_operand" "")
11381 (match_operand:SSEMODE124 1 "register_operand" "")
11382 (match_operand:SSEMODE124 2 "register_operand" "")]
11385 rtx neg = gen_reg_rtx (<MODE>mode);
11386 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11387 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; vashr<mode>3: per-element arithmetic shift right with a vector of counts,
;; for the SSEMODE124 modes.  The counts are negated with neg<mode>2 and
;; passed to sse5_ashl<mode>3.
11391 (define_expand "vashr<mode>3"
11392 [(match_operand:SSEMODE124 0 "register_operand" "")
11393 (match_operand:SSEMODE124 1 "register_operand" "")
11394 (match_operand:SSEMODE124 2 "register_operand" "")]
11397 rtx neg = gen_reg_rtx (<MODE>mode);
11398 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11399 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; vashl<mode>3: per-element shift left with a vector of counts, for the
;; SSEMODE124 modes; forwards its operands unchanged to sse5_ashl<mode>3.
11403 (define_expand "vashl<mode>3"
11404 [(match_operand:SSEMODE124 0 "register_operand" "")
11405 (match_operand:SSEMODE124 1 "register_operand" "")
11406 (match_operand:SSEMODE124 2 "register_operand" "")]
11409 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; sse5_ashl<mode>3: variable per-element arithmetic shift, emitted as
;; psha<ssevecsize>.  The RTL is an if_then_else on operand 2 (the count
;; vector) that selects between a left shift of operand 1 by operand 2 and an
;; arithmetic right shift by the negated operand 2.
;; NOTE(review): the selecting comparison is presumably on the sign of the
;; count; confirm against the full template.
11413 (define_insn "sse5_ashl<mode>3"
11414 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11415 (if_then_else:SSEMODE1248
11417 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11419 (ashift:SSEMODE1248
11420 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11422 (ashiftrt:SSEMODE1248
11424 (neg:SSEMODE1248 (match_dup 2)))))]
11425 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11426 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11427 [(set_attr "type" "sseishft")
11428 (set_attr "prefix_data16" "0")
11429 (set_attr "prefix_extra" "2")
11430 (set_attr "mode" "TI")])
;; sse5_lshl<mode>3: variable per-element logical shift, emitted as
;; pshl<ssevecsize>.  The RTL is an if_then_else on operand 2 (the count
;; vector) that selects between a left shift of operand 1 by operand 2 and a
;; logical right shift by the negated operand 2.
;; NOTE(review): the selecting comparison is presumably on the sign of the
;; count; confirm against the full template.
11432 (define_insn "sse5_lshl<mode>3"
11433 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11434 (if_then_else:SSEMODE1248
11436 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11438 (ashift:SSEMODE1248
11439 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11441 (lshiftrt:SSEMODE1248
11443 (neg:SSEMODE1248 (match_dup 2)))))]
11444 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11445 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11446 [(set_attr "type" "sseishft")
11447 (set_attr "prefix_data16" "0")
11448 (set_attr "prefix_extra" "2")
11449 (set_attr "mode" "TI")])
11451 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; ashlv16qi3: shift left of a V16QI vector by one scalar SImode count.  The
;; count (operand 2) is placed in all 16 slots of a PARALLEL, loaded into a
;; fresh V16QI register with vec_initv16qi, and the per-element shift
;; sse5_ashlv16qi3 is emitted with that register as the count vector.
11452 (define_expand "ashlv16qi3"
11453 [(match_operand:V16QI 0 "register_operand" "")
11454 (match_operand:V16QI 1 "register_operand" "")
11455 (match_operand:SI 2 "nonmemory_operand" "")]
11458 rtvec vs = rtvec_alloc (16);
11459 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11460 rtx reg = gen_reg_rtx (V16QImode);
11462 for (i = 0; i < 16; i++)
11463 RTVEC_ELT (vs, i) = operands[2];
11465 emit_insn (gen_vec_initv16qi (reg, par));
11466 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; lshlv16qi3: logical shift left of a V16QI vector by one scalar SImode
;; count.  The count (operand 2) is placed in all 16 slots of a PARALLEL,
;; loaded into a fresh V16QI register with vec_initv16qi, and
;; sse5_lshlv16qi3 is emitted with that register as the count vector.
11470 (define_expand "lshlv16qi3"
11471 [(match_operand:V16QI 0 "register_operand" "")
11472 (match_operand:V16QI 1 "register_operand" "")
11473 (match_operand:SI 2 "nonmemory_operand" "")]
11476 rtvec vs = rtvec_alloc (16);
11477 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11478 rtx reg = gen_reg_rtx (V16QImode);
11480 for (i = 0; i < 16; i++)
11481 RTVEC_ELT (vs, i) = operands[2];
11483 emit_insn (gen_vec_initv16qi (reg, par));
11484 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; ashrv16qi3: arithmetic shift right of a V16QI vector by one scalar SImode
;; count, implemented with sse5_ashlv16qi3 and a negated count.  A CONST_INT
;; count is negated at expand time before being replicated; a non-constant
;; count is replicated with vec_initv16qi first and the vector is then
;; negated with negv16qi2.
11488 (define_expand "ashrv16qi3"
11489 [(match_operand:V16QI 0 "register_operand" "")
11490 (match_operand:V16QI 1 "register_operand" "")
11491 (match_operand:SI 2 "nonmemory_operand" "")]
11494 rtvec vs = rtvec_alloc (16);
11495 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11496 rtx reg = gen_reg_rtx (V16QImode);
11498 rtx ele = ((CONST_INT_P (operands[2]))
11499 ? GEN_INT (- INTVAL (operands[2]))
11502 for (i = 0; i < 16; i++)
11503 RTVEC_ELT (vs, i) = ele;
11505 emit_insn (gen_vec_initv16qi (reg, par));
11507 if (!CONST_INT_P (operands[2]))
11509 rtx neg = gen_reg_rtx (V16QImode);
11510 emit_insn (gen_negv16qi2 (neg, reg));
11511 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11514 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; ashrv2di3: arithmetic shift right of a V2DI vector by one scalar DImode
;; count, implemented with sse5_ashlv2di3 and a negated count.  The negated
;; scalar is computed first: at expand time for a CONST_INT, otherwise with
;; negdi2 (after convert_move to DImode when the operand has another mode).
;; It is then placed in both slots and loaded with vec_initv2di.
11519 (define_expand "ashrv2di3"
11520 [(match_operand:V2DI 0 "register_operand" "")
11521 (match_operand:V2DI 1 "register_operand" "")
11522 (match_operand:DI 2 "nonmemory_operand" "")]
11525 rtvec vs = rtvec_alloc (2);
11526 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11527 rtx reg = gen_reg_rtx (V2DImode);
11530 if (CONST_INT_P (operands[2]))
11531 ele = GEN_INT (- INTVAL (operands[2]));
11532 else if (GET_MODE (operands[2]) != DImode)
11534 rtx move = gen_reg_rtx (DImode);
11535 ele = gen_reg_rtx (DImode);
11536 convert_move (move, operands[2], false);
11537 emit_insn (gen_negdi2 (ele, move));
11541 ele = gen_reg_rtx (DImode);
11542 emit_insn (gen_negdi2 (ele, operands[2]));
11545 RTVEC_ELT (vs, 0) = ele;
11546 RTVEC_ELT (vs, 1) = ele;
11547 emit_insn (gen_vec_initv2di (reg, par));
11548 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11552 ;; SSE5 FRCZ support
;; sse5_frcz<mode>2: packed frcz for the SSEMODEF2P modes (V4SF, V2DF), with
;; a register-or-memory source and a register destination, emitted as
;; frcz<ssemodesuffixf4>.
11554 (define_insn "sse5_frcz<mode>2"
11555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11557 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11560 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11561 [(set_attr "type" "ssecvt1")
11562 (set_attr "mode" "<MODE>")])
;; sse5_vmfrcz<mode>2: scalar frcz expressed on vector modes.  The result is
;; a vec_merge of the operation on operand 2 with operand 1, and operand 1 is
;; tied to the destination (constraint 0).  Emitted as frcz<ssemodesuffixf2s>.
;; NOTE(review): presumably only the low element is replaced; the merge mask
;; should be confirmed against the full template.
11565 (define_insn "sse5_vmfrcz<mode>2"
11566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11567 (vec_merge:SSEMODEF2P
11569 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11571 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11574 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11575 [(set_attr "type" "ssecvt1")
11576 (set_attr "mode" "<MODE>")])
;; sse5_cvtph2ps: unspec conversion from a V4HI register-or-memory source to
;; a V4SF register, emitted as cvtph2ps.
11578 (define_insn "sse5_cvtph2ps"
11579 [(set (match_operand:V4SF 0 "register_operand" "=x")
11580 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11583 "cvtph2ps\t{%1, %0|%0, %1}"
11584 [(set_attr "type" "ssecvt")
11585 (set_attr "mode" "V4SF")])
;; sse5_cvtps2ph: unspec conversion from a V4SF register to a V4HI
;; destination that may be a register or memory, emitted as cvtps2ph.
11587 (define_insn "sse5_cvtps2ph"
11588 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11589 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11592 "cvtps2ph\t{%1, %0|%0, %1}"
11593 [(set_attr "type" "ssecvt")
11594 (set_attr "mode" "V4SF")])
11596 ;; Scalar versions of the com instructions that use vector types that are
11597 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11598 ;; com instructions fill in 0's in the upper bits instead of leaving them
11599 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; sse5_vmmaskcmp<mode>3 (expander): builds the vec_merge form of the scalar
;; com comparison.  Operand 1 is the comparison operator applied to operands
;; 2 and 3; the preparation code sets operands[4] to the all-zero constant of
;; the vector mode.
11600 (define_expand "sse5_vmmaskcmp<mode>3"
11601 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11602 (vec_merge:SSEMODEF2P
11603 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11604 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11605 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11610 operands[4] = CONST0_RTX (<MODE>mode);
;; *sse5_vmmaskcmp<mode>3: matching insn for the scalar com comparison.  The
;; comparison code (operand 1) is printed with the %Y modifier to form the
;; mnemonic com<cond><ssemodesuffixf2s>.  Operand 4 is the other vec_merge
;; input; it has no predicate or constraint here.
11613 (define_insn "*sse5_vmmaskcmp<mode>3"
11614 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11615 (vec_merge:SSEMODEF2P
11616 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11617 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11618 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11619 (match_operand:SSEMODEF2P 4 "")
11622 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11623 [(set_attr "type" "sse4arg")
11624 (set_attr "prefix_data16" "0")
11625 (set_attr "prefix_rep" "0")
11626 (set_attr "prefix_extra" "2")
11627 (set_attr "length_immediate" "1")
11628 (set_attr "mode" "<ssescalarmode>")])
11630 ;; We don't have a comparison operator that always returns true/false, so
11631 ;; handle comfalse and comtrue specially.
;; sse5_com_tf<mode>3: the always-false and always-true floating point com
;; forms, modelled as UNSPEC_SSE5_TRUEFALSE.  Operand 3 is a const_int that
;; selects one of four templates in the switch below: comfalse scalar,
;; comfalse packed, comtrue scalar and comtrue packed.
;; The third and fourth templates previously repeated the comfalse mnemonics,
;; so a comtrue instruction could never be emitted; they now emit comtrues
;; and comtruep.
;; NOTE(review): assumes the third and fourth arms are the COM_TRUE cases;
;; confirm against the case labels.
11632 (define_insn "sse5_com_tf<mode>3"
11633 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11635 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11637 (match_operand:SI 3 "const_int_operand" "n")]
11638 UNSPEC_SSE5_TRUEFALSE))]
11641 const char *ret = NULL;
11643 switch (INTVAL (operands[3]))
11646 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11650 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11654 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11658 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11662 gcc_unreachable ();
11667 [(set_attr "type" "ssecmp")
11668 (set_attr "prefix_data16" "0")
11669 (set_attr "prefix_rep" "0")
11670 (set_attr "prefix_extra" "2")
11671 (set_attr "length_immediate" "1")
11672 (set_attr "mode" "<MODE>")])
;; sse5_maskcmp<mode>3 (floating point): packed com comparison for the
;; SSEMODEF2P modes.  The comparison code (operand 1) is printed with the %Y
;; modifier to form com<cond><ssemodesuffixf4>; operand 3 may be in memory.
11674 (define_insn "sse5_maskcmp<mode>3"
11675 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11676 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11677 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11678 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11680 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11681 [(set_attr "type" "ssecmp")
11682 (set_attr "prefix_data16" "0")
11683 (set_attr "prefix_rep" "0")
11684 (set_attr "prefix_extra" "2")
11685 (set_attr "length_immediate" "1")
11686 (set_attr "mode" "<MODE>")])
;; sse5_maskcmp<mode>3 (integer): packed pcom comparison for the SSEMODE1248
;; modes using ix86_comparison_int_operator.  The comparison code is printed
;; with %Y to form pcom<cond><ssevecsize>; operand 3 may be in memory.
;; NOTE(review): the type attribute is sse4arg here while the sibling compare
;; patterns use ssecmp; confirm this is intended.
11688 (define_insn "sse5_maskcmp<mode>3"
11689 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11690 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11691 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11692 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11694 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11695 [(set_attr "type" "sse4arg")
11696 (set_attr "prefix_data16" "0")
11697 (set_attr "prefix_rep" "0")
11698 (set_attr "prefix_extra" "2")
11699 (set_attr "length_immediate" "1")
11700 (set_attr "mode" "TI")])
;; sse5_maskcmp_uns<mode>3: unsigned packed integer comparison for the
;; SSEMODE1248 modes using ix86_comparison_uns_operator, emitted as
;; pcom<cond>u<ssevecsize>; operand 3 may be in memory.
11702 (define_insn "sse5_maskcmp_uns<mode>3"
11703 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11704 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11705 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11706 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11708 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11709 [(set_attr "type" "ssecmp")
11710 (set_attr "prefix_data16" "0")
11711 (set_attr "prefix_rep" "0")
11712 (set_attr "prefix_extra" "2")
11713 (set_attr "length_immediate" "1")
11714 (set_attr "mode" "TI")])
11716 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11717 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11718 ;; the exact instruction generated for the intrinsic.
;; sse5_maskcmp_uns2<mode>3: same output template as sse5_maskcmp_uns, but
;; the comparison is wrapped in UNSPEC_SSE5_UNSIGNED_CMP so that it stays
;; opaque to the RTL optimizers and the unsigned mnemonic is preserved.
11719 (define_insn "sse5_maskcmp_uns2<mode>3"
11720 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11721 (unspec:SSEMODE1248
11722 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11723 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11724 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11725 UNSPEC_SSE5_UNSIGNED_CMP))]
11727 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11728 [(set_attr "type" "ssecmp")
11729 (set_attr "prefix_data16" "0")
11730 (set_attr "prefix_extra" "2")
11731 (set_attr "length_immediate" "1")
11732 (set_attr "mode" "TI")])
11734 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11735 ;; being added here to be complete.
;; sse5_pcom_tf<mode>3: integer pcomtrue / pcomfalse, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  Operand 3 is a const_int: a nonzero value emits
;; pcomtrue<ssevecsize>, zero emits pcomfalse<ssevecsize>.
11736 (define_insn "sse5_pcom_tf<mode>3"
11737 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11738 (unspec:SSEMODE1248
11739 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11740 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11741 (match_operand:SI 3 "const_int_operand" "n")]
11742 UNSPEC_SSE5_TRUEFALSE))]
11745 return ((INTVAL (operands[3]) != 0)
11746 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11747 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11749 [(set_attr "type" "ssecmp")
11750 (set_attr "prefix_data16" "0")
11751 (set_attr "prefix_extra" "2")
11752 (set_attr "length_immediate" "1")
11753 (set_attr "mode" "TI")])
;; *avx_aesenc: VEX-encoded, three-operand form of the AES encrypt round,
;; enabled when both TARGET_AES and TARGET_AVX hold.  The destination is
;; independent of operand 1; operand 2 may be in memory.
11755 (define_insn "*avx_aesenc"
11756 [(set (match_operand:V2DI 0 "register_operand" "=x")
11757 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11758 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11760 "TARGET_AES && TARGET_AVX"
11761 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11762 [(set_attr "type" "sselog1")
11763 (set_attr "prefix_extra" "1")
11764 (set_attr "prefix" "vex")
11765 (set_attr "mode" "TI")])
;; aesenc: two-operand form of the AES encrypt round.  Operand 1 is tied to
;; the destination (constraint 0), so only %0 and %2 appear in the template;
;; operand 2 may be in memory.
11767 (define_insn "aesenc"
11768 [(set (match_operand:V2DI 0 "register_operand" "=x")
11769 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11770 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11773 "aesenc\t{%2, %0|%0, %2}"
11774 [(set_attr "type" "sselog1")
11775 (set_attr "prefix_extra" "1")
11776 (set_attr "mode" "TI")])
;; *avx_aesenclast: VEX-encoded, three-operand form of the last AES encrypt
;; round (UNSPEC_AESENCLAST), enabled when both TARGET_AES and TARGET_AVX
;; hold.  Operand 2 may be in memory.
11778 (define_insn "*avx_aesenclast"
11779 [(set (match_operand:V2DI 0 "register_operand" "=x")
11780 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11781 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11782 UNSPEC_AESENCLAST))]
11783 "TARGET_AES && TARGET_AVX"
11784 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11785 [(set_attr "type" "sselog1")
11786 (set_attr "prefix_extra" "1")
11787 (set_attr "prefix" "vex")
11788 (set_attr "mode" "TI")])
;; aesenclast: two-operand form of the last AES encrypt round
;; (UNSPEC_AESENCLAST).  Operand 1 is tied to the destination; operand 2 may
;; be in memory.
11790 (define_insn "aesenclast"
11791 [(set (match_operand:V2DI 0 "register_operand" "=x")
11792 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11793 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11794 UNSPEC_AESENCLAST))]
11796 "aesenclast\t{%2, %0|%0, %2}"
11797 [(set_attr "type" "sselog1")
11798 (set_attr "prefix_extra" "1")
11799 (set_attr "mode" "TI")])
;; *avx_aesdec: VEX-encoded, three-operand form of the AES decrypt round,
;; enabled when both TARGET_AES and TARGET_AVX hold.  Operand 2 may be in
;; memory.
11801 (define_insn "*avx_aesdec"
11802 [(set (match_operand:V2DI 0 "register_operand" "=x")
11803 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11804 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11806 "TARGET_AES && TARGET_AVX"
11807 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11808 [(set_attr "type" "sselog1")
11809 (set_attr "prefix_extra" "1")
11810 (set_attr "prefix" "vex")
11811 (set_attr "mode" "TI")])
;; aesdec: two-operand form of the AES decrypt round.  Operand 1 is tied to
;; the destination (constraint 0); operand 2 may be in memory.
11813 (define_insn "aesdec"
11814 [(set (match_operand:V2DI 0 "register_operand" "=x")
11815 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11816 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11819 "aesdec\t{%2, %0|%0, %2}"
11820 [(set_attr "type" "sselog1")
11821 (set_attr "prefix_extra" "1")
11822 (set_attr "mode" "TI")])
;; *avx_aesdeclast: VEX-encoded, three-operand form of the last AES decrypt
;; round (UNSPEC_AESDECLAST), enabled when both TARGET_AES and TARGET_AVX
;; hold.  Operand 2 may be in memory.
11824 (define_insn "*avx_aesdeclast"
11825 [(set (match_operand:V2DI 0 "register_operand" "=x")
11826 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11827 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11828 UNSPEC_AESDECLAST))]
11829 "TARGET_AES && TARGET_AVX"
11830 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11831 [(set_attr "type" "sselog1")
11832 (set_attr "prefix_extra" "1")
11833 (set_attr "prefix" "vex")
11834 (set_attr "mode" "TI")])
;; aesdeclast: two-operand form of the last AES decrypt round
;; (UNSPEC_AESDECLAST).  Operand 1 is tied to the destination; operand 2 may
;; be in memory.
11836 (define_insn "aesdeclast"
11837 [(set (match_operand:V2DI 0 "register_operand" "=x")
11838 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11839 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11840 UNSPEC_AESDECLAST))]
11842 "aesdeclast\t{%2, %0|%0, %2}"
11843 [(set_attr "type" "sselog1")
11844 (set_attr "prefix_extra" "1")
11845 (set_attr "mode" "TI")])
;; aesimc: single-source AES unspec with a register-or-memory input.  The %v
;; prefix in the template together with the maybe_vex prefix attribute lets
;; one pattern serve both the legacy and the VEX encoding.
11847 (define_insn "aesimc"
11848 [(set (match_operand:V2DI 0 "register_operand" "=x")
11849 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11852 "%vaesimc\t{%1, %0|%0, %1}"
11853 [(set_attr "type" "sselog1")
11854 (set_attr "prefix_extra" "1")
11855 (set_attr "prefix" "maybe_vex")
11856 (set_attr "mode" "TI")])
;; aeskeygenassist: UNSPEC_AESKEYGENASSIST with a register-or-memory source
;; (operand 1) and an 8-bit immediate (operand 2, 0..255).  The %v prefix and
;; the maybe_vex attribute cover both the legacy and the VEX encoding.
11858 (define_insn "aeskeygenassist"
11859 [(set (match_operand:V2DI 0 "register_operand" "=x")
11860 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11861 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11862 UNSPEC_AESKEYGENASSIST))]
11864 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11865 [(set_attr "type" "sselog1")
11866 (set_attr "prefix_extra" "1")
11867 (set_attr "length_immediate" "1")
11868 (set_attr "prefix" "maybe_vex")
11869 (set_attr "mode" "TI")])
;; *vpclmulqdq: VEX-encoded, three-operand carry-less multiply, enabled when
;; both TARGET_PCLMUL and TARGET_AVX hold.  Operand 2 may be in memory and
;; operand 3 is an 8-bit immediate (0..255).
11871 (define_insn "*vpclmulqdq"
11872 [(set (match_operand:V2DI 0 "register_operand" "=x")
11873 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11874 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11875 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11877 "TARGET_PCLMUL && TARGET_AVX"
11878 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11879 [(set_attr "type" "sselog1")
11880 (set_attr "prefix_extra" "1")
11881 (set_attr "length_immediate" "1")
11882 (set_attr "prefix" "vex")
11883 (set_attr "mode" "TI")])
;; pclmulqdq: two-operand carry-less multiply.  Operand 1 is tied to the
;; destination (constraint 0), operand 2 may be in memory and operand 3 is an
;; 8-bit immediate (0..255).
11885 (define_insn "pclmulqdq"
11886 [(set (match_operand:V2DI 0 "register_operand" "=x")
11887 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11888 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11889 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11892 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11893 [(set_attr "type" "sselog1")
11894 (set_attr "prefix_extra" "1")
11895 (set_attr "length_immediate" "1")
11896 (set_attr "mode" "TI")])
;; avx_vzeroall (expander): builds the PARALLEL used to represent vzeroall.
;; Element 0 is an UNSPEC_VOLATILE marker; it is followed by one SET per SSE
;; register (16 on 64-bit targets, 8 otherwise) that stores the V8SImode zero
;; constant into the register.
11898 (define_expand "avx_vzeroall"
11899 [(match_par_dup 0 [(const_int 0)])]
11902 int nregs = TARGET_64BIT ? 16 : 8;
11905 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11907 XVECEXP (operands[0], 0, 0)
11908 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11911 for (regno = 0; regno < nregs; regno++)
11912 XVECEXP (operands[0], 0, regno + 1)
11913 = gen_rtx_SET (VOIDmode,
11914 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11915 CONST0_RTX (V8SImode));
;; *avx_vzeroall: matches the PARALLEL built by the avx_vzeroall expander.
;; The vzeroall_operation predicate checks the whole PARALLEL; only the
;; UNSPECV_VZEROALL marker and the first register SET are spelled out here.
;; The attributes mark the insn as having no modrm byte and no memory access.
11918 (define_insn "*avx_vzeroall"
11919 [(match_parallel 0 "vzeroall_operation"
11920 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11921 (set (match_operand 1 "register_operand" "=x")
11922 (match_operand 2 "const0_operand" "X"))])]
11925 [(set_attr "type" "sse")
11926 (set_attr "modrm" "0")
11927 (set_attr "memory" "none")
11928 (set_attr "prefix" "vex")
11929 (set_attr "mode" "OI")])
11931 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; avx_vzeroupper: 32-bit variant (condition TARGET_AVX && !TARGET_64BIT).
;; Modelled as an UNSPECV_VZEROUPPER volatile unspec plus V8SI clobbers of
;; XMM0 through XMM7.
11932 (define_insn "avx_vzeroupper"
11933 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11934 (clobber (reg:V8SI XMM0_REG))
11935 (clobber (reg:V8SI XMM1_REG))
11936 (clobber (reg:V8SI XMM2_REG))
11937 (clobber (reg:V8SI XMM3_REG))
11938 (clobber (reg:V8SI XMM4_REG))
11939 (clobber (reg:V8SI XMM5_REG))
11940 (clobber (reg:V8SI XMM6_REG))
11941 (clobber (reg:V8SI XMM7_REG))]
11942 "TARGET_AVX && !TARGET_64BIT"
11944 [(set_attr "type" "sse")
11945 (set_attr "modrm" "0")
11946 (set_attr "memory" "none")
11947 (set_attr "prefix" "vex")
11948 (set_attr "mode" "OI")])
;; avx_vzeroupper_rex64: 64-bit variant (condition TARGET_AVX &&
;; TARGET_64BIT).  Same UNSPECV_VZEROUPPER marker as avx_vzeroupper, with
;; V8SI clobbers of all sixteen registers XMM0 through XMM15.
11950 (define_insn "avx_vzeroupper_rex64"
11951 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11952 (clobber (reg:V8SI XMM0_REG))
11953 (clobber (reg:V8SI XMM1_REG))
11954 (clobber (reg:V8SI XMM2_REG))
11955 (clobber (reg:V8SI XMM3_REG))
11956 (clobber (reg:V8SI XMM4_REG))
11957 (clobber (reg:V8SI XMM5_REG))
11958 (clobber (reg:V8SI XMM6_REG))
11959 (clobber (reg:V8SI XMM7_REG))
11960 (clobber (reg:V8SI XMM8_REG))
11961 (clobber (reg:V8SI XMM9_REG))
11962 (clobber (reg:V8SI XMM10_REG))
11963 (clobber (reg:V8SI XMM11_REG))
11964 (clobber (reg:V8SI XMM12_REG))
11965 (clobber (reg:V8SI XMM13_REG))
11966 (clobber (reg:V8SI XMM14_REG))
11967 (clobber (reg:V8SI XMM15_REG))]
11968 "TARGET_AVX && TARGET_64BIT"
11970 [(set_attr "type" "sse")
11971 (set_attr "modrm" "0")
11972 (set_attr "memory" "none")
11973 (set_attr "prefix" "vex")
11974 (set_attr "mode" "OI")])
;; avx_vpermil<mode>: vpermilp<suffix> with an immediate control (operand 2,
;; limited by const_0_to_<vpermilbits>_operand) for the AVXMODEF2P modes.
;; NOTE(review): operand 1 uses the register_operand predicate together with
;; the xm constraint, so a memory source can only appear through register
;; allocation, never at match time.  Confirm whether nonimmediate_operand
;; was intended.
11976 (define_insn "avx_vpermil<mode>"
11977 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11979 [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
11980 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11983 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11984 [(set_attr "type" "sselog")
11985 (set_attr "prefix_extra" "1")
11986 (set_attr "length_immediate" "1")
11987 (set_attr "prefix" "vex")
11988 (set_attr "mode" "<MODE>")])
;; avx_vpermilvar<mode>3: vpermilp<avxmodesuffixf2c> with a vector control.
;;   operand 0 - AVXMODEF2P destination register.
;;   operand 1 - AVXMODEF2P source register.
;;   operand 2 - control operand of mode <avxpermvecmode>, register or
;;               memory ("xm").
;; This form has no immediate operand, and the attribute list accordingly
;; sets no length_immediate.
;; Attributes: type sselog, prefix_extra 1, VEX prefix, mode <MODE>.
11990 (define_insn "avx_vpermilvar<mode>3"
11991 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11993 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11994 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11997 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11998 [(set_attr "type" "sselog")
11999 (set_attr "prefix_extra" "1")
12000 (set_attr "prefix" "vex")
12001 (set_attr "mode" "<MODE>")])
;; avx_vperm2f128<mode>3: vperm2f128 modelled as an UNSPEC_VPERMIL2F128
;; unspec over three inputs.
;;   operand 0 - AVX256MODE2P destination register.
;;   operand 1 - AVX256MODE2P source register.
;;   operand 2 - AVX256MODE2P source, register or memory ("xm").
;;   operand 3 - SI immediate accepted by const_0_to_255_operand.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix.
;; The "mode" attribute is fixed at V8SF for every mode of the iterator.
12003 (define_insn "avx_vperm2f128<mode>3"
12004 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12005 (unspec:AVX256MODE2P
12006 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12007 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12008 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12009 UNSPEC_VPERMIL2F128))]
12011 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12012 [(set_attr "type" "sselog")
12013 (set_attr "prefix_extra" "1")
12014 (set_attr "length_immediate" "1")
12015 (set_attr "prefix" "vex")
12016 (set_attr "mode" "V8SF")])
;; avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>: scalar broadcast from
;; memory, emitted as vbroadcasts<avxmodesuffixf2c>.
;;   operand 0 - AVXMODEF4P destination register.
;;   operand 1 - <avxscalarmode> memory operand (predicate memory_operand,
;;               constraint "m"); no register alternative is offered.
;; The result is written as a vec_concat of <avxhalfvecmode> vec_concats.
;; Attributes: type ssemov, prefix_extra 1, VEX prefix; the "mode"
;; attribute is the scalar mode <avxscalarmode>.
12018 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
12019 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
12020 (vec_concat:AVXMODEF4P
12021 (vec_concat:<avxhalfvecmode>
12022 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
12024 (vec_concat:<avxhalfvecmode>
12028 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
12029 [(set_attr "type" "ssemov")
12030 (set_attr "prefix_extra" "1")
12031 (set_attr "prefix" "vex")
12032 (set_attr "mode" "<avxscalarmode>")])
;; avx_vbroadcastss256: vbroadcastss into a 256-bit register.
;;   operand 0 - V8SF destination register.
;;   operand 1 - SF memory operand (memory only, constraint "m").
;; Attributes: type ssemov, prefix_extra 1, VEX prefix, mode SF.
12034 (define_insn "avx_vbroadcastss256"
12035 [(set (match_operand:V8SF 0 "register_operand" "=x")
12039 (match_operand:SF 1 "memory_operand" "m")
12052 "vbroadcastss\t{%1, %0|%0, %1}"
12053 [(set_attr "type" "ssemov")
12054 (set_attr "prefix_extra" "1")
12055 (set_attr "prefix" "vex")
12056 (set_attr "mode" "SF")])
;; avx_vbroadcastf128_p<avxmodesuffixf2c>256: vbroadcastf128 from memory.
;;   operand 0 - AVX256MODEF2P destination register.
;;   operand 1 - <avxhalfvecmode> memory operand (memory only, "m"), used
;;               as an element of the vec_concat that forms the result.
;; Attributes: type ssemov, prefix_extra 1, VEX prefix; the "mode"
;; attribute is fixed at V4SF.
12058 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
12059 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
12060 (vec_concat:AVX256MODEF2P
12061 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
12064 "vbroadcastf128\t{%1, %0|%0, %1}"
12065 [(set_attr "type" "ssemov")
12066 (set_attr "prefix_extra" "1")
12067 (set_attr "prefix" "vex")
12068 (set_attr "mode" "V4SF")])
;; avx_vinsertf128<mode>: expander that dispatches on the immediate.
;;   operand 0 - AVX256MODE destination register.
;;   operand 1 - AVX256MODE source register.
;;   operand 2 - <avxhalfvecmode> value to insert, register or memory.
;;   operand 3 - SI constant accepted by const_0_to_1_operand.
;; The body switches on INTVAL (operands[3]) and emits either
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode> with operands[0] and
;; operands[1] as the leading arguments; any other value reaches
;; gcc_unreachable ().
;; NOTE(review): presumably 0 selects the lo pattern and 1 the hi pattern,
;; matching the 0x0/0x1 immediates in the vec_set_lo/vec_set_hi templates;
;; confirm against the case labels.
12070 (define_expand "avx_vinsertf128<mode>"
12071 [(match_operand:AVX256MODE 0 "register_operand" "")
12072 (match_operand:AVX256MODE 1 "register_operand" "")
12073 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12074 (match_operand:SI 3 "const_0_to_1_operand" "")]
12077 switch (INTVAL (operands[3]))
12080 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12084 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12088 gcc_unreachable ();
;; vec_set_lo_<mode> for the AVX256MODE4P modes.
;; The result is the vec_concat of operand 2 (a <avxhalfvecmode> register
;; or memory value) with elements 2 and 3 selected from operand 1, i.e.
;; operand 2 replaces elements 0-1 and the other half of operand 1 is kept.
;; Emitted as vinsertf128 with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12093 (define_insn "vec_set_lo_<mode>"
12094 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12095 (vec_concat:AVX256MODE4P
12096 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12097 (vec_select:<avxhalfvecmode>
12098 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12099 (parallel [(const_int 2) (const_int 3)]))))]
12101 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12102 [(set_attr "type" "sselog")
12103 (set_attr "prefix_extra" "1")
12104 (set_attr "length_immediate" "1")
12105 (set_attr "prefix" "vex")
12106 (set_attr "mode" "V8SF")])
;; vec_set_hi_<mode> for the AVX256MODE4P modes.
;; The result is the vec_concat of elements 0 and 1 selected from operand 1
;; with operand 2 (a <avxhalfvecmode> register or memory value), i.e.
;; operand 2 replaces elements 2-3 and the other half of operand 1 is kept.
;; Emitted as vinsertf128 with immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12108 (define_insn "vec_set_hi_<mode>"
12109 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12110 (vec_concat:AVX256MODE4P
12111 (vec_select:<avxhalfvecmode>
12112 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12113 (parallel [(const_int 0) (const_int 1)]))
12114 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12116 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12117 [(set_attr "type" "sselog")
12118 (set_attr "prefix_extra" "1")
12119 (set_attr "length_immediate" "1")
12120 (set_attr "prefix" "vex")
12121 (set_attr "mode" "V8SF")])
;; vec_set_lo_<mode> for the AVX256MODE8P modes.
;; The result is the vec_concat of operand 2 (a <avxhalfvecmode> register
;; or memory value) with elements 4..7 selected from operand 1, i.e.
;; operand 2 replaces elements 0-3 and the other half of operand 1 is kept.
;; Emitted as vinsertf128 with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12123 (define_insn "vec_set_lo_<mode>"
12124 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12125 (vec_concat:AVX256MODE8P
12126 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12127 (vec_select:<avxhalfvecmode>
12128 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12129 (parallel [(const_int 4) (const_int 5)
12130 (const_int 6) (const_int 7)]))))]
12132 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12133 [(set_attr "type" "sselog")
12134 (set_attr "prefix_extra" "1")
12135 (set_attr "length_immediate" "1")
12136 (set_attr "prefix" "vex")
12137 (set_attr "mode" "V8SF")])
;; vec_set_hi_<mode> for the AVX256MODE8P modes.
;; The result is the vec_concat of elements 0..3 selected from operand 1
;; with operand 2 (a <avxhalfvecmode> register or memory value), i.e.
;; operand 2 replaces elements 4-7 and the other half of operand 1 is kept.
;; Emitted as vinsertf128 with immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12139 (define_insn "vec_set_hi_<mode>"
12140 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12141 (vec_concat:AVX256MODE8P
12142 (vec_select:<avxhalfvecmode>
12143 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12144 (parallel [(const_int 0) (const_int 1)
12145 (const_int 2) (const_int 3)]))
12146 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12148 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12149 [(set_attr "type" "sselog")
12150 (set_attr "prefix_extra" "1")
12151 (set_attr "length_immediate" "1")
12152 (set_attr "prefix" "vex")
12153 (set_attr "mode" "V8SF")])
;; vec_set_lo_v16hi: V16HI counterpart of the vec_set_lo_<mode> patterns.
;;   operand 0 - V16HI destination register.
;;   operand 1 - V16HI source register; the index list keeps its elements
;;               8..15.
;;   operand 2 - V8HI value to insert, register or memory ("xm").
;; Emitted as vinsertf128 with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12155 (define_insn "vec_set_lo_v16hi"
12156 [(set (match_operand:V16HI 0 "register_operand" "=x")
12158 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12160 (match_operand:V16HI 1 "register_operand" "x")
12161 (parallel [(const_int 8) (const_int 9)
12162 (const_int 10) (const_int 11)
12163 (const_int 12) (const_int 13)
12164 (const_int 14) (const_int 15)]))))]
12166 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12167 [(set_attr "type" "sselog")
12168 (set_attr "prefix_extra" "1")
12169 (set_attr "length_immediate" "1")
12170 (set_attr "prefix" "vex")
12171 (set_attr "mode" "V8SF")])
;; vec_set_hi_v16hi: V16HI counterpart of the vec_set_hi_<mode> patterns.
;;   operand 0 - V16HI destination register.
;;   operand 1 - V16HI source register; the index list keeps its elements
;;               0..7.
;;   operand 2 - V8HI value to insert, register or memory ("xm").
;; Emitted as vinsertf128 with immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12173 (define_insn "vec_set_hi_v16hi"
12174 [(set (match_operand:V16HI 0 "register_operand" "=x")
12177 (match_operand:V16HI 1 "register_operand" "x")
12178 (parallel [(const_int 0) (const_int 1)
12179 (const_int 2) (const_int 3)
12180 (const_int 4) (const_int 5)
12181 (const_int 6) (const_int 7)]))
12182 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12184 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12185 [(set_attr "type" "sselog")
12186 (set_attr "prefix_extra" "1")
12187 (set_attr "length_immediate" "1")
12188 (set_attr "prefix" "vex")
12189 (set_attr "mode" "V8SF")])
;; vec_set_lo_v32qi: V32QI counterpart of the vec_set_lo_<mode> patterns.
;;   operand 0 - V32QI destination register.
;;   operand 1 - V32QI source register; the index list keeps its elements
;;               16..31.
;;   operand 2 - V16QI value to insert, register or memory ("xm").
;; Emitted as vinsertf128 with immediate 0x0.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12191 (define_insn "vec_set_lo_v32qi"
12192 [(set (match_operand:V32QI 0 "register_operand" "=x")
12194 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12196 (match_operand:V32QI 1 "register_operand" "x")
12197 (parallel [(const_int 16) (const_int 17)
12198 (const_int 18) (const_int 19)
12199 (const_int 20) (const_int 21)
12200 (const_int 22) (const_int 23)
12201 (const_int 24) (const_int 25)
12202 (const_int 26) (const_int 27)
12203 (const_int 28) (const_int 29)
12204 (const_int 30) (const_int 31)]))))]
12206 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12207 [(set_attr "type" "sselog")
12208 (set_attr "prefix_extra" "1")
12209 (set_attr "length_immediate" "1")
12210 (set_attr "prefix" "vex")
12211 (set_attr "mode" "V8SF")])
;; vec_set_hi_v32qi: V32QI counterpart of the vec_set_hi_<mode> patterns.
;;   operand 0 - V32QI destination register.
;;   operand 1 - V32QI source register; the index list keeps its elements
;;               0..15.
;;   operand 2 - V16QI value to insert, register or memory ("xm").
;; Emitted as vinsertf128 with immediate 0x1.
;; Attributes: type sselog, prefix_extra 1, length_immediate 1, VEX prefix,
;; mode fixed at V8SF.
12213 (define_insn "vec_set_hi_v32qi"
12214 [(set (match_operand:V32QI 0 "register_operand" "=x")
12217 (match_operand:V32QI 1 "register_operand" "x")
12218 (parallel [(const_int 0) (const_int 1)
12219 (const_int 2) (const_int 3)
12220 (const_int 4) (const_int 5)
12221 (const_int 6) (const_int 7)
12222 (const_int 8) (const_int 9)
12223 (const_int 10) (const_int 11)
12224 (const_int 12) (const_int 13)
12225 (const_int 14) (const_int 15)]))
12226 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12228 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12229 [(set_attr "type" "sselog")
12230 (set_attr "prefix_extra" "1")
12231 (set_attr "length_immediate" "1")
12232 (set_attr "prefix" "vex")
12233 (set_attr "mode" "V8SF")])
;; avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>: load form of
;; vmaskmovp<avxmodesuffixf2c>.
;;   operand 0 - AVXMODEF2P destination register.
;;   operand 1 - AVXMODEF2P memory source (memory only, "m").
;;   operand 2 - AVXMODEF2P register operand.
;;               NOTE(review): presumably the mask vector; confirm.
;; Attributes: type sselog1, prefix_extra 1, VEX prefix, mode <MODE>.
12235 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12236 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12238 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12239 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12243 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12244 [(set_attr "type" "sselog1")
12245 (set_attr "prefix_extra" "1")
12246 (set_attr "prefix" "vex")
12247 (set_attr "mode" "<MODE>")])
;; avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>: store form of
;; vmaskmovp<avxmodesuffixf2c>, modelled as an UNSPEC_MASKSTORE unspec.
;;   operand 0 - AVXMODEF2P memory destination ("=m").
;;   operand 1 - AVXMODEF2P register operand.
;;   operand 2 - AVXMODEF2P register operand.
;;               NOTE(review): which of operands 1 and 2 is the mask and
;;               which is the data is not evident from the pattern alone;
;;               confirm against the instruction's operand order.
;; Attributes: type sselog1, prefix_extra 1, VEX prefix, mode <MODE>.
12249 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12250 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12252 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12253 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12255 UNSPEC_MASKSTORE))]
12257 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12258 [(set_attr "type" "sselog1")
12259 (set_attr "prefix_extra" "1")
12260 (set_attr "prefix" "vex")
12261 (set_attr "mode" "<MODE>")])
;; avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>: produce an
;; AVX256MODE2P value from a <avxhalfvecmode> operand via an unspec.
;;   operand 0 - AVX256MODE2P destination register (two alternatives).
;;   operand 1 - <avxhalfvecmode> source: tied to operand 0 in alternative 0
;;               ("0"), register or memory in alternative 1 ("xm").
;; The output code switches on which_alternative; where a move is emitted
;; it is vmovaps, vmovapd or vmovdqa chosen by get_attr_mode (insn), with
;; operand 1 as the source and %x0 as the destination.
;; Attributes: type ssemov, VEX prefix, mode <avxvecmode>.  "length" is
;; given explicitly for alternative 0 and left at the default ("*")
;; otherwise.
12263 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12264 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12265 (unspec:AVX256MODE2P
12266 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12270 switch (which_alternative)
12275 switch (get_attr_mode (insn))
12278 return "vmovaps\t{%1, %x0|%x0, %1}";
12280 return "vmovapd\t{%1, %x0|%x0, %1}";
12282 return "vmovdqa\t{%1, %x0|%x0, %1}";
12289 gcc_unreachable ();
12291 [(set_attr "type" "ssemov")
12292 (set_attr "prefix" "vex")
12293 (set_attr "mode" "<avxvecmode>")
12294 (set (attr "length")
12295 (if_then_else (eq_attr "alternative" "0")
12297 (const_string "*")))])
;; avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>: produce a
;; <avxhalfvecmode> value from an AVX256MODE2P operand via an unspec (the
;; reverse direction of avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>).
;;   operand 0 - <avxhalfvecmode> destination register (two alternatives).
;;   operand 1 - AVX256MODE2P source: tied to operand 0 in alternative 0
;;               ("0"), register or memory in alternative 1 ("xm").
;; The output code switches on which_alternative; where a move is emitted
;; it is vmovaps, vmovapd or vmovdqa chosen by get_attr_mode (insn), with
;; %x1 as the source and operand 0 as the destination.
;; Attributes: type ssemov, VEX prefix, mode <avxvecmode>.  "length" is
;; given explicitly for alternative 0 and left at the default ("*")
;; otherwise.
12299 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12300 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12301 (unspec:<avxhalfvecmode>
12302 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12306 switch (which_alternative)
12311 switch (get_attr_mode (insn))
12314 return "vmovaps\t{%x1, %0|%0, %x1}";
12316 return "vmovapd\t{%x1, %0|%0, %x1}";
12318 return "vmovdqa\t{%x1, %0|%0, %x1}";
12325 gcc_unreachable ();
12327 [(set_attr "type" "ssemov")
12328 (set_attr "prefix" "vex")
12329 (set_attr "mode" "<avxvecmode>")
12330 (set (attr "length")
12331 (if_then_else (eq_attr "alternative" "0")
12333 (const_string "*")))])
;; vec_init<mode> for the AVX256MODE modes.
;;   operand 0 - AVX256MODE destination register.
;;   operand 1 - initializer; no mode or predicate is imposed here.
;; All of the work is delegated to
;; ix86_expand_vector_init (false, operands[0], operands[1]).
12335 (define_expand "vec_init<mode>"
12336 [(match_operand:AVX256MODE 0 "register_operand" "")
12337 (match_operand 1 "" "")]
12340 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_concat<mode>_avx: build an AVX256MODE value from two
;; <avxhalfvecmode> halves.
;;   operand 0 - AVX256MODE destination register (two alternatives).
;;   operand 1 - <avxhalfvecmode> register, first vec_concat operand.
;;   operand 2 - <avxhalfvecmode> vector_move_operand, second vec_concat
;;               operand: register or memory ("xm") in alternative 0,
;;               constraint "C" in alternative 1.
;; The output code switches on which_alternative.  One path returns
;; vinsertf128 with immediate 0x1, inserting operand 2 into %t1; the other
;; returns vmovaps, vmovapd or vmovdqa (chosen by get_attr_mode (insn))
;; from operand 1 into %x0.
;; NOTE(review): from the per-alternative attributes (type "sselog,ssemov",
;; prefix_extra and length_immediate "1,*") the vinsertf128 path appears to
;; be alternative 0 and the plain move alternative 1; confirm against the
;; case labels.
12344 (define_insn "*vec_concat<mode>_avx"
12345 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12346 (vec_concat:AVX256MODE
12347 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12348 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12351 switch (which_alternative)
12354 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12356 switch (get_attr_mode (insn))
12359 return "vmovaps\t{%1, %x0|%x0, %1}";
12361 return "vmovapd\t{%1, %x0|%x0, %1}";
12363 return "vmovdqa\t{%1, %x0|%x0, %1}";
12366 gcc_unreachable ();
12369 [(set_attr "type" "sselog,ssemov")
12370 (set_attr "prefix_extra" "1,*")
12371 (set_attr "length_immediate" "1,*")
12372 (set_attr "prefix" "vex")
12373 (set_attr "mode" "<avxvecmode>")])