1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Integer vector modes that fill one 16-byte SSE register.  TImode is
;; left out on purpose: it is special-cased for TARGET_64BIT.
(define_mode_iterator SSEMODEI
  [V16QI V8HI V4SI V2DI])

;; Every 16-byte vector mode handled by SSE, integer and floating point.
(define_mode_iterator SSEMODE
  [V16QI V8HI V4SI V2DI
   V4SF V2DF])
;; Integer vector modes that fill one 32-byte AVX register.
(define_mode_iterator AVX256MODEI
  [V32QI V16HI V8SI V4DI])

;; Every 32-byte vector mode handled by AVX, integer and floating point.
(define_mode_iterator AVX256MODE
  [V32QI V16HI V8SI V4DI
   V8SF V4DF])
;; QImode-element vectors handled by AVX (32-byte and 16-byte).
(define_mode_iterator AVXMODEQI
  [V32QI V16QI])

;; DImode-element vectors handled by AVX (32-byte and 16-byte).
(define_mode_iterator AVXMODEDI
  [V4DI V2DI])

;; Every vector mode handled by AVX: the 16-byte modes first, then the
;; 32-byte ones.
(define_mode_iterator AVXMODE
  [V16QI V8HI V4SI V2DI V4SF V2DF
   V32QI V16HI V8SI V4DI V8SF V4DF])
;; 16-byte integer subsets.  The digits in each name match the element
;; widths (in bytes) of the modes listed.
(define_mode_iterator SSEMODE12   [V16QI V8HI])
(define_mode_iterator SSEMODE24   [V8HI V4SI])
(define_mode_iterator SSEMODE14   [V16QI V4SI])
(define_mode_iterator SSEMODE124  [V16QI V8HI V4SI])
(define_mode_iterator SSEMODE248  [V8HI V4SI V2DI])
(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])

;; Scalar plus packed float modes, and the packed float modes alone.
(define_mode_iterator SSEMODEF4   [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P  [V4SF V2DF])
;; 32-byte AVX subsets.
(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
(define_mode_iterator AVX256MODE2P  [V8SI V8SF V4DF])
(define_mode_iterator AVX256MODE4P  [V4DI V4DF])
(define_mode_iterator AVX256MODE8P  [V8SI V8SF])

;; Packed float modes across both AVX vector widths.
(define_mode_iterator AVXMODEF2P    [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P    [V4SF V4DF])

;; Float and integer mode lists named after cvtdq2ps and cvtps2dq.
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; Float/integer mode pairs whose element sizes match.
(define_mode_iterator SSEMODE4S [V4SF V4SI])
(define_mode_iterator SSEMODE2D [V2DF V2DI])

;; Modes accepted by the integer vcond pattern.  V2DI is only enabled
;; when TARGET_SSE4_2 or TARGET_SSE5 holds.
(define_mode_iterator SSEMODE124C8
  [V16QI
   V8HI
   V4SI
   (V2DI "TARGET_SSE4_2 || TARGET_SSE5")])
;; SSE level required by each float mode.
(define_mode_attr sse
  [(SF   "sse")
   (DF   "sse2")
   (V4SF "sse")
   (V2DF "sse2")])

;; Mnemonic suffix for each integer vector mode.
(define_mode_attr ssevecsize
  [(V16QI "b")
   (V8HI  "w")
   (V4SI  "d")
   (V2DI  "q")])
;; Suffix mappings for sse5: the scalar/packed suffix per mode ...
(define_mode_attr ssemodesuffixf4
  [(SF   "ss")
   (DF   "sd")
   (V4SF "ps")
   (V2DF "pd")])

;; ... the scalar suffix, also for the packed modes ...
(define_mode_attr ssemodesuffixf2s
  [(SF   "ss")
   (DF   "sd")
   (V4SF "ss")
   (V2DF "sd")])

;; ... and the single trailing precision letter for the packed modes.
(define_mode_attr ssemodesuffixf2c
  [(V4SF "s")
   (V2DF "d")])

;; Largest integer allowed by the sse5 rotate immediate constraint.
(define_mode_attr sserotatemax
  [(V16QI "7")
   (V8HI  "15")
   (V4SI  "31")
   (V2DI  "63")])
;; Scalar (element) mode of each 16-byte vector mode.
(define_mode_attr ssescalarmode
  [(V4SF  "SF")
   (V2DF  "DF")
   (V16QI "QI")
   (V8HI  "HI")
   (V4SI  "SI")
   (V2DI  "DI")])

;; Vector mode of twice the size with the same element type.
(define_mode_attr ssedoublesizemode
  [(V2DF "V4DF")
   (V2DI "V4DI")
   (V4SF "V8SF")
   (V4SI "V8SI")])

;; Number of scalar elements held by each vector mode.
(define_mode_attr ssescalarnum
  [(V4SF  "4")
   (V2DF  "2")
   (V16QI "16")
   (V8HI  "8")
   (V4SI  "4")
   (V2DI  "2")])
;; Value substituted for <avxvecmode> (used below as the "mode" insn
;; attribute): integer vectors collapse to TI or OI, float vectors map
;; to themselves.
(define_mode_attr avxvecmode
  [(V16QI "TI")   (V8HI  "TI")   (V4SI "TI")   (V2DI "TI")
   (V4SF  "V4SF") (V2DF  "V2DF")
   (V32QI "OI")   (V16HI "OI")   (V8SI "OI")   (V4DI "OI")
   (V8SF  "V8SF") (V4DF  "V4DF")])

;; Single-precision float vector mode of the same total size as each
;; integer vector mode.
(define_mode_attr avxvecpsmode
  [(V16QI "V4SF") (V8HI  "V4SF") (V4SI "V4SF") (V2DI "V4SF")
   (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])

;; Vector mode with half as many elements of the same type.
(define_mode_attr avxhalfvecmode
  [(V4SF  "V2SF")
   (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V8SF  "V4SF")  (V4DF  "V2DF")])
;; Scalar (element) mode of the listed AVX vector modes.
(define_mode_attr avxscalarmode
  [(V16QI "QI") (V8HI "HI") (V4SI "SI")
   (V4SF  "SF") (V2DF "DF")
   (V8SF  "SF") (V4DF "DF")])

;; Same-shape counterpart across the SF <-> SI element types.
(define_mode_attr avxcvtvecmode
  [(V4SF "V4SI") (V8SF "V8SI")
   (V4SI "V4SF") (V8SI "V8SF")])

;; Integer vector mode with the same element count and size as each
;; float vector mode.
(define_mode_attr avxpermvecmode
  [(V2DF "V2DI") (V4SF "V4SI")
   (V4DF "V4DI") (V8SF "V8SI")])

;; Trailing precision letter ("s" or "d") for the AVX packed float modes.
(define_mode_attr avxmodesuffixf2c
  [(V4SF "s") (V2DF "d")
   (V8SF "s") (V4DF "d")])
;; Packed-type suffix ("pd", "ps" or "si") for each AVX vector mode.
;; The V4DF entry closes the list: every sibling AVX float attribute
;; (avxvecmode, avxhalfvecmode, avxscalarmode, avxpermvecmode,
;; avxmodesuffixf2c, avxmodesuffix) maps V4DF, and V4DF takes the same
;; "pd" suffix as its 128-bit counterpart V2DF.
(define_mode_attr avxmodesuffixp
  [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
   (V4DF "pd")])
;; Pattern-name suffix by vector width: empty for the 16-byte modes
;; listed, "256" for the 32-byte ones.
(define_mode_attr avxmodesuffix
  [(V16QI "")
   (V32QI "256")
   (V4SI  "")
   (V4SF  "")
   (V2DF  "")
   (V8SI  "256")
   (V8SF  "256")
   (V4DF  "256")])
;; Immediate bits for blend instructions, per mode.
(define_mode_attr blendbits
  [(V8SF "255")
   (V4SF "15")
   (V4DF "15")
   (V2DF "3")])

;; Immediate bits for vpermil instructions, per mode.
(define_mode_attr vpermilbits
  [(V8SF "255")
   (V4SF "255")
   (V4DF "15")
   (V2DF "3")])

;; Immediate bits for pinsr instructions, per mode.
(define_mode_attr pinsrbits
  [(V16QI "32768")
   (V8HI  "128")
   (V4SI  "8")])
139 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
145 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for whole-vector moves in the 32-byte AVX modes.  The work is
;; delegated to ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the braces/DONE around the C
;; body are not visible in this copy -- confirm against upstream.
147 (define_expand "mov<mode>"
148 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
149 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
152 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX move insn for every AVXMODE mode.  At least one operand must be a
;; register.  Output is selected on which_alternative: the constant case
;; goes through standard_sse_constant_opcode, the others emit vmovaps,
;; vmovapd or vmovdqa according to get_attr_mode.
;; NOTE(review): the case labels and the first part of the condition are
;; not visible in this copy -- confirm against upstream.
156 (define_insn "*avx_mov<mode>_internal"
157 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
158 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
160 && (register_operand (operands[0], <MODE>mode)
161 || register_operand (operands[1], <MODE>mode))"
163 switch (which_alternative)
166 return standard_sse_constant_opcode (insn, operands[1]);
169 switch (get_attr_mode (insn))
173 return "vmovaps\t{%1, %0|%0, %1}";
176 return "vmovapd\t{%1, %0|%0, %1}";
178 return "vmovdqa\t{%1, %0|%0, %1}";
184 [(set_attr "type" "sselog1,ssemov,ssemov")
185 (set_attr "prefix" "vex")
186 (set_attr "mode" "<avxvecmode>")])
188 ;; All of these patterns are enabled for SSE1 as well as SSE2.
189 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves in the 16-byte SSE modes.  The work is
;; delegated to ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the braces/DONE around the C
;; body are not visible in this copy -- confirm against upstream.
191 (define_expand "mov<mode>"
192 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
193 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
196 ix86_expand_vector_move (<MODE>mode, operands);
;; SSE move insn for every SSEMODE mode.  At least one operand must be a
;; register.  The constant case goes through standard_sse_constant_opcode;
;; the others emit movaps, movapd or movdqa according to get_attr_mode.
;; The computed attribute value is V4SF when optimizing the function for
;; size or when TARGET_SSE2 is off, and also for alternative 2 under the
;; TARGET_SSE_TYPELESS_STORES test; otherwise V4SF/V2DF for those modes
;; and TI for the rest.
;; NOTE(review): the case labels and the line naming the attribute being
;; computed (presumably "mode") are not visible in this copy.
200 (define_insn "*mov<mode>_internal"
201 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
202 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
204 && (register_operand (operands[0], <MODE>mode)
205 || register_operand (operands[1], <MODE>mode))"
207 switch (which_alternative)
210 return standard_sse_constant_opcode (insn, operands[1]);
213 switch (get_attr_mode (insn))
216 return "movaps\t{%1, %0|%0, %1}";
218 return "movapd\t{%1, %0|%0, %1}";
220 return "movdqa\t{%1, %0|%0, %1}";
226 [(set_attr "type" "sselog1,ssemov,ssemov")
228 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
229 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
230 (and (eq_attr "alternative" "2")
231 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
233 (const_string "V4SF")
234 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
235 (const_string "V4SF")
236 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
237 (const_string "V2DF")
239 (const_string "TI")))])
241 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
242 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
243 ;; from memory, we'd prefer to load the memory directly into the %xmm
244 ;; register. To facilitate this happy circumstance, this pattern won't
245 ;; split until after register allocation. If the 64-bit value didn't
246 ;; come from memory, this is the best we can do. This is much better
247 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; DImode -> V4SI move for 32-bit targets with SSE2 and inter-unit moves.
;; Split only once reload_completed holds:
;;  - register source: each SImode half (subreg offsets 0 and 4) is loaded
;;    with sse2_loadld, the second into scratch operand 2, and the two are
;;    combined with sse2_punpckldq;
;;  - memory source: a single vec_concatv2di with a zero second operand.
;; NOTE(review): some lines (e.g. the output template and the final else
;; branch) are not visible in this copy -- confirm against upstream.
250 (define_insn_and_split "movdi_to_sse"
252 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
253 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
254 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
255 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
257 "&& reload_completed"
260 if (register_operand (operands[1], DImode))
262 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
263 Assemble the 64-bit DImode value in an xmm register. */
264 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
265 gen_rtx_SUBREG (SImode, operands[1], 0)));
266 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
267 gen_rtx_SUBREG (SImode, operands[1], 4)));
268 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
270 else if (memory_operand (operands[1], DImode))
271 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF load whose source matches
;; zero_extended_scalar_load_operand: operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector.
;; NOTE(review): the defining header line and most of the replacement
;; pattern are not visible in this copy -- confirm against upstream.
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
279 "TARGET_SSE && reload_completed"
282 (vec_duplicate:V4SF (match_dup 1))
286 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
287 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF load whose source matches
;; zero_extended_scalar_load_operand into a vec_concat of the DFmode
;; subreg of operand 1 (offset 0) and a DFmode zero.
;; NOTE(review): the defining header line is not visible in this copy.
291 [(set (match_operand:V2DF 0 "register_operand" "")
292 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
293 "TARGET_SSE2 && reload_completed"
294 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
296 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
297 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register in a 32-byte AVX mode; delegates to
;; ix86_expand_push.
;; NOTE(review): condition and closing lines are not visible in this copy.
300 (define_expand "push<mode>1"
301 [(match_operand:AVX256MODE 0 "register_operand" "")]
304 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a register in a 16-byte SSE mode; delegates to
;; ix86_expand_push.
;; NOTE(review): condition and closing lines are not visible in this copy.
308 (define_expand "push<mode>1"
309 [(match_operand:SSEMODE 0 "register_operand" "")]
312 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 32-byte AVX modes; delegates to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): condition and closing lines are not visible in this copy.
316 (define_expand "movmisalign<mode>"
317 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
318 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
321 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 16-byte SSE modes; delegates to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): condition and closing lines are not visible in this copy.
325 (define_expand "movmisalign<mode>"
326 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
327 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
330 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; vmovups / vmovupd for the AVX packed float modes: register <- reg/mem,
;; or memory <- register.  Memory-to-memory is rejected by the condition.
;; NOTE(review): the wrapper around operand 1 (presumably an unspec) is
;; not visible in this copy.
334 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
335 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
337 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
339 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
340 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
341 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
342 [(set_attr "type" "ssemov")
343 (set_attr "movu" "1")
344 (set_attr "prefix" "vex")
345 (set_attr "mode" "<MODE>")])
;; movq into a V2DI register from a V2DI register or memory operand.  The
;; visible RTL refers to element 0 of operand 1 (parallel [(const_int 0)]).
;; NOTE(review): the enclosing RTL and the insn condition are not visible
;; in this copy.
347 (define_insn "sse2_movq128"
348 [(set (match_operand:V2DI 0 "register_operand" "=x")
351 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
352 (parallel [(const_int 0)]))
355 "%vmovq\t{%1, %0|%0, %1}"
356 [(set_attr "type" "ssemov")
357 (set_attr "prefix" "maybe_vex")
358 (set_attr "mode" "TI")])
;; movups / movupd for the SSE packed float modes: register <- reg/mem,
;; or memory <- register.  Memory-to-memory is rejected by the condition.
;; NOTE(review): the wrapper around operand 1 (presumably an unspec) is
;; not visible in this copy.
360 (define_insn "<sse>_movup<ssemodesuffixf2c>"
361 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
363 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
365 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
366 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
367 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
368 [(set_attr "type" "ssemov")
369 (set_attr "movu" "1")
370 (set_attr "mode" "<MODE>")])
;; vmovdqu for V32QI / V16QI under TARGET_AVX: register <- reg/mem, or
;; memory <- register.  Memory-to-memory is rejected by the condition.
;; NOTE(review): the wrapper around operand 1 is not visible in this copy.
372 (define_insn "avx_movdqu<avxmodesuffix>"
373 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
375 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
377 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
378 "vmovdqu\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "movu" "1")
381 (set_attr "prefix" "vex")
382 (set_attr "mode" "<avxvecmode>")])
;; movdqu for V16QI under TARGET_SSE2: register <- reg/mem, or
;; memory <- register.  Memory-to-memory is rejected by the condition.
;; NOTE(review): the unspec code is not visible in this copy.
384 (define_insn "sse2_movdqu"
385 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
386 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
388 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
389 "movdqu\t{%1, %0|%0, %1}"
390 [(set_attr "type" "ssemov")
391 (set_attr "movu" "1")
392 (set_attr "prefix_data16" "1")
393 (set_attr "mode" "TI")])
;; vmovntps / vmovntpd: store of a register to memory for the AVX packed
;; float modes.
;; NOTE(review): the wrapper around operand 1 is not visible in this copy.
395 (define_insn "avx_movnt<mode>"
396 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
398 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
400 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
401 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
402 [(set_attr "type" "ssemov")
403 (set_attr "prefix" "vex")
404 (set_attr "mode" "<MODE>")])
;; movntps / movntpd: store of a register to memory for the SSE packed
;; float modes.
;; NOTE(review): the wrapper around operand 1 is not visible in this copy.
406 (define_insn "<sse>_movnt<mode>"
407 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
409 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
411 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
412 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
413 [(set_attr "type" "ssemov")
414 (set_attr "mode" "<MODE>")])
;; vmovntdq: store of a V4DI / V2DI register to memory.
;; NOTE(review): the insn condition and the wrapper around operand 1 are
;; not visible in this copy.  The "type" here is "ssecvt" while the
;; sse2_movntv2di pattern uses "ssemov" -- confirm that is intended.
416 (define_insn "avx_movnt<mode>"
417 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
419 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
422 "vmovntdq\t{%1, %0|%0, %1}"
423 [(set_attr "type" "ssecvt")
424 (set_attr "prefix" "vex")
425 (set_attr "mode" "<avxvecmode>")])
;; movntdq: store of a V2DI register to memory.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this copy.
427 (define_insn "sse2_movntv2di"
428 [(set (match_operand:V2DI 0 "memory_operand" "=m")
429 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
432 "movntdq\t{%1, %0|%0, %1}"
433 [(set_attr "type" "ssemov")
434 (set_attr "prefix_data16" "1")
435 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this copy.  The "mode" attribute is V2DF although the operands are
;; SImode -- confirm that is intended.
437 (define_insn "sse2_movntsi"
438 [(set (match_operand:SI 0 "memory_operand" "=m")
439 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
442 "movnti\t{%1, %0|%0, %1}"
443 [(set_attr "type" "ssemov")
444 (set_attr "prefix_data16" "0")
445 (set_attr "mode" "V2DF")])
;; vlddqu: load of a V32QI / V16QI value from memory into a register.
;; NOTE(review): the insn condition and the wrapper around operand 1 are
;; not visible in this copy.
447 (define_insn "avx_lddqu<avxmodesuffix>"
448 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
450 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
453 "vlddqu\t{%1, %0|%0, %1}"
454 [(set_attr "type" "ssecvt")
455 (set_attr "movu" "1")
456 (set_attr "prefix" "vex")
457 (set_attr "mode" "<avxvecmode>")])
;; lddqu: load of a V16QI value from memory into a register.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this copy.
459 (define_insn "sse3_lddqu"
460 [(set (match_operand:V16QI 0 "register_operand" "=x")
461 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
464 "lddqu\t{%1, %0|%0, %1}"
465 [(set_attr "type" "ssemov")
466 (set_attr "movu" "1")
467 (set_attr "prefix_data16" "0")
468 (set_attr "prefix_rep" "1")
469 (set_attr "mode" "TI")])
471 ; Expand patterns for non-temporal stores. At the moment, only those
472 ; that directly map to insns are defined; it would be possible to
473 ; define patterns for other modes that would expand to several insns.
;; storent expander for the SSE packed float modes (register -> memory).
;; NOTE(review): the wrapper around operand 1 and the closing lines are
;; not visible in this copy.
475 (define_expand "storent<mode>"
476 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
478 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
480 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander for the MODEF modes (register -> memory).
;; NOTE(review): MODEF is not defined in this section, and the wrapper
;; around operand 1 and the condition are not visible in this copy.
483 (define_expand "storent<mode>"
484 [(set (match_operand:MODEF 0 "memory_operand" "")
486 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI (register -> memory).
;; NOTE(review): the unspec code and condition are not visible in this
;; copy.
491 (define_expand "storentv2di"
492 [(set (match_operand:V2DI 0 "memory_operand" "")
493 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SImode (register -> memory).
;; NOTE(review): the unspec code and condition are not visible in this
;; copy.
498 (define_expand "storentsi"
499 [(set (match_operand:SI 0 "memory_operand" "")
500 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
507 ;; Parallel floating point arithmetic
509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on the SSE packed float modes.  The whole expansion is
;; done by ix86_expand_fp_absneg_operator, after which the expander is
;; DONE.
;; NOTE(review): the RTL operator line (the code iterator behind <code>)
;; is not visible in this copy.
511 (define_expand "<code><mode>2"
512 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
514 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
515 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
516 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed add/subtract expander for the 32-byte AVX float modes.  The
;; preparation step runs ix86_fixup_binary_operands_no_copy on the
;; operands; the pattern itself is then emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
	(plusminus:AVX256MODEF2P
	  (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
	  (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
;; AVX packed add/subtract insn, three-operand VEX form:
;;   v<plusminus_mnemonic>p{s,d}  op2, op1, op0
;; Operand 1 carries the <comm> modifier; operand 2 may be memory.
(define_insn "*avx_<plusminus_insn><mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
	(plusminus:AVXMODEF2P
	  (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
	  (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type"   "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "<avxvecmode>")])
;; Packed add/subtract expander for the 16-byte SSE float modes.  The
;; preparation step runs ix86_fixup_binary_operands_no_copy on the
;; operands; the pattern itself is then emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
	(plusminus:SSEMODEF2P
	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
;; SSE packed add/subtract insn, two-operand form:
;;   <plusminus_mnemonic>p{s,d}  op2, op0
;; Operand 1 is tied to the destination ("0") and carries the <comm>
;; modifier; operand 2 may be memory.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
	(plusminus:SSEMODEF2P
	  (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "<MODE>")])
;; AVX scalar add/subtract inside a vec_merge, three-operand VEX form:
;;   v<plusminus_mnemonic>s{s,d}  op2, op1, op0
;; NOTE(review): the trailing vec_merge operands are not visible in this
;; copy.
557 (define_insn "*avx_vm<plusminus_insn><mode>3"
558 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
559 (vec_merge:SSEMODEF2P
560 (plusminus:SSEMODEF2P
561 (match_operand:SSEMODEF2P 1 "register_operand" "x")
562 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
565 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
566 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
567 [(set_attr "type" "sseadd")
568 (set_attr "prefix" "vex")
569 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar add/subtract inside a vec_merge, two-operand form with
;; operand 1 tied to the destination:
;;   <plusminus_mnemonic>s{s,d}  op2, op0
;; NOTE(review): the trailing vec_merge operands are not visible in this
;; copy.
571 (define_insn "<sse>_vm<plusminus_insn><mode>3"
572 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
573 (vec_merge:SSEMODEF2P
574 (plusminus:SSEMODEF2P
575 (match_operand:SSEMODEF2P 1 "register_operand" "0")
576 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
579 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
580 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
581 [(set_attr "type" "sseadd")
582 (set_attr "mode" "<ssescalarmode>")])
;; Packed multiply expander for the 32-byte AVX float modes.  The
;; preparation step runs ix86_fixup_binary_operands_no_copy with MULT.
;; NOTE(review): the RTL operator line is not visible in this copy.
584 (define_expand "mul<mode>3"
585 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
587 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
588 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
589 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
590 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed multiply insn: vmulp{s,d} op2, op1, op0.  Operand 1 is
;; marked commutative ("%"); operand 2 may be memory.
;; NOTE(review): the RTL operator line is not visible in this copy.
592 (define_insn "*avx_mul<mode>3"
593 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
595 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
596 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
597 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
598 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
599 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
600 [(set_attr "type" "ssemul")
601 (set_attr "prefix" "vex")
602 (set_attr "mode" "<avxvecmode>")])
;; Packed multiply expander for the 16-byte SSE float modes.  The
;; preparation step runs ix86_fixup_binary_operands_no_copy with MULT.
;; NOTE(review): the RTL operator line is not visible in this copy.
604 (define_expand "mul<mode>3"
605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
607 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
610 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; SSE packed multiply insn: mulp{s,d} op2, op0.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be memory.
;; NOTE(review): the RTL operator line is not visible in this copy.
612 (define_insn "*mul<mode>3"
613 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
615 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
616 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
617 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
618 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
619 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
620 [(set_attr "type" "ssemul")
621 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply inside a vec_merge: vmuls{s,d} op2, op1, op0.
;; NOTE(review): this pattern tests AVX_VEC_FLOAT_MODE_P while the sibling
;; *avx_vm<plusminus_insn> and *avx_vmdiv patterns test
;; AVX128_VEC_FLOAT_MODE_P -- confirm the difference is intended.  The RTL
;; operator line and trailing vec_merge operands are not visible in this
;; copy.
623 (define_insn "*avx_vmmul<mode>3"
624 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
625 (vec_merge:SSEMODEF2P
627 (match_operand:SSEMODEF2P 1 "register_operand" "x")
628 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
631 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
632 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
633 [(set_attr "type" "ssemul")
634 (set_attr "prefix" "vex")
635 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar multiply inside a vec_merge, operand 1 tied to the
;; destination: muls{s,d} op2, op0.
;; NOTE(review): the RTL operator line and trailing vec_merge operands are
;; not visible in this copy.
637 (define_insn "<sse>_vmmul<mode>3"
638 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
639 (vec_merge:SSEMODEF2P
641 (match_operand:SSEMODEF2P 1 "register_operand" "0")
642 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
645 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
646 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
647 [(set_attr "type" "ssemul")
648 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP hold, the insn is not optimized for size, and the
;; finite-only / non-trapping / unsafe-math flags are all set, the
;; division is emitted through ix86_emit_swdivsf instead.
;; NOTE(review): the condition, braces and DONE are not visible in this
;; copy.
650 (define_expand "divv8sf3"
651 [(set (match_operand:V8SF 0 "register_operand" "")
652 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
653 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
656 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
658 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
659 && flag_finite_math_only && !flag_trapping_math
660 && flag_unsafe_math_optimizations)
662 ix86_emit_swdivsf (operands[0], operands[1],
663 operands[2], V8SFmode);
;; V4DF division expander.  The preparation step runs
;; ix86_fixup_binary_operands_no_copy with DIV.
;; NOTE(review): the enabling condition is not visible in this copy.
668 (define_expand "divv4df3"
669 [(set (match_operand:V4DF 0 "register_operand" "")
670 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
671 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
673 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed divide insn for the AVXMODEF2P modes:
;;   vdivp{s,d} op2, op1, op0
;; Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the RTL operator line is not visible in this copy.
675 (define_insn "avx_div<mode>3"
676 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
678 (match_operand:AVXMODEF2P 1 "register_operand" "x")
679 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
680 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
681 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
682 [(set_attr "type" "ssediv")
683 (set_attr "prefix" "vex")
684 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP hold,
;; the insn is optimized for speed, and the finite-only / non-trapping /
;; unsafe-math flags are all set, the division is emitted through
;; ix86_emit_swdivsf instead.
;; NOTE(review): the condition, braces and DONE are not visible in this
;; copy.
686 (define_expand "divv4sf3"
687 [(set (match_operand:V4SF 0 "register_operand" "")
688 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
689 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
692 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
693 && flag_finite_math_only && !flag_trapping_math
694 && flag_unsafe_math_optimizations)
696 ix86_emit_swdivsf (operands[0], operands[1],
697 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
;; NOTE(review): the condition and closing lines are not visible in this
;; copy.
702 (define_expand "divv2df3"
703 [(set (match_operand:V2DF 0 "register_operand" "")
704 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
705 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX packed divide insn restricted to the 128-bit float modes
;; (SSEMODEF2P under AVX128_VEC_FLOAT_MODE_P): vdivp{s,d} op2, op1, op0.
;; NOTE(review): the RTL operator line is not visible in this copy.
709 (define_insn "*avx_div<mode>3"
710 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
712 (match_operand:SSEMODEF2P 1 "register_operand" "x")
713 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
714 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
715 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
716 [(set_attr "type" "ssediv")
717 (set_attr "prefix" "vex")
718 (set_attr "mode" "<MODE>")])
;; SSE packed divide insn, operand 1 tied to the destination:
;;   divp{s,d} op2, op0
;; NOTE(review): the RTL operator line is not visible in this copy.
720 (define_insn "<sse>_div<mode>3"
721 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
723 (match_operand:SSEMODEF2P 1 "register_operand" "0")
724 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
725 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
726 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
727 [(set_attr "type" "ssediv")
728 (set_attr "mode" "<MODE>")])
;; AVX scalar divide inside a vec_merge: vdivs{s,d} op2, op1, op0.
;; NOTE(review): the RTL operator line and trailing vec_merge operands are
;; not visible in this copy.
730 (define_insn "*avx_vmdiv<mode>3"
731 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
732 (vec_merge:SSEMODEF2P
734 (match_operand:SSEMODEF2P 1 "register_operand" "x")
735 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
738 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
739 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
740 [(set_attr "type" "ssediv")
741 (set_attr "prefix" "vex")
742 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar divide inside a vec_merge, operand 1 tied to the
;; destination: divs{s,d} op2, op0.
;; NOTE(review): the RTL operator line and trailing vec_merge operands are
;; not visible in this copy.
744 (define_insn "<sse>_vmdiv<mode>3"
745 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
746 (vec_merge:SSEMODEF2P
748 (match_operand:SSEMODEF2P 1 "register_operand" "0")
749 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
752 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
753 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
754 [(set_attr "type" "ssediv")
755 (set_attr "mode" "<ssescalarmode>")])
;; vrcpps on V8SF, modelled with UNSPEC_RCP; the source may be memory.
;; NOTE(review): the unspec header line and insn condition are not
;; visible in this copy.
757 (define_insn "avx_rcpv8sf2"
758 [(set (match_operand:V8SF 0 "register_operand" "=x")
760 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
762 "vrcpps\t{%1, %0|%0, %1}"
763 [(set_attr "type" "sse")
764 (set_attr "prefix" "vex")
765 (set_attr "mode" "V8SF")])
;; rcpps on V4SF, modelled with UNSPEC_RCP; the template carries %v and
;; the prefix attribute is maybe_vex.
;; NOTE(review): the unspec header line and insn condition are not
;; visible in this copy.
767 (define_insn "sse_rcpv4sf2"
768 [(set (match_operand:V4SF 0 "register_operand" "=x")
770 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
772 "%vrcpps\t{%1, %0|%0, %1}"
773 [(set_attr "type" "sse")
774 (set_attr "atom_sse_attr" "rcp")
775 (set_attr "prefix" "maybe_vex")
776 (set_attr "mode" "V4SF")])
;; vrcpss: operand 1 (reg/mem) goes through an unspec, operand 2 is a
;; separate register input.  Emitted as: vrcpss op1, op2, op0.
;; NOTE(review): the vec_merge/unspec-code lines and the insn condition
;; are not visible in this copy.
778 (define_insn "*avx_vmrcpv4sf2"
779 [(set (match_operand:V4SF 0 "register_operand" "=x")
781 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
783 (match_operand:V4SF 2 "register_operand" "x")
786 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
787 [(set_attr "type" "sse")
788 (set_attr "prefix" "vex")
789 (set_attr "mode" "SF")])
;; rcpss: operand 1 (reg/mem) goes through an unspec, operand 2 is tied to
;; the destination.  Emitted as: rcpss op1, op0.
;; NOTE(review): the vec_merge/unspec-code lines and the insn condition
;; are not visible in this copy.
791 (define_insn "sse_vmrcpv4sf2"
792 [(set (match_operand:V4SF 0 "register_operand" "=x")
794 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
796 (match_operand:V4SF 2 "register_operand" "0")
799 "rcpss\t{%1, %0|%0, %1}"
800 [(set_attr "type" "sse")
801 (set_attr "atom_sse_attr" "rcp")
802 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP hold,
;; the insn is not optimized for size, and the finite-only / non-trapping
;; / unsafe-math flags are all set, the result is produced by
;; ix86_emit_swsqrtsf (last argument 0).
;; NOTE(review): the condition, braces and DONE are not visible in this
;; copy.
804 (define_expand "sqrtv8sf2"
805 [(set (match_operand:V8SF 0 "register_operand" "")
806 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
809 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
810 && flag_finite_math_only && !flag_trapping_math
811 && flag_unsafe_math_optimizations)
813 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; vsqrtps on V8SF; the source may be memory.
;; NOTE(review): the insn condition is not visible in this copy.
818 (define_insn "avx_sqrtv8sf2"
819 [(set (match_operand:V8SF 0 "register_operand" "=x")
820 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
822 "vsqrtps\t{%1, %0|%0, %1}"
823 [(set_attr "type" "sse")
824 (set_attr "prefix" "vex")
825 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP hold,
;; the insn is optimized for speed, and the finite-only / non-trapping /
;; unsafe-math flags are all set, the result is produced by
;; ix86_emit_swsqrtsf (last argument 0).
;; NOTE(review): the condition, braces and DONE are not visible in this
;; copy.
827 (define_expand "sqrtv4sf2"
828 [(set (match_operand:V4SF 0 "register_operand" "")
829 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
832 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF; the template carries %v and the prefix attribute is
;; maybe_vex.
;; NOTE(review): the insn condition is not visible in this copy.
841 (define_insn "sse_sqrtv4sf2"
842 [(set (match_operand:V4SF 0 "register_operand" "=x")
843 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
845 "%vsqrtps\t{%1, %0|%0, %1}"
846 [(set_attr "type" "sse")
847 (set_attr "atom_sse_attr" "sqrt")
848 (set_attr "prefix" "maybe_vex")
849 (set_attr "mode" "V4SF")])
;; vsqrtpd on V4DF; the source may be memory.
;; NOTE(review): the insn condition is not visible in this copy.
851 (define_insn "sqrtv4df2"
852 [(set (match_operand:V4DF 0 "register_operand" "=x")
853 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
855 "vsqrtpd\t{%1, %0|%0, %1}"
856 [(set_attr "type" "sse")
857 (set_attr "prefix" "vex")
858 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF; the template carries %v and the prefix attribute is
;; maybe_vex.
;; NOTE(review): the insn condition is not visible in this copy.
860 (define_insn "sqrtv2df2"
861 [(set (match_operand:V2DF 0 "register_operand" "=x")
862 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
864 "%vsqrtpd\t{%1, %0|%0, %1}"
865 [(set_attr "type" "sse")
866 (set_attr "prefix" "maybe_vex")
867 (set_attr "mode" "V2DF")])
;; AVX scalar square root inside a vec_merge; operand 2 is a separate
;; register input.  Emitted as: vsqrts{s,d} op1, op2, op0.
;; NOTE(review): the RTL operator line and final vec_merge operand are not
;; visible in this copy.
869 (define_insn "*avx_vmsqrt<mode>2"
870 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
871 (vec_merge:SSEMODEF2P
873 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
874 (match_operand:SSEMODEF2P 2 "register_operand" "x")
876 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
877 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
878 [(set_attr "type" "sse")
879 (set_attr "prefix" "vex")
880 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar square root inside a vec_merge; operand 2 is tied to the
;; destination.  Emitted as: sqrts{s,d} op1, op0.
;; NOTE(review): the RTL operator line and final vec_merge operand are not
;; visible in this copy.
882 (define_insn "<sse>_vmsqrt<mode>2"
883 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
884 (vec_merge:SSEMODEF2P
886 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
887 (match_operand:SSEMODEF2P 2 "register_operand" "0")
889 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
890 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
891 [(set_attr "type" "sse")
892 (set_attr "atom_sse_attr" "sqrt")
893 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  Expansion goes through
;; ix86_emit_swsqrtsf with last argument 1.
;; NOTE(review): the unspec header line, braces and DONE are not visible
;; in this copy.
895 (define_expand "rsqrtv8sf2"
896 [(set (match_operand:V8SF 0 "register_operand" "")
898 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
899 "TARGET_AVX && TARGET_SSE_MATH"
901 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; vrsqrtps on V8SF, modelled with UNSPEC_RSQRT; the source may be memory.
;; NOTE(review): the unspec header line and insn condition are not
;; visible in this copy.
905 (define_insn "avx_rsqrtv8sf2"
906 [(set (match_operand:V8SF 0 "register_operand" "=x")
908 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
910 "vrsqrtps\t{%1, %0|%0, %1}"
911 [(set_attr "type" "sse")
912 (set_attr "prefix" "vex")
913 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  Expansion goes
;; through ix86_emit_swsqrtsf with last argument 1.
;; NOTE(review): the unspec header line, condition, braces and DONE are
;; not visible in this copy.
915 (define_expand "rsqrtv4sf2"
916 [(set (match_operand:V4SF 0 "register_operand" "")
918 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
921 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF, modelled with UNSPEC_RSQRT; the template carries %v
;; and the prefix attribute is maybe_vex.
;; NOTE(review): the unspec header line and insn condition are not
;; visible in this copy.
925 (define_insn "sse_rsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x")
928 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
930 "%vrsqrtps\t{%1, %0|%0, %1}"
931 [(set_attr "type" "sse")
932 (set_attr "prefix" "maybe_vex")
933 (set_attr "mode" "V4SF")])
;; vrsqrtss: operand 1 (reg/mem) goes through an unspec, operand 2 is a
;; separate register input.  Emitted as: vrsqrtss op1, op2, op0.
;; NOTE(review): the vec_merge/unspec-code lines and the insn condition
;; are not visible in this copy.
935 (define_insn "*avx_vmrsqrtv4sf2"
936 [(set (match_operand:V4SF 0 "register_operand" "=x")
938 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
940 (match_operand:V4SF 2 "register_operand" "x")
943 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
944 [(set_attr "type" "sse")
945 (set_attr "prefix" "vex")
946 (set_attr "mode" "SF")])
;; rsqrtss: operand 1 (reg/mem) goes through an unspec, operand 2 is tied
;; to the destination.  Emitted as: rsqrtss op1, op0.
;; NOTE(review): the vec_merge/unspec-code lines and the insn condition
;; are not visible in this copy.
948 (define_insn "sse_vmrsqrtv4sf2"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
951 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
953 (match_operand:V4SF 2 "register_operand" "0")
956 "rsqrtss\t{%1, %0|%0, %1}"
957 [(set_attr "type" "sse")
958 (set_attr "mode" "SF")])
960 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
961 ;; isn't really correct, as those rtl operators aren't defined when
962 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed min/max expander for the 32-byte AVX float modes.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a
;; register; the operands then go through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the braces around the C body are not visible in this
;; copy.
964 (define_expand "<code><mode>3"
965 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
966 (smaxmin:AVX256MODEF2P
967 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
968 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
969 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
971 if (!flag_finite_math_only)
972 operands[1] = force_reg (<MODE>mode, operands[1]);
973 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed min/max expander for the 16-byte SSE float modes.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a
;; register; the operands then go through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the braces around the C body
;; are not visible in this copy.
976 (define_expand "<code><mode>3"
977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
979 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
980 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
981 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
983 if (!flag_finite_math_only)
984 operands[1] = force_reg (<MODE>mode, operands[1]);
985 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max insn, valid only under flag_finite_math_only.
;; Operand 1 is marked commutative ("%x").  Emitted as:
;;   v<maxminfprefix>p{s,d} op2, op1, op0
;; NOTE(review): the RTL operator line is not visible in this copy.
988 (define_insn "*avx_<code><mode>3_finite"
989 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
991 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
992 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
993 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
994 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
995 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
996 [(set_attr "type" "sseadd")
997 (set_attr "prefix" "vex")
998 (set_attr "mode" "<MODE>")])
;; SSE packed min/max used only under flag_finite_math_only.  Operand 1 is
;; commutative ('%') and tied to the destination ("0"), so the two-operand
;; template prints only operands 2 and 0.
1000 (define_insn "*<code><mode>3_finite"
1001 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1003 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1004 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1005 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1006 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1007 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1008 [(set_attr "type" "sseadd")
1009 (set_attr "mode" "<MODE>")])
;; AVX packed min/max without the flag_finite_math_only requirement of the
;; _finite variant.  Emits the three-operand v<maxminfprefix>p* form.
1011 (define_insn "*avx_<code><mode>3"
1012 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1014 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1017 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1018 [(set_attr "type" "sseadd")
1019 (set_attr "prefix" "vex")
1020 (set_attr "mode" "<avxvecmode>")])
;; SSE packed min/max without the flag_finite_math_only requirement.
;; Unlike the _finite variant, operand 1 must be a register and has no '%'
;; marker, so the operand order is preserved.
1022 (define_insn "*<code><mode>3"
1023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1025 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1026 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1027 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1028 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1029 [(set_attr "type" "sseadd")
1030 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max merged into a vector (vec_merge): emits the
;; three-operand v<maxminfprefix>s* form with register operand 1 and
;; register-or-memory operand 2.  The "type" attribute is "sseadd" so that
;; this pattern is classified like every other min/max pattern in this
;; file, including its non-AVX counterpart <sse>_vm<code><mode>3.
1032 (define_insn "*avx_vm<code><mode>3"
1033 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1034 (vec_merge:SSEMODEF2P
1036 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1037 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1040 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1041 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1042 [(set_attr "type" "sseadd")
1043 (set_attr "prefix" "vex")
1044 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar min/max merged into a vector (vec_merge).  Operand 1 is tied
;; to the destination, so the template prints only operands 2 and 0.
1046 (define_insn "<sse>_vm<code><mode>3"
1047 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1048 (vec_merge:SSEMODEF2P
1050 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1051 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1054 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1055 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1056 [(set_attr "type" "sseadd")
1057 (set_attr "mode" "<ssescalarmode>")])
1059 ;; These versions of the min/max patterns implement exactly the operations
1060 ;; min = (op1 < op2 ? op1 : op2)
1061 ;; max = (!(op1 < op2) ? op1 : op2)
1062 ;; Their operands are not commutative, and thus they may be used in the
1063 ;; presence of -0.0 and NaN.
;; AVX non-commutative min: operand 1 is a register without the '%'
;; marker, so the operand order given in the RTL is the order emitted in
;; the vminp* template.
1065 (define_insn "*avx_ieee_smin<mode>3"
1066 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1068 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1069 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1071 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1072 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1073 [(set_attr "type" "sseadd")
1074 (set_attr "prefix" "vex")
1075 (set_attr "mode" "<avxvecmode>")])
;; AVX non-commutative max: same operand layout as *avx_ieee_smin<mode>3
;; but emitting vmaxp*.
1077 (define_insn "*avx_ieee_smax<mode>3"
1078 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1080 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1081 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1083 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1084 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1085 [(set_attr "type" "sseadd")
1086 (set_attr "prefix" "vex")
1087 (set_attr "mode" "<avxvecmode>")])
;; SSE non-commutative min: operand 1 is tied to the destination and has
;; no '%' marker; emits the two-operand minp* form.
1089 (define_insn "*ieee_smin<mode>3"
1090 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1092 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1093 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1095 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1096 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1097 [(set_attr "type" "sseadd")
1098 (set_attr "mode" "<MODE>")])
;; SSE non-commutative max: same operand layout as *ieee_smin<mode>3 but
;; emitting maxp*.
1100 (define_insn "*ieee_smax<mode>3"
1101 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1104 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "mode" "<MODE>")])
;; vaddsubps on V8SF.  The RTL combines an arithmetic result of operands 1
;; and 2 with (minus op1 op2) on the same two operands (match_dup).
1111 (define_insn "avx_addsubv8sf3"
1112 [(set (match_operand:V8SF 0 "register_operand" "=x")
1115 (match_operand:V8SF 1 "register_operand" "x")
1116 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1117 (minus:V8SF (match_dup 1) (match_dup 2))
1120 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1121 [(set_attr "type" "sseadd")
1122 (set_attr "prefix" "vex")
1123 (set_attr "mode" "V8SF")])
;; vaddsubpd on V4DF; same structure as avx_addsubv8sf3 with the
;; (minus op1 op2) half built from match_dup of operands 1 and 2.
1125 (define_insn "avx_addsubv4df3"
1126 [(set (match_operand:V4DF 0 "register_operand" "=x")
1129 (match_operand:V4DF 1 "register_operand" "x")
1130 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1131 (minus:V4DF (match_dup 1) (match_dup 2))
1134 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1135 [(set_attr "type" "sseadd")
1136 (set_attr "prefix" "vex")
1137 (set_attr "mode" "V4DF")])
;; VEX-encoded three-operand vaddsubps on V4SF.
1139 (define_insn "*avx_addsubv4sf3"
1140 [(set (match_operand:V4SF 0 "register_operand" "=x")
1143 (match_operand:V4SF 1 "register_operand" "x")
1144 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1145 (minus:V4SF (match_dup 1) (match_dup 2))
1148 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1149 [(set_attr "type" "sseadd")
1150 (set_attr "prefix" "vex")
1151 (set_attr "mode" "V4SF")])
;; Two-operand addsubps on V4SF; operand 1 is tied to the destination.
;; The prefix_rep attribute is set to "1" for this encoding.
1153 (define_insn "sse3_addsubv4sf3"
1154 [(set (match_operand:V4SF 0 "register_operand" "=x")
1157 (match_operand:V4SF 1 "register_operand" "0")
1158 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1159 (minus:V4SF (match_dup 1) (match_dup 2))
1162 "addsubps\t{%2, %0|%0, %2}"
1163 [(set_attr "type" "sseadd")
1164 (set_attr "prefix_rep" "1")
1165 (set_attr "mode" "V4SF")])
;; VEX-encoded three-operand vaddsubpd on V2DF.
1167 (define_insn "*avx_addsubv2df3"
1168 [(set (match_operand:V2DF 0 "register_operand" "=x")
1171 (match_operand:V2DF 1 "register_operand" "x")
1172 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1173 (minus:V2DF (match_dup 1) (match_dup 2))
1176 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "prefix" "vex")
1179 (set_attr "mode" "V2DF")])
;; Two-operand addsubpd on V2DF; operand 1 is tied to the destination.
;; Marked atom_unit "complex" for Atom scheduling.
1181 (define_insn "sse3_addsubv2df3"
1182 [(set (match_operand:V2DF 0 "register_operand" "=x")
1185 (match_operand:V2DF 1 "register_operand" "0")
1186 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1187 (minus:V2DF (match_dup 1) (match_dup 2))
1190 "addsubpd\t{%2, %0|%0, %2}"
1191 [(set_attr "type" "sseadd")
1192 (set_attr "atom_unit" "complex")
1193 (set_attr "mode" "V2DF")])
;; Horizontal add/sub (vh<plusminus_mnemonic>pd) on V4DF.  The RTL selects
;; DF elements 0..3 of operand 1, then DF elements 0..3 of operand 2.
1195 (define_insn "avx_h<plusminus_insn>v4df3"
1196 [(set (match_operand:V4DF 0 "register_operand" "=x")
1201 (match_operand:V4DF 1 "register_operand" "x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1205 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1206 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1210 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1211 (parallel [(const_int 0)]))
1212 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1214 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1215 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1217 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1218 [(set_attr "type" "sseadd")
1219 (set_attr "prefix" "vex")
1220 (set_attr "mode" "V4DF")])
;; Horizontal add/sub (vh<plusminus_mnemonic>ps) on V8SF.  The RTL selects
;; SF elements in this order: operand 1 elements 0..3, operand 2 elements
;; 0..3, operand 1 elements 4..7, operand 2 elements 4..7.
1222 (define_insn "avx_h<plusminus_insn>v8sf3"
1223 [(set (match_operand:V8SF 0 "register_operand" "=x")
1229 (match_operand:V8SF 1 "register_operand" "x")
1230 (parallel [(const_int 0)]))
1231 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1233 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1234 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1238 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1239 (parallel [(const_int 0)]))
1240 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1242 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1243 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1248 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1250 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1251 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1254 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1255 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1257 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1258 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1260 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1261 [(set_attr "type" "sseadd")
1262 (set_attr "prefix" "vex")
1263 (set_attr "mode" "V8SF")])
;; VEX-encoded horizontal add/sub (vh<plusminus_mnemonic>ps) on V4SF.  The
;; RTL selects SF elements 0..3 of operand 1, then 0..3 of operand 2.
1265 (define_insn "*avx_h<plusminus_insn>v4sf3"
1266 [(set (match_operand:V4SF 0 "register_operand" "=x")
1271 (match_operand:V4SF 1 "register_operand" "x")
1272 (parallel [(const_int 0)]))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1280 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1281 (parallel [(const_int 0)]))
1282 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1285 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1287 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1288 [(set_attr "type" "sseadd")
1289 (set_attr "prefix" "vex")
1290 (set_attr "mode" "V4SF")])
;; Two-operand horizontal add/sub (h<plusminus_mnemonic>ps) on V4SF with
;; operand 1 tied to the destination.  Same element selection as the AVX
;; V4SF pattern; sets atom_unit "complex" and prefix_rep "1".
1292 (define_insn "sse3_h<plusminus_insn>v4sf3"
1293 [(set (match_operand:V4SF 0 "register_operand" "=x")
1298 (match_operand:V4SF 1 "register_operand" "0")
1299 (parallel [(const_int 0)]))
1300 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1302 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1303 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1307 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1308 (parallel [(const_int 0)]))
1309 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1311 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1312 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1314 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1315 [(set_attr "type" "sseadd")
1316 (set_attr "atom_unit" "complex")
1317 (set_attr "prefix_rep" "1")
1318 (set_attr "mode" "V4SF")])
;; VEX-encoded horizontal add/sub (vh<plusminus_mnemonic>pd) on V2DF.  The
;; RTL selects DF elements 0 and 1 of operand 1, then 0 and 1 of operand 2.
1320 (define_insn "*avx_h<plusminus_insn>v2df3"
1321 [(set (match_operand:V2DF 0 "register_operand" "=x")
1325 (match_operand:V2DF 1 "register_operand" "x")
1326 (parallel [(const_int 0)]))
1327 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1330 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1331 (parallel [(const_int 0)]))
1332 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1334 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1335 [(set_attr "type" "sseadd")
1336 (set_attr "prefix" "vex")
1337 (set_attr "mode" "V2DF")])
;; Two-operand horizontal add/sub (h<plusminus_mnemonic>pd) on V2DF with
;; operand 1 tied to the destination.
1339 (define_insn "sse3_h<plusminus_insn>v2df3"
1340 [(set (match_operand:V2DF 0 "register_operand" "=x")
1344 (match_operand:V2DF 1 "register_operand" "0")
1345 (parallel [(const_int 0)]))
1346 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1349 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1350 (parallel [(const_int 0)]))
1351 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1353 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1354 [(set_attr "type" "sseadd")
1355 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Two
;; strategies are visible: two chained gen_sse3_haddv4sf3 calls through a
;; temporary, or ix86_expand_reduc_v4sf with gen_addv4sf3.
1357 (define_expand "reduc_splus_v4sf"
1358 [(match_operand:V4SF 0 "register_operand" "")
1359 (match_operand:V4SF 1 "register_operand" "")]
1364 rtx tmp = gen_reg_rtx (V4SFmode);
1365 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1366 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1369 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single gen_sse3_haddv2df3 of
;; operand 1 with itself, written to operand 0.
1373 (define_expand "reduc_splus_v2df"
1374 [(match_operand:V2DF 0 "register_operand" "")
1375 (match_operand:V2DF 1 "register_operand" "")]
1378 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Max reduction of a V4SF vector, delegated to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
1382 (define_expand "reduc_smax_v4sf"
1383 [(match_operand:V4SF 0 "register_operand" "")
1384 (match_operand:V4SF 1 "register_operand" "")]
1387 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min reduction of a V4SF vector, delegated to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
1391 (define_expand "reduc_smin_v4sf"
1392 [(match_operand:V4SF 0 "register_operand" "")
1393 (match_operand:V4SF 1 "register_operand" "")]
1396 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1402 ;; Parallel floating point comparisons
1404 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate: operand 3 is an
;; immediate accepted by const_0_to_31_operand and is printed as the first
;; AT&T operand of vcmpp*.
1406 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1407 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1409 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1410 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1411 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1414 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1415 [(set_attr "type" "ssecmp")
1416 (set_attr "length_immediate" "1")
1417 (set_attr "prefix" "vex")
1418 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate, merged into a vector
;; (vec_merge).  Operand 3 is an immediate accepted by
;; const_0_to_31_operand and is printed as the first AT&T operand of vcmps*.
;; Operand 0 carries the "=x" output constraint, matching the packed
;; avx_cmpp pattern, so the destination is marked as written and is
;; restricted to the same register class as the inputs.
1420 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1421 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1422 (vec_merge:SSEMODEF2P
1424 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1425 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1426 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1431 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1432 [(set_attr "type" "ssecmp")
1433 (set_attr "length_immediate" "1")
1434 (set_attr "prefix" "vex")
1435 (set_attr "mode" "<ssescalarmode>")])
1437 ;; We don't promote 128bit vector compare intrinsics, but the vectorizer
1438 ;; may generate 256bit vector compare instructions.
;; AVX packed compare matched from a comparison operator (operand 3,
;; predicate avx_comparison_float_operator).  The condition is printed
;; into the mnemonic through the %D3 output modifier.
1439 (define_insn "*avx_maskcmp<mode>3"
1440 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1441 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1442 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1443 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1444 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1445 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1446 [(set_attr "type" "ssecmp")
1447 (set_attr "prefix" "vex")
1448 (set_attr "length_immediate" "1")
1449 (set_attr "mode" "<avxvecmode>")])
;; SSE compare over SSEMODEF4 (SF, DF, V4SF, V2DF) matched from a
;; comparison operator (operand 3, predicate sse_comparison_operator).
;; Operand 1 is tied to the destination; the condition is printed via %D3.
1451 (define_insn "<sse>_maskcmp<mode>3"
1452 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1453 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1454 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1455 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1456 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1458 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "ssecmp")
1460 (set_attr "length_immediate" "1")
1461 (set_attr "mode" "<MODE>")])
;; SSE scalar compare merged into a vector (vec_merge), matched from a
;; comparison operator (operand 3).  Disabled when TARGET_SSE5 is set.
1463 (define_insn "<sse>_vmmaskcmp<mode>3"
1464 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1465 (vec_merge:SSEMODEF2P
1466 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1467 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1468 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1471 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1472 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1473 [(set_attr "type" "ssecmp")
1474 (set_attr "length_immediate" "1")
1475 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of the
;; two vector operands.  The %v output prefix together with prefix
;; "maybe_vex" lets the same pattern emit the legacy or VEX mnemonic.
;; prefix_data16 is chosen by testing whether the mode attribute is DF.
1477 (define_insn "<sse>_comi"
1478 [(set (reg:CCFP FLAGS_REG)
1481 (match_operand:<ssevecmode> 0 "register_operand" "x")
1482 (parallel [(const_int 0)]))
1484 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1485 (parallel [(const_int 0)]))))]
1486 "SSE_FLOAT_MODE_P (<MODE>mode)"
1487 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1488 [(set_attr "type" "ssecomi")
1489 (set_attr "prefix" "maybe_vex")
1490 (set_attr "prefix_rep" "0")
1491 (set (attr "prefix_data16")
1492 (if_then_else (eq_attr "mode" "DF")
1494 (const_string "0")))
1495 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags register is set in CCFPU mode
;; and the emitted mnemonic is %vucomis*.
1497 (define_insn "<sse>_ucomi"
1498 [(set (reg:CCFPU FLAGS_REG)
1501 (match_operand:<ssevecmode> 0 "register_operand" "x")
1502 (parallel [(const_int 0)]))
1504 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1505 (parallel [(const_int 0)]))))]
1506 "SSE_FLOAT_MODE_P (<MODE>mode)"
1507 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "ssecomi")
1509 (set_attr "prefix" "maybe_vex")
1510 (set_attr "prefix_rep" "0")
1511 (set (attr "prefix_data16")
1512 (if_then_else (eq_attr "mode" "DF")
1514 (const_string "0")))
1515 (set_attr "mode" "<MODE>")])
;; Vector conditional select: operand 0 = (operand 4 <op3> operand 5)
;; ? operand 1 : operand 2.  Operand 3 accepts any operator (empty
;; predicate); the expansion is done by ix86_expand_fp_vcond.
1517 (define_expand "vcond<mode>"
1518 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1519 (if_then_else:SSEMODEF2P
1520 (match_operator 3 ""
1521 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1522 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1523 (match_operand:SSEMODEF2P 1 "general_operand" "")
1524 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1525 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1527 bool ok = ix86_expand_fp_vcond (operands);
1532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1534 ;; Parallel floating point logical operations
1536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not: emits the three-operand vandnp* with register
;; operand 1 and register-or-memory operand 2.
1538 (define_insn "avx_andnot<mode>3"
1539 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1542 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1543 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1544 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1545 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1546 [(set_attr "type" "sselog")
1547 (set_attr "prefix" "vex")
1548 (set_attr "mode" "<avxvecmode>")])
;; SSE packed and-not: operand 1 is tied to the destination, so the
;; two-operand andnp* template prints only operands 2 and 0.
1550 (define_insn "<sse>_andnot<mode>3"
1551 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1554 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1555 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1556 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1557 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1558 [(set_attr "type" "sselog")
1559 (set_attr "mode" "<MODE>")])
;; Expander for the plogic operations on AVX256MODEF2P vectors; the only
;; preparation is ix86_fixup_binary_operands_no_copy on the operands.
1561 (define_expand "<code><mode>3"
1562 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1563 (plogic:AVX256MODEF2P
1564 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1565 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1566 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical operation (v<plogicprefix>p*).  Operand 1 is
;; commutative ('%'); the insn also requires ix86_binary_operator_ok.
1569 (define_insn "*avx_<code><mode>3"
1570 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1572 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1573 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1574 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1576 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1577 [(set_attr "type" "sselog")
1578 (set_attr "prefix" "vex")
1579 (set_attr "mode" "<avxvecmode>")])
;; SSEMODEF2P (V4SF/V2DF) counterpart of the expander above; the only
;; preparation is ix86_fixup_binary_operands_no_copy on the operands.
1581 (define_expand "<code><mode>3"
1582 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1584 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1586 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1587 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed logical operation (<plogicprefix>p*).  Operand 1 is
;; commutative ('%') and tied to the destination; the insn also requires
;; ix86_binary_operator_ok.
1589 (define_insn "*<code><mode>3"
1590 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1592 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1593 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1594 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1595 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1596 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1597 [(set_attr "type" "sselog")
1598 (set_attr "mode" "<MODE>")])
;; Vector copysign built from logical operations.  Operand 3 is the
;; sign-bit mask from ix86_build_signbit_mask; one term combines
;; (not mask) with operand 1, the other is (and mask operand 2), and the
;; two temporaries (operands 4 and 5, fresh pseudos) are IORed into
;; operand 0.
1600 (define_expand "copysign<mode>3"
1603 (not:SSEMODEF2P (match_dup 3))
1604 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1606 (and:SSEMODEF2P (match_dup 3)
1607 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1608 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1609 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1610 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1612 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1614 operands[4] = gen_reg_rtx (<MODE>mode);
1615 operands[5] = gen_reg_rtx (<MODE>mode);
1618 ;; Also define scalar versions. These are used for abs, neg, and
1619 ;; conditional move. Using subregs into vector modes causes register
1620 ;; allocation lossage. These patterns do not allow memory operands
1621 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not for AVX.  All operands are registers; the
;; instruction emitted is the packed vandnp*, and the insn's mode
;; attribute is the containing vector mode (<ssevecmode>).
1623 (define_insn "*avx_andnot<mode>3"
1624 [(set (match_operand:MODEF 0 "register_operand" "=x")
1627 (match_operand:MODEF 1 "register_operand" "x"))
1628 (match_operand:MODEF 2 "register_operand" "x")))]
1629 "AVX_FLOAT_MODE_P (<MODE>mode)"
1630 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1631 [(set_attr "type" "sselog")
1632 (set_attr "prefix" "vex")
1633 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not for SSE.  Register operands only, operand 1 tied
;; to the destination; emits the packed andnp* instruction.
1635 (define_insn "*andnot<mode>3"
1636 [(set (match_operand:MODEF 0 "register_operand" "=x")
1639 (match_operand:MODEF 1 "register_operand" "0"))
1640 (match_operand:MODEF 2 "register_operand" "x")))]
1641 "SSE_FLOAT_MODE_P (<MODE>mode)"
1642 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "sselog")
1644 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation for AVX.  Register operands only;
;; emits the packed v<plogicprefix>p* instruction.
1646 (define_insn "*avx_<code><mode>3"
1647 [(set (match_operand:MODEF 0 "register_operand" "=x")
1649 (match_operand:MODEF 1 "register_operand" "x")
1650 (match_operand:MODEF 2 "register_operand" "x")))]
1651 "AVX_FLOAT_MODE_P (<MODE>mode)"
1652 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1653 [(set_attr "type" "sselog")
1654 (set_attr "prefix" "vex")
1655 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation for SSE.  Register operands only,
;; operand 1 tied to the destination; emits the packed <plogicprefix>p*.
1657 (define_insn "*<code><mode>3"
1658 [(set (match_operand:MODEF 0 "register_operand" "=x")
1660 (match_operand:MODEF 1 "register_operand" "0")
1661 (match_operand:MODEF 2 "register_operand" "x")))]
1662 "SSE_FLOAT_MODE_P (<MODE>mode)"
1663 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1664 [(set_attr "type" "sselog")
1665 (set_attr "mode" "<ssevecmode>")])
1667 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1669 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1670 ;; scalar versions of the instructions as well as the vector versions.
1672 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1674 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1675 ;; combine to generate a multiply/add with two memory references. We then
1676 ;; split this insn, into loading up the destination register with one of the
1677 ;; memory operations. If we don't manage to split the insn, reload will
1678 ;; generate the appropriate moves. The reason this is needed, is that combine
1679 ;; has already folded one of the memory references into both the multiply and
1680 ;; add insns, and it can't generate a new pseudo. I.e.:
1681 ;; (set (reg1) (mem (addr1)))
1682 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1683 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 multiply-add: operands 1 and 2 are the multiplicands, operand 3
;; the addend.  Four constraint alternatives; in each one either operand 1
;; or operand 3 is tied to the destination.  Requires TARGET_SSE5,
;; TARGET_FUSED_MADD and acceptance by ix86_sse5_valid_op_p.
1685 (define_insn "sse5_fmadd<mode>4"
1686 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1689 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1690 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1691 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1692 "TARGET_SSE5 && TARGET_FUSED_MADD
1693 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1694 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1695 [(set_attr "type" "ssemuladd")
1696 (set_attr "mode" "<MODE>")])
1698 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with
;; its fifth argument 1 but accepts them with 2, and operand 0 is not
;; mentioned in any input.  ix86_expand_sse5_multiple_memory then adjusts
;; the operands and the insn is re-emitted through gen_sse5_fmadd<mode>4.
1700 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1703 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1704 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1705 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1707 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1708 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1709 && !reg_mentioned_p (operands[0], operands[1])
1710 && !reg_mentioned_p (operands[0], operands[2])
1711 && !reg_mentioned_p (operands[0], operands[3])"
1714 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1715 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1716 operands[2], operands[3]));
1720 ;; For the scalar operations, use operand1 for the upper words that aren't
1721 ;; modified, so restrict the forms that are generated.
1722 ;; Scalar version of fmadd
;; Scalar multiply-add merged into a vector (vec_merge).  Operand 1 is
;; tied to the destination in both alternatives; the alternatives differ
;; in whether operand 2 or operand 3 may be memory.
1723 (define_insn "sse5_vmfmadd<mode>4"
1724 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1725 (vec_merge:SSEMODEF2P
1728 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1730 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1733 "TARGET_SSE5 && TARGET_FUSED_MADD
1734 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1735 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1736 [(set_attr "type" "ssemuladd")
1737 (set_attr "mode" "<MODE>")])
1739 ;; Floating multiply and subtract
1740 ;; Allow two memory operands the same as fmadd
;; SSE5 multiply-subtract with the same operand numbering and the same
;; four constraint alternatives as sse5_fmadd<mode>4; emits fmsub*.
1741 (define_insn "sse5_fmsub<mode>4"
1742 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1745 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1746 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1747 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1748 "TARGET_SSE5 && TARGET_FUSED_MADD
1749 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1750 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1751 [(set_attr "type" "ssemuladd")
1752 (set_attr "mode" "<MODE>")])
1754 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Same conditions as the fmadd splitter: rejected by
;; ix86_sse5_valid_op_p with fifth argument 1, accepted with 2, and
;; operand 0 not mentioned in any input.  Re-emitted through
;; gen_sse5_fmsub<mode>4 after ix86_expand_sse5_multiple_memory.
1756 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1759 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1760 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1761 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1763 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1764 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1765 && !reg_mentioned_p (operands[0], operands[1])
1766 && !reg_mentioned_p (operands[0], operands[2])
1767 && !reg_mentioned_p (operands[0], operands[3])"
1770 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1771 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1772 operands[2], operands[3]));
1776 ;; For the scalar operations, use operand1 for the upper words that aren't
1777 ;; modified, so restrict the forms that are generated.
1778 ;; Scalar version of fmsub
;; Scalar multiply-subtract merged into a vector (vec_merge); operand 1 is
;; tied to the destination in both alternatives.
;; NOTE(review): the last ix86_sse5_valid_op_p argument is false here but
;; true in sse5_vmfmadd<mode>4 -- confirm this difference is intended.
1779 (define_insn "sse5_vmfmsub<mode>4"
1780 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1781 (vec_merge:SSEMODEF2P
1784 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1785 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1786 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1789 "TARGET_SSE5 && TARGET_FUSED_MADD
1790 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1791 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1792 [(set_attr "type" "ssemuladd")
1793 (set_attr "mode" "<MODE>")])
1795 ;; Floating point negative multiply and add
1796 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1797 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1798 ;; Allow two memory operands to help in optimizing.
;; SSE5 negated multiply-add.  In the RTL operand 3 appears first and the
;; multiplicands (operands 1 and 2) second, but the operand numbers and
;; the constraint alternatives match those of sse5_fmadd<mode>4.
1799 (define_insn "sse5_fnmadd<mode>4"
1800 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1802 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1804 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1805 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1806 "TARGET_SSE5 && TARGET_FUSED_MADD
1807 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1808 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1809 [(set_attr "type" "ssemuladd")
1810 (set_attr "mode" "<MODE>")])
1812 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Same conditions as the fmadd splitter; the insn is re-emitted through
;; gen_sse5_fnmadd<mode>4 after ix86_expand_sse5_multiple_memory.
1814 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1816 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1818 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1819 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1821 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1822 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1823 && !reg_mentioned_p (operands[0], operands[1])
1824 && !reg_mentioned_p (operands[0], operands[2])
1825 && !reg_mentioned_p (operands[0], operands[3])"
1828 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1829 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1830 operands[2], operands[3]));
1834 ;; For the scalar operations, use operand1 for the upper words that aren't
1835 ;; modified, so restrict the forms that are generated.
1836 ;; Scalar version of fnmadd
;; Scalar negated multiply-add merged into a vector (vec_merge).
;; Operand 3 appears first in the RTL; operand 1 is tied to the
;; destination in both alternatives.
1837 (define_insn "sse5_vmfnmadd<mode>4"
1838 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1839 (vec_merge:SSEMODEF2P
1841 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1843 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1844 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1847 "TARGET_SSE5 && TARGET_FUSED_MADD
1848 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1849 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1850 [(set_attr "type" "ssemuladd")
1851 (set_attr "mode" "<MODE>")])
1853 ;; Floating point negative multiply and subtract
1854 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1855 ;; Allow 2 memory operands to help with optimization
;; SSE5 negated multiply-subtract.  Only two constraint alternatives, both
;; with operand 1 tied to the destination and without the '%' marker; the
;; last ix86_sse5_valid_op_p argument is false here.
1856 (define_insn "sse5_fnmsub<mode>4"
1857 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1861 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1862 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1863 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1864 "TARGET_SSE5 && TARGET_FUSED_MADD
1865 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1866 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1867 [(set_attr "type" "ssemuladd")
1868 (set_attr "mode" "<MODE>")])
1870 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with
;; its fifth argument 1 but accepts them with 2, and operand 0 is not
;; mentioned in any input.  The insn is re-emitted through
;; gen_sse5_fnmsub<mode>4 after ix86_expand_sse5_multiple_memory.
1872 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1876 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1877 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1878 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1880 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1881 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1882 && !reg_mentioned_p (operands[0], operands[1])
1883 && !reg_mentioned_p (operands[0], operands[2])
1884 && !reg_mentioned_p (operands[0], operands[3])"
1887 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1888 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1889 operands[2], operands[3]));
1893 ;; For the scalar operations, use operand1 for the upper words that aren't
1894 ;; modified, so restrict the forms that are generated.
1895 ;; Scalar version of fnmsub
;; Scalar negated multiply-subtract merged into a vector (vec_merge);
;; operand 1 is tied to the destination in both alternatives.
;; NOTE(review): the fifth ix86_sse5_valid_op_p argument is 2 here, while
;; the other scalar vm* patterns in this group pass 1 -- confirm.
1896 (define_insn "sse5_vmfnmsub<mode>4"
1897 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1898 (vec_merge:SSEMODEF2P
1902 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1903 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1904 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1907 "TARGET_SSE5 && TARGET_FUSED_MADD
1908 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1909 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1910 [(set_attr "type" "ssemuladd")
1911 (set_attr "mode" "<MODE>")])
1913 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1914 ;; even if the user used -mno-fused-madd
1915 ;; Parallel instructions. During instruction generation, just default
1916 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the fmadd intrinsic: operands 0-3 are all register operands
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_fmadd<mode>4 with the same four operands.
1917 (define_expand "sse5i_fmadd<mode>4"
1918 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1922 (match_operand:SSEMODEF2P 1 "register_operand" "")
1923 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1924 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1925 UNSPEC_SSE5_INTRINSIC))]
1928 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1929 if (TARGET_FUSED_MADD)
1931 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1932 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC form of fmadd.  Four
;; alternatives: the destination is tied to operand 1 in the first two and to
;; operand 3 in the last two.  Requires only TARGET_SSE5 (no TARGET_FUSED_MADD
;; test) plus the ix86_sse5_valid_op_p check.
1937 (define_insn "*sse5i_fmadd<mode>4"
1938 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1942 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1943 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1944 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1945 UNSPEC_SSE5_INTRINSIC))]
1946 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1947 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1948 [(set_attr "type" "ssemuladd")
1949 (set_attr "mode" "<MODE>")])
;; Expander for the fmsub intrinsic: operands 0-3 are all register operands
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_fmsub<mode>4 with the same four operands.
1951 (define_expand "sse5i_fmsub<mode>4"
1952 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1956 (match_operand:SSEMODEF2P 1 "register_operand" "")
1957 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1958 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1959 UNSPEC_SSE5_INTRINSIC))]
1962 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1963 if (TARGET_FUSED_MADD)
1965 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1966 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC form of fmsub.  Four
;; alternatives: the destination is tied to operand 1 in the first two and to
;; operand 3 in the last two.  Requires only TARGET_SSE5 plus the
;; ix86_sse5_valid_op_p check.
1971 (define_insn "*sse5i_fmsub<mode>4"
1972 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1976 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1977 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1978 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1979 UNSPEC_SSE5_INTRINSIC))]
1980 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1981 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1982 [(set_attr "type" "ssemuladd")
1983 (set_attr "mode" "<MODE>")])
1985 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1986 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the fnmadd intrinsic: operand 3 appears in the pattern before
;; operands 1 and 2; all are register operands.  When TARGET_FUSED_MADD is set
;; the preparation code emits gen_sse5_fnmadd<mode>4 with operands 0-3.
1987 (define_expand "sse5i_fnmadd<mode>4"
1988 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1991 (match_operand:SSEMODEF2P 3 "register_operand" "")
1993 (match_operand:SSEMODEF2P 1 "register_operand" "")
1994 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1995 UNSPEC_SSE5_INTRINSIC))]
1998 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1999 if (TARGET_FUSED_MADD)
2001 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
2002 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC form of fnmadd.  Operand 3 is
;; listed before operands 1 and 2 in the pattern; the destination is tied to
;; operand 1 in the first two alternatives and to operand 3 in the last two.
2007 (define_insn "*sse5i_fnmadd<mode>4"
2008 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2011 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
2013 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
2014 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
2015 UNSPEC_SSE5_INTRINSIC))]
2016 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2017 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2018 [(set_attr "type" "ssemuladd")
2019 (set_attr "mode" "<MODE>")])
2021 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the fnmsub intrinsic: operands 0-3 are all register operands
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_fnmsub<mode>4 with the same four operands.
2022 (define_expand "sse5i_fnmsub<mode>4"
2023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2028 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2029 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2030 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
2031 UNSPEC_SSE5_INTRINSIC))]
2034 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2035 if (TARGET_FUSED_MADD)
2037 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
2038 operands[2], operands[3]));
;; Matching insn for the UNSPEC_SSE5_INTRINSIC form of fnmsub.  Unlike the
;; fmadd/fmsub/fnmadd intrinsic insns, operand 1 carries no "%" modifier here
;; and the last ix86_sse5_valid_op_p argument is false.
2043 (define_insn "*sse5i_fnmsub<mode>4"
2044 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2049 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2050 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2051 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2052 UNSPEC_SSE5_INTRINSIC))]
2053 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2054 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2055 [(set_attr "type" "ssemuladd")
2056 (set_attr "mode" "<MODE>")])
2058 ;; Scalar instructions
;; Expander for the scalar fmadd intrinsic: a vec_merge inside
;; UNSPEC_SSE5_INTRINSIC with register operands 1-3.  When TARGET_FUSED_MADD
;; is set the preparation code emits gen_sse5_vmfmadd<mode>4.
2059 (define_expand "sse5i_vmfmadd<mode>4"
2060 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2062 [(vec_merge:SSEMODEF2P
2065 (match_operand:SSEMODEF2P 1 "register_operand" "")
2066 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2067 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2070 UNSPEC_SSE5_INTRINSIC))]
2073 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2074 if (TARGET_FUSED_MADD)
2076 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2077 operands[2], operands[3]));
2082 ;; For the scalar operations, use operand1 for the upper words that aren't
2083 ;; modified, so restrict the forms that are accepted.
;; Operand 1 must be a register tied to the destination in both alternatives;
;; exactly one of operands 2 and 3 accepts memory in each.  The mode attribute
;; is <ssescalarmode> rather than <MODE>.
2084 (define_insn "*sse5i_vmfmadd<mode>4"
2085 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2087 [(vec_merge:SSEMODEF2P
2090 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2091 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2092 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2095 UNSPEC_SSE5_INTRINSIC))]
2096 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2097 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2098 [(set_attr "type" "ssemuladd")
2099 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fmsub intrinsic: a vec_merge inside
;; UNSPEC_SSE5_INTRINSIC with register operands 1-3.  When TARGET_FUSED_MADD
;; is set the preparation code emits gen_sse5_vmfmsub<mode>4.
2101 (define_expand "sse5i_vmfmsub<mode>4"
2102 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2104 [(vec_merge:SSEMODEF2P
2107 (match_operand:SSEMODEF2P 1 "register_operand" "")
2108 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2109 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2112 UNSPEC_SSE5_INTRINSIC))]
2115 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2116 if (TARGET_FUSED_MADD)
2118 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2119 operands[2], operands[3]));
;; Matching insn for the scalar fmsub intrinsic.  Operand 1 must be a
;; register tied to the destination in both alternatives; exactly one of
;; operands 2 and 3 accepts memory in each.
2124 (define_insn "*sse5i_vmfmsub<mode>4"
2125 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2127 [(vec_merge:SSEMODEF2P
2130 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2131 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2132 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2135 UNSPEC_SSE5_INTRINSIC))]
2136 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2137 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2138 [(set_attr "type" "ssemuladd")
2139 (set_attr "mode" "<ssescalarmode>")])
2141 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar fnmadd intrinsic: operand 3 appears in the pattern
;; before operands 1 and 2; all are register operands.  When TARGET_FUSED_MADD
;; is set the preparation code emits gen_sse5_vmfnmadd<mode>4.
2142 (define_expand "sse5i_vmfnmadd<mode>4"
2143 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2145 [(vec_merge:SSEMODEF2P
2147 (match_operand:SSEMODEF2P 3 "register_operand" "")
2149 (match_operand:SSEMODEF2P 1 "register_operand" "")
2150 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2153 UNSPEC_SSE5_INTRINSIC))]
2156 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2157 if (TARGET_FUSED_MADD)
2159 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2160 operands[2], operands[3]));
;; Matching insn for the scalar fnmadd intrinsic.  Operand 1 is tied to the
;; destination in both alternatives.
;; NOTE(review): operand 1 uses nonimmediate_operand with a "%" modifier here,
;; whereas the other scalar intrinsic insns use register_operand "0,0" --
;; confirm this difference is intended.
2165 (define_insn "*sse5i_vmfnmadd<mode>4"
2166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2168 [(vec_merge:SSEMODEF2P
2170 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2172 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2173 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2176 UNSPEC_SSE5_INTRINSIC))]
2177 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2178 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2179 [(set_attr "type" "ssemuladd")
2180 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fnmsub intrinsic: a vec_merge inside
;; UNSPEC_SSE5_INTRINSIC with register operands 1-3.  When TARGET_FUSED_MADD
;; is set the preparation code emits gen_sse5_vmfnmsub<mode>4.
2182 (define_expand "sse5i_vmfnmsub<mode>4"
2183 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2185 [(vec_merge:SSEMODEF2P
2189 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2190 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2191 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2194 UNSPEC_SSE5_INTRINSIC))]
2197 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2198 if (TARGET_FUSED_MADD)
2200 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2201 operands[2], operands[3]));
;; Matching insn for the scalar fnmsub intrinsic.  Operand 1 must be a
;; register tied to the destination in both alternatives; exactly one of
;; operands 2 and 3 accepts memory in each.
2206 (define_insn "*sse5i_vmfnmsub<mode>4"
2207 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2209 [(vec_merge:SSEMODEF2P
2213 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2214 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2215 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2218 UNSPEC_SSE5_INTRINSIC))]
2219 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2220 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2221 [(set_attr "type" "ssemuladd")
2222 (set_attr "mode" "<ssescalarmode>")])
2224 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2226 ;; Parallel single-precision floating point conversion operations
2228 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: the V2SI operand 2 ("ym") is converted with float:V2SF; operand 1
;; is a V4SF register tied to the destination.
2230 (define_insn "sse_cvtpi2ps"
2231 [(set (match_operand:V4SF 0 "register_operand" "=x")
2234 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2235 (match_operand:V4SF 1 "register_operand" "0")
2238 "cvtpi2ps\t{%2, %0|%0, %2}"
2239 [(set_attr "type" "ssecvt")
2240 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF operand 1 goes through a V4SI unspec and elements 0 and
;; 1 are selected into the V2SI destination, which uses the "y" constraint.
2242 (define_insn "sse_cvtps2pi"
2243 [(set (match_operand:V2SI 0 "register_operand" "=y")
2245 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2247 (parallel [(const_int 0) (const_int 1)])))]
2249 "cvtps2pi\t{%1, %0|%0, %1}"
2250 [(set_attr "type" "ssecvt")
2251 (set_attr "unit" "mmx")
2252 (set_attr "mode" "DI")])
;; cvttps2pi: the V4SF operand 1 is converted with fix:V4SI and elements 0 and
;; 1 are selected into the V2SI destination ("y" constraint).
;; NOTE(review): the mode attribute is "SF" here but "DI" in sse_cvtps2pi --
;; confirm which is intended.
2254 (define_insn "sse_cvttps2pi"
2255 [(set (match_operand:V2SI 0 "register_operand" "=y")
2257 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2258 (parallel [(const_int 0) (const_int 1)])))]
2260 "cvttps2pi\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "unit" "mmx")
2263 (set_attr "prefix_rep" "0")
2264 (set_attr "mode" "SF")])
;; vcvtsi2ss, three-operand form: the SI operand 2 (register or memory) is
;; converted with float:SF; operand 1 is a separate V4SF register that is not
;; tied to the destination.
2266 (define_insn "*avx_cvtsi2ss"
2267 [(set (match_operand:V4SF 0 "register_operand" "=x")
2270 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2271 (match_operand:V4SF 1 "register_operand" "x")
2274 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2275 [(set_attr "type" "sseicvt")
2276 (set_attr "prefix" "vex")
2277 (set_attr "mode" "SF")])
;; cvtsi2ss, two-operand form: operand 1 is tied to the destination.  The two
;; alternatives separate a register source ("r") from a memory source ("m") so
;; the athlon/amdfam10 decode attributes can differ per alternative.
2279 (define_insn "sse_cvtsi2ss"
2280 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2283 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2284 (match_operand:V4SF 1 "register_operand" "0,0")
2287 "cvtsi2ss\t{%2, %0|%0, %2}"
2288 [(set_attr "type" "sseicvt")
2289 (set_attr "athlon_decode" "vector,double")
2290 (set_attr "amdfam10_decode" "vector,double")
2291 (set_attr "mode" "SF")])
;; vcvtsi2ssq: DImode-source variant of the AVX int-to-float conversion,
;; available only when both TARGET_AVX and TARGET_64BIT hold.
2293 (define_insn "*avx_cvtsi2ssq"
2294 [(set (match_operand:V4SF 0 "register_operand" "=x")
2297 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2298 (match_operand:V4SF 1 "register_operand" "x")
2300 "TARGET_AVX && TARGET_64BIT"
2301 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2302 [(set_attr "type" "sseicvt")
2303 (set_attr "length_vex" "4")
2304 (set_attr "prefix" "vex")
2305 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source variant, available only when both TARGET_SSE and
;; TARGET_64BIT hold; operand 1 is tied to the destination.
;; NOTE(review): the second alternative of operand 2 is "rm" here, while
;; sse_cvtsi2ss uses "m" for the same position -- confirm this is intended,
;; since the decode attributes are listed per alternative.
2307 (define_insn "sse_cvtsi2ssq"
2308 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2311 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2312 (match_operand:V4SF 1 "register_operand" "0,0")
2314 "TARGET_SSE && TARGET_64BIT"
2315 "cvtsi2ssq\t{%2, %0|%0, %2}"
2316 [(set_attr "type" "sseicvt")
2317 (set_attr "prefix_rex" "1")
2318 (set_attr "athlon_decode" "vector,double")
2319 (set_attr "amdfam10_decode" "vector,double")
2320 (set_attr "mode" "SF")])
;; cvtss2si on element 0 of the V4SF operand 1 (register or memory
;; alternative), wrapped in UNSPEC_FIX_NOTRUNC, producing an SI register.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2si_2 -- confirm whether that is deliberate.
2322 (define_insn "sse_cvtss2si"
2323 [(set (match_operand:SI 0 "register_operand" "=r,r")
2326 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2327 (parallel [(const_int 0)]))]
2328 UNSPEC_FIX_NOTRUNC))]
2330 "%vcvtss2si\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "sseicvt")
2332 (set_attr "athlon_decode" "double,vector")
2333 (set_attr "prefix_rep" "1")
2334 (set_attr "prefix" "maybe_vex")
2335 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is an SFmode operand directly (register
;; or memory) rather than an element selected from a V4SF vector.
2337 (define_insn "sse_cvtss2si_2"
2338 [(set (match_operand:SI 0 "register_operand" "=r,r")
2339 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2340 UNSPEC_FIX_NOTRUNC))]
2342 "%vcvtss2si\t{%1, %0|%0, %1}"
2343 [(set_attr "type" "sseicvt")
2344 (set_attr "athlon_decode" "double,vector")
2345 (set_attr "amdfam10_decode" "double,double")
2346 (set_attr "prefix_rep" "1")
2347 (set_attr "prefix" "maybe_vex")
2348 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result variant on element 0 of the V4SF operand 1,
;; wrapped in UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE and TARGET_64BIT.
2350 (define_insn "sse_cvtss2siq"
2351 [(set (match_operand:DI 0 "register_operand" "=r,r")
2354 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2355 (parallel [(const_int 0)]))]
2356 UNSPEC_FIX_NOTRUNC))]
2357 "TARGET_SSE && TARGET_64BIT"
2358 "%vcvtss2siq\t{%1, %0|%0, %1}"
2359 [(set_attr "type" "sseicvt")
2360 (set_attr "athlon_decode" "double,vector")
2361 (set_attr "prefix_rep" "1")
2362 (set_attr "prefix" "maybe_vex")
2363 (set_attr "mode" "DI")])
;; Variant of cvtss2siq whose source is an SFmode operand directly rather
;; than an element of a V4SF vector.  Requires TARGET_SSE and TARGET_64BIT.
2365 (define_insn "sse_cvtss2siq_2"
2366 [(set (match_operand:DI 0 "register_operand" "=r,r")
2367 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2368 UNSPEC_FIX_NOTRUNC))]
2369 "TARGET_SSE && TARGET_64BIT"
2370 "%vcvtss2siq\t{%1, %0|%0, %1}"
2371 [(set_attr "type" "sseicvt")
2372 (set_attr "athlon_decode" "double,vector")
2373 (set_attr "amdfam10_decode" "double,double")
2374 (set_attr "prefix_rep" "1")
2375 (set_attr "prefix" "maybe_vex")
2376 (set_attr "mode" "DI")])
;; cvttss2si on element 0 of the V4SF operand 1 (register or memory
;; alternative), producing an SI register.
2378 (define_insn "sse_cvttss2si"
2379 [(set (match_operand:SI 0 "register_operand" "=r,r")
2382 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2383 (parallel [(const_int 0)]))))]
2385 "%vcvttss2si\t{%1, %0|%0, %1}"
2386 [(set_attr "type" "sseicvt")
2387 (set_attr "athlon_decode" "double,vector")
2388 (set_attr "amdfam10_decode" "double,double")
2389 (set_attr "prefix_rep" "1")
2390 (set_attr "prefix" "maybe_vex")
2391 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result variant on element 0 of the V4SF operand 1.
;; Requires TARGET_SSE and TARGET_64BIT.
2393 (define_insn "sse_cvttss2siq"
2394 [(set (match_operand:DI 0 "register_operand" "=r,r")
2397 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2398 (parallel [(const_int 0)]))))]
2399 "TARGET_SSE && TARGET_64BIT"
2400 "%vcvttss2siq\t{%1, %0|%0, %1}"
2401 [(set_attr "type" "sseicvt")
2402 (set_attr "athlon_decode" "double,vector")
2403 (set_attr "amdfam10_decode" "double,double")
2404 (set_attr "prefix_rep" "1")
2405 (set_attr "prefix" "maybe_vex")
2406 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: a float conversion of
;; operand 1, whose mode is given by the <avxcvtvecmode> attribute.
2408 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2409 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2410 (float:AVXMODEDCVTDQ2PS
2411 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2413 "vcvtdq2ps\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "prefix" "vex")
2416 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: float conversion of the V4SI operand 1 (register or memory) into
;; a V4SF register.
2418 (define_insn "sse2_cvtdq2ps"
2419 [(set (match_operand:V4SF 0 "register_operand" "=x")
2420 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2422 "cvtdq2ps\t{%1, %0|%0, %1}"
2423 [(set_attr "type" "ssecvt")
2424 (set_attr "mode" "V4SF")])
;; Multi-step expansion converting the V4SI operand 1 to V4SF: a float
;; conversion, an lt comparison of operand 5 against operand 3, an and of
;; operand 6 with operand 4, and a final plus of operands 5 and 7 into
;; operand 0.  The preparation code sets operand 3 to a zero V4SF vector,
;; operand 4 to a vector built from TWO32r (real_ldexp of dconst1 with
;; exponent 32), and operands 5-7 to fresh V4SF pseudos.
;; NOTE(review): going by the name this is the unsigned conversion, adding
;; TWO32r back where the signed result is negative -- confirm.
2426 (define_expand "sse2_cvtudq2ps"
2428 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2430 (lt:V4SF (match_dup 5) (match_dup 3)))
2432 (and:V4SF (match_dup 6) (match_dup 4)))
2433 (set (match_operand:V4SF 0 "register_operand" "")
2434 (plus:V4SF (match_dup 5) (match_dup 7)))]
2437 REAL_VALUE_TYPE TWO32r;
2441 real_ldexp (&TWO32r, &dconst1, 32);
2442 x = const_double_from_real_value (TWO32r, SFmode);
2444 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2445 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2447 for (i = 5; i < 8; i++)
2448 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq over the AVXMODEDCVTPS2DQ iterator: operand 1 (mode given by
;; <avxcvtvecmode>) wrapped in UNSPEC_FIX_NOTRUNC.
2451 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2452 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2453 (unspec:AVXMODEDCVTPS2DQ
2454 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2455 UNSPEC_FIX_NOTRUNC))]
2457 "vcvtps2dq\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "ssecvt")
2459 (set_attr "prefix" "vex")
2460 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: the V4SF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, producing a
;; V4SI register.
2462 (define_insn "sse2_cvtps2dq"
2463 [(set (match_operand:V4SI 0 "register_operand" "=x")
2464 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2465 UNSPEC_FIX_NOTRUNC))]
2467 "cvtps2dq\t{%1, %0|%0, %1}"
2468 [(set_attr "type" "ssecvt")
2469 (set_attr "prefix_data16" "1")
2470 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ iterator: a fix conversion of
;; operand 1, whose mode is given by <avxcvtvecmode>.
2472 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2473 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2474 (fix:AVXMODEDCVTPS2DQ
2475 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2477 "vcvttps2dq\t{%1, %0|%0, %1}"
2478 [(set_attr "type" "ssecvt")
2479 (set_attr "prefix" "vex")
2480 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: fix conversion of the V4SF operand 1 into a V4SI register.
;; The attributes explicitly set prefix_rep to 1 and prefix_data16 to 0.
2482 (define_insn "sse2_cvttps2dq"
2483 [(set (match_operand:V4SI 0 "register_operand" "=x")
2484 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2486 "cvttps2dq\t{%1, %0|%0, %1}"
2487 [(set_attr "type" "ssecvt")
2488 (set_attr "prefix_rep" "1")
2489 (set_attr "prefix_data16" "0")
2490 (set_attr "mode" "TI")])
2492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2494 ;; Parallel double-precision floating point conversion operations
2496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float conversion of the V2SI operand 1 into a V2DF register.
;; The first alternative takes a "y" register source, the second a memory
;; source; the unit and prefix_data16 attributes are set only for the first.
2498 (define_insn "sse2_cvtpi2pd"
2499 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2500 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2502 "cvtpi2pd\t{%1, %0|%0, %1}"
2503 [(set_attr "type" "ssecvt")
2504 (set_attr "unit" "mmx,*")
2505 (set_attr "prefix_data16" "1,*")
2506 (set_attr "mode" "V2DF")])
;; cvtpd2pi: the V2DF operand 1 wrapped in UNSPEC_FIX_NOTRUNC, producing a
;; V2SI result in a "y" register.
2508 (define_insn "sse2_cvtpd2pi"
2509 [(set (match_operand:V2SI 0 "register_operand" "=y")
2510 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2511 UNSPEC_FIX_NOTRUNC))]
2513 "cvtpd2pi\t{%1, %0|%0, %1}"
2514 [(set_attr "type" "ssecvt")
2515 (set_attr "unit" "mmx")
2516 (set_attr "prefix_data16" "1")
2517 (set_attr "mode" "DI")])
;; cvttpd2pi: fix conversion of the V2DF operand 1 into a V2SI result in a
;; "y" register.
;; NOTE(review): the mode attribute is "TI" here but "DI" in sse2_cvtpd2pi --
;; confirm which is intended.
2519 (define_insn "sse2_cvttpd2pi"
2520 [(set (match_operand:V2SI 0 "register_operand" "=y")
2521 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2523 "cvttpd2pi\t{%1, %0|%0, %1}"
2524 [(set_attr "type" "ssecvt")
2525 (set_attr "unit" "mmx")
2526 (set_attr "prefix_data16" "1")
2527 (set_attr "mode" "TI")])
;; vcvtsi2sd, three-operand form: the SI operand 2 (register or memory) is
;; converted with float:DF; operand 1 is a separate V2DF register that is not
;; tied to the destination.
2529 (define_insn "*avx_cvtsi2sd"
2530 [(set (match_operand:V2DF 0 "register_operand" "=x")
2533 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2534 (match_operand:V2DF 1 "register_operand" "x")
2537 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2538 [(set_attr "type" "sseicvt")
2539 (set_attr "prefix" "vex")
2540 (set_attr "mode" "DF")])
;; cvtsi2sd, two-operand form: operand 1 is tied to the destination.  The two
;; alternatives separate a register source ("r") from a memory source ("m")
;; so the decode attributes can differ per alternative.
2542 (define_insn "sse2_cvtsi2sd"
2543 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2546 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2547 (match_operand:V2DF 1 "register_operand" "0,0")
2550 "cvtsi2sd\t{%2, %0|%0, %2}"
2551 [(set_attr "type" "sseicvt")
2552 (set_attr "mode" "DF")
2553 (set_attr "athlon_decode" "double,direct")
2554 (set_attr "amdfam10_decode" "vector,double")])
;; vcvtsi2sdq: DImode-source variant of the AVX int-to-double conversion,
;; available only when both TARGET_AVX and TARGET_64BIT hold.
2556 (define_insn "*avx_cvtsi2sdq"
2557 [(set (match_operand:V2DF 0 "register_operand" "=x")
2560 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2561 (match_operand:V2DF 1 "register_operand" "x")
2563 "TARGET_AVX && TARGET_64BIT"
2564 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2565 [(set_attr "type" "sseicvt")
2566 (set_attr "length_vex" "4")
2567 (set_attr "prefix" "vex")
2568 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode-source variant, available only when both TARGET_SSE2 and
;; TARGET_64BIT hold; operand 1 is tied to the destination.
2570 (define_insn "sse2_cvtsi2sdq"
2571 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2574 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2575 (match_operand:V2DF 1 "register_operand" "0,0")
2577 "TARGET_SSE2 && TARGET_64BIT"
2578 "cvtsi2sdq\t{%2, %0|%0, %2}"
2579 [(set_attr "type" "sseicvt")
2580 (set_attr "prefix_rex" "1")
2581 (set_attr "mode" "DF")
2582 (set_attr "athlon_decode" "double,direct")
2583 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on element 0 of the V2DF operand 1 (register or memory
;; alternative), wrapped in UNSPEC_FIX_NOTRUNC, producing an SI register.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse2_cvtsd2si_2 -- confirm whether that is deliberate.
2585 (define_insn "sse2_cvtsd2si"
2586 [(set (match_operand:SI 0 "register_operand" "=r,r")
2589 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2590 (parallel [(const_int 0)]))]
2591 UNSPEC_FIX_NOTRUNC))]
2593 "%vcvtsd2si\t{%1, %0|%0, %1}"
2594 [(set_attr "type" "sseicvt")
2595 (set_attr "athlon_decode" "double,vector")
2596 (set_attr "prefix_rep" "1")
2597 (set_attr "prefix" "maybe_vex")
2598 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a DFmode operand directly (register or
;; memory) rather than an element selected from a V2DF vector.
2600 (define_insn "sse2_cvtsd2si_2"
2601 [(set (match_operand:SI 0 "register_operand" "=r,r")
2602 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2603 UNSPEC_FIX_NOTRUNC))]
2605 "%vcvtsd2si\t{%1, %0|%0, %1}"
2606 [(set_attr "type" "sseicvt")
2607 (set_attr "athlon_decode" "double,vector")
2608 (set_attr "amdfam10_decode" "double,double")
2609 (set_attr "prefix_rep" "1")
2610 (set_attr "prefix" "maybe_vex")
2611 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result variant on element 0 of the V2DF operand 1,
;; wrapped in UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and TARGET_64BIT.
2613 (define_insn "sse2_cvtsd2siq"
2614 [(set (match_operand:DI 0 "register_operand" "=r,r")
2617 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2618 (parallel [(const_int 0)]))]
2619 UNSPEC_FIX_NOTRUNC))]
2620 "TARGET_SSE2 && TARGET_64BIT"
2621 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2622 [(set_attr "type" "sseicvt")
2623 (set_attr "athlon_decode" "double,vector")
2624 (set_attr "prefix_rep" "1")
2625 (set_attr "prefix" "maybe_vex")
2626 (set_attr "mode" "DI")])
;; Variant of cvtsd2siq whose source is a DFmode operand directly rather
;; than an element of a V2DF vector.  Requires TARGET_SSE2 and TARGET_64BIT.
2628 (define_insn "sse2_cvtsd2siq_2"
2629 [(set (match_operand:DI 0 "register_operand" "=r,r")
2630 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2631 UNSPEC_FIX_NOTRUNC))]
2632 "TARGET_SSE2 && TARGET_64BIT"
2633 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2634 [(set_attr "type" "sseicvt")
2635 (set_attr "athlon_decode" "double,vector")
2636 (set_attr "amdfam10_decode" "double,double")
2637 (set_attr "prefix_rep" "1")
2638 (set_attr "prefix" "maybe_vex")
2639 (set_attr "mode" "DI")])
;; cvttsd2si on element 0 of the V2DF operand 1 (register or memory
;; alternative), producing an SI register.
2641 (define_insn "sse2_cvttsd2si"
2642 [(set (match_operand:SI 0 "register_operand" "=r,r")
2645 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2646 (parallel [(const_int 0)]))))]
2648 "%vcvttsd2si\t{%1, %0|%0, %1}"
2649 [(set_attr "type" "sseicvt")
2650 (set_attr "prefix_rep" "1")
2651 (set_attr "prefix" "maybe_vex")
2652 (set_attr "mode" "SI")
2653 (set_attr "athlon_decode" "double,vector")
2654 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant on element 0 of the V2DF operand 1.
;; Requires TARGET_SSE2 and TARGET_64BIT.
2656 (define_insn "sse2_cvttsd2siq"
2657 [(set (match_operand:DI 0 "register_operand" "=r,r")
2660 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2661 (parallel [(const_int 0)]))))]
2662 "TARGET_SSE2 && TARGET_64BIT"
2663 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2664 [(set_attr "type" "sseicvt")
2665 (set_attr "prefix_rep" "1")
2666 (set_attr "prefix" "maybe_vex")
2667 (set_attr "mode" "DI")
2668 (set_attr "athlon_decode" "double,vector")
2669 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: float conversion of the V4SI operand 1 into a
;; V4DF register.
2671 (define_insn "avx_cvtdq2pd256"
2672 [(set (match_operand:V4DF 0 "register_operand" "=x")
2673 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2675 "vcvtdq2pd\t{%1, %0|%0, %1}"
2676 [(set_attr "type" "ssecvt")
2677 (set_attr "prefix" "vex")
2678 (set_attr "mode" "V4DF")])
;; cvtdq2pd: elements 0 and 1 of the V4SI operand 1 are selected for
;; conversion into a V2DF register.
2680 (define_insn "sse2_cvtdq2pd"
2681 [(set (match_operand:V2DF 0 "register_operand" "=x")
2684 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2685 (parallel [(const_int 0) (const_int 1)]))))]
2687 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2688 [(set_attr "type" "ssecvt")
2689 (set_attr "prefix" "maybe_vex")
2690 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with the {y} suffix in the AT&T template: the V4DF operand 1
;; wrapped in UNSPEC_FIX_NOTRUNC, producing a V4SI register.
2692 (define_insn "avx_cvtpd2dq256"
2693 [(set (match_operand:V4SI 0 "register_operand" "=x")
2694 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2695 UNSPEC_FIX_NOTRUNC))]
2697 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2698 [(set_attr "type" "ssecvt")
2699 (set_attr "prefix" "vex")
2700 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: a V2SI unspec of the V2DF operand 1 feeds a V4SI
;; destination.  The preparation statement sets operand 2 to the V2SI zero
;; constant, which the matching *sse2_cvtpd2dq insn requires.
2702 (define_expand "sse2_cvtpd2dq"
2703 [(set (match_operand:V4SI 0 "register_operand" "")
2705 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2709 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvtpd2dq.  Operand 2 must satisfy const0_operand.  The
;; C output statement returns the "vcvtpd2dq{x}" template when TARGET_AVX is
;; set and the plain "cvtpd2dq" template otherwise.
2711 (define_insn "*sse2_cvtpd2dq"
2712 [(set (match_operand:V4SI 0 "register_operand" "=x")
2714 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2716 (match_operand:V2SI 2 "const0_operand" "")))]
2718 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2719 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2720 [(set_attr "type" "ssecvt")
2721 (set_attr "prefix_rep" "1")
2722 (set_attr "prefix_data16" "0")
2723 (set_attr "prefix" "maybe_vex")
2724 (set_attr "mode" "TI")
2725 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with the {y} suffix in the AT&T template: fix conversion of the
;; V4DF operand 1 into a V4SI register.
2727 (define_insn "avx_cvttpd2dq256"
2728 [(set (match_operand:V4SI 0 "register_operand" "=x")
2729 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2731 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2732 [(set_attr "type" "ssecvt")
2733 (set_attr "prefix" "vex")
2734 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: a fix:V2SI of the V2DF operand 1 feeds a V4SI
;; destination.  The preparation statement sets operand 2 to the V2SI zero
;; constant, which the matching *sse2_cvttpd2dq insn requires.
2736 (define_expand "sse2_cvttpd2dq"
2737 [(set (match_operand:V4SI 0 "register_operand" "")
2739 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2742 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvttpd2dq.  Operand 2 must satisfy const0_operand.  The
;; C output statement returns the "vcvttpd2dq{x}" template when TARGET_AVX is
;; set and the plain "cvttpd2dq" template otherwise.
2744 (define_insn "*sse2_cvttpd2dq"
2745 [(set (match_operand:V4SI 0 "register_operand" "=x")
2747 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2748 (match_operand:V2SI 2 "const0_operand" "")))]
2750 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2751 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2752 [(set_attr "type" "ssecvt")
2753 (set_attr "prefix" "maybe_vex")
2754 (set_attr "mode" "TI")
2755 (set_attr "amdfam10_decode" "double")])
;; vcvtsd2ss, three-operand form: float_truncate:V2SF of the V2DF operand 2;
;; operand 1 is a separate V4SF register that is not tied to the destination.
2757 (define_insn "*avx_cvtsd2ss"
2758 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V2SF
2762 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2763 (match_operand:V4SF 1 "register_operand" "x")
2766 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2767 [(set_attr "type" "ssecvt")
2768 (set_attr "prefix" "vex")
2769 (set_attr "mode" "SF")])
;; cvtsd2ss, two-operand form: float_truncate:V2SF of the V2DF operand 2;
;; operand 1 is tied to the destination.  Register and memory sources are
;; separate alternatives so the decode attributes can differ.
2771 (define_insn "sse2_cvtsd2ss"
2772 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2775 (float_truncate:V2SF
2776 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2777 (match_operand:V4SF 1 "register_operand" "0,0")
2780 "cvtsd2ss\t{%2, %0|%0, %2}"
2781 [(set_attr "type" "ssecvt")
2782 (set_attr "athlon_decode" "vector,double")
2783 (set_attr "amdfam10_decode" "vector,double")
2784 (set_attr "mode" "SF")])
;; vcvtss2sd, three-operand form: elements 0 and 1 of the V4SF operand 2 are
;; selected; operand 1 is a separate V2DF register that is not tied to the
;; destination.
2786 (define_insn "*avx_cvtss2sd"
2787 [(set (match_operand:V2DF 0 "register_operand" "=x")
2791 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2792 (parallel [(const_int 0) (const_int 1)])))
2793 (match_operand:V2DF 1 "register_operand" "x")
2796 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2797 [(set_attr "type" "ssecvt")
2798 (set_attr "prefix" "vex")
2799 (set_attr "mode" "DF")])
;; cvtss2sd, two-operand form: elements 0 and 1 of the V4SF operand 2 are
;; selected; operand 1 is tied to the destination.  Register and memory
;; sources are separate alternatives.
2801 (define_insn "sse2_cvtss2sd"
2802 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2806 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2807 (parallel [(const_int 0) (const_int 1)])))
2808 (match_operand:V2DF 1 "register_operand" "0,0")
2811 "cvtss2sd\t{%2, %0|%0, %2}"
2812 [(set_attr "type" "ssecvt")
2813 (set_attr "amdfam10_decode" "vector,double")
2814 (set_attr "mode" "DF")])
;; vcvtpd2ps with the {y} suffix in the AT&T template: float_truncate of the
;; V4DF operand 1 into a V4SF register.
2816 (define_insn "avx_cvtpd2ps256"
2817 [(set (match_operand:V4SF 0 "register_operand" "=x")
2818 (float_truncate:V4SF
2819 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2821 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2822 [(set_attr "type" "ssecvt")
2823 (set_attr "prefix" "vex")
2824 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate:V2SF of the V2DF operand 1 feeds a
;; V4SF destination.  The preparation statement sets operand 2 to the V2SF
;; zero constant, which the matching *sse2_cvtpd2ps insn requires.
2826 (define_expand "sse2_cvtpd2ps"
2827 [(set (match_operand:V4SF 0 "register_operand" "")
2829 (float_truncate:V2SF
2830 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2833 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for cvtpd2ps.  Operand 2 must satisfy const0_operand.  The
;; C output statement returns the "vcvtpd2ps{x}" template when TARGET_AVX is
;; set and the plain "cvtpd2ps" template otherwise.
2835 (define_insn "*sse2_cvtpd2ps"
2836 [(set (match_operand:V4SF 0 "register_operand" "=x")
2838 (float_truncate:V2SF
2839 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2840 (match_operand:V2SF 2 "const0_operand" "")))]
2842 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2843 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2844 [(set_attr "type" "ssecvt")
2845 (set_attr "prefix_data16" "1")
2846 (set_attr "prefix" "maybe_vex")
2847 (set_attr "mode" "V4SF")
2848 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd, 256-bit result: the V4SF operand 1 (register or memory)
;; produces a V4DF register.
2850 (define_insn "avx_cvtps2pd256"
2851 [(set (match_operand:V4DF 0 "register_operand" "=x")
2853 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2855 "vcvtps2pd\t{%1, %0|%0, %1}"
2856 [(set_attr "type" "ssecvt")
2857 (set_attr "prefix" "vex")
2858 (set_attr "mode" "V4DF")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand 1 are selected for
;; conversion into a V2DF register.  prefix_data16 is explicitly set to 0.
2860 (define_insn "sse2_cvtps2pd"
2861 [(set (match_operand:V2DF 0 "register_operand" "=x")
2864 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2865 (parallel [(const_int 0) (const_int 1)]))))]
2867 "%vcvtps2pd\t{%1, %0|%0, %1}"
2868 [(set_attr "type" "ssecvt")
2869 (set_attr "prefix" "maybe_vex")
2870 (set_attr "mode" "V2DF")
2871 (set_attr "prefix_data16" "0")
2872 (set_attr "amdfam10_decode" "direct")])
;; Two-step expansion from the V4SF operand 1 to the V2DF operand 0.  The
;; preparation code allocates a V4SF pseudo as operand 2; the final set
;; selects elements 0 and 1.
;; NOTE(review): presumably the first step moves the high half of operand 1
;; into the low half of operand 2 -- confirm against the full pattern.
2874 (define_expand "vec_unpacks_hi_v4sf"
2879 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2880 (parallel [(const_int 6)
2884 (set (match_operand:V2DF 0 "register_operand" "")
2888 (parallel [(const_int 0) (const_int 1)]))))]
2891 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing the V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1.
2894 (define_expand "vec_unpacks_lo_v4sf"
2895 [(set (match_operand:V2DF 0 "register_operand" "")
2898 (match_operand:V4SF 1 "nonimmediate_operand" "")
2899 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF via a V4SI temporary: emits gen_vec_unpacks_hi_v8hi into the
;; temporary, then gen_sse2_cvtdq2ps from the temporary into operand 0.
2902 (define_expand "vec_unpacks_float_hi_v8hi"
2903 [(match_operand:V4SF 0 "register_operand" "")
2904 (match_operand:V8HI 1 "register_operand" "")]
2907 rtx tmp = gen_reg_rtx (V4SImode);
2909 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2910 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF via a V4SI temporary: emits gen_vec_unpacks_lo_v8hi into the
;; temporary, then gen_sse2_cvtdq2ps from the temporary into operand 0.
2914 (define_expand "vec_unpacks_float_lo_v8hi"
2915 [(match_operand:V4SF 0 "register_operand" "")
2916 (match_operand:V8HI 1 "register_operand" "")]
2919 rtx tmp = gen_reg_rtx (V4SImode);
2921 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2922 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF via a V4SI temporary: emits gen_vec_unpacku_hi_v8hi into the
;; temporary, then gen_sse2_cvtdq2ps from the temporary into operand 0.
2926 (define_expand "vec_unpacku_float_hi_v8hi"
2927 [(match_operand:V4SF 0 "register_operand" "")
2928 (match_operand:V8HI 1 "register_operand" "")]
2931 rtx tmp = gen_reg_rtx (V4SImode);
2933 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2934 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF via a V4SI temporary: emits gen_vec_unpacku_lo_v8hi into the
;; temporary, then gen_sse2_cvtdq2ps from the temporary into operand 0.
2938 (define_expand "vec_unpacku_float_lo_v8hi"
2939 [(match_operand:V4SF 0 "register_operand" "")
2940 (match_operand:V8HI 1 "register_operand" "")]
2943 rtx tmp = gen_reg_rtx (V4SImode);
2945 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2946 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion from the V4SI operand 1 to the V2DF operand 0.  The
;; preparation statement allocates a V4SI pseudo as operand 2; the final set
;; selects elements 0 and 1.
;; NOTE(review): presumably the first step shuffles the high elements of
;; operand 1 into operand 2 -- confirm against the full pattern.
2950 (define_expand "vec_unpacks_float_hi_v4si"
2953 (match_operand:V4SI 1 "nonimmediate_operand" "")
2954 (parallel [(const_int 2)
2958 (set (match_operand:V2DF 0 "register_operand" "")
2962 (parallel [(const_int 0) (const_int 1)]))))]
2964 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing the V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1.
2966 (define_expand "vec_unpacks_float_lo_v4si"
2967 [(set (match_operand:V2DF 0 "register_operand" "")
2970 (match_operand:V4SI 1 "nonimmediate_operand" "")
2971 (parallel [(const_int 0) (const_int 1)]))))]
;; Multi-step expansion from the V4SI operand 1 to the V2DF operand 0: an
;; lt comparison of operand 6 against operand 3, an and of operand 7 with
;; operand 4, and a final plus of operands 6 and 8.  The preparation code
;; sets operand 3 to a zero V2DF vector, operand 4 to a vector built from
;; TWO32r (real_ldexp of dconst1 with exponent 32), operand 5 to a V4SI
;; pseudo, and operands 6-8 to V2DF pseudos.
;; NOTE(review): going by the name this is the unsigned conversion of the
;; high elements, adding TWO32r back where the signed result is negative --
;; confirm.
2974 (define_expand "vec_unpacku_float_hi_v4si"
2977 (match_operand:V4SI 1 "nonimmediate_operand" "")
2978 (parallel [(const_int 2)
2986 (parallel [(const_int 0) (const_int 1)]))))
2988 (lt:V2DF (match_dup 6) (match_dup 3)))
2990 (and:V2DF (match_dup 7) (match_dup 4)))
2991 (set (match_operand:V2DF 0 "register_operand" "")
2992 (plus:V2DF (match_dup 6) (match_dup 8)))]
2995 REAL_VALUE_TYPE TWO32r;
2999 real_ldexp (&TWO32r, &dconst1, 32);
3000 x = const_double_from_real_value (TWO32r, DFmode);
3002 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3003 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3005 operands[5] = gen_reg_rtx (V4SImode);
3007 for (i = 6; i < 9; i++)
3008 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander: V4SI operand 1 (elements 0 and 1) -> V2DF operand 0, with a
;; fix-up sequence.  The preparation code builds these helper operands:
;;   operand 3 = V2DF zero vector in a register,
;;   operand 4 = V2DF constant vector built from 1.0 * 2^32 (real_ldexp),
;;   operands 5..7 = fresh V2DF pseudos.
;; The pattern compares operand 5 against operand 3 (lt), ANDs operand 6
;; with operand 4, and sets operand 0 to operand 5 plus operand 7.
3011 (define_expand "vec_unpacku_float_lo_v4si"
3015 (match_operand:V4SI 1 "nonimmediate_operand" "")
3016 (parallel [(const_int 0) (const_int 1)]))))
3018 (lt:V2DF (match_dup 5) (match_dup 3)))
3020 (and:V2DF (match_dup 6) (match_dup 4)))
3021 (set (match_operand:V2DF 0 "register_operand" "")
3022 (plus:V2DF (match_dup 5) (match_dup 7)))]
3025 REAL_VALUE_TYPE TWO32r;
3029 real_ldexp (&TWO32r, &dconst1, 32);
3030 x = const_double_from_real_value (TWO32r, DFmode);
3032 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3033 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3035 for (i = 5; i < 8; i++)
3036 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander: pack two V2DF operands (1 and 2) into V4SF operand 0.
;; Each input is converted with gen_sse2_cvtpd2ps into its own V4SF
;; pseudo (r1, r2); gen_sse_movlhps then combines r1 and r2 into
;; operand 0.
3039 (define_expand "vec_pack_trunc_v2df"
3040 [(match_operand:V4SF 0 "register_operand" "")
3041 (match_operand:V2DF 1 "nonimmediate_operand" "")
3042 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3047 r1 = gen_reg_rtx (V4SFmode);
3048 r2 = gen_reg_rtx (V4SFmode);
3050 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3051 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3052 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: pack two V2DF operands (1 and 2) into V4SI operand 0.
;; Each input is converted with gen_sse2_cvttpd2dq into its own V4SI
;; pseudo (r1, r2); the two results are then combined by
;; gen_sse2_punpcklqdq, with all three registers viewed as V2DI via
;; gen_lowpart.
3056 (define_expand "vec_pack_sfix_trunc_v2df"
3057 [(match_operand:V4SI 0 "register_operand" "")
3058 (match_operand:V2DF 1 "nonimmediate_operand" "")
3059 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3064 r1 = gen_reg_rtx (V4SImode);
3065 r2 = gen_reg_rtx (V4SImode);
3067 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3068 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3069 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3070 gen_lowpart (V2DImode, r1),
3071 gen_lowpart (V2DImode, r2)));
;; Expander: pack two V2DF operands (1 and 2) into V4SI operand 0.
;; Same shape as vec_pack_sfix_trunc_v2df but converts with
;; gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq; the two V4SI
;; results are combined by gen_sse2_punpcklqdq on their V2DI lowparts.
3075 (define_expand "vec_pack_sfix_v2df"
3076 [(match_operand:V4SI 0 "register_operand" "")
3077 (match_operand:V2DF 1 "nonimmediate_operand" "")
3078 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3083 r1 = gen_reg_rtx (V4SImode);
3084 r2 = gen_reg_rtx (V4SImode);
3086 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3087 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3088 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3089 gen_lowpart (V2DImode, r1),
3090 gen_lowpart (V2DImode, r2)));
3094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3096 ;; Parallel single-precision floating point element swizzling
3098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper for the movhlps pattern: accepts nonimmediate
;; operands and legitimizes them with ix86_fixup_binary_operands
;; (code UNKNOWN, V4SFmode) before the pattern is emitted.
3100 (define_expand "sse_movhlps_exp"
3101 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3104 (match_operand:V4SF 1 "nonimmediate_operand" "")
3105 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3106 (parallel [(const_int 6)
3111 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (VEX-encoded) movhlps.  Rejected when operands 1 and 2 are both
;; memory.  Alternatives:
;;   0: reg <- reg, reg   vmovhlps
;;   1: reg <- reg, mem   vmovlps, memory operand 2 printed with %H
;;   2: mem <- (tied), reg vmovhps store of operand 2
3113 (define_insn "*avx_movhlps"
3114 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3117 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3118 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3119 (parallel [(const_int 6)
3123 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3125 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3126 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3127 vmovhps\t{%2, %0|%0, %2}"
3128 [(set_attr "type" "ssemov")
3129 (set_attr "prefix" "vex")
3130 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movhlps; operand 1 is tied to operand 0 in every
;; alternative.  Rejected when operands 1 and 2 are both memory.
;; Alternatives:
;;   0: reg, reg   movhlps
;;   1: reg, mem   movlps, memory operand 2 printed with %H
;;   2: mem, reg   movhps store of operand 2
3132 (define_insn "sse_movhlps"
3133 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3136 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3137 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3138 (parallel [(const_int 6)
3142 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3144 movhlps\t{%2, %0|%0, %2}
3145 movlps\t{%H2, %0|%0, %H2}
3146 movhps\t{%2, %0|%0, %2}"
3147 [(set_attr "type" "ssemov")
3148 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander wrapper for the movlhps pattern: accepts nonimmediate
;; operands and legitimizes them with ix86_fixup_binary_operands
;; (code UNKNOWN, V4SFmode) before the pattern is emitted.
3150 (define_expand "sse_movlhps_exp"
3151 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3154 (match_operand:V4SF 1 "nonimmediate_operand" "")
3155 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3156 (parallel [(const_int 0)
3161 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX (VEX-encoded) movlhps, valid when ix86_binary_operator_ok accepts
;; the operands.  Alternatives:
;;   0: reg <- reg, reg    vmovlhps
;;   1: reg <- reg, mem    vmovhps load of operand 2
;;   2: mem <- (tied), reg vmovlps store of operand 2 to %H0
3163 (define_insn "*avx_movlhps"
3164 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3167 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3168 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3169 (parallel [(const_int 0)
3173 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3175 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3176 vmovhps\t{%2, %1, %0|%0, %1, %2}
3177 vmovlps\t{%2, %H0|%H0, %2}"
3178 [(set_attr "type" "ssemov")
3179 (set_attr "prefix" "vex")
3180 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps; operand 1 is tied to operand 0 in every
;; alternative.  Valid when ix86_binary_operator_ok accepts the operands.
;; Alternatives:
;;   0: reg, reg   movlhps
;;   1: reg, mem   movhps load of operand 2
;;   2: mem, reg   movlps store of operand 2 to %H0
3182 (define_insn "sse_movlhps"
3183 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3186 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3187 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3188 (parallel [(const_int 0)
3192 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3194 movlhps\t{%2, %0|%0, %2}
3195 movhps\t{%2, %0|%0, %2}
3196 movlps\t{%2, %H0|%H0, %2}"
3197 [(set_attr "type" "ssemov")
3198 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps.  The selector picks elements 2,3 and 6,7 of
;; operand 1 interleaved with the same positions of operand 2
;; (indices 10,11 and 14,15 of the combined vector).
3200 (define_insn "avx_unpckhps256"
3201 [(set (match_operand:V8SF 0 "register_operand" "=x")
3204 (match_operand:V8SF 1 "register_operand" "x")
3205 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3206 (parallel [(const_int 2) (const_int 10)
3207 (const_int 3) (const_int 11)
3208 (const_int 6) (const_int 14)
3209 (const_int 7) (const_int 15)])))]
3211 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3212 [(set_attr "type" "sselog")
3213 (set_attr "prefix" "vex")
3214 (set_attr "mode" "V8SF")])
;; 128-bit VEX vunpckhps (three-operand form).  The selector interleaves
;; elements 2,3 of operand 1 with elements 2,3 of operand 2 (indices 6,7
;; of the combined vector).
3216 (define_insn "*avx_unpckhps"
3217 [(set (match_operand:V4SF 0 "register_operand" "=x")
3220 (match_operand:V4SF 1 "register_operand" "x")
3221 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3222 (parallel [(const_int 2) (const_int 6)
3223 (const_int 3) (const_int 7)])))]
3225 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3226 [(set_attr "type" "sselog")
3227 (set_attr "prefix" "vex")
3228 (set_attr "mode" "V4SF")])
;; Legacy SSE unpckhps (two-operand form; operand 1 is tied to the
;; destination).  Same element selection as *avx_unpckhps: 2,6,3,7.
3230 (define_insn "sse_unpckhps"
3231 [(set (match_operand:V4SF 0 "register_operand" "=x")
3234 (match_operand:V4SF 1 "register_operand" "0")
3235 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3236 (parallel [(const_int 2) (const_int 6)
3237 (const_int 3) (const_int 7)])))]
3239 "unpckhps\t{%2, %0|%0, %2}"
3240 [(set_attr "type" "sselog")
3241 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps.  The selector picks elements 0,1 and 4,5 of
;; operand 1 interleaved with the same positions of operand 2
;; (indices 8,9 and 12,13 of the combined vector).
3243 (define_insn "avx_unpcklps256"
3244 [(set (match_operand:V8SF 0 "register_operand" "=x")
3247 (match_operand:V8SF 1 "register_operand" "x")
3248 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3249 (parallel [(const_int 0) (const_int 8)
3250 (const_int 1) (const_int 9)
3251 (const_int 4) (const_int 12)
3252 (const_int 5) (const_int 13)])))]
3254 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3255 [(set_attr "type" "sselog")
3256 (set_attr "prefix" "vex")
3257 (set_attr "mode" "V8SF")])
;; 128-bit VEX vunpcklps (three-operand form).  The selector interleaves
;; elements 0,1 of operand 1 with elements 0,1 of operand 2 (indices 4,5
;; of the combined vector).
3259 (define_insn "*avx_unpcklps"
3260 [(set (match_operand:V4SF 0 "register_operand" "=x")
3263 (match_operand:V4SF 1 "register_operand" "x")
3264 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3265 (parallel [(const_int 0) (const_int 4)
3266 (const_int 1) (const_int 5)])))]
3268 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3269 [(set_attr "type" "sselog")
3270 (set_attr "prefix" "vex")
3271 (set_attr "mode" "V4SF")])
;; Legacy SSE unpcklps (two-operand form; operand 1 is tied to the
;; destination).  Same element selection as *avx_unpcklps: 0,4,1,5.
3273 (define_insn "sse_unpcklps"
3274 [(set (match_operand:V4SF 0 "register_operand" "=x")
3277 (match_operand:V4SF 1 "register_operand" "0")
3278 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3279 (parallel [(const_int 0) (const_int 4)
3280 (const_int 1) (const_int 5)])))]
3282 "unpcklps\t{%2, %0|%0, %2}"
3283 [(set_attr "type" "sselog")
3284 (set_attr "mode" "V4SF")])
3286 ;; These are modeled with the same vec_concat as the others so that we
3287 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: every odd-indexed element (1,3,5,7) of operand 1
;; appears twice in the selector.
3288 (define_insn "avx_movshdup256"
3289 [(set (match_operand:V8SF 0 "register_operand" "=x")
3292 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3294 (parallel [(const_int 1) (const_int 1)
3295 (const_int 3) (const_int 3)
3296 (const_int 5) (const_int 5)
3297 (const_int 7) (const_int 7)])))]
3299 "vmovshdup\t{%1, %0|%0, %1}"
3300 [(set_attr "type" "sse")
3301 (set_attr "prefix" "vex")
3302 (set_attr "mode" "V8SF")])
;; 128-bit movshdup.  The template uses the %v prefix and the insn is
;; marked "maybe_vex", so the same pattern serves both the legacy and
;; the VEX encoding.
3304 (define_insn "sse3_movshdup"
3305 [(set (match_operand:V4SF 0 "register_operand" "=x")
3308 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3310 (parallel [(const_int 1)
3315 "%vmovshdup\t{%1, %0|%0, %1}"
3316 [(set_attr "type" "sse")
3317 (set_attr "prefix_rep" "1")
3318 (set_attr "prefix" "maybe_vex")
3319 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: every even-indexed element (0,2,4,6) of operand 1
;; appears twice in the selector.
3321 (define_insn "avx_movsldup256"
3322 [(set (match_operand:V8SF 0 "register_operand" "=x")
3325 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3327 (parallel [(const_int 0) (const_int 0)
3328 (const_int 2) (const_int 2)
3329 (const_int 4) (const_int 4)
3330 (const_int 6) (const_int 6)])))]
3332 "vmovsldup\t{%1, %0|%0, %1}"
3333 [(set_attr "type" "sse")
3334 (set_attr "prefix" "vex")
3335 (set_attr "mode" "V8SF")])
;; 128-bit movsldup.  The template uses the %v prefix and the insn is
;; marked "maybe_vex", so the same pattern serves both the legacy and
;; the VEX encoding.
3337 (define_insn "sse3_movsldup"
3338 [(set (match_operand:V4SF 0 "register_operand" "=x")
3341 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3343 (parallel [(const_int 0)
3348 "%vmovsldup\t{%1, %0|%0, %1}"
3349 [(set_attr "type" "sse")
3350 (set_attr "prefix_rep" "1")
3351 (set_attr "prefix" "maybe_vex")
3352 (set_attr "mode" "V4SF")])
;; Expander for the 256-bit shufps with an immediate mask (operand 3).
;; The mask is split into four 2-bit fields.  Each field yields two
;; selector indices for gen_avx_shufps256_1: the field value itself
;; (plus 8 for the two upper fields) and the same value plus 4.
3354 (define_expand "avx_shufps256"
3355 [(match_operand:V8SF 0 "register_operand" "")
3356 (match_operand:V8SF 1 "register_operand" "")
3357 (match_operand:V8SF 2 "nonimmediate_operand" "")
3358 (match_operand:SI 3 "const_int_operand" "")]
3361 int mask = INTVAL (operands[3]);
3362 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3363 GEN_INT ((mask >> 0) & 3),
3364 GEN_INT ((mask >> 2) & 3),
3365 GEN_INT (((mask >> 4) & 3) + 8),
3366 GEN_INT (((mask >> 6) & 3) + 8),
3367 GEN_INT (((mask >> 0) & 3) + 4),
3368 GEN_INT (((mask >> 2) & 3) + 4),
3369 GEN_INT (((mask >> 4) & 3) + 12),
3370 GEN_INT (((mask >> 6) & 3) + 12)));
3374 ;; Each 2-bit field in the mask selects 2 elements (one per group of four).
;; The insn condition requires selectors 7..10 to equal selectors 3..6
;; plus 4.  The output code rebuilds the 8-bit immediate from selectors
;; 3..6 only (subtracting 8 from operands 5 and 6) and stores it back in
;; operands[3] for printing.
3375 (define_insn "avx_shufps256_1"
3376 [(set (match_operand:V8SF 0 "register_operand" "=x")
3379 (match_operand:V8SF 1 "register_operand" "x")
3380 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3381 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3382 (match_operand 4 "const_0_to_3_operand" "")
3383 (match_operand 5 "const_8_to_11_operand" "")
3384 (match_operand 6 "const_8_to_11_operand" "")
3385 (match_operand 7 "const_4_to_7_operand" "")
3386 (match_operand 8 "const_4_to_7_operand" "")
3387 (match_operand 9 "const_12_to_15_operand" "")
3388 (match_operand 10 "const_12_to_15_operand" "")])))]
3390 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3391 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3392 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3393 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3396 mask = INTVAL (operands[3]);
3397 mask |= INTVAL (operands[4]) << 2;
3398 mask |= (INTVAL (operands[5]) - 8) << 4;
3399 mask |= (INTVAL (operands[6]) - 8) << 6;
3400 operands[3] = GEN_INT (mask);
3402 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3404 [(set_attr "type" "sselog")
3405 (set_attr "length_immediate" "1")
3406 (set_attr "prefix" "vex")
3407 (set_attr "mode" "V8SF")])
;; Expander for 128-bit shufps with an immediate mask (operand 3).
;; The mask is split into four 2-bit fields; the two upper fields are
;; biased by 4 before being passed as selectors to gen_sse_shufps_v4sf.
3409 (define_expand "sse_shufps"
3410 [(match_operand:V4SF 0 "register_operand" "")
3411 (match_operand:V4SF 1 "register_operand" "")
3412 (match_operand:V4SF 2 "nonimmediate_operand" "")
3413 (match_operand:SI 3 "const_int_operand" "")]
3416 int mask = INTVAL (operands[3]);
3417 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3418 GEN_INT ((mask >> 0) & 3),
3419 GEN_INT ((mask >> 2) & 3),
3420 GEN_INT (((mask >> 4) & 3) + 4),
3421 GEN_INT (((mask >> 6) & 3) + 4)));
;; VEX three-operand shufps over the SSEMODE4S modes.  Selectors 3 and 4
;; index operand 1 (0..3); selectors 5 and 6 index operand 2 (4..7 in
;; the concatenation).  The output code folds the four selectors back
;; into one 8-bit immediate, removing the bias of 4 from operands 5
;; and 6, and stores it in operands[3] for printing.
3425 (define_insn "*avx_shufps_<mode>"
3426 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3427 (vec_select:SSEMODE4S
3428 (vec_concat:<ssedoublesizemode>
3429 (match_operand:SSEMODE4S 1 "register_operand" "x")
3430 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3431 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3432 (match_operand 4 "const_0_to_3_operand" "")
3433 (match_operand 5 "const_4_to_7_operand" "")
3434 (match_operand 6 "const_4_to_7_operand" "")])))]
3438 mask |= INTVAL (operands[3]) << 0;
3439 mask |= INTVAL (operands[4]) << 2;
3440 mask |= (INTVAL (operands[5]) - 4) << 4;
3441 mask |= (INTVAL (operands[6]) - 4) << 6;
3442 operands[3] = GEN_INT (mask);
3444 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3446 [(set_attr "type" "sselog")
3447 (set_attr "length_immediate" "1")
3448 (set_attr "prefix" "vex")
3449 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps over the SSEMODE4S modes; operand 1 is tied
;; to the destination.  The output code folds the four selectors back
;; into one 8-bit immediate, removing the bias of 4 from operands 5
;; and 6, and stores it in operands[3] for printing.
3451 (define_insn "sse_shufps_<mode>"
3452 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3453 (vec_select:SSEMODE4S
3454 (vec_concat:<ssedoublesizemode>
3455 (match_operand:SSEMODE4S 1 "register_operand" "0")
3456 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3457 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3458 (match_operand 4 "const_0_to_3_operand" "")
3459 (match_operand 5 "const_4_to_7_operand" "")
3460 (match_operand 6 "const_4_to_7_operand" "")])))]
3464 mask |= INTVAL (operands[3]) << 0;
3465 mask |= INTVAL (operands[4]) << 2;
3466 mask |= (INTVAL (operands[5]) - 4) << 4;
3467 mask |= (INTVAL (operands[6]) - 4) << 6;
3468 operands[3] = GEN_INT (mask);
3470 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3472 [(set_attr "type" "sselog")
3473 (set_attr "length_immediate" "1")
3474 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: mem <- reg   movhps store
;;   1: reg <- reg   movhlps
;;   2: reg <- mem   movlps load, memory operand printed with %H
;; All templates use the %v prefix (insn is "maybe_vex").
3476 (define_insn "sse_storehps"
3477 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3479 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3480 (parallel [(const_int 2) (const_int 3)])))]
3483 %vmovhps\t{%1, %0|%0, %1}
3484 %vmovhlps\t{%1, %d0|%d0, %1}
3485 %vmovlps\t{%H1, %d0|%d0, %H1}"
3486 [(set_attr "type" "ssemov")
3487 (set_attr "prefix" "maybe_vex")
3488 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for the loadhps pattern (elements 0,1 of operand 1
;; combined with V2SF operand 2).  Operands are legitimized with
;; ix86_fixup_binary_operands (code UNKNOWN, V4SFmode).
3490 (define_expand "sse_loadhps_exp"
3491 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3494 (match_operand:V4SF 1 "nonimmediate_operand" "")
3495 (parallel [(const_int 0) (const_int 1)]))
3496 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3498 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: elements 0,1 of operand 1 combined with V2SF operand 2.
;; Alternatives:
;;   0: reg <- reg, mem    vmovhps load
;;   1: reg <- reg, reg    vmovlhps
;;   2: mem <- (tied), reg vmovlps store of operand 2 to %H0
3500 (define_insn "*avx_loadhps"
3501 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3504 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3505 (parallel [(const_int 0) (const_int 1)]))
3506 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3509 vmovhps\t{%2, %1, %0|%0, %1, %2}
3510 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3511 vmovlps\t{%2, %H0|%H0, %2}"
3512 [(set_attr "type" "ssemov")
3513 (set_attr "prefix" "vex")
3514 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE form: elements 0,1 of operand 1 (tied to operand 0)
;; combined with V2SF operand 2.  Alternatives:
;;   0: reg, mem   movhps load
;;   1: reg, reg   movlhps
;;   2: mem, reg   movlps store of operand 2 to %H0
3516 (define_insn "sse_loadhps"
3517 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3520 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3521 (parallel [(const_int 0) (const_int 1)]))
3522 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3525 movhps\t{%2, %0|%0, %2}
3526 movlhps\t{%2, %0|%0, %2}
3527 movlps\t{%2, %H0|%H0, %2}"
3528 [(set_attr "type" "ssemov")
3529 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form: extract elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives:
;;   0: mem <- reg   vmovlps store
;;   1: reg <- reg   vmovaps full-register copy
;;   2: reg <- mem   vmovlps load, with %0 also used as the second source
3531 (define_insn "*avx_storelps"
3532 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3534 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3535 (parallel [(const_int 0) (const_int 1)])))]
3538 vmovlps\t{%1, %0|%0, %1}
3539 vmovaps\t{%1, %0|%0, %1}
3540 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3541 [(set_attr "type" "ssemov")
3542 (set_attr "prefix" "vex")
3543 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE form: extract elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives:
;;   0: mem <- reg   movlps store
;;   1: reg <- reg   movaps full-register copy
;;   2: reg <- mem   movlps load
3545 (define_insn "sse_storelps"
3546 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3548 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3549 (parallel [(const_int 0) (const_int 1)])))]
3552 movlps\t{%1, %0|%0, %1}
3553 movaps\t{%1, %0|%0, %1}
3554 movlps\t{%1, %0|%0, %1}"
3555 [(set_attr "type" "ssemov")
3556 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for the loadlps pattern (V2SF operand 2 combined
;; with elements 2,3 of operand 1).  Operands are legitimized with
;; ix86_fixup_binary_operands (code UNKNOWN, V4SFmode).
3558 (define_expand "sse_loadlps_exp"
3559 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3561 (match_operand:V2SF 2 "nonimmediate_operand" "")
3563 (match_operand:V4SF 1 "nonimmediate_operand" "")
3564 (parallel [(const_int 2) (const_int 3)]))))]
3566 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: V2SF operand 2 combined with elements 2,3 of V4SF operand 1.
;; Alternatives:
;;   0: reg <- reg, reg    vshufps with immediate 0xe4
;;   1: reg <- reg, mem    vmovlps load of operand 2
;;   2: mem <- (tied), reg vmovlps store of operand 2
;; Alternative 0 must use the VEX mnemonic: the template has three
;; register operands and the insn is marked prefix "vex", neither of
;; which is valid for legacy shufps.
3568 (define_insn "*avx_loadlps"
3569 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3571 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3573 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3574 (parallel [(const_int 2) (const_int 3)]))))]
3577 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3578 vmovlps\t{%2, %1, %0|%0, %1, %2}
3579 vmovlps\t{%2, %0|%0, %2}"
3580 [(set_attr "type" "sselog,ssemov,ssemov")
3581 (set_attr "length_immediate" "1,*,*")
3582 (set_attr "prefix" "vex")
3583 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form: V2SF operand 2 combined with elements 2,3 of V4SF
;; operand 1.  Alternatives:
;;   0: operand 2 tied to dest, operand 1 in reg   shufps with 0xe4
;;   1: operand 1 tied to dest, operand 2 in mem   movlps load
;;   2: dest in mem (tied to operand 1)            movlps store
3585 (define_insn "sse_loadlps"
3586 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3588 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3590 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3591 (parallel [(const_int 2) (const_int 3)]))))]
3594 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3595 movlps\t{%2, %0|%0, %2}
3596 movlps\t{%2, %0|%0, %2}"
3597 [(set_attr "type" "sselog,ssemov,ssemov")
3598 (set_attr "length_immediate" "1,*,*")
3599 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; VEX three-operand vmovss between registers: operands 1 and 2 are both
;; V4SF registers and the insn mode is SF.
3601 (define_insn "*avx_movss"
3602 [(set (match_operand:V4SF 0 "register_operand" "=x")
3604 (match_operand:V4SF 2 "register_operand" "x")
3605 (match_operand:V4SF 1 "register_operand" "x")
3608 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3609 [(set_attr "type" "ssemov")
3610 (set_attr "prefix" "vex")
3611 (set_attr "mode" "SF")])
;; Legacy two-operand movss between registers; operand 1 is tied to the
;; destination.
3613 (define_insn "sse_movss"
3614 [(set (match_operand:V4SF 0 "register_operand" "=x")
3616 (match_operand:V4SF 2 "register_operand" "x")
3617 (match_operand:V4SF 1 "register_operand" "0")
3620 "movss\t{%2, %0|%0, %2}"
3621 [(set_attr "type" "ssemov")
3622 (set_attr "mode" "SF")])
;; AVX broadcast of SF operand 1 into V4SF operand 0, implemented as
;; vshufps with immediate 0 and operand 1 as both sources.
3624 (define_insn "*vec_dupv4sf_avx"
3625 [(set (match_operand:V4SF 0 "register_operand" "=x")
3627 (match_operand:SF 1 "register_operand" "x")))]
3629 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3630 [(set_attr "type" "sselog1")
3631 (set_attr "length_immediate" "1")
3632 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V4SF")])
;; Legacy SSE broadcast of SF operand 1 (tied to the destination) into
;; V4SF operand 0, implemented as shufps of the register with itself
;; and immediate 0.
3635 (define_insn "*vec_dupv4sf"
3636 [(set (match_operand:V4SF 0 "register_operand" "=x")
3638 (match_operand:SF 1 "register_operand" "0")))]
3640 "shufps\t{$0, %0, %0|%0, %0, 0}"
3641 [(set_attr "type" "sselog1")
3642 (set_attr "length_immediate" "1")
3643 (set_attr "mode" "V4SF")])
;; AVX form: build V2SF operand 0 from SF operands 1 and 2.
;; Alternatives:
;;   0: x <- x, x     vunpcklps
;;   1: x <- x, m     vinsertps with immediate 0x10
;;   2: x <- m, C     vmovss load (operand 2 matches constraint C)
;;   3: y <- x, y/m   punpckldq (MMX register destination)
;;   4: y <- m, C     movd load (MMX register destination)
;; The "prefix" attribute is "orig" for the two MMX alternatives (3,4)
;; and "vex" for the others.
3645 (define_insn "*vec_concatv2sf_avx"
3646 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3648 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3649 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3652 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3653 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3654 vmovss\t{%1, %0|%0, %1}
3655 punpckldq\t{%2, %0|%0, %2}
3656 movd\t{%1, %0|%0, %1}"
3657 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3658 (set_attr "length_immediate" "*,1,*,*,*")
3659 (set_attr "prefix_extra" "*,1,*,*,*")
3660 (set (attr "prefix")
3661 (if_then_else (eq_attr "alternative" "3,4")
3662 (const_string "orig")
3663 (const_string "vex")))
3664 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3666 ;; Although insertps takes register source, we prefer
3667 ;; unpcklps with register source since it is shorter.
;; Legacy-encoded counterpart of *vec_concatv2sf_avx.  Alternatives:
;;   0: x (tied), x   unpcklps
;;   1: x (tied), m   insertps with immediate 0x10
;;   2: x <- m, C     movss load
;;   3: y (tied), y/m punpckldq (MMX register destination)
;;   4: y <- m, C     movd load (MMX register destination)
3668 (define_insn "*vec_concatv2sf_sse4_1"
3669 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3671 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3672 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3675 unpcklps\t{%2, %0|%0, %2}
3676 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3677 movss\t{%1, %0|%0, %1}
3678 punpckldq\t{%2, %0|%0, %2}
3679 movd\t{%1, %0|%0, %1}"
3680 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3681 (set_attr "prefix_data16" "*,1,*,*,*")
3682 (set_attr "prefix_extra" "*,1,*,*,*")
3683 (set_attr "length_immediate" "*,1,*,*,*")
3684 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3686 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3687 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3688 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Variant without insertps; operand 2 is restricted to
;; reg_or_0_operand.  Alternatives:
;;   0: x (tied), x   unpcklps
;;   1: x <- m, C     movss load
;;   2: y (tied), y   punpckldq (MMX register destination)
;;   3: y <- m, C     movd load (MMX register destination)
3689 (define_insn "*vec_concatv2sf_sse"
3690 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3692 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3693 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3696 unpcklps\t{%2, %0|%0, %2}
3697 movss\t{%1, %0|%0, %1}
3698 punpckldq\t{%2, %0|%0, %2}
3699 movd\t{%1, %0|%0, %1}"
3700 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3701 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX form: build V4SF operand 0 from V2SF operands 1 and 2.
;; Alternatives:
;;   0: reg, reg   vmovlhps
;;   1: reg, mem   vmovhps load of operand 2
3703 (define_insn "*vec_concatv4sf_avx"
3704 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3706 (match_operand:V2SF 1 "register_operand" " x,x")
3707 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3710 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3711 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3712 [(set_attr "type" "ssemov")
3713 (set_attr "prefix" "vex")
3714 (set_attr "mode" "V4SF,V2SF")])
;; Legacy SSE form: build V4SF operand 0 from V2SF operand 1 (tied to
;; the destination) and V2SF operand 2.  Alternatives:
;;   0: reg, reg   movlhps
;;   1: reg, mem   movhps load of operand 2
3716 (define_insn "*vec_concatv4sf_sse"
3717 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3719 (match_operand:V2SF 1 "register_operand" " 0,0")
3720 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3723 movlhps\t{%2, %0|%0, %2}
3724 movhps\t{%2, %0|%0, %2}"
3725 [(set_attr "type" "ssemov")
3726 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialization expander for every SSEMODE mode; delegates to
;; ix86_expand_vector_init with its first argument false.
3728 (define_expand "vec_init<mode>"
3729 [(match_operand:SSEMODE 0 "register_operand" "")
3730 (match_operand 1 "" "")]
3733 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX form: insert SF operand 2 into V4SF operand 1, giving operand 0.
;; Visible templates:
;;   0: x <- x (op2), x (op1)   vmovss three-operand
;;   1: x <- m (op2), C (op1)   vmovss load
;;   2: x <- r (op2), C (op1)   vmovd from a general register
;; The constraints declare a fourth alternative with operand 0 in memory
;; (tied to operand 1).
3737 (define_insn "*vec_setv4sf_0_avx"
3738 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3741 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3742 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3746 vmovss\t{%2, %1, %0|%0, %1, %2}
3747 vmovss\t{%2, %0|%0, %2}
3748 vmovd\t{%2, %0|%0, %2}
3750 [(set_attr "type" "ssemov")
3751 (set_attr "prefix" "vex")
3752 (set_attr "mode" "SF")])
;; Legacy SSE form: insert SF operand 2 into V4SF operand 1, giving
;; operand 0.  Visible templates:
;;   0: x (tied to op1) <- x    movss
;;   1: x <- m (op2), C (op1)   movss load
;;   2: Y2 <- r (op2), C (op1)  movd from a general register
;; The constraints declare a fourth alternative with operand 0 in memory
;; (tied to operand 1).
3754 (define_insn "vec_setv4sf_0"
3755 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3758 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3759 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3763 movss\t{%2, %0|%0, %2}
3764 movss\t{%2, %0|%0, %2}
3765 movd\t{%2, %0|%0, %2}
3767 [(set_attr "type" "ssemov")
3768 (set_attr "mode" "SF")])
3770 ;; A subset is vec_setv4sf.
;; AVX form: operand 3 is a power of two in 1..8.  The output code
;; turns it into an index with exact_log2, shifts it left by 4 to form
;; the vinsertps immediate, and stores that back in operands[3].
3771 (define_insn "*vec_setv4sf_avx"
3772 [(set (match_operand:V4SF 0 "register_operand" "=x")
3775 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3776 (match_operand:V4SF 1 "register_operand" "x")
3777 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3780 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3781 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3783 [(set_attr "type" "sselog")
3784 (set_attr "prefix_extra" "1")
3785 (set_attr "length_immediate" "1")
3786 (set_attr "prefix" "vex")
3787 (set_attr "mode" "V4SF")])
;; Legacy two-operand counterpart of *vec_setv4sf_avx; operand 1 is tied
;; to the destination.  Operand 3 (a power of two in 1..8) is converted
;; to exact_log2 (value) << 4 to form the insertps immediate.
3789 (define_insn "*vec_setv4sf_sse4_1"
3790 [(set (match_operand:V4SF 0 "register_operand" "=x")
3793 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3794 (match_operand:V4SF 1 "register_operand" "0")
3795 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3798 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3799 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3801 [(set_attr "type" "sselog")
3802 (set_attr "prefix_data16" "1")
3803 (set_attr "prefix_extra" "1")
3804 (set_attr "length_immediate" "1")
3805 (set_attr "mode" "V4SF")])
;; AVX vinsertps modeled as an unspec over operand 2 (register or
;; memory), operand 1 (register) and an 8-bit immediate (operand 3),
;; which is printed unchanged.
3807 (define_insn "*avx_insertps"
3808 [(set (match_operand:V4SF 0 "register_operand" "=x")
3809 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3810 (match_operand:V4SF 1 "register_operand" "x")
3811 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3814 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3815 [(set_attr "type" "sselog")
3816 (set_attr "prefix" "vex")
3817 (set_attr "prefix_extra" "1")
3818 (set_attr "length_immediate" "1")
3819 (set_attr "mode" "V4SF")])
;; Legacy insertps modeled as an unspec; operand 1 is tied to the
;; destination and operand 2 is restricted to a register here.  The
;; 8-bit immediate (operand 3) is printed unchanged.
3821 (define_insn "sse4_1_insertps"
3822 [(set (match_operand:V4SF 0 "register_operand" "=x")
3823 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3824 (match_operand:V4SF 1 "register_operand" "0")
3825 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3828 "insertps\t{%3, %2, %0|%0, %2, %3}";
3829 [(set_attr "type" "sselog")
3830 (set_attr "prefix_data16" "1")
3831 (set_attr "prefix_extra" "1")
3832 (set_attr "length_immediate" "1")
3833 (set_attr "mode" "V4SF")])
;; Split for a V4SF memory destination and a non-memory SF source,
;; enabled for TARGET_SSE after reload: it emits a plain SFmode move of
;; operand 1 to offset 0 of the memory operand.
3836 [(set (match_operand:V4SF 0 "memory_operand" "")
3839 (match_operand:SF 1 "nonmemory_operand" ""))
3842 "TARGET_SSE && reload_completed"
3845 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element-set expander for every SSEMODE mode: operand 1 is the scalar
;; value, operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with its first argument false.
3849 (define_expand "vec_set<mode>"
3850 [(match_operand:SSEMODE 0 "register_operand" "")
3851 (match_operand:<ssescalarmode> 1 "register_operand" "")
3852 (match_operand 2 "const_int_operand" "")]
3855 ix86_expand_vector_set (false, operands[0], operands[1],
3856 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0 (not both in
;; memory).  After reload the insn is split into a plain SFmode move:
;; operand 1 is re-expressed in SFmode, either as a hard register with
;; the same REGNO or through gen_lowpart.
3860 (define_insn_and_split "*vec_extractv4sf_0"
3861 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3863 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3864 (parallel [(const_int 0)])))]
3865 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3867 "&& reload_completed"
3870 rtx op1 = operands[1];
3872 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3874 op1 = gen_lowpart (SFmode, op1);
3875 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 over every AVX256MODE mode.  Operand 2 is
;; constrained to 0 or 1; the switch on its value dispatches to
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
3879 (define_expand "avx_vextractf128<mode>"
3880 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3881 (match_operand:AVX256MODE 1 "register_operand" "")
3882 (match_operand:SI 2 "const_0_to_1_operand" "")]
3885 switch (INTVAL (operands[2]))
3888 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3891 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Extract elements 0,1 (the low half) of a four-element 256-bit vector
;; (AVX256MODE4P) into a register or memory: vextractf128 with
;; immediate 0.
3899 (define_insn "vec_extract_lo_<mode>"
3900 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3901 (vec_select:<avxhalfvecmode>
3902 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3903 (parallel [(const_int 0) (const_int 1)])))]
3905 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3906 [(set_attr "type" "sselog")
3907 (set_attr "prefix_extra" "1")
3908 (set_attr "length_immediate" "1")
3909 (set_attr "memory" "none,store")
3910 (set_attr "prefix" "vex")
3911 (set_attr "mode" "V8SF")])
;; Extract elements 2,3 (the high half) of a four-element 256-bit vector
;; (AVX256MODE4P) into a register or memory: vextractf128 with
;; immediate 1.
3913 (define_insn "vec_extract_hi_<mode>"
3914 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3915 (vec_select:<avxhalfvecmode>
3916 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3917 (parallel [(const_int 2) (const_int 3)])))]
3919 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3920 [(set_attr "type" "sselog")
3921 (set_attr "prefix_extra" "1")
3922 (set_attr "length_immediate" "1")
3923 (set_attr "memory" "none,store")
3924 (set_attr "prefix" "vex")
3925 (set_attr "mode" "V8SF")])
;; Extract elements 0..3 (the low half) of an eight-element 256-bit
;; vector (AVX256MODE8P) into a register or memory.  The vextractf128
;; immediate must be 0 for the low half, matching the AVX256MODE4P
;; variant of this pattern; immediate 1 would return elements 4..7.
3927 (define_insn "vec_extract_lo_<mode>"
3928 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3929 (vec_select:<avxhalfvecmode>
3930 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3931 (parallel [(const_int 0) (const_int 1)
3932 (const_int 2) (const_int 3)])))]
3934 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3935 [(set_attr "type" "sselog")
3936 (set_attr "prefix_extra" "1")
3937 (set_attr "length_immediate" "1")
3938 (set_attr "memory" "none,store")
3939 (set_attr "prefix" "vex")
3940 (set_attr "mode" "V8SF")])
;; Extract elements 4..7 (the high half) of an eight-element 256-bit
;; vector (AVX256MODE8P) into a register or memory: vextractf128 with
;; immediate 1.
3942 (define_insn "vec_extract_hi_<mode>"
3943 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3944 (vec_select:<avxhalfvecmode>
3945 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3946 (parallel [(const_int 4) (const_int 5)
3947 (const_int 6) (const_int 7)])))]
3949 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3950 [(set_attr "type" "sselog")
3951 (set_attr "prefix_extra" "1")
3952 (set_attr "length_immediate" "1")
3953 (set_attr "memory" "none,store")
3954 (set_attr "prefix" "vex")
3955 (set_attr "mode" "V8SF")])
;; Extract elements 0..7 (the low half) of a V16HI register into a V8HI
;; register or memory.  The vextractf128 immediate must be 0 for the
;; low half; immediate 1 would return elements 8..15, which is what
;; vec_extract_hi_v16hi emits.
3957 (define_insn "vec_extract_lo_v16hi"
3958 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3960 (match_operand:V16HI 1 "register_operand" "x,x")
3961 (parallel [(const_int 0) (const_int 1)
3962 (const_int 2) (const_int 3)
3963 (const_int 4) (const_int 5)
3964 (const_int 6) (const_int 7)])))]
3966 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3967 [(set_attr "type" "sselog")
3968 (set_attr "prefix_extra" "1")
3969 (set_attr "length_immediate" "1")
3970 (set_attr "memory" "none,store")
3971 (set_attr "prefix" "vex")
3972 (set_attr "mode" "V8SF")])
;; Extract elements 8..15 (the high half) of a V16HI register into a
;; V8HI register or memory: vextractf128 with immediate 1.
3974 (define_insn "vec_extract_hi_v16hi"
3975 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3977 (match_operand:V16HI 1 "register_operand" "x,x")
3978 (parallel [(const_int 8) (const_int 9)
3979 (const_int 10) (const_int 11)
3980 (const_int 12) (const_int 13)
3981 (const_int 14) (const_int 15)])))]
3983 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3984 [(set_attr "type" "sselog")
3985 (set_attr "prefix_extra" "1")
3986 (set_attr "length_immediate" "1")
3987 (set_attr "memory" "none,store")
3988 (set_attr "prefix" "vex")
3989 (set_attr "mode" "V8SF")])
;; Extract elements 0..15 (the low half) of a V32QI register into a
;; V16QI register or memory.  The vextractf128 immediate must be 0 for
;; the low half; immediate 1 would return elements 16..31, which is
;; what vec_extract_hi_v32qi emits.
3991 (define_insn "vec_extract_lo_v32qi"
3992 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3994 (match_operand:V32QI 1 "register_operand" "x,x")
3995 (parallel [(const_int 0) (const_int 1)
3996 (const_int 2) (const_int 3)
3997 (const_int 4) (const_int 5)
3998 (const_int 6) (const_int 7)
3999 (const_int 8) (const_int 9)
4000 (const_int 10) (const_int 11)
4001 (const_int 12) (const_int 13)
4002 (const_int 14) (const_int 15)])))]
4004 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4005 [(set_attr "type" "sselog")
4006 (set_attr "prefix_extra" "1")
4007 (set_attr "length_immediate" "1")
4008 (set_attr "memory" "none,store")
4009 (set_attr "prefix" "vex")
4010 (set_attr "mode" "V8SF")])
;; Extract elements 16..31 (the high half) of a V32QI register into a
;; V16QI register or memory: vextractf128 with immediate 1.
4012 (define_insn "vec_extract_hi_v32qi"
4013 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4015 (match_operand:V32QI 1 "register_operand" "x,x")
4016 (parallel [(const_int 16) (const_int 17)
4017 (const_int 18) (const_int 19)
4018 (const_int 20) (const_int 21)
4019 (const_int 22) (const_int 23)
4020 (const_int 24) (const_int 25)
4021 (const_int 26) (const_int 27)
4022 (const_int 28) (const_int 29)
4023 (const_int 30) (const_int 31)])))]
4025 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4026 [(set_attr "type" "sselog")
4027 (set_attr "prefix_extra" "1")
4028 (set_attr "length_immediate" "1")
4029 (set_attr "memory" "none,store")
4030 (set_attr "prefix" "vex")
4031 (set_attr "mode" "V8SF")])
;; extractps: copy the element of V4SF register operand 1 selected by
;; constant operand 2 (0..3) to a general register or memory.  The
;; template uses the %v prefix (insn is "maybe_vex").
4033 (define_insn "*sse4_1_extractps"
4034 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4036 (match_operand:V4SF 1 "register_operand" "x")
4037 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4039 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4040 [(set_attr "type" "sselog")
4041 (set_attr "prefix_data16" "1")
4042 (set_attr "prefix_extra" "1")
4043 (set_attr "length_immediate" "1")
4044 (set_attr "prefix" "maybe_vex")
4045 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0..3) from a V4SF in offsettable memory.
;; The split emits a plain SFmode load from byte offset i*4 of the
;; memory operand, where i is the element index.
4047 (define_insn_and_split "*vec_extract_v4sf_mem"
4048 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4050 (match_operand:V4SF 1 "memory_operand" "o")
4051 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4057 int i = INTVAL (operands[2]);
4059 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element-extract expander for every SSEMODE mode: operand 0 receives
;; the scalar, operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with its first argument false.
4063 (define_expand "vec_extract<mode>"
4064 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4065 (match_operand:SSEMODE 1 "register_operand" "")
4066 (match_operand 2 "const_int_operand" "")]
4069 ix86_expand_vector_extract (false, operands[0], operands[1],
4070 INTVAL (operands[2]));
4074 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4076 ;; Parallel double-precision floating point element swizzling
4078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd.  The parallel selects elements 1, 5, 3, 7 from the
;; pair (operand 1, operand 2); indices 4..7 refer to operand 2, so the
;; result interleaves the odd-numbered doubles of the two inputs.
;; Operand 2 may be a register or memory.
4080 (define_insn "avx_unpckhpd256"
4081 [(set (match_operand:V4DF 0 "register_operand" "=x")
4084 (match_operand:V4DF 1 "register_operand" "x")
4085 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4086 (parallel [(const_int 1) (const_int 5)
4087 (const_int 3) (const_int 7)])))]
4089 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4090 [(set_attr "type" "sselog")
4091 (set_attr "prefix" "vex")
4092 (set_attr "mode" "V4DF")])
;; Expander for the V2DF unpack-high operation; all three operands may be
;; register or memory.  The preparation statement runs
;; ix86_fixup_binary_operands over the operands before the pattern is
;; emitted (presumably to make them acceptable to the matching insns --
;; confirm against i386.c).
4094 (define_expand "sse2_unpckhpd_exp"
4095 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4098 (match_operand:V2DF 1 "nonimmediate_operand" "")
4099 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4100 (parallel [(const_int 1)
4103 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX form of V2DF unpack-high.  Three alternatives:
;;   0: reg <- reg, reg            vunpckhpd
;;   1: reg <- mem (op 1), reg     vmovlpd, reading operand 1 through %H1
;;   2: mem <- reg, op 2 tied to 0 vmovhpd store of operand 1
;; The insn is rejected when operands 1 and 2 are both memory.
4105 (define_insn "*avx_unpckhpd"
4106 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4109 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
4110 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
4111 (parallel [(const_int 1)
4113 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4115 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4116 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4117 vmovhpd\t{%1, %0|%0, %1}"
4118 [(set_attr "type" "sselog,ssemov,ssemov")
4119 (set_attr "prefix" "vex")
4120 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 (two-operand) form of V2DF unpack-high.  Three alternatives:
;;   0: op 1 tied to dest, op 2 reg   unpckhpd
;;   1: op 1 in memory, op 2 tied     movlpd, reading operand 1 through %H1
;;   2: dest memory, op 2 tied        movhpd store of operand 1
;; The insn is rejected when operands 1 and 2 are both memory.
4122 (define_insn "sse2_unpckhpd"
4123 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4126 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4127 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4128 (parallel [(const_int 1)
4130 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4132 unpckhpd\t{%2, %0|%0, %2}
4133 movlpd\t{%H1, %0|%0, %H1}
4134 movhpd\t{%1, %0|%0, %1}"
4135 [(set_attr "type" "sselog,ssemov,ssemov")
4136 (set_attr "prefix_data16" "*,1,1")
4137 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup.  A single V4DF source (register or memory) is used;
;; the parallel selects elements 0, 2, 4, 6, i.e. the even-numbered
;; doubles, and the result goes to a register.
4139 (define_insn "avx_movddup256"
4140 [(set (match_operand:V4DF 0 "register_operand" "=x")
4143 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
4145 (parallel [(const_int 0) (const_int 2)
4146 (const_int 4) (const_int 6)])))]
4148 "vmovddup\t{%1, %0|%0, %1}"
4149 [(set_attr "type" "sselog1")
4150 (set_attr "prefix" "vex")
4151 (set_attr "mode" "V4DF")])
;; AVX V2DF movddup.  Alternative 0 (register destination, register or
;; memory source) prints vmovddup; alternative 1 has an offsettable-memory
;; destination and a register source and is typed ssemov.  The insn is
;; rejected when both operands are memory.
;; NOTE(review): only the first output template line is visible in this
;; listing; the template for the memory alternative cannot be checked here.
4153 (define_insn "*avx_movddup"
4154 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4157 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4159 (parallel [(const_int 0)
4161 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4163 vmovddup\t{%1, %0|%0, %1}
4165 [(set_attr "type" "sselog1,ssemov")
4166 (set_attr "prefix" "vex")
4167 (set_attr "mode" "V2DF")])
;; SSE3 V2DF movddup, same operand layout as the AVX variant: alternative 0
;; prints movddup into a register, alternative 1 has an offsettable-memory
;; destination with a register source (type ssemov).  Rejected when both
;; operands are memory.
;; NOTE(review): only the first output template line is visible in this
;; listing.
4169 (define_insn "*sse3_movddup"
4170 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4173 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4175 (parallel [(const_int 0)
4177 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4179 movddup\t{%1, %0|%0, %1}
4181 [(set_attr "type" "sselog1,ssemov")
4182 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3) for the memory-destination, register-
;; source form whose selection starts at element 0: the low DF of the
;; source register is stored twice, at byte offsets 0 and 8 of the
;; destination.
4185 [(set (match_operand:V2DF 0 "memory_operand" "")
4188 (match_operand:V2DF 1 "register_operand" "")
4190 (parallel [(const_int 0)
4192 "TARGET_SSE3 && reload_completed"
4195 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4196 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4197 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd.  The parallel selects elements 0, 4, 2, 6 from the
;; pair (operand 1, operand 2); indices 4..7 refer to operand 2, so the
;; result interleaves the even-numbered doubles of the two inputs.
;; Operand 2 may be a register or memory.
4201 (define_insn "avx_unpcklpd256"
4202 [(set (match_operand:V4DF 0 "register_operand" "=x")
4205 (match_operand:V4DF 1 "register_operand" "x")
4206 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4207 (parallel [(const_int 0) (const_int 4)
4208 (const_int 2) (const_int 6)])))]
4210 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4211 [(set_attr "type" "sselog")
4212 (set_attr "prefix" "vex")
4213 (set_attr "mode" "V4DF")])
;; Expander for the V2DF unpack-low operation; all three operands may be
;; register or memory.  The preparation statement runs
;; ix86_fixup_binary_operands over the operands before the pattern is
;; emitted (presumably to make them acceptable to the matching insns --
;; confirm against i386.c).
4215 (define_expand "sse2_unpcklpd_exp"
4216 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4219 (match_operand:V2DF 1 "nonimmediate_operand" "")
4220 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4221 (parallel [(const_int 0)
4224 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX form of V2DF unpack-low.  Three alternatives:
;;   0: reg <- reg, reg            vunpcklpd
;;   1: reg <- reg, mem (op 2)     vmovhpd
;;   2: mem dest, op 1 tied to 0   vmovlpd store of operand 2 to %H0
;; The insn is rejected when operands 1 and 2 are both memory.
4226 (define_insn "*avx_unpcklpd"
4227 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4230 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4231 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4232 (parallel [(const_int 0)
4234 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4236 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4237 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4238 vmovlpd\t{%2, %H0|%H0, %2}"
4239 [(set_attr "type" "sselog,ssemov,ssemov")
4240 (set_attr "prefix" "vex")
4241 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 (two-operand) form of V2DF unpack-low; operand 1 is tied to the
;; destination in every alternative:
;;   0: op 2 register   unpcklpd
;;   1: op 2 memory     movhpd
;;   2: dest memory     movlpd store of operand 2 to %H0
;; The insn is rejected when operands 1 and 2 are both memory.
4243 (define_insn "sse2_unpcklpd"
4244 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4247 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4248 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4249 (parallel [(const_int 0)
4251 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4253 unpcklpd\t{%2, %0|%0, %2}
4254 movhpd\t{%2, %0|%0, %2}
4255 movlpd\t{%2, %H0|%H0, %2}"
4256 [(set_attr "type" "sselog,ssemov,ssemov")
4257 (set_attr "prefix_data16" "*,1,1")
4258 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for 256-bit shufpd taking an immediate mask in operand 3.
;; Each mask bit is turned into an explicit element index and handed to
;; avx_shufpd256_1: bit 1 -> 5 or 4, bit 2 -> 3 or 2, bit 3 -> 7 or 6.
;; NOTE(review): the argument derived from mask bit 0 is not visible in
;; this listing.
4260 (define_expand "avx_shufpd256"
4261 [(match_operand:V4DF 0 "register_operand" "")
4262 (match_operand:V4DF 1 "register_operand" "")
4263 (match_operand:V4DF 2 "nonimmediate_operand" "")
4264 (match_operand:SI 3 "const_int_operand" "")]
4267 int mask = INTVAL (operands[3]);
4268 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4270 GEN_INT (mask & 2 ? 5 : 4),
4271 GEN_INT (mask & 4 ? 3 : 2),
4272 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection written out as four element indices
;; (operands 3..6, restricted to 0-1, 4-5, 2-3 and 6-7 respectively).
;; The output code folds the indices back into the 4-bit immediate
;;   op3 | (op4 - 4) << 1 | (op5 - 2) << 2 | (op6 - 6) << 3
;; and overwrites operands[3] with it before printing the instruction.
4276 (define_insn "avx_shufpd256_1"
4277 [(set (match_operand:V4DF 0 "register_operand" "=x")
4280 (match_operand:V4DF 1 "register_operand" "x")
4281 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4282 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4283 (match_operand 4 "const_4_to_5_operand" "")
4284 (match_operand 5 "const_2_to_3_operand" "")
4285 (match_operand 6 "const_6_to_7_operand" "")])))]
4289 mask = INTVAL (operands[3]);
4290 mask |= (INTVAL (operands[4]) - 4) << 1;
4291 mask |= (INTVAL (operands[5]) - 2) << 2;
4292 mask |= (INTVAL (operands[6]) - 6) << 3;
4293 operands[3] = GEN_INT (mask);
4295 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4297 [(set_attr "type" "sselog")
4298 (set_attr "length_immediate" "1")
4299 (set_attr "prefix" "vex")
4300 (set_attr "mode" "V4DF")])
;; Expander for 128-bit shufpd taking an immediate mask in operand 3.
;; The mask is decoded into element indices and passed to
;; gen_sse2_shufpd_v2df; mask bit 1 becomes index 3 or 2.
;; NOTE(review): the argument derived from mask bit 0 is not visible in
;; this listing.
4302 (define_expand "sse2_shufpd"
4303 [(match_operand:V2DF 0 "register_operand" "")
4304 (match_operand:V2DF 1 "register_operand" "")
4305 (match_operand:V2DF 2 "nonimmediate_operand" "")
4306 (match_operand:SI 3 "const_int_operand" "")]
4309 int mask = INTVAL (operands[3]);
4310 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4312 GEN_INT (mask & 2 ? 3 : 2)));
;; vec_extract_even for the SSEMODE4S modes: a vec_select over the
;; double-width concatenation of operands 1 and 2.  The index list starts
;; at element 0; the rest of the parallel is not visible in this listing.
4316 (define_expand "vec_extract_even<mode>"
4317 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4318 (vec_select:SSEMODE4S
4319 (vec_concat:<ssedoublesizemode>
4320 (match_operand:SSEMODE4S 1 "register_operand" "")
4321 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4322 (parallel [(const_int 0)
;; vec_extract_odd for the SSEMODE4S modes: a vec_select over the
;; double-width concatenation of operands 1 and 2.  The index list starts
;; at element 1; the rest of the parallel is not visible in this listing.
4328 (define_expand "vec_extract_odd<mode>"
4329 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4330 (vec_select:SSEMODE4S
4331 (vec_concat:<ssedoublesizemode>
4332 (match_operand:SSEMODE4S 1 "register_operand" "")
4333 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4334 (parallel [(const_int 1)
;; vec_extract_even for the SSEMODE2D modes: a vec_select over the
;; double-width concatenation of operands 1 and 2.  The index list starts
;; at element 0; the rest of the parallel is not visible in this listing.
4340 (define_expand "vec_extract_even<mode>"
4341 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4342 (vec_select:SSEMODE2D
4343 (vec_concat:<ssedoublesizemode>
4344 (match_operand:SSEMODE2D 1 "register_operand" "")
4345 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4346 (parallel [(const_int 0)
;; vec_extract_odd for the SSEMODE2D modes: a vec_select over the
;; double-width concatenation of operands 1 and 2.  The index list starts
;; at element 1; the rest of the parallel is not visible in this listing.
4350 (define_expand "vec_extract_odd<mode>"
4351 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4352 (vec_select:SSEMODE2D
4353 (vec_concat:<ssedoublesizemode>
4354 (match_operand:SSEMODE2D 1 "register_operand" "")
4355 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4356 (parallel [(const_int 1)
4360 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI: register operand 1 and
;; register-or-memory operand 2 are combined, with the selection starting
;; at element 1.  Result goes to a register.
4361 (define_insn "*avx_punpckhqdq"
4362 [(set (match_operand:V2DI 0 "register_operand" "=x")
4365 (match_operand:V2DI 1 "register_operand" "x")
4366 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4367 (parallel [(const_int 1)
4370 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4371 [(set_attr "type" "sselog")
4372 (set_attr "prefix" "vex")
4373 (set_attr "mode" "TI")])
;; SSE2 two-operand punpckhqdq on V2DI: operand 1 is tied to the
;; destination register, operand 2 is register or memory, and the
;; selection starts at element 1.
4375 (define_insn "sse2_punpckhqdq"
4376 [(set (match_operand:V2DI 0 "register_operand" "=x")
4379 (match_operand:V2DI 1 "register_operand" "0")
4380 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4381 (parallel [(const_int 1)
4384 "punpckhqdq\t{%2, %0|%0, %2}"
4385 [(set_attr "type" "sselog")
4386 (set_attr "prefix_data16" "1")
4387 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI: register operand 1 and
;; register-or-memory operand 2 are combined, with the selection starting
;; at element 0.  Result goes to a register.
4389 (define_insn "*avx_punpcklqdq"
4390 [(set (match_operand:V2DI 0 "register_operand" "=x")
4393 (match_operand:V2DI 1 "register_operand" "x")
4394 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4395 (parallel [(const_int 0)
4398 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4399 [(set_attr "type" "sselog")
4400 (set_attr "prefix" "vex")
4401 (set_attr "mode" "TI")])
;; SSE2 two-operand punpcklqdq on V2DI: operand 1 is tied to the
;; destination register, operand 2 is register or memory, and the
;; selection starts at element 0.
4403 (define_insn "sse2_punpcklqdq"
4404 [(set (match_operand:V2DI 0 "register_operand" "=x")
4407 (match_operand:V2DI 1 "register_operand" "0")
4408 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4409 (parallel [(const_int 0)
4412 "punpcklqdq\t{%2, %0|%0, %2}"
4413 [(set_attr "type" "sselog")
4414 (set_attr "prefix_data16" "1")
4415 (set_attr "mode" "TI")])
;; Three-operand vshufpd over the SSEMODE2D modes.  Operands 3 (0..1) and
;; 4 (2..3) are element indices into the concatenation of operands 1 and
;; 2.  The output code packs them into the immediate
;;   op3 | (op4 - 2) << 1
;; and overwrites operands[3] with it before printing.
4417 (define_insn "*avx_shufpd_<mode>"
4418 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4419 (vec_select:SSEMODE2D
4420 (vec_concat:<ssedoublesizemode>
4421 (match_operand:SSEMODE2D 1 "register_operand" "x")
4422 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4423 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4424 (match_operand 4 "const_2_to_3_operand" "")])))]
4428 mask = INTVAL (operands[3]);
4429 mask |= (INTVAL (operands[4]) - 2) << 1;
4430 operands[3] = GEN_INT (mask);
4432 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4434 [(set_attr "type" "sselog")
4435 (set_attr "length_immediate" "1")
4436 (set_attr "prefix" "vex")
4437 (set_attr "mode" "V2DF")])
;; Two-operand shufpd over the SSEMODE2D modes; operand 1 is tied to the
;; destination.  Operands 3 (0..1) and 4 (2..3) are element indices into
;; the concatenation of operands 1 and 2.  The output code packs them into
;; the immediate  op3 | (op4 - 2) << 1  and overwrites operands[3] with it
;; before printing.
4439 (define_insn "sse2_shufpd_<mode>"
4440 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4441 (vec_select:SSEMODE2D
4442 (vec_concat:<ssedoublesizemode>
4443 (match_operand:SSEMODE2D 1 "register_operand" "0")
4444 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4445 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4446 (match_operand 4 "const_2_to_3_operand" "")])))]
4450 mask = INTVAL (operands[3]);
4451 mask |= (INTVAL (operands[4]) - 2) << 1;
4452 operands[3] = GEN_INT (mask);
4454 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4456 [(set_attr "type" "sselog")
4457 (set_attr "length_immediate" "1")
4458 (set_attr "mode" "V2DF")])
4460 ;; Avoid combining registers from different units in a single alternative,
4461 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: move element 1 (the high double) of a V2DF into a DF destination.
;; Five alternatives; the two visible templates are vmovhpd (memory store
;; from a register) and vunpckhpd of operand 1 with itself (register to
;; register).  The remaining three alternatives read an offsettable-memory
;; source into an SSE, x87 or general register; their templates are not
;; visible in this listing.  Rejected when both operands are memory.
4462 (define_insn "*avx_storehpd"
4463 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4465 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4466 (parallel [(const_int 1)])))]
4467 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4469 vmovhpd\t{%1, %0|%0, %1}
4470 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4474 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4475 (set_attr "prefix" "vex")
4476 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: move element 1 (the high double) of a V2DF into a DF destination.
;; Five alternatives; alternative 0 is a movhpd store from a register,
;; alternative 1 works in place (operand 1 tied to the destination), and
;; the last three read an offsettable-memory source.  Only the movhpd
;; template is visible in this listing.  Rejected when both operands are
;; memory.
4478 (define_insn "sse2_storehpd"
4479 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4481 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4482 (parallel [(const_int 1)])))]
4483 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4485 movhpd\t{%1, %0|%0, %1}
4490 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4491 (set_attr "prefix_data16" "1,*,*,*,*")
4492 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF that
;; lives in memory into a register becomes a plain DF move from the same
;; address re-typed as DFmode at byte offset 8.
4495 [(set (match_operand:DF 0 "register_operand" "")
4497 (match_operand:V2DF 1 "memory_operand" "")
4498 (parallel [(const_int 1)])))]
4499 "TARGET_SSE2 && reload_completed"
4500 [(set (match_dup 0) (match_dup 1))]
4502 operands[1] = adjust_address (operands[1], DFmode, 8);
4505 ;; Avoid combining registers from different units in a single alternative,
4506 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Move element 0 (the low double) of a V2DF into a DF destination.
;; Five alternatives: a memory store from a register (the visible
;; %vmovlpd template), register to register, and three loads from memory
;; into an SSE, x87 or general register.  Templates for alternatives 1-4
;; are not visible in this listing.  Rejected when both operands are
;; memory.
4507 (define_insn "sse2_storelpd"
4508 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4510 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4511 (parallel [(const_int 0)])))]
4512 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4514 %vmovlpd\t{%1, %0|%0, %1}
4519 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4520 (set_attr "prefix_data16" "1,*,*,*,*")
4521 (set_attr "prefix" "maybe_vex")
4522 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of a V2DF into a
;; register becomes a plain DF move.  The source is re-expressed in DFmode
;; either as a hard register with the same register number or via
;; gen_lowpart; the test that chooses between the two is not visible in
;; this listing.
4525 [(set (match_operand:DF 0 "register_operand" "")
4527 (match_operand:V2DF 1 "nonimmediate_operand" "")
4528 (parallel [(const_int 0)])))]
4529 "TARGET_SSE2 && reload_completed"
4532 rtx op1 = operands[1];
4534 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4536 op1 = gen_lowpart (DFmode, op1);
4537 emit_move_insn (operands[0], op1);
;; Expander for "load high double": the result is built from element 0 of
;; V2DF operand 1 together with DF operand 2.  The preparation statement
;; runs ix86_fixup_binary_operands over the operands before the pattern is
;; emitted (presumably to make them acceptable to the matching insns --
;; confirm against i386.c).
4541 (define_expand "sse2_loadhpd_exp"
4542 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4545 (match_operand:V2DF 1 "nonimmediate_operand" "")
4546 (parallel [(const_int 0)]))
4547 (match_operand:DF 2 "nonimmediate_operand" "")))]
4549 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4551 ;; Avoid combining registers from different units in a single alternative,
4552 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX "load high double": element 0 of operand 1 is kept and DF operand 2
;; supplies the other element.  Five alternatives:
;;   0: reg <- reg, mem   vmovhpd
;;   1: reg <- reg, reg   vunpcklpd
;;   2-4: memory destination with operand 1 tied to it and operand 2 in an
;;        SSE, x87 or general register (templates not visible here).
;; Rejected when operands 1 and 2 are both memory.
4553 (define_insn "*avx_loadhpd"
4554 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4557 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4558 (parallel [(const_int 0)]))
4559 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4560 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4562 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4563 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4567 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4568 (set_attr "prefix" "vex")
4569 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 "load high double": element 0 of operand 1 is kept and DF operand
;; 2 supplies the other element.  Six alternatives:
;;   0: op 1 tied, op 2 memory     movhpd
;;   1: op 1 tied, op 2 register   unpcklpd
;;   2: op 2 tied, op 1 register   shufpd $1
;;   3-5: memory destination with operand 1 tied to it and operand 2 in an
;;        SSE, x87 or general register (templates not visible here).
;; Rejected when operands 1 and 2 are both memory.
;; NOTE(review): in alternative 2 the destination register holds operand
;; 2, and shufpd takes its low result element from the destination and its
;; high element from the source, so "shufpd $1, %1, %0" appears to produce
;; {dest[1], op1[0]} rather than {op1[0], op2}.  Verify whether this
;; alternative can be correct as written.
4571 (define_insn "sse2_loadhpd"
4572 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4575 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4576 (parallel [(const_int 0)]))
4577 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4578 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4580 movhpd\t{%2, %0|%0, %2}
4581 unpcklpd\t{%2, %0|%0, %2}
4582 shufpd\t{$1, %1, %0|%0, %1, 1}
4586 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4587 (set_attr "prefix_data16" "1,*,*,*,*,*")
4588 (set_attr "length_immediate" "*,*,1,*,*,*")
4589 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2) for the memory-destination form of
;; "load high double": element 0 of the destination is kept and register
;; operand 1 supplies the other element, so the whole operation becomes a
;; DF store of operand 1 at byte offset 8 of the destination.
4592 [(set (match_operand:V2DF 0 "memory_operand" "")
4594 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4595 (match_operand:DF 1 "register_operand" "")))]
4596 "TARGET_SSE2 && reload_completed"
4597 [(set (match_dup 0) (match_dup 1))]
4599 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander for "load low double": DF operand 2 comes first in the
;; pattern and is combined with element 1 of V2DF operand 1.  The
;; preparation statement runs ix86_fixup_binary_operands over the
;; operands before the pattern is emitted (presumably to make them
;; acceptable to the matching insns -- confirm against i386.c).
4602 (define_expand "sse2_loadlpd_exp"
4603 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4605 (match_operand:DF 2 "nonimmediate_operand" "")
4607 (match_operand:V2DF 1 "nonimmediate_operand" "")
4608 (parallel [(const_int 1)]))))]
4610 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4612 ;; Avoid combining registers from different units in a single alternative,
4613 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX "load low double": DF operand 2 becomes the low element and
;; element 1 of operand 1 is kept as the high element.  Seven
;; alternatives:
;;   0: op 2 memory, op 1 constraint C   vmovsd load
;;   1: op 2 memory, op 1 register       vmovlpd
;;   2: op 2 register, op 1 register     vmovsd (three-operand)
;;   3: op 2 register, op 1 memory       vmovhpd reading %H1
;;   4-6: memory destination with operand 1 tied to it and operand 2 in an
;;        SSE, x87 or general register (templates not visible here).
;; Rejected when operands 1 and 2 are both memory.
4614 (define_insn "*avx_loadlpd"
4615 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4617 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4619 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4620 (parallel [(const_int 1)]))))]
4621 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4623 vmovsd\t{%2, %0|%0, %2}
4624 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4625 vmovsd\t{%2, %1, %0|%0, %1, %2}
4626 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4630 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4631 (set_attr "prefix" "vex")
4632 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 "load low double": DF operand 2 becomes the low element and
;; element 1 of operand 1 is kept as the high element.  Eight
;; alternatives:
;;   0: op 2 memory, op 1 constraint C   movsd load
;;   1: op 2 memory, op 1 tied           movlpd
;;   2: op 2 register, op 1 tied         movsd
;;   3: op 2 tied, op 1 register         shufpd $2 with operand 1 as source
;;   4: op 2 tied, op 1 memory           movhpd reading %H1
;;   5-7: memory destination with operand 1 tied to it and operand 2 in an
;;        SSE, x87 or general register (templates not visible here).
;; Rejected when operands 1 and 2 are both memory.
;;
;; In alternative 3 operand 2 is tied to the destination, so the shufpd
;; source must be operand 1: with selector 2 the low element stays
;; dest[0] (= operand 2) and the high element becomes source[1].  Using
;; %2 as the source would shuffle the destination with itself and never
;; read operand 1.
4634 (define_insn "sse2_loadlpd"
4635 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4637 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4639 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4640 (parallel [(const_int 1)]))))]
4641 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4643 movsd\t{%2, %0|%0, %2}
4644 movlpd\t{%2, %0|%0, %2}
4645 movsd\t{%2, %0|%0, %2}
4646 shufpd\t{$2, %1, %0|%0, %1, 2}
4647 movhpd\t{%H1, %0|%0, %H1}
4651 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4652 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4653 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4654 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2) for the memory-destination form of
;; "load low double": register operand 1 is the first (low) element and
;; element 1 of the destination is kept, so the whole operation becomes a
;; DF store of operand 1 into the low half of the destination, i.e. at
;; byte offset 0.  (Offset 8 would overwrite the high element that the
;; pattern says must be preserved; offset 8 is right only for the
;; "load high double" split.)
4657 [(set (match_operand:V2DF 0 "memory_operand" "")
4659 (match_operand:DF 1 "register_operand" "")
4660 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4661 "TARGET_SSE2 && reload_completed"
4662 [(set (match_dup 0) (match_dup 1))]
4664 operands[0] = adjust_address (operands[0], DFmode, 0);
4667 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available,
;; using single-precision moves.  Three alternatives:
;;   0: mem <- reg   movhps store
;;   1: reg <- reg   movhlps
;;   2: reg <- mem   movlps, reading operand 1 through %H1
;; Rejected when both operands are memory.
4669 (define_insn "*vec_extractv2df_1_sse"
4670 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4672 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4673 (parallel [(const_int 1)])))]
4674 "!TARGET_SSE2 && TARGET_SSE
4675 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4677 movhps\t{%1, %0|%0, %1}
4678 movhlps\t{%1, %0|%0, %1}
4679 movlps\t{%H1, %0|%0, %H1}"
4680 [(set_attr "type" "ssemov")
4681 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available,
;; using single-precision moves.  Three alternatives:
;;   0: mem <- reg   movlps store
;;   1: reg <- reg   movaps (full register copy)
;;   2: reg <- mem   movlps load
;; Rejected when both operands are memory.
4683 (define_insn "*vec_extractv2df_0_sse"
4684 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4686 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4687 (parallel [(const_int 0)])))]
4688 "!TARGET_SSE2 && TARGET_SSE
4689 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4691 movlps\t{%1, %0|%0, %1}
4692 movaps\t{%1, %0|%0, %1}
4693 movlps\t{%1, %0|%0, %1}"
4694 [(set_attr "type" "ssemov")
4695 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style merge of two V2DF values (operand 2 is listed first in
;; the pattern, operand 1 second).  Five alternatives:
;;   0: reg <- op 1 reg, op 2 reg      vmovsd
;;   1: reg <- op 1 reg, op 2 memory   vmovlpd
;;   2: mem dest, op 1 tied, op 2 reg  vmovlpd store
;;   3: reg <- op 2 reg, op 1 memory   vmovhps reading %H1
;;   4: mem dest, op 2 tied, op 1 reg  vmovhps store to %H0
;; NOTE(review): the merge selector and the insn condition are not visible
;; in this listing.
4697 (define_insn "*avx_movsd"
4698 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4700 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4701 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4705 vmovsd\t{%2, %1, %0|%0, %1, %2}
4706 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4707 vmovlpd\t{%2, %0|%0, %2}
4708 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4709 vmovhps\t{%1, %H0|%H0, %1}"
4710 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4711 (set_attr "prefix" "vex")
4712 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style merge of two V2DF values (operand 2 is listed first in
;; the pattern, operand 1 second).  Judging by the templates, the low
;; element comes from operand 2 and the high element from operand 1.
;; Six alternatives:
;;   0: op 1 tied, op 2 register       movsd
;;   1: op 1 tied, op 2 memory         movlpd
;;   2: mem dest, op 1 tied, op 2 reg  movlpd store
;;   3: op 2 tied, op 1 register       shufpd $2 with operand 1 as source
;;   4: op 2 tied, op 1 memory         movhps reading %H1
;;   5: mem dest, op 2 tied, op 1 reg  movhps store to %H0
;;
;; In alternative 3 operand 2 is tied to the destination, so the shufpd
;; source must be operand 1: with selector 2 the low element stays
;; dest[0] and the high element becomes source[1].  Using %2 as the
;; source would shuffle the destination with itself and never read
;; operand 1.
;; NOTE(review): the merge selector and the insn condition are not visible
;; in this listing.
4714 (define_insn "sse2_movsd"
4715 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4717 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4718 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4722 movsd\t{%2, %0|%0, %2}
4723 movlpd\t{%2, %0|%0, %2}
4724 movlpd\t{%2, %0|%0, %2}
4725 shufpd\t{$2, %1, %0|%0, %1, 2}
4726 movhps\t{%H1, %0|%0, %H1}
4727 movhps\t{%1, %H0|%H0, %1}"
4728 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4729 (set_attr "prefix_data16" "*,1,1,*,*,*")
4730 (set_attr "length_immediate" "*,*,*,1,*,*")
4731 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF register from a single DF operand (register or memory)
;; with movddup; going by the pattern name this is the SSE3 duplicate
;; (broadcast) form.  The insn condition is not visible in this listing.
4733 (define_insn "*vec_dupv2df_sse3"
4734 [(set (match_operand:V2DF 0 "register_operand" "=x")
4736 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4738 "%vmovddup\t{%1, %0|%0, %1}"
4739 [(set_attr "type" "sselog1")
4740 (set_attr "prefix" "maybe_vex")
4741 (set_attr "mode" "DF")])
;; Build a V2DF register from a DF register operand that is tied to the
;; destination ("0"), i.e. the operation works in place.
;; NOTE(review): the insn condition and output template are not visible in
;; this listing.
4743 (define_insn "vec_dupv2df"
4744 [(set (match_operand:V2DF 0 "register_operand" "=x")
4746 (match_operand:DF 1 "register_operand" "0")))]
4749 [(set_attr "type" "sselog1")
4750 (set_attr "mode" "V2DF")])
;; SSE3 special case of V2DF concatenation that is emitted as movddup
;; from a single DF operand (register or memory).
;; NOTE(review): the second concat operand and the insn condition are not
;; visible in this listing.
4752 (define_insn "*vec_concatv2df_sse3"
4753 [(set (match_operand:V2DF 0 "register_operand" "=x")
4755 (match_operand:DF 1 "nonimmediate_operand" "xm")
4758 "%vmovddup\t{%1, %0|%0, %1}"
4759 [(set_attr "type" "sselog1")
4760 (set_attr "prefix" "maybe_vex")
4761 (set_attr "mode" "DF")])
;; AVX: build a V2DF register from two DF operands.  Three alternatives:
;;   0: op 1 reg, op 2 reg           vunpcklpd
;;   1: op 1 reg, op 2 memory        vmovhpd
;;   2: op 1 memory, op 2 constr. C  vmovsd load of operand 1 only
;;      (presumably C is a zero constant -- confirm in constraints.md)
;; The insn condition is not visible in this listing.
4763 (define_insn "*vec_concatv2df_avx"
4764 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4766 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4767 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4770 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4771 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4772 vmovsd\t{%1, %0|%0, %1}"
4773 [(set_attr "type" "ssemov")
4774 (set_attr "prefix" "vex")
4775 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX: build a V2DF register from two DF operands.  Five
;; alternatives; operand 1 is tied to the destination except in
;; alternative 2:
;;   0: Y2 regs, op 2 register       unpcklpd
;;   1: Y2 regs, op 2 memory         movhpd
;;   2: op 1 memory, op 2 constr. C  movsd load of operand 1 only
;;   3: x regs, op 2 register        movlhps
;;   4: x regs, op 2 memory          movhps
;; The insn condition is not visible in this listing.
4777 (define_insn "*vec_concatv2df"
4778 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4780 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4781 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4784 unpcklpd\t{%2, %0|%0, %2}
4785 movhpd\t{%2, %0|%0, %2}
4786 movsd\t{%1, %0|%0, %1}
4787 movlhps\t{%2, %0|%0, %2}
4788 movhps\t{%2, %0|%0, %2}"
4789 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4790 (set_attr "prefix_data16" "*,1,*,*,*")
4791 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4793 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4795 ;; Parallel integral arithmetic
4797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander for the SSEMODEI modes.  The
;; preparation statement creates operands[2] as a register holding the
;; all-zero vector of the mode (presumably the pattern then subtracts
;; operand 1 from it; that line is not visible in this listing).
4799 (define_expand "neg<mode>2"
4800 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4803 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4805 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander, instantiated once per
;; <plusminus_insn> name and SSEMODEI mode.  Both sources may be register
;; or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands_no_copy with the iterated rtx code.
4807 (define_expand "<plusminus_insn><mode>3"
4808 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4810 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4811 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4813 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add/subtract for the SSEMODEI modes
;; (vp<op><size>).  Operand 1 carries the <comm> constraint prefix;
;; operand 2 may be memory.  Operands must satisfy
;; ix86_binary_operator_ok.
4815 (define_insn "*avx_<plusminus_insn><mode>3"
4816 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4818 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4819 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4820 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4821 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4822 [(set_attr "type" "sseiadd")
4823 (set_attr "prefix" "vex")
4824 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add/subtract for the SSEMODEI modes
;; (p<op><size>).  Operand 1 is tied to the destination and carries the
;; <comm> constraint prefix; operand 2 may be memory.  Operands must
;; satisfy ix86_binary_operator_ok.
4826 (define_insn "*<plusminus_insn><mode>3"
4827 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4829 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4830 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4831 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4832 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4833 [(set_attr "type" "sseiadd")
4834 (set_attr "prefix_data16" "1")
4835 (set_attr "mode" "TI")])
;; Expander for the saturating add/subtract codes (sat_plusminus) on the
;; byte and word vector modes (SSEMODE12).  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
4837 (define_expand "sse2_<plusminus_insn><mode>3"
4838 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4839 (sat_plusminus:SSEMODE12
4840 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4841 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4843 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand saturating add/subtract (sat_plusminus) on the
;; SSEMODE12 modes.  Operand 1 carries the <comm> constraint prefix;
;; operand 2 may be memory.  Operands must satisfy
;; ix86_binary_operator_ok.
4845 (define_insn "*avx_<plusminus_insn><mode>3"
4846 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4847 (sat_plusminus:SSEMODE12
4848 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4849 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4850 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4851 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4852 [(set_attr "type" "sseiadd")
4853 (set_attr "prefix" "vex")
4854 (set_attr "mode" "TI")])
;; SSE2 two-operand saturating add/subtract (sat_plusminus) on the
;; SSEMODE12 modes.  Operand 1 is tied to the destination and carries the
;; <comm> constraint prefix; operand 2 may be memory.  Operands must
;; satisfy ix86_binary_operator_ok.
4856 (define_insn "*sse2_<plusminus_insn><mode>3"
4857 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4858 (sat_plusminus:SSEMODE12
4859 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4860 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4861 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4862 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4863 [(set_attr "type" "sseiadd")
4864 (set_attr "prefix_data16" "1")
4865 (set_attr "mode" "TI")])
;; V16QI multiply.  There is no byte multiply instruction used here; the
;; split (which requires can_create_pseudo_p) widens the bytes to words,
;; multiplies with mulv8hi3 and narrows the products again.  Two
;; strategies are visible in the C code:
;;   * an SSE5 path that unpacks high and low bytes into V8HI temporaries
;;     with ix86_expand_sse5_unpack, multiplies, and repacks with
;;     ix86_expand_sse5_pack;
;;   * a generic path that duplicates each byte into a word with
;;     punpckhbw/punpcklbw of an operand with itself, multiplies, and then
;;     merges the low product bytes through a chain of punpck[hl]bw steps.
;; NOTE(review): the test that chooses between the two paths and several
;; short statements are not visible in this listing.
4867 (define_insn_and_split "mulv16qi3"
4868 [(set (match_operand:V16QI 0 "register_operand" "")
4869 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4870 (match_operand:V16QI 2 "register_operand" "")))]
4872 && can_create_pseudo_p ()"
4877 rtx t[12], op0, op[3];
4882 /* On SSE5, we can take advantage of the pperm instruction to pack and
4883 unpack the bytes. Unpack data such that we've got a source byte in
4884 each low byte of each word. We don't care what goes into the high
4885 byte, so put 0 there. */
4886 for (i = 0; i < 6; ++i)
4887 t[i] = gen_reg_rtx (V8HImode);
4889 for (i = 0; i < 2; i++)
4892 op[1] = operands[i+1];
4893 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4896 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4899 /* Multiply words. */
4900 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4901 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4903 /* Pack the low byte of each word back into a single xmm */
4904 op[0] = operands[0];
4907 ix86_expand_sse5_pack (op);
4911 for (i = 0; i < 12; ++i)
4912 t[i] = gen_reg_rtx (V16QImode);
4914 /* Unpack data such that we've got a source byte in each low byte of
4915 each word. We don't care what goes into the high byte of each word.
4916 Rather than trying to get zero in there, most convenient is to let
4917 it be a copy of the low byte. */
4918 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4919 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4920 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4921 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4923 /* Multiply words. The end-of-line annotations here give a picture of what
4924 the output of that instruction looks like. Dot means don't care; the
4925 letters are the bytes of the result with A being the most significant. */
4926 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4927 gen_lowpart (V8HImode, t[0]),
4928 gen_lowpart (V8HImode, t[1])));
4929 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4930 gen_lowpart (V8HImode, t[2]),
4931 gen_lowpart (V8HImode, t[3])));
4933 /* Extract the relevant bytes and merge them back together. */
4934 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4935 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4936 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4937 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4938 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4939 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4942 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Both sources may be register or memory; the
;; preparation statement passes them through
;; ix86_fixup_binary_operands_no_copy for MULT.
4946 (define_expand "mulv8hi3"
4947 [(set (match_operand:V8HI 0 "register_operand" "")
4948 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4949 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4951 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply (vpmullw).  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory.  Operands must satisfy
;; ix86_binary_operator_ok.
4953 (define_insn "*avx_mulv8hi3"
4954 [(set (match_operand:V8HI 0 "register_operand" "=x")
4955 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4956 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4957 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4958 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4959 [(set_attr "type" "sseimul")
4960 (set_attr "prefix" "vex")
4961 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply (pmullw).  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be memory.
;; Operands must satisfy ix86_binary_operator_ok.
4963 (define_insn "*mulv8hi3"
4964 [(set (match_operand:V8HI 0 "register_operand" "=x")
4965 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4966 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4967 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4968 "pmullw\t{%2, %0|%0, %2}"
4969 [(set_attr "type" "sseimul")
4970 (set_attr "prefix_data16" "1")
4971 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (per the pattern name).
;; Both sources may be register or memory; the preparation statement
;; passes them through ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the extend/multiply/shift wrapper lines of the pattern
;; are not visible in this listing.
4973 (define_expand "smulv8hi3_highpart"
4974 [(set (match_operand:V8HI 0 "register_operand" "")
4979 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4981 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4984 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI high-part multiply emitted as vpmulhw, i.e. the
;; AVX counterpart of *smulv8hi3_highpart.  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory.  Operands must satisfy
;; ix86_binary_operator_ok.
;; The pattern is named *avx_smulv8hi3_highpart to match its siblings
;; (*avx_umulv8hi3_highpart, *smulv8hi3_highpart); the leading '*' means
;; no gen_ function is produced, so the name matters only in dumps.
;; NOTE(review): the extend/multiply/shift wrapper lines of the pattern
;; are not visible in this listing.
4986 (define_insn "*avx_smulv8hi3_highpart"
4987 [(set (match_operand:V8HI 0 "register_operand" "=x")
4992 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4994 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4996 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4997 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4998 [(set_attr "type" "sseimul")
4999 (set_attr "prefix" "vex")
5000 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI high-part multiply emitted as pmulhw.  Operand 1
;; is tied to the destination and marked commutative ("%"); operand 2 may
;; be memory.  Operands must satisfy ix86_binary_operator_ok.
;; NOTE(review): the extend/multiply/shift wrapper lines of the pattern
;; are not visible in this listing.
5002 (define_insn "*smulv8hi3_highpart"
5003 [(set (match_operand:V8HI 0 "register_operand" "=x")
5008 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5010 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5012 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5013 "pmulhw\t{%2, %0|%0, %2}"
5014 [(set_attr "type" "sseimul")
5015 (set_attr "prefix_data16" "1")
5016 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (per the pattern
;; name).  Both sources may be register or memory; the preparation
;; statement passes them through ix86_fixup_binary_operands_no_copy for
;; MULT.
;; NOTE(review): the extend/multiply/shift wrapper lines of the pattern
;; are not visible in this listing.
5018 (define_expand "umulv8hi3_highpart"
5019 [(set (match_operand:V8HI 0 "register_operand" "")
5024 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5026 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5029 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI high-part multiply emitted as vpmulhuw.
;; Operand 1 is marked commutative ("%"); operand 2 may be memory.
;; Operands must satisfy ix86_binary_operator_ok.
;; NOTE(review): the extend/multiply/shift wrapper lines of the pattern
;; are not visible in this listing.
5031 (define_insn "*avx_umulv8hi3_highpart"
5032 [(set (match_operand:V8HI 0 "register_operand" "=x")
5037 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5041 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5042 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5043 [(set_attr "type" "sseimul")
5044 (set_attr "prefix" "vex")
5045 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI high-part multiply emitted as pmulhuw.  Operand
;; 1 is tied to the destination and marked commutative ("%"); operand 2
;; may be memory.  Operands must satisfy ix86_binary_operator_ok.
;; NOTE(review): the extend/multiply/shift wrapper lines of the pattern
;; are not visible in this listing.
5047 (define_insn "*umulv8hi3_highpart"
5048 [(set (match_operand:V8HI 0 "register_operand" "=x")
5053 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5055 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5057 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5058 "pmulhuw\t{%2, %0|%0, %2}"
5059 [(set_attr "type" "sseimul")
5060 (set_attr "prefix_data16" "1")
5061 (set_attr "mode" "TI")])
;; Expander for the widening multiply V4SI x V4SI -> V2DI that uses
;; elements 0 and 2 of each source (unsigned, per the pattern name).  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
5063 (define_expand "sse2_umulv2siv2di3"
5064 [(set (match_operand:V2DI 0 "register_operand" "")
5068 (match_operand:V4SI 1 "nonimmediate_operand" "")
5069 (parallel [(const_int 0) (const_int 2)])))
5072 (match_operand:V4SI 2 "nonimmediate_operand" "")
5073 (parallel [(const_int 0) (const_int 2)])))))]
5075 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuludq: multiplies elements 0 and 2 of the two
;; V4SI sources into a V2DI register.  Operand 1 is marked commutative
;; ("%"); operand 2 may be memory.  Operands must satisfy
;; ix86_binary_operator_ok.
5077 (define_insn "*avx_umulv2siv2di3"
5078 [(set (match_operand:V2DI 0 "register_operand" "=x")
5082 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5083 (parallel [(const_int 0) (const_int 2)])))
5086 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5087 (parallel [(const_int 0) (const_int 2)])))))]
5088 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5089 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5090 [(set_attr "type" "sseimul")
5091 (set_attr "prefix" "vex")
5092 (set_attr "mode" "TI")])
;; SSE2 two-operand pmuludq: multiplies elements 0 and 2 of the two V4SI
;; sources into a V2DI register.  Operand 1 is tied to the destination
;; and marked commutative ("%"); operand 2 may be memory.  Operands must
;; satisfy ix86_binary_operator_ok.
5094 (define_insn "*sse2_umulv2siv2di3"
5095 [(set (match_operand:V2DI 0 "register_operand" "=x")
5099 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5100 (parallel [(const_int 0) (const_int 2)])))
5103 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5104 (parallel [(const_int 0) (const_int 2)])))))]
5105 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5106 "pmuludq\t{%2, %0|%0, %2}"
5107 [(set_attr "type" "sseimul")
5108 (set_attr "prefix_data16" "1")
5109 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 widening multiply V4SI x V4SI -> V2DI that
;; uses elements 0 and 2 of each source.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy for
;; MULT in V4SImode.
5111 (define_expand "sse4_1_mulv2siv2di3"
5112 [(set (match_operand:V2DI 0 "register_operand" "")
5116 (match_operand:V4SI 1 "nonimmediate_operand" "")
5117 (parallel [(const_int 0) (const_int 2)])))
5120 (match_operand:V4SI 2 "nonimmediate_operand" "")
5121 (parallel [(const_int 0) (const_int 2)])))))]
5123 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn pattern emitting the three-operand vpmuldq (V4SI inputs, elements
;; 0 and 2 selected, V2DI result).  Enabled under TARGET_AVX.
5125 (define_insn "*avx_mulv2siv2di3"
5126 [(set (match_operand:V2DI 0 "register_operand" "=x")
5130 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5131 (parallel [(const_int 0) (const_int 2)])))
5134 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5135 (parallel [(const_int 0) (const_int 2)])))))]
5136 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5137 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5138 [(set_attr "type" "sseimul")
5139 (set_attr "prefix_extra" "1")
5140 (set_attr "prefix" "vex")
5141 (set_attr "mode" "TI")])
;; SSE4.1 insn pattern emitting the two-operand pmuldq (V4SI inputs, elements
;; 0 and 2 selected, V2DI result).  Enabled under TARGET_SSE4_1; operand 1 is
;; tied to the destination ("%0").
5143 (define_insn "*sse4_1_mulv2siv2di3"
5144 [(set (match_operand:V2DI 0 "register_operand" "=x")
5148 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5149 (parallel [(const_int 0) (const_int 2)])))
5152 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5153 (parallel [(const_int 0) (const_int 2)])))))]
5154 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5155 "pmuldq\t{%2, %0|%0, %2}"
5156 [(set_attr "type" "sseimul")
5157 (set_attr "prefix_extra" "1")
5158 (set_attr "mode" "TI")])
;; Named expander producing a V4SI result from two V8HI inputs.  Each input is
;; referenced twice: once with a selector starting at element 0 and once (via
;; match_dup) with a V4HI selector starting at element 1 and ending at 7.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode).
5160 (define_expand "sse2_pmaddwd"
5161 [(set (match_operand:V4SI 0 "register_operand" "")
5166 (match_operand:V8HI 1 "nonimmediate_operand" "")
5167 (parallel [(const_int 0)
5173 (match_operand:V8HI 2 "nonimmediate_operand" "")
5174 (parallel [(const_int 0)
5180 (vec_select:V4HI (match_dup 1)
5181 (parallel [(const_int 1)
5186 (vec_select:V4HI (match_dup 2)
5187 (parallel [(const_int 1)
5190 (const_int 7)]))))))]
5192 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn pattern emitting the three-operand vpmaddwd (V8HI inputs, V4SI
;; result).  Enabled under TARGET_AVX; operand 1 is a commutative SSE register.
5194 (define_insn "*avx_pmaddwd"
5195 [(set (match_operand:V4SI 0 "register_operand" "=x")
5200 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5201 (parallel [(const_int 0)
5207 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5208 (parallel [(const_int 0)
5214 (vec_select:V4HI (match_dup 1)
5215 (parallel [(const_int 1)
5220 (vec_select:V4HI (match_dup 2)
5221 (parallel [(const_int 1)
5224 (const_int 7)]))))))]
5225 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5226 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5227 [(set_attr "type" "sseiadd")
5228 (set_attr "prefix" "vex")
5229 (set_attr "mode" "TI")])
;; SSE2 insn pattern emitting the two-operand pmaddwd (V8HI inputs, V4SI
;; result).  Enabled under TARGET_SSE2; operand 1 is tied to the destination.
;; The "atom_unit" attribute is set to "simul" for this insn.
5231 (define_insn "*sse2_pmaddwd"
5232 [(set (match_operand:V4SI 0 "register_operand" "=x")
5237 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5238 (parallel [(const_int 0)
5244 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5245 (parallel [(const_int 0)
5251 (vec_select:V4HI (match_dup 1)
5252 (parallel [(const_int 1)
5257 (vec_select:V4HI (match_dup 2)
5258 (parallel [(const_int 1)
5261 (const_int 7)]))))))]
5262 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5263 "pmaddwd\t{%2, %0|%0, %2}"
5264 [(set_attr "type" "sseiadd")
5265 (set_attr "atom_unit" "simul")
5266 (set_attr "prefix_data16" "1")
5267 (set_attr "mode" "TI")])
;; Standard-name expander for a V4SI multiply; all three operands must be
;; registers.  Only when TARGET_SSE4_1 or TARGET_SSE5 is set are the operands
;; passed through ix86_fixup_binary_operands_no_copy.
5269 (define_expand "mulv4si3"
5270 [(set (match_operand:V4SI 0 "register_operand" "")
5271 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5272 (match_operand:V4SI 2 "register_operand" "")))]
5275 if (TARGET_SSE4_1 || TARGET_SSE5)
5276 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX insn pattern for a V4SI multiply: emits the three-operand vpmulld.
;; Enabled under TARGET_AVX; operand 2 may be an SSE register or memory.
5279 (define_insn "*avx_mulv4si3"
5280 [(set (match_operand:V4SI 0 "register_operand" "=x")
5281 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5282 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5283 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5284 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5285 [(set_attr "type" "sseimul")
5286 (set_attr "prefix_extra" "1")
5287 (set_attr "prefix" "vex")
5288 (set_attr "mode" "TI")])
;; SSE4.1 insn pattern for a V4SI multiply: emits the two-operand pmulld with
;; operand 1 tied to the destination.  Enabled under TARGET_SSE4_1.
5290 (define_insn "*sse4_1_mulv4si3"
5291 [(set (match_operand:V4SI 0 "register_operand" "=x")
5292 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5293 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5294 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5295 "pmulld\t{%2, %0|%0, %2}"
5296 [(set_attr "type" "sseimul")
5297 (set_attr "prefix_extra" "1")
5298 (set_attr "mode" "TI")])
5300 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5301 ;; multiply/add. In general, we expect the define_split to occur before
5302 ;; register allocation, so we have to handle the corner case where the target
5303 ;; is the same as one of the inputs.
;;
;; Operand 0 is an earlyclobber SSE register ("=&x").  The split is allowed
;; once reload has completed, or earlier when operand 0 is not mentioned in
;; either input.  The preparation code sets operands[3] to the V4SI zero
;; vector, and the replacement pattern has the form (plus (mult ...) ...).
5304 (define_insn_and_split "*sse5_mulv4si3"
5305 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5306 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5307 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5310 "&& (reload_completed
5311 || (!reg_mentioned_p (operands[0], operands[1])
5312 && !reg_mentioned_p (operands[0], operands[2])))"
5316 (plus:V4SI (mult:V4SI (match_dup 1)
5320 operands[3] = CONST0_RTX (V4SImode);
5322 [(set_attr "type" "ssemuladd")
5323 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2-only targets (TARGET_SSE2 without SSE4.1 or SSE5),
;; valid only while new pseudos can still be created.  The split code builds
;; the result from two gen_sse2_umulv2siv2di3 multiplies (even elements, then
;; odd elements after a whole-register right shift of both inputs), two
;; pshufd shuffles that compact the wanted results, and a final punpckldq
;; that merges them into operand 0.
5325 (define_insn_and_split "*sse2_mulv4si3"
5326 [(set (match_operand:V4SI 0 "register_operand" "")
5327 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5328 (match_operand:V4SI 2 "register_operand" "")))]
5329 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5330 && can_create_pseudo_p ()"
5335 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5341 t1 = gen_reg_rtx (V4SImode);
5342 t2 = gen_reg_rtx (V4SImode);
5343 t3 = gen_reg_rtx (V4SImode);
5344 t4 = gen_reg_rtx (V4SImode);
5345 t5 = gen_reg_rtx (V4SImode);
5346 t6 = gen_reg_rtx (V4SImode);
5347 thirtytwo = GEN_INT (32);
5349 /* Multiply elements 2 and 0. */
5350 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5353 /* Shift both input vectors down one element, so that elements 3
5354 and 1 are now in the slots for elements 2 and 0. For K8, at
5355 least, this is faster than using a shuffle. */
5356 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5357 gen_lowpart (TImode, op1),
5359 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5360 gen_lowpart (TImode, op2),
5362 /* Multiply elements 3 and 1. */
5363 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5366 /* Move the results in element 2 down to element 1; we don't care
5367 what goes in elements 2 and 3. */
5368 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5369 const0_rtx, const0_rtx));
5370 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5371 const0_rtx, const0_rtx));
5373 /* Merge the parts back together. */
5374 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, split while pseudos can still be created.  Two code
;; sequences are visible below (the test selecting between them is not shown
;; in this copy):
;;   * one built from gen_sse5_pmacsdd / gen_sse5_phadddq / gen_sse5_pmacsdql,
;;     which forms the cross products, sums them, shifts the sum left by 32
;;     and accumulates the low product;
;;   * one built from three gen_sse2_umulv2siv2di3 multiplies (low*low and the
;;     two high*low cross terms), with the cross terms shifted left by 32 and
;;     all three added together.
5378 (define_insn_and_split "mulv2di3"
5379 [(set (match_operand:V2DI 0 "register_operand" "")
5380 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5381 (match_operand:V2DI 2 "register_operand" "")))]
5383 && can_create_pseudo_p ()"
5388 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5393 /* op1: A,B,C,D, op2: E,F,G,H */
5395 op1 = gen_lowpart (V4SImode, operands[1]);
5396 op2 = gen_lowpart (V4SImode, operands[2]);
5397 t1 = gen_reg_rtx (V4SImode);
5398 t2 = gen_reg_rtx (V4SImode);
5399 t3 = gen_reg_rtx (V4SImode);
5400 t4 = gen_reg_rtx (V2DImode);
5401 t5 = gen_reg_rtx (V2DImode);
5404 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5411 emit_move_insn (t2, CONST0_RTX (V4SImode));
5413 /* t3: (B*E),(A*F),(D*G),(C*H) */
5414 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5416 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5417 emit_insn (gen_sse5_phadddq (t4, t3));
5419 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5420 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5422 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5423 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5430 t1 = gen_reg_rtx (V2DImode);
5431 t2 = gen_reg_rtx (V2DImode);
5432 t3 = gen_reg_rtx (V2DImode);
5433 t4 = gen_reg_rtx (V2DImode);
5434 t5 = gen_reg_rtx (V2DImode);
5435 t6 = gen_reg_rtx (V2DImode);
5436 thirtytwo = GEN_INT (32);
5438 /* Multiply low parts. */
5439 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5440 gen_lowpart (V4SImode, op2)));
5442 /* Shift input vectors right 32 bits so we can multiply high parts. */
5443 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5444 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5446 /* Multiply high parts by low parts. */
5447 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5448 gen_lowpart (V4SImode, t3)));
5449 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5450 gen_lowpart (V4SImode, t2)));
5452 /* Shift them back. */
5453 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5454 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5456 /* Add the three parts together. */
5457 emit_insn (gen_addv2di3 (t6, t1, t4));
5458 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, high half: V8HI x V8HI -> V4SI.  Emits
;; gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then interleaves
;; the high elements of t1 and t2 into operand 0 viewed as V8HI.
5462 (define_expand "vec_widen_smult_hi_v8hi"
5463 [(match_operand:V4SI 0 "register_operand" "")
5464 (match_operand:V8HI 1 "register_operand" "")
5465 (match_operand:V8HI 2 "register_operand" "")]
5468 rtx op1, op2, t1, t2, dest;
5472 t1 = gen_reg_rtx (V8HImode);
5473 t2 = gen_reg_rtx (V8HImode);
5474 dest = gen_lowpart (V8HImode, operands[0]);
5476 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5477 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5478 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, low half: V8HI x V8HI -> V4SI.  Emits
;; gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then interleaves
;; the low elements of t1 and t2 into operand 0 viewed as V8HI.
5482 (define_expand "vec_widen_smult_lo_v8hi"
5483 [(match_operand:V4SI 0 "register_operand" "")
5484 (match_operand:V8HI 1 "register_operand" "")
5485 (match_operand:V8HI 2 "register_operand" "")]
5488 rtx op1, op2, t1, t2, dest;
5492 t1 = gen_reg_rtx (V8HImode);
5493 t2 = gen_reg_rtx (V8HImode);
5494 dest = gen_lowpart (V8HImode, operands[0]);
5496 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5497 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5498 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: V8HI x V8HI -> V4SI.  Emits
;; gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then interleaves
;; the high elements of t1 and t2 into operand 0 viewed as V8HI.
5502 (define_expand "vec_widen_umult_hi_v8hi"
5503 [(match_operand:V4SI 0 "register_operand" "")
5504 (match_operand:V8HI 1 "register_operand" "")
5505 (match_operand:V8HI 2 "register_operand" "")]
5508 rtx op1, op2, t1, t2, dest;
5512 t1 = gen_reg_rtx (V8HImode);
5513 t2 = gen_reg_rtx (V8HImode);
5514 dest = gen_lowpart (V8HImode, operands[0]);
5516 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5517 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5518 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: V8HI x V8HI -> V4SI.  Emits
;; gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then interleaves
;; the low elements of t1 and t2 into operand 0 viewed as V8HI.
5522 (define_expand "vec_widen_umult_lo_v8hi"
5523 [(match_operand:V4SI 0 "register_operand" "")
5524 (match_operand:V8HI 1 "register_operand" "")
5525 (match_operand:V8HI 2 "register_operand" "")]
5528 rtx op1, op2, t1, t2, dest;
5532 t1 = gen_reg_rtx (V8HImode);
5533 t2 = gen_reg_rtx (V8HImode);
5534 dest = gen_lowpart (V8HImode, operands[0]);
5536 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5537 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5538 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; shuffled with gen_sse2_pshufd_1 into a fresh V4SI temporary (shuffle
;; indices not shown in this copy), then gen_sse5_mulv2div2di3_high produces
;; operand 0 from the two temporaries.
5542 (define_expand "vec_widen_smult_hi_v4si"
5543 [(match_operand:V2DI 0 "register_operand" "")
5544 (match_operand:V4SI 1 "register_operand" "")
5545 (match_operand:V4SI 2 "register_operand" "")]
5550 t1 = gen_reg_rtx (V4SImode);
5551 t2 = gen_reg_rtx (V4SImode);
5553 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5558 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5563 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; shuffled with gen_sse2_pshufd_1 into a fresh V4SI temporary (shuffle
;; indices not shown in this copy), then gen_sse5_mulv2div2di3_low produces
;; operand 0 from the two temporaries.
5567 (define_expand "vec_widen_smult_lo_v4si"
5568 [(match_operand:V2DI 0 "register_operand" "")
5569 (match_operand:V4SI 1 "register_operand" "")
5570 (match_operand:V4SI 2 "register_operand" "")]
5575 t1 = gen_reg_rtx (V4SImode);
5576 t2 = gen_reg_rtx (V4SImode);
5578 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5583 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5588 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; high-interleaved with itself so every high element is duplicated, and the
;; two results are multiplied with gen_sse2_umulv2siv2di3 into operand 0.
5593 (define_expand "vec_widen_umult_hi_v4si"
5594 [(match_operand:V2DI 0 "register_operand" "")
5595 (match_operand:V4SI 1 "register_operand" "")
5596 (match_operand:V4SI 2 "register_operand" "")]
5599 rtx op1, op2, t1, t2;
5603 t1 = gen_reg_rtx (V4SImode);
5604 t2 = gen_reg_rtx (V4SImode);
5606 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5607 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5608 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; low-interleaved with itself so every low element is duplicated, and the
;; two results are multiplied with gen_sse2_umulv2siv2di3 into operand 0.
5612 (define_expand "vec_widen_umult_lo_v4si"
5613 [(match_operand:V2DI 0 "register_operand" "")
5614 (match_operand:V4SI 1 "register_operand" "")
5615 (match_operand:V4SI 2 "register_operand" "")]
5618 rtx op1, op2, t1, t2;
5622 t1 = gen_reg_rtx (V4SImode);
5623 t2 = gen_reg_rtx (V4SImode);
5625 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5626 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5627 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product: operand 0 (V4SI) = operand 3 (V4SI accumulator) plus the
;; gen_sse2_pmaddwd result of the V8HI operands 1 and 2, computed into a
;; fresh V4SI temporary.
5631 (define_expand "sdot_prodv8hi"
5632 [(match_operand:V4SI 0 "register_operand" "")
5633 (match_operand:V8HI 1 "register_operand" "")
5634 (match_operand:V8HI 2 "register_operand" "")
5635 (match_operand:V4SI 3 "register_operand" "")]
5638 rtx t = gen_reg_rtx (V4SImode);
5639 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5640 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product: V4SI x V4SI accumulated into V2DI.
;;   t1 = umulv2siv2di3 (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted right as TImode values (shift count not shown
;;            in this copy)
;;   t4 = umulv2siv2di3 (t2, t3)
;;   op0 = t1 + t4
5644 (define_expand "udot_prodv4si"
5645 [(match_operand:V2DI 0 "register_operand" "")
5646 (match_operand:V4SI 1 "register_operand" "")
5647 (match_operand:V4SI 2 "register_operand" "")
5648 (match_operand:V2DI 3 "register_operand" "")]
5653 t1 = gen_reg_rtx (V2DImode);
5654 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5655 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5657 t2 = gen_reg_rtx (V4SImode);
5658 t3 = gen_reg_rtx (V4SImode);
5659 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5660 gen_lowpart (TImode, operands[1]),
5662 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5663 gen_lowpart (TImode, operands[2]),
5666 t4 = gen_reg_rtx (V2DImode);
5667 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5669 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX insn pattern emitting the three-operand vpsra<ssevecsize> over the
;; SSEMODE24 modes (V8HI, V4SI).  The SImode shift count is a register or a
;; constant ("xN"); length_immediate depends on whether it is a const_int.
5673 (define_insn "*avx_ashr<mode>3"
5674 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5676 (match_operand:SSEMODE24 1 "register_operand" "x")
5677 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5679 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5680 [(set_attr "type" "sseishft")
5681 (set_attr "prefix" "vex")
5682 (set (attr "length_immediate")
5683 (if_then_else (match_operand 2 "const_int_operand" "")
5685 (const_string "0")))
5686 (set_attr "mode" "TI")])
;; Named insn pattern emitting the two-operand psra<ssevecsize> over the
;; SSEMODE24 modes (V8HI, V4SI); operand 1 is tied to the destination.  The
;; SImode shift count is a register or a constant ("xN"); length_immediate
;; depends on whether it is a const_int.
5688 (define_insn "ashr<mode>3"
5689 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5691 (match_operand:SSEMODE24 1 "register_operand" "0")
5692 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5694 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5695 [(set_attr "type" "sseishft")
5696 (set_attr "prefix_data16" "1")
5697 (set (attr "length_immediate")
5698 (if_then_else (match_operand 2 "const_int_operand" "")
5700 (const_string "0")))
5701 (set_attr "mode" "TI")])
;; AVX insn pattern for a per-element logical right shift (lshiftrt) over the
;; SSEMODE248 modes (V8HI, V4SI, V2DI): emits the three-operand
;; vpsrl<ssevecsize>.  The SImode shift count is a register or a constant.
5703 (define_insn "*avx_lshr<mode>3"
5704 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5705 (lshiftrt:SSEMODE248
5706 (match_operand:SSEMODE248 1 "register_operand" "x")
5707 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5709 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5710 [(set_attr "type" "sseishft")
5711 (set_attr "prefix" "vex")
5712 (set (attr "length_immediate")
5713 (if_then_else (match_operand 2 "const_int_operand" "")
5715 (const_string "0")))
5716 (set_attr "mode" "TI")])
;; Named insn pattern for a per-element logical right shift (lshiftrt) over
;; the SSEMODE248 modes (V8HI, V4SI, V2DI): emits the two-operand
;; psrl<ssevecsize> with operand 1 tied to the destination.
5718 (define_insn "lshr<mode>3"
5719 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5720 (lshiftrt:SSEMODE248
5721 (match_operand:SSEMODE248 1 "register_operand" "0")
5722 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5724 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5725 [(set_attr "type" "sseishft")
5726 (set_attr "prefix_data16" "1")
5727 (set (attr "length_immediate")
5728 (if_then_else (match_operand 2 "const_int_operand" "")
5730 (const_string "0")))
5731 (set_attr "mode" "TI")])
;; AVX insn pattern emitting the three-operand vpsll<ssevecsize> over the
;; SSEMODE248 modes (V8HI, V4SI, V2DI).  The SImode shift count is a register
;; or a constant; length_immediate depends on whether it is a const_int.
5733 (define_insn "*avx_ashl<mode>3"
5734 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5736 (match_operand:SSEMODE248 1 "register_operand" "x")
5737 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5739 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5740 [(set_attr "type" "sseishft")
5741 (set_attr "prefix" "vex")
5742 (set (attr "length_immediate")
5743 (if_then_else (match_operand 2 "const_int_operand" "")
5745 (const_string "0")))
5746 (set_attr "mode" "TI")])
;; Named insn pattern emitting the two-operand psll<ssevecsize> over the
;; SSEMODE248 modes (V8HI, V4SI, V2DI); operand 1 is tied to the destination.
;; The SImode shift count is a register or a constant.
5748 (define_insn "ashl<mode>3"
5749 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5751 (match_operand:SSEMODE248 1 "register_operand" "0")
5752 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5754 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5755 [(set_attr "type" "sseishft")
5756 (set_attr "prefix_data16" "1")
5757 (set (attr "length_immediate")
5758 (if_then_else (match_operand 2 "const_int_operand" "")
5760 (const_string "0")))
5761 (set_attr "mode" "TI")])
;; Whole-register left shift for the SSEMODEI modes, expressed as a TImode
;; ashift.  The shift count must satisfy const_0_to_255_mul_8_operand.
;; Operands 0 and 1 are replaced by their TImode lowparts before expansion.
5763 (define_expand "vec_shl_<mode>"
5764 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5765 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5766 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5769 operands[0] = gen_lowpart (TImode, operands[0]);
5770 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for the SSEMODEI modes, expressed as a
;; TImode lshiftrt.  The shift count must satisfy const_0_to_255_mul_8_operand.
;; Operands 0 and 1 are replaced by their TImode lowparts before expansion.
5773 (define_expand "vec_shr_<mode>"
5774 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5775 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5776 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5779 operands[0] = gen_lowpart (TImode, operands[0]);
5780 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX insn pattern for the iterated <code> operation over the SSEMODE124
;; modes (V16QI, V8HI, V4SI): emits three-operand
;; vp<maxminiprefix><ssevecsize>.  prefix_extra is computed from the
;; mode/code pair: the symbol_ref test compares <MODE>mode against V8HImode
;; for SMAX/SMIN and against V16QImode otherwise.
5783 (define_insn "*avx_<code><mode>3"
5784 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5786 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5787 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5788 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5789 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5790 [(set_attr "type" "sseiadd")
5791 (set (attr "prefix_extra")
5793 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
5796 (const_string "0")))
5797 (set_attr "prefix" "vex")
5798 (set_attr "mode" "TI")])
;; Expander for the iterated <code> operation on V16QI.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
5800 (define_expand "<code>v16qi3"
5801 [(set (match_operand:V16QI 0 "register_operand" "")
5803 (match_operand:V16QI 1 "nonimmediate_operand" "")
5804 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5806 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn pattern for the iterated <code> operation on V16QI: emits the
;; two-operand p<maxminiprefix>b with operand 1 tied to the destination.
5808 (define_insn "*<code>v16qi3"
5809 [(set (match_operand:V16QI 0 "register_operand" "=x")
5811 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5812 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5813 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5814 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5815 [(set_attr "type" "sseiadd")
5816 (set_attr "prefix_data16" "1")
5817 (set_attr "mode" "TI")])
;; Expander for the iterated <code> operation on V8HI.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
5819 (define_expand "<code>v8hi3"
5820 [(set (match_operand:V8HI 0 "register_operand" "")
5822 (match_operand:V8HI 1 "nonimmediate_operand" "")
5823 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5825 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn pattern for the iterated <code> operation on V8HI: emits the
;; two-operand p<maxminiprefix>w with operand 1 tied to the destination.
5827 (define_insn "*<code>v8hi3"
5828 [(set (match_operand:V8HI 0 "register_operand" "=x")
5830 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5831 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5832 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5833 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5834 [(set_attr "type" "sseiadd")
5835 (set_attr "prefix_data16" "1")
5836 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two code paths are visible (the test choosing
;; between them is not shown in this copy): one only fixes up the operands
;; for a UMAX pattern; the other computes
;;   op0 = gen_sse2_ussubv8hi3 (op1, op2) + op2
;; writing the intermediate into op0, or into a fresh register when op0 and
;; op2 are the same rtx so that op2 is not overwritten before the add.
5838 (define_expand "umaxv8hi3"
5839 [(set (match_operand:V8HI 0 "register_operand" "")
5840 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5841 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5845 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5848 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5849 if (rtx_equal_p (op3, op2))
5850 op3 = gen_reg_rtx (V8HImode);
5851 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5852 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum over the SSEMODE14 modes (V16QI, V4SI).  Two code paths are
;; visible (the selecting test is not shown in this copy): one fixes up the
;; operands for an SMAX pattern; the other calls ix86_expand_int_vcond with
;; xops = { dest, op1, op2, (gt op1 op2), op1, op2 }, i.e. op1 is the value
;; paired with a true comparison and op2 the alternative.
5857 (define_expand "smax<mode>3"
5858 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5859 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5860 (match_operand:SSEMODE14 2 "register_operand" "")))]
5864 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5870 xops[0] = operands[0];
5871 xops[1] = operands[1];
5872 xops[2] = operands[2];
5873 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5874 xops[4] = operands[1];
5875 xops[5] = operands[2];
5876 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn pattern for the iterated <code> operation over the SSEMODE14
;; modes (V16QI, V4SI): emits two-operand p<maxminiprefix><ssevecsize> with
;; operand 1 tied to the destination.
5882 (define_insn "*sse4_1_<code><mode>3"
5883 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5885 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5886 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5887 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5888 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5889 [(set_attr "type" "sseiadd")
5890 (set_attr "prefix_extra" "1")
5891 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  Two code paths are visible (the selecting test is
;; not shown in this copy): one fixes up the operands for a UMAX pattern; the
;; other calls ix86_expand_int_vcond with
;; xops = { dest, op1, op2, (gtu op1 op2), op1, op2 }, i.e. op1 is the value
;; paired with a true unsigned comparison and op2 the alternative.
5893 (define_expand "umaxv4si3"
5894 [(set (match_operand:V4SI 0 "register_operand" "")
5895 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5896 (match_operand:V4SI 2 "register_operand" "")))]
5900 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5906 xops[0] = operands[0];
5907 xops[1] = operands[1];
5908 xops[2] = operands[2];
5909 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5910 xops[4] = operands[1];
5911 xops[5] = operands[2];
5912 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn pattern for the iterated <code> operation over the SSEMODE24
;; modes (V8HI, V4SI): emits two-operand p<maxminiprefix><ssevecsize> with
;; operand 1 tied to the destination.
5918 (define_insn "*sse4_1_<code><mode>3"
5919 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5921 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5922 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5923 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5924 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5925 [(set_attr "type" "sseiadd")
5926 (set_attr "prefix_extra" "1")
5927 (set_attr "mode" "TI")])
;; Signed minimum over the SSEMODE14 modes (V16QI, V4SI).  Two code paths are
;; visible (the selecting test is not shown in this copy): one fixes up the
;; operands for an SMIN pattern; the other calls ix86_expand_int_vcond with
;; the value operands swapped relative to the comparison,
;; xops = { dest, op2, op1, (gt op1 op2), op1, op2 }, so op2 is the value
;; paired with a true "op1 > op2" test.
5929 (define_expand "smin<mode>3"
5930 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5931 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5932 (match_operand:SSEMODE14 2 "register_operand" "")))]
5936 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5942 xops[0] = operands[0];
5943 xops[1] = operands[2];
5944 xops[2] = operands[1];
5945 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5946 xops[4] = operands[1];
5947 xops[5] = operands[2];
5948 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum over the SSEMODE24 modes (V8HI, V4SI).  Two code paths are
;; visible (the selecting test is not shown in this copy): one fixes up the
;; operands for a UMIN pattern; the other calls ix86_expand_int_vcond with
;; the value operands swapped relative to the comparison,
;; xops = { dest, op2, op1, (gtu op1 op2), op1, op2 }, so op2 is the value
;; paired with a true unsigned "op1 > op2" test.
5954 (define_expand "umin<mode>3"
5955 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5956 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5957 (match_operand:SSEMODE24 2 "register_operand" "")))]
5961 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5967 xops[0] = operands[0];
5968 xops[1] = operands[2];
5969 xops[2] = operands[1];
5970 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5971 xops[4] = operands[1];
5972 xops[5] = operands[2];
5973 ok = ix86_expand_int_vcond (xops);
5979 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5981 ;; Parallel integral comparisons
5983 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality over the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Available when TARGET_SSE2 is set and TARGET_SSE5 is not; operands
;; are passed through ix86_fixup_binary_operands_no_copy (EQ, ...).
5985 (define_expand "sse2_eq<mode>3"
5986 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5988 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5989 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5990 "TARGET_SSE2 && !TARGET_SSE5"
5991 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX insn pattern for element-wise equality over the SSEMODE1248 modes
;; (V16QI, V8HI, V4SI, V2DI): emits three-operand vpcmpeq<ssevecsize>.
;; prefix_extra is chosen by testing whether operand 0 has mode V2DI.
5993 (define_insn "*avx_eq<mode>3"
5994 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5996 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5997 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5998 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5999 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6000 [(set_attr "type" "ssecmp")
6001 (set (attr "prefix_extra")
6002 (if_then_else (match_operand:V2DI 0 "" "")
6004 (const_string "*")))
6005 (set_attr "prefix" "vex")
6006 (set_attr "mode" "TI")])
;; SSE2 insn pattern for element-wise equality over the SSEMODE124 modes
;; (V16QI, V8HI, V4SI): emits two-operand pcmpeq<ssevecsize> with operand 1
;; tied to the destination.  Disabled when TARGET_SSE5 is set.
6008 (define_insn "*sse2_eq<mode>3"
6009 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6011 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6012 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6013 "TARGET_SSE2 && !TARGET_SSE5
6014 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6015 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6016 [(set_attr "type" "ssecmp")
6017 (set_attr "prefix_data16" "1")
6018 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality.  The preparation statement passes
;; the operands through ix86_fixup_binary_operands_no_copy (EQ, V2DImode).
6020 (define_expand "sse4_1_eqv2di3"
6021 [(set (match_operand:V2DI 0 "register_operand" "")
6023 (match_operand:V2DI 1 "nonimmediate_operand" "")
6024 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6026 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 insn pattern for V2DI element-wise equality: emits two-operand
;; pcmpeqq with operand 1 tied to the destination.
6028 (define_insn "*sse4_1_eqv2di3"
6029 [(set (match_operand:V2DI 0 "register_operand" "=x")
6031 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6032 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6033 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6034 "pcmpeqq\t{%2, %0|%0, %2}"
6035 [(set_attr "type" "ssecmp")
6036 (set_attr "prefix_extra" "1")
6037 (set_attr "mode" "TI")])
;; AVX insn pattern over the SSEMODE1248 modes (V16QI, V8HI, V4SI, V2DI):
;; emits three-operand vpcmpgt<ssevecsize>.  Operand 1 must be a register and
;; carries no commutative marker.  prefix_extra is chosen by testing whether
;; operand 0 has mode V2DI.
6039 (define_insn "*avx_gt<mode>3"
6040 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6042 (match_operand:SSEMODE1248 1 "register_operand" "x")
6043 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6045 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6046 [(set_attr "type" "ssecmp")
6047 (set (attr "prefix_extra")
6048 (if_then_else (match_operand:V2DI 0 "" "")
6050 (const_string "*")))
6051 (set_attr "prefix" "vex")
6052 (set_attr "mode" "TI")])
;; Named SSE2 insn pattern over the SSEMODE124 modes (V16QI, V8HI, V4SI):
;; emits two-operand pcmpgt<ssevecsize> with operand 1 tied to the
;; destination.  Enabled when TARGET_SSE2 is set and TARGET_SSE5 is not.
6054 (define_insn "sse2_gt<mode>3"
6055 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6057 (match_operand:SSEMODE124 1 "register_operand" "0")
6058 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6059 "TARGET_SSE2 && !TARGET_SSE5"
6060 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6061 [(set_attr "type" "ssecmp")
6062 (set_attr "prefix_data16" "1")
6063 (set_attr "mode" "TI")])
;; Named insn pattern on V2DI operands: emits two-operand pcmpgtq with
;; operand 1 tied to the destination; operand 2 may be a register or memory.
6065 (define_insn "sse4_2_gtv2di3"
6066 [(set (match_operand:V2DI 0 "register_operand" "=x")
6068 (match_operand:V2DI 1 "register_operand" "0")
6069 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6071 "pcmpgtq\t{%2, %0|%0, %2}"
6072 [(set_attr "type" "ssecmp")
6073 (set_attr "prefix_extra" "1")
6074 (set_attr "mode" "TI")])
;; Vector conditional select over the SSEMODE124C8 modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are the
;; values of the if_then_else.  Expansion is delegated to
;; ix86_expand_int_vcond (operands), whose result is stored in `ok'.
6076 (define_expand "vcond<mode>"
6077 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6078 (if_then_else:SSEMODE124C8
6079 (match_operator 3 ""
6080 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6081 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6082 (match_operand:SSEMODE124C8 1 "general_operand" "")
6083 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6086 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select (the "vcondu" named pattern) over the
;; SSEMODE124C8 modes.  Operand 3 is the comparison operator applied to
;; operands 4 and 5; operands 1 and 2 are the values of the if_then_else.
;; Expansion is delegated to ix86_expand_int_vcond (operands).
6091 (define_expand "vcondu<mode>"
6092 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6093 (if_then_else:SSEMODE124C8
6094 (match_operator 3 ""
6095 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6096 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6097 (match_operand:SSEMODE124C8 1 "general_operand" "")
6098 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6101 bool ok = ix86_expand_int_vcond (operands);
6106 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6108 ;; Parallel bitwise logical operations
6110 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an xor with operand 1.
;; The preparation code builds a constant vector with constm1_rtx in every
;; element (GET_MODE_NUNITS elements), forces it into a register, and stores
;; it as operands[2].
6112 (define_expand "one_cmpl<mode>2"
6113 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6114 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6118 int i, n = GET_MODE_NUNITS (<MODE>mode);
6119 rtvec v = rtvec_alloc (n);
6121 for (i = 0; i < n; ++i)
6122 RTVEC_ELT (v, i) = constm1_rtx;
6124 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX insn pattern over the 256-bit integer modes (AVX256MODEI): operand 1
;; is complemented (not) and combined with operand 2; emits the three-operand
;; vandnps.  The insn "mode" attribute is <avxvecpsmode>.
6127 (define_insn "*avx_andnot<mode>3"
6128 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6130 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6131 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6133 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6134 [(set_attr "type" "sselog")
6135 (set_attr "prefix" "vex")
6136 (set_attr "mode" "<avxvecpsmode>")])
;; SSE1-only insn pattern (TARGET_SSE without TARGET_SSE2) over the SSEMODEI
;; modes: operand 1 is complemented and combined with operand 2 using the
;; two-operand andnps; the insn "mode" attribute is V4SF.
6138 (define_insn "*sse_andnot<mode>3"
6139 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6141 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6142 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6143 "(TARGET_SSE && !TARGET_SSE2)"
6144 "andnps\t{%2, %0|%0, %2}"
6145 [(set_attr "type" "sselog")
6146 (set_attr "mode" "V4SF")])
;; AVX insn pattern over the 128-bit integer modes (SSEMODEI): operand 1 is
;; complemented and combined with operand 2; emits the three-operand vpandn.
6148 (define_insn "*avx_andnot<mode>3"
6149 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6151 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6152 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6154 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6155 [(set_attr "type" "sselog")
6156 (set_attr "prefix" "vex")
6157 (set_attr "mode" "TI")])
;; Named insn pattern over the SSEMODEI modes: operand 1 is complemented and
;; combined with operand 2; emits the two-operand pandn with operand 1 tied
;; to the destination.
6159 (define_insn "sse2_andnot<mode>3"
6160 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6162 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6163 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6165 "pandn\t{%2, %0|%0, %2}"
6166 [(set_attr "type" "sselog")
6167 (set_attr "prefix_data16" "1")
6168 (set_attr "mode" "TI")])
;; Insn pattern on TFmode operands: operand 1 is complemented and combined
;; with operand 2; emits the two-operand pandn with operand 1 tied to the
;; destination.
6170 (define_insn "*andnottf3"
6171 [(set (match_operand:TF 0 "register_operand" "=x")
6173 (not:TF (match_operand:TF 1 "register_operand" "0"))
6174 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6176 "pandn\t{%2, %0|%0, %2}"
6177 [(set_attr "type" "sselog")
6178 (set_attr "prefix_data16" "1")
6179 (set_attr "mode" "TI")])
;; Expander for the iterated <code> operation over the SSEMODEI modes.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
6181 (define_expand "<code><mode>3"
6182 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6184 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6185 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6187 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX insn pattern for the iterated <code> operation over the 256-bit integer
;; modes (AVX256MODEI): emits three-operand v<plogicprefix>ps.  The insn
;; "mode" attribute is <avxvecpsmode>.
6189 (define_insn "*avx_<code><mode>3"
6190 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6192 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6193 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6195 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6196 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6197 [(set_attr "type" "sselog")
6198 (set_attr "prefix" "vex")
6199 (set_attr "mode" "<avxvecpsmode>")])
;; SSE1-only insn pattern (TARGET_SSE without TARGET_SSE2) for the iterated
;; <code> operation over the SSEMODEI modes: emits two-operand
;; <plogicprefix>ps; the insn "mode" attribute is V4SF.
6201 (define_insn "*sse_<code><mode>3"
6202 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6204 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6205 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6206 "(TARGET_SSE && !TARGET_SSE2)
6207 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6208 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6209 [(set_attr "type" "sselog")
6210 (set_attr "mode" "V4SF")])
;; AVX insn pattern for the iterated <code> operation over the 128-bit integer
;; modes (SSEMODEI): emits three-operand vp<plogicprefix>.
6212 (define_insn "*avx_<code><mode>3"
6213 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6215 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6216 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6218 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6219 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6220 [(set_attr "type" "sselog")
6221 (set_attr "prefix" "vex")
6222 (set_attr "mode" "TI")])
;; SSE2 insn pattern for the iterated <code> operation over the SSEMODEI
;; modes: emits two-operand p<plogicprefix> with operand 1 tied to the
;; destination.
6224 (define_insn "*sse2_<code><mode>3"
6225 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6227 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6228 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6229 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6230 "p<plogicprefix>\t{%2, %0|%0, %2}"
6231 [(set_attr "type" "sselog")
6232 (set_attr "prefix_data16" "1")
6233 (set_attr "mode" "TI")])
;; Expander for the iterated <code> operation on TFmode operands.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (<CODE>, TFmode).
6235 (define_expand "<code>tf3"
6236 [(set (match_operand:TF 0 "register_operand" "")
6238 (match_operand:TF 1 "nonimmediate_operand" "")
6239 (match_operand:TF 2 "nonimmediate_operand" "")))]
6241 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn implementing <code> on TFmode values under SSE2.  The value is
;; handled in an SSE register and the integer instruction p<plogicprefix>
;; is emitted in two-operand form (operand 1 tied to the destination).
6243 (define_insn "*<code>tf3"
6244 [(set (match_operand:TF 0 "register_operand" "=x")
6246 (match_operand:TF 1 "nonimmediate_operand" "%0")
6247 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6248 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6249 "p<plogicprefix>\t{%2, %0|%0, %2}"
6250 [(set_attr "type" "sselog")
6251 (set_attr "prefix_data16" "1")
6252 (set_attr "mode" "TI")])
6254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6256 ;; Parallel integral element swizzling
6258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6261 ;; op1 = abcdefghijklmnop
6262 ;; op2 = qrstuvwxyz012345
6263 ;; h1 = aqbrcsdteufvgwhx
6264 ;; l1 = iyjzk0l1m2n3o4p5
6265 ;; h2 = aiqybjrzcks0dlt1
6266 ;; l2 = emu2fnv3gow4hpx5
6267 ;; h3 = aeimquy2bfjnrvz3
6268 ;; l3 = cgkosw04dhlptx15
6269 ;; result = bdfhjlnprtvxz135
;; Expander packing two V8HI operands into one V16QI result.  The
;; preparation code calls ix86_expand_sse5_pack (operands) and also contains
;; a generic sequence: both inputs are viewed as V16QI via gen_lowpart and
;; run through seven byte interleaves (h1/l1, h2/l2, h3/l3, then a final
;; low interleave into operand 0), matching the h1..l3 trace in the
;; comment above.
6270 (define_expand "vec_pack_trunc_v8hi"
6271 [(match_operand:V16QI 0 "register_operand" "")
6272 (match_operand:V8HI 1 "register_operand" "")
6273 (match_operand:V8HI 2 "register_operand" "")]
6276 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6280 ix86_expand_sse5_pack (operands);
6284 op1 = gen_lowpart (V16QImode, operands[1]);
6285 op2 = gen_lowpart (V16QImode, operands[2]);
6286 h1 = gen_reg_rtx (V16QImode);
6287 l1 = gen_reg_rtx (V16QImode);
6288 h2 = gen_reg_rtx (V16QImode);
6289 l2 = gen_reg_rtx (V16QImode);
6290 h3 = gen_reg_rtx (V16QImode);
6291 l3 = gen_reg_rtx (V16QImode);
6293 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6294 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6295 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6296 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6297 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6298 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6299 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6310 ;; result = bdfhjlnp
;; Expander packing two V4SI operands into one V8HI result.  The
;; preparation code calls ix86_expand_sse5_pack (operands) and also contains
;; a generic sequence: both inputs are viewed as V8HI via gen_lowpart and
;; run through five halfword interleaves (h1/l1, h2/l2, then a final low
;; interleave into operand 0).
6311 (define_expand "vec_pack_trunc_v4si"
6312 [(match_operand:V8HI 0 "register_operand" "")
6313 (match_operand:V4SI 1 "register_operand" "")
6314 (match_operand:V4SI 2 "register_operand" "")]
6317 rtx op1, op2, h1, l1, h2, l2;
6321 ix86_expand_sse5_pack (operands);
6325 op1 = gen_lowpart (V8HImode, operands[1]);
6326 op2 = gen_lowpart (V8HImode, operands[2]);
6327 h1 = gen_reg_rtx (V8HImode);
6328 l1 = gen_reg_rtx (V8HImode);
6329 h2 = gen_reg_rtx (V8HImode);
6330 l2 = gen_reg_rtx (V8HImode);
6332 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6333 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6334 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6335 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6336 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander packing two V2DI operands into one V4SI result.  The
;; preparation code calls ix86_expand_sse5_pack (operands) and also contains
;; a generic sequence: both inputs are viewed as V4SI via gen_lowpart and
;; run through three doubleword interleaves (h1, l1, then a final low
;; interleave of l1 and h1 into operand 0).
6346 (define_expand "vec_pack_trunc_v2di"
6347 [(match_operand:V4SI 0 "register_operand" "")
6348 (match_operand:V2DI 1 "register_operand" "")
6349 (match_operand:V2DI 2 "register_operand" "")]
6352 rtx op1, op2, h1, l1;
6356 ix86_expand_sse5_pack (operands);
6360 op1 = gen_lowpart (V4SImode, operands[1]);
6361 op2 = gen_lowpart (V4SImode, operands[2]);
6362 h1 = gen_reg_rtx (V4SImode);
6363 l1 = gen_reg_rtx (V4SImode);
6365 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6366 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6367 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander for the high-half byte interleave of two V16QI operands.  The
;; selection list alternates indices 8..15 with 24..31.  The preparation
;; code forwards the three operands to gen_sse2_punpckhbw.
6371 (define_expand "vec_interleave_highv16qi"
6372 [(set (match_operand:V16QI 0 "register_operand" "")
6375 (match_operand:V16QI 1 "register_operand" "")
6376 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6377 (parallel [(const_int 8) (const_int 24)
6378 (const_int 9) (const_int 25)
6379 (const_int 10) (const_int 26)
6380 (const_int 11) (const_int 27)
6381 (const_int 12) (const_int 28)
6382 (const_int 13) (const_int 29)
6383 (const_int 14) (const_int 30)
6384 (const_int 15) (const_int 31)])))]
6387 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander for the low-half byte interleave of two V16QI operands.  The
;; selection list alternates indices 0..7 with 16..23.  The preparation
;; code forwards the three operands to gen_sse2_punpcklbw.
6391 (define_expand "vec_interleave_lowv16qi"
6392 [(set (match_operand:V16QI 0 "register_operand" "")
6395 (match_operand:V16QI 1 "register_operand" "")
6396 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6397 (parallel [(const_int 0) (const_int 16)
6398 (const_int 1) (const_int 17)
6399 (const_int 2) (const_int 18)
6400 (const_int 3) (const_int 19)
6401 (const_int 4) (const_int 20)
6402 (const_int 5) (const_int 21)
6403 (const_int 6) (const_int 22)
6404 (const_int 7) (const_int 23)])))]
6407 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander for the high-half halfword interleave of two V8HI operands.
;; The selection list alternates indices 4..7 with 12..15.  The preparation
;; code forwards the three operands to gen_sse2_punpckhwd.
;; Operand 0 uses an empty constraint string like every other interleave
;; expander in this file; a stray = constraint has no meaning here.
6411 (define_expand "vec_interleave_highv8hi"
6412 [(set (match_operand:V8HI 0 "register_operand" "")
6415 (match_operand:V8HI 1 "register_operand" "")
6416 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6417 (parallel [(const_int 4) (const_int 12)
6418 (const_int 5) (const_int 13)
6419 (const_int 6) (const_int 14)
6420 (const_int 7) (const_int 15)])))]
6423 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander for the low-half halfword interleave of two V8HI operands.
;; The selection list alternates indices 0..3 with 8..11.  The preparation
;; code forwards the three operands to gen_sse2_punpcklwd.
6427 (define_expand "vec_interleave_lowv8hi"
6428 [(set (match_operand:V8HI 0 "register_operand" "")
6431 (match_operand:V8HI 1 "register_operand" "")
6432 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6433 (parallel [(const_int 0) (const_int 8)
6434 (const_int 1) (const_int 9)
6435 (const_int 2) (const_int 10)
6436 (const_int 3) (const_int 11)])))]
6439 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander for the high-half doubleword interleave of two V4SI operands.
;; The selection list is 2, 6, 3, 7.  The preparation code forwards the
;; three operands to gen_sse2_punpckhdq.
6443 (define_expand "vec_interleave_highv4si"
6444 [(set (match_operand:V4SI 0 "register_operand" "")
6447 (match_operand:V4SI 1 "register_operand" "")
6448 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6449 (parallel [(const_int 2) (const_int 6)
6450 (const_int 3) (const_int 7)])))]
6453 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander for the low-half doubleword interleave of two V4SI operands.
;; The selection list is 0, 4, 1, 5.  The preparation code forwards the
;; three operands to gen_sse2_punpckldq.
6457 (define_expand "vec_interleave_lowv4si"
6458 [(set (match_operand:V4SI 0 "register_operand" "")
6461 (match_operand:V4SI 1 "register_operand" "")
6462 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6463 (parallel [(const_int 0) (const_int 4)
6464 (const_int 1) (const_int 5)])))]
6467 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander for the high-half quadword interleave of two V2DI operands
;; (the selection list starts at index 1).  The preparation code forwards
;; the three operands to gen_sse2_punpckhqdq.
6471 (define_expand "vec_interleave_highv2di"
6472 [(set (match_operand:V2DI 0 "register_operand" "")
6475 (match_operand:V2DI 1 "register_operand" "")
6476 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6477 (parallel [(const_int 1)
6481 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander for the low-half quadword interleave of two V2DI operands
;; (the selection list starts at index 0).  The preparation code forwards
;; the three operands to gen_sse2_punpcklqdq.
6485 (define_expand "vec_interleave_lowv2di"
6486 [(set (match_operand:V2DI 0 "register_operand" "")
6489 (match_operand:V2DI 1 "register_operand" "")
6490 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6491 (parallel [(const_int 0)
6495 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Expander for the high-half interleave of two V4SF operands.  The
;; selection list is 2, 6, 3, 7, the same shape as the V4SI variant.
6499 (define_expand "vec_interleave_highv4sf"
6500 [(set (match_operand:V4SF 0 "register_operand" "")
6503 (match_operand:V4SF 1 "register_operand" "")
6504 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6505 (parallel [(const_int 2) (const_int 6)
6506 (const_int 3) (const_int 7)])))]
;; Expander for the low-half interleave of two V4SF operands.  The
;; selection list is 0, 4, 1, 5, the same shape as the V4SI variant.
6509 (define_expand "vec_interleave_lowv4sf"
6510 [(set (match_operand:V4SF 0 "register_operand" "")
6513 (match_operand:V4SF 1 "register_operand" "")
6514 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6515 (parallel [(const_int 0) (const_int 4)
6516 (const_int 1) (const_int 5)])))]
;; Expander for the high-half interleave of two V2DF operands (the
;; selection list starts at index 1).
6519 (define_expand "vec_interleave_highv2df"
6520 [(set (match_operand:V2DF 0 "register_operand" "")
6523 (match_operand:V2DF 1 "register_operand" "")
6524 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6525 (parallel [(const_int 1)
;; Expander for the low-half interleave of two V2DF operands (the
;; selection list starts at index 0).
6529 (define_expand "vec_interleave_lowv2df"
6530 [(set (match_operand:V2DF 0 "register_operand" "")
6533 (match_operand:V2DF 1 "register_operand" "")
6534 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6535 (parallel [(const_int 0)
;; VEX form of packsswb: packs two V8HI operands into a V16QI result.
;; Three-operand syntax; operand 2 may be a register or memory.
6539 (define_insn "*avx_packsswb"
6540 [(set (match_operand:V16QI 0 "register_operand" "=x")
6543 (match_operand:V8HI 1 "register_operand" "x"))
6545 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6547 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6548 [(set_attr "type" "sselog")
6549 (set_attr "prefix" "vex")
6550 (set_attr "mode" "TI")])
;; SSE2 packsswb: packs two V8HI operands into a V16QI result.
;; Two-operand syntax with operand 1 tied to the destination.
6552 (define_insn "sse2_packsswb"
6553 [(set (match_operand:V16QI 0 "register_operand" "=x")
6556 (match_operand:V8HI 1 "register_operand" "0"))
6558 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6560 "packsswb\t{%2, %0|%0, %2}"
6561 [(set_attr "type" "sselog")
6562 (set_attr "prefix_data16" "1")
6563 (set_attr "mode" "TI")])
;; VEX form of packssdw: packs two V4SI operands into a V8HI result.
;; Three-operand syntax; operand 2 may be a register or memory.
6565 (define_insn "*avx_packssdw"
6566 [(set (match_operand:V8HI 0 "register_operand" "=x")
6569 (match_operand:V4SI 1 "register_operand" "x"))
6571 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6573 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6574 [(set_attr "type" "sselog")
6575 (set_attr "prefix" "vex")
6576 (set_attr "mode" "TI")])
;; SSE2 packssdw: packs two V4SI operands into a V8HI result.
;; Two-operand syntax with operand 1 tied to the destination.
6578 (define_insn "sse2_packssdw"
6579 [(set (match_operand:V8HI 0 "register_operand" "=x")
6582 (match_operand:V4SI 1 "register_operand" "0"))
6584 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6586 "packssdw\t{%2, %0|%0, %2}"
6587 [(set_attr "type" "sselog")
6588 (set_attr "prefix_data16" "1")
6589 (set_attr "mode" "TI")])
;; VEX form of packuswb: packs two V8HI operands into a V16QI result.
;; Three-operand syntax; operand 2 may be a register or memory.
6591 (define_insn "*avx_packuswb"
6592 [(set (match_operand:V16QI 0 "register_operand" "=x")
6595 (match_operand:V8HI 1 "register_operand" "x"))
6597 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6599 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6600 [(set_attr "type" "sselog")
6601 (set_attr "prefix" "vex")
6602 (set_attr "mode" "TI")])
;; SSE2 packuswb: packs two V8HI operands into a V16QI result.
;; Two-operand syntax with operand 1 tied to the destination.
6604 (define_insn "sse2_packuswb"
6605 [(set (match_operand:V16QI 0 "register_operand" "=x")
6608 (match_operand:V8HI 1 "register_operand" "0"))
6610 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6612 "packuswb\t{%2, %0|%0, %2}"
6613 [(set_attr "type" "sselog")
6614 (set_attr "prefix_data16" "1")
6615 (set_attr "mode" "TI")])
;; VEX form of punpckhbw on V16QI: the selection list alternates indices
;; 8..15 with 24..31 (high-half byte interleave).  Three-operand syntax.
6617 (define_insn "*avx_punpckhbw"
6618 [(set (match_operand:V16QI 0 "register_operand" "=x")
6621 (match_operand:V16QI 1 "register_operand" "x")
6622 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6623 (parallel [(const_int 8) (const_int 24)
6624 (const_int 9) (const_int 25)
6625 (const_int 10) (const_int 26)
6626 (const_int 11) (const_int 27)
6627 (const_int 12) (const_int 28)
6628 (const_int 13) (const_int 29)
6629 (const_int 14) (const_int 30)
6630 (const_int 15) (const_int 31)])))]
6632 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6633 [(set_attr "type" "sselog")
6634 (set_attr "prefix" "vex")
6635 (set_attr "mode" "TI")])
;; SSE2 punpckhbw on V16QI: the selection list alternates indices 8..15
;; with 24..31 (high-half byte interleave).  Two-operand syntax with
;; operand 1 tied to the destination.
6637 (define_insn "sse2_punpckhbw"
6638 [(set (match_operand:V16QI 0 "register_operand" "=x")
6641 (match_operand:V16QI 1 "register_operand" "0")
6642 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6643 (parallel [(const_int 8) (const_int 24)
6644 (const_int 9) (const_int 25)
6645 (const_int 10) (const_int 26)
6646 (const_int 11) (const_int 27)
6647 (const_int 12) (const_int 28)
6648 (const_int 13) (const_int 29)
6649 (const_int 14) (const_int 30)
6650 (const_int 15) (const_int 31)])))]
6652 "punpckhbw\t{%2, %0|%0, %2}"
6653 [(set_attr "type" "sselog")
6654 (set_attr "prefix_data16" "1")
6655 (set_attr "mode" "TI")])
;; VEX form of punpcklbw on V16QI: the selection list alternates indices
;; 0..7 with 16..23 (low-half byte interleave).  Three-operand syntax.
6657 (define_insn "*avx_punpcklbw"
6658 [(set (match_operand:V16QI 0 "register_operand" "=x")
6661 (match_operand:V16QI 1 "register_operand" "x")
6662 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6663 (parallel [(const_int 0) (const_int 16)
6664 (const_int 1) (const_int 17)
6665 (const_int 2) (const_int 18)
6666 (const_int 3) (const_int 19)
6667 (const_int 4) (const_int 20)
6668 (const_int 5) (const_int 21)
6669 (const_int 6) (const_int 22)
6670 (const_int 7) (const_int 23)])))]
6672 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6673 [(set_attr "type" "sselog")
6674 (set_attr "prefix" "vex")
6675 (set_attr "mode" "TI")])
;; SSE2 punpcklbw on V16QI: the selection list alternates indices 0..7
;; with 16..23 (low-half byte interleave).  Two-operand syntax with
;; operand 1 tied to the destination.
6677 (define_insn "sse2_punpcklbw"
6678 [(set (match_operand:V16QI 0 "register_operand" "=x")
6681 (match_operand:V16QI 1 "register_operand" "0")
6682 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6683 (parallel [(const_int 0) (const_int 16)
6684 (const_int 1) (const_int 17)
6685 (const_int 2) (const_int 18)
6686 (const_int 3) (const_int 19)
6687 (const_int 4) (const_int 20)
6688 (const_int 5) (const_int 21)
6689 (const_int 6) (const_int 22)
6690 (const_int 7) (const_int 23)])))]
6692 "punpcklbw\t{%2, %0|%0, %2}"
6693 [(set_attr "type" "sselog")
6694 (set_attr "prefix_data16" "1")
6695 (set_attr "mode" "TI")])
;; VEX form of punpckhwd on V8HI: the selection list alternates indices
;; 4..7 with 12..15 (high-half halfword interleave).  Three-operand syntax.
6697 (define_insn "*avx_punpckhwd"
6698 [(set (match_operand:V8HI 0 "register_operand" "=x")
6701 (match_operand:V8HI 1 "register_operand" "x")
6702 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6703 (parallel [(const_int 4) (const_int 12)
6704 (const_int 5) (const_int 13)
6705 (const_int 6) (const_int 14)
6706 (const_int 7) (const_int 15)])))]
6708 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6709 [(set_attr "type" "sselog")
6710 (set_attr "prefix" "vex")
6711 (set_attr "mode" "TI")])
;; SSE2 punpckhwd on V8HI: the selection list alternates indices 4..7
;; with 12..15 (high-half halfword interleave).  Two-operand syntax with
;; operand 1 tied to the destination.
6713 (define_insn "sse2_punpckhwd"
6714 [(set (match_operand:V8HI 0 "register_operand" "=x")
6717 (match_operand:V8HI 1 "register_operand" "0")
6718 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6719 (parallel [(const_int 4) (const_int 12)
6720 (const_int 5) (const_int 13)
6721 (const_int 6) (const_int 14)
6722 (const_int 7) (const_int 15)])))]
6724 "punpckhwd\t{%2, %0|%0, %2}"
6725 [(set_attr "type" "sselog")
6726 (set_attr "prefix_data16" "1")
6727 (set_attr "mode" "TI")])
;; VEX form of punpcklwd on V8HI: the selection list alternates indices
;; 0..3 with 8..11 (low-half halfword interleave).  Three-operand syntax.
6729 (define_insn "*avx_punpcklwd"
6730 [(set (match_operand:V8HI 0 "register_operand" "=x")
6733 (match_operand:V8HI 1 "register_operand" "x")
6734 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6735 (parallel [(const_int 0) (const_int 8)
6736 (const_int 1) (const_int 9)
6737 (const_int 2) (const_int 10)
6738 (const_int 3) (const_int 11)])))]
6740 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6741 [(set_attr "type" "sselog")
6742 (set_attr "prefix" "vex")
6743 (set_attr "mode" "TI")])
;; SSE2 punpcklwd on V8HI: the selection list alternates indices 0..3
;; with 8..11 (low-half halfword interleave).  Two-operand syntax with
;; operand 1 tied to the destination.
6745 (define_insn "sse2_punpcklwd"
6746 [(set (match_operand:V8HI 0 "register_operand" "=x")
6749 (match_operand:V8HI 1 "register_operand" "0")
6750 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6751 (parallel [(const_int 0) (const_int 8)
6752 (const_int 1) (const_int 9)
6753 (const_int 2) (const_int 10)
6754 (const_int 3) (const_int 11)])))]
6756 "punpcklwd\t{%2, %0|%0, %2}"
6757 [(set_attr "type" "sselog")
6758 (set_attr "prefix_data16" "1")
6759 (set_attr "mode" "TI")])
;; VEX form of punpckhdq on V4SI: selection list 2, 6, 3, 7 (high-half
;; doubleword interleave).  Three-operand syntax.
6761 (define_insn "*avx_punpckhdq"
6762 [(set (match_operand:V4SI 0 "register_operand" "=x")
6765 (match_operand:V4SI 1 "register_operand" "x")
6766 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6767 (parallel [(const_int 2) (const_int 6)
6768 (const_int 3) (const_int 7)])))]
6770 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6771 [(set_attr "type" "sselog")
6772 (set_attr "prefix" "vex")
6773 (set_attr "mode" "TI")])
;; SSE2 punpckhdq on V4SI: selection list 2, 6, 3, 7 (high-half doubleword
;; interleave).  Two-operand syntax with operand 1 tied to the destination.
6775 (define_insn "sse2_punpckhdq"
6776 [(set (match_operand:V4SI 0 "register_operand" "=x")
6779 (match_operand:V4SI 1 "register_operand" "0")
6780 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6781 (parallel [(const_int 2) (const_int 6)
6782 (const_int 3) (const_int 7)])))]
6784 "punpckhdq\t{%2, %0|%0, %2}"
6785 [(set_attr "type" "sselog")
6786 (set_attr "prefix_data16" "1")
6787 (set_attr "mode" "TI")])
;; VEX form of punpckldq on V4SI: selection list 0, 4, 1, 5 (low-half
;; doubleword interleave).  Three-operand syntax.
6789 (define_insn "*avx_punpckldq"
6790 [(set (match_operand:V4SI 0 "register_operand" "=x")
6793 (match_operand:V4SI 1 "register_operand" "x")
6794 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6795 (parallel [(const_int 0) (const_int 4)
6796 (const_int 1) (const_int 5)])))]
6798 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6799 [(set_attr "type" "sselog")
6800 (set_attr "prefix" "vex")
6801 (set_attr "mode" "TI")])
;; SSE2 punpckldq on V4SI: selection list 0, 4, 1, 5 (low-half doubleword
;; interleave).  Two-operand syntax with operand 1 tied to the destination.
6803 (define_insn "sse2_punpckldq"
6804 [(set (match_operand:V4SI 0 "register_operand" "=x")
6807 (match_operand:V4SI 1 "register_operand" "0")
6808 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6809 (parallel [(const_int 0) (const_int 4)
6810 (const_int 1) (const_int 5)])))]
6812 "punpckldq\t{%2, %0|%0, %2}"
6813 [(set_attr "type" "sselog")
6814 (set_attr "prefix_data16" "1")
6815 (set_attr "mode" "TI")])
;; VEX element insert for the V16QI, V8HI and V4SI modes (SSEMODE124).
;; Scalar operand 2 is duplicated and merged into vector operand 1 under
;; the single-bit mask in operand 3.  At output time the mask is converted
;; to an element index with exact_log2, because the instruction takes an
;; index immediate rather than a mask.  The prefix_extra attribute is
;; chosen by testing whether operand 0 is a V8HI register.
6817 (define_insn "*avx_pinsr<ssevecsize>"
6818 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6819 (vec_merge:SSEMODE124
6820 (vec_duplicate:SSEMODE124
6821 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6822 (match_operand:SSEMODE124 1 "register_operand" "x")
6823 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6826 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6827 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6829 [(set_attr "type" "sselog")
6830 (set (attr "prefix_extra")
6831 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6833 (const_string "1")))
6834 (set_attr "length_immediate" "1")
6835 (set_attr "prefix" "vex")
6836 (set_attr "mode" "TI")])
;; pinsrb: inserts QImode operand 2 into V16QI operand 1 (tied to the
;; destination).  Operand 3 is a power-of-two mask in the range 1..32768;
;; it is converted to a byte index with exact_log2 before output.
6838 (define_insn "*sse4_1_pinsrb"
6839 [(set (match_operand:V16QI 0 "register_operand" "=x")
6841 (vec_duplicate:V16QI
6842 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6843 (match_operand:V16QI 1 "register_operand" "0")
6844 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6847 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6848 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6850 [(set_attr "type" "sselog")
6851 (set_attr "prefix_extra" "1")
6852 (set_attr "length_immediate" "1")
6853 (set_attr "mode" "TI")])
;; pinsrw: inserts HImode operand 2 into V8HI operand 1 (tied to the
;; destination).  Operand 3 is a power-of-two mask in the range 1..128;
;; it is converted to a halfword index with exact_log2 before output.
6855 (define_insn "*sse2_pinsrw"
6856 [(set (match_operand:V8HI 0 "register_operand" "=x")
6859 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6860 (match_operand:V8HI 1 "register_operand" "0")
6861 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6864 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6865 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6867 [(set_attr "type" "sselog")
6868 (set_attr "prefix_data16" "1")
6869 (set_attr "length_immediate" "1")
6870 (set_attr "mode" "TI")])
6872 ;; It must come before sse2_loadld since it is preferred.
;; pinsrd: inserts SImode operand 2 into V4SI operand 1 (tied to the
;; destination).  Operand 3 is a power-of-two mask in the range 1..8;
;; it is converted to a doubleword index with exact_log2 before output.
6873 (define_insn "*sse4_1_pinsrd"
6874 [(set (match_operand:V4SI 0 "register_operand" "=x")
6877 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6878 (match_operand:V4SI 1 "register_operand" "0")
6879 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6882 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6883 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6885 [(set_attr "type" "sselog")
6886 (set_attr "prefix_extra" "1")
6887 (set_attr "length_immediate" "1")
6888 (set_attr "mode" "TI")])
;; vpinsrq: VEX insert of DImode operand 2 into V2DI operand 1, available
;; only for AVX in 64-bit mode.  Operand 3 is a power-of-two mask (1 or 2)
;; converted to a quadword index with exact_log2 before output.
6890 (define_insn "*avx_pinsrq"
6891 [(set (match_operand:V2DI 0 "register_operand" "=x")
6894 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6895 (match_operand:V2DI 1 "register_operand" "x")
6896 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6897 "TARGET_AVX && TARGET_64BIT"
6899 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6900 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6902 [(set_attr "type" "sselog")
6903 (set_attr "prefix_extra" "1")
6904 (set_attr "length_immediate" "1")
6905 (set_attr "prefix" "vex")
6906 (set_attr "mode" "TI")])
;; pinsrq: inserts DImode operand 2 into V2DI operand 1 (tied to the
;; destination), available only for SSE4.1 in 64-bit mode.  Operand 3 is a
;; power-of-two mask (1 or 2) converted to a quadword index with
;; exact_log2 before output.  The insn is marked as needing a REX prefix.
6908 (define_insn "*sse4_1_pinsrq"
6909 [(set (match_operand:V2DI 0 "register_operand" "=x")
6912 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6913 (match_operand:V2DI 1 "register_operand" "0")
6914 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6915 "TARGET_SSE4_1 && TARGET_64BIT"
6917 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6918 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6920 [(set_attr "type" "sselog")
6921 (set_attr "prefix_rex" "1")
6922 (set_attr "prefix_extra" "1")
6923 (set_attr "length_immediate" "1")
6924 (set_attr "mode" "TI")])
;; pextrb to a general register: extracts the byte of V16QI operand 1
;; selected by index operand 2 (0..15) into an SImode register.  The
;; prefix attribute is maybe_vex.
6926 (define_insn "*sse4_1_pextrb"
6927 [(set (match_operand:SI 0 "register_operand" "=r")
6930 (match_operand:V16QI 1 "register_operand" "x")
6931 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6933 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6934 [(set_attr "type" "sselog")
6935 (set_attr "prefix_extra" "1")
6936 (set_attr "length_immediate" "1")
6937 (set_attr "prefix" "maybe_vex")
6938 (set_attr "mode" "TI")])
;; pextrb to memory: stores the byte of V16QI operand 1 selected by index
;; operand 2 (0..15) directly into a QImode memory destination.
6940 (define_insn "*sse4_1_pextrb_memory"
6941 [(set (match_operand:QI 0 "memory_operand" "=m")
6943 (match_operand:V16QI 1 "register_operand" "x")
6944 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6946 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6947 [(set_attr "type" "sselog")
6948 (set_attr "prefix_extra" "1")
6949 (set_attr "length_immediate" "1")
6950 (set_attr "prefix" "maybe_vex")
6951 (set_attr "mode" "TI")])
;; pextrw to a general register: extracts the halfword of V8HI operand 1
;; selected by index operand 2 (0..7) into an SImode register.
6953 (define_insn "*sse2_pextrw"
6954 [(set (match_operand:SI 0 "register_operand" "=r")
6957 (match_operand:V8HI 1 "register_operand" "x")
6958 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6960 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6961 [(set_attr "type" "sselog")
6962 (set_attr "prefix_data16" "1")
6963 (set_attr "length_immediate" "1")
6964 (set_attr "prefix" "maybe_vex")
6965 (set_attr "mode" "TI")])
;; pextrw to memory: stores the halfword of V8HI operand 1 selected by
;; index operand 2 (0..7) directly into an HImode memory destination.
6967 (define_insn "*sse4_1_pextrw_memory"
6968 [(set (match_operand:HI 0 "memory_operand" "=m")
6970 (match_operand:V8HI 1 "register_operand" "x")
6971 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6973 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6974 [(set_attr "type" "sselog")
6975 (set_attr "prefix_extra" "1")
6976 (set_attr "length_immediate" "1")
6977 (set_attr "prefix" "maybe_vex")
6978 (set_attr "mode" "TI")])
;; pextrd: extracts the doubleword of V4SI operand 1 selected by index
;; operand 2 (0..3) into an SImode register or memory destination.
6980 (define_insn "*sse4_1_pextrd"
6981 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6983 (match_operand:V4SI 1 "register_operand" "x")
6984 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6986 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6987 [(set_attr "type" "sselog")
6988 (set_attr "prefix_extra" "1")
6989 (set_attr "length_immediate" "1")
6990 (set_attr "prefix" "maybe_vex")
6991 (set_attr "mode" "TI")])
6993 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq: extracts the quadword of V2DI operand 1 selected by index
;; operand 2 (0..1) into a DImode register or memory destination.
;; Requires SSE4.1 and 64-bit mode; marked as needing a REX prefix.
6994 (define_insn "*sse4_1_pextrq"
6995 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6997 (match_operand:V2DI 1 "register_operand" "x")
6998 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6999 "TARGET_SSE4_1 && TARGET_64BIT"
7000 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7001 [(set_attr "type" "sselog")
7002 (set_attr "prefix_rex" "1")
7003 (set_attr "prefix_extra" "1")
7004 (set_attr "length_immediate" "1")
7005 (set_attr "prefix" "maybe_vex")
7006 (set_attr "mode" "TI")])
;; Expander for pshufd taking the raw 8-bit shuffle immediate as operand 2.
;; The immediate is split into four 2-bit element selectors (bits 1:0,
;; 3:2, 5:4, 7:6), which are passed to gen_sse2_pshufd_1 as separate
;; operands.
7008 (define_expand "sse2_pshufd"
7009 [(match_operand:V4SI 0 "register_operand" "")
7010 (match_operand:V4SI 1 "nonimmediate_operand" "")
7011 (match_operand:SI 2 "const_int_operand" "")]
7014 int mask = INTVAL (operands[2]);
7015 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7016 GEN_INT ((mask >> 0) & 3),
7017 GEN_INT ((mask >> 2) & 3),
7018 GEN_INT ((mask >> 4) & 3),
7019 GEN_INT ((mask >> 6) & 3)));
;; pshufd insn with the shuffle expressed as four element selectors
;; (operands 2..5, each 0..3).  The output code reassembles them into the
;; 8-bit immediate (operand N shifted left by 0, 2, 4, 6) and stores it
;; back in operands[2] so the template can print it.
7023 (define_insn "sse2_pshufd_1"
7024 [(set (match_operand:V4SI 0 "register_operand" "=x")
7026 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7027 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7028 (match_operand 3 "const_0_to_3_operand" "")
7029 (match_operand 4 "const_0_to_3_operand" "")
7030 (match_operand 5 "const_0_to_3_operand" "")])))]
7034 mask |= INTVAL (operands[2]) << 0;
7035 mask |= INTVAL (operands[3]) << 2;
7036 mask |= INTVAL (operands[4]) << 4;
7037 mask |= INTVAL (operands[5]) << 6;
7038 operands[2] = GEN_INT (mask);
7040 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7042 [(set_attr "type" "sselog1")
7043 (set_attr "prefix_data16" "1")
7044 (set_attr "prefix" "maybe_vex")
7045 (set_attr "length_immediate" "1")
7046 (set_attr "mode" "TI")])
;; Expander for pshuflw taking the raw 8-bit shuffle immediate as
;; operand 2.  The immediate is split into four 2-bit selectors, which are
;; passed to gen_sse2_pshuflw_1 as separate operands.
7048 (define_expand "sse2_pshuflw"
7049 [(match_operand:V8HI 0 "register_operand" "")
7050 (match_operand:V8HI 1 "nonimmediate_operand" "")
7051 (match_operand:SI 2 "const_int_operand" "")]
7054 int mask = INTVAL (operands[2]);
7055 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7056 GEN_INT ((mask >> 0) & 3),
7057 GEN_INT ((mask >> 2) & 3),
7058 GEN_INT ((mask >> 4) & 3),
7059 GEN_INT ((mask >> 6) & 3)));
;; pshuflw insn with the shuffle expressed as four selectors (operands
;; 2..5, each 0..3).  The output code reassembles them into the 8-bit
;; immediate and stores it back in operands[2].  The attributes select a
;; rep prefix and no data16 prefix.
7063 (define_insn "sse2_pshuflw_1"
7064 [(set (match_operand:V8HI 0 "register_operand" "=x")
7066 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7067 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7068 (match_operand 3 "const_0_to_3_operand" "")
7069 (match_operand 4 "const_0_to_3_operand" "")
7070 (match_operand 5 "const_0_to_3_operand" "")
7078 mask |= INTVAL (operands[2]) << 0;
7079 mask |= INTVAL (operands[3]) << 2;
7080 mask |= INTVAL (operands[4]) << 4;
7081 mask |= INTVAL (operands[5]) << 6;
7082 operands[2] = GEN_INT (mask);
7084 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7086 [(set_attr "type" "sselog")
7087 (set_attr "prefix_data16" "0")
7088 (set_attr "prefix_rep" "1")
7089 (set_attr "prefix" "maybe_vex")
7090 (set_attr "length_immediate" "1")
7091 (set_attr "mode" "TI")])
;; Expander for pshufhw taking the raw 8-bit shuffle immediate as
;; operand 2.  Each 2-bit selector is biased by 4 before being passed to
;; gen_sse2_pshufhw_1, so the selectors are in the range 4..7.
7093 (define_expand "sse2_pshufhw"
7094 [(match_operand:V8HI 0 "register_operand" "")
7095 (match_operand:V8HI 1 "nonimmediate_operand" "")
7096 (match_operand:SI 2 "const_int_operand" "")]
7099 int mask = INTVAL (operands[2]);
7100 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7101 GEN_INT (((mask >> 0) & 3) + 4),
7102 GEN_INT (((mask >> 2) & 3) + 4),
7103 GEN_INT (((mask >> 4) & 3) + 4),
7104 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw insn with the shuffle expressed as four selectors (operands
;; 2..5, each 4..7).  The output code removes the bias of 4 from each
;; selector, reassembles the 8-bit immediate and stores it back in
;; operands[2].  The attributes select a rep prefix and no data16 prefix.
7108 (define_insn "sse2_pshufhw_1"
7109 [(set (match_operand:V8HI 0 "register_operand" "=x")
7111 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7112 (parallel [(const_int 0)
7116 (match_operand 2 "const_4_to_7_operand" "")
7117 (match_operand 3 "const_4_to_7_operand" "")
7118 (match_operand 4 "const_4_to_7_operand" "")
7119 (match_operand 5 "const_4_to_7_operand" "")])))]
7123 mask |= (INTVAL (operands[2]) - 4) << 0;
7124 mask |= (INTVAL (operands[3]) - 4) << 2;
7125 mask |= (INTVAL (operands[4]) - 4) << 4;
7126 mask |= (INTVAL (operands[5]) - 4) << 6;
7127 operands[2] = GEN_INT (mask);
7129 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7131 [(set_attr "type" "sselog")
7132 (set_attr "prefix_rep" "1")
7133 (set_attr "prefix_data16" "0")
7134 (set_attr "prefix" "maybe_vex")
7135 (set_attr "length_immediate" "1")
7136 (set_attr "mode" "TI")])
;; Expander loading an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operands[2] to the V4SI zero vector.
7138 (define_expand "sse2_loadd"
7139 [(set (match_operand:V4SI 0 "register_operand" "")
7142 (match_operand:SI 1 "nonimmediate_operand" ""))
7146 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX load of an SImode scalar (operand 2) into a V4SI register.
;; Alternatives: vmovd from memory and vmovd from a general register, both
;; with operand 1 constrained to zero (C), and vmovss merging an SSE
;; register scalar into register operand 1.
7148 (define_insn "*avx_loadld"
7149 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7152 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7153 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7157 vmovd\t{%2, %0|%0, %2}
7158 vmovd\t{%2, %0|%0, %2}
7159 vmovss\t{%2, %1, %0|%0, %1, %2}"
7160 [(set_attr "type" "ssemov")
7161 (set_attr "prefix" "vex")
7162 (set_attr "mode" "TI,TI,V4SF")])
;; Load of an SImode scalar (operand 2) into a V4SI register.
;; Alternatives: movd from memory, movd from a general register, movss
;; from memory (all with operand 1 constrained to zero, C), and movss from
;; an SSE register with operand 1 tied to the destination.
7164 (define_insn "sse2_loadld"
7165 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7168 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7169 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7173 movd\t{%2, %0|%0, %2}
7174 movd\t{%2, %0|%0, %2}
7175 movss\t{%2, %0|%0, %2}
7176 movss\t{%2, %0|%0, %2}"
7177 [(set_attr "type" "ssemov")
7178 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extraction of element 0 of a V4SI register into an SImode destination.
;; After reload, when inter-unit moves are allowed or the destination is
;; memory or not a general register, the insn is split into a plain move:
;; operand 1 is rewritten as the SImode register with the same REGNO.
7180 (define_insn_and_split "sse2_stored"
7181 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7183 (match_operand:V4SI 1 "register_operand" "x,Yi")
7184 (parallel [(const_int 0)])))]
7187 "&& reload_completed
7188 && (TARGET_INTER_UNIT_MOVES
7189 || MEM_P (operands [0])
7190 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7191 [(set (match_dup 0) (match_dup 1))]
7193 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extraction of element operand 2 (0..3) from a V4SI value in offsettable
;; memory into an SImode general register.  The split replaces it with an
;; ordinary SImode load from the vector address plus 4 times the index.
7196 (define_insn_and_split "*vec_ext_v4si_mem"
7197 [(set (match_operand:SI 0 "register_operand" "=r")
7199 (match_operand:V4SI 1 "memory_operand" "o")
7200 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7206 int i = INTVAL (operands[2]);
7208 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register into a DImode
;; register or memory destination.
7212 (define_expand "sse_storeq"
7213 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7215 (match_operand:V2DI 1 "register_operand" "")
7216 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 extraction from V2DI into DImode.
;; Memory-to-memory is rejected by the insn condition.  The third
;; alternative (offsettable memory source, general register destination)
;; is an integer move emitted as %vmov{q}; the first two alternatives
;; carry no type, prefix or mode of their own (attribute value *).
7220 (define_insn "*sse2_storeq_rex64"
7221 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7223 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7224 (parallel [(const_int 0)])))]
7225 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7229 %vmov{q}\t{%1, %0|%0, %1}"
7230 [(set_attr "type" "*,*,imov")
7231 (set_attr "prefix" "*,*,maybe_vex")
7232 (set_attr "mode" "*,*,DI")])
;; Element-0 extraction from a V2DI register into a DImode SSE register or
;; memory destination.  The second pattern that follows repeats the same
;; extraction without constraints and, under the same inter-unit-move /
;; memory / non-general-register test used by sse2_stored, rewrites it as
;; a plain DImode move with operand 1 taken as the DImode register of the
;; same REGNO (apparently the split for this insn).
7234 (define_insn "*sse2_storeq"
7235 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7237 (match_operand:V2DI 1 "register_operand" "x")
7238 (parallel [(const_int 0)])))]
7243 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7245 (match_operand:V2DI 1 "register_operand" "")
7246 (parallel [(const_int 0)])))]
7249 && (TARGET_INTER_UNIT_MOVES
7250 || MEM_P (operands [0])
7251 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7252 [(set (match_dup 0) (match_dup 1))]
7254 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; VEX extraction of element 1 of a V2DI value into DImode, with a
;; general-register alternative.  Alternatives: vmovhps store to memory,
;; vpsrldq by 8 bytes within SSE registers, vmovq load of the upper half
;; of a memory source (%H1), and an integer vmov{q} load of that upper
;; half into a general register.  Memory-to-memory is rejected.
7257 (define_insn "*vec_extractv2di_1_rex64_avx"
7258 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7260 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7261 (parallel [(const_int 1)])))]
7264 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7266 vmovhps\t{%1, %0|%0, %1}
7267 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7268 vmovq\t{%H1, %0|%0, %H1}
7269 vmov{q}\t{%H1, %0|%0, %H1}"
7270 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7271 (set_attr "length_immediate" "*,1,*,*")
7272 (set_attr "memory" "*,none,*,*")
7273 (set_attr "prefix" "vex")
7274 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit extraction of element 1 of a V2DI value into DImode.
;; Alternatives: movhps store to memory, in-place psrldq by 8 bytes
;; (source tied to the destination), movq load of the upper half of a
;; memory source (%H1), and an integer mov{q} load of that upper half into
;; a general register.  Memory-to-memory is rejected.
7276 (define_insn "*vec_extractv2di_1_rex64"
7277 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7279 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7280 (parallel [(const_int 1)])))]
7281 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7283 movhps\t{%1, %0|%0, %1}
7284 psrldq\t{$8, %0|%0, 8}
7285 movq\t{%H1, %0|%0, %H1}
7286 mov{q}\t{%H1, %0|%0, %H1}"
7287 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7288 (set_attr "length_immediate" "*,1,*,*")
7289 (set_attr "atom_unit" "*,sishuf,*,*")
7290 (set_attr "memory" "*,none,*,*")
7291 (set_attr "mode" "V2SF,TI,TI,DI")])
;; VEX extraction of element 1 of a V2DI value into DImode, without a
;; general-register alternative.  Alternatives: vmovhps store to memory,
;; vpsrldq by 8 bytes within SSE registers, and vmovq load of the upper
;; half of a memory source (%H1).  Memory-to-memory is rejected.
7293 (define_insn "*vec_extractv2di_1_avx"
7294 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7296 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7297 (parallel [(const_int 1)])))]
7300 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7302 vmovhps\t{%1, %0|%0, %1}
7303 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7304 vmovq\t{%H1, %0|%0, %H1}"
7305 [(set_attr "type" "ssemov,sseishft,ssemov")
7306 (set_attr "length_immediate" "*,1,*")
7307 (set_attr "memory" "*,none,*")
7308 (set_attr "prefix" "vex")
7309 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extraction of element 1 of a V2DI value into DImode, without a
;; general-register alternative.  Alternatives: movhps store to memory,
;; in-place psrldq by 8 bytes (source tied to the destination), and movq
;; load of the upper half of a memory source (%H1).  Memory-to-memory is
;; rejected.
7311 (define_insn "*vec_extractv2di_1_sse2"
7312 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7314 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7315 (parallel [(const_int 1)])))]
7317 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7319 movhps\t{%1, %0|%0, %1}
7320 psrldq\t{$8, %0|%0, 8}
7321 movq\t{%H1, %0|%0, %H1}"
7322 [(set_attr "type" "ssemov,sseishft,ssemov")
7323 (set_attr "length_immediate" "*,1,*")
7324 (set_attr "atom_unit" "*,sishuf,*")
7325 (set_attr "memory" "*,none,*")
7326 (set_attr "mode" "V2SF,TI,TI")])
7328 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 extraction of element 1 of a V2DI value into DImode,
;; using only single-precision moves.  Alternatives: movhps store to
;; memory, movhlps between SSE registers, and movlps load of the upper
;; half of a memory source (%H1).  Memory-to-memory is rejected.
7329 (define_insn "*vec_extractv2di_1_sse"
7330 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7332 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7333 (parallel [(const_int 1)])))]
7334 "!TARGET_SSE2 && TARGET_SSE
7335 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7337 movhps\t{%1, %0|%0, %1}
7338 movhlps\t{%1, %0|%0, %1}
7339 movlps\t{%H1, %0|%0, %H1}"
7340 [(set_attr "type" "ssemov")
7341 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast of an SImode register value to all elements of a V4SI
;; register.  Alternatives: pshufd with immediate 0 (Y2 registers,
;; maybe_vex prefix), or an in-place shufps with immediate 0 when the
;; source is tied to the destination (legacy encoding only).
7343 (define_insn "*vec_dupv4si"
7344 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7346 (match_operand:SI 1 "register_operand" " Y2,0")))]
7349 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7350 shufps\t{$0, %0, %0|%0, %0, 0}"
7351 [(set_attr "type" "sselog1")
7352 (set_attr "prefix" "maybe_vex,orig")
7353 (set_attr "length_immediate" "1")
7354 (set_attr "mode" "TI,V4SF")])
;; AVX pattern building a V2DI register from one DI register operand by
;; emitting vpunpcklqdq with operand 1 used as both sources.
7356 (define_insn "*vec_dupv2di_avx"
7357 [(set (match_operand:V2DI 0 "register_operand" "=x")
7359 (match_operand:DI 1 "register_operand" "x")))]
7361 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7362 [(set_attr "type" "sselog1")
7363 (set_attr "prefix" "vex")
7364 (set_attr "mode" "TI")])
;; Non-AVX V2DI pattern fed by one DI register operand.  Both alternatives
;; tie operand 1 to the destination; the first is typed sselog1 in TI mode,
;; the second ssemov in V4SF mode.
7366 (define_insn "*vec_dupv2di"
7367 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7369 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7374 [(set_attr "type" "sselog1,ssemov")
7375 (set_attr "mode" "TI,V4SF")])
;; AVX pattern forming a V2SI register from two SI operands.
;; Alternatives 0-2 use SSE registers: vpinsrd (immediate 0x1), vpunpckldq,
;; and vmovd when operand 2 matches constraint C.  Alternatives 3-4 use MMX
;; registers (*y) with punpckldq / movd and therefore keep the "orig"
;; (non-VEX) prefix, as selected by the if_then_else on the alternative.
7377 (define_insn "*vec_concatv2si_avx"
7378 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7380 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7381 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7384 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7385 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7386 vmovd\t{%1, %0|%0, %1}
7387 punpckldq\t{%2, %0|%0, %2}
7388 movd\t{%1, %0|%0, %1}"
7389 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7390 (set_attr "prefix_extra" "1,*,*,*,*")
7391 (set_attr "length_immediate" "1,*,*,*,*")
7392 (set (attr "prefix")
7393 (if_then_else (eq_attr "alternative" "3,4")
7394 (const_string "orig")
7395 (const_string "vex")))
7396 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Two-operand (destructive) form of the V2SI-from-two-SI pattern: pinsrd
;; with immediate 0x1 or punpckldq when operand 1 is tied to the
;; destination, movd when operand 2 matches constraint C, plus two MMX
;; (*y) alternatives using punpckldq / movd.
7398 (define_insn "*vec_concatv2si_sse4_1"
7399 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7401 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7402 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7405 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7406 punpckldq\t{%2, %0|%0, %2}
7407 movd\t{%1, %0|%0, %1}
7408 punpckldq\t{%2, %0|%0, %2}
7409 movd\t{%1, %0|%0, %1}"
7410 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7411 (set_attr "prefix_extra" "1,*,*,*,*")
7412 (set_attr "length_immediate" "1,*,*,*,*")
7413 (set_attr "mode" "TI,TI,TI,DI,DI")])
7415 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7416 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7417 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI-from-two-SI pattern without pinsrd: operand 2 is restricted to
;; reg_or_0_operand.  SSE alternatives use punpckldq (tied operand 1) or
;; movd (operand 2 matching C); the *y alternatives do the same with MMX
;; registers.
7418 (define_insn "*vec_concatv2si_sse2"
7419 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7421 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7422 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7425 punpckldq\t{%2, %0|%0, %2}
7426 movd\t{%1, %0|%0, %1}
7427 punpckldq\t{%2, %0|%0, %2}
7428 movd\t{%1, %0|%0, %1}"
7429 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7430 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI-from-two-SI pattern using single-precision instructions for the
;; SSE-register alternatives (unpcklps, or movss from memory when operand 2
;; matches C), with the usual two MMX (*y) alternatives via punpckldq / movd.
7432 (define_insn "*vec_concatv2si_sse"
7433 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7435 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7436 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7439 unpcklps\t{%2, %0|%0, %2}
7440 movss\t{%1, %0|%0, %1}
7441 punpckldq\t{%2, %0|%0, %2}
7442 movd\t{%1, %0|%0, %1}"
7443 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7444 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX pattern forming a V4SI register from two V2SI operands:
;; vpunpcklqdq when operand 2 is a register, vmovhps when it is in memory.
7446 (define_insn "*vec_concatv4si_1_avx"
7447 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7449 (match_operand:V2SI 1 "register_operand" " x,x")
7450 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7453 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7454 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7455 [(set_attr "type" "sselog,ssemov")
7456 (set_attr "prefix" "vex")
7457 (set_attr "mode" "TI,V2SF")])
;; Non-AVX V4SI-from-two-V2SI pattern.  Operand 1 is always tied to the
;; destination; operand 2 is merged with punpcklqdq (Y2 registers),
;; movlhps (x register) or movhps (memory).
7459 (define_insn "*vec_concatv4si_1"
7460 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7462 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7463 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7466 punpcklqdq\t{%2, %0|%0, %2}
7467 movlhps\t{%2, %0|%0, %2}
7468 movhps\t{%2, %0|%0, %2}"
7469 [(set_attr "type" "sselog,ssemov,ssemov")
7470 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit-only (!TARGET_64BIT) AVX pattern forming V2DI from two DI
;; operands.  When operand 2 matches constraint C the value comes from
;; memory (vmovq) or an MMX register (movq2dq, which keeps the "orig"
;; prefix); otherwise the operands are merged with vpunpcklqdq (register)
;; or vmovhps (memory).
7472 (define_insn "*vec_concatv2di_avx"
7473 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7475 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7476 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7477 "!TARGET_64BIT && TARGET_AVX"
7479 vmovq\t{%1, %0|%0, %1}
7480 movq2dq\t{%1, %0|%0, %1}
7481 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7482 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7483 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7484 (set (attr "prefix")
7485 (if_then_else (eq_attr "alternative" "1")
7486 (const_string "orig")
7487 (const_string "vex")))
7488 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit-only (!TARGET_64BIT) SSE pattern forming V2DI from two DI
;; operands.  Alternatives with operand 2 matching C move operand 1 alone
;; (movq, or movq2dq from an MMX register); the remaining alternatives tie
;; operand 1 to the destination and merge operand 2 with punpcklqdq,
;; movlhps or movhps.
7490 (define_insn "vec_concatv2di"
7491 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7493 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7494 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7495 "!TARGET_64BIT && TARGET_SSE"
7497 movq\t{%1, %0|%0, %1}
7498 movq2dq\t{%1, %0|%0, %1}
7499 punpcklqdq\t{%2, %0|%0, %2}
7500 movlhps\t{%2, %0|%0, %2}
7501 movhps\t{%2, %0|%0, %2}"
7502 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7503 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX pattern forming V2DI from two DI operands.  Alternative 0
;; inserts operand 2 with vpinsrq (immediate 0x1); alternatives 1-3 have
;; operand 2 matching C and just move operand 1 (vmovq from memory or a
;; general register, movq2dq from an MMX register); alternatives 4-5 merge
;; with vpunpcklqdq / vmovhps.  Only the movq2dq alternative keeps the
;; "orig" prefix.
7505 (define_insn "*vec_concatv2di_rex64_avx"
7506 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7508 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7509 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7510 "TARGET_64BIT && TARGET_AVX"
7512 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7513 vmovq\t{%1, %0|%0, %1}
7514 vmovq\t{%1, %0|%0, %1}
7515 movq2dq\t{%1, %0|%0, %1}
7516 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7517 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7518 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7519 (set_attr "prefix_extra" "1,*,*,*,*,*")
7520 (set_attr "length_immediate" "1,*,*,*,*,*")
7521 (set (attr "prefix")
7522 (if_then_else (eq_attr "alternative" "3")
7523 (const_string "orig")
7524 (const_string "vex")))
7525 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 pattern forming V2DI from two DI operands, in destructive
;; two-operand form: pinsrq (immediate 0x1) with operand 1 tied to the
;; destination; movq / movq2dq when operand 2 matches C; punpcklqdq,
;; movlhps or movhps for the remaining tied alternatives.  prefix_rex is
;; forced to 1 for the pinsrq alternative and the general-register movq
;; alternative.
7527 (define_insn "*vec_concatv2di_rex64_sse4_1"
7528 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7530 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7531 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7532 "TARGET_64BIT && TARGET_SSE4_1"
7534 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7535 movq\t{%1, %0|%0, %1}
7536 movq\t{%1, %0|%0, %1}
7537 movq2dq\t{%1, %0|%0, %1}
7538 punpcklqdq\t{%2, %0|%0, %2}
7539 movlhps\t{%2, %0|%0, %2}
7540 movhps\t{%2, %0|%0, %2}"
7541 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7542 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7543 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7544 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7545 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE pattern forming V2DI from two DI operands without pinsrq.
;; Alternatives 0-2 have operand 2 matching C and move operand 1 (movq
;; from memory/SSE register or a general register, movq2dq from MMX);
;; alternatives 3-5 tie operand 1 to the destination and merge operand 2
;; with punpcklqdq, movlhps or movhps.
7547 (define_insn "*vec_concatv2di_rex64_sse"
7548 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7550 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7551 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7552 "TARGET_64BIT && TARGET_SSE"
7554 movq\t{%1, %0|%0, %1}
7555 movq\t{%1, %0|%0, %1}
7556 movq2dq\t{%1, %0|%0, %1}
7557 punpcklqdq\t{%2, %0|%0, %2}
7558 movlhps\t{%2, %0|%0, %2}
7559 movhps\t{%2, %0|%0, %2}"
7560 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7561 (set_attr "prefix_rex" "*,1,*,*,*,*")
7562 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, true).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7564 (define_expand "vec_unpacku_hi_v16qi"
7565 [(match_operand:V8HI 0 "register_operand" "")
7566 (match_operand:V16QI 1 "register_operand" "")]
7570 ix86_expand_sse4_unpack (operands, true, true);
7571 else if (TARGET_SSE5)
7572 ix86_expand_sse5_unpack (operands, true, true);
7574 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, true).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7578 (define_expand "vec_unpacks_hi_v16qi"
7579 [(match_operand:V8HI 0 "register_operand" "")
7580 (match_operand:V16QI 1 "register_operand" "")]
7584 ix86_expand_sse4_unpack (operands, false, true);
7585 else if (TARGET_SSE5)
7586 ix86_expand_sse5_unpack (operands, false, true);
7588 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, false).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7592 (define_expand "vec_unpacku_lo_v16qi"
7593 [(match_operand:V8HI 0 "register_operand" "")
7594 (match_operand:V16QI 1 "register_operand" "")]
7598 ix86_expand_sse4_unpack (operands, true, false);
7599 else if (TARGET_SSE5)
7600 ix86_expand_sse5_unpack (operands, true, false);
7602 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, false).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7606 (define_expand "vec_unpacks_lo_v16qi"
7607 [(match_operand:V8HI 0 "register_operand" "")
7608 (match_operand:V16QI 1 "register_operand" "")]
7612 ix86_expand_sse4_unpack (operands, false, false);
7613 else if (TARGET_SSE5)
7614 ix86_expand_sse5_unpack (operands, false, false);
7616 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, true).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7620 (define_expand "vec_unpacku_hi_v8hi"
7621 [(match_operand:V4SI 0 "register_operand" "")
7622 (match_operand:V8HI 1 "register_operand" "")]
7626 ix86_expand_sse4_unpack (operands, true, true);
7627 else if (TARGET_SSE5)
7628 ix86_expand_sse5_unpack (operands, true, true);
7630 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, true).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7634 (define_expand "vec_unpacks_hi_v8hi"
7635 [(match_operand:V4SI 0 "register_operand" "")
7636 (match_operand:V8HI 1 "register_operand" "")]
7640 ix86_expand_sse4_unpack (operands, false, true);
7641 else if (TARGET_SSE5)
7642 ix86_expand_sse5_unpack (operands, false, true);
7644 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, false).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7648 (define_expand "vec_unpacku_lo_v8hi"
7649 [(match_operand:V4SI 0 "register_operand" "")
7650 (match_operand:V8HI 1 "register_operand" "")]
7654 ix86_expand_sse4_unpack (operands, true, false);
7655 else if (TARGET_SSE5)
7656 ix86_expand_sse5_unpack (operands, true, false);
7658 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, false).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7662 (define_expand "vec_unpacks_lo_v8hi"
7663 [(match_operand:V4SI 0 "register_operand" "")
7664 (match_operand:V8HI 1 "register_operand" "")]
7668 ix86_expand_sse4_unpack (operands, false, false);
7669 else if (TARGET_SSE5)
7670 ix86_expand_sse5_unpack (operands, false, false);
7672 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, true).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7676 (define_expand "vec_unpacku_hi_v4si"
7677 [(match_operand:V2DI 0 "register_operand" "")
7678 (match_operand:V4SI 1 "register_operand" "")]
7682 ix86_expand_sse4_unpack (operands, true, true);
7683 else if (TARGET_SSE5)
7684 ix86_expand_sse5_unpack (operands, true, true);
7686 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, true).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7690 (define_expand "vec_unpacks_hi_v4si"
7691 [(match_operand:V2DI 0 "register_operand" "")
7692 (match_operand:V4SI 1 "register_operand" "")]
7696 ix86_expand_sse4_unpack (operands, false, true);
7697 else if (TARGET_SSE5)
7698 ix86_expand_sse5_unpack (operands, false, true);
7700 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, true, false).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7704 (define_expand "vec_unpacku_lo_v4si"
7705 [(match_operand:V2DI 0 "register_operand" "")
7706 (match_operand:V4SI 1 "register_operand" "")]
7710 ix86_expand_sse4_unpack (operands, true, false);
7711 else if (TARGET_SSE5)
7712 ix86_expand_sse5_unpack (operands, true, false);
7714 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Delegates to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack, each
;; called with (operands, false, false).
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching the pattern name -- confirm against the helpers.
7718 (define_expand "vec_unpacks_lo_v4si"
7719 [(match_operand:V2DI 0 "register_operand" "")
7720 (match_operand:V4SI 1 "register_operand" "")]
7724 ix86_expand_sse4_unpack (operands, false, false);
7725 else if (TARGET_SSE5)
7726 ix86_expand_sse5_unpack (operands, false, false);
7728 ix86_expand_sse_unpack (operands, false, false);
7732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7736 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" pattern.  The RTL combines operands 1 and
;; 2 with a V16QI constant vector of sixteen ones; the preparation code
;; passes the operands through ix86_fixup_binary_operands_no_copy for PLUS
;; in V16QImode.
7738 (define_expand "sse2_uavgv16qi3"
7739 [(set (match_operand:V16QI 0 "register_operand" "")
7745 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7747 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7748 (const_vector:V16QI [(const_int 1) (const_int 1)
7749 (const_int 1) (const_int 1)
7750 (const_int 1) (const_int 1)
7751 (const_int 1) (const_int 1)
7752 (const_int 1) (const_int 1)
7753 (const_int 1) (const_int 1)
7754 (const_int 1) (const_int 1)
7755 (const_int 1) (const_int 1)]))
7758 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX insn for the V16QI "uavg" RTL (operands 1 and 2 plus a vector of
;; sixteen ones); emitted as three-operand vpavgb.  Operand 1 is marked
;; commutative ("%") and the condition requires ix86_binary_operator_ok
;; for PLUS.
7760 (define_insn "*avx_uavgv16qi3"
7761 [(set (match_operand:V16QI 0 "register_operand" "=x")
7767 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7769 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7770 (const_vector:V16QI [(const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)
7773 (const_int 1) (const_int 1)
7774 (const_int 1) (const_int 1)
7775 (const_int 1) (const_int 1)
7776 (const_int 1) (const_int 1)
7777 (const_int 1) (const_int 1)]))
7779 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7780 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7781 [(set_attr "type" "sseiadd")
7782 (set_attr "prefix" "vex")
7783 (set_attr "mode" "TI")])
;; SSE2 insn for the V16QI "uavg" RTL; emitted as two-operand pavgb with
;; operand 1 tied to the destination ("%0", commutative with operand 2).
;; The condition requires ix86_binary_operator_ok for PLUS.
7785 (define_insn "*sse2_uavgv16qi3"
7786 [(set (match_operand:V16QI 0 "register_operand" "=x")
7792 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7794 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7795 (const_vector:V16QI [(const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)
7798 (const_int 1) (const_int 1)
7799 (const_int 1) (const_int 1)
7800 (const_int 1) (const_int 1)
7801 (const_int 1) (const_int 1)
7802 (const_int 1) (const_int 1)]))
7804 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7805 "pavgb\t{%2, %0|%0, %2}"
7806 [(set_attr "type" "sseiadd")
7807 (set_attr "prefix_data16" "1")
7808 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" pattern.  The RTL combines operands 1 and
;; 2 with a V8HI constant vector of eight ones; the preparation code passes
;; the operands through ix86_fixup_binary_operands_no_copy for PLUS in
;; V8HImode.
7810 (define_expand "sse2_uavgv8hi3"
7811 [(set (match_operand:V8HI 0 "register_operand" "")
7817 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7819 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7820 (const_vector:V8HI [(const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)
7822 (const_int 1) (const_int 1)
7823 (const_int 1) (const_int 1)]))
7826 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX insn for the V8HI "uavg" RTL (operands 1 and 2 plus a vector of
;; eight ones); emitted as three-operand vpavgw.  Operand 1 is marked
;; commutative ("%") and the condition requires ix86_binary_operator_ok
;; for PLUS.
7828 (define_insn "*avx_uavgv8hi3"
7829 [(set (match_operand:V8HI 0 "register_operand" "=x")
7835 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7837 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7838 (const_vector:V8HI [(const_int 1) (const_int 1)
7839 (const_int 1) (const_int 1)
7840 (const_int 1) (const_int 1)
7841 (const_int 1) (const_int 1)]))
7843 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7844 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7845 [(set_attr "type" "sseiadd")
7846 (set_attr "prefix" "vex")
7847 (set_attr "mode" "TI")])
;; SSE2 insn for the V8HI "uavg" RTL; emitted as two-operand pavgw with
;; operand 1 tied to the destination ("%0", commutative with operand 2).
;; The condition requires ix86_binary_operator_ok for PLUS.
7849 (define_insn "*sse2_uavgv8hi3"
7850 [(set (match_operand:V8HI 0 "register_operand" "=x")
7856 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7858 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7859 (const_vector:V8HI [(const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)
7861 (const_int 1) (const_int 1)
7862 (const_int 1) (const_int 1)]))
7864 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7865 "pavgw\t{%2, %0|%0, %2}"
7866 [(set_attr "type" "sseiadd")
7867 (set_attr "prefix_data16" "1")
7868 (set_attr "mode" "TI")])
7870 ;; The correct representation for this is absolutely enormous, and
7871 ;; surely not generally useful.
;; AVX form: a V2DI result modelled as an unspec over two V16QI operands
;; (register and register-or-memory), emitted as three-operand vpsadbw.
7872 (define_insn "*avx_psadbw"
7873 [(set (match_operand:V2DI 0 "register_operand" "=x")
7874 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7875 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7878 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7879 [(set_attr "type" "sseiadd")
7880 (set_attr "prefix" "vex")
7881 (set_attr "mode" "TI")])
;; SSE2 form: a V2DI result modelled as an unspec over two V16QI operands,
;; emitted as two-operand psadbw with operand 1 tied to the destination.
7883 (define_insn "sse2_psadbw"
7884 [(set (match_operand:V2DI 0 "register_operand" "=x")
7885 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7889 "psadbw\t{%2, %0|%0, %2}"
7890 [(set_attr "type" "sseiadd")
7891 (set_attr "atom_unit" "simul")
7892 (set_attr "prefix_data16" "1")
7893 (set_attr "mode" "TI")])
;; 256-bit AVX vmovmskps/vmovmskpd: an SI general register receives a
;; value computed from one AVX256MODEF2P vector register.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the iterated mode.
7895 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7896 [(set (match_operand:SI 0 "register_operand" "=r")
7898 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7900 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7901 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7902 [(set_attr "type" "ssecvt")
7903 (set_attr "prefix" "vex")
7904 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd (VEX-encoded when applicable via the %v
;; template prefix): an SI general register receives a value computed
;; from one SSEMODEF2P vector register.  Enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the iterated mode.
7906 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7907 [(set (match_operand:SI 0 "register_operand" "=r")
7909 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7911 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7912 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7913 [(set_attr "type" "ssemov")
7914 (set_attr "prefix" "maybe_vex")
7915 (set_attr "mode" "<MODE>")])
;; pmovmskb (optionally VEX-encoded): an SI general register receives an
;; unspec value computed from one V16QI vector register.
7917 (define_insn "sse2_pmovmskb"
7918 [(set (match_operand:SI 0 "register_operand" "=r")
7919 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7922 "%vpmovmskb\t{%1, %0|%0, %1}"
7923 [(set_attr "type" "ssemov")
7924 (set_attr "prefix_data16" "1")
7925 (set_attr "prefix" "maybe_vex")
7926 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination is set from an
;; unspec whose inputs include two V16QI register operands.
7928 (define_expand "sse2_maskmovdqu"
7929 [(set (match_operand:V16QI 0 "memory_operand" "")
7930 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7931 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit (!TARGET_64BIT) maskmovdqu insn.  The destination is V16QI
;; memory addressed by an SI register with constraint "D"; the unspec
;; reads the two V16QI register operands and the previous contents of that
;; same memory (match_dup 0).  length_vex is fixed at 3.
7937 (define_insn "*sse2_maskmovdqu"
7938 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7939 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7940 (match_operand:V16QI 2 "register_operand" "x")
7941 (mem:V16QI (match_dup 0))]
7943 "TARGET_SSE2 && !TARGET_64BIT"
7944 ;; @@@ check ordering of operands in intel/nonintel syntax
7945 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7946 [(set_attr "type" "ssemov")
7947 (set_attr "prefix_data16" "1")
7948 ;; The implicit %rdi operand confuses default length_vex computation.
7949 (set_attr "length_vex" "3")
7950 (set_attr "prefix" "maybe_vex")
7951 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu insn.  Same shape as the 32-bit pattern but the
;; address register is DI.  length_vex is computed from operand 2: 3 + 1
;; when its register number is at or above FIRST_REX_SSE_REG, otherwise
;; 2 + 1.
7953 (define_insn "*sse2_maskmovdqu_rex64"
7954 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7955 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7956 (match_operand:V16QI 2 "register_operand" "x")
7957 (mem:V16QI (match_dup 0))]
7959 "TARGET_SSE2 && TARGET_64BIT"
7960 ;; @@@ check ordering of operands in intel/nonintel syntax
7961 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7962 [(set_attr "type" "ssemov")
7963 (set_attr "prefix_data16" "1")
7964 ;; The implicit %rdi operand confuses default length_vex computation.
7965 (set (attr "length_vex")
7966 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
7967 (set_attr "prefix" "maybe_vex")
7968 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile taking one SI memory operand; marked as a
;; memory load and tagged "mxcsr" for the Atom scheduling attribute.
7970 (define_insn "sse_ldmxcsr"
7971 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7975 [(set_attr "type" "sse")
7976 (set_attr "atom_sse_attr" "mxcsr")
7977 (set_attr "prefix" "maybe_vex")
7978 (set_attr "memory" "load")])
;; stmxcsr: stores an unspec_volatile SI value (UNSPECV_STMXCSR) into an
;; SI memory operand; marked as a memory store and tagged "mxcsr" for the
;; Atom scheduling attribute.
7980 (define_insn "sse_stmxcsr"
7981 [(set (match_operand:SI 0 "memory_operand" "=m")
7982 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7985 [(set_attr "type" "sse")
7986 (set_attr "atom_sse_attr" "mxcsr")
7987 (set_attr "prefix" "maybe_vex")
7988 (set_attr "memory" "store")])
;; Expander for sfence (TARGET_SSE or TARGET_3DNOW_A).  The preparation
;; code creates operand 0 as a BLKmode MEM whose address is a Pmode
;; SCRATCH and marks it volatile; the RTL is an UNSPEC_SFENCE over that
;; memory.
7990 (define_expand "sse_sfence"
7992 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7993 "TARGET_SSE || TARGET_3DNOW_A"
7995 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7996 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence RTL: a BLK operand set to UNSPEC_SFENCE of
;; itself.  It has no address bytes (length_address 0) and its memory
;; effect is declared "unknown".
7999 (define_insn "*sse_sfence"
8000 [(set (match_operand:BLK 0 "" "")
8001 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8002 "TARGET_SSE || TARGET_3DNOW_A"
8004 [(set_attr "type" "sse")
8005 (set_attr "length_address" "0")
8006 (set_attr "atom_sse_attr" "fence")
8007 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile over one address operand (constraint "p");
;; tagged "fence" for Atom scheduling with memory effect "unknown".
8009 (define_insn "sse2_clflush"
8010 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8014 [(set_attr "type" "sse")
8015 (set_attr "atom_sse_attr" "fence")
8016 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH; the RTL is an
;; UNSPEC_MFENCE over that memory.
8018 (define_expand "sse2_mfence"
8020 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8023 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8024 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence RTL: a BLK operand set to UNSPEC_MFENCE of
;; itself.  Enabled for TARGET_64BIT or TARGET_SSE2; no address bytes and
;; memory effect "unknown".
8027 (define_insn "*sse2_mfence"
8028 [(set (match_operand:BLK 0 "" "")
8029 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8030 "TARGET_64BIT || TARGET_SSE2"
8032 [(set_attr "type" "sse")
8033 (set_attr "length_address" "0")
8034 (set_attr "atom_sse_attr" "fence")
8035 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH; the RTL is an
;; UNSPEC_LFENCE over that memory.
8037 (define_expand "sse2_lfence"
8039 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8042 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8043 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence RTL: a BLK operand set to UNSPEC_LFENCE of
;; itself.  No address bytes, memory effect "unknown", and its own
;; "lfence" value for the Atom scheduling attribute.
8046 (define_insn "*sse2_lfence"
8047 [(set (match_operand:BLK 0 "" "")
8048 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8051 [(set_attr "type" "sse")
8052 (set_attr "length_address" "0")
8053 (set_attr "atom_sse_attr" "lfence")
8054 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile over two SI register operands pinned by the
;; "a" and "c" constraints.  A single pattern serves both 32- and 64-bit
;; targets for the reason given in the comment below; length is 3 bytes.
8056 (define_insn "sse3_mwait"
8057 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8058 (match_operand:SI 1 "register_operand" "c")]
8061 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8062 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8063 ;; we only need to set up 32bit registers.
8065 [(set_attr "length" "3")])
;; 32-bit (!TARGET_64BIT) monitor: an unspec_volatile over three SI
;; register operands pinned by the "a", "c" and "d" constraints, printed
;; explicitly in the template; length is 3 bytes.
8067 (define_insn "sse3_monitor"
8068 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8069 (match_operand:SI 1 "register_operand" "c")
8070 (match_operand:SI 2 "register_operand" "d")]
8072 "TARGET_SSE3 && !TARGET_64BIT"
8073 "monitor\t%0, %1, %2"
8074 [(set_attr "length" "3")])
;; 64-bit monitor: like the 32-bit pattern, but operand 0 (constraint
;; "a") is DI while operands 1 and 2 ("c", "d") stay SI, for the reason
;; given in the comment below; length is 3 bytes.
8076 (define_insn "sse3_monitor64"
8077 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8078 (match_operand:SI 1 "register_operand" "c")
8079 (match_operand:SI 2 "register_operand" "d")]
8081 "TARGET_SSE3 && TARGET_64BIT"
8082 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8083 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8084 ;; zero extended to 64bit, we only need to set up 32bit registers.
8086 [(set_attr "length" "3")])
8088 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8090 ;; SSSE3 instructions
8092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The RTL selects adjacent HI element pairs
;; (0/1, 2/3, 4/5, 6/7) first from operand 1 and then from operand 2;
;; the insn is emitted in three-operand VEX form.
8094 (define_insn "*avx_phaddwv8hi3"
8095 [(set (match_operand:V8HI 0 "register_operand" "=x")
8101 (match_operand:V8HI 1 "register_operand" "x")
8102 (parallel [(const_int 0)]))
8103 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8105 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8106 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8109 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8110 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8112 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8113 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8118 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8119 (parallel [(const_int 0)]))
8120 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8123 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8126 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8127 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8129 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8130 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8132 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8133 [(set_attr "type" "sseiadd")
8134 (set_attr "prefix_extra" "1")
8135 (set_attr "prefix" "vex")
8136 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI.  The RTL selects adjacent HI element pairs
;; (0/1, 2/3, 4/5, 6/7) first from operand 1 and then from operand 2;
;; operand 1 is tied to the destination for the two-operand encoding.
8138 (define_insn "ssse3_phaddwv8hi3"
8139 [(set (match_operand:V8HI 0 "register_operand" "=x")
8145 (match_operand:V8HI 1 "register_operand" "0")
8146 (parallel [(const_int 0)]))
8147 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8149 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8150 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8153 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8154 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8156 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8157 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8162 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8163 (parallel [(const_int 0)]))
8164 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8167 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8170 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8171 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8173 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8174 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8176 "phaddw\t{%2, %0|%0, %2}"
8177 [(set_attr "type" "sseiadd")
8178 (set_attr "atom_unit" "complex")
8179 (set_attr "prefix_data16" "1")
8180 (set_attr "prefix_extra" "1")
8181 (set_attr "mode" "TI")])
;; MMX-register (constraint "y") phaddw on V4HI.  The RTL selects adjacent
;; HI element pairs (0/1, 2/3) from operand 1 and then from operand 2;
;; operand 1 is tied to the destination.  prefix_rex is computed per insn
;; by x86_extended_reg_mentioned_p.
8183 (define_insn "ssse3_phaddwv4hi3"
8184 [(set (match_operand:V4HI 0 "register_operand" "=y")
8189 (match_operand:V4HI 1 "register_operand" "0")
8190 (parallel [(const_int 0)]))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8193 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8194 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8198 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8199 (parallel [(const_int 0)]))
8200 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8202 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8203 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8205 "phaddw\t{%2, %0|%0, %2}"
8206 [(set_attr "type" "sseiadd")
8207 (set_attr "atom_unit" "complex")
8208 (set_attr "prefix_extra" "1")
8209 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8210 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  The RTL selects adjacent SI element pairs
;; (0/1, 2/3) from operand 1 and then from operand 2; emitted in
;; three-operand VEX form.
8212 (define_insn "*avx_phadddv4si3"
8213 [(set (match_operand:V4SI 0 "register_operand" "=x")
8218 (match_operand:V4SI 1 "register_operand" "x")
8219 (parallel [(const_int 0)]))
8220 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8222 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8223 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8227 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8228 (parallel [(const_int 0)]))
8229 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8231 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8232 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8234 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8235 [(set_attr "type" "sseiadd")
8236 (set_attr "prefix_extra" "1")
8237 (set_attr "prefix" "vex")
8238 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V4SI.  The RTL selects adjacent SI element pairs
;; (0/1, 2/3) from operand 1 and then from operand 2; operand 1 is tied to
;; the destination for the two-operand encoding.
8240 (define_insn "ssse3_phadddv4si3"
8241 [(set (match_operand:V4SI 0 "register_operand" "=x")
8246 (match_operand:V4SI 1 "register_operand" "0")
8247 (parallel [(const_int 0)]))
8248 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8250 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8251 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8255 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8256 (parallel [(const_int 0)]))
8257 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8259 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8260 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8262 "phaddd\t{%2, %0|%0, %2}"
8263 [(set_attr "type" "sseiadd")
8264 (set_attr "atom_unit" "complex")
8265 (set_attr "prefix_data16" "1")
8266 (set_attr "prefix_extra" "1")
8267 (set_attr "mode" "TI")])
;; MMX-register (constraint "y") phaddd on V2SI.  The RTL selects elements
;; 0 and 1 of operand 1 and then of operand 2; operand 1 is tied to the
;; destination.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8269 (define_insn "ssse3_phadddv2si3"
8270 [(set (match_operand:V2SI 0 "register_operand" "=y")
8274 (match_operand:V2SI 1 "register_operand" "0")
8275 (parallel [(const_int 0)]))
8276 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8279 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8280 (parallel [(const_int 0)]))
8281 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8283 "phaddd\t{%2, %0|%0, %2}"
8284 [(set_attr "type" "sseiadd")
8285 (set_attr "atom_unit" "complex")
8286 (set_attr "prefix_extra" "1")
8287 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8288 (set_attr "mode" "DI")])
;; AVX vphaddsw on V8HI.  The RTL selects adjacent HI element pairs
;; (0/1, 2/3, 4/5, 6/7) from operand 1 and then from operand 2; emitted in
;; three-operand VEX form.
8290 (define_insn "*avx_phaddswv8hi3"
8291 [(set (match_operand:V8HI 0 "register_operand" "=x")
8297 (match_operand:V8HI 1 "register_operand" "x")
8298 (parallel [(const_int 0)]))
8299 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8302 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8305 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8308 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8309 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8314 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8315 (parallel [(const_int 0)]))
8316 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8318 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8319 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8322 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8323 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8325 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8326 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8328 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8329 [(set_attr "type" "sseiadd")
8330 (set_attr "prefix_extra" "1")
8331 (set_attr "prefix" "vex")
8332 (set_attr "mode" "TI")])
;; SSSE3 phaddsw on V8HI.  The RTL selects adjacent HI element pairs
;; (0/1, 2/3, 4/5, 6/7) from operand 1 and then from operand 2; operand 1
;; is tied to the destination for the two-operand encoding.
8334 (define_insn "ssse3_phaddswv8hi3"
8335 [(set (match_operand:V8HI 0 "register_operand" "=x")
8341 (match_operand:V8HI 1 "register_operand" "0")
8342 (parallel [(const_int 0)]))
8343 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8345 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8346 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8349 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8350 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8352 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8353 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8358 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8359 (parallel [(const_int 0)]))
8360 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8362 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8363 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8366 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8367 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8369 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8370 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8372 "phaddsw\t{%2, %0|%0, %2}"
8373 [(set_attr "type" "sseiadd")
8374 (set_attr "atom_unit" "complex")
8375 (set_attr "prefix_data16" "1")
8376 (set_attr "prefix_extra" "1")
8377 (set_attr "mode" "TI")])
;; MMX-register (constraint "y") phaddsw on V4HI.  The RTL selects
;; adjacent HI element pairs (0/1, 2/3) from operand 1 and then from
;; operand 2; operand 1 is tied to the destination.  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
8379 (define_insn "ssse3_phaddswv4hi3"
8380 [(set (match_operand:V4HI 0 "register_operand" "=y")
8385 (match_operand:V4HI 1 "register_operand" "0")
8386 (parallel [(const_int 0)]))
8387 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8389 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8390 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8394 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8395 (parallel [(const_int 0)]))
8396 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8398 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8399 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8401 "phaddsw\t{%2, %0|%0, %2}"
8402 [(set_attr "type" "sseiadd")
8403 (set_attr "atom_unit" "complex")
8404 (set_attr "prefix_extra" "1")
8405 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8406 (set_attr "mode" "DI")])
;; AVX vphsubw on V8HI.  The RTL selects adjacent HI element pairs
;; (0/1, 2/3, 4/5, 6/7) from operand 1 and then from operand 2; emitted in
;; three-operand VEX form.
8408 (define_insn "*avx_phsubwv8hi3"
8409 [(set (match_operand:V8HI 0 "register_operand" "=x")
8415 (match_operand:V8HI 1 "register_operand" "x")
8416 (parallel [(const_int 0)]))
8417 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8419 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8420 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8423 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8424 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8426 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8427 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8432 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8433 (parallel [(const_int 0)]))
8434 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8436 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8437 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8440 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8441 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8444 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8446 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8447 [(set_attr "type" "sseiadd")
8448 (set_attr "prefix_extra" "1")
8449 (set_attr "prefix" "vex")
8450 (set_attr "mode" "TI")])
;; Two-operand `phsubw` pattern on V8HI with "x"-constrained registers.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; an "x" register or memory.  Elements 0..7 of both inputs are selected.
8452 (define_insn "ssse3_phsubwv8hi3"
8453 [(set (match_operand:V8HI 0 "register_operand" "=x")
8459 (match_operand:V8HI 1 "register_operand" "0")
8460 (parallel [(const_int 0)]))
8461 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8463 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8464 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8467 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8468 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8471 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8476 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8477 (parallel [(const_int 0)]))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8485 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8490 "phsubw\t{%2, %0|%0, %2}"
8491 [(set_attr "type" "sseiadd")
8492 (set_attr "atom_unit" "complex")
8493 (set_attr "prefix_data16" "1")
8494 (set_attr "prefix_extra" "1")
8495 (set_attr "mode" "TI")])
;; Two-operand `phsubw` pattern on V4HI with "y"-constrained registers.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; a "y" register or memory.  Elements 0..3 of both inputs are selected.
8497 (define_insn "ssse3_phsubwv4hi3"
8498 [(set (match_operand:V4HI 0 "register_operand" "=y")
8503 (match_operand:V4HI 1 "register_operand" "0")
8504 (parallel [(const_int 0)]))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8507 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8512 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8513 (parallel [(const_int 0)]))
8514 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8516 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8517 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8519 "phsubw\t{%2, %0|%0, %2}"
8520 [(set_attr "type" "sseiadd")
8521 (set_attr "atom_unit" "complex")
8522 (set_attr "prefix_extra" "1")
8523 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8524 (set_attr "mode" "DI")])
;; Three-operand `vphsubd` pattern on V4SI ("vex" prefix attribute).
;; Operand 1 is a separate "x" register; operand 2 may be an "x" register
;; or memory.  Elements 0..3 of both inputs are selected.
8526 (define_insn "*avx_phsubdv4si3"
8527 [(set (match_operand:V4SI 0 "register_operand" "=x")
8532 (match_operand:V4SI 1 "register_operand" "x")
8533 (parallel [(const_int 0)]))
8534 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8536 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8537 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8541 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8542 (parallel [(const_int 0)]))
8543 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8545 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8546 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8548 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8549 [(set_attr "type" "sseiadd")
8550 (set_attr "prefix_extra" "1")
8551 (set_attr "prefix" "vex")
8552 (set_attr "mode" "TI")])
;; Two-operand `phsubd` pattern on V4SI with "x"-constrained registers.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; an "x" register or memory.  Elements 0..3 of both inputs are selected.
8554 (define_insn "ssse3_phsubdv4si3"
8555 [(set (match_operand:V4SI 0 "register_operand" "=x")
8560 (match_operand:V4SI 1 "register_operand" "0")
8561 (parallel [(const_int 0)]))
8562 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8564 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8565 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8569 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8570 (parallel [(const_int 0)]))
8571 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8573 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8574 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8576 "phsubd\t{%2, %0|%0, %2}"
8577 [(set_attr "type" "sseiadd")
8578 (set_attr "atom_unit" "complex")
8579 (set_attr "prefix_data16" "1")
8580 (set_attr "prefix_extra" "1")
8581 (set_attr "mode" "TI")])
;; Two-operand `phsubd` pattern on V2SI with "y"-constrained registers.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; a "y" register or memory.  Elements 0 and 1 of both inputs are selected.
8583 (define_insn "ssse3_phsubdv2si3"
8584 [(set (match_operand:V2SI 0 "register_operand" "=y")
8588 (match_operand:V2SI 1 "register_operand" "0")
8589 (parallel [(const_int 0)]))
8590 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8593 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8594 (parallel [(const_int 0)]))
8595 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8597 "phsubd\t{%2, %0|%0, %2}"
8598 [(set_attr "type" "sseiadd")
8599 (set_attr "atom_unit" "complex")
8600 (set_attr "prefix_extra" "1")
8601 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8602 (set_attr "mode" "DI")])
;; Three-operand `vphsubsw` pattern on V8HI ("vex" prefix attribute).
;; Operand 1 is a separate "x" register; operand 2 may be an "x" register
;; or memory.  Elements 0..7 of both inputs are selected.
8604 (define_insn "*avx_phsubswv8hi3"
8605 [(set (match_operand:V8HI 0 "register_operand" "=x")
8611 (match_operand:V8HI 1 "register_operand" "x")
8612 (parallel [(const_int 0)]))
8613 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8615 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8616 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8619 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8620 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8622 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8623 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8628 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8629 (parallel [(const_int 0)]))
8630 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8633 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8636 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8637 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8639 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8640 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8642 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8643 [(set_attr "type" "sseiadd")
8644 (set_attr "prefix_extra" "1")
8645 (set_attr "prefix" "vex")
8646 (set_attr "mode" "TI")])
;; Two-operand `phsubsw` pattern on V8HI with "x"-constrained registers.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; an "x" register or memory.  Elements 0..7 of both inputs are selected.
8648 (define_insn "ssse3_phsubswv8hi3"
8649 [(set (match_operand:V8HI 0 "register_operand" "=x")
8655 (match_operand:V8HI 1 "register_operand" "0")
8656 (parallel [(const_int 0)]))
8657 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8664 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8667 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8672 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8673 (parallel [(const_int 0)]))
8674 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8676 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8677 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8680 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8683 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8684 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8686 "phsubsw\t{%2, %0|%0, %2}"
8687 [(set_attr "type" "sseiadd")
8688 (set_attr "atom_unit" "complex")
8689 (set_attr "prefix_data16" "1")
8690 (set_attr "prefix_extra" "1")
8691 (set_attr "mode" "TI")])
;; Two-operand `phsubsw` pattern on V4HI with "y"-constrained registers.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; a "y" register or memory.  Elements 0..3 of both inputs are selected.
8693 (define_insn "ssse3_phsubswv4hi3"
8694 [(set (match_operand:V4HI 0 "register_operand" "=y")
8699 (match_operand:V4HI 1 "register_operand" "0")
8700 (parallel [(const_int 0)]))
8701 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8703 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8704 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8708 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8709 (parallel [(const_int 0)]))
8710 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8712 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8713 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8715 "phsubsw\t{%2, %0|%0, %2}"
8716 [(set_attr "type" "sseiadd")
8717 (set_attr "atom_unit" "complex")
8718 (set_attr "prefix_extra" "1")
8719 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8720 (set_attr "mode" "DI")])
;; Three-operand `vpmaddubsw` pattern: V16QI inputs, V8HI result ("vex"
;; prefix attribute).  Each input is selected twice, once with an index
;; list starting at 0 and once with a list starting at 1.  Operand 1 is a
;; separate "x" register; operand 2 may be an "x" register or memory.
8722 (define_insn "*avx_pmaddubsw128"
8723 [(set (match_operand:V8HI 0 "register_operand" "=x")
8728 (match_operand:V16QI 1 "register_operand" "x")
8729 (parallel [(const_int 0)
8739 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8740 (parallel [(const_int 0)
8750 (vec_select:V16QI (match_dup 1)
8751 (parallel [(const_int 1)
8760 (vec_select:V16QI (match_dup 2)
8761 (parallel [(const_int 1)
8768 (const_int 15)]))))))]
8770 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8771 [(set_attr "type" "sseiadd")
8772 (set_attr "prefix_extra" "1")
8773 (set_attr "prefix" "vex")
8774 (set_attr "mode" "TI")])
;; Two-operand `pmaddubsw` pattern: V16QI inputs, V8HI result.  Each input
;; is selected twice, once with an index list starting at 0 and once with
;; a list starting at 1.  Operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be an "x" register or memory.
8776 (define_insn "ssse3_pmaddubsw128"
8777 [(set (match_operand:V8HI 0 "register_operand" "=x")
8782 (match_operand:V16QI 1 "register_operand" "0")
8783 (parallel [(const_int 0)
8793 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8794 (parallel [(const_int 0)
8804 (vec_select:V16QI (match_dup 1)
8805 (parallel [(const_int 1)
8814 (vec_select:V16QI (match_dup 2)
8815 (parallel [(const_int 1)
8822 (const_int 15)]))))))]
8824 "pmaddubsw\t{%2, %0|%0, %2}"
8825 [(set_attr "type" "sseiadd")
8826 (set_attr "atom_unit" "simul")
8827 (set_attr "prefix_data16" "1")
8828 (set_attr "prefix_extra" "1")
8829 (set_attr "mode" "TI")])
;; Two-operand `pmaddubsw` pattern with "y"-constrained registers: V8QI
;; inputs, V4HI result.  Operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be a "y" register or memory.
8831 (define_insn "ssse3_pmaddubsw"
8832 [(set (match_operand:V4HI 0 "register_operand" "=y")
8837 (match_operand:V8QI 1 "register_operand" "0")
8838 (parallel [(const_int 0)
8844 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8845 (parallel [(const_int 0)
8851 (vec_select:V8QI (match_dup 1)
8852 (parallel [(const_int 1)
8857 (vec_select:V8QI (match_dup 2)
8858 (parallel [(const_int 1)
8861 (const_int 7)]))))))]
8863 "pmaddubsw\t{%2, %0|%0, %2}"
8864 [(set_attr "type" "sseiadd")
8865 (set_attr "atom_unit" "simul")
8866 (set_attr "prefix_extra" "1")
8867 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8868 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw operation.  Both inputs accept
;; any nonimmediate operand; the pattern contains a V8HI vector of ones.
;; The final string calls ix86_fixup_binary_operands_no_copy with MULT to
;; adjust the operands before matching.
8870 (define_expand "ssse3_pmulhrswv8hi3"
8871 [(set (match_operand:V8HI 0 "register_operand" "")
8878 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8880 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8882 (const_vector:V8HI [(const_int 1) (const_int 1)
8883 (const_int 1) (const_int 1)
8884 (const_int 1) (const_int 1)
8885 (const_int 1) (const_int 1)]))
8888 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Three-operand `vpmulhrsw` pattern on V8HI.  Enabled when TARGET_AVX
;; holds and ix86_binary_operator_ok accepts the operands for MULT.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.
8890 (define_insn "*avx_pmulhrswv8hi3"
8891 [(set (match_operand:V8HI 0 "register_operand" "=x")
8898 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8900 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8902 (const_vector:V8HI [(const_int 1) (const_int 1)
8903 (const_int 1) (const_int 1)
8904 (const_int 1) (const_int 1)
8905 (const_int 1) (const_int 1)]))
8907 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8908 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8909 [(set_attr "type" "sseimul")
8910 (set_attr "prefix_extra" "1")
8911 (set_attr "prefix" "vex")
8912 (set_attr "mode" "TI")])
;; Two-operand `pmulhrsw` pattern on V8HI.  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands for MULT.
;; Operand 1 is tied to the destination and, via "%", commutative with
;; operand 2.
8914 (define_insn "*ssse3_pmulhrswv8hi3"
8915 [(set (match_operand:V8HI 0 "register_operand" "=x")
8922 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8924 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8926 (const_vector:V8HI [(const_int 1) (const_int 1)
8927 (const_int 1) (const_int 1)
8928 (const_int 1) (const_int 1)
8929 (const_int 1) (const_int 1)]))
8931 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8932 "pmulhrsw\t{%2, %0|%0, %2}"
8933 [(set_attr "type" "sseimul")
8934 (set_attr "prefix_data16" "1")
8935 (set_attr "prefix_extra" "1")
8936 (set_attr "mode" "TI")])
;; Named expander for the V4HI pmulhrsw operation.  Both inputs accept
;; any nonimmediate operand; the pattern contains a V4HI vector of ones.
;; The final string calls ix86_fixup_binary_operands_no_copy with MULT to
;; adjust the operands before matching.
8938 (define_expand "ssse3_pmulhrswv4hi3"
8939 [(set (match_operand:V4HI 0 "register_operand" "")
8946 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8948 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8950 (const_vector:V4HI [(const_int 1) (const_int 1)
8951 (const_int 1) (const_int 1)]))
8954 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Two-operand `pmulhrsw` pattern on V4HI with "y"-constrained registers.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands for MULT.  Operand 1 is tied to the destination and, via "%",
;; commutative with operand 2.
8956 (define_insn "*ssse3_pmulhrswv4hi3"
8957 [(set (match_operand:V4HI 0 "register_operand" "=y")
8964 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8966 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8968 (const_vector:V4HI [(const_int 1) (const_int 1)
8969 (const_int 1) (const_int 1)]))
8971 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8972 "pmulhrsw\t{%2, %0|%0, %2}"
8973 [(set_attr "type" "sseimul")
8974 (set_attr "prefix_extra" "1")
8975 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8976 (set_attr "mode" "DI")])
;; Three-operand `vpshufb` pattern on V16QI, modelled as an unspec of
;; operands 1 and 2 ("vex" prefix attribute).  Operand 2 may be an "x"
;; register or memory.
8978 (define_insn "*avx_pshufbv16qi3"
8979 [(set (match_operand:V16QI 0 "register_operand" "=x")
8980 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8981 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8984 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8985 [(set_attr "type" "sselog1")
8986 (set_attr "prefix_extra" "1")
8987 (set_attr "prefix" "vex")
8988 (set_attr "mode" "TI")])
;; Two-operand `pshufb` pattern on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be an "x" register or memory.
8990 (define_insn "ssse3_pshufbv16qi3"
8991 [(set (match_operand:V16QI 0 "register_operand" "=x")
8992 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8993 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8996 "pshufb\t{%2, %0|%0, %2}";
8997 [(set_attr "type" "sselog1")
8998 (set_attr "prefix_data16" "1")
8999 (set_attr "prefix_extra" "1")
9000 (set_attr "mode" "TI")])
;; Two-operand `pshufb` pattern on V8QI with "y"-constrained registers,
;; modelled as an unspec of operands 1 and 2.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a "y" register or memory.
9002 (define_insn "ssse3_pshufbv8qi3"
9003 [(set (match_operand:V8QI 0 "register_operand" "=y")
9004 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9005 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9008 "pshufb\t{%2, %0|%0, %2}";
9009 [(set_attr "type" "sselog1")
9010 (set_attr "prefix_extra" "1")
9011 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9012 (set_attr "mode" "DI")])
;; Three-operand `vpsign<ssevecsize>` pattern, instantiated for each mode
;; of the SSEMODE124 iterator ("vex" prefix attribute).  Operand 2 may be
;; an "x" register or memory.
9014 (define_insn "*avx_psign<mode>3"
9015 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9017 [(match_operand:SSEMODE124 1 "register_operand" "x")
9018 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9021 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9022 [(set_attr "type" "sselog1")
9023 (set_attr "prefix_extra" "1")
9024 (set_attr "prefix" "vex")
9025 (set_attr "mode" "TI")])
;; Two-operand `psign<ssevecsize>` pattern, instantiated for each mode of
;; the SSEMODE124 iterator.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be an "x" register or memory.
9027 (define_insn "ssse3_psign<mode>3"
9028 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9030 [(match_operand:SSEMODE124 1 "register_operand" "0")
9031 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9034 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9035 [(set_attr "type" "sselog1")
9036 (set_attr "prefix_data16" "1")
9037 (set_attr "prefix_extra" "1")
9038 (set_attr "mode" "TI")])
;; Two-operand `psign<mmxvecsize>` pattern, instantiated for each mode of
;; the MMXMODEI iterator with "y"-constrained registers.  It shares its
;; name template with the SSEMODE124 pattern but iterates over a different
;; mode list.  Operand 1 is tied to the destination (constraint "0").
9040 (define_insn "ssse3_psign<mode>3"
9041 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9043 [(match_operand:MMXMODEI 1 "register_operand" "0")
9044 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9047 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9048 [(set_attr "type" "sselog1")
9049 (set_attr "prefix_extra" "1")
9050 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9051 (set_attr "mode" "DI")])
;; Three-operand `vpalignr` pattern on TI, modelled as an unspec of two
;; TI inputs and an immediate.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code replaces it with its
;; value divided by 8 before returning the template (presumably turning
;; a bit count into the byte count the instruction takes -- confirm).
9053 (define_insn "*avx_palignrti"
9054 [(set (match_operand:TI 0 "register_operand" "=x")
9055 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9056 (match_operand:TI 2 "nonimmediate_operand" "xm")
9057 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9061 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9062 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9064 [(set_attr "type" "sseishft")
9065 (set_attr "prefix_extra" "1")
9066 (set_attr "length_immediate" "1")
9067 (set_attr "prefix" "vex")
9068 (set_attr "mode" "TI")])
;; Two-operand `palignr` pattern on TI, modelled as an unspec of two TI
;; inputs and an immediate.  Operand 1 is tied to the destination.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; replaces it with its value divided by 8 before returning the template
;; (presumably turning a bit count into a byte count -- confirm).
9070 (define_insn "ssse3_palignrti"
9071 [(set (match_operand:TI 0 "register_operand" "=x")
9072 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9073 (match_operand:TI 2 "nonimmediate_operand" "xm")
9074 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9078 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9079 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9081 [(set_attr "type" "sseishft")
9082 (set_attr "atom_unit" "sishuf")
9083 (set_attr "prefix_data16" "1")
9084 (set_attr "prefix_extra" "1")
9085 (set_attr "length_immediate" "1")
9086 (set_attr "mode" "TI")])
;; Two-operand `palignr` pattern on DI with "y"-constrained registers,
;; modelled as an unspec of two DI inputs and an immediate.  Operand 1 is
;; tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code replaces it with its
;; value divided by 8 before returning the template (presumably turning
;; a bit count into a byte count -- confirm).
9088 (define_insn "ssse3_palignrdi"
9089 [(set (match_operand:DI 0 "register_operand" "=y")
9090 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9091 (match_operand:DI 2 "nonimmediate_operand" "ym")
9092 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9096 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9097 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9099 [(set_attr "type" "sseishft")
9100 (set_attr "atom_unit" "sishuf")
9101 (set_attr "prefix_extra" "1")
9102 (set_attr "length_immediate" "1")
9103 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9104 (set_attr "mode" "DI")])
;; Vector absolute value for each mode of the SSEMODE124 iterator, emitted
;; as `pabs<ssevecsize>`.  The source may be an "x" register or memory.
;; The template starts with "%v" and the prefix attribute is "maybe_vex",
;; so the same pattern presumably serves both encodings -- confirm.
9106 (define_insn "abs<mode>2"
9107 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9108 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9110 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9111 [(set_attr "type" "sselog1")
9112 (set_attr "prefix_data16" "1")
9113 (set_attr "prefix_extra" "1")
9114 (set_attr "prefix" "maybe_vex")
9115 (set_attr "mode" "TI")])
;; Vector absolute value for each mode of the MMXMODEI iterator, emitted
;; as `pabs<mmxvecsize>` with "y"-constrained registers.  The source may
;; be a "y" register or memory.  prefix_rep is explicitly set to "0".
9117 (define_insn "abs<mode>2"
9118 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9119 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9121 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9122 [(set_attr "type" "sselog1")
9123 (set_attr "prefix_rep" "0")
9124 (set_attr "prefix_extra" "1")
9125 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9126 (set_attr "mode" "DI")])
9128 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9130 ;; AMD SSE4A instructions
9132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Store pattern for each mode of the MODEF iterator, emitted as
;; `movnts<ssemodefsuffix>`: the destination is memory ("=m") and the
;; source is an "x" register wrapped in an unspec.
9134 (define_insn "sse4a_movnt<mode>"
9135 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9137 [(match_operand:MODEF 1 "register_operand" "x")]
9140 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9141 [(set_attr "type" "ssemov")
9142 (set_attr "mode" "<MODE>")])
;; Store pattern emitted as `movnts<ssemodesuffixf2c>`: element 0 of an
;; SSEMODEF2P vector register is selected and stored to a memory operand
;; of the corresponding scalar mode, wrapped in an unspec.
9144 (define_insn "sse4a_vmmovnt<mode>"
9145 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9146 (unspec:<ssescalarmode>
9147 [(vec_select:<ssescalarmode>
9148 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9149 (parallel [(const_int 0)]))]
9152 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9153 [(set_attr "type" "ssemov")
9154 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of `extrq` on V2DI: operand 1 is tied to the destination
;; and operands 2 and 3 are integer constants printed as two immediates
;; (length_immediate is "2").
9156 (define_insn "sse4a_extrqi"
9157 [(set (match_operand:V2DI 0 "register_operand" "=x")
9158 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9159 (match_operand 2 "const_int_operand" "")
9160 (match_operand 3 "const_int_operand" "")]
9163 "extrq\t{%3, %2, %0|%0, %2, %3}"
9164 [(set_attr "type" "sse")
9165 (set_attr "prefix_data16" "1")
9166 (set_attr "length_immediate" "2")
9167 (set_attr "mode" "TI")])
;; Register form of `extrq` on V2DI: operand 1 is tied to the destination
;; and operand 2 is a V16QI "x" register.
9169 (define_insn "sse4a_extrq"
9170 [(set (match_operand:V2DI 0 "register_operand" "=x")
9171 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9172 (match_operand:V16QI 2 "register_operand" "x")]
9175 "extrq\t{%2, %0|%0, %2}"
9176 [(set_attr "type" "sse")
9177 (set_attr "prefix_data16" "1")
9178 (set_attr "mode" "TI")])
;; Immediate form of `insertq` on V2DI: operand 1 is tied to the
;; destination, operand 2 is an "x" register, and operands 3 and 4 are
;; integer constants printed as two immediates (length_immediate is "2").
;; prefix_data16 is forced to "0" and prefix_rep to "1".
9180 (define_insn "sse4a_insertqi"
9181 [(set (match_operand:V2DI 0 "register_operand" "=x")
9182 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9183 (match_operand:V2DI 2 "register_operand" "x")
9184 (match_operand 3 "const_int_operand" "")
9185 (match_operand 4 "const_int_operand" "")]
9188 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9189 [(set_attr "type" "sseins")
9190 (set_attr "prefix_data16" "0")
9191 (set_attr "prefix_rep" "1")
9192 (set_attr "length_immediate" "2")
9193 (set_attr "mode" "TI")])
;; Register form of `insertq` on V2DI: operand 1 is tied to the
;; destination and operand 2 is an "x" register.  prefix_data16 is forced
;; to "0" and prefix_rep to "1".
9195 (define_insn "sse4a_insertq"
9196 [(set (match_operand:V2DI 0 "register_operand" "=x")
9197 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9198 (match_operand:V2DI 2 "register_operand" "x")]
9201 "insertq\t{%2, %0|%0, %2}"
9202 [(set_attr "type" "sseins")
9203 (set_attr "prefix_data16" "0")
9204 (set_attr "prefix_rep" "1")
9205 (set_attr "mode" "TI")])
9207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9209 ;; Intel SSE4.1 instructions
9211 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand `vblendp<avxmodesuffixf2c>` pattern for each AVXMODEF2P
;; mode, expressed as a vec_merge of operand 2 and operand 1 under the
;; immediate mask in operand 3.  The mask predicate is
;; const_0_to_<blendbits>_operand, so its range depends on the mode.
9213 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9214 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9215 (vec_merge:AVXMODEF2P
9216 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9217 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9218 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9220 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9221 [(set_attr "type" "ssemov")
9222 (set_attr "prefix_extra" "1")
9223 (set_attr "length_immediate" "1")
9224 (set_attr "prefix" "vex")
9225 (set_attr "mode" "<avxvecmode>")])
;; Four-operand `vblendvp<avxmodesuffixf2c>` pattern for each AVXMODEF2P
;; mode; operand 3 is an "x" register rather than an immediate.
;; NOTE(review): length_immediate is "1" although no immediate operand is
;; visible -- presumably the fourth register is encoded in an immediate
;; byte; confirm against the instruction encoding.
9227 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9228 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9230 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9231 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9232 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9235 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9236 [(set_attr "type" "ssemov")
9237 (set_attr "prefix_extra" "1")
9238 (set_attr "length_immediate" "1")
9239 (set_attr "prefix" "vex")
9240 (set_attr "mode" "<avxvecmode>")])
;; Two-operand `blendp<ssemodesuffixf2c>` pattern for each SSEMODEF2P
;; mode, expressed as a vec_merge of operand 2 and operand 1 under the
;; immediate mask in operand 3.  Operand 1 is tied to the destination.
9242 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9243 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9244 (vec_merge:SSEMODEF2P
9245 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9246 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9247 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9249 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9250 [(set_attr "type" "ssemov")
9251 (set_attr "prefix_data16" "1")
9252 (set_attr "prefix_extra" "1")
9253 (set_attr "length_immediate" "1")
9254 (set_attr "mode" "<MODE>")])
;; `blendvp<ssemodesuffixf2c>` pattern for each SSEMODEF2P mode.
;; Operands 0, 1 and 2 use *_not_xmm0 predicates, while operand 3 uses
;; constraint "Yz" (presumably pinning it to xmm0, which the other
;; operands must then avoid -- confirm).  Operand 1 is tied to the
;; destination.
9256 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9257 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9259 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9260 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9261 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9264 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9265 [(set_attr "type" "ssemov")
9266 (set_attr "prefix_data16" "1")
9267 (set_attr "prefix_extra" "1")
9268 (set_attr "mode" "<MODE>")])
;; Three-operand `vdpp<avxmodesuffixf2c>` pattern for each AVXMODEF2P
;; mode, with an immediate (operand 3) in the range accepted by
;; const_0_to_255_operand.  The "%" on operand 1 marks operands 1 and 2
;; as commutative.
9270 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9271 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9273 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9274 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9275 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9278 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9279 [(set_attr "type" "ssemul")
9280 (set_attr "prefix" "vex")
9281 (set_attr "prefix_extra" "1")
9282 (set_attr "length_immediate" "1")
9283 (set_attr "mode" "<avxvecmode>")])
;; Two-operand `dpp<ssemodesuffixf2c>` pattern for each SSEMODEF2P mode,
;; with an immediate (operand 3) accepted by const_0_to_255_operand.
;; Operand 1 is tied to the destination and, via "%", commutative with
;; operand 2.
9285 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9286 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9288 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9289 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9290 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9293 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9294 [(set_attr "type" "ssemul")
9295 (set_attr "prefix_data16" "1")
9296 (set_attr "prefix_extra" "1")
9297 (set_attr "length_immediate" "1")
9298 (set_attr "mode" "<MODE>")])
;; Load pattern emitted as `movntdqa` (template carries the "%v" prefix
;; marker; prefix attribute "maybe_vex"): a V2DI memory operand is read
;; into an "x" register through an unspec.
9300 (define_insn "sse4_1_movntdqa"
9301 [(set (match_operand:V2DI 0 "register_operand" "=x")
9302 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9305 "%vmovntdqa\t{%1, %0|%0, %1}"
9306 [(set_attr "type" "ssemov")
9307 (set_attr "prefix_extra" "1")
9308 (set_attr "prefix" "maybe_vex")
9309 (set_attr "mode" "TI")])
;; Three-operand `vmpsadbw` pattern on V16QI with an immediate (operand 3)
;; accepted by const_0_to_255_operand ("vex" prefix attribute).
9311 (define_insn "*avx_mpsadbw"
9312 [(set (match_operand:V16QI 0 "register_operand" "=x")
9313 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9314 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9315 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9318 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9319 [(set_attr "type" "sselog1")
9320 (set_attr "prefix" "vex")
9321 (set_attr "prefix_extra" "1")
9322 (set_attr "length_immediate" "1")
9323 (set_attr "mode" "TI")])
;; Two-operand `mpsadbw` pattern on V16QI with an immediate (operand 3)
;; accepted by const_0_to_255_operand.  Operand 1 is tied to the
;; destination (constraint "0").
9325 (define_insn "sse4_1_mpsadbw"
9326 [(set (match_operand:V16QI 0 "register_operand" "=x")
9327 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9328 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9329 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9332 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9333 [(set_attr "type" "sselog1")
9334 (set_attr "prefix_extra" "1")
9335 (set_attr "length_immediate" "1")
9336 (set_attr "mode" "TI")])
;; Three-operand `vpackusdw` pattern: two V4SI inputs produce a V8HI
;; result ("vex" prefix attribute).  Operand 2 may be an "x" register or
;; memory.
9338 (define_insn "*avx_packusdw"
9339 [(set (match_operand:V8HI 0 "register_operand" "=x")
9342 (match_operand:V4SI 1 "register_operand" "x"))
9344 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9346 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9347 [(set_attr "type" "sselog")
9348 (set_attr "prefix_extra" "1")
9349 (set_attr "prefix" "vex")
9350 (set_attr "mode" "TI")])
;; Two-operand `packusdw` pattern: two V4SI inputs produce a V8HI result.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; an "x" register or memory.
9352 (define_insn "sse4_1_packusdw"
9353 [(set (match_operand:V8HI 0 "register_operand" "=x")
9356 (match_operand:V4SI 1 "register_operand" "0"))
9358 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9360 "packusdw\t{%2, %0|%0, %2}"
9361 [(set_attr "type" "sselog")
9362 (set_attr "prefix_extra" "1")
9363 (set_attr "mode" "TI")])
;; Four-operand `vpblendvb` pattern on V16QI; operand 3 is an "x" register
;; rather than an immediate.
;; NOTE(review): length_immediate is "1" although no immediate operand is
;; visible -- presumably the fourth register is encoded in an immediate
;; byte; confirm against the instruction encoding.
9365 (define_insn "*avx_pblendvb"
9366 [(set (match_operand:V16QI 0 "register_operand" "=x")
9367 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9368 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9369 (match_operand:V16QI 3 "register_operand" "x")]
9372 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9373 [(set_attr "type" "ssemov")
9374 (set_attr "prefix_extra" "1")
9375 (set_attr "length_immediate" "1")
9376 (set_attr "prefix" "vex")
9377 (set_attr "mode" "TI")])
;; `pblendvb` pattern on V16QI.  Operands 0, 1 and 2 use *_not_xmm0
;; predicates, while operand 3 uses constraint "Yz" (presumably pinning
;; it to xmm0, which the other operands must then avoid -- confirm).
;; Operand 1 is tied to the destination.
9379 (define_insn "sse4_1_pblendvb"
9380 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9381 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9382 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9383 (match_operand:V16QI 3 "register_operand" "Yz")]
9386 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9387 [(set_attr "type" "ssemov")
9388 (set_attr "prefix_extra" "1")
9389 (set_attr "mode" "TI")])
;; Three-operand `vpblendw` pattern on V8HI with an immediate mask
;; (operand 3, const_0_to_255_operand).  Operand 2 is listed before
;; operand 1 in the RTL, and may be an "x" register or memory.
9391 (define_insn "*avx_pblendw"
9392 [(set (match_operand:V8HI 0 "register_operand" "=x")
9394 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9395 (match_operand:V8HI 1 "register_operand" "x")
9396 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9398 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9399 [(set_attr "type" "ssemov")
9400 (set_attr "prefix" "vex")
9401 (set_attr "prefix_extra" "1")
9402 (set_attr "length_immediate" "1")
9403 (set_attr "mode" "TI")])
;; Two-operand `pblendw` pattern on V8HI with an immediate mask
;; (operand 3, const_0_to_255_operand).  Operand 2 is listed before
;; operand 1 in the RTL; operand 1 is tied to the destination.
9405 (define_insn "sse4_1_pblendw"
9406 [(set (match_operand:V8HI 0 "register_operand" "=x")
9408 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9409 (match_operand:V8HI 1 "register_operand" "0")
9410 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9412 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9413 [(set_attr "type" "ssemov")
9414 (set_attr "prefix_extra" "1")
9415 (set_attr "length_immediate" "1")
9416 (set_attr "mode" "TI")])
;; Single-input `phminposuw` pattern on V8HI, modelled as
;; UNSPEC_PHMINPOSUW.  The source may be an "x" register or memory; the
;; template carries the "%v" marker with prefix attribute "maybe_vex".
9418 (define_insn "sse4_1_phminposuw"
9419 [(set (match_operand:V8HI 0 "register_operand" "=x")
9420 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9421 UNSPEC_PHMINPOSUW))]
9423 "%vphminposuw\t{%1, %0|%0, %1}"
9424 [(set_attr "type" "sselog1")
9425 (set_attr "prefix_extra" "1")
9426 (set_attr "prefix" "maybe_vex")
9427 (set_attr "mode" "TI")])
;; Register-source `pmovsxbw` pattern: elements selected from a V16QI "x"
;; register (index list starting at 0) produce a V8HI result.
9429 (define_insn "sse4_1_extendv8qiv8hi2"
9430 [(set (match_operand:V8HI 0 "register_operand" "=x")
9433 (match_operand:V16QI 1 "register_operand" "x")
9434 (parallel [(const_int 0)
9443 "%vpmovsxbw\t{%1, %0|%0, %1}"
9444 [(set_attr "type" "ssemov")
9445 (set_attr "prefix_extra" "1")
9446 (set_attr "prefix" "maybe_vex")
9447 (set_attr "mode" "TI")])
;; Variant of the `pmovsxbw` pattern whose source is a V8QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a narrower operand
;; can be matched.
9449 (define_insn "*sse4_1_extendv8qiv8hi2"
9450 [(set (match_operand:V8HI 0 "register_operand" "=x")
9453 (vec_duplicate:V16QI
9454 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9455 (parallel [(const_int 0)
9464 "%vpmovsxbw\t{%1, %0|%0, %1}"
9465 [(set_attr "type" "ssemov")
9466 (set_attr "prefix_extra" "1")
9467 (set_attr "prefix" "maybe_vex")
9468 (set_attr "mode" "TI")])
;; Register-source `pmovsxbd` pattern: elements selected from a V16QI "x"
;; register (index list starting at 0) produce a V4SI result.
9470 (define_insn "sse4_1_extendv4qiv4si2"
9471 [(set (match_operand:V4SI 0 "register_operand" "=x")
9474 (match_operand:V16QI 1 "register_operand" "x")
9475 (parallel [(const_int 0)
9480 "%vpmovsxbd\t{%1, %0|%0, %1}"
9481 [(set_attr "type" "ssemov")
9482 (set_attr "prefix_extra" "1")
9483 (set_attr "prefix" "maybe_vex")
9484 (set_attr "mode" "TI")])
;; Variant of the `pmovsxbd` pattern whose source is a V4QI register or
;; memory operand wrapped in vec_duplicate:V16QI.
9486 (define_insn "*sse4_1_extendv4qiv4si2"
9487 [(set (match_operand:V4SI 0 "register_operand" "=x")
9490 (vec_duplicate:V16QI
9491 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9492 (parallel [(const_int 0)
9497 "%vpmovsxbd\t{%1, %0|%0, %1}"
9498 [(set_attr "type" "ssemov")
9499 (set_attr "prefix_extra" "1")
9500 (set_attr "prefix" "maybe_vex")
9501 (set_attr "mode" "TI")])
;; Register-source `pmovsxbq` pattern: elements selected from a V16QI "x"
;; register (index list starting at 0) produce a V2DI result.
9503 (define_insn "sse4_1_extendv2qiv2di2"
9504 [(set (match_operand:V2DI 0 "register_operand" "=x")
9507 (match_operand:V16QI 1 "register_operand" "x")
9508 (parallel [(const_int 0)
9511 "%vpmovsxbq\t{%1, %0|%0, %1}"
9512 [(set_attr "type" "ssemov")
9513 (set_attr "prefix_extra" "1")
9514 (set_attr "prefix" "maybe_vex")
9515 (set_attr "mode" "TI")])
;; Variant of the `pmovsxbq` pattern whose source is a V2QI register or
;; memory operand wrapped in vec_duplicate:V16QI.
9517 (define_insn "*sse4_1_extendv2qiv2di2"
9518 [(set (match_operand:V2DI 0 "register_operand" "=x")
9521 (vec_duplicate:V16QI
9522 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9523 (parallel [(const_int 0)
9526 "%vpmovsxbq\t{%1, %0|%0, %1}"
9527 [(set_attr "type" "ssemov")
9528 (set_attr "prefix_extra" "1")
9529 (set_attr "prefix" "maybe_vex")
9530 (set_attr "mode" "TI")])
;; Register-source `pmovsxwd` pattern: elements selected from a V8HI "x"
;; register (index list starting at 0) produce a V4SI result.
9532 (define_insn "sse4_1_extendv4hiv4si2"
9533 [(set (match_operand:V4SI 0 "register_operand" "=x")
9536 (match_operand:V8HI 1 "register_operand" "x")
9537 (parallel [(const_int 0)
9542 "%vpmovsxwd\t{%1, %0|%0, %1}"
9543 [(set_attr "type" "ssemov")
9544 (set_attr "prefix_extra" "1")
9545 (set_attr "prefix" "maybe_vex")
9546 (set_attr "mode" "TI")])
;; Variant of the `pmovsxwd` pattern whose source is a narrower register
;; or memory operand.
;; NOTE(review): operand 1 is declared V2HI here, whereas the sibling
;; narrow-source variants use a mode matching the pattern name (V8QI for
;; v8qi, V4QI for v4qi, V2SI for v2si); V4HI looks intended -- confirm.
9548 (define_insn "*sse4_1_extendv4hiv4si2"
9549 [(set (match_operand:V4SI 0 "register_operand" "=x")
9553 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9554 (parallel [(const_int 0)
9559 "%vpmovsxwd\t{%1, %0|%0, %1}"
9560 [(set_attr "type" "ssemov")
9561 (set_attr "prefix_extra" "1")
9562 (set_attr "prefix" "maybe_vex")
9563 (set_attr "mode" "TI")])
;; Register-source `pmovsxwq` pattern: elements selected from a V8HI "x"
;; register (index list starting at 0) produce a V2DI result.
9565 (define_insn "sse4_1_extendv2hiv2di2"
9566 [(set (match_operand:V2DI 0 "register_operand" "=x")
9569 (match_operand:V8HI 1 "register_operand" "x")
9570 (parallel [(const_int 0)
9573 "%vpmovsxwq\t{%1, %0|%0, %1}"
9574 [(set_attr "type" "ssemov")
9575 (set_attr "prefix_extra" "1")
9576 (set_attr "prefix" "maybe_vex")
9577 (set_attr "mode" "TI")])
;; Variant of the `pmovsxwq` pattern whose source may be a register or
;; memory operand.
;; NOTE(review): operand 1 is declared V8HI here, whereas the sibling
;; narrow-source variants use a mode matching the pattern name (V8QI for
;; v8qi, V2QI for v2qi, V2SI for v2si); V2HI looks intended -- confirm.
9579 (define_insn "*sse4_1_extendv2hiv2di2"
9580 [(set (match_operand:V2DI 0 "register_operand" "=x")
9584 (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
9585 (parallel [(const_int 0)
9588 "%vpmovsxwq\t{%1, %0|%0, %1}"
9589 [(set_attr "type" "ssemov")
9590 (set_attr "prefix_extra" "1")
9591 (set_attr "prefix" "maybe_vex")
9592 (set_attr "mode" "TI")])
;; Named pattern for pmovsxdq.  Operand 1 is a V4SI register and
;; operand 0 the V2DI register result.
9594 (define_insn "sse4_1_extendv2siv2di2"
9595 [(set (match_operand:V2DI 0 "register_operand" "=x")
9598 (match_operand:V4SI 1 "register_operand" "x")
9599 (parallel [(const_int 0)
9602 "%vpmovsxdq\t{%1, %0|%0, %1}"
9603 [(set_attr "type" "ssemov")
9604 (set_attr "prefix_extra" "1")
9605 (set_attr "prefix" "maybe_vex")
9606 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovsxdq pattern.  Operand 1 is a
;; V2SI register-or-memory operand ("xm").
9608 (define_insn "*sse4_1_extendv2siv2di2"
9609 [(set (match_operand:V2DI 0 "register_operand" "=x")
9613 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9614 (parallel [(const_int 0)
9617 "%vpmovsxdq\t{%1, %0|%0, %1}"
9618 [(set_attr "type" "ssemov")
9619 (set_attr "prefix_extra" "1")
9620 (set_attr "prefix" "maybe_vex")
9621 (set_attr "mode" "TI")])
;; Named pattern for pmovzxbw.  Operand 1 is a V16QI register and
;; operand 0 the V8HI register result.
9623 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9624 [(set (match_operand:V8HI 0 "register_operand" "=x")
9627 (match_operand:V16QI 1 "register_operand" "x")
9628 (parallel [(const_int 0)
9637 "%vpmovzxbw\t{%1, %0|%0, %1}"
9638 [(set_attr "type" "ssemov")
9639 (set_attr "prefix_extra" "1")
9640 (set_attr "prefix" "maybe_vex")
9641 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovzxbw pattern.  Operand 1 is a
;; V8QI register-or-memory operand ("xm") wrapped in vec_duplicate:V16QI.
9643 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9644 [(set (match_operand:V8HI 0 "register_operand" "=x")
9647 (vec_duplicate:V16QI
9648 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9649 (parallel [(const_int 0)
9658 "%vpmovzxbw\t{%1, %0|%0, %1}"
9659 [(set_attr "type" "ssemov")
9660 (set_attr "prefix_extra" "1")
9661 (set_attr "prefix" "maybe_vex")
9662 (set_attr "mode" "TI")])
;; Named pattern for pmovzxbd.  Operand 1 is a V16QI register and
;; operand 0 the V4SI register result.
9664 (define_insn "sse4_1_zero_extendv4qiv4si2"
9665 [(set (match_operand:V4SI 0 "register_operand" "=x")
9668 (match_operand:V16QI 1 "register_operand" "x")
9669 (parallel [(const_int 0)
9674 "%vpmovzxbd\t{%1, %0|%0, %1}"
9675 [(set_attr "type" "ssemov")
9676 (set_attr "prefix_extra" "1")
9677 (set_attr "prefix" "maybe_vex")
9678 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovzxbd pattern.  Operand 1 is a
;; V4QI register-or-memory operand ("xm") wrapped in vec_duplicate:V16QI.
9680 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9681 [(set (match_operand:V4SI 0 "register_operand" "=x")
9684 (vec_duplicate:V16QI
9685 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9686 (parallel [(const_int 0)
9691 "%vpmovzxbd\t{%1, %0|%0, %1}"
9692 [(set_attr "type" "ssemov")
9693 (set_attr "prefix_extra" "1")
9694 (set_attr "prefix" "maybe_vex")
9695 (set_attr "mode" "TI")])
;; Named pattern for pmovzxbq.  Operand 1 is a V16QI register and
;; operand 0 the V2DI register result.
9697 (define_insn "sse4_1_zero_extendv2qiv2di2"
9698 [(set (match_operand:V2DI 0 "register_operand" "=x")
9701 (match_operand:V16QI 1 "register_operand" "x")
9702 (parallel [(const_int 0)
9705 "%vpmovzxbq\t{%1, %0|%0, %1}"
9706 [(set_attr "type" "ssemov")
9707 (set_attr "prefix_extra" "1")
9708 (set_attr "prefix" "maybe_vex")
9709 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovzxbq pattern.  Operand 1 is a
;; V2QI register-or-memory operand ("xm") wrapped in vec_duplicate:V16QI.
9711 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9712 [(set (match_operand:V2DI 0 "register_operand" "=x")
9715 (vec_duplicate:V16QI
9716 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9717 (parallel [(const_int 0)
9720 "%vpmovzxbq\t{%1, %0|%0, %1}"
9721 [(set_attr "type" "ssemov")
9722 (set_attr "prefix_extra" "1")
9723 (set_attr "prefix" "maybe_vex")
9724 (set_attr "mode" "TI")])
;; Named pattern for pmovzxwd.  Operand 1 is a V8HI register and
;; operand 0 the V4SI register result.
9726 (define_insn "sse4_1_zero_extendv4hiv4si2"
9727 [(set (match_operand:V4SI 0 "register_operand" "=x")
9730 (match_operand:V8HI 1 "register_operand" "x")
9731 (parallel [(const_int 0)
9736 "%vpmovzxwd\t{%1, %0|%0, %1}"
9737 [(set_attr "type" "ssemov")
9738 (set_attr "prefix_extra" "1")
9739 (set_attr "prefix" "maybe_vex")
9740 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovzxwd pattern.  Operand 1 is a
;; V4HI register-or-memory operand ("xm").
9742 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9743 [(set (match_operand:V4SI 0 "register_operand" "=x")
9747 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9748 (parallel [(const_int 0)
9753 "%vpmovzxwd\t{%1, %0|%0, %1}"
9754 [(set_attr "type" "ssemov")
9755 (set_attr "prefix_extra" "1")
9756 (set_attr "prefix" "maybe_vex")
9757 (set_attr "mode" "TI")])
;; Named pattern for pmovzxwq.  Operand 1 is a V8HI register and
;; operand 0 the V2DI register result.
9759 (define_insn "sse4_1_zero_extendv2hiv2di2"
9760 [(set (match_operand:V2DI 0 "register_operand" "=x")
9763 (match_operand:V8HI 1 "register_operand" "x")
9764 (parallel [(const_int 0)
9767 "%vpmovzxwq\t{%1, %0|%0, %1}"
9768 [(set_attr "type" "ssemov")
9769 (set_attr "prefix_extra" "1")
9770 (set_attr "prefix" "maybe_vex")
9771 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovzxwq pattern.  Operand 1 is a
;; V2HI register-or-memory operand ("xm").
9773 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9774 [(set (match_operand:V2DI 0 "register_operand" "=x")
9778 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9779 (parallel [(const_int 0)
9782 "%vpmovzxwq\t{%1, %0|%0, %1}"
9783 [(set_attr "type" "ssemov")
9784 (set_attr "prefix_extra" "1")
9785 (set_attr "prefix" "maybe_vex")
9786 (set_attr "mode" "TI")])
;; Named pattern for pmovzxdq.  Operand 1 is a V4SI register and
;; operand 0 the V2DI register result.
9788 (define_insn "sse4_1_zero_extendv2siv2di2"
9789 [(set (match_operand:V2DI 0 "register_operand" "=x")
9792 (match_operand:V4SI 1 "register_operand" "x")
9793 (parallel [(const_int 0)
9796 "%vpmovzxdq\t{%1, %0|%0, %1}"
9797 [(set_attr "type" "ssemov")
9798 (set_attr "prefix_extra" "1")
9799 (set_attr "prefix" "maybe_vex")
9800 (set_attr "mode" "TI")])
;; Unnamed (leading `*') variant of the pmovzxdq pattern.  Operand 1 is a
;; V2SI register-or-memory operand ("xm").
9802 (define_insn "*sse4_1_zero_extendv2siv2di2"
9803 [(set (match_operand:V2DI 0 "register_operand" "=x")
9807 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9808 (parallel [(const_int 0)
9811 "%vpmovzxdq\t{%1, %0|%0, %1}"
9812 [(set_attr "type" "ssemov")
9813 (set_attr "prefix_extra" "1")
9814 (set_attr "prefix" "maybe_vex")
9815 (set_attr "mode" "TI")])
9817 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9818 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Sets FLAGS_REG (CC mode) from an unspec over two AVXMODEF2P operands:
;; operand 0 must be a register, operand 1 may be a register or memory.
;; The "prefix" attribute is vex and the template has no `%v' prefix.
9819 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9820 [(set (reg:CC FLAGS_REG)
9821 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9822 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9825 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9826 [(set_attr "type" "ssecomi")
9827 (set_attr "prefix_extra" "1")
9828 (set_attr "prefix" "vex")
9829 (set_attr "mode" "<MODE>")])
9831 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9832 ;; But it is not really a compare instruction.
;; vptest on V4DI operands.  Sets FLAGS_REG (CC mode) from an unspec of
;; register operand 0 and register-or-memory operand 1.  Insn mode is OI
;; and the "prefix" attribute is vex.
9833 (define_insn "avx_ptest256"
9834 [(set (reg:CC FLAGS_REG)
9835 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9836 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9839 "vptest\t{%1, %0|%0, %1}"
9840 [(set_attr "type" "ssecomi")
9841 (set_attr "prefix_extra" "1")
9842 (set_attr "prefix" "vex")
9843 (set_attr "mode" "OI")])
;; ptest on V2DI operands.  Sets FLAGS_REG (CC mode) from an unspec of
;; register operand 0 and register-or-memory operand 1.  Insn mode is TI,
;; the template uses `%v' and the "prefix" attribute is maybe_vex.
9845 (define_insn "sse4_1_ptest"
9846 [(set (reg:CC FLAGS_REG)
9847 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9848 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9851 "%vptest\t{%1, %0|%0, %1}"
9852 [(set_attr "type" "ssecomi")
9853 (set_attr "prefix_extra" "1")
9854 (set_attr "prefix" "maybe_vex")
9855 (set_attr "mode" "TI")])
;; vroundp<suffix> on AVX256MODEF2P vectors.  Operand 1 is the
;; register-or-memory source; operand 2 is an SImode immediate accepted
;; by const_0_to_15_operand, accounted for by length_immediate = 1.
9857 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9858 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9859 (unspec:AVX256MODEF2P
9860 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9861 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9864 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9865 [(set_attr "type" "ssecvt")
9866 (set_attr "prefix_extra" "1")
9867 (set_attr "length_immediate" "1")
9868 (set_attr "prefix" "vex")
9869 (set_attr "mode" "<MODE>")])
;; roundp<suffix> on SSEMODEF2P vectors.  Operand 1 is the
;; register-or-memory source; operand 2 is an SImode immediate accepted
;; by const_0_to_15_operand.  The template uses `%v' and the "prefix"
;; attribute is maybe_vex.
9871 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9872 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9874 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9875 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9878 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9879 [(set_attr "type" "ssecvt")
9880 (set_attr "prefix_data16" "1")
9881 (set_attr "prefix_extra" "1")
9882 (set_attr "length_immediate" "1")
9883 (set_attr "prefix" "maybe_vex")
9884 (set_attr "mode" "<MODE>")])
;; Unnamed VEX form of rounds<suffix>.  The result is a vec_merge of the
;; rounding unspec (register operand 2 with immediate operand 3) and
;; register operand 1.  All three registers are independent, so the
;; template prints %1 and %2 separately from the destination %0.
9886 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9887 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9888 (vec_merge:SSEMODEF2P
9890 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9891 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9893 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9896 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9897 [(set_attr "type" "ssecvt")
9898 (set_attr "prefix_extra" "1")
9899 (set_attr "length_immediate" "1")
9900 (set_attr "prefix" "vex")
9901 (set_attr "mode" "<MODE>")])
;; Two-operand form of rounds<suffix>.  Same vec_merge shape as
;; *avx_rounds, but operand 1 is tied to the destination (constraint
;; "0"), so the template prints only %0, %2 and the immediate %3.
;; No "prefix" attribute is set on this pattern.
9903 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9904 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9905 (vec_merge:SSEMODEF2P
9907 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9908 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9910 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9913 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9914 [(set_attr "type" "ssecvt")
9915 (set_attr "prefix_data16" "1")
9916 (set_attr "prefix_extra" "1")
9917 (set_attr "length_immediate" "1")
9918 (set_attr "mode" "<MODE>")])
9920 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9922 ;; Intel SSE4.2 string/text processing instructions
9924 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SImode value in
;; constraint "c" (operand 0), a V16QI value in constraint "Yz"
;; (operand 1) and FLAGS_REG.  Inputs are V16QI operands 2 and 4 (both
;; restricted by *_not_xmm0_operand predicates), SImode operands 3 and 5
;; in constraints "a" and "d", and an immediate accepted by
;; const_0_to_255_operand.
;;
;; The split code computes ecx, xmm0 and flags, each nonzero when
;; curr_insn has no REG_UNUSED note for that register.  It contains calls
;; to gen_sse4_2_pcmpestri and gen_sse4_2_pcmpestrm, and emits
;; gen_sse4_2_pcmpestr_cconly when flags is set but neither ecx nor xmm0
;; is.
9926 (define_insn_and_split "sse4_2_pcmpestr"
9927 [(set (match_operand:SI 0 "register_operand" "=c,c")
9929 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9930 (match_operand:SI 3 "register_operand" "a,a")
9931 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9932 (match_operand:SI 5 "register_operand" "d,d")
9933 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9935 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9943 (set (reg:CC FLAGS_REG)
9952 && can_create_pseudo_p ()"
9957 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9958 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9959 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9962 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9963 operands[3], operands[4],
9964 operands[5], operands[6]));
9966 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9967 operands[3], operands[4],
9968 operands[5], operands[6]));
9969 if (flags && !(ecx || xmm0))
9970 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9971 operands[2], operands[3],
9972 operands[4], operands[5],
9976 [(set_attr "type" "sselog")
9977 (set_attr "prefix_data16" "1")
9978 (set_attr "prefix_extra" "1")
9979 (set_attr "length_immediate" "1")
9980 (set_attr "memory" "none,load")
9981 (set_attr "mode" "TI")])
;; pcmpestri: SImode result in constraint "c" plus a FLAGS_REG set.
;; Operands 2 and 4 are SImode registers in constraints "a" and "d" and
;; do not appear in the template.  Alternative 1 takes operand 3 from
;; memory ("memory" attribute none,load).
9983 (define_insn "sse4_2_pcmpestri"
9984 [(set (match_operand:SI 0 "register_operand" "=c,c")
9986 [(match_operand:V16QI 1 "register_operand" "x,x")
9987 (match_operand:SI 2 "register_operand" "a,a")
9988 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9989 (match_operand:SI 4 "register_operand" "d,d")
9990 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9992 (set (reg:CC FLAGS_REG)
10001 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10002 [(set_attr "type" "sselog")
10003 (set_attr "prefix_data16" "1")
10004 (set_attr "prefix_extra" "1")
10005 (set_attr "prefix" "maybe_vex")
10006 (set_attr "length_immediate" "1")
10007 (set_attr "memory" "none,load")
10008 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in constraint "Yz" plus a FLAGS_REG set.
;; Operands 2 and 4 are SImode registers in constraints "a" and "d" and
;; do not appear in the template.  Alternative 1 takes operand 3 from
;; memory ("memory" attribute none,load).
10010 (define_insn "sse4_2_pcmpestrm"
10011 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10013 [(match_operand:V16QI 1 "register_operand" "x,x")
10014 (match_operand:SI 2 "register_operand" "a,a")
10015 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10016 (match_operand:SI 4 "register_operand" "d,d")
10017 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10019 (set (reg:CC FLAGS_REG)
10028 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10029 [(set_attr "type" "sselog")
10030 (set_attr "prefix_data16" "1")
10031 (set_attr "prefix_extra" "1")
10032 (set_attr "length_immediate" "1")
10033 (set_attr "prefix" "maybe_vex")
10034 (set_attr "memory" "none,load")
10035 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0/1 allocate the V16QI scratch in "Yz" (SI scratch is
;; "X") and print pcmpestrm; alternatives 2/3 allocate the SI scratch in
;; "c" (V16QI scratch is "X") and print pcmpestri.  Within each pair the
;; second alternative takes operand 4 from memory.
10037 (define_insn "sse4_2_pcmpestr_cconly"
10038 [(set (reg:CC FLAGS_REG)
10040 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10041 (match_operand:SI 3 "register_operand" "a,a,a,a")
10042 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10043 (match_operand:SI 5 "register_operand" "d,d,d,d")
10044 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10046 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10047 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10050 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10051 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10052 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10053 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10054 [(set_attr "type" "sselog")
10055 (set_attr "prefix_data16" "1")
10056 (set_attr "prefix_extra" "1")
10057 (set_attr "length_immediate" "1")
10058 (set_attr "memory" "none,load,none,load")
10059 (set_attr "prefix" "maybe_vex")
10060 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SImode value in
;; constraint "c" (operand 0), a V16QI value in constraint "Yz"
;; (operand 1) and FLAGS_REG.  Inputs are V16QI operands 2 and 3 (both
;; restricted by *_not_xmm0_operand predicates) and an immediate accepted
;; by const_0_to_255_operand.
;;
;; The split code computes ecx, xmm0 and flags, each nonzero when
;; curr_insn has no REG_UNUSED note for that register.  It contains calls
;; to gen_sse4_2_pcmpistri and gen_sse4_2_pcmpistrm, and emits
;; gen_sse4_2_pcmpistr_cconly when flags is set but neither ecx nor xmm0
;; is.
10062 (define_insn_and_split "sse4_2_pcmpistr"
10063 [(set (match_operand:SI 0 "register_operand" "=c,c")
10065 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10066 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10067 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10069 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10075 (set (reg:CC FLAGS_REG)
10082 && can_create_pseudo_p ()"
10087 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10088 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10089 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10092 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10093 operands[3], operands[4]));
10095 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10096 operands[3], operands[4]));
10097 if (flags && !(ecx || xmm0))
10098 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10099 operands[2], operands[3],
10103 [(set_attr "type" "sselog")
10104 (set_attr "prefix_data16" "1")
10105 (set_attr "prefix_extra" "1")
10106 (set_attr "length_immediate" "1")
10107 (set_attr "memory" "none,load")
10108 (set_attr "mode" "TI")])
;; pcmpistri: SImode result in constraint "c" plus a FLAGS_REG set.
;; Operand 1 is a V16QI register, operand 2 a V16QI register or memory
;; (alternative 1 loads), and operand 3 the immediate.
10110 (define_insn "sse4_2_pcmpistri"
10111 [(set (match_operand:SI 0 "register_operand" "=c,c")
10113 [(match_operand:V16QI 1 "register_operand" "x,x")
10114 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10115 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10117 (set (reg:CC FLAGS_REG)
10124 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10125 [(set_attr "type" "sselog")
10126 (set_attr "prefix_data16" "1")
10127 (set_attr "prefix_extra" "1")
10128 (set_attr "length_immediate" "1")
10129 (set_attr "prefix" "maybe_vex")
10130 (set_attr "memory" "none,load")
10131 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in constraint "Yz" plus a FLAGS_REG set.
;; Operand 1 is a V16QI register, operand 2 a V16QI register or memory
;; (alternative 1 loads), and operand 3 the immediate.
10133 (define_insn "sse4_2_pcmpistrm"
10134 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10136 [(match_operand:V16QI 1 "register_operand" "x,x")
10137 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10138 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10140 (set (reg:CC FLAGS_REG)
10147 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10148 [(set_attr "type" "sselog")
10149 (set_attr "prefix_data16" "1")
10150 (set_attr "prefix_extra" "1")
10151 (set_attr "length_immediate" "1")
10152 (set_attr "prefix" "maybe_vex")
10153 (set_attr "memory" "none,load")
10154 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0/1 allocate the V16QI scratch in "Yz" (SI scratch is
;; "X") and print pcmpistrm; alternatives 2/3 allocate the SI scratch in
;; "c" (V16QI scratch is "X") and print pcmpistri.  Within each pair the
;; second alternative takes operand 3 from memory.
10156 (define_insn "sse4_2_pcmpistr_cconly"
10157 [(set (reg:CC FLAGS_REG)
10159 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10160 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10161 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10163 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10164 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10167 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10168 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10169 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10170 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10171 [(set_attr "type" "sselog")
10172 (set_attr "prefix_data16" "1")
10173 (set_attr "prefix_extra" "1")
10174 (set_attr "length_immediate" "1")
10175 (set_attr "memory" "none,load,none,load")
10176 (set_attr "prefix" "maybe_vex")
10177 (set_attr "mode" "TI")])
10179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10181 ;; SSE5 instructions
10183 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10185 ;; SSE5 parallel integer multiply/add instructions.
10186 ;; Note the instruction does not allow the value being added to be a memory
10187 ;; operand. However by pretending via the nonimmediate_operand predicate
10188 ;; that it does and splitting it later allows the following to be recognized:
10189 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI: multiply operands 1 and 2 and combine with operand 3,
;; which is tied to the destination (constraint "0").  Operand 1 is
;; marked commutative ("%").  The third alternative prints %1 and %2 in
;; swapped order.  The condition passes 2 as the fifth argument to
;; ix86_sse5_valid_op_p.
10190 (define_insn "sse5_pmacsww"
10191 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
10194 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
10195 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
10196 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
10197 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
10199 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10200 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10201 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10202 [(set_attr "type" "ssemuladd")
10203 (set_attr "mode" "TI")])
10205 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Here operand 3 is accepted as nonimmediate_operand.  The split applies
;; only when ix86_sse5_valid_op_p rejects the operands with fifth
;; argument 1 but accepts them with 2, and when operand 0 is not
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory for V8HImode before emitting
;; gen_sse5_pmacsww.
10207 [(set (match_operand:V8HI 0 "register_operand" "")
10209 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10210 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10211 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10213 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
10214 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
10215 && !reg_mentioned_p (operands[0], operands[1])
10216 && !reg_mentioned_p (operands[0], operands[2])
10217 && !reg_mentioned_p (operands[0], operands[3])"
10220 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
10221 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI: same operand layout as sse5_pmacsww (operand 3 tied
;; to the destination, third alternative prints %1/%2 swapped).  The
;; condition passes 1 as the fifth argument to ix86_sse5_valid_op_p.
10226 (define_insn "sse5_pmacssww"
10227 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
10229 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10230 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
10231 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
10232 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10234 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10235 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10236 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10237 [(set_attr "type" "ssemuladd")
10238 (set_attr "mode" "TI")])
10240 ;; Note the instruction does not allow the value being added to be a memory
10241 ;; operand. However by pretending via the nonimmediate_operand predicate
10242 ;; that it does and splitting it later allows the following to be recognized:
10243 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI: multiply operands 1 and 2 and combine with operand 3,
;; which is tied to the destination (constraint "0").  The third
;; alternative prints %1 and %2 in swapped order.  The condition passes 2
;; as the fifth argument to ix86_sse5_valid_op_p.
10244 (define_insn "sse5_pmacsdd"
10245 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10248 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10249 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
10250 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10251 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
10253 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10254 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10255 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10256 [(set_attr "type" "ssemuladd")
10257 (set_attr "mode" "TI")])
10259 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; Here operand 3 is accepted as nonimmediate_operand.  The split applies
;; only when ix86_sse5_valid_op_p rejects the operands with fifth
;; argument 1 but accepts them with 2, and when operand 0 is not
;; mentioned in operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory for V4SImode before emitting
;; gen_sse5_pmacsdd.
10261 [(set (match_operand:V4SI 0 "register_operand" "")
10263 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10264 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10265 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10267 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
10268 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
10269 && !reg_mentioned_p (operands[0], operands[1])
10270 && !reg_mentioned_p (operands[0], operands[2])
10271 && !reg_mentioned_p (operands[0], operands[3])"
10274 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
10275 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI: same operand layout as sse5_pmacsdd (operand 3 tied
;; to the destination, third alternative prints %1/%2 swapped).  The
;; condition passes 1 as the fifth argument to ix86_sse5_valid_op_p.
10280 (define_insn "sse5_pmacssdd"
10281 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10283 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10284 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
10285 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10286 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10288 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10289 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10290 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10291 [(set_attr "type" "ssemuladd")
10292 (set_attr "mode" "TI")])
;; pmacssdql: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3,
;; which is tied to the destination.  The element selections on both
;; V4SI inputs start at index 1.  The third alternative prints %1/%2
;; swapped.
10294 (define_insn "sse5_pmacssdql"
10295 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10300 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10301 (parallel [(const_int 1)
10304 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10305 (parallel [(const_int 1)
10307 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10308 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10310 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10311 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10312 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10313 [(set_attr "type" "ssemuladd")
10314 (set_attr "mode" "TI")])
;; pmacssdqh: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3,
;; which is tied to the destination.  The element selections on both
;; V4SI inputs start at index 0.  The third alternative prints %1/%2
;; swapped.
10316 (define_insn "sse5_pmacssdqh"
10317 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10322 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10323 (parallel [(const_int 0)
10327 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10328 (parallel [(const_int 0)
10330 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10331 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10333 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10334 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10335 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10336 [(set_attr "type" "ssemuladd")
10337 (set_attr "mode" "TI")])
;; pmacsdql: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3,
;; which is tied to the destination.  The element selections on both
;; V4SI inputs start at index 1.  The third alternative prints %1/%2
;; swapped.
10339 (define_insn "sse5_pmacsdql"
10340 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10345 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10346 (parallel [(const_int 1)
10350 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10351 (parallel [(const_int 1)
10353 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10354 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10356 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10357 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10358 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10359 [(set_attr "type" "ssemuladd")
10360 (set_attr "mode" "TI")])
;; Unnamed variant of the pmacsdql pattern in which operand 3 is a
;; memory operand.  The destination is earlyclobber ("=&x").  The
;; condition passes -1 as the fifth argument to ix86_sse5_valid_op_p.
;; The split applies once reload has completed, or earlier when
;; operand 0 is not mentioned in operand 1 or operand 2.
10362 (define_insn_and_split "*sse5_pmacsdql_mem"
10363 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10368 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10369 (parallel [(const_int 1)
10373 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10374 (parallel [(const_int 1)
10376 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10377 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10379 "&& (reload_completed
10380 || (!reg_mentioned_p (operands[0], operands[1])
10381 && !reg_mentioned_p (operands[0], operands[2])))"
10382 [(set (match_dup 0)
10390 (parallel [(const_int 1)
10395 (parallel [(const_int 1)
10399 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10400 ;; fake it with a multiply/add. In general, we expect the define_split to
10401 ;; occur before register allocation, so we have to handle the corner case where
10402 ;; the target is the same as operands 1/2
;; V2DI result from V4SI operands 1 and 2, selecting elements 1 and 3 of
;; operand 2.  The destination is earlyclobber ("=&x").  The split
;; applies once reload has completed, or earlier when operand 0 is not
;; mentioned in operand 1 or operand 2.  The preparation code sets
;; operands[3] to the V2DImode zero constant.
10403 (define_insn_and_split "sse5_mulv2div2di3_low"
10404 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10408 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10409 (parallel [(const_int 1)
10413 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10414 (parallel [(const_int 1)
10415 (const_int 3)])))))]
10418 "&& (reload_completed
10419 || (!reg_mentioned_p (operands[0], operands[1])
10420 && !reg_mentioned_p (operands[0], operands[2])))"
10421 [(set (match_dup 0)
10429 (parallel [(const_int 1)
10434 (parallel [(const_int 1)
10438 operands[3] = CONST0_RTX (V2DImode);
10440 [(set_attr "type" "ssemuladd")
10441 (set_attr "mode" "TI")])
;; pmacsdqh: V2DI result from V4SI operands 1 and 2 plus V2DI operand 3,
;; which is tied to the destination.  The element selections on both
;; V4SI inputs start at index 0.  The third alternative prints %1/%2
;; swapped.
10443 (define_insn "sse5_pmacsdqh"
10444 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10449 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10450 (parallel [(const_int 0)
10454 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10455 (parallel [(const_int 0)
10457 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10458 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10460 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10461 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10462 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10463 [(set_attr "type" "ssemuladd")
10464 (set_attr "mode" "TI")])
;; Unnamed variant of the pmacsdqh pattern in which operand 3 is a
;; memory operand.  The destination is earlyclobber ("=&x").  The
;; condition passes -1 as the fifth argument to ix86_sse5_valid_op_p.
;; The split applies once reload has completed, or earlier when
;; operand 0 is not mentioned in operand 1 or operand 2.
10466 (define_insn_and_split "*sse5_pmacsdqh_mem"
10467 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10472 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10473 (parallel [(const_int 0)
10477 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10478 (parallel [(const_int 0)
10480 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10481 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10483 "&& (reload_completed
10484 || (!reg_mentioned_p (operands[0], operands[1])
10485 && !reg_mentioned_p (operands[0], operands[2])))"
10486 [(set (match_dup 0)
10494 (parallel [(const_int 0)
10499 (parallel [(const_int 0)
10503 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10504 ;; fake it with a multiply/add. In general, we expect the define_split to
10505 ;; occur before register allocation, so we have to handle the corner case where
10506 ;; the target is the same as either operands[1] or operands[2]
;; V2DI result from V4SI operands 1 and 2, selecting elements 0 and 2 of
;; operand 2.  The destination is earlyclobber ("=&x").  The split
;; applies once reload has completed, or earlier when operand 0 is not
;; mentioned in operand 1 or operand 2.  The preparation code sets
;; operands[3] to the V2DImode zero constant.
10507 (define_insn_and_split "sse5_mulv2div2di3_high"
10508 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10512 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10513 (parallel [(const_int 0)
10517 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 0)
10519 (const_int 2)])))))]
10522 "&& (reload_completed
10523 || (!reg_mentioned_p (operands[0], operands[1])
10524 && !reg_mentioned_p (operands[0], operands[2])))"
10525 [(set (match_dup 0)
10533 (parallel [(const_int 0)
10538 (parallel [(const_int 0)
10542 operands[3] = CONST0_RTX (V2DImode);
10544 [(set_attr "type" "ssemuladd")
10545 (set_attr "mode" "TI")])
10547 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3,
;; which is tied to the destination.  The element selections on both
;; V8HI inputs start at index 1.  The third alternative prints %1/%2
;; swapped.
10548 (define_insn "sse5_pmacsswd"
10549 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10554 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10555 (parallel [(const_int 1)
10561 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10562 (parallel [(const_int 1)
10566 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10567 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10569 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10570 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10571 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10572 [(set_attr "type" "ssemuladd")
10573 (set_attr "mode" "TI")])
;; pmacswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3,
;; which is tied to the destination.  The element selections on both
;; V8HI inputs start at index 1.  The third alternative prints %1/%2
;; swapped.
10575 (define_insn "sse5_pmacswd"
10576 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10581 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10582 (parallel [(const_int 1)
10588 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10589 (parallel [(const_int 1)
10593 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10594 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10596 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10597 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10598 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10599 [(set_attr "type" "ssemuladd")
10600 (set_attr "mode" "TI")])
;; pmadcsswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3,
;; which is tied to the destination.  The RTL uses two groups of element
;; selections, one starting at index 0 and one starting at index 1 and
;; ending at 7.  The third alternative prints %1/%2 swapped.
10602 (define_insn "sse5_pmadcsswd"
10603 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10609 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10610 (parallel [(const_int 0)
10616 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10617 (parallel [(const_int 0)
10625 (parallel [(const_int 1)
10632 (parallel [(const_int 1)
10635 (const_int 7)])))))
10636 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10637 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10639 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10640 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10641 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10642 [(set_attr "type" "ssemuladd")
10643 (set_attr "mode" "TI")])
;; pmadcswd: V4SI result from V8HI operands 1 and 2 plus V4SI operand 3,
;; which is tied to the destination.  The RTL uses two groups of element
;; selections, one starting at index 0 and one starting at index 1 and
;; ending at 7.  The third alternative prints %1/%2 swapped.
10645 (define_insn "sse5_pmadcswd"
10646 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10652 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10653 (parallel [(const_int 0)
10659 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10660 (parallel [(const_int 0)
10668 (parallel [(const_int 1)
10675 (parallel [(const_int 1)
10678 (const_int 7)])))))
10679 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10680 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10682 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10683 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10684 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10685 [(set_attr "type" "ssemuladd")
10686 (set_attr "mode" "TI")])
10688 ;; SSE5 parallel XMM conditional moves
;; pcmov over every SSEMODE vector mode, expressed as an if_then_else
;; with operand 3 as the condition and operands 1/2 as the two arms.
;; In each of the four alternatives exactly one input is tied to the
;; destination via "0": operand 3 in alternatives 0/1, operand 1 in
;; alternatives 2/3.  All alternatives print the same template.
10689 (define_insn "sse5_pcmov_<mode>"
10690 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10691 (if_then_else:SSEMODE
10692 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10693 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10694 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10695 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10697 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10698 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10699 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10700 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10701 [(set_attr "type" "sse4arg")])
10703 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: V8HI register result from a single V16QI register-or-memory
;; operand.  The RTL uses two element selections, one starting at index 0
;; and one starting at index 1 and ending at 15.
10704 (define_insn "sse5_phaddbw"
10705 [(set (match_operand:V8HI 0 "register_operand" "=x")
10709 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10710 (parallel [(const_int 0)
10721 (parallel [(const_int 1)
10728 (const_int 15)])))))]
10730 "phaddbw\t{%1, %0|%0, %1}"
10731 [(set_attr "type" "sseiadd1")])
;; phaddbd: V4SI register result from a single V16QI register-or-memory
;; operand.  The RTL uses four element selections, starting at indices
;; 0, 1, 2 and 3.
10733 (define_insn "sse5_phaddbd"
10734 [(set (match_operand:V4SI 0 "register_operand" "=x")
10739 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10740 (parallel [(const_int 0)
10747 (parallel [(const_int 1)
10750 (const_int 13)]))))
10755 (parallel [(const_int 2)
10762 (parallel [(const_int 3)
10765 (const_int 15)]))))))]
10767 "phaddbd\t{%1, %0|%0, %1}"
10768 [(set_attr "type" "sseiadd1")])
;; phaddbq: V2DI register result from a single V16QI register-or-memory
;; operand.  The RTL uses eight element selections, starting at indices
;; 0, 1, 2, 3 and 8, 9, 10, 11.
10770 (define_insn "sse5_phaddbq"
10771 [(set (match_operand:V2DI 0 "register_operand" "=x")
10777 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10778 (parallel [(const_int 0)
10783 (parallel [(const_int 1)
10789 (parallel [(const_int 2)
10794 (parallel [(const_int 3)
10795 (const_int 7)])))))
10801 (parallel [(const_int 8)
10806 (parallel [(const_int 9)
10807 (const_int 13)]))))
10812 (parallel [(const_int 10)
10817 (parallel [(const_int 11)
10818 (const_int 15)])))))))]
10820 "phaddbq\t{%1, %0|%0, %1}"
10821 [(set_attr "type" "sseiadd1")])
;; phaddwd: V4SI register result from a single V8HI register-or-memory
;; operand.  The RTL uses two element selections, one starting at index 0
;; and one starting at index 1 and ending at 7.
10823 (define_insn "sse5_phaddwd"
10824 [(set (match_operand:V4SI 0 "register_operand" "=x")
10828 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10829 (parallel [(const_int 0)
10836 (parallel [(const_int 1)
10839 (const_int 7)])))))]
10841 "phaddwd\t{%1, %0|%0, %1}"
10842 [(set_attr "type" "sseiadd1")])
;; phaddwq: V2DI register result from a single V8HI register-or-memory
;; operand.  The RTL uses four element selections, starting at indices
;; 0, 1, 2 and 3.
10844 (define_insn "sse5_phaddwq"
10845 [(set (match_operand:V2DI 0 "register_operand" "=x")
10850 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10851 (parallel [(const_int 0)
10856 (parallel [(const_int 1)
10862 (parallel [(const_int 2)
10867 (parallel [(const_int 3)
10868 (const_int 7)]))))))]
10870 "phaddwq\t{%1, %0|%0, %1}"
10871 [(set_attr "type" "sseiadd1")])
;; phadddq: V2DI register result from a single V4SI register-or-memory
;; operand.  The RTL uses two element selections, one starting at index 0
;; and one covering indices 1 and 3.
10873 (define_insn "sse5_phadddq"
10874 [(set (match_operand:V2DI 0 "register_operand" "=x")
10878 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10879 (parallel [(const_int 0)
10884 (parallel [(const_int 1)
10885 (const_int 3)])))))]
10887 "phadddq\t{%1, %0|%0, %1}"
10888 [(set_attr "type" "sseiadd1")])
;; sse5_phaddubw: emits the phaddubw instruction.
;;   operand 0 - V8HI destination register (constraint x)
;;   operand 1 - V16QI source, register or memory (constraint xm)
10890 (define_insn "sse5_phaddubw"
10891 [(set (match_operand:V8HI 0 "register_operand" "=x")
10895 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10896 (parallel [(const_int 0)
10907 (parallel [(const_int 1)
10914 (const_int 15)])))))]
10916 "phaddubw\t{%1, %0|%0, %1}"
10917 [(set_attr "type" "sseiadd1")])
;; sse5_phaddubd: emits the phaddubd instruction.
;;   operand 0 - V4SI destination register (constraint x)
;;   operand 1 - V16QI source, register or memory (constraint xm)
10919 (define_insn "sse5_phaddubd"
10920 [(set (match_operand:V4SI 0 "register_operand" "=x")
10925 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10926 (parallel [(const_int 0)
10933 (parallel [(const_int 1)
10936 (const_int 13)]))))
10941 (parallel [(const_int 2)
10948 (parallel [(const_int 3)
10951 (const_int 15)]))))))]
10953 "phaddubd\t{%1, %0|%0, %1}"
10954 [(set_attr "type" "sseiadd1")])
;; sse5_phaddubq: emits the phaddubq instruction.
;;   operand 0 - V2DI destination register (constraint x)
;;   operand 1 - V16QI source, register or memory (constraint xm)
10956 (define_insn "sse5_phaddubq"
10957 [(set (match_operand:V2DI 0 "register_operand" "=x")
10963 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10964 (parallel [(const_int 0)
10969 (parallel [(const_int 1)
10975 (parallel [(const_int 2)
10980 (parallel [(const_int 3)
10981 (const_int 7)])))))
10987 (parallel [(const_int 8)
10992 (parallel [(const_int 9)
10993 (const_int 13)]))))
10998 (parallel [(const_int 10)
11003 (parallel [(const_int 11)
11004 (const_int 15)])))))))]
11006 "phaddubq\t{%1, %0|%0, %1}"
11007 [(set_attr "type" "sseiadd1")])
;; sse5_phadduwd: emits the phadduwd instruction.
;;   operand 0 - V4SI destination register (constraint x)
;;   operand 1 - V8HI source, register or memory (constraint xm)
11009 (define_insn "sse5_phadduwd"
11010 [(set (match_operand:V4SI 0 "register_operand" "=x")
11014 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11015 (parallel [(const_int 0)
11022 (parallel [(const_int 1)
11025 (const_int 7)])))))]
11027 "phadduwd\t{%1, %0|%0, %1}"
11028 [(set_attr "type" "sseiadd1")])
;; sse5_phadduwq: emits the phadduwq instruction.
;;   operand 0 - V2DI destination register (constraint x)
;;   operand 1 - V8HI source, register or memory (constraint xm)
11030 (define_insn "sse5_phadduwq"
11031 [(set (match_operand:V2DI 0 "register_operand" "=x")
11036 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11037 (parallel [(const_int 0)
11042 (parallel [(const_int 1)
11048 (parallel [(const_int 2)
11053 (parallel [(const_int 3)
11054 (const_int 7)]))))))]
11056 "phadduwq\t{%1, %0|%0, %1}"
11057 [(set_attr "type" "sseiadd1")])
;; sse5_phaddudq: emits the phaddudq instruction.
;;   operand 0 - V2DI destination register (constraint x)
;;   operand 1 - V4SI source, register or memory (constraint xm)
11059 (define_insn "sse5_phaddudq"
11060 [(set (match_operand:V2DI 0 "register_operand" "=x")
11064 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11065 (parallel [(const_int 0)
11070 (parallel [(const_int 1)
11071 (const_int 3)])))))]
11073 "phaddudq\t{%1, %0|%0, %1}"
11074 [(set_attr "type" "sseiadd1")])
;; sse5_phsubbw: emits the phsubbw instruction.
;;   operand 0 - V8HI destination register (constraint x)
;;   operand 1 - V16QI source, register or memory (constraint xm)
11076 (define_insn "sse5_phsubbw"
11077 [(set (match_operand:V8HI 0 "register_operand" "=x")
11081 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11082 (parallel [(const_int 0)
11093 (parallel [(const_int 1)
11100 (const_int 15)])))))]
11102 "phsubbw\t{%1, %0|%0, %1}"
11103 [(set_attr "type" "sseiadd1")])
;; sse5_phsubwd: emits the phsubwd instruction.
;;   operand 0 - V4SI destination register (constraint x)
;;   operand 1 - V8HI source, register or memory (constraint xm)
11105 (define_insn "sse5_phsubwd"
11106 [(set (match_operand:V4SI 0 "register_operand" "=x")
11110 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11111 (parallel [(const_int 0)
11118 (parallel [(const_int 1)
11121 (const_int 7)])))))]
11123 "phsubwd\t{%1, %0|%0, %1}"
11124 [(set_attr "type" "sseiadd1")])
;; sse5_phsubdq: emits the phsubdq instruction.
;;   operand 0 - V2DI destination register (constraint x)
;;   operand 1 - V4SI source, register or memory (constraint xm)
11126 (define_insn "sse5_phsubdq"
11127 [(set (match_operand:V2DI 0 "register_operand" "=x")
11131 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11132 (parallel [(const_int 0)
11137 (parallel [(const_int 1)
11138 (const_int 3)])))))]
11140 "phsubdq\t{%1, %0|%0, %1}"
11141 [(set_attr "type" "sseiadd1")])
11143 ;; SSE5 permute instructions
;; sse5_pperm: four-operand pperm wrapped in UNSPEC_SSE5_PERMUTE.
;; All operands are V16QI.  Of the four constraint alternatives, the first
;; two tie operand 1 to the destination and the last two tie operand 3 to
;; it.  The insn condition further filters operand combinations through
;; ix86_sse5_valid_op_p.
11144 (define_insn "sse5_pperm"
11145 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
11147 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
11148 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
11149 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
11150 UNSPEC_SSE5_PERMUTE))]
11151 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11152 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11153 [(set_attr "type" "sse4arg")
11154 (set_attr "mode" "TI")])
11156 ;; The following are for the various unpack insns, which don't need the first
11157 ;; source operand, so we can just use the output operand for the first operand.
11158 ;; This allows either of the other two operands to be a memory operand. We
11159 ;; can't just use the first operand as an argument to the normal pperm because
11160 ;; then an output-only argument would suddenly become an input operand.
;; sse5_pperm_zero_v16qi_v8hi: pperm-based V16QI -> V8HI widening pattern.
;;   operand 0 - V8HI destination; also printed as the first pperm source
;;   operand 1 - V16QI data operand, register or memory
;;   operand 2 - parallel of const_ints (element selector, never a register)
;;   operand 3 - V16QI operand carried in a use, register or memory
;; The condition requires at least one of operands 1 and 3 to be a register.
;; It tests operand 3 because operand 2 is a const_int parallel, so a
;; register test on it can never succeed.
11161 (define_insn "sse5_pperm_zero_v16qi_v8hi"
11162 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11165 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
11166 (match_operand 2 "" "")))) ;; parallel with const_int's
11167 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11169 && (register_operand (operands[1], V16QImode)
11170 || register_operand (operands[3], V16QImode))"
11171 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11172 [(set_attr "type" "sseadd")
11173 (set_attr "prefix_data16" "0")
11174 (set_attr "prefix_extra" "2")
11175 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v16qi_v8hi: pperm-based V16QI -> V8HI widening pattern
;; (sign variant, per the pattern name).
;;   operand 0 - V8HI destination; also printed as the first pperm source
;;   operand 1 - V16QI data operand, register or memory
;;   operand 2 - parallel of const_ints (element selector, never a register)
;;   operand 3 - V16QI operand carried in a use, register or memory
;; The condition requires at least one of operands 1 and 3 to be a register.
;; It tests operand 3 because operand 2 is a const_int parallel, so a
;; register test on it can never succeed.
11177 (define_insn "sse5_pperm_sign_v16qi_v8hi"
11178 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11181 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
11182 (match_operand 2 "" "")))) ;; parallel with const_int's
11183 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11185 && (register_operand (operands[1], V16QImode)
11186 || register_operand (operands[3], V16QImode))"
11187 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11188 [(set_attr "type" "sseadd")
11189 (set_attr "prefix_data16" "0")
11190 (set_attr "prefix_extra" "2")
11191 (set_attr "mode" "TI")])
;; sse5_pperm_zero_v8hi_v4si: pperm-based V8HI -> V4SI widening pattern.
;;   operand 0 - V4SI destination; also printed as the first pperm source
;;   operand 1 - V8HI data operand, register or memory
;;   operand 2 - parallel of const_ints (element selector, never a register)
;;   operand 3 - V16QI operand carried in a use, register or memory
;; The condition requires at least one of operands 1 and 3 to be a register.
;; It tests operand 3 because operand 2 is a const_int parallel, so a
;; register test on it can never succeed.
11193 (define_insn "sse5_pperm_zero_v8hi_v4si"
11194 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11197 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
11198 (match_operand 2 "" "")))) ;; parallel with const_int's
11199 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11201 && (register_operand (operands[1], V8HImode)
11202 || register_operand (operands[3], V16QImode))"
11203 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11204 [(set_attr "type" "sseadd")
11205 (set_attr "prefix_data16" "0")
11206 (set_attr "prefix_extra" "2")
11207 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v8hi_v4si: pperm-based V8HI -> V4SI widening pattern
;; (sign variant, per the pattern name).
;;   operand 0 - V4SI destination; also printed as the first pperm source
;;   operand 1 - V8HI data operand, register or memory
;;   operand 2 - parallel of const_ints (element selector, never a register)
;;   operand 3 - V16QI operand carried in a use, register or memory
;; The condition requires at least one of operands 1 and 3 to be a register.
;; It tests operand 3 because operand 2 is a const_int parallel, so a
;; register test on it can never succeed.
11209 (define_insn "sse5_pperm_sign_v8hi_v4si"
11210 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11213 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
11214 (match_operand 2 "" "")))) ;; parallel with const_int's
11215 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11217 && (register_operand (operands[1], V8HImode)
11218 || register_operand (operands[3], V16QImode))"
11219 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11220 [(set_attr "type" "sseadd")
11221 (set_attr "prefix_data16" "0")
11222 (set_attr "prefix_extra" "2")
11223 (set_attr "mode" "TI")])
;; sse5_pperm_zero_v4si_v2di: pperm-based V4SI -> V2DI widening pattern.
;;   operand 0 - V2DI destination; also printed as the first pperm source
;;   operand 1 - V4SI data operand, register or memory
;;   operand 2 - parallel of const_ints (element selector, never a register)
;;   operand 3 - V16QI operand carried in a use, register or memory
;; The condition requires at least one of operands 1 and 3 to be a register.
;; It tests operand 3 because operand 2 is a const_int parallel, so a
;; register test on it can never succeed.
11225 (define_insn "sse5_pperm_zero_v4si_v2di"
11226 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11229 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
11230 (match_operand 2 "" "")))) ;; parallel with const_int's
11231 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11233 && (register_operand (operands[1], V4SImode)
11234 || register_operand (operands[3], V16QImode))"
11235 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11236 [(set_attr "type" "sseadd")
11237 (set_attr "prefix_data16" "0")
11238 (set_attr "prefix_extra" "2")
11239 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v4si_v2di: pperm-based V4SI -> V2DI widening pattern
;; (sign variant, per the pattern name).
;;   operand 0 - V2DI destination; also printed as the first pperm source
;;   operand 1 - V4SI data operand, register or memory
;;   operand 2 - parallel of const_ints (element selector, never a register)
;;   operand 3 - V16QI operand carried in a use, register or memory
;; The condition requires at least one of operands 1 and 3 to be a register.
;; It tests operand 3 because operand 2 is a const_int parallel, so a
;; register test on it can never succeed.
11241 (define_insn "sse5_pperm_sign_v4si_v2di"
11242 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11245 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
11246 (match_operand 2 "" "")))) ;; parallel with const_int's
11247 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11249 && (register_operand (operands[1], V4SImode)
11250 || register_operand (operands[3], V16QImode))"
11251 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11252 [(set_attr "type" "sseadd")
11253 (set_attr "prefix_data16" "0")
11254 (set_attr "prefix_extra" "2")
11255 (set_attr "mode" "TI")])
11257 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; sse5_pperm_pack_v2di_v4si: pperm-based pack of two V2DI inputs
;; (operands 1 and 2) into a V4SI result.  Operand 3 is a V16QI operand
;; carried in a use.  The constraint alternatives tie the destination to
;; operand 1 (first two) or operand 3 (last two); ix86_sse5_valid_op_p in
;; the condition filters the operand combinations.
11258 (define_insn "sse5_pperm_pack_v2di_v4si"
11259 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
11262 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
11264 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11265 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11266 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11267 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11268 [(set_attr "type" "sse4arg")
11269 (set_attr "mode" "TI")])
;; sse5_pperm_pack_v4si_v8hi: pperm-based pack of two V4SI inputs
;; (operands 1 and 2) into a V8HI result.  Operand 3 is a V16QI operand
;; carried in a use.  The constraint alternatives tie the destination to
;; operand 1 (first two) or operand 3 (last two); ix86_sse5_valid_op_p in
;; the condition filters the operand combinations.
11271 (define_insn "sse5_pperm_pack_v4si_v8hi"
11272 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
11275 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
11277 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11278 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11279 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11280 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11281 [(set_attr "type" "sse4arg")
11282 (set_attr "mode" "TI")])
;; sse5_pperm_pack_v8hi_v16qi: pperm-based pack of two V8HI inputs
;; (operands 1 and 2) into a V16QI result.  Operand 3 is a V16QI operand
;; carried in a use.  The constraint alternatives tie the destination to
;; operand 1 (first two) or operand 3 (last two); ix86_sse5_valid_op_p in
;; the condition filters the operand combinations.
11284 (define_insn "sse5_pperm_pack_v8hi_v16qi"
11285 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
11288 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
11290 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11291 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11292 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11293 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11294 [(set_attr "type" "sse4arg")
11295 (set_attr "mode" "TI")])
11297 ;; Floating point permutation (permps, permpd)
;; sse5_perm<mode>: floating-point permute over the SSEMODEF2P modes
;; (V4SF, V2DF), wrapped in UNSPEC_SSE5_PERMUTE.  Operands 1 and 2 are
;; vectors of the iterated mode; operand 3 is a V16QI operand.  The
;; constraint alternatives tie the destination to operand 1 or operand 3.
11298 (define_insn "sse5_perm<mode>"
11299 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
11301 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
11302 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
11303 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
11304 UNSPEC_SSE5_PERMUTE))]
11305 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11306 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11307 [(set_attr "type" "sse4arg")
11308 (set_attr "mode" "<MODE>")])
11310 ;; SSE5 packed rotate instructions
;; rotl<mode>3: expander for a vector rotate-left with an SImode count.
;; When the count is not accepted by const_0_to_<sserotatemax>_operand,
;; the expander converts it to <ssescalarmode> if its mode differs, places
;; the same value in all <ssescalarnum> slots of a PARALLEL, builds a
;; vector register from it with vec_init, and emits sse5_vrotl<mode>3.
11311 (define_expand "rotl<mode>3"
11312 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11313 (rotate:SSEMODE1248
11314 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11315 (match_operand:SI 2 "general_operand")))]
11318 /* If we were given a scalar, convert it to parallel */
11319 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11321 rtvec vs = rtvec_alloc (<ssescalarnum>);
11322 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11323 rtx reg = gen_reg_rtx (<MODE>mode);
11324 rtx op2 = operands[2];
11327 if (GET_MODE (op2) != <ssescalarmode>mode)
11329 op2 = gen_reg_rtx (<ssescalarmode>mode);
11330 convert_move (op2, operands[2], false);
11333 for (i = 0; i < <ssescalarnum>; i++)
11334 RTVEC_ELT (vs, i) = op2;
11336 emit_insn (gen_vec_init<mode> (reg, par));
11337 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; rotr<mode>3: expander for a vector rotate-right with an SImode count.
;; When the count is not accepted by const_0_to_<sserotatemax>_operand,
;; the expander converts it to <ssescalarmode> if its mode differs,
;; broadcasts it into a vector with vec_init, negates that vector, and
;; emits sse5_vrotl<mode>3 with the negated counts.
11342 (define_expand "rotr<mode>3"
11343 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11344 (rotatert:SSEMODE1248
11345 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11346 (match_operand:SI 2 "general_operand")))]
11349 /* If we were given a scalar, convert it to parallel */
11350 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11352 rtvec vs = rtvec_alloc (<ssescalarnum>);
11353 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11354 rtx neg = gen_reg_rtx (<MODE>mode);
11355 rtx reg = gen_reg_rtx (<MODE>mode);
11356 rtx op2 = operands[2];
11359 if (GET_MODE (op2) != <ssescalarmode>mode)
11361 op2 = gen_reg_rtx (<ssescalarmode>mode);
11362 convert_move (op2, operands[2], false);
11365 for (i = 0; i < <ssescalarnum>; i++)
11366 RTVEC_ELT (vs, i) = op2;
11368 emit_insn (gen_vec_init<mode> (reg, par));
11369 emit_insn (gen_neg<mode>2 (neg, reg));
11370 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; sse5_rotl<mode>3: rotate-left by an immediate count.
;;   operand 0 - destination register
;;   operand 1 - source vector, register or memory
;;   operand 2 - SImode constant accepted by const_0_to_<sserotatemax>_operand
;; Emits prot<ssevecsize> with the count printed unchanged.
11375 (define_insn "sse5_rotl<mode>3"
11376 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11377 (rotate:SSEMODE1248
11378 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11379 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11381 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11382 [(set_attr "type" "sseishft")
11383 (set_attr "length_immediate" "1")
11384 (set_attr "mode" "TI")])
;; sse5_rotr<mode>3: rotate-right by an immediate count, implemented with
;; the rotate-left instruction prot<ssevecsize>.
;;   operand 0 - destination register
;;   operand 1 - source vector, register or memory
;;   operand 2 - SImode constant accepted by const_0_to_<sserotatemax>_operand
;; The emitted count (operand 3, created at output time) is the element
;; width in bits minus operand 2.  The width comes from the scalar element
;; mode; <ssescalarnum> is the number of elements, so multiplying it by 8
;; gives the element width only for V4SI.
11386 (define_insn "sse5_rotr<mode>3"
11387 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11388 (rotatert:SSEMODE1248
11389 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11390 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11393 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11394 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11396 [(set_attr "type" "sseishft")
11397 (set_attr "length_immediate" "1")
11398 (set_attr "mode" "TI")])
;; vrotr<mode>3: expander for rotate-right with a per-element count vector
;; (operand 2).  It negates the count vector into a fresh register and
;; emits sse5_vrotl<mode>3 with the negated counts.
11400 (define_expand "vrotr<mode>3"
11401 [(match_operand:SSEMODE1248 0 "register_operand" "")
11402 (match_operand:SSEMODE1248 1 "register_operand" "")
11403 (match_operand:SSEMODE1248 2 "register_operand" "")]
11406 rtx reg = gen_reg_rtx (<MODE>mode);
11407 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11408 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3: expander for rotate-left with a per-element count vector
;; (operand 2).  It forwards all three operands to sse5_vrotl<mode>3.
11412 (define_expand "vrotl<mode>3"
11413 [(match_operand:SSEMODE1248 0 "register_operand" "")
11414 (match_operand:SSEMODE1248 1 "register_operand" "")
11415 (match_operand:SSEMODE1248 2 "register_operand" "")]
11418 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; sse5_vrotl<mode>3: variable rotate, emitted as prot<ssevecsize>.
;; The RTL is an if_then_else keyed on operand 2 (the count vector) whose
;; arms are a rotate of operand 1 and a rotatert by the negation of
;; operand 2.  Either operand 1 or operand 2 may be in memory, per the two
;; constraint alternatives; ix86_sse5_valid_op_p filters the combinations.
11422 (define_insn "sse5_vrotl<mode>3"
11423 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11424 (if_then_else:SSEMODE1248
11426 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11428 (rotate:SSEMODE1248
11429 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11431 (rotatert:SSEMODE1248
11433 (neg:SSEMODE1248 (match_dup 2)))))]
11434 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11435 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11436 [(set_attr "type" "sseishft")
11437 (set_attr "prefix_data16" "0")
11438 (set_attr "prefix_extra" "2")
11439 (set_attr "mode" "TI")])
11441 ;; SSE5 packed shift instructions.
11442 ;; FIXME: add V2DI back in
;; vlshr<mode>3: expander for a logical right shift by a per-element count
;; vector (operand 2), over the SSEMODE124 modes.  It negates the count
;; vector and emits sse5_lshl<mode>3 with the negated counts.
11443 (define_expand "vlshr<mode>3"
11444 [(match_operand:SSEMODE124 0 "register_operand" "")
11445 (match_operand:SSEMODE124 1 "register_operand" "")
11446 (match_operand:SSEMODE124 2 "register_operand" "")]
11449 rtx neg = gen_reg_rtx (<MODE>mode);
11450 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11451 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; vashr<mode>3: expander for an arithmetic right shift by a per-element
;; count vector (operand 2), over the SSEMODE124 modes.  It negates the
;; count vector and emits sse5_ashl<mode>3 with the negated counts.
11455 (define_expand "vashr<mode>3"
11456 [(match_operand:SSEMODE124 0 "register_operand" "")
11457 (match_operand:SSEMODE124 1 "register_operand" "")
11458 (match_operand:SSEMODE124 2 "register_operand" "")]
11461 rtx neg = gen_reg_rtx (<MODE>mode);
11462 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11463 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; vashl<mode>3: expander for a left shift by a per-element count vector
;; (operand 2), over the SSEMODE124 modes.  It forwards all three operands
;; to sse5_ashl<mode>3.
11467 (define_expand "vashl<mode>3"
11468 [(match_operand:SSEMODE124 0 "register_operand" "")
11469 (match_operand:SSEMODE124 1 "register_operand" "")
11470 (match_operand:SSEMODE124 2 "register_operand" "")]
11473 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; sse5_ashl<mode>3: variable arithmetic shift, emitted as psha<ssevecsize>.
;; The RTL is an if_then_else keyed on operand 2 (the count vector) whose
;; arms are an ashift of operand 1 and an ashiftrt by the negation of
;; operand 2.  Either operand 1 or operand 2 may be in memory, per the two
;; constraint alternatives; ix86_sse5_valid_op_p filters the combinations.
11477 (define_insn "sse5_ashl<mode>3"
11478 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11479 (if_then_else:SSEMODE1248
11481 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11483 (ashift:SSEMODE1248
11484 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11486 (ashiftrt:SSEMODE1248
11488 (neg:SSEMODE1248 (match_dup 2)))))]
11489 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11490 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11491 [(set_attr "type" "sseishft")
11492 (set_attr "prefix_data16" "0")
11493 (set_attr "prefix_extra" "2")
11494 (set_attr "mode" "TI")])
;; sse5_lshl<mode>3: variable logical shift, emitted as pshl<ssevecsize>.
;; The RTL is an if_then_else keyed on operand 2 (the count vector) whose
;; arms are an ashift of operand 1 and an lshiftrt by the negation of
;; operand 2.  Either operand 1 or operand 2 may be in memory, per the two
;; constraint alternatives; ix86_sse5_valid_op_p filters the combinations.
11496 (define_insn "sse5_lshl<mode>3"
11497 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11498 (if_then_else:SSEMODE1248
11500 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11502 (ashift:SSEMODE1248
11503 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11505 (lshiftrt:SSEMODE1248
11507 (neg:SSEMODE1248 (match_dup 2)))))]
11508 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11509 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11510 [(set_attr "type" "sseishft")
11511 (set_attr "prefix_data16" "0")
11512 (set_attr "prefix_extra" "2")
11513 (set_attr "mode" "TI")])
11515 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; ashlv16qi3: expander for a V16QI shift by a scalar count (operand 2,
;; SImode register or constant).  It stores operand 2 in all 16 slots of a
;; PARALLEL, builds a V16QI register from it with vec_init, and emits
;; sse5_ashlv16qi3 with that count vector.
11516 (define_expand "ashlv16qi3"
11517 [(match_operand:V16QI 0 "register_operand" "")
11518 (match_operand:V16QI 1 "register_operand" "")
11519 (match_operand:SI 2 "nonmemory_operand" "")]
11522 rtvec vs = rtvec_alloc (16);
11523 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11524 rtx reg = gen_reg_rtx (V16QImode);
11526 for (i = 0; i < 16; i++)
11527 RTVEC_ELT (vs, i) = operands[2];
11529 emit_insn (gen_vec_initv16qi (reg, par));
11530 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; lshlv16qi3: expander for a V16QI shift by a scalar count (operand 2,
;; SImode register or constant).  It stores operand 2 in all 16 slots of a
;; PARALLEL, builds a V16QI register from it with vec_init, and emits
;; sse5_lshlv16qi3 with that count vector.
11534 (define_expand "lshlv16qi3"
11535 [(match_operand:V16QI 0 "register_operand" "")
11536 (match_operand:V16QI 1 "register_operand" "")
11537 (match_operand:SI 2 "nonmemory_operand" "")]
11540 rtvec vs = rtvec_alloc (16);
11541 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11542 rtx reg = gen_reg_rtx (V16QImode);
11544 for (i = 0; i < 16; i++)
11545 RTVEC_ELT (vs, i) = operands[2];
11547 emit_insn (gen_vec_initv16qi (reg, par));
11548 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; ashrv16qi3: expander for a V16QI arithmetic right shift by a scalar
;; count (operand 2), implemented with sse5_ashlv16qi3 and a negated count.
;; For a constant count the negation is folded into the broadcast element;
;; otherwise the broadcast vector is negated with negv16qi2 before use.
11552 (define_expand "ashrv16qi3"
11553 [(match_operand:V16QI 0 "register_operand" "")
11554 (match_operand:V16QI 1 "register_operand" "")
11555 (match_operand:SI 2 "nonmemory_operand" "")]
11558 rtvec vs = rtvec_alloc (16);
11559 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11560 rtx reg = gen_reg_rtx (V16QImode);
11562 rtx ele = ((CONST_INT_P (operands[2]))
11563 ? GEN_INT (- INTVAL (operands[2]))
11566 for (i = 0; i < 16; i++)
11567 RTVEC_ELT (vs, i) = ele;
11569 emit_insn (gen_vec_initv16qi (reg, par));
11571 if (!CONST_INT_P (operands[2]))
11573 rtx neg = gen_reg_rtx (V16QImode);
11574 emit_insn (gen_negv16qi2 (neg, reg));
11575 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11578 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; ashrv2di3: expander for a V2DI arithmetic right shift by a scalar count
;; (operand 2), implemented with sse5_ashlv2di3 and a negated count.
;; The negated count is a folded constant when operand 2 is a CONST_INT;
;; otherwise it is computed with negdi2, after a convert_move to DImode
;; when operand 2 has a different mode.  The value is then placed in both
;; elements of the count vector.
11583 (define_expand "ashrv2di3"
11584 [(match_operand:V2DI 0 "register_operand" "")
11585 (match_operand:V2DI 1 "register_operand" "")
11586 (match_operand:DI 2 "nonmemory_operand" "")]
11589 rtvec vs = rtvec_alloc (2);
11590 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11591 rtx reg = gen_reg_rtx (V2DImode);
11594 if (CONST_INT_P (operands[2]))
11595 ele = GEN_INT (- INTVAL (operands[2]));
11596 else if (GET_MODE (operands[2]) != DImode)
11598 rtx move = gen_reg_rtx (DImode);
11599 ele = gen_reg_rtx (DImode);
11600 convert_move (move, operands[2], false);
11601 emit_insn (gen_negdi2 (ele, move));
11605 ele = gen_reg_rtx (DImode);
11606 emit_insn (gen_negdi2 (ele, operands[2]));
11609 RTVEC_ELT (vs, 0) = ele;
11610 RTVEC_ELT (vs, 1) = ele;
11611 emit_insn (gen_vec_initv2di (reg, par));
11612 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11616 ;; SSE5 FRCZ support
;; sse5_frcz<mode>2: emits frcz<ssemodesuffixf4> for the SSEMODEF2P modes.
;;   operand 0 - destination register
;;   operand 1 - source vector, register or memory
11618 (define_insn "sse5_frcz<mode>2"
11619 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11621 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11624 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11625 [(set_attr "type" "ssecvt1")
11626 (set_attr "mode" "<MODE>")])
;; sse5_vmfrcz<mode>2: vec_merge form, emitted as frcz<ssemodesuffixf2s>.
;;   operand 0 - destination register
;;   operand 1 - register tied to the destination (constraint 0)
;;   operand 2 - source vector, register or memory; the only source printed
11629 (define_insn "sse5_vmfrcz<mode>2"
11630 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11631 (vec_merge:SSEMODEF2P
11633 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11635 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11638 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11639 [(set_attr "type" "ssecvt1")
11640 (set_attr "mode" "<MODE>")])
;; sse5_cvtph2ps: emits cvtph2ps, modelled as an unspec.
;;   operand 0 - V4SF destination register
;;   operand 1 - V4HI source, register or memory
11642 (define_insn "sse5_cvtph2ps"
11643 [(set (match_operand:V4SF 0 "register_operand" "=x")
11644 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11647 "cvtph2ps\t{%1, %0|%0, %1}"
11648 [(set_attr "type" "ssecvt")
11649 (set_attr "mode" "V4SF")])
;; sse5_cvtps2ph: emits cvtps2ph, modelled as an unspec.
;;   operand 0 - V4HI destination, register or memory
;;   operand 1 - V4SF source register
11651 (define_insn "sse5_cvtps2ph"
11652 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11653 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11656 "cvtps2ph\t{%1, %0|%0, %1}"
11657 [(set_attr "type" "ssecvt")
11658 (set_attr "mode" "V4SF")])
11660 ;; Scalar versions of the com instructions that use vector types that are
11661 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11662 ;; com instructions fill in 0's in the upper bits instead of leaving them
11663 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; sse5_vmmaskcmp<mode>3: expander for the scalar com comparisons.
;; Operand 1 is the comparison operator (sse5_comparison_float_operator)
;; applied to operands 2 and 3.  The preparation code sets operands[4] to
;; the all-zero constant of the vector mode for use by the vec_merge.
11664 (define_expand "sse5_vmmaskcmp<mode>3"
11665 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11666 (vec_merge:SSEMODEF2P
11667 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11668 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11669 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11674 operands[4] = CONST0_RTX (<MODE>mode);
;; *sse5_vmmaskcmp<mode>3: insn matched for the scalar com comparisons.
;; Emits com%Y1<ssemodesuffixf2s>, where %Y1 prints the comparison taken
;; from operator 1.  Operand 2 is a register, operand 3 a register or
;; memory, and operand 4 is the other vec_merge input.
11677 (define_insn "*sse5_vmmaskcmp<mode>3"
11678 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11679 (vec_merge:SSEMODEF2P
11680 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11681 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11682 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11683 (match_operand:SSEMODEF2P 4 "")
11686 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11687 [(set_attr "type" "sse4arg")
11688 (set_attr "prefix_data16" "0")
11689 (set_attr "prefix_rep" "0")
11690 (set_attr "prefix_extra" "2")
11691 (set_attr "length_immediate" "1")
11692 (set_attr "mode" "<ssescalarmode>")])
11694 ;; We don't have a comparison operator that always returns true/false, so
11695 ;; handle comfalse and comtrue specially.
;; sse5_com_tf<mode>3: always-false / always-true floating-point compares,
;; modelled as UNSPEC_SSE5_TRUEFALSE.
;;   operand 1 - register source
;;   operand 2 - register or memory source
;;   operand 3 - SImode constant selecting which of the four forms to emit
;; The switch picks, in order, the scalar false, packed false, scalar true
;; and packed true mnemonics; any other selector value is unreachable.
;; The two true arms emit the comtrue mnemonics, where they previously
;; repeated the comfalse strings.
11696 (define_insn "sse5_com_tf<mode>3"
11697 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11699 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11700 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11701 (match_operand:SI 3 "const_int_operand" "n")]
11702 UNSPEC_SSE5_TRUEFALSE))]
11705 const char *ret = NULL;
11707 switch (INTVAL (operands[3]))
11710 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11714 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11718 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11722 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11726 gcc_unreachable ();
11731 [(set_attr "type" "ssecmp")
11732 (set_attr "prefix_data16" "0")
11733 (set_attr "prefix_rep" "0")
11734 (set_attr "prefix_extra" "2")
11735 (set_attr "length_immediate" "1")
11736 (set_attr "mode" "<MODE>")])
;; sse5_maskcmp<mode>3 (floating-point modes): packed com comparison.
;; Emits com%Y1<ssemodesuffixf4>, where %Y1 prints the comparison taken
;; from operator 1 (sse5_comparison_float_operator).  Operand 2 is a
;; register; operand 3 is a register or memory.
11738 (define_insn "sse5_maskcmp<mode>3"
11739 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11740 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11741 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11742 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11744 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11745 [(set_attr "type" "ssecmp")
11746 (set_attr "prefix_data16" "0")
11747 (set_attr "prefix_rep" "0")
11748 (set_attr "prefix_extra" "2")
11749 (set_attr "length_immediate" "1")
11750 (set_attr "mode" "<MODE>")])
;; sse5_maskcmp<mode>3 (integer modes): packed integer pcom comparison.
;; Emits pcom%Y1<ssevecsize>, where %Y1 prints the comparison taken from
;; operator 1 (ix86_comparison_int_operator).  Operand 2 is a register;
;; operand 3 is a register or memory.
11752 (define_insn "sse5_maskcmp<mode>3"
11753 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11754 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11755 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11756 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11758 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11759 [(set_attr "type" "sse4arg")
11760 (set_attr "prefix_data16" "0")
11761 (set_attr "prefix_rep" "0")
11762 (set_attr "prefix_extra" "2")
11763 (set_attr "length_immediate" "1")
11764 (set_attr "mode" "TI")])
;; sse5_maskcmp_uns<mode>3: packed unsigned integer pcom comparison.
;; Emits pcom%Y1u<ssevecsize>, where %Y1 prints the comparison taken from
;; operator 1 (ix86_comparison_uns_operator).  Operand 2 is a register;
;; operand 3 is a register or memory.
11766 (define_insn "sse5_maskcmp_uns<mode>3"
11767 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11768 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11769 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11770 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11772 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11773 [(set_attr "type" "ssecmp")
11774 (set_attr "prefix_data16" "0")
11775 (set_attr "prefix_rep" "0")
11776 (set_attr "prefix_extra" "2")
11777 (set_attr "length_immediate" "1")
11778 (set_attr "mode" "TI")])
11780 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11781 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11782 ;; the exact instruction generated for the intrinsic.
;; sse5_maskcmp_uns2<mode>3: same output template as the plain unsigned
;; pcom comparison, but with the comparison wrapped in
;; UNSPEC_SSE5_UNSIGNED_CMP.  Operand 2 is a register; operand 3 is a
;; register or memory.
11783 (define_insn "sse5_maskcmp_uns2<mode>3"
11784 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11785 (unspec:SSEMODE1248
11786 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11787 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11788 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11789 UNSPEC_SSE5_UNSIGNED_CMP))]
11791 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11792 [(set_attr "type" "ssecmp")
11793 (set_attr "prefix_data16" "0")
11794 (set_attr "prefix_extra" "2")
11795 (set_attr "length_immediate" "1")
11796 (set_attr "mode" "TI")])
11798 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11799 ;; being added here to be complete.
;; sse5_pcom_tf<mode>3: integer pcomtrue/pcomfalse, modelled as
;; UNSPEC_SSE5_TRUEFALSE.
;;   operand 1 - register source
;;   operand 2 - register or memory source
;;   operand 3 - SImode constant; nonzero selects pcomtrue<ssevecsize>,
;;               zero selects pcomfalse<ssevecsize>
11800 (define_insn "sse5_pcom_tf<mode>3"
11801 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11802 (unspec:SSEMODE1248
11803 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11804 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11805 (match_operand:SI 3 "const_int_operand" "n")]
11806 UNSPEC_SSE5_TRUEFALSE))]
11809 return ((INTVAL (operands[3]) != 0)
11810 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11811 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11813 [(set_attr "type" "ssecmp")
11814 (set_attr "prefix_data16" "0")
11815 (set_attr "prefix_extra" "2")
11816 (set_attr "length_immediate" "1")
11817 (set_attr "mode" "TI")])
;; *avx_aesenc: VEX-encoded three-operand form, enabled when both
;; TARGET_AES and TARGET_AVX hold.  Emits vaesenc with a separate
;; destination (operand 0), a register source (operand 1) and a register
;; or memory source (operand 2).
11819 (define_insn "*avx_aesenc"
11820 [(set (match_operand:V2DI 0 "register_operand" "=x")
11821 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11822 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11824 "TARGET_AES && TARGET_AVX"
11825 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11826 [(set_attr "type" "sselog1")
11827 (set_attr "prefix_extra" "1")
11828 (set_attr "prefix" "vex")
11829 (set_attr "mode" "TI")])
;; aesenc: two-operand form.  Operand 1 is tied to the destination
;; (constraint 0), so only the destination and operand 2 (register or
;; memory) are printed.
11831 (define_insn "aesenc"
11832 [(set (match_operand:V2DI 0 "register_operand" "=x")
11833 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11834 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11837 "aesenc\t{%2, %0|%0, %2}"
11838 [(set_attr "type" "sselog1")
11839 (set_attr "prefix_extra" "1")
11840 (set_attr "mode" "TI")])
;; *avx_aesenclast: VEX-encoded three-operand form of UNSPEC_AESENCLAST,
;; enabled when both TARGET_AES and TARGET_AVX hold.  Emits vaesenclast
;; with a register source (operand 1) and a register or memory source
;; (operand 2).
11842 (define_insn "*avx_aesenclast"
11843 [(set (match_operand:V2DI 0 "register_operand" "=x")
11844 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11845 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11846 UNSPEC_AESENCLAST))]
11847 "TARGET_AES && TARGET_AVX"
11848 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11849 [(set_attr "type" "sselog1")
11850 (set_attr "prefix_extra" "1")
11851 (set_attr "prefix" "vex")
11852 (set_attr "mode" "TI")])
;; aesenclast: two-operand form of UNSPEC_AESENCLAST.  Operand 1 is tied
;; to the destination (constraint 0), so only the destination and
;; operand 2 (register or memory) are printed.
11854 (define_insn "aesenclast"
11855 [(set (match_operand:V2DI 0 "register_operand" "=x")
11856 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11857 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11858 UNSPEC_AESENCLAST))]
11860 "aesenclast\t{%2, %0|%0, %2}"
11861 [(set_attr "type" "sselog1")
11862 (set_attr "prefix_extra" "1")
11863 (set_attr "mode" "TI")])
;; *avx_aesdec: VEX-encoded three-operand form, enabled when both
;; TARGET_AES and TARGET_AVX hold.  Emits vaesdec with a register source
;; (operand 1) and a register or memory source (operand 2).
11865 (define_insn "*avx_aesdec"
11866 [(set (match_operand:V2DI 0 "register_operand" "=x")
11867 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11868 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11870 "TARGET_AES && TARGET_AVX"
11871 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11872 [(set_attr "type" "sselog1")
11873 (set_attr "prefix_extra" "1")
11874 (set_attr "prefix" "vex")
11875 (set_attr "mode" "TI")])
;; aesdec: two-operand form.  Operand 1 is tied to the destination
;; (constraint 0), so only the destination and operand 2 (register or
;; memory) are printed.
11877 (define_insn "aesdec"
11878 [(set (match_operand:V2DI 0 "register_operand" "=x")
11879 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11880 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11883 "aesdec\t{%2, %0|%0, %2}"
11884 [(set_attr "type" "sselog1")
11885 (set_attr "prefix_extra" "1")
11886 (set_attr "mode" "TI")])
;; *avx_aesdeclast: VEX-encoded three-operand form of UNSPEC_AESDECLAST,
;; enabled when both TARGET_AES and TARGET_AVX hold.  Emits vaesdeclast
;; with a register source (operand 1) and a register or memory source
;; (operand 2).
11888 (define_insn "*avx_aesdeclast"
11889 [(set (match_operand:V2DI 0 "register_operand" "=x")
11890 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11891 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11892 UNSPEC_AESDECLAST))]
11893 "TARGET_AES && TARGET_AVX"
11894 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11895 [(set_attr "type" "sselog1")
11896 (set_attr "prefix_extra" "1")
11897 (set_attr "prefix" "vex")
11898 (set_attr "mode" "TI")])
;; aesdeclast: two-operand form of UNSPEC_AESDECLAST.  Operand 1 is tied
;; to the destination (constraint 0), so only the destination and
;; operand 2 (register or memory) are printed.
11900 (define_insn "aesdeclast"
11901 [(set (match_operand:V2DI 0 "register_operand" "=x")
11902 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11903 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11904 UNSPEC_AESDECLAST))]
11906 "aesdeclast\t{%2, %0|%0, %2}"
11907 [(set_attr "type" "sselog1")
11908 (set_attr "prefix_extra" "1")
11909 (set_attr "mode" "TI")])
;; aesimc: single-source pattern emitting aesimc.  The %v in the template
;; and the maybe_vex prefix attribute let one pattern serve both the
;; legacy and the VEX encoding.  Operand 1 is a register or memory.
11911 (define_insn "aesimc"
11912 [(set (match_operand:V2DI 0 "register_operand" "=x")
11913 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11916 "%vaesimc\t{%1, %0|%0, %1}"
11917 [(set_attr "type" "sselog1")
11918 (set_attr "prefix_extra" "1")
11919 (set_attr "prefix" "maybe_vex")
11920 (set_attr "mode" "TI")])
;; aeskeygenassist: UNSPEC_AESKEYGENASSIST with an 8-bit immediate.
;;   operand 1 - V2DI source, register or memory
;;   operand 2 - SImode constant in the range 0..255
;; The %v in the template and the maybe_vex prefix attribute let one
;; pattern serve both the legacy and the VEX encoding.
11922 (define_insn "aeskeygenassist"
11923 [(set (match_operand:V2DI 0 "register_operand" "=x")
11924 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11925 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11926 UNSPEC_AESKEYGENASSIST))]
11928 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11929 [(set_attr "type" "sselog1")
11930 (set_attr "prefix_extra" "1")
11931 (set_attr "length_immediate" "1")
11932 (set_attr "prefix" "maybe_vex")
11933 (set_attr "mode" "TI")])
;; *vpclmulqdq: VEX-encoded three-operand form, enabled when both
;; TARGET_PCLMUL and TARGET_AVX hold.
;;   operand 1 - V2DI register source
;;   operand 2 - V2DI source, register or memory
;;   operand 3 - SImode constant in the range 0..255
11935 (define_insn "*vpclmulqdq"
11936 [(set (match_operand:V2DI 0 "register_operand" "=x")
11937 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11938 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11939 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11941 "TARGET_PCLMUL && TARGET_AVX"
11942 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11943 [(set_attr "type" "sselog1")
11944 (set_attr "prefix_extra" "1")
11945 (set_attr "length_immediate" "1")
11946 (set_attr "prefix" "vex")
11947 (set_attr "mode" "TI")])
;; pclmulqdq: two-operand form.  Operand 1 is tied to the destination
;; (constraint 0); operand 2 is a register or memory source and operand 3
;; an SImode constant in the range 0..255.
11949 (define_insn "pclmulqdq"
11950 [(set (match_operand:V2DI 0 "register_operand" "=x")
11951 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11952 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11953 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11956 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11957 [(set_attr "type" "sselog1")
11958 (set_attr "prefix_extra" "1")
11959 (set_attr "length_immediate" "1")
11960 (set_attr "mode" "TI")])
;; avx_vzeroall: expander that builds the whole pattern by hand.
;; It creates a PARALLEL of nregs + 1 elements, where nregs is 16 for
;; TARGET_64BIT and 8 otherwise.  Element 0 is an unspec_volatile; each
;; remaining element sets one SSE register, viewed in V8SImode, to the
;; zero constant.
11962 (define_expand "avx_vzeroall"
11963 [(match_par_dup 0 [(const_int 0)])]
11966 int nregs = TARGET_64BIT ? 16 : 8;
11969 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11971 XVECEXP (operands[0], 0, 0)
11972 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11975 for (regno = 0; regno < nregs; regno++)
11976 XVECEXP (operands[0], 0, regno + 1)
11977 = gen_rtx_SET (VOIDmode,
11978 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11979 CONST0_RTX (V8SImode));
;; *avx_vzeroall: matches a parallel accepted by the vzeroall_operation
;; predicate.  The template spells out the leading UNSPECV_VZEROALL
;; element and the first register-to-zero set.
11982 (define_insn "*avx_vzeroall"
11983 [(match_parallel 0 "vzeroall_operation"
11984 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11985 (set (match_operand 1 "register_operand" "=x")
11986 (match_operand 2 "const0_operand" "X"))])]
11989 [(set_attr "type" "sse")
11990 (set_attr "modrm" "0")
11991 (set_attr "memory" "none")
11992 (set_attr "prefix" "vex")
11993 (set_attr "mode" "OI")])
11995 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; Variant for !TARGET_64BIT: the UNSPECV_VZEROUPPER unspec_volatile is
;; accompanied by V8SI clobbers of XMM0_REG through XMM7_REG, i.e. the eight
;; registers listed below.  The 64-bit variant is a separate pattern.
11996 (define_insn "avx_vzeroupper"
11997 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11998 (clobber (reg:V8SI XMM0_REG))
11999 (clobber (reg:V8SI XMM1_REG))
12000 (clobber (reg:V8SI XMM2_REG))
12001 (clobber (reg:V8SI XMM3_REG))
12002 (clobber (reg:V8SI XMM4_REG))
12003 (clobber (reg:V8SI XMM5_REG))
12004 (clobber (reg:V8SI XMM6_REG))
12005 (clobber (reg:V8SI XMM7_REG))]
12006 "TARGET_AVX && !TARGET_64BIT"
12008 [(set_attr "type" "sse")
12009 (set_attr "modrm" "0")
12010 (set_attr "memory" "none")
12011 (set_attr "prefix" "vex")
12012 (set_attr "mode" "OI")])
;; TARGET_64BIT variant of avx_vzeroupper: the UNSPECV_VZEROUPPER
;; unspec_volatile plus V8SI clobbers of all sixteen registers XMM0_REG
;; through XMM15_REG.  Attributes are identical to the 32-bit pattern.
12014 (define_insn "avx_vzeroupper_rex64"
12015 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12016 (clobber (reg:V8SI XMM0_REG))
12017 (clobber (reg:V8SI XMM1_REG))
12018 (clobber (reg:V8SI XMM2_REG))
12019 (clobber (reg:V8SI XMM3_REG))
12020 (clobber (reg:V8SI XMM4_REG))
12021 (clobber (reg:V8SI XMM5_REG))
12022 (clobber (reg:V8SI XMM6_REG))
12023 (clobber (reg:V8SI XMM7_REG))
12024 (clobber (reg:V8SI XMM8_REG))
12025 (clobber (reg:V8SI XMM9_REG))
12026 (clobber (reg:V8SI XMM10_REG))
12027 (clobber (reg:V8SI XMM11_REG))
12028 (clobber (reg:V8SI XMM12_REG))
12029 (clobber (reg:V8SI XMM13_REG))
12030 (clobber (reg:V8SI XMM14_REG))
12031 (clobber (reg:V8SI XMM15_REG))]
12032 "TARGET_AVX && TARGET_64BIT"
12034 [(set_attr "type" "sse")
12035 (set_attr "modrm" "0")
12036 (set_attr "memory" "none")
12037 (set_attr "prefix" "vex")
12038 (set_attr "mode" "OI")])
;; vpermilps/vpermilpd with an immediate selector, for every mode in
;; AVXMODEF2P.  Operand 1 is the source vector and may be a register or a
;; memory reference; operand 2 is the immediate, whose accepted range is
;; given by the per-mode <vpermilbits> attribute.
;; The predicate on operand 1 is nonimmediate_operand so that it agrees
;; with the "xm" constraint: a register_operand predicate would reject the
;; memory form that the constraint (and the instruction) allow.
12040 (define_insn "avx_vpermil<mode>"
12041 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12043 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
12044 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
12047 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12048 [(set_attr "type" "sselog")
12049 (set_attr "prefix_extra" "1")
12050 (set_attr "length_immediate" "1")
12051 (set_attr "prefix" "vex")
12052 (set_attr "mode" "<MODE>")])
;; vpermilps/vpermilpd with a variable selector, for every mode in
;; AVXMODEF2P.  Operand 1 is a register source; operand 2 is the selector
;; vector in mode <avxpermvecmode> and may be a register or memory.  No
;; length_immediate attribute is set because the template has no immediate.
12054 (define_insn "avx_vpermilvar<mode>3"
12055 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12057 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12058 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
12061 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12062 [(set_attr "type" "sselog")
12063 (set_attr "prefix_extra" "1")
12064 (set_attr "prefix" "vex")
12065 (set_attr "mode" "<MODE>")])
;; vperm2f128 for every mode in AVX256MODE2P, modelled as the
;; UNSPEC_VPERMIL2F128 unspec of two source vectors and an immediate.
;; Operand 1 must be a register, operand 2 may be a register or memory, and
;; operand 3 is an immediate accepted by const_0_to_255_operand.  The "mode"
;; attribute is fixed at V8SF regardless of the iterator mode.
12067 (define_insn "avx_vperm2f128<mode>3"
12068 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12069 (unspec:AVX256MODE2P
12070 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12071 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12072 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12073 UNSPEC_VPERMIL2F128))]
12075 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12076 [(set_attr "type" "sselog")
12077 (set_attr "prefix_extra" "1")
12078 (set_attr "length_immediate" "1")
12079 (set_attr "prefix" "vex")
12080 (set_attr "mode" "V8SF")])
;; vbroadcastss/vbroadcastsd for every mode in AVXMODEF4P.  The result is
;; expressed as a vec_concat of two <avxhalfvecmode> halves, each itself a
;; vec_concat; operand 1 is a scalar of mode <avxscalarmode> and must be a
;; memory operand (predicate memory_operand, constraint "m").
12082 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
12083 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
12084 (vec_concat:AVXMODEF4P
12085 (vec_concat:<avxhalfvecmode>
12086 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
12088 (vec_concat:<avxhalfvecmode>
12092 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
12093 [(set_attr "type" "ssemov")
12094 (set_attr "prefix_extra" "1")
12095 (set_attr "prefix" "vex")
12096 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss producing a V8SF register from an SF scalar.  Operand 1
;; must be a memory operand (predicate memory_operand, constraint "m").
12098 (define_insn "avx_vbroadcastss256"
12099 [(set (match_operand:V8SF 0 "register_operand" "=x")
12103 (match_operand:SF 1 "memory_operand" "m")
12116 "vbroadcastss\t{%1, %0|%0, %1}"
12117 [(set_attr "type" "ssemov")
12118 (set_attr "prefix_extra" "1")
12119 (set_attr "prefix" "vex")
12120 (set_attr "mode" "SF")])
;; vbroadcastf128 for every mode in AVX256MODEF2P.  The result is a
;; vec_concat built from operand 1, a half-width (<avxhalfvecmode>) vector
;; that must be a memory operand.  The "mode" attribute is fixed at V4SF.
12122 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
12123 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
12124 (vec_concat:AVX256MODEF2P
12125 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
12128 "vbroadcastf128\t{%1, %0|%0, %1}"
12129 [(set_attr "type" "ssemov")
12130 (set_attr "prefix_extra" "1")
12131 (set_attr "prefix" "vex")
12132 (set_attr "mode" "V4SF")])
;; Expander for vinsertf128 on every mode in AVX256MODE.  Operand 0 is the
;; 256-bit destination, operand 1 the 256-bit source register, operand 2
;; the half-width (<avxhalfvecmode>) value to insert, and operand 3 an
;; immediate accepted by const_0_to_1_operand.  The body switches on
;; INTVAL (operands[3]) and emits either vec_set_lo_<mode> or
;; vec_set_hi_<mode>; any other value is treated as unreachable.
;; NOTE(review): presumably 0 selects the "lo" pattern and 1 the "hi"
;; pattern -- confirm against the case labels.
12134 (define_expand "avx_vinsertf128<mode>"
12135 [(match_operand:AVX256MODE 0 "register_operand" "")
12136 (match_operand:AVX256MODE 1 "register_operand" "")
12137 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12138 (match_operand:SI 3 "const_0_to_1_operand" "")]
12141 switch (INTVAL (operands[3]))
12144 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12148 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12152 gcc_unreachable ();
;; Replace the low half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the concatenation of operand 2 (the new half, register or
;; memory) with elements 2 and 3 selected from operand 1.  Emitted as
;; vinsertf128 with immediate 0.
12157 (define_insn "vec_set_lo_<mode>"
12158 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12159 (vec_concat:AVX256MODE4P
12160 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12161 (vec_select:<avxhalfvecmode>
12162 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12163 (parallel [(const_int 2) (const_int 3)]))))]
12165 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12166 [(set_attr "type" "sselog")
12167 (set_attr "prefix_extra" "1")
12168 (set_attr "length_immediate" "1")
12169 (set_attr "prefix" "vex")
12170 (set_attr "mode" "V8SF")])
;; Replace the high half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the concatenation of elements 0 and 1 selected from
;; operand 1 with operand 2 (the new half, register or memory).  Emitted as
;; vinsertf128 with immediate 1.
12172 (define_insn "vec_set_hi_<mode>"
12173 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12174 (vec_concat:AVX256MODE4P
12175 (vec_select:<avxhalfvecmode>
12176 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12177 (parallel [(const_int 0) (const_int 1)]))
12178 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12180 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12181 [(set_attr "type" "sselog")
12182 (set_attr "prefix_extra" "1")
12183 (set_attr "length_immediate" "1")
12184 (set_attr "prefix" "vex")
12185 (set_attr "mode" "V8SF")])
;; Replace the low half of an eight-element 256-bit vector (AVX256MODE8P).
;; The result is the concatenation of operand 2 (the new half, register or
;; memory) with elements 4 through 7 selected from operand 1.  Emitted as
;; vinsertf128 with immediate 0.
12187 (define_insn "vec_set_lo_<mode>"
12188 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12189 (vec_concat:AVX256MODE8P
12190 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12191 (vec_select:<avxhalfvecmode>
12192 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12193 (parallel [(const_int 4) (const_int 5)
12194 (const_int 6) (const_int 7)]))))]
12196 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12197 [(set_attr "type" "sselog")
12198 (set_attr "prefix_extra" "1")
12199 (set_attr "length_immediate" "1")
12200 (set_attr "prefix" "vex")
12201 (set_attr "mode" "V8SF")])
;; Replace the high half of an eight-element 256-bit vector (AVX256MODE8P).
;; The result is the concatenation of elements 0 through 3 selected from
;; operand 1 with operand 2 (the new half, register or memory).  Emitted as
;; vinsertf128 with immediate 1.
12203 (define_insn "vec_set_hi_<mode>"
12204 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12205 (vec_concat:AVX256MODE8P
12206 (vec_select:<avxhalfvecmode>
12207 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12208 (parallel [(const_int 0) (const_int 1)
12209 (const_int 2) (const_int 3)]))
12210 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12212 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12213 [(set_attr "type" "sselog")
12214 (set_attr "prefix_extra" "1")
12215 (set_attr "length_immediate" "1")
12216 (set_attr "prefix" "vex")
12217 (set_attr "mode" "V8SF")])
;; V16HI counterpart of vec_set_lo: operand 2 is the V8HI value being
;; inserted (register or memory) and the index list 8..15 names the
;; elements of operand 1 that are kept.  Emitted as vinsertf128 with
;; immediate 0; the "mode" attribute is V8SF.
12219 (define_insn "vec_set_lo_v16hi"
12220 [(set (match_operand:V16HI 0 "register_operand" "=x")
12222 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12224 (match_operand:V16HI 1 "register_operand" "x")
12225 (parallel [(const_int 8) (const_int 9)
12226 (const_int 10) (const_int 11)
12227 (const_int 12) (const_int 13)
12228 (const_int 14) (const_int 15)]))))]
12230 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12231 [(set_attr "type" "sselog")
12232 (set_attr "prefix_extra" "1")
12233 (set_attr "length_immediate" "1")
12234 (set_attr "prefix" "vex")
12235 (set_attr "mode" "V8SF")])
;; V16HI counterpart of vec_set_hi: the index list 0..7 names the elements
;; of operand 1 that are kept, and operand 2 is the V8HI value being
;; inserted (register or memory).  Emitted as vinsertf128 with immediate 1;
;; the "mode" attribute is V8SF.
12237 (define_insn "vec_set_hi_v16hi"
12238 [(set (match_operand:V16HI 0 "register_operand" "=x")
12241 (match_operand:V16HI 1 "register_operand" "x")
12242 (parallel [(const_int 0) (const_int 1)
12243 (const_int 2) (const_int 3)
12244 (const_int 4) (const_int 5)
12245 (const_int 6) (const_int 7)]))
12246 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12248 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12249 [(set_attr "type" "sselog")
12250 (set_attr "prefix_extra" "1")
12251 (set_attr "length_immediate" "1")
12252 (set_attr "prefix" "vex")
12253 (set_attr "mode" "V8SF")])
;; V32QI counterpart of vec_set_lo: operand 2 is the V16QI value being
;; inserted (register or memory) and the index list 16..31 names the
;; elements of operand 1 that are kept.  Emitted as vinsertf128 with
;; immediate 0; the "mode" attribute is V8SF.
12255 (define_insn "vec_set_lo_v32qi"
12256 [(set (match_operand:V32QI 0 "register_operand" "=x")
12258 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12260 (match_operand:V32QI 1 "register_operand" "x")
12261 (parallel [(const_int 16) (const_int 17)
12262 (const_int 18) (const_int 19)
12263 (const_int 20) (const_int 21)
12264 (const_int 22) (const_int 23)
12265 (const_int 24) (const_int 25)
12266 (const_int 26) (const_int 27)
12267 (const_int 28) (const_int 29)
12268 (const_int 30) (const_int 31)]))))]
12270 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12271 [(set_attr "type" "sselog")
12272 (set_attr "prefix_extra" "1")
12273 (set_attr "length_immediate" "1")
12274 (set_attr "prefix" "vex")
12275 (set_attr "mode" "V8SF")])
;; V32QI counterpart of vec_set_hi: the index list 0..15 names the elements
;; of operand 1 that are kept, and operand 2 is the V16QI value being
;; inserted (register or memory).  Emitted as vinsertf128 with immediate 1;
;; the "mode" attribute is V8SF.
12277 (define_insn "vec_set_hi_v32qi"
12278 [(set (match_operand:V32QI 0 "register_operand" "=x")
12281 (match_operand:V32QI 1 "register_operand" "x")
12282 (parallel [(const_int 0) (const_int 1)
12283 (const_int 2) (const_int 3)
12284 (const_int 4) (const_int 5)
12285 (const_int 6) (const_int 7)
12286 (const_int 8) (const_int 9)
12287 (const_int 10) (const_int 11)
12288 (const_int 12) (const_int 13)
12289 (const_int 14) (const_int 15)]))
12290 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12292 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12293 [(set_attr "type" "sselog")
12294 (set_attr "prefix_extra" "1")
12295 (set_attr "length_immediate" "1")
12296 (set_attr "prefix" "vex")
12297 (set_attr "mode" "V8SF")])
;; vmaskmovps/vmaskmovpd load form for every mode in AVXMODEF2P: the
;; destination is a register, operand 1 must be a memory operand, and
;; operand 2 is a register of the same vector mode.
;; NOTE(review): operand 2 is presumably the element mask -- confirm
;; against the builtin that expands to this pattern.
12299 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12300 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12302 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12303 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12307 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12308 [(set_attr "type" "sselog1")
12309 (set_attr "prefix_extra" "1")
12310 (set_attr "prefix" "vex")
12311 (set_attr "mode" "<MODE>")])
;; vmaskmovps/vmaskmovpd store form for every mode in AVXMODEF2P, modelled
;; with UNSPEC_MASKSTORE: the destination must be a memory operand, and
;; operands 1 and 2 are both registers of the same vector mode.
;; NOTE(review): which of operands 1 and 2 is the mask and which is the
;; data is not stated here -- confirm against the expanding builtin.
12313 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12314 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12316 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12317 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12319 UNSPEC_MASKSTORE))]
12321 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12322 [(set_attr "type" "sselog1")
12323 (set_attr "prefix_extra" "1")
12324 (set_attr "prefix" "vex")
12325 (set_attr "mode" "<MODE>")])
;; Produces a 256-bit vector (AVX256MODE2P) from a half-width
;; (<avxhalfvecmode>) operand via an unspec.  Two alternatives exist: in
;; alternative 0 the source is tied to the destination register ("0"); in
;; alternative 1 the source may be any register or memory ("xm").  The move
;; templates write operand 1 into %x0, choosing vmovaps, vmovapd or vmovdqa
;; from the insn's "mode" attribute.  The "length" attribute is computed
;; separately for alternative 0 and is left at its default ("*") otherwise.
12327 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12328 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12329 (unspec:AVX256MODE2P
12330 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12334 switch (which_alternative)
12339 switch (get_attr_mode (insn))
12342 return "vmovaps\t{%1, %x0|%x0, %1}";
12344 return "vmovapd\t{%1, %x0|%x0, %1}";
12346 return "vmovdqa\t{%1, %x0|%x0, %1}";
12353 gcc_unreachable ();
12355 [(set_attr "type" "ssemov")
12356 (set_attr "prefix" "vex")
12357 (set_attr "mode" "<avxvecmode>")
12358 (set (attr "length")
12359 (if_then_else (eq_attr "alternative" "0")
12361 (const_string "*")))])
;; Produces a half-width (<avxhalfvecmode>) vector from a 256-bit
;; (AVX256MODE2P) operand via an unspec.  Two alternatives exist: in
;; alternative 0 the source is tied to the destination register ("0"); in
;; alternative 1 the source may be any register or memory ("xm").  The move
;; templates read operand 1 through %x1 into operand 0, choosing vmovaps,
;; vmovapd or vmovdqa from the insn's "mode" attribute.  The "length"
;; attribute is computed separately for alternative 0 and is left at its
;; default ("*") otherwise.
12363 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12364 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12365 (unspec:<avxhalfvecmode>
12366 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12370 switch (which_alternative)
12375 switch (get_attr_mode (insn))
12378 return "vmovaps\t{%x1, %0|%0, %x1}";
12380 return "vmovapd\t{%x1, %0|%0, %x1}";
12382 return "vmovdqa\t{%x1, %0|%0, %x1}";
12389 gcc_unreachable ();
12391 [(set_attr "type" "ssemov")
12392 (set_attr "prefix" "vex")
12393 (set_attr "mode" "<avxvecmode>")
12394 (set (attr "length")
12395 (if_then_else (eq_attr "alternative" "0")
12397 (const_string "*")))])
;; vec_init expander for every mode in AVX256MODE.  Operand 0 is the
;; destination register; operand 1 is unconstrained (empty predicate).  All
;; work is delegated to ix86_expand_vector_init, called with false as its
;; first argument.
12399 (define_expand "vec_init<mode>"
12400 [(match_operand:AVX256MODE 0 "register_operand" "")
12401 (match_operand 1 "" "")]
12404 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit vector (AVX256MODE).
;; Operand 1 is always a register.  Operand 2 has two alternatives:
;; "xm" (register or memory) and "C".  Per the attributes, alternative 0 is
;; an sselog insn with an immediate byte and alternative 1 is an ssemov
;; insn.  The vinsertf128 template inserts operand 2 with immediate 1 on
;; top of %t1; the vmov templates copy operand 1 into %x0, choosing
;; vmovaps, vmovapd or vmovdqa from the insn's "mode" attribute.
;; NOTE(review): "C" is presumably the all-zero vector constant, which
;; would make the plain move sufficient -- confirm in constraints.md.
12408 (define_insn "*vec_concat<mode>_avx"
12409 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12410 (vec_concat:AVX256MODE
12411 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12412 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12415 switch (which_alternative)
12418 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12420 switch (get_attr_mode (insn))
12423 return "vmovaps\t{%1, %x0|%x0, %1}";
12425 return "vmovapd\t{%1, %x0|%x0, %1}";
12427 return "vmovdqa\t{%1, %x0|%x0, %1}";
12430 gcc_unreachable ();
12433 [(set_attr "type" "sselog,ssemov")
12434 (set_attr "prefix_extra" "1,*")
12435 (set_attr "length_immediate" "1,*")
12436 (set_attr "prefix" "vex")
12437 (set_attr "mode" "<avxvecmode>")])