1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Mode iterators that enumerate whole families of vector modes.  A pattern
;; written with one of these iterators is instantiated once per listed mode.
22 ;; 16 byte integral modes handled by SSE
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same list as SSEMODE with V1TI added.
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same list as AVXMODE with V1TI added.
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mix-and-match subsets of the vector modes.
;; For the SSEMODE<digits> integer iterators the digits match the element
;; sizes in bytes of the listed modes (1 = QI, 2 = HI, 4 = SI, 8 = DI).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and 128-bit vector float modes.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; 256-bit float vector modes (same list as AVX256MODEF2P below).
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; 128-bit packed float modes.
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 256-bit subsets: packed float, 4- and 8-byte element groupings.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Packed float modes of both 128-bit and 256-bit width.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
;; Four-element float modes, one per width.
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; Double-precision / single-precision modes of either width.
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Modes for the int<->float conversions named in the iterator
;; (presumably the result modes of cvtdq2ps / cvtps2dq -- confirm at use).
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Integer and float 128-bit modes with matching element size:
;; 4-byte elements (V4SF/V4SI) and 8-byte elements (V2DF/V2DI).
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern.
;; V2DI is only included when TARGET_SSE4_2 holds; the other three modes
;; are unconditional.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
;; Each entry pairs a mode with the target condition that enables it.
;; NOTE(review): the opening bracket of the mode list (and possibly leading
;; entries) is not visible in this view -- confirm against the full file.
79 (define_mode_iterator SSEMODE_EO
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level.
;; Used as the <sse> prefix in pattern names below (e.g. "<sse>_movnt<mode>").
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for 1/2/4/8-byte elements).
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the fma4 suffix: packed suffix for the 256-bit float modes.
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar suffix ("ss"/"sd") for both the scalar mode and its 128-bit vector.
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
97 ;; Mapping of the avx suffix: scalar suffix for scalar modes, packed
;; suffix ("ps"/"pd") for the 128-bit vector modes.
98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
99 (V4SF "ps") (V2DF "pd")])
;; Single precision/double precision letter; patterns below append it to
;; stems such as "movup", "mulp" and "divs".
101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
;; Scalar suffix for the two four-element 128-bit modes.
103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
105 ;; Mapping of the max integer size for xop rotate immediate constraint.
;; Each value is the element width in bits minus one.
106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
108 ;; Mapping of vector modes back to the scalar modes.
;; Only 128-bit modes are listed; the vm* patterns below use it for their
;; "mode" attribute.
109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
110 (V16QI "QI") (V8HI "HI")
111 (V4SI "SI") (V2DI "DI")])
113 ;; Mapping of vector modes to a vector mode of double size:
;; same element mode, twice the element count.  Covers both the 128-bit
;; modes (-> 256-bit) and the 256-bit modes (-> 512-bit).
114 (define_mode_attr ssedoublesizemode
115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
120 ;; Number of scalar elements in each vector type, as a decimal string.
;; First row: 128-bit modes; second row: 256-bit modes.
121 (define_mode_attr ssescalarnum
122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value for the "mode" insn attribute of AVX patterns: every 128-bit
;; integer vector maps to TI, every 256-bit integer vector to OI, and float
;; vectors map to themselves.
126 (define_mode_attr avxvecmode
127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Single-precision float vector mode of the same total width as each
;; integer vector mode (128-bit -> V4SF, 256-bit -> V8SF).
130 (define_mode_attr avxvecpsmode
131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode with the same element mode and half the element count.
;; Note that V2DI and V2DF have no entry in this list.
133 (define_mode_attr avxhalfvecmode
134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Element (scalar) mode of each 128-bit and 256-bit vector mode.
137 (define_mode_attr avxscalarmode
138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Swaps between the SI-element integer vector and the SF-element float
;; vector of the same element count, in both directions.
140 (define_mode_attr avxcvtvecmode
141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer vector mode with the same element size and count as each float
;; vector mode (presumably the selector mode for permutes -- confirm at use).
142 (define_mode_attr avxpermvecmode
143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; "s" for modes with 4-byte elements, "d" for modes with 8-byte elements.
;; Appended to stems such as "vmovup" and "vmulp" in the AVX patterns below.
144 (define_mode_attr avxmodesuffixf2c
145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
;; Packed suffix per mode: "pd"/"ps" for float vectors, "si" for SI-element
;; integer vectors.
;; NOTE(review): the attribute list is not closed in this view (no closing
;; brackets before the next define_mode_attr) -- confirm against the full file.
146 (define_mode_attr avxmodesuffixp
147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix distinguishing vector widths: empty for the listed
;; 128-bit modes, "256" for the listed 256-bit modes.  Used in names such as
;; "avx_movdqu<avxmodesuffix>".
149 (define_mode_attr avxmodesuffix
150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
151 (V8SI "256") (V8SF "256") (V4DF "256")])
153 ;; Mapping of immediate bits for blend instructions.
;; Each value is (1 << N) - 1 for a vector of N elements, i.e. one bit per
;; element.
154 (define_mode_attr blendbits
155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
157 ;; Mapping of immediate bits for pinsr instructions.
;; Each value is 1 << (N - 1) for a vector of N elements.
158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Standard-named move expander for the 256-bit vector modes (AVX256MODE).
;; All the work is delegated to ix86_expand_vector_move.
168 (define_expand "mov<mode>"
169 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
170 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
173 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move insn for every mode in AVXMODE16.
;; Alternatives: 0 = SSE constant (C) -> register, 1 = register/memory ->
;; register, 2 = register -> memory.  At least one operand must be a register.
;; Alternative 0 takes its opcode from standard_sse_constant_opcode; the
;; others emit vmovaps, vmovapd or vmovdqa depending on the insn's "mode"
;; attribute, which comes from <avxvecmode>.
177 (define_insn "*avx_mov<mode>_internal"
178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || register_operand (operands[1], <MODE>mode))"
184 switch (which_alternative)
187 return standard_sse_constant_opcode (insn, operands[1]);
190 switch (get_attr_mode (insn))
194 return "vmovaps\t{%1, %0|%0, %1}";
197 return "vmovapd\t{%1, %0|%0, %1}";
199 return "vmovdqa\t{%1, %0|%0, %1}";
205 [(set_attr "type" "sselog1,ssemov,ssemov")
206 (set_attr "prefix" "vex")
207 (set_attr "mode" "<avxvecmode>")])
209 ;; All of these patterns are enabled for SSE1 as well as SSE2.
210 ;; This is essential for maintaining stable calling conventions.
;; Standard-named move expander for the 128-bit vector modes (SSEMODE16).
;; All the work is delegated to ix86_expand_vector_move.
212 (define_expand "mov<mode>"
213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
217 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-AVX vector move insn for every mode in SSEMODE16.
;; Alternatives: 0 = SSE constant (C) -> register, 1 = register/memory ->
;; register, 2 = register -> memory.  At least one operand must be a register.
;; The emitted opcode (movaps, movapd or movdqa) follows the "mode" attribute,
;; which is computed as:
;;   V4SF  when optimizing the function for size, when TARGET_SSE2 is off,
;;         or for the store alternative under TARGET_SSE_TYPELESS_STORES;
;;   V4SF / V2DF  otherwise for those two modes;
;;   TI    for everything else.
221 (define_insn "*mov<mode>_internal"
222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
225 && (register_operand (operands[0], <MODE>mode)
226 || register_operand (operands[1], <MODE>mode))"
228 switch (which_alternative)
231 return standard_sse_constant_opcode (insn, operands[1]);
234 switch (get_attr_mode (insn))
237 return "movaps\t{%1, %0|%0, %1}";
239 return "movapd\t{%1, %0|%0, %1}";
241 return "movdqa\t{%1, %0|%0, %1}";
247 [(set_attr "type" "sselog1,ssemov,ssemov")
249 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
250 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
251 (and (eq_attr "alternative" "2")
252 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
254 (const_string "V4SF")
255 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
256 (const_string "V4SF")
257 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
258 (const_string "V2DF")
260 (const_string "TI")))])
262 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
263 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
264 ;; from memory, we'd prefer to load the memory directly into the %xmm
265 ;; register. To facilitate this happy circumstance, this pattern won't
266 ;; split until after register allocation. If the 64-bit value didn't
267 ;; come from memory, this is the best we can do. This is much better
268 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Move a DImode value into the low part of a V4SI SSE register on 32-bit
;; targets (!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES).
;; Operand 1 is either a general register pair ("r") or memory ("m");
;; operand 2 is a V4SI scratch that is only needed for the register case.
;; The split happens only after reload:
;;   - register source: each 32-bit half (subreg byte offsets 0 and 4) is
;;     loaded with gen_sse2_loadld, into operand 0 and the scratch
;;     respectively, then combined with gen_vec_interleave_lowv4si;
;;   - memory source: gen_vec_concatv2di builds the value directly in the
;;     V2DImode lowpart of operand 0, with const0_rtx as the other half.
271 (define_insn_and_split "movdi_to_sse"
273 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
274 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
275 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
276 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
278 "&& reload_completed"
281 if (register_operand (operands[1], DImode))
283 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
284 Assemble the 64-bit DImode value in an xmm register. */
285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
286 gen_rtx_SUBREG (SImode, operands[1], 0)));
287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
288 gen_rtx_SUBREG (SImode, operands[1], 4)));
289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
292 else if (memory_operand (operands[1], DImode))
293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
294 operands[1], const0_rtx));
;; Post-reload rewrite (TARGET_SSE) of a V4SF register load whose source is a
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at byte 0 and used in a vec_duplicate; operand 2 is set to the
;; all-zero V4SF constant.
;; NOTE(review): the defining keyword line for this pattern is not visible
;; in this view -- confirm against the full file.
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
302 "TARGET_SSE && reload_completed"
305 (vec_duplicate:V4SF (match_dup 1))
309 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
310 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (TARGET_SSE2) of a V2DF register load whose source is
;; a zero_extended_scalar_load_operand.  The replacement is a vec_concat of
;; the DFmode subreg (byte 0) of operand 1 with a DFmode zero constant.
;; NOTE(review): the defining keyword line for this pattern is not visible
;; in this view -- confirm against the full file.
314 [(set (match_operand:V2DF 0 "register_operand" "")
315 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
316 "TARGET_SSE2 && reload_completed"
317 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
319 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
320 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 256-bit vector register; delegates to
;; ix86_expand_push.
323 (define_expand "push<mode>1"
324 [(match_operand:AVX256MODE 0 "register_operand" "")]
327 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 128-bit vector register; delegates to
;; ix86_expand_push.
331 (define_expand "push<mode>1"
332 [(match_operand:SSEMODE16 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 256-bit vector modes; delegates to
;; ix86_expand_vector_move_misalign.
339 (define_expand "movmisalign<mode>"
340 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
341 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
344 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 128-bit vector modes; delegates to
;; ix86_expand_vector_move_misalign.
348 (define_expand "movmisalign<mode>"
349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
353 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX vmovups / vmovupd for all packed float modes (AVXMODEF2P).
;; Either load (xm -> x) or store (x -> m); memory-to-memory is rejected by
;; the condition.  Marked with the "movu" attribute.
357 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
358 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
360 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
365 [(set_attr "type" "ssemov")
366 (set_attr "movu" "1")
367 (set_attr "prefix" "vex")
368 (set_attr "mode" "<MODE>")])
;; movq between V2DI operands: the source expression selects element 0 of
;; operand 1 (register or memory) and the result goes to an SSE register.
;; The "%v" in the template together with prefix "maybe_vex" lets the same
;; pattern serve the VEX-encoded form.
370 (define_insn "sse2_movq128"
371 [(set (match_operand:V2DI 0 "register_operand" "=x")
374 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
375 (parallel [(const_int 0)]))
378 "%vmovq\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "maybe_vex")
381 (set_attr "mode" "TI")])
;; Non-AVX movups / movupd for V4SF and V2DF (named sse_movups and
;; sse2_movupd via the <sse> attribute).  Load or store; memory-to-memory is
;; rejected by the condition.  Marked with the "movu" attribute.
383 (define_insn "<sse>_movup<ssemodesuffixf2c>"
384 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
386 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
391 [(set_attr "type" "ssemov")
392 (set_attr "movu" "1")
393 (set_attr "mode" "<MODE>")])
;; AVX vmovdqu for the QI-element vector modes (V16QI and V32QI).
;; Load or store; memory-to-memory is rejected by the condition.
395 (define_insn "avx_movdqu<avxmodesuffix>"
396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
401 "vmovdqu\t{%1, %0|%0, %1}"
402 [(set_attr "type" "ssemov")
403 (set_attr "movu" "1")
404 (set_attr "prefix" "vex")
405 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movdqu on V16QI, expressed as an unspec of the source.
;; Load or store; memory-to-memory is rejected by the condition.
407 (define_insn "sse2_movdqu"
408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
412 "movdqu\t{%1, %0|%0, %1}"
413 [(set_attr "type" "ssemov")
414 (set_attr "movu" "1")
415 (set_attr "prefix_data16" "1")
416 (set_attr "mode" "TI")])
;; AVX vmovntps / vmovntpd: store of a packed float register (any AVXMODEF2P
;; mode) to memory.  Only the register -> memory direction exists.
418 (define_insn "avx_movnt<mode>"
419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
421 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
423 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
424 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "vex")
427 (set_attr "mode" "<MODE>")])
;; Non-AVX movntps / movntpd: store of a V4SF or V2DF register to memory.
;; Only the register -> memory direction exists.
429 (define_insn "<sse>_movnt<mode>"
430 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
432 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
434 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
435 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
436 [(set_attr "type" "ssemov")
437 (set_attr "mode" "<MODE>")])
;; AVX vmovntdq: store of a V2DI or V4DI register to memory.
;; NOTE(review): "type" is "ssecvt" here while the non-AVX sse2_movntv2di
;; uses "ssemov" -- confirm the difference is intended.
439 (define_insn "avx_movnt<mode>"
440 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
442 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
445 "vmovntdq\t{%1, %0|%0, %1}"
446 [(set_attr "type" "ssecvt")
447 (set_attr "prefix" "vex")
448 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movntdq: store of a V2DI register to memory, expressed as an unspec
;; of the source register.
450 (define_insn "sse2_movntv2di"
451 [(set (match_operand:V2DI 0 "memory_operand" "=m")
452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
455 "movntdq\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "1")
458 (set_attr "mode" "TI")])
;; movnti: store of a general-purpose SImode register ("r") to memory.
;; NOTE(review): the "mode" attribute is "V2DF" although both operands are
;; SImode -- confirm whether this is intended.
460 (define_insn "sse2_movntsi"
461 [(set (match_operand:SI 0 "memory_operand" "=m")
462 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
465 "movnti\t{%1, %0|%0, %1}"
466 [(set_attr "type" "ssemov")
467 (set_attr "prefix_data16" "0")
468 (set_attr "mode" "V2DF")])
;; AVX vlddqu: load of a V16QI or V32QI vector from memory into a register.
;; The source must be memory.
;; NOTE(review): "type" is "ssecvt" here while sse3_lddqu uses "ssemov" --
;; confirm the difference is intended.
470 (define_insn "avx_lddqu<avxmodesuffix>"
471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
476 "vlddqu\t{%1, %0|%0, %1}"
477 [(set_attr "type" "ssecvt")
478 (set_attr "movu" "1")
479 (set_attr "prefix" "vex")
480 (set_attr "mode" "<avxvecmode>")])
;; lddqu: load of a V16QI vector from memory into a register, expressed as
;; an unspec.  The source must be memory.  The encoding attributes record no
;; data16 prefix and a rep prefix.
482 (define_insn "sse3_lddqu"
483 [(set (match_operand:V16QI 0 "register_operand" "=x")
484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
487 "lddqu\t{%1, %0|%0, %1}"
488 [(set_attr "type" "ssemov")
489 (set_attr "movu" "1")
490 (set_attr "prefix_data16" "0")
491 (set_attr "prefix_rep" "1")
492 (set_attr "mode" "TI")])
494 ; Expand patterns for non-temporal stores. At the moment, only those
495 ; that directly map to insns are defined; it would be possible to
496 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for V4SF and V2DF: memory destination,
;; register source.
498 (define_expand "storent<mode>"
499 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
501 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
503 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander over the MODEF iterator (defined outside this
;; view): memory destination, register source.
506 (define_expand "storent<mode>"
507 [(set (match_operand:MODEF 0 "memory_operand" "")
509 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI: memory destination, register
;; source, same shape as the sse2_movntv2di insn above.
514 (define_expand "storentv2di"
515 [(set (match_operand:V2DI 0 "memory_operand" "")
516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode: memory destination, register
;; source, same shape as the sse2_movntsi insn above.
521 (define_expand "storentsi"
522 [(set (match_operand:SI 0 "memory_operand" "")
523 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
528 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
530 ;; Parallel floating point arithmetic
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on V4SF / V2DF registers, one instance per code of the
;; code iterator used in the pattern (not visible in this view).  Expansion
;; is done entirely by ix86_expand_fp_absneg_operator.
534 (define_expand "<code><mode>2"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
537 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
538 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
539 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 256-bit float modes (V8SF, V4DF).
;; The operands are only legitimized via ix86_fixup_binary_operands_no_copy;
;; the RTL template itself is then emitted.
541 (define_expand "<plusminus_insn><mode>3"
542 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
543 (plusminus:AVX256MODEF2P
544 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
545 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
546 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
547 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed float add/subtract for all AVXMODEF2P modes.
;; Three-operand form: destination and first source are independent
;; registers; the second source may be memory.  <comm> (defined outside this
;; view) supplies the commutativity marker for the operation.
549 (define_insn "*avx_<plusminus_insn><mode>3"
550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
551 (plusminus:AVXMODEF2P
552 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
554 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
555 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
556 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
557 [(set_attr "type" "sseadd")
558 (set_attr "prefix" "vex")
559 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 128-bit float modes (V4SF, V2DF).
;; The operands are only legitimized via ix86_fixup_binary_operands_no_copy;
;; the RTL template itself is then emitted.
561 (define_expand "<plusminus_insn><mode>3"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
563 (plusminus:SSEMODEF2P
564 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
565 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
566 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX packed float add/subtract for V4SF / V2DF.
;; Two-operand form: operand 1 is tied to the destination (constraint "0"),
;; so the template only names %0 and %2.
569 (define_insn "*<plusminus_insn><mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
571 (plusminus:SSEMODEF2P
572 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
574 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
576 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
;; AVX scalar float add/subtract (v...ss / v...sd) on 128-bit vectors.  The
;; RTL is a vec_merge around the full-vector operation; its remaining
;; operands are not visible in this view.  The "mode" attribute is the
;; scalar element mode.
580 (define_insn "*avx_vm<plusminus_insn><mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
582 (vec_merge:SSEMODEF2P
583 (plusminus:SSEMODEF2P
584 (match_operand:SSEMODEF2P 1 "register_operand" "x")
585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
588 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
589 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "prefix" "vex")
592 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar float add/subtract (...ss / ...sd) on 128-bit vectors.
;; Operand 1 is tied to the destination (constraint "0").  The "mode"
;; attribute is the scalar element mode.
594 (define_insn "<sse>_vm<plusminus_insn><mode>3"
595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (vec_merge:SSEMODEF2P
597 (plusminus:SSEMODEF2P
598 (match_operand:SSEMODEF2P 1 "register_operand" "0")
599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
602 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
603 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 256-bit float modes; operands are legitimized
;; for MULT by ix86_fixup_binary_operands_no_copy.
607 (define_expand "mul<mode>3"
608 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
610 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
611 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
612 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
613 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed float multiply (vmulps / vmulpd) for all AVXMODEF2P modes.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.
615 (define_insn "*avx_mul<mode>3"
616 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
618 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
619 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
620 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
621 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
622 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "type" "ssemul")
624 (set_attr "prefix" "vex")
625 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 128-bit float modes; operands are legitimized
;; for MULT by ix86_fixup_binary_operands_no_copy.
627 (define_expand "mul<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Non-AVX packed float multiply (mulps / mulpd).  Operand 1 is tied to the
;; destination and, via "%", may be swapped with operand 2.
635 (define_insn "*mul<mode>3"
636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
641 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
642 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<MODE>")])
;; AVX scalar float multiply (vmulss / vmulsd) on 128-bit vectors, written
;; as a vec_merge.  The "mode" attribute is the scalar element mode.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P while the sibling
;; *avx_vm<plusminus_insn> and *avx_vmdiv patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm which predicate is intended.
646 (define_insn "*avx_vmmul<mode>3"
647 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
648 (vec_merge:SSEMODEF2P
650 (match_operand:SSEMODEF2P 1 "register_operand" "x")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
654 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
655 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
656 [(set_attr "type" "ssemul")
657 (set_attr "prefix" "vex")
658 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar float multiply (mulss / mulsd) on 128-bit vectors, written
;; as a vec_merge.  Operand 1 is tied to the destination.
660 (define_insn "<sse>_vmmul<mode>3"
661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
662 (vec_merge:SSEMODEF2P
664 (match_operand:SSEMODEF2P 1 "register_operand" "0")
665 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
668 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
669 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
670 [(set_attr "type" "ssemul")
671 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first legitimized for DIV.  When
;; TARGET_SSE_MATH and TARGET_RECIP are on, the insn is not optimized for
;; size, and the finite-only, non-trapping and unsafe-math flags are all set,
;; the division is emitted by ix86_emit_swdivsf instead.
;; NOTE(review): this tests !optimize_insn_for_size_p () whereas divv4sf3
;; tests optimize_insn_for_speed_p () -- confirm they are meant to differ.
673 (define_expand "divv8sf3"
674 [(set (match_operand:V8SF 0 "register_operand" "")
675 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
676 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
682 && flag_finite_math_only && !flag_trapping_math
683 && flag_unsafe_math_optimizations)
685 ix86_emit_swdivsf (operands[0], operands[1],
686 operands[2], V8SFmode);
;; V4DF division expander; only legitimizes the operands for DIV.  There is
;; no ix86_emit_swdivsf path, unlike divv8sf3.
691 (define_expand "divv4df3"
692 [(set (match_operand:V4DF 0 "register_operand" "")
693 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
694 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
696 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed float divide (vdivps / vdivpd) for all AVXMODEF2P modes.
;; The dividend must be a register; the divisor may be memory.
698 (define_insn "avx_div<mode>3"
699 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
701 (match_operand:AVXMODEF2P 1 "register_operand" "x")
702 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
703 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
704 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
705 [(set_attr "type" "ssediv")
706 (set_attr "prefix" "vex")
707 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP are on,
;; the insn is optimized for speed, and the finite-only, non-trapping and
;; unsafe-math flags are all set, the division is emitted by
;; ix86_emit_swdivsf instead of the div RTL.
709 (define_expand "divv4sf3"
710 [(set (match_operand:V4SF 0 "register_operand" "")
711 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
712 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
715 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
716 && flag_finite_math_only && !flag_trapping_math
717 && flag_unsafe_math_optimizations)
719 ix86_emit_swdivsf (operands[0], operands[1],
720 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
;; No preparation code is visible in this view.
725 (define_expand "divv2df3"
726 [(set (match_operand:V2DF 0 "register_operand" "")
727 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX packed float divide restricted to the 128-bit modes (SSEMODEF2P under
;; AVX128_VEC_FLOAT_MODE_P).  Three-operand VEX form.
732 (define_insn "*avx_div<mode>3"
733 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
735 (match_operand:SSEMODEF2P 1 "register_operand" "x")
736 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
737 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
738 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
739 [(set_attr "type" "ssediv")
740 (set_attr "prefix" "vex")
741 (set_attr "mode" "<MODE>")])
;; Non-AVX packed float divide (divps / divpd).  The dividend is tied to the
;; destination (constraint "0"); the divisor may be memory.
743 (define_insn "<sse>_div<mode>3"
744 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
746 (match_operand:SSEMODEF2P 1 "register_operand" "0")
747 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<MODE>")])
;; AVX scalar float divide (vdivss / vdivsd) on 128-bit vectors, written as
;; a vec_merge.  The "mode" attribute is the scalar element mode.
753 (define_insn "*avx_vmdiv<mode>3"
754 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
755 (vec_merge:SSEMODEF2P
757 (match_operand:SSEMODEF2P 1 "register_operand" "x")
758 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
761 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
762 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
763 [(set_attr "type" "ssediv")
764 (set_attr "prefix" "vex")
765 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar float divide (divss / divsd) on 128-bit vectors, written
;; as a vec_merge.  Operand 1 is tied to the destination.
767 (define_insn "<sse>_vmdiv<mode>3"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
769 (vec_merge:SSEMODEF2P
771 (match_operand:SSEMODEF2P 1 "register_operand" "0")
772 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
776 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
777 [(set_attr "type" "ssediv")
778 (set_attr "mode" "<ssescalarmode>")])
;; vrcpps on V8SF, represented as UNSPEC_RCP of the source.
780 (define_insn "avx_rcpv8sf2"
781 [(set (match_operand:V8SF 0 "register_operand" "=x")
783 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
785 "vrcpps\t{%1, %0|%0, %1}"
786 [(set_attr "type" "sse")
787 (set_attr "prefix" "vex")
788 (set_attr "mode" "V8SF")])
;; rcpps on V4SF, represented as UNSPEC_RCP of the source.  "%v" with prefix
;; "maybe_vex" lets the same pattern serve the VEX-encoded form.
790 (define_insn "sse_rcpv4sf2"
791 [(set (match_operand:V4SF 0 "register_operand" "=x")
793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
795 "%vrcpps\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "prefix" "maybe_vex")
799 (set_attr "mode" "V4SF")])
;; AVX vrcpss: operand 1 supplies the value the unspec is applied to and
;; operand 2 is the other register source.  The template passes them as
;; %1 and %2 with %0 as the destination.
801 (define_insn "*avx_vmrcpv4sf2"
802 [(set (match_operand:V4SF 0 "register_operand" "=x")
804 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
806 (match_operand:V4SF 2 "register_operand" "x")
809 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
810 [(set_attr "type" "sse")
811 (set_attr "prefix" "vex")
812 (set_attr "mode" "SF")])
;; Non-AVX rcpss: operand 1 supplies the value the unspec is applied to;
;; operand 2 is tied to the destination (constraint "0") and does not appear
;; in the template.
814 (define_insn "sse_vmrcpv4sf2"
815 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
819 (match_operand:V4SF 2 "register_operand" "0")
822 "rcpss\t{%1, %0|%0, %1}"
823 [(set_attr "type" "sse")
824 (set_attr "atom_sse_attr" "rcp")
825 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are on,
;; the insn is not optimized for size, and the finite-only, non-trapping and
;; unsafe-math flags are all set, the result is produced by
;; ix86_emit_swsqrtsf (last argument 0) instead of the sqrt RTL.
827 (define_expand "sqrtv8sf2"
828 [(set (match_operand:V8SF 0 "register_operand" "")
829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; vsqrtps on V8SF: register destination, register-or-memory source.
841 (define_insn "avx_sqrtv8sf2"
842 [(set (match_operand:V8SF 0 "register_operand" "=x")
843 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
845 "vsqrtps\t{%1, %0|%0, %1}"
846 [(set_attr "type" "sse")
847 (set_attr "prefix" "vex")
848 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are on,
;; the insn is optimized for speed, and the finite-only, non-trapping and
;; unsafe-math flags are all set, the result is produced by
;; ix86_emit_swsqrtsf (last argument 0) instead of the sqrt RTL.
850 (define_expand "sqrtv4sf2"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
855 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF ("%v" / "maybe_vex" also covers the VEX-encoded form).
864 (define_insn "sse_sqrtv4sf2"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrtps\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "V4SF")])
;; vsqrtpd on V4DF.  This insn carries the standard sqrt name directly;
;; there is no separate expander for this mode.
874 (define_insn "sqrtv4df2"
875 [(set (match_operand:V4DF 0 "register_operand" "=x")
876 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
878 "vsqrtpd\t{%1, %0|%0, %1}"
879 [(set_attr "type" "sse")
880 (set_attr "prefix" "vex")
881 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF ("%v" / "maybe_vex" also covers the VEX-encoded form).
;; This insn carries the standard sqrt name directly.
883 (define_insn "sqrtv2df2"
884 [(set (match_operand:V2DF 0 "register_operand" "=x")
885 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
887 "%vsqrtpd\t{%1, %0|%0, %1}"
888 [(set_attr "type" "sse")
889 (set_attr "prefix" "maybe_vex")
890 (set_attr "mode" "V2DF")])
;; AVX scalar square root (vsqrtss / vsqrtsd) on 128-bit vectors, written as
;; a vec_merge of the operation on operand 1 with register operand 2.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P although the
;; iterator only contains 128-bit modes; other vm* AVX patterns in this file
;; use AVX128_VEC_FLOAT_MODE_P -- confirm which predicate is intended.
892 (define_insn "*avx_vmsqrt<mode>2"
893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
894 (vec_merge:SSEMODEF2P
896 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
897 (match_operand:SSEMODEF2P 2 "register_operand" "x")
899 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
900 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar square root (sqrtss / sqrtsd) on 128-bit vectors, written
;; as a vec_merge.  Operand 2 is tied to the destination (constraint "0") and
;; does not appear in the template.
905 (define_insn "<sse>_vmsqrt<mode>2"
906 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907 (vec_merge:SSEMODEF2P
909 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
910 (match_operand:SSEMODEF2P 2 "register_operand" "0")
912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
914 [(set_attr "type" "sse")
915 (set_attr "atom_sse_attr" "sqrt")
916 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (TARGET_AVX && TARGET_SSE_MATH).
;; Always expands through ix86_emit_swsqrtsf with last argument 1, in
;; contrast to the sqrt expanders, which pass 0.
918 (define_expand "rsqrtv8sf2"
919 [(set (match_operand:V8SF 0 "register_operand" "")
921 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
922 "TARGET_AVX && TARGET_SSE_MATH"
924 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; vrsqrtps on V8SF, represented as UNSPEC_RSQRT of the source.
928 (define_insn "avx_rsqrtv8sf2"
929 [(set (match_operand:V8SF 0 "register_operand" "=x")
931 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
933 "vrsqrtps\t{%1, %0|%0, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander.  Expands through
;; ix86_emit_swsqrtsf with last argument 1.
938 (define_expand "rsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "")
941 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
944 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF, represented as UNSPEC_RSQRT of the source ("%v" /
;; "maybe_vex" also covers the VEX-encoded form).
948 (define_insn "sse_rsqrtv4sf2"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
951 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
953 "%vrsqrtps\t{%1, %0|%0, %1}"
954 [(set_attr "type" "sse")
955 (set_attr "prefix" "maybe_vex")
956 (set_attr "mode" "V4SF")])
;; AVX vrsqrtss: operand 1 supplies the value the unspec is applied to and
;; operand 2 is the other register source.
958 (define_insn "*avx_vmrsqrtv4sf2"
959 [(set (match_operand:V4SF 0 "register_operand" "=x")
961 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
963 (match_operand:V4SF 2 "register_operand" "x")
966 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
967 [(set_attr "type" "sse")
968 (set_attr "prefix" "vex")
969 (set_attr "mode" "SF")])
;; Non-AVX rsqrtss: operand 1 supplies the value the unspec is applied to;
;; operand 2 is tied to the destination (constraint "0") and does not appear
;; in the template.
971 (define_insn "sse_vmrsqrtv4sf2"
972 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
976 (match_operand:V4SF 2 "register_operand" "0")
979 "rsqrtss\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sse")
981 (set_attr "mode" "SF")])
983 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
984 ;; isn't really correct, as those rtl operators aren't defined when
985 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smax/smin expander for the 256-bit float modes.
;; Unless flag_finite_math_only is set, operand 1 is forced into a register
;; before the operands are legitimized.
987 (define_expand "<code><mode>3"
988 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
989 (smaxmin:AVX256MODEF2P
990 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
991 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
992 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
994 if (!flag_finite_math_only)
995 operands[1] = force_reg (<MODE>mode, operands[1]);
996 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander over the SSEMODEF2P modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds.  When flag_finite_math_only is not set,
;; operand 1 is forced into a register before the operands are passed
;; to ix86_fixup_binary_operands_no_copy.
999 (define_expand "<code><mode>3"
1000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1002 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1003 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1006 if (!flag_finite_math_only)
1007 operands[1] = force_reg (<MODE>mode, operands[1]);
1008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max used only under flag_finite_math_only.  Operand 1
;; carries the "%" modifier, so it may be swapped with operand 2; the
;; condition additionally requires ix86_binary_operator_ok.  Emits the
;; three-operand v<maxmin_float>p<avxmodesuffixf2c> form.
1011 (define_insn "*avx_<code><mode>3_finite"
1012 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1014 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1017 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1018 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "prefix" "vex")
1021 (set_attr "mode" "<MODE>")])
;; SSE packed min/max used only under flag_finite_math_only.  Operand 1
;; uses "%0": it is tied to the destination and may be swapped with
;; operand 2.  Emits the two-operand <maxmin_float>p<ssemodesuffixf2c>
;; form, so only %0 and %2 appear in the template.
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1026 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "mode" "<MODE>")])
;; AVX packed min/max whose condition checks only
;; AVX_VEC_FLOAT_MODE_P (no flag_finite_math_only test).  Emits the
;; three-operand v<maxmin_float>p<avxmodesuffixf2c> form.
1034 (define_insn "*avx_<code><mode>3"
1035 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1037 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1038 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1039 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1040 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "<avxvecmode>")])
;; SSE packed min/max whose condition checks only
;; SSE_VEC_FLOAT_MODE_P.  Operand 1 must be a register tied to the
;; destination ("0") and has no "%" modifier, so the operand order is
;; kept as written.
1045 (define_insn "*<code><mode>3"
1046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1048 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max expressed as a vec_merge, enabled when
;; AVX128_VEC_FLOAT_MODE_P holds.  Emits the three-operand
;; v<maxmin_float>s<ssemodesuffixf2c>; the "mode" attribute is the
;; scalar mode.
;; NOTE(review): "type" is "sse" here but "sseadd" in the non-AVX
;; <sse>_vm<code><mode>3 pattern -- confirm whether that is intended.
1055 (define_insn "*avx_vm<code><mode>3"
1056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1057 (vec_merge:SSEMODEF2P
1059 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1063 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1064 "v<maxmin_float>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sse")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar min/max expressed as a vec_merge.  Operand 1 is tied to
;; the destination ("0"); emits the two-operand
;; <maxmin_float>s<ssemodesuffixf2c> with the scalar mode attribute.
1069 (define_insn "<sse>_vm<code><mode>3"
1070 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1071 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "<maxmin_float>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "mode" "<ssescalarmode>")])
1082 ;; These versions of the min/max patterns implement exactly the operations
1083 ;; min = (op1 < op2 ? op1 : op2)
1084 ;; max = (!(op1 < op2) ? op1 : op2)
1085 ;; Their operands are not commutative, and thus they may be used in the
1086 ;; presence of -0.0 and NaN.
;; AVX form of the order-sensitive minimum: emits three-operand
;; vminp<avxmodesuffixf2c>.  Operand 1 must be a register and has no
;; "%" modifier, so the operands are never swapped.
1088 (define_insn "*avx_ieee_smin<mode>3"
1089 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1091 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1092 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1094 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1096 [(set_attr "type" "sseadd")
1097 (set_attr "prefix" "vex")
1098 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the order-sensitive maximum: emits three-operand
;; vmaxp<avxmodesuffixf2c>.  Operand 1 must be a register and has no
;; "%" modifier, so the operands are never swapped.
1100 (define_insn "*avx_ieee_smax<mode>3"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<avxvecmode>")])
;; SSE form of the order-sensitive minimum: emits two-operand
;; minp<ssemodesuffixf2c> with operand 1 tied to the destination ("0").
1112 (define_insn "*ieee_smin<mode>3"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1118 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1119 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; SSE form of the order-sensitive maximum: emits two-operand
;; maxp<ssemodesuffixf2c> with operand 1 tied to the destination ("0").
1123 (define_insn "*ieee_smax<mode>3"
1124 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1126 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1127 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1129 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1130 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "mode" "<MODE>")])
;; Named V8SF insn emitting three-operand vaddsubps.  The RTL combines
;; an operation on operands 1 and 2 with (minus op1 op2) on the same
;; operands (match_dup 1 / match_dup 2).
1134 (define_insn "avx_addsubv8sf3"
1135 [(set (match_operand:V8SF 0 "register_operand" "=x")
1138 (match_operand:V8SF 1 "register_operand" "x")
1139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1140 (minus:V8SF (match_dup 1) (match_dup 2))
1143 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1144 [(set_attr "type" "sseadd")
1145 (set_attr "prefix" "vex")
1146 (set_attr "mode" "V8SF")])
;; Named V4DF insn emitting three-operand vaddsubpd.  The RTL combines
;; an operation on operands 1 and 2 with (minus op1 op2) on the same
;; operands (match_dup 1 / match_dup 2).
1148 (define_insn "avx_addsubv4df3"
1149 [(set (match_operand:V4DF 0 "register_operand" "=x")
1152 (match_operand:V4DF 1 "register_operand" "x")
1153 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1154 (minus:V4DF (match_dup 1) (match_dup 2))
1157 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1158 [(set_attr "type" "sseadd")
1159 (set_attr "prefix" "vex")
1160 (set_attr "mode" "V4DF")])
;; Unnamed V4SF insn emitting the VEX three-operand vaddsubps; the
;; destination is not tied to operand 1.
1162 (define_insn "*avx_addsubv4sf3"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1166 (match_operand:V4SF 1 "register_operand" "x")
1167 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1168 (minus:V4SF (match_dup 1) (match_dup 2))
1171 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V4SF")])
;; Named V4SF insn emitting two-operand addsubps with operand 1 tied to
;; the destination ("0").  Sets the "prefix_rep" attribute to "1".
1176 (define_insn "sse3_addsubv4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x")
1180 (match_operand:V4SF 1 "register_operand" "0")
1181 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1182 (minus:V4SF (match_dup 1) (match_dup 2))
1185 "addsubps\t{%2, %0|%0, %2}"
1186 [(set_attr "type" "sseadd")
1187 (set_attr "prefix_rep" "1")
1188 (set_attr "mode" "V4SF")])
;; Unnamed V2DF insn emitting the VEX three-operand vaddsubpd; the
;; destination is not tied to operand 1.
1190 (define_insn "*avx_addsubv2df3"
1191 [(set (match_operand:V2DF 0 "register_operand" "=x")
1194 (match_operand:V2DF 1 "register_operand" "x")
1195 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1196 (minus:V2DF (match_dup 1) (match_dup 2))
1199 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "type" "sseadd")
1201 (set_attr "prefix" "vex")
1202 (set_attr "mode" "V2DF")])
;; Named V2DF insn emitting two-operand addsubpd with operand 1 tied to
;; the destination ("0").  Sets the "atom_unit" attribute to "complex".
1204 (define_insn "sse3_addsubv2df3"
1205 [(set (match_operand:V2DF 0 "register_operand" "=x")
1208 (match_operand:V2DF 1 "register_operand" "0")
1209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1210 (minus:V2DF (match_dup 1) (match_dup 2))
1213 "addsubpd\t{%2, %0|%0, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "atom_unit" "complex")
1216 (set_attr "mode" "V2DF")])
;; Named V4DF insn emitting vh<plusminus_mnemonic>pd.  The RTL selects
;; DF elements 0/1 and 2/3 from operand 1 and again from operand 2 via
;; vec_select; operand 2 may be a register or memory.
1218 (define_insn "avx_h<plusminus_insn>v4df3"
1219 [(set (match_operand:V4DF 0 "register_operand" "=x")
1224 (match_operand:V4DF 1 "register_operand" "x")
1225 (parallel [(const_int 0)]))
1226 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1233 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1234 (parallel [(const_int 0)]))
1235 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1237 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1240 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1241 [(set_attr "type" "sseadd")
1242 (set_attr "prefix" "vex")
1243 (set_attr "mode" "V4DF")])
;; Named V8SF insn emitting vh<plusminus_mnemonic>ps.  The RTL first
;; selects SF elements 0..3 of operand 1, then 0..3 of operand 2, then
;; elements 4..7 of operand 1 and 4..7 of operand 2, always taking
;; adjacent elements in pairs.
1245 (define_insn "avx_h<plusminus_insn>v8sf3"
1246 [(set (match_operand:V8SF 0 "register_operand" "=x")
1252 (match_operand:V8SF 1 "register_operand" "x")
1253 (parallel [(const_int 0)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1261 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1262 (parallel [(const_int 0)]))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1265 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1270 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1277 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V8SF")])
;; Unnamed V4SF insn emitting the VEX three-operand
;; vh<plusminus_mnemonic>ps.  The RTL selects SF elements 0/1 and 2/3
;; from operand 1 and again from operand 2.
1288 (define_insn "*avx_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "x")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "prefix" "vex")
1313 (set_attr "mode" "V4SF")])
;; Named V4SF insn emitting two-operand h<plusminus_mnemonic>ps with
;; operand 1 tied to the destination ("0").  The RTL selects SF
;; elements 0/1 and 2/3 from each source.  Sets "atom_unit" to
;; "complex" and "prefix_rep" to "1".
1315 (define_insn "sse3_h<plusminus_insn>v4sf3"
1316 [(set (match_operand:V4SF 0 "register_operand" "=x")
1321 (match_operand:V4SF 1 "register_operand" "0")
1322 (parallel [(const_int 0)]))
1323 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1325 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1330 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1331 (parallel [(const_int 0)]))
1332 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "sseadd")
1339 (set_attr "atom_unit" "complex")
1340 (set_attr "prefix_rep" "1")
1341 (set_attr "mode" "V4SF")])
;; Unnamed V2DF insn emitting the VEX three-operand
;; vh<plusminus_mnemonic>pd.  The RTL selects DF elements 0 and 1 of
;; operand 1 and of operand 2.
1343 (define_insn "*avx_h<plusminus_insn>v2df3"
1344 [(set (match_operand:V2DF 0 "register_operand" "=x")
1348 (match_operand:V2DF 1 "register_operand" "x")
1349 (parallel [(const_int 0)]))
1350 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1353 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1357 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1358 [(set_attr "type" "sseadd")
1359 (set_attr "prefix" "vex")
1360 (set_attr "mode" "V2DF")])
;; Named V2DF insn emitting two-operand h<plusminus_mnemonic>pd with
;; operand 1 tied to the destination ("0").  The RTL selects DF
;; elements 0 and 1 of operand 1 and of operand 2.
1362 (define_insn "sse3_h<plusminus_insn>v2df3"
1363 [(set (match_operand:V2DF 0 "register_operand" "=x")
1367 (match_operand:V2DF 1 "register_operand" "0")
1368 (parallel [(const_int 0)]))
1369 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1376 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1377 [(set_attr "type" "sseadd")
1378 (set_attr "mode" "V2DF")])
;; Expander for the V4SF sum reduction (operand 1 -> operand 0).  Two
;; code paths appear in the preparation code: two chained
;; gen_sse3_haddv4sf3 calls through a fresh V4SF temporary, and a call
;; to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): presumably the hadd path is selected when SSE3 is
;; available and the helper is the fallback -- confirm.
1380 (define_expand "reduc_splus_v4sf"
1381 [(match_operand:V4SF 0 "register_operand" "")
1382 (match_operand:V4SF 1 "register_operand" "")]
1387 rtx tmp = gen_reg_rtx (V4SFmode);
1388 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1389 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1392 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for the V2DF sum reduction: emits a single
;; gen_sse3_haddv2df3 with operand 1 supplied as both sources.
1396 (define_expand "reduc_splus_v2df"
1397 [(match_operand:V2DF 0 "register_operand" "")
1398 (match_operand:V2DF 1 "register_operand" "")]
1401 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for the V4SF maximum reduction: delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining
;; generator.
1405 (define_expand "reduc_smax_v4sf"
1406 [(match_operand:V4SF 0 "register_operand" "")
1407 (match_operand:V4SF 1 "register_operand" "")]
1410 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for the V4SF minimum reduction: delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining
;; generator.
1414 (define_expand "reduc_smin_v4sf"
1415 [(match_operand:V4SF 0 "register_operand" "")
1416 (match_operand:V4SF 1 "register_operand" "")]
1419 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1425 ;; Parallel floating point comparisons
1427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named AVX packed compare with an explicit predicate: operand 3 is an
;; SI immediate accepted by const_0_to_31_operand and is printed as the
;; first AT&T operand of vcmpp<avxmodesuffixf2c>.  "length_immediate"
;; is set to "1" for that immediate.
1429 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1430 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1432 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1433 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<MODE>")])
;; Named AVX scalar compare with an explicit predicate: operand 3 is an
;; SI immediate accepted by const_0_to_31_operand and is printed as the
;; first AT&T operand of vcmps<ssemodesuffixf2c>.  The result is formed
;; with a vec_merge and the "mode" attribute is the scalar mode.
;; Operand 0 is an output and therefore needs the "=x" constraint, the
;; same as the packed avx_cmpp pattern; an empty constraint string is
;; only appropriate in a define_expand.
1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445 (vec_merge:SSEMODEF2P
1447 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1448 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1449 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "prefix" "vex")
1458 (set_attr "mode" "<ssescalarmode>")])
1460 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1461 ;; may generate 256-bit vector compare instructions.
;; AVX packed compare where the comparison itself is operand 3, a
;; match_operator accepted by avx_comparison_float_operator.  The
;; template prints it with %D3 inside the mnemonic
;; (vcmp<cond>p<suffix>), so no separate immediate operand is written.
1462 (define_insn "*avx_maskcmp<mode>3"
1463 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1464 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1465 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "prefix" "vex")
1471 (set_attr "length_immediate" "1")
1472 (set_attr "mode" "<avxvecmode>")])
;; SSE compare over the SSEMODEF4 modes; the condition accepts modes
;; satisfying SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P.  The comparison
;; is operand 3 (sse_comparison_operator), printed via %D3 inside the
;; mnemonic; operand 1 is tied to the destination ("0").
1474 (define_insn "<sse>_maskcmp<mode>3"
1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1477 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1482 [(set_attr "type" "ssecmp")
1483 (set_attr "length_immediate" "1")
1484 (set_attr "mode" "<MODE>")])
;; AVX scalar compare expressed as a vec_merge around a match_operator
;; (operand 3, sse_comparison_operator), printed via %D3 as
;; vcmp<cond>s<suffix>.  The encoded instruction carries a one-byte
;; comparison predicate, so "length_immediate" is set to "1" just as in
;; *avx_maskcmp<mode>3 and <sse>_vmmaskcmp<mode>3; without it the
;; computed insn length is one byte short.
1486 (define_insn "*avx_vmmaskcmp<mode>3"
1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1488 (vec_merge:SSEMODEF2P
1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1490 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1494 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1495 "vcmp%D3s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1496 [(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
1497 (set_attr "prefix" "vex")
1498 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar compare expressed as a vec_merge around a match_operator
;; (operand 3, sse_comparison_operator), printed via %D3 as
;; cmp<cond>s<suffix>.  Operand 1 is tied to the destination ("0").
1500 (define_insn "<sse>_vmmaskcmp<mode>3"
1501 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1502 (vec_merge:SSEMODEF2P
1503 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1504 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1505 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1508 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1509 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1510 [(set_attr "type" "ssecmp")
1511 (set_attr "length_immediate" "1")
1512 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of the two vector
;; operands (operand 1 may be memory).  Emits
;; %vcomis<ssemodefsuffix>.  "prefix_data16" is computed per mode via
;; an if_then_else on the mode attribute being DF.
1514 (define_insn "<sse>_comi"
1515 [(set (reg:CCFP FLAGS_REG)
1518 (match_operand:<ssevecmode> 0 "register_operand" "x")
1519 (parallel [(const_int 0)]))
1521 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1522 (parallel [(const_int 0)]))))]
1523 "SSE_FLOAT_MODE_P (<MODE>mode)"
1524 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1525 [(set_attr "type" "ssecomi")
1526 (set_attr "prefix" "maybe_vex")
1527 (set_attr "prefix_rep" "0")
1528 (set (attr "prefix_data16")
1529 (if_then_else (eq_attr "mode" "DF")
1531 (const_string "0")))
1532 (set_attr "mode" "<MODE>")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of the two vector
;; operands (operand 1 may be memory).  Emits
;; %vucomis<ssemodefsuffix>.  "prefix_data16" is computed per mode via
;; an if_then_else on the mode attribute being DF.
1534 (define_insn "<sse>_ucomi"
1535 [(set (reg:CCFPU FLAGS_REG)
1538 (match_operand:<ssevecmode> 0 "register_operand" "x")
1539 (parallel [(const_int 0)]))
1541 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1542 (parallel [(const_int 0)]))))]
1543 "SSE_FLOAT_MODE_P (<MODE>mode)"
1544 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1545 [(set_attr "type" "ssecomi")
1546 (set_attr "prefix" "maybe_vex")
1547 (set_attr "prefix_rep" "0")
1548 (set (attr "prefix_data16")
1549 (if_then_else (eq_attr "mode" "DF")
1551 (const_string "0")))
1552 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander over the AVXMODEF2P modes.
;; Operand 3 is the comparison of operands 4 and 5; operands 1 and 2
;; are the two arms of the if_then_else.  The expansion is performed by
;; ix86_expand_fp_vcond, whose boolean result is captured in "ok".
1554 (define_expand "vcond<mode>"
1555 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1556 (if_then_else:AVXMODEF2P
1557 (match_operator 3 ""
1558 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1559 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1560 (match_operand:AVXMODEF2P 1 "general_operand" "")
1561 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1562 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1563 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1565 bool ok = ix86_expand_fp_vcond (operands);
1570 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1572 ;; Parallel floating point logical operations
1574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named AVX packed and-not: emits three-operand
;; vandnp<avxmodesuffixf2c>.  Operand 1 must be a register; operand 2
;; may be a register or memory.
1576 (define_insn "avx_andnot<mode>3"
1577 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1580 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1581 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1582 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1583 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1584 [(set_attr "type" "sselog")
1585 (set_attr "prefix" "vex")
1586 (set_attr "mode" "<avxvecmode>")])
;; Named SSE packed and-not: emits two-operand
;; andnp<ssemodesuffixf2c> with operand 1 tied to the destination
;; ("0").
1588 (define_insn "<sse>_andnot<mode>3"
1589 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1592 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1593 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1594 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1595 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1596 [(set_attr "type" "sselog")
1597 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations over the AVX256MODEF2P modes.
;; Its only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy on the operands.
1599 (define_expand "<code><mode>3"
1600 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1601 (any_logic:AVX256MODEF2P
1602 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1603 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1604 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1605 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical operation (any_logic): emits three-operand
;; v<logic>p<avxmodesuffixf2c>.  Operand 1 carries the "%" modifier;
;; the condition also requires ix86_binary_operator_ok.
1607 (define_insn "*avx_<code><mode>3"
1608 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1609 (any_logic:AVXMODEF2P
1610 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1611 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1612 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1613 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1614 "v<logic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1615 [(set_attr "type" "sselog")
1616 (set_attr "prefix" "vex")
1617 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations over the SSEMODEF2P modes.
;; Its only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy on the operands.
1619 (define_expand "<code><mode>3"
1620 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1621 (any_logic:SSEMODEF2P
1622 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1623 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1624 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1625 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed logical operation (any_logic): emits two-operand
;; <logic>p<ssemodesuffixf2c>.  Operand 1 uses "%0" (tied to the
;; destination, swappable with operand 2); the condition also requires
;; ix86_binary_operator_ok.
1627 (define_insn "*<code><mode>3"
1628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1629 (any_logic:SSEMODEF2P
1630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1633 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1634 "<logic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sselog")
1636 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  operands[3] is the mask returned by
;; ix86_build_signbit_mask for the scalar mode; operands[4] and
;; operands[5] are fresh pseudo registers.  The template computes
;; (and (not mask) op1) and (and mask op2), and the final set stores
;; (ior op4 op5) into operand 0.
;; NOTE(review): the two AND results are presumably stored into
;; operands 4 and 5 respectively -- confirm against the full template.
1638 (define_expand "copysign<mode>3"
1641 (not:SSEMODEF2P (match_dup 3))
1642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1644 (and:SSEMODEF2P (match_dup 3)
1645 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1646 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1647 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1648 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1650 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1652 operands[4] = gen_reg_rtx (<MODE>mode);
1653 operands[5] = gen_reg_rtx (<MODE>mode);
1656 ;; Also define scalar versions. These are used for abs, neg, and
1657 ;; conditional move. Using subregs into vector modes causes register
1658 ;; allocation lossage. These patterns do not allow memory operands
1659 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) AVX and-not.  All three operands must be
;; registers; emits the packed mnemonic vandnp<ssemodefsuffix>, and the
;; "mode" attribute is the corresponding vector mode.
1661 (define_insn "*avx_andnot<mode>3"
1662 [(set (match_operand:MODEF 0 "register_operand" "=x")
1665 (match_operand:MODEF 1 "register_operand" "x"))
1666 (match_operand:MODEF 2 "register_operand" "x")))]
1667 "AVX_FLOAT_MODE_P (<MODE>mode)"
1668 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1669 [(set_attr "type" "sselog")
1670 (set_attr "prefix" "vex")
1671 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) SSE and-not.  Operand 1 is tied to the
;; destination ("0") and operand 2 must be a register; emits the packed
;; mnemonic andnp<ssemodefsuffix> with the vector mode attribute.
1673 (define_insn "*andnot<mode>3"
1674 [(set (match_operand:MODEF 0 "register_operand" "=x")
1677 (match_operand:MODEF 1 "register_operand" "0"))
1678 (match_operand:MODEF 2 "register_operand" "x")))]
1679 "SSE_FLOAT_MODE_P (<MODE>mode)"
1680 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1681 [(set_attr "type" "sselog")
1682 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) AVX logical operation.  All operands must be
;; registers; emits the packed mnemonic v<logic>p<ssemodefsuffix> with
;; the vector mode attribute.
1684 (define_insn "*avx_<code><mode>3"
1685 [(set (match_operand:MODEF 0 "register_operand" "=x")
1687 (match_operand:MODEF 1 "register_operand" "x")
1688 (match_operand:MODEF 2 "register_operand" "x")))]
1689 "AVX_FLOAT_MODE_P (<MODE>mode)"
1690 "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1691 [(set_attr "type" "sselog")
1692 (set_attr "prefix" "vex")
1693 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) SSE logical operation.  Operand 1 is tied to the
;; destination ("0") and operand 2 must be a register; emits the packed
;; mnemonic <logic>p<ssemodefsuffix> with the vector mode attribute.
1695 (define_insn "*<code><mode>3"
1696 [(set (match_operand:MODEF 0 "register_operand" "=x")
1698 (match_operand:MODEF 1 "register_operand" "0")
1699 (match_operand:MODEF 2 "register_operand" "x")))]
1700 "SSE_FLOAT_MODE_P (<MODE>mode)"
1701 "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1702 [(set_attr "type" "sselog")
1703 (set_attr "mode" "<ssevecmode>")])
1705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707 ;; FMA4 floating point multiply/accumulate instructions. This
1708 ;; includes the scalar versions of the instructions as well as the vector versions.
1711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1713 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1714 ;; combine to generate a multiply/add with two memory references. We then
1715 ;; split this insn by first loading the destination register from one of the
1716 ;; memory operands. If we don't manage to split the insn, reload will
1717 ;; generate the appropriate moves. This is needed because combine
1718 ;; has already folded one of the memory references into both the multiply and
1719 ;; add insns, and it can't generate a new pseudo. I.e.:
1720 ;; (set (reg1) (mem (addr1)))
1721 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1722 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; vfmadd<fma4modesuffixf4> over the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 1 is constrained to a
;; register in both alternatives.  Alternative 0 takes operand 2 in a
;; register and operand 3 in a register or memory; alternative 1 takes
;; operand 2 from memory and operand 3 in a register.
1724 (define_insn "fma4_fmadd<mode>4256"
1725 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1728 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1729 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1730 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1731 "TARGET_FMA4 && TARGET_FUSED_MADD"
1732 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1733 [(set_attr "type" "ssemuladd")
1734 (set_attr "mode" "<MODE>")])
1736 ;; Floating multiply and subtract.
;; vfmsub<fma4modesuffixf4> over the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two alternatives: operands 2/3 as
;; register + register-or-memory, or memory + register; operand 1 is a
;; register in both.
1737 (define_insn "fma4_fmsub<mode>4256"
1738 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1741 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1742 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1743 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1744 "TARGET_FMA4 && TARGET_FUSED_MADD"
1745 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1746 [(set_attr "type" "ssemuladd")
1747 (set_attr "mode" "<MODE>")])
1749 ;; Floating point negative multiply and add.
1750 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; vfnmadd<fma4modesuffixf4> over the FMA4MODEF4 modes.  In the RTL
;; operand 3 (the addend "c") appears first, followed by operands 1 and
;; 2; the output template still prints the operands in numeric order.
1751 (define_insn "fma4_fnmadd<mode>4256"
1752 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1754 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1756 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1757 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1758 "TARGET_FMA4 && TARGET_FUSED_MADD"
1759 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1760 [(set_attr "type" "ssemuladd")
1761 (set_attr "mode" "<MODE>")])
1763 ;; Floating point negative multiply and subtract.
;; vfnmsub<fma4modesuffixf4> over the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two alternatives: operands 2/3 as
;; register + register-or-memory, or memory + register; operand 1 is a
;; register in both.
1764 (define_insn "fma4_fnmsub<mode>4256"
1765 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1769 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1770 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1771 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1772 "TARGET_FMA4 && TARGET_FUSED_MADD"
1773 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1774 [(set_attr "type" "ssemuladd")
1775 (set_attr "mode" "<MODE>")])
;; vfmadd<ssemodesuffixf4> over the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 1 is a register in both
;; alternatives; operands 2/3 are register + register-or-memory, or
;; memory + register.
1777 (define_insn "fma4_fmadd<mode>4"
1778 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1781 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1782 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1783 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1784 "TARGET_FMA4 && TARGET_FUSED_MADD"
1785 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1786 [(set_attr "type" "ssemuladd")
1787 (set_attr "mode" "<MODE>")])
1789 ;; For the scalar operations, use operand1 for the upper words that aren't
1790 ;; modified, so restrict the forms that are generated.
1791 ;; Scalar version of fmadd.
;; Emits vfmadd<ssemodesuffixf2s>; the RTL wraps the arithmetic in a
;; vec_merge over the SSEMODEF2P modes.  Same two constraint
;; alternatives as the packed form: at most one of operands 2 and 3 is
;; a memory reference.
1792 (define_insn "fma4_vmfmadd<mode>4"
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1794 (vec_merge:SSEMODEF2P
1797 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1798 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1799 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1802 "TARGET_FMA4 && TARGET_FUSED_MADD"
1803 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1804 [(set_attr "type" "ssemuladd")
1805 (set_attr "mode" "<MODE>")])
1807 ;; Floating multiply and subtract.
1808 ;; Allow two memory operands the same as fmadd.
;; vfmsub<ssemodesuffixf4> over the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 1 is a register in both
;; alternatives; operands 2/3 are register + register-or-memory, or
;; memory + register.
1809 (define_insn "fma4_fmsub<mode>4"
1810 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1813 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1814 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1815 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1816 "TARGET_FMA4 && TARGET_FUSED_MADD"
1817 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1818 [(set_attr "type" "ssemuladd")
1819 (set_attr "mode" "<MODE>")])
1821 ;; For the scalar operations, use operand1 for the upper words that aren't
1822 ;; modified, so restrict the forms that are generated.
1823 ;; Scalar version of fmsub.
;; Emits vfmsub<ssemodesuffixf2s>; the RTL wraps the arithmetic in a
;; vec_merge over the SSEMODEF2P modes.  At most one of operands 2 and
;; 3 is a memory reference in either alternative.
1824 (define_insn "fma4_vmfmsub<mode>4"
1825 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1826 (vec_merge:SSEMODEF2P
1829 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1830 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1831 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1834 "TARGET_FMA4 && TARGET_FUSED_MADD"
1835 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1836 [(set_attr "type" "ssemuladd")
1837 (set_attr "mode" "<MODE>")])
1839 ;; Floating point negative multiply and add.
1840 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; vfnmadd<ssemodesuffixf4> over the SSEMODEF4 modes.  In the RTL
;; operand 3 (the addend "c") appears first, followed by operands 1 and
;; 2; the output template still prints the operands in numeric order.
1841 (define_insn "fma4_fnmadd<mode>4"
1842 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1844 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1846 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1847 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1848 "TARGET_FMA4 && TARGET_FUSED_MADD"
1849 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1850 [(set_attr "type" "ssemuladd")
1851 (set_attr "mode" "<MODE>")])
1853 ;; For the scalar operations, use operand1 for the upper words that aren't
1854 ;; modified, so restrict the forms that are generated.
1855 ;; Scalar version of fnmadd.
;; Emits vfnmadd<ssemodesuffixf2s>; the RTL is a vec_merge over the
;; SSEMODEF2P modes in which operand 3 appears before operands 1 and 2.
;; At most one of operands 2 and 3 is a memory reference.
1856 (define_insn "fma4_vmfnmadd<mode>4"
1857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1858 (vec_merge:SSEMODEF2P
1860 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1862 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1863 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1866 "TARGET_FMA4 && TARGET_FUSED_MADD"
1867 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1868 [(set_attr "type" "ssemuladd")
1869 (set_attr "mode" "<MODE>")])
1871 ;; Floating point negative multiply and subtract.
1872 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; vfnmsub<ssemodesuffixf4> over the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Operand 1 is a register in both
;; alternatives; operands 2/3 are register + register-or-memory, or
;; memory + register.
1873 (define_insn "fma4_fnmsub<mode>4"
1874 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1878 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1879 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1880 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1881 "TARGET_FMA4 && TARGET_FUSED_MADD"
1882 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1883 [(set_attr "type" "ssemuladd")
1884 (set_attr "mode" "<MODE>")])
1886 ;; For the scalar operations, use operand1 for the upper words that aren't
1887 ;; modified, so restrict the forms that are generated.
1888 ;; Scalar version of fnmsub.
;; Emits vfnmsub<ssemodesuffixf2s>; the RTL wraps the arithmetic in a
;; vec_merge over the SSEMODEF2P modes.  At most one of operands 2 and
;; 3 is a memory reference in either alternative.
1889 (define_insn "fma4_vmfnmsub<mode>4"
1890 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1891 (vec_merge:SSEMODEF2P
1895 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1896 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1897 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1900 "TARGET_FMA4 && TARGET_FUSED_MADD"
1901 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
;; vfmadd<fma4modesuffixf4> pattern over the FMA4MODEF4 modes whose
;; arithmetic is wrapped in an UNSPEC_FMA4_INTRINSIC unspec.  Operand 1
;; is a register in both alternatives; operands 2/3 are register +
;; register-or-memory, or memory + register.
1905 (define_insn "fma4i_fmadd<mode>4256"
1906 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1910 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1911 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1912 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1913 UNSPEC_FMA4_INTRINSIC))]
1914 "TARGET_FMA4 && TARGET_FUSED_MADD"
1915 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1916 [(set_attr "type" "ssemuladd")
1917 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the FMA4MODEF4 modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfmsub<fma4modesuffixf4>.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
1919 (define_insn "fma4i_fmsub<mode>4256"
1920 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1924 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1925 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1926 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1927 UNSPEC_FMA4_INTRINSIC))]
1928 "TARGET_FMA4 && TARGET_FUSED_MADD"
1929 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1930 [(set_attr "type" "ssemuladd")
1931 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the FMA4MODEF4 modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfnmadd<fma4modesuffixf4>.
;; Operand 3 is listed first in the RTL, ahead of the commutative pair
;; (operands 1 and 2); the assembler operand order is unchanged.
1933 (define_insn "fma4i_fnmadd<mode>4256"
1934 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1937 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1939 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1940 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1941 UNSPEC_FMA4_INTRINSIC))]
1942 "TARGET_FMA4 && TARGET_FUSED_MADD"
1943 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1944 [(set_attr "type" "ssemuladd")
1945 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the FMA4MODEF4 modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfnmsub<fma4modesuffixf4>.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
1947 (define_insn "fma4i_fnmsub<mode>4256"
1948 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1953 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1954 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1955 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1956 UNSPEC_FMA4_INTRINSIC))]
1957 "TARGET_FMA4 && TARGET_FUSED_MADD"
1958 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1959 [(set_attr "type" "ssemuladd")
1960 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssemodesuffixf4>.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
1962 (define_insn "fma4i_fmadd<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1967 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1968 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1969 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1970 UNSPEC_FMA4_INTRINSIC))]
1971 "TARGET_FMA4 && TARGET_FUSED_MADD"
1972 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssemodesuffixf4>.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
1976 (define_insn "fma4i_fmsub<mode>4"
1977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1983 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1984 UNSPEC_FMA4_INTRINSIC))]
1985 "TARGET_FMA4 && TARGET_FUSED_MADD"
1986 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1987 [(set_attr "type" "ssemuladd")
1988 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffixf4>.
;; Operand 3 is listed first in the RTL, ahead of the commutative pair
;; (operands 1 and 2); the assembler operand order is unchanged.
1990 (define_insn "fma4i_fnmadd<mode>4"
1991 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1994 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1996 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1997 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
1998 UNSPEC_FMA4_INTRINSIC))]
1999 "TARGET_FMA4 && TARGET_FUSED_MADD"
2000 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
;; Intrinsic pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssemodesuffixf4>.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
2004 (define_insn "fma4i_fnmsub<mode>4"
2005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2010 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2011 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2012 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2013 UNSPEC_FMA4_INTRINSIC))]
2014 "TARGET_FMA4 && TARGET_FUSED_MADD"
2015 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2016 [(set_attr "type" "ssemuladd")
2017 (set_attr "mode" "<MODE>")])
2019 ;; For the scalar operations, use operand1 for the upper words that aren't
2020 ;; modified, so restrict the forms that are accepted.
;; Intrinsic vec_merge pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssemodesuffixf2s>.  The "mode"
;; attribute is the scalar mode (<ssescalarmode>), not the vector mode.
2021 (define_insn "fma4i_vmfmadd<mode>4"
2022 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2024 [(vec_merge:SSEMODEF2P
2027 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2028 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2029 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2032 UNSPEC_FMA4_INTRINSIC))]
2033 "TARGET_FMA4 && TARGET_FUSED_MADD"
2034 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2035 [(set_attr "type" "ssemuladd")
2036 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic vec_merge pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssemodesuffixf2s>.  The "mode"
;; attribute is the scalar mode (<ssescalarmode>), not the vector mode.
2038 (define_insn "fma4i_vmfmsub<mode>4"
2039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2041 [(vec_merge:SSEMODEF2P
2044 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2045 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2046 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2049 UNSPEC_FMA4_INTRINSIC))]
2050 "TARGET_FMA4 && TARGET_FUSED_MADD"
2051 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2052 [(set_attr "type" "ssemuladd")
2053 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic vec_merge pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffixf2s>.  Operand 3 is
;; listed first in the RTL, ahead of the commutative operands 1 and 2.
2055 (define_insn "fma4i_vmfnmadd<mode>4"
2056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2058 [(vec_merge:SSEMODEF2P
2060 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2062 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2063 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2066 UNSPEC_FMA4_INTRINSIC))]
2067 "TARGET_FMA4 && TARGET_FUSED_MADD"
2068 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2069 [(set_attr "type" "ssemuladd")
2070 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic vec_merge pattern over the SSEMODEF2P modes, tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssemodesuffixf2s>.  The "mode"
;; attribute is the scalar mode (<ssescalarmode>), not the vector mode.
2072 (define_insn "fma4i_vmfnmsub<mode>4"
2073 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2075 [(vec_merge:SSEMODEF2P
2079 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2080 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2081 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2084 UNSPEC_FMA4_INTRINSIC))]
2085 "TARGET_FMA4 && TARGET_FUSED_MADD"
2086 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2087 [(set_attr "type" "ssemuladd")
2088 (set_attr "mode" "<ssescalarmode>")])
2090 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2092 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF pattern emitting vfmaddsubps.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2096 (define_insn "fma4_fmaddsubv8sf4"
2097 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2101 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2102 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2103 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2110 "TARGET_FMA4 && TARGET_FUSED_MADD"
2111 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2112 [(set_attr "type" "ssemuladd")
2113 (set_attr "mode" "V8SF")])
;; V4DF pattern emitting vfmaddsubpd.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2115 (define_insn "fma4_fmaddsubv4df4"
2116 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2120 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2121 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2122 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2129 "TARGET_FMA4 && TARGET_FUSED_MADD"
2130 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2131 [(set_attr "type" "ssemuladd")
2132 (set_attr "mode" "V4DF")])
;; V4SF pattern emitting vfmaddsubps.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2134 (define_insn "fma4_fmaddsubv4sf4"
2135 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2139 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2140 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2141 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2148 "TARGET_FMA4 && TARGET_FUSED_MADD"
2149 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2150 [(set_attr "type" "ssemuladd")
2151 (set_attr "mode" "V4SF")])
;; V2DF pattern emitting vfmaddsubpd.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2153 (define_insn "fma4_fmaddsubv2df4"
2154 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2158 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2159 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2160 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2167 "TARGET_FMA4 && TARGET_FUSED_MADD"
2168 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2169 [(set_attr "type" "ssemuladd")
2170 (set_attr "mode" "V2DF")])
;; V8SF pattern emitting vfmsubaddps.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2172 (define_insn "fma4_fmsubaddv8sf4"
2173 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2177 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2178 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2179 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2186 "TARGET_FMA4 && TARGET_FUSED_MADD"
2187 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2188 [(set_attr "type" "ssemuladd")
2189 (set_attr "mode" "V8SF")])
;; V4DF pattern emitting vfmsubaddpd.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2191 (define_insn "fma4_fmsubaddv4df4"
2192 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2196 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2197 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2198 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2205 "TARGET_FMA4 && TARGET_FUSED_MADD"
2206 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2207 [(set_attr "type" "ssemuladd")
2208 (set_attr "mode" "V4DF")])
;; V4SF pattern emitting vfmsubaddps.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2210 (define_insn "fma4_fmsubaddv4sf4"
2211 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2215 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2216 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2217 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2224 "TARGET_FMA4 && TARGET_FUSED_MADD"
2225 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2226 [(set_attr "type" "ssemuladd")
2227 (set_attr "mode" "V4SF")])
;; V2DF pattern emitting vfmsubaddpd.  Operands 1 and 2 are commutative
;; ("%"); memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1).  Requires TARGET_FMA4 && TARGET_FUSED_MADD.
2229 (define_insn "fma4_fmsubaddv2df4"
2230 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2234 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2235 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2236 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2243 "TARGET_FMA4 && TARGET_FUSED_MADD"
2244 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2245 [(set_attr "type" "ssemuladd")
2246 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2248 (define_insn "fma4i_fmaddsubv8sf4"
2249 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2254 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2255 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2256 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2263 UNSPEC_FMA4_INTRINSIC))]
2264 "TARGET_FMA4 && TARGET_FUSED_MADD"
2265 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2266 [(set_attr "type" "ssemuladd")
2267 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2269 (define_insn "fma4i_fmaddsubv4df4"
2270 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2275 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2276 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2277 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2284 UNSPEC_FMA4_INTRINSIC))]
2285 "TARGET_FMA4 && TARGET_FUSED_MADD"
2286 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2287 [(set_attr "type" "ssemuladd")
2288 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2290 (define_insn "fma4i_fmaddsubv4sf4"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2296 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2297 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2298 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2305 UNSPEC_FMA4_INTRINSIC))]
2306 "TARGET_FMA4 && TARGET_FUSED_MADD"
2307 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2308 [(set_attr "type" "ssemuladd")
2309 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2311 (define_insn "fma4i_fmaddsubv2df4"
2312 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2317 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2318 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2319 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2326 UNSPEC_FMA4_INTRINSIC))]
2327 "TARGET_FMA4 && TARGET_FUSED_MADD"
2328 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2329 [(set_attr "type" "ssemuladd")
2330 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2332 (define_insn "fma4i_fmsubaddv8sf4"
2333 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2338 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2339 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2340 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2347 UNSPEC_FMA4_INTRINSIC))]
2348 "TARGET_FMA4 && TARGET_FUSED_MADD"
2349 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2350 [(set_attr "type" "ssemuladd")
2351 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2353 (define_insn "fma4i_fmsubaddv4df4"
2354 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2359 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2360 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2361 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2368 UNSPEC_FMA4_INTRINSIC))]
2369 "TARGET_FMA4 && TARGET_FUSED_MADD"
2370 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2371 [(set_attr "type" "ssemuladd")
2372 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2374 (define_insn "fma4i_fmsubaddv4sf4"
2375 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2380 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2381 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2382 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2389 UNSPEC_FMA4_INTRINSIC))]
2390 "TARGET_FMA4 && TARGET_FUSED_MADD"
2391 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2392 [(set_attr "type" "ssemuladd")
2393 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF pattern, tagged UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.
;; Operands 1 and 2 are commutative ("%"); memory is allowed for operand 3
;; (alternative 0) or operand 2 (alternative 1).
2395 (define_insn "fma4i_fmsubaddv2df4"
2396 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2401 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2402 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2403 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2410 UNSPEC_FMA4_INTRINSIC))]
2411 "TARGET_FMA4 && TARGET_FUSED_MADD"
2412 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2413 [(set_attr "type" "ssemuladd")
2414 (set_attr "mode" "V2DF")])
2416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2418 ;; Parallel single-precision floating point conversion operations
2420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits cvtpi2ps.  Operand 2 (V2SI, constraint "ym") is converted with
;; float:V2SF; operand 1 is tied to the destination by the "0" constraint,
;; so only operands 0 and 2 appear in the assembler template.
2422 (define_insn "sse_cvtpi2ps"
2423 [(set (match_operand:V4SF 0 "register_operand" "=x")
2426 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2427 (match_operand:V4SF 1 "register_operand" "0")
2430 "cvtpi2ps\t{%2, %0|%0, %2}"
2431 [(set_attr "type" "ssecvt")
2432 (set_attr "mode" "V4SF")])
;; Emits cvtps2pi.  The V4SF operand 1 ("xm") goes through an unspec:V4SI,
;; the index list (parallel [0 1]) names elements 0 and 1, and the V2SI
;; result lands in a "y" register.  The "unit" attribute is "mmx".
2434 (define_insn "sse_cvtps2pi"
2435 [(set (match_operand:V2SI 0 "register_operand" "=y")
2437 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2439 (parallel [(const_int 0) (const_int 1)])))]
2441 "cvtps2pi\t{%1, %0|%0, %1}"
2442 [(set_attr "type" "ssecvt")
2443 (set_attr "unit" "mmx")
2444 (set_attr "mode" "DI")])
;; Emits cvttps2pi.  Uses fix:V4SI on the V4SF operand 1 ("xm") with the
;; index list (parallel [0 1]); the V2SI result lands in a "y" register.
;; The "unit" attribute is "mmx" and prefix_rep is set to "0".
2446 (define_insn "sse_cvttps2pi"
2447 [(set (match_operand:V2SI 0 "register_operand" "=y")
2449 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2450 (parallel [(const_int 0) (const_int 1)])))]
2452 "cvttps2pi\t{%1, %0|%0, %1}"
2453 [(set_attr "type" "ssecvt")
2454 (set_attr "unit" "mmx")
2455 (set_attr "prefix_rep" "0")
2456 (set_attr "mode" "SF")])
;; VEX three-operand form: emits vcvtsi2ss with operand 1 as a separate
;; "x" source register (not tied to the destination) and the SI operand 2
;; ("rm") converted by float:SF.
2458 (define_insn "*avx_cvtsi2ss"
2459 [(set (match_operand:V4SF 0 "register_operand" "=x")
2462 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2463 (match_operand:V4SF 1 "register_operand" "x")
2466 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2467 [(set_attr "type" "sseicvt")
2468 (set_attr "prefix" "vex")
2469 (set_attr "mode" "SF")])
;; Two-address form: emits cvtsi2ss with operand 1 tied to the destination
;; ("0").  The SI source is split into a register alternative ("r") and a
;; memory alternative ("m") so the athlon/amdfam10 decode attributes can
;; differ per alternative ("vector" for register, "double" for memory).
2471 (define_insn "sse_cvtsi2ss"
2472 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2475 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2476 (match_operand:V4SF 1 "register_operand" "0,0")
2479 "cvtsi2ss\t{%2, %0|%0, %2}"
2480 [(set_attr "type" "sseicvt")
2481 (set_attr "athlon_decode" "vector,double")
2482 (set_attr "amdfam10_decode" "vector,double")
2483 (set_attr "mode" "SF")])
;; 64-bit-source VEX form: emits vcvtsi2ssq from the DI operand 2 ("rm")
;; with operand 1 as a separate "x" source register.  Requires
;; TARGET_AVX && TARGET_64BIT; length_vex is fixed at "4".
2485 (define_insn "*avx_cvtsi2ssq"
2486 [(set (match_operand:V4SF 0 "register_operand" "=x")
2489 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2490 (match_operand:V4SF 1 "register_operand" "x")
2492 "TARGET_AVX && TARGET_64BIT"
2493 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2494 [(set_attr "type" "sseicvt")
2495 (set_attr "length_vex" "4")
2496 (set_attr "prefix" "vex")
2497 (set_attr "mode" "SF")])
;; 64-bit-source two-address form: emits cvtsi2ssq from the DI operand 2,
;; with operand 1 tied to the destination ("0").  Requires
;; TARGET_SSE && TARGET_64BIT; prefix_rex is set to "1".
;; NOTE(review): alternative 1 of operand 2 is "rm", so it overlaps the
;; register-only alternative 0, while sse_cvtsi2ss and sse2_cvtsi2sdq use
;; "r,m".  The decode attributes are per-alternative -- confirm the overlap
;; is intended.
2499 (define_insn "sse_cvtsi2ssq"
2500 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2503 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2504 (match_operand:V4SF 1 "register_operand" "0,0")
2506 "TARGET_SSE && TARGET_64BIT"
2507 "cvtsi2ssq\t{%2, %0|%0, %2}"
2508 [(set_attr "type" "sseicvt")
2509 (set_attr "prefix_rex" "1")
2510 (set_attr "athlon_decode" "vector,double")
2511 (set_attr "amdfam10_decode" "vector,double")
2512 (set_attr "mode" "SF")])
;; Emits %vcvtss2si into an SI general register.  The source is element 0
;; (parallel [0]) of the V4SF operand 1, tagged UNSPEC_FIX_NOTRUNC.
;; Alternatives separate register ("x") and memory ("m") sources;
;; athlon_decode is "double" and "vector" respectively.
2514 (define_insn "sse_cvtss2si"
2515 [(set (match_operand:SI 0 "register_operand" "=r,r")
2518 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2519 (parallel [(const_int 0)]))]
2520 UNSPEC_FIX_NOTRUNC))]
2522 "%vcvtss2si\t{%1, %0|%0, %1}"
2523 [(set_attr "type" "sseicvt")
2524 (set_attr "athlon_decode" "double,vector")
2525 (set_attr "prefix_rep" "1")
2526 (set_attr "prefix" "maybe_vex")
2527 (set_attr "mode" "SI")])
;; Scalar-operand variant: the source is an SF operand ("x" or "m") wrapped
;; directly in unspec:SI UNSPEC_FIX_NOTRUNC.  Emits the same %vcvtss2si
;; template as sse_cvtss2si and additionally sets amdfam10_decode.
2529 (define_insn "sse_cvtss2si_2"
2530 [(set (match_operand:SI 0 "register_operand" "=r,r")
2531 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2532 UNSPEC_FIX_NOTRUNC))]
2534 "%vcvtss2si\t{%1, %0|%0, %1}"
2535 [(set_attr "type" "sseicvt")
2536 (set_attr "athlon_decode" "double,vector")
2537 (set_attr "amdfam10_decode" "double,double")
2538 (set_attr "prefix_rep" "1")
2539 (set_attr "prefix" "maybe_vex")
2540 (set_attr "mode" "SI")])
;; DI-result variant: emits %vcvtss2si{q} from element 0 (parallel [0]) of
;; the V4SF operand 1, tagged UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE && TARGET_64BIT.  The size suffix is written inside braces
;; rather than as part of the mnemonic.
2542 (define_insn "sse_cvtss2siq"
2543 [(set (match_operand:DI 0 "register_operand" "=r,r")
2546 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2547 (parallel [(const_int 0)]))]
2548 UNSPEC_FIX_NOTRUNC))]
2549 "TARGET_SSE && TARGET_64BIT"
2550 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2551 [(set_attr "type" "sseicvt")
2552 (set_attr "athlon_decode" "double,vector")
2553 (set_attr "prefix_rep" "1")
2554 (set_attr "prefix" "maybe_vex")
2555 (set_attr "mode" "DI")])
;; DI-result, scalar-operand variant: the SF operand 1 ("x" or "m") is
;; wrapped in unspec:DI UNSPEC_FIX_NOTRUNC and the result goes to a general
;; register.  Emits %vcvtss2si{q}; requires TARGET_SSE && TARGET_64BIT.
2557 (define_insn "sse_cvtss2siq_2"
2558 [(set (match_operand:DI 0 "register_operand" "=r,r")
2559 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2560 UNSPEC_FIX_NOTRUNC))]
2561 "TARGET_SSE && TARGET_64BIT"
2562 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2563 [(set_attr "type" "sseicvt")
2564 (set_attr "athlon_decode" "double,vector")
2565 (set_attr "amdfam10_decode" "double,double")
2566 (set_attr "prefix_rep" "1")
2567 (set_attr "prefix" "maybe_vex")
2568 (set_attr "mode" "DI")])
;; Emits %vcvttss2si into an SI general register from element 0
;; (parallel [0]) of the V4SF operand 1.  Alternatives separate register
;; ("x") and memory ("m") sources for the per-alternative decode attributes.
2570 (define_insn "sse_cvttss2si"
2571 [(set (match_operand:SI 0 "register_operand" "=r,r")
2574 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2575 (parallel [(const_int 0)]))))]
2577 "%vcvttss2si\t{%1, %0|%0, %1}"
2578 [(set_attr "type" "sseicvt")
2579 (set_attr "athlon_decode" "double,vector")
2580 (set_attr "amdfam10_decode" "double,double")
2581 (set_attr "prefix_rep" "1")
2582 (set_attr "prefix" "maybe_vex")
2583 (set_attr "mode" "SI")])
;; DI-result variant: emits %vcvttss2si{q} from element 0 (parallel [0]) of
;; the V4SF operand 1.  Requires TARGET_SSE && TARGET_64BIT.
2585 (define_insn "sse_cvttss2siq"
2586 [(set (match_operand:DI 0 "register_operand" "=r,r")
2589 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2590 (parallel [(const_int 0)]))))]
2591 "TARGET_SSE && TARGET_64BIT"
2592 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2593 [(set_attr "type" "sseicvt")
2594 (set_attr "athlon_decode" "double,vector")
2595 (set_attr "amdfam10_decode" "double,double")
2596 (set_attr "prefix_rep" "1")
2597 (set_attr "prefix" "maybe_vex")
2598 (set_attr "mode" "DI")])
;; Iterates over AVXMODEDCVTDQ2PS: applies float to an <avxcvtvecmode>
;; operand ("xm") and emits vcvtdq2ps with the VEX prefix attribute.
2600 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2601 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2602 (float:AVXMODEDCVTDQ2PS
2603 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2605 "vcvtdq2ps\t{%1, %0|%0, %1}"
2606 [(set_attr "type" "ssecvt")
2607 (set_attr "prefix" "vex")
2608 (set_attr "mode" "<avxvecmode>")])
;; float:V4SF of a V4SI operand ("xm"); emits cvtdq2ps.
2610 (define_insn "sse2_cvtdq2ps"
2611 [(set (match_operand:V4SF 0 "register_operand" "=x")
2612 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2614 "cvtdq2ps\t{%1, %0|%0, %1}"
2615 [(set_attr "type" "ssecvt")
2616 (set_attr "mode" "V4SF")])
;; Expander with four RTL steps: float:V4SF of operand 1, an lt compare of
;; operand 5 against operand 3, an and of operand 6 with operand 4, and a
;; final plus of operands 5 and 7 stored in operand 0.
;; The preparation code sets operands[3] to a zero V4SF register and
;; operands[4] to a V4SF constant built from 2**32 (real_ldexp of dconst1
;; by 32, via ix86_build_const_vector); operands[5..7] are fresh V4SF
;; pseudos.
;; Presumably operand 5 holds the signed conversion and 2**32 is added to
;; the lanes that compared below zero -- confirm against the set
;; destinations.
2618 (define_expand "sse2_cvtudq2ps"
2620 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2622 (lt:V4SF (match_dup 5) (match_dup 3)))
2624 (and:V4SF (match_dup 6) (match_dup 4)))
2625 (set (match_operand:V4SF 0 "register_operand" "")
2626 (plus:V4SF (match_dup 5) (match_dup 7)))]
2629 REAL_VALUE_TYPE TWO32r;
2633 real_ldexp (&TWO32r, &dconst1, 32);
2634 x = const_double_from_real_value (TWO32r, SFmode);
2636 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2637 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2639 for (i = 5; i < 8; i++)
2640 operands[i] = gen_reg_rtx (V4SFmode);
;; Iterates over AVXMODEDCVTPS2DQ: wraps an <avxcvtvecmode> operand ("xm")
;; in an unspec tagged UNSPEC_FIX_NOTRUNC and emits vcvtps2dq.
2643 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2644 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2645 (unspec:AVXMODEDCVTPS2DQ
2646 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2647 UNSPEC_FIX_NOTRUNC))]
2649 "vcvtps2dq\t{%1, %0|%0, %1}"
2650 [(set_attr "type" "ssecvt")
2651 (set_attr "prefix" "vex")
2652 (set_attr "mode" "<avxvecmode>")])
;; unspec:V4SI UNSPEC_FIX_NOTRUNC of a V4SF operand ("xm"); emits cvtps2dq.
;; prefix_data16 is set to "1" and the mode attribute is TI.
2654 (define_insn "sse2_cvtps2dq"
2655 [(set (match_operand:V4SI 0 "register_operand" "=x")
2656 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2657 UNSPEC_FIX_NOTRUNC))]
2659 "cvtps2dq\t{%1, %0|%0, %1}"
2660 [(set_attr "type" "ssecvt")
2661 (set_attr "prefix_data16" "1")
2662 (set_attr "mode" "TI")])
;; Iterates over AVXMODEDCVTPS2DQ: applies fix to an <avxcvtvecmode>
;; operand ("xm") and emits vcvttps2dq with the VEX prefix attribute.
2664 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2665 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2666 (fix:AVXMODEDCVTPS2DQ
2667 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2669 "vcvttps2dq\t{%1, %0|%0, %1}"
2670 [(set_attr "type" "ssecvt")
2671 (set_attr "prefix" "vex")
2672 (set_attr "mode" "<avxvecmode>")])
;; fix:V4SI of a V4SF operand ("xm"); emits cvttps2dq.  prefix_rep is "1"
;; and prefix_data16 is explicitly "0".
2674 (define_insn "sse2_cvttps2dq"
2675 [(set (match_operand:V4SI 0 "register_operand" "=x")
2676 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2678 "cvttps2dq\t{%1, %0|%0, %1}"
2679 [(set_attr "type" "ssecvt")
2680 (set_attr "prefix_rep" "1")
2681 (set_attr "prefix_data16" "0")
2682 (set_attr "mode" "TI")])
2684 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2686 ;; Parallel double-precision floating point conversion operations
2688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float:V2DF of a V2SI operand; emits cvtpi2pd.  Alternative 0 takes the
;; source in a "y" register (unit "mmx", prefix_data16 "1"); alternative 1
;; takes it from memory and leaves both attributes at their defaults ("*").
2690 (define_insn "sse2_cvtpi2pd"
2691 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2692 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2694 "cvtpi2pd\t{%1, %0|%0, %1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "unit" "mmx,*")
2697 (set_attr "prefix_data16" "1,*")
2698 (set_attr "mode" "V2DF")])
;; unspec:V2SI UNSPEC_FIX_NOTRUNC of a V2DF operand ("xm"), result in a
;; "y" register; emits cvtpd2pi.  The "unit" attribute is "mmx".
2700 (define_insn "sse2_cvtpd2pi"
2701 [(set (match_operand:V2SI 0 "register_operand" "=y")
2702 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2703 UNSPEC_FIX_NOTRUNC))]
2705 "cvtpd2pi\t{%1, %0|%0, %1}"
2706 [(set_attr "type" "ssecvt")
2707 (set_attr "unit" "mmx")
2708 (set_attr "prefix_data16" "1")
2709 (set_attr "mode" "DI")])
;; fix:V2SI of a V2DF operand ("xm"), result in a "y" register; emits
;; cvttpd2pi.  The "unit" attribute is "mmx".
2711 (define_insn "sse2_cvttpd2pi"
2712 [(set (match_operand:V2SI 0 "register_operand" "=y")
2713 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2715 "cvttpd2pi\t{%1, %0|%0, %1}"
2716 [(set_attr "type" "ssecvt")
2717 (set_attr "unit" "mmx")
2718 (set_attr "prefix_data16" "1")
2719 (set_attr "mode" "TI")])
;; VEX three-operand form: emits vcvtsi2sd with operand 1 as a separate
;; "x" source register and the SI operand 2 ("rm") converted by float:DF.
2721 (define_insn "*avx_cvtsi2sd"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x")
2725 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2726 (match_operand:V2DF 1 "register_operand" "x")
2729 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2730 [(set_attr "type" "sseicvt")
2731 (set_attr "prefix" "vex")
2732 (set_attr "mode" "DF")])
;; Two-address form: emits cvtsi2sd with operand 1 tied to the destination
;; ("0").  Register ("r") and memory ("m") sources are separate
;; alternatives so athlon_decode ("double,direct") and amdfam10_decode
;; ("vector,double") can be given per alternative.
2734 (define_insn "sse2_cvtsi2sd"
2735 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2738 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2739 (match_operand:V2DF 1 "register_operand" "0,0")
2742 "cvtsi2sd\t{%2, %0|%0, %2}"
2743 [(set_attr "type" "sseicvt")
2744 (set_attr "mode" "DF")
2745 (set_attr "athlon_decode" "double,direct")
2746 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit-source VEX form: emits vcvtsi2sdq from the DI operand 2 ("rm")
;; with operand 1 as a separate "x" source register.  Requires
;; TARGET_AVX && TARGET_64BIT; length_vex is fixed at "4".
2748 (define_insn "*avx_cvtsi2sdq"
2749 [(set (match_operand:V2DF 0 "register_operand" "=x")
2752 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2753 (match_operand:V2DF 1 "register_operand" "x")
2755 "TARGET_AVX && TARGET_64BIT"
2756 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2757 [(set_attr "type" "sseicvt")
2758 (set_attr "length_vex" "4")
2759 (set_attr "prefix" "vex")
2760 (set_attr "mode" "DF")])
;; 64-bit-source two-address form: emits cvtsi2sdq from the DI operand 2
;; ("r" or "m"), with operand 1 tied to the destination ("0").  Requires
;; TARGET_SSE2 && TARGET_64BIT; prefix_rex is set to "1".
2762 (define_insn "sse2_cvtsi2sdq"
2763 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2766 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2767 (match_operand:V2DF 1 "register_operand" "0,0")
2769 "TARGET_SSE2 && TARGET_64BIT"
2770 "cvtsi2sdq\t{%2, %0|%0, %2}"
2771 [(set_attr "type" "sseicvt")
2772 (set_attr "prefix_rex" "1")
2773 (set_attr "mode" "DF")
2774 (set_attr "athlon_decode" "double,direct")
2775 (set_attr "amdfam10_decode" "vector,double")])
;; Emits %vcvtsd2si into an SI general register.  The source is element 0
;; (parallel [0]) of the V2DF operand 1, tagged UNSPEC_FIX_NOTRUNC.
;; Alternatives separate register ("x") and memory ("m") sources.
2777 (define_insn "sse2_cvtsd2si"
2778 [(set (match_operand:SI 0 "register_operand" "=r,r")
2781 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2782 (parallel [(const_int 0)]))]
2783 UNSPEC_FIX_NOTRUNC))]
2785 "%vcvtsd2si\t{%1, %0|%0, %1}"
2786 [(set_attr "type" "sseicvt")
2787 (set_attr "athlon_decode" "double,vector")
2788 (set_attr "prefix_rep" "1")
2789 (set_attr "prefix" "maybe_vex")
2790 (set_attr "mode" "SI")])
;; Scalar-operand variant: the source is a DF operand ("x" or "m") wrapped
;; directly in unspec:SI UNSPEC_FIX_NOTRUNC.  Emits the same %vcvtsd2si
;; template as sse2_cvtsd2si and additionally sets amdfam10_decode.
2792 (define_insn "sse2_cvtsd2si_2"
2793 [(set (match_operand:SI 0 "register_operand" "=r,r")
2794 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2795 UNSPEC_FIX_NOTRUNC))]
2797 "%vcvtsd2si\t{%1, %0|%0, %1}"
2798 [(set_attr "type" "sseicvt")
2799 (set_attr "athlon_decode" "double,vector")
2800 (set_attr "amdfam10_decode" "double,double")
2801 (set_attr "prefix_rep" "1")
2802 (set_attr "prefix" "maybe_vex")
2803 (set_attr "mode" "SI")])
;; DI-result variant: the source is element 0 (parallel [0]) of the V2DF
;; operand 1, tagged UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): the "q" suffix is part of the mnemonic here, so it is
;; printed in both assembler dialects, whereas sse_cvtss2siq writes it as
;; "{q}" -- confirm whether the Intel-dialect output should carry it.
2805 (define_insn "sse2_cvtsd2siq"
2806 [(set (match_operand:DI 0 "register_operand" "=r,r")
2809 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2810 (parallel [(const_int 0)]))]
2811 UNSPEC_FIX_NOTRUNC))]
2812 "TARGET_SSE2 && TARGET_64BIT"
2813 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2814 [(set_attr "type" "sseicvt")
2815 (set_attr "athlon_decode" "double,vector")
2816 (set_attr "prefix_rep" "1")
2817 (set_attr "prefix" "maybe_vex")
2818 (set_attr "mode" "DI")])
;; DI-result, scalar-operand variant: the DF operand 1 ("x" or "m") is
;; wrapped in unspec:DI UNSPEC_FIX_NOTRUNC and the result goes to a general
;; register.  Requires TARGET_SSE2 && TARGET_64BIT.
;; The operand-size suffix is written as "{q}" so that it is printed only
;; in the AT&T dialect, matching sse_cvtss2siq_2; the AT&T output is
;; unchanged.
2820 (define_insn "sse2_cvtsd2siq_2"
2821 [(set (match_operand:DI 0 "register_operand" "=r,r")
2822 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2823 UNSPEC_FIX_NOTRUNC))]
2824 "TARGET_SSE2 && TARGET_64BIT"
2825 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2826 [(set_attr "type" "sseicvt")
2827 (set_attr "athlon_decode" "double,vector")
2828 (set_attr "amdfam10_decode" "double,double")
2829 (set_attr "prefix_rep" "1")
2830 (set_attr "prefix" "maybe_vex")
2831 (set_attr "mode" "DI")])
;; Emits %vcvttsd2si into an SI general register from element 0
;; (parallel [0]) of the V2DF operand 1.  Alternatives separate register
;; ("x") and memory ("m") sources for the per-alternative decode attributes.
2833 (define_insn "sse2_cvttsd2si"
2834 [(set (match_operand:SI 0 "register_operand" "=r,r")
2837 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2838 (parallel [(const_int 0)]))))]
2840 "%vcvttsd2si\t{%1, %0|%0, %1}"
2841 [(set_attr "type" "sseicvt")
2842 (set_attr "prefix_rep" "1")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "mode" "SI")
2845 (set_attr "athlon_decode" "double,vector")
2846 (set_attr "amdfam10_decode" "double,double")])
;; DI-result variant: the source is element 0 (parallel [0]) of the V2DF
;; operand 1.  Requires TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): the "q" suffix is part of the mnemonic here, so it is
;; printed in both assembler dialects, whereas sse_cvttss2siq writes it as
;; "{q}" -- confirm whether the Intel-dialect output should carry it.
2848 (define_insn "sse2_cvttsd2siq"
2849 [(set (match_operand:DI 0 "register_operand" "=r,r")
2852 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2853 (parallel [(const_int 0)]))))]
2854 "TARGET_SSE2 && TARGET_64BIT"
2855 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2856 [(set_attr "type" "sseicvt")
2857 (set_attr "prefix_rep" "1")
2858 (set_attr "prefix" "maybe_vex")
2859 (set_attr "mode" "DI")
2860 (set_attr "athlon_decode" "double,vector")
2861 (set_attr "amdfam10_decode" "double,double")])
;; float:V4DF of a V4SI operand ("xm"); emits vcvtdq2pd with the VEX prefix
;; attribute.
2863 (define_insn "avx_cvtdq2pd256"
2864 [(set (match_operand:V4DF 0 "register_operand" "=x")
2865 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2867 "vcvtdq2pd\t{%1, %0|%0, %1}"
2868 [(set_attr "type" "ssecvt")
2869 (set_attr "prefix" "vex")
2870 (set_attr "mode" "V4DF")])
;; V2DF result derived from elements 0 and 1 (parallel [0 1]) of the V4SI
;; operand 1 ("xm"); emits %vcvtdq2pd with prefix "maybe_vex".
2872 (define_insn "sse2_cvtdq2pd"
2873 [(set (match_operand:V2DF 0 "register_operand" "=x")
2876 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2877 (parallel [(const_int 0) (const_int 1)]))))]
2879 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2880 [(set_attr "type" "ssecvt")
2881 (set_attr "prefix" "maybe_vex")
2882 (set_attr "mode" "V2DF")])
;; unspec:V4SI UNSPEC_FIX_NOTRUNC of a V4DF operand ("xm"); emits
;; vcvtpd2dq with a "{y}" suffix written in braces.  Mode attribute is OI.
2884 (define_insn "avx_cvtpd2dq256"
2885 [(set (match_operand:V4SI 0 "register_operand" "=x")
2886 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2887 UNSPEC_FIX_NOTRUNC))]
2889 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2890 [(set_attr "type" "ssecvt")
2891 (set_attr "prefix" "vex")
2892 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI conversion.  The preparation statement
;; sets operands[2] to the V2SI zero constant; the insn *sse2_cvtpd2dq
;; requires a const0_operand in that position.
2894 (define_expand "sse2_cvtpd2dq"
2895 [(set (match_operand:V4SI 0 "register_operand" "")
2897 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2901 "operands[2] = CONST0_RTX (V2SImode);")
;; Matches an unspec:V2SI of the V2DF operand 1 ("xm") together with a
;; V2SI operand 2 that must satisfy const0_operand.  The C output fragment
;; returns the "vcvtpd2dq{x}" template when TARGET_AVX is set and the plain
;; "cvtpd2dq" template otherwise.
2903 (define_insn "*sse2_cvtpd2dq"
2904 [(set (match_operand:V4SI 0 "register_operand" "=x")
2906 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2908 (match_operand:V2SI 2 "const0_operand" "")))]
2910 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2911 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2912 [(set_attr "type" "ssecvt")
2913 (set_attr "prefix_rep" "1")
2914 (set_attr "prefix_data16" "0")
2915 (set_attr "prefix" "maybe_vex")
2916 (set_attr "mode" "TI")
2917 (set_attr "amdfam10_decode" "double")])
;; fix:V4SI of a V4DF operand ("xm"); emits vcvttpd2dq with a "{y}" suffix
;; written in braces.  Mode attribute is OI.
2919 (define_insn "avx_cvttpd2dq256"
2920 [(set (match_operand:V4SI 0 "register_operand" "=x")
2921 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2923 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2924 [(set_attr "type" "ssecvt")
2925 (set_attr "prefix" "vex")
2926 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI conversion using fix:V2SI.  The
;; preparation statement sets operands[2] to the V2SI zero constant; the
;; insn *sse2_cvttpd2dq requires a const0_operand in that position.
2928 (define_expand "sse2_cvttpd2dq"
2929 [(set (match_operand:V4SI 0 "register_operand" "")
2931 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2934 "operands[2] = CONST0_RTX (V2SImode);")
;; Matches fix:V2SI of the V2DF operand 1 ("xm") together with a V2SI
;; operand 2 that must satisfy const0_operand.  The C output fragment
;; returns the "vcvttpd2dq{x}" template when TARGET_AVX is set and the
;; plain "cvttpd2dq" template otherwise.
2936 (define_insn "*sse2_cvttpd2dq"
2937 [(set (match_operand:V4SI 0 "register_operand" "=x")
2939 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2940 (match_operand:V2SI 2 "const0_operand" "")))]
2942 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2943 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2944 [(set_attr "type" "ssecvt")
2945 (set_attr "prefix" "maybe_vex")
2946 (set_attr "mode" "TI")
2947 (set_attr "amdfam10_decode" "double")])
;; VEX three-operand form: emits vcvtsd2ss.  The V2DF operand 2 ("xm") is
;; narrowed with float_truncate:V2SF; operand 1 is a separate "x" source
;; register rather than being tied to the destination.
2949 (define_insn "*avx_cvtsd2ss"
2950 [(set (match_operand:V4SF 0 "register_operand" "=x")
2953 (float_truncate:V2SF
2954 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2955 (match_operand:V4SF 1 "register_operand" "x")
2958 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2959 [(set_attr "type" "ssecvt")
2960 (set_attr "prefix" "vex")
2961 (set_attr "mode" "SF")])
;; Two-address form: emits cvtsd2ss with operand 1 tied to the destination
;; ("0").  The V2DF operand 2 is narrowed with float_truncate:V2SF;
;; register ("x") and memory ("m") sources are separate alternatives with
;; decode attributes "vector" and "double" respectively.
2963 (define_insn "sse2_cvtsd2ss"
2964 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2967 (float_truncate:V2SF
2968 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2969 (match_operand:V4SF 1 "register_operand" "0,0")
2972 "cvtsd2ss\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "ssecvt")
2974 (set_attr "athlon_decode" "vector,double")
2975 (set_attr "amdfam10_decode" "vector,double")
2976 (set_attr "mode" "SF")])
;; VEX three-operand form: emits vcvtss2sd.  The source is the V4SF
;; operand 2 ("xm") with index list (parallel [0 1]); operand 1 is a
;; separate "x" source register rather than being tied to the destination.
2978 (define_insn "*avx_cvtss2sd"
2979 [(set (match_operand:V2DF 0 "register_operand" "=x")
2983 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2984 (parallel [(const_int 0) (const_int 1)])))
2985 (match_operand:V2DF 1 "register_operand" "x")
2988 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2989 [(set_attr "type" "ssecvt")
2990 (set_attr "prefix" "vex")
2991 (set_attr "mode" "DF")])
;; Two-address form: emits cvtss2sd with operand 1 tied to the destination
;; ("0").  The source is the V4SF operand 2 with index list
;; (parallel [0 1]); register and memory sources are separate alternatives
;; with amdfam10_decode "vector" and "double" respectively.
2993 (define_insn "sse2_cvtss2sd"
2994 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2998 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2999 (parallel [(const_int 0) (const_int 1)])))
3000 (match_operand:V2DF 1 "register_operand" "0,0")
3003 "cvtss2sd\t{%2, %0|%0, %2}"
3004 [(set_attr "type" "ssecvt")
3005 (set_attr "amdfam10_decode" "vector,double")
3006 (set_attr "mode" "DF")])
;; float_truncate:V4SF of a V4DF operand ("xm"); emits vcvtpd2ps with a
;; "{y}" suffix written in braces and the VEX prefix attribute.
3008 (define_insn "avx_cvtpd2ps256"
3009 [(set (match_operand:V4SF 0 "register_operand" "=x")
3010 (float_truncate:V4SF
3011 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3013 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3014 [(set_attr "type" "ssecvt")
3015 (set_attr "prefix" "vex")
3016 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V4SF conversion using float_truncate:V2SF.  The
;; preparation statement sets operands[2] to the V2SF zero constant; the
;; insn *sse2_cvtpd2ps requires a const0_operand in that position.
3018 (define_expand "sse2_cvtpd2ps"
3019 [(set (match_operand:V4SF 0 "register_operand" "")
3021 (float_truncate:V2SF
3022 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3025 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the expander above: V2DF operand 1 is truncated to
;; V2SF and operand 2 must satisfy const0_operand.  The C output
;; statement picks "vcvtpd2ps{x}" when TARGET_AVX is set and plain
;; "cvtpd2ps" otherwise.
3027 (define_insn "*sse2_cvtpd2ps"
3028 [(set (match_operand:V4SF 0 "register_operand" "=x")
3030 (float_truncate:V2SF
3031 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3032 (match_operand:V2SF 2 "const0_operand" "")))]
3034 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3035 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3036 [(set_attr "type" "ssecvt")
3037 (set_attr "prefix_data16" "1")
3038 (set_attr "prefix" "maybe_vex")
3039 (set_attr "mode" "V4SF")
3040 (set_attr "amdfam10_decode" "double")])
;; 256-bit packed single->double conversion: V4SF operand 1 (register or
;; memory) is widened into the V4DF operand 0 with vcvtps2pd.
3042 (define_insn "avx_cvtps2pd256"
3043 [(set (match_operand:V4DF 0 "register_operand" "=x")
3045 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3047 "vcvtps2pd\t{%1, %0|%0, %1}"
3048 [(set_attr "type" "ssecvt")
3049 (set_attr "prefix" "vex")
3050 (set_attr "mode" "V4DF")])
;; 128-bit packed single->double conversion: elements 0 and 1 of the
;; V4SF operand 1 produce the V2DF operand 0.  The "%v" template prefix
;; together with prefix "maybe_vex" lets one pattern serve both the SSE
;; and the VEX encoding.
3052 (define_insn "sse2_cvtps2pd"
3053 [(set (match_operand:V2DF 0 "register_operand" "=x")
3056 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3057 (parallel [(const_int 0) (const_int 1)]))))]
3059 "%vcvtps2pd\t{%1, %0|%0, %1}"
3060 [(set_attr "type" "ssecvt")
3061 (set_attr "prefix" "maybe_vex")
3062 (set_attr "mode" "V2DF")
3063 (set_attr "prefix_data16" "0")
3064 (set_attr "amdfam10_decode" "direct")])
;; Widen the high half of a V4SF to V2DF in two steps.  A fresh V4SF
;; pseudo (operands[2]) is allocated by the preparation code; the final
;; set writes the V2DF operand 0 from elements 0 and 1 of its source.
3066 (define_expand "vec_unpacks_hi_v4sf"
3071 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3072 (parallel [(const_int 6)
3076 (set (match_operand:V2DF 0 "register_operand" "")
3080 (parallel [(const_int 0) (const_int 1)]))))]
3083 operands[2] = gen_reg_rtx (V4SFmode);
;; Widen the low half of a V4SF: elements 0 and 1 of operand 1 produce
;; the V2DF operand 0 in a single set.
3086 (define_expand "vec_unpacks_lo_v4sf"
3087 [(set (match_operand:V2DF 0 "register_operand" "")
3090 (match_operand:V4SF 1 "nonimmediate_operand" "")
3091 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed V8HI (high half) -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
3094 (define_expand "vec_unpacks_float_hi_v8hi"
3095 [(match_operand:V4SF 0 "register_operand" "")
3096 (match_operand:V8HI 1 "register_operand" "")]
3099 rtx tmp = gen_reg_rtx (V4SImode);
3101 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3102 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Signed V8HI (low half) -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
3106 (define_expand "vec_unpacks_float_lo_v8hi"
3107 [(match_operand:V4SF 0 "register_operand" "")
3108 (match_operand:V8HI 1 "register_operand" "")]
3111 rtx tmp = gen_reg_rtx (V4SImode);
3113 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3114 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned V8HI (high half) -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
3118 (define_expand "vec_unpacku_float_hi_v8hi"
3119 [(match_operand:V4SF 0 "register_operand" "")
3120 (match_operand:V8HI 1 "register_operand" "")]
3123 rtx tmp = gen_reg_rtx (V4SImode);
3125 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3126 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned V8HI (low half) -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
3130 (define_expand "vec_unpacku_float_lo_v8hi"
3131 [(match_operand:V4SF 0 "register_operand" "")
3132 (match_operand:V8HI 1 "register_operand" "")]
3135 rtx tmp = gen_reg_rtx (V4SImode);
3137 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3138 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Signed V4SI (high half) -> V2DF in two steps.  operands[2] is a fresh
;; V4SI pseudo created by the preparation statement; the final set
;; writes the V2DF operand 0 from elements 0 and 1 of its source.
3142 (define_expand "vec_unpacks_float_hi_v4si"
3145 (match_operand:V4SI 1 "nonimmediate_operand" "")
3146 (parallel [(const_int 2)
3150 (set (match_operand:V2DF 0 "register_operand" "")
3154 (parallel [(const_int 0) (const_int 1)]))))]
3156 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed V4SI (low half) -> V2DF: elements 0 and 1 of operand 1 produce
;; the V2DF operand 0 in a single set.
3158 (define_expand "vec_unpacks_float_lo_v4si"
3159 [(set (match_operand:V2DF 0 "register_operand" "")
3162 (match_operand:V4SI 1 "nonimmediate_operand" "")
3163 (parallel [(const_int 0) (const_int 1)]))))]
;; Unsigned V4SI (high half) -> V2DF.  The preparation code builds a
;; zero vector (operands[3]) and a vector holding 2^32 (operands[4],
;; from real_ldexp (&TWO32r, &dconst1, 32)), a V4SI temporary
;; (operands[5]) and three V2DF temporaries (operands[6..8]).  The
;; visible RTL compares operand 6 against the zero vector (lt), ANDs
;; operand 7 with the 2^32 vector, and adds operand 8 to operand 6 to
;; form operand 0 -- presumably the usual "add 2^32 to lanes that
;; converted as negative" fix-up; confirm against the full pattern.
3166 (define_expand "vec_unpacku_float_hi_v4si"
3169 (match_operand:V4SI 1 "nonimmediate_operand" "")
3170 (parallel [(const_int 2)
3178 (parallel [(const_int 0) (const_int 1)]))))
3180 (lt:V2DF (match_dup 6) (match_dup 3)))
3182 (and:V2DF (match_dup 7) (match_dup 4)))
3183 (set (match_operand:V2DF 0 "register_operand" "")
3184 (plus:V2DF (match_dup 6) (match_dup 8)))]
3187 REAL_VALUE_TYPE TWO32r;
3191 real_ldexp (&TWO32r, &dconst1, 32);
3192 x = const_double_from_real_value (TWO32r, DFmode);
3194 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3195 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3197 operands[5] = gen_reg_rtx (V4SImode);
3199 for (i = 6; i < 9; i++)
3200 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI (low half) -> V2DF.  Same scheme as the _hi_ variant
;; but without the V4SI temporary: operands[3] is a zero vector,
;; operands[4] a vector holding 2^32, and operands[5..7] are V2DF
;; temporaries.  The visible RTL compares operand 5 against zero (lt),
;; ANDs operand 6 with the 2^32 vector, and adds operand 7 to operand 5
;; to form operand 0.
3203 (define_expand "vec_unpacku_float_lo_v4si"
3207 (match_operand:V4SI 1 "nonimmediate_operand" "")
3208 (parallel [(const_int 0) (const_int 1)]))))
3210 (lt:V2DF (match_dup 5) (match_dup 3)))
3212 (and:V2DF (match_dup 6) (match_dup 4)))
3213 (set (match_operand:V2DF 0 "register_operand" "")
3214 (plus:V2DF (match_dup 5) (match_dup 7)))]
3217 REAL_VALUE_TYPE TWO32r;
3221 real_ldexp (&TWO32r, &dconst1, 32);
3222 x = const_double_from_real_value (TWO32r, DFmode);
3224 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3225 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3227 for (i = 5; i < 8; i++)
3228 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V2DF inputs into one V4SF: each input is converted into its
;; own V4SF temporary with gen_sse2_cvtpd2ps, and the two temporaries
;; are combined into operand 0 with gen_sse_movlhps.
3231 (define_expand "vec_pack_trunc_v2df"
3232 [(match_operand:V4SF 0 "register_operand" "")
3233 (match_operand:V2DF 1 "nonimmediate_operand" "")
3234 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3239 r1 = gen_reg_rtx (V4SFmode);
3240 r2 = gen_reg_rtx (V4SFmode);
3242 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3243 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3244 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI using the truncating conversion:
;; each input goes through gen_sse2_cvttpd2dq into a V4SI temporary, and
;; the temporaries are merged by interleaving their low V2DI elements
;; (all three operands are viewed as V2DI via gen_lowpart).
3248 (define_expand "vec_pack_sfix_trunc_v2df"
3249 [(match_operand:V4SI 0 "register_operand" "")
3250 (match_operand:V2DF 1 "nonimmediate_operand" "")
3251 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3256 r1 = gen_reg_rtx (V4SImode);
3257 r2 = gen_reg_rtx (V4SImode);
3259 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3260 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3261 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3262 gen_lowpart (V2DImode, r1),
3263 gen_lowpart (V2DImode, r2)));
;; Same packing scheme as vec_pack_sfix_trunc_v2df, but the per-input
;; conversion is gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq.
3267 (define_expand "vec_pack_sfix_v2df"
3268 [(match_operand:V4SI 0 "register_operand" "")
3269 (match_operand:V2DF 1 "nonimmediate_operand" "")
3270 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3275 r1 = gen_reg_rtx (V4SImode);
3276 r2 = gen_reg_rtx (V4SImode);
3278 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3279 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3280 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3281 gen_lowpart (V2DImode, r1),
3282 gen_lowpart (V2DImode, r2)));
3286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3288 ;; Parallel single-precision floating point element swizzling
3290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three V4SF operands
;; may be registers or memory; the preparation statement normalizes
;; them with ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
3292 (define_expand "sse_movhlps_exp"
3293 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3296 (match_operand:V4SF 1 "nonimmediate_operand" "")
3297 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3298 (parallel [(const_int 6)
3303 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps.  The condition rejects the case where operands 1 and 2
;; are both in memory.  Alternatives:
;;   0: all registers                   -> vmovhlps
;;   1: operand 2 in offsettable memory -> vmovlps, addressed through %H2
;;   2: memory destination (operand 1 tied to it) -> vmovhps store
3305 (define_insn "*avx_movhlps"
3306 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3309 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3310 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3311 (parallel [(const_int 6)
3315 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3317 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3318 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3319 vmovhps\t{%2, %0|%0, %2}"
3320 [(set_attr "type" "ssemov")
3321 (set_attr "prefix" "vex")
3322 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movhlps.  Operand 1 is tied to the destination in every
;; alternative, and the condition rejects operands 1 and 2 both being
;; in memory.  Alternatives: register source -> movhlps; offsettable
;; memory source -> movlps through %H2; memory destination -> movhps.
3324 (define_insn "sse_movhlps"
3325 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3328 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3329 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3330 (parallel [(const_int 6)
3334 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3336 movhlps\t{%2, %0|%0, %2}
3337 movlps\t{%H2, %0|%0, %H2}
3338 movhps\t{%2, %0|%0, %2}"
3339 [(set_attr "type" "ssemov")
3340 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three V4SF operands
;; may be registers or memory; the preparation statement normalizes
;; them with ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
3342 (define_expand "sse_movlhps_exp"
3343 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3346 (match_operand:V4SF 1 "nonimmediate_operand" "")
3347 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3348 (parallel [(const_int 0)
3353 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps, guarded by ix86_binary_operator_ok.  Alternatives:
;;   0: all registers                   -> vmovlhps
;;   1: operand 2 in memory             -> vmovhps load
;;   2: offsettable memory destination (operand 1 tied to it)
;;                                      -> vmovlps store to %H0
3355 (define_insn "*avx_movlhps"
3356 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3359 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3360 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3361 (parallel [(const_int 0)
3365 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3367 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3368 vmovhps\t{%2, %1, %0|%0, %1, %2}
3369 vmovlps\t{%2, %H0|%H0, %2}"
3370 [(set_attr "type" "ssemov")
3371 (set_attr "prefix" "vex")
3372 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movlhps, guarded by ix86_binary_operator_ok.  Operand 1 is tied
;; to the destination in every alternative.  Alternatives: register
;; source -> movlhps; memory source -> movhps; offsettable memory
;; destination -> movlps store to %H0.
3374 (define_insn "sse_movlhps"
3375 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3378 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3379 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3380 (parallel [(const_int 0)
3384 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3386 movlhps\t{%2, %0|%0, %2}
3387 movhps\t{%2, %0|%0, %2}
3388 movlps\t{%2, %H0|%H0, %2}"
3389 [(set_attr "type" "ssemov")
3390 (set_attr "mode" "V4SF,V2SF,V2SF")])
3392 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selector picks elements 2,10,3,11 and
;; 6,14,7,15, i.e. it interleaves operand 1 and operand 2 elements
;; pairwise (operand 2's indices are offset by 8).
3393 (define_insn "avx_unpckhps256"
3394 [(set (match_operand:V8SF 0 "register_operand" "=x")
3397 (match_operand:V8SF 1 "register_operand" "x")
3398 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3399 (parallel [(const_int 2) (const_int 10)
3400 (const_int 3) (const_int 11)
3401 (const_int 6) (const_int 14)
3402 (const_int 7) (const_int 15)])))]
3404 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3405 [(set_attr "type" "sselog")
3406 (set_attr "prefix" "vex")
3407 (set_attr "mode" "V8SF")])
;; AVX 128-bit high interleave: selector 2,6,3,7 over operands 1 and 2,
;; emitted as three-operand vunpckhps.
3409 (define_insn "*avx_interleave_highv4sf"
3410 [(set (match_operand:V4SF 0 "register_operand" "=x")
3413 (match_operand:V4SF 1 "register_operand" "x")
3414 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3415 (parallel [(const_int 2) (const_int 6)
3416 (const_int 3) (const_int 7)])))]
3418 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3419 [(set_attr "type" "sselog")
3420 (set_attr "prefix" "vex")
3421 (set_attr "mode" "V4SF")])
;; SSE high interleave: selector 2,6,3,7 with operand 1 tied to the
;; destination, emitted as two-operand unpckhps.
3423 (define_insn "vec_interleave_highv4sf"
3424 [(set (match_operand:V4SF 0 "register_operand" "=x")
3427 (match_operand:V4SF 1 "register_operand" "0")
3428 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3429 (parallel [(const_int 2) (const_int 6)
3430 (const_int 3) (const_int 7)])))]
3432 "unpckhps\t{%2, %0|%0, %2}"
3433 [(set_attr "type" "sselog")
3434 (set_attr "mode" "V4SF")])
3436 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selector picks elements 0,8,1,9 and
;; 4,12,5,13, i.e. it interleaves operand 1 and operand 2 elements
;; pairwise (operand 2's indices are offset by 8).
3437 (define_insn "avx_unpcklps256"
3438 [(set (match_operand:V8SF 0 "register_operand" "=x")
3441 (match_operand:V8SF 1 "register_operand" "x")
3442 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3443 (parallel [(const_int 0) (const_int 8)
3444 (const_int 1) (const_int 9)
3445 (const_int 4) (const_int 12)
3446 (const_int 5) (const_int 13)])))]
3448 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3449 [(set_attr "type" "sselog")
3450 (set_attr "prefix" "vex")
3451 (set_attr "mode" "V8SF")])
;; AVX 128-bit low interleave: selector 0,4,1,5 over operands 1 and 2,
;; emitted as three-operand vunpcklps.
3453 (define_insn "*avx_interleave_lowv4sf"
3454 [(set (match_operand:V4SF 0 "register_operand" "=x")
3457 (match_operand:V4SF 1 "register_operand" "x")
3458 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3459 (parallel [(const_int 0) (const_int 4)
3460 (const_int 1) (const_int 5)])))]
3462 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3463 [(set_attr "type" "sselog")
3464 (set_attr "prefix" "vex")
3465 (set_attr "mode" "V4SF")])
;; SSE low interleave: selector 0,4,1,5 with operand 1 tied to the
;; destination, emitted as two-operand unpcklps.
3467 (define_insn "vec_interleave_lowv4sf"
3468 [(set (match_operand:V4SF 0 "register_operand" "=x")
3471 (match_operand:V4SF 1 "register_operand" "0")
3472 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3473 (parallel [(const_int 0) (const_int 4)
3474 (const_int 1) (const_int 5)])))]
3476 "unpcklps\t{%2, %0|%0, %2}"
3477 [(set_attr "type" "sselog")
3478 (set_attr "mode" "V4SF")])
3480 ;; These are modeled with the same vec_concat as the others so that we
3481 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector 1,1,3,3,5,5,7,7 duplicates every
;; odd-indexed element of operand 1 (register or memory).
3482 (define_insn "avx_movshdup256"
3483 [(set (match_operand:V8SF 0 "register_operand" "=x")
3486 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3488 (parallel [(const_int 1) (const_int 1)
3489 (const_int 3) (const_int 3)
3490 (const_int 5) (const_int 5)
3491 (const_int 7) (const_int 7)])))]
3493 "vmovshdup\t{%1, %0|%0, %1}"
3494 [(set_attr "type" "sse")
3495 (set_attr "prefix" "vex")
3496 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a V4SF operand (register or memory).  The "%v"
;; template prefix and prefix "maybe_vex" cover both the SSE3 and the
;; VEX encoding; prefix_rep is set to 1.
3498 (define_insn "sse3_movshdup"
3499 [(set (match_operand:V4SF 0 "register_operand" "=x")
3502 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3504 (parallel [(const_int 1)
3509 "%vmovshdup\t{%1, %0|%0, %1}"
3510 [(set_attr "type" "sse")
3511 (set_attr "prefix_rep" "1")
3512 (set_attr "prefix" "maybe_vex")
3513 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selector 0,0,2,2,4,4,6,6 duplicates every
;; even-indexed element of operand 1 (register or memory).
3515 (define_insn "avx_movsldup256"
3516 [(set (match_operand:V8SF 0 "register_operand" "=x")
3519 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3521 (parallel [(const_int 0) (const_int 0)
3522 (const_int 2) (const_int 2)
3523 (const_int 4) (const_int 4)
3524 (const_int 6) (const_int 6)])))]
3526 "vmovsldup\t{%1, %0|%0, %1}"
3527 [(set_attr "type" "sse")
3528 (set_attr "prefix" "vex")
3529 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a V4SF operand (register or memory).  The "%v"
;; template prefix and prefix "maybe_vex" cover both the SSE3 and the
;; VEX encoding; prefix_rep is set to 1.
3531 (define_insn "sse3_movsldup"
3532 [(set (match_operand:V4SF 0 "register_operand" "=x")
3535 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3537 (parallel [(const_int 0)
3542 "%vmovsldup\t{%1, %0|%0, %1}"
3543 [(set_attr "type" "sse")
3544 (set_attr "prefix_rep" "1")
3545 (set_attr "prefix" "maybe_vex")
3546 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3.  The
;; immediate is split into four 2-bit fields and turned into the eight
;; explicit selectors expected by avx_shufps256_1: fields 0 and 1 index
;; operand 1 (0..3), fields 2 and 3 index operand 2 (+8), and the same
;; four fields are repeated for the upper lane with offsets +4 and +12.
3548 (define_expand "avx_shufps256"
3549 [(match_operand:V8SF 0 "register_operand" "")
3550 (match_operand:V8SF 1 "register_operand" "")
3551 (match_operand:V8SF 2 "nonimmediate_operand" "")
3552 (match_operand:SI 3 "const_int_operand" "")]
3555 int mask = INTVAL (operands[3]);
3556 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3557 GEN_INT ((mask >> 0) & 3),
3558 GEN_INT ((mask >> 2) & 3),
3559 GEN_INT (((mask >> 4) & 3) + 8),
3560 GEN_INT (((mask >> 6) & 3) + 8),
3561 GEN_INT (((mask >> 0) & 3) + 4),
3562 GEN_INT (((mask >> 2) & 3) + 4),
3563 GEN_INT (((mask >> 4) & 3) + 12),
3564 GEN_INT (((mask >> 6) & 3) + 12)));
3568 ;; Each 2-bit field in the mask selects 2 elements (one per 128-bit lane).
;; 256-bit vshufps with explicit element selectors (operands 3..10).
;; The condition requires every upper-lane selector (operands 7..10) to
;; equal the corresponding lower-lane selector (operands 3..6) plus 4.
;; The output code rebuilds the 8-bit immediate from operands 3..6,
;; stores it back into operands[3], and prints it as %3.
3569 (define_insn "avx_shufps256_1"
3570 [(set (match_operand:V8SF 0 "register_operand" "=x")
3573 (match_operand:V8SF 1 "register_operand" "x")
3574 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3575 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3576 (match_operand 4 "const_0_to_3_operand" "")
3577 (match_operand 5 "const_8_to_11_operand" "")
3578 (match_operand 6 "const_8_to_11_operand" "")
3579 (match_operand 7 "const_4_to_7_operand" "")
3580 (match_operand 8 "const_4_to_7_operand" "")
3581 (match_operand 9 "const_12_to_15_operand" "")
3582 (match_operand 10 "const_12_to_15_operand" "")])))]
3584 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3585 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3586 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3587 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3590 mask = INTVAL (operands[3]);
3591 mask |= INTVAL (operands[4]) << 2;
3592 mask |= (INTVAL (operands[5]) - 8) << 4;
3593 mask |= (INTVAL (operands[6]) - 8) << 6;
3594 operands[3] = GEN_INT (mask);
3596 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3598 [(set_attr "type" "sselog")
3599 (set_attr "length_immediate" "1")
3600 (set_attr "prefix" "vex")
3601 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3.  It is
;; split into four 2-bit fields and passed to gen_sse_shufps_v4sf as
;; explicit selectors: fields 0 and 1 index operand 1 (0..3), fields 2
;; and 3 index operand 2 (offset by 4).
3603 (define_expand "sse_shufps"
3604 [(match_operand:V4SF 0 "register_operand" "")
3605 (match_operand:V4SF 1 "register_operand" "")
3606 (match_operand:V4SF 2 "nonimmediate_operand" "")
3607 (match_operand:SI 3 "const_int_operand" "")]
3610 int mask = INTVAL (operands[3]);
3611 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3612 GEN_INT ((mask >> 0) & 3),
3613 GEN_INT ((mask >> 2) & 3),
3614 GEN_INT (((mask >> 4) & 3) + 4),
3615 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX shufps over the SSEMODE4S iterator with explicit selectors
;; (operands 3..6) into the concatenation of operands 1 and 2.  The
;; output code folds the selectors back into the 8-bit immediate
;; (operands 5 and 6 are rebased by -4), stores it in operands[3], and
;; emits three-operand vshufps.
3619 (define_insn "*avx_shufps_<mode>"
3620 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3621 (vec_select:SSEMODE4S
3622 (vec_concat:<ssedoublesizemode>
3623 (match_operand:SSEMODE4S 1 "register_operand" "x")
3624 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3625 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3626 (match_operand 4 "const_0_to_3_operand" "")
3627 (match_operand 5 "const_4_to_7_operand" "")
3628 (match_operand 6 "const_4_to_7_operand" "")])))]
3632 mask |= INTVAL (operands[3]) << 0;
3633 mask |= INTVAL (operands[4]) << 2;
3634 mask |= (INTVAL (operands[5]) - 4) << 4;
3635 mask |= (INTVAL (operands[6]) - 4) << 6;
3636 operands[3] = GEN_INT (mask);
3638 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3640 [(set_attr "type" "sselog")
3641 (set_attr "length_immediate" "1")
3642 (set_attr "prefix" "vex")
3643 (set_attr "mode" "V4SF")])
;; SSE shufps over the SSEMODE4S iterator; operand 1 is tied to the
;; destination.  The output code folds selectors 3..6 back into the
;; 8-bit immediate (operands 5 and 6 are rebased by -4), stores it in
;; operands[3], and emits two-operand shufps.
3645 (define_insn "sse_shufps_<mode>"
3646 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3647 (vec_select:SSEMODE4S
3648 (vec_concat:<ssedoublesizemode>
3649 (match_operand:SSEMODE4S 1 "register_operand" "0")
3650 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3651 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3652 (match_operand 4 "const_0_to_3_operand" "")
3653 (match_operand 5 "const_4_to_7_operand" "")
3654 (match_operand 6 "const_4_to_7_operand" "")])))]
3658 mask |= INTVAL (operands[3]) << 0;
3659 mask |= INTVAL (operands[4]) << 2;
3660 mask |= (INTVAL (operands[5]) - 4) << 4;
3661 mask |= (INTVAL (operands[6]) - 4) << 6;
3662 operands[3] = GEN_INT (mask);
3664 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3666 [(set_attr "type" "sselog")
3667 (set_attr "length_immediate" "1")
3668 (set_attr "mode" "V4SF")])
;; Extract the high half (elements 2 and 3) of the V4SF operand 1 into
;; the V2SF operand 0.  Alternatives:
;;   0: memory destination            -> movhps store
;;   1: register to register          -> movhlps
;;   2: offsettable memory source     -> movlps load through %H1
;; "%v" plus prefix "maybe_vex" selects the VEX form when applicable.
3670 (define_insn "sse_storehps"
3671 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3673 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3674 (parallel [(const_int 2) (const_int 3)])))]
3677 %vmovhps\t{%1, %0|%0, %1}
3678 %vmovhlps\t{%1, %d0|%d0, %1}
3679 %vmovlps\t{%H1, %d0|%d0, %H1}"
3680 [(set_attr "type" "ssemov")
3681 (set_attr "prefix" "maybe_vex")
3682 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: elements 0 and 1 of the
;; V4SF operand 1 are combined with the V2SF operand 2.  Operands are
;; normalized with ix86_fixup_binary_operands before matching.
3684 (define_expand "sse_loadhps_exp"
3685 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3688 (match_operand:V4SF 1 "nonimmediate_operand" "")
3689 (parallel [(const_int 0) (const_int 1)]))
3690 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3692 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: keep elements 0 and 1 of operand 1 and take the other
;; half from the V2SF operand 2.  Alternatives:
;;   0: operand 2 in memory           -> vmovhps load
;;   1: operand 2 in a register       -> vmovlhps
;;   2: offsettable memory destination (operand 1 tied to it)
;;                                    -> vmovlps store to %H0
3694 (define_insn "*avx_loadhps"
3695 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3698 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3699 (parallel [(const_int 0) (const_int 1)]))
3700 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3703 vmovhps\t{%2, %1, %0|%0, %1, %2}
3704 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3705 vmovlps\t{%2, %H0|%H0, %2}"
3706 [(set_attr "type" "ssemov")
3707 (set_attr "prefix" "vex")
3708 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE loadhps; operand 1 is tied to the destination in every
;; alternative.  Alternatives: memory source -> movhps; register source
;; -> movlhps; offsettable memory destination -> movlps store to %H0.
3710 (define_insn "sse_loadhps"
3711 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3714 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3715 (parallel [(const_int 0) (const_int 1)]))
3716 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3719 movhps\t{%2, %0|%0, %2}
3720 movlhps\t{%2, %0|%0, %2}
3721 movlps\t{%2, %H0|%H0, %2}"
3722 [(set_attr "type" "ssemov")
3723 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX storelps: extract the low half (elements 0 and 1) of the V4SF
;; operand 1 into the V2SF operand 0.  Alternatives:
;;   0: memory destination            -> vmovlps store
;;   1: register to register          -> vmovaps (full-register copy)
;;   2: memory source                 -> vmovlps load merged into %0
;; The "mode" of alternative 1 is V4SF, matching the vmovaps mnemonic
;; and the non-AVX sse_storelps pattern (it was previously tagged V2DF).
3725 (define_insn "*avx_storelps"
3726 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3728 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3729 (parallel [(const_int 0) (const_int 1)])))]
3732 vmovlps\t{%1, %0|%0, %1}
3733 vmovaps\t{%1, %0|%0, %1}
3734 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3735 [(set_attr "type" "ssemov")
3736 (set_attr "prefix" "vex")
3737 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE storelps: extract the low half (elements 0 and 1) of the V4SF
;; operand 1 into the V2SF operand 0.  Alternatives: memory destination
;; -> movlps store; register to register -> movaps; memory source ->
;; movlps load.
3739 (define_insn "sse_storelps"
3740 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3742 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3743 (parallel [(const_int 0) (const_int 1)])))]
3746 movlps\t{%1, %0|%0, %1}
3747 movaps\t{%1, %0|%0, %1}
3748 movlps\t{%1, %0|%0, %1}"
3749 [(set_attr "type" "ssemov")
3750 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: the V2SF operand 2 is
;; combined with elements 2 and 3 of the V4SF operand 1.  Operands are
;; normalized with ix86_fixup_binary_operands before matching.
3752 (define_expand "sse_loadlps_exp"
3753 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3755 (match_operand:V2SF 2 "nonimmediate_operand" "")
3757 (match_operand:V4SF 1 "nonimmediate_operand" "")
3758 (parallel [(const_int 2) (const_int 3)]))))]
3760 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: take the low half from the V2SF operand 2 and keep
;; elements 2 and 3 of the V4SF operand 1.  Alternatives:
;;   0: all registers                 -> vshufps with immediate 0xe4
;;   1: operand 2 in memory           -> three-operand vmovlps load
;;   2: memory destination (operand 1 tied to it) -> vmovlps store
;; Alternative 0 must use the VEX mnemonic vshufps: the template has
;; three register operands plus an immediate, which the legacy shufps
;; encoding cannot express, and the pattern is marked prefix "vex".
3762 (define_insn "*avx_loadlps"
3763 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3765 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3767 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3768 (parallel [(const_int 2) (const_int 3)]))))]
3771 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3772 vmovlps\t{%2, %1, %0|%0, %1, %2}
3773 vmovlps\t{%2, %0|%0, %2}"
3774 [(set_attr "type" "sselog,ssemov,ssemov")
3775 (set_attr "length_immediate" "1,*,*")
3776 (set_attr "prefix" "vex")
3777 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE loadlps.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;                                    -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied to the destination
;;                                    -> movlps load
;;   2: memory destination (operand 1 tied to it) -> movlps store
3779 (define_insn "sse_loadlps"
3780 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3782 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3784 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3785 (parallel [(const_int 2) (const_int 3)]))))]
3788 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3789 movlps\t{%2, %0|%0, %2}
3790 movlps\t{%2, %0|%0, %2}"
3791 [(set_attr "type" "sselog,ssemov,ssemov")
3792 (set_attr "length_immediate" "1,*,*")
3793 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX register-to-register movss: V4SF registers for operands 0, 1
;; and 2, emitted as three-operand vmovss.
3795 (define_insn "*avx_movss"
3796 [(set (match_operand:V4SF 0 "register_operand" "=x")
3798 (match_operand:V4SF 2 "register_operand" "x")
3799 (match_operand:V4SF 1 "register_operand" "x")
3802 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3803 [(set_attr "type" "ssemov")
3804 (set_attr "prefix" "vex")
3805 (set_attr "mode" "SF")])
;; SSE register-to-register movss: operand 1 is tied to the destination
;; ("0") and operand 2 is the source register; emitted as two-operand
;; movss.
3807 (define_insn "sse_movss"
3808 [(set (match_operand:V4SF 0 "register_operand" "=x")
3810 (match_operand:V4SF 2 "register_operand" "x")
3811 (match_operand:V4SF 1 "register_operand" "0")
3814 "movss\t{%2, %0|%0, %2}"
3815 [(set_attr "type" "ssemov")
3816 (set_attr "mode" "SF")])
;; Broadcast the SF operand 1 into every element of the V4SF operand 0.
;; The preparation code forces operand 1 into a register.  Operand 1 is
;; a scalar SF value, so force_reg must be given SFmode; passing
;; V4SFmode would create a V4SF pseudo and try to move an SF value
;; into it.
3818 (define_expand "vec_dupv4sf"
3819 [(set (match_operand:V4SF 0 "register_operand" "")
3821 (match_operand:SF 1 "nonimmediate_operand" "")))]
3825 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of an SF value into a V4SF register.  Alternatives:
;;   0: register source -> vshufps with immediate 0 on %1, %1
;;   1: memory source   -> vbroadcastss
3828 (define_insn "*vec_dupv4sf_avx"
3829 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3831 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3834 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3835 vbroadcastss\t{%1, %0|%0, %1}"
3836 [(set_attr "type" "sselog1,ssemov")
3837 (set_attr "length_immediate" "1,0")
3838 (set_attr "prefix_extra" "0,1")
3839 (set_attr "prefix" "vex")
3840 (set_attr "mode" "V4SF")])
;; SSE broadcast of an SF register into V4SF: the source is tied to the
;; destination ("0") and the value is replicated in place with
;; shufps $0, %0, %0.
3842 (define_insn "*vec_dupv4sf"
3843 [(set (match_operand:V4SF 0 "register_operand" "=x")
3845 (match_operand:SF 1 "register_operand" "0")))]
3847 "shufps\t{$0, %0, %0|%0, %0, 0}"
3848 [(set_attr "type" "sselog1")
3849 (set_attr "length_immediate" "1")
3850 (set_attr "mode" "V4SF")])
;; AVX concatenation of two SF values into a V2SF register.
;; Alternatives:
;;   0: both in SSE registers          -> vunpcklps
;;   1: operand 2 in memory            -> vinsertps $0x10
;;   2: operand 1 in memory, operand 2 constraint "C" -> vmovss load
;;   3: MMX destination, operand 2 MMX register or memory -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
;; The two MMX alternatives (3 and 4) keep prefix "orig"; the others
;; use "vex".
3852 (define_insn "*vec_concatv2sf_avx"
3853 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3855 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3856 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3859 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3860 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3861 vmovss\t{%1, %0|%0, %1}
3862 punpckldq\t{%2, %0|%0, %2}
3863 movd\t{%1, %0|%0, %1}"
3864 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3865 (set_attr "length_immediate" "*,1,*,*,*")
3866 (set_attr "prefix_extra" "*,1,*,*,*")
3867 (set (attr "prefix")
3868 (if_then_else (eq_attr "alternative" "3,4")
3869 (const_string "orig")
3870 (const_string "vex")))
3871 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3873 ;; Although insertps takes register source, we prefer
3874 ;; unpcklps with register source since it is shorter.
;; Concatenation of two SF values into a V2SF register, with insertps
;; available.  Operand 1 is tied to the destination except in the two
;; load alternatives.  Alternatives: unpcklps (register source),
;; insertps $0x10 (memory source), movss load (operand 2 "C"),
;; punpckldq and movd for an MMX destination.
3875 (define_insn "*vec_concatv2sf_sse4_1"
3876 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3878 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3879 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3882 unpcklps\t{%2, %0|%0, %2}
3883 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3884 movss\t{%1, %0|%0, %1}
3885 punpckldq\t{%2, %0|%0, %2}
3886 movd\t{%1, %0|%0, %1}"
3887 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3888 (set_attr "prefix_data16" "*,1,*,*,*")
3889 (set_attr "prefix_extra" "*,1,*,*,*")
3890 (set_attr "length_immediate" "*,1,*,*,*")
3891 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3893 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3894 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3895 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Plain SSE concatenation of two SF values into a V2SF register.
;; Operand 2 is restricted to reg_or_0_operand, so there is no
;; memory-source alternative.  Alternatives: unpcklps, movss load
;; (operand 2 "C"), and punpckldq / movd for an MMX destination.
3896 (define_insn "*vec_concatv2sf_sse"
3897 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3899 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3900 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3903 unpcklps\t{%2, %0|%0, %2}
3904 movss\t{%1, %0|%0, %1}
3905 punpckldq\t{%2, %0|%0, %2}
3906 movd\t{%1, %0|%0, %1}"
3907 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3908 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX concatenation of two V2SF halves into a V4SF register:
;; register second half -> vmovlhps; memory second half -> vmovhps.
3910 (define_insn "*vec_concatv4sf_avx"
3911 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3913 (match_operand:V2SF 1 "register_operand" " x,x")
3914 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3917 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3918 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3919 [(set_attr "type" "ssemov")
3920 (set_attr "prefix" "vex")
3921 (set_attr "mode" "V4SF,V2SF")])
;; SSE concatenation of two V2SF halves into a V4SF register; operand 1
;; is tied to the destination.  Register second half -> movlhps; memory
;; second half -> movhps.
3923 (define_insn "*vec_concatv4sf_sse"
3924 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3926 (match_operand:V2SF 1 "register_operand" " 0,0")
3927 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3930 movlhps\t{%2, %0|%0, %2}
3931 movhps\t{%2, %0|%0, %2}"
3932 [(set_attr "type" "ssemov")
3933 (set_attr "mode" "V4SF,V2SF")])
3935 (define_expand "vec_init<mode>"
3936 [(match_operand:SSEMODE 0 "register_operand" "")
3937 (match_operand 1 "" "")]
3940 ix86_expand_vector_init (false, operands[0], operands[1]);