1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16-byte integer vector modes handled by SSE.
;; Element layouts: 16 x QI, 8 x HI, 4 x SI and 2 x DI.
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE: the four integer vector modes
;; followed by the float vector modes V4SF and V2DF.
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; The SSEMODE list with V1TI added.  In this file it is iterated by the
;; 16-byte mov<mode>, *mov<mode>_internal, push<mode>1 and
;; movmisalign<mode> patterns.
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32-byte integer vector modes handled by AVX.
;; Each is the double-size counterpart of one of the 16-byte integer
;; modes (see the ssedoublesizemode mapping in this file).
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX: the four integer vector modes
;; followed by the float vector modes V8SF and V4DF.  Iterated in this file
;; by the 32-byte mov<mode>, push<mode>1 and movmisalign<mode> expanders.
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI-element vector modes handled by AVX (one 32-byte, one 16-byte).
;; Iterated in this file by the avx_movdqu and avx_lddqu patterns.
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI-element vector modes handled by AVX (one 32-byte, one 16-byte).
;; Iterated in this file by the avx_movnt<mode> pattern that emits vmovntdq.
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX: the six 16-byte modes of SSEMODE
;; followed by the six 32-byte modes of AVX256MODE, in that order.
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; The AVXMODE list with V1TI added after V2DI.  Iterated in this file by
;; the *avx_mov<mode>_internal move insn.
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mixed subsets of the modes above.  For the integer iterators, the digits
;; in the name list the element sizes, in bytes, that are included
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar float modes together with the 16-byte float vector modes.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; The two 32-byte float vector modes.
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; The two 16-byte float vector modes.  This is the iterator behind most of
;; the non-AVX float arithmetic patterns in this file.
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX groupings.  Iterators with 256 in the name contain only 32-byte
;; modes; the remaining ones mix 16-byte and 32-byte modes.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; All four float vector modes, 16-byte first.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
;; Four-element float vector modes.
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; DF-element and SF-element float vector modes respectively.
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE: AVXMODEDCVTDQ2PS lists exactly the same modes as AVXMODEFSP.
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches: each iterator pairs the float and the integer
;; 16-byte vector mode that share an element count (four or two elements).
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern.
;; V16QI, V8HI and V4SI are unconditional; the V2DI entry carries the
;; condition string TARGET_SSE4_2 and is only enabled when that holds.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
79 (define_mode_iterator SSEMODE_EO
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level, as a lowercase string.
;; SF and V4SF map to sse; DF and V2DF map to sse2.  Substituted as <sse>
;; into pattern names in this file, e.g. <sse>_movnt<mode> and
;; <sse>_div<mode>3.
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix.
;; One letter per element size: b (QI), w (HI), d (SI), q (DI).
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the fma4 suffix for the 32-byte float vector modes:
;; ps for V8SF, pd for V4DF.
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Suffix ss or sd chosen by element type only: the scalar modes and the
;; 16-byte vector modes with the same element map to the same string.
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
97 ;; Mapping of the avx suffix: scalar float modes map to ss and sd, while
;; the 16-byte float vector modes map to ps and pd.
98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
99 (V4SF "ps") (V2DF "pd")])
;; Single-letter element suffix (s or d) for the 16-byte float vector modes.
;; Output templates in this file append it to stems such as movup, divp
;; and sqrts.
101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
;; Suffix string for the two four-element 16-byte modes: ss for V4SF and
;; d for V4SI.  No use of this attribute is visible in this part of the file.
103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
105 ;; Mapping of the max integer size for xop rotate immediate constraint.
;; Each value is the element width in bits minus one (7, 15, 31, 63).
106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
108 ;; Mapping of vector modes back to the scalar modes.
;; Covers the six 16-byte modes only.  In this file it supplies the mode
;; attribute of the scalar (vm...) arithmetic insns via <ssescalarmode>.
109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
110 (V16QI "QI") (V8HI "HI")
111 (V4SI "SI") (V2DI "DI")])
113 ;; Mapping of vector modes to a vector mode of double size: same element
;; mode, twice the element count.  The first two rows map 16-byte modes to
;; 32-byte modes; the last two rows map 32-byte modes to 64-byte modes.
114 (define_mode_attr ssedoublesizemode
115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
120 ;; Number of scalar elements in each vector type, as a decimal string.
;; First row: 16-byte modes.  Second row: 32-byte modes.
121 (define_mode_attr ssescalarnum
122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value substituted for the mode insn attribute of AVX patterns, e.g.
;; (set_attr "mode" "<avxvecmode>") in the AVX move insns of this file.
;; 16-byte integer vector modes (including V1TI) map to TI, 32-byte integer
;; vector modes map to OI, and float vector modes map to themselves.
126 (define_mode_attr avxvecmode
127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; SF-element vector mode of the same total size as each integer vector
;; mode: V4SF for the 16-byte modes, V8SF for the 32-byte modes.
130 (define_mode_attr avxvecpsmode
131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode with the same element mode and half the element count.
;; The first two rows map 32-byte modes to 16-byte modes.  The last row maps
;; V16QI, V8HI, V4SI and V4SF to 8-byte modes; there is no entry for V2DI
;; or V2DF.
133 (define_mode_attr avxhalfvecmode
134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Scalar element mode of every mode in AVXMODE.  First row: 16-byte modes.
;; Second row: 32-byte modes.
137 (define_mode_attr avxscalarmode
138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Swaps SF and SI element modes while keeping the element count:
;; V4SF <-> V4SI and V8SF <-> V8SI.
;; NOTE(review): the name suggests use by int/float conversion patterns;
;; no user is visible in this part of the file.
140 (define_mode_attr avxcvtvecmode
141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer vector mode with the same element count and element size as
;; each float vector mode (DF -> DI, SF -> SI).
142 (define_mode_attr avxpermvecmode
143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Single-letter suffix keyed by element size: modes with 4-byte elements
;; map to s, modes with 8-byte elements map to d.  Unlike ssemodesuffixf2c
;; this also has entries for the integer modes V8SI and V4DI.  Output
;; templates in this file append it to stems such as vmovup and vdivp.
144 (define_mode_attr avxmodesuffixf2c
145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
146 (define_mode_attr avxmodesuffixp
147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix that distinguishes the 32-byte variants: the listed
;; 32-byte modes map to 256 and the listed 16-byte modes map to the empty
;; string.  Used in names such as avx_movdqu<avxmodesuffix> in this file.
149 (define_mode_attr avxmodesuffix
150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
151 (V8SI "256") (V8SF "256") (V4DF "256")])
153 ;; Mapping of immediate bits for blend instructions.
;; Each value is 2^N - 1 for a mode with N elements, i.e. a mask with one
;; bit set per element.
154 (define_mode_attr blendbits
155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
157 ;; Mapping of immediate bits for pinsr instructions.
;; Each value is 2^(N-1) for a mode with N elements: 2^15, 2^7 and 2^3.
158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
168 (define_expand "mov<mode>"
169 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
170 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
173 ix86_expand_vector_move (<MODE>mode, operands);
177 (define_insn "*avx_mov<mode>_internal"
178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || register_operand (operands[1], <MODE>mode))"
184 switch (which_alternative)
187 return standard_sse_constant_opcode (insn, operands[1]);
190 switch (get_attr_mode (insn))
194 return "vmovaps\t{%1, %0|%0, %1}";
197 return "vmovapd\t{%1, %0|%0, %1}";
199 return "vmovdqa\t{%1, %0|%0, %1}";
205 [(set_attr "type" "sselog1,ssemov,ssemov")
206 (set_attr "prefix" "vex")
207 (set_attr "mode" "<avxvecmode>")])
209 ;; All of these patterns are enabled for SSE1 as well as SSE2.
210 ;; This is essential for maintaining stable calling conventions.
212 (define_expand "mov<mode>"
213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
217 ix86_expand_vector_move (<MODE>mode, operands);
221 (define_insn "*mov<mode>_internal"
222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
225 && (register_operand (operands[0], <MODE>mode)
226 || register_operand (operands[1], <MODE>mode))"
228 switch (which_alternative)
231 return standard_sse_constant_opcode (insn, operands[1]);
234 switch (get_attr_mode (insn))
237 return "movaps\t{%1, %0|%0, %1}";
239 return "movapd\t{%1, %0|%0, %1}";
241 return "movdqa\t{%1, %0|%0, %1}";
247 [(set_attr "type" "sselog1,ssemov,ssemov")
249 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
250 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
251 (and (eq_attr "alternative" "2")
252 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
254 (const_string "V4SF")
255 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
256 (const_string "V4SF")
257 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
258 (const_string "V2DF")
260 (const_string "TI")))])
262 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
263 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
264 ;; from memory, we'd prefer to load the memory directly into the %xmm
265 ;; register. To facilitate this happy circumstance, this pattern won't
266 ;; split until after register allocation. If the 64-bit value didn't
267 ;; come from memory, this is the best we can do. This is much better
268 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
271 (define_insn_and_split "movdi_to_sse"
273 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
274 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
275 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
276 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
278 "&& reload_completed"
281 if (register_operand (operands[1], DImode))
283 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
284 Assemble the 64-bit DImode value in an xmm register. */
285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
286 gen_rtx_SUBREG (SImode, operands[1], 0)));
287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
288 gen_rtx_SUBREG (SImode, operands[1], 4)));
289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
292 else if (memory_operand (operands[1], DImode))
293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
294 operands[1], const0_rtx));
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
302 "TARGET_SSE && reload_completed"
305 (vec_duplicate:V4SF (match_dup 1))
309 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
310 operands[2] = CONST0_RTX (V4SFmode);
314 [(set (match_operand:V2DF 0 "register_operand" "")
315 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
316 "TARGET_SSE2 && reload_completed"
317 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
319 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
320 operands[2] = CONST0_RTX (DFmode);
323 (define_expand "push<mode>1"
324 [(match_operand:AVX256MODE 0 "register_operand" "")]
327 ix86_expand_push (<MODE>mode, operands[0]);
331 (define_expand "push<mode>1"
332 [(match_operand:SSEMODE16 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
339 (define_expand "movmisalign<mode>"
340 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
341 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
344 ix86_expand_vector_move_misalign (<MODE>mode, operands);
348 (define_expand "movmisalign<mode>"
349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
353 ix86_expand_vector_move_misalign (<MODE>mode, operands);
357 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
358 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
360 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
365 [(set_attr "type" "ssemov")
366 (set_attr "movu" "1")
367 (set_attr "prefix" "vex")
368 (set_attr "mode" "<MODE>")])
370 (define_insn "sse2_movq128"
371 [(set (match_operand:V2DI 0 "register_operand" "=x")
374 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
375 (parallel [(const_int 0)]))
378 "%vmovq\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "maybe_vex")
381 (set_attr "mode" "TI")])
383 (define_insn "<sse>_movup<ssemodesuffixf2c>"
384 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
386 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
391 [(set_attr "type" "ssemov")
392 (set_attr "movu" "1")
393 (set_attr "mode" "<MODE>")])
395 (define_insn "avx_movdqu<avxmodesuffix>"
396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
401 "vmovdqu\t{%1, %0|%0, %1}"
402 [(set_attr "type" "ssemov")
403 (set_attr "movu" "1")
404 (set_attr "prefix" "vex")
405 (set_attr "mode" "<avxvecmode>")])
407 (define_insn "sse2_movdqu"
408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
412 "movdqu\t{%1, %0|%0, %1}"
413 [(set_attr "type" "ssemov")
414 (set_attr "movu" "1")
415 (set_attr "prefix_data16" "1")
416 (set_attr "mode" "TI")])
418 (define_insn "avx_movnt<mode>"
419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
421 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
423 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
424 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "vex")
427 (set_attr "mode" "<MODE>")])
429 (define_insn "<sse>_movnt<mode>"
430 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
432 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
434 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
435 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
436 [(set_attr "type" "ssemov")
437 (set_attr "mode" "<MODE>")])
439 (define_insn "avx_movnt<mode>"
440 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
442 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
445 "vmovntdq\t{%1, %0|%0, %1}"
446 [(set_attr "type" "ssecvt")
447 (set_attr "prefix" "vex")
448 (set_attr "mode" "<avxvecmode>")])
450 (define_insn "sse2_movntv2di"
451 [(set (match_operand:V2DI 0 "memory_operand" "=m")
452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
455 "movntdq\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "1")
458 (set_attr "mode" "TI")])
460 (define_insn "sse2_movntsi"
461 [(set (match_operand:SI 0 "memory_operand" "=m")
462 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
465 "movnti\t{%1, %0|%0, %1}"
466 [(set_attr "type" "ssemov")
467 (set_attr "prefix_data16" "0")
468 (set_attr "mode" "V2DF")])
470 (define_insn "avx_lddqu<avxmodesuffix>"
471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
476 "vlddqu\t{%1, %0|%0, %1}"
477 [(set_attr "type" "ssecvt")
478 (set_attr "movu" "1")
479 (set_attr "prefix" "vex")
480 (set_attr "mode" "<avxvecmode>")])
482 (define_insn "sse3_lddqu"
483 [(set (match_operand:V16QI 0 "register_operand" "=x")
484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
487 "lddqu\t{%1, %0|%0, %1}"
488 [(set_attr "type" "ssemov")
489 (set_attr "movu" "1")
490 (set_attr "prefix_data16" "0")
491 (set_attr "prefix_rep" "1")
492 (set_attr "mode" "TI")])
494 ; Expand patterns for non-temporal stores. At the moment, only those
495 ; that directly map to insns are defined; it would be possible to
496 ; define patterns for other modes that would expand to several insns.
498 (define_expand "storent<mode>"
499 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
501 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
503 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 (define_expand "storent<mode>"
507 [(set (match_operand:MODEF 0 "memory_operand" "")
509 [(match_operand:MODEF 1 "register_operand" "")]
514 (define_expand "storentv2di"
515 [(set (match_operand:V2DI 0 "memory_operand" "")
516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
521 (define_expand "storentsi"
522 [(set (match_operand:SI 0 "memory_operand" "")
523 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
528 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
530 ;; Parallel floating point arithmetic
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
534 (define_expand "<code><mode>2"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
537 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
538 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
539 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for the named <plusminus_insn><mode>3 patterns on the 32-byte
;; float vector modes (AVX256MODEF2P: V8SF and V4DF).  The plusminus code
;; iterator and the <plusminus_insn> attribute are defined outside this
;; part of the file.
;; Operand 0 must be a register; operands 1 and 2 may be register or memory.
;; Enabled when AVX256_VEC_FLOAT_MODE_P holds for the mode.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy and contains no DONE or FAIL, so the
;; RTL template below is what gets emitted.
541 (define_expand "<plusminus_insn><mode>3"
542 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
543 (plusminus:AVX256MODEF2P
544 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
545 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
546 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
547 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX insn for plusminus on all four float vector modes of AVXMODEF2P.
;; Three-operand form: the destination is its own x register, operand 1 is
;; an x register and operand 2 may be an x register or memory (xm).
;; <comm> in the constraint of operand 1 is a code attribute defined outside
;; this part of the file (presumably the commutativity marker -- verify).
;; In addition to the mode check, the insn condition requires
;; ix86_binary_operator_ok for the operands.
;; The template prints all three operands, with the AT&T order before the
;; bar and the Intel order after it.
549 (define_insn "*avx_<plusminus_insn><mode>3"
550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
551 (plusminus:AVXMODEF2P
552 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
554 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
555 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
556 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
557 [(set_attr "type" "sseadd")
558 (set_attr "prefix" "vex")
559 (set_attr "mode" "<avxvecmode>")])
;; Expander for the named <plusminus_insn><mode>3 patterns on the 16-byte
;; float vector modes (SSEMODEF2P: V4SF and V2DF).  The plusminus code
;; iterator and the <plusminus_insn> attribute are defined outside this
;; part of the file.
;; Operand 0 must be a register; operands 1 and 2 may be register or memory.
;; Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy and contains no DONE or FAIL, so the
;; RTL template below is what gets emitted.
561 (define_expand "<plusminus_insn><mode>3"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
563 (plusminus:SSEMODEF2P
564 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
565 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
566 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX insn for plusminus on the 16-byte float vector modes.
;; Two-operand destructive form: the 0 constraint ties operand 1 to the
;; destination register, and operand 2 may be an x register or memory (xm).
;; <comm> in the constraint of operand 1 is a code attribute defined outside
;; this part of the file (presumably the commutativity marker -- verify).
;; In addition to the mode check, the insn condition requires
;; ix86_binary_operator_ok for the operands.
;; The template therefore prints only operands 2 and 0, with the AT&T order
;; before the bar and the Intel order after it.
569 (define_insn "*<plusminus_insn><mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
571 (plusminus:SSEMODEF2P
572 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
574 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
576 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
580 (define_insn "*avx_vm<plusminus_insn><mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
582 (vec_merge:SSEMODEF2P
583 (plusminus:SSEMODEF2P
584 (match_operand:SSEMODEF2P 1 "register_operand" "x")
585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
588 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
589 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "prefix" "vex")
592 (set_attr "mode" "<ssescalarmode>")])
594 (define_insn "<sse>_vm<plusminus_insn><mode>3"
595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (vec_merge:SSEMODEF2P
597 (plusminus:SSEMODEF2P
598 (match_operand:SSEMODEF2P 1 "register_operand" "0")
599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
602 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
603 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "mode" "<ssescalarmode>")])
607 (define_expand "mul<mode>3"
608 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
610 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
611 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
612 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
613 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
615 (define_insn "*avx_mul<mode>3"
616 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
618 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
619 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
620 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
621 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
622 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "type" "ssemul")
624 (set_attr "prefix" "vex")
625 (set_attr "mode" "<avxvecmode>")])
627 (define_expand "mul<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
635 (define_insn "*mul<mode>3"
636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
641 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
642 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<MODE>")])
646 (define_insn "*avx_vmmul<mode>3"
647 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
648 (vec_merge:SSEMODEF2P
650 (match_operand:SSEMODEF2P 1 "register_operand" "x")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
654 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
655 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
656 [(set_attr "type" "ssemul")
657 (set_attr "prefix" "vex")
658 (set_attr "mode" "<ssescalarmode>")])
660 (define_insn "<sse>_vmmul<mode>3"
661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
662 (vec_merge:SSEMODEF2P
664 (match_operand:SSEMODEF2P 1 "register_operand" "0")
665 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
668 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
669 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
670 [(set_attr "type" "ssemul")
671 (set_attr "mode" "<ssescalarmode>")])
673 (define_expand "divv8sf3"
674 [(set (match_operand:V8SF 0 "register_operand" "")
675 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
676 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
682 && flag_finite_math_only && !flag_trapping_math
683 && flag_unsafe_math_optimizations)
685 ix86_emit_swdivsf (operands[0], operands[1],
686 operands[2], V8SFmode);
691 (define_expand "divv4df3"
692 [(set (match_operand:V4DF 0 "register_operand" "")
693 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
694 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
696 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
698 (define_insn "avx_div<mode>3"
699 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
701 (match_operand:AVXMODEF2P 1 "register_operand" "x")
702 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
703 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
704 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
705 [(set_attr "type" "ssediv")
706 (set_attr "prefix" "vex")
707 (set_attr "mode" "<MODE>")])
709 (define_expand "divv4sf3"
710 [(set (match_operand:V4SF 0 "register_operand" "")
711 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
712 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
715 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
716 && flag_finite_math_only && !flag_trapping_math
717 && flag_unsafe_math_optimizations)
719 ix86_emit_swdivsf (operands[0], operands[1],
720 operands[2], V4SFmode);
725 (define_expand "divv2df3"
726 [(set (match_operand:V2DF 0 "register_operand" "")
727 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
732 (define_insn "*avx_div<mode>3"
733 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
735 (match_operand:SSEMODEF2P 1 "register_operand" "x")
736 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
737 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
738 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
739 [(set_attr "type" "ssediv")
740 (set_attr "prefix" "vex")
741 (set_attr "mode" "<MODE>")])
743 (define_insn "<sse>_div<mode>3"
744 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
746 (match_operand:SSEMODEF2P 1 "register_operand" "0")
747 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<MODE>")])
753 (define_insn "*avx_vmdiv<mode>3"
754 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
755 (vec_merge:SSEMODEF2P
757 (match_operand:SSEMODEF2P 1 "register_operand" "x")
758 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
761 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
762 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
763 [(set_attr "type" "ssediv")
764 (set_attr "prefix" "vex")
765 (set_attr "mode" "<ssescalarmode>")])
767 (define_insn "<sse>_vmdiv<mode>3"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
769 (vec_merge:SSEMODEF2P
771 (match_operand:SSEMODEF2P 1 "register_operand" "0")
772 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
776 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
777 [(set_attr "type" "ssediv")
778 (set_attr "mode" "<ssescalarmode>")])
780 (define_insn "avx_rcpv8sf2"
781 [(set (match_operand:V8SF 0 "register_operand" "=x")
783 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
785 "vrcpps\t{%1, %0|%0, %1}"
786 [(set_attr "type" "sse")
787 (set_attr "prefix" "vex")
788 (set_attr "mode" "V8SF")])
790 (define_insn "sse_rcpv4sf2"
791 [(set (match_operand:V4SF 0 "register_operand" "=x")
793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
795 "%vrcpps\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "prefix" "maybe_vex")
799 (set_attr "mode" "V4SF")])
801 (define_insn "*avx_vmrcpv4sf2"
802 [(set (match_operand:V4SF 0 "register_operand" "=x")
804 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
806 (match_operand:V4SF 2 "register_operand" "x")
809 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
810 [(set_attr "type" "sse")
811 (set_attr "prefix" "vex")
812 (set_attr "mode" "SF")])
814 (define_insn "sse_vmrcpv4sf2"
815 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
819 (match_operand:V4SF 2 "register_operand" "0")
822 "rcpss\t{%1, %0|%0, %1}"
823 [(set_attr "type" "sse")
824 (set_attr "atom_sse_attr" "rcp")
825 (set_attr "mode" "SF")])
827 (define_expand "sqrtv8sf2"
828 [(set (match_operand:V8SF 0 "register_operand" "")
829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
841 (define_insn "avx_sqrtv8sf2"
842 [(set (match_operand:V8SF 0 "register_operand" "=x")
843 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
845 "vsqrtps\t{%1, %0|%0, %1}"
846 [(set_attr "type" "sse")
847 (set_attr "prefix" "vex")
848 (set_attr "mode" "V8SF")])
850 (define_expand "sqrtv4sf2"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
855 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
864 (define_insn "sse_sqrtv4sf2"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrtps\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "V4SF")])
874 (define_insn "sqrtv4df2"
875 [(set (match_operand:V4DF 0 "register_operand" "=x")
876 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
878 "vsqrtpd\t{%1, %0|%0, %1}"
879 [(set_attr "type" "sse")
880 (set_attr "prefix" "vex")
881 (set_attr "mode" "V4DF")])
883 (define_insn "sqrtv2df2"
884 [(set (match_operand:V2DF 0 "register_operand" "=x")
885 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
887 "%vsqrtpd\t{%1, %0|%0, %1}"
888 [(set_attr "type" "sse")
889 (set_attr "prefix" "maybe_vex")
890 (set_attr "mode" "V2DF")])
892 (define_insn "*avx_vmsqrt<mode>2"
893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
894 (vec_merge:SSEMODEF2P
896 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
897 (match_operand:SSEMODEF2P 2 "register_operand" "x")
899 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
900 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "<ssescalarmode>")])
905 (define_insn "<sse>_vmsqrt<mode>2"
906 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907 (vec_merge:SSEMODEF2P
909 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
910 (match_operand:SSEMODEF2P 2 "register_operand" "0")
912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
914 [(set_attr "type" "sse")
915 (set_attr "atom_sse_attr" "sqrt")
916 (set_attr "mode" "<ssescalarmode>")])
918 (define_expand "rsqrtv8sf2"
919 [(set (match_operand:V8SF 0 "register_operand" "")
921 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
922 "TARGET_AVX && TARGET_SSE_MATH"
924 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
928 (define_insn "avx_rsqrtv8sf2"
929 [(set (match_operand:V8SF 0 "register_operand" "=x")
931 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
933 "vrsqrtps\t{%1, %0|%0, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "V8SF")])
938 (define_expand "rsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "")
941 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
944 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
948 (define_insn "sse_rsqrtv4sf2"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
951 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
953 "%vrsqrtps\t{%1, %0|%0, %1}"
954 [(set_attr "type" "sse")
955 (set_attr "prefix" "maybe_vex")
956 (set_attr "mode" "V4SF")])
958 (define_insn "*avx_vmrsqrtv4sf2"
959 [(set (match_operand:V4SF 0 "register_operand" "=x")
961 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
963 (match_operand:V4SF 2 "register_operand" "x")
966 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
967 [(set_attr "type" "sse")
968 (set_attr "prefix" "vex")
969 (set_attr "mode" "SF")])
971 (define_insn "sse_vmrsqrtv4sf2"
972 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
976 (match_operand:V4SF 2 "register_operand" "0")
979 "rsqrtss\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sse")
981 (set_attr "mode" "SF")])
983 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
984 ;; isn't really correct, as those rtl operators aren't defined when
985 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for smaxmin on the AVX256MODEF2P modes, enabled by
;; AVX256_VEC_FLOAT_MODE_P.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then handed to
;; ix86_fixup_binary_operands_no_copy.
987 (define_expand "<code><mode>3"
988 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
989 (smaxmin:AVX256MODEF2P
990 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
991 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
992 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
994 if (!flag_finite_math_only)
995 operands[1] = force_reg (<MODE>mode, operands[1]);
996 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander <code><mode>3 for SSEMODEF2P operands, enabled by
;; SSE_VEC_FLOAT_MODE_P.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then handed to
;; ix86_fixup_binary_operands_no_copy.
999 (define_expand "<code><mode>3"
1000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1002 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1003 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1006 if (!flag_finite_math_only)
1007 operands[1] = force_reg (<MODE>mode, operands[1]);
1008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; VEX packed min/max that matches only when flag_finite_math_only is set
;; and ix86_binary_operator_ok accepts the operands.  Operand 1 carries the
;; commutative marker (%), so the two inputs may be exchanged.
1011 (define_insn "*avx_<code><mode>3_finite"
1012 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1014 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1017 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1018 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "prefix" "vex")
1021 (set_attr "mode" "<MODE>")])
;; Non-VEX packed min/max that matches only when flag_finite_math_only is
;; set and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; commutative (%) and tied to the destination ("0"), so the two-operand
;; template prints only operands 2 and 0.
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1026 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "mode" "<MODE>")])
;; VEX packed min/max used when the *_finite pattern does not apply, i.e.
;; when NaNs and signed zeros must be honoured.  In that case the hardware
;; result depends on operand order, so operand 1 must NOT be marked
;; commutative: it is a plain register operand with no "%" modifier, the
;; same shape as the non-VEX *<code><mode>3 pattern.  The <code><mode>3
;; expanders force operand 1 into a register when !flag_finite_math_only,
;; so the register_operand predicate is always satisfiable.
1034 (define_insn "*avx_<code><mode>3"
1035 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1037 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1038 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1039 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1040 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX packed min/max without the flag_finite_math_only requirement.
;; Operand 1 must be a register tied to the destination ("0") and has no
;; commutative marker, so operand order is preserved.
1045 (define_insn "*<code><mode>3"
1046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1048 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<MODE>")])
;; VEX scalar min/max expressed as a vec_merge.  Enabled by
;; AVX128_VEC_FLOAT_MODE_P; prints the three-operand scalar mnemonic
;; v<maxminfprefix>s<suffix>.  Mode attribute is the scalar element mode.
1055 (define_insn "*avx_vm<code><mode>3"
1056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1057 (vec_merge:SSEMODEF2P
1059 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1063 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1064 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sse")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar min/max expressed as a vec_merge.  Operand 1 is tied to
;; the destination ("0"); the two-operand scalar template prints operands
;; 2 and 0.  Mode attribute is the scalar element mode.
1069 (define_insn "<sse>_vm<code><mode>3"
1070 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1071 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "mode" "<ssescalarmode>")])
1082 ;; These versions of the min/max patterns implement exactly the operations
1083 ;; min = (op1 < op2 ? op1 : op2)
1084 ;; max = (!(op1 < op2) ? op1 : op2)
1085 ;; Their operands are not commutative, and thus they may be used in the
1086 ;; presence of -0.0 and NaN.
;; VEX form of the order-sensitive minimum.  Operand 1 is register-only
;; with no commutative marker; operand 2 may be memory.  Prints vminp<suffix>
;; with all three operands.
1088 (define_insn "*avx_ieee_smin<mode>3"
1089 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1091 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1092 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1094 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1096 [(set_attr "type" "sseadd")
1097 (set_attr "prefix" "vex")
1098 (set_attr "mode" "<avxvecmode>")])
;; VEX form of the order-sensitive maximum.  Operand 1 is register-only
;; with no commutative marker; operand 2 may be memory.  Prints vmaxp<suffix>
;; with all three operands.
1100 (define_insn "*avx_ieee_smax<mode>3"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX form of the order-sensitive minimum.  Operand 1 is tied to the
;; destination ("0") with no commutative marker; prints minp<suffix> with
;; operands 2 and 0.
1112 (define_insn "*ieee_smin<mode>3"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1118 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1119 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; Non-VEX form of the order-sensitive maximum.  Operand 1 is tied to the
;; destination ("0") with no commutative marker; prints maxp<suffix> with
;; operands 2 and 0.
1123 (define_insn "*ieee_smax<mode>3"
1124 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1126 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1127 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1129 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1130 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract insn.  The pattern combines an operation on operands
;; 1 and 2 with (minus op1 op2) on the same operands and prints the
;; three-operand VEX mnemonic vaddsubps.
1134 (define_insn "avx_addsubv8sf3"
1135 [(set (match_operand:V8SF 0 "register_operand" "=x")
1138 (match_operand:V8SF 1 "register_operand" "x")
1139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1140 (minus:V8SF (match_dup 1) (match_dup 2))
1143 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1144 [(set_attr "type" "sseadd")
1145 (set_attr "prefix" "vex")
1146 (set_attr "mode" "V8SF")])
;; V4DF add/subtract insn.  The pattern combines an operation on operands
;; 1 and 2 with (minus op1 op2) on the same operands and prints the
;; three-operand VEX mnemonic vaddsubpd.
1148 (define_insn "avx_addsubv4df3"
1149 [(set (match_operand:V4DF 0 "register_operand" "=x")
1152 (match_operand:V4DF 1 "register_operand" "x")
1153 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1154 (minus:V4DF (match_dup 1) (match_dup 2))
1157 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1158 [(set_attr "type" "sseadd")
1159 (set_attr "prefix" "vex")
1160 (set_attr "mode" "V4DF")])
;; VEX V4SF add/subtract insn.  Operand 1 is an independent register
;; input (not tied to the destination); prints three-operand vaddsubps.
1162 (define_insn "*avx_addsubv4sf3"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1166 (match_operand:V4SF 1 "register_operand" "x")
1167 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1168 (minus:V4SF (match_dup 1) (match_dup 2))
1171 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V4SF")])
;; Non-VEX V4SF add/subtract insn.  Operand 1 is tied to the destination
;; ("0"); prints two-operand addsubps.  The prefix_rep attribute is set
;; to 1 for this encoding.
1176 (define_insn "sse3_addsubv4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x")
1180 (match_operand:V4SF 1 "register_operand" "0")
1181 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1182 (minus:V4SF (match_dup 1) (match_dup 2))
1185 "addsubps\t{%2, %0|%0, %2}"
1186 [(set_attr "type" "sseadd")
1187 (set_attr "prefix_rep" "1")
1188 (set_attr "mode" "V4SF")])
;; VEX V2DF add/subtract insn.  Operand 1 is an independent register
;; input (not tied to the destination); prints three-operand vaddsubpd.
1190 (define_insn "*avx_addsubv2df3"
1191 [(set (match_operand:V2DF 0 "register_operand" "=x")
1194 (match_operand:V2DF 1 "register_operand" "x")
1195 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1196 (minus:V2DF (match_dup 1) (match_dup 2))
1199 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "type" "sseadd")
1201 (set_attr "prefix" "vex")
1202 (set_attr "mode" "V2DF")])
;; Non-VEX V2DF add/subtract insn.  Operand 1 is tied to the destination
;; ("0"); prints two-operand addsubpd.  Tagged with atom_unit "complex"
;; for Atom scheduling.
1204 (define_insn "sse3_addsubv2df3"
1205 [(set (match_operand:V2DF 0 "register_operand" "=x")
1208 (match_operand:V2DF 1 "register_operand" "0")
1209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1210 (minus:V2DF (match_dup 1) (match_dup 2))
1213 "addsubpd\t{%2, %0|%0, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "atom_unit" "complex")
1216 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract.  The RTL pairs up DF elements 0/1 and 2/3
;; selected from operand 1, and likewise elements 0/1 and 2/3 from
;; operand 2.  Printed as three-operand vh<plusminus_mnemonic>pd.
1218 (define_insn "avx_h<plusminus_insn>v4df3"
1219 [(set (match_operand:V4DF 0 "register_operand" "=x")
1224 (match_operand:V4DF 1 "register_operand" "x")
1225 (parallel [(const_int 0)]))
1226 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1233 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1234 (parallel [(const_int 0)]))
1235 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1237 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1240 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1241 [(set_attr "type" "sseadd")
1242 (set_attr "prefix" "vex")
1243 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract.  The RTL pairs up adjacent SF elements:
;; first elements 0-3 of operand 1 and of operand 2, then elements 4-7 of
;; operand 1 and of operand 2.  Printed as three-operand
;; vh<plusminus_mnemonic>ps.
1245 (define_insn "avx_h<plusminus_insn>v8sf3"
1246 [(set (match_operand:V8SF 0 "register_operand" "=x")
1252 (match_operand:V8SF 1 "register_operand" "x")
1253 (parallel [(const_int 0)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1261 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1262 (parallel [(const_int 0)]))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1265 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1270 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1277 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V8SF")])
;; VEX V4SF horizontal add/subtract.  Pairs SF elements 0/1 and 2/3 of
;; operand 1, then of operand 2.  Operand 1 is an independent register
;; input; printed as three-operand vh<plusminus_mnemonic>ps.
1288 (define_insn "*avx_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "x")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "prefix" "vex")
1313 (set_attr "mode" "V4SF")])
;; Non-VEX V4SF horizontal add/subtract.  Pairs SF elements 0/1 and 2/3 of
;; operand 1, then of operand 2.  Operand 1 is tied to the destination
;; ("0"); printed as two-operand h<plusminus_mnemonic>ps.
1315 (define_insn "sse3_h<plusminus_insn>v4sf3"
1316 [(set (match_operand:V4SF 0 "register_operand" "=x")
1321 (match_operand:V4SF 1 "register_operand" "0")
1322 (parallel [(const_int 0)]))
1323 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1325 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1330 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1331 (parallel [(const_int 0)]))
1332 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "sseadd")
1339 (set_attr "atom_unit" "complex")
1340 (set_attr "prefix_rep" "1")
1341 (set_attr "mode" "V4SF")])
;; VEX V2DF horizontal add/subtract.  Pairs DF elements 0 and 1 of
;; operand 1, then of operand 2.  Operand 1 is an independent register
;; input; printed as three-operand vh<plusminus_mnemonic>pd.
1343 (define_insn "*avx_h<plusminus_insn>v2df3"
1344 [(set (match_operand:V2DF 0 "register_operand" "=x")
1348 (match_operand:V2DF 1 "register_operand" "x")
1349 (parallel [(const_int 0)]))
1350 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1353 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1357 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1358 [(set_attr "type" "sseadd")
1359 (set_attr "prefix" "vex")
1360 (set_attr "mode" "V2DF")])
;; Non-VEX V2DF horizontal add/subtract.  Pairs DF elements 0 and 1 of
;; operand 1, then of operand 2.  Operand 1 is tied to the destination
;; ("0"); printed as two-operand h<plusminus_mnemonic>pd.
1362 (define_insn "sse3_h<plusminus_insn>v2df3"
1363 [(set (match_operand:V2DF 0 "register_operand" "=x")
1367 (match_operand:V2DF 1 "register_operand" "0")
1368 (parallel [(const_int 0)]))
1369 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1376 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1377 [(set_attr "type" "sseadd")
1378 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF register into operand 0.  Two strategies appear
;; in the preparation code: chaining two gen_sse3_haddv4sf3 insns through
;; a fresh temporary register, or calling ix86_expand_reduc_v4sf with
;; gen_addv4sf3 as the combining operation.
1380 (define_expand "reduc_splus_v4sf"
1381 [(match_operand:V4SF 0 "register_operand" "")
1382 (match_operand:V4SF 1 "register_operand" "")]
1387 rtx tmp = gen_reg_rtx (V4SFmode);
1388 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1389 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1392 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF register: emits a single gen_sse3_haddv2df3
;; with operand 1 supplied as both inputs.
1396 (define_expand "reduc_splus_v2df"
1397 [(match_operand:V2DF 0 "register_operand" "")
1398 (match_operand:V2DF 1 "register_operand" "")]
1401 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF register: delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
1405 (define_expand "reduc_smax_v4sf"
1406 [(match_operand:V4SF 0 "register_operand" "")
1407 (match_operand:V4SF 1 "register_operand" "")]
1410 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register: delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
1414 (define_expand "reduc_smin_v4sf"
1415 [(match_operand:V4SF 0 "register_operand" "")
1416 (match_operand:V4SF 1 "register_operand" "")]
1419 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1425 ;; Parallel floating point comparisons
1427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed compare with an explicit predicate.  Operand 3 is an SI
;; immediate accepted by const_0_to_31_operand and is printed as the
;; first AT&T operand of vcmpp<suffix>; length_immediate is 1.
1429 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1430 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1432 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1433 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<MODE>")])
;; VEX scalar compare with an explicit predicate, expressed as a vec_merge.
;; Operand 3 is an SI immediate accepted by const_0_to_31_operand and is
;; printed as the first AT&T operand of vcmps<suffix>; length_immediate
;; is 1.  Operand 0 is the result and, as in every other insn pattern in
;; this file, needs the "=x" constraint so that it is treated as a
;; write-only SSE register; an empty constraint is only appropriate in
;; define_expand.
1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445 (vec_merge:SSEMODEF2P
1447 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1448 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1449 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "prefix" "vex")
1458 (set_attr "mode" "<ssescalarmode>")])
1460 ;; We don't promote 128bit vector compare intrinsics.  However, the
1461 ;; vectorizer may generate 256bit vector compare instructions.
;; VEX packed compare whose RTL comparison code (operand 3, accepted by
;; avx_comparison_float_operator) is printed into the mnemonic via %D3,
;; giving vcmp<cond>p<suffix> with three register/memory operands.
1462 (define_insn "*avx_maskcmp<mode>3"
1463 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1464 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1465 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "prefix" "vex")
1471 (set_attr "length_immediate" "1")
1472 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX compare over the SSEMODEF4 modes.  Operand 3 is matched by
;; sse_comparison_operator and printed into the mnemonic via %D3; operand 1
;; is tied to the destination ("0").  The condition accepts both
;; SSE_FLOAT_MODE_P and SSE_VEC_FLOAT_MODE_P modes.
1474 (define_insn "<sse>_maskcmp<mode>3"
1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1477 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1482 [(set_attr "type" "ssecmp")
1483 (set_attr "length_immediate" "1")
1484 (set_attr "mode" "<MODE>")])
;; Non-VEX scalar compare expressed as a vec_merge around a comparison
;; matched by sse_comparison_operator.  The condition is printed via %D3
;; in cmp<cond>s<suffix>; operand 1 is tied to the destination ("0").
1486 (define_insn "<sse>_vmmaskcmp<mode>3"
1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1488 (vec_merge:SSEMODEF2P
1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1490 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1494 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1495 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1496 [(set_attr "type" "ssecmp")
1497 (set_attr "length_immediate" "1")
1498 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of each
;; vector operand.  Printed as comis<suffix>, with the %v escape and
;; "maybe_vex" prefix attribute.  prefix_rep is forced to 0 and
;; prefix_data16 is chosen according to whether the mode attribute is DF.
1500 (define_insn "<sse>_comi"
1501 [(set (reg:CCFP FLAGS_REG)
1504 (match_operand:<ssevecmode> 0 "register_operand" "x")
1505 (parallel [(const_int 0)]))
1507 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1508 (parallel [(const_int 0)]))))]
1509 "SSE_FLOAT_MODE_P (<MODE>mode)"
1510 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1511 [(set_attr "type" "ssecomi")
1512 (set_attr "prefix" "maybe_vex")
1513 (set_attr "prefix_rep" "0")
1514 (set (attr "prefix_data16")
1515 (if_then_else (eq_attr "mode" "DF")
1517 (const_string "0")))
1518 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode from element 0 of each
;; vector operand.  Printed as ucomis<suffix>, with the %v escape and
;; "maybe_vex" prefix attribute.  prefix_rep is forced to 0 and
;; prefix_data16 is chosen according to whether the mode attribute is DF.
1520 (define_insn "<sse>_ucomi"
1521 [(set (reg:CCFPU FLAGS_REG)
1524 (match_operand:<ssevecmode> 0 "register_operand" "x")
1525 (parallel [(const_int 0)]))
1527 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1528 (parallel [(const_int 0)]))))]
1529 "SSE_FLOAT_MODE_P (<MODE>mode)"
1530 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1531 [(set_attr "type" "ssecomi")
1532 (set_attr "prefix" "maybe_vex")
1533 (set_attr "prefix_rep" "0")
1534 (set (attr "prefix_data16")
1535 (if_then_else (eq_attr "mode" "DF")
1537 (const_string "0")))
1538 (set_attr "mode" "<MODE>")])
;; Vector conditional select for SSEMODEF2P: operand 0 receives operand 1
;; or operand 2 depending on the comparison (operator 3) of operands 4
;; and 5.  Expansion is delegated to ix86_expand_fp_vcond, whose boolean
;; result is captured in `ok'.
1540 (define_expand "vcond<mode>"
1541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1542 (if_then_else:SSEMODEF2P
1543 (match_operator 3 ""
1544 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1545 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1546 (match_operand:SSEMODEF2P 1 "general_operand" "")
1547 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1548 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1550 bool ok = ix86_expand_fp_vcond (operands);
1555 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1557 ;; Parallel floating point logical operations
1559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed and-not on the AVX float vector modes.  Operand 1 is an
;; independent register input and operand 2 may be memory; printed as
;; three-operand vandnp<suffix>.
1561 (define_insn "avx_andnot<mode>3"
1562 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1565 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1566 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1567 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1568 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1569 [(set_attr "type" "sselog")
1570 (set_attr "prefix" "vex")
1571 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX packed and-not on the SSE float vector modes.  Operand 1 is
;; tied to the destination ("0"); printed as two-operand andnp<suffix>.
1573 (define_insn "<sse>_andnot<mode>3"
1574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1577 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1579 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1580 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1581 [(set_attr "type" "sselog")
1582 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations on the AVX256MODEF2P modes.  The
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1584 (define_expand "<code><mode>3"
1585 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1586 (any_logic:AVX256MODEF2P
1587 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1588 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1589 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1590 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX packed logical operation (any_logic) on the AVX float vector modes.
;; Operand 1 is commutative (%); the pattern additionally requires
;; ix86_binary_operator_ok.  Printed as three-operand
;; v<logicprefix>p<suffix>.
1592 (define_insn "*avx_<code><mode>3"
1593 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1594 (any_logic:AVXMODEF2P
1595 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1596 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1597 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1598 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1599 "v<logicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1600 [(set_attr "type" "sselog")
1601 (set_attr "prefix" "vex")
1602 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations on the SSEMODEF2P modes.  The
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1604 (define_expand "<code><mode>3"
1605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1606 (any_logic:SSEMODEF2P
1607 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1610 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-VEX packed logical operation (any_logic) on the SSE float vector
;; modes.  Operand 1 is commutative (%) and tied to the destination ("0");
;; the pattern additionally requires ix86_binary_operator_ok.
1612 (define_insn "*<code><mode>3"
1613 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1614 (any_logic:SSEMODEF2P
1615 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1616 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1617 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1618 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1619 "<logicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1620 [(set_attr "type" "sselog")
1621 (set_attr "mode" "<MODE>")])
;; Vector copysign built from three logical steps: operand 1 is combined
;; with the complement of a mask (operand 3), operand 2 is ANDed with the
;; mask, and the two partial results (operands 4 and 5) are ORed into
;; operand 0.  The mask comes from ix86_build_signbit_mask for the scalar
;; element mode; operands 4 and 5 are fresh pseudo registers.
1623 (define_expand "copysign<mode>3"
1626 (not:SSEMODEF2P (match_dup 3))
1627 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1629 (and:SSEMODEF2P (match_dup 3)
1630 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1631 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1632 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1633 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1635 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1637 operands[4] = gen_reg_rtx (<MODE>mode);
1638 operands[5] = gen_reg_rtx (<MODE>mode);
1641 ;; Also define scalar versions. These are used for abs, neg, and
1642 ;; conditional move. Using subregs into vector modes causes register
1643 ;; allocation lossage. These patterns do not allow memory operands
1644 ;; because the native instructions read the full 128-bits.
;; VEX and-not on scalar MODEF values held in SSE registers.  All operands
;; are register-only; the packed mnemonic vandnp<suffix> is used and the
;; mode attribute is the corresponding vector mode.
1646 (define_insn "*avx_andnot<mode>3"
1647 [(set (match_operand:MODEF 0 "register_operand" "=x")
1650 (match_operand:MODEF 1 "register_operand" "x"))
1651 (match_operand:MODEF 2 "register_operand" "x")))]
1652 "AVX_FLOAT_MODE_P (<MODE>mode)"
1653 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1654 [(set_attr "type" "sselog")
1655 (set_attr "prefix" "vex")
1656 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX and-not on scalar MODEF values held in SSE registers.  Operand 1
;; is tied to the destination ("0") and operand 2 is register-only; the
;; packed mnemonic andnp<suffix> is used.
1658 (define_insn "*andnot<mode>3"
1659 [(set (match_operand:MODEF 0 "register_operand" "=x")
1662 (match_operand:MODEF 1 "register_operand" "0"))
1663 (match_operand:MODEF 2 "register_operand" "x")))]
1664 "SSE_FLOAT_MODE_P (<MODE>mode)"
1665 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1666 [(set_attr "type" "sselog")
1667 (set_attr "mode" "<ssevecmode>")])
;; VEX logical operation on scalar MODEF values held in SSE registers.
;; All operands are register-only; printed with the packed mnemonic
;; v<logicprefix>p<suffix> and the vector mode attribute.
1669 (define_insn "*avx_<code><mode>3"
1670 [(set (match_operand:MODEF 0 "register_operand" "=x")
1672 (match_operand:MODEF 1 "register_operand" "x")
1673 (match_operand:MODEF 2 "register_operand" "x")))]
1674 "AVX_FLOAT_MODE_P (<MODE>mode)"
1675 "v<logicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1676 [(set_attr "type" "sselog")
1677 (set_attr "prefix" "vex")
1678 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX logical operation on scalar MODEF values held in SSE registers.
;; Operand 1 is tied to the destination ("0") and operand 2 is
;; register-only; printed with the packed mnemonic <logicprefix>p<suffix>.
1680 (define_insn "*<code><mode>3"
1681 [(set (match_operand:MODEF 0 "register_operand" "=x")
1683 (match_operand:MODEF 1 "register_operand" "0")
1684 (match_operand:MODEF 2 "register_operand" "x")))]
1685 "SSE_FLOAT_MODE_P (<MODE>mode)"
1686 "<logicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1687 [(set_attr "type" "sselog")
1688 (set_attr "mode" "<ssevecmode>")])
1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1692 ;; FMA4 floating point multiply/accumulate instructions.  This
1693 ;; includes the scalar versions of the instructions as well as the
;; vector versions.
1696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1698 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1699 ;; combine to generate a multiply/add with two memory references. We then
1700 ;; split this insn so that the destination register is first loaded with
1701 ;; one of the memory operands.  If we don't manage to split the insn, reload
1702 ;; will generate the appropriate moves.  This is needed because combine
1703 ;; has already folded one of the memory references into both the multiply and
1704 ;; add insns, and it can't generate a new pseudo. I.e.:
1705 ;; (set (reg1) (mem (addr1)))
1706 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1707 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add on the FMA4MODEF4 modes.  Operands 1 and 2 are the
;; multiplicands (operand 1 commutative) and operand 3 is the addend.
;; Two alternatives: operand 2 in a register with operand 3 in register or
;; memory, or operand 2 in memory with operand 3 in a register.
1709 (define_insn "fma4_fmadd<mode>4256"
1710 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1713 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1714 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1715 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1716 "TARGET_FMA4 && TARGET_FUSED_MADD"
1717 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
1721 ;; Floating multiply and subtract.
;; FMA4 vfmsub on the FMA4MODEF4 modes, enabled by TARGET_FMA4 &&
;; TARGET_FUSED_MADD.  Two alternatives: operand 2 in a register with
;; operand 3 in register or memory, or operand 2 in memory with operand 3
;; in a register.
1722 (define_insn "fma4_fmsub<mode>4256"
1723 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1726 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1727 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1728 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1729 "TARGET_FMA4 && TARGET_FUSED_MADD"
1730 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1731 [(set_attr "type" "ssemuladd")
1732 (set_attr "mode" "<MODE>")])
1734 ;; Floating point negative multiply and add.
1735 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 vfnmadd on the FMA4MODEF4 modes.  In the RTL operand 3 appears
;; first, ahead of operands 1 and 2; the assembler template still prints
;; the operands in numeric order.
1736 (define_insn "fma4_fnmadd<mode>4256"
1737 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1739 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1741 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1742 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1743 "TARGET_FMA4 && TARGET_FUSED_MADD"
1744 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1745 [(set_attr "type" "ssemuladd")
1746 (set_attr "mode" "<MODE>")])
1748 ;; Floating point negative multiply and subtract.
;; FMA4 vfnmsub on the FMA4MODEF4 modes, enabled by TARGET_FMA4 &&
;; TARGET_FUSED_MADD.  Operand 1 sits inside an extra level of nesting
;; relative to operand 2 in the RTL; same two register/memory alternatives
;; as the other FMA4 patterns.
1749 (define_insn "fma4_fnmsub<mode>4256"
1750 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1754 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1755 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1756 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1757 "TARGET_FMA4 && TARGET_FUSED_MADD"
1758 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1759 [(set_attr "type" "ssemuladd")
1760 (set_attr "mode" "<MODE>")])
;; FMA4 vfmadd on the SSEMODEF4 modes, enabled by TARGET_FMA4 &&
;; TARGET_FUSED_MADD.  Two alternatives: operand 2 in a register with
;; operand 3 in register or memory, or operand 2 in memory with operand 3
;; in a register.
1762 (define_insn "fma4_fmadd<mode>4"
1763 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1766 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1767 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1768 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1769 "TARGET_FMA4 && TARGET_FUSED_MADD"
1770 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
1774 ;; For the scalar operations, use operand1 for the upper words that aren't
1775 ;; modified, so restrict the forms that are generated.
1776 ;; Scalar version of fmadd.
;; vec_merge form over SSEMODEF2P; the template uses the scalar suffix
;; (<ssemodesuffixf2s>) with vfmadd.  Same two register/memory
;; alternatives as the packed pattern.
1777 (define_insn "fma4_vmfmadd<mode>4"
1778 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1779 (vec_merge:SSEMODEF2P
1782 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1783 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1784 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1787 "TARGET_FMA4 && TARGET_FUSED_MADD"
1788 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1789 [(set_attr "type" "ssemuladd")
1790 (set_attr "mode" "<MODE>")])
1792 ;; Floating multiply and subtract.
1793 ;; Allow two memory operands the same as fmadd.
;; FMA4 vfmsub on the SSEMODEF4 modes, enabled by TARGET_FMA4 &&
;; TARGET_FUSED_MADD.  Alternatives: operand 2 register / operand 3
;; register-or-memory, or operand 2 memory / operand 3 register.
1794 (define_insn "fma4_fmsub<mode>4"
1795 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1798 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1799 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1800 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1801 "TARGET_FMA4 && TARGET_FUSED_MADD"
1802 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1803 [(set_attr "type" "ssemuladd")
1804 (set_attr "mode" "<MODE>")])
1806 ;; For the scalar operations, use operand1 for the upper words that aren't
1807 ;; modified, so restrict the forms that are generated.
1808 ;; Scalar version of fmsub.
;; vec_merge form over SSEMODEF2P; the template uses the scalar suffix
;; (<ssemodesuffixf2s>) with vfmsub.  Same two register/memory
;; alternatives as the packed pattern.
1809 (define_insn "fma4_vmfmsub<mode>4"
1810 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1811 (vec_merge:SSEMODEF2P
1814 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1815 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1816 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1819 "TARGET_FMA4 && TARGET_FUSED_MADD"
1820 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821 [(set_attr "type" "ssemuladd")
1822 (set_attr "mode" "<MODE>")])
1824 ;; Floating point negative multiply and add.
1825 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 vfnmadd on the SSEMODEF4 modes.  In the RTL operand 3 appears
;; first, ahead of operands 1 and 2; the assembler template still prints
;; the operands in numeric order.
1826 (define_insn "fma4_fnmadd<mode>4"
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1829 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1833 "TARGET_FMA4 && TARGET_FUSED_MADD"
1834 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1835 [(set_attr "type" "ssemuladd")
1836 (set_attr "mode" "<MODE>")])
1838 ;; For the scalar operations, use operand1 for the upper words that aren't
1839 ;; modified, so restrict the forms that are generated.
1840 ;; Scalar version of fnmadd.
;; vec_merge form over SSEMODEF2P; the template uses the scalar suffix
;; (<ssemodesuffixf2s>) with vfnmadd.  Operand 3 appears first in the RTL,
;; ahead of operands 1 and 2.
1841 (define_insn "fma4_vmfnmadd<mode>4"
1842 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1843 (vec_merge:SSEMODEF2P
1845 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1847 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1848 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1851 "TARGET_FMA4 && TARGET_FUSED_MADD"
1852 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1853 [(set_attr "type" "ssemuladd")
1854 (set_attr "mode" "<MODE>")])
1856 ;; Floating point negative multiply and subtract.
1857 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; FMA4 vfnmsub on the SSEMODEF4 modes, enabled by TARGET_FMA4 &&
;; TARGET_FUSED_MADD.  Operand 1 sits inside an extra level of nesting
;; relative to operand 2 in the RTL.
1858 (define_insn "fma4_fnmsub<mode>4"
1859 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1863 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1864 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1865 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1866 "TARGET_FMA4 && TARGET_FUSED_MADD"
1867 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1868 [(set_attr "type" "ssemuladd")
1869 (set_attr "mode" "<MODE>")])
1871 ;; For the scalar operations, use operand1 for the upper words that aren't
1872 ;; modified, so restrict the forms that are generated.
1873 ;; Scalar version of fnmsub.
;; vec_merge form over SSEMODEF2P; the template uses the scalar suffix
;; (<ssemodesuffixf2s>) with vfnmsub.  Same two register/memory
;; alternatives as the packed pattern.
1874 (define_insn "fma4_vmfnmsub<mode>4"
1875 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1876 (vec_merge:SSEMODEF2P
1880 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1881 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1882 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1885 "TARGET_FMA4 && TARGET_FUSED_MADD"
1886 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1887 [(set_attr "type" "ssemuladd")
1888 (set_attr "mode" "<MODE>")])
;; Variant of vfmadd on the FMA4MODEF4 modes whose RTL is wrapped in an
;; UNSPEC_FMA4_INTRINSIC, so it is matched separately from the plain
;; arithmetic pattern.  Same operand alternatives and template.
1890 (define_insn "fma4i_fmadd<mode>4256"
1891 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1895 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1896 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1897 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1898 UNSPEC_FMA4_INTRINSIC))]
1899 "TARGET_FMA4 && TARGET_FUSED_MADD"
1900 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1901 [(set_attr "type" "ssemuladd")
1902 (set_attr "mode" "<MODE>")])
;; Variant of vfmsub on the FMA4MODEF4 modes whose RTL is wrapped in an
;; UNSPEC_FMA4_INTRINSIC, so it is matched separately from the plain
;; arithmetic pattern.  Same operand alternatives and template.
1904 (define_insn "fma4i_fmsub<mode>4256"
1905 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1909 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1910 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1911 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1912 UNSPEC_FMA4_INTRINSIC))]
1913 "TARGET_FMA4 && TARGET_FUSED_MADD"
1914 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
;; Intrinsic FMA4 negated multiply-add over the FMA4MODEF4 modes; wrapped in
;; UNSPEC_FMA4_INTRINSIC and printed as vfnmadd<fma4modesuffixf4>.
;; Note that operand 3 is written ahead of operands 1 and 2 in the RTL,
;; while the assembler template still lists them in numeric order.
1918 (define_insn "fma4i_fnmadd<mode>4256"
1919 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1922 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1924 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1925 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1926 UNSPEC_FMA4_INTRINSIC))]
1927 "TARGET_FMA4 && TARGET_FUSED_MADD"
1928 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1929 [(set_attr "type" "ssemuladd")
1930 (set_attr "mode" "<MODE>")])
;; Intrinsic FMA4 negated multiply-subtract over the FMA4MODEF4 modes;
;; wrapped in UNSPEC_FMA4_INTRINSIC and printed as vfnmsub<fma4modesuffixf4>.
;; Alternative 0 allows operand 3 in memory, alternative 1 operand 2.
1932 (define_insn "fma4i_fnmsub<mode>4256"
1933 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1938 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1939 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1940 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1941 UNSPEC_FMA4_INTRINSIC))]
1942 "TARGET_FMA4 && TARGET_FUSED_MADD"
1943 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1944 [(set_attr "type" "ssemuladd")
1945 (set_attr "mode" "<MODE>")])
;; Intrinsic FMA4 multiply-add over the SSEMODEF2P modes; wrapped in
;; UNSPEC_FMA4_INTRINSIC and printed as vfmadd<ssemodesuffixf4>.
;; Alternative 0 allows operand 3 in memory, alternative 1 operand 2.
1947 (define_insn "fma4i_fmadd<mode>4"
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1952 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1953 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1954 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1955 UNSPEC_FMA4_INTRINSIC))]
1956 "TARGET_FMA4 && TARGET_FUSED_MADD"
1957 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1958 [(set_attr "type" "ssemuladd")
1959 (set_attr "mode" "<MODE>")])
;; Intrinsic FMA4 multiply-subtract over the SSEMODEF2P modes; wrapped in
;; UNSPEC_FMA4_INTRINSIC and printed as vfmsub<ssemodesuffixf4>.
;; Alternative 0 allows operand 3 in memory, alternative 1 operand 2.
1961 (define_insn "fma4i_fmsub<mode>4"
1962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1966 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1967 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1968 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1969 UNSPEC_FMA4_INTRINSIC))]
1970 "TARGET_FMA4 && TARGET_FUSED_MADD"
1971 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1972 [(set_attr "type" "ssemuladd")
1973 (set_attr "mode" "<MODE>")])
;; Intrinsic FMA4 negated multiply-add over the SSEMODEF2P modes; wrapped in
;; UNSPEC_FMA4_INTRINSIC and printed as vfnmadd<ssemodesuffixf4>.
;; Operand 3 is written ahead of operands 1 and 2 in the RTL, while the
;; assembler template still lists them in numeric order.
1975 (define_insn "fma4i_fnmadd<mode>4"
1976 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1979 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
1983 UNSPEC_FMA4_INTRINSIC))]
1984 "TARGET_FMA4 && TARGET_FUSED_MADD"
1985 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1986 [(set_attr "type" "ssemuladd")
1987 (set_attr "mode" "<MODE>")])
;; Intrinsic FMA4 negated multiply-subtract over the SSEMODEF2P modes;
;; wrapped in UNSPEC_FMA4_INTRINSIC and printed as vfnmsub<ssemodesuffixf4>.
;; Alternative 0 allows operand 3 in memory, alternative 1 operand 2.
1989 (define_insn "fma4i_fnmsub<mode>4"
1990 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1995 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1996 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1997 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1998 UNSPEC_FMA4_INTRINSIC))]
1999 "TARGET_FMA4 && TARGET_FUSED_MADD"
2000 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
2004 ;; For the scalar operations, use operand1 for the upper words that aren't
2005 ;; modified, so restrict the forms that are accepted.
;; Intrinsic vec_merge form of FMA4 multiply-add over SSEMODEF2P, wrapped in
;; UNSPEC_FMA4_INTRINSIC; printed with the scalar suffix as
;; vfmadd<ssemodesuffixf2s>.  The "mode" attribute is the scalar mode.
2006 (define_insn "fma4i_vmfmadd<mode>4"
2007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2009 [(vec_merge:SSEMODEF2P
2012 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2013 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2014 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2017 UNSPEC_FMA4_INTRINSIC))]
2018 "TARGET_FMA4 && TARGET_FUSED_MADD"
2019 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2020 [(set_attr "type" "ssemuladd")
2021 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic vec_merge form of FMA4 multiply-subtract over SSEMODEF2P,
;; wrapped in UNSPEC_FMA4_INTRINSIC; printed as vfmsub<ssemodesuffixf2s>.
;; The "mode" attribute is the scalar mode.
2023 (define_insn "fma4i_vmfmsub<mode>4"
2024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2026 [(vec_merge:SSEMODEF2P
2029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2031 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2034 UNSPEC_FMA4_INTRINSIC))]
2035 "TARGET_FMA4 && TARGET_FUSED_MADD"
2036 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2037 [(set_attr "type" "ssemuladd")
2038 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic vec_merge form of FMA4 negated multiply-add over SSEMODEF2P,
;; wrapped in UNSPEC_FMA4_INTRINSIC; printed as vfnmadd<ssemodesuffixf2s>.
;; Operand 3 is written ahead of operands 1 and 2 in the RTL.
2040 (define_insn "fma4i_vmfnmadd<mode>4"
2041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2043 [(vec_merge:SSEMODEF2P
2045 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2047 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2048 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2051 UNSPEC_FMA4_INTRINSIC))]
2052 "TARGET_FMA4 && TARGET_FUSED_MADD"
2053 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2054 [(set_attr "type" "ssemuladd")
2055 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic vec_merge form of FMA4 negated multiply-subtract over
;; SSEMODEF2P, wrapped in UNSPEC_FMA4_INTRINSIC; printed as
;; vfnmsub<ssemodesuffixf2s>.  The "mode" attribute is the scalar mode.
2057 (define_insn "fma4i_vmfnmsub<mode>4"
2058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2060 [(vec_merge:SSEMODEF2P
2064 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2065 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2066 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2069 UNSPEC_FMA4_INTRINSIC))]
2070 "TARGET_FMA4 && TARGET_FUSED_MADD"
2071 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2072 [(set_attr "type" "ssemuladd")
2073 (set_attr "mode" "<ssescalarmode>")])
2075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2077 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF multiply-addsub; printed as vfmaddsubps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2081 (define_insn "fma4_fmaddsubv8sf4"
2082 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2086 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2087 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2088 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2095 "TARGET_FMA4 && TARGET_FUSED_MADD"
2096 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2097 [(set_attr "type" "ssemuladd")
2098 (set_attr "mode" "V8SF")])
;; V4DF multiply-addsub; printed as vfmaddsubpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2100 (define_insn "fma4_fmaddsubv4df4"
2101 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2105 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2106 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2107 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2114 "TARGET_FMA4 && TARGET_FUSED_MADD"
2115 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2116 [(set_attr "type" "ssemuladd")
2117 (set_attr "mode" "V4DF")])
;; V4SF multiply-addsub; printed as vfmaddsubps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2119 (define_insn "fma4_fmaddsubv4sf4"
2120 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2124 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2125 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2126 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2133 "TARGET_FMA4 && TARGET_FUSED_MADD"
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "V4SF")])
;; V2DF multiply-addsub; printed as vfmaddsubpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2138 (define_insn "fma4_fmaddsubv2df4"
2139 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2143 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2144 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2145 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2152 "TARGET_FMA4 && TARGET_FUSED_MADD"
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2154 [(set_attr "type" "ssemuladd")
2155 (set_attr "mode" "V2DF")])
;; V8SF multiply-subadd; printed as vfmsubaddps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2157 (define_insn "fma4_fmsubaddv8sf4"
2158 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2162 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2163 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2164 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2171 "TARGET_FMA4 && TARGET_FUSED_MADD"
2172 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "V8SF")])
;; V4DF multiply-subadd; printed as vfmsubaddpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2176 (define_insn "fma4_fmsubaddv4df4"
2177 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2181 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2182 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2183 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2190 "TARGET_FMA4 && TARGET_FUSED_MADD"
2191 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "V4DF")])
;; V4SF multiply-subadd; printed as vfmsubaddps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2195 (define_insn "fma4_fmsubaddv4sf4"
2196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2200 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2201 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2202 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2209 "TARGET_FMA4 && TARGET_FUSED_MADD"
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2211 [(set_attr "type" "ssemuladd")
2212 (set_attr "mode" "V4SF")])
;; V2DF multiply-subadd; printed as vfmsubaddpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.  One of operands 2/3 may be memory.
2214 (define_insn "fma4_fmsubaddv2df4"
2215 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2219 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2220 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2221 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2228 "TARGET_FMA4 && TARGET_FUSED_MADD"
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2230 [(set_attr "type" "ssemuladd")
2231 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF multiply-addsub, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmaddsubps.  One of operands 2/3 may be memory.
2233 (define_insn "fma4i_fmaddsubv8sf4"
2234 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2239 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2240 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2241 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2248 UNSPEC_FMA4_INTRINSIC))]
2249 "TARGET_FMA4 && TARGET_FUSED_MADD"
2250 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2251 [(set_attr "type" "ssemuladd")
2252 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF multiply-addsub, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmaddsubpd.  One of operands 2/3 may be memory.
2254 (define_insn "fma4i_fmaddsubv4df4"
2255 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2260 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2261 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2262 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2269 UNSPEC_FMA4_INTRINSIC))]
2270 "TARGET_FMA4 && TARGET_FUSED_MADD"
2271 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2272 [(set_attr "type" "ssemuladd")
2273 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF multiply-addsub, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmaddsubps.  One of operands 2/3 may be memory.
2275 (define_insn "fma4i_fmaddsubv4sf4"
2276 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2281 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2282 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2283 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2290 UNSPEC_FMA4_INTRINSIC))]
2291 "TARGET_FMA4 && TARGET_FUSED_MADD"
2292 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2293 [(set_attr "type" "ssemuladd")
2294 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF multiply-addsub, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmaddsubpd.  One of operands 2/3 may be memory.
2296 (define_insn "fma4i_fmaddsubv2df4"
2297 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2302 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2303 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2304 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2311 UNSPEC_FMA4_INTRINSIC))]
2312 "TARGET_FMA4 && TARGET_FUSED_MADD"
2313 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2314 [(set_attr "type" "ssemuladd")
2315 (set_attr "mode" "V2DF")])
;; Intrinsic V8SF multiply-subadd, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmsubaddps.  One of operands 2/3 may be memory.
2317 (define_insn "fma4i_fmsubaddv8sf4"
2318 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2323 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2324 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2325 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2332 UNSPEC_FMA4_INTRINSIC))]
2333 "TARGET_FMA4 && TARGET_FUSED_MADD"
2334 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2335 [(set_attr "type" "ssemuladd")
2336 (set_attr "mode" "V8SF")])
;; Intrinsic V4DF multiply-subadd, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmsubaddpd.  One of operands 2/3 may be memory.
2338 (define_insn "fma4i_fmsubaddv4df4"
2339 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2344 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2345 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2346 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2353 UNSPEC_FMA4_INTRINSIC))]
2354 "TARGET_FMA4 && TARGET_FUSED_MADD"
2355 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2356 [(set_attr "type" "ssemuladd")
2357 (set_attr "mode" "V4DF")])
;; Intrinsic V4SF multiply-subadd, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmsubaddps.  One of operands 2/3 may be memory.
2359 (define_insn "fma4i_fmsubaddv4sf4"
2360 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2365 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2366 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2367 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2374 UNSPEC_FMA4_INTRINSIC))]
2375 "TARGET_FMA4 && TARGET_FUSED_MADD"
2376 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2377 [(set_attr "type" "ssemuladd")
2378 (set_attr "mode" "V4SF")])
;; Intrinsic V2DF multiply-subadd, wrapped in UNSPEC_FMA4_INTRINSIC;
;; printed as vfmsubaddpd.  One of operands 2/3 may be memory.
2380 (define_insn "fma4i_fmsubaddv2df4"
2381 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2386 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2387 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2388 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2395 UNSPEC_FMA4_INTRINSIC))]
2396 "TARGET_FMA4 && TARGET_FUSED_MADD"
2397 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2398 [(set_attr "type" "ssemuladd")
2399 (set_attr "mode" "V2DF")])
2401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2403 ;; Parallel single-precision floating point conversion operations
2405 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (MMX register "y" or memory) to float
;; and combines it with V4SF operand 1, which is tied to the destination
;; by the "0" constraint.
2407 (define_insn "sse_cvtpi2ps"
2408 [(set (match_operand:V4SF 0 "register_operand" "=x")
2411 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2412 (match_operand:V4SF 1 "register_operand" "0")
2415 "cvtpi2ps\t{%2, %0|%0, %2}"
2416 [(set_attr "type" "ssecvt")
2417 (set_attr "mode" "V4SF")])
;; cvtps2pi: non-truncating conversion of V4SF operand 1 (the unspec is
;; V4SI), keeping elements 0 and 1 as the V2SI result in an MMX register.
2419 (define_insn "sse_cvtps2pi"
2420 [(set (match_operand:V2SI 0 "register_operand" "=y")
2422 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2424 (parallel [(const_int 0) (const_int 1)])))]
2426 "cvtps2pi\t{%1, %0|%0, %1}"
2427 [(set_attr "type" "ssecvt")
2428 (set_attr "unit" "mmx")
2429 (set_attr "mode" "DI")])
;; cvttps2pi: truncating (fix) conversion of V4SF operand 1, keeping
;; elements 0 and 1 as the V2SI result in an MMX register.  The
;; "prefix_rep" attribute is forced to 0 for this pattern.
2431 (define_insn "sse_cvttps2pi"
2432 [(set (match_operand:V2SI 0 "register_operand" "=y")
2434 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2435 (parallel [(const_int 0) (const_int 1)])))]
2437 "cvttps2pi\t{%1, %0|%0, %1}"
2438 [(set_attr "type" "ssecvt")
2439 (set_attr "unit" "mmx")
2440 (set_attr "prefix_rep" "0")
2441 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ss: converts SI operand 2 (general register or memory)
;; to SF and combines it with V4SF operand 1.  Three-operand template, so
;; operand 1 does not have to share a register with the destination.
2443 (define_insn "*avx_cvtsi2ss"
2444 [(set (match_operand:V4SF 0 "register_operand" "=x")
2447 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2448 (match_operand:V4SF 1 "register_operand" "x")
2451 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2452 [(set_attr "type" "sseicvt")
2453 (set_attr "prefix" "vex")
2454 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 to SF and combines it with V4SF operand
;; 1, which is tied to the destination ("0").  The register and memory
;; sources are separate alternatives so the decode attributes can differ.
2456 (define_insn "sse_cvtsi2ss"
2457 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2460 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2461 (match_operand:V4SF 1 "register_operand" "0,0")
2464 "cvtsi2ss\t{%2, %0|%0, %2}"
2465 [(set_attr "type" "sseicvt")
2466 (set_attr "athlon_decode" "vector,double")
2467 (set_attr "amdfam10_decode" "vector,double")
2468 (set_attr "mode" "SF")])
;; VEX form of the 64-bit integer to SF conversion (vcvtsi2ssq), available
;; only when TARGET_AVX && TARGET_64BIT.  DI operand 2 may be a general
;; register or memory.
2470 (define_insn "*avx_cvtsi2ssq"
2471 [(set (match_operand:V4SF 0 "register_operand" "=x")
2474 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2475 (match_operand:V4SF 1 "register_operand" "x")
2477 "TARGET_AVX && TARGET_64BIT"
2478 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2479 [(set_attr "type" "sseicvt")
2480 (set_attr "length_vex" "4")
2481 (set_attr "prefix" "vex")
2482 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit integer to SF conversion, available only when
;; TARGET_SSE && TARGET_64BIT.  Operand 1 is tied to the destination ("0").
;; NOTE(review): the second alternative of operand 2 is "rm" here, whereas
;; sse2_cvtsi2sdq uses "r,m" -- confirm whether the overlap is intentional.
2484 (define_insn "sse_cvtsi2ssq"
2485 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2488 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2489 (match_operand:V4SF 1 "register_operand" "0,0")
2491 "TARGET_SSE && TARGET_64BIT"
2492 "cvtsi2ssq\t{%2, %0|%0, %2}"
2493 [(set_attr "type" "sseicvt")
2494 (set_attr "prefix_rex" "1")
2495 (set_attr "athlon_decode" "vector,double")
2496 (set_attr "amdfam10_decode" "vector,double")
2497 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: element 0 of V4SF operand 1 is converted to
;; SI through UNSPEC_FIX_NOTRUNC.  The %v in the template together with
;; prefix "maybe_vex" lets one pattern serve both SSE and AVX encodings.
2499 (define_insn "sse_cvtss2si"
2500 [(set (match_operand:SI 0 "register_operand" "=r,r")
2503 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2504 (parallel [(const_int 0)]))]
2505 UNSPEC_FIX_NOTRUNC))]
2507 "%vcvtss2si\t{%1, %0|%0, %1}"
2508 [(set_attr "type" "sseicvt")
2509 (set_attr "athlon_decode" "double,vector")
2510 (set_attr "prefix_rep" "1")
2511 (set_attr "prefix" "maybe_vex")
2512 (set_attr "mode" "SI")])
;; cvtss2si on a scalar source: SF operand 1 (SSE register or memory) is
;; converted to SI through UNSPEC_FIX_NOTRUNC.
2514 (define_insn "sse_cvtss2si_2"
2515 [(set (match_operand:SI 0 "register_operand" "=r,r")
2516 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2517 UNSPEC_FIX_NOTRUNC))]
2519 "%vcvtss2si\t{%1, %0|%0, %1}"
2520 [(set_attr "type" "sseicvt")
2521 (set_attr "athlon_decode" "double,vector")
2522 (set_attr "amdfam10_decode" "double,double")
2523 (set_attr "prefix_rep" "1")
2524 (set_attr "prefix" "maybe_vex")
2525 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si (TARGET_SSE && TARGET_64BIT): element 0 of
;; V4SF operand 1 is converted to DI through UNSPEC_FIX_NOTRUNC.  The {q}
;; in the template adds the q suffix in AT&T syntax only.
2527 (define_insn "sse_cvtss2siq"
2528 [(set (match_operand:DI 0 "register_operand" "=r,r")
2531 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2532 (parallel [(const_int 0)]))]
2533 UNSPEC_FIX_NOTRUNC))]
2534 "TARGET_SSE && TARGET_64BIT"
2535 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2536 [(set_attr "type" "sseicvt")
2537 (set_attr "athlon_decode" "double,vector")
2538 (set_attr "prefix_rep" "1")
2539 (set_attr "prefix" "maybe_vex")
2540 (set_attr "mode" "DI")])
;; 64-bit variant of sse_cvtss2si_2 (TARGET_SSE && TARGET_64BIT): SF operand
;; 1 is converted to DI through UNSPEC_FIX_NOTRUNC.
2542 (define_insn "sse_cvtss2siq_2"
2543 [(set (match_operand:DI 0 "register_operand" "=r,r")
2544 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2545 UNSPEC_FIX_NOTRUNC))]
2546 "TARGET_SSE && TARGET_64BIT"
2547 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "athlon_decode" "double,vector")
2550 (set_attr "amdfam10_decode" "double,double")
2551 (set_attr "prefix_rep" "1")
2552 (set_attr "prefix" "maybe_vex")
2553 (set_attr "mode" "DI")])
;; cvttss2si: conversion of element 0 of V4SF operand 1 to SI; printed with
;; the truncating mnemonic and an optional VEX prefix (%v, "maybe_vex").
2555 (define_insn "sse_cvttss2si"
2556 [(set (match_operand:SI 0 "register_operand" "=r,r")
2559 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2560 (parallel [(const_int 0)]))))]
2562 "%vcvttss2si\t{%1, %0|%0, %1}"
2563 [(set_attr "type" "sseicvt")
2564 (set_attr "athlon_decode" "double,vector")
2565 (set_attr "amdfam10_decode" "double,double")
2566 (set_attr "prefix_rep" "1")
2567 (set_attr "prefix" "maybe_vex")
2568 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvttss2si (TARGET_SSE && TARGET_64BIT): element 0
;; of V4SF operand 1 is converted to a DI general register.
2570 (define_insn "sse_cvttss2siq"
2571 [(set (match_operand:DI 0 "register_operand" "=r,r")
2574 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2575 (parallel [(const_int 0)]))))]
2576 "TARGET_SSE && TARGET_64BIT"
2577 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2578 [(set_attr "type" "sseicvt")
2579 (set_attr "athlon_decode" "double,vector")
2580 (set_attr "amdfam10_decode" "double,double")
2581 (set_attr "prefix_rep" "1")
2582 (set_attr "prefix" "maybe_vex")
2583 (set_attr "mode" "DI")])
;; VEX integer-to-float vector conversion (vcvtdq2ps), iterated over
;; AVXMODEDCVTDQ2PS; the source uses the matching <avxcvtvecmode> and may
;; be an SSE register or memory.
2585 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2586 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2587 (float:AVXMODEDCVTDQ2PS
2588 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2590 "vcvtdq2ps\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "ssecvt")
2592 (set_attr "prefix" "vex")
2593 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: signed V4SI -> V4SF conversion (RTL "float"); the source may
;; be an SSE register or memory.
2595 (define_insn "sse2_cvtdq2ps"
2596 [(set (match_operand:V4SF 0 "register_operand" "=x")
2597 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2599 "cvtdq2ps\t{%1, %0|%0, %1}"
2600 [(set_attr "type" "ssecvt")
2601 (set_attr "mode" "V4SF")])
;; Expander for a V4SI -> V4SF conversion built from several steps: a signed
;; float conversion of operand 1, an lt comparison of operand 5 against
;; operand 3, an AND of operand 6 with operand 4, and a final addition of
;; operands 5 and 7 into operand 0.
;; The preparation code sets operands[3] to a zero V4SF register and
;; operands[4] to a V4SF register built from the constant 1.0 scaled by
;; 2**32 (real_ldexp with exponent 32); operands 5..7 are fresh V4SF pseudos.
;; NOTE(review): presumably the intermediate sets target operands 5, 6 and 7
;; in that order, adding 2**32 back where the signed result was negative --
;; confirm against the full pattern.
2603 (define_expand "sse2_cvtudq2ps"
2605 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2607 (lt:V4SF (match_dup 5) (match_dup 3)))
2609 (and:V4SF (match_dup 6) (match_dup 4)))
2610 (set (match_operand:V4SF 0 "register_operand" "")
2611 (plus:V4SF (match_dup 5) (match_dup 7)))]
2614 REAL_VALUE_TYPE TWO32r;
2618 real_ldexp (&TWO32r, &dconst1, 32);
2619 x = const_double_from_real_value (TWO32r, SFmode);
2621 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2622 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2624 for (i = 5; i < 8; i++)
2625 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX float-to-integer vector conversion without truncation
;; (UNSPEC_FIX_NOTRUNC), iterated over AVXMODEDCVTPS2DQ; printed as vcvtps2dq.
2628 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2629 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2630 (unspec:AVXMODEDCVTPS2DQ
2631 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2632 UNSPEC_FIX_NOTRUNC))]
2634 "vcvtps2dq\t{%1, %0|%0, %1}"
2635 [(set_attr "type" "ssecvt")
2636 (set_attr "prefix" "vex")
2637 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF -> V4SI conversion expressed as UNSPEC_FIX_NOTRUNC.
;; The encoding needs the data16 prefix (prefix_data16 = 1).
2639 (define_insn "sse2_cvtps2dq"
2640 [(set (match_operand:V4SI 0 "register_operand" "=x")
2641 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2642 UNSPEC_FIX_NOTRUNC))]
2644 "cvtps2dq\t{%1, %0|%0, %1}"
2645 [(set_attr "type" "ssecvt")
2646 (set_attr "prefix_data16" "1")
2647 (set_attr "mode" "TI")])
;; VEX truncating float-to-integer vector conversion (RTL "fix"), iterated
;; over AVXMODEDCVTPS2DQ; printed as vcvttps2dq.
2649 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2650 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2651 (fix:AVXMODEDCVTPS2DQ
2652 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2654 "vcvttps2dq\t{%1, %0|%0, %1}"
2655 [(set_attr "type" "ssecvt")
2656 (set_attr "prefix" "vex")
2657 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: truncating V4SF -> V4SI conversion (RTL "fix").  The encoding
;; uses the rep prefix and explicitly no data16 prefix.
2659 (define_insn "sse2_cvttps2dq"
2660 [(set (match_operand:V4SI 0 "register_operand" "=x")
2661 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2663 "cvttps2dq\t{%1, %0|%0, %1}"
2664 [(set_attr "type" "ssecvt")
2665 (set_attr "prefix_rep" "1")
2666 (set_attr "prefix_data16" "0")
2667 (set_attr "mode" "TI")])
2669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2671 ;; Parallel double-precision floating point conversion operations
2673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI -> V2DF conversion.  Alternative 0 takes the source from
;; an MMX register (unit "mmx", data16 prefix), alternative 1 from memory,
;; where both attributes fall back to their defaults ("*").
2675 (define_insn "sse2_cvtpi2pd"
2676 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2677 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2679 "cvtpi2pd\t{%1, %0|%0, %1}"
2680 [(set_attr "type" "ssecvt")
2681 (set_attr "unit" "mmx,*")
2682 (set_attr "prefix_data16" "1,*")
2683 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC; the
;; result goes to an MMX register ("y").
2685 (define_insn "sse2_cvtpd2pi"
2686 [(set (match_operand:V2SI 0 "register_operand" "=y")
2687 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2688 UNSPEC_FIX_NOTRUNC))]
2690 "cvtpd2pi\t{%1, %0|%0, %1}"
2691 [(set_attr "type" "ssecvt")
2692 (set_attr "unit" "mmx")
2693 (set_attr "prefix_data16" "1")
2694 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating V2DF -> V2SI conversion (RTL "fix"); the result
;; goes to an MMX register ("y").
2696 (define_insn "sse2_cvttpd2pi"
2697 [(set (match_operand:V2SI 0 "register_operand" "=y")
2698 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2700 "cvttpd2pi\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssecvt")
2702 (set_attr "unit" "mmx")
2703 (set_attr "prefix_data16" "1")
2704 (set_attr "mode" "TI")])
;; VEX form of cvtsi2sd: converts SI operand 2 (general register or memory)
;; to DF and combines it with V2DF operand 1 using a three-operand template.
2706 (define_insn "*avx_cvtsi2sd"
2707 [(set (match_operand:V2DF 0 "register_operand" "=x")
2710 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2711 (match_operand:V2DF 1 "register_operand" "x")
2714 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2715 [(set_attr "type" "sseicvt")
2716 (set_attr "prefix" "vex")
2717 (set_attr "mode" "DF")])
;; cvtsi2sd: converts SI operand 2 to DF and combines it with V2DF operand
;; 1, which is tied to the destination ("0").  Register and memory sources
;; are separate alternatives with their own decode attributes.
2719 (define_insn "sse2_cvtsi2sd"
2720 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2723 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2724 (match_operand:V2DF 1 "register_operand" "0,0")
2727 "cvtsi2sd\t{%2, %0|%0, %2}"
2728 [(set_attr "type" "sseicvt")
2729 (set_attr "mode" "DF")
2730 (set_attr "athlon_decode" "double,direct")
2731 (set_attr "amdfam10_decode" "vector,double")])
;; VEX form of the 64-bit integer to DF conversion (vcvtsi2sdq), available
;; only when TARGET_AVX && TARGET_64BIT.
2733 (define_insn "*avx_cvtsi2sdq"
2734 [(set (match_operand:V2DF 0 "register_operand" "=x")
2737 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2738 (match_operand:V2DF 1 "register_operand" "x")
2740 "TARGET_AVX && TARGET_64BIT"
2741 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2742 [(set_attr "type" "sseicvt")
2743 (set_attr "length_vex" "4")
2744 (set_attr "prefix" "vex")
2745 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit integer to DF conversion, available only when
;; TARGET_SSE2 && TARGET_64BIT.  Operand 1 is tied to the destination ("0");
;; the encoding always carries a REX prefix (prefix_rex = 1).
2747 (define_insn "sse2_cvtsi2sdq"
2748 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2751 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2752 (match_operand:V2DF 1 "register_operand" "0,0")
2754 "TARGET_SSE2 && TARGET_64BIT"
2755 "cvtsi2sdq\t{%2, %0|%0, %2}"
2756 [(set_attr "type" "sseicvt")
2757 (set_attr "prefix_rex" "1")
2758 (set_attr "mode" "DF")
2759 (set_attr "athlon_decode" "double,direct")
2760 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on a vector source: element 0 of V2DF operand 1 is converted to
;; SI through UNSPEC_FIX_NOTRUNC; optional VEX prefix via %v / "maybe_vex".
2762 (define_insn "sse2_cvtsd2si"
2763 [(set (match_operand:SI 0 "register_operand" "=r,r")
2766 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2767 (parallel [(const_int 0)]))]
2768 UNSPEC_FIX_NOTRUNC))]
2770 "%vcvtsd2si\t{%1, %0|%0, %1}"
2771 [(set_attr "type" "sseicvt")
2772 (set_attr "athlon_decode" "double,vector")
2773 (set_attr "prefix_rep" "1")
2774 (set_attr "prefix" "maybe_vex")
2775 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar source: DF operand 1 (SSE register or memory) is
;; converted to SI through UNSPEC_FIX_NOTRUNC.
2777 (define_insn "sse2_cvtsd2si_2"
2778 [(set (match_operand:SI 0 "register_operand" "=r,r")
2779 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2780 UNSPEC_FIX_NOTRUNC))]
2782 "%vcvtsd2si\t{%1, %0|%0, %1}"
2783 [(set_attr "type" "sseicvt")
2784 (set_attr "athlon_decode" "double,vector")
2785 (set_attr "amdfam10_decode" "double,double")
2786 (set_attr "prefix_rep" "1")
2787 (set_attr "prefix" "maybe_vex")
2788 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvtsd2si (TARGET_SSE2 && TARGET_64BIT): element 0
;; of V2DF operand 1 is converted to DI through UNSPEC_FIX_NOTRUNC.  Unlike
;; the cvtss2si{q} templates, the q suffix here is unconditional.
2790 (define_insn "sse2_cvtsd2siq"
2791 [(set (match_operand:DI 0 "register_operand" "=r,r")
2794 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2795 (parallel [(const_int 0)]))]
2796 UNSPEC_FIX_NOTRUNC))]
2797 "TARGET_SSE2 && TARGET_64BIT"
2798 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2799 [(set_attr "type" "sseicvt")
2800 (set_attr "athlon_decode" "double,vector")
2801 (set_attr "prefix_rep" "1")
2802 (set_attr "prefix" "maybe_vex")
2803 (set_attr "mode" "DI")])
;; 64-bit variant of sse2_cvtsd2si_2 (TARGET_SSE2 && TARGET_64BIT): DF
;; operand 1 is converted to DI through UNSPEC_FIX_NOTRUNC.
2805 (define_insn "sse2_cvtsd2siq_2"
2806 [(set (match_operand:DI 0 "register_operand" "=r,r")
2807 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2808 UNSPEC_FIX_NOTRUNC))]
2809 "TARGET_SSE2 && TARGET_64BIT"
2810 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2811 [(set_attr "type" "sseicvt")
2812 (set_attr "athlon_decode" "double,vector")
2813 (set_attr "amdfam10_decode" "double,double")
2814 (set_attr "prefix_rep" "1")
2815 (set_attr "prefix" "maybe_vex")
2816 (set_attr "mode" "DI")])
;; cvttsd2si: conversion of element 0 of V2DF operand 1 to SI; printed with
;; the truncating mnemonic and an optional VEX prefix (%v, "maybe_vex").
2818 (define_insn "sse2_cvttsd2si"
2819 [(set (match_operand:SI 0 "register_operand" "=r,r")
2822 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2823 (parallel [(const_int 0)]))))]
2825 "%vcvttsd2si\t{%1, %0|%0, %1}"
2826 [(set_attr "type" "sseicvt")
2827 (set_attr "prefix_rep" "1")
2828 (set_attr "prefix" "maybe_vex")
2829 (set_attr "mode" "SI")
2830 (set_attr "athlon_decode" "double,vector")
2831 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit variant of sse2_cvttsd2si (TARGET_SSE2 && TARGET_64BIT): element 0
;; of V2DF operand 1 is converted to a DI general register.
2833 (define_insn "sse2_cvttsd2siq"
2834 [(set (match_operand:DI 0 "register_operand" "=r,r")
2837 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2838 (parallel [(const_int 0)]))))]
2839 "TARGET_SSE2 && TARGET_64BIT"
2840 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2841 [(set_attr "type" "sseicvt")
2842 (set_attr "prefix_rep" "1")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "mode" "DI")
2845 (set_attr "athlon_decode" "double,vector")
2846 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: converts all four elements of V4SI operand 1
;; to V4DF.
2848 (define_insn "avx_cvtdq2pd256"
2849 [(set (match_operand:V4DF 0 "register_operand" "=x")
2850 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2852 "vcvtdq2pd\t{%1, %0|%0, %1}"
2853 [(set_attr "type" "ssecvt")
2854 (set_attr "prefix" "vex")
2855 (set_attr "mode" "V4DF")])
;; cvtdq2pd: produces V2DF from elements 0 and 1 of V4SI operand 1;
;; optional VEX prefix via %v / "maybe_vex".
2857 (define_insn "sse2_cvtdq2pd"
2858 [(set (match_operand:V2DF 0 "register_operand" "=x")
2861 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2862 (parallel [(const_int 0) (const_int 1)]))))]
2864 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2865 [(set_attr "type" "ssecvt")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF -> V4SI through UNSPEC_FIX_NOTRUNC.
;; The {y} suffix is printed in AT&T syntax only.
2869 (define_insn "avx_cvtpd2dq256"
2870 [(set (match_operand:V4SI 0 "register_operand" "=x")
2871 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2872 UNSPEC_FIX_NOTRUNC))]
2874 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2875 [(set_attr "type" "ssecvt")
2876 (set_attr "prefix" "vex")
2877 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI conversion: wraps operand 1 in a V2SI
;; unspec and supplies operands[2] as the V2SI zero constant.
;; NOTE(review): presumably operand 2 fills the upper half of the V4SI
;; result, matching the const0_operand in *sse2_cvtpd2dq -- confirm.
2879 (define_expand "sse2_cvtpd2dq"
2880 [(set (match_operand:V4SI 0 "register_operand" "")
2882 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2886 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the V2DF -> V4SI conversion with a zero V2SI as operand 2.
;; The output is C code: it prints vcvtpd2dq{x} when TARGET_AVX is set and
;; plain cvtpd2dq otherwise.
2888 (define_insn "*sse2_cvtpd2dq"
2889 [(set (match_operand:V4SI 0 "register_operand" "=x")
2891 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2893 (match_operand:V2SI 2 "const0_operand" "")))]
2895 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2896 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2897 [(set_attr "type" "ssecvt")
2898 (set_attr "prefix_rep" "1")
2899 (set_attr "prefix_data16" "0")
2900 (set_attr "prefix" "maybe_vex")
2901 (set_attr "mode" "TI")
2902 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with a 256-bit source: truncating V4DF -> V4SI conversion
;; (RTL "fix").  The {y} suffix is printed in AT&T syntax only.
2904 (define_insn "avx_cvttpd2dq256"
2905 [(set (match_operand:V4SI 0 "register_operand" "=x")
2906 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2908 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2909 [(set_attr "type" "ssecvt")
2910 (set_attr "prefix" "vex")
2911 (set_attr "mode" "OI")])
;; Expander for the truncating V2DF -> V4SI conversion: applies fix:V2SI to
;; operand 1 and supplies operands[2] as the V2SI zero constant.
;; NOTE(review): presumably operand 2 fills the upper half of the V4SI
;; result, matching the const0_operand in *sse2_cvttpd2dq -- confirm.
2913 (define_expand "sse2_cvttpd2dq"
2914 [(set (match_operand:V4SI 0 "register_operand" "")
2916 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2919 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the truncating V2DF -> V4SI conversion with a zero V2SI as
;; operand 2.  The output is C code: it prints vcvttpd2dq{x} when TARGET_AVX
;; is set and plain cvttpd2dq otherwise.
2921 (define_insn "*sse2_cvttpd2dq"
2922 [(set (match_operand:V4SI 0 "register_operand" "=x")
2924 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2925 (match_operand:V2SI 2 "const0_operand" "")))]
2927 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2928 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2929 [(set_attr "type" "ssecvt")
2930 (set_attr "prefix" "maybe_vex")
2931 (set_attr "mode" "TI")
2932 (set_attr "amdfam10_decode" "double")])
;; VEX form of cvtsd2ss: narrows V2DF operand 2 with float_truncate:V2SF
;; and combines the result with V4SF operand 1 (three-operand template).
2934 (define_insn "*avx_cvtsd2ss"
2935 [(set (match_operand:V4SF 0 "register_operand" "=x")
2938 (float_truncate:V2SF
2939 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2940 (match_operand:V4SF 1 "register_operand" "x")
2943 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2944 [(set_attr "type" "ssecvt")
2945 (set_attr "prefix" "vex")
2946 (set_attr "mode" "SF")])
;; cvtsd2ss: narrows V2DF operand 2 with float_truncate:V2SF and combines
;; the result with V4SF operand 1, which is tied to the destination ("0").
;; Register and memory sources are separate alternatives.
2948 (define_insn "sse2_cvtsd2ss"
2949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2952 (float_truncate:V2SF
2953 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2954 (match_operand:V4SF 1 "register_operand" "0,0")
2957 "cvtsd2ss\t{%2, %0|%0, %2}"
2958 [(set_attr "type" "ssecvt")
2959 (set_attr "athlon_decode" "vector,double")
2960 (set_attr "amdfam10_decode" "vector,double")
2961 (set_attr "mode" "SF")])
;; VEX form of cvtss2sd: takes elements 0 and 1 of V4SF operand 2 and
;; combines the converted value with V2DF operand 1 (three-operand
;; template).
2963 (define_insn "*avx_cvtss2sd"
2964 [(set (match_operand:V2DF 0 "register_operand" "=x")
2968 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2969 (parallel [(const_int 0) (const_int 1)])))
2970 (match_operand:V2DF 1 "register_operand" "x")
2973 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2974 [(set_attr "type" "ssecvt")
2975 (set_attr "prefix" "vex")
2976 (set_attr "mode" "DF")])
;; cvtss2sd: takes elements 0 and 1 of V4SF operand 2 and combines the
;; converted value with V2DF operand 1, which is tied to the destination.
2978 (define_insn "sse2_cvtss2sd"
2979 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2983 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2984 (parallel [(const_int 0) (const_int 1)])))
2985 (match_operand:V2DF 1 "register_operand" "0,0")
2988 "cvtss2sd\t{%2, %0|%0, %2}"
2989 [(set_attr "type" "ssecvt")
2990 (set_attr "amdfam10_decode" "vector,double")
2991 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: narrows V4DF operand 1 to V4SF with
;; float_truncate.  The {y} suffix is printed in AT&T syntax only.
2993 (define_insn "avx_cvtpd2ps256"
2994 [(set (match_operand:V4SF 0 "register_operand" "=x")
2995 (float_truncate:V4SF
2996 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2998 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2999 [(set_attr "type" "ssecvt")
3000 (set_attr "prefix" "vex")
3001 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V4SF narrowing: applies float_truncate:V2SF to
;; operand 1 and supplies operands[2] as the V2SF zero constant.
;; NOTE(review): presumably operand 2 fills the upper half of the V4SF
;; result, matching the const0_operand in *sse2_cvtpd2ps -- confirm.
3003 (define_expand "sse2_cvtpd2ps"
3004 [(set (match_operand:V4SF 0 "register_operand" "")
3006 (float_truncate:V2SF
3007 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3010 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the V2DF -> V4SF narrowing with a zero V2SF as operand 2.
;; The output is C code: it prints vcvtpd2ps{x} when TARGET_AVX is set and
;; plain cvtpd2ps otherwise.
3012 (define_insn "*sse2_cvtpd2ps"
3013 [(set (match_operand:V4SF 0 "register_operand" "=x")
3015 (float_truncate:V2SF
3016 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3017 (match_operand:V2SF 2 "const0_operand" "")))]
3019 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3020 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3021 [(set_attr "type" "ssecvt")
3022 (set_attr "prefix_data16" "1")
3023 (set_attr "prefix" "maybe_vex")
3024 (set_attr "mode" "V4SF")
3025 (set_attr "amdfam10_decode" "double")])
;; 256-bit widening conversion: V4SF operand 1 (register or memory) to V4DF
;; register operand 0, emitted as vcvtps2pd.
3027 (define_insn "avx_cvtps2pd256"
3028 [(set (match_operand:V4DF 0 "register_operand" "=x")
3030 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3032 "vcvtps2pd\t{%1, %0|%0, %1}"
3033 [(set_attr "type" "ssecvt")
3034 (set_attr "prefix" "vex")
3035 (set_attr "mode" "V4DF")])
;; 128-bit widening conversion: elements 0 and 1 of V4SF operand 1 produce the
;; V2DF result in operand 0.  The %v prefix in the template lets the same
;; pattern serve both encodings (prefix attribute is "maybe_vex").
3037 (define_insn "sse2_cvtps2pd"
3038 [(set (match_operand:V2DF 0 "register_operand" "=x")
3041 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3042 (parallel [(const_int 0) (const_int 1)]))))]
3044 "%vcvtps2pd\t{%1, %0|%0, %1}"
3045 [(set_attr "type" "ssecvt")
3046 (set_attr "prefix" "maybe_vex")
3047 (set_attr "mode" "V2DF")
3048 (set_attr "prefix_data16" "0")
3049 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander producing a V2DF result (operand 0) from V4SF operand 1.
;; A fresh V4SF pseudo is allocated as operand 2 to hold the intermediate
;; element selection; the final set uses elements 0 and 1 of a selection.
3051 (define_expand "vec_unpacks_hi_v4sf"
3056 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3057 (parallel [(const_int 6)
3061 (set (match_operand:V2DF 0 "register_operand" "")
3065 (parallel [(const_int 0) (const_int 1)]))))]
3068 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
3071 (define_expand "vec_unpacks_lo_v4sf"
3072 [(set (match_operand:V2DF 0 "register_operand" "")
3075 (match_operand:V4SF 1 "nonimmediate_operand" "")
3076 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_hi_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that temporary
;; into operand 0.
3079 (define_expand "vec_unpacks_float_hi_v8hi"
3080 [(match_operand:V4SF 0 "register_operand" "")
3081 (match_operand:V8HI 1 "register_operand" "")]
3084 rtx tmp = gen_reg_rtx (V4SImode);
3086 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3087 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_lo_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that temporary
;; into operand 0.
3091 (define_expand "vec_unpacks_float_lo_v8hi"
3092 [(match_operand:V4SF 0 "register_operand" "")
3093 (match_operand:V8HI 1 "register_operand" "")]
3096 rtx tmp = gen_reg_rtx (V4SImode);
3098 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3099 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_hi_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that temporary
;; into operand 0.
3103 (define_expand "vec_unpacku_float_hi_v8hi"
3104 [(match_operand:V4SF 0 "register_operand" "")
3105 (match_operand:V8HI 1 "register_operand" "")]
3108 rtx tmp = gen_reg_rtx (V4SImode);
3110 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3111 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_lo_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that temporary
;; into operand 0.
3115 (define_expand "vec_unpacku_float_lo_v8hi"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V8HI 1 "register_operand" "")]
3120 rtx tmp = gen_reg_rtx (V4SImode);
3122 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3123 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander producing V2DF operand 0 from V4SI operand 1.  A fresh
;; V4SI pseudo (operand 2) holds an intermediate selection whose index list
;; starts at element 2; the final set uses elements 0 and 1 of a selection.
3127 (define_expand "vec_unpacks_float_hi_v4si"
3130 (match_operand:V4SI 1 "nonimmediate_operand" "")
3131 (parallel [(const_int 2)
3135 (set (match_operand:V2DF 0 "register_operand" "")
3139 (parallel [(const_int 0) (const_int 1)]))))]
3141 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
3143 (define_expand "vec_unpacks_float_lo_v4si"
3144 [(set (match_operand:V2DF 0 "register_operand" "")
3147 (match_operand:V4SI 1 "nonimmediate_operand" "")
3148 (parallel [(const_int 0) (const_int 1)]))))]
;; Multi-step expander producing V2DF operand 0 from V4SI operand 1.
;; Preparation code builds these helper operands:
;;   operand 3     - V2DF zero vector, forced into a register
;;   operand 4     - V2DF vector built from 2^32 (real_ldexp of dconst1 by 32)
;;   operand 5     - fresh V4SI pseudo
;;   operands 6..8 - fresh V2DF pseudos
;; The visible steps compare operand 6 against zero (lt), AND operand 7 with
;; the 2^32 vector, and finally add operand 8 to operand 6.
;; NOTE(review): presumably operand 7 is the compare mask and operand 8 the
;; masked 2^32 correction for negative lanes -- confirm against the full
;; pattern.
3151 (define_expand "vec_unpacku_float_hi_v4si"
3154 (match_operand:V4SI 1 "nonimmediate_operand" "")
3155 (parallel [(const_int 2)
3163 (parallel [(const_int 0) (const_int 1)]))))
3165 (lt:V2DF (match_dup 6) (match_dup 3)))
3167 (and:V2DF (match_dup 7) (match_dup 4)))
3168 (set (match_operand:V2DF 0 "register_operand" "")
3169 (plus:V2DF (match_dup 6) (match_dup 8)))]
3172 REAL_VALUE_TYPE TWO32r;
3176 real_ldexp (&TWO32r, &dconst1, 32);
3177 x = const_double_from_real_value (TWO32r, DFmode);
3179 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3180 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3182 operands[5] = gen_reg_rtx (V4SImode);
3184 for (i = 6; i < 9; i++)
3185 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander producing V2DF operand 0 from elements 0 and 1 of
;; V4SI operand 1.  Preparation code builds these helper operands:
;;   operand 3     - V2DF zero vector, forced into a register
;;   operand 4     - V2DF vector built from 2^32 (real_ldexp of dconst1 by 32)
;;   operands 5..7 - fresh V2DF pseudos
;; The visible steps compare operand 5 against zero (lt), AND operand 6 with
;; the 2^32 vector, and finally add operand 7 to operand 5.
;; NOTE(review): presumably operand 6 is the compare mask and operand 7 the
;; masked 2^32 correction for negative lanes -- confirm against the full
;; pattern.
3188 (define_expand "vec_unpacku_float_lo_v4si"
3192 (match_operand:V4SI 1 "nonimmediate_operand" "")
3193 (parallel [(const_int 0) (const_int 1)]))))
3195 (lt:V2DF (match_dup 5) (match_dup 3)))
3197 (and:V2DF (match_dup 6) (match_dup 4)))
3198 (set (match_operand:V2DF 0 "register_operand" "")
3199 (plus:V2DF (match_dup 5) (match_dup 7)))]
3202 REAL_VALUE_TYPE TWO32r;
3206 real_ldexp (&TWO32r, &dconst1, 32);
3207 x = const_double_from_real_value (TWO32r, DFmode);
3209 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3210 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3212 for (i = 5; i < 8; i++)
3213 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V2DF inputs into one V4SF result.  Each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and
;; gen_sse_movlhps combines r1 and r2 into operand 0.
3216 (define_expand "vec_pack_trunc_v2df"
3217 [(match_operand:V4SF 0 "register_operand" "")
3218 (match_operand:V2DF 1 "nonimmediate_operand" "")
3219 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3224 r1 = gen_reg_rtx (V4SFmode);
3225 r2 = gen_reg_rtx (V4SFmode);
3227 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3228 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3229 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result.  Each input is converted with
;; gen_sse2_cvttpd2dq into a V4SI temporary; the temporaries and the
;; destination are then viewed as V2DI (gen_lowpart) and combined with
;; gen_vec_interleave_lowv2di.
3233 (define_expand "vec_pack_sfix_trunc_v2df"
3234 [(match_operand:V4SI 0 "register_operand" "")
3235 (match_operand:V2DF 1 "nonimmediate_operand" "")
3236 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3241 r1 = gen_reg_rtx (V4SImode);
3242 r2 = gen_reg_rtx (V4SImode);
3244 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3245 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3246 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3247 gen_lowpart (V2DImode, r1),
3248 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI result.  Each input is converted with
;; gen_sse2_cvtpd2dq into a V4SI temporary; the temporaries and the
;; destination are then viewed as V2DI (gen_lowpart) and combined with
;; gen_vec_interleave_lowv2di.
3252 (define_expand "vec_pack_sfix_v2df"
3253 [(match_operand:V4SI 0 "register_operand" "")
3254 (match_operand:V2DF 1 "nonimmediate_operand" "")
3255 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3260 r1 = gen_reg_rtx (V4SImode);
3261 r2 = gen_reg_rtx (V4SImode);
3263 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3264 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3265 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3266 gen_lowpart (V2DImode, r1),
3267 gen_lowpart (V2DImode, r2)));
3271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3273 ;; Parallel single-precision floating point element swizzling
3275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three V4SF operands may
;; be registers or memory here; the preparation statement passes them through
;; ix86_fixup_binary_operands before the pattern is generated.
3277 (define_expand "sse_movhlps_exp"
3278 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3281 (match_operand:V4SF 1 "nonimmediate_operand" "")
3282 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3283 (parallel [(const_int 6)
3288 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX variant; rejected when operands 1 and 2 are both memory.
;; Alternatives:
;;   0: register/register        -> vmovhlps (three operands)
;;   1: offsettable memory src 2 -> vmovlps using the %H2 operand modifier
;;   2: memory destination (operand 1 tied to it) -> vmovhps store
3290 (define_insn "*avx_movhlps"
3291 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3294 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3295 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3296 (parallel [(const_int 6)
3300 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3302 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3303 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3304 vmovhps\t{%2, %0|%0, %2}"
3305 [(set_attr "type" "ssemov")
3306 (set_attr "prefix" "vex")
3307 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE variant; rejected when operands 1 and 2 are both memory.
;; Operand 1 is tied to the destination in every alternative.
;;   0: register source          -> movhlps
;;   1: offsettable memory src 2 -> movlps using the %H2 operand modifier
;;   2: memory destination       -> movhps store
3309 (define_insn "sse_movhlps"
3310 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3313 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3314 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3315 (parallel [(const_int 6)
3319 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3321 movhlps\t{%2, %0|%0, %2}
3322 movlps\t{%H2, %0|%0, %H2}
3323 movhps\t{%2, %0|%0, %2}"
3324 [(set_attr "type" "ssemov")
3325 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three V4SF operands may
;; be registers or memory here; the preparation statement passes them through
;; ix86_fixup_binary_operands before the pattern is generated.
3327 (define_expand "sse_movlhps_exp"
3328 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3331 (match_operand:V4SF 1 "nonimmediate_operand" "")
3332 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3333 (parallel [(const_int 0)
3338 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX variant, guarded by ix86_binary_operator_ok.  Alternatives:
;;   0: register/register -> vmovlhps (three operands)
;;   1: memory source 2   -> vmovhps load (three operands)
;;   2: offsettable memory destination (operand 1 tied to it) -> vmovlps
;;      store addressed through the %H0 operand modifier
3340 (define_insn "*avx_movlhps"
3341 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3344 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3345 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3346 (parallel [(const_int 0)
3350 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3352 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3353 vmovhps\t{%2, %1, %0|%0, %1, %2}
3354 vmovlps\t{%2, %H0|%H0, %2}"
3355 [(set_attr "type" "ssemov")
3356 (set_attr "prefix" "vex")
3357 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE variant, guarded by ix86_binary_operator_ok.  Operand 1 is tied
;; to the destination in every alternative.
;;   0: register source   -> movlhps
;;   1: memory source     -> movhps load
;;   2: offsettable memory destination -> movlps store addressed through the
;;      %H0 operand modifier
3359 (define_insn "sse_movlhps"
3360 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3363 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3364 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3365 (parallel [(const_int 0)
3369 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3371 movlhps\t{%2, %0|%0, %2}
3372 movhps\t{%2, %0|%0, %2}
3373 movlps\t{%2, %H0|%H0, %2}"
3374 [(set_attr "type" "ssemov")
3375 (set_attr "mode" "V4SF,V2SF,V2SF")])
3377 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF.  Indices in the selection refer to operand 1 (0..7)
;; followed by operand 2 (8..15); the result order is 2,10,3,11,6,14,7,15.
3378 (define_insn "avx_unpckhps256"
3379 [(set (match_operand:V8SF 0 "register_operand" "=x")
3382 (match_operand:V8SF 1 "register_operand" "x")
3383 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3384 (parallel [(const_int 2) (const_int 10)
3385 (const_int 3) (const_int 11)
3386 (const_int 6) (const_int 14)
3387 (const_int 7) (const_int 15)])))]
3389 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3390 [(set_attr "type" "sselog")
3391 (set_attr "prefix" "vex")
3392 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpckhps on V4SF.  Selection order 2,6,3,7 over
;; operand 1 (0..3) followed by operand 2 (4..7).
3394 (define_insn "*avx_interleave_highv4sf"
3395 [(set (match_operand:V4SF 0 "register_operand" "=x")
3398 (match_operand:V4SF 1 "register_operand" "x")
3399 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3400 (parallel [(const_int 2) (const_int 6)
3401 (const_int 3) (const_int 7)])))]
3403 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3404 [(set_attr "type" "sselog")
3405 (set_attr "prefix" "vex")
3406 (set_attr "mode" "V4SF")])
;; Legacy two-operand unpckhps on V4SF; operand 1 is tied to the destination.
;; Selection order 2,6,3,7 over operand 1 (0..3) followed by operand 2 (4..7).
3408 (define_insn "vec_interleave_highv4sf"
3409 [(set (match_operand:V4SF 0 "register_operand" "=x")
3412 (match_operand:V4SF 1 "register_operand" "0")
3413 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3414 (parallel [(const_int 2) (const_int 6)
3415 (const_int 3) (const_int 7)])))]
3417 "unpckhps\t{%2, %0|%0, %2}"
3418 [(set_attr "type" "sselog")
3419 (set_attr "mode" "V4SF")])
3421 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF.  Indices in the selection refer to operand 1 (0..7)
;; followed by operand 2 (8..15); the result order is 0,8,1,9,4,12,5,13.
3422 (define_insn "avx_unpcklps256"
3423 [(set (match_operand:V8SF 0 "register_operand" "=x")
3426 (match_operand:V8SF 1 "register_operand" "x")
3427 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3428 (parallel [(const_int 0) (const_int 8)
3429 (const_int 1) (const_int 9)
3430 (const_int 4) (const_int 12)
3431 (const_int 5) (const_int 13)])))]
3433 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3434 [(set_attr "type" "sselog")
3435 (set_attr "prefix" "vex")
3436 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpcklps on V4SF.  Selection order 0,4,1,5 over
;; operand 1 (0..3) followed by operand 2 (4..7).
3438 (define_insn "*avx_interleave_lowv4sf"
3439 [(set (match_operand:V4SF 0 "register_operand" "=x")
3442 (match_operand:V4SF 1 "register_operand" "x")
3443 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3444 (parallel [(const_int 0) (const_int 4)
3445 (const_int 1) (const_int 5)])))]
3447 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3448 [(set_attr "type" "sselog")
3449 (set_attr "prefix" "vex")
3450 (set_attr "mode" "V4SF")])
;; Legacy two-operand unpcklps on V4SF; operand 1 is tied to the destination.
;; Selection order 0,4,1,5 over operand 1 (0..3) followed by operand 2 (4..7).
3452 (define_insn "vec_interleave_lowv4sf"
3453 [(set (match_operand:V4SF 0 "register_operand" "=x")
3456 (match_operand:V4SF 1 "register_operand" "0")
3457 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3458 (parallel [(const_int 0) (const_int 4)
3459 (const_int 1) (const_int 5)])))]
3461 "unpcklps\t{%2, %0|%0, %2}"
3462 [(set_attr "type" "sselog")
3463 (set_attr "mode" "V4SF")])
3465 ;; These are modeled with the same vec_concat as the others so that we
3466 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: each odd-indexed element of operand 1 is duplicated
;; (selection 1,1,3,3,5,5,7,7).
3467 (define_insn "avx_movshdup256"
3468 [(set (match_operand:V8SF 0 "register_operand" "=x")
3471 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3473 (parallel [(const_int 1) (const_int 1)
3474 (const_int 3) (const_int 3)
3475 (const_int 5) (const_int 5)
3476 (const_int 7) (const_int 7)])))]
3478 "vmovshdup\t{%1, %0|%0, %1}"
3479 [(set_attr "type" "sse")
3480 (set_attr "prefix" "vex")
3481 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF; the selection list starts at element 1.  The %v
;; template prefix covers both the legacy and VEX spellings ("maybe_vex").
3483 (define_insn "sse3_movshdup"
3484 [(set (match_operand:V4SF 0 "register_operand" "=x")
3487 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3489 (parallel [(const_int 1)
3494 "%vmovshdup\t{%1, %0|%0, %1}"
3495 [(set_attr "type" "sse")
3496 (set_attr "prefix_rep" "1")
3497 (set_attr "prefix" "maybe_vex")
3498 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: each even-indexed element of operand 1 is duplicated
;; (selection 0,0,2,2,4,4,6,6).
3500 (define_insn "avx_movsldup256"
3501 [(set (match_operand:V8SF 0 "register_operand" "=x")
3504 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3506 (parallel [(const_int 0) (const_int 0)
3507 (const_int 2) (const_int 2)
3508 (const_int 4) (const_int 4)
3509 (const_int 6) (const_int 6)])))]
3511 "vmovsldup\t{%1, %0|%0, %1}"
3512 [(set_attr "type" "sse")
3513 (set_attr "prefix" "vex")
3514 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF; the selection list starts at element 0.  The %v
;; template prefix covers both the legacy and VEX spellings ("maybe_vex").
3516 (define_insn "sse3_movsldup"
3517 [(set (match_operand:V4SF 0 "register_operand" "=x")
3520 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3522 (parallel [(const_int 0)
3527 "%vmovsldup\t{%1, %0|%0, %1}"
3528 [(set_attr "type" "sse")
3529 (set_attr "prefix_rep" "1")
3530 (set_attr "prefix" "maybe_vex")
3531 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3 and decoding it
;; into the eight explicit element selectors of avx_shufps256_1.  Each 2-bit
;; field is used twice: once for the first four selectors (offset 0 for
;; fields 0-1, +8 for fields 2-3) and once more with an extra +4 for the
;; last four selectors.
3533 (define_expand "avx_shufps256"
3534 [(match_operand:V8SF 0 "register_operand" "")
3535 (match_operand:V8SF 1 "register_operand" "")
3536 (match_operand:V8SF 2 "nonimmediate_operand" "")
3537 (match_operand:SI 3 "const_int_operand" "")]
3540 int mask = INTVAL (operands[3]);
3541 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3542 GEN_INT ((mask >> 0) & 3),
3543 GEN_INT ((mask >> 2) & 3),
3544 GEN_INT (((mask >> 4) & 3) + 8),
3545 GEN_INT (((mask >> 6) & 3) + 8),
3546 GEN_INT (((mask >> 0) & 3) + 4),
3547 GEN_INT (((mask >> 2) & 3) + 4),
3548 GEN_INT (((mask >> 4) & 3) + 12),
3549 GEN_INT (((mask >> 6) & 3) + 12)));
3553 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit lane.
;; vshufps on V8SF with explicit element selectors (operands 3..10).
;; The insn condition requires selectors 7..10 to equal selectors 3..6 plus 4,
;; so a single 8-bit immediate can describe the shuffle.  The output code
;; rebuilds that immediate from operands 3..6 (removing the +8 bias from
;; operands 5 and 6) and stores it back into operands[3] for the template.
3554 (define_insn "avx_shufps256_1"
3555 [(set (match_operand:V8SF 0 "register_operand" "=x")
3558 (match_operand:V8SF 1 "register_operand" "x")
3559 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3560 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3561 (match_operand 4 "const_0_to_3_operand" "")
3562 (match_operand 5 "const_8_to_11_operand" "")
3563 (match_operand 6 "const_8_to_11_operand" "")
3564 (match_operand 7 "const_4_to_7_operand" "")
3565 (match_operand 8 "const_4_to_7_operand" "")
3566 (match_operand 9 "const_12_to_15_operand" "")
3567 (match_operand 10 "const_12_to_15_operand" "")])))]
3569 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3570 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3571 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3572 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3575 mask = INTVAL (operands[3]);
3576 mask |= INTVAL (operands[4]) << 2;
3577 mask |= (INTVAL (operands[5]) - 8) << 4;
3578 mask |= (INTVAL (operands[6]) - 8) << 6;
3579 operands[3] = GEN_INT (mask);
3581 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3583 [(set_attr "type" "sselog")
3584 (set_attr "length_immediate" "1")
3585 (set_attr "prefix" "vex")
3586 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3 and decoding it
;; into four explicit element selectors for gen_sse_shufps_v4sf: fields 0-1
;; are passed unchanged, fields 2-3 are biased by +4.
3588 (define_expand "sse_shufps"
3589 [(match_operand:V4SF 0 "register_operand" "")
3590 (match_operand:V4SF 1 "register_operand" "")
3591 (match_operand:V4SF 2 "nonimmediate_operand" "")
3592 (match_operand:SI 3 "const_int_operand" "")]
3595 int mask = INTVAL (operands[3]);
3596 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3597 GEN_INT ((mask >> 0) & 3),
3598 GEN_INT ((mask >> 2) & 3),
3599 GEN_INT (((mask >> 4) & 3) + 4),
3600 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps for every mode in SSEMODE4S.  Operands 3..6 are
;; explicit element selectors into the concatenation of operands 1 and 2; the
;; output code packs them back into one immediate (removing the +4 bias from
;; operands 5 and 6) and stores it into operands[3] for the template.
3604 (define_insn "*avx_shufps_<mode>"
3605 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3606 (vec_select:SSEMODE4S
3607 (vec_concat:<ssedoublesizemode>
3608 (match_operand:SSEMODE4S 1 "register_operand" "x")
3609 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3610 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3611 (match_operand 4 "const_0_to_3_operand" "")
3612 (match_operand 5 "const_4_to_7_operand" "")
3613 (match_operand 6 "const_4_to_7_operand" "")])))]
3617 mask |= INTVAL (operands[3]) << 0;
3618 mask |= INTVAL (operands[4]) << 2;
3619 mask |= (INTVAL (operands[5]) - 4) << 4;
3620 mask |= (INTVAL (operands[6]) - 4) << 6;
3621 operands[3] = GEN_INT (mask);
3623 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3625 [(set_attr "type" "sselog")
3626 (set_attr "length_immediate" "1")
3627 (set_attr "prefix" "vex")
3628 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps for every mode in SSEMODE4S; operand 1 is tied to
;; the destination.  Operands 3..6 are explicit element selectors into the
;; concatenation of operands 1 and 2; the output code packs them back into
;; one immediate (removing the +4 bias from operands 5 and 6) and stores it
;; into operands[3] for the template.
3630 (define_insn "sse_shufps_<mode>"
3631 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3632 (vec_select:SSEMODE4S
3633 (vec_concat:<ssedoublesizemode>
3634 (match_operand:SSEMODE4S 1 "register_operand" "0")
3635 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3636 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3637 (match_operand 4 "const_0_to_3_operand" "")
3638 (match_operand 5 "const_4_to_7_operand" "")
3639 (match_operand 6 "const_4_to_7_operand" "")])))]
3643 mask |= INTVAL (operands[3]) << 0;
3644 mask |= INTVAL (operands[4]) << 2;
3645 mask |= (INTVAL (operands[5]) - 4) << 4;
3646 mask |= (INTVAL (operands[6]) - 4) << 6;
3647 operands[3] = GEN_INT (mask);
3649 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3651 [(set_attr "type" "sselog")
3652 (set_attr "length_immediate" "1")
3653 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination        -> %vmovhps store
;;   1: register to register      -> %vmovhlps (destination printed with %d0)
;;   2: offsettable memory source -> %vmovlps using the %H1 operand modifier
3655 (define_insn "sse_storehps"
3656 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3658 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3659 (parallel [(const_int 2) (const_int 3)])))]
3662 %vmovhps\t{%1, %0|%0, %1}
3663 %vmovhlps\t{%1, %d0|%d0, %1}
3664 %vmovlps\t{%H1, %d0|%d0, %H1}"
3665 [(set_attr "type" "ssemov")
3666 (set_attr "prefix" "maybe_vex")
3667 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: combines elements 0 and 1 of
;; V4SF operand 1 with V2SF operand 2.  Operands are first passed through
;; ix86_fixup_binary_operands.
3669 (define_expand "sse_loadhps_exp"
3670 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3673 (match_operand:V4SF 1 "nonimmediate_operand" "")
3674 (parallel [(const_int 0) (const_int 1)]))
3675 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3677 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX variant: keeps elements 0 and 1 of operand 1 and takes the other half
;; from V2SF operand 2.  Alternatives:
;;   0: memory operand 2   -> vmovhps load (three operands)
;;   1: register operand 2 -> vmovlhps (three operands)
;;   2: offsettable memory destination (operand 1 tied to it) -> vmovlps
;;      store addressed through the %H0 operand modifier
3679 (define_insn "*avx_loadhps"
3680 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3683 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3684 (parallel [(const_int 0) (const_int 1)]))
3685 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3688 vmovhps\t{%2, %1, %0|%0, %1, %2}
3689 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3690 vmovlps\t{%2, %H0|%H0, %2}"
3691 [(set_attr "type" "ssemov")
3692 (set_attr "prefix" "vex")
3693 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE variant; operand 1 is tied to the destination in every
;; alternative.
;;   0: memory operand 2   -> movhps load
;;   1: register operand 2 -> movlhps
;;   2: offsettable memory destination -> movlps store addressed through the
;;      %H0 operand modifier
3695 (define_insn "sse_loadhps"
3696 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3699 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3700 (parallel [(const_int 0) (const_int 1)]))
3701 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3704 movhps\t{%2, %0|%0, %2}
3705 movlhps\t{%2, %0|%0, %2}
3706 movlps\t{%2, %H0|%H0, %2}"
3707 [(set_attr "type" "ssemov")
3708 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX variant: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives:
;;   0: memory destination   -> vmovlps store
;;   1: register to register -> vmovaps (whole-register copy)
;;   2: memory source        -> vmovlps load, with %0 also as second source
;; NOTE(review): the mode attribute for the vmovaps alternative is V2DF here
;; but V4SF in sse_storelps -- confirm whether the difference is intended.
3710 (define_insn "*avx_storelps"
3711 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3713 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3714 (parallel [(const_int 0) (const_int 1)])))]
3717 vmovlps\t{%1, %0|%0, %1}
3718 vmovaps\t{%1, %0|%0, %1}
3719 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3720 [(set_attr "type" "ssemov")
3721 (set_attr "prefix" "vex")
3722 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE variant: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives:
;;   0: memory destination   -> movlps store
;;   1: register to register -> movaps (whole-register copy)
;;   2: memory source        -> movlps load
3724 (define_insn "sse_storelps"
3725 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3727 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3728 (parallel [(const_int 0) (const_int 1)])))]
3731 movlps\t{%1, %0|%0, %1}
3732 movaps\t{%1, %0|%0, %1}
3733 movlps\t{%1, %0|%0, %1}"
3734 [(set_attr "type" "ssemov")
3735 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: combines V2SF operand 2 with
;; elements 2 and 3 of V4SF operand 1.  Operands are first passed through
;; ix86_fixup_binary_operands.
3737 (define_expand "sse_loadlps_exp"
3738 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3740 (match_operand:V2SF 2 "nonimmediate_operand" "")
3742 (match_operand:V4SF 1 "nonimmediate_operand" "")
3743 (parallel [(const_int 2) (const_int 3)]))))]
3745 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX variant: combines V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  Alternatives:
;;   0: all registers      -> vshufps with immediate 0xe4, sources %2 then %1
;;   1: memory operand 2   -> vmovlps load (three operands)
;;   2: memory destination (operand 1 tied to it) -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: it has three register operands
;; plus an immediate and the insn is marked prefix "vex" for every
;; alternative.  The legacy "shufps" spelling accepts only two operands plus
;; the immediate, so it cannot be assembled in this form.
3747 (define_insn "*avx_loadlps"
3748 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3750 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3752 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3753 (parallel [(const_int 2) (const_int 3)]))))]
3756 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3757 vmovlps\t{%2, %1, %0|%0, %1, %2}
3758 vmovlps\t{%2, %0|%0, %2}"
3759 [(set_attr "type" "sselog,ssemov,ssemov")
3760 (set_attr "length_immediate" "1,*,*")
3761 (set_attr "prefix" "vex")
3762 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE variant: combines V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: memory operand 2, operand 1 tied to the destination -> movlps load
;;   2: memory destination (operand 1 tied to it)           -> movlps store
3764 (define_insn "sse_loadlps"
3765 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3767 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3769 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3770 (parallel [(const_int 2) (const_int 3)]))))]
3773 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3774 movlps\t{%2, %0|%0, %2}
3775 movlps\t{%2, %0|%0, %2}"
3776 [(set_attr "type" "sselog,ssemov,ssemov")
3777 (set_attr "length_immediate" "1,*,*")
3778 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss between V4SF registers: operands 2 and 1 are the
;; sources (printed in that order in AT&T syntax), operand 0 the destination.
3780 (define_insn "*avx_movss"
3781 [(set (match_operand:V4SF 0 "register_operand" "=x")
3783 (match_operand:V4SF 2 "register_operand" "x")
3784 (match_operand:V4SF 1 "register_operand" "x")
3787 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3788 [(set_attr "type" "ssemov")
3789 (set_attr "prefix" "vex")
3790 (set_attr "mode" "SF")])
;; Legacy two-operand movss between V4SF registers; operand 1 is tied to the
;; destination, operand 2 is the explicit source.
3792 (define_insn "sse_movss"
3793 [(set (match_operand:V4SF 0 "register_operand" "=x")
3795 (match_operand:V4SF 2 "register_operand" "x")
3796 (match_operand:V4SF 1 "register_operand" "0")
3799 "movss\t{%2, %0|%0, %2}"
3800 [(set_attr "type" "ssemov")
3801 (set_attr "mode" "SF")])
;; Expander that broadcasts scalar SF operand 1 into V4SF operand 0.
;; The preparation code may copy operand 1 into a register.  That register
;; must have the operand's own mode (SFmode): operand 1 is declared
;; match_operand:SF, and force_reg allocates a pseudo of the mode it is given
;; and moves the value into it, so asking for V4SFmode would create a
;; mode-mismatched move from an SF memory operand.
3803 (define_expand "vec_dupv4sf"
3804 [(set (match_operand:V4SF 0 "register_operand" "")
3806 (match_operand:SF 1 "nonimmediate_operand" "")))]
3810 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of SF operand 1 into V4SF operand 0.  Alternatives:
;;   0: register source -> vshufps with immediate 0, using %1 for both sources
;;   1: memory source   -> vbroadcastss
3813 (define_insn "*vec_dupv4sf_avx"
3814 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3816 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3819 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3820 vbroadcastss\t{%1, %0|%0, %1}"
3821 [(set_attr "type" "sselog1,ssemov")
3822 (set_attr "length_immediate" "1,0")
3823 (set_attr "prefix_extra" "0,1")
3824 (set_attr "prefix" "vex")
3825 (set_attr "mode" "V4SF")])
;; Legacy SSE broadcast: SF operand 1 must already live in the destination
;; register ("0" constraint); shufps with immediate 0 shuffles %0 with itself.
3827 (define_insn "*vec_dupv4sf"
3828 [(set (match_operand:V4SF 0 "register_operand" "=x")
3830 (match_operand:SF 1 "register_operand" "0")))]
3832 "shufps\t{$0, %0, %0|%0, %0, 0}"
3833 [(set_attr "type" "sselog1")
3834 (set_attr "length_immediate" "1")
3835 (set_attr "mode" "V4SF")])
;; Builds a V2SF value from SF operands 1 and 2 (AVX variant).  Alternatives:
;;   0: both in SSE registers     -> vunpcklps
;;   1: operand 2 in memory       -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 matching "C" -> vmovss load
;;   3: MMX destination           -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 matching "C" -> movd
;; The prefix attribute is "orig" for the two MMX alternatives (3 and 4) and
;; "vex" for the others.
3837 (define_insn "*vec_concatv2sf_avx"
3838 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3840 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3841 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3844 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3845 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3846 vmovss\t{%1, %0|%0, %1}
3847 punpckldq\t{%2, %0|%0, %2}
3848 movd\t{%1, %0|%0, %1}"
3849 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3850 (set_attr "length_immediate" "*,1,*,*,*")
3851 (set_attr "prefix_extra" "*,1,*,*,*")
3852 (set (attr "prefix")
3853 (if_then_else (eq_attr "alternative" "3,4")
3854 (const_string "orig")
3855 (const_string "vex")))
3856 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3858 ;; Although insertps takes register source, we prefer
3859 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF value from SF operands 1 and 2 (SSE4.1 variant, two-operand
;; forms with operand 1 tied to the destination where it is a register).
;;   0: register operand 2        -> unpcklps
;;   1: memory operand 2          -> insertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 matching "C" -> movss load
;;   3: MMX destination           -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 matching "C" -> movd
3860 (define_insn "*vec_concatv2sf_sse4_1"
3861 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3863 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3864 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3867 unpcklps\t{%2, %0|%0, %2}
3868 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3869 movss\t{%1, %0|%0, %1}
3870 punpckldq\t{%2, %0|%0, %2}
3871 movd\t{%1, %0|%0, %1}"
3872 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3873 (set_attr "prefix_data16" "*,1,*,*,*")
3874 (set_attr "prefix_extra" "*,1,*,*,*")
3875 (set_attr "length_immediate" "*,1,*,*,*")
3876 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3878 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3879 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3880 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF value from SF operands 1 and 2 (plain SSE variant; operand 2
;; is restricted to a register or zero by reg_or_0_operand).
;;   0: register operand 2, operand 1 tied to destination -> unpcklps
;;   1: operand 1 in memory, operand 2 matching "C"       -> movss load
;;   2: MMX registers                                     -> punpckldq
;;   3: MMX destination, operand 1 in memory, operand 2 matching "C" -> movd
3881 (define_insn "*vec_concatv2sf_sse"
3882 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3884 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3885 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3888 unpcklps\t{%2, %0|%0, %2}
3889 movss\t{%1, %0|%0, %1}
3890 punpckldq\t{%2, %0|%0, %2}
3891 movd\t{%1, %0|%0, %1}"
3892 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3893 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF value from two V2SF halves (AVX three-operand forms).
;;   0: register operand 2 -> vmovlhps
;;   1: memory operand 2   -> vmovhps load
3895 (define_insn "*vec_concatv4sf_avx"
3896 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3898 (match_operand:V2SF 1 "register_operand" " x,x")
3899 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3902 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3903 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3904 [(set_attr "type" "ssemov")
3905 (set_attr "prefix" "vex")
3906 (set_attr "mode" "V4SF,V2SF")])
;; Builds a V4SF value from two V2SF halves (legacy two-operand forms;
;; operand 1 is tied to the destination).
;;   0: register operand 2 -> movlhps
;;   1: memory operand 2   -> movhps load
3908 (define_insn "*vec_concatv4sf_sse"
3909 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3911 (match_operand:V2SF 1 "register_operand" " 0,0")
3912 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3915 movlhps\t{%2, %0|%0, %2}
3916 movhps\t{%2, %0|%0, %2}"
3917 [(set_attr "type" "ssemov")
3918 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation expander for every mode in SSEMODE.  All the work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination (operand 0) and the initialiser (operand 1).
3920 (define_expand "vec_init<mode>"
3921 [(match_operand:SSEMODE 0 "register_operand" "")
3922 (match_operand 1 "" "")]
3925 ix86_expand_vector_init (false, operands[0], operands[1]);
3929 (define_insn "*vec_set<mode>_0_avx"
3930 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3931 (vec_merge:SSEMODE4S
3932 (vec_duplicate:SSEMODE4S
3933 (match_operand:<ssescalarmode> 2
3934 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3935 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3939 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3940 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3941 vmovd\t{%2, %0|%0, %2}