1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; match the element sizes, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and packed floating-point modes together.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; Packed floating-point modes only.
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
;; (used as the "<sse>" prefix of pattern names below).
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4 gives each of the four float modes its own suffix,
;; ssemodesuffixf2s gives the packed modes the scalar suffix of their
;; element type, and ssemodesuffixf2c gives the packed modes only the
;; final letter ("s" or "d").
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
;; (element width in bits minus one for each integer vector mode).
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
;; (one bit per vector element: 4 elements -> 15, 2 elements -> 3).
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the integer vector modes in SSEMODEI.  The visible
;; preparation code hands the operands to ix86_expand_vector_move.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Alternatives, in order:
;;   0: SSE constant ("C") into an xmm register
;;   1: xmm register or memory into an xmm register
;;   2: xmm register into memory
;; The condition requires at least one operand to be a register.
;; Output is chosen on which_alternative: one branch returns
;; standard_sse_constant_opcode; the register/memory moves emit movaps
;; when the insn's "mode" attribute is V4SF and movdqa otherwise.
;; The "mode" attribute is V4SF when optimizing for size, when SSE2 is
;; not available, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise it is TI.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 if (get_attr_mode (insn) == MODE_V4SF)
95 return "movaps\t{%1, %0|%0, %1}";
97 return "movdqa\t{%1, %0|%0, %1}";
102 [(set_attr "type" "sselog1,ssemov,ssemov")
105 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
106 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
107 (and (eq_attr "alternative" "2")
108 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
110 (const_string "V4SF")
111 (const_string "TI")))])
113 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
114 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
115 ;; from memory, we'd prefer to load the memory directly into the %xmm
116 ;; register. To facilitate this happy circumstance, this pattern won't
117 ;; split until after register allocation. If the 64-bit value didn't
118 ;; come from memory, this is the best we can do. This is much better
119 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from that temporary.
;;
;; Operand 0 is the V4SI destination, operand 1 the DImode source
;; (general register pair or memory), operand 2 a V4SI scratch that is
;; only a real xmm register in the register-source alternative.
;; Split code, run once reload_completed holds:
;;   - register source: load the SImode subreg at byte 0 into operand 0
;;     and the SImode subreg at byte 4 into the scratch (both via
;;     sse2_loadld against a zero vector), then combine them with
;;     sse2_punpckldq;
;;   - memory source: emit vec_concatv2di of the DImode memory with
;;     const0_rtx into the V2DI lowpart of operand 0.
122 (define_insn_and_split "movdi_to_sse"
124 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
125 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
126 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
127 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
129 "&& reload_completed"
132 if (register_operand (operands[1], DImode))
134 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
135 Assemble the 64-bit DImode value in an xmm register. */
136 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
137 gen_rtx_SUBREG (SImode, operands[1], 0)));
138 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
139 gen_rtx_SUBREG (SImode, operands[1], 4)));
140 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
142 else if (memory_operand (operands[1], DImode))
143 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for the packed float modes (V4SF, V2DF).  As with the
;; integer vector expander, the work is done by ix86_expand_vector_move.
148 (define_expand "mov<mode>"
149 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
150 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
153 ix86_expand_vector_move (<MODE>mode, operands);
;; V4SF move insn.  Alternatives: SSE constant -> xmm, xmm/mem -> xmm,
;; xmm -> mem.  At least one operand must be a register.  One output
;; branch returns standard_sse_constant_opcode; register and memory
;; moves use movaps.  The "mode" attribute is always V4SF.
157 (define_insn "*movv4sf_internal"
158 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
159 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
161 && (register_operand (operands[0], V4SFmode)
162 || register_operand (operands[1], V4SFmode))"
164 switch (which_alternative)
167 return standard_sse_constant_opcode (insn, operands[1]);
170 return "movaps\t{%1, %0|%0, %1}";
175 [(set_attr "type" "sselog1,ssemov,ssemov")
176 (set_attr "mode" "V4SF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing; its shape (match pattern, condition, replacement, preparation
;; code) looks like a define_split -- confirm against the full file.
;; Matches a V4SF register set from a zero_extended_scalar_load_operand
;; once reload has completed.  The replacement uses a vec_duplicate of
;; operand 1; the preparation code narrows operand 1 to its SFmode subreg
;; at byte 0 and sets operand 2 to the V4SF zero constant.
179 [(set (match_operand:V4SF 0 "register_operand" "")
180 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
181 "TARGET_SSE && reload_completed"
184 (vec_duplicate:V4SF (match_dup 1))
188 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
189 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move insn.  Alternatives: SSE constant -> xmm, xmm/mem -> xmm,
;; xmm -> mem.  At least one operand must be a register.  One output
;; branch returns standard_sse_constant_opcode; register and memory
;; moves emit movaps when the insn's "mode" attribute is V4SF and movapd
;; otherwise.  The "mode" attribute is V4SF when optimizing for size,
;; when SSE2 is not available, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise it is V2DF.
192 (define_insn "*movv2df_internal"
193 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
194 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
196 && (register_operand (operands[0], V2DFmode)
197 || register_operand (operands[1], V2DFmode))"
199 switch (which_alternative)
202 return standard_sse_constant_opcode (insn, operands[1]);
205 if (get_attr_mode (insn) == MODE_V4SF)
206 return "movaps\t{%1, %0|%0, %1}";
208 return "movapd\t{%1, %0|%0, %1}";
213 [(set_attr "type" "sselog1,ssemov,ssemov")
216 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
217 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
218 (and (eq_attr "alternative" "2")
219 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
221 (const_string "V4SF")
222 (const_string "V2DF")))])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing; it looks like a define_split -- confirm against the full file.
;; Matches a V2DF register set from a zero_extended_scalar_load_operand
;; once reload has completed (SSE2 only) and rewrites it as a vec_concat
;; of the DFmode subreg of operand 1 (byte 0) with a DFmode zero.
225 [(set (match_operand:V2DF 0 "register_operand" "")
226 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
227 "TARGET_SSE2 && reload_completed"
228 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
230 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
231 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every 16-byte vector mode in SSEMODE.  Operand 0
;; must be a register; the expansion is delegated to ix86_expand_push.
234 (define_expand "push<mode>1"
235 [(match_operand:SSEMODE 0 "register_operand" "")]
238 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every 16-byte vector mode in SSEMODE.
;; The expansion is delegated to ix86_expand_vector_move_misalign.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
244 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
247 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups / movupd (suffix taken from ssemodesuffixf2c).  Either loads
;; an xmm register from a register or memory, or stores an xmm register
;; to memory; the condition rejects the memory-to-memory combination.
251 (define_insn "<sse>_movup<ssemodesuffixf2c>"
252 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
254 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
256 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
257 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
258 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
259 [(set_attr "type" "ssemov")
260 (set_attr "mode" "<MODE>")])
;; movdqu on V16QI, wrapped in an unspec.  Same operand shapes as the
;; movup pattern: register <- register/memory or memory <- register,
;; never memory to memory.  Requires SSE2.
262 (define_insn "sse2_movdqu"
263 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
264 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
266 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
267 "movdqu\t{%1, %0|%0, %1}"
268 [(set_attr "type" "ssemov")
269 (set_attr "prefix_data16" "1")
270 (set_attr "mode" "TI")])
;; movntps / movntpd: stores an xmm register (operand 1) to a memory
;; destination (operand 0) for the packed float modes.
272 (define_insn "<sse>_movnt<mode>"
273 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
275 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
277 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
278 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
279 [(set_attr "type" "ssemov")
280 (set_attr "mode" "<MODE>")])
;; movntdq: stores a V2DI xmm register to memory, wrapped in an unspec.
;; (The insn condition is not visible in this listing.)
282 (define_insn "sse2_movntv2di"
283 [(set (match_operand:V2DI 0 "memory_operand" "=m")
284 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
287 "movntdq\t{%1, %0|%0, %1}"
288 [(set_attr "type" "ssecvt")
289 (set_attr "prefix_data16" "1")
290 (set_attr "mode" "TI")])
;; movnti: stores an SImode general register ("r") to memory, wrapped in
;; an unspec.  (The insn condition is not visible in this listing.)
;; NOTE(review): the "mode" attribute is "V2DF" although the operands are
;; SImode in a general register; this looks inherited from a vector
;; pattern -- confirm whether "SI" was intended and whether any attribute
;; derived from "mode" (e.g. prefix/length computation) depends on it.
292 (define_insn "sse2_movntsi"
293 [(set (match_operand:SI 0 "memory_operand" "=m")
294 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
297 "movnti\t{%1, %0|%0, %1}"
298 [(set_attr "type" "ssecvt")
299 (set_attr "mode" "V2DF")])
;; lddqu: loads a V16QI value from memory into an xmm register, wrapped
;; in an unspec.  (The insn condition is not visible in this listing.)
301 (define_insn "sse3_lddqu"
302 [(set (match_operand:V16QI 0 "register_operand" "=x")
303 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
306 "lddqu\t{%1, %0|%0, %1}"
307 [(set_attr "type" "ssecvt")
308 (set_attr "prefix_rep" "1")
309 (set_attr "mode" "TI")])
311 ; Expand patterns for non-temporal stores. At the moment, only those
312 ; that directly map to insns are defined; it would be possible to
313 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for the packed float modes: register
;; source, memory destination, same operand shape as <sse>_movnt<mode>.
315 (define_expand "storent<mode>"
316 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
318 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
320 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander over the MODEF iterator (defined outside
;; the visible part of this file): register source, memory destination.
;; The enabling condition is not visible in this listing.
323 (define_expand "storent<mode>"
324 [(set (match_operand:MODEF 0 "memory_operand" "")
326 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI; the operand shape matches
;; sse2_movntv2di (register source, memory destination, unspec).
331 (define_expand "storentv2di"
332 [(set (match_operand:V2DI 0 "memory_operand" "")
333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode; the operand shape matches
;; sse2_movntsi (register source, memory destination, unspec).
338 (define_expand "storentsi"
339 [(set (match_operand:SI 0 "memory_operand" "")
340 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
345 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
347 ;; Parallel floating point arithmetic
349 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for the packed float modes.  The rtx code comes from a
;; code iterator on a line not visible in this listing; the preparation
;; statement passes <CODE> to ix86_expand_fp_absneg_operator and ends
;; with DONE, so the helper emits the whole expansion.
351 (define_expand "<code><mode>2"
352 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
354 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
355 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
356 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed float add/subtract expander (plusminus code iterator).  The
;; operands are first adjusted by ix86_fixup_binary_operands_no_copy;
;; there is no DONE, so the RTL template above is then emitted.
358 (define_expand "<addsub><mode>3"
359 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
360 (plusminus:SSEMODEF2P
361 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
362 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
363 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
364 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; addps/addpd/subps/subpd.  Operand 1 is tied to the destination (with
;; the <comm> commutativity marker supplied per code); operand 2 may be
;; an xmm register or memory.  ix86_binary_operator_ok additionally
;; validates the operand combination.
366 (define_insn "*<addsub><mode>3"
367 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
368 (plusminus:SSEMODEF2P
369 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
370 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
372 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
373 "<addsub>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
374 [(set_attr "type" "sseadd")
375 (set_attr "mode" "<MODE>")])
;; addss/addsd/subss/subsd: scalar add/subtract expressed as a vec_merge
;; of the packed operation with another vector (the remaining vec_merge
;; operands are on lines not visible in this listing).  Operand 1 is tied
;; to the destination; operand 2 may be an xmm register or memory.
;; The mode passed to ix86_binary_operator_ok follows the iterator
;; (<MODE>mode) so the V2DF instance is not checked as V4SFmode; this
;; matches the sibling <sse>_vmmul<mode>3 pattern.
377 (define_insn "<sse>_vm<addsub><mode>3"
378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
379 (vec_merge:SSEMODEF2P
380 (plusminus:SSEMODEF2P
381 (match_operand:SSEMODEF2P 1 "register_operand" "0")
382 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
385 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
386 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
387 "<addsub>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
388 [(set_attr "type" "sseadd")
389 (set_attr "mode" "<ssescalarmode>")])
;; Packed float multiply expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (with MULT) before the template
;; is emitted.
391 (define_expand "mul<mode>3"
392 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
394 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
395 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
396 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
397 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; mulps/mulpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an xmm register or memory.
399 (define_insn "*mul<mode>3"
400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
402 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
403 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
404 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
405 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
406 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
407 [(set_attr "type" "ssemul")
408 (set_attr "mode" "<MODE>")])
;; mulss/mulsd: scalar multiply expressed as a vec_merge (the remaining
;; vec_merge operands are on lines not visible in this listing).
;; Operand 1 is tied to the destination; the "mode" attribute is the
;; scalar element mode.
410 (define_insn "<sse>_vmmul<mode>3"
411 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
412 (vec_merge:SSEMODEF2P
414 (match_operand:SSEMODEF2P 1 "register_operand" "0")
415 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
418 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
419 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
420 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
421 [(set_attr "type" "ssemul")
422 (set_attr "mode" "<ssescalarmode>")])
;; V4SF divide expander.  After fixing up the operands, it calls
;; ix86_emit_swdivsf instead of using the divide template when all of
;; the following hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing for
;; size, -ffinite-math-only, no trapping math, and unsafe math
;; optimizations enabled.
424 (define_expand "divv4sf3"
425 [(set (match_operand:V4SF 0 "register_operand" "")
426 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
427 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
430 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
432 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
433 && flag_finite_math_only && !flag_trapping_math
434 && flag_unsafe_math_optimizations)
436 ix86_emit_swdivsf (operands[0], operands[1],
437 operands[2], V4SFmode);
;; V2DF divide expander.  Only fixes up the operands; unlike divv4sf3
;; there is no alternative software expansion here.
442 (define_expand "divv2df3"
443 [(set (match_operand:V2DF 0 "register_operand" "")
444 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
445 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
447 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divps/divpd.  Operand 1 must be a register tied to the destination;
;; operand 2 (the divisor) may be an xmm register or memory.
449 (define_insn "<sse>_div<mode>3"
450 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
452 (match_operand:SSEMODEF2P 1 "register_operand" "0")
453 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
454 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
455 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
456 [(set_attr "type" "ssediv")
457 (set_attr "mode" "<MODE>")])
;; divss/divsd: scalar divide expressed as a vec_merge (the remaining
;; vec_merge operands are on lines not visible in this listing).
;; The "mode" attribute is the scalar element mode.
459 (define_insn "<sse>_vmdiv<mode>3"
460 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
461 (vec_merge:SSEMODEF2P
463 (match_operand:SSEMODEF2P 1 "register_operand" "0")
464 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
467 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
468 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
469 [(set_attr "type" "ssediv")
470 (set_attr "mode" "<ssescalarmode>")])
;; rcpps: packed V4SF operation represented as UNSPEC_RCP of operand 1
;; (xmm register or memory).  The destination is not tied to the source.
472 (define_insn "sse_rcpv4sf2"
473 [(set (match_operand:V4SF 0 "register_operand" "=x")
475 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
477 "rcpps\t{%1, %0|%0, %1}"
478 [(set_attr "type" "sse")
479 (set_attr "mode" "V4SF")])
;; rcpss: scalar form.  Operand 1 is the source of the unspec; operand 2
;; is a register tied to the destination ("0"), which supplies the
;; vector the scalar result is merged into (the merge wrapper itself is
;; on lines not visible in this listing).
481 (define_insn "sse_vmrcpv4sf2"
482 [(set (match_operand:V4SF 0 "register_operand" "=x")
484 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
486 (match_operand:V4SF 2 "register_operand" "0")
489 "rcpss\t{%1, %0|%0, %1}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "SF")])
;; V4SF square-root expander.  Calls ix86_emit_swsqrtsf (last argument
;; 0) instead of using the sqrt template when all of the following hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations
;; enabled.
493 (define_expand "sqrtv4sf2"
494 [(set (match_operand:V4SF 0 "register_operand" "")
495 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
498 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
499 && flag_finite_math_only && !flag_trapping_math
500 && flag_unsafe_math_optimizations)
502 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps: packed V4SF square root of an xmm register or memory operand.
507 (define_insn "sse_sqrtv4sf2"
508 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
511 "sqrtps\t{%1, %0|%0, %1}"
512 [(set_attr "type" "sse")
513 (set_attr "mode" "V4SF")])
;; sqrtpd: packed V2DF square root of an xmm register or memory operand.
;; Named without the "sse_" prefix, so it serves directly as the
;; standard sqrt pattern for V2DF.
515 (define_insn "sqrtv2df2"
516 [(set (match_operand:V2DF 0 "register_operand" "=x")
517 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
519 "sqrtpd\t{%1, %0|%0, %1}"
520 [(set_attr "type" "sse")
521 (set_attr "mode" "V2DF")])
;; sqrtss/sqrtsd: scalar square root as a vec_merge.  Operand 1 is the
;; source (xmm register or memory); operand 2 is a register tied to the
;; destination and is the vector merged with the scalar result.
523 (define_insn "<sse>_vmsqrt<mode>2"
524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
525 (vec_merge:SSEMODEF2P
527 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
528 (match_operand:SSEMODEF2P 2 "register_operand" "0")
530 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
531 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
532 [(set_attr "type" "sse")
533 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  The visible
;; preparation code calls ix86_emit_swsqrtsf with last argument 1; the
;; enabling condition is on a line not visible in this listing.
535 (define_expand "rsqrtv4sf2"
536 [(set (match_operand:V4SF 0 "register_operand" "")
538 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
541 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps: packed V4SF operation represented as UNSPEC_RSQRT of
;; operand 1 (xmm register or memory).
545 (define_insn "sse_rsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
550 "rsqrtps\t{%1, %0|%0, %1}"
551 [(set_attr "type" "sse")
552 (set_attr "mode" "V4SF")])
;; rsqrtss: scalar form.  Operand 1 is the source of the unspec;
;; operand 2 is a register tied to the destination ("0") and supplies
;; the vector the scalar result is merged into (the merge wrapper itself
;; is on lines not visible in this listing).
554 (define_insn "sse_vmrsqrtv4sf2"
555 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
559 (match_operand:V4SF 2 "register_operand" "0")
562 "rsqrtss\t{%1, %0|%0, %1}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "SF")])
566 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
567 ;; isn't really correct, as those rtl operators aren't defined when
568 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;;
;; Packed float min/max expander (the code iterator is on a line not
;; visible in this listing).  When -ffinite-math-only is off, operand 1
;; is forced into a register first, which matches the non-commutative
;; "*<code><mode>3" insn that requires a register for operand 1.
570 (define_expand "<code><mode>3"
571 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
573 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
574 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
575 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
577 if (!flag_finite_math_only)
578 operands[1] = force_reg (<MODE>mode, operands[1]);
579 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed min/max, finite-math variant: only enabled with
;; flag_finite_math_only, and operand 1 carries the commutative marker
;; ("%0"), so the two inputs may be swapped.
582 (define_insn "*<code><mode>3_finite"
583 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
585 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
586 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
587 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
588 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
589 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "mode" "<MODE>")])
;; Packed min/max, general variant: operand 1 must be a register tied to
;; the destination and is not marked commutative, so operand order is
;; preserved.
593 (define_insn "*<code><mode>3"
594 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (match_operand:SSEMODEF2P 1 "register_operand" "0")
597 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
598 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
599 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
600 [(set_attr "type" "sseadd")
601 (set_attr "mode" "<MODE>")])
;; Scalar min/max (…ss/…sd) expressed as a vec_merge (the remaining
;; vec_merge operands are on lines not visible in this listing).
;; Operand 1 is tied to the destination.
;; NOTE(review): "type" is "sse" here while the packed min/max insns use
;; "sseadd" -- confirm whether the difference is intentional.
603 (define_insn "<sse>_vm<code><mode>3"
604 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
605 (vec_merge:SSEMODEF2P
607 (match_operand:SSEMODEF2P 1 "register_operand" "0")
608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
611 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
612 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
613 [(set_attr "type" "sse")
614 (set_attr "mode" "<ssescalarmode>")])
616 ;; These versions of the min/max patterns implement exactly the operations
617 ;; min = (op1 < op2 ? op1 : op2)
618 ;; max = (!(op1 < op2) ? op1 : op2)
619 ;; Their operands are not commutative, and thus they may be used in the
620 ;; presence of -0.0 and NaN.
;;
;; minps/minpd with fixed operand order: the two inputs are wrapped in an
;; unspec (its name is on a line not visible in this listing) rather
;; than an smin rtx, and operand 1 is tied to the destination without a
;; commutative marker.
622 (define_insn "*ieee_smin<mode>3"
623 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
625 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
626 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
628 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
629 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
630 [(set_attr "type" "sseadd")
631 (set_attr "mode" "<MODE>")])
;; maxps/maxpd with fixed operand order; the counterpart of
;; *ieee_smin<mode>3.  Operand 1 is tied to the destination and the
;; inputs are not commutative.
633 (define_insn "*ieee_smax<mode>3"
634 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
636 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
637 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
639 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
640 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
641 [(set_attr "type" "sseadd")
642 (set_attr "mode" "<MODE>")])
;; addsubps.  The visible RTL combines an operation on operands 1 and 2
;; with (minus (match_dup 1) (match_dup 2)); the enclosing merge, the
;; first arithmetic code and the element mask are on lines not visible
;; in this listing.  Operand 1 is tied to the destination.
644 (define_insn "sse3_addsubv4sf3"
645 [(set (match_operand:V4SF 0 "register_operand" "=x")
648 (match_operand:V4SF 1 "register_operand" "0")
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
650 (minus:V4SF (match_dup 1) (match_dup 2))
653 "addsubps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sseadd")
655 (set_attr "prefix_rep" "1")
656 (set_attr "mode" "V4SF")])
;; addsubpd: V2DF counterpart of sse3_addsubv4sf3, with the same visible
;; structure (an operation on operands 1 and 2 combined with their
;; difference).  No prefix_rep attribute is set here.
658 (define_insn "sse3_addsubv2df3"
659 [(set (match_operand:V2DF 0 "register_operand" "=x")
662 (match_operand:V2DF 1 "register_operand" "0")
663 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
664 (minus:V2DF (match_dup 1) (match_dup 2))
667 "addsubpd\t{%2, %0|%0, %2}"
668 [(set_attr "type" "sseadd")
669 (set_attr "mode" "V2DF")])
;; haddps/hsubps.  The visible RTL pairs up adjacent SFmode elements:
;; elements (0,1) and (2,3) of operand 1, then elements (0,1) and (2,3)
;; of operand 2.  The combining code and the vec_concat wrappers are on
;; lines not visible in this listing.  Operand 1 is tied to the
;; destination.
671 (define_insn "sse3_h<addsub>v4sf3"
672 [(set (match_operand:V4SF 0 "register_operand" "=x")
677 (match_operand:V4SF 1 "register_operand" "0")
678 (parallel [(const_int 0)]))
679 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
681 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
682 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
686 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
687 (parallel [(const_int 0)]))
688 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
690 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
691 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
693 "h<addsub>ps\t{%2, %0|%0, %2}"
694 [(set_attr "type" "sseadd")
695 (set_attr "prefix_rep" "1")
696 (set_attr "mode" "V4SF")])
;; haddpd/hsubpd.  The visible RTL pairs DFmode elements 0 and 1 of
;; operand 1, then elements 0 and 1 of operand 2; the combining code and
;; the vec_concat wrapper are on lines not visible in this listing.
698 (define_insn "sse3_h<addsub>v2df3"
699 [(set (match_operand:V2DF 0 "register_operand" "=x")
703 (match_operand:V2DF 1 "register_operand" "0")
704 (parallel [(const_int 0)]))
705 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
708 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
709 (parallel [(const_int 0)]))
710 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
712 "h<addsub>pd\t{%2, %0|%0, %2}"
713 [(set_attr "type" "sseadd")
714 (set_attr "mode" "V2DF")])
;; V4SF sum-reduction expander.  Two expansions are visible: one applies
;; sse3_haddv4sf3 twice through a fresh temporary, the other calls
;; ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that selects
;; between them is on a line not visible in this listing.
716 (define_expand "reduc_splus_v4sf"
717 [(match_operand:V4SF 0 "register_operand" "")
718 (match_operand:V4SF 1 "register_operand" "")]
723 rtx tmp = gen_reg_rtx (V4SFmode);
724 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
725 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
728 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum-reduction expander: a single sse3_haddv2df3 of operand 1
;; with itself.  The enabling condition is not visible in this listing.
732 (define_expand "reduc_splus_v2df"
733 [(match_operand:V2DF 0 "register_operand" "")
734 (match_operand:V2DF 1 "register_operand" "")]
737 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF max-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_smaxv4sf3.
741 (define_expand "reduc_smax_v4sf"
742 [(match_operand:V4SF 0 "register_operand" "")
743 (match_operand:V4SF 1 "register_operand" "")]
746 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF min-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_sminv4sf3.
750 (define_expand "reduc_smin_v4sf"
751 [(match_operand:V4SF 0 "register_operand" "")
752 (match_operand:V4SF 1 "register_operand" "")]
755 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
761 ;; Parallel floating point comparisons
763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>{ss,sd,ps,pd}: mask-producing comparison over all four float
;; modes (SSEMODEF4).  Operand 3 is the comparison operator, printed via
;; %D3; operand 1 is tied to the destination.  The condition continues
;; on a line not visible in this listing.
765 (define_insn "<sse>_maskcmp<mode>3"
766 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
767 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
768 [(match_operand:SSEMODEF4 1 "register_operand" "0")
769 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
770 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
772 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
773 [(set_attr "type" "ssecmp")
774 (set_attr "mode" "<MODE>")])
;; cmp<cond>ss / cmp<cond>sd: scalar mask comparison expressed as a
;; vec_merge of the packed comparison (remaining vec_merge operands are
;; on lines not visible in this listing).  Disabled when TARGET_SSE5.
776 (define_insn "<sse>_vmmaskcmp<mode>3"
777 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
778 (vec_merge:SSEMODEF2P
779 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
780 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
781 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
784 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
785 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
786 [(set_attr "type" "ssecmp")
787 (set_attr "mode" "<ssescalarmode>")])
;; comiss/comisd: sets FLAGS_REG in CCFP mode from element 0 of the two
;; vector operands (the compare and vec_select wrappers, and the mode
;; iterator that drives <MODE>, are on lines not visible in this
;; listing).  Operand 1 may be an xmm register or memory.
789 (define_insn "<sse>_comi"
790 [(set (reg:CCFP FLAGS_REG)
793 (match_operand:<ssevecmode> 0 "register_operand" "x")
794 (parallel [(const_int 0)]))
796 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
797 (parallel [(const_int 0)]))))]
798 "SSE_FLOAT_MODE_P (<MODE>mode)"
799 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
800 [(set_attr "type" "ssecomi")
801 (set_attr "mode" "<MODE>")])
;; ucomiss/ucomisd: same shape as <sse>_comi, but the flags register is
;; set in CCFPU mode.
803 (define_insn "<sse>_ucomi"
804 [(set (reg:CCFPU FLAGS_REG)
807 (match_operand:<ssevecmode> 0 "register_operand" "x")
808 (parallel [(const_int 0)]))
810 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
811 (parallel [(const_int 0)]))))]
812 "SSE_FLOAT_MODE_P (<MODE>mode)"
813 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
814 [(set_attr "type" "ssecomi")
815 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for the packed float modes.
;; Operands 4 and 5 are the values compared, operands 1 and 2 the two
;; values selected between.  Expansion is attempted through
;; ix86_expand_fp_vcond; what follows its result test is on lines not
;; visible in this listing.
817 (define_expand "vcond<mode>"
818 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
819 (if_then_else:SSEMODEF2P
821 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
822 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
823 (match_operand:SSEMODEF2P 1 "general_operand" "")
824 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
825 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
827 if (ix86_expand_fp_vcond (operands))
833 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
835 ;; Parallel floating point logical operations
837 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; andnps/andnpd for the packed float modes.  Operand 1 (tied to the
;; destination) is the inner operand of the RTL; operand 2 may be an xmm
;; register or memory.
839 (define_insn "<sse>_nand<mode>3"
840 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
843 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
844 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
845 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
846 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
847 [(set_attr "type" "sselog")
848 (set_attr "mode" "<MODE>")])
;; Packed float logical-operation expander (the code iterator is on a
;; line not visible in this listing).  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
850 (define_expand "<code><mode>3"
851 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
853 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
854 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
855 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
856 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float logical insn; the mnemonic is <plogicprefix> followed by
;; "ps"/"pd".  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an xmm register or memory.
858 (define_insn "*<code><mode>3"
859 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
861 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
862 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
863 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
864 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
865 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
866 [(set_attr "type" "sselog")
867 (set_attr "mode" "<MODE>")])
869 ;; Also define scalar versions. These are used for abs, neg, and
870 ;; conditional move. Using subregs into vector modes causes register
871 ;; allocation lossage. These patterns do not allow memory operands
872 ;; because the native instructions read the full 128-bits.
;;
;; Scalar-mode andnps/andnpd: both inputs must be registers, and the
;; "mode" attribute is the corresponding vector mode (<ssevecmode>)
;; because the packed instruction is what gets emitted.
874 (define_insn "*nand<mode>3"
875 [(set (match_operand:MODEF 0 "register_operand" "=x")
878 (match_operand:MODEF 1 "register_operand" "0"))
879 (match_operand:MODEF 2 "register_operand" "x")))]
880 "SSE_FLOAT_MODE_P (<MODE>mode)"
881 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
882 [(set_attr "type" "sselog")
883 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode logical insn emitted as the packed instruction
;; (<plogicprefix>ps / <plogicprefix>pd).  Register operands only;
;; operand 1 is tied to the destination.
885 (define_insn "*<code><mode>3"
886 [(set (match_operand:MODEF 0 "register_operand" "=x")
888 (match_operand:MODEF 1 "register_operand" "0")
889 (match_operand:MODEF 2 "register_operand" "x")))]
890 "SSE_FLOAT_MODE_P (<MODE>mode)"
891 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
892 [(set_attr "type" "sselog")
893 (set_attr "mode" "<ssevecmode>")])
895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
898 ;; scalar versions of the instructions as well as the vector versions.
900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
902 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
903 ;; combine to generate a multiply/add with two memory references. We then
904 ;; split this insn, into loading up the destination register with one of the
905 ;; memory operations. If we don't manage to split the insn, reload will
906 ;; generate the appropriate moves. The reason this is needed, is that combine
907 ;; has already folded one of the memory references into both the multiply and
908 ;; add insns, and it can't generate a new pseudo. I.e.:
909 ;; (set (reg1) (mem (addr1)))
910 ;; (set (reg2) (mult (reg1) (mem (addr2))))
911 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;;
;; Four-operand fmadd over all four float modes.  Operands 1 and 2 are
;; the multiplicands and operand 3 the addend; each of the four
;; alternatives ties the destination to operand 1 or operand 3.  The
;; condition passes 2 as the last argument of ix86_sse5_valid_op_p,
;; whereas the scalar and split patterns test with 1.
913 (define_insn "sse5_fmadd<mode>4"
914 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
917 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
918 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
919 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
920 "TARGET_SSE5 && TARGET_FUSED_MADD
921 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
922 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
923 [(set_attr "type" "ssemuladd")
924 (set_attr "mode" "<MODE>")])
926 ;; Split fmadd with two memory operands into a load and the fmadd.
;; NOTE(review): the opening form and the first condition line are not
;; visible in this listing.
;; Applies when the operands fail ix86_sse5_valid_op_p with last argument
;; 1 but pass it with 2, and the destination register is not mentioned in
;; any input.  ix86_expand_sse5_multiple_memory rewrites the operands,
;; then the fmadd is re-emitted from them.
928 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
931 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
932 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
933 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
935 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
936 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
937 && !reg_mentioned_p (operands[0], operands[1])
938 && !reg_mentioned_p (operands[0], operands[2])
939 && !reg_mentioned_p (operands[0], operands[3])"
942 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
943 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
944 operands[2], operands[3]));
948 ;; For the scalar operations, use operand1 for the upper words that aren't
949 ;; modified, so restrict the forms that are generated.
950 ;; Scalar version of fmadd
;; Both alternatives tie operand 1 to the destination; the memory
;; operand, if any, may be operand 3 (alternative 0) or operand 2
;; (alternative 1).  The mnemonic uses the scalar suffix
;; (ssemodesuffixf2s) and validity is checked with last argument 1.
951 (define_insn "sse5_vmfmadd<mode>4"
952 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
953 (vec_merge:SSEMODEF2P
956 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
957 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
958 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
961 "TARGET_SSE5 && TARGET_FUSED_MADD
962 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
963 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
964 [(set_attr "type" "ssemuladd")
965 (set_attr "mode" "<MODE>")])
967 ;; Floating multiply and subtract
968 ;; Allow two memory operands the same as fmadd
;; Operand constraints and the validity check are identical to
;; sse5_fmadd<mode>4; only the mnemonic (and the arithmetic code, on a
;; line not visible in this listing) differs.
969 (define_insn "sse5_fmsub<mode>4"
970 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
973 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
974 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
975 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
976 "TARGET_SSE5 && TARGET_FUSED_MADD
977 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
978 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
979 [(set_attr "type" "ssemuladd")
980 (set_attr "mode" "<MODE>")])
982 ;; Split fmsub with two memory operands into a load and the fmsub.
;; NOTE(review): the opening form and the first condition line are not
;; visible in this listing.
;; Same conditions as the fmadd split: the operands fail
;; ix86_sse5_valid_op_p with last argument 1 but pass with 2, and the
;; destination is not mentioned in any input.  The operands are rewritten
;; by ix86_expand_sse5_multiple_memory and the fmsub is re-emitted.
984 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
987 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
988 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
989 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
991 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
992 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
993 && !reg_mentioned_p (operands[0], operands[1])
994 && !reg_mentioned_p (operands[0], operands[2])
995 && !reg_mentioned_p (operands[0], operands[3])"
998 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
999 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1000 operands[2], operands[3]));
1004 ;; For the scalar operations, use operand1 for the upper words that aren't
1005 ;; modified, so restrict the forms that are generated.
1006 ;; Scalar version of fmsub
;; Operand 1 is tied to the destination in both alternatives; at most one
;; of operands 2 and 3 may be a memory operand.
1007 (define_insn "sse5_vmfmsub<mode>4"
1008 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1009 (vec_merge:SSEMODEF2P
1012 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1013 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1014 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1017 "TARGET_SSE5 && TARGET_FUSED_MADD
1018 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1019 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1020 [(set_attr "type" "ssemuladd")
1021 (set_attr "mode" "<MODE>")])
1023 ;; Floating point negative multiply and add
1024 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1025 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1026 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the pattern, followed by the
;; commutative pair of operands 1 and 2; the output template still prints
;; the operands in numeric order.
1027 (define_insn "sse5_fnmadd<mode>4"
1028 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1030 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1032 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1033 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1034 "TARGET_SSE5 && TARGET_FUSED_MADD
1035 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1036 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1037 [(set_attr "type" "ssemuladd")
1038 (set_attr "mode" "<MODE>")])
1040 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies only when the operands fail the ix86_sse5_valid_op_p check with
;; last argument 1 but pass it with 2, and the destination register is not
;; mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmadd<mode>4.
1042 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1044 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1046 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1047 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1049 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1050 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1051 && !reg_mentioned_p (operands[0], operands[1])
1052 && !reg_mentioned_p (operands[0], operands[2])
1053 && !reg_mentioned_p (operands[0], operands[3])"
1056 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1057 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1058 operands[2], operands[3]));
1062 ;; For the scalar operations, use operand1 for the upper words that aren't
1063 ;; modified, so restrict the forms that are generated.
1064 ;; Scalar version of fnmadd
;; Operand 3 is listed first in the pattern (as in sse5_fnmadd<mode>4);
;; operand 1 is tied to the destination in both alternatives.
1065 (define_insn "sse5_vmfnmadd<mode>4"
1066 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1067 (vec_merge:SSEMODEF2P
1069 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1071 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1072 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1075 "TARGET_SSE5 && TARGET_FUSED_MADD
1076 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1077 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1078 [(set_attr "type" "ssemuladd")
1079 (set_attr "mode" "<MODE>")])
1081 ;; Floating point negative multiply and subtract
1082 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1083 ;; Allow 2 memory operands to help with optimization
;; Unlike fmadd/fmsub/fnmadd this pattern has only two alternatives and
;; operand 1 is tied to the destination in both of them (no "%" marker).
1084 (define_insn "sse5_fnmsub<mode>4"
1085 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1089 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1090 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1091 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1092 "TARGET_SSE5 && TARGET_FUSED_MADD
1093 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1094 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1095 [(set_attr "type" "ssemuladd")
1096 (set_attr "mode" "<MODE>")])
1098 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies only when the operands fail the ix86_sse5_valid_op_p check with
;; last argument 1 but pass it with 2, and the destination register is not
;; mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmsub<mode>4.
1100 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1104 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1105 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1106 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1108 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1109 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1110 && !reg_mentioned_p (operands[0], operands[1])
1111 && !reg_mentioned_p (operands[0], operands[2])
1112 && !reg_mentioned_p (operands[0], operands[3])"
1115 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1116 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1117 operands[2], operands[3]));
1121 ;; For the scalar operations, use operand1 for the upper words that aren't
1122 ;; modified, so restrict the forms that are generated.
1123 ;; Scalar version of fnmsub
;; Operand 1 is tied to the destination in both alternatives and at most
;; one of operands 2 and 3 may be a memory operand.  The insn condition
;; therefore passes 1 to ix86_sse5_valid_op_p, like the other scalar
;; patterns (sse5_vmfmadd, sse5_vmfmsub, sse5_vmfnmadd): the constraints
;; offer no alternative with two memory operands, and the two-memory
;; splitters only match the non-vec_merge SSEMODEF4 patterns.
1124 (define_insn "sse5_vmfnmsub<mode>4"
1125 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1126 (vec_merge:SSEMODEF2P
1130 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1131 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1132 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1135 "TARGET_SSE5 && TARGET_FUSED_MADD
1136 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1137 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1138 [(set_attr "type" "ssemuladd")
1139 (set_attr "mode" "<MODE>")])
1141 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1142 ;; even if the user used -mno-fused-madd
1143 ;; Parallel instructions.  During instruction generation, just default
1144 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the fmadd intrinsic: all operands are registers and the
;; pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set the preparation code emits sse5_fmadd<mode>4 instead.
1145 (define_expand "sse5i_fmadd<mode>4"
1146 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1150 (match_operand:SSEMODEF2P 1 "register_operand" "")
1151 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1152 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1153 UNSPEC_SSE5_INTRINSIC))]
1156 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1157 if (TARGET_FUSED_MADD)
1159 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1160 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fmadd.  The
;; condition requires only TARGET_SSE5 (not TARGET_FUSED_MADD) and passes 1
;; as the last argument of ix86_sse5_valid_op_p.
1165 (define_insn "*sse5i_fmadd<mode>4"
1166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1170 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1171 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1172 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1173 UNSPEC_SSE5_INTRINSIC))]
1174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1175 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1176 [(set_attr "type" "ssemuladd")
1177 (set_attr "mode" "<MODE>")])
;; Expander for the fmsub intrinsic: register-only operands wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the preparation
;; code emits sse5_fmsub<mode>4 instead.
1179 (define_expand "sse5i_fmsub<mode>4"
1180 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1184 (match_operand:SSEMODEF2P 1 "register_operand" "")
1185 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1186 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1187 UNSPEC_SSE5_INTRINSIC))]
1190 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1191 if (TARGET_FUSED_MADD)
1193 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1194 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fmsub.  The
;; condition requires only TARGET_SSE5 (not TARGET_FUSED_MADD) and passes 1
;; as the last argument of ix86_sse5_valid_op_p.
;; Operand 1 uses nonimmediate_operand so that its predicate agrees with
;; the "xm" constraint alternative (a register_operand predicate could
;; never satisfy the memory half of it) and with the *sse5i_fmadd,
;; *sse5i_fnmadd and *sse5i_fnmsub patterns.
1199 (define_insn "*sse5i_fmsub<mode>4"
1200 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1204 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1205 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1206 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1207 UNSPEC_SSE5_INTRINSIC))]
1208 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1209 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1210 [(set_attr "type" "ssemuladd")
1211 (set_attr "mode" "<MODE>")])
1213 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1214 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the fnmadd intrinsic: register-only operands wrapped in
;; UNSPEC_SSE5_INTRINSIC, with operand 3 listed first.  When
;; TARGET_FUSED_MADD is set it emits sse5_fnmadd<mode>4 instead.
1215 (define_expand "sse5i_fnmadd<mode>4"
1216 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1219 (match_operand:SSEMODEF2P 3 "register_operand" "")
1221 (match_operand:SSEMODEF2P 1 "register_operand" "")
1222 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1223 UNSPEC_SSE5_INTRINSIC))]
1226 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1227 if (TARGET_FUSED_MADD)
1229 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1230 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fnmadd; operand 3
;; is listed first in the pattern.  Requires only TARGET_SSE5.
1235 (define_insn "*sse5i_fnmadd<mode>4"
1236 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1239 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1241 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1242 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1243 UNSPEC_SSE5_INTRINSIC))]
1244 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1245 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1246 [(set_attr "type" "ssemuladd")
1247 (set_attr "mode" "<MODE>")])
1249 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the fnmsub intrinsic: register-only operands wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set it emits
;; sse5_fnmsub<mode>4 instead.
1250 (define_expand "sse5i_fnmsub<mode>4"
1251 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1256 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1257 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1258 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1259 UNSPEC_SSE5_INTRINSIC))]
1262 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1263 if (TARGET_FUSED_MADD)
1265 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1266 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fnmsub.  Requires
;; only TARGET_SSE5.  Unlike sse5_fnmsub<mode>4 it offers four alternatives
;; with operands 1 and 2 marked commutative.
1271 (define_insn "*sse5i_fnmsub<mode>4"
1272 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1277 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1278 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1279 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1280 UNSPEC_SSE5_INTRINSIC))]
1281 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1282 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1283 [(set_attr "type" "ssemuladd")
1284 (set_attr "mode" "<MODE>")])
1286 ;; Scalar instructions
;; Expander for the scalar fmadd intrinsic: a vec_merge of register
;; operands wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set it emits sse5_vmfmadd<mode>4 instead.
1287 (define_expand "sse5i_vmfmadd<mode>4"
1288 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1290 [(vec_merge:SSEMODEF2P
1293 (match_operand:SSEMODEF2P 1 "register_operand" "")
1294 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1295 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1298 UNSPEC_SSE5_INTRINSIC))]
1301 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1302 if (TARGET_FUSED_MADD)
1304 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1305 operands[2], operands[3]));
1310 ;; For the scalar operations, use operand1 for the upper words that aren't
1311 ;; modified, so restrict the forms that are accepted.
;; Operand 1 must be a register tied to the destination; the "mode"
;; attribute is <ssescalarmode> rather than <MODE>.
1312 (define_insn "*sse5i_vmfmadd<mode>4"
1313 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1315 [(vec_merge:SSEMODEF2P
1318 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1319 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1320 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1323 UNSPEC_SSE5_INTRINSIC))]
1324 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1325 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1326 [(set_attr "type" "ssemuladd")
1327 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fmsub intrinsic: a vec_merge of register
;; operands wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set it emits sse5_vmfmsub<mode>4 instead.
1329 (define_expand "sse5i_vmfmsub<mode>4"
1330 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1332 [(vec_merge:SSEMODEF2P
1335 (match_operand:SSEMODEF2P 1 "register_operand" "")
1336 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1337 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1340 UNSPEC_SSE5_INTRINSIC))]
1343 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1344 if (TARGET_FUSED_MADD)
1346 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1347 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of scalar fmsub.  Operand 1
;; is tied to the destination; at most one of operands 2 and 3 may be a
;; memory operand.  Requires only TARGET_SSE5.
1352 (define_insn "*sse5i_vmfmsub<mode>4"
1353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1355 [(vec_merge:SSEMODEF2P
1358 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1359 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1360 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1363 UNSPEC_SSE5_INTRINSIC))]
1364 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1365 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1366 [(set_attr "type" "ssemuladd")
1367 (set_attr "mode" "<ssescalarmode>")])
1369 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar fnmadd intrinsic: register operands, operand 3
;; listed first, wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD
;; is set it emits sse5_vmfnmadd<mode>4 instead.
1370 (define_expand "sse5i_vmfnmadd<mode>4"
1371 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1373 [(vec_merge:SSEMODEF2P
1375 (match_operand:SSEMODEF2P 3 "register_operand" "")
1377 (match_operand:SSEMODEF2P 1 "register_operand" "")
1378 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1381 UNSPEC_SSE5_INTRINSIC))]
1384 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1385 if (TARGET_FUSED_MADD)
1387 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1388 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of scalar fnmadd; operand 3
;; is listed first and operand 1 is tied to the destination.
1393 (define_insn "*sse5i_vmfnmadd<mode>4"
1394 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1396 [(vec_merge:SSEMODEF2P
1398 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1400 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1401 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1404 UNSPEC_SSE5_INTRINSIC))]
1405 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1406 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1407 [(set_attr "type" "ssemuladd")
1408 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fnmsub intrinsic: a vec_merge of register
;; operands wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set it emits sse5_vmfnmsub<mode>4 instead.
1410 (define_expand "sse5i_vmfnmsub<mode>4"
1411 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1413 [(vec_merge:SSEMODEF2P
1417 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1418 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1419 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1422 UNSPEC_SSE5_INTRINSIC))]
1425 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1426 if (TARGET_FUSED_MADD)
1428 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1429 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of scalar fnmsub.  Operand 1
;; is tied to the destination; at most one of operands 2 and 3 may be a
;; memory operand.  Requires only TARGET_SSE5.
1434 (define_insn "*sse5i_vmfnmsub<mode>4"
1435 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1437 [(vec_merge:SSEMODEF2P
1441 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1442 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1443 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1446 UNSPEC_SSE5_INTRINSIC))]
1447 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1448 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1449 [(set_attr "type" "ssemuladd")
1450 (set_attr "mode" "<ssescalarmode>")])
1452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1454 ;; Parallel single-precision floating point conversion operations
1456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (MMX register or memory, "ym") to
;; V2SF.  V4SF operand 1 is tied to the destination register.
1458 (define_insn "sse_cvtpi2ps"
1459 [(set (match_operand:V4SF 0 "register_operand" "=x")
1462 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1463 (match_operand:V4SF 1 "register_operand" "0")
1466 "cvtpi2ps\t{%2, %0|%0, %2}"
1467 [(set_attr "type" "ssecvt")
1468 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source in an SSE register or memory, V2SI result in an
;; MMX register ("=y").  The conversion is an unspec in V4SI mode and the
;; parallel lists elements 0 and 1.
1470 (define_insn "sse_cvtps2pi"
1471 [(set (match_operand:V2SI 0 "register_operand" "=y")
1473 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1475 (parallel [(const_int 0) (const_int 1)])))]
1477 "cvtps2pi\t{%1, %0|%0, %1}"
1478 [(set_attr "type" "ssecvt")
1479 (set_attr "unit" "mmx")
1480 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but expressed with the RTL fix operator
;; instead of an unspec; the parallel lists elements 0 and 1.
;; NOTE(review): "mode" is SF here but DI in sse_cvtps2pi -- confirm that
;; the difference is intended.
1482 (define_insn "sse_cvttps2pi"
1483 [(set (match_operand:V2SI 0 "register_operand" "=y")
1485 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1486 (parallel [(const_int 0) (const_int 1)])))]
1488 "cvttps2pi\t{%1, %0|%0, %1}"
1489 [(set_attr "type" "ssecvt")
1490 (set_attr "unit" "mmx")
1491 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 (general register or memory) to SF.
;; V4SF operand 1 is tied to the destination.  The decode attributes are
;; given per alternative (register source, memory source).
1493 (define_insn "sse_cvtsi2ss"
1494 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1497 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1498 (match_operand:V4SF 1 "register_operand" "0,0")
1501 "cvtsi2ss\t{%2, %0|%0, %2}"
1502 [(set_attr "type" "sseicvt")
1503 (set_attr "athlon_decode" "vector,double")
1504 (set_attr "amdfam10_decode" "vector,double")
1505 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source variant of sse_cvtsi2ss, available only when
;; TARGET_64BIT.
;; NOTE(review): the second alternative is "rm" here but "m" in
;; sse_cvtsi2ss -- confirm that the difference is intended.
1507 (define_insn "sse_cvtsi2ssq"
1508 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1511 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1512 (match_operand:V4SF 1 "register_operand" "0,0")
1514 "TARGET_SSE && TARGET_64BIT"
1515 "cvtsi2ssq\t{%2, %0|%0, %2}"
1516 [(set_attr "type" "sseicvt")
1517 (set_attr "athlon_decode" "vector,double")
1518 (set_attr "amdfam10_decode" "vector,double")
1519 (set_attr "mode" "SF")])
;; cvtss2si on a V4SF operand: the parallel lists element 0 and the
;; conversion is UNSPEC_FIX_NOTRUNC; the SI result goes to a general
;; register.
;; NOTE(review): no amdfam10_decode attribute here, unlike
;; sse_cvtss2si_2 -- confirm whether that is intended.
1521 (define_insn "sse_cvtss2si"
1522 [(set (match_operand:SI 0 "register_operand" "=r,r")
1525 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1526 (parallel [(const_int 0)]))]
1527 UNSPEC_FIX_NOTRUNC))]
1529 "cvtss2si\t{%1, %0|%0, %1}"
1530 [(set_attr "type" "sseicvt")
1531 (set_attr "athlon_decode" "double,vector")
1532 (set_attr "prefix_rep" "1")
1533 (set_attr "mode" "SI")])
;; cvtss2si taking a plain SF operand (SSE register or memory) rather than
;; an element of a V4SF vector; the conversion is UNSPEC_FIX_NOTRUNC.
1535 (define_insn "sse_cvtss2si_2"
1536 [(set (match_operand:SI 0 "register_operand" "=r,r")
1537 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1538 UNSPEC_FIX_NOTRUNC))]
1540 "cvtss2si\t{%1, %0|%0, %1}"
1541 [(set_attr "type" "sseicvt")
1542 (set_attr "athlon_decode" "double,vector")
1543 (set_attr "amdfam10_decode" "double,double")
1544 (set_attr "prefix_rep" "1")
1545 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result variant of sse_cvtss2si, available only when
;; TARGET_64BIT.
1547 (define_insn "sse_cvtss2siq"
1548 [(set (match_operand:DI 0 "register_operand" "=r,r")
1551 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1552 (parallel [(const_int 0)]))]
1553 UNSPEC_FIX_NOTRUNC))]
1554 "TARGET_SSE && TARGET_64BIT"
1555 "cvtss2siq\t{%1, %0|%0, %1}"
1556 [(set_attr "type" "sseicvt")
1557 (set_attr "athlon_decode" "double,vector")
1558 (set_attr "prefix_rep" "1")
1559 (set_attr "mode" "DI")])
;; cvtss2siq taking a plain SF operand; DImode result, available only when
;; TARGET_64BIT.
1561 (define_insn "sse_cvtss2siq_2"
1562 [(set (match_operand:DI 0 "register_operand" "=r,r")
1563 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1564 UNSPEC_FIX_NOTRUNC))]
1565 "TARGET_SSE && TARGET_64BIT"
1566 "cvtss2siq\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "sseicvt")
1568 (set_attr "athlon_decode" "double,vector")
1569 (set_attr "amdfam10_decode" "double,double")
1570 (set_attr "prefix_rep" "1")
1571 (set_attr "mode" "DI")])
;; cvttss2si: V4SF source with the parallel listing element 0; SI result
;; in a general register.
1573 (define_insn "sse_cvttss2si"
1574 [(set (match_operand:SI 0 "register_operand" "=r,r")
1577 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1578 (parallel [(const_int 0)]))))]
1580 "cvttss2si\t{%1, %0|%0, %1}"
1581 [(set_attr "type" "sseicvt")
1582 (set_attr "athlon_decode" "double,vector")
1583 (set_attr "amdfam10_decode" "double,double")
1584 (set_attr "prefix_rep" "1")
1585 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result variant of sse_cvttss2si, available only when
;; TARGET_64BIT.
1587 (define_insn "sse_cvttss2siq"
1588 [(set (match_operand:DI 0 "register_operand" "=r,r")
1591 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1592 (parallel [(const_int 0)]))))]
1593 "TARGET_SSE && TARGET_64BIT"
1594 "cvttss2siq\t{%1, %0|%0, %1}"
1595 [(set_attr "type" "sseicvt")
1596 (set_attr "athlon_decode" "double,vector")
1597 (set_attr "amdfam10_decode" "double,double")
1598 (set_attr "prefix_rep" "1")
1599 (set_attr "mode" "DI")])
;; cvtdq2ps: element-wise float conversion of a V4SI register/memory
;; operand into a V4SF register.
1601 (define_insn "sse2_cvtdq2ps"
1602 [(set (match_operand:V4SF 0 "register_operand" "=x")
1603 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1605 "cvtdq2ps\t{%1, %0|%0, %1}"
1606 [(set_attr "type" "ssecvt")
1607 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF to V4SI conversion expressed as UNSPEC_FIX_NOTRUNC.
1609 (define_insn "sse2_cvtps2dq"
1610 [(set (match_operand:V4SI 0 "register_operand" "=x")
1611 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1612 UNSPEC_FIX_NOTRUNC))]
1614 "cvtps2dq\t{%1, %0|%0, %1}"
1615 [(set_attr "type" "ssecvt")
1616 (set_attr "prefix_data16" "1")
1617 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF to V4SI conversion expressed with the RTL fix operator.
1619 (define_insn "sse2_cvttps2dq"
1620 [(set (match_operand:V4SI 0 "register_operand" "=x")
1621 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1623 "cvttps2dq\t{%1, %0|%0, %1}"
1624 [(set_attr "type" "ssecvt")
1625 (set_attr "prefix_rep" "1")
1626 (set_attr "mode" "TI")])
1628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1630 ;; Parallel double-precision floating point conversion operations
1632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: converts a V2SI operand (MMX register "y" or memory "m") to
;; V2DF.  The "unit" attribute is mmx only for the register alternative.
1634 (define_insn "sse2_cvtpi2pd"
1635 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1636 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1638 "cvtpi2pd\t{%1, %0|%0, %1}"
1639 [(set_attr "type" "ssecvt")
1640 (set_attr "unit" "mmx,*")
1641 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF source, V2SI result in an MMX register, expressed as
;; UNSPEC_FIX_NOTRUNC.
1643 (define_insn "sse2_cvtpd2pi"
1644 [(set (match_operand:V2SI 0 "register_operand" "=y")
1645 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1646 UNSPEC_FIX_NOTRUNC))]
1648 "cvtpd2pi\t{%1, %0|%0, %1}"
1649 [(set_attr "type" "ssecvt")
1650 (set_attr "unit" "mmx")
1651 (set_attr "prefix_data16" "1")
1652 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF source, V2SI result in an MMX register, expressed with
;; the RTL fix operator.
1654 (define_insn "sse2_cvttpd2pi"
1655 [(set (match_operand:V2SI 0 "register_operand" "=y")
1656 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1658 "cvttpd2pi\t{%1, %0|%0, %1}"
1659 [(set_attr "type" "ssecvt")
1660 (set_attr "unit" "mmx")
1661 (set_attr "prefix_data16" "1")
1662 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SI operand 2 (general register or memory) to DF.
;; V2DF operand 1 is tied to the destination.
1664 (define_insn "sse2_cvtsi2sd"
1665 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1668 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1669 (match_operand:V2DF 1 "register_operand" "0,0")
1672 "cvtsi2sd\t{%2, %0|%0, %2}"
1673 [(set_attr "type" "sseicvt")
1674 (set_attr "mode" "DF")
1675 (set_attr "athlon_decode" "double,direct")
1676 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd, available only when
;; TARGET_64BIT.
1678 (define_insn "sse2_cvtsi2sdq"
1679 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1682 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1683 (match_operand:V2DF 1 "register_operand" "0,0")
1685 "TARGET_SSE2 && TARGET_64BIT"
1686 "cvtsi2sdq\t{%2, %0|%0, %2}"
1687 [(set_attr "type" "sseicvt")
1688 (set_attr "mode" "DF")
1689 (set_attr "athlon_decode" "double,direct")
1690 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on a V2DF operand: the parallel lists element 0 and the
;; conversion is UNSPEC_FIX_NOTRUNC; the SI result goes to a general
;; register.
1692 (define_insn "sse2_cvtsd2si"
1693 [(set (match_operand:SI 0 "register_operand" "=r,r")
1696 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1697 (parallel [(const_int 0)]))]
1698 UNSPEC_FIX_NOTRUNC))]
1700 "cvtsd2si\t{%1, %0|%0, %1}"
1701 [(set_attr "type" "sseicvt")
1702 (set_attr "athlon_decode" "double,vector")
1703 (set_attr "prefix_rep" "1")
1704 (set_attr "mode" "SI")])
;; cvtsd2si taking a plain DF operand (SSE register or memory) rather than
;; an element of a V2DF vector; the conversion is UNSPEC_FIX_NOTRUNC.
1706 (define_insn "sse2_cvtsd2si_2"
1707 [(set (match_operand:SI 0 "register_operand" "=r,r")
1708 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1709 UNSPEC_FIX_NOTRUNC))]
1711 "cvtsd2si\t{%1, %0|%0, %1}"
1712 [(set_attr "type" "sseicvt")
1713 (set_attr "athlon_decode" "double,vector")
1714 (set_attr "amdfam10_decode" "double,double")
1715 (set_attr "prefix_rep" "1")
1716 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result variant of sse2_cvtsd2si, available only when
;; TARGET_64BIT.
1718 (define_insn "sse2_cvtsd2siq"
1719 [(set (match_operand:DI 0 "register_operand" "=r,r")
1722 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1723 (parallel [(const_int 0)]))]
1724 UNSPEC_FIX_NOTRUNC))]
1725 "TARGET_SSE2 && TARGET_64BIT"
1726 "cvtsd2siq\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "sseicvt")
1728 (set_attr "athlon_decode" "double,vector")
1729 (set_attr "prefix_rep" "1")
1730 (set_attr "mode" "DI")])
;; cvtsd2siq taking a plain DF operand; DImode result, available only when
;; TARGET_64BIT.
1732 (define_insn "sse2_cvtsd2siq_2"
1733 [(set (match_operand:DI 0 "register_operand" "=r,r")
1734 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1735 UNSPEC_FIX_NOTRUNC))]
1736 "TARGET_SSE2 && TARGET_64BIT"
1737 "cvtsd2siq\t{%1, %0|%0, %1}"
1738 [(set_attr "type" "sseicvt")
1739 (set_attr "athlon_decode" "double,vector")
1740 (set_attr "amdfam10_decode" "double,double")
1741 (set_attr "prefix_rep" "1")
1742 (set_attr "mode" "DI")])
;; cvttsd2si: V2DF source with the parallel listing element 0; SI result
;; in a general register.
1744 (define_insn "sse2_cvttsd2si"
1745 [(set (match_operand:SI 0 "register_operand" "=r,r")
1748 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1749 (parallel [(const_int 0)]))))]
1751 "cvttsd2si\t{%1, %0|%0, %1}"
1752 [(set_attr "type" "sseicvt")
1753 (set_attr "prefix_rep" "1")
1754 (set_attr "mode" "SI")
1755 (set_attr "athlon_decode" "double,vector")
1756 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si, available only
;; when TARGET_64BIT.
1758 (define_insn "sse2_cvttsd2siq"
1759 [(set (match_operand:DI 0 "register_operand" "=r,r")
1762 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1763 (parallel [(const_int 0)]))))]
1764 "TARGET_SSE2 && TARGET_64BIT"
1765 "cvttsd2siq\t{%1, %0|%0, %1}"
1766 [(set_attr "type" "sseicvt")
1767 (set_attr "prefix_rep" "1")
1768 (set_attr "mode" "DI")
1769 (set_attr "athlon_decode" "double,vector")
1770 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: V4SI source with the parallel listing elements 0 and 1; the
;; result is a V2DF register.
1772 (define_insn "sse2_cvtdq2pd"
1773 [(set (match_operand:V2DF 0 "register_operand" "=x")
1776 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1777 (parallel [(const_int 0) (const_int 1)]))))]
1779 "cvtdq2pd\t{%1, %0|%0, %1}"
1780 [(set_attr "type" "ssecvt")
1781 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to
;; the V2SI zero constant, which the *sse2_cvtpd2dq insn expects as its
;; const0_operand.
1783 (define_expand "sse2_cvtpd2dq"
1784 [(set (match_operand:V4SI 0 "register_operand" "")
1786 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1790 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF source converted through a V2SI unspec; operand 2 must
;; be a V2SI zero constant (const0_operand).  The destination is V4SI.
1792 (define_insn "*sse2_cvtpd2dq"
1793 [(set (match_operand:V4SI 0 "register_operand" "=x")
1795 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1797 (match_operand:V2SI 2 "const0_operand" "")))]
1799 "cvtpd2dq\t{%1, %0|%0, %1}"
1800 [(set_attr "type" "ssecvt")
1801 (set_attr "prefix_rep" "1")
1802 (set_attr "mode" "TI")
1803 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation statement sets operands[2] to
;; the V2SI zero constant, which the *sse2_cvttpd2dq insn expects as its
;; const0_operand.
1805 (define_expand "sse2_cvttpd2dq"
1806 [(set (match_operand:V4SI 0 "register_operand" "")
1808 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1811 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V2DF source converted with the RTL fix operator to V2SI;
;; operand 2 must be a V2SI zero constant.  The destination is V4SI.
1813 (define_insn "*sse2_cvttpd2dq"
1814 [(set (match_operand:V4SI 0 "register_operand" "=x")
1816 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1817 (match_operand:V2SI 2 "const0_operand" "")))]
1819 "cvttpd2dq\t{%1, %0|%0, %1}"
1820 [(set_attr "type" "ssecvt")
1821 (set_attr "prefix_rep" "1")
1822 (set_attr "mode" "TI")
1823 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (register or memory) to
;; V2SF.  V4SF operand 1 is tied to the destination.
1825 (define_insn "sse2_cvtsd2ss"
1826 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1829 (float_truncate:V2SF
1830 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1831 (match_operand:V4SF 1 "register_operand" "0,0")
1834 "cvtsd2ss\t{%2, %0|%0, %2}"
1835 [(set_attr "type" "ssecvt")
1836 (set_attr "athlon_decode" "vector,double")
1837 (set_attr "amdfam10_decode" "vector,double")
1838 (set_attr "mode" "SF")])
;; cvtss2sd: V4SF operand 2 with the parallel listing elements 0 and 1.
;; V2DF operand 1 is tied to the destination.
1840 (define_insn "sse2_cvtss2sd"
1841 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1845 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1846 (parallel [(const_int 0) (const_int 1)])))
1847 (match_operand:V2DF 1 "register_operand" "0,0")
1850 "cvtss2sd\t{%2, %0|%0, %2}"
1851 [(set_attr "type" "ssecvt")
1852 (set_attr "amdfam10_decode" "vector,double")
1853 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation statement sets operands[2] to
;; the V2SF zero constant, which the *sse2_cvtpd2ps insn expects as its
;; const0_operand.
1855 (define_expand "sse2_cvtpd2ps"
1856 [(set (match_operand:V4SF 0 "register_operand" "")
1858 (float_truncate:V2SF
1859 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1862 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate of a V2DF source to V2SF; operand 2 must be a
;; V2SF zero constant.  The destination is V4SF.
1864 (define_insn "*sse2_cvtpd2ps"
1865 [(set (match_operand:V4SF 0 "register_operand" "=x")
1867 (float_truncate:V2SF
1868 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1869 (match_operand:V2SF 2 "const0_operand" "")))]
1871 "cvtpd2ps\t{%1, %0|%0, %1}"
1872 [(set_attr "type" "ssecvt")
1873 (set_attr "prefix_data16" "1")
1874 (set_attr "mode" "V4SF")
1875 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: V4SF source with the parallel listing elements 0 and 1; the
;; result is a V2DF register.
1877 (define_insn "sse2_cvtps2pd"
1878 [(set (match_operand:V2DF 0 "register_operand" "=x")
1881 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1882 (parallel [(const_int 0) (const_int 1)]))))]
1884 "cvtps2pd\t{%1, %0|%0, %1}"
1885 [(set_attr "type" "ssecvt")
1886 (set_attr "mode" "V2DF")
1887 (set_attr "amdfam10_decode" "direct")])
;; Expander taking V4SF operand 1 and producing V2DF operand 0.  The
;; preparation code allocates a fresh V4SF pseudo as operands[2]; the
;; final set uses elements 0 and 1 of its source.
1889 (define_expand "vec_unpacks_hi_v4sf"
1894 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1895 (parallel [(const_int 6)
1899 (set (match_operand:V2DF 0 "register_operand" "")
1903 (parallel [(const_int 0) (const_int 1)]))))]
1906 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF operand 0 from V4SF operand 1, with the
;; parallel listing elements 0 and 1.
1909 (define_expand "vec_unpacks_lo_v4sf"
1910 [(set (match_operand:V2DF 0 "register_operand" "")
1913 (match_operand:V4SF 1 "nonimmediate_operand" "")
1914 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two steps: vec_unpacks_hi_v8hi writes a V4SI temporary,
;; which sse2_cvtdq2ps then converts into operand 0.
1917 (define_expand "vec_unpacks_float_hi_v8hi"
1918 [(match_operand:V4SF 0 "register_operand" "")
1919 (match_operand:V8HI 1 "register_operand" "")]
1922 rtx tmp = gen_reg_rtx (V4SImode);
1924 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1925 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: vec_unpacks_lo_v8hi writes a V4SI temporary,
;; which sse2_cvtdq2ps then converts into operand 0.
1929 (define_expand "vec_unpacks_float_lo_v8hi"
1930 [(match_operand:V4SF 0 "register_operand" "")
1931 (match_operand:V8HI 1 "register_operand" "")]
1934 rtx tmp = gen_reg_rtx (V4SImode);
1936 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1937 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: vec_unpacku_hi_v8hi writes a V4SI temporary,
;; which sse2_cvtdq2ps then converts into operand 0.
1941 (define_expand "vec_unpacku_float_hi_v8hi"
1942 [(match_operand:V4SF 0 "register_operand" "")
1943 (match_operand:V8HI 1 "register_operand" "")]
1946 rtx tmp = gen_reg_rtx (V4SImode);
1948 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1949 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: vec_unpacku_lo_v8hi writes a V4SI temporary,
;; which sse2_cvtdq2ps then converts into operand 0.
1953 (define_expand "vec_unpacku_float_lo_v8hi"
1954 [(match_operand:V4SF 0 "register_operand" "")
1955 (match_operand:V8HI 1 "register_operand" "")]
1958 rtx tmp = gen_reg_rtx (V4SImode);
1960 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1961 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander taking V4SI operand 1 and producing V2DF operand 0.  The
;; preparation code allocates a fresh V4SI pseudo as operands[2]; the
;; final set uses elements 0 and 1 of its source.
1965 (define_expand "vec_unpacks_float_hi_v4si"
1968 (match_operand:V4SI 1 "nonimmediate_operand" "")
1969 (parallel [(const_int 2)
1973 (set (match_operand:V2DF 0 "register_operand" "")
1977 (parallel [(const_int 0) (const_int 1)]))))]
1980 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing V2DF operand 0 from V4SI operand 1, with the
;; parallel listing elements 0 and 1.
1983 (define_expand "vec_unpacks_float_lo_v4si"
1984 [(set (match_operand:V2DF 0 "register_operand" "")
1987 (match_operand:V4SI 1 "nonimmediate_operand" "")
1988 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF operands into one V4SF: each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary, then sse_movlhps combines
;; the two temporaries into operand 0.
1991 (define_expand "vec_pack_trunc_v2df"
1992 [(match_operand:V4SF 0 "register_operand" "")
1993 (match_operand:V2DF 1 "nonimmediate_operand" "")
1994 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1999 r1 = gen_reg_rtx (V4SFmode);
2000 r2 = gen_reg_rtx (V4SFmode);
2002 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2003 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2004 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI: each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary, then sse2_punpcklqdq
;; combines the temporaries, all three viewed as V2DI via gen_lowpart.
2008 (define_expand "vec_pack_sfix_trunc_v2df"
2009 [(match_operand:V4SI 0 "register_operand" "")
2010 (match_operand:V2DF 1 "nonimmediate_operand" "")
2011 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2016 r1 = gen_reg_rtx (V4SImode);
2017 r2 = gen_reg_rtx (V4SImode);
2019 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2020 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2021 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2022 gen_lowpart (V2DImode, r1),
2023 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df but converts with
;; sse2_cvtpd2dq instead of sse2_cvttpd2dq before the sse2_punpcklqdq
;; combine.
2027 (define_expand "vec_pack_sfix_v2df"
2028 [(match_operand:V4SI 0 "register_operand" "")
2029 (match_operand:V2DF 1 "nonimmediate_operand" "")
2030 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2035 r1 = gen_reg_rtx (V4SImode);
2036 r2 = gen_reg_rtx (V4SImode);
2038 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2039 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2040 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2041 gen_lowpart (V2DImode, r1),
2042 gen_lowpart (V2DImode, r2)));
2046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2048 ;; Parallel single-precision floating point element swizzling
2050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn sse_movhlps: V4SF shuffle of operands 1 and 2 whose selector starts
;; at element 6.  Operand 1 is always tied to the destination.  Alternatives:
;; register source (movhlps), offsettable memory source read through %H2
;; (movlps), and memory destination (movhps).  Requires TARGET_SSE and that
;; operands 1 and 2 are not both in memory.
2052 (define_insn "sse_movhlps"
2053 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2056 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2057 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2058 (parallel [(const_int 6)
2062 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2064 movhlps\t{%2, %0|%0, %2}
2065 movlps\t{%H2, %0|%0, %H2}
2066 movhps\t{%2, %0|%0, %2}"
2067 [(set_attr "type" "ssemov")
2068 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Insn sse_movlhps: V4SF shuffle of operands 1 and 2 whose selector starts
;; at element 0.  Operand 1 is always tied to the destination.  Alternatives:
;; register source (movlhps), memory source (movhps), and offsettable memory
;; destination written through %H0 (movlps).  Requires TARGET_SSE and
;; ix86_binary_operator_ok.
2070 (define_insn "sse_movlhps"
2071 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2074 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2075 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2076 (parallel [(const_int 0)
2080 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2082 movlhps\t{%2, %0|%0, %2}
2083 movhps\t{%2, %0|%0, %2}
2084 movlps\t{%2, %H0|%H0, %2}"
2085 [(set_attr "type" "ssemov")
2086 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Insn sse_unpckhps: emits unpckhps.  The selector picks indices 2, 6, 3, 7
;; from the combination of operand 1 (tied to the destination register) and
;; operand 2 (register or memory).
2088 (define_insn "sse_unpckhps"
2089 [(set (match_operand:V4SF 0 "register_operand" "=x")
2092 (match_operand:V4SF 1 "register_operand" "0")
2093 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2094 (parallel [(const_int 2) (const_int 6)
2095 (const_int 3) (const_int 7)])))]
2097 "unpckhps\t{%2, %0|%0, %2}"
2098 [(set_attr "type" "sselog")
2099 (set_attr "mode" "V4SF")])
;; Insn sse_unpcklps: emits unpcklps.  The selector picks indices 0, 4, 1, 5
;; from the combination of operand 1 (tied to the destination register) and
;; operand 2 (register or memory).
2101 (define_insn "sse_unpcklps"
2102 [(set (match_operand:V4SF 0 "register_operand" "=x")
2105 (match_operand:V4SF 1 "register_operand" "0")
2106 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2107 (parallel [(const_int 0) (const_int 4)
2108 (const_int 1) (const_int 5)])))]
2110 "unpcklps\t{%2, %0|%0, %2}"
2111 [(set_attr "type" "sselog")
2112 (set_attr "mode" "V4SF")])
2114 ;; These are modeled with the same vec_concat as the others so that we
2115 ;; capture users of shufps that can use the new instructions
;; Insn sse3_movshdup: emits movshdup from V4SF operand 1 (register or
;; memory) into register operand 0.  The selector starts at element 1; the
;; rest of it is not visible in this excerpt.  Marked with prefix_rep.
2116 (define_insn "sse3_movshdup"
2117 [(set (match_operand:V4SF 0 "register_operand" "=x")
2120 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2122 (parallel [(const_int 1)
2127 "movshdup\t{%1, %0|%0, %1}"
2128 [(set_attr "type" "sse")
2129 (set_attr "prefix_rep" "1")
2130 (set_attr "mode" "V4SF")])
;; Insn sse3_movsldup: emits movsldup from V4SF operand 1 (register or
;; memory) into register operand 0.  The selector starts at element 0; the
;; rest of it is not visible in this excerpt.  Marked with prefix_rep.
2132 (define_insn "sse3_movsldup"
2133 [(set (match_operand:V4SF 0 "register_operand" "=x")
2136 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2138 (parallel [(const_int 0)
2143 "movsldup\t{%1, %0|%0, %1}"
2144 [(set_attr "type" "sse")
2145 (set_attr "prefix_rep" "1")
2146 (set_attr "mode" "V4SF")])
;; Expander sse_shufps: takes the shuffle immediate in operand 3, splits it
;; into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) and passes them to
;; sse_shufps_1 as explicit element indices.  The two upper fields are
;; offset by 4 before being passed on.
2148 (define_expand "sse_shufps"
2149 [(match_operand:V4SF 0 "register_operand" "")
2150 (match_operand:V4SF 1 "register_operand" "")
2151 (match_operand:V4SF 2 "nonimmediate_operand" "")
2152 (match_operand:SI 3 "const_int_operand" "")]
2155 int mask = INTVAL (operands[3]);
2156 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2157 GEN_INT ((mask >> 0) & 3),
2158 GEN_INT ((mask >> 2) & 3),
2159 GEN_INT (((mask >> 4) & 3) + 4),
2160 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn sse_shufps_1: shufps with the selector given as four separate index
;; operands (3 and 4 use the const_0_to_3 predicate, 5 and 6 the const_4_to_7
;; predicate).  The output code folds them back into a single immediate --
;; operands 3 and 4 as-is, operands 5 and 6 minus 4, at bit positions 0, 2,
;; 4 and 6 -- stores it in operands[3], and emits shufps.
2164 (define_insn "sse_shufps_1"
2165 [(set (match_operand:V4SF 0 "register_operand" "=x")
2168 (match_operand:V4SF 1 "register_operand" "0")
2169 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2170 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2171 (match_operand 4 "const_0_to_3_operand" "")
2172 (match_operand 5 "const_4_to_7_operand" "")
2173 (match_operand 6 "const_4_to_7_operand" "")])))]
2177 mask |= INTVAL (operands[3]) << 0;
2178 mask |= INTVAL (operands[4]) << 2;
2179 mask |= (INTVAL (operands[5]) - 4) << 4;
2180 mask |= (INTVAL (operands[6]) - 4) << 6;
2181 operands[3] = GEN_INT (mask);
2183 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2185 [(set_attr "type" "sselog")
2186 (set_attr "mode" "V4SF")])
;; Insn sse_storehps: extracts elements 2 and 3 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives: register to memory (movhps), register to
;; register (movhlps), and offsettable memory to register read through %H1
;; (movlps).
2188 (define_insn "sse_storehps"
2189 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2191 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2192 (parallel [(const_int 2) (const_int 3)])))]
2195 movhps\t{%1, %0|%0, %1}
2196 movhlps\t{%1, %0|%0, %1}
2197 movlps\t{%H1, %0|%0, %H1}"
2198 [(set_attr "type" "ssemov")
2199 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn sse_loadhps: combines elements 0 and 1 of V4SF operand 1 (always
;; tied to the destination) with V2SF operand 2.  Alternatives: memory
;; source (movhps), register source (movlhps), and offsettable memory
;; destination written through %H0 (movlps).
2201 (define_insn "sse_loadhps"
2202 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2205 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2206 (parallel [(const_int 0) (const_int 1)]))
2207 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2210 movhps\t{%2, %0|%0, %2}
2211 movlhps\t{%2, %0|%0, %2}
2212 movlps\t{%2, %H0|%H0, %2}"
2213 [(set_attr "type" "ssemov")
2214 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn sse_storelps: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives: register to memory (movlps), register to
;; register (movaps), and memory to register (movlps).
2216 (define_insn "sse_storelps"
2217 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2219 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2220 (parallel [(const_int 0) (const_int 1)])))]
2223 movlps\t{%1, %0|%0, %1}
2224 movaps\t{%1, %0|%0, %1}
2225 movlps\t{%1, %0|%0, %1}"
2226 [(set_attr "type" "ssemov")
2227 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn sse_loadlps: combines V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  Alternatives: operand 2 already in the destination register
;; and operand 1 in another register (shufps with immediate 0xe4 reading
;; %1), memory source with operand 1 tied to the destination (movlps), and
;; memory destination (movlps).
2229 (define_insn "sse_loadlps"
2230 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2232 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2234 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2235 (parallel [(const_int 2) (const_int 3)]))))]
2238 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2239 movlps\t{%2, %0|%0, %2}
2240 movlps\t{%2, %0|%0, %2}"
2241 [(set_attr "type" "sselog,ssemov,ssemov")
2242 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Insn sse_movss: register-only pattern that emits movss from operand 2
;; into operand 0; operand 1 is tied to the destination register.  (The
;; merge operator and its mask are not visible in this excerpt.)
2244 (define_insn "sse_movss"
2245 [(set (match_operand:V4SF 0 "register_operand" "=x")
2247 (match_operand:V4SF 2 "register_operand" "x")
2248 (match_operand:V4SF 1 "register_operand" "0")
2251 "movss\t{%2, %0|%0, %2}"
2252 [(set_attr "type" "ssemov")
2253 (set_attr "mode" "SF")])
;; Insn *vec_dupv4sf: builds a V4SF from SF operand 1, which is tied to the
;; destination register, by emitting shufps with immediate 0 and the
;; destination as both source operands.
2255 (define_insn "*vec_dupv4sf"
2256 [(set (match_operand:V4SF 0 "register_operand" "=x")
2258 (match_operand:SF 1 "register_operand" "0")))]
2260 "shufps\t{$0, %0, %0|%0, %0, 0}"
2261 [(set_attr "type" "sselog1")
2262 (set_attr "mode" "V4SF")])
2264 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2265 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2266 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Insn *sse_concatv2sf: builds a V2SF from SF operands 1 and 2.  The first
;; two alternatives use "x" registers (unpcklps with a register operand 2;
;; movss from memory when operand 2 matches constraint C), the last two use
;; "*y" registers (punpckldq; movd from memory when operand 2 matches C).
2267 (define_insn "*sse_concatv2sf"
2268 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2270 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2271 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2274 unpcklps\t{%2, %0|%0, %2}
2275 movss\t{%1, %0|%0, %1}
2276 punpckldq\t{%2, %0|%0, %2}
2277 movd\t{%1, %0|%0, %1}"
2278 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2279 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Insn *sse_concatv4sf: builds a V4SF from V2SF operand 1 (tied to the
;; destination) and V2SF operand 2.  Emits movlhps when operand 2 is a
;; register and movhps when it is in memory.
2281 (define_insn "*sse_concatv4sf"
2282 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2284 (match_operand:V2SF 1 "register_operand" " 0,0")
2285 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2288 movlhps\t{%2, %0|%0, %2}
2289 movhps\t{%2, %0|%0, %2}"
2290 [(set_attr "type" "ssemov")
2291 (set_attr "mode" "V4SF,V2SF")])
;; Expander vec_initv4sf: hands V4SF destination operand 0 and initializer
;; operand 1 to ix86_expand_vector_init, passing false as its first argument.
2293 (define_expand "vec_initv4sf"
2294 [(match_operand:V4SF 0 "register_operand" "")
2295 (match_operand 1 "" "")]
2298 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insn vec_setv4sf_0: combines SF operand 2 with V4SF operand 1 into
;; operand 0.  The visible templates are movss (register source), movss
;; (memory source, operand 1 matching C) and movd (general register source,
;; Y2 destination).  The template for the fourth, memory-destination
;; alternative is not visible in this excerpt.
2302 (define_insn "vec_setv4sf_0"
2303 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2306 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2307 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2311 movss\t{%2, %0|%0, %2}
2312 movss\t{%2, %0|%0, %2}
2313 movd\t{%2, %0|%0, %2}
2315 [(set_attr "type" "ssemov")
2316 (set_attr "mode" "SF")])
2318 ;; A subset is vec_setv4sf.
;; Insn *vec_setv4sf_sse4_1: inserts SF operand 2 (register or memory) into
;; V4SF operand 1, which is tied to the destination.  Operand 3 must satisfy
;; const_pow2_1_to_8_operand; the output code replaces it with
;; exact_log2 (operand 3) shifted left by 4 and emits insertps with that
;; immediate.
2319 (define_insn "*vec_setv4sf_sse4_1"
2320 [(set (match_operand:V4SF 0 "register_operand" "=x")
2323 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2324 (match_operand:V4SF 1 "register_operand" "0")
2325 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2328 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2329 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2331 [(set_attr "type" "sselog")
2332 (set_attr "prefix_extra" "1")
2333 (set_attr "mode" "V4SF")])
;; Insn sse4_1_insertps: unspec-based insertps taking V4SF register operand
;; 2, V4SF operand 1 (tied to the destination) and an immediate operand 3
;; that must satisfy const_0_to_255_operand.  The immediate is passed to the
;; instruction unchanged.  Note the ';' after the output template: in a
;; machine description a semicolon starts a comment, so it has no effect.
2335 (define_insn "sse4_1_insertps"
2336 [(set (match_operand:V4SF 0 "register_operand" "=x")
2337 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2338 (match_operand:V4SF 1 "register_operand" "0")
2339 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2342 "insertps\t{%3, %2, %0|%0, %2, %3}";
2343 [(set_attr "type" "sselog")
2344 (set_attr "prefix_extra" "1")
2345 (set_attr "mode" "V4SF")])
;; Fragment of an unnamed pattern (presumably a define_split; its opening
;; line is not visible in this excerpt).  Once reload has completed on an SSE
;; target, setting V4SF memory operand 0 from non-memory SF operand 1 is
;; emitted as a plain SFmode move to byte offset 0 of that memory.
2348 [(set (match_operand:V4SF 0 "memory_operand" "")
2351 (match_operand:SF 1 "nonmemory_operand" ""))
2354 "TARGET_SSE && reload_completed"
2357 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander vec_setv4sf: hands V4SF operand 0, SF operand 1 and the integer
;; value of constant operand 2 to ix86_expand_vector_set, passing false as
;; its first argument.
2361 (define_expand "vec_setv4sf"
2362 [(match_operand:V4SF 0 "register_operand" "")
2363 (match_operand:SF 1 "register_operand" "")
2364 (match_operand 2 "const_int_operand" "")]
2367 ix86_expand_vector_set (false, operands[0], operands[1],
2368 INTVAL (operands[2]));
;; Insn-and-split *vec_extractv4sf_0: extracts element 0 of V4SF operand 1
;; into SF operand 0; memory-to-memory is rejected by the insn condition.
;; After reload it is split into a plain SF move whose source is either an
;; SFmode REG with the same register number as operand 1 or the SFmode
;; lowpart of operand 1 (the test choosing between the two is not visible
;; in this excerpt).
2372 (define_insn_and_split "*vec_extractv4sf_0"
2373 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2375 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2376 (parallel [(const_int 0)])))]
2377 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2379 "&& reload_completed"
2382 rtx op1 = operands[1];
2384 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2386 op1 = gen_lowpart (SFmode, op1);
2387 emit_move_insn (operands[0], op1);
;; Insn *sse4_1_extractps: emits extractps to copy the element of V4SF
;; register operand 1 selected by constant operand 2 (const_0_to_3 predicate)
;; into SF operand 0, which may be a general register or memory ("rm").
2391 (define_insn "*sse4_1_extractps"
2392 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2394 (match_operand:V4SF 1 "register_operand" "x")
2395 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2397 "extractps\t{%2, %1, %0|%0, %1, %2}"
2398 [(set_attr "type" "sselog")
2399 (set_attr "prefix_extra" "1")
2400 (set_attr "mode" "V4SF")])
;; Insn-and-split *vec_extract_v4sf_mem: extracts the element selected by
;; constant operand 2 from V4SF offsettable memory operand 1 into SF register
;; operand 0.  The split emits a plain SF move from byte offset i*4 of the
;; memory operand, where i is the value of operand 2.
2402 (define_insn_and_split "*vec_extract_v4sf_mem"
2403 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2405 (match_operand:V4SF 1 "memory_operand" "o")
2406 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2412 int i = INTVAL (operands[2]);
2414 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander vec_extractv4sf: hands SF operand 0, V4SF operand 1 and the
;; integer value of constant operand 2 to ix86_expand_vector_extract, passing
;; false as its first argument.
2418 (define_expand "vec_extractv4sf"
2419 [(match_operand:SF 0 "register_operand" "")
2420 (match_operand:V4SF 1 "register_operand" "")
2421 (match_operand 2 "const_int_operand" "")]
2424 ix86_expand_vector_extract (false, operands[0], operands[1],
2425 INTVAL (operands[2]));
2429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2431 ;; Parallel double-precision floating point element swizzling
2433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn sse2_unpckhpd: V2DF shuffle of operands 1 and 2 whose selector starts
;; at element 1.  Alternatives: both in registers with operand 1 tied to the
;; destination (unpckhpd), operand 1 in offsettable memory read through %H1
;; with operand 2 tied (movlpd), and memory destination tied to operand 2
;; (movhpd).  Requires TARGET_SSE2 and that operands 1 and 2 are not both in
;; memory.
2435 (define_insn "sse2_unpckhpd"
2436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2440 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2441 (parallel [(const_int 1)
2443 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2445 unpckhpd\t{%2, %0|%0, %2}
2446 movlpd\t{%H1, %0|%0, %H1}
2447 movhpd\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sselog,ssemov,ssemov")
2449 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Insn *sse3_movddup: V2DF pattern whose selector starts at element 0.  The
;; first alternative (register or memory source, register destination) emits
;; movddup.  The template for the second alternative (register source,
;; offsettable memory destination) is not visible in this excerpt.  Requires
;; TARGET_SSE3 and that operands 0 and 1 are not both in memory.
2451 (define_insn "*sse3_movddup"
2452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2457 (parallel [(const_int 0)
2459 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2461 movddup\t{%1, %0|%0, %1}
2463 [(set_attr "type" "sselog1,ssemov")
2464 (set_attr "mode" "V2DF")])
;; Fragment of an unnamed pattern (presumably a define_split; its opening
;; line is not visible in this excerpt).  Once reload has completed on an
;; SSE3 target, the store of register operand 1 into V2DF memory operand 0 is
;; emitted as two DFmode moves: the DFmode view of the source register is
;; written to byte offsets 0 and 8 of the memory.
2467 [(set (match_operand:V2DF 0 "memory_operand" "")
2470 (match_operand:V2DF 1 "register_operand" "")
2472 (parallel [(const_int 0)
2474 "TARGET_SSE3 && reload_completed"
2477 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2478 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2479 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Insn sse2_unpcklpd: V2DF shuffle of operands 1 and 2 whose selector starts
;; at element 0.  Operand 1 is always tied to the destination.  Alternatives:
;; register source (unpcklpd), memory source (movhpd), and offsettable memory
;; destination written through %H0 (movlpd).  Requires TARGET_SSE2 and that
;; operands 1 and 2 are not both in memory.
2483 (define_insn "sse2_unpcklpd"
2484 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2487 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2488 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2489 (parallel [(const_int 0)
2491 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2493 unpcklpd\t{%2, %0|%0, %2}
2494 movhpd\t{%2, %0|%0, %2}
2495 movlpd\t{%2, %H0|%H0, %2}"
2496 [(set_attr "type" "sselog,ssemov,ssemov")
2497 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander sse2_shufpd: takes the shuffle immediate in operand 3 and calls
;; sse2_shufpd_1 with explicit element indices.  The visible index argument
;; maps bit 1 of the immediate to 3 (set) or 2 (clear); the argument derived
;; from the other bit is on a line not visible in this excerpt.
2499 (define_expand "sse2_shufpd"
2500 [(match_operand:V2DF 0 "register_operand" "")
2501 (match_operand:V2DF 1 "register_operand" "")
2502 (match_operand:V2DF 2 "nonimmediate_operand" "")
2503 (match_operand:SI 3 "const_int_operand" "")]
2506 int mask = INTVAL (operands[3]);
2507 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2509 GEN_INT (mask & 2 ? 3 : 2)));
;; Insn sse2_shufpd_1: shufpd with the selector given as two index operands
;; (operand 3 uses the const_0_to_1 predicate, operand 4 the const_2_to_3
;; predicate).  The output code rebuilds the immediate as
;; operand 3 | ((operand 4 - 2) << 1), stores it in operands[3], and emits
;; shufpd.
2513 (define_insn "sse2_shufpd_1"
2514 [(set (match_operand:V2DF 0 "register_operand" "=x")
2517 (match_operand:V2DF 1 "register_operand" "0")
2518 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2519 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2520 (match_operand 4 "const_2_to_3_operand" "")])))]
2524 mask = INTVAL (operands[3]);
2525 mask |= (INTVAL (operands[4]) - 2) << 1;
2526 operands[3] = GEN_INT (mask);
2528 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2530 [(set_attr "type" "sselog")
2531 (set_attr "mode" "V2DF")])
;; Insn sse2_storehpd: extracts element 1 of V2DF operand 1 into DF operand
;; 0.  The first alternative (register to memory) emits movhpd; the templates
;; for the other two alternatives are not visible in this excerpt.  Requires
;; TARGET_SSE2 and that operands 0 and 1 are not both in memory.
2533 (define_insn "sse2_storehpd"
2534 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2536 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2537 (parallel [(const_int 1)])))]
2538 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2540 movhpd\t{%1, %0|%0, %1}
2543 [(set_attr "type" "ssemov,sselog1,ssemov")
2544 (set_attr "mode" "V1DF,V2DF,DF")])
;; Fragment of an unnamed pattern (presumably a define_split; its opening
;; line is not visible in this excerpt).  Once reload has completed on an
;; SSE2 target, extracting element 1 of V2DF memory operand 1 into DF
;; register operand 0 becomes a plain DF load from byte offset 8 of that
;; memory.
2547 [(set (match_operand:DF 0 "register_operand" "")
2549 (match_operand:V2DF 1 "memory_operand" "")
2550 (parallel [(const_int 1)])))]
2551 "TARGET_SSE2 && reload_completed"
2552 [(set (match_dup 0) (match_dup 1))]
2554 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Insn sse2_storelpd: extracts element 0 of V2DF operand 1 into DF operand
;; 0.  The first alternative (register to memory) emits movlpd; the templates
;; for the other two alternatives are not visible in this excerpt.  Requires
;; TARGET_SSE2 and that operands 0 and 1 are not both in memory.
2557 (define_insn "sse2_storelpd"
2558 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2560 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2561 (parallel [(const_int 0)])))]
2562 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2564 movlpd\t{%1, %0|%0, %1}
2567 [(set_attr "type" "ssemov")
2568 (set_attr "mode" "V1DF,DF,DF")])
;; Fragment of an unnamed pattern (presumably a define_split; its opening
;; line is not visible in this excerpt).  Once reload has completed on an
;; SSE2 target, extracting element 0 of V2DF operand 1 into DF register
;; operand 0 becomes a plain DF move.  The source is either a DFmode REG with
;; the same register number as operand 1 or the DFmode lowpart of operand 1
;; (the test choosing between the two is not visible in this excerpt).
2571 [(set (match_operand:DF 0 "register_operand" "")
2573 (match_operand:V2DF 1 "nonimmediate_operand" "")
2574 (parallel [(const_int 0)])))]
2575 "TARGET_SSE2 && reload_completed"
2578 rtx op1 = operands[1];
2580 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2582 op1 = gen_lowpart (DFmode, op1);
2583 emit_move_insn (operands[0], op1);
;; Insn sse2_loadhpd: combines element 0 of V2DF operand 1 with DF operand 2.
;; Visible templates: movhpd (operand 2 in memory, operand 1 tied to the
;; destination), unpcklpd (operand 2 in a register, operand 1 tied), and
;; shufpd with immediate 1 reading %1 (operand 2 tied to the destination).
;; The template for the fourth, memory-destination alternative is not visible
;; in this excerpt.  Requires TARGET_SSE2 and that operands 1 and 2 are not
;; both in memory.
;; NOTE(review): in the shufpd alternative the destination register holds
;; operand 2; with immediate 1 shufpd appears to take the low result element
;; from the destination's high element rather than from operand 1 -- confirm
;; that this alternative computes the intended value.
2587 (define_insn "sse2_loadhpd"
2588 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2591 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2592 (parallel [(const_int 0)]))
2593 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2594 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2596 movhpd\t{%2, %0|%0, %2}
2597 unpcklpd\t{%2, %0|%0, %2}
2598 shufpd\t{$1, %1, %0|%0, %1, 1}
2600 [(set_attr "type" "ssemov,sselog,sselog,other")
2601 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Fragment of an unnamed pattern (presumably a define_split; its opening
;; line is not visible in this excerpt).  It matches a V2DF memory operand 0
;; whose element 0 is kept while DF register operand 1 supplies the other
;; element.  Once reload has completed on an SSE2 target this becomes a plain
;; DF store of operand 1 to byte offset 8 of the memory.
2604 [(set (match_operand:V2DF 0 "memory_operand" "")
2606 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2607 (match_operand:DF 1 "register_operand" "")))]
2608 "TARGET_SSE2 && reload_completed"
2609 [(set (match_dup 0) (match_dup 1))]
2611 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Insn sse2_loadlpd: combines DF operand 2 with element 1 of V2DF operand 1.
;; Visible templates, in alternative order: movsd (memory source, operand 1
;; matching C), movlpd (memory source, operand 1 tied to the destination),
;; movsd (register source, operand 1 tied), shufpd with immediate 2 (operand
;; 2 tied to the destination, operand 1 in another register), and movhpd
;; reading %H1 (operand 2 tied, operand 1 in offsettable memory).  The
;; template for the sixth, memory-destination alternative is not visible in
;; this excerpt.  Requires TARGET_SSE2 and that operands 1 and 2 are not both
;; in memory.
;;
;; In the shufpd alternative the destination register already holds operand
;; 2, so the instruction's source must be operand 1 (%1): that is the only
;; place the preserved element 1 can come from.  Naming %2 there would read
;; the destination itself and never look at operand 1.
2614 (define_insn "sse2_loadlpd"
2615 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2617 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2619 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2620 (parallel [(const_int 1)]))))]
2621 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2623 movsd\t{%2, %0|%0, %2}
2624 movlpd\t{%2, %0|%0, %2}
2625 movsd\t{%2, %0|%0, %2}
2626 shufpd\t{$2, %1, %0|%0, %1, 2}
2627 movhpd\t{%H1, %0|%0, %H1}
2629 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2630 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Fragment of an unnamed pattern (presumably a define_split; its opening
;; line is not visible in this excerpt).  It matches a V2DF memory operand 0
;; whose element 1 is kept while DF register operand 1 supplies the other
;; element.  Once reload has completed on an SSE2 target this becomes a plain
;; DF store of operand 1.
;;
;; Because element 1 (byte offset 8) is the part being preserved, the store
;; must go to element 0, i.e. byte offset 0.  Storing at offset 8 would
;; overwrite the preserved element and leave element 0 unchanged.
2633 [(set (match_operand:V2DF 0 "memory_operand" "")
2635 (match_operand:DF 1 "register_operand" "")
2636 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2637 "TARGET_SSE2 && reload_completed"
2638 [(set (match_dup 0) (match_dup 1))]
2640 operands[0] = adjust_address (operands[0], DFmode, 0);
2643 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Insn *vec_extractv2df_1_sse: extracts element 1 of V2DF operand 1 into DF
;; operand 0 on targets with SSE but without SSE2.  Alternatives: register to
;; memory (movhps), register to register (movhlps), and offsettable memory to
;; register read through %H1 (movlps).  Memory-to-memory is rejected.
2645 (define_insn "*vec_extractv2df_1_sse"
2646 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2648 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2649 (parallel [(const_int 1)])))]
2650 "!TARGET_SSE2 && TARGET_SSE
2651 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2653 movhps\t{%1, %0|%0, %1}
2654 movhlps\t{%1, %0|%0, %1}
2655 movlps\t{%H1, %0|%0, %H1}"
2656 [(set_attr "type" "ssemov")
2657 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn *vec_extractv2df_0_sse: extracts element 0 of V2DF operand 1 into DF
;; operand 0 on targets with SSE but without SSE2.  Alternatives: register to
;; memory (movlps), register to register (movaps), and memory to register
;; (movlps).  Memory-to-memory is rejected.
2659 (define_insn "*vec_extractv2df_0_sse"
2660 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2662 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2663 (parallel [(const_int 0)])))]
2664 "!TARGET_SSE2 && TARGET_SSE
2665 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2667 movlps\t{%1, %0|%0, %1}
2668 movaps\t{%1, %0|%0, %1}
2669 movlps\t{%1, %0|%0, %1}"
2670 [(set_attr "type" "ssemov")
2671 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn sse2_movsd: merges V2DF operands 2 and 1 into operand 0 (the merge
;; operator and its mask are not visible in this excerpt).  Templates, in
;; alternative order: movsd (register operand 2, operand 1 tied to the
;; destination), movlpd (memory operand 2, operand 1 tied), movlpd (memory
;; destination tied to operand 1), shufpd with immediate 2 (operand 2 tied to
;; the destination, operand 1 in another register), movhps reading %H1
;; (operand 2 tied, operand 1 in offsettable memory), and movhps writing %H0
;; (offsettable memory destination tied to operand 2).
;;
;; In the shufpd alternative the destination register already holds operand
;; 2, so the instruction's source must be operand 1 (%1), as in the two
;; movhps alternatives that follow.  Naming %2 there would read the
;; destination itself and never look at operand 1.
2673 (define_insn "sse2_movsd"
2674 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2676 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2677 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2681 movsd\t{%2, %0|%0, %2}
2682 movlpd\t{%2, %0|%0, %2}
2683 movlpd\t{%2, %0|%0, %2}
2684 shufpd\t{$2, %1, %0|%0, %1, 2}
2685 movhps\t{%H1, %0|%0, %H1}
2686 movhps\t{%1, %H0|%H0, %1}"
2687 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2688 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Insn *vec_dupv2df_sse3: builds a V2DF in register operand 0 from DF
;; operand 1 (register or memory) by emitting movddup.
2690 (define_insn "*vec_dupv2df_sse3"
2691 [(set (match_operand:V2DF 0 "register_operand" "=x")
2693 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2695 "movddup\t{%1, %0|%0, %1}"
2696 [(set_attr "type" "sselog1")
2697 (set_attr "mode" "DF")])
;; Insn vec_dupv2df: builds a V2DF in register operand 0 from DF register
;; operand 1, which is tied to the destination.  The condition and output
;; template are not visible in this excerpt.
2699 (define_insn "vec_dupv2df"
2700 [(set (match_operand:V2DF 0 "register_operand" "=x")
2702 (match_operand:DF 1 "register_operand" "0")))]
2705 [(set_attr "type" "sselog1")
2706 (set_attr "mode" "V2DF")])
;; Insn *vec_concatv2df_sse3: builds a V2DF in register operand 0 from DF
;; operand 1 (register or memory) by emitting movddup.  The second input of
;; the pattern is on a line not visible in this excerpt.
2708 (define_insn "*vec_concatv2df_sse3"
2709 [(set (match_operand:V2DF 0 "register_operand" "=x")
2711 (match_operand:DF 1 "nonimmediate_operand" "xm")
2714 "movddup\t{%1, %0|%0, %1}"
2715 [(set_attr "type" "sselog1")
2716 (set_attr "mode" "DF")])
;; Insn *vec_concatv2df: builds a V2DF from DF operands 1 and 2.  The first
;; three alternatives use the Y2 register constraint: unpcklpd (register
;; operand 2), movhpd (memory operand 2), and movsd from memory operand 1
;; when operand 2 matches constraint C.  The last two use plain "x"
;; registers: movlhps (register operand 2) and movhps (memory operand 2).
2718 (define_insn "*vec_concatv2df"
2719 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2721 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2722 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2725 unpcklpd\t{%2, %0|%0, %2}
2726 movhpd\t{%2, %0|%0, %2}
2727 movsd\t{%1, %0|%0, %1}
2728 movlhps\t{%2, %0|%0, %2}
2729 movhps\t{%2, %0|%0, %2}"
2730 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2731 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander vec_setv2df: hands V2DF operand 0, DF operand 1 and the integer
;; value of constant operand 2 to ix86_expand_vector_set, passing false as
;; its first argument.
2733 (define_expand "vec_setv2df"
2734 [(match_operand:V2DF 0 "register_operand" "")
2735 (match_operand:DF 1 "register_operand" "")
2736 (match_operand 2 "const_int_operand" "")]
2739 ix86_expand_vector_set (false, operands[0], operands[1],
2740 INTVAL (operands[2]));
;; Expander vec_extractv2df: hands DF operand 0, V2DF operand 1 and the
;; integer value of constant operand 2 to ix86_expand_vector_extract, passing
;; false as its first argument.
2744 (define_expand "vec_extractv2df"
2745 [(match_operand:DF 0 "register_operand" "")
2746 (match_operand:V2DF 1 "register_operand" "")
2747 (match_operand 2 "const_int_operand" "")]
2750 ix86_expand_vector_extract (false, operands[0], operands[1],
2751 INTVAL (operands[2]));
;; Expander vec_initv2df: hands V2DF destination operand 0 and initializer
;; operand 1 to ix86_expand_vector_init, passing false as its first argument.
2755 (define_expand "vec_initv2df"
2756 [(match_operand:V2DF 0 "register_operand" "")
2757 (match_operand 1 "" "")]
2760 ix86_expand_vector_init (false, operands[0], operands[1]);
2764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2766 ;; Parallel integral arithmetic
2768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander neg<mode>2 for every mode in SSEMODEI.  The preparation statement
;; sets operand 2 to a register holding the all-zero vector of the mode; the
;; operator that combines it with operand 1 is on a line not visible in this
;; excerpt (presumably a subtraction from zero -- confirm).
2770 (define_expand "neg<mode>2"
2771 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2774 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2776 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander add<mode>3 for every mode in SSEMODEI: vector integer addition of
;; operands 1 and 2 into operand 0.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for PLUS on the operands.
2778 (define_expand "add<mode>3"
2779 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2780 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2781 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2783 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Insn *add<mode>3 for every mode in SSEMODEI: emits padd with the
;; per-mode <ssevecsize> suffix.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
2785 (define_insn "*add<mode>3"
2786 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2788 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2789 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2790 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2791 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2792 [(set_attr "type" "sseiadd")
2793 (set_attr "prefix_data16" "1")
2794 (set_attr "mode" "TI")])
;; Insn sse2_ssadd<mode>3 for the SSEMODE12 modes (V16QI, V8HI): emits padds
;; with the per-mode <ssevecsize> suffix.  Operand 1 is tied to the
;; destination and marked commutative.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for SS_PLUS.
2796 (define_insn "sse2_ssadd<mode>3"
2797 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2799 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2800 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2801 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2802 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2803 [(set_attr "type" "sseiadd")
2804 (set_attr "prefix_data16" "1")
2805 (set_attr "mode" "TI")])
;; Insn sse2_usadd<mode>3 for the SSEMODE12 modes (V16QI, V8HI): emits paddus
;; with the per-mode <ssevecsize> suffix.  Operand 1 is tied to the
;; destination and marked commutative.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for US_PLUS.
2807 (define_insn "sse2_usadd<mode>3"
2808 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2810 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2811 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2812 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2813 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2814 [(set_attr "type" "sseiadd")
2815 (set_attr "prefix_data16" "1")
2816 (set_attr "mode" "TI")])
;; Expander sub<mode>3 for every mode in SSEMODEI: vector integer subtraction
;; of operand 2 from register operand 1 into operand 0.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy for MINUS.
2818 (define_expand "sub<mode>3"
2819 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2820 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2821 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2823 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Insn *sub<mode>3 for every mode in SSEMODEI: emits psub with the per-mode
;; <ssevecsize> suffix.  Operand 1 is tied to the destination register and
;; operand 2 may be a register or memory.  (The operator and condition lines
;; are not visible in this excerpt.)
2825 (define_insn "*sub<mode>3"
2826 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2828 (match_operand:SSEMODEI 1 "register_operand" "0")
2829 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2831 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2832 [(set_attr "type" "sseiadd")
2833 (set_attr "prefix_data16" "1")
2834 (set_attr "mode" "TI")])
;; Insn sse2_sssub<mode>3 for the SSEMODE12 modes (V16QI, V8HI): emits psubs
;; with the per-mode <ssevecsize> suffix.  Operand 1 is tied to the
;; destination register and operand 2 may be a register or memory.  (The
;; operator and condition lines are not visible in this excerpt.)
2836 (define_insn "sse2_sssub<mode>3"
2837 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2839 (match_operand:SSEMODE12 1 "register_operand" "0")
2840 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2842 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "sseiadd")
2844 (set_attr "prefix_data16" "1")
2845 (set_attr "mode" "TI")])
;; Insn sse2_ussub<mode>3 for the SSEMODE12 modes (V16QI, V8HI): emits psubus
;; with the per-mode <ssevecsize> suffix.  Operand 1 is tied to the
;; destination register and operand 2 may be a register or memory.  (The
;; operator and condition lines are not visible in this excerpt.)
2847 (define_insn "sse2_ussub<mode>3"
2848 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2850 (match_operand:SSEMODE12 1 "register_operand" "0")
2851 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2853 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2854 [(set_attr "type" "sseiadd")
2855 (set_attr "prefix_data16" "1")
2856 (set_attr "mode" "TI")])
;; Insn-and-split mulv16qi3: V16QI multiply of two register operands,
;; available only before reload and expanded into word multiplies.  Two
;; code paths are visible; the test choosing between them is on a line not
;; visible in this excerpt (presumably the SSE5 target check -- confirm).
;;  * SSE5 path: six V8HI temporaries; each input is unpacked into high and
;;    low byte halves with ix86_expand_sse5_unpack, the halves are multiplied
;;    with mulv8hi3, and ix86_expand_sse5_pack packs the result.
;;  * Generic path: twelve V16QI temporaries; each input is widened by
;;    interleaving it with itself (punpckhbw/punpcklbw), the V8HI views are
;;    multiplied with mulv8hi3, and a ladder of punpckhbw/punpcklbw steps
;;    gathers the low byte of every product back into one vector.
2858 (define_insn_and_split "mulv16qi3"
2859 [(set (match_operand:V16QI 0 "register_operand" "")
2860 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2861 (match_operand:V16QI 2 "register_operand" "")))]
2863 && !(reload_completed || reload_in_progress)"
2868 rtx t[12], op0, op[3];
2873 /* On SSE5, we can take advantage of the pperm instruction to pack and
2874 unpack the bytes. Unpack data such that we've got a source byte in
2875 each low byte of each word. We don't care what goes into the high
2876 byte, so put 0 there. */
2877 for (i = 0; i < 6; ++i)
2878 t[i] = gen_reg_rtx (V8HImode);
2880 for (i = 0; i < 2; i++)
2883 op[1] = operands[i+1];
2884 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2887 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2890 /* Multiply words. */
2891 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2892 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2894 /* Pack the low byte of each word back into a single xmm */
2895 op[0] = operands[0];
2898 ix86_expand_sse5_pack (op);
2902 for (i = 0; i < 12; ++i)
2903 t[i] = gen_reg_rtx (V16QImode);
2905 /* Unpack data such that we've got a source byte in each low byte of
2906 each word. We don't care what goes into the high byte of each word.
2907 Rather than trying to get zero in there, most convenient is to let
2908 it be a copy of the low byte. */
2909 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2910 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2911 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2912 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2914 /* Multiply words. The end-of-line annotations here give a picture of what
2915 the output of that instruction looks like. Dot means don't care; the
2916 letters are the bytes of the result with A being the most significant. */
2917 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2918 gen_lowpart (V8HImode, t[0]),
2919 gen_lowpart (V8HImode, t[1])));
2920 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2921 gen_lowpart (V8HImode, t[2]),
2922 gen_lowpart (V8HImode, t[3])));
2924 /* Extract the relevant bytes and merge them back together. */
2925 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2926 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2927 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2928 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2929 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2930 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2933 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Expander mulv8hi3: V8HI multiply of operands 1 and 2 into operand 0.  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy for MULT.
2937 (define_expand "mulv8hi3"
2938 [(set (match_operand:V8HI 0 "register_operand" "")
2939 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2940 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2942 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn *mulv8hi3: V8HI multiply emitted as pmullw.  Operand 1 is tied to the
;; destination and marked commutative; operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for MULT.
2944 (define_insn "*mulv8hi3"
2945 [(set (match_operand:V8HI 0 "register_operand" "=x")
2946 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2947 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2948 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2949 "pmullw\t{%2, %0|%0, %2}"
2950 [(set_attr "type" "sseimul")
2951 (set_attr "prefix_data16" "1")
2952 (set_attr "mode" "TI")])
;; Expander smulv8hi3_highpart: V8HI pattern over operands 1 and 2 whose
;; preparation statement calls ix86_fixup_binary_operands_no_copy for MULT.
;; (The extension, multiply and shift operators of the pattern are on lines
;; not visible in this excerpt.)
2954 (define_expand "smulv8hi3_highpart"
2955 [(set (match_operand:V8HI 0 "register_operand" "")
2960 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2962 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2965 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn *smulv8hi3_highpart: emits pmulhw.  Operand 1 is tied to the
;; destination and marked commutative; operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for MULT.  (The operator
;; lines of the pattern are not visible in this excerpt.)
2967 (define_insn "*smulv8hi3_highpart"
2968 [(set (match_operand:V8HI 0 "register_operand" "=x")
2973 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2975 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2977 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2978 "pmulhw\t{%2, %0|%0, %2}"
2979 [(set_attr "type" "sseimul")
2980 (set_attr "prefix_data16" "1")
2981 (set_attr "mode" "TI")])
;; Expander umulv8hi3_highpart: V8HI pattern over operands 1 and 2 whose
;; preparation statement calls ix86_fixup_binary_operands_no_copy for MULT.
;; (The extension, multiply and shift operators of the pattern are on lines
;; not visible in this excerpt.)
2983 (define_expand "umulv8hi3_highpart"
2984 [(set (match_operand:V8HI 0 "register_operand" "")
2989 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2991 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2994 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn *umulv8hi3_highpart: emits pmulhuw.  Operand 1 is tied to the
;; destination and marked commutative; operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for MULT.  (The operator
;; lines of the pattern are not visible in this excerpt.)
2996 (define_insn "*umulv8hi3_highpart"
2997 [(set (match_operand:V8HI 0 "register_operand" "=x")
3002 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3004 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3006 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3007 "pmulhuw\t{%2, %0|%0, %2}"
3008 [(set_attr "type" "sseimul")
3009 (set_attr "prefix_data16" "1")
3010 (set_attr "mode" "TI")])
;; Insn sse2_umulv2siv2di3: emits pmuludq, producing a V2DI result from
;; elements 0 and 2 of each V4SI input.  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for MULT.
3012 (define_insn "sse2_umulv2siv2di3"
3013 [(set (match_operand:V2DI 0 "register_operand" "=x")
3017 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3018 (parallel [(const_int 0) (const_int 2)])))
3021 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3022 (parallel [(const_int 0) (const_int 2)])))))]
3023 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3024 "pmuludq\t{%2, %0|%0, %2}"
3025 [(set_attr "type" "sseimul")
3026 (set_attr "prefix_data16" "1")
3027 (set_attr "mode" "TI")])
;; Insn sse4_1_mulv2siv2di3: emits pmuldq, producing a V2DI result from
;; elements 0 and 2 of each V4SI input.  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok for MULT.
3029 (define_insn "sse4_1_mulv2siv2di3"
3030 [(set (match_operand:V2DI 0 "register_operand" "=x")
3034 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3035 (parallel [(const_int 0) (const_int 2)])))
3038 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3039 (parallel [(const_int 0) (const_int 2)])))))]
3040 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3041 "pmuldq\t{%2, %0|%0, %2}"
3042 [(set_attr "type" "sseimul")
3043 (set_attr "prefix_extra" "1")
3044 (set_attr "mode" "TI")])
;; Insn sse2_pmaddwd: emits pmaddwd, producing a V4SI result from V8HI
;; operands 1 and 2.  The pattern takes two V4HI selections from each
;; operand, one starting at element 0 and one starting at element 1 (the
;; latter ending at element 7); the remaining selector entries and the
;; arithmetic operators are on lines not visible in this excerpt.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for MULT.
3046 (define_insn "sse2_pmaddwd"
3047 [(set (match_operand:V4SI 0 "register_operand" "=x")
3052 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3053 (parallel [(const_int 0)
3059 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3060 (parallel [(const_int 0)
3066 (vec_select:V4HI (match_dup 1)
3067 (parallel [(const_int 1)
3072 (vec_select:V4HI (match_dup 2)
3073 (parallel [(const_int 1)
3076 (const_int 7)]))))))]
3077 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3078 "pmaddwd\t{%2, %0|%0, %2}"
3079 [(set_attr "type" "sseiadd")
3080 (set_attr "prefix_data16" "1")
3081 (set_attr "mode" "TI")])
;; Expander mulv4si3: V4SI multiply of two register operands.  The
;; preparation code calls ix86_fixup_binary_operands_no_copy for MULT only
;; when TARGET_SSE4_1 or TARGET_SSE5 is set; otherwise the operands are left
;; as they are.
3083 (define_expand "mulv4si3"
3084 [(set (match_operand:V4SI 0 "register_operand" "")
3085 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3086 (match_operand:V4SI 2 "register_operand" "")))]
3089 if (TARGET_SSE4_1 || TARGET_SSE5)
3090 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; Insn *sse4_1_mulv4si3: V4SI multiply emitted as pmulld.  Operand 1 is tied
;; to the destination and marked commutative; operand 2 may be a register or
;; memory.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok for MULT.
3093 (define_insn "*sse4_1_mulv4si3"
3094 [(set (match_operand:V4SI 0 "register_operand" "=x")
3095 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3096 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3097 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3098 "pmulld\t{%2, %0|%0, %2}"
3099 [(set_attr "type" "sseimul")
3100 (set_attr "prefix_extra" "1")
3101 (set_attr "mode" "TI")])
3103 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3104 ;; multiply/add. In general, we expect the define_split to occur before
3105 ;; register allocation, so we have to handle the corner case where the target
3106 ;; is used as the base or index register in operands 1/2.
;; Insn-and-split *sse5_mulv4si3: V4SI multiply with an early-clobber
;; destination ("=&x").  It is split into a multiply-add form
;; (plus (mult operand 1 ...) ...) with operands[3] set to the all-zero V4SI
;; vector.  The split fires once reload has completed, or earlier when the
;; destination is not mentioned in operand 1 or operand 2.
3107 (define_insn_and_split "*sse5_mulv4si3"
3108 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3109 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3110 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3113 "&& (reload_completed
3114 || (!reg_mentioned_p (operands[0], operands[1])
3115 && !reg_mentioned_p (operands[0], operands[2])))"
3119 (plus:V4SI (mult:V4SI (match_dup 1)
3123 operands[3] = CONST0_RTX (V4SImode);
3125 [(set_attr "type" "ssemuladd")
3126 (set_attr "mode" "TI")])
;; Insn-and-split *sse2_mulv4si3: V4SI multiply for SSE2 targets that have
;; neither SSE4.1 nor SSE5, available only before reload.  The split uses six
;; V4SI temporaries and emits: sse2_umulv2siv2di3 into t1, two sse2_lshrti3
;; shifts of the inputs (viewed as TImode) into t2 and t3,
;; sse2_umulv2siv2di3 into t4, two sse2_pshufd_1 shuffles with selector
;; (0, 2, 0, 0) producing t5 and t6, and finally sse2_punpckldq of t5 and t6
;; into the destination.  (Some argument lines are not visible in this
;; excerpt.)
3128 (define_insn_and_split "*sse2_mulv4si3"
3129 [(set (match_operand:V4SI 0 "register_operand" "")
3130 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3131 (match_operand:V4SI 2 "register_operand" "")))]
3132 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3133 && !(reload_completed || reload_in_progress)"
3138 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3144 t1 = gen_reg_rtx (V4SImode);
3145 t2 = gen_reg_rtx (V4SImode);
3146 t3 = gen_reg_rtx (V4SImode);
3147 t4 = gen_reg_rtx (V4SImode);
3148 t5 = gen_reg_rtx (V4SImode);
3149 t6 = gen_reg_rtx (V4SImode);
3150 thirtytwo = GEN_INT (32);
3152 /* Multiply elements 2 and 0. */
3153 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3156 /* Shift both input vectors down one element, so that elements 3
3157 and 1 are now in the slots for elements 2 and 0. For K8, at
3158 least, this is faster than using a shuffle. */
3159 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3160 gen_lowpart (TImode, op1),
3162 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3163 gen_lowpart (TImode, op2),
3165 /* Multiply elements 3 and 1. */
3166 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3169 /* Move the results in element 2 down to element 1; we don't care
3170 what goes in elements 2 and 3. */
3171 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3172 const0_rtx, const0_rtx));
3173 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3174 const0_rtx, const0_rtx));
3176 /* Merge the parts back together. */
3177 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, matched only while reload has neither started nor
;; completed (the split allocates six V2DI pseudo registers).
;;
;; Writing each 64-bit element as hi * 2^32 + lo, the emitted sequence adds
;; three terms:
;;   lo1 * lo2  +  ((lo1 * hi2) << 32)  +  ((lo2 * hi1) << 32)
;; The hi1 * hi2 term is never computed by this sequence.
3181 (define_insn_and_split "mulv2di3"
3182 [(set (match_operand:V2DI 0 "register_operand" "")
3183 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3184 (match_operand:V2DI 2 "register_operand" "")))]
3186 && !(reload_completed || reload_in_progress)"
3191 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3197 t1 = gen_reg_rtx (V2DImode);
3198 t2 = gen_reg_rtx (V2DImode);
3199 t3 = gen_reg_rtx (V2DImode);
3200 t4 = gen_reg_rtx (V2DImode);
3201 t5 = gen_reg_rtx (V2DImode);
3202 t6 = gen_reg_rtx (V2DImode);
3203 thirtytwo = GEN_INT (32);
3205 /* Multiply low parts. */
3206 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3207 gen_lowpart (V4SImode, op2)));
3209 /* Shift input vectors right 32 bits so we can multiply high parts. */
3210 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3211 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3213 /* Multiply high parts by low parts. */
3214 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3215 gen_lowpart (V4SImode, t3)));
3216 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3217 gen_lowpart (V4SImode, t2)));
3219 /* Shift them back. */
3220 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3221 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3223 /* Add the three parts together. */
3224 emit_insn (gen_addv2di3 (t6, t1, t4));
3225 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Expander "vec_widen_smult_hi_v8hi": V8HI operands 1 and 2, V4SI result in
;; operand 0.  t1 receives the gen_mulv8hi3 result and t2 the
;; gen_smulv8hi3_highpart result of the same two inputs;
;; gen_vec_interleave_highv8hi then combines t1 and t2 into operand 0
;; viewed as V8HI.
3229 (define_expand "vec_widen_smult_hi_v8hi"
3230 [(match_operand:V4SI 0 "register_operand" "")
3231 (match_operand:V8HI 1 "register_operand" "")
3232 (match_operand:V8HI 2 "register_operand" "")]
3235 rtx op1, op2, t1, t2, dest;
3239 t1 = gen_reg_rtx (V8HImode);
3240 t2 = gen_reg_rtx (V8HImode);
3241 dest = gen_lowpart (V8HImode, operands[0]);
3243 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3244 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3245 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Expander "vec_widen_smult_lo_v8hi": V8HI operands 1 and 2, V4SI result in
;; operand 0.  t1 receives the gen_mulv8hi3 result and t2 the
;; gen_smulv8hi3_highpart result of the same two inputs;
;; gen_vec_interleave_lowv8hi then combines t1 and t2 into operand 0
;; viewed as V8HI.
3249 (define_expand "vec_widen_smult_lo_v8hi"
3250 [(match_operand:V4SI 0 "register_operand" "")
3251 (match_operand:V8HI 1 "register_operand" "")
3252 (match_operand:V8HI 2 "register_operand" "")]
3255 rtx op1, op2, t1, t2, dest;
3259 t1 = gen_reg_rtx (V8HImode);
3260 t2 = gen_reg_rtx (V8HImode);
3261 dest = gen_lowpart (V8HImode, operands[0]);
3263 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3264 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3265 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Expander "vec_widen_umult_hi_v8hi": V8HI operands 1 and 2, V4SI result in
;; operand 0.  t1 receives the gen_mulv8hi3 result and t2 the
;; gen_umulv8hi3_highpart result of the same two inputs;
;; gen_vec_interleave_highv8hi then combines t1 and t2 into operand 0
;; viewed as V8HI.
3269 (define_expand "vec_widen_umult_hi_v8hi"
3270 [(match_operand:V4SI 0 "register_operand" "")
3271 (match_operand:V8HI 1 "register_operand" "")
3272 (match_operand:V8HI 2 "register_operand" "")]
3275 rtx op1, op2, t1, t2, dest;
3279 t1 = gen_reg_rtx (V8HImode);
3280 t2 = gen_reg_rtx (V8HImode);
3281 dest = gen_lowpart (V8HImode, operands[0]);
3283 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3284 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3285 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Expander "vec_widen_umult_lo_v8hi": V8HI operands 1 and 2, V4SI result in
;; operand 0.  t1 receives the gen_mulv8hi3 result and t2 the
;; gen_umulv8hi3_highpart result of the same two inputs;
;; gen_vec_interleave_lowv8hi then combines t1 and t2 into operand 0
;; viewed as V8HI.
3289 (define_expand "vec_widen_umult_lo_v8hi"
3290 [(match_operand:V4SI 0 "register_operand" "")
3291 (match_operand:V8HI 1 "register_operand" "")
3292 (match_operand:V8HI 2 "register_operand" "")]
3295 rtx op1, op2, t1, t2, dest;
3299 t1 = gen_reg_rtx (V8HImode);
3300 t2 = gen_reg_rtx (V8HImode);
3301 dest = gen_lowpart (V8HImode, operands[0]);
3303 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3304 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3305 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Expander "vec_widen_smult_hi_v4si": V4SI operands 1 and 2, V2DI result in
;; operand 0.  Each input is interleaved with itself by
;; gen_vec_interleave_highv4si into a fresh V4SI temporary, and the two
;; temporaries are multiplied by gen_sse2_umulv2siv2di3 straight into
;; operand 0.
;; NOTE(review): this signed pattern emits gen_sse2_umulv2siv2di3, exactly
;; as vec_widen_umult_hi_v4si does; confirm the product is correct when an
;; input element is negative.
3309 (define_expand "vec_widen_smult_hi_v4si"
3310 [(match_operand:V2DI 0 "register_operand" "")
3311 (match_operand:V4SI 1 "register_operand" "")
3312 (match_operand:V4SI 2 "register_operand" "")]
3315 rtx op1, op2, t1, t2;
3319 t1 = gen_reg_rtx (V4SImode);
3320 t2 = gen_reg_rtx (V4SImode);
3322 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3323 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3324 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "vec_widen_smult_lo_v4si": V4SI operands 1 and 2, V2DI result in
;; operand 0.  Each input is interleaved with itself by
;; gen_vec_interleave_lowv4si into a fresh V4SI temporary, and the two
;; temporaries are multiplied by gen_sse2_umulv2siv2di3 straight into
;; operand 0.
;; NOTE(review): this signed pattern emits gen_sse2_umulv2siv2di3, exactly
;; as vec_widen_umult_lo_v4si does; confirm the product is correct when an
;; input element is negative.
3328 (define_expand "vec_widen_smult_lo_v4si"
3329 [(match_operand:V2DI 0 "register_operand" "")
3330 (match_operand:V4SI 1 "register_operand" "")
3331 (match_operand:V4SI 2 "register_operand" "")]
3334 rtx op1, op2, t1, t2;
3338 t1 = gen_reg_rtx (V4SImode);
3339 t2 = gen_reg_rtx (V4SImode);
3341 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3342 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3343 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "vec_widen_umult_hi_v4si": V4SI operands 1 and 2, V2DI result in
;; operand 0.  Each input is interleaved with itself by
;; gen_vec_interleave_highv4si into a fresh V4SI temporary, and the two
;; temporaries are multiplied by gen_sse2_umulv2siv2di3 straight into
;; operand 0.
3347 (define_expand "vec_widen_umult_hi_v4si"
3348 [(match_operand:V2DI 0 "register_operand" "")
3349 (match_operand:V4SI 1 "register_operand" "")
3350 (match_operand:V4SI 2 "register_operand" "")]
3353 rtx op1, op2, t1, t2;
3357 t1 = gen_reg_rtx (V4SImode);
3358 t2 = gen_reg_rtx (V4SImode);
3360 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3361 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3362 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "vec_widen_umult_lo_v4si": V4SI operands 1 and 2, V2DI result in
;; operand 0.  Each input is interleaved with itself by
;; gen_vec_interleave_lowv4si into a fresh V4SI temporary, and the two
;; temporaries are multiplied by gen_sse2_umulv2siv2di3 straight into
;; operand 0.
3366 (define_expand "vec_widen_umult_lo_v4si"
3367 [(match_operand:V2DI 0 "register_operand" "")
3368 (match_operand:V4SI 1 "register_operand" "")
3369 (match_operand:V4SI 2 "register_operand" "")]
3372 rtx op1, op2, t1, t2;
3376 t1 = gen_reg_rtx (V4SImode);
3377 t2 = gen_reg_rtx (V4SImode);
3379 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3380 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3381 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander "sdot_prodv8hi": V8HI operands 1 and 2, V4SI accumulator in
;; operand 3, V4SI result in operand 0.  A fresh V4SI temporary receives
;; gen_sse2_pmaddwd of operands 1 and 2, and operand 0 is set to
;; operand 3 plus that temporary.
3385 (define_expand "sdot_prodv8hi"
3386 [(match_operand:V4SI 0 "register_operand" "")
3387 (match_operand:V8HI 1 "register_operand" "")
3388 (match_operand:V8HI 2 "register_operand" "")
3389 (match_operand:V4SI 3 "register_operand" "")]
3392 rtx t = gen_reg_rtx (V4SImode);
3393 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3394 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Expander "udot_prodv4si": V4SI operands 1 and 2, V2DI accumulator in
;; operand 3, V2DI result in operand 0.  The sequence is:
;;   t1 = gen_sse2_umulv2siv2di3 (operand 1, operand 2), then t1 += operand 3
;;   t2, t3 = operands 1 and 2 shifted right as TImode values
;;            (gen_sse2_lshrti3)
;;   t4 = gen_sse2_umulv2siv2di3 (t2, t3)
;;   operand 0 = t1 + t4
3398 (define_expand "udot_prodv4si"
3399 [(match_operand:V2DI 0 "register_operand" "")
3400 (match_operand:V4SI 1 "register_operand" "")
3401 (match_operand:V4SI 2 "register_operand" "")
3402 (match_operand:V2DI 3 "register_operand" "")]
3407 t1 = gen_reg_rtx (V2DImode);
3408 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3409 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3411 t2 = gen_reg_rtx (V4SImode);
3412 t3 = gen_reg_rtx (V4SImode);
3413 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3414 gen_lowpart (TImode, operands[1]),
3416 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3417 gen_lowpart (TImode, operands[2]),
3420 t4 = gen_reg_rtx (V2DImode);
3421 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3423 emit_insn (gen_addv2di3 (operands[0], t1, t4));
3427 (define_insn "ashr<mode>3"
3428 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")