1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the 16-byte integer vector modes.  The digits in each name
;; match the element sizes in bytes of the listed modes
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF4 covers the scalar and the packed
;; single/double modes, SSEMODEF2P only the two packed modes.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
;; Used as a pattern-name prefix below, e.g. "<sse>_movup..." yields an
;; "sse_" name for V4SF and an "sse2_" name for V2DF.
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; As above, but vector modes also map to the scalar "ss"/"sd" suffix.
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; Final letter of a packed/scalar float mnemonic: "s" for single
;; precision, "d" for double precision.
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in SSEMODE.  Both operands are
;; nonimmediate_operand; the preparation code hands the whole operand
;; array to ix86_expand_vector_move.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn behind the expander.  Three alternatives:
;;   0: SSE constant ("C") -> xmm, opcode from standard_sse_constant_opcode
;;   1: xmm or memory      -> xmm
;;   2: xmm                -> memory
;; The condition requires at least one operand to be a register.
;; The output code picks movaps, movapd or movdqa from the insn's "mode"
;; attribute.  That attribute is V4SF when optimizing for size, when
;; TARGET_SSE2 is off, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise V4SF/V2DF follow the
;; operand mode and every other mode uses TI.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 switch (get_attr_mode (insn))
97 return "movaps\t{%1, %0|%0, %1}";
99 return "movapd\t{%1, %0|%0, %1}";
101 return "movdqa\t{%1, %0|%0, %1}";
107 [(set_attr "type" "sselog1,ssemov,ssemov")
109 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
110 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
111 (and (eq_attr "alternative" "2")
112 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
114 (const_string "V4SF")
115 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
116 (const_string "V4SF")
117 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
118 (const_string "V2DF")
120 (const_string "TI")))])
122 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
123 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
124 ;; from memory, we'd prefer to load the memory directly into the %xmm
125 ;; register. To facilitate this happy circumstance, this pattern won't
126 ;; split until after register allocation. If the 64-bit value didn't
127 ;; come from memory, this is the best we can do. This is much better
128 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
;; Only enabled for 32-bit SSE2 targets with TARGET_INTER_UNIT_MOVES, and
;; only split once reload_completed holds.
;; Operand 2 is an early-clobber xmm scratch for the register alternative;
;; the memory alternative uses "X" and needs no scratch.
;; Register source: each 32-bit half of operand 1 (subreg offsets 0 and 4)
;; is loaded with sse2_loadld, one into operand 0 and one into the scratch,
;; and sse2_punpckldq then combines them in operand 0.
;; Memory source: vec_concatv2di builds the V2DI lowpart of operand 0 from
;; the DImode memory operand and const0_rtx.
131 (define_insn_and_split "movdi_to_sse"
133 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
134 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
135 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
136 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
138 "&& reload_completed"
141 if (register_operand (operands[1], DImode))
143 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
144 Assemble the 64-bit DImode value in an xmm register. */
145 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
146 gen_rtx_SUBREG (SImode, operands[1], 0)));
147 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
148 gen_rtx_SUBREG (SImode, operands[1], 4)));
149 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
151 else if (memory_operand (operands[1], DImode))
152 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for SSEMODE values held in a register; the preparation
;; code calls ix86_expand_push with the mode and operand 0.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for SSEMODE; the preparation code passes the
;; operands to ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Packed-float move emitted as movups (V4SF) or movupd (V2DF).
;; Alternatives: xmm <- xmm/memory, or memory <- xmm.  The condition
;; rejects the case where both operands are memory.
198 (define_insn "<sse>_movup<ssemodesuffixf2c>"
199 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
201 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
203 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
204 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
205 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
206 [(set_attr "type" "ssemov")
207 (set_attr "mode" "<MODE>")])
;; V16QI move emitted as movdqu, represented as an unspec of operand 1.
;; Alternatives: xmm <- xmm/memory, or memory <- xmm; memory-to-memory is
;; rejected by the condition.  Requires TARGET_SSE2.
209 (define_insn "sse2_movdqu"
210 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
211 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
213 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
214 "movdqu\t{%1, %0|%0, %1}"
215 [(set_attr "type" "ssemov")
216 (set_attr "prefix_data16" "1")
217 (set_attr "mode" "TI")])
;; Store of a packed-float xmm register (operand 1) to memory (operand 0),
;; emitted as movntps (V4SF) or movntpd (V2DF).  The destination must be
;; memory and the source a register.
219 (define_insn "<sse>_movnt<mode>"
220 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
222 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
224 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
225 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
226 [(set_attr "type" "ssemov")
227 (set_attr "mode" "<MODE>")])
;; Store of a V2DI xmm register (operand 1) to memory (operand 0), emitted
;; as movntdq.  The source is wrapped in an unspec.
229 (define_insn "sse2_movntv2di"
230 [(set (match_operand:V2DI 0 "memory_operand" "=m")
231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
234 "movntdq\t{%1, %0|%0, %1}"
235 [(set_attr "type" "ssecvt")
236 (set_attr "prefix_data16" "1")
237 (set_attr "mode" "TI")])
;; Store of a general-purpose SImode register ("r") to memory, emitted as
;; movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm whether this is intentional.
239 (define_insn "sse2_movntsi"
240 [(set (match_operand:SI 0 "memory_operand" "=m")
241 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
244 "movnti\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssecvt")
246 (set_attr "mode" "V2DF")])
;; Load of a V16QI value from memory into an xmm register, emitted as
;; lddqu.  The source must be a memory operand; it is wrapped in an unspec.
248 (define_insn "sse3_lddqu"
249 [(set (match_operand:V16QI 0 "register_operand" "=x")
250 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
253 "lddqu\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssecvt")
255 (set_attr "prefix_rep" "1")
256 (set_attr "mode" "TI")])
258 ; Expand patterns for non-temporal stores. At the moment, only those
259 ; that directly map to insns are defined; it would be possible to
260 ; define patterns for other modes that would expand to several insns.
;; Packed-float variant (SSEMODEF2P): memory destination, register source.
262 (define_expand "storent<mode>"
263 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
265 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
267 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Variant over the MODEF iterator (defined elsewhere; presumably the
;; scalar float modes -- TODO confirm): memory destination, register source.
270 (define_expand "storent<mode>"
271 [(set (match_operand:MODEF 0 "memory_operand" "")
273 [(match_operand:MODEF 1 "register_operand" "")]
;; V2DI variant: memory destination, register source wrapped in an unspec.
278 (define_expand "storentv2di"
279 [(set (match_operand:V2DI 0 "memory_operand" "")
280 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; SImode variant: memory destination, register source wrapped in an unspec.
285 (define_expand "storentsi"
286 [(set (match_operand:SI 0 "memory_operand" "")
287 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294 ;; Parallel floating point arithmetic
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary packed-float expander.  All code generation is done by
;; ix86_expand_fp_absneg_operator; the trailing DONE ends the expansion.
298 (define_expand "<code><mode>2"
299 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
301 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
302 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
303 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed-float add/subtract expander over the plusminus code iterator.
;; Both inputs may be registers or memory; the preparation code calls
;; ix86_fixup_binary_operands_no_copy on the operands.
305 (define_expand "<plusminus_insn><mode>3"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
307 (plusminus:SSEMODEF2P
308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
310 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
311 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float add/subtract insn in two-operand form: operand 1 is tied
;; to the destination ("0"), operand 2 is an xmm register or memory.
;; <comm> and <plusminus_mnemonic> are defined elsewhere; <comm> presumably
;; marks operand 1 as commutative for plus only -- TODO confirm.
;; The condition also requires ix86_binary_operator_ok.
313 (define_insn "*<plusminus_insn><mode>3"
314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
315 (plusminus:SSEMODEF2P
316 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
317 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
318 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
319 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
320 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
321 [(set_attr "type" "sseadd")
322 (set_attr "mode" "<MODE>")])
;; Scalar-form add/subtract on vector operands: the plusminus result is
;; wrapped in a vec_merge and the template emits the "s" (ss/sd) mnemonic.
;; Operand 1 must be a register tied to the destination; the "mode"
;; attribute is the scalar element mode.
324 (define_insn "<sse>_vm<plusminus_insn><mode>3"
325 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
326 (vec_merge:SSEMODEF2P
327 (plusminus:SSEMODEF2P
328 (match_operand:SSEMODEF2P 1 "register_operand" "0")
329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
332 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
333 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
334 [(set_attr "type" "sseadd")
335 (set_attr "mode" "<ssescalarmode>")])
;; Packed-float multiply expander; the preparation code calls
;; ix86_fixup_binary_operands_no_copy with MULT.
337 (define_expand "mul<mode>3"
338 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
340 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
341 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
342 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
343 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed-float multiply insn, emitted as mulps/mulpd.  Operand 1 is
;; commutative ("%") and tied to the destination; operand 2 is an xmm
;; register or memory.  The condition also requires ix86_binary_operator_ok.
345 (define_insn "*mul<mode>3"
346 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
348 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
349 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
350 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
351 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
352 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
353 [(set_attr "type" "ssemul")
354 (set_attr "mode" "<MODE>")])
;; Scalar-form multiply on vector operands, emitted as mulss/mulsd inside a
;; vec_merge.  Operand 1 is a register tied to the destination; the "mode"
;; attribute is the scalar element mode.
356 (define_insn "<sse>_vmmul<mode>3"
357 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
358 (vec_merge:SSEMODEF2P
360 (match_operand:SSEMODEF2P 1 "register_operand" "0")
361 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
364 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
365 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
366 [(set_attr "type" "ssemul")
367 (set_attr "mode" "<ssescalarmode>")])
;; V4SF divide expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; not optimizing for size, and finite-math-only, non-trapping and
;; unsafe-math optimizations are all enabled, the division is expanded by
;; ix86_emit_swdivsf instead.
369 (define_expand "divv4sf3"
370 [(set (match_operand:V4SF 0 "register_operand" "")
371 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
372 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
376 && flag_finite_math_only && !flag_trapping_math
377 && flag_unsafe_math_optimizations)
379 ix86_emit_swdivsf (operands[0], operands[1],
380 operands[2], V4SFmode);
;; V2DF divide expander: register dividend, register-or-memory divisor.
385 (define_expand "divv2df3"
386 [(set (match_operand:V2DF 0 "register_operand" "")
387 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
388 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Packed-float divide insn, emitted as divps/divpd.  Operand 1 (dividend)
;; is a register tied to the destination and is not marked commutative;
;; operand 2 is an xmm register or memory.
392 (define_insn "<sse>_div<mode>3"
393 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
395 (match_operand:SSEMODEF2P 1 "register_operand" "0")
396 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
397 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
398 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
399 [(set_attr "type" "ssediv")
400 (set_attr "mode" "<MODE>")])
;; Scalar-form divide on vector operands, emitted as divss/divsd inside a
;; vec_merge.  Operand 1 is a register tied to the destination; the "mode"
;; attribute is the scalar element mode.
402 (define_insn "<sse>_vmdiv<mode>3"
403 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
404 (vec_merge:SSEMODEF2P
406 (match_operand:SSEMODEF2P 1 "register_operand" "0")
407 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
410 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
411 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
412 [(set_attr "type" "ssediv")
413 (set_attr "mode" "<ssescalarmode>")])
;; V4SF rcpps insn, modelled as UNSPEC_RCP of operand 1.  The source may be
;; an xmm register or memory and is not tied to the destination.
415 (define_insn "sse_rcpv4sf2"
416 [(set (match_operand:V4SF 0 "register_operand" "=x")
418 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
420 "rcpps\t{%1, %0|%0, %1}"
421 [(set_attr "type" "sse")
422 (set_attr "mode" "V4SF")])
;; Scalar-form rcpss insn.  Operand 1 is the value fed to the unspec;
;; operand 2 is a register tied to the destination ("0").  The template
;; prints only operands 0 and 1; the "mode" attribute is SF.
424 (define_insn "sse_vmrcpv4sf2"
425 [(set (match_operand:V4SF 0 "register_operand" "=x")
427 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
429 (match_operand:V4SF 2 "register_operand" "0")
432 "rcpss\t{%1, %0|%0, %1}"
433 [(set_attr "type" "sse")
434 (set_attr "mode" "SF")])
;; V4SF square-root expander.  Under the same fast-math conditions as the
;; V4SF divide expander (TARGET_SSE_MATH, TARGET_RECIP, not optimize_size,
;; finite-math-only, non-trapping, unsafe-math optimizations) it calls
;; ix86_emit_swsqrtsf with a final argument of 0.
436 (define_expand "sqrtv4sf2"
437 [(set (match_operand:V4SF 0 "register_operand" "")
438 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
441 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
442 && flag_finite_math_only && !flag_trapping_math
443 && flag_unsafe_math_optimizations)
445 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square-root insn, emitted as sqrtps.  The source may be an xmm
;; register or memory.
450 (define_insn "sse_sqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
454 "sqrtps\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "V4SF")])
;; V2DF square-root insn, emitted as sqrtpd.  The source may be an xmm
;; register or memory.
458 (define_insn "sqrtv2df2"
459 [(set (match_operand:V2DF 0 "register_operand" "=x")
460 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
462 "sqrtpd\t{%1, %0|%0, %1}"
463 [(set_attr "type" "sse")
464 (set_attr "mode" "V2DF")])
;; Scalar-form square root, emitted as sqrtss/sqrtsd inside a vec_merge.
;; Operand 1 is the source (xmm or memory); operand 2 is a register tied to
;; the destination.  The template prints only operands 0 and 1.
466 (define_insn "<sse>_vmsqrt<mode>2"
467 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
468 (vec_merge:SSEMODEF2P
470 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
471 (match_operand:SSEMODEF2P 2 "register_operand" "0")
473 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
474 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
475 [(set_attr "type" "sse")
476 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  The preparation
;; code calls ix86_emit_swsqrtsf with a final argument of 1, where the
;; V4SF sqrt expander passes 0.
478 (define_expand "rsqrtv4sf2"
479 [(set (match_operand:V4SF 0 "register_operand" "")
481 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
484 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF rsqrtps insn, modelled as UNSPEC_RSQRT of operand 1.  The source
;; may be an xmm register or memory.
488 (define_insn "sse_rsqrtv4sf2"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
493 "rsqrtps\t{%1, %0|%0, %1}"
494 [(set_attr "type" "sse")
495 (set_attr "mode" "V4SF")])
;; Scalar-form rsqrtss insn.  Operand 1 is the value fed to the unspec;
;; operand 2 is a register tied to the destination ("0").  The template
;; prints only operands 0 and 1; the "mode" attribute is SF.
497 (define_insn "sse_vmrsqrtv4sf2"
498 [(set (match_operand:V4SF 0 "register_operand" "=x")
500 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
502 (match_operand:V4SF 2 "register_operand" "0")
505 "rsqrtss\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "SF")])
509 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
510 ;; isn't really correct, as those rtl operators aren't defined when
511 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed-float min/max expander.  Without -ffinite-math-only operand 1 is
;; first forced into a register; ix86_fixup_binary_operands_no_copy is then
;; called in either case.
513 (define_expand "<code><mode>3"
514 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
516 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
517 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
518 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
520 if (!flag_finite_math_only)
521 operands[1] = force_reg (<MODE>mode, operands[1]);
522 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only under flag_finite_math_only.  Operand 1 is marked
;; commutative ("%0") and may be memory before operand fixing; the
;; condition also requires ix86_binary_operator_ok.
;; <maxminfprefix> is defined elsewhere.
525 (define_insn "*<code><mode>3_finite"
526 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
528 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
529 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
530 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
531 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
532 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sseadd")
534 (set_attr "mode" "<MODE>")])
;; General min/max insn with no finite-math requirement.  Unlike the
;; _finite variant, operand 1 must be a register and is not marked
;; commutative, so operand order is preserved.
536 (define_insn "*<code><mode>3"
537 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
539 (match_operand:SSEMODEF2P 1 "register_operand" "0")
540 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
541 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
542 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
543 [(set_attr "type" "sseadd")
544 (set_attr "mode" "<MODE>")])
;; Scalar-form min/max on vector operands, emitted with the "s" (ss/sd)
;; suffix inside a vec_merge.  Operand 1 is a register tied to the
;; destination; the "mode" attribute is the scalar element mode.
546 (define_insn "<sse>_vm<code><mode>3"
547 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
548 (vec_merge:SSEMODEF2P
550 (match_operand:SSEMODEF2P 1 "register_operand" "0")
551 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
554 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
555 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
556 [(set_attr "type" "sse")
557 (set_attr "mode" "<ssescalarmode>")])
559 ;; These versions of the min/max patterns implement exactly the operations
560 ;; min = (op1 < op2 ? op1 : op2)
561 ;; max = (!(op1 < op2) ? op1 : op2)
562 ;; Their operands are not commutative, and thus they may be used in the
563 ;; presence of -0.0 and NaN.
;; Non-commutative packed minimum, emitted as minps/minpd.  Operand 1 is a
;; register tied to the destination with no "%" marker, so the operands
;; are never swapped.
565 (define_insn "*ieee_smin<mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
568 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
569 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
571 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
573 [(set_attr "type" "sseadd")
574 (set_attr "mode" "<MODE>")])
;; Non-commutative packed maximum, emitted as maxps/maxpd.  Operand 1 is a
;; register tied to the destination with no "%" marker, so the operands
;; are never swapped.
576 (define_insn "*ieee_smax<mode>3"
577 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
579 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
580 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
582 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
583 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
584 [(set_attr "type" "sseadd")
585 (set_attr "mode" "<MODE>")])
;; V4SF addsubps insn.  Operands 1 and 2 are each referenced twice: once
;; directly and once through match_dup inside a minus.  Operand 1 is tied
;; to the destination.
587 (define_insn "sse3_addsubv4sf3"
588 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
593 (minus:V4SF (match_dup 1) (match_dup 2))
596 "addsubps\t{%2, %0|%0, %2}"
597 [(set_attr "type" "sseadd")
598 (set_attr "prefix_rep" "1")
599 (set_attr "mode" "V4SF")])
;; V2DF addsubpd insn.  Operands 1 and 2 are each referenced twice: once
;; directly and once through match_dup inside a minus.  Operand 1 is tied
;; to the destination.
601 (define_insn "sse3_addsubv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
605 (match_operand:V2DF 1 "register_operand" "0")
606 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
607 (minus:V2DF (match_dup 1) (match_dup 2))
610 "addsubpd\t{%2, %0|%0, %2}"
611 [(set_attr "type" "sseadd")
612 (set_attr "mode" "V2DF")])
;; V4SF horizontal add/subtract, emitted as h<plusminus_mnemonic>ps.
;; The pattern selects elements 0, 1, 2 and 3 of operand 1 and then the
;; same four elements of operand 2 as individual SFmode values.
;; Operand 1 is tied to the destination.
614 (define_insn "sse3_h<plusminus_insn>v4sf3"
615 [(set (match_operand:V4SF 0 "register_operand" "=x")
620 (match_operand:V4SF 1 "register_operand" "0")
621 (parallel [(const_int 0)]))
622 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
624 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
625 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
629 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
630 (parallel [(const_int 0)]))
631 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
633 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
634 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
636 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
637 [(set_attr "type" "sseadd")
638 (set_attr "prefix_rep" "1")
639 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/subtract, emitted as h<plusminus_mnemonic>pd.
;; The pattern selects elements 0 and 1 of operand 1 and then of operand 2
;; as individual DFmode values.  Operand 1 is tied to the destination.
641 (define_insn "sse3_h<plusminus_insn>v2df3"
642 [(set (match_operand:V2DF 0 "register_operand" "=x")
646 (match_operand:V2DF 1 "register_operand" "0")
647 (parallel [(const_int 0)]))
648 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
651 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
652 (parallel [(const_int 0)]))
653 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
655 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
656 [(set_attr "type" "sseadd")
657 (set_attr "mode" "V2DF")])
;; V4SF sum reduction.  Two strategies appear in the body: two chained
;; sse3_haddv4sf3 insns through a fresh temporary register, or a call to
;; ix86_expand_reduc_v4sf with gen_addv4sf3.
659 (define_expand "reduc_splus_v4sf"
660 [(match_operand:V4SF 0 "register_operand" "")
661 (match_operand:V4SF 1 "register_operand" "")]
666 rtx tmp = gen_reg_rtx (V4SFmode);
667 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
668 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
671 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum reduction: a single sse3_haddv2df3 with operand 1 supplied as
;; both inputs.
675 (define_expand "reduc_splus_v2df"
676 [(match_operand:V2DF 0 "register_operand" "")
677 (match_operand:V2DF 1 "register_operand" "")]
680 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF maximum reduction via ix86_expand_reduc_v4sf with gen_smaxv4sf3.
684 (define_expand "reduc_smax_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
689 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction via ix86_expand_reduc_v4sf with gen_sminv4sf3.
693 (define_expand "reduc_smin_v4sf"
694 [(match_operand:V4SF 0 "register_operand" "")
695 (match_operand:V4SF 1 "register_operand" "")]
698 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
704 ;; Parallel floating point comparisons
706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point compare for scalar and packed modes (SSEMODEF4).  Any
;; comparison accepted by sse_comparison_operator is matched as operand 3,
;; and %D3 in the template prints the mnemonic fragment for it
;; (presumably the predicate name, e.g. "eq"/"lt" -- TODO confirm).
;; Operand 1 is a register tied to the destination.
708 (define_insn "<sse>_maskcmp<mode>3"
709 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
710 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
711 [(match_operand:SSEMODEF4 1 "register_operand" "0")
712 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
713 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
715 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
716 [(set_attr "type" "ssecmp")
717 (set_attr "mode" "<MODE>")])
;; Scalar-form compare on vector operands (cmp..ss / cmp..sd) inside a
;; vec_merge.  Disabled when TARGET_SSE5 is set.  The "mode" attribute is
;; the scalar element mode.
719 (define_insn "<sse>_vmmaskcmp<mode>3"
720 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
721 (vec_merge:SSEMODEF2P
722 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
723 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
724 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
727 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
728 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
729 [(set_attr "type" "ssecmp")
730 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of the two vector operands,
;; emitted as comis<suffix>.  Operand 0 must be an xmm register; operand 1
;; may be an xmm register or memory.
;; <ssevecmode> and <ssemodefsuffix> are defined elsewhere.
732 (define_insn "<sse>_comi"
733 [(set (reg:CCFP FLAGS_REG)
736 (match_operand:<ssevecmode> 0 "register_operand" "x")
737 (parallel [(const_int 0)]))
739 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
740 (parallel [(const_int 0)]))))]
741 "SSE_FLOAT_MODE_P (<MODE>mode)"
742 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
743 [(set_attr "type" "ssecomi")
744 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but FLAGS_REG is set in CCFPU mode and
;; the template emits ucomis<suffix>.
746 (define_insn "<sse>_ucomi"
747 [(set (reg:CCFPU FLAGS_REG)
750 (match_operand:<ssevecmode> 0 "register_operand" "x")
751 (parallel [(const_int 0)]))
753 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))))]
755 "SSE_FLOAT_MODE_P (<MODE>mode)"
756 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
757 [(set_attr "type" "ssecomi")
758 (set_attr "mode" "<MODE>")])
;; Packed-float conditional select.  Operands 4 and 5 are the compared
;; values; operands 1 and 2 are the two if_then_else arms.  The expansion
;; is attempted by ix86_expand_fp_vcond, whose return value is tested.
760 (define_expand "vcond<mode>"
761 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
762 (if_then_else:SSEMODEF2P
764 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
765 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
766 (match_operand:SSEMODEF2P 1 "general_operand" "")
767 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
768 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
770 if (ix86_expand_fp_vcond (operands))
776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
778 ;; Parallel floating point logical operations
780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-float and-not, emitted as andnps/andnpd.  Operand 1 is a register
;; tied to the destination; operand 2 is an xmm register or memory.
782 (define_insn "<sse>_nand<mode>3"
783 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
786 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
787 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
788 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
789 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
790 [(set_attr "type" "sselog")
791 (set_attr "mode" "<MODE>")])
;; Expander for packed-float logical operations; the preparation code
;; calls ix86_fixup_binary_operands_no_copy on the operands.
793 (define_expand "<code><mode>3"
794 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
796 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
797 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
798 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
799 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float logical insn, emitted as <plogicprefix>ps/pd
;; (<plogicprefix> is defined elsewhere).  Operand 1 is commutative ("%")
;; and tied to the destination; the condition also requires
;; ix86_binary_operator_ok.
801 (define_insn "*<code><mode>3"
802 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
804 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
805 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
806 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
807 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
808 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
809 [(set_attr "type" "sselog")
810 (set_attr "mode" "<MODE>")])
812 ;; Also define scalar versions. These are used for abs, neg, and
813 ;; conditional move. Using subregs into vector modes causes register
814 ;; allocation lossage. These patterns do not allow memory operands
815 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not.  Both inputs must be xmm registers; no
;; memory alternative is offered.  The template emits the packed andnp
;; mnemonic and the "mode" attribute is the corresponding vector mode.
817 (define_insn "*nand<mode>3"
818 [(set (match_operand:MODEF 0 "register_operand" "=x")
821 (match_operand:MODEF 1 "register_operand" "0"))
822 (match_operand:MODEF 2 "register_operand" "x")))]
823 "SSE_FLOAT_MODE_P (<MODE>mode)"
824 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
825 [(set_attr "type" "sselog")
826 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation.  Both inputs must be xmm
;; registers; no memory alternative is offered.  The template emits the
;; packed <plogicprefix>p mnemonic and the "mode" attribute is the
;; corresponding vector mode.
828 (define_insn "*<code><mode>3"
829 [(set (match_operand:MODEF 0 "register_operand" "=x")
831 (match_operand:MODEF 1 "register_operand" "0")
832 (match_operand:MODEF 2 "register_operand" "x")))]
833 "SSE_FLOAT_MODE_P (<MODE>mode)"
834 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "<ssevecmode>")])
838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
840 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
841 ;; scalar versions of the instructions as well as the vector versions.
843 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
845 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
846 ;; combine to generate a multiply/add with two memory references. We then
847 ;; split this insn, into loading up the destination register with one of the
848 ;; memory operations. If we don't manage to split the insn, reload will
849 ;; generate the appropriate moves. The reason this is needed, is that combine
850 ;; has already folded one of the memory references into both the multiply and
851 ;; add insns, and it can't generate a new pseudo. I.e.:
852 ;; (set (reg1) (mem (addr1)))
853 ;; (set (reg2) (mult (reg1) (mem (addr2))))
854 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add for scalar and packed float modes (SSEMODEF4).
;; Operands 1 and 2 are the multiply inputs and operand 3 is the value
;; added to the product.  Each of the four alternatives ties either
;; operand 1 or operand 3 to the destination.
;; Enabled only when TARGET_SSE5 and TARGET_FUSED_MADD are set and
;; ix86_sse5_valid_op_p accepts the operands; its last argument is 2 here
;; and 1 in the scalar "vm" variant (presumably the number of memory
;; operands allowed -- TODO confirm against the helper).
856 (define_insn "sse5_fmadd<mode>4"
857 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
860 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
861 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
862 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
863 "TARGET_SSE5 && TARGET_FUSED_MADD
864 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
865 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
866 [(set_attr "type" "ssemuladd")
867 (set_attr "mode" "<MODE>")])
869 ;; Split fmadd with two memory operands into a load and the fmadd.
871 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
874 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
875 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
876 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
878 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
879 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
880 && !reg_mentioned_p (operands[0], operands[1])
881 && !reg_mentioned_p (operands[0], operands[2])
882 && !reg_mentioned_p (operands[0], operands[3])"
885 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
887 operands[2], operands[3]));
891 ;; For the scalar operations, use operand1 for the upper words that aren't
892 ;; modified, so restrict the forms that are generated.
893 ;; Scalar version of fmadd
;; Operand 1 is tied to the destination in both alternatives; memory is
;; allowed for operand 3 in the first alternative and for operand 2 in the
;; second.  The mnemonic suffix comes from ssemodesuffixf2s, which maps the
;; vector modes to "ss"/"sd".
894 (define_insn "sse5_vmfmadd<mode>4"
895 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
896 (vec_merge:SSEMODEF2P
899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
900 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
901 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
904 "TARGET_SSE5 && TARGET_FUSED_MADD
905 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
906 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
907 [(set_attr "type" "ssemuladd")
908 (set_attr "mode" "<MODE>")])
910 ;; Floating multiply and subtract
911 ;; Allow two memory operands the same as fmadd
;; Same operand layout, constraints and enabling condition as
;; sse5_fmadd<mode>4; only the emitted mnemonic (fmsub) differs in the
;; template.
912 (define_insn "sse5_fmsub<mode>4"
913 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
916 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
917 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
918 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
919 "TARGET_SSE5 && TARGET_FUSED_MADD
920 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
921 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
922 [(set_attr "type" "ssemuladd")
923 (set_attr "mode" "<MODE>")])
925 ;; Split fmsub with two memory operands into a load and the fmsub.
927 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
930 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
931 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
932 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
934 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
935 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
936 && !reg_mentioned_p (operands[0], operands[1])
937 && !reg_mentioned_p (operands[0], operands[2])
938 && !reg_mentioned_p (operands[0], operands[3])"
941 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
942 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
943 operands[2], operands[3]));
947 ;; For the scalar operations, use operand1 for the upper words that aren't
948 ;; modified, so restrict the forms that are generated.
949 ;; Scalar version of fmsub
;; Operand 1 is tied to the destination in both alternatives; memory is
;; allowed for operand 3 in the first alternative and for operand 2 in the
;; second.  The mnemonic suffix comes from ssemodesuffixf2s ("ss"/"sd").
950 (define_insn "sse5_vmfmsub<mode>4"
951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
952 (vec_merge:SSEMODEF2P
955 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
956 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
957 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
960 "TARGET_SSE5 && TARGET_FUSED_MADD
961 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
962 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
963 [(set_attr "type" "ssemuladd")
964 (set_attr "mode" "<MODE>")])
966 ;; Floating point negative multiply and add
967 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
968 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
969 ;; Allow two memory operands to help in optimizing.
;; Operand 3 appears first in the RTL, ahead of the multiply inputs
;; (operands 1 and 2), yet the operand numbers and their constraint
;; strings are the same as in sse5_fmadd<mode>4.  The assembler template
;; still prints the operands in numeric order.
970 (define_insn "sse5_fnmadd<mode>4"
971 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
973 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
975 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
976 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
977 "TARGET_SSE5 && TARGET_FUSED_MADD
978 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
979 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
980 [(set_attr "type" "ssemuladd")
981 (set_attr "mode" "<MODE>")])
983 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies only when ix86_sse5_valid_op_p rejects the operands with a final
;; argument of 1 but accepts them with 2, and operand 0 is not mentioned in
;; operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmadd<mode>4.
985 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
987 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
989 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
990 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
992 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
993 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
994 && !reg_mentioned_p (operands[0], operands[1])
995 && !reg_mentioned_p (operands[0], operands[2])
996 && !reg_mentioned_p (operands[0], operands[3])"
999 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1000 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1001 operands[2], operands[3]));
1005 ;; For the scalar operations, use operand1 for the upper words that aren't
1006 ;; modified, so restrict the forms that are generated.
1007 ;; Scalar version of fnmadd
;; Operand 3 is listed first in the RTL, as in sse5_fnmadd<mode>4.  Operand 1
;; is tied to the destination in both alternatives; each alternative lets
;; exactly one of operands 2 and 3 be memory.
1008 (define_insn "sse5_vmfnmadd<mode>4"
1009 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1010 (vec_merge:SSEMODEF2P
1012 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1014 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1015 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1018 "TARGET_SSE5 && TARGET_FUSED_MADD
1019 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1020 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1021 [(set_attr "type" "ssemuladd")
1022 (set_attr "mode" "<MODE>")])
1024 ;; Floating point negative multiply and subtract
1025 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1026 ;; Allow 2 memory operands to help with optimization
;; NOTE(review): only two constraint alternatives are listed here, each with
;; operand 1 tied to the destination and a single "m" among operands 2/3,
;; whereas sse5_fnmadd<mode>4 lists four alternatives.  The condition still
;; passes 2 to ix86_sse5_valid_op_p -- confirm this asymmetry is intended.
1027 (define_insn "sse5_fnmsub<mode>4"
1028 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1032 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1033 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1034 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1035 "TARGET_SSE5 && TARGET_FUSED_MADD
1036 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1037 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1038 [(set_attr "type" "ssemuladd")
1039 (set_attr "mode" "<MODE>")])
1041 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies only when ix86_sse5_valid_op_p rejects the operands with a final
;; argument of 1 but accepts them with 2, and operand 0 is not mentioned in
;; operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmsub<mode>4.
1043 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1047 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1048 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1049 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1051 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1052 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1053 && !reg_mentioned_p (operands[0], operands[1])
1054 && !reg_mentioned_p (operands[0], operands[2])
1055 && !reg_mentioned_p (operands[0], operands[3])"
1058 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1059 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1060 operands[2], operands[3]));
1064 ;; For the scalar operations, use operand1 for the upper words that aren't
1065 ;; modified, so restrict the forms that are generated.
1066 ;; Scalar version of fnmsub
;; NOTE(review): this pattern passes 2 as the last argument of
;; ix86_sse5_valid_op_p, while the other scalar patterns (sse5_vmfmsub,
;; sse5_vmfnmadd) pass 1 and the constraints here allow only one memory
;; operand per alternative -- confirm whether 2 is intended.
1067 (define_insn "sse5_vmfnmsub<mode>4"
1068 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1069 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1075 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1078 "TARGET_SSE5 && TARGET_FUSED_MADD
1079 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1080 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1081 [(set_attr "type" "ssemuladd")
1082 (set_attr "mode" "<MODE>")])
1084 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1085 ;; even if the user used -mno-fused-madd
1086 ;; Parallel instructions. During instruction generation, just default
1087 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for packed fmadd (V4SF/V2DF).  All operands are
;; registers and the operation is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set the preparation code emits the plain
;; gen_sse5_fmadd<mode>4 pattern instead.
1088 (define_expand "sse5i_fmadd<mode>4"
1089 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1093 (match_operand:SSEMODEF2P 1 "register_operand" "")
1094 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1095 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1096 UNSPEC_SSE5_INTRINSIC))]
1099 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1100 if (TARGET_FUSED_MADD)
1102 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1103 operands[2], operands[3]));
;; Matcher for the UNSPEC form of packed fmadd.  It does not test
;; TARGET_FUSED_MADD; the condition only requires TARGET_SSE5 and that
;; ix86_sse5_valid_op_p accepts the operands with a final argument of 1.
;; Four alternatives: operand 1 or operand 3 tied to the destination, with
;; one "m" among the remaining inputs.
1108 (define_insn "*sse5i_fmadd<mode>4"
1109 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1113 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1114 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1115 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1116 UNSPEC_SSE5_INTRINSIC))]
1117 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1118 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1119 [(set_attr "type" "ssemuladd")
1120 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for packed fmsub (V4SF/V2DF).  All operands are
;; registers and the operation is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set the preparation code emits the plain
;; gen_sse5_fmsub<mode>4 pattern instead.
1122 (define_expand "sse5i_fmsub<mode>4"
1123 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1127 (match_operand:SSEMODEF2P 1 "register_operand" "")
1128 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1129 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1130 UNSPEC_SSE5_INTRINSIC))]
1133 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1134 if (TARGET_FUSED_MADD)
1136 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1137 operands[2], operands[3]));
;; Matcher for the UNSPEC form of packed fmsub; requires TARGET_SSE5 and
;; ix86_sse5_valid_op_p with a final argument of 1.
;; NOTE(review): operand 1 uses the predicate register_operand although its
;; last constraint alternative is "xm" and the sibling *sse5i_fmadd,
;; *sse5i_fnmadd and *sse5i_fnmsub patterns use nonimmediate_operand for
;; the same operand -- confirm whether the stricter predicate is intended.
1142 (define_insn "*sse5i_fmsub<mode>4"
1143 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1147 (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
1148 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1149 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1150 UNSPEC_SSE5_INTRINSIC))]
1151 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1152 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1153 [(set_attr "type" "ssemuladd")
1154 (set_attr "mode" "<MODE>")])
1156 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1157 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for packed fnmadd: register-only operands wrapped in
;; UNSPEC_SSE5_INTRINSIC, with operand 3 listed before operands 1 and 2.
;; When TARGET_FUSED_MADD is set the preparation code emits
;; gen_sse5_fnmadd<mode>4 instead.
1158 (define_expand "sse5i_fnmadd<mode>4"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1162 (match_operand:SSEMODEF2P 3 "register_operand" "")
1164 (match_operand:SSEMODEF2P 1 "register_operand" "")
1165 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1166 UNSPEC_SSE5_INTRINSIC))]
1169 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1170 if (TARGET_FUSED_MADD)
1172 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1173 operands[2], operands[3]));
;; Matcher for the UNSPEC form of packed fnmadd; requires TARGET_SSE5 and
;; ix86_sse5_valid_op_p with a final argument of 1.  Operand 3 is listed
;; first in the RTL; the template prints the operands in numeric order.
1178 (define_insn "*sse5i_fnmadd<mode>4"
1179 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1182 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1184 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1185 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1186 UNSPEC_SSE5_INTRINSIC))]
1187 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1188 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1189 [(set_attr "type" "ssemuladd")
1190 (set_attr "mode" "<MODE>")])
1192 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander for packed fnmsub: register-only operands wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the preparation
;; code emits gen_sse5_fnmsub<mode>4 instead.
1193 (define_expand "sse5i_fnmsub<mode>4"
1194 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1199 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1200 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1201 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1202 UNSPEC_SSE5_INTRINSIC))]
1205 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1206 if (TARGET_FUSED_MADD)
1208 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1209 operands[2], operands[3]));
;; Matcher for the UNSPEC form of packed fnmsub; requires TARGET_SSE5 and
;; ix86_sse5_valid_op_p with a final argument of 1.  Four alternatives:
;; operand 1 or operand 3 tied to the destination, one "m" among the rest.
1214 (define_insn "*sse5i_fnmsub<mode>4"
1215 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1220 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1221 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1222 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1223 UNSPEC_SSE5_INTRINSIC))]
1224 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1225 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1226 [(set_attr "type" "ssemuladd")
1227 (set_attr "mode" "<MODE>")])
1229 ;; Scalar instructions
;; Intrinsic expander for scalar fmadd: a vec_merge of register operands
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_vmfmadd<mode>4 instead.
1230 (define_expand "sse5i_vmfmadd<mode>4"
1231 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1233 [(vec_merge:SSEMODEF2P
1236 (match_operand:SSEMODEF2P 1 "register_operand" "")
1237 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1238 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1241 UNSPEC_SSE5_INTRINSIC))]
1244 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1245 if (TARGET_FUSED_MADD)
1247 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1248 operands[2], operands[3]));
1253 ;; For the scalar operations, use operand1 for the upper words that aren't
1254 ;; modified, so restrict the forms that are accepted.
;; Matcher for the UNSPEC form of scalar fmadd.  Operand 1 must be a
;; register and is tied to the destination in both alternatives; each
;; alternative lets exactly one of operands 2 and 3 be memory.
1255 (define_insn "*sse5i_vmfmadd<mode>4"
1256 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1258 [(vec_merge:SSEMODEF2P
1261 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1262 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1263 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1266 UNSPEC_SSE5_INTRINSIC))]
1267 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1268 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1269 [(set_attr "type" "ssemuladd")
1270 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for scalar fmsub: a vec_merge of register operands
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_vmfmsub<mode>4 instead.
1272 (define_expand "sse5i_vmfmsub<mode>4"
1273 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1275 [(vec_merge:SSEMODEF2P
1278 (match_operand:SSEMODEF2P 1 "register_operand" "")
1279 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1280 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1283 UNSPEC_SSE5_INTRINSIC))]
1286 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1287 if (TARGET_FUSED_MADD)
1289 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1290 operands[2], operands[3]));
;; Matcher for the UNSPEC form of scalar fmsub.  Operand 1 is tied to the
;; destination in both alternatives; each alternative lets exactly one of
;; operands 2 and 3 be memory.
1295 (define_insn "*sse5i_vmfmsub<mode>4"
1296 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1298 [(vec_merge:SSEMODEF2P
1301 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1302 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1303 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1306 UNSPEC_SSE5_INTRINSIC))]
1307 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1308 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1309 [(set_attr "type" "ssemuladd")
1310 (set_attr "mode" "<ssescalarmode>")])
1312 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for scalar fnmadd: a vec_merge of register operands
;; (operand 3 listed first) wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set the preparation code emits
;; gen_sse5_vmfnmadd<mode>4 instead.
1313 (define_expand "sse5i_vmfnmadd<mode>4"
1314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1316 [(vec_merge:SSEMODEF2P
1318 (match_operand:SSEMODEF2P 3 "register_operand" "")
1320 (match_operand:SSEMODEF2P 1 "register_operand" "")
1321 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1324 UNSPEC_SSE5_INTRINSIC))]
1327 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1328 if (TARGET_FUSED_MADD)
1330 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1331 operands[2], operands[3]));
;; Matcher for the UNSPEC form of scalar fnmadd.  Operand 3 is listed first
;; in the RTL; operand 1 is tied to the destination in both alternatives and
;; each alternative lets exactly one of operands 2 and 3 be memory.
1336 (define_insn "*sse5i_vmfnmadd<mode>4"
1337 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1339 [(vec_merge:SSEMODEF2P
1341 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1343 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1344 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1347 UNSPEC_SSE5_INTRINSIC))]
1348 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1349 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1350 [(set_attr "type" "ssemuladd")
1351 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for scalar fnmsub: a vec_merge of register operands
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_vmfnmsub<mode>4 instead.
1353 (define_expand "sse5i_vmfnmsub<mode>4"
1354 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1356 [(vec_merge:SSEMODEF2P
1360 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1361 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1362 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1365 UNSPEC_SSE5_INTRINSIC))]
1368 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1369 if (TARGET_FUSED_MADD)
1371 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1372 operands[2], operands[3]));
;; Matcher for the UNSPEC form of scalar fnmsub.  Operand 1 is tied to the
;; destination in both alternatives; each alternative lets exactly one of
;; operands 2 and 3 be memory.
1377 (define_insn "*sse5i_vmfnmsub<mode>4"
1378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1380 [(vec_merge:SSEMODEF2P
1384 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1385 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1386 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1389 UNSPEC_SSE5_INTRINSIC))]
1390 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1391 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1392 [(set_attr "type" "ssemuladd")
1393 (set_attr "mode" "<ssescalarmode>")])
1395 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1397 ;; Parallel single-precision floating point conversion operations
1399 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: operand 2 (V2SI, MMX register or memory) is converted with
;; float:V2SF; operand 1 is a V4SF register tied to the destination.
1401 (define_insn "sse_cvtpi2ps"
1402 [(set (match_operand:V4SF 0 "register_operand" "=x")
1405 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1406 (match_operand:V4SF 1 "register_operand" "0")
1409 "cvtpi2ps\t{%2, %0|%0, %2}"
1410 [(set_attr "type" "ssecvt")
1411 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 (SSE register or memory) is converted inside an
;; unspec:V4SI and the parallel selects elements 0 and 1 for the V2SI MMX
;; destination.
1413 (define_insn "sse_cvtps2pi"
1414 [(set (match_operand:V2SI 0 "register_operand" "=y")
1416 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1418 (parallel [(const_int 0) (const_int 1)])))]
1420 "cvtps2pi\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "ssecvt")
1422 (set_attr "unit" "mmx")
1423 (set_attr "mode" "DI")])
;; cvttps2pi: truncating counterpart of sse_cvtps2pi, expressed with
;; fix:V4SI; the parallel selects elements 0 and 1 for the V2SI MMX
;; destination.
;; NOTE(review): the "mode" attribute is "SF" here but "DI" on
;; sse_cvtps2pi -- confirm which is intended.
1425 (define_insn "sse_cvttps2pi"
1426 [(set (match_operand:V2SI 0 "register_operand" "=y")
1428 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1429 (parallel [(const_int 0) (const_int 1)])))]
1431 "cvttps2pi\t{%1, %0|%0, %1}"
1432 [(set_attr "type" "ssecvt")
1433 (set_attr "unit" "mmx")
1434 (set_attr "mode" "SF")])
;; cvtsi2ss: SImode operand 2 (general register or memory) is converted
;; with float:SF; V4SF operand 1 is tied to the destination.  The decode
;; attributes give one value per alternative (register, memory).
1436 (define_insn "sse_cvtsi2ss"
1437 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1440 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1441 (match_operand:V4SF 1 "register_operand" "0,0")
1444 "cvtsi2ss\t{%2, %0|%0, %2}"
1445 [(set_attr "type" "sseicvt")
1446 (set_attr "athlon_decode" "vector,double")
1447 (set_attr "amdfam10_decode" "vector,double")
1448 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode source variant of sse_cvtsi2ss, available only when
;; TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm" here, whereas
;; sse_cvtsi2ss and sse2_cvtsi2sdq use "m" -- confirm whether intended.
1450 (define_insn "sse_cvtsi2ssq"
1451 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1454 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1455 (match_operand:V4SF 1 "register_operand" "0,0")
1457 "TARGET_SSE && TARGET_64BIT"
1458 "cvtsi2ssq\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "sseicvt")
1460 (set_attr "athlon_decode" "vector,double")
1461 (set_attr "amdfam10_decode" "vector,double")
1462 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of V4SF operand 1 (SSE register or memory) is
;; converted to SImode through UNSPEC_FIX_NOTRUNC.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2si_2 -- confirm whether the default is intended.
1464 (define_insn "sse_cvtss2si"
1465 [(set (match_operand:SI 0 "register_operand" "=r,r")
1468 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1469 (parallel [(const_int 0)]))]
1470 UNSPEC_FIX_NOTRUNC))]
1472 "cvtss2si\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "sseicvt")
1474 (set_attr "athlon_decode" "double,vector")
1475 (set_attr "prefix_rep" "1")
1476 (set_attr "mode" "SI")])
;; cvtss2si on a scalar SFmode operand (SSE register or memory) rather than
;; a vector element; the conversion is UNSPEC_FIX_NOTRUNC to SImode.
1478 (define_insn "sse_cvtss2si_2"
1479 [(set (match_operand:SI 0 "register_operand" "=r,r")
1480 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1481 UNSPEC_FIX_NOTRUNC))]
1483 "cvtss2si\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "sseicvt")
1485 (set_attr "athlon_decode" "double,vector")
1486 (set_attr "amdfam10_decode" "double,double")
1487 (set_attr "prefix_rep" "1")
1488 (set_attr "mode" "SI")])
;; cvtss2siq: element 0 of V4SF operand 1 is converted to DImode through
;; UNSPEC_FIX_NOTRUNC; requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2siq_2 -- confirm whether the default is intended.
1490 (define_insn "sse_cvtss2siq"
1491 [(set (match_operand:DI 0 "register_operand" "=r,r")
1494 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1495 (parallel [(const_int 0)]))]
1496 UNSPEC_FIX_NOTRUNC))]
1497 "TARGET_SSE && TARGET_64BIT"
1498 "cvtss2siq\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "sseicvt")
1500 (set_attr "athlon_decode" "double,vector")
1501 (set_attr "prefix_rep" "1")
1502 (set_attr "mode" "DI")])
;; cvtss2siq on a scalar SFmode operand (SSE register or memory): the
;; result is unspec:DI UNSPEC_FIX_NOTRUNC into a general register.
;; Requires TARGET_SSE && TARGET_64BIT.
1504 (define_insn "sse_cvtss2siq_2"
1505 [(set (match_operand:DI 0 "register_operand" "=r,r")
1506 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1507 UNSPEC_FIX_NOTRUNC))]
1508 "TARGET_SSE && TARGET_64BIT"
1509 "cvtss2siq\t{%1, %0|%0, %1}"
1510 [(set_attr "type" "sseicvt")
1511 (set_attr "athlon_decode" "double,vector")
1512 (set_attr "amdfam10_decode" "double,double")
1513 (set_attr "prefix_rep" "1")
1514 (set_attr "mode" "DI")])
;; cvttss2si: conversion of element 0 of V4SF operand 1 (SSE register or
;; memory) into an SImode general register.
1516 (define_insn "sse_cvttss2si"
1517 [(set (match_operand:SI 0 "register_operand" "=r,r")
1520 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1521 (parallel [(const_int 0)]))))]
1523 "cvttss2si\t{%1, %0|%0, %1}"
1524 [(set_attr "type" "sseicvt")
1525 (set_attr "athlon_decode" "double,vector")
1526 (set_attr "amdfam10_decode" "double,double")
1527 (set_attr "prefix_rep" "1")
1528 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result variant of sse_cvttss2si operating on element
;; 0 of V4SF operand 1; requires TARGET_SSE && TARGET_64BIT.
1530 (define_insn "sse_cvttss2siq"
1531 [(set (match_operand:DI 0 "register_operand" "=r,r")
1534 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1535 (parallel [(const_int 0)]))))]
1536 "TARGET_SSE && TARGET_64BIT"
1537 "cvttss2siq\t{%1, %0|%0, %1}"
1538 [(set_attr "type" "sseicvt")
1539 (set_attr "athlon_decode" "double,vector")
1540 (set_attr "amdfam10_decode" "double,double")
1541 (set_attr "prefix_rep" "1")
1542 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of V4SI operand 1 (SSE register or memory).
1544 (define_insn "sse2_cvtdq2ps"
1545 [(set (match_operand:V4SF 0 "register_operand" "=x")
1546 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1548 "cvtdq2ps\t{%1, %0|%0, %1}"
1549 [(set_attr "type" "ssecvt")
1550 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand 1 (SSE register or memory) converted to V4SI
;; through UNSPEC_FIX_NOTRUNC.
1552 (define_insn "sse2_cvtps2dq"
1553 [(set (match_operand:V4SI 0 "register_operand" "=x")
1554 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1555 UNSPEC_FIX_NOTRUNC))]
1557 "cvtps2dq\t{%1, %0|%0, %1}"
1558 [(set_attr "type" "ssecvt")
1559 (set_attr "prefix_data16" "1")
1560 (set_attr "mode" "TI")])
;; cvttps2dq: truncating conversion, fix:V4SI of V4SF operand 1 (SSE
;; register or memory).
1562 (define_insn "sse2_cvttps2dq"
1563 [(set (match_operand:V4SI 0 "register_operand" "=x")
1564 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1566 "cvttps2dq\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "ssecvt")
1568 (set_attr "prefix_rep" "1")
1569 (set_attr "mode" "TI")])
1571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1573 ;; Parallel double-precision floating point conversion operations
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of V2SI operand 1.  Alternative 0 takes an MMX
;; register and is the only one tagged with unit "mmx"; alternative 1 takes
;; a memory operand.
1577 (define_insn "sse2_cvtpi2pd"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1579 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1581 "cvtpi2pd\t{%1, %0|%0, %1}"
1582 [(set_attr "type" "ssecvt")
1583 (set_attr "unit" "mmx,*")
1584 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 (SSE register or memory) converted to V2SI in
;; an MMX register through UNSPEC_FIX_NOTRUNC.
1586 (define_insn "sse2_cvtpd2pi"
1587 [(set (match_operand:V2SI 0 "register_operand" "=y")
1588 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1589 UNSPEC_FIX_NOTRUNC))]
1591 "cvtpd2pi\t{%1, %0|%0, %1}"
1592 [(set_attr "type" "ssecvt")
1593 (set_attr "unit" "mmx")
1594 (set_attr "prefix_data16" "1")
1595 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating conversion, fix:V2SI of V2DF operand 1 into an MMX
;; register.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" on
;; sse2_cvtpd2pi -- confirm which is intended.
1597 (define_insn "sse2_cvttpd2pi"
1598 [(set (match_operand:V2SI 0 "register_operand" "=y")
1599 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1601 "cvttpd2pi\t{%1, %0|%0, %1}"
1602 [(set_attr "type" "ssecvt")
1603 (set_attr "unit" "mmx")
1604 (set_attr "prefix_data16" "1")
1605 (set_attr "mode" "TI")])
;; cvtsi2sd: SImode operand 2 (general register or memory) is converted
;; with float:DF; V2DF operand 1 is tied to the destination.
;; NOTE(review): athlon_decode is "double,direct" while amdfam10_decode is
;; "vector,double"; sse_cvtsi2ss uses "vector,double" for both -- confirm.
1607 (define_insn "sse2_cvtsi2sd"
1608 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1611 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1612 (match_operand:V2DF 1 "register_operand" "0,0")
1615 "cvtsi2sd\t{%2, %0|%0, %2}"
1616 [(set_attr "type" "sseicvt")
1617 (set_attr "mode" "DF")
1618 (set_attr "athlon_decode" "double,direct")
1619 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode source variant of sse2_cvtsi2sd, available only when
;; TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): athlon_decode is "double,direct" while amdfam10_decode is
;; "vector,double"; sse_cvtsi2ssq uses "vector,double" for both -- confirm.
1621 (define_insn "sse2_cvtsi2sdq"
1622 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1625 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1626 (match_operand:V2DF 1 "register_operand" "0,0")
1628 "TARGET_SSE2 && TARGET_64BIT"
1629 "cvtsi2sdq\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "sseicvt")
1631 (set_attr "mode" "DF")
1632 (set_attr "athlon_decode" "double,direct")
1633 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of V2DF operand 1 (SSE register or memory) is
;; converted to SImode through UNSPEC_FIX_NOTRUNC.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse2_cvtsd2si_2 -- confirm whether the default is intended.
1635 (define_insn "sse2_cvtsd2si"
1636 [(set (match_operand:SI 0 "register_operand" "=r,r")
1639 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1640 (parallel [(const_int 0)]))]
1641 UNSPEC_FIX_NOTRUNC))]
1643 "cvtsd2si\t{%1, %0|%0, %1}"
1644 [(set_attr "type" "sseicvt")
1645 (set_attr "athlon_decode" "double,vector")
1646 (set_attr "prefix_rep" "1")
1647 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar DFmode operand (SSE register or memory) rather than
;; a vector element; the conversion is UNSPEC_FIX_NOTRUNC to SImode.
1649 (define_insn "sse2_cvtsd2si_2"
1650 [(set (match_operand:SI 0 "register_operand" "=r,r")
1651 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1652 UNSPEC_FIX_NOTRUNC))]
1654 "cvtsd2si\t{%1, %0|%0, %1}"
1655 [(set_attr "type" "sseicvt")
1656 (set_attr "athlon_decode" "double,vector")
1657 (set_attr "amdfam10_decode" "double,double")
1658 (set_attr "prefix_rep" "1")
1659 (set_attr "mode" "SI")])
;; cvtsd2siq: element 0 of V2DF operand 1 is converted to DImode through
;; UNSPEC_FIX_NOTRUNC; requires TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse2_cvtsd2siq_2 -- confirm whether the default is intended.
1661 (define_insn "sse2_cvtsd2siq"
1662 [(set (match_operand:DI 0 "register_operand" "=r,r")
1665 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1666 (parallel [(const_int 0)]))]
1667 UNSPEC_FIX_NOTRUNC))]
1668 "TARGET_SSE2 && TARGET_64BIT"
1669 "cvtsd2siq\t{%1, %0|%0, %1}"
1670 [(set_attr "type" "sseicvt")
1671 (set_attr "athlon_decode" "double,vector")
1672 (set_attr "prefix_rep" "1")
1673 (set_attr "mode" "DI")])
;; cvtsd2siq on a scalar DFmode operand (SSE register or memory): the
;; result is unspec:DI UNSPEC_FIX_NOTRUNC into a general register.
;; Requires TARGET_SSE2 && TARGET_64BIT.
1675 (define_insn "sse2_cvtsd2siq_2"
1676 [(set (match_operand:DI 0 "register_operand" "=r,r")
1677 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1678 UNSPEC_FIX_NOTRUNC))]
1679 "TARGET_SSE2 && TARGET_64BIT"
1680 "cvtsd2siq\t{%1, %0|%0, %1}"
1681 [(set_attr "type" "sseicvt")
1682 (set_attr "athlon_decode" "double,vector")
1683 (set_attr "amdfam10_decode" "double,double")
1684 (set_attr "prefix_rep" "1")
1685 (set_attr "mode" "DI")])
;; cvttsd2si: conversion of element 0 of V2DF operand 1 (SSE register or
;; memory) into an SImode general register.
1687 (define_insn "sse2_cvttsd2si"
1688 [(set (match_operand:SI 0 "register_operand" "=r,r")
1691 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1692 (parallel [(const_int 0)]))))]
1694 "cvttsd2si\t{%1, %0|%0, %1}"
1695 [(set_attr "type" "sseicvt")
1696 (set_attr "prefix_rep" "1")
1697 (set_attr "mode" "SI")
1698 (set_attr "athlon_decode" "double,vector")
1699 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si operating on element
;; 0 of V2DF operand 1; requires TARGET_SSE2 && TARGET_64BIT.
1701 (define_insn "sse2_cvttsd2siq"
1702 [(set (match_operand:DI 0 "register_operand" "=r,r")
1705 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1706 (parallel [(const_int 0)]))))]
1707 "TARGET_SSE2 && TARGET_64BIT"
1708 "cvttsd2siq\t{%1, %0|%0, %1}"
1709 [(set_attr "type" "sseicvt")
1710 (set_attr "prefix_rep" "1")
1711 (set_attr "mode" "DI")
1712 (set_attr "athlon_decode" "double,vector")
1713 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces V2DF from V4SI operand 1 (SSE register or memory);
;; the parallel selects source elements 0 and 1.
1715 (define_insn "sse2_cvtdq2pd"
1716 [(set (match_operand:V2DF 0 "register_operand" "=x")
1719 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0) (const_int 1)]))))]
1722 "cvtdq2pd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecvt")
1724 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted via unspec:V2SI into
;; a V4SI destination.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
1726 (define_expand "sse2_cvtpd2dq"
1727 [(set (match_operand:V4SI 0 "register_operand" "")
1729 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1733 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: unspec:V2SI conversion of V2DF operand 1 paired
;; with operand 2, which must satisfy const0_operand (V2SI zero).
1735 (define_insn "*sse2_cvtpd2dq"
1736 [(set (match_operand:V4SI 0 "register_operand" "=x")
1738 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1740 (match_operand:V2SI 2 "const0_operand" "")))]
1742 "cvtpd2dq\t{%1, %0|%0, %1}"
1743 [(set_attr "type" "ssecvt")
1744 (set_attr "prefix_rep" "1")
1745 (set_attr "mode" "TI")
1746 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: fix:V2SI of V2DF operand 1 into a V4SI
;; destination.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
1748 (define_expand "sse2_cvttpd2dq"
1749 [(set (match_operand:V4SI 0 "register_operand" "")
1751 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1754 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: fix:V2SI of V2DF operand 1 paired with operand 2,
;; which must satisfy const0_operand (V2SI zero).
1756 (define_insn "*sse2_cvttpd2dq"
1757 [(set (match_operand:V4SI 0 "register_operand" "=x")
1759 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1760 (match_operand:V2SI 2 "const0_operand" "")))]
1762 "cvttpd2dq\t{%1, %0|%0, %1}"
1763 [(set_attr "type" "ssecvt")
1764 (set_attr "prefix_rep" "1")
1765 (set_attr "mode" "TI")
1766 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: V2DF operand 2 (SSE register or memory) is narrowed with
;; float_truncate:V2SF; V4SF operand 1 is tied to the destination.
1768 (define_insn "sse2_cvtsd2ss"
1769 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1772 (float_truncate:V2SF
1773 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1774 (match_operand:V4SF 1 "register_operand" "0,0")
1777 "cvtsd2ss\t{%2, %0|%0, %2}"
1778 [(set_attr "type" "ssecvt")
1779 (set_attr "athlon_decode" "vector,double")
1780 (set_attr "amdfam10_decode" "vector,double")
1781 (set_attr "mode" "SF")])
;; cvtss2sd: works on V4SF operand 2 (SSE register or memory) with a
;; parallel selecting elements 0 and 1; V2DF operand 1 is tied to the
;; destination.
1783 (define_insn "sse2_cvtss2sd"
1784 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1788 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1789 (parallel [(const_int 0) (const_int 1)])))
1790 (match_operand:V2DF 1 "register_operand" "0,0")
1793 "cvtss2sd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "ssecvt")
1795 (set_attr "amdfam10_decode" "vector,double")
1796 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: float_truncate:V2SF of V2DF operand 1 into a V4SF
;; destination.  The preparation statement sets operands[2] to the
;; V2SFmode zero constant.
1798 (define_expand "sse2_cvtpd2ps"
1799 [(set (match_operand:V4SF 0 "register_operand" "")
1801 (float_truncate:V2SF
1802 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1805 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: float_truncate:V2SF of V2DF operand 1 paired with
;; operand 2, which must satisfy const0_operand (V2SF zero).
1807 (define_insn "*sse2_cvtpd2ps"
1808 [(set (match_operand:V4SF 0 "register_operand" "=x")
1810 (float_truncate:V2SF
1811 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1812 (match_operand:V2SF 2 "const0_operand" "")))]
1814 "cvtpd2ps\t{%1, %0|%0, %1}"
1815 [(set_attr "type" "ssecvt")
1816 (set_attr "prefix_data16" "1")
1817 (set_attr "mode" "V4SF")
1818 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produces V2DF from V4SF operand 1 (SSE register or memory);
;; the parallel selects source elements 0 and 1.
1820 (define_insn "sse2_cvtps2pd"
1821 [(set (match_operand:V2DF 0 "register_operand" "=x")
1824 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1825 (parallel [(const_int 0) (const_int 1)]))))]
1827 "cvtps2pd\t{%1, %0|%0, %1}"
1828 [(set_attr "type" "ssecvt")
1829 (set_attr "mode" "V2DF")
1830 (set_attr "amdfam10_decode" "direct")])
;; Expander that widens the high half of V4SF operand 1 to V2DF.  The
;; preparation code allocates a fresh V4SF pseudo as operands[2]; the final
;; set writes V2DF operand 0 using a parallel that selects elements 0 and 1.
;; (Presumably the first step moves the high elements into operands[2];
;; its selection list starts at index 6 -- confirm against the full pattern.)
1832 (define_expand "vec_unpacks_hi_v4sf"
1837 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1838 (parallel [(const_int 6)
1842 (set (match_operand:V2DF 0 "register_operand" "")
1846 (parallel [(const_int 0) (const_int 1)]))))]
1849 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF operand 0 from V4SF operand 1, with a parallel
;; selecting source elements 0 and 1.
1852 (define_expand "vec_unpacks_lo_v4sf"
1853 [(set (match_operand:V2DF 0 "register_operand" "")
1856 (match_operand:V4SF 1 "nonimmediate_operand" "")
1857 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI operand 1 -> V4SF operand 0.  Emits
;; gen_vec_unpacks_hi_v8hi into a fresh V4SI temporary, then converts that
;; temporary with gen_sse2_cvtdq2ps.
1860 (define_expand "vec_unpacks_float_hi_v8hi"
1861 [(match_operand:V4SF 0 "register_operand" "")
1862 (match_operand:V8HI 1 "register_operand" "")]
1865 rtx tmp = gen_reg_rtx (V4SImode);
1867 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1868 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0.  Emits
;; gen_vec_unpacks_lo_v8hi into a fresh V4SI temporary, then converts that
;; temporary with gen_sse2_cvtdq2ps.
1872 (define_expand "vec_unpacks_float_lo_v8hi"
1873 [(match_operand:V4SF 0 "register_operand" "")
1874 (match_operand:V8HI 1 "register_operand" "")]
1877 rtx tmp = gen_reg_rtx (V4SImode);
1879 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1880 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0.  Emits
;; gen_vec_unpacku_hi_v8hi into a fresh V4SI temporary, then converts that
;; temporary with gen_sse2_cvtdq2ps.
1884 (define_expand "vec_unpacku_float_hi_v8hi"
1885 [(match_operand:V4SF 0 "register_operand" "")
1886 (match_operand:V8HI 1 "register_operand" "")]
1889 rtx tmp = gen_reg_rtx (V4SImode);
1891 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1892 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0.  Emits
;; gen_vec_unpacku_lo_v8hi into a fresh V4SI temporary, then converts that
;; temporary with gen_sse2_cvtdq2ps.
1896 (define_expand "vec_unpacku_float_lo_v8hi"
1897 [(match_operand:V4SF 0 "register_operand" "")
1898 (match_operand:V8HI 1 "register_operand" "")]
1901 rtx tmp = gen_reg_rtx (V4SImode);
1903 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1904 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander that converts the high half of V4SI operand 1 to V2DF.  The
;; preparation code allocates a fresh V4SI pseudo as operands[2]; the final
;; set writes V2DF operand 0 using a parallel that selects elements 0 and 1.
;; (Presumably the first step moves the high elements into operands[2];
;; its selection list starts at index 2 -- confirm against the full pattern.)
1908 (define_expand "vec_unpacks_float_hi_v4si"
1911 (match_operand:V4SI 1 "nonimmediate_operand" "")
1912 (parallel [(const_int 2)
1916 (set (match_operand:V2DF 0 "register_operand" "")
1920 (parallel [(const_int 0) (const_int 1)]))))]
1923 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing V2DF operand 0 from V4SI operand 1, with a parallel
;; selecting source elements 0 and 1.
1926 (define_expand "vec_unpacks_float_lo_v4si"
1927 [(set (match_operand:V2DF 0 "register_operand" "")
1930 (match_operand:V4SI 1 "nonimmediate_operand" "")
1931 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander packing two V2DF inputs into one V4SF result.  Each input is
;; converted with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2);
;; gen_sse_movlhps then combines r1 and r2 into operand 0.
1934 (define_expand "vec_pack_trunc_v2df"
1935 [(match_operand:V4SF 0 "register_operand" "")
1936 (match_operand:V2DF 1 "nonimmediate_operand" "")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1942 r1 = gen_reg_rtx (V4SFmode);
1943 r2 = gen_reg_rtx (V4SFmode);
1945 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1946 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1947 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF inputs into one V4SI result with truncating
;; conversion.  Each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary; gen_sse2_punpcklqdq then combines the two temporaries,
;; with all three operands viewed as V2DImode through gen_lowpart.
1951 (define_expand "vec_pack_sfix_trunc_v2df"
1952 [(match_operand:V4SI 0 "register_operand" "")
1953 (match_operand:V2DF 1 "nonimmediate_operand" "")
1954 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1959 r1 = gen_reg_rtx (V4SImode);
1960 r2 = gen_reg_rtx (V4SImode);
1962 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1963 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1964 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1965 gen_lowpart (V2DImode, r1),
1966 gen_lowpart (V2DImode, r2)));
;; Expander packing two V2DF inputs into one V4SI result using the
;; non-truncating gen_sse2_cvtpd2dq conversion.  Each input is converted
;; into a V4SI temporary; gen_sse2_punpcklqdq then combines the two
;; temporaries, with all three operands viewed as V2DImode through
;; gen_lowpart.
1970 (define_expand "vec_pack_sfix_v2df"
1971 [(match_operand:V4SI 0 "register_operand" "")
1972 (match_operand:V2DF 1 "nonimmediate_operand" "")
1973 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1978 r1 = gen_reg_rtx (V4SImode);
1979 r2 = gen_reg_rtx (V4SImode);
1981 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1982 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1983 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1984 gen_lowpart (V2DImode, r1),
1985 gen_lowpart (V2DImode, r2)));
1989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1991 ;; Parallel single-precision floating point element swizzling
1993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for movhlps: accepts any nonimmediate operands and
;; passes them to ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands)
;; in its preparation statement.  The selection list starts at index 6.
1995 (define_expand "sse_movhlps_exp"
1996 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
1999 (match_operand:V4SF 1 "nonimmediate_operand" "")
2000 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2001 (parallel [(const_int 6)
2006 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; movhlps with three alternatives, all with operand 1 tied to the
;; destination:
;;   0: register destination, register source   -> movhlps
;;   1: register destination, offsettable memory source ("o") -> movlps
;;      reading %H2
;;   2: memory destination, register source     -> movhps
;; The condition rejects operands 1 and 2 both being memory.  The "mode"
;; attribute lists one value per alternative.
2008 (define_insn "sse_movhlps"
2009 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2012 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2013 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2014 (parallel [(const_int 6)
2018 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2020 movhlps\t{%2, %0|%0, %2}
2021 movlps\t{%H2, %0|%0, %H2}
2022 movhps\t{%2, %0|%0, %2}"
2023 [(set_attr "type" "ssemov")
2024 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for movlhps: accepts any nonimmediate operands and
;; passes them to ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands)
;; in its preparation statement.  The selection list starts at index 0.
2026 (define_expand "sse_movlhps_exp"
2027 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2030 (match_operand:V4SF 1 "nonimmediate_operand" "")
2031 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2032 (parallel [(const_int 0)
2037 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; movlhps with three alternatives, all with operand 1 tied to the
;; destination:
;;   0: register destination, register source -> movlhps
;;   1: register destination, memory source   -> movhps
;;   2: offsettable memory destination ("o"), register source -> movlps
;;      writing %H0
;; The condition defers to ix86_binary_operator_ok.  The "mode" attribute
;; lists one value per alternative.
2039 (define_insn "sse_movlhps"
2040 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2043 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2044 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2045 (parallel [(const_int 0)
2049 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2051 movlhps\t{%2, %0|%0, %2}
2052 movhps\t{%2, %0|%0, %2}
2053 movlps\t{%2, %H0|%H0, %2}"
2054 [(set_attr "type" "ssemov")
2055 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  The selector picks indices 2, 6, 3, 7 from operands 1 and
;; 2; operand 1 must be the destination register (constraint 0).
2057 (define_insn "sse_unpckhps"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x")
2061 (match_operand:V4SF 1 "register_operand" "0")
2062 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2063 (parallel [(const_int 2) (const_int 6)
2064 (const_int 3) (const_int 7)])))]
2066 "unpckhps\t{%2, %0|%0, %2}"
2067 [(set_attr "type" "sselog")
2068 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  The selector picks indices 0, 4, 1, 5 from operands 1 and
;; 2; operand 1 must be the destination register (constraint 0).
2070 (define_insn "sse_unpcklps"
2071 [(set (match_operand:V4SF 0 "register_operand" "=x")
2074 (match_operand:V4SF 1 "register_operand" "0")
2075 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2076 (parallel [(const_int 0) (const_int 4)
2077 (const_int 1) (const_int 5)])))]
2079 "unpcklps\t{%2, %0|%0, %2}"
2080 [(set_attr "type" "sselog")
2081 (set_attr "mode" "V4SF")])
2083 ;; These are modeled with the same vec_concat as the others so that we
2084 ;; capture users of shufps that can use the new instructions
;; Emits movshdup from register-or-memory operand 1 into register operand 0
;; (selector list starts at index 1).  Marked with prefix_rep 1.
2085 (define_insn "sse3_movshdup"
2086 [(set (match_operand:V4SF 0 "register_operand" "=x")
2089 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2091 (parallel [(const_int 1)
2096 "movshdup\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sse")
2098 (set_attr "prefix_rep" "1")
2099 (set_attr "mode" "V4SF")])
;; Emits movsldup from register-or-memory operand 1 into register operand 0
;; (selector list starts at index 0).  Marked with prefix_rep 1.
2101 (define_insn "sse3_movsldup"
2102 [(set (match_operand:V4SF 0 "register_operand" "=x")
2105 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2107 (parallel [(const_int 0)
2112 "movsldup\t{%1, %0|%0, %1}"
2113 [(set_attr "type" "sse")
2114 (set_attr "prefix_rep" "1")
2115 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The mask is cut into
;; four 2-bit fields and passed to gen_sse_shufps_1; the two upper fields get
;; 4 added so that they index into the second source vector.
2117 (define_expand "sse_shufps"
2118 [(match_operand:V4SF 0 "register_operand" "")
2119 (match_operand:V4SF 1 "register_operand" "")
2120 (match_operand:V4SF 2 "nonimmediate_operand" "")
2121 (match_operand:SI 3 "const_int_operand" "")]
2124 int mask = INTVAL (operands[3]);
2125 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2126 GEN_INT ((mask >> 0) & 3),
2127 GEN_INT ((mask >> 2) & 3),
2128 GEN_INT (((mask >> 4) & 3) + 4),
2129 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of shufps with four explicit element selectors (operands 3..6).
;; The output code folds them back into one immediate: operands 3 and 4 go to
;; bits 0 and 2, operands 5 and 6 have 4 subtracted and go to bits 4 and 6.
;; The result replaces operand 3 before the template is returned.
2133 (define_insn "sse_shufps_1"
2134 [(set (match_operand:V4SF 0 "register_operand" "=x")
2137 (match_operand:V4SF 1 "register_operand" "0")
2138 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2139 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2140 (match_operand 4 "const_0_to_3_operand" "")
2141 (match_operand 5 "const_4_to_7_operand" "")
2142 (match_operand 6 "const_4_to_7_operand" "")])))]
2146 mask |= INTVAL (operands[3]) << 0;
2147 mask |= INTVAL (operands[4]) << 2;
2148 mask |= (INTVAL (operands[5]) - 4) << 4;
2149 mask |= (INTVAL (operands[6]) - 4) << 6;
2150 operands[3] = GEN_INT (mask);
2152 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2154 [(set_attr "type" "sselog")
2155 (set_attr "mode" "V4SF")])
;; Extracts indices 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory with movhps, register to register with
;; movhlps, or load from offsettable memory with movlps on the %H1 half.
2157 (define_insn "sse_storehps"
2158 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2160 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2161 (parallel [(const_int 2) (const_int 3)])))]
2164 movhps\t{%1, %0|%0, %1}
2165 movhlps\t{%1, %0|%0, %1}
2166 movlps\t{%H1, %0|%0, %H1}"
2167 [(set_attr "type" "ssemov")
2168 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining indices 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Its preparation statement legitimizes the operands through
;; ix86_fixup_binary_operands.
2170 (define_expand "sse_loadhps_exp"
2171 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2174 (match_operand:V4SF 1 "nonimmediate_operand" "")
2175 (parallel [(const_int 0) (const_int 1)]))
2176 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2178 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Insn keeping indices 0 and 1 of operand 1 and taking V2SF operand 2 as the
;; other half.  Alternatives: memory source emits movhps, register source
;; emits movlhps, offsettable-memory destination emits movlps into %H0.
2180 (define_insn "sse_loadhps"
2181 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2184 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2185 (parallel [(const_int 0) (const_int 1)]))
2186 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2189 movhps\t{%2, %0|%0, %2}
2190 movlhps\t{%2, %0|%0, %2}
2191 movlps\t{%2, %H0|%H0, %2}"
2192 [(set_attr "type" "ssemov")
2193 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts indices 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory with movlps, register copy with movaps, or
;; load from memory with movlps.
2195 (define_insn "sse_storelps"
2196 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2198 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2199 (parallel [(const_int 0) (const_int 1)])))]
2202 movlps\t{%1, %0|%0, %1}
2203 movaps\t{%1, %0|%0, %1}
2204 movlps\t{%1, %0|%0, %1}"
2205 [(set_attr "type" "ssemov")
2206 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining V2SF operand 2 with indices 2 and 3 of V4SF operand 1.
;; Its preparation statement legitimizes the operands through
;; ix86_fixup_binary_operands.
2208 (define_expand "sse_loadlps_exp"
2209 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2211 (match_operand:V2SF 2 "nonimmediate_operand" "")
2213 (match_operand:V4SF 1 "nonimmediate_operand" "")
2214 (parallel [(const_int 2) (const_int 3)]))))]
2216 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Insn taking V2SF operand 2 together with indices 2 and 3 of operand 1.
;; Alternatives: when operand 2 already sits in the destination, shufps with
;; immediate 0xe4 against operand 1; otherwise movlps from memory or movlps to
;; a memory destination.
2218 (define_insn "sse_loadlps"
2219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2224 (parallel [(const_int 2) (const_int 3)]))))]
2227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2228 movlps\t{%2, %0|%0, %2}
2229 movlps\t{%2, %0|%0, %2}"
2230 [(set_attr "type" "sselog,ssemov,ssemov")
2231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits movss from register operand 2 into operand 0; operand 1 must be the
;; destination register (constraint 0).  Attribute mode is SF.
2233 (define_insn "sse_movss"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x")
2236 (match_operand:V4SF 2 "register_operand" "x")
2237 (match_operand:V4SF 1 "register_operand" "0")
2240 "movss\t{%2, %0|%0, %2}"
2241 [(set_attr "type" "ssemov")
2242 (set_attr "mode" "SF")])
;; SF operand 1 must already be in the destination register (constraint 0);
;; the insn then emits shufps of operand 0 with itself and immediate 0.
2244 (define_insn "*vec_dupv4sf"
2245 [(set (match_operand:V4SF 0 "register_operand" "=x")
2247 (match_operand:SF 1 "register_operand" "0")))]
2249 "shufps\t{$0, %0, %0|%0, %0, 0}"
2250 [(set_attr "type" "sselog1")
2251 (set_attr "mode" "V4SF")])
2253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from SF operands 1 and 2 (operand 2 may be zero, constraint
;; C).  Alternatives in x registers emit unpcklps or movss; alternatives in y
;; registers emit punpckldq or movd.
2256 (define_insn "*sse_concatv2sf"
2257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2263 unpcklps\t{%2, %0|%0, %2}
2264 movss\t{%1, %0|%0, %1}
2265 punpckldq\t{%2, %0|%0, %2}
2266 movd\t{%1, %0|%0, %1}"
2267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from V2SF operands 1 and 2, with operand 1 already in the
;; destination.  A register operand 2 emits movlhps, a memory operand 2 emits
;; movhps.
2270 (define_insn "*sse_concatv4sf"
2271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2273 (match_operand:V2SF 1 "register_operand" " 0,0")
2274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2277 movlhps\t{%2, %0|%0, %2}
2278 movhps\t{%2, %0|%0, %2}"
2279 [(set_attr "type" "ssemov")
2280 (set_attr "mode" "V4SF,V2SF")])
;; Expander that hands V4SF initialization of operand 0 from operand 1 to
;; ix86_expand_vector_init, passing false as its first argument.
2282 (define_expand "vec_initv4sf"
2283 [(match_operand:V4SF 0 "register_operand" "")
2284 (match_operand 1 "" "")]
2287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into V4SF operand 1, which is either the destination
;; itself (constraint 0) or a constant matching constraint C.  The first three
;; alternatives emit movss, movss and movd; the fourth has a memory
;; destination.
2291 (define_insn "vec_setv4sf_0"
2292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2300 movss\t{%2, %0|%0, %2}
2301 movss\t{%2, %0|%0, %2}
2302 movd\t{%2, %0|%0, %2}
2304 [(set_attr "type" "ssemov")
2305 (set_attr "mode" "SF")])
2307 ;; A subset is vec_setv4sf.
;; Inserts SF operand 2 into V4SF operand 1 using insertps.  Operand 3 is a
;; constant accepted by const_pow2_1_to_8_operand; the output code replaces it
;; with exact_log2 of its value shifted left by 4 to form the immediate.
2308 (define_insn "*vec_setv4sf_sse4_1"
2309 [(set (match_operand:V4SF 0 "register_operand" "=x")
2312 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2313 (match_operand:V4SF 1 "register_operand" "0")
2314 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2317 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2318 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2320 [(set_attr "type" "sselog")
2321 (set_attr "prefix_extra" "1")
2322 (set_attr "mode" "V4SF")])
;; Unspec-based insertps: register operands 2 and 1 (operand 1 is the
;; destination register) plus immediate operand 3, which must satisfy
;; const_0_to_255_operand and is printed unchanged.
2324 (define_insn "sse4_1_insertps"
2325 [(set (match_operand:V4SF 0 "register_operand" "=x")
2326 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2327 (match_operand:V4SF 1 "register_operand" "0")
2328 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2331 "insertps\t{%3, %2, %0|%0, %2, %3}";
2332 [(set_attr "type" "sselog")
2333 (set_attr "prefix_extra" "1")
2334 (set_attr "mode" "V4SF")])
;; Split, active with TARGET_SSE once reload has completed: inserting
;; non-memory SF operand 1 into V4SF memory operand 0 becomes a plain SFmode
;; move to byte offset 0 of that memory.
2337 [(set (match_operand:V4SF 0 "memory_operand" "")
2340 (match_operand:SF 1 "nonmemory_operand" ""))
2343 "TARGET_SSE && reload_completed"
2346 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander that hands setting one V4SF element to ix86_expand_vector_set:
;; operand 0 is the vector, operand 1 the SF value, operand 2 the constant
;; whose INTVAL is passed as the last argument.
2350 (define_expand "vec_setv4sf"
2351 [(match_operand:V4SF 0 "register_operand" "")
2352 (match_operand:SF 1 "register_operand" "")
2353 (match_operand 2 "const_int_operand" "")]
2356 ix86_expand_vector_set (false, operands[0], operands[1],
2357 INTVAL (operands[2]));
;; Extracts index 0 of V4SF operand 1 into SF operand 0; requires TARGET_SSE
;; and rejects memory-to-memory.  After reload it is split into an ordinary
;; move whose source is operand 1 re-expressed in SFmode, either as a REG with
;; the same register number or through gen_lowpart.
2361 (define_insn_and_split "*vec_extractv4sf_0"
2362 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2364 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2365 (parallel [(const_int 0)])))]
2366 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2368 "&& reload_completed"
2371 rtx op1 = operands[1];
2373 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2375 op1 = gen_lowpart (SFmode, op1);
2376 emit_move_insn (operands[0], op1);
;; Emits extractps: the element of V4SF register operand 1 named by constant
;; operand 2 (const_0_to_3_operand) goes to SF operand 0, a general register
;; or memory.
2380 (define_insn "*sse4_1_extractps"
2381 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2383 (match_operand:V4SF 1 "register_operand" "x")
2384 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2386 "extractps\t{%2, %1, %0|%0, %1, %2}"
2387 [(set_attr "type" "sselog")
2388 (set_attr "prefix_extra" "1")
2389 (set_attr "mode" "V4SF")])
;; Extracts element operand 2 from a V4SF held in offsettable memory.  The
;; split turns it into an SFmode move from the same memory at byte offset
;; four times the element index.
2391 (define_insn_and_split "*vec_extract_v4sf_mem"
2392 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2394 (match_operand:V4SF 1 "memory_operand" "o")
2395 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2401 int i = INTVAL (operands[2]);
2403 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander that hands extraction of one V4SF element to
;; ix86_expand_vector_extract: operand 0 receives the SF value, operand 1 is
;; the vector, operand 2 the constant whose INTVAL is passed as the index.
2407 (define_expand "vec_extractv4sf"
2408 [(match_operand:SF 0 "register_operand" "")
2409 (match_operand:V4SF 1 "register_operand" "")
2410 (match_operand 2 "const_int_operand" "")]
2413 ix86_expand_vector_extract (false, operands[0], operands[1],
2414 INTVAL (operands[2]));
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;; Parallel double-precision floating point element swizzling
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unpckhpd-style V2DF shuffle of operands 1 and 2 (selector
;; list starts at index 1).  Its preparation statement legitimizes the
;; operands through ix86_fixup_binary_operands.
2424 (define_expand "sse2_unpckhpd_exp"
2425 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2428 (match_operand:V2DF 1 "nonimmediate_operand" "")
2429 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2430 (parallel [(const_int 1)
2433 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Insn with three alternatives: register form emits unpckhpd, an
;; offsettable-memory operand 1 emits movlpd from its %H1 half, and a memory
;; destination emits movhpd of operand 1.  Requires TARGET_SSE2 and rejects
;; the case where operands 1 and 2 are both memory.
2435 (define_insn "sse2_unpckhpd"
2436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2440 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2441 (parallel [(const_int 1)
2443 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2445 unpckhpd\t{%2, %0|%0, %2}
2446 movlpd\t{%H1, %0|%0, %H1}
2447 movhpd\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sselog,ssemov,ssemov")
2449 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Requires TARGET_SSE3 and rejects memory-to-memory.  The register
;; destination alternative emits movddup from operand 1; the second
;; alternative has an offsettable-memory destination and a register source.
2451 (define_insn "*sse3_movddup"
2452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2457 (parallel [(const_int 0)
2459 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2461 movddup\t{%1, %0|%0, %1}
2463 [(set_attr "type" "sselog1,ssemov")
2464 (set_attr "mode" "V2DF")])
;; Split, active with TARGET_SSE3 once reload has completed, for a V2DF memory
;; destination: the DFmode view of register operand 1 is stored twice, at byte
;; offsets 0 and 8 of operand 0.
2467 [(set (match_operand:V2DF 0 "memory_operand" "")
2470 (match_operand:V2DF 1 "register_operand" "")
2472 (parallel [(const_int 0)
2474 "TARGET_SSE3 && reload_completed"
2477 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2478 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2479 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander for the unpcklpd-style V2DF shuffle of operands 1 and 2 (selector
;; list starts at index 0).  Its preparation statement legitimizes the
;; operands through ix86_fixup_binary_operands.
2483 (define_expand "sse2_unpcklpd_exp"
2484 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2487 (match_operand:V2DF 1 "nonimmediate_operand" "")
2488 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2489 (parallel [(const_int 0)
2492 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Insn with three alternatives: register source emits unpcklpd, memory source
;; emits movhpd, and an offsettable-memory destination emits movlpd into the
;; %H0 half.  Requires TARGET_SSE2 and rejects the case where operands 1 and 2
;; are both memory.
2494 (define_insn "sse2_unpcklpd"
2495 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2498 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2499 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2500 (parallel [(const_int 0)
2502 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2504 unpcklpd\t{%2, %0|%0, %2}
2505 movhpd\t{%2, %0|%0, %2}
2506 movlpd\t{%2, %H0|%H0, %2}"
2507 [(set_attr "type" "sselog,ssemov,ssemov")
2508 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 and forwarding to
;; gen_sse2_shufpd_1.  Bit 1 of the mask chooses index 3 or 2 as the final
;; selector argument.
2510 (define_expand "sse2_shufpd"
2511 [(match_operand:V2DF 0 "register_operand" "")
2512 (match_operand:V2DF 1 "register_operand" "")
2513 (match_operand:V2DF 2 "nonimmediate_operand" "")
2514 (match_operand:SI 3 "const_int_operand" "")]
2517 int mask = INTVAL (operands[3]);
2518 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2520 GEN_INT (mask & 2 ? 3 : 2)));
;; Insn form of shufpd with two explicit element selectors (operands 3 and 4).
;; The output code rebuilds the immediate as operand 3 in bit 0 and operand 4
;; minus 2 in bit 1, stores it back into operand 3, and returns the template.
2524 (define_insn "sse2_shufpd_1"
2525 [(set (match_operand:V2DF 0 "register_operand" "=x")
2528 (match_operand:V2DF 1 "register_operand" "0")
2529 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2530 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2531 (match_operand 4 "const_2_to_3_operand" "")])))]
2535 mask = INTVAL (operands[3]);
2536 mask |= (INTVAL (operands[4]) - 2) << 1;
2537 operands[3] = GEN_INT (mask);
2539 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2541 [(set_attr "type" "sselog")
2542 (set_attr "mode" "V2DF")])
;; Extracts index 1 of V2DF operand 1 into DF operand 0.  Requires TARGET_SSE2
;; and rejects memory-to-memory.  The memory-destination alternative emits
;; movhpd; the other two alternatives have register destinations.
2544 (define_insn "sse2_storehpd"
2545 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2547 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2548 (parallel [(const_int 1)])))]
2549 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2551 movhpd\t{%1, %0|%0, %1}
2554 [(set_attr "type" "ssemov,sselog1,ssemov")
2555 (set_attr "mode" "V1DF,V2DF,DF")])
;; Split, active with TARGET_SSE2 once reload has completed: extracting index
;; 1 from a V2DF in memory becomes a plain DFmode move from byte offset 8 of
;; that memory.
2558 [(set (match_operand:DF 0 "register_operand" "")
2560 (match_operand:V2DF 1 "memory_operand" "")
2561 (parallel [(const_int 1)])))]
2562 "TARGET_SSE2 && reload_completed"
2563 [(set (match_dup 0) (match_dup 1))]
2565 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts index 0 of V2DF operand 1 into DF operand 0.  Requires TARGET_SSE2
;; and rejects memory-to-memory.  The memory-destination alternative emits
;; movlpd; the other two alternatives have register destinations.
2568 (define_insn "sse2_storelpd"
2569 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2571 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2572 (parallel [(const_int 0)])))]
2573 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2575 movlpd\t{%1, %0|%0, %1}
2578 [(set_attr "type" "ssemov")
2579 (set_attr "mode" "V1DF,DF,DF")])
;; Split, active with TARGET_SSE2 once reload has completed: extracting index
;; 0 of V2DF operand 1 into a DF register becomes an ordinary move whose
;; source is operand 1 re-expressed in DFmode, either as a REG with the same
;; register number or through gen_lowpart.
2582 [(set (match_operand:DF 0 "register_operand" "")
2584 (match_operand:V2DF 1 "nonimmediate_operand" "")
2585 (parallel [(const_int 0)])))]
2586 "TARGET_SSE2 && reload_completed"
2589 rtx op1 = operands[1];
2591 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2593 op1 = gen_lowpart (DFmode, op1);
2594 emit_move_insn (operands[0], op1);
;; Expander combining index 0 of V2DF operand 1 with DF operand 2.  Its
;; preparation statement legitimizes the operands through
;; ix86_fixup_binary_operands.
2598 (define_expand "sse2_loadhpd_exp"
2599 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2602 (match_operand:V2DF 1 "nonimmediate_operand" "")
2603 (parallel [(const_int 0)]))
2604 (match_operand:DF 2 "nonimmediate_operand" "")))]
2606 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Insn keeping index 0 of operand 1 and taking DF operand 2 as the other
;; element.  Requires TARGET_SSE2 and rejects the case where operands 1 and 2
;; are both memory.  Register-destination alternatives emit movhpd from
;; memory, unpcklpd from a register, or shufpd with immediate 1 when operand 2
;; already sits in the destination; the fourth alternative has an
;; offsettable-memory destination.
2608 (define_insn "sse2_loadhpd"
2609 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2612 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2613 (parallel [(const_int 0)]))
2614 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2615 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2617 movhpd\t{%2, %0|%0, %2}
2618 unpcklpd\t{%2, %0|%0, %2}
2619 shufpd\t{$1, %1, %0|%0, %1, 1}
2621 [(set_attr "type" "ssemov,sselog,sselog,other")
2622 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Split, active with TARGET_SSE2 once reload has completed, for a V2DF memory
;; destination whose index 0 is kept (match_dup 0): DF register operand 1 is
;; stored with a plain DFmode move to byte offset 8 of the memory.
2625 [(set (match_operand:V2DF 0 "memory_operand" "")
2627 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2628 (match_operand:DF 1 "register_operand" "")))]
2629 "TARGET_SSE2 && reload_completed"
2630 [(set (match_dup 0) (match_dup 1))]
2632 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander combining DF operand 2 with index 1 of V2DF operand 1.  Its
;; preparation statement legitimizes the operands through
;; ix86_fixup_binary_operands.
2635 (define_expand "sse2_loadlpd_exp"
2636 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2638 (match_operand:DF 2 "nonimmediate_operand" "")
2640 (match_operand:V2DF 1 "nonimmediate_operand" "")
2641 (parallel [(const_int 1)]))))]
2643 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Insn taking DF operand 2 together with index 1 of operand 1.  Requires
;; TARGET_SSE2 and rejects the case where operands 1 and 2 are both memory.
;; Register-destination alternatives, in order: movsd from memory with a
;; constant operand 1 (constraint C), movlpd from memory, movsd from a
;; register, shufpd with immediate 2, and movhpd from the %H1 half of an
;; offsettable-memory operand 1.  The sixth alternative has a memory
;; destination.
2645 (define_insn "sse2_loadlpd"
2646 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2648 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2650 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2651 (parallel [(const_int 1)]))))]
2652 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2654 movsd\t{%2, %0|%0, %2}
2655 movlpd\t{%2, %0|%0, %2}
2656 movsd\t{%2, %0|%0, %2}
2657 shufpd\t{$2, %2, %0|%0, %2, 2}
2658 movhpd\t{%H1, %0|%0, %H1}
2660 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2661 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Split, active with TARGET_SSE2 once reload has completed, for a V2DF memory
;; destination whose index 1 is kept (match_dup 0).  Only the low DF element
;; changes, so DF register operand 1 is stored with a plain DFmode move to
;; byte offset 0 of the memory.  Offset 8 would overwrite the preserved high
;; element and leave the low element stale.
2664 [(set (match_operand:V2DF 0 "memory_operand" "")
2666 (match_operand:DF 1 "register_operand" "")
2667 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2668 "TARGET_SSE2 && reload_completed"
2669 [(set (match_dup 0) (match_dup 1))]
2671 operands[0] = adjust_address (operands[0], DFmode, 0);
2674 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extracts index 1 of V2DF operand 1 into DF operand 0 when TARGET_SSE is set
;; but TARGET_SSE2 is not; memory-to-memory is rejected.  Alternatives emit
;; movhps to memory, movhlps between registers, or movlps from the %H1 half of
;; offsettable memory.
2676 (define_insn "*vec_extractv2df_1_sse"
2677 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2679 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2680 (parallel [(const_int 1)])))]
2681 "!TARGET_SSE2 && TARGET_SSE
2682 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2684 movhps\t{%1, %0|%0, %1}
2685 movhlps\t{%1, %0|%0, %1}
2686 movlps\t{%H1, %0|%0, %H1}"
2687 [(set_attr "type" "ssemov")
2688 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts index 0 of V2DF operand 1 into DF operand 0 when TARGET_SSE is set
;; but TARGET_SSE2 is not; memory-to-memory is rejected.  Alternatives emit
;; movlps to memory, movaps between registers, or movlps from memory.
2690 (define_insn "*vec_extractv2df_0_sse"
2691 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2693 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2694 (parallel [(const_int 0)])))]
2695 "!TARGET_SSE2 && TARGET_SSE
2696 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2698 movlps\t{%1, %0|%0, %1}
2699 movaps\t{%1, %0|%0, %1}
2700 movlps\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssemov")
2702 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six-alternative insn over V2DF operands 2 and 1.  In the first three
;; alternatives operand 1 is the destination and operand 2 is moved in with
;; movsd or movlpd.  In the last three operand 2 is the destination and the
;; templates are shufpd with immediate 2, movhps from %H1, and movhps into
;; %H0.
2704 (define_insn "sse2_movsd"
2705 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2707 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2708 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2712 movsd\t{%2, %0|%0, %2}
2713 movlpd\t{%2, %0|%0, %2}
2714 movlpd\t{%2, %0|%0, %2}
2715 shufpd\t{$2, %2, %0|%0, %2, 2}
2716 movhps\t{%H1, %0|%0, %H1}
2717 movhps\t{%1, %H0|%H0, %1}"
2718 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2719 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Emits movddup from DF operand 1 (register or memory) into V2DF register
;; operand 0.
2721 (define_insn "*vec_dupv2df_sse3"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x")
2724 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2726 "movddup\t{%1, %0|%0, %1}"
2727 [(set_attr "type" "sselog1")
2728 (set_attr "mode" "DF")])
;; V2DF pattern whose DF operand 1 must already be in the destination register
;; (constraint 0).  Attributes: type sselog1, mode V2DF.
2730 (define_insn "vec_dupv2df"
2731 [(set (match_operand:V2DF 0 "register_operand" "=x")
2733 (match_operand:DF 1 "register_operand" "0")))]
2736 [(set_attr "type" "sselog1")
2737 (set_attr "mode" "V2DF")])
;; Emits movddup from DF operand 1 (register or memory) into V2DF register
;; operand 0.  Attributes: type sselog1, mode DF.
2739 (define_insn "*vec_concatv2df_sse3"
2740 [(set (match_operand:V2DF 0 "register_operand" "=x")
2742 (match_operand:DF 1 "nonimmediate_operand" "xm")
2745 "movddup\t{%1, %0|%0, %1}"
2746 [(set_attr "type" "sselog1")
2747 (set_attr "mode" "DF")])
;; Builds a V2DF from DF operands 1 and 2.  Alternatives using the Y2 register
;; class emit unpcklpd, movhpd, or movsd from memory when operand 2 matches
;; constraint C.  Alternatives using the x class emit movlhps for a register
;; operand 2 or movhps for a memory operand 2.
2749 (define_insn "*vec_concatv2df"
2750 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2752 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2753 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2756 unpcklpd\t{%2, %0|%0, %2}
2757 movhpd\t{%2, %0|%0, %2}
2758 movsd\t{%1, %0|%0, %1}
2759 movlhps\t{%2, %0|%0, %2}
2760 movhps\t{%2, %0|%0, %2}"
2761 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2762 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander that hands setting one V2DF element to ix86_expand_vector_set:
;; operand 0 is the vector, operand 1 the DF value, operand 2 the constant
;; whose INTVAL is passed as the last argument.
2764 (define_expand "vec_setv2df"
2765 [(match_operand:V2DF 0 "register_operand" "")
2766 (match_operand:DF 1 "register_operand" "")
2767 (match_operand 2 "const_int_operand" "")]
2770 ix86_expand_vector_set (false, operands[0], operands[1],
2771 INTVAL (operands[2]));
;; Expander that hands extraction of one V2DF element to
;; ix86_expand_vector_extract: operand 0 receives the DF value, operand 1 is
;; the vector, operand 2 the constant whose INTVAL is passed as the index.
2775 (define_expand "vec_extractv2df"
2776 [(match_operand:DF 0 "register_operand" "")
2777 (match_operand:V2DF 1 "register_operand" "")
2778 (match_operand 2 "const_int_operand" "")]
2781 ix86_expand_vector_extract (false, operands[0], operands[1],
2782 INTVAL (operands[2]));
;; Expander that hands V2DF initialization of operand 0 from operand 1 to
;; ix86_expand_vector_init, passing false as its first argument.
2786 (define_expand "vec_initv2df"
2787 [(match_operand:V2DF 0 "register_operand" "")
2788 (match_operand 1 "" "")]
2791 ix86_expand_vector_init (false, operands[0], operands[1]);
2795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2797 ;; Parallel integral arithmetic
2799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every mode in SSEMODEI.  The preparation statement
;; creates operand 2 as the zero constant of the mode forced into a register.
2801 (define_expand "neg<mode>2"
2802 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2805 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2807 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the plusminus codes over every mode in SSEMODEI.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy before matching.
2809 (define_expand "<plusminus_insn><mode>3"
2810 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2812 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2813 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2815 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the expander of the same name.  The mnemonic is the
;; letter p, then the plusminus mnemonic, then the element-size suffix from
;; ssevecsize.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
2817 (define_insn "*<plusminus_insn><mode>3"
2818 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2820 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2821 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2822 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2823 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2824 [(set_attr "type" "sseiadd")
2825 (set_attr "prefix_data16" "1")
2826 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes over the SSEMODE12 modes (V16QI and
;; V8HI).  Operands are legitimized with ix86_fixup_binary_operands_no_copy.
2828 (define_expand "sse2_<plusminus_insn><mode>3"
2829 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2830 (sat_plusminus:SSEMODE12
2831 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2832 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2834 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus expander.  The mnemonic is the letter
;; p, then the plusminus mnemonic, then the element-size suffix from
;; ssevecsize.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
2836 (define_insn "*sse2_<plusminus_insn><mode>3"
2837 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2838 (sat_plusminus:SSEMODE12
2839 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2840 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2841 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2842 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "sseiadd")
2844 (set_attr "prefix_data16" "1")
2845 (set_attr "mode" "TI")])
;; V16QI multiply on register operands, valid only while reload has neither
;; started nor completed.  Two expansions are present.  The SSE5 one unpacks
;; each source into V8HI halves with ix86_expand_sse5_unpack, multiplies the
;; halves with gen_mulv8hi3 and repacks with ix86_expand_sse5_pack.  The other
;; widens with punpckhbw and punpcklbw of each source against itself,
;; multiplies as V8HI, then gathers the low byte of every word with a chain of
;; punpckhbw and punpcklbw steps.
2847 (define_insn_and_split "mulv16qi3"
2848 [(set (match_operand:V16QI 0 "register_operand" "")
2849 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2850 (match_operand:V16QI 2 "register_operand" "")))]
2852 && !(reload_completed || reload_in_progress)"
2857 rtx t[12], op0, op[3];
2862 /* On SSE5, we can take advantage of the pperm instruction to pack and
2863 unpack the bytes. Unpack data such that we've got a source byte in
2864 each low byte of each word. We don't care what goes into the high
2865 byte, so put 0 there. */
2866 for (i = 0; i < 6; ++i)
2867 t[i] = gen_reg_rtx (V8HImode);
2869 for (i = 0; i < 2; i++)
2872 op[1] = operands[i+1];
2873 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2876 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2879 /* Multiply words. */
2880 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2881 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2883 /* Pack the low byte of each word back into a single xmm */
2884 op[0] = operands[0];
2887 ix86_expand_sse5_pack (op);
2891 for (i = 0; i < 12; ++i)
2892 t[i] = gen_reg_rtx (V16QImode);
2894 /* Unpack data such that we've got a source byte in each low byte of
2895 each word. We don't care what goes into the high byte of each word.
2896 Rather than trying to get zero in there, most convenient is to let
2897 it be a copy of the low byte. */
2898 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2899 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2900 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2901 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2903 /* Multiply words. The end-of-line annotations here give a picture of what
2904 the output of that instruction looks like. Dot means don't care; the
2905 letters are the bytes of the result with A being the most significant. */
2906 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2907 gen_lowpart (V8HImode, t[0]),
2908 gen_lowpart (V8HImode, t[1])));
2909 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2910 gen_lowpart (V8HImode, t[2]),
2911 gen_lowpart (V8HImode, t[3])));
2913 /* Extract the relevant bytes and merge them back together. */
2914 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2915 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2916 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2917 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2918 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2919 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2922 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy before matching.
2926 (define_expand "mulv8hi3"
2927 [(set (match_operand:V8HI 0 "register_operand" "")
2928 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2929 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2931 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply insn emitting pmullw.  Operand 1 is commutative with operand
;; 2 (percent constraint) and tied to the destination.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok.
2933 (define_insn "*mulv8hi3"
2934 [(set (match_operand:V8HI 0 "register_operand" "=x")
2935 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2936 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2937 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2938 "pmullw\t{%2, %0|%0, %2}"
2939 [(set_attr "type" "sseimul")
2940 (set_attr "prefix_data16" "1")
2941 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply of operands 1 and 2.
;; Operands are legitimized with ix86_fixup_binary_operands_no_copy.
2943 (define_expand "smulv8hi3_highpart"
2944 [(set (match_operand:V8HI 0 "register_operand" "")
2949 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2951 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2954 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for smulv8hi3_highpart, emitting pmulhw.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
2956 (define_insn "*smulv8hi3_highpart"
2957 [(set (match_operand:V8HI 0 "register_operand" "=x")
2962 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2964 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2966 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2967 "pmulhw\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseimul")
2969 (set_attr "prefix_data16" "1")
2970 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply of operands 1 and 2.
;; Operands are legitimized with ix86_fixup_binary_operands_no_copy.
2972 (define_expand "umulv8hi3_highpart"
2973 [(set (match_operand:V8HI 0 "register_operand" "")
2978 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2980 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2983 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for umulv8hi3_highpart, emitting pmulhuw.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
2985 (define_insn "*umulv8hi3_highpart"
2986 [(set (match_operand:V8HI 0 "register_operand" "=x")
2991 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2993 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2995 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2996 "pmulhuw\t{%2, %0|%0, %2}"
2997 [(set_attr "type" "sseimul")
2998 (set_attr "prefix_data16" "1")
2999 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of V4SI operands 1
;; and 2.  Operands are legitimized with ix86_fixup_binary_operands_no_copy.
3001 (define_expand "sse2_umulv2siv2di3"
3002 [(set (match_operand:V2DI 0 "register_operand" "")
3006 (match_operand:V4SI 1 "nonimmediate_operand" "")
3007 (parallel [(const_int 0) (const_int 2)])))
3010 (match_operand:V4SI 2 "nonimmediate_operand" "")
3011 (parallel [(const_int 0) (const_int 2)])))))]
3013 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse2_umulv2siv2di3, emitting pmuludq on elements 0 and 2
;; of each V4SI source.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
3015 (define_insn "*sse2_umulv2siv2di3"
3016 [(set (match_operand:V2DI 0 "register_operand" "=x")
3020 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3021 (parallel [(const_int 0) (const_int 2)])))
3024 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3025 (parallel [(const_int 0) (const_int 2)])))))]
3026 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3027 "pmuludq\t{%2, %0|%0, %2}"
3028 [(set_attr "type" "sseimul")
3029 (set_attr "prefix_data16" "1")
3030 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of V4SI operands 1
;; and 2, SSE4.1 variant.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
3032 (define_expand "sse4_1_mulv2siv2di3"
3033 [(set (match_operand:V2DI 0 "register_operand" "")
3037 (match_operand:V4SI 1 "nonimmediate_operand" "")
3038 (parallel [(const_int 0) (const_int 2)])))
3041 (match_operand:V4SI 2 "nonimmediate_operand" "")
3042 (parallel [(const_int 0) (const_int 2)])))))]
3044 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse4_1_mulv2siv2di3, emitting pmuldq on elements 0 and 2
;; of each V4SI source.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
3046 (define_insn "*sse4_1_mulv2siv2di3"
3047 [(set (match_operand:V2DI 0 "register_operand" "=x")
3051 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3052 (parallel [(const_int 0) (const_int 2)])))
3055 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3056 (parallel [(const_int 0) (const_int 2)])))))]
3057 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3058 "pmuldq\t{%2, %0|%0, %2}"
3059 [(set_attr "type" "sseimul")
3060 (set_attr "prefix_extra" "1")
3061 (set_attr "mode" "TI")])
;; Expander producing a V4SI result from V8HI operands 1 and 2.  Each source
;; is referenced through two V4HI selections, one whose index list starts at 0
;; and one whose index list starts at 1 and ends at 7.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
3063 (define_expand "sse2_pmaddwd"
3064 [(set (match_operand:V4SI 0 "register_operand" "")
3069 (match_operand:V8HI 1 "nonimmediate_operand" "")
3070 (parallel [(const_int 0)
3076 (match_operand:V8HI 2 "nonimmediate_operand" "")
3077 (parallel [(const_int 0)
3083 (vec_select:V4HI (match_dup 1)
3084 (parallel [(const_int 1)
3089 (vec_select:V4HI (match_dup 2)
3090 (parallel [(const_int 1)
3093 (const_int 7)]))))))]
3095 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for sse2_pmaddwd, emitting pmaddwd.  It uses the same pair of
;; V4HI selections per source as the expander.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
3097 (define_insn "*sse2_pmaddwd"
3098 [(set (match_operand:V4SI 0 "register_operand" "=x")
3103 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3104 (parallel [(const_int 0)
3110 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3111 (parallel [(const_int 0)
3117 (vec_select:V4HI (match_dup 1)
3118 (parallel [(const_int 1)
3123 (vec_select:V4HI (match_dup 2)
3124 (parallel [(const_int 1)
3127 (const_int 7)]))))))]
3128 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3129 "pmaddwd\t{%2, %0|%0, %2}"
3130 [(set_attr "type" "sseiadd")
3131 (set_attr "prefix_data16" "1")
3132 (set_attr "mode" "TI")])
;; V4SI multiply expander on register operands.  Only when TARGET_SSE4_1 or
;; TARGET_SSE5 is set are the operands passed through
;; ix86_fixup_binary_operands_no_copy.
3134 (define_expand "mulv4si3"
3135 [(set (match_operand:V4SI 0 "register_operand" "")
3136 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3137 (match_operand:V4SI 2 "register_operand" "")))]
3140 if (TARGET_SSE4_1 || TARGET_SSE5)
3141 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; V4SI multiply insn emitting pmulld.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.
3144 (define_insn "*sse4_1_mulv4si3"
3145 [(set (match_operand:V4SI 0 "register_operand" "=x")
3146 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3147 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3148 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3149 "pmulld\t{%2, %0|%0, %2}"
3150 [(set_attr "type" "sseimul")
3151 (set_attr "prefix_extra" "1")
3152 (set_attr "mode" "TI")])
3154 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3155 ;; multiply/add. In general, we expect the define_split to occur before
3156 ;; register allocation, so we have to handle the corner case where the target
3157 ;; is used as the base or index register in operands 1/2.
;; V4SI multiply with an early-clobber destination.  The split fires once
;; reload has completed, or earlier when operand 0 is not mentioned in operand
;; 1 or operand 2.  It rewrites the multiply as a multiply-plus form whose
;; extra operand 3 is the V4SI zero constant.
3158 (define_insn_and_split "*sse5_mulv4si3"
3159 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3160 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3161 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3164 "&& (reload_completed
3165 || (!reg_mentioned_p (operands[0], operands[1])
3166 && !reg_mentioned_p (operands[0], operands[2])))"
3170 (plus:V4SI (mult:V4SI (match_dup 1)
3174 operands[3] = CONST0_RTX (V4SImode);
3176 [(set_attr "type" "ssemuladd")
3177 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor SSE5.
;; The split synthesizes the product from two gen_sse2_umulv2siv2di3
;; multiplies (one on elements 2/0, one on elements 3/1 after shifting both
;; inputs down with gen_sse2_lshrti3), then gathers the results with
;; gen_sse2_pshufd_1 and merges them into operand 0 with gen_sse2_punpckldq.
;; The pattern is restricted to before reload; the split code allocates six
;; fresh V4SI pseudo registers with gen_reg_rtx.
3179 (define_insn_and_split "*sse2_mulv4si3"
3180 [(set (match_operand:V4SI 0 "register_operand" "")
3181 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3182 (match_operand:V4SI 2 "register_operand" "")))]
3183 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3184 && !(reload_completed || reload_in_progress)"
3189 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3195 t1 = gen_reg_rtx (V4SImode);
3196 t2 = gen_reg_rtx (V4SImode);
3197 t3 = gen_reg_rtx (V4SImode);
3198 t4 = gen_reg_rtx (V4SImode);
3199 t5 = gen_reg_rtx (V4SImode);
3200 t6 = gen_reg_rtx (V4SImode);
3201 thirtytwo = GEN_INT (32);
3203 /* Multiply elements 2 and 0. */
3204 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3207 /* Shift both input vectors down one element, so that elements 3
3208 and 1 are now in the slots for elements 2 and 0. For K8, at
3209 least, this is faster than using a shuffle. */
3210 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3211 gen_lowpart (TImode, op1),
3213 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3214 gen_lowpart (TImode, op2),
3216 /* Multiply elements 3 and 1. */
3217 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3220 /* Move the results in element 2 down to element 1; we don't care
3221 what goes in elements 2 and 3. */
3222 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3223 const0_rtx, const0_rtx));
3224 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3225 const0_rtx, const0_rtx));
3227 /* Merge the parts back together. */
3228 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from 32x32->64 multiplies
;; (gen_sse2_umulv2siv2di3).  With each 64-bit element written as
;; hi * 2^32 + lo, the split computes
;;   lo(op1) * lo(op2)
;;   + ((lo(op1) * hi(op2)) << 32)
;;   + ((lo(op2) * hi(op1)) << 32)
;; where the hi parts are obtained by a logical right shift of 32 bits.
;; The pattern is restricted to before reload; the split code allocates six
;; fresh V2DI pseudo registers with gen_reg_rtx.
3232 (define_insn_and_split "mulv2di3"
3233 [(set (match_operand:V2DI 0 "register_operand" "")
3234 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3235 (match_operand:V2DI 2 "register_operand" "")))]
3237 && !(reload_completed || reload_in_progress)"
3242 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3248 t1 = gen_reg_rtx (V2DImode);
3249 t2 = gen_reg_rtx (V2DImode);
3250 t3 = gen_reg_rtx (V2DImode);
3251 t4 = gen_reg_rtx (V2DImode);
3252 t5 = gen_reg_rtx (V2DImode);
3253 t6 = gen_reg_rtx (V2DImode);
3254 thirtytwo = GEN_INT (32);
3256 /* Multiply low parts. */
3257 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3258 gen_lowpart (V4SImode, op2)));
3260 /* Shift input vectors right 32 bits so we can multiply high parts. */
3261 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3262 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3264 /* Multiply high parts by low parts. */
3265 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3266 gen_lowpart (V4SImode, t3)));
3267 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3268 gen_lowpart (V4SImode, t2)));
3270 /* Shift them back. */
3271 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3272 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3274 /* Add the three parts together. */
3275 emit_insn (gen_addv2di3 (t6, t1, t4));
3276 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, "hi" variant: V8HI x V8HI -> V4SI.
;; Emits gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2 (both V8HI
;; temporaries), then combines t1 and t2 with gen_vec_interleave_highv8hi,
;; writing the result into operand 0 viewed as V8HI.
3280 (define_expand "vec_widen_smult_hi_v8hi"
3281 [(match_operand:V4SI 0 "register_operand" "")
3282 (match_operand:V8HI 1 "register_operand" "")
3283 (match_operand:V8HI 2 "register_operand" "")]
3286 rtx op1, op2, t1, t2, dest;
3290 t1 = gen_reg_rtx (V8HImode);
3291 t2 = gen_reg_rtx (V8HImode);
3292 dest = gen_lowpart (V8HImode, operands[0]);
3294 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3295 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3296 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, "lo" variant: V8HI x V8HI -> V4SI.
;; Emits gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2 (both V8HI
;; temporaries), then combines t1 and t2 with gen_vec_interleave_lowv8hi,
;; writing the result into operand 0 viewed as V8HI.
3300 (define_expand "vec_widen_smult_lo_v8hi"
3301 [(match_operand:V4SI 0 "register_operand" "")
3302 (match_operand:V8HI 1 "register_operand" "")
3303 (match_operand:V8HI 2 "register_operand" "")]
3306 rtx op1, op2, t1, t2, dest;
3310 t1 = gen_reg_rtx (V8HImode);
3311 t2 = gen_reg_rtx (V8HImode);
3312 dest = gen_lowpart (V8HImode, operands[0]);
3314 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3315 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3316 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, "hi" variant: V8HI x V8HI -> V4SI.
;; Emits gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2 (both V8HI
;; temporaries), then combines t1 and t2 with gen_vec_interleave_highv8hi,
;; writing the result into operand 0 viewed as V8HI.
3320 (define_expand "vec_widen_umult_hi_v8hi"
3321 [(match_operand:V4SI 0 "register_operand" "")
3322 (match_operand:V8HI 1 "register_operand" "")
3323 (match_operand:V8HI 2 "register_operand" "")]
3326 rtx op1, op2, t1, t2, dest;
3330 t1 = gen_reg_rtx (V8HImode);
3331 t2 = gen_reg_rtx (V8HImode);
3332 dest = gen_lowpart (V8HImode, operands[0]);
3334 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3335 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3336 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, "lo" variant: V8HI x V8HI -> V4SI.
;; Emits gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2 (both V8HI
;; temporaries), then combines t1 and t2 with gen_vec_interleave_lowv8hi,
;; writing the result into operand 0 viewed as V8HI.
3340 (define_expand "vec_widen_umult_lo_v8hi"
3341 [(match_operand:V4SI 0 "register_operand" "")
3342 (match_operand:V8HI 1 "register_operand" "")
3343 (match_operand:V8HI 2 "register_operand" "")]
3346 rtx op1, op2, t1, t2, dest;
3350 t1 = gen_reg_rtx (V8HImode);
3351 t2 = gen_reg_rtx (V8HImode);
3352 dest = gen_lowpart (V8HImode, operands[0]);
3354 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3355 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3356 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, "hi" variant: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself via gen_vec_interleave_highv4si
;; into a V4SI temporary, and the two temporaries are then multiplied into
;; operand 0 with gen_sse2_umulv2siv2di3.
;; NOTE(review): this pattern is named as a *signed* widening multiply, yet
;; the final multiply uses the "umul" generator, which by its name is the
;; unsigned one.  Confirm the result is correct for negative elements.
3360 (define_expand "vec_widen_smult_hi_v4si"
3361 [(match_operand:V2DI 0 "register_operand" "")
3362 (match_operand:V4SI 1 "register_operand" "")
3363 (match_operand:V4SI 2 "register_operand" "")]
3366 rtx op1, op2, t1, t2;
3370 t1 = gen_reg_rtx (V4SImode);
3371 t2 = gen_reg_rtx (V4SImode);
3373 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3374 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3375 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, "lo" variant: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself via gen_vec_interleave_lowv4si
;; into a V4SI temporary, and the two temporaries are then multiplied into
;; operand 0 with gen_sse2_umulv2siv2di3.
;; NOTE(review): this pattern is named as a *signed* widening multiply, yet
;; the final multiply uses the "umul" generator, which by its name is the
;; unsigned one.  Confirm the result is correct for negative elements.
3379 (define_expand "vec_widen_smult_lo_v4si"
3380 [(match_operand:V2DI 0 "register_operand" "")
3381 (match_operand:V4SI 1 "register_operand" "")
3382 (match_operand:V4SI 2 "register_operand" "")]
3385 rtx op1, op2, t1, t2;
3389 t1 = gen_reg_rtx (V4SImode);
3390 t2 = gen_reg_rtx (V4SImode);
3392 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3393 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3394 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
3398 (define_expand "vec_widen_umult_hi_v4si"
3399 [(match_operand:V2DI 0 "register_operand" "")
3400 (match_operand:V4SI 1 "register_operand" "")
3401 (match_operand:V4SI 2 "register_operand" "")]
3404 rtx op1, op2, t1, t2;
3408 t1 = gen_reg_rtx (V4SImode);
3409 t2 = gen_reg_rtx (V4SImode);