1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; follow the modes listed: 1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI.
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and packed floating-point modes together.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; Packed floating-point modes only.
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4: scalar modes get "ss"/"sd", packed modes get "ps"/"pd".
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s: always a scalar suffix; a packed mode maps to the
;; suffix of its element type (V4SF -> "ss", V2DF -> "sd").
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c: single precision letter for the packed modes; the
;; patterns in this file append it after a "p" or "s" in the mnemonic.
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Named move expander for every SSEMODE vector mode.  Both operands are
;; nonimmediate; the expansion is delegated to ix86_expand_vector_move.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for all SSEMODE modes.  Three alternatives: SSE constant ("C")
;; into a register, register-or-memory into a register, register into
;; memory.  The condition requires that at least one operand be a register.
;; The output code switches on which_alternative: the constant load is
;; produced by standard_sse_constant_opcode, the other moves pick movaps,
;; movapd or movdqa according to get_attr_mode.
;; The "mode" attribute is V4SF when optimizing for size, when SSE2 is not
;; available, or for alternative 2 when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise V4SF / V2DF for those vector modes and TI for everything else.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 switch (get_attr_mode (insn))
97 return "movaps\t{%1, %0|%0, %1}";
99 return "movapd\t{%1, %0|%0, %1}";
101 return "movdqa\t{%1, %0|%0, %1}";
107 [(set_attr "type" "sselog1,ssemov,ssemov")
109 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
110 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
111 (and (eq_attr "alternative" "2")
112 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
114 (const_string "V4SF")
115 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
116 (const_string "V4SF")
117 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
118 (const_string "V2DF")
120 (const_string "TI")))])
122 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
123 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
124 ;; from memory, we'd prefer to load the memory directly into the %xmm
125 ;; register. To facilitate this happy circumstance, this pattern won't
126 ;; split until after register allocation. If the 64-bit value didn't
127 ;; come from memory, this is the best we can do. This is much better
128 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; movdi_to_sse: place a DImode value (general registers "r" or memory "m")
;; into a V4SI xmm register.  Enabled only for 32-bit SSE2 targets with
;; TARGET_INTER_UNIT_MOVES, and split only once reload has completed.
;; Operand 2 is a scratch: an early-clobber xmm register for the register
;; alternative, unused ("X") for the memory alternative.
;; Split, register source: two sse2_loadld insns load the SImode subregs at
;; byte offsets 0 and 4 (the second one into the scratch), then
;; sse2_punpckldq merges the scratch into operand 0.
;; Split, memory source: a single vec_concatv2di of the memory operand with
;; const0_rtx, writing the V2DImode low part of operand 0.
131 (define_insn_and_split "movdi_to_sse"
133 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
134 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
135 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
136 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
138 "&& reload_completed"
141 if (register_operand (operands[1], DImode))
143 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
144 Assemble the 64-bit DImode value in an xmm register. */
145 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
146 gen_rtx_SUBREG (SImode, operands[1], 0)));
147 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
148 gen_rtx_SUBREG (SImode, operands[1], 4)));
149 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
151 else if (memory_operand (operands[1], DImode))
152 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at byte 0 and sets operand 2 to the
;; V4SF zero vector; the replacement pattern duplicates operand 1.
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand (SSE2 only).  Operand 1 is narrowed
;; to its DFmode subreg at byte 0, operand 2 becomes the DFmode zero, and
;; the result is the vec_concat of the two.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for an SSEMODE register operand; the work is done by
;; ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; movmisalign<mode> expander for every SSEMODE mode; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Packed-float move emitted as movups (V4SF) or movupd (V2DF).  Either a
;; load/register copy ("=x" <- "xm") or a store ("=m" <- "x"); the
;; condition additionally rejects memory-to-memory operand pairs.
198 (define_insn "<sse>_movup<ssemodesuffixf2c>"
199 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
201 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
203 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
204 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
205 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
206 [(set_attr "type" "ssemov")
207 (set_attr "mode" "<MODE>")])
;; V16QI move emitted as movdqu, wrapped in an unspec.  Requires SSE2 and
;; rejects memory-to-memory operand pairs.
209 (define_insn "sse2_movdqu"
210 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
211 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
213 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
214 "movdqu\t{%1, %0|%0, %1}"
215 [(set_attr "type" "ssemov")
216 (set_attr "prefix_data16" "1")
217 (set_attr "mode" "TI")])
;; Store of a packed-float xmm register to memory, emitted as movntps
;; (V4SF) or movntpd (V2DF).  The destination must be memory and the
;; source a register.
219 (define_insn "<sse>_movnt<mode>"
220 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
222 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
224 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
225 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
226 [(set_attr "type" "ssemov")
227 (set_attr "mode" "<MODE>")])
;; Store of a V2DI xmm register to memory, emitted as movntdq.
;; NOTE(review): the type here is "ssecvt" whereas <sse>_movnt<mode> uses
;; "ssemov" -- confirm whether the difference is intentional.
229 (define_insn "sse2_movntv2di"
230 [(set (match_operand:V2DI 0 "memory_operand" "=m")
231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
234 "movntdq\t{%1, %0|%0, %1}"
235 [(set_attr "type" "ssecvt")
236 (set_attr "prefix_data16" "1")
237 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, emitted as movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intentional.
239 (define_insn "sse2_movntsi"
240 [(set (match_operand:SI 0 "memory_operand" "=m")
241 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
244 "movnti\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssecvt")
246 (set_attr "mode" "V2DF")])
;; V16QI load from memory into an xmm register, emitted as lddqu.  The
;; source operand must be memory.
248 (define_insn "sse3_lddqu"
249 [(set (match_operand:V16QI 0 "register_operand" "=x")
250 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
253 "lddqu\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssecvt")
255 (set_attr "prefix_rep" "1")
256 (set_attr "mode" "TI")])
258 ; Expand patterns for non-temporal stores. At the moment, only those
259 ; that directly map to insns are defined; it would be possible to
260 ; define patterns for other modes that would expand to several insns.
;; storent<mode> for the packed float modes: operand 0 is the memory
;; destination, operand 1 the source register.
262 (define_expand "storent<mode>"
263 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
265 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
267 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent<mode> for the MODEF iterator (defined outside this section):
;; operand 0 is the memory destination, operand 1 the source register.
270 (define_expand "storent<mode>"
271 [(set (match_operand:MODEF 0 "memory_operand" "")
273 [(match_operand:MODEF 1 "register_operand" "")]
;; storentv2di: V2DI register to memory, expressed as an unspec store.
278 (define_expand "storentv2di"
279 [(set (match_operand:V2DI 0 "memory_operand" "")
280 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storentsi: SImode register to memory, expressed as an unspec store.
285 (define_expand "storentsi"
286 [(set (match_operand:SI 0 "memory_operand" "")
287 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294 ;; Parallel floating point arithmetic
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary operation expander for packed floats.  The rtx code <CODE> and
;; the operands are handed to ix86_expand_fp_absneg_operator, after which
;; expansion is DONE.
298 (define_expand "<code><mode>2"
299 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
301 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
302 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
303 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed float add/subtract expander (plusminus code iterator).  The
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
305 (define_expand "<addsub><mode>3"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
307 (plusminus:SSEMODEF2P
308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
310 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
311 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract insn: operand 1 is tied to the destination
;; (with the <comm> modifier supplied by the code iterator), operand 2 is
;; a register or memory.  Emits <addsub>ps / <addsub>pd.  The condition
;; also requires ix86_binary_operator_ok.
313 (define_insn "*<addsub><mode>3"
314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
315 (plusminus:SSEMODEF2P
316 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
317 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
318 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
319 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
320 "<addsub>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
321 [(set_attr "type" "sseadd")
322 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") add/subtract on a packed float register: a plusminus of
;; operands 1 and 2 wrapped in a vec_merge.  Operand 1 is a register tied
;; to the destination, operand 2 a register or memory.  Emits
;; <addsub>ss (V4SF) or <addsub>sd (V2DF); the "mode" attribute is the
;; scalar element mode.
;; The operator check is given <MODE>mode so that it follows the mode
;; iterator, as the sibling <sse>_vmmul<mode>3 pattern does, instead of a
;; fixed V4SFmode.
324 (define_insn "<sse>_vm<addsub><mode>3"
325 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
326 (vec_merge:SSEMODEF2P
327 (plusminus:SSEMODEF2P
328 (match_operand:SSEMODEF2P 1 "register_operand" "0")
329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
332 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
333 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
334 "<addsub>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
335 [(set_attr "type" "sseadd")
336 (set_attr "mode" "<ssescalarmode>")])
;; Packed float multiply expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with code MULT.
338 (define_expand "mul<mode>3"
339 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
341 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
342 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
343 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
344 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply insn emitting mulps / mulpd.  Operand 1 is tied
;; to the destination and marked commutative ("%0"); operand 2 is a
;; register or memory.
346 (define_insn "*mul<mode>3"
347 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
349 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
350 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
351 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
352 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
353 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
354 [(set_attr "type" "ssemul")
355 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply inside a vec_merge, emitting mulss / mulsd.
;; Operand 1 is a register tied to the destination; the "mode" attribute
;; is the scalar element mode.
357 (define_insn "<sse>_vmmul<mode>3"
358 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
359 (vec_merge:SSEMODEF2P
361 (match_operand:SSEMODEF2P 1 "register_operand" "0")
362 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
365 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
366 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
367 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
368 [(set_attr "type" "ssemul")
369 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, the compile is not optimizing for size, and
;; finite-math-only, no-trapping-math and unsafe-math-optimizations are
;; all in effect, the sequence produced by ix86_emit_swdivsf is emitted.
371 (define_expand "divv4sf3"
372 [(set (match_operand:V4SF 0 "register_operand" "")
373 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
374 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
379 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
380 && flag_finite_math_only && !flag_trapping_math
381 && flag_unsafe_math_optimizations)
383 ix86_emit_swdivsf (operands[0], operands[1],
384 operands[2], V4SFmode);
;; V2DF division expander; it only adjusts the operands through
;; ix86_fixup_binary_operands_no_copy.
389 (define_expand "divv2df3"
390 [(set (match_operand:V2DF 0 "register_operand" "")
391 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
392 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
394 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed float divide insn emitting divps / divpd.  Operand 1 is a
;; register tied to the destination (division is not commutative, so there
;; is no "%" modifier); operand 2 is a register or memory.
396 (define_insn "<sse>_div<mode>3"
397 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
399 (match_operand:SSEMODEF2P 1 "register_operand" "0")
400 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
401 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
402 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
403 [(set_attr "type" "ssediv")
404 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") divide inside a vec_merge, emitting divss / divsd.  The
;; "mode" attribute is the scalar element mode.
406 (define_insn "<sse>_vmdiv<mode>3"
407 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
408 (vec_merge:SSEMODEF2P
410 (match_operand:SSEMODEF2P 1 "register_operand" "0")
411 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
414 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
415 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
416 [(set_attr "type" "ssediv")
417 (set_attr "mode" "<ssescalarmode>")])
;; V4SF UNSPEC_RCP insn emitting rcpps; the source may be a register or
;; memory.
419 (define_insn "sse_rcpv4sf2"
420 [(set (match_operand:V4SF 0 "register_operand" "=x")
422 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
424 "rcpps\t{%1, %0|%0, %1}"
425 [(set_attr "type" "sse")
426 (set_attr "mode" "V4SF")])
;; Scalar form emitting rcpss.  Operand 1 is the source (register or
;; memory); operand 2 is a register tied to the destination.
428 (define_insn "sse_vmrcpv4sf2"
429 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
433 (match_operand:V4SF 2 "register_operand" "0")
436 "rcpss\t{%1, %0|%0, %1}"
437 [(set_attr "type" "sse")
438 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the compile is not optimizing for size, and finite-math-only,
;; no-trapping-math and unsafe-math-optimizations are all in effect, the
;; sequence from ix86_emit_swsqrtsf (last argument 0) is emitted.
440 (define_expand "sqrtv4sf2"
441 [(set (match_operand:V4SF 0 "register_operand" "")
442 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
445 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
446 && flag_finite_math_only && !flag_trapping_math
447 && flag_unsafe_math_optimizations)
449 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square-root insn emitting sqrtps.
454 (define_insn "sse_sqrtv4sf2"
455 [(set (match_operand:V4SF 0 "register_operand" "=x")
456 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
458 "sqrtps\t{%1, %0|%0, %1}"
459 [(set_attr "type" "sse")
460 (set_attr "mode" "V4SF")])
;; V2DF square-root insn emitting sqrtpd.
462 (define_insn "sqrtv2df2"
463 [(set (match_operand:V2DF 0 "register_operand" "=x")
464 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
466 "sqrtpd\t{%1, %0|%0, %1}"
467 [(set_attr "type" "sse")
468 (set_attr "mode" "V2DF")])
;; Scalar ("vm") square root inside a vec_merge, emitting sqrtss / sqrtsd.
;; Operand 1 is the source (register or memory); operand 2 is a register
;; tied to the destination.
470 (define_insn "<sse>_vmsqrt<mode>2"
471 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
472 (vec_merge:SSEMODEF2P
474 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
475 (match_operand:SSEMODEF2P 2 "register_operand" "0")
477 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
478 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
479 [(set_attr "type" "sse")
480 (set_attr "mode" "<ssescalarmode>")])
;; V4SF UNSPEC_RSQRT expander; emits the sequence produced by
;; ix86_emit_swsqrtsf with its last argument set to 1.
482 (define_expand "rsqrtv4sf2"
483 [(set (match_operand:V4SF 0 "register_operand" "")
485 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
488 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF UNSPEC_RSQRT insn emitting rsqrtps.
492 (define_insn "sse_rsqrtv4sf2"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
497 "rsqrtps\t{%1, %0|%0, %1}"
498 [(set_attr "type" "sse")
499 (set_attr "mode" "V4SF")])
;; Scalar form emitting rsqrtss.  Operand 1 is the source (register or
;; memory); operand 2 is a register tied to the destination.
501 (define_insn "sse_vmrsqrtv4sf2"
502 [(set (match_operand:V4SF 0 "register_operand" "=x")
504 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
506 (match_operand:V4SF 2 "register_operand" "0")
509 "rsqrtss\t{%1, %0|%0, %1}"
510 [(set_attr "type" "sse")
511 (set_attr "mode" "SF")])
513 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
514 ;; isn't really correct, as those rtl operators aren't defined when
515 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed float binary expander used with the <maxminfprefix> insns.
;; Unless flag_finite_math_only is set, operand 1 is forced into a
;; register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
517 (define_expand "<code><mode>3"
518 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
520 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
521 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
522 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
524 if (!flag_finite_math_only)
525 operands[1] = force_reg (<MODE>mode, operands[1]);
526 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Variant matched only under flag_finite_math_only: operand 1 is marked
;; commutative ("%0") and may be a register or memory before reload.
;; Emits <maxminfprefix>ps / <maxminfprefix>pd.
529 (define_insn "*<code><mode>3_finite"
530 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
532 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
533 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
534 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
535 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
536 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
537 [(set_attr "type" "sseadd")
538 (set_attr "mode" "<MODE>")])
;; General variant with no flag_finite_math_only requirement: operand 1
;; must be a register tied to the destination and carries no commutative
;; marker.  Emits <maxminfprefix>ps / <maxminfprefix>pd.
540 (define_insn "*<code><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
543 (match_operand:SSEMODEF2P 1 "register_operand" "0")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
546 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
547 [(set_attr "type" "sseadd")
548 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") form inside a vec_merge, emitting <maxminfprefix>ss /
;; <maxminfprefix>sd.  The "mode" attribute is the scalar element mode.
;; NOTE(review): the type is "sse" here while the packed variants use
;; "sseadd" -- confirm whether the difference is intentional.
550 (define_insn "<sse>_vm<code><mode>3"
551 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
552 (vec_merge:SSEMODEF2P
554 (match_operand:SSEMODEF2P 1 "register_operand" "0")
555 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
558 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
559 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sse")
561 (set_attr "mode" "<ssescalarmode>")])
563 ;; These versions of the min/max patterns implement exactly the operations
564 ;; min = (op1 < op2 ? op1 : op2)
565 ;; max = (!(op1 < op2) ? op1 : op2)
566 ;; Their operands are not commutative, and thus they may be used in the
567 ;; presence of -0.0 and NaN.
;; Non-commutative packed minimum emitting minps / minpd.  Operand 1 is a
;; register tied to the destination; operand 2 is a register or memory.
569 (define_insn "*ieee_smin<mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
572 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
575 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
576 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
;; Non-commutative packed maximum emitting maxps / maxpd.  Operand 1 is a
;; register tied to the destination; operand 2 is a register or memory.
580 (define_insn "*ieee_smax<mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
583 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
584 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
586 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
587 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "<MODE>")])
;; V4SF insn emitting addsubps.  The pattern combines an operation on
;; operands 1 and 2 with the minus of the same two operands.
591 (define_insn "sse3_addsubv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
595 (match_operand:V4SF 1 "register_operand" "0")
596 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
597 (minus:V4SF (match_dup 1) (match_dup 2))
600 "addsubps\t{%2, %0|%0, %2}"
601 [(set_attr "type" "sseadd")
602 (set_attr "prefix_rep" "1")
603 (set_attr "mode" "V4SF")])
;; V2DF insn emitting addsubpd; same operand layout as the V4SF version.
605 (define_insn "sse3_addsubv2df3"
606 [(set (match_operand:V2DF 0 "register_operand" "=x")
609 (match_operand:V2DF 1 "register_operand" "0")
610 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
611 (minus:V2DF (match_dup 1) (match_dup 2))
614 "addsubpd\t{%2, %0|%0, %2}"
615 [(set_attr "type" "sseadd")
616 (set_attr "mode" "V2DF")])
;; V4SF horizontal add/subtract emitting h<addsub>ps.  The pattern pairs
;; adjacent elements: elements 0/1 and 2/3 of operand 1, then elements
;; 0/1 and 2/3 of operand 2, each selected with vec_select:SF.
618 (define_insn "sse3_h<addsub>v4sf3"
619 [(set (match_operand:V4SF 0 "register_operand" "=x")
624 (match_operand:V4SF 1 "register_operand" "0")
625 (parallel [(const_int 0)]))
626 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
628 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
629 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
633 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
634 (parallel [(const_int 0)]))
635 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
637 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
638 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
640 "h<addsub>ps\t{%2, %0|%0, %2}"
641 [(set_attr "type" "sseadd")
642 (set_attr "prefix_rep" "1")
643 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/subtract emitting h<addsub>pd.  The pattern pairs
;; elements 0 and 1 of operand 1, then elements 0 and 1 of operand 2.
645 (define_insn "sse3_h<addsub>v2df3"
646 [(set (match_operand:V2DF 0 "register_operand" "=x")
650 (match_operand:V2DF 1 "register_operand" "0")
651 (parallel [(const_int 0)]))
652 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
655 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
656 (parallel [(const_int 0)]))
657 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
659 "h<addsub>pd\t{%2, %0|%0, %2}"
660 [(set_attr "type" "sseadd")
661 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF register.  Two expansions are present: a pair
;; of sse3_haddv4sf3 insns chained through a fresh temporary, and
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the choice between the two presumably depends on SSE3
;; availability -- confirm.
663 (define_expand "reduc_splus_v4sf"
664 [(match_operand:V4SF 0 "register_operand" "")
665 (match_operand:V4SF 1 "register_operand" "")]
670 rtx tmp = gen_reg_rtx (V4SFmode);
671 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
672 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
675 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF register: a single sse3_haddv2df3 with
;; operand 1 supplied as both inputs.
679 (define_expand "reduc_splus_v2df"
680 [(match_operand:V2DF 0 "register_operand" "")
681 (match_operand:V2DF 1 "register_operand" "")]
684 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF register via ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
688 (define_expand "reduc_smax_v4sf"
689 [(match_operand:V4SF 0 "register_operand" "")
690 (match_operand:V4SF 1 "register_operand" "")]
693 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register via ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
697 (define_expand "reduc_smin_v4sf"
698 [(match_operand:V4SF 0 "register_operand" "")
699 (match_operand:V4SF 1 "register_operand" "")]
702 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
708 ;; Parallel floating point comparisons
710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mask-producing comparison for scalar and packed float modes
;; (SSEMODEF4).  Operand 3 is any sse_comparison_operator; the template
;; prints it with %D3, giving cmp<cond>ss/sd/ps/pd.  Operand 1 is tied to
;; the destination.
712 (define_insn "<sse>_maskcmp<mode>3"
713 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
714 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
715 [(match_operand:SSEMODEF4 1 "register_operand" "0")
716 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
717 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
719 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
720 [(set_attr "type" "ssecmp")
721 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") mask comparison inside a vec_merge, emitting
;; cmp<cond>ss / cmp<cond>sd.  Disabled when TARGET_SSE5 is set.
723 (define_insn "<sse>_vmmaskcmp<mode>3"
724 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
725 (vec_merge:SSEMODEF2P
726 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
727 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
728 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
731 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
732 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssecmp")
734 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from a comparison of element 0 of two
;; <ssevecmode> operands; operand 1 may be memory.  Emits
;; comis<ssemodefsuffix>.
736 (define_insn "<sse>_comi"
737 [(set (reg:CCFP FLAGS_REG)
740 (match_operand:<ssevecmode> 0 "register_operand" "x")
741 (parallel [(const_int 0)]))
743 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
744 (parallel [(const_int 0)]))))]
745 "SSE_FLOAT_MODE_P (<MODE>mode)"
746 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
747 [(set_attr "type" "ssecomi")
748 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi but sets FLAGS_REG in CCFPU mode and emits
;; ucomis<ssemodefsuffix>.
750 (define_insn "<sse>_ucomi"
751 [(set (reg:CCFPU FLAGS_REG)
754 (match_operand:<ssevecmode> 0 "register_operand" "x")
755 (parallel [(const_int 0)]))
757 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
758 (parallel [(const_int 0)]))))]
759 "SSE_FLOAT_MODE_P (<MODE>mode)"
760 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
761 [(set_attr "type" "ssecomi")
762 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for packed floats.  Operands 4 and
;; 5 are the values being compared; operands 1 and 2 are the two arms of
;; the if_then_else.  The expansion is attempted through
;; ix86_expand_fp_vcond, whose return value is tested.
764 (define_expand "vcond<mode>"
765 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
766 (if_then_else:SSEMODEF2P
768 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
769 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
770 (match_operand:SSEMODEF2P 1 "general_operand" "")
771 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
772 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
774 if (ix86_expand_fp_vcond (operands))
780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
782 ;; Parallel floating point logical operations
784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed float insn emitting andnps / andnpd.  Operand 1 is a register
;; tied to the destination; operand 2 is a register or memory.
786 (define_insn "<sse>_nand<mode>3"
787 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
790 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
791 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
792 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
793 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
794 [(set_attr "type" "sselog")
795 (set_attr "mode" "<MODE>")])
;; Expander for the packed float logical operations that are emitted
;; with <plogicprefix>; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
797 (define_expand "<code><mode>3"
798 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
800 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
801 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
802 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
803 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float logical insn emitting <plogicprefix>ps / <plogicprefix>pd.
;; Operand 1 is commutative and tied to the destination ("%0"); operand 2
;; is a register or memory.
805 (define_insn "*<code><mode>3"
806 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
808 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
809 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
810 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
811 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
812 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
813 [(set_attr "type" "sselog")
814 (set_attr "mode" "<MODE>")])
816 ;; Also define scalar versions. These are used for abs, neg, and
817 ;; conditional move. Using subregs into vector modes causes register
818 ;; allocation lossage. These patterns do not allow memory operands
819 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) variant emitting andnp<ssemodefsuffix>.  Both
;; sources must be registers, and the "mode" attribute is the
;; corresponding vector mode.
821 (define_insn "*nand<mode>3"
822 [(set (match_operand:MODEF 0 "register_operand" "=x")
825 (match_operand:MODEF 1 "register_operand" "0"))
826 (match_operand:MODEF 2 "register_operand" "x")))]
827 "SSE_FLOAT_MODE_P (<MODE>mode)"
828 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
829 [(set_attr "type" "sselog")
830 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical insn emitting
;; <plogicprefix>p<ssemodefsuffix>.  Both sources must be registers, and
;; the "mode" attribute is the corresponding vector mode.
832 (define_insn "*<code><mode>3"
833 [(set (match_operand:MODEF 0 "register_operand" "=x")
835 (match_operand:MODEF 1 "register_operand" "0")
836 (match_operand:MODEF 2 "register_operand" "x")))]
837 "SSE_FLOAT_MODE_P (<MODE>mode)"
838 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
839 [(set_attr "type" "sselog")
840 (set_attr "mode" "<ssevecmode>")])
842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
844 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
845 ;; scalar version of the instructions as well as the vector
847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
849 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
850 ;; combine to generate a multiply/add with two memory references. We then
851 ;; split this insn, into loading up the destination register with one of the
852 ;; memory operations. If we don't manage to split the insn, reload will
853 ;; generate the appropriate moves. The reason this is needed is that combine
854 ;; has already folded one of the memory references into both the multiply and
855 ;; add insns, and it can't generate a new pseudo. I.e.:
856 ;; (set (reg1) (mem (addr1)))
857 ;; (set (reg2) (mult (reg1) (mem (addr2))))
858 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 multiply-add for scalar and packed float modes (SSEMODEF4), with
;; four constraint alternatives: the destination is tied either to
;; operand 1 (alternatives 0-1) or to operand 3 (alternatives 2-3).
;; Requires TARGET_SSE5 and TARGET_FUSED_MADD, and the operands must pass
;; ix86_sse5_valid_op_p with a last argument of 2.
860 (define_insn "sse5_fmadd<mode>4"
861 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
864 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
865 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
866 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
867 "TARGET_SSE5 && TARGET_FUSED_MADD
868 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
869 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
870 [(set_attr "type" "ssemuladd")
871 (set_attr "mode" "<MODE>")])
873 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with a last
;; argument of 1 but pass with 2, and the destination register is not
;; mentioned in any of the three sources.  The operands are reworked by
;; ix86_expand_sse5_multiple_memory and sse5_fmadd<mode>4 is re-emitted.
875 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
878 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
879 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
880 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
882 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
883 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
884 && !reg_mentioned_p (operands[0], operands[1])
885 && !reg_mentioned_p (operands[0], operands[2])
886 && !reg_mentioned_p (operands[0], operands[3])"
889 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
890 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
891 operands[2], operands[3]));
895 ;; For the scalar operations, use operand1 for the upper words that aren't
896 ;; modified, so restrict the forms that are generated.
897 ;; Scalar version of fmadd
;; Two alternatives only; operand 1 is tied to the destination in both.
;; The operands must pass ix86_sse5_valid_op_p with a last argument of 1.
;; Emits fmaddss / fmaddsd.
;; NOTE(review): the "mode" attribute is "<MODE>" here, while the other
;; vm patterns in this file use "<ssescalarmode>" -- confirm.
898 (define_insn "sse5_vmfmadd<mode>4"
899 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
900 (vec_merge:SSEMODEF2P
903 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
904 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
905 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
908 "TARGET_SSE5 && TARGET_FUSED_MADD
909 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
910 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
911 [(set_attr "type" "ssemuladd")
912 (set_attr "mode" "<MODE>")])
914 ;; Floating multiply and subtract
915 ;; Allow two memory operands the same as fmadd
;; SSE5 multiply-subtract for SSEMODEF4 with the same four constraint
;; alternatives and the same enabling condition as sse5_fmadd<mode>4.
;; Emits fmsubss/sd/ps/pd.
916 (define_insn "sse5_fmsub<mode>4"
917 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
920 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
921 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
922 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
923 "TARGET_SSE5 && TARGET_FUSED_MADD
924 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
925 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
926 [(set_attr "type" "ssemuladd")
927 (set_attr "mode" "<MODE>")])
929 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with a last
;; argument of 1 but pass with 2, and the destination register is not
;; mentioned in any of the three sources.  The operands are reworked by
;; ix86_expand_sse5_multiple_memory and sse5_fmsub<mode>4 is re-emitted.
931 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
934 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
935 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
936 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
938 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
939 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
940 && !reg_mentioned_p (operands[0], operands[1])
941 && !reg_mentioned_p (operands[0], operands[2])
942 && !reg_mentioned_p (operands[0], operands[3])"
945 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
946 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
947 operands[2], operands[3]));
951 ;; For the scalar operations, use operand1 for the upper words that aren't
952 ;; modified, so restrict the forms that are generated.
953 ;; Scalar version of fmsub
;; Two alternatives only; operand 1 is tied to the destination in both.
;; The operands must pass ix86_sse5_valid_op_p with a last argument of 1.
;; Emits fmsubss / fmsubsd.
;; NOTE(review): the "mode" attribute is "<MODE>" here, while the other
;; vm patterns in this file use "<ssescalarmode>" -- confirm.
954 (define_insn "sse5_vmfmsub<mode>4"
955 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
956 (vec_merge:SSEMODEF2P
959 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
960 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
961 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
964 "TARGET_SSE5 && TARGET_FUSED_MADD
965 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
966 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
967 [(set_attr "type" "ssemuladd")
968 (set_attr "mode" "<MODE>")])
970 ;; Floating point negative multiply and add
971 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
972 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
973 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the RTL; operands 1 and 2 are
;; the multiplicands, with operand 1 marked commutative ("%").  Four
;; constraint alternatives over SSEMODEF4.
974 (define_insn "sse5_fnmadd<mode>4"
975 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
977 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
979 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
980 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
981 "TARGET_SSE5 && TARGET_FUSED_MADD
982 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
983 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
984 [(set_attr "type" "ssemuladd")
985 (set_attr "mode" "<MODE>")])
987 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with a last argument
;; of 1 but pass it with 2, and operand 0 is not mentioned in operands 1-3.
;; The replacement code calls ix86_expand_sse5_multiple_memory and then
;; emits sse5_fnmadd<mode>4 on the same four operands.
989 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
991 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
993 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
994 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
996 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
997 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
998 && !reg_mentioned_p (operands[0], operands[1])
999 && !reg_mentioned_p (operands[0], operands[2])
1000 && !reg_mentioned_p (operands[0], operands[3])"
1003 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1004 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1005 operands[2], operands[3]));
1009 ;; For the scalar operations, use operand1 for the upper words that aren't
1010 ;; modified, so restrict the forms that are generated.
1011 ;; Scalar version of fnmadd
;; Two alternatives over SSEMODEF2P: operand 1 is tied to the destination
;; ("0") in both, and exactly one of operands 2 and 3 may be memory.
;; Operand 3 is listed first in the RTL, as in the packed fnmadd pattern.
1012 (define_insn "sse5_vmfnmadd<mode>4"
1013 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1014 (vec_merge:SSEMODEF2P
1016 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1018 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1019 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1022 "TARGET_SSE5 && TARGET_FUSED_MADD
1023 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1024 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1025 [(set_attr "type" "ssemuladd")
1026 (set_attr "mode" "<MODE>")])
1028 ;; Floating point negative multiply and subtract
1029 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1030 ;; Allow 2 memory operands to help with optimization
;; Only two constraint alternatives here: operand 1 is tied to the
;; destination ("0") in both, and exactly one of operands 2 and 3 may be
;; memory.  Operand 1 carries no commutative marker in this pattern.
1031 (define_insn "sse5_fnmsub<mode>4"
1032 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1036 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1037 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1038 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1039 "TARGET_SSE5 && TARGET_FUSED_MADD
1040 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1041 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1042 [(set_attr "type" "ssemuladd")
1043 (set_attr "mode" "<MODE>")])
1045 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with a last argument
;; of 1 but pass it with 2, and operand 0 is not mentioned in operands 1-3.
;; The replacement code calls ix86_expand_sse5_multiple_memory and then
;; emits sse5_fnmsub<mode>4 on the same four operands.
1047 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1051 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1052 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1053 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1055 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1056 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1057 && !reg_mentioned_p (operands[0], operands[1])
1058 && !reg_mentioned_p (operands[0], operands[2])
1059 && !reg_mentioned_p (operands[0], operands[3])"
1062 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1063 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1064 operands[2], operands[3]));
1068 ;; For the scalar operations, use operand1 for the upper words that aren't
1069 ;; modified, so restrict the forms that are generated.
1070 ;; Scalar version of fnmsub
;; Two alternatives over SSEMODEF2P: operand 1 is tied to the destination
;; ("0") in both, and exactly one of operands 2 and 3 may be memory.
;; NOTE(review): the condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p, whereas the vmfmsub and vmfnmadd scalar patterns
;; pass 1 -- confirm whether this difference is intended.
1071 (define_insn "sse5_vmfnmsub<mode>4"
1072 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1073 (vec_merge:SSEMODEF2P
1077 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1078 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1079 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1082 "TARGET_SSE5 && TARGET_FUSED_MADD
1083 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1084 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1085 [(set_attr "type" "ssemuladd")
1086 (set_attr "mode" "<MODE>")])
1088 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1089 ;; even if the user used -mno-fused-madd
1090 ;; Parallel instructions. During instruction generation, just default
1091 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for packed fmadd over SSEMODEF2P.  All four operands
;; are registers and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fmadd<mode>4
;; on the same operands.
1092 (define_expand "sse5i_fmadd<mode>4"
1093 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1097 (match_operand:SSEMODEF2P 1 "register_operand" "")
1098 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1099 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1100 UNSPEC_SSE5_INTRINSIC))]
1103 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1104 if (TARGET_FUSED_MADD)
1106 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1107 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fmadd.  The condition tests
;; TARGET_SSE5 only (no TARGET_FUSED_MADD) and calls ix86_sse5_valid_op_p
;; with a last argument of 1.  Four constraint alternatives; operand 1 is
;; marked commutative ("%").
1112 (define_insn "*sse5i_fmadd<mode>4"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1117 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1118 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1119 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1120 UNSPEC_SSE5_INTRINSIC))]
1121 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1122 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1123 [(set_attr "type" "ssemuladd")
1124 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for packed fmsub over SSEMODEF2P.  All four operands
;; are registers and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fmsub<mode>4
;; on the same operands.
1126 (define_expand "sse5i_fmsub<mode>4"
1127 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1131 (match_operand:SSEMODEF2P 1 "register_operand" "")
1132 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1133 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1134 UNSPEC_SSE5_INTRINSIC))]
1137 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1138 if (TARGET_FUSED_MADD)
1140 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1141 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fmsub.  The condition tests
;; TARGET_SSE5 only and calls ix86_sse5_valid_op_p with a last argument
;; of 1.
;; NOTE(review): operand 1 uses the "register_operand" predicate although
;; its fourth constraint alternative is "xm"; the sibling *sse5i_fmadd,
;; *sse5i_fnmadd and *sse5i_fnmsub patterns use "nonimmediate_operand" for
;; operand 1 -- confirm whether this is intended.
1146 (define_insn "*sse5i_fmsub<mode>4"
1147 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1151 (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
1152 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1153 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1154 UNSPEC_SSE5_INTRINSIC))]
1155 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1156 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1157 [(set_attr "type" "ssemuladd")
1158 (set_attr "mode" "<MODE>")])
1160 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1161 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander: all four operands are registers and the RTL is
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the
;; preparation code emits sse5_fnmadd<mode>4 on the same operands.
1162 (define_expand "sse5i_fnmadd<mode>4"
1163 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1166 (match_operand:SSEMODEF2P 3 "register_operand" "")
1168 (match_operand:SSEMODEF2P 1 "register_operand" "")
1169 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1170 UNSPEC_SSE5_INTRINSIC))]
1173 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1174 if (TARGET_FUSED_MADD)
1176 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1177 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fnmadd.  Operand 3 is listed first
;; in the RTL.  The condition tests TARGET_SSE5 only and calls
;; ix86_sse5_valid_op_p with a last argument of 1.
1182 (define_insn "*sse5i_fnmadd<mode>4"
1183 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1186 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1188 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1189 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1190 UNSPEC_SSE5_INTRINSIC))]
1191 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1192 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1193 [(set_attr "type" "ssemuladd")
1194 (set_attr "mode" "<MODE>")])
1196 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander: all four operands are registers and the RTL is
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the
;; preparation code emits sse5_fnmsub<mode>4 on the same operands.
1197 (define_expand "sse5i_fnmsub<mode>4"
1198 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1203 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1204 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1205 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1206 UNSPEC_SSE5_INTRINSIC))]
1209 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1210 if (TARGET_FUSED_MADD)
1212 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1213 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fnmsub.  Unlike sse5_fnmsub<mode>4,
;; this pattern has four constraint alternatives and marks operand 1
;; commutative ("%").  The condition tests TARGET_SSE5 only.
1218 (define_insn "*sse5i_fnmsub<mode>4"
1219 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1224 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1225 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1226 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1227 UNSPEC_SSE5_INTRINSIC))]
1228 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1229 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1230 [(set_attr "type" "ssemuladd")
1231 (set_attr "mode" "<MODE>")])
1233 ;; Scalar instructions
;; Intrinsic expander for scalar (vec_merge) fmadd.  All four operands are
;; registers and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; sse5_vmfmadd<mode>4 on the same operands.
1234 (define_expand "sse5i_vmfmadd<mode>4"
1235 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1237 [(vec_merge:SSEMODEF2P
1240 (match_operand:SSEMODEF2P 1 "register_operand" "")
1241 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1242 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1245 UNSPEC_SSE5_INTRINSIC))]
1248 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1249 if (TARGET_FUSED_MADD)
1251 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1252 operands[2], operands[3]));
1257 ;; For the scalar operations, use operand1 for the upper words that aren't
1258 ;; modified, so restrict the forms that are accepted.
;; Two alternatives: operand 1 is tied to the destination ("0") in both,
;; and exactly one of operands 2 and 3 may be memory.  The "mode"
;; attribute is <ssescalarmode>.
;; NOTE(review): operand 1 uses "register_operand" here, while
;; *sse5i_vmfmsub, *sse5i_vmfnmadd and *sse5i_vmfnmsub use
;; "nonimmediate_operand" -- confirm whether this is intended.
1259 (define_insn "*sse5i_vmfmadd<mode>4"
1260 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1262 [(vec_merge:SSEMODEF2P
1265 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1266 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1267 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1270 UNSPEC_SSE5_INTRINSIC))]
1271 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1272 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1273 [(set_attr "type" "ssemuladd")
1274 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for scalar (vec_merge) fmsub.  All four operands are
;; registers and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; sse5_vmfmsub<mode>4 on the same operands.
1276 (define_expand "sse5i_vmfmsub<mode>4"
1277 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1279 [(vec_merge:SSEMODEF2P
1282 (match_operand:SSEMODEF2P 1 "register_operand" "")
1283 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1284 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1287 UNSPEC_SSE5_INTRINSIC))]
1290 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1291 if (TARGET_FUSED_MADD)
1293 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1294 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fmsub.  Two alternatives: operand 1
;; is tied to the destination ("0") in both, and exactly one of operands 2
;; and 3 may be memory.  The "mode" attribute is <ssescalarmode>.
1299 (define_insn "*sse5i_vmfmsub<mode>4"
1300 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1302 [(vec_merge:SSEMODEF2P
1305 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1306 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1307 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1310 UNSPEC_SSE5_INTRINSIC))]
1311 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1312 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1313 [(set_attr "type" "ssemuladd")
1314 (set_attr "mode" "<ssescalarmode>")])
1316 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for scalar (vec_merge) fnmadd.  All four operands are
;; registers and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; sse5_vmfnmadd<mode>4 on the same operands.
1317 (define_expand "sse5i_vmfnmadd<mode>4"
1318 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1320 [(vec_merge:SSEMODEF2P
1322 (match_operand:SSEMODEF2P 3 "register_operand" "")
1324 (match_operand:SSEMODEF2P 1 "register_operand" "")
1325 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1328 UNSPEC_SSE5_INTRINSIC))]
1331 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1332 if (TARGET_FUSED_MADD)
1334 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1335 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fnmadd.  Operand 3 is listed first
;; in the RTL.  Two alternatives: operand 1 is tied to the destination
;; ("0") in both, and exactly one of operands 2 and 3 may be memory.
1340 (define_insn "*sse5i_vmfnmadd<mode>4"
1341 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1343 [(vec_merge:SSEMODEF2P
1345 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1347 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1348 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1351 UNSPEC_SSE5_INTRINSIC))]
1352 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1353 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1354 [(set_attr "type" "ssemuladd")
1355 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for scalar (vec_merge) fnmsub.  All four operands are
;; registers and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; sse5_vmfnmsub<mode>4 on the same operands.
1357 (define_expand "sse5i_vmfnmsub<mode>4"
1358 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1360 [(vec_merge:SSEMODEF2P
1364 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1365 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1366 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1369 UNSPEC_SSE5_INTRINSIC))]
1372 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1373 if (TARGET_FUSED_MADD)
1375 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1376 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fnmsub.  Two alternatives: operand 1
;; is tied to the destination ("0") in both, and exactly one of operands 2
;; and 3 may be memory.  The "mode" attribute is <ssescalarmode>.
1381 (define_insn "*sse5i_vmfnmsub<mode>4"
1382 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1384 [(vec_merge:SSEMODEF2P
1388 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1389 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1390 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1393 UNSPEC_SSE5_INTRINSIC))]
1394 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1395 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1396 [(set_attr "type" "ssemuladd")
1397 (set_attr "mode" "<ssescalarmode>")])
1399 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1401 ;; Parallel single-precision floating point conversion operations
1403 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: applies float:V2SF to the V2SI operand 2 (constraint "ym").
;; Operand 1 is a V4SF register tied to the destination ("0"), so the
;; assembler template names only operands 2 and 0.
1405 (define_insn "sse_cvtpi2ps"
1406 [(set (match_operand:V4SF 0 "register_operand" "=x")
1409 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1410 (match_operand:V4SF 1 "register_operand" "0")
1413 "cvtpi2ps\t{%2, %0|%0, %2}"
1414 [(set_attr "type" "ssecvt")
1415 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF source (register or memory) goes through an
;; unspec:V4SI, and elements 0 and 1 of the result are selected into the
;; V2SI destination (constraint "y").
1417 (define_insn "sse_cvtps2pi"
1418 [(set (match_operand:V2SI 0 "register_operand" "=y")
1420 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1422 (parallel [(const_int 0) (const_int 1)])))]
1424 "cvtps2pi\t{%1, %0|%0, %1}"
1425 [(set_attr "type" "ssecvt")
1426 (set_attr "unit" "mmx")
1427 (set_attr "mode" "DI")])
;; cvttps2pi: applies fix:V4SI to the V4SF source (register or memory) and
;; selects elements 0 and 1 into the V2SI destination (constraint "y").
1429 (define_insn "sse_cvttps2pi"
1430 [(set (match_operand:V2SI 0 "register_operand" "=y")
1432 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1433 (parallel [(const_int 0) (const_int 1)])))]
1435 "cvttps2pi\t{%1, %0|%0, %1}"
1436 [(set_attr "type" "ssecvt")
1437 (set_attr "unit" "mmx")
1438 (set_attr "mode" "SF")])
;; cvtsi2ss: applies float:SF to the SImode operand 2, which is a register
;; ("r") in alternative 0 and memory ("m") in alternative 1.  Operand 1 is
;; a V4SF register tied to the destination.  The decode attributes are
;; given per alternative.
1440 (define_insn "sse_cvtsi2ss"
1441 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1444 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1445 (match_operand:V4SF 1 "register_operand" "0,0")
1448 "cvtsi2ss\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "sseicvt")
1450 (set_attr "athlon_decode" "vector,double")
1451 (set_attr "amdfam10_decode" "vector,double")
1452 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode counterpart of sse_cvtsi2ss, enabled only under
;; TARGET_SSE && TARGET_64BIT.  Operand 2 alternatives are "r" and "rm";
;; operand 1 is a V4SF register tied to the destination.
1454 (define_insn "sse_cvtsi2ssq"
1455 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1458 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1459 (match_operand:V4SF 1 "register_operand" "0,0")
1461 "TARGET_SSE && TARGET_64BIT"
1462 "cvtsi2ssq\t{%2, %0|%0, %2}"
1463 [(set_attr "type" "sseicvt")
1464 (set_attr "athlon_decode" "vector,double")
1465 (set_attr "amdfam10_decode" "vector,double")
1466 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of the V4SF operand 1 (register or memory) is
;; wrapped in UNSPEC_FIX_NOTRUNC and stored to an SImode register ("r").
1468 (define_insn "sse_cvtss2si"
1469 [(set (match_operand:SI 0 "register_operand" "=r,r")
1472 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1473 (parallel [(const_int 0)]))]
1474 UNSPEC_FIX_NOTRUNC))]
1476 "cvtss2si\t{%1, %0|%0, %1}"
1477 [(set_attr "type" "sseicvt")
1478 (set_attr "athlon_decode" "double,vector")
1479 (set_attr "prefix_rep" "1")
1480 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SFmode operand (register
;; or memory) rather than an element of a V4SF vector; same cvtss2si
;; template and UNSPEC_FIX_NOTRUNC wrapper.
1482 (define_insn "sse_cvtss2si_2"
1483 [(set (match_operand:SI 0 "register_operand" "=r,r")
1484 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1485 UNSPEC_FIX_NOTRUNC))]
1487 "cvtss2si\t{%1, %0|%0, %1}"
1488 [(set_attr "type" "sseicvt")
1489 (set_attr "athlon_decode" "double,vector")
1490 (set_attr "amdfam10_decode" "double,double")
1491 (set_attr "prefix_rep" "1")
1492 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-destination counterpart of sse_cvtss2si, enabled only
;; under TARGET_SSE && TARGET_64BIT.  Source is element 0 of a V4SF
;; register or memory operand, wrapped in UNSPEC_FIX_NOTRUNC.
1494 (define_insn "sse_cvtss2siq"
1495 [(set (match_operand:DI 0 "register_operand" "=r,r")
1498 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1499 (parallel [(const_int 0)]))]
1500 UNSPEC_FIX_NOTRUNC))]
1501 "TARGET_SSE && TARGET_64BIT"
1502 "cvtss2siq\t{%1, %0|%0, %1}"
1503 [(set_attr "type" "sseicvt")
1504 (set_attr "athlon_decode" "double,vector")
1505 (set_attr "prefix_rep" "1")
1506 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source is a plain SFmode operand
;; (register or memory); enabled only under TARGET_SSE && TARGET_64BIT.
1508 (define_insn "sse_cvtss2siq_2"
1509 [(set (match_operand:DI 0 "register_operand" "=r,r")
1510 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1511 UNSPEC_FIX_NOTRUNC))]
1512 "TARGET_SSE && TARGET_64BIT"
1513 "cvtss2siq\t{%1, %0|%0, %1}"
1514 [(set_attr "type" "sseicvt")
1515 (set_attr "athlon_decode" "double,vector")
1516 (set_attr "amdfam10_decode" "double,double")
1517 (set_attr "prefix_rep" "1")
1518 (set_attr "mode" "DI")])
;; cvttss2si: source is element 0 of the V4SF operand 1 (register or
;; memory); destination is an SImode register ("r").  This pattern does
;; not use UNSPEC_FIX_NOTRUNC.
1520 (define_insn "sse_cvttss2si"
1521 [(set (match_operand:SI 0 "register_operand" "=r,r")
1524 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1525 (parallel [(const_int 0)]))))]
1527 "cvttss2si\t{%1, %0|%0, %1}"
1528 [(set_attr "type" "sseicvt")
1529 (set_attr "athlon_decode" "double,vector")
1530 (set_attr "amdfam10_decode" "double,double")
1531 (set_attr "prefix_rep" "1")
1532 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-destination counterpart of sse_cvttss2si, enabled
;; only under TARGET_SSE && TARGET_64BIT.
1534 (define_insn "sse_cvttss2siq"
1535 [(set (match_operand:DI 0 "register_operand" "=r,r")
1538 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1539 (parallel [(const_int 0)]))))]
1540 "TARGET_SSE && TARGET_64BIT"
1541 "cvttss2siq\t{%1, %0|%0, %1}"
1542 [(set_attr "type" "sseicvt")
1543 (set_attr "athlon_decode" "double,vector")
1544 (set_attr "amdfam10_decode" "double,double")
1545 (set_attr "prefix_rep" "1")
1546 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of a V4SI register or memory operand into a V4SF
;; register.
1548 (define_insn "sse2_cvtdq2ps"
1549 [(set (match_operand:V4SF 0 "register_operand" "=x")
1550 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1552 "cvtdq2ps\t{%1, %0|%0, %1}"
1553 [(set_attr "type" "ssecvt")
1554 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF register or memory operand to a V4SI register, expressed
;; as unspec:V4SI with UNSPEC_FIX_NOTRUNC.
1556 (define_insn "sse2_cvtps2dq"
1557 [(set (match_operand:V4SI 0 "register_operand" "=x")
1558 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1559 UNSPEC_FIX_NOTRUNC))]
1561 "cvtps2dq\t{%1, %0|%0, %1}"
1562 [(set_attr "type" "ssecvt")
1563 (set_attr "prefix_data16" "1")
1564 (set_attr "mode" "TI")])
;; cvttps2dq: fix:V4SI of a V4SF register or memory operand into a V4SI
;; register.
1566 (define_insn "sse2_cvttps2dq"
1567 [(set (match_operand:V4SI 0 "register_operand" "=x")
1568 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1570 "cvttps2dq\t{%1, %0|%0, %1}"
1571 [(set_attr "type" "ssecvt")
1572 (set_attr "prefix_rep" "1")
1573 (set_attr "mode" "TI")])
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1577 ;; Parallel double-precision floating point conversion operations
1579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of a V2SI operand.  The source is "y" in
;; alternative 0 and memory in alternative 1; the "unit" attribute is
;; "mmx" only for alternative 0.
1581 (define_insn "sse2_cvtpi2pd"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1583 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1585 "cvtpi2pd\t{%1, %0|%0, %1}"
1586 [(set_attr "type" "ssecvt")
1587 (set_attr "unit" "mmx,*")
1588 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF register or memory operand to a V2SI register ("y"),
;; expressed as unspec:V2SI with UNSPEC_FIX_NOTRUNC.
1590 (define_insn "sse2_cvtpd2pi"
1591 [(set (match_operand:V2SI 0 "register_operand" "=y")
1592 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1593 UNSPEC_FIX_NOTRUNC))]
1595 "cvtpd2pi\t{%1, %0|%0, %1}"
1596 [(set_attr "type" "ssecvt")
1597 (set_attr "unit" "mmx")
1598 (set_attr "prefix_data16" "1")
1599 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI of a V2DF register or memory operand into a V2SI
;; register ("y").
1601 (define_insn "sse2_cvttpd2pi"
1602 [(set (match_operand:V2SI 0 "register_operand" "=y")
1603 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1605 "cvttpd2pi\t{%1, %0|%0, %1}"
1606 [(set_attr "type" "ssecvt")
1607 (set_attr "unit" "mmx")
1608 (set_attr "prefix_data16" "1")
1609 (set_attr "mode" "TI")])
;; cvtsi2sd: applies float:DF to the SImode operand 2 (register in
;; alternative 0, memory in alternative 1).  Operand 1 is a V2DF register
;; tied to the destination.
1611 (define_insn "sse2_cvtsi2sd"
1612 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1615 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1616 (match_operand:V2DF 1 "register_operand" "0,0")
1619 "cvtsi2sd\t{%2, %0|%0, %2}"
1620 [(set_attr "type" "sseicvt")
1621 (set_attr "mode" "DF")
1622 (set_attr "athlon_decode" "double,direct")
1623 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd, enabled only
;; under TARGET_SSE2 && TARGET_64BIT.
1625 (define_insn "sse2_cvtsi2sdq"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1629 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1630 (match_operand:V2DF 1 "register_operand" "0,0")
1632 "TARGET_SSE2 && TARGET_64BIT"
1633 "cvtsi2sdq\t{%2, %0|%0, %2}"
1634 [(set_attr "type" "sseicvt")
1635 (set_attr "mode" "DF")
1636 (set_attr "athlon_decode" "double,direct")
1637 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of the V2DF operand 1 (register or memory) is
;; wrapped in UNSPEC_FIX_NOTRUNC and stored to an SImode register ("r").
1639 (define_insn "sse2_cvtsd2si"
1640 [(set (match_operand:SI 0 "register_operand" "=r,r")
1643 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1644 (parallel [(const_int 0)]))]
1645 UNSPEC_FIX_NOTRUNC))]
1647 "cvtsd2si\t{%1, %0|%0, %1}"
1648 [(set_attr "type" "sseicvt")
1649 (set_attr "athlon_decode" "double,vector")
1650 (set_attr "prefix_rep" "1")
1651 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand
;; (register or memory) rather than an element of a V2DF vector.
1653 (define_insn "sse2_cvtsd2si_2"
1654 [(set (match_operand:SI 0 "register_operand" "=r,r")
1655 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1656 UNSPEC_FIX_NOTRUNC))]
1658 "cvtsd2si\t{%1, %0|%0, %1}"
1659 [(set_attr "type" "sseicvt")
1660 (set_attr "athlon_decode" "double,vector")
1661 (set_attr "amdfam10_decode" "double,double")
1662 (set_attr "prefix_rep" "1")
1663 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-destination counterpart of sse2_cvtsd2si, enabled only
;; under TARGET_SSE2 && TARGET_64BIT.
1665 (define_insn "sse2_cvtsd2siq"
1666 [(set (match_operand:DI 0 "register_operand" "=r,r")
1669 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1670 (parallel [(const_int 0)]))]
1671 UNSPEC_FIX_NOTRUNC))]
1672 "TARGET_SSE2 && TARGET_64BIT"
1673 "cvtsd2siq\t{%1, %0|%0, %1}"
1674 [(set_attr "type" "sseicvt")
1675 (set_attr "athlon_decode" "double,vector")
1676 (set_attr "prefix_rep" "1")
1677 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a plain DFmode operand
;; (register or memory); enabled only under TARGET_SSE2 && TARGET_64BIT.
1679 (define_insn "sse2_cvtsd2siq_2"
1680 [(set (match_operand:DI 0 "register_operand" "=r,r")
1681 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1682 UNSPEC_FIX_NOTRUNC))]
1683 "TARGET_SSE2 && TARGET_64BIT"
1684 "cvtsd2siq\t{%1, %0|%0, %1}"
1685 [(set_attr "type" "sseicvt")
1686 (set_attr "athlon_decode" "double,vector")
1687 (set_attr "amdfam10_decode" "double,double")
1688 (set_attr "prefix_rep" "1")
1689 (set_attr "mode" "DI")])
;; cvttsd2si: source is element 0 of the V2DF operand 1 (register or
;; memory); destination is an SImode register ("r").  This pattern does
;; not use UNSPEC_FIX_NOTRUNC.
1691 (define_insn "sse2_cvttsd2si"
1692 [(set (match_operand:SI 0 "register_operand" "=r,r")
1695 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1696 (parallel [(const_int 0)]))))]
1698 "cvttsd2si\t{%1, %0|%0, %1}"
1699 [(set_attr "type" "sseicvt")
1700 (set_attr "prefix_rep" "1")
1701 (set_attr "mode" "SI")
1702 (set_attr "athlon_decode" "double,vector")
1703 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-destination counterpart of sse2_cvttsd2si, enabled
;; only under TARGET_SSE2 && TARGET_64BIT.
1705 (define_insn "sse2_cvttsd2siq"
1706 [(set (match_operand:DI 0 "register_operand" "=r,r")
1709 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1710 (parallel [(const_int 0)]))))]
1711 "TARGET_SSE2 && TARGET_64BIT"
1712 "cvttsd2siq\t{%1, %0|%0, %1}"
1713 [(set_attr "type" "sseicvt")
1714 (set_attr "prefix_rep" "1")
1715 (set_attr "mode" "DI")
1716 (set_attr "athlon_decode" "double,vector")
1717 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: uses elements 0 and 1 of the V4SI operand 1 (register or
;; memory) to produce a V2DF register result.
1719 (define_insn "sse2_cvtdq2pd"
1720 [(set (match_operand:V2DF 0 "register_operand" "=x")
1723 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1724 (parallel [(const_int 0) (const_int 1)]))))]
1726 "cvtdq2pd\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "ssecvt")
1728 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq (V2DF source, V4SI destination).  Its preparation
;; statement sets operands[2] to CONST0_RTX (V2SImode), which the
;; *sse2_cvtpd2dq insn matches with a const0_operand.
1730 (define_expand "sse2_cvtpd2dq"
1731 [(set (match_operand:V4SI 0 "register_operand" "")
1733 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1737 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: the V2DF source goes through unspec:V2SI, and operand 2 must
;; be a V2SI const0_operand.  The assembler template uses only operands 0
;; and 1.
1739 (define_insn "*sse2_cvtpd2dq"
1740 [(set (match_operand:V4SI 0 "register_operand" "=x")
1742 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1744 (match_operand:V2SI 2 "const0_operand" "")))]
1746 "cvtpd2dq\t{%1, %0|%0, %1}"
1747 [(set_attr "type" "ssecvt")
1748 (set_attr "prefix_rep" "1")
1749 (set_attr "mode" "TI")
1750 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq (fix:V2SI of a V2DF source, V4SI destination).
;; Its preparation statement sets operands[2] to CONST0_RTX (V2SImode).
1752 (define_expand "sse2_cvttpd2dq"
1753 [(set (match_operand:V4SI 0 "register_operand" "")
1755 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1758 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: fix:V2SI of the V2DF source, with operand 2 required to be a
;; V2SI const0_operand.  The assembler template uses only operands 0 and 1.
1760 (define_insn "*sse2_cvttpd2dq"
1761 [(set (match_operand:V4SI 0 "register_operand" "=x")
1763 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1764 (match_operand:V2SI 2 "const0_operand" "")))]
1766 "cvttpd2dq\t{%1, %0|%0, %1}"
1767 [(set_attr "type" "ssecvt")
1768 (set_attr "prefix_rep" "1")
1769 (set_attr "mode" "TI")
1770 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate:V2SF of the V2DF operand 2 (register or
;; memory).  Operand 1 is a V4SF register tied to the destination.
1772 (define_insn "sse2_cvtsd2ss"
1773 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1776 (float_truncate:V2SF
1777 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1778 (match_operand:V4SF 1 "register_operand" "0,0")
1781 "cvtsd2ss\t{%2, %0|%0, %2}"
1782 [(set_attr "type" "ssecvt")
1783 (set_attr "athlon_decode" "vector,double")
1784 (set_attr "amdfam10_decode" "vector,double")
1785 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2 (register or
;; memory).  Operand 1 is a V2DF register tied to the destination.
1787 (define_insn "sse2_cvtss2sd"
1788 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1792 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1793 (parallel [(const_int 0) (const_int 1)])))
1794 (match_operand:V2DF 1 "register_operand" "0,0")
1797 "cvtss2sd\t{%2, %0|%0, %2}"
1798 [(set_attr "type" "ssecvt")
1799 (set_attr "amdfam10_decode" "vector,double")
1800 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps (float_truncate:V2SF of a V2DF source, V4SF
;; destination).  Its preparation statement sets operands[2] to
;; CONST0_RTX (V2SFmode).
1802 (define_expand "sse2_cvtpd2ps"
1803 [(set (match_operand:V4SF 0 "register_operand" "")
1805 (float_truncate:V2SF
1806 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1809 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate:V2SF of the V2DF source, with operand 2
;; required to be a V2SF const0_operand.  The assembler template uses only
;; operands 0 and 1.
1811 (define_insn "*sse2_cvtpd2ps"
1812 [(set (match_operand:V4SF 0 "register_operand" "=x")
1814 (float_truncate:V2SF
1815 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1816 (match_operand:V2SF 2 "const0_operand" "")))]
1818 "cvtpd2ps\t{%1, %0|%0, %1}"
1819 [(set_attr "type" "ssecvt")
1820 (set_attr "prefix_data16" "1")
1821 (set_attr "mode" "V4SF")
1822 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: uses elements 0 and 1 of the V4SF operand 1 (register or
;; memory) to produce a V2DF register result.
1824 (define_insn "sse2_cvtps2pd"
1825 [(set (match_operand:V2DF 0 "register_operand" "=x")
1828 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1829 (parallel [(const_int 0) (const_int 1)]))))]
1831 "cvtps2pd\t{%1, %0|%0, %1}"
1832 [(set_attr "type" "ssecvt")
1833 (set_attr "mode" "V2DF")
1834 (set_attr "amdfam10_decode" "direct")])
;; Two-set expander from a V4SF source to a V2DF destination.  The
;; preparation code allocates a fresh V4SF pseudo as operands[2]; the
;; second set selects elements 0 and 1.
;; NOTE(review): presumably the first set moves the high half of operand 1
;; into the low elements of operands[2] -- confirm against the full pattern.
1836 (define_expand "vec_unpacks_hi_v4sf"
1841 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1842 (parallel [(const_int 6)
1846 (set (match_operand:V2DF 0 "register_operand" "")
1850 (parallel [(const_int 0) (const_int 1)]))))]
1853 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF register from elements 0 and 1 of the V4SF
;; operand 1 (register or memory).
1856 (define_expand "vec_unpacks_lo_v4sf"
1857 [(set (match_operand:V2DF 0 "register_operand" "")
1860 (match_operand:V4SF 1 "nonimmediate_operand" "")
1861 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two steps: gen_vec_unpacks_hi_v8hi writes a V4SI
;; temporary, which gen_sse2_cvtdq2ps then converts into operand 0.
1864 (define_expand "vec_unpacks_float_hi_v8hi"
1865 [(match_operand:V4SF 0 "register_operand" "")
1866 (match_operand:V8HI 1 "register_operand" "")]
1869 rtx tmp = gen_reg_rtx (V4SImode);
1871 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1872 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: gen_vec_unpacks_lo_v8hi writes a V4SI
;; temporary, which gen_sse2_cvtdq2ps then converts into operand 0.
1876 (define_expand "vec_unpacks_float_lo_v8hi"
1877 [(match_operand:V4SF 0 "register_operand" "")
1878 (match_operand:V8HI 1 "register_operand" "")]
1881 rtx tmp = gen_reg_rtx (V4SImode);
1883 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1884 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: gen_vec_unpacku_hi_v8hi writes a V4SI
;; temporary, which gen_sse2_cvtdq2ps then converts into operand 0.
1888 (define_expand "vec_unpacku_float_hi_v8hi"
1889 [(match_operand:V4SF 0 "register_operand" "")
1890 (match_operand:V8HI 1 "register_operand" "")]
1893 rtx tmp = gen_reg_rtx (V4SImode);
1895 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1896 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: gen_vec_unpacku_lo_v8hi writes a V4SI
;; temporary, which gen_sse2_cvtdq2ps then converts into operand 0.
1900 (define_expand "vec_unpacku_float_lo_v8hi"
1901 [(match_operand:V4SF 0 "register_operand" "")
1902 (match_operand:V8HI 1 "register_operand" "")]
1905 rtx tmp = gen_reg_rtx (V4SImode);
1907 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1908 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-set expander from a V4SI source to a V2DF destination.  The
;; preparation code allocates a fresh V4SI pseudo as operands[2]; the
;; second set selects elements 0 and 1.
;; NOTE(review): presumably the first set moves the high elements of
;; operand 1 into operands[2] -- confirm against the full pattern.
1912 (define_expand "vec_unpacks_float_hi_v4si"
1915 (match_operand:V4SI 1 "nonimmediate_operand" "")
1916 (parallel [(const_int 2)
1920 (set (match_operand:V2DF 0 "register_operand" "")
1924 (parallel [(const_int 0) (const_int 1)]))))]
1927 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF register from elements 0 and 1 of the V4SI
;; operand 1 (register or memory).
1930 (define_expand "vec_unpacks_float_lo_v4si"
1931 [(set (match_operand:V2DF 0 "register_operand" "")
1934 (match_operand:V4SI 1 "nonimmediate_operand" "")
1935 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF operands into one V4SF register: each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and
;; gen_sse_movlhps then combines r1 and r2 into operand 0.
1938 (define_expand "vec_pack_trunc_v2df"
1939 [(match_operand:V4SF 0 "register_operand" "")
1940 (match_operand:V2DF 1 "nonimmediate_operand" "")
1941 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1946 r1 = gen_reg_rtx (V4SFmode);
1947 r2 = gen_reg_rtx (V4SFmode);
1949 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1950 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1951 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI register: each input is converted
;; with gen_sse2_cvttpd2dq into its own V4SI temporary (r1, r2), and
;; gen_sse2_punpcklqdq then combines them, with all three registers viewed
;; as V2DImode via gen_lowpart.
1955 (define_expand "vec_pack_sfix_trunc_v2df"
1956 [(match_operand:V4SI 0 "register_operand" "")
1957 (match_operand:V2DF 1 "nonimmediate_operand" "")
1958 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1963 r1 = gen_reg_rtx (V4SImode);
1964 r2 = gen_reg_rtx (V4SImode);
1966 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1967 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1968 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1969 gen_lowpart (V2DImode, r1),
1970 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but each V2DF input is converted
;; with gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq before
;; gen_sse2_punpcklqdq combines the two V4SI temporaries.
1974 (define_expand "vec_pack_sfix_v2df"
1975 [(match_operand:V4SI 0 "register_operand" "")
1976 (match_operand:V2DF 1 "nonimmediate_operand" "")
1977 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1982 r1 = gen_reg_rtx (V4SImode);
1983 r2 = gen_reg_rtx (V4SImode);
1985 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1986 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1987 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1988 gen_lowpart (V2DImode, r1),
1989 gen_lowpart (V2DImode, r2)));
1993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1995 ;; Parallel single-precision floating point element swizzling
1997 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, each with its own template: movhlps (register
;; source), movlps from %H2 (offsettable-memory source, constraint "o"),
;; and movhps (memory destination).  Operand 1 is tied to the destination
;; in every alternative.  The condition rejects the case where operands 1
;; and 2 are both memory.
1999 (define_insn "sse_movhlps"
2000 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2003 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2004 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2005 (parallel [(const_int 6)
2009 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2011 movhlps\t{%2, %0|%0, %2}
2012 movlps\t{%H2, %0|%0, %H2}
2013 movhps\t{%2, %0|%0, %2}"
2014 [(set_attr "type" "ssemov")
2015 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives, each with its own template: movlhps (register
;; source), movhps (memory source), and movlps to %H0 (offsettable-memory
;; destination, constraint "o").  Operand 1 is tied to the destination in
;; every alternative.  The condition calls ix86_binary_operator_ok.
2017 (define_insn "sse_movlhps"
2018 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2021 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2022 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2023 (parallel [(const_int 0)
2027 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2029 movlhps\t{%2, %0|%0, %2}
2030 movhps\t{%2, %0|%0, %2}
2031 movlps\t{%2, %H0|%H0, %2}"
2032 [(set_attr "type" "ssemov")
2033 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps; the selection vector lists indices 2, 6, 3, 7.  Operand 1
;; must be the output register; operand 2 may be a register or memory.
2035 (define_insn "sse_unpckhps"
2036 [(set (match_operand:V4SF 0 "register_operand" "=x")
2039 (match_operand:V4SF 1 "register_operand" "0")
2040 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2041 (parallel [(const_int 2) (const_int 6)
2042 (const_int 3) (const_int 7)])))]
2044 "unpckhps\t{%2, %0|%0, %2}"
2045 [(set_attr "type" "sselog")
2046 (set_attr "mode" "V4SF")])
;; Emits unpcklps; the selection vector lists indices 0, 4, 1, 5.  Operand 1
;; must be the output register; operand 2 may be a register or memory.
2048 (define_insn "sse_unpcklps"
2049 [(set (match_operand:V4SF 0 "register_operand" "=x")
2052 (match_operand:V4SF 1 "register_operand" "0")
2053 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2054 (parallel [(const_int 0) (const_int 4)
2055 (const_int 1) (const_int 5)])))]
2057 "unpcklps\t{%2, %0|%0, %2}"
2058 [(set_attr "type" "sselog")
2059 (set_attr "mode" "V4SF")])
2061 ;; These are modeled with the same vec_concat as the others so that we
2062 ;; capture users of shufps that can use the new instructions
;; Emits movshdup from a register-or-memory V4SF operand into an SSE
;; register; the insn carries prefix_rep = 1.
2063 (define_insn "sse3_movshdup"
2064 [(set (match_operand:V4SF 0 "register_operand" "=x")
2067 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2069 (parallel [(const_int 1)
2074 "movshdup\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "sse")
2076 (set_attr "prefix_rep" "1")
2077 (set_attr "mode" "V4SF")])
;; Emits movsldup from a register-or-memory V4SF operand into an SSE
;; register; the insn carries prefix_rep = 1.
2079 (define_insn "sse3_movsldup"
2080 [(set (match_operand:V4SF 0 "register_operand" "=x")
2083 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2085 (parallel [(const_int 0)
2090 "movsldup\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "sse")
2092 (set_attr "prefix_rep" "1")
2093 (set_attr "mode" "V4SF")])
;; Expander for shufps with an integer mask in operand 3: the mask is split
;; into four 2-bit selectors (the upper two biased by 4) which are passed to
;; sse_shufps_1.
2095 (define_expand "sse_shufps"
2096 [(match_operand:V4SF 0 "register_operand" "")
2097 (match_operand:V4SF 1 "register_operand" "")
2098 (match_operand:V4SF 2 "nonimmediate_operand" "")
2099 (match_operand:SI 3 "const_int_operand" "")]
2102 int mask = INTVAL (operands[3]);
2103 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2104 GEN_INT ((mask >> 0) & 3),
2105 GEN_INT ((mask >> 2) & 3),
2106 GEN_INT (((mask >> 4) & 3) + 4),
2107 GEN_INT (((mask >> 6) & 3) + 4)));
;; Emits shufps.  Operands 3-6 are the individual selectors (predicate
;; const_0_to_3_operand for 3 and 4, const_4_to_7_operand for 5 and 6).  The
;; output code packs them back into a single immediate -- operands 3 and 4
;; at bits 0 and 2, operands 5 and 6 less 4 at bits 4 and 6 -- and stores it
;; in operands[3] for the template.
2111 (define_insn "sse_shufps_1"
2112 [(set (match_operand:V4SF 0 "register_operand" "=x")
2115 (match_operand:V4SF 1 "register_operand" "0")
2116 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2117 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2118 (match_operand 4 "const_0_to_3_operand" "")
2119 (match_operand 5 "const_4_to_7_operand" "")
2120 (match_operand 6 "const_4_to_7_operand" "")])))]
2124 mask |= INTVAL (operands[3]) << 0;
2125 mask |= INTVAL (operands[4]) << 2;
2126 mask |= (INTVAL (operands[5]) - 4) << 4;
2127 mask |= (INTVAL (operands[6]) - 4) << 6;
2128 operands[3] = GEN_INT (mask);
2130 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2132 [(set_attr "type" "sselog")
2133 (set_attr "mode" "V4SF")])
;; Produces a V2SF from elements 2 and 3 of V4SF operand 1.  Alternatives:
;; movhps to memory, movhlps between registers, movlps loading from %H1 of
;; an offsettable memory source.
2135 (define_insn "sse_storehps"
2136 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2138 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2139 (parallel [(const_int 2) (const_int 3)])))]
2142 movhps\t{%1, %0|%0, %1}
2143 movhlps\t{%1, %0|%0, %1}
2144 movlps\t{%H1, %0|%0, %H1}"
2145 [(set_attr "type" "ssemov")
2146 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SF from elements 0 and 1 of operand 1 (tied to the output) and
;; V2SF operand 2.  Alternatives: movhps from memory, movlhps from a
;; register, movlps storing into %H0 of an offsettable memory destination.
2148 (define_insn "sse_loadhps"
2149 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2152 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2153 (parallel [(const_int 0) (const_int 1)]))
2154 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2157 movhps\t{%2, %0|%0, %2}
2158 movlhps\t{%2, %0|%0, %2}
2159 movlps\t{%2, %H0|%H0, %2}"
2160 [(set_attr "type" "ssemov")
2161 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produces a V2SF from elements 0 and 1 of V4SF operand 1.  Alternatives:
;; movlps to memory, movaps between registers, movlps from memory.
2163 (define_insn "sse_storelps"
2164 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2166 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2167 (parallel [(const_int 0) (const_int 1)])))]
2170 movlps\t{%1, %0|%0, %1}
2171 movaps\t{%1, %0|%0, %1}
2172 movlps\t{%1, %0|%0, %1}"
2173 [(set_attr "type" "ssemov")
2174 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SF from V2SF operand 2 and elements 2 and 3 of V4SF operand 1.
;; Alternatives: shufps $0xe4 with operand 2 tied to the output and operand 1
;; as the source register, movlps from memory, movlps to memory.
2176 (define_insn "sse_loadlps"
2177 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2179 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2181 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2182 (parallel [(const_int 2) (const_int 3)]))))]
2185 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2186 movlps\t{%2, %0|%0, %2}
2187 movlps\t{%2, %0|%0, %2}"
2188 [(set_attr "type" "sselog,ssemov,ssemov")
2189 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the output and operand 2 is the
;; source register named in the template.
2191 (define_insn "sse_movss"
2192 [(set (match_operand:V4SF 0 "register_operand" "=x")
2194 (match_operand:V4SF 2 "register_operand" "x")
2195 (match_operand:V4SF 1 "register_operand" "0")
2198 "movss\t{%2, %0|%0, %2}"
2199 [(set_attr "type" "ssemov")
2200 (set_attr "mode" "SF")])
;; SF operand 1 must already live in the output register; the insn emits
;; shufps $0 of that register with itself.
2202 (define_insn "*vec_dupv4sf"
2203 [(set (match_operand:V4SF 0 "register_operand" "=x")
2205 (match_operand:SF 1 "register_operand" "0")))]
2207 "shufps\t{$0, %0, %0|%0, %0, 0}"
2208 [(set_attr "type" "sselog1")
2209 (set_attr "mode" "V4SF")])
2211 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2212 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2213 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands.  Alternatives 1-2 use "x" registers
;; (unpcklps, or movss when operand 2 is the constant matched by "C");
;; alternatives 3-4 use "*y" registers (punpckldq, movd) and are typed as
;; MMX operations.
2214 (define_insn "*sse_concatv2sf"
2215 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2217 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2218 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2221 unpcklps\t{%2, %0|%0, %2}
2222 movss\t{%1, %0|%0, %1}
2223 punpckldq\t{%2, %0|%0, %2}
2224 movd\t{%1, %0|%0, %1}"
2225 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2226 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  Operand 1 is tied to the output;
;; operand 2 comes from a register (movlhps) or memory (movhps).
2228 (define_insn "*sse_concatv4sf"
2229 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2231 (match_operand:V2SF 1 "register_operand" " 0,0")
2232 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2235 movlhps\t{%2, %0|%0, %2}
2236 movhps\t{%2, %0|%0, %2}"
2237 [(set_attr "type" "ssemov")
2238 (set_attr "mode" "V4SF,V2SF")])
;; Expander that hands V4SF initialisation to ix86_expand_vector_init,
;; passing false as its first argument.
2240 (define_expand "vec_initv4sf"
2241 [(match_operand:V4SF 0 "register_operand" "")
2242 (match_operand 1 "" "")]
2245 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Four-alternative V4SF pattern combining SF operand 2 with V4SF operand 1
;; (a vector_move_operand).  The templates shown emit movss for the first
;; two alternatives and movd for the third (operand 2 in a general register).
2249 (define_insn "vec_setv4sf_0"
2250 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2253 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2254 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2258 movss\t{%2, %0|%0, %2}
2259 movss\t{%2, %0|%0, %2}
2260 movd\t{%2, %0|%0, %2}
2262 [(set_attr "type" "ssemov")
2263 (set_attr "mode" "SF")])
2265 ;; A subset is vec_setv4sf.
;; Emits insertps.  Operand 3 arrives as a power of two (predicate
;; const_pow2_1_to_8_operand); the output code replaces it with
;; exact_log2 (operand 3) << 4 before the template is printed.
2266 (define_insn "*vec_setv4sf_sse4_1"
2267 [(set (match_operand:V4SF 0 "register_operand" "=x")
2270 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2271 (match_operand:V4SF 1 "register_operand" "0")
2272 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2275 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2276 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2278 [(set_attr "type" "sselog")
2279 (set_attr "prefix_extra" "1")
2280 (set_attr "mode" "V4SF")])
;; Unspec form of insertps: operand 1 is tied to the output, operand 2 is the
;; source register and operand 3 is passed through as the immediate
;; (predicate const_0_to_255_operand).
2282 (define_insn "sse4_1_insertps"
2283 [(set (match_operand:V4SF 0 "register_operand" "=x")
2284 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2285 (match_operand:V4SF 1 "register_operand" "0")
2286 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2289 "insertps\t{%3, %2, %0|%0, %2, %3}"
2290 [(set_attr "type" "sselog")
2291 (set_attr "prefix_extra" "1")
2292 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE) for a V4SF memory destination and a
;; non-memory SF operand 1: emits an SFmode store of operand 1 at byte
;; offset 0 of operand 0.
2295 [(set (match_operand:V4SF 0 "memory_operand" "")
2298 (match_operand:SF 1 "nonmemory_operand" ""))
2301 "TARGET_SSE && reload_completed"
2304 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander that hands V4SF element insertion to ix86_expand_vector_set;
;; operand 2 is a constant integer passed as the element index argument.
2308 (define_expand "vec_setv4sf"
2309 [(match_operand:V4SF 0 "register_operand" "")
2310 (match_operand:SF 1 "register_operand" "")
2311 (match_operand 2 "const_int_operand" "")]
2314 ix86_expand_vector_set (false, operands[0], operands[1],
2315 INTVAL (operands[2]));
;; Extracts element 0 of a V4SF as an SF value.  Once reload has completed
;; the insn is split: operand 1 is re-expressed in SFmode (gen_rtx_REG on its
;; register number, or gen_lowpart) and a plain move into operand 0 is
;; emitted.  Operands 0 and 1 may not both be memory.
2319 (define_insn_and_split "*vec_extractv4sf_0"
2320 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2322 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2323 (parallel [(const_int 0)])))]
2324 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2326 "&& reload_completed"
2329 rtx op1 = operands[1];
2331 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2333 op1 = gen_lowpart (SFmode, op1);
2334 emit_move_insn (operands[0], op1);
;; Emits extractps: the element of V4SF register operand 1 chosen by the
;; constant index in operand 2 (0..3) goes to a general register or memory.
2338 (define_insn "*sse4_1_extractps"
2339 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2341 (match_operand:V4SF 1 "register_operand" "x")
2342 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2344 "extractps\t{%2, %1, %0|%0, %1, %2}"
2345 [(set_attr "type" "sselog")
2346 (set_attr "prefix_extra" "1")
2347 (set_attr "mode" "V4SF")])
;; Element extract from a V4SF in offsettable memory: the split turns it into
;; an SFmode load from byte offset 4 * index (operand 2) of operand 1.
2349 (define_insn_and_split "*vec_extract_v4sf_mem"
2350 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2352 (match_operand:V4SF 1 "memory_operand" "o")
2353 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2359 int i = INTVAL (operands[2]);
2361 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander that hands V4SF element extraction to
;; ix86_expand_vector_extract; operand 2 is a constant integer index.
2365 (define_expand "vec_extractv4sf"
2366 [(match_operand:SF 0 "register_operand" "")
2367 (match_operand:V4SF 1 "register_operand" "")
2368 (match_operand 2 "const_int_operand" "")]
2371 ix86_expand_vector_extract (false, operands[0], operands[1],
2372 INTVAL (operands[2]));
2376 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2378 ;; Parallel double-precision floating point element swizzling
2380 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF pattern with three alternatives: unpckhpd between registers, movlpd
;; loading from %H1 of an offsettable memory operand 1 (operand 2 tied to the
;; output), and movhpd storing operand 1 to a memory destination.  Operands 1
;; and 2 may not both be memory.
2382 (define_insn "sse2_unpckhpd"
2383 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2386 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2387 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2388 (parallel [(const_int 1)
2390 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2392 unpckhpd\t{%2, %0|%0, %2}
2393 movlpd\t{%H1, %0|%0, %H1}
2394 movhpd\t{%1, %0|%0, %1}"
2395 [(set_attr "type" "sselog,ssemov,ssemov")
2396 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; TARGET_SSE3 pattern with two alternatives (SSE register destination, or
;; offsettable memory destination with a register source); the first emits
;; movddup.  Operands 0 and 1 may not both be memory.
2398 (define_insn "*sse3_movddup"
2399 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2402 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2404 (parallel [(const_int 0)
2406 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2408 movddup\t{%1, %0|%0, %1}
2410 [(set_attr "type" "sselog1,ssemov")
2411 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination and a
;; register source: the register is viewed as DFmode and stored twice, at
;; byte offsets 0 and 8 of operand 0.
2414 [(set (match_operand:V2DF 0 "memory_operand" "")
2417 (match_operand:V2DF 1 "register_operand" "")
2419 (parallel [(const_int 0)
2421 "TARGET_SSE3 && reload_completed"
2424 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2425 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2426 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; V2DF pattern with three alternatives: unpcklpd between registers, movhpd
;; loading operand 2 from memory, and movlpd storing operand 2 into %H0 of an
;; offsettable memory destination.  Operand 1 is always tied to the output;
;; operands 1 and 2 may not both be memory.
2430 (define_insn "sse2_unpcklpd"
2431 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2434 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2435 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2436 (parallel [(const_int 0)
2438 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2440 unpcklpd\t{%2, %0|%0, %2}
2441 movhpd\t{%2, %0|%0, %2}
2442 movlpd\t{%2, %H0|%H0, %2}"
2443 [(set_attr "type" "sselog,ssemov,ssemov")
2444 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd with an integer mask in operand 3; it forwards to
;; sse2_shufpd_1, passing 3 or 2 as the final selector depending on mask
;; bit 1.
2446 (define_expand "sse2_shufpd"
2447 [(match_operand:V2DF 0 "register_operand" "")
2448 (match_operand:V2DF 1 "register_operand" "")
2449 (match_operand:V2DF 2 "nonimmediate_operand" "")
2450 (match_operand:SI 3 "const_int_operand" "")]
2453 int mask = INTVAL (operands[3]);
2454 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2456 GEN_INT (mask & 2 ? 3 : 2)));
;; Emits shufpd.  Operand 3 (0..1) and operand 4 (2..3) are the two
;; selectors; the output code folds them into one immediate, operand 3 in
;; bit 0 and operand 4 less 2 in bit 1, stored in operands[3].
2460 (define_insn "sse2_shufpd_1"
2461 [(set (match_operand:V2DF 0 "register_operand" "=x")
2464 (match_operand:V2DF 1 "register_operand" "0")
2465 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2466 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2467 (match_operand 4 "const_2_to_3_operand" "")])))]
2471 mask = INTVAL (operands[3]);
2472 mask |= (INTVAL (operands[4]) - 2) << 1;
2473 operands[3] = GEN_INT (mask);
2475 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2477 [(set_attr "type" "sselog")
2478 (set_attr "mode" "V2DF")])
;; Extracts element 1 of V2DF operand 1 into DF operand 0.  The first
;; alternative (memory destination) emits movhpd; operands 0 and 1 may not
;; both be memory.
2480 (define_insn "sse2_storehpd"
2481 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2483 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2484 (parallel [(const_int 1)])))]
2485 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2487 movhpd\t{%1, %0|%0, %1}
2490 [(set_attr "type" "ssemov,sselog1,ssemov")
2491 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF in memory
;; into a DF register becomes a plain DFmode load from byte offset 8.
2494 [(set (match_operand:DF 0 "register_operand" "")
2496 (match_operand:V2DF 1 "memory_operand" "")
2497 (parallel [(const_int 1)])))]
2498 "TARGET_SSE2 && reload_completed"
2499 [(set (match_dup 0) (match_dup 1))]
2501 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of V2DF operand 1 into DF operand 0.  The first
;; alternative (memory destination) emits movlpd; operands 0 and 1 may not
;; both be memory.
2504 (define_insn "sse2_storelpd"
2505 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2507 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2508 (parallel [(const_int 0)])))]
2509 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2511 movlpd\t{%1, %0|%0, %1}
2514 [(set_attr "type" "ssemov")
2515 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of a V2DF into a DF
;; register becomes a plain move after operand 1 is re-expressed in DFmode
;; (gen_rtx_REG on its register number, or gen_lowpart).
2518 [(set (match_operand:DF 0 "register_operand" "")
2520 (match_operand:V2DF 1 "nonimmediate_operand" "")
2521 (parallel [(const_int 0)])))]
2522 "TARGET_SSE2 && reload_completed"
2525 rtx op1 = operands[1];
2527 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2529 op1 = gen_lowpart (DFmode, op1);
2530 emit_move_insn (operands[0], op1);
;; Builds a V2DF from element 0 of operand 1 and DF operand 2.  The templates
;; shown emit movhpd, unpcklpd and shufpd $1 for the first three
;; alternatives.  Operands 1 and 2 may not both be memory.
;; NOTE(review): in the third alternative operand 2 is tied to the output,
;; and shufpd takes its low result element from the destination register, so
;; "shufpd $1, %1, %0" does not obviously produce {op1[0], op2} -- confirm
;; against the SHUFPD definition before relying on this alternative.
2534 (define_insn "sse2_loadhpd"
2535 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2538 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2539 (parallel [(const_int 0)]))
2540 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2541 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2543 movhpd\t{%2, %0|%0, %2}
2544 unpcklpd\t{%2, %0|%0, %2}
2545 shufpd\t{$1, %1, %0|%0, %1, 1}
2547 [(set_attr "type" "ssemov,sselog,sselog,other")
2548 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2): the V2DF in memory keeps its own element
;; 0 and receives DF register operand 1 as the other element, so the insn
;; becomes a DFmode store at byte offset 8.
2551 [(set (match_operand:V2DF 0 "memory_operand" "")
2553 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2554 (match_operand:DF 1 "register_operand" "")))]
2555 "TARGET_SSE2 && reload_completed"
2556 [(set (match_dup 0) (match_dup 1))]
2558 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Builds a V2DF from DF operand 2 and element 1 of V2DF operand 1.  The
;; templates shown emit movsd, movlpd, movsd, shufpd $2 and movhpd (from %H1
;; of an offsettable memory operand 1) for the first five alternatives.
;; Operands 1 and 2 may not both be memory.
;; In the fourth alternative operand 2 is tied to the output, so the shufpd
;; source must be operand 1; naming %2 there would shuffle the destination
;; with itself and never read operand 1.
2561 (define_insn "sse2_loadlpd"
2562 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2564 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2566 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2567 (parallel [(const_int 1)]))))]
2568 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2570 movsd\t{%2, %0|%0, %2}
2571 movlpd\t{%2, %0|%0, %2}
2572 movsd\t{%2, %0|%0, %2}
2573 shufpd\t{$2, %1, %0|%0, %1, 2}
2574 movhpd\t{%H1, %0|%0, %H1}
2576 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2577 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split (TARGET_SSE2): the V2DF in memory keeps its own element
;; 1 and receives DF register operand 1 as the first element of the pair, so
;; the insn becomes a DFmode store at byte offset 0.  (Offset 8 would
;; overwrite the element that has to be preserved.)
2580 [(set (match_operand:V2DF 0 "memory_operand" "")
2582 (match_operand:DF 1 "register_operand" "")
2583 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2584 "TARGET_SSE2 && reload_completed"
2585 [(set (match_dup 0) (match_dup 1))]
2587 operands[0] = adjust_address (operands[0], DFmode, 0);
2590 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extract of element 1 of a V2DF using single-precision
;; moves: movhps to memory, movhlps between registers, movlps from %H1 of an
;; offsettable memory source.  Operands 0 and 1 may not both be memory.
2592 (define_insn "*vec_extractv2df_1_sse"
2593 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2595 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2596 (parallel [(const_int 1)])))]
2597 "!TARGET_SSE2 && TARGET_SSE
2598 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2600 movhps\t{%1, %0|%0, %1}
2601 movhlps\t{%1, %0|%0, %1}
2602 movlps\t{%H1, %0|%0, %H1}"
2603 [(set_attr "type" "ssemov")
2604 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extract of element 0 of a V2DF using single-precision
;; moves: movlps to memory, movaps between registers, movlps from memory.
;; Operands 0 and 1 may not both be memory.
2606 (define_insn "*vec_extractv2df_0_sse"
2607 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2609 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2610 (parallel [(const_int 0)])))]
2611 "!TARGET_SSE2 && TARGET_SSE
2612 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2614 movlps\t{%1, %0|%0, %1}
2615 movaps\t{%1, %0|%0, %1}
2616 movlps\t{%1, %0|%0, %1}"
2617 [(set_attr "type" "ssemov")
2618 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six-alternative V2DF pattern combining operands 2 and 1.  Templates, in
;; order: movsd (register), movlpd (load), movlpd (store), shufpd $2, movhps
;; from %H1 of an offsettable memory operand 1, and movhps storing operand 1
;; into %H0 of an offsettable memory destination.
;; In the fourth alternative operand 2 is tied to the output, so the shufpd
;; source must be operand 1; naming %2 there would shuffle the destination
;; with itself and never read operand 1.
2620 (define_insn "sse2_movsd"
2621 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2623 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2624 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2628 movsd\t{%2, %0|%0, %2}
2629 movlpd\t{%2, %0|%0, %2}
2630 movlpd\t{%2, %0|%0, %2}
2631 shufpd\t{$2, %1, %0|%0, %1, 2}
2632 movhps\t{%H1, %0|%0, %H1}
2633 movhps\t{%1, %H0|%H0, %1}"
2634 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2635 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Emits movddup from a DF register or memory operand into a V2DF register.
2637 (define_insn "*vec_dupv2df_sse3"
2638 [(set (match_operand:V2DF 0 "register_operand" "=x")
2640 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2642 "movddup\t{%1, %0|%0, %1}"
2643 [(set_attr "type" "sselog1")
2644 (set_attr "mode" "DF")])
;; V2DF pattern whose DF source must already be in the output register
;; (constraint "0"); typed sselog1 in V2DF mode.
2646 (define_insn "vec_dupv2df"
2647 [(set (match_operand:V2DF 0 "register_operand" "=x")
2649 (match_operand:DF 1 "register_operand" "0")))]
2652 [(set_attr "type" "sselog1")
2653 (set_attr "mode" "V2DF")])
;; Emits movddup from DF operand 1 (register or memory) into a V2DF
;; register.
2655 (define_insn "*vec_concatv2df_sse3"
2656 [(set (match_operand:V2DF 0 "register_operand" "=x")
2658 (match_operand:DF 1 "nonimmediate_operand" "xm")
2661 "movddup\t{%1, %0|%0, %1}"
2662 [(set_attr "type" "sselog1")
2663 (set_attr "mode" "DF")])
;; Builds a V2DF from two DF operands.  The first three alternatives use the
;; "Y2" register class (unpcklpd, movhpd from memory, movsd when operand 2 is
;; the constant matched by "C"); the last two use "x" registers with
;; single-precision moves (movlhps, movhps).
2665 (define_insn "*vec_concatv2df"
2666 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2668 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2669 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2672 unpcklpd\t{%2, %0|%0, %2}
2673 movhpd\t{%2, %0|%0, %2}
2674 movsd\t{%1, %0|%0, %1}
2675 movlhps\t{%2, %0|%0, %2}
2676 movhps\t{%2, %0|%0, %2}"
2677 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2678 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander that hands V2DF element insertion to ix86_expand_vector_set;
;; operand 2 is a constant integer passed as the element index argument.
2680 (define_expand "vec_setv2df"
2681 [(match_operand:V2DF 0 "register_operand" "")
2682 (match_operand:DF 1 "register_operand" "")
2683 (match_operand 2 "const_int_operand" "")]
2686 ix86_expand_vector_set (false, operands[0], operands[1],
2687 INTVAL (operands[2]));
;; Expander that hands V2DF element extraction to
;; ix86_expand_vector_extract; operand 2 is a constant integer index.
2691 (define_expand "vec_extractv2df"
2692 [(match_operand:DF 0 "register_operand" "")
2693 (match_operand:V2DF 1 "register_operand" "")
2694 (match_operand 2 "const_int_operand" "")]
2697 ix86_expand_vector_extract (false, operands[0], operands[1],
2698 INTVAL (operands[2]));
;; Expander that hands V2DF initialisation to ix86_expand_vector_init,
;; passing false as its first argument.
2702 (define_expand "vec_initv2df"
2703 [(match_operand:V2DF 0 "register_operand" "")
2704 (match_operand 1 "" "")]
2707 ix86_expand_vector_init (false, operands[0], operands[1]);
2711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2713 ;; Parallel integral arithmetic
2715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI mode.  The preparation statement
;; forces a zero vector of the same mode into a register as operands[2]
;; (presumably so the negation can be expressed as zero minus operand 1 --
;; confirm against the full pattern).
2717 (define_expand "neg<mode>2"
2718 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2721 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2723 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add expander for every SSEMODEI mode; operands are
;; legitimised by ix86_fixup_binary_operands_no_copy with code PLUS.
2725 (define_expand "add<mode>3"
2726 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2727 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2728 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2730 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Emits padd{b,w,d,q} (suffix from ssevecsize).  Operand 1 is commutative
;; with operand 2 ("%0") and ends up tied to the output.
2732 (define_insn "*add<mode>3"
2733 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2735 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2736 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2737 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2738 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2739 [(set_attr "type" "sseiadd")
2740 (set_attr "prefix_data16" "1")
2741 (set_attr "mode" "TI")])
;; Emits padds{b,w} for the SSEMODE12 modes (V16QI, V8HI); operand validity
;; is checked with code SS_PLUS.
2743 (define_insn "sse2_ssadd<mode>3"
2744 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2746 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2747 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2748 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2749 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2750 [(set_attr "type" "sseiadd")
2751 (set_attr "prefix_data16" "1")
2752 (set_attr "mode" "TI")])
;; Emits paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI); operand validity
;; is checked with code US_PLUS.
2754 (define_insn "sse2_usadd<mode>3"
2755 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2757 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2758 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2759 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2760 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2761 [(set_attr "type" "sseiadd")
2762 (set_attr "prefix_data16" "1")
2763 (set_attr "mode" "TI")])
;; Integer vector subtract expander for every SSEMODEI mode; operand 1 must
;; be a register, and operands are legitimised by
;; ix86_fixup_binary_operands_no_copy with code MINUS.
2765 (define_expand "sub<mode>3"
2766 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2767 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2768 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2770 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Emits psub{b,w,d,q} (suffix from ssevecsize).  Operand 1 is tied to the
;; output; operand 2 may be a register or memory.
2772 (define_insn "*sub<mode>3"
2773 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2775 (match_operand:SSEMODEI 1 "register_operand" "0")
2776 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2778 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2779 [(set_attr "type" "sseiadd")
2780 (set_attr "prefix_data16" "1")
2781 (set_attr "mode" "TI")])
;; Emits psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; tied to the output; operand 2 may be a register or memory.
2783 (define_insn "sse2_sssub<mode>3"
2784 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2786 (match_operand:SSEMODE12 1 "register_operand" "0")
2787 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2789 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2790 [(set_attr "type" "sseiadd")
2791 (set_attr "prefix_data16" "1")
2792 (set_attr "mode" "TI")])
;; Emits psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; tied to the output; operand 2 may be a register or memory.
2794 (define_insn "sse2_ussub<mode>3"
2795 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2797 (match_operand:SSEMODE12 1 "register_operand" "0")
2798 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2800 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2801 [(set_attr "type" "sseiadd")
2802 (set_attr "prefix_data16" "1")
2803 (set_attr "mode" "TI")])
;; V16QI multiply, split before reload into V8HI word multiplies.  Two code
;; sequences are visible below: one built on the SSE5 unpack/pack helpers,
;; and one built on punpck{l,h}bw plus mulv8hi3 whose end-of-line
;; annotations track where each result byte sits after every step.
2805 (define_insn_and_split "mulv16qi3"
2806 [(set (match_operand:V16QI 0 "register_operand" "")
2807 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2808 (match_operand:V16QI 2 "register_operand" "")))]
2810 && !(reload_completed || reload_in_progress)"
2815 rtx t[12], op0, op[3];
2820 /* On SSE5, we can take advantage of the pperm instruction to pack and
2821 unpack the bytes. Unpack data such that we've got a source byte in
2822 each low byte of each word. We don't care what goes into the high
2823 byte, so put 0 there. */
2824 for (i = 0; i < 6; ++i)
2825 t[i] = gen_reg_rtx (V8HImode);
2827 for (i = 0; i < 2; i++)
2830 op[1] = operands[i+1];
2831 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2834 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2837 /* Multiply words. */
2838 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2839 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2841 /* Pack the low byte of each word back into a single xmm */
2842 op[0] = operands[0];
2845 ix86_expand_sse5_pack (op);
2849 for (i = 0; i < 12; ++i)
2850 t[i] = gen_reg_rtx (V16QImode);
2852 /* Unpack data such that we've got a source byte in each low byte of
2853 each word. We don't care what goes into the high byte of each word.
2854 Rather than trying to get zero in there, most convenient is to let
2855 it be a copy of the low byte. */
2856 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2857 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2858 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2859 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2861 /* Multiply words. The end-of-line annotations here give a picture of what
2862 the output of that instruction looks like. Dot means don't care; the
2863 letters are the bytes of the result with A being the most significant. */
2864 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2865 gen_lowpart (V8HImode, t[0]),
2866 gen_lowpart (V8HImode, t[1])));
2867 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2868 gen_lowpart (V8HImode, t[2]),
2869 gen_lowpart (V8HImode, t[3])));
2871 /* Extract the relevant bytes and merge them back together. */
2872 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2873 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2874 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2875 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2876 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2877 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2880 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy with code MULT.
2884 (define_expand "mulv8hi3"
2885 [(set (match_operand:V8HI 0 "register_operand" "")
2886 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2887 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2889 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmullw for a V8HI multiply.  Operand 1 is commutative with operand
;; 2 ("%0") and ends up tied to the output.
2891 (define_insn "*mulv8hi3"
2892 [(set (match_operand:V8HI 0 "register_operand" "=x")
2893 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2894 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2895 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2896 "pmullw\t{%2, %0|%0, %2}"
2897 [(set_attr "type" "sseimul")
2898 (set_attr "prefix_data16" "1")
2899 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (per the pattern name);
;; operands are legitimised by ix86_fixup_binary_operands_no_copy with code
;; MULT.
2901 (define_expand "smulv8hi3_highpart"
2902 [(set (match_operand:V8HI 0 "register_operand" "")
2907 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2909 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2912 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmulhw.  Operand 1 is commutative with operand 2 ("%0") and ends up
;; tied to the output.
2914 (define_insn "*smulv8hi3_highpart"
2915 [(set (match_operand:V8HI 0 "register_operand" "=x")
2920 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2922 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2924 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2925 "pmulhw\t{%2, %0|%0, %2}"
2926 [(set_attr "type" "sseimul")
2927 (set_attr "prefix_data16" "1")
2928 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (per the pattern name);
;; operands are legitimised by ix86_fixup_binary_operands_no_copy with code
;; MULT.
2930 (define_expand "umulv8hi3_highpart"
2931 [(set (match_operand:V8HI 0 "register_operand" "")
2936 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2938 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2941 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmulhuw.  Operand 1 is commutative with operand 2 ("%0") and ends
;; up tied to the output.
2943 (define_insn "*umulv8hi3_highpart"
2944 [(set (match_operand:V8HI 0 "register_operand" "=x")
2949 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2951 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2953 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2954 "pmulhuw\t{%2, %0|%0, %2}"
2955 [(set_attr "type" "sseimul")
2956 (set_attr "prefix_data16" "1")
2957 (set_attr "mode" "TI")])
;; Emits pmuludq: elements 0 and 2 of each V4SI input are selected and the
;; result is a V2DI.  Operand 1 is commutative with operand 2 ("%0").
2959 (define_insn "sse2_umulv2siv2di3"
2960 [(set (match_operand:V2DI 0 "register_operand" "=x")
2964 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2965 (parallel [(const_int 0) (const_int 2)])))
2968 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2969 (parallel [(const_int 0) (const_int 2)])))))]
2970 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2971 "pmuludq\t{%2, %0|%0, %2}"
2972 [(set_attr "type" "sseimul")
2973 (set_attr "prefix_data16" "1")
2974 (set_attr "mode" "TI")])
;; SSE4.1 counterpart emitting pmuldq: elements 0 and 2 of each V4SI input
;; are selected and the result is a V2DI.  Operand 1 is commutative with
;; operand 2 ("%0").
2976 (define_insn "sse4_1_mulv2siv2di3"
2977 [(set (match_operand:V2DI 0 "register_operand" "=x")
2981 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2982 (parallel [(const_int 0) (const_int 2)])))
2985 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2986 (parallel [(const_int 0) (const_int 2)])))))]
2987 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2988 "pmuldq\t{%2, %0|%0, %2}"
2989 [(set_attr "type" "sseimul")
2990 (set_attr "prefix_extra" "1")
2991 (set_attr "mode" "TI")])
;; Emits pmaddwd, producing a V4SI from two V8HI inputs.  The pattern uses
;; two V4HI selections of each input (via match_dup for the second pair),
;; one starting at index 0 and one at index 1.
2993 (define_insn "sse2_pmaddwd"
2994 [(set (match_operand:V4SI 0 "register_operand" "=x")
2999 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3000 (parallel [(const_int 0)
3006 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3007 (parallel [(const_int 0)
3013 (vec_select:V4HI (match_dup 1)
3014 (parallel [(const_int 1)
3019 (vec_select:V4HI (match_dup 2)
3020 (parallel [(const_int 1)
3023 (const_int 7)]))))))]
3024 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3025 "pmaddwd\t{%2, %0|%0, %2}"
3026 [(set_attr "type" "sseiadd")
3027 (set_attr "prefix_data16" "1")
3028 (set_attr "mode" "TI")])
;; V4SI multiply expander.  Operand fix-up via
;; ix86_fixup_binary_operands_no_copy is applied only when TARGET_SSE4_1 or
;; TARGET_SSE5 is enabled.
3030 (define_expand "mulv4si3"
3031 [(set (match_operand:V4SI 0 "register_operand" "")
3032 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3033 (match_operand:V4SI 2 "register_operand" "")))]
3036 if (TARGET_SSE4_1 || TARGET_SSE5)
3037 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; Emits pmulld for a V4SI multiply under TARGET_SSE4_1.  Operand 1 is
;; commutative with operand 2 ("%0") and ends up tied to the output.
3040 (define_insn "*sse4_1_mulv4si3"
3041 [(set (match_operand:V4SI 0 "register_operand" "=x")
3042 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3043 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3044 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3045 "pmulld\t{%2, %0|%0, %2}"
3046 [(set_attr "type" "sseimul")
3047 (set_attr "prefix_extra" "1")
3048 (set_attr "mode" "TI")])
3050 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3051 ;; multiply/add. In general, we expect the define_split to occur before
3052 ;; register allocation, so we have to handle the corner case where the target
3053 ;; is used as the base or index register in operands 1/2.
;; The output is early-clobber ("=&x").  The split fires after reload, or
;; earlier when operand 0 is mentioned in neither input; operands[3] is set
;; to the V4SI zero vector for use by the replacement multiply/add.
3054 (define_insn_and_split "*sse5_mulv4si3"
3055 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3056 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3057 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3060 "&& (reload_completed
3061 || (!reg_mentioned_p (operands[0], operands[1])
3062 && !reg_mentioned_p (operands[0], operands[2])))"
3066 (plus:V4SI (mult:V4SI (match_dup 1)
3070 operands[3] = CONST0_RTX (V4SImode);
3072 [(set_attr "type" "ssemuladd")
3073 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5, split before
;; reload.  The sequence multiplies elements 0/2 with pmuludq, shifts both
;; inputs down by one element to multiply elements 1/3 the same way, moves
;; the useful results together with pshufd and merges them with punpckldq.
3075 (define_insn_and_split "*sse2_mulv4si3"
3076 [(set (match_operand:V4SI 0 "register_operand" "")
3077 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3078 (match_operand:V4SI 2 "register_operand" "")))]
3079 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3080 && !(reload_completed || reload_in_progress)"
3085 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3091 t1 = gen_reg_rtx (V4SImode);
3092 t2 = gen_reg_rtx (V4SImode);
3093 t3 = gen_reg_rtx (V4SImode);
3094 t4 = gen_reg_rtx (V4SImode);
3095 t5 = gen_reg_rtx (V4SImode);
3096 t6 = gen_reg_rtx (V4SImode);
3097 thirtytwo = GEN_INT (32);
3099 /* Multiply elements 2 and 0. */
3100 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3103 /* Shift both input vectors down one element, so that elements 3
3104 and 1 are now in the slots for elements 2 and 0. For K8, at
3105 least, this is faster than using a shuffle. */
3106 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3107 gen_lowpart (TImode, op1),
3109 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3110 gen_lowpart (TImode, op2),
3112 /* Multiply elements 3 and 1. */
3113 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3116 /* Move the results in element 2 down to element 1; we don't care
3117 what goes in elements 2 and 3. */
3118 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3119 const0_rtx, const0_rtx));
3120 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3121 const0_rtx, const0_rtx));
3123 /* Merge the parts back together. */
3124 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, split before reload into 32x32->64 pieces: the low halves
;; are multiplied directly, each operand's high half (brought down by a
;; 32-bit logical right shift) is multiplied by the other operand's low
;; half, the two cross products are shifted left by 32, and the three parts
;; are summed.
3128 (define_insn_and_split "mulv2di3"
3129 [(set (match_operand:V2DI 0 "register_operand" "")
3130 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3131 (match_operand:V2DI 2 "register_operand" "")))]
3133 && !(reload_completed || reload_in_progress)"
3138 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3144 t1 = gen_reg_rtx (V2DImode);
3145 t2 = gen_reg_rtx (V2DImode);
3146 t3 = gen_reg_rtx (V2DImode);
3147 t4 = gen_reg_rtx (V2DImode);
3148 t5 = gen_reg_rtx (V2DImode);
3149 t6 = gen_reg_rtx (V2DImode);
3150 thirtytwo = GEN_INT (32);
3152 /* Multiply low parts. */
3153 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3154 gen_lowpart (V4SImode, op2)));
3156 /* Shift input vectors right 32 bits so we can multiply high parts. */
3157 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3158 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3160 /* Multiply high parts by low parts. */
3161 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3162 gen_lowpart (V4SImode, t3)));
3163 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3164 gen_lowpart (V4SImode, t2)));
3166 /* Shift them back. */
3167 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3168 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3170 /* Add the three parts together. */
3171 emit_insn (gen_addv2di3 (t6, t1, t4));
3172 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, high half: two V8HI inputs, V4SI result.
;; mulv8hi3 and smulv8hi3_highpart each supply one 16-bit half of every
;; product (presumably the low and high halves respectively).
;; vec_interleave_highv8hi then pairs the halves up, writing through a
;; V8HI view of operand 0.
;; NOTE(review): the op1/op2 assignments are not visible in this
;; excerpt; presumably operands[1] and operands[2] -- confirm.
3176 (define_expand "vec_widen_smult_hi_v8hi"
3177 [(match_operand:V4SI 0 "register_operand" "")
3178 (match_operand:V8HI 1 "register_operand" "")
3179 (match_operand:V8HI 2 "register_operand" "")]
3182 rtx op1, op2, t1, t2, dest;
3186 t1 = gen_reg_rtx (V8HImode);
3187 t2 = gen_reg_rtx (V8HImode);
3188 dest = gen_lowpart (V8HImode, operands[0]);
3190 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3191 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3192 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, low half: two V8HI inputs, V4SI result.
;; mulv8hi3 and smulv8hi3_highpart each supply one 16-bit half of every
;; product (presumably the low and high halves respectively).
;; vec_interleave_lowv8hi then pairs the halves up, writing through a
;; V8HI view of operand 0.
;; NOTE(review): the op1/op2 assignments are not visible in this
;; excerpt; presumably operands[1] and operands[2] -- confirm.
3196 (define_expand "vec_widen_smult_lo_v8hi"
3197 [(match_operand:V4SI 0 "register_operand" "")
3198 (match_operand:V8HI 1 "register_operand" "")
3199 (match_operand:V8HI 2 "register_operand" "")]
3202 rtx op1, op2, t1, t2, dest;
3206 t1 = gen_reg_rtx (V8HImode);
3207 t2 = gen_reg_rtx (V8HImode);
3208 dest = gen_lowpart (V8HImode, operands[0]);
3210 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3211 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3212 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: two V8HI inputs, V4SI result.
;; mulv8hi3 and umulv8hi3_highpart each supply one 16-bit half of every
;; product (presumably the low and high halves respectively).
;; vec_interleave_highv8hi then pairs the halves up, writing through a
;; V8HI view of operand 0.
;; NOTE(review): the op1/op2 assignments are not visible in this
;; excerpt; presumably operands[1] and operands[2] -- confirm.
3216 (define_expand "vec_widen_umult_hi_v8hi"
3217 [(match_operand:V4SI 0 "register_operand" "")
3218 (match_operand:V8HI 1 "register_operand" "")
3219 (match_operand:V8HI 2 "register_operand" "")]
3222 rtx op1, op2, t1, t2, dest;
3226 t1 = gen_reg_rtx (V8HImode);
3227 t2 = gen_reg_rtx (V8HImode);
3228 dest = gen_lowpart (V8HImode, operands[0]);
3230 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3231 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3232 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: two V8HI inputs, V4SI result.
;; mulv8hi3 and umulv8hi3_highpart each supply one 16-bit half of every
;; product (presumably the low and high halves respectively).
;; vec_interleave_lowv8hi then pairs the halves up, writing through a
;; V8HI view of operand 0.
;; NOTE(review): the op1/op2 assignments are not visible in this
;; excerpt; presumably operands[1] and operands[2] -- confirm.
3236 (define_expand "vec_widen_umult_lo_v8hi"
3237 [(match_operand:V4SI 0 "register_operand" "")
3238 (match_operand:V8HI 1 "register_operand" "")
3239 (match_operand:V8HI 2 "register_operand" "")]
3242 rtx op1, op2, t1, t2, dest;
3246 t1 = gen_reg_rtx (V8HImode);
3247 t2 = gen_reg_rtx (V8HImode);
3248 dest = gen_lowpart (V8HImode, operands[0]);
3250 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3251 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3252 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, high half: two V4SI inputs, V2DI result.
;; Each input is interleaved with itself by vec_interleave_highv4si
;; before being passed to gen_sse2_umulv2siv2di3; presumably this puts
;; the upper two elements into the slots that multiply reads -- confirm.
;; NOTE(review): this is the signed ("smult") expander, yet it emits the
;; same unsigned multiply (umulv2siv2di3) as vec_widen_umult_hi_v4si.
;; That looks wrong for negative elements -- confirm and fix if so.
;; NOTE(review): the op1/op2 assignments and the enabling condition are
;; not visible in this excerpt.
3256 (define_expand "vec_widen_smult_hi_v4si"
3257 [(match_operand:V2DI 0 "register_operand" "")
3258 (match_operand:V4SI 1 "register_operand" "")
3259 (match_operand:V4SI 2 "register_operand" "")]
3262 rtx op1, op2, t1, t2;
3266 t1 = gen_reg_rtx (V4SImode);
3267 t2 = gen_reg_rtx (V4SImode);
3269 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3270 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3271 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, low half: two V4SI inputs, V2DI result.
;; Each input is interleaved with itself by vec_interleave_lowv4si
;; before being passed to gen_sse2_umulv2siv2di3; presumably this puts
;; the lower two elements into the slots that multiply reads -- confirm.
;; NOTE(review): this is the signed ("smult") expander, yet it emits the
;; same unsigned multiply (umulv2siv2di3) as vec_widen_umult_lo_v4si.
;; That looks wrong for negative elements -- confirm and fix if so.
;; NOTE(review): the op1/op2 assignments and the enabling condition are
;; not visible in this excerpt.
3275 (define_expand "vec_widen_smult_lo_v4si"
3276 [(match_operand:V2DI 0 "register_operand" "")
3277 (match_operand:V4SI 1 "register_operand" "")
3278 (match_operand:V4SI 2 "register_operand" "")]
3281 rtx op1, op2, t1, t2;
3285 t1 = gen_reg_rtx (V4SImode);
3286 t2 = gen_reg_rtx (V4SImode);
3288 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3289 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3290 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, high half: two V4SI inputs, V2DI result.
;; Each input is interleaved with itself by vec_interleave_highv4si
;; before being passed to gen_sse2_umulv2siv2di3; presumably this puts
;; the upper two elements into the slots that multiply reads -- confirm.
;; NOTE(review): the op1/op2 assignments are not visible in this
;; excerpt; presumably operands[1] and operands[2] -- confirm.
3294 (define_expand "vec_widen_umult_hi_v4si"
3295 [(match_operand:V2DI 0 "register_operand" "")
3296 (match_operand:V4SI 1 "register_operand" "")
3297 (match_operand:V4SI 2 "register_operand" "")]
3300 rtx op1, op2, t1, t2;
3304 t1 = gen_reg_rtx (V4SImode);
3305 t2 = gen_reg_rtx (V4SImode);
3307 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3308 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3309 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: two V4SI inputs, V2DI result.
;; Each input is interleaved with itself by vec_interleave_lowv4si
;; before being passed to gen_sse2_umulv2siv2di3; presumably this puts
;; the lower two elements into the slots that multiply reads -- confirm.
;; NOTE(review): the op1/op2 assignments are not visible in this
;; excerpt; presumably operands[1] and operands[2] -- confirm.
3313 (define_expand "vec_widen_umult_lo_v4si"
3314 [(match_operand:V2DI 0 "register_operand" "")
3315 (match_operand:V4SI 1 "register_operand" "")
3316 (match_operand:V4SI 2 "register_operand" "")]
3319 rtx op1, op2, t1, t2;
3323 t1 = gen_reg_rtx (V4SImode);
3324 t2 = gen_reg_rtx (V4SImode);
3326 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3327 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3328 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step for V8HI inputs with a V4SI accumulator:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2).  The pmaddwd
;; result is held in a fresh V4SI pseudo before the add.
3332 (define_expand "sdot_prodv8hi"
3333 [(match_operand:V4SI 0 "register_operand" "")
3334 (match_operand:V8HI 1 "register_operand" "")
3335 (match_operand:V8HI 2 "register_operand" "")
3336 (match_operand:V4SI 3 "register_operand" "")]
3339 rtx t = gen_reg_rtx (V4SImode);
3340 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3341 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step for V4SI inputs with a V2DI accumulator.
;; First product: gen_sse2_umulv2siv2di3 of operands 1 and 2, added to
;; the accumulator in operand 3.  Second product: the same multiply on
;; copies of both inputs shifted right as TImode values.  Operand 0
;; receives the sum of the two.
;; NOTE(review): the shift-count arguments and the declaration of
;; t1..t4 are not visible in this excerpt; the shift is presumably by
;; one 32-bit element so that the odd elements get multiplied -- confirm.
3345 (define_expand "udot_prodv4si"
3346 [(match_operand:V2DI 0 "register_operand" "")
3347 (match_operand:V4SI 1 "register_operand" "")
3348 (match_operand:V4SI 2 "register_operand" "")
3349 (match_operand:V2DI 3 "register_operand" "")]
3354 t1 = gen_reg_rtx (V2DImode);
3355 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3356 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3358 t2 = gen_reg_rtx (V4SImode);
3359 t3 = gen_reg_rtx (V4SImode);
3360 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3361 gen_lowpart (TImode, operands[1]),
3363 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3364 gen_lowpart (TImode, operands[2]),
3367 t4 = gen_reg_rtx (V2DImode);
3368 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3370 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element right shift for the SSEMODE24 modes (V8HI, V4SI); the
;; output template expands to psraw / psrad.  Operand 1 is tied to the
;; destination register ("0" constraint) and operand 2 is the SImode
;; shift count, constrained to "xN".
;; NOTE(review): the rtx shift code and the insn condition are not
;; visible in this excerpt; the name and mnemonic suggest an arithmetic
;; shift -- confirm against the full file.
3374 (define_insn "ashr<mode>3"
3375 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3377 (match_operand:SSEMODE24 1 "register_operand" "0")
3378 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3380 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3381 [(set_attr "type" "sseishft")
3382 (set_attr "prefix_data16" "1")
3383 (set_attr "mode" "TI")])
;; Per-element logical right shift (lshiftrt) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI); the output template expands to psrlw / psrld /
;; psrlq.  Operand 1 is tied to the destination register ("0"
;; constraint) and operand 2 is the SImode shift count, constrained to
;; "xN".
;; NOTE(review): the insn condition is not visible in this excerpt.
3385 (define_insn "lshr<mode>3"
3386 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3387 (lshiftrt:SSEMODE248
3388 (match_operand:SSEMODE248 1 "register_operand" "0")
3389 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3391 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3392 [(set_attr "type" "sseishft")
3393 (set_attr "prefix_data16" "1")
3394 (set_attr "mode" "TI")])
;; Per-element left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI);
;; the output template expands to psllw / pslld / psllq.  Operand 1 is
;; tied to the destination register ("0" constraint) and operand 2 is
;; the SImode shift count, constrained to "xN".
;; NOTE(review): the rtx shift code and the insn condition are not
;; visible in this excerpt -- confirm against the full file.
3396 (define_insn "ashl<mode>3"
3397 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3399 (match_operand:SSEMODE248 1 "register_operand" "0")
3400 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3402 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3403 [(set_attr "type" "sseishft")
3404 (set_attr "prefix_data16" "1")
3405 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes (V16QI, V8HI, V4SI,
;; V2DI).  The pattern is an ashift in TImode, so the preparation
;; statements replace operands 0 and 1 with their TImode lowparts.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.
;; NOTE(review): the expander condition and any other preparation
;; statements are not visible in this excerpt.
3407 (define_expand "vec_shl_<mode>"
3408 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3409 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3410 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3413 operands[0] = gen_lowpart (TImode, operands[0]);
3414 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the SSEMODEI modes (V16QI, V8HI,
;; V4SI, V2DI).  The pattern is an lshiftrt in TImode, so the
;; preparation statements replace operands 0 and 1 with their TImode
;; lowparts.  Operand 2 must satisfy const_0_to_255_mul_8_operand.
;; NOTE(review): the expander condition and the end of this definition
;; are not visible in this excerpt.
3417 (define_expand "vec_shr_<mode>"
3418 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3419 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3420 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3423 operands[0] = gen_lowpart (TImode, operands[0]);
3424 operands[1] = gen_lowpart (TImode, operands[1]);