1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
;; Iterator over the integer vector modes V16QI, V8HI, V4SI and V2DI.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
;; Same integer modes as SSEMODEI, extended with V4SF and V2DF.
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subset iterators over the integer vector modes.  The digits in each
;; name correspond to the element sizes in bytes of the modes listed
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
;; <ssevecsize> yields "b", "w", "d" or "q" for V16QI, V8HI, V4SI and
;; V2DI respectively.
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in SSEMODEI.  The visible preparation
;; statement passes the operands to ix86_expand_vector_move.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Integer vector move insn with three alternatives: SSE constant ("C")
;; into a register, register-or-memory into a register, and register
;; into memory.  The condition requires at least one operand to be a
;; register.  Constants are printed through standard_sse_constant_opcode;
;; the other moves print movaps when the insn's mode attribute is V4SF
;; and movdqa otherwise.  The mode attribute depends on optimize_size,
;; TARGET_SSE2 and, for alternative 2, TARGET_SSE_TYPELESS_STORES, and
;; falls back to TI.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 && (register_operand (operands[0], <MODE>mode)
64 || register_operand (operands[1], <MODE>mode))"
66 switch (which_alternative)
69 return standard_sse_constant_opcode (insn, operands[1]);
72 if (get_attr_mode (insn) == MODE_V4SF)
73 return "movaps\t{%1, %0|%0, %1}";
75 return "movdqa\t{%1, %0|%0, %1}";
80 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
84 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
85 (and (eq_attr "alternative" "2")
86 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
89 (const_string "TI")))])
91 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
92 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
93 ;; from memory, we'd prefer to load the memory directly into the %xmm
94 ;; register. To facilitate this happy circumstance, this pattern won't
95 ;; split until after register allocation. If the 64-bit value didn't
96 ;; come from memory, this is the best we can do. This is much better
97 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; Operand 1 is a DImode value held in integer registers ("r") or in
;; memory ("m"); operand 2 is a V4SI scratch that is earlyclobbered in
;; the register alternative and unused ("X") in the memory alternative.
;; Enabled for 32-bit SSE2 targets with TARGET_INTER_UNIT_MOVES, and split
;; only once reload_completed holds.  The register case loads the SImode
;; subregs at byte offsets 0 and 4 with sse2_loadld (into operand 0 and
;; the scratch) and merges them with sse2_punpckldq; the memory case
;; emits vec_concatv2di with const0_rtx as the second half.
100 (define_insn_and_split "movdi_to_sse"
102 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
103 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
104 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
105 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
107 "&& reload_completed"
110 if (register_operand (operands[1], DImode))
112 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
113 Assemble the 64-bit DImode value in an xmm register. */
114 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
115 gen_rtx_SUBREG (SImode, operands[1], 0)));
116 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 4)));
118 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
120 else if (memory_operand (operands[1], DImode))
121 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; V4SF move expander; the visible preparation statement passes the
;; operands to ix86_expand_vector_move with V4SFmode.
126 (define_expand "movv4sf"
127 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
128 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
131 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn: SSE constant into a register, register-or-memory into
;; a register, or register into memory, with at least one register
;; operand required.  Constants are printed through
;; standard_sse_constant_opcode; the visible non-constant template is
;; movaps.  The mode attribute is always V4SF.
135 (define_insn "*movv4sf_internal"
136 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
137 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
139 && (register_operand (operands[0], V4SFmode)
140 || register_operand (operands[1], V4SFmode))"
142 switch (which_alternative)
145 return standard_sse_constant_opcode (insn, operands[1]);
148 return "movaps\t{%1, %0|%0, %1}";
153 [(set_attr "type" "sselog1,ssemov,ssemov")
154 (set_attr "mode" "V4SF")])
;; NOTE(review): the head of this definition is not visible in this view;
;; presumably a define_split -- confirm against the full file.
;; Visible behaviour: once reload_completed holds under TARGET_SSE, a
;; V4SF register set from a zero_extended_scalar_load_operand is
;; rewritten; operand 1 is narrowed to its SFmode subreg at offset 0 and
;; operand 2 becomes the V4SF zero constant.
157 [(set (match_operand:V4SF 0 "register_operand" "")
158 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
159 "TARGET_SSE && reload_completed"
162 (vec_duplicate:V4SF (match_dup 1))
166 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
167 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; the visible preparation statement passes the
;; operands to ix86_expand_vector_move with V2DFmode.
170 (define_expand "movv2df"
171 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
172 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
175 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn with the same three alternatives as the other vector
;; moves (constant, load/register copy, store) and the same requirement
;; that one operand be a register.  Non-constant moves print movaps when
;; the mode attribute is V4SF and movapd otherwise.  The mode attribute
;; is V4SF when optimizing for size, when SSE2 is unavailable, or for
;; alternative 2 under TARGET_SSE_TYPELESS_STORES; otherwise V2DF.
180 (define_insn "*movv2df_internal"
181 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
182 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
184 && (register_operand (operands[0], V2DFmode)
185 || register_operand (operands[1], V2DFmode))"
187 switch (which_alternative)
190 return standard_sse_constant_opcode (insn, operands[1]);
193 if (get_attr_mode (insn) == MODE_V4SF)
194 return "movaps\t{%1, %0|%0, %1}";
196 return "movapd\t{%1, %0|%0, %1}";
201 [(set_attr "type" "sselog1,ssemov,ssemov")
204 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
205 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
206 (and (eq_attr "alternative" "2")
207 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
209 (const_string "V4SF")
210 (const_string "V2DF")))])
;; NOTE(review): the head of this definition is not visible in this view;
;; presumably a define_split -- confirm against the full file.
;; Visible behaviour: once reload_completed holds under TARGET_SSE2, a
;; V2DF register set from a zero_extended_scalar_load_operand becomes a
;; vec_concat of the DFmode subreg of operand 1 (offset 0) with the DFmode
;; zero constant.
212 [(set (match_operand:V2DF 0 "register_operand" "")
213 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
214 "TARGET_SSE2 && reload_completed"
215 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
217 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
218 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; operand 0 must be a register
;; and the visible statement passes it to ix86_expand_push.
221 (define_expand "push<mode>1"
222 [(match_operand:SSEMODE 0 "register_operand" "")]
225 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; the visible
;; statement passes the operands to ix86_expand_vector_move_misalign.
229 (define_expand "movmisalign<mode>"
230 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
231 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
234 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move printed as movups: register from register or
;; memory, or memory from register.  Requires TARGET_SSE and rejects the
;; memory-to-memory combination.  The mode attribute is V4SF to match the
;; single-precision operand mode; V2DF belongs to the movupd pattern.
238 (define_insn "sse_movups"
239 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
240 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
242 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
243 "movups\t{%1, %0|%0, %1}"
244 [(set_attr "type" "ssemov")
245 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move printed as movupd: register from register or
;; memory, or memory from register.  Requires TARGET_SSE2 and rejects the
;; memory-to-memory combination.
247 (define_insn "sse2_movupd"
248 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
249 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
251 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
252 "movupd\t{%1, %0|%0, %1}"
253 [(set_attr "type" "ssemov")
254 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move printed as movdqu, with the same operand shapes
;; and memory-to-memory restriction as the floating-point variants.
;; Sets prefix_data16 to 1 and uses mode TI.
256 (define_insn "sse2_movdqu"
257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
258 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
260 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
261 "movdqu\t{%1, %0|%0, %1}"
262 [(set_attr "type" "ssemov")
263 (set_attr "prefix_data16" "1")
264 (set_attr "mode" "TI")])
;; Store of a V4SF register into memory, printed as movntps.  The source
;; is wrapped in an unspec whose code, like the enabling condition, sits
;; on lines not visible in this view.
266 (define_insn "sse_movntv4sf"
267 [(set (match_operand:V4SF 0 "memory_operand" "=m")
268 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
271 "movntps\t{%1, %0|%0, %1}"
272 [(set_attr "type" "ssemov")
273 (set_attr "mode" "V4SF")])
;; Store of a V2DF register into memory, printed as movntpd.
;; NOTE(review): type is "ssecvt" here but "ssemov" in sse_movntv4sf --
;; confirm whether the difference is intended.
275 (define_insn "sse2_movntv2df"
276 [(set (match_operand:V2DF 0 "memory_operand" "=m")
277 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
280 "movntpd\t{%1, %0|%0, %1}"
281 [(set_attr "type" "ssecvt")
282 (set_attr "mode" "V2DF")])
;; Store of a V2DI register into memory, printed as movntdq.  Sets
;; prefix_data16 to 1 and uses mode TI.
284 (define_insn "sse2_movntv2di"
285 [(set (match_operand:V2DI 0 "memory_operand" "=m")
286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
289 "movntdq\t{%1, %0|%0, %1}"
290 [(set_attr "type" "ssecvt")
291 (set_attr "prefix_data16" "1")
292 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") into memory, printed as
;; movnti.
;; NOTE(review): the mode attribute is "V2DF" although the operands are
;; SImode -- confirm whether this is intentional before relying on it.
294 (define_insn "sse2_movntsi"
295 [(set (match_operand:SI 0 "memory_operand" "=m")
296 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
299 "movnti\t{%1, %0|%0, %1}"
300 [(set_attr "type" "ssecvt")
301 (set_attr "mode" "V2DF")])
;; Load of a V16QI value from memory into an SSE register, printed as
;; lddqu.  Sets prefix_rep to 1 and uses mode TI.
303 (define_insn "sse3_lddqu"
304 [(set (match_operand:V16QI 0 "register_operand" "=x")
305 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
308 "lddqu\t{%1, %0|%0, %1}"
309 [(set_attr "type" "ssecvt")
310 (set_attr "prefix_rep" "1")
311 (set_attr "mode" "TI")])
313 ; Expand patterns for non-temporal stores. At the moment, only those
314 ; that directly map to insns are defined; it would be possible to
315 ; define patterns for other modes that would expand to several insns.
;; Expander template storing V4SF register operand 1 into memory operand
;; 0 through an unspec; the unspec code and the enabling condition are on
;; lines not visible in this view.
317 (define_expand "storentv4sf"
318 [(set (match_operand:V4SF 0 "memory_operand" "=m")
319 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
;; Expander template storing V2DF register operand 1 into memory operand
;; 0 through an unspec; the unspec code and the enabling condition are on
;; lines not visible in this view.
324 (define_expand "storentv2df"
325 [(set (match_operand:V2DF 0 "memory_operand" "=m")
326 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
;; Expander template storing V2DI register operand 1 into memory operand
;; 0 through an unspec; the unspec code and the enabling condition are on
;; lines not visible in this view.
331 (define_expand "storentv2di"
332 [(set (match_operand:V2DI 0 "memory_operand" "=m")
333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
;; Expander template storing SImode register operand 1 into memory
;; operand 0 through an unspec; the unspec code and the enabling
;; condition are on lines not visible in this view.
338 (define_expand "storentsi"
339 [(set (match_operand:SI 0 "memory_operand" "=m")
340 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
;; Expander template storing DFmode register operand 1 into memory
;; operand 0 through an unspec; the unspec code and the enabling
;; condition are on lines not visible in this view.
345 (define_expand "storentdf"
346 [(set (match_operand:DF 0 "memory_operand" "")
347 (unspec:DF [(match_operand:DF 1 "register_operand" "")]
;; Expander template storing SFmode register operand 1 into memory
;; operand 0 through an unspec; the unspec code and the enabling
;; condition are on lines not visible in this view.
352 (define_expand "storentsf"
353 [(set (match_operand:SF 0 "memory_operand" "")
354 (unspec:SF [(match_operand:SF 1 "register_operand" "")]
359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
361 ;; Parallel single-precision floating point arithmetic
363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all work is done by
;; ix86_expand_fp_absneg_operator called with NEG, after which the
;; expander finishes with DONE.
365 (define_expand "negv4sf2"
366 [(set (match_operand:V4SF 0 "register_operand" "")
367 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
369 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all work is done by
;; ix86_expand_fp_absneg_operator called with ABS, after which the
;; expander finishes with DONE.
371 (define_expand "absv4sf2"
372 [(set (match_operand:V4SF 0 "register_operand" "")
373 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
375 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be registers or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for
;; PLUS before the template is emitted.
377 (define_expand "addv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "")
379 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
380 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add printed as addps.  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be a
;; register or memory.  Gated on TARGET_SSE and ix86_binary_operator_ok.
384 (define_insn "*addv4sf3"
385 [(set (match_operand:V4SF 0 "register_operand" "=x")
386 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
387 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
388 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
389 "addps\t{%2, %0|%0, %2}"
390 [(set_attr "type" "sseadd")
391 (set_attr "mode" "V4SF")])
;; Intrinsic-facing add pattern printed as addss, with mode attribute SF.
;; Operand 1 is tied to the destination.  The RTL wrapped around the
;; visible plus sits on lines not shown in this view.
393 (define_insn "sse_vmaddv4sf3"
394 [(set (match_operand:V4SF 0 "register_operand" "=x")
396 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
397 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
400 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
401 "addss\t{%2, %0|%0, %2}"
402 [(set_attr "type" "sseadd")
403 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register;
;; the preparation statement calls ix86_fixup_binary_operands_no_copy
;; for MINUS.
405 (define_expand "subv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "")
407 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
408 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
410 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract printed as subps.  Operand 1 is tied
;; to the destination (no commutative marker); operand 2 may be a
;; register or memory.  The enabling condition is not visible here.
412 (define_insn "*subv4sf3"
413 [(set (match_operand:V4SF 0 "register_operand" "=x")
414 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
415 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
417 "subps\t{%2, %0|%0, %2}"
418 [(set_attr "type" "sseadd")
419 (set_attr "mode" "V4SF")])
;; Intrinsic-facing subtract pattern printed as subss, with mode
;; attribute SF.  Operand 1 is tied to the destination.  The RTL wrapped
;; around the visible minus and the condition are on lines not shown.
421 (define_insn "sse_vmsubv4sf3"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
425 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
429 "subss\t{%2, %0|%0, %2}"
430 [(set_attr "type" "sseadd")
431 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both inputs may be registers or
;; memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT.
433 (define_expand "mulv4sf3"
434 [(set (match_operand:V4SF 0 "register_operand" "")
435 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
436 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
438 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply printed as mulps.  Operand 1 is tied
;; to the destination and marked commutative ("%0").  Gated on TARGET_SSE
;; and ix86_binary_operator_ok.
440 (define_insn "*mulv4sf3"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
443 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
444 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
445 "mulps\t{%2, %0|%0, %2}"
446 [(set_attr "type" "ssemul")
447 (set_attr "mode" "V4SF")])
;; Intrinsic-facing multiply pattern printed as mulss, with mode
;; attribute SF.  Operand 1 is tied to the destination.  The RTL wrapped
;; around the visible mult sits on lines not shown in this view.
449 (define_insn "sse_vmmulv4sf3"
450 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
456 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
457 "mulss\t{%2, %0|%0, %2}"
458 [(set_attr "type" "ssemul")
459 (set_attr "mode" "SF")])
;; V4SF division expander.  After the usual operand fixup for DIV, it
;; calls ix86_emit_swdivsf when TARGET_SSE_MATH and TARGET_RECIP hold,
;; the function is not optimized for size, and the finite-math-only,
;; non-trapping-math and unsafe-math-optimizations flags are all in
;; effect.  What follows that call is on lines not visible here.
461 (define_expand "divv4sf3"
462 [(set (match_operand:V4SF 0 "register_operand" "")
463 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
464 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
467 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
469 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
470 && flag_finite_math_only && !flag_trapping_math
471 && flag_unsafe_math_optimizations)
473 ix86_emit_swdivsf (operands[0], operands[1],
474 operands[2], V4SFmode);
;; Packed single-precision divide printed as divps.  Operand 1 is tied
;; to the destination; operand 2 may be a register or memory.  The
;; enabling condition is not visible here.
479 (define_insn "*divv4sf3"
480 [(set (match_operand:V4SF 0 "register_operand" "=x")
481 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
482 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
484 "divps\t{%2, %0|%0, %2}"
485 [(set_attr "type" "ssediv")
486 (set_attr "mode" "V4SF")])
;; Intrinsic-facing divide pattern printed as divss, with mode attribute
;; SF.  Operand 1 is tied to the destination.  The RTL wrapped around the
;; visible div and the condition are on lines not shown.
488 (define_insn "sse_vmdivv4sf3"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
492 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
496 "divss\t{%2, %0|%0, %2}"
497 [(set_attr "type" "ssediv")
498 (set_attr "mode" "SF")])
;; UNSPEC_RCP applied to a V4SF register or memory operand, printed as
;; rcpps.  The destination is not tied to the input.
500 (define_insn "sse_rcpv4sf2"
501 [(set (match_operand:V4SF 0 "register_operand" "=x")
503 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
505 "rcpps\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "V4SF")])
;; Intrinsic-facing pattern printed as rcpss, with mode attribute SF.
;; Operand 1 is the unspec input; operand 2 is a register tied to the
;; destination.  The enclosing RTL is on lines not shown in this view.
509 (define_insn "sse_vmrcpv4sf2"
510 [(set (match_operand:V4SF 0 "register_operand" "=x")
512 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
514 (match_operand:V4SF 2 "register_operand" "0")
517 "rcpss\t{%1, %0|%0, %1}"
518 [(set_attr "type" "sse")
519 (set_attr "mode" "SF")])
;; UNSPEC_RSQRT applied to a V4SF register or memory operand, printed as
;; rsqrtps.  This is the anonymous insn matching the template that the
;; sse_rsqrtv4sf2 expander also uses.
521 (define_insn "*sse_rsqrtv4sf2"
522 [(set (match_operand:V4SF 0 "register_operand" "=x")
524 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
526 "rsqrtps\t{%1, %0|%0, %1}"
527 [(set_attr "type" "sse")
528 (set_attr "mode" "V4SF")])
;; Expander for the UNSPEC_RSQRT template.  When TARGET_SSE_MATH and
;; TARGET_RECIP hold, the function is not optimized for size, and the
;; finite-math-only, non-trapping-math and unsafe-math-optimizations
;; flags are all in effect, it calls ix86_emit_swsqrtsf with a final
;; argument of 1.  What follows that call is on lines not visible here.
530 (define_expand "sse_rsqrtv4sf2"
531 [(set (match_operand:V4SF 0 "register_operand" "")
533 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
536 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
537 && flag_finite_math_only && !flag_trapping_math
538 && flag_unsafe_math_optimizations)
540 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Intrinsic-facing pattern printed as rsqrtss, with mode attribute SF.
;; Operand 1 is the unspec input; operand 2 is a register tied to the
;; destination.  The enclosing RTL is on lines not shown in this view.
545 (define_insn "sse_vmrsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
550 (match_operand:V4SF 2 "register_operand" "0")
553 "rsqrtss\t{%1, %0|%0, %1}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "SF")])
;; Packed single-precision square root printed as sqrtps.  The input may
;; be a register or memory and is not tied to the destination.  The
;; enabling condition is not visible here.
557 (define_insn "*sqrtv4sf2"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
561 "sqrtps\t{%1, %0|%0, %1}"
562 [(set_attr "type" "sse")
563 (set_attr "mode" "V4SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP
;; hold, the function is not optimized for size, and the
;; finite-math-only, non-trapping-math and unsafe-math-optimizations
;; flags are all in effect, it calls ix86_emit_swsqrtsf with a final
;; argument of 0.  Operand 0 carries an empty constraint string, as in
;; the other arithmetic expanders in this file: constraints play no role
;; in a define_expand template, so a bare "=" was only noise.
565 (define_expand "sqrtv4sf2"
566 [(set (match_operand:V4SF 0 "register_operand" "")
567 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
570 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
571 && flag_finite_math_only && !flag_trapping_math
572 && flag_unsafe_math_optimizations)
574 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Intrinsic-facing pattern printed as sqrtss, with mode attribute SF.
;; Operand 1 feeds the sqrt; operand 2 is a register tied to the
;; destination.  The enclosing RTL is on lines not shown in this view.
579 (define_insn "sse_vmsqrtv4sf2"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
583 (match_operand:V4SF 2 "register_operand" "0")
586 "sqrtss\t{%1, %0|%0, %1}"
587 [(set_attr "type" "sse")
588 (set_attr "mode" "SF")])
590 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
591 ;; isn't really correct, as those rtl operators aren't defined when
592 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; the operands then go through
;; ix86_fixup_binary_operands_no_copy for SMAX.
594 (define_expand "smaxv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "")
596 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
597 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
600 if (!flag_finite_math_only)
601 operands[1] = force_reg (V4SFmode, operands[1]);
602 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps form used only under flag_finite_math_only: operand 1 is tied
;; to the destination and marked commutative ("%0").  Also gated on
;; TARGET_SSE and ix86_binary_operator_ok.
605 (define_insn "*smaxv4sf3_finite"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
607 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
608 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
609 "TARGET_SSE && flag_finite_math_only
610 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
611 "maxps\t{%2, %0|%0, %2}"
612 [(set_attr "type" "sse")
613 (set_attr "mode" "V4SF")])
;; maxps form without the commutative marker: operand 1 must be a
;; register tied to the destination.  The enabling condition is not
;; visible here.
615 (define_insn "*smaxv4sf3"
616 [(set (match_operand:V4SF 0 "register_operand" "=x")
617 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
618 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
620 "maxps\t{%2, %0|%0, %2}"
621 [(set_attr "type" "sse")
622 (set_attr "mode" "V4SF")])
;; Intrinsic-facing maximum pattern printed as maxss, with mode attribute
;; SF.  Operand 1 is tied to the destination.  The RTL wrapped around the
;; visible smax and the condition are on lines not shown.
624 (define_insn "sse_vmsmaxv4sf3"
625 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
628 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
632 "maxss\t{%2, %0|%0, %2}"
633 [(set_attr "type" "sse")
634 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; the operands then go through
;; ix86_fixup_binary_operands_no_copy for SMIN.
636 (define_expand "sminv4sf3"
637 [(set (match_operand:V4SF 0 "register_operand" "")
638 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
639 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
642 if (!flag_finite_math_only)
643 operands[1] = force_reg (V4SFmode, operands[1]);
644 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps form used only under flag_finite_math_only: operand 1 is tied
;; to the destination and marked commutative ("%0").  Also gated on
;; TARGET_SSE and ix86_binary_operator_ok.
647 (define_insn "*sminv4sf3_finite"
648 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
651 "TARGET_SSE && flag_finite_math_only
652 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
653 "minps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sse")
655 (set_attr "mode" "V4SF")])
;; minps form without the commutative marker: operand 1 must be a
;; register tied to the destination.  The enabling condition is not
;; visible here.
657 (define_insn "*sminv4sf3"
658 [(set (match_operand:V4SF 0 "register_operand" "=x")
659 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
660 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
662 "minps\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sse")
664 (set_attr "mode" "V4SF")])
;; Intrinsic-facing minimum pattern printed as minss, with mode attribute
;; SF.  Operand 1 is tied to the destination.  The RTL wrapped around the
;; visible smin and the condition are on lines not shown.
666 (define_insn "sse_vmsminv4sf3"
667 [(set (match_operand:V4SF 0 "register_operand" "=x")
669 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
670 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
674 "minss\t{%2, %0|%0, %2}"
675 [(set_attr "type" "sse")
676 (set_attr "mode" "SF")])
678 ;; These versions of the min/max patterns implement exactly the operations
679 ;; min = (op1 < op2 ? op1 : op2)
680 ;; max = (!(op1 < op2) ? op1 : op2)
681 ;; Their operands are not commutative, and thus they may be used in the
682 ;; presence of -0.0 and NaN.
;; V4SF minimum expressed as an unspec of operands 1 and 2 (the unspec
;; code is on a line not shown), printed as minps.  Operand 1 is a
;; register tied to the destination, with no commutative marker.
684 (define_insn "*ieee_sminv4sf3"
685 [(set (match_operand:V4SF 0 "register_operand" "=x")
686 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
687 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
690 "minps\t{%2, %0|%0, %2}"
691 [(set_attr "type" "sseadd")
692 (set_attr "mode" "V4SF")])
;; V4SF maximum expressed as an unspec of operands 1 and 2 (the unspec
;; code is on a line not shown), printed as maxps.  Operand 1 is a
;; register tied to the destination, with no commutative marker.
694 (define_insn "*ieee_smaxv4sf3"
695 [(set (match_operand:V4SF 0 "register_operand" "=x")
696 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
697 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
700 "maxps\t{%2, %0|%0, %2}"
701 [(set_attr "type" "sseadd")
702 (set_attr "mode" "V4SF")])
;; V2DF minimum expressed as an unspec of operands 1 and 2 (the unspec
;; code is on a line not shown), printed as minpd.  Operand 1 is a
;; register tied to the destination, with no commutative marker.
704 (define_insn "*ieee_sminv2df3"
705 [(set (match_operand:V2DF 0 "register_operand" "=x")
706 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
707 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
710 "minpd\t{%2, %0|%0, %2}"
711 [(set_attr "type" "sseadd")
712 (set_attr "mode" "V2DF")])
;; V2DF maximum expressed as an unspec of operands 1 and 2 (the unspec
;; code is on a line not shown), printed as maxpd.  Operand 1 is a
;; register tied to the destination, with no commutative marker.
714 (define_insn "*ieee_smaxv2df3"
715 [(set (match_operand:V2DF 0 "register_operand" "=x")
716 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
717 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
720 "maxpd\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "mode" "V2DF")])
;; Pattern printed as addsubps, with prefix_rep set to 1.  The visible
;; template holds a minus of operands 1 and 2 next to another expression
;; over the same operands; the operator combining them and the condition
;; are on lines not shown.
724 (define_insn "sse3_addsubv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operand:V4SF 1 "register_operand" "0")
729 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
730 (minus:V4SF (match_dup 1) (match_dup 2))
733 "addsubps\t{%2, %0|%0, %2}"
734 [(set_attr "type" "sseadd")
735 (set_attr "prefix_rep" "1")
736 (set_attr "mode" "V4SF")])
;; Pattern printed as haddps, with prefix_rep set to 1.  The visible
;; template selects elements 0 through 3 of operand 1 and then elements
;; 0 through 3 of operand 2; the operators that combine the selected
;; elements are on lines not shown in this view.
738 (define_insn "sse3_haddv4sf3"
739 [(set (match_operand:V4SF 0 "register_operand" "=x")
744 (match_operand:V4SF 1 "register_operand" "0")
745 (parallel [(const_int 0)]))
746 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
748 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
749 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
753 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))
755 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
757 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
758 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
760 "haddps\t{%2, %0|%0, %2}"
761 [(set_attr "type" "sseadd")
762 (set_attr "prefix_rep" "1")
763 (set_attr "mode" "V4SF")])
;; Pattern printed as hsubps, with prefix_rep set to 1.  The visible
;; template selects elements 0 through 3 of operand 1 and then elements
;; 0 through 3 of operand 2; the operators that combine the selected
;; elements are on lines not shown in this view.
765 (define_insn "sse3_hsubv4sf3"
766 [(set (match_operand:V4SF 0 "register_operand" "=x")
771 (match_operand:V4SF 1 "register_operand" "0")
772 (parallel [(const_int 0)]))
773 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
775 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
776 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
780 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))
782 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
784 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
785 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
787 "hsubps\t{%2, %0|%0, %2}"
788 [(set_attr "type" "sseadd")
789 (set_attr "prefix_rep" "1")
790 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two code paths are visible: one
;; applies sse3_haddv4sf3 twice, going through a fresh temporary
;; register, and the other calls ix86_expand_reduc_v4sf with
;; gen_addv4sf3.  The test that selects between them is on a line not
;; shown in this view.
792 (define_expand "reduc_splus_v4sf"
793 [(match_operand:V4SF 0 "register_operand" "")
794 (match_operand:V4SF 1 "register_operand" "")]
799 rtx tmp = gen_reg_rtx (V4SFmode);
800 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
801 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
804 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF; calls ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
808 (define_expand "reduc_smax_v4sf"
809 [(match_operand:V4SF 0 "register_operand" "")
810 (match_operand:V4SF 1 "register_operand" "")]
813 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF; calls ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
817 (define_expand "reduc_smin_v4sf"
818 [(match_operand:V4SF 0 "register_operand" "")
819 (match_operand:V4SF 1 "register_operand" "")]
822 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
828 ;; Parallel single-precision floating point comparisons
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF comparison whose operator (operand 3) must satisfy
;; sse_comparison_operator.  Printed as cmp<cond>ps, where the condition
;; text comes from the %D3 output modifier.  Operand 1 is tied to the
;; destination.
832 (define_insn "sse_maskcmpv4sf3"
833 [(set (match_operand:V4SF 0 "register_operand" "=x")
834 (match_operator:V4SF 3 "sse_comparison_operator"
835 [(match_operand:V4SF 1 "register_operand" "0")
836 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
838 "cmp%D3ps\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssecmp")
840 (set_attr "mode" "V4SF")])
;; SFmode comparison whose operator (operand 3) must satisfy
;; sse_comparison_operator.  Printed as cmp<cond>ss via %D3, with
;; operand 1 tied to the destination.
842 (define_insn "sse_maskcmpsf3"
843 [(set (match_operand:SF 0 "register_operand" "=x")
844 (match_operator:SF 3 "sse_comparison_operator"
845 [(match_operand:SF 1 "register_operand" "0")
846 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
848 "cmp%D3ss\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssecmp")
850 (set_attr "mode" "SF")])
;; Intrinsic-facing V4SF comparison printed as cmp<cond>ss via %D3, with
;; mode attribute SF.  Unlike the other comparison patterns, operand 2
;; must be a register ("x", no memory alternative).  The RTL wrapped
;; around the comparison is on lines not shown in this view.
852 (define_insn "sse_vmmaskcmpv4sf3"
853 [(set (match_operand:V4SF 0 "register_operand" "=x")
855 (match_operator:V4SF 3 "sse_comparison_operator"
856 [(match_operand:V4SF 1 "register_operand" "0")
857 (match_operand:V4SF 2 "register_operand" "x")])
861 "cmp%D3ss\t{%2, %0|%0, %2}"
862 [(set_attr "type" "ssecmp")
863 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from V4SF operands 0 and 1, each followed
;; by a (parallel [(const_int 0)]) element selector; printed as comiss.
;; The enclosing compare RTL is on lines not shown in this view.
865 (define_insn "sse_comi"
866 [(set (reg:CCFP FLAGS_REG)
869 (match_operand:V4SF 0 "register_operand" "x")
870 (parallel [(const_int 0)]))
872 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
873 (parallel [(const_int 0)]))))]
875 "comiss\t{%1, %0|%0, %1}"
876 [(set_attr "type" "ssecomi")
877 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from V4SF operands 0 and 1, each followed
;; by a (parallel [(const_int 0)]) element selector; printed as ucomiss.
;; The enclosing compare RTL is on lines not shown in this view.
879 (define_insn "sse_ucomi"
880 [(set (reg:CCFPU FLAGS_REG)
883 (match_operand:V4SF 0 "register_operand" "x")
884 (parallel [(const_int 0)]))
886 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
887 (parallel [(const_int 0)]))))]
889 "ucomiss\t{%1, %0|%0, %1}"
890 [(set_attr "type" "ssecomi")
891 (set_attr "mode" "SF")])
;; V4SF conditional expander.  Operands 4 and 5 are the arguments of the
;; comparison (its operator line is not shown); operands 1 and 2 are the
;; general_operand values that follow it.  The expansion itself is
;; attempted by ix86_expand_fp_vcond; what happens on success or failure
;; is on lines not visible here.
893 (define_expand "vcondv4sf"
894 [(set (match_operand:V4SF 0 "register_operand" "")
897 [(match_operand:V4SF 4 "nonimmediate_operand" "")
898 (match_operand:V4SF 5 "nonimmediate_operand" "")])
899 (match_operand:V4SF 1 "general_operand" "")
900 (match_operand:V4SF 2 "general_operand" "")))]
903 if (ix86_expand_fp_vcond (operands))
909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
911 ;; Parallel single-precision floating point logical operations
913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for AND.
915 (define_expand "andv4sf3"
916 [(set (match_operand:V4SF 0 "register_operand" "")
917 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
918 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
920 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; V4SF bitwise AND printed as andps.  Operand 1 is tied to the
;; destination and marked commutative ("%0").  Gated on TARGET_SSE and
;; ix86_binary_operator_ok.
922 (define_insn "*andv4sf3"
923 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
925 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
926 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
927 "andps\t{%2, %0|%0, %2}"
928 [(set_attr "type" "sselog")
929 (set_attr "mode" "V4SF")])
;; Computes (NOT operand 1) AND operand 2 on V4SF, printed as andnps.
;; The complemented operand is the register tied to the destination.
;; The enabling condition is not visible here.
931 (define_insn "sse_nandv4sf3"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
934 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
936 "andnps\t{%2, %0|%0, %2}"
937 [(set_attr "type" "sselog")
938 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for IOR.
940 (define_expand "iorv4sf3"
941 [(set (match_operand:V4SF 0 "register_operand" "")
942 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
943 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
945 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; V4SF bitwise OR printed as orps.  Operand 1 is tied to the destination
;; and marked commutative ("%0").  Gated on TARGET_SSE and
;; ix86_binary_operator_ok.
947 (define_insn "*iorv4sf3"
948 [(set (match_operand:V4SF 0 "register_operand" "=x")
949 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
950 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
951 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
952 "orps\t{%2, %0|%0, %2}"
953 [(set_attr "type" "sselog")
954 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for XOR.
956 (define_expand "xorv4sf3"
957 [(set (match_operand:V4SF 0 "register_operand" "")
958 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
959 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
961 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; V4SF bitwise XOR printed as xorps.  Operand 1 is tied to the
;; destination and marked commutative ("%0").  Gated on TARGET_SSE and
;; ix86_binary_operator_ok.
963 (define_insn "*xorv4sf3"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
965 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
966 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
967 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
968 "xorps\t{%2, %0|%0, %2}"
969 [(set_attr "type" "sselog")
970 (set_attr "mode" "V4SF")])
972 ;; Also define scalar versions. These are used for abs, neg, and
973 ;; conditional move. Using subregs into vector modes causes register
974 ;; allocation lossage. These patterns do not allow memory operands
975 ;; because the native instructions read the full 128 bits.
;; SFmode bitwise AND carried out with andps; both inputs must be
;; registers, and the mode attribute is V4SF.  The enabling condition is
;; not visible here.
977 (define_insn "*andsf3"
978 [(set (match_operand:SF 0 "register_operand" "=x")
979 (and:SF (match_operand:SF 1 "register_operand" "0")
980 (match_operand:SF 2 "register_operand" "x")))]
982 "andps\t{%2, %0|%0, %2}"
983 [(set_attr "type" "sselog")
984 (set_attr "mode" "V4SF")])
;; SFmode (NOT operand 1) AND operand 2 carried out with andnps; both
;; inputs must be registers, and the mode attribute is V4SF.  The
;; enabling condition is not visible here.
986 (define_insn "*nandsf3"
987 [(set (match_operand:SF 0 "register_operand" "=x")
988 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
989 (match_operand:SF 2 "register_operand" "x")))]
991 "andnps\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sselog")
993 (set_attr "mode" "V4SF")])
;; SFmode bitwise OR carried out with orps; both inputs must be
;; registers, and the mode attribute is V4SF.  The enabling condition is
;; not visible here.
995 (define_insn "*iorsf3"
996 [(set (match_operand:SF 0 "register_operand" "=x")
997 (ior:SF (match_operand:SF 1 "register_operand" "0")
998 (match_operand:SF 2 "register_operand" "x")))]
1000 "orps\t{%2, %0|%0, %2}"
1001 [(set_attr "type" "sselog")
1002 (set_attr "mode" "V4SF")])
;; SFmode bitwise XOR carried out with xorps; both inputs must be
;; registers, and the mode attribute is V4SF.  The enabling condition is
;; not visible here.
1004 (define_insn "*xorsf3"
1005 [(set (match_operand:SF 0 "register_operand" "=x")
1006 (xor:SF (match_operand:SF 1 "register_operand" "0")
1007 (match_operand:SF 2 "register_operand" "x")))]
1009 "xorps\t{%2, %0|%0, %2}"
1010 [(set_attr "type" "sselog")
1011 (set_attr "mode" "V4SF")])
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1015 ;; Parallel single-precision floating point conversion operations
1017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pattern printed as cvtpi2ps.  Operand 2 is a V2SI value (constraint
;; "ym") converted with float:V2SF; operand 1 is a V4SF register tied to
;; the destination.  The RTL combining the two and the condition are on
;; lines not shown in this view.
1019 (define_insn "sse_cvtpi2ps"
1020 [(set (match_operand:V4SF 0 "register_operand" "=x")
1023 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1024 (match_operand:V4SF 1 "register_operand" "0")
1027 "cvtpi2ps\t{%2, %0|%0, %2}"
1028 [(set_attr "type" "ssecvt")
1029 (set_attr "mode" "V4SF")])
;; Pattern printed as cvtps2pi.  A V4SF input is converted through a
;; V4SI unspec (code not shown) and elements 0 and 1 are selected into a
;; V2SI destination with constraint "y".  The unit attribute is "mmx".
;; NOTE(review): mode is "DI" here but "SF" in sse_cvttps2pi -- confirm
;; which is intended.
1031 (define_insn "sse_cvtps2pi"
1032 [(set (match_operand:V2SI 0 "register_operand" "=y")
1034 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1036 (parallel [(const_int 0) (const_int 1)])))]
1038 "cvtps2pi\t{%1, %0|%0, %1}"
1039 [(set_attr "type" "ssecvt")
1040 (set_attr "unit" "mmx")
1041 (set_attr "mode" "DI")])
;; Pattern printed as cvttps2pi.  A V4SF input goes through fix:V4SI and
;; elements 0 and 1 are selected into a V2SI destination with constraint
;; "y".  The unit attribute is "mmx".
;; NOTE(review): mode is "SF" here but "DI" in sse_cvtps2pi -- confirm
;; which is intended.
1043 (define_insn "sse_cvttps2pi"
1044 [(set (match_operand:V2SI 0 "register_operand" "=y")
1046 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1047 (parallel [(const_int 0) (const_int 1)])))]
1049 "cvttps2pi\t{%1, %0|%0, %1}"
1050 [(set_attr "type" "ssecvt")
1051 (set_attr "unit" "mmx")
1052 (set_attr "mode" "SF")])
;; Pattern printed as cvtsi2ss.  Operand 2 is an SImode value from a
;; general register (alternative 0) or memory (alternative 1), converted
;; with float:SF; operand 1 is a V4SF register tied to the destination.
;; The athlon and amdfam10 decode attributes are "vector" for the
;; register alternative and "double" for the memory alternative.
1054 (define_insn "sse_cvtsi2ss"
1055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1058 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1059 (match_operand:V4SF 1 "register_operand" "0,0")
1062 "cvtsi2ss\t{%2, %0|%0, %2}"
1063 [(set_attr "type" "sseicvt")
1064 (set_attr "athlon_decode" "vector,double")
1065 (set_attr "amdfam10_decode" "vector,double")
1066 (set_attr "mode" "SF")])
;; sse_cvtsi2ssq: emits cvtsi2ssq, the DImode-source form of the integer to
;; SF conversion.  Only available when both TARGET_SSE and TARGET_64BIT
;; hold.  Operand 1 is tied to the V4SF destination.
1068 (define_insn "sse_cvtsi2ssq"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1072 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1073 (match_operand:V4SF 1 "register_operand" "0,0")
1075 "TARGET_SSE && TARGET_64BIT"
1076 "cvtsi2ssq\t{%2, %0|%0, %2}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "vector,double")
1079 (set_attr "amdfam10_decode" "vector,double")
1080 (set_attr "mode" "SF")])
;; sse_cvtss2si: emits cvtss2si.  Element 0 of the V4SF operand (SSE
;; register or memory) is converted through UNSPEC_FIX_NOTRUNC into an
;; SImode general register.
1082 (define_insn "sse_cvtss2si"
1083 [(set (match_operand:SI 0 "register_operand" "=r,r")
1086 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1087 (parallel [(const_int 0)]))]
1088 UNSPEC_FIX_NOTRUNC))]
1090 "cvtss2si\t{%1, %0|%0, %1}"
1091 [(set_attr "type" "sseicvt")
1092 (set_attr "athlon_decode" "double,vector")
1093 (set_attr "prefix_rep" "1")
1094 (set_attr "mode" "SI")])
;; sse_cvtss2si_2: the same cvtss2si instruction, but matching a plain
;; SFmode source operand (SSE register or memory) instead of an element
;; selected from a V4SF vector.
1096 (define_insn "sse_cvtss2si_2"
1097 [(set (match_operand:SI 0 "register_operand" "=r,r")
1098 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1099 UNSPEC_FIX_NOTRUNC))]
1101 "cvtss2si\t{%1, %0|%0, %1}"
1102 [(set_attr "type" "sseicvt")
1103 (set_attr "athlon_decode" "double,vector")
1104 (set_attr "amdfam10_decode" "double,double")
1105 (set_attr "prefix_rep" "1")
1106 (set_attr "mode" "SI")])
;; sse_cvtss2siq: emits cvtss2siq, producing a DImode result from element 0
;; of the V4SF operand via UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE and
;; TARGET_64BIT.
1108 (define_insn "sse_cvtss2siq"
1109 [(set (match_operand:DI 0 "register_operand" "=r,r")
1112 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1113 (parallel [(const_int 0)]))]
1114 UNSPEC_FIX_NOTRUNC))]
1115 "TARGET_SSE && TARGET_64BIT"
1116 "cvtss2siq\t{%1, %0|%0, %1}"
1117 [(set_attr "type" "sseicvt")
1118 (set_attr "athlon_decode" "double,vector")
1119 (set_attr "prefix_rep" "1")
1120 (set_attr "mode" "DI")])
;; sse_cvtss2siq_2: cvtss2siq matching a plain SFmode source operand
;; (SSE register or memory) with a DImode result.  Requires TARGET_SSE and
;; TARGET_64BIT.
1122 (define_insn "sse_cvtss2siq_2"
1123 [(set (match_operand:DI 0 "register_operand" "=r,r")
1124 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1125 UNSPEC_FIX_NOTRUNC))]
1126 "TARGET_SSE && TARGET_64BIT"
1127 "cvtss2siq\t{%1, %0|%0, %1}"
1128 [(set_attr "type" "sseicvt")
1129 (set_attr "athlon_decode" "double,vector")
1130 (set_attr "amdfam10_decode" "double,double")
1131 (set_attr "prefix_rep" "1")
1132 (set_attr "mode" "DI")])
;; sse_cvttss2si: emits cvttss2si.  The source is element 0 of the V4SF
;; operand (SSE register or memory); the result is an SImode general
;; register.
1134 (define_insn "sse_cvttss2si"
1135 [(set (match_operand:SI 0 "register_operand" "=r,r")
1138 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1139 (parallel [(const_int 0)]))))]
1141 "cvttss2si\t{%1, %0|%0, %1}"
1142 [(set_attr "type" "sseicvt")
1143 (set_attr "athlon_decode" "double,vector")
1144 (set_attr "amdfam10_decode" "double,double")
1145 (set_attr "prefix_rep" "1")
1146 (set_attr "mode" "SI")])
;; sse_cvttss2siq: emits cvttss2siq, the DImode-result form of
;; sse_cvttss2si.  Requires TARGET_SSE and TARGET_64BIT.
1148 (define_insn "sse_cvttss2siq"
1149 [(set (match_operand:DI 0 "register_operand" "=r,r")
1152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1153 (parallel [(const_int 0)]))))]
1154 "TARGET_SSE && TARGET_64BIT"
1155 "cvttss2siq\t{%1, %0|%0, %1}"
1156 [(set_attr "type" "sseicvt")
1157 (set_attr "athlon_decode" "double,vector")
1158 (set_attr "amdfam10_decode" "double,double")
1159 (set_attr "prefix_rep" "1")
1160 (set_attr "mode" "DI")])
;; sse2_cvtdq2ps: emits cvtdq2ps, converting all four V4SI elements
;; (SSE register or memory) to V4SF.
1162 (define_insn "sse2_cvtdq2ps"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1164 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1166 "cvtdq2ps\t{%1, %0|%0, %1}"
1167 [(set_attr "type" "ssecvt")
1168 (set_attr "mode" "V4SF")])
;; sse2_cvtps2dq: emits cvtps2dq.  The V4SF to V4SI conversion is modelled
;; as UNSPEC_FIX_NOTRUNC rather than as a (fix ...) expression.
1170 (define_insn "sse2_cvtps2dq"
1171 [(set (match_operand:V4SI 0 "register_operand" "=x")
1172 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1173 UNSPEC_FIX_NOTRUNC))]
1175 "cvtps2dq\t{%1, %0|%0, %1}"
1176 [(set_attr "type" "ssecvt")
1177 (set_attr "prefix_data16" "1")
1178 (set_attr "mode" "TI")])
;; sse2_cvttps2dq: emits cvttps2dq, modelled as (fix:V4SI ...) of the V4SF
;; source (SSE register or memory).
1180 (define_insn "sse2_cvttps2dq"
1181 [(set (match_operand:V4SI 0 "register_operand" "=x")
1182 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1184 "cvttps2dq\t{%1, %0|%0, %1}"
1185 [(set_attr "type" "ssecvt")
1186 (set_attr "prefix_rep" "1")
1187 (set_attr "mode" "TI")])
1189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1191 ;; Parallel single-precision floating point element swizzling
1193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_movhlps: three alternatives, one template line each:
;;   0: register source        -> movhlps
;;   1: offsettable memory src -> movlps using the %H2 form of operand 2
;;   2: memory destination     -> movhps
;; Operand 1 is always tied to the destination.  The condition rejects the
;; case where operands 1 and 2 are both memory.
1195 (define_insn "sse_movhlps"
1196 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1199 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1200 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1201 (parallel [(const_int 6)
1205 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1207 movhlps\t{%2, %0|%0, %2}
1208 movlps\t{%H2, %0|%0, %H2}
1209 movhps\t{%2, %0|%0, %2}"
1210 [(set_attr "type" "ssemov")
1211 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movlhps: three alternatives, one template line each:
;;   0: register source           -> movlhps
;;   1: memory source             -> movhps
;;   2: offsettable memory dest   -> movlps to the %H0 form of operand 0
;; Operand 1 is always tied to the destination.  Operand validity is
;; checked with ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands).
1213 (define_insn "sse_movlhps"
1214 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1217 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1218 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1219 (parallel [(const_int 0)
1223 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1225 movlhps\t{%2, %0|%0, %2}
1226 movhps\t{%2, %0|%0, %2}
1227 movlps\t{%2, %H0|%H0, %2}"
1228 [(set_attr "type" "ssemov")
1229 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_unpckhps: emits unpckhps.  The selection vector picks elements
;; 2, 6, 3, 7 from the pair formed by operands 1 and 2; operand 1 is tied to
;; the destination.
1231 (define_insn "sse_unpckhps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 2) (const_int 6)
1238 (const_int 3) (const_int 7)])))]
1240 "unpckhps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
;; sse_unpcklps: emits unpcklps.  The selection vector picks elements
;; 0, 4, 1, 5 from the pair formed by operands 1 and 2; operand 1 is tied to
;; the destination.
1244 (define_insn "sse_unpcklps"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1248 (match_operand:V4SF 1 "register_operand" "0")
1249 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1250 (parallel [(const_int 0) (const_int 4)
1251 (const_int 1) (const_int 5)])))]
1253 "unpcklps\t{%2, %0|%0, %2}"
1254 [(set_attr "type" "sselog")
1255 (set_attr "mode" "V4SF")])
1257 ;; These are modeled with the same vec_concat as the others so that we
1258 ;; capture users of shufps that can use the new instructions
;; sse3_movshdup: emits movshdup from a single V4SF source (SSE register
;; or memory).  The selection vector starts at element 1.
1259 (define_insn "sse3_movshdup"
1260 [(set (match_operand:V4SF 0 "register_operand" "=x")
1263 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 1)
1270 "movshdup\t{%1, %0|%0, %1}"
1271 [(set_attr "type" "sse")
1272 (set_attr "prefix_rep" "1")
1273 (set_attr "mode" "V4SF")])
;; sse3_movsldup: emits movsldup from a single V4SF source (SSE register
;; or memory).  The selection vector starts at element 0.
1275 (define_insn "sse3_movsldup"
1276 [(set (match_operand:V4SF 0 "register_operand" "=x")
1279 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1281 (parallel [(const_int 0)
1286 "movsldup\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "sse")
1288 (set_attr "prefix_rep" "1")
1289 (set_attr "mode" "V4SF")])
;; sse_shufps: expander taking the 8-bit shufps immediate as operand 3.
;; The mask is split into four 2-bit selectors.  The two upper selectors
;; get 4 added, so they fall in the 4..7 range that sse_shufps_1 requires
;; for its last two selector operands.
1291 (define_expand "sse_shufps"
1292 [(match_operand:V4SF 0 "register_operand" "")
1293 (match_operand:V4SF 1 "register_operand" "")
1294 (match_operand:V4SF 2 "nonimmediate_operand" "")
1295 (match_operand:SI 3 "const_int_operand" "")]
1298 int mask = INTVAL (operands[3]);
1299 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1300 GEN_INT ((mask >> 0) & 3),
1301 GEN_INT ((mask >> 2) & 3),
1302 GEN_INT (((mask >> 4) & 3) + 4),
1303 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_1: emits shufps with explicit element selectors.  Operands 3
;; and 4 are in 0..3 and operands 5 and 6 are in 4..7.  The output code
;; folds them back into one 8-bit immediate (subtracting 4 from the upper
;; two), stores it in operands[3] and prints it as %3.
1307 (define_insn "sse_shufps_1"
1308 [(set (match_operand:V4SF 0 "register_operand" "=x")
1311 (match_operand:V4SF 1 "register_operand" "0")
1312 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1313 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1314 (match_operand 4 "const_0_to_3_operand" "")
1315 (match_operand 5 "const_4_to_7_operand" "")
1316 (match_operand 6 "const_4_to_7_operand" "")])))]
1320 mask |= INTVAL (operands[3]) << 0;
1321 mask |= INTVAL (operands[4]) << 2;
1322 mask |= (INTVAL (operands[5]) - 4) << 4;
1323 mask |= (INTVAL (operands[6]) - 4) << 6;
1324 operands[3] = GEN_INT (mask);
1326 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1328 [(set_attr "type" "sselog")
1329 (set_attr "mode" "V4SF")])
;; sse_storehps: extracts elements 2 and 3 of a V4SF value as V2SF.
;;   0: register -> memory             movhps
;;   1: register -> register           movhlps
;;   2: offsettable memory -> register movlps using the %H1 form
1331 (define_insn "sse_storehps"
1332 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1334 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1335 (parallel [(const_int 2) (const_int 3)])))]
1338 movhps\t{%1, %0|%0, %1}
1339 movhlps\t{%1, %0|%0, %1}
1340 movlps\t{%H1, %0|%0, %H1}"
1341 [(set_attr "type" "ssemov")
1342 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps: combines elements 0 and 1 of operand 1 (tied to the
;; destination) with the V2SF operand 2.
;;   0: memory source             movhps
;;   1: register source           movlhps
;;   2: offsettable memory dest   movlps to the %H0 form of operand 0
1344 (define_insn "sse_loadhps"
1345 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1348 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1349 (parallel [(const_int 0) (const_int 1)]))
1350 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1353 movhps\t{%2, %0|%0, %2}
1354 movlhps\t{%2, %0|%0, %2}
1355 movlps\t{%2, %H0|%H0, %2}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_storelps: extracts elements 0 and 1 of a V4SF value as V2SF.
;;   0: register -> memory    movlps
;;   1: register -> register  movaps (whole-register copy)
;;   2: memory -> register    movlps
1359 (define_insn "sse_storelps"
1360 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1362 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1363 (parallel [(const_int 0) (const_int 1)])))]
1366 movlps\t{%1, %0|%0, %1}
1367 movaps\t{%1, %0|%0, %1}
1368 movlps\t{%1, %0|%0, %1}"
1369 [(set_attr "type" "ssemov")
1370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps: combines the V2SF operand 2 with elements 2 and 3 of the
;; V4SF operand 1.
;;   0: operand 2 tied to the destination, operand 1 in a register:
;;      shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied to the destination: movlps
;;   2: memory destination, operand 2 in a register: movlps
1372 (define_insn "sse_loadlps"
1373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1375 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1377 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1378 (parallel [(const_int 2) (const_int 3)]))))]
1381 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1382 movlps\t{%2, %0|%0, %2}
1383 movlps\t{%2, %0|%0, %2}"
1384 [(set_attr "type" "sselog,ssemov,ssemov")
1385 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movss: emits the register-to-register movss.  Operand 2 is the
;; source register; operand 1 is tied to the destination.
1387 (define_insn "sse_movss"
1388 [(set (match_operand:V4SF 0 "register_operand" "=x")
1390 (match_operand:V4SF 2 "register_operand" "x")
1391 (match_operand:V4SF 1 "register_operand" "0")
1394 "movss\t{%2, %0|%0, %2}"
1395 [(set_attr "type" "ssemov")
1396 (set_attr "mode" "SF")])
;; *vec_dupv4sf: builds a V4SF from the SFmode operand 1, which is tied to
;; the destination, by shuffling the register with itself using shufps
;; immediate 0.
1398 (define_insn "*vec_dupv4sf"
1399 [(set (match_operand:V4SF 0 "register_operand" "=x")
1401 (match_operand:SF 1 "register_operand" "0")))]
1403 "shufps\t{$0, %0, %0|%0, %0, 0}"
1404 [(set_attr "type" "sselog1")
1405 (set_attr "mode" "V4SF")])
1407 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1408 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1409 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse_concatv2sf: forms a V2SF from two SFmode operands.  Alternatives:
;;   0: SSE regs, operand 1 tied to dest          unpcklps
;;   1: SSE dest, operand 1 in memory, op 2 "C"   movss
;;   2: MMX regs, operand 1 tied to dest          punpckldq
;;   3: MMX dest, operand 1 in memory, op 2 "C"   movd
1410 (define_insn "*sse_concatv2sf"
1411 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1413 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1414 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1417 unpcklps\t{%2, %0|%0, %2}
1418 movss\t{%1, %0|%0, %1}
1419 punpckldq\t{%2, %0|%0, %2}
1420 movd\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1422 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *sse_concatv4sf: forms a V4SF from two V2SF halves.  Operand 1 is tied
;; to the destination; operand 2 comes from a register (movlhps) or from
;; memory (movhps).
1424 (define_insn "*sse_concatv4sf"
1425 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1427 (match_operand:V2SF 1 "register_operand" " 0,0")
1428 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1431 movlhps\t{%2, %0|%0, %2}
1432 movhps\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "ssemov")
1434 (set_attr "mode" "V4SF,V2SF")])
;; vec_initv4sf: expander that hands V4SF initialisation to
;; ix86_expand_vector_init, passing false as its first argument.
1436 (define_expand "vec_initv4sf"
1437 [(match_operand:V4SF 0 "register_operand" "")
1438 (match_operand 1 "" "")]
1441 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4sf_0: inserts the SFmode operand 2 into a V4SF whose other
;; input is operand 1 (tied to the destination, or the constant "C").
;; The constraints describe four alternatives.  The three template lines
;; visible here are movss, movss and movd, in alternative order.
1445 (define_insn "vec_setv4sf_0"
1446 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1449 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1450 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1454 movss\t{%2, %0|%0, %2}
1455 movss\t{%2, %0|%0, %2}
1456 movd\t{%2, %0|%0, %2}
1458 [(set_attr "type" "ssemov")
1459 (set_attr "mode" "SF")])
1461 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: emits insertps.  Operand 3 is a power of two in
;; 1..8 (const_pow2_1_to_8_operand).  The output code replaces it with
;; exact_log2 of that value shifted left by 4 and prints it as the
;; insertps immediate.
1462 (define_insn "*vec_setv4sf_sse4_1"
1463 [(set (match_operand:V4SF 0 "register_operand" "=x")
1466 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1467 (match_operand:V4SF 1 "register_operand" "0")
1468 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1471 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1472 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1474 [(set_attr "type" "sselog")
1475 (set_attr "prefix_extra" "1")
1476 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: emits insertps with a caller-supplied immediate.
;; Operand 2 is the V4SF source register, operand 1 is tied to the
;; destination, and operand 3 is an immediate in 0..255 that is printed
;; unchanged.  The operation is modelled as an unspec.
1478 (define_insn "sse4_1_insertps"
1479 [(set (match_operand:V4SF 0 "register_operand" "=x")
1480 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1481 (match_operand:V4SF 1 "register_operand" "0")
1482 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1485 "insertps\t{%3, %2, %0|%0, %2, %3}"
1486 [(set_attr "type" "sselog")
1487 (set_attr "prefix_extra" "1")
1488 (set_attr "mode" "V4SF")])
;; Anonymous pattern with a V4SF memory destination and a non-memory SF
;; source.  It applies once reload has completed and emits a plain SFmode
;; move to offset 0 of the destination memory.
1491 [(set (match_operand:V4SF 0 "memory_operand" "")
1494 (match_operand:SF 1 "nonmemory_operand" ""))
1497 "TARGET_SSE && reload_completed"
1500 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_setv4sf: expander that hands element insertion to
;; ix86_expand_vector_set.  Operand 2 is a const_int whose value is passed
;; as the last argument; false is passed as the first argument.
1504 (define_expand "vec_setv4sf"
1505 [(match_operand:V4SF 0 "register_operand" "")
1506 (match_operand:SF 1 "register_operand" "")
1507 (match_operand 2 "const_int_operand" "")]
1510 ix86_expand_vector_set (false, operands[0], operands[1],
1511 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extracts element 0 of a V4SF as SF.  The insn
;; condition rejects a memory-to-memory form.  After reload it is split
;; into an ordinary SFmode move: operand 1 is re-expressed in SFmode,
;; either as a REG with the same register number or via gen_lowpart, and
;; then moved into operand 0.
1515 (define_insn_and_split "*vec_extractv4sf_0"
1516 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1518 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1519 (parallel [(const_int 0)])))]
1520 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1522 "&& reload_completed"
1525 rtx op1 = operands[1];
1527 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1529 op1 = gen_lowpart (SFmode, op1);
1530 emit_move_insn (operands[0], op1);
;; *sse4_1_extractps: emits extractps.  Operand 2 is an immediate in 0..3
;; selecting the element of the V4SF register operand 1; the SF result
;; goes to a general register or memory ("rm").
1534 (define_insn "*sse4_1_extractps"
1535 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1537 (match_operand:V4SF 1 "register_operand" "x")
1538 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1540 "extractps\t{%2, %1, %0|%0, %1, %2}"
1541 [(set_attr "type" "sselog")
1542 (set_attr "prefix_extra" "1")
1543 (set_attr "mode" "V4SF")])
;; *vec_extract_v4sf_mem: extracts element N (operand 2, in 0..3) from a
;; V4SF held in offsettable memory.  The split code turns this into an
;; SFmode load from byte offset N*4 of that memory operand.
1545 (define_insn_and_split "*vec_extract_v4sf_mem"
1546 [(set (match_operand:SF 0 "register_operand" "=x*rf")
1548 (match_operand:V4SF 1 "memory_operand" "o")
1549 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
1555 int i = INTVAL (operands[2]);
1557 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extractv4sf: expander that hands element extraction to
;; ix86_expand_vector_extract.  Operand 2 is a const_int whose value is
;; passed as the last argument; false is passed as the first argument.
1561 (define_expand "vec_extractv4sf"
1562 [(match_operand:SF 0 "register_operand" "")
1563 (match_operand:V4SF 1 "register_operand" "")
1564 (match_operand 2 "const_int_operand" "")]
1567 ix86_expand_vector_extract (false, operands[0], operands[1],
1568 INTVAL (operands[2]));
1572 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1574 ;; Parallel double-precision floating point arithmetic
1576 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; negv2df2: V2DF negation.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...); DONE means the RTL template
;; above is not emitted itself.
1578 (define_expand "negv2df2"
1579 [(set (match_operand:V2DF 0 "register_operand" "")
1580 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1582 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; absv2df2: V2DF absolute value.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...); DONE means the RTL template
;; above is not emitted itself.
1584 (define_expand "absv2df2"
1585 [(set (match_operand:V2DF 0 "register_operand" "")
1586 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1588 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; addv2df3: V2DF addition expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (PLUS, ...) on the operands; there is
;; no DONE, so the plus template is emitted afterwards.
1590 (define_expand "addv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "")
1592 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1593 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1595 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; *addv2df3: emits addpd.  The "%" on operand 1 marks operands 1 and 2 as
;; commutative.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (PLUS, V2DFmode, ...).
1597 (define_insn "*addv2df3"
1598 [(set (match_operand:V2DF 0 "register_operand" "=x")
1599 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1600 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1601 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1602 "addpd\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sseadd")
1604 (set_attr "mode" "V2DF")])
;; sse2_vmaddv2df3: emits the scalar addsd on V2DF operands.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
;; The condition validates the operands for PLUS in V2DFmode, the mode of
;; every operand of this pattern.
1606 (define_insn "sse2_vmaddv2df3"
1607 [(set (match_operand:V2DF 0 "register_operand" "=x")
1609 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1610 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1613 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1614 "addsd\t{%2, %0|%0, %2}"
1615 [(set_attr "type" "sseadd")
1616 (set_attr "mode" "DF")])
;; subv2df3: V2DF subtraction expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (MINUS, ...) on the operands; there
;; is no DONE, so the minus template is emitted afterwards.
1618 (define_expand "subv2df3"
1619 [(set (match_operand:V2DF 0 "register_operand" "")
1620 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1621 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1623 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; *subv2df3: emits subpd.  Operand 1 must be a register tied to the
;; destination (no "%": the operands are not commutative); operand 2 may be
;; a register or memory.
1625 (define_insn "*subv2df3"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x")
1627 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1628 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1630 "subpd\t{%2, %0|%0, %2}"
1631 [(set_attr "type" "sseadd")
1632 (set_attr "mode" "V2DF")])
;; sse2_vmsubv2df3: emits the scalar subsd on V2DF operands.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
1634 (define_insn "sse2_vmsubv2df3"
1635 [(set (match_operand:V2DF 0 "register_operand" "=x")
1637 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1638 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1642 "subsd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "sseadd")
1644 (set_attr "mode" "DF")])
;; mulv2df3: V2DF multiplication expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (MULT, ...) on the operands; there is
;; no DONE, so the mult template is emitted afterwards.
1646 (define_expand "mulv2df3"
1647 [(set (match_operand:V2DF 0 "register_operand" "")
1648 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1649 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1651 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; *mulv2df3: emits mulpd.  The "%" on operand 1 marks operands 1 and 2 as
;; commutative.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (MULT, V2DFmode, ...).
1653 (define_insn "*mulv2df3"
1654 [(set (match_operand:V2DF 0 "register_operand" "=x")
1655 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1656 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1657 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1658 "mulpd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "ssemul")
1660 (set_attr "mode" "V2DF")])
;; sse2_vmmulv2df3: emits the scalar mulsd on V2DF operands.  Operand 1 is
;; tied to the destination.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (MULT, V2DFmode, ...).
1662 (define_insn "sse2_vmmulv2df3"
1663 [(set (match_operand:V2DF 0 "register_operand" "=x")
1665 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1669 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1670 "mulsd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "ssemul")
1672 (set_attr "mode" "DF")])
;; divv2df3: V2DF division expander.  Operand 1 must already be a register.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy
;; (DIV, ...); there is no DONE, so the div template is emitted afterwards.
1674 (define_expand "divv2df3"
1675 [(set (match_operand:V2DF 0 "register_operand" "")
1676 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1677 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1679 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; *divv2df3: emits divpd.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
1681 (define_insn "*divv2df3"
1682 [(set (match_operand:V2DF 0 "register_operand" "=x")
1683 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1684 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1686 "divpd\t{%2, %0|%0, %2}"
1687 [(set_attr "type" "ssediv")
1688 (set_attr "mode" "V2DF")])
;; sse2_vmdivv2df3: emits the scalar divsd on V2DF operands.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
1690 (define_insn "sse2_vmdivv2df3"
1691 [(set (match_operand:V2DF 0 "register_operand" "=x")
1693 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1694 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1698 "divsd\t{%2, %0|%0, %2}"
1699 [(set_attr "type" "ssediv")
1700 (set_attr "mode" "DF")])
;; sqrtv2df2: emits sqrtpd.  It has a single source operand (register or
;; memory), so nothing is tied to the destination.
1702 (define_insn "sqrtv2df2"
1703 [(set (match_operand:V2DF 0 "register_operand" "=x")
1704 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1706 "sqrtpd\t{%1, %0|%0, %1}"
1707 [(set_attr "type" "sse")
1708 (set_attr "mode" "V2DF")])
;; sse2_vmsqrtv2df2: emits the scalar sqrtsd.  Operand 1 is the value whose
;; square root is taken; operand 2 is a second V2DF input tied to the
;; destination register.
1710 (define_insn "sse2_vmsqrtv2df2"
1711 [(set (match_operand:V2DF 0 "register_operand" "=x")
1713 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1714 (match_operand:V2DF 2 "register_operand" "0")
1717 "sqrtsd\t{%1, %0|%0, %1}"
1718 [(set_attr "type" "sse")
1719 (set_attr "mode" "DF")])
1721 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1722 ;; isn't really correct, as those rtl operators aren't defined when
1723 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smaxv2df3: V2DF maximum expander.  When flag_finite_math_only is off,
;; operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy (SMAX, ...) runs.
1725 (define_expand "smaxv2df3"
1726 [(set (match_operand:V2DF 0 "register_operand" "")
1727 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1731 if (!flag_finite_math_only)
1732 operands[1] = force_reg (V2DFmode, operands[1]);
1733 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; *smaxv2df3_finite: emits maxpd with commutative operands ("%").  Only
;; matches when flag_finite_math_only is set, in addition to TARGET_SSE2
;; and ix86_binary_operator_ok (SMAX, V2DFmode, ...).
1736 (define_insn "*smaxv2df3_finite"
1737 [(set (match_operand:V2DF 0 "register_operand" "=x")
1738 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1739 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1740 "TARGET_SSE2 && flag_finite_math_only
1741 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1742 "maxpd\t{%2, %0|%0, %2}"
1743 [(set_attr "type" "sseadd")
1744 (set_attr "mode" "V2DF")])
;; *smaxv2df3: emits maxpd without the commutative marker.  Operand 1 must
;; be a register tied to the destination, so operand order is preserved.
1746 (define_insn "*smaxv2df3"
1747 [(set (match_operand:V2DF 0 "register_operand" "=x")
1748 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1749 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1751 "maxpd\t{%2, %0|%0, %2}"
1752 [(set_attr "type" "sseadd")
1753 (set_attr "mode" "V2DF")])
;; sse2_vmsmaxv2df3: emits the scalar maxsd on V2DF operands.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
1755 (define_insn "sse2_vmsmaxv2df3"
1756 [(set (match_operand:V2DF 0 "register_operand" "=x")
1758 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1759 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1763 "maxsd\t{%2, %0|%0, %2}"
1764 [(set_attr "type" "sseadd")
1765 (set_attr "mode" "DF")])
;; sminv2df3: V2DF minimum expander.  When flag_finite_math_only is off,
;; operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy (SMIN, ...) runs.
1767 (define_expand "sminv2df3"
1768 [(set (match_operand:V2DF 0 "register_operand" "")
1769 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1770 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1773 if (!flag_finite_math_only)
1774 operands[1] = force_reg (V2DFmode, operands[1]);
1775 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; *sminv2df3_finite: emits minpd with commutative operands ("%").  Only
;; matches when flag_finite_math_only is set, in addition to TARGET_SSE2
;; and ix86_binary_operator_ok (SMIN, V2DFmode, ...).
1778 (define_insn "*sminv2df3_finite"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1782 "TARGET_SSE2 && flag_finite_math_only
1783 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1784 "minpd\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "sseadd")
1786 (set_attr "mode" "V2DF")])
;; *sminv2df3: emits minpd without the commutative marker.  Operand 1 must
;; be a register tied to the destination, so operand order is preserved.
1788 (define_insn "*sminv2df3"
1789 [(set (match_operand:V2DF 0 "register_operand" "=x")
1790 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1791 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1793 "minpd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "sseadd")
1795 (set_attr "mode" "V2DF")])
;; sse2_vmsminv2df3: emits the scalar minsd on V2DF operands.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
1797 (define_insn "sse2_vmsminv2df3"
1798 [(set (match_operand:V2DF 0 "register_operand" "=x")
1800 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1801 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1805 "minsd\t{%2, %0|%0, %2}"
1806 [(set_attr "type" "sseadd")
1807 (set_attr "mode" "DF")])
;; sse3_addsubv2df3: emits addsubpd.  The RTL refers to operands 1 and 2
;; twice: once directly and once, via match_dup, inside a minus expression.
;; Operand 1 is tied to the destination.
1809 (define_insn "sse3_addsubv2df3"
1810 [(set (match_operand:V2DF 0 "register_operand" "=x")
1813 (match_operand:V2DF 1 "register_operand" "0")
1814 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1815 (minus:V2DF (match_dup 1) (match_dup 2))
1818 "addsubpd\t{%2, %0|%0, %2}"
1819 [(set_attr "type" "sseadd")
1820 (set_attr "mode" "V2DF")])
;; sse3_haddv2df3: emits haddpd.  The RTL pairs element 0 with element 1
;; of operand 1, and element 0 with element 1 of operand 2.  Operand 1 is
;; tied to the destination.
1822 (define_insn "sse3_haddv2df3"
1823 [(set (match_operand:V2DF 0 "register_operand" "=x")
1827 (match_operand:V2DF 1 "register_operand" "0")
1828 (parallel [(const_int 0)]))
1829 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1832 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1833 (parallel [(const_int 0)]))
1834 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1836 "haddpd\t{%2, %0|%0, %2}"
1837 [(set_attr "type" "sseadd")
1838 (set_attr "mode" "V2DF")])
;; sse3_hsubv2df3: emits hsubpd.  The RTL pairs element 0 with element 1
;; of operand 1, and element 0 with element 1 of operand 2.  Operand 1 is
;; tied to the destination.
1840 (define_insn "sse3_hsubv2df3"
1841 [(set (match_operand:V2DF 0 "register_operand" "=x")
1845 (match_operand:V2DF 1 "register_operand" "0")
1846 (parallel [(const_int 0)]))
1847 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1850 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1851 (parallel [(const_int 0)]))
1852 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1854 "hsubpd\t{%2, %0|%0, %2}"
1855 [(set_attr "type" "sseadd")
1856 (set_attr "mode" "V2DF")])
;; reduc_splus_v2df: sum reduction of a V2DF, implemented by emitting
;; sse3_haddv2df3 with operand 1 supplied as both inputs.
1858 (define_expand "reduc_splus_v2df"
1859 [(match_operand:V2DF 0 "register_operand" "")
1860 (match_operand:V2DF 1 "register_operand" "")]
1863 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1867 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1869 ;; Parallel double-precision floating point comparisons
1871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_maskcmpv2df3: packed V2DF compare.  Operator 3 must satisfy
;; sse_comparison_operator; %D3 prints it into the mnemonic, giving
;; cmp<cond>pd.
1873 (define_insn "sse2_maskcmpv2df3"
1874 [(set (match_operand:V2DF 0 "register_operand" "=x")
1875 (match_operator:V2DF 3 "sse_comparison_operator"
1876 [(match_operand:V2DF 1 "register_operand" "0")
1877 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1879 "cmp%D3pd\t{%2, %0|%0, %2}"
1880 [(set_attr "type" "ssecmp")
1881 (set_attr "mode" "V2DF")])
;; sse2_maskcmpdf3: scalar DFmode compare.  Operator 3 must satisfy
;; sse_comparison_operator; %D3 prints it into the mnemonic, giving
;; cmp<cond>sd.
1883 (define_insn "sse2_maskcmpdf3"
1884 [(set (match_operand:DF 0 "register_operand" "=x")
1885 (match_operator:DF 3 "sse_comparison_operator"
1886 [(match_operand:DF 1 "register_operand" "0")
1887 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1889 "cmp%D3sd\t{%2, %0|%0, %2}"
1890 [(set_attr "type" "ssecmp")
1891 (set_attr "mode" "DF")])
;; sse2_vmmaskcmpv2df3: scalar compare (cmp<cond>sd) expressed on V2DF
;; operands.  Operator 3 must satisfy sse_comparison_operator; operand 1 is
;; tied to the destination.
1893 (define_insn "sse2_vmmaskcmpv2df3"
1894 [(set (match_operand:V2DF 0 "register_operand" "=x")
1896 (match_operator:V2DF 3 "sse_comparison_operator"
1897 [(match_operand:V2DF 1 "register_operand" "0")
1898 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1902 "cmp%D3sd\t{%2, %0|%0, %2}"
1903 [(set_attr "type" "ssecmp")
1904 (set_attr "mode" "DF")])
;; sse2_comi: emits comisd.  It compares element 0 of operand 0 with
;; element 0 of operand 1 and sets FLAGS_REG in CCFP mode; no vector or
;; general register is written.
1906 (define_insn "sse2_comi"
1907 [(set (reg:CCFP FLAGS_REG)
1910 (match_operand:V2DF 0 "register_operand" "x")
1911 (parallel [(const_int 0)]))
1913 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1914 (parallel [(const_int 0)]))))]
1916 "comisd\t{%1, %0|%0, %1}"
1917 [(set_attr "type" "ssecomi")
1918 (set_attr "mode" "DF")])
;; sse2_ucomi: emits ucomisd.  It compares element 0 of operand 0 with
;; element 0 of operand 1 and sets FLAGS_REG in CCFPU mode; no vector or
;; general register is written.
1920 (define_insn "sse2_ucomi"
1921 [(set (reg:CCFPU FLAGS_REG)
1924 (match_operand:V2DF 0 "register_operand" "x")
1925 (parallel [(const_int 0)]))
1927 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1928 (parallel [(const_int 0)]))))]
1930 "ucomisd\t{%1, %0|%0, %1}"
1931 [(set_attr "type" "ssecomi")
1932 (set_attr "mode" "DF")])
;; vcondv2df: vector conditional-select expander for V2DF.  Operator 3
;; compares operands 4 and 5; operands 1 and 2 are the two values to choose
;; between.  The preparation code calls ix86_expand_fp_vcond (operands)
;; and branches on its result.
1934 (define_expand "vcondv2df"
1935 [(set (match_operand:V2DF 0 "register_operand" "")
1937 (match_operator 3 ""
1938 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1939 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1940 (match_operand:V2DF 1 "general_operand" "")
1941 (match_operand:V2DF 2 "general_operand" "")))]
1944 if (ix86_expand_fp_vcond (operands))
1950 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1952 ;; Parallel double-precision floating point logical operations
1954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; andv2df3: V2DF bitwise AND expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (AND, ...); there is no DONE, so the
;; and template is emitted afterwards.
1956 (define_expand "andv2df3"
1957 [(set (match_operand:V2DF 0 "register_operand" "")
1958 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1959 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1961 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; *andv2df3: emits andpd with commutative operands ("%").  Requires
;; TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (AND, V2DFmode, ...).
1963 (define_insn "*andv2df3"
1964 [(set (match_operand:V2DF 0 "register_operand" "=x")
1965 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1966 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1967 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1968 "andpd\t{%2, %0|%0, %2}"
1969 [(set_attr "type" "sselog")
1970 (set_attr "mode" "V2DF")])
;; sse2_nandv2df3: emits andnpd, i.e. (NOT operand 1) AND operand 2.
;; Despite the "nand" in the name, only operand 1 is complemented.
;; Operand 1 is tied to the destination.
1972 (define_insn "sse2_nandv2df3"
1973 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1975 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1977 "andnpd\t{%2, %0|%0, %2}"
1978 [(set_attr "type" "sselog")
1979 (set_attr "mode" "V2DF")])
;; iorv2df3: V2DF bitwise OR expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (IOR, ...); there is no DONE, so the
;; ior template is emitted afterwards.
1981 (define_expand "iorv2df3"
1982 [(set (match_operand:V2DF 0 "register_operand" "")
1983 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1984 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1986 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; *iorv2df3: emits orpd with commutative operands ("%").  Requires
;; TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (IOR, V2DFmode, ...).
1988 (define_insn "*iorv2df3"
1989 [(set (match_operand:V2DF 0 "register_operand" "=x")
1990 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1991 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1992 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1993 "orpd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "sselog")
1995 (set_attr "mode" "V2DF")])
;; xorv2df3: V2DF bitwise XOR expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy (XOR, ...); there is no DONE, so the
;; xor template is emitted afterwards.
1997 (define_expand "xorv2df3"
1998 [(set (match_operand:V2DF 0 "register_operand" "")
1999 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2000 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2002 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; *xorv2df3: emits xorpd with commutative operands ("%").  Requires
;; TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (XOR, V2DFmode, ...).
2004 (define_insn "*xorv2df3"
2005 [(set (match_operand:V2DF 0 "register_operand" "=x")
2006 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2007 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2008 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
2009 "xorpd\t{%2, %0|%0, %2}"
2010 [(set_attr "type" "sselog")
2011 (set_attr "mode" "V2DF")])
2013 ;; Also define scalar versions. These are used for abs, neg, and
2014 ;; conditional move. Using subregs into vector modes causes register
2015 ;; allocation lossage. These patterns do not allow memory operands
2016 ;; because the native instructions read the full 128-bits.
;; *anddf3: bitwise AND of two DFmode values.  Both inputs must be
;; registers and operand 1 is tied to the destination.  The insn is emitted
;; as the packed andpd, which is why the "mode" attribute is V2DF.
2018 (define_insn "*anddf3"
2019 [(set (match_operand:DF 0 "register_operand" "=x")
2020 (and:DF (match_operand:DF 1 "register_operand" "0")
2021 (match_operand:DF 2 "register_operand" "x")))]
2023 "andpd\t{%2, %0|%0, %2}"
2024 [(set_attr "type" "sselog")
2025 (set_attr "mode" "V2DF")])
;; *nanddf3: (NOT operand 1) AND operand 2 on DFmode values, emitted as
;; andnpd.  Both inputs must be registers and operand 1 is tied to the
;; destination.
2027 (define_insn "*nanddf3"
2028 [(set (match_operand:DF 0 "register_operand" "=x")
2029 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2030 (match_operand:DF 2 "register_operand" "x")))]
2032 "andnpd\t{%2, %0|%0, %2}"
2033 [(set_attr "type" "sselog")
2034 (set_attr "mode" "V2DF")])
;; *iordf3: bitwise OR of two DFmode values, emitted as orpd.  Both inputs
;; must be registers and operand 1 is tied to the destination.
2036 (define_insn "*iordf3"
2037 [(set (match_operand:DF 0 "register_operand" "=x")
2038 (ior:DF (match_operand:DF 1 "register_operand" "0")
2039 (match_operand:DF 2 "register_operand" "x")))]
2041 "orpd\t{%2, %0|%0, %2}"
2042 [(set_attr "type" "sselog")
2043 (set_attr "mode" "V2DF")])
;; *xordf3: bitwise XOR of two DFmode values, emitted as xorpd.  Both
;; inputs must be registers and operand 1 is tied to the destination.
2045 (define_insn "*xordf3"
2046 [(set (match_operand:DF 0 "register_operand" "=x")
2047 (xor:DF (match_operand:DF 1 "register_operand" "0")
2048 (match_operand:DF 2 "register_operand" "x")))]
2050 "xorpd\t{%2, %0|%0, %2}"
2051 [(set_attr "type" "sselog")
2052 (set_attr "mode" "V2DF")])
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2056 ;; Parallel double-precision floating point conversion operations
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: emits cvtpi2pd, converting a V2SI to V2DF.  The source is
;; an MMX register (alternative 0) or memory (alternative 1); the "unit"
;; attribute is "mmx" only for the register alternative.
2060 (define_insn "sse2_cvtpi2pd"
2061 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2062 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2064 "cvtpi2pd\t{%1, %0|%0, %1}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "unit" "mmx,*")
2067 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: emits cvtpd2pi.  The V2DF to V2SI conversion is modelled
;; as UNSPEC_FIX_NOTRUNC; the result goes to an MMX register ("y").
2069 (define_insn "sse2_cvtpd2pi"
2070 [(set (match_operand:V2SI 0 "register_operand" "=y")
2071 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2072 UNSPEC_FIX_NOTRUNC))]
2074 "cvtpd2pi\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "ssecvt")
2076 (set_attr "unit" "mmx")
2077 (set_attr "prefix_data16" "1")
2078 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: emits cvttpd2pi, modelled as (fix:V2SI ...) of the V2DF
;; source; the result goes to an MMX register ("y").
;; NOTE(review): the "mode" attribute here is "TI" while sse2_cvtpd2pi uses
;; "DI" -- confirm which value is intended.
2080 (define_insn "sse2_cvttpd2pi"
2081 [(set (match_operand:V2SI 0 "register_operand" "=y")
2082 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2084 "cvttpd2pi\t{%1, %0|%0, %1}"
2085 [(set_attr "type" "ssecvt")
2086 (set_attr "unit" "mmx")
2087 (set_attr "prefix_data16" "1")
2088 (set_attr "mode" "TI")])
;; sse2_cvtsi2sd: emits cvtsi2sd.  Operand 2 is an SImode integer from a
;; general register (alternative 0) or memory (alternative 1), converted to
;; DF; operand 1 is tied to the V2DF destination.
2090 (define_insn "sse2_cvtsi2sd"
2091 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2094 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2095 (match_operand:V2DF 1 "register_operand" "0,0")
2098 "cvtsi2sd\t{%2, %0|%0, %2}"
2099 [(set_attr "type" "sseicvt")
2100 (set_attr "mode" "DF")
2101 (set_attr "athlon_decode" "double,direct")
2102 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsi2sdq: emits cvtsi2sdq, the DImode-source form of the integer
;; to DF conversion.  Requires TARGET_SSE2 and TARGET_64BIT.  Operand 1 is
;; tied to the V2DF destination.
2104 (define_insn "sse2_cvtsi2sdq"
2105 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2108 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2109 (match_operand:V2DF 1 "register_operand" "0,0")
2111 "TARGET_SSE2 && TARGET_64BIT"
2112 "cvtsi2sdq\t{%2, %0|%0, %2}"
2113 [(set_attr "type" "sseicvt")
2114 (set_attr "mode" "DF")
2115 (set_attr "athlon_decode" "double,direct")
2116 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: convert element 0 of V2DF operand 1 (SSE register or memory)
;; to an SI general register.  The conversion is modelled with
;; UNSPEC_FIX_NOTRUNC.
2118 (define_insn "sse2_cvtsd2si"
2119 [(set (match_operand:SI 0 "register_operand" "=r,r")
2122 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2123 (parallel [(const_int 0)]))]
2124 UNSPEC_FIX_NOTRUNC))]
2126 "cvtsd2si\t{%1, %0|%0, %1}"
2127 [(set_attr "type" "sseicvt")
2128 (set_attr "athlon_decode" "double,vector")
2129 (set_attr "prefix_rep" "1")
2130 (set_attr "mode" "SI")])
;; Same cvtsd2si instruction as sse2_cvtsd2si, but the source is a scalar
;; DF operand rather than an element selected from a V2DF vector.
2132 (define_insn "sse2_cvtsd2si_2"
2133 [(set (match_operand:SI 0 "register_operand" "=r,r")
2134 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2135 UNSPEC_FIX_NOTRUNC))]
2137 "cvtsd2si\t{%1, %0|%0, %1}"
2138 [(set_attr "type" "sseicvt")
2139 (set_attr "athlon_decode" "double,vector")
2140 (set_attr "amdfam10_decode" "double,double")
2141 (set_attr "prefix_rep" "1")
2142 (set_attr "mode" "SI")])
;; cvtsd2siq: convert element 0 of V2DF operand 1 to a DI general
;; register under UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
2144 (define_insn "sse2_cvtsd2siq"
2145 [(set (match_operand:DI 0 "register_operand" "=r,r")
2148 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2149 (parallel [(const_int 0)]))]
2150 UNSPEC_FIX_NOTRUNC))]
2151 "TARGET_SSE2 && TARGET_64BIT"
2152 "cvtsd2siq\t{%1, %0|%0, %1}"
2153 [(set_attr "type" "sseicvt")
2154 (set_attr "athlon_decode" "double,vector")
2155 (set_attr "prefix_rep" "1")
2156 (set_attr "mode" "DI")])
;; Same cvtsd2siq instruction as sse2_cvtsd2siq, but taking a scalar DF
;; source operand.  Requires TARGET_SSE2 and TARGET_64BIT.
2158 (define_insn "sse2_cvtsd2siq_2"
2159 [(set (match_operand:DI 0 "register_operand" "=r,r")
2160 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2161 UNSPEC_FIX_NOTRUNC))]
2162 "TARGET_SSE2 && TARGET_64BIT"
2163 "cvtsd2siq\t{%1, %0|%0, %1}"
2164 [(set_attr "type" "sseicvt")
2165 (set_attr "athlon_decode" "double,vector")
2166 (set_attr "amdfam10_decode" "double,double")
2167 (set_attr "prefix_rep" "1")
2168 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of V2DF operand 1 (SSE register or
;; memory) to an SI general register.
2170 (define_insn "sse2_cvttsd2si"
2171 [(set (match_operand:SI 0 "register_operand" "=r,r")
2174 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2175 (parallel [(const_int 0)]))))]
2177 "cvttsd2si\t{%1, %0|%0, %1}"
2178 [(set_attr "type" "sseicvt")
2179 (set_attr "prefix_rep" "1")
2180 (set_attr "mode" "SI")
2181 (set_attr "athlon_decode" "double,vector")
2182 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DI-destination counterpart of sse2_cvttsd2si.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
2184 (define_insn "sse2_cvttsd2siq"
2185 [(set (match_operand:DI 0 "register_operand" "=r,r")
2188 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2189 (parallel [(const_int 0)]))))]
2190 "TARGET_SSE2 && TARGET_64BIT"
2191 "cvttsd2siq\t{%1, %0|%0, %1}"
2192 [(set_attr "type" "sseicvt")
2193 (set_attr "prefix_rep" "1")
2194 (set_attr "mode" "DI")
2195 (set_attr "athlon_decode" "double,vector")
2196 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produce a V2DF result from elements 0 and 1 of V4SI
;; operand 1 (SSE register or memory).
2198 (define_insn "sse2_cvtdq2pd"
2199 [(set (match_operand:V2DF 0 "register_operand" "=x")
2202 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2203 (parallel [(const_int 0) (const_int 1)]))))]
2205 "cvtdq2pd\t{%1, %0|%0, %1}"
2206 [(set_attr "type" "ssecvt")
2207 (set_attr "mode" "V2DF")])
;; Expander for the cvtpd2dq intrinsic.  The preparation statement
;; supplies operands[2] as the V2SI zero constant, which the matching
;; insn "*sse2_cvtpd2dq" requires as its const0_operand.
2209 (define_expand "sse2_cvtpd2dq"
2210 [(set (match_operand:V4SI 0 "register_operand" "")
2212 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2216 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V4SI result built from the V2SI conversion of V2DF operand 1
;; together with operand 2, which must be a V2SI zero constant.
2218 (define_insn "*sse2_cvtpd2dq"
2219 [(set (match_operand:V4SI 0 "register_operand" "=x")
2221 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2223 (match_operand:V2SI 2 "const0_operand" "")))]
2225 "cvtpd2dq\t{%1, %0|%0, %1}"
2226 [(set_attr "type" "ssecvt")
2227 (set_attr "prefix_rep" "1")
2228 (set_attr "mode" "TI")
2229 (set_attr "amdfam10_decode" "double")])
;; Expander for the cvttpd2dq intrinsic.  The preparation statement
;; supplies operands[2] as the V2SI zero constant required by the
;; matching insn "*sse2_cvttpd2dq".
2231 (define_expand "sse2_cvttpd2dq"
2232 [(set (match_operand:V4SI 0 "register_operand" "")
2234 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2237 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V4SI result built from the "fix" conversion of V2DF
;; operand 1 together with operand 2, which must be a V2SI zero constant.
2239 (define_insn "*sse2_cvttpd2dq"
2240 [(set (match_operand:V4SI 0 "register_operand" "=x")
2242 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2243 (match_operand:V2SI 2 "const0_operand" "")))]
2245 "cvttpd2dq\t{%1, %0|%0, %1}"
2246 [(set_attr "type" "ssecvt")
2247 (set_attr "prefix_rep" "1")
2248 (set_attr "mode" "TI")
2249 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: narrow V2DF operand 2 (SSE register or memory) with
;; float_truncate.  Operand 1 is a V4SF register tied to the destination
;; in both alternatives.
2251 (define_insn "sse2_cvtsd2ss"
2252 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2255 (float_truncate:V2SF
2256 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2257 (match_operand:V4SF 1 "register_operand" "0,0")
2260 "cvtsd2ss\t{%2, %0|%0, %2}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "athlon_decode" "vector,double")
2263 (set_attr "amdfam10_decode" "vector,double")
2264 (set_attr "mode" "SF")])
;; cvtss2sd: the source is elements 0 and 1 of V4SF operand 2 (SSE
;; register or memory).  Operand 1 is a V2DF register tied to the
;; destination in both alternatives.
2266 (define_insn "sse2_cvtss2sd"
2267 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2271 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2272 (parallel [(const_int 0) (const_int 1)])))
2273 (match_operand:V2DF 1 "register_operand" "0,0")
2276 "cvtss2sd\t{%2, %0|%0, %2}"
2277 [(set_attr "type" "ssecvt")
2278 (set_attr "amdfam10_decode" "vector,double")
2279 (set_attr "mode" "DF")])
;; Expander for the cvtpd2ps intrinsic.  The preparation statement
;; supplies operands[2] as the V2SF zero constant required by the
;; matching insn "*sse2_cvtpd2ps".
;; NOTE(review): operand 1 carries an "xm" constraint string here, unlike
;; the other expanders in this group, which use "" -- confirm whether it
;; should be emptied for consistency.
2281 (define_expand "sse2_cvtpd2ps"
2282 [(set (match_operand:V4SF 0 "register_operand" "")
2284 (float_truncate:V2SF
2285 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2288 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: V4SF result built from the float_truncate of V2DF operand 1
;; together with operand 2, which must be a V2SF zero constant.
2290 (define_insn "*sse2_cvtpd2ps"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x")
2293 (float_truncate:V2SF
2294 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2295 (match_operand:V2SF 2 "const0_operand" "")))]
2297 "cvtpd2ps\t{%1, %0|%0, %1}"
2298 [(set_attr "type" "ssecvt")
2299 (set_attr "prefix_data16" "1")
2300 (set_attr "mode" "V4SF")
2301 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produce a V2DF result from elements 0 and 1 of V4SF
;; operand 1 (SSE register or memory).
2303 (define_insn "sse2_cvtps2pd"
2304 [(set (match_operand:V2DF 0 "register_operand" "=x")
2307 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2308 (parallel [(const_int 0) (const_int 1)]))))]
2310 "cvtps2pd\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "ssecvt")
2312 (set_attr "mode" "V2DF")
2313 (set_attr "amdfam10_decode" "direct")])
;; Two-step expansion producing V2DF operand 0 from V4SF operand 1.
;; operands[2] is a fresh V4SF pseudo allocated by the preparation code;
;; the final set reads elements 0 and 1 of its source.
;; NOTE(review): presumably the first step moves the high half of
;; operand 1 into operands[2] before widening -- confirm.
2315 (define_expand "vec_unpacks_hi_v4sf"
2320 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2321 (parallel [(const_int 6)
2325 (set (match_operand:V2DF 0 "register_operand" "")
2329 (parallel [(const_int 0) (const_int 1)]))))]
2332 operands[2] = gen_reg_rtx (V4SFmode);
;; Produce V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
2335 (define_expand "vec_unpacks_lo_v4sf"
2336 [(set (match_operand:V2DF 0 "register_operand" "")
2339 (match_operand:V4SF 1 "nonimmediate_operand" "")
2340 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_hi_v8hi fills a
;; V4SI temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2343 (define_expand "vec_unpacks_float_hi_v8hi"
2344 [(match_operand:V4SF 0 "register_operand" "")
2345 (match_operand:V8HI 1 "register_operand" "")]
2348 rtx tmp = gen_reg_rtx (V4SImode);
2350 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2351 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_lo_v8hi fills a
;; V4SI temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2355 (define_expand "vec_unpacks_float_lo_v8hi"
2356 [(match_operand:V4SF 0 "register_operand" "")
2357 (match_operand:V8HI 1 "register_operand" "")]
2360 rtx tmp = gen_reg_rtx (V4SImode);
2362 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2363 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_hi_v8hi fills a
;; V4SI temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2367 (define_expand "vec_unpacku_float_hi_v8hi"
2368 [(match_operand:V4SF 0 "register_operand" "")
2369 (match_operand:V8HI 1 "register_operand" "")]
2372 rtx tmp = gen_reg_rtx (V4SImode);
2374 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2375 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_lo_v8hi fills a
;; V4SI temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2379 (define_expand "vec_unpacku_float_lo_v8hi"
2380 [(match_operand:V4SF 0 "register_operand" "")
2381 (match_operand:V8HI 1 "register_operand" "")]
2384 rtx tmp = gen_reg_rtx (V4SImode);
2386 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2387 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion producing V2DF operand 0 from V4SI operand 1.
;; operands[2] is a fresh V4SI pseudo allocated by the preparation code;
;; the final set reads elements 0 and 1 of its source.
;; NOTE(review): presumably the first step moves the high half of
;; operand 1 into operands[2] before conversion -- confirm.
2391 (define_expand "vec_unpacks_float_hi_v4si"
2394 (match_operand:V4SI 1 "nonimmediate_operand" "")
2395 (parallel [(const_int 2)
2399 (set (match_operand:V2DF 0 "register_operand" "")
2403 (parallel [(const_int 0) (const_int 1)]))))]
2406 operands[2] = gen_reg_rtx (V4SImode);
;; Produce V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
2409 (define_expand "vec_unpacks_float_lo_v4si"
2410 [(set (match_operand:V2DF 0 "register_operand" "")
2413 (match_operand:V4SI 1 "nonimmediate_operand" "")
2414 (parallel [(const_int 0) (const_int 1)]))))]
;; Pack two V2DF inputs into one V4SF result.  Each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1 from operand 1,
;; r2 from operand 2); gen_sse_movlhps then combines r1 and r2 into
;; operand 0.
2417 (define_expand "vec_pack_trunc_v2df"
2418 [(match_operand:V4SF 0 "register_operand" "")
2419 (match_operand:V2DF 1 "nonimmediate_operand" "")
2420 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2425 r1 = gen_reg_rtx (V4SFmode);
2426 r2 = gen_reg_rtx (V4SFmode);
2428 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2429 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2430 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI result.  Each input is converted
;; with gen_sse2_cvttpd2dq into its own V4SI temporary; the temporaries
;; and the destination are then viewed as V2DI (gen_lowpart) and combined
;; with gen_sse2_punpcklqdq.
2434 (define_expand "vec_pack_sfix_trunc_v2df"
2435 [(match_operand:V4SI 0 "register_operand" "")
2436 (match_operand:V2DF 1 "nonimmediate_operand" "")
2437 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2442 r1 = gen_reg_rtx (V4SImode);
2443 r2 = gen_reg_rtx (V4SImode);
2445 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2446 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2447 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2448 gen_lowpart (V2DImode, r1),
2449 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF inputs into one V4SI result.  Identical in shape to
;; vec_pack_sfix_trunc_v2df, except that the per-input conversion uses
;; gen_sse2_cvtpd2dq instead of gen_sse2_cvttpd2dq.
2453 (define_expand "vec_pack_sfix_v2df"
2454 [(match_operand:V4SI 0 "register_operand" "")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "")
2456 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2461 r1 = gen_reg_rtx (V4SImode);
2462 r2 = gen_reg_rtx (V4SImode);
2464 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2465 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2466 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2467 gen_lowpart (V2DImode, r1),
2468 gen_lowpart (V2DImode, r2)));
2473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2475 ;; Parallel double-precision floating point element swizzling
2477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, one output template each:
;;   1. register form           -> unpckhpd
;;   2. operand 1 in offsettable memory, operand 2 tied to the destination
;;                              -> movlpd loading from %H1
;;   3. memory destination      -> movhpd store of operand 1
;; The condition rejects the case where both inputs are memory.
2479 (define_insn "sse2_unpckhpd"
2480 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2483 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2484 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2485 (parallel [(const_int 1)
2487 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2489 unpckhpd\t{%2, %0|%0, %2}
2490 movlpd\t{%H1, %0|%0, %H1}
2491 movhpd\t{%1, %0|%0, %1}"
2492 [(set_attr "type" "sselog,ssemov,ssemov")
2493 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup.  Two alternatives: an SSE register destination (emitted
;; as movddup from a register or memory source) and an offsettable memory
;; destination fed from a register.  The condition rejects
;; memory-to-memory operands.
2495 (define_insn "*sse3_movddup"
2496 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2499 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2501 (parallel [(const_int 0)
2503 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2505 movddup\t{%1, %0|%0, %1}
2507 [(set_attr "type" "sselog1,ssemov")
2508 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed from
;; a register: the register is re-viewed as DF (same REGNO) and stored
;; twice, at byte offsets 0 and 8 of the destination.
2511 [(set (match_operand:V2DF 0 "memory_operand" "")
2514 (match_operand:V2DF 1 "register_operand" "")
2516 (parallel [(const_int 0)
2518 "TARGET_SSE3 && reload_completed"
2521 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2522 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2523 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three alternatives, one output template each (operand 1 is tied to the
;; destination in all of them):
;;   1. register operand 2      -> unpcklpd
;;   2. memory operand 2        -> movhpd load
;;   3. offsettable memory destination -> movlpd store of operand 2 to %H0
;; The condition rejects the case where both inputs are memory.
2527 (define_insn "sse2_unpcklpd"
2528 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2531 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2532 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2533 (parallel [(const_int 0)
2535 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2537 unpcklpd\t{%2, %0|%0, %2}
2538 movhpd\t{%2, %0|%0, %2}
2539 movlpd\t{%2, %H0|%H0, %2}"
2540 [(set_attr "type" "sselog,ssemov,ssemov")
2541 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for the shufpd intrinsic.  Operand 3 is an integer constant
;; mask; the preparation code decodes it and forwards to
;; gen_sse2_shufpd_1.  Bit 1 of the mask chooses between index 3 (set)
;; and index 2 (clear) for the final selector argument.
2543 (define_expand "sse2_shufpd"
2544 [(match_operand:V2DF 0 "register_operand" "")
2545 (match_operand:V2DF 1 "register_operand" "")
2546 (match_operand:V2DF 2 "nonimmediate_operand" "")
2547 (match_operand:SI 3 "const_int_operand" "")]
2550 int mask = INTVAL (operands[3]);
2551 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2553 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors.  Operand 3 must be 0 or 1 and
;; operand 4 must be 2 or 3.  The output code folds them back into the
;; instruction immediate: bit 0 = operand 3, bit 1 = operand 4 - 2, and
;; stores that immediate in operands[3] before returning the template.
2557 (define_insn "sse2_shufpd_1"
2558 [(set (match_operand:V2DF 0 "register_operand" "=x")
2561 (match_operand:V2DF 1 "register_operand" "0")
2562 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2563 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2564 (match_operand 4 "const_2_to_3_operand" "")])))]
2568 mask = INTVAL (operands[3]);
2569 mask |= (INTVAL (operands[4]) - 2) << 1;
2570 operands[3] = GEN_INT (mask);
2572 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2574 [(set_attr "type" "sselog")
2575 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0.  Three
;; alternatives: memory destination (movhpd store), SSE register with the
;; source tied to the destination, and a register destination fed from
;; offsettable memory.  The condition rejects memory-to-memory operands.
2577 (define_insn "sse2_storehpd"
2578 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2580 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2581 (parallel [(const_int 1)])))]
2582 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2584 movhpd\t{%1, %0|%0, %1}
2587 [(set_attr "type" "ssemov,sselog1,ssemov")
2588 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 from a V2DF in memory into a
;; DF register becomes a plain DF move from byte offset 8 of that memory.
2591 [(set (match_operand:DF 0 "register_operand" "")
2593 (match_operand:V2DF 1 "memory_operand" "")
2594 (parallel [(const_int 1)])))]
2595 "TARGET_SSE2 && reload_completed"
2596 [(set (match_dup 0) (match_dup 1))]
2598 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0.  Three
;; alternatives: memory destination (movlpd store), SSE register source
;; to SSE register, and memory source to a register.  The condition
;; rejects memory-to-memory operands.
2601 (define_insn "sse2_storelpd"
2602 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2604 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2605 (parallel [(const_int 0)])))]
2606 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2608 movlpd\t{%1, %0|%0, %1}
2611 [(set_attr "type" "ssemov")
2612 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of V2DF operand 1 into a DF
;; register becomes an ordinary DF move.  The source is rewritten either
;; as a DF-mode register with the same REGNO or through gen_lowpart
;; before the move is emitted.
2615 [(set (match_operand:DF 0 "register_operand" "")
2617 (match_operand:V2DF 1 "nonimmediate_operand" "")
2618 (parallel [(const_int 0)])))]
2619 "TARGET_SSE2 && reload_completed"
2622 rtx op1 = operands[1];
2624 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2626 op1 = gen_lowpart (DFmode, op1);
2627 emit_move_insn (operands[0], op1);
;; Combine element 0 of V2DF operand 1 with DF operand 2.  Four
;; alternatives; the visible templates are movhpd (operand 2 in memory),
;; unpcklpd (operand 2 in a register) and shufpd with immediate 1
;; (operand 2 tied to the destination).  The fourth alternative has an
;; offsettable memory destination.  The condition rejects the case where
;; both inputs are memory.
2631 (define_insn "sse2_loadhpd"
2632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2635 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2636 (parallel [(const_int 0)]))
2637 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2638 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2640 movhpd\t{%2, %0|%0, %2}
2641 unpcklpd\t{%2, %0|%0, %2}
2642 shufpd\t{$1, %1, %0|%0, %1, 1}
2644 [(set_attr "type" "ssemov,sselog,sselog,other")
2645 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for a V2DF memory destination whose element 0 is
;; kept (match_dup 0) while DF operand 1 supplies the other element: only
;; a DF store of operand 1 to byte offset 8 of the destination is needed.
2648 [(set (match_operand:V2DF 0 "memory_operand" "")
2650 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2651 (match_operand:DF 1 "register_operand" "")))]
2652 "TARGET_SSE2 && reload_completed"
2653 [(set (match_dup 0) (match_dup 1))]
2655 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine DF operand 2 with element 1 of V2DF operand 1.  Six
;; alternatives; operand 1 may also be the constant matched by "C" (first
;; alternative, emitted as movsd from memory).  The visible templates are
;; movsd, movlpd, movsd, shufpd with immediate 2, and movhpd loading from
;; %H1.  The last alternative has a memory destination.  The condition
;; rejects the case where both inputs are memory.
2658 (define_insn "sse2_loadlpd"
2659 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2661 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2663 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2664 (parallel [(const_int 1)]))))]
2665 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2667 movsd\t{%2, %0|%0, %2}
2668 movlpd\t{%2, %0|%0, %2}
2669 movsd\t{%2, %0|%0, %2}
2670 shufpd\t{$2, %2, %0|%0, %2, 2}
2671 movhpd\t{%H1, %0|%0, %H1}
2673 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2674 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF memory destination whose element 1 is
;; kept (match_dup 0) while DF operand 1 supplies element 0: only a DF
;; store of operand 1 is needed.  Operand 1 is the first (low) element of
;; the pair, so the store must target byte offset 0.  Offset 8 would
;; overwrite the element that has to be preserved and leave element 0
;; unchanged.
2677 [(set (match_operand:V2DF 0 "memory_operand" "")
2679 (match_operand:DF 1 "register_operand" "")
2680 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2681 "TARGET_SSE2 && reload_completed"
2682 [(set (match_dup 0) (match_dup 1))]
2684 operands[0] = adjust_address (operands[0], DFmode, 0);
2687 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of
;; a V2DF into DF, using single-precision move instructions:
;;   1. memory destination          -> movhps store
;;   2. register to register        -> movhlps
;;   3. offsettable memory source   -> movlps load from %H1
;; Memory-to-memory operands are rejected by the condition.
2689 (define_insn "*vec_extractv2df_1_sse"
2690 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2692 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2693 (parallel [(const_int 1)])))]
2694 "!TARGET_SSE2 && TARGET_SSE
2695 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2697 movhps\t{%1, %0|%0, %1}
2698 movhlps\t{%1, %0|%0, %1}
2699 movlps\t{%H1, %0|%0, %H1}"
2700 [(set_attr "type" "ssemov")
2701 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of
;; a V2DF into DF, using single-precision move instructions:
;;   1. memory destination   -> movlps store
;;   2. register to register -> movaps
;;   3. memory source        -> movlps load
;; Memory-to-memory operands are rejected by the condition.
2703 (define_insn "*vec_extractv2df_0_sse"
2704 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2706 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2707 (parallel [(const_int 0)])))]
2708 "!TARGET_SSE2 && TARGET_SSE
2709 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2711 movlps\t{%1, %0|%0, %1}
2712 movaps\t{%1, %0|%0, %1}
2713 movlps\t{%1, %0|%0, %1}"
2714 [(set_attr "type" "ssemov")
2715 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merge of two V2DF operands with six alternatives, one template each:
;;   1. movsd   reg <- reg (operand 2)
;;   2. movlpd  reg <- memory operand 2
;;   3. movlpd  memory destination <- register operand 2
;;   4. shufpd  with immediate 2, operand 2 tied to the destination
;;   5. movhps  load from %H1 (operand 1 in offsettable memory)
;;   6. movhps  store of operand 1 to %H0 (offsettable memory destination)
2717 (define_insn "sse2_movsd"
2718 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2720 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2721 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2725 movsd\t{%2, %0|%0, %2}
2726 movlpd\t{%2, %0|%0, %2}
2727 movlpd\t{%2, %0|%0, %2}
2728 shufpd\t{$2, %2, %0|%0, %2, 2}
2729 movhps\t{%H1, %0|%0, %H1}
2730 movhps\t{%1, %H0|%H0, %1}"
2731 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2732 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from a single DF operand (SSE register or memory) with
;; movddup.
2734 (define_insn "*vec_dupv2df_sse3"
2735 [(set (match_operand:V2DF 0 "register_operand" "=x")
2737 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2739 "movddup\t{%1, %0|%0, %1}"
2740 [(set_attr "type" "sselog1")
2741 (set_attr "mode" "DF")])
;; Build a V2DF from a single DF register operand that is tied to the
;; destination (constraint "0").
2743 (define_insn "*vec_dupv2df"
2744 [(set (match_operand:V2DF 0 "register_operand" "=x")
2746 (match_operand:DF 1 "register_operand" "0")))]
2749 [(set_attr "type" "sselog1")
2750 (set_attr "mode" "V2DF")])
;; V2DF construction from DF operand 1 (SSE register or memory), emitted
;; as movddup.
;; NOTE(review): presumably matches a concatenation of operand 1 with
;; itself -- confirm against the full pattern.
2752 (define_insn "*vec_concatv2df_sse3"
2753 [(set (match_operand:V2DF 0 "register_operand" "=x")
2755 (match_operand:DF 1 "nonimmediate_operand" "xm")
2758 "movddup\t{%1, %0|%0, %1}"
2759 [(set_attr "type" "sselog1")
2760 (set_attr "mode" "DF")])
;; Build a V2DF from DF operands 1 and 2.  Five alternatives, one
;; template each:
;;   1. unpcklpd  operand 2 in a "Yt" register
;;   2. movhpd    operand 2 in memory
;;   3. movsd     operand 1 in memory, operand 2 the "C" constant
;;   4. movlhps   operand 2 in an "x" register
;;   5. movhps    operand 2 in memory
;; Operand 1 is tied to the destination in every alternative except the
;; third.
2762 (define_insn "*vec_concatv2df"
2763 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2765 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2766 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2769 unpcklpd\t{%2, %0|%0, %2}
2770 movhpd\t{%2, %0|%0, %2}
2771 movsd\t{%1, %0|%0, %1}
2772 movlhps\t{%2, %0|%0, %2}
2773 movhps\t{%2, %0|%0, %2}"
2774 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2775 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Delegates to ix86_expand_vector_set with first argument false, passing
;; the V2DF destination, the DF value, and the constant operand 2 as an
;; integer.
2777 (define_expand "vec_setv2df"
2778 [(match_operand:V2DF 0 "register_operand" "")
2779 (match_operand:DF 1 "register_operand" "")
2780 (match_operand 2 "const_int_operand" "")]
2783 ix86_expand_vector_set (false, operands[0], operands[1],
2784 INTVAL (operands[2]));
;; Delegates to ix86_expand_vector_extract with first argument false,
;; passing the DF destination, the V2DF source, and the constant
;; operand 2 as an integer.
2788 (define_expand "vec_extractv2df"
2789 [(match_operand:DF 0 "register_operand" "")
2790 (match_operand:V2DF 1 "register_operand" "")
2791 (match_operand 2 "const_int_operand" "")]
2794 ix86_expand_vector_extract (false, operands[0], operands[1],
2795 INTVAL (operands[2]));
;; Delegates to ix86_expand_vector_init with first argument false.
;; Operand 1 has no predicate and no mode restriction in this pattern.
2799 (define_expand "vec_initv2df"
2800 [(match_operand:V2DF 0 "register_operand" "")
2801 (match_operand 1 "" "")]
2804 ix86_expand_vector_init (false, operands[0], operands[1]);
2808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2810 ;; Parallel integral arithmetic
2812 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for every mode in SSEMODEI.  The preparation
;; statement sets operands[2] to a register holding the all-zero vector
;; of the mode.
;; NOTE(review): presumably the pattern computes operands[2] minus
;; operand 1 -- confirm against the full pattern.
2814 (define_expand "neg<mode>2"
2815 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2818 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2820 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander for every mode in SSEMODEI.  Operand
;; legitimisation is delegated to ix86_fixup_binary_operands_no_copy.
2822 (define_expand "add<mode>3"
2823 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2824 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2825 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2827 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is marked commutative ("%") and tied to the
;; destination; the operand combination must satisfy
;; ix86_binary_operator_ok.
2829 (define_insn "*add<mode>3"
2830 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2832 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2833 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2834 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2835 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2836 [(set_attr "type" "sseiadd")
2837 (set_attr "prefix_data16" "1")
2838 (set_attr "mode" "TI")])
;; padds{b,w}: defined for the SSEMODE12 modes (V16QI, V8HI) and checked
;; with the SS_PLUS operator code.  Operand 1 is commutative and tied to
;; the destination.
2840 (define_insn "sse2_ssadd<mode>3"
2841 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2843 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2844 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2845 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2846 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2847 [(set_attr "type" "sseiadd")
2848 (set_attr "prefix_data16" "1")
2849 (set_attr "mode" "TI")])
;; paddus{b,w}: defined for the SSEMODE12 modes (V16QI, V8HI) and checked
;; with the US_PLUS operator code.  Operand 1 is commutative and tied to
;; the destination.
2851 (define_insn "sse2_usadd<mode>3"
2852 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2854 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2855 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2856 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2857 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2858 [(set_attr "type" "sseiadd")
2859 (set_attr "prefix_data16" "1")
2860 (set_attr "mode" "TI")])
;; Integer vector subtraction expander for every mode in SSEMODEI.
;; Operand 1 must be a register; operand legitimisation is delegated to
;; ix86_fixup_binary_operands_no_copy.
2862 (define_expand "sub<mode>3"
2863 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2864 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2865 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2867 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: operand 1 is a register tied to the destination and is
;; not marked commutative; operand 2 may be an SSE register or memory.
2869 (define_insn "*sub<mode>3"
2870 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2872 (match_operand:SSEMODEI 1 "register_operand" "0")
2873 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2875 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2876 [(set_attr "type" "sseiadd")
2877 (set_attr "prefix_data16" "1")
2878 (set_attr "mode" "TI")])
;; psubs{b,w}: defined for the SSEMODE12 modes (V16QI, V8HI).  Operand 1
;; is a register tied to the destination.
2880 (define_insn "sse2_sssub<mode>3"
2881 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2883 (match_operand:SSEMODE12 1 "register_operand" "0")
2884 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2886 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2887 [(set_attr "type" "sseiadd")
2888 (set_attr "prefix_data16" "1")
2889 (set_attr "mode" "TI")])
;; psubus{b,w}: defined for the SSEMODE12 modes (V16QI, V8HI).  Operand 1
;; is a register tied to the destination.
2891 (define_insn "sse2_ussub<mode>3"
2892 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2894 (match_operand:SSEMODE12 1 "register_operand" "0")
2895 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2897 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2898 [(set_attr "type" "sseiadd")
2899 (set_attr "prefix_data16" "1")
2900 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as an insn-and-split that is only valid
;; before reload (the condition excludes reload_completed and
;; reload_in_progress).  The split allocates twelve V16QI temporaries,
;; widens both inputs with punpck{h,l}bw, multiplies the halves as V8HI
;; via gen_mulv8hi3, and then re-interleaves the product bytes with a
;; chain of punpck{h,l}bw insns into the final destination.
2902 (define_insn_and_split "mulv16qi3"
2903 [(set (match_operand:V16QI 0 "register_operand" "")
2904 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2905 (match_operand:V16QI 2 "register_operand" "")))]
2907 && !(reload_completed || reload_in_progress)"
2915 for (i = 0; i < 12; ++i)
2916 t[i] = gen_reg_rtx (V16QImode);
2918 /* Unpack data such that we've got a source byte in each low byte of
2919 each word. We don't care what goes into the high byte of each word.
2920 Rather than trying to get zero in there, most convenient is to let
2921 it be a copy of the low byte. */
2922 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2923 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2924 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2925 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2927 /* Multiply words. The end-of-line annotations here give a picture of what
2928 the output of that instruction looks like. Dot means don't care; the
2929 letters are the bytes of the result with A being the most significant. */
2930 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2931 gen_lowpart (V8HImode, t[0]),
2932 gen_lowpart (V8HImode, t[1])));
2933 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2934 gen_lowpart (V8HImode, t[2]),
2935 gen_lowpart (V8HImode, t[3])));
2937 /* Extract the relevant bytes and merge them back together. */
2938 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2939 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2940 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2941 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2942 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2943 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2946 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operand legitimisation is delegated to
;; ix86_fixup_binary_operands_no_copy.
2950 (define_expand "mulv8hi3"
2951 [(set (match_operand:V8HI 0 "register_operand" "")
2952 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2953 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2955 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is commutative ("%") and tied to the
;; destination; the operand combination must satisfy
;; ix86_binary_operator_ok.
2957 (define_insn "*mulv8hi3"
2958 [(set (match_operand:V8HI 0 "register_operand" "=x")
2959 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2960 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2961 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2962 "pmullw\t{%2, %0|%0, %2}"
2963 [(set_attr "type" "sseimul")
2964 (set_attr "prefix_data16" "1")
2965 (set_attr "mode" "TI")])
;; Expander for the V8HI multiply-highpart pattern matched by
;; "*smulv8hi3_highpart".  Operand legitimisation is delegated to
;; ix86_fixup_binary_operands_no_copy with the MULT code.
2967 (define_expand "smulv8hi3_highpart"
2968 [(set (match_operand:V8HI 0 "register_operand" "")
2973 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2975 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2978 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw on V8HI operands.  Operand 1 is commutative ("%") and tied to
;; the destination; the operand combination must satisfy
;; ix86_binary_operator_ok.
2980 (define_insn "*smulv8hi3_highpart"
2981 [(set (match_operand:V8HI 0 "register_operand" "=x")
2986 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2988 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2990 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2991 "pmulhw\t{%2, %0|%0, %2}"
2992 [(set_attr "type" "sseimul")
2993 (set_attr "prefix_data16" "1")
2994 (set_attr "mode" "TI")])
;; Expander for the V8HI multiply-highpart pattern matched by
;; "*umulv8hi3_highpart".  Operand legitimisation is delegated to
;; ix86_fixup_binary_operands_no_copy with the MULT code.
2996 (define_expand "umulv8hi3_highpart"
2997 [(set (match_operand:V8HI 0 "register_operand" "")
3002 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3004 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3007 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw on V8HI operands.  Operand 1 is commutative ("%") and tied to
;; the destination; the operand combination must satisfy
;; ix86_binary_operator_ok.
3009 (define_insn "*umulv8hi3_highpart"
3010 [(set (match_operand:V8HI 0 "register_operand" "=x")
3015 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3017 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3019 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3020 "pmulhuw\t{%2, %0|%0, %2}"
3021 [(set_attr "type" "sseimul")
3022 (set_attr "prefix_data16" "1")
3023 (set_attr "mode" "TI")])
;; pmuludq: V2DI result computed from elements 0 and 2 of each V4SI
;; input.  Operand 1 is commutative ("%") and tied to the destination.
3025 (define_insn "sse2_umulv2siv2di3"
3026 [(set (match_operand:V2DI 0 "register_operand" "=x")
3030 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3031 (parallel [(const_int 0) (const_int 2)])))
3034 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3035 (parallel [(const_int 0) (const_int 2)])))))]
3036 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3037 "pmuludq\t{%2, %0|%0, %2}"
3038 [(set_attr "type" "sseimul")
3039 (set_attr "prefix_data16" "1")
3040 (set_attr "mode" "TI")])
;; pmuldq (requires TARGET_SSE4_1): V2DI result computed from elements 0
;; and 2 of each V4SI input.  Operand 1 is commutative ("%") and tied to
;; the destination.
3042 (define_insn "sse4_1_mulv2siv2di3"
3043 [(set (match_operand:V2DI 0 "register_operand" "=x")
3047 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3048 (parallel [(const_int 0) (const_int 2)])))
3051 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3052 (parallel [(const_int 0) (const_int 2)])))))]
3053 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3054 "pmuldq\t{%2, %0|%0, %2}"
3055 [(set_attr "type" "sseimul")
3056 (set_attr "prefix_extra" "1")
3057 (set_attr "mode" "TI")])
;; pmaddwd: V4SI result from two V8HI inputs.  The pattern selects two
;; groups of elements from each input, one group starting at index 0 and
;; one starting at index 1 (the latter ending at index 7).  Operand 1 is
;; commutative ("%") and tied to the destination.
;; NOTE(review): presumably the groups are the even and the odd elements,
;; multiplied pairwise and then added -- confirm against the full pattern.
3059 (define_insn "sse2_pmaddwd"
3060 [(set (match_operand:V4SI 0 "register_operand" "=x")
3065 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3066 (parallel [(const_int 0)
3072 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3073 (parallel [(const_int 0)
3079 (vec_select:V4HI (match_dup 1)
3080 (parallel [(const_int 1)
3085 (vec_select:V4HI (match_dup 2)
3086 (parallel [(const_int 1)
3089 (const_int 7)]))))))]
3090 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3091 "pmaddwd\t{%2, %0|%0, %2}"
3092 [(set_attr "type" "sseiadd")
3093 (set_attr "prefix_data16" "1")
3094 (set_attr "mode" "TI")])
;; V4SI multiply expander; both inputs must be registers.  The visible
;; preparation code calls ix86_fixup_binary_operands_no_copy.
3096 (define_expand "mulv4si3"
3097 [(set (match_operand:V4SI 0 "register_operand" "")
3098 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3099 (match_operand:V4SI 2 "register_operand" "")))]
3103 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; pmulld (requires TARGET_SSE4_1): V4SI multiply.  Operand 1 is
;; commutative ("%") and tied to the destination.
3106 (define_insn "*sse4_1_mulv4si3"
3107 [(set (match_operand:V4SI 0 "register_operand" "=x")
3108 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3109 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3110 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3111 "pmulld\t{%2, %0|%0, %2}"
3112 [(set_attr "type" "sseimul")
3113 (set_attr "prefix_extra" "1")
3114 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1, valid only before
;; reload.  The split builds the result from two gen_sse2_umulv2siv2di3
;; multiplies (one on the inputs as given, one on copies shifted with
;; gen_sse2_lshrti3), rearranges each product with gen_sse2_pshufd_1, and
;; merges the two with gen_sse2_punpckldq.
3116 (define_insn_and_split "*sse2_mulv4si3"
3117 [(set (match_operand:V4SI 0 "register_operand" "")
3118 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3119 (match_operand:V4SI 2 "register_operand" "")))]
3120 "TARGET_SSE2 && !TARGET_SSE4_1
3121 && !(reload_completed || reload_in_progress)"
3126 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3132 t1 = gen_reg_rtx (V4SImode);
3133 t2 = gen_reg_rtx (V4SImode);
3134 t3 = gen_reg_rtx (V4SImode);
3135 t4 = gen_reg_rtx (V4SImode);
3136 t5 = gen_reg_rtx (V4SImode);
3137 t6 = gen_reg_rtx (V4SImode);
3138 thirtytwo = GEN_INT (32);
3140 /* Multiply elements 2 and 0. */
3141 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3144 /* Shift both input vectors down one element, so that elements 3
3145 and 1 are now in the slots for elements 2 and 0. For K8, at
3146 least, this is faster than using a shuffle. */
3147 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3148 gen_lowpart (TImode, op1),
3150 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3151 gen_lowpart (TImode, op2),
3153 /* Multiply elements 3 and 1. */
3154 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3157 /* Move the results in element 2 down to element 1; we don't care
3158 what goes in elements 2 and 3. */
3159 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3160 const0_rtx, const0_rtx));
3161 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3162 const0_rtx, const0_rtx));
3164 /* Merge the parts back together. */
3165 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, valid only before reload.  The split composes each
;; 64-bit product from 32x32->64 unsigned multiplies
;; (gen_sse2_umulv2siv2di3):
;;   t1 = lo(op1) * lo(op2)
;;   t4 = lo(op1) * hi(op2), t5 = lo(op2) * hi(op1), each shifted left 32
;;   result = t1 + t4 + t5
;; where hi(x) is obtained by a logical right shift of 32 bits.
3169 (define_insn_and_split "mulv2di3"
3170 [(set (match_operand:V2DI 0 "register_operand" "")
3171 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3172 (match_operand:V2DI 2 "register_operand" "")))]
3174 && !(reload_completed || reload_in_progress)"
3179 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3185 t1 = gen_reg_rtx (V2DImode);
3186 t2 = gen_reg_rtx (V2DImode);
3187 t3 = gen_reg_rtx (V2DImode);
3188 t4 = gen_reg_rtx (V2DImode);
3189 t5 = gen_reg_rtx (V2DImode);
3190 t6 = gen_reg_rtx (V2DImode);
3191 thirtytwo = GEN_INT (32);
3193 /* Multiply low parts. */
3194 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3195 gen_lowpart (V4SImode, op2)));
3197 /* Shift input vectors right 32 bits so we can multiply high parts. */
3198 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3199 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3201 /* Multiply high parts by low parts. */
3202 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3203 gen_lowpart (V4SImode, t3)));
3204 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3205 gen_lowpart (V4SImode, t2)));
3207 /* Shift them back. */
3208 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3209 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3211 /* Add the three parts together. */
3212 emit_insn (gen_addv2di3 (t6, t1, t4));
3213 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, V8HI x V8HI -> V4SI, "hi" variant.
;; Emits gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then
;; merges t1 and t2 with gen_vec_interleave_highv8hi into operand 0 viewed
;; as V8HI (gen_lowpart).
3217 (define_expand "vec_widen_smult_hi_v8hi"
3218 [(match_operand:V4SI 0 "register_operand" "")
3219 (match_operand:V8HI 1 "register_operand" "")
3220 (match_operand:V8HI 2 "register_operand" "")]
3223 rtx op1, op2, t1, t2, dest;
3227 t1 = gen_reg_rtx (V8HImode);
3228 t2 = gen_reg_rtx (V8HImode);
3229 dest = gen_lowpart (V8HImode, operands[0]);
3231 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3232 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3233 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, V8HI x V8HI -> V4SI, "lo" variant.
;; Emits gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then
;; merges t1 and t2 with gen_vec_interleave_lowv8hi into operand 0 viewed
;; as V8HI (gen_lowpart).
3237 (define_expand "vec_widen_smult_lo_v8hi"
3238 [(match_operand:V4SI 0 "register_operand" "")
3239 (match_operand:V8HI 1 "register_operand" "")
3240 (match_operand:V8HI 2 "register_operand" "")]
3243 rtx op1, op2, t1, t2, dest;
3247 t1 = gen_reg_rtx (V8HImode);
3248 t2 = gen_reg_rtx (V8HImode);
3249 dest = gen_lowpart (V8HImode, operands[0]);
3251 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3252 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3253 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, "hi" variant.
;; Emits gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then
;; merges t1 and t2 with gen_vec_interleave_highv8hi into operand 0 viewed
;; as V8HI (gen_lowpart).
3257 (define_expand "vec_widen_umult_hi_v8hi"
3258 [(match_operand:V4SI 0 "register_operand" "")
3259 (match_operand:V8HI 1 "register_operand" "")
3260 (match_operand:V8HI 2 "register_operand" "")]
3263 rtx op1, op2, t1, t2, dest;
3267 t1 = gen_reg_rtx (V8HImode);
3268 t2 = gen_reg_rtx (V8HImode);
3269 dest = gen_lowpart (V8HImode, operands[0]);
3271 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3272 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3273 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, "lo" variant.
;; Emits gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then
;; merges t1 and t2 with gen_vec_interleave_lowv8hi into operand 0 viewed
;; as V8HI (gen_lowpart).
3277 (define_expand "vec_widen_umult_lo_v8hi"
3278 [(match_operand:V4SI 0 "register_operand" "")
3279 (match_operand:V8HI 1 "register_operand" "")
3280 (match_operand:V8HI 2 "register_operand" "")]
3283 rtx op1, op2, t1, t2, dest;
3287 t1 = gen_reg_rtx (V8HImode);
3288 t2 = gen_reg_rtx (V8HImode);
3289 dest = gen_lowpart (V8HImode, operands[0]);
3291 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3292 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3293 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, V4SI x V4SI -> V2DI, "hi" variant.
;; Each input is interleaved with itself (gen_vec_interleave_highv4si) and
;; the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
;; NOTE(review): this signed expander ends in the same
;; gen_sse2_umulv2siv2di3 call that vec_widen_umult_hi_v4si uses.  If that
;; generator is an unsigned 32x32->64 multiply (as its name suggests),
;; negative inputs would produce wrong products -- confirm.
3297 (define_expand "vec_widen_smult_hi_v4si"
3298 [(match_operand:V2DI 0 "register_operand" "")
3299 (match_operand:V4SI 1 "register_operand" "")
3300 (match_operand:V4SI 2 "register_operand" "")]
3303 rtx op1, op2, t1, t2;
3307 t1 = gen_reg_rtx (V4SImode);
3308 t2 = gen_reg_rtx (V4SImode);
3310 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3311 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3312 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, V4SI x V4SI -> V2DI, "lo" variant.
;; Each input is interleaved with itself (gen_vec_interleave_lowv4si) and
;; the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
;; NOTE(review): this signed expander ends in the same
;; gen_sse2_umulv2siv2di3 call that vec_widen_umult_lo_v4si uses.  If that
;; generator is an unsigned 32x32->64 multiply (as its name suggests),
;; negative inputs would produce wrong products -- confirm.
3316 (define_expand "vec_widen_smult_lo_v4si"
3317 [(match_operand:V2DI 0 "register_operand" "")
3318 (match_operand:V4SI 1 "register_operand" "")
3319 (match_operand:V4SI 2 "register_operand" "")]
3322 rtx op1, op2, t1, t2;
3326 t1 = gen_reg_rtx (V4SImode);
3327 t2 = gen_reg_rtx (V4SImode);
3329 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3330 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3331 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, "hi" variant.
;; Each input is interleaved with itself (gen_vec_interleave_highv4si) and
;; the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
3335 (define_expand "vec_widen_umult_hi_v4si"
3336 [(match_operand:V2DI 0 "register_operand" "")
3337 (match_operand:V4SI 1 "register_operand" "")
3338 (match_operand:V4SI 2 "register_operand" "")]
3341 rtx op1, op2, t1, t2;
3345 t1 = gen_reg_rtx (V4SImode);
3346 t2 = gen_reg_rtx (V4SImode);
3348 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3349 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3350 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, "lo" variant.
;; Each input is interleaved with itself (gen_vec_interleave_lowv4si) and
;; the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
3354 (define_expand "vec_widen_umult_lo_v4si"
3355 [(match_operand:V2DI 0 "register_operand" "")
3356 (match_operand:V4SI 1 "register_operand" "")
3357 (match_operand:V4SI 2 "register_operand" "")]
3360 rtx op1, op2, t1, t2;
3364 t1 = gen_reg_rtx (V4SImode);
3365 t2 = gen_reg_rtx (V4SImode);
3367 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3368 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3369 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step on V8HI inputs with a V4SI accumulator:
;; t = gen_sse2_pmaddwd (operand 1, operand 2), then
;; operand 0 = operand 3 + t via gen_addv4si3.
3373 (define_expand "sdot_prodv8hi"
3374 [(match_operand:V4SI 0 "register_operand" "")
3375 (match_operand:V8HI 1 "register_operand" "")
3376 (match_operand:V8HI 2 "register_operand" "")
3377 (match_operand:V4SI 3 "register_operand" "")]
3380 rtx t = gen_reg_rtx (V4SImode);
3381 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3382 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step on V4SI inputs with a V2DI accumulator.
;; Steps emitted by the C body:
;;   t1 = gen_sse2_umulv2siv2di3 (operand 1, operand 2) + operand 3
;;   t2, t3 = operand 1 and operand 2 shifted right as TImode values
;;            (gen_sse2_lshrti3), so the remaining elements line up for a
;;            second multiply
;;   t4 = gen_sse2_umulv2siv2di3 (t2, t3)
;;   operand 0 = t1 + t4
3386 (define_expand "udot_prodv4si"
3387 [(match_operand:V2DI 0 "register_operand" "")
3388 (match_operand:V4SI 1 "register_operand" "")
3389 (match_operand:V4SI 2 "register_operand" "")
3390 (match_operand:V2DI 3 "register_operand" "")]
3395 t1 = gen_reg_rtx (V2DImode);
3396 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3397 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3399 t2 = gen_reg_rtx (V4SImode);
3400 t3 = gen_reg_rtx (V4SImode);
3401 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3402 gen_lowpart (TImode, operands[1]),
3404 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3405 gen_lowpart (TImode, operands[2]),
3408 t4 = gen_reg_rtx (V2DImode);
3409 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3411 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI);
;; emits psraw / psrad.  Operand 1 is tied to the destination (constraint 0);
;; the TImode count in operand 2 may be an SSE register or an immediate
;; (constraint xn).
3415 (define_insn "ashr<mode>3"
3416 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3418 (match_operand:SSEMODE24 1 "register_operand" "0")
3419 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3421 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3422 [(set_attr "type" "sseishft")
3423 (set_attr "prefix_data16" "1")
3424 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI); emits psrlw / psrld / psrlq.  Operand 1 is tied to the destination
;; (constraint 0); the TImode count in operand 2 may be an SSE register or
;; an immediate (constraint xn).
3426 (define_insn "lshr<mode>3"
3427 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3428 (lshiftrt:SSEMODE248
3429 (match_operand:SSEMODE248 1 "register_operand" "0")
3430 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3432 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3433 [(set_attr "type" "sseishft")
3434 (set_attr "prefix_data16" "1")
3435 (set_attr "mode" "TI")])
;; Per-element left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI);
;; emits psllw / pslld / psllq.  Operand 1 is tied to the destination
;; (constraint 0); the TImode count in operand 2 may be an SSE register or
;; an immediate (constraint xn).
3437 (define_insn "ashl<mode>3"
3438 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3440 (match_operand:SSEMODE248 1 "register_operand" "0")
3441 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3443 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3444 [(set_attr "type" "sseishft")
3445 (set_attr "prefix_data16" "1")
3446 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a TImode
;; ashift.  The preparation code tests the count (operand 2) with
;; const_0_to_255_mul_8_operand and then replaces operands 0 and 1 with
;; their TImode views (gen_lowpart) so they agree with the ashift:TI
;; template.
;; NOTE(review): presumably the expander gives up when the count test
;; fails -- confirm.
3448 (define_expand "vec_shl_<mode>"
3449 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3450 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3451 (match_operand:SI 2 "general_operand" "")))]
3454 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3456 operands[0] = gen_lowpart (TImode, operands[0]);
3457 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for every SSEMODEI mode, expressed as a
;; TImode lshiftrt.  The preparation code tests the count (operand 2) with
;; const_0_to_255_mul_8_operand and then replaces operands 0 and 1 with
;; their TImode views (gen_lowpart) so they agree with the lshiftrt:TI
;; template.
;; NOTE(review): presumably the expander gives up when the count test
;; fails -- confirm.
3460 (define_expand "vec_shr_<mode>"
3461 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3462 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3463 (match_operand:SI 2 "general_operand" "")))]
3466 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3468 operands[0] = gen_lowpart (TImode, operands[0]);
3469 operands[1] = gen_lowpart (TImode, operands[1]);
;; Named expander for unsigned byte maximum (umax:V16QI).  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (UMAX, ...), which
;; adjusts the operands before the umax set is emitted.
3472 (define_expand "umaxv16qi3"
3473 [(set (match_operand:V16QI 0 "register_operand" "")
3474 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3475 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3477 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matcher for umax:V16QI; emits pmaxub.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  Operand 1 is commutative (%) and tied to the
;; destination; operand 2 may be an SSE register or memory (xm).
3479 (define_insn "*umaxv16qi3"
3480 [(set (match_operand:V16QI 0 "register_operand" "=x")
3481 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3482 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3483 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3484 "pmaxub\t{%2, %0|%0, %2}"
3485 [(set_attr "type" "sseiadd")
3486 (set_attr "prefix_data16" "1")
3487 (set_attr "mode" "TI")])
;; Named expander for signed word maximum (smax:V8HI).  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (SMAX, ...), which
;; adjusts the operands before the smax set is emitted.
3489 (define_expand "smaxv8hi3"
3490 [(set (match_operand:V8HI 0 "register_operand" "")
3491 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3492 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3494 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matcher for smax:V8HI; emits pmaxsw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  Operand 1 is commutative (%) and tied to the
;; destination; operand 2 may be an SSE register or memory (xm).
3496 (define_insn "*smaxv8hi3"
3497 [(set (match_operand:V8HI 0 "register_operand" "=x")
3498 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3499 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3500 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3501 "pmaxsw\t{%2, %0|%0, %2}"
3502 [(set_attr "type" "sseiadd")
3503 (set_attr "prefix_data16" "1")
3504 (set_attr "mode" "TI")])
;; Named expander for unsigned word maximum (umax:V8HI).  Two strategies
;; are visible in the body:
;;  - call ix86_fixup_binary_operands_no_copy (UMAX, ...) and let the umax
;;    set be emitted; or
;;  - emit gen_sse2_ussubv8hi3 (op3, operand 1, op2) followed by
;;    gen_addv8hi3 (op0, op3, op2).  op3 is normally op0, but a fresh
;;    pseudo is used when op0 and op2 are the same rtx so that op2 is not
;;    overwritten before the add reads it.
;; NOTE(review): the second form presumably relies on
;; (a -sat b) + b == umax (a, b) for an unsigned saturating subtract --
;; confirm the semantics of sse2_ussubv8hi3.
3506 (define_expand "umaxv8hi3"
3507 [(set (match_operand:V8HI 0 "register_operand" "")
3508 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3509 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3513 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3516 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3517 if (rtx_equal_p (op3, op2))
3518 op3 = gen_reg_rtx (V8HImode);
3519 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3520 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Named expander for signed maximum on the SSEMODE14 modes (V16QI, V4SI).
;; Two strategies are visible in the body:
;;  - call ix86_fixup_binary_operands_no_copy (SMAX, ...) and let the smax
;;    set be emitted; or
;;  - build a vcond-style operand array and call ix86_expand_int_vcond:
;;    destination = operand 0, comparison = (operand 1 > operand 2, signed
;;    GT), value when true = operand 1, value when false = operand 2.
3525 (define_expand "smax<mode>3"
3526 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3527 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3528 (match_operand:SSEMODE14 2 "register_operand" "")))]
3532 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3538 xops[0] = operands[0];
3539 xops[1] = operands[1];
3540 xops[2] = operands[2];
3541 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3542 xops[4] = operands[1];
3543 xops[5] = operands[2];
3544 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for the SSEMODE14 modes (V16QI, V4SI); emits
;; pmaxsb / pmaxsd.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3550 (define_insn "*sse4_1_smax<mode>3"
3551 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3553 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3554 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3555 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3556 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3557 [(set_attr "type" "sseiadd")
3558 (set_attr "prefix_extra" "1")
3559 (set_attr "mode" "TI")])
;; Named expander for unsigned doubleword maximum (umax:V4SI).
;; Two strategies are visible in the body:
;;  - call ix86_fixup_binary_operands_no_copy (UMAX, ...) and let the umax
;;    set be emitted; or
;;  - build a vcond-style operand array and call ix86_expand_int_vcond:
;;    destination = operand 0, comparison = (operand 1 > operand 2,
;;    unsigned GTU), value when true = operand 1, value when false =
;;    operand 2.
3561 (define_expand "umaxv4si3"
3562 [(set (match_operand:V4SI 0 "register_operand" "")
3563 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3564 (match_operand:V4SI 2 "register_operand" "")))]
3568 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3574 xops[0] = operands[0];
3575 xops[1] = operands[1];
3576 xops[2] = operands[2];
3577 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3578 xops[4] = operands[1];
3579 xops[5] = operands[2];
3580 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for the SSEMODE24 modes (V8HI, V4SI); emits
;; pmaxuw / pmaxud.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3586 (define_insn "*sse4_1_umax<mode>3"
3587 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3589 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3590 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3591 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3592 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3593 [(set_attr "type" "sseiadd")
3594 (set_attr "prefix_extra" "1")
3595 (set_attr "mode" "TI")])
;; Named expander for unsigned byte minimum (umin:V16QI).  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (UMIN, ...), which
;; adjusts the operands before the umin set is emitted.
3597 (define_expand "uminv16qi3"
3598 [(set (match_operand:V16QI 0 "register_operand" "")
3599 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3600 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3602 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matcher for umin:V16QI; emits pminub.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  Operand 1 is commutative (%) and tied to the
;; destination; operand 2 may be an SSE register or memory (xm).
3604 (define_insn "*uminv16qi3"
3605 [(set (match_operand:V16QI 0 "register_operand" "=x")
3606 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3607 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3608 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3609 "pminub\t{%2, %0|%0, %2}"
3610 [(set_attr "type" "sseiadd")
3611 (set_attr "prefix_data16" "1")
3612 (set_attr "mode" "TI")])
;; Named expander for signed word minimum (smin:V8HI).  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (SMIN, ...), which
;; adjusts the operands before the smin set is emitted.
3614 (define_expand "sminv8hi3"
3615 [(set (match_operand:V8HI 0 "register_operand" "")
3616 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3617 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3619 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matcher for smin:V8HI; emits pminsw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  Operand 1 is commutative (%) and tied to the
;; destination; operand 2 may be an SSE register or memory (xm).
3621 (define_insn "*sminv8hi3"
3622 [(set (match_operand:V8HI 0 "register_operand" "=x")
3623 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3624 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3625 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3626 "pminsw\t{%2, %0|%0, %2}"
3627 [(set_attr "type" "sseiadd")
3628 (set_attr "prefix_data16" "1")
3629 (set_attr "mode" "TI")])
;; Named expander for signed minimum on the SSEMODE14 modes (V16QI, V4SI).
;; Two strategies are visible in the body:
;;  - call ix86_fixup_binary_operands_no_copy (SMIN, ...) and let the smin
;;    set be emitted; or
;;  - build a vcond-style operand array and call ix86_expand_int_vcond.
;;    The comparison is still (operand 1 > operand 2, signed GT), but the
;;    selected values are swapped relative to the max expanders: operand 2
;;    when the comparison is true, operand 1 otherwise.
3631 (define_expand "smin<mode>3"
3632 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3633 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3634 (match_operand:SSEMODE14 2 "register_operand" "")))]
3638 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3644 xops[0] = operands[0];
3645 xops[1] = operands[2];
3646 xops[2] = operands[1];
3647 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3648 xops[4] = operands[1];
3649 xops[5] = operands[2];
3650 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for the SSEMODE14 modes (V16QI, V4SI); emits
;; pminsb / pminsd.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3656 (define_insn "*sse4_1_smin<mode>3"
3657 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3659 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3660 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3661 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3662 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3663 [(set_attr "type" "sseiadd")
3664 (set_attr "prefix_extra" "1")
3665 (set_attr "mode" "TI")])
;; Named expander for unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).
;; Two strategies are visible in the body:
;;  - call ix86_fixup_binary_operands_no_copy (UMIN, ...) and let the umin
;;    set be emitted; or
;;  - build a vcond-style operand array and call ix86_expand_int_vcond.
;;    The comparison is (operand 1 > operand 2, unsigned GTU); operand 2 is
;;    selected when it is true, operand 1 otherwise.
3667 (define_expand "umin<mode>3"
3668 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3669 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3670 (match_operand:SSEMODE24 2 "register_operand" "")))]
3674 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3680 xops[0] = operands[0];
3681 xops[1] = operands[2];
3682 xops[2] = operands[1];
3683 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3684 xops[4] = operands[1];
3685 xops[5] = operands[2];
3686 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for the SSEMODE24 modes (V8HI, V4SI); emits
;; pminuw / pminud.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3692 (define_insn "*sse4_1_umin<mode>3"
3693 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3695 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3696 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3697 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3698 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3699 [(set_attr "type" "sseiadd")
3700 (set_attr "prefix_extra" "1")
3701 (set_attr "mode" "TI")])
3703 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3705 ;; Parallel integral comparisons
3707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpeqb / pcmpeqw / pcmpeqd.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (EQ, ...).  Operand 1 is commutative (%) and
;; tied to the destination; operand 2 may be an SSE register or memory (xm).
3709 (define_insn "sse2_eq<mode>3"
3710 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3712 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3713 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3714 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3715 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3716 [(set_attr "type" "ssecmp")
3717 (set_attr "prefix_data16" "1")
3718 (set_attr "mode" "TI")])
;; Element-wise equality compare for V2DI; emits pcmpeqq.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok (EQ, ...).  Operand 1 is
;; commutative (%) and tied to the destination; operand 2 may be an SSE
;; register or memory (xm).
3720 (define_insn "sse4_1_eqv2di3"
3721 [(set (match_operand:V2DI 0 "register_operand" "=x")
3723 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3724 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3725 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3726 "pcmpeqq\t{%2, %0|%0, %2}"
3727 [(set_attr "type" "ssecmp")
3728 (set_attr "prefix_extra" "1")
3729 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpgtb / pcmpgtw / pcmpgtd.  The operands are not
;; commutative here: operand 1 must be a register tied to the destination
;; (no % marker), operand 2 may be an SSE register or memory (xm).
3731 (define_insn "sse2_gt<mode>3"
3732 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3734 (match_operand:SSEMODE124 1 "register_operand" "0")
3735 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3737 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3738 [(set_attr "type" "ssecmp")
3739 (set_attr "prefix_data16" "1")
3740 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V2DI; emits pcmpgtq.
;; The comparison is not commutative, and operand 1 is tied to the register
;; destination by the 0 constraint, so its predicate is register_operand --
;; the same shape as sse2_gt<mode>3.  (A nonimmediate_operand predicate here
;; would let a memory operand 1 be recognized even though the constraint
;; can never accept it without a reload.)  Operand 2 may be an SSE register
;; or memory (xm).
3742 (define_insn "sse4_2_gtv2di3"
3743 [(set (match_operand:V2DI 0 "register_operand" "=x")
3745 (match_operand:V2DI 1 "register_operand" "0")
3746 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3748 "pcmpgtq\t{%2, %0|%0, %2}"
3749 [(set_attr "type" "ssecmp")
3750 (set_attr "mode" "TI")])
;; Vector conditional select for every SSEMODEI mode:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The whole operand array is handed to ix86_expand_int_vcond, whose return
;; value decides how the expander finishes.
3752 (define_expand "vcond<mode>"
3753 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3754 (if_then_else:SSEMODEI
3755 (match_operator 3 ""
3756 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3757 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3758 (match_operand:SSEMODEI 1 "general_operand" "")
3759 (match_operand:SSEMODEI 2 "general_operand" "")))]
3762 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for every SSEMODEI mode under the vcondu name
;; (presumably the unsigned-comparison entry point -- the comparison kind
;; itself comes from operator 3):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The whole operand array is handed to ix86_expand_int_vcond, whose return
;; value decides how the expander finishes.
3768 (define_expand "vcondu<mode>"
3769 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3770 (if_then_else:SSEMODEI
3771 (match_operator 3 ""
3772 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3773 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3774 (match_operand:SSEMODEI 1 "general_operand" "")
3775 (match_operand:SSEMODEI 2 "general_operand" "")))]
3778 if (ix86_expand_int_vcond (operands))
3784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3786 ;; Parallel bitwise logical operations
3788 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for every SSEMODEI mode, implemented as an xor with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR holding
;; GET_MODE_NUNITS copies of constm1_rtx, forces it into a register and
;; stores it as operands[2] for the xor template.
3790 (define_expand "one_cmpl<mode>2"
3791 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3792 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3796 int i, n = GET_MODE_NUNITS (<MODE>mode);
3797 rtvec v = rtvec_alloc (n);
3799 for (i = 0; i < n; ++i)
3800 RTVEC_ELT (v, i) = constm1_rtx;
3802 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND on every SSEMODEI mode.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (AND, ...), which
;; adjusts the operands before the and set is emitted.
3805 (define_expand "and<mode>3"
3806 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3807 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3808 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3810 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Integer-vector AND for SSE1-only targets (TARGET_SSE && !TARGET_SSE2):
;; emits the floating-point form andps and is tagged mode V4SF.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3812 (define_insn "*sse_and<mode>3"
3813 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3815 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3816 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3817 "(TARGET_SSE && !TARGET_SSE2)
3818 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3819 "andps\t{%2, %0|%0, %2}"
3820 [(set_attr "type" "sselog")
3821 (set_attr "mode" "V4SF")])
;; Integer-vector AND for SSE2 targets; emits pand.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok (AND, ...).  Operand 1 is commutative (%) and
;; tied to the destination; operand 2 may be an SSE register or memory (xm).
3823 (define_insn "*sse2_and<mode>3"
3824 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3826 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3827 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3828 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3829 "pand\t{%2, %0|%0, %2}"
3830 [(set_attr "type" "sselog")
3831 (set_attr "prefix_data16" "1")
3832 (set_attr "mode" "TI")])
;; AND-NOT for SSE1-only targets (TARGET_SSE && !TARGET_SSE2): the
;; complemented operand 1 (tied to the destination, not commutative) is
;; combined with operand 2; emits the floating-point form andnps and is
;; tagged mode V4SF.
3834 (define_insn "*sse_nand<mode>3"
3835 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3837 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3838 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3839 "(TARGET_SSE && !TARGET_SSE2)"
3840 "andnps\t{%2, %0|%0, %2}"
3841 [(set_attr "type" "sselog")
3842 (set_attr "mode" "V4SF")])
;; AND-NOT on every SSEMODEI mode; emits pandn.  The complemented operand 1
;; is a register tied to the destination (not commutative); operand 2 may
;; be an SSE register or memory (xm).
3844 (define_insn "sse2_nand<mode>3"
3845 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3847 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3848 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3850 "pandn\t{%2, %0|%0, %2}"
3851 [(set_attr "type" "sselog")
3852 (set_attr "prefix_data16" "1")
3853 (set_attr "mode" "TI")])
;; Named expander for bitwise AND on TFmode values.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy (AND, TFmode, operands).
3855 (define_expand "andtf3"
3856 [(set (match_operand:TF 0 "register_operand" "")
3857 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3858 (match_operand:TF 2 "nonimmediate_operand" "")))]
3860 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; TFmode bitwise AND; emits pand.  Requires TARGET_64BIT and
;; ix86_binary_operator_ok (AND, TFmode, operands).  Operand 1 is
;; commutative (%) and tied to the destination; operand 2 may be an SSE
;; register or memory (xm).
3862 (define_insn "*andtf3"
3863 [(set (match_operand:TF 0 "register_operand" "=x")
3865 (match_operand:TF 1 "nonimmediate_operand" "%0")
3866 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3867 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3868 "pand\t{%2, %0|%0, %2}"
3869 [(set_attr "type" "sselog")
3870 (set_attr "prefix_data16" "1")
3871 (set_attr "mode" "TI")])
;; TFmode AND-NOT; emits pandn.  The complemented operand 1 is a register
;; tied to the destination (not commutative); operand 2 may be an SSE
;; register or memory (xm).
3873 (define_insn "*nandtf3"
3874 [(set (match_operand:TF 0 "register_operand" "=x")
3876 (not:TF (match_operand:TF 1 "register_operand" "0"))
3877 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3879 "pandn\t{%2, %0|%0, %2}"
3880 [(set_attr "type" "sselog")
3881 (set_attr "prefix_data16" "1")
3882 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on every SSEMODEI mode.  Its
;; only preparation step is ix86_fixup_binary_operands_no_copy (IOR, ...),
;; which adjusts the operands before the ior set is emitted.
3884 (define_expand "ior<mode>3"
3885 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3886 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3887 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3889 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Integer-vector OR for SSE1-only targets (TARGET_SSE && !TARGET_SSE2):
;; emits the floating-point form orps and is tagged mode V4SF.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3891 (define_insn "*sse_ior<mode>3"
3892 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3894 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3895 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3896 "(TARGET_SSE && !TARGET_SSE2)
3897 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3898 "orps\t{%2, %0|%0, %2}"
3899 [(set_attr "type" "sselog")
3900 (set_attr "mode" "V4SF")])
;; Integer-vector OR for SSE2 targets; emits por.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (IOR, ...).  Operand 1 is commutative (%) and
;; tied to the destination; operand 2 may be an SSE register or memory (xm).
3902 (define_insn "*sse2_ior<mode>3"
3903 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3905 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3906 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3907 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3908 "por\t{%2, %0|%0, %2}"
3909 [(set_attr "type" "sselog")
3910 (set_attr "prefix_data16" "1")
3911 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on TFmode values.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (IOR, TFmode,
;; operands).
3913 (define_expand "iortf3"
3914 [(set (match_operand:TF 0 "register_operand" "")
3915 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3916 (match_operand:TF 2 "nonimmediate_operand" "")))]
3918 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; TFmode bitwise OR; emits por.  Requires TARGET_64BIT and
;; ix86_binary_operator_ok (IOR, TFmode, operands).  Operand 1 is
;; commutative (%) and tied to the destination; operand 2 may be an SSE
;; register or memory (xm).
3920 (define_insn "*iortf3"
3921 [(set (match_operand:TF 0 "register_operand" "=x")
3923 (match_operand:TF 1 "nonimmediate_operand" "%0")
3924 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3925 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3926 "por\t{%2, %0|%0, %2}"
3927 [(set_attr "type" "sselog")
3928 (set_attr "prefix_data16" "1")
3929 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR on every SSEMODEI mode.  Its
;; only preparation step is ix86_fixup_binary_operands_no_copy (XOR, ...),
;; which adjusts the operands before the xor set is emitted.
3931 (define_expand "xor<mode>3"
3932 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3933 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3934 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3936 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Integer-vector XOR for SSE1-only targets (TARGET_SSE && !TARGET_SSE2):
;; emits the floating-point form xorps and is tagged mode V4SF.
;; Operand 1 is commutative (%) and tied to the destination; operand 2 may
;; be an SSE register or memory (xm).
3938 (define_insn "*sse_xor<mode>3"
3939 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3941 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3942 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3943 "(TARGET_SSE && !TARGET_SSE2)
3944 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3945 "xorps\t{%2, %0|%0, %2}"
3946 [(set_attr "type" "sselog")
3947 (set_attr "mode" "V4SF")])
;; Integer-vector XOR for SSE2 targets; emits pxor.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok (XOR, ...).  Operand 1 is commutative (%) and
;; tied to the destination; operand 2 may be an SSE register or memory (xm).
3949 (define_insn "*sse2_xor<mode>3"
3950 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3952 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3953 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3954 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3955 "pxor\t{%2, %0|%0, %2}"
3956 [(set_attr "type" "sselog")
3957 (set_attr "prefix_data16" "1")
3958 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR on TFmode values.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (XOR, TFmode,
;; operands).
3960 (define_expand "xortf3"
3961 [(set (match_operand:TF 0 "register_operand" "")
3962 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3963 (match_operand:TF 2 "nonimmediate_operand" "")))]
3965 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; TFmode bitwise XOR; emits pxor.  Requires TARGET_64BIT and
;; ix86_binary_operator_ok (XOR, TFmode, operands).  Operand 1 is
;; commutative (%) and tied to the destination; operand 2 may be an SSE
;; register or memory (xm).
3967 (define_insn "*xortf3"
3968 [(set (match_operand:TF 0 "register_operand" "=x")
3970 (match_operand:TF 1 "nonimmediate_operand" "%0")
3971 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3972 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3973 "pxor\t{%2, %0|%0, %2}"
3974 [(set_attr "type" "sselog")
3975 (set_attr "prefix_data16" "1")
3976 (set_attr "mode" "TI")])
3978 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3980 ;; Parallel integral element swizzling
3982 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI vectors into one V16QI by truncation.  The inputs are
;; reinterpreted as V16QI (gen_lowpart) and pushed through three rounds of
;; high/low byte interleaves (h1/l1, h2/l2, h3/l3); a final low interleave
;; of l3 and h3 writes operand 0.  The diagram below shows, one letter per
;; byte, what each intermediate holds for inputs op1 and op2.
3985 ;; op1 = abcdefghijklmnop
3986 ;; op2 = qrstuvwxyz012345
3987 ;; h1 = aqbrcsdteufvgwhx
3988 ;; l1 = iyjzk0l1m2n3o4p5
3989 ;; h2 = aiqybjrzcks0dlt1
3990 ;; l2 = emu2fnv3gow4hpx5
3991 ;; h3 = aeimquy2bfjnrvz3
3992 ;; l3 = cgkosw04dhlptx15
3993 ;; result = bdfhjlnprtvxz135
3994 (define_expand "vec_pack_trunc_v8hi"
3995 [(match_operand:V16QI 0 "register_operand" "")
3996 (match_operand:V8HI 1 "register_operand" "")
3997 (match_operand:V8HI 2 "register_operand" "")]
4000 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4002 op1 = gen_lowpart (V16QImode, operands[1]);
4003 op2 = gen_lowpart (V16QImode, operands[2]);
4004 h1 = gen_reg_rtx (V16QImode);
4005 l1 = gen_reg_rtx (V16QImode);
4006 h2 = gen_reg_rtx (V16QImode);
4007 l2 = gen_reg_rtx (V16QImode);
4008 h3 = gen_reg_rtx (V16QImode);
4009 l3 = gen_reg_rtx (V16QImode);
4011 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4012 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4013 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4014 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4015 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4016 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4017 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4028 ;; result = bdfhjlnp
4029 (define_expand "vec_pack_trunc_v4si"
4030 [(match_operand:V8HI 0 "register_operand" "")
4031 (match_operand:V4SI 1 "register_operand" "")
4032 (match_operand:V4SI 2 "register_operand" "")]
4035 rtx op1, op2, h1, l1, h2, l2;
4037 op1 = gen_lowpart (V8HImode, operands[1]);
4038 op2 = gen_lowpart (V8HImode, operands[2]);
4039 h1 = gen_reg_rtx (V8HImode);
4040 l1 = gen_reg_rtx (V8HImode);
4041 h2 = gen_reg_rtx (V8HImode);
4042 l2 = gen_reg_rtx (V8HImode);
4044 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4045 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));