1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 if (register_operand (operands[1], DImode))
113 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
114 Assemble the 64-bit DImode value in an xmm register. */
115 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
116 gen_rtx_SUBREG (SImode, operands[1], 0)));
117 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
118 gen_rtx_SUBREG (SImode, operands[1], 4)));
119 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
121 else if (memory_operand (operands[1], DImode))
122 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
127 (define_expand "movv4sf"
128 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
129 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
132 ix86_expand_vector_move (V4SFmode, operands);
136 (define_insn "*movv4sf_internal"
137 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
138 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
140 && (register_operand (operands[0], V4SFmode)
141 || register_operand (operands[1], V4SFmode))"
143 switch (which_alternative)
146 return standard_sse_constant_opcode (insn, operands[1]);
149 return "movaps\t{%1, %0|%0, %1}";
154 [(set_attr "type" "sselog1,ssemov,ssemov")
155 (set_attr "mode" "V4SF")])
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
171 (define_expand "movv2df"
172 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
173 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
176 ix86_expand_vector_move (V2DFmode, operands);
180 (define_insn "*movv2df_internal"
181 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
182 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
184 && (register_operand (operands[0], V2DFmode)
185 || register_operand (operands[1], V2DFmode))"
187 switch (which_alternative)
190 return standard_sse_constant_opcode (insn, operands[1]);
193 if (get_attr_mode (insn) == MODE_V4SF)
194 return "movaps\t{%1, %0|%0, %1}";
196 return "movapd\t{%1, %0|%0, %1}";
201 [(set_attr "type" "sselog1,ssemov,ssemov")
204 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
205 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
206 (and (eq_attr "alternative" "2")
207 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
209 (const_string "V4SF")
210 (const_string "V2DF")))])
213 [(set (match_operand:V2DF 0 "register_operand" "")
214 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
215 "TARGET_SSE2 && reload_completed"
216 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
218 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
219 operands[2] = CONST0_RTX (DFmode);
222 (define_expand "push<mode>1"
223 [(match_operand:SSEMODE 0 "register_operand" "")]
226 ix86_expand_push (<MODE>mode, operands[0]);
230 (define_expand "movmisalign<mode>"
231 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
232 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
235 ix86_expand_vector_move_misalign (<MODE>mode, operands);
239 (define_insn "sse_movups"
240 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
241 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
243 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
244 "movups\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssemov")
246 (set_attr "mode" "V2DF")])
248 (define_insn "sse2_movupd"
249 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
250 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
252 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
253 "movupd\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssemov")
255 (set_attr "mode" "V2DF")])
257 (define_insn "sse2_movdqu"
258 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
259 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
261 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
262 "movdqu\t{%1, %0|%0, %1}"
263 [(set_attr "type" "ssemov")
264 (set_attr "prefix_data16" "1")
265 (set_attr "mode" "TI")])
267 (define_insn "sse_movntv4sf"
268 [(set (match_operand:V4SF 0 "memory_operand" "=m")
269 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
272 "movntps\t{%1, %0|%0, %1}"
273 [(set_attr "type" "ssemov")
274 (set_attr "mode" "V4SF")])
276 (define_insn "sse2_movntv2df"
277 [(set (match_operand:V2DF 0 "memory_operand" "=m")
278 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
281 "movntpd\t{%1, %0|%0, %1}"
282 [(set_attr "type" "ssecvt")
283 (set_attr "mode" "V2DF")])
285 (define_insn "sse2_movntv2di"
286 [(set (match_operand:V2DI 0 "memory_operand" "=m")
287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
290 "movntdq\t{%1, %0|%0, %1}"
291 [(set_attr "type" "ssecvt")
292 (set_attr "prefix_data16" "1")
293 (set_attr "mode" "TI")])
295 (define_insn "sse2_movntsi"
296 [(set (match_operand:SI 0 "memory_operand" "=m")
297 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
300 "movnti\t{%1, %0|%0, %1}"
301 [(set_attr "type" "ssecvt")
302 (set_attr "mode" "V2DF")])
304 (define_insn "sse3_lddqu"
305 [(set (match_operand:V16QI 0 "register_operand" "=x")
306 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
309 "lddqu\t{%1, %0|%0, %1}"
310 [(set_attr "type" "ssecvt")
311 (set_attr "prefix_rep" "1")
312 (set_attr "mode" "TI")])
314 ; Expand patterns for non-temporal stores. At the moment, only those
315 ; that directly map to insns are defined; it would be possible to
316 ; define patterns for other modes that would expand to several insns.
318 (define_expand "storentv4sf"
319 [(set (match_operand:V4SF 0 "memory_operand" "=m")
320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
325 (define_expand "storentv2df"
326 [(set (match_operand:V2DF 0 "memory_operand" "=m")
327 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
332 (define_expand "storentv2di"
333 [(set (match_operand:V2DI 0 "memory_operand" "=m")
334 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
339 (define_expand "storentsi"
340 [(set (match_operand:SI 0 "memory_operand" "=m")
341 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
348 ;; Parallel single-precision floating point arithmetic
350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
352 (define_expand "negv4sf2"
353 [(set (match_operand:V4SF 0 "register_operand" "")
354 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
356 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
358 (define_expand "absv4sf2"
359 [(set (match_operand:V4SF 0 "register_operand" "")
360 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
362 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
364 (define_expand "addv4sf3"
365 [(set (match_operand:V4SF 0 "register_operand" "")
366 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
367 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
369 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
371 (define_insn "*addv4sf3"
372 [(set (match_operand:V4SF 0 "register_operand" "=x")
373 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
374 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
375 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
376 "addps\t{%2, %0|%0, %2}"
377 [(set_attr "type" "sseadd")
378 (set_attr "mode" "V4SF")])
380 (define_insn "sse_vmaddv4sf3"
381 [(set (match_operand:V4SF 0 "register_operand" "=x")
383 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
384 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
387 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
388 "addss\t{%2, %0|%0, %2}"
389 [(set_attr "type" "sseadd")
390 (set_attr "mode" "SF")])
392 (define_expand "subv4sf3"
393 [(set (match_operand:V4SF 0 "register_operand" "")
394 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
395 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
397 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
399 (define_insn "*subv4sf3"
400 [(set (match_operand:V4SF 0 "register_operand" "=x")
401 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
402 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
404 "subps\t{%2, %0|%0, %2}"
405 [(set_attr "type" "sseadd")
406 (set_attr "mode" "V4SF")])
408 (define_insn "sse_vmsubv4sf3"
409 [(set (match_operand:V4SF 0 "register_operand" "=x")
411 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
412 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
416 "subss\t{%2, %0|%0, %2}"
417 [(set_attr "type" "sseadd")
418 (set_attr "mode" "SF")])
420 (define_expand "mulv4sf3"
421 [(set (match_operand:V4SF 0 "register_operand" "")
422 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
423 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
425 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
427 (define_insn "*mulv4sf3"
428 [(set (match_operand:V4SF 0 "register_operand" "=x")
429 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
430 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
431 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
432 "mulps\t{%2, %0|%0, %2}"
433 [(set_attr "type" "ssemul")
434 (set_attr "mode" "V4SF")])
436 (define_insn "sse_vmmulv4sf3"
437 [(set (match_operand:V4SF 0 "register_operand" "=x")
439 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
440 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
443 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
444 "mulss\t{%2, %0|%0, %2}"
445 [(set_attr "type" "ssemul")
446 (set_attr "mode" "SF")])
448 (define_expand "divv4sf3"
449 [(set (match_operand:V4SF 0 "register_operand" "")
450 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
451 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
454 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
456 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
457 && flag_finite_math_only && !flag_trapping_math
458 && flag_unsafe_math_optimizations)
460 ix86_emit_swdivsf (operands[0], operands[1],
461 operands[2], V4SFmode);
466 (define_insn "*divv4sf3"
467 [(set (match_operand:V4SF 0 "register_operand" "=x")
468 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
469 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
471 "divps\t{%2, %0|%0, %2}"
472 [(set_attr "type" "ssediv")
473 (set_attr "mode" "V4SF")])
475 (define_insn "sse_vmdivv4sf3"
476 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
483 "divss\t{%2, %0|%0, %2}"
484 [(set_attr "type" "ssediv")
485 (set_attr "mode" "SF")])
487 (define_insn "sse_rcpv4sf2"
488 [(set (match_operand:V4SF 0 "register_operand" "=x")
490 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
492 "rcpps\t{%1, %0|%0, %1}"
493 [(set_attr "type" "sse")
494 (set_attr "mode" "V4SF")])
496 (define_insn "sse_vmrcpv4sf2"
497 [(set (match_operand:V4SF 0 "register_operand" "=x")
499 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
501 (match_operand:V4SF 2 "register_operand" "0")
504 "rcpss\t{%1, %0|%0, %1}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
508 (define_insn "*sse_rsqrtv4sf2"
509 [(set (match_operand:V4SF 0 "register_operand" "=x")
511 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
513 "rsqrtps\t{%1, %0|%0, %1}"
514 [(set_attr "type" "sse")
515 (set_attr "mode" "V4SF")])
517 (define_expand "sse_rsqrtv4sf2"
518 [(set (match_operand:V4SF 0 "register_operand" "")
520 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
523 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
524 && flag_finite_math_only && !flag_trapping_math
525 && flag_unsafe_math_optimizations)
527 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
532 (define_insn "sse_vmrsqrtv4sf2"
533 [(set (match_operand:V4SF 0 "register_operand" "=x")
535 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
537 (match_operand:V4SF 2 "register_operand" "0")
540 "rsqrtss\t{%1, %0|%0, %1}"
541 [(set_attr "type" "sse")
542 (set_attr "mode" "SF")])
544 (define_insn "*sqrtv4sf2"
545 [(set (match_operand:V4SF 0 "register_operand" "=x")
546 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
548 "sqrtps\t{%1, %0|%0, %1}"
549 [(set_attr "type" "sse")
550 (set_attr "mode" "V4SF")])
552 (define_expand "sqrtv4sf2"
553 [(set (match_operand:V4SF 0 "register_operand" "=")
554 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
557 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
558 && flag_finite_math_only && !flag_trapping_math
559 && flag_unsafe_math_optimizations)
561 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
566 (define_insn "sse_vmsqrtv4sf2"
567 [(set (match_operand:V4SF 0 "register_operand" "=x")
569 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
570 (match_operand:V4SF 2 "register_operand" "0")
573 "sqrtss\t{%1, %0|%0, %1}"
574 [(set_attr "type" "sse")
575 (set_attr "mode" "SF")])
577 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
578 ;; isn't really correct, as those rtl operators aren't defined when
579 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
581 (define_expand "smaxv4sf3"
582 [(set (match_operand:V4SF 0 "register_operand" "")
583 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
584 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
587 if (!flag_finite_math_only)
588 operands[1] = force_reg (V4SFmode, operands[1]);
589 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
592 (define_insn "*smaxv4sf3_finite"
593 [(set (match_operand:V4SF 0 "register_operand" "=x")
594 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
595 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
596 "TARGET_SSE && flag_finite_math_only
597 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
598 "maxps\t{%2, %0|%0, %2}"
599 [(set_attr "type" "sse")
600 (set_attr "mode" "V4SF")])
602 (define_insn "*smaxv4sf3"
603 [(set (match_operand:V4SF 0 "register_operand" "=x")
604 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
605 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
607 "maxps\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sse")
609 (set_attr "mode" "V4SF")])
611 (define_insn "sse_vmsmaxv4sf3"
612 [(set (match_operand:V4SF 0 "register_operand" "=x")
614 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
615 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
619 "maxss\t{%2, %0|%0, %2}"
620 [(set_attr "type" "sse")
621 (set_attr "mode" "SF")])
623 (define_expand "sminv4sf3"
624 [(set (match_operand:V4SF 0 "register_operand" "")
625 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
626 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
629 if (!flag_finite_math_only)
630 operands[1] = force_reg (V4SFmode, operands[1]);
631 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
634 (define_insn "*sminv4sf3_finite"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
637 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
638 "TARGET_SSE && flag_finite_math_only
639 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
640 "minps\t{%2, %0|%0, %2}"
641 [(set_attr "type" "sse")
642 (set_attr "mode" "V4SF")])
644 (define_insn "*sminv4sf3"
645 [(set (match_operand:V4SF 0 "register_operand" "=x")
646 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
649 "minps\t{%2, %0|%0, %2}"
650 [(set_attr "type" "sse")
651 (set_attr "mode" "V4SF")])
653 (define_insn "sse_vmsminv4sf3"
654 [(set (match_operand:V4SF 0 "register_operand" "=x")
656 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
657 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
661 "minss\t{%2, %0|%0, %2}"
662 [(set_attr "type" "sse")
663 (set_attr "mode" "SF")])
665 ;; These versions of the min/max patterns implement exactly the operations
666 ;; min = (op1 < op2 ? op1 : op2)
667 ;; max = (!(op1 < op2) ? op1 : op2)
668 ;; Their operands are not commutative, and thus they may be used in the
669 ;; presence of -0.0 and NaN.
671 (define_insn "*ieee_sminv4sf3"
672 [(set (match_operand:V4SF 0 "register_operand" "=x")
673 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
674 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
677 "minps\t{%2, %0|%0, %2}"
678 [(set_attr "type" "sseadd")
679 (set_attr "mode" "V4SF")])
681 (define_insn "*ieee_smaxv4sf3"
682 [(set (match_operand:V4SF 0 "register_operand" "=x")
683 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
684 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
687 "maxps\t{%2, %0|%0, %2}"
688 [(set_attr "type" "sseadd")
689 (set_attr "mode" "V4SF")])
691 (define_insn "*ieee_sminv2df3"
692 [(set (match_operand:V2DF 0 "register_operand" "=x")
693 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
694 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
697 "minpd\t{%2, %0|%0, %2}"
698 [(set_attr "type" "sseadd")
699 (set_attr "mode" "V2DF")])
701 (define_insn "*ieee_smaxv2df3"
702 [(set (match_operand:V2DF 0 "register_operand" "=x")
703 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
707 "maxpd\t{%2, %0|%0, %2}"
708 [(set_attr "type" "sseadd")
709 (set_attr "mode" "V2DF")])
711 (define_insn "sse3_addsubv4sf3"
712 [(set (match_operand:V4SF 0 "register_operand" "=x")
715 (match_operand:V4SF 1 "register_operand" "0")
716 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
717 (minus:V4SF (match_dup 1) (match_dup 2))
720 "addsubps\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "prefix_rep" "1")
723 (set_attr "mode" "V4SF")])
725 (define_insn "sse3_haddv4sf3"
726 [(set (match_operand:V4SF 0 "register_operand" "=x")
731 (match_operand:V4SF 1 "register_operand" "0")
732 (parallel [(const_int 0)]))
733 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
735 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
736 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
740 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
741 (parallel [(const_int 0)]))
742 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
744 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
745 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
747 "haddps\t{%2, %0|%0, %2}"
748 [(set_attr "type" "sseadd")
749 (set_attr "prefix_rep" "1")
750 (set_attr "mode" "V4SF")])
752 (define_insn "sse3_hsubv4sf3"
753 [(set (match_operand:V4SF 0 "register_operand" "=x")
758 (match_operand:V4SF 1 "register_operand" "0")
759 (parallel [(const_int 0)]))
760 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
762 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
763 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
767 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
768 (parallel [(const_int 0)]))
769 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
771 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
772 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
774 "hsubps\t{%2, %0|%0, %2}"
775 [(set_attr "type" "sseadd")
776 (set_attr "prefix_rep" "1")
777 (set_attr "mode" "V4SF")])
779 (define_expand "reduc_splus_v4sf"
780 [(match_operand:V4SF 0 "register_operand" "")
781 (match_operand:V4SF 1 "register_operand" "")]
786 rtx tmp = gen_reg_rtx (V4SFmode);
787 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
788 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
791 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
795 (define_expand "reduc_smax_v4sf"
796 [(match_operand:V4SF 0 "register_operand" "")
797 (match_operand:V4SF 1 "register_operand" "")]
800 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
804 (define_expand "reduc_smin_v4sf"
805 [(match_operand:V4SF 0 "register_operand" "")
806 (match_operand:V4SF 1 "register_operand" "")]
809 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
815 ;; Parallel single-precision floating point comparisons
817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
819 (define_insn "sse_maskcmpv4sf3"
820 [(set (match_operand:V4SF 0 "register_operand" "=x")
821 (match_operator:V4SF 3 "sse_comparison_operator"
822 [(match_operand:V4SF 1 "register_operand" "0")
823 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
825 "cmp%D3ps\t{%2, %0|%0, %2}"
826 [(set_attr "type" "ssecmp")
827 (set_attr "mode" "V4SF")])
829 (define_insn "sse_maskcmpsf3"
830 [(set (match_operand:SF 0 "register_operand" "=x")
831 (match_operator:SF 3 "sse_comparison_operator"
832 [(match_operand:SF 1 "register_operand" "0")
833 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
835 "cmp%D3ss\t{%2, %0|%0, %2}"
836 [(set_attr "type" "ssecmp")
837 (set_attr "mode" "SF")])
839 (define_insn "sse_vmmaskcmpv4sf3"
840 [(set (match_operand:V4SF 0 "register_operand" "=x")
842 (match_operator:V4SF 3 "sse_comparison_operator"
843 [(match_operand:V4SF 1 "register_operand" "0")
844 (match_operand:V4SF 2 "register_operand" "x")])
848 "cmp%D3ss\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssecmp")
850 (set_attr "mode" "SF")])
852 (define_insn "sse_comi"
853 [(set (reg:CCFP FLAGS_REG)
856 (match_operand:V4SF 0 "register_operand" "x")
857 (parallel [(const_int 0)]))
859 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
860 (parallel [(const_int 0)]))))]
862 "comiss\t{%1, %0|%0, %1}"
863 [(set_attr "type" "ssecomi")
864 (set_attr "mode" "SF")])
866 (define_insn "sse_ucomi"
867 [(set (reg:CCFPU FLAGS_REG)
870 (match_operand:V4SF 0 "register_operand" "x")
871 (parallel [(const_int 0)]))
873 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
874 (parallel [(const_int 0)]))))]
876 "ucomiss\t{%1, %0|%0, %1}"
877 [(set_attr "type" "ssecomi")
878 (set_attr "mode" "SF")])
880 (define_expand "vcondv4sf"
881 [(set (match_operand:V4SF 0 "register_operand" "")
884 [(match_operand:V4SF 4 "nonimmediate_operand" "")
885 (match_operand:V4SF 5 "nonimmediate_operand" "")])
886 (match_operand:V4SF 1 "general_operand" "")
887 (match_operand:V4SF 2 "general_operand" "")))]
890 if (ix86_expand_fp_vcond (operands))
896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
898 ;; Parallel single-precision floating point logical operations
900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
902 (define_expand "andv4sf3"
903 [(set (match_operand:V4SF 0 "register_operand" "")
904 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
905 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
907 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
909 (define_insn "*andv4sf3"
910 [(set (match_operand:V4SF 0 "register_operand" "=x")
911 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
912 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
913 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
914 "andps\t{%2, %0|%0, %2}"
915 [(set_attr "type" "sselog")
916 (set_attr "mode" "V4SF")])
918 (define_insn "sse_nandv4sf3"
919 [(set (match_operand:V4SF 0 "register_operand" "=x")
920 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
921 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
923 "andnps\t{%2, %0|%0, %2}"
924 [(set_attr "type" "sselog")
925 (set_attr "mode" "V4SF")])
927 (define_expand "iorv4sf3"
928 [(set (match_operand:V4SF 0 "register_operand" "")
929 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
930 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
932 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
934 (define_insn "*iorv4sf3"
935 [(set (match_operand:V4SF 0 "register_operand" "=x")
936 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
937 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
938 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
939 "orps\t{%2, %0|%0, %2}"
940 [(set_attr "type" "sselog")
941 (set_attr "mode" "V4SF")])
943 (define_expand "xorv4sf3"
944 [(set (match_operand:V4SF 0 "register_operand" "")
945 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
946 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
948 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
950 (define_insn "*xorv4sf3"
951 [(set (match_operand:V4SF 0 "register_operand" "=x")
952 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
953 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
954 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
955 "xorps\t{%2, %0|%0, %2}"
956 [(set_attr "type" "sselog")
957 (set_attr "mode" "V4SF")])
959 ;; Also define scalar versions. These are used for abs, neg, and
960 ;; conditional move. Using subregs into vector modes causes register
961 ;; allocation lossage. These patterns do not allow memory operands
962 ;; because the native instructions read the full 128-bits.
964 (define_insn "*andsf3"
965 [(set (match_operand:SF 0 "register_operand" "=x")
966 (and:SF (match_operand:SF 1 "register_operand" "0")
967 (match_operand:SF 2 "register_operand" "x")))]
969 "andps\t{%2, %0|%0, %2}"
970 [(set_attr "type" "sselog")
971 (set_attr "mode" "V4SF")])
973 (define_insn "*nandsf3"
974 [(set (match_operand:SF 0 "register_operand" "=x")
975 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
976 (match_operand:SF 2 "register_operand" "x")))]
978 "andnps\t{%2, %0|%0, %2}"
979 [(set_attr "type" "sselog")
980 (set_attr "mode" "V4SF")])
982 (define_insn "*iorsf3"
983 [(set (match_operand:SF 0 "register_operand" "=x")
984 (ior:SF (match_operand:SF 1 "register_operand" "0")
985 (match_operand:SF 2 "register_operand" "x")))]
987 "orps\t{%2, %0|%0, %2}"
988 [(set_attr "type" "sselog")
989 (set_attr "mode" "V4SF")])
991 (define_insn "*xorsf3"
992 [(set (match_operand:SF 0 "register_operand" "=x")
993 (xor:SF (match_operand:SF 1 "register_operand" "0")
994 (match_operand:SF 2 "register_operand" "x")))]
996 "xorps\t{%2, %0|%0, %2}"
997 [(set_attr "type" "sselog")
998 (set_attr "mode" "V4SF")])
1000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1002 ;; Parallel single-precision floating point conversion operations
1004 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1006 (define_insn "sse_cvtpi2ps"
1007 [(set (match_operand:V4SF 0 "register_operand" "=x")
1010 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1011 (match_operand:V4SF 1 "register_operand" "0")
1014 "cvtpi2ps\t{%2, %0|%0, %2}"
1015 [(set_attr "type" "ssecvt")
1016 (set_attr "mode" "V4SF")])
1018 (define_insn "sse_cvtps2pi"
1019 [(set (match_operand:V2SI 0 "register_operand" "=y")
1021 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1023 (parallel [(const_int 0) (const_int 1)])))]
1025 "cvtps2pi\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "unit" "mmx")
1028 (set_attr "mode" "DI")])
1030 (define_insn "sse_cvttps2pi"
1031 [(set (match_operand:V2SI 0 "register_operand" "=y")
1033 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1034 (parallel [(const_int 0) (const_int 1)])))]
1036 "cvttps2pi\t{%1, %0|%0, %1}"
1037 [(set_attr "type" "ssecvt")
1038 (set_attr "unit" "mmx")
1039 (set_attr "mode" "SF")])
1041 (define_insn "sse_cvtsi2ss"
1042 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1045 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1046 (match_operand:V4SF 1 "register_operand" "0,0")
1049 "cvtsi2ss\t{%2, %0|%0, %2}"
1050 [(set_attr "type" "sseicvt")
1051 (set_attr "athlon_decode" "vector,double")
1052 (set_attr "amdfam10_decode" "vector,double")
1053 (set_attr "mode" "SF")])
1055 (define_insn "sse_cvtsi2ssq"
1056 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1059 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1060 (match_operand:V4SF 1 "register_operand" "0,0")
1062 "TARGET_SSE && TARGET_64BIT"
1063 "cvtsi2ssq\t{%2, %0|%0, %2}"
1064 [(set_attr "type" "sseicvt")
1065 (set_attr "athlon_decode" "vector,double")
1066 (set_attr "amdfam10_decode" "vector,double")
1067 (set_attr "mode" "SF")])
1069 (define_insn "sse_cvtss2si"
1070 [(set (match_operand:SI 0 "register_operand" "=r,r")
1073 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1074 (parallel [(const_int 0)]))]
1075 UNSPEC_FIX_NOTRUNC))]
1077 "cvtss2si\t{%1, %0|%0, %1}"
1078 [(set_attr "type" "sseicvt")
1079 (set_attr "athlon_decode" "double,vector")
1080 (set_attr "prefix_rep" "1")
1081 (set_attr "mode" "SI")])
1083 (define_insn "sse_cvtss2si_2"
1084 [(set (match_operand:SI 0 "register_operand" "=r,r")
1085 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1086 UNSPEC_FIX_NOTRUNC))]
1088 "cvtss2si\t{%1, %0|%0, %1}"
1089 [(set_attr "type" "sseicvt")
1090 (set_attr "athlon_decode" "double,vector")
1091 (set_attr "amdfam10_decode" "double,double")
1092 (set_attr "prefix_rep" "1")
1093 (set_attr "mode" "SI")])
1095 (define_insn "sse_cvtss2siq"
1096 [(set (match_operand:DI 0 "register_operand" "=r,r")
1099 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1100 (parallel [(const_int 0)]))]
1101 UNSPEC_FIX_NOTRUNC))]
1102 "TARGET_SSE && TARGET_64BIT"
1103 "cvtss2siq\t{%1, %0|%0, %1}"
1104 [(set_attr "type" "sseicvt")
1105 (set_attr "athlon_decode" "double,vector")
1106 (set_attr "prefix_rep" "1")
1107 (set_attr "mode" "DI")])
1109 (define_insn "sse_cvtss2siq_2"
1110 [(set (match_operand:DI 0 "register_operand" "=r,r")
1111 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1112 UNSPEC_FIX_NOTRUNC))]
1113 "TARGET_SSE && TARGET_64BIT"
1114 "cvtss2siq\t{%1, %0|%0, %1}"
1115 [(set_attr "type" "sseicvt")
1116 (set_attr "athlon_decode" "double,vector")
1117 (set_attr "amdfam10_decode" "double,double")
1118 (set_attr "prefix_rep" "1")
1119 (set_attr "mode" "DI")])
1121 (define_insn "sse_cvttss2si"
1122 [(set (match_operand:SI 0 "register_operand" "=r,r")
1125 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1126 (parallel [(const_int 0)]))))]
1128 "cvttss2si\t{%1, %0|%0, %1}"
1129 [(set_attr "type" "sseicvt")
1130 (set_attr "athlon_decode" "double,vector")
1131 (set_attr "amdfam10_decode" "double,double")
1132 (set_attr "prefix_rep" "1")
1133 (set_attr "mode" "SI")])
1135 (define_insn "sse_cvttss2siq"
1136 [(set (match_operand:DI 0 "register_operand" "=r,r")
1139 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1140 (parallel [(const_int 0)]))))]
1141 "TARGET_SSE && TARGET_64BIT"
1142 "cvttss2siq\t{%1, %0|%0, %1}"
1143 [(set_attr "type" "sseicvt")
1144 (set_attr "athlon_decode" "double,vector")
1145 (set_attr "amdfam10_decode" "double,double")
1146 (set_attr "prefix_rep" "1")
1147 (set_attr "mode" "DI")])
1149 (define_insn "sse2_cvtdq2ps"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x")
1151 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1153 "cvtdq2ps\t{%1, %0|%0, %1}"
1154 [(set_attr "type" "ssecvt")
1155 (set_attr "mode" "V4SF")])
1157 (define_insn "sse2_cvtps2dq"
1158 [(set (match_operand:V4SI 0 "register_operand" "=x")
1159 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1160 UNSPEC_FIX_NOTRUNC))]
1162 "cvtps2dq\t{%1, %0|%0, %1}"
1163 [(set_attr "type" "ssecvt")
1164 (set_attr "prefix_data16" "1")
1165 (set_attr "mode" "TI")])
1167 (define_insn "sse2_cvttps2dq"
1168 [(set (match_operand:V4SI 0 "register_operand" "=x")
1169 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1171 "cvttps2dq\t{%1, %0|%0, %1}"
1172 [(set_attr "type" "ssecvt")
1173 (set_attr "prefix_rep" "1")
1174 (set_attr "mode" "TI")])
1176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1178 ;; Parallel single-precision floating point element swizzling
1180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1182 (define_insn "sse_movhlps"
1183 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1186 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1187 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1188 (parallel [(const_int 6)
1192 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1194 movhlps\t{%2, %0|%0, %2}
1195 movlps\t{%H2, %0|%0, %H2}
1196 movhps\t{%2, %0|%0, %2}"
1197 [(set_attr "type" "ssemov")
1198 (set_attr "mode" "V4SF,V2SF,V2SF")])
1200 (define_insn "sse_movlhps"
1201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1204 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1205 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1206 (parallel [(const_int 0)
1210 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1212 movlhps\t{%2, %0|%0, %2}
1213 movhps\t{%2, %0|%0, %2}
1214 movlps\t{%2, %H0|%H0, %2}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V4SF,V2SF,V2SF")])
1218 (define_insn "sse_unpckhps"
1219 [(set (match_operand:V4SF 0 "register_operand" "=x")
1222 (match_operand:V4SF 1 "register_operand" "0")
1223 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1224 (parallel [(const_int 2) (const_int 6)
1225 (const_int 3) (const_int 7)])))]
1227 "unpckhps\t{%2, %0|%0, %2}"
1228 [(set_attr "type" "sselog")
1229 (set_attr "mode" "V4SF")])
1231 (define_insn "sse_unpcklps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 0) (const_int 4)
1238 (const_int 1) (const_int 5)])))]
1240 "unpcklps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
1244 ;; These are modeled with the same vec_concat as the others so that we
1245 ;; capture users of shufps that can use the new instructions
1246 (define_insn "sse3_movshdup"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x")
1250 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1252 (parallel [(const_int 1)
1257 "movshdup\t{%1, %0|%0, %1}"
1258 [(set_attr "type" "sse")
1259 (set_attr "prefix_rep" "1")
1260 (set_attr "mode" "V4SF")])
1262 (define_insn "sse3_movsldup"
1263 [(set (match_operand:V4SF 0 "register_operand" "=x")
1266 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1268 (parallel [(const_int 0)
1273 "movsldup\t{%1, %0|%0, %1}"
1274 [(set_attr "type" "sse")
1275 (set_attr "prefix_rep" "1")
1276 (set_attr "mode" "V4SF")])
1278 (define_expand "sse_shufps"
1279 [(match_operand:V4SF 0 "register_operand" "")
1280 (match_operand:V4SF 1 "register_operand" "")
1281 (match_operand:V4SF 2 "nonimmediate_operand" "")
1282 (match_operand:SI 3 "const_int_operand" "")]
1285 int mask = INTVAL (operands[3]);
1286 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1287 GEN_INT ((mask >> 0) & 3),
1288 GEN_INT ((mask >> 2) & 3),
1289 GEN_INT (((mask >> 4) & 3) + 4),
1290 GEN_INT (((mask >> 6) & 3) + 4)));
1294 (define_insn "sse_shufps_1"
1295 [(set (match_operand:V4SF 0 "register_operand" "=x")
1298 (match_operand:V4SF 1 "register_operand" "0")
1299 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1300 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1301 (match_operand 4 "const_0_to_3_operand" "")
1302 (match_operand 5 "const_4_to_7_operand" "")
1303 (match_operand 6 "const_4_to_7_operand" "")])))]
1307 mask |= INTVAL (operands[3]) << 0;
1308 mask |= INTVAL (operands[4]) << 2;
1309 mask |= (INTVAL (operands[5]) - 4) << 4;
1310 mask |= (INTVAL (operands[6]) - 4) << 6;
1311 operands[3] = GEN_INT (mask);
1313 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1315 [(set_attr "type" "sselog")
1316 (set_attr "mode" "V4SF")])
1318 (define_insn "sse_storehps"
1319 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1321 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1322 (parallel [(const_int 2) (const_int 3)])))]
1325 movhps\t{%1, %0|%0, %1}
1326 movhlps\t{%1, %0|%0, %1}
1327 movlps\t{%H1, %0|%0, %H1}"
1328 [(set_attr "type" "ssemov")
1329 (set_attr "mode" "V2SF,V4SF,V2SF")])
1331 (define_insn "sse_loadhps"
1332 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1335 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1336 (parallel [(const_int 0) (const_int 1)]))
1337 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1340 movhps\t{%2, %0|%0, %2}
1341 movlhps\t{%2, %0|%0, %2}
1342 movlps\t{%2, %H0|%H0, %2}"
1343 [(set_attr "type" "ssemov")
1344 (set_attr "mode" "V2SF,V4SF,V2SF")])
1346 (define_insn "sse_storelps"
1347 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1349 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1350 (parallel [(const_int 0) (const_int 1)])))]
1353 movlps\t{%1, %0|%0, %1}
1354 movaps\t{%1, %0|%0, %1}
1355 movlps\t{%1, %0|%0, %1}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
1359 (define_insn "sse_loadlps"
1360 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1362 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1364 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1365 (parallel [(const_int 2) (const_int 3)]))))]
1368 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1369 movlps\t{%2, %0|%0, %2}
1370 movlps\t{%2, %0|%0, %2}"
1371 [(set_attr "type" "sselog,ssemov,ssemov")
1372 (set_attr "mode" "V4SF,V2SF,V2SF")])
1374 (define_insn "sse_movss"
1375 [(set (match_operand:V4SF 0 "register_operand" "=x")
1377 (match_operand:V4SF 2 "register_operand" "x")
1378 (match_operand:V4SF 1 "register_operand" "0")
1381 "movss\t{%2, %0|%0, %2}"
1382 [(set_attr "type" "ssemov")
1383 (set_attr "mode" "SF")])
1385 (define_insn "*vec_dupv4sf"
1386 [(set (match_operand:V4SF 0 "register_operand" "=x")
1388 (match_operand:SF 1 "register_operand" "0")))]
1390 "shufps\t{$0, %0, %0|%0, %0, 0}"
1391 [(set_attr "type" "sselog1")
1392 (set_attr "mode" "V4SF")])
1394 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1395 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1396 ;; alternatives pretty much forces the MMX alternative to be chosen.
1397 (define_insn "*sse_concatv2sf"
1398 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1400 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1401 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1404 unpcklps\t{%2, %0|%0, %2}
1405 movss\t{%1, %0|%0, %1}
1406 punpckldq\t{%2, %0|%0, %2}
1407 movd\t{%1, %0|%0, %1}"
1408 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1409 (set_attr "mode" "V4SF,SF,DI,DI")])
1411 (define_insn "*sse_concatv4sf"
1412 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1414 (match_operand:V2SF 1 "register_operand" " 0,0")
1415 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1418 movlhps\t{%2, %0|%0, %2}
1419 movhps\t{%2, %0|%0, %2}"
1420 [(set_attr "type" "ssemov")
1421 (set_attr "mode" "V4SF,V2SF")])
1423 (define_expand "vec_initv4sf"
1424 [(match_operand:V4SF 0 "register_operand" "")
1425 (match_operand 1 "" "")]
1428 ix86_expand_vector_init (false, operands[0], operands[1]);
1432 (define_insn "vec_setv4sf_0"
1433 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1436 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1437 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1441 movss\t{%2, %0|%0, %2}
1442 movss\t{%2, %0|%0, %2}
1443 movd\t{%2, %0|%0, %2}
1445 [(set_attr "type" "ssemov")
1446 (set_attr "mode" "SF")])
1448 ;; A subset is vec_setv4sf.
1449 (define_insn "*vec_setv4sf_sse4_1"
1450 [(set (match_operand:V4SF 0 "register_operand" "=x")
1453 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1454 (match_operand:V4SF 1 "register_operand" "0")
1455 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1458 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1459 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1461 [(set_attr "type" "sselog")
1462 (set_attr "prefix_extra" "1")
1463 (set_attr "mode" "V4SF")])
1465 (define_insn "sse4_1_insertps"
1466 [(set (match_operand:V4SF 0 "register_operand" "=x")
1467 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1468 (match_operand:V4SF 1 "register_operand" "0")
1469 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1472 "insertps\t{%3, %2, %0|%0, %2, %3}";
1473 [(set_attr "type" "sselog")
1474 (set_attr "prefix_extra" "1")
1475 (set_attr "mode" "V4SF")])
1478 [(set (match_operand:V4SF 0 "memory_operand" "")
1481 (match_operand:SF 1 "nonmemory_operand" ""))
1484 "TARGET_SSE && reload_completed"
1487 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1491 (define_expand "vec_setv4sf"
1492 [(match_operand:V4SF 0 "register_operand" "")
1493 (match_operand:SF 1 "register_operand" "")
1494 (match_operand 2 "const_int_operand" "")]
1497 ix86_expand_vector_set (false, operands[0], operands[1],
1498 INTVAL (operands[2]));
1502 (define_insn_and_split "*vec_extractv4sf_0"
1503 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1505 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1506 (parallel [(const_int 0)])))]
1507 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1509 "&& reload_completed"
1512 rtx op1 = operands[1];
1514 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1516 op1 = gen_lowpart (SFmode, op1);
1517 emit_move_insn (operands[0], op1);
1521 (define_insn "*sse4_1_extractps"
1522 [(set (match_operand:SF 0 "register_operand" "=rm")
1524 (match_operand:V4SF 1 "register_operand" "x")
1525 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1527 "extractps\t{%2, %1, %0|%0, %1, %2}"
1528 [(set_attr "type" "sselog")
1529 (set_attr "prefix_extra" "1")
1530 (set_attr "mode" "V4SF")])
1532 (define_expand "vec_extractv4sf"
1533 [(match_operand:SF 0 "register_operand" "")
1534 (match_operand:V4SF 1 "register_operand" "")
1535 (match_operand 2 "const_int_operand" "")]
1538 ix86_expand_vector_extract (false, operands[0], operands[1],
1539 INTVAL (operands[2]));
1543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1545 ;; Parallel double-precision floating point arithmetic
1547 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1549 (define_expand "negv2df2"
1550 [(set (match_operand:V2DF 0 "register_operand" "")
1551 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1553 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1555 (define_expand "absv2df2"
1556 [(set (match_operand:V2DF 0 "register_operand" "")
1557 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1559 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1561 (define_expand "addv2df3"
1562 [(set (match_operand:V2DF 0 "register_operand" "")
1563 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1564 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1566 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1568 (define_insn "*addv2df3"
1569 [(set (match_operand:V2DF 0 "register_operand" "=x")
1570 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1571 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1572 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1573 "addpd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sseadd")
1575 (set_attr "mode" "V2DF")])
1577 (define_insn "sse2_vmaddv2df3"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1580 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1581 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1584 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
1585 "addsd\t{%2, %0|%0, %2}"
1586 [(set_attr "type" "sseadd")
1587 (set_attr "mode" "DF")])
1589 (define_expand "subv2df3"
1590 [(set (match_operand:V2DF 0 "register_operand" "")
1591 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1592 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1594 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1596 (define_insn "*subv2df3"
1597 [(set (match_operand:V2DF 0 "register_operand" "=x")
1598 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1599 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1601 "subpd\t{%2, %0|%0, %2}"
1602 [(set_attr "type" "sseadd")
1603 (set_attr "mode" "V2DF")])
1605 (define_insn "sse2_vmsubv2df3"
1606 [(set (match_operand:V2DF 0 "register_operand" "=x")
1608 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1609 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1613 "subsd\t{%2, %0|%0, %2}"
1614 [(set_attr "type" "sseadd")
1615 (set_attr "mode" "DF")])
1617 (define_expand "mulv2df3"
1618 [(set (match_operand:V2DF 0 "register_operand" "")
1619 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1620 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1622 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1624 (define_insn "*mulv2df3"
1625 [(set (match_operand:V2DF 0 "register_operand" "=x")
1626 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1627 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1628 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1629 "mulpd\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "ssemul")
1631 (set_attr "mode" "V2DF")])
1633 (define_insn "sse2_vmmulv2df3"
1634 [(set (match_operand:V2DF 0 "register_operand" "=x")
1636 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1637 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1640 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1641 "mulsd\t{%2, %0|%0, %2}"
1642 [(set_attr "type" "ssemul")
1643 (set_attr "mode" "DF")])
1645 (define_expand "divv2df3"
1646 [(set (match_operand:V2DF 0 "register_operand" "")
1647 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1648 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1650 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1652 (define_insn "*divv2df3"
1653 [(set (match_operand:V2DF 0 "register_operand" "=x")
1654 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1655 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1657 "divpd\t{%2, %0|%0, %2}"
1658 [(set_attr "type" "ssediv")
1659 (set_attr "mode" "V2DF")])
1661 (define_insn "sse2_vmdivv2df3"
1662 [(set (match_operand:V2DF 0 "register_operand" "=x")
1664 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1665 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1669 "divsd\t{%2, %0|%0, %2}"
1670 [(set_attr "type" "ssediv")
1671 (set_attr "mode" "DF")])
1673 (define_insn "sqrtv2df2"
1674 [(set (match_operand:V2DF 0 "register_operand" "=x")
1675 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1677 "sqrtpd\t{%1, %0|%0, %1}"
1678 [(set_attr "type" "sse")
1679 (set_attr "mode" "V2DF")])
1681 (define_insn "sse2_vmsqrtv2df2"
1682 [(set (match_operand:V2DF 0 "register_operand" "=x")
1684 (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
1685 (match_operand:V2DF 2 "register_operand" "0")
1688 "sqrtsd\t{%1, %0|%0, %1}"
1689 [(set_attr "type" "sse")
1690 (set_attr "mode" "DF")])
1692 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1693 ;; isn't really correct, as those rtl operators aren't defined when
1694 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1696 (define_expand "smaxv2df3"
1697 [(set (match_operand:V2DF 0 "register_operand" "")
1698 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1702 if (!flag_finite_math_only)
1703 operands[1] = force_reg (V2DFmode, operands[1]);
1704 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
1707 (define_insn "*smaxv2df3_finite"
1708 [(set (match_operand:V2DF 0 "register_operand" "=x")
1709 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1710 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1711 "TARGET_SSE2 && flag_finite_math_only
1712 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1713 "maxpd\t{%2, %0|%0, %2}"
1714 [(set_attr "type" "sseadd")
1715 (set_attr "mode" "V2DF")])
1717 (define_insn "*smaxv2df3"
1718 [(set (match_operand:V2DF 0 "register_operand" "=x")
1719 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1720 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1722 "maxpd\t{%2, %0|%0, %2}"
1723 [(set_attr "type" "sseadd")
1724 (set_attr "mode" "V2DF")])
1726 (define_insn "sse2_vmsmaxv2df3"
1727 [(set (match_operand:V2DF 0 "register_operand" "=x")
1729 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1730 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1734 "maxsd\t{%2, %0|%0, %2}"
1735 [(set_attr "type" "sseadd")
1736 (set_attr "mode" "DF")])
1738 (define_expand "sminv2df3"
1739 [(set (match_operand:V2DF 0 "register_operand" "")
1740 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1741 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1744 if (!flag_finite_math_only)
1745 operands[1] = force_reg (V2DFmode, operands[1]);
1746 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
1749 (define_insn "*sminv2df3_finite"
1750 [(set (match_operand:V2DF 0 "register_operand" "=x")
1751 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1752 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1753 "TARGET_SSE2 && flag_finite_math_only
1754 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1755 "minpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sseadd")
1757 (set_attr "mode" "V2DF")])
1759 (define_insn "*sminv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x")
1761 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1762 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1764 "minpd\t{%2, %0|%0, %2}"
1765 [(set_attr "type" "sseadd")
1766 (set_attr "mode" "V2DF")])
1768 (define_insn "sse2_vmsminv2df3"
1769 [(set (match_operand:V2DF 0 "register_operand" "=x")
1771 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1776 "minsd\t{%2, %0|%0, %2}"
1777 [(set_attr "type" "sseadd")
1778 (set_attr "mode" "DF")])
1780 (define_insn "sse3_addsubv2df3"
1781 [(set (match_operand:V2DF 0 "register_operand" "=x")
1784 (match_operand:V2DF 1 "register_operand" "0")
1785 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1786 (minus:V2DF (match_dup 1) (match_dup 2))
1789 "addsubpd\t{%2, %0|%0, %2}"
1790 [(set_attr "type" "sseadd")
1791 (set_attr "mode" "V2DF")])
1793 (define_insn "sse3_haddv2df3"
1794 [(set (match_operand:V2DF 0 "register_operand" "=x")
1798 (match_operand:V2DF 1 "register_operand" "0")
1799 (parallel [(const_int 0)]))
1800 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1803 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1804 (parallel [(const_int 0)]))
1805 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1807 "haddpd\t{%2, %0|%0, %2}"
1808 [(set_attr "type" "sseadd")
1809 (set_attr "mode" "V2DF")])
1811 (define_insn "sse3_hsubv2df3"
1812 [(set (match_operand:V2DF 0 "register_operand" "=x")
1816 (match_operand:V2DF 1 "register_operand" "0")
1817 (parallel [(const_int 0)]))
1818 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1821 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1822 (parallel [(const_int 0)]))
1823 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1825 "hsubpd\t{%2, %0|%0, %2}"
1826 [(set_attr "type" "sseadd")
1827 (set_attr "mode" "V2DF")])
1829 (define_expand "reduc_splus_v2df"
1830 [(match_operand:V2DF 0 "register_operand" "")
1831 (match_operand:V2DF 1 "register_operand" "")]
1834 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1840 ;; Parallel double-precision floating point comparisons
1842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1844 (define_insn "sse2_maskcmpv2df3"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x")
1846 (match_operator:V2DF 3 "sse_comparison_operator"
1847 [(match_operand:V2DF 1 "register_operand" "0")
1848 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1850 "cmp%D3pd\t{%2, %0|%0, %2}"
1851 [(set_attr "type" "ssecmp")
1852 (set_attr "mode" "V2DF")])
1854 (define_insn "sse2_maskcmpdf3"
1855 [(set (match_operand:DF 0 "register_operand" "=x")
1856 (match_operator:DF 3 "sse_comparison_operator"
1857 [(match_operand:DF 1 "register_operand" "0")
1858 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1860 "cmp%D3sd\t{%2, %0|%0, %2}"
1861 [(set_attr "type" "ssecmp")
1862 (set_attr "mode" "DF")])
1864 (define_insn "sse2_vmmaskcmpv2df3"
1865 [(set (match_operand:V2DF 0 "register_operand" "=x")
1867 (match_operator:V2DF 3 "sse_comparison_operator"
1868 [(match_operand:V2DF 1 "register_operand" "0")
1869 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1873 "cmp%D3sd\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "ssecmp")
1875 (set_attr "mode" "DF")])
1877 (define_insn "sse2_comi"
1878 [(set (reg:CCFP FLAGS_REG)
1881 (match_operand:V2DF 0 "register_operand" "x")
1882 (parallel [(const_int 0)]))
1884 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1885 (parallel [(const_int 0)]))))]
1887 "comisd\t{%1, %0|%0, %1}"
1888 [(set_attr "type" "ssecomi")
1889 (set_attr "mode" "DF")])
1891 (define_insn "sse2_ucomi"
1892 [(set (reg:CCFPU FLAGS_REG)
1895 (match_operand:V2DF 0 "register_operand" "x")
1896 (parallel [(const_int 0)]))
1898 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1899 (parallel [(const_int 0)]))))]
1901 "ucomisd\t{%1, %0|%0, %1}"
1902 [(set_attr "type" "ssecomi")
1903 (set_attr "mode" "DF")])
1905 (define_expand "vcondv2df"
1906 [(set (match_operand:V2DF 0 "register_operand" "")
1908 (match_operator 3 ""
1909 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1910 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1911 (match_operand:V2DF 1 "general_operand" "")
1912 (match_operand:V2DF 2 "general_operand" "")))]
1915 if (ix86_expand_fp_vcond (operands))
1921 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1923 ;; Parallel double-precision floating point logical operations
1925 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1927 (define_expand "andv2df3"
1928 [(set (match_operand:V2DF 0 "register_operand" "")
1929 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1930 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1932 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
1934 (define_insn "*andv2df3"
1935 [(set (match_operand:V2DF 0 "register_operand" "=x")
1936 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1938 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1939 "andpd\t{%2, %0|%0, %2}"
1940 [(set_attr "type" "sselog")
1941 (set_attr "mode" "V2DF")])
1943 (define_insn "sse2_nandv2df3"
1944 [(set (match_operand:V2DF 0 "register_operand" "=x")
1945 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1946 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1948 "andnpd\t{%2, %0|%0, %2}"
1949 [(set_attr "type" "sselog")
1950 (set_attr "mode" "V2DF")])
1952 (define_expand "iorv2df3"
1953 [(set (match_operand:V2DF 0 "register_operand" "")
1954 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1955 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1957 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
1959 (define_insn "*iorv2df3"
1960 [(set (match_operand:V2DF 0 "register_operand" "=x")
1961 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1962 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1963 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1964 "orpd\t{%2, %0|%0, %2}"
1965 [(set_attr "type" "sselog")
1966 (set_attr "mode" "V2DF")])
1968 (define_expand "xorv2df3"
1969 [(set (match_operand:V2DF 0 "register_operand" "")
1970 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1971 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1973 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
1975 (define_insn "*xorv2df3"
1976 [(set (match_operand:V2DF 0 "register_operand" "=x")
1977 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1978 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1979 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1980 "xorpd\t{%2, %0|%0, %2}"
1981 [(set_attr "type" "sselog")
1982 (set_attr "mode" "V2DF")])
1984 ;; Also define scalar versions. These are used for abs, neg, and
1985 ;; conditional move. Using subregs into vector modes causes register
1986 ;; allocation lossage. These patterns do not allow memory operands
1987 ;; because the native instructions read the full 128-bits.
1989 (define_insn "*anddf3"
1990 [(set (match_operand:DF 0 "register_operand" "=x")
1991 (and:DF (match_operand:DF 1 "register_operand" "0")
1992 (match_operand:DF 2 "register_operand" "x")))]
1994 "andpd\t{%2, %0|%0, %2}"
1995 [(set_attr "type" "sselog")
1996 (set_attr "mode" "V2DF")])
1998 (define_insn "*nanddf3"
1999 [(set (match_operand:DF 0 "register_operand" "=x")
2000 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2001 (match_operand:DF 2 "register_operand" "x")))]
2003 "andnpd\t{%2, %0|%0, %2}"
2004 [(set_attr "type" "sselog")
2005 (set_attr "mode" "V2DF")])
2007 (define_insn "*iordf3"
2008 [(set (match_operand:DF 0 "register_operand" "=x")
2009 (ior:DF (match_operand:DF 1 "register_operand" "0")
2010 (match_operand:DF 2 "register_operand" "x")))]
2012 "orpd\t{%2, %0|%0, %2}"
2013 [(set_attr "type" "sselog")
2014 (set_attr "mode" "V2DF")])
2016 (define_insn "*xordf3"
2017 [(set (match_operand:DF 0 "register_operand" "=x")
2018 (xor:DF (match_operand:DF 1 "register_operand" "0")
2019 (match_operand:DF 2 "register_operand" "x")))]
2021 "xorpd\t{%2, %0|%0, %2}"
2022 [(set_attr "type" "sselog")
2023 (set_attr "mode" "V2DF")])
2025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2027 ;; Parallel double-precision floating point conversion operations
2029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2031 (define_insn "sse2_cvtpi2pd"
2032 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2033 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2035 "cvtpi2pd\t{%1, %0|%0, %1}"
2036 [(set_attr "type" "ssecvt")
2037 (set_attr "unit" "mmx,*")
2038 (set_attr "mode" "V2DF")])
2040 (define_insn "sse2_cvtpd2pi"
2041 [(set (match_operand:V2SI 0 "register_operand" "=y")
2042 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2043 UNSPEC_FIX_NOTRUNC))]
2045 "cvtpd2pi\t{%1, %0|%0, %1}"
2046 [(set_attr "type" "ssecvt")
2047 (set_attr "unit" "mmx")
2048 (set_attr "prefix_data16" "1")
2049 (set_attr "mode" "DI")])
2051 (define_insn "sse2_cvttpd2pi"
2052 [(set (match_operand:V2SI 0 "register_operand" "=y")
2053 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2055 "cvttpd2pi\t{%1, %0|%0, %1}"
2056 [(set_attr "type" "ssecvt")
2057 (set_attr "unit" "mmx")
2058 (set_attr "prefix_data16" "1")
2059 (set_attr "mode" "TI")])
2061 (define_insn "sse2_cvtsi2sd"
2062 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2065 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2066 (match_operand:V2DF 1 "register_operand" "0,0")
2069 "cvtsi2sd\t{%2, %0|%0, %2}"
2070 [(set_attr "type" "sseicvt")
2071 (set_attr "mode" "DF")
2072 (set_attr "athlon_decode" "double,direct")
2073 (set_attr "amdfam10_decode" "vector,double")])
2075 (define_insn "sse2_cvtsi2sdq"
2076 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2079 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2080 (match_operand:V2DF 1 "register_operand" "0,0")
2082 "TARGET_SSE2 && TARGET_64BIT"
2083 "cvtsi2sdq\t{%2, %0|%0, %2}"
2084 [(set_attr "type" "sseicvt")
2085 (set_attr "mode" "DF")
2086 (set_attr "athlon_decode" "double,direct")
2087 (set_attr "amdfam10_decode" "vector,double")])
2089 (define_insn "sse2_cvtsd2si"
2090 [(set (match_operand:SI 0 "register_operand" "=r,r")
2093 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2094 (parallel [(const_int 0)]))]
2095 UNSPEC_FIX_NOTRUNC))]
2097 "cvtsd2si\t{%1, %0|%0, %1}"
2098 [(set_attr "type" "sseicvt")
2099 (set_attr "athlon_decode" "double,vector")
2100 (set_attr "prefix_rep" "1")
2101 (set_attr "mode" "SI")])
2103 (define_insn "sse2_cvtsd2si_2"
2104 [(set (match_operand:SI 0 "register_operand" "=r,r")
2105 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2106 UNSPEC_FIX_NOTRUNC))]
2108 "cvtsd2si\t{%1, %0|%0, %1}"
2109 [(set_attr "type" "sseicvt")
2110 (set_attr "athlon_decode" "double,vector")
2111 (set_attr "amdfam10_decode" "double,double")
2112 (set_attr "prefix_rep" "1")
2113 (set_attr "mode" "SI")])
2115 (define_insn "sse2_cvtsd2siq"
2116 [(set (match_operand:DI 0 "register_operand" "=r,r")
2119 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2120 (parallel [(const_int 0)]))]
2121 UNSPEC_FIX_NOTRUNC))]
2122 "TARGET_SSE2 && TARGET_64BIT"
2123 "cvtsd2siq\t{%1, %0|%0, %1}"
2124 [(set_attr "type" "sseicvt")
2125 (set_attr "athlon_decode" "double,vector")
2126 (set_attr "prefix_rep" "1")
2127 (set_attr "mode" "DI")])
2129 (define_insn "sse2_cvtsd2siq_2"
2130 [(set (match_operand:DI 0 "register_operand" "=r,r")
2131 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2132 UNSPEC_FIX_NOTRUNC))]
2133 "TARGET_SSE2 && TARGET_64BIT"
2134 "cvtsd2siq\t{%1, %0|%0, %1}"
2135 [(set_attr "type" "sseicvt")
2136 (set_attr "athlon_decode" "double,vector")
2137 (set_attr "amdfam10_decode" "double,double")
2138 (set_attr "prefix_rep" "1")
2139 (set_attr "mode" "DI")])
2141 (define_insn "sse2_cvttsd2si"
2142 [(set (match_operand:SI 0 "register_operand" "=r,r")
2145 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2146 (parallel [(const_int 0)]))))]
2148 "cvttsd2si\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "sseicvt")
2150 (set_attr "prefix_rep" "1")
2151 (set_attr "mode" "SI")
2152 (set_attr "athlon_decode" "double,vector")
2153 (set_attr "amdfam10_decode" "double,double")])
2155 (define_insn "sse2_cvttsd2siq"
2156 [(set (match_operand:DI 0 "register_operand" "=r,r")
2159 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2160 (parallel [(const_int 0)]))))]
2161 "TARGET_SSE2 && TARGET_64BIT"
2162 "cvttsd2siq\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "sseicvt")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "mode" "DI")
2166 (set_attr "athlon_decode" "double,vector")
2167 (set_attr "amdfam10_decode" "double,double")])
2169 (define_insn "sse2_cvtdq2pd"
2170 [(set (match_operand:V2DF 0 "register_operand" "=x")
2173 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2174 (parallel [(const_int 0) (const_int 1)]))))]
2176 "cvtdq2pd\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "ssecvt")
2178 (set_attr "mode" "V2DF")])
2180 (define_expand "sse2_cvtpd2dq"
2181 [(set (match_operand:V4SI 0 "register_operand" "")
2183 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2187 "operands[2] = CONST0_RTX (V2SImode);")
2189 (define_insn "*sse2_cvtpd2dq"
2190 [(set (match_operand:V4SI 0 "register_operand" "=x")
2192 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2194 (match_operand:V2SI 2 "const0_operand" "")))]
2196 "cvtpd2dq\t{%1, %0|%0, %1}"
2197 [(set_attr "type" "ssecvt")
2198 (set_attr "prefix_rep" "1")
2199 (set_attr "mode" "TI")
2200 (set_attr "amdfam10_decode" "double")])
2202 (define_expand "sse2_cvttpd2dq"
2203 [(set (match_operand:V4SI 0 "register_operand" "")
2205 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2208 "operands[2] = CONST0_RTX (V2SImode);")
2210 (define_insn "*sse2_cvttpd2dq"
2211 [(set (match_operand:V4SI 0 "register_operand" "=x")
2213 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2214 (match_operand:V2SI 2 "const0_operand" "")))]
2216 "cvttpd2dq\t{%1, %0|%0, %1}"
2217 [(set_attr "type" "ssecvt")
2218 (set_attr "prefix_rep" "1")
2219 (set_attr "mode" "TI")
2220 (set_attr "amdfam10_decode" "double")])
2222 (define_insn "sse2_cvtsd2ss"
2223 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2226 (float_truncate:V2SF
2227 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2228 (match_operand:V4SF 1 "register_operand" "0,0")
2231 "cvtsd2ss\t{%2, %0|%0, %2}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "athlon_decode" "vector,double")
2234 (set_attr "amdfam10_decode" "vector,double")
2235 (set_attr "mode" "SF")])
2237 (define_insn "sse2_cvtss2sd"
2238 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2242 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2243 (parallel [(const_int 0) (const_int 1)])))
2244 (match_operand:V2DF 1 "register_operand" "0,0")
2247 "cvtss2sd\t{%2, %0|%0, %2}"
2248 [(set_attr "type" "ssecvt")
2249 (set_attr "amdfam10_decode" "vector,double")
2250 (set_attr "mode" "DF")])
2252 (define_expand "sse2_cvtpd2ps"
2253 [(set (match_operand:V4SF 0 "register_operand" "")
2255 (float_truncate:V2SF
2256 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2259 "operands[2] = CONST0_RTX (V2SFmode);")
2261 (define_insn "*sse2_cvtpd2ps"
2262 [(set (match_operand:V4SF 0 "register_operand" "=x")
2264 (float_truncate:V2SF
2265 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2266 (match_operand:V2SF 2 "const0_operand" "")))]
2268 "cvtpd2ps\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssecvt")
2270 (set_attr "prefix_data16" "1")
2271 (set_attr "mode" "V4SF")
2272 (set_attr "amdfam10_decode" "double")])
2274 (define_insn "sse2_cvtps2pd"
2275 [(set (match_operand:V2DF 0 "register_operand" "=x")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2279 (parallel [(const_int 0) (const_int 1)]))))]
2281 "cvtps2pd\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "mode" "V2DF")
2284 (set_attr "amdfam10_decode" "direct")])
2286 (define_expand "vec_unpacks_hi_v4sf"
2291 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2292 (parallel [(const_int 6)
2296 (set (match_operand:V2DF 0 "register_operand" "")
2300 (parallel [(const_int 0) (const_int 1)]))))]
2303 operands[2] = gen_reg_rtx (V4SFmode);
2306 (define_expand "vec_unpacks_lo_v4sf"
2307 [(set (match_operand:V2DF 0 "register_operand" "")
2310 (match_operand:V4SF 1 "nonimmediate_operand" "")
2311 (parallel [(const_int 0) (const_int 1)]))))]
2314 (define_expand "vec_unpacks_float_hi_v8hi"
2315 [(match_operand:V4SF 0 "register_operand" "")
2316 (match_operand:V8HI 1 "register_operand" "")]
2319 rtx tmp = gen_reg_rtx (V4SImode);
2321 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2322 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2326 (define_expand "vec_unpacks_float_lo_v8hi"
2327 [(match_operand:V4SF 0 "register_operand" "")
2328 (match_operand:V8HI 1 "register_operand" "")]
2331 rtx tmp = gen_reg_rtx (V4SImode);
2333 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2334 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2338 (define_expand "vec_unpacku_float_hi_v8hi"
2339 [(match_operand:V4SF 0 "register_operand" "")
2340 (match_operand:V8HI 1 "register_operand" "")]
2343 rtx tmp = gen_reg_rtx (V4SImode);
2345 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2346 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2350 (define_expand "vec_unpacku_float_lo_v8hi"
2351 [(match_operand:V4SF 0 "register_operand" "")
2352 (match_operand:V8HI 1 "register_operand" "")]
2355 rtx tmp = gen_reg_rtx (V4SImode);
2357 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2358 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2362 (define_expand "vec_unpacks_float_hi_v4si"
2365 (match_operand:V4SI 1 "nonimmediate_operand" "")
2366 (parallel [(const_int 2)
2370 (set (match_operand:V2DF 0 "register_operand" "")
2374 (parallel [(const_int 0) (const_int 1)]))))]
2377 operands[2] = gen_reg_rtx (V4SImode);
2380 (define_expand "vec_unpacks_float_lo_v4si"
2381 [(set (match_operand:V2DF 0 "register_operand" "")
2384 (match_operand:V4SI 1 "nonimmediate_operand" "")
2385 (parallel [(const_int 0) (const_int 1)]))))]
2388 (define_expand "vec_pack_trunc_v2df"
2389 [(match_operand:V4SF 0 "register_operand" "")
2390 (match_operand:V2DF 1 "nonimmediate_operand" "")
2391 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2396 r1 = gen_reg_rtx (V4SFmode);
2397 r2 = gen_reg_rtx (V4SFmode);
2399 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2400 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2401 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
2405 (define_expand "vec_pack_sfix_trunc_v2df"
2406 [(match_operand:V4SI 0 "register_operand" "")
2407 (match_operand:V2DF 1 "nonimmediate_operand" "")
2408 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2413 r1 = gen_reg_rtx (V4SImode);
2414 r2 = gen_reg_rtx (V4SImode);
2416 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2417 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2418 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2419 gen_lowpart (V2DImode, r1),
2420 gen_lowpart (V2DImode, r2)));
2424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2426 ;; Parallel double-precision floating point element swizzling
2428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2430 (define_insn "sse2_unpckhpd"
2431 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2434 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2435 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2436 (parallel [(const_int 1)
2438 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2440 unpckhpd\t{%2, %0|%0, %2}
2441 movlpd\t{%H1, %0|%0, %H1}
2442 movhpd\t{%1, %0|%0, %1}"
2443 [(set_attr "type" "sselog,ssemov,ssemov")
2444 (set_attr "mode" "V2DF,V1DF,V1DF")])
2446 (define_insn "*sse3_movddup"
2447 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2450 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2452 (parallel [(const_int 0)
2454 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2456 movddup\t{%1, %0|%0, %1}
2458 [(set_attr "type" "sselog1,ssemov")
2459 (set_attr "mode" "V2DF")])
2462 [(set (match_operand:V2DF 0 "memory_operand" "")
2465 (match_operand:V2DF 1 "register_operand" "")
2467 (parallel [(const_int 0)
2469 "TARGET_SSE3 && reload_completed"
2472 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2473 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2474 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2478 (define_insn "sse2_unpcklpd"
2479 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2482 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2483 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2484 (parallel [(const_int 0)
2486 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2488 unpcklpd\t{%2, %0|%0, %2}
2489 movhpd\t{%2, %0|%0, %2}
2490 movlpd\t{%2, %H0|%H0, %2}"
2491 [(set_attr "type" "sselog,ssemov,ssemov")
2492 (set_attr "mode" "V2DF,V1DF,V1DF")])
2494 (define_expand "sse2_shufpd"
2495 [(match_operand:V2DF 0 "register_operand" "")
2496 (match_operand:V2DF 1 "register_operand" "")
2497 (match_operand:V2DF 2 "nonimmediate_operand" "")
2498 (match_operand:SI 3 "const_int_operand" "")]
2501 int mask = INTVAL (operands[3]);
2502 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2504 GEN_INT (mask & 2 ? 3 : 2)));
2508 (define_insn "sse2_shufpd_1"
2509 [(set (match_operand:V2DF 0 "register_operand" "=x")
2512 (match_operand:V2DF 1 "register_operand" "0")
2513 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2514 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2515 (match_operand 4 "const_2_to_3_operand" "")])))]
2519 mask = INTVAL (operands[3]);
2520 mask |= (INTVAL (operands[4]) - 2) << 1;
2521 operands[3] = GEN_INT (mask);
2523 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2525 [(set_attr "type" "sselog")
2526 (set_attr "mode" "V2DF")])
2528 (define_insn "sse2_storehpd"
2529 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2531 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2532 (parallel [(const_int 1)])))]
2533 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2535 movhpd\t{%1, %0|%0, %1}
2538 [(set_attr "type" "ssemov,sselog1,ssemov")
2539 (set_attr "mode" "V1DF,V2DF,DF")])
2542 [(set (match_operand:DF 0 "register_operand" "")
2544 (match_operand:V2DF 1 "memory_operand" "")
2545 (parallel [(const_int 1)])))]
2546 "TARGET_SSE2 && reload_completed"
2547 [(set (match_dup 0) (match_dup 1))]
2549 operands[1] = adjust_address (operands[1], DFmode, 8);
2552 (define_insn "sse2_storelpd"
2553 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2555 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2556 (parallel [(const_int 0)])))]
2557 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2559 movlpd\t{%1, %0|%0, %1}
2562 [(set_attr "type" "ssemov")
2563 (set_attr "mode" "V1DF,DF,DF")])
2566 [(set (match_operand:DF 0 "register_operand" "")
2568 (match_operand:V2DF 1 "nonimmediate_operand" "")
2569 (parallel [(const_int 0)])))]
2570 "TARGET_SSE2 && reload_completed"
2573 rtx op1 = operands[1];
2575 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2577 op1 = gen_lowpart (DFmode, op1);
2578 emit_move_insn (operands[0], op1);
2582 (define_insn "sse2_loadhpd"
2583 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2586 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2587 (parallel [(const_int 0)]))
2588 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2589 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2591 movhpd\t{%2, %0|%0, %2}
2592 unpcklpd\t{%2, %0|%0, %2}
2593 shufpd\t{$1, %1, %0|%0, %1, 1}
2595 [(set_attr "type" "ssemov,sselog,sselog,other")
2596 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
2599 [(set (match_operand:V2DF 0 "memory_operand" "")
2601 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2602 (match_operand:DF 1 "register_operand" "")))]
2603 "TARGET_SSE2 && reload_completed"
2604 [(set (match_dup 0) (match_dup 1))]
2606 operands[0] = adjust_address (operands[0], DFmode, 8);
2609 (define_insn "sse2_loadlpd"
2610 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2612 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2614 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2615 (parallel [(const_int 1)]))))]
2616 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2618 movsd\t{%2, %0|%0, %2}
2619 movlpd\t{%2, %0|%0, %2}
2620 movsd\t{%2, %0|%0, %2}
2621 shufpd\t{$2, %2, %0|%0, %2, 2}
2622 movhpd\t{%H1, %0|%0, %H1}
2624 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2625 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2628 [(set (match_operand:V2DF 0 "memory_operand" "")
2630 (match_operand:DF 1 "register_operand" "")
2631 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2632 "TARGET_SSE2 && reload_completed"
2633 [(set (match_dup 0) (match_dup 1))]
2635 operands[0] = adjust_address (operands[0], DFmode, 8);
2638 ;; Not sure these two are ever used, but it doesn't hurt to have
2640 (define_insn "*vec_extractv2df_1_sse"
2641 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2643 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2644 (parallel [(const_int 1)])))]
2645 "!TARGET_SSE2 && TARGET_SSE
2646 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2648 movhps\t{%1, %0|%0, %1}
2649 movhlps\t{%1, %0|%0, %1}
2650 movlps\t{%H1, %0|%0, %H1}"
2651 [(set_attr "type" "ssemov")
2652 (set_attr "mode" "V2SF,V4SF,V2SF")])
2654 (define_insn "*vec_extractv2df_0_sse"
2655 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2657 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2658 (parallel [(const_int 0)])))]
2659 "!TARGET_SSE2 && TARGET_SSE
2660 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2662 movlps\t{%1, %0|%0, %1}
2663 movaps\t{%1, %0|%0, %1}
2664 movlps\t{%1, %0|%0, %1}"
2665 [(set_attr "type" "ssemov")
2666 (set_attr "mode" "V2SF,V4SF,V2SF")])
2668 (define_insn "sse2_movsd"
2669 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2671 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2672 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2676 movsd\t{%2, %0|%0, %2}
2677 movlpd\t{%2, %0|%0, %2}
2678 movlpd\t{%2, %0|%0, %2}
2679 shufpd\t{$2, %2, %0|%0, %2, 2}
2680 movhps\t{%H1, %0|%0, %H1}
2681 movhps\t{%1, %H0|%H0, %1}"
2682 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2683 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2685 (define_insn "*vec_dupv2df_sse3"
2686 [(set (match_operand:V2DF 0 "register_operand" "=x")
2688 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2690 "movddup\t{%1, %0|%0, %1}"
2691 [(set_attr "type" "sselog1")
2692 (set_attr "mode" "DF")])
2694 (define_insn "*vec_dupv2df"
2695 [(set (match_operand:V2DF 0 "register_operand" "=x")
2697 (match_operand:DF 1 "register_operand" "0")))]
2700 [(set_attr "type" "sselog1")
2701 (set_attr "mode" "V2DF")])
2703 (define_insn "*vec_concatv2df_sse3"
2704 [(set (match_operand:V2DF 0 "register_operand" "=x")
2706 (match_operand:DF 1 "nonimmediate_operand" "xm")
2709 "movddup\t{%1, %0|%0, %1}"
2710 [(set_attr "type" "sselog1")
2711 (set_attr "mode" "DF")])
2713 (define_insn "*vec_concatv2df"
2714 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2716 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2717 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2720 unpcklpd\t{%2, %0|%0, %2}
2721 movhpd\t{%2, %0|%0, %2}
2722 movsd\t{%1, %0|%0, %1}
2723 movlhps\t{%2, %0|%0, %2}
2724 movhps\t{%2, %0|%0, %2}"
2725 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2726 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2728 (define_expand "vec_setv2df"
2729 [(match_operand:V2DF 0 "register_operand" "")
2730 (match_operand:DF 1 "register_operand" "")
2731 (match_operand 2 "const_int_operand" "")]
2734 ix86_expand_vector_set (false, operands[0], operands[1],
2735 INTVAL (operands[2]));
2739 (define_expand "vec_extractv2df"
2740 [(match_operand:DF 0 "register_operand" "")
2741 (match_operand:V2DF 1 "register_operand" "")
2742 (match_operand 2 "const_int_operand" "")]
2745 ix86_expand_vector_extract (false, operands[0], operands[1],
2746 INTVAL (operands[2]));
2750 (define_expand "vec_initv2df"
2751 [(match_operand:V2DF 0 "register_operand" "")
2752 (match_operand 1 "" "")]
2755 ix86_expand_vector_init (false, operands[0], operands[1]);
2759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2761 ;; Parallel integral arithmetic
2763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2765 (define_expand "neg<mode>2"
2766 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2769 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2771 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2773 (define_expand "add<mode>3"
2774 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2775 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2776 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2778 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2780 (define_insn "*add<mode>3"
2781 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2783 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2784 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2785 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2786 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2787 [(set_attr "type" "sseiadd")
2788 (set_attr "prefix_data16" "1")
2789 (set_attr "mode" "TI")])
2791 (define_insn "sse2_ssadd<mode>3"
2792 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2794 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2795 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2796 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2797 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2798 [(set_attr "type" "sseiadd")
2799 (set_attr "prefix_data16" "1")
2800 (set_attr "mode" "TI")])
2802 (define_insn "sse2_usadd<mode>3"
2803 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2805 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2806 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2807 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2808 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2809 [(set_attr "type" "sseiadd")
2810 (set_attr "prefix_data16" "1")
2811 (set_attr "mode" "TI")])
2813 (define_expand "sub<mode>3"
2814 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2815 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2816 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2818 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2820 (define_insn "*sub<mode>3"
2821 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2823 (match_operand:SSEMODEI 1 "register_operand" "0")
2824 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2826 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2827 [(set_attr "type" "sseiadd")
2828 (set_attr "prefix_data16" "1")
2829 (set_attr "mode" "TI")])
2831 (define_insn "sse2_sssub<mode>3"
2832 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2834 (match_operand:SSEMODE12 1 "register_operand" "0")
2835 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2837 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2838 [(set_attr "type" "sseiadd")
2839 (set_attr "prefix_data16" "1")
2840 (set_attr "mode" "TI")])
2842 (define_insn "sse2_ussub<mode>3"
2843 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2845 (match_operand:SSEMODE12 1 "register_operand" "0")
2846 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2848 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2849 [(set_attr "type" "sseiadd")
2850 (set_attr "prefix_data16" "1")
2851 (set_attr "mode" "TI")])
2853 (define_expand "mulv16qi3"
2854 [(set (match_operand:V16QI 0 "register_operand" "")
2855 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2856 (match_operand:V16QI 2 "register_operand" "")))]
2862 for (i = 0; i < 12; ++i)
2863 t[i] = gen_reg_rtx (V16QImode);
2865 /* Unpack data such that we've got a source byte in each low byte of
2866 each word. We don't care what goes into the high byte of each word.
2867 Rather than trying to get zero in there, most convenient is to let
2868 it be a copy of the low byte. */
2869 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2870 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2871 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2872 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2874 /* Multiply words. The end-of-line annotations here give a picture of what
2875 the output of that instruction looks like. Dot means don't care; the
2876 letters are the bytes of the result with A being the most significant. */
2877 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2878 gen_lowpart (V8HImode, t[0]),
2879 gen_lowpart (V8HImode, t[1])));
2880 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2881 gen_lowpart (V8HImode, t[2]),
2882 gen_lowpart (V8HImode, t[3])));
2884 /* Extract the relevant bytes and merge them back together. */
2885 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2886 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2887 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2888 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2889 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2890 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2893 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
2897 (define_expand "mulv8hi3"
2898 [(set (match_operand:V8HI 0 "register_operand" "")
2899 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2900 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2902 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2904 (define_insn "*mulv8hi3"
2905 [(set (match_operand:V8HI 0 "register_operand" "=x")
2906 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2907 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2908 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2909 "pmullw\t{%2, %0|%0, %2}"
2910 [(set_attr "type" "sseimul")
2911 (set_attr "prefix_data16" "1")
2912 (set_attr "mode" "TI")])
2914 (define_expand "smulv8hi3_highpart"
2915 [(set (match_operand:V8HI 0 "register_operand" "")
2920 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2922 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2925 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2927 (define_insn "*smulv8hi3_highpart"
2928 [(set (match_operand:V8HI 0 "register_operand" "=x")
2933 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2935 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2937 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2938 "pmulhw\t{%2, %0|%0, %2}"
2939 [(set_attr "type" "sseimul")
2940 (set_attr "prefix_data16" "1")
2941 (set_attr "mode" "TI")])
2943 (define_expand "umulv8hi3_highpart"
2944 [(set (match_operand:V8HI 0 "register_operand" "")
2949 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2951 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2954 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2956 (define_insn "*umulv8hi3_highpart"
2957 [(set (match_operand:V8HI 0 "register_operand" "=x")
2962 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2964 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2966 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2967 "pmulhuw\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseimul")
2969 (set_attr "prefix_data16" "1")
2970 (set_attr "mode" "TI")])
2972 (define_insn "sse2_umulv2siv2di3"
2973 [(set (match_operand:V2DI 0 "register_operand" "=x")
2977 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2978 (parallel [(const_int 0) (const_int 2)])))
2981 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2982 (parallel [(const_int 0) (const_int 2)])))))]
2983 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2984 "pmuludq\t{%2, %0|%0, %2}"
2985 [(set_attr "type" "sseimul")
2986 (set_attr "prefix_data16" "1")
2987 (set_attr "mode" "TI")])
2989 (define_insn "sse4_1_mulv2siv2di3"
2990 [(set (match_operand:V2DI 0 "register_operand" "=x")
2994 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2995 (parallel [(const_int 0) (const_int 2)])))
2998 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2999 (parallel [(const_int 0) (const_int 2)])))))]
3000 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3001 "pmuldq\t{%2, %0|%0, %2}"
3002 [(set_attr "type" "sseimul")
3003 (set_attr "prefix_extra" "1")
3004 (set_attr "mode" "TI")])
3006 (define_insn "sse2_pmaddwd"
3007 [(set (match_operand:V4SI 0 "register_operand" "=x")
3012 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3013 (parallel [(const_int 0)
3019 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3020 (parallel [(const_int 0)
3026 (vec_select:V4HI (match_dup 1)
3027 (parallel [(const_int 1)
3032 (vec_select:V4HI (match_dup 2)
3033 (parallel [(const_int 1)
3036 (const_int 7)]))))))]
3037 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3038 "pmaddwd\t{%2, %0|%0, %2}"
3039 [(set_attr "type" "sseiadd")
3040 (set_attr "prefix_data16" "1")
3041 (set_attr "mode" "TI")])
3043 (define_expand "mulv4si3"
3044 [(set (match_operand:V4SI 0 "register_operand" "")
3045 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3046 (match_operand:V4SI 2 "register_operand" "")))]
3050 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
3053 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3059 t1 = gen_reg_rtx (V4SImode);
3060 t2 = gen_reg_rtx (V4SImode);
3061 t3 = gen_reg_rtx (V4SImode);
3062 t4 = gen_reg_rtx (V4SImode);
3063 t5 = gen_reg_rtx (V4SImode);
3064 t6 = gen_reg_rtx (V4SImode);
3065 thirtytwo = GEN_INT (32);
3067 /* Multiply elements 2 and 0. */
3068 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3071 /* Shift both input vectors down one element, so that elements 3
3072 and 1 are now in the slots for elements 2 and 0. For K8, at
3073 least, this is faster than using a shuffle. */
3074 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3075 gen_lowpart (TImode, op1),
3077 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3078 gen_lowpart (TImode, op2),
3080 /* Multiply elements 3 and 1. */
3081 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3084 /* Move the results in element 2 down to element 1; we don't care
3085 what goes in elements 2 and 3. */
3086 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3087 const0_rtx, const0_rtx));
3088 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3089 const0_rtx, const0_rtx));
3091 /* Merge the parts back together. */
3092 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
3097 (define_insn "*sse4_1_mulv4si3"
3098 [(set (match_operand:V4SI 0 "register_operand" "=x")
3099 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3100 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3101 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3102 "pmulld\t{%2, %0|%0, %2}"
3103 [(set_attr "type" "sseimul")
3104 (set_attr "prefix_extra" "1")
3105 (set_attr "mode" "TI")])
3107 (define_expand "mulv2di3"
3108 [(set (match_operand:V2DI 0 "register_operand" "")
3109 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3110 (match_operand:V2DI 2 "register_operand" "")))]
3113 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3119 t1 = gen_reg_rtx (V2DImode);
3120 t2 = gen_reg_rtx (V2DImode);
3121 t3 = gen_reg_rtx (V2DImode);
3122 t4 = gen_reg_rtx (V2DImode);
3123 t5 = gen_reg_rtx (V2DImode);
3124 t6 = gen_reg_rtx (V2DImode);
3125 thirtytwo = GEN_INT (32);
3127 /* Multiply low parts. */
3128 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3129 gen_lowpart (V4SImode, op2)));
3131 /* Shift input vectors left 32 bits so we can multiply high parts. */
3132 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3133 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3135 /* Multiply high parts by low parts. */
3136 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3137 gen_lowpart (V4SImode, t3)));
3138 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3139 gen_lowpart (V4SImode, t2)));
3141 /* Shift them back. */
3142 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3143 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3145 /* Add the three parts together. */
3146 emit_insn (gen_addv2di3 (t6, t1, t4));
3147 emit_insn (gen_addv2di3 (op0, t6, t5));
3151 (define_expand "vec_widen_smult_hi_v8hi"
3152 [(match_operand:V4SI 0 "register_operand" "")
3153 (match_operand:V8HI 1 "register_operand" "")
3154 (match_operand:V8HI 2 "register_operand" "")]
3157 rtx op1, op2, t1, t2, dest;
3161 t1 = gen_reg_rtx (V8HImode);
3162 t2 = gen_reg_rtx (V8HImode);
3163 dest = gen_lowpart (V8HImode, operands[0]);
3165 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3166 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3167 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
3171 (define_expand "vec_widen_smult_lo_v8hi"
3172 [(match_operand:V4SI 0 "register_operand" "")
3173 (match_operand:V8HI 1 "register_operand" "")
3174 (match_operand:V8HI 2 "register_operand" "")]
3177 rtx op1, op2, t1, t2, dest;
3181 t1 = gen_reg_rtx (V8HImode);
3182 t2 = gen_reg_rtx (V8HImode);
3183 dest = gen_lowpart (V8HImode, operands[0]);
3185 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3186 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3187 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
3191 (define_expand "vec_widen_umult_hi_v8hi"
3192 [(match_operand:V4SI 0 "register_operand" "")
3193 (match_operand:V8HI 1 "register_operand" "")
3194 (match_operand:V8HI 2 "register_operand" "")]
3197 rtx op1, op2, t1, t2, dest;
3201 t1 = gen_reg_rtx (V8HImode);
3202 t2 = gen_reg_rtx (V8HImode);
3203 dest = gen_lowpart (V8HImode, operands[0]);
3205 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3206 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3207 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
3211 (define_expand "vec_widen_umult_lo_v8hi"
3212 [(match_operand:V4SI 0 "register_operand" "")
3213 (match_operand:V8HI 1 "register_operand" "")
3214 (match_operand:V8HI 2 "register_operand" "")]
3217 rtx op1, op2, t1, t2, dest;
3221 t1 = gen_reg_rtx (V8HImode);
3222 t2 = gen_reg_rtx (V8HImode);
3223 dest = gen_lowpart (V8HImode, operands[0]);
3225 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3226 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3227 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
3231 (define_expand "vec_widen_smult_hi_v4si"
3232 [(match_operand:V2DI 0 "register_operand" "")
3233 (match_operand:V4SI 1 "register_operand" "")
3234 (match_operand:V4SI 2 "register_operand" "")]
3237 rtx op1, op2, t1, t2;
3241 t1 = gen_reg_rtx (V4SImode);
3242 t2 = gen_reg_rtx (V4SImode);
3244 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3245 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3246 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
3250 (define_expand "vec_widen_smult_lo_v4si"
3251 [(match_operand:V2DI 0 "register_operand" "")
3252 (match_operand:V4SI 1 "register_operand" "")
3253 (match_operand:V4SI 2 "register_operand" "")]
3256 rtx op1, op2, t1, t2;
3260 t1 = gen_reg_rtx (V4SImode);
3261 t2 = gen_reg_rtx (V4SImode);
3263 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3264 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3265 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
3269 (define_expand "vec_widen_umult_hi_v4si"
3270 [(match_operand:V2DI 0 "register_operand" "")
3271 (match_operand:V4SI 1 "register_operand" "")
3272 (match_operand:V4SI 2 "register_operand" "")]
3275 rtx op1, op2, t1, t2;
3279 t1 = gen_reg_rtx (V4SImode);
3280 t2 = gen_reg_rtx (V4SImode);
3282 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3283 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3284 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
3288 (define_expand "vec_widen_umult_lo_v4si"
3289 [(match_operand:V2DI 0 "register_operand" "")
3290 (match_operand:V4SI 1 "register_operand" "")
3291 (match_operand:V4SI 2 "register_operand" "")]
3294 rtx op1, op2, t1, t2;
3298 t1 = gen_reg_rtx (V4SImode);
3299 t2 = gen_reg_rtx (V4SImode);
3301 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3302 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3303 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
3307 (define_expand "sdot_prodv8hi"
3308 [(match_operand:V4SI 0 "register_operand" "")
3309 (match_operand:V8HI 1 "register_operand" "")
3310 (match_operand:V8HI 2 "register_operand" "")
3311 (match_operand:V4SI 3 "register_operand" "")]
3314 rtx t = gen_reg_rtx (V4SImode);
3315 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3316 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
3320 (define_expand "udot_prodv4si"
3321 [(match_operand:V2DI 0 "register_operand" "")
3322 (match_operand:V4SI 1 "register_operand" "")
3323 (match_operand:V4SI 2 "register_operand" "")
3324 (match_operand:V2DI 3 "register_operand" "")]
3329 t1 = gen_reg_rtx (V2DImode);
3330 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3331 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3333 t2 = gen_reg_rtx (V4SImode);
3334 t3 = gen_reg_rtx (V4SImode);
3335 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3336 gen_lowpart (TImode, operands[1]),
3338 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3339 gen_lowpart (TImode, operands[2]),
3342 t4 = gen_reg_rtx (V2DImode);
3343 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3345 emit_insn (gen_addv2di3 (operands[0], t1, t4));
3349 (define_insn "ashr<mode>3"
3350 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3352 (match_operand:SSEMODE24 1 "register_operand" "0")
3353 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3355 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3356 [(set_attr "type" "sseishft")
3357 (set_attr "prefix_data16" "1")
3358 (set_attr "mode" "TI")])
3360 (define_insn "lshr<mode>3"
3361 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3362 (lshiftrt:SSEMODE248
3363 (match_operand:SSEMODE248 1 "register_operand" "0")
3364 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3366 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3367 [(set_attr "type" "sseishft")
3368 (set_attr "prefix_data16" "1")
3369 (set_attr "mode" "TI")])
3371 (define_insn "ashl<mode>3"
3372 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3374 (match_operand:SSEMODE248 1 "register_operand" "0")
3375 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3377 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3378 [(set_attr "type" "sseishft")
3379 (set_attr "prefix_data16" "1")
3380 (set_attr "mode" "TI")])
3382 (define_expand "vec_shl_<mode>"
3383 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3384 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3385 (match_operand:SI 2 "general_operand" "")))]
3388 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3390 operands[0] = gen_lowpart (TImode, operands[0]);
3391 operands[1] = gen_lowpart (TImode, operands[1]);
3394 (define_expand "vec_shr_<mode>"
3395 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3396 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3397 (match_operand:SI 2 "general_operand" "")))]
3400 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3402 operands[0] = gen_lowpart (TImode, operands[0]);
3403 operands[1] = gen_lowpart (TImode, operands[1]);
3406 (define_expand "umaxv16qi3"
3407 [(set (match_operand:V16QI 0 "register_operand" "")
3408 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3409 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3411 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
3413 (define_insn "*umaxv16qi3"
3414 [(set (match_operand:V16QI 0 "register_operand" "=x")
3415 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3416 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3417 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3418 "pmaxub\t{%2, %0|%0, %2}"
3419 [(set_attr "type" "sseiadd")
3420 (set_attr "prefix_data16" "1")
3421 (set_attr "mode" "TI")])
3423 (define_expand "smaxv8hi3"
3424 [(set (match_operand:V8HI 0 "register_operand" "")
3425 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3426 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3428 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
3430 (define_insn "*smaxv8hi3"
3431 [(set (match_operand:V8HI 0 "register_operand" "=x")
3432 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3433 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3434 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3435 "pmaxsw\t{%2, %0|%0, %2}"
3436 [(set_attr "type" "sseiadd")
3437 (set_attr "prefix_data16" "1")
3438 (set_attr "mode" "TI")])
3440 (define_expand "umaxv8hi3"
3441 [(set (match_operand:V8HI 0 "register_operand" "")
3442 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3443 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3447 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3450 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3451 if (rtx_equal_p (op3, op2))
3452 op3 = gen_reg_rtx (V8HImode);
3453 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3454 emit_insn (gen_addv8hi3 (op0, op3, op2));
3459 (define_expand "smax<mode>3"
3460 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3461 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3462 (match_operand:SSEMODE14 2 "register_operand" "")))]
3466 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3472 xops[0] = operands[0];
3473 xops[1] = operands[1];
3474 xops[2] = operands[2];
3475 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3476 xops[4] = operands[1];
3477 xops[5] = operands[2];
3478 ok = ix86_expand_int_vcond (xops);
3484 (define_insn "*sse4_1_smax<mode>3"
3485 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3487 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3488 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3489 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3490 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3491 [(set_attr "type" "sseiadd")
3492 (set_attr "prefix_extra" "1")
3493 (set_attr "mode" "TI")])
3495 (define_expand "umaxv4si3"
3496 [(set (match_operand:V4SI 0 "register_operand" "")
3497 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3498 (match_operand:V4SI 2 "register_operand" "")))]
3502 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3508 xops[0] = operands[0];
3509 xops[1] = operands[1];
3510 xops[2] = operands[2];
3511 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3512 xops[4] = operands[1];
3513 xops[5] = operands[2];
3514 ok = ix86_expand_int_vcond (xops);
3520 (define_insn "*sse4_1_umax<mode>3"
3521 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3523 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3524 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3525 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3526 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3527 [(set_attr "type" "sseiadd")
3528 (set_attr "prefix_extra" "1")
3529 (set_attr "mode" "TI")])
3531 (define_expand "uminv16qi3"
3532 [(set (match_operand:V16QI 0 "register_operand" "")
3533 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3534 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3536 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
3538 (define_insn "*uminv16qi3"
3539 [(set (match_operand:V16QI 0 "register_operand" "=x")
3540 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3541 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3542 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3543 "pminub\t{%2, %0|%0, %2}"
3544 [(set_attr "type" "sseiadd")
3545 (set_attr "prefix_data16" "1")
3546 (set_attr "mode" "TI")])
3548 (define_expand "sminv8hi3"
3549 [(set (match_operand:V8HI 0 "register_operand" "")
3550 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3551 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3553 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
3555 (define_insn "*sminv8hi3"
3556 [(set (match_operand:V8HI 0 "register_operand" "=x")
3557 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3558 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3559 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3560 "pminsw\t{%2, %0|%0, %2}"
3561 [(set_attr "type" "sseiadd")
3562 (set_attr "prefix_data16" "1")
3563 (set_attr "mode" "TI")])
3565 (define_expand "smin<mode>3"
3566 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3567 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3568 (match_operand:SSEMODE14 2 "register_operand" "")))]
3572 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3578 xops[0] = operands[0];
3579 xops[1] = operands[2];
3580 xops[2] = operands[1];
3581 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3582 xops[4] = operands[1];
3583 xops[5] = operands[2];
3584 ok = ix86_expand_int_vcond (xops);
3590 (define_insn "*sse4_1_smin<mode>3"
3591 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3593 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3594 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3595 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3596 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3597 [(set_attr "type" "sseiadd")
3598 (set_attr "prefix_extra" "1")
3599 (set_attr "mode" "TI")])
3601 (define_expand "umin<mode>3"
3602 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3603 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3604 (match_operand:SSEMODE24 2 "register_operand" "")))]
3608 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3614 xops[0] = operands[0];
3615 xops[1] = operands[2];
3616 xops[2] = operands[1];
3617 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3618 xops[4] = operands[1];
3619 xops[5] = operands[2];
3620 ok = ix86_expand_int_vcond (xops);
3626 (define_insn "*sse4_1_umin<mode>3"
3627 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3629 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3630 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3631 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3632 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3633 [(set_attr "type" "sseiadd")
3634 (set_attr "prefix_extra" "1")
3635 (set_attr "mode" "TI")])
3637 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3639 ;; Parallel integral comparisons
3641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3643 (define_insn "sse2_eq<mode>3"
3644 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3646 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3647 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3648 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3649 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3650 [(set_attr "type" "ssecmp")
3651 (set_attr "prefix_data16" "1")
3652 (set_attr "mode" "TI")])
3654 (define_insn "sse4_1_eqv2di3"
3655 [(set (match_operand:V2DI 0 "register_operand" "=x")
3657 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3658 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3659 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3660 "pcmpeqq\t{%2, %0|%0, %2}"
3661 [(set_attr "type" "ssecmp")
3662 (set_attr "prefix_extra" "1")
3663 (set_attr "mode" "TI")])
3665 (define_insn "sse2_gt<mode>3"
3666 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3668 (match_operand:SSEMODE124 1 "register_operand" "0")
3669 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3671 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3672 [(set_attr "type" "ssecmp")
3673 (set_attr "prefix_data16" "1")
3674 (set_attr "mode" "TI")])
3676 (define_insn "sse4_2_gtv2di3"
3677 [(set (match_operand:V2DI 0 "register_operand" "=x")
3679 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3680 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3682 "pcmpgtq\t{%2, %0|%0, %2}"
3683 [(set_attr "type" "ssecmp")
3684 (set_attr "mode" "TI")])
3686 (define_expand "vcond<mode>"
3687 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3688 (if_then_else:SSEMODEI
3689 (match_operator 3 ""
3690 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3691 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3692 (match_operand:SSEMODEI 1 "general_operand" "")
3693 (match_operand:SSEMODEI 2 "general_operand" "")))]
3696 if (ix86_expand_int_vcond (operands))
3702 (define_expand "vcondu<mode>"
3703 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3704 (if_then_else:SSEMODEI
3705 (match_operator 3 ""
3706 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3707 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3708 (match_operand:SSEMODEI 1 "general_operand" "")
3709 (match_operand:SSEMODEI 2 "general_operand" "")))]
3712 if (ix86_expand_int_vcond (operands))
3718 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3720 ;; Parallel bitwise logical operations
3722 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3724 (define_expand "one_cmpl<mode>2"
3725 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3726 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3730 int i, n = GET_MODE_NUNITS (<MODE>mode);
3731 rtvec v = rtvec_alloc (n);
3733 for (i = 0; i < n; ++i)
3734 RTVEC_ELT (v, i) = constm1_rtx;
3736 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
3739 (define_expand "and<mode>3"
3740 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3741 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3742 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3744 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3746 (define_insn "*and<mode>3"
3747 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3749 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3750 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3751 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3752 "pand\t{%2, %0|%0, %2}"
3753 [(set_attr "type" "sselog")
3754 (set_attr "prefix_data16" "1")
3755 (set_attr "mode" "TI")])
3757 (define_insn "sse2_nand<mode>3"
3758 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3760 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3761 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3763 "pandn\t{%2, %0|%0, %2}"
3764 [(set_attr "type" "sselog")
3765 (set_attr "prefix_data16" "1")
3766 (set_attr "mode" "TI")])
3768 (define_expand "andtf3"
3769 [(set (match_operand:TF 0 "register_operand" "")
3770 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3771 (match_operand:TF 2 "nonimmediate_operand" "")))]
3773 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
3775 (define_insn "*andtf3"
3776 [(set (match_operand:TF 0 "register_operand" "=x")
3778 (match_operand:TF 1 "nonimmediate_operand" "%0")
3779 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3780 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3781 "pand\t{%2, %0|%0, %2}"
3782 [(set_attr "type" "sselog")
3783 (set_attr "prefix_data16" "1")
3784 (set_attr "mode" "TI")])
3786 (define_insn "*nandtf3"
3787 [(set (match_operand:TF 0 "register_operand" "=x")
3789 (not:TF (match_operand:TF 1 "register_operand" "0"))
3790 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3792 "pandn\t{%2, %0|%0, %2}"
3793 [(set_attr "type" "sselog")
3794 (set_attr "prefix_data16" "1")
3795 (set_attr "mode" "TI")])
3797 (define_expand "ior<mode>3"
3798 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3799 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3800 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3802 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3804 (define_insn "*ior<mode>3"
3805 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3807 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3808 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3809 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3810 "por\t{%2, %0|%0, %2}"
3811 [(set_attr "type" "sselog")
3812 (set_attr "prefix_data16" "1")
3813 (set_attr "mode" "TI")])
3815 (define_expand "iortf3"
3816 [(set (match_operand:TF 0 "register_operand" "")
3817 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3818 (match_operand:TF 2 "nonimmediate_operand" "")))]
3820 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
3822 (define_insn "*iortf3"
3823 [(set (match_operand:TF 0 "register_operand" "=x")
3825 (match_operand:TF 1 "nonimmediate_operand" "%0")
3826 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3827 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3828 "por\t{%2, %0|%0, %2}"
3829 [(set_attr "type" "sselog")
3830 (set_attr "prefix_data16" "1")
3831 (set_attr "mode" "TI")])
3833 (define_expand "xor<mode>3"
3834 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3835 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3836 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3838 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3840 (define_insn "*xor<mode>3"
3841 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3843 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3844 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3845 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3846 "pxor\t{%2, %0|%0, %2}"
3847 [(set_attr "type" "sselog")
3848 (set_attr "prefix_data16" "1")
3849 (set_attr "mode" "TI")])
3851 (define_expand "xortf3"
3852 [(set (match_operand:TF 0 "register_operand" "")
3853 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3854 (match_operand:TF 2 "nonimmediate_operand" "")))]
3856 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
3858 (define_insn "*xortf3"
3859 [(set (match_operand:TF 0 "register_operand" "=x")
3861 (match_operand:TF 1 "nonimmediate_operand" "%0")
3862 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3863 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3864 "pxor\t{%2, %0|%0, %2}"
3865 [(set_attr "type" "sselog")
3866 (set_attr "prefix_data16" "1")
3867 (set_attr "mode" "TI")])
3869 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3871 ;; Parallel integral element swizzling
3873 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3876 ;; op1 = abcdefghijklmnop
3877 ;; op2 = qrstuvwxyz012345
3878 ;; h1 = aqbrcsdteufvgwhx
3879 ;; l1 = iyjzk0l1m2n3o4p5
3880 ;; h2 = aiqybjrzcks0dlt1
3881 ;; l2 = emu2fnv3gow4hpx5
3882 ;; h3 = aeimquy2bfjnrvz3
3883 ;; l3 = cgkosw04dhlptx15
3884 ;; result = bdfhjlnprtvxz135
3885 (define_expand "vec_pack_trunc_v8hi"
3886 [(match_operand:V16QI 0 "register_operand" "")
3887 (match_operand:V8HI 1 "register_operand" "")
3888 (match_operand:V8HI 2 "register_operand" "")]
3891 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3893 op1 = gen_lowpart (V16QImode, operands[1]);
3894 op2 = gen_lowpart (V16QImode, operands[2]);
3895 h1 = gen_reg_rtx (V16QImode);
3896 l1 = gen_reg_rtx (V16QImode);
3897 h2 = gen_reg_rtx (V16QImode);
3898 l2 = gen_reg_rtx (V16QImode);
3899 h3 = gen_reg_rtx (V16QImode);
3900 l3 = gen_reg_rtx (V16QImode);
3902 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3903 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3904 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3905 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3906 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3907 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3908 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3919 ;; result = bdfhjlnp
3920 (define_expand "vec_pack_trunc_v4si"
3921 [(match_operand:V8HI 0 "register_operand" "")
3922 (match_operand:V4SI 1 "register_operand" "")
3923 (match_operand:V4SI 2 "register_operand" "")]
3926 rtx op1, op2, h1, l1, h2, l2;
3928 op1 = gen_lowpart (V8HImode, operands[1]);
3929 op2 = gen_lowpart (V8HImode, operands[2]);
3930 h1 = gen_reg_rtx (V8HImode);
3931 l1 = gen_reg_rtx (V8HImode);
3932 h2 = gen_reg_rtx (V8HImode);
3933 l2 = gen_reg_rtx (V8HImode);
3935 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3936 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3937 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3938 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3939 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
3949 (define_expand "vec_pack_trunc_v2di"
3950 [(match_operand:V4SI 0 "register_operand" "")
3951 (match_operand:V2DI 1 "register_operand" "")
3952 (match_operand:V2DI 2 "register_operand" "")]
3955 rtx op1, op2, h1, l1;
3957 op1 = gen_lowpart (V4SImode, operands[1]);
3958 op2 = gen_lowpart (V4SImode, operands[2]);
3959 h1 = gen_reg_rtx (V4SImode);
3960 l1 = gen_reg_rtx (V4SImode);
3962 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3963 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3964 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
3968 (define_expand "vec_interleave_highv16qi"
3969 [(set (match_operand:V16QI 0 "register_operand" "=x")
3972 (match_operand:V16QI 1 "register_operand" "0")
3973 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3974 (parallel [(const_int 8) (const_int 24)
3975 (const_int 9) (const_int 25)
3976 (const_int 10) (const_int 26)
3977 (const_int 11) (const_int 27)
3978 (const_int 12) (const_int 28)
3979 (const_int 13) (const_int 29)
3980 (const_int 14) (const_int 30)
3981 (const_int 15) (const_int 31)])))]
3984 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
3988 (define_expand "vec_interleave_lowv16qi"
3989 [(set (match_operand:V16QI 0 "register_operand" "=x")
3992 (match_operand:V16QI 1 "register_operand" "0")
3993 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3994 (parallel [(const_int 0) (const_int 16)
3995 (const_int 1) (const_int 17)
3996 (const_int 2) (const_int 18)
3997 (const_int 3) (const_int 19)
3998 (const_int 4) (const_int 20)
3999 (const_int 5) (const_int 21)
4000 (const_int 6) (const_int 22)
4001 (const_int 7) (const_int 23)])))]
4004 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
4008 (define_expand "vec_interleave_highv8hi"
4009 [(set (match_operand:V8HI 0 "register_operand" "=x")
4012 (match_operand:V8HI 1 "register_operand" "0")
4013 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4014 (parallel [(const_int 4) (const_int 12)
4015 (const_int 5) (const_int 13)
4016 (const_int 6) (const_int 14)
4017 (const_int 7) (const_int 15)])))]
4020 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
4024 (define_expand "vec_interleave_lowv8hi"
4025 [(set (match_operand:V8HI 0 "register_operand" "=x")
4028 (match_operand:V8HI 1 "register_operand" "0")
4029 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4030 (parallel [(const_int 0) (const_int 8)
4031 (const_int 1) (const_int 9)
4032 (const_int 2) (const_int 10)
4033 (const_int 3) (const_int 11)])))]
4036 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
4040 (define_expand "vec_interleave_highv4si"
4041 [(set (match_operand:V4SI 0 "register_operand" "=x")
4044 (match_operand:V4SI 1 "register_operand" "0")
4045 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4046 (parallel [(const_int 2) (const_int 6)
4047 (const_int 3) (const_int 7)])))]
4050 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
4054 (define_expand "vec_interleave_lowv4si"
4055 [(set (match_operand:V4SI 0 "register_operand" "=x")
4058 (match_operand:V4SI 1 "register_operand" "0")
4059 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4060 (parallel [(const_int 0) (const_int 4)
4061 (const_int 1) (const_int 5)])))]
4064 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
4068 (define_expand "vec_interleave_highv2di"
4069 [(set (match_operand:V2DI 0 "register_operand" "=x")
4072 (match_operand:V2DI 1 "register_operand" "0")
4073 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4074 (parallel [(const_int 1)
4078 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
4082 (define_expand "vec_interleave_lowv2di"
4083 [(set (match_operand:V2DI 0 "register_operand" "=x")
4086 (match_operand:V2DI 1 "register_operand" "0")
4087 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4088 (parallel [(const_int 0)
4092 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
4096 (define_insn "sse2_packsswb"
4097 [(set (match_operand:V16QI 0 "register_operand" "=x")
4100 (match_operand:V8HI 1 "register_operand" "0"))
4102 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4104 "packsswb\t{%2, %0|%0, %2}"
4105 [(set_attr "type" "sselog")
4106 (set_attr "prefix_data16" "1")
4107 (set_attr "mode" "TI")])
4109 (define_insn "sse2_packssdw"
4110 [(set (match_operand:V8HI 0 "register_operand" "=x")
4113 (match_operand:V4SI 1 "register_operand" "0"))
4115 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4117 "packssdw\t{%2, %0|%0, %2}"
4118 [(set_attr "type" "sselog")
4119 (set_attr "prefix_data16" "1")
4120 (set_attr "mode" "TI")])
4122 (define_insn "sse2_packuswb"
4123 [(set (match_operand:V16QI 0 "register_operand" "=x")
4126 (match_operand:V8HI 1 "register_operand" "0"))
4128 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4130 "packuswb\t{%2, %0|%0, %2}"
4131 [(set_attr "type" "sselog")
4132 (set_attr "prefix_data16" "1")
4133 (set_attr "mode" "TI")])
4135 (define_insn "sse2_punpckhbw"
4136 [(set (match_operand:V16QI 0 "register_operand" "=x")
4139 (match_operand:V16QI 1 "register_operand" "0")
4140 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4141 (parallel [(const_int 8) (const_int 24)
4142 (const_int 9) (const_int 25)
4143 (const_int 10) (const_int 26)
4144 (const_int 11) (const_int 27)
4145 (const_int 12) (const_int 28)
4146 (const_int 13) (const_int 29)
4147 (const_int 14) (const_int 30)
4148 (const_int 15) (const_int 31)])))]
4150 "punpckhbw\t{%2, %0|%0, %2}"
4151 [(set_attr "type" "sselog")
4152 (set_attr "prefix_data16" "1")
4153 (set_attr "mode" "TI")])
4155 (define_insn "sse2_punpcklbw"
4156 [(set (match_operand:V16QI 0 "register_operand" "=x")
4159 (match_operand:V16QI 1 "register_operand" "0")
4160 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4161 (parallel [(const_int 0) (const_int 16)
4162 (const_int 1) (const_int 17)
4163 (const_int 2) (const_int 18)
4164 (const_int 3) (const_int 19)
4165 (const_int 4) (const_int 20)
4166 (const_int 5) (const_int 21)
4167 (const_int 6) (const_int 22)
4168 (const_int 7) (const_int 23)])))]
4170 "punpcklbw\t{%2, %0|%0, %2}"
4171 [(set_attr "type" "sselog")
4172 (set_attr "prefix_data16" "1")
4173 (set_attr "mode" "TI")])
4175 (define_insn "sse2_punpckhwd"
4176 [(set (match_operand:V8HI 0 "register_operand" "=x")
4179 (match_operand:V8HI 1 "register_operand" "0")
4180 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4181 (parallel [(const_int 4) (const_int 12)
4182 (const_int 5) (const_int 13)
4183 (const_int 6) (const_int 14)
4184 (const_int 7) (const_int 15)])))]
4186 "punpckhwd\t{%2, %0|%0, %2}"
4187 [(set_attr "type" "sselog")
4188 (set_attr "prefix_data16" "1")
4189 (set_attr "mode" "TI")])
4191 (define_insn "sse2_punpcklwd"
4192 [(set (match_operand:V8HI 0 "register_operand" "=x")
4195 (match_operand:V8HI 1 "register_operand" "0")
4196 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4197 (parallel [(const_int 0) (const_int 8)
4198 (const_int 1) (const_int 9)
4199 (const_int 2) (const_int 10)
4200 (const_int 3) (const_int 11)])))]
4202 "punpcklwd\t{%2, %0|%0, %2}"
4203 [(set_attr "type" "sselog")
4204 (set_attr "prefix_data16" "1")
4205 (set_attr "mode" "TI")])
4207 (define_insn "sse2_punpckhdq"
4208 [(set (match_operand:V4SI 0 "register_operand" "=x")
4211 (match_operand:V4SI 1 "register_operand" "0")
4212 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4213 (parallel [(const_int 2) (const_int 6)
4214 (const_int 3) (const_int 7)])))]
4216 "punpckhdq\t{%2, %0|%0, %2}"
4217 [(set_attr "type" "sselog")
4218 (set_attr "prefix_data16" "1")
4219 (set_attr "mode" "TI")])
4221 (define_insn "sse2_punpckldq"
4222 [(set (match_operand:V4SI 0 "register_operand" "=x")
4225 (match_operand:V4SI 1 "register_operand" "0")
4226 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4227 (parallel [(const_int 0) (const_int 4)
4228 (const_int 1) (const_int 5)])))]
4230 "punpckldq\t{%2, %0|%0, %2}"
4231 [(set_attr "type" "sselog")
4232 (set_attr "prefix_data16" "1")
4233 (set_attr "mode" "TI")])
4235 (define_insn "sse2_punpckhqdq"
4236 [(set (match_operand:V2DI 0 "register_operand" "=x")
4239 (match_operand:V2DI 1 "register_operand" "0")
4240 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4241 (parallel [(const_int 1)
4244 "punpckhqdq\t{%2, %0|%0, %2}"
4245 [(set_attr "type" "sselog")
4246 (set_attr "prefix_data16" "1")
4247 (set_attr "mode" "TI")])
4249 (define_insn "sse2_punpcklqdq"
4250 [(set (match_operand:V2DI 0 "register_operand" "=x")
4253 (match_operand:V2DI 1 "register_operand" "0")
4254 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4255 (parallel [(const_int 0)
4258 "punpcklqdq\t{%2, %0|%0, %2}"
4259 [(set_attr "type" "sselog")
4260 (set_attr "prefix_data16" "1")
4261 (set_attr "mode" "TI")])
4263 (define_insn "*sse4_1_pinsrb"
4264 [(set (match_operand:V16QI 0 "register_operand" "=x")
4266 (vec_duplicate:V16QI
4267 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4268 (match_operand:V16QI 1 "register_operand" "0")
4269 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4272 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4273 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4275 [(set_attr "type" "sselog")
4276 (set_attr "prefix_extra" "1")
4277 (set_attr "mode" "TI")])
4279 (define_insn "*sse2_pinsrw"
4280 [(set (match_operand:V8HI 0 "register_operand" "=x")
4283 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4284 (match_operand:V8HI 1 "register_operand" "0")
4285 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4288 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4289 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4291 [(set_attr "type" "sselog")
4292 (set_attr "prefix_data16" "1")
4293 (set_attr "mode" "TI")])
4295 ;; It must come before sse2_loadld since it is preferred.
4296 (define_insn "*sse4_1_pinsrd"
4297 [(set (match_operand:V4SI 0 "register_operand" "=x")
4300 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4301 (match_operand:V4SI 1 "register_operand" "0")
4302 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4305 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4306 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4308 [(set_attr "type" "sselog")
4309 (set_attr "prefix_extra" "1")
4310 (set_attr "mode" "TI")])
4312 (define_insn "*sse4_1_pinsrq"
4313 [(set (match_operand:V2DI 0 "register_operand" "=x")
4316 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4317 (match_operand:V2DI 1 "register_operand" "0")
4318 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4321 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4322 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4324 [(set_attr "type" "sselog")
4325 (set_attr "prefix_extra" "1")
4326 (set_attr "mode" "TI")])
4328 (define_insn "*sse4_1_pextrb"
4329 [(set (match_operand:SI 0 "register_operand" "=r")
4332 (match_operand:V16QI 1 "register_operand" "x")
4333 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4335 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4336 [(set_attr "type" "sselog")
4337 (set_attr "prefix_extra" "1")
4338 (set_attr "mode" "TI")])
4340 (define_insn "*sse4_1_pextrb_memory"
4341 [(set (match_operand:QI 0 "memory_operand" "=m")
4343 (match_operand:V16QI 1 "register_operand" "x")
4344 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4346 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4347 [(set_attr "type" "sselog")
4348 (set_attr "prefix_extra" "1")
4349 (set_attr "mode" "TI")])
4351 (define_insn "*sse2_pextrw"
4352 [(set (match_operand:SI 0 "register_operand" "=r")
4355 (match_operand:V8HI 1 "register_operand" "x")
4356 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4358 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4359 [(set_attr "type" "sselog")
4360 (set_attr "prefix_data16" "1")
4361 (set_attr "mode" "TI")])
4363 (define_insn "*sse4_1_pextrw_memory"
4364 [(set (match_operand:HI 0 "memory_operand" "=m")
4366 (match_operand:V8HI 1 "register_operand" "x")
4367 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4369 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4370 [(set_attr "type" "sselog")
4371 (set_attr "prefix_extra" "1")
4372 (set_attr "mode" "TI")])
4374 (define_insn "*sse4_1_pextrd"
4375 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4377 (match_operand:V4SI 1 "register_operand" "x")
4378 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4380 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4381 [(set_attr "type" "sselog")
4382 (set_attr "prefix_extra" "1")
4383 (set_attr "mode" "TI")])
4385 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4386 (define_insn "*sse4_1_pextrq"
4387 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4389 (match_operand:V2DI 1 "register_operand" "x")
4390 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4391 "TARGET_SSE4_1 && TARGET_64BIT"
4392 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4393 [(set_attr "type" "sselog")
4394 (set_attr "prefix_extra" "1")
4395 (set_attr "mode" "TI")])
4397 (define_expand "sse2_pshufd"
4398 [(match_operand:V4SI 0 "register_operand" "")
4399 (match_operand:V4SI 1 "nonimmediate_operand" "")
4400 (match_operand:SI 2 "const_int_operand" "")]
4403 int mask = INTVAL (operands[2]);
4404 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4405 GEN_INT ((mask >> 0) & 3),
4406 GEN_INT ((mask >> 2) & 3),
4407 GEN_INT ((mask >> 4) & 3),
4408 GEN_INT ((mask >> 6) & 3)));
4412 (define_insn "sse2_pshufd_1"
4413 [(set (match_operand:V4SI 0 "register_operand" "=x")
4415 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4416 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4417 (match_operand 3 "const_0_to_3_operand" "")
4418 (match_operand 4 "const_0_to_3_operand" "")
4419 (match_operand 5 "const_0_to_3_operand" "")])))]
4423 mask |= INTVAL (operands[2]) << 0;
4424 mask |= INTVAL (operands[3]) << 2;
4425 mask |= INTVAL (operands[4]) << 4;
4426 mask |= INTVAL (operands[5]) << 6;
4427 operands[2] = GEN_INT (mask);
4429 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4431 [(set_attr "type" "sselog1")
4432 (set_attr "prefix_data16" "1")
4433 (set_attr "mode" "TI")])
4435 (define_expand "sse2_pshuflw"
4436 [(match_operand:V8HI 0 "register_operand" "")
4437 (match_operand:V8HI 1 "nonimmediate_operand" "")
4438 (match_operand:SI 2 "const_int_operand" "")]
4441 int mask = INTVAL (operands[2]);
4442 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4443 GEN_INT ((mask >> 0) & 3),
4444 GEN_INT ((mask >> 2) & 3),
4445 GEN_INT ((mask >> 4) & 3),
4446 GEN_INT ((mask >> 6) & 3)));
4450 (define_insn "sse2_pshuflw_1"
4451 [(set (match_operand:V8HI 0 "register_operand" "=x")
4453 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4454 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4455 (match_operand 3 "const_0_to_3_operand" "")
4456 (match_operand 4 "const_0_to_3_operand" "")
4457 (match_operand 5 "const_0_to_3_operand" "")
4465 mask |= INTVAL (operands[2]) << 0;
4466 mask |= INTVAL (operands[3]) << 2;
4467 mask |= INTVAL (operands[4]) << 4;
4468 mask |= INTVAL (operands[5]) << 6;
4469 operands[2] = GEN_INT (mask);
4471 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4473 [(set_attr "type" "sselog")
4474 (set_attr "prefix_rep" "1")
4475 (set_attr "mode" "TI")])
4477 (define_expand "sse2_pshufhw"
4478 [(match_operand:V8HI 0 "register_operand" "")
4479 (match_operand:V8HI 1 "nonimmediate_operand" "")
4480 (match_operand:SI 2 "const_int_operand" "")]
4483 int mask = INTVAL (operands[2]);
4484 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4485 GEN_INT (((mask >> 0) & 3) + 4),
4486 GEN_INT (((mask >> 2) & 3) + 4),
4487 GEN_INT (((mask >> 4) & 3) + 4),
4488 GEN_INT (((mask >> 6) & 3) + 4)));
4492 (define_insn "sse2_pshufhw_1"
4493 [(set (match_operand:V8HI 0 "register_operand" "=x")
4495 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4496 (parallel [(const_int 0)
4500 (match_operand 2 "const_4_to_7_operand" "")
4501 (match_operand 3 "const_4_to_7_operand" "")
4502 (match_operand 4 "const_4_to_7_operand" "")
4503 (match_operand 5 "const_4_to_7_operand" "")])))]
4507 mask |= (INTVAL (operands[2]) - 4) << 0;
4508 mask |= (INTVAL (operands[3]) - 4) << 2;
4509 mask |= (INTVAL (operands[4]) - 4) << 4;
4510 mask |= (INTVAL (operands[5]) - 4) << 6;
4511 operands[2] = GEN_INT (mask);
4513 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4515 [(set_attr "type" "sselog")
4516 (set_attr "prefix_rep" "1")
4517 (set_attr "mode" "TI")])
4519 (define_expand "sse2_loadd"
4520 [(set (match_operand:V4SI 0 "register_operand" "")
4523 (match_operand:SI 1 "nonimmediate_operand" ""))
4527 "operands[2] = CONST0_RTX (V4SImode);")
4529 (define_insn "sse2_loadld"
4530 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4533 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4534 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4538 movd\t{%2, %0|%0, %2}
4539 movd\t{%2, %0|%0, %2}
4540 movss\t{%2, %0|%0, %2}
4541 movss\t{%2, %0|%0, %2}"
4542 [(set_attr "type" "ssemov")
4543 (set_attr "mode" "TI,TI,V4SF,SF")])
4545 (define_insn_and_split "sse2_stored"
4546 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4548 (match_operand:V4SI 1 "register_operand" "x,Yi")
4549 (parallel [(const_int 0)])))]
4552 "&& reload_completed
4553 && (TARGET_INTER_UNIT_MOVES
4554 || MEM_P (operands [0])
4555 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4556 [(set (match_dup 0) (match_dup 1))]
4558 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
4561 (define_expand "sse_storeq"
4562 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4564 (match_operand:V2DI 1 "register_operand" "")
4565 (parallel [(const_int 0)])))]
4569 (define_insn "*sse2_storeq_rex64"
4570 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4572 (match_operand:V2DI 1 "register_operand" "x,Yi")
4573 (parallel [(const_int 0)])))]
4574 "TARGET_64BIT && TARGET_SSE"
4577 (define_insn "*sse2_storeq"
4578 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4580 (match_operand:V2DI 1 "register_operand" "x")
4581 (parallel [(const_int 0)])))]
4586 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4588 (match_operand:V2DI 1 "register_operand" "")
4589 (parallel [(const_int 0)])))]
4592 && (TARGET_INTER_UNIT_MOVES
4593 || MEM_P (operands [0])
4594 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4595 [(set (match_dup 0) (match_dup 1))]
4597 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
4600 (define_insn "*vec_extractv2di_1_sse2"
4601 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4603 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4604 (parallel [(const_int 1)])))]
4605 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4607 movhps\t{%1, %0|%0, %1}
4608 psrldq\t{$8, %0|%0, 8}
4609 movq\t{%H1, %0|%0, %H1}"
4610 [(set_attr "type" "ssemov,sseishft,ssemov")
4611 (set_attr "memory" "*,none,*")
4612 (set_attr "mode" "V2SF,TI,TI")])
4614 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4615 (define_insn "*vec_extractv2di_1_sse"
4616 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4618 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4619 (parallel [(const_int 1)])))]
4620 "!TARGET_SSE2 && TARGET_SSE
4621 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4623 movhps\t{%1, %0|%0, %1}
4624 movhlps\t{%1, %0|%0, %1}
4625 movlps\t{%H1, %0|%0, %H1}"
4626 [(set_attr "type" "ssemov")
4627 (set_attr "mode" "V2SF,V4SF,V2SF")])
4629 (define_insn "*vec_dupv4si"
4630 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4632 (match_operand:SI 1 "register_operand" " Yt,0")))]
4635 pshufd\t{$0, %1, %0|%0, %1, 0}
4636 shufps\t{$0, %0, %0|%0, %0, 0}"
4637 [(set_attr "type" "sselog1")
4638 (set_attr "mode" "TI,V4SF")])
4640 (define_insn "*vec_dupv2di"
4641 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4643 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4648 [(set_attr "type" "sselog1,ssemov")
4649 (set_attr "mode" "TI,V4SF")])
4651 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4652 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4653 ;; alternatives pretty much forces the MMX alternative to be chosen.
4654 (define_insn "*sse2_concatv2si"
4655 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4657 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4658 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4661 punpckldq\t{%2, %0|%0, %2}
4662 movd\t{%1, %0|%0, %1}
4663 punpckldq\t{%2, %0|%0, %2}
4664 movd\t{%1, %0|%0, %1}"
4665 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4666 (set_attr "mode" "TI,TI,DI,DI")])
4668 (define_insn "*sse1_concatv2si"
4669 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4671 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4672 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4675 unpcklps\t{%2, %0|%0, %2}
4676 movss\t{%1, %0|%0, %1}
4677 punpckldq\t{%2, %0|%0, %2}
4678 movd\t{%1, %0|%0, %1}"
4679 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4680 (set_attr "mode" "V4SF,V4SF,DI,DI")])
4682 (define_insn "*vec_concatv4si_1"
4683 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4685 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4686 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4689 punpcklqdq\t{%2, %0|%0, %2}
4690 movlhps\t{%2, %0|%0, %2}
4691 movhps\t{%2, %0|%0, %2}"
4692 [(set_attr "type" "sselog,ssemov,ssemov")
4693 (set_attr "mode" "TI,V4SF,V2SF")])
4695 (define_insn "vec_concatv2di"
4696 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4698 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4699 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4702 movq\t{%1, %0|%0, %1}
4703 movq2dq\t{%1, %0|%0, %1}
4704 punpcklqdq\t{%2, %0|%0, %2}
4705 movlhps\t{%2, %0|%0, %2}
4706 movhps\t{%2, %0|%0, %2}
4707 movlps\t{%1, %0|%0, %1}"
4708 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4709 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
4711 (define_expand "vec_setv2di"
4712 [(match_operand:V2DI 0 "register_operand" "")
4713 (match_operand:DI 1 "register_operand" "")
4714 (match_operand 2 "const_int_operand" "")]
4717 ix86_expand_vector_set (false, operands[0], operands[1],
4718 INTVAL (operands[2]));
4722 (define_expand "vec_extractv2di"
4723 [(match_operand:DI 0 "register_operand" "")
4724 (match_operand:V2DI 1 "register_operand" "")
4725 (match_operand 2 "const_int_operand" "")]
4728 ix86_expand_vector_extract (false, operands[0], operands[1],
4729 INTVAL (operands[2]));
4733 (define_expand "vec_initv2di"
4734 [(match_operand:V2DI 0 "register_operand" "")
4735 (match_operand 1 "" "")]
4738 ix86_expand_vector_init (false, operands[0], operands[1]);
4742 (define_expand "vec_setv4si"
4743 [(match_operand:V4SI 0 "register_operand" "")
4744 (match_operand:SI 1 "register_operand" "")
4745 (match_operand 2 "const_int_operand" "")]
4748 ix86_expand_vector_set (false, operands[0], operands[1],
4749 INTVAL (operands[2]));
4753 (define_expand "vec_extractv4si"
4754 [(match_operand:SI 0 "register_operand" "")
4755 (match_operand:V4SI 1 "register_operand" "")
4756 (match_operand 2 "const_int_operand" "")]
4759 ix86_expand_vector_extract (false, operands[0], operands[1],
4760 INTVAL (operands[2]));
4764 (define_expand "vec_initv4si"
4765 [(match_operand:V4SI 0 "register_operand" "")
4766 (match_operand 1 "" "")]
4769 ix86_expand_vector_init (false, operands[0], operands[1]);
4773 (define_expand "vec_setv8hi"
4774 [(match_operand:V8HI 0 "register_operand" "")
4775 (match_operand:HI 1 "register_operand" "")
4776 (match_operand 2 "const_int_operand" "")]
4779 ix86_expand_vector_set (false, operands[0], operands[1],
4780 INTVAL (operands[2]));
4784 (define_expand "vec_extractv8hi"
4785 [(match_operand:HI 0 "register_operand" "")
4786 (match_operand:V8HI 1 "register_operand" "")
4787 (match_operand 2 "const_int_operand" "")]
4790 ix86_expand_vector_extract (false, operands[0], operands[1],
4791 INTVAL (operands[2]));
4795 (define_expand "vec_initv8hi"
4796 [(match_operand:V8HI 0 "register_operand" "")
4797 (match_operand 1 "" "")]
4800 ix86_expand_vector_init (false, operands[0], operands[1]);
4804 (define_expand "vec_setv16qi"
4805 [(match_operand:V16QI 0 "register_operand" "")
4806 (match_operand:QI 1 "register_operand" "")
4807 (match_operand 2 "const_int_operand" "")]
4810 ix86_expand_vector_set (false, operands[0], operands[1],
4811 INTVAL (operands[2]));
4815 (define_expand "vec_extractv16qi"
4816 [(match_operand:QI 0 "register_operand" "")
4817 (match_operand:V16QI 1 "register_operand" "")
4818 (match_operand 2 "const_int_operand" "")]
4821 ix86_expand_vector_extract (false, operands[0], operands[1],
4822 INTVAL (operands[2]));
4826 (define_expand "vec_initv16qi"
4827 [(match_operand:V16QI 0 "register_operand" "")
4828 (match_operand 1 "" "")]
4831 ix86_expand_vector_init (false, operands[0], operands[1]);
4835 (define_expand "vec_unpacku_hi_v16qi"
4836 [(match_operand:V8HI 0 "register_operand" "")
4837 (match_operand:V16QI 1 "register_operand" "")]
4841 ix86_expand_sse4_unpack (operands, true, true);
4843 ix86_expand_sse_unpack (operands, true, true);
4847 (define_expand "vec_unpacks_hi_v16qi"
4848 [(match_operand:V8HI 0 "register_operand" "")
4849 (match_operand:V16QI 1 "register_operand" "")]
4853 ix86_expand_sse4_unpack (operands, false, true);
4855 ix86_expand_sse_unpack (operands, false, true);
4859 (define_expand "vec_unpacku_lo_v16qi"
4860 [(match_operand:V8HI 0 "register_operand" "")
4861 (match_operand:V16QI 1 "register_operand" "")]
4865 ix86_expand_sse4_unpack (operands, true, false);
4867 ix86_expand_sse_unpack (operands, true, false);
4871 (define_expand "vec_unpacks_lo_v16qi"
4872 [(match_operand:V8HI 0 "register_operand" "")
4873 (match_operand:V16QI 1 "register_operand" "")]
4877 ix86_expand_sse4_unpack (operands, false, false);
4879 ix86_expand_sse_unpack (operands, false, false);
4883 (define_expand "vec_unpacku_hi_v8hi"
4884 [(match_operand:V4SI 0 "register_operand" "")
4885 (match_operand:V8HI 1 "register_operand" "")]
4889 ix86_expand_sse4_unpack (operands, true, true);
4891 ix86_expand_sse_unpack (operands, true, true);
4895 (define_expand "vec_unpacks_hi_v8hi"
4896 [(match_operand:V4SI 0 "register_operand" "")
4897 (match_operand:V8HI 1 "register_operand" "")]
4901 ix86_expand_sse4_unpack (operands, false, true);
4903 ix86_expand_sse_unpack (operands, false, true);
4907 (define_expand "vec_unpacku_lo_v8hi"
4908 [(match_operand:V4SI 0 "register_operand" "")
4909 (match_operand:V8HI 1 "register_operand" "")]
4913 ix86_expand_sse4_unpack (operands, true, false);
4915 ix86_expand_sse_unpack (operands, true, false);
4919 (define_expand "vec_unpacks_lo_v8hi"
4920 [(match_operand:V4SI 0 "register_operand" "")
4921 (match_operand:V8HI 1 "register_operand" "")]
4925 ix86_expand_sse4_unpack (operands, false, false);
4927 ix86_expand_sse_unpack (operands, false, false);
4931 (define_expand "vec_unpacku_hi_v4si"
4932 [(match_operand:V2DI 0 "register_operand" "")
4933 (match_operand:V4SI 1 "register_operand" "")]
4937 ix86_expand_sse4_unpack (operands, true, true);
4939 ix86_expand_sse_unpack (operands, true, true);
4943 (define_expand "vec_unpacks_hi_v4si"
4944 [(match_operand:V2DI 0 "register_operand" "")
4945 (match_operand:V4SI 1 "register_operand" "")]
4949 ix86_expand_sse4_unpack (operands, false, true);
4951 ix86_expand_sse_unpack (operands, false, true);
4955 (define_expand "vec_unpacku_lo_v4si"
4956 [(match_operand:V2DI 0 "register_operand" "")
4957 (match_operand:V4SI 1 "register_operand" "")]
4961 ix86_expand_sse4_unpack (operands, true, false);
4963 ix86_expand_sse_unpack (operands, true, false);
4967 (define_expand "vec_unpacks_lo_v4si"
4968 [(match_operand:V2DI 0 "register_operand" "")
4969 (match_operand:V4SI 1 "register_operand" "")]
4973 ix86_expand_sse4_unpack (operands, false, false);
4975 ix86_expand_sse_unpack (operands, false, false);
4979 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4983 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4985 (define_insn "sse2_uavgv16qi3"
4986 [(set (match_operand:V16QI 0 "register_operand" "=x")
4992 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4994 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4995 (const_vector:V16QI [(const_int 1) (const_int 1)
4996 (const_int 1) (const_int 1)
4997 (const_int 1) (const_int 1)
4998 (const_int 1) (const_int 1)
4999 (const_int 1) (const_int 1)
5000 (const_int 1) (const_int 1)
5001 (const_int 1) (const_int 1)
5002 (const_int 1) (const_int 1)]))
5004 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5005 "pavgb\t{%2, %0|%0, %2}"
5006 [(set_attr "type" "sseiadd")
5007 (set_attr "prefix_data16" "1")
5008 (set_attr "mode" "TI")])
5010 (define_insn "sse2_uavgv8hi3"
5011 [(set (match_operand:V8HI 0 "register_operand" "=x")
5017 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5019 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5020 (const_vector:V8HI [(const_int 1) (const_int 1)
5021 (const_int 1) (const_int 1)
5022 (const_int 1) (const_int 1)
5023 (const_int 1) (const_int 1)]))
5025 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5026 "pavgw\t{%2, %0|%0, %2}"
5027 [(set_attr "type" "sseiadd")
5028 (set_attr "prefix_data16" "1")
5029 (set_attr "mode" "TI")])
5031 ;; The correct representation for this is absolutely enormous, and
5032 ;; surely not generally useful.
5033 (define_insn "sse2_psadbw"
5034 [(set (match_operand:V2DI 0 "register_operand" "=x")
5035 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5036 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5039 "psadbw\t{%2, %0|%0, %2}"
5040 [(set_attr "type" "sseiadd")
5041 (set_attr "prefix_data16" "1")
5042 (set_attr "mode" "TI")])
5044 (define_insn "sse_movmskps"
5045 [(set (match_operand:SI 0 "register_operand" "=r")
5046 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5049 "movmskps\t{%1, %0|%0, %1}"
5050 [(set_attr "type" "ssecvt")
5051 (set_attr "mode" "V4SF")])
5053 (define_insn "sse2_movmskpd"
5054 [(set (match_operand:SI 0 "register_operand" "=r")
5055 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5058 "movmskpd\t{%1, %0|%0, %1}"
5059 [(set_attr "type" "ssecvt")
5060 (set_attr "mode" "V2DF")])
5062 (define_insn "sse2_pmovmskb"
5063 [(set (match_operand:SI 0 "register_operand" "=r")
5064 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5067 "pmovmskb\t{%1, %0|%0, %1}"
5068 [(set_attr "type" "ssecvt")
5069 (set_attr "prefix_data16" "1")
5070 (set_attr "mode" "SI")])
5072 (define_expand "sse2_maskmovdqu"
5073 [(set (match_operand:V16QI 0 "memory_operand" "")
5074 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5075 (match_operand:V16QI 2 "register_operand" "x")
5081 (define_insn "*sse2_maskmovdqu"
5082 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5083 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5084 (match_operand:V16QI 2 "register_operand" "x")
5085 (mem:V16QI (match_dup 0))]
5087 "TARGET_SSE2 && !TARGET_64BIT"
5088 ;; @@@ check ordering of operands in intel/nonintel syntax
5089 "maskmovdqu\t{%2, %1|%1, %2}"
5090 [(set_attr "type" "ssecvt")
5091 (set_attr "prefix_data16" "1")
5092 (set_attr "mode" "TI")])
5094 (define_insn "*sse2_maskmovdqu_rex64"
5095 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5096 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5097 (match_operand:V16QI 2 "register_operand" "x")
5098 (mem:V16QI (match_dup 0))]
5100 "TARGET_SSE2 && TARGET_64BIT"
5101 ;; @@@ check ordering of operands in intel/nonintel syntax
5102 "maskmovdqu\t{%2, %1|%1, %2}"
5103 [(set_attr "type" "ssecvt")
5104 (set_attr "prefix_data16" "1")
5105 (set_attr "mode" "TI")])
5107 (define_insn "sse_ldmxcsr"
5108 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5112 [(set_attr "type" "sse")
5113 (set_attr "memory" "load")])
5115 (define_insn "sse_stmxcsr"
5116 [(set (match_operand:SI 0 "memory_operand" "=m")
5117 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5120 [(set_attr "type" "sse")
5121 (set_attr "memory" "store")])
5123 (define_expand "sse_sfence"
5125 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5126 "TARGET_SSE || TARGET_3DNOW_A"
5128 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5129 MEM_VOLATILE_P (operands[0]) = 1;
5132 (define_insn "*sse_sfence"
5133 [(set (match_operand:BLK 0 "" "")
5134 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5135 "TARGET_SSE || TARGET_3DNOW_A"
5137 [(set_attr "type" "sse")
5138 (set_attr "memory" "unknown")])
5140 (define_insn "sse2_clflush"
5141 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5145 [(set_attr "type" "sse")
5146 (set_attr "memory" "unknown")])
5148 (define_expand "sse2_mfence"
5150 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5153 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5154 MEM_VOLATILE_P (operands[0]) = 1;
5157 (define_insn "*sse2_mfence"
5158 [(set (match_operand:BLK 0 "" "")
5159 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5162 [(set_attr "type" "sse")
5163 (set_attr "memory" "unknown")])
5165 (define_expand "sse2_lfence"
5167 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5170 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5171 MEM_VOLATILE_P (operands[0]) = 1;
5174 (define_insn "*sse2_lfence"
5175 [(set (match_operand:BLK 0 "" "")
5176 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5179 [(set_attr "type" "sse")
5180 (set_attr "memory" "unknown")])
5182 (define_insn "sse3_mwait"
5183 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5184 (match_operand:SI 1 "register_operand" "c")]
5187 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5188 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5189 ;; we only need to set up 32bit registers.
5191 [(set_attr "length" "3")])
5193 (define_insn "sse3_monitor"
5194 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5195 (match_operand:SI 1 "register_operand" "c")
5196 (match_operand:SI 2 "register_operand" "d")]
5198 "TARGET_SSE3 && !TARGET_64BIT"
5199 "monitor\t%0, %1, %2"
5200 [(set_attr "length" "3")])
5202 (define_insn "sse3_monitor64"
5203 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5204 (match_operand:SI 1 "register_operand" "c")
5205 (match_operand:SI 2 "register_operand" "d")]
5207 "TARGET_SSE3 && TARGET_64BIT"
5208 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5209 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5210 ;; zero extended to 64bit, we only need to set up 32bit registers.
5212 [(set_attr "length" "3")])
5215 (define_insn "ssse3_phaddwv8hi3"
5216 [(set (match_operand:V8HI 0 "register_operand" "=x")
5222 (match_operand:V8HI 1 "register_operand" "0")
5223 (parallel [(const_int 0)]))
5224 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5226 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5227 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5230 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5231 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5233 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5234 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5240 (parallel [(const_int 0)]))
5241 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5243 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5244 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5247 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5248 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5250 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5251 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5253 "phaddw\t{%2, %0|%0, %2}"
5254 [(set_attr "type" "sseiadd")
5255 (set_attr "prefix_data16" "1")
5256 (set_attr "prefix_extra" "1")
5257 (set_attr "mode" "TI")])
5259 (define_insn "ssse3_phaddwv4hi3"
5260 [(set (match_operand:V4HI 0 "register_operand" "=y")
5265 (match_operand:V4HI 1 "register_operand" "0")
5266 (parallel [(const_int 0)]))
5267 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5269 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5270 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5274 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5275 (parallel [(const_int 0)]))
5276 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5278 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5279 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5281 "phaddw\t{%2, %0|%0, %2}"
5282 [(set_attr "type" "sseiadd")
5283 (set_attr "prefix_extra" "1")
5284 (set_attr "mode" "DI")])
5286 (define_insn "ssse3_phadddv4si3"
5287 [(set (match_operand:V4SI 0 "register_operand" "=x")
5292 (match_operand:V4SI 1 "register_operand" "0")
5293 (parallel [(const_int 0)]))
5294 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5296 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5297 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5301 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5302 (parallel [(const_int 0)]))
5303 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5305 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5306 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5308 "phaddd\t{%2, %0|%0, %2}"
5309 [(set_attr "type" "sseiadd")
5310 (set_attr "prefix_data16" "1")
5311 (set_attr "prefix_extra" "1")
5312 (set_attr "mode" "TI")])
5314 (define_insn "ssse3_phadddv2si3"
5315 [(set (match_operand:V2SI 0 "register_operand" "=y")
5319 (match_operand:V2SI 1 "register_operand" "0")
5320 (parallel [(const_int 0)]))
5321 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5324 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5325 (parallel [(const_int 0)]))
5326 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5328 "phaddd\t{%2, %0|%0, %2}"
5329 [(set_attr "type" "sseiadd")
5330 (set_attr "prefix_extra" "1")
5331 (set_attr "mode" "DI")])
5333 (define_insn "ssse3_phaddswv8hi3"
5334 [(set (match_operand:V8HI 0 "register_operand" "=x")
5340 (match_operand:V8HI 1 "register_operand" "0")
5341 (parallel [(const_int 0)]))
5342 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5344 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5345 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5348 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5349 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5351 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5352 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5357 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5358 (parallel [(const_int 0)]))
5359 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5361 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5362 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5365 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5366 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5368 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5369 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5371 "phaddsw\t{%2, %0|%0, %2}"
5372 [(set_attr "type" "sseiadd")
5373 (set_attr "prefix_data16" "1")
5374 (set_attr "prefix_extra" "1")
5375 (set_attr "mode" "TI")])
5377 (define_insn "ssse3_phaddswv4hi3"
5378 [(set (match_operand:V4HI 0 "register_operand" "=y")
5383 (match_operand:V4HI 1 "register_operand" "0")
5384 (parallel [(const_int 0)]))
5385 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5387 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5388 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5392 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5393 (parallel [(const_int 0)]))
5394 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5396 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5397 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5399 "phaddsw\t{%2, %0|%0, %2}"
5400 [(set_attr "type" "sseiadd")
5401 (set_attr "prefix_extra" "1")
5402 (set_attr "mode" "DI")])
5404 (define_insn "ssse3_phsubwv8hi3"
5405 [(set (match_operand:V8HI 0 "register_operand" "=x")
5411 (match_operand:V8HI 1 "register_operand" "0")
5412 (parallel [(const_int 0)]))
5413 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5415 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5416 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5419 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5420 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5422 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5423 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5428 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5429 (parallel [(const_int 0)]))
5430 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5432 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5433 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5436 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5437 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5439 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5440 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5442 "phsubw\t{%2, %0|%0, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix_data16" "1")
5445 (set_attr "prefix_extra" "1")
5446 (set_attr "mode" "TI")])
5448 (define_insn "ssse3_phsubwv4hi3"
5449 [(set (match_operand:V4HI 0 "register_operand" "=y")
5454 (match_operand:V4HI 1 "register_operand" "0")
5455 (parallel [(const_int 0)]))
5456 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5458 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5459 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5463 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5464 (parallel [(const_int 0)]))
5465 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5467 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5468 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5470 "phsubw\t{%2, %0|%0, %2}"
5471 [(set_attr "type" "sseiadd")
5472 (set_attr "prefix_extra" "1")
5473 (set_attr "mode" "DI")])
5475 (define_insn "ssse3_phsubdv4si3"
5476 [(set (match_operand:V4SI 0 "register_operand" "=x")
5481 (match_operand:V4SI 1 "register_operand" "0")
5482 (parallel [(const_int 0)]))
5483 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5485 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5486 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5490 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5491 (parallel [(const_int 0)]))
5492 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5494 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5495 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5497 "phsubd\t{%2, %0|%0, %2}"
5498 [(set_attr "type" "sseiadd")
5499 (set_attr "prefix_data16" "1")
5500 (set_attr "prefix_extra" "1")
5501 (set_attr "mode" "TI")])
5503 (define_insn "ssse3_phsubdv2si3"
5504 [(set (match_operand:V2SI 0 "register_operand" "=y")
5508 (match_operand:V2SI 1 "register_operand" "0")
5509 (parallel [(const_int 0)]))
5510 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5513 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5514 (parallel [(const_int 0)]))
5515 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5517 "phsubd\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseiadd")
5519 (set_attr "prefix_extra" "1")
5520 (set_attr "mode" "DI")])
5522 (define_insn "ssse3_phsubswv8hi3"
5523 [(set (match_operand:V8HI 0 "register_operand" "=x")
5529 (match_operand:V8HI 1 "register_operand" "0")
5530 (parallel [(const_int 0)]))
5531 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5533 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5534 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5537 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5538 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5540 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5541 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5546 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5547 (parallel [(const_int 0)]))
5548 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5550 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5551 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5554 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5555 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5557 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5558 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5560 "phsubsw\t{%2, %0|%0, %2}"
5561 [(set_attr "type" "sseiadd")
5562 (set_attr "prefix_data16" "1")
5563 (set_attr "prefix_extra" "1")
5564 (set_attr "mode" "TI")])
5566 (define_insn "ssse3_phsubswv4hi3"
5567 [(set (match_operand:V4HI 0 "register_operand" "=y")
5572 (match_operand:V4HI 1 "register_operand" "0")
5573 (parallel [(const_int 0)]))
5574 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5576 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5577 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5581 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5582 (parallel [(const_int 0)]))
5583 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5585 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5586 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5588 "phsubsw\t{%2, %0|%0, %2}"
5589 [(set_attr "type" "sseiadd")
5590 (set_attr "prefix_extra" "1")
5591 (set_attr "mode" "DI")])
5593 (define_insn "ssse3_pmaddubswv8hi3"
5594 [(set (match_operand:V8HI 0 "register_operand" "=x")
5599 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5600 (parallel [(const_int 0)
5610 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5611 (parallel [(const_int 0)
5621 (vec_select:V16QI (match_dup 1)
5622 (parallel [(const_int 1)
5631 (vec_select:V16QI (match_dup 2)
5632 (parallel [(const_int 1)
5639 (const_int 15)]))))))]
5641 "pmaddubsw\t{%2, %0|%0, %2}"
5642 [(set_attr "type" "sseiadd")
5643 (set_attr "prefix_data16" "1")
5644 (set_attr "prefix_extra" "1")
5645 (set_attr "mode" "TI")])
5647 (define_insn "ssse3_pmaddubswv4hi3"
5648 [(set (match_operand:V4HI 0 "register_operand" "=y")
5653 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5654 (parallel [(const_int 0)
5660 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5661 (parallel [(const_int 0)
5667 (vec_select:V8QI (match_dup 1)
5668 (parallel [(const_int 1)
5673 (vec_select:V8QI (match_dup 2)
5674 (parallel [(const_int 1)
5677 (const_int 7)]))))))]
5679 "pmaddubsw\t{%2, %0|%0, %2}"
5680 [(set_attr "type" "sseiadd")
5681 (set_attr "prefix_extra" "1")
5682 (set_attr "mode" "DI")])
5684 (define_insn "ssse3_pmulhrswv8hi3"
5685 [(set (match_operand:V8HI 0 "register_operand" "=x")
5692 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5694 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5696 (const_vector:V8HI [(const_int 1) (const_int 1)
5697 (const_int 1) (const_int 1)
5698 (const_int 1) (const_int 1)
5699 (const_int 1) (const_int 1)]))
5701 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5702 "pmulhrsw\t{%2, %0|%0, %2}"
5703 [(set_attr "type" "sseimul")
5704 (set_attr "prefix_data16" "1")
5705 (set_attr "prefix_extra" "1")
5706 (set_attr "mode" "TI")])
5708 (define_insn "ssse3_pmulhrswv4hi3"
5709 [(set (match_operand:V4HI 0 "register_operand" "=y")
5716 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5718 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5720 (const_vector:V4HI [(const_int 1) (const_int 1)
5721 (const_int 1) (const_int 1)]))
5723 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5724 "pmulhrsw\t{%2, %0|%0, %2}"
5725 [(set_attr "type" "sseimul")
5726 (set_attr "prefix_extra" "1")
5727 (set_attr "mode" "DI")])
5729 (define_insn "ssse3_pshufbv16qi3"
5730 [(set (match_operand:V16QI 0 "register_operand" "=x")
5731 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5732 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5735 "pshufb\t{%2, %0|%0, %2}";
5736 [(set_attr "type" "sselog1")
5737 (set_attr "prefix_data16" "1")
5738 (set_attr "prefix_extra" "1")
5739 (set_attr "mode" "TI")])
5741 (define_insn "ssse3_pshufbv8qi3"
5742 [(set (match_operand:V8QI 0 "register_operand" "=y")
5743 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5744 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5747 "pshufb\t{%2, %0|%0, %2}";
5748 [(set_attr "type" "sselog1")
5749 (set_attr "prefix_extra" "1")
5750 (set_attr "mode" "DI")])
5752 (define_insn "ssse3_psign<mode>3"
5753 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5754 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5755 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5758 "psign<ssevecsize>\t{%2, %0|%0, %2}";
5759 [(set_attr "type" "sselog1")
5760 (set_attr "prefix_data16" "1")
5761 (set_attr "prefix_extra" "1")
5762 (set_attr "mode" "TI")])
5764 (define_insn "ssse3_psign<mode>3"
5765 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5766 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5767 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5770 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
5771 [(set_attr "type" "sselog1")
5772 (set_attr "prefix_extra" "1")
5773 (set_attr "mode" "DI")])
5775 (define_insn "ssse3_palignrti"
5776 [(set (match_operand:TI 0 "register_operand" "=x")
5777 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5778 (match_operand:TI 2 "nonimmediate_operand" "xm")
5779 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5783 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5784 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5786 [(set_attr "type" "sseishft")
5787 (set_attr "prefix_data16" "1")
5788 (set_attr "prefix_extra" "1")
5789 (set_attr "mode" "TI")])
5791 (define_insn "ssse3_palignrdi"
5792 [(set (match_operand:DI 0 "register_operand" "=y")
5793 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5794 (match_operand:DI 2 "nonimmediate_operand" "ym")
5795 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5799 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5800 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5802 [(set_attr "type" "sseishft")
5803 (set_attr "prefix_extra" "1")
5804 (set_attr "mode" "DI")])
5806 (define_insn "abs<mode>2"
5807 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5808 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5810 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
5811 [(set_attr "type" "sselog1")
5812 (set_attr "prefix_data16" "1")
5813 (set_attr "prefix_extra" "1")
5814 (set_attr "mode" "TI")])
5816 (define_insn "abs<mode>2"
5817 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5818 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5820 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
5821 [(set_attr "type" "sselog1")
5822 (set_attr "prefix_extra" "1")
5823 (set_attr "mode" "DI")])
5825 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5827 ;; AMD SSE4A instructions
5829 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5831 (define_insn "sse4a_vmmovntv2df"
5832 [(set (match_operand:DF 0 "memory_operand" "=m")
5833 (unspec:DF [(vec_select:DF
5834 (match_operand:V2DF 1 "register_operand" "x")
5835 (parallel [(const_int 0)]))]
5838 "movntsd\t{%1, %0|%0, %1}"
5839 [(set_attr "type" "ssemov")
5840 (set_attr "mode" "DF")])
5842 (define_insn "sse4a_movntdf"
5843 [(set (match_operand:DF 0 "memory_operand" "=m")
5844 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5847 "movntsd\t{%1, %0|%0, %1}"
5848 [(set_attr "type" "ssemov")
5849 (set_attr "mode" "DF")])
5851 (define_insn "sse4a_vmmovntv4sf"
5852 [(set (match_operand:SF 0 "memory_operand" "=m")
5853 (unspec:SF [(vec_select:SF
5854 (match_operand:V4SF 1 "register_operand" "x")
5855 (parallel [(const_int 0)]))]
5858 "movntss\t{%1, %0|%0, %1}"
5859 [(set_attr "type" "ssemov")
5860 (set_attr "mode" "SF")])
5862 (define_insn "sse4a_movntsf"
5863 [(set (match_operand:SF 0 "memory_operand" "=m")
5864 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5867 "movntss\t{%1, %0|%0, %1}"
5868 [(set_attr "type" "ssemov")
5869 (set_attr "mode" "SF")])
5871 (define_insn "sse4a_extrqi"
5872 [(set (match_operand:V2DI 0 "register_operand" "=x")
5873 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5874 (match_operand 2 "const_int_operand" "")
5875 (match_operand 3 "const_int_operand" "")]
5878 "extrq\t{%3, %2, %0|%0, %2, %3}"
5879 [(set_attr "type" "sse")
5880 (set_attr "prefix_data16" "1")
5881 (set_attr "mode" "TI")])
5883 (define_insn "sse4a_extrq"
5884 [(set (match_operand:V2DI 0 "register_operand" "=x")
5885 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5886 (match_operand:V16QI 2 "register_operand" "x")]
5889 "extrq\t{%2, %0|%0, %2}"
5890 [(set_attr "type" "sse")
5891 (set_attr "prefix_data16" "1")
5892 (set_attr "mode" "TI")])
5894 (define_insn "sse4a_insertqi"
5895 [(set (match_operand:V2DI 0 "register_operand" "=x")
5896 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5897 (match_operand:V2DI 2 "register_operand" "x")
5898 (match_operand 3 "const_int_operand" "")
5899 (match_operand 4 "const_int_operand" "")]
5902 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5903 [(set_attr "type" "sseins")
5904 (set_attr "prefix_rep" "1")
5905 (set_attr "mode" "TI")])
5907 (define_insn "sse4a_insertq"
5908 [(set (match_operand:V2DI 0 "register_operand" "=x")
5909 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5910 (match_operand:V2DI 2 "register_operand" "x")]
5913 "insertq\t{%2, %0|%0, %2}"
5914 [(set_attr "type" "sseins")
5915 (set_attr "prefix_rep" "1")
5916 (set_attr "mode" "TI")])
5918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5920 ;; Intel SSE4.1 instructions
5922 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5924 (define_insn "sse4_1_blendpd"
5925 [(set (match_operand:V2DF 0 "register_operand" "=x")
5927 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5928 (match_operand:V2DF 1 "register_operand" "0")
5929 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
5931 "blendpd\t{%3, %2, %0|%0, %2, %3}"
5932 [(set_attr "type" "ssemov")
5933 (set_attr "prefix_extra" "1")
5934 (set_attr "mode" "V2DF")])
5936 (define_insn "sse4_1_blendps"
5937 [(set (match_operand:V4SF 0 "register_operand" "=x")
5939 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5940 (match_operand:V4SF 1 "register_operand" "0")
5941 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
5943 "blendps\t{%3, %2, %0|%0, %2, %3}"
5944 [(set_attr "type" "ssemov")
5945 (set_attr "prefix_extra" "1")
5946 (set_attr "mode" "V4SF")])
5948 (define_insn "sse4_1_blendvpd"
5949 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
5950 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
5951 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
5952 (match_operand:V2DF 3 "register_operand" "Y0")]
5955 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
5956 [(set_attr "type" "ssemov")
5957 (set_attr "prefix_extra" "1")
5958 (set_attr "mode" "V2DF")])
5960 (define_insn "sse4_1_blendvps"
5961 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
5962 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
5963 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
5964 (match_operand:V4SF 3 "register_operand" "Y0")]
5967 "blendvps\t{%3, %2, %0|%0, %2, %3}"
5968 [(set_attr "type" "ssemov")
5969 (set_attr "prefix_extra" "1")
5970 (set_attr "mode" "V4SF")])
5972 (define_insn "sse4_1_dppd"
5973 [(set (match_operand:V2DF 0 "register_operand" "=x")
5974 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
5975 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5976 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5979 "dppd\t{%3, %2, %0|%0, %2, %3}"
5980 [(set_attr "type" "ssemul")
5981 (set_attr "prefix_extra" "1")
5982 (set_attr "mode" "V2DF")])
5984 (define_insn "sse4_1_dpps"
5985 [(set (match_operand:V4SF 0 "register_operand" "=x")
5986 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
5987 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5988 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5991 "dpps\t{%3, %2, %0|%0, %2, %3}"
5992 [(set_attr "type" "ssemul")
5993 (set_attr "prefix_extra" "1")
5994 (set_attr "mode" "V4SF")])
5996 (define_insn "sse4_1_movntdqa"
5997 [(set (match_operand:V2DI 0 "register_operand" "=x")
5998 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6001 "movntdqa\t{%1, %0|%0, %1}"
6002 [(set_attr "type" "ssecvt")
6003 (set_attr "prefix_extra" "1")
6004 (set_attr "mode" "TI")])
6006 (define_insn "sse4_1_mpsadbw"
6007 [(set (match_operand:V16QI 0 "register_operand" "=x")
6008 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6009 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6010 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6013 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6014 [(set_attr "type" "sselog1")
6015 (set_attr "prefix_extra" "1")
6016 (set_attr "mode" "TI")])
6018 (define_insn "sse4_1_packusdw"
6019 [(set (match_operand:V8HI 0 "register_operand" "=x")
6022 (match_operand:V4SI 1 "register_operand" "0"))
6024 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6026 "packusdw\t{%2, %0|%0, %2}"
6027 [(set_attr "type" "sselog")
6028 (set_attr "prefix_extra" "1")
6029 (set_attr "mode" "TI")])
6031 (define_insn "sse4_1_pblendvb"
6032 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6033 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6034 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6035 (match_operand:V16QI 3 "register_operand" "Y0")]
6038 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6039 [(set_attr "type" "ssemov")
6040 (set_attr "prefix_extra" "1")
6041 (set_attr "mode" "TI")])
6043 (define_insn "sse4_1_pblendw"
6044 [(set (match_operand:V8HI 0 "register_operand" "=x")
6046 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6047 (match_operand:V8HI 1 "register_operand" "0")
6048 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6050 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6051 [(set_attr "type" "ssemov")
6052 (set_attr "prefix_extra" "1")
6053 (set_attr "mode" "TI")])
6055 (define_insn "sse4_1_phminposuw"
6056 [(set (match_operand:V8HI 0 "register_operand" "=x")
6057 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6058 UNSPEC_PHMINPOSUW))]
6060 "phminposuw\t{%1, %0|%0, %1}"
6061 [(set_attr "type" "sselog1")
6062 (set_attr "prefix_extra" "1")
6063 (set_attr "mode" "TI")])
6065 (define_insn "sse4_1_extendv8qiv8hi2"
6066 [(set (match_operand:V8HI 0 "register_operand" "=x")
6069 (match_operand:V16QI 1 "register_operand" "x")
6070 (parallel [(const_int 0)
6079 "pmovsxbw\t{%1, %0|%0, %1}"
6080 [(set_attr "type" "ssemov")
6081 (set_attr "prefix_extra" "1")
6082 (set_attr "mode" "TI")])
6084 (define_insn "*sse4_1_extendv8qiv8hi2"
6085 [(set (match_operand:V8HI 0 "register_operand" "=x")
6088 (vec_duplicate:V16QI
6089 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6090 (parallel [(const_int 0)
6099 "pmovsxbw\t{%1, %0|%0, %1}"
6100 [(set_attr "type" "ssemov")
6101 (set_attr "prefix_extra" "1")
6102 (set_attr "mode" "TI")])
6104 (define_insn "sse4_1_extendv4qiv4si2"
6105 [(set (match_operand:V4SI 0 "register_operand" "=x")
6108 (match_operand:V16QI 1 "register_operand" "x")
6109 (parallel [(const_int 0)
6114 "pmovsxbd\t{%1, %0|%0, %1}"
6115 [(set_attr "type" "ssemov")
6116 (set_attr "prefix_extra" "1")
6117 (set_attr "mode" "TI")])
6119 (define_insn "*sse4_1_extendv4qiv4si2"
6120 [(set (match_operand:V4SI 0 "register_operand" "=x")
6123 (vec_duplicate:V16QI
6124 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6125 (parallel [(const_int 0)
6130 "pmovsxbd\t{%1, %0|%0, %1}"
6131 [(set_attr "type" "ssemov")
6132 (set_attr "prefix_extra" "1")
6133 (set_attr "mode" "TI")])
6135 (define_insn "sse4_1_extendv2qiv2di2"
6136 [(set (match_operand:V2DI 0 "register_operand" "=x")
6139 (match_operand:V16QI 1 "register_operand" "x")
6140 (parallel [(const_int 0)
6143 "pmovsxbq\t{%1, %0|%0, %1}"
6144 [(set_attr "type" "ssemov")
6145 (set_attr "prefix_extra" "1")
6146 (set_attr "mode" "TI")])
6148 (define_insn "*sse4_1_extendv2qiv2di2"
6149 [(set (match_operand:V2DI 0 "register_operand" "=x")
6152 (vec_duplicate:V16QI
6153 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6154 (parallel [(const_int 0)
6157 "pmovsxbq\t{%1, %0|%0, %1}"
6158 [(set_attr "type" "ssemov")
6159 (set_attr "prefix_extra" "1")
6160 (set_attr "mode" "TI")])
6162 (define_insn "sse4_1_extendv4hiv4si2"
6163 [(set (match_operand:V4SI 0 "register_operand" "=x")
6166 (match_operand:V8HI 1 "register_operand" "x")
6167 (parallel [(const_int 0)
6172 "pmovsxwd\t{%1, %0|%0, %1}"
6173 [(set_attr "type" "ssemov")
6174 (set_attr "prefix_extra" "1")
6175 (set_attr "mode" "TI")])
6177 (define_insn "*sse4_1_extendv4hiv4si2"
6178 [(set (match_operand:V4SI 0 "register_operand" "=x")
6182 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6183 (parallel [(const_int 0)
6188 "pmovsxwd\t{%1, %0|%0, %1}"
6189 [(set_attr "type" "ssemov")
6190 (set_attr "prefix_extra" "1")
6191 (set_attr "mode" "TI")])
6193 (define_insn "sse4_1_extendv2hiv2di2"
6194 [(set (match_operand:V2DI 0 "register_operand" "=x")
6197 (match_operand:V8HI 1 "register_operand" "x")
6198 (parallel [(const_int 0)
6201 "pmovsxwq\t{%1, %0|%0, %1}"
6202 [(set_attr "type" "ssemov")
6203 (set_attr "prefix_extra" "1")
6204 (set_attr "mode" "TI")])
6206 (define_insn "*sse4_1_extendv2hiv2di2"
6207 [(set (match_operand:V2DI 0 "register_operand" "=x")
6211 (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
6212 (parallel [(const_int 0)
6215 "pmovsxwq\t{%1, %0|%0, %1}"
6216 [(set_attr "type" "ssemov")
6217 (set_attr "prefix_extra" "1")
6218 (set_attr "mode" "TI")])
6220 (define_insn "sse4_1_extendv2siv2di2"
6221 [(set (match_operand:V2DI 0 "register_operand" "=x")
6224 (match_operand:V4SI 1 "register_operand" "x")
6225 (parallel [(const_int 0)
6228 "pmovsxdq\t{%1, %0|%0, %1}"
6229 [(set_attr "type" "ssemov")
6230 (set_attr "prefix_extra" "1")
6231 (set_attr "mode" "TI")])
6233 (define_insn "*sse4_1_extendv2siv2di2"
6234 [(set (match_operand:V2DI 0 "register_operand" "=x")
6238 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6239 (parallel [(const_int 0)
6242 "pmovsxdq\t{%1, %0|%0, %1}"
6243 [(set_attr "type" "ssemov")
6244 (set_attr "prefix_extra" "1")
6245 (set_attr "mode" "TI")])
6247 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6248 [(set (match_operand:V8HI 0 "register_operand" "=x")
6251 (match_operand:V16QI 1 "register_operand" "x")
6252 (parallel [(const_int 0)
6261 "pmovzxbw\t{%1, %0|%0, %1}"
6262 [(set_attr "type" "ssemov")
6263 (set_attr "prefix_extra" "1")
6264 (set_attr "mode" "TI")])
6266 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6267 [(set (match_operand:V8HI 0 "register_operand" "=x")
6270 (vec_duplicate:V16QI
6271 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6272 (parallel [(const_int 0)
6281 "pmovzxbw\t{%1, %0|%0, %1}"
6282 [(set_attr "type" "ssemov")
6283 (set_attr "prefix_extra" "1")
6284 (set_attr "mode" "TI")])
6286 (define_insn "sse4_1_zero_extendv4qiv4si2"
6287 [(set (match_operand:V4SI 0 "register_operand" "=x")
6290 (match_operand:V16QI 1 "register_operand" "x")
6291 (parallel [(const_int 0)
6296 "pmovzxbd\t{%1, %0|%0, %1}"
6297 [(set_attr "type" "ssemov")
6298 (set_attr "prefix_extra" "1")
6299 (set_attr "mode" "TI")])
6301 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6302 [(set (match_operand:V4SI 0 "register_operand" "=x")
6305 (vec_duplicate:V16QI
6306 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6307 (parallel [(const_int 0)
6312 "pmovzxbd\t{%1, %0|%0, %1}"
6313 [(set_attr "type" "ssemov")
6314 (set_attr "prefix_extra" "1")
6315 (set_attr "mode" "TI")])
6317 (define_insn "sse4_1_zero_extendv2qiv2di2"
6318 [(set (match_operand:V2DI 0 "register_operand" "=x")
6321 (match_operand:V16QI 1 "register_operand" "x")
6322 (parallel [(const_int 0)
6325 "pmovzxbq\t{%1, %0|%0, %1}"
6326 [(set_attr "type" "ssemov")
6327 (set_attr "prefix_extra" "1")
6328 (set_attr "mode" "TI")])
6330 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6331 [(set (match_operand:V2DI 0 "register_operand" "=x")
6334 (vec_duplicate:V16QI
6335 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6336 (parallel [(const_int 0)
6339 "pmovzxbq\t{%1, %0|%0, %1}"
6340 [(set_attr "type" "ssemov")
6341 (set_attr "prefix_extra" "1")
6342 (set_attr "mode" "TI")])
6344 (define_insn "sse4_1_zero_extendv4hiv4si2"
6345 [(set (match_operand:V4SI 0 "register_operand" "=x")
6348 (match_operand:V8HI 1 "register_operand" "x")
6349 (parallel [(const_int 0)
6354 "pmovzxwd\t{%1, %0|%0, %1}"
6355 [(set_attr "type" "ssemov")
6356 (set_attr "prefix_extra" "1")
6357 (set_attr "mode" "TI")])
6359 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6360 [(set (match_operand:V4SI 0 "register_operand" "=x")
6364 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6365 (parallel [(const_int 0)
6370 "pmovzxwd\t{%1, %0|%0, %1}"
6371 [(set_attr "type" "ssemov")
6372 (set_attr "prefix_extra" "1")
6373 (set_attr "mode" "TI")])
6375 (define_insn "sse4_1_zero_extendv2hiv2di2"
6376 [(set (match_operand:V2DI 0 "register_operand" "=x")
6379 (match_operand:V8HI 1 "register_operand" "x")
6380 (parallel [(const_int 0)
6383 "pmovzxwq\t{%1, %0|%0, %1}"
6384 [(set_attr "type" "ssemov")
6385 (set_attr "prefix_extra" "1")
6386 (set_attr "mode" "TI")])
6388 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6389 [(set (match_operand:V2DI 0 "register_operand" "=x")
6393 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6394 (parallel [(const_int 0)
6397 "pmovzxwq\t{%1, %0|%0, %1}"
6398 [(set_attr "type" "ssemov")
6399 (set_attr "prefix_extra" "1")
6400 (set_attr "mode" "TI")])
6402 (define_insn "sse4_1_zero_extendv2siv2di2"
6403 [(set (match_operand:V2DI 0 "register_operand" "=x")
6406 (match_operand:V4SI 1 "register_operand" "x")
6407 (parallel [(const_int 0)
6410 "pmovzxdq\t{%1, %0|%0, %1}"
6411 [(set_attr "type" "ssemov")
6412 (set_attr "prefix_extra" "1")
6413 (set_attr "mode" "TI")])
6415 (define_insn "*sse4_1_zero_extendv2siv2di2"
6416 [(set (match_operand:V2DI 0 "register_operand" "=x")
6420 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6421 (parallel [(const_int 0)
6424 "pmovzxdq\t{%1, %0|%0, %1}"
6425 [(set_attr "type" "ssemov")
6426 (set_attr "prefix_extra" "1")
6427 (set_attr "mode" "TI")])
6429 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6430 ;; But it is not a really compare instruction.
6431 (define_insn "sse4_1_ptest"
6432 [(set (reg:CC FLAGS_REG)
6433 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6434 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6437 "ptest\t{%1, %0|%0, %1}"
6438 [(set_attr "type" "ssecomi")
6439 (set_attr "prefix_extra" "1")
6440 (set_attr "mode" "TI")])
6442 (define_insn "sse4_1_roundpd"
6443 [(set (match_operand:V2DF 0 "register_operand" "=x")
6444 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6445 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6448 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6449 [(set_attr "type" "ssecvt")
6450 (set_attr "prefix_extra" "1")
6451 (set_attr "mode" "V2DF")])
6453 (define_insn "sse4_1_roundps"
6454 [(set (match_operand:V4SF 0 "register_operand" "=x")
6455 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6456 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6459 "roundps\t{%2, %1, %0|%0, %1, %2}"
6460 [(set_attr "type" "ssecvt")
6461 (set_attr "prefix_extra" "1")
6462 (set_attr "mode" "V4SF")])
6464 (define_insn "sse4_1_roundsd"
6465 [(set (match_operand:V2DF 0 "register_operand" "=x")
6467 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6468 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6470 (match_operand:V2DF 1 "register_operand" "0")
6473 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6474 [(set_attr "type" "ssecvt")
6475 (set_attr "prefix_extra" "1")
6476 (set_attr "mode" "V2DF")])
6478 (define_insn "sse4_1_roundss"
6479 [(set (match_operand:V4SF 0 "register_operand" "=x")
6481 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6482 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6484 (match_operand:V4SF 1 "register_operand" "0")
6487 "roundss\t{%3, %2, %0|%0, %2, %3}"
6488 [(set_attr "type" "ssecvt")
6489 (set_attr "prefix_extra" "1")
6490 (set_attr "mode" "V4SF")])
6492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6494 ;; Intel SSE4.2 string/text processing instructions
6496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6498 (define_insn_and_split "sse4_2_pcmpestr"
6499 [(set (match_operand:SI 0 "register_operand" "=c,c")
6501 [(match_operand:V16QI 2 "register_operand" "x,x")
6502 (match_operand:SI 3 "register_operand" "a,a")
6503 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6504 (match_operand:SI 5 "register_operand" "d,d")
6505 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6507 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6515 (set (reg:CC FLAGS_REG)
6524 && !(reload_completed || reload_in_progress)"
6529 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6530 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6531 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6534 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6535 operands[3], operands[4],
6536 operands[5], operands[6]));
6538 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6539 operands[3], operands[4],
6540 operands[5], operands[6]));
6541 if (flags && !(ecx || xmm0))
6542 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6543 operands[4], operands[5],
6547 [(set_attr "type" "sselog")
6548 (set_attr "prefix_data16" "1")
6549 (set_attr "prefix_extra" "1")
6550 (set_attr "memory" "none,load")
6551 (set_attr "mode" "TI")])
6553 (define_insn "sse4_2_pcmpestri"
6554 [(set (match_operand:SI 0 "register_operand" "=c,c")
6556 [(match_operand:V16QI 1 "register_operand" "x,x")
6557 (match_operand:SI 2 "register_operand" "a,a")
6558 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6559 (match_operand:SI 4 "register_operand" "d,d")
6560 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6562 (set (reg:CC FLAGS_REG)
6571 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6572 [(set_attr "type" "sselog")
6573 (set_attr "prefix_data16" "1")
6574 (set_attr "prefix_extra" "1")
6575 (set_attr "memory" "none,load")
6576 (set_attr "mode" "TI")])
6578 (define_insn "sse4_2_pcmpestrm"
6579 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6581 [(match_operand:V16QI 1 "register_operand" "x,x")
6582 (match_operand:SI 2 "register_operand" "a,a")
6583 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6584 (match_operand:SI 4 "register_operand" "d,d")
6585 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6587 (set (reg:CC FLAGS_REG)
6596 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6597 [(set_attr "type" "sselog")
6598 (set_attr "prefix_data16" "1")
6599 (set_attr "prefix_extra" "1")
6600 (set_attr "memory" "none,load")
6601 (set_attr "mode" "TI")])
6603 (define_insn "sse4_2_pcmpestr_cconly"
6604 [(set (reg:CC FLAGS_REG)
6606 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6607 (match_operand:SI 1 "register_operand" "a,a,a,a")
6608 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6609 (match_operand:SI 3 "register_operand" "d,d,d,d")
6610 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6612 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6613 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6616 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6617 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6618 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6619 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6620 [(set_attr "type" "sselog")
6621 (set_attr "prefix_data16" "1")
6622 (set_attr "prefix_extra" "1")
6623 (set_attr "memory" "none,load,none,load")
6624 (set_attr "mode" "TI")])
6626 (define_insn_and_split "sse4_2_pcmpistr"
6627 [(set (match_operand:SI 0 "register_operand" "=c,c")
6629 [(match_operand:V16QI 2 "register_operand" "x,x")
6630 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6631 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6633 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6639 (set (reg:CC FLAGS_REG)
6646 && !(reload_completed || reload_in_progress)"
6651 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6652 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6653 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6656 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6657 operands[3], operands[4]));
6659 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6660 operands[3], operands[4]));
6661 if (flags && !(ecx || xmm0))
6662 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6666 [(set_attr "type" "sselog")
6667 (set_attr "prefix_data16" "1")
6668 (set_attr "prefix_extra" "1")
6669 (set_attr "memory" "none,load")
6670 (set_attr "mode" "TI")])
6672 (define_insn "sse4_2_pcmpistri"
6673 [(set (match_operand:SI 0 "register_operand" "=c,c")
6675 [(match_operand:V16QI 1 "register_operand" "x,x")
6676 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6677 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6679 (set (reg:CC FLAGS_REG)
6686 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6687 [(set_attr "type" "sselog")
6688 (set_attr "prefix_data16" "1")
6689 (set_attr "prefix_extra" "1")
6690 (set_attr "memory" "none,load")
6691 (set_attr "mode" "TI")])
6693 (define_insn "sse4_2_pcmpistrm"
6694 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6696 [(match_operand:V16QI 1 "register_operand" "x,x")
6697 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6698 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6700 (set (reg:CC FLAGS_REG)
6707 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6708 [(set_attr "type" "sselog")
6709 (set_attr "prefix_data16" "1")
6710 (set_attr "prefix_extra" "1")
6711 (set_attr "memory" "none,load")
6712 (set_attr "mode" "TI")])
6714 (define_insn "sse4_2_pcmpistr_cconly"
6715 [(set (reg:CC FLAGS_REG)
6717 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6718 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6719 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6721 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6722 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6725 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6726 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6727 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6728 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
6729 [(set_attr "type" "sselog")
6730 (set_attr "prefix_data16" "1")
6731 (set_attr "prefix_extra" "1")
6732 (set_attr "memory" "none,load,none,load")
6733 (set_attr "mode" "TI")])