1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name list
;; the element widths in bytes that it covers: 1 = V16QI, 2 = V8HI,
;; 4 = V4SI, 8 = V2DI.
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (one suffix letter per integer vector mode: V16QI "b", V8HI "w",
;; V4SI "d", V2DI "q").
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the SSEMODEI integer vector modes.  Both operands are
;; nonimmediate_operand; the expansion itself is delegated to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives: an SSE constant
;; (constraint C) into an xmm register, a register-or-memory source into an
;; xmm register, and an xmm register into memory.  The insn condition
;; requires at least one of the two operands to be a register.  The output
;; code returns either the string chosen by standard_sse_constant_opcode,
;; "movaps" when the insn's mode attribute is V4SF, or "movdqa".
;; The mode attribute is computed from optimize_size, TARGET_SSE2 and, for
;; alternative 2 only, TARGET_SSE_TYPELESS_STORES; "TI" is the last choice
;; listed.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Insn-and-split that places a DImode value (viewed as the low part of a
;; V4SI) into an xmm register on 32-bit SSE2 targets with inter-unit moves.
;; Alternative 0 takes the value from general registers and needs an
;; early-clobber xmm scratch (operand 2); alternative 1 takes it from memory
;; and needs no scratch.  The split runs once reload has completed:
;;   - register source: the SImode halves at subreg offsets 0 and 4 are each
;;     loaded with sse2_loadld (into operand 0 and the scratch) and then
;;     combined with sse2_punpckldq;
;;   - memory source: vec_concatv2di builds the V2DI view of operand 0 from
;;     the memory operand and const0_rtx.
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 if (register_operand (operands[1], DImode))
113 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
114 Assemble the 64-bit DImode value in an xmm register. */
115 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
116 gen_rtx_SUBREG (SImode, operands[1], 0)));
117 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
118 gen_rtx_SUBREG (SImode, operands[1], 4)));
119 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
121 else if (memory_operand (operands[1], DImode))
122 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for V4SF.  Both operands are nonimmediate_operand; the
;; expansion is delegated to ix86_expand_vector_move.
127 (define_expand "movv4sf"
128 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
129 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
132 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Three alternatives: an SSE constant (constraint C) into
;; an xmm register, a register-or-memory source into an xmm register, and an
;; xmm register into memory.  At least one operand must be a register.  The
;; output code returns either the string from standard_sse_constant_opcode
;; or "movaps"; the mode attribute is always V4SF.
136 (define_insn "*movv4sf_internal"
137 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
138 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
140 && (register_operand (operands[0], V4SFmode)
141 || register_operand (operands[1], V4SFmode))"
143 switch (which_alternative)
146 return standard_sse_constant_opcode (insn, operands[1]);
149 return "movaps\t{%1, %0|%0, %1}";
154 [(set_attr "type" "sselog1,ssemov,ssemov")
155 (set_attr "mode" "V4SF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; view; it is presumably a define_split -- confirm against the full file.
;; Applies after reload (TARGET_SSE) to a V4SF register set whose source
;; matches zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at offset 0 and sets operand 2 to the V4SF
;; zero constant; the visible part of the replacement duplicates the scalar
;; with vec_duplicate.
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF.  Both operands are nonimmediate_operand; the
;; expansion is delegated to ix86_expand_vector_move.
171 (define_expand "movv2df"
172 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
173 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
176 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Three alternatives: an SSE constant (constraint C) into
;; an xmm register, a register-or-memory source into an xmm register, and an
;; xmm register into memory.  At least one operand must be a register.  The
;; output code returns either the string from standard_sse_constant_opcode,
;; "movaps" when the insn's mode attribute is V4SF, or "movapd".
;; The mode attribute chooses between V4SF and V2DF based on optimize_size,
;; TARGET_SSE2 and, for alternative 2 only, TARGET_SSE_TYPELESS_STORES.
180 (define_insn "*movv2df_internal"
181 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
182 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
184 && (register_operand (operands[0], V2DFmode)
185 || register_operand (operands[1], V2DFmode))"
187 switch (which_alternative)
190 return standard_sse_constant_opcode (insn, operands[1]);
193 if (get_attr_mode (insn) == MODE_V4SF)
194 return "movaps\t{%1, %0|%0, %1}";
196 return "movapd\t{%1, %0|%0, %1}";
201 [(set_attr "type" "sselog1,ssemov,ssemov")
204 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
205 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
206 (and (eq_attr "alternative" "2")
207 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
209 (const_string "V4SF")
210 (const_string "V2DF")))])
;; NOTE(review): the opening form of this pattern is not visible in this
;; view; it is presumably a define_split -- confirm against the full file.
;; Applies after reload (TARGET_SSE2) to a V2DF register set whose source
;; matches zero_extended_scalar_load_operand.  The replacement sets operand 0
;; to the vec_concat of operand 1, narrowed to its DFmode subreg at offset 0,
;; and operand 2, the DFmode zero constant.
213 [(set (match_operand:V2DF 0 "register_operand" "")
214 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
215 "TARGET_SSE2 && reload_completed"
216 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
218 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
219 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode.  Takes one register operand
;; and delegates to ix86_expand_push.
222 (define_expand "push<mode>1"
223 [(match_operand:SSEMODE 0 "register_operand" "")]
226 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE vector mode.  The expansion is
;; delegated to ix86_expand_vector_move_misalign.
230 (define_expand "movmisalign<mode>"
231 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
232 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
235 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as "movups", with the source wrapped in an unspec.
;; Alternative 0 loads an xmm register from a register or memory;
;; alternative 1 stores an xmm register to memory.  The condition rejects
;; the memory-to-memory case.
;; The mode attribute is V4SF, matching the operand mode; V2DF would
;; describe the double-precision movupd form instead.
239 (define_insn "sse_movups"
240 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
241 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
243 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
244 "movups\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssemov")
246 (set_attr "mode" "V4SF")])
;; V2DF move emitted as "movupd", with the source wrapped in an unspec.
;; Alternative 0 loads an xmm register from a register or memory;
;; alternative 1 stores an xmm register to memory.  Requires TARGET_SSE2 and
;; rejects the memory-to-memory case.
248 (define_insn "sse2_movupd"
249 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
250 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
252 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
253 "movupd\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssemov")
255 (set_attr "mode" "V2DF")])
;; V16QI move emitted as "movdqu", with the source wrapped in an unspec.
;; Alternative 0 loads an xmm register from a register or memory;
;; alternative 1 stores an xmm register to memory.  Requires TARGET_SSE2 and
;; rejects the memory-to-memory case.  prefix_data16 is set explicitly.
257 (define_insn "sse2_movdqu"
258 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
259 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
261 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
262 "movdqu\t{%1, %0|%0, %1}"
263 [(set_attr "type" "ssemov")
264 (set_attr "prefix_data16" "1")
265 (set_attr "mode" "TI")])
;; Store of a V4SF xmm register to memory emitted as "movntps".  The source
;; is wrapped in an unspec; the destination must be memory.
267 (define_insn "sse_movntv4sf"
268 [(set (match_operand:V4SF 0 "memory_operand" "=m")
269 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
272 "movntps\t{%1, %0|%0, %1}"
273 [(set_attr "type" "ssemov")
274 (set_attr "mode" "V4SF")])
;; Store of a V2DF xmm register to memory emitted as "movntpd".  The source
;; is wrapped in an unspec; the destination must be memory.
276 (define_insn "sse2_movntv2df"
277 [(set (match_operand:V2DF 0 "memory_operand" "=m")
278 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
281 "movntpd\t{%1, %0|%0, %1}"
282 [(set_attr "type" "ssecvt")
283 (set_attr "mode" "V2DF")])
;; Store of a V2DI xmm register to memory emitted as "movntdq".  The source
;; is wrapped in an unspec; the destination must be memory.  prefix_data16
;; is set explicitly.
285 (define_insn "sse2_movntv2di"
286 [(set (match_operand:V2DI 0 "memory_operand" "=m")
287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
290 "movntdq\t{%1, %0|%0, %1}"
291 [(set_attr "type" "ssecvt")
292 (set_attr "prefix_data16" "1")
293 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint r) to memory emitted as
;; "movnti".  The source is wrapped in an unspec; the destination must be
;; memory.
;; The mode attribute is SI because the insn moves a 32-bit integer register;
;; a vector floating-point mode such as V2DF does not describe this operation.
295 (define_insn "sse2_movntsi"
296 [(set (match_operand:SI 0 "memory_operand" "=m")
297 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
300 "movnti\t{%1, %0|%0, %1}"
301 [(set_attr "type" "ssecvt")
302 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an xmm register emitted as
;; "lddqu".  The source must be memory and is wrapped in an unspec.
;; prefix_rep is set explicitly.
304 (define_insn "sse3_lddqu"
305 [(set (match_operand:V16QI 0 "register_operand" "=x")
306 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
309 "lddqu\t{%1, %0|%0, %1}"
310 [(set_attr "type" "ssecvt")
311 (set_attr "prefix_rep" "1")
312 (set_attr "mode" "TI")])
314 ; Expand patterns for non-temporal stores. At the moment, only those
315 ; that directly map to insns are defined; it would be possible to
316 ; define patterns for other modes that would expand to several insns.
;; Expander "storentv4sf": sets a V4SF memory operand from an unspec of a
;; V4SF register operand.
318 (define_expand "storentv4sf"
319 [(set (match_operand:V4SF 0 "memory_operand" "=m")
320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
;; Expander "storentv2df": sets a V2DF memory operand from an unspec of a
;; V2DF register operand.
325 (define_expand "storentv2df"
326 [(set (match_operand:V2DF 0 "memory_operand" "=m")
327 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
;; Expander "storentv2di": sets a V2DI memory operand from an unspec of a
;; V2DI register operand.
332 (define_expand "storentv2di"
333 [(set (match_operand:V2DI 0 "memory_operand" "=m")
334 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
;; Expander "storentsi": sets an SImode memory operand from an unspec of an
;; SImode register operand.
339 (define_expand "storentsi"
340 [(set (match_operand:SI 0 "memory_operand" "=m")
341 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
348 ;; Parallel single-precision floating point arithmetic
350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  The whole expansion is performed by
;; ix86_expand_fp_absneg_operator with code NEG.
352 (define_expand "negv4sf2"
353 [(set (match_operand:V4SF 0 "register_operand" "")
354 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
356 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  The whole expansion is performed by
;; ix86_expand_fp_absneg_operator with code ABS.
358 (define_expand "absv4sf2"
359 [(set (match_operand:V4SF 0 "register_operand" "")
360 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
362 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the plus pattern is emitted.
364 (define_expand "addv4sf3"
365 [(set (match_operand:V4SF 0 "register_operand" "")
366 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
367 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
369 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF addition emitted as "addps".  Operand 1 is tied to the output
;; and marked commutative with operand 2 ("%0"); operand 2 may be an xmm
;; register or memory.  Valid when ix86_binary_operator_ok accepts the
;; operands.
371 (define_insn "*addv4sf3"
372 [(set (match_operand:V4SF 0 "register_operand" "=x")
373 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
374 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
375 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
376 "addps\t{%2, %0|%0, %2}"
377 [(set_attr "type" "sseadd")
378 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of V4SF addition, emitted as "addss" with mode
;; attribute SF.  Operand 1 is tied to the output register; operand 2 may be
;; an xmm register or memory.
380 (define_insn "sse_vmaddv4sf3"
381 [(set (match_operand:V4SF 0 "register_operand" "=x")
383 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
384 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
387 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
388 "addss\t{%2, %0|%0, %2}"
389 [(set_attr "type" "sseadd")
390 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy.
392 (define_expand "subv4sf3"
393 [(set (match_operand:V4SF 0 "register_operand" "")
394 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
395 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
397 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtraction emitted as "subps".  Operand 1 is tied to the
;; output register; operand 2 may be an xmm register or memory.
399 (define_insn "*subv4sf3"
400 [(set (match_operand:V4SF 0 "register_operand" "=x")
401 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
402 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
404 "subps\t{%2, %0|%0, %2}"
405 [(set_attr "type" "sseadd")
406 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of V4SF subtraction, emitted as "subss" with mode
;; attribute SF.  Operand 1 is tied to the output register; operand 2 may be
;; an xmm register or memory.
408 (define_insn "sse_vmsubv4sf3"
409 [(set (match_operand:V4SF 0 "register_operand" "=x")
411 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
412 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
416 "subss\t{%2, %0|%0, %2}"
417 [(set_attr "type" "sseadd")
418 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the mult pattern is emitted.
420 (define_expand "mulv4sf3"
421 [(set (match_operand:V4SF 0 "register_operand" "")
422 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
423 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
425 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiplication emitted as "mulps".  Operand 1 is tied to the
;; output and marked commutative with operand 2 ("%0"); operand 2 may be an
;; xmm register or memory.  Valid when ix86_binary_operator_ok accepts the
;; operands.
427 (define_insn "*mulv4sf3"
428 [(set (match_operand:V4SF 0 "register_operand" "=x")
429 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
430 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
431 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
432 "mulps\t{%2, %0|%0, %2}"
433 [(set_attr "type" "ssemul")
434 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of V4SF multiplication, emitted as "mulss" with
;; mode attribute SF.  Operand 1 is tied to the output register; operand 2
;; may be an xmm register or memory.
436 (define_insn "sse_vmmulv4sf3"
437 [(set (match_operand:V4SF 0 "register_operand" "=x")
439 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
440 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
443 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
444 "mulss\t{%2, %0|%0, %2}"
445 [(set_attr "type" "ssemul")
446 (set_attr "mode" "SF")])
;; V4SF division expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and TARGET_RECIP
;; are set, the function is not optimized for size, and the finite-math,
;; non-trapping and unsafe-math flags all hold, the division is emitted
;; through ix86_emit_swdivsf instead.
448 (define_expand "divv4sf3"
449 [(set (match_operand:V4SF 0 "register_operand" "")
450 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
451 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
454 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
456 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
457 && flag_finite_math_only && !flag_trapping_math
458 && flag_unsafe_math_optimizations)
460 ix86_emit_swdivsf (operands[0], operands[1],
461 operands[2], V4SFmode);
;; Packed V4SF division emitted as "divps".  Operand 1 is tied to the output
;; register; operand 2 may be an xmm register or memory.
466 (define_insn "*divv4sf3"
467 [(set (match_operand:V4SF 0 "register_operand" "=x")
468 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
469 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
471 "divps\t{%2, %0|%0, %2}"
472 [(set_attr "type" "ssediv")
473 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of V4SF division, emitted as "divss" with mode
;; attribute SF.  Operand 1 is tied to the output register; operand 2 may be
;; an xmm register or memory.
475 (define_insn "sse_vmdivv4sf3"
476 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
483 "divss\t{%2, %0|%0, %2}"
484 [(set_attr "type" "ssediv")
485 (set_attr "mode" "SF")])
;; Packed V4SF UNSPEC_RCP operation emitted as "rcpps".  The source may be an
;; xmm register or memory.
487 (define_insn "sse_rcpv4sf2"
488 [(set (match_operand:V4SF 0 "register_operand" "=x")
490 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
492 "rcpps\t{%1, %0|%0, %1}"
493 [(set_attr "type" "sse")
494 (set_attr "mode" "V4SF")])
;; Scalar-instruction form emitted as "rcpss" with mode attribute SF.
;; Operand 1 (xmm register or memory) feeds the unspec; operand 2 is a
;; register tied to the output.
496 (define_insn "sse_vmrcpv4sf2"
497 [(set (match_operand:V4SF 0 "register_operand" "=x")
499 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
501 (match_operand:V4SF 2 "register_operand" "0")
504 "rcpss\t{%1, %0|%0, %1}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
;; Packed V4SF UNSPEC_RSQRT operation emitted as "rsqrtps".  The source may
;; be an xmm register or memory.
508 (define_insn "*sse_rsqrtv4sf2"
509 [(set (match_operand:V4SF 0 "register_operand" "=x")
511 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
513 "rsqrtps\t{%1, %0|%0, %1}"
514 [(set_attr "type" "sse")
515 (set_attr "mode" "V4SF")])
;; Expander for the V4SF UNSPEC_RSQRT operation.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, the function is not optimized for size, and the
;; finite-math, non-trapping and unsafe-math flags all hold, the expansion
;; goes through ix86_emit_swsqrtsf with a final argument of 1.
517 (define_expand "sse_rsqrtv4sf2"
518 [(set (match_operand:V4SF 0 "register_operand" "")
520 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
523 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
524 && flag_finite_math_only && !flag_trapping_math
525 && flag_unsafe_math_optimizations)
527 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Scalar-instruction form emitted as "rsqrtss" with mode attribute SF.
;; Operand 1 (xmm register or memory) feeds the unspec; operand 2 is a
;; register tied to the output.
532 (define_insn "sse_vmrsqrtv4sf2"
533 [(set (match_operand:V4SF 0 "register_operand" "=x")
535 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
537 (match_operand:V4SF 2 "register_operand" "0")
540 "rsqrtss\t{%1, %0|%0, %1}"
541 [(set_attr "type" "sse")
542 (set_attr "mode" "SF")])
;; Packed V4SF square root emitted as "sqrtps".  The source may be an xmm
;; register or memory.
544 (define_insn "*sqrtv4sf2"
545 [(set (match_operand:V4SF 0 "register_operand" "=x")
546 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
548 "sqrtps\t{%1, %0|%0, %1}"
549 [(set_attr "type" "sse")
550 (set_attr "mode" "V4SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; the function is not optimized for size, and the finite-math, non-trapping
;; and unsafe-math flags all hold, the expansion goes through
;; ix86_emit_swsqrtsf with a final argument of 0.
;; Operand constraints are left empty, as in the other expanders of this
;; file.
552 (define_expand "sqrtv4sf2"
553 [(set (match_operand:V4SF 0 "register_operand" "")
554 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
557 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
558 && flag_finite_math_only && !flag_trapping_math
559 && flag_unsafe_math_optimizations)
561 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Scalar-instruction form of V4SF square root, emitted as "sqrtss" with mode
;; attribute SF.  Operand 1 (xmm register or memory) is the sqrt input;
;; operand 2 is a register tied to the output.
566 (define_insn "sse_vmsqrtv4sf2"
567 [(set (match_operand:V4SF 0 "register_operand" "=x")
569 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
570 (match_operand:V4SF 2 "register_operand" "0")
573 "sqrtss\t{%1, %0|%0, %1}"
574 [(set_attr "type" "sse")
575 (set_attr "mode" "SF")])
577 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
578 ;; isn't really correct, as those rtl operators aren't defined when
579 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
581 (define_expand "smaxv4sf3"
582 [(set (match_operand:V4SF 0 "register_operand" "")
583 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
584 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
587 if (!flag_finite_math_only)
588 operands[1] = force_reg (V4SFmode, operands[1]);
589 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed V4SF maximum emitted as "maxps", available only under
;; flag_finite_math_only.  Operand 1 is tied to the output and marked
;; commutative with operand 2 ("%0").
592 (define_insn "*smaxv4sf3_finite"
593 [(set (match_operand:V4SF 0 "register_operand" "=x")
594 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
595 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
596 "TARGET_SSE && flag_finite_math_only
597 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
598 "maxps\t{%2, %0|%0, %2}"
599 [(set_attr "type" "sse")
600 (set_attr "mode" "V4SF")])
;; Packed V4SF maximum emitted as "maxps", non-commutative form: operand 1
;; must be a register tied to the output; operand 2 may be an xmm register or
;; memory.
602 (define_insn "*smaxv4sf3"
603 [(set (match_operand:V4SF 0 "register_operand" "=x")
604 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
605 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
607 "maxps\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sse")
609 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of V4SF maximum, emitted as "maxss" with mode
;; attribute SF.  Operand 1 is tied to the output register; operand 2 may be
;; an xmm register or memory.
611 (define_insn "sse_vmsmaxv4sf3"
612 [(set (match_operand:V4SF 0 "register_operand" "=x")
614 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
615 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
619 "maxss\t{%2, %0|%0, %2}"
620 [(set_attr "type" "sse")
621 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
623 (define_expand "sminv4sf3"
624 [(set (match_operand:V4SF 0 "register_operand" "")
625 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
626 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
629 if (!flag_finite_math_only)
630 operands[1] = force_reg (V4SFmode, operands[1]);
631 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed V4SF minimum emitted as "minps", available only under
;; flag_finite_math_only.  Operand 1 is tied to the output and marked
;; commutative with operand 2 ("%0").
634 (define_insn "*sminv4sf3_finite"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
637 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
638 "TARGET_SSE && flag_finite_math_only
639 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
640 "minps\t{%2, %0|%0, %2}"
641 [(set_attr "type" "sse")
642 (set_attr "mode" "V4SF")])
;; Packed V4SF minimum emitted as "minps", non-commutative form: operand 1
;; must be a register tied to the output; operand 2 may be an xmm register or
;; memory.
644 (define_insn "*sminv4sf3"
645 [(set (match_operand:V4SF 0 "register_operand" "=x")
646 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
649 "minps\t{%2, %0|%0, %2}"
650 [(set_attr "type" "sse")
651 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of V4SF minimum, emitted as "minss" with mode
;; attribute SF.  Operand 1 is tied to the output register; operand 2 may be
;; an xmm register or memory.
653 (define_insn "sse_vmsminv4sf3"
654 [(set (match_operand:V4SF 0 "register_operand" "=x")
656 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
657 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
661 "minss\t{%2, %0|%0, %2}"
662 [(set_attr "type" "sse")
663 (set_attr "mode" "SF")])
665 ;; These versions of the min/max patterns implement exactly the operations
666 ;; min = (op1 < op2 ? op1 : op2)
667 ;; max = (!(op1 < op2) ? op1 : op2)
668 ;; Their operands are not commutative, and thus they may be used in the
669 ;; presence of -0.0 and NaN.
;; V4SF minimum expressed as an unspec of two ordered operands and emitted as
;; "minps".  Operand 1 is a register tied to the output; operand 2 may be an
;; xmm register or memory.
671 (define_insn "*ieee_sminv4sf3"
672 [(set (match_operand:V4SF 0 "register_operand" "=x")
673 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
674 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
677 "minps\t{%2, %0|%0, %2}"
678 [(set_attr "type" "sseadd")
679 (set_attr "mode" "V4SF")])
;; V4SF maximum expressed as an unspec of two ordered operands and emitted as
;; "maxps".  Operand 1 is a register tied to the output; operand 2 may be an
;; xmm register or memory.
681 (define_insn "*ieee_smaxv4sf3"
682 [(set (match_operand:V4SF 0 "register_operand" "=x")
683 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
684 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
687 "maxps\t{%2, %0|%0, %2}"
688 [(set_attr "type" "sseadd")
689 (set_attr "mode" "V4SF")])
;; V2DF minimum expressed as an unspec of two ordered operands and emitted as
;; "minpd".  Operand 1 is a register tied to the output; operand 2 may be an
;; xmm register or memory.
691 (define_insn "*ieee_sminv2df3"
692 [(set (match_operand:V2DF 0 "register_operand" "=x")
693 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
694 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
697 "minpd\t{%2, %0|%0, %2}"
698 [(set_attr "type" "sseadd")
699 (set_attr "mode" "V2DF")])
;; V2DF maximum expressed as an unspec of two ordered operands and emitted as
;; "maxpd".  Operand 1 is a register tied to the output; operand 2 may be an
;; xmm register or memory.
701 (define_insn "*ieee_smaxv2df3"
702 [(set (match_operand:V2DF 0 "register_operand" "=x")
703 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
707 "maxpd\t{%2, %0|%0, %2}"
708 [(set_attr "type" "sseadd")
709 (set_attr "mode" "V2DF")])
;; V4SF add/subtract emitted as "addsubps".  The visible RTL pairs an
;; expression on operands 1 and 2 with the minus of the same two operands.
;; Operand 1 is tied to the output; operand 2 may be an xmm register or
;; memory.  prefix_rep is set explicitly.
711 (define_insn "sse3_addsubv4sf3"
712 [(set (match_operand:V4SF 0 "register_operand" "=x")
715 (match_operand:V4SF 1 "register_operand" "0")
716 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
717 (minus:V4SF (match_dup 1) (match_dup 2))
720 "addsubps\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "prefix_rep" "1")
723 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation emitted as "haddps".  The RTL selects elements
;; 0, 1, 2 and 3 of operand 1 and then of operand 2 as individual SFmode
;; values.  Operand 1 is tied to the output; operand 2 may be an xmm register
;; or memory.  prefix_rep is set explicitly.
725 (define_insn "sse3_haddv4sf3"
726 [(set (match_operand:V4SF 0 "register_operand" "=x")
731 (match_operand:V4SF 1 "register_operand" "0")
732 (parallel [(const_int 0)]))
733 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
735 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
736 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
740 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
741 (parallel [(const_int 0)]))
742 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
744 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
745 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
747 "haddps\t{%2, %0|%0, %2}"
748 [(set_attr "type" "sseadd")
749 (set_attr "prefix_rep" "1")
750 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation emitted as "hsubps".  The RTL selects elements
;; 0, 1, 2 and 3 of operand 1 and then of operand 2 as individual SFmode
;; values.  Operand 1 is tied to the output; operand 2 may be an xmm register
;; or memory.  prefix_rep is set explicitly.
752 (define_insn "sse3_hsubv4sf3"
753 [(set (match_operand:V4SF 0 "register_operand" "=x")
758 (match_operand:V4SF 1 "register_operand" "0")
759 (parallel [(const_int 0)]))
760 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
762 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
763 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
767 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
768 (parallel [(const_int 0)]))
769 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
771 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
772 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
774 "hsubps\t{%2, %0|%0, %2}"
775 [(set_attr "type" "sseadd")
776 (set_attr "prefix_rep" "1")
777 (set_attr "mode" "V4SF")])
;; V4SF sum-reduction expander.  Two code paths are visible: one applies
;; sse3_haddv4sf3 twice through a fresh temporary register, the other calls
;; ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test that selects between the two paths is not visible
;; in this view.
779 (define_expand "reduc_splus_v4sf"
780 [(match_operand:V4SF 0 "register_operand" "")
781 (match_operand:V4SF 1 "register_operand" "")]
786 rtx tmp = gen_reg_rtx (V4SFmode);
787 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
788 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
791 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF maximum-reduction expander; delegates to ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
795 (define_expand "reduc_smax_v4sf"
796 [(match_operand:V4SF 0 "register_operand" "")
797 (match_operand:V4SF 1 "register_operand" "")]
800 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum-reduction expander; delegates to ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
804 (define_expand "reduc_smin_v4sf"
805 [(match_operand:V4SF 0 "register_operand" "")
806 (match_operand:V4SF 1 "register_operand" "")]
809 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
815 ;; Parallel single-precision floating point comparisons
817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison.  Operand 3 is the comparison itself, matched by
;; sse_comparison_operator, and supplies the %D3 part of the "cmp...ps"
;; mnemonic.  Operand 1 is tied to the output; operand 2 may be an xmm
;; register or memory.
819 (define_insn "sse_maskcmpv4sf3"
820 [(set (match_operand:V4SF 0 "register_operand" "=x")
821 (match_operator:V4SF 3 "sse_comparison_operator"
822 [(match_operand:V4SF 1 "register_operand" "0")
823 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
825 "cmp%D3ps\t{%2, %0|%0, %2}"
826 [(set_attr "type" "ssecmp")
827 (set_attr "mode" "V4SF")])
;; SFmode comparison.  Operand 3 is the comparison itself, matched by
;; sse_comparison_operator, and supplies the %D3 part of the "cmp...ss"
;; mnemonic.  Operand 1 is tied to the output; operand 2 may be an xmm
;; register or memory.
829 (define_insn "sse_maskcmpsf3"
830 [(set (match_operand:SF 0 "register_operand" "=x")
831 (match_operator:SF 3 "sse_comparison_operator"
832 [(match_operand:SF 1 "register_operand" "0")
833 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
835 "cmp%D3ss\t{%2, %0|%0, %2}"
836 [(set_attr "type" "ssecmp")
837 (set_attr "mode" "SF")])
;; Scalar-instruction form of the V4SF comparison, emitted as "cmp...ss"
;; with mode attribute SF.  Operand 3 is the comparison, matched by
;; sse_comparison_operator; operand 1 is tied to the output and operand 2
;; must be an xmm register.
839 (define_insn "sse_vmmaskcmpv4sf3"
840 [(set (match_operand:V4SF 0 "register_operand" "=x")
842 (match_operator:V4SF 3 "sse_comparison_operator"
843 [(match_operand:V4SF 1 "register_operand" "0")
844 (match_operand:V4SF 2 "register_operand" "x")])
848 "cmp%D3ss\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssecmp")
850 (set_attr "mode" "SF")])
;; Sets the CCFP flags register from element 0 of two V4SF operands; emitted
;; as "comiss".  Operand 0 is an xmm register; operand 1 may be an xmm
;; register or memory.
852 (define_insn "sse_comi"
853 [(set (reg:CCFP FLAGS_REG)
856 (match_operand:V4SF 0 "register_operand" "x")
857 (parallel [(const_int 0)]))
859 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
860 (parallel [(const_int 0)]))))]
862 "comiss\t{%1, %0|%0, %1}"
863 [(set_attr "type" "ssecomi")
864 (set_attr "mode" "SF")])
;; Sets the CCFPU flags register from element 0 of two V4SF operands; emitted
;; as "ucomiss".  Operand 0 is an xmm register; operand 1 may be an xmm
;; register or memory.
866 (define_insn "sse_ucomi"
867 [(set (reg:CCFPU FLAGS_REG)
870 (match_operand:V4SF 0 "register_operand" "x")
871 (parallel [(const_int 0)]))
873 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
874 (parallel [(const_int 0)]))))]
876 "ucomiss\t{%1, %0|%0, %1}"
877 [(set_attr "type" "ssecomi")
878 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the two compared
;; values; operands 1 and 2 are general_operand values and operand 0 is the
;; result register.  The expansion is attempted through
;; ix86_expand_fp_vcond, whose return value is tested.
880 (define_expand "vcondv4sf"
881 [(set (match_operand:V4SF 0 "register_operand" "")
884 [(match_operand:V4SF 4 "nonimmediate_operand" "")
885 (match_operand:V4SF 5 "nonimmediate_operand" "")])
886 (match_operand:V4SF 1 "general_operand" "")
887 (match_operand:V4SF 2 "general_operand" "")))]
890 if (ix86_expand_fp_vcond (operands))
896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
898 ;; Parallel single-precision floating point logical operations
900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the and pattern is emitted.
902 (define_expand "andv4sf3"
903 [(set (match_operand:V4SF 0 "register_operand" "")
904 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
905 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
907 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed V4SF bitwise AND emitted as "andps".  Operand 1 is tied to the
;; output and marked commutative with operand 2 ("%0"); operand 2 may be an
;; xmm register or memory.
909 (define_insn "*andv4sf3"
910 [(set (match_operand:V4SF 0 "register_operand" "=x")
911 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
912 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
913 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
914 "andps\t{%2, %0|%0, %2}"
915 [(set_attr "type" "sselog")
916 (set_attr "mode" "V4SF")])
;; Packed V4SF AND of the complement of operand 1 with operand 2, emitted as
;; "andnps".  Operand 1 is a register tied to the output; operand 2 may be an
;; xmm register or memory.
918 (define_insn "sse_nandv4sf3"
919 [(set (match_operand:V4SF 0 "register_operand" "=x")
920 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
921 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
923 "andnps\t{%2, %0|%0, %2}"
924 [(set_attr "type" "sselog")
925 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the ior pattern is emitted.
927 (define_expand "iorv4sf3"
928 [(set (match_operand:V4SF 0 "register_operand" "")
929 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
930 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
932 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed V4SF bitwise OR emitted as "orps".  Operand 1 is tied to the output
;; and marked commutative with operand 2 ("%0"); operand 2 may be an xmm
;; register or memory.
934 (define_insn "*iorv4sf3"
935 [(set (match_operand:V4SF 0 "register_operand" "=x")
936 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
937 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
938 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
939 "orps\t{%2, %0|%0, %2}"
940 [(set_attr "type" "sselog")
941 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the xor pattern is emitted.
943 (define_expand "xorv4sf3"
944 [(set (match_operand:V4SF 0 "register_operand" "")
945 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
946 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
948 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed V4SF bitwise XOR emitted as "xorps".  Operand 1 is tied to the
;; output and marked commutative with operand 2 ("%0"); operand 2 may be an
;; xmm register or memory.
950 (define_insn "*xorv4sf3"
951 [(set (match_operand:V4SF 0 "register_operand" "=x")
952 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
953 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
954 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
955 "xorps\t{%2, %0|%0, %2}"
956 [(set_attr "type" "sselog")
957 (set_attr "mode" "V4SF")])
959 ;; Also define scalar versions. These are used for abs, neg, and
960 ;; conditional move. Using subregs into vector modes causes register
961 ;; allocation lossage. These patterns do not allow memory operands
962 ;; because the native instructions read the full 128-bits.
;; SFmode bitwise AND on xmm registers only, emitted as the packed "andps"
;; (mode attribute V4SF).  Operand 1 is tied to the output.
964 (define_insn "*andsf3"
965 [(set (match_operand:SF 0 "register_operand" "=x")
966 (and:SF (match_operand:SF 1 "register_operand" "0")
967 (match_operand:SF 2 "register_operand" "x")))]
969 "andps\t{%2, %0|%0, %2}"
970 [(set_attr "type" "sselog")
971 (set_attr "mode" "V4SF")])
;; SFmode AND of the complement of operand 1 with operand 2, on xmm
;; registers only, emitted as the packed "andnps" (mode attribute V4SF).
;; Operand 1 is tied to the output.
973 (define_insn "*nandsf3"
974 [(set (match_operand:SF 0 "register_operand" "=x")
975 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
976 (match_operand:SF 2 "register_operand" "x")))]
978 "andnps\t{%2, %0|%0, %2}"
979 [(set_attr "type" "sselog")
980 (set_attr "mode" "V4SF")])
;; SFmode bitwise OR on xmm registers only, emitted as the packed "orps"
;; (mode attribute V4SF).  Operand 1 is tied to the output.
982 (define_insn "*iorsf3"
983 [(set (match_operand:SF 0 "register_operand" "=x")
984 (ior:SF (match_operand:SF 1 "register_operand" "0")
985 (match_operand:SF 2 "register_operand" "x")))]
987 "orps\t{%2, %0|%0, %2}"
988 [(set_attr "type" "sselog")
989 (set_attr "mode" "V4SF")])
;; SFmode bitwise XOR on xmm registers only, emitted as the packed "xorps"
;; (mode attribute V4SF).  Operand 1 is tied to the output.
991 (define_insn "*xorsf3"
992 [(set (match_operand:SF 0 "register_operand" "=x")
993 (xor:SF (match_operand:SF 1 "register_operand" "0")
994 (match_operand:SF 2 "register_operand" "x")))]
996 "xorps\t{%2, %0|%0, %2}"
997 [(set_attr "type" "sselog")
998 (set_attr "mode" "V4SF")])
1000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1002 ;; Parallel single-precision floating point conversion operations
1004 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer-to-float conversion emitted as "cvtpi2ps".  Operand 2 is a V2SI
;; value (constraint "ym") converted with float:V2SF; operand 1 is a V4SF
;; register tied to the output.
1006 (define_insn "sse_cvtpi2ps"
1007 [(set (match_operand:V4SF 0 "register_operand" "=x")
1010 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1011 (match_operand:V4SF 1 "register_operand" "0")
1014 "cvtpi2ps\t{%2, %0|%0, %2}"
1015 [(set_attr "type" "ssecvt")
1016 (set_attr "mode" "V4SF")])
;; Float-to-integer conversion emitted as "cvtps2pi".  The V4SF source (xmm
;; register or memory) passes through a V4SI unspec, of which elements 0 and
;; 1 form the V2SI result (constraint "y").  The unit attribute is mmx.
1018 (define_insn "sse_cvtps2pi"
1019 [(set (match_operand:V2SI 0 "register_operand" "=y")
1021 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1023 (parallel [(const_int 0) (const_int 1)])))]
1025 "cvtps2pi\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "unit" "mmx")
1028 (set_attr "mode" "DI")])
;; Float-to-integer conversion emitted as "cvttps2pi".  The V4SF source (xmm
;; register or memory) is converted with fix:V4SI, of which elements 0 and 1
;; form the V2SI result (constraint "y").  The unit attribute is mmx.
1030 (define_insn "sse_cvttps2pi"
1031 [(set (match_operand:V2SI 0 "register_operand" "=y")
1033 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1034 (parallel [(const_int 0) (const_int 1)])))]
1036 "cvttps2pi\t{%1, %0|%0, %1}"
1037 [(set_attr "type" "ssecvt")
1038 (set_attr "unit" "mmx")
1039 (set_attr "mode" "SF")])
;; SImode integer to SFmode conversion emitted as "cvtsi2ss".  Operand 2 is
;; the integer source (general register in alternative 0, memory in
;; alternative 1); operand 1 is a V4SF register tied to the output.  The
;; decode attributes differ per alternative.
1041 (define_insn "sse_cvtsi2ss"
1042 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1045 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1046 (match_operand:V4SF 1 "register_operand" "0,0")
1049 "cvtsi2ss\t{%2, %0|%0, %2}"
1050 [(set_attr "type" "sseicvt")
1051 (set_attr "athlon_decode" "vector,double")
1052 (set_attr "amdfam10_decode" "vector,double")
1053 (set_attr "mode" "SF")])
;; DImode integer to SFmode conversion emitted as "cvtsi2ssq"; requires
;; TARGET_SSE and TARGET_64BIT.  Operand 2 is the integer source (constraints
;; "r" and "rm"); operand 1 is a V4SF register tied to the output.
1055 (define_insn "sse_cvtsi2ssq"
1056 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1059 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1060 (match_operand:V4SF 1 "register_operand" "0,0")
1062 "TARGET_SSE && TARGET_64BIT"
1063 "cvtsi2ssq\t{%2, %0|%0, %2}"
1064 [(set_attr "type" "sseicvt")
1065 (set_attr "athlon_decode" "vector,double")
1066 (set_attr "amdfam10_decode" "vector,double")
1067 (set_attr "mode" "SF")])
;; cvtss2si: convert element 0 of the V4SF operand 1 to an SImode value in
;; a general register.  Modelled with UNSPEC_FIX_NOTRUNC rather than (fix).
1069 (define_insn "sse_cvtss2si"
1070 [(set (match_operand:SI 0 "register_operand" "=r,r")
1073 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1074 (parallel [(const_int 0)]))]
1075 UNSPEC_FIX_NOTRUNC))]
1077 "cvtss2si\t{%1, %0|%0, %1}"
1078 [(set_attr "type" "sseicvt")
1079 (set_attr "athlon_decode" "double,vector")
1080 (set_attr "prefix_rep" "1")
1081 (set_attr "mode" "SI")])
;; cvtss2si on a scalar: same instruction as sse_cvtss2si, but operand 1 is
;; a plain SFmode value instead of element 0 of a V4SF vector.
1083 (define_insn "sse_cvtss2si_2"
1084 [(set (match_operand:SI 0 "register_operand" "=r,r")
1085 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1086 UNSPEC_FIX_NOTRUNC))]
1088 "cvtss2si\t{%1, %0|%0, %1}"
1089 [(set_attr "type" "sseicvt")
1090 (set_attr "athlon_decode" "double,vector")
1091 (set_attr "amdfam10_decode" "double,double")
1092 (set_attr "prefix_rep" "1")
1093 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result form of sse_cvtss2si (element 0 of operand 1,
;; UNSPEC_FIX_NOTRUNC).  Only for TARGET_SSE && TARGET_64BIT.
1095 (define_insn "sse_cvtss2siq"
1096 [(set (match_operand:DI 0 "register_operand" "=r,r")
1099 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1100 (parallel [(const_int 0)]))]
1101 UNSPEC_FIX_NOTRUNC))]
1102 "TARGET_SSE && TARGET_64BIT"
1103 "cvtss2siq\t{%1, %0|%0, %1}"
1104 [(set_attr "type" "sseicvt")
1105 (set_attr "athlon_decode" "double,vector")
1106 (set_attr "prefix_rep" "1")
1107 (set_attr "mode" "DI")])
;; cvtss2siq on a scalar SFmode operand, producing a DImode result.
;; Only for TARGET_SSE && TARGET_64BIT.
1109 (define_insn "sse_cvtss2siq_2"
1110 [(set (match_operand:DI 0 "register_operand" "=r,r")
1111 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1112 UNSPEC_FIX_NOTRUNC))]
1113 "TARGET_SSE && TARGET_64BIT"
1114 "cvtss2siq\t{%1, %0|%0, %1}"
1115 [(set_attr "type" "sseicvt")
1116 (set_attr "athlon_decode" "double,vector")
1117 (set_attr "amdfam10_decode" "double,double")
1118 (set_attr "prefix_rep" "1")
1119 (set_attr "mode" "DI")])
;; cvttss2si: convert element 0 of the V4SF operand 1 to an SImode value in
;; a general register, emitting the cvtt (truncating) mnemonic.
1121 (define_insn "sse_cvttss2si"
1122 [(set (match_operand:SI 0 "register_operand" "=r,r")
1125 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1126 (parallel [(const_int 0)]))))]
1128 "cvttss2si\t{%1, %0|%0, %1}"
1129 [(set_attr "type" "sseicvt")
1130 (set_attr "athlon_decode" "double,vector")
1131 (set_attr "amdfam10_decode" "double,double")
1132 (set_attr "prefix_rep" "1")
1133 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result form of sse_cvttss2si.
;; Only for TARGET_SSE && TARGET_64BIT.
1135 (define_insn "sse_cvttss2siq"
1136 [(set (match_operand:DI 0 "register_operand" "=r,r")
1139 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1140 (parallel [(const_int 0)]))))]
1141 "TARGET_SSE && TARGET_64BIT"
1142 "cvttss2siq\t{%1, %0|%0, %1}"
1143 [(set_attr "type" "sseicvt")
1144 (set_attr "athlon_decode" "double,vector")
1145 (set_attr "amdfam10_decode" "double,double")
1146 (set_attr "prefix_rep" "1")
1147 (set_attr "mode" "DI")])
;; cvtdq2ps: convert four packed SImode integers (register or memory) to
;; four single-precision values.
1149 (define_insn "sse2_cvtdq2ps"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x")
1151 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1153 "cvtdq2ps\t{%1, %0|%0, %1}"
1154 [(set_attr "type" "ssecvt")
1155 (set_attr "mode" "V4SF")])
;; cvtps2dq: convert four single-precision values to packed SImode
;; integers.  Modelled with UNSPEC_FIX_NOTRUNC rather than (fix).
1157 (define_insn "sse2_cvtps2dq"
1158 [(set (match_operand:V4SI 0 "register_operand" "=x")
1159 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1160 UNSPEC_FIX_NOTRUNC))]
1162 "cvtps2dq\t{%1, %0|%0, %1}"
1163 [(set_attr "type" "ssecvt")
1164 (set_attr "prefix_data16" "1")
1165 (set_attr "mode" "TI")])
;; cvttps2dq: convert four single-precision values to packed SImode
;; integers, expressed with (fix:V4SI ...).
1167 (define_insn "sse2_cvttps2dq"
1168 [(set (match_operand:V4SI 0 "register_operand" "=x")
1169 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1171 "cvttps2dq\t{%1, %0|%0, %1}"
1172 [(set_attr "type" "ssecvt")
1173 (set_attr "prefix_rep" "1")
1174 (set_attr "mode" "TI")])
1176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1178 ;; Parallel single-precision floating point element swizzling
1180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms.  Three alternatives:
;;   x <- x : movhlps
;;   x <- o : movlps from %H2 (operand 2's address plus 8 bytes)
;;   m <- x : movhps store
;; The condition rejects operands 1 and 2 both being in memory.
1182 (define_insn "sse_movhlps"
1183 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1186 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1187 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1188 (parallel [(const_int 6)
1192 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1194 movhlps\t{%2, %0|%0, %2}
1195 movlps\t{%H2, %0|%0, %H2}
1196 movhps\t{%2, %0|%0, %2}"
1197 [(set_attr "type" "ssemov")
1198 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Three alternatives:
;;   x <- x : movlhps
;;   x <- m : movhps load
;;   o <- x : movlps store to %H0 (operand 0's address plus 8 bytes)
;; Operand validity is checked by ix86_binary_operator_ok.
1200 (define_insn "sse_movlhps"
1201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1204 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1205 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1206 (parallel [(const_int 0)
1210 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1212 movlhps\t{%2, %0|%0, %2}
1213 movhps\t{%2, %0|%0, %2}
1214 movlps\t{%2, %H0|%H0, %2}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: result elements are taken at selection indices 2, 6, 3, 7 over
;; operands 1 and 2.  Operand 1 is tied to the destination.
1218 (define_insn "sse_unpckhps"
1219 [(set (match_operand:V4SF 0 "register_operand" "=x")
1222 (match_operand:V4SF 1 "register_operand" "0")
1223 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1224 (parallel [(const_int 2) (const_int 6)
1225 (const_int 3) (const_int 7)])))]
1227 "unpckhps\t{%2, %0|%0, %2}"
1228 [(set_attr "type" "sselog")
1229 (set_attr "mode" "V4SF")])
;; unpcklps: result elements are taken at selection indices 0, 4, 1, 5 over
;; operands 1 and 2.  Operand 1 is tied to the destination.
1231 (define_insn "sse_unpcklps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 0) (const_int 4)
1238 (const_int 1) (const_int 5)])))]
1240 "unpcklps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
1244 ;; These are modeled with the same vec_concat as the others so that we
1245 ;; capture users of shufps that can use the new instructions
;; movshdup: single-source shuffle of operand 1 (register or memory); the
;; selection list starts at element 1.
;; NOTE(review): the rest of the selection list is not visible in this
;; excerpt.
1246 (define_insn "sse3_movshdup"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x")
1250 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1252 (parallel [(const_int 1)
1257 "movshdup\t{%1, %0|%0, %1}"
1258 [(set_attr "type" "sse")
1259 (set_attr "prefix_rep" "1")
1260 (set_attr "mode" "V4SF")])
;; movsldup: single-source shuffle of operand 1 (register or memory); the
;; selection list starts at element 0.
;; NOTE(review): the rest of the selection list is not visible in this
;; excerpt.
1262 (define_insn "sse3_movsldup"
1263 [(set (match_operand:V4SF 0 "register_operand" "=x")
1266 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1268 (parallel [(const_int 0)
1273 "movsldup\t{%1, %0|%0, %1}"
1274 [(set_attr "type" "sse")
1275 (set_attr "prefix_rep" "1")
1276 (set_attr "mode" "V4SF")])
;; Expander for shufps with an 8-bit immediate in operand 3.  The immediate
;; is split into four 2-bit selectors and passed to sse_shufps_1.  The two
;; upper selectors get 4 added, matching the const_4_to_7_operand
;; predicates that sse_shufps_1 uses for them.
1278 (define_expand "sse_shufps"
1279 [(match_operand:V4SF 0 "register_operand" "")
1280 (match_operand:V4SF 1 "register_operand" "")
1281 (match_operand:V4SF 2 "nonimmediate_operand" "")
1282 (match_operand:SI 3 "const_int_operand" "")]
1285 int mask = INTVAL (operands[3]);
1286 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1287 GEN_INT ((mask >> 0) & 3),
1288 GEN_INT ((mask >> 2) & 3),
1289 GEN_INT (((mask >> 4) & 3) + 4),
1290 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selectors as separate operands: 3 and 4 in range 0..3,
;; 5 and 6 in range 4..7.  The output code packs them back into one 8-bit
;; immediate (2 bits each, subtracting 4 from operands 5 and 6) and stores
;; it in operands[3] so the template can print it as %3.
1294 (define_insn "sse_shufps_1"
1295 [(set (match_operand:V4SF 0 "register_operand" "=x")
1298 (match_operand:V4SF 1 "register_operand" "0")
1299 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1300 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1301 (match_operand 4 "const_0_to_3_operand" "")
1302 (match_operand 5 "const_4_to_7_operand" "")
1303 (match_operand 6 "const_4_to_7_operand" "")])))]
1307 mask |= INTVAL (operands[3]) << 0;
1308 mask |= INTVAL (operands[4]) << 2;
1309 mask |= (INTVAL (operands[5]) - 4) << 4;
1310 mask |= (INTVAL (operands[6]) - 4) << 6;
1311 operands[3] = GEN_INT (mask);
1313 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1315 [(set_attr "type" "sselog")
1316 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of the V4SF operand 1 as a V2SF value.
;; Three alternatives:
;;   m <- x : movhps store
;;   x <- x : movhlps
;;   x <- o : movlps load from %H1 (operand 1's address plus 8 bytes)
1318 (define_insn "sse_storehps"
1319 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1321 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1322 (parallel [(const_int 2) (const_int 3)])))]
1325 movhps\t{%1, %0|%0, %1}
1326 movhlps\t{%1, %0|%0, %1}
1327 movlps\t{%H1, %0|%0, %H1}"
1328 [(set_attr "type" "ssemov")
1329 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of operand 1 (tied to the destination) with the
;; V2SF operand 2.  Three alternatives:
;;   x <- m : movhps load
;;   x <- x : movlhps
;;   o <- x : movlps store to %H0 (operand 0's address plus 8 bytes)
1331 (define_insn "sse_loadhps"
1332 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1335 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1336 (parallel [(const_int 0) (const_int 1)]))
1337 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1340 movhps\t{%2, %0|%0, %2}
1341 movlhps\t{%2, %0|%0, %2}
1342 movlps\t{%2, %H0|%H0, %2}"
1343 [(set_attr "type" "ssemov")
1344 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of the V4SF operand 1 as a V2SF value.
;; Three alternatives:
;;   m <- x : movlps store
;;   x <- x : movaps (copies the whole register)
;;   x <- m : movlps load
1346 (define_insn "sse_storelps"
1347 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1349 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1350 (parallel [(const_int 0) (const_int 1)])))]
1353 movlps\t{%1, %0|%0, %1}
1354 movaps\t{%1, %0|%0, %1}
1355 movlps\t{%1, %0|%0, %1}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with elements 2 and 3 of operand 1.
;; Three alternatives:
;;   operand 2 tied to the destination, operand 1 in a register:
;;     shufps with immediate 0xe4
;;   operand 1 tied, operand 2 in memory : movlps load
;;   memory destination                  : movlps store
1359 (define_insn "sse_loadlps"
1360 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1362 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1364 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1365 (parallel [(const_int 2) (const_int 3)]))))]
1368 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1369 movlps\t{%2, %0|%0, %2}
1370 movlps\t{%2, %0|%0, %2}"
1371 [(set_attr "type" "sselog,ssemov,ssemov")
1372 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 2 is the source and operand 1 is
;; tied to the destination.
1374 (define_insn "sse_movss"
1375 [(set (match_operand:V4SF 0 "register_operand" "=x")
1377 (match_operand:V4SF 2 "register_operand" "x")
1378 (match_operand:V4SF 1 "register_operand" "0")
1381 "movss\t{%2, %0|%0, %2}"
1382 [(set_attr "type" "ssemov")
1383 (set_attr "mode" "SF")])
;; Broadcast the SFmode operand 1 into a V4SF register.  Operand 1 is tied
;; to the destination, so the insn is a shufps of the register with itself
;; and immediate 0.
1385 (define_insn "*vec_dupv4sf"
1386 [(set (match_operand:V4SF 0 "register_operand" "=x")
1388 (match_operand:SF 1 "register_operand" "0")))]
1390 "shufps\t{$0, %0, %0|%0, %0, 0}"
1391 [(set_attr "type" "sselog1")
1392 (set_attr "mode" "V4SF")])
1394 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1395 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1396 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SFmode operands.  Four alternatives, two for
;; SSE registers ("x") and two for MMX registers ("*y"):
;;   unpcklps  : SSE, operand 1 tied, operand 2 in a register
;;   movss     : SSE, operand 1 from memory, operand 2 constraint "C"
;;   punpckldq : MMX, operand 1 tied, operand 2 in a register
;;   movd      : MMX, operand 1 from memory, operand 2 constraint "C"
1397 (define_insn "*sse_concatv2sf"
1398 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1400 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1401 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1404 unpcklps\t{%2, %0|%0, %2}
1405 movss\t{%1, %0|%0, %1}
1406 punpckldq\t{%2, %0|%0, %2}
1407 movd\t{%1, %0|%0, %1}"
1408 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1409 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF value from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or from memory
;; (movhps).
1411 (define_insn "*sse_concatv4sf"
1412 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1414 (match_operand:V2SF 1 "register_operand" " 0,0")
1415 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1418 movlhps\t{%2, %0|%0, %2}
1419 movhps\t{%2, %0|%0, %2}"
1420 [(set_attr "type" "ssemov")
1421 (set_attr "mode" "V4SF,V2SF")])
;; Expander for V4SF vector initialisation.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
1423 (define_expand "vec_initv4sf"
1424 [(match_operand:V4SF 0 "register_operand" "")
1425 (match_operand 1 "" "")]
1428 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set element 0 of a V4SF value from the SFmode operand 2.  Operand 1 is
;; either tied to the destination ("0") or matches constraint "C".
;; NOTE(review): four constraint alternatives are declared but only three
;; output templates are visible in this excerpt; confirm the template of
;; the memory-destination alternative against the full file.
1432 (define_insn "vec_setv4sf_0"
1433 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1436 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1437 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1441 movss\t{%2, %0|%0, %2}
1442 movss\t{%2, %0|%0, %2}
1443 movd\t{%2, %0|%0, %2}
1445 [(set_attr "type" "ssemov")
1446 (set_attr "mode" "SF")])
1448 ;; A subset is vec_setv4sf.
;; insertps form of a V4SF element set.  Operand 3 is a power of two in
;; 1..8 (const_pow2_1_to_8_operand).  The output code replaces it with
;; exact_log2 (operand 3) << 4 before printing it as the insertps immediate.
1449 (define_insn "*vec_setv4sf_sse4_1"
1450 [(set (match_operand:V4SF 0 "register_operand" "=x")
1453 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1454 (match_operand:V4SF 1 "register_operand" "0")
1455 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1458 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1459 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1461 [(set_attr "type" "sselog")
1462 (set_attr "prefix_extra" "1")
1463 (set_attr "mode" "V4SF")])
;; insertps with the immediate supplied directly: operand 3 is an 8-bit
;; constant (const_0_to_255_operand) and is printed unchanged.  Operand 1 is
;; tied to the destination; operand 2 is the source register.  The
;; operation is modelled as an unspec.
1465 (define_insn "sse4_1_insertps"
1466 [(set (match_operand:V4SF 0 "register_operand" "=x")
1467 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1468 (match_operand:V4SF 1 "register_operand" "0")
1469 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1472 "insertps\t{%3, %2, %0|%0, %2, %3}"
1473 [(set_attr "type" "sselog")
1474 (set_attr "prefix_extra" "1")
1475 (set_attr "mode" "V4SF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; excerpt; presumably a define_split -- confirm against the full file.
;; After reload, a set with a V4SF memory destination and a non-memory
;; SFmode source becomes a plain SFmode store at offset 0 of the
;; destination.
1478 [(set (match_operand:V4SF 0 "memory_operand" "")
1481 (match_operand:SF 1 "nonmemory_operand" ""))
1484 "TARGET_SSE && reload_completed"
1487 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander for setting one element of a V4SF register.  Operand 1 is the
;; new SFmode value and operand 2 a constant integer passed on as the
;; element argument.  All work is delegated to ix86_expand_vector_set.
1491 (define_expand "vec_setv4sf"
1492 [(match_operand:V4SF 0 "register_operand" "")
1493 (match_operand:SF 1 "register_operand" "")
1494 (match_operand 2 "const_int_operand" "")]
1497 ix86_expand_vector_set (false, operands[0], operands[1],
1498 INTVAL (operands[2]));
;; Extract element 0 of a V4SF value as SFmode.  The condition rejects a
;; memory-to-memory form.  After reload the pattern is split into an
;; ordinary SFmode move: operand 1 is re-expressed in SFmode, either as the
;; same hard register number or through gen_lowpart.
;; NOTE(review): the test that chooses between those two forms is not
;; visible in this excerpt.
1502 (define_insn_and_split "*vec_extractv4sf_0"
1503 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1505 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1506 (parallel [(const_int 0)])))]
1507 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1509 "&& reload_completed"
1512 rtx op1 = operands[1];
1514 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1516 op1 = gen_lowpart (SFmode, op1);
1517 emit_move_insn (operands[0], op1);
;; extractps: store the element of operand 1 selected by operand 2 (a
;; constant in 0..3) into a general register or memory ("=rm").
1521 (define_insn "*sse4_1_extractps"
1522 [(set (match_operand:SF 0 "register_operand" "=rm")
1524 (match_operand:V4SF 1 "register_operand" "x")
1525 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1527 "extractps\t{%2, %1, %0|%0, %1, %2}"
1528 [(set_attr "type" "sselog")
1529 (set_attr "prefix_extra" "1")
1530 (set_attr "mode" "V4SF")])
;; Expander for extracting one element of a V4SF register into an SFmode
;; register.  Operand 2 is a constant integer passed on as the element
;; argument.  All work is delegated to ix86_expand_vector_extract.
1532 (define_expand "vec_extractv4sf"
1533 [(match_operand:SF 0 "register_operand" "")
1534 (match_operand:V4SF 1 "register_operand" "")
1535 (match_operand 2 "const_int_operand" "")]
1538 ix86_expand_vector_extract (false, operands[0], operands[1],
1539 INTVAL (operands[2]));
1543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1545 ;; Parallel double-precision floating point arithmetic
1547 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  The expander emits nothing itself; it calls
;; ix86_expand_fp_absneg_operator with code NEG and finishes with DONE.
1549 (define_expand "negv2df2"
1550 [(set (match_operand:V2DF 0 "register_operand" "")
1551 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1553 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  The expander emits nothing itself; it calls
;; ix86_expand_fp_absneg_operator with code ABS and finishes with DONE.
1555 (define_expand "absv2df2"
1556 [(set (match_operand:V2DF 0 "register_operand" "")
1557 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1559 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Both inputs may be in memory here; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy on the
;; operands before the insn is emitted.
1561 (define_expand "addv2df3"
1562 [(set (match_operand:V2DF 0 "register_operand" "")
1563 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1564 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1566 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.
1568 (define_insn "*addv2df3"
1569 [(set (match_operand:V2DF 0 "register_operand" "=x")
1570 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1571 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1572 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1573 "addpd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sseadd")
1575 (set_attr "mode" "V2DF")])
;; addsd: scalar double-precision add of operands 1 and 2.  Operand 1 is
;; tied to the destination.  The operand check is done in V2DFmode, the
;; mode of the operands, as in the other V2DF arithmetic patterns.
1577 (define_insn "sse2_vmaddv2df3"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1580 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1581 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1584 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1585 "addsd\t{%2, %0|%0, %2}"
1586 [(set_attr "type" "sseadd")
1587 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the insn is
;; emitted.
1589 (define_expand "subv2df3"
1590 [(set (match_operand:V2DF 0 "register_operand" "")
1591 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1592 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1594 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Not commutative: operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1596 (define_insn "*subv2df3"
1597 [(set (match_operand:V2DF 0 "register_operand" "=x")
1598 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1599 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1601 "subpd\t{%2, %0|%0, %2}"
1602 [(set_attr "type" "sseadd")
1603 (set_attr "mode" "V2DF")])
;; subsd: scalar double-precision subtract (attribute mode DF).
;; Operand 1 is tied to the destination.
1605 (define_insn "sse2_vmsubv2df3"
1606 [(set (match_operand:V2DF 0 "register_operand" "=x")
1608 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1609 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1613 "subsd\t{%2, %0|%0, %2}"
1614 [(set_attr "type" "sseadd")
1615 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the insn is
;; emitted.
1617 (define_expand "mulv2df3"
1618 [(set (match_operand:V2DF 0 "register_operand" "")
1619 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1620 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1622 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.
1624 (define_insn "*mulv2df3"
1625 [(set (match_operand:V2DF 0 "register_operand" "=x")
1626 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1627 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1628 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1629 "mulpd\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "ssemul")
1631 (set_attr "mode" "V2DF")])
;; mulsd: scalar double-precision multiply (attribute mode DF).
;; Operand 1 is tied to the destination.
1633 (define_insn "sse2_vmmulv2df3"
1634 [(set (match_operand:V2DF 0 "register_operand" "=x")
1636 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1637 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1640 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1641 "mulsd\t{%2, %0|%0, %2}"
1642 [(set_attr "type" "ssemul")
1643 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add and mul expanders, operand 1
;; must already be a register.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
1645 (define_expand "divv2df3"
1646 [(set (match_operand:V2DF 0 "register_operand" "")
1647 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1648 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1650 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 (the dividend) is a register tied to the destination;
;; operand 2 may be a register or memory.
1652 (define_insn "*divv2df3"
1653 [(set (match_operand:V2DF 0 "register_operand" "=x")
1654 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1655 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1657 "divpd\t{%2, %0|%0, %2}"
1658 [(set_attr "type" "ssediv")
1659 (set_attr "mode" "V2DF")])
;; divsd: scalar double-precision divide (attribute mode DF).
;; Operand 1 is tied to the destination.
1661 (define_insn "sse2_vmdivv2df3"
1662 [(set (match_operand:V2DF 0 "register_operand" "=x")
1664 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1665 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1669 "divsd\t{%2, %0|%0, %2}"
1670 [(set_attr "type" "ssediv")
1671 (set_attr "mode" "DF")])
;; sqrtpd: packed double-precision square root of operand 1 (register or
;; memory).  The source is not tied to the destination.
1673 (define_insn "sqrtv2df2"
1674 [(set (match_operand:V2DF 0 "register_operand" "=x")
1675 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1677 "sqrtpd\t{%1, %0|%0, %1}"
1678 [(set_attr "type" "sse")
1679 (set_attr "mode" "V2DF")])
;; sqrtsd: scalar double-precision square root of operand 1.  Operand 2 is
;; tied to the destination.  Operand 1 uses nonimmediate_operand so that
;; the predicate agrees with its "xm" constraint, which allows memory.
1681 (define_insn "sse2_vmsqrtv2df2"
1682 [(set (match_operand:V2DF 0 "register_operand" "=x")
1684 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1685 (match_operand:V2DF 2 "register_operand" "0")
1688 "sqrtsd\t{%1, %0|%0, %1}"
1689 [(set_attr "type" "sse")
1690 (set_attr "mode" "DF")])
1692 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1693 ;; isn't really correct, as those rtl operators aren't defined when
1694 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  When flag_finite_math_only is not set, operand 1
;; is first forced into a register; then the operands go through
;; ix86_fixup_binary_operands_no_copy.
1696 (define_expand "smaxv2df3"
1697 [(set (match_operand:V2DF 0 "register_operand" "")
1698 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1702 if (!flag_finite_math_only)
1703 operands[1] = force_reg (V2DFmode, operands[1]);
1704 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form ("%0" on operand 1).  Enabled only when
;; flag_finite_math_only is set.
1707 (define_insn "*smaxv2df3_finite"
1708 [(set (match_operand:V2DF 0 "register_operand" "=x")
1709 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1710 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1711 "TARGET_SSE2 && flag_finite_math_only
1712 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1713 "maxpd\t{%2, %0|%0, %2}"
1714 [(set_attr "type" "sseadd")
1715 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker.
1717 (define_insn "*smaxv2df3"
1718 [(set (match_operand:V2DF 0 "register_operand" "=x")
1719 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1720 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1722 "maxpd\t{%2, %0|%0, %2}"
1723 [(set_attr "type" "sseadd")
1724 (set_attr "mode" "V2DF")])
;; maxsd: scalar double-precision maximum (attribute mode DF).
;; Operand 1 is tied to the destination.
1726 (define_insn "sse2_vmsmaxv2df3"
1727 [(set (match_operand:V2DF 0 "register_operand" "=x")
1729 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1730 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1734 "maxsd\t{%2, %0|%0, %2}"
1735 [(set_attr "type" "sseadd")
1736 (set_attr "mode" "DF")])
;; V2DF minimum expander.  When flag_finite_math_only is not set, operand 1
;; is first forced into a register; then the operands go through
;; ix86_fixup_binary_operands_no_copy.
1738 (define_expand "sminv2df3"
1739 [(set (match_operand:V2DF 0 "register_operand" "")
1740 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1741 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1744 if (!flag_finite_math_only)
1745 operands[1] = force_reg (V2DFmode, operands[1]);
1746 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form ("%0" on operand 1).  Enabled only when
;; flag_finite_math_only is set.
1749 (define_insn "*sminv2df3_finite"
1750 [(set (match_operand:V2DF 0 "register_operand" "=x")
1751 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1752 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1753 "TARGET_SSE2 && flag_finite_math_only
1754 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1755 "minpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sseadd")
1757 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker.
1759 (define_insn "*sminv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x")
1761 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1762 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1764 "minpd\t{%2, %0|%0, %2}"
1765 [(set_attr "type" "sseadd")
1766 (set_attr "mode" "V2DF")])
;; minsd: scalar double-precision minimum (attribute mode DF).
;; Operand 1 is tied to the destination.
1768 (define_insn "sse2_vmsminv2df3"
1769 [(set (match_operand:V2DF 0 "register_operand" "=x")
1771 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1776 "minsd\t{%2, %0|%0, %2}"
1777 [(set_attr "type" "sseadd")
1778 (set_attr "mode" "DF")])
;; addsubpd.  The RTL contains a (minus:V2DF op1 op2) of the same two
;; operands, referenced again through match_dup.  Operand 1 is tied to the
;; destination.
;; NOTE(review): the operation that combines the add and subtract results
;; is not visible in this excerpt.
1780 (define_insn "sse3_addsubv2df3"
1781 [(set (match_operand:V2DF 0 "register_operand" "=x")
1784 (match_operand:V2DF 1 "register_operand" "0")
1785 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1786 (minus:V2DF (match_dup 1) (match_dup 2))
1789 "addsubpd\t{%2, %0|%0, %2}"
1790 [(set_attr "type" "sseadd")
1791 (set_attr "mode" "V2DF")])
;; haddpd: horizontal add.  The RTL pairs element 0 with element 1 of
;; operand 1, and element 0 with element 1 of operand 2.  Operand 1 is tied
;; to the destination.
1793 (define_insn "sse3_haddv2df3"
1794 [(set (match_operand:V2DF 0 "register_operand" "=x")
1798 (match_operand:V2DF 1 "register_operand" "0")
1799 (parallel [(const_int 0)]))
1800 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1803 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1804 (parallel [(const_int 0)]))
1805 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1807 "haddpd\t{%2, %0|%0, %2}"
1808 [(set_attr "type" "sseadd")
1809 (set_attr "mode" "V2DF")])
;; hsubpd: horizontal subtract.  The RTL pairs element 0 with element 1 of
;; operand 1, and element 0 with element 1 of operand 2.  Operand 1 is tied
;; to the destination.
1811 (define_insn "sse3_hsubv2df3"
1812 [(set (match_operand:V2DF 0 "register_operand" "=x")
1816 (match_operand:V2DF 1 "register_operand" "0")
1817 (parallel [(const_int 0)]))
1818 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1821 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1822 (parallel [(const_int 0)]))
1823 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1825 "hsubpd\t{%2, %0|%0, %2}"
1826 [(set_attr "type" "sseadd")
1827 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF vector, implemented as a single
;; sse3_haddv2df3 with operand 1 passed as both sources.
1829 (define_expand "reduc_splus_v2df"
1830 [(match_operand:V2DF 0 "register_operand" "")
1831 (match_operand:V2DF 1 "register_operand" "")]
1834 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1840 ;; Parallel double-precision floating point comparisons
1842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>pd: packed double-precision compare.  Operand 3 is the
;; comparison operator (sse_comparison_operator); the %D3 modifier prints
;; it as part of the mnemonic.  Operand 1 is tied to the destination.
1844 (define_insn "sse2_maskcmpv2df3"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x")
1846 (match_operator:V2DF 3 "sse_comparison_operator"
1847 [(match_operand:V2DF 1 "register_operand" "0")
1848 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1850 "cmp%D3pd\t{%2, %0|%0, %2}"
1851 [(set_attr "type" "ssecmp")
1852 (set_attr "mode" "V2DF")])
;; cmp<cond>sd on scalar DFmode operands.  Operand 3 is the comparison
;; operator, printed into the mnemonic by %D3.  Operand 1 is tied to the
;; destination.
1854 (define_insn "sse2_maskcmpdf3"
1855 [(set (match_operand:DF 0 "register_operand" "=x")
1856 (match_operator:DF 3 "sse_comparison_operator"
1857 [(match_operand:DF 1 "register_operand" "0")
1858 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1860 "cmp%D3sd\t{%2, %0|%0, %2}"
1861 [(set_attr "type" "ssecmp")
1862 (set_attr "mode" "DF")])
;; cmp<cond>sd on V2DF operands (attribute mode DF).  Operand 3 is the
;; comparison operator, printed into the mnemonic by %D3.  Operand 1 is
;; tied to the destination.
1864 (define_insn "sse2_vmmaskcmpv2df3"
1865 [(set (match_operand:V2DF 0 "register_operand" "=x")
1867 (match_operator:V2DF 3 "sse_comparison_operator"
1868 [(match_operand:V2DF 1 "register_operand" "0")
1869 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1873 "cmp%D3sd\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "ssecmp")
1875 (set_attr "mode" "DF")])
;; comisd: compare element 0 of operand 0 with element 0 of operand 1 and
;; set the flags register in CCFP mode.  Neither operand is written.
1877 (define_insn "sse2_comi"
1878 [(set (reg:CCFP FLAGS_REG)
1881 (match_operand:V2DF 0 "register_operand" "x")
1882 (parallel [(const_int 0)]))
1884 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1885 (parallel [(const_int 0)]))))]
1887 "comisd\t{%1, %0|%0, %1}"
1888 [(set_attr "type" "ssecomi")
1889 (set_attr "mode" "DF")])
;; ucomisd: compare element 0 of operand 0 with element 0 of operand 1 and
;; set the flags register in CCFPU mode.  Neither operand is written.
1891 (define_insn "sse2_ucomi"
1892 [(set (reg:CCFPU FLAGS_REG)
1895 (match_operand:V2DF 0 "register_operand" "x")
1896 (parallel [(const_int 0)]))
1898 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1899 (parallel [(const_int 0)]))))]
1901 "ucomisd\t{%1, %0|%0, %1}"
1902 [(set_attr "type" "ssecomi")
1903 (set_attr "mode" "DF")])
;; V2DF vector conditional expander.  Operand 3 is the comparison applied
;; to operands 4 and 5; operands 1 and 2 are the two values chosen between.
;; The operands are handed to ix86_expand_fp_vcond.
;; NOTE(review): what happens for each return value of that call is not
;; visible in this excerpt.
1905 (define_expand "vcondv2df"
1906 [(set (match_operand:V2DF 0 "register_operand" "")
1908 (match_operator 3 ""
1909 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1910 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1911 (match_operand:V2DF 1 "general_operand" "")
1912 (match_operand:V2DF 2 "general_operand" "")))]
1915 if (ix86_expand_fp_vcond (operands))
1921 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1923 ;; Parallel double-precision floating point logical operations
1925 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the insn is
;; emitted.
1927 (define_expand "andv2df3"
1928 [(set (match_operand:V2DF 0 "register_operand" "")
1929 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1930 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1932 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.
1934 (define_insn "*andv2df3"
1935 [(set (match_operand:V2DF 0 "register_operand" "=x")
1936 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1938 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1939 "andpd\t{%2, %0|%0, %2}"
1940 [(set_attr "type" "sselog")
1941 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  The complemented operand is the
;; one tied to the destination, so the pattern is not commutative.
1943 (define_insn "sse2_nandv2df3"
1944 [(set (match_operand:V2DF 0 "register_operand" "=x")
1945 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1946 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1948 "andnpd\t{%2, %0|%0, %2}"
1949 [(set_attr "type" "sselog")
1950 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the insn is
;; emitted.
1952 (define_expand "iorv2df3"
1953 [(set (match_operand:V2DF 0 "register_operand" "")
1954 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1955 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1957 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.
1959 (define_insn "*iorv2df3"
1960 [(set (match_operand:V2DF 0 "register_operand" "=x")
1961 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1962 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1963 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1964 "orpd\t{%2, %0|%0, %2}"
1965 [(set_attr "type" "sselog")
1966 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the insn is
;; emitted.
1968 (define_expand "xorv2df3"
1969 [(set (match_operand:V2DF 0 "register_operand" "")
1970 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1971 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1973 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.
1975 (define_insn "*xorv2df3"
1976 [(set (match_operand:V2DF 0 "register_operand" "=x")
1977 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1978 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1979 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1980 "xorpd\t{%2, %0|%0, %2}"
1981 [(set_attr "type" "sselog")
1982 (set_attr "mode" "V2DF")])
1984 ;; Also define scalar versions. These are used for abs, neg, and
1985 ;; conditional move. Using subregs into vector modes causes register
1986 ;; allocation lossage. These patterns do not allow memory operands
1987 ;; because the native instructions read the full 128-bits.
;; Scalar DFmode AND.  Both inputs must be registers; the emitted
;; instruction is the packed andpd (attribute mode V2DF).
1989 (define_insn "*anddf3"
1990 [(set (match_operand:DF 0 "register_operand" "=x")
1991 (and:DF (match_operand:DF 1 "register_operand" "0")
1992 (match_operand:DF 2 "register_operand" "x")))]
1994 "andpd\t{%2, %0|%0, %2}"
1995 [(set_attr "type" "sselog")
1996 (set_attr "mode" "V2DF")])
;; Scalar DFmode (NOT operand 1) AND operand 2.  Both inputs must be
;; registers; the emitted instruction is the packed andnpd.
1998 (define_insn "*nanddf3"
1999 [(set (match_operand:DF 0 "register_operand" "=x")
2000 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2001 (match_operand:DF 2 "register_operand" "x")))]
2003 "andnpd\t{%2, %0|%0, %2}"
2004 [(set_attr "type" "sselog")
2005 (set_attr "mode" "V2DF")])
;; Scalar DFmode OR.  Both inputs must be registers; the emitted
;; instruction is the packed orpd (attribute mode V2DF).
2007 (define_insn "*iordf3"
2008 [(set (match_operand:DF 0 "register_operand" "=x")
2009 (ior:DF (match_operand:DF 1 "register_operand" "0")
2010 (match_operand:DF 2 "register_operand" "x")))]
2012 "orpd\t{%2, %0|%0, %2}"
2013 [(set_attr "type" "sselog")
2014 (set_attr "mode" "V2DF")])
;; Scalar DFmode XOR.  Both inputs must be registers; the emitted
;; instruction is the packed xorpd (attribute mode V2DF).
2016 (define_insn "*xordf3"
2017 [(set (match_operand:DF 0 "register_operand" "=x")
2018 (xor:DF (match_operand:DF 1 "register_operand" "0")
2019 (match_operand:DF 2 "register_operand" "x")))]
2021 "xorpd\t{%2, %0|%0, %2}"
2022 [(set_attr "type" "sselog")
2023 (set_attr "mode" "V2DF")])
2025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2027 ;; Parallel double-precision floating point conversion operations
2029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert two packed SImode integers to two doubles.  The source
;; is an MMX register ("y") or memory ("m"); the "unit" attribute is "mmx"
;; only for the register alternative.
2031 (define_insn "sse2_cvtpi2pd"
2032 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2033 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2035 "cvtpi2pd\t{%1, %0|%0, %1}"
2036 [(set_attr "type" "ssecvt")
2037 (set_attr "unit" "mmx,*")
2038 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert two doubles to two packed SImode integers in an MMX
;; register.  Modelled with UNSPEC_FIX_NOTRUNC rather than (fix).
2040 (define_insn "sse2_cvtpd2pi"
2041 [(set (match_operand:V2SI 0 "register_operand" "=y")
2042 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2043 UNSPEC_FIX_NOTRUNC))]
2045 "cvtpd2pi\t{%1, %0|%0, %1}"
2046 [(set_attr "type" "ssecvt")
2047 (set_attr "unit" "mmx")
2048 (set_attr "prefix_data16" "1")
2049 (set_attr "mode" "DI")])
;; cvttpd2pi: convert two doubles to two packed SImode integers in an MMX
;; register, expressed with (fix:V2SI ...).
2051 (define_insn "sse2_cvttpd2pi"
2052 [(set (match_operand:V2SI 0 "register_operand" "=y")
2053 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2055 "cvttpd2pi\t{%1, %0|%0, %1}"
2056 [(set_attr "type" "ssecvt")
2057 (set_attr "unit" "mmx")
2058 (set_attr "prefix_data16" "1")
2059 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SImode integer in operand 2 (general register or
;; memory) to double precision.  Operand 1 is tied to the destination.  The
;; decode attributes are listed per alternative.
2061 (define_insn "sse2_cvtsi2sd"
2062 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2065 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2066 (match_operand:V2DF 1 "register_operand" "0,0")
2069 "cvtsi2sd\t{%2, %0|%0, %2}"
2070 [(set_attr "type" "sseicvt")
2071 (set_attr "mode" "DF")
2072 (set_attr "athlon_decode" "double,direct")
2073 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source form of cvtsi2sd, available only for
;; TARGET_SSE2 && TARGET_64BIT.  Operand 1 is tied to the destination.
2075 (define_insn "sse2_cvtsi2sdq"
2076 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2079 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2080 (match_operand:V2DF 1 "register_operand" "0,0")
2082 "TARGET_SSE2 && TARGET_64BIT"
2083 "cvtsi2sdq\t{%2, %0|%0, %2}"
2084 [(set_attr "type" "sseicvt")
2085 (set_attr "mode" "DF")
2086 (set_attr "athlon_decode" "double,direct")
2087 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 to an SImode value in
;; a general register.  Modelled with UNSPEC_FIX_NOTRUNC rather than (fix).
2089 (define_insn "sse2_cvtsd2si"
2090 [(set (match_operand:SI 0 "register_operand" "=r,r")
2093 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2094 (parallel [(const_int 0)]))]
2095 UNSPEC_FIX_NOTRUNC))]
2097 "cvtsd2si\t{%1, %0|%0, %1}"
2098 [(set_attr "type" "sseicvt")
2099 (set_attr "athlon_decode" "double,vector")
2100 (set_attr "prefix_rep" "1")
2101 (set_attr "mode" "SI")])
;; cvtsd2si, scalar-source form: the same instruction as the vector-source
;; pattern, but operand 1 is a DF value (SSE register or memory) that is
;; passed directly to UNSPEC_FIX_NOTRUNC.  Result is SImode in a general
;; register.
2103 (define_insn "sse2_cvtsd2si_2"
2104 [(set (match_operand:SI 0 "register_operand" "=r,r")
2105 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2106 UNSPEC_FIX_NOTRUNC))]
2108 "cvtsd2si\t{%1, %0|%0, %1}"
2109 [(set_attr "type" "sseicvt")
2110 (set_attr "athlon_decode" "double,vector")
2111 (set_attr "amdfam10_decode" "double,double")
2112 (set_attr "prefix_rep" "1")
2113 (set_attr "mode" "SI")])
;; cvtsd2siq, vector-source form: DImode result in a general register from
;; V2DF operand 1 with index list [0], wrapped in UNSPEC_FIX_NOTRUNC.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2115 (define_insn "sse2_cvtsd2siq"
2116 [(set (match_operand:DI 0 "register_operand" "=r,r")
2119 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2120 (parallel [(const_int 0)]))]
2121 UNSPEC_FIX_NOTRUNC))]
2122 "TARGET_SSE2 && TARGET_64BIT"
2123 "cvtsd2siq\t{%1, %0|%0, %1}"
2124 [(set_attr "type" "sseicvt")
2125 (set_attr "athlon_decode" "double,vector")
2126 (set_attr "prefix_rep" "1")
2127 (set_attr "mode" "DI")])
;; cvtsd2siq, scalar-source form: DImode result in a general register from
;; DF operand 1 (SSE register or memory) via UNSPEC_FIX_NOTRUNC.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2129 (define_insn "sse2_cvtsd2siq_2"
2130 [(set (match_operand:DI 0 "register_operand" "=r,r")
2131 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2132 UNSPEC_FIX_NOTRUNC))]
2133 "TARGET_SSE2 && TARGET_64BIT"
2134 "cvtsd2siq\t{%1, %0|%0, %1}"
2135 [(set_attr "type" "sseicvt")
2136 (set_attr "athlon_decode" "double,vector")
2137 (set_attr "amdfam10_decode" "double,double")
2138 (set_attr "prefix_rep" "1")
2139 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result in a general register from V2DF operand 1
;; (SSE register or memory) with index list [0].
;; NOTE(review): the rtx wrapping the selection is not visible here;
;; presumably a fix:SI, matching the truncating mnemonic -- confirm.
2141 (define_insn "sse2_cvttsd2si"
2142 [(set (match_operand:SI 0 "register_operand" "=r,r")
2145 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2146 (parallel [(const_int 0)]))))]
2148 "cvttsd2si\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "sseicvt")
2150 (set_attr "prefix_rep" "1")
2151 (set_attr "mode" "SI")
2152 (set_attr "athlon_decode" "double,vector")
2153 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode counterpart of the cvttsd2si pattern.  Result goes to
;; a general register; the source is V2DF operand 1 with index list [0].
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2155 (define_insn "sse2_cvttsd2siq"
2156 [(set (match_operand:DI 0 "register_operand" "=r,r")
2159 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2160 (parallel [(const_int 0)]))))]
2161 "TARGET_SSE2 && TARGET_64BIT"
2162 "cvttsd2siq\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "sseicvt")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "mode" "DI")
2166 (set_attr "athlon_decode" "double,vector")
2167 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: V2DF result in an SSE register from V4SI operand 1
;; (SSE register or memory) with index list [0 1], i.e. only two of the
;; four source elements feed the result.
2169 (define_insn "sse2_cvtdq2pd"
2170 [(set (match_operand:V2DF 0 "register_operand" "=x")
2173 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2174 (parallel [(const_int 0) (const_int 1)]))))]
2176 "cvtdq2pd\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "ssecvt")
2178 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The V4SI destination is built from an
;; unspec:V2SI of V2DF operand 1; the preparation statement supplies
;; operands[2] as the V2SImode zero constant.
2180 (define_expand "sse2_cvtpd2dq"
2181 [(set (match_operand:V4SI 0 "register_operand" "")
2183 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2187 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: V4SI destination in an SSE register, built from an
;; unspec:V2SI of V2DF operand 1 (register or memory) together with V2SI
;; operand 2, which must satisfy const0_operand.
2189 (define_insn "*sse2_cvtpd2dq"
2190 [(set (match_operand:V4SI 0 "register_operand" "=x")
2192 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2194 (match_operand:V2SI 2 "const0_operand" "")))]
2196 "cvtpd2dq\t{%1, %0|%0, %1}"
2197 [(set_attr "type" "ssecvt")
2198 (set_attr "prefix_rep" "1")
2199 (set_attr "mode" "TI")
2200 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The V4SI destination is built from
;; fix:V2SI of V2DF operand 1; the preparation statement supplies
;; operands[2] as the V2SImode zero constant.
2202 (define_expand "sse2_cvttpd2dq"
2203 [(set (match_operand:V4SI 0 "register_operand" "")
2205 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2208 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: V4SI destination in an SSE register, built from
;; fix:V2SI of V2DF operand 1 (register or memory) together with V2SI
;; operand 2, which must satisfy const0_operand.
2210 (define_insn "*sse2_cvttpd2dq"
2211 [(set (match_operand:V4SI 0 "register_operand" "=x")
2213 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2214 (match_operand:V2SI 2 "const0_operand" "")))]
2216 "cvttpd2dq\t{%1, %0|%0, %1}"
2217 [(set_attr "type" "ssecvt")
2218 (set_attr "prefix_rep" "1")
2219 (set_attr "mode" "TI")
2220 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (SSE register or memory)
;; feeding the V4SF destination, operand 0.  Operand 1 is tied to the
;; destination in both alternatives (constraint "0").
2222 (define_insn "sse2_cvtsd2ss"
2223 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2226 (float_truncate:V2SF
2227 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2228 (match_operand:V4SF 1 "register_operand" "0,0")
2231 "cvtsd2ss\t{%2, %0|%0, %2}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "athlon_decode" "vector,double")
2234 (set_attr "amdfam10_decode" "vector,double")
2235 (set_attr "mode" "SF")])
;; cvtss2sd: the source is V4SF operand 2 (SSE register or memory) with
;; index list [0 1]; the destination is V2DF operand 0.  Operand 1 is tied
;; to the destination in both alternatives (constraint "0").
2237 (define_insn "sse2_cvtss2sd"
2238 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2242 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2243 (parallel [(const_int 0) (const_int 1)])))
2244 (match_operand:V2DF 1 "register_operand" "0,0")
2247 "cvtss2sd\t{%2, %0|%0, %2}"
2248 [(set_attr "type" "ssecvt")
2249 (set_attr "amdfam10_decode" "vector,double")
2250 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The V4SF destination is built from
;; float_truncate:V2SF of V2DF operand 1; the preparation statement
;; supplies operands[2] as the V2SFmode zero constant.
2252 (define_expand "sse2_cvtpd2ps"
2253 [(set (match_operand:V4SF 0 "register_operand" "")
2255 (float_truncate:V2SF
2256 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2259 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: V4SF destination in an SSE register, built from
;; float_truncate:V2SF of V2DF operand 1 (register or memory) together
;; with V2SF operand 2, which must satisfy const0_operand.
2261 (define_insn "*sse2_cvtpd2ps"
2262 [(set (match_operand:V4SF 0 "register_operand" "=x")
2264 (float_truncate:V2SF
2265 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2266 (match_operand:V2SF 2 "const0_operand" "")))]
2268 "cvtpd2ps\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssecvt")
2270 (set_attr "prefix_data16" "1")
2271 (set_attr "mode" "V4SF")
2272 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: V2DF result in an SSE register from V4SF operand 1
;; (SSE register or memory) with index list [0 1].
2274 (define_insn "sse2_cvtps2pd"
2275 [(set (match_operand:V2DF 0 "register_operand" "=x")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2279 (parallel [(const_int 0) (const_int 1)]))))]
2281 "cvtps2pd\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "mode" "V2DF")
2284 (set_attr "amdfam10_decode" "direct")])
;; Expander vec_unpacks_hi_v4sf.  The pattern ends with a set of V2DF
;; operand 0 that uses index list [0 1]; before that, V4SF operand 1
;; appears under an index list beginning with 6.  The preparation code
;; allocates operands[2] as a fresh V4SF pseudo register.
;; NOTE(review): presumably operands[2] receives the upper half of
;; operand 1 before the conversion -- confirm against the full pattern.
2286 (define_expand "vec_unpacks_hi_v4sf"
2291 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2292 (parallel [(const_int 6)
2296 (set (match_operand:V2DF 0 "register_operand" "")
2300 (parallel [(const_int 0) (const_int 1)]))))]
2303 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander vec_unpacks_lo_v4sf: sets V2DF operand 0 from V4SF operand 1
;; using index list [0 1].
2306 (define_expand "vec_unpacks_lo_v4sf"
2307 [(set (match_operand:V2DF 0 "register_operand" "")
2310 (match_operand:V4SF 1 "nonimmediate_operand" "")
2311 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander vec_unpacks_float_hi_v8hi: V8HI operand 1 -> V4SF operand 0 in
;; two steps.  vec_unpacks_hi_v8hi first produces a V4SI temporary, which
;; sse2_cvtdq2ps then converts into operand 0.
2314 (define_expand "vec_unpacks_float_hi_v8hi"
2315 [(match_operand:V4SF 0 "register_operand" "")
2316 (match_operand:V8HI 1 "register_operand" "")]
2319 rtx tmp = gen_reg_rtx (V4SImode);
2321 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2322 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacks_float_lo_v8hi: V8HI operand 1 -> V4SF operand 0 in
;; two steps.  vec_unpacks_lo_v8hi first produces a V4SI temporary, which
;; sse2_cvtdq2ps then converts into operand 0.
2326 (define_expand "vec_unpacks_float_lo_v8hi"
2327 [(match_operand:V4SF 0 "register_operand" "")
2328 (match_operand:V8HI 1 "register_operand" "")]
2331 rtx tmp = gen_reg_rtx (V4SImode);
2333 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2334 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacku_float_hi_v8hi: V8HI operand 1 -> V4SF operand 0 in
;; two steps.  vec_unpacku_hi_v8hi first produces a V4SI temporary, which
;; sse2_cvtdq2ps then converts into operand 0.
2338 (define_expand "vec_unpacku_float_hi_v8hi"
2339 [(match_operand:V4SF 0 "register_operand" "")
2340 (match_operand:V8HI 1 "register_operand" "")]
2343 rtx tmp = gen_reg_rtx (V4SImode);
2345 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2346 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacku_float_lo_v8hi: V8HI operand 1 -> V4SF operand 0 in
;; two steps.  vec_unpacku_lo_v8hi first produces a V4SI temporary, which
;; sse2_cvtdq2ps then converts into operand 0.
2350 (define_expand "vec_unpacku_float_lo_v8hi"
2351 [(match_operand:V4SF 0 "register_operand" "")
2352 (match_operand:V8HI 1 "register_operand" "")]
2355 rtx tmp = gen_reg_rtx (V4SImode);
2357 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2358 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacks_float_hi_v4si.  The pattern ends with a set of V2DF
;; operand 0 that uses index list [0 1]; before that, V4SI operand 1
;; appears under an index list beginning with 2.  The preparation code
;; allocates operands[2] as a fresh V4SI pseudo register.
;; NOTE(review): presumably operands[2] receives the upper elements of
;; operand 1 before the conversion -- confirm against the full pattern.
2362 (define_expand "vec_unpacks_float_hi_v4si"
2365 (match_operand:V4SI 1 "nonimmediate_operand" "")
2366 (parallel [(const_int 2)
2370 (set (match_operand:V2DF 0 "register_operand" "")
2374 (parallel [(const_int 0) (const_int 1)]))))]
2377 operands[2] = gen_reg_rtx (V4SImode);
;; Expander vec_unpacks_float_lo_v4si: sets V2DF operand 0 from V4SI
;; operand 1 using index list [0 1].
2380 (define_expand "vec_unpacks_float_lo_v4si"
2381 [(set (match_operand:V2DF 0 "register_operand" "")
2384 (match_operand:V4SI 1 "nonimmediate_operand" "")
2385 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander vec_pack_trunc_v2df: packs two V2DF inputs (operands 1 and 2)
;; into V4SF operand 0.  Each input is converted with sse2_cvtpd2ps into
;; its own V4SF temporary (r1, r2), and sse_movlhps then combines r1 and r2
;; into the destination.
2388 (define_expand "vec_pack_trunc_v2df"
2389 [(match_operand:V4SF 0 "register_operand" "")
2390 (match_operand:V2DF 1 "nonimmediate_operand" "")
2391 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2396 r1 = gen_reg_rtx (V4SFmode);
2397 r2 = gen_reg_rtx (V4SFmode);
2399 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2400 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2401 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander vec_pack_sfix_trunc_v2df: packs two V2DF inputs (operands 1 and
;; 2) into V4SI operand 0.  Each input is converted with sse2_cvttpd2dq
;; into its own V4SI temporary (r1, r2); sse2_punpcklqdq then combines the
;; two temporaries, with all three registers viewed as V2DI via gen_lowpart.
2405 (define_expand "vec_pack_sfix_trunc_v2df"
2406 [(match_operand:V4SI 0 "register_operand" "")
2407 (match_operand:V2DF 1 "nonimmediate_operand" "")
2408 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2413 r1 = gen_reg_rtx (V4SImode);
2414 r2 = gen_reg_rtx (V4SImode);
2416 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2417 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2418 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2419 gen_lowpart (V2DImode, r1),
2420 gen_lowpart (V2DImode, r2)));
;; Expander vec_pack_sfix_v2df: packs two V2DF inputs (operands 1 and 2)
;; into V4SI operand 0.  Each input is converted with sse2_cvtpd2dq into
;; its own V4SI temporary (r1, r2); sse2_punpcklqdq then combines the two
;; temporaries, with all three registers viewed as V2DI via gen_lowpart.
2424 (define_expand "vec_pack_sfix_v2df"
2425 [(match_operand:V4SI 0 "register_operand" "")
2426 (match_operand:V2DF 1 "nonimmediate_operand" "")
2427 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2432 r1 = gen_reg_rtx (V4SImode);
2433 r2 = gen_reg_rtx (V4SImode);
2435 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2436 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2437 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2438 gen_lowpart (V2DImode, r1),
2439 gen_lowpart (V2DImode, r2)));
2444 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2446 ;; Parallel double-precision floating point element swizzling
2448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_unpckhpd: V2DF selection over operands 1 and 2 whose index list
;; starts with element 1.  Three alternatives:
;;   register form      -> unpckhpd
;;   operand 1 in offsettable memory ("o") -> movlpd loading from %H1
;;   memory destination -> movhpd storing from operand 1
;; The condition rejects the case where operands 1 and 2 are both memory.
2450 (define_insn "sse2_unpckhpd"
2451 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2454 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2455 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2456 (parallel [(const_int 1)
2458 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2460 unpckhpd\t{%2, %0|%0, %2}
2461 movlpd\t{%H1, %0|%0, %H1}
2462 movhpd\t{%1, %0|%0, %1}"
2463 [(set_attr "type" "sselog,ssemov,ssemov")
2464 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; *sse3_movddup: V2DF selection from operand 1 whose index list starts
;; with element 0.  Two alternatives: an SSE register destination, for
;; which movddup is emitted, and an offsettable memory destination ("o")
;; with a register source.  Requires TARGET_SSE3 and rejects memory-to-
;; memory.
;; NOTE(review): the output template for the memory alternative is not
;; visible here -- confirm how it is handled.
2466 (define_insn "*sse3_movddup"
2467 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2470 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2472 (parallel [(const_int 0)
2474 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2476 movddup\t{%1, %0|%0, %1}
2478 [(set_attr "type" "sselog1,ssemov")
2479 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination whose
;; source is register operand 1 under an index list starting with 0.
;; The register is re-viewed as DFmode ("low") and stored twice, at byte
;; offsets 0 and 8 of the destination memory.
2482 [(set (match_operand:V2DF 0 "memory_operand" "")
2485 (match_operand:V2DF 1 "register_operand" "")
2487 (parallel [(const_int 0)
2489 "TARGET_SSE3 && reload_completed"
2492 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2493 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2494 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; sse2_unpcklpd: V2DF selection over operands 1 and 2 whose index list
;; starts with element 0.  Operand 1 is tied to the destination in every
;; alternative.  Three alternatives:
;;   register operand 2 -> unpcklpd
;;   memory operand 2   -> movhpd loading from operand 2
;;   offsettable memory destination -> movlpd storing operand 2 to %H0
;; The condition rejects the case where operands 1 and 2 are both memory.
2498 (define_insn "sse2_unpcklpd"
2499 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2502 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2503 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2504 (parallel [(const_int 0)
2506 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2508 unpcklpd\t{%2, %0|%0, %2}
2509 movhpd\t{%2, %0|%0, %2}
2510 movlpd\t{%2, %H0|%H0, %2}"
2511 [(set_attr "type" "sselog,ssemov,ssemov")
2512 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander sse2_shufpd: takes the shuffle immediate as const_int operand 3
;; and forwards to sse2_shufpd_1, translating mask bits into element
;; indices.  The visible final argument maps bit 1 of the mask to index 3
;; (bit set) or 2 (bit clear).
;; NOTE(review): the argument derived from bit 0 is not visible here.
2514 (define_expand "sse2_shufpd"
2515 [(match_operand:V2DF 0 "register_operand" "")
2516 (match_operand:V2DF 1 "register_operand" "")
2517 (match_operand:V2DF 2 "nonimmediate_operand" "")
2518 (match_operand:SI 3 "const_int_operand" "")]
2521 int mask = INTVAL (operands[3]);
2522 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2524 GEN_INT (mask & 2 ? 3 : 2)));
;; sse2_shufpd_1: shufpd with the selection given as two element indices.
;; Operand 3 is in the range 0..1 and operand 4 in the range 2..3.  The
;; output code rebuilds the instruction immediate: bit 0 comes from
;; operand 3 and bit 1 from (operand 4 - 2); the result replaces
;; operands[3] before the shufpd template is returned.
2528 (define_insn "sse2_shufpd_1"
2529 [(set (match_operand:V2DF 0 "register_operand" "=x")
2532 (match_operand:V2DF 1 "register_operand" "0")
2533 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2534 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2535 (match_operand 4 "const_2_to_3_operand" "")])))]
2539 mask = INTVAL (operands[3]);
2540 mask |= (INTVAL (operands[4]) - 2) << 1;
2541 operands[3] = GEN_INT (mask);
2543 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2545 [(set_attr "type" "sselog")
2546 (set_attr "mode" "V2DF")])
;; sse2_storehpd: extracts element 1 of V2DF operand 1 into DF operand 0.
;; Three alternatives: store to memory (movhpd), register source tied to
;; the destination, and offsettable memory source with an SSE, x87 or
;; general register destination.  Memory-to-memory is rejected.
;; NOTE(review): only the first alternative's template is visible here.
2548 (define_insn "sse2_storehpd"
2549 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2551 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2552 (parallel [(const_int 1)])))]
2553 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2555 movhpd\t{%1, %0|%0, %1}
2558 [(set_attr "type" "ssemov,sselog1,ssemov")
2559 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF memory
;; operand into a DF register becomes a plain DF move from the same memory
;; re-addressed at byte offset 8.
2562 [(set (match_operand:DF 0 "register_operand" "")
2564 (match_operand:V2DF 1 "memory_operand" "")
2565 (parallel [(const_int 1)])))]
2566 "TARGET_SSE2 && reload_completed"
2567 [(set (match_dup 0) (match_dup 1))]
2569 operands[1] = adjust_address (operands[1], DFmode, 8);
;; sse2_storelpd: extracts element 0 of V2DF operand 1 into DF operand 0.
;; Three alternatives: store to memory (movlpd), register to SSE register,
;; and memory source with an SSE, x87 or general register destination.
;; Memory-to-memory is rejected.
;; NOTE(review): only the first alternative's template is visible here.
2572 (define_insn "sse2_storelpd"
2573 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2575 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2576 (parallel [(const_int 0)])))]
2577 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2579 movlpd\t{%1, %0|%0, %1}
2582 [(set_attr "type" "ssemov")
2583 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of V2DF operand 1
;; into a DF register becomes a plain DF move.  The source is re-viewed as
;; DFmode either by building a DFmode REG with the same register number or
;; through gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; here; presumably register vs. non-register operand -- confirm.
2586 [(set (match_operand:DF 0 "register_operand" "")
2588 (match_operand:V2DF 1 "nonimmediate_operand" "")
2589 (parallel [(const_int 0)])))]
2590 "TARGET_SSE2 && reload_completed"
2593 rtx op1 = operands[1];
2595 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2597 op1 = gen_lowpart (DFmode, op1);
2598 emit_move_insn (operands[0], op1);
;; sse2_loadhpd: builds V2DF operand 0 from element 0 of V2DF operand 1
;; plus DF operand 2.  Four alternatives:
;;   DF in memory                 -> movhpd
;;   DF in an SSE register        -> unpcklpd
;;   DF tied to the destination   -> shufpd with immediate 1
;;   offsettable memory destination with DF in an SSE, x87 or general
;;   register (type "other")
;; The condition rejects the case where operands 1 and 2 are both memory.
;; NOTE(review): the template of the fourth alternative is not visible.
2602 (define_insn "sse2_loadhpd"
2603 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2606 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2607 (parallel [(const_int 0)]))
2608 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2609 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2611 movhpd\t{%2, %0|%0, %2}
2612 unpcklpd\t{%2, %0|%0, %2}
2613 shufpd\t{$1, %1, %0|%0, %1, 1}
2615 [(set_attr "type" "ssemov,sselog,sselog,other")
2616 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2): when the V2DF destination is memory and
;; element 0 is taken from that same memory (match_dup 0), only the other
;; element changes.  The pattern becomes a DF store of register operand 1
;; at byte offset 8 of the destination.
2619 [(set (match_operand:V2DF 0 "memory_operand" "")
2621 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2622 (match_operand:DF 1 "register_operand" "")))]
2623 "TARGET_SSE2 && reload_completed"
2624 [(set (match_dup 0) (match_dup 1))]
2626 operands[0] = adjust_address (operands[0], DFmode, 8);
;; sse2_loadlpd: builds V2DF operand 0 from DF operand 2 plus element 1 of
;; operand 1 (a V2DF value, or the zero vector via constraint "C").
;; Six alternatives; the visible templates cover the first five:
;;   movsd (zero vector + memory DF), movlpd (memory DF), movsd (register
;;   DF), shufpd with immediate 2, movhpd loading from %H1.
;; The sixth alternative has a memory destination (type "other").
;; The condition rejects the case where operands 1 and 2 are both memory.
;; NOTE(review): the template of the sixth alternative is not visible.
2629 (define_insn "sse2_loadlpd"
2630 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2632 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2634 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2635 (parallel [(const_int 1)]))))]
2636 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2638 movsd\t{%2, %0|%0, %2}
2639 movlpd\t{%2, %0|%0, %2}
2640 movsd\t{%2, %0|%0, %2}
2641 shufpd\t{$2, %2, %0|%0, %2, 2}
2642 movhpd\t{%H1, %0|%0, %H1}
2644 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2645 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split (TARGET_SSE2): when the V2DF destination is memory and
;; element 1 is taken from that same memory (match_dup 0), only the other
;; element -- DF register operand 1, listed first -- changes.  The pattern
;; becomes a DF store of operand 1 into the destination.
;; The store must target byte offset 0: offset 8 is where the preserved
;; element 1 lives, so storing there would clobber the element this
;; pattern keeps and leave element 0 unchanged.
2648 [(set (match_operand:V2DF 0 "memory_operand" "")
2650 (match_operand:DF 1 "register_operand" "")
2651 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2652 "TARGET_SSE2 && reload_completed"
2653 [(set (match_dup 0) (match_dup 1))]
2655 operands[0] = adjust_address (operands[0], DFmode, 0);
2658 ;; Not sure these two are ever used, but it doesn't hurt to have
;; *vec_extractv2df_1_sse: extracts element 1 of V2DF operand 1 into DF
;; operand 0 when SSE is available but SSE2 is not.  Three alternatives:
;;   store to memory             -> movhps
;;   register to register        -> movhlps
;;   offsettable memory source   -> movlps loading from %H1
;; Memory-to-memory is rejected.
2660 (define_insn "*vec_extractv2df_1_sse"
2661 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2663 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2664 (parallel [(const_int 1)])))]
2665 "!TARGET_SSE2 && TARGET_SSE
2666 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2668 movhps\t{%1, %0|%0, %1}
2669 movhlps\t{%1, %0|%0, %1}
2670 movlps\t{%H1, %0|%0, %H1}"
2671 [(set_attr "type" "ssemov")
2672 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_extractv2df_0_sse: extracts element 0 of V2DF operand 1 into DF
;; operand 0 when SSE is available but SSE2 is not.  Three alternatives:
;;   store to memory        -> movlps
;;   register to register   -> movaps
;;   memory source          -> movlps
;; Memory-to-memory is rejected.
2674 (define_insn "*vec_extractv2df_0_sse"
2675 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2677 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2678 (parallel [(const_int 0)])))]
2679 "!TARGET_SSE2 && TARGET_SSE
2680 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2682 movlps\t{%1, %0|%0, %1}
2683 movaps\t{%1, %0|%0, %1}
2684 movlps\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssemov")
2686 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: combines V2DF operands 2 and 1 into V2DF operand 0.
;; Six alternatives, selected by which operand is tied to the destination
;; and where the other one lives:
;;   movsd (register operand 2), movlpd (memory operand 2), movlpd (store
;;   to memory), shufpd with immediate 2, movhps loading from %H1, and
;;   movhps storing operand 1 to %H0.
;; NOTE(review): the combining rtx and the insn condition are not visible
;; here.
2688 (define_insn "sse2_movsd"
2689 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2691 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2692 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2696 movsd\t{%2, %0|%0, %2}
2697 movlpd\t{%2, %0|%0, %2}
2698 movlpd\t{%2, %0|%0, %2}
2699 shufpd\t{$2, %2, %0|%0, %2, 2}
2700 movhps\t{%H1, %0|%0, %H1}
2701 movhps\t{%1, %H0|%H0, %1}"
2702 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2703 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; *vec_dupv2df_sse3: V2DF register result from DF operand 1 (SSE register
;; or memory), emitted as movddup.
;; NOTE(review): the rtx code and the condition are not visible here;
;; by the pattern name this is a vec_duplicate under SSE3 -- confirm.
2705 (define_insn "*vec_dupv2df_sse3"
2706 [(set (match_operand:V2DF 0 "register_operand" "=x")
2708 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2710 "movddup\t{%1, %0|%0, %1}"
2711 [(set_attr "type" "sselog1")
2712 (set_attr "mode" "DF")])
;; *vec_dupv2df: V2DF register result from DF register operand 1, which is
;; tied to the destination (constraint "0").
;; NOTE(review): the rtx code, condition and output template are not
;; visible here.
2714 (define_insn "*vec_dupv2df"
2715 [(set (match_operand:V2DF 0 "register_operand" "=x")
2717 (match_operand:DF 1 "register_operand" "0")))]
2720 [(set_attr "type" "sselog1")
2721 (set_attr "mode" "V2DF")])
;; *vec_concatv2df_sse3: V2DF register result involving DF operand 1
;; (SSE register or memory), emitted as movddup.
;; NOTE(review): the second concat element and the condition are not
;; visible here; since only %1 appears in the template, presumably both
;; halves are operand 1 -- confirm.
2723 (define_insn "*vec_concatv2df_sse3"
2724 [(set (match_operand:V2DF 0 "register_operand" "=x")
2726 (match_operand:DF 1 "nonimmediate_operand" "xm")
2729 "movddup\t{%1, %0|%0, %1}"
2730 [(set_attr "type" "sselog1")
2731 (set_attr "mode" "DF")])
;; *vec_concatv2df: forms V2DF operand 0 from DF operands 1 and 2.
;; Five alternatives:
;;   Yt, op1 tied, op2 in Yt register -> unpcklpd
;;   Yt, op1 tied, op2 in memory      -> movhpd
;;   Yt, op1 in memory, op2 zero (C)  -> movsd loading operand 1
;;   x,  op1 tied, op2 in register    -> movlhps
;;   x,  op1 tied, op2 in memory      -> movhps
2733 (define_insn "*vec_concatv2df"
2734 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2736 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2737 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2740 unpcklpd\t{%2, %0|%0, %2}
2741 movhpd\t{%2, %0|%0, %2}
2742 movsd\t{%1, %0|%0, %1}
2743 movlhps\t{%2, %0|%0, %2}
2744 movhps\t{%2, %0|%0, %2}"
2745 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2746 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander vec_setv2df: delegates to ix86_expand_vector_set, passing
;; false as the first argument, the V2DF vector (operand 0), the DF value
;; (operand 1) and the constant index taken from operand 2.
2748 (define_expand "vec_setv2df"
2749 [(match_operand:V2DF 0 "register_operand" "")
2750 (match_operand:DF 1 "register_operand" "")
2751 (match_operand 2 "const_int_operand" "")]
2754 ix86_expand_vector_set (false, operands[0], operands[1],
2755 INTVAL (operands[2]));
;; Expander vec_extractv2df: delegates to ix86_expand_vector_extract,
;; passing false as the first argument, the DF destination (operand 0),
;; the V2DF source (operand 1) and the constant index taken from operand 2.
2759 (define_expand "vec_extractv2df"
2760 [(match_operand:DF 0 "register_operand" "")
2761 (match_operand:V2DF 1 "register_operand" "")
2762 (match_operand 2 "const_int_operand" "")]
2765 ix86_expand_vector_extract (false, operands[0], operands[1],
2766 INTVAL (operands[2]));
;; Expander vec_initv2df: delegates to ix86_expand_vector_init, passing
;; false as the first argument, the V2DF destination (operand 0) and the
;; unconstrained initializer operand 1.
2770 (define_expand "vec_initv2df"
2771 [(match_operand:V2DF 0 "register_operand" "")
2772 (match_operand 1 "" "")]
2775 ix86_expand_vector_init (false, operands[0], operands[1]);
2779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2781 ;; Parallel integral arithmetic
2783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander neg<mode>2 for every SSEMODEI integer vector mode.  The
;; preparation statement forces a zero vector of the same mode into a
;; register as operands[2].
;; NOTE(review): the rtx code is not visible here; presumably the negation
;; is expressed as (minus zero, operand 1) -- confirm.
2785 (define_expand "neg<mode>2"
2786 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2789 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2791 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander add<mode>3 for every SSEMODEI integer vector mode: a plain
;; vector plus.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy for PLUS.
2793 (define_expand "add<mode>3"
2794 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2795 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2796 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2798 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matcher for integer vector add: emits padd with the element-size suffix
;; taken from ssevecsize (b/w/d/q).  Operand 1 is commutative and tied to
;; the destination ("%0"); operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
2800 (define_insn "*add<mode>3"
2801 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2803 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2804 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2805 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2806 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2807 [(set_attr "type" "sseiadd")
2808 (set_attr "prefix_data16" "1")
2809 (set_attr "mode" "TI")])
;; sse2_ssadd<mode>3: emits padds{b,w} for the SSEMODE12 modes (V16QI and
;; V8HI).  Operand 1 is commutative and tied to the destination; operand 2
;; may be a register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for SS_PLUS.
2811 (define_insn "sse2_ssadd<mode>3"
2812 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2814 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2815 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2816 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2817 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2818 [(set_attr "type" "sseiadd")
2819 (set_attr "prefix_data16" "1")
2820 (set_attr "mode" "TI")])
;; sse2_usadd<mode>3: emits paddus{b,w} for the SSEMODE12 modes (V16QI and
;; V8HI).  Operand 1 is commutative and tied to the destination; operand 2
;; may be a register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for US_PLUS.
2822 (define_insn "sse2_usadd<mode>3"
2823 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2825 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2826 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2827 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2828 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2829 [(set_attr "type" "sseiadd")
2830 (set_attr "prefix_data16" "1")
2831 (set_attr "mode" "TI")])
;; Expander sub<mode>3 for every SSEMODEI integer vector mode: a plain
;; vector minus with a register minuend.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy for MINUS.
2833 (define_expand "sub<mode>3"
2834 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2835 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2836 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2838 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matcher for integer vector subtract: emits psub with the element-size
;; suffix taken from ssevecsize (b/w/d/q).  Operand 1 is a register tied to
;; the destination and is not marked commutative; operand 2 may be a
;; register or memory.
2840 (define_insn "*sub<mode>3"
2841 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2843 (match_operand:SSEMODEI 1 "register_operand" "0")
2844 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2846 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2847 [(set_attr "type" "sseiadd")
2848 (set_attr "prefix_data16" "1")
2849 (set_attr "mode" "TI")])
;; sse2_sssub<mode>3: emits psubs{b,w} for the SSEMODE12 modes (V16QI and
;; V8HI).  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
2851 (define_insn "sse2_sssub<mode>3"
2852 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2854 (match_operand:SSEMODE12 1 "register_operand" "0")
2855 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2857 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2858 [(set_attr "type" "sseiadd")
2859 (set_attr "prefix_data16" "1")
2860 (set_attr "mode" "TI")])
;; sse2_ussub<mode>3: emits psubus{b,w} for the SSEMODE12 modes (V16QI and
;; V8HI).  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
2862 (define_insn "sse2_ussub<mode>3"
2863 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2865 (match_operand:SSEMODE12 1 "register_operand" "0")
2866 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2868 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2869 [(set_attr "type" "sseiadd")
2870 (set_attr "prefix_data16" "1")
2871 (set_attr "mode" "TI")])
;; Expander mulv16qi3: V16QI multiply synthesized from word multiplies.
;; Twelve V16QI temporaries t[0..11] are allocated.  Each input is
;; interleaved with itself by punpckhbw/punpcklbw, the high and low halves
;; are multiplied as V8HI through gen_mulv8hi3, and a cascade of
;; punpckhbw/punpcklbw steps merges the product bytes into op0.  The
;; end-of-line annotations track the byte layout after each step.
;; NOTE(review): the declaration of t/i and the assignment of op0 are not
;; visible here.
2873 (define_expand "mulv16qi3"
2874 [(set (match_operand:V16QI 0 "register_operand" "")
2875 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2876 (match_operand:V16QI 2 "register_operand" "")))]
2882 for (i = 0; i < 12; ++i)
2883 t[i] = gen_reg_rtx (V16QImode);
2885 /* Unpack data such that we've got a source byte in each low byte of
2886 each word. We don't care what goes into the high byte of each word.
2887 Rather than trying to get zero in there, most convenient is to let
2888 it be a copy of the low byte. */
2889 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2890 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2891 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2892 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2894 /* Multiply words. The end-of-line annotations here give a picture of what
2895 the output of that instruction looks like. Dot means don't care; the
2896 letters are the bytes of the result with A being the most significant. */
2897 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2898 gen_lowpart (V8HImode, t[0]),
2899 gen_lowpart (V8HImode, t[1])));
2900 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2901 gen_lowpart (V8HImode, t[2]),
2902 gen_lowpart (V8HImode, t[3])));
2904 /* Extract the relevant bytes and merge them back together. */
2905 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2906 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2907 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2908 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2909 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2910 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2913 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Expander mulv8hi3: plain V8HI mult.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy for MULT.
2917 (define_expand "mulv8hi3"
2918 [(set (match_operand:V8HI 0 "register_operand" "")
2919 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2920 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2922 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for V8HI mult: emits pmullw.  Operand 1 is commutative and tied
;; to the destination ("%0"); operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for MULT.
2924 (define_insn "*mulv8hi3"
2925 [(set (match_operand:V8HI 0 "register_operand" "=x")
2926 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2927 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2928 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2929 "pmullw\t{%2, %0|%0, %2}"
2930 [(set_attr "type" "sseimul")
2931 (set_attr "prefix_data16" "1")
2932 (set_attr "mode" "TI")])
;; Expander smulv8hi3_highpart: V8HI result from V8HI operands 1 and 2.
;; The operands are first passed through
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the extension/shift/truncate wrappers of the RTL are not
;; visible here.
2934 (define_expand "smulv8hi3_highpart"
2935 [(set (match_operand:V8HI 0 "register_operand" "")
2940 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2942 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2945 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the signed highpart V8HI multiply: emits pmulhw.
;; Operand 1 is commutative and tied to the destination; operand 2 may be
;; a register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for MULT.
2947 (define_insn "*smulv8hi3_highpart"
2948 [(set (match_operand:V8HI 0 "register_operand" "=x")
2953 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2955 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2957 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2958 "pmulhw\t{%2, %0|%0, %2}"
2959 [(set_attr "type" "sseimul")
2960 (set_attr "prefix_data16" "1")
2961 (set_attr "mode" "TI")])
;; Expander umulv8hi3_highpart: V8HI result from V8HI operands 1 and 2.
;; The operands are first passed through
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the extension/shift/truncate wrappers of the RTL are not
;; visible here.
2963 (define_expand "umulv8hi3_highpart"
2964 [(set (match_operand:V8HI 0 "register_operand" "")
2969 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2971 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2974 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the unsigned highpart V8HI multiply: emits pmulhuw.
;; Operand 1 is commutative and tied to the destination; operand 2 may be
;; a register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for MULT.
2976 (define_insn "*umulv8hi3_highpart"
2977 [(set (match_operand:V8HI 0 "register_operand" "=x")
2982 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2984 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2986 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2987 "pmulhuw\t{%2, %0|%0, %2}"
2988 [(set_attr "type" "sseimul")
2989 (set_attr "prefix_data16" "1")
2990 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: emits pmuludq.  Elements 0 and 2 of each V4SI input
;; (operands 1 and 2) are selected and combined into the V2DI result.
;; Operand 1 is commutative and tied to the destination.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for MULT.
;; NOTE(review): the extension and mult rtx codes are not visible here.
2992 (define_insn "sse2_umulv2siv2di3"
2993 [(set (match_operand:V2DI 0 "register_operand" "=x")
2997 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2998 (parallel [(const_int 0) (const_int 2)])))
3001 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3002 (parallel [(const_int 0) (const_int 2)])))))]
3003 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3004 "pmuludq\t{%2, %0|%0, %2}"
3005 [(set_attr "type" "sseimul")
3006 (set_attr "prefix_data16" "1")
3007 (set_attr "mode" "TI")])
;; sse4_1_mulv2siv2di3: emits pmuldq.  Elements 0 and 2 of each V4SI input
;; (operands 1 and 2) are selected and combined into the V2DI result.
;; Operand 1 is commutative and tied to the destination.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok for MULT.
;; NOTE(review): the extension and mult rtx codes are not visible here.
3009 (define_insn "sse4_1_mulv2siv2di3"
3010 [(set (match_operand:V2DI 0 "register_operand" "=x")
3014 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3015 (parallel [(const_int 0) (const_int 2)])))
3018 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3019 (parallel [(const_int 0) (const_int 2)])))))]
3020 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3021 "pmuldq\t{%2, %0|%0, %2}"
3022 [(set_attr "type" "sseimul")
3023 (set_attr "prefix_extra" "1")
3024 (set_attr "mode" "TI")])
;; sse2_pmaddwd: emits pmaddwd.  The V4SI result is formed from two groups
;; of V4HI selections out of the V8HI inputs: one group whose index lists
;; start at element 0 and one whose index lists start at element 1 and end
;; at element 7, each taken from both operand 1 and operand 2.
;; Operand 1 is commutative and tied to the destination.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for MULT.
;; NOTE(review): the extend/mult/plus wrappers and the middle indices are
;; not visible here.
3026 (define_insn "sse2_pmaddwd"
3027 [(set (match_operand:V4SI 0 "register_operand" "=x")
3032 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3033 (parallel [(const_int 0)
3039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3040 (parallel [(const_int 0)
3046 (vec_select:V4HI (match_dup 1)
3047 (parallel [(const_int 1)
3052 (vec_select:V4HI (match_dup 2)
3053 (parallel [(const_int 1)
3056 (const_int 7)]))))))]
3057 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3058 "pmaddwd\t{%2, %0|%0, %2}"
3059 [(set_attr "type" "sseiadd")
3060 (set_attr "prefix_data16" "1")
3061 (set_attr "mode" "TI")])
;; Expander mulv4si3: V4SI multiply.  Two code paths are visible:
;;  - one that only calls ix86_fixup_binary_operands_no_copy for MULT;
;;  - one that synthesizes the product from sse2_umulv2siv2di3: multiply
;;    elements 2 and 0, shift both inputs down one element as TImode with
;;    sse2_lshrti3, multiply again to cover elements 3 and 1, compact each
;;    partial result with sse2_pshufd_1, and merge with sse2_punpckldq.
;; NOTE(review): the test selecting between the paths and several call
;; arguments are not visible here; presumably the first path is taken when
;; a native V4SI multiply exists -- confirm.
3063 (define_expand "mulv4si3"
3064 [(set (match_operand:V4SI 0 "register_operand" "")
3065 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3066 (match_operand:V4SI 2 "register_operand" "")))]
3070 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
3073 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3079 t1 = gen_reg_rtx (V4SImode);
3080 t2 = gen_reg_rtx (V4SImode);
3081 t3 = gen_reg_rtx (V4SImode);
3082 t4 = gen_reg_rtx (V4SImode);
3083 t5 = gen_reg_rtx (V4SImode);
3084 t6 = gen_reg_rtx (V4SImode);
3085 thirtytwo = GEN_INT (32);
3087 /* Multiply elements 2 and 0. */
3088 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3091 /* Shift both input vectors down one element, so that elements 3
3092 and 1 are now in the slots for elements 2 and 0. For K8, at
3093 least, this is faster than using a shuffle. */
3094 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3095 gen_lowpart (TImode, op1),
3097 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3098 gen_lowpart (TImode, op2),
3100 /* Multiply elements 3 and 1. */
3101 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3104 /* Move the results in element 2 down to element 1; we don't care
3105 what goes in elements 2 and 3. */
3106 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3107 const0_rtx, const0_rtx));
3108 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3109 const0_rtx, const0_rtx));
3111 /* Merge the parts back together. */
3112 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; Matcher for V4SI mult under SSE4.1: emits pmulld.  Operand 1 is
;; commutative and tied to the destination ("%0"); operand 2 may be a
;; register or memory.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok for MULT.
3117 (define_insn "*sse4_1_mulv4si3"
3118 [(set (match_operand:V4SI 0 "register_operand" "=x")
3119 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3120 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3121 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3122 "pmulld\t{%2, %0|%0, %2}"
3123 [(set_attr "type" "sseimul")
3124 (set_attr "prefix_extra" "1")
3125 (set_attr "mode" "TI")])
;; Expander mulv2di3: V2DI multiply built from sse2_umulv2siv2di3 products.
;;   t1 = op1 * op2 on the inputs viewed as V4SI (the low parts)
;;   t2, t3 = op1, op2 run through gen_lshrv2di3 by 32
;;   t4 = op1 * t3 and t5 = op2 * t2 (the two cross products)
;;   t4, t5 are shifted left by 32 with gen_ashlv2di3
;;   op0 = t1 + t4 + t5 via gen_addv2di3
;; NOTE: the in-body comment says the inputs are shifted "left" by 32, but
;; the calls are gen_lshrv2di3, i.e. the lshr (logical shift right)
;; pattern, which brings the high parts down.
;; NOTE(review): the assignments of op0/op1/op2 are not visible here.
3127 (define_expand "mulv2di3"
3128 [(set (match_operand:V2DI 0 "register_operand" "")
3129 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3130 (match_operand:V2DI 2 "register_operand" "")))]
3133 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3139 t1 = gen_reg_rtx (V2DImode);
3140 t2 = gen_reg_rtx (V2DImode);
3141 t3 = gen_reg_rtx (V2DImode);
3142 t4 = gen_reg_rtx (V2DImode);
3143 t5 = gen_reg_rtx (V2DImode);
3144 t6 = gen_reg_rtx (V2DImode);
3145 thirtytwo = GEN_INT (32);
3147 /* Multiply low parts. */
3148 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3149 gen_lowpart (V4SImode, op2)));
3151 /* Shift input vectors left 32 bits so we can multiply high parts. */
3152 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3153 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3155 /* Multiply high parts by low parts. */
3156 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3157 gen_lowpart (V4SImode, t3)));
3158 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3159 gen_lowpart (V4SImode, t2)));
3161 /* Shift them back. */
3162 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3163 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3165 /* Add the three parts together. */
3166 emit_insn (gen_addv2di3 (t6, t1, t4));
3167 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Expander vec_widen_smult_hi_v8hi: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 product and t2 the smulv8hi3_highpart product
;; of the same inputs; vec_interleave_highv8hi then interleaves t1 and t2
;; into the destination viewed as V8HI.
;; NOTE(review): the assignments of op1/op2 are not visible here;
;; presumably operands[1] and operands[2] -- confirm.
3171 (define_expand "vec_widen_smult_hi_v8hi"
3172 [(match_operand:V4SI 0 "register_operand" "")
3173 (match_operand:V8HI 1 "register_operand" "")
3174 (match_operand:V8HI 2 "register_operand" "")]
3177 rtx op1, op2, t1, t2, dest;
3181 t1 = gen_reg_rtx (V8HImode);
3182 t2 = gen_reg_rtx (V8HImode);
3183 dest = gen_lowpart (V8HImode, operands[0]);
3185 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3186 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3187 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Expander vec_widen_smult_lo_v8hi: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 product and t2 the smulv8hi3_highpart product
;; of the same inputs; vec_interleave_lowv8hi then interleaves t1 and t2
;; into the destination viewed as V8HI.
;; NOTE(review): the assignments of op1/op2 are not visible here;
;; presumably operands[1] and operands[2] -- confirm.
3191 (define_expand "vec_widen_smult_lo_v8hi"
3192 [(match_operand:V4SI 0 "register_operand" "")
3193 (match_operand:V8HI 1 "register_operand" "")
3194 (match_operand:V8HI 2 "register_operand" "")]
3197 rtx op1, op2, t1, t2, dest;
3201 t1 = gen_reg_rtx (V8HImode);
3202 t2 = gen_reg_rtx (V8HImode);
3203 dest = gen_lowpart (V8HImode, operands[0]);
3205 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3206 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3207 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Expander vec_widen_umult_hi_v8hi: two V8HI inputs, one V4SI result.
;; Same shape as the signed variant but uses umulv8hi3_highpart for t2;
;; t1 and t2 are merged with vec_interleave_highv8hi.
3211 (define_expand "vec_widen_umult_hi_v8hi"
3212 [(match_operand:V4SI 0 "register_operand" "")
3213 (match_operand:V8HI 1 "register_operand" "")
3214 (match_operand:V8HI 2 "register_operand" "")]
3217 rtx op1, op2, t1, t2, dest;
3221 t1 = gen_reg_rtx (V8HImode);
3222 t2 = gen_reg_rtx (V8HImode);
3223 dest = gen_lowpart (V8HImode, operands[0]);
3225 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3226 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3227 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Expander vec_widen_umult_lo_v8hi: two V8HI inputs, one V4SI result.
;; Emits mulv8hi3 into t1 and umulv8hi3_highpart into t2, then merges the
;; two with vec_interleave_lowv8hi into operand 0 accessed as V8HI.
3231 (define_expand "vec_widen_umult_lo_v8hi"
3232 [(match_operand:V4SI 0 "register_operand" "")
3233 (match_operand:V8HI 1 "register_operand" "")
3234 (match_operand:V8HI 2 "register_operand" "")]
3237 rtx op1, op2, t1, t2, dest;
3241 t1 = gen_reg_rtx (V8HImode);
3242 t2 = gen_reg_rtx (V8HImode);
3243 dest = gen_lowpart (V8HImode, operands[0]);
3245 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3246 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3247 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Expander vec_widen_smult_hi_v4si: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself (vec_interleave_highv4si) and the
;; two temporaries are passed to sse2_umulv2siv2di3, which writes operand 0.
;; NOTE(review): this signed expander emits gen_sse2_umulv2siv2di3, the very
;; same call used by vec_widen_umult_hi_v4si.  If that pattern is an unsigned
;; multiply, as its name suggests, negative inputs would give wrong results
;; -- confirm and switch to a signed multiply if so.
3251 (define_expand "vec_widen_smult_hi_v4si"
3252 [(match_operand:V2DI 0 "register_operand" "")
3253 (match_operand:V4SI 1 "register_operand" "")
3254 (match_operand:V4SI 2 "register_operand" "")]
3257 rtx op1, op2, t1, t2;
3261 t1 = gen_reg_rtx (V4SImode);
3262 t2 = gen_reg_rtx (V4SImode);
3264 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3265 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3266 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander vec_widen_smult_lo_v4si: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself (vec_interleave_lowv4si) and the
;; two temporaries are passed to sse2_umulv2siv2di3, which writes operand 0.
;; NOTE(review): this signed expander emits gen_sse2_umulv2siv2di3, the very
;; same call used by vec_widen_umult_lo_v4si.  If that pattern is an unsigned
;; multiply, as its name suggests, negative inputs would give wrong results
;; -- confirm and switch to a signed multiply if so.
3270 (define_expand "vec_widen_smult_lo_v4si"
3271 [(match_operand:V2DI 0 "register_operand" "")
3272 (match_operand:V4SI 1 "register_operand" "")
3273 (match_operand:V4SI 2 "register_operand" "")]
3276 rtx op1, op2, t1, t2;
3280 t1 = gen_reg_rtx (V4SImode);
3281 t2 = gen_reg_rtx (V4SImode);
3283 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3284 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3285 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander vec_widen_umult_hi_v4si: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself (vec_interleave_highv4si) and the
;; two temporaries are passed to sse2_umulv2siv2di3, which writes operand 0.
3289 (define_expand "vec_widen_umult_hi_v4si"
3290 [(match_operand:V2DI 0 "register_operand" "")
3291 (match_operand:V4SI 1 "register_operand" "")
3292 (match_operand:V4SI 2 "register_operand" "")]
3295 rtx op1, op2, t1, t2;
3299 t1 = gen_reg_rtx (V4SImode);
3300 t2 = gen_reg_rtx (V4SImode);
3302 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3303 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3304 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander vec_widen_umult_lo_v4si: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself (vec_interleave_lowv4si) and the
;; two temporaries are passed to sse2_umulv2siv2di3, which writes operand 0.
3308 (define_expand "vec_widen_umult_lo_v4si"
3309 [(match_operand:V2DI 0 "register_operand" "")
3310 (match_operand:V4SI 1 "register_operand" "")
3311 (match_operand:V4SI 2 "register_operand" "")]
3314 rtx op1, op2, t1, t2;
3318 t1 = gen_reg_rtx (V4SImode);
3319 t2 = gen_reg_rtx (V4SImode);
3321 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3322 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3323 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander sdot_prodv8hi: operands 1 and 2 are V8HI, operand 3 and the
;; result are V4SI.  Emits sse2_pmaddwd on operands 1 and 2 into a fresh
;; V4SI temporary, then addv4si3 of operand 3 and that temporary into
;; operand 0.
3327 (define_expand "sdot_prodv8hi"
3328 [(match_operand:V4SI 0 "register_operand" "")
3329 (match_operand:V8HI 1 "register_operand" "")
3330 (match_operand:V8HI 2 "register_operand" "")
3331 (match_operand:V4SI 3 "register_operand" "")]
3334 rtx t = gen_reg_rtx (V4SImode);
3335 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3336 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Expander udot_prodv4si: operands 1 and 2 are V4SI, operand 3 and the
;; result are V2DI.  Steps emitted:
;;   t1 = sse2_umulv2siv2di3 (op1, op2), then t1 = t1 + op3;
;;   t2, t3 = operands 1 and 2 shifted with sse2_lshrti3 on their TImode
;;            views (the shift count is not in the visible lines);
;;   t4 = sse2_umulv2siv2di3 (t2, t3);
;;   operand 0 = t1 + t4.
3340 (define_expand "udot_prodv4si"
3341 [(match_operand:V2DI 0 "register_operand" "")
3342 (match_operand:V4SI 1 "register_operand" "")
3343 (match_operand:V4SI 2 "register_operand" "")
3344 (match_operand:V2DI 3 "register_operand" "")]
3349 t1 = gen_reg_rtx (V2DImode);
3350 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3351 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3353 t2 = gen_reg_rtx (V4SImode);
3354 t3 = gen_reg_rtx (V4SImode);
3355 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3356 gen_lowpart (TImode, operands[1]),
3358 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3359 gen_lowpart (TImode, operands[2]),
3362 t4 = gen_reg_rtx (V2DImode);
3363 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3365 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3 for the SSEMODE24 modes (V8HI, V4SI).  Output template is
;; psra<ssevecsize>; operand 1 is tied to the destination by the "0"
;; constraint and operand 2 (TImode, register or immediate) is the count.
3369 (define_insn "ashr<mode>3"
3370 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3372 (match_operand:SSEMODE24 1 "register_operand" "0")
3373 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3375 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3376 [(set_attr "type" "sseishft")
3377 (set_attr "prefix_data16" "1")
3378 (set_attr "mode" "TI")])
;; lshr<mode>3 for the SSEMODE248 modes (V8HI, V4SI, V2DI): lshiftrt of
;; operand 1 by operand 2, output as psrl<ssevecsize>.  Operand 1 is tied to
;; the destination; operand 2 is a TImode register or immediate.
3380 (define_insn "lshr<mode>3"
3381 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3382 (lshiftrt:SSEMODE248
3383 (match_operand:SSEMODE248 1 "register_operand" "0")
3384 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3386 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3387 [(set_attr "type" "sseishft")
3388 (set_attr "prefix_data16" "1")
3389 (set_attr "mode" "TI")])
;; ashl<mode>3 for the SSEMODE248 modes (V8HI, V4SI, V2DI).  Output template
;; is psll<ssevecsize>; operand 1 is tied to the destination and operand 2
;; (TImode, register or immediate) is the count.
3391 (define_insn "ashl<mode>3"
3392 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3394 (match_operand:SSEMODE248 1 "register_operand" "0")
3395 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3397 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3398 [(set_attr "type" "sseishft")
3399 (set_attr "prefix_data16" "1")
3400 (set_attr "mode" "TI")])
;; Expander vec_shl_<mode> for the SSEMODEI modes: an ashift:TI of operand 1
;; by operand 2.  The preparation code tests operand 2 with
;; const_0_to_255_mul_8_operand (the action taken on failure is not in the
;; visible lines) and replaces operands 0 and 1 with their TImode lowparts so
;; they agree with the TImode shift in the pattern.
3402 (define_expand "vec_shl_<mode>"
3403 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3404 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3405 (match_operand:SI 2 "general_operand" "")))]
3408 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3410 operands[0] = gen_lowpart (TImode, operands[0]);
3411 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander vec_shr_<mode> for the SSEMODEI modes: an lshiftrt:TI of operand
;; 1 by operand 2.  The preparation code tests operand 2 with
;; const_0_to_255_mul_8_operand (the action taken on failure is not in the
;; visible lines) and replaces operands 0 and 1 with their TImode lowparts so
;; they agree with the TImode shift in the pattern.
3414 (define_expand "vec_shr_<mode>"
3415 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3416 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3417 (match_operand:SI 2 "general_operand" "")))]
3420 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3422 operands[0] = gen_lowpart (TImode, operands[0]);
3423 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander umaxv16qi3: umax of two V16QI operands.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
3426 (define_expand "umaxv16qi3"
3427 [(set (match_operand:V16QI 0 "register_operand" "")
3428 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3429 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3431 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Insn matching umax on V16QI, output as pmaxub.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative ("%").
3433 (define_insn "*umaxv16qi3"
3434 [(set (match_operand:V16QI 0 "register_operand" "=x")
3435 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3436 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3437 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3438 "pmaxub\t{%2, %0|%0, %2}"
3439 [(set_attr "type" "sseiadd")
3440 (set_attr "prefix_data16" "1")
3441 (set_attr "mode" "TI")])
;; Expander smaxv8hi3: smax of two V8HI operands.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
3443 (define_expand "smaxv8hi3"
3444 [(set (match_operand:V8HI 0 "register_operand" "")
3445 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3446 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3448 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Insn matching smax on V8HI, output as pmaxsw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative ("%").
3450 (define_insn "*smaxv8hi3"
3451 [(set (match_operand:V8HI 0 "register_operand" "=x")
3452 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3453 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3454 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3455 "pmaxsw\t{%2, %0|%0, %2}"
3456 [(set_attr "type" "sseiadd")
3457 (set_attr "prefix_data16" "1")
3458 (set_attr "mode" "TI")])
;; Expander umaxv8hi3: umax of two V8HI operands.  Two code paths are
;; visible; the test that selects between them is not in the visible lines.
;;   - one calls ix86_fixup_binary_operands_no_copy on the operands;
;;   - the other emits sse2_ussubv8hi3 (op3 from operand 1 and operand 2)
;;     followed by addv8hi3 (operand 0 = op3 + operand 2).
;; In the second path op3 normally aliases operand 0; a fresh register is
;; used instead when operand 0 is rtx_equal_p to operand 2, because operand
;; 2 is read again by the add after op3 has been written.
3460 (define_expand "umaxv8hi3"
3461 [(set (match_operand:V8HI 0 "register_operand" "")
3462 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3463 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3467 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3470 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3471 if (rtx_equal_p (op3, op2))
3472 op3 = gen_reg_rtx (V8HImode);
3473 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3474 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander smax<mode>3 for the SSEMODE14 modes (V16QI, V4SI).  Two code
;; paths are visible; the test that selects between them is not shown.
;;   - one calls ix86_fixup_binary_operands_no_copy on the operands;
;;   - the other builds xops in the operand layout of the vcond<mode>
;;     pattern -- {dest, op1, op2, GT (op1, op2), op1, op2}, i.e. "op1 > op2
;;     ? op1 : op2" -- and hands it to ix86_expand_int_vcond.
3479 (define_expand "smax<mode>3"
3480 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3481 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3482 (match_operand:SSEMODE14 2 "register_operand" "")))]
3486 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3492 xops[0] = operands[0];
3493 xops[1] = operands[1];
3494 xops[2] = operands[2];
3495 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3496 xops[4] = operands[1];
3497 xops[5] = operands[2];
3498 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the SSEMODE14 modes (V16QI, V4SI), output as
;; pmaxs<ssevecsize>.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok
;; for SMAX; operands 1 and 2 are commutative ("%").
3504 (define_insn "*sse4_1_smax<mode>3"
3505 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3507 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3508 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3509 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3510 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3511 [(set_attr "type" "sseiadd")
3512 (set_attr "prefix_extra" "1")
3513 (set_attr "mode" "TI")])
;; Expander umaxv4si3: umax of two V4SI operands.  Two code paths are
;; visible; the test that selects between them is not shown.
;;   - one calls ix86_fixup_binary_operands_no_copy on the operands;
;;   - the other builds xops in the operand layout of the vcond<mode>
;;     pattern -- {dest, op1, op2, GTU (op1, op2), op1, op2}, i.e. unsigned
;;     "op1 > op2 ? op1 : op2" -- and hands it to ix86_expand_int_vcond.
3515 (define_expand "umaxv4si3"
3516 [(set (match_operand:V4SI 0 "register_operand" "")
3517 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3518 (match_operand:V4SI 2 "register_operand" "")))]
3522 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3528 xops[0] = operands[0];
3529 xops[1] = operands[1];
3530 xops[2] = operands[2];
3531 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3532 xops[4] = operands[1];
3533 xops[5] = operands[2];
3534 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the SSEMODE24 modes (V8HI, V4SI), output as
;; pmaxu<ssevecsize>.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok
;; for UMAX; operands 1 and 2 are commutative ("%").
3540 (define_insn "*sse4_1_umax<mode>3"
3541 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3543 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3544 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3545 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3546 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3547 [(set_attr "type" "sseiadd")
3548 (set_attr "prefix_extra" "1")
3549 (set_attr "mode" "TI")])
;; Expander uminv16qi3: umin of two V16QI operands.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
3551 (define_expand "uminv16qi3"
3552 [(set (match_operand:V16QI 0 "register_operand" "")
3553 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3554 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3556 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Insn matching umin on V16QI, output as pminub.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative ("%").
3558 (define_insn "*uminv16qi3"
3559 [(set (match_operand:V16QI 0 "register_operand" "=x")
3560 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3561 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3562 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3563 "pminub\t{%2, %0|%0, %2}"
3564 [(set_attr "type" "sseiadd")
3565 (set_attr "prefix_data16" "1")
3566 (set_attr "mode" "TI")])
;; Expander sminv8hi3: smin of two V8HI operands.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
3568 (define_expand "sminv8hi3"
3569 [(set (match_operand:V8HI 0 "register_operand" "")
3570 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3571 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3573 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Insn matching smin on V8HI, output as pminsw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative ("%").
3575 (define_insn "*sminv8hi3"
3576 [(set (match_operand:V8HI 0 "register_operand" "=x")
3577 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3578 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3579 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3580 "pminsw\t{%2, %0|%0, %2}"
3581 [(set_attr "type" "sseiadd")
3582 (set_attr "prefix_data16" "1")
3583 (set_attr "mode" "TI")])
;; Expander smin<mode>3 for the SSEMODE14 modes (V16QI, V4SI).  Two code
;; paths are visible; the test that selects between them is not shown.
;;   - one calls ix86_fixup_binary_operands_no_copy on the operands;
;;   - the other builds xops in the operand layout of the vcond<mode>
;;     pattern with the two value slots swapped relative to smax<mode>3 --
;;     {dest, op2, op1, GT (op1, op2), op1, op2}, i.e. "op1 > op2 ? op2 :
;;     op1" -- and hands it to ix86_expand_int_vcond.
3585 (define_expand "smin<mode>3"
3586 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3587 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3588 (match_operand:SSEMODE14 2 "register_operand" "")))]
3592 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3598 xops[0] = operands[0];
3599 xops[1] = operands[2];
3600 xops[2] = operands[1];
3601 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3602 xops[4] = operands[1];
3603 xops[5] = operands[2];
3604 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the SSEMODE14 modes (V16QI, V4SI), output as
;; pmins<ssevecsize>.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok
;; for SMIN; operands 1 and 2 are commutative ("%").
3610 (define_insn "*sse4_1_smin<mode>3"
3611 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3613 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3614 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3615 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3616 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3617 [(set_attr "type" "sseiadd")
3618 (set_attr "prefix_extra" "1")
3619 (set_attr "mode" "TI")])
;; Expander umin<mode>3 for the SSEMODE24 modes (V8HI, V4SI).  Two code
;; paths are visible; the test that selects between them is not shown.
;;   - one calls ix86_fixup_binary_operands_no_copy on the operands;
;;   - the other builds xops in the operand layout of the vcond<mode>
;;     pattern -- {dest, op2, op1, GTU (op1, op2), op1, op2}, i.e. unsigned
;;     "op1 > op2 ? op2 : op1" -- and hands it to ix86_expand_int_vcond.
3621 (define_expand "umin<mode>3"
3622 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3623 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3624 (match_operand:SSEMODE24 2 "register_operand" "")))]
3628 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3634 xops[0] = operands[0];
3635 xops[1] = operands[2];
3636 xops[2] = operands[1];
3637 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3638 xops[4] = operands[1];
3639 xops[5] = operands[2];
3640 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the SSEMODE24 modes (V8HI, V4SI), output as
;; pminu<ssevecsize>.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok
;; for UMIN; operands 1 and 2 are commutative ("%").
3646 (define_insn "*sse4_1_umin<mode>3"
3647 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3649 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3650 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3651 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3652 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3653 [(set_attr "type" "sseiadd")
3654 (set_attr "prefix_extra" "1")
3655 (set_attr "mode" "TI")])
3657 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3659 ;; Parallel integral comparisons
3661 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality compare for the SSEMODE124 modes (V16QI, V8HI, V4SI), output as
;; pcmpeq<ssevecsize>.  Requires TARGET_SSE2 and ix86_binary_operator_ok for
;; EQ; operands 1 and 2 are commutative ("%").
3663 (define_insn "sse2_eq<mode>3"
3664 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3666 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3667 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3668 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3669 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3670 [(set_attr "type" "ssecmp")
3671 (set_attr "prefix_data16" "1")
3672 (set_attr "mode" "TI")])
;; Equality compare on V2DI, output as pcmpeqq.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok for EQ; operands 1 and 2 are commutative ("%").
;; Carries prefix_extra rather than the prefix_data16 used by the SSE2
;; compares.
3674 (define_insn "sse4_1_eqv2di3"
3675 [(set (match_operand:V2DI 0 "register_operand" "=x")
3677 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3678 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3679 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3680 "pcmpeqq\t{%2, %0|%0, %2}"
3681 [(set_attr "type" "ssecmp")
3682 (set_attr "prefix_extra" "1")
3683 (set_attr "mode" "TI")])
;; Greater-than compare for the SSEMODE124 modes (V16QI, V8HI, V4SI), output
;; as pcmpgt<ssevecsize>.  Unlike the equality compare, operand 1 has no "%"
;; modifier and must be a register tied to the destination.
3685 (define_insn "sse2_gt<mode>3"
3686 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3688 (match_operand:SSEMODE124 1 "register_operand" "0")
3689 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3691 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3692 [(set_attr "type" "ssecmp")
3693 (set_attr "prefix_data16" "1")
3694 (set_attr "mode" "TI")])
;; Greater-than compare on V2DI, output as pcmpgtq.
;; Operand 1 has no "%" modifier and is tied to the register destination by
;; the "0" constraint, so its predicate is register_operand, in line with
;; sse2_gt<mode>3.  prefix_extra is set the same way as on sse4_1_eqv2di3 so
;; the two V2DI compares carry the same encoding attribute.
3696 (define_insn "sse4_2_gtv2di3"
3697 [(set (match_operand:V2DI 0 "register_operand" "=x")
3699 (match_operand:V2DI 1 "register_operand" "0")
3700 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3702 "pcmpgtq\t{%2, %0|%0, %2}"
3703 [(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
3704 (set_attr "mode" "TI")])
;; Expander vcond<mode> for the SSEMODEI modes: an if_then_else whose
;; condition is operator 3 applied to operands 4 and 5, selecting operand 1
;; or operand 2 into operand 0.  The whole operand array is handed to
;; ix86_expand_int_vcond; what follows its result is not in the visible
;; lines.
3706 (define_expand "vcond<mode>"
3707 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3708 (if_then_else:SSEMODEI
3709 (match_operator 3 ""
3710 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3711 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3712 (match_operand:SSEMODEI 1 "general_operand" "")
3713 (match_operand:SSEMODEI 2 "general_operand" "")))]
3716 if (ix86_expand_int_vcond (operands))
;; Expander vcondu<mode> for the SSEMODEI modes.  The visible pattern and
;; preparation code are the same as vcond<mode>: operator 3 compares
;; operands 4 and 5, operands 1 and 2 are the selected values, and the
;; operand array is handed to ix86_expand_int_vcond.
3722 (define_expand "vcondu<mode>"
3723 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3724 (if_then_else:SSEMODEI
3725 (match_operator 3 ""
3726 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3727 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3728 (match_operand:SSEMODEI 1 "general_operand" "")
3729 (match_operand:SSEMODEI 2 "general_operand" "")))]
3732 if (ix86_expand_int_vcond (operands))
3738 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3740 ;; Parallel bitwise logical operations
3742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander one_cmpl<mode>2 for the SSEMODEI modes, written as an xor with
;; operand 1.  The preparation code builds a CONST_VECTOR of
;; GET_MODE_NUNITS elements, each constm1_rtx, forces it into a register and
;; stores it in operands[2].
3744 (define_expand "one_cmpl<mode>2"
3745 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3746 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3750 int i, n = GET_MODE_NUNITS (<MODE>mode);
3751 rtvec v = rtvec_alloc (n);
3753 for (i = 0; i < n; ++i)
3754 RTVEC_ELT (v, i) = constm1_rtx;
3756 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander and<mode>3 for the SSEMODEI modes.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
3759 (define_expand "and<mode>3"
3760 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3761 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3762 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3764 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Insn for the SSEMODEI modes, output as pand.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for AND; operands 1 and 2 are commutative ("%").
3766 (define_insn "*and<mode>3"
3767 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3769 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3770 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3771 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3772 "pand\t{%2, %0|%0, %2}"
3773 [(set_attr "type" "sselog")
3774 (set_attr "prefix_data16" "1")
3775 (set_attr "mode" "TI")])
;; Insn for the SSEMODEI modes, output as pandn.  Operand 1 appears under a
;; `not' and is tied to the destination register; operand 2 may be a
;; register or memory.
3777 (define_insn "sse2_nand<mode>3"
3778 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3780 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3781 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3783 "pandn\t{%2, %0|%0, %2}"
3784 [(set_attr "type" "sselog")
3785 (set_attr "prefix_data16" "1")
3786 (set_attr "mode" "TI")])
;; Expander andtf3: bitwise and on TFmode operands.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
3788 (define_expand "andtf3"
3789 [(set (match_operand:TF 0 "register_operand" "")
3790 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3791 (match_operand:TF 2 "nonimmediate_operand" "")))]
3793 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; Insn on TFmode operands, output as pand.  Requires TARGET_64BIT and
;; ix86_binary_operator_ok for AND; operands 1 and 2 are commutative ("%").
3795 (define_insn "*andtf3"
3796 [(set (match_operand:TF 0 "register_operand" "=x")
3798 (match_operand:TF 1 "nonimmediate_operand" "%0")
3799 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3800 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3801 "pand\t{%2, %0|%0, %2}"
3802 [(set_attr "type" "sselog")
3803 (set_attr "prefix_data16" "1")
3804 (set_attr "mode" "TI")])
;; Insn on TFmode operands, output as pandn.  Operand 1 appears under a
;; `not' and is tied to the destination register; operand 2 may be a
;; register or memory.
3806 (define_insn "*nandtf3"
3807 [(set (match_operand:TF 0 "register_operand" "=x")
3809 (not:TF (match_operand:TF 1 "register_operand" "0"))
3810 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3812 "pandn\t{%2, %0|%0, %2}"
3813 [(set_attr "type" "sselog")
3814 (set_attr "prefix_data16" "1")
3815 (set_attr "mode" "TI")])
;; Expander ior<mode>3 for the SSEMODEI modes.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
3817 (define_expand "ior<mode>3"
3818 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3819 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3820 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3822 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Insn for the SSEMODEI modes, output as por.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for IOR; operands 1 and 2 are commutative ("%").
3824 (define_insn "*ior<mode>3"
3825 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3827 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3828 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3829 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3830 "por\t{%2, %0|%0, %2}"
3831 [(set_attr "type" "sselog")
3832 (set_attr "prefix_data16" "1")
3833 (set_attr "mode" "TI")])
;; Expander iortf3: bitwise or on TFmode operands.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
3835 (define_expand "iortf3"
3836 [(set (match_operand:TF 0 "register_operand" "")
3837 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3838 (match_operand:TF 2 "nonimmediate_operand" "")))]
3840 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; Insn on TFmode operands, output as por.  Requires TARGET_64BIT and
;; ix86_binary_operator_ok for IOR; operands 1 and 2 are commutative ("%").
3842 (define_insn "*iortf3"
3843 [(set (match_operand:TF 0 "register_operand" "=x")
3845 (match_operand:TF 1 "nonimmediate_operand" "%0")
3846 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3847 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3848 "por\t{%2, %0|%0, %2}"
3849 [(set_attr "type" "sselog")
3850 (set_attr "prefix_data16" "1")
3851 (set_attr "mode" "TI")])
;; Expander xor<mode>3 for the SSEMODEI modes.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
3853 (define_expand "xor<mode>3"
3854 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3855 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3856 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3858 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Insn for the SSEMODEI modes, output as pxor.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for XOR; operands 1 and 2 are commutative ("%").
3860 (define_insn "*xor<mode>3"
3861 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3863 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3864 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3865 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3866 "pxor\t{%2, %0|%0, %2}"
3867 [(set_attr "type" "sselog")
3868 (set_attr "prefix_data16" "1")
3869 (set_attr "mode" "TI")])
;; Expander xortf3: bitwise xor on TFmode operands.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy.
3871 (define_expand "xortf3"
3872 [(set (match_operand:TF 0 "register_operand" "")
3873 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3874 (match_operand:TF 2 "nonimmediate_operand" "")))]
3876 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; Insn on TFmode operands, output as pxor.  Requires TARGET_64BIT and
;; ix86_binary_operator_ok for XOR; operands 1 and 2 are commutative ("%").
3878 (define_insn "*xortf3"
3879 [(set (match_operand:TF 0 "register_operand" "=x")
3881 (match_operand:TF 1 "nonimmediate_operand" "%0")
3882 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3883 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3884 "pxor\t{%2, %0|%0, %2}"
3885 [(set_attr "type" "sselog")
3886 (set_attr "prefix_data16" "1")
3887 (set_attr "mode" "TI")])
3889 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3891 ;; Parallel integral element swizzling
3893 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3896 ;; op1 = abcdefghijklmnop
3897 ;; op2 = qrstuvwxyz012345
3898 ;; h1 = aqbrcsdteufvgwhx
3899 ;; l1 = iyjzk0l1m2n3o4p5
3900 ;; h2 = aiqybjrzcks0dlt1
3901 ;; l2 = emu2fnv3gow4hpx5
3902 ;; h3 = aeimquy2bfjnrvz3
3903 ;; l3 = cgkosw04dhlptx15
3904 ;; result = bdfhjlnprtvxz135
;; Expander vec_pack_trunc_v8hi: two V8HI inputs, one V16QI result.
;; Both inputs are viewed as V16QI and run through three rounds of
;; high/low byte interleaves (h1/l1, h2/l2, h3/l3); a final
;; vec_interleave_lowv16qi of l3 and h3 writes operand 0.
3905 (define_expand "vec_pack_trunc_v8hi"
3906 [(match_operand:V16QI 0 "register_operand" "")
3907 (match_operand:V8HI 1 "register_operand" "")
3908 (match_operand:V8HI 2 "register_operand" "")]
3911 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3913 op1 = gen_lowpart (V16QImode, operands[1]);
3914 op2 = gen_lowpart (V16QImode, operands[2]);
3915 h1 = gen_reg_rtx (V16QImode);
3916 l1 = gen_reg_rtx (V16QImode);
3917 h2 = gen_reg_rtx (V16QImode);
3918 l2 = gen_reg_rtx (V16QImode);
3919 h3 = gen_reg_rtx (V16QImode);
3920 l3 = gen_reg_rtx (V16QImode);
3922 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3923 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3924 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3925 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3926 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3927 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3928 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3939 ;; result = bdfhjlnp
;; Expander vec_pack_trunc_v4si: two V4SI inputs, one V8HI result.
;; Both inputs are viewed as V8HI and run through two rounds of high/low
;; halfword interleaves (h1/l1, h2/l2); a final vec_interleave_lowv8hi of
;; l2 and h2 writes operand 0.
3940 (define_expand "vec_pack_trunc_v4si"
3941 [(match_operand:V8HI 0 "register_operand" "")
3942 (match_operand:V4SI 1 "register_operand" "")
3943 (match_operand:V4SI 2 "register_operand" "")]
3946 rtx op1, op2, h1, l1, h2, l2;
3948 op1 = gen_lowpart (V8HImode, operands[1]);
3949 op2 = gen_lowpart (V8HImode, operands[2]);
3950 h1 = gen_reg_rtx (V8HImode);
3951 l1 = gen_reg_rtx (V8HImode);
3952 h2 = gen_reg_rtx (V8HImode);
3953 l2 = gen_reg_rtx (V8HImode);
3955 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3956 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3957 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3958 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3959 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander vec_pack_trunc_v2di: two V2DI inputs, one V4SI result.
;; Both inputs are viewed as V4SI; one round of high/low interleaves yields
;; h1 and l1, and a final vec_interleave_lowv4si of l1 and h1 writes
;; operand 0.
3969 (define_expand "vec_pack_trunc_v2di"
3970 [(match_operand:V4SI 0 "register_operand" "")
3971 (match_operand:V2DI 1 "register_operand" "")
3972 (match_operand:V2DI 2 "register_operand" "")]
3975 rtx op1, op2, h1, l1;
3977 op1 = gen_lowpart (V4SImode, operands[1]);
3978 op2 = gen_lowpart (V4SImode, operands[2]);
3979 h1 = gen_reg_rtx (V4SImode);
3980 l1 = gen_reg_rtx (V4SImode);
3982 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3983 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3984 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander vec_interleave_highv16qi.  The pattern lists selection indices
;; 8,24, 9,25, ... 15,31 over operands 1 and 2; the preparation code simply
;; emits sse2_punpckhbw on the three operands.
3988 (define_expand "vec_interleave_highv16qi"
3989 [(set (match_operand:V16QI 0 "register_operand" "=x")
3992 (match_operand:V16QI 1 "register_operand" "0")
3993 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3994 (parallel [(const_int 8) (const_int 24)
3995 (const_int 9) (const_int 25)
3996 (const_int 10) (const_int 26)
3997 (const_int 11) (const_int 27)
3998 (const_int 12) (const_int 28)
3999 (const_int 13) (const_int 29)
4000 (const_int 14) (const_int 30)
4001 (const_int 15) (const_int 31)])))]
4004 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_lowv16qi.  The pattern lists selection indices
;; 0,16, 1,17, ... 7,23 over operands 1 and 2; the preparation code simply
;; emits sse2_punpcklbw on the three operands.
4008 (define_expand "vec_interleave_lowv16qi"
4009 [(set (match_operand:V16QI 0 "register_operand" "=x")
4012 (match_operand:V16QI 1 "register_operand" "0")
4013 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4014 (parallel [(const_int 0) (const_int 16)
4015 (const_int 1) (const_int 17)
4016 (const_int 2) (const_int 18)
4017 (const_int 3) (const_int 19)
4018 (const_int 4) (const_int 20)
4019 (const_int 5) (const_int 21)
4020 (const_int 6) (const_int 22)
4021 (const_int 7) (const_int 23)])))]
4024 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_highv8hi.  The pattern lists selection indices
;; 4,12, 5,13, 6,14, 7,15 over operands 1 and 2; the preparation code simply
;; emits sse2_punpckhwd on the three operands.
4028 (define_expand "vec_interleave_highv8hi"
4029 [(set (match_operand:V8HI 0 "register_operand" "=x")
4032 (match_operand:V8HI 1 "register_operand" "0")
4033 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4034 (parallel [(const_int 4) (const_int 12)
4035 (const_int 5) (const_int 13)
4036 (const_int 6) (const_int 14)
4037 (const_int 7) (const_int 15)])))]
4040 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_lowv8hi.  The pattern lists selection indices
;; 0,8, 1,9, 2,10, 3,11 over operands 1 and 2; the preparation code simply
;; emits sse2_punpcklwd on the three operands.
4044 (define_expand "vec_interleave_lowv8hi"
4045 [(set (match_operand:V8HI 0 "register_operand" "=x")
4048 (match_operand:V8HI 1 "register_operand" "0")
4049 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4050 (parallel [(const_int 0) (const_int 8)
4051 (const_int 1) (const_int 9)
4052 (const_int 2) (const_int 10)
4053 (const_int 3) (const_int 11)])))]
4056 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_highv4si.  The pattern lists selection indices
;; 2,6, 3,7 over operands 1 and 2; the preparation code simply emits
;; sse2_punpckhdq on the three operands.
4060 (define_expand "vec_interleave_highv4si"
4061 [(set (match_operand:V4SI 0 "register_operand" "=x")
4064 (match_operand:V4SI 1 "register_operand" "0")
4065 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4066 (parallel [(const_int 2) (const_int 6)
4067 (const_int 3) (const_int 7)])))]
4070 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_lowv4si.  The pattern lists selection indices
;; 0,4, 1,5 over operands 1 and 2; the preparation code simply emits
;; sse2_punpckldq on the three operands.
4074 (define_expand "vec_interleave_lowv4si"
4075 [(set (match_operand:V4SI 0 "register_operand" "=x")
4078 (match_operand:V4SI 1 "register_operand" "0")
4079 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4080 (parallel [(const_int 0) (const_int 4)
4081 (const_int 1) (const_int 5)])))]
4084 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_highv2di.  The selection list starts at index 1
;; (the rest of the list is not in the visible lines); the preparation code
;; simply emits sse2_punpckhqdq on the three operands.
4088 (define_expand "vec_interleave_highv2di"
4089 [(set (match_operand:V2DI 0 "register_operand" "=x")
4092 (match_operand:V2DI 1 "register_operand" "0")
4093 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4094 (parallel [(const_int 1)
4098 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_lowv2di.  The selection list starts at index 0
;; (the rest of the list is not in the visible lines); the preparation code
;; simply emits sse2_punpcklqdq on the three operands.
4102 (define_expand "vec_interleave_lowv2di"
4103 [(set (match_operand:V2DI 0 "register_operand" "=x")
4106 (match_operand:V2DI 1 "register_operand" "0")
4107 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4108 (parallel [(const_int 0)
4112 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Insn output as packsswb: two V8HI sources (operand 1 tied to the
;; destination, operand 2 register or memory) produce a V16QI result.
4116 (define_insn "sse2_packsswb"
4117 [(set (match_operand:V16QI 0 "register_operand" "=x")
4120 (match_operand:V8HI 1 "register_operand" "0"))
4122 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4124 "packsswb\t{%2, %0|%0, %2}"
4125 [(set_attr "type" "sselog")
4126 (set_attr "prefix_data16" "1")
4127 (set_attr "mode" "TI")])
;; Insn output as packssdw: two V4SI sources (operand 1 tied to the
;; destination, operand 2 register or memory) produce a V8HI result.
4129 (define_insn "sse2_packssdw"
4130 [(set (match_operand:V8HI 0 "register_operand" "=x")
4133 (match_operand:V4SI 1 "register_operand" "0"))
4135 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4137 "packssdw\t{%2, %0|%0, %2}"
4138 [(set_attr "type" "sselog")
4139 (set_attr "prefix_data16" "1")
4140 (set_attr "mode" "TI")])
;; Insn output as packuswb: two V8HI sources (operand 1 tied to the
;; destination, operand 2 register or memory) produce a V16QI result.
4142 (define_insn "sse2_packuswb"
4143 [(set (match_operand:V16QI 0 "register_operand" "=x")
4146 (match_operand:V8HI 1 "register_operand" "0"))
4148 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4150 "packuswb\t{%2, %0|%0, %2}"
4151 [(set_attr "type" "sselog")
4152 (set_attr "prefix_data16" "1")
4153 (set_attr "mode" "TI")])
;; Insn output as punpckhbw on V16QI operands.  The selection list is
;; 8,24, 9,25, ... 15,31 -- the same list as in the vec_interleave_highv16qi
;; expander that emits this insn.
4155 (define_insn "sse2_punpckhbw"
4156 [(set (match_operand:V16QI 0 "register_operand" "=x")
4159 (match_operand:V16QI 1 "register_operand" "0")
4160 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4161 (parallel [(const_int 8) (const_int 24)
4162 (const_int 9) (const_int 25)
4163 (const_int 10) (const_int 26)
4164 (const_int 11) (const_int 27)
4165 (const_int 12) (const_int 28)
4166 (const_int 13) (const_int 29)
4167 (const_int 14) (const_int 30)
4168 (const_int 15) (const_int 31)])))]
4170 "punpckhbw\t{%2, %0|%0, %2}"
4171 [(set_attr "type" "sselog")
4172 (set_attr "prefix_data16" "1")
4173 (set_attr "mode" "TI")])
;; Insn output as punpcklbw on V16QI operands.  The selection list is
;; 0,16, 1,17, ... 7,23 -- the same list as in the vec_interleave_lowv16qi
;; expander that emits this insn.
4175 (define_insn "sse2_punpcklbw"
4176 [(set (match_operand:V16QI 0 "register_operand" "=x")
4179 (match_operand:V16QI 1 "register_operand" "0")
4180 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4181 (parallel [(const_int 0) (const_int 16)
4182 (const_int 1) (const_int 17)
4183 (const_int 2) (const_int 18)
4184 (const_int 3) (const_int 19)
4185 (const_int 4) (const_int 20)
4186 (const_int 5) (const_int 21)
4187 (const_int 6) (const_int 22)
4188 (const_int 7) (const_int 23)])))]
4190 "punpcklbw\t{%2, %0|%0, %2}"
4191 [(set_attr "type" "sselog")
4192 (set_attr "prefix_data16" "1")
4193 (set_attr "mode" "TI")])
;; Insn output as punpckhwd on V8HI operands.  The selection list is
;; 4,12, 5,13, 6,14, 7,15 -- the same list as in the vec_interleave_highv8hi
;; expander that emits this insn.
4195 (define_insn "sse2_punpckhwd"
4196 [(set (match_operand:V8HI 0 "register_operand" "=x")
4199 (match_operand:V8HI 1 "register_operand" "0")
4200 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4201 (parallel [(const_int 4) (const_int 12)
4202 (const_int 5) (const_int 13)
4203 (const_int 6) (const_int 14)
4204 (const_int 7) (const_int 15)])))]
4206 "punpckhwd\t{%2, %0|%0, %2}"
4207 [(set_attr "type" "sselog")
4208 (set_attr "prefix_data16" "1")
4209 (set_attr "mode" "TI")])
;; Insn output as punpcklwd on V8HI operands.  The selection list is
;; 0,8, 1,9, 2,10, 3,11 -- the same list as in the vec_interleave_lowv8hi
;; expander that emits this insn.
4211 (define_insn "sse2_punpcklwd"
4212 [(set (match_operand:V8HI 0 "register_operand" "=x")
4215 (match_operand:V8HI 1 "register_operand" "0")
4216 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4217 (parallel [(const_int 0) (const_int 8)
4218 (const_int 1) (const_int 9)
4219 (const_int 2) (const_int 10)
4220 (const_int 3) (const_int 11)])))]
4222 "punpcklwd\t{%2, %0|%0, %2}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_data16" "1")
4225 (set_attr "mode" "TI")])
;; Insn output as punpckhdq on V4SI operands.  The selection list is
;; 2,6, 3,7 -- the same list as in the vec_interleave_highv4si expander that
;; emits this insn.
4227 (define_insn "sse2_punpckhdq"
4228 [(set (match_operand:V4SI 0 "register_operand" "=x")
4231 (match_operand:V4SI 1 "register_operand" "0")
4232 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4233 (parallel [(const_int 2) (const_int 6)
4234 (const_int 3) (const_int 7)])))]
4236 "punpckhdq\t{%2, %0|%0, %2}"
4237 [(set_attr "type" "sselog")
4238 (set_attr "prefix_data16" "1")
4239 (set_attr "mode" "TI")])
;; Insn output as punpckldq on V4SI operands.  The selection list is
;; 0,4, 1,5 -- the same list as in the vec_interleave_lowv4si expander that
;; emits this insn.
4241 (define_insn "sse2_punpckldq"
4242 [(set (match_operand:V4SI 0 "register_operand" "=x")
4245 (match_operand:V4SI 1 "register_operand" "0")
4246 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4247 (parallel [(const_int 0) (const_int 4)
4248 (const_int 1) (const_int 5)])))]
4250 "punpckldq\t{%2, %0|%0, %2}"
4251 [(set_attr "type" "sselog")
4252 (set_attr "prefix_data16" "1")
4253 (set_attr "mode" "TI")])
;; Insn output as punpckhqdq on V2DI operands.  The selection list starts at
;; index 1; the rest of the list is not in the visible lines.
4255 (define_insn "sse2_punpckhqdq"
4256 [(set (match_operand:V2DI 0 "register_operand" "=x")
4259 (match_operand:V2DI 1 "register_operand" "0")
4260 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4261 (parallel [(const_int 1)
4264 "punpckhqdq\t{%2, %0|%0, %2}"
4265 [(set_attr "type" "sselog")
4266 (set_attr "prefix_data16" "1")
4267 (set_attr "mode" "TI")])
;; Insn output as punpcklqdq on V2DI operands.  The selection list starts at
;; index 0; the rest of the list is not in the visible lines.
4269 (define_insn "sse2_punpcklqdq"
4270 [(set (match_operand:V2DI 0 "register_operand" "=x")
4273 (match_operand:V2DI 1 "register_operand" "0")
4274 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4275 (parallel [(const_int 0)
4278 "punpcklqdq\t{%2, %0|%0, %2}"
4279 [(set_attr "type" "sselog")
4280 (set_attr "prefix_data16" "1")
4281 (set_attr "mode" "TI")])
;; pinsrb: insert the QI value in operand 2 (general register or memory)
;; into the V16QI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power of two (const_pow2_1_to_32768_operand);
;; the output code replaces it with its base-2 logarithm before printing it
;; as the instruction's immediate.
4283 (define_insn "*sse4_1_pinsrb"
4284 [(set (match_operand:V16QI 0 "register_operand" "=x")
4286 (vec_duplicate:V16QI
4287 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4288 (match_operand:V16QI 1 "register_operand" "0")
4289 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4292 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4293 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4295 [(set_attr "type" "sselog")
4296 (set_attr "prefix_extra" "1")
4297 (set_attr "mode" "TI")])
;; pinsrw: insert the HI value in operand 2 (general register or memory)
;; into the V8HI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power of two (const_pow2_1_to_128_operand); the
;; output code replaces it with its base-2 logarithm before printing it as
;; the instruction's immediate.
4299 (define_insn "*sse2_pinsrw"
4300 [(set (match_operand:V8HI 0 "register_operand" "=x")
4303 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4304 (match_operand:V8HI 1 "register_operand" "0")
4305 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4308 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4309 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4311 [(set_attr "type" "sselog")
4312 (set_attr "prefix_data16" "1")
4313 (set_attr "mode" "TI")])
;; pinsrd: insert the SI value in operand 2 (general register or memory)
;; into the V4SI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power of two (const_pow2_1_to_8_operand); the
;; output code replaces it with its base-2 logarithm before printing it as
;; the instruction's immediate.
4315 ;; It must come before sse2_loadld since it is preferred.
4316 (define_insn "*sse4_1_pinsrd"
4317 [(set (match_operand:V4SI 0 "register_operand" "=x")
4320 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4321 (match_operand:V4SI 1 "register_operand" "0")
4322 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4325 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4326 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4328 [(set_attr "type" "sselog")
4329 (set_attr "prefix_extra" "1")
4330 (set_attr "mode" "TI")])
;; pinsrq: insert the DI value in operand 2 (general register or memory)
;; into the V2DI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power of two (const_pow2_1_to_2_operand); the
;; output code replaces it with its base-2 logarithm before printing it as
;; the instruction's immediate.
4332 (define_insn "*sse4_1_pinsrq"
4333 [(set (match_operand:V2DI 0 "register_operand" "=x")
4336 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4337 (match_operand:V2DI 1 "register_operand" "0")
4338 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4341 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4342 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4344 [(set_attr "type" "sselog")
4345 (set_attr "prefix_extra" "1")
4346 (set_attr "mode" "TI")])
;; pextrb, register form: extract the element of the V16QI register in
;; operand 1 selected by the constant index in operand 2 (0..15) into the
;; SI general register in operand 0.
4348 (define_insn "*sse4_1_pextrb"
4349 [(set (match_operand:SI 0 "register_operand" "=r")
4352 (match_operand:V16QI 1 "register_operand" "x")
4353 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4355 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4356 [(set_attr "type" "sselog")
4357 (set_attr "prefix_extra" "1")
4358 (set_attr "mode" "TI")])
;; pextrb, memory form: store the element of the V16QI register in
;; operand 1 selected by the constant index in operand 2 (0..15) to the
;; QI memory location in operand 0.
4360 (define_insn "*sse4_1_pextrb_memory"
4361 [(set (match_operand:QI 0 "memory_operand" "=m")
4363 (match_operand:V16QI 1 "register_operand" "x")
4364 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4366 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4367 [(set_attr "type" "sselog")
4368 (set_attr "prefix_extra" "1")
4369 (set_attr "mode" "TI")])
;; pextrw, register form: extract the element of the V8HI register in
;; operand 1 selected by the constant index in operand 2 (0..7) into the
;; SI general register in operand 0.
4371 (define_insn "*sse2_pextrw"
4372 [(set (match_operand:SI 0 "register_operand" "=r")
4375 (match_operand:V8HI 1 "register_operand" "x")
4376 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4378 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4379 [(set_attr "type" "sselog")
4380 (set_attr "prefix_data16" "1")
4381 (set_attr "mode" "TI")])
;; pextrw, memory form: store the element of the V8HI register in
;; operand 1 selected by the constant index in operand 2 (0..7) to the
;; HI memory location in operand 0.
4383 (define_insn "*sse4_1_pextrw_memory"
4384 [(set (match_operand:HI 0 "memory_operand" "=m")
4386 (match_operand:V8HI 1 "register_operand" "x")
4387 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4389 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4390 [(set_attr "type" "sselog")
4391 (set_attr "prefix_extra" "1")
4392 (set_attr "mode" "TI")])
;; pextrd: extract the element of the V4SI register in operand 1 selected
;; by the constant index in operand 2 (0..3) into the SI destination, which
;; may be a general register or memory.
4394 (define_insn "*sse4_1_pextrd"
4395 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4397 (match_operand:V4SI 1 "register_operand" "x")
4398 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4400 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4401 [(set_attr "type" "sselog")
4402 (set_attr "prefix_extra" "1")
4403 (set_attr "mode" "TI")])
;; pextrq: extract the element of the V2DI register in operand 1 selected
;; by the constant index in operand 2 (0..1) into the DI destination, which
;; may be a general register or memory.  Enabled only when both
;; TARGET_SSE4_1 and TARGET_64BIT hold.
4405 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4406 (define_insn "*sse4_1_pextrq"
4407 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4409 (match_operand:V2DI 1 "register_operand" "x")
4410 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4411 "TARGET_SSE4_1 && TARGET_64BIT"
4412 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4413 [(set_attr "type" "sselog")
4414 (set_attr "prefix_extra" "1")
4415 (set_attr "mode" "TI")])
;; Expander for pshufd with a packed 8-bit immediate.  Operand 2 is the
;; packed mask; it is split into four 2-bit fields (bits 0-1, 2-3, 4-5,
;; 6-7), each passed as a separate constant to sse2_pshufd_1.
4417 (define_expand "sse2_pshufd"
4418 [(match_operand:V4SI 0 "register_operand" "")
4419 (match_operand:V4SI 1 "nonimmediate_operand" "")
4420 (match_operand:SI 2 "const_int_operand" "")]
4423 int mask = INTVAL (operands[2]);
4424 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4425 GEN_INT ((mask >> 0) & 3),
4426 GEN_INT ((mask >> 2) & 3),
4427 GEN_INT ((mask >> 4) & 3),
4428 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle spelled out as four element selectors
;; (operands 2-5, each 0..3).  The output code packs the four selectors
;; back into one immediate, two bits each starting at bit 0, stores it in
;; operands[2] and prints it as the pshufd immediate.
4432 (define_insn "sse2_pshufd_1"
4433 [(set (match_operand:V4SI 0 "register_operand" "=x")
4435 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4436 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4437 (match_operand 3 "const_0_to_3_operand" "")
4438 (match_operand 4 "const_0_to_3_operand" "")
4439 (match_operand 5 "const_0_to_3_operand" "")])))]
4443 mask |= INTVAL (operands[2]) << 0;
4444 mask |= INTVAL (operands[3]) << 2;
4445 mask |= INTVAL (operands[4]) << 4;
4446 mask |= INTVAL (operands[5]) << 6;
4447 operands[2] = GEN_INT (mask);
4449 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4451 [(set_attr "type" "sselog1")
4452 (set_attr "prefix_data16" "1")
4453 (set_attr "mode" "TI")])
;; Expander for pshuflw with a packed 8-bit immediate.  Operand 2 is the
;; packed mask; it is split into four 2-bit fields (bits 0-1, 2-3, 4-5,
;; 6-7), each passed as a separate constant to sse2_pshuflw_1.
4455 (define_expand "sse2_pshuflw"
4456 [(match_operand:V8HI 0 "register_operand" "")
4457 (match_operand:V8HI 1 "nonimmediate_operand" "")
4458 (match_operand:SI 2 "const_int_operand" "")]
4461 int mask = INTVAL (operands[2]);
4462 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4463 GEN_INT ((mask >> 0) & 3),
4464 GEN_INT ((mask >> 2) & 3),
4465 GEN_INT ((mask >> 4) & 3),
4466 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle spelled out as four element selectors
;; (operands 2-5, each 0..3).  The output code packs the four selectors
;; back into one immediate, two bits each starting at bit 0, stores it in
;; operands[2] and prints it as the pshuflw immediate.
4470 (define_insn "sse2_pshuflw_1"
4471 [(set (match_operand:V8HI 0 "register_operand" "=x")
4473 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4474 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4475 (match_operand 3 "const_0_to_3_operand" "")
4476 (match_operand 4 "const_0_to_3_operand" "")
4477 (match_operand 5 "const_0_to_3_operand" "")
4485 mask |= INTVAL (operands[2]) << 0;
4486 mask |= INTVAL (operands[3]) << 2;
4487 mask |= INTVAL (operands[4]) << 4;
4488 mask |= INTVAL (operands[5]) << 6;
4489 operands[2] = GEN_INT (mask);
4491 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4493 [(set_attr "type" "sselog")
4494 (set_attr "prefix_rep" "1")
4495 (set_attr "mode" "TI")])
;; Expander for pshufhw with a packed 8-bit immediate.  Operand 2 is the
;; packed mask; each 2-bit field is extracted and biased by 4, so that
;; sse2_pshufhw_1 receives four selectors in the range 4..7.
4497 (define_expand "sse2_pshufhw"
4498 [(match_operand:V8HI 0 "register_operand" "")
4499 (match_operand:V8HI 1 "nonimmediate_operand" "")
4500 (match_operand:SI 2 "const_int_operand" "")]
4503 int mask = INTVAL (operands[2]);
4504 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4505 GEN_INT (((mask >> 0) & 3) + 4),
4506 GEN_INT (((mask >> 2) & 3) + 4),
4507 GEN_INT (((mask >> 4) & 3) + 4),
4508 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with the shuffle spelled out as element selectors.  Operands
;; 2-5 are selectors in the range 4..7; the output code subtracts the bias
;; of 4 from each, packs them two bits apiece starting at bit 0, stores the
;; result in operands[2] and prints it as the pshufhw immediate.
4512 (define_insn "sse2_pshufhw_1"
4513 [(set (match_operand:V8HI 0 "register_operand" "=x")
4515 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4516 (parallel [(const_int 0)
4520 (match_operand 2 "const_4_to_7_operand" "")
4521 (match_operand 3 "const_4_to_7_operand" "")
4522 (match_operand 4 "const_4_to_7_operand" "")
4523 (match_operand 5 "const_4_to_7_operand" "")])))]
4527 mask |= (INTVAL (operands[2]) - 4) << 0;
4528 mask |= (INTVAL (operands[3]) - 4) << 2;
4529 mask |= (INTVAL (operands[4]) - 4) << 4;
4530 mask |= (INTVAL (operands[5]) - 4) << 6;
4531 operands[2] = GEN_INT (mask);
4533 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4535 [(set_attr "type" "sselog")
4536 (set_attr "prefix_rep" "1")
4537 (set_attr "mode" "TI")])
;; Expander that loads the SI value in operand 1 into the V4SI register in
;; operand 0.  The preparation statement sets operands[2] to the V4SImode
;; zero constant.
4539 (define_expand "sse2_loadd"
4540 [(set (match_operand:V4SI 0 "register_operand" "")
4543 (match_operand:SI 1 "nonimmediate_operand" ""))
4547 "operands[2] = CONST0_RTX (V4SImode);")
;; Place the SI value in operand 2 into a V4SI register.  Four
;; alternatives: movd from memory, movd from a general register, movss from
;; memory, and movss from an SSE register.  Operand 1 is constant zero
;; ("C") in the first three alternatives and tied to the destination in the
;; last one.
4549 (define_insn "sse2_loadld"
4550 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4553 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4554 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4558 movd\t{%2, %0|%0, %2}
4559 movd\t{%2, %0|%0, %2}
4560 movss\t{%2, %0|%0, %2}
4561 movss\t{%2, %0|%0, %2}"
4562 [(set_attr "type" "ssemov")
4563 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of the V4SI register in operand 1 to the SI destination.
;; After reload the insn is split into a plain SImode move when
;; inter-unit moves are enabled, the destination is memory, or the
;; destination is not a general register; for the split, operand 1 is
;; re-expressed as an SImode REG with the same register number.
4565 (define_insn_and_split "sse2_stored"
4566 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4568 (match_operand:V4SI 1 "register_operand" "x,Yi")
4569 (parallel [(const_int 0)])))]
4572 "&& reload_completed
4573 && (TARGET_INTER_UNIT_MOVES
4574 || MEM_P (operands [0])
4575 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4576 [(set (match_dup 0) (match_dup 1))]
4578 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of the V2DI register in operand 1 to the
;; DI destination in operand 0.
4581 (define_expand "sse_storeq"
4582 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4584 (match_operand:V2DI 1 "register_operand" "")
4585 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 store from V2DI: the DI destination may be
;; memory, an SSE register, or (second alternative) a general register.
;; Enabled when TARGET_64BIT and TARGET_SSE both hold.
4589 (define_insn "*sse2_storeq_rex64"
4590 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4592 (match_operand:V2DI 1 "register_operand" "x,Yi")
4593 (parallel [(const_int 0)])))]
4594 "TARGET_64BIT && TARGET_SSE"
;; Element-0 store from V2DI whose DI destination is memory or an SSE
;; register, followed by the corresponding split.  The split rewrites the
;; extraction as a plain DImode move when inter-unit moves are enabled, the
;; destination is memory, or the destination is not a general register;
;; operand 1 is re-expressed as a DImode REG with the same register number.
4597 (define_insn "*sse2_storeq"
4598 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4600 (match_operand:V2DI 1 "register_operand" "x")
4601 (parallel [(const_int 0)])))]
4606 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4608 (match_operand:V2DI 1 "register_operand" "")
4609 (parallel [(const_int 0)])))]
4612 && (TARGET_INTER_UNIT_MOVES
4613 || MEM_P (operands [0])
4614 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4615 [(set (match_dup 0) (match_dup 1))]
4617 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI value into a DI destination (SSE2 targets).
;; Alternatives: movhps to memory; psrldq by 8 bytes in place (source tied
;; to the destination); movq from the %H1 form of an offsettable memory
;; source.  Memory-to-memory is rejected by the condition.
4620 (define_insn "*vec_extractv2di_1_sse2"
4621 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4623 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4624 (parallel [(const_int 1)])))]
4625 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4627 movhps\t{%1, %0|%0, %1}
4628 psrldq\t{$8, %0|%0, 8}
4629 movq\t{%H1, %0|%0, %H1}"
4630 [(set_attr "type" "ssemov,sseishft,ssemov")
4631 (set_attr "memory" "*,none,*")
4632 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI value into a DI destination on targets with
;; SSE but without SSE2.  Alternatives: movhps to memory; movhlps between
;; registers; movlps from the %H1 form of an offsettable memory source.
;; Memory-to-memory is rejected by the condition.
4634 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4635 (define_insn "*vec_extractv2di_1_sse"
4636 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4638 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4639 (parallel [(const_int 1)])))]
4640 "!TARGET_SSE2 && TARGET_SSE
4641 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4643 movhps\t{%1, %0|%0, %1}
4644 movhlps\t{%1, %0|%0, %1}
4645 movlps\t{%H1, %0|%0, %H1}"
4646 [(set_attr "type" "ssemov")
4647 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SI register from the SI register in operand 1.  Two
;; alternatives: pshufd with immediate 0 from operand 1, or shufps with
;; immediate 0 on the destination itself (operand 1 tied to operand 0).
4649 (define_insn "*vec_dupv4si"
4650 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4652 (match_operand:SI 1 "register_operand" " Yt,0")))]
4655 pshufd\t{$0, %1, %0|%0, %1, 0}
4656 shufps\t{$0, %0, %0|%0, %0, 0}"
4657 [(set_attr "type" "sselog1")
4658 (set_attr "mode" "TI,V4SF")])
;; Build a V2DI register from the DI register in operand 1, which is tied
;; to the destination in both alternatives.  The alternatives are typed
;; sselog1 (mode TI) and ssemov (mode V4SF).
4660 (define_insn "*vec_dupv2di"
4661 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4663 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4668 [(set_attr "type" "sselog1,ssemov")
4669 (set_attr "mode" "TI,V4SF")])
;; Combine two SI operands into a V2SI register.  Four alternatives:
;; punpckldq / movd typed as SSE (sselog, ssemov), then punpckldq / movd
;; typed as MMX (mmxcvt, mmxmov).  The movd alternatives take constant zero
;; ("C") as operand 2; the punpckldq alternatives tie operand 1 to the
;; destination.
4671 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4672 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4673 ;; alternatives pretty much forces the MMX alternative to be chosen.
4674 (define_insn "*sse2_concatv2si"
4675 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4677 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4678 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4681 punpckldq\t{%2, %0|%0, %2}
4682 movd\t{%1, %0|%0, %1}
4683 punpckldq\t{%2, %0|%0, %2}
4684 movd\t{%1, %0|%0, %1}"
4685 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4686 (set_attr "mode" "TI,TI,DI,DI")])
;; Combine two SI operands into a V2SI register.  Four alternatives:
;; unpcklps / movss typed as SSE (sselog, ssemov), then punpckldq / movd
;; typed as MMX (mmxcvt, mmxmov).  The movss and movd alternatives take
;; constant zero ("C") as operand 2; the others tie operand 1 to the
;; destination.
4688 (define_insn "*sse1_concatv2si"
4689 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4691 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4692 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4695 unpcklps\t{%2, %0|%0, %2}
4696 movss\t{%1, %0|%0, %1}
4697 punpckldq\t{%2, %0|%0, %2}
4698 movd\t{%1, %0|%0, %1}"
4699 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4700 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Combine two V2SI operands into a V4SI register; operand 1 is tied to
;; the destination in every alternative.  Alternatives: punpcklqdq with a
;; register, movlhps with a register, movhps with a memory operand 2.
4702 (define_insn "*vec_concatv4si_1"
4703 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4705 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4706 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4709 punpcklqdq\t{%2, %0|%0, %2}
4710 movlhps\t{%2, %0|%0, %2}
4711 movhps\t{%2, %0|%0, %2}"
4712 [(set_attr "type" "sselog,ssemov,ssemov")
4713 (set_attr "mode" "TI,V4SF,V2SF")])
;; Combine two DI operands into a V2DI register.  Six alternatives:
;;   movq from memory and movq2dq from a "y" register, both with operand 2
;;   constant zero ("C");
;;   punpcklqdq, movlhps and movhps with operand 1 tied to the destination;
;;   movlps from memory with operand 2 tied to the destination.
4715 (define_insn "vec_concatv2di"
4716 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4718 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4719 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4722 movq\t{%1, %0|%0, %1}
4723 movq2dq\t{%1, %0|%0, %1}
4724 punpcklqdq\t{%2, %0|%0, %2}
4725 movlhps\t{%2, %0|%0, %2}
4726 movhps\t{%2, %0|%0, %2}
4727 movlps\t{%1, %0|%0, %1}"
4728 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4729 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: operand 0 is a V2DI register, operand 1 a DI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
4731 (define_expand "vec_setv2di"
4732 [(match_operand:V2DI 0 "register_operand" "")
4733 (match_operand:DI 1 "register_operand" "")
4734 (match_operand 2 "const_int_operand" "")]
4737 ix86_expand_vector_set (false, operands[0], operands[1],
4738 INTVAL (operands[2]));
;; Expander: operand 0 is a DI register, operand 1 a V2DI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
4742 (define_expand "vec_extractv2di"
4743 [(match_operand:DI 0 "register_operand" "")
4744 (match_operand:V2DI 1 "register_operand" "")
4745 (match_operand 2 "const_int_operand" "")]
4748 ix86_expand_vector_extract (false, operands[0], operands[1],
4749 INTVAL (operands[2]));
;; Expander: operand 0 is a V2DI register; operand 1 has no predicate or
;; mode restriction.  All work is delegated to ix86_expand_vector_init,
;; called with `false' as its first argument.
4753 (define_expand "vec_initv2di"
4754 [(match_operand:V2DI 0 "register_operand" "")
4755 (match_operand 1 "" "")]
4758 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is a V4SI register, operand 1 an SI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
4762 (define_expand "vec_setv4si"
4763 [(match_operand:V4SI 0 "register_operand" "")
4764 (match_operand:SI 1 "register_operand" "")
4765 (match_operand 2 "const_int_operand" "")]
4768 ix86_expand_vector_set (false, operands[0], operands[1],
4769 INTVAL (operands[2]));
;; Expander: operand 0 is an SI register, operand 1 a V4SI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
4773 (define_expand "vec_extractv4si"
4774 [(match_operand:SI 0 "register_operand" "")
4775 (match_operand:V4SI 1 "register_operand" "")
4776 (match_operand 2 "const_int_operand" "")]
4779 ix86_expand_vector_extract (false, operands[0], operands[1],
4780 INTVAL (operands[2]));
;; Expander: operand 0 is a V4SI register; operand 1 has no predicate or
;; mode restriction.  All work is delegated to ix86_expand_vector_init,
;; called with `false' as its first argument.
4784 (define_expand "vec_initv4si"
4785 [(match_operand:V4SI 0 "register_operand" "")
4786 (match_operand 1 "" "")]
4789 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is a V8HI register, operand 1 an HI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
4793 (define_expand "vec_setv8hi"
4794 [(match_operand:V8HI 0 "register_operand" "")
4795 (match_operand:HI 1 "register_operand" "")
4796 (match_operand 2 "const_int_operand" "")]
4799 ix86_expand_vector_set (false, operands[0], operands[1],
4800 INTVAL (operands[2]));
;; Expander: operand 0 is an HI register, operand 1 a V8HI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
4804 (define_expand "vec_extractv8hi"
4805 [(match_operand:HI 0 "register_operand" "")
4806 (match_operand:V8HI 1 "register_operand" "")
4807 (match_operand 2 "const_int_operand" "")]
4810 ix86_expand_vector_extract (false, operands[0], operands[1],
4811 INTVAL (operands[2]));
;; Expander: operand 0 is a V8HI register; operand 1 has no predicate or
;; mode restriction.  All work is delegated to ix86_expand_vector_init,
;; called with `false' as its first argument.
4815 (define_expand "vec_initv8hi"
4816 [(match_operand:V8HI 0 "register_operand" "")
4817 (match_operand 1 "" "")]
4820 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is a V16QI register, operand 1 a QI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
4824 (define_expand "vec_setv16qi"
4825 [(match_operand:V16QI 0 "register_operand" "")
4826 (match_operand:QI 1 "register_operand" "")
4827 (match_operand 2 "const_int_operand" "")]
4830 ix86_expand_vector_set (false, operands[0], operands[1],
4831 INTVAL (operands[2]));
;; Expander: operand 0 is a QI register, operand 1 a V16QI register and
;; operand 2 a constant integer (presumably the element index).  All work
;; is delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
4835 (define_expand "vec_extractv16qi"
4836 [(match_operand:QI 0 "register_operand" "")
4837 (match_operand:V16QI 1 "register_operand" "")
4838 (match_operand 2 "const_int_operand" "")]
4841 ix86_expand_vector_extract (false, operands[0], operands[1],
4842 INTVAL (operands[2]));
;; Expander: operand 0 is a V16QI register; operand 1 has no predicate or
;; mode restriction.  All work is delegated to ix86_expand_vector_init,
;; called with `false' as its first argument.
4846 (define_expand "vec_initv16qi"
4847 [(match_operand:V16QI 0 "register_operand" "")
4848 (match_operand 1 "" "")]
4851 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, true); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4855 (define_expand "vec_unpacku_hi_v16qi"
4856 [(match_operand:V8HI 0 "register_operand" "")
4857 (match_operand:V16QI 1 "register_operand" "")]
4861 ix86_expand_sse4_unpack (operands, true, true);
4863 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, true); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4867 (define_expand "vec_unpacks_hi_v16qi"
4868 [(match_operand:V8HI 0 "register_operand" "")
4869 (match_operand:V16QI 1 "register_operand" "")]
4873 ix86_expand_sse4_unpack (operands, false, true);
4875 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, false); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4879 (define_expand "vec_unpacku_lo_v16qi"
4880 [(match_operand:V8HI 0 "register_operand" "")
4881 (match_operand:V16QI 1 "register_operand" "")]
4885 ix86_expand_sse4_unpack (operands, true, false);
4887 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register (operand 1) to a V8HI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, false); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4891 (define_expand "vec_unpacks_lo_v16qi"
4892 [(match_operand:V8HI 0 "register_operand" "")
4893 (match_operand:V16QI 1 "register_operand" "")]
4897 ix86_expand_sse4_unpack (operands, false, false);
4899 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, true); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4903 (define_expand "vec_unpacku_hi_v8hi"
4904 [(match_operand:V4SI 0 "register_operand" "")
4905 (match_operand:V8HI 1 "register_operand" "")]
4909 ix86_expand_sse4_unpack (operands, true, true);
4911 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, true); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4915 (define_expand "vec_unpacks_hi_v8hi"
4916 [(match_operand:V4SI 0 "register_operand" "")
4917 (match_operand:V8HI 1 "register_operand" "")]
4921 ix86_expand_sse4_unpack (operands, false, true);
4923 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, false); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4927 (define_expand "vec_unpacku_lo_v8hi"
4928 [(match_operand:V4SI 0 "register_operand" "")
4929 (match_operand:V8HI 1 "register_operand" "")]
4933 ix86_expand_sse4_unpack (operands, true, false);
4935 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register (operand 1) to a V4SI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, false); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4939 (define_expand "vec_unpacks_lo_v8hi"
4940 [(match_operand:V4SI 0 "register_operand" "")
4941 (match_operand:V8HI 1 "register_operand" "")]
4945 ix86_expand_sse4_unpack (operands, false, false);
4947 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, true); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4951 (define_expand "vec_unpacku_hi_v4si"
4952 [(match_operand:V2DI 0 "register_operand" "")
4953 (match_operand:V4SI 1 "register_operand" "")]
4957 ix86_expand_sse4_unpack (operands, true, true);
4959 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, true); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4963 (define_expand "vec_unpacks_hi_v4si"
4964 [(match_operand:V2DI 0 "register_operand" "")
4965 (match_operand:V4SI 1 "register_operand" "")]
4969 ix86_expand_sse4_unpack (operands, false, true);
4971 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, false); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4975 (define_expand "vec_unpacku_lo_v4si"
4976 [(match_operand:V2DI 0 "register_operand" "")
4977 (match_operand:V4SI 1 "register_operand" "")]
4981 ix86_expand_sse4_unpack (operands, true, false);
4983 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register (operand 1) to a V2DI register
;; (operand 0).  Calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, false); across the sibling
;; expanders the two flags follow the u/s and hi/lo parts of the name.
4987 (define_expand "vec_unpacks_lo_v4si"
4988 [(match_operand:V2DI 0 "register_operand" "")
4989 (match_operand:V4SI 1 "register_operand" "")]
4993 ix86_expand_sse4_unpack (operands, false, false);
4995 ix86_expand_sse_unpack (operands, false, false);
4999 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5003 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb on V16QI.  Operand 1 carries the "%0" constraint (commutative
;; with operand 2 and tied to the destination); operand 2 is a register or
;; memory.  The RTL includes a constant vector of sixteen 1s (presumably
;; the rounding term of the average -- TODO confirm).  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
5005 (define_insn "sse2_uavgv16qi3"
5006 [(set (match_operand:V16QI 0 "register_operand" "=x")
5012 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5014 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5015 (const_vector:V16QI [(const_int 1) (const_int 1)
5016 (const_int 1) (const_int 1)
5017 (const_int 1) (const_int 1)
5018 (const_int 1) (const_int 1)
5019 (const_int 1) (const_int 1)
5020 (const_int 1) (const_int 1)
5021 (const_int 1) (const_int 1)
5022 (const_int 1) (const_int 1)]))
5024 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5025 "pavgb\t{%2, %0|%0, %2}"
5026 [(set_attr "type" "sseiadd")
5027 (set_attr "prefix_data16" "1")
5028 (set_attr "mode" "TI")])
;; pavgw on V8HI.  Operand 1 carries the "%0" constraint (commutative with
;; operand 2 and tied to the destination); operand 2 is a register or
;; memory.  The RTL includes a constant vector of eight 1s (presumably the
;; rounding term of the average -- TODO confirm).  Requires TARGET_SSE2
;; and ix86_binary_operator_ok for PLUS.
5030 (define_insn "sse2_uavgv8hi3"
5031 [(set (match_operand:V8HI 0 "register_operand" "=x")
5037 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5040 (const_vector:V8HI [(const_int 1) (const_int 1)
5041 (const_int 1) (const_int 1)
5042 (const_int 1) (const_int 1)
5043 (const_int 1) (const_int 1)]))
5045 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5046 "pavgw\t{%2, %0|%0, %2}"
5047 [(set_attr "type" "sseiadd")
5048 (set_attr "prefix_data16" "1")
5049 (set_attr "mode" "TI")])
;; psadbw, modelled as an unspec producing V2DI from two V16QI inputs.
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
5051 ;; The correct representation for this is absolutely enormous, and
5052 ;; surely not generally useful.
5053 (define_insn "sse2_psadbw"
5054 [(set (match_operand:V2DI 0 "register_operand" "=x")
5055 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5056 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5059 "psadbw\t{%2, %0|%0, %2}"
5060 [(set_attr "type" "sseiadd")
5061 (set_attr "prefix_data16" "1")
5062 (set_attr "mode" "TI")])
;; movmskps, modelled as an unspec: produces an SI value in a general
;; register from the V4SF register in operand 1.
5064 (define_insn "sse_movmskps"
5065 [(set (match_operand:SI 0 "register_operand" "=r")
5066 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5069 "movmskps\t{%1, %0|%0, %1}"
5070 [(set_attr "type" "ssecvt")
5071 (set_attr "mode" "V4SF")])
;; movmskpd, modelled as an unspec: produces an SI value in a general
;; register from the V2DF register in operand 1.
5073 (define_insn "sse2_movmskpd"
5074 [(set (match_operand:SI 0 "register_operand" "=r")
5075 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5078 "movmskpd\t{%1, %0|%0, %1}"
5079 [(set_attr "type" "ssecvt")
5080 (set_attr "mode" "V2DF")])
;; pmovmskb, modelled as an unspec: produces an SI value in a general
;; register from the V16QI register in operand 1.
5082 (define_insn "sse2_pmovmskb"
5083 [(set (match_operand:SI 0 "register_operand" "=r")
5084 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5087 "pmovmskb\t{%1, %0|%0, %1}"
5088 [(set_attr "type" "ssecvt")
5089 (set_attr "prefix_data16" "1")
5090 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) is set
;; from an unspec whose inputs include the two V16QI registers in operands
;; 1 and 2.
5092 (define_expand "sse2_maskmovdqu"
5093 [(set (match_operand:V16QI 0 "memory_operand" "")
5094 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5095 (match_operand:V16QI 2 "register_operand" "x")
;; maskmovdqu for 32-bit targets.  The destination is V16QI memory
;; addressed by the SI register in operand 0 (constraint "D"); the unspec
;; reads operands 1 and 2 and the previous contents of that same memory.
;; The address register does not appear in the printed instruction.
5101 (define_insn "*sse2_maskmovdqu"
5102 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5103 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5104 (match_operand:V16QI 2 "register_operand" "x")
5105 (mem:V16QI (match_dup 0))]
5107 "TARGET_SSE2 && !TARGET_64BIT"
5108 ;; @@@ check ordering of operands in intel/nonintel syntax
5109 "maskmovdqu\t{%2, %1|%1, %2}"
5110 [(set_attr "type" "ssecvt")
5111 (set_attr "prefix_data16" "1")
5112 (set_attr "mode" "TI")])
;; maskmovdqu for 64-bit targets.  The destination is V16QI memory
;; addressed by the DI register in operand 0 (constraint "D"); the unspec
;; reads operands 1 and 2 and the previous contents of that same memory.
;; The address register does not appear in the printed instruction.
5114 (define_insn "*sse2_maskmovdqu_rex64"
5115 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5116 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5117 (match_operand:V16QI 2 "register_operand" "x")
5118 (mem:V16QI (match_dup 0))]
5120 "TARGET_SSE2 && TARGET_64BIT"
5121 ;; @@@ check ordering of operands in intel/nonintel syntax
5122 "maskmovdqu\t{%2, %1|%1, %2}"
5123 [(set_attr "type" "ssecvt")
5124 (set_attr "prefix_data16" "1")
5125 (set_attr "mode" "TI")])
;; Volatile unspec taking an SI memory operand; the "memory" attribute is
;; "load".  The name indicates this is the ldmxcsr instruction.
5127 (define_insn "sse_ldmxcsr"
5128 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5132 [(set_attr "type" "sse")
5133 (set_attr "memory" "load")])
;; Sets an SI memory operand from the volatile unspec UNSPECV_STMXCSR; the
;; "memory" attribute is "store".
5135 (define_insn "sse_stmxcsr"
5136 [(set (match_operand:SI 0 "memory_operand" "=m")
5137 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5140 [(set_attr "type" "sse")
5141 (set_attr "memory" "store")])
;; Expander for the store fence, enabled for TARGET_SSE or TARGET_3DNOW_A.
;; The fence is expressed as an UNSPEC_SFENCE on a BLKmode memory operand;
;; the preparation code builds that operand as a MEM with a scratch
;; address and marks it volatile.
5143 (define_expand "sse_sfence"
5145 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5146 "TARGET_SSE || TARGET_3DNOW_A"
5148 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5149 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_SFENCE form of a BLKmode operand set from
;; itself; enabled for TARGET_SSE or TARGET_3DNOW_A.  Its memory attribute
;; is "unknown".
5152 (define_insn "*sse_sfence"
5153 [(set (match_operand:BLK 0 "" "")
5154 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5155 "TARGET_SSE || TARGET_3DNOW_A"
5157 [(set_attr "type" "sse")
5158 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand (constraint "p"); the memory
;; attribute is "unknown".  The name indicates this is the clflush
;; instruction.
5160 (define_insn "sse2_clflush"
5161 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5165 [(set_attr "type" "sse")
5166 (set_attr "memory" "unknown")])
;; Expander expressing the fence as an UNSPEC_MFENCE on a BLKmode memory
;; operand; the preparation code builds that operand as a MEM with a
;; scratch address and marks it volatile.
5168 (define_expand "sse2_mfence"
5170 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5173 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5174 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_MFENCE form of a BLKmode operand set from
;; itself.  Its memory attribute is "unknown".
5177 (define_insn "*sse2_mfence"
5178 [(set (match_operand:BLK 0 "" "")
5179 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5182 [(set_attr "type" "sse")
5183 (set_attr "memory" "unknown")])
;; Expander expressing the fence as an UNSPEC_LFENCE on a BLKmode memory
;; operand; the preparation code builds that operand as a MEM with a
;; scratch address and marks it volatile.
5185 (define_expand "sse2_lfence"
5187 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5190 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5191 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_LFENCE form of a BLKmode operand set from
;; itself.  Its memory attribute is "unknown".
5194 (define_insn "*sse2_lfence"
5195 [(set (match_operand:BLK 0 "" "")
5196 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5199 [(set_attr "type" "sse")
5200 (set_attr "memory" "unknown")])
;; mwait as a volatile unspec taking two SI register operands, constrained
;; to "a" and "c" respectively.  The length attribute is fixed at 3.
5202 (define_insn "sse3_mwait"
5203 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5204 (match_operand:SI 1 "register_operand" "c")]
5207 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5208 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5209 ;; we only need to set up 32bit registers.
5211 [(set_attr "length" "3")])
;; monitor for 32-bit targets (TARGET_SSE3 && !TARGET_64BIT): a volatile
;; unspec taking three SI register operands constrained to "a", "c" and
;; "d".  All three operands are printed explicitly; the length attribute
;; is fixed at 3.
5213 (define_insn "sse3_monitor"
5214 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5215 (match_operand:SI 1 "register_operand" "c")
5216 (match_operand:SI 2 "register_operand" "d")]
5218 "TARGET_SSE3 && !TARGET_64BIT"
5219 "monitor\t%0, %1, %2"
5220 [(set_attr "length" "3")])
;; monitor for 64-bit targets (TARGET_SSE3 && TARGET_64BIT): a volatile
;; unspec whose first operand is a DI register constrained to "a", while
;; operands 1 and 2 stay SI registers constrained to "c" and "d".  The
;; length attribute is fixed at 3.
5222 (define_insn "sse3_monitor64"
5223 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5224 (match_operand:SI 1 "register_operand" "c")
5225 (match_operand:SI 2 "register_operand" "d")]
5227 "TARGET_SSE3 && TARGET_64BIT"
5228 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5229 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5230 ;; zero extended to 64bit, we only need to set up 32bit registers.
5232 [(set_attr "length" "3")])
;; phaddw on V8HI.  The RTL pairs adjacent HI elements (0,1), (2,3),
;; (4,5), (6,7) of operand 1, followed by the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
5235 (define_insn "ssse3_phaddwv8hi3"
5236 [(set (match_operand:V8HI 0 "register_operand" "=x")
5242 (match_operand:V8HI 1 "register_operand" "0")
5243 (parallel [(const_int 0)]))
5244 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5246 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5247 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5250 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5251 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5253 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5254 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5259 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5260 (parallel [(const_int 0)]))
5261 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5263 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5264 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5267 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5268 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5270 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5271 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5273 "phaddw\t{%2, %0|%0, %2}"
5274 [(set_attr "type" "sseiadd")
5275 (set_attr "prefix_data16" "1")
5276 (set_attr "prefix_extra" "1")
5277 (set_attr "mode" "TI")])
;; phaddw on V4HI, using "y" register constraints and mode DI.  The RTL
;; pairs adjacent HI elements (0,1), (2,3) of operand 1, followed by the
;; same pairs of operand 2.  Operand 1 is tied to the destination.
5279 (define_insn "ssse3_phaddwv4hi3"
5280 [(set (match_operand:V4HI 0 "register_operand" "=y")
5285 (match_operand:V4HI 1 "register_operand" "0")
5286 (parallel [(const_int 0)]))
5287 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5289 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5290 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5294 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5295 (parallel [(const_int 0)]))
5296 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5298 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5299 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5301 "phaddw\t{%2, %0|%0, %2}"
5302 [(set_attr "type" "sseiadd")
5303 (set_attr "prefix_extra" "1")
5304 (set_attr "mode" "DI")])
;; phaddd on V4SI.  The RTL pairs adjacent SI elements (0,1), (2,3) of
;; operand 1, followed by the same pairs of operand 2.  Operand 1 is tied
;; to the destination; operand 2 is a register or memory.
5306 (define_insn "ssse3_phadddv4si3"
5307 [(set (match_operand:V4SI 0 "register_operand" "=x")
5312 (match_operand:V4SI 1 "register_operand" "0")
5313 (parallel [(const_int 0)]))
5314 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5316 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5317 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5321 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5322 (parallel [(const_int 0)]))
5323 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5325 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5326 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5328 "phaddd\t{%2, %0|%0, %2}"
5329 [(set_attr "type" "sseiadd")
5330 (set_attr "prefix_data16" "1")
5331 (set_attr "prefix_extra" "1")
5332 (set_attr "mode" "TI")])
;; phaddd on V2SI, using "y" register constraints and mode DI.  The RTL
;; pairs SI elements (0,1) of operand 1, followed by the same pair of
;; operand 2.  Operand 1 is tied to the destination.
5334 (define_insn "ssse3_phadddv2si3"
5335 [(set (match_operand:V2SI 0 "register_operand" "=y")
5339 (match_operand:V2SI 1 "register_operand" "0")
5340 (parallel [(const_int 0)]))
5341 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5344 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5345 (parallel [(const_int 0)]))
5346 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5348 "phaddd\t{%2, %0|%0, %2}"
5349 [(set_attr "type" "sseiadd")
5350 (set_attr "prefix_extra" "1")
5351 (set_attr "mode" "DI")])
;; phaddsw on V8HI (presumably the saturating form of phaddw -- confirm
;; against the instruction reference).  The RTL pairs adjacent HI elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1, followed by the same pairs of
;; operand 2.  Operand 1 is tied to the destination.
5353 (define_insn "ssse3_phaddswv8hi3"
5354 [(set (match_operand:V8HI 0 "register_operand" "=x")
5360 (match_operand:V8HI 1 "register_operand" "0")
5361 (parallel [(const_int 0)]))
5362 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5364 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5365 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5368 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5369 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5371 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5372 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5377 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5378 (parallel [(const_int 0)]))
5379 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5381 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5382 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5385 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5386 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5388 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5389 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5391 "phaddsw\t{%2, %0|%0, %2}"
5392 [(set_attr "type" "sseiadd")
5393 (set_attr "prefix_data16" "1")
5394 (set_attr "prefix_extra" "1")
5395 (set_attr "mode" "TI")])
;; phaddsw on V4HI, using "y" register constraints and mode DI
;; (presumably the saturating form of phaddw -- confirm against the
;; instruction reference).  The RTL pairs adjacent HI elements (0,1),
;; (2,3) of operand 1, followed by the same pairs of operand 2.
5397 (define_insn "ssse3_phaddswv4hi3"
5398 [(set (match_operand:V4HI 0 "register_operand" "=y")
5403 (match_operand:V4HI 1 "register_operand" "0")
5404 (parallel [(const_int 0)]))
5405 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5407 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5408 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5412 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5413 (parallel [(const_int 0)]))
5414 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5416 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5417 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5419 "phaddsw\t{%2, %0|%0, %2}"
5420 [(set_attr "type" "sseiadd")
5421 (set_attr "prefix_extra" "1")
5422 (set_attr "mode" "DI")])
;; phsubw on V8HI.  The RTL pairs adjacent HI elements (0,1), (2,3),
;; (4,5), (6,7) of operand 1, followed by the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 is a register or memory.
5424 (define_insn "ssse3_phsubwv8hi3"
5425 [(set (match_operand:V8HI 0 "register_operand" "=x")
5431 (match_operand:V8HI 1 "register_operand" "0")
5432 (parallel [(const_int 0)]))
5433 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5435 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5436 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5439 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5440 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5442 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5443 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5448 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5449 (parallel [(const_int 0)]))
5450 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5452 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5453 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5456 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5457 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5459 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5460 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5462 "phsubw\t{%2, %0|%0, %2}"
5463 [(set_attr "type" "sseiadd")
5464 (set_attr "prefix_data16" "1")
5465 (set_attr "prefix_extra" "1")
5466 (set_attr "mode" "TI")])
;; phsubw, V4HI form on "y" (MMX) registers.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The template reads
;; elements 0..3 of operand 1, then elements 0..3 of operand 2.
5468 (define_insn "ssse3_phsubwv4hi3"
5469 [(set (match_operand:V4HI 0 "register_operand" "=y")
5474 (match_operand:V4HI 1 "register_operand" "0")
5475 (parallel [(const_int 0)]))
5476 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5478 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5479 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5483 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5484 (parallel [(const_int 0)]))
5485 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5487 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5488 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5490 "phsubw\t{%2, %0|%0, %2}"
5491 [(set_attr "type" "sseiadd")
5492 (set_attr "prefix_extra" "1")
5493 (set_attr "mode" "DI")])
;; phsubd, V4SI form on "x" (SSE) registers.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The template reads
;; SI elements 0..3 of operand 1, then SI elements 0..3 of operand 2.
5495 (define_insn "ssse3_phsubdv4si3"
5496 [(set (match_operand:V4SI 0 "register_operand" "=x")
5501 (match_operand:V4SI 1 "register_operand" "0")
5502 (parallel [(const_int 0)]))
5503 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5505 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5506 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5510 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5511 (parallel [(const_int 0)]))
5512 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5514 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5515 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5517 "phsubd\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseiadd")
5519 (set_attr "prefix_data16" "1")
5520 (set_attr "prefix_extra" "1")
5521 (set_attr "mode" "TI")])
;; phsubd, V2SI form on "y" (MMX) registers.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The template reads
;; SI elements 0 and 1 of each source operand.
5523 (define_insn "ssse3_phsubdv2si3"
5524 [(set (match_operand:V2SI 0 "register_operand" "=y")
5528 (match_operand:V2SI 1 "register_operand" "0")
5529 (parallel [(const_int 0)]))
5530 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5533 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5534 (parallel [(const_int 0)]))
5535 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5537 "phsubd\t{%2, %0|%0, %2}"
5538 [(set_attr "type" "sseiadd")
5539 (set_attr "prefix_extra" "1")
5540 (set_attr "mode" "DI")])
;; phsubsw, V8HI form on "x" (SSE) registers.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The template reads
;; elements 0..7 of operand 1, then elements 0..7 of operand 2.
5542 (define_insn "ssse3_phsubswv8hi3"
5543 [(set (match_operand:V8HI 0 "register_operand" "=x")
5549 (match_operand:V8HI 1 "register_operand" "0")
5550 (parallel [(const_int 0)]))
5551 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5553 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5554 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5557 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5558 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5560 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5561 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5566 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5567 (parallel [(const_int 0)]))
5568 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5570 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5571 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5574 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5575 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5577 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5578 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5580 "phsubsw\t{%2, %0|%0, %2}"
5581 [(set_attr "type" "sseiadd")
5582 (set_attr "prefix_data16" "1")
5583 (set_attr "prefix_extra" "1")
5584 (set_attr "mode" "TI")])
;; phsubsw, V4HI form on "y" (MMX) registers.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The template reads
;; elements 0..3 of operand 1, then elements 0..3 of operand 2.
5586 (define_insn "ssse3_phsubswv4hi3"
5587 [(set (match_operand:V4HI 0 "register_operand" "=y")
5592 (match_operand:V4HI 1 "register_operand" "0")
5593 (parallel [(const_int 0)]))
5594 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5596 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5597 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5601 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5602 (parallel [(const_int 0)]))
5603 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5605 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5606 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5608 "phsubsw\t{%2, %0|%0, %2}"
5609 [(set_attr "type" "sseiadd")
5610 (set_attr "prefix_extra" "1")
5611 (set_attr "mode" "DI")])
;; pmaddubsw, V8HI result from two V16QI sources on "x" (SSE) registers.
;; Operand 1 is tied to the destination and must NOT be marked commutative:
;; the instruction multiplies unsigned bytes of the destination operand by
;; signed bytes of the source operand, so swapping operands 1 and 2 changes
;; the result.  The odd-element selects (indices 1..15) pick eight QI
;; elements each, hence their V8QI mode.
5613 (define_insn "ssse3_pmaddubswv8hi3"
5614 [(set (match_operand:V8HI 0 "register_operand" "=x")
5619 (match_operand:V16QI 1 "register_operand" "0")
5620 (parallel [(const_int 0)
5630 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5631 (parallel [(const_int 0)
5641 (vec_select:V8QI (match_dup 1)
5642 (parallel [(const_int 1)
5651 (vec_select:V8QI (match_dup 2)
5652 (parallel [(const_int 1)
5659 (const_int 15)]))))))]
5661 "pmaddubsw\t{%2, %0|%0, %2}"
5662 [(set_attr "type" "sseiadd")
5663 (set_attr "prefix_data16" "1")
5664 (set_attr "prefix_extra" "1")
5665 (set_attr "mode" "TI")])
;; pmaddubsw, V4HI result from two V8QI sources on "y" (MMX) registers.
;; Operand 1 is tied to the destination and must NOT be marked commutative:
;; the instruction multiplies unsigned bytes of the destination operand by
;; signed bytes of the source operand, so swapping operands 1 and 2 changes
;; the result.  The odd-element selects (indices 1..7) pick four QI elements
;; each, hence their V4QI mode.
5667 (define_insn "ssse3_pmaddubswv4hi3"
5668 [(set (match_operand:V4HI 0 "register_operand" "=y")
5673 (match_operand:V8QI 1 "register_operand" "0")
5674 (parallel [(const_int 0)
5680 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5681 (parallel [(const_int 0)
5687 (vec_select:V4QI (match_dup 1)
5688 (parallel [(const_int 1)
5693 (vec_select:V4QI (match_dup 2)
5694 (parallel [(const_int 1)
5697 (const_int 7)]))))))]
5699 "pmaddubsw\t{%2, %0|%0, %2}"
5700 [(set_attr "type" "sseiadd")
5701 (set_attr "prefix_extra" "1")
5702 (set_attr "mode" "DI")])
;; pmulhrsw, V8HI form on "x" (SSE) registers.  Operands 1 and 2 are marked
;; commutative ("%0"); the insn is enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.  The RTL template
;; includes a V8HI const_vector whose eight elements are all 1.
5704 (define_insn "ssse3_pmulhrswv8hi3"
5705 [(set (match_operand:V8HI 0 "register_operand" "=x")
5712 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5714 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5716 (const_vector:V8HI [(const_int 1) (const_int 1)
5717 (const_int 1) (const_int 1)
5718 (const_int 1) (const_int 1)
5719 (const_int 1) (const_int 1)]))
5721 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5722 "pmulhrsw\t{%2, %0|%0, %2}"
5723 [(set_attr "type" "sseimul")
5724 (set_attr "prefix_data16" "1")
5725 (set_attr "prefix_extra" "1")
5726 (set_attr "mode" "TI")])
;; pmulhrsw, V4HI form on "y" (MMX) registers.  Operands 1 and 2 are marked
;; commutative ("%0"); the insn is enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.  The RTL template
;; includes a V4HI const_vector whose four elements are all 1.
5728 (define_insn "ssse3_pmulhrswv4hi3"
5729 [(set (match_operand:V4HI 0 "register_operand" "=y")
5736 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5738 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5740 (const_vector:V4HI [(const_int 1) (const_int 1)
5741 (const_int 1) (const_int 1)]))
5743 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5744 "pmulhrsw\t{%2, %0|%0, %2}"
5745 [(set_attr "type" "sseimul")
5746 (set_attr "prefix_extra" "1")
5747 (set_attr "mode" "DI")])
;; pshufb, V16QI form on "x" (SSE) registers, expressed as an unspec of
;; operand 1 (tied to the destination) and operand 2 (register or memory).
;; The output template carries no trailing ';' -- in this file format ';'
;; starts a comment, so a C-style terminator there is only noise.
5749 (define_insn "ssse3_pshufbv16qi3"
5750 [(set (match_operand:V16QI 0 "register_operand" "=x")
5751 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5752 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5755 "pshufb\t{%2, %0|%0, %2}"
5756 [(set_attr "type" "sselog1")
5757 (set_attr "prefix_data16" "1")
5758 (set_attr "prefix_extra" "1")
5759 (set_attr "mode" "TI")])
;; pshufb, V8QI form on "y" (MMX) registers, expressed as an unspec of
;; operand 1 (tied to the destination) and operand 2 (register or memory).
;; The output template carries no trailing ';' -- in this file format ';'
;; starts a comment, so a C-style terminator there is only noise.
5761 (define_insn "ssse3_pshufbv8qi3"
5762 [(set (match_operand:V8QI 0 "register_operand" "=y")
5763 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5764 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5767 "pshufb\t{%2, %0|%0, %2}"
5768 [(set_attr "type" "sselog1")
5769 (set_attr "prefix_extra" "1")
5770 (set_attr "mode" "DI")])
;; psign{b,w,d} for each mode in SSEMODE124 on "x" (SSE) registers; the
;; mnemonic suffix comes from the ssevecsize mode attribute.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
;; The output template carries no trailing ';' -- in this file format ';'
;; starts a comment, so a C-style terminator there is only noise.
5772 (define_insn "ssse3_psign<mode>3"
5773 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5774 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5775 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5778 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5779 [(set_attr "type" "sselog1")
5780 (set_attr "prefix_data16" "1")
5781 (set_attr "prefix_extra" "1")
5782 (set_attr "mode" "TI")])
;; psign for each mode in MMXMODEI on "y" (MMX) registers; the mnemonic
;; suffix comes from the mmxvecsize mode attribute.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
;; The output template carries no trailing ';' -- in this file format ';'
;; starts a comment, so a C-style terminator there is only noise.
5784 (define_insn "ssse3_psign<mode>3"
5785 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5786 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5787 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5790 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5791 [(set_attr "type" "sselog1")
5792 (set_attr "prefix_extra" "1")
5793 (set_attr "mode" "DI")])
;; palignr on TImode values in "x" (SSE) registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code replaces it with
;; its value divided by 8 before printing the instruction.
5795 (define_insn "ssse3_palignrti"
5796 [(set (match_operand:TI 0 "register_operand" "=x")
5797 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5798 (match_operand:TI 2 "nonimmediate_operand" "xm")
5799 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5803 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5804 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5806 [(set_attr "type" "sseishft")
5807 (set_attr "prefix_data16" "1")
5808 (set_attr "prefix_extra" "1")
5809 (set_attr "mode" "TI")])
;; palignr on DImode values in "y" (MMX) registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code replaces it with
;; its value divided by 8 before printing the instruction.
5811 (define_insn "ssse3_palignrdi"
5812 [(set (match_operand:DI 0 "register_operand" "=y")
5813 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5814 (match_operand:DI 2 "nonimmediate_operand" "ym")
5815 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5819 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5820 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5822 [(set_attr "type" "sseishft")
5823 (set_attr "prefix_extra" "1")
5824 (set_attr "mode" "DI")])
;; Vector absolute value (abs RTL code) for each mode in SSEMODE124, emitted
;; as pabs{b,w,d} via the ssevecsize mode attribute.  The source may be an
;; "x" register or memory; the destination is an "x" register.
;; The output template carries no trailing ';' -- in this file format ';'
;; starts a comment, so a C-style terminator there is only noise.
5826 (define_insn "abs<mode>2"
5827 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5828 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5830 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5831 [(set_attr "type" "sselog1")
5832 (set_attr "prefix_data16" "1")
5833 (set_attr "prefix_extra" "1")
5834 (set_attr "mode" "TI")])
;; Vector absolute value (abs RTL code) for each mode in MMXMODEI, emitted
;; as pabs with the suffix given by the mmxvecsize mode attribute.  The
;; source may be a "y" register or memory; the destination is a "y" register.
;; The output template carries no trailing ';' -- in this file format ';'
;; starts a comment, so a C-style terminator there is only noise.
5836 (define_insn "abs<mode>2"
5837 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5838 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5840 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5841 [(set_attr "type" "sselog1")
5842 (set_attr "prefix_extra" "1")
5843 (set_attr "mode" "DI")])
5845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5847 ;; AMD SSE4A instructions
5849 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntsd, vector-source form: stores element 0 of the V2DF register
;; operand 1 to the DF memory operand 0, wrapped in an unspec.
5851 (define_insn "sse4a_vmmovntv2df"
5852 [(set (match_operand:DF 0 "memory_operand" "=m")
5853 (unspec:DF [(vec_select:DF
5854 (match_operand:V2DF 1 "register_operand" "x")
5855 (parallel [(const_int 0)]))]
5858 "movntsd\t{%1, %0|%0, %1}"
5859 [(set_attr "type" "ssemov")
5860 (set_attr "mode" "DF")])
;; movntsd, scalar-source form: stores the DF register operand 1 to the DF
;; memory operand 0, wrapped in an unspec.
5862 (define_insn "sse4a_movntdf"
5863 [(set (match_operand:DF 0 "memory_operand" "=m")
5864 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5867 "movntsd\t{%1, %0|%0, %1}"
5868 [(set_attr "type" "ssemov")
5869 (set_attr "mode" "DF")])
;; movntss, vector-source form: stores element 0 of the V4SF register
;; operand 1 to the SF memory operand 0, wrapped in an unspec.
5871 (define_insn "sse4a_vmmovntv4sf"
5872 [(set (match_operand:SF 0 "memory_operand" "=m")
5873 (unspec:SF [(vec_select:SF
5874 (match_operand:V4SF 1 "register_operand" "x")
5875 (parallel [(const_int 0)]))]
5878 "movntss\t{%1, %0|%0, %1}"
5879 [(set_attr "type" "ssemov")
5880 (set_attr "mode" "SF")])
;; movntss, scalar-source form: stores the SF register operand 1 to the SF
;; memory operand 0, wrapped in an unspec.
5882 (define_insn "sse4a_movntsf"
5883 [(set (match_operand:SF 0 "memory_operand" "=m")
5884 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5887 "movntss\t{%1, %0|%0, %1}"
5888 [(set_attr "type" "ssemov")
5889 (set_attr "mode" "SF")])
;; extrq, immediate form: operand 1 is tied to the V2DI destination;
;; operands 2 and 3 are modeless const_int operands with empty constraints,
;; printed as the two immediates of the instruction.
5891 (define_insn "sse4a_extrqi"
5892 [(set (match_operand:V2DI 0 "register_operand" "=x")
5893 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5894 (match_operand 2 "const_int_operand" "")
5895 (match_operand 3 "const_int_operand" "")]
5898 "extrq\t{%3, %2, %0|%0, %2, %3}"
5899 [(set_attr "type" "sse")
5900 (set_attr "prefix_data16" "1")
5901 (set_attr "mode" "TI")])
;; extrq, register form: operand 1 is tied to the V2DI destination and
;; operand 2 is a V16QI value in an "x" register.
5903 (define_insn "sse4a_extrq"
5904 [(set (match_operand:V2DI 0 "register_operand" "=x")
5905 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5906 (match_operand:V16QI 2 "register_operand" "x")]
5909 "extrq\t{%2, %0|%0, %2}"
5910 [(set_attr "type" "sse")
5911 (set_attr "prefix_data16" "1")
5912 (set_attr "mode" "TI")])
;; insertq, immediate form: operand 1 is tied to the V2DI destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are modeless const_int
;; operands printed as the two immediates of the instruction.
5914 (define_insn "sse4a_insertqi"
5915 [(set (match_operand:V2DI 0 "register_operand" "=x")
5916 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5917 (match_operand:V2DI 2 "register_operand" "x")
5918 (match_operand 3 "const_int_operand" "")
5919 (match_operand 4 "const_int_operand" "")]
5922 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5923 [(set_attr "type" "sseins")
5924 (set_attr "prefix_rep" "1")
5925 (set_attr "mode" "TI")])
;; insertq, register form: operand 1 is tied to the V2DI destination and
;; operand 2 is a V2DI value in an "x" register.
5927 (define_insn "sse4a_insertq"
5928 [(set (match_operand:V2DI 0 "register_operand" "=x")
5929 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5930 (match_operand:V2DI 2 "register_operand" "x")]
5933 "insertq\t{%2, %0|%0, %2}"
5934 [(set_attr "type" "sseins")
5935 (set_attr "prefix_rep" "1")
5936 (set_attr "mode" "TI")])
5938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5940 ;; Intel SSE4.1 instructions
5942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendpd on V2DF.  Operand 1 is tied to the destination, operand 2 may be
;; a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_3_operand.  Note that the RTL template lists operand 2 before
;; operand 1.
5944 (define_insn "sse4_1_blendpd"
5945 [(set (match_operand:V2DF 0 "register_operand" "=x")
5947 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5948 (match_operand:V2DF 1 "register_operand" "0")
5949 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
5951 "blendpd\t{%3, %2, %0|%0, %2, %3}"
5952 [(set_attr "type" "ssemov")
5953 (set_attr "prefix_extra" "1")
5954 (set_attr "mode" "V2DF")])
;; blendps on V4SF.  Operand 1 is tied to the destination, operand 2 may be
;; a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_15_operand.  Note that the RTL template lists operand 2 before
;; operand 1.
5956 (define_insn "sse4_1_blendps"
5957 [(set (match_operand:V4SF 0 "register_operand" "=x")
5959 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5960 (match_operand:V4SF 1 "register_operand" "0")
5961 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
5963 "blendps\t{%3, %2, %0|%0, %2, %3}"
5964 [(set_attr "type" "ssemov")
5965 (set_attr "prefix_extra" "1")
5966 (set_attr "mode" "V4SF")])
;; blendvpd on V2DF, expressed as an unspec.  Operand 3 is constrained to
;; "Y0", while the destination and operands 1 and 2 use the *_not_xmm0
;; predicates.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
5968 (define_insn "sse4_1_blendvpd"
5969 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
5970 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
5971 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
5972 (match_operand:V2DF 3 "register_operand" "Y0")]
5975 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
5976 [(set_attr "type" "ssemov")
5977 (set_attr "prefix_extra" "1")
5978 (set_attr "mode" "V2DF")])
;; blendvps on V4SF, expressed as an unspec.  Operand 3 is constrained to
;; "Y0", while the destination and operands 1 and 2 use the *_not_xmm0
;; predicates.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
5980 (define_insn "sse4_1_blendvps"
5981 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
5982 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
5983 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
5984 (match_operand:V4SF 3 "register_operand" "Y0")]
5987 "blendvps\t{%3, %2, %0|%0, %2, %3}"
5988 [(set_attr "type" "ssemov")
5989 (set_attr "prefix_extra" "1")
5990 (set_attr "mode" "V4SF")])
;; dppd on V2DF, expressed as an unspec.  Operands 1 and 2 are marked
;; commutative ("%0"); operand 3 is an immediate accepted by
;; const_0_to_255_operand.
5992 (define_insn "sse4_1_dppd"
5993 [(set (match_operand:V2DF 0 "register_operand" "=x")
5994 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
5995 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5996 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5999 "dppd\t{%3, %2, %0|%0, %2, %3}"
6000 [(set_attr "type" "ssemul")
6001 (set_attr "prefix_extra" "1")
6002 (set_attr "mode" "V2DF")])
;; dpps on V4SF, expressed as an unspec.  Operands 1 and 2 are marked
;; commutative ("%0"); operand 3 is an immediate accepted by
;; const_0_to_255_operand.
6004 (define_insn "sse4_1_dpps"
6005 [(set (match_operand:V4SF 0 "register_operand" "=x")
6006 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6007 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6008 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6011 "dpps\t{%3, %2, %0|%0, %2, %3}"
6012 [(set_attr "type" "ssemul")
6013 (set_attr "prefix_extra" "1")
6014 (set_attr "mode" "V4SF")])
;; movntdqa: loads the V2DI memory operand 1 into the "x" register operand 0,
;; expressed as an unspec.  The source must be memory ("m").
6016 (define_insn "sse4_1_movntdqa"
6017 [(set (match_operand:V2DI 0 "register_operand" "=x")
6018 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6021 "movntdqa\t{%1, %0|%0, %1}"
6022 [(set_attr "type" "ssecvt")
6023 (set_attr "prefix_extra" "1")
6024 (set_attr "mode" "TI")])
;; mpsadbw on V16QI, expressed as an unspec.  Operand 1 is tied to the
;; destination, operand 2 may be a register or memory, and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6026 (define_insn "sse4_1_mpsadbw"
6027 [(set (match_operand:V16QI 0 "register_operand" "=x")
6028 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6029 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6030 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6033 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6034 [(set_attr "type" "sselog1")
6035 (set_attr "prefix_extra" "1")
6036 (set_attr "mode" "TI")])
;; packusdw: produces a V8HI result from two V4SI sources.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
6038 (define_insn "sse4_1_packusdw"
6039 [(set (match_operand:V8HI 0 "register_operand" "=x")
6042 (match_operand:V4SI 1 "register_operand" "0"))
6044 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6046 "packusdw\t{%2, %0|%0, %2}"
6047 [(set_attr "type" "sselog")
6048 (set_attr "prefix_extra" "1")
6049 (set_attr "mode" "TI")])
;; pblendvb on V16QI, expressed as an unspec.  Operand 3 is constrained to
;; "Y0", while the destination and operands 1 and 2 use the *_not_xmm0
;; predicates.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
6051 (define_insn "sse4_1_pblendvb"
6052 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6053 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6054 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6055 (match_operand:V16QI 3 "register_operand" "Y0")]
6058 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6059 [(set_attr "type" "ssemov")
6060 (set_attr "prefix_extra" "1")
6061 (set_attr "mode" "TI")])
;; pblendw on V8HI.  Operand 1 is tied to the destination, operand 2 may be
;; a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_operand.  Note that the RTL template lists operand 2
;; before operand 1.
6063 (define_insn "sse4_1_pblendw"
6064 [(set (match_operand:V8HI 0 "register_operand" "=x")
6066 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6067 (match_operand:V8HI 1 "register_operand" "0")
6068 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6070 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6071 [(set_attr "type" "ssemov")
6072 (set_attr "prefix_extra" "1")
6073 (set_attr "mode" "TI")])
;; phminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW of the single source
;; operand 1, which may be a register or memory.
6075 (define_insn "sse4_1_phminposuw"
6076 [(set (match_operand:V8HI 0 "register_operand" "=x")
6077 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6078 UNSPEC_PHMINPOSUW))]
6080 "phminposuw\t{%1, %0|%0, %1}"
6081 [(set_attr "type" "sselog1")
6082 (set_attr "prefix_extra" "1")
6083 (set_attr "mode" "TI")])
;; pmovsxbw, named register form: V8HI result from a V16QI register source;
;; the element selection starts at index 0.
6085 (define_insn "sse4_1_extendv8qiv8hi2"
6086 [(set (match_operand:V8HI 0 "register_operand" "=x")
6089 (match_operand:V16QI 1 "register_operand" "x")
6090 (parallel [(const_int 0)
6099 "pmovsxbw\t{%1, %0|%0, %1}"
6100 [(set_attr "type" "ssemov")
6101 (set_attr "prefix_extra" "1")
6102 (set_attr "mode" "TI")])
;; pmovsxbw, anonymous form: the source is a V8QI register or memory operand
;; wrapped in vec_duplicate:V16QI; the element selection starts at index 0.
6104 (define_insn "*sse4_1_extendv8qiv8hi2"
6105 [(set (match_operand:V8HI 0 "register_operand" "=x")
6108 (vec_duplicate:V16QI
6109 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6110 (parallel [(const_int 0)
6119 "pmovsxbw\t{%1, %0|%0, %1}"
6120 [(set_attr "type" "ssemov")
6121 (set_attr "prefix_extra" "1")
6122 (set_attr "mode" "TI")])
;; pmovsxbd, named register form: V4SI result from a V16QI register source;
;; the element selection starts at index 0.
6124 (define_insn "sse4_1_extendv4qiv4si2"
6125 [(set (match_operand:V4SI 0 "register_operand" "=x")
6128 (match_operand:V16QI 1 "register_operand" "x")
6129 (parallel [(const_int 0)
6134 "pmovsxbd\t{%1, %0|%0, %1}"
6135 [(set_attr "type" "ssemov")
6136 (set_attr "prefix_extra" "1")
6137 (set_attr "mode" "TI")])
;; pmovsxbd, anonymous form: the source is a V4QI register or memory operand
;; wrapped in vec_duplicate:V16QI; the element selection starts at index 0.
6139 (define_insn "*sse4_1_extendv4qiv4si2"
6140 [(set (match_operand:V4SI 0 "register_operand" "=x")
6143 (vec_duplicate:V16QI
6144 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6145 (parallel [(const_int 0)
6150 "pmovsxbd\t{%1, %0|%0, %1}"
6151 [(set_attr "type" "ssemov")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "mode" "TI")])
;; pmovsxbq, named register form: V2DI result from a V16QI register source;
;; the element selection starts at index 0.
6155 (define_insn "sse4_1_extendv2qiv2di2"
6156 [(set (match_operand:V2DI 0 "register_operand" "=x")
6159 (match_operand:V16QI 1 "register_operand" "x")
6160 (parallel [(const_int 0)
6163 "pmovsxbq\t{%1, %0|%0, %1}"
6164 [(set_attr "type" "ssemov")
6165 (set_attr "prefix_extra" "1")
6166 (set_attr "mode" "TI")])
;; pmovsxbq, anonymous form: the source is a V2QI register or memory operand
;; wrapped in vec_duplicate:V16QI; the element selection starts at index 0.
6168 (define_insn "*sse4_1_extendv2qiv2di2"
6169 [(set (match_operand:V2DI 0 "register_operand" "=x")
6172 (vec_duplicate:V16QI
6173 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6174 (parallel [(const_int 0)
6177 "pmovsxbq\t{%1, %0|%0, %1}"
6178 [(set_attr "type" "ssemov")
6179 (set_attr "prefix_extra" "1")
6180 (set_attr "mode" "TI")])
;; pmovsxwd, named register form: V4SI result from a V8HI register source;
;; the element selection starts at index 0.
6182 (define_insn "sse4_1_extendv4hiv4si2"
6183 [(set (match_operand:V4SI 0 "register_operand" "=x")
6186 (match_operand:V8HI 1 "register_operand" "x")
6187 (parallel [(const_int 0)
6192 "pmovsxwd\t{%1, %0|%0, %1}"
6193 [(set_attr "type" "ssemov")
6194 (set_attr "prefix_extra" "1")
6195 (set_attr "mode" "TI")])
;; pmovsxwd, anonymous form: the source is a register or memory operand
;; holding the four HI elements that are widened to V4SI, so its mode is
;; V4HI -- the same source mode as the zero-extending twin
;; *sse4_1_zero_extendv4hiv4si2.
6197 (define_insn "*sse4_1_extendv4hiv4si2"
6198 [(set (match_operand:V4SI 0 "register_operand" "=x")
6202 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6203 (parallel [(const_int 0)
6208 "pmovsxwd\t{%1, %0|%0, %1}"
6209 [(set_attr "type" "ssemov")
6210 (set_attr "prefix_extra" "1")
6211 (set_attr "mode" "TI")])
;; pmovsxwq, named register form: V2DI result from a V8HI register source;
;; the element selection starts at index 0.
6213 (define_insn "sse4_1_extendv2hiv2di2"
6214 [(set (match_operand:V2DI 0 "register_operand" "=x")
6217 (match_operand:V8HI 1 "register_operand" "x")
6218 (parallel [(const_int 0)
6221 "pmovsxwq\t{%1, %0|%0, %1}"
6222 [(set_attr "type" "ssemov")
6223 (set_attr "prefix_extra" "1")
6224 (set_attr "mode" "TI")])
;; pmovsxwq, anonymous form: the source is a register or memory operand
;; holding the two HI elements that are widened to V2DI, so its mode is
;; V2HI -- the same source mode as the zero-extending twin
;; *sse4_1_zero_extendv2hiv2di2.  (A full V8HI source is what the named
;; register form sse4_1_extendv2hiv2di2 already covers.)
6226 (define_insn "*sse4_1_extendv2hiv2di2"
6227 [(set (match_operand:V2DI 0 "register_operand" "=x")
6231 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6232 (parallel [(const_int 0)
6235 "pmovsxwq\t{%1, %0|%0, %1}"
6236 [(set_attr "type" "ssemov")
6237 (set_attr "prefix_extra" "1")
6238 (set_attr "mode" "TI")])
;; pmovsxdq, named register form: V2DI result from a V4SI register source;
;; the element selection starts at index 0.
6240 (define_insn "sse4_1_extendv2siv2di2"
6241 [(set (match_operand:V2DI 0 "register_operand" "=x")
6244 (match_operand:V4SI 1 "register_operand" "x")
6245 (parallel [(const_int 0)
6248 "pmovsxdq\t{%1, %0|%0, %1}"
6249 [(set_attr "type" "ssemov")
6250 (set_attr "prefix_extra" "1")
6251 (set_attr "mode" "TI")])
;; pmovsxdq, anonymous form: the source is a V2SI register or memory
;; operand; the element selection starts at index 0.
6253 (define_insn "*sse4_1_extendv2siv2di2"
6254 [(set (match_operand:V2DI 0 "register_operand" "=x")
6258 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6259 (parallel [(const_int 0)
6262 "pmovsxdq\t{%1, %0|%0, %1}"
6263 [(set_attr "type" "ssemov")
6264 (set_attr "prefix_extra" "1")
6265 (set_attr "mode" "TI")])
;; pmovzxbw, named register form: V8HI result from a V16QI register source;
;; the element selection starts at index 0.
6267 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6268 [(set (match_operand:V8HI 0 "register_operand" "=x")
6271 (match_operand:V16QI 1 "register_operand" "x")
6272 (parallel [(const_int 0)
6281 "pmovzxbw\t{%1, %0|%0, %1}"
6282 [(set_attr "type" "ssemov")
6283 (set_attr "prefix_extra" "1")
6284 (set_attr "mode" "TI")])
;; pmovzxbw, anonymous form: the source is a V8QI register or memory operand
;; wrapped in vec_duplicate:V16QI; the element selection starts at index 0.
6286 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6287 [(set (match_operand:V8HI 0 "register_operand" "=x")
6290 (vec_duplicate:V16QI
6291 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6292 (parallel [(const_int 0)
6301 "pmovzxbw\t{%1, %0|%0, %1}"
6302 [(set_attr "type" "ssemov")
6303 (set_attr "prefix_extra" "1")
6304 (set_attr "mode" "TI")])
;; pmovzxbd, named register form: V4SI result from a V16QI register source;
;; the element selection starts at index 0.
6306 (define_insn "sse4_1_zero_extendv4qiv4si2"
6307 [(set (match_operand:V4SI 0 "register_operand" "=x")
6310 (match_operand:V16QI 1 "register_operand" "x")
6311 (parallel [(const_int 0)
6316 "pmovzxbd\t{%1, %0|%0, %1}"
6317 [(set_attr "type" "ssemov")
6318 (set_attr "prefix_extra" "1")
6319 (set_attr "mode" "TI")])
;; pmovzxbd, anonymous form: the source is a V4QI register or memory operand
;; wrapped in vec_duplicate:V16QI; the element selection starts at index 0.
6321 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6322 [(set (match_operand:V4SI 0 "register_operand" "=x")
6325 (vec_duplicate:V16QI
6326 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6327 (parallel [(const_int 0)
6332 "pmovzxbd\t{%1, %0|%0, %1}"
6333 [(set_attr "type" "ssemov")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "mode" "TI")])
;; pmovzxbq, named register form: V2DI result from a V16QI register source;
;; the element selection starts at index 0.
6337 (define_insn "sse4_1_zero_extendv2qiv2di2"
6338 [(set (match_operand:V2DI 0 "register_operand" "=x")
6341 (match_operand:V16QI 1 "register_operand" "x")
6342 (parallel [(const_int 0)
6345 "pmovzxbq\t{%1, %0|%0, %1}"
6346 [(set_attr "type" "ssemov")
6347 (set_attr "prefix_extra" "1")
6348 (set_attr "mode" "TI")])
;; pmovzxbq, anonymous form: the source is a V2QI register or memory operand
;; wrapped in vec_duplicate:V16QI; the element selection starts at index 0.
6350 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6351 [(set (match_operand:V2DI 0 "register_operand" "=x")
6354 (vec_duplicate:V16QI
6355 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6356 (parallel [(const_int 0)
6359 "pmovzxbq\t{%1, %0|%0, %1}"
6360 [(set_attr "type" "ssemov")
6361 (set_attr "prefix_extra" "1")
6362 (set_attr "mode" "TI")])
;; pmovzxwd, named register form: V4SI result from a V8HI register source;
;; the element selection starts at index 0.
6364 (define_insn "sse4_1_zero_extendv4hiv4si2"
6365 [(set (match_operand:V4SI 0 "register_operand" "=x")
6368 (match_operand:V8HI 1 "register_operand" "x")
6369 (parallel [(const_int 0)
6374 "pmovzxwd\t{%1, %0|%0, %1}"
6375 [(set_attr "type" "ssemov")
6376 (set_attr "prefix_extra" "1")
6377 (set_attr "mode" "TI")])
;; pmovzxwd, anonymous form: the source is a V4HI register or memory
;; operand; the element selection starts at index 0.
6379 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6380 [(set (match_operand:V4SI 0 "register_operand" "=x")
6384 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6385 (parallel [(const_int 0)
6390 "pmovzxwd\t{%1, %0|%0, %1}"
6391 [(set_attr "type" "ssemov")
6392 (set_attr "prefix_extra" "1")
6393 (set_attr "mode" "TI")])
;; pmovzxwq, named register form: V2DI result from a V8HI register source;
;; the element selection starts at index 0.
6395 (define_insn "sse4_1_zero_extendv2hiv2di2"
6396 [(set (match_operand:V2DI 0 "register_operand" "=x")
6399 (match_operand:V8HI 1 "register_operand" "x")
6400 (parallel [(const_int 0)
6403 "pmovzxwq\t{%1, %0|%0, %1}"
6404 [(set_attr "type" "ssemov")
6405 (set_attr "prefix_extra" "1")
6406 (set_attr "mode" "TI")])
;; pmovzxwq, anonymous form: the source is a V2HI register or memory
;; operand; the element selection starts at index 0.
6408 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6409 [(set (match_operand:V2DI 0 "register_operand" "=x")
6413 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6414 (parallel [(const_int 0)
6417 "pmovzxwq\t{%1, %0|%0, %1}"
6418 [(set_attr "type" "ssemov")
6419 (set_attr "prefix_extra" "1")
6420 (set_attr "mode" "TI")])
;; pmovzxdq, named register form: V2DI result from a V4SI register source;
;; the element selection starts at index 0.
6422 (define_insn "sse4_1_zero_extendv2siv2di2"
6423 [(set (match_operand:V2DI 0 "register_operand" "=x")
6426 (match_operand:V4SI 1 "register_operand" "x")
6427 (parallel [(const_int 0)
6430 "pmovzxdq\t{%1, %0|%0, %1}"
6431 [(set_attr "type" "ssemov")
6432 (set_attr "prefix_extra" "1")
6433 (set_attr "mode" "TI")])
;; pmovzxdq, anonymous form: the source is a V2SI register or memory
;; operand; the element selection starts at index 0.
6435 (define_insn "*sse4_1_zero_extendv2siv2di2"
6436 [(set (match_operand:V2DI 0 "register_operand" "=x")
6440 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6441 (parallel [(const_int 0)
6444 "pmovzxdq\t{%1, %0|%0, %1}"
6445 [(set_attr "type" "ssemov")
6446 (set_attr "prefix_extra" "1")
6447 (set_attr "mode" "TI")])
6449 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6450 ;; But it is not a really compare instruction.
;; ptest: sets FLAGS_REG (in CC mode) from an unspec of operand 0 (V2DI
;; register) and operand 1 (V2DI register or memory).  Neither operand is
;; written; the flags are the only output.
6451 (define_insn "sse4_1_ptest"
6452 [(set (reg:CC FLAGS_REG)
6453 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6454 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6457 "ptest\t{%1, %0|%0, %1}"
6458 [(set_attr "type" "ssecomi")
6459 (set_attr "prefix_extra" "1")
6460 (set_attr "mode" "TI")])
;; roundpd on V2DF, expressed as an unspec of operand 1 (register or memory)
;; and operand 2, an immediate accepted by const_0_to_15_operand.
6462 (define_insn "sse4_1_roundpd"
6463 [(set (match_operand:V2DF 0 "register_operand" "=x")
6464 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6465 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6468 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6469 [(set_attr "type" "ssecvt")
6470 (set_attr "prefix_extra" "1")
6471 (set_attr "mode" "V2DF")])
;; roundps on V4SF, expressed as an unspec of operand 1 (register or memory)
;; and operand 2, an immediate accepted by const_0_to_15_operand.
6473 (define_insn "sse4_1_roundps"
6474 [(set (match_operand:V4SF 0 "register_operand" "=x")
6475 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6476 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6479 "roundps\t{%2, %1, %0|%0, %1, %2}"
6480 [(set_attr "type" "ssecvt")
6481 (set_attr "prefix_extra" "1")
6482 (set_attr "mode" "V4SF")])
;; roundsd on V2DF.  The unspec takes operand 2 (register only) and the
;; immediate operand 3 (const_0_to_15_operand); operand 1 is tied to the
;; destination and appears in the template after the unspec.
6484 (define_insn "sse4_1_roundsd"
6485 [(set (match_operand:V2DF 0 "register_operand" "=x")
6487 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6488 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6490 (match_operand:V2DF 1 "register_operand" "0")
6493 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6494 [(set_attr "type" "ssecvt")
6495 (set_attr "prefix_extra" "1")
6496 (set_attr "mode" "V2DF")])
;; roundss on V4SF.  The unspec takes operand 2 (register only) and the
;; immediate operand 3 (const_0_to_15_operand); operand 1 is tied to the
;; destination and appears in the template after the unspec.
6498 (define_insn "sse4_1_roundss"
6499 [(set (match_operand:V4SF 0 "register_operand" "=x")
6501 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6502 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6504 (match_operand:V4SF 1 "register_operand" "0")
6507 "roundss\t{%3, %2, %0|%0, %2, %3}"
6508 [(set_attr "type" "ssecvt")
6509 (set_attr "prefix_extra" "1")
6510 (set_attr "mode" "V4SF")])
6512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6514 ;; Intel SSE4.2 string/text processing instructions
6516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: operand 0 (SI, "c"),
;; operand 1 (V16QI, "Y0") and FLAGS_REG.  Inputs are operands 2..6, with
;; operands 3 and 5 fixed to "a" and "d" and operand 6 an 8-bit immediate.
;; The condition excludes reload_in_progress and reload_completed.  The
;; split code looks for REG_UNUSED notes on curr_insn to learn which results
;; are live, and generates pcmpestri, pcmpestrm and the flags-only
;; pcmpestr_cconly insns; the flags-only insn is emitted when the flags are
;; live but neither operand 0 nor operand 1 is.
6518 (define_insn_and_split "sse4_2_pcmpestr"
6519 [(set (match_operand:SI 0 "register_operand" "=c,c")
6521 [(match_operand:V16QI 2 "register_operand" "x,x")
6522 (match_operand:SI 3 "register_operand" "a,a")
6523 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6524 (match_operand:SI 5 "register_operand" "d,d")
6525 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6527 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6535 (set (reg:CC FLAGS_REG)
6544 && !(reload_completed || reload_in_progress)"
6549 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6550 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6551 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6554 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6555 operands[3], operands[4],
6556 operands[5], operands[6]));
6558 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6559 operands[3], operands[4],
6560 operands[5], operands[6]));
6561 if (flags && !(ecx || xmm0))
6562 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6563 operands[4], operands[5],
6567 [(set_attr "type" "sselog")
6568 (set_attr "prefix_data16" "1")
6569 (set_attr "prefix_extra" "1")
6570 (set_attr "memory" "none,load")
6571 (set_attr "mode" "TI")])
;; pcmpestri: writes operand 0 (SI, "c") and FLAGS_REG.  Operands 2 and 4
;; are fixed to "a" and "d"; operand 3 may be a register or memory (second
;; alternative, memory attribute "load"); operand 5 is an 8-bit immediate.
;; Only operands 1, 3 and 5 are printed.
6573 (define_insn "sse4_2_pcmpestri"
6574 [(set (match_operand:SI 0 "register_operand" "=c,c")
6576 [(match_operand:V16QI 1 "register_operand" "x,x")
6577 (match_operand:SI 2 "register_operand" "a,a")
6578 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6579 (match_operand:SI 4 "register_operand" "d,d")
6580 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6582 (set (reg:CC FLAGS_REG)
6591 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6592 [(set_attr "type" "sselog")
6593 (set_attr "prefix_data16" "1")
6594 (set_attr "prefix_extra" "1")
6595 (set_attr "memory" "none,load")
6596 (set_attr "mode" "TI")])
;; pcmpestrm: writes operand 0 (V16QI, "Y0") and FLAGS_REG.  Operands 2 and
;; 4 are fixed to "a" and "d"; operand 3 may be a register or memory (second
;; alternative, memory attribute "load"); operand 5 is an 8-bit immediate.
;; Only operands 1, 3 and 5 are printed.
6598 (define_insn "sse4_2_pcmpestrm"
6599 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6601 [(match_operand:V16QI 1 "register_operand" "x,x")
6602 (match_operand:SI 2 "register_operand" "a,a")
6603 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6604 (match_operand:SI 4 "register_operand" "d,d")
6605 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6607 (set (reg:CC FLAGS_REG)
6616 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6617 [(set_attr "type" "sselog")
6618 (set_attr "prefix_data16" "1")
6619 (set_attr "prefix_extra" "1")
6620 (set_attr "memory" "none,load")
6621 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only set is FLAGS_REG.  There are four
;; alternatives: the first two clobber a "Y0" V16QI scratch and print
;; pcmpestrm, the last two clobber a "c" SI scratch and print pcmpestri.
;; Alternatives 2 and 4 take operand 2 from memory (memory attribute "load").
6623 (define_insn "sse4_2_pcmpestr_cconly"
6624 [(set (reg:CC FLAGS_REG)
6626 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6627 (match_operand:SI 1 "register_operand" "a,a,a,a")
6628 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6629 (match_operand:SI 3 "register_operand" "d,d,d,d")
6630 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6632 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6633 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6636 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6637 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6638 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6639 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6640 [(set_attr "type" "sselog")
6641 (set_attr "prefix_data16" "1")
6642 (set_attr "prefix_extra" "1")
6643 (set_attr "memory" "none,load,none,load")
6644 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: operand 0 (SI, "c"),
;; operand 1 (V16QI, "Y0") and FLAGS_REG.  Inputs are operands 2..4, with
;; operand 4 an 8-bit immediate.  The condition excludes reload_in_progress
;; and reload_completed.  The split code looks for REG_UNUSED notes on
;; curr_insn to learn which results are live, and generates pcmpistri,
;; pcmpistrm and the flags-only pcmpistr_cconly insns; the flags-only insn
;; is emitted when the flags are live but neither operand 0 nor operand 1 is.
6646 (define_insn_and_split "sse4_2_pcmpistr"
6647 [(set (match_operand:SI 0 "register_operand" "=c,c")
6649 [(match_operand:V16QI 2 "register_operand" "x,x")
6650 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6651 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6653 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6659 (set (reg:CC FLAGS_REG)
6666 && !(reload_completed || reload_in_progress)"
6671 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6672 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6673 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6676 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6677 operands[3], operands[4]));
6679 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6680 operands[3], operands[4]));
6681 if (flags && !(ecx || xmm0))
6682 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix_data16" "1")
6688 (set_attr "prefix_extra" "1")
6689 (set_attr "memory" "none,load")
6690 (set_attr "mode" "TI")])
;; sse4_2_pcmpistri: emits the "pcmpistri" instruction.
;; Operand 0 is the SImode result, constrained to "c" in both
;; alternatives.  Operands 1 and 2 are the V16QI inputs (operand 2 may be
;; a register or memory) and operand 3 is a const_0_to_255_operand
;; immediate.  The pattern also sets FLAGS_REG in CCmode.
;; Operand 0 is not printed by the assembler template; it is pinned by
;; its constraint alone.
6692 (define_insn "sse4_2_pcmpistri"
6693 [(set (match_operand:SI 0 "register_operand" "=c,c")
6695 [(match_operand:V16QI 1 "register_operand" "x,x")
6696 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6697 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6699 (set (reg:CC FLAGS_REG)
6706 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
;; "memory" is per alternative: "none" for a register operand 2, "load"
;; for a memory operand 2.
6707 [(set_attr "type" "sselog")
6708 (set_attr "prefix_data16" "1")
6709 (set_attr "prefix_extra" "1")
6710 (set_attr "memory" "none,load")
6711 (set_attr "mode" "TI")])
;; sse4_2_pcmpistrm: emits the "pcmpistrm" instruction.
;; Operand 0 is the V16QI result, constrained to "Y0" in both
;; alternatives.  Operands 1 and 2 are the V16QI inputs (operand 2 may be
;; a register or memory) and operand 3 is a const_0_to_255_operand
;; immediate.  The pattern also sets FLAGS_REG in CCmode.
;; Operand 0 is not printed by the assembler template; it is pinned by
;; its constraint alone.
6713 (define_insn "sse4_2_pcmpistrm"
6714 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6716 [(match_operand:V16QI 1 "register_operand" "x,x")
6717 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6718 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6720 (set (reg:CC FLAGS_REG)
6727 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
;; "memory" is per alternative: "none" for a register operand 2, "load"
;; for a memory operand 2.
6728 [(set_attr "type" "sselog")
6729 (set_attr "prefix_data16" "1")
6730 (set_attr "prefix_extra" "1")
6731 (set_attr "memory" "none,load")
6732 (set_attr "mode" "TI")])
;; sse4_2_pcmpistr_cconly: variant whose only set is FLAGS_REG (CCmode).
;; Inputs are operands 0 and 1 (V16QI; operand 1 may be a register or
;; memory) and operand 2 (const_0_to_255_operand immediate).
;; There are four alternatives: the first two print "pcmpistrm", the last
;; two print "pcmpistri"; within each pair operand 1 is a register and
;; then a memory operand.
6734 (define_insn "sse4_2_pcmpistr_cconly"
6735 [(set (reg:CC FLAGS_REG)
6737 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6738 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6739 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
;; Scratch clobbers: the pcmpistrm alternatives (0-1) claim a "Y0"
;; V16QI scratch and leave the SImode scratch as "X"; the pcmpistri
;; alternatives (2-3) claim a "c" SImode scratch and leave the V16QI
;; scratch as "X".
6741 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6742 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6745 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6746 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6747 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6748 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
;; "memory" follows operand 1 per alternative: none,load,none,load.
6749 [(set_attr "type" "sselog")
6750 (set_attr "prefix_data16" "1")
6751 (set_attr "prefix_extra" "1")
6752 (set_attr "memory" "none,load,none,load")
6753 (set_attr "mode" "TI")])