1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; match the element sizes, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (one letter per element width: b for QI, w for HI, d for SI, q for DI).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Whole-vector move expander, instantiated for every mode in SSEMODEI.
;; Both operands are nonimmediate; the expansion itself is delegated to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Integer vector move insn.  Alternatives:
;;   0: SSE constant ("C") -> xmm, emitted via standard_sse_constant_opcode
;;   1: xmm or memory      -> xmm
;;   2: xmm                -> memory
;; The condition requires at least one operand to be a register.
;; Register/memory moves are emitted as movaps when the insn's "mode"
;; attribute is V4SF and as movdqa otherwise.  The attribute expression
;; tests optimize_size, absence of TARGET_SSE2, and the store alternative
;; under TARGET_SSE_TYPELESS_STORES; TI is the fallback value.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;;
;; Operands: 0 = V4SI xmm destination, 1 = DI source (alternative 0 in
;; integer registers, alternative 1 in memory), 2 = V4SI scratch (an
;; early-clobbered xmm for alternative 0, unused "X" for alternative 1).
;; Only enabled for 32-bit SSE2 targets with TARGET_INTER_UNIT_MOVES, and
;; split once reload has completed:
;;   - register source: each 32-bit half (SImode subregs at byte offsets
;;     0 and 4) is loaded with sse2_loadld, the low half into operand 0
;;     and the high half into the scratch, then the two are interleaved
;;     with sse2_punpckldq;
;;   - memory source: a single vec_concatv2di with const0_rtx as the
;;     second element.
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 switch (which_alternative)
114 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
115 Assemble the 64-bit DImode value in an xmm register. */
116 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 0)));
118 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
119 gen_rtx_SUBREG (SImode, operands[1], 4)));
120 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
124 emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
;; V4SF whole-vector move expander; delegates to ix86_expand_vector_move.
133 (define_expand "movv4sf"
134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
135 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
138 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Alternatives: 0 = SSE constant -> xmm (opcode chosen
;; by standard_sse_constant_opcode), 1 = xmm/memory -> xmm, 2 = xmm ->
;; memory.  At least one operand must be a register.  Non-constant moves
;; are always emitted as movaps, and the "mode" attribute is always V4SF.
142 (define_insn "*movv4sf_internal"
143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
144 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
146 && (register_operand (operands[0], V4SFmode)
147 || register_operand (operands[1], V4SFmode))"
149 switch (which_alternative)
152 return standard_sse_constant_opcode (insn, operands[1]);
155 return "movaps\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set_attr "mode" "V4SF")])
;; Post-reload split for a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The replacement is built from a
;; vec_duplicate of operand 1, after operand 1 has been narrowed to its
;; SFmode subreg at offset 0, and from operand 2, which is set to the
;; V4SF zero vector.
164 [(set (match_operand:V4SF 0 "register_operand" "")
165 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
166 "TARGET_SSE && reload_completed"
169 (vec_duplicate:V4SF (match_dup 1))
173 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
174 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF whole-vector move expander; delegates to ix86_expand_vector_move.
177 (define_expand "movv2df"
178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
179 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
182 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Alternatives: 0 = SSE constant -> xmm (opcode chosen
;; by standard_sse_constant_opcode), 1 = xmm/memory -> xmm, 2 = xmm ->
;; memory.  At least one operand must be a register.  Non-constant moves
;; are emitted as movaps when the "mode" attribute is V4SF and as movapd
;; otherwise.  The attribute is V4SF when optimizing for size, when SSE2
;; is unavailable, or for the store alternative under
;; TARGET_SSE_TYPELESS_STORES; otherwise it is V2DF.
186 (define_insn "*movv2df_internal"
187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
188 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
190 && (register_operand (operands[0], V2DFmode)
191 || register_operand (operands[1], V2DFmode))"
193 switch (which_alternative)
196 return standard_sse_constant_opcode (insn, operands[1]);
199 if (get_attr_mode (insn) == MODE_V4SF)
200 return "movaps\t{%1, %0|%0, %1}";
202 return "movapd\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
210 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
211 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
212 (and (eq_attr "alternative" "2")
213 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
215 (const_string "V4SF")
216 (const_string "V2DF")))])
;; Post-reload split for a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The load is rewritten as a
;; vec_concat of the DFmode subreg (offset 0) of operand 1 with a DFmode
;; zero, which is stored in operand 2.
219 [(set (match_operand:V2DF 0 "register_operand" "")
220 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
221 "TARGET_SSE2 && reload_completed"
222 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
224 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
225 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE.  The single operand must be
;; a register; the expansion is delegated to ix86_expand_push.
228 (define_expand "push<mode>1"
229 [(match_operand:SSEMODE 0 "register_operand" "")]
232 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned vector move expander for every mode in SSEMODE; the
;; expansion is delegated to ix86_expand_vector_move_misalign.
236 (define_expand "movmisalign<mode>"
237 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
238 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
241 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move (movups), represented as an unspec of the source.
;; Alternatives: 0 = xmm/memory -> xmm, 1 = xmm -> memory; the condition
;; rejects memory-to-memory moves.
;; The "mode" attribute is V4SF, matching the operand mode and the other
;; V4SF patterns in this file (it previously read V2DF, which describes
;; the movupd form rather than movups).
245 (define_insn "sse_movups"
246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
247 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
249 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
250 "movups\t{%1, %0|%0, %1}"
251 [(set_attr "type" "ssemov")
252 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move (movupd), represented as an unspec of the source.
;; Alternatives: 0 = xmm/memory -> xmm, 1 = xmm -> memory; the condition
;; rejects memory-to-memory moves.
254 (define_insn "sse2_movupd"
255 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
256 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
258 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
259 "movupd\t{%1, %0|%0, %1}"
260 [(set_attr "type" "ssemov")
261 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move (movdqu), same operand shape as the two patterns
;; above.  Explicitly marked as carrying a data16 prefix, with mode TI.
263 (define_insn "sse2_movdqu"
264 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
265 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
268 "movdqu\t{%1, %0|%0, %1}"
269 [(set_attr "type" "ssemov")
270 (set_attr "prefix_data16" "1")
271 (set_attr "mode" "TI")])
;; Non-temporal store of a V4SF xmm register to memory (movntps).
273 (define_insn "sse_movntv4sf"
274 [(set (match_operand:V4SF 0 "memory_operand" "=m")
275 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
278 "movntps\t{%1, %0|%0, %1}"
279 [(set_attr "type" "ssemov")
280 (set_attr "mode" "V4SF")])
;; Non-temporal store of a V2DF xmm register to memory (movntpd).
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf above;
;; confirm whether the difference is intentional.
282 (define_insn "sse2_movntv2df"
283 [(set (match_operand:V2DF 0 "memory_operand" "=m")
284 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
287 "movntpd\t{%1, %0|%0, %1}"
288 [(set_attr "type" "ssecvt")
289 (set_attr "mode" "V2DF")])
;; Non-temporal store of a V2DI xmm register to memory (movntdq);
;; explicitly marked as carrying a data16 prefix, with mode TI.
291 (define_insn "sse2_movntv2di"
292 [(set (match_operand:V2DI 0 "memory_operand" "=m")
293 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
296 "movntdq\t{%1, %0|%0, %1}"
297 [(set_attr "type" "ssecvt")
298 (set_attr "prefix_data16" "1")
299 (set_attr "mode" "TI")])
;; Non-temporal store of an SImode general register ("r") to memory
;; (movnti).
;; The "mode" attribute is SI, matching the operand mode; it previously
;; read V2DF, which does not describe this integer-register store.
;; NOTE(review): "type" is left as ssecvt, as on the neighbouring movnt
;; patterns; confirm that is the intended scheduling class.
301 (define_insn "sse2_movntsi"
302 [(set (match_operand:SI 0 "memory_operand" "=m")
303 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
306 "movnti\t{%1, %0|%0, %1}"
307 [(set_attr "type" "ssecvt")
308 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an xmm register using lddqu.
;; The source must be a memory operand.  Marked as carrying a rep-class
;; prefix, with mode TI.
310 (define_insn "sse3_lddqu"
311 [(set (match_operand:V16QI 0 "register_operand" "=x")
312 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
315 "lddqu\t{%1, %0|%0, %1}"
316 [(set_attr "type" "ssecvt")
317 (set_attr "prefix_rep" "1")
318 (set_attr "mode" "TI")])
320 ; Expand patterns for non-temporal stores. At the moment, only those
321 ; that directly map to insns are defined; it would be possible to
322 ; define patterns for other modes that would expand to several insns.
; Each expander below has the same operand shape as the corresponding
; movnt insn: V4SF, V2DF and V2DI take an xmm register source, SI takes
; a general register source, and the destination is always memory.
324 (define_expand "storentv4sf"
325 [(set (match_operand:V4SF 0 "memory_operand" "=m")
326 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
331 (define_expand "storentv2df"
332 [(set (match_operand:V2DF 0 "memory_operand" "=m")
333 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
338 (define_expand "storentv2di"
339 [(set (match_operand:V2DI 0 "memory_operand" "=m")
340 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
345 (define_expand "storentsi"
346 [(set (match_operand:SI 0 "memory_operand" "=m")
347 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
354 ;; Parallel single-precision floating point arithmetic
356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG), after which the expander is DONE.
358 (define_expand "negv4sf2"
359 [(set (match_operand:V4SF 0 "register_operand" "")
360 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
362 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; same scheme as negv4sf2, using ABS.
364 (define_expand "absv4sf2"
365 [(set (match_operand:V4SF 0 "register_operand" "")
366 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
368 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy, and then the pattern itself is
;; emitted.
370 (define_expand "addv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "")
372 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
373 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add (addps).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.
377 (define_insn "*addv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
381 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
382 "addps\t{%2, %0|%0, %2}"
383 [(set_attr "type" "sseadd")
384 (set_attr "mode" "V4SF")])
;; Scalar form (addss) on V4SF operands; operand 1 is tied to the
;; destination.  The "mode" attribute is SF.
386 (define_insn "sse_vmaddv4sf3"
387 [(set (match_operand:V4SF 0 "register_operand" "=x")
389 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
393 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
394 "addss\t{%2, %0|%0, %2}"
395 [(set_attr "type" "sseadd")
396 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Unlike addition, operand 1 must already
;; be a register.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
398 (define_expand "subv4sf3"
399 [(set (match_operand:V4SF 0 "register_operand" "")
400 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
401 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
403 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract (subps).  Operand 1 is tied to the
;; destination and is not commutative; operand 2 may be xmm or memory.
405 (define_insn "*subv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "=x")
407 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
408 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
410 "subps\t{%2, %0|%0, %2}"
411 [(set_attr "type" "sseadd")
412 (set_attr "mode" "V4SF")])
;; Scalar form (subss) on V4SF operands; the "mode" attribute is SF.
414 (define_insn "sse_vmsubv4sf3"
415 [(set (match_operand:V4SF 0 "register_operand" "=x")
417 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
418 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
422 "subss\t{%2, %0|%0, %2}"
423 [(set_attr "type" "sseadd")
424 (set_attr "mode" "SF")])
;; V4SF multiplication expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
426 (define_expand "mulv4sf3"
427 [(set (match_operand:V4SF 0 "register_operand" "")
428 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
429 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
431 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply (mulps).  Operand 1 is tied to the
;; destination and marked commutative ("%0").
433 (define_insn "*mulv4sf3"
434 [(set (match_operand:V4SF 0 "register_operand" "=x")
435 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
436 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
437 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
438 "mulps\t{%2, %0|%0, %2}"
439 [(set_attr "type" "ssemul")
440 (set_attr "mode" "V4SF")])
;; Scalar form (mulss) on V4SF operands; the "mode" attribute is SF.
442 (define_insn "sse_vmmulv4sf3"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
445 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
446 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
449 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
450 "mulss\t{%2, %0|%0, %2}"
451 [(set_attr "type" "ssemul")
452 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy.
454 (define_expand "divv4sf3"
455 [(set (match_operand:V4SF 0 "register_operand" "")
456 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
457 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
459 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed single-precision divide (divps).  Operand 1 is tied to the
;; destination; operand 2 may be xmm or memory.
461 (define_insn "*divv4sf3"
462 [(set (match_operand:V4SF 0 "register_operand" "=x")
463 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
464 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
466 "divps\t{%2, %0|%0, %2}"
467 [(set_attr "type" "ssediv")
468 (set_attr "mode" "V4SF")])
;; Scalar form (divss) on V4SF operands; the "mode" attribute is SF.
470 (define_insn "sse_vmdivv4sf3"
471 [(set (match_operand:V4SF 0 "register_operand" "=x")
473 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
474 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
478 "divss\t{%2, %0|%0, %2}"
479 [(set_attr "type" "ssediv")
480 (set_attr "mode" "SF")])
;; Packed reciprocal (rcpps), modelled as UNSPEC_RCP of operand 1, which
;; may be xmm or memory.
482 (define_insn "sse_rcpv4sf2"
483 [(set (match_operand:V4SF 0 "register_operand" "=x")
485 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
487 "rcpps\t{%1, %0|%0, %1}"
488 [(set_attr "type" "sse")
489 (set_attr "mode" "V4SF")])
;; Scalar form (rcpss).  Operand 1 is the value operated on; operand 2 is
;; a second V4SF input tied to the destination register.
491 (define_insn "sse_vmrcpv4sf2"
492 [(set (match_operand:V4SF 0 "register_operand" "=x")
494 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
496 (match_operand:V4SF 2 "register_operand" "0")
499 "rcpss\t{%1, %0|%0, %1}"
500 [(set_attr "type" "sse")
501 (set_attr "mode" "SF")])
;; Packed reciprocal square root (rsqrtps), modelled as UNSPEC_RSQRT of
;; operand 1, which may be xmm or memory.
503 (define_insn "sse_rsqrtv4sf2"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
506 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
508 "rsqrtps\t{%1, %0|%0, %1}"
509 [(set_attr "type" "sse")
510 (set_attr "mode" "V4SF")])
;; Scalar form (rsqrtss).  Operand 1 is the value operated on; operand 2
;; is a second V4SF input tied to the destination register.
512 (define_insn "sse_vmrsqrtv4sf2"
513 [(set (match_operand:V4SF 0 "register_operand" "=x")
515 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
517 (match_operand:V4SF 2 "register_operand" "0")
520 "rsqrtss\t{%1, %0|%0, %1}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "SF")])
;; Packed single-precision square root (sqrtps), expressed with the
;; generic sqrt rtx; operand 1 may be xmm or memory.
524 (define_insn "sqrtv4sf2"
525 [(set (match_operand:V4SF 0 "register_operand" "=x")
526 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
528 "sqrtps\t{%1, %0|%0, %1}"
529 [(set_attr "type" "sse")
530 (set_attr "mode" "V4SF")])
;; Scalar form (sqrtss).  Operand 1 is the value operated on; operand 2
;; is a second V4SF input tied to the destination register.
532 (define_insn "sse_vmsqrtv4sf2"
533 [(set (match_operand:V4SF 0 "register_operand" "=x")
535 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
536 (match_operand:V4SF 2 "register_operand" "0")
539 "sqrtss\t{%1, %0|%0, %1}"
540 [(set_attr "type" "sse")
541 (set_attr "mode" "SF")])
543 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
544 ;; isn't really correct, as those rtl operators aren't defined when
545 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register before the usual binary-operand fixup, so
;; that the non-commutative insn below (which requires a register for
;; operand 1) can match.
547 (define_expand "smaxv4sf3"
548 [(set (match_operand:V4SF 0 "register_operand" "")
549 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
550 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
553 if (!flag_finite_math_only)
554 operands[1] = force_reg (V4SFmode, operands[1]);
555 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps for finite-math-only code: operand 1 is marked commutative
;; ("%0").
558 (define_insn "*smaxv4sf3_finite"
559 [(set (match_operand:V4SF 0 "register_operand" "=x")
560 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
561 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
562 "TARGET_SSE && flag_finite_math_only
563 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
564 "maxps\t{%2, %0|%0, %2}"
565 [(set_attr "type" "sse")
566 (set_attr "mode" "V4SF")])
;; maxps without the commutative marker: operand 1 must be a register
;; tied to the destination, so operand order is preserved.
568 (define_insn "*smaxv4sf3"
569 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
573 "maxps\t{%2, %0|%0, %2}"
574 [(set_attr "type" "sse")
575 (set_attr "mode" "V4SF")])
;; Scalar form (maxss) on V4SF operands; the "mode" attribute is SF.
577 (define_insn "sse_vmsmaxv4sf3"
578 [(set (match_operand:V4SF 0 "register_operand" "=x")
580 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
581 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
585 "maxss\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sse")
587 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register before the usual binary-operand fixup, so
;; that the non-commutative insn below (which requires a register for
;; operand 1) can match.
589 (define_expand "sminv4sf3"
590 [(set (match_operand:V4SF 0 "register_operand" "")
591 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
592 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
595 if (!flag_finite_math_only)
596 operands[1] = force_reg (V4SFmode, operands[1]);
597 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps for finite-math-only code: operand 1 is marked commutative
;; ("%0").
600 (define_insn "*sminv4sf3_finite"
601 [(set (match_operand:V4SF 0 "register_operand" "=x")
602 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
603 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
604 "TARGET_SSE && flag_finite_math_only
605 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
606 "minps\t{%2, %0|%0, %2}"
607 [(set_attr "type" "sse")
608 (set_attr "mode" "V4SF")])
;; minps without the commutative marker: operand 1 must be a register
;; tied to the destination, so operand order is preserved.
610 (define_insn "*sminv4sf3"
611 [(set (match_operand:V4SF 0 "register_operand" "=x")
612 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
613 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
615 "minps\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sse")
617 (set_attr "mode" "V4SF")])
;; Scalar form (minss) on V4SF operands; the "mode" attribute is SF.
619 (define_insn "sse_vmsminv4sf3"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
622 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
623 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
627 "minss\t{%2, %0|%0, %2}"
628 [(set_attr "type" "sse")
629 (set_attr "mode" "SF")])
631 ;; These versions of the min/max patterns implement exactly the operations
632 ;; min = (op1 < op2 ? op1 : op2)
633 ;; max = (!(op1 < op2) ? op1 : op2)
634 ;; Their operands are not commutative, and thus they may be used in the
635 ;; presence of -0.0 and NaN.
;; All four patterns below wrap both operands in an unspec rather than
;; smin/smax, tie operand 1 to the destination, and use "type" sseadd
;; (the smin/smax patterns above use "sse").
637 (define_insn "*ieee_sminv4sf3"
638 [(set (match_operand:V4SF 0 "register_operand" "=x")
639 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
640 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
643 "minps\t{%2, %0|%0, %2}"
644 [(set_attr "type" "sseadd")
645 (set_attr "mode" "V4SF")])
647 (define_insn "*ieee_smaxv4sf3"
648 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
653 "maxps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sseadd")
655 (set_attr "mode" "V4SF")])
657 (define_insn "*ieee_sminv2df3"
658 [(set (match_operand:V2DF 0 "register_operand" "=x")
659 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
660 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
663 "minpd\t{%2, %0|%0, %2}"
664 [(set_attr "type" "sseadd")
665 (set_attr "mode" "V2DF")])
667 (define_insn "*ieee_smaxv2df3"
668 [(set (match_operand:V2DF 0 "register_operand" "=x")
669 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
670 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
673 "maxpd\t{%2, %0|%0, %2}"
674 [(set_attr "type" "sseadd")
675 (set_attr "mode" "V2DF")])
;; addsubps.  The pattern combines a sum of operands 1 and 2 with their
;; difference (minus of the same two operands); operand 1 is tied to the
;; destination.  Marked as carrying a rep-class prefix.
677 (define_insn "sse3_addsubv4sf3"
678 [(set (match_operand:V4SF 0 "register_operand" "=x")
681 (match_operand:V4SF 1 "register_operand" "0")
682 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
683 (minus:V4SF (match_dup 1) (match_dup 2))
686 "addsubps\t{%2, %0|%0, %2}"
687 [(set_attr "type" "sseadd")
688 (set_attr "prefix_rep" "1")
689 (set_attr "mode" "V4SF")])
;; haddps.  The pattern pairs adjacent SF elements: (0,1) and (2,3) of
;; operand 1, then (0,1) and (2,3) of operand 2.  Operand 1 is tied to
;; the destination; operand 2 may be xmm or memory.
691 (define_insn "sse3_haddv4sf3"
692 [(set (match_operand:V4SF 0 "register_operand" "=x")
697 (match_operand:V4SF 1 "register_operand" "0")
698 (parallel [(const_int 0)]))
699 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
701 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
702 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
706 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
707 (parallel [(const_int 0)]))
708 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
710 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
711 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
713 "haddps\t{%2, %0|%0, %2}"
714 [(set_attr "type" "sseadd")
715 (set_attr "prefix_rep" "1")
716 (set_attr "mode" "V4SF")])
;; hsubps.  Same element pairing as sse3_haddv4sf3: (0,1) and (2,3) of
;; operand 1, then (0,1) and (2,3) of operand 2.  Operand 1 is tied to
;; the destination; operand 2 may be xmm or memory.
718 (define_insn "sse3_hsubv4sf3"
719 [(set (match_operand:V4SF 0 "register_operand" "=x")
724 (match_operand:V4SF 1 "register_operand" "0")
725 (parallel [(const_int 0)]))
726 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
728 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
729 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
733 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
734 (parallel [(const_int 0)]))
735 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
737 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
738 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
740 "hsubps\t{%2, %0|%0, %2}"
741 [(set_attr "type" "sseadd")
742 (set_attr "prefix_rep" "1")
743 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4SF register into operand 0.  Two strategies are
;; present: two chained sse3_haddv4sf3 insns through a fresh temporary,
;; or the generic ix86_expand_reduc_v4sf helper driven by gen_addv4sf3.
;; NOTE(review): the test selecting between them is presumably
;; TARGET_SSE3 -- confirm.
745 (define_expand "reduc_splus_v4sf"
746 [(match_operand:V4SF 0 "register_operand" "")
747 (match_operand:V4SF 1 "register_operand" "")]
752 rtx tmp = gen_reg_rtx (V4SFmode);
753 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
754 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
757 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF register, via ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
761 (define_expand "reduc_smax_v4sf"
762 [(match_operand:V4SF 0 "register_operand" "")
763 (match_operand:V4SF 1 "register_operand" "")]
766 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register, via ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
770 (define_expand "reduc_smin_v4sf"
771 [(match_operand:V4SF 0 "register_operand" "")
772 (match_operand:V4SF 1 "register_operand" "")]
775 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
781 ;; Parallel single-precision floating point comparisons
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare producing a V4SF result (cmpPREDps).  Operand 3 is any
;; operator accepted by sse_comparison_operator and is printed into the
;; mnemonic through the %D3 output modifier.  Operand 1 is tied to the
;; destination.
785 (define_insn "sse_maskcmpv4sf3"
786 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (match_operator:V4SF 3 "sse_comparison_operator"
788 [(match_operand:V4SF 1 "register_operand" "0")
789 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
791 "cmp%D3ps\t{%2, %0|%0, %2}"
792 [(set_attr "type" "ssecmp")
793 (set_attr "mode" "V4SF")])
;; Scalar SFmode compare (cmpPREDss) with the same operand scheme.
795 (define_insn "sse_maskcmpsf3"
796 [(set (match_operand:SF 0 "register_operand" "=x")
797 (match_operator:SF 3 "sse_comparison_operator"
798 [(match_operand:SF 1 "register_operand" "0")
799 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
801 "cmp%D3ss\t{%2, %0|%0, %2}"
802 [(set_attr "type" "ssecmp")
803 (set_attr "mode" "SF")])
;; cmpPREDss on V4SF operands.  Unlike the two patterns above, operand 2
;; must be an xmm register; memory is not accepted.
805 (define_insn "sse_vmmaskcmpv4sf3"
806 [(set (match_operand:V4SF 0 "register_operand" "=x")
808 (match_operator:V4SF 3 "sse_comparison_operator"
809 [(match_operand:V4SF 1 "register_operand" "0")
810 (match_operand:V4SF 2 "register_operand" "x")])
814 "cmp%D3ss\t{%2, %0|%0, %2}"
815 [(set_attr "type" "ssecmp")
816 (set_attr "mode" "SF")])
;; comiss: sets the flags register, in CCFP mode, from element 0 of
;; operand 0 and element 0 of operand 1.  Operand 1 may be xmm or memory.
818 (define_insn "sse_comi"
819 [(set (reg:CCFP FLAGS_REG)
822 (match_operand:V4SF 0 "register_operand" "x")
823 (parallel [(const_int 0)]))
825 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
826 (parallel [(const_int 0)]))))]
828 "comiss\t{%1, %0|%0, %1}"
829 [(set_attr "type" "ssecomi")
830 (set_attr "mode" "SF")])
;; ucomiss: same operand shape as sse_comi, but the flags register is set
;; in CCFPU mode.
832 (define_insn "sse_ucomi"
833 [(set (reg:CCFPU FLAGS_REG)
836 (match_operand:V4SF 0 "register_operand" "x")
837 (parallel [(const_int 0)]))
839 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
840 (parallel [(const_int 0)]))))]
842 "ucomiss\t{%1, %0|%0, %1}"
843 [(set_attr "type" "ssecomi")
844 (set_attr "mode" "SF")])
;; Vector conditional-select expander for V4SF.  Operands 4 and 5 are
;; the values being compared, operands 1 and 2 the two general_operand
;; alternatives.  The expansion is attempted with ix86_expand_fp_vcond,
;; whose return value is tested.
846 (define_expand "vcondv4sf"
847 [(set (match_operand:V4SF 0 "register_operand" "")
850 [(match_operand:V4SF 4 "nonimmediate_operand" "")
851 (match_operand:V4SF 5 "nonimmediate_operand" "")])
852 (match_operand:V4SF 1 "general_operand" "")
853 (match_operand:V4SF 2 "general_operand" "")))]
856 if (ix86_expand_fp_vcond (operands))
862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
864 ;; Parallel single-precision floating point logical operations
866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise AND expander for V4SF; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
868 (define_expand "andv4sf3"
869 [(set (match_operand:V4SF 0 "register_operand" "")
870 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
871 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
873 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is tied to the destination and marked commutative.
875 (define_insn "*andv4sf3"
876 [(set (match_operand:V4SF 0 "register_operand" "=x")
877 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
878 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
879 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
880 "andps\t{%2, %0|%0, %2}"
881 [(set_attr "type" "sselog")
882 (set_attr "mode" "V4SF")])
;; andnps: AND of the complement of operand 1 (tied to the destination)
;; with operand 2.
884 (define_insn "sse_nandv4sf3"
885 [(set (match_operand:V4SF 0 "register_operand" "=x")
886 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
887 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
889 "andnps\t{%2, %0|%0, %2}"
890 [(set_attr "type" "sselog")
891 (set_attr "mode" "V4SF")])
;; Bitwise inclusive-OR expander for V4SF; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
893 (define_expand "iorv4sf3"
894 [(set (match_operand:V4SF 0 "register_operand" "")
895 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
896 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
898 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is tied to the destination and marked commutative.
900 (define_insn "*iorv4sf3"
901 [(set (match_operand:V4SF 0 "register_operand" "=x")
902 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
903 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
904 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
905 "orps\t{%2, %0|%0, %2}"
906 [(set_attr "type" "sselog")
907 (set_attr "mode" "V4SF")])
;; Bitwise exclusive-OR expander for V4SF; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
909 (define_expand "xorv4sf3"
910 [(set (match_operand:V4SF 0 "register_operand" "")
911 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
912 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
914 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is tied to the destination and marked commutative.
916 (define_insn "*xorv4sf3"
917 [(set (match_operand:V4SF 0 "register_operand" "=x")
918 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
919 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
920 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
921 "xorps\t{%2, %0|%0, %2}"
922 [(set_attr "type" "sselog")
923 (set_attr "mode" "V4SF")])
925 ;; Also define scalar versions. These are used for abs, neg, and
926 ;; conditional move. Using subregs into vector modes causes register
927 ;; allocation lossage. These patterns do not allow memory operands
928 ;; because the native instructions read the full 128-bits.
;; Each pattern below takes SFmode register operands, ties operand 1 to
;; the destination, emits the packed (ps) instruction, and sets the
;; "mode" attribute to V4SF to match the instruction actually emitted.
930 (define_insn "*andsf3"
931 [(set (match_operand:SF 0 "register_operand" "=x")
932 (and:SF (match_operand:SF 1 "register_operand" "0")
933 (match_operand:SF 2 "register_operand" "x")))]
935 "andps\t{%2, %0|%0, %2}"
936 [(set_attr "type" "sselog")
937 (set_attr "mode" "V4SF")])
939 (define_insn "*nandsf3"
940 [(set (match_operand:SF 0 "register_operand" "=x")
941 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
942 (match_operand:SF 2 "register_operand" "x")))]
944 "andnps\t{%2, %0|%0, %2}"
945 [(set_attr "type" "sselog")
946 (set_attr "mode" "V4SF")])
948 (define_insn "*iorsf3"
949 [(set (match_operand:SF 0 "register_operand" "=x")
950 (ior:SF (match_operand:SF 1 "register_operand" "0")
951 (match_operand:SF 2 "register_operand" "x")))]
953 "orps\t{%2, %0|%0, %2}"
954 [(set_attr "type" "sselog")
955 (set_attr "mode" "V4SF")])
957 (define_insn "*xorsf3"
958 [(set (match_operand:SF 0 "register_operand" "=x")
959 (xor:SF (match_operand:SF 1 "register_operand" "0")
960 (match_operand:SF 2 "register_operand" "x")))]
962 "xorps\t{%2, %0|%0, %2}"
963 [(set_attr "type" "sselog")
964 (set_attr "mode" "V4SF")])
966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
968 ;; Parallel single-precision floating point conversion operations
970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register "y" or memory) to
;; V2SF.  Operand 1 is a second V4SF input tied to the destination.
972 (define_insn "sse_cvtpi2ps"
973 [(set (match_operand:V4SF 0 "register_operand" "=x")
976 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
977 (match_operand:V4SF 1 "register_operand" "0")
980 "cvtpi2ps\t{%2, %0|%0, %2}"
981 [(set_attr "type" "ssecvt")
982 (set_attr "mode" "V4SF")])
;; cvtps2pi: elements 0 and 1 of a V4SI unspec of the V4SF source are
;; selected into a V2SI MMX register ("y").  Execution unit is "mmx".
984 (define_insn "sse_cvtps2pi"
985 [(set (match_operand:V2SI 0 "register_operand" "=y")
987 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
989 (parallel [(const_int 0) (const_int 1)])))]
991 "cvtps2pi\t{%1, %0|%0, %1}"
992 [(set_attr "type" "ssecvt")
993 (set_attr "unit" "mmx")
994 (set_attr "mode" "DI")])
;; cvttps2pi: as above, but the conversion is expressed with the fix rtx
;; rather than an unspec.
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi, although both
;; produce a V2SI MMX result; confirm whether the difference is intended.
996 (define_insn "sse_cvttps2pi"
997 [(set (match_operand:V2SI 0 "register_operand" "=y")
999 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1000 (parallel [(const_int 0) (const_int 1)])))]
1002 "cvttps2pi\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "ssecvt")
1004 (set_attr "unit" "mmx")
1005 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF.  Alternative 0 takes
;; it from a general register, alternative 1 from memory.  Operand 1 is
;; a second V4SF input tied to the destination.  The decode attributes
;; are given per alternative.
1007 (define_insn "sse_cvtsi2ss"
1008 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1011 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1012 (match_operand:V4SF 1 "register_operand" "0,0")
1015 "cvtsi2ss\t{%2, %0|%0, %2}"
1016 [(set_attr "type" "sseicvt")
1017 (set_attr "athlon_decode" "vector,double")
1018 (set_attr "amdfam10_decode" "vector,double")
1019 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode source variant, enabled only for 64-bit targets.
;; NOTE(review): the second alternative's constraint is "rm" here but
;; "m" in sse_cvtsi2ss; confirm whether that is intentional.
1021 (define_insn "sse_cvtsi2ssq"
1022 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1025 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1026 (match_operand:V4SF 1 "register_operand" "0,0")
1028 "TARGET_SSE && TARGET_64BIT"
1029 "cvtsi2ssq\t{%2, %0|%0, %2}"
1030 [(set_attr "type" "sseicvt")
1031 (set_attr "athlon_decode" "vector,double")
1032 (set_attr "amdfam10_decode" "vector,double")
1033 (set_attr "mode" "SF")])
1035 (define_insn "sse_cvtss2si"
1036 [(set (match_operand:SI 0 "register_operand" "=r,r")
1039 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1040 (parallel [(const_int 0)]))]
1041 UNSPEC_FIX_NOTRUNC))]
1043 "cvtss2si\t{%1, %0|%0, %1}"
1044 [(set_attr "type" "sseicvt")
1045 (set_attr "athlon_decode" "double,vector")
1046 (set_attr "prefix_rep" "1")
1047 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a plain SFmode operand (SSE register
;; or memory) rather than a vector element; same UNSPEC_FIX_NOTRUNC wrapper.
1049 (define_insn "sse_cvtss2si_2"
1050 [(set (match_operand:SI 0 "register_operand" "=r,r")
1051 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1052 UNSPEC_FIX_NOTRUNC))]
1054 "cvtss2si\t{%1, %0|%0, %1}"
1055 [(set_attr "type" "sseicvt")
1056 (set_attr "athlon_decode" "double,vector")
1057 (set_attr "amdfam10_decode" "double,double")
1058 (set_attr "prefix_rep" "1")
1059 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result form of cvtss2si, reading element 0 of V4SF
;; operand 1.  Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, while sse_cvtss2siq_2
;; has one -- confirm whether that is intentional.
1061 (define_insn "sse_cvtss2siq"
1062 [(set (match_operand:DI 0 "register_operand" "=r,r")
1065 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1066 (parallel [(const_int 0)]))]
1067 UNSPEC_FIX_NOTRUNC))]
1068 "TARGET_SSE && TARGET_64BIT"
1069 "cvtss2siq\t{%1, %0|%0, %1}"
1070 [(set_attr "type" "sseicvt")
1071 (set_attr "athlon_decode" "double,vector")
1072 (set_attr "prefix_rep" "1")
1073 (set_attr "mode" "DI")])
;; cvtss2siq on a plain SFmode source (SSE register or memory) producing a
;; DImode general register; 64-bit targets with SSE only.
1075 (define_insn "sse_cvtss2siq_2"
1076 [(set (match_operand:DI 0 "register_operand" "=r,r")
1077 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1078 UNSPEC_FIX_NOTRUNC))]
1079 "TARGET_SSE && TARGET_64BIT"
1080 "cvtss2siq\t{%1, %0|%0, %1}"
1081 [(set_attr "type" "sseicvt")
1082 (set_attr "athlon_decode" "double,vector")
1083 (set_attr "amdfam10_decode" "double,double")
1084 (set_attr "prefix_rep" "1")
1085 (set_attr "mode" "DI")])
;; cvttss2si: SImode general-register result taken from element 0 of the
;; V4SF operand 1 (SSE register or memory).
1087 (define_insn "sse_cvttss2si"
1088 [(set (match_operand:SI 0 "register_operand" "=r,r")
1091 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1092 (parallel [(const_int 0)]))))]
1094 "cvttss2si\t{%1, %0|%0, %1}"
1095 [(set_attr "type" "sseicvt")
1096 (set_attr "athlon_decode" "double,vector")
1097 (set_attr "amdfam10_decode" "double,double")
1098 (set_attr "prefix_rep" "1")
1099 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result form of cvttss2si on element 0 of operand 1;
;; enabled only for TARGET_SSE && TARGET_64BIT.
1101 (define_insn "sse_cvttss2siq"
1102 [(set (match_operand:DI 0 "register_operand" "=r,r")
1105 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1106 (parallel [(const_int 0)]))))]
1107 "TARGET_SSE && TARGET_64BIT"
1108 "cvttss2siq\t{%1, %0|%0, %1}"
1109 [(set_attr "type" "sseicvt")
1110 (set_attr "athlon_decode" "double,vector")
1111 (set_attr "amdfam10_decode" "double,double")
1112 (set_attr "prefix_rep" "1")
1113 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of a V4SI operand (SSE register or memory) into an
;; SSE register.
1115 (define_insn "sse2_cvtdq2ps"
1116 [(set (match_operand:V4SF 0 "register_operand" "=x")
1117 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1119 "cvtdq2ps\t{%1, %0|%0, %1}"
1120 [(set_attr "type" "ssecvt")
1121 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF -> V4SI conversion modelled as UNSPEC_FIX_NOTRUNC (as
;; opposed to the RTL "fix" used by sse2_cvttps2dq).
1123 (define_insn "sse2_cvtps2dq"
1124 [(set (match_operand:V4SI 0 "register_operand" "=x")
1125 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1126 UNSPEC_FIX_NOTRUNC))]
1128 "cvtps2dq\t{%1, %0|%0, %1}"
1129 [(set_attr "type" "ssecvt")
1130 (set_attr "prefix_data16" "1")
1131 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF -> V4SI conversion expressed with the RTL fix operator.
1133 (define_insn "sse2_cvttps2dq"
1134 [(set (match_operand:V4SI 0 "register_operand" "=x")
1135 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1137 "cvttps2dq\t{%1, %0|%0, %1}"
1138 [(set_attr "type" "ssecvt")
1139 (set_attr "prefix_rep" "1")
1140 (set_attr "mode" "TI")])
1142 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1144 ;; Parallel single-precision floating point element swizzling
1146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives on V4SF operands, operand 1 always tied to operand 0:
;;   x <- x : movhlps
;;   x <- o : movlps using the %H modifier on operand 2
;;   m <- x : movhps
;; The condition rejects operands 1 and 2 both being memory.
1148 (define_insn "sse_movhlps"
1149 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1152 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1153 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1154 (parallel [(const_int 6)
1158 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1160 movhlps\t{%2, %0|%0, %2}
1161 movlps\t{%H2, %0|%0, %H2}
1162 movhps\t{%2, %0|%0, %2}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives on V4SF operands, operand 1 always tied to operand 0:
;;   x <- x : movlhps
;;   x <- m : movhps
;;   o <- x : movlps storing through the %H modifier on operand 0
;; Operand legality is checked with ix86_binary_operator_ok.
1166 (define_insn "sse_movlhps"
1167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1170 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1171 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1172 (parallel [(const_int 0)
1176 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1178 movlhps\t{%2, %0|%0, %2}
1179 movhps\t{%2, %0|%0, %2}
1180 movlps\t{%2, %H0|%H0, %2}"
1181 [(set_attr "type" "ssemov")
1182 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: picks elements 2, 6, 3, 7 (in that order) from operands 1 and 2;
;; operand 1 is tied to the destination register.
1184 (define_insn "sse_unpckhps"
1185 [(set (match_operand:V4SF 0 "register_operand" "=x")
1188 (match_operand:V4SF 1 "register_operand" "0")
1189 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1190 (parallel [(const_int 2) (const_int 6)
1191 (const_int 3) (const_int 7)])))]
1193 "unpckhps\t{%2, %0|%0, %2}"
1194 [(set_attr "type" "sselog")
1195 (set_attr "mode" "V4SF")])
;; unpcklps: picks elements 0, 4, 1, 5 (in that order) from operands 1 and 2;
;; operand 1 is tied to the destination register.
1197 (define_insn "sse_unpcklps"
1198 [(set (match_operand:V4SF 0 "register_operand" "=x")
1201 (match_operand:V4SF 1 "register_operand" "0")
1202 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1203 (parallel [(const_int 0) (const_int 4)
1204 (const_int 1) (const_int 5)])))]
1206 "unpcklps\t{%2, %0|%0, %2}"
1207 [(set_attr "type" "sselog")
1208 (set_attr "mode" "V4SF")])
1210 ;; These are modeled with the same vec_concat as the others so that we
1211 ;; capture users of shufps that can use the new instructions
;; movshdup: single-source V4SF shuffle (operand 1 may be an SSE register or
;; memory); the visible selector list starts at element 1.
1212 (define_insn "sse3_movshdup"
1213 [(set (match_operand:V4SF 0 "register_operand" "=x")
1216 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1218 (parallel [(const_int 1)
1223 "movshdup\t{%1, %0|%0, %1}"
1224 [(set_attr "type" "sse")
1225 (set_attr "prefix_rep" "1")
1226 (set_attr "mode" "V4SF")])
;; movsldup: single-source V4SF shuffle (operand 1 may be an SSE register or
;; memory); the visible selector list starts at element 0.
1228 (define_insn "sse3_movsldup"
1229 [(set (match_operand:V4SF 0 "register_operand" "=x")
1232 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1234 (parallel [(const_int 0)
1239 "movsldup\t{%1, %0|%0, %1}"
1240 [(set_attr "type" "sse")
1241 (set_attr "prefix_rep" "1")
1242 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate mask in operand 3.  The mask is cut
;; into four 2-bit selectors; the upper two get 4 added before all four are
;; handed to gen_sse_shufps_1 as separate constants.
1244 (define_expand "sse_shufps"
1245 [(match_operand:V4SF 0 "register_operand" "")
1246 (match_operand:V4SF 1 "register_operand" "")
1247 (match_operand:V4SF 2 "nonimmediate_operand" "")
1248 (match_operand:SI 3 "const_int_operand" "")]
1251 int mask = INTVAL (operands[3]);
1252 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1253 GEN_INT ((mask >> 0) & 3),
1254 GEN_INT ((mask >> 2) & 3),
1255 GEN_INT (((mask >> 4) & 3) + 4),
1256 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four constants: operands 3 and 4
;; are in 0..3, operands 5 and 6 in 4..7.  The output code packs them back
;; into one immediate (subtracting 4 from operands 5 and 6), overwrites
;; operands[3] with it, and prints the instruction.
1260 (define_insn "sse_shufps_1"
1261 [(set (match_operand:V4SF 0 "register_operand" "=x")
1264 (match_operand:V4SF 1 "register_operand" "0")
1265 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1266 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1267 (match_operand 4 "const_0_to_3_operand" "")
1268 (match_operand 5 "const_4_to_7_operand" "")
1269 (match_operand 6 "const_4_to_7_operand" "")])))]
1273 mask |= INTVAL (operands[3]) << 0;
1274 mask |= INTVAL (operands[4]) << 2;
1275 mask |= (INTVAL (operands[5]) - 4) << 4;
1276 mask |= (INTVAL (operands[6]) - 4) << 6;
1277 operands[3] = GEN_INT (mask);
1279 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1281 [(set_attr "type" "sselog")
1282 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into a V2SF destination:
;;   m <- x : movhps
;;   x <- x : movhlps
;;   x <- o : movlps using the %H modifier on operand 1
1284 (define_insn "sse_storehps"
1285 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1287 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1288 (parallel [(const_int 2) (const_int 3)])))]
1291 movhps\t{%1, %0|%0, %1}
1292 movhlps\t{%1, %0|%0, %1}
1293 movlps\t{%H1, %0|%0, %H1}"
1294 [(set_attr "type" "ssemov")
1295 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of V4SF operand 1 (tied to the destination) with
;; the V2SF operand 2:
;;   x <- m : movhps
;;   x <- x : movlhps
;;   o <- x : movlps storing through the %H modifier on operand 0
1297 (define_insn "sse_loadhps"
1298 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1301 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1302 (parallel [(const_int 0) (const_int 1)]))
1303 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1306 movhps\t{%2, %0|%0, %2}
1307 movlhps\t{%2, %0|%0, %2}
1308 movlps\t{%2, %H0|%H0, %2}"
1309 [(set_attr "type" "ssemov")
1310 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into a V2SF destination.  The
;; register-to-register alternative uses movaps; the two alternatives that
;; involve memory use movlps.
1312 (define_insn "sse_storelps"
1313 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1315 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1316 (parallel [(const_int 0) (const_int 1)])))]
1319 movlps\t{%1, %0|%0, %1}
1320 movaps\t{%1, %0|%0, %1}
1321 movlps\t{%1, %0|%0, %1}"
1322 [(set_attr "type" "ssemov")
1323 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.  In the
;; first alternative operand 2 is tied to the destination and a shufps with
;; immediate 0xe4 is emitted; the other two alternatives tie operand 1 to the
;; destination and use movlps (load from memory / store to memory).
1325 (define_insn "sse_loadlps"
1326 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1328 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1330 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1331 (parallel [(const_int 2) (const_int 3)]))))]
1334 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1335 movlps\t{%2, %0|%0, %2}
1336 movlps\t{%2, %0|%0, %2}"
1337 [(set_attr "type" "sselog,ssemov,ssemov")
1338 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 2 is the source register, operand 1
;; is tied to the destination.
1340 (define_insn "sse_movss"
1341 [(set (match_operand:V4SF 0 "register_operand" "=x")
1343 (match_operand:V4SF 2 "register_operand" "x")
1344 (match_operand:V4SF 1 "register_operand" "0")
1347 "movss\t{%2, %0|%0, %2}"
1348 [(set_attr "type" "ssemov")
1349 (set_attr "mode" "SF")])
;; Builds a V4SF from SF operand 1, which is tied to the destination
;; register, by shuffling the register with itself using immediate 0.
1351 (define_insn "*vec_dupv4sf"
1352 [(set (match_operand:V4SF 0 "register_operand" "=x")
1354 (match_operand:SF 1 "register_operand" "0")))]
1356 "shufps\t{$0, %0, %0|%0, %0, 0}"
1357 [(set_attr "type" "sselog1")
1358 (set_attr "mode" "V4SF")])
1360 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1361 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1362 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SF from two SF operands.  Alternatives 0-1 target SSE registers
;; (unpcklps when both inputs are registers, movss when operand 1 is memory
;; and operand 2 satisfies "C"); alternatives 2-3 are the corresponding MMX
;; forms (punpckldq / movd), whose "y" constraints carry the "*" modifier.
1363 (define_insn "*sse_concatv2sf"
1364 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1366 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1367 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1370 unpcklps\t{%2, %0|%0, %2}
1371 movss\t{%1, %0|%0, %1}
1372 punpckldq\t{%2, %0|%0, %2}
1373 movd\t{%1, %0|%0, %1}"
1374 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1375 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from two V2SF halves.  Operand 1 is tied to the destination;
;; operand 2 comes from a register (movlhps) or from memory (movhps).
1377 (define_insn "*sse_concatv4sf"
1378 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1380 (match_operand:V2SF 1 "register_operand" " 0,0")
1381 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1384 movlhps\t{%2, %0|%0, %2}
1385 movhps\t{%2, %0|%0, %2}"
1386 [(set_attr "type" "ssemov")
1387 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: delegates to ix86_expand_vector_init with
;; `false' as its first argument, operand 0 as target and operand 1 as the
;; initialiser.
1389 (define_expand "vec_initv4sf"
1390 [(match_operand:V4SF 0 "register_operand" "")
1391 (match_operand 1 "" "")]
1394 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into a V4SF whose remaining contents come from
;; operand 1 (either tied to the destination or the constant "C").
;; NOTE(review): four constraint alternatives are declared but only three
;; output templates are visible here -- confirm the fourth against the full
;; source.
1398 (define_insn "vec_setv4sf_0"
1399 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1402 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1403 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1407 movss\t{%2, %0|%0, %2}
1408 movss\t{%2, %0|%0, %2}
1409 movd\t{%2, %0|%0, %2}
1411 [(set_attr "type" "ssemov")
1412 (set_attr "mode" "SF")])
1414 ;; A subset is vec_setv4sf.
;; insertps form of element insertion.  Operand 3 is a power of two in 1..8
;; (const_pow2_1_to_8_operand); before printing it is replaced by
;; exact_log2 (operand 3) << 4, which becomes the insertps immediate.
1415 (define_insn "*vec_setv4sf_sse4_1"
1416 [(set (match_operand:V4SF 0 "register_operand" "=x")
1419 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1420 (match_operand:V4SF 1 "register_operand" "0")
1421 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1424 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1425 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1427 [(set_attr "type" "sselog")
1428 (set_attr "prefix_extra" "1")
1429 (set_attr "mode" "V4SF")])
;; insertps with a caller-supplied 8-bit immediate (operand 3, 0..255).
;; Operand 2 is the source SSE register and operand 1 is tied to the
;; destination; the operation is modelled as an unspec.
;; The output template is a plain string; no trailing `;' belongs after it
;; (in .md syntax that character only starts a comment).
1431 (define_insn "sse4_1_insertps"
1432 [(set (match_operand:V4SF 0 "register_operand" "=x")
1433 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1434 (match_operand:V4SF 1 "register_operand" "0")
1435 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1438 "insertps\t{%3, %2, %0|%0, %2, %3}"
1439 [(set_attr "type" "sselog")
1440 (set_attr "prefix_extra" "1")
1441 (set_attr "mode" "V4SF")])
;; Post-reload split (condition: TARGET_SSE && reload_completed) for a V4SF
;; memory destination and a non-memory SF operand 1: the replacement code is
;; a single SFmode move to the address of operand 0 at offset 0.
1444 [(set (match_operand:V4SF 0 "memory_operand" "")
1447 (match_operand:SF 1 "nonmemory_operand" ""))
1450 "TARGET_SSE && reload_completed"
1453 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element store: delegates to ix86_expand_vector_set with `false' as
;; its first argument, the vector (operand 0), the SF value (operand 1) and
;; the constant element index taken from operand 2.
1457 (define_expand "vec_setv4sf"
1458 [(match_operand:V4SF 0 "register_operand" "")
1459 (match_operand:SF 1 "register_operand" "")
1460 (match_operand 2 "const_int_operand" "")]
1463 ix86_expand_vector_set (false, operands[0], operands[1],
1464 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF into an SF destination.  Matching
;; requires that operands 0 and 1 are not both memory.  After reload the
;; pattern is split: the visible code re-expresses operand 1 in SFmode
;; (through gen_rtx_REG on the same register number, or gen_lowpart) and
;; emits an ordinary move into operand 0.
1468 (define_insn_and_split "*vec_extractv4sf_0"
1469 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1471 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1472 (parallel [(const_int 0)])))]
1473 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1475 "&& reload_completed"
1478 rtx op1 = operands[1];
1480 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1482 op1 = gen_lowpart (SFmode, op1);
1483 emit_move_insn (operands[0], op1);
;; extractps: extracts the element of V4SF operand 1 selected by the constant
;; operand 2 (0..3) into SF operand 0.
;; NOTE(review): operand 0 has constraint "=rm" but predicate
;; register_operand, so a memory destination can only arise during register
;; allocation -- confirm whether nonimmediate_operand was intended.
1487 (define_insn "*sse4_1_extractps"
1488 [(set (match_operand:SF 0 "register_operand" "=rm")
1490 (match_operand:V4SF 1 "register_operand" "x")
1491 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1493 "extractps\t{%2, %1, %0|%0, %1, %2}"
1494 [(set_attr "type" "sselog")
1495 (set_attr "prefix_extra" "1")
1496 (set_attr "mode" "V4SF")])
;; V4SF element load: delegates to ix86_expand_vector_extract with `false'
;; as its first argument, the SF target (operand 0), the vector (operand 1)
;; and the constant element index taken from operand 2.
1498 (define_expand "vec_extractv4sf"
1499 [(match_operand:SF 0 "register_operand" "")
1500 (match_operand:V4SF 1 "register_operand" "")
1501 (match_operand 2 "const_int_operand" "")]
1504 ix86_expand_vector_extract (false, operands[0], operands[1],
1505 INTVAL (operands[2]));
1509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1511 ;; Parallel double-precision floating point arithmetic
1513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation: all the work is done by ix86_expand_fp_absneg_operator
;; (called with NEG); DONE means the RTL template itself is not emitted.
1515 (define_expand "negv2df2"
1516 [(set (match_operand:V2DF 0 "register_operand" "")
1517 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1519 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value: all the work is done by
;; ix86_expand_fp_absneg_operator (called with ABS); DONE means the RTL
;; template itself is not emitted.
1521 (define_expand "absv2df2"
1522 [(set (match_operand:V2DF 0 "register_operand" "")
1523 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1525 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the plus:V2DF
;; template is emitted.
1527 (define_expand "addv2df3"
1528 [(set (match_operand:V2DF 0 "register_operand" "")
1529 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1530 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1532 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be an SSE register or memory.  Requires TARGET_SSE2 and
;; operands accepted by ix86_binary_operator_ok.
1534 (define_insn "*addv2df3"
1535 [(set (match_operand:V2DF 0 "register_operand" "=x")
1536 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1537 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1538 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1539 "addpd\t{%2, %0|%0, %2}"
1540 [(set_attr "type" "sseadd")
1541 (set_attr "mode" "V2DF")])
;; addsd (scalar double add, insn mode DF).  Operand 1 is tied to the
;; destination register; operand 2 may be an SSE register or memory.
;; The operand check is made in V2DFmode, the mode of every operand of this
;; pattern, matching the sibling sse2_vmmulv2df3.
1543 (define_insn "sse2_vmaddv2df3"
1544 [(set (match_operand:V2DF 0 "register_operand" "=x")
1546 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1547 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1550 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1551 "addsd\t{%2, %0|%0, %2}"
1552 [(set_attr "type" "sseadd")
1553 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the minus:V2DF
;; template is emitted.
1555 (define_expand "subv2df3"
1556 [(set (match_operand:V2DF 0 "register_operand" "")
1557 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1558 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1560 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no
;; commutative marker); operand 2 may be an SSE register or memory.
1562 (define_insn "*subv2df3"
1563 [(set (match_operand:V2DF 0 "register_operand" "=x")
1564 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1565 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1567 "subpd\t{%2, %0|%0, %2}"
1568 [(set_attr "type" "sseadd")
1569 (set_attr "mode" "V2DF")])
;; subsd (scalar double subtract, insn mode DF).  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1571 (define_insn "sse2_vmsubv2df3"
1572 [(set (match_operand:V2DF 0 "register_operand" "=x")
1574 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1575 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1579 "subsd\t{%2, %0|%0, %2}"
1580 [(set_attr "type" "sseadd")
1581 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the mult:V2DF
;; template is emitted.
1583 (define_expand "mulv2df3"
1584 [(set (match_operand:V2DF 0 "register_operand" "")
1585 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1586 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1588 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be an SSE register or memory.  Requires TARGET_SSE2 and
;; operands accepted by ix86_binary_operator_ok.
1590 (define_insn "*mulv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "=x")
1592 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1593 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1594 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1595 "mulpd\t{%2, %0|%0, %2}"
1596 [(set_attr "type" "ssemul")
1597 (set_attr "mode" "V2DF")])
;; mulsd (scalar double multiply, insn mode DF).  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1599 (define_insn "sse2_vmmulv2df3"
1600 [(set (match_operand:V2DF 0 "register_operand" "=x")
1602 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1603 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1606 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1607 "mulsd\t{%2, %0|%0, %2}"
1608 [(set_attr "type" "ssemul")
1609 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must already be a register; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy before the
;; div:V2DF template is emitted.
1611 (define_expand "divv2df3"
1612 [(set (match_operand:V2DF 0 "register_operand" "")
1613 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1614 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1616 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2 may be
;; an SSE register or memory.
1618 (define_insn "*divv2df3"
1619 [(set (match_operand:V2DF 0 "register_operand" "=x")
1620 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1621 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1623 "divpd\t{%2, %0|%0, %2}"
1624 [(set_attr "type" "ssediv")
1625 (set_attr "mode" "V2DF")])
;; divsd (scalar double divide, insn mode DF).  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1627 (define_insn "sse2_vmdivv2df3"
1628 [(set (match_operand:V2DF 0 "register_operand" "=x")
1630 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1631 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1635 "divsd\t{%2, %0|%0, %2}"
1636 [(set_attr "type" "ssediv")
1637 (set_attr "mode" "DF")])
;; sqrtpd: sqrt:V2DF of operand 1 (SSE register or memory) into an SSE
;; register; the source is not tied to the destination.
1639 (define_insn "sqrtv2df2"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x")
1641 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1643 "sqrtpd\t{%1, %0|%0, %1}"
1644 [(set_attr "type" "sse")
1645 (set_attr "mode" "V2DF")])
;; sqrtsd (scalar double square root, insn mode DF).  Operand 1 is the value
;; whose square root is taken; operand 2 is tied to the destination.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with its
;; "xm" constraint (the instruction accepts a memory source), as sqrtv2df2
;; already does for the packed form.
1647 (define_insn "sse2_vmsqrtv2df2"
1648 [(set (match_operand:V2DF 0 "register_operand" "=x")
1650 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1651 (match_operand:V2DF 2 "register_operand" "0")
1654 "sqrtsd\t{%1, %0|%0, %1}"
1655 [(set_attr "type" "sse")
1656 (set_attr "mode" "DF")])
1658 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1659 ;; isn't really correct, as those rtl operators aren't defined when
1660 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register; ix86_fixup_binary_operands_no_copy then
;; adjusts the operands.
1662 (define_expand "smaxv2df3"
1663 [(set (match_operand:V2DF 0 "register_operand" "")
1664 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1665 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1668 if (!flag_finite_math_only)
1669 operands[1] = force_reg (V2DFmode, operands[1]);
1670 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, variant used only when flag_finite_math_only is set: operand 1 is
;; marked commutative ("%0"), so the operands may be swapped.
1673 (define_insn "*smaxv2df3_finite"
1674 [(set (match_operand:V2DF 0 "register_operand" "=x")
1675 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1676 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1677 "TARGET_SSE2 && flag_finite_math_only
1678 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1679 "maxpd\t{%2, %0|%0, %2}"
1680 [(set_attr "type" "sseadd")
1681 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative variant: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so operand order is preserved.
1683 (define_insn "*smaxv2df3"
1684 [(set (match_operand:V2DF 0 "register_operand" "=x")
1685 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1686 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1688 "maxpd\t{%2, %0|%0, %2}"
1689 [(set_attr "type" "sseadd")
1690 (set_attr "mode" "V2DF")])
;; maxsd (scalar double maximum, insn mode DF).  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1692 (define_insn "sse2_vmsmaxv2df3"
1693 [(set (match_operand:V2DF 0 "register_operand" "=x")
1695 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1696 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1700 "maxsd\t{%2, %0|%0, %2}"
1701 [(set_attr "type" "sseadd")
1702 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register; ix86_fixup_binary_operands_no_copy then
;; adjusts the operands.
1704 (define_expand "sminv2df3"
1705 [(set (match_operand:V2DF 0 "register_operand" "")
1706 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1707 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1710 if (!flag_finite_math_only)
1711 operands[1] = force_reg (V2DFmode, operands[1]);
1712 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, variant used only when flag_finite_math_only is set: operand 1 is
;; marked commutative ("%0"), so the operands may be swapped.
1715 (define_insn "*sminv2df3_finite"
1716 [(set (match_operand:V2DF 0 "register_operand" "=x")
1717 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1718 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1719 "TARGET_SSE2 && flag_finite_math_only
1720 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1721 "minpd\t{%2, %0|%0, %2}"
1722 [(set_attr "type" "sseadd")
1723 (set_attr "mode" "V2DF")])
;; minpd, non-commutative variant: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so operand order is preserved.
1725 (define_insn "*sminv2df3"
1726 [(set (match_operand:V2DF 0 "register_operand" "=x")
1727 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1728 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1730 "minpd\t{%2, %0|%0, %2}"
1731 [(set_attr "type" "sseadd")
1732 (set_attr "mode" "V2DF")])
;; minsd (scalar double minimum, insn mode DF).  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1734 (define_insn "sse2_vmsminv2df3"
1735 [(set (match_operand:V2DF 0 "register_operand" "=x")
1737 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1738 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1742 "minsd\t{%2, %0|%0, %2}"
1743 [(set_attr "type" "sseadd")
1744 (set_attr "mode" "DF")])
;; addsubpd.  The RTL names operands 1 and 2 once and reuses them through
;; match_dup in a minus:V2DF sub-expression; operand 1 is tied to the
;; destination, operand 2 may be an SSE register or memory.
1746 (define_insn "sse3_addsubv2df3"
1747 [(set (match_operand:V2DF 0 "register_operand" "=x")
1750 (match_operand:V2DF 1 "register_operand" "0")
1751 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1752 (minus:V2DF (match_dup 1) (match_dup 2))
1755 "addsubpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sseadd")
1757 (set_attr "mode" "V2DF")])
;; haddpd.  The RTL pairs element 0 with element 1 of operand 1, and element
;; 0 with element 1 of operand 2 (each as DF vec_selects).  Operand 1 is tied
;; to the destination.
1759 (define_insn "sse3_haddv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x")
1764 (match_operand:V2DF 1 "register_operand" "0")
1765 (parallel [(const_int 0)]))
1766 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1769 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1770 (parallel [(const_int 0)]))
1771 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1773 "haddpd\t{%2, %0|%0, %2}"
1774 [(set_attr "type" "sseadd")
1775 (set_attr "mode" "V2DF")])
;; hsubpd.  Same operand layout as sse3_haddv2df3: elements 0 and 1 of
;; operand 1 are paired, as are elements 0 and 1 of operand 2.  Operand 1 is
;; tied to the destination.
1777 (define_insn "sse3_hsubv2df3"
1778 [(set (match_operand:V2DF 0 "register_operand" "=x")
1782 (match_operand:V2DF 1 "register_operand" "0")
1783 (parallel [(const_int 0)]))
1784 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1787 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1788 (parallel [(const_int 0)]))
1789 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1791 "hsubpd\t{%2, %0|%0, %2}"
1792 [(set_attr "type" "sseadd")
1793 (set_attr "mode" "V2DF")])
;; V2DF sum reduction: emits sse3_haddv2df3 with operand 1 used as both
;; inputs.
1795 (define_expand "reduc_splus_v2df"
1796 [(match_operand:V2DF 0 "register_operand" "")
1797 (match_operand:V2DF 1 "register_operand" "")]
1800 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1806 ;; Parallel double-precision floating point comparisons
1808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-double compare.  Operator 3 is any sse_comparison_operator; its
;; condition is printed into the mnemonic through %D3 (cmp<cond>pd).
;; Operand 1 is tied to the destination.
1810 (define_insn "sse2_maskcmpv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x")
1812 (match_operator:V2DF 3 "sse_comparison_operator"
1813 [(match_operand:V2DF 1 "register_operand" "0")
1814 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1816 "cmp%D3pd\t{%2, %0|%0, %2}"
1817 [(set_attr "type" "ssecmp")
1818 (set_attr "mode" "V2DF")])
;; Scalar-double compare on plain DFmode operands; condition printed through
;; %D3 (cmp<cond>sd).  Operand 1 is tied to the destination.
1820 (define_insn "sse2_maskcmpdf3"
1821 [(set (match_operand:DF 0 "register_operand" "=x")
1822 (match_operator:DF 3 "sse_comparison_operator"
1823 [(match_operand:DF 1 "register_operand" "0")
1824 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1826 "cmp%D3sd\t{%2, %0|%0, %2}"
1827 [(set_attr "type" "ssecmp")
1828 (set_attr "mode" "DF")])
;; Scalar-double compare (cmp<cond>sd, insn mode DF) whose operands are V2DF
;; vectors.  Operand 1 is tied to the destination.
1830 (define_insn "sse2_vmmaskcmpv2df3"
1831 [(set (match_operand:V2DF 0 "register_operand" "=x")
1833 (match_operator:V2DF 3 "sse_comparison_operator"
1834 [(match_operand:V2DF 1 "register_operand" "0")
1835 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1839 "cmp%D3sd\t{%2, %0|%0, %2}"
1840 [(set_attr "type" "ssecmp")
1841 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 of operand 0 and
;; element 0 of operand 1.  Neither operand is written (no "=" constraint).
1843 (define_insn "sse2_comi"
1844 [(set (reg:CCFP FLAGS_REG)
1847 (match_operand:V2DF 0 "register_operand" "x")
1848 (parallel [(const_int 0)]))
1850 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1851 (parallel [(const_int 0)]))))]
1853 "comisd\t{%1, %0|%0, %1}"
1854 [(set_attr "type" "ssecomi")
1855 (set_attr "mode" "DF")])
;; ucomisd: same operand layout as sse2_comi, but the flags register is set
;; in CCFPU mode.
1857 (define_insn "sse2_ucomi"
1858 [(set (reg:CCFPU FLAGS_REG)
1861 (match_operand:V2DF 0 "register_operand" "x")
1862 (parallel [(const_int 0)]))
1864 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1865 (parallel [(const_int 0)]))))]
1867 "ucomisd\t{%1, %0|%0, %1}"
1868 [(set_attr "type" "ssecomi")
1869 (set_attr "mode" "DF")])
;; V2DF vector conditional.  Operator 3 compares operands 4 and 5; operands 1
;; and 2 are the two V2DF values to choose between.  Expansion is handed to
;; ix86_expand_fp_vcond, whose return value is tested by the visible code.
1871 (define_expand "vcondv2df"
1872 [(set (match_operand:V2DF 0 "register_operand" "")
1874 (match_operator 3 ""
1875 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1876 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1877 (match_operand:V2DF 1 "general_operand" "")
1878 (match_operand:V2DF 2 "general_operand" "")))]
1881 if (ix86_expand_fp_vcond (operands))
1887 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1889 ;; Parallel double-precision floating point logical operations
1891 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy before the and:V2DF template is
;; emitted.
1893 (define_expand "andv2df3"
1894 [(set (match_operand:V2DF 0 "register_operand" "")
1895 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1896 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1898 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be an SSE register or memory.
1900 (define_insn "*andv2df3"
1901 [(set (match_operand:V2DF 0 "register_operand" "=x")
1902 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1903 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1904 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1905 "andpd\t{%2, %0|%0, %2}"
1906 [(set_attr "type" "sselog")
1907 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  Only operand 1, tied to the
;; destination, is complemented, so the pattern is not commutative.
1909 (define_insn "sse2_nandv2df3"
1910 [(set (match_operand:V2DF 0 "register_operand" "=x")
1911 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1912 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1914 "andnpd\t{%2, %0|%0, %2}"
1915 [(set_attr "type" "sselog")
1916 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy before the ior:V2DF template is
;; emitted.
1918 (define_expand "iorv2df3"
1919 [(set (match_operand:V2DF 0 "register_operand" "")
1920 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1921 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1923 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be an SSE register or memory.
1925 (define_insn "*iorv2df3"
1926 [(set (match_operand:V2DF 0 "register_operand" "=x")
1927 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1928 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1929 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1930 "orpd\t{%2, %0|%0, %2}"
1931 [(set_attr "type" "sselog")
1932 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy before the xor:V2DF template is
;; emitted.
1934 (define_expand "xorv2df3"
1935 [(set (match_operand:V2DF 0 "register_operand" "")
1936 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1939 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be an SSE register or memory.
1941 (define_insn "*xorv2df3"
1942 [(set (match_operand:V2DF 0 "register_operand" "=x")
1943 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1944 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1945 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1946 "xorpd\t{%2, %0|%0, %2}"
1947 [(set_attr "type" "sselog")
1948 (set_attr "mode" "V2DF")])
1950 ;; Also define scalar versions. These are used for abs, neg, and
1951 ;; conditional move. Using subregs into vector modes causes register
1952 ;; allocation lossage. These patterns do not allow memory operands
1953 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND implemented with the packed andpd instruction (insn
;; mode V2DF).  Both inputs are restricted to registers.
1955 (define_insn "*anddf3"
1956 [(set (match_operand:DF 0 "register_operand" "=x")
1957 (and:DF (match_operand:DF 1 "register_operand" "0")
1958 (match_operand:DF 2 "register_operand" "x")))]
1960 "andpd\t{%2, %0|%0, %2}"
1961 [(set_attr "type" "sselog")
1962 (set_attr "mode" "V2DF")])
;; Scalar DF (NOT operand 1) AND operand 2, implemented with the packed
;; andnpd instruction (insn mode V2DF).  Both inputs are registers.
1964 (define_insn "*nanddf3"
1965 [(set (match_operand:DF 0 "register_operand" "=x")
1966 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1967 (match_operand:DF 2 "register_operand" "x")))]
1969 "andnpd\t{%2, %0|%0, %2}"
1970 [(set_attr "type" "sselog")
1971 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR implemented with the packed orpd instruction (insn
;; mode V2DF).  Both inputs are restricted to registers.
1973 (define_insn "*iordf3"
1974 [(set (match_operand:DF 0 "register_operand" "=x")
1975 (ior:DF (match_operand:DF 1 "register_operand" "0")
1976 (match_operand:DF 2 "register_operand" "x")))]
1978 "orpd\t{%2, %0|%0, %2}"
1979 [(set_attr "type" "sselog")
1980 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR implemented with the packed xorpd instruction (insn
;; mode V2DF).  Both inputs are restricted to registers.
1982 (define_insn "*xordf3"
1983 [(set (match_operand:DF 0 "register_operand" "=x")
1984 (xor:DF (match_operand:DF 1 "register_operand" "0")
1985 (match_operand:DF 2 "register_operand" "x")))]
1987 "xorpd\t{%2, %0|%0, %2}"
1988 [(set_attr "type" "sselog")
1989 (set_attr "mode" "V2DF")])
1991 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1993 ;; Parallel double-precision floating point conversion operations
1995 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of a V2SI operand taken from an MMX register ("y") or
;; from memory.  Only the MMX-register alternative is attributed to the
;; "mmx" unit.
1997 (define_insn "sse2_cvtpi2pd"
1998 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1999 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2001 "cvtpi2pd\t{%1, %0|%0, %1}"
2002 [(set_attr "type" "ssecvt")
2003 (set_attr "unit" "mmx,*")
2004 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (SSE register or memory) -> V2SI in an MMX register,
;; modelled as UNSPEC_FIX_NOTRUNC.
2006 (define_insn "sse2_cvtpd2pi"
2007 [(set (match_operand:V2SI 0 "register_operand" "=y")
2008 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2009 UNSPEC_FIX_NOTRUNC))]
2011 "cvtpd2pi\t{%1, %0|%0, %1}"
2012 [(set_attr "type" "ssecvt")
2013 (set_attr "unit" "mmx")
2014 (set_attr "prefix_data16" "1")
2015 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF (SSE register or memory) -> V2SI in an MMX register,
;; expressed with the RTL fix operator.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" on the otherwise
;; parallel sse2_cvtpd2pi -- confirm which is intended.
2017 (define_insn "sse2_cvttpd2pi"
2018 [(set (match_operand:V2SI 0 "register_operand" "=y")
2019 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2021 "cvttpd2pi\t{%1, %0|%0, %1}"
2022 [(set_attr "type" "ssecvt")
2023 (set_attr "unit" "mmx")
2024 (set_attr "prefix_data16" "1")
2025 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SImode operand 2 (general register or memory) with
;; float:DF.  Operand 1 is tied to the V2DF destination.
2027 (define_insn "sse2_cvtsi2sd"
2028 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2031 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2032 (match_operand:V2DF 1 "register_operand" "0,0")
2035 "cvtsi2sd\t{%2, %0|%0, %2}"
2036 [(set_attr "type" "sseicvt")
2037 (set_attr "mode" "DF")
2038 (set_attr "athlon_decode" "double,direct")
2039 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source form of cvtsi2sd; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.  Operand 1 is tied to the destination.
2041 (define_insn "sse2_cvtsi2sdq"
2042 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2045 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2046 (match_operand:V2DF 1 "register_operand" "0,0")
2048 "TARGET_SSE2 && TARGET_64BIT"
2049 "cvtsi2sdq\t{%2, %0|%0, %2}"
2050 [(set_attr "type" "sseicvt")
2051 (set_attr "mode" "DF")
2052 (set_attr "athlon_decode" "double,direct")
2053 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: SImode general-register result computed from element 0 of the
;; V2DF operand 1 (SSE register or memory), expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm whether that is intentional.
2055 (define_insn "sse2_cvtsd2si"
2056 [(set (match_operand:SI 0 "register_operand" "=r,r")
2059 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2060 (parallel [(const_int 0)]))]
2061 UNSPEC_FIX_NOTRUNC))]
2063 "cvtsd2si\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "sseicvt")
2065 (set_attr "athlon_decode" "double,vector")
2066 (set_attr "prefix_rep" "1")
2067 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a plain DFmode operand (SSE register
;; or memory) rather than a vector element; same UNSPEC_FIX_NOTRUNC wrapper.
2069 (define_insn "sse2_cvtsd2si_2"
2070 [(set (match_operand:SI 0 "register_operand" "=r,r")
2071 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2072 UNSPEC_FIX_NOTRUNC))]
2074 "cvtsd2si\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "sseicvt")
2076 (set_attr "athlon_decode" "double,vector")
2077 (set_attr "amdfam10_decode" "double,double")
2078 (set_attr "prefix_rep" "1")
2079 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result form of cvtsd2si, reading element 0 of V2DF
;; operand 1.  Requires TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, while
;; sse2_cvtsd2siq_2 has one -- confirm whether that is intentional.
2081 (define_insn "sse2_cvtsd2siq"
2082 [(set (match_operand:DI 0 "register_operand" "=r,r")
2085 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2086 (parallel [(const_int 0)]))]
2087 UNSPEC_FIX_NOTRUNC))]
2088 "TARGET_SSE2 && TARGET_64BIT"
2089 "cvtsd2siq\t{%1, %0|%0, %1}"
2090 [(set_attr "type" "sseicvt")
2091 (set_attr "athlon_decode" "double,vector")
2092 (set_attr "prefix_rep" "1")
2093 (set_attr "mode" "DI")])
;; Variant of the cvtsd2siq pattern taking a scalar DF source (register or
;; memory) wrapped in UNSPEC_FIX_NOTRUNC; DImode result.  Enabled only when
;; TARGET_SSE2 && TARGET_64BIT.
2095 (define_insn "sse2_cvtsd2siq_2"
2096 [(set (match_operand:DI 0 "register_operand" "=r,r")
2097 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2098 UNSPEC_FIX_NOTRUNC))]
2099 "TARGET_SSE2 && TARGET_64BIT"
2100 "cvtsd2siq\t{%1, %0|%0, %1}"
2101 [(set_attr "type" "sseicvt")
2102 (set_attr "athlon_decode" "double,vector")
2103 (set_attr "amdfam10_decode" "double,double")
2104 (set_attr "prefix_rep" "1")
2105 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 (register or memory)
;; into an SImode general register.
2107 (define_insn "sse2_cvttsd2si"
2108 [(set (match_operand:SI 0 "register_operand" "=r,r")
2111 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2112 (parallel [(const_int 0)]))))]
2114 "cvttsd2si\t{%1, %0|%0, %1}"
2115 [(set_attr "type" "sseicvt")
2116 (set_attr "prefix_rep" "1")
2117 (set_attr "mode" "SI")
2118 (set_attr "athlon_decode" "double,vector")
2119 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: 64-bit form of the cvttsd2si pattern.  Converts element 0 of
;; the V2DF operand 1 into a DImode general register; enabled only when
;; TARGET_SSE2 && TARGET_64BIT.
2121 (define_insn "sse2_cvttsd2siq"
2122 [(set (match_operand:DI 0 "register_operand" "=r,r")
2125 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2126 (parallel [(const_int 0)]))))]
2127 "TARGET_SSE2 && TARGET_64BIT"
2128 "cvttsd2siq\t{%1, %0|%0, %1}"
2129 [(set_attr "type" "sseicvt")
2130 (set_attr "prefix_rep" "1")
2131 (set_attr "mode" "DI")
2132 (set_attr "athlon_decode" "double,vector")
2133 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: elements 0 and 1 of the V4SI operand 1 (register or memory)
;; produce the V2DF result in operand 0.
2135 (define_insn "sse2_cvtdq2pd"
2136 [(set (match_operand:V2DF 0 "register_operand" "=x")
2139 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2140 (parallel [(const_int 0) (const_int 1)]))))]
2142 "cvtdq2pd\t{%1, %0|%0, %1}"
2143 [(set_attr "type" "ssecvt")
2144 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement supplies operand 2 as
;; the V2SImode zero constant; operand 2 of the "*sse2_cvtpd2dq" insn is a
;; V2SI const0_operand.
2146 (define_expand "sse2_cvtpd2dq"
2147 [(set (match_operand:V4SI 0 "register_operand" "")
2149 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2153 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: the V2DF operand 1 (register or memory) is wrapped in a V2SI
;; unspec, and operand 2 must be the V2SI zero constant; the V4SI result is
;; placed in an SSE register.
2155 (define_insn "*sse2_cvtpd2dq"
2156 [(set (match_operand:V4SI 0 "register_operand" "=x")
2158 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2160 (match_operand:V2SI 2 "const0_operand" "")))]
2162 "cvtpd2dq\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "ssecvt")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "mode" "TI")
2166 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation statement supplies operand 2 as
;; the V2SImode zero constant; operand 2 of the "*sse2_cvttpd2dq" insn is a
;; V2SI const0_operand.
2168 (define_expand "sse2_cvttpd2dq"
2169 [(set (match_operand:V4SI 0 "register_operand" "")
2171 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2174 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: the V2DF operand 1 (register or memory) goes through
;; fix:V2SI, and operand 2 must be the V2SI zero constant; the V4SI result
;; is placed in an SSE register.
2176 (define_insn "*sse2_cvttpd2dq"
2177 [(set (match_operand:V4SI 0 "register_operand" "=x")
2179 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2180 (match_operand:V2SI 2 "const0_operand" "")))]
2182 "cvttpd2dq\t{%1, %0|%0, %1}"
2183 [(set_attr "type" "ssecvt")
2184 (set_attr "prefix_rep" "1")
2185 (set_attr "mode" "TI")
2186 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: the V2DF operand 2 (register or memory) goes through
;; float_truncate:V2SF; operand 1 (V4SF) is tied to the destination
;; register in both alternatives.
2188 (define_insn "sse2_cvtsd2ss"
2189 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2192 (float_truncate:V2SF
2193 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2194 (match_operand:V4SF 1 "register_operand" "0,0")
2197 "cvtsd2ss\t{%2, %0|%0, %2}"
2198 [(set_attr "type" "ssecvt")
2199 (set_attr "athlon_decode" "vector,double")
2200 (set_attr "amdfam10_decode" "vector,double")
2201 (set_attr "mode" "SF")])
;; cvtss2sd: reads the V4SF operand 2 (register or memory) through a
;; selector naming elements 0 and 1; operand 1 (V2DF) is tied to the
;; destination register in both alternatives.
2203 (define_insn "sse2_cvtss2sd"
2204 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2208 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2209 (parallel [(const_int 0) (const_int 1)])))
2210 (match_operand:V2DF 1 "register_operand" "0,0")
2213 "cvtss2sd\t{%2, %0|%0, %2}"
2214 [(set_attr "type" "ssecvt")
2215 (set_attr "amdfam10_decode" "vector,double")
2216 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation statement supplies operand 2 as
;; the V2SFmode zero constant; operand 2 of the "*sse2_cvtpd2ps" insn is a
;; V2SF const0_operand.
;; NOTE(review): operand 1 carries an "xm" constraint here, unlike the
;; other expanders in this file, which use "" -- confirm it is intentional.
2218 (define_expand "sse2_cvtpd2ps"
2219 [(set (match_operand:V4SF 0 "register_operand" "")
2221 (float_truncate:V2SF
2222 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2225 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: the V2DF operand 1 (register or memory) goes through
;; float_truncate:V2SF, and operand 2 must be the V2SF zero constant; the
;; V4SF result is placed in an SSE register.
2227 (define_insn "*sse2_cvtpd2ps"
2228 [(set (match_operand:V4SF 0 "register_operand" "=x")
2230 (float_truncate:V2SF
2231 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2232 (match_operand:V2SF 2 "const0_operand" "")))]
2234 "cvtpd2ps\t{%1, %0|%0, %1}"
2235 [(set_attr "type" "ssecvt")
2236 (set_attr "prefix_data16" "1")
2237 (set_attr "mode" "V4SF")
2238 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand 1 (register or memory)
;; produce the V2DF result in operand 0.
2240 (define_insn "sse2_cvtps2pd"
2241 [(set (match_operand:V2DF 0 "register_operand" "=x")
2244 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2245 (parallel [(const_int 0) (const_int 1)]))))]
2247 "cvtps2pd\t{%1, %0|%0, %1}"
2248 [(set_attr "type" "ssecvt")
2249 (set_attr "mode" "V2DF")
2250 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF result (operand 0) from the V4SF operand 1 in
;; two sets.  operands[2] is a fresh V4SF pseudo created by the preparation
;; statement; the second set reads elements 0 and 1.
;; NOTE(review): presumably the first set moves the high half of operand 1
;; into operands[2] -- confirm against the full template.
2252 (define_expand "vec_unpacks_hi_v4sf"
2257 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2258 (parallel [(const_int 6)
2262 (set (match_operand:V2DF 0 "register_operand" "")
2266 (parallel [(const_int 0) (const_int 1)]))))]
2269 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result (operand 0) from elements 0 and 1 of
;; the V4SF operand 1.
2272 (define_expand "vec_unpacks_lo_v4sf"
2273 [(set (match_operand:V2DF 0 "register_operand" "")
2276 (match_operand:V4SF 1 "nonimmediate_operand" "")
2277 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: unpacks the V8HI operand 1 into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then converts that temporary to the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2280 (define_expand "vec_unpacks_float_hi_v8hi"
2281 [(match_operand:V4SF 0 "register_operand" "")
2282 (match_operand:V8HI 1 "register_operand" "")]
2285 rtx tmp = gen_reg_rtx (V4SImode);
2287 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2288 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: unpacks the V8HI operand 1 into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then converts that temporary to the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2292 (define_expand "vec_unpacks_float_lo_v8hi"
2293 [(match_operand:V4SF 0 "register_operand" "")
2294 (match_operand:V8HI 1 "register_operand" "")]
2297 rtx tmp = gen_reg_rtx (V4SImode);
2299 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2300 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: unpacks the V8HI operand 1 into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then converts that temporary to the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2304 (define_expand "vec_unpacku_float_hi_v8hi"
2305 [(match_operand:V4SF 0 "register_operand" "")
2306 (match_operand:V8HI 1 "register_operand" "")]
2309 rtx tmp = gen_reg_rtx (V4SImode);
2311 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2312 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: unpacks the V8HI operand 1 into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then converts that temporary to the V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
2316 (define_expand "vec_unpacku_float_lo_v8hi"
2317 [(match_operand:V4SF 0 "register_operand" "")
2318 (match_operand:V8HI 1 "register_operand" "")]
2321 rtx tmp = gen_reg_rtx (V4SImode);
2323 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2324 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF result (operand 0) from the V4SI operand 1 in
;; two sets.  operands[2] is a fresh V4SI pseudo created by the preparation
;; statement; the second set reads elements 0 and 1.
;; NOTE(review): presumably the first set moves the high half of operand 1
;; into operands[2] -- confirm against the full template.
2328 (define_expand "vec_unpacks_float_hi_v4si"
2331 (match_operand:V4SI 1 "nonimmediate_operand" "")
2332 (parallel [(const_int 2)
2336 (set (match_operand:V2DF 0 "register_operand" "")
2340 (parallel [(const_int 0) (const_int 1)]))))]
2343 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF result (operand 0) from elements 0 and 1 of
;; the V4SI operand 1.
2346 (define_expand "vec_unpacks_float_lo_v4si"
2347 [(set (match_operand:V2DF 0 "register_operand" "")
2350 (match_operand:V4SI 1 "nonimmediate_operand" "")
2351 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander packing two V2DF inputs into one V4SF result: each input is
;; converted into its own V4SF temporary with gen_sse2_cvtpd2ps, and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
2354 (define_expand "vec_pack_trunc_v2df"
2355 [(match_operand:V4SF 0 "register_operand" "")
2356 (match_operand:V2DF 1 "nonimmediate_operand" "")
2357 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2362 r1 = gen_reg_rtx (V4SFmode);
2363 r2 = gen_reg_rtx (V4SFmode);
2365 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2366 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2367 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF inputs into one V4SI result: each input is
;; converted into its own V4SI temporary with gen_sse2_cvttpd2dq, and the
;; two temporaries are combined with gen_sse2_punpcklqdq, which operates on
;; the V2DImode lowparts of the destination and both temporaries.
2371 (define_expand "vec_pack_sfix_trunc_v2df"
2372 [(match_operand:V4SI 0 "register_operand" "")
2373 (match_operand:V2DF 1 "nonimmediate_operand" "")
2374 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2379 r1 = gen_reg_rtx (V4SImode);
2380 r2 = gen_reg_rtx (V4SImode);
2382 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2383 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2384 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2385 gen_lowpart (V2DImode, r1),
2386 gen_lowpart (V2DImode, r2)));
2390 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2392 ;; Parallel double-precision floating point element swizzling
2394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative pattern on V2DF operands 1 and 2 (they may not both be
;; memory):
;;   0: register destination tied to operand 1        -> unpckhpd
;;   1: operand 1 in offsettable memory, dest tied to
;;      operand 2                                      -> movlpd from %H1
;;   2: memory destination tied to operand 2          -> movhpd store
2396 (define_insn "sse2_unpckhpd"
2397 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2400 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2401 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2402 (parallel [(const_int 1)
2404 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2406 unpckhpd\t{%2, %0|%0, %2}
2407 movlpd\t{%H1, %0|%0, %H1}
2408 movhpd\t{%1, %0|%0, %1}"
2409 [(set_attr "type" "sselog,ssemov,ssemov")
2410 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand.  Alternative 0 (register destination,
;; register-or-memory source) emits movddup; alternative 1 has an
;; offsettable-memory destination with a register source.  Operands 0 and 1
;; may not both be memory.
2412 (define_insn "*sse3_movddup"
2413 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2416 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2418 (parallel [(const_int 0)
2420 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2422 movddup\t{%1, %0|%0, %1}
2424 [(set_attr "type" "sselog1,ssemov")
2425 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination with a
;; register source: the DFmode view of the source register is stored twice,
;; at byte offsets 0 and 8 of the destination.
2428 [(set (match_operand:V2DF 0 "memory_operand" "")
2431 (match_operand:V2DF 1 "register_operand" "")
2433 (parallel [(const_int 0)
2435 "TARGET_SSE3 && reload_completed"
2438 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2439 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2440 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three-alternative pattern on V2DF operands 1 and 2 (they may not both be
;; memory); operand 1 is tied to the destination in every alternative:
;;   0: register operand 2                 -> unpcklpd
;;   1: memory operand 2                   -> movhpd load
;;   2: offsettable-memory destination     -> movlpd store to %H0
2444 (define_insn "sse2_unpcklpd"
2445 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2448 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2449 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2450 (parallel [(const_int 0)
2452 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2454 unpcklpd\t{%2, %0|%0, %2}
2455 movhpd\t{%2, %0|%0, %2}
2456 movlpd\t{%2, %H0|%H0, %2}"
2457 [(set_attr "type" "sselog,ssemov,ssemov")
2458 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 (a const_int).  It
;; decodes the mask into separate selector arguments for gen_sse2_shufpd_1;
;; the last selector is 3 when bit 1 of the mask is set and 2 otherwise.
2460 (define_expand "sse2_shufpd"
2461 [(match_operand:V2DF 0 "register_operand" "")
2462 (match_operand:V2DF 1 "register_operand" "")
2463 (match_operand:V2DF 2 "nonimmediate_operand" "")
2464 (match_operand:SI 3 "const_int_operand" "")]
2467 int mask = INTVAL (operands[3]);
2468 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2470 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit selectors: operand 3 is in 0..1 and operand 4 is in
;; 2..3.  The output code folds them back into the 2-bit immediate
;; (operand 3 in bit 0, operand 4 minus 2 in bit 1), overwrites operands[3]
;; with it, and emits shufpd.
2474 (define_insn "sse2_shufpd_1"
2475 [(set (match_operand:V2DF 0 "register_operand" "=x")
2478 (match_operand:V2DF 1 "register_operand" "0")
2479 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2480 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2481 (match_operand 4 "const_2_to_3_operand" "")])))]
2485 mask = INTVAL (operands[3]);
2486 mask |= (INTVAL (operands[4]) - 2) << 1;
2487 operands[3] = GEN_INT (mask);
2489 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2491 [(set_attr "type" "sselog")
2492 (set_attr "mode" "V2DF")])
;; Extracts element 1 of the V2DF operand 1 into the DF operand 0.  The
;; first alternative (memory destination, register source) emits a movhpd
;; store.  Operands 0 and 1 may not both be memory.
2494 (define_insn "sse2_storehpd"
2495 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2497 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2498 (parallel [(const_int 1)])))]
2499 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2501 movhpd\t{%1, %0|%0, %1}
2504 [(set_attr "type" "ssemov,sselog1,ssemov")
2505 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF memory
;; operand into a DF register becomes a plain DF move from byte offset 8 of
;; that memory operand.
2508 [(set (match_operand:DF 0 "register_operand" "")
2510 (match_operand:V2DF 1 "memory_operand" "")
2511 (parallel [(const_int 1)])))]
2512 "TARGET_SSE2 && reload_completed"
2513 [(set (match_dup 0) (match_dup 1))]
2515 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of the V2DF operand 1 into the DF operand 0.  The
;; first alternative (memory destination, register source) emits a movlpd
;; store.  Operands 0 and 1 may not both be memory.
2518 (define_insn "sse2_storelpd"
2519 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2521 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2522 (parallel [(const_int 0)])))]
2523 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2525 movlpd\t{%1, %0|%0, %1}
2528 [(set_attr "type" "ssemov")
2529 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of the V2DF
;; operand 1 into a DF register becomes a plain DF move.  The source is
;; re-expressed in DFmode, either as a DFmode REG with the same register
;; number or through gen_lowpart.
;; NOTE(review): presumably the REG form is used for register sources and
;; gen_lowpart otherwise -- confirm against the full body.
2532 [(set (match_operand:DF 0 "register_operand" "")
2534 (match_operand:V2DF 1 "nonimmediate_operand" "")
2535 (parallel [(const_int 0)])))]
2536 "TARGET_SSE2 && reload_completed"
2539 rtx op1 = operands[1];
2541 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2543 op1 = gen_lowpart (DFmode, op1);
2544 emit_move_insn (operands[0], op1);
;; Four-alternative pattern combining element 0 of the V2DF operand 1 with
;; the DF operand 2 (operands 1 and 2 may not both be memory):
;;   0: operand 2 in memory                          -> movhpd load
;;   1: operand 2 in an SSE register                 -> unpcklpd
;;   2: destination tied to operand 2                -> shufpd $1
;;   3: offsettable-memory destination (type "other")
2548 (define_insn "sse2_loadhpd"
2549 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2552 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2553 (parallel [(const_int 0)]))
2554 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2555 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2557 movhpd\t{%2, %0|%0, %2}
2558 unpcklpd\t{%2, %0|%0, %2}
2559 shufpd\t{$1, %1, %0|%0, %1, 1}
2561 [(set_attr "type" "ssemov,sselog,sselog,other")
2562 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2) for a V2DF memory destination whose
;; element 0 is re-read from the same location (match_dup 0) and whose other
;; element comes from the DF register operand 1: reduces to a single DF
;; store of operand 1 at byte offset 8 of the destination.
2565 [(set (match_operand:V2DF 0 "memory_operand" "")
2567 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2568 (match_operand:DF 1 "register_operand" "")))]
2569 "TARGET_SSE2 && reload_completed"
2570 [(set (match_dup 0) (match_dup 1))]
2572 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Six-alternative pattern combining the DF operand 2 with element 1 of the
;; V2DF operand 1 (operands 1 and 2 may not both be memory):
;;   0: operand 1 is constant "C", operand 2 in memory -> movsd load
;;   1: operand 2 in memory, operand 1 tied to dest    -> movlpd load
;;   2: operand 2 in an SSE register                   -> movsd
;;   3: destination tied to operand 2                  -> shufpd $2
;;   4: operand 1 in offsettable memory                -> movhpd from %H1
;;   5: memory destination (type "other")
2575 (define_insn "sse2_loadlpd"
2576 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2578 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2580 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2581 (parallel [(const_int 1)]))))]
2582 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2584 movsd\t{%2, %0|%0, %2}
2585 movlpd\t{%2, %0|%0, %2}
2586 movsd\t{%2, %0|%0, %2}
2587 shufpd\t{$2, %2, %0|%0, %2, 2}
2588 movhpd\t{%H1, %0|%0, %H1}
2590 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2591 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split (TARGET_SSE2) for a V2DF memory destination whose new
;; low element is the DF register operand 1 and whose element 1 is re-read
;; from the same location (match_dup 0): reduces to a single DF store of
;; operand 1.  The store must target the low element, i.e. byte offset 0 of
;; the destination; offset 8 would overwrite the element 1 that the pattern
;; preserves (compare the splits above, which use offset 8 for element 1).
2594 [(set (match_operand:V2DF 0 "memory_operand" "")
2596 (match_operand:DF 1 "register_operand" "")
2597 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2598 "TARGET_SSE2 && reload_completed"
2599 [(set (match_dup 0) (match_dup 1))]
2601 operands[0] = adjust_address (operands[0], DFmode, 0);
2604 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (!TARGET_SSE2 && TARGET_SSE) extraction of element 1 of the
;; V2DF operand 1 into the DF operand 0:
;;   0: memory destination                 -> movhps store
;;   1: register to register               -> movhlps
;;   2: offsettable-memory source          -> movlps from %H1
;; Operands 0 and 1 may not both be memory.
2606 (define_insn "*vec_extractv2df_1_sse"
2607 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2609 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2610 (parallel [(const_int 1)])))]
2611 "!TARGET_SSE2 && TARGET_SSE
2612 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2614 movhps\t{%1, %0|%0, %1}
2615 movhlps\t{%1, %0|%0, %1}
2616 movlps\t{%H1, %0|%0, %H1}"
2617 [(set_attr "type" "ssemov")
2618 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (!TARGET_SSE2 && TARGET_SSE) extraction of element 0 of the
;; V2DF operand 1 into the DF operand 0:
;;   0: memory destination                 -> movlps store
;;   1: register to register               -> movaps
;;   2: memory source                      -> movlps load
;; Operands 0 and 1 may not both be memory.
2620 (define_insn "*vec_extractv2df_0_sse"
2621 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2623 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2624 (parallel [(const_int 0)])))]
2625 "!TARGET_SSE2 && TARGET_SSE
2626 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2628 movlps\t{%1, %0|%0, %1}
2629 movaps\t{%1, %0|%0, %1}
2630 movlps\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "ssemov")
2632 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six-alternative pattern on V2DF operands 2 and 1:
;;   0: both in registers, operand 1 tied to dest    -> movsd
;;   1: operand 2 in memory                          -> movlpd load
;;   2: memory destination                           -> movlpd store
;;   3: destination tied to operand 2                -> shufpd $2
;;   4: operand 1 in offsettable memory              -> movhps from %H1
;;   5: offsettable-memory destination               -> movhps store to %H0
2634 (define_insn "sse2_movsd"
2635 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2637 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2638 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2642 movsd\t{%2, %0|%0, %2}
2643 movlpd\t{%2, %0|%0, %2}
2644 movlpd\t{%2, %0|%0, %2}
2645 shufpd\t{$2, %2, %0|%0, %2, 2}
2646 movhps\t{%H1, %0|%0, %H1}
2647 movhps\t{%1, %H0|%H0, %1}"
2648 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2649 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF register from the DF operand 1 (register or memory) with a
;; single movddup.
2651 (define_insn "*vec_dupv2df_sse3"
2652 [(set (match_operand:V2DF 0 "register_operand" "=x")
2654 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2656 "movddup\t{%1, %0|%0, %1}"
2657 [(set_attr "type" "sselog1")
2658 (set_attr "mode" "DF")])
;; Builds a V2DF register from the DF operand 1, which must already live in
;; the destination register (constraint "0").
2660 (define_insn "*vec_dupv2df"
2661 [(set (match_operand:V2DF 0 "register_operand" "=x")
2663 (match_operand:DF 1 "register_operand" "0")))]
2666 [(set_attr "type" "sselog1")
2667 (set_attr "mode" "V2DF")])
;; Builds a V2DF register from the DF operand 1 (register or memory) with a
;; single movddup.
2669 (define_insn "*vec_concatv2df_sse3"
2670 [(set (match_operand:V2DF 0 "register_operand" "=x")
2672 (match_operand:DF 1 "nonimmediate_operand" "xm")
2675 "movddup\t{%1, %0|%0, %1}"
2676 [(set_attr "type" "sselog1")
2677 (set_attr "mode" "DF")])
;; Builds a V2DF register from the DF operands 1 and 2:
;;   0: "Yt" registers, operand 1 tied to dest      -> unpcklpd
;;   1: operand 2 in memory                         -> movhpd load
;;   2: operand 1 in memory, operand 2 constant "C" -> movsd load
;;   3: "x" registers, operand 1 tied to dest       -> movlhps
;;   4: operand 2 in memory                         -> movhps load
2679 (define_insn "*vec_concatv2df"
2680 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2682 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2683 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2686 unpcklpd\t{%2, %0|%0, %2}
2687 movhpd\t{%2, %0|%0, %2}
2688 movsd\t{%1, %0|%0, %1}
2689 movlhps\t{%2, %0|%0, %2}
2690 movhps\t{%2, %0|%0, %2}"
2691 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2692 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander delegating to ix86_expand_vector_set: operand 0 is the V2DF
;; vector, operand 1 the DF value, and operand 2 a const_int whose INTVAL is
;; passed as the last argument.
2694 (define_expand "vec_setv2df"
2695 [(match_operand:V2DF 0 "register_operand" "")
2696 (match_operand:DF 1 "register_operand" "")
2697 (match_operand 2 "const_int_operand" "")]
2700 ix86_expand_vector_set (false, operands[0], operands[1],
2701 INTVAL (operands[2]));
;; Expander delegating to ix86_expand_vector_extract: operand 0 is the DF
;; destination, operand 1 the V2DF vector, and operand 2 a const_int whose
;; INTVAL is passed as the last argument.
2705 (define_expand "vec_extractv2df"
2706 [(match_operand:DF 0 "register_operand" "")
2707 (match_operand:V2DF 1 "register_operand" "")
2708 (match_operand 2 "const_int_operand" "")]
2711 ix86_expand_vector_extract (false, operands[0], operands[1],
2712 INTVAL (operands[2]));
;; Expander delegating to ix86_expand_vector_init with the V2DF destination
;; (operand 0) and the unconstrained operand 1.
2716 (define_expand "vec_initv2df"
2717 [(match_operand:V2DF 0 "register_operand" "")
2718 (match_operand 1 "" "")]
2721 ix86_expand_vector_init (false, operands[0], operands[1]);
2725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2727 ;; Parallel integral arithmetic
2729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI mode.  The preparation statement
;; sets operand 2 to the zero vector of the mode, forced into a register.
;; NOTE(review): presumably the template subtracts operand 1 from that zero
;; register -- confirm against the full template.
2731 (define_expand "neg<mode>2"
2732 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2735 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2737 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for every SSEMODEI mode.  The operands are first
;; passed through ix86_fixup_binary_operands_no_copy for PLUS.
2739 (define_expand "add<mode>3"
2740 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2741 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2742 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2744 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: element-wise integer addition for every SSEMODEI mode.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be a register or memory.
2746 (define_insn "*add<mode>3"
2747 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2749 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2750 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2751 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2752 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2753 [(set_attr "type" "sseiadd")
2754 (set_attr "prefix_data16" "1")
2755 (set_attr "mode" "TI")])
;; padds{b,w}: addition for the SSEMODE12 modes (V16QI, V8HI), validated
;; with ix86_binary_operator_ok for SS_PLUS.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2757 (define_insn "sse2_ssadd<mode>3"
2758 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2760 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2761 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2762 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2763 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2764 [(set_attr "type" "sseiadd")
2765 (set_attr "prefix_data16" "1")
2766 (set_attr "mode" "TI")])
;; paddus{b,w}: addition for the SSEMODE12 modes (V16QI, V8HI), validated
;; with ix86_binary_operator_ok for US_PLUS.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2768 (define_insn "sse2_usadd<mode>3"
2769 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2771 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2772 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2773 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2774 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2775 [(set_attr "type" "sseiadd")
2776 (set_attr "prefix_data16" "1")
2777 (set_attr "mode" "TI")])
;; Subtraction expander for every SSEMODEI mode.  Operand 1 must be a
;; register; the operands are first passed through
;; ix86_fixup_binary_operands_no_copy for MINUS.
2779 (define_expand "sub<mode>3"
2780 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2781 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2782 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2784 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: element-wise integer subtraction for every SSEMODEI mode.
;; Operand 1 is tied to the destination (no commutative marker); operand 2
;; may be a register or memory.
2786 (define_insn "*sub<mode>3"
2787 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2789 (match_operand:SSEMODEI 1 "register_operand" "0")
2790 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2792 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2793 [(set_attr "type" "sseiadd")
2794 (set_attr "prefix_data16" "1")
2795 (set_attr "mode" "TI")])
;; psubs{b,w}: subtraction for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
2797 (define_insn "sse2_sssub<mode>3"
2798 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2800 (match_operand:SSEMODE12 1 "register_operand" "0")
2801 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2803 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2804 [(set_attr "type" "sseiadd")
2805 (set_attr "prefix_data16" "1")
2806 (set_attr "mode" "TI")])
;; psubus{b,w}: subtraction for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
2808 (define_insn "sse2_ussub<mode>3"
2809 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2811 (match_operand:SSEMODE12 1 "register_operand" "0")
2812 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2814 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2815 [(set_attr "type" "sseiadd")
2816 (set_attr "prefix_data16" "1")
2817 (set_attr "mode" "TI")])
;; V16QI multiply synthesized from word multiplies, using twelve V16QI
;; temporaries.  Both inputs are widened with punpckhbw/punpcklbw (each
;; source byte lands in the low byte of a word), the words are multiplied
;; with gen_mulv8hi3, and the result bytes are gathered back into order by
;; the punpck sequence annotated in the body.
2819 (define_expand "mulv16qi3"
2820 [(set (match_operand:V16QI 0 "register_operand" "")
2821 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2822 (match_operand:V16QI 2 "register_operand" "")))]
2828 for (i = 0; i < 12; ++i)
2829 t[i] = gen_reg_rtx (V16QImode);
2831 /* Unpack data such that we've got a source byte in each low byte of
2832 each word. We don't care what goes into the high byte of each word.
2833 Rather than trying to get zero in there, most convenient is to let
2834 it be a copy of the low byte. */
2835 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2836 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2837 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2838 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2840 /* Multiply words. The end-of-line annotations here give a picture of what
2841 the output of that instruction looks like. Dot means don't care; the
2842 letters are the bytes of the result with A being the most significant. */
2843 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2844 gen_lowpart (V8HImode, t[0]),
2845 gen_lowpart (V8HImode, t[1])));
2846 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2847 gen_lowpart (V8HImode, t[2]),
2848 gen_lowpart (V8HImode, t[3])));
2850 /* Extract the relevant bytes and merge them back together. */
2851 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2852 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2853 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2854 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2855 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2856 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2859 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy for MULT.
2863 (define_expand "mulv8hi3"
2864 [(set (match_operand:V8HI 0 "register_operand" "")
2865 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2866 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2868 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.
2870 (define_insn "*mulv8hi3"
2871 [(set (match_operand:V8HI 0 "register_operand" "=x")
2872 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2873 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2874 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2875 "pmullw\t{%2, %0|%0, %2}"
2876 [(set_attr "type" "sseimul")
2877 (set_attr "prefix_data16" "1")
2878 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (matched by the pmulhw
;; insn "*smulv8hi3_highpart").  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy for MULT.
2880 (define_expand "smulv8hi3_highpart"
2881 [(set (match_operand:V8HI 0 "register_operand" "")
2886 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2888 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2891 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw on V8HI operands.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
2893 (define_insn "*smulv8hi3_highpart"
2894 [(set (match_operand:V8HI 0 "register_operand" "=x")
2899 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2901 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2903 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2904 "pmulhw\t{%2, %0|%0, %2}"
2905 [(set_attr "type" "sseimul")
2906 (set_attr "prefix_data16" "1")
2907 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (matched by the
;; pmulhuw insn "*umulv8hi3_highpart").  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy for MULT.
2909 (define_expand "umulv8hi3_highpart"
2910 [(set (match_operand:V8HI 0 "register_operand" "")
2915 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2917 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2920 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw on V8HI operands.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
2922 (define_insn "*umulv8hi3_highpart"
2923 [(set (match_operand:V8HI 0 "register_operand" "=x")
2928 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2930 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2932 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2933 "pmulhuw\t{%2, %0|%0, %2}"
2934 [(set_attr "type" "sseimul")
2935 (set_attr "prefix_data16" "1")
2936 (set_attr "mode" "TI")])
;; pmuludq: elements 0 and 2 of each V4SI operand are selected and the
;; products form the V2DI result.  Operand 1 is tied to the destination and
;; marked commutative ("%0").
2938 (define_insn "sse2_umulv2siv2di3"
2939 [(set (match_operand:V2DI 0 "register_operand" "=x")
2943 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2944 (parallel [(const_int 0) (const_int 2)])))
2947 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2948 (parallel [(const_int 0) (const_int 2)])))))]
2949 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2950 "pmuludq\t{%2, %0|%0, %2}"
2951 [(set_attr "type" "sseimul")
2952 (set_attr "prefix_data16" "1")
2953 (set_attr "mode" "TI")])
;; pmuldq (requires TARGET_SSE4_1): elements 0 and 2 of each V4SI operand
;; are selected and the products form the V2DI result.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
2955 (define_insn "sse4_1_mulv2siv2di3"
2956 [(set (match_operand:V2DI 0 "register_operand" "=x")
2960 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2961 (parallel [(const_int 0) (const_int 2)])))
2964 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2965 (parallel [(const_int 0) (const_int 2)])))))]
2966 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2967 "pmuldq\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseimul")
2969 (set_attr "prefix_extra" "1")
2970 (set_attr "mode" "TI")])
;; pmaddwd: the template selects two V4HI groups from each V8HI operand
;; (one selector list starting at element 0, the other starting at element 1
;; and ending at element 7) and yields a V4SI result.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
2972 (define_insn "sse2_pmaddwd"
2973 [(set (match_operand:V4SI 0 "register_operand" "=x")
2978 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2979 (parallel [(const_int 0)
2985 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2986 (parallel [(const_int 0)
2992 (vec_select:V4HI (match_dup 1)
2993 (parallel [(const_int 1)
2998 (vec_select:V4HI (match_dup 2)
2999 (parallel [(const_int 1)
3002 (const_int 7)]))))))]
3003 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3004 "pmaddwd\t{%2, %0|%0, %2}"
3005 [(set_attr "type" "sseiadd")
3006 (set_attr "prefix_data16" "1")
3007 (set_attr "mode" "TI")])
;; V4SI multiply expander.  The sequence shown multiplies elements 0 and 2
;; with gen_sse2_umulv2siv2di3, shifts both inputs with gen_sse2_lshrti3 so
;; that elements 1 and 3 can be multiplied the same way, then gathers the
;; useful result elements with gen_sse2_pshufd_1 and merges them with
;; gen_sse2_punpckldq.
3009 (define_expand "mulv4si3"
3010 [(set (match_operand:V4SI 0 "register_operand" "")
3011 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3012 (match_operand:V4SI 2 "register_operand" "")))]
3016 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
3019 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3025 t1 = gen_reg_rtx (V4SImode);
3026 t2 = gen_reg_rtx (V4SImode);
3027 t3 = gen_reg_rtx (V4SImode);
3028 t4 = gen_reg_rtx (V4SImode);
3029 t5 = gen_reg_rtx (V4SImode);
3030 t6 = gen_reg_rtx (V4SImode);
3031 thirtytwo = GEN_INT (32);
3033 /* Multiply elements 2 and 0. */
3034 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3037 /* Shift both input vectors down one element, so that elements 3
3038 and 1 are now in the slots for elements 2 and 0. For K8, at
3039 least, this is faster than using a shuffle. */
3040 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3041 gen_lowpart (TImode, op1),
3043 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3044 gen_lowpart (TImode, op2),
3046 /* Multiply elements 3 and 1. */
3047 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3050 /* Move the results in element 2 down to element 1; we don't care
3051 what goes in elements 2 and 3. */
3052 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3053 const0_rtx, const0_rtx));
3054 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3055 const0_rtx, const0_rtx));
3057 /* Merge the parts back together. */
3058 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; pmulld (requires TARGET_SSE4_1): V4SI multiply.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register or
;; memory.
3063 (define_insn "*sse4_1_mulv4si3"
3064 [(set (match_operand:V4SI 0 "register_operand" "=x")
3065 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3066 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3067 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3068 "pmulld\t{%2, %0|%0, %2}"
3069 [(set_attr "type" "sseimul")
3070 (set_attr "prefix_extra" "1")
3071 (set_attr "mode" "TI")])
;; V2DI multiply expander built from gen_sse2_umulv2siv2di3: the low-part
;; product (t1) is added to the two cross products (op1 times the shifted
;; op2, and op2 times the shifted op1), each of which is first shifted left
;; by 32 with gen_ashlv2di3.  The final sum is formed with gen_addv2di3.
;; NOTE(review): the in-body comment "Shift input vectors left 32 bits"
;; does not match the code, which calls gen_lshrv2di3 (a logical right
;; shift by 32) on both inputs.
3073 (define_expand "mulv2di3"
3074 [(set (match_operand:V2DI 0 "register_operand" "")
3075 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3076 (match_operand:V2DI 2 "register_operand" "")))]
3079 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3085 t1 = gen_reg_rtx (V2DImode);
3086 t2 = gen_reg_rtx (V2DImode);
3087 t3 = gen_reg_rtx (V2DImode);
3088 t4 = gen_reg_rtx (V2DImode);
3089 t5 = gen_reg_rtx (V2DImode);
3090 t6 = gen_reg_rtx (V2DImode);
3091 thirtytwo = GEN_INT (32);
3093 /* Multiply low parts. */
3094 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3095 gen_lowpart (V4SImode, op2)));
3097 /* Shift input vectors left 32 bits so we can multiply high parts. */
3098 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3099 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3101 /* Multiply high parts by low parts. */
3102 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3103 gen_lowpart (V4SImode, t3)));
3104 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3105 gen_lowpart (V4SImode, t2)));
3107 /* Shift them back. */
3108 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3109 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3111 /* Add the three parts together. */
3112 emit_insn (gen_addv2di3 (t6, t1, t4));
3113 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed V8HI multiply, high half.  Computes the low words with
;; gen_mulv8hi3 and the high words with gen_smulv8hi3_highpart into two
;; V8HI temporaries, then interleaves them with
;; gen_vec_interleave_highv8hi into the V8HImode view of operand 0.
3117 (define_expand "vec_widen_smult_hi_v8hi"
3118 [(match_operand:V4SI 0 "register_operand" "")
3119 (match_operand:V8HI 1 "register_operand" "")
3120 (match_operand:V8HI 2 "register_operand" "")]
3123 rtx op1, op2, t1, t2, dest;
3127 t1 = gen_reg_rtx (V8HImode);
3128 t2 = gen_reg_rtx (V8HImode);
3129 dest = gen_lowpart (V8HImode, operands[0]);
3131 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3132 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3133 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed V8HI multiply, low half.  Computes the low words with
;; gen_mulv8hi3 and the high words with gen_smulv8hi3_highpart into two
;; V8HI temporaries, then interleaves them with
;; gen_vec_interleave_lowv8hi into the V8HImode view of operand 0.
3137 (define_expand "vec_widen_smult_lo_v8hi"
3138 [(match_operand:V4SI 0 "register_operand" "")
3139 (match_operand:V8HI 1 "register_operand" "")
3140 (match_operand:V8HI 2 "register_operand" "")]
3143 rtx op1, op2, t1, t2, dest;
3147 t1 = gen_reg_rtx (V8HImode);
3148 t2 = gen_reg_rtx (V8HImode);
3149 dest = gen_lowpart (V8HImode, operands[0]);
3151 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3152 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3153 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, high half.  Computes the low words with
;; gen_mulv8hi3 and the high words with gen_umulv8hi3_highpart into two
;; V8HI temporaries, then interleaves them with
;; gen_vec_interleave_highv8hi into the V8HImode view of operand 0.
3157 (define_expand "vec_widen_umult_hi_v8hi"
3158 [(match_operand:V4SI 0 "register_operand" "")
3159 (match_operand:V8HI 1 "register_operand" "")
3160 (match_operand:V8HI 2 "register_operand" "")]
3163 rtx op1, op2, t1, t2, dest;
3167 t1 = gen_reg_rtx (V8HImode);
3168 t2 = gen_reg_rtx (V8HImode);
3169 dest = gen_lowpart (V8HImode, operands[0]);
3171 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3172 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3173 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, low half.  Computes the low words with
;; gen_mulv8hi3 and the high words with gen_umulv8hi3_highpart into two
;; V8HI temporaries, then interleaves them with
;; gen_vec_interleave_lowv8hi into the V8HImode view of operand 0.
3177 (define_expand "vec_widen_umult_lo_v8hi"
3178 [(match_operand:V4SI 0 "register_operand" "")
3179 (match_operand:V8HI 1 "register_operand" "")
3180 (match_operand:V8HI 2 "register_operand" "")]
3183 rtx op1, op2, t1, t2, dest;
3187 t1 = gen_reg_rtx (V8HImode);
3188 t2 = gen_reg_rtx (V8HImode);
3189 dest = gen_lowpart (V8HImode, operands[0]);
3191 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3192 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3193 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI multiply, high half: each input is interleaved with itself
;; by gen_vec_interleave_highv4si into a V4SI temporary, and the two
;; temporaries are multiplied into the V2DI operand 0.
;; NOTE(review): although this is the signed ("smult") expander, the product
;; is formed with gen_sse2_umulv2siv2di3, whose insn emits the unsigned
;; pmuludq; the pmuldq pattern in this file is sse4_1_mulv2siv2di3.  Verify
;; the results for negative inputs.
3197 (define_expand "vec_widen_smult_hi_v4si"
3198 [(match_operand:V2DI 0 "register_operand" "")
3199 (match_operand:V4SI 1 "register_operand" "")
3200 (match_operand:V4SI 2 "register_operand" "")]
3203 rtx op1, op2, t1, t2;
3207 t1 = gen_reg_rtx (V4SImode);
3208 t2 = gen_reg_rtx (V4SImode);
3210 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3211 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3212 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, "lo" variant: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself via vec_interleave_lowv4si (t1, t2)
;; and the two temporaries are multiplied into operand 0.
;; NOTE(review): the multiply used is gen_sse2_umulv2siv2di3, the same
;; generator the vec_widen_umult_*_v4si expanders use.  An unsigned multiply
;; looks wrong for negative inputs of this signed pattern -- confirm and fix.
3216 (define_expand "vec_widen_smult_lo_v4si"
3217 [(match_operand:V2DI 0 "register_operand" "")
3218 (match_operand:V4SI 1 "register_operand" "")
3219 (match_operand:V4SI 2 "register_operand" "")]
3222 rtx op1, op2, t1, t2;
3226 t1 = gen_reg_rtx (V4SImode);
3227 t2 = gen_reg_rtx (V4SImode);
3229 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3230 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3231 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, "hi" variant: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself via vec_interleave_highv4si so that
;; the elements of interest land where sse2_umulv2siv2di3 reads them, then
;; the two temporaries are multiplied into operand 0.
3235 (define_expand "vec_widen_umult_hi_v4si"
3236 [(match_operand:V2DI 0 "register_operand" "")
3237 (match_operand:V4SI 1 "register_operand" "")
3238 (match_operand:V4SI 2 "register_operand" "")]
3241 rtx op1, op2, t1, t2;
3245 t1 = gen_reg_rtx (V4SImode);
3246 t2 = gen_reg_rtx (V4SImode);
3248 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3249 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3250 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, "lo" variant: two V4SI inputs, one V2DI result.
;; Each input is interleaved with itself via vec_interleave_lowv4si so that
;; the elements of interest land where sse2_umulv2siv2di3 reads them, then
;; the two temporaries are multiplied into operand 0.
3254 (define_expand "vec_widen_umult_lo_v4si"
3255 [(match_operand:V2DI 0 "register_operand" "")
3256 (match_operand:V4SI 1 "register_operand" "")
3257 (match_operand:V4SI 2 "register_operand" "")]
3260 rtx op1, op2, t1, t2;
3264 t1 = gen_reg_rtx (V4SImode);
3265 t2 = gen_reg_rtx (V4SImode);
3267 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3268 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3269 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product accumulate: operands 1 and 2 are V8HI, operand 3 is the
;; V4SI accumulator, operand 0 the V4SI result.
;; Emits sse2_pmaddwd (operand 1, operand 2) into a fresh V4SI temporary and
;; then addv4si3 of operand 3 and that temporary into operand 0.
3273 (define_expand "sdot_prodv8hi"
3274 [(match_operand:V4SI 0 "register_operand" "")
3275 (match_operand:V8HI 1 "register_operand" "")
3276 (match_operand:V8HI 2 "register_operand" "")
3277 (match_operand:V4SI 3 "register_operand" "")]
3280 rtx t = gen_reg_rtx (V4SImode);
3281 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3282 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product accumulate: operands 1 and 2 are V4SI, operand 3 is
;; the V2DI accumulator, operand 0 the V2DI result.  Steps visible here:
;;   t1 = sse2_umulv2siv2di3 (op1, op2);  t1 = t1 + op3
;;   t2, t3 = TImode logical right shifts of op1 and op2 (sse2_lshrti3)
;;   t4 = sse2_umulv2siv2di3 (t2, t3);    op0 = t1 + t4
;; NOTE(review): the shift-count arguments of the two sse2_lshrti3 calls are
;; on lines not shown in this excerpt -- confirm the count against the
;; element layout sse2_umulv2siv2di3 expects.
3286 (define_expand "udot_prodv4si"
3287 [(match_operand:V2DI 0 "register_operand" "")
3288 (match_operand:V4SI 1 "register_operand" "")
3289 (match_operand:V4SI 2 "register_operand" "")
3290 (match_operand:V2DI 3 "register_operand" "")]
3295 t1 = gen_reg_rtx (V2DImode);
3296 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3297 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3299 t2 = gen_reg_rtx (V4SImode);
3300 t3 = gen_reg_rtx (V4SImode);
3301 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3302 gen_lowpart (TImode, operands[1]),
3304 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3305 gen_lowpart (TImode, operands[2]),
3308 t4 = gen_reg_rtx (V2DImode);
3309 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3311 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element right shift for the SSEMODE24 modes (V8HI, V4SI); emits
;; psra<ssevecsize>, i.e. psraw or psrad.
;; Operand 1 is tied to the destination (constraint "0"); the TImode count
;; in operand 2 may be an SSE register or an immediate ("xn").
3315 (define_insn "ashr<mode>3"
3316 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3318 (match_operand:SSEMODE24 1 "register_operand" "0")
3319 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3321 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3322 [(set_attr "type" "sseishft")
3323 (set_attr "prefix_data16" "1")
3324 (set_attr "mode" "TI")])
;; Per-element logical right shift (lshiftrt) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI); emits psrl<ssevecsize>, i.e. psrlw, psrld or psrlq.
;; Operand 1 is tied to the destination (constraint "0"); the TImode count
;; in operand 2 may be an SSE register or an immediate ("xn").
3326 (define_insn "lshr<mode>3"
3327 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3328 (lshiftrt:SSEMODE248
3329 (match_operand:SSEMODE248 1 "register_operand" "0")
3330 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3332 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3333 [(set_attr "type" "sseishft")
3334 (set_attr "prefix_data16" "1")
3335 (set_attr "mode" "TI")])
;; Per-element left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI); emits
;; psll<ssevecsize>, i.e. psllw, pslld or psllq.
;; Operand 1 is tied to the destination (constraint "0"); the TImode count
;; in operand 2 may be an SSE register or an immediate ("xn").
3337 (define_insn "ashl<mode>3"
3338 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3340 (match_operand:SSEMODE248 1 "register_operand" "0")
3341 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3343 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3344 [(set_attr "type" "sseishft")
3345 (set_attr "prefix_data16" "1")
3346 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift, emitted as pslldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.  The RTL count is
;; divided by 8 in the output code before printing, so the instruction
;; receives INTVAL (operands[2]) / 8 as its immediate.
3348 (define_insn "sse2_ashlti3"
3349 [(set (match_operand:TI 0 "register_operand" "=x")
3350 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3351 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3354 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3355 return "pslldq\t{%2, %0|%0, %2}";
3357 [(set_attr "type" "sseishft")
3358 (set_attr "prefix_data16" "1")
3359 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as ashift:TI.
;; Operands 0 and 1 are replaced by their TImode lowparts so the TImode
;; template applies to any of the integer vector modes.
;; NOTE(review): the statement executed when operand 2 fails
;; const_0_to_255_mul_8_operand is on a line not shown here (presumably
;; FAIL) -- confirm.
3361 (define_expand "vec_shl_<mode>"
3362 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3363 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3364 (match_operand:SI 2 "general_operand" "")))]
3367 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3369 operands[0] = gen_lowpart (TImode, operands[0]);
3370 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift, emitted as psrldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.  The RTL count is
;; divided by 8 in the output code before printing, so the instruction
;; receives INTVAL (operands[2]) / 8 as its immediate.
3373 (define_insn "sse2_lshrti3"
3374 [(set (match_operand:TI 0 "register_operand" "=x")
3375 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3376 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3379 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3380 return "psrldq\t{%2, %0|%0, %2}";
3382 [(set_attr "type" "sseishft")
3383 (set_attr "prefix_data16" "1")
3384 (set_attr "mode" "TI")])
;; Whole-vector right shift for every SSEMODEI mode, expressed as
;; lshiftrt:TI.  Operands 0 and 1 are replaced by their TImode lowparts so
;; the TImode template applies to any of the integer vector modes.
;; NOTE(review): the statement executed when operand 2 fails
;; const_0_to_255_mul_8_operand is on a line not shown here (presumably
;; FAIL) -- confirm.
3386 (define_expand "vec_shr_<mode>"
3387 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3388 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3389 (match_operand:SI 2 "general_operand" "")))]
3392 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3394 operands[0] = gen_lowpart (TImode, operands[0]);
3395 operands[1] = gen_lowpart (TImode, operands[1]);
;; Named expander for unsigned V16QI maximum.  The preparation statement only
;; calls ix86_fixup_binary_operands_no_copy (UMAX, ...) on the operands
;; before the umax template is emitted.
3398 (define_expand "umaxv16qi3"
3399 [(set (match_operand:V16QI 0 "register_operand" "")
3400 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3401 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3403 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matching insn for unsigned V16QI maximum; emits pmaxub.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 2 may be an SSE
;; register or memory ("xm").
3405 (define_insn "*umaxv16qi3"
3406 [(set (match_operand:V16QI 0 "register_operand" "=x")
3407 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3408 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3409 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3410 "pmaxub\t{%2, %0|%0, %2}"
3411 [(set_attr "type" "sseiadd")
3412 (set_attr "prefix_data16" "1")
3413 (set_attr "mode" "TI")])
;; Named expander for signed V8HI maximum.  The preparation statement only
;; calls ix86_fixup_binary_operands_no_copy (SMAX, ...) on the operands
;; before the smax template is emitted.
3415 (define_expand "smaxv8hi3"
3416 [(set (match_operand:V8HI 0 "register_operand" "")
3417 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3418 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3420 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matching insn for signed V8HI maximum; emits pmaxsw.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 2 may be an SSE
;; register or memory ("xm").
3422 (define_insn "*smaxv8hi3"
3423 [(set (match_operand:V8HI 0 "register_operand" "=x")
3424 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3425 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3426 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3427 "pmaxsw\t{%2, %0|%0, %2}"
3428 [(set_attr "type" "sseiadd")
3429 (set_attr "prefix_data16" "1")
3430 (set_attr "mode" "TI")])
;; Named expander for unsigned V8HI maximum.  Two paths are visible:
;;  - a call to ix86_fixup_binary_operands_no_copy (UMAX, ...), and
;;  - a two-insn sequence: op3 = sse2_ussubv8hi3 (op1, op2), then
;;    op0 = addv8hi3 (op3, op2).  op3 defaults to op0, but a fresh register
;;    is used when op0 equals op2 so that op2 is still intact for the add.
;; NOTE(review): the condition selecting between the two paths is on lines
;; not shown in this excerpt (presumably an SSE4.1 check) -- confirm.
3432 (define_expand "umaxv8hi3"
3433 [(set (match_operand:V8HI 0 "register_operand" "")
3434 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3435 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3439 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3442 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3443 if (rtx_equal_p (op3, op2))
3444 op3 = gen_reg_rtx (V8HImode);
3445 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3446 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Named expander for signed maximum in the SSEMODE14 modes (V16QI, V4SI).
;; Two paths are visible: a call to ix86_fixup_binary_operands_no_copy
;; (SMAX, ...), and a vcond-style expansion through ix86_expand_int_vcond
;; with xops = { dest, op1, op2, GT (op1, op2), op1, op2 }, i.e. op1 is
;; selected where op1 > op2 and op2 otherwise.
;; NOTE(review): the condition selecting between the two paths is on lines
;; not shown in this excerpt -- confirm.
3451 (define_expand "smax<mode>3"
3452 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3453 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3454 (match_operand:SSEMODE14 2 "register_operand" "")))]
3458 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3464 xops[0] = operands[0];
3465 xops[1] = operands[1];
3466 xops[2] = operands[2];
3467 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3468 xops[4] = operands[1];
3469 xops[5] = operands[2];
3470 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for the SSEMODE14 modes (V16QI, V4SI); emits
;; pmaxs<ssevecsize>, i.e. pmaxsb or pmaxsd.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0").
3476 (define_insn "*sse4_1_smax<mode>3"
3477 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3479 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3480 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3481 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3482 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3483 [(set_attr "type" "sseiadd")
3484 (set_attr "prefix_extra" "1")
3485 (set_attr "mode" "TI")])
;; Named expander for unsigned V4SI maximum.
;; Two paths are visible: a call to ix86_fixup_binary_operands_no_copy
;; (UMAX, ...), and a vcond-style expansion through ix86_expand_int_vcond
;; with xops = { dest, op1, op2, GTU (op1, op2), op1, op2 }, i.e. op1 is
;; selected where op1 > op2 (unsigned) and op2 otherwise.
;; NOTE(review): the condition selecting between the two paths is on lines
;; not shown in this excerpt -- confirm.
3487 (define_expand "umaxv4si3"
3488 [(set (match_operand:V4SI 0 "register_operand" "")
3489 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3490 (match_operand:V4SI 2 "register_operand" "")))]
3494 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3500 xops[0] = operands[0];
3501 xops[1] = operands[1];
3502 xops[2] = operands[2];
3503 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3504 xops[4] = operands[1];
3505 xops[5] = operands[2];
3506 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for the SSEMODE24 modes (V8HI, V4SI); emits
;; pmaxu<ssevecsize>, i.e. pmaxuw or pmaxud.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0").
3512 (define_insn "*sse4_1_umax<mode>3"
3513 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3515 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3516 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3517 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3518 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3519 [(set_attr "type" "sseiadd")
3520 (set_attr "prefix_extra" "1")
3521 (set_attr "mode" "TI")])
;; Named expander for unsigned V16QI minimum.  The preparation statement only
;; calls ix86_fixup_binary_operands_no_copy (UMIN, ...) on the operands
;; before the umin template is emitted.
3523 (define_expand "uminv16qi3"
3524 [(set (match_operand:V16QI 0 "register_operand" "")
3525 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3526 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3528 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for unsigned V16QI minimum; emits pminub.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 2 may be an SSE
;; register or memory ("xm").
3530 (define_insn "*uminv16qi3"
3531 [(set (match_operand:V16QI 0 "register_operand" "=x")
3532 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3533 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3534 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3535 "pminub\t{%2, %0|%0, %2}"
3536 [(set_attr "type" "sseiadd")
3537 (set_attr "prefix_data16" "1")
3538 (set_attr "mode" "TI")])
;; Named expander for signed V8HI minimum.  The preparation statement only
;; calls ix86_fixup_binary_operands_no_copy (SMIN, ...) on the operands
;; before the smin template is emitted.
3540 (define_expand "sminv8hi3"
3541 [(set (match_operand:V8HI 0 "register_operand" "")
3542 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3543 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3545 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for signed V8HI minimum; emits pminsw.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 2 may be an SSE
;; register or memory ("xm").
3547 (define_insn "*sminv8hi3"
3548 [(set (match_operand:V8HI 0 "register_operand" "=x")
3549 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3550 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3551 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3552 "pminsw\t{%2, %0|%0, %2}"
3553 [(set_attr "type" "sseiadd")
3554 (set_attr "prefix_data16" "1")
3555 (set_attr "mode" "TI")])
;; Named expander for signed minimum in the SSEMODE14 modes (V16QI, V4SI).
;; Two paths are visible: a call to ix86_fixup_binary_operands_no_copy
;; (SMIN, ...), and a vcond-style expansion through ix86_expand_int_vcond
;; with xops = { dest, op2, op1, GT (op1, op2), op1, op2 }.  The value arms
;; are swapped relative to smax: op2 is selected where op1 > op2, op1
;; otherwise.
;; NOTE(review): the condition selecting between the two paths is on lines
;; not shown in this excerpt -- confirm.
3557 (define_expand "smin<mode>3"
3558 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3559 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3560 (match_operand:SSEMODE14 2 "register_operand" "")))]
3564 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3570 xops[0] = operands[0];
3571 xops[1] = operands[2];
3572 xops[2] = operands[1];
3573 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3574 xops[4] = operands[1];
3575 xops[5] = operands[2];
3576 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for the SSEMODE14 modes (V16QI, V4SI); emits
;; pmins<ssevecsize>, i.e. pminsb or pminsd.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0").
3582 (define_insn "*sse4_1_smin<mode>3"
3583 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3585 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3586 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3587 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3588 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3589 [(set_attr "type" "sseiadd")
3590 (set_attr "prefix_extra" "1")
3591 (set_attr "mode" "TI")])
;; Named expander for unsigned minimum in the SSEMODE24 modes (V8HI, V4SI).
;; Two paths are visible: a call to ix86_fixup_binary_operands_no_copy
;; (UMIN, ...), and a vcond-style expansion through ix86_expand_int_vcond
;; with xops = { dest, op2, op1, GTU (op1, op2), op1, op2 }, i.e. op2 is
;; selected where op1 > op2 (unsigned), op1 otherwise.
;; NOTE(review): the condition selecting between the two paths is on lines
;; not shown in this excerpt -- confirm.
3593 (define_expand "umin<mode>3"
3594 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3595 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3596 (match_operand:SSEMODE24 2 "register_operand" "")))]
3600 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3606 xops[0] = operands[0];
3607 xops[1] = operands[2];
3608 xops[2] = operands[1];
3609 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3610 xops[4] = operands[1];
3611 xops[5] = operands[2];
3612 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for the SSEMODE24 modes (V8HI, V4SI); emits
;; pminu<ssevecsize>, i.e. pminuw or pminud.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok.  Operand 1 is marked
;; commutative and tied to the destination ("%0").
3618 (define_insn "*sse4_1_umin<mode>3"
3619 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3621 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3622 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3623 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3624 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3625 [(set_attr "type" "sseiadd")
3626 (set_attr "prefix_extra" "1")
3627 (set_attr "mode" "TI")])
3629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3631 ;; Parallel integral comparisons
3633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpeq<ssevecsize>, i.e. pcmpeqb, pcmpeqw or pcmpeqd.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok (EQ, ...).  Operand 1 is
;; marked commutative and tied to the destination ("%0").
3635 (define_insn "sse2_eq<mode>3"
3636 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3638 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3639 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3640 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3641 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3642 [(set_attr "type" "ssecmp")
3643 (set_attr "prefix_data16" "1")
3644 (set_attr "mode" "TI")])
;; Element-wise equality compare for V2DI; emits pcmpeqq.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok (EQ, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3646 (define_insn "sse4_1_eqv2di3"
3647 [(set (match_operand:V2DI 0 "register_operand" "=x")
3649 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3650 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3651 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3652 "pcmpeqq\t{%2, %0|%0, %2}"
3653 [(set_attr "type" "ssecmp")
3654 (set_attr "prefix_extra" "1")
3655 (set_attr "mode" "TI")])
;; Element-wise compare for the SSEMODE124 modes (V16QI, V8HI, V4SI); emits
;; pcmpgt<ssevecsize>, i.e. pcmpgtb, pcmpgtw or pcmpgtd.
;; Unlike the eq pattern, operand 1 is not marked commutative: it must be a
;; register and is tied to the destination ("0").
3657 (define_insn "sse2_gt<mode>3"
3658 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3660 (match_operand:SSEMODE124 1 "register_operand" "0")
3661 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3663 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3664 [(set_attr "type" "ssecmp")
3665 (set_attr "prefix_data16" "1")
3666 (set_attr "mode" "TI")])
;; Element-wise compare for V2DI; emits pcmpgtq.  Operand 1 is tied to the
;; destination ("0") and is not marked commutative.
;; NOTE(review): operand 1 uses the nonimmediate_operand predicate although
;; its only constraint is "0"; sse2_gt<mode>3 uses register_operand for the
;; same role -- consider aligning the two.
;; NOTE(review): only "type" and "mode" attributes are set here; the
;; neighbouring compare insns also set a prefix attribute -- confirm whether
;; one is missing.
3668 (define_insn "sse4_2_gtv2di3"
3669 [(set (match_operand:V2DI 0 "register_operand" "=x")
3671 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3672 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3674 "pcmpgtq\t{%2, %0|%0, %2}"
3675 [(set_attr "type" "ssecmp")
3676 (set_attr "mode" "TI")])
;; Vector conditional select for every SSEMODEI mode.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operand 1 is the value for the "then" arm and operand 2 for the "else"
;; arm of the if_then_else.  All work is delegated to ix86_expand_int_vcond.
;; NOTE(review): the statements taken on success/failure of that call are on
;; lines not shown here (presumably DONE / FAIL) -- confirm.
3678 (define_expand "vcond<mode>"
3679 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3680 (if_then_else:SSEMODEI
3681 (match_operator 3 ""
3682 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3683 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3684 (match_operand:SSEMODEI 1 "general_operand" "")
3685 (match_operand:SSEMODEI 2 "general_operand" "")))]
3688 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for every SSEMODEI mode, registered under the
;; "vcondu" name.  The template and the call to ix86_expand_int_vcond are
;; the same as in vcond<mode>: operand 3 compares operands 4 and 5,
;; operand 1 is the "then" value and operand 2 the "else" value.
;; NOTE(review): the statements taken on success/failure of that call are on
;; lines not shown here (presumably DONE / FAIL) -- confirm.
3694 (define_expand "vcondu<mode>"
3695 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3696 (if_then_else:SSEMODEI
3697 (match_operator 3 ""
3698 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3699 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3700 (match_operand:SSEMODEI 1 "general_operand" "")
3701 (match_operand:SSEMODEI 2 "general_operand" "")))]
3704 if (ix86_expand_int_vcond (operands))
3710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3712 ;; Parallel bitwise logical operations
3714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for every SSEMODEI mode, expressed as an XOR.
;; The preparation code builds a CONST_VECTOR with GET_MODE_NUNITS elements,
;; each constm1_rtx (all bits set), forces it into a register and stores it
;; in operands[2].
;; NOTE(review): the second XOR operand in the template is on a line not
;; shown here (presumably a match_dup of operand 2) -- confirm.
3716 (define_expand "one_cmpl<mode>2"
3717 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3718 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3722 int i, n = GET_MODE_NUNITS (<MODE>mode);
3723 rtvec v = rtvec_alloc (n);
3725 for (i = 0; i < n; ++i)
3726 RTVEC_ELT (v, i) = constm1_rtx;
3728 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND in every SSEMODEI mode.  The preparation
;; statement only calls ix86_fixup_binary_operands_no_copy (AND, ...).
3731 (define_expand "and<mode>3"
3732 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3733 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3734 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3736 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matching insn for bitwise AND in every SSEMODEI mode; emits pand.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok (AND, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3738 (define_insn "*and<mode>3"
3739 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3741 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3742 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3743 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3744 "pand\t{%2, %0|%0, %2}"
3745 [(set_attr "type" "sselog")
3746 (set_attr "prefix_data16" "1")
3747 (set_attr "mode" "TI")])
;; "And-not" for every SSEMODEI mode; emits pandn.
;; The inverted input is operand 1 (wrapped in not:), which must be a
;; register tied to the destination ("0"); operand 2 is used as-is and may
;; be an SSE register or memory ("xm").
3749 (define_insn "sse2_nand<mode>3"
3750 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3752 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3753 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3755 "pandn\t{%2, %0|%0, %2}"
3756 [(set_attr "type" "sselog")
3757 (set_attr "prefix_data16" "1")
3758 (set_attr "mode" "TI")])
;; Named expander for bitwise AND on TFmode values.  The preparation
;; statement only calls ix86_fixup_binary_operands_no_copy (AND, ...).
3760 (define_expand "andtf3"
3761 [(set (match_operand:TF 0 "register_operand" "")
3762 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3763 (match_operand:TF 2 "nonimmediate_operand" "")))]
3765 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; Matching insn for bitwise AND on TFmode values; emits pand.
;; Requires TARGET_64BIT and ix86_binary_operator_ok (AND, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3767 (define_insn "*andtf3"
3768 [(set (match_operand:TF 0 "register_operand" "=x")
3770 (match_operand:TF 1 "nonimmediate_operand" "%0")
3771 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3772 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3773 "pand\t{%2, %0|%0, %2}"
3774 [(set_attr "type" "sselog")
3775 (set_attr "prefix_data16" "1")
3776 (set_attr "mode" "TI")])
;; "And-not" on TFmode values; emits pandn.
;; The inverted input is operand 1 (wrapped in not:), which must be a
;; register tied to the destination ("0"); operand 2 is used as-is and may
;; be an SSE register or memory ("xm").
3778 (define_insn "*nandtf3"
3779 [(set (match_operand:TF 0 "register_operand" "=x")
3781 (not:TF (match_operand:TF 1 "register_operand" "0"))
3782 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3784 "pandn\t{%2, %0|%0, %2}"
3785 [(set_attr "type" "sselog")
3786 (set_attr "prefix_data16" "1")
3787 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR in every SSEMODEI mode.  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy
;; (IOR, ...).
3789 (define_expand "ior<mode>3"
3790 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3791 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3792 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3794 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matching insn for bitwise inclusive OR in every SSEMODEI mode; emits por.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok (IOR, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3796 (define_insn "*ior<mode>3"
3797 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3799 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3800 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3801 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3802 "por\t{%2, %0|%0, %2}"
3803 [(set_attr "type" "sselog")
3804 (set_attr "prefix_data16" "1")
3805 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on TFmode values.  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy
;; (IOR, ...).
3807 (define_expand "iortf3"
3808 [(set (match_operand:TF 0 "register_operand" "")
3809 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3810 (match_operand:TF 2 "nonimmediate_operand" "")))]
3812 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; Matching insn for bitwise inclusive OR on TFmode values; emits por.
;; Requires TARGET_64BIT and ix86_binary_operator_ok (IOR, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3814 (define_insn "*iortf3"
3815 [(set (match_operand:TF 0 "register_operand" "=x")
3817 (match_operand:TF 1 "nonimmediate_operand" "%0")
3818 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3819 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3820 "por\t{%2, %0|%0, %2}"
3821 [(set_attr "type" "sselog")
3822 (set_attr "prefix_data16" "1")
3823 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR in every SSEMODEI mode.  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy
;; (XOR, ...).
3825 (define_expand "xor<mode>3"
3826 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3827 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3828 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3830 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matching insn for bitwise exclusive OR in every SSEMODEI mode; emits pxor.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok (XOR, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3832 (define_insn "*xor<mode>3"
3833 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3835 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3836 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3837 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3838 "pxor\t{%2, %0|%0, %2}"
3839 [(set_attr "type" "sselog")
3840 (set_attr "prefix_data16" "1")
3841 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR on TFmode values.  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy
;; (XOR, ...).
3843 (define_expand "xortf3"
3844 [(set (match_operand:TF 0 "register_operand" "")
3845 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3846 (match_operand:TF 2 "nonimmediate_operand" "")))]
3848 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; Matching insn for bitwise exclusive OR on TFmode values; emits pxor.
;; Requires TARGET_64BIT and ix86_binary_operator_ok (XOR, ...).  Operand 1
;; is marked commutative and tied to the destination ("%0").
3850 (define_insn "*xortf3"
3851 [(set (match_operand:TF 0 "register_operand" "=x")
3853 (match_operand:TF 1 "nonimmediate_operand" "%0")
3854 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3855 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3856 "pxor\t{%2, %0|%0, %2}"
3857 [(set_attr "type" "sselog")
3858 (set_attr "prefix_data16" "1")
3859 (set_attr "mode" "TI")])
3861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3863 ;; Parallel integral element swizzling
3865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3868 ;; op1 = abcdefghijklmnop
3869 ;; op2 = qrstuvwxyz012345
3870 ;; h1 = aqbrcsdteufvgwhx
3871 ;; l1 = iyjzk0l1m2n3o4p5
3872 ;; h2 = aiqybjrzcks0dlt1
3873 ;; l2 = emu2fnv3gow4hpx5
3874 ;; h3 = aeimquy2bfjnrvz3
3875 ;; l3 = cgkosw04dhlptx15
3876 ;; result = bdfhjlnprtvxz135
;; Pack two V8HI inputs into one V16QI result using only byte interleaves.
;; Both inputs are viewed as V16QI (gen_lowpart); three rounds of
;; high/low interleaves (h1/l1, h2/l2, h3/l3) regroup the bytes, and a final
;; vec_interleave_lowv16qi of l3 and h3 writes operand 0.
3877 (define_expand "vec_pack_trunc_v8hi"
3878 [(match_operand:V16QI 0 "register_operand" "")
3879 (match_operand:V8HI 1 "register_operand" "")
3880 (match_operand:V8HI 2 "register_operand" "")]
3883 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3885 op1 = gen_lowpart (V16QImode, operands[1]);
3886 op2 = gen_lowpart (V16QImode, operands[2]);
3887 h1 = gen_reg_rtx (V16QImode);
3888 l1 = gen_reg_rtx (V16QImode);
3889 h2 = gen_reg_rtx (V16QImode);
3890 l2 = gen_reg_rtx (V16QImode);
3891 h3 = gen_reg_rtx (V16QImode);
3892 l3 = gen_reg_rtx (V16QImode);
3894 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3895 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3896 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3897 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3898 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3899 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3900 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3911 ;; result = bdfhjlnp
;; Pack two V4SI inputs into one V8HI result using only word interleaves.
;; Both inputs are viewed as V8HI (gen_lowpart); two rounds of high/low
;; interleaves (h1/l1, h2/l2) regroup the words, and a final
;; vec_interleave_lowv8hi of l2 and h2 writes operand 0.
3912 (define_expand "vec_pack_trunc_v4si"
3913 [(match_operand:V8HI 0 "register_operand" "")
3914 (match_operand:V4SI 1 "register_operand" "")
3915 (match_operand:V4SI 2 "register_operand" "")]
3918 rtx op1, op2, h1, l1, h2, l2;
3920 op1 = gen_lowpart (V8HImode, operands[1]);
3921 op2 = gen_lowpart (V8HImode, operands[2]);
3922 h1 = gen_reg_rtx (V8HImode);
3923 l1 = gen_reg_rtx (V8HImode);
3924 h2 = gen_reg_rtx (V8HImode);
3925 l2 = gen_reg_rtx (V8HImode);
3927 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3928 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3929 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3930 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3931 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI inputs into one V4SI result using only dword interleaves.
;; Both inputs are viewed as V4SI (gen_lowpart); one round of high/low
;; interleaves (h1/l1) regroups the dwords, and a final
;; vec_interleave_lowv4si of l1 and h1 writes operand 0.
3941 (define_expand "vec_pack_trunc_v2di"
3942 [(match_operand:V4SI 0 "register_operand" "")
3943 (match_operand:V2DI 1 "register_operand" "")
3944 (match_operand:V2DI 2 "register_operand" "")]
3947 rtx op1, op2, h1, l1;
3949 op1 = gen_lowpart (V4SImode, operands[1]);
3950 op2 = gen_lowpart (V4SImode, operands[2]);
3951 h1 = gen_reg_rtx (V4SImode);
3952 l1 = gen_reg_rtx (V4SImode);
3954 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3955 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3956 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the V16QI "high" interleave; the body forwards the
;; three operands unchanged to sse2_punpckhbw.
;; The selector lists indices 8..15 alternating with 24..31.
;; NOTE(review): the RTL wrappers around the operands are on lines not shown
;; here (presumably vec_select over vec_concat) -- confirm.
3960 (define_expand "vec_interleave_highv16qi"
3961 [(set (match_operand:V16QI 0 "register_operand" "=x")
3964 (match_operand:V16QI 1 "register_operand" "0")
3965 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3966 (parallel [(const_int 8) (const_int 24)
3967 (const_int 9) (const_int 25)
3968 (const_int 10) (const_int 26)
3969 (const_int 11) (const_int 27)
3970 (const_int 12) (const_int 28)
3971 (const_int 13) (const_int 29)
3972 (const_int 14) (const_int 30)
3973 (const_int 15) (const_int 31)])))]
3976 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the V16QI "low" interleave; the body forwards the
;; three operands unchanged to sse2_punpcklbw.
;; The selector lists indices 0..7 alternating with 16..23.
;; NOTE(review): the RTL wrappers around the operands are on lines not shown
;; here (presumably vec_select over vec_concat) -- confirm.
3980 (define_expand "vec_interleave_lowv16qi"
3981 [(set (match_operand:V16QI 0 "register_operand" "=x")
3984 (match_operand:V16QI 1 "register_operand" "0")
3985 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3986 (parallel [(const_int 0) (const_int 16)
3987 (const_int 1) (const_int 17)
3988 (const_int 2) (const_int 18)
3989 (const_int 3) (const_int 19)
3990 (const_int 4) (const_int 20)
3991 (const_int 5) (const_int 21)
3992 (const_int 6) (const_int 22)
3993 (const_int 7) (const_int 23)])))]
3996 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the V8HI "high" interleave; the body forwards the
;; three operands unchanged to sse2_punpckhwd.
;; The selector lists indices 4..7 alternating with 12..15.
;; NOTE(review): the RTL wrappers around the operands are on lines not shown
;; here (presumably vec_select over vec_concat) -- confirm.
4000 (define_expand "vec_interleave_highv8hi"
4001 [(set (match_operand:V8HI 0 "register_operand" "=x")
4004 (match_operand:V8HI 1 "register_operand" "0")
4005 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4006 (parallel [(const_int 4) (const_int 12)
4007 (const_int 5) (const_int 13)
4008 (const_int 6) (const_int 14)
4009 (const_int 7) (const_int 15)])))]
4012 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the V8HI "low" interleave; the body forwards the
;; three operands unchanged to sse2_punpcklwd.
;; The selector lists indices 0..3 alternating with 8..11.
;; NOTE(review): the RTL wrappers around the operands are on lines not shown
;; here (presumably vec_select over vec_concat) -- confirm.
4016 (define_expand "vec_interleave_lowv8hi"
4017 [(set (match_operand:V8HI 0 "register_operand" "=x")
4020 (match_operand:V8HI 1 "register_operand" "0")
4021 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4022 (parallel [(const_int 0) (const_int 8)
4023 (const_int 1) (const_int 9)
4024 (const_int 2) (const_int 10)
4025 (const_int 3) (const_int 11)])))]
4028 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the V4SI "high" interleave; the body forwards the
;; three operands unchanged to sse2_punpckhdq.
;; The selector lists indices 2, 6, 3, 7.
;; NOTE(review): the RTL wrappers around the operands are on lines not shown
;; here (presumably vec_select over vec_concat) -- confirm.
4032 (define_expand "vec_interleave_highv4si"
4033 [(set (match_operand:V4SI 0 "register_operand" "=x")
4036 (match_operand:V4SI 1 "register_operand" "0")
4037 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4038 (parallel [(const_int 2) (const_int 6)
4039 (const_int 3) (const_int 7)])))]
4042 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the V4SI "low" interleave; the body forwards the
;; three operands unchanged to sse2_punpckldq.
;; The selector lists indices 0, 4, 1, 5.
;; NOTE(review): the RTL wrappers around the operands are on lines not shown
;; here (presumably vec_select over vec_concat) -- confirm.
4046 (define_expand "vec_interleave_lowv4si"
4047 [(set (match_operand:V4SI 0 "register_operand" "=x")
4050 (match_operand:V4SI 1 "register_operand" "0")
4051 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4052 (parallel [(const_int 0) (const_int 4)
4053 (const_int 1) (const_int 5)])))]
4056 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the V2DI "high" interleave; the body forwards the
;; three operands unchanged to sse2_punpckhqdq.
;; NOTE(review): only the first selector index (1) is visible; the second
;; index and the RTL wrappers are on lines not shown here -- confirm.
4060 (define_expand "vec_interleave_highv2di"
4061 [(set (match_operand:V2DI 0 "register_operand" "=x")
4064 (match_operand:V2DI 1 "register_operand" "0")
4065 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4066 (parallel [(const_int 1)
4070 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the V2DI "low" interleave; the body forwards the
;; three operands unchanged to sse2_punpcklqdq.
;; NOTE(review): only the first selector index (0) is visible; the second
;; index and the RTL wrappers are on lines not shown here -- confirm.
4074 (define_expand "vec_interleave_lowv2di"
4075 [(set (match_operand:V2DI 0 "register_operand" "=x")
4078 (match_operand:V2DI 1 "register_operand" "0")
4079 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4080 (parallel [(const_int 0)
4084 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Pack two V8HI inputs into one V16QI result; emits packsswb.
;; Operand 1 must be a register tied to the destination ("0"); operand 2 may
;; be an SSE register or memory ("xm").
;; NOTE(review): the RTL codes wrapping the two inputs are on lines not
;; shown here (presumably a signed-saturating truncate of each, concatenated)
;; -- confirm.
4088 (define_insn "sse2_packsswb"
4089 [(set (match_operand:V16QI 0 "register_operand" "=x")
4092 (match_operand:V8HI 1 "register_operand" "0"))
4094 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4096 "packsswb\t{%2, %0|%0, %2}"
4097 [(set_attr "type" "sselog")
4098 (set_attr "prefix_data16" "1")
4099 (set_attr "mode" "TI")])
;; Pack two V4SI inputs into one V8HI result; emits packssdw.
;; Operand 1 must be a register tied to the destination ("0"); operand 2 may
;; be an SSE register or memory ("xm").
;; NOTE(review): the RTL codes wrapping the two inputs are on lines not
;; shown here (presumably a signed-saturating truncate of each, concatenated)
;; -- confirm.
4101 (define_insn "sse2_packssdw"
4102 [(set (match_operand:V8HI 0 "register_operand" "=x")
4105 (match_operand:V4SI 1 "register_operand" "0"))
4107 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4109 "packssdw\t{%2, %0|%0, %2}"
4110 [(set_attr "type" "sselog")
4111 (set_attr "prefix_data16" "1")
4112 (set_attr "mode" "TI")])
;; Pack two V8HI inputs into one V16QI result; emits packuswb.
;; Operand 1 must be a register tied to the destination ("0"); operand 2 may
;; be an SSE register or memory ("xm").
;; NOTE(review): the RTL codes wrapping the two inputs are on lines not
;; shown here (presumably an unsigned-saturating truncate of each,
;; concatenated) -- confirm.
4114 (define_insn "sse2_packuswb"
4115 [(set (match_operand:V16QI 0 "register_operand" "=x")
4118 (match_operand:V8HI 1 "register_operand" "0"))
4120 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4122 "packuswb\t{%2, %0|%0, %2}"
4123 [(set_attr "type" "sselog")
4124 (set_attr "prefix_data16" "1")
4125 (set_attr "mode" "TI")])
;; Byte interleave of the "high" elements of two V16QI inputs; emits
;; punpckhbw.  The selector lists indices 8..15 alternating with 24..31,
;; the same list used by the vec_interleave_highv16qi expander.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4127 (define_insn "sse2_punpckhbw"
4128 [(set (match_operand:V16QI 0 "register_operand" "=x")
4131 (match_operand:V16QI 1 "register_operand" "0")
4132 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4133 (parallel [(const_int 8) (const_int 24)
4134 (const_int 9) (const_int 25)
4135 (const_int 10) (const_int 26)
4136 (const_int 11) (const_int 27)
4137 (const_int 12) (const_int 28)
4138 (const_int 13) (const_int 29)
4139 (const_int 14) (const_int 30)
4140 (const_int 15) (const_int 31)])))]
4142 "punpckhbw\t{%2, %0|%0, %2}"
4143 [(set_attr "type" "sselog")
4144 (set_attr "prefix_data16" "1")
4145 (set_attr "mode" "TI")])
;; Byte interleave of the "low" elements of two V16QI inputs; emits
;; punpcklbw.  The selector lists indices 0..7 alternating with 16..23,
;; the same list used by the vec_interleave_lowv16qi expander.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4147 (define_insn "sse2_punpcklbw"
4148 [(set (match_operand:V16QI 0 "register_operand" "=x")
4151 (match_operand:V16QI 1 "register_operand" "0")
4152 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4153 (parallel [(const_int 0) (const_int 16)
4154 (const_int 1) (const_int 17)
4155 (const_int 2) (const_int 18)
4156 (const_int 3) (const_int 19)
4157 (const_int 4) (const_int 20)
4158 (const_int 5) (const_int 21)
4159 (const_int 6) (const_int 22)
4160 (const_int 7) (const_int 23)])))]
4162 "punpcklbw\t{%2, %0|%0, %2}"
4163 [(set_attr "type" "sselog")
4164 (set_attr "prefix_data16" "1")
4165 (set_attr "mode" "TI")])
;; Word interleave of the "high" elements of two V8HI inputs; emits
;; punpckhwd.  The selector lists indices 4..7 alternating with 12..15.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4167 (define_insn "sse2_punpckhwd"
4168 [(set (match_operand:V8HI 0 "register_operand" "=x")
4171 (match_operand:V8HI 1 "register_operand" "0")
4172 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4173 (parallel [(const_int 4) (const_int 12)
4174 (const_int 5) (const_int 13)
4175 (const_int 6) (const_int 14)
4176 (const_int 7) (const_int 15)])))]
4178 "punpckhwd\t{%2, %0|%0, %2}"
4179 [(set_attr "type" "sselog")
4180 (set_attr "prefix_data16" "1")
4181 (set_attr "mode" "TI")])
;; Word interleave of the "low" elements of two V8HI inputs; emits
;; punpcklwd.  The selector lists indices 0..3 alternating with 8..11.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4183 (define_insn "sse2_punpcklwd"
4184 [(set (match_operand:V8HI 0 "register_operand" "=x")
4187 (match_operand:V8HI 1 "register_operand" "0")
4188 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4189 (parallel [(const_int 0) (const_int 8)
4190 (const_int 1) (const_int 9)
4191 (const_int 2) (const_int 10)
4192 (const_int 3) (const_int 11)])))]
4194 "punpcklwd\t{%2, %0|%0, %2}"
4195 [(set_attr "type" "sselog")
4196 (set_attr "prefix_data16" "1")
4197 (set_attr "mode" "TI")])
;; Dword interleave of the "high" elements of two V4SI inputs; emits
;; punpckhdq.  The selector lists indices 2, 6, 3, 7.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4199 (define_insn "sse2_punpckhdq"
4200 [(set (match_operand:V4SI 0 "register_operand" "=x")
4203 (match_operand:V4SI 1 "register_operand" "0")
4204 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4205 (parallel [(const_int 2) (const_int 6)
4206 (const_int 3) (const_int 7)])))]
4208 "punpckhdq\t{%2, %0|%0, %2}"
4209 [(set_attr "type" "sselog")
4210 (set_attr "prefix_data16" "1")
4211 (set_attr "mode" "TI")])
;; Dword interleave of the "low" elements of two V4SI inputs; emits
;; punpckldq.  The selector lists indices 0, 4, 1, 5.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4213 (define_insn "sse2_punpckldq"
4214 [(set (match_operand:V4SI 0 "register_operand" "=x")
4217 (match_operand:V4SI 1 "register_operand" "0")
4218 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4219 (parallel [(const_int 0) (const_int 4)
4220 (const_int 1) (const_int 5)])))]
4222 "punpckldq\t{%2, %0|%0, %2}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_data16" "1")
4225 (set_attr "mode" "TI")])
;; Qword interleave of the "high" elements of two V2DI inputs; emits
;; punpckhqdq.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; an SSE register or memory ("xm").
;; NOTE(review): only the first selector index (1) is visible; the second
;; index is on a line not shown here -- confirm.
4227 (define_insn "sse2_punpckhqdq"
4228 [(set (match_operand:V2DI 0 "register_operand" "=x")
4231 (match_operand:V2DI 1 "register_operand" "0")
4232 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4233 (parallel [(const_int 1)
4236 "punpckhqdq\t{%2, %0|%0, %2}"
4237 [(set_attr "type" "sselog")
4238 (set_attr "prefix_data16" "1")
4239 (set_attr "mode" "TI")])
;; Qword interleave of the "low" elements of two V2DI inputs; emits
;; punpcklqdq.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; an SSE register or memory ("xm").
;; NOTE(review): only the first selector index (0) is visible; the second
;; index is on a line not shown here -- confirm.
4241 (define_insn "sse2_punpcklqdq"
4242 [(set (match_operand:V2DI 0 "register_operand" "=x")
4245 (match_operand:V2DI 1 "register_operand" "0")
4246 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4247 (parallel [(const_int 0)
4250 "punpcklqdq\t{%2, %0|%0, %2}"
4251 [(set_attr "type" "sselog")
4252 (set_attr "prefix_data16" "1")
4253 (set_attr "mode" "TI")])
;; Insert the QImode value in operand 2 into one element of V16QI operand 1
;; (tied to the destination); emits pinsrb.
;; Operand 3 is a power-of-two mask (const_pow2_1_to_32768_operand); the
;; output code converts it to the element index with exact_log2 before
;; printing.  Operand 2 is printed with the %k modifier.
4255 (define_insn "*sse4_1_pinsrb"
4256 [(set (match_operand:V16QI 0 "register_operand" "=x")
4258 (vec_duplicate:V16QI
4259 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4260 (match_operand:V16QI 1 "register_operand" "0")
4261 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4264 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4265 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4267 [(set_attr "type" "sselog")
4268 (set_attr "prefix_extra" "1")
4269 (set_attr "mode" "TI")])
;; *sse2_pinsrw: insert an HI value (operand 2, register or memory) into the
;; V8HI vector in operand 1, which is tied to the destination.  Operand 3
;; must satisfy const_pow2_1_to_128_operand; the output code replaces it with
;; its exact_log2 before printing it as the "pinsrw" immediate.
;; NOTE(review): the wrapping RTL operators and the insn condition are not
;; visible in this excerpt.
4271 (define_insn "*sse2_pinsrw"
4272 [(set (match_operand:V8HI 0 "register_operand" "=x")
4275 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4276 (match_operand:V8HI 1 "register_operand" "0")
4277 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4280 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4281 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4283 [(set_attr "type" "sselog")
4284 (set_attr "prefix_data16" "1")
4285 (set_attr "mode" "TI")])
;; *sse4_1_pinsrd: insert an SI value (operand 2, register or memory) into
;; the V4SI vector in operand 1, which is tied to the destination.  Operand 3
;; must satisfy const_pow2_1_to_8_operand; the output code replaces it with
;; its exact_log2 before printing it as the "pinsrd" immediate.
;; NOTE(review): the wrapping RTL operators and the insn condition are not
;; visible in this excerpt.
4287 ;; It must come before sse2_loadld since it is preferred.
4288 (define_insn "*sse4_1_pinsrd"
4289 [(set (match_operand:V4SI 0 "register_operand" "=x")
4292 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4293 (match_operand:V4SI 1 "register_operand" "0")
4294 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4297 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4298 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4300 [(set_attr "type" "sselog")
4301 (set_attr "prefix_extra" "1")
4302 (set_attr "mode" "TI")])
;; *sse4_1_pinsrq: insert a DI value (operand 2, register or memory) into the
;; V2DI vector in operand 1, which is tied to the destination.  Operand 3
;; must satisfy const_pow2_1_to_2_operand; the output code replaces it with
;; its exact_log2 before printing it as the "pinsrq" immediate.
;; NOTE(review): the wrapping RTL operators and the insn condition are not
;; visible in this excerpt.
4304 (define_insn "*sse4_1_pinsrq"
4305 [(set (match_operand:V2DI 0 "register_operand" "=x")
4308 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4309 (match_operand:V2DI 1 "register_operand" "0")
4310 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4313 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4314 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4316 [(set_attr "type" "sselog")
4317 (set_attr "prefix_extra" "1")
4318 (set_attr "mode" "TI")])
;; *sse4_1_pextrb: outputs "pextrb" with an SI general-register destination
;; ("=r"), a V16QI register source and a constant element index 0..15
;; (operand 2).
;; NOTE(review): the RTL operators wrapping the source and the insn
;; condition are not visible in this excerpt.
4320 (define_insn "*sse4_1_pextrb"
4321 [(set (match_operand:SI 0 "register_operand" "=r")
4324 (match_operand:V16QI 1 "register_operand" "x")
4325 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4327 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4328 [(set_attr "type" "sselog")
4329 (set_attr "prefix_extra" "1")
4330 (set_attr "mode" "TI")])
;; *sse4_1_pextrb_memory: outputs "pextrb" with a QI memory destination
;; ("=m"), a V16QI register source and a constant element index 0..15
;; (operand 2).
;; NOTE(review): the RTL operator wrapping the source and the insn condition
;; are not visible in this excerpt.
4332 (define_insn "*sse4_1_pextrb_memory"
4333 [(set (match_operand:QI 0 "memory_operand" "=m")
4335 (match_operand:V16QI 1 "register_operand" "x")
4336 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4338 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4339 [(set_attr "type" "sselog")
4340 (set_attr "prefix_extra" "1")
4341 (set_attr "mode" "TI")])
;; *sse2_pextrw: outputs "pextrw" with an SI general-register destination
;; ("=r"), a V8HI register source and a constant element index 0..7
;; (operand 2).
;; NOTE(review): the RTL operators wrapping the source and the insn
;; condition are not visible in this excerpt.
4343 (define_insn "*sse2_pextrw"
4344 [(set (match_operand:SI 0 "register_operand" "=r")
4347 (match_operand:V8HI 1 "register_operand" "x")
4348 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4350 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4351 [(set_attr "type" "sselog")
4352 (set_attr "prefix_data16" "1")
4353 (set_attr "mode" "TI")])
;; *sse4_1_pextrw_memory: outputs "pextrw" with an HI memory destination
;; ("=m"), a V8HI register source and a constant element index 0..7
;; (operand 2).
;; NOTE(review): the RTL operator wrapping the source and the insn condition
;; are not visible in this excerpt.
4355 (define_insn "*sse4_1_pextrw_memory"
4356 [(set (match_operand:HI 0 "memory_operand" "=m")
4358 (match_operand:V8HI 1 "register_operand" "x")
4359 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4361 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4362 [(set_attr "type" "sselog")
4363 (set_attr "prefix_extra" "1")
4364 (set_attr "mode" "TI")])
;; *sse4_1_pextrd: outputs "pextrd" with an SI destination that may be a
;; general register or memory ("=rm"), a V4SI register source and a constant
;; element index 0..3 (operand 2).
;; NOTE(review): the RTL operator wrapping the source and the insn condition
;; are not visible in this excerpt.
4366 (define_insn "*sse4_1_pextrd"
4367 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4369 (match_operand:V4SI 1 "register_operand" "x")
4370 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4372 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4373 [(set_attr "type" "sselog")
4374 (set_attr "prefix_extra" "1")
4375 (set_attr "mode" "TI")])
;; *sse4_1_pextrq: outputs "pextrq" with a DI destination that may be a
;; general register or memory ("=rm"), a V2DI register source and a constant
;; element index 0..1 (operand 2).  Enabled only when both TARGET_SSE4_1 and
;; TARGET_64BIT hold.
;; NOTE(review): the RTL operator wrapping the source is not visible in this
;; excerpt.
4377 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4378 (define_insn "*sse4_1_pextrq"
4379 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4381 (match_operand:V2DI 1 "register_operand" "x")
4382 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4383 "TARGET_SSE4_1 && TARGET_64BIT"
4384 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4385 [(set_attr "type" "sselog")
4386 (set_attr "prefix_extra" "1")
4387 (set_attr "mode" "TI")])
;; sse2_pshufd: expander taking a V4SI destination, a V4SI source (register
;; or memory) and a constant-integer mask.  The preparation code splits the
;; mask into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) and passes them as
;; separate operands to gen_sse2_pshufd_1.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4389 (define_expand "sse2_pshufd"
4390 [(match_operand:V4SI 0 "register_operand" "")
4391 (match_operand:V4SI 1 "nonimmediate_operand" "")
4392 (match_operand:SI 2 "const_int_operand" "")]
4395 int mask = INTVAL (operands[2]);
4396 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4397 GEN_INT ((mask >> 0) & 3),
4398 GEN_INT ((mask >> 2) & 3),
4399 GEN_INT ((mask >> 4) & 3),
4400 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshufd_1: V4SI shuffle whose four element selectors (operands 2-5)
;; must each satisfy const_0_to_3_operand.  The output code packs them back
;; into one mask (operand 2 at bit 0, 3 at bit 2, 4 at bit 4, 5 at bit 6),
;; stores that mask in operands[2] and prints it as the "pshufd" immediate.
;; NOTE(review): the wrapping RTL operator, the insn condition and the
;; declaration of `mask' are not visible in this excerpt.
4404 (define_insn "sse2_pshufd_1"
4405 [(set (match_operand:V4SI 0 "register_operand" "=x")
4407 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4408 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4409 (match_operand 3 "const_0_to_3_operand" "")
4410 (match_operand 4 "const_0_to_3_operand" "")
4411 (match_operand 5 "const_0_to_3_operand" "")])))]
4415 mask |= INTVAL (operands[2]) << 0;
4416 mask |= INTVAL (operands[3]) << 2;
4417 mask |= INTVAL (operands[4]) << 4;
4418 mask |= INTVAL (operands[5]) << 6;
4419 operands[2] = GEN_INT (mask);
4421 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4423 [(set_attr "type" "sselog1")
4424 (set_attr "prefix_data16" "1")
4425 (set_attr "mode" "TI")])
;; sse2_pshuflw: expander taking a V8HI destination, a V8HI source (register
;; or memory) and a constant-integer mask.  The preparation code splits the
;; mask into four 2-bit fields and passes them as separate operands to
;; gen_sse2_pshuflw_1.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4427 (define_expand "sse2_pshuflw"
4428 [(match_operand:V8HI 0 "register_operand" "")
4429 (match_operand:V8HI 1 "nonimmediate_operand" "")
4430 (match_operand:SI 2 "const_int_operand" "")]
4433 int mask = INTVAL (operands[2]);
4434 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4435 GEN_INT ((mask >> 0) & 3),
4436 GEN_INT ((mask >> 2) & 3),
4437 GEN_INT ((mask >> 4) & 3),
4438 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshuflw_1: V8HI shuffle whose first four element selectors
;; (operands 2-5) must each satisfy const_0_to_3_operand.  The output code
;; packs them into one mask (2 bits each, operand 2 lowest), stores it in
;; operands[2] and prints it as the "pshuflw" immediate.
;; NOTE(review): the remaining selection-vector elements, the wrapping RTL
;; operator, the insn condition and the declaration of `mask' are not
;; visible in this excerpt.
4442 (define_insn "sse2_pshuflw_1"
4443 [(set (match_operand:V8HI 0 "register_operand" "=x")
4445 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4446 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4447 (match_operand 3 "const_0_to_3_operand" "")
4448 (match_operand 4 "const_0_to_3_operand" "")
4449 (match_operand 5 "const_0_to_3_operand" "")
4457 mask |= INTVAL (operands[2]) << 0;
4458 mask |= INTVAL (operands[3]) << 2;
4459 mask |= INTVAL (operands[4]) << 4;
4460 mask |= INTVAL (operands[5]) << 6;
4461 operands[2] = GEN_INT (mask);
4463 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4465 [(set_attr "type" "sselog")
4466 (set_attr "prefix_rep" "1")
4467 (set_attr "mode" "TI")])
;; sse2_pshufhw: expander taking a V8HI destination, a V8HI source (register
;; or memory) and a constant-integer mask.  The preparation code splits the
;; mask into four 2-bit fields, adds 4 to each (giving values 4..7) and
;; passes them as separate operands to gen_sse2_pshufhw_1.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4469 (define_expand "sse2_pshufhw"
4470 [(match_operand:V8HI 0 "register_operand" "")
4471 (match_operand:V8HI 1 "nonimmediate_operand" "")
4472 (match_operand:SI 2 "const_int_operand" "")]
4475 int mask = INTVAL (operands[2]);
4476 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4477 GEN_INT (((mask >> 0) & 3) + 4),
4478 GEN_INT (((mask >> 2) & 3) + 4),
4479 GEN_INT (((mask >> 4) & 3) + 4),
4480 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse2_pshufhw_1: V8HI shuffle whose selection vector begins with the fixed
;; element 0 and ends with four selectors (operands 2-5) that must each
;; satisfy const_4_to_7_operand.  The output code subtracts 4 from each
;; selector, packs the results into one mask (2 bits each, operand 2
;; lowest), stores it in operands[2] and prints it as the "pshufhw"
;; immediate.
;; NOTE(review): the middle of the selection vector, the wrapping RTL
;; operator, the insn condition and the declaration of `mask' are not
;; visible in this excerpt.
4484 (define_insn "sse2_pshufhw_1"
4485 [(set (match_operand:V8HI 0 "register_operand" "=x")
4487 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4488 (parallel [(const_int 0)
4492 (match_operand 2 "const_4_to_7_operand" "")
4493 (match_operand 3 "const_4_to_7_operand" "")
4494 (match_operand 4 "const_4_to_7_operand" "")
4495 (match_operand 5 "const_4_to_7_operand" "")])))]
4499 mask |= (INTVAL (operands[2]) - 4) << 0;
4500 mask |= (INTVAL (operands[3]) - 4) << 2;
4501 mask |= (INTVAL (operands[4]) - 4) << 4;
4502 mask |= (INTVAL (operands[5]) - 4) << 6;
4503 operands[2] = GEN_INT (mask);
4505 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4507 [(set_attr "type" "sselog")
4508 (set_attr "prefix_rep" "1")
4509 (set_attr "mode" "TI")])
;; sse2_loadd: expander with a V4SI register destination and an SI source
;; (register or memory).  Its preparation statement sets operands[2] to the
;; V4SImode zero constant.
;; NOTE(review): most of the RTL template and the expander condition are not
;; visible in this excerpt.
4511 (define_expand "sse2_loadd"
4512 [(set (match_operand:V4SI 0 "register_operand" "")
4515 (match_operand:SI 1 "nonimmediate_operand" ""))
4519 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld: four-alternative pattern placing the SI operand 2 into a V4SI
;; destination.  Alternatives 0-1 output "movd" (operand 2 from memory or a
;; general register); alternatives 2-3 output "movss" (operand 2 from memory
;; or an "x" register).  Operand 1 is constraint "C" in the first three
;; alternatives and tied to the destination in the last.
;; NOTE(review): the wrapping RTL operators, the tail of the template and
;; the insn condition are not visible in this excerpt.
4521 (define_insn "sse2_loadld"
4522 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4525 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4526 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4530 movd\t{%2, %0|%0, %2}
4531 movd\t{%2, %0|%0, %2}
4532 movss\t{%2, %0|%0, %2}
4533 movss\t{%2, %0|%0, %2}"
4534 [(set_attr "type" "ssemov")
4535 (set_attr "mode" "TI,TI,V4SF,SF")])
;; sse2_stored: insn-and-split that stores element 0 of a V4SI register into
;; an SI destination.  The split fires after reload when
;; TARGET_INTER_UNIT_MOVES is set, or the destination is memory, or the
;; destination is not a general register.  It rewrites the insn as a plain
;; set whose source is operand 1's hard register re-expressed in SImode.
;; NOTE(review): the wrapping RTL operator, the insn condition and the
;; output template are not visible in this excerpt.
4537 (define_insn_and_split "sse2_stored"
4538 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4540 (match_operand:V4SI 1 "register_operand" "x,Yi")
4541 (parallel [(const_int 0)])))]
4544 "&& reload_completed
4545 && (TARGET_INTER_UNIT_MOVES
4546 || MEM_P (operands [0])
4547 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4548 [(set (match_dup 0) (match_dup 1))]
4550 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; sse_storeq: expander whose template sets a DI destination (register or
;; memory) from element 0 of a V2DI register.
;; NOTE(review): the wrapping RTL operator, the expander condition and its
;; preparation statements are not visible in this excerpt.
4553 (define_expand "sse_storeq"
4554 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4556 (match_operand:V2DI 1 "register_operand" "")
4557 (parallel [(const_int 0)])))]
;; *sse2_storeq_rex64: 64-bit variant (condition "TARGET_64BIT &&
;; TARGET_SSE") storing element 0 of a V2DI register into a DI destination.
;; Two alternatives: destination "mx" from an "x" source, or a general
;; register from a "Yi" source.
;; NOTE(review): the wrapping RTL operator and the output template are not
;; visible in this excerpt.
4561 (define_insn "*sse2_storeq_rex64"
4562 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4564 (match_operand:V2DI 1 "register_operand" "x,Yi")
4565 (parallel [(const_int 0)])))]
4566 "TARGET_64BIT && TARGET_SSE"
;; *sse2_storeq: stores element 0 of a V2DI register into a DI destination;
;; the single alternative has destination "mx" and source "x".
;; NOTE(review): the wrapping RTL operator, the insn condition and the
;; output template are not visible in this excerpt.
4569 (define_insn "*sse2_storeq"
4570 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4572 (match_operand:V2DI 1 "register_operand" "x")
4573 (parallel [(const_int 0)])))]
;; Unnamed pattern matching a DI store of element 0 of a V2DI register and
;; replacing it with a plain set whose source is operand 1's hard register
;; re-expressed in DImode.  The visible part of its condition requires
;; TARGET_INTER_UNIT_MOVES, or a memory destination, or a destination that
;; is not a general register.
;; NOTE(review): the opening line of this definition (presumably
;; "(define_split"), the first part of its condition and the wrapping RTL
;; operator are not visible in this excerpt -- confirm against the full file.
4578 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4580 (match_operand:V2DI 1 "register_operand" "")
4581 (parallel [(const_int 0)])))]
4584 && (TARGET_INTER_UNIT_MOVES
4585 || MEM_P (operands [0])
4586 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4587 [(set (match_dup 0) (match_dup 1))]
4589 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; *vec_extractv2di_1_sse2: extract element 1 of a V2DI operand into a DI
;; destination when TARGET_SSE2 holds and the two operands are not both
;; memory.  Alternatives, in order:
;;   0: register source to memory        -> movhps
;;   1: source tied to the destination   -> psrldq by 8
;;   2: offsettable-memory source ("o")  -> movq from %H1
;; NOTE(review): the wrapping RTL operator line is not visible in this
;; excerpt.
4592 (define_insn "*vec_extractv2di_1_sse2"
4593 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4595 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4596 (parallel [(const_int 1)])))]
4597 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4599 movhps\t{%1, %0|%0, %1}
4600 psrldq\t{$8, %0|%0, 8}
4601 movq\t{%H1, %0|%0, %H1}"
4602 [(set_attr "type" "ssemov,sseishft,ssemov")
4603 (set_attr "memory" "*,none,*")
4604 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse: SSE1-only counterpart (condition "!TARGET_SSE2 &&
;; TARGET_SSE", operands not both memory) extracting element 1 of a V2DI
;; operand into a DI destination.  Alternatives, in order:
;;   0: register source to memory        -> movhps
;;   1: register source to register      -> movhlps
;;   2: offsettable-memory source ("o")  -> movlps from %H1
;; NOTE(review): the wrapping RTL operator line is not visible in this
;; excerpt.
4606 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4607 (define_insn "*vec_extractv2di_1_sse"
4608 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4610 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4611 (parallel [(const_int 1)])))]
4612 "!TARGET_SSE2 && TARGET_SSE
4613 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4615 movhps\t{%1, %0|%0, %1}
4616 movhlps\t{%1, %0|%0, %1}
4617 movlps\t{%H1, %0|%0, %H1}"
4618 [(set_attr "type" "ssemov")
4619 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si: builds a V4SI value from an SI register operand.
;; Alternative 0 outputs "pshufd" with immediate 0; alternative 1 (source
;; tied to the destination) outputs "shufps" with immediate 0.
;; NOTE(review): the wrapping RTL operator (presumably vec_duplicate, given
;; the pattern name) and the insn condition are not visible in this excerpt.
4621 (define_insn "*vec_dupv4si"
4622 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4624 (match_operand:SI 1 "register_operand" " Yt,0")))]
4627 pshufd\t{$0, %1, %0|%0, %1, 0}
4628 shufps\t{$0, %0, %0|%0, %0, 0}"
4629 [(set_attr "type" "sselog1")
4630 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di: builds a V2DI value from a DI register operand that is tied
;; to the destination in both alternatives.  The alternatives are typed
;; sselog1 (mode TI) and ssemov (mode V4SF).
;; NOTE(review): the wrapping RTL operator, the insn condition and both
;; output templates are not visible in this excerpt.
4632 (define_insn "*vec_dupv2di"
4633 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4635 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4640 [(set_attr "type" "sselog1,ssemov")
4641 (set_attr "mode" "TI,V4SF")])
;; *sse2_concatv2si: builds a V2SI value from two SI operands.  Four
;; alternatives: the first two use "Yt" destinations and output "punpckldq"
;; / "movd"; the last two use "*y" destinations (typed mmxcvt / mmxmov) and
;; output the same two mnemonics.  The "movd" alternatives require operand 2
;; to match constraint "C".
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
4643 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4644 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4645 ;; alternatives pretty much forces the MMX alternative to be chosen.
4646 (define_insn "*sse2_concatv2si"
4647 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4649 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4650 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4653 punpckldq\t{%2, %0|%0, %2}
4654 movd\t{%1, %0|%0, %1}
4655 punpckldq\t{%2, %0|%0, %2}
4656 movd\t{%1, %0|%0, %1}"
4657 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4658 (set_attr "mode" "TI,TI,DI,DI")])
;; *sse1_concatv2si: builds a V2SI value from two SI operands.  Four
;; alternatives: "x" destinations output "unpcklps" / "movss"; "*y"
;; destinations (typed mmxcvt / mmxmov) output "punpckldq" / "movd".  The
;; "movss" and "movd" alternatives require operand 2 to match constraint "C".
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
4660 (define_insn "*sse1_concatv2si"
4661 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4663 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4664 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4667 unpcklps\t{%2, %0|%0, %2}
4668 movss\t{%1, %0|%0, %1}
4669 punpckldq\t{%2, %0|%0, %2}
4670 movd\t{%1, %0|%0, %1}"
4671 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4672 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1: builds a V4SI value from two V2SI operands, with
;; operand 1 tied to the destination in every alternative.  Alternatives,
;; in order of operand 2's constraint:
;;   "Yt" -> punpcklqdq,  "x" -> movlhps,  memory -> movhps
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
4674 (define_insn "*vec_concatv4si_1"
4675 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4677 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4678 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4681 punpcklqdq\t{%2, %0|%0, %2}
4682 movlhps\t{%2, %0|%0, %2}
4683 movhps\t{%2, %0|%0, %2}"
4684 [(set_attr "type" "sselog,ssemov,ssemov")
4685 (set_attr "mode" "TI,V4SF,V2SF")])
;; vec_concatv2di: builds a V2DI value from two DI operands.  Six
;; alternatives, in order:
;;   0: op1 memory, op2 "C"            -> movq
;;   1: op1 "*y", op2 "C"              -> movq2dq
;;   2: op1 tied to dest, op2 "Yt"     -> punpcklqdq
;;   3: op1 tied to dest, op2 "x"      -> movlhps
;;   4: op1 tied to dest, op2 memory   -> movhps
;;   5: op1 memory, op2 tied to dest   -> movlps
;; NOTE(review): the wrapping RTL operator and the insn condition are not
;; visible in this excerpt.
4687 (define_insn "vec_concatv2di"
4688 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4690 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4691 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4694 movq\t{%1, %0|%0, %1}
4695 movq2dq\t{%1, %0|%0, %1}
4696 punpcklqdq\t{%2, %0|%0, %2}
4697 movlhps\t{%2, %0|%0, %2}
4698 movhps\t{%2, %0|%0, %2}
4699 movlps\t{%1, %0|%0, %1}"
4700 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4701 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_setv2di: expander taking a V2DI register (operand 0), a DI register
;; value (operand 1) and a constant-integer operand 2.  It delegates to
;; ix86_expand_vector_set, passing false, the two register operands and the
;; integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4703 (define_expand "vec_setv2di"
4704 [(match_operand:V2DI 0 "register_operand" "")
4705 (match_operand:DI 1 "register_operand" "")
4706 (match_operand 2 "const_int_operand" "")]
4709 ix86_expand_vector_set (false, operands[0], operands[1],
4710 INTVAL (operands[2]));
;; vec_extractv2di: expander taking a DI register destination (operand 0), a
;; V2DI register source (operand 1) and a constant-integer operand 2.  It
;; delegates to ix86_expand_vector_extract, passing false, the two register
;; operands and the integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4714 (define_expand "vec_extractv2di"
4715 [(match_operand:DI 0 "register_operand" "")
4716 (match_operand:V2DI 1 "register_operand" "")
4717 (match_operand 2 "const_int_operand" "")]
4720 ix86_expand_vector_extract (false, operands[0], operands[1],
4721 INTVAL (operands[2]));
;; vec_initv2di: expander taking a V2DI register destination (operand 0) and
;; an unconstrained operand 1.  It delegates to ix86_expand_vector_init,
;; passing false and both operands.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4725 (define_expand "vec_initv2di"
4726 [(match_operand:V2DI 0 "register_operand" "")
4727 (match_operand 1 "" "")]
4730 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4si: expander taking a V4SI register (operand 0), an SI register
;; value (operand 1) and a constant-integer operand 2.  It delegates to
;; ix86_expand_vector_set, passing false, the two register operands and the
;; integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4734 (define_expand "vec_setv4si"
4735 [(match_operand:V4SI 0 "register_operand" "")
4736 (match_operand:SI 1 "register_operand" "")
4737 (match_operand 2 "const_int_operand" "")]
4740 ix86_expand_vector_set (false, operands[0], operands[1],
4741 INTVAL (operands[2]));
;; vec_extractv4si: expander taking an SI register destination (operand 0),
;; a V4SI register source (operand 1) and a constant-integer operand 2.  It
;; delegates to ix86_expand_vector_extract, passing false, the two register
;; operands and the integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4745 (define_expand "vec_extractv4si"
4746 [(match_operand:SI 0 "register_operand" "")
4747 (match_operand:V4SI 1 "register_operand" "")
4748 (match_operand 2 "const_int_operand" "")]
4751 ix86_expand_vector_extract (false, operands[0], operands[1],
4752 INTVAL (operands[2]));
;; vec_initv4si: expander taking a V4SI register destination (operand 0) and
;; an unconstrained operand 1.  It delegates to ix86_expand_vector_init,
;; passing false and both operands.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4756 (define_expand "vec_initv4si"
4757 [(match_operand:V4SI 0 "register_operand" "")
4758 (match_operand 1 "" "")]
4761 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv8hi: expander taking a V8HI register (operand 0), an HI register
;; value (operand 1) and a constant-integer operand 2.  It delegates to
;; ix86_expand_vector_set, passing false, the two register operands and the
;; integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4765 (define_expand "vec_setv8hi"
4766 [(match_operand:V8HI 0 "register_operand" "")
4767 (match_operand:HI 1 "register_operand" "")
4768 (match_operand 2 "const_int_operand" "")]
4771 ix86_expand_vector_set (false, operands[0], operands[1],
4772 INTVAL (operands[2]));
;; vec_extractv8hi: expander taking an HI register destination (operand 0),
;; a V8HI register source (operand 1) and a constant-integer operand 2.  It
;; delegates to ix86_expand_vector_extract, passing false, the two register
;; operands and the integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4776 (define_expand "vec_extractv8hi"
4777 [(match_operand:HI 0 "register_operand" "")
4778 (match_operand:V8HI 1 "register_operand" "")
4779 (match_operand 2 "const_int_operand" "")]
4782 ix86_expand_vector_extract (false, operands[0], operands[1],
4783 INTVAL (operands[2]));
;; vec_initv8hi: expander taking a V8HI register destination (operand 0) and
;; an unconstrained operand 1.  It delegates to ix86_expand_vector_init,
;; passing false and both operands.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4787 (define_expand "vec_initv8hi"
4788 [(match_operand:V8HI 0 "register_operand" "")
4789 (match_operand 1 "" "")]
4792 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv16qi: expander taking a V16QI register (operand 0), a QI register
;; value (operand 1) and a constant-integer operand 2.  It delegates to
;; ix86_expand_vector_set, passing false, the two register operands and the
;; integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4796 (define_expand "vec_setv16qi"
4797 [(match_operand:V16QI 0 "register_operand" "")
4798 (match_operand:QI 1 "register_operand" "")
4799 (match_operand 2 "const_int_operand" "")]
4802 ix86_expand_vector_set (false, operands[0], operands[1],
4803 INTVAL (operands[2]));
;; vec_extractv16qi: expander taking a QI register destination (operand 0),
;; a V16QI register source (operand 1) and a constant-integer operand 2.  It
;; delegates to ix86_expand_vector_extract, passing false, the two register
;; operands and the integer value of operand 2.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4807 (define_expand "vec_extractv16qi"
4808 [(match_operand:QI 0 "register_operand" "")
4809 (match_operand:V16QI 1 "register_operand" "")
4810 (match_operand 2 "const_int_operand" "")]
4813 ix86_expand_vector_extract (false, operands[0], operands[1],
4814 INTVAL (operands[2]));
;; vec_initv16qi: expander taking a V16QI register destination (operand 0)
;; and an unconstrained operand 1.  It delegates to ix86_expand_vector_init,
;; passing false and both operands.
;; NOTE(review): the expander condition and the closing lines of the C block
;; are not visible in this excerpt.
4818 (define_expand "vec_initv16qi"
4819 [(match_operand:V16QI 0 "register_operand" "")
4820 (match_operand 1 "" "")]
4823 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_unpacku_hi_v16qi: expander from a V16QI register source to a V8HI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4827 (define_expand "vec_unpacku_hi_v16qi"
4828 [(match_operand:V8HI 0 "register_operand" "")
4829 (match_operand:V16QI 1 "register_operand" "")]
4833 ix86_expand_sse4_unpack (operands, true, true);
4835 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v16qi: expander from a V16QI register source to a V8HI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4839 (define_expand "vec_unpacks_hi_v16qi"
4840 [(match_operand:V8HI 0 "register_operand" "")
4841 (match_operand:V16QI 1 "register_operand" "")]
4845 ix86_expand_sse4_unpack (operands, false, true);
4847 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v16qi: expander from a V16QI register source to a V8HI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4851 (define_expand "vec_unpacku_lo_v16qi"
4852 [(match_operand:V8HI 0 "register_operand" "")
4853 (match_operand:V16QI 1 "register_operand" "")]
4857 ix86_expand_sse4_unpack (operands, true, false);
4859 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v16qi: expander from a V16QI register source to a V8HI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4863 (define_expand "vec_unpacks_lo_v16qi"
4864 [(match_operand:V8HI 0 "register_operand" "")
4865 (match_operand:V16QI 1 "register_operand" "")]
4869 ix86_expand_sse4_unpack (operands, false, false);
4871 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v8hi: expander from a V8HI register source to a V4SI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4875 (define_expand "vec_unpacku_hi_v8hi"
4876 [(match_operand:V4SI 0 "register_operand" "")
4877 (match_operand:V8HI 1 "register_operand" "")]
4881 ix86_expand_sse4_unpack (operands, true, true);
4883 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v8hi: expander from a V8HI register source to a V4SI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4887 (define_expand "vec_unpacks_hi_v8hi"
4888 [(match_operand:V4SI 0 "register_operand" "")
4889 (match_operand:V8HI 1 "register_operand" "")]
4893 ix86_expand_sse4_unpack (operands, false, true);
4895 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v8hi: expander from a V8HI register source to a V4SI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4899 (define_expand "vec_unpacku_lo_v8hi"
4900 [(match_operand:V4SI 0 "register_operand" "")
4901 (match_operand:V8HI 1 "register_operand" "")]
4905 ix86_expand_sse4_unpack (operands, true, false);
4907 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v8hi: expander from a V8HI register source to a V4SI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4911 (define_expand "vec_unpacks_lo_v8hi"
4912 [(match_operand:V4SI 0 "register_operand" "")
4913 (match_operand:V8HI 1 "register_operand" "")]
4917 ix86_expand_sse4_unpack (operands, false, false);
4919 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: expander from a V4SI register source to a V2DI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4923 (define_expand "vec_unpacku_hi_v4si"
4924 [(match_operand:V2DI 0 "register_operand" "")
4925 (match_operand:V4SI 1 "register_operand" "")]
4929 ix86_expand_sse4_unpack (operands, true, true);
4931 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: expander from a V4SI register source to a V2DI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4935 (define_expand "vec_unpacks_hi_v4si"
4936 [(match_operand:V2DI 0 "register_operand" "")
4937 (match_operand:V4SI 1 "register_operand" "")]
4941 ix86_expand_sse4_unpack (operands, false, true);
4943 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: expander from a V4SI register source to a V2DI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4947 (define_expand "vec_unpacku_lo_v4si"
4948 [(match_operand:V2DI 0 "register_operand" "")
4949 (match_operand:V4SI 1 "register_operand" "")]
4953 ix86_expand_sse4_unpack (operands, true, false);
4955 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: expander from a V4SI register source to a V2DI
;; register destination.  It calls either ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): the flags are presumably (unsigned, high half), judging by
;; the pattern name -- confirm against the helpers.  The test that chooses
;; between the two helpers and the expander condition are not visible here.
4959 (define_expand "vec_unpacks_lo_v4si"
4960 [(match_operand:V2DI 0 "register_operand" "")
4961 (match_operand:V4SI 1 "register_operand" "")]
4965 ix86_expand_sse4_unpack (operands, false, false);
4967 ix86_expand_sse_unpack (operands, false, false);
4971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4975 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: V16QI pattern that outputs "pavgb".  Operand 1 is tied
;; to the destination and marked commutative ("%0"); operand 2 may be a
;; register or memory.  The RTL includes a V16QI constant vector of sixteen
;; 1s.  Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok accepts
;; the operands for PLUS in V16QImode.
;; NOTE(review): the arithmetic operators wrapping the operands are not
;; visible in this excerpt.
4977 (define_insn "sse2_uavgv16qi3"
4978 [(set (match_operand:V16QI 0 "register_operand" "=x")
4984 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4986 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4987 (const_vector:V16QI [(const_int 1) (const_int 1)
4988 (const_int 1) (const_int 1)
4989 (const_int 1) (const_int 1)
4990 (const_int 1) (const_int 1)
4991 (const_int 1) (const_int 1)
4992 (const_int 1) (const_int 1)
4993 (const_int 1) (const_int 1)
4994 (const_int 1) (const_int 1)]))
4996 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4997 "pavgb\t{%2, %0|%0, %2}"
4998 [(set_attr "type" "sseiadd")
4999 (set_attr "prefix_data16" "1")
5000 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: V8HI pattern that outputs "pavgw".  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be a
;; register or memory.  The RTL includes a V8HI constant vector of eight 1s.
;; Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands for PLUS in V8HImode.
;; NOTE(review): the arithmetic operators wrapping the operands are not
;; visible in this excerpt.
5002 (define_insn "sse2_uavgv8hi3"
5003 [(set (match_operand:V8HI 0 "register_operand" "=x")
5009 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5011 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5012 (const_vector:V8HI [(const_int 1) (const_int 1)
5013 (const_int 1) (const_int 1)
5014 (const_int 1) (const_int 1)
5015 (const_int 1) (const_int 1)]))
5017 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5018 "pavgw\t{%2, %0|%0, %2}"
5019 [(set_attr "type" "sseiadd")
5020 (set_attr "prefix_data16" "1")
5021 (set_attr "mode" "TI")])
;; sse2_psadbw: outputs "psadbw".  The operation is modelled as an opaque
;; V2DI unspec over two V16QI operands rather than as explicit RTL
;; arithmetic (see the remark below).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
;; NOTE(review): the unspec code name and the insn condition are not visible
;; in this excerpt.
5023 ;; The correct representation for this is absolutely enormous, and
5024 ;; surely not generally useful.
5025 (define_insn "sse2_psadbw"
5026 [(set (match_operand:V2DI 0 "register_operand" "=x")
5027 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5028 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5031 "psadbw\t{%2, %0|%0, %2}"
5032 [(set_attr "type" "sseiadd")
5033 (set_attr "prefix_data16" "1")
5034 (set_attr "mode" "TI")])
;; sse_movmskps: outputs "movmskps" from a V4SF register into an SI general
;; register, modelled as an SI unspec of the source operand.
;; NOTE(review): the unspec code name and the insn condition are not visible
;; in this excerpt.
5036 (define_insn "sse_movmskps"
5037 [(set (match_operand:SI 0 "register_operand" "=r")
5038 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5041 "movmskps\t{%1, %0|%0, %1}"
5042 [(set_attr "type" "ssecvt")
5043 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: outputs "movmskpd" from a V2DF register into an SI general
;; register, modelled as an SI unspec of the source operand.
;; NOTE(review): the unspec code name and the insn condition are not visible
;; in this excerpt.
5045 (define_insn "sse2_movmskpd"
5046 [(set (match_operand:SI 0 "register_operand" "=r")
5047 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5050 "movmskpd\t{%1, %0|%0, %1}"
5051 [(set_attr "type" "ssecvt")
5052 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: outputs "pmovmskb" from a V16QI register into an SI
;; general register, modelled as an SI unspec of the source operand.
;; NOTE(review): the unspec code name and the insn condition are not visible
;; in this excerpt.
5054 (define_insn "sse2_pmovmskb"
5055 [(set (match_operand:SI 0 "register_operand" "=r")
5056 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5059 "pmovmskb\t{%1, %0|%0, %1}"
5060 [(set_attr "type" "ssecvt")
5061 (set_attr "prefix_data16" "1")
5062 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: expander whose template sets a V16QI memory operand from
;; a V16QI unspec that takes register operands 1 and 2.
;; NOTE(review): the rest of the unspec operand list, its code name and the
;; expander condition are not visible in this excerpt.
5064 (define_expand "sse2_maskmovdqu"
5065 [(set (match_operand:V16QI 0 "memory_operand" "")
5066 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5067 (match_operand:V16QI 2 "register_operand" "x")
;; *sse2_maskmovdqu: 32-bit form (condition "TARGET_SSE2 && !TARGET_64BIT")
;; that outputs "maskmovdqu".  The destination is V16QI memory addressed by
;; an SI register constrained to "D".  The unspec takes register operands 1
;; and 2 plus the previous contents of that same memory location.
;; NOTE(review): the unspec code name is not visible in this excerpt.
5073 (define_insn "*sse2_maskmovdqu"
5074 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5075 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5076 (match_operand:V16QI 2 "register_operand" "x")
5077 (mem:V16QI (match_dup 0))]
5079 "TARGET_SSE2 && !TARGET_64BIT"
5080 ;; @@@ check ordering of operands in intel/nonintel syntax
5081 "maskmovdqu\t{%2, %1|%1, %2}"
5082 [(set_attr "type" "ssecvt")
5083 (set_attr "prefix_data16" "1")
5084 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (condition "TARGET_SSE2 &&
;; TARGET_64BIT") that outputs "maskmovdqu".  The destination is V16QI
;; memory addressed by a DI register constrained to "D".  The unspec takes
;; register operands 1 and 2 plus the previous contents of that same memory
;; location.
;; NOTE(review): the unspec code name is not visible in this excerpt.
5086 (define_insn "*sse2_maskmovdqu_rex64"
5087 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5088 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5089 (match_operand:V16QI 2 "register_operand" "x")
5090 (mem:V16QI (match_dup 0))]
5092 "TARGET_SSE2 && TARGET_64BIT"
5093 ;; @@@ check ordering of operands in intel/nonintel syntax
5094 "maskmovdqu\t{%2, %1|%1, %2}"
5095 [(set_attr "type" "ssecvt")
5096 (set_attr "prefix_data16" "1")
5097 (set_attr "mode" "TI")])
;; sse_ldmxcsr: unspec_volatile pattern taking an SI memory operand; its
;; "memory" attribute is "load".
;; NOTE(review): the unspec code name, the insn condition and the output
;; template are not visible in this excerpt.
5099 (define_insn "sse_ldmxcsr"
5100 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5104 [(set_attr "type" "sse")
5105 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SI memory operand from the unspec_volatile
;; UNSPECV_STMXCSR; its "memory" attribute is "store".
;; NOTE(review): the insn condition and the output template are not visible
;; in this excerpt.
5107 (define_insn "sse_stmxcsr"
5108 [(set (match_operand:SI 0 "memory_operand" "=m")
5109 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5112 [(set_attr "type" "sse")
5113 (set_attr "memory" "store")])
;; sse_sfence: expander enabled when TARGET_SSE or TARGET_3DNOW_A holds.
;; The preparation code makes operands[0] a BLKmode MEM whose address is a
;; Pmode SCRATCH and marks it volatile; the template uses that MEM as the
;; input of an UNSPEC_SFENCE unspec.
;; NOTE(review): the first line of the RTL template is not visible in this
;; excerpt.
5115 (define_expand "sse_sfence"
5117 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5118 "TARGET_SSE || TARGET_3DNOW_A"
5120 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5121 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: matches a BLKmode operand set from an UNSPEC_SFENCE unspec
;; of that same operand; enabled when TARGET_SSE or TARGET_3DNOW_A holds.
;; Its "memory" attribute is "unknown".
;; NOTE(review): the output template line is not visible in this excerpt.
5124 (define_insn "*sse_sfence"
5125 [(set (match_operand:BLK 0 "" "")
5126 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5127 "TARGET_SSE || TARGET_3DNOW_A"
5129 [(set_attr "type" "sse")
5130 (set_attr "memory" "unknown")])
;; sse2_clflush: unspec_volatile pattern taking an address operand
;; (constraint "p"); its "memory" attribute is "unknown".
;; NOTE(review): the unspec code name, the insn condition and the output
;; template are not visible in this excerpt.
5132 (define_insn "sse2_clflush"
5133 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5137 [(set_attr "type" "sse")
5138 (set_attr "memory" "unknown")])
;; sse2_mfence: expander whose preparation code makes operands[0] a BLKmode
;; MEM with a Pmode SCRATCH address and marks it volatile; the template uses
;; that MEM as the input of an UNSPEC_MFENCE unspec.
;; NOTE(review): the first line of the RTL template and the expander
;; condition are not visible in this excerpt.
5140 (define_expand "sse2_mfence"
5142 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5145 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5146 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: matches a BLKmode operand set from an UNSPEC_MFENCE unspec
;; of that same operand; its "memory" attribute is "unknown".
;; NOTE(review): the insn condition and the output template are not visible
;; in this excerpt.
5149 (define_insn "*sse2_mfence"
5150 [(set (match_operand:BLK 0 "" "")
5151 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5154 [(set_attr "type" "sse")
5155 (set_attr "memory" "unknown")])
;; sse2_lfence: expander whose preparation code makes operands[0] a BLKmode
;; MEM with a Pmode SCRATCH address and marks it volatile; the template uses
;; that MEM as the input of an UNSPEC_LFENCE unspec.
;; NOTE(review): the first line of the RTL template and the expander
;; condition are not visible in this excerpt.
5157 (define_expand "sse2_lfence"
5159 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5162 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5163 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: matches a BLKmode operand set from an UNSPEC_LFENCE unspec
;; of that same operand; its "memory" attribute is "unknown".
;; NOTE(review): the insn condition and the output template are not visible
;; in this excerpt.
5166 (define_insn "*sse2_lfence"
5167 [(set (match_operand:BLK 0 "" "")
5168 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5171 [(set_attr "type" "sse")
5172 (set_attr "memory" "unknown")])
;; sse3_mwait: unspec_volatile pattern taking two SI register operands
;; constrained to "a" and "c"; the "length" attribute is fixed at 3.
;; NOTE(review): the unspec code name, the insn condition and the output
;; template are not visible in this excerpt.
5174 (define_insn "sse3_mwait"
5175 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5176 (match_operand:SI 1 "register_operand" "c")]
5179 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5180 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5181 ;; we only need to set up 32bit registers.
5183 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form (condition "TARGET_SSE3 && !TARGET_64BIT") that
;; outputs "monitor" with three SI register operands constrained to "a",
;; "c" and "d"; the "length" attribute is fixed at 3.
;; NOTE(review): the unspec code name is not visible in this excerpt.
5185 (define_insn "sse3_monitor"
5186 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5187 (match_operand:SI 1 "register_operand" "c")
5188 (match_operand:SI 2 "register_operand" "d")]
5190 "TARGET_SSE3 && !TARGET_64BIT"
5191 "monitor\t%0, %1, %2"
5192 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form (condition "TARGET_SSE3 && TARGET_64BIT").
;; Operand 0 is a DI register constrained to "a"; operands 1 and 2 stay SI
;; registers constrained to "c" and "d", for the reason given in the remark
;; below.  The "length" attribute is fixed at 3.
;; NOTE(review): the unspec code name and the output template are not
;; visible in this excerpt.
5194 (define_insn "sse3_monitor64"
5195 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5196 (match_operand:SI 1 "register_operand" "c")
5197 (match_operand:SI 2 "register_operand" "d")]
5199 "TARGET_SSE3 && TARGET_64BIT"
5200 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5201 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5202 ;; zero extended to 64bit, we only need to set up 32bit registers.
5204 [(set_attr "length" "3")])
;; ssse3_phaddwv8hi3: V8HI pattern that outputs "phaddw".  The visible RTL
;; selects adjacent HI element pairs (0,1), (2,3), (4,5), (6,7) first from
;; operand 1 (tied to the destination) and then from operand 2 (register or
;; memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5207 (define_insn "ssse3_phaddwv8hi3"
5208 [(set (match_operand:V8HI 0 "register_operand" "=x")
5214 (match_operand:V8HI 1 "register_operand" "0")
5215 (parallel [(const_int 0)]))
5216 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5218 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5219 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5222 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5223 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5225 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5226 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5231 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5232 (parallel [(const_int 0)]))
5233 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5235 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5236 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5239 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5240 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5242 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5243 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5245 "phaddw\t{%2, %0|%0, %2}"
5246 [(set_attr "type" "sseiadd")
5247 (set_attr "prefix_data16" "1")
5248 (set_attr "prefix_extra" "1")
5249 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: V4HI pattern using "y" registers that outputs
;; "phaddw".  The visible RTL selects adjacent HI element pairs (0,1) and
;; (2,3) first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5251 (define_insn "ssse3_phaddwv4hi3"
5252 [(set (match_operand:V4HI 0 "register_operand" "=y")
5257 (match_operand:V4HI 1 "register_operand" "0")
5258 (parallel [(const_int 0)]))
5259 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5261 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5262 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5266 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5267 (parallel [(const_int 0)]))
5268 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5270 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5271 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5273 "phaddw\t{%2, %0|%0, %2}"
5274 [(set_attr "type" "sseiadd")
5275 (set_attr "prefix_extra" "1")
5276 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: V4SI pattern that outputs "phaddd".  The visible RTL
;; selects adjacent SI element pairs (0,1) and (2,3) first from operand 1
;; (tied to the destination) and then from operand 2 (register or memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5278 (define_insn "ssse3_phadddv4si3"
5279 [(set (match_operand:V4SI 0 "register_operand" "=x")
5284 (match_operand:V4SI 1 "register_operand" "0")
5285 (parallel [(const_int 0)]))
5286 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5288 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5289 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5293 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5294 (parallel [(const_int 0)]))
5295 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5297 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5298 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5300 "phaddd\t{%2, %0|%0, %2}"
5301 [(set_attr "type" "sseiadd")
5302 (set_attr "prefix_data16" "1")
5303 (set_attr "prefix_extra" "1")
5304 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: V2SI pattern using "y" registers that outputs
;; "phaddd".  The visible RTL selects SI elements 0 and 1 first from
;; operand 1 (tied to the destination) and then from operand 2 (register or
;; memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5306 (define_insn "ssse3_phadddv2si3"
5307 [(set (match_operand:V2SI 0 "register_operand" "=y")
5311 (match_operand:V2SI 1 "register_operand" "0")
5312 (parallel [(const_int 0)]))
5313 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5316 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5317 (parallel [(const_int 0)]))
5318 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5320 "phaddd\t{%2, %0|%0, %2}"
5321 [(set_attr "type" "sseiadd")
5322 (set_attr "prefix_extra" "1")
5323 (set_attr "mode" "DI")])
;; ssse3_phaddswv8hi3: V8HI pattern that outputs "phaddsw".  The visible RTL
;; selects adjacent HI element pairs (0,1), (2,3), (4,5), (6,7) first from
;; operand 1 (tied to the destination) and then from operand 2 (register or
;; memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5325 (define_insn "ssse3_phaddswv8hi3"
5326 [(set (match_operand:V8HI 0 "register_operand" "=x")
5332 (match_operand:V8HI 1 "register_operand" "0")
5333 (parallel [(const_int 0)]))
5334 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5336 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5337 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5340 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5341 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5343 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5344 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5349 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5350 (parallel [(const_int 0)]))
5351 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5353 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5354 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5357 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5358 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5360 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5361 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5363 "phaddsw\t{%2, %0|%0, %2}"
5364 [(set_attr "type" "sseiadd")
5365 (set_attr "prefix_data16" "1")
5366 (set_attr "prefix_extra" "1")
5367 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: V4HI pattern using "y" registers that outputs
;; "phaddsw".  The visible RTL selects adjacent HI element pairs (0,1) and
;; (2,3) first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5369 (define_insn "ssse3_phaddswv4hi3"
5370 [(set (match_operand:V4HI 0 "register_operand" "=y")
5375 (match_operand:V4HI 1 "register_operand" "0")
5376 (parallel [(const_int 0)]))
5377 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5379 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5380 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5384 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5385 (parallel [(const_int 0)]))
5386 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5388 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5389 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5391 "phaddsw\t{%2, %0|%0, %2}"
5392 [(set_attr "type" "sseiadd")
5393 (set_attr "prefix_extra" "1")
5394 (set_attr "mode" "DI")])
;; ssse3_phsubwv8hi3: V8HI pattern that outputs "phsubw".  The visible RTL
;; selects adjacent HI element pairs (0,1), (2,3), (4,5), (6,7) first from
;; operand 1 (tied to the destination) and then from operand 2 (register or
;; memory).
;; NOTE(review): the operators that combine each pair and assemble the
;; result vector, and the insn condition, are not visible in this excerpt.
5396 (define_insn "ssse3_phsubwv8hi3"
5397 [(set (match_operand:V8HI 0 "register_operand" "=x")
5403 (match_operand:V8HI 1 "register_operand" "0")
5404 (parallel [(const_int 0)]))
5405 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5407 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5408 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5411 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5412 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5414 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5415 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5420 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5421 (parallel [(const_int 0)]))
5422 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5424 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5425 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5428 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5429 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5431 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5432 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5434 "phsubw\t{%2, %0|%0, %2}"
5435 [(set_attr "type" "sseiadd")
5436 (set_attr "prefix_data16" "1")
5437 (set_attr "prefix_extra" "1")
5438 (set_attr "mode" "TI")])
;; MMX-register (V4HI) pattern that prints `phsubw`.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; MMX register or memory ("ym").
5440 (define_insn "ssse3_phsubwv4hi3"
5441 [(set (match_operand:V4HI 0 "register_operand" "=y")
5446 (match_operand:V4HI 1 "register_operand" "0")
5447 (parallel [(const_int 0)]))
5448 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5450 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5451 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5455 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5456 (parallel [(const_int 0)]))
5457 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5459 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5460 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5462 "phsubw\t{%2, %0|%0, %2}"
5463 [(set_attr "type" "sseiadd")
5464 (set_attr "prefix_extra" "1")
5465 (set_attr "mode" "DI")])
;; SSE-register (V4SI) pattern that prints `phsubd`.
;; The RTL selects SI elements 0..3 of operand 1 and then of operand 2.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; SSE register or memory ("xm").
5467 (define_insn "ssse3_phsubdv4si3"
5468 [(set (match_operand:V4SI 0 "register_operand" "=x")
5473 (match_operand:V4SI 1 "register_operand" "0")
5474 (parallel [(const_int 0)]))
5475 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5477 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5478 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5482 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5483 (parallel [(const_int 0)]))
5484 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5486 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5487 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5489 "phsubd\t{%2, %0|%0, %2}"
5490 [(set_attr "type" "sseiadd")
5491 (set_attr "prefix_data16" "1")
5492 (set_attr "prefix_extra" "1")
5493 (set_attr "mode" "TI")])
;; MMX-register (V2SI) pattern that prints `phsubd`.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; MMX register or memory ("ym").
5495 (define_insn "ssse3_phsubdv2si3"
5496 [(set (match_operand:V2SI 0 "register_operand" "=y")
5500 (match_operand:V2SI 1 "register_operand" "0")
5501 (parallel [(const_int 0)]))
5502 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5505 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5506 (parallel [(const_int 0)]))
5507 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5509 "phsubd\t{%2, %0|%0, %2}"
5510 [(set_attr "type" "sseiadd")
5511 (set_attr "prefix_extra" "1")
5512 (set_attr "mode" "DI")])
;; SSE-register (V8HI) pattern that prints `phsubsw`.
;; The RTL selects HI elements 0..7 of operand 1 and then of operand 2.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; SSE register or memory ("xm").
5514 (define_insn "ssse3_phsubswv8hi3"
5515 [(set (match_operand:V8HI 0 "register_operand" "=x")
5521 (match_operand:V8HI 1 "register_operand" "0")
5522 (parallel [(const_int 0)]))
5523 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5525 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5526 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5529 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5530 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5532 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5533 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5538 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5539 (parallel [(const_int 0)]))
5540 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5542 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5543 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5546 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5547 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5549 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5550 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5552 "phsubsw\t{%2, %0|%0, %2}"
5553 [(set_attr "type" "sseiadd")
5554 (set_attr "prefix_data16" "1")
5555 (set_attr "prefix_extra" "1")
5556 (set_attr "mode" "TI")])
;; MMX-register (V4HI) pattern that prints `phsubsw`.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; MMX register or memory ("ym").
5558 (define_insn "ssse3_phsubswv4hi3"
5559 [(set (match_operand:V4HI 0 "register_operand" "=y")
5564 (match_operand:V4HI 1 "register_operand" "0")
5565 (parallel [(const_int 0)]))
5566 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5568 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5569 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5573 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5574 (parallel [(const_int 0)]))
5575 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5577 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5578 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5580 "phsubsw\t{%2, %0|%0, %2}"
5581 [(set_attr "type" "sseiadd")
5582 (set_attr "prefix_extra" "1")
5583 (set_attr "mode" "DI")])
;; SSE-register pattern that prints `pmaddubsw`: V16QI inputs, V8HI result.
;; Operand 1 carries the "%" commutative marker and is tied to the output
;; ("%0"); operand 2 may be an SSE register or memory ("xm").
5585 (define_insn "ssse3_pmaddubswv8hi3"
5586 [(set (match_operand:V8HI 0 "register_operand" "=x")
5591 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5592 (parallel [(const_int 0)
5602 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5603 (parallel [(const_int 0)
5613 (vec_select:V16QI (match_dup 1)
5614 (parallel [(const_int 1)
5623 (vec_select:V16QI (match_dup 2)
5624 (parallel [(const_int 1)
5631 (const_int 15)]))))))]
5633 "pmaddubsw\t{%2, %0|%0, %2}"
5634 [(set_attr "type" "sseiadd")
5635 (set_attr "prefix_data16" "1")
5636 (set_attr "prefix_extra" "1")
5637 (set_attr "mode" "TI")])
;; MMX-register pattern that prints `pmaddubsw`: V8QI inputs, V4HI result.
;; Operand 1 carries the "%" commutative marker and is tied to the output
;; ("%0"); operand 2 may be an MMX register or memory ("ym").
5639 (define_insn "ssse3_pmaddubswv4hi3"
5640 [(set (match_operand:V4HI 0 "register_operand" "=y")
5645 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5646 (parallel [(const_int 0)
5652 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5653 (parallel [(const_int 0)
5659 (vec_select:V8QI (match_dup 1)
5660 (parallel [(const_int 1)
5665 (vec_select:V8QI (match_dup 2)
5666 (parallel [(const_int 1)
5669 (const_int 7)]))))))]
5671 "pmaddubsw\t{%2, %0|%0, %2}"
5672 [(set_attr "type" "sseiadd")
5673 (set_attr "prefix_extra" "1")
5674 (set_attr "mode" "DI")])
;; SSE-register (V8HI) pattern that prints `pmulhrsw`.
;; The RTL includes a V8HI constant vector of all ones.  The insn is only
;; enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands as a MULT; operand 1 is commutative and tied to the output.
5676 (define_insn "ssse3_pmulhrswv8hi3"
5677 [(set (match_operand:V8HI 0 "register_operand" "=x")
5684 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5686 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5688 (const_vector:V8HI [(const_int 1) (const_int 1)
5689 (const_int 1) (const_int 1)
5690 (const_int 1) (const_int 1)
5691 (const_int 1) (const_int 1)]))
5693 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5694 "pmulhrsw\t{%2, %0|%0, %2}"
5695 [(set_attr "type" "sseimul")
5696 (set_attr "prefix_data16" "1")
5697 (set_attr "prefix_extra" "1")
5698 (set_attr "mode" "TI")])
;; MMX-register (V4HI) pattern that prints `pmulhrsw`.
;; The RTL includes a V4HI constant vector of all ones.  The insn is only
;; enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands as a MULT; operand 1 is commutative and tied to the output.
5700 (define_insn "ssse3_pmulhrswv4hi3"
5701 [(set (match_operand:V4HI 0 "register_operand" "=y")
5708 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5710 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5712 (const_vector:V4HI [(const_int 1) (const_int 1)
5713 (const_int 1) (const_int 1)]))
5715 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5716 "pmulhrsw\t{%2, %0|%0, %2}"
5717 [(set_attr "type" "sseimul")
5718 (set_attr "prefix_extra" "1")
5719 (set_attr "mode" "DI")])
;; SSE-register (V16QI) pattern that prints `pshufb`, modelled as an unspec.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; SSE register or memory ("xm").
5721 (define_insn "ssse3_pshufbv16qi3"
5722 [(set (match_operand:V16QI 0 "register_operand" "=x")
5723 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5724 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5727 "pshufb\t{%2, %0|%0, %2}"
5728 [(set_attr "type" "sselog1")
5729 (set_attr "prefix_data16" "1")
5730 (set_attr "prefix_extra" "1")
5731 (set_attr "mode" "TI")])
;; MMX-register (V8QI) pattern that prints `pshufb`, modelled as an unspec.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; MMX register or memory ("ym").
5733 (define_insn "ssse3_pshufbv8qi3"
5734 [(set (match_operand:V8QI 0 "register_operand" "=y")
5735 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5736 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5739 "pshufb\t{%2, %0|%0, %2}"
5740 [(set_attr "type" "sselog1")
5741 (set_attr "prefix_extra" "1")
5742 (set_attr "mode" "DI")])
;; SSE-register psign patterns, one per mode in SSEMODE124; the mnemonic
;; suffix comes from the <ssevecsize> mode attribute.  Modelled as an unspec.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; SSE register or memory ("xm").
5744 (define_insn "ssse3_psign<mode>3"
5745 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5746 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5747 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5750 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5751 [(set_attr "type" "sselog1")
5752 (set_attr "prefix_data16" "1")
5753 (set_attr "prefix_extra" "1")
5754 (set_attr "mode" "TI")])
;; MMX-register psign patterns, one per mode in MMXMODEI; the mnemonic
;; suffix comes from the <mmxvecsize> mode attribute.  Modelled as an unspec.
;; Operand 1 is tied to the output register ("0"); operand 2 may be an
;; MMX register or memory ("ym").
5756 (define_insn "ssse3_psign<mode>3"
5757 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5758 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5759 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5762 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5763 [(set_attr "type" "sselog1")
5764 (set_attr "prefix_extra" "1")
5765 (set_attr "mode" "DI")])
;; SSE-register (TI) pattern that prints `palignr`, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; overwrites it with its value divided by 8 before printing the immediate
;; (presumably converting a bit count into the byte count the insn expects).
5767 (define_insn "ssse3_palignrti"
5768 [(set (match_operand:TI 0 "register_operand" "=x")
5769 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5770 (match_operand:TI 2 "nonimmediate_operand" "xm")
5771 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5775 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5776 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5778 [(set_attr "type" "sseishft")
5779 (set_attr "prefix_data16" "1")
5780 (set_attr "prefix_extra" "1")
5781 (set_attr "mode" "TI")])
;; MMX-register (DI) pattern that prints `palignr`, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; overwrites it with its value divided by 8 before printing the immediate
;; (presumably converting a bit count into the byte count the insn expects).
5783 (define_insn "ssse3_palignrdi"
5784 [(set (match_operand:DI 0 "register_operand" "=y")
5785 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5786 (match_operand:DI 2 "nonimmediate_operand" "ym")
5787 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5791 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5792 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5794 [(set_attr "type" "sseishft")
5795 (set_attr "prefix_extra" "1")
5796 (set_attr "mode" "DI")])
;; SSE-register element-wise absolute value (RTL `abs`) for each mode in
;; SSEMODE124; prints pabs with the <ssevecsize> suffix.  The source operand
;; may be an SSE register or memory ("xm") and is not tied to the output.
5798 (define_insn "abs<mode>2"
5799 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5800 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5802 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5803 [(set_attr "type" "sselog1")
5804 (set_attr "prefix_data16" "1")
5805 (set_attr "prefix_extra" "1")
5806 (set_attr "mode" "TI")])
;; MMX-register element-wise absolute value (RTL `abs`) for each mode in
;; MMXMODEI; prints pabs with the <mmxvecsize> suffix.  The source operand
;; may be an MMX register or memory ("ym") and is not tied to the output.
5808 (define_insn "abs<mode>2"
5809 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5810 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5812 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5813 [(set_attr "type" "sselog1")
5814 (set_attr "prefix_extra" "1")
5815 (set_attr "mode" "DI")])
5817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5819 ;; AMD SSE4A instructions
5821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Prints `movntsd`: stores element 0 of the V2DF SSE register operand 1
;; into the DF memory operand 0.  The store is wrapped in an unspec.
5823 (define_insn "sse4a_vmmovntv2df"
5824 [(set (match_operand:DF 0 "memory_operand" "=m")
5825 (unspec:DF [(vec_select:DF
5826 (match_operand:V2DF 1 "register_operand" "x")
5827 (parallel [(const_int 0)]))]
5830 "movntsd\t{%1, %0|%0, %1}"
5831 [(set_attr "type" "ssemov")
5832 (set_attr "mode" "DF")])
;; Prints `movntsd` for a scalar DF value held in an SSE register
;; (operand 1, "x"), stored to the DF memory operand 0 via an unspec.
5834 (define_insn "sse4a_movntdf"
5835 [(set (match_operand:DF 0 "memory_operand" "=m")
5836 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5839 "movntsd\t{%1, %0|%0, %1}"
5840 [(set_attr "type" "ssemov")
5841 (set_attr "mode" "DF")])
;; Prints `movntss`: stores element 0 of the V4SF SSE register operand 1
;; into the SF memory operand 0.  The store is wrapped in an unspec.
5843 (define_insn "sse4a_vmmovntv4sf"
5844 [(set (match_operand:SF 0 "memory_operand" "=m")
5845 (unspec:SF [(vec_select:SF
5846 (match_operand:V4SF 1 "register_operand" "x")
5847 (parallel [(const_int 0)]))]
5850 "movntss\t{%1, %0|%0, %1}"
5851 [(set_attr "type" "ssemov")
5852 (set_attr "mode" "SF")])
;; Prints `movntss` for a scalar SF value held in an SSE register
;; (operand 1, "x"), stored to the SF memory operand 0 via an unspec.
5854 (define_insn "sse4a_movntsf"
5855 [(set (match_operand:SF 0 "memory_operand" "=m")
5856 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5859 "movntss\t{%1, %0|%0, %1}"
5860 [(set_attr "type" "ssemov")
5861 (set_attr "mode" "SF")])
;; Immediate form of `extrq`, modelled as an unspec on V2DI.
;; Operand 1 is tied to the output register; operands 2 and 3 are mode-less
;; const_int operands with empty constraints, printed as the two immediates.
5863 (define_insn "sse4a_extrqi"
5864 [(set (match_operand:V2DI 0 "register_operand" "=x")
5865 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5866 (match_operand 2 "const_int_operand" "")
5867 (match_operand 3 "const_int_operand" "")]
5870 "extrq\t{%3, %2, %0|%0, %2, %3}"
5871 [(set_attr "type" "sse")
5872 (set_attr "prefix_data16" "1")
5873 (set_attr "mode" "TI")])
;; Register form of `extrq`, modelled as an unspec on V2DI.
;; Operand 1 is tied to the output register; operand 2 is a V16QI SSE
;; register (no memory alternative).
5875 (define_insn "sse4a_extrq"
5876 [(set (match_operand:V2DI 0 "register_operand" "=x")
5877 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5878 (match_operand:V16QI 2 "register_operand" "x")]
5881 "extrq\t{%2, %0|%0, %2}"
5882 [(set_attr "type" "sse")
5883 (set_attr "prefix_data16" "1")
5884 (set_attr "mode" "TI")])
;; Immediate form of `insertq`, modelled as an unspec on V2DI.
;; Operand 1 is tied to the output register, operand 2 is an SSE register,
;; and operands 3 and 4 are mode-less const_int operands printed as the
;; two immediates.
5886 (define_insn "sse4a_insertqi"
5887 [(set (match_operand:V2DI 0 "register_operand" "=x")
5888 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5889 (match_operand:V2DI 2 "register_operand" "x")
5890 (match_operand 3 "const_int_operand" "")
5891 (match_operand 4 "const_int_operand" "")]
5894 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5895 [(set_attr "type" "sseins")
5896 (set_attr "prefix_rep" "1")
5897 (set_attr "mode" "TI")])
;; Register form of `insertq`, modelled as an unspec on V2DI.
;; Operand 1 is tied to the output register; operand 2 is an SSE register
;; (no memory alternative).
5899 (define_insn "sse4a_insertq"
5900 [(set (match_operand:V2DI 0 "register_operand" "=x")
5901 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5902 (match_operand:V2DI 2 "register_operand" "x")]
5905 "insertq\t{%2, %0|%0, %2}"
5906 [(set_attr "type" "sseins")
5907 (set_attr "prefix_rep" "1")
5908 (set_attr "mode" "TI")])
5910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5912 ;; Intel SSE4.1 instructions
5914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Prints `blendpd` on V2DF.  Note the RTL lists operand 2 (register or
;; memory) before operand 1 (tied to the output register).  Operand 3 is an
;; immediate restricted by const_0_to_3_operand.
5916 (define_insn "sse4_1_blendpd"
5917 [(set (match_operand:V2DF 0 "register_operand" "=x")
5919 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5920 (match_operand:V2DF 1 "register_operand" "0")
5921 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
5923 "blendpd\t{%3, %2, %0|%0, %2, %3}"
5924 [(set_attr "type" "ssemov")
5925 (set_attr "prefix_extra" "1")
5926 (set_attr "mode" "V2DF")])
;; Prints `blendps` on V4SF.  Note the RTL lists operand 2 (register or
;; memory) before operand 1 (tied to the output register).  Operand 3 is an
;; immediate restricted by const_0_to_15_operand.
5928 (define_insn "sse4_1_blendps"
5929 [(set (match_operand:V4SF 0 "register_operand" "=x")
5931 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5932 (match_operand:V4SF 1 "register_operand" "0")
5933 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
5935 "blendps\t{%3, %2, %0|%0, %2, %3}"
5936 [(set_attr "type" "ssemov")
5937 (set_attr "prefix_extra" "1")
5938 (set_attr "mode" "V4SF")])
;; Prints `blendvpd` on V2DF, modelled as an unspec.
;; Operands 0, 1 and 2 use the *_not_xmm0 predicates, while operand 3 uses
;; the "Y0" constraint (presumably the xmm0 register class -- confirm
;; against the constraint definitions).
5940 (define_insn "sse4_1_blendvpd"
5941 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
5942 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
5943 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
5944 (match_operand:V2DF 3 "register_operand" "Y0")]
5947 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
5948 [(set_attr "type" "ssemov")
5949 (set_attr "prefix_extra" "1")
5950 (set_attr "mode" "V2DF")])
;; Prints `blendvps` on V4SF, modelled as an unspec.
;; Operands 0, 1 and 2 use the *_not_xmm0 predicates, while operand 3 uses
;; the "Y0" constraint (presumably the xmm0 register class -- confirm
;; against the constraint definitions).
5952 (define_insn "sse4_1_blendvps"
5953 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
5954 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
5955 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
5956 (match_operand:V4SF 3 "register_operand" "Y0")]
5959 "blendvps\t{%3, %2, %0|%0, %2, %3}"
5960 [(set_attr "type" "ssemov")
5961 (set_attr "prefix_extra" "1")
5962 (set_attr "mode" "V4SF")])
;; Prints `dppd` on V2DF, modelled as an unspec.
;; Operand 1 is commutative and tied to the output ("%0"); operand 2 may be
;; an SSE register or memory; operand 3 is an immediate in 0..255.
5964 (define_insn "sse4_1_dppd"
5965 [(set (match_operand:V2DF 0 "register_operand" "=x")
5966 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
5967 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5968 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5971 "dppd\t{%3, %2, %0|%0, %2, %3}"
5972 [(set_attr "type" "ssemul")
5973 (set_attr "prefix_extra" "1")
5974 (set_attr "mode" "V2DF")])
;; Prints `dpps` on V4SF, modelled as an unspec.
;; Operand 1 is commutative and tied to the output ("%0"); operand 2 may be
;; an SSE register or memory; operand 3 is an immediate in 0..255.
5976 (define_insn "sse4_1_dpps"
5977 [(set (match_operand:V4SF 0 "register_operand" "=x")
5978 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
5979 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5980 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5983 "dpps\t{%3, %2, %0|%0, %2, %3}"
5984 [(set_attr "type" "ssemul")
5985 (set_attr "prefix_extra" "1")
5986 (set_attr "mode" "V4SF")])
;; Prints `movntdqa`: loads the V2DI memory operand 1 into the SSE register
;; operand 0.  The load is wrapped in an unspec; the source must be memory.
5988 (define_insn "sse4_1_movntdqa"
5989 [(set (match_operand:V2DI 0 "register_operand" "=x")
5990 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
5993 "movntdqa\t{%1, %0|%0, %1}"
5994 [(set_attr "type" "ssecvt")
5995 (set_attr "prefix_extra" "1")
5996 (set_attr "mode" "TI")])
;; Prints `mpsadbw` on V16QI, modelled as an unspec.
;; Operand 1 is tied to the output register; operand 2 may be an SSE
;; register or memory; operand 3 is an immediate in 0..255.
5998 (define_insn "sse4_1_mpsadbw"
5999 [(set (match_operand:V16QI 0 "register_operand" "=x")
6000 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6001 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6002 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6005 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6006 [(set_attr "type" "sselog1")
6007 (set_attr "prefix_extra" "1")
6008 (set_attr "mode" "TI")])
;; Prints `packusdw`: two V4SI inputs produce one V8HI result.
;; Operand 1 is tied to the output register; operand 2 may be an SSE
;; register or memory.
6010 (define_insn "sse4_1_packusdw"
6011 [(set (match_operand:V8HI 0 "register_operand" "=x")
6014 (match_operand:V4SI 1 "register_operand" "0"))
6016 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6018 "packusdw\t{%2, %0|%0, %2}"
6019 [(set_attr "type" "sselog")
6020 (set_attr "prefix_extra" "1")
6021 (set_attr "mode" "TI")])
;; Prints `pblendvb` on V16QI, modelled as an unspec.
;; Operands 0, 1 and 2 use the *_not_xmm0 predicates, while operand 3 uses
;; the "Y0" constraint (presumably the xmm0 register class -- confirm
;; against the constraint definitions).
6023 (define_insn "sse4_1_pblendvb"
6024 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6025 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6026 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6027 (match_operand:V16QI 3 "register_operand" "Y0")]
6030 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6031 [(set_attr "type" "ssemov")
6032 (set_attr "prefix_extra" "1")
6033 (set_attr "mode" "TI")])
;; Prints `pblendw` on V8HI.  Note the RTL lists operand 2 (register or
;; memory) before operand 1 (tied to the output register).  Operand 3 is an
;; immediate in 0..255.
6035 (define_insn "sse4_1_pblendw"
6036 [(set (match_operand:V8HI 0 "register_operand" "=x")
6038 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6039 (match_operand:V8HI 1 "register_operand" "0")
6040 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6042 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6043 [(set_attr "type" "ssemov")
6044 (set_attr "prefix_extra" "1")
6045 (set_attr "mode" "TI")])
;; Prints `phminposuw` on V8HI, modelled as UNSPEC_PHMINPOSUW.
;; The single source operand may be an SSE register or memory and is not
;; tied to the output register.
6047 (define_insn "sse4_1_phminposuw"
6048 [(set (match_operand:V8HI 0 "register_operand" "=x")
6049 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6050 UNSPEC_PHMINPOSUW))]
6052 "phminposuw\t{%1, %0|%0, %1}"
6053 [(set_attr "type" "sselog1")
6054 (set_attr "prefix_extra" "1")
6055 (set_attr "mode" "TI")])
;; Named `pmovsxbw` pattern (V8HI result).  The source is a full V16QI SSE
;; register ("x", no memory alternative) from which elements are selected.
6057 (define_insn "sse4_1_extendv8qiv8hi2"
6058 [(set (match_operand:V8HI 0 "register_operand" "=x")
6061 (match_operand:V16QI 1 "register_operand" "x")
6062 (parallel [(const_int 0)
6071 "pmovsxbw\t{%1, %0|%0, %1}"
6072 [(set_attr "type" "ssemov")
6073 (set_attr "prefix_extra" "1")
6074 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovsxbw` variant: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6076 (define_insn "*sse4_1_extendv8qiv8hi2"
6077 [(set (match_operand:V8HI 0 "register_operand" "=x")
6080 (vec_duplicate:V16QI
6081 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6082 (parallel [(const_int 0)
6091 "pmovsxbw\t{%1, %0|%0, %1}"
6092 [(set_attr "type" "ssemov")
6093 (set_attr "prefix_extra" "1")
6094 (set_attr "mode" "TI")])
;; Named `pmovsxbd` pattern (V4SI result).  The source is a full V16QI SSE
;; register ("x", no memory alternative) from which elements are selected.
6096 (define_insn "sse4_1_extendv4qiv4si2"
6097 [(set (match_operand:V4SI 0 "register_operand" "=x")
6100 (match_operand:V16QI 1 "register_operand" "x")
6101 (parallel [(const_int 0)
6106 "pmovsxbd\t{%1, %0|%0, %1}"
6107 [(set_attr "type" "ssemov")
6108 (set_attr "prefix_extra" "1")
6109 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovsxbd` variant: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6111 (define_insn "*sse4_1_extendv4qiv4si2"
6112 [(set (match_operand:V4SI 0 "register_operand" "=x")
6115 (vec_duplicate:V16QI
6116 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6117 (parallel [(const_int 0)
6122 "pmovsxbd\t{%1, %0|%0, %1}"
6123 [(set_attr "type" "ssemov")
6124 (set_attr "prefix_extra" "1")
6125 (set_attr "mode" "TI")])
;; Named `pmovsxbq` pattern (V2DI result).  The source is a full V16QI SSE
;; register ("x", no memory alternative) from which elements are selected.
6127 (define_insn "sse4_1_extendv2qiv2di2"
6128 [(set (match_operand:V2DI 0 "register_operand" "=x")
6131 (match_operand:V16QI 1 "register_operand" "x")
6132 (parallel [(const_int 0)
6135 "pmovsxbq\t{%1, %0|%0, %1}"
6136 [(set_attr "type" "ssemov")
6137 (set_attr "prefix_extra" "1")
6138 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovsxbq` variant: the source is a V2QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6140 (define_insn "*sse4_1_extendv2qiv2di2"
6141 [(set (match_operand:V2DI 0 "register_operand" "=x")
6144 (vec_duplicate:V16QI
6145 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6146 (parallel [(const_int 0)
6149 "pmovsxbq\t{%1, %0|%0, %1}"
6150 [(set_attr "type" "ssemov")
6151 (set_attr "prefix_extra" "1")
6152 (set_attr "mode" "TI")])
;; Named `pmovsxwd` pattern (V4SI result).  The source is a full V8HI SSE
;; register ("x", no memory alternative) from which elements are selected.
6154 (define_insn "sse4_1_extendv4hiv4si2"
6155 [(set (match_operand:V4SI 0 "register_operand" "=x")
6158 (match_operand:V8HI 1 "register_operand" "x")
6159 (parallel [(const_int 0)
6164 "pmovsxwd\t{%1, %0|%0, %1}"
6165 [(set_attr "type" "ssemov")
6166 (set_attr "prefix_extra" "1")
6167 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovsxwd` variant with a register-or-memory source.
;; The source operand is V4HI: the result has four SI elements, so four HI
;; elements (64 bits) are consumed.  This matches the zero-extending
;; counterpart (*sse4_1_zero_extendv4hiv4si2) and the byte variants, whose
;; narrow operand always has as many elements as the result.
6169 (define_insn "*sse4_1_extendv4hiv4si2"
6170 [(set (match_operand:V4SI 0 "register_operand" "=x")
6174 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6175 (parallel [(const_int 0)
6180 "pmovsxwd\t{%1, %0|%0, %1}"
6181 [(set_attr "type" "ssemov")
6182 (set_attr "prefix_extra" "1")
6183 (set_attr "mode" "TI")])
;; Named `pmovsxwq` pattern (V2DI result).  The source is a full V8HI SSE
;; register ("x", no memory alternative) from which elements are selected.
6185 (define_insn "sse4_1_extendv2hiv2di2"
6186 [(set (match_operand:V2DI 0 "register_operand" "=x")
6189 (match_operand:V8HI 1 "register_operand" "x")
6190 (parallel [(const_int 0)
6193 "pmovsxwq\t{%1, %0|%0, %1}"
6194 [(set_attr "type" "ssemov")
6195 (set_attr "prefix_extra" "1")
6196 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovsxwq` variant with a register-or-memory source.
;; The source operand is V2HI: the result has two DI elements, so two HI
;; elements (32 bits) are consumed.  This matches the zero-extending
;; counterpart (*sse4_1_zero_extendv2hiv2di2) and the byte variants, whose
;; narrow operand always has as many elements as the result.
6198 (define_insn "*sse4_1_extendv2hiv2di2"
6199 [(set (match_operand:V2DI 0 "register_operand" "=x")
6203 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6204 (parallel [(const_int 0)
6207 "pmovsxwq\t{%1, %0|%0, %1}"
6208 [(set_attr "type" "ssemov")
6209 (set_attr "prefix_extra" "1")
6210 (set_attr "mode" "TI")])
;; Named `pmovsxdq` pattern (V2DI result).  The source is a full V4SI SSE
;; register ("x", no memory alternative) from which elements are selected.
6212 (define_insn "sse4_1_extendv2siv2di2"
6213 [(set (match_operand:V2DI 0 "register_operand" "=x")
6216 (match_operand:V4SI 1 "register_operand" "x")
6217 (parallel [(const_int 0)
6220 "pmovsxdq\t{%1, %0|%0, %1}"
6221 [(set_attr "type" "ssemov")
6222 (set_attr "prefix_extra" "1")
6223 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovsxdq` variant: the source is a V2SI register-or-memory
;; operand.
6225 (define_insn "*sse4_1_extendv2siv2di2"
6226 [(set (match_operand:V2DI 0 "register_operand" "=x")
6230 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6231 (parallel [(const_int 0)
6234 "pmovsxdq\t{%1, %0|%0, %1}"
6235 [(set_attr "type" "ssemov")
6236 (set_attr "prefix_extra" "1")
6237 (set_attr "mode" "TI")])
;; Named `pmovzxbw` pattern (V8HI result).  The source is a full V16QI SSE
;; register ("x", no memory alternative) from which elements are selected.
6239 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6240 [(set (match_operand:V8HI 0 "register_operand" "=x")
6243 (match_operand:V16QI 1 "register_operand" "x")
6244 (parallel [(const_int 0)
6253 "pmovzxbw\t{%1, %0|%0, %1}"
6254 [(set_attr "type" "ssemov")
6255 (set_attr "prefix_extra" "1")
6256 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovzxbw` variant: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6258 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6259 [(set (match_operand:V8HI 0 "register_operand" "=x")
6262 (vec_duplicate:V16QI
6263 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6264 (parallel [(const_int 0)
6273 "pmovzxbw\t{%1, %0|%0, %1}"
6274 [(set_attr "type" "ssemov")
6275 (set_attr "prefix_extra" "1")
6276 (set_attr "mode" "TI")])
;; Named `pmovzxbd` pattern (V4SI result).  The source is a full V16QI SSE
;; register ("x", no memory alternative) from which elements are selected.
6278 (define_insn "sse4_1_zero_extendv4qiv4si2"
6279 [(set (match_operand:V4SI 0 "register_operand" "=x")
6282 (match_operand:V16QI 1 "register_operand" "x")
6283 (parallel [(const_int 0)
6288 "pmovzxbd\t{%1, %0|%0, %1}"
6289 [(set_attr "type" "ssemov")
6290 (set_attr "prefix_extra" "1")
6291 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovzxbd` variant: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6293 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6294 [(set (match_operand:V4SI 0 "register_operand" "=x")
6297 (vec_duplicate:V16QI
6298 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6299 (parallel [(const_int 0)
6304 "pmovzxbd\t{%1, %0|%0, %1}"
6305 [(set_attr "type" "ssemov")
6306 (set_attr "prefix_extra" "1")
6307 (set_attr "mode" "TI")])
;; Named `pmovzxbq` pattern (V2DI result).  The source is a full V16QI SSE
;; register ("x", no memory alternative) from which elements are selected.
6309 (define_insn "sse4_1_zero_extendv2qiv2di2"
6310 [(set (match_operand:V2DI 0 "register_operand" "=x")
6313 (match_operand:V16QI 1 "register_operand" "x")
6314 (parallel [(const_int 0)
6317 "pmovzxbq\t{%1, %0|%0, %1}"
6318 [(set_attr "type" "ssemov")
6319 (set_attr "prefix_extra" "1")
6320 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovzxbq` variant: the source is a V2QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6322 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6323 [(set (match_operand:V2DI 0 "register_operand" "=x")
6326 (vec_duplicate:V16QI
6327 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6328 (parallel [(const_int 0)
6331 "pmovzxbq\t{%1, %0|%0, %1}"
6332 [(set_attr "type" "ssemov")
6333 (set_attr "prefix_extra" "1")
6334 (set_attr "mode" "TI")])
;; Named `pmovzxwd` pattern (V4SI result).  The source is a full V8HI SSE
;; register ("x", no memory alternative) from which elements are selected.
6336 (define_insn "sse4_1_zero_extendv4hiv4si2"
6337 [(set (match_operand:V4SI 0 "register_operand" "=x")
6340 (match_operand:V8HI 1 "register_operand" "x")
6341 (parallel [(const_int 0)
6346 "pmovzxwd\t{%1, %0|%0, %1}"
6347 [(set_attr "type" "ssemov")
6348 (set_attr "prefix_extra" "1")
6349 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovzxwd` variant: the source is a V4HI register-or-memory
;; operand.
6351 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6352 [(set (match_operand:V4SI 0 "register_operand" "=x")
6356 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6357 (parallel [(const_int 0)
6362 "pmovzxwd\t{%1, %0|%0, %1}"
6363 [(set_attr "type" "ssemov")
6364 (set_attr "prefix_extra" "1")
6365 (set_attr "mode" "TI")])
;; Named `pmovzxwq` pattern (V2DI result).  The source is a full V8HI SSE
;; register ("x", no memory alternative) from which elements are selected.
6367 (define_insn "sse4_1_zero_extendv2hiv2di2"
6368 [(set (match_operand:V2DI 0 "register_operand" "=x")
6371 (match_operand:V8HI 1 "register_operand" "x")
6372 (parallel [(const_int 0)
6375 "pmovzxwq\t{%1, %0|%0, %1}"
6376 [(set_attr "type" "ssemov")
6377 (set_attr "prefix_extra" "1")
6378 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovzxwq` variant: the source is a V2HI register-or-memory
;; operand.
6380 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6381 [(set (match_operand:V2DI 0 "register_operand" "=x")
6385 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6386 (parallel [(const_int 0)
6389 "pmovzxwq\t{%1, %0|%0, %1}"
6390 [(set_attr "type" "ssemov")
6391 (set_attr "prefix_extra" "1")
6392 (set_attr "mode" "TI")])
;; Named `pmovzxdq` pattern (V2DI result).  The source is a full V4SI SSE
;; register ("x", no memory alternative) from which elements are selected.
6394 (define_insn "sse4_1_zero_extendv2siv2di2"
6395 [(set (match_operand:V2DI 0 "register_operand" "=x")
6398 (match_operand:V4SI 1 "register_operand" "x")
6399 (parallel [(const_int 0)
6402 "pmovzxdq\t{%1, %0|%0, %1}"
6403 [(set_attr "type" "ssemov")
6404 (set_attr "prefix_extra" "1")
6405 (set_attr "mode" "TI")])
;; Unnamed ("*") `pmovzxdq` variant: the source is a V2SI register-or-memory
;; operand.
6407 (define_insn "*sse4_1_zero_extendv2siv2di2"
6408 [(set (match_operand:V2DI 0 "register_operand" "=x")
6412 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6413 (parallel [(const_int 0)
6416 "pmovzxdq\t{%1, %0|%0, %1}"
6417 [(set_attr "type" "ssemov")
6418 (set_attr "prefix_extra" "1")
6419 (set_attr "mode" "TI")])
6421 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6422 ;; But it is not really a compare instruction.
;; Prints `ptest`.  The only output is FLAGS_REG (mode CC), set from an
;; unspec of the two V2DI inputs: operand 0 is an SSE register, operand 1
;; an SSE register or memory.
6423 (define_insn "sse4_1_ptest"
6424 [(set (reg:CC FLAGS_REG)
6425 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6426 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6429 "ptest\t{%1, %0|%0, %1}"
6430 [(set_attr "type" "ssecomi")
6431 (set_attr "prefix_extra" "1")
6432 (set_attr "mode" "TI")])
;; Prints `roundpd` on V2DF, modelled as an unspec.
;; Operand 1 may be an SSE register or memory and is not tied to the output;
;; operand 2 is an immediate restricted by const_0_to_15_operand.
6434 (define_insn "sse4_1_roundpd"
6435 [(set (match_operand:V2DF 0 "register_operand" "=x")
6436 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6437 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6440 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6441 [(set_attr "type" "ssecvt")
6442 (set_attr "prefix_extra" "1")
6443 (set_attr "mode" "V2DF")])
;; Prints `roundps` on V4SF, modelled as an unspec.
;; Operand 1 may be an SSE register or memory and is not tied to the output;
;; operand 2 is an immediate restricted by const_0_to_15_operand.
6445 (define_insn "sse4_1_roundps"
6446 [(set (match_operand:V4SF 0 "register_operand" "=x")
6447 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6448 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6451 "roundps\t{%2, %1, %0|%0, %1, %2}"
6452 [(set_attr "type" "ssecvt")
6453 (set_attr "prefix_extra" "1")
6454 (set_attr "mode" "V4SF")])
;; Prints `roundsd` on V2DF.  The unspec takes operand 2 (an SSE register)
;; and the immediate operand 3 (0..15); operand 1 is tied to the output
;; register ("0").
6456 (define_insn "sse4_1_roundsd"
6457 [(set (match_operand:V2DF 0 "register_operand" "=x")
6459 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6460 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6462 (match_operand:V2DF 1 "register_operand" "0")
6465 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6466 [(set_attr "type" "ssecvt")
6467 (set_attr "prefix_extra" "1")
6468 (set_attr "mode" "V2DF")])
;; Prints `roundss` on V4SF.  The unspec takes operand 2 (an SSE register)
;; and the immediate operand 3 (0..15); operand 1 is tied to the output
;; register ("0").
6470 (define_insn "sse4_1_roundss"
6471 [(set (match_operand:V4SF 0 "register_operand" "=x")
6473 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6474 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6476 (match_operand:V4SF 1 "register_operand" "0")
6479 "roundss\t{%3, %2, %0|%0, %2, %3}"
6480 [(set_attr "type" "ssecvt")
6481 (set_attr "prefix_extra" "1")
6482 (set_attr "mode" "V4SF")])
6484 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6486 ;; Intel SSE4.2 string/text processing instructions
6488 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in the "c"
;; register (operand 0), a V16QI value under the "Y0" constraint
;; (operand 1), and FLAGS_REG.  It is split only while reload has neither
;; started nor completed.  The split code reads the REG_UNUSED notes on
;; curr_insn to learn which of the three results are live, and emits the
;; pcmpestri, pcmpestrm and/or cconly patterns from that.
6490 (define_insn_and_split "sse4_2_pcmpestr"
6491 [(set (match_operand:SI 0 "register_operand" "=c,c")
6493 [(match_operand:V16QI 2 "register_operand" "x,x")
6494 (match_operand:SI 3 "register_operand" "a,a")
6495 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6496 (match_operand:SI 5 "register_operand" "d,d")
6497 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6499 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6507 (set (reg:CC FLAGS_REG)
6516 && !(reload_completed || reload_in_progress)"
6521 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6522 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6523 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6526 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6527 operands[3], operands[4],
6528 operands[5], operands[6]));
6530 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6531 operands[3], operands[4],
6532 operands[5], operands[6]));
6533 if (flags && !(ecx || xmm0))
6534 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6535 operands[4], operands[5],
6539 [(set_attr "type" "sselog")
6540 (set_attr "prefix_data16" "1")
6541 (set_attr "prefix_extra" "1")
6542 (set_attr "memory" "none,load")
6543 (set_attr "mode" "TI")])
;; Prints `pcmpestri`: the SI result lands in the "c" register (operand 0)
;; and FLAGS_REG is set as well.  Operands 2 and 4 are pinned to the "a" and
;; "d" registers; operand 3 may be an SSE register or memory (two
;; alternatives, reflected in the "memory" attribute).
6545 (define_insn "sse4_2_pcmpestri"
6546 [(set (match_operand:SI 0 "register_operand" "=c,c")
6548 [(match_operand:V16QI 1 "register_operand" "x,x")
6549 (match_operand:SI 2 "register_operand" "a,a")
6550 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6551 (match_operand:SI 4 "register_operand" "d,d")
6552 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6554 (set (reg:CC FLAGS_REG)
6563 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6564 [(set_attr "type" "sselog")
6565 (set_attr "prefix_data16" "1")
6566 (set_attr "prefix_extra" "1")
6567 (set_attr "memory" "none,load")
6568 (set_attr "mode" "TI")])
;; Prints `pcmpestrm`: the V16QI result uses the "Y0" constraint (operand 0)
;; and FLAGS_REG is set as well.  Operands 2 and 4 are pinned to the "a" and
;; "d" registers; operand 3 may be an SSE register or memory (two
;; alternatives, reflected in the "memory" attribute).
6570 (define_insn "sse4_2_pcmpestrm"
6571 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6573 [(match_operand:V16QI 1 "register_operand" "x,x")
6574 (match_operand:SI 2 "register_operand" "a,a")
6575 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6576 (match_operand:SI 4 "register_operand" "d,d")
6577 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6579 (set (reg:CC FLAGS_REG)
6588 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6589 [(set_attr "type" "sselog")
6590 (set_attr "prefix_data16" "1")
6591 (set_attr "prefix_extra" "1")
6592 (set_attr "memory" "none,load")
6593 (set_attr "mode" "TI")])
;; Flags-only pcmpestr pattern: FLAGS_REG is the sole set; the other
;; results are modelled as clobbered scratches.  Four alternatives:
;;   0,1  print pcmpestrm and clobber a "Y0" V16QI scratch (operand 5);
;;   2,3  print pcmpestri and clobber a "c" SI scratch (operand 6).
;; Within each pair, operand 2 is a register first and memory second.
6595 (define_insn "sse4_2_pcmpestr_cconly"
6596 [(set (reg:CC FLAGS_REG)
6598 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6599 (match_operand:SI 1 "register_operand" "a,a,a,a")
6600 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6601 (match_operand:SI 3 "register_operand" "d,d,d,d")
6602 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6604 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6605 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6608 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6609 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6610 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6611 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6612 [(set_attr "type" "sselog")
6613 (set_attr "prefix_data16" "1")
6614 (set_attr "prefix_extra" "1")
6615 (set_attr "memory" "none,load,none,load")
6616 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in the "c"
;; register (operand 0), a V16QI value under the "Y0" constraint
;; (operand 1), and FLAGS_REG.  It is split only while reload has neither
;; started nor completed.  The split code reads the REG_UNUSED notes on
;; curr_insn to learn which of the three results are live, and emits the
;; pcmpistri, pcmpistrm and/or cconly patterns from that.
6618 (define_insn_and_split "sse4_2_pcmpistr"
6619 [(set (match_operand:SI 0 "register_operand" "=c,c")
6621 [(match_operand:V16QI 2 "register_operand" "x,x")
6622 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6623 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6625 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6631 (set (reg:CC FLAGS_REG)
6638 && !(reload_completed || reload_in_progress)"
6643 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6644 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6645 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6648 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6649 operands[3], operands[4]));
6651 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6652 operands[3], operands[4]));
6653 if (flags && !(ecx || xmm0))
6654 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6658 [(set_attr "type" "sselog")
6659 (set_attr "prefix_data16" "1")
6660 (set_attr "prefix_extra" "1")
6661 (set_attr "memory" "none,load")
6662 (set_attr "mode" "TI")])
;; Prints `pcmpistri`: the SI result lands in the "c" register (operand 0)
;; and FLAGS_REG is set as well.  Operand 2 may be an SSE register or memory
;; (two alternatives, reflected in the "memory" attribute).
6664 (define_insn "sse4_2_pcmpistri"
6665 [(set (match_operand:SI 0 "register_operand" "=c,c")
6667 [(match_operand:V16QI 1 "register_operand" "x,x")
6668 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6669 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6671 (set (reg:CC FLAGS_REG)
6678 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6679 [(set_attr "type" "sselog")
6680 (set_attr "prefix_data16" "1")
6681 (set_attr "prefix_extra" "1")
6682 (set_attr "memory" "none,load")
6683 (set_attr "mode" "TI")])
;; Prints `pcmpistrm`: the V16QI result uses the "Y0" constraint (operand 0)
;; and FLAGS_REG is set as well.  Operand 2 may be an SSE register or memory
;; (two alternatives, reflected in the "memory" attribute).
6685 (define_insn "sse4_2_pcmpistrm"
6686 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6688 [(match_operand:V16QI 1 "register_operand" "x,x")
6689 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6690 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6692 (set (reg:CC FLAGS_REG)
6699 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6700 [(set_attr "type" "sselog")
6701 (set_attr "prefix_data16" "1")
6702 (set_attr "prefix_extra" "1")
6703 (set_attr "memory" "none,load")
6704 (set_attr "mode" "TI")])
;; Flags-only pcmpistr pattern: FLAGS_REG is the sole set; the other
;; results are modelled as clobbered scratches.  Four alternatives:
;;   0,1  print pcmpistrm and clobber a "Y0" V16QI scratch (operand 3);
;;   2,3  print pcmpistri and clobber a "c" SI scratch (operand 4).
;; Within each pair, operand 1 is a register first and memory second.
6706 (define_insn "sse4_2_pcmpistr_cconly"
6707 [(set (reg:CC FLAGS_REG)
6709 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6710 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6711 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6713 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6714 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6717 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6718 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6719 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6720 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
6721 [(set_attr "type" "sselog")
6722 (set_attr "prefix_data16" "1")
6723 (set_attr "prefix_extra" "1")
6724 (set_attr "memory" "none,load,none,load")
6725 (set_attr "mode" "TI")])