1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; correspond to the element widths, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for V16QI, V8HI, V4SI, V2DI respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves in each SSEMODEI integer vector mode.
;; The visible preparation code hands the operands to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Matcher for integer vector moves.  Constraint alternatives:
;;   0: SSE constant ("C")  -> xmm
;;   1: xmm or memory       -> xmm
;;   2: xmm                 -> memory
;; The visible part of the condition requires at least one operand to be
;; a register.  The output code returns standard_sse_constant_opcode for
;; one case (presumably the constant alternative) and otherwise movaps
;; when the insn's "mode" attribute is V4SF, else movdqa.
;; The "mode" attribute expression tests optimize_size, TARGET_SSE2 and,
;; for alternative 2, TARGET_SSE_TYPELESS_STORES; "TI" is the fallback.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
;; Two alternatives: the DImode source lives in integer registers ("r",
;; which needs an early-clobbered xmm scratch, "=&x") or in memory ("m",
;; no scratch needed, "X").  The pattern is only enabled for
;; !TARGET_64BIT with SSE2 and TARGET_INTER_UNIT_MOVES, and is split only
;; after reload has completed.
;; The split code either loads the two SImode halves (subregs at byte
;; offsets 0 and 4) with sse2_loadld and interleaves them with
;; sse2_punpckldq, or emits a single vec_concatv2di with zero.
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 switch (which_alternative)
114 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
115 Assemble the 64-bit DImode value in an xmm register. */
116 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 0)));
118 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
119 gen_rtx_SUBREG (SImode, operands[1], 4)));
120 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
124 emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
;; Expander for V4SF moves; the visible preparation code calls
;; ix86_expand_vector_move.
133 (define_expand "movv4sf"
134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
135 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
138 ix86_expand_vector_move (V4SFmode, operands);
;; Matcher for V4SF moves.  Constraint alternatives:
;;   0: SSE constant ("C") -> xmm;  1: xmm/memory -> xmm;  2: xmm -> memory.
;; The visible part of the condition requires at least one operand to be
;; a register.  The output code returns standard_sse_constant_opcode for
;; one case (presumably the constant alternative) and movaps otherwise.
142 (define_insn "*movv4sf_internal"
143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
144 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
146 && (register_operand (operands[0], V4SFmode)
147 || register_operand (operands[1], V4SFmode))"
149 switch (which_alternative)
152 return standard_sse_constant_opcode (insn, operands[1]);
155 return "movaps\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set_attr "mode" "V4SF")])
;; NOTE(review): the header line of this pattern is not visible here; it
;; looks like a post-reload split (condition "TARGET_SSE && reload_completed").
;; It matches a V4SF register set from a zero_extended_scalar_load_operand.
;; The preparation code narrows operand 1 to its SFmode subreg at byte
;; offset 0 and sets operand 2 to the V4SF zero constant; the replacement
;; rtx uses a vec_duplicate of operand 1.
164 [(set (match_operand:V4SF 0 "register_operand" "")
165 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
166 "TARGET_SSE && reload_completed"
169 (vec_duplicate:V4SF (match_dup 1))
173 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
174 operands[2] = CONST0_RTX (V4SFmode);
;; Expander for V2DF moves; the visible preparation code calls
;; ix86_expand_vector_move.
177 (define_expand "movv2df"
178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
179 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
182 ix86_expand_vector_move (V2DFmode, operands);
;; Matcher for V2DF moves.  Constraint alternatives:
;;   0: SSE constant ("C") -> xmm;  1: xmm/memory -> xmm;  2: xmm -> memory.
;; The visible part of the condition requires at least one operand to be
;; a register.  The output code returns standard_sse_constant_opcode for
;; one case (presumably the constant alternative) and otherwise movaps
;; when the insn's "mode" attribute is V4SF, else movapd.
;; The "mode" attribute is V4SF when optimize_size is set, when
;; TARGET_SSE2 is off, or for alternative 2 under the
;; TARGET_SSE_TYPELESS_STORES test; otherwise it is V2DF.
186 (define_insn "*movv2df_internal"
187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
188 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
190 && (register_operand (operands[0], V2DFmode)
191 || register_operand (operands[1], V2DFmode))"
193 switch (which_alternative)
196 return standard_sse_constant_opcode (insn, operands[1]);
199 if (get_attr_mode (insn) == MODE_V4SF)
200 return "movaps\t{%1, %0|%0, %1}";
202 return "movapd\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
210 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
211 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
212 (and (eq_attr "alternative" "2")
213 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
215 (const_string "V4SF")
216 (const_string "V2DF")))])
;; NOTE(review): the header line of this pattern is not visible here; it
;; looks like a post-reload split (condition "TARGET_SSE2 && reload_completed").
;; It rewrites a V2DF register set from a zero_extended_scalar_load_operand
;; as a vec_concat of the DFmode subreg of operand 1 (byte offset 0) with
;; the DFmode zero constant placed in operand 2.
219 [(set (match_operand:V2DF 0 "register_operand" "")
220 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
221 "TARGET_SSE2 && reload_completed"
222 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
224 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
225 operands[2] = CONST0_RTX (DFmode);
;; Expander for pushing a vector register in any SSEMODE mode; the
;; visible preparation code calls ix86_expand_push.
228 (define_expand "push<mode>1"
229 [(match_operand:SSEMODE 0 "register_operand" "")]
232 ix86_expand_push (<MODE>mode, operands[0]);
;; Expander for misaligned vector moves in any SSEMODE mode; the visible
;; preparation code calls ix86_expand_vector_move_misalign.
236 (define_expand "movmisalign<mode>"
237 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
238 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
241 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move wrapped in an unspec, emitted as movups.  Alternatives are
;; xmm/memory -> xmm and xmm -> memory; the condition rejects
;; memory-to-memory moves.
;; The "mode" attribute is V4SF to match the packed-single operands and
;; instruction (it previously read V2DF, which describes the packed-double
;; movupd form instead).
245 (define_insn "sse_movups"
246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
247 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
249 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
250 "movups\t{%1, %0|%0, %1}"
251 [(set_attr "type" "ssemov")
252 (set_attr "mode" "V4SF")])
;; V2DF move wrapped in an unspec, emitted as movupd.  Alternatives are
;; xmm/memory -> xmm and xmm -> memory; the condition requires SSE2 and
;; rejects memory-to-memory moves.
254 (define_insn "sse2_movupd"
255 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
256 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
258 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
259 "movupd\t{%1, %0|%0, %1}"
260 [(set_attr "type" "ssemov")
261 (set_attr "mode" "V2DF")])
;; V16QI move wrapped in an unspec, emitted as movdqu.  Alternatives are
;; xmm/memory -> xmm and xmm -> memory; the condition requires SSE2 and
;; rejects memory-to-memory moves.
263 (define_insn "sse2_movdqu"
264 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
265 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
268 "movdqu\t{%1, %0|%0, %1}"
269 [(set_attr "type" "ssemov")
270 (set_attr "mode" "TI")])
;; Store of a V4SF xmm register to memory, wrapped in an unspec and
;; emitted as movntps.
272 (define_insn "sse_movntv4sf"
273 [(set (match_operand:V4SF 0 "memory_operand" "=m")
274 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
277 "movntps\t{%1, %0|%0, %1}"
278 [(set_attr "type" "ssemov")
279 (set_attr "mode" "V4SF")])
;; Store of a V2DF xmm register to memory, wrapped in an unspec and
;; emitted as movntpd.
281 (define_insn "sse2_movntv2df"
282 [(set (match_operand:V2DF 0 "memory_operand" "=m")
283 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
286 "movntpd\t{%1, %0|%0, %1}"
287 [(set_attr "type" "ssecvt")
288 (set_attr "mode" "V2DF")])
;; Store of a V2DI xmm register to memory, wrapped in an unspec and
;; emitted as movntdq.
290 (define_insn "sse2_movntv2di"
291 [(set (match_operand:V2DI 0 "memory_operand" "=m")
292 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
295 "movntdq\t{%1, %0|%0, %1}"
296 [(set_attr "type" "ssecvt")
297 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, wrapped in an
;; unspec and emitted as movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm whether this is intentional.
299 (define_insn "sse2_movntsi"
300 [(set (match_operand:SI 0 "memory_operand" "=m")
301 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
304 "movnti\t{%1, %0|%0, %1}"
305 [(set_attr "type" "ssecvt")
306 (set_attr "mode" "V2DF")])
;; Load of a V16QI value from memory into an xmm register, wrapped in an
;; unspec and emitted as lddqu.
308 (define_insn "sse3_lddqu"
309 [(set (match_operand:V16QI 0 "register_operand" "=x")
310 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
313 "lddqu\t{%1, %0|%0, %1}"
314 [(set_attr "type" "ssecvt")
315 (set_attr "mode" "TI")])
317 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
319 ;; Parallel single-precision floating point arithmetic
321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; expansion is done entirely by
;; ix86_expand_fp_absneg_operator with code NEG.
323 (define_expand "negv4sf2"
324 [(set (match_operand:V4SF 0 "register_operand" "")
325 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
327 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; expansion is done entirely by
;; ix86_expand_fp_absneg_operator with code ABS.
329 (define_expand "absv4sf2"
330 [(set (match_operand:V4SF 0 "register_operand" "")
331 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
333 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
335 (define_expand "addv4sf3"
336 [(set (match_operand:V4SF 0 "register_operand" "")
337 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
338 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
340 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Matcher for packed-single addition, emitted as addps.  Operand 1 is
;; tied to the destination and marked commutative ("%0"); operand 2 may
;; be an xmm register or memory.
342 (define_insn "*addv4sf3"
343 [(set (match_operand:V4SF 0 "register_operand" "=x")
344 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
345 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
346 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
347 "addps\t{%2, %0|%0, %2}"
348 [(set_attr "type" "sseadd")
349 (set_attr "mode" "V4SF")])
;; Scalar-element form of the V4SF addition, emitted as addss with the
;; "mode" attribute SF.  Operand 1 is tied to the destination.
351 (define_insn "sse_vmaddv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
354 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "0")
355 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
358 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
359 "addss\t{%2, %0|%0, %2}"
360 [(set_attr "type" "sseadd")
361 (set_attr "mode" "SF")])
;; V4SF subtraction expander; operand 1 must already be a register.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy.
363 (define_expand "subv4sf3"
364 [(set (match_operand:V4SF 0 "register_operand" "")
365 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
366 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
368 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Matcher for packed-single subtraction, emitted as subps.  Operand 1
;; is a register tied to the destination (no commutative marker);
;; operand 2 may be an xmm register or memory.
370 (define_insn "*subv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "=x")
372 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
373 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
375 "subps\t{%2, %0|%0, %2}"
376 [(set_attr "type" "sseadd")
377 (set_attr "mode" "V4SF")])
;; Scalar-element form of the V4SF subtraction, emitted as subss with
;; the "mode" attribute SF.  Operand 1 is tied to the destination.
379 (define_insn "sse_vmsubv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
382 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
383 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
387 "subss\t{%2, %0|%0, %2}"
388 [(set_attr "type" "sseadd")
389 (set_attr "mode" "SF")])
;; V4SF multiplication expander; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
391 (define_expand "mulv4sf3"
392 [(set (match_operand:V4SF 0 "register_operand" "")
393 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
394 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
396 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Matcher for packed-single multiplication, emitted as mulps.  Operand 1
;; is tied to the destination and marked commutative ("%0").
398 (define_insn "*mulv4sf3"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
400 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
401 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
402 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
403 "mulps\t{%2, %0|%0, %2}"
404 [(set_attr "type" "ssemul")
405 (set_attr "mode" "V4SF")])
;; Scalar-element form of the V4SF multiplication, emitted as mulss with
;; the "mode" attribute SF.  Operand 1 is tied to the destination.
407 (define_insn "sse_vmmulv4sf3"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "0")
411 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
414 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
415 "mulss\t{%2, %0|%0, %2}"
416 [(set_attr "type" "ssemul")
417 (set_attr "mode" "SF")])
;; V4SF division expander; operand 1 must already be a register.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy.
419 (define_expand "divv4sf3"
420 [(set (match_operand:V4SF 0 "register_operand" "")
421 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
422 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
424 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Matcher for packed-single division, emitted as divps.  Operand 1 is a
;; register tied to the destination; operand 2 may be xmm or memory.
426 (define_insn "*divv4sf3"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
429 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
431 "divps\t{%2, %0|%0, %2}"
432 [(set_attr "type" "ssediv")
433 (set_attr "mode" "V4SF")])
;; Scalar-element form of the V4SF division, emitted as divss with the
;; "mode" attribute SF.  Operand 1 is tied to the destination.
435 (define_insn "sse_vmdivv4sf3"
436 [(set (match_operand:V4SF 0 "register_operand" "=x")
438 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
439 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
443 "divss\t{%2, %0|%0, %2}"
444 [(set_attr "type" "ssediv")
445 (set_attr "mode" "SF")])
;; Packed-single reciprocal, modelled with UNSPEC_RCP on operand 1
;; (xmm or memory) and emitted as rcpps.
447 (define_insn "sse_rcpv4sf2"
448 [(set (match_operand:V4SF 0 "register_operand" "=x")
450 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
452 "rcpps\t{%1, %0|%0, %1}"
453 [(set_attr "type" "sse")
454 (set_attr "mode" "V4SF")])
;; Scalar-element reciprocal, emitted as rcpss with "mode" SF.  Operand 1
;; (xmm or memory) is the unspec input; operand 2 is a register tied to
;; the destination.
456 (define_insn "sse_vmrcpv4sf2"
457 [(set (match_operand:V4SF 0 "register_operand" "=x")
459 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
461 (match_operand:V4SF 2 "register_operand" "0")
464 "rcpss\t{%1, %0|%0, %1}"
465 [(set_attr "type" "sse")
466 (set_attr "mode" "SF")])
;; Packed-single reciprocal square root, modelled with UNSPEC_RSQRT on
;; operand 1 (xmm or memory) and emitted as rsqrtps.
468 (define_insn "sse_rsqrtv4sf2"
469 [(set (match_operand:V4SF 0 "register_operand" "=x")
471 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
473 "rsqrtps\t{%1, %0|%0, %1}"
474 [(set_attr "type" "sse")
475 (set_attr "mode" "V4SF")])
;; Scalar-element reciprocal square root, emitted as rsqrtss with "mode"
;; SF.  Operand 1 (xmm or memory) is the unspec input; operand 2 is a
;; register tied to the destination.
477 (define_insn "sse_vmrsqrtv4sf2"
478 [(set (match_operand:V4SF 0 "register_operand" "=x")
480 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
482 (match_operand:V4SF 2 "register_operand" "0")
485 "rsqrtss\t{%1, %0|%0, %1}"
486 [(set_attr "type" "sse")
487 (set_attr "mode" "SF")])
;; Packed-single square root of operand 1 (xmm or memory), emitted as
;; sqrtps.
489 (define_insn "sqrtv4sf2"
490 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
493 "sqrtps\t{%1, %0|%0, %1}"
494 [(set_attr "type" "sse")
495 (set_attr "mode" "V4SF")])
;; Scalar-element square root, emitted as sqrtss with "mode" SF.
;; Operand 1 (xmm or memory) is the sqrt input; operand 2 is a register
;; tied to the destination.
497 (define_insn "sse_vmsqrtv4sf2"
498 [(set (match_operand:V4SF 0 "register_operand" "=x")
500 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
501 (match_operand:V4SF 2 "register_operand" "0")
504 "sqrtss\t{%1, %0|%0, %1}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
508 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
509 ;; isn't really correct, as those rtl operators aren't defined when
510 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is off, operand 1
;; is first forced into a register; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
512 (define_expand "smaxv4sf3"
513 [(set (match_operand:V4SF 0 "register_operand" "")
514 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
515 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
518 if (!flag_finite_math_only)
519 operands[1] = force_reg (V4SFmode, operands[1]);
520 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Matcher for packed-single maximum under flag_finite_math_only, emitted
;; as maxps.  Operand 1 is tied to the destination and marked
;; commutative ("%0").
523 (define_insn "*smaxv4sf3_finite"
524 [(set (match_operand:V4SF 0 "register_operand" "=x")
525 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
526 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
527 "TARGET_SSE && flag_finite_math_only
528 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
529 "maxps\t{%2, %0|%0, %2}"
530 [(set_attr "type" "sse")
531 (set_attr "mode" "V4SF")])
;; Matcher for packed-single maximum without the commutative marker,
;; emitted as maxps.  Operand 1 must be a register tied to the
;; destination.
533 (define_insn "*smaxv4sf3"
534 [(set (match_operand:V4SF 0 "register_operand" "=x")
535 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
536 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
538 "maxps\t{%2, %0|%0, %2}"
539 [(set_attr "type" "sse")
540 (set_attr "mode" "V4SF")])
;; Scalar-element form of the V4SF maximum, emitted as maxss with the
;; "mode" attribute SF.  Operand 1 is tied to the destination.
542 (define_insn "sse_vmsmaxv4sf3"
543 [(set (match_operand:V4SF 0 "register_operand" "=x")
545 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
546 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
550 "maxss\t{%2, %0|%0, %2}"
551 [(set_attr "type" "sse")
552 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is off, operand 1
;; is first forced into a register; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
554 (define_expand "sminv4sf3"
555 [(set (match_operand:V4SF 0 "register_operand" "")
556 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
557 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
560 if (!flag_finite_math_only)
561 operands[1] = force_reg (V4SFmode, operands[1]);
562 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Matcher for packed-single minimum under flag_finite_math_only, emitted
;; as minps.  Operand 1 is tied to the destination and marked
;; commutative ("%0").
565 (define_insn "*sminv4sf3_finite"
566 [(set (match_operand:V4SF 0 "register_operand" "=x")
567 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
568 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
569 "TARGET_SSE && flag_finite_math_only
570 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
571 "minps\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sse")
573 (set_attr "mode" "V4SF")])
;; Matcher for packed-single minimum without the commutative marker,
;; emitted as minps.  Operand 1 must be a register tied to the
;; destination.
575 (define_insn "*sminv4sf3"
576 [(set (match_operand:V4SF 0 "register_operand" "=x")
577 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
578 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
580 "minps\t{%2, %0|%0, %2}"
581 [(set_attr "type" "sse")
582 (set_attr "mode" "V4SF")])
;; Scalar-element form of the V4SF minimum, emitted as minss with the
;; "mode" attribute SF.  Operand 1 is tied to the destination.
584 (define_insn "sse_vmsminv4sf3"
585 [(set (match_operand:V4SF 0 "register_operand" "=x")
587 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
588 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
592 "minss\t{%2, %0|%0, %2}"
593 [(set_attr "type" "sse")
594 (set_attr "mode" "SF")])
596 ;; These versions of the min/max patterns implement exactly the operations
597 ;; min = (op1 < op2 ? op1 : op2)
598 ;; max = (!(op1 < op2) ? op1 : op2)
599 ;; Their operands are not commutative, and thus they may be used in the
600 ;; presence of -0.0 and NaN.
;; V4SF minimum expressed as a two-operand unspec (the unspec code is on
;; a line not visible here) rather than as smin, emitted as minps.
;; Operand 1 is a register tied to the destination; no commutative marker.
602 (define_insn "*ieee_sminv4sf3"
603 [(set (match_operand:V4SF 0 "register_operand" "=x")
604 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
605 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
608 "minps\t{%2, %0|%0, %2}"
609 [(set_attr "type" "sseadd")
610 (set_attr "mode" "V4SF")])
;; V4SF maximum expressed as a two-operand unspec (the unspec code is on
;; a line not visible here) rather than as smax, emitted as maxps.
;; Operand 1 is a register tied to the destination; no commutative marker.
612 (define_insn "*ieee_smaxv4sf3"
613 [(set (match_operand:V4SF 0 "register_operand" "=x")
614 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
615 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
618 "maxps\t{%2, %0|%0, %2}"
619 [(set_attr "type" "sseadd")
620 (set_attr "mode" "V4SF")])
;; V2DF minimum expressed as a two-operand unspec (the unspec code is on
;; a line not visible here) rather than as smin, emitted as minpd.
;; Operand 1 is a register tied to the destination; no commutative marker.
622 (define_insn "*ieee_sminv2df3"
623 [(set (match_operand:V2DF 0 "register_operand" "=x")
624 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
625 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
628 "minpd\t{%2, %0|%0, %2}"
629 [(set_attr "type" "sseadd")
630 (set_attr "mode" "V2DF")])
;; V2DF maximum expressed as a two-operand unspec (the unspec code is on
;; a line not visible here) rather than as smax, emitted as maxpd.
;; Operand 1 is a register tied to the destination; no commutative marker.
632 (define_insn "*ieee_smaxv2df3"
633 [(set (match_operand:V2DF 0 "register_operand" "=x")
634 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
635 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
638 "maxpd\t{%2, %0|%0, %2}"
639 [(set_attr "type" "sseadd")
640 (set_attr "mode" "V2DF")])
;; Packed-single add/subtract, emitted as addsubps.  The visible rtx
;; combines an expression on operands 1 and 2 with (minus op1 op2) over
;; the same operands; the combining wrapper and its selector are on
;; lines not visible here.  Operand 1 is tied to the destination.
642 (define_insn "sse3_addsubv4sf3"
643 [(set (match_operand:V4SF 0 "register_operand" "=x")
646 (match_operand:V4SF 1 "register_operand" "0")
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
648 (minus:V4SF (match_dup 1) (match_dup 2))
651 "addsubps\t{%2, %0|%0, %2}"
652 [(set_attr "type" "sseadd")
653 (set_attr "mode" "V4SF")])
;; Packed-single horizontal operation emitted as haddps.  The rtx picks
;; SFmode elements 0..3 of operand 1 and then elements 0..3 of operand 2
;; in adjacent pairs (0,1) and (2,3); the operators combining each pair
;; are on lines not visible here.  Operand 1 is tied to the destination.
655 (define_insn "sse3_haddv4sf3"
656 [(set (match_operand:V4SF 0 "register_operand" "=x")
661 (match_operand:V4SF 1 "register_operand" "0")
662 (parallel [(const_int 0)]))
663 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
665 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
666 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
670 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
671 (parallel [(const_int 0)]))
672 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
674 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
675 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
677 "haddps\t{%2, %0|%0, %2}"
678 [(set_attr "type" "sseadd")
679 (set_attr "mode" "V4SF")])
;; Packed-single horizontal operation emitted as hsubps.  The rtx picks
;; SFmode elements 0..3 of operand 1 and then elements 0..3 of operand 2
;; in adjacent pairs (0,1) and (2,3); the operators combining each pair
;; are on lines not visible here.  Operand 1 is tied to the destination.
681 (define_insn "sse3_hsubv4sf3"
682 [(set (match_operand:V4SF 0 "register_operand" "=x")
687 (match_operand:V4SF 1 "register_operand" "0")
688 (parallel [(const_int 0)]))
689 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
691 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
692 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
696 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
697 (parallel [(const_int 0)]))
698 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
700 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
701 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
703 "hsubps\t{%2, %0|%0, %2}"
704 [(set_attr "type" "sseadd")
705 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two strategies are visible: applying
;; sse3_haddv4sf3 twice through a fresh temporary register, or calling
;; ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that chooses
;; between them is on a line not visible here.
707 (define_expand "reduc_splus_v4sf"
708 [(match_operand:V4SF 0 "register_operand" "")
709 (match_operand:V4SF 1 "register_operand" "")]
714 rtx tmp = gen_reg_rtx (V4SFmode);
715 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
716 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
719 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
723 (define_expand "reduc_smax_v4sf"
724 [(match_operand:V4SF 0 "register_operand" "")
725 (match_operand:V4SF 1 "register_operand" "")]
728 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
732 (define_expand "reduc_smin_v4sf"
733 [(match_operand:V4SF 0 "register_operand" "")
734 (match_operand:V4SF 1 "register_operand" "")]
737 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
743 ;; Parallel single-precision floating point comparisons
745 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-single comparison.  Operator 3 must satisfy
;; sse_comparison_operator and is printed into the mnemonic through %D3
;; (cmp<cond>ps).  Operand 1 is tied to the destination.
747 (define_insn "sse_maskcmpv4sf3"
748 [(set (match_operand:V4SF 0 "register_operand" "=x")
749 (match_operator:V4SF 3 "sse_comparison_operator"
750 [(match_operand:V4SF 1 "register_operand" "0")
751 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
753 "cmp%D3ps\t{%2, %0|%0, %2}"
754 [(set_attr "type" "ssecmp")
755 (set_attr "mode" "V4SF")])
;; Scalar SFmode comparison.  Operator 3 must satisfy
;; sse_comparison_operator and is printed into the mnemonic through %D3
;; (cmp<cond>ss).  Operand 1 is tied to the destination.
757 (define_insn "sse_maskcmpsf3"
758 [(set (match_operand:SF 0 "register_operand" "=x")
759 (match_operator:SF 3 "sse_comparison_operator"
760 [(match_operand:SF 1 "register_operand" "0")
761 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
763 "cmp%D3ss\t{%2, %0|%0, %2}"
764 [(set_attr "type" "ssecmp")
765 (set_attr "mode" "SF")])
;; Scalar-element comparison on V4SF operands, emitted as cmp<cond>ss
;; with "mode" SF.  Both inputs must be registers here (operand 2 is "x",
;; not "xm"); operand 1 is tied to the destination.
767 (define_insn "sse_vmmaskcmpv4sf3"
768 [(set (match_operand:V4SF 0 "register_operand" "=x")
770 (match_operator:V4SF 3 "sse_comparison_operator"
771 [(match_operand:V4SF 1 "register_operand" "0")
772 (match_operand:V4SF 2 "register_operand" "x")])
776 "cmp%D3ss\t{%2, %0|%0, %2}"
777 [(set_attr "type" "ssecmp")
778 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 (xmm) and
;; element 0 of operand 1 (xmm or memory); emitted as comiss.
780 (define_insn "sse_comi"
781 [(set (reg:CCFP FLAGS_REG)
784 (match_operand:V4SF 0 "register_operand" "x")
785 (parallel [(const_int 0)]))
787 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
788 (parallel [(const_int 0)]))))]
790 "comiss\t{%1, %0|%0, %1}"
791 [(set_attr "type" "ssecomi")
792 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operand 0 (xmm) and
;; element 0 of operand 1 (xmm or memory); emitted as ucomiss.
794 (define_insn "sse_ucomi"
795 [(set (reg:CCFPU FLAGS_REG)
798 (match_operand:V4SF 0 "register_operand" "x")
799 (parallel [(const_int 0)]))
801 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
802 (parallel [(const_int 0)]))))]
804 "ucomiss\t{%1, %0|%0, %1}"
805 [(set_attr "type" "ssecomi")
806 (set_attr "mode" "SF")])
;; V4SF vector-conditional expander.  Operands 4 and 5 feed the
;; comparison; operands 1 and 2 are general operands (presumably the two
;; values selected between -- the wrapping rtx is not fully visible).
;; The expansion is attempted via ix86_expand_fp_vcond; what happens on
;; each outcome is on lines not visible here.
808 (define_expand "vcondv4sf"
809 [(set (match_operand:V4SF 0 "register_operand" "")
812 [(match_operand:V4SF 4 "nonimmediate_operand" "")
813 (match_operand:V4SF 5 "nonimmediate_operand" "")])
814 (match_operand:V4SF 1 "general_operand" "")
815 (match_operand:V4SF 2 "general_operand" "")))]
818 if (ix86_expand_fp_vcond (operands))
824 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
826 ;; Parallel single-precision floating point logical operations
828 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
830 (define_expand "andv4sf3"
831 [(set (match_operand:V4SF 0 "register_operand" "")
832 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
833 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
835 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Matcher for V4SF bitwise AND, emitted as andps.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
837 (define_insn "*andv4sf3"
838 [(set (match_operand:V4SF 0 "register_operand" "=x")
839 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
840 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
841 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
842 "andps\t{%2, %0|%0, %2}"
843 [(set_attr "type" "sselog")
844 (set_attr "mode" "V4SF")])
;; V4SF and-not: (~operand 1) & operand 2, emitted as andnps.  The
;; complemented operand 1 is a register tied to the destination.
846 (define_insn "sse_nandv4sf3"
847 [(set (match_operand:V4SF 0 "register_operand" "=x")
848 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
849 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
851 "andnps\t{%2, %0|%0, %2}"
852 [(set_attr "type" "sselog")
853 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
855 (define_expand "iorv4sf3"
856 [(set (match_operand:V4SF 0 "register_operand" "")
857 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
858 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
860 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Matcher for V4SF bitwise OR, emitted as orps.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
862 (define_insn "*iorv4sf3"
863 [(set (match_operand:V4SF 0 "register_operand" "=x")
864 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
865 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
866 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
867 "orps\t{%2, %0|%0, %2}"
868 [(set_attr "type" "sselog")
869 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander; operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
871 (define_expand "xorv4sf3"
872 [(set (match_operand:V4SF 0 "register_operand" "")
873 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
874 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
876 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Matcher for V4SF bitwise XOR, emitted as xorps.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
878 (define_insn "*xorv4sf3"
879 [(set (match_operand:V4SF 0 "register_operand" "=x")
880 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
881 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
882 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
883 "xorps\t{%2, %0|%0, %2}"
884 [(set_attr "type" "sselog")
885 (set_attr "mode" "V4SF")])
887 ;; Also define scalar versions. These are used for abs, neg, and
888 ;; conditional move. Using subregs into vector modes causes register
889 ;; allocation lossage. These patterns do not allow memory operands
890 ;; because the native instructions read the full 128-bits.
;; Scalar SFmode bitwise AND on registers only, emitted with the packed
;; andps instruction (hence "mode" V4SF).  Operand 1 is tied to the
;; destination.
892 (define_insn "*andsf3"
893 [(set (match_operand:SF 0 "register_operand" "=x")
894 (and:SF (match_operand:SF 1 "register_operand" "0")
895 (match_operand:SF 2 "register_operand" "x")))]
897 "andps\t{%2, %0|%0, %2}"
898 [(set_attr "type" "sselog")
899 (set_attr "mode" "V4SF")])
;; Scalar SFmode and-not, (~operand 1) & operand 2, on registers only,
;; emitted with the packed andnps instruction (hence "mode" V4SF).
901 (define_insn "*nandsf3"
902 [(set (match_operand:SF 0 "register_operand" "=x")
903 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
904 (match_operand:SF 2 "register_operand" "x")))]
906 "andnps\t{%2, %0|%0, %2}"
907 [(set_attr "type" "sselog")
908 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise OR on registers only, emitted with the packed
;; orps instruction (hence "mode" V4SF).  Operand 1 is tied to the
;; destination.
910 (define_insn "*iorsf3"
911 [(set (match_operand:SF 0 "register_operand" "=x")
912 (ior:SF (match_operand:SF 1 "register_operand" "0")
913 (match_operand:SF 2 "register_operand" "x")))]
915 "orps\t{%2, %0|%0, %2}"
916 [(set_attr "type" "sselog")
917 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise XOR on registers only, emitted with the packed
;; xorps instruction (hence "mode" V4SF).  Operand 1 is tied to the
;; destination.
919 (define_insn "*xorsf3"
920 [(set (match_operand:SF 0 "register_operand" "=x")
921 (xor:SF (match_operand:SF 1 "register_operand" "0")
922 (match_operand:SF 2 "register_operand" "x")))]
924 "xorps\t{%2, %0|%0, %2}"
925 [(set_attr "type" "sselog")
926 (set_attr "mode" "V4SF")])
928 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
930 ;; Parallel single-precision floating point conversion operations
932 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts the V2SI operand 2 (MMX register "y" or memory) to V2SF and
;; combines it with operand 1, which is tied to the destination; emitted
;; as cvtpi2ps.  The combining wrapper is on lines not visible here.
934 (define_insn "sse_cvtpi2ps"
935 [(set (match_operand:V4SF 0 "register_operand" "=x")
938 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
939 (match_operand:V4SF 1 "register_operand" "0")
942 "cvtpi2ps\t{%2, %0|%0, %2}"
943 [(set_attr "type" "ssecvt")
944 (set_attr "mode" "V4SF")])
;; Converts V4SF operand 1 (xmm or memory) through a V4SI unspec and
;; keeps elements 0 and 1 as a V2SI result in an MMX register ("y");
;; emitted as cvtps2pi.  Marked as using the mmx unit.
946 (define_insn "sse_cvtps2pi"
947 [(set (match_operand:V2SI 0 "register_operand" "=y")
949 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
951 (parallel [(const_int 0) (const_int 1)])))]
953 "cvtps2pi\t{%1, %0|%0, %1}"
954 [(set_attr "type" "ssecvt")
955 (set_attr "unit" "mmx")
956 (set_attr "mode" "DI")])
;; Converts V4SF operand 1 (xmm or memory) with fix:V4SI and keeps
;; elements 0 and 1 as a V2SI result in an MMX register ("y"); emitted
;; as cvttps2pi.  Marked as using the mmx unit.
;; NOTE(review): "mode" is SF here but DI on the otherwise parallel
;; sse_cvtps2pi pattern -- confirm which is intended.
958 (define_insn "sse_cvttps2pi"
959 [(set (match_operand:V2SI 0 "register_operand" "=y")
961 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
962 (parallel [(const_int 0) (const_int 1)])))]
964 "cvttps2pi\t{%1, %0|%0, %1}"
965 [(set_attr "type" "ssecvt")
966 (set_attr "unit" "mmx")
967 (set_attr "mode" "SF")])
;; Converts SImode operand 2 (general register or memory, one
;; alternative each) to SF and combines it with operand 1, which is tied
;; to the destination; emitted as cvtsi2ss.  The decode attributes give
;; per-alternative values (register source, then memory source).
969 (define_insn "sse_cvtsi2ss"
970 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
973 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
974 (match_operand:V4SF 1 "register_operand" "0,0")
977 "cvtsi2ss\t{%2, %0|%0, %2}"
978 [(set_attr "type" "sseicvt")
979 (set_attr "athlon_decode" "vector,double")
980 (set_attr "amdfam10_decode" "vector,double")
981 (set_attr "mode" "SF")])
;; 64-bit-only variant: converts DImode operand 2 to SF and combines it
;; with operand 1, which is tied to the destination; emitted as
;; cvtsi2ssq.  Enabled for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative is "rm" here but "m" in
;; sse_cvtsi2ss -- confirm the difference is intended.
983 (define_insn "sse_cvtsi2ssq"
984 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
987 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
988 (match_operand:V4SF 1 "register_operand" "0,0")
990 "TARGET_SSE && TARGET_64BIT"
991 "cvtsi2ssq\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sseicvt")
993 (set_attr "athlon_decode" "vector,double")
994 (set_attr "amdfam10_decode" "vector,double")
995 (set_attr "mode" "SF")])
;; Converts element 0 of V4SF operand 1 (xmm or memory) to an SImode
;; general register using UNSPEC_FIX_NOTRUNC; emitted as cvtss2si.
997 (define_insn "sse_cvtss2si"
998 [(set (match_operand:SI 0 "register_operand" "=r,r")
1001 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1002 (parallel [(const_int 0)]))]
1003 UNSPEC_FIX_NOTRUNC))]
1005 "cvtss2si\t{%1, %0|%0, %1}"
1006 [(set_attr "type" "sseicvt")
1007 (set_attr "athlon_decode" "double,vector")
1008 (set_attr "mode" "SI")])
;; Same conversion as cvtss2si but taking a scalar SFmode operand
;; directly (xmm or memory) instead of selecting a vector element;
;; uses UNSPEC_FIX_NOTRUNC and is emitted as cvtss2si.
1010 (define_insn "sse_cvtss2si_2"
1011 [(set (match_operand:SI 0 "register_operand" "=r,r")
1012 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1013 UNSPEC_FIX_NOTRUNC))]
1015 "cvtss2si\t{%1, %0|%0, %1}"
1016 [(set_attr "type" "sseicvt")
1017 (set_attr "athlon_decode" "double,vector")
1018 (set_attr "amdfam10_decode" "double,double")
1019 (set_attr "mode" "SI")])
;; 64-bit-only variant: converts element 0 of V4SF operand 1 (xmm or
;; memory) to a DImode general register using UNSPEC_FIX_NOTRUNC;
;; emitted as cvtss2siq.
1021 (define_insn "sse_cvtss2siq"
1022 [(set (match_operand:DI 0 "register_operand" "=r,r")
1025 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1026 (parallel [(const_int 0)]))]
1027 UNSPEC_FIX_NOTRUNC))]
1028 "TARGET_SSE && TARGET_64BIT"
1029 "cvtss2siq\t{%1, %0|%0, %1}"
1030 [(set_attr "type" "sseicvt")
1031 (set_attr "athlon_decode" "double,vector")
1032 (set_attr "mode" "DI")])
;; 64-bit-only variant taking a scalar SFmode operand directly (xmm or
;; memory); uses UNSPEC_FIX_NOTRUNC, produces a DImode general register
;; and is emitted as cvtss2siq.
1034 (define_insn "sse_cvtss2siq_2"
1035 [(set (match_operand:DI 0 "register_operand" "=r,r")
1036 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1037 UNSPEC_FIX_NOTRUNC))]
1038 "TARGET_SSE && TARGET_64BIT"
1039 "cvtss2siq\t{%1, %0|%0, %1}"
1040 [(set_attr "type" "sseicvt")
1041 (set_attr "athlon_decode" "double,vector")
1042 (set_attr "amdfam10_decode" "double,double")
1043 (set_attr "mode" "DI")])
;; Converts element 0 of V4SF operand 1 (xmm or memory) to an SImode
;; general register; emitted as cvttss2si.  The conversion operator
;; wrapping the element select is on a line not visible here.
1045 (define_insn "sse_cvttss2si"
1046 [(set (match_operand:SI 0 "register_operand" "=r,r")
1049 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1050 (parallel [(const_int 0)]))))]
1052 "cvttss2si\t{%1, %0|%0, %1}"
1053 [(set_attr "type" "sseicvt")
1054 (set_attr "athlon_decode" "double,vector")
1055 (set_attr "amdfam10_decode" "double,double")
1056 (set_attr "mode" "SI")])
;; cvttss2siq: DImode counterpart of the cvttss2si pattern; source is
;; element 0 of a V4SF operand.  Requires TARGET_SSE and TARGET_64BIT.
1058 (define_insn "sse_cvttss2siq"
1059 [(set (match_operand:DI 0 "register_operand" "=r,r")
1062 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1063 (parallel [(const_int 0)]))))]
1064 "TARGET_SSE && TARGET_64BIT"
1065 "cvttss2siq\t{%1, %0|%0, %1}"
1066 [(set_attr "type" "sseicvt")
1067 (set_attr "athlon_decode" "double,vector")
1068 (set_attr "amdfam10_decode" "double,double")
1069 (set_attr "mode" "DI")])
;; cvtdq2ps: convert a V4SI operand (SSE register or memory) to V4SF in an
;; SSE register.  The "mode" attribute is V4SF so that it agrees with the
;; packed single-precision result the pattern computes.
1071 (define_insn "sse2_cvtdq2ps"
1072 [(set (match_operand:V4SF 0 "register_operand" "=x")
1073 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1075 "cvtdq2ps\t{%1, %0|%0, %1}"
1076 [(set_attr "type" "ssecvt")
1077 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF (SSE register or memory) to V4SI, expressed with
;; UNSPEC_FIX_NOTRUNC.
1079 (define_insn "sse2_cvtps2dq"
1080 [(set (match_operand:V4SI 0 "register_operand" "=x")
1081 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1082 UNSPEC_FIX_NOTRUNC))]
1084 "cvtps2dq\t{%1, %0|%0, %1}"
1085 [(set_attr "type" "ssecvt")
1086 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF (SSE register or memory) to V4SI, expressed with the
;; standard `fix' RTL operator.
1088 (define_insn "sse2_cvttps2dq"
1089 [(set (match_operand:V4SI 0 "register_operand" "=x")
1090 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1092 "cvttps2dq\t{%1, %0|%0, %1}"
1093 [(set_attr "type" "ssecvt")
1094 (set_attr "mode" "TI")])
1096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1098 ;; Parallel single-precision floating point element swizzling
1100 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, in order: register-to-register movhlps; movlps
;; loading from %H2 of an offsettable memory operand 2; movhps storing
;; operand 2 into a memory destination.  Operand 1 is always tied to the
;; destination.  The condition rejects operands 1 and 2 both being memory.
1102 (define_insn "sse_movhlps"
1103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1106 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1107 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1108 (parallel [(const_int 6)
1112 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1114 movhlps\t{%2, %0|%0, %2}
1115 movlps\t{%H2, %0|%0, %H2}
1116 movhps\t{%2, %0|%0, %2}"
1117 [(set_attr "type" "ssemov")
1118 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives, in order: register-to-register movlhps; movhps
;; loading operand 2 from memory; movlps storing operand 2 to %H0 of an
;; offsettable memory destination.  Operand 1 is tied to the destination
;; and the operands must satisfy ix86_binary_operator_ok.
1120 (define_insn "sse_movlhps"
1121 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1124 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1125 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1126 (parallel [(const_int 0)
1130 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1132 movlhps\t{%2, %0|%0, %2}
1133 movhps\t{%2, %0|%0, %2}
1134 movlps\t{%2, %H0|%H0, %2}"
1135 [(set_attr "type" "ssemov")
1136 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: element selection (2, 6, 3, 7) taken over operands 1 and 2.
;; Operand 1 is tied to the destination register; operand 2 may be an SSE
;; register or memory.
1138 (define_insn "sse_unpckhps"
1139 [(set (match_operand:V4SF 0 "register_operand" "=x")
1142 (match_operand:V4SF 1 "register_operand" "0")
1143 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1144 (parallel [(const_int 2) (const_int 6)
1145 (const_int 3) (const_int 7)])))]
1147 "unpckhps\t{%2, %0|%0, %2}"
1148 [(set_attr "type" "sselog")
1149 (set_attr "mode" "V4SF")])
;; unpcklps: element selection (0, 4, 1, 5) taken over operands 1 and 2.
;; Operand 1 is tied to the destination register; operand 2 may be an SSE
;; register or memory.
1151 (define_insn "sse_unpcklps"
1152 [(set (match_operand:V4SF 0 "register_operand" "=x")
1155 (match_operand:V4SF 1 "register_operand" "0")
1156 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1157 (parallel [(const_int 0) (const_int 4)
1158 (const_int 1) (const_int 5)])))]
1160 "unpcklps\t{%2, %0|%0, %2}"
1161 [(set_attr "type" "sselog")
1162 (set_attr "mode" "V4SF")])
1164 ;; These are modeled with the same vec_concat as the others so that we
1165 ;; capture users of shufps that can use the new instructions
;; movshdup: V4SF source in an SSE register or memory, SSE register
;; destination.
1166 (define_insn "sse3_movshdup"
1167 [(set (match_operand:V4SF 0 "register_operand" "=x")
1170 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1172 (parallel [(const_int 1)
1177 "movshdup\t{%1, %0|%0, %1}"
1178 [(set_attr "type" "sse")
1179 (set_attr "mode" "V4SF")])
;; movsldup: V4SF source in an SSE register or memory, SSE register
;; destination.
1181 (define_insn "sse3_movsldup"
1182 [(set (match_operand:V4SF 0 "register_operand" "=x")
1185 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1187 (parallel [(const_int 0)
1192 "movsldup\t{%1, %0|%0, %1}"
1193 [(set_attr "type" "sse")
1194 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as a single constant (operand 3).
;; The 8-bit mask is split into four 2-bit selectors; the upper two are
;; biased by 4 before being handed to gen_sse_shufps_1.
1196 (define_expand "sse_shufps"
1197 [(match_operand:V4SF 0 "register_operand" "")
1198 (match_operand:V4SF 1 "register_operand" "")
1199 (match_operand:V4SF 2 "nonimmediate_operand" "")
1200 (match_operand:SI 3 "const_int_operand" "")]
1203 int mask = INTVAL (operands[3]);
1204 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1205 GEN_INT ((mask >> 0) & 3),
1206 GEN_INT ((mask >> 2) & 3),
1207 GEN_INT (((mask >> 4) & 3) + 4),
1208 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selectors as four separate operands: operands 3 and 4
;; are in the range 0..3, operands 5 and 6 in the range 4..7.  The output
;; code rebuilds the 8-bit immediate (removing the bias of 4 from operands
;; 5 and 6) and stores it in operands[3] before printing the instruction.
1212 (define_insn "sse_shufps_1"
1213 [(set (match_operand:V4SF 0 "register_operand" "=x")
1216 (match_operand:V4SF 1 "register_operand" "0")
1217 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1218 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1219 (match_operand 4 "const_0_to_3_operand" "")
1220 (match_operand 5 "const_4_to_7_operand" "")
1221 (match_operand 6 "const_4_to_7_operand" "")])))]
1225 mask |= INTVAL (operands[3]) << 0;
1226 mask |= INTVAL (operands[4]) << 2;
1227 mask |= (INTVAL (operands[5]) - 4) << 4;
1228 mask |= (INTVAL (operands[6]) - 4) << 6;
1229 operands[3] = GEN_INT (mask);
1231 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1233 [(set_attr "type" "sselog")
1234 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF operand into a V2SF destination.
;; Alternatives, in order: movhps store to memory; register-to-register
;; movhlps; movlps load from %H1 of an offsettable memory source.
1236 (define_insn "sse_storehps"
1237 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1239 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1240 (parallel [(const_int 2) (const_int 3)])))]
1243 movhps\t{%1, %0|%0, %1}
1244 movhlps\t{%1, %0|%0, %1}
1245 movlps\t{%H1, %0|%0, %H1}"
1246 [(set_attr "type" "ssemov")
1247 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of operand 1 (tied to the destination) with a
;; V2SF operand 2.  Alternatives, in order: movhps load from memory;
;; register-to-register movlhps; movlps store to %H0 of an offsettable
;; memory destination.
1249 (define_insn "sse_loadhps"
1250 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1253 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1254 (parallel [(const_int 0) (const_int 1)]))
1255 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1258 movhps\t{%2, %0|%0, %2}
1259 movlhps\t{%2, %0|%0, %2}
1260 movlps\t{%2, %H0|%H0, %2}"
1261 [(set_attr "type" "ssemov")
1262 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF operand into a V2SF destination.
;; Alternatives, in order: movlps store to memory; movaps between
;; registers; movlps load from memory.
1264 (define_insn "sse_storelps"
1265 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1267 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1268 (parallel [(const_int 0) (const_int 1)])))]
1271 movlps\t{%1, %0|%0, %1}
1272 movaps\t{%1, %0|%0, %1}
1273 movlps\t{%1, %0|%0, %1}"
1274 [(set_attr "type" "ssemov")
1275 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine a V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternative 0 has operand 2 tied to the destination and uses shufps
;; with immediate 0xe4 to bring in operand 1; alternatives 1 and 2 have
;; operand 1 tied to the destination and use movlps (load, then store).
1277 (define_insn "sse_loadlps"
1278 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1280 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1282 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1283 (parallel [(const_int 2) (const_int 3)]))))]
1286 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1287 movlps\t{%2, %0|%0, %2}
1288 movlps\t{%2, %0|%0, %2}"
1289 [(set_attr "type" "sselog,ssemov,ssemov")
1290 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 2 is moved into the destination, which is
;; tied to operand 1.
1292 (define_insn "sse_movss"
1293 [(set (match_operand:V4SF 0 "register_operand" "=x")
1295 (match_operand:V4SF 2 "register_operand" "x")
1296 (match_operand:V4SF 1 "register_operand" "0")
1299 "movss\t{%2, %0|%0, %2}"
1300 [(set_attr "type" "ssemov")
1301 (set_attr "mode" "SF")])
;; V4SF result built from a scalar SF operand that is tied to the
;; destination register; emitted as shufps with immediate 0, using the
;; destination as both source operands.
1303 (define_insn "*vec_dupv4sf"
1304 [(set (match_operand:V4SF 0 "register_operand" "=x")
1306 (match_operand:SF 1 "register_operand" "0")))]
1308 "shufps\t{$0, %0, %0|%0, %0, 0}"
1309 [(set_attr "type" "sselog1")
1310 (set_attr "mode" "V4SF")])
1312 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1313 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1314 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands.  Alternatives 0 and 1 target SSE
;; registers (unpcklps with a register operand 2, movss when operand 2 is
;; the "C" constant); alternatives 2 and 3 are the MMX-register ("*y")
;; equivalents using punpckldq and movd.
1315 (define_insn "*sse_concatv2sf"
1316 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1318 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1319 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1322 unpcklps\t{%2, %0|%0, %2}
1323 movss\t{%1, %0|%0, %1}
1324 punpckldq\t{%2, %0|%0, %2}
1325 movd\t{%1, %0|%0, %1}"
1326 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1327 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from an SSE register (movlhps) or from
;; memory (movhps).
1329 (define_insn "*sse_concatv4sf"
1330 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1332 (match_operand:V2SF 1 "register_operand" " 0,0")
1333 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1336 movlhps\t{%2, %0|%0, %2}
1337 movhps\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "ssemov")
1339 (set_attr "mode" "V4SF,V2SF")])
;; Named expander for V4SF vector initialisation; all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1341 (define_expand "vec_initv4sf"
1342 [(match_operand:V4SF 0 "register_operand" "")
1343 (match_operand 1 "" "")]
1346 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert an SF value (operand 2) into a V4SF destination.  Operand 1 is
;; either tied to the destination ("0") or the "C" constant.  The first
;; three alternatives emit movss (register source), movss (memory source)
;; and movd (general-register source) respectively.
1350 (define_insn "vec_setv4sf_0"
1351 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
1354 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1355 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1359 movss\t{%2, %0|%0, %2}
1360 movss\t{%2, %0|%0, %2}
1361 movd\t{%2, %0|%0, %2}
1363 [(set_attr "type" "ssemov")
1364 (set_attr "mode" "SF")])
;; Split, active only once reload has completed, for a V4SF destination in
;; memory: the SF value (operand 1) is stored directly at offset 0 of the
;; destination, re-addressed in SFmode.
1367 [(set (match_operand:V4SF 0 "memory_operand" "")
1370 (match_operand:SF 1 "nonmemory_operand" ""))
1373 "TARGET_SSE && reload_completed"
1376 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Named expander for setting one V4SF element.  Operand 2 is the constant
;; element index; the work is delegated to ix86_expand_vector_set.
1380 (define_expand "vec_setv4sf"
1381 [(match_operand:V4SF 0 "register_operand" "")
1382 (match_operand:SF 1 "register_operand" "")
1383 (match_operand 2 "const_int_operand" "")]
1386 ix86_expand_vector_set (false, operands[0], operands[1],
1387 INTVAL (operands[2]));
;; Extract element 0 of a V4SF operand as an SF value.  Not matched when
;; both operands are memory.  After reload the pattern is split: operand 1
;; is re-expressed in SFmode and an ordinary move into operand 0 is
;; emitted.
1391 (define_insn_and_split "*vec_extractv4sf_0"
1392 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1394 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1395 (parallel [(const_int 0)])))]
1396 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1398 "&& reload_completed"
1401 rtx op1 = operands[1];
1403 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1405 op1 = gen_lowpart (SFmode, op1);
1406 emit_move_insn (operands[0], op1);
;; Named expander for extracting one V4SF element.  Operand 2 is the
;; constant element index; the work is delegated to
;; ix86_expand_vector_extract.
1410 (define_expand "vec_extractv4sf"
1411 [(match_operand:SF 0 "register_operand" "")
1412 (match_operand:V4SF 1 "register_operand" "")
1413 (match_operand 2 "const_int_operand" "")]
1416 ix86_expand_vector_extract (false, operands[0], operands[1],
1417 INTVAL (operands[2]));
1421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1423 ;; Parallel double-precision floating point arithmetic
1425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  The expansion is done entirely by
;; ix86_expand_fp_absneg_operator (hence the DONE).
1427 (define_expand "negv2df2"
1428 [(set (match_operand:V2DF 0 "register_operand" "")
1429 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1431 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  The expansion is done entirely by
;; ix86_expand_fp_absneg_operator (hence the DONE).
1433 (define_expand "absv2df2"
1434 [(set (match_operand:V2DF 0 "register_operand" "")
1435 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1437 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
1439 (define_expand "addv2df3"
1440 [(set (match_operand:V2DF 0 "register_operand" "")
1441 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1442 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1444 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.
1446 (define_insn "*addv2df3"
1447 [(set (match_operand:V2DF 0 "register_operand" "=x")
1448 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1449 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1450 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1451 "addpd\t{%2, %0|%0, %2}"
1452 [(set_attr "type" "sseadd")
1453 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands: operand 1 is tied to the destination, operand 2
;; may be an SSE register or memory.  The operand check is made in
;; V2DFmode, the mode of the operands, as in the other V2DF arithmetic
;; patterns.
1455 (define_insn "sse2_vmaddv2df3"
1456 [(set (match_operand:V2DF 0 "register_operand" "=x")
1458 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "0")
1459 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1462 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1463 "addsd\t{%2, %0|%0, %2}"
1464 [(set_attr "type" "sseadd")
1465 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
1467 (define_expand "subv2df3"
1468 [(set (match_operand:V2DF 0 "register_operand" "")
1469 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1470 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1472 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no
;; commutative marker); operand 2 may be an SSE register or memory.
1474 (define_insn "*subv2df3"
1475 [(set (match_operand:V2DF 0 "register_operand" "=x")
1476 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1477 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1479 "subpd\t{%2, %0|%0, %2}"
1480 [(set_attr "type" "sseadd")
1481 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands: operand 1 is a register tied to the
;; destination, operand 2 may be an SSE register or memory.
1483 (define_insn "sse2_vmsubv2df3"
1484 [(set (match_operand:V2DF 0 "register_operand" "=x")
1486 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1487 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1491 "subsd\t{%2, %0|%0, %2}"
1492 [(set_attr "type" "sseadd")
1493 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
1495 (define_expand "mulv2df3"
1496 [(set (match_operand:V2DF 0 "register_operand" "")
1497 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1498 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1500 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.
1502 (define_insn "*mulv2df3"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1504 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1505 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1506 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1507 "mulpd\t{%2, %0|%0, %2}"
1508 [(set_attr "type" "ssemul")
1509 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands: operand 1 is tied to the destination, operand 2
;; may be an SSE register or memory.
1511 (define_insn "sse2_vmmulv2df3"
1512 [(set (match_operand:V2DF 0 "register_operand" "=x")
1514 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "0")
1515 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1518 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1519 "mulsd\t{%2, %0|%0, %2}"
1520 [(set_attr "type" "ssemul")
1521 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must already be a register; operands
;; are adjusted by ix86_fixup_binary_operands_no_copy.
1523 (define_expand "divv2df3"
1524 [(set (match_operand:V2DF 0 "register_operand" "")
1525 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1526 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1528 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2 may
;; be an SSE register or memory.
1530 (define_insn "*divv2df3"
1531 [(set (match_operand:V2DF 0 "register_operand" "=x")
1532 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1533 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1535 "divpd\t{%2, %0|%0, %2}"
1536 [(set_attr "type" "ssediv")
1537 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands: operand 1 is a register tied to the
;; destination, operand 2 may be an SSE register or memory.
1539 (define_insn "sse2_vmdivv2df3"
1540 [(set (match_operand:V2DF 0 "register_operand" "=x")
1542 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1543 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1547 "divsd\t{%2, %0|%0, %2}"
1548 [(set_attr "type" "ssediv")
1549 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of an operand in an SSE register or memory.
1551 (define_insn "sqrtv2df2"
1552 [(set (match_operand:V2DF 0 "register_operand" "=x")
1553 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1555 "sqrtpd\t{%1, %0|%0, %1}"
1556 [(set_attr "type" "sse")
1557 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands: operand 1 is the value whose square root is
;; taken, operand 2 is tied to the destination.  Operand 1 uses
;; nonimmediate_operand so that the predicate agrees with its "xm"
;; constraint, which offers a memory alternative.
1559 (define_insn "sse2_vmsqrtv2df2"
1560 [(set (match_operand:V2DF 0 "register_operand" "=x")
1562 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1563 (match_operand:V2DF 2 "register_operand" "0")
1566 "sqrtsd\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "sse")
1568 (set_attr "mode" "DF")])
1570 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1571 ;; isn't really correct, as those rtl operators aren't defined when
1572 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
1574 (define_expand "smaxv2df3"
1575 [(set (match_operand:V2DF 0 "register_operand" "")
1576 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1577 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1580 if (!flag_finite_math_only)
1581 operands[1] = force_reg (V2DFmode, operands[1]);
1582 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form ("%0" on operand 1).  Only enabled when
;; flag_finite_math_only is set.
1585 (define_insn "*smaxv2df3_finite"
1586 [(set (match_operand:V2DF 0 "register_operand" "=x")
1587 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1588 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1589 "TARGET_SSE2 && flag_finite_math_only
1590 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1591 "maxpd\t{%2, %0|%0, %2}"
1592 [(set_attr "type" "sseadd")
1593 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so the operand order is kept.
1595 (define_insn "*smaxv2df3"
1596 [(set (match_operand:V2DF 0 "register_operand" "=x")
1597 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1598 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1600 "maxpd\t{%2, %0|%0, %2}"
1601 [(set_attr "type" "sseadd")
1602 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands: operand 1 is a register tied to the
;; destination, operand 2 may be an SSE register or memory.
1604 (define_insn "sse2_vmsmaxv2df3"
1605 [(set (match_operand:V2DF 0 "register_operand" "=x")
1607 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1608 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1612 "maxsd\t{%2, %0|%0, %2}"
1613 [(set_attr "type" "sseadd")
1614 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
1616 (define_expand "sminv2df3"
1617 [(set (match_operand:V2DF 0 "register_operand" "")
1618 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1619 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1622 if (!flag_finite_math_only)
1623 operands[1] = force_reg (V2DFmode, operands[1]);
1624 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form ("%0" on operand 1).  Only enabled when
;; flag_finite_math_only is set.
1627 (define_insn "*sminv2df3_finite"
1628 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1630 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1631 "TARGET_SSE2 && flag_finite_math_only
1632 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1633 "minpd\t{%2, %0|%0, %2}"
1634 [(set_attr "type" "sseadd")
1635 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so the operand order is kept.
1637 (define_insn "*sminv2df3"
1638 [(set (match_operand:V2DF 0 "register_operand" "=x")
1639 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1640 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1642 "minpd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "sseadd")
1644 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands: operand 1 is a register tied to the
;; destination, operand 2 may be an SSE register or memory.
1646 (define_insn "sse2_vmsminv2df3"
1647 [(set (match_operand:V2DF 0 "register_operand" "=x")
1649 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1650 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1654 "minsd\t{%2, %0|%0, %2}"
1655 [(set_attr "type" "sseadd")
1656 (set_attr "mode" "DF")])
;; addsubpd: the pattern involves both operands 1 and 2 and their
;; difference (minus of the same two operands via match_dup).  Operand 1 is
;; tied to the destination.
1658 (define_insn "sse3_addsubv2df3"
1659 [(set (match_operand:V2DF 0 "register_operand" "=x")
1662 (match_operand:V2DF 1 "register_operand" "0")
1663 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1664 (minus:V2DF (match_dup 1) (match_dup 2))
1667 "addsubpd\t{%2, %0|%0, %2}"
1668 [(set_attr "type" "sseadd")
1669 (set_attr "mode" "V2DF")])
;; haddpd: the pattern pairs elements 0 and 1 of operand 1, and elements 0
;; and 1 of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be an SSE register or memory.
1671 (define_insn "sse3_haddv2df3"
1672 [(set (match_operand:V2DF 0 "register_operand" "=x")
1676 (match_operand:V2DF 1 "register_operand" "0")
1677 (parallel [(const_int 0)]))
1678 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1681 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1682 (parallel [(const_int 0)]))
1683 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1685 "haddpd\t{%2, %0|%0, %2}"
1686 [(set_attr "type" "sseadd")
1687 (set_attr "mode" "V2DF")])
;; hsubpd: the pattern pairs elements 0 and 1 of operand 1, and elements 0
;; and 1 of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be an SSE register or memory.
1689 (define_insn "sse3_hsubv2df3"
1690 [(set (match_operand:V2DF 0 "register_operand" "=x")
1694 (match_operand:V2DF 1 "register_operand" "0")
1695 (parallel [(const_int 0)]))
1696 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1699 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1700 (parallel [(const_int 0)]))
1701 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1703 "hsubpd\t{%2, %0|%0, %2}"
1704 [(set_attr "type" "sseadd")
1705 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF vector, implemented by emitting the haddpd
;; pattern with operand 1 supplied as both source operands.
1707 (define_expand "reduc_splus_v2df"
1708 [(match_operand:V2DF 0 "register_operand" "")
1709 (match_operand:V2DF 1 "register_operand" "")]
1712 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1716 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1718 ;; Parallel double-precision floating point comparisons
1720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V2DF comparison (cmp..pd).  The comparison is matched as
;; operator 3 by sse_comparison_operator and printed through %D3.
1722 (define_insn "sse2_maskcmpv2df3"
1723 [(set (match_operand:V2DF 0 "register_operand" "=x")
1724 (match_operator:V2DF 3 "sse_comparison_operator"
1725 [(match_operand:V2DF 1 "register_operand" "0")
1726 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1728 "cmp%D3pd\t{%2, %0|%0, %2}"
1729 [(set_attr "type" "ssecmp")
1730 (set_attr "mode" "V2DF")])
;; Scalar DF comparison (cmp..sd).  The comparison is matched as
;; operator 3 by sse_comparison_operator and printed through %D3.
1732 (define_insn "sse2_maskcmpdf3"
1733 [(set (match_operand:DF 0 "register_operand" "=x")
1734 (match_operator:DF 3 "sse_comparison_operator"
1735 [(match_operand:DF 1 "register_operand" "0")
1736 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1738 "cmp%D3sd\t{%2, %0|%0, %2}"
1739 [(set_attr "type" "ssecmp")
1740 (set_attr "mode" "DF")])
;; cmp..sd on V2DF operands.  The comparison is matched as operator 3 by
;; sse_comparison_operator and printed through %D3.
1742 (define_insn "sse2_vmmaskcmpv2df3"
1743 [(set (match_operand:V2DF 0 "register_operand" "=x")
1745 (match_operator:V2DF 3 "sse_comparison_operator"
1746 [(match_operand:V2DF 1 "register_operand" "0")
1747 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1751 "cmp%D3sd\t{%2, %0|%0, %2}"
1752 [(set_attr "type" "ssecmp")
1753 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG (in CCFPmode) from element 0 of each of the two
;; V2DF operands.  Operand 1 may be an SSE register or memory.
1755 (define_insn "sse2_comi"
1756 [(set (reg:CCFP FLAGS_REG)
1759 (match_operand:V2DF 0 "register_operand" "x")
1760 (parallel [(const_int 0)]))
1762 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1763 (parallel [(const_int 0)]))))]
1765 "comisd\t{%1, %0|%0, %1}"
1766 [(set_attr "type" "ssecomi")
1767 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG (in CCFPUmode) from element 0 of each of the
;; two V2DF operands.  Operand 1 may be an SSE register or memory.
1769 (define_insn "sse2_ucomi"
1770 [(set (reg:CCFPU FLAGS_REG)
1773 (match_operand:V2DF 0 "register_operand" "x")
1774 (parallel [(const_int 0)]))
1776 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1777 (parallel [(const_int 0)]))))]
1779 "ucomisd\t{%1, %0|%0, %1}"
1780 [(set_attr "type" "ssecomi")
1781 (set_attr "mode" "DF")])
;; V2DF vector conditional.  Operator 3 compares operands 4 and 5;
;; operands 1 and 2 are the two values being chosen between.  The
;; expansion is delegated to ix86_expand_fp_vcond, whose result is tested.
1783 (define_expand "vcondv2df"
1784 [(set (match_operand:V2DF 0 "register_operand" "")
1786 (match_operator 3 ""
1787 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1788 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1789 (match_operand:V2DF 1 "general_operand" "")
1790 (match_operand:V2DF 2 "general_operand" "")))]
1793 if (ix86_expand_fp_vcond (operands))
1799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1801 ;; Parallel double-precision floating point logical operations
1803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
1805 (define_expand "andv2df3"
1806 [(set (match_operand:V2DF 0 "register_operand" "")
1807 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1808 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1810 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.
1812 (define_insn "*andv2df3"
1813 [(set (match_operand:V2DF 0 "register_operand" "=x")
1814 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1815 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1816 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1817 "andpd\t{%2, %0|%0, %2}"
1818 [(set_attr "type" "sselog")
1819 (set_attr "mode" "V2DF")])
;; andnpd: AND of the complement of operand 1 (tied to the destination)
;; with operand 2.
1821 (define_insn "sse2_nandv2df3"
1822 [(set (match_operand:V2DF 0 "register_operand" "=x")
1823 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1824 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1826 "andnpd\t{%2, %0|%0, %2}"
1827 [(set_attr "type" "sselog")
1828 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
1830 (define_expand "iorv2df3"
1831 [(set (match_operand:V2DF 0 "register_operand" "")
1832 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1833 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1835 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.
1837 (define_insn "*iorv2df3"
1838 [(set (match_operand:V2DF 0 "register_operand" "=x")
1839 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1840 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1841 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1842 "orpd\t{%2, %0|%0, %2}"
1843 [(set_attr "type" "sselog")
1844 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
1846 (define_expand "xorv2df3"
1847 [(set (match_operand:V2DF 0 "register_operand" "")
1848 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1849 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1851 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.
1853 (define_insn "*xorv2df3"
1854 [(set (match_operand:V2DF 0 "register_operand" "=x")
1855 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1856 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1857 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1858 "xorpd\t{%2, %0|%0, %2}"
1859 [(set_attr "type" "sselog")
1860 (set_attr "mode" "V2DF")])
1862 ;; Also define scalar versions. These are used for abs, neg, and
1863 ;; conditional move. Using subregs into vector modes causes register
1864 ;; allocation lossage. These patterns do not allow memory operands
1865 ;; because the native instructions read the full 128 bits.
;; Scalar DF bitwise AND, register operands only, emitted as andpd (the
;; "mode" attribute is therefore V2DF).
1867 (define_insn "*anddf3"
1868 [(set (match_operand:DF 0 "register_operand" "=x")
1869 (and:DF (match_operand:DF 1 "register_operand" "0")
1870 (match_operand:DF 2 "register_operand" "x")))]
1872 "andpd\t{%2, %0|%0, %2}"
1873 [(set_attr "type" "sselog")
1874 (set_attr "mode" "V2DF")])
;; Scalar DF AND of the complement of operand 1 with operand 2, register
;; operands only, emitted as andnpd.
1876 (define_insn "*nanddf3"
1877 [(set (match_operand:DF 0 "register_operand" "=x")
1878 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1879 (match_operand:DF 2 "register_operand" "x")))]
1881 "andnpd\t{%2, %0|%0, %2}"
1882 [(set_attr "type" "sselog")
1883 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR, register operands only, emitted as orpd.
1885 (define_insn "*iordf3"
1886 [(set (match_operand:DF 0 "register_operand" "=x")
1887 (ior:DF (match_operand:DF 1 "register_operand" "0")
1888 (match_operand:DF 2 "register_operand" "x")))]
1890 "orpd\t{%2, %0|%0, %2}"
1891 [(set_attr "type" "sselog")
1892 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR, register operands only, emitted as xorpd.
1894 (define_insn "*xordf3"
1895 [(set (match_operand:DF 0 "register_operand" "=x")
1896 (xor:DF (match_operand:DF 1 "register_operand" "0")
1897 (match_operand:DF 2 "register_operand" "x")))]
1899 "xorpd\t{%2, %0|%0, %2}"
1900 [(set_attr "type" "sselog")
1901 (set_attr "mode" "V2DF")])
1903 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1905 ;; Parallel double-precision floating point conversion operations
1907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF.  The source is an MMX register ("y") or memory;
;; the "unit" attribute is "mmx" only for the register alternative.
1909 (define_insn "sse2_cvtpi2pd"
1910 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1911 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1913 "cvtpi2pd\t{%1, %0|%0, %1}"
1914 [(set_attr "type" "ssecvt")
1915 (set_attr "unit" "mmx,*")
1916 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (SSE register or memory) to V2SI in an MMX register,
;; expressed with UNSPEC_FIX_NOTRUNC.
1918 (define_insn "sse2_cvtpd2pi"
1919 [(set (match_operand:V2SI 0 "register_operand" "=y")
1920 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1921 UNSPEC_FIX_NOTRUNC))]
1923 "cvtpd2pi\t{%1, %0|%0, %1}"
1924 [(set_attr "type" "ssecvt")
1925 (set_attr "unit" "mmx")
1926 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF (SSE register or memory) to V2SI in an MMX register,
;; expressed with the standard `fix' RTL operator.
;; NOTE(review): "mode" is TI here but DI in sse2_cvtpd2pi -- confirm
;; whether the difference is intended.
1928 (define_insn "sse2_cvttpd2pi"
1929 [(set (match_operand:V2SI 0 "register_operand" "=y")
1930 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1932 "cvttpd2pi\t{%1, %0|%0, %1}"
1933 [(set_attr "type" "ssecvt")
1934 (set_attr "unit" "mmx")
1935 (set_attr "mode" "TI")])
;; cvtsi2sd: an SImode operand 2 (general register or memory) is converted
;; to DF; operand 1 is a V2DF register tied to the destination.
1937 (define_insn "sse2_cvtsi2sd"
1938 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1941 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1942 (match_operand:V2DF 1 "register_operand" "0,0")
1945 "cvtsi2sd\t{%2, %0|%0, %2}"
1946 [(set_attr "type" "sseicvt")
1947 (set_attr "mode" "DF")
1948 (set_attr "athlon_decode" "double,direct")
1949 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source counterpart of the cvtsi2sd pattern.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
1951 (define_insn "sse2_cvtsi2sdq"
1952 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1955 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1956 (match_operand:V2DF 1 "register_operand" "0,0")
1958 "TARGET_SSE2 && TARGET_64BIT"
1959 "cvtsi2sdq\t{%2, %0|%0, %2}"
1960 [(set_attr "type" "sseicvt")
1961 (set_attr "mode" "DF")
1962 (set_attr "athlon_decode" "double,direct")
1963 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: SImode result in a general register from element 0 of a V2DF
;; operand (SSE register or memory), expressed with UNSPEC_FIX_NOTRUNC.
1965 (define_insn "sse2_cvtsd2si"
1966 [(set (match_operand:SI 0 "register_operand" "=r,r")
1969 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1970 (parallel [(const_int 0)]))]
1971 UNSPEC_FIX_NOTRUNC))]
1973 "cvtsd2si\t{%1, %0|%0, %1}"
1974 [(set_attr "type" "sseicvt")
1975 (set_attr "athlon_decode" "double,vector")
1976 (set_attr "mode" "SI")])
;; Variant of the cvtsd2si pattern whose source is a scalar DF operand
;; (SSE register or memory) instead of an element of a V2DF vector.
1978 (define_insn "sse2_cvtsd2si_2"
1979 [(set (match_operand:SI 0 "register_operand" "=r,r")
1980 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1981 UNSPEC_FIX_NOTRUNC))]
1983 "cvtsd2si\t{%1, %0|%0, %1}"
1984 [(set_attr "type" "sseicvt")
1985 (set_attr "athlon_decode" "double,vector")
1986 (set_attr "amdfam10_decode" "double,double")
1987 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode result in a general register from element 0 of a V2DF
;; operand.  Requires TARGET_SSE2 and TARGET_64BIT.
1989 (define_insn "sse2_cvtsd2siq"
1990 [(set (match_operand:DI 0 "register_operand" "=r,r")
1993 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1994 (parallel [(const_int 0)]))]
1995 UNSPEC_FIX_NOTRUNC))]
1996 "TARGET_SSE2 && TARGET_64BIT"
1997 "cvtsd2siq\t{%1, %0|%0, %1}"
1998 [(set_attr "type" "sseicvt")
1999 (set_attr "athlon_decode" "double,vector")
2000 (set_attr "mode" "DI")])
;; cvtsd2siq with a scalar DF source (SSE register or memory) and a DImode
;; general-register destination.  Requires TARGET_SSE2 and TARGET_64BIT.
2002 (define_insn "sse2_cvtsd2siq_2"
2003 [(set (match_operand:DI 0 "register_operand" "=r,r")
2004 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2005 UNSPEC_FIX_NOTRUNC))]
2006 "TARGET_SSE2 && TARGET_64BIT"
2007 "cvtsd2siq\t{%1, %0|%0, %1}"
2008 [(set_attr "type" "sseicvt")
2009 (set_attr "athlon_decode" "double,vector")
2010 (set_attr "amdfam10_decode" "double,double")
2011 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result in a general register computed from element 0
;; of a V2DF operand held in an SSE register or in memory.
2013 (define_insn "sse2_cvttsd2si"
2014 [(set (match_operand:SI 0 "register_operand" "=r,r")
2017 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2018 (parallel [(const_int 0)]))))]
2020 "cvttsd2si\t{%1, %0|%0, %1}"
2021 [(set_attr "type" "sseicvt")
2022 (set_attr "mode" "SI")
2023 (set_attr "athlon_decode" "double,vector")
2024 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode counterpart of the cvttsd2si pattern.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
2026 (define_insn "sse2_cvttsd2siq"
2027 [(set (match_operand:DI 0 "register_operand" "=r,r")
2030 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2031 (parallel [(const_int 0)]))))]
2032 "TARGET_SSE2 && TARGET_64BIT"
2033 "cvttsd2siq\t{%1, %0|%0, %1}"
2034 [(set_attr "type" "sseicvt")
2035 (set_attr "mode" "DI")
2036 (set_attr "athlon_decode" "double,vector")
2037 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: V2DF result computed from elements 0 and 1 of a V4SI operand
;; (SSE register or memory).
2039 (define_insn "sse2_cvtdq2pd"
2040 [(set (match_operand:V2DF 0 "register_operand" "=x")
2043 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2044 (parallel [(const_int 0) (const_int 1)]))))]
2046 "cvtdq2pd\t{%1, %0|%0, %1}"
2047 [(set_attr "type" "ssecvt")
2048 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to
;; the V2SI zero constant before the pattern is emitted.
2050 (define_expand "sse2_cvtpd2dq"
2051 [(set (match_operand:V4SI 0 "register_operand" "")
2053 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2057 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V4SI destination built from a V2SI conversion of the V2DF
;; operand 1 together with operand 2, which must be zero (const0_operand).
2059 (define_insn "*sse2_cvtpd2dq"
2060 [(set (match_operand:V4SI 0 "register_operand" "=x")
2062 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2064 (match_operand:V2SI 2 "const0_operand" "")))]
2066 "cvtpd2dq\t{%1, %0|%0, %1}"
2067 [(set_attr "type" "ssecvt")
2068 (set_attr "mode" "TI")
2069 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation statement sets operands[2] to
;; the V2SI zero constant before the pattern is emitted.
2071 (define_expand "sse2_cvttpd2dq"
2072 [(set (match_operand:V4SI 0 "register_operand" "")
2074 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2077 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V4SI destination built from `fix' of the V2DF operand 1
;; together with operand 2, which must be zero (const0_operand).
2079 (define_insn "*sse2_cvttpd2dq"
2080 [(set (match_operand:V4SI 0 "register_operand" "=x")
2082 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2083 (match_operand:V2SI 2 "const0_operand" "")))]
2085 "cvttpd2dq\t{%1, %0|%0, %1}"
2086 [(set_attr "type" "ssecvt")
2087 (set_attr "mode" "TI")
2088 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (SSE register or memory);
;; operand 1 is a V4SF register tied to the destination.
2090 (define_insn "sse2_cvtsd2ss"
2091 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2094 (float_truncate:V2SF
2095 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2096 (match_operand:V4SF 1 "register_operand" "0,0")
2099 "cvtsd2ss\t{%2, %0|%0, %2}"
2100 [(set_attr "type" "ssecvt")
2101 (set_attr "athlon_decode" "vector,double")
2102 (set_attr "amdfam10_decode" "vector,double")
2103 (set_attr "mode" "SF")])
;; Emits cvtss2sd.  Operand 2 is a V4SF register or memory operand from
;; which elements 0 and 1 are selected; operand 1 is a V2DF register tied
;; to the destination ("0").
2105 (define_insn "sse2_cvtss2sd"
2106 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2110 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2111 (parallel [(const_int 0) (const_int 1)])))
2112 (match_operand:V2DF 1 "register_operand" "0,0")
2115 "cvtss2sd\t{%2, %0|%0, %2}"
2116 [(set_attr "type" "ssecvt")
2117 (set_attr "amdfam10_decode" "vector,double")
2118 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: the V2DF source is narrowed with
;; float_truncate:V2SF and the destination is V4SF.  The preparation
;; statement sets operand 2 to the V2SFmode zero vector for the
;; "*sse2_cvtpd2ps" insn.
;; NOTE(review): operand 1 carries an "xm" constraint string here, unlike
;; the other expanders in this file which use ""; presumably harmless in a
;; define_expand -- confirm.
2120 (define_expand "sse2_cvtpd2ps"
2121 [(set (match_operand:V4SF 0 "register_operand" "")
2123 (float_truncate:V2SF
2124 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2127 "operands[2] = CONST0_RTX (V2SFmode);")
;; Emits cvtpd2ps.  The V2DF source (register or memory) goes through
;; float_truncate:V2SF; operand 2 must be a zero constant
;; (const0_operand).  The destination is a V4SF register.
2129 (define_insn "*sse2_cvtpd2ps"
2130 [(set (match_operand:V4SF 0 "register_operand" "=x")
2132 (float_truncate:V2SF
2133 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2134 (match_operand:V2SF 2 "const0_operand" "")))]
2136 "cvtpd2ps\t{%1, %0|%0, %1}"
2137 [(set_attr "type" "ssecvt")
2138 (set_attr "mode" "V4SF")
2139 (set_attr "amdfam10_decode" "double")])
;; Emits cvtps2pd.  Operand 1 is a V4SF register or memory operand from
;; which elements 0 and 1 are selected; the result is a V2DF register.
2141 (define_insn "sse2_cvtps2pd"
2142 [(set (match_operand:V2DF 0 "register_operand" "=x")
2145 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2146 (parallel [(const_int 0) (const_int 1)]))))]
2148 "cvtps2pd\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "ssecvt")
2150 (set_attr "mode" "V2DF")
2151 (set_attr "amdfam10_decode" "direct")])
2153 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2155 ;; Parallel double-precision floating point element swizzling
2157 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combines two V2DF operands, with the selection starting at element 1.
;; Three alternatives:
;;   - register form: unpckhpd with operand 1 tied to the destination;
;;   - operand 1 in offsettable memory, operand 2 tied to the destination:
;;     movlpd from the %H1 half of operand 1;
;;   - memory destination (operand 2 tied to it): movhpd store of operand 1.
;; The insn condition rejects operands 1 and 2 both being memory.
2159 (define_insn "sse2_unpckhpd"
2160 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2163 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2164 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2165 (parallel [(const_int 1)
2167 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2169 unpckhpd\t{%2, %0|%0, %2}
2170 movlpd\t{%H1, %0|%0, %H1}
2171 movhpd\t{%1, %0|%0, %1}"
2172 [(set_attr "type" "sselog,ssemov,ssemov")
2173 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand, with the selection starting at
;; element 0.  The first alternative (register destination) emits movddup;
;; the second alternative has an offsettable memory destination and a
;; register source (type ssemov).  Destination and source may not both be
;; memory.
2175 (define_insn "*sse3_movddup"
2176 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2179 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2181 (parallel [(const_int 0)
2183 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2185 movddup\t{%1, %0|%0, %1}
2187 [(set_attr "type" "sselog1,ssemov")
2188 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination fed from a V2DF register.  The register is viewed as DFmode
;; ("low") and that same value is stored twice, at byte offsets 0 and 8 of
;; the destination.
2191 [(set (match_operand:V2DF 0 "memory_operand" "")
2194 (match_operand:V2DF 1 "register_operand" "")
2196 (parallel [(const_int 0)
2198 "TARGET_SSE3 && reload_completed"
2201 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2202 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2203 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Combines two V2DF operands, with the selection starting at element 0.
;; Operand 1 is always tied to the destination.  Three alternatives:
;;   - register operand 2: unpcklpd;
;;   - memory operand 2: movhpd load of operand 2;
;;   - offsettable memory destination: movlpd store of operand 2 to the
;;     %H0 half of the destination.
;; The insn condition rejects operands 1 and 2 both being memory.
2207 (define_insn "sse2_unpcklpd"
2208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2211 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2212 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2213 (parallel [(const_int 0)
2215 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2217 unpcklpd\t{%2, %0|%0, %2}
2218 movhpd\t{%2, %0|%0, %2}
2219 movlpd\t{%2, %H0|%H0, %2}"
2220 [(set_attr "type" "sselog,ssemov,ssemov")
2221 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd taking the immediate as one SImode const_int
;; (operand 3).  It reads the mask with INTVAL and forwards to
;; gen_sse2_shufpd_1; bit 1 of the mask chooses 3 or 2 as the final
;; selector argument.
2223 (define_expand "sse2_shufpd"
2224 [(match_operand:V2DF 0 "register_operand" "")
2225 (match_operand:V2DF 1 "register_operand" "")
2226 (match_operand:V2DF 2 "nonimmediate_operand" "")
2227 (match_operand:SI 3 "const_int_operand" "")]
2230 int mask = INTVAL (operands[3]);
2231 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2233 GEN_INT (mask & 2 ? 3 : 2)));
;; Emits shufpd.  Operands 3 and 4 are the two element selectors
;; (const_0_to_1_operand and const_2_to_3_operand).  The output code
;; rebuilds the instruction immediate as
;;   operand 3 | ((operand 4 - 2) << 1)
;; and stores it back into operands[3] for printing as %3.
2237 (define_insn "sse2_shufpd_1"
2238 [(set (match_operand:V2DF 0 "register_operand" "=x")
2241 (match_operand:V2DF 1 "register_operand" "0")
2242 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2243 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2244 (match_operand 4 "const_2_to_3_operand" "")])))]
2248 mask = INTVAL (operands[3]);
2249 mask |= (INTVAL (operands[4]) - 2) << 1;
2250 operands[3] = GEN_INT (mask);
2252 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2254 [(set_attr "type" "sselog")
2255 (set_attr "mode" "V2DF")])
;; Moves element 1 of a V2DF operand into a DF destination.  The first
;; alternative (memory destination, register source) emits movhpd; the
;; remaining alternatives have types sselog1 and ssemov.  Destination and
;; source may not both be memory.
2257 (define_insn "sse2_storehpd"
2258 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2260 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2261 (parallel [(const_int 1)])))]
2262 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2264 movhpd\t{%1, %0|%0, %1}
2267 [(set_attr "type" "ssemov,sselog1,ssemov")
2268 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: taking element 1 of a V2DF memory operand into a DF
;; register becomes a plain DF move from the same address advanced by
;; 8 bytes.
2271 [(set (match_operand:DF 0 "register_operand" "")
2273 (match_operand:V2DF 1 "memory_operand" "")
2274 (parallel [(const_int 1)])))]
2275 "TARGET_SSE2 && reload_completed"
2276 [(set (match_dup 0) (match_dup 1))]
2278 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Moves element 0 of a V2DF operand into a DF destination.  The first
;; alternative (memory destination, register source) emits movlpd.
;; Destination and source may not both be memory.
2281 (define_insn "sse2_storelpd"
2282 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2284 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2285 (parallel [(const_int 0)])))]
2286 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2288 movlpd\t{%1, %0|%0, %1}
2291 [(set_attr "type" "ssemov")
2292 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: taking element 0 of a V2DF operand into a DF
;; register becomes a plain DF move.  The source is re-expressed in DFmode
;; (as a DFmode REG with the same register number, or through
;; gen_lowpart) and then moved into operand 0.
2295 [(set (match_operand:DF 0 "register_operand" "")
2297 (match_operand:V2DF 1 "nonimmediate_operand" "")
2298 (parallel [(const_int 0)])))]
2299 "TARGET_SSE2 && reload_completed"
2302 rtx op1 = operands[1];
2304 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2306 op1 = gen_lowpart (DFmode, op1);
2307 emit_move_insn (operands[0], op1);
;; Builds a V2DF from element 0 of operand 1 (V2DF) and the DF operand 2.
;; Visible alternatives:
;;   - operand 2 in memory, operand 1 tied to the destination: movhpd;
;;   - operand 2 in a register, operand 1 tied: unpcklpd;
;;   - operand 2 tied to the destination, operand 1 in a register: shufpd $1;
;;   - offsettable memory destination (type "other").
;; The insn condition rejects operands 1 and 2 both being memory.
;; NOTE(review): in the shufpd alternative the destination register holds
;; operand 2.  shufpd with immediate 1 takes the low result from the
;; destination's high quadword and the high result from the source's low
;; quadword -- confirm this really yields {operand 1 element 0, operand 2}.
2311 (define_insn "sse2_loadhpd"
2312 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2315 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2316 (parallel [(const_int 0)]))
2317 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2318 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2320 movhpd\t{%2, %0|%0, %2}
2321 unpcklpd\t{%2, %0|%0, %2}
2322 shufpd\t{$1, %1, %0|%0, %1, 1}
2324 [(set_attr "type" "ssemov,sselog,sselog,other")
2325 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for the memory-destination form of loadhpd: element 0
;; of the memory vector is kept (match_dup 0) and the DF register
;; (operand 1) becomes the other element, so the whole operation reduces
;; to a DF store at byte offset 8 of the destination.
2328 [(set (match_operand:V2DF 0 "memory_operand" "")
2330 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2331 (match_operand:DF 1 "register_operand" "")))]
2332 "TARGET_SSE2 && reload_completed"
2333 [(set (match_dup 0) (match_dup 1))]
2335 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Builds a V2DF whose low element is the DF operand 2 and whose high
;; element is element 1 of operand 1 (V2DF, or the zero vector "C").
;; Visible alternatives, in order:
;;   0: operand 2 in memory, operand 1 zero           -> movsd load
;;   1: operand 2 in memory, operand 1 tied to dest   -> movlpd load
;;   2: operand 2 in a register, operand 1 tied       -> movsd
;;   3: operand 2 tied to dest, operand 1 in register -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 in memory   -> movhpd from %H1
;;   5: memory destination (type "other")
;; The insn condition rejects operands 1 and 2 both being memory.
;;
;; In alternative 3 the destination already holds operand 2 in its low
;; quadword, so the shufpd source must be operand 1 (%1): immediate 2
;; keeps the destination's low quadword and takes the source's high
;; quadword.  Naming %2 there would shuffle the destination with itself,
;; leaving the register unchanged and never reading operand 1.
2338 (define_insn "sse2_loadlpd"
2339 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2341 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2343 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2344 (parallel [(const_int 1)]))))]
2345 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2347 movsd\t{%2, %0|%0, %2}
2348 movlpd\t{%2, %0|%0, %2}
2349 movsd\t{%2, %0|%0, %2}
2350 shufpd\t{$2, %1, %0|%0, %1, 2}
2351 movhpd\t{%H1, %0|%0, %H1}
2353 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2354 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for the memory-destination form of loadlpd: the DF
;; register (operand 1) becomes the low element while element 1 of the
;; memory vector is kept (match_dup 0).  Only the low element changes, so
;; the operation reduces to a DF store at byte offset 0 of the
;; destination.  Offset 8 would overwrite the element that must be
;; preserved; that offset belongs to the loadhpd split, which keeps
;; element 0 instead.
2357 [(set (match_operand:V2DF 0 "memory_operand" "")
2359 (match_operand:DF 1 "register_operand" "")
2360 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2361 "TARGET_SSE2 && reload_completed"
2362 [(set (match_dup 0) (match_dup 1))]
2364 operands[0] = adjust_address (operands[0], DFmode, 0);
2367 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (!TARGET_SSE2 && TARGET_SSE) move of element 1 of a V2DF
;; operand into a DF destination, using single-precision move mnemonics:
;;   - memory destination: movhps store;
;;   - register to register: movhlps;
;;   - offsettable memory source: movlps from the %H1 half.
;; Destination and source may not both be memory.
2369 (define_insn "*vec_extractv2df_1_sse"
2370 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2372 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2373 (parallel [(const_int 1)])))]
2374 "!TARGET_SSE2 && TARGET_SSE
2375 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2377 movhps\t{%1, %0|%0, %1}
2378 movhlps\t{%1, %0|%0, %1}
2379 movlps\t{%H1, %0|%0, %H1}"
2380 [(set_attr "type" "ssemov")
2381 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (!TARGET_SSE2 && TARGET_SSE) move of element 0 of a V2DF
;; operand into a DF destination:
;;   - memory destination: movlps store;
;;   - register to register: movaps;
;;   - memory source: movlps load.
;; Destination and source may not both be memory.
2383 (define_insn "*vec_extractv2df_0_sse"
2384 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2386 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2387 (parallel [(const_int 0)])))]
2388 "!TARGET_SSE2 && TARGET_SSE
2389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2391 movlps\t{%1, %0|%0, %1}
2392 movaps\t{%1, %0|%0, %1}
2393 movlps\t{%1, %0|%0, %1}"
2394 [(set_attr "type" "ssemov")
2395 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merges two V2DF operands: the low quadword comes from operand 2 and
;; the high quadword from operand 1, as the register alternative
;; "movsd %2, %0" with operand 1 tied to the destination shows.
;; Alternatives, in order:
;;   0: operand 2 register, operand 1 tied to dest    -> movsd
;;   1: operand 2 memory, operand 1 tied              -> movlpd load
;;   2: memory destination, operand 2 register        -> movlpd store
;;   3: operand 2 tied to dest, operand 1 register    -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 memory      -> movhps from %H1
;;   5: memory destination, operand 1 register        -> movhps to %H0
;;
;; In alternative 3 the destination already holds operand 2, so the shufpd
;; source must be operand 1 (%1): immediate 2 keeps the destination's low
;; quadword and takes the source's high quadword.  Naming %2 there would
;; shuffle the destination with itself, leaving the register unchanged and
;; never reading operand 1.
2397 (define_insn "sse2_movsd"
2398 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2400 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2401 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2405 movsd\t{%2, %0|%0, %2}
2406 movlpd\t{%2, %0|%0, %2}
2407 movlpd\t{%2, %0|%0, %2}
2408 shufpd\t{$2, %1, %0|%0, %1, 2}
2409 movhps\t{%H1, %0|%0, %H1}
2410 movhps\t{%1, %H0|%H0, %1}"
2411 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2412 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Emits movddup from a DF operand (register or memory) into a V2DF
;; register.
2414 (define_insn "*vec_dupv2df_sse3"
2415 [(set (match_operand:V2DF 0 "register_operand" "=x")
2417 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2419 "movddup\t{%1, %0|%0, %1}"
2420 [(set_attr "type" "sselog1")
2421 (set_attr "mode" "DF")])
;; V2DF result built from a DF register operand that is tied to the
;; destination ("0").  Type sselog1, mode V4SF.
2423 (define_insn "*vec_dupv2df"
2424 [(set (match_operand:V2DF 0 "register_operand" "=x")
2426 (match_operand:DF 1 "register_operand" "0")))]
2429 [(set_attr "type" "sselog1")
2430 (set_attr "mode" "V4SF")])
;; Emits movddup to form a V2DF register from the DF operand 1 (register
;; or memory).
2432 (define_insn "*vec_concatv2df_sse3"
2433 [(set (match_operand:V2DF 0 "register_operand" "=x")
2435 (match_operand:DF 1 "nonimmediate_operand" "xm")
2438 "movddup\t{%1, %0|%0, %1}"
2439 [(set_attr "type" "sselog1")
2440 (set_attr "mode" "DF")])
;; Forms a V2DF register from two DF operands.  Alternatives, in order:
;;   - operand 1 tied, operand 2 in a Y2 register: unpcklpd;
;;   - operand 1 tied, operand 2 in memory (Y2 dest): movhpd;
;;   - operand 1 in memory, operand 2 the zero constant "C": movsd load;
;;   - operand 1 tied, operand 2 in an x register: movlhps;
;;   - operand 1 tied, operand 2 in memory (x dest): movhps.
2442 (define_insn "*vec_concatv2df"
2443 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2445 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2446 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2449 unpcklpd\t{%2, %0|%0, %2}
2450 movhpd\t{%2, %0|%0, %2}
2451 movsd\t{%1, %0|%0, %1}
2452 movlhps\t{%2, %0|%0, %2}
2453 movhps\t{%2, %0|%0, %2}"
2454 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2455 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Named expander that hands off to ix86_expand_vector_set, passing
;; `false' as the first argument, then the V2DF register (operand 0), the
;; DF value (operand 1) and the integer value of the const_int operand 2.
2457 (define_expand "vec_setv2df"
2458 [(match_operand:V2DF 0 "register_operand" "")
2459 (match_operand:DF 1 "register_operand" "")
2460 (match_operand 2 "const_int_operand" "")]
2463 ix86_expand_vector_set (false, operands[0], operands[1],
2464 INTVAL (operands[2]));
;; Named expander that hands off to ix86_expand_vector_extract, passing
;; `false' as the first argument, then the DF destination (operand 0), the
;; V2DF source (operand 1) and the integer value of the const_int
;; operand 2.
2468 (define_expand "vec_extractv2df"
2469 [(match_operand:DF 0 "register_operand" "")
2470 (match_operand:V2DF 1 "register_operand" "")
2471 (match_operand 2 "const_int_operand" "")]
2474 ix86_expand_vector_extract (false, operands[0], operands[1],
2475 INTVAL (operands[2]));
;; Named expander that hands off to ix86_expand_vector_init, passing
;; `false' as the first argument, then the V2DF destination (operand 0)
;; and operand 1, which has no predicate or mode here.
2479 (define_expand "vec_initv2df"
2480 [(match_operand:V2DF 0 "register_operand" "")
2481 (match_operand 1 "" "")]
2484 ix86_expand_vector_init (false, operands[0], operands[1]);
2488 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2490 ;; Parallel integral arithmetic
2492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI integer vector mode.  The
;; preparation statement forces a zero vector of the same mode into a
;; register and installs it as operand 2.
2494 (define_expand "neg<mode>2"
2495 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2498 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2500 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for every SSEMODEI integer vector mode.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy (PLUS, ...) before
;; the plus RTL is generated.
2502 (define_expand "add<mode>3"
2503 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2504 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2505 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2507 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Emits padd{b,w,d,q} (suffix from ssevecsize).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Guarded by ix86_binary_operator_ok (PLUS, ...).
2509 (define_insn "*add<mode>3"
2510 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2512 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2513 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2514 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2515 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2516 [(set_attr "type" "sseiadd")
2517 (set_attr "mode" "TI")])
;; Emits padds{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; tied to the destination and marked commutative; the insn is guarded by
;; ix86_binary_operator_ok (SS_PLUS, ...).
2519 (define_insn "sse2_ssadd<mode>3"
2520 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2522 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2523 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2524 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2525 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2526 [(set_attr "type" "sseiadd")
2527 (set_attr "mode" "TI")])
;; Emits paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; tied to the destination and marked commutative; the insn is guarded by
;; ix86_binary_operator_ok (US_PLUS, ...).
2529 (define_insn "sse2_usadd<mode>3"
2530 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2532 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2533 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2534 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2535 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2536 [(set_attr "type" "sseiadd")
2537 (set_attr "mode" "TI")])
;; Subtraction expander for every SSEMODEI integer vector mode.  Operand 1
;; must be a register; operands are passed through
;; ix86_fixup_binary_operands_no_copy (MINUS, ...).
2539 (define_expand "sub<mode>3"
2540 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2541 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2542 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2544 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Emits psub{b,w,d,q} (suffix from ssevecsize).  Operand 1 is a register
;; tied to the destination; operand 2 may be a register or memory.
2546 (define_insn "*sub<mode>3"
2547 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2549 (match_operand:SSEMODEI 1 "register_operand" "0")
2550 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2552 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2553 [(set_attr "type" "sseiadd")
2554 (set_attr "mode" "TI")])
;; Emits psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
2556 (define_insn "sse2_sssub<mode>3"
2557 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2559 (match_operand:SSEMODE12 1 "register_operand" "0")
2560 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2562 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2563 [(set_attr "type" "sseiadd")
2564 (set_attr "mode" "TI")])
;; Emits psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; a register tied to the destination; operand 2 may be a register or
;; memory.
2566 (define_insn "sse2_ussub<mode>3"
2567 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2569 (match_operand:SSEMODE12 1 "register_operand" "0")
2570 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2572 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2573 [(set_attr "type" "sseiadd")
2574 (set_attr "mode" "TI")])
;; V16QI multiply expander, synthesised from word multiplies.
;; Steps visible below:
;;   1. allocate twelve V16QI temporaries t[0..11];
;;   2. spread the bytes of both inputs into words with
;;      punpckhbw/punpcklbw (each input unpacked with itself);
;;   3. multiply the high and low halves as V8HI via gen_mulv8hi3;
;;   4. run further punpck{h,l}bw steps to gather the wanted result bytes,
;;      finishing with a punpcklbw into op0.
;; The end-of-line pictures track which result byte sits where.
2576 (define_expand "mulv16qi3"
2577 [(set (match_operand:V16QI 0 "register_operand" "")
2578 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2579 (match_operand:V16QI 2 "register_operand" "")))]
2585 for (i = 0; i < 12; ++i)
2586 t[i] = gen_reg_rtx (V16QImode);
2588 /* Unpack data such that we've got a source byte in each low byte of
2589 each word. We don't care what goes into the high byte of each word.
2590 Rather than trying to get zero in there, most convenient is to let
2591 it be a copy of the low byte. */
2592 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2593 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2594 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2595 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2597 /* Multiply words. The end-of-line annotations here give a picture of what
2598 the output of that instruction looks like. Dot means don't care; the
2599 letters are the bytes of the result with A being the most significant. */
2600 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2601 gen_lowpart (V8HImode, t[0]),
2602 gen_lowpart (V8HImode, t[1])));
2603 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2604 gen_lowpart (V8HImode, t[2]),
2605 gen_lowpart (V8HImode, t[3])));
2607 /* Extract the relevant bytes and merge them back together. */
2608 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2609 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2610 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2611 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2612 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2613 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2616 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy (MULT, ...) before the mult RTL is
;; generated.
2620 (define_expand "mulv8hi3"
2621 [(set (match_operand:V8HI 0 "register_operand" "")
2622 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2623 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2625 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmullw for a V8HI mult.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
2627 (define_insn "*mulv8hi3"
2628 [(set (match_operand:V8HI 0 "register_operand" "=x")
2629 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2630 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2631 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2632 "pmullw\t{%2, %0|%0, %2}"
2633 [(set_attr "type" "sseimul")
2634 (set_attr "mode" "TI")])
;; Expander paired with the "*smulv8hi3_highpart" insn (pmulhw).  Operands
;; are passed through ix86_fixup_binary_operands_no_copy (MULT, ...).
2636 (define_expand "smulv8hi3_highpart"
2637 [(set (match_operand:V8HI 0 "register_operand" "")
2642 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2644 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2647 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmulhw on V8HI operands.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be a register or memory.
2649 (define_insn "*smulv8hi3_highpart"
2650 [(set (match_operand:V8HI 0 "register_operand" "=x")
2655 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2657 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2659 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2660 "pmulhw\t{%2, %0|%0, %2}"
2661 [(set_attr "type" "sseimul")
2662 (set_attr "mode" "TI")])
;; Expander paired with the "*umulv8hi3_highpart" insn (pmulhuw).
;; Operands are passed through ix86_fixup_binary_operands_no_copy
;; (MULT, ...).
2664 (define_expand "umulv8hi3_highpart"
2665 [(set (match_operand:V8HI 0 "register_operand" "")
2670 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2672 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2675 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmulhuw on V8HI operands.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be a register or memory.
2677 (define_insn "*umulv8hi3_highpart"
2678 [(set (match_operand:V8HI 0 "register_operand" "=x")
2683 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2685 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2687 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2688 "pmulhuw\t{%2, %0|%0, %2}"
2689 [(set_attr "type" "sseimul")
2690 (set_attr "mode" "TI")])
;; Emits pmuludq.  Elements 0 and 2 are selected from each V4SI operand
;; and the result is a V2DI register.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2692 (define_insn "sse2_umulv2siv2di3"
2693 [(set (match_operand:V2DI 0 "register_operand" "=x")
2697 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2698 (parallel [(const_int 0) (const_int 2)])))
2701 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2702 (parallel [(const_int 0) (const_int 2)])))))]
2703 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2704 "pmuludq\t{%2, %0|%0, %2}"
2705 [(set_attr "type" "sseimul")
2706 (set_attr "mode" "TI")])
;; Emits pmaddwd: V8HI inputs, V4SI result.  The RTL takes two V4HI
;; selections from each input, one index list starting at element 0 and
;; the other starting at element 1 and ending at element 7.  Operand 1 is
;; tied to the destination and marked commutative ("%0").
2708 (define_insn "sse2_pmaddwd"
2709 [(set (match_operand:V4SI 0 "register_operand" "=x")
2714 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2715 (parallel [(const_int 0)
2721 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2722 (parallel [(const_int 0)
2728 (vec_select:V4HI (match_dup 1)
2729 (parallel [(const_int 1)
2734 (vec_select:V4HI (match_dup 2)
2735 (parallel [(const_int 1)
2738 (const_int 7)]))))))]
2739 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2740 "pmaddwd\t{%2, %0|%0, %2}"
2741 [(set_attr "type" "sseiadd")
2742 (set_attr "mode" "TI")])
;; V4SI multiply expander built from two pmuludq multiplies
;; (gen_sse2_umulv2siv2di3):
;;   1. multiply elements 0 and 2 of the inputs;
;;   2. shift both inputs right by 32 bits as TImode values
;;      (gen_sse2_lshrti3) so elements 1 and 3 sit in slots 0 and 2, then
;;      multiply again;
;;   3. use pshufd to bring each product's element 2 down to element 1;
;;   4. merge the two partial results with punpckldq into op0.
2744 (define_expand "mulv4si3"
2745 [(set (match_operand:V4SI 0 "register_operand" "")
2746 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2747 (match_operand:V4SI 2 "register_operand" "")))]
2750 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2756 t1 = gen_reg_rtx (V4SImode);
2757 t2 = gen_reg_rtx (V4SImode);
2758 t3 = gen_reg_rtx (V4SImode);
2759 t4 = gen_reg_rtx (V4SImode);
2760 t5 = gen_reg_rtx (V4SImode);
2761 t6 = gen_reg_rtx (V4SImode);
2762 thirtytwo = GEN_INT (32);
2764 /* Multiply elements 2 and 0. */
2765 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2767 /* Shift both input vectors down one element, so that elements 3 and 1
2768 are now in the slots for elements 2 and 0. For K8, at least, this is
2769 faster than using a shuffle. */
2770 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2771 gen_lowpart (TImode, op1), thirtytwo));
2772 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2773 gen_lowpart (TImode, op2), thirtytwo));
2775 /* Multiply elements 3 and 1. */
2776 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2778 /* Move the results in element 2 down to element 1; we don't care what
2779 goes in elements 2 and 3. */
2780 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2781 const0_rtx, const0_rtx));
2782 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2783 const0_rtx, const0_rtx));
2785 /* Merge the parts back together. */
2786 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply expander built from three pmuludq multiplies
;; (gen_sse2_umulv2siv2di3) on 32-bit halves:
;;   t1 = low(op1) * low(op2)
;;   t4 = low(op1) * high(op2), then shifted left by 32
;;   t5 = low(op2) * high(op1), then shifted left by 32
;;   op0 = t1 + t4 + t5
;; The high halves are obtained by shifting each input right by 32 bits
;; with gen_lshrv2di3.
2790 (define_expand "mulv2di3"
2791 [(set (match_operand:V2DI 0 "register_operand" "")
2792 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2793 (match_operand:V2DI 2 "register_operand" "")))]
2796 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2802 t1 = gen_reg_rtx (V2DImode);
2803 t2 = gen_reg_rtx (V2DImode);
2804 t3 = gen_reg_rtx (V2DImode);
2805 t4 = gen_reg_rtx (V2DImode);
2806 t5 = gen_reg_rtx (V2DImode);
2807 t6 = gen_reg_rtx (V2DImode);
2808 thirtytwo = GEN_INT (32);
2810 /* Multiply low parts. */
2811 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2812 gen_lowpart (V4SImode, op2)));
2814 /* Shift input vectors right 32 bits so we can multiply high parts. */
2815 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2816 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2818 /* Multiply high parts by low parts. */
2819 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2820 gen_lowpart (V4SImode, t3)));
2821 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2822 gen_lowpart (V4SImode, t2)));
2824 /* Shift them back. */
2825 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2826 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2828 /* Add the three parts together. */
2829 emit_insn (gen_addv2di3 (t6, t1, t4));
2830 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed V8HI multiply, high half, giving V4SI.  The low words
;; of the products come from gen_mulv8hi3 and the high words from
;; gen_smulv8hi3_highpart; the two are then combined with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
2834 (define_expand "vec_widen_smult_hi_v8hi"
2835 [(match_operand:V4SI 0 "register_operand" "")
2836 (match_operand:V8HI 1 "register_operand" "")
2837 (match_operand:V8HI 2 "register_operand" "")]
2840 rtx op1, op2, t1, t2, dest;
2844 t1 = gen_reg_rtx (V8HImode);
2845 t2 = gen_reg_rtx (V8HImode);
2846 dest = gen_lowpart (V8HImode, operands[0]);
2848 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2849 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2850 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed V8HI multiply, low half, giving V4SI.  The low words of
;; the products come from gen_mulv8hi3 and the high words from
;; gen_smulv8hi3_highpart; the two are then combined with
;; gen_vec_interleave_lowv8hi into operand 0 viewed as V8HI.
2854 (define_expand "vec_widen_smult_lo_v8hi"
2855 [(match_operand:V4SI 0 "register_operand" "")
2856 (match_operand:V8HI 1 "register_operand" "")
2857 (match_operand:V8HI 2 "register_operand" "")]
2860 rtx op1, op2, t1, t2, dest;
2864 t1 = gen_reg_rtx (V8HImode);
2865 t2 = gen_reg_rtx (V8HImode);
2866 dest = gen_lowpart (V8HImode, operands[0]);
2868 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2869 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2870 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, high half, giving V4SI.  The low words
;; of the products come from gen_mulv8hi3 and the high words from
;; gen_umulv8hi3_highpart; the two are then combined with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
2874 (define_expand "vec_widen_umult_hi_v8hi"
2875 [(match_operand:V4SI 0 "register_operand" "")
2876 (match_operand:V8HI 1 "register_operand" "")
2877 (match_operand:V8HI 2 "register_operand" "")]
2880 rtx op1, op2, t1, t2, dest;
2884 t1 = gen_reg_rtx (V8HImode);
2885 t2 = gen_reg_rtx (V8HImode);
2886 dest = gen_lowpart (V8HImode, operands[0]);
2888 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2889 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2890 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, low half, giving V4SI.  The low words
;; of the products come from gen_mulv8hi3 and the high words from
;; gen_umulv8hi3_highpart; the two are then combined with
;; gen_vec_interleave_lowv8hi into operand 0 viewed as V8HI.
2894 (define_expand "vec_widen_umult_lo_v8hi"
2895 [(match_operand:V4SI 0 "register_operand" "")
2896 (match_operand:V8HI 1 "register_operand" "")
2897 (match_operand:V8HI 2 "register_operand" "")]
2900 rtx op1, op2, t1, t2, dest;
2904 t1 = gen_reg_rtx (V8HImode);
2905 t2 = gen_reg_rtx (V8HImode);
2906 dest = gen_lowpart (V8HImode, operands[0]);
2908 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2909 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2910 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI multiply, high half, giving V2DI.  Each input is combined
;; with itself by gen_vec_interleave_highv4si and the two results are
;; multiplied with gen_sse2_umulv2siv2di3.
;; NOTE(review): this is the signed ("smult") expander, yet the product is
;; formed by sse2_umulv2siv2di3, whose template is pmuludq (an unsigned
;; multiply).  Confirm the result is correct for negative inputs.
2914 (define_expand "vec_widen_smult_hi_v4si"
2915 [(match_operand:V2DI 0 "register_operand" "")
2916 (match_operand:V4SI 1 "register_operand" "")
2917 (match_operand:V4SI 2 "register_operand" "")]
2920 rtx op1, op2, t1, t2;
2924 t1 = gen_reg_rtx (V4SImode);
2925 t2 = gen_reg_rtx (V4SImode);
2927 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2928 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2929 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply, low half, giving V2DI.  Each input is combined
;; with itself by gen_vec_interleave_lowv4si and the two results are
;; multiplied with gen_sse2_umulv2siv2di3.
;; NOTE(review): this is the signed ("smult") expander, yet the product is
;; formed by sse2_umulv2siv2di3, whose template is pmuludq (an unsigned
;; multiply).  Confirm the result is correct for negative inputs.
2933 (define_expand "vec_widen_smult_lo_v4si"
2934 [(match_operand:V2DI 0 "register_operand" "")
2935 (match_operand:V4SI 1 "register_operand" "")
2936 (match_operand:V4SI 2 "register_operand" "")]
2939 rtx op1, op2, t1, t2;
2943 t1 = gen_reg_rtx (V4SImode);
2944 t2 = gen_reg_rtx (V4SImode);
2946 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2947 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2948 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, high half, giving V2DI.  Each input is
;; combined with itself by gen_vec_interleave_highv4si and the two results
;; are multiplied with gen_sse2_umulv2siv2di3 (pmuludq).
2952 (define_expand "vec_widen_umult_hi_v4si"
2953 [(match_operand:V2DI 0 "register_operand" "")
2954 (match_operand:V4SI 1 "register_operand" "")
2955 (match_operand:V4SI 2 "register_operand" "")]
2958 rtx op1, op2, t1, t2;
2962 t1 = gen_reg_rtx (V4SImode);
2963 t2 = gen_reg_rtx (V4SImode);
2965 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2966 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2967 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, low half, giving V2DI.  Each input is
;; combined with itself by gen_vec_interleave_lowv4si and the two results
;; are multiplied with gen_sse2_umulv2siv2di3 (pmuludq).
2971 (define_expand "vec_widen_umult_lo_v4si"
2972 [(match_operand:V2DI 0 "register_operand" "")
2973 (match_operand:V4SI 1 "register_operand" "")
2974 (match_operand:V4SI 2 "register_operand" "")]
2977 rtx op1, op2, t1, t2;
2981 t1 = gen_reg_rtx (V4SImode);
2982 t2 = gen_reg_rtx (V4SImode);
2984 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2985 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2986 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product expander for V8HI inputs with a V4SI accumulator
;; (operand 3).  It emits pmaddwd of operands 1 and 2 into a fresh V4SI
;; temporary, then adds that temporary to operand 3, storing the sum in
;; operand 0.
2990 (define_expand "sdot_prodv8hi"
2991 [(match_operand:V4SI 0 "register_operand" "")
2992 (match_operand:V8HI 1 "register_operand" "")
2993 (match_operand:V8HI 2 "register_operand" "")
2994 (match_operand:V4SI 3 "register_operand" "")]
2997 rtx t = gen_reg_rtx (V4SImode);
2998 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2999 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product expander for V4SI inputs with a V2DI accumulator
;; (operand 3):
;;   t1 = pmuludq (op1, op2) + op3
;;   t2, t3 = op1 and op2 shifted right as TImode values (gen_sse2_lshrti3)
;;   t4 = pmuludq (t2, t3)
;;   op0 = t1 + t4
2993 is not a line of this block; see below.
;; Emits psra{w,d} for the SSEMODE24 modes (V8HI, V4SI).  Operand 1 is a
;; register tied to the destination; the shift count (operand 2) is a
;; TImode register or an immediate ("xn").
3032 (define_insn "ashr<mode>3"
3033 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3035 (match_operand:SSEMODE24 1 "register_operand" "0")
3036 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3038 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3039 [(set_attr "type" "sseishft")
3040 (set_attr "mode" "TI")])
;; Emits psrl{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI): a
;; logical right shift (lshiftrt).  Operand 1 is a register tied to the
;; destination; the shift count (operand 2) is a TImode register or an
;; immediate ("xn").
3042 (define_insn "lshr<mode>3"
3043 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3044 (lshiftrt:SSEMODE248
3045 (match_operand:SSEMODE248 1 "register_operand" "0")
3046 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3048 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3049 [(set_attr "type" "sseishft")
3050 (set_attr "mode" "TI")])
;; Emits psll{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; Operand 1 is a register tied to the destination; the shift count
;; (operand 2) is a TImode register or an immediate ("xn").
3052 (define_insn "ashl<mode>3"
3053 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3055 (match_operand:SSEMODE248 1 "register_operand" "0")
3056 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3058 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3059 [(set_attr "type" "sseishft")
3060 (set_attr "mode" "TI")])
;; Emits pslldq for a TImode left shift.  The count (operand 2) must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the instruction immediate.
3062 (define_insn "sse2_ashlti3"
3063 [(set (match_operand:TI 0 "register_operand" "=x")
3064 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3065 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3068 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3069 return "pslldq\t{%2, %0|%0, %2}";
3071 [(set_attr "type" "sseishft")
3072 (set_attr "mode" "TI")])
;; Whole-vector left shift expander for the SSEMODEI modes, expressed as
;; an ashift:TI.  The preparation code tests operand 2 against
;; const_0_to_255_mul_8_operand, then re-expresses operands 0 and 1 in
;; TImode with gen_lowpart.
3074 (define_expand "vec_shl_<mode>"
3075 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3076 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3077 (match_operand:SI 2 "general_operand" "")))]
3080 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3082 operands[0] = gen_lowpart (TImode, operands[0]);
3083 operands[1] = gen_lowpart (TImode, operands[1]);
;; Emits psrldq for a TImode logical right shift.  The count (operand 2)
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing it as the instruction immediate.
3086 (define_insn "sse2_lshrti3"
3087 [(set (match_operand:TI 0 "register_operand" "=x")
3088 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3089 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3092 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3093 return "psrldq\t{%2, %0|%0, %2}";
3095 [(set_attr "type" "sseishft")
3096 (set_attr "mode" "TI")])
;; Whole-vector right shift expander for the SSEMODEI modes, expressed as
;; an lshiftrt:TI.  The preparation code tests operand 2 against
;; const_0_to_255_mul_8_operand, then re-expresses operands 0 and 1 in
;; TImode with gen_lowpart.
3098 (define_expand "vec_shr_<mode>"
3099 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3100 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3101 (match_operand:SI 2 "general_operand" "")))]
3104 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3106 operands[0] = gen_lowpart (TImode, operands[0]);
3107 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned V16QI maximum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy (UMAX, ...) before the umax RTL is
;; generated.
3110 (define_expand "umaxv16qi3"
3111 [(set (match_operand:V16QI 0 "register_operand" "")
3112 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3113 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3115 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Emits pmaxub for a V16QI umax.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be a register or memory.
3117 (define_insn "*umaxv16qi3"
3118 [(set (match_operand:V16QI 0 "register_operand" "=x")
3119 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3120 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3121 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3122 "pmaxub\t{%2, %0|%0, %2}"
3123 [(set_attr "type" "sseiadd")
3124 (set_attr "mode" "TI")])
;; Signed V8HI maximum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMAX, ...) before the smax RTL is
;; generated.
3126 (define_expand "smaxv8hi3"
3127 [(set (match_operand:V8HI 0 "register_operand" "")
3128 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3129 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3131 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Emits pmaxsw for a V8HI smax.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
3133 (define_insn "*smaxv8hi3"
3134 [(set (match_operand:V8HI 0 "register_operand" "=x")
3135 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3136 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3137 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3138 "pmaxsw\t{%2, %0|%0, %2}"
3139 [(set_attr "type" "sseiadd")
3140 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum expander written as two steps: a saturating
;; unsigned subtract (us_minus) of operand 2 from operand 1 into
;; operand 0, followed by a plus of that intermediate and operand 2.
;; The preparation code saves the original destination in operands[3];
;; if the destination is the same rtx as operand 2, a fresh V8HI register
;; is used for the intermediate so operand 2 is not overwritten before
;; the plus reads it.
3142 (define_expand "umaxv8hi3"
3143 [(set (match_operand:V8HI 0 "register_operand" "=x")
3144 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
3145 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3147 (plus:V8HI (match_dup 0) (match_dup 2)))]
3150 operands[3] = operands[0];
3151 if (rtx_equal_p (operands[0], operands[2]))
3152 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum expander for the SSEMODE14 modes (V16QI, V4SI), done as
;; a vector conditional.  It fills xops for ix86_expand_int_vcond:
;;   xops[0] = destination
;;   xops[1] = operand 1, xops[2] = operand 2
;;   xops[3] = GT comparison of operand 1 with operand 2
;;   xops[4], xops[5] = the compared operands
3155 (define_expand "smax<mode>3"
3156 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3157 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3158 (match_operand:SSEMODE14 2 "register_operand" "")))]
3164 xops[0] = operands[0];
3165 xops[1] = operands[1];
3166 xops[2] = operands[2];
3167 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3168 xops[4] = operands[1];
3169 xops[5] = operands[2];
3170 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum expander, done as a vector conditional.  It fills
;; xops for ix86_expand_int_vcond:
;;   xops[0] = destination
;;   xops[1] = operand 1, xops[2] = operand 2
;;   xops[3] = GTU comparison of operand 1 with operand 2
;;   xops[4], xops[5] = the compared operands
3175 (define_expand "umaxv4si3"
3176 [(set (match_operand:V4SI 0 "register_operand" "")
3177 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3178 (match_operand:V4SI 2 "register_operand" "")))]
3184 xops[0] = operands[0];
3185 xops[1] = operands[1];
3186 xops[2] = operands[2];
3187 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3188 xops[4] = operands[1];
3189 xops[5] = operands[2];
3190 ok = ix86_expand_int_vcond (xops);
;; Named expander for the element-wise unsigned minimum (umin) of two
;; V16QI vectors.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the RTL is emitted.
3195 (define_expand "uminv16qi3"
3196 [(set (match_operand:V16QI 0 "register_operand" "")
3197 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3198 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3200 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Insn matching the V16QI unsigned minimum; emits pminub.  Operand 1 is
;; tied to the destination and commutative with operand 2 ("%0");
;; operand 2 may be an SSE register or memory.
3202 (define_insn "*uminv16qi3"
3203 [(set (match_operand:V16QI 0 "register_operand" "=x")
3204 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3205 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3206 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3207 "pminub\t{%2, %0|%0, %2}"
3208 [(set_attr "type" "sseiadd")
3209 (set_attr "mode" "TI")])
;; Named expander for the element-wise signed minimum (smin) of two V8HI
;; vectors.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the RTL is emitted.
3211 (define_expand "sminv8hi3"
3212 [(set (match_operand:V8HI 0 "register_operand" "")
3213 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3214 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3216 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Insn matching the V8HI signed minimum; emits pminsw.  Operand 1 is tied
;; to the destination and commutative with operand 2 ("%0"); operand 2
;; may be an SSE register or memory.
3218 (define_insn "*sminv8hi3"
3219 [(set (match_operand:V8HI 0 "register_operand" "=x")
3220 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3221 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3222 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3223 "pminsw\t{%2, %0|%0, %2}"
3224 [(set_attr "type" "sseiadd")
3225 (set_attr "mode" "TI")])
;; Named expander for the signed minimum in the SSEMODE14 modes (V16QI and
;; V4SI).  Uses the same GT comparison of operand 1 with operand 2 as the
;; signed maximum, but the two values to choose from are swapped:
;; xops[1] is operand 2 and xops[2] is operand 1.
;; NOTE(review): presumably dest = op1 > op2 ? op2 : op1 -- confirm the
;; xops layout expected by ix86_expand_int_vcond.
3227 (define_expand "smin<mode>3"
3228 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3229 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3230 (match_operand:SSEMODE14 2 "register_operand" "")))]
3236 xops[0] = operands[0];
3237 xops[1] = operands[2];
3238 xops[2] = operands[1];
3239 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3240 xops[4] = operands[1];
3241 xops[5] = operands[2];
3242 ok = ix86_expand_int_vcond (xops);
;; Named expander for the unsigned minimum in the SSEMODE24 modes (V8HI
;; and V4SI).  The comparison in xops[3] is GTU of operand 1 with
;; operand 2, and the values to choose from are swapped relative to the
;; maximum: xops[1] is operand 2 and xops[2] is operand 1.
;; NOTE(review): presumably dest = op1 >u op2 ? op2 : op1 -- confirm the
;; xops layout expected by ix86_expand_int_vcond.
3247 (define_expand "umin<mode>3"
3248 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3249 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3250 (match_operand:SSEMODE24 2 "register_operand" "")))]
3256 xops[0] = operands[0];
3257 xops[1] = operands[2];
3258 xops[2] = operands[1];
3259 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3260 xops[4] = operands[1];
3261 xops[5] = operands[2];
3262 ok = ix86_expand_int_vcond (xops);
3267 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3269 ;; Parallel integral comparisons
3271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Emits pcmpeqb/pcmpeqw/pcmpeqd; the suffix comes from the
;; ssevecsize mode attribute.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
3273 (define_insn "sse2_eq<mode>3"
3274 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3276 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3277 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3278 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3279 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3280 [(set_attr "type" "ssecmp")
3281 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI,
;; V8HI, V4SI).  Emits pcmpgtb/pcmpgtw/pcmpgtd via the ssevecsize
;; attribute.  Unlike the equality pattern, operand 1 must be a register
;; tied to the destination ("0", no commutative marker).
3283 (define_insn "sse2_gt<mode>3"
3284 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3286 (match_operand:SSEMODE124 1 "register_operand" "0")
3287 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3289 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3290 [(set_attr "type" "ssecmp")
3291 (set_attr "mode" "TI")])
;; Vector conditional-select expander for the SSEMODE124 modes: operand 0
;; receives an if_then_else that chooses between operand 1 and operand 2
;; according to comparison operator 3 applied to operands 4 and 5.  The
;; work is delegated to ix86_expand_int_vcond, whose result is tested.
3293 (define_expand "vcond<mode>"
3294 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3295 (if_then_else:SSEMODE124
3296 (match_operator 3 ""
3297 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3298 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3299 (match_operand:SSEMODE124 1 "general_operand" "")
3300 (match_operand:SSEMODE124 2 "general_operand" "")))]
3303 if (ix86_expand_int_vcond (operands))
;; Vector conditional-select expander with the same operand layout as
;; vcond<mode>: operand 0 = (operator 3 on operands 4, 5) ? operand 1 :
;; operand 2, delegated to ix86_expand_int_vcond.
;; NOTE(review): presumably the entry point used for unsigned comparisons
;; (from the "vcondu" name); the visible RTL is identical to vcond<mode>.
3309 (define_expand "vcondu<mode>"
3310 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3311 (if_then_else:SSEMODE124
3312 (match_operator 3 ""
3313 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3314 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3315 (match_operand:SSEMODE124 1 "general_operand" "")
3316 (match_operand:SSEMODE124 2 "general_operand" "")))]
3319 if (ix86_expand_int_vcond (operands))
3325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3327 ;; Parallel integral logical operations
3329 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for every SSEMODEI mode, expressed as an XOR
;; involving operand 1.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx (all bits set), forces it
;; into a register and stores it in operands[2] -- presumably the second
;; XOR input.
3331 (define_expand "one_cmpl<mode>2"
3332 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3333 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3337 int i, n = GET_MODE_NUNITS (<MODE>mode);
3338 rtvec v = rtvec_alloc (n);
3340 for (i = 0; i < n; ++i)
3341 RTVEC_ELT (v, i) = constm1_rtx;
3343 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND in every SSEMODEI mode (V16QI, V8HI,
;; V4SI, V2DI).  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the RTL is emitted.
3346 (define_expand "and<mode>3"
3347 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3348 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3349 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3351 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Insn for bitwise AND in any SSEMODEI mode; emits pand regardless of the
;; element size.  Operand 1 is tied to the destination and commutative
;; with operand 2 ("%0"); operand 2 may be an SSE register or memory.
3353 (define_insn "*and<mode>3"
3354 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3356 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3357 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3358 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3359 "pand\t{%2, %0|%0, %2}"
3360 [(set_attr "type" "sselog")
3361 (set_attr "mode" "TI")])
;; Insn emitting pandn for any SSEMODEI mode.  Operand 1 appears inverted
;; (wrapped in "not") and is a register tied to the destination; operand 2
;; is used as-is and may be an SSE register or memory.
3363 (define_insn "sse2_nand<mode>3"
3364 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3366 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3367 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3369 "pandn\t{%2, %0|%0, %2}"
3370 [(set_attr "type" "sselog")
3371 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR in every SSEMODEI mode.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the RTL is emitted.
3373 (define_expand "ior<mode>3"
3374 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3375 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3376 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3378 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Insn for bitwise inclusive OR in any SSEMODEI mode; emits por.
;; Operand 1 is tied to the destination and commutative with operand 2
;; ("%0"); operand 2 may be an SSE register or memory.
3380 (define_insn "*ior<mode>3"
3381 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3383 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3384 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3385 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3386 "por\t{%2, %0|%0, %2}"
3387 [(set_attr "type" "sselog")
3388 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR in every SSEMODEI mode.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the RTL is emitted.
3390 (define_expand "xor<mode>3"
3391 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3392 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3393 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3395 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Insn for bitwise exclusive OR in any SSEMODEI mode; emits pxor.
;; Operand 1 is tied to the destination and commutative with operand 2
;; ("%0"); operand 2 may be an SSE register or memory.
3397 (define_insn "*xor<mode>3"
3398 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3400 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3401 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3402 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3403 "pxor\t{%2, %0|%0, %2}"
3404 [(set_attr "type" "sselog")
3405 (set_attr "mode" "TI")])
3407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3409 ;; Parallel integral element swizzling
3411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3414 ;; op1 = abcdefghijklmnop
3415 ;; op2 = qrstuvwxyz012345
3416 ;; h1 = aqbrcsdteufvgwhx
3417 ;; l1 = iyjzk0l1m2n3o4p5
3418 ;; h2 = aiqybjrzcks0dlt1
3419 ;; l2 = emu2fnv3gow4hpx5
3420 ;; h3 = aeimquy2bfjnrvz3
3421 ;; l3 = cgkosw04dhlptx15
3422 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result
;; (operand 0) using only byte interleaves.  Both inputs are viewed as
;; V16QI via gen_lowpart; three rounds of high/low interleaves into the
;; temporaries h1/l1, h2/l2, h3/l3 are followed by a final low interleave
;; of l3 and h3 into operand 0.  The letter diagram preceding this
;; pattern traces the byte order after each round.
3423 (define_expand "vec_pack_mod_v8hi"
3424 [(match_operand:V16QI 0 "register_operand" "")
3425 (match_operand:V8HI 1 "register_operand" "")
3426 (match_operand:V8HI 2 "register_operand" "")]
3429 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3431 op1 = gen_lowpart (V16QImode, operands[1]);
3432 op2 = gen_lowpart (V16QImode, operands[2]);
3433 h1 = gen_reg_rtx (V16QImode);
3434 l1 = gen_reg_rtx (V16QImode);
3435 h2 = gen_reg_rtx (V16QImode);
3436 l2 = gen_reg_rtx (V16QImode);
3437 h3 = gen_reg_rtx (V16QImode);
3438 l3 = gen_reg_rtx (V16QImode);
3440 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3441 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3442 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3443 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3444 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3445 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3446 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3457 ;; result = bdfhjlnp
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result
;; (operand 0) using only halfword interleaves.  Both inputs are viewed
;; as V8HI via gen_lowpart; two rounds of high/low interleaves into
;; h1/l1 and h2/l2 are followed by a final low interleave of l2 and h2
;; into operand 0.
3458 (define_expand "vec_pack_mod_v4si"
3459 [(match_operand:V8HI 0 "register_operand" "")
3460 (match_operand:V4SI 1 "register_operand" "")
3461 (match_operand:V4SI 2 "register_operand" "")]
3464 rtx op1, op2, h1, l1, h2, l2;
3466 op1 = gen_lowpart (V8HImode, operands[1]);
3467 op2 = gen_lowpart (V8HImode, operands[2]);
3468 h1 = gen_reg_rtx (V8HImode);
3469 l1 = gen_reg_rtx (V8HImode);
3470 h2 = gen_reg_rtx (V8HImode);
3471 l2 = gen_reg_rtx (V8HImode);
3473 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3474 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3475 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3476 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3477 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result
;; (operand 0) using only doubleword interleaves.  Both inputs are viewed
;; as V4SI via gen_lowpart; one round of high/low interleaves into h1/l1
;; is followed by a final low interleave of l1 and h1 into operand 0.
3487 (define_expand "vec_pack_mod_v2di"
3488 [(match_operand:V4SI 0 "register_operand" "")
3489 (match_operand:V2DI 1 "register_operand" "")
3490 (match_operand:V2DI 2 "register_operand" "")]
3493 rtx op1, op2, h1, l1;
3495 op1 = gen_lowpart (V4SImode, operands[1]);
3496 op2 = gen_lowpart (V4SImode, operands[2]);
3497 h1 = gen_reg_rtx (V4SImode);
3498 l1 = gen_reg_rtx (V4SImode);
3500 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3501 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3502 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander that forwards its three operands to the sse2_punpckhbw
;; insn.  The selection list alternates indices 8..15 with 24..31, i.e.
;; the upper eight bytes of each input (indices 16 and above presumably
;; address operand 2 in the combination of the two inputs).
3506 (define_expand "vec_interleave_highv16qi"
3507 [(set (match_operand:V16QI 0 "register_operand" "=x")
3510 (match_operand:V16QI 1 "register_operand" "0")
3511 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3512 (parallel [(const_int 8) (const_int 24)
3513 (const_int 9) (const_int 25)
3514 (const_int 10) (const_int 26)
3515 (const_int 11) (const_int 27)
3516 (const_int 12) (const_int 28)
3517 (const_int 13) (const_int 29)
3518 (const_int 14) (const_int 30)
3519 (const_int 15) (const_int 31)])))]
3522 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpcklbw
;; insn.  The selection list alternates indices 0..7 with 16..23, i.e.
;; the lower eight bytes of each input (indices 16 and above presumably
;; address operand 2 in the combination of the two inputs).
3526 (define_expand "vec_interleave_lowv16qi"
3527 [(set (match_operand:V16QI 0 "register_operand" "=x")
3530 (match_operand:V16QI 1 "register_operand" "0")
3531 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3532 (parallel [(const_int 0) (const_int 16)
3533 (const_int 1) (const_int 17)
3534 (const_int 2) (const_int 18)
3535 (const_int 3) (const_int 19)
3536 (const_int 4) (const_int 20)
3537 (const_int 5) (const_int 21)
3538 (const_int 6) (const_int 22)
3539 (const_int 7) (const_int 23)])))]
3542 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpckhwd
;; insn.  The selection list alternates indices 4..7 with 12..15, i.e.
;; the upper four halfwords of each input.
3546 (define_expand "vec_interleave_highv8hi"
3547 [(set (match_operand:V8HI 0 "register_operand" "=x")
3550 (match_operand:V8HI 1 "register_operand" "0")
3551 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3552 (parallel [(const_int 4) (const_int 12)
3553 (const_int 5) (const_int 13)
3554 (const_int 6) (const_int 14)
3555 (const_int 7) (const_int 15)])))]
3558 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpcklwd
;; insn.  The selection list alternates indices 0..3 with 8..11, i.e.
;; the lower four halfwords of each input.
3562 (define_expand "vec_interleave_lowv8hi"
3563 [(set (match_operand:V8HI 0 "register_operand" "=x")
3566 (match_operand:V8HI 1 "register_operand" "0")
3567 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3568 (parallel [(const_int 0) (const_int 8)
3569 (const_int 1) (const_int 9)
3570 (const_int 2) (const_int 10)
3571 (const_int 3) (const_int 11)])))]
3574 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpckhdq
;; insn.  The selection list is 2, 6, 3, 7: the upper two doublewords of
;; each input, alternating.
3578 (define_expand "vec_interleave_highv4si"
3579 [(set (match_operand:V4SI 0 "register_operand" "=x")
3582 (match_operand:V4SI 1 "register_operand" "0")
3583 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3584 (parallel [(const_int 2) (const_int 6)
3585 (const_int 3) (const_int 7)])))]
3588 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpckldq
;; insn.  The selection list is 0, 4, 1, 5: the lower two doublewords of
;; each input, alternating.
3592 (define_expand "vec_interleave_lowv4si"
3593 [(set (match_operand:V4SI 0 "register_operand" "=x")
3596 (match_operand:V4SI 1 "register_operand" "0")
3597 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3598 (parallel [(const_int 0) (const_int 4)
3599 (const_int 1) (const_int 5)])))]
3602 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpckhqdq
;; insn.  The selection list starts with index 1 (the upper quadword of
;; operand 1).
3606 (define_expand "vec_interleave_highv2di"
3607 [(set (match_operand:V2DI 0 "register_operand" "=x")
3610 (match_operand:V2DI 1 "register_operand" "0")
3611 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3612 (parallel [(const_int 1)
3616 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander that forwards its three operands to the sse2_punpcklqdq
;; insn.  The selection list starts with index 0 (the lower quadword of
;; operand 1).
3620 (define_expand "vec_interleave_lowv2di"
3621 [(set (match_operand:V2DI 0 "register_operand" "=x")
3624 (match_operand:V2DI 1 "register_operand" "0")
3625 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3626 (parallel [(const_int 0)
3630 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Combines two V8HI operands into one V16QI result with packsswb.
;; Operand 1 is a register tied to the destination; operand 2 may be an
;; SSE register or memory.
3634 (define_insn "sse2_packsswb"
3635 [(set (match_operand:V16QI 0 "register_operand" "=x")
3638 (match_operand:V8HI 1 "register_operand" "0"))
3640 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3642 "packsswb\t{%2, %0|%0, %2}"
3643 [(set_attr "type" "sselog")
3644 (set_attr "mode" "TI")])
;; Combines two V4SI operands into one V8HI result with packssdw.
;; Operand 1 is a register tied to the destination; operand 2 may be an
;; SSE register or memory.
3646 (define_insn "sse2_packssdw"
3647 [(set (match_operand:V8HI 0 "register_operand" "=x")
3650 (match_operand:V4SI 1 "register_operand" "0"))
3652 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3654 "packssdw\t{%2, %0|%0, %2}"
3655 [(set_attr "type" "sselog")
3656 (set_attr "mode" "TI")])
;; Combines two V8HI operands into one V16QI result with packuswb.
;; Operand 1 is a register tied to the destination; operand 2 may be an
;; SSE register or memory.
3658 (define_insn "sse2_packuswb"
3659 [(set (match_operand:V16QI 0 "register_operand" "=x")
3662 (match_operand:V8HI 1 "register_operand" "0"))
3664 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3666 "packuswb\t{%2, %0|%0, %2}"
3667 [(set_attr "type" "sselog")
3668 (set_attr "mode" "TI")])
;; Interleaves the upper eight bytes of operands 1 and 2 (selection
;; indices 8..15 alternating with 24..31); emits punpckhbw.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3670 (define_insn "sse2_punpckhbw"
3671 [(set (match_operand:V16QI 0 "register_operand" "=x")
3674 (match_operand:V16QI 1 "register_operand" "0")
3675 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3676 (parallel [(const_int 8) (const_int 24)
3677 (const_int 9) (const_int 25)
3678 (const_int 10) (const_int 26)
3679 (const_int 11) (const_int 27)
3680 (const_int 12) (const_int 28)
3681 (const_int 13) (const_int 29)
3682 (const_int 14) (const_int 30)
3683 (const_int 15) (const_int 31)])))]
3685 "punpckhbw\t{%2, %0|%0, %2}"
3686 [(set_attr "type" "sselog")
3687 (set_attr "mode" "TI")])
;; Interleaves the lower eight bytes of operands 1 and 2 (selection
;; indices 0..7 alternating with 16..23); emits punpcklbw.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3689 (define_insn "sse2_punpcklbw"
3690 [(set (match_operand:V16QI 0 "register_operand" "=x")
3693 (match_operand:V16QI 1 "register_operand" "0")
3694 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3695 (parallel [(const_int 0) (const_int 16)
3696 (const_int 1) (const_int 17)
3697 (const_int 2) (const_int 18)
3698 (const_int 3) (const_int 19)
3699 (const_int 4) (const_int 20)
3700 (const_int 5) (const_int 21)
3701 (const_int 6) (const_int 22)
3702 (const_int 7) (const_int 23)])))]
3704 "punpcklbw\t{%2, %0|%0, %2}"
3705 [(set_attr "type" "sselog")
3706 (set_attr "mode" "TI")])
;; Interleaves the upper four halfwords of operands 1 and 2 (selection
;; indices 4..7 alternating with 12..15); emits punpckhwd.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3708 (define_insn "sse2_punpckhwd"
3709 [(set (match_operand:V8HI 0 "register_operand" "=x")
3712 (match_operand:V8HI 1 "register_operand" "0")
3713 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3714 (parallel [(const_int 4) (const_int 12)
3715 (const_int 5) (const_int 13)
3716 (const_int 6) (const_int 14)
3717 (const_int 7) (const_int 15)])))]
3719 "punpckhwd\t{%2, %0|%0, %2}"
3720 [(set_attr "type" "sselog")
3721 (set_attr "mode" "TI")])
;; Interleaves the lower four halfwords of operands 1 and 2 (selection
;; indices 0..3 alternating with 8..11); emits punpcklwd.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3723 (define_insn "sse2_punpcklwd"
3724 [(set (match_operand:V8HI 0 "register_operand" "=x")
3727 (match_operand:V8HI 1 "register_operand" "0")
3728 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3729 (parallel [(const_int 0) (const_int 8)
3730 (const_int 1) (const_int 9)
3731 (const_int 2) (const_int 10)
3732 (const_int 3) (const_int 11)])))]
3734 "punpcklwd\t{%2, %0|%0, %2}"
3735 [(set_attr "type" "sselog")
3736 (set_attr "mode" "TI")])
;; Interleaves the upper two doublewords of operands 1 and 2 (selection
;; indices 2, 6, 3, 7); emits punpckhdq.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
3738 (define_insn "sse2_punpckhdq"
3739 [(set (match_operand:V4SI 0 "register_operand" "=x")
3742 (match_operand:V4SI 1 "register_operand" "0")
3743 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3744 (parallel [(const_int 2) (const_int 6)
3745 (const_int 3) (const_int 7)])))]
3747 "punpckhdq\t{%2, %0|%0, %2}"
3748 [(set_attr "type" "sselog")
3749 (set_attr "mode" "TI")])
;; Interleaves the lower two doublewords of operands 1 and 2 (selection
;; indices 0, 4, 1, 5); emits punpckldq.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
3751 (define_insn "sse2_punpckldq"
3752 [(set (match_operand:V4SI 0 "register_operand" "=x")
3755 (match_operand:V4SI 1 "register_operand" "0")
3756 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3757 (parallel [(const_int 0) (const_int 4)
3758 (const_int 1) (const_int 5)])))]
3760 "punpckldq\t{%2, %0|%0, %2}"
3761 [(set_attr "type" "sselog")
3762 (set_attr "mode" "TI")])
;; Quadword high interleave of operands 1 and 2; emits punpckhqdq.  The
;; selection list starts with index 1 (the upper quadword of operand 1).
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.
3764 (define_insn "sse2_punpckhqdq"
3765 [(set (match_operand:V2DI 0 "register_operand" "=x")
3768 (match_operand:V2DI 1 "register_operand" "0")
3769 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3770 (parallel [(const_int 1)
3773 "punpckhqdq\t{%2, %0|%0, %2}"
3774 [(set_attr "type" "sselog")
3775 (set_attr "mode" "TI")])
;; Quadword low interleave of operands 1 and 2; emits punpcklqdq.  The
;; selection list starts with index 0 (the lower quadword of operand 1).
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.
3777 (define_insn "sse2_punpcklqdq"
3778 [(set (match_operand:V2DI 0 "register_operand" "=x")
3781 (match_operand:V2DI 1 "register_operand" "0")
3782 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3783 (parallel [(const_int 0)
3786 "punpcklqdq\t{%2, %0|%0, %2}"
3787 [(set_attr "type" "sselog")
3788 (set_attr "mode" "TI")])
;; Inserts the HImode operand 2 (general register or memory) into the V8HI
;; value of operand 1, which is tied to the destination; emits pinsrw.
;; Operand 3 must satisfy const_pow2_1_to_128_operand (a power of two, per
;; the predicate name); before output it is replaced by its base-2
;; logarithm, which becomes the immediate of the instruction.
3790 (define_insn "*sse2_pinsrw"
3791 [(set (match_operand:V8HI 0 "register_operand" "=x")
3794 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3795 (match_operand:V8HI 1 "register_operand" "0")
3796 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3799 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3800 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3802 [(set_attr "type" "sselog")
3803 (set_attr "mode" "TI")])
;; Extracts one element of the V8HI operand 1 into the SImode general
;; register operand 0; emits pextrw.  Operand 2 is the element index and
;; must satisfy const_0_to_7_operand.
3805 (define_insn "*sse2_pextrw"
3806 [(set (match_operand:SI 0 "register_operand" "=r")
3809 (match_operand:V8HI 1 "register_operand" "x")
3810 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3812 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3813 [(set_attr "type" "sselog")
3814 (set_attr "mode" "TI")])
;; Expander taking a destination, a V4SI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit selectors (bits
;; 0-1, 2-3, 4-5, 6-7), which are passed as separate constants to
;; sse2_pshufd_1.
3816 (define_expand "sse2_pshufd"
3817 [(match_operand:V4SI 0 "register_operand" "")
3818 (match_operand:V4SI 1 "nonimmediate_operand" "")
3819 (match_operand:SI 2 "const_int_operand" "")]
3822 int mask = INTVAL (operands[2]);
3823 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3824 GEN_INT ((mask >> 0) & 3),
3825 GEN_INT ((mask >> 2) & 3),
3826 GEN_INT ((mask >> 4) & 3),
3827 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshufd with the four element selectors as separate
;; operands 2..5, each in the range 0..3.  At output time the selectors
;; are packed back into one immediate (operand 2 in bits 0-1, operand 3
;; in bits 2-3, operand 4 in bits 4-5, operand 5 in bits 6-7), which
;; replaces operands[2] for printing.
3831 (define_insn "sse2_pshufd_1"
3832 [(set (match_operand:V4SI 0 "register_operand" "=x")
3834 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3835 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3836 (match_operand 3 "const_0_to_3_operand" "")
3837 (match_operand 4 "const_0_to_3_operand" "")
3838 (match_operand 5 "const_0_to_3_operand" "")])))]
3842 mask |= INTVAL (operands[2]) << 0;
3843 mask |= INTVAL (operands[3]) << 2;
3844 mask |= INTVAL (operands[4]) << 4;
3845 mask |= INTVAL (operands[5]) << 6;
3846 operands[2] = GEN_INT (mask);
3848 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3850 [(set_attr "type" "sselog1")
3851 (set_attr "mode" "TI")])
;; Expander taking a destination, a V8HI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit selectors (bits
;; 0-1, 2-3, 4-5, 6-7), which are passed as separate constants to
;; sse2_pshuflw_1.
3853 (define_expand "sse2_pshuflw"
3854 [(match_operand:V8HI 0 "register_operand" "")
3855 (match_operand:V8HI 1 "nonimmediate_operand" "")
3856 (match_operand:SI 2 "const_int_operand" "")]
3859 int mask = INTVAL (operands[2]);
3860 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3861 GEN_INT ((mask >> 0) & 3),
3862 GEN_INT ((mask >> 2) & 3),
3863 GEN_INT ((mask >> 4) & 3),
3864 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshuflw.  The first four entries of the selection list
;; are operands 2..5, each in the range 0..3.  At output time they are
;; packed back into one immediate (2 bits per selector, operand 2 lowest),
;; which replaces operands[2] for printing.
3868 (define_insn "sse2_pshuflw_1"
3869 [(set (match_operand:V8HI 0 "register_operand" "=x")
3871 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3872 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3873 (match_operand 3 "const_0_to_3_operand" "")
3874 (match_operand 4 "const_0_to_3_operand" "")
3875 (match_operand 5 "const_0_to_3_operand" "")
3883 mask |= INTVAL (operands[2]) << 0;
3884 mask |= INTVAL (operands[3]) << 2;
3885 mask |= INTVAL (operands[4]) << 4;
3886 mask |= INTVAL (operands[5]) << 6;
3887 operands[2] = GEN_INT (mask);
3889 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3891 [(set_attr "type" "sselog")
3892 (set_attr "mode" "TI")])
;; Expander taking a destination, a V8HI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit selectors and 4 is
;; added to each, so the constants passed to sse2_pshufhw_1 are element
;; indices in the range 4..7.
3894 (define_expand "sse2_pshufhw"
3895 [(match_operand:V8HI 0 "register_operand" "")
3896 (match_operand:V8HI 1 "nonimmediate_operand" "")
3897 (match_operand:SI 2 "const_int_operand" "")]
3900 int mask = INTVAL (operands[2]);
3901 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3902 GEN_INT (((mask >> 0) & 3) + 4),
3903 GEN_INT (((mask >> 2) & 3) + 4),
3904 GEN_INT (((mask >> 4) & 3) + 4),
3905 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of pshufhw.  The selection list begins with constant index 0
;; and ends with operands 2..5, each an element index in the range 4..7.
;; At output time 4 is subtracted from each selector and the results are
;; packed into one immediate (2 bits per selector, operand 2 lowest),
;; which replaces operands[2] for printing.
3909 (define_insn "sse2_pshufhw_1"
3910 [(set (match_operand:V8HI 0 "register_operand" "=x")
3912 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3913 (parallel [(const_int 0)
3917 (match_operand 2 "const_4_to_7_operand" "")
3918 (match_operand 3 "const_4_to_7_operand" "")
3919 (match_operand 4 "const_4_to_7_operand" "")
3920 (match_operand 5 "const_4_to_7_operand" "")])))]
3924 mask |= (INTVAL (operands[2]) - 4) << 0;
3925 mask |= (INTVAL (operands[3]) - 4) << 2;
3926 mask |= (INTVAL (operands[4]) - 4) << 4;
3927 mask |= (INTVAL (operands[5]) - 4) << 6;
3928 operands[2] = GEN_INT (mask);
3930 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3932 [(set_attr "type" "sselog")
3933 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from the SImode operand 1.  The
;; preparation statement sets operands[2] to the all-zero V4SI constant
;; (CONST0_RTX) -- presumably the value supplying the remaining elements.
3935 (define_expand "sse2_loadd"
3936 [(set (match_operand:V4SI 0 "register_operand" "")
3939 (match_operand:SI 1 "nonimmediate_operand" ""))
3943 "operands[2] = CONST0_RTX (V4SImode);")
;; Places the SImode operand 2 into a V4SI destination, with operand 1
;; supplying the other vector input (constant zero "C" in alternatives
;; 0-2, tied to the destination in alternative 3).  Alternatives:
;;   0: movd from memory;      1: movd from a general register;
;;   2: movss from memory;     3: movss from an SSE register.
3945 (define_insn "sse2_loadld"
3946 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
3949 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
3950 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
3954 movd\t{%2, %0|%0, %2}
3955 movd\t{%2, %0|%0, %2}
3956 movss\t{%2, %0|%0, %2}
3957 movss\t{%2, %0|%0, %2}"
3958 [(set_attr "type" "ssemov")
3959 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extracts element 0 of a V4SI register into an SImode destination.
;; After reload the pattern is split into a plain move: operand 1 is
;; rewritten as an SImode reference to the same hard register number, so
;; the result is (set operand0 operand1) in SImode.
3961 (define_insn_and_split "sse2_stored"
3962 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
3964 (match_operand:V4SI 1 "register_operand" "x,Yi")
3965 (parallel [(const_int 0)])))]
3968 "&& reload_completed"
3969 [(set (match_dup 0) (match_dup 1))]
3971 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that extracts element 0 of the V2DI operand 1 into the DImode
;; operand 0.
3974 (define_expand "sse_storeq"
3975 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3977 (match_operand:V2DI 1 "register_operand" "")
3978 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 extraction from V2DI to DImode,
;; enabled when both TARGET_64BIT and TARGET_SSE hold.  Besides memory or
;; an SSE register (alternative 0) the destination may be a general
;; register (alternative 1, source constrained to "Yi").
3982 (define_insn "*sse2_storeq_rex64"
3983 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
3985 (match_operand:V2DI 1 "register_operand" "x,Yi")
3986 (parallel [(const_int 0)])))]
3987 "TARGET_64BIT && TARGET_SSE"
;; Element-0 extraction from a V2DI SSE register to a DImode destination
;; that is either memory or an SSE register ("=mx"); there is no general
;; register alternative in this variant.
3990 (define_insn "*sse2_storeq"
3991 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3993 (match_operand:V2DI 1 "register_operand" "x")
3994 (parallel [(const_int 0)])))]
;; Post-reload rewrite of the element-0 extraction from V2DI to DImode:
;; once reload has completed (and TARGET_SSE holds), operand 1 is replaced
;; by a DImode reference to the same hard register number and the
;; extraction becomes a plain (set operand0 operand1) move.
3999 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4001 (match_operand:V2DI 1 "register_operand" "")
4002 (parallel [(const_int 0)])))]
4003 "TARGET_SSE && reload_completed"
4004 [(set (match_dup 0) (match_dup 1))]
4006 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 (the upper 64 bits) of a V2DI value into a DImode
;; destination when SSE2 is available; memory-to-memory is rejected by the
;; insn condition.  Alternatives:
;;   0: memory <- SSE register: movhps stores the upper half;
;;   1: SSE register, source tied to the destination: psrldq shifts the
;;      whole register right.  The count is in BYTES, so it must be 8 to
;;      bring the upper quadword into the low quadword (a count of 4 would
;;      leave bits 32..95 of the source in the low quadword);
;;   2: SSE register <- offsettable memory: movq loads from %H1
;;      (presumably the address of the second quadword of operand 1).
4009 (define_insn "*vec_extractv2di_1_sse2"
4010 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4012 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4013 (parallel [(const_int 1)])))]
4014 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4016 movhps\t{%1, %0|%0, %1}
4017 psrldq\t{$8, %0|%0, 8}
4018 movq\t{%H1, %0|%0, %H1}"
4019 [(set_attr "type" "ssemov,sseishft,ssemov")
4020 (set_attr "mode" "V2SF,TI,TI")])
4022 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only counterpart (condition: TARGET_SSE without TARGET_SSE2) for
;; extracting element 1 of a V2DI value into a DImode destination;
;; memory-to-memory is rejected by the insn condition.  Alternatives:
;;   0: memory <- SSE register, using movhps;
;;   1: SSE register <- SSE register, using movhlps;
;;   2: SSE register <- offsettable memory, using movlps on %H1.
4023 (define_insn "*vec_extractv2di_1_sse"
4024 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4026 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4027 (parallel [(const_int 1)])))]
4028 "!TARGET_SSE2 && TARGET_SSE
4029 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4031 movhps\t{%1, %0|%0, %1}
4032 movhlps\t{%1, %0|%0, %1}
4033 movlps\t{%H1, %0|%0, %H1}"
4034 [(set_attr "type" "ssemov")
4035 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produces a V4SI value from the SImode register operand 1, using a
;; shuffle with an all-zero selector (every result element is source
;; element 0).  Alternative 0 emits pshufd from a separate source
;; register; alternative 1 has the source tied to the destination and
;; emits shufps on that register.
4037 (define_insn "*vec_dupv4si"
4038 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4040 (match_operand:SI 1 "register_operand" " Y2,0")))]
4043 pshufd\t{$0, %1, %0|%0, %1, 0}
4044 shufps\t{$0, %0, %0|%0, %0, 0}"
4045 [(set_attr "type" "sselog1")
4046 (set_attr "mode" "TI,V4SF")])
;; Produces a V2DI value from the DImode register operand 1.  There are
;; two alternatives (destination class "Y2" or "x"); in both the source is
;; tied to the destination.  Their attribute types are sselog1 and ssemov,
;; with modes TI and V4SF respectively.
4048 (define_insn "*vec_dupv2di"
4049 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4051 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4056 [(set_attr "type" "sselog1,ssemov")
4057 (set_attr "mode" "TI,V4SF")])
4059 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4060 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4061 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI value from two SImode operands.  Alternatives:
;;   0: punpckldq, operand 1 tied to the destination, operand 2 in "Y2";
;;   1: movd of operand 1 (register or memory) when operand 2 is zero;
;;   2: punpckldq in the "*y" register class, operand 1 tied;
;;   3: movd into the "*y" class when operand 2 is zero.
;; The attributes classify alternatives 0-1 as SSE and 2-3 as MMX.
4062 (define_insn "*sse2_concatv2si"
4063 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4065 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4066 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4069 punpckldq\t{%2, %0|%0, %2}
4070 movd\t{%1, %0|%0, %1}
4071 punpckldq\t{%2, %0|%0, %2}
4072 movd\t{%1, %0|%0, %1}"
4073 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4074 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI value from two SImode operands using single-precision
;; SSE instructions for the SSE alternatives.  Alternatives:
;;   0: unpcklps, operand 1 tied to the destination, operand 2 in "x";
;;   1: movss of operand 1 from memory when operand 2 is zero;
;;   2: punpckldq in the "*y" register class, operand 1 tied;
;;   3: movd into the "*y" class when operand 2 is zero.
4076 (define_insn "*sse1_concatv2si"
4077 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4079 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4080 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4083 unpcklps\t{%2, %0|%0, %2}
4084 movss\t{%1, %0|%0, %1}
4085 punpckldq\t{%2, %0|%0, %2}
4086 movd\t{%1, %0|%0, %1}"
4087 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4088 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI value from two V2SI operands; operand 1 is always tied to
;; the destination.  Alternatives by the location of operand 2:
;;   0: "Y2" register, using punpcklqdq;
;;   1: "x" register, using movlhps;
;;   2: memory, using movhps.
4090 (define_insn "*vec_concatv4si_1"
4091 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4093 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4094 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4097 punpcklqdq\t{%2, %0|%0, %2}
4098 movlhps\t{%2, %0|%0, %2}
4099 movhps\t{%2, %0|%0, %2}"
4100 [(set_attr "type" "sselog,ssemov,ssemov")
4101 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI value from two DImode operands.  Alternatives:
;;   0: movq of operand 1 from memory, operand 2 zero;
;;   1: movq2dq of operand 1 from a "*y" register, operand 2 zero
;;      (discouraged by the "?" on the destination);
;;   2: punpcklqdq, operand 1 tied to the destination, operand 2 in "Y2";
;;   3: movlhps, operand 1 tied, operand 2 in an "x" register;
;;   4: movhps, operand 1 tied, operand 2 in memory;
;;   5: movlps of operand 1 from memory, operand 2 tied to the destination.
4103 (define_insn "vec_concatv2di"
4104 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4106 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4107 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4110 movq\t{%1, %0|%0, %1}
4111 movq2dq\t{%1, %0|%0, %1}
4112 punpcklqdq\t{%2, %0|%0, %2}
4113 movlhps\t{%2, %0|%0, %2}
4114 movhps\t{%2, %0|%0, %2}
4115 movlps\t{%1, %0|%0, %1}"
4116 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4117 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Named expander: stores the DImode operand 1 into the element of the
;; V2DI operand 0 selected by the constant operand 2, by calling
;; ix86_expand_vector_set.  The meaning of the leading `false' argument is
;; defined by that helper.
4119 (define_expand "vec_setv2di"
4120 [(match_operand:V2DI 0 "register_operand" "")
4121 (match_operand:DI 1 "register_operand" "")
4122 (match_operand 2 "const_int_operand" "")]
4125 ix86_expand_vector_set (false, operands[0], operands[1],
4126 INTVAL (operands[2]));
;; Named expander: reads the element of the V2DI operand 1 selected by the
;; constant operand 2 into the DImode operand 0, by calling
;; ix86_expand_vector_extract.  The meaning of the leading `false'
;; argument is defined by that helper.
4130 (define_expand "vec_extractv2di"
4131 [(match_operand:DI 0 "register_operand" "")
4132 (match_operand:V2DI 1 "register_operand" "")
4133 (match_operand 2 "const_int_operand" "")]
4136 ix86_expand_vector_extract (false, operands[0], operands[1],
4137 INTVAL (operands[2]));
;; Named expander: initializes the V2DI operand 0 from operand 1 (which
;; has no predicate or mode restriction here) by calling
;; ix86_expand_vector_init.
4141 (define_expand "vec_initv2di"
4142 [(match_operand:V2DI 0 "register_operand" "")
4143 (match_operand 1 "" "")]
4146 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Named expander: stores the SImode operand 1 into the element of the
;; V4SI operand 0 selected by the constant operand 2, by calling
;; ix86_expand_vector_set.
4150 (define_expand "vec_setv4si"
4151 [(match_operand:V4SI 0 "register_operand" "")
4152 (match_operand:SI 1 "register_operand" "")
4153 (match_operand 2 "const_int_operand" "")]
4156 ix86_expand_vector_set (false, operands[0], operands[1],
4157 INTVAL (operands[2]));
;; Named expander: reads the element of the V4SI operand 1 selected by the
;; constant operand 2 into the SImode operand 0, by calling
;; ix86_expand_vector_extract.
4161 (define_expand "vec_extractv4si"
4162 [(match_operand:SI 0 "register_operand" "")
4163 (match_operand:V4SI 1 "register_operand" "")
4164 (match_operand 2 "const_int_operand" "")]
4167 ix86_expand_vector_extract (false, operands[0], operands[1],
4168 INTVAL (operands[2]));
;; Named expander: initializes the V4SI operand 0 from operand 1 (which
;; has no predicate or mode restriction here) by calling
;; ix86_expand_vector_init.
4172 (define_expand "vec_initv4si"
4173 [(match_operand:V4SI 0 "register_operand" "")
4174 (match_operand 1 "" "")]
4177 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Named expander: stores the HImode operand 1 into the element of the
;; V8HI operand 0 selected by the constant operand 2, by calling
;; ix86_expand_vector_set.
4181 (define_expand "vec_setv8hi"
4182 [(match_operand:V8HI 0 "register_operand" "")
4183 (match_operand:HI 1 "register_operand" "")
4184 (match_operand 2 "const_int_operand" "")]
4187 ix86_expand_vector_set (false, operands[0], operands[1],
4188 INTVAL (operands[2]));
;; Named expander: reads the element of the V8HI operand 1 selected by the
;; constant operand 2 into the HImode operand 0, by calling
;; ix86_expand_vector_extract.
4192 (define_expand "vec_extractv8hi"
4193 [(match_operand:HI 0 "register_operand" "")
4194 (match_operand:V8HI 1 "register_operand" "")
4195 (match_operand 2 "const_int_operand" "")]
4198 ix86_expand_vector_extract (false, operands[0], operands[1],
4199 INTVAL (operands[2]));
;; Named expander: initializes the V8HI operand 0 from operand 1 (which
;; has no predicate or mode restriction here) by calling
;; ix86_expand_vector_init.
4203 (define_expand "vec_initv8hi"
4204 [(match_operand:V8HI 0 "register_operand" "")
4205 (match_operand 1 "" "")]
4208 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Named expander: stores the QImode operand 1 into the element of the
;; V16QI operand 0 selected by the constant operand 2, by calling
;; ix86_expand_vector_set.
4212 (define_expand "vec_setv16qi"
4213 [(match_operand:V16QI 0 "register_operand" "")
4214 (match_operand:QI 1 "register_operand" "")
4215 (match_operand 2 "const_int_operand" "")]
4218 ix86_expand_vector_set (false, operands[0], operands[1],
4219 INTVAL (operands[2]));
;; Named expander: reads the element of the V16QI operand 1 selected by
;; the constant operand 2 into the QImode operand 0, by calling
;; ix86_expand_vector_extract.
4223 (define_expand "vec_extractv16qi"
4224 [(match_operand:QI 0 "register_operand" "")
4225 (match_operand:V16QI 1 "register_operand" "")
4226 (match_operand 2 "const_int_operand" "")]
4229 ix86_expand_vector_extract (false, operands[0], operands[1],
4230 INTVAL (operands[2]));
;; Named expander: initializes the V16QI operand 0 from operand 1 (which
;; has no predicate or mode restriction here) by calling
;; ix86_expand_vector_init.
4234 (define_expand "vec_initv16qi"
4235 [(match_operand:V16QI 0 "register_operand" "")
4236 (match_operand 1 "" "")]
4239 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Named expander widening part of a V16QI vector into a V8HI result via
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): going by the pattern name, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper.
4243 (define_expand "vec_unpacku_hi_v16qi"
4244 [(match_operand:V8HI 0 "register_operand" "")
4245 (match_operand:V16QI 1 "register_operand" "")]
4248 ix86_expand_sse_unpack (operands, true, true);
;; Named expander widening part of a V16QI vector into a V8HI result via
;; ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): going by the pattern name, the flags presumably select
;; signed extension of the high half -- confirm against the helper.
4252 (define_expand "vec_unpacks_hi_v16qi"
4253 [(match_operand:V8HI 0 "register_operand" "")
4254 (match_operand:V16QI 1 "register_operand" "")]
4257 ix86_expand_sse_unpack (operands, false, true);
;; Named expander widening part of a V16QI vector into a V8HI result via
;; ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): going by the pattern name, the flags presumably select
;; unsigned extension of the low half -- confirm against the helper.
4261 (define_expand "vec_unpacku_lo_v16qi"
4262 [(match_operand:V8HI 0 "register_operand" "")
4263 (match_operand:V16QI 1 "register_operand" "")]
4266 ix86_expand_sse_unpack (operands, true, false);
;; Named expander widening part of a V16QI vector into a V8HI result via
;; ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): going by the pattern name, the flags presumably select
;; signed extension of the low half -- confirm against the helper.
4270 (define_expand "vec_unpacks_lo_v16qi"
4271 [(match_operand:V8HI 0 "register_operand" "")
4272 (match_operand:V16QI 1 "register_operand" "")]
4275 ix86_expand_sse_unpack (operands, false, false);
;; Named expander widening part of a V8HI vector into a V4SI result via
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): going by the pattern name, the flags presumably select
;; unsigned extension of the high half -- confirm against the helper.
4279 (define_expand "vec_unpacku_hi_v8hi"
4280 [(match_operand:V4SI 0 "register_operand" "")
4281 (match_operand:V8HI 1 "register_operand" "")]
4284 ix86_expand_sse_unpack (operands, true, true);
;; Named expander widening part of a V8HI vector into a V4SI result via
;; ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): going by the pattern name, the flags presumably select
;; signed extension of the high half -- confirm against the helper.
4288 (define_expand "vec_unpacks_hi_v8hi"
4289 [(match_operand:V4SI 0 "register_operand" "")
4290 (match_operand:V8HI 1 "register_operand" "")]
4293 ix86_expand_sse_unpack (operands, false, true);
;; Named expander widening part of a V8HI vector into a V4SI result via
;; ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): going by the pattern name, the flags presumably select
;; unsigned extension of the low half -- confirm against the helper.
4297 (define_expand "vec_unpacku_lo_v8hi"
4298 [(match_operand:V4SI 0 "register_operand" "")
4299 (match_operand:V8HI 1 "register_operand" "")]
4302 ix86_expand_sse_unpack (operands, true, false);
;; Named expander widening part of a V8HI vector into a V4SI result via
;; ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): going by the pattern name, the flags presumably select
;; signed extension of the low half -- confirm against the helper.
4306 (define_expand "vec_unpacks_lo_v8hi"
4307 [(match_operand:V4SI 0 "register_operand" "")
4308 (match_operand:V8HI 1 "register_operand" "")]
4311 ix86_expand_sse_unpack (operands, false, false);
;; Named expander widening part of a V4SI vector into a V2DI result via
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): going by the pattern name, the flags presumably select
;; unsigned extension of the high half -- confirm against the helper.
4315 (define_expand "vec_unpacku_hi_v4si"
4316 [(match_operand:V2DI 0 "register_operand" "")
4317 (match_operand:V4SI 1 "register_operand" "")]
4320 ix86_expand_sse_unpack (operands, true, true);
;; Expander "vec_unpacks_hi_v4si": V4SI register operand 1 -> V2DI register
;; operand 0, delegated to ix86_expand_sse_unpack (operands, false, true).
4324 (define_expand "vec_unpacks_hi_v4si"
4325 [(match_operand:V2DI 0 "register_operand" "")
4326 (match_operand:V4SI 1 "register_operand" "")]
4329 ix86_expand_sse_unpack (operands, false, true);
;; Expander "vec_unpacku_lo_v4si": V4SI register operand 1 -> V2DI register
;; operand 0, delegated to ix86_expand_sse_unpack (operands, true, false).
4333 (define_expand "vec_unpacku_lo_v4si"
4334 [(match_operand:V2DI 0 "register_operand" "")
4335 (match_operand:V4SI 1 "register_operand" "")]
4338 ix86_expand_sse_unpack (operands, true, false);
;; Expander "vec_unpacks_lo_v4si": V4SI register operand 1 -> V2DI register
;; operand 0, delegated to ix86_expand_sse_unpack (operands, false, false).
4342 (define_expand "vec_unpacks_lo_v4si"
4343 [(match_operand:V2DI 0 "register_operand" "")
4344 (match_operand:V4SI 1 "register_operand" "")]
4347 ix86_expand_sse_unpack (operands, false, false);
4351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4355 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; "sse2_uavgv16qi3": emits pavgb on V16QI operands.  Operand 1 is tied to the
;; destination ("0"); its "%" marks operands 1 and 2 as commutative, and
;; operand 2 may be a register or memory.  The sixteen-element vector of ones
;; is a constant term of the matched RTL.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok (PLUS, V16QImode, operands) holds.
4357 (define_insn "sse2_uavgv16qi3"
4358 [(set (match_operand:V16QI 0 "register_operand" "=x")
4364 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4366 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4367 (const_vector:V16QI [(const_int 1) (const_int 1)
4368 (const_int 1) (const_int 1)
4369 (const_int 1) (const_int 1)
4370 (const_int 1) (const_int 1)
4371 (const_int 1) (const_int 1)
4372 (const_int 1) (const_int 1)
4373 (const_int 1) (const_int 1)
4374 (const_int 1) (const_int 1)]))
4376 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4377 "pavgb\t{%2, %0|%0, %2}"
4378 [(set_attr "type" "sseiadd")
4379 (set_attr "mode" "TI")])
;; "sse2_uavgv8hi3": emits pavgw on V8HI operands.  Operand 1 is tied to the
;; destination and commutes with operand 2 ("%0"); operand 2 may be a register
;; or memory.  The eight-element vector of ones is a constant term of the
;; matched RTL.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok (PLUS, V8HImode, operands) holds.
4381 (define_insn "sse2_uavgv8hi3"
4382 [(set (match_operand:V8HI 0 "register_operand" "=x")
4388 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4390 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4391 (const_vector:V8HI [(const_int 1) (const_int 1)
4392 (const_int 1) (const_int 1)
4393 (const_int 1) (const_int 1)
4394 (const_int 1) (const_int 1)]))
4396 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4397 "pavgw\t{%2, %0|%0, %2}"
4398 [(set_attr "type" "sseiadd")
4399 (set_attr "mode" "TI")])
4401 ;; The correct representation for this is absolutely enormous, and
4402 ;; surely not generally useful.
;; "sse2_psadbw": emits psadbw.  Modelled as an opaque V2DI unspec of two
;; V16QI inputs instead of explicit arithmetic RTL.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4403 (define_insn "sse2_psadbw"
4404 [(set (match_operand:V2DI 0 "register_operand" "=x")
4405 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4406 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4409 "psadbw\t{%2, %0|%0, %2}"
4410 [(set_attr "type" "sseiadd")
4411 (set_attr "mode" "TI")])
;; "sse_movmskps": emits movmskps.  The SImode general-register result ("=r")
;; is an unspec of a V4SF SSE register operand ("x").
4413 (define_insn "sse_movmskps"
4414 [(set (match_operand:SI 0 "register_operand" "=r")
4415 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4418 "movmskps\t{%1, %0|%0, %1}"
4419 [(set_attr "type" "ssecvt")
4420 (set_attr "mode" "V4SF")])
;; "sse2_movmskpd": emits movmskpd.  The SImode general-register result ("=r")
;; is an unspec of a V2DF SSE register operand ("x").
4422 (define_insn "sse2_movmskpd"
4423 [(set (match_operand:SI 0 "register_operand" "=r")
4424 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4427 "movmskpd\t{%1, %0|%0, %1}"
4428 [(set_attr "type" "ssecvt")
4429 (set_attr "mode" "V2DF")])
;; "sse2_pmovmskb": emits pmovmskb.  The SImode general-register result ("=r")
;; is an unspec of a V16QI SSE register operand ("x").
;; NOTE(review): the "mode" attribute is V2DF although the input is V16QI.
;; Other insn attributes may be derived from "mode", so confirm how it is
;; consumed before changing it.
4431 (define_insn "sse2_pmovmskb"
4432 [(set (match_operand:SI 0 "register_operand" "=r")
4433 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4436 "pmovmskb\t{%1, %0|%0, %1}"
4437 [(set_attr "type" "ssecvt")
4438 (set_attr "mode" "V2DF")])
;; Expander "sse2_maskmovdqu": operand 0 is the V16QI memory destination;
;; operands 1 and 2 are V16QI registers wrapped in a V16QI unspec.
4440 (define_expand "sse2_maskmovdqu"
4441 [(set (match_operand:V16QI 0 "memory_operand" "")
4442 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4443 (match_operand:V16QI 2 "register_operand" "x")
;; "*sse2_maskmovdqu": 32-bit form (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is an SImode register with constraint "D", and the same memory is
;; also an input of the unspec (match_dup 0).  The output template names only
;; operands 1 and 2; the address register does not appear in it.
4449 (define_insn "*sse2_maskmovdqu"
4450 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4451 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4452 (match_operand:V16QI 2 "register_operand" "x")
4453 (mem:V16QI (match_dup 0))]
4455 "TARGET_SSE2 && !TARGET_64BIT"
4456 ;; @@@ check ordering of operands in intel/nonintel syntax
4457 "maskmovdqu\t{%2, %1|%1, %2}"
4458 [(set_attr "type" "ssecvt")
4459 (set_attr "mode" "TI")])
;; "*sse2_maskmovdqu_rex64": 64-bit form (TARGET_SSE2 && TARGET_64BIT).  Same
;; shape as the 32-bit pattern except that the address register operand 0 is
;; DImode.  The output template names only operands 1 and 2.
4461 (define_insn "*sse2_maskmovdqu_rex64"
4462 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4463 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4464 (match_operand:V16QI 2 "register_operand" "x")
4465 (mem:V16QI (match_dup 0))]
4467 "TARGET_SSE2 && TARGET_64BIT"
4468 ;; @@@ check ordering of operands in intel/nonintel syntax
4469 "maskmovdqu\t{%2, %1|%1, %2}"
4470 [(set_attr "type" "ssecvt")
4471 (set_attr "mode" "TI")])
;; "sse_ldmxcsr": an unspec_volatile whose only operand is an SImode memory
;; reference ("m"); the insn is attributed as an SSE memory load.
4473 (define_insn "sse_ldmxcsr"
4474 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
4478 [(set_attr "type" "sse")
4479 (set_attr "memory" "load")])
;; "sse_stmxcsr": sets an SImode memory operand ("=m") from the
;; UNSPECV_STMXCSR unspec_volatile (dummy const_int 0 argument); attributed
;; as an SSE memory store.
4481 (define_insn "sse_stmxcsr"
4482 [(set (match_operand:SI 0 "memory_operand" "=m")
4483 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
4486 [(set_attr "type" "sse")
4487 (set_attr "memory" "store")])
;; Expander "sse_sfence" (TARGET_SSE || TARGET_3DNOW_A): the preparation code
;; creates operand 0 as a volatile BLKmode MEM over a scratch Pmode address,
;; so the fence is expressed as an UNSPEC_SFENCE applied to that memory.
4489 (define_expand "sse_sfence"
4491 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4492 "TARGET_SSE || TARGET_3DNOW_A"
4494 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4495 MEM_VOLATILE_P (operands[0]) = 1;
;; "*sse_sfence": matches a BLKmode operand (no predicate or constraint) set
;; from UNSPEC_SFENCE of itself.  Enabled for TARGET_SSE || TARGET_3DNOW_A;
;; the "memory" attribute is "unknown".
4498 (define_insn "*sse_sfence"
4499 [(set (match_operand:BLK 0 "" "")
4500 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4501 "TARGET_SSE || TARGET_3DNOW_A"
4503 [(set_attr "type" "sse")
4504 (set_attr "memory" "unknown")])
;; "sse2_clflush": an unspec_volatile whose operand is an address
;; (address_operand, constraint "p"); the "memory" attribute is "unknown".
4506 (define_insn "sse2_clflush"
4507 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
4511 [(set_attr "type" "sse")
4512 (set_attr "memory" "unknown")])
;; Expander "sse2_mfence": the preparation code creates operand 0 as a
;; volatile BLKmode MEM over a scratch Pmode address; the fence is expressed
;; as an UNSPEC_MFENCE applied to that memory.
4514 (define_expand "sse2_mfence"
4516 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4519 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4520 MEM_VOLATILE_P (operands[0]) = 1;
;; "*sse2_mfence": matches a BLKmode operand (no predicate or constraint) set
;; from UNSPEC_MFENCE of itself; the "memory" attribute is "unknown".
4523 (define_insn "*sse2_mfence"
4524 [(set (match_operand:BLK 0 "" "")
4525 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4528 [(set_attr "type" "sse")
4529 (set_attr "memory" "unknown")])
;; Expander "sse2_lfence": the preparation code creates operand 0 as a
;; volatile BLKmode MEM over a scratch Pmode address; the fence is expressed
;; as an UNSPEC_LFENCE applied to that memory.
4531 (define_expand "sse2_lfence"
4533 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4536 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4537 MEM_VOLATILE_P (operands[0]) = 1;
;; "*sse2_lfence": matches a BLKmode operand (no predicate or constraint) set
;; from UNSPEC_LFENCE of itself; the "memory" attribute is "unknown".
4540 (define_insn "*sse2_lfence"
4541 [(set (match_operand:BLK 0 "" "")
4542 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4545 [(set_attr "type" "sse")
4546 (set_attr "memory" "unknown")])
;; "sse3_mwait": an unspec_volatile taking two SImode register operands pinned
;; by the "a" and "c" constraints.  SImode is used on 64-bit targets too, for
;; the reason given in the comment inside the pattern.  The "length"
;; attribute is fixed at 3.
4548 (define_insn "sse3_mwait"
4549 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4550 (match_operand:SI 1 "register_operand" "c")]
4553 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
4554 ;; Since 32bit register operands are implicitly zero extended to 64bit,
4555 ;; we only need to set up 32bit registers.
4557 [(set_attr "length" "3")])
;; "sse3_monitor": 32-bit form (TARGET_SSE3 && !TARGET_64BIT).  An
;; unspec_volatile taking three SImode register operands pinned by the "a",
;; "c" and "d" constraints; prints "monitor" with all three operands.  The
;; "length" attribute is fixed at 3.
4559 (define_insn "sse3_monitor"
4560 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4561 (match_operand:SI 1 "register_operand" "c")
4562 (match_operand:SI 2 "register_operand" "d")]
4564 "TARGET_SSE3 && !TARGET_64BIT"
4565 "monitor\t%0, %1, %2"
4566 [(set_attr "length" "3")])
;; "sse3_monitor64": 64-bit form (TARGET_SSE3 && TARGET_64BIT).  Operand 0 is
;; DImode in the "a" register; operands 1 and 2 stay SImode ("c", "d") for
;; the reason given in the comment inside the pattern.  The "length"
;; attribute is fixed at 3.
4568 (define_insn "sse3_monitor64"
4569 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
4570 (match_operand:SI 1 "register_operand" "c")
4571 (match_operand:SI 2 "register_operand" "d")]
4573 "TARGET_SSE3 && TARGET_64BIT"
4574 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
4575 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4576 ;; zero extended to 64bit, we only need to set up 32bit registers.
4578 [(set_attr "length" "3")])
;; "ssse3_phaddwv8hi3": emits phaddw on SSE registers.  The RTL selects the
;; adjacent HImode element pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and
;; then the same pairs of operand 2.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4581 (define_insn "ssse3_phaddwv8hi3"
4582 [(set (match_operand:V8HI 0 "register_operand" "=x")
4588 (match_operand:V8HI 1 "register_operand" "0")
4589 (parallel [(const_int 0)]))
4590 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4592 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4593 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4596 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4597 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4599 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4600 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4605 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4606 (parallel [(const_int 0)]))
4607 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4609 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4610 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4613 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4614 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4616 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4617 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4619 "phaddw\t{%2, %0|%0, %2}"
4620 [(set_attr "type" "sseiadd")
4621 (set_attr "mode" "TI")])
;; "ssse3_phaddwv4hi3": V4HI form of phaddw using the "y" register
;; constraint.  The RTL selects the adjacent HImode pairs (0,1), (2,3) of
;; operand 1 and then of operand 2.  Operand 1 is tied to the destination.
4623 (define_insn "ssse3_phaddwv4hi3"
4624 [(set (match_operand:V4HI 0 "register_operand" "=y")
4629 (match_operand:V4HI 1 "register_operand" "0")
4630 (parallel [(const_int 0)]))
4631 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4633 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4634 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4638 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4639 (parallel [(const_int 0)]))
4640 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4642 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4643 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4645 "phaddw\t{%2, %0|%0, %2}"
4646 [(set_attr "type" "sseiadd")
4647 (set_attr "mode" "DI")])
;; "ssse3_phadddv4si3": emits phaddd on SSE registers.  The RTL selects the
;; adjacent SImode pairs (0,1), (2,3) of operand 1 and then of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
4649 (define_insn "ssse3_phadddv4si3"
4650 [(set (match_operand:V4SI 0 "register_operand" "=x")
4655 (match_operand:V4SI 1 "register_operand" "0")
4656 (parallel [(const_int 0)]))
4657 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4659 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4660 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4664 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4665 (parallel [(const_int 0)]))
4666 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4668 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4669 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4671 "phaddd\t{%2, %0|%0, %2}"
4672 [(set_attr "type" "sseiadd")
4673 (set_attr "mode" "TI")])
;; "ssse3_phadddv2si3": V2SI form of phaddd using the "y" register
;; constraint.  The RTL selects SImode elements 0 and 1 of operand 1 and then
;; of operand 2.  Operand 1 is tied to the destination.
4675 (define_insn "ssse3_phadddv2si3"
4676 [(set (match_operand:V2SI 0 "register_operand" "=y")
4680 (match_operand:V2SI 1 "register_operand" "0")
4681 (parallel [(const_int 0)]))
4682 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4685 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4686 (parallel [(const_int 0)]))
4687 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4689 "phaddd\t{%2, %0|%0, %2}"
4690 [(set_attr "type" "sseiadd")
4691 (set_attr "mode" "DI")])
;; "ssse3_phaddswv8hi3": emits phaddsw on SSE registers.  The RTL selects the
;; adjacent HImode pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
4693 (define_insn "ssse3_phaddswv8hi3"
4694 [(set (match_operand:V8HI 0 "register_operand" "=x")
4700 (match_operand:V8HI 1 "register_operand" "0")
4701 (parallel [(const_int 0)]))
4702 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4704 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4705 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4708 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4709 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4711 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4712 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4717 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4718 (parallel [(const_int 0)]))
4719 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4721 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4722 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4725 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4726 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4728 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4729 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4731 "phaddsw\t{%2, %0|%0, %2}"
4732 [(set_attr "type" "sseiadd")
4733 (set_attr "mode" "TI")])
;; "ssse3_phaddswv4hi3": V4HI form of phaddsw using the "y" register
;; constraint.  The RTL selects the adjacent HImode pairs (0,1), (2,3) of
;; operand 1 and then of operand 2.  Operand 1 is tied to the destination.
4735 (define_insn "ssse3_phaddswv4hi3"
4736 [(set (match_operand:V4HI 0 "register_operand" "=y")
4741 (match_operand:V4HI 1 "register_operand" "0")
4742 (parallel [(const_int 0)]))
4743 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4745 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4746 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4750 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4751 (parallel [(const_int 0)]))
4752 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4754 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4755 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4757 "phaddsw\t{%2, %0|%0, %2}"
4758 [(set_attr "type" "sseiadd")
4759 (set_attr "mode" "DI")])
;; "ssse3_phsubwv8hi3": emits phsubw on SSE registers.  The RTL selects the
;; adjacent HImode pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
4761 (define_insn "ssse3_phsubwv8hi3"
4762 [(set (match_operand:V8HI 0 "register_operand" "=x")
4768 (match_operand:V8HI 1 "register_operand" "0")
4769 (parallel [(const_int 0)]))
4770 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4772 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4773 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4776 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4777 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4779 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4780 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4785 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4786 (parallel [(const_int 0)]))
4787 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4789 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4790 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4793 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4794 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4796 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4797 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4799 "phsubw\t{%2, %0|%0, %2}"
4800 [(set_attr "type" "sseiadd")
4801 (set_attr "mode" "TI")])
;; "ssse3_phsubwv4hi3": V4HI form of phsubw using the "y" register
;; constraint.  The RTL selects the adjacent HImode pairs (0,1), (2,3) of
;; operand 1 and then of operand 2.  Operand 1 is tied to the destination.
4803 (define_insn "ssse3_phsubwv4hi3"
4804 [(set (match_operand:V4HI 0 "register_operand" "=y")
4809 (match_operand:V4HI 1 "register_operand" "0")
4810 (parallel [(const_int 0)]))
4811 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4813 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4814 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4818 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4819 (parallel [(const_int 0)]))
4820 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4822 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4823 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4825 "phsubw\t{%2, %0|%0, %2}"
4826 [(set_attr "type" "sseiadd")
4827 (set_attr "mode" "DI")])
;; "ssse3_phsubdv4si3": emits phsubd on SSE registers.  The RTL selects the
;; adjacent SImode pairs (0,1), (2,3) of operand 1 and then of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
4829 (define_insn "ssse3_phsubdv4si3"
4830 [(set (match_operand:V4SI 0 "register_operand" "=x")
4835 (match_operand:V4SI 1 "register_operand" "0")
4836 (parallel [(const_int 0)]))
4837 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4839 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4840 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4844 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4845 (parallel [(const_int 0)]))
4846 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4848 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4849 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4851 "phsubd\t{%2, %0|%0, %2}"
4852 [(set_attr "type" "sseiadd")
4853 (set_attr "mode" "TI")])
;; "ssse3_phsubdv2si3": V2SI form of phsubd using the "y" register
;; constraint.  The RTL selects SImode elements 0 and 1 of operand 1 and then
;; of operand 2.  Operand 1 is tied to the destination.
4855 (define_insn "ssse3_phsubdv2si3"
4856 [(set (match_operand:V2SI 0 "register_operand" "=y")
4860 (match_operand:V2SI 1 "register_operand" "0")
4861 (parallel [(const_int 0)]))
4862 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4865 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4866 (parallel [(const_int 0)]))
4867 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4869 "phsubd\t{%2, %0|%0, %2}"
4870 [(set_attr "type" "sseiadd")
4871 (set_attr "mode" "DI")])
;; "ssse3_phsubswv8hi3": emits phsubsw on SSE registers.  The RTL selects the
;; adjacent HImode pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
4873 (define_insn "ssse3_phsubswv8hi3"
4874 [(set (match_operand:V8HI 0 "register_operand" "=x")
4880 (match_operand:V8HI 1 "register_operand" "0")
4881 (parallel [(const_int 0)]))
4882 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4884 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4885 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4888 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4889 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4891 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4892 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4897 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4898 (parallel [(const_int 0)]))
4899 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4901 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4902 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4905 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4906 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4908 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4909 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4911 "phsubsw\t{%2, %0|%0, %2}"
4912 [(set_attr "type" "sseiadd")
4913 (set_attr "mode" "TI")])
;; "ssse3_phsubswv4hi3": V4HI form of phsubsw using the "y" register
;; constraint.  The RTL selects the adjacent HImode pairs (0,1), (2,3) of
;; operand 1 and then of operand 2.  Operand 1 is tied to the destination.
4915 (define_insn "ssse3_phsubswv4hi3"
4916 [(set (match_operand:V4HI 0 "register_operand" "=y")
4921 (match_operand:V4HI 1 "register_operand" "0")
4922 (parallel [(const_int 0)]))
4923 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4925 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4926 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4930 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4931 (parallel [(const_int 0)]))
4932 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4934 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4935 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4937 "phsubsw\t{%2, %0|%0, %2}"
4938 [(set_attr "type" "sseiadd")
4939 (set_attr "mode" "DI")])
;; "ssse3_pmaddubswv8hi3": emits pmaddubsw; V8HI result from two V16QI
;; inputs.  Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0").  The RTL takes one element selection from each input
;; starting at index 0 and another starting at index 1 (ending at 15);
;; presumably the even and odd bytes.
4941 (define_insn "ssse3_pmaddubswv8hi3"
4942 [(set (match_operand:V8HI 0 "register_operand" "=x")
4947 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4948 (parallel [(const_int 0)
4958 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
4959 (parallel [(const_int 0)
4969 (vec_select:V16QI (match_dup 1)
4970 (parallel [(const_int 1)
4979 (vec_select:V16QI (match_dup 2)
4980 (parallel [(const_int 1)
4987 (const_int 15)]))))))]
4989 "pmaddubsw\t{%2, %0|%0, %2}"
4990 [(set_attr "type" "sseiadd")
4991 (set_attr "mode" "TI")])
;; "ssse3_pmaddubswv4hi3": V4HI-from-V8QI form of pmaddubsw using the "y"
;; register constraint.  Operand 1 is tied to the destination and marked
;; commutative with operand 2 ("%0").  The RTL takes one element selection
;; from each input starting at index 0 and another starting at index 1
;; (ending at 7); presumably the even and odd bytes.
4993 (define_insn "ssse3_pmaddubswv4hi3"
4994 [(set (match_operand:V4HI 0 "register_operand" "=y")
4999 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5000 (parallel [(const_int 0)
5006 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5007 (parallel [(const_int 0)
5013 (vec_select:V8QI (match_dup 1)
5014 (parallel [(const_int 1)
5019 (vec_select:V8QI (match_dup 2)
5020 (parallel [(const_int 1)
5023 (const_int 7)]))))))]
5025 "pmaddubsw\t{%2, %0|%0, %2}"
5026 [(set_attr "type" "sseiadd")
5027 (set_attr "mode" "DI")])
;; "ssse3_pmulhrswv8hi3": emits pmulhrsw on V8HI operands.  Operand 1 is tied
;; to the destination and commutes with operand 2 ("%0").  The eight-element
;; vector of ones is a constant term of the matched RTL.  Enabled for
;; TARGET_SSSE3 when ix86_binary_operator_ok (MULT, V8HImode, operands) holds.
5029 (define_insn "ssse3_pmulhrswv8hi3"
5030 [(set (match_operand:V8HI 0 "register_operand" "=x")
5037 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5041 (const_vector:V8HI [(const_int 1) (const_int 1)
5042 (const_int 1) (const_int 1)
5043 (const_int 1) (const_int 1)
5044 (const_int 1) (const_int 1)]))
5046 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5047 "pmulhrsw\t{%2, %0|%0, %2}"
5048 [(set_attr "type" "sseimul")
5049 (set_attr "mode" "TI")])
;; "ssse3_pmulhrswv4hi3": V4HI form of pmulhrsw using the "y" register
;; constraint.  Operand 1 is tied to the destination and commutes with
;; operand 2 ("%0").  The four-element vector of ones is a constant term of
;; the matched RTL.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok (MULT, V4HImode, operands) holds.
5051 (define_insn "ssse3_pmulhrswv4hi3"
5052 [(set (match_operand:V4HI 0 "register_operand" "=y")
5059 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5061 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5063 (const_vector:V4HI [(const_int 1) (const_int 1)
5064 (const_int 1) (const_int 1)]))
5066 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5067 "pmulhrsw\t{%2, %0|%0, %2}"
5068 [(set_attr "type" "sseimul")
5069 (set_attr "mode" "DI")])
;; "ssse3_pshufbv16qi3": emits pshufb on V16QI operands, modelled as an opaque
;; unspec.  Operand 1 is tied to the destination; operand 2 may be a register
;; or memory.  The ';' after the output template only begins an empty comment.
5071 (define_insn "ssse3_pshufbv16qi3"
5072 [(set (match_operand:V16QI 0 "register_operand" "=x")
5073 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5074 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5077 "pshufb\t{%2, %0|%0, %2}";
5078 [(set_attr "type" "sselog1")
5079 (set_attr "mode" "TI")])
;; "ssse3_pshufbv8qi3": V8QI form of pshufb using the "y" register
;; constraint, modelled as an opaque unspec.  Operand 1 is tied to the
;; destination.  The ';' after the output template only begins an empty
;; comment.
5081 (define_insn "ssse3_pshufbv8qi3"
5082 [(set (match_operand:V8QI 0 "register_operand" "=y")
5083 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5084 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5087 "pshufb\t{%2, %0|%0, %2}";
5088 [(set_attr "type" "sselog1")
5089 (set_attr "mode" "DI")])
;; "ssse3_psign<mode>3" over SSEMODE124 (V16QI, V8HI, V4SI): emits
;; psign{b,w,d}, the suffix coming from the ssevecsize mode attribute.
;; Modelled as an opaque unspec; operand 1 is tied to the destination.
5091 (define_insn "ssse3_psign<mode>3"
5092 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5093 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5094 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5097 "psign<ssevecsize>\t{%2, %0|%0, %2}";
5098 [(set_attr "type" "sselog1")
5099 (set_attr "mode" "TI")])
;; "ssse3_psign<mode>3" over the MMXMODEI mode macro, using the "y" register
;; constraint.  The mnemonic suffix comes from the mmxvecsize attribute; both
;; MMXMODEI and mmxvecsize are defined outside this file section.  Modelled
;; as an opaque unspec; operand 1 is tied to the destination.
5101 (define_insn "ssse3_psign<mode>3"
5102 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5103 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5104 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5107 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
5108 [(set_attr "type" "sselog1")
5109 (set_attr "mode" "DI")])
;; "ssse3_palignrti": emits palignr on TImode operands, modelled as an opaque
;; unspec.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing it as the instruction immediate
;; (presumably converting a bit count into a byte count).
5111 (define_insn "ssse3_palignrti"
5112 [(set (match_operand:TI 0 "register_operand" "=x")
5113 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5114 (match_operand:TI 2 "nonimmediate_operand" "xm")
5115 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5119 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5120 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5122 [(set_attr "type" "sseishft")
5123 (set_attr "mode" "TI")])
;; "ssse3_palignrdi": DImode form of palignr using the "y" register
;; constraint, modelled as an opaque unspec.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction immediate (presumably bits -> bytes).
5125 (define_insn "ssse3_palignrdi"
5126 [(set (match_operand:DI 0 "register_operand" "=y")
5127 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5128 (match_operand:DI 2 "nonimmediate_operand" "ym")
5129 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5133 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5134 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5136 [(set_attr "type" "sseishft")
5137 (set_attr "mode" "DI")])
;; "abs<mode>2" over SSEMODE124 (V16QI, V8HI, V4SI): expressed with the abs
;; RTL code and emitted as pabs{b,w,d} (suffix from ssevecsize).  The source
;; may be a register or memory and is not tied to the destination.
5139 (define_insn "abs<mode>2"
5140 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5141 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5143 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
5144 [(set_attr "type" "sselog1")
5145 (set_attr "mode" "TI")])
;; "abs<mode>2" over the MMXMODEI mode macro, using the "y" register
;; constraint: expressed with the abs RTL code and emitted as pabs with the
;; mmxvecsize suffix (MMXMODEI and mmxvecsize are defined outside this file
;; section).  The source may be a register or memory.
5147 (define_insn "abs<mode>2"
5148 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5149 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5151 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
5152 [(set_attr "type" "sselog1")
5153 (set_attr "mode" "DI")])
5155 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5157 ;; AMD SSE4A instructions
5159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; "sse4a_vmmovntv2df": emits movntsd.  Stores to a DFmode memory operand an
;; unspec of element 0 selected from a V2DF SSE register.
5161 (define_insn "sse4a_vmmovntv2df"
5162 [(set (match_operand:DF 0 "memory_operand" "=m")
5163 (unspec:DF [(vec_select:DF
5164 (match_operand:V2DF 1 "register_operand" "x")
5165 (parallel [(const_int 0)]))]
5168 "movntsd\t{%1, %0|%0, %1}"
5169 [(set_attr "type" "ssemov")
5170 (set_attr "mode" "DF")])
;; "sse4a_movntdf": emits movntsd.  Stores to a DFmode memory operand an
;; unspec of a scalar DFmode value held in an SSE register ("x").
5172 (define_insn "sse4a_movntdf"
5173 [(set (match_operand:DF 0 "memory_operand" "=m")
5174 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5177 "movntsd\t{%1, %0|%0, %1}"
5178 [(set_attr "type" "ssemov")
5179 (set_attr "mode" "DF")])
;; "sse4a_vmmovntv4sf": emits movntss.  Stores to an SFmode memory operand an
;; unspec of element 0 selected from a V4SF SSE register.
5181 (define_insn "sse4a_vmmovntv4sf"
5182 [(set (match_operand:SF 0 "memory_operand" "=m")
5183 (unspec:SF [(vec_select:SF
5184 (match_operand:V4SF 1 "register_operand" "x")
5185 (parallel [(const_int 0)]))]
5188 "movntss\t{%1, %0|%0, %1}"
5189 [(set_attr "type" "ssemov")
5190 (set_attr "mode" "SF")])
;; "sse4a_movntsf": emits movntss.  Stores to an SFmode memory operand an
;; unspec of a scalar SFmode value held in an SSE register ("x").
5192 (define_insn "sse4a_movntsf"
5193 [(set (match_operand:SF 0 "memory_operand" "=m")
5194 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5197 "movntss\t{%1, %0|%0, %1}"
5198 [(set_attr "type" "ssemov")
5199 (set_attr "mode" "SF")])
;; "sse4a_extrqi": immediate form of extrq.  Operand 1 (V2DI) is tied to the
;; destination; operands 2 and 3 are modeless integer constants.  The AT&T
;; template prints them in the order %3, %2; the Intel one as %2, %3.
5201 (define_insn "sse4a_extrqi"
5202 [(set (match_operand:V2DI 0 "register_operand" "=x")
5203 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5204 (match_operand 2 "const_int_operand" "")
5205 (match_operand 3 "const_int_operand" "")]
5208 "extrq\t{%3, %2, %0|%0, %2, %3}"
5209 [(set_attr "type" "sse")
5210 (set_attr "mode" "TI")])
;; "sse4a_extrq": register form of extrq.  Operand 1 (V2DI) is tied to the
;; destination; operand 2 is a V16QI SSE register.
5212 (define_insn "sse4a_extrq"
5213 [(set (match_operand:V2DI 0 "register_operand" "=x")
5214 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5215 (match_operand:V16QI 2 "register_operand" "x")]
5218 "extrq\t{%2, %0|%0, %2}"
5219 [(set_attr "type" "sse")
5220 (set_attr "mode" "TI")])
;; "sse4a_insertqi": immediate form of insertq.  Operand 1 (V2DI) is tied to
;; the destination, operand 2 is a V2DI SSE register, and operands 3 and 4
;; are modeless integer constants.  The AT&T template prints them in the
;; order %4, %3; the Intel one as %3, %4.
5222 (define_insn "sse4a_insertqi"
5223 [(set (match_operand:V2DI 0 "register_operand" "=x")
5224 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5225 (match_operand:V2DI 2 "register_operand" "x")
5226 (match_operand 3 "const_int_operand" "")
5227 (match_operand 4 "const_int_operand" "")]
5230 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5231 [(set_attr "type" "sseins")
5232 (set_attr "mode" "TI")])
;; "sse4a_insertq": register form of insertq.  Operand 1 (V2DI) is tied to
;; the destination; operand 2 is a V2DI SSE register.
5234 (define_insn "sse4a_insertq"
5235 [(set (match_operand:V2DI 0 "register_operand" "=x")
5236 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5237 (match_operand:V2DI 2 "register_operand" "x")]
5240 "insertq\t{%2, %0|%0, %2}"
5241 [(set_attr "type" "sseins")
5242 (set_attr "mode" "TI")])