1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name appear to
;; give the element widths in bytes of the modes listed (1 = QI, 2 = HI,
;; 4 = SI, 8 = DI) -- TODO confirm against the users of these macros.
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves in each SSEMODEI integer vector mode.
;; The expansion is delegated to ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI integer vector modes.
;; Alternatives: 0 = SSE constant ("C") into a register, 1 = register or
;; memory into a register, 2 = register into memory.  The condition requires
;; at least one of the two operands to be a register.
;; The output code switches on which_alternative; the visible returns are
;; standard_sse_constant_opcode, "movaps" (when the insn's mode attribute is
;; MODE_V4SF) and "movdqa".
;; The "mode" attribute expression tests optimize_size, TARGET_SSE2 and, for
;; alternative 2 only, TARGET_SSE_TYPELESS_STORES; "TI" is a visible result.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
;; 32-bit only (!TARGET_64BIT), SSE2, and only when TARGET_INTER_UNIT_MOVES.
;; Operand 1 is a DImode value in general registers (alternative 0) or in
;; memory (alternative 1); operand 2 is an early-clobbered SSE scratch that
;; is only needed by alternative 0.  The split runs once reload_completed.
;; NOTE(review): the split body switches on which_alternative and passes the
;; V4SImode operand 0 to gen_vec_concatv2di -- confirm both are valid here.
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 switch (which_alternative)
114 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
115 Assemble the 64-bit DImode value in an xmm register. */
;; Load the SImode subreg at byte offset 0 into operand 0 and the one at
;; byte offset 4 into the scratch, then interleave them with punpckldq.
116 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 0)));
118 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
119 gen_rtx_SUBREG (SImode, operands[1], 4)));
120 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
;; Presumably the memory alternative: concatenate the DImode operand with
;; const0_rtx.
124 emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
;; Expander for V4SF whole-vector moves; delegated to
;; ix86_expand_vector_move.
133 (define_expand "movv4sf"
134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
135 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
138 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF.  Alternatives: 0 = SSE constant ("C") into a
;; register, 1 = register or memory into a register, 2 = register into
;; memory; at least one operand must be a register.
;; The visible output templates are standard_sse_constant_opcode and
;; "movaps".
142 (define_insn "*movv4sf_internal"
143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
144 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
146 && (register_operand (operands[0], V4SFmode)
147 || register_operand (operands[1], V4SFmode))"
149 switch (which_alternative)
152 return standard_sse_constant_opcode (insn, operands[1]);
155 return "movaps\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set_attr "mode" "V4SF")])
;; After reload, a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand is rewritten in terms of the scalar:
;; operand 1 is narrowed to SFmode with simplify_gen_subreg and operand 2
;; is set to the V4SF zero vector.
164 [(set (match_operand:V4SF 0 "register_operand" "")
165 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
166 "TARGET_SSE && reload_completed"
169 (vec_duplicate:V4SF (match_dup 1))
173 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
174 operands[2] = CONST0_RTX (V4SFmode);
;; Expander for V2DF whole-vector moves; delegated to
;; ix86_expand_vector_move.
177 (define_expand "movv2df"
178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
179 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
182 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF.  Alternatives: 0 = SSE constant ("C") into a
;; register, 1 = register or memory into a register, 2 = register into
;; memory; at least one operand must be a register.
;; The visible output templates are standard_sse_constant_opcode, "movaps"
;; (when the insn's mode attribute is MODE_V4SF) and "movapd".
;; The "mode" attribute selects between "V4SF" and "V2DF" from a test on
;; optimize_size, TARGET_SSE2 and, for alternative 2 only,
;; TARGET_SSE_TYPELESS_STORES.
186 (define_insn "*movv2df_internal"
187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
188 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
190 && (register_operand (operands[0], V2DFmode)
191 || register_operand (operands[1], V2DFmode))"
193 switch (which_alternative)
196 return standard_sse_constant_opcode (insn, operands[1]);
199 if (get_attr_mode (insn) == MODE_V4SF)
200 return "movaps\t{%1, %0|%0, %1}";
202 return "movapd\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
210 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
211 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
212 (and (eq_attr "alternative" "2")
213 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
215 (const_string "V4SF")
216 (const_string "V2DF")))])
;; After reload, a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand becomes a vec_concat of the scalar
;; with zero: operand 1 is narrowed to DFmode with simplify_gen_subreg and
;; operand 2 is the DFmode zero constant.
219 [(set (match_operand:V2DF 0 "register_operand" "")
220 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
221 "TARGET_SSE2 && reload_completed"
222 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
224 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
225 operands[2] = CONST0_RTX (DFmode);
;; Expander that pushes a register operand of any SSEMODE vector mode;
;; delegated to ix86_expand_push.
228 (define_expand "push<mode>1"
229 [(match_operand:SSEMODE 0 "register_operand" "")]
232 ix86_expand_push (<MODE>mode, operands[0]);
;; Expander for misaligned moves in each SSEMODE vector mode; delegated to
;; ix86_expand_vector_move_misalign.
236 (define_expand "movmisalign<mode>"
237 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
238 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
241 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as movups, represented as an unspec.  Alternatives:
;; register or memory into a register, or register into memory; the
;; condition rejects memory-to-memory.
;; The "mode" attribute is V4SF to match the V4SF operands and the other
;; packed-single move patterns in this file; it previously read V2DF, which
;; describes a packed-double instruction and can mislead attributes that
;; are derived from the insn mode.
245 (define_insn "sse_movups"
246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
247 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
249 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
250 "movups\t{%1, %0|%0, %1}"
251 [(set_attr "type" "ssemov")
252 (set_attr "mode" "V4SF")])
;; V2DF move emitted as movupd, represented as an unspec.  Alternatives:
;; register or memory into a register, or register into memory; the
;; condition requires SSE2 and rejects memory-to-memory.
254 (define_insn "sse2_movupd"
255 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
256 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
258 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
259 "movupd\t{%1, %0|%0, %1}"
260 [(set_attr "type" "ssemov")
261 (set_attr "mode" "V2DF")])
;; V16QI move emitted as movdqu, represented as an unspec.  Alternatives:
;; register or memory into a register, or register into memory; the
;; condition requires SSE2 and rejects memory-to-memory.
263 (define_insn "sse2_movdqu"
264 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
265 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
268 "movdqu\t{%1, %0|%0, %1}"
269 [(set_attr "type" "ssemov")
270 (set_attr "prefix_data16" "1")
271 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory emitted as movntps, represented
;; as an unspec.
273 (define_insn "sse_movntv4sf"
274 [(set (match_operand:V4SF 0 "memory_operand" "=m")
275 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
278 "movntps\t{%1, %0|%0, %1}"
279 [(set_attr "type" "ssemov")
280 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory emitted as movntpd, represented
;; as an unspec.
;; NOTE(review): typed "ssecvt" whereas sse_movntv4sf uses "ssemov" --
;; confirm the difference is intended.
282 (define_insn "sse2_movntv2df"
283 [(set (match_operand:V2DF 0 "memory_operand" "=m")
284 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
287 "movntpd\t{%1, %0|%0, %1}"
288 [(set_attr "type" "ssecvt")
289 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory emitted as movntdq, represented
;; as an unspec.
291 (define_insn "sse2_movntv2di"
292 [(set (match_operand:V2DI 0 "memory_operand" "=m")
293 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
296 "movntdq\t{%1, %0|%0, %1}"
297 [(set_attr "type" "ssecvt")
298 (set_attr "prefix_data16" "1")
299 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory emitted as movnti,
;; represented as an unspec.
;; The "mode" attribute is SI because both operands are SImode; it
;; previously read V2DF, which describes a packed-double SSE instruction and
;; can mislead attributes that are derived from the insn mode.
301 (define_insn "sse2_movntsi"
302 [(set (match_operand:SI 0 "memory_operand" "=m")
303 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
306 "movnti\t{%1, %0|%0, %1}"
307 [(set_attr "type" "ssecvt")
308 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register emitted as lddqu,
;; represented as an unspec.  The source must be a memory operand.
310 (define_insn "sse3_lddqu"
311 [(set (match_operand:V16QI 0 "register_operand" "=x")
312 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
315 "lddqu\t{%1, %0|%0, %1}"
316 [(set_attr "type" "ssecvt")
317 (set_attr "prefix_rep" "1")
318 (set_attr "mode" "TI")])
320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
322 ;; Parallel single-precision floating point arithmetic
324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for V4SF negation; all work is done by
;; ix86_expand_fp_absneg_operator with code NEG.
326 (define_expand "negv4sf2"
327 [(set (match_operand:V4SF 0 "register_operand" "")
328 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
330 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; Expander for V4SF absolute value; all work is done by
;; ix86_expand_fp_absneg_operator with code ABS.
332 (define_expand "absv4sf2"
333 [(set (match_operand:V4SF 0 "register_operand" "")
334 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
336 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; Expander for V4SF addition.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
338 (define_expand "addv4sf3"
339 [(set (match_operand:V4SF 0 "register_operand" "")
340 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
341 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
343 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; V4SF addition emitted as addps.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be a register or memory.
345 (define_insn "*addv4sf3"
346 [(set (match_operand:V4SF 0 "register_operand" "=x")
347 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
348 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
349 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
350 "addps\t{%2, %0|%0, %2}"
351 [(set_attr "type" "sseadd")
352 (set_attr "mode" "V4SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar instruction
;; addss.  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
354 (define_insn "sse_vmaddv4sf3"
355 [(set (match_operand:V4SF 0 "register_operand" "=x")
357 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
358 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
361 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
362 "addss\t{%2, %0|%0, %2}"
363 [(set_attr "type" "sseadd")
364 (set_attr "mode" "SF")])
;; Expander for V4SF subtraction.  Operand 1 must be a register; the
;; operands are passed through ix86_fixup_binary_operands_no_copy.
366 (define_expand "subv4sf3"
367 [(set (match_operand:V4SF 0 "register_operand" "")
368 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
369 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
371 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; V4SF subtraction emitted as subps.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
373 (define_insn "*subv4sf3"
374 [(set (match_operand:V4SF 0 "register_operand" "=x")
375 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
376 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
378 "subps\t{%2, %0|%0, %2}"
379 [(set_attr "type" "sseadd")
380 (set_attr "mode" "V4SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar instruction
;; subss.  Operand 1 is a register tied to the destination.
382 (define_insn "sse_vmsubv4sf3"
383 [(set (match_operand:V4SF 0 "register_operand" "=x")
385 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
386 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
390 "subss\t{%2, %0|%0, %2}"
391 [(set_attr "type" "sseadd")
392 (set_attr "mode" "SF")])
;; Expander for V4SF multiplication.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
394 (define_expand "mulv4sf3"
395 [(set (match_operand:V4SF 0 "register_operand" "")
396 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
397 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
399 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; V4SF multiplication emitted as mulps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.
401 (define_insn "*mulv4sf3"
402 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
404 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
405 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
406 "mulps\t{%2, %0|%0, %2}"
407 [(set_attr "type" "ssemul")
408 (set_attr "mode" "V4SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar instruction
;; mulss.  Operand 1 is a register tied to the destination.
410 (define_insn "sse_vmmulv4sf3"
411 [(set (match_operand:V4SF 0 "register_operand" "=x")
413 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
414 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
417 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
418 "mulss\t{%2, %0|%0, %2}"
419 [(set_attr "type" "ssemul")
420 (set_attr "mode" "SF")])
;; Expander for V4SF division.  Operand 1 must be a register; the operands
;; are passed through ix86_fixup_binary_operands_no_copy.
422 (define_expand "divv4sf3"
423 [(set (match_operand:V4SF 0 "register_operand" "")
424 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
425 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
427 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; V4SF division emitted as divps.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
429 (define_insn "*divv4sf3"
430 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
432 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
434 "divps\t{%2, %0|%0, %2}"
435 [(set_attr "type" "ssediv")
436 (set_attr "mode" "V4SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar instruction
;; divss.  Operand 1 is a register tied to the destination.
438 (define_insn "sse_vmdivv4sf3"
439 [(set (match_operand:V4SF 0 "register_operand" "=x")
441 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
442 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
446 "divss\t{%2, %0|%0, %2}"
447 [(set_attr "type" "ssediv")
448 (set_attr "mode" "SF")])
;; V4SF operation represented as UNSPEC_RCP and emitted as rcpps.  The
;; source may be a register or memory.
450 (define_insn "sse_rcpv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
455 "rcpps\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "V4SF")])
;; Intrinsic pattern that emits the scalar instruction rcpss from operand 1.
;; Operand 2 is a register tied to the destination.
459 (define_insn "sse_vmrcpv4sf2"
460 [(set (match_operand:V4SF 0 "register_operand" "=x")
462 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
464 (match_operand:V4SF 2 "register_operand" "0")
467 "rcpss\t{%1, %0|%0, %1}"
468 [(set_attr "type" "sse")
469 (set_attr "mode" "SF")])
;; V4SF operation represented as UNSPEC_RSQRT and emitted as rsqrtps.  The
;; source may be a register or memory.
471 (define_insn "sse_rsqrtv4sf2"
472 [(set (match_operand:V4SF 0 "register_operand" "=x")
474 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
476 "rsqrtps\t{%1, %0|%0, %1}"
477 [(set_attr "type" "sse")
478 (set_attr "mode" "V4SF")])
;; Intrinsic pattern that emits the scalar instruction rsqrtss from
;; operand 1.  Operand 2 is a register tied to the destination.
480 (define_insn "sse_vmrsqrtv4sf2"
481 [(set (match_operand:V4SF 0 "register_operand" "=x")
483 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
485 (match_operand:V4SF 2 "register_operand" "0")
488 "rsqrtss\t{%1, %0|%0, %1}"
489 [(set_attr "type" "sse")
490 (set_attr "mode" "SF")])
;; V4SF square root emitted as sqrtps.  The source may be a register or
;; memory.
492 (define_insn "sqrtv4sf2"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
494 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
496 "sqrtps\t{%1, %0|%0, %1}"
497 [(set_attr "type" "sse")
498 (set_attr "mode" "V4SF")])
;; Intrinsic pattern that emits the scalar instruction sqrtss from
;; operand 1.  Operand 2 is a register tied to the destination.
500 (define_insn "sse_vmsqrtv4sf2"
501 [(set (match_operand:V4SF 0 "register_operand" "=x")
503 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
504 (match_operand:V4SF 2 "register_operand" "0")
507 "sqrtss\t{%1, %0|%0, %1}"
508 [(set_attr "type" "sse")
509 (set_attr "mode" "SF")])
511 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
512 ;; isn't really correct, as those rtl operators aren't defined when
513 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for V4SF signed maximum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then passed
;; through ix86_fixup_binary_operands_no_copy.
515 (define_expand "smaxv4sf3"
516 [(set (match_operand:V4SF 0 "register_operand" "")
517 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
518 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
521 if (!flag_finite_math_only)
522 operands[1] = force_reg (V4SFmode, operands[1]);
523 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; V4SF maximum emitted as maxps, enabled only under
;; flag_finite_math_only.  Operand 1 is tied to the destination and marked
;; commutative ("%0").
526 (define_insn "*smaxv4sf3_finite"
527 [(set (match_operand:V4SF 0 "register_operand" "=x")
528 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
529 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
530 "TARGET_SSE && flag_finite_math_only
531 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
532 "maxps\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sse")
534 (set_attr "mode" "V4SF")])
;; V4SF maximum emitted as maxps.  Unlike *smaxv4sf3_finite, operand 1 must
;; be a register tied to the destination and is not marked commutative.
536 (define_insn "*smaxv4sf3"
537 [(set (match_operand:V4SF 0 "register_operand" "=x")
538 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
539 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
541 "maxps\t{%2, %0|%0, %2}"
542 [(set_attr "type" "sse")
543 (set_attr "mode" "V4SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar instruction
;; maxss.  Operand 1 is a register tied to the destination.
545 (define_insn "sse_vmsmaxv4sf3"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
549 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
553 "maxss\t{%2, %0|%0, %2}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "SF")])
;; Expander for V4SF signed minimum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then passed
;; through ix86_fixup_binary_operands_no_copy.
557 (define_expand "sminv4sf3"
558 [(set (match_operand:V4SF 0 "register_operand" "")
559 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
560 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
563 if (!flag_finite_math_only)
564 operands[1] = force_reg (V4SFmode, operands[1]);
565 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; V4SF minimum emitted as minps, enabled only under
;; flag_finite_math_only.  Operand 1 is tied to the destination and marked
;; commutative ("%0").
568 (define_insn "*sminv4sf3_finite"
569 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
572 "TARGET_SSE && flag_finite_math_only
573 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
574 "minps\t{%2, %0|%0, %2}"
575 [(set_attr "type" "sse")
576 (set_attr "mode" "V4SF")])
;; V4SF minimum emitted as minps.  Unlike *sminv4sf3_finite, operand 1 must
;; be a register tied to the destination and is not marked commutative.
578 (define_insn "*sminv4sf3"
579 [(set (match_operand:V4SF 0 "register_operand" "=x")
580 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
581 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
583 "minps\t{%2, %0|%0, %2}"
584 [(set_attr "type" "sse")
585 (set_attr "mode" "V4SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar instruction
;; minss.  Operand 1 is a register tied to the destination.
587 (define_insn "sse_vmsminv4sf3"
588 [(set (match_operand:V4SF 0 "register_operand" "=x")
590 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
591 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
595 "minss\t{%2, %0|%0, %2}"
596 [(set_attr "type" "sse")
597 (set_attr "mode" "SF")])
599 ;; These versions of the min/max patterns implement exactly the operations
600 ;; min = (op1 < op2 ? op1 : op2)
601 ;; max = (!(op1 < op2) ? op1 : op2)
602 ;; Their operands are not commutative, and thus they may be used in the
603 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum, represented as a two-operand unspec and
;; emitted as minps.  Operand 1 is a register tied to the destination.
605 (define_insn "*ieee_sminv4sf3"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
607 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
608 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
611 "minps\t{%2, %0|%0, %2}"
612 [(set_attr "type" "sseadd")
613 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum, represented as a two-operand unspec and
;; emitted as maxps.  Operand 1 is a register tied to the destination.
615 (define_insn "*ieee_smaxv4sf3"
616 [(set (match_operand:V4SF 0 "register_operand" "=x")
617 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
618 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
621 "maxps\t{%2, %0|%0, %2}"
622 [(set_attr "type" "sseadd")
623 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum, represented as a two-operand unspec and
;; emitted as minpd.  Operand 1 is a register tied to the destination.
625 (define_insn "*ieee_sminv2df3"
626 [(set (match_operand:V2DF 0 "register_operand" "=x")
627 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
628 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
631 "minpd\t{%2, %0|%0, %2}"
632 [(set_attr "type" "sseadd")
633 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum, represented as a two-operand unspec and
;; emitted as maxpd.  Operand 1 is a register tied to the destination.
635 (define_insn "*ieee_smaxv2df3"
636 [(set (match_operand:V2DF 0 "register_operand" "=x")
637 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
638 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
641 "maxpd\t{%2, %0|%0, %2}"
642 [(set_attr "type" "sseadd")
643 (set_attr "mode" "V2DF")])
;; V4SF pattern emitted as addsubps.  Operand 1 is a register tied to the
;; destination; the same two operands are reused (match_dup) by the minus
;; arm of the pattern.
645 (define_insn "sse3_addsubv4sf3"
646 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (match_operand:V4SF 1 "register_operand" "0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
651 (minus:V4SF (match_dup 1) (match_dup 2))
654 "addsubps\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "prefix_rep" "1")
657 (set_attr "mode" "V4SF")])
;; V4SF pattern emitted as haddps.  The RTL selects SF elements 0, 1, 2
;; and 3 of operand 1 (tied to the destination) and then the same four
;; elements of operand 2.
659 (define_insn "sse3_haddv4sf3"
660 [(set (match_operand:V4SF 0 "register_operand" "=x")
665 (match_operand:V4SF 1 "register_operand" "0")
666 (parallel [(const_int 0)]))
667 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
669 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
670 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
674 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
675 (parallel [(const_int 0)]))
676 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
678 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
679 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
681 "haddps\t{%2, %0|%0, %2}"
682 [(set_attr "type" "sseadd")
683 (set_attr "prefix_rep" "1")
684 (set_attr "mode" "V4SF")])
;; V4SF pattern emitted as hsubps.  The RTL selects SF elements 0, 1, 2
;; and 3 of operand 1 (tied to the destination) and then the same four
;; elements of operand 2.
686 (define_insn "sse3_hsubv4sf3"
687 [(set (match_operand:V4SF 0 "register_operand" "=x")
692 (match_operand:V4SF 1 "register_operand" "0")
693 (parallel [(const_int 0)]))
694 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
696 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
697 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
701 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
702 (parallel [(const_int 0)]))
703 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
705 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
706 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
708 "hsubps\t{%2, %0|%0, %2}"
709 [(set_attr "type" "sseadd")
710 (set_attr "prefix_rep" "1")
711 (set_attr "mode" "V4SF")])
;; Expander for a V4SF sum reduction of operand 1 into operand 0.  Two
;; expansions are visible: a pair of sse3_haddv4sf3 insns through a fresh
;; temporary register, and a call to ix86_expand_reduc_v4sf with
;; gen_addv4sf3.
713 (define_expand "reduc_splus_v4sf"
714 [(match_operand:V4SF 0 "register_operand" "")
715 (match_operand:V4SF 1 "register_operand" "")]
720 rtx tmp = gen_reg_rtx (V4SFmode);
721 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
722 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
725 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for a V4SF maximum reduction of operand 1 into operand 0, via
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3.
729 (define_expand "reduc_smax_v4sf"
730 [(match_operand:V4SF 0 "register_operand" "")
731 (match_operand:V4SF 1 "register_operand" "")]
734 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for a V4SF minimum reduction of operand 1 into operand 0, via
;; ix86_expand_reduc_v4sf with gen_sminv4sf3.
738 (define_expand "reduc_smin_v4sf"
739 [(match_operand:V4SF 0 "register_operand" "")
740 (match_operand:V4SF 1 "register_operand" "")]
743 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
747 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
749 ;; Parallel single-precision floating point comparisons
751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF comparison whose code is matched by operator 3
;; (sse_comparison_operator).  The mnemonic is "cmp" + the %D3 expansion of
;; that operator + "ps".  Operand 1 is a register tied to the destination.
753 (define_insn "sse_maskcmpv4sf3"
754 [(set (match_operand:V4SF 0 "register_operand" "=x")
755 (match_operator:V4SF 3 "sse_comparison_operator"
756 [(match_operand:V4SF 1 "register_operand" "0")
757 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
759 "cmp%D3ps\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssecmp")
761 (set_attr "mode" "V4SF")])
;; SFmode comparison whose code is matched by operator 3
;; (sse_comparison_operator).  The mnemonic is "cmp" + the %D3 expansion of
;; that operator + "ss".  Operand 1 is a register tied to the destination.
763 (define_insn "sse_maskcmpsf3"
764 [(set (match_operand:SF 0 "register_operand" "=x")
765 (match_operator:SF 3 "sse_comparison_operator"
766 [(match_operand:SF 1 "register_operand" "0")
767 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
769 "cmp%D3ss\t{%2, %0|%0, %2}"
770 [(set_attr "type" "ssecmp")
771 (set_attr "mode" "SF")])
;; Intrinsic pattern on V4SF operands that emits the scalar compare
;; "cmp" + %D3 + "ss".  Unlike sse_maskcmpv4sf3, operand 2 must be a
;; register ("x"); operand 1 is tied to the destination.
773 (define_insn "sse_vmmaskcmpv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "=x")
776 (match_operator:V4SF 3 "sse_comparison_operator"
777 [(match_operand:V4SF 1 "register_operand" "0")
778 (match_operand:V4SF 2 "register_operand" "x")])
782 "cmp%D3ss\t{%2, %0|%0, %2}"
783 [(set_attr "type" "ssecmp")
784 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from a comiss of two V4SF operands; each
;; operand carries an element selector of (const_int 0).  Operand 0 must be
;; a register, operand 1 may be a register or memory.
786 (define_insn "sse_comi"
787 [(set (reg:CCFP FLAGS_REG)
790 (match_operand:V4SF 0 "register_operand" "x")
791 (parallel [(const_int 0)]))
793 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
794 (parallel [(const_int 0)]))))]
796 "comiss\t{%1, %0|%0, %1}"
797 [(set_attr "type" "ssecomi")
798 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from a ucomiss of two V4SF operands; each
;; operand carries an element selector of (const_int 0).  Operand 0 must be
;; a register, operand 1 may be a register or memory.
800 (define_insn "sse_ucomi"
801 [(set (reg:CCFPU FLAGS_REG)
804 (match_operand:V4SF 0 "register_operand" "x")
805 (parallel [(const_int 0)]))
807 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
808 (parallel [(const_int 0)]))))]
810 "ucomiss\t{%1, %0|%0, %1}"
811 [(set_attr "type" "ssecomi")
812 (set_attr "mode" "SF")])
;; Expander for a V4SF conditional: operands 4 and 5 sit inside the
;; comparison, operands 1 and 2 are the general_operand values.  The
;; expansion is attempted with ix86_expand_fp_vcond (operands).
814 (define_expand "vcondv4sf"
815 [(set (match_operand:V4SF 0 "register_operand" "")
818 [(match_operand:V4SF 4 "nonimmediate_operand" "")
819 (match_operand:V4SF 5 "nonimmediate_operand" "")])
820 (match_operand:V4SF 1 "general_operand" "")
821 (match_operand:V4SF 2 "general_operand" "")))]
824 if (ix86_expand_fp_vcond (operands))
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
832 ;; Parallel single-precision floating point logical operations
834 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for bitwise AND on V4SF.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
836 (define_expand "andv4sf3"
837 [(set (match_operand:V4SF 0 "register_operand" "")
838 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
839 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
841 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Bitwise AND on V4SF emitted as andps.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
843 (define_insn "*andv4sf3"
844 [(set (match_operand:V4SF 0 "register_operand" "=x")
845 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
846 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
847 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
848 "andps\t{%2, %0|%0, %2}"
849 [(set_attr "type" "sselog")
850 (set_attr "mode" "V4SF")])
;; (and (not op1) op2) on V4SF, emitted as andnps.  The complemented
;; operand 1 is a register tied to the destination.
852 (define_insn "sse_nandv4sf3"
853 [(set (match_operand:V4SF 0 "register_operand" "=x")
854 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
855 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
857 "andnps\t{%2, %0|%0, %2}"
858 [(set_attr "type" "sselog")
859 (set_attr "mode" "V4SF")])
;; Expander for bitwise inclusive OR on V4SF.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is emitted.
861 (define_expand "iorv4sf3"
862 [(set (match_operand:V4SF 0 "register_operand" "")
863 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
864 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
866 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Bitwise inclusive OR on V4SF emitted as orps.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
868 (define_insn "*iorv4sf3"
869 [(set (match_operand:V4SF 0 "register_operand" "=x")
870 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
871 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
872 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
873 "orps\t{%2, %0|%0, %2}"
874 [(set_attr "type" "sselog")
875 (set_attr "mode" "V4SF")])
;; Expander for bitwise exclusive OR on V4SF.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is emitted.
877 (define_expand "xorv4sf3"
878 [(set (match_operand:V4SF 0 "register_operand" "")
879 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
880 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
882 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Bitwise exclusive OR on V4SF emitted as xorps.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
884 (define_insn "*xorv4sf3"
885 [(set (match_operand:V4SF 0 "register_operand" "=x")
886 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
887 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
888 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
889 "xorps\t{%2, %0|%0, %2}"
890 [(set_attr "type" "sselog")
891 (set_attr "mode" "V4SF")])
893 ;; Also define scalar versions. These are used for abs, neg, and
894 ;; conditional move. Using subregs into vector modes causes register
895 ;; allocation lossage. These patterns do not allow memory operands
896 ;; because the native instructions read the full 128-bits.
;; Scalar SFmode bitwise AND, register operands only, emitted as the
;; full-width andps (hence the V4SF "mode" attribute).
898 (define_insn "*andsf3"
899 [(set (match_operand:SF 0 "register_operand" "=x")
900 (and:SF (match_operand:SF 1 "register_operand" "0")
901 (match_operand:SF 2 "register_operand" "x")))]
903 "andps\t{%2, %0|%0, %2}"
904 [(set_attr "type" "sselog")
905 (set_attr "mode" "V4SF")])
;; Scalar SFmode (and (not op1) op2), register operands only, emitted as
;; the full-width andnps (hence the V4SF "mode" attribute).
907 (define_insn "*nandsf3"
908 [(set (match_operand:SF 0 "register_operand" "=x")
909 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
910 (match_operand:SF 2 "register_operand" "x")))]
912 "andnps\t{%2, %0|%0, %2}"
913 [(set_attr "type" "sselog")
914 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise inclusive OR, register operands only, emitted as
;; the full-width orps (hence the V4SF "mode" attribute).
916 (define_insn "*iorsf3"
917 [(set (match_operand:SF 0 "register_operand" "=x")
918 (ior:SF (match_operand:SF 1 "register_operand" "0")
919 (match_operand:SF 2 "register_operand" "x")))]
921 "orps\t{%2, %0|%0, %2}"
922 [(set_attr "type" "sselog")
923 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise exclusive OR, register operands only, emitted as
;; the full-width xorps (hence the V4SF "mode" attribute).
925 (define_insn "*xorsf3"
926 [(set (match_operand:SF 0 "register_operand" "=x")
927 (xor:SF (match_operand:SF 1 "register_operand" "0")
928 (match_operand:SF 2 "register_operand" "x")))]
930 "xorps\t{%2, %0|%0, %2}"
931 [(set_attr "type" "sselog")
932 (set_attr "mode" "V4SF")])
934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
936 ;; Parallel single-precision floating point conversion operations
938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts V2SI operand 2 ("ym": MMX register or memory) to V2SF with
;; float and emits cvtpi2ps.  V4SF operand 1 is a register tied to the
;; destination.
940 (define_insn "sse_cvtpi2ps"
941 [(set (match_operand:V4SF 0 "register_operand" "=x")
944 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
945 (match_operand:V4SF 1 "register_operand" "0")
948 "cvtpi2ps\t{%2, %0|%0, %2}"
949 [(set_attr "type" "ssecvt")
950 (set_attr "mode" "V4SF")])
;; Converts V4SF operand 1 through a V4SI unspec, keeps elements 0 and 1,
;; and writes the V2SI result to an MMX register ("y"); emitted as
;; cvtps2pi.
952 (define_insn "sse_cvtps2pi"
953 [(set (match_operand:V2SI 0 "register_operand" "=y")
955 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
957 (parallel [(const_int 0) (const_int 1)])))]
959 "cvtps2pi\t{%1, %0|%0, %1}"
960 [(set_attr "type" "ssecvt")
961 (set_attr "unit" "mmx")
962 (set_attr "mode" "DI")])
;; Converts V4SF operand 1 with fix:V4SI, keeps elements 0 and 1, and
;; writes the V2SI result to an MMX register ("y"); emitted as cvttps2pi.
;; NOTE(review): "mode" is "SF" here but "DI" on sse_cvtps2pi -- confirm the
;; difference is intended.
964 (define_insn "sse_cvttps2pi"
965 [(set (match_operand:V2SI 0 "register_operand" "=y")
967 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
968 (parallel [(const_int 0) (const_int 1)])))]
970 "cvttps2pi\t{%1, %0|%0, %1}"
971 [(set_attr "type" "ssecvt")
972 (set_attr "unit" "mmx")
973 (set_attr "mode" "SF")])
;; Converts SImode operand 2 (general register or memory, one alternative
;; each) to SF with float and emits cvtsi2ss.  V4SF operand 1 is a register
;; tied to the destination.
975 (define_insn "sse_cvtsi2ss"
976 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
979 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
980 (match_operand:V4SF 1 "register_operand" "0,0")
983 "cvtsi2ss\t{%2, %0|%0, %2}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "vector,double")
986 (set_attr "amdfam10_decode" "vector,double")
987 (set_attr "mode" "SF")])
;; 64-bit only: converts DImode operand 2 to SF with float and emits
;; cvtsi2ssq.  V4SF operand 1 is a register tied to the destination.
;; NOTE(review): the second alternative is "rm" here but "m" on
;; sse_cvtsi2ss -- confirm the difference is intended.
989 (define_insn "sse_cvtsi2ssq"
990 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
993 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
994 (match_operand:V4SF 1 "register_operand" "0,0")
996 "TARGET_SSE && TARGET_64BIT"
997 "cvtsi2ssq\t{%2, %0|%0, %2}"
998 [(set_attr "type" "sseicvt")
999 (set_attr "athlon_decode" "vector,double")
1000 (set_attr "amdfam10_decode" "vector,double")
1001 (set_attr "mode" "SF")])
;; Converts element 0 of V4SF operand 1 (SSE register or memory) to an
;; SImode general register; represented as UNSPEC_FIX_NOTRUNC and emitted
;; as cvtss2si.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse_cvtss2si_2 -- confirm the omission is intended.
1003 (define_insn "sse_cvtss2si"
1004 [(set (match_operand:SI 0 "register_operand" "=r,r")
1007 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1008 (parallel [(const_int 0)]))]
1009 UNSPEC_FIX_NOTRUNC))]
1011 "cvtss2si\t{%1, %0|%0, %1}"
1012 [(set_attr "type" "sseicvt")
1013 (set_attr "athlon_decode" "double,vector")
1014 (set_attr "prefix_rep" "1")
1015 (set_attr "mode" "SI")])
;; Variant of the cvtss2si pattern whose source is a plain SFmode operand
;; (SSE register or memory) rather than a vector element; represented as
;; UNSPEC_FIX_NOTRUNC.
1017 (define_insn "sse_cvtss2si_2"
1018 [(set (match_operand:SI 0 "register_operand" "=r,r")
1019 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1020 UNSPEC_FIX_NOTRUNC))]
1022 "cvtss2si\t{%1, %0|%0, %1}"
1023 [(set_attr "type" "sseicvt")
1024 (set_attr "athlon_decode" "double,vector")
1025 (set_attr "amdfam10_decode" "double,double")
1026 (set_attr "prefix_rep" "1")
1027 (set_attr "mode" "SI")])
;; 64-bit only: converts element 0 of V4SF operand 1 to a DImode general
;; register; represented as UNSPEC_FIX_NOTRUNC and emitted as cvtss2siq.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse_cvtss2siq_2 -- confirm the omission is intended.
1029 (define_insn "sse_cvtss2siq"
1030 [(set (match_operand:DI 0 "register_operand" "=r,r")
1033 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1034 (parallel [(const_int 0)]))]
1035 UNSPEC_FIX_NOTRUNC))]
1036 "TARGET_SSE && TARGET_64BIT"
1037 "cvtss2siq\t{%1, %0|%0, %1}"
1038 [(set_attr "type" "sseicvt")
1039 (set_attr "athlon_decode" "double,vector")
1040 (set_attr "prefix_rep" "1")
1041 (set_attr "mode" "DI")])
;; 64-bit only: variant of the cvtss2siq pattern whose source is a plain
;; SFmode operand (SSE register or memory); represented as
;; UNSPEC_FIX_NOTRUNC.
1043 (define_insn "sse_cvtss2siq_2"
1044 [(set (match_operand:DI 0 "register_operand" "=r,r")
1045 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1046 UNSPEC_FIX_NOTRUNC))]
1047 "TARGET_SSE && TARGET_64BIT"
1048 "cvtss2siq\t{%1, %0|%0, %1}"
1049 [(set_attr "type" "sseicvt")
1050 (set_attr "athlon_decode" "double,vector")
1051 (set_attr "amdfam10_decode" "double,double")
1052 (set_attr "prefix_rep" "1")
1053 (set_attr "mode" "DI")])
;; sse_cvttss2si: SImode register result derived from index 0 of a V4SF
;; register-or-memory operand; emits cvttss2si.  No UNSPEC appears in
;; this pattern, in contrast to sse_cvtss2si.
1055 (define_insn "sse_cvttss2si"
1056 [(set (match_operand:SI 0 "register_operand" "=r,r")
1059 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1060 (parallel [(const_int 0)]))))]
1062 "cvttss2si\t{%1, %0|%0, %1}"
1063 [(set_attr "type" "sseicvt")
1064 (set_attr "athlon_decode" "double,vector")
1065 (set_attr "amdfam10_decode" "double,double")
1066 (set_attr "prefix_rep" "1")
1067 (set_attr "mode" "SI")])
;; sse_cvttss2siq: DImode counterpart of sse_cvttss2si; requires
;; TARGET_SSE && TARGET_64BIT and emits cvttss2siq.
1069 (define_insn "sse_cvttss2siq"
1070 [(set (match_operand:DI 0 "register_operand" "=r,r")
1073 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1074 (parallel [(const_int 0)]))))]
1075 "TARGET_SSE && TARGET_64BIT"
1076 "cvttss2siq\t{%1, %0|%0, %1}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "double,vector")
1079 (set_attr "amdfam10_decode" "double,double")
1080 (set_attr "prefix_rep" "1")
1081 (set_attr "mode" "DI")])
;; sse2_cvtdq2ps: (float:V4SF ...) of a V4SI register-or-memory operand
;; into a V4SF register; emits cvtdq2ps.
1083 (define_insn "sse2_cvtdq2ps"
1084 [(set (match_operand:V4SF 0 "register_operand" "=x")
1085 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1087 "cvtdq2ps\t{%1, %0|%0, %1}"
1088 [(set_attr "type" "ssecvt")
1089 (set_attr "mode" "V4SF")])
;; sse2_cvtps2dq: V4SI register result from a V4SF register-or-memory
;; operand wrapped in UNSPEC_FIX_NOTRUNC; emits cvtps2dq.
1091 (define_insn "sse2_cvtps2dq"
1092 [(set (match_operand:V4SI 0 "register_operand" "=x")
1093 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1094 UNSPEC_FIX_NOTRUNC))]
1096 "cvtps2dq\t{%1, %0|%0, %1}"
1097 [(set_attr "type" "ssecvt")
1098 (set_attr "prefix_data16" "1")
1099 (set_attr "mode" "TI")])
;; sse2_cvttps2dq: (fix:V4SI ...) of a V4SF register-or-memory operand
;; into a V4SI register; emits cvttps2dq.
1101 (define_insn "sse2_cvttps2dq"
1102 [(set (match_operand:V4SI 0 "register_operand" "=x")
1103 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1105 "cvttps2dq\t{%1, %0|%0, %1}"
1106 [(set_attr "type" "ssecvt")
1107 (set_attr "prefix_rep" "1")
1108 (set_attr "mode" "TI")])
1110 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1112 ;; Parallel single-precision floating point element swizzling
1114 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_movhlps: V4SF shuffle of operands 1 and 2 with three alternatives,
;; emitted as movhlps, movlps (reading %H2) or movhps respectively.
;; Operand 1 is tied to the destination in every alternative, and the
;; condition rejects the case where operands 1 and 2 are both memory.
1116 (define_insn "sse_movhlps"
1117 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1120 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1121 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1122 (parallel [(const_int 6)
1126 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1128 movhlps\t{%2, %0|%0, %2}
1129 movlps\t{%H2, %0|%0, %H2}
1130 movhps\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "ssemov")
1132 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movlhps: V4SF shuffle of operands 1 and 2 with three alternatives,
;; emitted as movlhps, movhps or movlps (writing %H0) respectively.
;; Operand 1 is tied to the destination; the condition defers operand
;; validation to ix86_binary_operator_ok.
1134 (define_insn "sse_movlhps"
1135 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1138 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1139 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1140 (parallel [(const_int 0)
1144 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1146 movlhps\t{%2, %0|%0, %2}
1147 movhps\t{%2, %0|%0, %2}
1148 movlps\t{%2, %H0|%H0, %2}"
1149 [(set_attr "type" "ssemov")
1150 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_unpckhps: selects indices 2, 6, 3, 7 from the combination of
;; operand 1 (tied to the destination) and operand 2 (register or
;; memory); emits unpckhps.
1152 (define_insn "sse_unpckhps"
1153 [(set (match_operand:V4SF 0 "register_operand" "=x")
1156 (match_operand:V4SF 1 "register_operand" "0")
1157 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1158 (parallel [(const_int 2) (const_int 6)
1159 (const_int 3) (const_int 7)])))]
1161 "unpckhps\t{%2, %0|%0, %2}"
1162 [(set_attr "type" "sselog")
1163 (set_attr "mode" "V4SF")])
;; sse_unpcklps: selects indices 0, 4, 1, 5 from the combination of
;; operand 1 (tied to the destination) and operand 2 (register or
;; memory); emits unpcklps.
1165 (define_insn "sse_unpcklps"
1166 [(set (match_operand:V4SF 0 "register_operand" "=x")
1169 (match_operand:V4SF 1 "register_operand" "0")
1170 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1171 (parallel [(const_int 0) (const_int 4)
1172 (const_int 1) (const_int 5)])))]
1174 "unpcklps\t{%2, %0|%0, %2}"
1175 [(set_attr "type" "sselog")
1176 (set_attr "mode" "V4SF")])
1178 ;; These are modeled with the same vec_concat as the others so that we
1179 ;; capture users of shufps that can use the new instructions
;; sse3_movshdup: single-source V4SF shuffle whose selector list starts
;; at index 1; emits movshdup with a register-or-memory source.
1180 (define_insn "sse3_movshdup"
1181 [(set (match_operand:V4SF 0 "register_operand" "=x")
1184 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1186 (parallel [(const_int 1)
1191 "movshdup\t{%1, %0|%0, %1}"
1192 [(set_attr "type" "sse")
1193 (set_attr "prefix_rep" "1")
1194 (set_attr "mode" "V4SF")])
;; sse3_movsldup: single-source V4SF shuffle whose selector list starts
;; at index 0; emits movsldup with a register-or-memory source.
1196 (define_insn "sse3_movsldup"
1197 [(set (match_operand:V4SF 0 "register_operand" "=x")
1200 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1202 (parallel [(const_int 0)
1207 "movsldup\t{%1, %0|%0, %1}"
1208 [(set_attr "type" "sse")
1209 (set_attr "prefix_rep" "1")
1210 (set_attr "mode" "V4SF")])
;; sse_shufps: expander taking a destination, two V4SF sources and a
;; constant integer mask (operand 3).  The mask is split into four 2-bit
;; fields; the two upper fields are biased by 4 before all four are
;; passed, with operands 0-2, to gen_sse_shufps_1.
1212 (define_expand "sse_shufps"
1213 [(match_operand:V4SF 0 "register_operand" "")
1214 (match_operand:V4SF 1 "register_operand" "")
1215 (match_operand:V4SF 2 "nonimmediate_operand" "")
1216 (match_operand:SI 3 "const_int_operand" "")]
1219 int mask = INTVAL (operands[3]);
1220 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1221 GEN_INT ((mask >> 0) & 3),
1222 GEN_INT ((mask >> 2) & 3),
1223 GEN_INT (((mask >> 4) & 3) + 4),
1224 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_1: shufps with the four selectors as separate operands:
;; operands 3 and 4 must satisfy const_0_to_3_operand, operands 5 and 6
;; const_4_to_7_operand.  The output code packs them back into one
;; immediate (subtracting 4 from operands 5 and 6), stores it in
;; operands[3], and returns the shufps template that prints %3.
1228 (define_insn "sse_shufps_1"
1229 [(set (match_operand:V4SF 0 "register_operand" "=x")
1232 (match_operand:V4SF 1 "register_operand" "0")
1233 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1234 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1235 (match_operand 4 "const_0_to_3_operand" "")
1236 (match_operand 5 "const_4_to_7_operand" "")
1237 (match_operand 6 "const_4_to_7_operand" "")])))]
1241 mask |= INTVAL (operands[3]) << 0;
1242 mask |= INTVAL (operands[4]) << 2;
1243 mask |= (INTVAL (operands[5]) - 4) << 4;
1244 mask |= (INTVAL (operands[6]) - 4) << 6;
1245 operands[3] = GEN_INT (mask);
1247 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1249 [(set_attr "type" "sselog")
1250 (set_attr "mode" "V4SF")])
;; sse_storehps: V2SF destination taken from indices 2 and 3 of a V4SF
;; source.  Alternatives: memory destination (movhps), register to
;; register (movhlps), and offsettable-memory source read via %H1
;; (movlps).
1252 (define_insn "sse_storehps"
1253 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1255 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1256 (parallel [(const_int 2) (const_int 3)])))]
1259 movhps\t{%1, %0|%0, %1}
1260 movhlps\t{%1, %0|%0, %1}
1261 movlps\t{%H1, %0|%0, %H1}"
1262 [(set_attr "type" "ssemov")
1263 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps: V4SF result built from indices 0 and 1 of operand 1
;; (tied to the destination) followed by the V2SF operand 2.
;; Alternatives emit movhps (memory source), movlhps (register source)
;; or movlps into %H0 (offsettable-memory destination).
1265 (define_insn "sse_loadhps"
1266 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1269 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1270 (parallel [(const_int 0) (const_int 1)]))
1271 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1274 movhps\t{%2, %0|%0, %2}
1275 movlhps\t{%2, %0|%0, %2}
1276 movlps\t{%2, %H0|%H0, %2}"
1277 [(set_attr "type" "ssemov")
1278 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_storelps: V2SF destination taken from indices 0 and 1 of a V4SF
;; source.  The register-to-register alternative emits movaps; the two
;; alternatives involving memory emit movlps.
1280 (define_insn "sse_storelps"
1281 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1283 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1284 (parallel [(const_int 0) (const_int 1)])))]
1287 movlps\t{%1, %0|%0, %1}
1288 movaps\t{%1, %0|%0, %1}
1289 movlps\t{%1, %0|%0, %1}"
1290 [(set_attr "type" "ssemov")
1291 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps: V4SF result built from the V2SF operand 2 followed by
;; indices 2 and 3 of operand 1.  When operand 2 is tied to the
;; destination the first alternative emits shufps with immediate 0xe4
;; against operand 1; the other two alternatives emit movlps of
;; operand 2 into the destination.
1293 (define_insn "sse_loadlps"
1294 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1296 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1298 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1299 (parallel [(const_int 2) (const_int 3)]))))]
1302 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1303 movlps\t{%2, %0|%0, %2}
1304 movlps\t{%2, %0|%0, %2}"
1305 [(set_attr "type" "sselog,ssemov,ssemov")
1306 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movss: register-only V4SF pattern combining operand 2 with
;; operand 1 (tied to the destination); emits movss from operand 2.
1308 (define_insn "sse_movss"
1309 [(set (match_operand:V4SF 0 "register_operand" "=x")
1311 (match_operand:V4SF 2 "register_operand" "x")
1312 (match_operand:V4SF 1 "register_operand" "0")
1315 "movss\t{%2, %0|%0, %2}"
1316 [(set_attr "type" "ssemov")
1317 (set_attr "mode" "SF")])
;; *vec_dupv4sf: V4SF result from an SFmode register that is tied to the
;; destination; emits shufps of the destination with itself and
;; immediate 0.
1319 (define_insn "*vec_dupv4sf"
1320 [(set (match_operand:V4SF 0 "register_operand" "=x")
1322 (match_operand:SF 1 "register_operand" "0")))]
1324 "shufps\t{$0, %0, %0|%0, %0, 0}"
1325 [(set_attr "type" "sselog1")
1326 (set_attr "mode" "V4SF")])
1328 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1329 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1330 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse_concatv2sf: V2SF result from two SFmode operands, with four
;; alternatives -- two using "x" registers (unpcklps, movss) and two
;; using "*y" registers (punpckldq, movd).  The movss and movd
;; alternatives apply when operand 2 matches constraint "C" and
;; operand 1 is in memory.
1331 (define_insn "*sse_concatv2sf"
1332 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1334 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1335 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1338 unpcklps\t{%2, %0|%0, %2}
1339 movss\t{%1, %0|%0, %1}
1340 punpckldq\t{%2, %0|%0, %2}
1341 movd\t{%1, %0|%0, %1}"
1342 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1343 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *sse_concatv4sf: V4SF result from two V2SF operands; operand 1 is
;; tied to the destination.  A register operand 2 emits movlhps, a
;; memory operand 2 emits movhps.
1345 (define_insn "*sse_concatv4sf"
1346 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1348 (match_operand:V2SF 1 "register_operand" " 0,0")
1349 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1352 movlhps\t{%2, %0|%0, %2}
1353 movhps\t{%2, %0|%0, %2}"
1354 [(set_attr "type" "ssemov")
1355 (set_attr "mode" "V4SF,V2SF")])
;; vec_initv4sf: expander that hands the V4SF destination and the
;; unconstrained operand 1 to ix86_expand_vector_init, passing false as
;; its first argument.
1357 (define_expand "vec_initv4sf"
1358 [(match_operand:V4SF 0 "register_operand" "")
1359 (match_operand 1 "" "")]
1362 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4sf_0: combines the SFmode operand 2 with the V4SF operand 1,
;; which is either tied to the destination ("0") or matches constraint
;; "C".  Four alternatives are declared; the visible templates are movss
;; (twice) and movd.
;; NOTE(review): no template is visible here for the fourth (memory
;; destination) alternative -- confirm against the complete file.
1366 (define_insn "vec_setv4sf_0"
1367 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
1370 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1371 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1375 movss\t{%2, %0|%0, %2}
1376 movss\t{%2, %0|%0, %2}
1377 movd\t{%2, %0|%0, %2}
1379 [(set_attr "type" "ssemov")
1380 (set_attr "mode" "SF")])
;; NOTE(review): the opening line of this definition is not present in
;; this listing; it is presumably a split for a V4SF memory destination
;; -- confirm against the complete file.
;; Once reload has completed, the store is emitted as a plain SFmode
;; move of operand 1 to offset 0 of the destination memory.
1383 [(set (match_operand:V4SF 0 "memory_operand" "")
1386 (match_operand:SF 1 "nonmemory_operand" ""))
1389 "TARGET_SSE && reload_completed"
1392 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_setv4sf: expander that forwards the V4SF destination, the SFmode
;; value and the constant integer operand 2 to ix86_expand_vector_set,
;; passing false as its first argument.
1396 (define_expand "vec_setv4sf"
1397 [(match_operand:V4SF 0 "register_operand" "")
1398 (match_operand:SF 1 "register_operand" "")
1399 (match_operand 2 "const_int_operand" "")]
1402 ix86_expand_vector_set (false, operands[0], operands[1],
1403 INTVAL (operands[2]));
;; *vec_extractv4sf_0: SFmode destination taken from index 0 of a V4SF
;; source; the condition rejects the memory-to-memory case.  After
;; reload it is split into an ordinary move whose source is operand 1
;; re-expressed in SFmode, either as a REG with the same register number
;; or through gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not
;; visible in this listing.
1407 (define_insn_and_split "*vec_extractv4sf_0"
1408 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1410 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1411 (parallel [(const_int 0)])))]
1412 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1414 "&& reload_completed"
1417 rtx op1 = operands[1];
1419 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1421 op1 = gen_lowpart (SFmode, op1);
1422 emit_move_insn (operands[0], op1);
;; vec_extractv4sf: expander that forwards the SFmode destination, the
;; V4SF source and the constant integer operand 2 to
;; ix86_expand_vector_extract, passing false as its first argument.
1426 (define_expand "vec_extractv4sf"
1427 [(match_operand:SF 0 "register_operand" "")
1428 (match_operand:V4SF 1 "register_operand" "")
1429 (match_operand 2 "const_int_operand" "")]
1432 ix86_expand_vector_extract (false, operands[0], operands[1],
1433 INTVAL (operands[2]));
1437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1439 ;; Parallel double-precision floating point arithmetic
1441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; negv2df2: V2DF negation expander; all work is delegated to
;; ix86_expand_fp_absneg_operator with code NEG, after which the
;; expansion is DONE.
1443 (define_expand "negv2df2"
1444 [(set (match_operand:V2DF 0 "register_operand" "")
1445 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1447 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; absv2df2: V2DF absolute-value expander; all work is delegated to
;; ix86_expand_fp_absneg_operator with code ABS, after which the
;; expansion is DONE.
1449 (define_expand "absv2df2"
1450 [(set (match_operand:V2DF 0 "register_operand" "")
1451 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1453 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; addv2df3: V2DF addition expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy for PLUS; the RTL template is then
;; emitted as written.
1455 (define_expand "addv2df3"
1456 [(set (match_operand:V2DF 0 "register_operand" "")
1457 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1458 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1460 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; *addv2df3: packed V2DF addition, emitted as addpd.  Operand 1 is
;; marked commutative ("%") and tied to the destination; operand 2 may
;; be a register or memory.
1462 (define_insn "*addv2df3"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1464 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1465 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1466 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1467 "addpd\t{%2, %0|%0, %2}"
1468 [(set_attr "type" "sseadd")
1469 (set_attr "mode" "V2DF")])
;; sse2_vmaddv2df3: V2DF addition of operand 1 (tied to the destination)
;; and operand 2 (register or memory), emitted as the scalar addsd.
;; The operand check names V2DFmode, the mode of every operand in this
;; pattern, in agreement with sse2_vmmulv2df3; it previously named
;; V4SFmode.
1471 (define_insn "sse2_vmaddv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1474 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1475 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1478 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1479 "addsd\t{%2, %0|%0, %2}"
1480 [(set_attr "type" "sseadd")
1481 (set_attr "mode" "DF")])
;; subv2df3: V2DF subtraction expander.  Operands are first passed
;; through ix86_fixup_binary_operands_no_copy for MINUS; the RTL
;; template is then emitted as written.
1483 (define_expand "subv2df3"
1484 [(set (match_operand:V2DF 0 "register_operand" "")
1485 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1486 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1488 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; *subv2df3: packed V2DF subtraction, emitted as subpd.  Operand 1 must
;; be a register tied to the destination (no commutative marker);
;; operand 2 may be a register or memory.
1490 (define_insn "*subv2df3"
1491 [(set (match_operand:V2DF 0 "register_operand" "=x")
1492 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1493 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1495 "subpd\t{%2, %0|%0, %2}"
1496 [(set_attr "type" "sseadd")
1497 (set_attr "mode" "V2DF")])
;; sse2_vmsubv2df3: V2DF subtraction of operand 2 (register or memory)
;; from operand 1 (tied to the destination), emitted as the scalar
;; subsd.
1499 (define_insn "sse2_vmsubv2df3"
1500 [(set (match_operand:V2DF 0 "register_operand" "=x")
1502 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1503 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1507 "subsd\t{%2, %0|%0, %2}"
1508 [(set_attr "type" "sseadd")
1509 (set_attr "mode" "DF")])
;; mulv2df3: V2DF multiplication expander.  Operands are first passed
;; through ix86_fixup_binary_operands_no_copy for MULT; the RTL template
;; is then emitted as written.
1511 (define_expand "mulv2df3"
1512 [(set (match_operand:V2DF 0 "register_operand" "")
1513 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1514 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1516 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; *mulv2df3: packed V2DF multiplication, emitted as mulpd.  Operand 1
;; is marked commutative ("%") and tied to the destination; operand 2
;; may be a register or memory.
1518 (define_insn "*mulv2df3"
1519 [(set (match_operand:V2DF 0 "register_operand" "=x")
1520 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1521 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1522 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1523 "mulpd\t{%2, %0|%0, %2}"
1524 [(set_attr "type" "ssemul")
1525 (set_attr "mode" "V2DF")])
;; sse2_vmmulv2df3: V2DF multiplication of operand 1 (tied to the
;; destination) and operand 2 (register or memory), emitted as the
;; scalar mulsd; operands are validated by ix86_binary_operator_ok.
1527 (define_insn "sse2_vmmulv2df3"
1528 [(set (match_operand:V2DF 0 "register_operand" "=x")
1530 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1531 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1534 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1535 "mulsd\t{%2, %0|%0, %2}"
1536 [(set_attr "type" "ssemul")
1537 (set_attr "mode" "DF")])
;; divv2df3: V2DF division expander.  Unlike the add/sub/mul expanders,
;; operand 1 must already be a register.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for DIV before the template is
;; emitted.
1539 (define_expand "divv2df3"
1540 [(set (match_operand:V2DF 0 "register_operand" "")
1541 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1542 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1544 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; *divv2df3: packed V2DF division, emitted as divpd.  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
1546 (define_insn "*divv2df3"
1547 [(set (match_operand:V2DF 0 "register_operand" "=x")
1548 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1549 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1551 "divpd\t{%2, %0|%0, %2}"
1552 [(set_attr "type" "ssediv")
1553 (set_attr "mode" "V2DF")])
;; sse2_vmdivv2df3: V2DF division of operand 1 (tied to the destination)
;; by operand 2 (register or memory), emitted as the scalar divsd.
1555 (define_insn "sse2_vmdivv2df3"
1556 [(set (match_operand:V2DF 0 "register_operand" "=x")
1558 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1559 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1563 "divsd\t{%2, %0|%0, %2}"
1564 [(set_attr "type" "ssediv")
1565 (set_attr "mode" "DF")])
;; sqrtv2df2: packed V2DF square root of a register-or-memory operand
;; into a register; emits sqrtpd.
1567 (define_insn "sqrtv2df2"
1568 [(set (match_operand:V2DF 0 "register_operand" "=x")
1569 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1571 "sqrtpd\t{%1, %0|%0, %1}"
1572 [(set_attr "type" "sse")
1573 (set_attr "mode" "V2DF")])
;; sse2_vmsqrtv2df2: V2DF square root of operand 1 combined with
;; operand 2 (tied to the destination), emitted as the scalar sqrtsd.
;; Operand 1 uses nonimmediate_operand so that the "m" half of its "xm"
;; constraint can actually be matched, as in sqrtv2df2; the previous
;; register_operand predicate rejected memory before constraints were
;; ever consulted.
1575 (define_insn "sse2_vmsqrtv2df2"
1576 [(set (match_operand:V2DF 0 "register_operand" "=x")
1578 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1579 (match_operand:V2DF 2 "register_operand" "0")
1582 "sqrtsd\t{%1, %0|%0, %1}"
1583 [(set_attr "type" "sse")
1584 (set_attr "mode" "DF")])
1586 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1587 ;; isn't really correct, as those rtl operators aren't defined when
1588 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smaxv2df3: V2DF signed-maximum expander.  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register
;; first; the operands are then passed through
;; ix86_fixup_binary_operands_no_copy for SMAX.
1590 (define_expand "smaxv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "")
1592 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1593 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1596 if (!flag_finite_math_only)
1597 operands[1] = force_reg (V2DFmode, operands[1]);
1598 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; *smaxv2df3_finite: maxpd pattern available only under
;; flag_finite_math_only; here operand 1 is marked commutative ("%")
;; and may be any nonimmediate operand.
1601 (define_insn "*smaxv2df3_finite"
1602 [(set (match_operand:V2DF 0 "register_operand" "=x")
1603 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1604 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1605 "TARGET_SSE2 && flag_finite_math_only
1606 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1607 "maxpd\t{%2, %0|%0, %2}"
1608 [(set_attr "type" "sseadd")
1609 (set_attr "mode" "V2DF")])
;; *smaxv2df3: maxpd pattern without the commutative marker: operand 1
;; must be a register tied to the destination, operand 2 a register or
;; memory.
1611 (define_insn "*smaxv2df3"
1612 [(set (match_operand:V2DF 0 "register_operand" "=x")
1613 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1614 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1616 "maxpd\t{%2, %0|%0, %2}"
1617 [(set_attr "type" "sseadd")
1618 (set_attr "mode" "V2DF")])
;; sse2_vmsmaxv2df3: V2DF signed maximum of operand 1 (tied to the
;; destination) and operand 2 (register or memory), emitted as the
;; scalar maxsd.
1620 (define_insn "sse2_vmsmaxv2df3"
1621 [(set (match_operand:V2DF 0 "register_operand" "=x")
1623 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1624 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1628 "maxsd\t{%2, %0|%0, %2}"
1629 [(set_attr "type" "sseadd")
1630 (set_attr "mode" "DF")])
;; sminv2df3: V2DF signed-minimum expander.  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register
;; first; the operands are then passed through
;; ix86_fixup_binary_operands_no_copy for SMIN.
1632 (define_expand "sminv2df3"
1633 [(set (match_operand:V2DF 0 "register_operand" "")
1634 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1635 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1638 if (!flag_finite_math_only)
1639 operands[1] = force_reg (V2DFmode, operands[1]);
1640 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; *sminv2df3_finite: minpd pattern available only under
;; flag_finite_math_only; here operand 1 is marked commutative ("%")
;; and may be any nonimmediate operand.
1643 (define_insn "*sminv2df3_finite"
1644 [(set (match_operand:V2DF 0 "register_operand" "=x")
1645 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1646 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1647 "TARGET_SSE2 && flag_finite_math_only
1648 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1649 "minpd\t{%2, %0|%0, %2}"
1650 [(set_attr "type" "sseadd")
1651 (set_attr "mode" "V2DF")])
;; *sminv2df3: minpd pattern without the commutative marker: operand 1
;; must be a register tied to the destination, operand 2 a register or
;; memory.
1653 (define_insn "*sminv2df3"
1654 [(set (match_operand:V2DF 0 "register_operand" "=x")
1655 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1656 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1658 "minpd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "sseadd")
1660 (set_attr "mode" "V2DF")])
;; sse2_vmsminv2df3: V2DF signed minimum of operand 1 (tied to the
;; destination) and operand 2 (register or memory), emitted as the
;; scalar minsd.
1662 (define_insn "sse2_vmsminv2df3"
1663 [(set (match_operand:V2DF 0 "register_operand" "=x")
1665 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1670 "minsd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "sseadd")
1672 (set_attr "mode" "DF")])
;; sse3_addsubv2df3: combines an expression over operands 1 and 2 with
;; (minus:V2DF op1 op2) on the same operands (via match_dup); emits
;; addsubpd.  Operand 1 is tied to the destination.
1674 (define_insn "sse3_addsubv2df3"
1675 [(set (match_operand:V2DF 0 "register_operand" "=x")
1678 (match_operand:V2DF 1 "register_operand" "0")
1679 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1680 (minus:V2DF (match_dup 1) (match_dup 2))
1683 "addsubpd\t{%2, %0|%0, %2}"
1684 [(set_attr "type" "sseadd")
1685 (set_attr "mode" "V2DF")])
;; sse3_haddv2df3: pairs index 0 with index 1 of operand 1, and index 0
;; with index 1 of operand 2, to form the V2DF result; emits haddpd.
;; Operand 1 is tied to the destination.
1687 (define_insn "sse3_haddv2df3"
1688 [(set (match_operand:V2DF 0 "register_operand" "=x")
1692 (match_operand:V2DF 1 "register_operand" "0")
1693 (parallel [(const_int 0)]))
1694 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1697 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1698 (parallel [(const_int 0)]))
1699 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1701 "haddpd\t{%2, %0|%0, %2}"
1702 [(set_attr "type" "sseadd")
1703 (set_attr "mode" "V2DF")])
;; sse3_hsubv2df3: same operand structure as sse3_haddv2df3 (index 0
;; paired with index 1 within each source operand) but emits hsubpd.
1705 (define_insn "sse3_hsubv2df3"
1706 [(set (match_operand:V2DF 0 "register_operand" "=x")
1710 (match_operand:V2DF 1 "register_operand" "0")
1711 (parallel [(const_int 0)]))
1712 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1715 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1716 (parallel [(const_int 0)]))
1717 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1719 "hsubpd\t{%2, %0|%0, %2}"
1720 [(set_attr "type" "sseadd")
1721 (set_attr "mode" "V2DF")])
;; reduc_splus_v2df: expander implemented as a single sse3_haddv2df3
;; with operand 1 supplied as both source operands.
1723 (define_expand "reduc_splus_v2df"
1724 [(match_operand:V2DF 0 "register_operand" "")
1725 (match_operand:V2DF 1 "register_operand" "")]
1728 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1734 ;; Parallel double-precision floating point comparisons
1736 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_maskcmpv2df3: V2DF comparison whose operator (operand 3) must
;; satisfy sse_comparison_operator; the operator is printed through %D3
;; to form the cmp<cond>pd mnemonic.
1738 (define_insn "sse2_maskcmpv2df3"
1739 [(set (match_operand:V2DF 0 "register_operand" "=x")
1740 (match_operator:V2DF 3 "sse_comparison_operator"
1741 [(match_operand:V2DF 1 "register_operand" "0")
1742 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1744 "cmp%D3pd\t{%2, %0|%0, %2}"
1745 [(set_attr "type" "ssecmp")
1746 (set_attr "mode" "V2DF")])
;; sse2_maskcmpdf3: scalar DFmode comparison using an
;; sse_comparison_operator (operand 3), printed through %D3 to form the
;; cmp<cond>sd mnemonic.
1748 (define_insn "sse2_maskcmpdf3"
1749 [(set (match_operand:DF 0 "register_operand" "=x")
1750 (match_operator:DF 3 "sse_comparison_operator"
1751 [(match_operand:DF 1 "register_operand" "0")
1752 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1754 "cmp%D3sd\t{%2, %0|%0, %2}"
1755 [(set_attr "type" "ssecmp")
1756 (set_attr "mode" "DF")])
;; sse2_vmmaskcmpv2df3: V2DF-operand form of the scalar comparison;
;; operand 3 must satisfy sse_comparison_operator and the instruction is
;; emitted as cmp<cond>sd.
1758 (define_insn "sse2_vmmaskcmpv2df3"
1759 [(set (match_operand:V2DF 0 "register_operand" "=x")
1761 (match_operator:V2DF 3 "sse_comparison_operator"
1762 [(match_operand:V2DF 1 "register_operand" "0")
1763 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1767 "cmp%D3sd\t{%2, %0|%0, %2}"
1768 [(set_attr "type" "ssecmp")
1769 (set_attr "mode" "DF")])
;; sse2_comi: sets FLAGS_REG in CCFP mode from index 0 of operand 0
;; (register) and index 0 of operand 1 (register or memory); emits
;; comisd.
1771 (define_insn "sse2_comi"
1772 [(set (reg:CCFP FLAGS_REG)
1775 (match_operand:V2DF 0 "register_operand" "x")
1776 (parallel [(const_int 0)]))
1778 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1779 (parallel [(const_int 0)]))))]
1781 "comisd\t{%1, %0|%0, %1}"
1782 [(set_attr "type" "ssecomi")
1783 (set_attr "mode" "DF")])
;; sse2_ucomi: same operand shape as sse2_comi, but FLAGS_REG is set in
;; CCFPU mode and the instruction emitted is ucomisd.
1785 (define_insn "sse2_ucomi"
1786 [(set (reg:CCFPU FLAGS_REG)
1789 (match_operand:V2DF 0 "register_operand" "x")
1790 (parallel [(const_int 0)]))
1792 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1793 (parallel [(const_int 0)]))))]
1795 "ucomisd\t{%1, %0|%0, %1}"
1796 [(set_attr "type" "ssecomi")
1797 (set_attr "mode" "DF")])
;; vcondv2df: V2DF conditional expander.  Operand 3 is an unconstrained
;; comparison operator over operands 4 and 5; operands 1 and 2 are the
;; two candidate values.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose result is tested.
;; NOTE(review): the statements taken on success and failure are not
;; visible in this listing.
1799 (define_expand "vcondv2df"
1800 [(set (match_operand:V2DF 0 "register_operand" "")
1802 (match_operator 3 ""
1803 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1804 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1805 (match_operand:V2DF 1 "general_operand" "")
1806 (match_operand:V2DF 2 "general_operand" "")))]
1809 if (ix86_expand_fp_vcond (operands))
1815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1817 ;; Parallel double-precision floating point logical operations
1819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; andv2df3: V2DF bitwise-AND expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy for AND before the template is
;; emitted.
1821 (define_expand "andv2df3"
1822 [(set (match_operand:V2DF 0 "register_operand" "")
1823 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1824 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1826 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; *andv2df3: packed V2DF bitwise AND, emitted as andpd.  Operand 1 is
;; marked commutative ("%") and tied to the destination.
1828 (define_insn "*andv2df3"
1829 [(set (match_operand:V2DF 0 "register_operand" "=x")
1830 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1831 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1832 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1833 "andpd\t{%2, %0|%0, %2}"
1834 [(set_attr "type" "sselog")
1835 (set_attr "mode" "V2DF")])
;; sse2_nandv2df3: (and (not op1) op2) on V2DF, i.e. operand 1 is the
;; complemented input and is tied to the destination; emits andnpd.
1837 (define_insn "sse2_nandv2df3"
1838 [(set (match_operand:V2DF 0 "register_operand" "=x")
1839 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1840 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1842 "andnpd\t{%2, %0|%0, %2}"
1843 [(set_attr "type" "sselog")
1844 (set_attr "mode" "V2DF")])
;; iorv2df3: V2DF bitwise-OR expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy for IOR before the template is
;; emitted.
1846 (define_expand "iorv2df3"
1847 [(set (match_operand:V2DF 0 "register_operand" "")
1848 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1849 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1851 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; *iorv2df3: packed V2DF bitwise OR, emitted as orpd.  Operand 1 is
;; marked commutative ("%") and tied to the destination.
1853 (define_insn "*iorv2df3"
1854 [(set (match_operand:V2DF 0 "register_operand" "=x")
1855 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1856 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1857 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1858 "orpd\t{%2, %0|%0, %2}"
1859 [(set_attr "type" "sselog")
1860 (set_attr "mode" "V2DF")])
;; xorv2df3: V2DF bitwise-XOR expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy for XOR before the template is
;; emitted.
1862 (define_expand "xorv2df3"
1863 [(set (match_operand:V2DF 0 "register_operand" "")
1864 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1865 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1867 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; *xorv2df3: packed V2DF bitwise XOR, emitted as xorpd.  Operand 1 is
;; marked commutative ("%") and tied to the destination.
1869 (define_insn "*xorv2df3"
1870 [(set (match_operand:V2DF 0 "register_operand" "=x")
1871 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1872 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1873 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1874 "xorpd\t{%2, %0|%0, %2}"
1875 [(set_attr "type" "sselog")
1876 (set_attr "mode" "V2DF")])
1878 ;; Also define scalar versions. These are used for abs, neg, and
1879 ;; conditional move. Using subregs into vector modes causes register
1880 ;; allocation lossage. These patterns do not allow memory operands
1881 ;; because the native instructions read the full 128-bits.
;; *anddf3: scalar DFmode bitwise AND restricted to register operands;
;; emitted with the packed andpd instruction (mode attribute V2DF).
1883 (define_insn "*anddf3"
1884 [(set (match_operand:DF 0 "register_operand" "=x")
1885 (and:DF (match_operand:DF 1 "register_operand" "0")
1886 (match_operand:DF 2 "register_operand" "x")))]
1888 "andpd\t{%2, %0|%0, %2}"
1889 [(set_attr "type" "sselog")
1890 (set_attr "mode" "V2DF")])
;; *nanddf3: scalar DFmode (and (not op1) op2) restricted to register
;; operands; emitted with the packed andnpd instruction.
1892 (define_insn "*nanddf3"
1893 [(set (match_operand:DF 0 "register_operand" "=x")
1894 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1895 (match_operand:DF 2 "register_operand" "x")))]
1897 "andnpd\t{%2, %0|%0, %2}"
1898 [(set_attr "type" "sselog")
1899 (set_attr "mode" "V2DF")])
;; *iordf3: scalar DFmode bitwise OR restricted to register operands;
;; emitted with the packed orpd instruction.
1901 (define_insn "*iordf3"
1902 [(set (match_operand:DF 0 "register_operand" "=x")
1903 (ior:DF (match_operand:DF 1 "register_operand" "0")
1904 (match_operand:DF 2 "register_operand" "x")))]
1906 "orpd\t{%2, %0|%0, %2}"
1907 [(set_attr "type" "sselog")
1908 (set_attr "mode" "V2DF")])
;; *xordf3: scalar DFmode bitwise XOR restricted to register operands;
;; emitted with the packed xorpd instruction.
1910 (define_insn "*xordf3"
1911 [(set (match_operand:DF 0 "register_operand" "=x")
1912 (xor:DF (match_operand:DF 1 "register_operand" "0")
1913 (match_operand:DF 2 "register_operand" "x")))]
1915 "xorpd\t{%2, %0|%0, %2}"
1916 [(set_attr "type" "sselog")
1917 (set_attr "mode" "V2DF")])
1919 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1921 ;; Parallel double-precision floating point conversion operations
1923 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: (float:V2DF ...) of a V2SI operand held in a "y"
;; register or in memory; emits cvtpi2pd.  The "unit" attribute is mmx
;; only for the register alternative.
1925 (define_insn "sse2_cvtpi2pd"
1926 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1927 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1929 "cvtpi2pd\t{%1, %0|%0, %1}"
1930 [(set_attr "type" "ssecvt")
1931 (set_attr "unit" "mmx,*")
1932 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2SI result in a "y" register from a V2DF
;; register-or-memory operand wrapped in UNSPEC_FIX_NOTRUNC; emits
;; cvtpd2pi.
1934 (define_insn "sse2_cvtpd2pi"
1935 [(set (match_operand:V2SI 0 "register_operand" "=y")
1936 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1937 UNSPEC_FIX_NOTRUNC))]
1939 "cvtpd2pi\t{%1, %0|%0, %1}"
1940 [(set_attr "type" "ssecvt")
1941 (set_attr "unit" "mmx")
1942 (set_attr "prefix_data16" "1")
1943 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: (fix:V2SI ...) of a V2DF register-or-memory operand
;; into a "y" register; emits cvttpd2pi.
;; NOTE(review): the mode attribute is "TI" here but "DI" in
;; sse2_cvtpd2pi -- confirm the difference is intended.
1945 (define_insn "sse2_cvttpd2pi"
1946 [(set (match_operand:V2SI 0 "register_operand" "=y")
1947 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1949 "cvttpd2pi\t{%1, %0|%0, %1}"
1950 [(set_attr "type" "ssecvt")
1951 (set_attr "unit" "mmx")
1952 (set_attr "prefix_data16" "1")
1953 (set_attr "mode" "TI")])
;; sse2_cvtsi2sd: (float:DF ...) of an SImode operand (general register
;; "r" or memory "m") combined with the V2DF operand 1, which is tied to
;; the destination; emits cvtsi2sd.
1955 (define_insn "sse2_cvtsi2sd"
1956 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1959 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1960 (match_operand:V2DF 1 "register_operand" "0,0")
1963 "cvtsi2sd\t{%2, %0|%0, %2}"
1964 [(set_attr "type" "sseicvt")
1965 (set_attr "mode" "DF")
1966 (set_attr "athlon_decode" "double,direct")
1967 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd; requires
;; TARGET_SSE2 && TARGET_64BIT and emits cvtsi2sdq.
1969 (define_insn "sse2_cvtsi2sdq"
1970 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1973 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1974 (match_operand:V2DF 1 "register_operand" "0,0")
1976 "TARGET_SSE2 && TARGET_64BIT"
1977 "cvtsi2sdq\t{%2, %0|%0, %2}"
1978 [(set_attr "type" "sseicvt")
1979 (set_attr "mode" "DF")
1980 (set_attr "athlon_decode" "double,direct")
1981 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsd2si: SImode register result computed from index 0 of a V2DF
;; register-or-memory operand under UNSPEC_FIX_NOTRUNC; emits cvtsd2si.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode attribute is
;; set here -- confirm that is intended.
1983 (define_insn "sse2_cvtsd2si"
1984 [(set (match_operand:SI 0 "register_operand" "=r,r")
1987 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1988 (parallel [(const_int 0)]))]
1989 UNSPEC_FIX_NOTRUNC))]
1991 "cvtsd2si\t{%1, %0|%0, %1}"
1992 [(set_attr "type" "sseicvt")
1993 (set_attr "athlon_decode" "double,vector")
1994 (set_attr "prefix_rep" "1")
1995 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: same cvtsd2si template as sse2_cvtsd2si, but the
;; source is a scalar DFmode operand wrapped directly in
;; UNSPEC_FIX_NOTRUNC.
1997 (define_insn "sse2_cvtsd2si_2"
1998 [(set (match_operand:SI 0 "register_operand" "=r,r")
1999 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2000 UNSPEC_FIX_NOTRUNC))]
2002 "cvtsd2si\t{%1, %0|%0, %1}"
2003 [(set_attr "type" "sseicvt")
2004 (set_attr "athlon_decode" "double,vector")
2005 (set_attr "amdfam10_decode" "double,double")
2006 (set_attr "prefix_rep" "1")
2007 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: DImode counterpart of sse2_cvtsd2si; requires
;; TARGET_SSE2 && TARGET_64BIT and emits cvtsd2siq.
;; NOTE(review): unlike sse2_cvtsd2siq_2, no amdfam10_decode attribute
;; is set here -- confirm that is intended.
2009 (define_insn "sse2_cvtsd2siq"
2010 [(set (match_operand:DI 0 "register_operand" "=r,r")
2013 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2014 (parallel [(const_int 0)]))]
2015 UNSPEC_FIX_NOTRUNC))]
2016 "TARGET_SSE2 && TARGET_64BIT"
2017 "cvtsd2siq\t{%1, %0|%0, %1}"
2018 [(set_attr "type" "sseicvt")
2019 (set_attr "athlon_decode" "double,vector")
2020 (set_attr "prefix_rep" "1")
2021 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: DImode result from a scalar DFmode operand wrapped
;; in UNSPEC_FIX_NOTRUNC; requires TARGET_SSE2 && TARGET_64BIT and emits
;; cvtsd2siq.
2023 (define_insn "sse2_cvtsd2siq_2"
2024 [(set (match_operand:DI 0 "register_operand" "=r,r")
2025 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2026 UNSPEC_FIX_NOTRUNC))]
2027 "TARGET_SSE2 && TARGET_64BIT"
2028 "cvtsd2siq\t{%1, %0|%0, %1}"
2029 [(set_attr "type" "sseicvt")
2030 (set_attr "athlon_decode" "double,vector")
2031 (set_attr "amdfam10_decode" "double,double")
2032 (set_attr "prefix_rep" "1")
2033 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: SImode register result derived from index 0 of a V2DF
;; register-or-memory operand; emits cvttsd2si.  No UNSPEC appears in
;; this pattern, in contrast to sse2_cvtsd2si.
2035 (define_insn "sse2_cvttsd2si"
2036 [(set (match_operand:SI 0 "register_operand" "=r,r")
2039 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2040 (parallel [(const_int 0)]))))]
2042 "cvttsd2si\t{%1, %0|%0, %1}"
2043 [(set_attr "type" "sseicvt")
2044 (set_attr "prefix_rep" "1")
2045 (set_attr "mode" "SI")
2046 (set_attr "athlon_decode" "double,vector")
2047 (set_attr "amdfam10_decode" "double,double")])
;; sse2_cvttsd2siq: DImode counterpart of sse2_cvttsd2si; requires
;; TARGET_SSE2 && TARGET_64BIT and emits cvttsd2siq.
2049 (define_insn "sse2_cvttsd2siq"
2050 [(set (match_operand:DI 0 "register_operand" "=r,r")
2053 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2054 (parallel [(const_int 0)]))))]
2055 "TARGET_SSE2 && TARGET_64BIT"
2056 "cvttsd2siq\t{%1, %0|%0, %1}"
2057 [(set_attr "type" "sseicvt")
2058 (set_attr "prefix_rep" "1")
2059 (set_attr "mode" "DI")
2060 (set_attr "athlon_decode" "double,vector")
2061 (set_attr "amdfam10_decode" "double,double")])
;; sse2_cvtdq2pd: V2DF result derived from indices 0 and 1 of a V4SI
;; register-or-memory operand; emits cvtdq2pd.
2063 (define_insn "sse2_cvtdq2pd"
2064 [(set (match_operand:V2DF 0 "register_operand" "=x")
2067 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2068 (parallel [(const_int 0) (const_int 1)]))))]
2070 "cvtdq2pd\t{%1, %0|%0, %1}"
2071 [(set_attr "type" "ssecvt")
2072 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2dq: expander for a V4SI result containing a V2SI unspec of
;; the V2DF operand 1.  The preparation code sets operands[2] to the
;; V2SImode zero constant.
2074 (define_expand "sse2_cvtpd2dq"
2075 [(set (match_operand:V4SI 0 "register_operand" "")
2077 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2081 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: V4SI result formed from a V2SI unspec of the V2DF
;; operand 1 together with operand 2, which must satisfy const0_operand;
;; emits cvtpd2dq.
2083 (define_insn "*sse2_cvtpd2dq"
2084 [(set (match_operand:V4SI 0 "register_operand" "=x")
2086 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2088 (match_operand:V2SI 2 "const0_operand" "")))]
2090 "cvtpd2dq\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "prefix_rep" "1")
2093 (set_attr "mode" "TI")
2094 (set_attr "amdfam10_decode" "double")])
;; Expander for the cvttpd2dq pattern: a V2SI fix of the V2DF input.
;; Operand 2 is filled in with the V2SI zero constant that the matching
;; insn below expects.
2096 (define_expand "sse2_cvttpd2dq"
2097 [(set (match_operand:V4SI 0 "register_operand" "")
2099 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2102 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvttpd2dq expander: emits cvttpd2dq for a V2SI fix
;; of a V2DF register-or-memory source, combined with the V2SI zero
;; constant in operand 2 to form the V4SI destination.
2104 (define_insn "*sse2_cvttpd2dq"
2105 [(set (match_operand:V4SI 0 "register_operand" "=x")
2107 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2108 (match_operand:V2SI 2 "const0_operand" "")))]
2110 "cvttpd2dq\t{%1, %0|%0, %1}"
2111 [(set_attr "type" "ssecvt")
2112 (set_attr "prefix_rep" "1")
2113 (set_attr "mode" "TI")
2114 (set_attr "amdfam10_decode" "double")])
;; Emits cvtsd2ss.  Operand 2 (V2DF, register or memory) is the value that
;; is float-truncated; operand 1 (V4SF) is tied to the destination
;; register and supplies the rest of the result.
2116 (define_insn "sse2_cvtsd2ss"
2117 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2120 (float_truncate:V2SF
2121 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2122 (match_operand:V4SF 1 "register_operand" "0,0")
2125 "cvtsd2ss\t{%2, %0|%0, %2}"
2126 [(set_attr "type" "ssecvt")
2127 (set_attr "athlon_decode" "vector,double")
2128 (set_attr "amdfam10_decode" "vector,double")
2129 (set_attr "mode" "SF")])
;; Emits cvtss2sd.  Operand 2 (V4SF, register or memory) is the source of
;; the converted value, taken from its elements 0 and 1; operand 1 (V2DF)
;; is tied to the destination register.
2131 (define_insn "sse2_cvtss2sd"
2132 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2136 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2137 (parallel [(const_int 0) (const_int 1)])))
2138 (match_operand:V2DF 1 "register_operand" "0,0")
2141 "cvtss2sd\t{%2, %0|%0, %2}"
2142 [(set_attr "type" "ssecvt")
2143 (set_attr "amdfam10_decode" "vector,double")
2144 (set_attr "mode" "DF")])
;; Expander for the cvtpd2ps pattern: float-truncates the V2DF input to
;; V2SF.  Operand 2 is set to the V2SF zero constant expected by the
;; matching insn below.
2146 (define_expand "sse2_cvtpd2ps"
2147 [(set (match_operand:V4SF 0 "register_operand" "")
2149 (float_truncate:V2SF
2150 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2153 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn behind the sse2_cvtpd2ps expander: emits cvtpd2ps for the
;; float-truncated V2DF source, combined with the V2SF zero constant in
;; operand 2 to form the V4SF destination.
2155 (define_insn "*sse2_cvtpd2ps"
2156 [(set (match_operand:V4SF 0 "register_operand" "=x")
2158 (float_truncate:V2SF
2159 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2160 (match_operand:V2SF 2 "const0_operand" "")))]
2162 "cvtpd2ps\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "ssecvt")
2164 (set_attr "prefix_data16" "1")
2165 (set_attr "mode" "V4SF")
2166 (set_attr "amdfam10_decode" "double")])
;; Emits cvtps2pd: produces a V2DF register from elements 0 and 1 of a
;; V4SF operand held in an SSE register or in memory.
2168 (define_insn "sse2_cvtps2pd"
2169 [(set (match_operand:V2DF 0 "register_operand" "=x")
2172 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2173 (parallel [(const_int 0) (const_int 1)]))))]
2175 "cvtps2pd\t{%1, %0|%0, %1}"
2176 [(set_attr "type" "ssecvt")
2177 (set_attr "mode" "V2DF")
2178 (set_attr "amdfam10_decode" "direct")])
;; Two-step expansion for V4SF -> V2DF: first a selection involving V4SF
;; operand 1 (the visible index list starts at 6), then a set of the V2DF
;; result from elements 0 and 1.  operands[2] is a fresh V4SF pseudo,
;; presumably the intermediate carried between the two steps.
2180 (define_expand "vec_unpacks_hi_v4sf"
2185 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2186 (parallel [(const_int 6)
2190 (set (match_operand:V2DF 0 "register_operand" "")
2194 (parallel [(const_int 0) (const_int 1)]))))]
2197 operands[2] = gen_reg_rtx (V4SFmode);
;; V4SF -> V2DF using elements 0 and 1 of operand 1; the RTL has the same
;; shape as the sse2_cvtps2pd insn.
2200 (define_expand "vec_unpacks_lo_v4sf"
2201 [(set (match_operand:V2DF 0 "register_operand" "")
2204 (match_operand:V4SF 1 "nonimmediate_operand" "")
2205 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_hi_v8hi widens the
;; input into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into the V4SF result.
2208 (define_expand "vec_unpacks_float_hi_v8hi"
2209 [(match_operand:V4SF 0 "register_operand" "")
2210 (match_operand:V8HI 1 "register_operand" "")]
2213 rtx tmp = gen_reg_rtx (V4SImode);
2215 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2216 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_lo_v8hi widens the
;; input into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into the V4SF result.
2220 (define_expand "vec_unpacks_float_lo_v8hi"
2221 [(match_operand:V4SF 0 "register_operand" "")
2222 (match_operand:V8HI 1 "register_operand" "")]
2225 rtx tmp = gen_reg_rtx (V4SImode);
2227 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2228 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_hi_v8hi widens the
;; input into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into the V4SF result.
2232 (define_expand "vec_unpacku_float_hi_v8hi"
2233 [(match_operand:V4SF 0 "register_operand" "")
2234 (match_operand:V8HI 1 "register_operand" "")]
2237 rtx tmp = gen_reg_rtx (V4SImode);
2239 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2240 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_lo_v8hi widens the
;; input into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into the V4SF result.
2244 (define_expand "vec_unpacku_float_lo_v8hi"
2245 [(match_operand:V4SF 0 "register_operand" "")
2246 (match_operand:V8HI 1 "register_operand" "")]
2249 rtx tmp = gen_reg_rtx (V4SImode);
2251 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2252 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion for V4SI -> V2DF: first a selection on V4SI operand 1
;; (the visible index list starts at 2), then a set of the V2DF result
;; from elements 0 and 1.  operands[2] is a fresh V4SI pseudo, presumably
;; the intermediate carried between the two steps.
2256 (define_expand "vec_unpacks_float_hi_v4si"
2259 (match_operand:V4SI 1 "nonimmediate_operand" "")
2260 (parallel [(const_int 2)
2264 (set (match_operand:V2DF 0 "register_operand" "")
2268 (parallel [(const_int 0) (const_int 1)]))))]
2271 operands[2] = gen_reg_rtx (V4SImode);
;; V4SI -> V2DF using elements 0 and 1 of operand 1; the RTL has the same
;; shape as the sse2_cvtdq2pd insn.
2274 (define_expand "vec_unpacks_float_lo_v4si"
2275 [(set (match_operand:V2DF 0 "register_operand" "")
2278 (match_operand:V4SI 1 "nonimmediate_operand" "")
2279 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF result.  Each input is converted
;; with sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are then merged into operand 0 with sse_movlhps.
2282 (define_expand "vec_pack_trunc_v2df"
2283 [(match_operand:V4SF 0 "register_operand" "")
2284 (match_operand:V2DF 1 "nonimmediate_operand" "")
2285 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2290 r1 = gen_reg_rtx (V4SFmode);
2291 r2 = gen_reg_rtx (V4SFmode);
2293 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2294 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2295 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result.  Each input is converted
;; with sse2_cvttpd2dq into its own V4SI temporary (r1, r2); the
;; temporaries and the destination are then viewed as V2DI and merged with
;; sse2_punpcklqdq.
2299 (define_expand "vec_pack_sfix_trunc_v2df"
2300 [(match_operand:V4SI 0 "register_operand" "")
2301 (match_operand:V2DF 1 "nonimmediate_operand" "")
2302 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2307 r1 = gen_reg_rtx (V4SImode);
2308 r2 = gen_reg_rtx (V4SImode);
2310 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2311 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2312 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2313 gen_lowpart (V2DImode, r1),
2314 gen_lowpart (V2DImode, r2)));
2318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2320 ;; Parallel double-precision floating point element swizzling
2322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Selection from the pair (operand 1, operand 2) whose index list starts
;; at 1.  Three alternatives:
;;   1. register form, operand 1 tied to the destination: unpckhpd;
;;   2. operand 1 in offsettable memory, operand 2 tied to the destination:
;;      movlpd from the upper half of the memory operand (%H1);
;;   3. memory destination that is also operand 2: movhpd store of
;;      operand 1.
;; Operands 1 and 2 may not both be memory.
2324 (define_insn "sse2_unpckhpd"
2325 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2328 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2329 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2330 (parallel [(const_int 1)
2332 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2334 unpckhpd\t{%2, %0|%0, %2}
2335 movlpd\t{%H1, %0|%0, %H1}
2336 movhpd\t{%1, %0|%0, %1}"
2337 [(set_attr "type" "sselog,ssemov,ssemov")
2338 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand (selection index list starts at 0).
;; The register-destination alternative emits movddup directly; the
;; second alternative has an offsettable-memory destination and type
;; "ssemov".  Source and destination may not both be memory.
2340 (define_insn "*sse3_movddup"
2341 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2344 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2346 (parallel [(const_int 0)
2348 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2350 movddup\t{%1, %0|%0, %1}
2352 [(set_attr "type" "sselog1,ssemov")
2353 (set_attr "mode" "V2DF")])
;; After reload, with a memory destination and a register source: the
;; register is re-expressed as a DFmode register ("low") and stored twice,
;; once at byte offset 0 and once at byte offset 8 of the destination.
2356 [(set (match_operand:V2DF 0 "memory_operand" "")
2359 (match_operand:V2DF 1 "register_operand" "")
2361 (parallel [(const_int 0)
2363 "TARGET_SSE3 && reload_completed"
2366 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2367 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2368 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Selection from the pair (operand 1, operand 2) whose index list starts
;; at 0.  Operand 1 is always tied to the destination.  Alternatives:
;;   1. operand 2 in a register: unpcklpd;
;;   2. operand 2 in memory: movhpd load;
;;   3. offsettable-memory destination, operand 2 in a register: movlpd
;;      store to the upper half of the destination (%H0).
;; Operands 1 and 2 may not both be memory.
2372 (define_insn "sse2_unpcklpd"
2373 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2376 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2377 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2378 (parallel [(const_int 0)
2380 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2382 unpcklpd\t{%2, %0|%0, %2}
2383 movhpd\t{%2, %0|%0, %2}
2384 movlpd\t{%2, %H0|%H0, %2}"
2385 [(set_attr "type" "sselog,ssemov,ssemov")
2386 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 and forwarding to
;; sse2_shufpd_1 with explicit element selectors.  Bit 1 of the mask picks
;; selector 3 (set) or 2 (clear) for the second element, which is the 2..3
;; range that sse2_shufpd_1 accepts for its last operand.
2388 (define_expand "sse2_shufpd"
2389 [(match_operand:V2DF 0 "register_operand" "")
2390 (match_operand:V2DF 1 "register_operand" "")
2391 (match_operand:V2DF 2 "nonimmediate_operand" "")
2392 (match_operand:SI 3 "const_int_operand" "")]
2395 int mask = INTVAL (operands[3]);
2396 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2398 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors: operand 3 is 0 or 1 and
;; operand 4 is 2 or 3, indexing the pair (operand 1, operand 2).
;; Operand 1 is tied to the destination.  At output time the selectors are
;; folded back into the instruction immediate: bit 0 is operand 3 and
;; bit 1 is (operand 4 - 2); the result replaces operands[3] for printing.
2402 (define_insn "sse2_shufpd_1"
2403 [(set (match_operand:V2DF 0 "register_operand" "=x")
2406 (match_operand:V2DF 1 "register_operand" "0")
2407 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2408 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2409 (match_operand 4 "const_2_to_3_operand" "")])))]
2413 mask = INTVAL (operands[3]);
2414 mask |= (INTVAL (operands[4]) - 2) << 1;
2415 operands[3] = GEN_INT (mask);
2417 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2419 [(set_attr "type" "sselog")
2420 (set_attr "mode" "V2DF")])
;; Extracts element 1 of a V2DF operand into a DF destination.  Three
;; alternatives (types ssemov, sselog1, ssemov): memory destination from
;; an SSE register (movhpd store), SSE register destination tied to the
;; source, and register destination from an offsettable memory source.
;; Source and destination may not both be memory.
2422 (define_insn "sse2_storehpd"
2423 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2425 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2426 (parallel [(const_int 1)])))]
2427 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2429 movhpd\t{%1, %0|%0, %1}
2432 [(set_attr "type" "ssemov,sselog1,ssemov")
2433 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 of a V2DF memory operand into a
;; register becomes a plain DF move: operand 1 is rewritten as the DFmode
;; memory reference at byte offset 8 of the original operand.
2436 [(set (match_operand:DF 0 "register_operand" "")
2438 (match_operand:V2DF 1 "memory_operand" "")
2439 (parallel [(const_int 1)])))]
2440 "TARGET_SSE2 && reload_completed"
2441 [(set (match_dup 0) (match_dup 1))]
2443 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of a V2DF operand into a DF destination.  Three
;; alternatives: memory destination from an SSE register (movlpd store),
;; SSE register from SSE register, and register from memory.  Source and
;; destination may not both be memory.
2446 (define_insn "sse2_storelpd"
2447 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2449 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2450 (parallel [(const_int 0)])))]
2451 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2453 movlpd\t{%1, %0|%0, %1}
2456 [(set_attr "type" "ssemov")
2457 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 into a register becomes a plain DF
;; move.  Operand 1 is re-expressed in DFmode first; the code has one form
;; that builds a DFmode register from the register number and another that
;; uses gen_lowpart.
2460 [(set (match_operand:DF 0 "register_operand" "")
2462 (match_operand:V2DF 1 "nonimmediate_operand" "")
2463 (parallel [(const_int 0)])))]
2464 "TARGET_SSE2 && reload_completed"
2467 rtx op1 = operands[1];
2469 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2471 op1 = gen_lowpart (DFmode, op1);
2472 emit_move_insn (operands[0], op1);
;; Builds a V2DF from element 0 of operand 1 followed by DF operand 2.
;; Alternatives:
;;   1. operand 1 tied to the destination, operand 2 in memory: movhpd;
;;   2. operand 1 tied to the destination, operand 2 in a register:
;;      unpcklpd;
;;   3. operand 2 tied to the destination, operand 1 in another register:
;;      shufpd with immediate 1;
;;   4. offsettable-memory destination that is also operand 1 (type
;;      "other").
;; Operands 1 and 2 may not both be memory.
;; NOTE(review): in alternative 3 the destination register holds operand 2
;; in its low element, and shufpd always takes the low result element from
;; the destination register.  That seems unable to yield element 0 of
;; operand 1 in the low position -- confirm against the SHUFPD definition.
2476 (define_insn "sse2_loadhpd"
2477 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2480 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2481 (parallel [(const_int 0)]))
2482 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2483 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2485 movhpd\t{%2, %0|%0, %2}
2486 unpcklpd\t{%2, %0|%0, %2}
2487 shufpd\t{$1, %1, %0|%0, %1, 1}
2489 [(set_attr "type" "ssemov,sselog,sselog,other")
2490 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; After reload, replacing the second element of a V2DF in memory
;; (element 0 is kept via match_dup 0) with a DF register becomes a plain
;; DF store at byte offset 8 of the destination.
2493 [(set (match_operand:V2DF 0 "memory_operand" "")
2495 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2496 (match_operand:DF 1 "register_operand" "")))]
2497 "TARGET_SSE2 && reload_completed"
2498 [(set (match_dup 0) (match_dup 1))]
2500 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Builds a V2DF from DF operand 2 followed by element 1 of operand 1.
;; Alternatives:
;;   1. operand 1 is a constant (C), operand 2 in memory: movsd load;
;;   2. operand 1 tied to the destination, operand 2 in memory: movlpd;
;;   3. operand 1 tied to the destination, operand 2 in a register: movsd;
;;   4. operand 2 tied to the destination, operand 1 in another register:
;;      shufpd with immediate 2.  The shufpd source must be operand 1,
;;      since that is where the high element comes from; operand 2 is the
;;      destination register itself in this alternative.
;;   5. operand 2 tied to the destination, operand 1 in offsettable
;;      memory: movhpd from the upper half of operand 1 (%H1);
;;   6. memory destination that is also operand 1 (type "other").
;; Operands 1 and 2 may not both be memory.
2503 (define_insn "sse2_loadlpd"
2504 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2506 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2508 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2509 (parallel [(const_int 1)]))))]
2510 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2512 movsd\t{%2, %0|%0, %2}
2513 movlpd\t{%2, %0|%0, %2}
2514 movsd\t{%2, %0|%0, %2}
2515 shufpd\t{$2, %1, %0|%0, %1, 2}
2516 movhpd\t{%H1, %0|%0, %H1}
2518 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2519 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, replacing the first element of a V2DF in memory with a DF
;; register becomes a plain DF store.  Element 1 of the destination is
;; kept (vec_select of match_dup 0 with index 1), so the new value is the
;; low element and must be stored at byte offset 0; offset 8 would
;; overwrite the element that is supposed to be preserved.
2522 [(set (match_operand:V2DF 0 "memory_operand" "")
2524 (match_operand:DF 1 "register_operand" "")
2525 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2526 "TARGET_SSE2 && reload_completed"
2527 [(set (match_dup 0) (match_dup 1))]
2529 operands[0] = adjust_address (operands[0], DFmode, 0);
2532 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF operand into a DF destination.  Alternatives: movhps store to
;; memory, movhlps between registers, and movlps load from the upper half
;; (%H1) of an offsettable memory source.  Source and destination may not
;; both be memory.
2534 (define_insn "*vec_extractv2df_1_sse"
2535 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2537 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2538 (parallel [(const_int 1)])))]
2539 "!TARGET_SSE2 && TARGET_SSE
2540 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2542 movhps\t{%1, %0|%0, %1}
2543 movhlps\t{%1, %0|%0, %1}
2544 movlps\t{%H1, %0|%0, %H1}"
2545 [(set_attr "type" "ssemov")
2546 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF operand into a DF destination.  Alternatives: movlps store to
;; memory, movaps between registers, and movlps load from memory.  Source
;; and destination may not both be memory.
2548 (define_insn "*vec_extractv2df_0_sse"
2549 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2551 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2552 (parallel [(const_int 0)])))]
2553 "!TARGET_SSE2 && TARGET_SSE
2554 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2556 movlps\t{%1, %0|%0, %1}
2557 movaps\t{%1, %0|%0, %1}
2558 movlps\t{%1, %0|%0, %1}"
2559 [(set_attr "type" "ssemov")
2560 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines two V2DF operands: in every alternative the low element of the
;; result comes from operand 2 and the high element from operand 1.
;; Alternatives:
;;   1-3. operand 1 tied to the destination; operand 2 is moved into the
;;        low element with movsd (register source) or movlpd (memory
;;        source, or memory destination);
;;   4.   operand 2 tied to the destination, operand 1 in another
;;        register: shufpd with immediate 2.  The shufpd source must be
;;        operand 1, which provides the high element; operand 2 is the
;;        destination register itself here.
;;   5.   operand 2 tied to the destination, operand 1 in offsettable
;;        memory: movhps load from the upper half of operand 1 (%H1);
;;   6.   offsettable-memory destination that is also operand 2: movhps
;;        store of operand 1 to the upper half of the destination (%H0).
2562 (define_insn "sse2_movsd"
2563 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2565 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2566 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2570 movsd\t{%2, %0|%0, %2}
2571 movlpd\t{%2, %0|%0, %2}
2572 movlpd\t{%2, %0|%0, %2}
2573 shufpd\t{$2, %1, %0|%0, %1, 2}
2574 movhps\t{%H1, %0|%0, %H1}
2575 movhps\t{%1, %H0|%H0, %1}"
2576 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2577 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Emits movddup to build a V2DF register from a DF operand held in an SSE
;; register or in memory.
2579 (define_insn "*vec_dupv2df_sse3"
2580 [(set (match_operand:V2DF 0 "register_operand" "=x")
2582 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2584 "movddup\t{%1, %0|%0, %1}"
2585 [(set_attr "type" "sselog1")
2586 (set_attr "mode" "V2DF")])
;; Builds a V2DF register from a DF register operand that is tied to the
;; destination (constraint "0").
2588 (define_insn "*vec_dupv2df"
2589 [(set (match_operand:V2DF 0 "register_operand" "=x")
2591 (match_operand:DF 1 "register_operand" "0")))]
2594 [(set_attr "type" "sselog1")
2595 (set_attr "mode" "V2DF")])
;; Emits movddup for a V2DF register built from DF operand 1 (SSE register
;; or memory).
2597 (define_insn "*vec_concatv2df_sse3"
2598 [(set (match_operand:V2DF 0 "register_operand" "=x")
2600 (match_operand:DF 1 "nonimmediate_operand" "xm")
2603 "movddup\t{%1, %0|%0, %1}"
2604 [(set_attr "type" "sselog1")
2605 (set_attr "mode" "DF")])
;; Builds a V2DF register from two DF operands.  Alternatives:
;;   1. operand 1 tied to the destination, operand 2 in a Y2 register:
;;      unpcklpd;
;;   2. operand 1 tied to the destination, operand 2 in memory: movhpd;
;;   3. operand 1 in memory, operand 2 a constant (C): movsd load;
;;   4. operand 1 tied to the destination, operand 2 in an SSE register:
;;      movlhps;
;;   5. operand 1 tied to the destination, operand 2 in memory: movhps.
2607 (define_insn "*vec_concatv2df"
2608 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2610 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2611 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2614 unpcklpd\t{%2, %0|%0, %2}
2615 movhpd\t{%2, %0|%0, %2}
2616 movsd\t{%1, %0|%0, %1}
2617 movlhps\t{%2, %0|%0, %2}
2618 movhps\t{%2, %0|%0, %2}"
2619 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2620 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element insertion for V2DF.  Delegates to ix86_expand_vector_set with
;; the vector (operand 0), the DF value (operand 1) and the constant
;; element index taken from operand 2; the first argument is passed as
;; false.
2622 (define_expand "vec_setv2df"
2623 [(match_operand:V2DF 0 "register_operand" "")
2624 (match_operand:DF 1 "register_operand" "")
2625 (match_operand 2 "const_int_operand" "")]
2628 ix86_expand_vector_set (false, operands[0], operands[1],
2629 INTVAL (operands[2]));
;; Element extraction for V2DF.  Delegates to ix86_expand_vector_extract
;; with the DF destination (operand 0), the vector (operand 1) and the
;; constant element index taken from operand 2; the first argument is
;; passed as false.
2633 (define_expand "vec_extractv2df"
2634 [(match_operand:DF 0 "register_operand" "")
2635 (match_operand:V2DF 1 "register_operand" "")
2636 (match_operand 2 "const_int_operand" "")]
2639 ix86_expand_vector_extract (false, operands[0], operands[1],
2640 INTVAL (operands[2]));
;; Vector initialisation for V2DF.  Delegates to ix86_expand_vector_init
;; with the destination register and the unconstrained operand 1; the
;; first argument is passed as false.
2644 (define_expand "vec_initv2df"
2645 [(match_operand:V2DF 0 "register_operand" "")
2646 (match_operand 1 "" "")]
2649 ix86_expand_vector_init (false, operands[0], operands[1]);
2653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2655 ;; Parallel integral arithmetic
2657 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation for every SSEMODEI integer vector mode.  The preparation
;; statement forces a zero vector of the same mode into a register and
;; makes it operand 2 of the expansion.
2659 (define_expand "neg<mode>2"
2660 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2663 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2665 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition for every SSEMODEI integer vector mode.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy before the
;; plus:SSEMODEI RTL is generated.
2667 (define_expand "add<mode>3"
2668 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2669 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2670 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2672 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Emits padd{b,w,d,q}, with the suffix chosen by the ssevecsize mode
;; attribute.  Operand 1 is tied to the destination and marked commutative
;; ("%"); operand 2 may be a register or memory.
2674 (define_insn "*add<mode>3"
2675 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2677 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2678 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2679 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2680 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2681 [(set_attr "type" "sseiadd")
2682 (set_attr "prefix_data16" "1")
2683 (set_attr "mode" "TI")])
;; Emits padds{b,w} for the SSEMODE12 modes (V16QI, V8HI); the insn
;; condition validates the operands as an SS_PLUS operation.  Operand 1 is
;; tied to the destination and marked commutative.
2685 (define_insn "sse2_ssadd<mode>3"
2686 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2688 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2689 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2690 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2691 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2692 [(set_attr "type" "sseiadd")
2693 (set_attr "prefix_data16" "1")
2694 (set_attr "mode" "TI")])
;; Emits paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI); the insn
;; condition validates the operands as a US_PLUS operation.  Operand 1 is
;; tied to the destination and marked commutative.
2696 (define_insn "sse2_usadd<mode>3"
2697 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2699 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2700 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2701 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2702 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2703 [(set_attr "type" "sseiadd")
2704 (set_attr "prefix_data16" "1")
2705 (set_attr "mode" "TI")])
;; Subtraction for every SSEMODEI integer vector mode.  Operand 1 must be
;; a register; the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the minus:SSEMODEI RTL is
;; generated.
2707 (define_expand "sub<mode>3"
2708 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2709 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2710 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2712 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Emits psub{b,w,d,q}, with the suffix chosen by the ssevecsize mode
;; attribute.  Operand 1 is tied to the destination (no commutative
;; marker); operand 2 may be a register or memory.
2714 (define_insn "*sub<mode>3"
2715 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2717 (match_operand:SSEMODEI 1 "register_operand" "0")
2718 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2720 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2721 [(set_attr "type" "sseiadd")
2722 (set_attr "prefix_data16" "1")
2723 (set_attr "mode" "TI")])
;; Emits psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
2725 (define_insn "sse2_sssub<mode>3"
2726 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2728 (match_operand:SSEMODE12 1 "register_operand" "0")
2729 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2731 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2732 [(set_attr "type" "sseiadd")
2733 (set_attr "prefix_data16" "1")
2734 (set_attr "mode" "TI")])
;; Emits psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
2736 (define_insn "sse2_ussub<mode>3"
2737 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2739 (match_operand:SSEMODE12 1 "register_operand" "0")
2740 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2742 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2743 [(set_attr "type" "sseiadd")
2744 (set_attr "prefix_data16" "1")
2745 (set_attr "mode" "TI")])
;; V16QI multiply synthesised from V8HI multiplies.  Twelve V16QI
;; temporaries are allocated.  Both inputs are unpacked (high and low
;; halves separately) so each source byte sits in the low byte of a word,
;; the words are multiplied with mulv8hi3, and a chain of punpckhbw /
;; punpcklbw steps gathers the low product bytes back into byte order.
2747 (define_expand "mulv16qi3"
2748 [(set (match_operand:V16QI 0 "register_operand" "")
2749 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2750 (match_operand:V16QI 2 "register_operand" "")))]
2756 for (i = 0; i < 12; ++i)
2757 t[i] = gen_reg_rtx (V16QImode);
2759 /* Unpack data such that we've got a source byte in each low byte of
2760 each word. We don't care what goes into the high byte of each word.
2761 Rather than trying to get zero in there, most convenient is to let
2762 it be a copy of the low byte. */
2763 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2764 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2765 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2766 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2768 /* Multiply words. The end-of-line annotations here give a picture of what
2769 the output of that instruction looks like. Dot means don't care; the
2770 letters are the bytes of the result with A being the most significant. */
2771 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2772 gen_lowpart (V8HImode, t[0]),
2773 gen_lowpart (V8HImode, t[1])));
2774 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2775 gen_lowpart (V8HImode, t[2]),
2776 gen_lowpart (V8HImode, t[3])));
2778 /* Extract the relevant bytes and merge them back together. */
2779 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2780 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2781 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2782 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2783 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2784 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2787 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the mult:V8HI RTL is
;; generated.
2791 (define_expand "mulv8hi3"
2792 [(set (match_operand:V8HI 0 "register_operand" "")
2793 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2794 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2796 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmullw for a V8HI multiply.  Operand 1 is tied to the destination
;; and marked commutative ("%"); operand 2 may be a register or memory.
2798 (define_insn "*mulv8hi3"
2799 [(set (match_operand:V8HI 0 "register_operand" "=x")
2800 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2801 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2802 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2803 "pmullw\t{%2, %0|%0, %2}"
2804 [(set_attr "type" "sseimul")
2805 (set_attr "prefix_data16" "1")
2806 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply that is matched by the
;; pmulhw insn below.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy first.
2808 (define_expand "smulv8hi3_highpart"
2809 [(set (match_operand:V8HI 0 "register_operand" "")
2814 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2816 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2819 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmulhw on V8HI operands.  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.
2821 (define_insn "*smulv8hi3_highpart"
2822 [(set (match_operand:V8HI 0 "register_operand" "=x")
2827 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2831 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2832 "pmulhw\t{%2, %0|%0, %2}"
2833 [(set_attr "type" "sseimul")
2834 (set_attr "prefix_data16" "1")
2835 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply that is matched by the
;; pmulhuw insn below.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy first.
2837 (define_expand "umulv8hi3_highpart"
2838 [(set (match_operand:V8HI 0 "register_operand" "")
2843 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2845 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2848 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmulhuw on V8HI operands.  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.
2850 (define_insn "*umulv8hi3_highpart"
2851 [(set (match_operand:V8HI 0 "register_operand" "=x")
2856 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2858 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2860 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2861 "pmulhuw\t{%2, %0|%0, %2}"
2862 [(set_attr "type" "sseimul")
2863 (set_attr "prefix_data16" "1")
2864 (set_attr "mode" "TI")])
;; Emits pmuludq: a V2DI result computed from elements 0 and 2 of each of
;; the two V4SI inputs.  Operand 1 is tied to the destination and marked
;; commutative; operand 2 may be a register or memory.
2866 (define_insn "sse2_umulv2siv2di3"
2867 [(set (match_operand:V2DI 0 "register_operand" "=x")
2871 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2872 (parallel [(const_int 0) (const_int 2)])))
2875 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2876 (parallel [(const_int 0) (const_int 2)])))))]
2877 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2878 "pmuludq\t{%2, %0|%0, %2}"
2879 [(set_attr "type" "sseimul")
2880 (set_attr "prefix_data16" "1")
2881 (set_attr "mode" "TI")])
;; Emits pmaddwd: a V4SI result from two V8HI inputs.  The RTL refers to
;; each input twice, once through a V4HI selection whose index list starts
;; at 0 and once through one that starts at 1 and ends at 7.  Operand 1
;; is tied to the destination and marked commutative.
2883 (define_insn "sse2_pmaddwd"
2884 [(set (match_operand:V4SI 0 "register_operand" "=x")
2889 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2890 (parallel [(const_int 0)
2896 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2897 (parallel [(const_int 0)
2903 (vec_select:V4HI (match_dup 1)
2904 (parallel [(const_int 1)
2909 (vec_select:V4HI (match_dup 2)
2910 (parallel [(const_int 1)
2913 (const_int 7)]))))))]
2914 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2915 "pmaddwd\t{%2, %0|%0, %2}"
2916 [(set_attr "type" "sseiadd")
2917 (set_attr "prefix_data16" "1")
2918 (set_attr "mode" "TI")])
;; V4SI multiply synthesised from two sse2_umulv2siv2di3 (pmuludq)
;; multiplies: one on the inputs as given (elements 2 and 0) and one on
;; copies shifted down by 32 bits with sse2_lshrti3 (elements 3 and 1).
;; The two product vectors are compacted with sse2_pshufd_1 and merged
;; with sse2_punpckldq.
2920 (define_expand "mulv4si3"
2921 [(set (match_operand:V4SI 0 "register_operand" "")
2922 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2923 (match_operand:V4SI 2 "register_operand" "")))]
2926 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2932 t1 = gen_reg_rtx (V4SImode);
2933 t2 = gen_reg_rtx (V4SImode);
2934 t3 = gen_reg_rtx (V4SImode);
2935 t4 = gen_reg_rtx (V4SImode);
2936 t5 = gen_reg_rtx (V4SImode);
2937 t6 = gen_reg_rtx (V4SImode);
2938 thirtytwo = GEN_INT (32);
2940 /* Multiply elements 2 and 0. */
2941 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2943 /* Shift both input vectors down one element, so that elements 3 and 1
2944 are now in the slots for elements 2 and 0. For K8, at least, this is
2945 faster than using a shuffle. */
2946 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2947 gen_lowpart (TImode, op1), thirtytwo));
2948 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2949 gen_lowpart (TImode, op2), thirtytwo));
2951 /* Multiply elements 3 and 1. */
2952 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2954 /* Move the results in element 2 down to element 1; we don't care what
2955 goes in elements 2 and 3. */
2956 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2957 const0_rtx, const0_rtx));
2958 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2959 const0_rtx, const0_rtx));
2961 /* Merge the parts back together. */
2962 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesised from three sse2_umulv2siv2di3 (pmuludq)
;; multiplies: low x low, plus the two cross products of one input's low
;; half with the other input's high half.  The high halves are brought
;; down with a 32-bit logical right shift (lshrv2di3); the cross products
;; are shifted back up by 32 (ashlv2di3) and the three parts are summed
;; with addv2di3.
2966 (define_expand "mulv2di3"
2967 [(set (match_operand:V2DI 0 "register_operand" "")
2968 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2969 (match_operand:V2DI 2 "register_operand" "")))]
2972 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2978 t1 = gen_reg_rtx (V2DImode);
2979 t2 = gen_reg_rtx (V2DImode);
2980 t3 = gen_reg_rtx (V2DImode);
2981 t4 = gen_reg_rtx (V2DImode);
2982 t5 = gen_reg_rtx (V2DImode);
2983 t6 = gen_reg_rtx (V2DImode);
2984 thirtytwo = GEN_INT (32);
2986 /* Multiply low parts. */
2987 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2988 gen_lowpart (V4SImode, op2)));
2990 /* Shift input vectors right 32 bits so we can multiply high parts. */
2991 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2992 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2994 /* Multiply high parts by low parts. */
2995 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2996 gen_lowpart (V4SImode, t3)));
2997 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2998 gen_lowpart (V4SImode, t2)));
3000 /* Shift them back. */
3001 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3002 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3004 /* Add the three parts together. */
3005 emit_insn (gen_addv2di3 (t6, t1, t4));
3006 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI multiply into V4SI.  mulv8hi3 gives the low 16 bits of
;; each product (t1) and smulv8hi3_highpart the high 16 bits (t2); the two
;; are combined with vec_interleave_highv8hi into the destination, which
;; is viewed as V8HI.
3010 (define_expand "vec_widen_smult_hi_v8hi"
3011 [(match_operand:V4SI 0 "register_operand" "")
3012 (match_operand:V8HI 1 "register_operand" "")
3013 (match_operand:V8HI 2 "register_operand" "")]
3016 rtx op1, op2, t1, t2, dest;
3020 t1 = gen_reg_rtx (V8HImode);
3021 t2 = gen_reg_rtx (V8HImode);
3022 dest = gen_lowpart (V8HImode, operands[0]);
3024 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3025 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3026 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI.  mulv8hi3 gives the low 16 bits of
;; each product (t1) and smulv8hi3_highpart the high 16 bits (t2); the two
;; are combined with vec_interleave_lowv8hi into the destination, which is
;; viewed as V8HI.
3030 (define_expand "vec_widen_smult_lo_v8hi"
3031 [(match_operand:V4SI 0 "register_operand" "")
3032 (match_operand:V8HI 1 "register_operand" "")
3033 (match_operand:V8HI 2 "register_operand" "")]
3036 rtx op1, op2, t1, t2, dest;
3040 t1 = gen_reg_rtx (V8HImode);
3041 t2 = gen_reg_rtx (V8HImode);
3042 dest = gen_lowpart (V8HImode, operands[0]);
3044 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3045 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3046 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI.  mulv8hi3 gives the low 16 bits of
;; each product (t1) and umulv8hi3_highpart the high 16 bits (t2); the two
;; are combined with vec_interleave_highv8hi into the destination, which
;; is viewed as V8HI.
3050 (define_expand "vec_widen_umult_hi_v8hi"
3051 [(match_operand:V4SI 0 "register_operand" "")
3052 (match_operand:V8HI 1 "register_operand" "")
3053 (match_operand:V8HI 2 "register_operand" "")]
3056 rtx op1, op2, t1, t2, dest;
3060 t1 = gen_reg_rtx (V8HImode);
3061 t2 = gen_reg_rtx (V8HImode);
3062 dest = gen_lowpart (V8HImode, operands[0]);
3064 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3065 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3066 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI.  mulv8hi3 gives the low 16 bits of
;; each product (t1) and umulv8hi3_highpart the high 16 bits (t2); the two
;; are combined with vec_interleave_lowv8hi into the destination, which is
;; viewed as V8HI.
3070 (define_expand "vec_widen_umult_lo_v8hi"
3071 [(match_operand:V4SI 0 "register_operand" "")
3072 (match_operand:V8HI 1 "register_operand" "")
3073 (match_operand:V8HI 2 "register_operand" "")]
3076 rtx op1, op2, t1, t2, dest;
3080 t1 = gen_reg_rtx (V8HImode);
3081 t2 = gen_reg_rtx (V8HImode);
3082 dest = gen_lowpart (V8HImode, operands[0]);
3084 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3085 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3086 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI multiply into V2DI.  Each input is interleaved with
;; itself (vec_interleave_highv4si), which places the wanted elements in
;; positions 0 and 2, and the results are multiplied with
;; sse2_umulv2siv2di3 (pmuludq).
;; NOTE(review): this signed variant emits the same pmuludq-based multiply
;; as vec_widen_umult_hi_v4si -- confirm the result is correct when input
;; elements are negative.
3090 (define_expand "vec_widen_smult_hi_v4si"
3091 [(match_operand:V2DI 0 "register_operand" "")
3092 (match_operand:V4SI 1 "register_operand" "")
3093 (match_operand:V4SI 2 "register_operand" "")]
3096 rtx op1, op2, t1, t2;
3100 t1 = gen_reg_rtx (V4SImode);
3101 t2 = gen_reg_rtx (V4SImode);
3103 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3104 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3105 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply into V2DI.  Each input is interleaved with
;; itself (vec_interleave_lowv4si), which places the wanted elements in
;; positions 0 and 2, and the results are multiplied with
;; sse2_umulv2siv2di3 (pmuludq).
;; NOTE(review): this signed variant emits the same pmuludq-based multiply
;; as vec_widen_umult_lo_v4si -- confirm the result is correct when input
;; elements are negative.
3109 (define_expand "vec_widen_smult_lo_v4si"
3110 [(match_operand:V2DI 0 "register_operand" "")
3111 (match_operand:V4SI 1 "register_operand" "")
3112 (match_operand:V4SI 2 "register_operand" "")]
3115 rtx op1, op2, t1, t2;
3119 t1 = gen_reg_rtx (V4SImode);
3120 t2 = gen_reg_rtx (V4SImode);
3122 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3123 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3124 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply into V2DI.  Each input is interleaved with
;; itself (vec_interleave_highv4si), which places the wanted elements in
;; positions 0 and 2, and the results are multiplied with
;; sse2_umulv2siv2di3 (pmuludq).
3128 (define_expand "vec_widen_umult_hi_v4si"
3129 [(match_operand:V2DI 0 "register_operand" "")
3130 (match_operand:V4SI 1 "register_operand" "")
3131 (match_operand:V4SI 2 "register_operand" "")]
3134 rtx op1, op2, t1, t2;
3138 t1 = gen_reg_rtx (V4SImode);
3139 t2 = gen_reg_rtx (V4SImode);
3141 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3142 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3143 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply into V2DI.  Each input is interleaved with
;; itself (vec_interleave_lowv4si), which places the wanted elements in
;; positions 0 and 2, and the results are multiplied with
;; sse2_umulv2siv2di3 (pmuludq).
3147 (define_expand "vec_widen_umult_lo_v4si"
3148 [(match_operand:V2DI 0 "register_operand" "")
3149 (match_operand:V4SI 1 "register_operand" "")
3150 (match_operand:V4SI 2 "register_operand" "")]
3153 rtx op1, op2, t1, t2;
3157 t1 = gen_reg_rtx (V4SImode);
3158 t2 = gen_reg_rtx (V4SImode);
3160 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3161 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3162 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product step on V8HI inputs with a V4SI accumulator.  sse2_pmaddwd
;; of operands 1 and 2 goes into a V4SI temporary, which is then added to
;; the accumulator (operand 3) with addv4si3 to give operand 0.
3166 (define_expand "sdot_prodv8hi"
3167 [(match_operand:V4SI 0 "register_operand" "")
3168 (match_operand:V8HI 1 "register_operand" "")
3169 (match_operand:V8HI 2 "register_operand" "")
3170 (match_operand:V4SI 3 "register_operand" "")]
3173 rtx t = gen_reg_rtx (V4SImode);
3174 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3175 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product step on V4SI inputs with a V2DI accumulator.
;;   t1 = sse2_umulv2siv2di3 of the inputs as given, plus the accumulator
;;        (operand 3);
;;   t2, t3 = the inputs shifted as TImode values with sse2_lshrti3;
;;   t4 = sse2_umulv2siv2di3 of the shifted inputs;
;;   result (operand 0) = t1 + t4.
3179 (define_expand "udot_prodv4si"
3180 [(match_operand:V2DI 0 "register_operand" "")
3181 (match_operand:V4SI 1 "register_operand" "")
3182 (match_operand:V4SI 2 "register_operand" "")
3183 (match_operand:V2DI 3 "register_operand" "")]
3188 t1 = gen_reg_rtx (V2DImode);
3189 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3190 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3192 t2 = gen_reg_rtx (V4SImode);
3193 t3 = gen_reg_rtx (V4SImode);
3194 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3195 gen_lowpart (TImode, operands[1]),
3197 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3198 gen_lowpart (TImode, operands[2]),
3201 t4 = gen_reg_rtx (V2DImode);
3202 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3204 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Emits psra{w,d} for the SSEMODE24 modes (V8HI, V4SI).  Operand 1 is
;; tied to the destination; the shift count is a TImode operand that may
;; be an SSE register or an immediate.
3208 (define_insn "ashr<mode>3"
3209 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3211 (match_operand:SSEMODE24 1 "register_operand" "0")
3212 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3214 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3215 [(set_attr "type" "sseishft")
3216 (set_attr "prefix_data16" "1")
3217 (set_attr "mode" "TI")])
;; Logical right shift: emits psrl{w,d,q} for the SSEMODE248 modes (V8HI,
;; V4SI, V2DI).  Operand 1 is tied to the destination; the shift count is
;; a TImode operand that may be an SSE register or an immediate.
3219 (define_insn "lshr<mode>3"
3220 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3221 (lshiftrt:SSEMODE248
3222 (match_operand:SSEMODE248 1 "register_operand" "0")
3223 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3225 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3226 [(set_attr "type" "sseishft")
3227 (set_attr "prefix_data16" "1")
3228 (set_attr "mode" "TI")])
;; "ashl<mode>3" for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; psll<ssevecsize>, i.e. psllw, pslld or psllq.  Operand 1 is tied to
;; the destination; the count (operand 2) is a TImode SSE register or an
;; immediate ("xn").
3230 (define_insn "ashl<mode>3"
3231 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3233 (match_operand:SSEMODE248 1 "register_operand" "0")
3234 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3236 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3237 [(set_attr "type" "sseishft")
3238 (set_attr "prefix_data16" "1")
3239 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift by a constant.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the immediate given to pslldq is the RTL count / 8
;; (pslldq counts bytes).
3241 (define_insn "sse2_ashlti3"
3242 [(set (match_operand:TI 0 "register_operand" "=x")
3243 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3244 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3247 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3248 return "pslldq\t{%2, %0|%0, %2}";
3250 [(set_attr "type" "sseishft")
3251 (set_attr "prefix_data16" "1")
3252 (set_attr "mode" "TI")])
;; Expander "vec_shl_<mode>" for the SSEMODEI integer vector modes.  The
;; preparation code tests operand 2 with const_0_to_255_mul_8_operand
;; (the action taken when the test fails is not visible in this listing)
;; and then replaces operands 0 and 1 by their TImode lowparts, so the
;; emitted RTL is an ashift:TI on TImode operands.
3254 (define_expand "vec_shl_<mode>"
3255 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3256 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3257 (match_operand:SI 2 "general_operand" "")))]
3260 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3262 operands[0] = gen_lowpart (TImode, operands[0]);
3263 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift by a constant.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing, so the immediate given to psrldq is the RTL
;; count / 8 (psrldq counts bytes).
3266 (define_insn "sse2_lshrti3"
3267 [(set (match_operand:TI 0 "register_operand" "=x")
3268 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3269 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3272 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3273 return "psrldq\t{%2, %0|%0, %2}";
3275 [(set_attr "type" "sseishft")
3276 (set_attr "prefix_data16" "1")
3277 (set_attr "mode" "TI")])
;; Expander "vec_shr_<mode>" for the SSEMODEI integer vector modes.  The
;; preparation code tests operand 2 with const_0_to_255_mul_8_operand
;; (the action taken when the test fails is not visible in this listing)
;; and then replaces operands 0 and 1 by their TImode lowparts, so the
;; emitted RTL is an lshiftrt:TI on TImode operands.
3279 (define_expand "vec_shr_<mode>"
3280 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3281 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3282 (match_operand:SI 2 "general_operand" "")))]
3285 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3287 operands[0] = gen_lowpart (TImode, operands[0]);
3288 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for umax on V16QI.  Both inputs may be registers or memory;
;; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands).
3291 (define_expand "umaxv16qi3"
3292 [(set (match_operand:V16QI 0 "register_operand" "")
3293 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3294 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3296 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matching insn for umax on V16QI: emits pmaxub.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
3298 (define_insn "*umaxv16qi3"
3299 [(set (match_operand:V16QI 0 "register_operand" "=x")
3300 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3301 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3302 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3303 "pmaxub\t{%2, %0|%0, %2}"
3304 [(set_attr "type" "sseiadd")
3305 (set_attr "prefix_data16" "1")
3306 (set_attr "mode" "TI")])
;; Expander for smax on V8HI.  Both inputs may be registers or memory;
;; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands).
3308 (define_expand "smaxv8hi3"
3309 [(set (match_operand:V8HI 0 "register_operand" "")
3310 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3311 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3313 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matching insn for smax on V8HI: emits pmaxsw.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
3315 (define_insn "*smaxv8hi3"
3316 [(set (match_operand:V8HI 0 "register_operand" "=x")
3317 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3318 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3319 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3320 "pmaxsw\t{%2, %0|%0, %2}"
3321 [(set_attr "type" "sseiadd")
3322 (set_attr "prefix_data16" "1")
3323 (set_attr "mode" "TI")])
;; Expander "umaxv8hi3".  The visible template is a us_minus of operand 1
;; and operand 2 into operand 0, followed by a plus of operand 0 and
;; operand 2 (the destination line of that second set is not visible in
;; this listing).  The preparation code saves the original destination in
;; operands[3]; when the destination is the same rtx as operand 2 it
;; substitutes a fresh V8HI pseudo for operand 0 -- presumably so that
;; operand 2 is not overwritten before the addition reads it.
3325 (define_expand "umaxv8hi3"
3326 [(set (match_operand:V8HI 0 "register_operand" "=x")
3327 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
3328 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3330 (plus:V8HI (match_dup 0) (match_dup 2)))]
3333 operands[3] = operands[0];
3334 if (rtx_equal_p (operands[0], operands[2]))
3335 operands[0] = gen_reg_rtx (V8HImode);
;; Expander for smax on the SSEMODE14 modes (V16QI, V4SI).  It builds an
;; operand array for ix86_expand_int_vcond laid out like the vcond<mode>
;; pattern in this file: [0] destination, [1]/[2] the two values to
;; select between, [3] the comparison, [4]/[5] the comparison operands.
;; Here that reads: op1 > op2 (GT) ? op1 : op2.
3338 (define_expand "smax<mode>3"
3339 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3340 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3341 (match_operand:SSEMODE14 2 "register_operand" "")))]
3347 xops[0] = operands[0];
3348 xops[1] = operands[1];
3349 xops[2] = operands[2];
3350 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3351 xops[4] = operands[1];
3352 xops[5] = operands[2];
3353 ok = ix86_expand_int_vcond (xops);
;; Expander for umax on V4SI.  It builds an operand array for
;; ix86_expand_int_vcond laid out like the vcond<mode> pattern in this
;; file ([0] destination, [1]/[2] values, [3] comparison, [4]/[5]
;; comparison operands), reading: op1 > op2 (GTU) ? op1 : op2.
3358 (define_expand "umaxv4si3"
3359 [(set (match_operand:V4SI 0 "register_operand" "")
3360 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3361 (match_operand:V4SI 2 "register_operand" "")))]
3367 xops[0] = operands[0];
3368 xops[1] = operands[1];
3369 xops[2] = operands[2];
3370 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3371 xops[4] = operands[1];
3372 xops[5] = operands[2];
3373 ok = ix86_expand_int_vcond (xops);
;; Expander for umin on V16QI.  Both inputs may be registers or memory;
;; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands).
3378 (define_expand "uminv16qi3"
3379 [(set (match_operand:V16QI 0 "register_operand" "")
3380 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3381 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3383 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for umin on V16QI: emits pminub.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
3385 (define_insn "*uminv16qi3"
3386 [(set (match_operand:V16QI 0 "register_operand" "=x")
3387 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3388 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3389 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3390 "pminub\t{%2, %0|%0, %2}"
3391 [(set_attr "type" "sseiadd")
3392 (set_attr "prefix_data16" "1")
3393 (set_attr "mode" "TI")])
;; Expander for smin on V8HI.  Both inputs may be registers or memory;
;; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands).
3395 (define_expand "sminv8hi3"
3396 [(set (match_operand:V8HI 0 "register_operand" "")
3397 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3398 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3400 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for smin on V8HI: emits pminsw.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
3402 (define_insn "*sminv8hi3"
3403 [(set (match_operand:V8HI 0 "register_operand" "=x")
3404 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3405 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3406 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3407 "pminsw\t{%2, %0|%0, %2}"
3408 [(set_attr "type" "sseiadd")
3409 (set_attr "prefix_data16" "1")
3410 (set_attr "mode" "TI")])
;; Expander for smin on the SSEMODE14 modes (V16QI, V4SI).  Same
;; ix86_expand_int_vcond operand layout as the vcond<mode> pattern in
;; this file, with the two selected values swapped relative to the smax
;; expander: op1 > op2 (GT) ? op2 : op1.
3412 (define_expand "smin<mode>3"
3413 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3414 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3415 (match_operand:SSEMODE14 2 "register_operand" "")))]
3421 xops[0] = operands[0];
3422 xops[1] = operands[2];
3423 xops[2] = operands[1];
3424 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3425 xops[4] = operands[1];
3426 xops[5] = operands[2];
3427 ok = ix86_expand_int_vcond (xops);
;; Expander for umin on the SSEMODE24 modes (V8HI, V4SI).  Same
;; ix86_expand_int_vcond operand layout as the vcond<mode> pattern in
;; this file, using an unsigned comparison:
;; op1 > op2 (GTU) ? op2 : op1.
3432 (define_expand "umin<mode>3"
3433 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3434 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3435 (match_operand:SSEMODE24 2 "register_operand" "")))]
3441 xops[0] = operands[0];
3442 xops[1] = operands[2];
3443 xops[2] = operands[1];
3444 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3445 xops[4] = operands[1];
3446 xops[5] = operands[2];
3447 ok = ix86_expand_int_vcond (xops);
3452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3454 ;; Parallel integral comparisons
3456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI): emits pcmpeq<ssevecsize>, i.e. pcmpeqb, pcmpeqw or pcmpeqd.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be a register or memory.
3458 (define_insn "sse2_eq<mode>3"
3459 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3461 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3462 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3463 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3464 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3465 [(set_attr "type" "ssecmp")
3466 (set_attr "prefix_data16" "1")
3467 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI,
;; V8HI, V4SI): emits pcmpgt<ssevecsize>, i.e. pcmpgtb, pcmpgtw or
;; pcmpgtd.  Unlike the equality pattern, operand 1 must be a register
;; tied to the destination and is not marked commutative.
3469 (define_insn "sse2_gt<mode>3"
3470 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3472 (match_operand:SSEMODE124 1 "register_operand" "0")
3473 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3475 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3476 [(set_attr "type" "ssecmp")
3477 (set_attr "prefix_data16" "1")
3478 (set_attr "mode" "TI")])
;; Expander "vcond<mode>" for the SSEMODE124 modes.  Operand layout:
;;   0 = destination, 1 = value for a true comparison, 2 = value for a
;;   false comparison, 3 = comparison operator, 4/5 = compared operands.
;; The whole operand array is handed to ix86_expand_int_vcond; what
;; happens on its success or failure is not visible in this listing.
3480 (define_expand "vcond<mode>"
3481 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3482 (if_then_else:SSEMODE124
3483 (match_operator 3 ""
3484 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3485 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3486 (match_operand:SSEMODE124 1 "general_operand" "")
3487 (match_operand:SSEMODE124 2 "general_operand" "")))]
3490 if (ix86_expand_int_vcond (operands))
;; Expander "vcondu<mode>" for the SSEMODE124 modes.  Same operand layout
;; as "vcond<mode>" (0 = destination, 1/2 = selected values, 3 =
;; comparison operator, 4/5 = compared operands) and the same call to
;; ix86_expand_int_vcond; what happens on its success or failure is not
;; visible in this listing.
3496 (define_expand "vcondu<mode>"
3497 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3498 (if_then_else:SSEMODE124
3499 (match_operator 3 ""
3500 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3501 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3502 (match_operand:SSEMODE124 1 "general_operand" "")
3503 (match_operand:SSEMODE124 2 "general_operand" "")))]
3506 if (ix86_expand_int_vcond (operands))
3512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3514 ;; Parallel integral logical operations
3516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an xor.  The
;; preparation code builds a CONST_VECTOR with one constm1_rtx per
;; element (GET_MODE_NUNITS of the mode), forces it into a register and
;; stores it in operands[2].
3518 (define_expand "one_cmpl<mode>2"
3519 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3520 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3524 int i, n = GET_MODE_NUNITS (<MODE>mode);
3525 rtvec v = rtvec_alloc (n);
3527 for (i = 0; i < n; ++i)
3528 RTVEC_ELT (v, i) = constm1_rtx;
3530 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the SSEMODEI modes.  Both inputs may be
;; registers or memory; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands).
3533 (define_expand "and<mode>3"
3534 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3535 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3536 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3538 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matching insn for bitwise AND on the SSEMODEI modes: emits pand for
;; every mode.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
;; Operand 1 is tied to the destination and marked commutative ("%0").
3540 (define_insn "*and<mode>3"
3541 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3543 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3544 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3545 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3546 "pand\t{%2, %0|%0, %2}"
3547 [(set_attr "type" "sselog")
3548 (set_attr "prefix_data16" "1")
3549 (set_attr "mode" "TI")])
;; "sse2_nand<mode>3" for the SSEMODEI modes: emits pandn.  In the RTL
;; operand 1 (tied to the destination) appears under a `not', while
;; operand 2 (register or memory) is used as is; the operation is
;; therefore not commutative.
3551 (define_insn "sse2_nand<mode>3"
3552 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3554 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3555 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3557 "pandn\t{%2, %0|%0, %2}"
3558 [(set_attr "type" "sselog")
3559 (set_attr "prefix_data16" "1")
3560 (set_attr "mode" "TI")])
;; Expander for bitwise OR on the SSEMODEI modes.  Both inputs may be
;; registers or memory; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands).
3562 (define_expand "ior<mode>3"
3563 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3564 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3565 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3567 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matching insn for bitwise OR on the SSEMODEI modes: emits por for
;; every mode.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
;; Operand 1 is tied to the destination and marked commutative ("%0").
3569 (define_insn "*ior<mode>3"
3570 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3572 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3573 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3574 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3575 "por\t{%2, %0|%0, %2}"
3576 [(set_attr "type" "sselog")
3577 (set_attr "prefix_data16" "1")
3578 (set_attr "mode" "TI")])
;; Expander for bitwise XOR on the SSEMODEI modes.  Both inputs may be
;; registers or memory; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands).
3580 (define_expand "xor<mode>3"
3581 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3582 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3583 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3585 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matching insn for bitwise XOR on the SSEMODEI modes: emits pxor for
;; every mode.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
;; Operand 1 is tied to the destination and marked commutative ("%0").
3587 (define_insn "*xor<mode>3"
3588 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3590 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3591 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3592 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3593 "pxor\t{%2, %0|%0, %2}"
3594 [(set_attr "type" "sselog")
3595 (set_attr "prefix_data16" "1")
3596 (set_attr "mode" "TI")])
3598 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3600 ;; Parallel integral element swizzling
3602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3605 ;; op1 = abcdefghijklmnop
3606 ;; op2 = qrstuvwxyz012345
3607 ;; h1 = aqbrcsdteufvgwhx
3608 ;; l1 = iyjzk0l1m2n3o4p5
3609 ;; h2 = aiqybjrzcks0dlt1
3610 ;; l2 = emu2fnv3gow4hpx5
3611 ;; h3 = aeimquy2bfjnrvz3
3612 ;; l3 = cgkosw04dhlptx15
3613 ;; result = bdfhjlnprtvxz135
;; Expander "vec_pack_trunc_v8hi": two V8HI inputs -> one V16QI result.
;; Both inputs are reinterpreted as V16QI (gen_lowpart) and run through
;; three rounds of high/low byte interleaves (h1/l1, h2/l2, h3/l3), each
;; round fed by the previous one; a final low interleave of l3 and h3
;; writes operand 0.  The letter diagram above traces each stage.
3614 (define_expand "vec_pack_trunc_v8hi"
3615 [(match_operand:V16QI 0 "register_operand" "")
3616 (match_operand:V8HI 1 "register_operand" "")
3617 (match_operand:V8HI 2 "register_operand" "")]
3620 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3622 op1 = gen_lowpart (V16QImode, operands[1]);
3623 op2 = gen_lowpart (V16QImode, operands[2]);
3624 h1 = gen_reg_rtx (V16QImode);
3625 l1 = gen_reg_rtx (V16QImode);
3626 h2 = gen_reg_rtx (V16QImode);
3627 l2 = gen_reg_rtx (V16QImode);
3628 h3 = gen_reg_rtx (V16QImode);
3629 l3 = gen_reg_rtx (V16QImode);
3631 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3632 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3633 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3634 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3635 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3636 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3637 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3648 ;; result = bdfhjlnp
;; Expander "vec_pack_trunc_v4si": two V4SI inputs -> one V8HI result.
;; Both inputs are reinterpreted as V8HI (gen_lowpart) and run through
;; two rounds of high/low word interleaves (h1/l1, then h2/l2 from l1 and
;; h1); a final low interleave of l2 and h2 writes operand 0.
3649 (define_expand "vec_pack_trunc_v4si"
3650 [(match_operand:V8HI 0 "register_operand" "")
3651 (match_operand:V4SI 1 "register_operand" "")
3652 (match_operand:V4SI 2 "register_operand" "")]
3655 rtx op1, op2, h1, l1, h2, l2;
3657 op1 = gen_lowpart (V8HImode, operands[1]);
3658 op2 = gen_lowpart (V8HImode, operands[2]);
3659 h1 = gen_reg_rtx (V8HImode);
3660 l1 = gen_reg_rtx (V8HImode);
3661 h2 = gen_reg_rtx (V8HImode);
3662 l2 = gen_reg_rtx (V8HImode);
3664 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3665 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3666 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3667 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3668 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander "vec_pack_trunc_v2di": two V2DI inputs -> one V4SI result.
;; Both inputs are reinterpreted as V4SI (gen_lowpart); one round of
;; high/low dword interleaves produces h1 and l1, and a final low
;; interleave of l1 and h1 writes operand 0.
3678 (define_expand "vec_pack_trunc_v2di"
3679 [(match_operand:V4SI 0 "register_operand" "")
3680 (match_operand:V2DI 1 "register_operand" "")
3681 (match_operand:V2DI 2 "register_operand" "")]
3684 rtx op1, op2, h1, l1;
3686 op1 = gen_lowpart (V4SImode, operands[1]);
3687 op2 = gen_lowpart (V4SImode, operands[2]);
3688 h1 = gen_reg_rtx (V4SImode);
3689 l1 = gen_reg_rtx (V4SImode);
3691 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3692 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3693 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander "vec_interleave_highv16qi": the selection list alternates
;; elements 8..15 of operand 1 with elements 24..31 (i.e. 8..15 of
;; operand 2 in the concatenation).  The preparation code simply emits
;; sse2_punpckhbw on the three operands.
3697 (define_expand "vec_interleave_highv16qi"
3698 [(set (match_operand:V16QI 0 "register_operand" "=x")
3701 (match_operand:V16QI 1 "register_operand" "0")
3702 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3703 (parallel [(const_int 8) (const_int 24)
3704 (const_int 9) (const_int 25)
3705 (const_int 10) (const_int 26)
3706 (const_int 11) (const_int 27)
3707 (const_int 12) (const_int 28)
3708 (const_int 13) (const_int 29)
3709 (const_int 14) (const_int 30)
3710 (const_int 15) (const_int 31)])))]
3713 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_lowv16qi": the selection list alternates
;; elements 0..7 of operand 1 with elements 16..23 (i.e. 0..7 of
;; operand 2 in the concatenation).  The preparation code simply emits
;; sse2_punpcklbw on the three operands.
3717 (define_expand "vec_interleave_lowv16qi"
3718 [(set (match_operand:V16QI 0 "register_operand" "=x")
3721 (match_operand:V16QI 1 "register_operand" "0")
3722 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3723 (parallel [(const_int 0) (const_int 16)
3724 (const_int 1) (const_int 17)
3725 (const_int 2) (const_int 18)
3726 (const_int 3) (const_int 19)
3727 (const_int 4) (const_int 20)
3728 (const_int 5) (const_int 21)
3729 (const_int 6) (const_int 22)
3730 (const_int 7) (const_int 23)])))]
3733 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_highv8hi": the selection list alternates
;; elements 4..7 of operand 1 with elements 12..15 (4..7 of operand 2 in
;; the concatenation).  The preparation code emits sse2_punpckhwd.
3737 (define_expand "vec_interleave_highv8hi"
3738 [(set (match_operand:V8HI 0 "register_operand" "=x")
3741 (match_operand:V8HI 1 "register_operand" "0")
3742 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3743 (parallel [(const_int 4) (const_int 12)
3744 (const_int 5) (const_int 13)
3745 (const_int 6) (const_int 14)
3746 (const_int 7) (const_int 15)])))]
3749 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_lowv8hi": the selection list alternates
;; elements 0..3 of operand 1 with elements 8..11 (0..3 of operand 2 in
;; the concatenation).  The preparation code emits sse2_punpcklwd.
3753 (define_expand "vec_interleave_lowv8hi"
3754 [(set (match_operand:V8HI 0 "register_operand" "=x")
3757 (match_operand:V8HI 1 "register_operand" "0")
3758 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3759 (parallel [(const_int 0) (const_int 8)
3760 (const_int 1) (const_int 9)
3761 (const_int 2) (const_int 10)
3762 (const_int 3) (const_int 11)])))]
3765 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_highv4si": the selection list is elements
;; 2, 6, 3, 7 of the concatenated operands (high halves of operands 1 and
;; 2, alternating).  The preparation code emits sse2_punpckhdq.
3769 (define_expand "vec_interleave_highv4si"
3770 [(set (match_operand:V4SI 0 "register_operand" "=x")
3773 (match_operand:V4SI 1 "register_operand" "0")
3774 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3775 (parallel [(const_int 2) (const_int 6)
3776 (const_int 3) (const_int 7)])))]
3779 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_lowv4si": the selection list is elements
;; 0, 4, 1, 5 of the concatenated operands (low halves of operands 1 and
;; 2, alternating).  The preparation code emits sse2_punpckldq.
3783 (define_expand "vec_interleave_lowv4si"
3784 [(set (match_operand:V4SI 0 "register_operand" "=x")
3787 (match_operand:V4SI 1 "register_operand" "0")
3788 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3789 (parallel [(const_int 0) (const_int 4)
3790 (const_int 1) (const_int 5)])))]
3793 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_highv2di": the selection list starts with
;; element 1 (its second entry is not visible in this listing).  The
;; preparation code emits sse2_punpckhqdq on the three operands.
3797 (define_expand "vec_interleave_highv2di"
3798 [(set (match_operand:V2DI 0 "register_operand" "=x")
3801 (match_operand:V2DI 1 "register_operand" "0")
3802 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3803 (parallel [(const_int 1)
3807 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander "vec_interleave_lowv2di": the selection list starts with
;; element 0 (its second entry is not visible in this listing).  The
;; preparation code emits sse2_punpcklqdq on the three operands.
3811 (define_expand "vec_interleave_lowv2di"
3812 [(set (match_operand:V2DI 0 "register_operand" "=x")
3815 (match_operand:V2DI 1 "register_operand" "0")
3816 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3817 (parallel [(const_int 0)
3821 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; "sse2_packsswb": combines two V8HI inputs into one V16QI result and
;; emits packsswb.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
3825 (define_insn "sse2_packsswb"
3826 [(set (match_operand:V16QI 0 "register_operand" "=x")
3829 (match_operand:V8HI 1 "register_operand" "0"))
3831 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3833 "packsswb\t{%2, %0|%0, %2}"
3834 [(set_attr "type" "sselog")
3835 (set_attr "prefix_data16" "1")
3836 (set_attr "mode" "TI")])
;; "sse2_packssdw": combines two V4SI inputs into one V8HI result and
;; emits packssdw.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
3838 (define_insn "sse2_packssdw"
3839 [(set (match_operand:V8HI 0 "register_operand" "=x")
3842 (match_operand:V4SI 1 "register_operand" "0"))
3844 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3846 "packssdw\t{%2, %0|%0, %2}"
3847 [(set_attr "type" "sselog")
3848 (set_attr "prefix_data16" "1")
3849 (set_attr "mode" "TI")])
;; "sse2_packuswb": combines two V8HI inputs into one V16QI result and
;; emits packuswb.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
3851 (define_insn "sse2_packuswb"
3852 [(set (match_operand:V16QI 0 "register_operand" "=x")
3855 (match_operand:V8HI 1 "register_operand" "0"))
3857 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3859 "packuswb\t{%2, %0|%0, %2}"
3860 [(set_attr "type" "sselog")
3861 (set_attr "prefix_data16" "1")
3862 (set_attr "mode" "TI")])
;; "sse2_punpckhbw": selects elements 8,24, 9,25, ... 15,31 from the two
;; V16QI operands, i.e. alternates the upper eight bytes of operand 1 and
;; operand 2.  Operand 1 is tied to the destination; emits punpckhbw.
3864 (define_insn "sse2_punpckhbw"
3865 [(set (match_operand:V16QI 0 "register_operand" "=x")
3868 (match_operand:V16QI 1 "register_operand" "0")
3869 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3870 (parallel [(const_int 8) (const_int 24)
3871 (const_int 9) (const_int 25)
3872 (const_int 10) (const_int 26)
3873 (const_int 11) (const_int 27)
3874 (const_int 12) (const_int 28)
3875 (const_int 13) (const_int 29)
3876 (const_int 14) (const_int 30)
3877 (const_int 15) (const_int 31)])))]
3879 "punpckhbw\t{%2, %0|%0, %2}"
3880 [(set_attr "type" "sselog")
3881 (set_attr "prefix_data16" "1")
3882 (set_attr "mode" "TI")])
;; "sse2_punpcklbw": selects elements 0,16, 1,17, ... 7,23 from the two
;; V16QI operands, i.e. alternates the lower eight bytes of operand 1 and
;; operand 2.  Operand 1 is tied to the destination; emits punpcklbw.
3884 (define_insn "sse2_punpcklbw"
3885 [(set (match_operand:V16QI 0 "register_operand" "=x")
3888 (match_operand:V16QI 1 "register_operand" "0")
3889 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3890 (parallel [(const_int 0) (const_int 16)
3891 (const_int 1) (const_int 17)
3892 (const_int 2) (const_int 18)
3893 (const_int 3) (const_int 19)
3894 (const_int 4) (const_int 20)
3895 (const_int 5) (const_int 21)
3896 (const_int 6) (const_int 22)
3897 (const_int 7) (const_int 23)])))]
3899 "punpcklbw\t{%2, %0|%0, %2}"
3900 [(set_attr "type" "sselog")
3901 (set_attr "prefix_data16" "1")
3902 (set_attr "mode" "TI")])
;; "sse2_punpckhwd": selects elements 4,12, 5,13, 6,14, 7,15 from the two
;; V8HI operands, i.e. alternates the upper four words of operand 1 and
;; operand 2.  Operand 1 is tied to the destination; emits punpckhwd.
3904 (define_insn "sse2_punpckhwd"
3905 [(set (match_operand:V8HI 0 "register_operand" "=x")
3908 (match_operand:V8HI 1 "register_operand" "0")
3909 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3910 (parallel [(const_int 4) (const_int 12)
3911 (const_int 5) (const_int 13)
3912 (const_int 6) (const_int 14)
3913 (const_int 7) (const_int 15)])))]
3915 "punpckhwd\t{%2, %0|%0, %2}"
3916 [(set_attr "type" "sselog")
3917 (set_attr "prefix_data16" "1")
3918 (set_attr "mode" "TI")])
;; "sse2_punpcklwd": selects elements 0,8, 1,9, 2,10, 3,11 from the two
;; V8HI operands, i.e. alternates the lower four words of operand 1 and
;; operand 2.  Operand 1 is tied to the destination; emits punpcklwd.
3920 (define_insn "sse2_punpcklwd"
3921 [(set (match_operand:V8HI 0 "register_operand" "=x")
3924 (match_operand:V8HI 1 "register_operand" "0")
3925 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3926 (parallel [(const_int 0) (const_int 8)
3927 (const_int 1) (const_int 9)
3928 (const_int 2) (const_int 10)
3929 (const_int 3) (const_int 11)])))]
3931 "punpcklwd\t{%2, %0|%0, %2}"
3932 [(set_attr "type" "sselog")
3933 (set_attr "prefix_data16" "1")
3934 (set_attr "mode" "TI")])
;; "sse2_punpckhdq": selects elements 2, 6, 3, 7 from the two V4SI
;; operands, i.e. alternates the upper two dwords of operand 1 and
;; operand 2.  Operand 1 is tied to the destination; emits punpckhdq.
3936 (define_insn "sse2_punpckhdq"
3937 [(set (match_operand:V4SI 0 "register_operand" "=x")
3940 (match_operand:V4SI 1 "register_operand" "0")
3941 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3942 (parallel [(const_int 2) (const_int 6)
3943 (const_int 3) (const_int 7)])))]
3945 "punpckhdq\t{%2, %0|%0, %2}"
3946 [(set_attr "type" "sselog")
3947 (set_attr "prefix_data16" "1")
3948 (set_attr "mode" "TI")])
;; "sse2_punpckldq": selects elements 0, 4, 1, 5 from the two V4SI
;; operands, i.e. alternates the lower two dwords of operand 1 and
;; operand 2.  Operand 1 is tied to the destination; emits punpckldq.
3950 (define_insn "sse2_punpckldq"
3951 [(set (match_operand:V4SI 0 "register_operand" "=x")
3954 (match_operand:V4SI 1 "register_operand" "0")
3955 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3956 (parallel [(const_int 0) (const_int 4)
3957 (const_int 1) (const_int 5)])))]
3959 "punpckldq\t{%2, %0|%0, %2}"
3960 [(set_attr "type" "sselog")
3961 (set_attr "prefix_data16" "1")
3962 (set_attr "mode" "TI")])
;; "sse2_punpckhqdq": V2DI operands, selection list starting with
;; element 1 (its second entry is not visible in this listing).
;; Operand 1 is tied to the destination; emits punpckhqdq.
3964 (define_insn "sse2_punpckhqdq"
3965 [(set (match_operand:V2DI 0 "register_operand" "=x")
3968 (match_operand:V2DI 1 "register_operand" "0")
3969 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3970 (parallel [(const_int 1)
3973 "punpckhqdq\t{%2, %0|%0, %2}"
3974 [(set_attr "type" "sselog")
3975 (set_attr "prefix_data16" "1")
3976 (set_attr "mode" "TI")])
;; "sse2_punpcklqdq": V2DI operands, selection list starting with
;; element 0 (its second entry is not visible in this listing).
;; Operand 1 is tied to the destination; emits punpcklqdq.
3978 (define_insn "sse2_punpcklqdq"
3979 [(set (match_operand:V2DI 0 "register_operand" "=x")
3982 (match_operand:V2DI 1 "register_operand" "0")
3983 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3984 (parallel [(const_int 0)
3987 "punpcklqdq\t{%2, %0|%0, %2}"
3988 [(set_attr "type" "sselog")
3989 (set_attr "prefix_data16" "1")
3990 (set_attr "mode" "TI")])
;; "*sse2_pinsrw": inserts the HImode operand 2 (general register or
;; memory) into the V8HI operand 1, which is tied to the destination.
;; Operand 3 is a power of two in 1..128 (const_pow2_1_to_128_operand);
;; the output code converts it with exact_log2 to obtain the immediate
;; printed for pinsrw.  Operand 2 is printed with the %k modifier.
3992 (define_insn "*sse2_pinsrw"
3993 [(set (match_operand:V8HI 0 "register_operand" "=x")
3996 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3997 (match_operand:V8HI 1 "register_operand" "0")
3998 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4001 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4002 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4004 [(set_attr "type" "sselog")
4005 (set_attr "prefix_data16" "1")
4006 (set_attr "mode" "TI")])
;; "*sse2_pextrw": extracts the element of the V8HI operand 1 selected by
;; the constant operand 2 (0..7) into the SImode general register
;; operand 0; emits pextrw.
4008 (define_insn "*sse2_pextrw"
4009 [(set (match_operand:SI 0 "register_operand" "=r")
4012 (match_operand:V8HI 1 "register_operand" "x")
4013 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4015 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4016 [(set_attr "type" "sselog")
4017 (set_attr "prefix_data16" "1")
4018 (set_attr "mode" "TI")])
;; Expander "sse2_pshufd": takes the shuffle immediate as one integer
;; (operand 2) and splits it into four 2-bit selectors (bits 0-1, 2-3,
;; 4-5, 6-7), which are passed as separate operands to sse2_pshufd_1.
4020 (define_expand "sse2_pshufd"
4021 [(match_operand:V4SI 0 "register_operand" "")
4022 (match_operand:V4SI 1 "nonimmediate_operand" "")
4023 (match_operand:SI 2 "const_int_operand" "")]
4026 int mask = INTVAL (operands[2]);
4027 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4028 GEN_INT ((mask >> 0) & 3),
4029 GEN_INT ((mask >> 2) & 3),
4030 GEN_INT ((mask >> 4) & 3),
4031 GEN_INT ((mask >> 6) & 3)));
;; "sse2_pshufd_1": V4SI shuffle whose four element selectors (operands
;; 2..5, each 0..3) appear individually in the RTL selection list.  The
;; output code packs them back into a single immediate -- operand 2 in
;; bits 0-1, operand 3 in bits 2-3, operand 4 in bits 4-5, operand 5 in
;; bits 6-7 -- stores it in operands[2] and prints pshufd.  (The
;; declaration/initialisation of `mask' is not visible in this listing.)
4035 (define_insn "sse2_pshufd_1"
4036 [(set (match_operand:V4SI 0 "register_operand" "=x")
4038 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4039 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4040 (match_operand 3 "const_0_to_3_operand" "")
4041 (match_operand 4 "const_0_to_3_operand" "")
4042 (match_operand 5 "const_0_to_3_operand" "")])))]
4046 mask |= INTVAL (operands[2]) << 0;
4047 mask |= INTVAL (operands[3]) << 2;
4048 mask |= INTVAL (operands[4]) << 4;
4049 mask |= INTVAL (operands[5]) << 6;
4050 operands[2] = GEN_INT (mask);
4052 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4054 [(set_attr "type" "sselog1")
4055 (set_attr "prefix_data16" "1")
4056 (set_attr "mode" "TI")])
;; Expander "sse2_pshuflw": splits the integer shuffle immediate
;; (operand 2) into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate operands to sse2_pshuflw_1.
4058 (define_expand "sse2_pshuflw"
4059 [(match_operand:V8HI 0 "register_operand" "")
4060 (match_operand:V8HI 1 "nonimmediate_operand" "")
4061 (match_operand:SI 2 "const_int_operand" "")]
4064 int mask = INTVAL (operands[2]);
4065 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4066 GEN_INT ((mask >> 0) & 3),
4067 GEN_INT ((mask >> 2) & 3),
4068 GEN_INT ((mask >> 4) & 3),
4069 GEN_INT ((mask >> 6) & 3)));
;; "sse2_pshuflw_1": V8HI shuffle whose first four selection entries are
;; operands 2..5 (each 0..3); the remaining entries of the selection list
;; are not visible in this listing.  The output code packs the four
;; selectors into one immediate (2 bits each, operand 2 lowest), stores
;; it in operands[2] and prints pshuflw.  (The declaration/initialisation
;; of `mask' is not visible in this listing.)
4073 (define_insn "sse2_pshuflw_1"
4074 [(set (match_operand:V8HI 0 "register_operand" "=x")
4076 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4077 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4078 (match_operand 3 "const_0_to_3_operand" "")
4079 (match_operand 4 "const_0_to_3_operand" "")
4080 (match_operand 5 "const_0_to_3_operand" "")
4088 mask |= INTVAL (operands[2]) << 0;
4089 mask |= INTVAL (operands[3]) << 2;
4090 mask |= INTVAL (operands[4]) << 4;
4091 mask |= INTVAL (operands[5]) << 6;
4092 operands[2] = GEN_INT (mask);
4094 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4096 [(set_attr "type" "sselog")
4097 (set_attr "prefix_rep" "1")
4098 (set_attr "mode" "TI")])
;; Expander "sse2_pshufhw": splits the integer shuffle immediate
;; (operand 2) into four 2-bit selectors and adds 4 to each, so the
;; values handed to sse2_pshufhw_1 are element indices in the range 4..7.
4100 (define_expand "sse2_pshufhw"
4101 [(match_operand:V8HI 0 "register_operand" "")
4102 (match_operand:V8HI 1 "nonimmediate_operand" "")
4103 (match_operand:SI 2 "const_int_operand" "")]
4106 int mask = INTVAL (operands[2]);
4107 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4108 GEN_INT (((mask >> 0) & 3) + 4),
4109 GEN_INT (((mask >> 2) & 3) + 4),
4110 GEN_INT (((mask >> 4) & 3) + 4),
4111 GEN_INT (((mask >> 6) & 3) + 4)));
;; "sse2_pshufhw_1": V8HI shuffle whose selection list begins with
;; constant entries (only the first, 0, is visible in this listing) and
;; ends with operands 2..5, each an element index in 4..7.  The output
;; code subtracts 4 from each index, packs the results into one immediate
;; (2 bits each, operand 2 lowest), stores it in operands[2] and prints
;; pshufhw.  (The declaration/initialisation of `mask' is not visible in
;; this listing.)
4115 (define_insn "sse2_pshufhw_1"
4116 [(set (match_operand:V8HI 0 "register_operand" "=x")
4118 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4119 (parallel [(const_int 0)
4123 (match_operand 2 "const_4_to_7_operand" "")
4124 (match_operand 3 "const_4_to_7_operand" "")
4125 (match_operand 4 "const_4_to_7_operand" "")
4126 (match_operand 5 "const_4_to_7_operand" "")])))]
4130 mask |= (INTVAL (operands[2]) - 4) << 0;
4131 mask |= (INTVAL (operands[3]) - 4) << 2;
4132 mask |= (INTVAL (operands[4]) - 4) << 4;
4133 mask |= (INTVAL (operands[5]) - 4) << 6;
4134 operands[2] = GEN_INT (mask);
4136 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4138 [(set_attr "type" "sselog")
4139 (set_attr "prefix_rep" "1")
4140 (set_attr "mode" "TI")])
;; Expander "sse2_loadd": builds a V4SI value from the SImode operand 1
;; (register or memory).  The preparation statement sets operands[2] to
;; the V4SI zero constant; how operand 2 is used in the template is not
;; visible in this listing.
4142 (define_expand "sse2_loadd"
4143 [(set (match_operand:V4SI 0 "register_operand" "")
4146 (match_operand:SI 1 "nonimmediate_operand" ""))
4150 "operands[2] = CONST0_RTX (V4SImode);")
;; "sse2_loadld": places the SImode operand 2 into a V4SI register, with
;; operand 1 being either zero ("C") or the destination itself ("0").
;; Alternatives, in order:
;;   0: memory source, operand 1 zero      -> movd
;;   1: general register, operand 1 zero   -> movd
;;   2: memory source, operand 1 zero      -> movss
;;   3: SSE register source, operand 1 = 0 -> movss
4152 (define_insn "sse2_loadld"
4153 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4156 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4157 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4161 movd\t{%2, %0|%0, %2}
4162 movd\t{%2, %0|%0, %2}
4163 movss\t{%2, %0|%0, %2}
4164 movss\t{%2, %0|%0, %2}"
4165 [(set_attr "type" "ssemov")
4166 (set_attr "mode" "TI,TI,V4SF,SF")])
;; "sse2_stored": stores element 0 of the V4SI operand 1 into an SImode
;; destination (memory, SSE register or general register).  The split
;; runs after reload when inter-unit moves are allowed, or the
;; destination is memory, or it is not a general register; it then
;; becomes a plain SImode move whose source is operand 1's hard register
;; re-expressed in SImode.
4168 (define_insn_and_split "sse2_stored"
4169 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4171 (match_operand:V4SI 1 "register_operand" "x,Yi")
4172 (parallel [(const_int 0)])))]
4175 "&& reload_completed
4176 && (TARGET_INTER_UNIT_MOVES
4177 || MEM_P (operands [0])
4178 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4179 [(set (match_dup 0) (match_dup 1))]
4181 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander "sse_storeq": stores element 0 of the V2DI operand 1 into a
;; DImode destination (register or memory).  No preparation code is
;; visible in this listing.
4184 (define_expand "sse_storeq"
4185 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4187 (match_operand:V2DI 1 "register_operand" "")
4188 (parallel [(const_int 0)])))]
;; "*sse2_storeq_rex64": 64-bit variant (TARGET_64BIT && TARGET_SSE) of
;; the element-0 store from a V2DI register.  Besides memory/SSE
;; destinations it has a second alternative with a general-register
;; destination ("r") and a "Yi" source.  The output template is not
;; visible in this listing.
4192 (define_insn "*sse2_storeq_rex64"
4193 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4195 (match_operand:V2DI 1 "register_operand" "x,Yi")
4196 (parallel [(const_int 0)])))]
4197 "TARGET_64BIT && TARGET_SSE"
;; "*sse2_storeq": element-0 store from a V2DI SSE register into a DImode
;; memory or SSE-register destination ("=mx"); condition and output
;; template are not visible in this listing.
;;
;; NOTE(review): the lines from 4209 onward repeat the same pattern
;; without constraints and look like the body of a separate split whose
;; opening line is missing from this listing -- confirm against the full
;; file.  When inter-unit moves are allowed, or the destination is memory
;; or not a general register, it rewrites the store as a plain DImode
;; move whose source is operand 1's hard register re-expressed in DImode.
4200 (define_insn "*sse2_storeq"
4201 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4203 (match_operand:V2DI 1 "register_operand" "x")
4204 (parallel [(const_int 0)])))]
4209 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4211 (match_operand:V2DI 1 "register_operand" "")
4212 (parallel [(const_int 0)])))]
4215 && (TARGET_INTER_UNIT_MOVES
4216 || MEM_P (operands [0])
4217 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4218 [(set (match_dup 0) (match_dup 1))]
4220 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; "*vec_extractv2di_1_sse2": extracts element 1 (the upper quadword) of
;; the V2DI operand 1 into a DImode destination.  Alternatives, in order:
;;   0: register source, memory destination       -> movhps store
;;   1: source tied to the destination register   -> psrldq by 8 bytes
;;   2: offsettable memory source, register dest  -> movq from %H1
;; psrldq counts bytes, so moving the upper quadword down into the low
;; quadword needs a shift of 8.  A shift of 4 would leave a mix of the
;; upper half of element 0 and the lower half of element 1 in the result.
4223 (define_insn "*vec_extractv2di_1_sse2"
4224 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4226 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4227 (parallel [(const_int 1)])))]
4228 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4230 movhps\t{%1, %0|%0, %1}
4231 psrldq\t{$8, %0|%0, 8}
4232 movq\t{%H1, %0|%0, %H1}"
4233 [(set_attr "type" "ssemov,sseishft,ssemov")
4234 (set_attr "mode" "V2SF,TI,TI")])
4236 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only variant (!TARGET_SSE2 && TARGET_SSE) of the element-1
;; extraction from a V2DI operand.  Alternatives, in order:
;;   0: register source, memory destination     -> movhps
;;   1: register source, register destination   -> movhlps
;;   2: offsettable memory source, register dest -> movlps from %H1
;; The condition also rejects memory-to-memory operand combinations.
4237 (define_insn "*vec_extractv2di_1_sse"
4238 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4240 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4241 (parallel [(const_int 1)])))]
4242 "!TARGET_SSE2 && TARGET_SSE
4243 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4245 movhps\t{%1, %0|%0, %1}
4246 movhlps\t{%1, %0|%0, %1}
4247 movlps\t{%H1, %0|%0, %H1}"
4248 [(set_attr "type" "ssemov")
4249 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; "*vec_dupv4si": builds a V4SI from the SImode register operand 1.
;; Alternative 0 ("Y2" registers) emits pshufd with immediate 0;
;; alternative 1 (source tied to the destination) emits shufps with
;; immediate 0 on the destination itself.
4251 (define_insn "*vec_dupv4si"
4252 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4254 (match_operand:SI 1 "register_operand" " Y2,0")))]
4257 pshufd\t{$0, %1, %0|%0, %1, 0}
4258 shufps\t{$0, %0, %0|%0, %0, 0}"
4259 [(set_attr "type" "sselog1")
4260 (set_attr "mode" "TI,V4SF")])
;; "*vec_dupv2di": builds a V2DI from the DImode register operand 1,
;; which is tied to the destination in both alternatives.  The type/mode
;; attributes are sselog1/TI for alternative 0 and ssemov/V4SF for
;; alternative 1; the output templates are not visible in this listing.
4262 (define_insn "*vec_dupv2di"
4263 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4265 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4270 [(set_attr "type" "sselog1,ssemov")
4271 (set_attr "mode" "TI,V4SF")])
4273 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4274 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4275 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; "*sse2_concatv2si": forms a V2SI from two SImode operands.
;; Alternatives, in order:
;;   0: op1 tied to dest, op2 in a "Y2" register -> punpckldq
;;   1: op1 in a general register or memory, op2 zero -> movd
;;   2: MMX ("*y") form of alternative 0 -> punpckldq
;;   3: MMX ("*y") form of alternative 1 -> movd
4276 (define_insn "*sse2_concatv2si"
4277 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4279 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4280 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4283 punpckldq\t{%2, %0|%0, %2}
4284 movd\t{%1, %0|%0, %1}
4285 punpckldq\t{%2, %0|%0, %2}
4286 movd\t{%1, %0|%0, %1}"
4287 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4288 (set_attr "mode" "TI,TI,DI,DI")])
;; "*sse1_concatv2si": forms a V2SI from two SImode operands using
;; SSE1-style instructions for the SSE alternatives.  Alternatives, in
;; order:
;;   0: op1 tied to dest, op2 in an SSE register -> unpcklps
;;   1: op1 in memory, op2 zero                  -> movss
;;   2: MMX ("*y") op1 tied to dest, op2 MMX reg -> punpckldq
;;   3: MMX destination, op1 reg/mem, op2 zero   -> movd
4290 (define_insn "*sse1_concatv2si"
4291 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4293 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4294 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4297 unpcklps\t{%2, %0|%0, %2}
4298 movss\t{%1, %0|%0, %1}
4299 punpckldq\t{%2, %0|%0, %2}
4300 movd\t{%1, %0|%0, %1}"
4301 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4302 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI register from two V2SI operands.  Operand 1 is always
;; tied to the destination; operand 2 is merged in with punpcklqdq (Y2
;; register), movlhps (x register) or movhps (memory).
4304 (define_insn "*vec_concatv4si_1"
4305 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4307 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4308 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4311 punpcklqdq\t{%2, %0|%0, %2}
4312 movlhps\t{%2, %0|%0, %2}
4313 movhps\t{%2, %0|%0, %2}"
4314 [(set_attr "type" "sselog,ssemov,ssemov")
4315 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI register from DI operand 1 and operand 2 (predicate
;; vector_move_operand).  Six alternatives, in order:
;;   0: movq from memory, operand 2 matching C
;;   1: movq2dq from a y register, operand 2 matching C
;;   2: punpcklqdq, operand 1 tied to the destination, Y2 registers
;;   3: movlhps, operand 1 tied, x registers
;;   4: movhps of memory operand 2, operand 1 tied
;;   5: movlps of memory operand 1, operand 2 tied to the destination
4317 (define_insn "vec_concatv2di"
4318 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4320 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4321 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4324 movq\t{%1, %0|%0, %1}
4325 movq2dq\t{%1, %0|%0, %1}
4326 punpcklqdq\t{%2, %0|%0, %2}
4327 movlhps\t{%2, %0|%0, %2}
4328 movhps\t{%2, %0|%0, %2}
4329 movlps\t{%1, %0|%0, %1}"
4330 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4331 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: operand 0 is the V2DI register, operand 1 the DI value and
;; operand 2 a constant element index.  All work is delegated to
;; ix86_expand_vector_set, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4333 (define_expand "vec_setv2di"
4334 [(match_operand:V2DI 0 "register_operand" "")
4335 (match_operand:DI 1 "register_operand" "")
4336 (match_operand 2 "const_int_operand" "")]
4339 ix86_expand_vector_set (false, operands[0], operands[1],
4340 INTVAL (operands[2]));
;; Expander: operand 0 is the DI result register, operand 1 the V2DI
;; source and operand 2 a constant element index.  All work is delegated
;; to ix86_expand_vector_extract, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4344 (define_expand "vec_extractv2di"
4345 [(match_operand:DI 0 "register_operand" "")
4346 (match_operand:V2DI 1 "register_operand" "")
4347 (match_operand 2 "const_int_operand" "")]
4350 ix86_expand_vector_extract (false, operands[0], operands[1],
4351 INTVAL (operands[2]));
;; Expander: initializes the V2DI register operand 0 from operand 1
;; (no predicate or mode given) by calling ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4355 (define_expand "vec_initv2di"
4356 [(match_operand:V2DI 0 "register_operand" "")
4357 (match_operand 1 "" "")]
4360 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V4SI register, operand 1 the SI value and
;; operand 2 a constant element index.  All work is delegated to
;; ix86_expand_vector_set, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4364 (define_expand "vec_setv4si"
4365 [(match_operand:V4SI 0 "register_operand" "")
4366 (match_operand:SI 1 "register_operand" "")
4367 (match_operand 2 "const_int_operand" "")]
4370 ix86_expand_vector_set (false, operands[0], operands[1],
4371 INTVAL (operands[2]));
;; Expander: operand 0 is the SI result register, operand 1 the V4SI
;; source and operand 2 a constant element index.  All work is delegated
;; to ix86_expand_vector_extract, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4375 (define_expand "vec_extractv4si"
4376 [(match_operand:SI 0 "register_operand" "")
4377 (match_operand:V4SI 1 "register_operand" "")
4378 (match_operand 2 "const_int_operand" "")]
4381 ix86_expand_vector_extract (false, operands[0], operands[1],
4382 INTVAL (operands[2]));
;; Expander: initializes the V4SI register operand 0 from operand 1
;; (no predicate or mode given) by calling ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4386 (define_expand "vec_initv4si"
4387 [(match_operand:V4SI 0 "register_operand" "")
4388 (match_operand 1 "" "")]
4391 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V8HI register, operand 1 the HI value and
;; operand 2 a constant element index.  All work is delegated to
;; ix86_expand_vector_set, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4395 (define_expand "vec_setv8hi"
4396 [(match_operand:V8HI 0 "register_operand" "")
4397 (match_operand:HI 1 "register_operand" "")
4398 (match_operand 2 "const_int_operand" "")]
4401 ix86_expand_vector_set (false, operands[0], operands[1],
4402 INTVAL (operands[2]));
;; Expander: operand 0 is the HI result register, operand 1 the V8HI
;; source and operand 2 a constant element index.  All work is delegated
;; to ix86_expand_vector_extract, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4406 (define_expand "vec_extractv8hi"
4407 [(match_operand:HI 0 "register_operand" "")
4408 (match_operand:V8HI 1 "register_operand" "")
4409 (match_operand 2 "const_int_operand" "")]
4412 ix86_expand_vector_extract (false, operands[0], operands[1],
4413 INTVAL (operands[2]));
;; Expander: initializes the V8HI register operand 0 from operand 1
;; (no predicate or mode given) by calling ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4417 (define_expand "vec_initv8hi"
4418 [(match_operand:V8HI 0 "register_operand" "")
4419 (match_operand 1 "" "")]
4422 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V16QI register, operand 1 the QI value and
;; operand 2 a constant element index.  All work is delegated to
;; ix86_expand_vector_set, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4426 (define_expand "vec_setv16qi"
4427 [(match_operand:V16QI 0 "register_operand" "")
4428 (match_operand:QI 1 "register_operand" "")
4429 (match_operand 2 "const_int_operand" "")]
4432 ix86_expand_vector_set (false, operands[0], operands[1],
4433 INTVAL (operands[2]));
;; Expander: operand 0 is the QI result register, operand 1 the V16QI
;; source and operand 2 a constant element index.  All work is delegated
;; to ix86_expand_vector_extract, which receives INTVAL of the index.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4437 (define_expand "vec_extractv16qi"
4438 [(match_operand:QI 0 "register_operand" "")
4439 (match_operand:V16QI 1 "register_operand" "")
4440 (match_operand 2 "const_int_operand" "")]
4443 ix86_expand_vector_extract (false, operands[0], operands[1],
4444 INTVAL (operands[2]));
;; Expander: initializes the V16QI register operand 0 from operand 1
;; (no predicate or mode given) by calling ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably mmx_ok --
;; confirm against the helper's declaration.
4448 (define_expand "vec_initv16qi"
4449 [(match_operand:V16QI 0 "register_operand" "")
4450 (match_operand 1 "" "")]
4453 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: V16QI register source (operand 1) to V8HI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `u' and `hi' in the pattern name -- confirm in the helper.
4457 (define_expand "vec_unpacku_hi_v16qi"
4458 [(match_operand:V8HI 0 "register_operand" "")
4459 (match_operand:V16QI 1 "register_operand" "")]
4462 ix86_expand_sse_unpack (operands, true, true);
;; Expander: V16QI register source (operand 1) to V8HI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `s' and `hi' in the pattern name -- confirm in the helper.
4466 (define_expand "vec_unpacks_hi_v16qi"
4467 [(match_operand:V8HI 0 "register_operand" "")
4468 (match_operand:V16QI 1 "register_operand" "")]
4471 ix86_expand_sse_unpack (operands, false, true);
;; Expander: V16QI register source (operand 1) to V8HI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `u' and `lo' in the pattern name -- confirm in the helper.
4475 (define_expand "vec_unpacku_lo_v16qi"
4476 [(match_operand:V8HI 0 "register_operand" "")
4477 (match_operand:V16QI 1 "register_operand" "")]
4480 ix86_expand_sse_unpack (operands, true, false);
;; Expander: V16QI register source (operand 1) to V8HI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `s' and `lo' in the pattern name -- confirm in the helper.
4484 (define_expand "vec_unpacks_lo_v16qi"
4485 [(match_operand:V8HI 0 "register_operand" "")
4486 (match_operand:V16QI 1 "register_operand" "")]
4489 ix86_expand_sse_unpack (operands, false, false);
;; Expander: V8HI register source (operand 1) to V4SI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `u' and `hi' in the pattern name -- confirm in the helper.
4493 (define_expand "vec_unpacku_hi_v8hi"
4494 [(match_operand:V4SI 0 "register_operand" "")
4495 (match_operand:V8HI 1 "register_operand" "")]
4498 ix86_expand_sse_unpack (operands, true, true);
;; Expander: V8HI register source (operand 1) to V4SI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `s' and `hi' in the pattern name -- confirm in the helper.
4502 (define_expand "vec_unpacks_hi_v8hi"
4503 [(match_operand:V4SI 0 "register_operand" "")
4504 (match_operand:V8HI 1 "register_operand" "")]
4507 ix86_expand_sse_unpack (operands, false, true);
;; Expander: V8HI register source (operand 1) to V4SI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `u' and `lo' in the pattern name -- confirm in the helper.
4511 (define_expand "vec_unpacku_lo_v8hi"
4512 [(match_operand:V4SI 0 "register_operand" "")
4513 (match_operand:V8HI 1 "register_operand" "")]
4516 ix86_expand_sse_unpack (operands, true, false);
;; Expander: V8HI register source (operand 1) to V4SI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `s' and `lo' in the pattern name -- confirm in the helper.
4520 (define_expand "vec_unpacks_lo_v8hi"
4521 [(match_operand:V4SI 0 "register_operand" "")
4522 (match_operand:V8HI 1 "register_operand" "")]
4525 ix86_expand_sse_unpack (operands, false, false);
;; Expander: V4SI register source (operand 1) to V2DI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `u' and `hi' in the pattern name -- confirm in the helper.
4529 (define_expand "vec_unpacku_hi_v4si"
4530 [(match_operand:V2DI 0 "register_operand" "")
4531 (match_operand:V4SI 1 "register_operand" "")]
4534 ix86_expand_sse_unpack (operands, true, true);
;; Expander: V4SI register source (operand 1) to V2DI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `s' and `hi' in the pattern name -- confirm in the helper.
4538 (define_expand "vec_unpacks_hi_v4si"
4539 [(match_operand:V2DI 0 "register_operand" "")
4540 (match_operand:V4SI 1 "register_operand" "")]
4543 ix86_expand_sse_unpack (operands, false, true);
;; Expander: V4SI register source (operand 1) to V2DI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `u' and `lo' in the pattern name -- confirm in the helper.
4547 (define_expand "vec_unpacku_lo_v4si"
4548 [(match_operand:V2DI 0 "register_operand" "")
4549 (match_operand:V4SI 1 "register_operand" "")]
4552 ix86_expand_sse_unpack (operands, true, false);
;; Expander: V4SI register source (operand 1) to V2DI register result
;; (operand 0), via ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned and high half,
;; matching `s' and `lo' in the pattern name -- confirm in the helper.
4556 (define_expand "vec_unpacks_lo_v4si"
4557 [(match_operand:V2DI 0 "register_operand" "")
4558 (match_operand:V4SI 1 "register_operand" "")]
4561 ix86_expand_sse_unpack (operands, false, false);
4565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4569 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits pavgb on V16QI operands.  Operand 1 is commutative (%) and tied
;; to the destination; operand 2 may be a register or memory.  The RTL
;; includes a const_vector of sixteen 1s.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for PLUS in V16QImode.
4571 (define_insn "sse2_uavgv16qi3"
4572 [(set (match_operand:V16QI 0 "register_operand" "=x")
4578 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4580 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4581 (const_vector:V16QI [(const_int 1) (const_int 1)
4582 (const_int 1) (const_int 1)
4583 (const_int 1) (const_int 1)
4584 (const_int 1) (const_int 1)
4585 (const_int 1) (const_int 1)
4586 (const_int 1) (const_int 1)
4587 (const_int 1) (const_int 1)
4588 (const_int 1) (const_int 1)]))
4590 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4591 "pavgb\t{%2, %0|%0, %2}"
4592 [(set_attr "type" "sseiadd")
4593 (set_attr "prefix_data16" "1")
4594 (set_attr "mode" "TI")])
;; Emits pavgw on V8HI operands.  Operand 1 is commutative (%) and tied
;; to the destination; operand 2 may be a register or memory.  The RTL
;; includes a const_vector of eight 1s.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for PLUS in V8HImode.
4596 (define_insn "sse2_uavgv8hi3"
4597 [(set (match_operand:V8HI 0 "register_operand" "=x")
4603 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4605 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4606 (const_vector:V8HI [(const_int 1) (const_int 1)
4607 (const_int 1) (const_int 1)
4608 (const_int 1) (const_int 1)
4609 (const_int 1) (const_int 1)]))
4611 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4612 "pavgw\t{%2, %0|%0, %2}"
4613 [(set_attr "type" "sseiadd")
4614 (set_attr "prefix_data16" "1")
4615 (set_attr "mode" "TI")])
4617 ;; The correct representation for this is absolutely enormous, and
4618 ;; surely not generally useful.
;; Emits psadbw.  Modelled as an unspec producing V2DI from two V16QI
;; inputs: operand 1 is tied to the destination, operand 2 may be a
;; register or memory.
4619 (define_insn "sse2_psadbw"
4620 [(set (match_operand:V2DI 0 "register_operand" "=x")
4621 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4622 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4625 "psadbw\t{%2, %0|%0, %2}"
4626 [(set_attr "type" "sseiadd")
4627 (set_attr "prefix_data16" "1")
4628 (set_attr "mode" "TI")])
;; Emits movmskps: an unspec taking the V4SF register operand 1 and
;; producing an SI result in the general register operand 0.
4630 (define_insn "sse_movmskps"
4631 [(set (match_operand:SI 0 "register_operand" "=r")
4632 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4635 "movmskps\t{%1, %0|%0, %1}"
4636 [(set_attr "type" "ssecvt")
4637 (set_attr "mode" "V4SF")])
;; Emits movmskpd: an unspec taking the V2DF register operand 1 and
;; producing an SI result in the general register operand 0.
4639 (define_insn "sse2_movmskpd"
4640 [(set (match_operand:SI 0 "register_operand" "=r")
4641 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4644 "movmskpd\t{%1, %0|%0, %1}"
4645 [(set_attr "type" "ssecvt")
4646 (set_attr "mode" "V2DF")])
;; Emits pmovmskb: an unspec taking the V16QI register operand 1 and
;; producing an SI result in the general register operand 0.
4648 (define_insn "sse2_pmovmskb"
4649 [(set (match_operand:SI 0 "register_operand" "=r")
4650 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4653 "pmovmskb\t{%1, %0|%0, %1}"
4654 [(set_attr "type" "ssecvt")
4655 (set_attr "prefix_data16" "1")
4656 (set_attr "mode" "SI")])
;; Expander: a set of the V16QI memory operand 0 from an unspec whose
;; inputs include the V16QI register operands 1 and 2.
4658 (define_expand "sse2_maskmovdqu"
4659 [(set (match_operand:V16QI 0 "memory_operand" "")
4660 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4661 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is the SI register operand 0, restricted by constraint D; it
;; does not appear in the template, which prints only the two V16QI
;; register operands.  The previous memory contents are an unspec input
;; through (mem:V16QI (match_dup 0)).
4667 (define_insn "*sse2_maskmovdqu"
4668 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4669 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4670 (match_operand:V16QI 2 "register_operand" "x")
4671 (mem:V16QI (match_dup 0))]
4673 "TARGET_SSE2 && !TARGET_64BIT"
4674 ;; @@@ check ordering of operands in intel/nonintel syntax
4675 "maskmovdqu\t{%2, %1|%1, %2}"
4676 [(set_attr "type" "ssecvt")
4677 (set_attr "prefix_data16" "1")
4678 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape
;; as the 32-bit pattern, except that the address register operand 0 is
;; DImode.  The template prints only the two V16QI register operands.
4680 (define_insn "*sse2_maskmovdqu_rex64"
4681 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4682 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4683 (match_operand:V16QI 2 "register_operand" "x")
4684 (mem:V16QI (match_dup 0))]
4686 "TARGET_SSE2 && TARGET_64BIT"
4687 ;; @@@ check ordering of operands in intel/nonintel syntax
4688 "maskmovdqu\t{%2, %1|%1, %2}"
4689 [(set_attr "type" "ssecvt")
4690 (set_attr "prefix_data16" "1")
4691 (set_attr "mode" "TI")])
;; unspec_volatile taking the SI memory operand 0 as input; the
;; instruction is attributed as a memory load.
4693 (define_insn "sse_ldmxcsr"
4694 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
4698 [(set_attr "type" "sse")
4699 (set_attr "memory" "load")])
;; Stores the result of the UNSPECV_STMXCSR unspec_volatile into the SI
;; memory operand 0; the instruction is attributed as a memory store.
4701 (define_insn "sse_stmxcsr"
4702 [(set (match_operand:SI 0 "memory_operand" "=m")
4703 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
4706 [(set_attr "type" "sse")
4707 (set_attr "memory" "store")])
;; Expander for the UNSPEC_SFENCE pattern, enabled for TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code creates operand 0 itself: a
;; BLKmode MEM whose address is a Pmode SCRATCH, marked volatile.
4709 (define_expand "sse_sfence"
4711 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4712 "TARGET_SSE || TARGET_3DNOW_A"
4714 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4715 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the UNSPEC_SFENCE set of a BLKmode operand 0 (no
;; predicate, no constraint), enabled for TARGET_SSE or TARGET_3DNOW_A.
;; Its memory attribute is unknown.
4718 (define_insn "*sse_sfence"
4719 [(set (match_operand:BLK 0 "" "")
4720 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4721 "TARGET_SSE || TARGET_3DNOW_A"
4723 [(set_attr "type" "sse")
4724 (set_attr "memory" "unknown")])
;; unspec_volatile taking an address operand (predicate address_operand,
;; constraint p).  Its memory attribute is unknown.
4726 (define_insn "sse2_clflush"
4727 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
4731 [(set_attr "type" "sse")
4732 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_MFENCE pattern.  The preparation code creates
;; operand 0 itself: a BLKmode MEM whose address is a Pmode SCRATCH,
;; marked volatile.
4734 (define_expand "sse2_mfence"
4736 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4739 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4740 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the UNSPEC_MFENCE set of a BLKmode operand 0 (no
;; predicate, no constraint).  Its memory attribute is unknown.
4743 (define_insn "*sse2_mfence"
4744 [(set (match_operand:BLK 0 "" "")
4745 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4748 [(set_attr "type" "sse")
4749 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_LFENCE pattern.  The preparation code creates
;; operand 0 itself: a BLKmode MEM whose address is a Pmode SCRATCH,
;; marked volatile.
4751 (define_expand "sse2_lfence"
4753 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4756 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4757 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the UNSPEC_LFENCE set of a BLKmode operand 0 (no
;; predicate, no constraint).  Its memory attribute is unknown.
4760 (define_insn "*sse2_lfence"
4761 [(set (match_operand:BLK 0 "" "")
4762 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4765 [(set_attr "type" "sse")
4766 (set_attr "memory" "unknown")])
;; unspec_volatile over two SI register operands pinned by constraints
;; a (operand 0) and c (operand 1).  One SImode pattern serves both
;; 32-bit and 64-bit targets, as the in-pattern comment explains.
;; The length attribute is fixed at 3.
4768 (define_insn "sse3_mwait"
4769 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4770 (match_operand:SI 1 "register_operand" "c")]
4773 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
4774 ;; Since 32bit register operands are implicitly zero extended to 64bit,
4775 ;; we only need to set up 32bit registers.
4777 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): unspec_volatile over
;; three SI register operands pinned by constraints a, c and d, printed
;; in that order.  The length attribute is fixed at 3.
4779 (define_insn "sse3_monitor"
4780 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4781 (match_operand:SI 1 "register_operand" "c")
4782 (match_operand:SI 2 "register_operand" "d")]
4784 "TARGET_SSE3 && !TARGET_64BIT"
4785 "monitor\t%0, %1, %2"
4786 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0 (constraint
;; a) is DImode; operands 1 and 2 (constraints c and d) stay SImode for
;; the reason given in the in-pattern comment.  The length attribute is
;; fixed at 3.
4788 (define_insn "sse3_monitor64"
4789 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
4790 (match_operand:SI 1 "register_operand" "c")
4791 (match_operand:SI 2 "register_operand" "d")]
4793 "TARGET_SSE3 && TARGET_64BIT"
4794 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
4795 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4796 ;; zero extended to 64bit, we only need to set up 32bit registers.
4798 [(set_attr "length" "3")])
;; Emits phaddw on V8HI (x) registers.  The RTL spells out HI
;; vec_selects of elements 0..7 of operand 1 (tied to the destination)
;; followed by elements 0..7 of operand 2 (register or memory), grouped
;; in consecutive pairs (0,1), (2,3), (4,5), (6,7).
4801 (define_insn "ssse3_phaddwv8hi3"
4802 [(set (match_operand:V8HI 0 "register_operand" "=x")
4808 (match_operand:V8HI 1 "register_operand" "0")
4809 (parallel [(const_int 0)]))
4810 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4812 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4813 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4816 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4817 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4819 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4820 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4825 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4826 (parallel [(const_int 0)]))
4827 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4829 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4830 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4833 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4834 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4836 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4837 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4839 "phaddw\t{%2, %0|%0, %2}"
4840 [(set_attr "type" "sseiadd")
4841 (set_attr "prefix_data16" "1")
4842 (set_attr "prefix_extra" "1")
4843 (set_attr "mode" "TI")])
;; Emits phaddw on V4HI (y) registers.  The RTL spells out HI
;; vec_selects of elements 0..3 of operand 1 (tied to the destination)
;; followed by elements 0..3 of operand 2 (register or memory), grouped
;; in consecutive pairs.  No prefix_data16 attribute is set here.
4845 (define_insn "ssse3_phaddwv4hi3"
4846 [(set (match_operand:V4HI 0 "register_operand" "=y")
4851 (match_operand:V4HI 1 "register_operand" "0")
4852 (parallel [(const_int 0)]))
4853 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4855 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4856 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4860 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4861 (parallel [(const_int 0)]))
4862 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4864 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4865 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4867 "phaddw\t{%2, %0|%0, %2}"
4868 [(set_attr "type" "sseiadd")
4869 (set_attr "prefix_extra" "1")
4870 (set_attr "mode" "DI")])
;; Emits phaddd on V4SI (x) registers.  The RTL spells out SI
;; vec_selects of elements 0..3 of operand 1 (tied to the destination)
;; followed by elements 0..3 of operand 2 (register or memory), grouped
;; in consecutive pairs.
4872 (define_insn "ssse3_phadddv4si3"
4873 [(set (match_operand:V4SI 0 "register_operand" "=x")
4878 (match_operand:V4SI 1 "register_operand" "0")
4879 (parallel [(const_int 0)]))
4880 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4882 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4883 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4887 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4888 (parallel [(const_int 0)]))
4889 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4891 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4892 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4894 "phaddd\t{%2, %0|%0, %2}"
4895 [(set_attr "type" "sseiadd")
4896 (set_attr "prefix_data16" "1")
4897 (set_attr "prefix_extra" "1")
4898 (set_attr "mode" "TI")])
;; Emits phaddd on V2SI (y) registers.  The RTL pairs SI elements 0 and
;; 1 of operand 1 (tied to the destination), then elements 0 and 1 of
;; operand 2 (register or memory).  No prefix_data16 attribute is set.
4900 (define_insn "ssse3_phadddv2si3"
4901 [(set (match_operand:V2SI 0 "register_operand" "=y")
4905 (match_operand:V2SI 1 "register_operand" "0")
4906 (parallel [(const_int 0)]))
4907 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4910 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4911 (parallel [(const_int 0)]))
4912 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4914 "phaddd\t{%2, %0|%0, %2}"
4915 [(set_attr "type" "sseiadd")
4916 (set_attr "prefix_extra" "1")
4917 (set_attr "mode" "DI")])
;; Emits phaddsw on V8HI (x) registers.  The RTL spells out HI
;; vec_selects of elements 0..7 of operand 1 (tied to the destination)
;; followed by elements 0..7 of operand 2 (register or memory), grouped
;; in consecutive pairs (0,1), (2,3), (4,5), (6,7).
4919 (define_insn "ssse3_phaddswv8hi3"
4920 [(set (match_operand:V8HI 0 "register_operand" "=x")
4926 (match_operand:V8HI 1 "register_operand" "0")
4927 (parallel [(const_int 0)]))
4928 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4930 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4931 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4934 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4935 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4937 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4938 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4943 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4944 (parallel [(const_int 0)]))
4945 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4947 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4948 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4951 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4952 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4954 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4955 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4957 "phaddsw\t{%2, %0|%0, %2}"
4958 [(set_attr "type" "sseiadd")
4959 (set_attr "prefix_data16" "1")
4960 (set_attr "prefix_extra" "1")
4961 (set_attr "mode" "TI")])
;; Emits phaddsw on V4HI (y) registers.  The RTL spells out HI
;; vec_selects of elements 0..3 of operand 1 (tied to the destination)
;; followed by elements 0..3 of operand 2 (register or memory), grouped
;; in consecutive pairs.  No prefix_data16 attribute is set here.
4963 (define_insn "ssse3_phaddswv4hi3"
4964 [(set (match_operand:V4HI 0 "register_operand" "=y")
4969 (match_operand:V4HI 1 "register_operand" "0")
4970 (parallel [(const_int 0)]))
4971 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4973 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4974 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4978 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4979 (parallel [(const_int 0)]))
4980 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4982 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4983 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4985 "phaddsw\t{%2, %0|%0, %2}"
4986 [(set_attr "type" "sseiadd")
4987 (set_attr "prefix_extra" "1")
4988 (set_attr "mode" "DI")])
;; Emits phsubw on V8HI (x) registers.  The RTL spells out HI
;; vec_selects of elements 0..7 of operand 1 (tied to the destination)
;; followed by elements 0..7 of operand 2 (register or memory), grouped
;; in consecutive pairs (0,1), (2,3), (4,5), (6,7).
4990 (define_insn "ssse3_phsubwv8hi3"
4991 [(set (match_operand:V8HI 0 "register_operand" "=x")
4997 (match_operand:V8HI 1 "register_operand" "0")
4998 (parallel [(const_int 0)]))
4999 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5001 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5002 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5005 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5006 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5008 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5009 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5014 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5015 (parallel [(const_int 0)]))
5016 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5018 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5019 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5022 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5023 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5025 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5026 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5028 "phsubw\t{%2, %0|%0, %2}"
5029 [(set_attr "type" "sseiadd")
5030 (set_attr "prefix_data16" "1")
5031 (set_attr "prefix_extra" "1")
5032 (set_attr "mode" "TI")])
;; Emits phsubw on V4HI (y) registers.  The RTL spells out HI
;; vec_selects of elements 0..3 of operand 1 (tied to the destination)
;; followed by elements 0..3 of operand 2 (register or memory), grouped
;; in consecutive pairs.  No prefix_data16 attribute is set here.
5034 (define_insn "ssse3_phsubwv4hi3"
5035 [(set (match_operand:V4HI 0 "register_operand" "=y")
5040 (match_operand:V4HI 1 "register_operand" "0")
5041 (parallel [(const_int 0)]))
5042 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5044 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5045 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5049 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5050 (parallel [(const_int 0)]))
5051 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5053 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5054 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5056 "phsubw\t{%2, %0|%0, %2}"
5057 [(set_attr "type" "sseiadd")
5058 (set_attr "prefix_extra" "1")
5059 (set_attr "mode" "DI")])
;; Emits phsubd on V4SI (x) registers.  The RTL spells out SI
;; vec_selects of elements 0..3 of operand 1 (tied to the destination)
;; followed by elements 0..3 of operand 2 (register or memory), grouped
;; in consecutive pairs.
5061 (define_insn "ssse3_phsubdv4si3"
5062 [(set (match_operand:V4SI 0 "register_operand" "=x")
5067 (match_operand:V4SI 1 "register_operand" "0")
5068 (parallel [(const_int 0)]))
5069 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5071 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5072 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5076 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5077 (parallel [(const_int 0)]))
5078 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5080 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5081 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5083 "phsubd\t{%2, %0|%0, %2}"
5084 [(set_attr "type" "sseiadd")
5085 (set_attr "prefix_data16" "1")
5086 (set_attr "prefix_extra" "1")
5087 (set_attr "mode" "TI")])
;; Emits phsubd on V2SI (y) registers.  The RTL pairs SI elements 0 and
;; 1 of operand 1 (tied to the destination), then elements 0 and 1 of
;; operand 2 (register or memory).  No prefix_data16 attribute is set.
5089 (define_insn "ssse3_phsubdv2si3"
5090 [(set (match_operand:V2SI 0 "register_operand" "=y")
5094 (match_operand:V2SI 1 "register_operand" "0")
5095 (parallel [(const_int 0)]))
5096 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5099 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5100 (parallel [(const_int 0)]))
5101 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5103 "phsubd\t{%2, %0|%0, %2}"
5104 [(set_attr "type" "sseiadd")
5105 (set_attr "prefix_extra" "1")
5106 (set_attr "mode" "DI")])
;; Emits phsubsw on V8HI (x) registers.  The RTL spells out HI
;; vec_selects of elements 0..7 of operand 1 (tied to the destination)
;; followed by elements 0..7 of operand 2 (register or memory), grouped
;; in consecutive pairs (0,1), (2,3), (4,5), (6,7).
5108 (define_insn "ssse3_phsubswv8hi3"
5109 [(set (match_operand:V8HI 0 "register_operand" "=x")
5115 (match_operand:V8HI 1 "register_operand" "0")
5116 (parallel [(const_int 0)]))
5117 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5119 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5120 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5123 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5124 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5126 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5127 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5132 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5133 (parallel [(const_int 0)]))
5134 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5136 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5137 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5140 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5141 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5143 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5144 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5146 "phsubsw\t{%2, %0|%0, %2}"
5147 [(set_attr "type" "sseiadd")
5148 (set_attr "prefix_data16" "1")
5149 (set_attr "prefix_extra" "1")
5150 (set_attr "mode" "TI")])
;; Emits phsubsw on V4HI (y) registers.  The RTL spells out HI
;; vec_selects of elements 0..3 of operand 1 (tied to the destination)
;; followed by elements 0..3 of operand 2 (register or memory), grouped
;; in consecutive pairs.  No prefix_data16 attribute is set here.
5152 (define_insn "ssse3_phsubswv4hi3"
5153 [(set (match_operand:V4HI 0 "register_operand" "=y")
5158 (match_operand:V4HI 1 "register_operand" "0")
5159 (parallel [(const_int 0)]))
5160 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5162 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5163 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5167 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5168 (parallel [(const_int 0)]))
5169 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5171 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5172 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5174 "phsubsw\t{%2, %0|%0, %2}"
5175 [(set_attr "type" "sseiadd")
5176 (set_attr "prefix_extra" "1")
5177 (set_attr "mode" "DI")])
;; Emits pmaddubsw: V16QI inputs, V8HI result, x registers.  Operand 1
;; is commutative (%) and tied to the destination; operand 2 may be a
;; register or memory.  Each operand is selected twice in the RTL, with
;; index lists starting at 0 and at 1 respectively.
5179 (define_insn "ssse3_pmaddubswv8hi3"
5180 [(set (match_operand:V8HI 0 "register_operand" "=x")
5185 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5186 (parallel [(const_int 0)
5196 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5197 (parallel [(const_int 0)
5207 (vec_select:V16QI (match_dup 1)
5208 (parallel [(const_int 1)
5217 (vec_select:V16QI (match_dup 2)
5218 (parallel [(const_int 1)
5225 (const_int 15)]))))))]
5227 "pmaddubsw\t{%2, %0|%0, %2}"
5228 [(set_attr "type" "sseiadd")
5229 (set_attr "prefix_data16" "1")
5230 (set_attr "prefix_extra" "1")
5231 (set_attr "mode" "TI")])
;; Emits pmaddubsw: V8QI inputs, V4HI result, y registers.  Operand 1 is
;; commutative (%) and tied to the destination; operand 2 may be a
;; register or memory.  Each operand is selected twice in the RTL, with
;; index lists starting at 0 and at 1 respectively.
5233 (define_insn "ssse3_pmaddubswv4hi3"
5234 [(set (match_operand:V4HI 0 "register_operand" "=y")
5239 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5240 (parallel [(const_int 0)
5246 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5247 (parallel [(const_int 0)
5253 (vec_select:V8QI (match_dup 1)
5254 (parallel [(const_int 1)
5259 (vec_select:V8QI (match_dup 2)
5260 (parallel [(const_int 1)
5263 (const_int 7)]))))))]
5265 "pmaddubsw\t{%2, %0|%0, %2}"
5266 [(set_attr "type" "sseiadd")
5267 (set_attr "prefix_extra" "1")
5268 (set_attr "mode" "DI")])
;; Emits pmulhrsw on V8HI (x) registers.  Operand 1 is commutative (%)
;; and tied to the destination; operand 2 may be a register or memory.
;; The RTL includes a const_vector of eight 1s.  Requires TARGET_SSSE3
;; and ix86_binary_operator_ok for MULT in V8HImode.
5270 (define_insn "ssse3_pmulhrswv8hi3"
5271 [(set (match_operand:V8HI 0 "register_operand" "=x")
5278 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5280 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5282 (const_vector:V8HI [(const_int 1) (const_int 1)
5283 (const_int 1) (const_int 1)
5284 (const_int 1) (const_int 1)
5285 (const_int 1) (const_int 1)]))
5287 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5288 "pmulhrsw\t{%2, %0|%0, %2}"
5289 [(set_attr "type" "sseimul")
5290 (set_attr "prefix_data16" "1")
5291 (set_attr "prefix_extra" "1")
5292 (set_attr "mode" "TI")])
;; Emits pmulhrsw on V4HI (y) registers.  Operand 1 is commutative (%)
;; and tied to the destination; operand 2 may be a register or memory.
;; The RTL includes a const_vector of four 1s.  Requires TARGET_SSSE3
;; and ix86_binary_operator_ok for MULT in V4HImode.
5294 (define_insn "ssse3_pmulhrswv4hi3"
5295 [(set (match_operand:V4HI 0 "register_operand" "=y")
5302 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5304 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5306 (const_vector:V4HI [(const_int 1) (const_int 1)
5307 (const_int 1) (const_int 1)]))
5309 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5310 "pmulhrsw\t{%2, %0|%0, %2}"
5311 [(set_attr "type" "sseimul")
5312 (set_attr "prefix_extra" "1")
5313 (set_attr "mode" "DI")])
;; Emits pshufb on V16QI (x) registers, modelled as an unspec.  Operand
;; 1 is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): the `;' after the template string looks unintentional
;; (presumably read as an empty comment); most templates here omit it.
5315 (define_insn "ssse3_pshufbv16qi3"
5316 [(set (match_operand:V16QI 0 "register_operand" "=x")
5317 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5318 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5321 "pshufb\t{%2, %0|%0, %2}";
5322 [(set_attr "type" "sselog1")
5323 (set_attr "prefix_data16" "1")
5324 (set_attr "prefix_extra" "1")
5325 (set_attr "mode" "TI")])
;; Emits pshufb on V8QI (y) registers, modelled as an unspec.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): the `;' after the template string looks unintentional
;; (presumably read as an empty comment); most templates here omit it.
5327 (define_insn "ssse3_pshufbv8qi3"
5328 [(set (match_operand:V8QI 0 "register_operand" "=y")
5329 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5330 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5333 "pshufb\t{%2, %0|%0, %2}";
5334 [(set_attr "type" "sselog1")
5335 (set_attr "prefix_extra" "1")
5336 (set_attr "mode" "DI")])
;; Emits psign with the <ssevecsize> suffix for each SSEMODE124 mode,
;; on x registers, modelled as an unspec.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
;; NOTE(review): the `;' after the template string looks unintentional
;; (presumably read as an empty comment); most templates here omit it.
5338 (define_insn "ssse3_psign<mode>3"
5339 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5340 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5341 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5344 "psign<ssevecsize>\t{%2, %0|%0, %2}";
5345 [(set_attr "type" "sselog1")
5346 (set_attr "prefix_data16" "1")
5347 (set_attr "prefix_extra" "1")
5348 (set_attr "mode" "TI")])
;; Emits psign with the <mmxvecsize> suffix for each MMXMODEI mode, on
;; y registers, modelled as an unspec.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
;; NOTE(review): the `;' after the template string looks unintentional
;; (presumably read as an empty comment); most templates here omit it.
5350 (define_insn "ssse3_psign<mode>3"
5351 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5352 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5353 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5356 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
5357 [(set_attr "type" "sselog1")
5358 (set_attr "prefix_extra" "1")
5359 (set_attr "mode" "DI")])
;; Emits palignr on TImode values in x registers, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Operand 3 must satisfy const_0_to_255_mul_8_operand, and the
;; output code divides it by 8 before printing the immediate.
5361 (define_insn "ssse3_palignrti"
5362 [(set (match_operand:TI 0 "register_operand" "=x")
5363 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5364 (match_operand:TI 2 "nonimmediate_operand" "xm")
5365 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5369 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5370 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5372 [(set_attr "type" "sseishft")
5373 (set_attr "prefix_data16" "1")
5374 (set_attr "prefix_extra" "1")
5375 (set_attr "mode" "TI")])
;; Emits palignr on DImode values in y registers, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Operand 3 must satisfy const_0_to_255_mul_8_operand, and the
;; output code divides it by 8 before printing the immediate.
5377 (define_insn "ssse3_palignrdi"
5378 [(set (match_operand:DI 0 "register_operand" "=y")
5379 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5380 (match_operand:DI 2 "nonimmediate_operand" "ym")
5381 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5385 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5386 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5388 [(set_attr "type" "sseishft")
5389 (set_attr "prefix_extra" "1")
5390 (set_attr "mode" "DI")])
;; Vector abs for each SSEMODE124 mode: emits pabs with the
;; <ssevecsize> suffix.  The source may be an x register or memory and
;; is not tied to the x-register destination.
;; NOTE(review): the `;' after the template string looks unintentional
;; (presumably read as an empty comment); most templates here omit it.
5392 (define_insn "abs<mode>2"
5393 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5394 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5396 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
5397 [(set_attr "type" "sselog1")
5398 (set_attr "prefix_data16" "1")
5399 (set_attr "prefix_extra" "1")
5400 (set_attr "mode" "TI")])
;; Vector abs for each MMXMODEI mode: emits pabs with the <mmxvecsize>
;; suffix.  The source may be a y register or memory and is not tied to
;; the y-register destination.
;; NOTE(review): the `;' after the template string looks unintentional
;; (presumably read as an empty comment); most templates here omit it.
5402 (define_insn "abs<mode>2"
5403 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5404 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5406 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
5407 [(set_attr "type" "sselog1")
5408 (set_attr "prefix_extra" "1")
5409 (set_attr "mode" "DI")])
5411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5413 ;; AMD SSE4A instructions
5415 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits movntsd: stores to the DF memory operand 0 an unspec of
;; element 0 selected from the V2DF register operand 1.
5417 (define_insn "sse4a_vmmovntv2df"
5418 [(set (match_operand:DF 0 "memory_operand" "=m")
5419 (unspec:DF [(vec_select:DF
5420 (match_operand:V2DF 1 "register_operand" "x")
5421 (parallel [(const_int 0)]))]
5424 "movntsd\t{%1, %0|%0, %1}"
5425 [(set_attr "type" "ssemov")
5426 (set_attr "mode" "DF")])
;; Emits movntsd: stores to the DF memory operand 0 an unspec of the
;; scalar DF register operand 1 (x register).
5428 (define_insn "sse4a_movntdf"
5429 [(set (match_operand:DF 0 "memory_operand" "=m")
5430 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5433 "movntsd\t{%1, %0|%0, %1}"
5434 [(set_attr "type" "ssemov")
5435 (set_attr "mode" "DF")])
;; Emits movntss: stores to the SF memory operand 0 an unspec of
;; element 0 selected from the V4SF register operand 1.
5437 (define_insn "sse4a_vmmovntv4sf"
5438 [(set (match_operand:SF 0 "memory_operand" "=m")
5439 (unspec:SF [(vec_select:SF
5440 (match_operand:V4SF 1 "register_operand" "x")
5441 (parallel [(const_int 0)]))]
5444 "movntss\t{%1, %0|%0, %1}"
5445 [(set_attr "type" "ssemov")
5446 (set_attr "mode" "SF")])
;; Emits movntss: stores to the SF memory operand 0 an unspec of the
;; scalar SF register operand 1 (x register).
5448 (define_insn "sse4a_movntsf"
5449 [(set (match_operand:SF 0 "memory_operand" "=m")
5450 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5453 "movntss\t{%1, %0|%0, %1}"
5454 [(set_attr "type" "ssemov")
5455 (set_attr "mode" "SF")])
;; Immediate form of extrq, modelled as a V2DI unspec.  Operand 1 is
;; tied to the destination; operands 2 and 3 are integer constants.
;; The AT&T template prints operand 3 before operand 2.
5457 (define_insn "sse4a_extrqi"
5458 [(set (match_operand:V2DI 0 "register_operand" "=x")
5459 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5460 (match_operand 2 "const_int_operand" "")
5461 (match_operand 3 "const_int_operand" "")]
5464 "extrq\t{%3, %2, %0|%0, %2, %3}"
5465 [(set_attr "type" "sse")
5466 (set_attr "prefix_data16" "1")
5467 (set_attr "mode" "TI")])
;; Register form of extrq, modelled as a V2DI unspec.  Operand 1 is tied
;; to the destination; operand 2 is a V16QI value in an x register.
5469 (define_insn "sse4a_extrq"
5470 [(set (match_operand:V2DI 0 "register_operand" "=x")
5471 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5472 (match_operand:V16QI 2 "register_operand" "x")]
5475 "extrq\t{%2, %0|%0, %2}"
5476 [(set_attr "type" "sse")
5477 (set_attr "prefix_data16" "1")
5478 (set_attr "mode" "TI")])
;; Immediate form of insertq, modelled as a V2DI unspec.  Operand 1 is
;; tied to the destination, operand 2 is a V2DI x register, and operands
;; 3 and 4 are integer constants.  The AT&T template prints operand 4
;; before operand 3.
5480 (define_insn "sse4a_insertqi"
5481 [(set (match_operand:V2DI 0 "register_operand" "=x")
5482 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5483 (match_operand:V2DI 2 "register_operand" "x")
5484 (match_operand 3 "const_int_operand" "")
5485 (match_operand 4 "const_int_operand" "")]
5488 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5489 [(set_attr "type" "sseins")
5490 (set_attr "prefix_rep" "1")
5491 (set_attr "mode" "TI")])
;; Register form of insertq, modelled as a V2DI unspec.  Operand 1 is
;; tied to the destination; operand 2 is a V2DI value in an x register.
5493 (define_insn "sse4a_insertq"
5494 [(set (match_operand:V2DI 0 "register_operand" "=x")
5495 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5496 (match_operand:V2DI 2 "register_operand" "x")]
5499 "insertq\t{%2, %0|%0, %2}"
5500 [(set_attr "type" "sseins")
5501 (set_attr "prefix_rep" "1")
5502 (set_attr "mode" "TI")])