1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; match the element widths, in bytes, of the modes it lists
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the SSEMODEI integer vector modes.  Both operands are
;; handed to ix86_expand_vector_move together with the iterated mode.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  The three alternatives are: SSE
;; constant (C) into an xmm register, xmm-or-memory into xmm, and xmm into
;; memory.  The condition requires at least one operand to be a register.
;; One output path returns the opcode picked by
;; standard_sse_constant_opcode; the others print movaps when the insn's
;; mode attribute is V4SF and movdqa otherwise.  The mode attribute is
;; derived from optimize_size, TARGET_SSE2 and, for alternative 2,
;; TARGET_SSE_TYPELESS_STORES.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 && (register_operand (operands[0], <MODE>mode)
64 || register_operand (operands[1], <MODE>mode))"
66 switch (which_alternative)
69 return standard_sse_constant_opcode (insn, operands[1]);
72 if (get_attr_mode (insn) == MODE_V4SF)
73 return "movaps\t{%1, %0|%0, %1}";
75 return "movdqa\t{%1, %0|%0, %1}";
80 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
84 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
85 (and (eq_attr "alternative" "2")
86 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
89 (const_string "TI")))])
91 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
92 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
93 ;; from memory, we'd prefer to load the memory directly into the %xmm
94 ;; register. To facilitate this happy circumstance, this pattern won't
95 ;; split until after register allocation. If the 64-bit value didn't
96 ;; come from memory, this is the best we can do. This is much better
97 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Operand 0 is the V4SI destination, operand 1 the DImode source (a
;; general register pair or memory) and operand 2 a V4SI scratch that is
;; only a real xmm register in the register-source alternative.
;; Enabled for 32-bit SSE2 targets with TARGET_INTER_UNIT_MOVES; the split
;; condition adds reload_completed.  For a register source the two SImode
;; halves (subreg offsets 0 and 4) are loaded separately with sse2_loadld
;; and then combined with sse2_punpckldq.  For a memory source a single
;; vec_concatv2di with a zero second element is emitted.
100 (define_insn_and_split "movdi_to_sse"
102 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
103 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
104 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
105 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
107 "&& reload_completed"
110 if (register_operand (operands[1], DImode))
112 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
113 Assemble the 64-bit DImode value in an xmm register. */
114 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
115 gen_rtx_SUBREG (SImode, operands[1], 0)));
116 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 4)));
118 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
120 else if (memory_operand (operands[1], DImode))
121 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for V4SF; operands are passed to ix86_expand_vector_move.
126 (define_expand "movv4sf"
127 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
128 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
131 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Alternatives: SSE constant (C) into xmm, xmm-or-memory
;; into xmm, xmm into memory; at least one operand must be a register.
;; One output path uses standard_sse_constant_opcode, another prints
;; movaps.  The mode attribute is always V4SF.
135 (define_insn "*movv4sf_internal"
136 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
137 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
139 && (register_operand (operands[0], V4SFmode)
140 || register_operand (operands[1], V4SFmode))"
142 switch (which_alternative)
145 return standard_sse_constant_opcode (insn, operands[1]);
148 return "movaps\t{%1, %0|%0, %1}";
153 [(set_attr "type" "sselog1,ssemov,ssemov")
154 (set_attr "mode" "V4SF")])
;; After reload, a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand is re-expressed using a
;; vec_duplicate of operand 1.  The preparation code narrows operand 1 to
;; its SFmode subreg at offset 0 and sets operand 2 to the V4SF zero
;; vector.
157 [(set (match_operand:V4SF 0 "register_operand" "")
158 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
159 "TARGET_SSE && reload_completed"
162 (vec_duplicate:V4SF (match_dup 1))
166 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
167 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; operands are passed to ix86_expand_vector_move.
170 (define_expand "movv2df"
171 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
172 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
175 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Alternatives: SSE constant (C) into xmm, xmm-or-memory
;; into xmm, xmm into memory; at least one operand must be a register.
;; One output path uses standard_sse_constant_opcode; the others print
;; movaps when the insn's mode attribute is V4SF and movapd otherwise.
;; The mode attribute chooses between V4SF and V2DF based on
;; optimize_size, TARGET_SSE2 and, for alternative 2,
;; TARGET_SSE_TYPELESS_STORES.
179 (define_insn "*movv2df_internal"
180 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
181 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
183 && (register_operand (operands[0], V2DFmode)
184 || register_operand (operands[1], V2DFmode))"
186 switch (which_alternative)
189 return standard_sse_constant_opcode (insn, operands[1]);
192 if (get_attr_mode (insn) == MODE_V4SF)
193 return "movaps\t{%1, %0|%0, %1}";
195 return "movapd\t{%1, %0|%0, %1}";
200 [(set_attr "type" "sselog1,ssemov,ssemov")
203 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
204 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
205 (and (eq_attr "alternative" "2")
206 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
208 (const_string "V4SF")
209 (const_string "V2DF")))])
;; After reload (SSE2 only), a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand becomes a vec_concat of operand 1,
;; narrowed to its DFmode subreg at offset 0, with a DFmode zero.
212 [(set (match_operand:V2DF 0 "register_operand" "")
213 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
214 "TARGET_SSE2 && reload_completed"
215 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
217 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
218 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode; the register operand is
;; passed to ix86_expand_push together with the iterated mode.
221 (define_expand "push<mode>1"
222 [(match_operand:SSEMODE 0 "register_operand" "")]
225 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE vector mode; operands are
;; passed to ix86_expand_vector_move_misalign.
229 (define_expand "movmisalign<mode>"
230 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
231 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
234 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move (movups).  Either side may be an xmm register or
;; memory, but the condition rejects memory-to-memory.  The mode attribute
;; is V4SF, matching the packed single-precision operands and mnemonic
;; (V2DF belongs to the double-precision sse2_movupd form).
238 (define_insn "sse_movups"
239 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
240 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
242 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
243 "movups\t{%1, %0|%0, %1}"
244 [(set_attr "type" "ssemov")
245 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move (movupd), SSE2 only.  Either side may be an xmm
;; register or memory, but the condition rejects memory-to-memory.
247 (define_insn "sse2_movupd"
248 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
249 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
251 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
252 "movupd\t{%1, %0|%0, %1}"
253 [(set_attr "type" "ssemov")
254 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move (movdqu), SSE2 only.  Either side may be an xmm
;; register or memory, but the condition rejects memory-to-memory.
256 (define_insn "sse2_movdqu"
257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
258 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
260 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
261 "movdqu\t{%1, %0|%0, %1}"
262 [(set_attr "type" "ssemov")
263 (set_attr "prefix_data16" "1")
264 (set_attr "mode" "TI")])
;; Store of a V4SF xmm register to memory using movntps.  The source is
;; wrapped in an unspec, so this is not matched as an ordinary move.
266 (define_insn "sse_movntv4sf"
267 [(set (match_operand:V4SF 0 "memory_operand" "=m")
268 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
271 "movntps\t{%1, %0|%0, %1}"
272 [(set_attr "type" "ssemov")
273 (set_attr "mode" "V4SF")])
;; Store of a V2DF xmm register to memory using movntpd.  The source is
;; wrapped in an unspec, so this is not matched as an ordinary move.
;; Classified as "ssemov", like the V4SF sibling sse_movntv4sf: the insn
;; is a register-to-memory store, not a conversion.
275 (define_insn "sse2_movntv2df"
276 [(set (match_operand:V2DF 0 "memory_operand" "=m")
277 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
280 "movntpd\t{%1, %0|%0, %1}"
281 [(set_attr "type" "ssemov")
282 (set_attr "mode" "V2DF")])
;; Store of a V2DI xmm register to memory using movntdq.  The source is
;; wrapped in an unspec, so this is not matched as an ordinary move.
;; Classified as "ssemov", like the V4SF sibling sse_movntv4sf: the insn
;; is a register-to-memory store, not a conversion.
284 (define_insn "sse2_movntv2di"
285 [(set (match_operand:V2DI 0 "memory_operand" "=m")
286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
289 "movntdq\t{%1, %0|%0, %1}"
290 [(set_attr "type" "ssemov")
291 (set_attr "prefix_data16" "1")
292 (set_attr "mode" "TI")])
;; Store of a 32-bit general register ("r") to memory using movnti.
;; Both operands are SImode, so the insn mode is SI rather than a vector
;; floating-point mode, and the insn is classified as a move ("ssemov")
;; like the other movnt* store patterns rather than as a conversion.
294 (define_insn "sse2_movntsi"
295 [(set (match_operand:SI 0 "memory_operand" "=m")
296 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
299 "movnti\t{%1, %0|%0, %1}"
300 [(set_attr "type" "ssemov")
301 (set_attr "mode" "SI")])
;; Load of a V16QI memory operand into an xmm register using lddqu.  The
;; source must be memory; the load is wrapped in an unspec.
303 (define_insn "sse3_lddqu"
304 [(set (match_operand:V16QI 0 "register_operand" "=x")
305 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
308 "lddqu\t{%1, %0|%0, %1}"
309 [(set_attr "type" "ssecvt")
310 (set_attr "prefix_rep" "1")
311 (set_attr "mode" "TI")])
313 ; Expand patterns for non-temporal stores. At the moment, only those
314 ; that directly map to insns are defined; it would be possible to
315 ; define patterns for other modes that would expand to several insns.
;; Each expander below takes a memory destination (operand 0) and a
;; register source (operand 1) of the mode in its name, with the source
;; wrapped in an unspec of the same mode.

;; V4SF.
317 (define_expand "storentv4sf"
318 [(set (match_operand:V4SF 0 "memory_operand" "=m")
319 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
;; V2DF.
324 (define_expand "storentv2df"
325 [(set (match_operand:V2DF 0 "memory_operand" "=m")
326 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
;; V2DI.
331 (define_expand "storentv2di"
332 [(set (match_operand:V2DI 0 "memory_operand" "=m")
333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
;; SI, sourced from a general register.
338 (define_expand "storentsi"
339 [(set (match_operand:SI 0 "memory_operand" "=m")
340 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
;; Scalar DF.
345 (define_expand "storentdf"
346 [(set (match_operand:DF 0 "memory_operand" "")
347 (unspec:DF [(match_operand:DF 1 "register_operand" "")]
;; Scalar SF.
352 (define_expand "storentsf"
353 [(set (match_operand:SF 0 "memory_operand" "")
354 (unspec:SF [(match_operand:SF 1 "register_operand" "")]
359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
361 ;; Parallel single-precision floating point arithmetic
363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation: expanded entirely by ix86_expand_fp_absneg_operator
;; (called with NEG), after which the expander is DONE.
365 (define_expand "negv4sf2"
366 [(set (match_operand:V4SF 0 "register_operand" "")
367 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
369 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute value: same helper, called with ABS.
371 (define_expand "absv4sf2"
372 [(set (match_operand:V4SF 0 "register_operand" "")
373 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
375 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both sources may be register or memory; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
377 (define_expand "addv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "")
379 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
380 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Matching insn (addps).  Operand 1 is marked commutative and tied to
;; the destination ("%0"); operand 2 may be an xmm register or memory.
;; Requires TARGET_SSE and ix86_binary_operator_ok.
384 (define_insn "*addv4sf3"
385 [(set (match_operand:V4SF 0 "register_operand" "=x")
386 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
387 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
388 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
389 "addps\t{%2, %0|%0, %2}"
390 [(set_attr "type" "sseadd")
391 (set_attr "mode" "V4SF")])
;; Scalar-form add on V4SF operands (addss); the insn mode is SF.
;; Operand 1 must be a register tied to the destination; operand 2 may be
;; an xmm register or memory.
393 (define_insn "sse_vmaddv4sf3"
394 [(set (match_operand:V4SF 0 "register_operand" "=x")
396 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
397 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
400 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
401 "addss\t{%2, %0|%0, %2}"
402 [(set_attr "type" "sseadd")
403 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register, operand 2 may
;; be register or memory; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
405 (define_expand "subv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "")
407 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
408 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
410 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Matching insn (subps).  Not commutative: operand 1 is tied to the
;; destination, operand 2 may be an xmm register or memory.
412 (define_insn "*subv4sf3"
413 [(set (match_operand:V4SF 0 "register_operand" "=x")
414 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
415 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
417 "subps\t{%2, %0|%0, %2}"
418 [(set_attr "type" "sseadd")
419 (set_attr "mode" "V4SF")])
;; Scalar-form subtract on V4SF operands (subss); the insn mode is SF.
;; Operand 1 is tied to the destination; operand 2 may be an xmm register
;; or memory.
421 (define_insn "sse_vmsubv4sf3"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
425 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
429 "subss\t{%2, %0|%0, %2}"
430 [(set_attr "type" "sseadd")
431 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both sources may be register or memory;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
433 (define_expand "mulv4sf3"
434 [(set (match_operand:V4SF 0 "register_operand" "")
435 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
436 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
438 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Matching insn (mulps).  Operand 1 is marked commutative and tied to
;; the destination ("%0").  Requires TARGET_SSE and
;; ix86_binary_operator_ok.
440 (define_insn "*mulv4sf3"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
443 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
444 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
445 "mulps\t{%2, %0|%0, %2}"
446 [(set_attr "type" "ssemul")
447 (set_attr "mode" "V4SF")])
;; Scalar-form multiply on V4SF operands (mulss); the insn mode is SF.
;; Operand 1 must be a register tied to the destination; operand 2 may be
;; an xmm register or memory.
449 (define_insn "sse_vmmulv4sf3"
450 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
456 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
457 "mulss\t{%2, %0|%0, %2}"
458 [(set_attr "type" "ssemul")
459 (set_attr "mode" "SF")])
;; V4SF division expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP hold, the code is not optimized for size, and the
;; finite-math-only, non-trapping and unsafe-math flags are all in
;; effect, ix86_emit_swdivsf is called to emit the division sequence.
461 (define_expand "divv4sf3"
462 [(set (match_operand:V4SF 0 "register_operand" "")
463 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
464 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
467 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
469 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
470 && flag_finite_math_only && !flag_trapping_math
471 && flag_unsafe_math_optimizations)
473 ix86_emit_swdivsf (operands[0], operands[1],
474 operands[2], V4SFmode);
;; Matching insn (divps).  Operand 1 is tied to the destination; operand 2
;; may be an xmm register or memory.
479 (define_insn "*divv4sf3"
480 [(set (match_operand:V4SF 0 "register_operand" "=x")
481 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
482 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
484 "divps\t{%2, %0|%0, %2}"
485 [(set_attr "type" "ssediv")
486 (set_attr "mode" "V4SF")])
;; Scalar-form divide on V4SF operands (divss); the insn mode is SF.
;; Operand 1 is tied to the destination; operand 2 may be an xmm register
;; or memory.
488 (define_insn "sse_vmdivv4sf3"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
492 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
496 "divss\t{%2, %0|%0, %2}"
497 [(set_attr "type" "ssediv")
498 (set_attr "mode" "SF")])
;; Packed reciprocal (rcpps), modelled as UNSPEC_RCP of operand 1, which
;; may be an xmm register or memory.
500 (define_insn "sse_rcpv4sf2"
501 [(set (match_operand:V4SF 0 "register_operand" "=x")
503 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
505 "rcpps\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "V4SF")])
;; Scalar-form reciprocal (rcpss); the insn mode is SF.  Operand 1 is the
;; source (xmm or memory) and operand 2 is a register tied to the
;; destination.
509 (define_insn "sse_vmrcpv4sf2"
510 [(set (match_operand:V4SF 0 "register_operand" "=x")
512 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
514 (match_operand:V4SF 2 "register_operand" "0")
517 "rcpss\t{%1, %0|%0, %1}"
518 [(set_attr "type" "sse")
519 (set_attr "mode" "SF")])
;; Packed reciprocal square root (rsqrtps), modelled as UNSPEC_RSQRT of
;; operand 1, which may be an xmm register or memory.
521 (define_insn "*sse_rsqrtv4sf2"
522 [(set (match_operand:V4SF 0 "register_operand" "=x")
524 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
526 "rsqrtps\t{%1, %0|%0, %1}"
527 [(set_attr "type" "sse")
528 (set_attr "mode" "V4SF")])
;; Named expander for the same UNSPEC_RSQRT operation.  When
;; TARGET_SSE_MATH and TARGET_RECIP hold, the code is not optimized for
;; size, and the finite-math-only, non-trapping and unsafe-math flags are
;; all in effect, ix86_emit_swsqrtsf is called with a final argument of 1.
530 (define_expand "sse_rsqrtv4sf2"
531 [(set (match_operand:V4SF 0 "register_operand" "")
533 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
536 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
537 && flag_finite_math_only && !flag_trapping_math
538 && flag_unsafe_math_optimizations)
540 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Scalar-form reciprocal square root (rsqrtss); the insn mode is SF.
;; Operand 1 is the source (xmm or memory) and operand 2 is a register
;; tied to the destination.
545 (define_insn "sse_vmrsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
550 (match_operand:V4SF 2 "register_operand" "0")
553 "rsqrtss\t{%1, %0|%0, %1}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "SF")])
;; Packed square root (sqrtps) of operand 1, which may be an xmm register
;; or memory.
557 (define_insn "*sqrtv4sf2"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
561 "sqrtps\t{%1, %0|%0, %1}"
562 [(set_attr "type" "sse")
563 (set_attr "mode" "V4SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP
;; hold, the code is not optimized for size, and the finite-math-only,
;; non-trapping and unsafe-math flags are all in effect,
;; ix86_emit_swsqrtsf is called with a final argument of 0.
;; The operand constraint strings are empty, as in the other expanders in
;; this file.
565 (define_expand "sqrtv4sf2"
566 [(set (match_operand:V4SF 0 "register_operand" "")
567 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
570 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
571 && flag_finite_math_only && !flag_trapping_math
572 && flag_unsafe_math_optimizations)
574 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Scalar-form square root (sqrtss); the insn mode is SF.  Operand 1 is
;; the source (xmm or memory) and operand 2 is a register tied to the
;; destination.
579 (define_insn "sse_vmsqrtv4sf2"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
583 (match_operand:V4SF 2 "register_operand" "0")
586 "sqrtss\t{%1, %0|%0, %1}"
587 [(set_attr "type" "sse")
588 (set_attr "mode" "SF")])
590 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
591 ;; isn't really correct, as those rtl operators aren't defined when
592 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
594 (define_expand "smaxv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "")
596 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
597 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
600 if (!flag_finite_math_only)
601 operands[1] = force_reg (V4SFmode, operands[1]);
602 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps, commutative form ("%0" on operand 1).  Only enabled with
;; flag_finite_math_only.
605 (define_insn "*smaxv4sf3_finite"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
607 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
608 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
609 "TARGET_SSE && flag_finite_math_only
610 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
611 "maxps\t{%2, %0|%0, %2}"
612 [(set_attr "type" "sse")
613 (set_attr "mode" "V4SF")])
;; maxps, non-commutative form: operand 1 must be a register tied to the
;; destination.
615 (define_insn "*smaxv4sf3"
616 [(set (match_operand:V4SF 0 "register_operand" "=x")
617 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
618 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
620 "maxps\t{%2, %0|%0, %2}"
621 [(set_attr "type" "sse")
622 (set_attr "mode" "V4SF")])
;; Scalar-form maximum (maxss); the insn mode is SF.  Operand 1 is tied
;; to the destination.
624 (define_insn "sse_vmsmaxv4sf3"
625 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
628 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
632 "maxss\t{%2, %0|%0, %2}"
633 [(set_attr "type" "sse")
634 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
636 (define_expand "sminv4sf3"
637 [(set (match_operand:V4SF 0 "register_operand" "")
638 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
639 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
642 if (!flag_finite_math_only)
643 operands[1] = force_reg (V4SFmode, operands[1]);
644 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps, commutative form ("%0" on operand 1).  Only enabled with
;; flag_finite_math_only.
647 (define_insn "*sminv4sf3_finite"
648 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
651 "TARGET_SSE && flag_finite_math_only
652 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
653 "minps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sse")
655 (set_attr "mode" "V4SF")])
;; minps, non-commutative form: operand 1 must be a register tied to the
;; destination.
657 (define_insn "*sminv4sf3"
658 [(set (match_operand:V4SF 0 "register_operand" "=x")
659 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
660 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
662 "minps\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sse")
664 (set_attr "mode" "V4SF")])
;; Scalar-form minimum (minss); the insn mode is SF.  Operand 1 is tied
;; to the destination.
666 (define_insn "sse_vmsminv4sf3"
667 [(set (match_operand:V4SF 0 "register_operand" "=x")
669 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
670 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
674 "minss\t{%2, %0|%0, %2}"
675 [(set_attr "type" "sse")
676 (set_attr "mode" "SF")])
678 ;; These versions of the min/max patterns implement exactly the operations
679 ;; min = (op1 < op2 ? op1 : op2)
680 ;; max = (!(op1 < op2) ? op1 : op2)
681 ;; Their operands are not commutative, and thus they may be used in the
682 ;; presence of -0.0 and NaN.
;; The four patterns below wrap both operands in an unspec rather than
;; using smin/smax.  Operand 1 is always a register tied to the
;; destination; operand 2 may be an xmm register or memory.

;; V4SF minimum (minps).
684 (define_insn "*ieee_sminv4sf3"
685 [(set (match_operand:V4SF 0 "register_operand" "=x")
686 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
687 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
690 "minps\t{%2, %0|%0, %2}"
691 [(set_attr "type" "sseadd")
692 (set_attr "mode" "V4SF")])
;; V4SF maximum (maxps).
694 (define_insn "*ieee_smaxv4sf3"
695 [(set (match_operand:V4SF 0 "register_operand" "=x")
696 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
697 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
700 "maxps\t{%2, %0|%0, %2}"
701 [(set_attr "type" "sseadd")
702 (set_attr "mode" "V4SF")])
;; V2DF minimum (minpd).
704 (define_insn "*ieee_sminv2df3"
705 [(set (match_operand:V2DF 0 "register_operand" "=x")
706 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
707 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
710 "minpd\t{%2, %0|%0, %2}"
711 [(set_attr "type" "sseadd")
712 (set_attr "mode" "V2DF")])
;; V2DF maximum (maxpd).
714 (define_insn "*ieee_smaxv2df3"
715 [(set (match_operand:V2DF 0 "register_operand" "=x")
716 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
717 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
720 "maxpd\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "mode" "V2DF")])
;; addsubps.  Operand 1 is a register tied to the destination and operand
;; 2 may be an xmm register or memory; the RTL reuses both operands via
;; match_dup for the subtraction part.
724 (define_insn "sse3_addsubv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operand:V4SF 1 "register_operand" "0")
729 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
730 (minus:V4SF (match_dup 1) (match_dup 2))
733 "addsubps\t{%2, %0|%0, %2}"
734 [(set_attr "type" "sseadd")
735 (set_attr "prefix_rep" "1")
736 (set_attr "mode" "V4SF")])
;; haddps.  The RTL is built from SFmode vec_selects of elements 0-3 of
;; operand 1 (tied to the destination) followed by elements 0-3 of
;; operand 2 (xmm register or memory).
738 (define_insn "sse3_haddv4sf3"
739 [(set (match_operand:V4SF 0 "register_operand" "=x")
744 (match_operand:V4SF 1 "register_operand" "0")
745 (parallel [(const_int 0)]))
746 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
748 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
749 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
753 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))
755 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
757 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
758 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
760 "haddps\t{%2, %0|%0, %2}"
761 [(set_attr "type" "sseadd")
762 (set_attr "prefix_rep" "1")
763 (set_attr "mode" "V4SF")])
;; hsubps.  Same element selection layout as sse3_haddv4sf3.
765 (define_insn "sse3_hsubv4sf3"
766 [(set (match_operand:V4SF 0 "register_operand" "=x")
771 (match_operand:V4SF 1 "register_operand" "0")
772 (parallel [(const_int 0)]))
773 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
775 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
776 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
780 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))
782 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
784 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
785 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
787 "hsubps\t{%2, %0|%0, %2}"
788 [(set_attr "type" "sseadd")
789 (set_attr "prefix_rep" "1")
790 (set_attr "mode" "V4SF")])
;; V4SF sum reduction.  Two code paths are present: one applies
;; sse3_haddv4sf3 twice, going through a fresh V4SF temporary, and the
;; other calls ix86_expand_reduc_v4sf with gen_addv4sf3.
792 (define_expand "reduc_splus_v4sf"
793 [(match_operand:V4SF 0 "register_operand" "")
794 (match_operand:V4SF 1 "register_operand" "")]
799 rtx tmp = gen_reg_rtx (V4SFmode);
800 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
801 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
804 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF maximum reduction via ix86_expand_reduc_v4sf with gen_smaxv4sf3.
808 (define_expand "reduc_smax_v4sf"
809 [(match_operand:V4SF 0 "register_operand" "")
810 (match_operand:V4SF 1 "register_operand" "")]
813 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction via ix86_expand_reduc_v4sf with gen_sminv4sf3.
817 (define_expand "reduc_smin_v4sf"
818 [(match_operand:V4SF 0 "register_operand" "")
819 (match_operand:V4SF 1 "register_operand" "")]
822 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
828 ;; Parallel single-precision floating point comparisons
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare (cmp<cond>ps).  Operand 3 is the comparison operator
;; (sse_comparison_operator) and is printed through the %D3 modifier.
;; Operand 1 is tied to the destination.
832 (define_insn "sse_maskcmpv4sf3"
833 [(set (match_operand:V4SF 0 "register_operand" "=x")
834 (match_operator:V4SF 3 "sse_comparison_operator"
835 [(match_operand:V4SF 1 "register_operand" "0")
836 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
838 "cmp%D3ps\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssecmp")
840 (set_attr "mode" "V4SF")])
;; Scalar SFmode compare (cmp<cond>ss) with the same operand layout.
842 (define_insn "sse_maskcmpsf3"
843 [(set (match_operand:SF 0 "register_operand" "=x")
844 (match_operator:SF 3 "sse_comparison_operator"
845 [(match_operand:SF 1 "register_operand" "0")
846 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
848 "cmp%D3ss\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssecmp")
850 (set_attr "mode" "SF")])
;; Scalar-form compare on V4SF operands (cmp<cond>ss); the insn mode is
;; SF.  Unlike the two patterns above, operand 2 must be a register.
852 (define_insn "sse_vmmaskcmpv4sf3"
853 [(set (match_operand:V4SF 0 "register_operand" "=x")
855 (match_operator:V4SF 3 "sse_comparison_operator"
856 [(match_operand:V4SF 1 "register_operand" "0")
857 (match_operand:V4SF 2 "register_operand" "x")])
861 "cmp%D3ss\t{%2, %0|%0, %2}"
862 [(set_attr "type" "ssecmp")
863 (set_attr "mode" "SF")])
;; comiss.  Sets FLAGS_REG in CCFP mode from element 0 of operand 0 (xmm
;; register) and element 0 of operand 1 (xmm register or memory).
865 (define_insn "sse_comi"
866 [(set (reg:CCFP FLAGS_REG)
869 (match_operand:V4SF 0 "register_operand" "x")
870 (parallel [(const_int 0)]))
872 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
873 (parallel [(const_int 0)]))))]
875 "comiss\t{%1, %0|%0, %1}"
876 [(set_attr "type" "ssecomi")
877 (set_attr "mode" "SF")])
;; ucomiss.  Same operand layout, but FLAGS_REG is set in CCFPU mode.
879 (define_insn "sse_ucomi"
880 [(set (reg:CCFPU FLAGS_REG)
883 (match_operand:V4SF 0 "register_operand" "x")
884 (parallel [(const_int 0)]))
886 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
887 (parallel [(const_int 0)]))))]
889 "ucomiss\t{%1, %0|%0, %1}"
890 [(set_attr "type" "ssecomi")
891 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the compared
;; values and operands 1 and 2 are the two general_operand arms; the
;; expansion is delegated to ix86_expand_fp_vcond, whose return value is
;; tested.
893 (define_expand "vcondv4sf"
894 [(set (match_operand:V4SF 0 "register_operand" "")
897 [(match_operand:V4SF 4 "nonimmediate_operand" "")
898 (match_operand:V4SF 5 "nonimmediate_operand" "")])
899 (match_operand:V4SF 1 "general_operand" "")
900 (match_operand:V4SF 2 "general_operand" "")))]
903 if (ix86_expand_fp_vcond (operands))
909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
911 ;; Parallel single-precision floating point logical operations
913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise AND expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
915 (define_expand "andv4sf3"
916 [(set (match_operand:V4SF 0 "register_operand" "")
917 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
918 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
920 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is marked commutative and tied to the destination.
922 (define_insn "*andv4sf3"
923 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
925 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
926 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
927 "andps\t{%2, %0|%0, %2}"
928 [(set_attr "type" "sselog")
929 (set_attr "mode" "V4SF")])
;; andnps.  The complement is applied to operand 1 (tied to the
;; destination) before it is ANDed with operand 2.
931 (define_insn "sse_nandv4sf3"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
934 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
936 "andnps\t{%2, %0|%0, %2}"
937 [(set_attr "type" "sselog")
938 (set_attr "mode" "V4SF")])
;; V4SF bitwise OR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
940 (define_expand "iorv4sf3"
941 [(set (match_operand:V4SF 0 "register_operand" "")
942 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
943 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
945 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is marked commutative and tied to the destination.
947 (define_insn "*iorv4sf3"
948 [(set (match_operand:V4SF 0 "register_operand" "=x")
949 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
950 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
951 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
952 "orps\t{%2, %0|%0, %2}"
953 [(set_attr "type" "sselog")
954 (set_attr "mode" "V4SF")])
;; V4SF bitwise XOR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
956 (define_expand "xorv4sf3"
957 [(set (match_operand:V4SF 0 "register_operand" "")
958 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
959 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
961 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is marked commutative and tied to the destination.
963 (define_insn "*xorv4sf3"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
965 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
966 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
967 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
968 "xorps\t{%2, %0|%0, %2}"
969 [(set_attr "type" "sselog")
970 (set_attr "mode" "V4SF")])
972 ;; Also define scalar versions. These are used for abs, neg, and
973 ;; conditional move. Using subregs into vector modes causes register
974 ;; allocation lossage. These patterns do not allow memory operands
975 ;; because the native instructions read the full 128-bits.
;; SFmode AND, printed as the packed andps; both sources must be xmm
;; registers and the insn mode is V4SF.
977 (define_insn "*andsf3"
978 [(set (match_operand:SF 0 "register_operand" "=x")
979 (and:SF (match_operand:SF 1 "register_operand" "0")
980 (match_operand:SF 2 "register_operand" "x")))]
982 "andps\t{%2, %0|%0, %2}"
983 [(set_attr "type" "sselog")
984 (set_attr "mode" "V4SF")])
;; SFmode AND-NOT (andnps); the complement is applied to operand 1.
986 (define_insn "*nandsf3"
987 [(set (match_operand:SF 0 "register_operand" "=x")
988 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
989 (match_operand:SF 2 "register_operand" "x")))]
991 "andnps\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sselog")
993 (set_attr "mode" "V4SF")])
;; SFmode OR (orps).
995 (define_insn "*iorsf3"
996 [(set (match_operand:SF 0 "register_operand" "=x")
997 (ior:SF (match_operand:SF 1 "register_operand" "0")
998 (match_operand:SF 2 "register_operand" "x")))]
1000 "orps\t{%2, %0|%0, %2}"
1001 [(set_attr "type" "sselog")
1002 (set_attr "mode" "V4SF")])
;; SFmode XOR (xorps).
1004 (define_insn "*xorsf3"
1005 [(set (match_operand:SF 0 "register_operand" "=x")
1006 (xor:SF (match_operand:SF 1 "register_operand" "0")
1007 (match_operand:SF 2 "register_operand" "x")))]
1009 "xorps\t{%2, %0|%0, %2}"
1010 [(set_attr "type" "sselog")
1011 (set_attr "mode" "V4SF")])
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1015 ;; Parallel single-precision floating point conversion operations
1017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps.  Operand 2 is a V2SI value in an MMX register ("y") or
;; memory, converted with float:V2SF; operand 1 is a V4SF register tied
;; to the destination.
1019 (define_insn "sse_cvtpi2ps"
1020 [(set (match_operand:V4SF 0 "register_operand" "=x")
1023 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1024 (match_operand:V4SF 1 "register_operand" "0")
1027 "cvtpi2ps\t{%2, %0|%0, %2}"
1028 [(set_attr "type" "ssecvt")
1029 (set_attr "mode" "V4SF")])
;; cvtps2pi.  The V2SI result goes to an MMX register and is elements 0
;; and 1 of a V4SI unspec applied to the V4SF operand 1.
1031 (define_insn "sse_cvtps2pi"
1032 [(set (match_operand:V2SI 0 "register_operand" "=y")
1034 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1036 (parallel [(const_int 0) (const_int 1)])))]
1038 "cvtps2pi\t{%1, %0|%0, %1}"
1039 [(set_attr "type" "ssecvt")
1040 (set_attr "unit" "mmx")
1041 (set_attr "mode" "DI")])
;; cvttps2pi.  Like sse_cvtps2pi, but the conversion is expressed with
;; fix:V4SF->V4SI instead of an unspec.
1043 (define_insn "sse_cvttps2pi"
1044 [(set (match_operand:V2SI 0 "register_operand" "=y")
1046 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1047 (parallel [(const_int 0) (const_int 1)])))]
1049 "cvttps2pi\t{%1, %0|%0, %1}"
1050 [(set_attr "type" "ssecvt")
1051 (set_attr "unit" "mmx")
1052 (set_attr "mode" "SF")])
;; cvtsi2ss: SImode operand 2 is converted to SF with `float'; V4SF
;; operand 1 is tied to the destination.  Alternative 0 takes the integer
;; from a general register, alternative 1 from memory; the two decode
;; attributes give one value per alternative.
1054 (define_insn "sse_cvtsi2ss"
1055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1058 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1059 (match_operand:V4SF 1 "register_operand" "0,0")
1062 "cvtsi2ss\t{%2, %0|%0, %2}"
1063 [(set_attr "type" "sseicvt")
1064 (set_attr "athlon_decode" "vector,double")
1065 (set_attr "amdfam10_decode" "vector,double")
1066 (set_attr "mode" "SF")])
;; cvtsi2ssq: same shape as sse_cvtsi2ss but with a DImode integer source.
;; Only enabled when both TARGET_SSE and TARGET_64BIT hold.
;; NOTE(review): the second alternative accepts "rm" here, whereas
;; sse_cvtsi2ss uses plain "m" -- confirm that the difference is intended.
1068 (define_insn "sse_cvtsi2ssq"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1072 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1073 (match_operand:V4SF 1 "register_operand" "0,0")
1075 "TARGET_SSE && TARGET_64BIT"
1076 "cvtsi2ssq\t{%2, %0|%0, %2}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "vector,double")
1079 (set_attr "amdfam10_decode" "vector,double")
1080 (set_attr "mode" "SF")])
;; cvtss2si: element 0 (selector [0]) of V4SF operand 1, taken from an SSE
;; register or memory, is converted to an SImode general register.  The
;; conversion is wrapped in UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; amdfam10_decode attribute -- confirm that is intended.
1082 (define_insn "sse_cvtss2si"
1083 [(set (match_operand:SI 0 "register_operand" "=r,r")
1086 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1087 (parallel [(const_int 0)]))]
1088 UNSPEC_FIX_NOTRUNC))]
1090 "cvtss2si\t{%1, %0|%0, %1}"
1091 [(set_attr "type" "sseicvt")
1092 (set_attr "athlon_decode" "double,vector")
1093 (set_attr "prefix_rep" "1")
1094 (set_attr "mode" "SI")])
;; Second form of cvtss2si: the source is a plain SFmode operand (SSE
;; register or memory) instead of an element selected from a V4SF vector.
;; Same UNSPEC_FIX_NOTRUNC wrapper and same assembler template.
1096 (define_insn "sse_cvtss2si_2"
1097 [(set (match_operand:SI 0 "register_operand" "=r,r")
1098 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1099 UNSPEC_FIX_NOTRUNC))]
1101 "cvtss2si\t{%1, %0|%0, %1}"
1102 [(set_attr "type" "sseicvt")
1103 (set_attr "athlon_decode" "double,vector")
1104 (set_attr "amdfam10_decode" "double,double")
1105 (set_attr "prefix_rep" "1")
1106 (set_attr "mode" "SI")])
;; cvtss2siq: element 0 of V4SF operand 1 converted (UNSPEC_FIX_NOTRUNC)
;; into a DImode general register.  Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no amdfam10_decode attribute here, unlike
;; sse_cvtss2siq_2 -- confirm that is intended.
1108 (define_insn "sse_cvtss2siq"
1109 [(set (match_operand:DI 0 "register_operand" "=r,r")
1112 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1113 (parallel [(const_int 0)]))]
1114 UNSPEC_FIX_NOTRUNC))]
1115 "TARGET_SSE && TARGET_64BIT"
1116 "cvtss2siq\t{%1, %0|%0, %1}"
1117 [(set_attr "type" "sseicvt")
1118 (set_attr "athlon_decode" "double,vector")
1119 (set_attr "prefix_rep" "1")
1120 (set_attr "mode" "DI")])
;; Second form of cvtss2siq: the source is a plain SFmode operand rather
;; than a vector element.  Requires TARGET_SSE && TARGET_64BIT.
1122 (define_insn "sse_cvtss2siq_2"
1123 [(set (match_operand:DI 0 "register_operand" "=r,r")
1124 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1125 UNSPEC_FIX_NOTRUNC))]
1126 "TARGET_SSE && TARGET_64BIT"
1127 "cvtss2siq\t{%1, %0|%0, %1}"
1128 [(set_attr "type" "sseicvt")
1129 (set_attr "athlon_decode" "double,vector")
1130 (set_attr "amdfam10_decode" "double,double")
1131 (set_attr "prefix_rep" "1")
1132 (set_attr "mode" "DI")])
;; cvttss2si: element 0 (selector [0]) of V4SF operand 1, from an SSE
;; register or memory, is converted into an SImode general register.
1134 (define_insn "sse_cvttss2si"
1135 [(set (match_operand:SI 0 "register_operand" "=r,r")
1138 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1139 (parallel [(const_int 0)]))))]
1141 "cvttss2si\t{%1, %0|%0, %1}"
1142 [(set_attr "type" "sseicvt")
1143 (set_attr "athlon_decode" "double,vector")
1144 (set_attr "amdfam10_decode" "double,double")
1145 (set_attr "prefix_rep" "1")
1146 (set_attr "mode" "SI")])
;; cvttss2siq: as sse_cvttss2si but producing a DImode result.
;; Requires TARGET_SSE && TARGET_64BIT.
1148 (define_insn "sse_cvttss2siq"
1149 [(set (match_operand:DI 0 "register_operand" "=r,r")
1152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1153 (parallel [(const_int 0)]))))]
1154 "TARGET_SSE && TARGET_64BIT"
1155 "cvttss2siq\t{%1, %0|%0, %1}"
1156 [(set_attr "type" "sseicvt")
1157 (set_attr "athlon_decode" "double,vector")
1158 (set_attr "amdfam10_decode" "double,double")
1159 (set_attr "prefix_rep" "1")
1160 (set_attr "mode" "DI")])
;; cvtdq2ps: element-wise `float' conversion of V4SI operand 1 (SSE
;; register or memory) to a V4SF result.
1162 (define_insn "sse2_cvtdq2ps"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1164 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1166 "cvtdq2ps\t{%1, %0|%0, %1}"
1167 [(set_attr "type" "ssecvt")
1168 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF -> V4SI conversion, represented as UNSPEC_FIX_NOTRUNC
;; rather than with the RTL `fix' operator used by sse2_cvttps2dq.
1170 (define_insn "sse2_cvtps2dq"
1171 [(set (match_operand:V4SI 0 "register_operand" "=x")
1172 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1173 UNSPEC_FIX_NOTRUNC))]
1175 "cvtps2dq\t{%1, %0|%0, %1}"
1176 [(set_attr "type" "ssecvt")
1177 (set_attr "prefix_data16" "1")
1178 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF -> V4SI conversion expressed with the RTL `fix'
;; operator.  The encoding carries a rep prefix (prefix_rep).
1180 (define_insn "sse2_cvttps2dq"
1181 [(set (match_operand:V4SI 0 "register_operand" "=x")
1182 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1184 "cvttps2dq\t{%1, %0|%0, %1}"
1185 [(set_attr "type" "ssecvt")
1186 (set_attr "prefix_rep" "1")
1187 (set_attr "mode" "TI")])
1189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1191 ;; Parallel single-precision floating point element swizzling
1193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms.  Operand 1 is tied to the destination in
;; every alternative.  Three alternatives, in order:
;;   0: register <- register, movhlps
;;   1: register <- offsettable memory ("o"), movlps from %H2
;;   2: memory destination <- register, movhps
;; The condition rejects the case where operands 1 and 2 are both memory.
1195 (define_insn "sse_movhlps"
1196 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1199 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1200 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1201 (parallel [(const_int 6)
1205 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1207 movhlps\t{%2, %0|%0, %2}
1208 movlps\t{%H2, %0|%0, %H2}
1209 movhps\t{%2, %0|%0, %2}"
1210 [(set_attr "type" "ssemov")
1211 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Operand 1 is tied to the destination in
;; every alternative.  Three alternatives, in order:
;;   0: register <- register, movlhps
;;   1: register <- memory, movhps
;;   2: offsettable memory destination ("o") <- register, movlps to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
1213 (define_insn "sse_movlhps"
1214 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1217 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1218 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1219 (parallel [(const_int 0)
1223 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1225 movlhps\t{%2, %0|%0, %2}
1226 movhps\t{%2, %0|%0, %2}
1227 movlps\t{%2, %H0|%H0, %2}"
1228 [(set_attr "type" "ssemov")
1229 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: selects elements [2 6 3 7] from the combination of operand 1
;; (tied to the destination) and operand 2 (SSE register or memory).
1231 (define_insn "sse_unpckhps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 2) (const_int 6)
1238 (const_int 3) (const_int 7)])))]
1240 "unpckhps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
;; unpcklps: selects elements [0 4 1 5] from the combination of operand 1
;; (tied to the destination) and operand 2 (SSE register or memory).
1244 (define_insn "sse_unpcklps"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1248 (match_operand:V4SF 1 "register_operand" "0")
1249 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1250 (parallel [(const_int 0) (const_int 4)
1251 (const_int 1) (const_int 5)])))]
1253 "unpcklps\t{%2, %0|%0, %2}"
1254 [(set_attr "type" "sselog")
1255 (set_attr "mode" "V4SF")])
1257 ;; These are modeled with the same vec_concat as the others so that we
1258 ;; capture users of shufps that can use the new instructions
;; SSE3 movshdup: single-input shuffle of V4SF operand 1 (SSE register or
;; memory) into an SSE register.  The encoding carries a rep prefix.
1259 (define_insn "sse3_movshdup"
1260 [(set (match_operand:V4SF 0 "register_operand" "=x")
1263 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 1)
1270 "movshdup\t{%1, %0|%0, %1}"
1271 [(set_attr "type" "sse")
1272 (set_attr "prefix_rep" "1")
1273 (set_attr "mode" "V4SF")])
;; SSE3 movsldup: single-input shuffle of V4SF operand 1 (SSE register or
;; memory) into an SSE register.  The encoding carries a rep prefix.
1275 (define_insn "sse3_movsldup"
1276 [(set (match_operand:V4SF 0 "register_operand" "=x")
1279 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1281 (parallel [(const_int 0)
1286 "movsldup\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "sse")
1288 (set_attr "prefix_rep" "1")
1289 (set_attr "mode" "V4SF")])
;; Expander for shufps with a single immediate.  The constant in operand 3
;; is split into four 2-bit fields which are passed to sse_shufps_1 as
;; separate selectors.  The two upper fields get 4 added so that they fall
;; in the 4..7 range that sse_shufps_1 requires for its operands 5 and 6.
1291 (define_expand "sse_shufps"
1292 [(match_operand:V4SF 0 "register_operand" "")
1293 (match_operand:V4SF 1 "register_operand" "")
1294 (match_operand:V4SF 2 "nonimmediate_operand" "")
1295 (match_operand:SI 3 "const_int_operand" "")]
1298 int mask = INTVAL (operands[3]);
1299 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1300 GEN_INT ((mask >> 0) & 3),
1301 GEN_INT ((mask >> 2) & 3),
1302 GEN_INT (((mask >> 4) & 3) + 4),
1303 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with explicit element selectors.  Operands 3 and 4 must be in
;; 0..3, operands 5 and 6 in 4..7.  The output code packs the selectors
;; back into one immediate, two bits each (subtracting 4 from operands 5
;; and 6), and overwrites operands[3] with it so that %3 prints the
;; combined mask.
1307 (define_insn "sse_shufps_1"
1308 [(set (match_operand:V4SF 0 "register_operand" "=x")
1311 (match_operand:V4SF 1 "register_operand" "0")
1312 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1313 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1314 (match_operand 4 "const_0_to_3_operand" "")
1315 (match_operand 5 "const_4_to_7_operand" "")
1316 (match_operand 6 "const_4_to_7_operand" "")])))]
1320 mask |= INTVAL (operands[3]) << 0;
1321 mask |= INTVAL (operands[4]) << 2;
1322 mask |= (INTVAL (operands[5]) - 4) << 4;
1323 mask |= (INTVAL (operands[6]) - 4) << 6;
1324 operands[3] = GEN_INT (mask);
1326 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1328 [(set_attr "type" "sselog")
1329 (set_attr "mode" "V4SF")])
;; Takes elements 2 and 3 of V4SF operand 1 as a V2SF value.
;; Alternatives, in order:
;;   0: store to memory with movhps
;;   1: register to register with movhlps
;;   2: load from offsettable memory with movlps, addressing %H1
1331 (define_insn "sse_storehps"
1332 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1334 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1335 (parallel [(const_int 2) (const_int 3)])))]
1338 movhps\t{%1, %0|%0, %1}
1339 movhlps\t{%1, %0|%0, %1}
1340 movlps\t{%H1, %0|%0, %H1}"
1341 [(set_attr "type" "ssemov")
1342 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Keeps elements 0 and 1 of operand 1 (tied to the destination) and takes
;; the remaining half from V2SF operand 2.  Alternatives, in order:
;;   0: operand 2 in memory, movhps
;;   1: operand 2 in a register, movlhps
;;   2: destination in offsettable memory, movlps storing to %H0
1344 (define_insn "sse_loadhps"
1345 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1348 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1349 (parallel [(const_int 0) (const_int 1)]))
1350 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1353 movhps\t{%2, %0|%0, %2}
1354 movlhps\t{%2, %0|%0, %2}
1355 movlps\t{%2, %H0|%H0, %2}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Takes elements 0 and 1 of V4SF operand 1 as a V2SF value.
;; Alternatives, in order:
;;   0: store to memory with movlps
;;   1: register to register with a full movaps copy
;;   2: load from memory with movlps
1359 (define_insn "sse_storelps"
1360 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1362 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1363 (parallel [(const_int 0) (const_int 1)])))]
1366 movlps\t{%1, %0|%0, %1}
1367 movaps\t{%1, %0|%0, %1}
1368 movlps\t{%1, %0|%0, %1}"
1369 [(set_attr "type" "ssemov")
1370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives, in order:
;;   0: operand 2 tied to the destination, operand 1 in a register;
;;      shufps with immediate 0xe4
;;   1: operand 1 tied to the destination, operand 2 in memory; movlps
;;   2: destination in memory, operand 2 in a register; movlps
1372 (define_insn "sse_loadlps"
1373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1375 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1377 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1378 (parallel [(const_int 2) (const_int 3)]))))]
1381 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1382 movlps\t{%2, %0|%0, %2}
1383 movlps\t{%2, %0|%0, %2}"
1384 [(set_attr "type" "sselog,ssemov,ssemov")
1385 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 2 is the source printed in the
;; template, operand 1 is tied to the destination.
1387 (define_insn "sse_movss"
1388 [(set (match_operand:V4SF 0 "register_operand" "=x")
1390 (match_operand:V4SF 2 "register_operand" "x")
1391 (match_operand:V4SF 1 "register_operand" "0")
1394 "movss\t{%2, %0|%0, %2}"
1395 [(set_attr "type" "ssemov")
1396 (set_attr "mode" "SF")])
;; Broadcast of an SFmode value that already lives in the destination
;; register (constraint "0"): shufps of %0 with itself and immediate 0.
1398 (define_insn "*vec_dupv4sf"
1399 [(set (match_operand:V4SF 0 "register_operand" "=x")
1401 (match_operand:SF 1 "register_operand" "0")))]
1403 "shufps\t{$0, %0, %0|%0, %0, 0}"
1404 [(set_attr "type" "sselog1")
1405 (set_attr "mode" "V4SF")])
1407 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1408 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1409 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from SF operands 1 and 2.  Alternatives, in order:
;;   0: SSE registers, unpcklps
;;   1: operand 1 in memory, operand 2 matching "C"; movss load
;;   2: MMX registers, punpckldq
;;   3: operand 1 in memory, operand 2 matching "C"; movd load
;; The "*" in the MMX constraints is the standard modifier that hides those
;; classes from register preferencing.
1410 (define_insn "*sse_concatv2sf"
1411 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1413 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1414 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1417 unpcklps\t{%2, %0|%0, %2}
1418 movss\t{%1, %0|%0, %1}
1419 punpckldq\t{%2, %0|%0, %2}
1420 movd\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1422 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF halves.  Operand 1 is tied to the
;; destination; operand 2 is added with movlhps when it is in a register
;; and with movhps when it is in memory.
1424 (define_insn "*sse_concatv4sf"
1425 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1427 (match_operand:V2SF 1 "register_operand" " 0,0")
1428 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1431 movlhps\t{%2, %0|%0, %2}
1432 movhps\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "ssemov")
1434 (set_attr "mode" "V4SF,V2SF")])
;; vec_init expander for V4SF.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1436 (define_expand "vec_initv4sf"
1437 [(match_operand:V4SF 0 "register_operand" "")
1438 (match_operand 1 "" "")]
1441 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Places SFmode operand 2 into a V4SF whose remaining contents come from
;; operand 1, which is either tied to the destination ("0") or matches "C".
;; Four constraint alternatives are declared.  The templates visible here
;; are movss from a register, movss from memory and movd from a general
;; register.
;; NOTE(review): the template for the fourth (memory destination)
;; alternative is not visible in this listing.
1445 (define_insn "vec_setv4sf_0"
1446 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1449 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1450 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1454 movss\t{%2, %0|%0, %2}
1455 movss\t{%2, %0|%0, %2}
1456 movd\t{%2, %0|%0, %2}
1458 [(set_attr "type" "ssemov")
1459 (set_attr "mode" "SF")])
1461 ;; A subset is vec_setv4sf.
;; insertps form of the vector element set.  Operand 3 is a power of two
;; in 1..8.  The output code turns it into an element index with
;; exact_log2 and shifts that index left by 4 to build the insertps
;; immediate, which replaces operands[3] before the template is printed.
1462 (define_insn "*vec_setv4sf_sse4_1"
1463 [(set (match_operand:V4SF 0 "register_operand" "=x")
1466 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1467 (match_operand:V4SF 1 "register_operand" "0")
1468 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1471 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1472 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1474 [(set_attr "type" "sselog")
1475 (set_attr "prefix_extra" "1")
1476 (set_attr "mode" "V4SF")])
;; insertps with a raw 8-bit immediate (operand 3, range 0..255), modelled
;; as an unspec.  Operand 2 is the source register and operand 1 is tied to
;; the destination.
;; The output template is a plain string; the stray ';' that used to follow
;; it (ignored only because ';' starts a comment) has been removed.
1478 (define_insn "sse4_1_insertps"
1479 [(set (match_operand:V4SF 0 "register_operand" "=x")
1480 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1481 (match_operand:V4SF 1 "register_operand" "0")
1482 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1485 "insertps\t{%3, %2, %0|%0, %2, %3}"
1486 [(set_attr "type" "sselog")
1487 (set_attr "prefix_extra" "1")
1488 (set_attr "mode" "V4SF")])
;; NOTE(review): the opening line of this definition is not visible in this
;; listing; from its shape it is presumably an anonymous define_split.
;; Runs only after reload (condition requires reload_completed).  The
;; replacement is a single SFmode move of operand 1 into the first SFmode
;; slot (offset 0) of the V4SF memory destination.
1491 [(set (match_operand:V4SF 0 "memory_operand" "")
1494 (match_operand:SF 1 "nonmemory_operand" ""))
1497 "TARGET_SSE && reload_completed"
1500 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_set expander for V4SF.  Operand 0 is the vector, operand 1 the SF
;; value and operand 2 a const_int whose value is passed on as the element
;; index.  All work is delegated to ix86_expand_vector_set.
1504 (define_expand "vec_setv4sf"
1505 [(match_operand:V4SF 0 "register_operand" "")
1506 (match_operand:SF 1 "register_operand" "")
1507 (match_operand 2 "const_int_operand" "")]
1510 ix86_expand_vector_set (false, operands[0], operands[1],
1511 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF into an SFmode destination.  Not
;; allowed when both operands are memory.  After reload it is split into a
;; plain SFmode move whose source is operand 1 reinterpreted as SFmode:
;; either a fresh SFmode REG with the same register number or the
;; gen_lowpart of the operand.
;; NOTE(review): the test that chooses between the two rewrites is not
;; visible in this listing.
1515 (define_insn_and_split "*vec_extractv4sf_0"
1516 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1518 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1519 (parallel [(const_int 0)])))]
1520 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1522 "&& reload_completed"
1525 rtx op1 = operands[1];
1527 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1529 op1 = gen_lowpart (SFmode, op1);
1530 emit_move_insn (operands[0], op1);
;; extractps: the element of V4SF register operand 1 chosen by operand 2
;; (a constant in 0..3) is written to a general register or memory ("rm").
1534 (define_insn "*sse4_1_extractps"
1535 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1537 (match_operand:V4SF 1 "register_operand" "x")
1538 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1540 "extractps\t{%2, %1, %0|%0, %1, %2}"
1541 [(set_attr "type" "sselog")
1542 (set_attr "prefix_extra" "1")
1543 (set_attr "mode" "V4SF")])
;; Extraction of element i (operand 2, constant 0..3) from a V4SF that
;; lives in offsettable memory.  The split replaces it with an SFmode load
;; from the same address plus i*4 bytes.
1545 (define_insn_and_split "*vec_extract_v4sf_mem"
1546 [(set (match_operand:SF 0 "register_operand" "=x*rf")
1548 (match_operand:V4SF 1 "memory_operand" "o")
1549 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
1555 int i = INTVAL (operands[2]);
1557 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extract expander for V4SF.  Operand 0 receives the SF element of
;; vector operand 1 whose index is the const_int operand 2.  All work is
;; delegated to ix86_expand_vector_extract.
1561 (define_expand "vec_extractv4sf"
1562 [(match_operand:SF 0 "register_operand" "")
1563 (match_operand:V4SF 1 "register_operand" "")
1564 (match_operand 2 "const_int_operand" "")]
1567 ix86_expand_vector_extract (false, operands[0], operands[1],
1568 INTVAL (operands[2]));
1572 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1574 ;; Parallel double-precision floating point arithmetic
1576 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation expander.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...), followed by DONE.
1578 (define_expand "negv2df2"
1579 [(set (match_operand:V2DF 0 "register_operand" "")
1580 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1582 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute-value expander.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...), followed by DONE.
1584 (define_expand "absv2df2"
1585 [(set (match_operand:V2DF 0 "register_operand" "")
1586 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1588 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Both inputs may be register or memory; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
1590 (define_expand "addv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "")
1592 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1593 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1595 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.  Operand validity is checked with
;; ix86_binary_operator_ok.
1597 (define_insn "*addv2df3"
1598 [(set (match_operand:V2DF 0 "register_operand" "=x")
1599 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1600 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1601 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1602 "addpd\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sseadd")
1604 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands (the "vm" scalar form): operand 1 is tied to the
;; destination register, operand 2 may be an SSE register or memory.
;; The condition validates the operands against V2DFmode, the mode this
;; pattern operates on, as sse2_vmmulv2df3 does.  It previously named
;; V4SFmode, copied from the single-precision pattern.
1606 (define_insn "sse2_vmaddv2df3"
1607 [(set (match_operand:V2DF 0 "register_operand" "=x")
1609 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1610 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1613 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1614 "addsd\t{%2, %0|%0, %2}"
1615 [(set_attr "type" "sseadd")
1616 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1618 (define_expand "subv2df3"
1619 [(set (match_operand:V2DF 0 "register_operand" "")
1620 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1621 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1623 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no "%"
;; commutativity marker here); operand 2 may be a register or memory.
1625 (define_insn "*subv2df3"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x")
1627 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1628 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1630 "subpd\t{%2, %0|%0, %2}"
1631 [(set_attr "type" "sseadd")
1632 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands (the "vm" scalar form): operand 1 is tied to the
;; destination, operand 2 may be an SSE register or memory.
1634 (define_insn "sse2_vmsubv2df3"
1635 [(set (match_operand:V2DF 0 "register_operand" "=x")
1637 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1638 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1642 "subsd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "sseadd")
1644 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1646 (define_expand "mulv2df3"
1647 [(set (match_operand:V2DF 0 "register_operand" "")
1648 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1649 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1651 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.  Operand validity is checked with
;; ix86_binary_operator_ok.
1653 (define_insn "*mulv2df3"
1654 [(set (match_operand:V2DF 0 "register_operand" "=x")
1655 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1656 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1657 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1658 "mulpd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "ssemul")
1660 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands (the "vm" scalar form): operand 1 is tied to the
;; destination, operand 2 may be an SSE register or memory.  Operand
;; validity is checked with ix86_binary_operator_ok in V2DFmode.
1662 (define_insn "sse2_vmmulv2df3"
1663 [(set (match_operand:V2DF 0 "register_operand" "=x")
1665 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1669 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1670 "mulsd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "ssemul")
1672 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1 is
;; restricted to a register here.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1674 (define_expand "divv2df3"
1675 [(set (match_operand:V2DF 0 "register_operand" "")
1676 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1677 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1679 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 must be a register tied to the destination; operand 2
;; may be an SSE register or memory.
1681 (define_insn "*divv2df3"
1682 [(set (match_operand:V2DF 0 "register_operand" "=x")
1683 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1684 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1686 "divpd\t{%2, %0|%0, %2}"
1687 [(set_attr "type" "ssediv")
1688 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands (the "vm" scalar form): operand 1 is tied to the
;; destination, operand 2 may be an SSE register or memory.
1690 (define_insn "sse2_vmdivv2df3"
1691 [(set (match_operand:V2DF 0 "register_operand" "=x")
1693 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1694 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1698 "divsd\t{%1, %0|%0, %2}"
1699 [(set_attr "type" "ssediv")
1700 (set_attr "mode" "DF")])
;; sqrtpd: element-wise square root of V2DF operand 1 (SSE register or
;; memory) into an SSE register.
1702 (define_insn "sqrtv2df2"
1703 [(set (match_operand:V2DF 0 "register_operand" "=x")
1704 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1706 "sqrtpd\t{%1, %0|%0, %1}"
1707 [(set_attr "type" "sse")
1708 (set_attr "mode" "V2DF")])
;; sqrtsd (the "vm" scalar form): the square root is taken of operand 1,
;; while operand 2 is the register tied to the destination.
1710 (define_insn "sse2_vmsqrtv2df2"
1711 [(set (match_operand:V2DF 0 "register_operand" "=x")
1713 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1714 (match_operand:V2DF 2 "register_operand" "0")
1717 "sqrtsd\t{%1, %0|%0, %1}"
1718 [(set_attr "type" "sse")
1719 (set_attr "mode" "DF")])
1721 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1722 ;; isn't really correct, as those rtl operators aren't defined when
1723 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  When flag_finite_math_only is off, operand 1 is
;; first forced into a register; afterwards the usual binary-operand fixup
;; is applied.
1725 (define_expand "smaxv2df3"
1726 [(set (match_operand:V2DF 0 "register_operand" "")
1727 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1731 if (!flag_finite_math_only)
1732 operands[1] = force_reg (V2DFmode, operands[1]);
1733 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form ("%" on operand 1).  Only matches when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.
1736 (define_insn "*smaxv2df3_finite"
1737 [(set (match_operand:V2DF 0 "register_operand" "=x")
1738 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1739 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1740 "TARGET_SSE2 && flag_finite_math_only
1741 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1742 "maxpd\t{%2, %0|%0, %2}"
1743 [(set_attr "type" "sseadd")
1744 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so operand order is preserved.
1746 (define_insn "*smaxv2df3"
1747 [(set (match_operand:V2DF 0 "register_operand" "=x")
1748 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1749 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1751 "maxpd\t{%2, %0|%0, %2}"
1752 [(set_attr "type" "sseadd")
1753 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands (the "vm" scalar form): operand 1 is tied to the
;; destination, operand 2 may be an SSE register or memory.
1755 (define_insn "sse2_vmsmaxv2df3"
1756 [(set (match_operand:V2DF 0 "register_operand" "=x")
1758 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1759 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1763 "maxsd\t{%2, %0|%0, %2}"
1764 [(set_attr "type" "sseadd")
1765 (set_attr "mode" "DF")])
;; V2DF minimum expander.  When flag_finite_math_only is off, operand 1 is
;; first forced into a register; afterwards the usual binary-operand fixup
;; is applied.
1767 (define_expand "sminv2df3"
1768 [(set (match_operand:V2DF 0 "register_operand" "")
1769 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1770 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1773 if (!flag_finite_math_only)
1774 operands[1] = force_reg (V2DFmode, operands[1]);
1775 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form ("%" on operand 1).  Only matches when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.
1778 (define_insn "*sminv2df3_finite"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1782 "TARGET_SSE2 && flag_finite_math_only
1783 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1784 "minpd\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "sseadd")
1786 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so operand order is preserved.
1788 (define_insn "*sminv2df3"
1789 [(set (match_operand:V2DF 0 "register_operand" "=x")
1790 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1791 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1793 "minpd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "sseadd")
1795 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands (the "vm" scalar form): operand 1 is tied to the
;; destination, operand 2 may be an SSE register or memory.
1797 (define_insn "sse2_vmsminv2df3"
1798 [(set (match_operand:V2DF 0 "register_operand" "=x")
1800 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1801 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1805 "minsd\t{%2, %0|%0, %2}"
1806 [(set_attr "type" "sseadd")
1807 (set_attr "mode" "DF")])
;; addsubpd.  The pattern refers to operands 1 and 2 twice: once directly
;; and once through match_dup inside a V2DF subtraction.  Operand 1 is
;; tied to the destination.
1809 (define_insn "sse3_addsubv2df3"
1810 [(set (match_operand:V2DF 0 "register_operand" "=x")
1813 (match_operand:V2DF 1 "register_operand" "0")
1814 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1815 (minus:V2DF (match_dup 1) (match_dup 2))
1818 "addsubpd\t{%2, %0|%0, %2}"
1819 [(set_attr "type" "sseadd")
1820 (set_attr "mode" "V2DF")])
;; haddpd.  For each input the pattern pairs element 0 with element 1
;; (the two vec_select selectors); operand 1 is tied to the destination and
;; operand 2 may be an SSE register or memory.
1822 (define_insn "sse3_haddv2df3"
1823 [(set (match_operand:V2DF 0 "register_operand" "=x")
1827 (match_operand:V2DF 1 "register_operand" "0")
1828 (parallel [(const_int 0)]))
1829 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1832 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1833 (parallel [(const_int 0)]))
1834 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1836 "haddpd\t{%2, %0|%0, %2}"
1837 [(set_attr "type" "sseadd")
1838 (set_attr "mode" "V2DF")])
;; hsubpd.  Same operand layout as sse3_haddv2df3: for each input the
;; pattern pairs element 0 with element 1; operand 1 is tied to the
;; destination and operand 2 may be an SSE register or memory.
1840 (define_insn "sse3_hsubv2df3"
1841 [(set (match_operand:V2DF 0 "register_operand" "=x")
1845 (match_operand:V2DF 1 "register_operand" "0")
1846 (parallel [(const_int 0)]))
1847 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1850 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1851 (parallel [(const_int 0)]))
1852 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1854 "hsubpd\t{%2, %0|%0, %2}"
1855 [(set_attr "type" "sseadd")
1856 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V2DF.  Implemented with one
;; sse3_haddv2df3 that receives operand 1 as both of its inputs.
1858 (define_expand "reduc_splus_v2df"
1859 [(match_operand:V2DF 0 "register_operand" "")
1860 (match_operand:V2DF 1 "register_operand" "")]
1863 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1867 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1869 ;; Parallel double-precision floating point comparisons
1871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare producing a V2DF result.  Operand 3 is the
;; comparison operator (predicate sse_comparison_operator); it is printed
;; through the %D3 modifier between "cmp" and "pd" in the mnemonic.
1873 (define_insn "sse2_maskcmpv2df3"
1874 [(set (match_operand:V2DF 0 "register_operand" "=x")
1875 (match_operator:V2DF 3 "sse_comparison_operator"
1876 [(match_operand:V2DF 1 "register_operand" "0")
1877 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1879 "cmp%D3pd\t{%2, %0|%0, %2}"
1880 [(set_attr "type" "ssecmp")
1881 (set_attr "mode" "V2DF")])
;; Scalar DFmode compare producing a DF result.  The comparison operator
;; in operand 3 is printed through %D3 between "cmp" and "sd".
1883 (define_insn "sse2_maskcmpdf3"
1884 [(set (match_operand:DF 0 "register_operand" "=x")
1885 (match_operator:DF 3 "sse_comparison_operator"
1886 [(match_operand:DF 1 "register_operand" "0")
1887 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1889 "cmp%D3sd\t{%2, %0|%0, %2}"
1890 [(set_attr "type" "ssecmp")
1891 (set_attr "mode" "DF")])
;; cmp..sd on V2DF operands (the "vm" scalar form).  The comparison
;; operator in operand 3 is printed through %D3; operand 1 is tied to the
;; destination.
1893 (define_insn "sse2_vmmaskcmpv2df3"
1894 [(set (match_operand:V2DF 0 "register_operand" "=x")
1896 (match_operator:V2DF 3 "sse_comparison_operator"
1897 [(match_operand:V2DF 1 "register_operand" "0")
1898 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1902 "cmp%D3sd\t{%2, %0|%0, %2}"
1903 [(set_attr "type" "ssecmp")
1904 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 (selector [0]) of
;; V2DF operand 0 and element 0 of V2DF operand 1.  Neither operand is
;; written.
1906 (define_insn "sse2_comi"
1907 [(set (reg:CCFP FLAGS_REG)
1910 (match_operand:V2DF 0 "register_operand" "x")
1911 (parallel [(const_int 0)]))
1913 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1914 (parallel [(const_int 0)]))))]
1916 "comisd\t{%1, %0|%0, %1}"
1917 [(set_attr "type" "ssecomi")
1918 (set_attr "mode" "DF")])
;; ucomisd: same operand layout as sse2_comi, but the flags register is
;; set in CCFPU mode.
1920 (define_insn "sse2_ucomi"
1921 [(set (reg:CCFPU FLAGS_REG)
1924 (match_operand:V2DF 0 "register_operand" "x")
1925 (parallel [(const_int 0)]))
1927 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1928 (parallel [(const_int 0)]))))]
1930 "ucomisd\t{%1, %0|%0, %1}"
1931 [(set_attr "type" "ssecomi")
1932 (set_attr "mode" "DF")])
;; Vector conditional expander for V2DF.  Operand 3 is a comparison
;; operator applied to operands 4 and 5; operands 1 and 2 are the two
;; V2DF value operands.  Expansion is delegated to ix86_expand_fp_vcond,
;; whose return value is tested.
;; NOTE(review): the statements that follow the test are not visible in
;; this listing.
1934 (define_expand "vcondv2df"
1935 [(set (match_operand:V2DF 0 "register_operand" "")
1937 (match_operator 3 ""
1938 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1939 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1940 (match_operand:V2DF 1 "general_operand" "")
1941 (match_operand:V2DF 2 "general_operand" "")))]
1944 if (ix86_expand_fp_vcond (operands))
1950 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1952 ;; Parallel double-precision floating point logical operations
1954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1956 (define_expand "andv2df3"
1957 [(set (match_operand:V2DF 0 "register_operand" "")
1958 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1959 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1961 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Commutative ("%" on operand 1), operand 1 tied to the
;; destination; validity checked with ix86_binary_operator_ok.
1963 (define_insn "*andv2df3"
1964 [(set (match_operand:V2DF 0 "register_operand" "=x")
1965 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1966 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1967 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1968 "andpd\t{%2, %0|%0, %2}"
1969 [(set_attr "type" "sselog")
1970 (set_attr "mode" "V2DF")])
;; andnpd: computes (NOT operand 1) AND operand 2.  Despite the "nand" in
;; the pattern name, only operand 1 is complemented.  Operand 1 is tied to
;; the destination.
1972 (define_insn "sse2_nandv2df3"
1973 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1975 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1977 "andnpd\t{%2, %0|%0, %2}"
1978 [(set_attr "type" "sselog")
1979 (set_attr "mode" "V2DF")])
;; V2DF bitwise inclusive-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1981 (define_expand "iorv2df3"
1982 [(set (match_operand:V2DF 0 "register_operand" "")
1983 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1984 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1986 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Commutative ("%" on operand 1), operand 1 tied to the
;; destination; validity checked with ix86_binary_operator_ok.
1988 (define_insn "*iorv2df3"
1989 [(set (match_operand:V2DF 0 "register_operand" "=x")
1990 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1991 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1992 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1993 "orpd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "sselog")
1995 (set_attr "mode" "V2DF")])
;; V2DF bitwise exclusive-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1997 (define_expand "xorv2df3"
1998 [(set (match_operand:V2DF 0 "register_operand" "")
1999 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2000 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2002 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Commutative ("%" on operand 1), operand 1 tied to the
;; destination; validity checked with ix86_binary_operator_ok.
2004 (define_insn "*xorv2df3"
2005 [(set (match_operand:V2DF 0 "register_operand" "=x")
2006 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2007 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2008 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
2009 "xorpd\t{%2, %0|%0, %2}"
2010 [(set_attr "type" "sselog")
2011 (set_attr "mode" "V2DF")])
2013 ;; Also define scalar versions. These are used for abs, neg, and
2014 ;; conditional move. Using subregs into vector modes causes register
2015 ;; allocation lossage. These patterns do not allow memory operands
2016 ;; because the native instructions read the full 128-bits.
;; Scalar DFmode bitwise AND, register operands only.  Emitted as the
;; packed andpd instruction, hence the V2DF "mode" attribute.
2018 (define_insn "*anddf3"
2019 [(set (match_operand:DF 0 "register_operand" "=x")
2020 (and:DF (match_operand:DF 1 "register_operand" "0")
2021 (match_operand:DF 2 "register_operand" "x")))]
2023 "andpd\t{%2, %0|%0, %2}"
2024 [(set_attr "type" "sselog")
2025 (set_attr "mode" "V2DF")])
;; Scalar DFmode (NOT operand 1) AND operand 2, register operands only.
;; Emitted as the packed andnpd instruction, hence the V2DF "mode"
;; attribute.
2027 (define_insn "*nanddf3"
2028 [(set (match_operand:DF 0 "register_operand" "=x")
2029 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2030 (match_operand:DF 2 "register_operand" "x")))]
2032 "andnpd\t{%2, %0|%0, %2}"
2033 [(set_attr "type" "sselog")
2034 (set_attr "mode" "V2DF")])
;; Scalar DFmode bitwise inclusive OR, register operands only.  Emitted as
;; the packed orpd instruction, hence the V2DF "mode" attribute.
2036 (define_insn "*iordf3"
2037 [(set (match_operand:DF 0 "register_operand" "=x")
2038 (ior:DF (match_operand:DF 1 "register_operand" "0")
2039 (match_operand:DF 2 "register_operand" "x")))]
2041 "orpd\t{%2, %0|%0, %2}"
2042 [(set_attr "type" "sselog")
2043 (set_attr "mode" "V2DF")])
;; Scalar DFmode bitwise exclusive OR, register operands only.  Emitted as
;; the packed xorpd instruction, hence the V2DF "mode" attribute.
2045 (define_insn "*xordf3"
2046 [(set (match_operand:DF 0 "register_operand" "=x")
2047 (xor:DF (match_operand:DF 1 "register_operand" "0")
2048 (match_operand:DF 2 "register_operand" "x")))]
2050 "xorpd\t{%2, %0|%0, %2}"
2051 [(set_attr "type" "sselog")
2052 (set_attr "mode" "V2DF")])
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2056 ;; Parallel double-precision floating point conversion operations
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: `float' conversion of V2SI operand 1 to V2DF.  Alternative 0
;; reads an MMX ("y") register and is tagged unit "mmx"; alternative 1
;; reads memory and leaves the unit attribute at "*".
2060 (define_insn "sse2_cvtpi2pd"
2061 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2062 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2064 "cvtpi2pd\t{%1, %0|%0, %1}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "unit" "mmx,*")
2067 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 (SSE register or memory) converted to V2SI in
;; an MMX ("y") register, represented as UNSPEC_FIX_NOTRUNC.
2069 (define_insn "sse2_cvtpd2pi"
2070 [(set (match_operand:V2SI 0 "register_operand" "=y")
2071 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2072 UNSPEC_FIX_NOTRUNC))]
2074 "cvtpd2pi\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "ssecvt")
2076 (set_attr "unit" "mmx")
2077 (set_attr "prefix_data16" "1")
2078 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF operand 1 (SSE register or memory) converted to V2SI in
;; an MMX ("y") register using the RTL `fix' operator.
;; NOTE(review): the "mode" attribute is TI here but DI in sse2_cvtpd2pi --
;; confirm that the difference is intended.
2080 (define_insn "sse2_cvttpd2pi"
2081 [(set (match_operand:V2SI 0 "register_operand" "=y")
2082 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2084 "cvttpd2pi\t{%1, %0|%0, %1}"
2085 [(set_attr "type" "ssecvt")
2086 (set_attr "unit" "mmx")
2087 (set_attr "prefix_data16" "1")
2088 (set_attr "mode" "TI")])
;; cvtsi2sd: SImode operand 2 is converted to DF with `float'; V2DF
;; operand 1 is tied to the destination.  Alternative 0 takes the integer
;; from a general register, alternative 1 from memory; the decode
;; attributes give one value per alternative.
2090 (define_insn "sse2_cvtsi2sd"
2091 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2094 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2095 (match_operand:V2DF 1 "register_operand" "0,0")
2098 "cvtsi2sd\t{%2, %0|%0, %2}"
2099 [(set_attr "type" "sseicvt")
2100 (set_attr "mode" "DF")
2101 (set_attr "athlon_decode" "double,direct")
2102 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: same shape as sse2_cvtsi2sd but with a DImode integer
;; source.  Only enabled when both TARGET_SSE2 and TARGET_64BIT hold.
2104 (define_insn "sse2_cvtsi2sdq"
2105 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2108 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2109 (match_operand:V2DF 1 "register_operand" "0,0")
2111 "TARGET_SSE2 && TARGET_64BIT"
2112 "cvtsi2sdq\t{%2, %0|%0, %2}"
2113 [(set_attr "type" "sseicvt")
2114 (set_attr "mode" "DF")
2115 (set_attr "athlon_decode" "double,direct")
2116 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SImode general register.  Modelled as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2, no "amdfam10_decode" attribute is
;; set here -- confirm whether that is intended.
2118 (define_insn "sse2_cvtsd2si"
2119 [(set (match_operand:SI 0 "register_operand" "=r,r")
2122 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2123 (parallel [(const_int 0)]))]
2124 UNSPEC_FIX_NOTRUNC))]
2126 "cvtsd2si\t{%1, %0|%0, %1}"
2127 [(set_attr "type" "sseicvt")
2128 (set_attr "athlon_decode" "double,vector")
2129 (set_attr "prefix_rep" "1")
2130 (set_attr "mode" "SI")])
;; cvtsd2si taking a scalar DFmode operand directly (SSE register or
;; memory) instead of a vector element; result is an SImode general
;; register.  Modelled as UNSPEC_FIX_NOTRUNC.
2132 (define_insn "sse2_cvtsd2si_2"
2133 [(set (match_operand:SI 0 "register_operand" "=r,r")
2134 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2135 UNSPEC_FIX_NOTRUNC))]
2137 "cvtsd2si\t{%1, %0|%0, %1}"
2138 [(set_attr "type" "sseicvt")
2139 (set_attr "athlon_decode" "double,vector")
2140 (set_attr "amdfam10_decode" "double,double")
2141 (set_attr "prefix_rep" "1")
2142 (set_attr "mode" "SI")])
;; cvtsd2siq: convert element 0 of the V2DF operand 1 to a DImode general
;; register.  Modelled as UNSPEC_FIX_NOTRUNC; requires TARGET_SSE2 on a
;; 64-bit target.
2144 (define_insn "sse2_cvtsd2siq"
2145 [(set (match_operand:DI 0 "register_operand" "=r,r")
2148 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2149 (parallel [(const_int 0)]))]
2150 UNSPEC_FIX_NOTRUNC))]
2151 "TARGET_SSE2 && TARGET_64BIT"
2152 "cvtsd2siq\t{%1, %0|%0, %1}"
2153 [(set_attr "type" "sseicvt")
2154 (set_attr "athlon_decode" "double,vector")
2155 (set_attr "prefix_rep" "1")
2156 (set_attr "mode" "DI")])
;; cvtsd2siq taking a scalar DFmode operand directly (SSE register or
;; memory); result is a DImode general register.  Modelled as
;; UNSPEC_FIX_NOTRUNC; requires TARGET_SSE2 on a 64-bit target.
2158 (define_insn "sse2_cvtsd2siq_2"
2159 [(set (match_operand:DI 0 "register_operand" "=r,r")
2160 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2161 UNSPEC_FIX_NOTRUNC))]
2162 "TARGET_SSE2 && TARGET_64BIT"
2163 "cvtsd2siq\t{%1, %0|%0, %1}"
2164 [(set_attr "type" "sseicvt")
2165 (set_attr "athlon_decode" "double,vector")
2166 (set_attr "amdfam10_decode" "double,double")
2167 (set_attr "prefix_rep" "1")
2168 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SImode general register.
2170 (define_insn "sse2_cvttsd2si"
2171 [(set (match_operand:SI 0 "register_operand" "=r,r")
2174 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2175 (parallel [(const_int 0)]))))]
2177 "cvttsd2si\t{%1, %0|%0, %1}"
2178 [(set_attr "type" "sseicvt")
2179 (set_attr "prefix_rep" "1")
2180 (set_attr "mode" "SI")
2181 (set_attr "athlon_decode" "double,vector")
2182 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: convert element 0 of the V2DF operand 1 to a DImode general
;; register.  Requires TARGET_SSE2 on a 64-bit target.
2184 (define_insn "sse2_cvttsd2siq"
2185 [(set (match_operand:DI 0 "register_operand" "=r,r")
2188 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2189 (parallel [(const_int 0)]))))]
2190 "TARGET_SSE2 && TARGET_64BIT"
2191 "cvttsd2siq\t{%1, %0|%0, %1}"
2192 [(set_attr "type" "sseicvt")
2193 (set_attr "prefix_rep" "1")
2194 (set_attr "mode" "DI")
2195 (set_attr "athlon_decode" "double,vector")
2196 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produce a V2DF result from elements 0 and 1 of the V4SI
;; operand 1 (SSE register or memory).
2198 (define_insn "sse2_cvtdq2pd"
2199 [(set (match_operand:V2DF 0 "register_operand" "=x")
2202 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2203 (parallel [(const_int 0) (const_int 1)]))))]
2205 "cvtdq2pd\t{%1, %0|%0, %1}"
2206 [(set_attr "type" "ssecvt")
2207 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq (V2DF -> V4SI destination).  The conversion itself
;; is a V2SI unspec of operand 1; the preparation statement sets
;; operands[2] to the V2SImode zero constant.
2209 (define_expand "sse2_cvtpd2dq"
2210 [(set (match_operand:V4SI 0 "register_operand" "")
2212 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2216 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvtpd2dq: V2SI unspec of the V2DF operand 1 together
;; with operand 2, which must be the V2SI zero constant (const0_operand).
;; The destination is a V4SI SSE register.
2218 (define_insn "*sse2_cvtpd2dq"
2219 [(set (match_operand:V4SI 0 "register_operand" "=x")
2221 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2223 (match_operand:V2SI 2 "const0_operand" "")))]
2225 "cvtpd2dq\t{%1, %0|%0, %1}"
2226 [(set_attr "type" "ssecvt")
2227 (set_attr "prefix_rep" "1")
2228 (set_attr "mode" "TI")
2229 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq (V2DF -> V4SI destination).  The conversion is
;; (fix:V2SI operand 1); the preparation statement sets operands[2] to the
;; V2SImode zero constant.
2231 (define_expand "sse2_cvttpd2dq"
2232 [(set (match_operand:V4SI 0 "register_operand" "")
2234 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2237 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvttpd2dq: (fix:V2SI operand 1) together with operand
;; 2, which must be the V2SI zero constant.  Destination is a V4SI SSE
;; register.
2239 (define_insn "*sse2_cvttpd2dq"
2240 [(set (match_operand:V4SI 0 "register_operand" "=x")
2242 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2243 (match_operand:V2SI 2 "const0_operand" "")))]
2245 "cvttpd2dq\t{%1, %0|%0, %1}"
2246 [(set_attr "type" "ssecvt")
2247 (set_attr "prefix_rep" "1")
2248 (set_attr "mode" "TI")
2249 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of the V2DF operand 2 (SSE register or memory)
;; into a V4SF destination; operand 1 is a V4SF register tied to the
;; destination.
2251 (define_insn "sse2_cvtsd2ss"
2252 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2255 (float_truncate:V2SF
2256 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2257 (match_operand:V4SF 1 "register_operand" "0,0")
2260 "cvtsd2ss\t{%2, %0|%0, %2}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "athlon_decode" "vector,double")
2263 (set_attr "amdfam10_decode" "vector,double")
2264 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2 (SSE register or
;; memory) as source; operand 1 is a V2DF register tied to the
;; destination.
2266 (define_insn "sse2_cvtss2sd"
2267 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2271 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2272 (parallel [(const_int 0) (const_int 1)])))
2273 (match_operand:V2DF 1 "register_operand" "0,0")
2276 "cvtss2sd\t{%2, %0|%0, %2}"
2277 [(set_attr "type" "ssecvt")
2278 (set_attr "amdfam10_decode" "vector,double")
2279 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps (V2DF -> V4SF destination).  The conversion is
;; (float_truncate:V2SF operand 1); the preparation statement sets
;; operands[2] to the V2SFmode zero constant.
2281 (define_expand "sse2_cvtpd2ps"
2282 [(set (match_operand:V4SF 0 "register_operand" "")
2284 (float_truncate:V2SF
2285 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2288 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for cvtpd2ps: (float_truncate:V2SF operand 1) together
;; with operand 2, which must be the V2SF zero constant.  Destination is a
;; V4SF SSE register.
2290 (define_insn "*sse2_cvtpd2ps"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x")
2293 (float_truncate:V2SF
2294 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2295 (match_operand:V2SF 2 "const0_operand" "")))]
2297 "cvtpd2ps\t{%1, %0|%0, %1}"
2298 [(set_attr "type" "ssecvt")
2299 (set_attr "prefix_data16" "1")
2300 (set_attr "mode" "V4SF")
2301 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produce a V2DF result from elements 0 and 1 of the V4SF
;; operand 1 (SSE register or memory).
2303 (define_insn "sse2_cvtps2pd"
2304 [(set (match_operand:V2DF 0 "register_operand" "=x")
2307 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2308 (parallel [(const_int 0) (const_int 1)]))))]
2310 "cvtps2pd\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "ssecvt")
2312 (set_attr "mode" "V2DF")
2313 (set_attr "amdfam10_decode" "direct")])
;; Expander yielding a V2DF result (operand 0) from the V4SF operand 1 in
;; two steps: a shuffle whose selector starts at element 6, followed by a
;; set of operand 0 that selects elements 0 and 1.  operands[2] is a fresh
;; V4SFmode pseudo allocated by the preparation statement.
2315 (define_expand "vec_unpacks_hi_v4sf"
2320 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2321 (parallel [(const_int 6)
2325 (set (match_operand:V2DF 0 "register_operand" "")
2329 (parallel [(const_int 0) (const_int 1)]))))]
2332 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander yielding a V2DF result (operand 0) from elements 0 and 1 of the
;; V4SF operand 1.
2335 (define_expand "vec_unpacks_lo_v4sf"
2336 [(set (match_operand:V2DF 0 "register_operand" "")
2339 (match_operand:V4SF 1 "nonimmediate_operand" "")
2340 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: widen via gen_vec_unpacks_hi_v8hi into a V4SI temporary,
;; then convert that temporary to V4SF with gen_sse2_cvtdq2ps.
2343 (define_expand "vec_unpacks_float_hi_v8hi"
2344 [(match_operand:V4SF 0 "register_operand" "")
2345 (match_operand:V8HI 1 "register_operand" "")]
2348 rtx tmp = gen_reg_rtx (V4SImode);
2350 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2351 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: widen via gen_vec_unpacks_lo_v8hi into a V4SI temporary,
;; then convert that temporary to V4SF with gen_sse2_cvtdq2ps.
2355 (define_expand "vec_unpacks_float_lo_v8hi"
2356 [(match_operand:V4SF 0 "register_operand" "")
2357 (match_operand:V8HI 1 "register_operand" "")]
2360 rtx tmp = gen_reg_rtx (V4SImode);
2362 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2363 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: widen via gen_vec_unpacku_hi_v8hi into a V4SI temporary,
;; then convert that temporary to V4SF with gen_sse2_cvtdq2ps.
2367 (define_expand "vec_unpacku_float_hi_v8hi"
2368 [(match_operand:V4SF 0 "register_operand" "")
2369 (match_operand:V8HI 1 "register_operand" "")]
2372 rtx tmp = gen_reg_rtx (V4SImode);
2374 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2375 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: widen via gen_vec_unpacku_lo_v8hi into a V4SI temporary,
;; then convert that temporary to V4SF with gen_sse2_cvtdq2ps.
2379 (define_expand "vec_unpacku_float_lo_v8hi"
2380 [(match_operand:V4SF 0 "register_operand" "")
2381 (match_operand:V8HI 1 "register_operand" "")]
2384 rtx tmp = gen_reg_rtx (V4SImode);
2386 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2387 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander yielding a V2DF result (operand 0) from the V4SI operand 1 in
;; two steps: a shuffle whose selector starts at element 2, followed by a
;; set of operand 0 that selects elements 0 and 1.  operands[2] is a fresh
;; V4SImode pseudo allocated by the preparation statement.
2391 (define_expand "vec_unpacks_float_hi_v4si"
2394 (match_operand:V4SI 1 "nonimmediate_operand" "")
2395 (parallel [(const_int 2)
2399 (set (match_operand:V2DF 0 "register_operand" "")
2403 (parallel [(const_int 0) (const_int 1)]))))]
2406 operands[2] = gen_reg_rtx (V4SImode);
;; Expander yielding a V2DF result (operand 0) from elements 0 and 1 of the
;; V4SI operand 1.
2409 (define_expand "vec_unpacks_float_lo_v4si"
2410 [(set (match_operand:V2DF 0 "register_operand" "")
2413 (match_operand:V4SI 1 "nonimmediate_operand" "")
2414 (parallel [(const_int 0) (const_int 1)]))))]
;; Pack two V2DF operands into one V4SF result: each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary, and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
2417 (define_expand "vec_pack_trunc_v2df"
2418 [(match_operand:V4SF 0 "register_operand" "")
2419 (match_operand:V2DF 1 "nonimmediate_operand" "")
2420 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2425 r1 = gen_reg_rtx (V4SFmode);
2426 r2 = gen_reg_rtx (V4SFmode);
2428 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2429 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2430 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result: each input is converted
;; with gen_sse2_cvttpd2dq into its own V4SI temporary, and the two
;; temporaries are combined with gen_sse2_punpcklqdq, operating on V2DI
;; lowparts of the destination and of both temporaries.
2434 (define_expand "vec_pack_sfix_trunc_v2df"
2435 [(match_operand:V4SI 0 "register_operand" "")
2436 (match_operand:V2DF 1 "nonimmediate_operand" "")
2437 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2442 r1 = gen_reg_rtx (V4SImode);
2443 r2 = gen_reg_rtx (V4SImode);
2445 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2446 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2447 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2448 gen_lowpart (V2DImode, r1),
2449 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF operands into one V4SI result: each input is converted
;; with gen_sse2_cvtpd2dq into its own V4SI temporary, and the two
;; temporaries are combined with gen_sse2_punpcklqdq, operating on V2DI
;; lowparts of the destination and of both temporaries.
2453 (define_expand "vec_pack_sfix_v2df"
2454 [(match_operand:V4SI 0 "register_operand" "")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "")
2456 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2461 r1 = gen_reg_rtx (V4SImode);
2462 r2 = gen_reg_rtx (V4SImode);
2464 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2465 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2466 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2467 gen_lowpart (V2DImode, r1),
2468 gen_lowpart (V2DImode, r2)));
2473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2475 ;; Parallel double-precision floating point element swizzling
2477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Selection from V2DF operands 1 and 2 whose selector begins with
;; element 1.  Three alternatives, chosen by where the operands live:
;;   0: both in registers             -> unpckhpd
;;   1: operand 1 in offsettable mem  -> movlpd from its high half (%H1)
;;   2: destination in memory         -> movhpd store
;; The insn condition rejects operands 1 and 2 both being memory.
2479 (define_insn "sse2_unpckhpd"
2480 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2483 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2484 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2485 (parallel [(const_int 1)
2487 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2489 unpckhpd\t{%2, %0|%0, %2}
2490 movlpd\t{%H1, %0|%0, %H1}
2491 movhpd\t{%1, %0|%0, %1}"
2492 [(set_attr "type" "sselog,ssemov,ssemov")
2493 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand; the selector begins with element 0.
;; Alternative 0 (register destination) emits movddup; alternative 1 has
;; an offsettable-memory destination and is typed "ssemov".  The condition
;; rejects a memory-to-memory form.
2495 (define_insn "*sse3_movddup"
2496 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2499 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2501 (parallel [(const_int 0)
2503 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2505 movddup\t{%1, %0|%0, %1}
2507 [(set_attr "type" "sselog1,ssemov")
2508 (set_attr "mode" "V2DF")])
;; Post-reload rewrite (TARGET_SSE3) for a V2DF memory destination fed
;; from a register with a selector starting at element 0: the DFmode
;; register with operand 1's register number is stored twice, at byte
;; offsets 0 and 8 of the destination.
;; NOTE(review): the header line of this pattern is not visible in this
;; excerpt -- presumably a define_split; confirm.
2511 [(set (match_operand:V2DF 0 "memory_operand" "")
2514 (match_operand:V2DF 1 "register_operand" "")
2516 (parallel [(const_int 0)
2518 "TARGET_SSE3 && reload_completed"
2521 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2522 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2523 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Selection from V2DF operands 1 and 2 whose selector begins with
;; element 0.  Operand 1 is always tied to the destination.  Alternatives:
;;   0: operand 2 in a register        -> unpcklpd
;;   1: operand 2 in memory            -> movhpd load
;;   2: destination in offsettable mem -> movlpd store to its high half
;;                                        (%H0)
;; The insn condition rejects operands 1 and 2 both being memory.
2527 (define_insn "sse2_unpcklpd"
2528 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2531 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2532 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2533 (parallel [(const_int 0)
2535 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2537 unpcklpd\t{%2, %0|%0, %2}
2538 movhpd\t{%2, %0|%0, %2}
2539 movlpd\t{%2, %H0|%H0, %2}"
2540 [(set_attr "type" "sselog,ssemov,ssemov")
2541 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Intrinsic-facing shufpd expander.  Operand 3 is the integer immediate;
;; it is decoded into explicit element selectors and handed to
;; gen_sse2_shufpd_1.  Bit 1 of the mask picks the last selector shown
;; here: 3 when set, 2 when clear.
2543 (define_expand "sse2_shufpd"
2544 [(match_operand:V2DF 0 "register_operand" "")
2545 (match_operand:V2DF 1 "register_operand" "")
2546 (match_operand:V2DF 2 "nonimmediate_operand" "")
2547 (match_operand:SI 3 "const_int_operand" "")]
2550 int mask = INTVAL (operands[3]);
2551 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2553 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit selectors: operand 3 is in 0..1 and operand 4 in
;; 2..3.  The output code folds them back into the instruction immediate:
;; bit 0 = operand 3, bit 1 = operand 4 - 2.  operands[3] is overwritten
;; with that immediate before the template is returned.
2557 (define_insn "sse2_shufpd_1"
2558 [(set (match_operand:V2DF 0 "register_operand" "=x")
2561 (match_operand:V2DF 1 "register_operand" "0")
2562 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2563 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2564 (match_operand 4 "const_2_to_3_operand" "")])))]
2568 mask = INTVAL (operands[3]);
2569 mask |= (INTVAL (operands[4]) - 2) << 1;
2570 operands[3] = GEN_INT (mask);
2572 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2574 [(set_attr "type" "sselog")
2575 (set_attr "mode" "V2DF")])
;; Extract element 1 of the V2DF operand 1 into a DFmode destination.
;; Alternative 0 stores an SSE register to memory with movhpd; the other
;; two alternatives (register destination) are typed sselog1 and ssemov.
;; The condition rejects a memory-to-memory form.
2577 (define_insn "sse2_storehpd"
2578 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2580 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2581 (parallel [(const_int 1)])))]
2582 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2584 movhpd\t{%1, %0|%0, %1}
2587 [(set_attr "type" "ssemov,sselog1,ssemov")
2588 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): extracting element 1 of a V2DF
;; memory operand into a DF register becomes a plain DFmode move from the
;; same address plus 8 bytes.
;; NOTE(review): the header line of this pattern is not visible in this
;; excerpt -- presumably a define_split; confirm.
2591 [(set (match_operand:DF 0 "register_operand" "")
2593 (match_operand:V2DF 1 "memory_operand" "")
2594 (parallel [(const_int 1)])))]
2595 "TARGET_SSE2 && reload_completed"
2596 [(set (match_dup 0) (match_dup 1))]
2598 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of the V2DF operand 1 into a DFmode destination.
;; Alternative 0 stores an SSE register to memory with movlpd; the other
;; two alternatives have a register destination.  The condition rejects a
;; memory-to-memory form.
2601 (define_insn "sse2_storelpd"
2602 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2604 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2605 (parallel [(const_int 0)])))]
2606 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2608 movlpd\t{%1, %0|%0, %1}
2611 [(set_attr "type" "ssemov")
2612 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): extracting element 0 of a V2DF
;; operand into a DF register becomes a plain DFmode move.  The source is
;; re-expressed in DFmode, either as a hard register with the same
;; register number or via gen_lowpart.
;; NOTE(review): the header line and the test choosing between the two
;; forms are not visible in this excerpt -- confirm against the full file.
2615 [(set (match_operand:DF 0 "register_operand" "")
2617 (match_operand:V2DF 1 "nonimmediate_operand" "")
2618 (parallel [(const_int 0)])))]
2619 "TARGET_SSE2 && reload_completed"
2622 rtx op1 = operands[1];
2624 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2626 op1 = gen_lowpart (DFmode, op1);
2627 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of operand 1 and the DFmode operand 2.
;; Alternatives:
;;   0: operand 2 in memory                 -> movhpd load
;;   1: operand 2 in an SSE register        -> unpcklpd
;;   2: operand 2 tied to the destination   -> shufpd with immediate 1
;;   3: destination in offsettable memory   (type "other")
;; The condition rejects operands 1 and 2 both being memory.
2631 (define_insn "sse2_loadhpd"
2632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2635 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2636 (parallel [(const_int 0)]))
2637 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2638 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2640 movhpd\t{%2, %0|%0, %2}
2641 unpcklpd\t{%2, %0|%0, %2}
2642 shufpd\t{$1, %1, %0|%0, %1, 1}
2644 [(set_attr "type" "ssemov,sselog,sselog,other")
2645 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): when a V2DF memory location keeps
;; its own element 0 and takes a DF register as the other element, only
;; that register needs storing -- a DFmode move to the same address plus
;; 8 bytes.
;; NOTE(review): the header line of this pattern is not visible in this
;; excerpt -- presumably a define_split; confirm.
2648 [(set (match_operand:V2DF 0 "memory_operand" "")
2650 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2651 (match_operand:DF 1 "register_operand" "")))]
2652 "TARGET_SSE2 && reload_completed"
2653 [(set (match_dup 0) (match_dup 1))]
2655 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF from the DFmode operand 2 and element 1 of operand 1.
;; Operand 1 may also be the zero vector (constraint "C", alternative 0).
;; Six alternatives; the first five emit, in order: movsd, movlpd, movsd,
;; shufpd with immediate 2, and movhpd from the high half of operand 1
;; (%H1).  The last alternative has a memory destination and is typed
;; "other".  The condition rejects operands 1 and 2 both being memory.
2658 (define_insn "sse2_loadlpd"
2659 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2661 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2663 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2664 (parallel [(const_int 1)]))))]
2665 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2667 movsd\t{%2, %0|%0, %2}
2668 movlpd\t{%2, %0|%0, %2}
2669 movsd\t{%2, %0|%0, %2}
2670 shufpd\t{$2, %2, %0|%0, %2, 2}
2671 movhpd\t{%H1, %0|%0, %H1}
2673 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2674 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload rewrite (TARGET_SSE2): a V2DF memory location takes a DF
;; register as one element and keeps its own element 1.  The replacement
;; is a single DFmode move of the register to the address plus 8 bytes.
;; NOTE(review): element 1 (the half being preserved) already lives at
;; byte offset 8, so storing the new value at offset 8 looks suspicious --
;; offset 0 may have been intended.  Confirm against upstream before
;; changing.  The header line of this pattern is also not visible here.
2677 [(set (match_operand:V2DF 0 "memory_operand" "")
2679 (match_operand:DF 1 "register_operand" "")
2680 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2681 "TARGET_SSE2 && reload_completed"
2682 [(set (match_dup 0) (match_dup 1))]
2684 operands[0] = adjust_address (operands[0], DFmode, 8);
2687 ;; Not sure these two are ever used, but it doesn't hurt to have
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF operand into a DFmode destination, using single-precision moves:
;;   0: store to memory                  -> movhps
;;   1: register to register             -> movhlps
;;   2: load from offsettable memory     -> movlps from the high half (%H1)
;; The condition rejects a memory-to-memory form.
2689 (define_insn "*vec_extractv2df_1_sse"
2690 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2692 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2693 (parallel [(const_int 1)])))]
2694 "!TARGET_SSE2 && TARGET_SSE
2695 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2697 movhps\t{%1, %0|%0, %1}
2698 movhlps\t{%1, %0|%0, %1}
2699 movlps\t{%H1, %0|%0, %H1}"
2700 [(set_attr "type" "ssemov")
2701 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF operand into a DFmode destination, using single-precision moves:
;;   0: store to memory        -> movlps
;;   1: register to register   -> movaps
;;   2: load from memory       -> movlps
;; The condition rejects a memory-to-memory form.
2703 (define_insn "*vec_extractv2df_0_sse"
2704 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2706 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2707 (parallel [(const_int 0)])))]
2708 "!TARGET_SSE2 && TARGET_SSE
2709 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2711 movlps\t{%1, %0|%0, %1}
2712 movaps\t{%1, %0|%0, %1}
2713 movlps\t{%1, %0|%0, %1}"
2714 [(set_attr "type" "ssemov")
2715 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combination of V2DF operands 2 and 1 with six alternatives; in order
;; they emit: movsd, movlpd (load), movlpd (store), shufpd with immediate
;; 2, movhps from the high half of operand 1 (%H1), and movhps to the high
;; half of the destination (%H0).
2717 (define_insn "sse2_movsd"
2718 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2720 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2721 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2725 movsd\t{%2, %0|%0, %2}
2726 movlpd\t{%2, %0|%0, %2}
2727 movlpd\t{%2, %0|%0, %2}
2728 shufpd\t{$2, %2, %0|%0, %2, 2}
2729 movhps\t{%H1, %0|%0, %H1}
2730 movhps\t{%1, %H0|%H0, %1}"
2731 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2732 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result built from a single DFmode operand (SSE register or
;; memory), emitted as movddup.
2734 (define_insn "*vec_dupv2df_sse3"
2735 [(set (match_operand:V2DF 0 "register_operand" "=x")
2737 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2739 "movddup\t{%1, %0|%0, %1}"
2740 [(set_attr "type" "sselog1")
2741 (set_attr "mode" "DF")])
;; V2DF result built from a single DFmode register operand that is tied to
;; the destination.  (The output template line is not visible in this
;; excerpt.)
2743 (define_insn "*vec_dupv2df"
2744 [(set (match_operand:V2DF 0 "register_operand" "=x")
2746 (match_operand:DF 1 "register_operand" "0")))]
2749 [(set_attr "type" "sselog1")
2750 (set_attr "mode" "V2DF")])
;; V2DF result built from the DFmode operand 1 (SSE register or memory),
;; emitted as movddup.  (The remainder of the RTL template is not visible
;; in this excerpt.)
2752 (define_insn "*vec_concatv2df_sse3"
2753 [(set (match_operand:V2DF 0 "register_operand" "=x")
2755 (match_operand:DF 1 "nonimmediate_operand" "xm")
2758 "movddup\t{%1, %0|%0, %1}"
2759 [(set_attr "type" "sselog1")
2760 (set_attr "mode" "DF")])
;; V2DF result built from two DFmode operands.  Five alternatives; in
;; order they emit unpcklpd, movhpd, movsd (operand 2 is the zero constant
;; "C", operand 1 comes from memory), movlhps and movhps.  The first three
;; use the "Yt" register constraint, the last two plain "x", and the
;; "mode" attribute of the last two is single-precision (V4SF, V2SF).
2762 (define_insn "*vec_concatv2df"
2763 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2765 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2766 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2769 unpcklpd\t{%2, %0|%0, %2}
2770 movhpd\t{%2, %0|%0, %2}
2771 movsd\t{%1, %0|%0, %1}
2772 movlhps\t{%2, %0|%0, %2}
2773 movhps\t{%2, %0|%0, %2}"
2774 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2775 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Set one element of the V2DF operand 0 to the DF operand 1.  Operand 2
;; is the constant element index; the work is delegated to
;; ix86_expand_vector_set with its first argument false.
2777 (define_expand "vec_setv2df"
2778 [(match_operand:V2DF 0 "register_operand" "")
2779 (match_operand:DF 1 "register_operand" "")
2780 (match_operand 2 "const_int_operand" "")]
2783 ix86_expand_vector_set (false, operands[0], operands[1],
2784 INTVAL (operands[2]));
;; Extract one element of the V2DF operand 1 into the DF operand 0.
;; Operand 2 is the constant element index; the work is delegated to
;; ix86_expand_vector_extract with its first argument false.
2788 (define_expand "vec_extractv2df"
2789 [(match_operand:DF 0 "register_operand" "")
2790 (match_operand:V2DF 1 "register_operand" "")
2791 (match_operand 2 "const_int_operand" "")]
2794 ix86_expand_vector_extract (false, operands[0], operands[1],
2795 INTVAL (operands[2]));
;; Initialise the V2DF operand 0 from operand 1 (no predicate or mode is
;; imposed on operand 1).  Delegated to ix86_expand_vector_init with its
;; first argument false.
2799 (define_expand "vec_initv2df"
2800 [(match_operand:V2DF 0 "register_operand" "")
2801 (match_operand 1 "" "")]
2804 ix86_expand_vector_init (false, operands[0], operands[1]);
2808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2810 ;; Parallel integral arithmetic
2812 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation for every mode in SSEMODEI.  The preparation statement forces
;; the all-zero vector of the current mode into a register and stores it
;; in operands[2] for use by the pattern.
2814 (define_expand "neg<mode>2"
2815 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2818 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2820 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Element-wise addition for every mode in SSEMODEI.  Operands are
;; legitimised with ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
2822 (define_expand "add<mode>3"
2823 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2824 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2825 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2827 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be an SSE register or memory.
2829 (define_insn "*add<mode>3"
2830 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2832 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2833 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2834 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2835 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2836 [(set_attr "type" "sseiadd")
2837 (set_attr "prefix_data16" "1")
2838 (set_attr "mode" "TI")])
;; padds{b,w}: addition checked against SS_PLUS, for the byte and word
;; vector modes in SSEMODE12.  Operand 1 is commutative and tied to the
;; destination.
2840 (define_insn "sse2_ssadd<mode>3"
2841 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2843 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2844 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2845 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2846 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2847 [(set_attr "type" "sseiadd")
2848 (set_attr "prefix_data16" "1")
2849 (set_attr "mode" "TI")])
;; paddus{b,w}: addition checked against US_PLUS, for the byte and word
;; vector modes in SSEMODE12.  Operand 1 is commutative and tied to the
;; destination.
2851 (define_insn "sse2_usadd<mode>3"
2852 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2854 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2855 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2856 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2857 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2858 [(set_attr "type" "sseiadd")
2859 (set_attr "prefix_data16" "1")
2860 (set_attr "mode" "TI")])
;; Element-wise subtraction for every mode in SSEMODEI.  Operand 1 must
;; already be a register (subtraction is not commutative); operands are
;; legitimised with ix86_fixup_binary_operands_no_copy.
2862 (define_expand "sub<mode>3"
2863 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2864 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2865 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2867 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is a register tied to the destination; operand 2
;; may be an SSE register or memory.
2869 (define_insn "*sub<mode>3"
2870 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2872 (match_operand:SSEMODEI 1 "register_operand" "0")
2873 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2875 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2876 [(set_attr "type" "sseiadd")
2877 (set_attr "prefix_data16" "1")
2878 (set_attr "mode" "TI")])
;; psubs{b,w} for the byte and word vector modes in SSEMODE12.  Operand 1
;; is a register tied to the destination; operand 2 may be an SSE register
;; or memory.
2880 (define_insn "sse2_sssub<mode>3"
2881 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2883 (match_operand:SSEMODE12 1 "register_operand" "0")
2884 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2886 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2887 [(set_attr "type" "sseiadd")
2888 (set_attr "prefix_data16" "1")
2889 (set_attr "mode" "TI")])
;; psubus{b,w} for the byte and word vector modes in SSEMODE12.  Operand 1
;; is a register tied to the destination; operand 2 may be an SSE register
;; or memory.
2891 (define_insn "sse2_ussub<mode>3"
2892 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2894 (match_operand:SSEMODE12 1 "register_operand" "0")
2895 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2897 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2898 [(set_attr "type" "sseiadd")
2899 (set_attr "prefix_data16" "1")
2900 (set_attr "mode" "TI")])
;; V16QI multiplication synthesised from word multiplies.  Twelve V16QI
;; temporaries are allocated.  Both inputs are unpacked (high and low
;; halves) so that every word carries a source byte, the words are
;; multiplied with gen_mulv8hi3, and a sequence of punpck{h,l}bw steps
;; gathers the result bytes back into a single vector.
2902 (define_expand "mulv16qi3"
2903 [(set (match_operand:V16QI 0 "register_operand" "")
2904 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2905 (match_operand:V16QI 2 "register_operand" "")))]
2911 for (i = 0; i < 12; ++i)
2912 t[i] = gen_reg_rtx (V16QImode);
2914 /* Unpack data such that we've got a source byte in each low byte of
2915 each word. We don't care what goes into the high byte of each word.
2916 Rather than trying to get zero in there, most convenient is to let
2917 it be a copy of the low byte. */
2918 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2919 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2920 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2921 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2923 /* Multiply words. The end-of-line annotations here give a picture of what
2924 the output of that instruction looks like. Dot means don't care; the
2925 letters are the bytes of the result with A being the most significant. */
2926 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2927 gen_lowpart (V8HImode, t[0]),
2928 gen_lowpart (V8HImode, t[1])));
2929 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2930 gen_lowpart (V8HImode, t[2]),
2931 gen_lowpart (V8HImode, t[3])));
2933 /* Extract the relevant bytes and merge them back together. */
2934 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2935 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2936 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2937 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2938 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2939 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2942 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI element-wise multiplication.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
2946 (define_expand "mulv8hi3"
2947 [(set (match_operand:V8HI 0 "register_operand" "")
2948 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2949 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2951 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI element-wise multiplication.  Operand 1 is commutative
;; ("%") and tied to the destination; operand 2 may be an SSE register or
;; memory.
2953 (define_insn "*mulv8hi3"
2954 [(set (match_operand:V8HI 0 "register_operand" "=x")
2955 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2956 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2957 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2958 "pmullw\t{%2, %0|%0, %2}"
2959 [(set_attr "type" "sseimul")
2960 (set_attr "prefix_data16" "1")
2961 (set_attr "mode" "TI")])
;; Expander for the V8HI highpart multiply (smul variant).  Operands are
;; legitimised with ix86_fixup_binary_operands_no_copy (MULT).
2963 (define_expand "smulv8hi3_highpart"
2964 [(set (match_operand:V8HI 0 "register_operand" "")
2969 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2971 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2974 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw: matching insn for the smul V8HI highpart multiply.  Operand 1
;; is commutative and tied to the destination; operand 2 may be an SSE
;; register or memory.
2976 (define_insn "*smulv8hi3_highpart"
2977 [(set (match_operand:V8HI 0 "register_operand" "=x")
2982 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2984 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2986 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2987 "pmulhw\t{%2, %0|%0, %2}"
2988 [(set_attr "type" "sseimul")
2989 (set_attr "prefix_data16" "1")
2990 (set_attr "mode" "TI")])
;; Expander for the V8HI highpart multiply (umul variant).  Operands are
;; legitimised with ix86_fixup_binary_operands_no_copy (MULT).
2992 (define_expand "umulv8hi3_highpart"
2993 [(set (match_operand:V8HI 0 "register_operand" "")
2998 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3000 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3003 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw: matching insn for the umul V8HI highpart multiply.  Operand 1
;; is commutative and tied to the destination; operand 2 may be an SSE
;; register or memory.
3005 (define_insn "*umulv8hi3_highpart"
3006 [(set (match_operand:V8HI 0 "register_operand" "=x")
3011 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3013 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3015 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3016 "pmulhuw\t{%2, %0|%0, %2}"
3017 [(set_attr "type" "sseimul")
3018 (set_attr "prefix_data16" "1")
3019 (set_attr "mode" "TI")])
;; pmuludq: V2DI result computed from elements 0 and 2 of each of the two
;; V4SI operands.  Operand 1 is commutative and tied to the destination.
3021 (define_insn "sse2_umulv2siv2di3"
3022 [(set (match_operand:V2DI 0 "register_operand" "=x")
3026 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3027 (parallel [(const_int 0) (const_int 2)])))
3030 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3031 (parallel [(const_int 0) (const_int 2)])))))]
3032 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3033 "pmuludq\t{%2, %0|%0, %2}"
3034 [(set_attr "type" "sseimul")
3035 (set_attr "prefix_data16" "1")
3036 (set_attr "mode" "TI")])
;; pmuldq (SSE4.1): V2DI result computed from elements 0 and 2 of each of
;; the two V4SI operands.  Operand 1 is commutative and tied to the
;; destination.
3038 (define_insn "sse4_1_mulv2siv2di3"
3039 [(set (match_operand:V2DI 0 "register_operand" "=x")
3043 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3044 (parallel [(const_int 0) (const_int 2)])))
3047 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3048 (parallel [(const_int 0) (const_int 2)])))))]
3049 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3050 "pmuldq\t{%2, %0|%0, %2}"
3051 [(set_attr "type" "sseimul")
3052 (set_attr "prefix_extra" "1")
3053 (set_attr "mode" "TI")])
;; pmaddwd: V4SI result from two V8HI operands.  The template takes two
;; V4HI selections from each operand: one whose selector starts at element
;; 0, and one (via match_dup) whose selector starts at element 1 and ends
;; at element 7.  Operand 1 is commutative and tied to the destination.
3055 (define_insn "sse2_pmaddwd"
3056 [(set (match_operand:V4SI 0 "register_operand" "=x")
3061 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3062 (parallel [(const_int 0)
3068 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3069 (parallel [(const_int 0)
3075 (vec_select:V4HI (match_dup 1)
3076 (parallel [(const_int 1)
3081 (vec_select:V4HI (match_dup 2)
3082 (parallel [(const_int 1)
3085 (const_int 7)]))))))]
3086 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3087 "pmaddwd\t{%2, %0|%0, %2}"
3088 [(set_attr "type" "sseiadd")
3089 (set_attr "prefix_data16" "1")
3090 (set_attr "mode" "TI")])
;; V4SI element-wise multiplication.  The visible body has two paths: one
;; that only fixes up the operands for a direct multiply pattern, and a
;; synthesised sequence that multiplies the even elements, shifts both
;; inputs down one element to multiply the odd elements, and merges the
;; two partial results with pshufd and punpckldq.
;; NOTE(review): the condition selecting between the two paths is not
;; visible in this excerpt.
3092 (define_expand "mulv4si3"
3093 [(set (match_operand:V4SI 0 "register_operand" "")
3094 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3095 (match_operand:V4SI 2 "register_operand" "")))]
3099 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
3102 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3108 t1 = gen_reg_rtx (V4SImode);
3109 t2 = gen_reg_rtx (V4SImode);
3110 t3 = gen_reg_rtx (V4SImode);
3111 t4 = gen_reg_rtx (V4SImode);
3112 t5 = gen_reg_rtx (V4SImode);
3113 t6 = gen_reg_rtx (V4SImode);
3114 thirtytwo = GEN_INT (32);
3116 /* Multiply elements 2 and 0. */
3117 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3120 /* Shift both input vectors down one element, so that elements 3
3121 and 1 are now in the slots for elements 2 and 0. For K8, at
3122 least, this is faster than using a shuffle. */
3123 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3124 gen_lowpart (TImode, op1),
3126 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3127 gen_lowpart (TImode, op2),
3129 /* Multiply elements 3 and 1. */
3130 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3133 /* Move the results in element 2 down to element 1; we don't care
3134 what goes in elements 2 and 3. */
3135 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3136 const0_rtx, const0_rtx));
3137 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3138 const0_rtx, const0_rtx));
3140 /* Merge the parts back together. */
3141 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; pmulld (SSE4.1): V4SI element-wise multiplication.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be an SSE
;; register or memory.
3146 (define_insn "*sse4_1_mulv4si3"
3147 [(set (match_operand:V4SI 0 "register_operand" "=x")
3148 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3149 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3150 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3151 "pmulld\t{%2, %0|%0, %2}"
3152 [(set_attr "type" "sseimul")
3153 (set_attr "prefix_extra" "1")
3154 (set_attr "mode" "TI")])
;; V2DI element-wise multiplication synthesised from 32x32->64 multiplies
;; (gen_sse2_umulv2siv2di3).  Three partial products are formed: low*low,
;; and the two cross products of one operand's low half with the other
;; operand's high half.  The cross products are shifted left by 32 and all
;; three are summed with gen_addv2di3.
3156 (define_expand "mulv2di3"
3157 [(set (match_operand:V2DI 0 "register_operand" "")
3158 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3159 (match_operand:V2DI 2 "register_operand" "")))]
3162 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3168 t1 = gen_reg_rtx (V2DImode);
3169 t2 = gen_reg_rtx (V2DImode);
3170 t3 = gen_reg_rtx (V2DImode);
3171 t4 = gen_reg_rtx (V2DImode);
3172 t5 = gen_reg_rtx (V2DImode);
3173 t6 = gen_reg_rtx (V2DImode);
3174 thirtytwo = GEN_INT (32);
3176 /* Multiply low parts. */
3177 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3178 gen_lowpart (V4SImode, op2)));
3180 /* Shift input vectors right 32 bits so we can multiply high parts. */
3181 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3182 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3184 /* Multiply high parts by low parts. */
3185 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3186 gen_lowpart (V4SImode, t3)));
3187 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3188 gen_lowpart (V4SImode, t2)));
3190 /* Shift them back. */
3191 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3192 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3194 /* Add the three parts together. */
3195 emit_insn (gen_addv2di3 (t6, t1, t4));
3196 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI multiply with a V4SI result.  The low 16 bits of each
;; product come from gen_mulv8hi3 and the high 16 bits from
;; gen_smulv8hi3_highpart; gen_vec_interleave_highv8hi then combines the
;; two into operand 0 viewed as V8HI.
3200 (define_expand "vec_widen_smult_hi_v8hi"
3201 [(match_operand:V4SI 0 "register_operand" "")
3202 (match_operand:V8HI 1 "register_operand" "")
3203 (match_operand:V8HI 2 "register_operand" "")]
3206 rtx op1, op2, t1, t2, dest;
3210 t1 = gen_reg_rtx (V8HImode);
3211 t2 = gen_reg_rtx (V8HImode);
3212 dest = gen_lowpart (V8HImode, operands[0]);
3214 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3215 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3216 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply of V8HI inputs into V4SI, "low" variant.  Same
;; scheme as the high variant, but the low-half and signed high-half products
;; are combined with gen_vec_interleave_lowv8hi.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing;
;; presumably operands[1] and operands[2].
3220 (define_expand "vec_widen_smult_lo_v8hi"
3221 [(match_operand:V4SI 0 "register_operand" "")
3222 (match_operand:V8HI 1 "register_operand" "")
3223 (match_operand:V8HI 2 "register_operand" "")]
3226 rtx op1, op2, t1, t2, dest;
3230 t1 = gen_reg_rtx (V8HImode);
3231 t2 = gen_reg_rtx (V8HImode);
3232 dest = gen_lowpart (V8HImode, operands[0]);
3234 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3235 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3236 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply of V8HI inputs into V4SI, "high" variant.  The
;; low 16 bits of each product (gen_mulv8hi3) are interleaved with the
;; unsigned high 16 bits (gen_umulv8hi3_highpart) by
;; gen_vec_interleave_highv8hi, writing through a V8HI view of operand 0.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing;
;; presumably operands[1] and operands[2].
3240 (define_expand "vec_widen_umult_hi_v8hi"
3241 [(match_operand:V4SI 0 "register_operand" "")
3242 (match_operand:V8HI 1 "register_operand" "")
3243 (match_operand:V8HI 2 "register_operand" "")]
3246 rtx op1, op2, t1, t2, dest;
3250 t1 = gen_reg_rtx (V8HImode);
3251 t2 = gen_reg_rtx (V8HImode);
3252 dest = gen_lowpart (V8HImode, operands[0]);
3254 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3255 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3256 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply of V8HI inputs into V4SI, "low" variant.  Same
;; scheme as the high variant, but the low-half and unsigned high-half
;; products are combined with gen_vec_interleave_lowv8hi.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing;
;; presumably operands[1] and operands[2].
3260 (define_expand "vec_widen_umult_lo_v8hi"
3261 [(match_operand:V4SI 0 "register_operand" "")
3262 (match_operand:V8HI 1 "register_operand" "")
3263 (match_operand:V8HI 2 "register_operand" "")]
3266 rtx op1, op2, t1, t2, dest;
3270 t1 = gen_reg_rtx (V8HImode);
3271 t2 = gen_reg_rtx (V8HImode);
3272 dest = gen_lowpart (V8HImode, operands[0]);
3274 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3275 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3276 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply of V4SI inputs into V2DI, "high" variant.  Each input is
;; interleaved with itself (gen_vec_interleave_highv4si) and the two results
;; are passed to gen_sse2_umulv2siv2di3.
;; NOTE(review): this *signed* expander goes through the unsigned
;; umulv2siv2di3 pattern, exactly like vec_widen_umult_hi_v4si; confirm that
;; negative inputs yield correct 64-bit products.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing.
3280 (define_expand "vec_widen_smult_hi_v4si"
3281 [(match_operand:V2DI 0 "register_operand" "")
3282 (match_operand:V4SI 1 "register_operand" "")
3283 (match_operand:V4SI 2 "register_operand" "")]
3286 rtx op1, op2, t1, t2;
3290 t1 = gen_reg_rtx (V4SImode);
3291 t2 = gen_reg_rtx (V4SImode);
3293 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3294 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3295 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply of V4SI inputs into V2DI, "low" variant.  Each input is
;; interleaved with itself (gen_vec_interleave_lowv4si) and the two results
;; are passed to gen_sse2_umulv2siv2di3.
;; NOTE(review): this *signed* expander goes through the unsigned
;; umulv2siv2di3 pattern, exactly like vec_widen_umult_lo_v4si; confirm that
;; negative inputs yield correct 64-bit products.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing.
3299 (define_expand "vec_widen_smult_lo_v4si"
3300 [(match_operand:V2DI 0 "register_operand" "")
3301 (match_operand:V4SI 1 "register_operand" "")
3302 (match_operand:V4SI 2 "register_operand" "")]
3305 rtx op1, op2, t1, t2;
3309 t1 = gen_reg_rtx (V4SImode);
3310 t2 = gen_reg_rtx (V4SImode);
3312 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3313 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3314 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply of V4SI inputs into V2DI, "high" variant.  Each
;; input is interleaved with itself (gen_vec_interleave_highv4si) and the two
;; results are multiplied by gen_sse2_umulv2siv2di3.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing;
;; presumably operands[1] and operands[2].
3318 (define_expand "vec_widen_umult_hi_v4si"
3319 [(match_operand:V2DI 0 "register_operand" "")
3320 (match_operand:V4SI 1 "register_operand" "")
3321 (match_operand:V4SI 2 "register_operand" "")]
3324 rtx op1, op2, t1, t2;
3328 t1 = gen_reg_rtx (V4SImode);
3329 t2 = gen_reg_rtx (V4SImode);
3331 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3332 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3333 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply of V4SI inputs into V2DI, "low" variant.  Each
;; input is interleaved with itself (gen_vec_interleave_lowv4si) and the two
;; results are multiplied by gen_sse2_umulv2siv2di3.
;; NOTE(review): the assignments to op1/op2 are not visible in this listing;
;; presumably operands[1] and operands[2].
3337 (define_expand "vec_widen_umult_lo_v4si"
3338 [(match_operand:V2DI 0 "register_operand" "")
3339 (match_operand:V4SI 1 "register_operand" "")
3340 (match_operand:V4SI 2 "register_operand" "")]
3343 rtx op1, op2, t1, t2;
3347 t1 = gen_reg_rtx (V4SImode);
3348 t2 = gen_reg_rtx (V4SImode);
3350 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3351 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3352 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + sse2_pmaddwd (operand 1,
;; operand 2), with the V4SI multiply-add result held in a fresh temporary.
3356 (define_expand "sdot_prodv8hi"
3357 [(match_operand:V4SI 0 "register_operand" "")
3358 (match_operand:V8HI 1 "register_operand" "")
3359 (match_operand:V8HI 2 "register_operand" "")
3360 (match_operand:V4SI 3 "register_operand" "")]
3363 rtx t = gen_reg_rtx (V4SImode);
3364 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3365 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step on V4SI inputs accumulating into V2DI:
;;   t1 = umulv2siv2di3 (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted right as whole TImode values
;;   t4 = umulv2siv2di3 (t2, t3)
;;   operand 0 = t1 + t4
;; NOTE(review): the declaration of t1..t4 and the shift-count arguments of
;; the two gen_sse2_lshrti3 calls are not visible in this listing.
3369 (define_expand "udot_prodv4si"
3370 [(match_operand:V2DI 0 "register_operand" "")
3371 (match_operand:V4SI 1 "register_operand" "")
3372 (match_operand:V4SI 2 "register_operand" "")
3373 (match_operand:V2DI 3 "register_operand" "")]
3378 t1 = gen_reg_rtx (V2DImode);
3379 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3380 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3382 t2 = gen_reg_rtx (V4SImode);
3383 t3 = gen_reg_rtx (V4SImode);
3384 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3385 gen_lowpart (TImode, operands[1]),
3387 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3388 gen_lowpart (TImode, operands[2]),
3391 t4 = gen_reg_rtx (V2DImode);
3392 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3394 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element shift for the SSEMODE24 modes (V8HI, V4SI); emits psraw or
;; psrad via <ssevecsize>.  Operand 1 is tied to the destination; the count
;; (operand 2) is TImode and may be an xmm register or an integer constant
;; ("xn").
3398 (define_insn "ashr<mode>3"
3399 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3401 (match_operand:SSEMODE24 1 "register_operand" "0")
3402 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3404 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3405 [(set_attr "type" "sseishft")
3406 (set_attr "prefix_data16" "1")
3407 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI); emits psrlw, psrld or psrlq via <ssevecsize>.  Operand 1 is tied to
;; the destination; the count (operand 2) is TImode and may be an xmm
;; register or an integer constant ("xn").
3409 (define_insn "lshr<mode>3"
3410 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3411 (lshiftrt:SSEMODE248
3412 (match_operand:SSEMODE248 1 "register_operand" "0")
3413 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3415 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3416 [(set_attr "type" "sseishft")
3417 (set_attr "prefix_data16" "1")
3418 (set_attr "mode" "TI")])
;; Per-element shift for the SSEMODE248 modes (V8HI, V4SI, V2DI); emits
;; psllw, pslld or psllq via <ssevecsize>.  Operand 1 is tied to the
;; destination; the count (operand 2) is TImode and may be an xmm register or
;; an integer constant ("xn").
3420 (define_insn "ashl<mode>3"
3421 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3423 (match_operand:SSEMODE248 1 "register_operand" "0")
3424 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3426 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3427 [(set_attr "type" "sseishft")
3428 (set_attr "prefix_data16" "1")
3429 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a TImode
;; ashift: operands 0 and 1 are replaced by their TImode lowparts before the
;; pattern is emitted.
;; NOTE(review): the statement executed when operand 2 fails
;; const_0_to_255_mul_8_operand is not visible in this listing.
3431 (define_expand "vec_shl_<mode>"
3432 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3433 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3434 (match_operand:SI 2 "general_operand" "")))]
3437 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3439 operands[0] = gen_lowpart (TImode, operands[0]);
3440 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for every SSEMODEI mode, expressed as a
;; TImode lshiftrt: operands 0 and 1 are replaced by their TImode lowparts
;; before the pattern is emitted.
;; NOTE(review): the statement executed when operand 2 fails
;; const_0_to_255_mul_8_operand is not visible in this listing.
3443 (define_expand "vec_shr_<mode>"
3444 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3445 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3446 (match_operand:SI 2 "general_operand" "")))]
3449 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3451 operands[0] = gen_lowpart (TImode, operands[0]);
3452 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for unsigned byte maximum (V16QI).  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy before the
;; umax pattern is emitted.
3455 (define_expand "umaxv16qi3"
3456 [(set (match_operand:V16QI 0 "register_operand" "")
3457 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3458 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3460 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; SSE2 unsigned byte maximum; emits pmaxub.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3462 (define_insn "*umaxv16qi3"
3463 [(set (match_operand:V16QI 0 "register_operand" "=x")
3464 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3465 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3466 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3467 "pmaxub\t{%2, %0|%0, %2}"
3468 [(set_attr "type" "sseiadd")
3469 (set_attr "prefix_data16" "1")
3470 (set_attr "mode" "TI")])
;; Expander for signed 16-bit maximum (V8HI).  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy before the
;; smax pattern is emitted.
3472 (define_expand "smaxv8hi3"
3473 [(set (match_operand:V8HI 0 "register_operand" "")
3474 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3475 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3477 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; SSE2 signed 16-bit maximum; emits pmaxsw.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3479 (define_insn "*smaxv8hi3"
3480 [(set (match_operand:V8HI 0 "register_operand" "=x")
3481 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3482 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3483 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3484 "pmaxsw\t{%2, %0|%0, %2}"
3485 [(set_attr "type" "sseiadd")
3486 (set_attr "prefix_data16" "1")
3487 (set_attr "mode" "TI")])
;; Expander for unsigned 16-bit maximum (V8HI).  Two strategies are visible:
;; handing the operands to ix86_fixup_binary_operands_no_copy, or computing
;; the result as gen_sse2_ussubv8hi3 (op1, op2) followed by an add of op2
;; (presumably a saturating subtract, so the sum equals the larger value).
;; In the second form a fresh temporary replaces the destination when it is
;; rtx_equal_p to operand 2, so operand 2 is still intact for the add.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this listing.
3489 (define_expand "umaxv8hi3"
3490 [(set (match_operand:V8HI 0 "register_operand" "")
3491 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3492 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3496 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3499 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3500 if (rtx_equal_p (op3, op2))
3501 op3 = gen_reg_rtx (V8HImode);
3502 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3503 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed maximum on the SSEMODE14 modes (V16QI, V4SI).  Two
;; strategies are visible: ix86_fixup_binary_operands_no_copy, or a vector
;; conditional built in xops and expanded by ix86_expand_int_vcond:
;;   dest = (op1 > op2) ? op1 : op2     (signed GT)
;; NOTE(review): the condition choosing between the two paths and the
;; declaration of xops/ok are not visible in this listing.
3508 (define_expand "smax<mode>3"
3509 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3510 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3511 (match_operand:SSEMODE14 2 "register_operand" "")))]
3515 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3521 xops[0] = operands[0];
3522 xops[1] = operands[1];
3523 xops[2] = operands[2];
3524 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3525 xops[4] = operands[1];
3526 xops[5] = operands[2];
3527 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for the SSEMODE14 modes (V16QI, V4SI); emits pmaxsb
;; or pmaxsd via <ssevecsize>.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
3533 (define_insn "*sse4_1_smax<mode>3"
3534 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3536 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3537 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3538 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3539 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3540 [(set_attr "type" "sseiadd")
3541 (set_attr "prefix_extra" "1")
3542 (set_attr "mode" "TI")])
;; Expander for unsigned 32-bit maximum (V4SI).  Two strategies are visible:
;; ix86_fixup_binary_operands_no_copy, or a vector conditional built in xops
;; and expanded by ix86_expand_int_vcond:
;;   dest = (op1 >u op2) ? op1 : op2    (unsigned GTU)
;; NOTE(review): the condition choosing between the two paths and the
;; declaration of xops/ok are not visible in this listing.
3544 (define_expand "umaxv4si3"
3545 [(set (match_operand:V4SI 0 "register_operand" "")
3546 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3547 (match_operand:V4SI 2 "register_operand" "")))]
3551 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3557 xops[0] = operands[0];
3558 xops[1] = operands[1];
3559 xops[2] = operands[2];
3560 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3561 xops[4] = operands[1];
3562 xops[5] = operands[2];
3563 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for the SSEMODE24 modes (V8HI, V4SI); emits pmaxuw
;; or pmaxud via <ssevecsize>.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
3569 (define_insn "*sse4_1_umax<mode>3"
3570 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3572 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3573 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3574 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3575 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3576 [(set_attr "type" "sseiadd")
3577 (set_attr "prefix_extra" "1")
3578 (set_attr "mode" "TI")])
;; Expander for unsigned byte minimum (V16QI).  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy before the
;; umin pattern is emitted.
3580 (define_expand "uminv16qi3"
3581 [(set (match_operand:V16QI 0 "register_operand" "")
3582 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3583 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3585 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; SSE2 unsigned byte minimum; emits pminub.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3587 (define_insn "*uminv16qi3"
3588 [(set (match_operand:V16QI 0 "register_operand" "=x")
3589 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3590 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3591 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3592 "pminub\t{%2, %0|%0, %2}"
3593 [(set_attr "type" "sseiadd")
3594 (set_attr "prefix_data16" "1")
3595 (set_attr "mode" "TI")])
;; Expander for signed 16-bit minimum (V8HI).  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy before the
;; smin pattern is emitted.
3597 (define_expand "sminv8hi3"
3598 [(set (match_operand:V8HI 0 "register_operand" "")
3599 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3600 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3602 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; SSE2 signed 16-bit minimum; emits pminsw.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3604 (define_insn "*sminv8hi3"
3605 [(set (match_operand:V8HI 0 "register_operand" "=x")
3606 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3607 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3608 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3609 "pminsw\t{%2, %0|%0, %2}"
3610 [(set_attr "type" "sseiadd")
3611 (set_attr "prefix_data16" "1")
3612 (set_attr "mode" "TI")])
;; Expander for signed minimum on the SSEMODE14 modes (V16QI, V4SI).  Two
;; strategies are visible: ix86_fixup_binary_operands_no_copy, or a vector
;; conditional built in xops and expanded by ix86_expand_int_vcond.  The
;; select arms are swapped relative to smax<mode>3:
;;   dest = (op1 > op2) ? op2 : op1     (signed GT)
;; NOTE(review): the condition choosing between the two paths and the
;; declaration of xops/ok are not visible in this listing.
3614 (define_expand "smin<mode>3"
3615 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3616 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3617 (match_operand:SSEMODE14 2 "register_operand" "")))]
3621 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3627 xops[0] = operands[0];
3628 xops[1] = operands[2];
3629 xops[2] = operands[1];
3630 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3631 xops[4] = operands[1];
3632 xops[5] = operands[2];
3633 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for the SSEMODE14 modes (V16QI, V4SI); emits pminsb
;; or pminsd via <ssevecsize>.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
3639 (define_insn "*sse4_1_smin<mode>3"
3640 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3642 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3643 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3644 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3645 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3646 [(set_attr "type" "sseiadd")
3647 (set_attr "prefix_extra" "1")
3648 (set_attr "mode" "TI")])
;; Expander for unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).  Two
;; strategies are visible: ix86_fixup_binary_operands_no_copy, or a vector
;; conditional built in xops and expanded by ix86_expand_int_vcond, with the
;; select arms swapped so the smaller value is chosen:
;;   dest = (op1 >u op2) ? op2 : op1    (unsigned GTU)
;; NOTE(review): the condition choosing between the two paths and the
;; declaration of xops/ok are not visible in this listing.
3650 (define_expand "umin<mode>3"
3651 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3652 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3653 (match_operand:SSEMODE24 2 "register_operand" "")))]
3657 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3663 xops[0] = operands[0];
3664 xops[1] = operands[2];
3665 xops[2] = operands[1];
3666 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3667 xops[4] = operands[1];
3668 xops[5] = operands[2];
3669 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for the SSEMODE24 modes (V8HI, V4SI); emits pminuw
;; or pminud via <ssevecsize>.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
3675 (define_insn "*sse4_1_umin<mode>3"
3676 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3678 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3679 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3680 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3681 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3682 [(set_attr "type" "sseiadd")
3683 (set_attr "prefix_extra" "1")
3684 (set_attr "mode" "TI")])
3686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3688 ;; Parallel integral comparisons
3690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE2 element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpeqb, pcmpeqw or pcmpeqd via <ssevecsize>.  Operand 1 is
;; tied to the destination and commutative with operand 2 ("%0").
3692 (define_insn "sse2_eq<mode>3"
3693 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3695 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3696 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3697 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3698 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3699 [(set_attr "type" "ssecmp")
3700 (set_attr "prefix_data16" "1")
3701 (set_attr "mode" "TI")])
;; SSE4.1 64-bit element equality compare (V2DI); emits pcmpeqq.  Operand 1
;; is tied to the destination and commutative with operand 2 ("%0").
3703 (define_insn "sse4_1_eqv2di3"
3704 [(set (match_operand:V2DI 0 "register_operand" "=x")
3706 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3707 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3708 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3709 "pcmpeqq\t{%2, %0|%0, %2}"
3710 [(set_attr "type" "ssecmp")
3711 (set_attr "prefix_extra" "1")
3712 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpgtb, pcmpgtw or pcmpgtd via <ssevecsize>.  Operand 1 must
;; be a register tied to the destination ("0", no commutative marker);
;; operand 2 may be a register or memory.
3714 (define_insn "sse2_gt<mode>3"
3715 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3717 (match_operand:SSEMODE124 1 "register_operand" "0")
3718 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3720 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3721 [(set_attr "type" "ssecmp")
3722 (set_attr "prefix_data16" "1")
3723 (set_attr "mode" "TI")])
;; 64-bit element greater-than compare (V2DI); emits pcmpgtq.  The compare is
;; not commutative and operand 1 is tied to the destination ("0"), so it is
;; required to be a register, matching sse2_gt<mode>3; operand 2 may be a
;; register or memory.
;; NOTE(review): unlike sse4_1_eqv2di3, no "prefix_extra" attribute is set
;; here -- confirm whether that is intended.
3725 (define_insn "sse4_2_gtv2di3"
3726 [(set (match_operand:V2DI 0 "register_operand" "=x")
3728 (match_operand:V2DI 1 "register_operand" "0")
3729 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3731 "pcmpgtq\t{%2, %0|%0, %2}"
3732 [(set_attr "type" "ssecmp")
3733 (set_attr "mode" "TI")])
;; Vector conditional select for every SSEMODEI mode:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the statements that act on its return value are not visible
;; in this listing.
3735 (define_expand "vcond<mode>"
3736 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3737 (if_then_else:SSEMODEI
3738 (match_operator 3 ""
3739 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3740 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3741 (match_operand:SSEMODEI 1 "general_operand" "")
3742 (match_operand:SSEMODEI 2 "general_operand" "")))]
3745 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for every SSEMODEI mode, "u"-named variant with
;; the same template as vcond<mode>:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the statements that act on its return value are not visible
;; in this listing.
3751 (define_expand "vcondu<mode>"
3752 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3753 (if_then_else:SSEMODEI
3754 (match_operator 3 ""
3755 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3756 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3757 (match_operand:SSEMODEI 1 "general_operand" "")
3758 (match_operand:SSEMODEI 2 "general_operand" "")))]
3761 if (ix86_expand_int_vcond (operands))
3767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3769 ;; Parallel bitwise logical operations
3771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement for every SSEMODEI mode, expressed as an XOR of
;; operand 1 with operands[2].  The preparation code builds operands[2] as a
;; constant vector whose GET_MODE_NUNITS elements are all constm1_rtx and
;; forces it into a register.
;; NOTE(review): the template line naming operand 2 is not visible in this
;; listing.
3773 (define_expand "one_cmpl<mode>2"
3774 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3775 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3779 int i, n = GET_MODE_NUNITS (<MODE>mode);
3780 rtvec v = rtvec_alloc (n);
3782 for (i = 0; i < n; ++i)
3783 RTVEC_ELT (v, i) = constm1_rtx;
3785 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on every SSEMODEI mode.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
3788 (define_expand "and<mode>3"
3789 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3790 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3791 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3793 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Integer-vector AND when SSE is available but SSE2 is not; emits the
;; single-precision form andps (insn mode V4SF).
3795 (define_insn "*sse_and<mode>3"
3796 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3798 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3799 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3800 "(TARGET_SSE && !TARGET_SSE2)
3801 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3802 "andps\t{%2, %0|%0, %2}"
3803 [(set_attr "type" "sselog")
3804 (set_attr "mode" "V4SF")])
;; SSE2 integer-vector AND; emits pand.  Operand 1 is tied to the destination
;; and commutative with operand 2 ("%0").
3806 (define_insn "*sse2_and<mode>3"
3807 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3809 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3810 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3811 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3812 "pand\t{%2, %0|%0, %2}"
3813 [(set_attr "type" "sselog")
3814 (set_attr "prefix_data16" "1")
3815 (set_attr "mode" "TI")])
;; Combines the complement of operand 1 (tied to the destination) with
;; operand 2 when SSE is available but SSE2 is not; emits andnps.
;; NOTE(review): the outer RTL code line is not visible in this listing;
;; presumably an AND.
3817 (define_insn "*sse_nand<mode>3"
3818 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3820 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3821 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3822 "(TARGET_SSE && !TARGET_SSE2)"
3823 "andnps\t{%2, %0|%0, %2}"
3824 [(set_attr "type" "sselog")
3825 (set_attr "mode" "V4SF")])
;; Combines the complement of operand 1 (tied to the destination) with
;; operand 2; emits pandn.
;; NOTE(review): the outer RTL code line and the enabling condition are not
;; visible in this listing; presumably an AND.
3827 (define_insn "sse2_nand<mode>3"
3828 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3830 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3831 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3833 "pandn\t{%2, %0|%0, %2}"
3834 [(set_attr "type" "sselog")
3835 (set_attr "prefix_data16" "1")
3836 (set_attr "mode" "TI")])
;; Expander for bitwise AND on TFmode values.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
3838 (define_expand "andtf3"
3839 [(set (match_operand:TF 0 "register_operand" "")
3840 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3841 (match_operand:TF 2 "nonimmediate_operand" "")))]
3843 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; TFmode bitwise AND, enabled only for TARGET_64BIT; emits pand on the xmm
;; register holding the value.
3845 (define_insn "*andtf3"
3846 [(set (match_operand:TF 0 "register_operand" "=x")
3848 (match_operand:TF 1 "nonimmediate_operand" "%0")
3849 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3850 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3851 "pand\t{%2, %0|%0, %2}"
3852 [(set_attr "type" "sselog")
3853 (set_attr "prefix_data16" "1")
3854 (set_attr "mode" "TI")])
;; TFmode variant combining the complement of operand 1 (tied to the
;; destination) with operand 2; emits pandn.
;; NOTE(review): the outer RTL code line and the enabling condition are not
;; visible in this listing; presumably an AND.
3856 (define_insn "*nandtf3"
3857 [(set (match_operand:TF 0 "register_operand" "=x")
3859 (not:TF (match_operand:TF 1 "register_operand" "0"))
3860 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3862 "pandn\t{%2, %0|%0, %2}"
3863 [(set_attr "type" "sselog")
3864 (set_attr "prefix_data16" "1")
3865 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on every SSEMODEI mode.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
3867 (define_expand "ior<mode>3"
3868 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3869 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3870 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3872 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Integer-vector OR when SSE is available but SSE2 is not; emits the
;; single-precision form orps (insn mode V4SF).
3874 (define_insn "*sse_ior<mode>3"
3875 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3877 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3878 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3879 "(TARGET_SSE && !TARGET_SSE2)
3880 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3881 "orps\t{%2, %0|%0, %2}"
3882 [(set_attr "type" "sselog")
3883 (set_attr "mode" "V4SF")])
;; SSE2 integer-vector OR; emits por.  Operand 1 is tied to the destination
;; and commutative with operand 2 ("%0").
3885 (define_insn "*sse2_ior<mode>3"
3886 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3888 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3889 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3890 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3891 "por\t{%2, %0|%0, %2}"
3892 [(set_attr "type" "sselog")
3893 (set_attr "prefix_data16" "1")
3894 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on TFmode values.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
3896 (define_expand "iortf3"
3897 [(set (match_operand:TF 0 "register_operand" "")
3898 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3899 (match_operand:TF 2 "nonimmediate_operand" "")))]
3901 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; TFmode bitwise OR, enabled only for TARGET_64BIT; emits por on the xmm
;; register holding the value.
3903 (define_insn "*iortf3"
3904 [(set (match_operand:TF 0 "register_operand" "=x")
3906 (match_operand:TF 1 "nonimmediate_operand" "%0")
3907 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3908 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3909 "por\t{%2, %0|%0, %2}"
3910 [(set_attr "type" "sselog")
3911 (set_attr "prefix_data16" "1")
3912 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on every SSEMODEI mode.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
3914 (define_expand "xor<mode>3"
3915 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3916 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3917 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3919 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Integer-vector XOR when SSE is available but SSE2 is not; emits the
;; single-precision form xorps (insn mode V4SF).
3921 (define_insn "*sse_xor<mode>3"
3922 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3924 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3925 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3926 "(TARGET_SSE && !TARGET_SSE2)
3927 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3928 "xorps\t{%2, %0|%0, %2}"
3929 [(set_attr "type" "sselog")
3930 (set_attr "mode" "V4SF")])
;; SSE2 integer-vector XOR; emits pxor.  Operand 1 is tied to the destination
;; and commutative with operand 2 ("%0").
3932 (define_insn "*sse2_xor<mode>3"
3933 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3935 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3936 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3937 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3938 "pxor\t{%2, %0|%0, %2}"
3939 [(set_attr "type" "sselog")
3940 (set_attr "prefix_data16" "1")
3941 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on TFmode values.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
3943 (define_expand "xortf3"
3944 [(set (match_operand:TF 0 "register_operand" "")
3945 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3946 (match_operand:TF 2 "nonimmediate_operand" "")))]
3948 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; TFmode bitwise XOR, enabled only for TARGET_64BIT; emits pxor on the xmm
;; register holding the value.
3950 (define_insn "*xortf3"
3951 [(set (match_operand:TF 0 "register_operand" "=x")
3953 (match_operand:TF 1 "nonimmediate_operand" "%0")
3954 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3955 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3956 "pxor\t{%2, %0|%0, %2}"
3957 [(set_attr "type" "sselog")
3958 (set_attr "prefix_data16" "1")
3959 (set_attr "mode" "TI")])
3961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3963 ;; Parallel integral element swizzling
3965 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3968 ;; op1 = abcdefghijklmnop
3969 ;; op2 = qrstuvwxyz012345
3970 ;; h1 = aqbrcsdteufvgwhx
3971 ;; l1 = iyjzk0l1m2n3o4p5
3972 ;; h2 = aiqybjrzcks0dlt1
3973 ;; l2 = emu2fnv3gow4hpx5
3974 ;; h3 = aeimquy2bfjnrvz3
3975 ;; l3 = cgkosw04dhlptx15
3976 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI operands into one V16QI result using byte interleaves only:
;; the inputs are viewed as V16QI, run through three high/low interleave
;; rounds (h1/l1, h2/l2, h3/l3), and a final low interleave of l3 and h3
;; writes operand 0.  The letter diagram preceding this pattern traces each
;; intermediate value.
3977 (define_expand "vec_pack_trunc_v8hi"
3978 [(match_operand:V16QI 0 "register_operand" "")
3979 (match_operand:V8HI 1 "register_operand" "")
3980 (match_operand:V8HI 2 "register_operand" "")]
3983 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3985 op1 = gen_lowpart (V16QImode, operands[1]);
3986 op2 = gen_lowpart (V16QImode, operands[2]);
3987 h1 = gen_reg_rtx (V16QImode);
3988 l1 = gen_reg_rtx (V16QImode);
3989 h2 = gen_reg_rtx (V16QImode);
3990 l2 = gen_reg_rtx (V16QImode);
3991 h3 = gen_reg_rtx (V16QImode);
3992 l3 = gen_reg_rtx (V16QImode);
3994 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3995 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3996 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3997 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3998 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3999 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4000 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4011 ;; result = bdfhjlnp
;; Packs two V4SI operands into one V8HI result using 16-bit interleaves
;; only: the inputs are viewed as V8HI, run through two high/low interleave
;; rounds (h1/l1, h2/l2), and a final low interleave of l2 and h2 writes
;; operand 0.
4012 (define_expand "vec_pack_trunc_v4si"
4013 [(match_operand:V8HI 0 "register_operand" "")
4014 (match_operand:V4SI 1 "register_operand" "")
4015 (match_operand:V4SI 2 "register_operand" "")]
4018 rtx op1, op2, h1, l1, h2, l2;
4020 op1 = gen_lowpart (V8HImode, operands[1]);
4021 op2 = gen_lowpart (V8HImode, operands[2]);
4022 h1 = gen_reg_rtx (V8HImode);
4023 l1 = gen_reg_rtx (V8HImode);
4024 h2 = gen_reg_rtx (V8HImode);
4025 l2 = gen_reg_rtx (V8HImode);
4027 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4028 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4029 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4030 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4031 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI operands into one V4SI result using 32-bit interleaves
;; only: the inputs are viewed as V4SI, interleaved once high and once low
;; (h1/l1), and a final low interleave of l1 and h1 writes operand 0.
4041 (define_expand "vec_pack_trunc_v2di"
4042 [(match_operand:V4SI 0 "register_operand" "")
4043 (match_operand:V2DI 1 "register_operand" "")
4044 (match_operand:V2DI 2 "register_operand" "")]
4047 rtx op1, op2, h1, l1;
4049 op1 = gen_lowpart (V4SImode, operands[1]);
4050 op2 = gen_lowpart (V4SImode, operands[2]);
4051 h1 = gen_reg_rtx (V4SImode);
4052 l1 = gen_reg_rtx (V4SImode);
4054 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4055 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4056 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the high byte interleave; it simply emits
;; sse2_punpckhbw on the same three operands.  The selector alternates
;; indices 8..15 with 24..31.
;; NOTE(review): the lines wrapping the two operands and the selector are not
;; visible in this listing; presumably a vec_select over a vec_concat.
4060 (define_expand "vec_interleave_highv16qi"
4061 [(set (match_operand:V16QI 0 "register_operand" "=x")
4064 (match_operand:V16QI 1 "register_operand" "0")
4065 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4066 (parallel [(const_int 8) (const_int 24)
4067 (const_int 9) (const_int 25)
4068 (const_int 10) (const_int 26)
4069 (const_int 11) (const_int 27)
4070 (const_int 12) (const_int 28)
4071 (const_int 13) (const_int 29)
4072 (const_int 14) (const_int 30)
4073 (const_int 15) (const_int 31)])))]
4076 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the low byte interleave; it simply emits
;; sse2_punpcklbw on the same three operands.  The selector alternates
;; indices 0..7 with 16..23.
;; NOTE(review): the lines wrapping the two operands and the selector are not
;; visible in this listing; presumably a vec_select over a vec_concat.
4080 (define_expand "vec_interleave_lowv16qi"
4081 [(set (match_operand:V16QI 0 "register_operand" "=x")
4084 (match_operand:V16QI 1 "register_operand" "0")
4085 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4086 (parallel [(const_int 0) (const_int 16)
4087 (const_int 1) (const_int 17)
4088 (const_int 2) (const_int 18)
4089 (const_int 3) (const_int 19)
4090 (const_int 4) (const_int 20)
4091 (const_int 5) (const_int 21)
4092 (const_int 6) (const_int 22)
4093 (const_int 7) (const_int 23)])))]
4096 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the high 16-bit interleave; it simply emits
;; sse2_punpckhwd on the same three operands.  The selector alternates
;; indices 4..7 with 12..15.
;; NOTE(review): the lines wrapping the two operands and the selector are not
;; visible in this listing; presumably a vec_select over a vec_concat.
4100 (define_expand "vec_interleave_highv8hi"
4101 [(set (match_operand:V8HI 0 "register_operand" "=x")
4104 (match_operand:V8HI 1 "register_operand" "0")
4105 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4106 (parallel [(const_int 4) (const_int 12)
4107 (const_int 5) (const_int 13)
4108 (const_int 6) (const_int 14)
4109 (const_int 7) (const_int 15)])))]
4112 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the low 16-bit interleave; it simply emits
;; sse2_punpcklwd on the same three operands.  The selector alternates
;; indices 0..3 with 8..11.
;; NOTE(review): the lines wrapping the two operands and the selector are not
;; visible in this listing; presumably a vec_select over a vec_concat.
4116 (define_expand "vec_interleave_lowv8hi"
4117 [(set (match_operand:V8HI 0 "register_operand" "=x")
4120 (match_operand:V8HI 1 "register_operand" "0")
4121 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4122 (parallel [(const_int 0) (const_int 8)
4123 (const_int 1) (const_int 9)
4124 (const_int 2) (const_int 10)
4125 (const_int 3) (const_int 11)])))]
4128 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the high 32-bit interleave; it simply emits
;; sse2_punpckhdq on the same three operands.  The selector is 2, 6, 3, 7.
;; NOTE(review): the lines wrapping the two operands and the selector are not
;; visible in this listing; presumably a vec_select over a vec_concat.
4132 (define_expand "vec_interleave_highv4si"
4133 [(set (match_operand:V4SI 0 "register_operand" "=x")
4136 (match_operand:V4SI 1 "register_operand" "0")
4137 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4138 (parallel [(const_int 2) (const_int 6)
4139 (const_int 3) (const_int 7)])))]
4142 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the low 32-bit interleave; it simply emits
;; sse2_punpckldq on the same three operands.  The selector is 0, 4, 1, 5.
;; NOTE(review): the lines wrapping the two operands and the selector are not
;; visible in this listing; presumably a vec_select over a vec_concat.
4146 (define_expand "vec_interleave_lowv4si"
4147 [(set (match_operand:V4SI 0 "register_operand" "=x")
4150 (match_operand:V4SI 1 "register_operand" "0")
4151 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4152 (parallel [(const_int 0) (const_int 4)
4153 (const_int 1) (const_int 5)])))]
4156 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high 64-bit interleave; it simply emits
;; sse2_punpckhqdq on the same three operands.
;; NOTE(review): the wrapper lines and the second selector index are not
;; visible in this listing.
4160 (define_expand "vec_interleave_highv2di"
4161 [(set (match_operand:V2DI 0 "register_operand" "=x")
4164 (match_operand:V2DI 1 "register_operand" "0")
4165 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4166 (parallel [(const_int 1)
4170 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low 64-bit interleave; it simply emits
;; sse2_punpcklqdq on the same three operands.
;; NOTE(review): the wrapper lines and the second selector index are not
;; visible in this listing.
4174 (define_expand "vec_interleave_lowv2di"
4175 [(set (match_operand:V2DI 0 "register_operand" "=x")
4178 (match_operand:V2DI 1 "register_operand" "0")
4179 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4180 (parallel [(const_int 0)
4184 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Packs two V8HI operands into one V16QI result; emits packsswb.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): the RTL lines describing the narrowing and the enabling
;; condition are not visible in this listing.
4188 (define_insn "sse2_packsswb"
4189 [(set (match_operand:V16QI 0 "register_operand" "=x")
4192 (match_operand:V8HI 1 "register_operand" "0"))
4194 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4196 "packsswb\t{%2, %0|%0, %2}"
4197 [(set_attr "type" "sselog")
4198 (set_attr "prefix_data16" "1")
4199 (set_attr "mode" "TI")])
;; Packs two V4SI operands into one V8HI result; emits packssdw.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): the RTL lines describing the narrowing and the enabling
;; condition are not visible in this listing.
4201 (define_insn "sse2_packssdw"
4202 [(set (match_operand:V8HI 0 "register_operand" "=x")
4205 (match_operand:V4SI 1 "register_operand" "0"))
4207 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4209 "packssdw\t{%2, %0|%0, %2}"
4210 [(set_attr "type" "sselog")
4211 (set_attr "prefix_data16" "1")
4212 (set_attr "mode" "TI")])
;; Packs two V8HI operands into one V16QI result; emits packuswb.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): the RTL lines describing the narrowing and the enabling
;; condition are not visible in this listing.
4214 (define_insn "sse2_packuswb"
4215 [(set (match_operand:V16QI 0 "register_operand" "=x")
4218 (match_operand:V8HI 1 "register_operand" "0"))
4220 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4222 "packuswb\t{%2, %0|%0, %2}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_data16" "1")
4225 (set_attr "mode" "TI")])
;; High byte interleave of two V16QI operands; emits punpckhbw.  The selector
;; alternates indices 8..15 with 24..31.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
;; NOTE(review): the wrapper RTL lines and the enabling condition are not
;; visible in this listing.
4227 (define_insn "sse2_punpckhbw"
4228 [(set (match_operand:V16QI 0 "register_operand" "=x")
4231 (match_operand:V16QI 1 "register_operand" "0")
4232 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4233 (parallel [(const_int 8) (const_int 24)
4234 (const_int 9) (const_int 25)
4235 (const_int 10) (const_int 26)
4236 (const_int 11) (const_int 27)
4237 (const_int 12) (const_int 28)
4238 (const_int 13) (const_int 29)
4239 (const_int 14) (const_int 30)
4240 (const_int 15) (const_int 31)])))]
4242 "punpckhbw\t{%2, %0|%0, %2}"
4243 [(set_attr "type" "sselog")
4244 (set_attr "prefix_data16" "1")
4245 (set_attr "mode" "TI")])
;; Low byte interleave of two V16QI operands; emits punpcklbw.  The selector
;; alternates indices 0..7 with 16..23.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
;; NOTE(review): the wrapper RTL lines and the enabling condition are not
;; visible in this listing.
4247 (define_insn "sse2_punpcklbw"
4248 [(set (match_operand:V16QI 0 "register_operand" "=x")
4251 (match_operand:V16QI 1 "register_operand" "0")
4252 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4253 (parallel [(const_int 0) (const_int 16)
4254 (const_int 1) (const_int 17)
4255 (const_int 2) (const_int 18)
4256 (const_int 3) (const_int 19)
4257 (const_int 4) (const_int 20)
4258 (const_int 5) (const_int 21)
4259 (const_int 6) (const_int 22)
4260 (const_int 7) (const_int 23)])))]
4262 "punpcklbw\t{%2, %0|%0, %2}"
4263 [(set_attr "type" "sselog")
4264 (set_attr "prefix_data16" "1")
4265 (set_attr "mode" "TI")])
;; sse2_punpckhwd: emits punpckhwd on V8HI operands.  The selection list
;; alternates indices 4..7 with 12..15 (presumably indexing the
;; concatenation of operands 1 and 2).  Operand 1 is tied to the
;; destination; operand 2 may be memory.
4267 (define_insn "sse2_punpckhwd"
4268 [(set (match_operand:V8HI 0 "register_operand" "=x")
4271 (match_operand:V8HI 1 "register_operand" "0")
4272 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4273 (parallel [(const_int 4) (const_int 12)
4274 (const_int 5) (const_int 13)
4275 (const_int 6) (const_int 14)
4276 (const_int 7) (const_int 15)])))]
4278 "punpckhwd\t{%2, %0|%0, %2}"
4279 [(set_attr "type" "sselog")
4280 (set_attr "prefix_data16" "1")
4281 (set_attr "mode" "TI")])
;; sse2_punpcklwd: emits punpcklwd on V8HI operands.  The selection list
;; alternates indices 0..3 with 8..11 (presumably indexing the
;; concatenation of operands 1 and 2).  Operand 1 is tied to the
;; destination; operand 2 may be memory.
4283 (define_insn "sse2_punpcklwd"
4284 [(set (match_operand:V8HI 0 "register_operand" "=x")
4287 (match_operand:V8HI 1 "register_operand" "0")
4288 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4289 (parallel [(const_int 0) (const_int 8)
4290 (const_int 1) (const_int 9)
4291 (const_int 2) (const_int 10)
4292 (const_int 3) (const_int 11)])))]
4294 "punpcklwd\t{%2, %0|%0, %2}"
4295 [(set_attr "type" "sselog")
4296 (set_attr "prefix_data16" "1")
4297 (set_attr "mode" "TI")])
;; sse2_punpckhdq: emits punpckhdq on V4SI operands.  The selection list
;; is 2, 6, 3, 7 (presumably indexing the concatenation of operands 1
;; and 2).  Operand 1 is tied to the destination; operand 2 may be memory.
4299 (define_insn "sse2_punpckhdq"
4300 [(set (match_operand:V4SI 0 "register_operand" "=x")
4303 (match_operand:V4SI 1 "register_operand" "0")
4304 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4305 (parallel [(const_int 2) (const_int 6)
4306 (const_int 3) (const_int 7)])))]
4308 "punpckhdq\t{%2, %0|%0, %2}"
4309 [(set_attr "type" "sselog")
4310 (set_attr "prefix_data16" "1")
4311 (set_attr "mode" "TI")])
;; sse2_punpckldq: emits punpckldq on V4SI operands.  The selection list
;; is 0, 4, 1, 5 (presumably indexing the concatenation of operands 1
;; and 2).  Operand 1 is tied to the destination; operand 2 may be memory.
4313 (define_insn "sse2_punpckldq"
4314 [(set (match_operand:V4SI 0 "register_operand" "=x")
4317 (match_operand:V4SI 1 "register_operand" "0")
4318 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4319 (parallel [(const_int 0) (const_int 4)
4320 (const_int 1) (const_int 5)])))]
4322 "punpckldq\t{%2, %0|%0, %2}"
4323 [(set_attr "type" "sselog")
4324 (set_attr "prefix_data16" "1")
4325 (set_attr "mode" "TI")])
;; sse2_punpckhqdq: emits punpckhqdq on V2DI operands; the selection list
;; starts at index 1.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
4327 (define_insn "sse2_punpckhqdq"
4328 [(set (match_operand:V2DI 0 "register_operand" "=x")
4331 (match_operand:V2DI 1 "register_operand" "0")
4332 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4333 (parallel [(const_int 1)
4336 "punpckhqdq\t{%2, %0|%0, %2}"
4337 [(set_attr "type" "sselog")
4338 (set_attr "prefix_data16" "1")
4339 (set_attr "mode" "TI")])
;; sse2_punpcklqdq: emits punpcklqdq on V2DI operands; the selection list
;; starts at index 0.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
4341 (define_insn "sse2_punpcklqdq"
4342 [(set (match_operand:V2DI 0 "register_operand" "=x")
4345 (match_operand:V2DI 1 "register_operand" "0")
4346 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4347 (parallel [(const_int 0)
4350 "punpcklqdq\t{%2, %0|%0, %2}"
4351 [(set_attr "type" "sselog")
4352 (set_attr "prefix_data16" "1")
4353 (set_attr "mode" "TI")])
;; *sse4_1_pinsrb: emits pinsrb.  Operand 2 (QI register or memory) is
;; duplicated across V16QI and combined with operand 1 (tied to the
;; destination) under the constant in operand 3.  Before printing,
;; operand 3 is replaced by its exact_log2, which becomes the immediate.
4355 (define_insn "*sse4_1_pinsrb"
4356 [(set (match_operand:V16QI 0 "register_operand" "=x")
4358 (vec_duplicate:V16QI
4359 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4360 (match_operand:V16QI 1 "register_operand" "0")
4361 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4364 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4365 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4367 [(set_attr "type" "sselog")
4368 (set_attr "prefix_extra" "1")
4369 (set_attr "mode" "TI")])
;; *sse2_pinsrw: emits pinsrw.  Operand 2 is an HI register or memory
;; value, operand 1 (V8HI) is tied to the destination, and operand 3 is
;; a constant that is replaced by its exact_log2 before printing, so the
;; emitted immediate is the log2 value.
4371 (define_insn "*sse2_pinsrw"
4372 [(set (match_operand:V8HI 0 "register_operand" "=x")
4375 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4376 (match_operand:V8HI 1 "register_operand" "0")
4377 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4380 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4381 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4383 [(set_attr "type" "sselog")
4384 (set_attr "prefix_data16" "1")
4385 (set_attr "mode" "TI")])
4387 ;; It must come before sse2_loadld since it is preferred.
;; *sse4_1_pinsrd: emits pinsrd.  Operand 2 is an SI register or memory
;; value, operand 1 (V4SI) is tied to the destination, and operand 3 is
;; a constant that is replaced by its exact_log2 before printing.
4388 (define_insn "*sse4_1_pinsrd"
4389 [(set (match_operand:V4SI 0 "register_operand" "=x")
4392 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4393 (match_operand:V4SI 1 "register_operand" "0")
4394 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4397 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4398 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4400 [(set_attr "type" "sselog")
4401 (set_attr "prefix_extra" "1")
4402 (set_attr "mode" "TI")])
;; *sse4_1_pinsrq: emits pinsrq.  Operand 2 is a DI register or memory
;; value, operand 1 (V2DI) is tied to the destination, and operand 3 is
;; a constant that is replaced by its exact_log2 before printing.
4404 (define_insn "*sse4_1_pinsrq"
4405 [(set (match_operand:V2DI 0 "register_operand" "=x")
4408 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4409 (match_operand:V2DI 1 "register_operand" "0")
4410 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4413 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4414 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4416 [(set_attr "type" "sselog")
4417 (set_attr "prefix_extra" "1")
4418 (set_attr "mode" "TI")])
;; *sse4_1_pextrb: emits pextrb with an SImode general-register
;; destination.  Operand 1 is the V16QI source register; operand 2 is
;; the constant element index (const_0_to_15_operand).
4420 (define_insn "*sse4_1_pextrb"
4421 [(set (match_operand:SI 0 "register_operand" "=r")
4424 (match_operand:V16QI 1 "register_operand" "x")
4425 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4427 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4428 [(set_attr "type" "sselog")
4429 (set_attr "prefix_extra" "1")
4430 (set_attr "mode" "TI")])
;; *sse4_1_pextrb_memory: emits pextrb with a QImode memory destination.
;; Operand 1 is the V16QI source register; operand 2 is the constant
;; element index (const_0_to_15_operand).
4432 (define_insn "*sse4_1_pextrb_memory"
4433 [(set (match_operand:QI 0 "memory_operand" "=m")
4435 (match_operand:V16QI 1 "register_operand" "x")
4436 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4438 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4439 [(set_attr "type" "sselog")
4440 (set_attr "prefix_extra" "1")
4441 (set_attr "mode" "TI")])
;; *sse2_pextrw: emits pextrw with an SImode general-register
;; destination.  Operand 1 is the V8HI source register; operand 2 is the
;; constant element index (const_0_to_7_operand).
4443 (define_insn "*sse2_pextrw"
4444 [(set (match_operand:SI 0 "register_operand" "=r")
4447 (match_operand:V8HI 1 "register_operand" "x")
4448 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4450 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4451 [(set_attr "type" "sselog")
4452 (set_attr "prefix_data16" "1")
4453 (set_attr "mode" "TI")])
;; *sse4_1_pextrw_memory: emits pextrw with an HImode memory destination.
;; Operand 1 is the V8HI source register; operand 2 is the constant
;; element index (const_0_to_7_operand).
4455 (define_insn "*sse4_1_pextrw_memory"
4456 [(set (match_operand:HI 0 "memory_operand" "=m")
4458 (match_operand:V8HI 1 "register_operand" "x")
4459 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4461 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4462 [(set_attr "type" "sselog")
4463 (set_attr "prefix_extra" "1")
4464 (set_attr "mode" "TI")])
;; *sse4_1_pextrd: emits pextrd.  The SImode destination may be a general
;; register or memory ("=rm"); operand 1 is the V4SI source register and
;; operand 2 the constant element index (const_0_to_3_operand).
4466 (define_insn "*sse4_1_pextrd"
4467 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4469 (match_operand:V4SI 1 "register_operand" "x")
4470 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4472 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4473 [(set_attr "type" "sselog")
4474 (set_attr "prefix_extra" "1")
4475 (set_attr "mode" "TI")])
4477 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; *sse4_1_pextrq: emits pextrq; enabled only when both TARGET_SSE4_1 and
;; TARGET_64BIT hold.  The DImode destination may be a general register
;; or memory; operand 2 is the constant element index (0 or 1).
4478 (define_insn "*sse4_1_pextrq"
4479 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4481 (match_operand:V2DI 1 "register_operand" "x")
4482 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4483 "TARGET_SSE4_1 && TARGET_64BIT"
4484 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4485 [(set_attr "type" "sselog")
4486 (set_attr "prefix_extra" "1")
4487 (set_attr "mode" "TI")])
;; sse2_pshufd: expander taking a V4SI destination, a V4SI source and a
;; constant integer mask.  It splits the mask into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7) and passes them as separate selector
;; operands to gen_sse2_pshufd_1.
4489 (define_expand "sse2_pshufd"
4490 [(match_operand:V4SI 0 "register_operand" "")
4491 (match_operand:V4SI 1 "nonimmediate_operand" "")
4492 (match_operand:SI 2 "const_int_operand" "")]
4495 int mask = INTVAL (operands[2]);
4496 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4497 GEN_INT ((mask >> 0) & 3),
4498 GEN_INT ((mask >> 2) & 3),
4499 GEN_INT ((mask >> 4) & 3),
4500 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshufd_1: emits pshufd.  Operands 2-5 are four element selectors,
;; each matched by const_0_to_3_operand.  The output code packs them back
;; into one immediate (operand N shifted left by 0, 2, 4, 6 bits) and
;; stores it in operands[2] for printing.
4504 (define_insn "sse2_pshufd_1"
4505 [(set (match_operand:V4SI 0 "register_operand" "=x")
4507 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4508 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4509 (match_operand 3 "const_0_to_3_operand" "")
4510 (match_operand 4 "const_0_to_3_operand" "")
4511 (match_operand 5 "const_0_to_3_operand" "")])))]
4515 mask |= INTVAL (operands[2]) << 0;
4516 mask |= INTVAL (operands[3]) << 2;
4517 mask |= INTVAL (operands[4]) << 4;
4518 mask |= INTVAL (operands[5]) << 6;
4519 operands[2] = GEN_INT (mask);
4521 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4523 [(set_attr "type" "sselog1")
4524 (set_attr "prefix_data16" "1")
4525 (set_attr "mode" "TI")])
;; sse2_pshuflw: expander taking a V8HI destination, a V8HI source and a
;; constant integer mask.  It splits the mask into four 2-bit fields and
;; passes them as separate selector operands to gen_sse2_pshuflw_1.
4527 (define_expand "sse2_pshuflw"
4528 [(match_operand:V8HI 0 "register_operand" "")
4529 (match_operand:V8HI 1 "nonimmediate_operand" "")
4530 (match_operand:SI 2 "const_int_operand" "")]
4533 int mask = INTVAL (operands[2]);
4534 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4535 GEN_INT ((mask >> 0) & 3),
4536 GEN_INT ((mask >> 2) & 3),
4537 GEN_INT ((mask >> 4) & 3),
4538 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshuflw_1: emits pshuflw.  Operands 2-5 are selectors matched by
;; const_0_to_3_operand.  The output code packs them into one immediate
;; (shifts of 0, 2, 4, 6 bits) and stores it in operands[2] for printing.
4542 (define_insn "sse2_pshuflw_1"
4543 [(set (match_operand:V8HI 0 "register_operand" "=x")
4545 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4546 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4547 (match_operand 3 "const_0_to_3_operand" "")
4548 (match_operand 4 "const_0_to_3_operand" "")
4549 (match_operand 5 "const_0_to_3_operand" "")
4557 mask |= INTVAL (operands[2]) << 0;
4558 mask |= INTVAL (operands[3]) << 2;
4559 mask |= INTVAL (operands[4]) << 4;
4560 mask |= INTVAL (operands[5]) << 6;
4561 operands[2] = GEN_INT (mask);
4563 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4565 [(set_attr "type" "sselog")
4566 (set_attr "prefix_rep" "1")
4567 (set_attr "mode" "TI")])
;; sse2_pshufhw: expander taking a V8HI destination, a V8HI source and a
;; constant integer mask.  Each 2-bit field of the mask is biased by +4
;; before being passed to gen_sse2_pshufhw_1, so the selectors fall in
;; the range 4..7.
4569 (define_expand "sse2_pshufhw"
4570 [(match_operand:V8HI 0 "register_operand" "")
4571 (match_operand:V8HI 1 "nonimmediate_operand" "")
4572 (match_operand:SI 2 "const_int_operand" "")]
4575 int mask = INTVAL (operands[2]);
4576 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4577 GEN_INT (((mask >> 0) & 3) + 4),
4578 GEN_INT (((mask >> 2) & 3) + 4),
4579 GEN_INT (((mask >> 4) & 3) + 4),
4580 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse2_pshufhw_1: emits pshufhw.  Operands 2-5 are selectors matched by
;; const_4_to_7_operand.  The output code subtracts 4 from each, packs
;; the results into one immediate (shifts of 0, 2, 4, 6 bits) and stores
;; it in operands[2] for printing.
4584 (define_insn "sse2_pshufhw_1"
4585 [(set (match_operand:V8HI 0 "register_operand" "=x")
4587 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4588 (parallel [(const_int 0)
4592 (match_operand 2 "const_4_to_7_operand" "")
4593 (match_operand 3 "const_4_to_7_operand" "")
4594 (match_operand 4 "const_4_to_7_operand" "")
4595 (match_operand 5 "const_4_to_7_operand" "")])))]
4599 mask |= (INTVAL (operands[2]) - 4) << 0;
4600 mask |= (INTVAL (operands[3]) - 4) << 2;
4601 mask |= (INTVAL (operands[4]) - 4) << 4;
4602 mask |= (INTVAL (operands[5]) - 4) << 6;
4603 operands[2] = GEN_INT (mask);
4605 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4607 [(set_attr "type" "sselog")
4608 (set_attr "prefix_rep" "1")
4609 (set_attr "mode" "TI")])
;; sse2_loadd: expander that sets a V4SI register from an SImode register
;; or memory operand.  The preparation statement supplies operand 2 as
;; the V4SImode zero constant.
4611 (define_expand "sse2_loadd"
4612 [(set (match_operand:V4SI 0 "register_operand" "")
4615 (match_operand:SI 1 "nonimmediate_operand" ""))
4619 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld: four-alternative insn.  Operand 2 is the SImode source
;; (m, r, m, x); operand 1 is a V4SI register-or-zero operand (C, C, C, 0).
;; The first two alternatives print movd, the last two movss.
4621 (define_insn "sse2_loadld"
4622 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4625 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4626 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4630 movd\t{%2, %0|%0, %2}
4631 movd\t{%2, %0|%0, %2}
4632 movss\t{%2, %0|%0, %2}
4633 movss\t{%2, %0|%0, %2}"
4634 [(set_attr "type" "ssemov")
4635 (set_attr "mode" "TI,TI,V4SF,SF")])
;; sse2_stored: insn-and-split that stores element 0 of a V4SI register
;; into an SImode destination.  After reload, if inter-unit moves are
;; enabled, or the destination is memory, or it is not a general
;; register, the insn is split into a plain SImode set whose source is
;; operand 1's hard register re-expressed in SImode.
4637 (define_insn_and_split "sse2_stored"
4638 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4640 (match_operand:V4SI 1 "register_operand" "x,Yi")
4641 (parallel [(const_int 0)])))]
4644 "&& reload_completed
4645 && (TARGET_INTER_UNIT_MOVES
4646 || MEM_P (operands [0])
4647 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4648 [(set (match_dup 0) (match_dup 1))]
4650 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; *vec_ext_v4si_mem: extracts element N (operand 2, 0..3) of a V4SI
;; value held in offsettable memory into an SImode register.  The split
;; emits a plain SImode move from the source address adjusted by N*4
;; bytes.
4653 (define_insn_and_split "*vec_ext_v4si_mem"
4654 [(set (match_operand:SI 0 "register_operand" "=r")
4656 (match_operand:V4SI 1 "memory_operand" "o")
4657 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4663 int i = INTVAL (operands[2]);
4665 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; sse_storeq: expander that stores element 0 of a V2DI register into a
;; DImode register or memory destination.
4669 (define_expand "sse_storeq"
4670 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4672 (match_operand:V2DI 1 "register_operand" "")
4673 (parallel [(const_int 0)])))]
;; *sse2_storeq_rex64: 64-bit variant of the element-0 DImode store.  It
;; is enabled when TARGET_64BIT holds and operands 0 and 1 are not both
;; memory.  Of the three alternatives only the third (general register
;; from offsettable memory) has a visible template, mov{q}, typed "imov".
4677 (define_insn "*sse2_storeq_rex64"
4678 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4680 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4681 (parallel [(const_int 0)])))]
4682 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4686 mov{q}\t{%1, %0|%0, %1}"
4687 [(set_attr "type" "*,*,imov")
4688 (set_attr "mode" "*,*,DI")])
;; *sse2_storeq: stores element 0 of a V2DI register into a DImode
;; destination (memory or SSE register).  The trailing pattern repeats
;; the same set without constraints and rewrites it as a plain DImode
;; set whose source is operand 1's hard register re-expressed in DImode.
;; That rewrite is guarded by the visible condition: inter-unit moves
;; enabled, or destination in memory, or destination not a general
;; register.
4690 (define_insn "*sse2_storeq"
4691 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4693 (match_operand:V2DI 1 "register_operand" "x")
4694 (parallel [(const_int 0)])))]
4699 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4701 (match_operand:V2DI 1 "register_operand" "")
4702 (parallel [(const_int 0)])))]
4705 && (TARGET_INTER_UNIT_MOVES
4706 || MEM_P (operands [0])
4707 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4708 [(set (match_dup 0) (match_dup 1))]
4710 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; *vec_extractv2di_1_rex64: extracts element 1 of a V2DI value into a
;; DImode destination on 64-bit targets; operands may not both be memory.
;; Alternatives: movhps to memory, psrldq by 8 in place, movq from the
;; %H1 form of a memory source, or mov{q} from %H1 into a general
;; register.
4713 (define_insn "*vec_extractv2di_1_rex64"
4714 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4716 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4717 (parallel [(const_int 1)])))]
4718 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4720 movhps\t{%1, %0|%0, %1}
4721 psrldq\t{$8, %0|%0, 8}
4722 movq\t{%H1, %0|%0, %H1}
4723 mov{q}\t{%H1, %0|%0, %H1}"
4724 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4725 (set_attr "memory" "*,none,*,*")
4726 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_sse2: extracts element 1 of a V2DI value into a
;; DImode memory or SSE-register destination.  The visible part of the
;; condition requires TARGET_SSE2 and that the operands are not both
;; memory.  Alternatives: movhps to memory, psrldq by 8 in place, or movq
;; from the %H1 form of a memory source.
4728 (define_insn "*vec_extractv2di_1_sse2"
4729 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4731 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4732 (parallel [(const_int 1)])))]
4734 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4736 movhps\t{%1, %0|%0, %1}
4737 psrldq\t{$8, %0|%0, 8}
4738 movq\t{%H1, %0|%0, %H1}"
4739 [(set_attr "type" "ssemov,sseishft,ssemov")
4740 (set_attr "memory" "*,none,*")
4741 (set_attr "mode" "V2SF,TI,TI")])
4743 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; *vec_extractv2di_1_sse: SSE1-only variant (condition: !TARGET_SSE2 &&
;; TARGET_SSE, operands not both memory) of the element-1 V2DI extract.
;; Alternatives: movhps to memory, movhlps between registers, or movlps
;; from the %H1 form of a memory source.
4744 (define_insn "*vec_extractv2di_1_sse"
4745 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4747 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4748 (parallel [(const_int 1)])))]
4749 "!TARGET_SSE2 && TARGET_SSE
4750 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4752 movhps\t{%1, %0|%0, %1}
4753 movhlps\t{%1, %0|%0, %1}
4754 movlps\t{%H1, %0|%0, %H1}"
4755 [(set_attr "type" "ssemov")
4756 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si: sets a V4SI register from an SImode register, with two
;; alternatives: pshufd with immediate 0 (Yt <- Yt), or shufps with
;; immediate 0 operating in place (source tied to destination).
4758 (define_insn "*vec_dupv4si"
4759 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4761 (match_operand:SI 1 "register_operand" " Yt,0")))]
4764 pshufd\t{$0, %1, %0|%0, %1, 0}
4765 shufps\t{$0, %0, %0|%0, %0, 0}"
4766 [(set_attr "type" "sselog1")
4767 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di: sets a V2DI register from a DImode register that is tied
;; to the destination in both alternatives (types sselog1 and ssemov).
;; NOTE(review): the output templates are not visible here -- confirm the
;; mnemonics against the full file.
4769 (define_insn "*vec_dupv2di"
4770 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4772 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4777 [(set_attr "type" "sselog1,ssemov")
4778 (set_attr "mode" "TI,V4SF")])
4780 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4781 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4782 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse2_concatv2si: builds a V2SI value from two SImode operands.  When
;; operand 2 is a register, punpckldq merges it into operand 1 (tied to
;; the destination); when operand 2 is the zero constant (C), movd loads
;; operand 1.  Alternatives 1-2 use Yt registers, 3-4 use MMX (*y).
4783 (define_insn "*sse2_concatv2si"
4784 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4786 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4787 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4790 punpckldq\t{%2, %0|%0, %2}
4791 movd\t{%1, %0|%0, %1}
4792 punpckldq\t{%2, %0|%0, %2}
4793 movd\t{%1, %0|%0, %1}"
4794 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4795 (set_attr "mode" "TI,TI,DI,DI")])
;; *sse1_concatv2si: builds a V2SI value from two SImode operands.  The
;; SSE alternatives use unpcklps (register operand 2) or movss (operand 1
;; from memory, operand 2 zero); the MMX alternatives use punpckldq or
;; movd.
4797 (define_insn "*sse1_concatv2si"
4798 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4800 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4801 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4804 unpcklps\t{%2, %0|%0, %2}
4805 movss\t{%1, %0|%0, %1}
4806 punpckldq\t{%2, %0|%0, %2}
4807 movd\t{%1, %0|%0, %1}"
4808 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4809 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1: builds a V4SI value from two V2SI operands.
;; Operand 1 is tied to the destination in every alternative; operand 2
;; is merged with punpcklqdq (Yt register), movlhps (x register) or
;; movhps (memory).
4811 (define_insn "*vec_concatv4si_1"
4812 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4814 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4815 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4818 punpcklqdq\t{%2, %0|%0, %2}
4819 movlhps\t{%2, %0|%0, %2}
4820 movhps\t{%2, %0|%0, %2}"
4821 [(set_attr "type" "sselog,ssemov,ssemov")
4822 (set_attr "mode" "TI,V4SF,V2SF")])
;; vec_concatv2di: builds a V2DI value from two DImode operands on
;; non-64-bit SSE targets (!TARGET_64BIT && TARGET_SSE).  There are six
;; alternatives.  With operand 2 zero (C): movq from memory or movq2dq
;; from an MMX register.  With operand 1 tied to the destination:
;; punpcklqdq, movlhps or movhps of operand 2.  With operand 2 tied to
;; the destination: movlps of operand 1 from memory.
4824 (define_insn "vec_concatv2di"
4825 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4827 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4828 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4829 "!TARGET_64BIT && TARGET_SSE"
4831 movq\t{%1, %0|%0, %1}
4832 movq2dq\t{%1, %0|%0, %1}
4833 punpcklqdq\t{%2, %0|%0, %2}
4834 movlhps\t{%2, %0|%0, %2}
4835 movhps\t{%2, %0|%0, %2}
4836 movlps\t{%1, %0|%0, %1}"
4837 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4838 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; *vec_concatv2di_rex: seven-alternative V2DI concat.  Compared with
;; vec_concatv2di it has one extra alternative: movq from a general
;; register (constraint r) into a Yi register with operand 2 zero.
4840 (define_insn "*vec_concatv2di_rex"
4841 [(set (match_operand:V2DI 0 "register_operand" "=Yt,Yi,!Yt,Yt,x,x,x")
4843 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4844 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Yt,x,m,0")))]
4847 movq\t{%1, %0|%0, %1}
4848 movq\t{%1, %0|%0, %1}
4849 movq2dq\t{%1, %0|%0, %1}
4850 punpcklqdq\t{%2, %0|%0, %2}
4851 movlhps\t{%2, %0|%0, %2}
4852 movhps\t{%2, %0|%0, %2}
4853 movlps\t{%1, %0|%0, %1}"
4854 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4855 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_setv2di: expander.  Passes the V2DI destination (operand 0), the
;; DImode value (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_set, with `false' as the first argument.
4857 (define_expand "vec_setv2di"
4858 [(match_operand:V2DI 0 "register_operand" "")
4859 (match_operand:DI 1 "register_operand" "")
4860 (match_operand 2 "const_int_operand" "")]
4863 ix86_expand_vector_set (false, operands[0], operands[1],
4864 INTVAL (operands[2]));
;; vec_extractv2di: expander.  Passes the DImode destination (operand 0),
;; the V2DI source (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_extract, with `false' as the first argument.
4868 (define_expand "vec_extractv2di"
4869 [(match_operand:DI 0 "register_operand" "")
4870 (match_operand:V2DI 1 "register_operand" "")
4871 (match_operand 2 "const_int_operand" "")]
4874 ix86_expand_vector_extract (false, operands[0], operands[1],
4875 INTVAL (operands[2]));
;; vec_initv2di: expander.  Passes the V2DI destination (operand 0) and
;; the unconstrained operand 1 to ix86_expand_vector_init, with `false'
;; as the first argument.
4879 (define_expand "vec_initv2di"
4880 [(match_operand:V2DI 0 "register_operand" "")
4881 (match_operand 1 "" "")]
4884 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4si: expander.  Passes the V4SI destination (operand 0), the
;; SImode value (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_set, with `false' as the first argument.
4888 (define_expand "vec_setv4si"
4889 [(match_operand:V4SI 0 "register_operand" "")
4890 (match_operand:SI 1 "register_operand" "")
4891 (match_operand 2 "const_int_operand" "")]
4894 ix86_expand_vector_set (false, operands[0], operands[1],
4895 INTVAL (operands[2]));
;; vec_extractv4si: expander.  Passes the SImode destination (operand 0),
;; the V4SI source (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_extract, with `false' as the first argument.
4899 (define_expand "vec_extractv4si"
4900 [(match_operand:SI 0 "register_operand" "")
4901 (match_operand:V4SI 1 "register_operand" "")
4902 (match_operand 2 "const_int_operand" "")]
4905 ix86_expand_vector_extract (false, operands[0], operands[1],
4906 INTVAL (operands[2]));
;; vec_initv4si: expander.  Passes the V4SI destination (operand 0) and
;; the unconstrained operand 1 to ix86_expand_vector_init, with `false'
;; as the first argument.
4910 (define_expand "vec_initv4si"
4911 [(match_operand:V4SI 0 "register_operand" "")
4912 (match_operand 1 "" "")]
4915 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv8hi: expander.  Passes the V8HI destination (operand 0), the
;; HImode value (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_set, with `false' as the first argument.
4919 (define_expand "vec_setv8hi"
4920 [(match_operand:V8HI 0 "register_operand" "")
4921 (match_operand:HI 1 "register_operand" "")
4922 (match_operand 2 "const_int_operand" "")]
4925 ix86_expand_vector_set (false, operands[0], operands[1],
4926 INTVAL (operands[2]));
;; vec_extractv8hi: expander.  Passes the HImode destination (operand 0),
;; the V8HI source (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_extract, with `false' as the first argument.
4930 (define_expand "vec_extractv8hi"
4931 [(match_operand:HI 0 "register_operand" "")
4932 (match_operand:V8HI 1 "register_operand" "")
4933 (match_operand 2 "const_int_operand" "")]
4936 ix86_expand_vector_extract (false, operands[0], operands[1],
4937 INTVAL (operands[2]));
;; vec_initv8hi: expander.  Passes the V8HI destination (operand 0) and
;; the unconstrained operand 1 to ix86_expand_vector_init, with `false'
;; as the first argument.
4941 (define_expand "vec_initv8hi"
4942 [(match_operand:V8HI 0 "register_operand" "")
4943 (match_operand 1 "" "")]
4946 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv16qi: expander.  Passes the V16QI destination (operand 0), the
;; QImode value (operand 1) and the constant in operand 2 to
;; ix86_expand_vector_set, with `false' as the first argument.
4950 (define_expand "vec_setv16qi"
4951 [(match_operand:V16QI 0 "register_operand" "")
4952 (match_operand:QI 1 "register_operand" "")
4953 (match_operand 2 "const_int_operand" "")]
4956 ix86_expand_vector_set (false, operands[0], operands[1],
4957 INTVAL (operands[2]));
;; vec_extractv16qi: expander.  Passes the QImode destination
;; (operand 0), the V16QI source (operand 1) and the constant in
;; operand 2 to ix86_expand_vector_extract, with `false' as the first
;; argument.
4961 (define_expand "vec_extractv16qi"
4962 [(match_operand:QI 0 "register_operand" "")
4963 (match_operand:V16QI 1 "register_operand" "")
4964 (match_operand 2 "const_int_operand" "")]
4967 ix86_expand_vector_extract (false, operands[0], operands[1],
4968 INTVAL (operands[2]));
;; vec_initv16qi: expander.  Passes the V16QI destination (operand 0) and
;; the unconstrained operand 1 to ix86_expand_vector_init, with `false'
;; as the first argument.
4972 (define_expand "vec_initv16qi"
4973 [(match_operand:V16QI 0 "register_operand" "")
4974 (match_operand 1 "" "")]
4977 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_unpacku_hi_v16qi: V16QI -> V8HI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (true, true) -- presumably (unsigned, high half), going by the
;; pattern name.
4981 (define_expand "vec_unpacku_hi_v16qi"
4982 [(match_operand:V8HI 0 "register_operand" "")
4983 (match_operand:V16QI 1 "register_operand" "")]
4987 ix86_expand_sse4_unpack (operands, true, true);
4989 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v16qi: V16QI -> V8HI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (false, true) -- presumably (signed, high half), going by the
;; pattern name.
4993 (define_expand "vec_unpacks_hi_v16qi"
4994 [(match_operand:V8HI 0 "register_operand" "")
4995 (match_operand:V16QI 1 "register_operand" "")]
4999 ix86_expand_sse4_unpack (operands, false, true);
5001 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v16qi: V16QI -> V8HI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (true, false) -- presumably (unsigned, low half), going by the
;; pattern name.
5005 (define_expand "vec_unpacku_lo_v16qi"
5006 [(match_operand:V8HI 0 "register_operand" "")
5007 (match_operand:V16QI 1 "register_operand" "")]
5011 ix86_expand_sse4_unpack (operands, true, false);
5013 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v16qi: V16QI -> V8HI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (false, false) -- presumably (signed, low half), going by the
;; pattern name.
5017 (define_expand "vec_unpacks_lo_v16qi"
5018 [(match_operand:V8HI 0 "register_operand" "")
5019 (match_operand:V16QI 1 "register_operand" "")]
5023 ix86_expand_sse4_unpack (operands, false, false);
5025 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v8hi: V8HI -> V4SI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (true, true) -- presumably (unsigned, high half), going by the
;; pattern name.
5029 (define_expand "vec_unpacku_hi_v8hi"
5030 [(match_operand:V4SI 0 "register_operand" "")
5031 (match_operand:V8HI 1 "register_operand" "")]
5035 ix86_expand_sse4_unpack (operands, true, true);
5037 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v8hi: V8HI -> V4SI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (false, true) -- presumably (signed, high half), going by the
;; pattern name.
5041 (define_expand "vec_unpacks_hi_v8hi"
5042 [(match_operand:V4SI 0 "register_operand" "")
5043 (match_operand:V8HI 1 "register_operand" "")]
5047 ix86_expand_sse4_unpack (operands, false, true);
5049 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v8hi: V8HI -> V4SI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (true, false) -- presumably (unsigned, low half), going by the
;; pattern name.
5053 (define_expand "vec_unpacku_lo_v8hi"
5054 [(match_operand:V4SI 0 "register_operand" "")
5055 (match_operand:V8HI 1 "register_operand" "")]
5059 ix86_expand_sse4_unpack (operands, true, false);
5061 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v8hi: V8HI -> V4SI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (false, false) -- presumably (signed, low half), going by the
;; pattern name.
5065 (define_expand "vec_unpacks_lo_v8hi"
5066 [(match_operand:V4SI 0 "register_operand" "")
5067 (match_operand:V8HI 1 "register_operand" "")]
5071 ix86_expand_sse4_unpack (operands, false, false);
5073 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: V4SI -> V2DI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (true, true) -- presumably (unsigned, high half), going by the
;; pattern name.
5077 (define_expand "vec_unpacku_hi_v4si"
5078 [(match_operand:V2DI 0 "register_operand" "")
5079 (match_operand:V4SI 1 "register_operand" "")]
5083 ix86_expand_sse4_unpack (operands, true, true);
5085 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: V4SI -> V2DI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (false, true) -- presumably (signed, high half), going by the
;; pattern name.
5089 (define_expand "vec_unpacks_hi_v4si"
5090 [(match_operand:V2DI 0 "register_operand" "")
5091 (match_operand:V4SI 1 "register_operand" "")]
5095 ix86_expand_sse4_unpack (operands, false, true);
5097 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: V4SI -> V2DI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (true, false) -- presumably (unsigned, low half), going by the
;; pattern name.
5101 (define_expand "vec_unpacku_lo_v4si"
5102 [(match_operand:V2DI 0 "register_operand" "")
5103 (match_operand:V4SI 1 "register_operand" "")]
5107 ix86_expand_sse4_unpack (operands, true, false);
5109 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: V4SI -> V2DI unpack expander.  Delegates to
;; ix86_expand_sse4_unpack / ix86_expand_sse_unpack with flags
;; (false, false) -- presumably (signed, low half), going by the
;; pattern name.
5113 (define_expand "vec_unpacks_lo_v4si"
5114 [(match_operand:V2DI 0 "register_operand" "")
5115 (match_operand:V4SI 1 "register_operand" "")]
5119 ix86_expand_sse4_unpack (operands, false, false);
5121 ix86_expand_sse_unpack (operands, false, false);
5125 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5129 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: emits pavgb on V16QI operands.  The RTL includes a
;; V16QI constant vector of sixteen 1s.  Operand 1 is marked commutative
;; ("%0"); the insn requires TARGET_SSE2 and that
;; ix86_binary_operator_ok accepts the operands for PLUS.
5131 (define_insn "sse2_uavgv16qi3"
5132 [(set (match_operand:V16QI 0 "register_operand" "=x")
5138 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5140 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5141 (const_vector:V16QI [(const_int 1) (const_int 1)
5142 (const_int 1) (const_int 1)
5143 (const_int 1) (const_int 1)
5144 (const_int 1) (const_int 1)
5145 (const_int 1) (const_int 1)
5146 (const_int 1) (const_int 1)
5147 (const_int 1) (const_int 1)
5148 (const_int 1) (const_int 1)]))
5150 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5151 "pavgb\t{%2, %0|%0, %2}"
5152 [(set_attr "type" "sseiadd")
5153 (set_attr "prefix_data16" "1")
5154 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: emits pavgw on V8HI operands.  The RTL includes a V8HI
;; constant vector of eight 1s.  Operand 1 is marked commutative ("%0");
;; the insn requires TARGET_SSE2 and that ix86_binary_operator_ok accepts
;; the operands for PLUS.
5156 (define_insn "sse2_uavgv8hi3"
5157 [(set (match_operand:V8HI 0 "register_operand" "=x")
5163 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5165 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5166 (const_vector:V8HI [(const_int 1) (const_int 1)
5167 (const_int 1) (const_int 1)
5168 (const_int 1) (const_int 1)
5169 (const_int 1) (const_int 1)]))
5171 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5172 "pavgw\t{%2, %0|%0, %2}"
5173 [(set_attr "type" "sseiadd")
5174 (set_attr "prefix_data16" "1")
5175 (set_attr "mode" "TI")])
5177 ;; The correct representation for this is absolutely enormous, and
5178 ;; surely not generally useful.
;; sse2_psadbw: emits psadbw.  The operation is modelled as an opaque
;; V2DI unspec over two V16QI inputs; operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
5179 (define_insn "sse2_psadbw"
5180 [(set (match_operand:V2DI 0 "register_operand" "=x")
5181 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5182 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5185 "psadbw\t{%2, %0|%0, %2}"
5186 [(set_attr "type" "sseiadd")
5187 (set_attr "prefix_data16" "1")
5188 (set_attr "mode" "TI")])
;; sse_movmskps: emits movmskps, writing an SImode general register from
;; a V4SF SSE register (modelled as an unspec).
5190 (define_insn "sse_movmskps"
5191 [(set (match_operand:SI 0 "register_operand" "=r")
5192 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5195 "movmskps\t{%1, %0|%0, %1}"
5196 [(set_attr "type" "ssecvt")
5197 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: emits movmskpd, writing an SImode general register from
;; a V2DF SSE register (modelled as an unspec).
5199 (define_insn "sse2_movmskpd"
5200 [(set (match_operand:SI 0 "register_operand" "=r")
5201 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5204 "movmskpd\t{%1, %0|%0, %1}"
5205 [(set_attr "type" "ssecvt")
5206 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: emits pmovmskb, writing an SImode general register from
;; a V16QI SSE register (modelled as an unspec).
5208 (define_insn "sse2_pmovmskb"
5209 [(set (match_operand:SI 0 "register_operand" "=r")
5210 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5213 "pmovmskb\t{%1, %0|%0, %1}"
5214 [(set_attr "type" "ssecvt")
5215 (set_attr "prefix_data16" "1")
5216 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: expander whose destination (operand 0) is a V16QI
;; memory operand and whose source is an unspec over two V16QI register
;; operands.
5218 (define_expand "sse2_maskmovdqu"
5219 [(set (match_operand:V16QI 0 "memory_operand" "")
5220 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5221 (match_operand:V16QI 2 "register_operand" "x")
;; *sse2_maskmovdqu: 32-bit form (TARGET_SSE2 && !TARGET_64BIT).  The
;; destination is V16QI memory addressed by an SImode register
;; constrained to "D"; the same memory also appears as an input of the
;; unspec.  Only operands 1 and 2 are printed in the template.
5227 (define_insn "*sse2_maskmovdqu"
5228 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5229 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5230 (match_operand:V16QI 2 "register_operand" "x")
5231 (mem:V16QI (match_dup 0))]
5233 "TARGET_SSE2 && !TARGET_64BIT"
5234 ;; @@@ check ordering of operands in intel/nonintel syntax
5235 "maskmovdqu\t{%2, %1|%1, %2}"
5236 [(set_attr "type" "ssecvt")
5237 (set_attr "prefix_data16" "1")
5238 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (TARGET_SSE2 && TARGET_64BIT).
;; The destination is V16QI memory addressed by a DImode register
;; constrained to "D"; the same memory also appears as an input of the
;; unspec.  Only operands 1 and 2 are printed in the template.
5240 (define_insn "*sse2_maskmovdqu_rex64"
5241 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5242 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5243 (match_operand:V16QI 2 "register_operand" "x")
5244 (mem:V16QI (match_dup 0))]
5246 "TARGET_SSE2 && TARGET_64BIT"
5247 ;; @@@ check ordering of operands in intel/nonintel syntax
5248 "maskmovdqu\t{%2, %1|%1, %2}"
5249 [(set_attr "type" "ssecvt")
5250 (set_attr "prefix_data16" "1")
5251 (set_attr "mode" "TI")])
;; sse_ldmxcsr: unspec_volatile insn taking an SImode memory operand; its
;; "memory" attribute is "load".
5253 (define_insn "sse_ldmxcsr"
5254 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5258 [(set_attr "type" "sse")
5259 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR); its "memory" attribute is "store".
5261 (define_insn "sse_stmxcsr"
5262 [(set (match_operand:SI 0 "memory_operand" "=m")
5263 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5266 [(set_attr "type" "sse")
5267 (set_attr "memory" "store")])
;; sse_sfence: expander (TARGET_SSE || TARGET_3DNOW_A).  It creates
;; operand 0 as a volatile BLKmode MEM with a SCRATCH address, and that
;; MEM is the input of the UNSPEC_SFENCE unspec.
5269 (define_expand "sse_sfence"
5271 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5272 "TARGET_SSE || TARGET_3DNOW_A"
5274 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5275 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn matching a BLKmode set of operand 0 from
;; UNSPEC_SFENCE of itself (TARGET_SSE || TARGET_3DNOW_A).  Its "memory"
;; attribute is "unknown".
5278 (define_insn "*sse_sfence"
5279 [(set (match_operand:BLK 0 "" "")
5280 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5281 "TARGET_SSE || TARGET_3DNOW_A"
5283 [(set_attr "type" "sse")
5284 (set_attr "memory" "unknown")])
;; sse2_clflush: unspec_volatile insn whose single operand is an address
;; (address_operand, constraint "p"); its "memory" attribute is
;; "unknown".
5286 (define_insn "sse2_clflush"
5287 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5291 [(set_attr "type" "sse")
5292 (set_attr "memory" "unknown")])
;; sse2_mfence: expander.  It creates operand 0 as a volatile BLKmode MEM
;; with a SCRATCH address, and that MEM is the input of the UNSPEC_MFENCE
;; unspec.
5294 (define_expand "sse2_mfence"
5296 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5299 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5300 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn matching a BLKmode set of operand 0 from
;; UNSPEC_MFENCE of itself.  Its "memory" attribute is "unknown".
5303 (define_insn "*sse2_mfence"
5304 [(set (match_operand:BLK 0 "" "")
5305 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5308 [(set_attr "type" "sse")
5309 (set_attr "memory" "unknown")])
;; sse2_lfence: expander.  It creates operand 0 as a volatile BLKmode MEM
;; with a SCRATCH address, and that MEM is the input of the UNSPEC_LFENCE
;; unspec.
5311 (define_expand "sse2_lfence"
5313 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5316 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5317 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn matching a BLKmode set of operand 0 from
;; UNSPEC_LFENCE of itself.  Its "memory" attribute is "unknown".
5320 (define_insn "*sse2_lfence"
5321 [(set (match_operand:BLK 0 "" "")
5322 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5325 [(set_attr "type" "sse")
5326 (set_attr "memory" "unknown")])
;; sse3_mwait: unspec_volatile insn taking two SImode register operands
;; constrained to "a" and "c".  Its length attribute is 3.
5328 (define_insn "sse3_mwait"
5329 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5330 (match_operand:SI 1 "register_operand" "c")]
5333 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5334 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5335 ;; we only need to set up 32bit registers.
5337 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form (TARGET_SSE3 && !TARGET_64BIT).  It takes
;; three SImode register operands constrained to "a", "c" and "d", and
;; prints all three in the monitor template.
5339 (define_insn "sse3_monitor"
5340 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5341 (match_operand:SI 1 "register_operand" "c")
5342 (match_operand:SI 2 "register_operand" "d")]
5344 "TARGET_SSE3 && !TARGET_64BIT"
5345 "monitor\t%0, %1, %2"
5346 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form (TARGET_SSE3 && TARGET_64BIT).  Operand 0
;; is a DImode register constrained to "a"; operands 1 and 2 stay SImode
;; ("c", "d"), for the reason given in the comment inside the pattern.
5348 (define_insn "sse3_monitor64"
5349 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5350 (match_operand:SI 1 "register_operand" "c")
5351 (match_operand:SI 2 "register_operand" "d")]
5353 "TARGET_SSE3 && TARGET_64BIT"
5354 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5355 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5356 ;; zero extended to 64bit, we only need to set up 32bit registers.
5358 [(set_attr "length" "3")])
;; ssse3_phaddwv8hi3: emits phaddw on V8HI operands.  The RTL spells out
;; the operation on adjacent element pairs (0,1) (2,3) (4,5) (6,7),
;; first of operand 1 (tied to the destination), then of operand 2
;; (register or memory).
;; NOTE(review): the operator joining each pair is not visible here;
;; presumably an HImode plus -- confirm against the full file.
5361 (define_insn "ssse3_phaddwv8hi3"
5362 [(set (match_operand:V8HI 0 "register_operand" "=x")
5368 (match_operand:V8HI 1 "register_operand" "0")
5369 (parallel [(const_int 0)]))
5370 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5372 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5373 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5376 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5377 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5379 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5380 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5385 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5386 (parallel [(const_int 0)]))
5387 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5389 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5390 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5393 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5394 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5396 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5397 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5399 "phaddw\t{%2, %0|%0, %2}"
5400 [(set_attr "type" "sseiadd")
5401 (set_attr "prefix_data16" "1")
5402 (set_attr "prefix_extra" "1")
5403 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: MMX-register ("y") form of phaddw on V4HI operands.
;; The RTL spells out the operation on adjacent element pairs (0,1) and
;; (2,3) of operand 1 (tied to the destination) and then of operand 2
;; (register or memory).
5405 (define_insn "ssse3_phaddwv4hi3"
5406 [(set (match_operand:V4HI 0 "register_operand" "=y")
5411 (match_operand:V4HI 1 "register_operand" "0")
5412 (parallel [(const_int 0)]))
5413 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5415 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5416 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5420 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5421 (parallel [(const_int 0)]))
5422 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5424 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5425 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5427 "phaddw\t{%2, %0|%0, %2}"
5428 [(set_attr "type" "sseiadd")
5429 (set_attr "prefix_extra" "1")
5430 (set_attr "mode" "DI")])
;; SSSE3 phaddd, V4SI form.  Operand 0 ("=x") is tied to operand 1;
;; operand 2 is a register or memory operand ("xm").  Elements 0..3 of
;; each source are taken in adjacent pairs.
5432 (define_insn "ssse3_phadddv4si3"
5433 [(set (match_operand:V4SI 0 "register_operand" "=x")
5438 (match_operand:V4SI 1 "register_operand" "0")
5439 (parallel [(const_int 0)]))
5440 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5442 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5443 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5447 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5448 (parallel [(const_int 0)]))
5449 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5451 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5452 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5454 "phaddd\t{%2, %0|%0, %2}"
5455 [(set_attr "type" "sseiadd")
5456 (set_attr "prefix_data16" "1")
5457 (set_attr "prefix_extra" "1")
5458 (set_attr "mode" "TI")])
;; SSSE3 phaddd, V2SI form.  Uses "y"-constrained registers; operand 0 is
;; tied to operand 1 and operand 2 may be a register or memory ("ym").
;; Elements 0 and 1 of each source form one pair.
5460 (define_insn "ssse3_phadddv2si3"
5461 [(set (match_operand:V2SI 0 "register_operand" "=y")
5465 (match_operand:V2SI 1 "register_operand" "0")
5466 (parallel [(const_int 0)]))
5467 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5470 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5471 (parallel [(const_int 0)]))
5472 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5474 "phaddd\t{%2, %0|%0, %2}"
5475 [(set_attr "type" "sseiadd")
5476 (set_attr "prefix_extra" "1")
5477 (set_attr "mode" "DI")])
;; SSSE3 phaddsw, V8HI form.  Same operand layout as ssse3_phaddwv8hi3:
;; operand 0 ("=x") tied to operand 1, operand 2 in a register or memory,
;; elements 0..7 of each source taken in adjacent pairs.
5479 (define_insn "ssse3_phaddswv8hi3"
5480 [(set (match_operand:V8HI 0 "register_operand" "=x")
5486 (match_operand:V8HI 1 "register_operand" "0")
5487 (parallel [(const_int 0)]))
5488 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5490 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5491 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5494 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5495 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5497 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5498 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5503 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5504 (parallel [(const_int 0)]))
5505 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5507 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5508 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5511 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5512 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5514 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5515 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5517 "phaddsw\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseiadd")
5519 (set_attr "prefix_data16" "1")
5520 (set_attr "prefix_extra" "1")
5521 (set_attr "mode" "TI")])
;; SSSE3 phaddsw, V4HI form.  Uses "y"-constrained registers; operand 0 is
;; tied to operand 1 and operand 2 may be a register or memory ("ym").
5523 (define_insn "ssse3_phaddswv4hi3"
5524 [(set (match_operand:V4HI 0 "register_operand" "=y")
5529 (match_operand:V4HI 1 "register_operand" "0")
5530 (parallel [(const_int 0)]))
5531 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5533 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5534 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5538 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5539 (parallel [(const_int 0)]))
5540 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5542 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5543 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5545 "phaddsw\t{%2, %0|%0, %2}"
5546 [(set_attr "type" "sseiadd")
5547 (set_attr "prefix_extra" "1")
5548 (set_attr "mode" "DI")])
;; SSSE3 phsubw, V8HI form.  Operand 0 ("=x") is tied to operand 1 ("0");
;; operand 2 is a register or memory operand ("xm").  Elements 0..7 of
;; each source are taken in adjacent pairs.
5550 (define_insn "ssse3_phsubwv8hi3"
5551 [(set (match_operand:V8HI 0 "register_operand" "=x")
5557 (match_operand:V8HI 1 "register_operand" "0")
5558 (parallel [(const_int 0)]))
5559 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5561 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5562 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5565 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5566 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5568 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5569 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5574 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5575 (parallel [(const_int 0)]))
5576 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5578 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5579 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5582 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5583 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5585 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5586 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5588 "phsubw\t{%2, %0|%0, %2}"
5589 [(set_attr "type" "sseiadd")
5590 (set_attr "prefix_data16" "1")
5591 (set_attr "prefix_extra" "1")
5592 (set_attr "mode" "TI")])
;; SSSE3 phsubw, V4HI form.  Uses "y"-constrained registers; operand 0 is
;; tied to operand 1 and operand 2 may be a register or memory ("ym").
5594 (define_insn "ssse3_phsubwv4hi3"
5595 [(set (match_operand:V4HI 0 "register_operand" "=y")
5600 (match_operand:V4HI 1 "register_operand" "0")
5601 (parallel [(const_int 0)]))
5602 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5604 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5605 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5609 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5610 (parallel [(const_int 0)]))
5611 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5613 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5614 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5616 "phsubw\t{%2, %0|%0, %2}"
5617 [(set_attr "type" "sseiadd")
5618 (set_attr "prefix_extra" "1")
5619 (set_attr "mode" "DI")])
;; SSSE3 phsubd, V4SI form.  Operand 0 ("=x") is tied to operand 1;
;; operand 2 is a register or memory operand ("xm").  Elements 0..3 of
;; each source are taken in adjacent pairs.
5621 (define_insn "ssse3_phsubdv4si3"
5622 [(set (match_operand:V4SI 0 "register_operand" "=x")
5627 (match_operand:V4SI 1 "register_operand" "0")
5628 (parallel [(const_int 0)]))
5629 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5631 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5632 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5636 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5637 (parallel [(const_int 0)]))
5638 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5640 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5641 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5643 "phsubd\t{%2, %0|%0, %2}"
5644 [(set_attr "type" "sseiadd")
5645 (set_attr "prefix_data16" "1")
5646 (set_attr "prefix_extra" "1")
5647 (set_attr "mode" "TI")])
;; SSSE3 phsubd, V2SI form.  Uses "y"-constrained registers; operand 0 is
;; tied to operand 1 and operand 2 may be a register or memory ("ym").
5649 (define_insn "ssse3_phsubdv2si3"
5650 [(set (match_operand:V2SI 0 "register_operand" "=y")
5654 (match_operand:V2SI 1 "register_operand" "0")
5655 (parallel [(const_int 0)]))
5656 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5659 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5660 (parallel [(const_int 0)]))
5661 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5663 "phsubd\t{%2, %0|%0, %2}"
5664 [(set_attr "type" "sseiadd")
5665 (set_attr "prefix_extra" "1")
5666 (set_attr "mode" "DI")])
;; SSSE3 phsubsw, V8HI form.  Same operand layout as ssse3_phsubwv8hi3:
;; operand 0 ("=x") tied to operand 1, operand 2 in a register or memory,
;; elements 0..7 of each source taken in adjacent pairs.
5668 (define_insn "ssse3_phsubswv8hi3"
5669 [(set (match_operand:V8HI 0 "register_operand" "=x")
5675 (match_operand:V8HI 1 "register_operand" "0")
5676 (parallel [(const_int 0)]))
5677 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5679 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5680 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5683 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5684 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5686 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5687 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5692 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5693 (parallel [(const_int 0)]))
5694 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5696 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5697 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5700 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5701 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5703 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5704 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5706 "phsubsw\t{%2, %0|%0, %2}"
5707 [(set_attr "type" "sseiadd")
5708 (set_attr "prefix_data16" "1")
5709 (set_attr "prefix_extra" "1")
5710 (set_attr "mode" "TI")])
;; SSSE3 phsubsw, V4HI form.  Uses "y"-constrained registers; operand 0 is
;; tied to operand 1 and operand 2 may be a register or memory ("ym").
5712 (define_insn "ssse3_phsubswv4hi3"
5713 [(set (match_operand:V4HI 0 "register_operand" "=y")
5718 (match_operand:V4HI 1 "register_operand" "0")
5719 (parallel [(const_int 0)]))
5720 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5722 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5723 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5727 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5728 (parallel [(const_int 0)]))
5729 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5731 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5732 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5734 "phsubsw\t{%2, %0|%0, %2}"
5735 [(set_attr "type" "sseiadd")
5736 (set_attr "prefix_extra" "1")
5737 (set_attr "mode" "DI")])
;; SSSE3 pmaddubsw, V16QI sources producing a V8HI result.  Operand 0
;; ("=x") is tied to operand 1; operand 2 is a register or memory operand
;; ("xm").  The RTL selects one byte set starting at index 0 and another
;; starting at index 1 (ending at 15) from each source.
;; pmaddubsw treats the bytes of its first source as unsigned and those of
;; its second source as signed, so the two inputs are NOT interchangeable.
;; Operand 1 therefore must not carry the '%' commutative marker, which
;; would let the register allocator swap operands 1 and 2.
5739 (define_insn "ssse3_pmaddubswv8hi3"
5740 [(set (match_operand:V8HI 0 "register_operand" "=x")
5745 (match_operand:V16QI 1 "nonimmediate_operand" "0")
5746 (parallel [(const_int 0)
5756 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5757 (parallel [(const_int 0)
5767 (vec_select:V16QI (match_dup 1)
5768 (parallel [(const_int 1)
5777 (vec_select:V16QI (match_dup 2)
5778 (parallel [(const_int 1)
5785 (const_int 15)]))))))]
5787 "pmaddubsw\t{%2, %0|%0, %2}"
5788 [(set_attr "type" "sseiadd")
5789 (set_attr "prefix_data16" "1")
5790 (set_attr "prefix_extra" "1")
5791 (set_attr "mode" "TI")])
;; SSSE3 pmaddubsw, V8QI sources producing a V4HI result.  Uses
;; "y"-constrained registers; operand 0 is tied to operand 1 and operand 2
;; may be a register or memory ("ym").
;; pmaddubsw treats the bytes of its first source as unsigned and those of
;; its second source as signed, so the two inputs are NOT interchangeable.
;; Operand 1 therefore must not carry the '%' commutative marker, which
;; would let the register allocator swap operands 1 and 2.
5793 (define_insn "ssse3_pmaddubswv4hi3"
5794 [(set (match_operand:V4HI 0 "register_operand" "=y")
5799 (match_operand:V8QI 1 "nonimmediate_operand" "0")
5800 (parallel [(const_int 0)
5806 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5807 (parallel [(const_int 0)
5813 (vec_select:V8QI (match_dup 1)
5814 (parallel [(const_int 1)
5819 (vec_select:V8QI (match_dup 2)
5820 (parallel [(const_int 1)
5823 (const_int 7)]))))))]
5825 "pmaddubsw\t{%2, %0|%0, %2}"
5826 [(set_attr "type" "sseiadd")
5827 (set_attr "prefix_extra" "1")
5828 (set_attr "mode" "DI")])
;; SSSE3 pmulhrsw, V8HI form.  Operand 1 carries the '%' marker ("%0"),
;; operand 2 is a register or memory operand ("xm"); an all-ones V8HI
;; constant vector appears in the RTL.  The insn is enabled only when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands
;; for MULT in V8HImode.
5830 (define_insn "ssse3_pmulhrswv8hi3"
5831 [(set (match_operand:V8HI 0 "register_operand" "=x")
5838 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5840 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5842 (const_vector:V8HI [(const_int 1) (const_int 1)
5843 (const_int 1) (const_int 1)
5844 (const_int 1) (const_int 1)
5845 (const_int 1) (const_int 1)]))
5847 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5848 "pmulhrsw\t{%2, %0|%0, %2}"
5849 [(set_attr "type" "sseimul")
5850 (set_attr "prefix_data16" "1")
5851 (set_attr "prefix_extra" "1")
5852 (set_attr "mode" "TI")])
;; SSSE3 pmulhrsw, V4HI form.  Uses "y"-constrained registers; operand 1
;; carries the '%' marker ("%0") and operand 2 may be a register or memory
;; ("ym").  Enabled only when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands for MULT in V4HImode.
5854 (define_insn "ssse3_pmulhrswv4hi3"
5855 [(set (match_operand:V4HI 0 "register_operand" "=y")
5862 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5864 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5866 (const_vector:V4HI [(const_int 1) (const_int 1)
5867 (const_int 1) (const_int 1)]))
5869 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5870 "pmulhrsw\t{%2, %0|%0, %2}"
5871 [(set_attr "type" "sseimul")
5872 (set_attr "prefix_extra" "1")
5873 (set_attr "mode" "DI")])
;; SSSE3 pshufb, V16QI form, modelled as an unspec of operand 1 (tied to
;; the output, "0") and operand 2 (register or memory, "xm").
;; NOTE(review): the ';' after the output template starts an empty .md
;; comment; it looks like a stray terminator and has no effect.
5875 (define_insn "ssse3_pshufbv16qi3"
5876 [(set (match_operand:V16QI 0 "register_operand" "=x")
5877 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5878 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5881 "pshufb\t{%2, %0|%0, %2}";
5882 [(set_attr "type" "sselog1")
5883 (set_attr "prefix_data16" "1")
5884 (set_attr "prefix_extra" "1")
5885 (set_attr "mode" "TI")])
;; SSSE3 pshufb, V8QI form, modelled as an unspec of operand 1 (tied to
;; the output, "0") and operand 2 (register or memory, "ym").
;; NOTE(review): the ';' after the output template starts an empty .md
;; comment; it looks like a stray terminator and has no effect.
5887 (define_insn "ssse3_pshufbv8qi3"
5888 [(set (match_operand:V8QI 0 "register_operand" "=y")
5889 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5890 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5893 "pshufb\t{%2, %0|%0, %2}";
5894 [(set_attr "type" "sselog1")
5895 (set_attr "prefix_extra" "1")
5896 (set_attr "mode" "DI")])
;; SSSE3 psign{b,w,d} for each mode in SSEMODE124 (V16QI, V8HI, V4SI).
;; The mnemonic suffix comes from the ssevecsize mode attribute.  Operand 1
;; is tied to the output ("0"); operand 2 is a register or memory ("xm").
;; NOTE(review): the ';' after the output template starts an empty .md
;; comment; it looks like a stray terminator and has no effect.
5898 (define_insn "ssse3_psign<mode>3"
5899 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5900 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5901 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5904 "psign<ssevecsize>\t{%2, %0|%0, %2}";
5905 [(set_attr "type" "sselog1")
5906 (set_attr "prefix_data16" "1")
5907 (set_attr "prefix_extra" "1")
5908 (set_attr "mode" "TI")])
;; SSSE3 psign for each mode in the MMXMODEI iterator (defined outside
;; this file section).  The mnemonic suffix comes from the mmxvecsize
;; attribute.  Operand 1 is tied to the output; operand 2 is "ym".
;; NOTE(review): the ';' after the output template starts an empty .md
;; comment; it looks like a stray terminator and has no effect.
5910 (define_insn "ssse3_psign<mode>3"
5911 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5912 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5913 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5916 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
5917 [(set_attr "type" "sselog1")
5918 (set_attr "prefix_extra" "1")
5919 (set_attr "mode" "DI")])
;; SSSE3 palignr on TImode.  Operand 1 is tied to the output ("0"),
;; operand 2 is a register or memory ("xm").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the palignr immediate.
5921 (define_insn "ssse3_palignrti"
5922 [(set (match_operand:TI 0 "register_operand" "=x")
5923 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5924 (match_operand:TI 2 "nonimmediate_operand" "xm")
5925 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5929 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5930 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5932 [(set_attr "type" "sseishft")
5933 (set_attr "prefix_data16" "1")
5934 (set_attr "prefix_extra" "1")
5935 (set_attr "mode" "TI")])
;; SSSE3 palignr on DImode with "y"-constrained registers.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the palignr immediate.
5937 (define_insn "ssse3_palignrdi"
5938 [(set (match_operand:DI 0 "register_operand" "=y")
5939 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5940 (match_operand:DI 2 "nonimmediate_operand" "ym")
5941 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5945 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5946 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5948 [(set_attr "type" "sseishft")
5949 (set_attr "prefix_extra" "1")
5950 (set_attr "mode" "DI")])
;; Vector absolute value (RTL abs) for each mode in SSEMODE124, emitted as
;; pabs{b,w,d} via the ssevecsize attribute.  Operand 1 may be a register
;; or memory ("xm"); the output is not tied to the input.
;; NOTE(review): the ';' after the output template starts an empty .md
;; comment; it looks like a stray terminator and has no effect.
5952 (define_insn "abs<mode>2"
5953 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5954 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5956 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
5957 [(set_attr "type" "sselog1")
5958 (set_attr "prefix_data16" "1")
5959 (set_attr "prefix_extra" "1")
5960 (set_attr "mode" "TI")])
;; Vector absolute value (RTL abs) for each mode in the MMXMODEI iterator,
;; emitted as pabs with the mmxvecsize suffix.  Operand 1 may be a
;; register or memory ("ym").
;; NOTE(review): the ';' after the output template starts an empty .md
;; comment; it looks like a stray terminator and has no effect.
5962 (define_insn "abs<mode>2"
5963 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5964 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5966 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
5967 [(set_attr "type" "sselog1")
5968 (set_attr "prefix_extra" "1")
5969 (set_attr "mode" "DI")])
5971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5973 ;; AMD SSE4A instructions
5975 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntsd: stores element 0 of the V2DF register operand 1 into the
;; DF memory operand 0, wrapped in an unspec.
5977 (define_insn "sse4a_vmmovntv2df"
5978 [(set (match_operand:DF 0 "memory_operand" "=m")
5979 (unspec:DF [(vec_select:DF
5980 (match_operand:V2DF 1 "register_operand" "x")
5981 (parallel [(const_int 0)]))]
5984 "movntsd\t{%1, %0|%0, %1}"
5985 [(set_attr "type" "ssemov")
5986 (set_attr "mode" "DF")])
;; SSE4A movntsd taking a scalar DF register operand 1 ("x") and storing
;; it to the DF memory operand 0, wrapped in an unspec.
5988 (define_insn "sse4a_movntdf"
5989 [(set (match_operand:DF 0 "memory_operand" "=m")
5990 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5993 "movntsd\t{%1, %0|%0, %1}"
5994 [(set_attr "type" "ssemov")
5995 (set_attr "mode" "DF")])
;; SSE4A movntss: stores element 0 of the V4SF register operand 1 into the
;; SF memory operand 0, wrapped in an unspec.
5997 (define_insn "sse4a_vmmovntv4sf"
5998 [(set (match_operand:SF 0 "memory_operand" "=m")
5999 (unspec:SF [(vec_select:SF
6000 (match_operand:V4SF 1 "register_operand" "x")
6001 (parallel [(const_int 0)]))]
6004 "movntss\t{%1, %0|%0, %1}"
6005 [(set_attr "type" "ssemov")
6006 (set_attr "mode" "SF")])
;; SSE4A movntss taking a scalar SF register operand 1 ("x") and storing
;; it to the SF memory operand 0, wrapped in an unspec.
6008 (define_insn "sse4a_movntsf"
6009 [(set (match_operand:SF 0 "memory_operand" "=m")
6010 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
6013 "movntss\t{%1, %0|%0, %1}"
6014 [(set_attr "type" "ssemov")
6015 (set_attr "mode" "SF")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the V2DI output;
;; operands 2 and 3 are const_int operands printed as the two immediates.
6017 (define_insn "sse4a_extrqi"
6018 [(set (match_operand:V2DI 0 "register_operand" "=x")
6019 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6020 (match_operand 2 "const_int_operand" "")
6021 (match_operand 3 "const_int_operand" "")]
6024 "extrq\t{%3, %2, %0|%0, %2, %3}"
6025 [(set_attr "type" "sse")
6026 (set_attr "prefix_data16" "1")
6027 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the V2DI output;
;; operand 2 is a V16QI register ("x").
6029 (define_insn "sse4a_extrq"
6030 [(set (match_operand:V2DI 0 "register_operand" "=x")
6031 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6032 (match_operand:V16QI 2 "register_operand" "x")]
6035 "extrq\t{%2, %0|%0, %2}"
6036 [(set_attr "type" "sse")
6037 (set_attr "prefix_data16" "1")
6038 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the V2DI output,
;; operand 2 is a V2DI register, and operands 3 and 4 are const_int
;; operands printed as the two immediates.
6040 (define_insn "sse4a_insertqi"
6041 [(set (match_operand:V2DI 0 "register_operand" "=x")
6042 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6043 (match_operand:V2DI 2 "register_operand" "x")
6044 (match_operand 3 "const_int_operand" "")
6045 (match_operand 4 "const_int_operand" "")]
6048 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6049 [(set_attr "type" "sseins")
6050 (set_attr "prefix_rep" "1")
6051 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the V2DI output;
;; operand 2 is a V2DI register ("x").
6053 (define_insn "sse4a_insertq"
6054 [(set (match_operand:V2DI 0 "register_operand" "=x")
6055 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6056 (match_operand:V2DI 2 "register_operand" "x")]
6059 "insertq\t{%2, %0|%0, %2}"
6060 [(set_attr "type" "sseins")
6061 (set_attr "prefix_rep" "1")
6062 (set_attr "mode" "TI")])
6064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6066 ;; Intel SSE4.1 instructions
6068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.1 blendpd.  Operand 2 (register or memory, "xm") is listed first
;; in the RTL, operand 1 is tied to the output ("0"), and operand 3 is an
;; immediate accepted by const_0_to_3_operand.
6070 (define_insn "sse4_1_blendpd"
6071 [(set (match_operand:V2DF 0 "register_operand" "=x")
6073 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6074 (match_operand:V2DF 1 "register_operand" "0")
6075 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6077 "blendpd\t{%3, %2, %0|%0, %2, %3}"
6078 [(set_attr "type" "ssemov")
6079 (set_attr "prefix_extra" "1")
6080 (set_attr "mode" "V2DF")])
;; SSE4.1 blendps.  Operand 2 (register or memory, "xm") is listed first
;; in the RTL, operand 1 is tied to the output ("0"), and operand 3 is an
;; immediate accepted by const_0_to_15_operand.
6082 (define_insn "sse4_1_blendps"
6083 [(set (match_operand:V4SF 0 "register_operand" "=x")
6085 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6086 (match_operand:V4SF 1 "register_operand" "0")
6087 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
6089 "blendps\t{%3, %2, %0|%0, %2, %3}"
6090 [(set_attr "type" "ssemov")
6091 (set_attr "prefix_extra" "1")
6092 (set_attr "mode" "V4SF")])
;; SSE4.1 blendvpd, modelled as an unspec.  Operands 0-2 use the
;; *_not_xmm0_operand predicates, while operand 3 uses the "Y0"
;; constraint (presumably xmm0, given those predicates -- confirm against
;; the constraint definitions).
6094 (define_insn "sse4_1_blendvpd"
6095 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
6096 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
6097 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
6098 (match_operand:V2DF 3 "register_operand" "Y0")]
6101 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
6102 [(set_attr "type" "ssemov")
6103 (set_attr "prefix_extra" "1")
6104 (set_attr "mode" "V2DF")])
;; SSE4.1 blendvps, modelled as an unspec.  Operands 0-2 use the
;; *_not_xmm0_operand predicates, while operand 3 uses the "Y0"
;; constraint (presumably xmm0, given those predicates -- confirm against
;; the constraint definitions).
6106 (define_insn "sse4_1_blendvps"
6107 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
6108 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
6109 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
6110 (match_operand:V4SF 3 "register_operand" "Y0")]
6113 "blendvps\t{%3, %2, %0|%0, %2, %3}"
6114 [(set_attr "type" "ssemov")
6115 (set_attr "prefix_extra" "1")
6116 (set_attr "mode" "V4SF")])
;; SSE4.1 dppd, modelled as an unspec.  Operand 1 carries the '%' marker
;; ("%0"), operand 2 is a register or memory ("xm"), and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6118 (define_insn "sse4_1_dppd"
6119 [(set (match_operand:V2DF 0 "register_operand" "=x")
6120 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
6121 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6122 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6125 "dppd\t{%3, %2, %0|%0, %2, %3}"
6126 [(set_attr "type" "ssemul")
6127 (set_attr "prefix_extra" "1")
6128 (set_attr "mode" "V2DF")])
;; SSE4.1 dpps, modelled as an unspec.  Operand 1 carries the '%' marker
;; ("%0"), operand 2 is a register or memory ("xm"), and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6130 (define_insn "sse4_1_dpps"
6131 [(set (match_operand:V4SF 0 "register_operand" "=x")
6132 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6133 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6134 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6137 "dpps\t{%3, %2, %0|%0, %2, %3}"
6138 [(set_attr "type" "ssemul")
6139 (set_attr "prefix_extra" "1")
6140 (set_attr "mode" "V4SF")])
;; SSE4.1 movntdqa: loads the V2DI memory operand 1 ("m") into the V2DI
;; register operand 0, wrapped in an unspec.
6142 (define_insn "sse4_1_movntdqa"
6143 [(set (match_operand:V2DI 0 "register_operand" "=x")
6144 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6147 "movntdqa\t{%1, %0|%0, %1}"
6148 [(set_attr "type" "ssecvt")
6149 (set_attr "prefix_extra" "1")
6150 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw, modelled as an unspec.  Operand 1 is tied to the V16QI
;; output, operand 2 is a register or memory ("xm"), and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6152 (define_insn "sse4_1_mpsadbw"
6153 [(set (match_operand:V16QI 0 "register_operand" "=x")
6154 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6155 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6156 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6159 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6160 [(set_attr "type" "sselog1")
6161 (set_attr "prefix_extra" "1")
6162 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: combines two V4SI sources into a V8HI result.
;; Operand 1 is tied to the output ("0"); operand 2 is a register or
;; memory ("xm").
6164 (define_insn "sse4_1_packusdw"
6165 [(set (match_operand:V8HI 0 "register_operand" "=x")
6168 (match_operand:V4SI 1 "register_operand" "0"))
6170 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6172 "packusdw\t{%2, %0|%0, %2}"
6173 [(set_attr "type" "sselog")
6174 (set_attr "prefix_extra" "1")
6175 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb, modelled as an unspec.  Operands 0-2 use the
;; *_not_xmm0_operand predicates, while operand 3 uses the "Y0"
;; constraint (presumably xmm0, given those predicates -- confirm against
;; the constraint definitions).
6177 (define_insn "sse4_1_pblendvb"
6178 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6179 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6180 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6181 (match_operand:V16QI 3 "register_operand" "Y0")]
6184 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6185 [(set_attr "type" "ssemov")
6186 (set_attr "prefix_extra" "1")
6187 (set_attr "mode" "TI")])
;; SSE4.1 pblendw.  Operand 2 (register or memory, "xm") is listed first
;; in the RTL, operand 1 is tied to the output ("0"), and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6189 (define_insn "sse4_1_pblendw"
6190 [(set (match_operand:V8HI 0 "register_operand" "=x")
6192 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6193 (match_operand:V8HI 1 "register_operand" "0")
6194 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6196 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6197 [(set_attr "type" "ssemov")
6198 (set_attr "prefix_extra" "1")
6199 (set_attr "mode" "TI")])
;; SSE4.1 phminposuw, modelled as UNSPEC_PHMINPOSUW of the single V8HI
;; source operand 1 (register or memory, "xm").
6201 (define_insn "sse4_1_phminposuw"
6202 [(set (match_operand:V8HI 0 "register_operand" "=x")
6203 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6204 UNSPEC_PHMINPOSUW))]
6206 "phminposuw\t{%1, %0|%0, %1}"
6207 [(set_attr "type" "sselog1")
6208 (set_attr "prefix_extra" "1")
6209 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, named register form: the source is a V16QI register
;; ("x") whose elements are selected starting at index 0; result is V8HI.
6211 (define_insn "sse4_1_extendv8qiv8hi2"
6212 [(set (match_operand:V8HI 0 "register_operand" "=x")
6215 (match_operand:V16QI 1 "register_operand" "x")
6216 (parallel [(const_int 0)
6225 "pmovsxbw\t{%1, %0|%0, %1}"
6226 [(set_attr "type" "ssemov")
6227 (set_attr "prefix_extra" "1")
6228 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, '*'-prefixed variant: the source is a V8QI register or
;; memory operand ("xm") wrapped in vec_duplicate:V16QI; result is V8HI.
6230 (define_insn "*sse4_1_extendv8qiv8hi2"
6231 [(set (match_operand:V8HI 0 "register_operand" "=x")
6234 (vec_duplicate:V16QI
6235 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6236 (parallel [(const_int 0)
6245 "pmovsxbw\t{%1, %0|%0, %1}"
6246 [(set_attr "type" "ssemov")
6247 (set_attr "prefix_extra" "1")
6248 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, named register form: the source is a V16QI register
;; ("x") whose elements are selected starting at index 0; result is V4SI.
6250 (define_insn "sse4_1_extendv4qiv4si2"
6251 [(set (match_operand:V4SI 0 "register_operand" "=x")
6254 (match_operand:V16QI 1 "register_operand" "x")
6255 (parallel [(const_int 0)
6260 "pmovsxbd\t{%1, %0|%0, %1}"
6261 [(set_attr "type" "ssemov")
6262 (set_attr "prefix_extra" "1")
6263 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, '*'-prefixed variant: the source is a V4QI register or
;; memory operand ("xm") wrapped in vec_duplicate:V16QI; result is V4SI.
6265 (define_insn "*sse4_1_extendv4qiv4si2"
6266 [(set (match_operand:V4SI 0 "register_operand" "=x")
6269 (vec_duplicate:V16QI
6270 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6271 (parallel [(const_int 0)
6276 "pmovsxbd\t{%1, %0|%0, %1}"
6277 [(set_attr "type" "ssemov")
6278 (set_attr "prefix_extra" "1")
6279 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, named register form: the source is a V16QI register
;; ("x") whose elements are selected starting at index 0; result is V2DI.
6281 (define_insn "sse4_1_extendv2qiv2di2"
6282 [(set (match_operand:V2DI 0 "register_operand" "=x")
6285 (match_operand:V16QI 1 "register_operand" "x")
6286 (parallel [(const_int 0)
6289 "pmovsxbq\t{%1, %0|%0, %1}"
6290 [(set_attr "type" "ssemov")
6291 (set_attr "prefix_extra" "1")
6292 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, '*'-prefixed variant: the source is a V2QI register or
;; memory operand ("xm") wrapped in vec_duplicate:V16QI; result is V2DI.
6294 (define_insn "*sse4_1_extendv2qiv2di2"
6295 [(set (match_operand:V2DI 0 "register_operand" "=x")
6298 (vec_duplicate:V16QI
6299 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6300 (parallel [(const_int 0)
6303 "pmovsxbq\t{%1, %0|%0, %1}"
6304 [(set_attr "type" "ssemov")
6305 (set_attr "prefix_extra" "1")
6306 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, named register form: the source is a V8HI register
;; ("x") whose elements are selected starting at index 0; result is V4SI.
6308 (define_insn "sse4_1_extendv4hiv4si2"
6309 [(set (match_operand:V4SI 0 "register_operand" "=x")
6312 (match_operand:V8HI 1 "register_operand" "x")
6313 (parallel [(const_int 0)
6318 "pmovsxwd\t{%1, %0|%0, %1}"
6319 [(set_attr "type" "ssemov")
6320 (set_attr "prefix_extra" "1")
6321 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, '*'-prefixed variant taking a narrow register or
;; memory source ("xm"); result is V4SI.
;; The source mode is V4HI: pmovsxwd consumes four 16-bit elements
;; (64 bits), which matches the V4HI operand used by the zero-extending
;; counterpart *sse4_1_zero_extendv4hiv4si2.  A V2HI operand would
;; describe only half of the data the instruction reads.
6323 (define_insn "*sse4_1_extendv4hiv4si2"
6324 [(set (match_operand:V4SI 0 "register_operand" "=x")
6328 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6329 (parallel [(const_int 0)
6334 "pmovsxwd\t{%1, %0|%0, %1}"
6335 [(set_attr "type" "ssemov")
6336 (set_attr "prefix_extra" "1")
6337 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, named register form: the source is a V8HI register
;; ("x") whose elements are selected starting at index 0; result is V2DI.
6339 (define_insn "sse4_1_extendv2hiv2di2"
6340 [(set (match_operand:V2DI 0 "register_operand" "=x")
6343 (match_operand:V8HI 1 "register_operand" "x")
6344 (parallel [(const_int 0)
6347 "pmovsxwq\t{%1, %0|%0, %1}"
6348 [(set_attr "type" "ssemov")
6349 (set_attr "prefix_extra" "1")
6350 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, '*'-prefixed variant taking a narrow register or
;; memory source ("xm"); result is V2DI.
;; The source mode is V2HI: pmovsxwq consumes two 16-bit elements
;; (32 bits), which matches the V2HI operand used by the zero-extending
;; counterpart *sse4_1_zero_extendv2hiv2di2.  A full V8HI operand would
;; describe a 128-bit memory access the instruction does not perform.
6352 (define_insn "*sse4_1_extendv2hiv2di2"
6353 [(set (match_operand:V2DI 0 "register_operand" "=x")
6357 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6358 (parallel [(const_int 0)
6361 "pmovsxwq\t{%1, %0|%0, %1}"
6362 [(set_attr "type" "ssemov")
6363 (set_attr "prefix_extra" "1")
6364 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, named register form: the source is a V4SI register
;; ("x") whose elements are selected starting at index 0; result is V2DI.
6366 (define_insn "sse4_1_extendv2siv2di2"
6367 [(set (match_operand:V2DI 0 "register_operand" "=x")
6370 (match_operand:V4SI 1 "register_operand" "x")
6371 (parallel [(const_int 0)
6374 "pmovsxdq\t{%1, %0|%0, %1}"
6375 [(set_attr "type" "ssemov")
6376 (set_attr "prefix_extra" "1")
6377 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, '*'-prefixed variant: the source is a V2SI register or
;; memory operand ("xm"); result is V2DI.
6379 (define_insn "*sse4_1_extendv2siv2di2"
6380 [(set (match_operand:V2DI 0 "register_operand" "=x")
6384 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6385 (parallel [(const_int 0)
6388 "pmovsxdq\t{%1, %0|%0, %1}"
6389 [(set_attr "type" "ssemov")
6390 (set_attr "prefix_extra" "1")
6391 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, named register form: the source is a V16QI register
;; ("x") whose elements are selected starting at index 0; result is V8HI.
6393 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6394 [(set (match_operand:V8HI 0 "register_operand" "=x")
6397 (match_operand:V16QI 1 "register_operand" "x")
6398 (parallel [(const_int 0)
6407 "pmovzxbw\t{%1, %0|%0, %1}"
6408 [(set_attr "type" "ssemov")
6409 (set_attr "prefix_extra" "1")
6410 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, '*'-prefixed variant: the source is a V8QI register or
;; memory operand ("xm") wrapped in vec_duplicate:V16QI; result is V8HI.
6412 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6413 [(set (match_operand:V8HI 0 "register_operand" "=x")
6416 (vec_duplicate:V16QI
6417 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6418 (parallel [(const_int 0)
6427 "pmovzxbw\t{%1, %0|%0, %1}"
6428 [(set_attr "type" "ssemov")
6429 (set_attr "prefix_extra" "1")
6430 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, named register form: the source is a V16QI register
;; ("x") whose elements are selected starting at index 0; result is V4SI.
6432 (define_insn "sse4_1_zero_extendv4qiv4si2"
6433 [(set (match_operand:V4SI 0 "register_operand" "=x")
6436 (match_operand:V16QI 1 "register_operand" "x")
6437 (parallel [(const_int 0)
6442 "pmovzxbd\t{%1, %0|%0, %1}"
6443 [(set_attr "type" "ssemov")
6444 (set_attr "prefix_extra" "1")
6445 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, '*'-prefixed variant: the source is a V4QI register or
;; memory operand ("xm") wrapped in vec_duplicate:V16QI; result is V4SI.
6447 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6448 [(set (match_operand:V4SI 0 "register_operand" "=x")
6451 (vec_duplicate:V16QI
6452 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6453 (parallel [(const_int 0)
6458 "pmovzxbd\t{%1, %0|%0, %1}"
6459 [(set_attr "type" "ssemov")
6460 (set_attr "prefix_extra" "1")
6461 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, named register form: the source is a V16QI register
;; ("x") whose elements are selected starting at index 0; result is V2DI.
6463 (define_insn "sse4_1_zero_extendv2qiv2di2"
6464 [(set (match_operand:V2DI 0 "register_operand" "=x")
6467 (match_operand:V16QI 1 "register_operand" "x")
6468 (parallel [(const_int 0)
6471 "pmovzxbq\t{%1, %0|%0, %1}"
6472 [(set_attr "type" "ssemov")
6473 (set_attr "prefix_extra" "1")
6474 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, '*'-prefixed variant: the source is a V2QI register or
;; memory operand ("xm") wrapped in vec_duplicate:V16QI; result is V2DI.
6476 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6477 [(set (match_operand:V2DI 0 "register_operand" "=x")
6480 (vec_duplicate:V16QI
6481 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6482 (parallel [(const_int 0)
6485 "pmovzxbq\t{%1, %0|%0, %1}"
6486 [(set_attr "type" "ssemov")
6487 (set_attr "prefix_extra" "1")
6488 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, named register form: the source is a V8HI register
;; ("x") whose elements are selected starting at index 0; result is V4SI.
6490 (define_insn "sse4_1_zero_extendv4hiv4si2"
6491 [(set (match_operand:V4SI 0 "register_operand" "=x")
6494 (match_operand:V8HI 1 "register_operand" "x")
6495 (parallel [(const_int 0)
6500 "pmovzxwd\t{%1, %0|%0, %1}"
6501 [(set_attr "type" "ssemov")
6502 (set_attr "prefix_extra" "1")
6503 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, '*'-prefixed variant: the source is a V4HI register or
;; memory operand ("xm"); result is V4SI.
6505 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6506 [(set (match_operand:V4SI 0 "register_operand" "=x")
6510 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6511 (parallel [(const_int 0)
6516 "pmovzxwd\t{%1, %0|%0, %1}"
6517 [(set_attr "type" "ssemov")
6518 (set_attr "prefix_extra" "1")
6519 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, named register form: the source is a V8HI register
;; ("x") whose elements are selected starting at index 0; result is V2DI.
6521 (define_insn "sse4_1_zero_extendv2hiv2di2"
6522 [(set (match_operand:V2DI 0 "register_operand" "=x")
6525 (match_operand:V8HI 1 "register_operand" "x")
6526 (parallel [(const_int 0)
6529 "pmovzxwq\t{%1, %0|%0, %1}"
6530 [(set_attr "type" "ssemov")
6531 (set_attr "prefix_extra" "1")
6532 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, '*'-prefixed variant: the source is a V2HI register or
;; memory operand ("xm"); result is V2DI.
6534 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6535 [(set (match_operand:V2DI 0 "register_operand" "=x")
6539 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6540 (parallel [(const_int 0)
6543 "pmovzxwq\t{%1, %0|%0, %1}"
6544 [(set_attr "type" "ssemov")
6545 (set_attr "prefix_extra" "1")
6546 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, named register form: the source is a V4SI register
;; ("x") whose elements are selected starting at index 0; result is V2DI.
6548 (define_insn "sse4_1_zero_extendv2siv2di2"
6549 [(set (match_operand:V2DI 0 "register_operand" "=x")
6552 (match_operand:V4SI 1 "register_operand" "x")
6553 (parallel [(const_int 0)
6556 "pmovzxdq\t{%1, %0|%0, %1}"
6557 [(set_attr "type" "ssemov")
6558 (set_attr "prefix_extra" "1")
6559 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, '*'-prefixed variant: the source is a V2SI register or
;; memory operand ("xm"); result is V2DI.
6561 (define_insn "*sse4_1_zero_extendv2siv2di2"
6562 [(set (match_operand:V2DI 0 "register_operand" "=x")
6566 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6567 (parallel [(const_int 0)
6570 "pmovzxdq\t{%1, %0|%0, %1}"
6571 [(set_attr "type" "ssemov")
6572 (set_attr "prefix_extra" "1")
6573 (set_attr "mode" "TI")])
6575 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6576 ;; But it is not really a compare instruction.
;; Sets FLAGS_REG (mode CC) from an unspec of two V2DI inputs: operand 0
;; is a register ("x") and operand 1 a register or memory ("xm").  There
;; is no register output; both operands are inputs.
6577 (define_insn "sse4_1_ptest"
6578 [(set (reg:CC FLAGS_REG)
6579 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6580 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6583 "ptest\t{%1, %0|%0, %1}"
6584 [(set_attr "type" "ssecomi")
6585 (set_attr "prefix_extra" "1")
6586 (set_attr "mode" "TI")])
;; SSE4.1 roundpd, modelled as an unspec.  Operand 1 is a V2DF register or
;; memory source ("xm"); operand 2 is an immediate accepted by
;; const_0_to_15_operand.  The output is not tied to the input.
6588 (define_insn "sse4_1_roundpd"
6589 [(set (match_operand:V2DF 0 "register_operand" "=x")
6590 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6591 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6594 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6595 [(set_attr "type" "ssecvt")
6596 (set_attr "prefix_extra" "1")
6597 (set_attr "mode" "V2DF")])
;; SSE4.1 roundps, modelled as an unspec.  Operand 1 is a V4SF register or
;; memory source ("xm"); operand 2 is an immediate accepted by
;; const_0_to_15_operand.  The output is not tied to the input.
6599 (define_insn "sse4_1_roundps"
6600 [(set (match_operand:V4SF 0 "register_operand" "=x")
6601 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6602 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6605 "roundps\t{%2, %1, %0|%0, %1, %2}"
6606 [(set_attr "type" "ssecvt")
6607 (set_attr "prefix_extra" "1")
6608 (set_attr "mode" "V4SF")])
;; SSE4.1 roundsd.  The unspec takes operand 2 (V2DF register, "x") and
;; the immediate operand 3 (const_0_to_15_operand); operand 1 is a V2DF
;; register tied to the output ("0").
6610 (define_insn "sse4_1_roundsd"
6611 [(set (match_operand:V2DF 0 "register_operand" "=x")
6613 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6614 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6616 (match_operand:V2DF 1 "register_operand" "0")
6619 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6620 [(set_attr "type" "ssecvt")
6621 (set_attr "prefix_extra" "1")
6622 (set_attr "mode" "V2DF")])
;; SSE4.1 roundss.  The unspec takes operand 2 (V4SF register, "x") and
;; the immediate operand 3 (const_0_to_15_operand); operand 1 is a V4SF
;; register tied to the output ("0").
6624 (define_insn "sse4_1_roundss"
6625 [(set (match_operand:V4SF 0 "register_operand" "=x")
6627 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6628 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6630 (match_operand:V4SF 1 "register_operand" "0")
6633 "roundss\t{%3, %2, %0|%0, %2, %3}"
6634 [(set_attr "type" "ssecvt")
6635 (set_attr "prefix_extra" "1")
6636 (set_attr "mode" "V4SF")])
6638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6640 ;; Intel SSE4.2 string/text processing instructions
6642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern; its splitter emits the concrete
;; sse4_2_pcmpestri / sse4_2_pcmpestrm / sse4_2_pcmpestr_cconly insns.
;;
;; Results: operand 0 (SI, constraint "c"), operand 1 (V16QI, constraint
;;   "Y0") and FLAGS_REG.
;; Inputs:  operand 2 (V16QI register), operand 3 (SI, constraint "a"),
;;   operand 4 (V16QI, register or memory), operand 5 (SI, constraint
;;   "d") and operand 6, an immediate satisfying const_0_to_255_operand.
;;
;; The insn condition rejects the pattern once reload is in progress or
;; has completed.
;;
;; The split code looks for REG_UNUSED notes on curr_insn: the locals
;; ecx, xmm0 and flags are nonzero when operand 0, operand 1 and
;; FLAGS_REG respectively carry no such note.  gen_sse4_2_pcmpestri
;; produces operand 0, gen_sse4_2_pcmpestrm produces operand 1, and
;; gen_sse4_2_pcmpestr_cconly is used when the flags are wanted but
;; neither of the other two results is.
6644 (define_insn_and_split "sse4_2_pcmpestr"
6645 [(set (match_operand:SI 0 "register_operand" "=c,c")
6647 [(match_operand:V16QI 2 "register_operand" "x,x")
6648 (match_operand:SI 3 "register_operand" "a,a")
6649 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6650 (match_operand:SI 5 "register_operand" "d,d")
6651 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6653 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6661 (set (reg:CC FLAGS_REG)
6670 && !(reload_completed || reload_in_progress)"
6675 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6676 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6677 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6680 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6681 operands[3], operands[4],
6682 operands[5], operands[6]));
6684 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6685 operands[3], operands[4],
6686 operands[5], operands[6]));
6687 if (flags && !(ecx || xmm0))
6688 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6689 operands[4], operands[5],
6693 [(set_attr "type" "sselog")
6694 (set_attr "prefix_data16" "1")
6695 (set_attr "prefix_extra" "1")
6696 (set_attr "memory" "none,load")
6697 (set_attr "mode" "TI")])
;; PCMPESTRI: sets operand 0 (SI, constraint "c") and FLAGS_REG from
;; operands 1-5.  Operands 1 and 3 are the V16QI inputs (operand 3 may
;; be memory in the second alternative); operands 2 and 4 are SI
;; registers constrained to "a" and "d"; operand 5 is an immediate
;; satisfying const_0_to_255_operand.
;; Only %1, %3 and %5 are printed in the template; operands 0, 2 and 4
;; are pinned by their constraints and never appear in the output.
;; The "memory" attribute (none,load) follows operand 3's x/m choice.
6699 (define_insn "sse4_2_pcmpestri"
6700 [(set (match_operand:SI 0 "register_operand" "=c,c")
6702 [(match_operand:V16QI 1 "register_operand" "x,x")
6703 (match_operand:SI 2 "register_operand" "a,a")
6704 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6705 (match_operand:SI 4 "register_operand" "d,d")
6706 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6708 (set (reg:CC FLAGS_REG)
6717 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6718 [(set_attr "type" "sselog")
6719 (set_attr "prefix_data16" "1")
6720 (set_attr "prefix_extra" "1")
6721 (set_attr "memory" "none,load")
6722 (set_attr "mode" "TI")])
;; PCMPESTRM: sets operand 0 (V16QI, constraint "Y0") and FLAGS_REG from
;; operands 1-5.  Operands 1 and 3 are the V16QI inputs (operand 3 may
;; be memory in the second alternative); operands 2 and 4 are SI
;; registers constrained to "a" and "d"; operand 5 is an immediate
;; satisfying const_0_to_255_operand.
;; Only %1, %3 and %5 are printed in the template; operands 0, 2 and 4
;; are pinned by their constraints and never appear in the output.
;; The "memory" attribute (none,load) follows operand 3's x/m choice.
6724 (define_insn "sse4_2_pcmpestrm"
6725 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6727 [(match_operand:V16QI 1 "register_operand" "x,x")
6728 (match_operand:SI 2 "register_operand" "a,a")
6729 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6730 (match_operand:SI 4 "register_operand" "d,d")
6731 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6733 (set (reg:CC FLAGS_REG)
6742 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6743 [(set_attr "type" "sselog")
6744 (set_attr "prefix_data16" "1")
6745 (set_attr "prefix_extra" "1")
6746 (set_attr "memory" "none,load")
6747 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: the sole set is FLAGS_REG; the two other
;; results are modelled as clobbered scratches (operand 5, V16QI and
;; operand 6, SI).
;; There are four alternatives:
;;   0,1  scratch 5 uses "Y0", scratch 6 is "X"  -> pcmpestrm is emitted
;;   2,3  scratch 5 is "X",  scratch 6 uses "c"  -> pcmpestri is emitted
;; Within each pair, operand 2 is a register in the first alternative
;; and memory in the second, which the "memory" attribute
;; (none,load,none,load) mirrors.
;; Operands 1 and 3 (SI, constraints "a" and "d") are not printed in
;; the template.
6749 (define_insn "sse4_2_pcmpestr_cconly"
6750 [(set (reg:CC FLAGS_REG)
6752 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6753 (match_operand:SI 1 "register_operand" "a,a,a,a")
6754 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6755 (match_operand:SI 3 "register_operand" "d,d,d,d")
6756 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6758 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6759 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6762 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6763 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6764 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6765 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6766 [(set_attr "type" "sselog")
6767 (set_attr "prefix_data16" "1")
6768 (set_attr "prefix_extra" "1")
6769 (set_attr "memory" "none,load,none,load")
6770 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern; its splitter emits the concrete
;; sse4_2_pcmpistri / sse4_2_pcmpistrm / sse4_2_pcmpistr_cconly insns.
;;
;; Results: operand 0 (SI, constraint "c"), operand 1 (V16QI, constraint
;;   "Y0") and FLAGS_REG.
;; Inputs:  operand 2 (V16QI register), operand 3 (V16QI, register or
;;   memory) and operand 4, an immediate satisfying
;;   const_0_to_255_operand.
;;
;; The insn condition rejects the pattern once reload is in progress or
;; has completed.
;;
;; The split code looks for REG_UNUSED notes on curr_insn: the locals
;; ecx, xmm0 and flags are nonzero when operand 0, operand 1 and
;; FLAGS_REG respectively carry no such note.  gen_sse4_2_pcmpistri
;; produces operand 0, gen_sse4_2_pcmpistrm produces operand 1, and
;; gen_sse4_2_pcmpistr_cconly is used when the flags are wanted but
;; neither of the other two results is.
6772 (define_insn_and_split "sse4_2_pcmpistr"
6773 [(set (match_operand:SI 0 "register_operand" "=c,c")
6775 [(match_operand:V16QI 2 "register_operand" "x,x")
6776 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6777 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6779 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6785 (set (reg:CC FLAGS_REG)
6792 && !(reload_completed || reload_in_progress)"
6797 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6798 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6799 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6802 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6803 operands[3], operands[4]));
6805 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6806 operands[3], operands[4]));
6807 if (flags && !(ecx || xmm0))
6808 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6812 [(set_attr "type" "sselog")
6813 (set_attr "prefix_data16" "1")
6814 (set_attr "prefix_extra" "1")
6815 (set_attr "memory" "none,load")
6816 (set_attr "mode" "TI")])
;; PCMPISTRI: sets operand 0 (SI, constraint "c") and FLAGS_REG from
;; operand 1 (V16QI register), operand 2 (V16QI, register or memory)
;; and operand 3, an immediate satisfying const_0_to_255_operand.
;; Operand 0 is pinned by its constraint and is not printed in the
;; template.  The "memory" attribute (none,load) follows operand 2's
;; x/m choice.
6818 (define_insn "sse4_2_pcmpistri"
6819 [(set (match_operand:SI 0 "register_operand" "=c,c")
6821 [(match_operand:V16QI 1 "register_operand" "x,x")
6822 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6823 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6825 (set (reg:CC FLAGS_REG)
6832 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6833 [(set_attr "type" "sselog")
6834 (set_attr "prefix_data16" "1")
6835 (set_attr "prefix_extra" "1")
6836 (set_attr "memory" "none,load")
6837 (set_attr "mode" "TI")])
;; PCMPISTRM: sets operand 0 (V16QI, constraint "Y0") and FLAGS_REG from
;; operand 1 (V16QI register), operand 2 (V16QI, register or memory)
;; and operand 3, an immediate satisfying const_0_to_255_operand.
;; Operand 0 is pinned by its constraint and is not printed in the
;; template.  The "memory" attribute (none,load) follows operand 2's
;; x/m choice.
6839 (define_insn "sse4_2_pcmpistrm"
6840 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6842 [(match_operand:V16QI 1 "register_operand" "x,x")
6843 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6844 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6846 (set (reg:CC FLAGS_REG)
6853 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6854 [(set_attr "type" "sselog")
6855 (set_attr "prefix_data16" "1")
6856 (set_attr "prefix_extra" "1")
6857 (set_attr "memory" "none,load")
6858 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: the sole set is FLAGS_REG; the two other
;; results are modelled as clobbered scratches (operand 3, V16QI and
;; operand 4, SI).
;; There are four alternatives:
;;   0,1  scratch 3 uses "Y0", scratch 4 is "X"  -> pcmpistrm is emitted
;;   2,3  scratch 3 is "X",  scratch 4 uses "c"  -> pcmpistri is emitted
;; Within each pair, operand 1 is a register in the first alternative
;; and memory in the second, which the "memory" attribute
;; (none,load,none,load) mirrors.
6860 (define_insn "sse4_2_pcmpistr_cconly"
6861 [(set (reg:CC FLAGS_REG)
6863 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6864 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6865 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6867 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6868 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6871 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6872 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6873 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6874 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
6875 [(set_attr "type" "sselog")
6876 (set_attr "prefix_data16" "1")
6877 (set_attr "prefix_extra" "1")
6878 (set_attr "memory" "none,load,none,load")
6879 (set_attr "mode" "TI")])