1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
;; SSEMODEI iterates over the four integer vector modes listed below.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; appear to be the element sizes in bytes of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for V16QI, V8HI, V4SI, V2DI respectively).
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander instantiated for every SSEMODEI mode.  Both operands are
;; nonimmediate_operand; the body passes them to ix86_expand_vector_move.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Matching insn for SSEMODEI moves.  The three constraint alternatives are
;; "C" constant -> xmm, xmm-or-memory -> xmm, and xmm -> memory; the visible
;; part of the condition requires at least one operand to be a register.
;; The output code switches on which_alternative and returns either the
;; template from standard_sse_constant_opcode, movaps (when the insn's mode
;; attribute is MODE_V4SF) or movdqa.  The computed "mode" attribute tests
;; optimize_size, TARGET_SSE2 == 0, and alternative 2 together with
;; TARGET_SSE_TYPELESS_STORES; TI is the visible fallback value.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 && (register_operand (operands[0], <MODE>mode)
64 || register_operand (operands[1], <MODE>mode))"
66 switch (which_alternative)
69 return standard_sse_constant_opcode (insn, operands[1]);
72 if (get_attr_mode (insn) == MODE_V4SF)
73 return "movaps\t{%1, %0|%0, %1}";
75 return "movdqa\t{%1, %0|%0, %1}";
80 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
84 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
85 (and (eq_attr "alternative" "2")
86 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
89 (const_string "TI")))])
91 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
92 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
93 ;; from memory, we'd prefer to load the memory directly into the %xmm
94 ;; register. To facilitate this happy circumstance, this pattern won't
95 ;; split until after register allocation. If the 64-bit value didn't
96 ;; come from memory, this is the best we can do. This is much better
97 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Insn-and-split that places a DImode value (general register "r" or
;; memory "m") into a V4SI xmm register via a subreg at offset 0.  The V4SI
;; scratch is an early-clobbered xmm register in the register alternative
;; and unused ("X") in the memory alternative.  Enabled for
;; !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES and split only
;; once reload_completed.
;; Split body, register source: the SImode halves at byte offsets 0 and 4
;; are loaded with sse2_loadld into operand 0 and the scratch respectively,
;; then combined with sse2_punpckldq.
;; Split body, memory source: emits vec_concatv2di of the memory operand
;; and const0_rtx into the V2DImode lowpart of operand 0.
100 (define_insn_and_split "movdi_to_sse"
102 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
103 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
104 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
105 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
107 "&& reload_completed"
110 if (register_operand (operands[1], DImode))
112 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
113 Assemble the 64-bit DImode value in an xmm register. */
114 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
115 gen_rtx_SUBREG (SImode, operands[1], 0)));
116 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 4)));
118 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
120 else if (memory_operand (operands[1], DImode))
121 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for V4SF; the body passes the operands to
;; ix86_expand_vector_move with V4SFmode.
126 (define_expand "movv4sf"
127 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
128 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
131 ix86_expand_vector_move (V4SFmode, operands);
;; Matching insn for V4SF moves.  Alternatives: "C" constant -> xmm,
;; xmm-or-memory -> xmm, xmm -> memory; at least one operand must be a
;; register.  The output code returns either the template from
;; standard_sse_constant_opcode or movaps.  The mode attribute is always V4SF.
135 (define_insn "*movv4sf_internal"
136 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
137 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
139 && (register_operand (operands[0], V4SFmode)
140 || register_operand (operands[1], V4SFmode))"
142 switch (which_alternative)
145 return standard_sse_constant_opcode (insn, operands[1]);
148 return "movaps\t{%1, %0|%0, %1}";
153 [(set_attr "type" "sselog1,ssemov,ssemov")
154 (set_attr "mode" "V4SF")])
;; Body of a post-reload split (its header line is not visible in this
;; excerpt) for loading a V4SF register from a
;; zero_extended_scalar_load_operand.  The preparation code narrows operand 1
;; to its SFmode subreg at offset 0 and sets operand 2 to the V4SF zero
;; constant; the replacement pattern uses a vec_duplicate of operand 1.
157 [(set (match_operand:V4SF 0 "register_operand" "")
158 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
159 "TARGET_SSE && reload_completed"
162 (vec_duplicate:V4SF (match_dup 1))
166 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
167 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the body passes the operands to
;; ix86_expand_vector_move with V2DFmode.
170 (define_expand "movv2df"
171 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
172 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
175 ix86_expand_vector_move (V2DFmode, operands);
;; Matching insn for V2DF moves.  Alternatives: "C" constant -> xmm,
;; xmm-or-memory -> xmm, xmm -> memory; at least one operand must be a
;; register.  The output code returns the template from
;; standard_sse_constant_opcode, movaps (when the insn's mode attribute is
;; MODE_V4SF) or movapd.  The computed "mode" attribute yields V4SF when
;; optimize_size is set, TARGET_SSE2 is 0, or alternative 2 is used with
;; TARGET_SSE_TYPELESS_STORES; otherwise V2DF.
179 (define_insn "*movv2df_internal"
180 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
181 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
183 && (register_operand (operands[0], V2DFmode)
184 || register_operand (operands[1], V2DFmode))"
186 switch (which_alternative)
189 return standard_sse_constant_opcode (insn, operands[1]);
192 if (get_attr_mode (insn) == MODE_V4SF)
193 return "movaps\t{%1, %0|%0, %1}";
195 return "movapd\t{%1, %0|%0, %1}";
200 [(set_attr "type" "sselog1,ssemov,ssemov")
203 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
204 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
205 (and (eq_attr "alternative" "2")
206 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
208 (const_string "V4SF")
209 (const_string "V2DF")))])
;; Body of a post-reload split (its header line is not visible in this
;; excerpt) for loading a V2DF register from a
;; zero_extended_scalar_load_operand.  The load is rewritten as a vec_concat
;; of operand 1, narrowed to its DFmode subreg at offset 0, with the DFmode
;; zero constant placed in operand 2.
212 [(set (match_operand:V2DF 0 "register_operand" "")
213 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
214 "TARGET_SSE2 && reload_completed"
215 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
217 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
218 operands[2] = CONST0_RTX (DFmode);
;; Expander instantiated for every SSEMODE mode; passes its single register
;; operand to ix86_expand_push.
221 (define_expand "push<mode>1"
222 [(match_operand:SSEMODE 0 "register_operand" "")]
225 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander instantiated for every SSEMODE mode; the body
;; passes the operands to ix86_expand_vector_move_misalign.
229 (define_expand "movmisalign<mode>"
230 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
231 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
234 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as movups, with the source wrapped in an unspec.
;; Either operand may be xmm or memory, but the condition rejects
;; memory-to-memory.  The "mode" attribute is V4SF to agree with the operand
;; mode and with the other V4SF insns in this file; V2DF is the mode of the
;; double-precision sibling sse2_movupd, and using it here would make any
;; attribute derived from "mode" describe the wrong instruction form.
238 (define_insn "sse_movups"
239 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
240 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
242 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
243 "movups\t{%1, %0|%0, %1}"
244 [(set_attr "type" "ssemov")
245 (set_attr "mode" "V4SF")])
;; V2DF move emitted as movupd, with the source wrapped in an unspec.
;; Requires TARGET_SSE2 and rejects memory-to-memory.
247 (define_insn "sse2_movupd"
248 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
249 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
251 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
252 "movupd\t{%1, %0|%0, %1}"
253 [(set_attr "type" "ssemov")
254 (set_attr "mode" "V2DF")])
;; V16QI move emitted as movdqu, with the source wrapped in an unspec.
;; Requires TARGET_SSE2 and rejects memory-to-memory.
;; NOTE(review): prefix_data16 is set to 1 here; confirm this matches the
;; prefix movdqu actually uses.
256 (define_insn "sse2_movdqu"
257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
258 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
260 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
261 "movdqu\t{%1, %0|%0, %1}"
262 [(set_attr "type" "ssemov")
263 (set_attr "prefix_data16" "1")
264 (set_attr "mode" "TI")])
;; Store of a V4SF xmm register to memory emitted as movntps; the source is
;; wrapped in an unspec (its code and the enable condition are not visible
;; in this excerpt).
266 (define_insn "sse_movntv4sf"
267 [(set (match_operand:V4SF 0 "memory_operand" "=m")
268 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
271 "movntps\t{%1, %0|%0, %1}"
272 [(set_attr "type" "ssemov")
273 (set_attr "mode" "V4SF")])
;; Store of a V2DF xmm register to memory emitted as movntpd; the source is
;; wrapped in an unspec (its code and the enable condition are not visible
;; in this excerpt).
275 (define_insn "sse2_movntv2df"
276 [(set (match_operand:V2DF 0 "memory_operand" "=m")
277 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
280 "movntpd\t{%1, %0|%0, %1}"
281 [(set_attr "type" "ssecvt")
282 (set_attr "mode" "V2DF")])
;; Store of a V2DI xmm register to memory emitted as movntdq; the source is
;; wrapped in an unspec (its code and the enable condition are not visible
;; in this excerpt).
284 (define_insn "sse2_movntv2di"
285 [(set (match_operand:V2DI 0 "memory_operand" "=m")
286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
289 "movntdq\t{%1, %0|%0, %1}"
290 [(set_attr "type" "ssecvt")
291 (set_attr "prefix_data16" "1")
292 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory emitted as movnti;
;; the source is wrapped in an unspec (its code and the enable condition are
;; not visible in this excerpt).  The "mode" attribute is SI because the
;; operands are SImode integer values; a vector mode such as V2DF would
;; misdescribe the instruction to anything keyed on the mode attribute.
294 (define_insn "sse2_movntsi"
295 [(set (match_operand:SI 0 "memory_operand" "=m")
296 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
299 "movnti\t{%1, %0|%0, %1}"
300 [(set_attr "type" "ssecvt")
301 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an xmm register emitted as lddqu;
;; the source is wrapped in an unspec (its code and the enable condition are
;; not visible in this excerpt).
303 (define_insn "sse3_lddqu"
304 [(set (match_operand:V16QI 0 "register_operand" "=x")
305 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
308 "lddqu\t{%1, %0|%0, %1}"
309 [(set_attr "type" "ssecvt")
310 (set_attr "prefix_rep" "1")
311 (set_attr "mode" "TI")])
313 ; Expand patterns for non-temporal stores. At the moment, only those
314 ; that directly map to insns are defined; it would be possible to
315 ; define patterns for other modes that would expand to several insns.
;; storent<mode> expanders for V4SF, V2DF, V2DI, SI, DF and SF.  Each one is
;; a set of a memory operand from an unspec wrapping a register operand; the
;; unspec code and the enable condition of each expander are not visible in
;; this excerpt.

;; V4SF.
317 (define_expand "storentv4sf"
318 [(set (match_operand:V4SF 0 "memory_operand" "=m")
319 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
;; V2DF.
324 (define_expand "storentv2df"
325 [(set (match_operand:V2DF 0 "memory_operand" "=m")
326 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
;; V2DI.
331 (define_expand "storentv2di"
332 [(set (match_operand:V2DI 0 "memory_operand" "=m")
333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
;; SI; the source constraint here is "r".
338 (define_expand "storentsi"
339 [(set (match_operand:SI 0 "memory_operand" "=m")
340 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
;; DF; no constraint strings are given.
345 (define_expand "storentdf"
346 [(set (match_operand:DF 0 "memory_operand" "")
347 (unspec:DF [(match_operand:DF 1 "register_operand" "")]
;; SF; no constraint strings are given.
352 (define_expand "storentsf"
353 [(set (match_operand:SF 0 "memory_operand" "")
354 (unspec:SF [(match_operand:SF 1 "register_operand" "")]
359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
361 ;; Parallel single-precision floating point arithmetic
363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander: the whole expansion is delegated to
;; ix86_expand_fp_absneg_operator with code NEG, followed by DONE.
365 (define_expand "negv4sf2"
366 [(set (match_operand:V4SF 0 "register_operand" "")
367 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
369 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander: same delegation with code ABS.
371 (define_expand "absv4sf2"
372 [(set (match_operand:V4SF 0 "register_operand" "")
373 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
375 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for PLUS; there is no DONE, so the
;; plus pattern itself is what gets emitted.
377 (define_expand "addv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "")
379 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
380 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Matching insn for V4SF addition, emitted as addps.  Operand 1 is tied to
;; the output and marked commutative ("%0"); operand 2 may be xmm or memory.
;; Valid when TARGET_SSE and ix86_binary_operator_ok accepts the operands.
384 (define_insn "*addv4sf3"
385 [(set (match_operand:V4SF 0 "register_operand" "=x")
386 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
387 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
388 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
389 "addps\t{%2, %0|%0, %2}"
390 [(set_attr "type" "sseadd")
391 (set_attr "mode" "V4SF")])
;; Named insn emitting addss on V4SF operands, with the insn mode given as
;; SF.  Only the inner plus is visible in this excerpt; the RTL lines that
;; wrap it are not shown.  Operand 1 is a register tied to the output.
393 (define_insn "sse_vmaddv4sf3"
394 [(set (match_operand:V4SF 0 "register_operand" "=x")
396 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
397 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
400 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
401 "addss\t{%2, %0|%0, %2}"
402 [(set_attr "type" "sseadd")
403 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for MINUS.
405 (define_expand "subv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "")
407 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
408 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
410 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Matching insn for V4SF subtraction, emitted as subps.  Operand 1 is a
;; register tied to the output (no commutative marker); operand 2 may be
;; xmm or memory.  The enable condition is not visible in this excerpt.
412 (define_insn "*subv4sf3"
413 [(set (match_operand:V4SF 0 "register_operand" "=x")
414 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
415 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
417 "subps\t{%2, %0|%0, %2}"
418 [(set_attr "type" "sseadd")
419 (set_attr "mode" "V4SF")])
;; Named insn emitting subss on V4SF operands, with the insn mode given as
;; SF.  Only the inner minus is visible in this excerpt; the wrapping RTL
;; and the enable condition are not shown.
421 (define_insn "sse_vmsubv4sf3"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
425 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
429 "subss\t{%2, %0|%0, %2}"
430 [(set_attr "type" "sseadd")
431 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT.
433 (define_expand "mulv4sf3"
434 [(set (match_operand:V4SF 0 "register_operand" "")
435 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
436 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
438 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Matching insn for V4SF multiplication, emitted as mulps.  Operand 1 is
;; tied to the output and marked commutative ("%0").  Valid when TARGET_SSE
;; and ix86_binary_operator_ok accepts the operands.
440 (define_insn "*mulv4sf3"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
443 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
444 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
445 "mulps\t{%2, %0|%0, %2}"
446 [(set_attr "type" "ssemul")
447 (set_attr "mode" "V4SF")])
;; Named insn emitting mulss on V4SF operands, with the insn mode given as
;; SF.  Only the inner mult is visible in this excerpt; the RTL lines that
;; wrap it are not shown.
449 (define_insn "sse_vmmulv4sf3"
450 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
456 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
457 "mulss\t{%2, %0|%0, %2}"
458 [(set_attr "type" "ssemul")
459 (set_attr "mode" "SF")])
;; V4SF division expander.  The body first calls
;; ix86_fixup_binary_operands_no_copy for DIV.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, optimize_size is off, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold, it
;; instead emits the division through ix86_emit_swdivsf.
461 (define_expand "divv4sf3"
462 [(set (match_operand:V4SF 0 "register_operand" "")
463 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
464 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
467 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
469 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
470 && flag_finite_math_only && !flag_trapping_math
471 && flag_unsafe_math_optimizations)
473 ix86_emit_swdivsf (operands[0], operands[1],
474 operands[2], V4SFmode);
;; Matching insn for V4SF division, emitted as divps.  Operand 1 is a
;; register tied to the output; operand 2 may be xmm or memory.  The enable
;; condition is not visible in this excerpt.
479 (define_insn "*divv4sf3"
480 [(set (match_operand:V4SF 0 "register_operand" "=x")
481 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
482 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
484 "divps\t{%2, %0|%0, %2}"
485 [(set_attr "type" "ssediv")
486 (set_attr "mode" "V4SF")])
;; Named insn emitting divss on V4SF operands, with the insn mode given as
;; SF.  Only the inner div is visible in this excerpt; the wrapping RTL and
;; the enable condition are not shown.
488 (define_insn "sse_vmdivv4sf3"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
492 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
496 "divss\t{%2, %0|%0, %2}"
497 [(set_attr "type" "ssediv")
498 (set_attr "mode" "SF")])
;; Named insn emitting rcpps; the source (xmm or memory) is the single
;; element of an UNSPEC_RCP.  The enable condition is not visible in this
;; excerpt.
500 (define_insn "sse_rcpv4sf2"
501 [(set (match_operand:V4SF 0 "register_operand" "=x")
503 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
505 "rcpps\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "V4SF")])
;; Named insn emitting rcpss, with the insn mode given as SF.  Operand 1
;; (xmm or memory) sits inside an unspec, and operand 2 is a register tied
;; to the output.  The unspec code, the RTL combining the two, and the
;; enable condition are not visible in this excerpt.
509 (define_insn "sse_vmrcpv4sf2"
510 [(set (match_operand:V4SF 0 "register_operand" "=x")
512 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
514 (match_operand:V4SF 2 "register_operand" "0")
517 "rcpss\t{%1, %0|%0, %1}"
518 [(set_attr "type" "sse")
519 (set_attr "mode" "SF")])
;; Matching insn emitting rsqrtps; the source (xmm or memory) is the single
;; element of an UNSPEC_RSQRT.  The enable condition is not visible in this
;; excerpt.
521 (define_insn "*sse_rsqrtv4sf2"
522 [(set (match_operand:V4SF 0 "register_operand" "=x")
524 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
526 "rsqrtps\t{%1, %0|%0, %1}"
527 [(set_attr "type" "sse")
528 (set_attr "mode" "V4SF")])
;; Expander for the UNSPEC_RSQRT pattern.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, optimize_size is off, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold, the body
;; calls ix86_emit_swsqrtsf with a final argument of 1.
530 (define_expand "sse_rsqrtv4sf2"
531 [(set (match_operand:V4SF 0 "register_operand" "")
533 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
536 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
537 && flag_finite_math_only && !flag_trapping_math
538 && flag_unsafe_math_optimizations)
540 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Named insn emitting rsqrtss, with the insn mode given as SF.  Operand 1
;; (xmm or memory) sits inside an unspec, and operand 2 is a register tied
;; to the output.  The unspec code, the RTL combining the two, and the
;; enable condition are not visible in this excerpt.
545 (define_insn "sse_vmrsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
550 (match_operand:V4SF 2 "register_operand" "0")
553 "rsqrtss\t{%1, %0|%0, %1}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "SF")])
;; Matching insn for V4SF square root, emitted as sqrtps; the source may be
;; xmm or memory.  The enable condition is not visible in this excerpt.
557 (define_insn "*sqrtv4sf2"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
561 "sqrtps\t{%1, %0|%0, %1}"
562 [(set_attr "type" "sse")
563 (set_attr "mode" "V4SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, optimize_size is off, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold, the body
;; calls ix86_emit_swsqrtsf with a final argument of 0.
;; Operand 0 carries an empty constraint string like every other expander
;; in this file; a lone "=" names no register class and means nothing here.
565 (define_expand "sqrtv4sf2"
566 [(set (match_operand:V4SF 0 "register_operand" "")
567 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
570 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
571 && flag_finite_math_only && !flag_trapping_math
572 && flag_unsafe_math_optimizations)
574 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Named insn emitting sqrtss, with the insn mode given as SF.  The sqrt is
;; taken of operand 1 (xmm or memory), and operand 2 is a register tied to
;; the output.  The RTL combining the two and the enable condition are not
;; visible in this excerpt.
579 (define_insn "sse_vmsqrtv4sf2"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
583 (match_operand:V4SF 2 "register_operand" "0")
586 "sqrtss\t{%1, %0|%0, %1}"
587 [(set_attr "type" "sse")
588 (set_attr "mode" "SF")])
590 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
591 ;; isn't really correct, as those rtl operators aren't defined when
592 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the body then calls
;; ix86_fixup_binary_operands_no_copy for SMAX.
594 (define_expand "smaxv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "")
596 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
597 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
600 if (!flag_finite_math_only)
601 operands[1] = force_reg (V4SFmode, operands[1]);
602 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps form used only under flag_finite_math_only.  Operand 1 is tied to
;; the output and marked commutative ("%0"); ix86_binary_operator_ok must
;; accept the operands.
605 (define_insn "*smaxv4sf3_finite"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
607 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
608 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
609 "TARGET_SSE && flag_finite_math_only
610 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
611 "maxps\t{%2, %0|%0, %2}"
612 [(set_attr "type" "sse")
613 (set_attr "mode" "V4SF")])
;; maxps form without the commutative marker: operand 1 must be a register
;; tied to the output.  The enable condition is not visible in this excerpt.
615 (define_insn "*smaxv4sf3"
616 [(set (match_operand:V4SF 0 "register_operand" "=x")
617 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
618 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
620 "maxps\t{%2, %0|%0, %2}"
621 [(set_attr "type" "sse")
622 (set_attr "mode" "V4SF")])
;; Named insn emitting maxss on V4SF operands, with the insn mode given as
;; SF.  Only the inner smax is visible in this excerpt; the wrapping RTL and
;; the enable condition are not shown.
624 (define_insn "sse_vmsmaxv4sf3"
625 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
628 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
632 "maxss\t{%2, %0|%0, %2}"
633 [(set_attr "type" "sse")
634 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the body then calls
;; ix86_fixup_binary_operands_no_copy for SMIN.
636 (define_expand "sminv4sf3"
637 [(set (match_operand:V4SF 0 "register_operand" "")
638 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
639 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
642 if (!flag_finite_math_only)
643 operands[1] = force_reg (V4SFmode, operands[1]);
644 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps form used only under flag_finite_math_only.  Operand 1 is tied to
;; the output and marked commutative ("%0"); ix86_binary_operator_ok must
;; accept the operands.
647 (define_insn "*sminv4sf3_finite"
648 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
651 "TARGET_SSE && flag_finite_math_only
652 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
653 "minps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sse")
655 (set_attr "mode" "V4SF")])
;; minps form without the commutative marker: operand 1 must be a register
;; tied to the output.  The enable condition is not visible in this excerpt.
657 (define_insn "*sminv4sf3"
658 [(set (match_operand:V4SF 0 "register_operand" "=x")
659 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
660 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
662 "minps\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sse")
664 (set_attr "mode" "V4SF")])
;; Named insn emitting minss on V4SF operands, with the insn mode given as
;; SF.  Only the inner smin is visible in this excerpt; the wrapping RTL and
;; the enable condition are not shown.
666 (define_insn "sse_vmsminv4sf3"
667 [(set (match_operand:V4SF 0 "register_operand" "=x")
669 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
670 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
674 "minss\t{%2, %0|%0, %2}"
675 [(set_attr "type" "sse")
676 (set_attr "mode" "SF")])
678 ;; These versions of the min/max patterns implement exactly the operations
679 ;; min = (op1 < op2 ? op1 : op2)
680 ;; max = (!(op1 < op2) ? op1 : op2)
681 ;; Their operands are not commutative, and thus they may be used in the
682 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum, emitted as minps.  Both inputs are elements
;; of one unspec (its code and the enable condition are not visible in this
;; excerpt); operand 1 is a register tied to the output.
684 (define_insn "*ieee_sminv4sf3"
685 [(set (match_operand:V4SF 0 "register_operand" "=x")
686 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
687 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
690 "minps\t{%2, %0|%0, %2}"
691 [(set_attr "type" "sseadd")
692 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum, emitted as maxps.  Both inputs are elements
;; of one unspec (its code and the enable condition are not visible in this
;; excerpt); operand 1 is a register tied to the output.
694 (define_insn "*ieee_smaxv4sf3"
695 [(set (match_operand:V4SF 0 "register_operand" "=x")
696 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
697 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
700 "maxps\t{%2, %0|%0, %2}"
701 [(set_attr "type" "sseadd")
702 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum, emitted as minpd.  Both inputs are elements
;; of one unspec (its code and the enable condition are not visible in this
;; excerpt); operand 1 is a register tied to the output.
704 (define_insn "*ieee_sminv2df3"
705 [(set (match_operand:V2DF 0 "register_operand" "=x")
706 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
707 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
710 "minpd\t{%2, %0|%0, %2}"
711 [(set_attr "type" "sseadd")
712 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum, emitted as maxpd.  Both inputs are elements
;; of one unspec (its code and the enable condition are not visible in this
;; excerpt); operand 1 is a register tied to the output.
714 (define_insn "*ieee_smaxv2df3"
715 [(set (match_operand:V2DF 0 "register_operand" "=x")
716 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
717 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
720 "maxpd\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "mode" "V2DF")])
;; Named insn emitting addsubps.  The visible RTL has operands 1 and 2
;; (the plus wrapper line is missing) followed by a minus of the same two
;; operands; how the results are merged and the enable condition are not
;; visible in this excerpt.
724 (define_insn "sse3_addsubv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operand:V4SF 1 "register_operand" "0")
729 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
730 (minus:V4SF (match_dup 1) (match_dup 2))
733 "addsubps\t{%2, %0|%0, %2}"
734 [(set_attr "type" "sseadd")
735 (set_attr "prefix_rep" "1")
736 (set_attr "mode" "V4SF")])
;; Named insn emitting haddps.  The visible RTL selects SF elements 0, 1, 2
;; and 3 of operand 1, then elements 0, 1, 2 and 3 of operand 2; the
;; operators that combine those selections and the enable condition are not
;; visible in this excerpt.
738 (define_insn "sse3_haddv4sf3"
739 [(set (match_operand:V4SF 0 "register_operand" "=x")
744 (match_operand:V4SF 1 "register_operand" "0")
745 (parallel [(const_int 0)]))
746 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
748 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
749 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
753 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))
755 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
757 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
758 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
760 "haddps\t{%2, %0|%0, %2}"
761 [(set_attr "type" "sseadd")
762 (set_attr "prefix_rep" "1")
763 (set_attr "mode" "V4SF")])
;; Named insn emitting hsubps.  The visible RTL selects SF elements 0, 1, 2
;; and 3 of operand 1, then elements 0, 1, 2 and 3 of operand 2; the
;; operators that combine those selections and the enable condition are not
;; visible in this excerpt.
765 (define_insn "sse3_hsubv4sf3"
766 [(set (match_operand:V4SF 0 "register_operand" "=x")
771 (match_operand:V4SF 1 "register_operand" "0")
772 (parallel [(const_int 0)]))
773 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
775 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
776 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
780 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))
782 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
784 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
785 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
787 "hsubps\t{%2, %0|%0, %2}"
788 [(set_attr "type" "sseadd")
789 (set_attr "prefix_rep" "1")
790 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two code paths are visible: one applies
;; sse3_haddv4sf3 twice through a fresh temporary register, the other calls
;; ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that chooses between
;; them is not visible in this excerpt.
792 (define_expand "reduc_splus_v4sf"
793 [(match_operand:V4SF 0 "register_operand" "")
794 (match_operand:V4SF 1 "register_operand" "")]
799 rtx tmp = gen_reg_rtx (V4SFmode);
800 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
801 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
804 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Max-reduction expander for V4SF: calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining generator.
808 (define_expand "reduc_smax_v4sf"
809 [(match_operand:V4SF 0 "register_operand" "")
810 (match_operand:V4SF 1 "register_operand" "")]
813 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min-reduction expander for V4SF: same helper with gen_sminv4sf3.
817 (define_expand "reduc_smin_v4sf"
818 [(match_operand:V4SF 0 "register_operand" "")
819 (match_operand:V4SF 1 "register_operand" "")]
822 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
828 ;; Parallel single-precision floating point comparisons
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF comparison insn.  Operator 3 must satisfy sse_comparison_operator;
;; the template is cmp%D3ps, where %D3 presumably prints the comparison
;; suffix for that operator (confirm against the operand-print code).
;; Operand 1 is a register tied to the output.  The enable condition is not
;; visible in this excerpt.
832 (define_insn "sse_maskcmpv4sf3"
833 [(set (match_operand:V4SF 0 "register_operand" "=x")
834 (match_operator:V4SF 3 "sse_comparison_operator"
835 [(match_operand:V4SF 1 "register_operand" "0")
836 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
838 "cmp%D3ps\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssecmp")
840 (set_attr "mode" "V4SF")])
;; Scalar SF counterpart of the mask comparison: same
;; sse_comparison_operator match, template cmp%D3ss.  The enable condition
;; is not visible in this excerpt.
842 (define_insn "sse_maskcmpsf3"
843 [(set (match_operand:SF 0 "register_operand" "=x")
844 (match_operator:SF 3 "sse_comparison_operator"
845 [(match_operand:SF 1 "register_operand" "0")
846 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
848 "cmp%D3ss\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssecmp")
850 (set_attr "mode" "SF")])
;; Named insn emitting cmp%D3ss on V4SF operands, with the insn mode given
;; as SF.  Unlike the patterns above, operand 2 must be a register
;; (constraint "x", no memory).  The RTL wrapping the comparison and the
;; enable condition are not visible in this excerpt.
852 (define_insn "sse_vmmaskcmpv4sf3"
853 [(set (match_operand:V4SF 0 "register_operand" "=x")
855 (match_operator:V4SF 3 "sse_comparison_operator"
856 [(match_operand:V4SF 1 "register_operand" "0")
857 (match_operand:V4SF 2 "register_operand" "x")])
861 "cmp%D3ss\t{%2, %0|%0, %2}"
862 [(set_attr "type" "ssecmp")
863 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from element 0 of each V4SF operand (the
;; parallel [(const_int 0)] selections), emitted as comiss.  The comparison
;; wrapper lines and the enable condition are not visible in this excerpt.
865 (define_insn "sse_comi"
866 [(set (reg:CCFP FLAGS_REG)
869 (match_operand:V4SF 0 "register_operand" "x")
870 (parallel [(const_int 0)]))
872 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
873 (parallel [(const_int 0)]))))]
875 "comiss\t{%1, %0|%0, %1}"
876 [(set_attr "type" "ssecomi")
877 (set_attr "mode" "SF")])
;; Same shape as sse_comi but FLAGS_REG is set in CCFPU mode and the
;; template is ucomiss.  The comparison wrapper lines and the enable
;; condition are not visible in this excerpt.
879 (define_insn "sse_ucomi"
880 [(set (reg:CCFPU FLAGS_REG)
883 (match_operand:V4SF 0 "register_operand" "x")
884 (parallel [(const_int 0)]))
886 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
887 (parallel [(const_int 0)]))))]
889 "ucomiss\t{%1, %0|%0, %1}"
890 [(set_attr "type" "ssecomi")
891 (set_attr "mode" "SF")])
;; Vector conditional expander for V4SF.  Operands 4 and 5 are the values
;; being compared (the operator line is not visible in this excerpt), and
;; operands 1 and 2 are the two general_operand values selected between.
;; The body calls ix86_expand_fp_vcond and tests its result; what happens on
;; each outcome is not visible here.
893 (define_expand "vcondv4sf"
894 [(set (match_operand:V4SF 0 "register_operand" "")
897 [(match_operand:V4SF 4 "nonimmediate_operand" "")
898 (match_operand:V4SF 5 "nonimmediate_operand" "")])
899 (match_operand:V4SF 1 "general_operand" "")
900 (match_operand:V4SF 2 "general_operand" "")))]
903 if (ix86_expand_fp_vcond (operands))
909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
911 ;; Parallel single-precision floating point logical operations
913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for AND.
915 (define_expand "andv4sf3"
916 [(set (match_operand:V4SF 0 "register_operand" "")
917 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
918 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
920 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Matching insn for V4SF AND, emitted as andps.  Operand 1 is tied to the
;; output and marked commutative ("%0").  Valid when TARGET_SSE and
;; ix86_binary_operator_ok accepts the operands.
922 (define_insn "*andv4sf3"
923 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
925 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
926 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
927 "andps\t{%2, %0|%0, %2}"
928 [(set_attr "type" "sselog")
929 (set_attr "mode" "V4SF")])
;; Named insn computing (NOT operand 1) AND operand 2 on V4SF, emitted as
;; andnps.  The complemented operand is the register tied to the output.
;; The enable condition is not visible in this excerpt.
931 (define_insn "sse_nandv4sf3"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
934 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
936 "andnps\t{%2, %0|%0, %2}"
937 [(set_attr "type" "sselog")
938 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for IOR.
940 (define_expand "iorv4sf3"
941 [(set (match_operand:V4SF 0 "register_operand" "")
942 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
943 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
945 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Matching insn for V4SF OR, emitted as orps.  Operand 1 is tied to the
;; output and marked commutative ("%0").  Valid when TARGET_SSE and
;; ix86_binary_operator_ok accepts the operands.
947 (define_insn "*iorv4sf3"
948 [(set (match_operand:V4SF 0 "register_operand" "=x")
949 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
950 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
951 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
952 "orps\t{%2, %0|%0, %2}"
953 [(set_attr "type" "sselog")
954 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for XOR.
956 (define_expand "xorv4sf3"
957 [(set (match_operand:V4SF 0 "register_operand" "")
958 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
959 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
961 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Matching insn for V4SF XOR, emitted as xorps.  Operand 1 is tied to the
;; output and marked commutative ("%0").  Valid when TARGET_SSE and
;; ix86_binary_operator_ok accepts the operands.
963 (define_insn "*xorv4sf3"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
965 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
966 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
967 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
968 "xorps\t{%2, %0|%0, %2}"
969 [(set_attr "type" "sselog")
970 (set_attr "mode" "V4SF")])
972 ;; Also define scalar versions. These are used for abs, neg, and
973 ;; conditional move. Using subregs into vector modes causes register
974 ;; allocation lossage. These patterns do not allow memory operands
975 ;; because the native instructions read the full 128-bits.
;; Scalar SF AND using the packed andps instruction; both inputs must be
;; registers, and the insn mode is given as V4SF.  The enable condition is
;; not visible in this excerpt.
977 (define_insn "*andsf3"
978 [(set (match_operand:SF 0 "register_operand" "=x")
979 (and:SF (match_operand:SF 1 "register_operand" "0")
980 (match_operand:SF 2 "register_operand" "x")))]
982 "andps\t{%2, %0|%0, %2}"
983 [(set_attr "type" "sselog")
984 (set_attr "mode" "V4SF")])
;; Scalar SF (NOT operand 1) AND operand 2 using andnps; both inputs must be
;; registers, and the insn mode is given as V4SF.  The enable condition is
;; not visible in this excerpt.
986 (define_insn "*nandsf3"
987 [(set (match_operand:SF 0 "register_operand" "=x")
988 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
989 (match_operand:SF 2 "register_operand" "x")))]
991 "andnps\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sselog")
993 (set_attr "mode" "V4SF")])
;; Scalar SF OR using orps; both inputs must be registers, and the insn mode
;; is given as V4SF.  The enable condition is not visible in this excerpt.
995 (define_insn "*iorsf3"
996 [(set (match_operand:SF 0 "register_operand" "=x")
997 (ior:SF (match_operand:SF 1 "register_operand" "0")
998 (match_operand:SF 2 "register_operand" "x")))]
1000 "orps\t{%2, %0|%0, %2}"
1001 [(set_attr "type" "sselog")
1002 (set_attr "mode" "V4SF")])
;; Scalar SF XOR using xorps; both inputs must be registers, and the insn
;; mode is given as V4SF.  The enable condition is not visible in this
;; excerpt.
1004 (define_insn "*xorsf3"
1005 [(set (match_operand:SF 0 "register_operand" "=x")
1006 (xor:SF (match_operand:SF 1 "register_operand" "0")
1007 (match_operand:SF 2 "register_operand" "x")))]
1009 "xorps\t{%2, %0|%0, %2}"
1010 [(set_attr "type" "sselog")
1011 (set_attr "mode" "V4SF")])
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1015 ;; Parallel single-precision floating point conversion operations
1017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named insn emitting cvtpi2ps.  Operand 2 is a V2SI value ("y" register or
;; memory) converted with float:V2SF; operand 1 is the V4SF register tied to
;; the output.  The RTL combining the two and the enable condition are not
;; visible in this excerpt.
1019 (define_insn "sse_cvtpi2ps"
1020 [(set (match_operand:V4SF 0 "register_operand" "=x")
1023 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1024 (match_operand:V4SF 1 "register_operand" "0")
1027 "cvtpi2ps\t{%2, %0|%0, %2}"
1028 [(set_attr "type" "ssecvt")
1029 (set_attr "mode" "V4SF")])
;; Named insn emitting cvtps2pi into a "y" register.  The V4SF source is
;; converted inside a V4SI unspec (its code is not visible in this excerpt),
;; and elements 0 and 1 of the result are selected.  The "unit" attribute is
;; set to mmx.
1031 (define_insn "sse_cvtps2pi"
1032 [(set (match_operand:V2SI 0 "register_operand" "=y")
1034 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1036 (parallel [(const_int 0) (const_int 1)])))]
1038 "cvtps2pi\t{%1, %0|%0, %1}"
1039 [(set_attr "type" "ssecvt")
1040 (set_attr "unit" "mmx")
1041 (set_attr "mode" "DI")])
;; Named insn emitting cvttps2pi into a "y" register: fix:V4SI of the V4SF
;; source with elements 0 and 1 selected.  The "unit" attribute is set to
;; mmx.
;; NOTE(review): the mode attribute here is SF while sse_cvtps2pi uses DI;
;; confirm whether the difference is intended.
1043 (define_insn "sse_cvttps2pi"
1044 [(set (match_operand:V2SI 0 "register_operand" "=y")
1046 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1047 (parallel [(const_int 0) (const_int 1)])))]
1049 "cvttps2pi\t{%1, %0|%0, %1}"
1050 [(set_attr "type" "ssecvt")
1051 (set_attr "unit" "mmx")
1052 (set_attr "mode" "SF")])
;; Named insn emitting cvtsi2ss.  Operand 2 is an SImode value (alternative
;; 0: general register, alternative 1: memory) converted with float:SF;
;; operand 1 is the V4SF register tied to the output.  The RTL combining
;; them and the enable condition are not visible in this excerpt.  The
;; decode attributes are given per alternative.
1054 (define_insn "sse_cvtsi2ss"
1055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1058 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1059 (match_operand:V4SF 1 "register_operand" "0,0")
1062 "cvtsi2ss\t{%2, %0|%0, %2}"
1063 [(set_attr "type" "sseicvt")
1064 (set_attr "athlon_decode" "vector,double")
1065 (set_attr "amdfam10_decode" "vector,double")
1066 (set_attr "mode" "SF")])
;; cvtsi2ssq: convert the DI operand 2 to SF; 64-bit targets only.
;; Operand 1 is the V4SF value tied to the destination.
;; The two alternatives are "register source" and "memory source", which
;; is presumably what the per-alternative athlon_decode/amdfam10_decode
;; values describe.  The second alternative is therefore plain "m"
;; (it used to be "rm"); a register is already accepted by the first
;; alternative, so the set of accepted operands is unchanged, and the
;; constraints now agree with sse_cvtsi2ss and sse2_cvtsi2sdq.
1068 (define_insn "sse_cvtsi2ssq"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1072 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1073 (match_operand:V4SF 1 "register_operand" "0,0")
1075 "TARGET_SSE && TARGET_64BIT"
1076 "cvtsi2ssq\t{%2, %0|%0, %2}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "vector,double")
1079 (set_attr "amdfam10_decode" "vector,double")
1080 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of the V4SF operand 1 (SSE register or memory) is
;; converted to an SI general register via UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm whether that is intended.
1082 (define_insn "sse_cvtss2si"
1083 [(set (match_operand:SI 0 "register_operand" "=r,r")
1086 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1087 (parallel [(const_int 0)]))]
1088 UNSPEC_FIX_NOTRUNC))]
1090 "cvtss2si\t{%1, %0|%0, %1}"
1091 [(set_attr "type" "sseicvt")
1092 (set_attr "athlon_decode" "double,vector")
1093 (set_attr "prefix_rep" "1")
1094 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SF operand (SSE
;; register or memory) rather than an element selected from a V4SF.
;; Same cvtss2si output template.
1096 (define_insn "sse_cvtss2si_2"
1097 [(set (match_operand:SI 0 "register_operand" "=r,r")
1098 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1099 UNSPEC_FIX_NOTRUNC))]
1101 "cvtss2si\t{%1, %0|%0, %1}"
1102 [(set_attr "type" "sseicvt")
1103 (set_attr "athlon_decode" "double,vector")
1104 (set_attr "amdfam10_decode" "double,double")
1105 (set_attr "prefix_rep" "1")
1106 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit counterpart of sse_cvtss2si.  Element 0 of the V4SF
;; operand 1 is converted to a DI general register via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse_cvtss2siq_2 -- confirm whether that is intended.
1108 (define_insn "sse_cvtss2siq"
1109 [(set (match_operand:DI 0 "register_operand" "=r,r")
1112 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1113 (parallel [(const_int 0)]))]
1114 UNSPEC_FIX_NOTRUNC))]
1115 "TARGET_SSE && TARGET_64BIT"
1116 "cvtss2siq\t{%1, %0|%0, %1}"
1117 [(set_attr "type" "sseicvt")
1118 (set_attr "athlon_decode" "double,vector")
1119 (set_attr "prefix_rep" "1")
1120 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source is a plain SF operand (SSE
;; register or memory).  Requires TARGET_SSE && TARGET_64BIT.
1122 (define_insn "sse_cvtss2siq_2"
1123 [(set (match_operand:DI 0 "register_operand" "=r,r")
1124 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1125 UNSPEC_FIX_NOTRUNC))]
1126 "TARGET_SSE && TARGET_64BIT"
1127 "cvtss2siq\t{%1, %0|%0, %1}"
1128 [(set_attr "type" "sseicvt")
1129 (set_attr "athlon_decode" "double,vector")
1130 (set_attr "amdfam10_decode" "double,double")
1131 (set_attr "prefix_rep" "1")
1132 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of the V4SF operand 1 (SSE register or memory)
;; is converted to an SI general register.
1134 (define_insn "sse_cvttss2si"
1135 [(set (match_operand:SI 0 "register_operand" "=r,r")
1138 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1139 (parallel [(const_int 0)]))))]
1141 "cvttss2si\t{%1, %0|%0, %1}"
1142 [(set_attr "type" "sseicvt")
1143 (set_attr "athlon_decode" "double,vector")
1144 (set_attr "amdfam10_decode" "double,double")
1145 (set_attr "prefix_rep" "1")
1146 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit counterpart of sse_cvttss2si, producing a DI
;; general register.  Requires TARGET_SSE && TARGET_64BIT.
1148 (define_insn "sse_cvttss2siq"
1149 [(set (match_operand:DI 0 "register_operand" "=r,r")
1152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1153 (parallel [(const_int 0)]))))]
1154 "TARGET_SSE && TARGET_64BIT"
1155 "cvttss2siq\t{%1, %0|%0, %1}"
1156 [(set_attr "type" "sseicvt")
1157 (set_attr "athlon_decode" "double,vector")
1158 (set_attr "amdfam10_decode" "double,double")
1159 (set_attr "prefix_rep" "1")
1160 (set_attr "mode" "DI")])
;; cvtdq2ps: convert all four SI elements of operand 1 (SSE register or
;; memory) to SF.
1162 (define_insn "sse2_cvtdq2ps"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1164 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1166 "cvtdq2ps\t{%1, %0|%0, %1}"
1167 [(set_attr "type" "ssecvt")
1168 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF -> V4SI conversion modelled as UNSPEC_FIX_NOTRUNC
;; (contrast sse2_cvttps2dq, which uses the RTL `fix' operator).
1170 (define_insn "sse2_cvtps2dq"
1171 [(set (match_operand:V4SI 0 "register_operand" "=x")
1172 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1173 UNSPEC_FIX_NOTRUNC))]
1175 "cvtps2dq\t{%1, %0|%0, %1}"
1176 [(set_attr "type" "ssecvt")
1177 (set_attr "prefix_data16" "1")
1178 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF -> V4SI conversion expressed with the RTL `fix'
;; operator.
1180 (define_insn "sse2_cvttps2dq"
1181 [(set (match_operand:V4SI 0 "register_operand" "=x")
1182 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1184 "cvttps2dq\t{%1, %0|%0, %1}"
1185 [(set_attr "type" "ssecvt")
1186 (set_attr "prefix_rep" "1")
1187 (set_attr "mode" "TI")])
1189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1191 ;; Parallel single-precision floating point element swizzling
1193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms.  Three alternatives:
;;   reg <- reg        movhlps
;;   reg <- offsettable memory ("o")   movlps from %H2
;;   mem <- reg        movhps
;; Operand 1 is tied to the destination in every alternative.  The
;; condition rejects the case where operands 1 and 2 are both memory.
1195 (define_insn "sse_movhlps"
1196 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1199 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1200 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1201 (parallel [(const_int 6)
1205 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1207 movhlps\t{%2, %0|%0, %2}
1208 movlps\t{%H2, %0|%0, %H2}
1209 movhps\t{%2, %0|%0, %2}"
1210 [(set_attr "type" "ssemov")
1211 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Three alternatives:
;;   reg <- reg        movlhps
;;   reg <- memory     movhps
;;   offsettable mem ("o") <- reg   movlps to %H0
;; Operand 1 is tied to the destination in every alternative; operand
;; validity is checked with ix86_binary_operator_ok.
1213 (define_insn "sse_movlhps"
1214 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1217 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1218 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1219 (parallel [(const_int 0)
1223 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1225 movlhps\t{%2, %0|%0, %2}
1226 movhps\t{%2, %0|%0, %2}
1227 movlps\t{%2, %H0|%H0, %2}"
1228 [(set_attr "type" "ssemov")
1229 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the result takes elements 2, 6, 3, 7 -- presumably indexed
;; over operand 1 followed by operand 2 (the enclosing selector is not
;; visible here; confirm).  Operand 1 is tied to the destination.
1231 (define_insn "sse_unpckhps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 2) (const_int 6)
1238 (const_int 3) (const_int 7)])))]
1240 "unpckhps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
;; unpcklps: the result takes elements 0, 4, 1, 5 -- presumably indexed
;; over operand 1 followed by operand 2 (the enclosing selector is not
;; visible here; confirm).  Operand 1 is tied to the destination.
1244 (define_insn "sse_unpcklps"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1248 (match_operand:V4SF 1 "register_operand" "0")
1249 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1250 (parallel [(const_int 0) (const_int 4)
1251 (const_int 1) (const_int 5)])))]
1253 "unpcklps\t{%2, %0|%0, %2}"
1254 [(set_attr "type" "sselog")
1255 (set_attr "mode" "V4SF")])
1257 ;; These are modeled with the same vec_concat as the others so that we
1258 ;; capture users of shufps that can use the new instructions
;; movshdup: single-source shuffle of operand 1 (SSE register or
;; memory); the selection list starts at element 1.
1259 (define_insn "sse3_movshdup"
1260 [(set (match_operand:V4SF 0 "register_operand" "=x")
1263 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 1)
1270 "movshdup\t{%1, %0|%0, %1}"
1271 [(set_attr "type" "sse")
1272 (set_attr "prefix_rep" "1")
1273 (set_attr "mode" "V4SF")])
;; movsldup: single-source shuffle of operand 1 (SSE register or
;; memory); the selection list starts at element 0.
1275 (define_insn "sse3_movsldup"
1276 [(set (match_operand:V4SF 0 "register_operand" "=x")
1279 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1281 (parallel [(const_int 0)
1286 "movsldup\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "sse")
1288 (set_attr "prefix_rep" "1")
1289 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate mask in operand 3.  The mask
;; is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) that are
;; passed to sse_shufps_1 as separate selector operands; the two upper
;; fields are biased by 4.
1291 (define_expand "sse_shufps"
1292 [(match_operand:V4SF 0 "register_operand" "")
1293 (match_operand:V4SF 1 "register_operand" "")
1294 (match_operand:V4SF 2 "nonimmediate_operand" "")
1295 (match_operand:SI 3 "const_int_operand" "")]
1298 int mask = INTVAL (operands[3]);
1299 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1300 GEN_INT ((mask >> 0) & 3),
1301 GEN_INT ((mask >> 2) & 3),
1302 GEN_INT (((mask >> 4) & 3) + 4),
1303 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selectors exposed as four operands: operands 3 and 4
;; are in 0..3, operands 5 and 6 are in 4..7.  The output code packs
;; them back into one immediate (removing the bias of 4 from operands 5
;; and 6), stores it in operands[3] and emits shufps.
1307 (define_insn "sse_shufps_1"
1308 [(set (match_operand:V4SF 0 "register_operand" "=x")
1311 (match_operand:V4SF 1 "register_operand" "0")
1312 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1313 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1314 (match_operand 4 "const_0_to_3_operand" "")
1315 (match_operand 5 "const_4_to_7_operand" "")
1316 (match_operand 6 "const_4_to_7_operand" "")])))]
1320 mask |= INTVAL (operands[3]) << 0;
1321 mask |= INTVAL (operands[4]) << 2;
1322 mask |= (INTVAL (operands[5]) - 4) << 4;
1323 mask |= (INTVAL (operands[6]) - 4) << 6;
1324 operands[3] = GEN_INT (mask);
1326 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1328 [(set_attr "type" "sselog")
1329 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of the V4SF operand 1 as a V2SF.
;; Alternatives:
;;   mem <- reg        movhps
;;   reg <- reg        movhlps
;;   reg <- offsettable memory ("o")   movlps from %H1
1331 (define_insn "sse_storehps"
1332 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1334 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1335 (parallel [(const_int 2) (const_int 3)])))]
1338 movhps\t{%1, %0|%0, %1}
1339 movhlps\t{%1, %0|%0, %1}
1340 movlps\t{%H1, %0|%0, %H1}"
1341 [(set_attr "type" "ssemov")
1342 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of operand 1 (tied to the destination) with
;; the V2SF operand 2.  Alternatives:
;;   reg <- memory     movhps
;;   reg <- reg        movlhps
;;   offsettable mem ("o") <- reg   movlps to %H0
1344 (define_insn "sse_loadhps"
1345 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1348 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1349 (parallel [(const_int 0) (const_int 1)]))
1350 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1353 movhps\t{%2, %0|%0, %2}
1354 movlhps\t{%2, %0|%0, %2}
1355 movlps\t{%2, %H0|%H0, %2}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of the V4SF operand 1 as a V2SF.
;; Alternatives:
;;   mem <- reg        movlps
;;   reg <- reg        movaps
;;   reg <- memory     movlps
1359 (define_insn "sse_storelps"
1360 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1362 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1363 (parallel [(const_int 0) (const_int 1)])))]
1366 movlps\t{%1, %0|%0, %1}
1367 movaps\t{%1, %0|%0, %1}
1368 movlps\t{%1, %0|%0, %1}"
1369 [(set_attr "type" "ssemov")
1370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with elements 2 and 3 of the V4SF
;; operand 1.  Alternatives:
;;   operand 2 tied to the destination, operand 1 in a register:
;;       shufps with immediate 0xe4
;;   operand 1 tied, operand 2 in memory:        movlps
;;   destination in memory, operand 2 register:  movlps
1372 (define_insn "sse_loadlps"
1373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1375 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1377 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1378 (parallel [(const_int 2) (const_int 3)]))))]
1381 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1382 movlps\t{%2, %0|%0, %2}
1383 movlps\t{%2, %0|%0, %2}"
1384 [(set_attr "type" "sselog,ssemov,ssemov")
1385 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 2 is the source register and
;; operand 1 is tied to the destination.
1387 (define_insn "sse_movss"
1388 [(set (match_operand:V4SF 0 "register_operand" "=x")
1390 (match_operand:V4SF 2 "register_operand" "x")
1391 (match_operand:V4SF 1 "register_operand" "0")
1394 "movss\t{%2, %0|%0, %2}"
1395 [(set_attr "type" "ssemov")
1396 (set_attr "mode" "SF")])
;; Build a V4SF from the SF operand 1, which is tied to the destination
;; register; emitted as shufps of the register with itself and
;; immediate 0.
1398 (define_insn "*vec_dupv4sf"
1399 [(set (match_operand:V4SF 0 "register_operand" "=x")
1401 (match_operand:SF 1 "register_operand" "0")))]
1403 "shufps\t{$0, %0, %0|%0, %0, 0}"
1404 [(set_attr "type" "sselog1")
1405 (set_attr "mode" "V4SF")])
1407 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1408 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1409 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands.  Four alternatives, two in SSE
;; registers and two in MMX registers ("*y"):
;;   SSE, op1 tied, op2 register:  unpcklps
;;   SSE, op1 memory, op2 "C":     movss
;;   MMX, op1 tied, op2 register:  punpckldq
;;   MMX, op1 memory, op2 "C":     movd
1410 (define_insn "*sse_concatv2sf"
1411 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1413 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1414 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1417 unpcklps\t{%2, %0|%0, %2}
1418 movss\t{%1, %0|%0, %1}
1419 punpckldq\t{%2, %0|%0, %2}
1420 movd\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1422 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or from
;; memory (movhps).
1424 (define_insn "*sse_concatv4sf"
1425 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1427 (match_operand:V2SF 1 "register_operand" " 0,0")
1428 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1431 movlhps\t{%2, %0|%0, %2}
1432 movhps\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "ssemov")
1434 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: delegates to ix86_expand_vector_init
;; with `false' as its first argument, the destination and operand 1.
1436 (define_expand "vec_initv4sf"
1437 [(match_operand:V4SF 0 "register_operand" "")
1438 (match_operand 1 "" "")]
1441 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set an element of a V4SF from the SF operand 2 (presumably element 0,
;; going by the pattern name -- the selecting part of the RTL is not
;; visible here).  Four alternatives are declared; the three templates
;; visible here are movss, movss and movd (for the "*r" integer-register
;; source).  In alternatives 2 and 3 operand 1 is "C" instead of being
;; tied to the destination.
1445 (define_insn "vec_setv4sf_0"
1446 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1449 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1450 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1454 movss\t{%2, %0|%0, %2}
1455 movss\t{%2, %0|%0, %2}
1456 movd\t{%2, %0|%0, %2}
1458 [(set_attr "type" "ssemov")
1459 (set_attr "mode" "SF")])
1461 ;; A subset is vec_setv4sf.
;; insertps form of vec_set.  Operand 3 is a power of two in 1..8; the
;; output code turns it into the insertps immediate by taking its log2
;; and shifting that left by 4.
1462 (define_insn "*vec_setv4sf_sse4_1"
1463 [(set (match_operand:V4SF 0 "register_operand" "=x")
1466 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1467 (match_operand:V4SF 1 "register_operand" "0")
1468 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1471 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1472 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1474 [(set_attr "type" "sselog")
1475 (set_attr "prefix_extra" "1")
1476 (set_attr "mode" "V4SF")])
;; insertps with a caller-supplied immediate (operand 3, 0..255),
;; modelled as an unspec over the source register (operand 2), the
;; value tied to the destination (operand 1) and the immediate.
;; The output template is a plain string: the C-style `;' that used to
;; follow it was only being parsed as an empty md comment and has been
;; dropped.
1478 (define_insn "sse4_1_insertps"
1479 [(set (match_operand:V4SF 0 "register_operand" "=x")
1480 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1481 (match_operand:V4SF 1 "register_operand" "0")
1482 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1485 "insertps\t{%3, %2, %0|%0, %2, %3}"
1486 [(set_attr "type" "sselog")
1487 (set_attr "prefix_extra" "1")
1488 (set_attr "mode" "V4SF")])
;; Post-reload rewrite for a V4SF memory destination with a non-memory
;; SF source (the pattern's header line is not visible here): the
;; replacement code emits a plain SFmode move to offset 0 of the memory
;; operand.
1491 [(set (match_operand:V4SF 0 "memory_operand" "")
1494 (match_operand:SF 1 "nonmemory_operand" ""))
1497 "TARGET_SSE && reload_completed"
1500 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element set: delegates to ix86_expand_vector_set with `false',
;; the destination, the SF value and the constant index in operand 2.
1504 (define_expand "vec_setv4sf"
1505 [(match_operand:V4SF 0 "register_operand" "")
1506 (match_operand:SF 1 "register_operand" "")
1507 (match_operand 2 "const_int_operand" "")]
1510 ix86_expand_vector_set (false, operands[0], operands[1],
1511 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as an SF.  Destination and source may
;; not both be memory.  After reload the pattern is split into a plain
;; move: the source is re-expressed in SFmode (either as the same hard
;; register number or via gen_lowpart -- the test choosing between the
;; two is not visible here) and moved to the destination.
1515 (define_insn_and_split "*vec_extractv4sf_0"
1516 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1518 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1519 (parallel [(const_int 0)])))]
1520 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1522 "&& reload_completed"
1525 rtx op1 = operands[1];
1527 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1529 op1 = gen_lowpart (SFmode, op1);
1530 emit_move_insn (operands[0], op1);
;; extractps: store the element of the V4SF register operand 1 chosen
;; by the constant operand 2 (0..3) into a general register or memory.
1534 (define_insn "*sse4_1_extractps"
1535 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1537 (match_operand:V4SF 1 "register_operand" "x")
1538 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1540 "extractps\t{%2, %1, %0|%0, %1, %2}"
1541 [(set_attr "type" "sselog")
1542 (set_attr "prefix_extra" "1")
1543 (set_attr "mode" "V4SF")])
;; V4SF element extract: delegates to ix86_expand_vector_extract with
;; `false', the SF destination, the vector and the constant index in
;; operand 2.
1545 (define_expand "vec_extractv4sf"
1546 [(match_operand:SF 0 "register_operand" "")
1547 (match_operand:V4SF 1 "register_operand" "")
1548 (match_operand 2 "const_int_operand" "")]
1551 ix86_expand_vector_extract (false, operands[0], operands[1],
1552 INTVAL (operands[2]));
1556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1558 ;; Parallel double-precision floating point arithmetic
1560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation; expansion is handed to
;; ix86_expand_fp_absneg_operator (NEG, ...), after which the expander
;; is DONE.
1562 (define_expand "negv2df2"
1563 [(set (match_operand:V2DF 0 "register_operand" "")
1564 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1566 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value; expansion is handed to
;; ix86_expand_fp_absneg_operator (ABS, ...), after which the expander
;; is DONE.
1568 (define_expand "absv2df2"
1569 [(set (match_operand:V2DF 0 "register_operand" "")
1570 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1572 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted
;; and is expected to match *addv2df3.
1574 (define_expand "addv2df3"
1575 [(set (match_operand:V2DF 0 "register_operand" "")
1576 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1577 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1579 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operands 1 and 2 are commutative ("%0"): operand 1 is tied
;; to the destination and operand 2 may be a register or memory.
1581 (define_insn "*addv2df3"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1585 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1586 "addpd\t{%2, %0|%0, %2}"
1587 [(set_attr "type" "sseadd")
1588 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; The operand check names V2DFmode, this pattern's own mode, as in
;; *addv2df3 and sse2_vmmulv2df3; it previously said V4SFmode, a
;; leftover from the single-precision pattern.
1590 (define_insn "sse2_vmaddv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1597 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1598 "addsd\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sseadd")
1600 (set_attr "mode" "DF")])
;; V2DF subtraction expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1602 (define_expand "subv2df3"
1603 [(set (match_operand:V2DF 0 "register_operand" "")
1604 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1605 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1607 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Not commutative: operand 1 must be the register tied to the
;; destination; operand 2 may be a register or memory.
1609 (define_insn "*subv2df3"
1610 [(set (match_operand:V2DF 0 "register_operand" "=x")
1611 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1612 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1614 "subpd\t{%2, %0|%0, %2}"
1615 [(set_attr "type" "sseadd")
1616 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
1618 (define_insn "sse2_vmsubv2df3"
1619 [(set (match_operand:V2DF 0 "register_operand" "=x")
1621 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1622 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1626 "subsd\t{%2, %0|%0, %2}"
1627 [(set_attr "type" "sseadd")
1628 (set_attr "mode" "DF")])
;; V2DF multiplication expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1630 (define_expand "mulv2df3"
1631 [(set (match_operand:V2DF 0 "register_operand" "")
1632 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1633 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1635 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operands 1 and 2 are commutative ("%0"): operand 1 is tied
;; to the destination and operand 2 may be a register or memory.
1637 (define_insn "*mulv2df3"
1638 [(set (match_operand:V2DF 0 "register_operand" "=x")
1639 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1640 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1641 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1642 "mulpd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "ssemul")
1644 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
1646 (define_insn "sse2_vmmulv2df3"
1647 [(set (match_operand:V2DF 0 "register_operand" "=x")
1649 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1650 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1653 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1654 "mulsd\t{%2, %0|%0, %2}"
1655 [(set_attr "type" "ssemul")
1656 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1
;; is already required to be a register here.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy.
1658 (define_expand "divv2df3"
1659 [(set (match_operand:V2DF 0 "register_operand" "")
1660 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1661 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1663 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is the register tied to the destination; operand 2
;; may be a register or memory.
1665 (define_insn "*divv2df3"
1666 [(set (match_operand:V2DF 0 "register_operand" "=x")
1667 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1668 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1670 "divpd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "ssediv")
1672 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
1674 (define_insn "sse2_vmdivv2df3"
1675 [(set (match_operand:V2DF 0 "register_operand" "=x")
1677 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1678 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1682 "divsd\t{%2, %0|%0, %2}"
1683 [(set_attr "type" "ssediv")
1684 (set_attr "mode" "DF")])
;; sqrtpd: square root of the V2DF operand 1 (register or memory).  The
;; source is not tied to the destination.
1686 (define_insn "sqrtv2df2"
1687 [(set (match_operand:V2DF 0 "register_operand" "=x")
1688 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1690 "sqrtpd\t{%1, %0|%0, %1}"
1691 [(set_attr "type" "sse")
1692 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is the square-root source (register or memory); operand 2 is the
;; value tied to the destination.
1694 (define_insn "sse2_vmsqrtv2df2"
1695 [(set (match_operand:V2DF 0 "register_operand" "=x")
1697 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1698 (match_operand:V2DF 2 "register_operand" "0")
1701 "sqrtsd\t{%1, %0|%0, %1}"
1702 [(set_attr "type" "sse")
1703 (set_attr "mode" "DF")])
1705 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1706 ;; isn't really correct, as those rtl operators aren't defined when
1707 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register first; the operands then go through
;; ix86_fixup_binary_operands_no_copy.
1709 (define_expand "smaxv2df3"
1710 [(set (match_operand:V2DF 0 "register_operand" "")
1711 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1712 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1715 if (!flag_finite_math_only)
1716 operands[1] = force_reg (V2DFmode, operands[1]);
1717 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form ("%0"): only enabled when
;; flag_finite_math_only is set.
1720 (define_insn "*smaxv2df3_finite"
1721 [(set (match_operand:V2DF 0 "register_operand" "=x")
1722 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1723 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1724 "TARGET_SSE2 && flag_finite_math_only
1725 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1726 "maxpd\t{%2, %0|%0, %2}"
1727 [(set_attr "type" "sseadd")
1728 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be the register tied to
;; the destination (no "%" modifier, unlike *smaxv2df3_finite).
1730 (define_insn "*smaxv2df3"
1731 [(set (match_operand:V2DF 0 "register_operand" "=x")
1732 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1733 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1735 "maxpd\t{%2, %0|%0, %2}"
1736 [(set_attr "type" "sseadd")
1737 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
1739 (define_insn "sse2_vmsmaxv2df3"
1740 [(set (match_operand:V2DF 0 "register_operand" "=x")
1742 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1743 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1747 "maxsd\t{%2, %0|%0, %2}"
1748 [(set_attr "type" "sseadd")
1749 (set_attr "mode" "DF")])
;; V2DF minimum expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register first; the operands then go through
;; ix86_fixup_binary_operands_no_copy.
1751 (define_expand "sminv2df3"
1752 [(set (match_operand:V2DF 0 "register_operand" "")
1753 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1754 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1757 if (!flag_finite_math_only)
1758 operands[1] = force_reg (V2DFmode, operands[1]);
1759 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form ("%0"): only enabled when
;; flag_finite_math_only is set.
1762 (define_insn "*sminv2df3_finite"
1763 [(set (match_operand:V2DF 0 "register_operand" "=x")
1764 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1765 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1766 "TARGET_SSE2 && flag_finite_math_only
1767 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1768 "minpd\t{%2, %0|%0, %2}"
1769 [(set_attr "type" "sseadd")
1770 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be the register tied to
;; the destination (no "%" modifier, unlike *sminv2df3_finite).
1772 (define_insn "*sminv2df3"
1773 [(set (match_operand:V2DF 0 "register_operand" "=x")
1774 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1775 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1777 "minpd\t{%2, %0|%0, %2}"
1778 [(set_attr "type" "sseadd")
1779 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands (the intrinsic-facing "vm" form).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
1781 (define_insn "sse2_vmsminv2df3"
1782 [(set (match_operand:V2DF 0 "register_operand" "=x")
1784 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1785 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1789 "minsd\t{%2, %0|%0, %2}"
1790 [(set_attr "type" "sseadd")
1791 (set_attr "mode" "DF")])
;; addsubpd.  The RTL combines an operation on operands 1 and 2 with
;; (minus op1 op2) on the same operands (via match_dup); how the two
;; results are merged is not visible here.  Operand 1 is tied to the
;; destination.
1793 (define_insn "sse3_addsubv2df3"
1794 [(set (match_operand:V2DF 0 "register_operand" "=x")
1797 (match_operand:V2DF 1 "register_operand" "0")
1798 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1799 (minus:V2DF (match_dup 1) (match_dup 2))
1802 "addsubpd\t{%2, %0|%0, %2}"
1803 [(set_attr "type" "sseadd")
1804 (set_attr "mode" "V2DF")])
;; haddpd.  The RTL pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2 (the operator joining each
;; pair is not visible here; presumably a DF add).  Operand 1 is tied
;; to the destination.
1806 (define_insn "sse3_haddv2df3"
1807 [(set (match_operand:V2DF 0 "register_operand" "=x")
1811 (match_operand:V2DF 1 "register_operand" "0")
1812 (parallel [(const_int 0)]))
1813 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1816 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1817 (parallel [(const_int 0)]))
1818 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1820 "haddpd\t{%2, %0|%0, %2}"
1821 [(set_attr "type" "sseadd")
1822 (set_attr "mode" "V2DF")])
;; hsubpd.  Same shape as sse3_haddv2df3: element 0 and element 1 of
;; operand 1 form one pair, element 0 and element 1 of operand 2 the
;; other (the joining operator is not visible here; presumably a DF
;; subtract).  Operand 1 is tied to the destination.
1824 (define_insn "sse3_hsubv2df3"
1825 [(set (match_operand:V2DF 0 "register_operand" "=x")
1829 (match_operand:V2DF 1 "register_operand" "0")
1830 (parallel [(const_int 0)]))
1831 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1834 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1835 (parallel [(const_int 0)]))
1836 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1838 "hsubpd\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "sseadd")
1840 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF: emits sse3_haddv2df3 with operand 1 used as
;; both source operands.
1842 (define_expand "reduc_splus_v2df"
1843 [(match_operand:V2DF 0 "register_operand" "")
1844 (match_operand:V2DF 1 "register_operand" "")]
1847 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1853 ;; Parallel double-precision floating point comparisons
1855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare producing a V2DF result.  The comparison is
;; matched by operand 3 (sse_comparison_operator) and spliced into the
;; mnemonic through %D3, giving cmp<cond>pd.
1857 (define_insn "sse2_maskcmpv2df3"
1858 [(set (match_operand:V2DF 0 "register_operand" "=x")
1859 (match_operator:V2DF 3 "sse_comparison_operator"
1860 [(match_operand:V2DF 1 "register_operand" "0")
1861 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1863 "cmp%D3pd\t{%2, %0|%0, %2}"
1864 [(set_attr "type" "ssecmp")
1865 (set_attr "mode" "V2DF")])
;; Scalar DF compare producing a DF result.  The comparison is matched
;; by operand 3 and spliced into the mnemonic through %D3, giving
;; cmp<cond>sd.
1867 (define_insn "sse2_maskcmpdf3"
1868 [(set (match_operand:DF 0 "register_operand" "=x")
1869 (match_operator:DF 3 "sse_comparison_operator"
1870 [(match_operand:DF 1 "register_operand" "0")
1871 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1873 "cmp%D3sd\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "ssecmp")
1875 (set_attr "mode" "DF")])
;; cmp<cond>sd on V2DF operands (the intrinsic-facing "vm" form); the
;; comparison is operand 3, printed through %D3.
1877 (define_insn "sse2_vmmaskcmpv2df3"
1878 [(set (match_operand:V2DF 0 "register_operand" "=x")
1880 (match_operator:V2DF 3 "sse_comparison_operator"
1881 [(match_operand:V2DF 1 "register_operand" "0")
1882 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1886 "cmp%D3sd\t{%2, %0|%0, %2}"
1887 [(set_attr "type" "ssecmp")
1888 (set_attr "mode" "DF")])
;; comisd: sets the flags register (in CCFP mode) from element 0 of
;; operand 0 and element 0 of operand 1.
1890 (define_insn "sse2_comi"
1891 [(set (reg:CCFP FLAGS_REG)
1894 (match_operand:V2DF 0 "register_operand" "x")
1895 (parallel [(const_int 0)]))
1897 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1898 (parallel [(const_int 0)]))))]
1900 "comisd\t{%1, %0|%0, %1}"
1901 [(set_attr "type" "ssecomi")
1902 (set_attr "mode" "DF")])
;; ucomisd: same shape as sse2_comi, but the flags register is set in
;; CCFPU mode.
1904 (define_insn "sse2_ucomi"
1905 [(set (reg:CCFPU FLAGS_REG)
1908 (match_operand:V2DF 0 "register_operand" "x")
1909 (parallel [(const_int 0)]))
1911 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1912 (parallel [(const_int 0)]))))]
1914 "ucomisd\t{%1, %0|%0, %1}"
1915 [(set_attr "type" "ssecomi")
1916 (set_attr "mode" "DF")])
;; V2DF vector conditional.  Operand 3 is the comparison applied to
;; operands 4 and 5; operands 1 and 2 are the two values selected
;; between.  Expansion is delegated to ix86_expand_fp_vcond, whose
;; return value is tested (the code acting on it is not visible here).
1918 (define_expand "vcondv2df"
1919 [(set (match_operand:V2DF 0 "register_operand" "")
1921 (match_operator 3 ""
1922 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1923 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1924 (match_operand:V2DF 1 "general_operand" "")
1925 (match_operand:V2DF 2 "general_operand" "")))]
1928 if (ix86_expand_fp_vcond (operands))
1934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1936 ;; Parallel double-precision floating point logical operations
1938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1940 (define_expand "andv2df3"
1941 [(set (match_operand:V2DF 0 "register_operand" "")
1942 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1943 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1945 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operands 1 and 2 are commutative ("%0"): operand 1 is tied
;; to the destination and operand 2 may be a register or memory.
1947 (define_insn "*andv2df3"
1948 [(set (match_operand:V2DF 0 "register_operand" "=x")
1949 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1950 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1951 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1952 "andpd\t{%2, %0|%0, %2}"
1953 [(set_attr "type" "sselog")
1954 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  The complemented operand is
;; the one tied to the destination.
1956 (define_insn "sse2_nandv2df3"
1957 [(set (match_operand:V2DF 0 "register_operand" "=x")
1958 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1959 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1961 "andnpd\t{%2, %0|%0, %2}"
1962 [(set_attr "type" "sselog")
1963 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1965 (define_expand "iorv2df3"
1966 [(set (match_operand:V2DF 0 "register_operand" "")
1967 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1968 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1970 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operands 1 and 2 are commutative ("%0"): operand 1 is tied to
;; the destination and operand 2 may be a register or memory.
1972 (define_insn "*iorv2df3"
1973 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1975 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1976 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1977 "orpd\t{%2, %0|%0, %2}"
1978 [(set_attr "type" "sselog")
1979 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1981 (define_expand "xorv2df3"
1982 [(set (match_operand:V2DF 0 "register_operand" "")
1983 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1984 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1986 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operands 1 and 2 are commutative ("%0"): operand 1 is tied
;; to the destination and operand 2 may be a register or memory.
1988 (define_insn "*xorv2df3"
1989 [(set (match_operand:V2DF 0 "register_operand" "=x")
1990 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1991 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1992 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1993 "xorpd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "sselog")
1995 (set_attr "mode" "V2DF")])
1997 ;; Also define scalar versions. These are used for abs, neg, and
1998 ;; conditional move. Using subregs into vector modes causes register
1999 ;; allocation lossage. These patterns do not allow memory operands
2000 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND, register operands only; emitted as the packed
;; andpd, hence the V2DF "mode" attribute.
2002 (define_insn "*anddf3"
2003 [(set (match_operand:DF 0 "register_operand" "=x")
2004 (and:DF (match_operand:DF 1 "register_operand" "0")
2005 (match_operand:DF 2 "register_operand" "x")))]
2007 "andpd\t{%2, %0|%0, %2}"
2008 [(set_attr "type" "sselog")
2009 (set_attr "mode" "V2DF")])
;; Scalar DF and-not, register operands only: (NOT operand 1) AND
;; operand 2, emitted as the packed andnpd.
2011 (define_insn "*nanddf3"
2012 [(set (match_operand:DF 0 "register_operand" "=x")
2013 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2014 (match_operand:DF 2 "register_operand" "x")))]
2016 "andnpd\t{%2, %0|%0, %2}"
2017 [(set_attr "type" "sselog")
2018 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR, register operands only; emitted as the packed
;; orpd.
2020 (define_insn "*iordf3"
2021 [(set (match_operand:DF 0 "register_operand" "=x")
2022 (ior:DF (match_operand:DF 1 "register_operand" "0")
2023 (match_operand:DF 2 "register_operand" "x")))]
2025 "orpd\t{%2, %0|%0, %2}"
2026 [(set_attr "type" "sselog")
2027 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR, register operands only; emitted as the packed
;; xorpd.
2029 (define_insn "*xordf3"
2030 [(set (match_operand:DF 0 "register_operand" "=x")
2031 (xor:DF (match_operand:DF 1 "register_operand" "0")
2032 (match_operand:DF 2 "register_operand" "x")))]
2034 "xorpd\t{%2, %0|%0, %2}"
2035 [(set_attr "type" "sselog")
2036 (set_attr "mode" "V2DF")])
2038 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2040 ;; Parallel double-precision floating point conversion operations
2042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert a V2SI to V2DF.  Two alternatives: MMX register
;; source (tagged unit "mmx") and memory source.
2044 (define_insn "sse2_cvtpi2pd"
2045 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2046 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2048 "cvtpi2pd\t{%1, %0|%0, %1}"
2049 [(set_attr "type" "ssecvt")
2050 (set_attr "unit" "mmx,*")
2051 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion into an MMX register, modelled as
;; UNSPEC_FIX_NOTRUNC.
2053 (define_insn "sse2_cvtpd2pi"
2054 [(set (match_operand:V2SI 0 "register_operand" "=y")
2055 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2056 UNSPEC_FIX_NOTRUNC))]
2058 "cvtpd2pi\t{%1, %0|%0, %1}"
2059 [(set_attr "type" "ssecvt")
2060 (set_attr "unit" "mmx")
2061 (set_attr "prefix_data16" "1")
2062 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI conversion into an MMX register, expressed
;; with the RTL `fix' operator.
;; NOTE(review): "mode" is "TI" here but "DI" on sse2_cvtpd2pi --
;; confirm the difference is intended.
2064 (define_insn "sse2_cvttpd2pi"
2065 [(set (match_operand:V2SI 0 "register_operand" "=y")
2066 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2068 "cvttpd2pi\t{%1, %0|%0, %1}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "unit" "mmx")
2071 (set_attr "prefix_data16" "1")
2072 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SI operand 2 to DF.  Two alternatives: integer
;; register source and memory source; the decode attributes are given
;; per alternative.  Operand 1 is the V2DF value tied to the destination.
2074 (define_insn "sse2_cvtsi2sd"
2075 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2078 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2079 (match_operand:V2DF 1 "register_operand" "0,0")
2082 "cvtsi2sd\t{%2, %0|%0, %2}"
2083 [(set_attr "type" "sseicvt")
2084 (set_attr "mode" "DF")
2085 (set_attr "athlon_decode" "double,direct")
2086 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit counterpart of sse2_cvtsi2sd, converting the DI
;; operand 2 to DF.  Requires TARGET_SSE2 && TARGET_64BIT.
2088 (define_insn "sse2_cvtsi2sdq"
2089 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2092 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2093 (match_operand:V2DF 1 "register_operand" "0,0")
2095 "TARGET_SSE2 && TARGET_64BIT"
2096 "cvtsi2sdq\t{%2, %0|%0, %2}"
2097 [(set_attr "type" "sseicvt")
2098 (set_attr "mode" "DF")
2099 (set_attr "athlon_decode" "double,direct")
2100 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si (intrinsic form): element 0 of the V2DF operand 1 is converted
;; to an SImode general register; the conversion is wrapped in
;; UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm that is intended.
2102 (define_insn "sse2_cvtsd2si"
2103 [(set (match_operand:SI 0 "register_operand" "=r,r")
2106 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2107 (parallel [(const_int 0)]))]
2108 UNSPEC_FIX_NOTRUNC))]
2110 "cvtsd2si\t{%1, %0|%0, %1}"
2111 [(set_attr "type" "sseicvt")
2112 (set_attr "athlon_decode" "double,vector")
2113 (set_attr "prefix_rep" "1")
2114 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar DF operand (SSE register or memory) instead of a
;; V2DF vector; same UNSPEC_FIX_NOTRUNC representation and same assembler
;; template as sse2_cvtsd2si.
2116 (define_insn "sse2_cvtsd2si_2"
2117 [(set (match_operand:SI 0 "register_operand" "=r,r")
2118 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2119 UNSPEC_FIX_NOTRUNC))]
2121 "cvtsd2si\t{%1, %0|%0, %1}"
2122 [(set_attr "type" "sseicvt")
2123 (set_attr "athlon_decode" "double,vector")
2124 (set_attr "amdfam10_decode" "double,double")
2125 (set_attr "prefix_rep" "1")
2126 (set_attr "mode" "SI")])
;; cvtsd2siq: 64-bit result variant of sse2_cvtsd2si.  Element 0 of the
;; V2DF operand 1 is converted (UNSPEC_FIX_NOTRUNC) into a DImode general
;; register.  Requires TARGET_SSE2 && TARGET_64BIT.
2128 (define_insn "sse2_cvtsd2siq"
2129 [(set (match_operand:DI 0 "register_operand" "=r,r")
2132 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2133 (parallel [(const_int 0)]))]
2134 UNSPEC_FIX_NOTRUNC))]
2135 "TARGET_SSE2 && TARGET_64BIT"
2136 "cvtsd2siq\t{%1, %0|%0, %1}"
2137 [(set_attr "type" "sseicvt")
2138 (set_attr "athlon_decode" "double,vector")
2139 (set_attr "prefix_rep" "1")
2140 (set_attr "mode" "DI")])
;; cvtsd2siq on a scalar DF operand; DImode counterpart of
;; sse2_cvtsd2si_2.  Requires TARGET_SSE2 && TARGET_64BIT.
2142 (define_insn "sse2_cvtsd2siq_2"
2143 [(set (match_operand:DI 0 "register_operand" "=r,r")
2144 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2145 UNSPEC_FIX_NOTRUNC))]
2146 "TARGET_SSE2 && TARGET_64BIT"
2147 "cvtsd2siq\t{%1, %0|%0, %1}"
2148 [(set_attr "type" "sseicvt")
2149 (set_attr "athlon_decode" "double,vector")
2150 (set_attr "amdfam10_decode" "double,double")
2151 (set_attr "prefix_rep" "1")
2152 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SImode general register.
2154 (define_insn "sse2_cvttsd2si"
2155 [(set (match_operand:SI 0 "register_operand" "=r,r")
2158 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2159 (parallel [(const_int 0)]))))]
2161 "cvttsd2si\t{%1, %0|%0, %1}"
2162 [(set_attr "type" "sseicvt")
2163 (set_attr "prefix_rep" "1")
2164 (set_attr "mode" "SI")
2165 (set_attr "athlon_decode" "double,vector")
2166 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2168 (define_insn "sse2_cvttsd2siq"
2169 [(set (match_operand:DI 0 "register_operand" "=r,r")
2172 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2173 (parallel [(const_int 0)]))))]
2174 "TARGET_SSE2 && TARGET_64BIT"
2175 "cvttsd2siq\t{%1, %0|%0, %1}"
2176 [(set_attr "type" "sseicvt")
2177 (set_attr "prefix_rep" "1")
2178 (set_attr "mode" "DI")
2179 (set_attr "athlon_decode" "double,vector")
2180 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces a V2DF result from elements 0 and 1 of the V4SI
;; operand 1 (SSE register or memory).
2182 (define_insn "sse2_cvtdq2pd"
2183 [(set (match_operand:V2DF 0 "register_operand" "=x")
2186 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2187 (parallel [(const_int 0) (const_int 1)]))))]
2189 "cvtdq2pd\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "ssecvt")
2191 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq (V2DF -> V4SI destination).  The preparation
;; statement supplies operand 2 as a V2SI zero vector, matching the
;; const0_operand required by the *sse2_cvtpd2dq insn.
2193 (define_expand "sse2_cvtpd2dq"
2194 [(set (match_operand:V4SI 0 "register_operand" "")
2196 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2200 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for the sse2_cvtpd2dq expander.  Operand 2 must be a
;; V2SI zero constant (const0_operand); it does not appear in the
;; assembler template.
2202 (define_insn "*sse2_cvtpd2dq"
2203 [(set (match_operand:V4SI 0 "register_operand" "=x")
2205 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2207 (match_operand:V2SI 2 "const0_operand" "")))]
2209 "cvtpd2dq\t{%1, %0|%0, %1}"
2210 [(set_attr "type" "ssecvt")
2211 (set_attr "prefix_rep" "1")
2212 (set_attr "mode" "TI")
2213 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: the V2DF operand 1 goes through RTL `fix' to
;; V2SI.  The preparation statement supplies operand 2 as a V2SI zero
;; vector, matching the const0_operand of the *sse2_cvttpd2dq insn.
2215 (define_expand "sse2_cvttpd2dq"
2216 [(set (match_operand:V4SI 0 "register_operand" "")
2218 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2221 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for the sse2_cvttpd2dq expander.  Operand 2 must be a
;; V2SI zero constant and is not referenced by the assembler template.
2223 (define_insn "*sse2_cvttpd2dq"
2224 [(set (match_operand:V4SI 0 "register_operand" "=x")
2226 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2227 (match_operand:V2SI 2 "const0_operand" "")))]
2229 "cvttpd2dq\t{%1, %0|%0, %1}"
2230 [(set_attr "type" "ssecvt")
2231 (set_attr "prefix_rep" "1")
2232 (set_attr "mode" "TI")
2233 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: narrows double-precision data from operand 2 (SSE register
;; or memory) via float_truncate.  Operand 1 is tied to the V4SF
;; destination, so the instruction updates operand 0 in place.
2235 (define_insn "sse2_cvtsd2ss"
2236 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2239 (float_truncate:V2SF
2240 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2241 (match_operand:V4SF 1 "register_operand" "0,0")
2244 "cvtsd2ss\t{%2, %0|%0, %2}"
2245 [(set_attr "type" "ssecvt")
2246 (set_attr "athlon_decode" "vector,double")
2247 (set_attr "amdfam10_decode" "vector,double")
2248 (set_attr "mode" "SF")])
;; cvtss2sd: takes elements 0 and 1 of the V4SF operand 2 (SSE register
;; or memory) as the conversion source.  Operand 1 is tied to the V2DF
;; destination, so the instruction updates operand 0 in place.
2250 (define_insn "sse2_cvtss2sd"
2251 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2255 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2256 (parallel [(const_int 0) (const_int 1)])))
2257 (match_operand:V2DF 1 "register_operand" "0,0")
2260 "cvtss2sd\t{%2, %0|%0, %2}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "amdfam10_decode" "vector,double")
2263 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: float_truncate of the V2DF operand 1 to V2SF
;; for a V4SF destination.  The preparation statement supplies operand 2
;; as a V2SF zero vector, matching the const0_operand of *sse2_cvtpd2ps.
2265 (define_expand "sse2_cvtpd2ps"
2266 [(set (match_operand:V4SF 0 "register_operand" "")
2268 (float_truncate:V2SF
2269 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2272 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for the sse2_cvtpd2ps expander.  Operand 2 must be a
;; V2SF zero constant and is not referenced by the assembler template.
2274 (define_insn "*sse2_cvtpd2ps"
2275 [(set (match_operand:V4SF 0 "register_operand" "=x")
2277 (float_truncate:V2SF
2278 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2279 (match_operand:V2SF 2 "const0_operand" "")))]
2281 "cvtpd2ps\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "prefix_data16" "1")
2284 (set_attr "mode" "V4SF")
2285 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produces a V2DF result from elements 0 and 1 of the V4SF
;; operand 1 (SSE register or memory).
2287 (define_insn "sse2_cvtps2pd"
2288 [(set (match_operand:V2DF 0 "register_operand" "=x")
2291 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2292 (parallel [(const_int 0) (const_int 1)]))))]
2294 "cvtps2pd\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "ssecvt")
2296 (set_attr "mode" "V2DF")
2297 (set_attr "amdfam10_decode" "direct")])
;; Two-step expansion.  The first set is a permutation involving the V4SF
;; operand 1 (its selection list starts at index 6); the second set
;; produces the V2DF operand 0 from elements 0 and 1 of an intermediate.
;; The intermediate, operands[2], is a fresh V4SF pseudo register.
2299 (define_expand "vec_unpacks_hi_v4sf"
2304 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2305 (parallel [(const_int 6)
2309 (set (match_operand:V2DF 0 "register_operand" "")
2313 (parallel [(const_int 0) (const_int 1)]))))]
2316 operands[2] = gen_reg_rtx (V4SFmode);
;; Produces the V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1; no temporary is needed.
2319 (define_expand "vec_unpacks_lo_v4sf"
2320 [(set (match_operand:V2DF 0 "register_operand" "")
2323 (match_operand:V4SF 1 "nonimmediate_operand" "")
2324 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_hi_v8hi widens
;; operand 1 into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2327 (define_expand "vec_unpacks_float_hi_v8hi"
2328 [(match_operand:V4SF 0 "register_operand" "")
2329 (match_operand:V8HI 1 "register_operand" "")]
2332 rtx tmp = gen_reg_rtx (V4SImode);
2334 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2335 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacks_lo_v8hi widens
;; operand 1 into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2339 (define_expand "vec_unpacks_float_lo_v8hi"
2340 [(match_operand:V4SF 0 "register_operand" "")
2341 (match_operand:V8HI 1 "register_operand" "")]
2344 rtx tmp = gen_reg_rtx (V4SImode);
2346 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2347 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_hi_v8hi widens
;; operand 1 into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2351 (define_expand "vec_unpacku_float_hi_v8hi"
2352 [(match_operand:V4SF 0 "register_operand" "")
2353 (match_operand:V8HI 1 "register_operand" "")]
2356 rtx tmp = gen_reg_rtx (V4SImode);
2358 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2359 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: gen_vec_unpacku_lo_v8hi widens
;; operand 1 into a V4SI temporary, then gen_sse2_cvtdq2ps converts that
;; temporary into operand 0.
2363 (define_expand "vec_unpacku_float_lo_v8hi"
2364 [(match_operand:V4SF 0 "register_operand" "")
2365 (match_operand:V8HI 1 "register_operand" "")]
2368 rtx tmp = gen_reg_rtx (V4SImode);
2370 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2371 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion.  The first set is a permutation of the V4SI
;; operand 1 (its selection list starts at index 2); the second set
;; produces the V2DF operand 0 from elements 0 and 1 of an intermediate.
;; The intermediate, operands[2], is a fresh V4SI pseudo register.
2375 (define_expand "vec_unpacks_float_hi_v4si"
2378 (match_operand:V4SI 1 "nonimmediate_operand" "")
2379 (parallel [(const_int 2)
2383 (set (match_operand:V2DF 0 "register_operand" "")
2387 (parallel [(const_int 0) (const_int 1)]))))]
2390 operands[2] = gen_reg_rtx (V4SImode);
;; Produces the V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1; no temporary is needed.
2393 (define_expand "vec_unpacks_float_lo_v4si"
2394 [(set (match_operand:V2DF 0 "register_operand" "")
2397 (match_operand:V4SI 1 "nonimmediate_operand" "")
2398 (parallel [(const_int 0) (const_int 1)]))))]
;; Pack two V2DF inputs into one V4SF result: each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and
;; gen_sse_movlhps then combines r1 and r2 into operand 0.
2401 (define_expand "vec_pack_trunc_v2df"
2402 [(match_operand:V4SF 0 "register_operand" "")
2403 (match_operand:V2DF 1 "nonimmediate_operand" "")
2404 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2409 r1 = gen_reg_rtx (V4SFmode);
2410 r2 = gen_reg_rtx (V4SFmode);
2412 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2413 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2414 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI result: each input is converted
;; with gen_sse2_cvttpd2dq into a V4SI temporary, and the two temporaries
;; are combined by gen_sse2_punpcklqdq, with all three operands viewed as
;; V2DI through gen_lowpart.
2418 (define_expand "vec_pack_sfix_trunc_v2df"
2419 [(match_operand:V4SI 0 "register_operand" "")
2420 (match_operand:V2DF 1 "nonimmediate_operand" "")
2421 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2426 r1 = gen_reg_rtx (V4SImode);
2427 r2 = gen_reg_rtx (V4SImode);
2429 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2430 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2431 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2432 gen_lowpart (V2DImode, r1),
2433 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with gen_sse2_cvtpd2dq (the UNSPEC_FIX_NOTRUNC pattern)
;; before the two V4SI temporaries are combined by gen_sse2_punpcklqdq.
2437 (define_expand "vec_pack_sfix_v2df"
2438 [(match_operand:V4SI 0 "register_operand" "")
2439 (match_operand:V2DF 1 "nonimmediate_operand" "")
2440 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2445 r1 = gen_reg_rtx (V4SImode);
2446 r2 = gen_reg_rtx (V4SImode);
2448 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2449 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2450 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2451 gen_lowpart (V2DImode, r1),
2452 gen_lowpart (V2DImode, r2)));
2457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2459 ;; Parallel double-precision floating point element swizzling
2461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, one assembler template each:
;;   1. register form            -> unpckhpd
;;   2. operand 1 in offsettable memory ("o"), operand 2 tied to dest
;;                               -> movlpd from %H1
;;   3. memory destination       -> movhpd store of operand 1
;; The condition rejects operands 1 and 2 both being memory.
2463 (define_insn "sse2_unpckhpd"
2464 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2467 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2468 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2469 (parallel [(const_int 1)
2471 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2473 unpckhpd\t{%2, %0|%0, %2}
2474 movlpd\t{%H1, %0|%0, %H1}
2475 movhpd\t{%1, %0|%0, %1}"
2476 [(set_attr "type" "sselog,ssemov,ssemov")
2477 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup.  Alternative 1 loads into an SSE register from a
;; register or memory source; alternative 2 has an offsettable-memory
;; destination with a register source.  The condition rejects a
;; memory-to-memory combination.
2479 (define_insn "*sse3_movddup"
2480 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2483 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2485 (parallel [(const_int 0)
2487 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2489 movddup\t{%1, %0|%0, %1}
2491 [(set_attr "type" "sselog1,ssemov")
2492 (set_attr "mode" "V2DF")])
;; Post-reload SSE3 split for a V2DF memory destination with a register
;; source: the DF view of the source register is stored twice, at byte
;; offsets 0 and 8 of the destination.
2495 [(set (match_operand:V2DF 0 "memory_operand" "")
2498 (match_operand:V2DF 1 "register_operand" "")
2500 (parallel [(const_int 0)
2502 "TARGET_SSE3 && reload_completed"
2505 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2506 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2507 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Operand 1 is tied to the destination in every alternative:
;;   1. register operand 2              -> unpcklpd
;;   2. memory operand 2                -> movhpd load
;;   3. offsettable-memory destination  -> movlpd store of operand 2 to %H0
;; The condition rejects operands 1 and 2 both being memory.
2511 (define_insn "sse2_unpcklpd"
2512 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2515 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2516 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2517 (parallel [(const_int 0)
2519 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2521 unpcklpd\t{%2, %0|%0, %2}
2522 movhpd\t{%2, %0|%0, %2}
2523 movlpd\t{%2, %H0|%H0, %2}"
2524 [(set_attr "type" "sselog,ssemov,ssemov")
2525 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 (a const_int) and
;; forwarding to gen_sse2_shufpd_1 with explicit selector operands
;; derived from the mask bits.  Mask bit 1 becomes the selector 3 (set)
;; or 2 (clear), the range accepted by const_2_to_3_operand in
;; sse2_shufpd_1.
2527 (define_expand "sse2_shufpd"
2528 [(match_operand:V2DF 0 "register_operand" "")
2529 (match_operand:V2DF 1 "register_operand" "")
2530 (match_operand:V2DF 2 "nonimmediate_operand" "")
2531 (match_operand:SI 3 "const_int_operand" "")]
2534 int mask = INTVAL (operands[3]);
2535 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2537 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors: operand 3 is 0 or 1 and
;; operand 4 is 2 or 3.  The output code rebuilds the 2-bit immediate
;; (bit 0 = operand 3, bit 1 = operand 4 - 2) and overwrites operands[3]
;; with it so the template can print it as %3.
2541 (define_insn "sse2_shufpd_1"
2542 [(set (match_operand:V2DF 0 "register_operand" "=x")
2545 (match_operand:V2DF 1 "register_operand" "0")
2546 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2547 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2548 (match_operand 4 "const_2_to_3_operand" "")])))]
2552 mask = INTVAL (operands[3]);
2553 mask |= (INTVAL (operands[4]) - 2) << 1;
2554 operands[3] = GEN_INT (mask);
2556 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2558 [(set_attr "type" "sselog")
2559 (set_attr "mode" "V2DF")])
;; Extract element 1 of the V2DF operand 1 as a DF value.  Three
;; alternatives (memory destination; SSE register tied to the source;
;; register loaded from offsettable memory).  The first is a movhpd
;; store.  The condition rejects a memory-to-memory combination.
2561 (define_insn "sse2_storehpd"
2562 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2564 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2565 (parallel [(const_int 1)])))]
2566 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2568 movhpd\t{%1, %0|%0, %1}
2571 [(set_attr "type" "ssemov,sselog1,ssemov")
2572 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF memory operand into
;; a DF register becomes a plain DF move from byte offset 8 of that
;; memory operand.
2575 [(set (match_operand:DF 0 "register_operand" "")
2577 (match_operand:V2DF 1 "memory_operand" "")
2578 (parallel [(const_int 1)])))]
2579 "TARGET_SSE2 && reload_completed"
2580 [(set (match_dup 0) (match_dup 1))]
2582 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of the V2DF operand 1 as a DF value.  Three
;; alternatives (memory destination; SSE register from SSE register;
;; register from memory).  The first is a movlpd store.  The condition
;; rejects a memory-to-memory combination.
2585 (define_insn "sse2_storelpd"
2586 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2588 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2589 (parallel [(const_int 0)])))]
2590 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2592 movlpd\t{%1, %0|%0, %1}
2595 [(set_attr "type" "ssemov")
2596 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of the V2DF operand 1 into a
;; DF register becomes a plain DF move.  The source is re-expressed in
;; DFmode either as a hard register with the same register number
;; (gen_rtx_REG) or through gen_lowpart.
;; NOTE(review): presumably the choice depends on whether operand 1 is a
;; register -- confirm against the full preparation code.
2599 [(set (match_operand:DF 0 "register_operand" "")
2601 (match_operand:V2DF 1 "nonimmediate_operand" "")
2602 (parallel [(const_int 0)])))]
2603 "TARGET_SSE2 && reload_completed"
2606 rtx op1 = operands[1];
2608 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2610 op1 = gen_lowpart (DFmode, op1);
2611 emit_move_insn (operands[0], op1);
;; Combines element 0 of the V2DF operand 1 with the DF operand 2.
;; Four alternatives: memory operand 2 (movhpd), register operand 2 with
;; operand 1 tied to the destination (unpcklpd), operand 2 tied to the
;; destination (shufpd with immediate 1), and an offsettable-memory
;; destination (type "other").  The condition rejects operands 1 and 2
;; both being memory.
2615 (define_insn "sse2_loadhpd"
2616 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2619 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2620 (parallel [(const_int 0)]))
2621 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2622 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2624 movhpd\t{%2, %0|%0, %2}
2625 unpcklpd\t{%2, %0|%0, %2}
2626 shufpd\t{$1, %1, %0|%0, %1, 1}
2628 [(set_attr "type" "ssemov,sselog,sselog,other")
2629 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for a V2DF memory destination whose element 0 is
;; kept (vec_select of the same memory) while the DF register operand 1
;; is placed after it: this becomes a plain DF store at byte offset 8.
2632 [(set (match_operand:V2DF 0 "memory_operand" "")
2634 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2635 (match_operand:DF 1 "register_operand" "")))]
2636 "TARGET_SSE2 && reload_completed"
2637 [(set (match_dup 0) (match_dup 1))]
2639 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combines the DF operand 2 with element 1 of the V2DF operand 1.
;; Six alternatives; operand 1 may also be the zero constant ("C") in the
;; first one.  The last alternative has a memory destination and type
;; "other".  The condition rejects operands 1 and 2 both being memory.
2642 (define_insn "sse2_loadlpd"
2643 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2645 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2647 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2648 (parallel [(const_int 1)]))))]
2649 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2651 movsd\t{%2, %0|%0, %2}
2652 movlpd\t{%2, %0|%0, %2}
2653 movsd\t{%2, %0|%0, %2}
2654 shufpd\t{$2, %2, %0|%0, %2, 2}
2655 movhpd\t{%H1, %0|%0, %H1}
2657 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2658 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF memory destination whose element 1 is
;; kept (vec_select of the same memory, index 1) while the DF register
;; operand 1 becomes element 0.  Only element 0 changes, so this is a
;; plain DF store to the start of the vector (byte offset 0).  Storing at
;; offset 8 would overwrite the preserved element 1 instead.
2661 [(set (match_operand:V2DF 0 "memory_operand" "")
2663 (match_operand:DF 1 "register_operand" "")
2664 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2665 "TARGET_SSE2 && reload_completed"
2666 [(set (match_dup 0) (match_dup 1))]
2668 operands[0] = adjust_address (operands[0], DFmode, 0);
2671 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of
;; a V2DF value, implemented with single-precision moves: movhps store,
;; movhlps register form, or movlps load from %H1.  The condition rejects
;; a memory-to-memory combination.
2673 (define_insn "*vec_extractv2df_1_sse"
2674 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2676 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2677 (parallel [(const_int 1)])))]
2678 "!TARGET_SSE2 && TARGET_SSE
2679 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2681 movhps\t{%1, %0|%0, %1}
2682 movhlps\t{%1, %0|%0, %1}
2683 movlps\t{%H1, %0|%0, %H1}"
2684 [(set_attr "type" "ssemov")
2685 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of
;; a V2DF value: movlps store, movaps register copy, or movlps load.
;; The condition rejects a memory-to-memory combination.
2687 (define_insn "*vec_extractv2df_0_sse"
2688 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2690 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2691 (parallel [(const_int 0)])))]
2692 "!TARGET_SSE2 && TARGET_SSE
2693 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2695 movlps\t{%1, %0|%0, %1}
2696 movaps\t{%1, %0|%0, %1}
2697 movlps\t{%1, %0|%0, %1}"
2698 [(set_attr "type" "ssemov")
2699 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six alternatives, one assembler template each, combining the V2DF
;; operands 1 and 2.  In alternatives 1-3 operand 1 is tied to the
;; destination and operand 2 supplies the data (movsd / movlpd); in
;; alternatives 4-6 operand 2 is tied to the destination and operand 1
;; supplies the data (shufpd / movhps).
2701 (define_insn "sse2_movsd"
2702 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2704 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2705 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2709 movsd\t{%2, %0|%0, %2}
2710 movlpd\t{%2, %0|%0, %2}
2711 movlpd\t{%2, %0|%0, %2}
2712 shufpd\t{$2, %2, %0|%0, %2, 2}
2713 movhps\t{%H1, %0|%0, %H1}
2714 movhps\t{%1, %H0|%H0, %1}"
2715 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2716 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF register from the DF operand 1 (SSE register or memory)
;; with a single movddup.
2718 (define_insn "*vec_dupv2df_sse3"
2719 [(set (match_operand:V2DF 0 "register_operand" "=x")
2721 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2723 "movddup\t{%1, %0|%0, %1}"
2724 [(set_attr "type" "sselog1")
2725 (set_attr "mode" "DF")])
;; Variant of *vec_dupv2df_sse3 in which the DF operand 1 must be a
;; register tied to the destination (constraint "0").
2727 (define_insn "*vec_dupv2df"
2728 [(set (match_operand:V2DF 0 "register_operand" "=x")
2730 (match_operand:DF 1 "register_operand" "0")))]
2733 [(set_attr "type" "sselog1")
2734 (set_attr "mode" "V2DF")])
;; V2DF construction from the DF operand 1 (SSE register or memory) that
;; is emitted as a single movddup, like *vec_dupv2df_sse3.
2736 (define_insn "*vec_concatv2df_sse3"
2737 [(set (match_operand:V2DF 0 "register_operand" "=x")
2739 (match_operand:DF 1 "nonimmediate_operand" "xm")
2742 "movddup\t{%1, %0|%0, %1}"
2743 [(set_attr "type" "sselog1")
2744 (set_attr "mode" "DF")])
;; Build a V2DF register from two DF operands.  The first three
;; alternatives use the "Yt" register class and double-precision
;; instructions (unpcklpd, movhpd, movsd); the last two use plain "x"
;; registers and single-precision moves (movlhps, movhps).  In the movsd
;; alternative operand 1 comes from memory and operand 2 is the zero
;; constant ("C").
2746 (define_insn "*vec_concatv2df"
2747 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2749 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2750 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2753 unpcklpd\t{%2, %0|%0, %2}
2754 movhpd\t{%2, %0|%0, %2}
2755 movsd\t{%1, %0|%0, %1}
2756 movlhps\t{%2, %0|%0, %2}
2757 movhps\t{%2, %0|%0, %2}"
2758 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2759 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Set one element of the V2DF operand 0 to the DF operand 1.  Operand 2
;; is the constant element index; the work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
2761 (define_expand "vec_setv2df"
2762 [(match_operand:V2DF 0 "register_operand" "")
2763 (match_operand:DF 1 "register_operand" "")
2764 (match_operand 2 "const_int_operand" "")]
2767 ix86_expand_vector_set (false, operands[0], operands[1],
2768 INTVAL (operands[2]));
;; Extract one element of the V2DF operand 1 into the DF operand 0.
;; Operand 2 is the constant element index; the work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
2772 (define_expand "vec_extractv2df"
2773 [(match_operand:DF 0 "register_operand" "")
2774 (match_operand:V2DF 1 "register_operand" "")
2775 (match_operand 2 "const_int_operand" "")]
2778 ix86_expand_vector_extract (false, operands[0], operands[1],
2779 INTVAL (operands[2]));
;; Initialise the V2DF operand 0 from operand 1 (no mode or predicate is
;; specified for it); delegated to ix86_expand_vector_init, called with
;; `false' as its first argument.
2783 (define_expand "vec_initv2df"
2784 [(match_operand:V2DF 0 "register_operand" "")
2785 (match_operand 1 "" "")]
2788 ix86_expand_vector_init (false, operands[0], operands[1]);
2792 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2794 ;; Parallel integral arithmetic
2796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation for every SSEMODEI integer vector mode.  The preparation
;; statement forces a zero vector of the same mode into a register as
;; operand 2.
;; NOTE(review): presumably the pattern computes 0 - operand 1 --
;; confirm against the full RTL template.
2798 (define_expand "neg<mode>2"
2799 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2802 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2804 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition for every SSEMODEI integer vector mode.  Both inputs may be
;; memory at expand time; ix86_fixup_binary_operands_no_copy is called on
;; the operands before the insn is generated.
2806 (define_expand "add<mode>3"
2807 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2808 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2809 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2811 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.
2813 (define_insn "*add<mode>3"
2814 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2816 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2817 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2818 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2819 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2820 [(set_attr "type" "sseiadd")
2821 (set_attr "prefix_data16" "1")
2822 (set_attr "mode" "TI")])
;; padds{b,w}: addition checked against SS_PLUS, for the SSEMODE12 modes
;; (V16QI and V8HI) only.  Operand 1 is commutative and tied to the
;; destination.
2824 (define_insn "sse2_ssadd<mode>3"
2825 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2827 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2828 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2829 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2830 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2831 [(set_attr "type" "sseiadd")
2832 (set_attr "prefix_data16" "1")
2833 (set_attr "mode" "TI")])
;; paddus{b,w}: addition checked against US_PLUS, for the SSEMODE12
;; modes (V16QI and V8HI) only.  Operand 1 is commutative and tied to the
;; destination.
2835 (define_insn "sse2_usadd<mode>3"
2836 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2838 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2839 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2840 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2841 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2842 [(set_attr "type" "sseiadd")
2843 (set_attr "prefix_data16" "1")
2844 (set_attr "mode" "TI")])
;; Subtraction for every SSEMODEI integer vector mode.  Unlike addition,
;; operand 1 (the minuend) must already be a register; operand 2 may be
;; memory.  ix86_fixup_binary_operands_no_copy is called on the operands.
2846 (define_expand "sub<mode>3"
2847 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2848 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2849 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2851 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: operand 1 is a register tied to the destination (no
;; commutative marker); operand 2 may be a register or memory.
2853 (define_insn "*sub<mode>3"
2854 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2856 (match_operand:SSEMODEI 1 "register_operand" "0")
2857 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2859 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2860 [(set_attr "type" "sseiadd")
2861 (set_attr "prefix_data16" "1")
2862 (set_attr "mode" "TI")])
;; psubs{b,w}: for the SSEMODE12 modes (V16QI and V8HI) only.
;; Operand 1 is a register tied to the destination.
2864 (define_insn "sse2_sssub<mode>3"
2865 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2867 (match_operand:SSEMODE12 1 "register_operand" "0")
2868 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2870 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2871 [(set_attr "type" "sseiadd")
2872 (set_attr "prefix_data16" "1")
2873 (set_attr "mode" "TI")])
;; psubus{b,w}: for the SSEMODE12 modes (V16QI and V8HI) only.
;; Operand 1 is a register tied to the destination.
2875 (define_insn "sse2_ussub<mode>3"
2876 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2878 (match_operand:SSEMODE12 1 "register_operand" "0")
2879 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2881 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2882 [(set_attr "type" "sseiadd")
2883 (set_attr "prefix_data16" "1")
2884 (set_attr "mode" "TI")])
;; V16QI multiply synthesised from word multiplies.  Twelve V16QI
;; temporaries t[0..11] are allocated; both inputs are unpacked so each
;; byte sits in the low byte of a word, the two halves are multiplied
;; with gen_mulv8hi3, and a sequence of punpck{h,l}bw steps gathers the
;; result bytes into the destination.
2886 (define_expand "mulv16qi3"
2887 [(set (match_operand:V16QI 0 "register_operand" "")
2888 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2889 (match_operand:V16QI 2 "register_operand" "")))]
2895 for (i = 0; i < 12; ++i)
2896 t[i] = gen_reg_rtx (V16QImode);
2898 /* Unpack data such that we've got a source byte in each low byte of
2899 each word. We don't care what goes into the high byte of each word.
2900 Rather than trying to get zero in there, most convenient is to let
2901 it be a copy of the low byte. */
2902 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2903 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2904 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2905 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2907 /* Multiply words. The end-of-line annotations here give a picture of what
2908 the output of that instruction looks like. Dot means don't care; the
2909 letters are the bytes of the result with A being the most significant. */
2910 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2911 gen_lowpart (V8HImode, t[0]),
2912 gen_lowpart (V8HImode, t[1])));
2913 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2914 gen_lowpart (V8HImode, t[2]),
2915 gen_lowpart (V8HImode, t[3])));
2917 /* Extract the relevant bytes and merge them back together. */
2918 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2919 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2920 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2921 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2922 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2923 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2926 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Both inputs may be memory at expand time;
;; ix86_fixup_binary_operands_no_copy is called on the operands before
;; the insn is generated.
2930 (define_expand "mulv8hi3"
2931 [(set (match_operand:V8HI 0 "register_operand" "")
2932 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2933 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2935 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is commutative ("%") and tied to
;; the destination; operand 2 may be a register or memory.
2937 (define_insn "*mulv8hi3"
2938 [(set (match_operand:V8HI 0 "register_operand" "=x")
2939 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2940 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2941 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2942 "pmullw\t{%2, %0|%0, %2}"
2943 [(set_attr "type" "sseimul")
2944 (set_attr "prefix_data16" "1")
2945 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply that is matched by
;; *smulv8hi3_highpart (pmulhw).  ix86_fixup_binary_operands_no_copy is
;; called on the operands before the insn is generated.
2947 (define_expand "smulv8hi3_highpart"
2948 [(set (match_operand:V8HI 0 "register_operand" "")
2953 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2955 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2958 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw: V8HI high-part multiply.  Operand 1 is commutative and tied
;; to the destination; operand 2 may be a register or memory.
2960 (define_insn "*smulv8hi3_highpart"
2961 [(set (match_operand:V8HI 0 "register_operand" "=x")
2966 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2968 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2970 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2971 "pmulhw\t{%2, %0|%0, %2}"
2972 [(set_attr "type" "sseimul")
2973 (set_attr "prefix_data16" "1")
2974 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply that is matched by
;; *umulv8hi3_highpart (pmulhuw).  ix86_fixup_binary_operands_no_copy is
;; called on the operands before the insn is generated.
2976 (define_expand "umulv8hi3_highpart"
2977 [(set (match_operand:V8HI 0 "register_operand" "")
2982 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2984 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2987 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw: V8HI high-part multiply.  Operand 1 is commutative and tied
;; to the destination; operand 2 may be a register or memory.
2989 (define_insn "*umulv8hi3_highpart"
2990 [(set (match_operand:V8HI 0 "register_operand" "=x")
2995 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2997 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2999 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3000 "pmulhuw\t{%2, %0|%0, %2}"
3001 [(set_attr "type" "sseimul")
3002 (set_attr "prefix_data16" "1")
3003 (set_attr "mode" "TI")])
;; pmuludq: multiplies elements 0 and 2 of the two V4SI inputs into a
;; V2DI result.  Operand 1 is commutative and tied to the destination.
3005 (define_insn "sse2_umulv2siv2di3"
3006 [(set (match_operand:V2DI 0 "register_operand" "=x")
3010 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3011 (parallel [(const_int 0) (const_int 2)])))
3014 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3015 (parallel [(const_int 0) (const_int 2)])))))]
3016 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3017 "pmuludq\t{%2, %0|%0, %2}"
3018 [(set_attr "type" "sseimul")
3019 (set_attr "prefix_data16" "1")
3020 (set_attr "mode" "TI")])
;; pmuldq (SSE4.1): multiplies elements 0 and 2 of the two V4SI inputs
;; into a V2DI result.  Same operand layout as sse2_umulv2siv2di3, but
;; gated on TARGET_SSE4_1 and tagged prefix_extra instead of
;; prefix_data16.
3022 (define_insn "sse4_1_mulv2siv2di3"
3023 [(set (match_operand:V2DI 0 "register_operand" "=x")
3027 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3028 (parallel [(const_int 0) (const_int 2)])))
3031 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3032 (parallel [(const_int 0) (const_int 2)])))))]
3033 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3034 "pmuldq\t{%2, %0|%0, %2}"
3035 [(set_attr "type" "sseimul")
3036 (set_attr "prefix_extra" "1")
3037 (set_attr "mode" "TI")])
;; pmaddwd: V8HI x V8HI -> V4SI.  The RTL uses each input twice: once
;; through a selection list starting at element 0 and once (via
;; match_dup) through a V4HI selection starting at element 1 and ending
;; at element 7.  Operand 1 is commutative and tied to the destination.
3039 (define_insn "sse2_pmaddwd"
3040 [(set (match_operand:V4SI 0 "register_operand" "=x")
3045 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3046 (parallel [(const_int 0)
3052 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3053 (parallel [(const_int 0)
3059 (vec_select:V4HI (match_dup 1)
3060 (parallel [(const_int 1)
3065 (vec_select:V4HI (match_dup 2)
3066 (parallel [(const_int 1)
3069 (const_int 7)]))))))]
3070 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3071 "pmaddwd\t{%2, %0|%0, %2}"
3072 [(set_attr "type" "sseiadd")
3073 (set_attr "prefix_data16" "1")
3074 (set_attr "mode" "TI")])
;; V4SI multiply expander.  The body contains a call to
;; ix86_fixup_binary_operands_no_copy and a synthesised sequence built on
;; gen_sse2_umulv2siv2di3: elements 0/2 are multiplied directly,
;; elements 1/3 after shifting both inputs with gen_sse2_lshrti3, and the
;; partial results are shuffled and interleaved into the destination.
;; NOTE(review): presumably the fixup call is the SSE4.1 path that leaves
;; the work to *sse4_1_mulv4si3 -- confirm against the full body.
3076 (define_expand "mulv4si3"
3077 [(set (match_operand:V4SI 0 "register_operand" "")
3078 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3079 (match_operand:V4SI 2 "register_operand" "")))]
3083 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
3086 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3092 t1 = gen_reg_rtx (V4SImode);
3093 t2 = gen_reg_rtx (V4SImode);
3094 t3 = gen_reg_rtx (V4SImode);
3095 t4 = gen_reg_rtx (V4SImode);
3096 t5 = gen_reg_rtx (V4SImode);
3097 t6 = gen_reg_rtx (V4SImode);
3098 thirtytwo = GEN_INT (32);
3100 /* Multiply elements 2 and 0. */
3101 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3104 /* Shift both input vectors down one element, so that elements 3
3105 and 1 are now in the slots for elements 2 and 0. For K8, at
3106 least, this is faster than using a shuffle. */
3107 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3108 gen_lowpart (TImode, op1),
3110 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3111 gen_lowpart (TImode, op2),
3113 /* Multiply elements 3 and 1. */
3114 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3117 /* Move the results in element 2 down to element 1; we don't care
3118 what goes in elements 2 and 3. */
3119 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3120 const0_rtx, const0_rtx));
3121 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3122 const0_rtx, const0_rtx));
3124 /* Merge the parts back together. */
3125 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; pmulld (SSE4.1): direct V4SI multiply.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
3130 (define_insn "*sse4_1_mulv4si3"
3131 [(set (match_operand:V4SI 0 "register_operand" "=x")
3132 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3133 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3134 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3135 "pmulld\t{%2, %0|%0, %2}"
3136 [(set_attr "type" "sseimul")
3137 (set_attr "prefix_extra" "1")
3138 (set_attr "mode" "TI")])
;; V2DI multiply synthesised from 32x32->64 multiplies
;; (gen_sse2_umulv2siv2di3).  With each 64-bit element split into high
;; and low 32-bit halves, the result is
;;   lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32)
;; which is what the three partial products and two additions below
;; compute.
3140 (define_expand "mulv2di3"
3141 [(set (match_operand:V2DI 0 "register_operand" "")
3142 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3143 (match_operand:V2DI 2 "register_operand" "")))]
3146 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3152 t1 = gen_reg_rtx (V2DImode);
3153 t2 = gen_reg_rtx (V2DImode);
3154 t3 = gen_reg_rtx (V2DImode);
3155 t4 = gen_reg_rtx (V2DImode);
3156 t5 = gen_reg_rtx (V2DImode);
3157 t6 = gen_reg_rtx (V2DImode);
3158 thirtytwo = GEN_INT (32);
3160 /* Multiply low parts. */
3161 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3162 gen_lowpart (V4SImode, op2)));
3164 /* Shift input vectors right 32 bits so we can multiply high parts. */
3165 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3166 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3168 /* Multiply high parts by low parts. */
3169 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3170 gen_lowpart (V4SImode, t3)));
3171 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3172 gen_lowpart (V4SImode, t2)));
3174 /* Shift them back. */
3175 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3176 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3178 /* Add the three parts together. */
3179 emit_insn (gen_addv2di3 (t6, t1, t4));
3180 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI multiply into V4SI.  The low 16 bits of each product
;; come from gen_mulv8hi3 (t1) and the high 16 bits from
;; gen_smulv8hi3_highpart (t2); gen_vec_interleave_highv8hi then combines
;; t1 and t2 into the destination, viewed as V8HI.
3184 (define_expand "vec_widen_smult_hi_v8hi"
3185 [(match_operand:V4SI 0 "register_operand" "")
3186 (match_operand:V8HI 1 "register_operand" "")
3187 (match_operand:V8HI 2 "register_operand" "")]
3190 rtx op1, op2, t1, t2, dest;
3194 t1 = gen_reg_rtx (V8HImode);
3195 t2 = gen_reg_rtx (V8HImode);
3196 dest = gen_lowpart (V8HImode, operands[0]);
3198 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3199 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3200 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI.  The low 16 bits of each product
;; come from gen_mulv8hi3 (t1) and the high 16 bits from
;; gen_smulv8hi3_highpart (t2); gen_vec_interleave_lowv8hi then combines
;; t1 and t2 into the destination, viewed as V8HI.
3204 (define_expand "vec_widen_smult_lo_v8hi"
3205 [(match_operand:V4SI 0 "register_operand" "")
3206 (match_operand:V8HI 1 "register_operand" "")
3207 (match_operand:V8HI 2 "register_operand" "")]
3210 rtx op1, op2, t1, t2, dest;
3214 t1 = gen_reg_rtx (V8HImode);
3215 t2 = gen_reg_rtx (V8HImode);
3216 dest = gen_lowpart (V8HImode, operands[0]);
3218 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3219 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3220 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: V8HI x V8HI -> V4SI.
;; Emits mulv8hi3 into t1 and umulv8hi3_highpart into t2, then interleaves
;; the high halves of t1 and t2 into operand 0 viewed as V8HI.
;; NOTE(review): op1/op2 are assigned on lines not visible in this excerpt.
3224 (define_expand "vec_widen_umult_hi_v8hi"
3225 [(match_operand:V4SI 0 "register_operand" "")
3226 (match_operand:V8HI 1 "register_operand" "")
3227 (match_operand:V8HI 2 "register_operand" "")]
3230 rtx op1, op2, t1, t2, dest;
3234 t1 = gen_reg_rtx (V8HImode);
3235 t2 = gen_reg_rtx (V8HImode);
3236 dest = gen_lowpart (V8HImode, operands[0]);
3238 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3239 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3240 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: V8HI x V8HI -> V4SI.
;; Same sequence as the _hi_ variant (mulv8hi3 + umulv8hi3_highpart), but the
;; final step interleaves the low halves of t1 and t2.
;; NOTE(review): op1/op2 are assigned on lines not visible in this excerpt.
3244 (define_expand "vec_widen_umult_lo_v8hi"
3245 [(match_operand:V4SI 0 "register_operand" "")
3246 (match_operand:V8HI 1 "register_operand" "")
3247 (match_operand:V8HI 2 "register_operand" "")]
3250 rtx op1, op2, t1, t2, dest;
3254 t1 = gen_reg_rtx (V8HImode);
3255 t2 = gen_reg_rtx (V8HImode);
3256 dest = gen_lowpart (V8HImode, operands[0]);
3258 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3259 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3260 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is high-interleaved with itself (duplicating its upper two
;; elements) and the two results are fed to gen_sse2_umulv2siv2di3.
;; NOTE(review): this signed expander calls the same umul generator as
;; vec_widen_umult_hi_v4si -- confirm the result is correct for negative
;; inputs.  op1/op2 are assigned on lines not visible in this excerpt.
3264 (define_expand "vec_widen_smult_hi_v4si"
3265 [(match_operand:V2DI 0 "register_operand" "")
3266 (match_operand:V4SI 1 "register_operand" "")
3267 (match_operand:V4SI 2 "register_operand" "")]
3270 rtx op1, op2, t1, t2;
3274 t1 = gen_reg_rtx (V4SImode);
3275 t2 = gen_reg_rtx (V4SImode);
3277 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3278 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3279 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening signed multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is low-interleaved with itself (duplicating its lower two
;; elements) and the two results are fed to gen_sse2_umulv2siv2di3.
;; NOTE(review): this signed expander calls the same umul generator as
;; vec_widen_umult_lo_v4si -- confirm the result is correct for negative
;; inputs.  op1/op2 are assigned on lines not visible in this excerpt.
3283 (define_expand "vec_widen_smult_lo_v4si"
3284 [(match_operand:V2DI 0 "register_operand" "")
3285 (match_operand:V4SI 1 "register_operand" "")
3286 (match_operand:V4SI 2 "register_operand" "")]
3289 rtx op1, op2, t1, t2;
3293 t1 = gen_reg_rtx (V4SImode);
3294 t2 = gen_reg_rtx (V4SImode);
3296 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3297 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3298 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is high-interleaved with itself (duplicating its upper two
;; elements) and the two results are fed to gen_sse2_umulv2siv2di3.
;; NOTE(review): op1/op2 are assigned on lines not visible in this excerpt.
3302 (define_expand "vec_widen_umult_hi_v4si"
3303 [(match_operand:V2DI 0 "register_operand" "")
3304 (match_operand:V4SI 1 "register_operand" "")
3305 (match_operand:V4SI 2 "register_operand" "")]
3308 rtx op1, op2, t1, t2;
3312 t1 = gen_reg_rtx (V4SImode);
3313 t2 = gen_reg_rtx (V4SImode);
3315 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3316 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3317 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is low-interleaved with itself (duplicating its lower two
;; elements) and the two results are fed to gen_sse2_umulv2siv2di3.
;; NOTE(review): op1/op2 are assigned on lines not visible in this excerpt.
3321 (define_expand "vec_widen_umult_lo_v4si"
3322 [(match_operand:V2DI 0 "register_operand" "")
3323 (match_operand:V4SI 1 "register_operand" "")
3324 (match_operand:V4SI 2 "register_operand" "")]
3327 rtx op1, op2, t1, t2;
3331 t1 = gen_reg_rtx (V4SImode);
3332 t2 = gen_reg_rtx (V4SImode);
3334 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3335 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3336 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operands 1 and 2 are V8HI, operand 3 is the V4SI
;; accumulator.  Emits sse2_pmaddwd of operands 1 and 2 into a temporary and
;; adds that temporary to operand 3, storing the sum in operand 0.
3340 (define_expand "sdot_prodv8hi"
3341 [(match_operand:V4SI 0 "register_operand" "")
3342 (match_operand:V8HI 1 "register_operand" "")
3343 (match_operand:V8HI 2 "register_operand" "")
3344 (match_operand:V4SI 3 "register_operand" "")]
3347 rtx t = gen_reg_rtx (V4SImode);
3348 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3349 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step: operands 1 and 2 are V4SI, operand 3 is the
;; V2DI accumulator.
;;   t1 = umul (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted right as whole TImode values
;;   t4 = umul (t2, t3)
;;   op0 = t1 + t4
;; NOTE(review): the shift counts passed to gen_sse2_lshrti3 and the
;; declaration of t1..t4 are on lines not visible in this excerpt.
3353 (define_expand "udot_prodv4si"
3354 [(match_operand:V2DI 0 "register_operand" "")
3355 (match_operand:V4SI 1 "register_operand" "")
3356 (match_operand:V4SI 2 "register_operand" "")
3357 (match_operand:V2DI 3 "register_operand" "")]
3362 t1 = gen_reg_rtx (V2DImode);
3363 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3364 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3366 t2 = gen_reg_rtx (V4SImode);
3367 t3 = gen_reg_rtx (V4SImode);
3368 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3369 gen_lowpart (TImode, operands[1]),
3371 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3372 gen_lowpart (TImode, operands[2]),
3375 t4 = gen_reg_rtx (V2DImode);
3376 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3378 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element shift insn for the SSEMODE24 modes; emits psra with the
;; element-size suffix from ssevecsize.  Operand 1 is tied to the destination
;; and the count (operand 2) is a TImode register or immediate.
;; NOTE(review): the rtx code line and the enabling condition are not visible
;; in this excerpt.
3382 (define_insn "ashr<mode>3"
3383 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3385 (match_operand:SSEMODE24 1 "register_operand" "0")
3386 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3388 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3389 [(set_attr "type" "sseishft")
3390 (set_attr "prefix_data16" "1")
3391 (set_attr "mode" "TI")])
;; Per-element logical right shift (lshiftrt) for the SSEMODE248 modes; emits
;; psrl with the element-size suffix from ssevecsize.  Operand 1 is tied to
;; the destination and the count (operand 2) is a TImode register or
;; immediate.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3393 (define_insn "lshr<mode>3"
3394 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3395 (lshiftrt:SSEMODE248
3396 (match_operand:SSEMODE248 1 "register_operand" "0")
3397 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3399 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3400 [(set_attr "type" "sseishft")
3401 (set_attr "prefix_data16" "1")
3402 (set_attr "mode" "TI")])
;; Per-element shift insn for the SSEMODE248 modes; emits psll with the
;; element-size suffix from ssevecsize.  Operand 1 is tied to the destination
;; and the count (operand 2) is a TImode register or immediate.
;; NOTE(review): the rtx code line and the enabling condition are not visible
;; in this excerpt.
3404 (define_insn "ashl<mode>3"
3405 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3407 (match_operand:SSEMODE248 1 "register_operand" "0")
3408 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3410 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3411 [(set_attr "type" "sseishft")
3412 (set_attr "prefix_data16" "1")
3413 (set_attr "mode" "TI")])
;; Whole-vector left shift: the template is an ashift in TImode.  The body
;; tests operand 2 with const_0_to_255_mul_8_operand and then replaces
;; operands 0 and 1 with their TImode lowparts.
;; NOTE(review): the statement executed when the predicate test fails is not
;; visible in this excerpt.
3415 (define_expand "vec_shl_<mode>"
3416 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3417 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3418 (match_operand:SI 2 "general_operand" "")))]
3421 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3423 operands[0] = gen_lowpart (TImode, operands[0]);
3424 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector right shift: the template is an lshiftrt in TImode.  The body
;; tests operand 2 with const_0_to_255_mul_8_operand and then replaces
;; operands 0 and 1 with their TImode lowparts.
;; NOTE(review): the statement executed when the predicate test fails is not
;; visible in this excerpt.
3427 (define_expand "vec_shr_<mode>"
3428 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3429 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3430 (match_operand:SI 2 "general_operand" "")))]
3433 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3435 operands[0] = gen_lowpart (TImode, operands[0]);
3436 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for unsigned max on V16QI.  Its only preparation step is a call
;; to ix86_fixup_binary_operands_no_copy for UMAX.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3439 (define_expand "umaxv16qi3"
3440 [(set (match_operand:V16QI 0 "register_operand" "")
3441 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3442 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3444 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matching insn for unsigned max on V16QI; emits pmaxub.  Enabled when
;; TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is commutative ("%") and tied to the destination.
3446 (define_insn "*umaxv16qi3"
3447 [(set (match_operand:V16QI 0 "register_operand" "=x")
3448 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3449 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3450 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3451 "pmaxub\t{%2, %0|%0, %2}"
3452 [(set_attr "type" "sseiadd")
3453 (set_attr "prefix_data16" "1")
3454 (set_attr "mode" "TI")])
;; Expander for signed max on V8HI.  Its only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy for SMAX.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3456 (define_expand "smaxv8hi3"
3457 [(set (match_operand:V8HI 0 "register_operand" "")
3458 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3459 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3461 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matching insn for signed max on V8HI; emits pmaxsw.  Enabled when
;; TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is commutative ("%") and tied to the destination.
3463 (define_insn "*smaxv8hi3"
3464 [(set (match_operand:V8HI 0 "register_operand" "=x")
3465 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3466 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3467 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3468 "pmaxsw\t{%2, %0|%0, %2}"
3469 [(set_attr "type" "sseiadd")
3470 (set_attr "prefix_data16" "1")
3471 (set_attr "mode" "TI")])
;; Expander for unsigned max on V8HI.  Two code paths are visible:
;;   1. a call to ix86_fixup_binary_operands_no_copy for UMAX;
;;   2. an open-coded sequence: op3 = ussub (op1, op2); op0 = op3 + op2.
;;      A fresh temporary is used for op3 when op0 is the same rtx as op2, so
;;      op2 is still intact for the add.
;; NOTE(review): the test that selects between the two paths is on lines not
;; visible in this excerpt; gen_sse2_ussubv8hi3 is presumably an unsigned
;; saturating subtract -- confirm.
3473 (define_expand "umaxv8hi3"
3474 [(set (match_operand:V8HI 0 "register_operand" "")
3475 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3476 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3480 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3483 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3484 if (rtx_equal_p (op3, op2))
3485 op3 = gen_reg_rtx (V8HImode);
3486 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3487 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed max on the SSEMODE14 modes.  Two code paths are
;; visible:
;;   1. a call to ix86_fixup_binary_operands_no_copy for SMAX;
;;   2. a conditional select through ix86_expand_int_vcond.  xops uses the
;;      same layout as the vcond<mode> operands: 0 = destination, 1/2 =
;;      then/else values, 3 = comparison of 4 and 5.  Here the comparison is
;;      op1 > op2 (GT), selecting op1 when true and op2 otherwise.
;; NOTE(review): the test that selects between the two paths and the use of
;; "ok" are on lines not visible in this excerpt.
3492 (define_expand "smax<mode>3"
3493 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3494 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3495 (match_operand:SSEMODE14 2 "register_operand" "")))]
3499 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3505 xops[0] = operands[0];
3506 xops[1] = operands[1];
3507 xops[2] = operands[2];
3508 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3509 xops[4] = operands[1];
3510 xops[5] = operands[2];
3511 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed max insn for the SSEMODE14 modes; emits pmaxs with the
;; element-size suffix.  Enabled when TARGET_SSE4_1 holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3517 (define_insn "*sse4_1_smax<mode>3"
3518 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3520 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3521 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3522 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3523 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3524 [(set_attr "type" "sseiadd")
3525 (set_attr "prefix_extra" "1")
3526 (set_attr "mode" "TI")])
;; Expander for unsigned max on V4SI.  Two code paths are visible:
;;   1. a call to ix86_fixup_binary_operands_no_copy for UMAX;
;;   2. a conditional select through ix86_expand_int_vcond using the unsigned
;;      comparison op1 > op2 (GTU), selecting op1 when true and op2
;;      otherwise.
;; NOTE(review): the test that selects between the two paths and the use of
;; "ok" are on lines not visible in this excerpt.
3528 (define_expand "umaxv4si3"
3529 [(set (match_operand:V4SI 0 "register_operand" "")
3530 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3531 (match_operand:V4SI 2 "register_operand" "")))]
3535 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3541 xops[0] = operands[0];
3542 xops[1] = operands[1];
3543 xops[2] = operands[2];
3544 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3545 xops[4] = operands[1];
3546 xops[5] = operands[2];
3547 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned max insn for the SSEMODE24 modes; emits pmaxu with the
;; element-size suffix.  Enabled when TARGET_SSE4_1 holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3553 (define_insn "*sse4_1_umax<mode>3"
3554 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3556 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3557 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3558 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3559 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3560 [(set_attr "type" "sseiadd")
3561 (set_attr "prefix_extra" "1")
3562 (set_attr "mode" "TI")])
;; Expander for unsigned min on V16QI.  Its only preparation step is a call
;; to ix86_fixup_binary_operands_no_copy for UMIN.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3564 (define_expand "uminv16qi3"
3565 [(set (match_operand:V16QI 0 "register_operand" "")
3566 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3567 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3569 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for unsigned min on V16QI; emits pminub.  Enabled when
;; TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is commutative ("%") and tied to the destination.
3571 (define_insn "*uminv16qi3"
3572 [(set (match_operand:V16QI 0 "register_operand" "=x")
3573 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3574 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3575 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3576 "pminub\t{%2, %0|%0, %2}"
3577 [(set_attr "type" "sseiadd")
3578 (set_attr "prefix_data16" "1")
3579 (set_attr "mode" "TI")])
;; Expander for signed min on V8HI.  Its only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy for SMIN.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3581 (define_expand "sminv8hi3"
3582 [(set (match_operand:V8HI 0 "register_operand" "")
3583 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3584 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3586 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for signed min on V8HI; emits pminsw.  Enabled when
;; TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is commutative ("%") and tied to the destination.
3588 (define_insn "*sminv8hi3"
3589 [(set (match_operand:V8HI 0 "register_operand" "=x")
3590 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3591 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3592 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3593 "pminsw\t{%2, %0|%0, %2}"
3594 [(set_attr "type" "sseiadd")
3595 (set_attr "prefix_data16" "1")
3596 (set_attr "mode" "TI")])
;; Expander for signed min on the SSEMODE14 modes.  Two code paths are
;; visible:
;;   1. a call to ix86_fixup_binary_operands_no_copy for SMIN;
;;   2. a conditional select through ix86_expand_int_vcond.  The comparison
;;      is op1 > op2 (GT) and the then/else values are swapped relative to
;;      the max expanders: op2 is selected when true, op1 otherwise.
;; NOTE(review): the test that selects between the two paths and the use of
;; "ok" are on lines not visible in this excerpt.
3598 (define_expand "smin<mode>3"
3599 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3600 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3601 (match_operand:SSEMODE14 2 "register_operand" "")))]
3605 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3611 xops[0] = operands[0];
3612 xops[1] = operands[2];
3613 xops[2] = operands[1];
3614 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3615 xops[4] = operands[1];
3616 xops[5] = operands[2];
3617 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed min insn for the SSEMODE14 modes; emits pmins with the
;; element-size suffix.  Enabled when TARGET_SSE4_1 holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3623 (define_insn "*sse4_1_smin<mode>3"
3624 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3626 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3627 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3628 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3629 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3630 [(set_attr "type" "sseiadd")
3631 (set_attr "prefix_extra" "1")
3632 (set_attr "mode" "TI")])
;; Expander for unsigned min on the SSEMODE24 modes.  Two code paths are
;; visible:
;;   1. a call to ix86_fixup_binary_operands_no_copy for UMIN;
;;   2. a conditional select through ix86_expand_int_vcond.  The comparison
;;      is unsigned op1 > op2 (GTU) with swapped then/else values: op2 is
;;      selected when true, op1 otherwise.
;; NOTE(review): the test that selects between the two paths and the use of
;; "ok" are on lines not visible in this excerpt.
3634 (define_expand "umin<mode>3"
3635 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3636 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3637 (match_operand:SSEMODE24 2 "register_operand" "")))]
3641 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3647 xops[0] = operands[0];
3648 xops[1] = operands[2];
3649 xops[2] = operands[1];
3650 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3651 xops[4] = operands[1];
3652 xops[5] = operands[2];
3653 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned min insn for the SSEMODE24 modes; emits pminu with the
;; element-size suffix.  Enabled when TARGET_SSE4_1 holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3659 (define_insn "*sse4_1_umin<mode>3"
3660 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3662 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3663 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3664 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3665 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3666 [(set_attr "type" "sseiadd")
3667 (set_attr "prefix_extra" "1")
3668 (set_attr "mode" "TI")])
3670 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3672 ;; Parallel integral comparisons
3674 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes; emits pcmpeq with
;; the element-size suffix.  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands for EQ.  Operand 1 is
;; commutative ("%") and tied to the destination.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3676 (define_insn "sse2_eq<mode>3"
3677 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3679 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3680 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3681 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3682 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3683 [(set_attr "type" "ssecmp")
3684 (set_attr "prefix_data16" "1")
3685 (set_attr "mode" "TI")])
;; Element-wise equality compare on V2DI; emits pcmpeqq.  Enabled when
;; TARGET_SSE4_1 holds and ix86_binary_operator_ok accepts the operands for
;; EQ.  Operand 1 is commutative ("%") and tied to the destination.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3687 (define_insn "sse4_1_eqv2di3"
3688 [(set (match_operand:V2DI 0 "register_operand" "=x")
3690 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3691 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3692 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3693 "pcmpeqq\t{%2, %0|%0, %2}"
3694 [(set_attr "type" "ssecmp")
3695 (set_attr "prefix_extra" "1")
3696 (set_attr "mode" "TI")])
;; Element-wise compare for the SSEMODE124 modes; emits pcmpgt with the
;; element-size suffix.  Operand 1 must be a register tied to the
;; destination and is not marked commutative.
;; NOTE(review): the rtx code line and the enabling condition are not visible
;; in this excerpt.
3698 (define_insn "sse2_gt<mode>3"
3699 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3701 (match_operand:SSEMODE124 1 "register_operand" "0")
3702 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3704 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3705 [(set_attr "type" "ssecmp")
3706 (set_attr "prefix_data16" "1")
3707 (set_attr "mode" "TI")])
;; Element-wise compare on V2DI; emits pcmpgtq.
;; NOTE(review): operand 1 uses the nonimmediate_operand predicate with the
;; tied constraint "0", whereas sse2_gt<mode>3 uses register_operand for the
;; same position -- confirm which is intended.  Unlike sse4_1_eqv2di3 no
;; prefix attribute is set here.  The rtx code line and the enabling
;; condition are not visible in this excerpt.
3709 (define_insn "sse4_2_gtv2di3"
3710 [(set (match_operand:V2DI 0 "register_operand" "=x")
3712 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3713 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3715 "pcmpgtq\t{%2, %0|%0, %2}"
3716 [(set_attr "type" "ssecmp")
3717 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODEI modes.  Operand 3 is a
;; comparison operator applied to operands 4 and 5; operand 1 is the value
;; chosen when it holds and operand 2 otherwise.  The body hands the operand
;; array to ix86_expand_int_vcond.
;; NOTE(review): the statements that follow the ix86_expand_int_vcond test
;; and the enabling condition are not visible in this excerpt.
3719 (define_expand "vcond<mode>"
3720 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3721 (if_then_else:SSEMODEI
3722 (match_operator 3 ""
3723 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3724 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3725 (match_operand:SSEMODEI 1 "general_operand" "")
3726 (match_operand:SSEMODEI 2 "general_operand" "")))]
3729 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> under the vcondu name, for the SSEMODEI modes.
;; The visible template and the call to ix86_expand_int_vcond are identical
;; to vcond<mode>.
;; NOTE(review): the statements that follow the ix86_expand_int_vcond test
;; and the enabling condition are not visible in this excerpt.
3735 (define_expand "vcondu<mode>"
3736 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3737 (if_then_else:SSEMODEI
3738 (match_operator 3 ""
3739 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3740 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3741 (match_operand:SSEMODEI 1 "general_operand" "")
3742 (match_operand:SSEMODEI 2 "general_operand" "")))]
3745 if (ix86_expand_int_vcond (operands))
3751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3753 ;; Parallel bitwise logical operations
3755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an xor.  The body
;; builds a constant vector with every element set to constm1_rtx, forces it
;; into a register and stores it in operands[2].
;; NOTE(review): the second xor operand in the template and the enabling
;; condition are on lines not visible in this excerpt.
3757 (define_expand "one_cmpl<mode>2"
3758 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3759 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3763 int i, n = GET_MODE_NUNITS (<MODE>mode);
3764 rtvec v = rtvec_alloc (n);
3766 for (i = 0; i < n; ++i)
3767 RTVEC_ELT (v, i) = constm1_rtx;
3769 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the SSEMODEI modes.  Its only preparation
;; step is a call to ix86_fixup_binary_operands_no_copy for AND.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3772 (define_expand "and<mode>3"
3773 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3774 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3775 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3777 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; AND insn used when TARGET_SSE is set but TARGET_SSE2 is not; emits andps
;; and carries mode attribute V4SF.  Also requires ix86_binary_operator_ok.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3779 (define_insn "*sse_and<mode>3"
3780 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3782 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3783 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3784 "(TARGET_SSE && !TARGET_SSE2)
3785 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3786 "andps\t{%2, %0|%0, %2}"
3787 [(set_attr "type" "sselog")
3788 (set_attr "mode" "V4SF")])
;; AND insn used when TARGET_SSE2 is set; emits pand.  Also requires
;; ix86_binary_operator_ok.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3790 (define_insn "*sse2_and<mode>3"
3791 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3793 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3794 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3795 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3796 "pand\t{%2, %0|%0, %2}"
3797 [(set_attr "type" "sselog")
3798 (set_attr "prefix_data16" "1")
3799 (set_attr "mode" "TI")])
;; Insn combining the complement of operand 1 (not) with operand 2; emits
;; andnps.  Used when TARGET_SSE is set but TARGET_SSE2 is not.  Operand 1 is
;; a register tied to the destination and is not marked commutative.
;; NOTE(review): the outer rtx code line is not visible in this excerpt.
3801 (define_insn "*sse_nand<mode>3"
3802 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3804 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3805 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3806 "(TARGET_SSE && !TARGET_SSE2)"
3807 "andnps\t{%2, %0|%0, %2}"
3808 [(set_attr "type" "sselog")
3809 (set_attr "mode" "V4SF")])
;; Insn combining the complement of operand 1 (not) with operand 2; emits
;; pandn.  Operand 1 is a register tied to the destination and is not marked
;; commutative.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible in this excerpt.
3811 (define_insn "sse2_nand<mode>3"
3812 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3814 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3815 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3817 "pandn\t{%2, %0|%0, %2}"
3818 [(set_attr "type" "sselog")
3819 (set_attr "prefix_data16" "1")
3820 (set_attr "mode" "TI")])
;; Expander for bitwise AND on TFmode values.  Its only preparation step is
;; a call to ix86_fixup_binary_operands_no_copy for AND.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3822 (define_expand "andtf3"
3823 [(set (match_operand:TF 0 "register_operand" "")
3824 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3825 (match_operand:TF 2 "nonimmediate_operand" "")))]
3827 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; TFmode AND insn; emits pand.  Enabled when TARGET_64BIT holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3829 (define_insn "*andtf3"
3830 [(set (match_operand:TF 0 "register_operand" "=x")
3832 (match_operand:TF 1 "nonimmediate_operand" "%0")
3833 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3834 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3835 "pand\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "sselog")
3837 (set_attr "prefix_data16" "1")
3838 (set_attr "mode" "TI")])
;; TFmode insn combining the complement of operand 1 (not) with operand 2;
;; emits pandn.  Operand 1 is a register tied to the destination.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible in this excerpt.
3840 (define_insn "*nandtf3"
3841 [(set (match_operand:TF 0 "register_operand" "=x")
3843 (not:TF (match_operand:TF 1 "register_operand" "0"))
3844 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3846 "pandn\t{%2, %0|%0, %2}"
3847 [(set_attr "type" "sselog")
3848 (set_attr "prefix_data16" "1")
3849 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on the SSEMODEI modes.  Its only
;; preparation step is a call to ix86_fixup_binary_operands_no_copy for IOR.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3851 (define_expand "ior<mode>3"
3852 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3853 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3854 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3856 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; IOR insn used when TARGET_SSE is set but TARGET_SSE2 is not; emits orps
;; and carries mode attribute V4SF.  Also requires ix86_binary_operator_ok.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3858 (define_insn "*sse_ior<mode>3"
3859 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3861 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3862 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3863 "(TARGET_SSE && !TARGET_SSE2)
3864 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3865 "orps\t{%2, %0|%0, %2}"
3866 [(set_attr "type" "sselog")
3867 (set_attr "mode" "V4SF")])
;; IOR insn used when TARGET_SSE2 is set; emits por.  Also requires
;; ix86_binary_operator_ok.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3869 (define_insn "*sse2_ior<mode>3"
3870 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3872 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3873 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3874 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3875 "por\t{%2, %0|%0, %2}"
3876 [(set_attr "type" "sselog")
3877 (set_attr "prefix_data16" "1")
3878 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on TFmode values.  Its only preparation
;; step is a call to ix86_fixup_binary_operands_no_copy for IOR.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3880 (define_expand "iortf3"
3881 [(set (match_operand:TF 0 "register_operand" "")
3882 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3883 (match_operand:TF 2 "nonimmediate_operand" "")))]
3885 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; TFmode IOR insn; emits por.  Enabled when TARGET_64BIT holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3887 (define_insn "*iortf3"
3888 [(set (match_operand:TF 0 "register_operand" "=x")
3890 (match_operand:TF 1 "nonimmediate_operand" "%0")
3891 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3892 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3893 "por\t{%2, %0|%0, %2}"
3894 [(set_attr "type" "sselog")
3895 (set_attr "prefix_data16" "1")
3896 (set_attr "mode" "TI")])
;; Expander for bitwise XOR on the SSEMODEI modes.  Its only preparation
;; step is a call to ix86_fixup_binary_operands_no_copy for XOR.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3898 (define_expand "xor<mode>3"
3899 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3900 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3901 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3903 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; XOR insn used when TARGET_SSE is set but TARGET_SSE2 is not; emits xorps
;; and carries mode attribute V4SF.  Also requires ix86_binary_operator_ok.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3905 (define_insn "*sse_xor<mode>3"
3906 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3908 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3909 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3910 "(TARGET_SSE && !TARGET_SSE2)
3911 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3912 "xorps\t{%2, %0|%0, %2}"
3913 [(set_attr "type" "sselog")
3914 (set_attr "mode" "V4SF")])
;; XOR insn used when TARGET_SSE2 is set; emits pxor.  Also requires
;; ix86_binary_operator_ok.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3916 (define_insn "*sse2_xor<mode>3"
3917 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3919 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3920 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3921 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3922 "pxor\t{%2, %0|%0, %2}"
3923 [(set_attr "type" "sselog")
3924 (set_attr "prefix_data16" "1")
3925 (set_attr "mode" "TI")])
;; Expander for bitwise XOR on TFmode values.  Its only preparation step is
;; a call to ix86_fixup_binary_operands_no_copy for XOR.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3927 (define_expand "xortf3"
3928 [(set (match_operand:TF 0 "register_operand" "")
3929 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3930 (match_operand:TF 2 "nonimmediate_operand" "")))]
3932 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; TFmode XOR insn; emits pxor.  Enabled when TARGET_64BIT holds and
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the rtx code line is not visible in this excerpt.
3934 (define_insn "*xortf3"
3935 [(set (match_operand:TF 0 "register_operand" "=x")
3937 (match_operand:TF 1 "nonimmediate_operand" "%0")
3938 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3939 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3940 "pxor\t{%2, %0|%0, %2}"
3941 [(set_attr "type" "sselog")
3942 (set_attr "prefix_data16" "1")
3943 (set_attr "mode" "TI")])
3945 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3947 ;; Parallel integral element swizzling
3949 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3952 ;; op1 = abcdefghijklmnop
3953 ;; op2 = qrstuvwxyz012345
3954 ;; h1 = aqbrcsdteufvgwhx
3955 ;; l1 = iyjzk0l1m2n3o4p5
3956 ;; h2 = aiqybjrzcks0dlt1
3957 ;; l2 = emu2fnv3gow4hpx5
3958 ;; h3 = aeimquy2bfjnrvz3
3959 ;; l3 = cgkosw04dhlptx15
3960 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI inputs into one V16QI result.  Both inputs are viewed as
;; V16QI and passed through seven byte interleaves (three high/low pairs
;; followed by a final low interleave) whose result lands in operand 0.
;; NOTE(review): reading the letters of the preceding diagram as element 0
;; first and using the index lists of the interleave patterns, the h/l labels
;; in the diagram appear swapped relative to the h1/l1.. variables here and
;; the final step would yield acegikmo...; confirm the diagram's notation.
3961 (define_expand "vec_pack_trunc_v8hi"
3962 [(match_operand:V16QI 0 "register_operand" "")
3963 (match_operand:V8HI 1 "register_operand" "")
3964 (match_operand:V8HI 2 "register_operand" "")]
3967 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3969 op1 = gen_lowpart (V16QImode, operands[1]);
3970 op2 = gen_lowpart (V16QImode, operands[2]);
3971 h1 = gen_reg_rtx (V16QImode);
3972 l1 = gen_reg_rtx (V16QImode);
3973 h2 = gen_reg_rtx (V16QImode);
3974 l2 = gen_reg_rtx (V16QImode);
3975 h3 = gen_reg_rtx (V16QImode);
3976 l3 = gen_reg_rtx (V16QImode);
3978 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3979 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3980 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3981 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3982 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3983 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3984 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3995 ;; result = bdfhjlnp
;; Packs two V4SI inputs into one V8HI result.  Both inputs are viewed as
;; V8HI and passed through five halfword interleaves (two high/low pairs
;; followed by a final low interleave) whose result lands in operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3996 (define_expand "vec_pack_trunc_v4si"
3997 [(match_operand:V8HI 0 "register_operand" "")
3998 (match_operand:V4SI 1 "register_operand" "")
3999 (match_operand:V4SI 2 "register_operand" "")]
4002 rtx op1, op2, h1, l1, h2, l2;
4004 op1 = gen_lowpart (V8HImode, operands[1]);
4005 op2 = gen_lowpart (V8HImode, operands[2]);
4006 h1 = gen_reg_rtx (V8HImode);
4007 l1 = gen_reg_rtx (V8HImode);
4008 h2 = gen_reg_rtx (V8HImode);
4009 l2 = gen_reg_rtx (V8HImode);
4011 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4012 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4013 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4014 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4015 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI inputs into one V4SI result.  Both inputs are viewed as
;; V4SI and passed through three doubleword interleaves (one high/low pair
;; followed by a final low interleave) whose result lands in operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
4025 (define_expand "vec_pack_trunc_v2di"
4026 [(match_operand:V4SI 0 "register_operand" "")
4027 (match_operand:V2DI 1 "register_operand" "")
4028 (match_operand:V2DI 2 "register_operand" "")]
4031 rtx op1, op2, h1, l1;
4033 op1 = gen_lowpart (V4SImode, operands[1]);
4034 op2 = gen_lowpart (V4SImode, operands[2]);
4035 h1 = gen_reg_rtx (V4SImode);
4036 l1 = gen_reg_rtx (V4SImode);
4038 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4039 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4040 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the high byte interleave: the index list pairs
;; elements 8..15 of operand 1 with elements 24..31 (the matching elements of
;; operand 2 in the concatenation).  The body forwards to
;; gen_sse2_punpckhbw.
;; NOTE(review): the constraint strings mirror those on sse2_punpckhbw;
;; confirm whether they have any effect on an expander.
4044 (define_expand "vec_interleave_highv16qi"
4045 [(set (match_operand:V16QI 0 "register_operand" "=x")
4048 (match_operand:V16QI 1 "register_operand" "0")
4049 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4050 (parallel [(const_int 8) (const_int 24)
4051 (const_int 9) (const_int 25)
4052 (const_int 10) (const_int 26)
4053 (const_int 11) (const_int 27)
4054 (const_int 12) (const_int 28)
4055 (const_int 13) (const_int 29)
4056 (const_int 14) (const_int 30)
4057 (const_int 15) (const_int 31)])))]
4060 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the low byte interleave: the index list pairs elements
;; 0..7 of operand 1 with elements 16..23 (the matching elements of operand 2
;; in the concatenation).  The body forwards to gen_sse2_punpcklbw.
;; NOTE(review): the constraint strings mirror those on sse2_punpcklbw;
;; confirm whether they have any effect on an expander.
4064 (define_expand "vec_interleave_lowv16qi"
4065 [(set (match_operand:V16QI 0 "register_operand" "=x")
4068 (match_operand:V16QI 1 "register_operand" "0")
4069 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4070 (parallel [(const_int 0) (const_int 16)
4071 (const_int 1) (const_int 17)
4072 (const_int 2) (const_int 18)
4073 (const_int 3) (const_int 19)
4074 (const_int 4) (const_int 20)
4075 (const_int 5) (const_int 21)
4076 (const_int 6) (const_int 22)
4077 (const_int 7) (const_int 23)])))]
4080 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the high halfword interleave: the index list pairs
;; elements 4..7 with elements 12..15 of the concatenated inputs.  The body
;; forwards to gen_sse2_punpckhwd.
;; NOTE(review): the constraint strings mirror those on sse2_punpckhwd;
;; confirm whether they have any effect on an expander.
4084 (define_expand "vec_interleave_highv8hi"
4085 [(set (match_operand:V8HI 0 "register_operand" "=x")
4088 (match_operand:V8HI 1 "register_operand" "0")
4089 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4090 (parallel [(const_int 4) (const_int 12)
4091 (const_int 5) (const_int 13)
4092 (const_int 6) (const_int 14)
4093 (const_int 7) (const_int 15)])))]
4096 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the low halfword interleave: the index list pairs
;; elements 0..3 with elements 8..11 of the concatenated inputs.  The body
;; forwards to gen_sse2_punpcklwd.
;; NOTE(review): the constraint strings mirror those on sse2_punpcklwd;
;; confirm whether they have any effect on an expander.
4100 (define_expand "vec_interleave_lowv8hi"
4101 [(set (match_operand:V8HI 0 "register_operand" "=x")
4104 (match_operand:V8HI 1 "register_operand" "0")
4105 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4106 (parallel [(const_int 0) (const_int 8)
4107 (const_int 1) (const_int 9)
4108 (const_int 2) (const_int 10)
4109 (const_int 3) (const_int 11)])))]
4112 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the high doubleword interleave: the index list selects
;; elements 2, 6, 3, 7 of the concatenated inputs.  The body forwards to
;; gen_sse2_punpckhdq.
;; NOTE(review): the constraint strings mirror those on sse2_punpckhdq;
;; confirm whether they have any effect on an expander.
4116 (define_expand "vec_interleave_highv4si"
4117 [(set (match_operand:V4SI 0 "register_operand" "=x")
4120 (match_operand:V4SI 1 "register_operand" "0")
4121 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4122 (parallel [(const_int 2) (const_int 6)
4123 (const_int 3) (const_int 7)])))]
4126 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the low doubleword interleave: the index list selects
;; elements 0, 4, 1, 5 of the concatenated inputs.  The body forwards to
;; gen_sse2_punpckldq.
;; NOTE(review): the constraint strings mirror those on sse2_punpckldq;
;; confirm whether they have any effect on an expander.
4130 (define_expand "vec_interleave_lowv4si"
4131 [(set (match_operand:V4SI 0 "register_operand" "=x")
4134 (match_operand:V4SI 1 "register_operand" "0")
4135 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4136 (parallel [(const_int 0) (const_int 4)
4137 (const_int 1) (const_int 5)])))]
4140 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high quadword interleave.  The body forwards to
;; gen_sse2_punpckhqdq.
;; NOTE(review): only the first selected index (1) is visible; the rest of
;; the index list and the enabling condition are not visible in this excerpt.
4144 (define_expand "vec_interleave_highv2di"
4145 [(set (match_operand:V2DI 0 "register_operand" "=x")
4148 (match_operand:V2DI 1 "register_operand" "0")
4149 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4150 (parallel [(const_int 1)
4154 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low quadword interleave.  The body forwards to
;; gen_sse2_punpcklqdq.
;; NOTE(review): only the first selected index (0) is visible; the rest of
;; the index list and the enabling condition are not visible in this excerpt.
4158 (define_expand "vec_interleave_lowv2di"
4159 [(set (match_operand:V2DI 0 "register_operand" "=x")
4162 (match_operand:V2DI 1 "register_operand" "0")
4163 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4164 (parallel [(const_int 0)
4168 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Pack insn taking two V8HI inputs and producing one V16QI result; emits
;; packsswb.  Operand 1 is a register tied to the destination.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible in this excerpt.
4172 (define_insn "sse2_packsswb"
4173 [(set (match_operand:V16QI 0 "register_operand" "=x")
4176 (match_operand:V8HI 1 "register_operand" "0"))
4178 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4180 "packsswb\t{%2, %0|%0, %2}"
4181 [(set_attr "type" "sselog")
4182 (set_attr "prefix_data16" "1")
4183 (set_attr "mode" "TI")])
;; Pack insn taking two V4SI inputs and producing one V8HI result; emits
;; packssdw.  Operand 1 is a register tied to the destination.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible in this excerpt.
4185 (define_insn "sse2_packssdw"
4186 [(set (match_operand:V8HI 0 "register_operand" "=x")
4189 (match_operand:V4SI 1 "register_operand" "0"))
4191 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4193 "packssdw\t{%2, %0|%0, %2}"
4194 [(set_attr "type" "sselog")
4195 (set_attr "prefix_data16" "1")
4196 (set_attr "mode" "TI")])
;; Pack insn taking two V8HI inputs and producing one V16QI result; emits
;; packuswb.  Operand 1 is a register tied to the destination.
;; NOTE(review): the narrowing rtx codes and the enabling condition are on
;; lines not visible in this excerpt.
4198 (define_insn "sse2_packuswb"
4199 [(set (match_operand:V16QI 0 "register_operand" "=x")
4202 (match_operand:V8HI 1 "register_operand" "0"))
4204 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4206 "packuswb\t{%2, %0|%0, %2}"
4207 [(set_attr "type" "sselog")
4208 (set_attr "prefix_data16" "1")
4209 (set_attr "mode" "TI")])
;; High byte interleave insn on V16QI; emits punpckhbw.  The index list
;; alternates elements 8..15 with elements 24..31 of the two inputs taken
;; together.  Operand 1 is a register tied to the destination.
;; NOTE(review): the outer rtx code lines and the enabling condition are not
;; visible in this excerpt.
4211 (define_insn "sse2_punpckhbw"
4212 [(set (match_operand:V16QI 0 "register_operand" "=x")
4215 (match_operand:V16QI 1 "register_operand" "0")
4216 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4217 (parallel [(const_int 8) (const_int 24)
4218 (const_int 9) (const_int 25)
4219 (const_int 10) (const_int 26)
4220 (const_int 11) (const_int 27)
4221 (const_int 12) (const_int 28)
4222 (const_int 13) (const_int 29)
4223 (const_int 14) (const_int 30)
4224 (const_int 15) (const_int 31)])))]
4226 "punpckhbw\t{%2, %0|%0, %2}"
4227 [(set_attr "type" "sselog")
4228 (set_attr "prefix_data16" "1")
4229 (set_attr "mode" "TI")])
;; Low byte interleave insn on V16QI; emits punpcklbw.  The index list
;; alternates elements 0..7 with elements 16..23 of the two inputs taken
;; together.  Operand 1 is a register tied to the destination.
;; NOTE(review): the outer rtx code lines and the enabling condition are not
;; visible in this excerpt.
4231 (define_insn "sse2_punpcklbw"
4232 [(set (match_operand:V16QI 0 "register_operand" "=x")
4235 (match_operand:V16QI 1 "register_operand" "0")
4236 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4237 (parallel [(const_int 0) (const_int 16)
4238 (const_int 1) (const_int 17)
4239 (const_int 2) (const_int 18)
4240 (const_int 3) (const_int 19)
4241 (const_int 4) (const_int 20)
4242 (const_int 5) (const_int 21)
4243 (const_int 6) (const_int 22)
4244 (const_int 7) (const_int 23)])))]
4246 "punpcklbw\t{%2, %0|%0, %2}"
4247 [(set_attr "type" "sselog")
4248 (set_attr "prefix_data16" "1")
4249 (set_attr "mode" "TI")])
;; High halfword interleave insn on V8HI; emits punpckhwd.  The index list
;; alternates elements 4..7 with elements 12..15 of the two inputs taken
;; together.  Operand 1 is a register tied to the destination.
;; NOTE(review): the outer rtx code lines and the enabling condition are not
;; visible in this excerpt.
4251 (define_insn "sse2_punpckhwd"
4252 [(set (match_operand:V8HI 0 "register_operand" "=x")
4255 (match_operand:V8HI 1 "register_operand" "0")
4256 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4257 (parallel [(const_int 4) (const_int 12)
4258 (const_int 5) (const_int 13)
4259 (const_int 6) (const_int 14)
4260 (const_int 7) (const_int 15)])))]
4262 "punpckhwd\t{%2, %0|%0, %2}"
4263 [(set_attr "type" "sselog")
4264 (set_attr "prefix_data16" "1")
4265 (set_attr "mode" "TI")])
;; sse2_punpcklwd: emits "punpcklwd" on V8HI.  The selection list
;; alternates indices 0..3 with 8..11 (low words of the two inputs,
;; interleaved).  Operand 1 is tied to the destination.
4267 (define_insn "sse2_punpcklwd"
4268 [(set (match_operand:V8HI 0 "register_operand" "=x")
4271 (match_operand:V8HI 1 "register_operand" "0")
4272 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4273 (parallel [(const_int 0) (const_int 8)
4274 (const_int 1) (const_int 9)
4275 (const_int 2) (const_int 10)
4276 (const_int 3) (const_int 11)])))]
4278 "punpcklwd\t{%2, %0|%0, %2}"
4279 [(set_attr "type" "sselog")
4280 (set_attr "prefix_data16" "1")
4281 (set_attr "mode" "TI")])
;; sse2_punpckhdq: emits "punpckhdq" on V4SI.  The selection list is
;; 2, 6, 3, 7 (high doublewords of the two inputs, interleaved).
;; Operand 1 is tied to the destination.
4283 (define_insn "sse2_punpckhdq"
4284 [(set (match_operand:V4SI 0 "register_operand" "=x")
4287 (match_operand:V4SI 1 "register_operand" "0")
4288 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4289 (parallel [(const_int 2) (const_int 6)
4290 (const_int 3) (const_int 7)])))]
4292 "punpckhdq\t{%2, %0|%0, %2}"
4293 [(set_attr "type" "sselog")
4294 (set_attr "prefix_data16" "1")
4295 (set_attr "mode" "TI")])
;; sse2_punpckldq: emits "punpckldq" on V4SI.  The selection list is
;; 0, 4, 1, 5 (low doublewords of the two inputs, interleaved).
;; Operand 1 is tied to the destination.
4297 (define_insn "sse2_punpckldq"
4298 [(set (match_operand:V4SI 0 "register_operand" "=x")
4301 (match_operand:V4SI 1 "register_operand" "0")
4302 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4303 (parallel [(const_int 0) (const_int 4)
4304 (const_int 1) (const_int 5)])))]
4306 "punpckldq\t{%2, %0|%0, %2}"
4307 [(set_attr "type" "sselog")
4308 (set_attr "prefix_data16" "1")
4309 (set_attr "mode" "TI")])
;; sse2_punpckhqdq: emits "punpckhqdq" on V2DI.  The selection list
;; starts at index 1 (the high quadword of operand 1).  Operand 1 is
;; tied to the destination; operand 2 may be register or memory.
4311 (define_insn "sse2_punpckhqdq"
4312 [(set (match_operand:V2DI 0 "register_operand" "=x")
4315 (match_operand:V2DI 1 "register_operand" "0")
4316 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4317 (parallel [(const_int 1)
4320 "punpckhqdq\t{%2, %0|%0, %2}"
4321 [(set_attr "type" "sselog")
4322 (set_attr "prefix_data16" "1")
4323 (set_attr "mode" "TI")])
;; sse2_punpcklqdq: emits "punpcklqdq" on V2DI.  The selection list
;; starts at index 0 (the low quadword of operand 1).  Operand 1 is
;; tied to the destination; operand 2 may be register or memory.
4325 (define_insn "sse2_punpcklqdq"
4326 [(set (match_operand:V2DI 0 "register_operand" "=x")
4329 (match_operand:V2DI 1 "register_operand" "0")
4330 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4331 (parallel [(const_int 0)
4334 "punpcklqdq\t{%2, %0|%0, %2}"
4335 [(set_attr "type" "sselog")
4336 (set_attr "prefix_data16" "1")
4337 (set_attr "mode" "TI")])
;; *sse4_1_pinsrb: inserts the QImode operand 2 into one byte of the
;; V16QI operand 1 (tied to the destination).  Operand 3 is matched as a
;; power of two (1..32768); the output code converts it to the
;; element index with exact_log2 before printing "pinsrb".
;; %k2 prints operand 2 using its 32-bit register name.
4339 (define_insn "*sse4_1_pinsrb"
4340 [(set (match_operand:V16QI 0 "register_operand" "=x")
4342 (vec_duplicate:V16QI
4343 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4344 (match_operand:V16QI 1 "register_operand" "0")
4345 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4348 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4349 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4351 [(set_attr "type" "sselog")
4352 (set_attr "prefix_extra" "1")
4353 (set_attr "mode" "TI")])
;; *sse2_pinsrw: inserts the HImode operand 2 into one word of the
;; V8HI operand 1 (tied to the destination).  Operand 3 is matched as a
;; power of two (1..128) and converted to the element index with
;; exact_log2 before "pinsrw" is printed.
4355 (define_insn "*sse2_pinsrw"
4356 [(set (match_operand:V8HI 0 "register_operand" "=x")
4359 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4360 (match_operand:V8HI 1 "register_operand" "0")
4361 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4364 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4365 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4367 [(set_attr "type" "sselog")
4368 (set_attr "prefix_data16" "1")
4369 (set_attr "mode" "TI")])
4371 ;; It must come before sse2_loadld since it is preferred.
;; *sse4_1_pinsrd: inserts the SImode operand 2 into one element of the
;; V4SI operand 1 (tied to the destination).  Operand 3 is matched as a
;; power of two (1..8) and converted to the element index with
;; exact_log2 before "pinsrd" is printed.
4372 (define_insn "*sse4_1_pinsrd"
4373 [(set (match_operand:V4SI 0 "register_operand" "=x")
4376 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4377 (match_operand:V4SI 1 "register_operand" "0")
4378 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4381 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4382 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4384 [(set_attr "type" "sselog")
4385 (set_attr "prefix_extra" "1")
4386 (set_attr "mode" "TI")])
;; *sse4_1_pinsrq: inserts the DImode operand 2 into one element of the
;; V2DI operand 1 (tied to the destination).  Operand 3 is matched as a
;; power of two (1..2) and converted to the element index with
;; exact_log2 before "pinsrq" is printed.
4388 (define_insn "*sse4_1_pinsrq"
4389 [(set (match_operand:V2DI 0 "register_operand" "=x")
4392 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4393 (match_operand:V2DI 1 "register_operand" "0")
4394 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4397 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4398 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4400 [(set_attr "type" "sselog")
4401 (set_attr "prefix_extra" "1")
4402 (set_attr "mode" "TI")])
;; *sse4_1_pextrb: emits "pextrb", extracting byte number operand 2
;; (0..15) of the V16QI register operand 1 into an SImode general
;; register.
4404 (define_insn "*sse4_1_pextrb"
4405 [(set (match_operand:SI 0 "register_operand" "=r")
4408 (match_operand:V16QI 1 "register_operand" "x")
4409 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4411 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4412 [(set_attr "type" "sselog")
4413 (set_attr "prefix_extra" "1")
4414 (set_attr "mode" "TI")])
;; *sse4_1_pextrb_memory: store form of "pextrb".  Byte number
;; operand 2 (0..15) of the V16QI register operand 1 is written to a
;; QImode memory destination.
4416 (define_insn "*sse4_1_pextrb_memory"
4417 [(set (match_operand:QI 0 "memory_operand" "=m")
4419 (match_operand:V16QI 1 "register_operand" "x")
4420 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4422 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4423 [(set_attr "type" "sselog")
4424 (set_attr "prefix_extra" "1")
4425 (set_attr "mode" "TI")])
;; *sse2_pextrw: emits "pextrw", extracting word number operand 2
;; (0..7) of the V8HI register operand 1 into an SImode general
;; register.
4427 (define_insn "*sse2_pextrw"
4428 [(set (match_operand:SI 0 "register_operand" "=r")
4431 (match_operand:V8HI 1 "register_operand" "x")
4432 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4434 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4435 [(set_attr "type" "sselog")
4436 (set_attr "prefix_data16" "1")
4437 (set_attr "mode" "TI")])
;; *sse4_1_pextrw_memory: store form of "pextrw".  Word number
;; operand 2 (0..7) of the V8HI register operand 1 is written to an
;; HImode memory destination.
4439 (define_insn "*sse4_1_pextrw_memory"
4440 [(set (match_operand:HI 0 "memory_operand" "=m")
4442 (match_operand:V8HI 1 "register_operand" "x")
4443 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4445 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4446 [(set_attr "type" "sselog")
4447 (set_attr "prefix_extra" "1")
4448 (set_attr "mode" "TI")])
;; *sse4_1_pextrd: emits "pextrd", extracting element number operand 2
;; (0..3) of the V4SI register operand 1 into an SImode general
;; register or memory ("rm").
4450 (define_insn "*sse4_1_pextrd"
4451 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4453 (match_operand:V4SI 1 "register_operand" "x")
4454 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4456 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4457 [(set_attr "type" "sselog")
4458 (set_attr "prefix_extra" "1")
4459 (set_attr "mode" "TI")])
4461 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; *sse4_1_pextrq: emits "pextrq", extracting element number operand 2
;; (0..1) of the V2DI register operand 1 into a DImode general register
;; or memory.  Enabled only when both TARGET_SSE4_1 and TARGET_64BIT
;; hold.
4462 (define_insn "*sse4_1_pextrq"
4463 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4465 (match_operand:V2DI 1 "register_operand" "x")
4466 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4467 "TARGET_SSE4_1 && TARGET_64BIT"
4468 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4469 [(set_attr "type" "sselog")
4470 (set_attr "prefix_extra" "1")
4471 (set_attr "mode" "TI")])
;; sse2_pshufd: expander taking an integer shuffle mask in operand 2.
;; It splits the mask into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7)
;; and passes them as separate selector operands to sse2_pshufd_1.
4473 (define_expand "sse2_pshufd"
4474 [(match_operand:V4SI 0 "register_operand" "")
4475 (match_operand:V4SI 1 "nonimmediate_operand" "")
4476 (match_operand:SI 2 "const_int_operand" "")]
4479 int mask = INTVAL (operands[2]);
4480 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4481 GEN_INT ((mask >> 0) & 3),
4482 GEN_INT ((mask >> 2) & 3),
4483 GEN_INT ((mask >> 4) & 3),
4484 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshufd_1: V4SI shuffle with one selector operand (each 0..3)
;; per result element.  The output code packs operands 2-5 back into a
;; single immediate (2 bits each, operand 2 lowest), stores it in
;; operands[2], and prints "pshufd".
4488 (define_insn "sse2_pshufd_1"
4489 [(set (match_operand:V4SI 0 "register_operand" "=x")
4491 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4492 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4493 (match_operand 3 "const_0_to_3_operand" "")
4494 (match_operand 4 "const_0_to_3_operand" "")
4495 (match_operand 5 "const_0_to_3_operand" "")])))]
4499 mask |= INTVAL (operands[2]) << 0;
4500 mask |= INTVAL (operands[3]) << 2;
4501 mask |= INTVAL (operands[4]) << 4;
4502 mask |= INTVAL (operands[5]) << 6;
4503 operands[2] = GEN_INT (mask);
4505 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4507 [(set_attr "type" "sselog1")
4508 (set_attr "prefix_data16" "1")
4509 (set_attr "mode" "TI")])
;; sse2_pshuflw: expander taking an integer shuffle mask in operand 2.
;; It splits the mask into four 2-bit fields and passes them as
;; separate selector operands to sse2_pshuflw_1.
4511 (define_expand "sse2_pshuflw"
4512 [(match_operand:V8HI 0 "register_operand" "")
4513 (match_operand:V8HI 1 "nonimmediate_operand" "")
4514 (match_operand:SI 2 "const_int_operand" "")]
4517 int mask = INTVAL (operands[2]);
4518 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4519 GEN_INT ((mask >> 0) & 3),
4520 GEN_INT ((mask >> 2) & 3),
4521 GEN_INT ((mask >> 4) & 3),
4522 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshuflw_1: V8HI shuffle whose first four selectors are
;; operands 2-5 (each 0..3).  The output code packs them into a single
;; immediate (2 bits each, operand 2 lowest), stores it in operands[2],
;; and prints "pshuflw".
4526 (define_insn "sse2_pshuflw_1"
4527 [(set (match_operand:V8HI 0 "register_operand" "=x")
4529 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4530 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4531 (match_operand 3 "const_0_to_3_operand" "")
4532 (match_operand 4 "const_0_to_3_operand" "")
4533 (match_operand 5 "const_0_to_3_operand" "")
4541 mask |= INTVAL (operands[2]) << 0;
4542 mask |= INTVAL (operands[3]) << 2;
4543 mask |= INTVAL (operands[4]) << 4;
4544 mask |= INTVAL (operands[5]) << 6;
4545 operands[2] = GEN_INT (mask);
4547 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4549 [(set_attr "type" "sselog")
4550 (set_attr "prefix_rep" "1")
4551 (set_attr "mode" "TI")])
;; sse2_pshufhw: expander taking an integer shuffle mask in operand 2.
;; Each 2-bit field of the mask is biased by +4 so the selectors passed
;; to sse2_pshufhw_1 fall in 4..7, matching that pattern's
;; const_4_to_7_operand predicates.
4553 (define_expand "sse2_pshufhw"
4554 [(match_operand:V8HI 0 "register_operand" "")
4555 (match_operand:V8HI 1 "nonimmediate_operand" "")
4556 (match_operand:SI 2 "const_int_operand" "")]
4559 int mask = INTVAL (operands[2]);
4560 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4561 GEN_INT (((mask >> 0) & 3) + 4),
4562 GEN_INT (((mask >> 2) & 3) + 4),
4563 GEN_INT (((mask >> 4) & 3) + 4),
4564 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse2_pshufhw_1: V8HI shuffle whose last four selectors are
;; operands 2-5 (each 4..7).  The output code removes the +4 bias,
;; packs the selectors into a single immediate (2 bits each, operand 2
;; lowest), stores it in operands[2], and prints "pshufhw".
4568 (define_insn "sse2_pshufhw_1"
4569 [(set (match_operand:V8HI 0 "register_operand" "=x")
4571 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4572 (parallel [(const_int 0)
4576 (match_operand 2 "const_4_to_7_operand" "")
4577 (match_operand 3 "const_4_to_7_operand" "")
4578 (match_operand 4 "const_4_to_7_operand" "")
4579 (match_operand 5 "const_4_to_7_operand" "")])))]
4583 mask |= (INTVAL (operands[2]) - 4) << 0;
4584 mask |= (INTVAL (operands[3]) - 4) << 2;
4585 mask |= (INTVAL (operands[4]) - 4) << 4;
4586 mask |= (INTVAL (operands[5]) - 4) << 6;
4587 operands[2] = GEN_INT (mask);
4589 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4591 [(set_attr "type" "sselog")
4592 (set_attr "prefix_rep" "1")
4593 (set_attr "mode" "TI")])
;; sse2_loadd: expander producing a V4SI from the SImode operand 1.
;; The preparation statement supplies an all-zero V4SI constant as
;; operands[2].
4595 (define_expand "sse2_loadd"
4596 [(set (match_operand:V4SI 0 "register_operand" "")
4599 (match_operand:SI 1 "nonimmediate_operand" ""))
4603 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld: places the SImode operand 2 into a V4SI whose remaining
;; contents come from operand 1 (either the zero constant "C" or the
;; destination itself, "0").  Alternatives 0-1 (source in memory or a
;; general register) use "movd"; alternatives 2-3 (source in memory or
;; an SSE register) use "movss".
4605 (define_insn "sse2_loadld"
4606 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4609 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4610 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4614 movd\t{%2, %0|%0, %2}
4615 movd\t{%2, %0|%0, %2}
4616 movss\t{%2, %0|%0, %2}
4617 movss\t{%2, %0|%0, %2}"
4618 [(set_attr "type" "ssemov")
4619 (set_attr "mode" "TI,TI,V4SF,SF")])
;; sse2_stored: extracts element 0 of a V4SI register as an SImode
;; value.  After reload, when inter-unit moves are enabled or the
;; destination is memory or not a general register, it is split into a
;; plain SImode move by re-expressing operand 1 as an SImode REG with
;; the same register number.
4621 (define_insn_and_split "sse2_stored"
4622 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4624 (match_operand:V4SI 1 "register_operand" "x,Yi")
4625 (parallel [(const_int 0)])))]
4628 "&& reload_completed
4629 && (TARGET_INTER_UNIT_MOVES
4630 || MEM_P (operands [0])
4631 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4632 [(set (match_dup 0) (match_dup 1))]
4634 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; sse_storeq: expander for extracting element 0 of a V2DI register
;; into a DImode register or memory destination.
4637 (define_expand "sse_storeq"
4638 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4640 (match_operand:V2DI 1 "register_operand" "")
4641 (parallel [(const_int 0)])))]
;; *sse2_storeq_rex64: 64-bit-only extract of element 0 of a V2DI.
;; It is rejected when both operands are memory.  The third
;; alternative (general-register destination, offsettable-memory
;; source) is a plain "mov{q}" of type imov; the first two alternatives
;; leave type and mode as "*".
4645 (define_insn "*sse2_storeq_rex64"
4646 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4648 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4649 (parallel [(const_int 0)])))]
4650 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4654 mov{q}\t{%1, %0|%0, %1}"
4655 [(set_attr "type" "*,*,imov")
4656 (set_attr "mode" "*,*,DI")])
;; *sse2_storeq: extract of element 0 of a V2DI register into a DImode
;; memory or SSE-register destination ("=mx").  The second operand
;; list that follows, with its condition and
;; (set (match_dup 0) (match_dup 1)) replacement, rewrites the extract
;; as a plain DImode move by re-expressing operand 1 as a DImode REG
;; with the same register number; it applies when inter-unit moves are
;; enabled or the destination is memory or not a general register.
4658 (define_insn "*sse2_storeq"
4659 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4661 (match_operand:V2DI 1 "register_operand" "x")
4662 (parallel [(const_int 0)])))]
4667 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4669 (match_operand:V2DI 1 "register_operand" "")
4670 (parallel [(const_int 0)])))]
4673 && (TARGET_INTER_UNIT_MOVES
4674 || MEM_P (operands [0])
4675 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4676 [(set (match_dup 0) (match_dup 1))]
4678 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; *vec_extractv2di_1_rex64: 64-bit-only extract of element 1 of a
;; V2DI; rejected when both operands are memory.  Alternatives:
;;   0: SSE reg -> memory                  movhps
;;   1: in place in an SSE reg             psrldq by 8 bytes
;;   2: offsettable memory -> SSE reg      movq from %H1
;;   3: offsettable memory -> general reg  mov{q} from %H1
4681 (define_insn "*vec_extractv2di_1_rex64"
4682 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4684 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4685 (parallel [(const_int 1)])))]
4686 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4688 movhps\t{%1, %0|%0, %1}
4689 psrldq\t{$8, %0|%0, 8}
4690 movq\t{%H1, %0|%0, %H1}
4691 mov{q}\t{%H1, %0|%0, %H1}"
4692 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4693 (set_attr "memory" "*,none,*,*")
4694 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_sse2: SSE2 extract of element 1 of a V2DI;
;; rejected when both operands are memory.  Alternatives:
;;   0: SSE reg -> memory              movhps
;;   1: in place in an SSE reg         psrldq by 8 bytes
;;   2: offsettable memory -> SSE reg  movq from %H1
4696 (define_insn "*vec_extractv2di_1_sse2"
4697 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4699 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4700 (parallel [(const_int 1)])))]
4702 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4704 movhps\t{%1, %0|%0, %1}
4705 psrldq\t{$8, %0|%0, 8}
4706 movq\t{%H1, %0|%0, %H1}"
4707 [(set_attr "type" "ssemov,sseishft,ssemov")
4708 (set_attr "memory" "*,none,*")
4709 (set_attr "mode" "V2SF,TI,TI")])
4711 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; *vec_extractv2di_1_sse: extract of element 1 of a V2DI when SSE is
;; enabled but SSE2 is not; rejected when both operands are memory.
;; Alternatives:
;;   0: SSE reg -> memory              movhps
;;   1: SSE reg -> SSE reg             movhlps
;;   2: offsettable memory -> SSE reg  movlps from %H1
4712 (define_insn "*vec_extractv2di_1_sse"
4713 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4715 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4716 (parallel [(const_int 1)])))]
4717 "!TARGET_SSE2 && TARGET_SSE
4718 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4720 movhps\t{%1, %0|%0, %1}
4721 movhlps\t{%1, %0|%0, %1}
4722 movlps\t{%H1, %0|%0, %H1}"
4723 [(set_attr "type" "ssemov")
4724 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si: builds a V4SI from the SImode register operand 1.
;; Alternative 0 uses "pshufd" with immediate 0; alternative 1
;; (operand 1 tied to the destination) uses "shufps" with immediate 0
;; on the destination itself.
4726 (define_insn "*vec_dupv4si"
4727 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4729 (match_operand:SI 1 "register_operand" " Yt,0")))]
4732 pshufd\t{$0, %1, %0|%0, %1, 0}
4733 shufps\t{$0, %0, %0|%0, %0, 0}"
4734 [(set_attr "type" "sselog1")
4735 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di: builds a V2DI from the DImode register operand 1,
;; which is tied to the destination in both alternatives.  The two
;; alternatives are typed sselog1 (mode TI) and ssemov (mode V4SF).
4737 (define_insn "*vec_dupv2di"
4738 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4740 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4745 [(set_attr "type" "sselog1,ssemov")
4746 (set_attr "mode" "TI,V4SF")])
4748 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4749 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4750 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse2_concatv2si: forms a V2SI from two SImode operands.
;; Alternatives 0-1 target SSE registers and 2-3 MMX registers ("*y").
;; When operand 1 is tied to the destination, "punpckldq" merges in
;; operand 2; when operand 2 is the zero constant "C", a single "movd"
;; of operand 1 is emitted.
4751 (define_insn "*sse2_concatv2si"
4752 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4754 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4755 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4758 punpckldq\t{%2, %0|%0, %2}
4759 movd\t{%1, %0|%0, %1}
4760 punpckldq\t{%2, %0|%0, %2}
4761 movd\t{%1, %0|%0, %1}"
4762 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4763 (set_attr "mode" "TI,TI,DI,DI")])
;; *sse1_concatv2si: forms a V2SI from two SImode operands using
;; single-precision moves for the SSE alternatives: "unpcklps" when
;; operand 1 is tied to the destination, "movss" from memory when
;; operand 2 is the zero constant "C".  Alternatives 2-3 are the MMX
;; forms ("punpckldq" / "movd").
4765 (define_insn "*sse1_concatv2si"
4766 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4768 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4769 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4772 unpcklps\t{%2, %0|%0, %2}
4773 movss\t{%1, %0|%0, %1}
4774 punpckldq\t{%2, %0|%0, %2}
4775 movd\t{%1, %0|%0, %1}"
4776 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4777 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1: forms a V4SI from two V2SI halves.  Operand 1 is
;; always tied to the destination.  Operand 2 is merged with
;; "punpcklqdq", "movlhps" (register source) or "movhps" (memory
;; source).
4779 (define_insn "*vec_concatv4si_1"
4780 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4782 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4783 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4786 punpcklqdq\t{%2, %0|%0, %2}
4787 movlhps\t{%2, %0|%0, %2}
4788 movhps\t{%2, %0|%0, %2}"
4789 [(set_attr "type" "sselog,ssemov,ssemov")
4790 (set_attr "mode" "TI,V4SF,V2SF")])
;; vec_concatv2di: 32-bit-target (!TARGET_64BIT) pattern forming a V2DI
;; from two DImode operands.  Alternatives:
;;   0: memory, zero "C"           movq
;;   1: MMX register, zero "C"     movq2dq
;;   2: tied op1, SSE2 register    punpcklqdq
;;   3: tied op1, SSE register     movlhps
;;   4: tied op1, memory op2       movhps
;;   5: memory op1, tied op2       movlps
4792 (define_insn "vec_concatv2di"
4793 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4795 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4796 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4797 "!TARGET_64BIT && TARGET_SSE"
4799 movq\t{%1, %0|%0, %1}
4800 movq2dq\t{%1, %0|%0, %1}
4801 punpcklqdq\t{%2, %0|%0, %2}
4802 movlhps\t{%2, %0|%0, %2}
4803 movhps\t{%2, %0|%0, %2}
4804 movlps\t{%1, %0|%0, %1}"
4805 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4806 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; *vec_concatv2di_rex: variant of the V2DI concat pattern with one
;; extra alternative (1) that takes operand 1 from a general register
;; ("r" -> "Yi") via "movq".  The remaining alternatives use movq,
;; movq2dq, punpcklqdq, movlhps, movhps and movlps in that order.
4808 (define_insn "*vec_concatv2di_rex"
4809 [(set (match_operand:V2DI 0 "register_operand" "=Yt,Yi,!Yt,Yt,x,x,x")
4811 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4812 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Yt,x,m,0")))]
4815 movq\t{%1, %0|%0, %1}
4816 movq\t{%1, %0|%0, %1}
4817 movq2dq\t{%1, %0|%0, %1}
4818 punpcklqdq\t{%2, %0|%0, %2}
4819 movlhps\t{%2, %0|%0, %2}
4820 movhps\t{%2, %0|%0, %2}
4821 movlps\t{%1, %0|%0, %1}"
4822 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4823 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_setv2di: expander that stores the DImode operand 1 into element
;; number operand 2 of the V2DI operand 0.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
4825 (define_expand "vec_setv2di"
4826 [(match_operand:V2DI 0 "register_operand" "")
4827 (match_operand:DI 1 "register_operand" "")
4828 (match_operand 2 "const_int_operand" "")]
4831 ix86_expand_vector_set (false, operands[0], operands[1],
4832 INTVAL (operands[2]));
;; vec_extractv2di: expander that reads element number operand 2 of the
;; V2DI operand 1 into the DImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
4836 (define_expand "vec_extractv2di"
4837 [(match_operand:DI 0 "register_operand" "")
4838 (match_operand:V2DI 1 "register_operand" "")
4839 (match_operand 2 "const_int_operand" "")]
4842 ix86_expand_vector_extract (false, operands[0], operands[1],
4843 INTVAL (operands[2]));
;; vec_initv2di: expander that initializes the V2DI operand 0 from
;; operand 1 (no predicate or mode is imposed on it here).  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument.
4847 (define_expand "vec_initv2di"
4848 [(match_operand:V2DI 0 "register_operand" "")
4849 (match_operand 1 "" "")]
4852 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4si: expander that stores the SImode operand 1 into element
;; number operand 2 of the V4SI operand 0.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
4856 (define_expand "vec_setv4si"
4857 [(match_operand:V4SI 0 "register_operand" "")
4858 (match_operand:SI 1 "register_operand" "")
4859 (match_operand 2 "const_int_operand" "")]
4862 ix86_expand_vector_set (false, operands[0], operands[1],
4863 INTVAL (operands[2]));
;; vec_extractv4si: expander that reads element number operand 2 of the
;; V4SI operand 1 into the SImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
4867 (define_expand "vec_extractv4si"
4868 [(match_operand:SI 0 "register_operand" "")
4869 (match_operand:V4SI 1 "register_operand" "")
4870 (match_operand 2 "const_int_operand" "")]
4873 ix86_expand_vector_extract (false, operands[0], operands[1],
4874 INTVAL (operands[2]));
;; vec_initv4si: expander that initializes the V4SI operand 0 from
;; operand 1 (no predicate or mode is imposed on it here).  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument.
4878 (define_expand "vec_initv4si"
4879 [(match_operand:V4SI 0 "register_operand" "")
4880 (match_operand 1 "" "")]
4883 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv8hi: expander that stores the HImode operand 1 into element
;; number operand 2 of the V8HI operand 0.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
4887 (define_expand "vec_setv8hi"
4888 [(match_operand:V8HI 0 "register_operand" "")
4889 (match_operand:HI 1 "register_operand" "")
4890 (match_operand 2 "const_int_operand" "")]
4893 ix86_expand_vector_set (false, operands[0], operands[1],
4894 INTVAL (operands[2]));
;; vec_extractv8hi: expander that reads element number operand 2 of the
;; V8HI operand 1 into the HImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
4898 (define_expand "vec_extractv8hi"
4899 [(match_operand:HI 0 "register_operand" "")
4900 (match_operand:V8HI 1 "register_operand" "")
4901 (match_operand 2 "const_int_operand" "")]
4904 ix86_expand_vector_extract (false, operands[0], operands[1],
4905 INTVAL (operands[2]));
;; vec_initv8hi: expander that initializes the V8HI operand 0 from
;; operand 1 (no predicate or mode is imposed on it here).  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument.
4909 (define_expand "vec_initv8hi"
4910 [(match_operand:V8HI 0 "register_operand" "")
4911 (match_operand 1 "" "")]
4914 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv16qi: expander that stores the QImode operand 1 into element
;; number operand 2 of the V16QI operand 0.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
4918 (define_expand "vec_setv16qi"
4919 [(match_operand:V16QI 0 "register_operand" "")
4920 (match_operand:QI 1 "register_operand" "")
4921 (match_operand 2 "const_int_operand" "")]
4924 ix86_expand_vector_set (false, operands[0], operands[1],
4925 INTVAL (operands[2]));
;; vec_extractv16qi: expander that reads element number operand 2 of
;; the V16QI operand 1 into the QImode operand 0.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its
;; first argument.
4929 (define_expand "vec_extractv16qi"
4930 [(match_operand:QI 0 "register_operand" "")
4931 (match_operand:V16QI 1 "register_operand" "")
4932 (match_operand 2 "const_int_operand" "")]
4935 ix86_expand_vector_extract (false, operands[0], operands[1],
4936 INTVAL (operands[2]));
;; vec_initv16qi: expander that initializes the V16QI operand 0 from
;; operand 1 (no predicate or mode is imposed on it here).  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument.
4940 (define_expand "vec_initv16qi"
4941 [(match_operand:V16QI 0 "register_operand" "")
4942 (match_operand 1 "" "")]
4945 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_unpacku_hi_v16qi: widens part of a V16QI into a V8HI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (true, true); by the pattern name these presumably mean
;; unsigned and high half.
4949 (define_expand "vec_unpacku_hi_v16qi"
4950 [(match_operand:V8HI 0 "register_operand" "")
4951 (match_operand:V16QI 1 "register_operand" "")]
4955 ix86_expand_sse4_unpack (operands, true, true);
4957 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v16qi: widens part of a V16QI into a V8HI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (false, true); by the pattern name these presumably mean
;; signed and high half.
4961 (define_expand "vec_unpacks_hi_v16qi"
4962 [(match_operand:V8HI 0 "register_operand" "")
4963 (match_operand:V16QI 1 "register_operand" "")]
4967 ix86_expand_sse4_unpack (operands, false, true);
4969 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v16qi: widens part of a V16QI into a V8HI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (true, false); by the pattern name these presumably mean
;; unsigned and low half.
4973 (define_expand "vec_unpacku_lo_v16qi"
4974 [(match_operand:V8HI 0 "register_operand" "")
4975 (match_operand:V16QI 1 "register_operand" "")]
4979 ix86_expand_sse4_unpack (operands, true, false);
4981 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v16qi: widens part of a V16QI into a V8HI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (false, false); by the pattern name these presumably mean
;; signed and low half.
4985 (define_expand "vec_unpacks_lo_v16qi"
4986 [(match_operand:V8HI 0 "register_operand" "")
4987 (match_operand:V16QI 1 "register_operand" "")]
4991 ix86_expand_sse4_unpack (operands, false, false);
4993 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v8hi: widens part of a V8HI into a V4SI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (true, true); by the pattern name these presumably mean
;; unsigned and high half.
4997 (define_expand "vec_unpacku_hi_v8hi"
4998 [(match_operand:V4SI 0 "register_operand" "")
4999 (match_operand:V8HI 1 "register_operand" "")]
5003 ix86_expand_sse4_unpack (operands, true, true);
5005 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v8hi: widens part of a V8HI into a V4SI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (false, true); by the pattern name these presumably mean
;; signed and high half.
5009 (define_expand "vec_unpacks_hi_v8hi"
5010 [(match_operand:V4SI 0 "register_operand" "")
5011 (match_operand:V8HI 1 "register_operand" "")]
5015 ix86_expand_sse4_unpack (operands, false, true);
5017 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v8hi: widens part of a V8HI into a V4SI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (true, false); by the pattern name these presumably mean
;; unsigned and low half.
5021 (define_expand "vec_unpacku_lo_v8hi"
5022 [(match_operand:V4SI 0 "register_operand" "")
5023 (match_operand:V8HI 1 "register_operand" "")]
5027 ix86_expand_sse4_unpack (operands, true, false);
5029 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v8hi: widens part of a V8HI into a V4SI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (false, false); by the pattern name these presumably mean
;; signed and low half.
5033 (define_expand "vec_unpacks_lo_v8hi"
5034 [(match_operand:V4SI 0 "register_operand" "")
5035 (match_operand:V8HI 1 "register_operand" "")]
5039 ix86_expand_sse4_unpack (operands, false, false);
5041 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: widens part of a V4SI into a V2DI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (true, true); by the pattern name these presumably mean
;; unsigned and high half.
5045 (define_expand "vec_unpacku_hi_v4si"
5046 [(match_operand:V2DI 0 "register_operand" "")
5047 (match_operand:V4SI 1 "register_operand" "")]
5051 ix86_expand_sse4_unpack (operands, true, true);
5053 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: widens part of a V4SI into a V2DI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (false, true); by the pattern name these presumably mean
;; signed and high half.
5057 (define_expand "vec_unpacks_hi_v4si"
5058 [(match_operand:V2DI 0 "register_operand" "")
5059 (match_operand:V4SI 1 "register_operand" "")]
5063 ix86_expand_sse4_unpack (operands, false, true);
5065 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: widens part of a V4SI into a V2DI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (true, false); by the pattern name these presumably mean
;; unsigned and low half.
5069 (define_expand "vec_unpacku_lo_v4si"
5070 [(match_operand:V2DI 0 "register_operand" "")
5071 (match_operand:V4SI 1 "register_operand" "")]
5075 ix86_expand_sse4_unpack (operands, true, false);
5077 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: widens part of a V4SI into a V2DI.  Expansion
;; is delegated to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; (presumably chosen by SSE4.1 availability -- confirm), both called
;; with flags (false, false); by the pattern name these presumably mean
;; signed and low half.
5081 (define_expand "vec_unpacks_lo_v4si"
5082 [(match_operand:V2DI 0 "register_operand" "")
5083 (match_operand:V4SI 1 "register_operand" "")]
5087 ix86_expand_sse4_unpack (operands, false, false);
5089 ix86_expand_sse_unpack (operands, false, false);
5093 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: emits "pavgb" on V16QI.  Operands 1 and 2 are
;; commutative ("%0"); the pattern includes an all-ones V16QI constant
;; vector (presumably the rounding term of the average).  Enabled for
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands as a
;; PLUS.
5099 (define_insn "sse2_uavgv16qi3"
5100 [(set (match_operand:V16QI 0 "register_operand" "=x")
5106 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5108 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5109 (const_vector:V16QI [(const_int 1) (const_int 1)
5110 (const_int 1) (const_int 1)
5111 (const_int 1) (const_int 1)
5112 (const_int 1) (const_int 1)
5113 (const_int 1) (const_int 1)
5114 (const_int 1) (const_int 1)
5115 (const_int 1) (const_int 1)
5116 (const_int 1) (const_int 1)]))
5118 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5119 "pavgb\t{%2, %0|%0, %2}"
5120 [(set_attr "type" "sseiadd")
5121 (set_attr "prefix_data16" "1")
5122 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: emits "pavgw" on V8HI.  Operands 1 and 2 are
;; commutative ("%0"); the pattern includes an all-ones V8HI constant
;; vector (presumably the rounding term of the average).  Enabled for
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands as a
;; PLUS.
5124 (define_insn "sse2_uavgv8hi3"
5125 [(set (match_operand:V8HI 0 "register_operand" "=x")
5131 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5133 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5134 (const_vector:V8HI [(const_int 1) (const_int 1)
5135 (const_int 1) (const_int 1)
5136 (const_int 1) (const_int 1)
5137 (const_int 1) (const_int 1)]))
5139 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5140 "pavgw\t{%2, %0|%0, %2}"
5141 [(set_attr "type" "sseiadd")
5142 (set_attr "prefix_data16" "1")
5143 (set_attr "mode" "TI")])
5145 ;; The correct representation for this is absolutely enormous, and
5146 ;; surely not generally useful.
;; sse2_psadbw: emits "psadbw".  The operation is modelled as an
;; opaque unspec over two V16QI inputs producing a V2DI result.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
5147 (define_insn "sse2_psadbw"
5148 [(set (match_operand:V2DI 0 "register_operand" "=x")
5149 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5150 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5153 "psadbw\t{%2, %0|%0, %2}"
5154 [(set_attr "type" "sseiadd")
5155 (set_attr "prefix_data16" "1")
5156 (set_attr "mode" "TI")])
;; sse_movmskps: emits "movmskps", producing an SImode value in a
;; general register from a V4SF SSE register (modelled as an unspec).
5158 (define_insn "sse_movmskps"
5159 [(set (match_operand:SI 0 "register_operand" "=r")
5160 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5163 "movmskps\t{%1, %0|%0, %1}"
5164 [(set_attr "type" "ssecvt")
5165 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: emits "movmskpd", producing an SImode value in a
;; general register from a V2DF SSE register (modelled as an unspec).
5167 (define_insn "sse2_movmskpd"
5168 [(set (match_operand:SI 0 "register_operand" "=r")
5169 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5172 "movmskpd\t{%1, %0|%0, %1}"
5173 [(set_attr "type" "ssecvt")
5174 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: emits "pmovmskb", producing an SImode value in a
;; general register from a V16QI SSE register (modelled as an unspec).
5176 (define_insn "sse2_pmovmskb"
5177 [(set (match_operand:SI 0 "register_operand" "=r")
5178 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5181 "pmovmskb\t{%1, %0|%0, %1}"
5182 [(set_attr "type" "ssecvt")
5183 (set_attr "prefix_data16" "1")
5184 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: expander for a store to a V16QI memory operand
;; whose value is an unspec of two V16QI register operands.
;; NOTE(review): presumably matched by the *sse2_maskmovdqu insns that
;; follow -- confirm.
5186 (define_expand "sse2_maskmovdqu"
5187 [(set (match_operand:V16QI 0 "memory_operand" "")
5188 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5189 (match_operand:V16QI 2 "register_operand" "x")
;; *sse2_maskmovdqu: 32-bit form (!TARGET_64BIT) of "maskmovdqu".  The
;; destination address is the SImode operand 0, constrained to "D";
;; the previous contents of that memory also appear as an input of the
;; unspec.  Only operands 1 and 2 are printed in the template.
5195 (define_insn "*sse2_maskmovdqu"
5196 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5197 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5198 (match_operand:V16QI 2 "register_operand" "x")
5199 (mem:V16QI (match_dup 0))]
5201 "TARGET_SSE2 && !TARGET_64BIT"
5202 ;; @@@ check ordering of operands in intel/nonintel syntax
5203 "maskmovdqu\t{%2, %1|%1, %2}"
5204 [(set_attr "type" "ssecvt")
5205 (set_attr "prefix_data16" "1")
5206 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (TARGET_64BIT) of "maskmovdqu".
;; It is identical to the 32-bit pattern except that the address
;; operand 0 is DImode.
5208 (define_insn "*sse2_maskmovdqu_rex64"
5209 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5210 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5211 (match_operand:V16QI 2 "register_operand" "x")
5212 (mem:V16QI (match_dup 0))]
5214 "TARGET_SSE2 && TARGET_64BIT"
5215 ;; @@@ check ordering of operands in intel/nonintel syntax
5216 "maskmovdqu\t{%2, %1|%1, %2}"
5217 [(set_attr "type" "ssecvt")
5218 (set_attr "prefix_data16" "1")
5219 (set_attr "mode" "TI")])
;; sse_ldmxcsr: volatile unspec taking an SImode memory operand;
;; its "memory" attribute is "load".
5221 (define_insn "sse_ldmxcsr"
5222 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5226 [(set_attr "type" "sse")
5227 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR; its "memory" attribute is "store".
5229 (define_insn "sse_stmxcsr"
5230 [(set (match_operand:SI 0 "memory_operand" "=m")
5231 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5234 [(set_attr "type" "sse")
5235 (set_attr "memory" "store")])
;; sse_sfence: expander enabled for TARGET_SSE or TARGET_3DNOW_A.  It
;; builds a volatile BLKmode MEM with a scratch address as operand 0,
;; so the fence is expressed as an UNSPEC_SFENCE update of that memory.
5237 (define_expand "sse_sfence"
5239 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5240 "TARGET_SSE || TARGET_3DNOW_A"
5242 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5243 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn matching the UNSPEC_SFENCE update of a BLKmode
;; operand (no predicate or constraint).  Its memory effect is declared
;; "unknown".
5246 (define_insn "*sse_sfence"
5247 [(set (match_operand:BLK 0 "" "")
5248 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5249 "TARGET_SSE || TARGET_3DNOW_A"
5251 [(set_attr "type" "sse")
5252 (set_attr "memory" "unknown")])
;; sse2_clflush: volatile unspec taking an address operand
;; (predicate address_operand, constraint "p").  Its memory effect is
;; declared "unknown".
5254 (define_insn "sse2_clflush"
5255 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5259 [(set_attr "type" "sse")
5260 (set_attr "memory" "unknown")])
;; sse2_mfence: expander that builds a volatile BLKmode MEM with a
;; scratch address as operand 0, so the fence is expressed as an
;; UNSPEC_MFENCE update of that memory.
5262 (define_expand "sse2_mfence"
5264 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5267 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5268 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn matching the UNSPEC_MFENCE update of a BLKmode
;; operand.  Its memory effect is declared "unknown".
5271 (define_insn "*sse2_mfence"
5272 [(set (match_operand:BLK 0 "" "")
5273 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5276 [(set_attr "type" "sse")
5277 (set_attr "memory" "unknown")])
;; sse2_lfence: expander that builds a volatile BLKmode MEM with a
;; scratch address as operand 0, so the fence is expressed as an
;; UNSPEC_LFENCE update of that memory.
5279 (define_expand "sse2_lfence"
5281 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5284 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5285 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn matching the UNSPEC_LFENCE update of a BLKmode
;; operand.  Its memory effect is declared "unknown".
5288 (define_insn "*sse2_lfence"
5289 [(set (match_operand:BLK 0 "" "")
5290 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5293 [(set_attr "type" "sse")
5294 (set_attr "memory" "unknown")])
;; sse3_mwait: volatile unspec with two SImode register operands,
;; constrained to "a" and "c".  The encoded length is fixed at 3 bytes.
5296 (define_insn "sse3_mwait"
5297 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5298 (match_operand:SI 1 "register_operand" "c")]
5301 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5302 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5303 ;; we only need to set up 32bit registers.
5305 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form (TARGET_SSE3 && !TARGET_64BIT) of
;; "monitor".  The three SImode register operands are constrained to
;; "a", "c" and "d" and are printed explicitly in the template.
5307 (define_insn "sse3_monitor"
5308 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5309 (match_operand:SI 1 "register_operand" "c")
5310 (match_operand:SI 2 "register_operand" "d")]
5312 "TARGET_SSE3 && !TARGET_64BIT"
5313 "monitor\t%0, %1, %2"
5314 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form (TARGET_SSE3 && TARGET_64BIT) of the
;; monitor pattern.  Operand 0 is DImode in "a"; operands 1 and 2
;; remain SImode in "c" and "d".
5316 (define_insn "sse3_monitor64"
5317 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5318 (match_operand:SI 1 "register_operand" "c")
5319 (match_operand:SI 2 "register_operand" "d")]
5321 "TARGET_SSE3 && TARGET_64BIT"
5322 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5323 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5324 ;; zero extended to 64bit, we only need to set up 32bit registers.
5326 [(set_attr "length" "3")])
;; ssse3_phaddwv8hi3: emits "phaddw" on V8HI SSE registers.  The RTL
;; enumerates the adjacent element pairs (0,1), (2,3), (4,5), (6,7),
;; first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).
5329 (define_insn "ssse3_phaddwv8hi3"
5330 [(set (match_operand:V8HI 0 "register_operand" "=x")
5336 (match_operand:V8HI 1 "register_operand" "0")
5337 (parallel [(const_int 0)]))
5338 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5340 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5341 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5344 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5345 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5347 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5348 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5353 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5354 (parallel [(const_int 0)]))
5355 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5357 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5358 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5361 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5362 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5364 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5365 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5367 "phaddw\t{%2, %0|%0, %2}"
5368 [(set_attr "type" "sseiadd")
5369 (set_attr "prefix_data16" "1")
5370 (set_attr "prefix_extra" "1")
5371 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: MMX-register form ("y" constraints) of "phaddw"
;; on V4HI.  The RTL enumerates the adjacent element pairs (0,1) and
;; (2,3), first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).
5373 (define_insn "ssse3_phaddwv4hi3"
5374 [(set (match_operand:V4HI 0 "register_operand" "=y")
5379 (match_operand:V4HI 1 "register_operand" "0")
5380 (parallel [(const_int 0)]))
5381 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5383 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5384 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5388 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5389 (parallel [(const_int 0)]))
5390 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5392 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5393 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5395 "phaddw\t{%2, %0|%0, %2}"
5396 [(set_attr "type" "sseiadd")
5397 (set_attr "prefix_extra" "1")
5398 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: emits "phaddd" on V4SI SSE registers.  The RTL
;; enumerates the adjacent element pairs (0,1) and (2,3), first from
;; operand 1 (tied to the destination) and then from operand 2
;; (register or memory).
5400 (define_insn "ssse3_phadddv4si3"
5401 [(set (match_operand:V4SI 0 "register_operand" "=x")
5406 (match_operand:V4SI 1 "register_operand" "0")
5407 (parallel [(const_int 0)]))
5408 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5410 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5411 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5415 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5416 (parallel [(const_int 0)]))
5417 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5419 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5420 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5422 "phaddd\t{%2, %0|%0, %2}"
5423 [(set_attr "type" "sseiadd")
5424 (set_attr "prefix_data16" "1")
5425 (set_attr "prefix_extra" "1")
5426 (set_attr "mode" "TI")])
;; SSSE3 phaddd, MMX-register form ("y" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects SI elements 0-1 of each source operand.
5428 (define_insn "ssse3_phadddv2si3"
5429 [(set (match_operand:V2SI 0 "register_operand" "=y")
5433 (match_operand:V2SI 1 "register_operand" "0")
5434 (parallel [(const_int 0)]))
5435 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5438 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5439 (parallel [(const_int 0)]))
5440 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5442 "phaddd\t{%2, %0|%0, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix_extra" "1")
5445 (set_attr "mode" "DI")])
;; SSSE3 phaddsw, SSE-register form ("x" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects HI elements 0-7 of each source operand.
5447 (define_insn "ssse3_phaddswv8hi3"
5448 [(set (match_operand:V8HI 0 "register_operand" "=x")
5454 (match_operand:V8HI 1 "register_operand" "0")
5455 (parallel [(const_int 0)]))
5456 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5458 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5459 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5462 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5463 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5465 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5466 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5471 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5472 (parallel [(const_int 0)]))
5473 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5475 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5476 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5479 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5480 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5482 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5483 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5485 "phaddsw\t{%2, %0|%0, %2}"
5486 [(set_attr "type" "sseiadd")
5487 (set_attr "prefix_data16" "1")
5488 (set_attr "prefix_extra" "1")
5489 (set_attr "mode" "TI")])
;; SSSE3 phaddsw, MMX-register form ("y" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects HI elements 0-3 of each source operand.
5491 (define_insn "ssse3_phaddswv4hi3"
5492 [(set (match_operand:V4HI 0 "register_operand" "=y")
5497 (match_operand:V4HI 1 "register_operand" "0")
5498 (parallel [(const_int 0)]))
5499 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5501 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5502 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5506 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5507 (parallel [(const_int 0)]))
5508 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5510 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5511 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5513 "phaddsw\t{%2, %0|%0, %2}"
5514 [(set_attr "type" "sseiadd")
5515 (set_attr "prefix_extra" "1")
5516 (set_attr "mode" "DI")])
;; SSSE3 phsubw, SSE-register form ("x" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects HI elements 0-7 of each source operand.
5518 (define_insn "ssse3_phsubwv8hi3"
5519 [(set (match_operand:V8HI 0 "register_operand" "=x")
5525 (match_operand:V8HI 1 "register_operand" "0")
5526 (parallel [(const_int 0)]))
5527 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5529 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5530 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5533 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5534 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5536 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5537 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5542 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5543 (parallel [(const_int 0)]))
5544 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5546 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5547 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5550 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5551 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5553 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5554 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5556 "phsubw\t{%2, %0|%0, %2}"
5557 [(set_attr "type" "sseiadd")
5558 (set_attr "prefix_data16" "1")
5559 (set_attr "prefix_extra" "1")
5560 (set_attr "mode" "TI")])
;; SSSE3 phsubw, MMX-register form ("y" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects HI elements 0-3 of each source operand.
5562 (define_insn "ssse3_phsubwv4hi3"
5563 [(set (match_operand:V4HI 0 "register_operand" "=y")
5568 (match_operand:V4HI 1 "register_operand" "0")
5569 (parallel [(const_int 0)]))
5570 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5572 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5573 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5577 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5578 (parallel [(const_int 0)]))
5579 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5581 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5582 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5584 "phsubw\t{%2, %0|%0, %2}"
5585 [(set_attr "type" "sseiadd")
5586 (set_attr "prefix_extra" "1")
5587 (set_attr "mode" "DI")])
;; SSSE3 phsubd, SSE-register form ("x" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects SI elements 0-3 of each source operand.
5589 (define_insn "ssse3_phsubdv4si3"
5590 [(set (match_operand:V4SI 0 "register_operand" "=x")
5595 (match_operand:V4SI 1 "register_operand" "0")
5596 (parallel [(const_int 0)]))
5597 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5599 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5600 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5604 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5605 (parallel [(const_int 0)]))
5606 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5608 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5609 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5611 "phsubd\t{%2, %0|%0, %2}"
5612 [(set_attr "type" "sseiadd")
5613 (set_attr "prefix_data16" "1")
5614 (set_attr "prefix_extra" "1")
5615 (set_attr "mode" "TI")])
;; SSSE3 phsubd, MMX-register form ("y" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects SI elements 0-1 of each source operand.
5617 (define_insn "ssse3_phsubdv2si3"
5618 [(set (match_operand:V2SI 0 "register_operand" "=y")
5622 (match_operand:V2SI 1 "register_operand" "0")
5623 (parallel [(const_int 0)]))
5624 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5627 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5628 (parallel [(const_int 0)]))
5629 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5631 "phsubd\t{%2, %0|%0, %2}"
5632 [(set_attr "type" "sseiadd")
5633 (set_attr "prefix_extra" "1")
5634 (set_attr "mode" "DI")])
;; SSSE3 phsubsw, SSE-register form ("x" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects HI elements 0-7 of each source operand.
5636 (define_insn "ssse3_phsubswv8hi3"
5637 [(set (match_operand:V8HI 0 "register_operand" "=x")
5643 (match_operand:V8HI 1 "register_operand" "0")
5644 (parallel [(const_int 0)]))
5645 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5647 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5648 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5651 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5652 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5654 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5655 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5660 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5661 (parallel [(const_int 0)]))
5662 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5664 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5665 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5668 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5669 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5671 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5672 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5674 "phsubsw\t{%2, %0|%0, %2}"
5675 [(set_attr "type" "sseiadd")
5676 (set_attr "prefix_data16" "1")
5677 (set_attr "prefix_extra" "1")
5678 (set_attr "mode" "TI")])
;; SSSE3 phsubsw, MMX-register form ("y" constraints).  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a register or memory.
;; The RTL selects HI elements 0-3 of each source operand.
5680 (define_insn "ssse3_phsubswv4hi3"
5681 [(set (match_operand:V4HI 0 "register_operand" "=y")
5686 (match_operand:V4HI 1 "register_operand" "0")
5687 (parallel [(const_int 0)]))
5688 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5690 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5691 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5695 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5696 (parallel [(const_int 0)]))
5697 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5699 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5700 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5702 "phsubsw\t{%2, %0|%0, %2}"
5703 [(set_attr "type" "sseiadd")
5704 (set_attr "prefix_extra" "1")
5705 (set_attr "mode" "DI")])
;; SSSE3 pmaddubsw, SSE-register form: V16QI sources, V8HI result.
;; Operand 1 carries "%0" (commutative with operand 2, tied to the
;; destination).  The RTL takes two selections from each source, one whose
;; index list starts at element 0 and one whose list starts at element 1.
5707 (define_insn "ssse3_pmaddubswv8hi3"
5708 [(set (match_operand:V8HI 0 "register_operand" "=x")
5713 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5714 (parallel [(const_int 0)
5724 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5725 (parallel [(const_int 0)
5735 (vec_select:V16QI (match_dup 1)
5736 (parallel [(const_int 1)
5745 (vec_select:V16QI (match_dup 2)
5746 (parallel [(const_int 1)
5753 (const_int 15)]))))))]
5755 "pmaddubsw\t{%2, %0|%0, %2}"
5756 [(set_attr "type" "sseiadd")
5757 (set_attr "prefix_data16" "1")
5758 (set_attr "prefix_extra" "1")
5759 (set_attr "mode" "TI")])
;; SSSE3 pmaddubsw, MMX-register form: V8QI sources, V4HI result.
;; Operand 1 carries "%0" (commutative with operand 2, tied to the
;; destination).  The RTL takes two selections from each source, one whose
;; index list starts at element 0 and one whose list starts at element 1.
5761 (define_insn "ssse3_pmaddubswv4hi3"
5762 [(set (match_operand:V4HI 0 "register_operand" "=y")
5767 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5768 (parallel [(const_int 0)
5774 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5775 (parallel [(const_int 0)
5781 (vec_select:V8QI (match_dup 1)
5782 (parallel [(const_int 1)
5787 (vec_select:V8QI (match_dup 2)
5788 (parallel [(const_int 1)
5791 (const_int 7)]))))))]
5793 "pmaddubsw\t{%2, %0|%0, %2}"
5794 [(set_attr "type" "sseiadd")
5795 (set_attr "prefix_extra" "1")
5796 (set_attr "mode" "DI")])
;; SSSE3 pmulhrsw, SSE-register form on V8HI.  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands as a MULT.
;; Operand 1 carries "%0" (commutative with operand 2, tied to the
;; destination); the pattern includes an all-ones V8HI const_vector.
5798 (define_insn "ssse3_pmulhrswv8hi3"
5799 [(set (match_operand:V8HI 0 "register_operand" "=x")
5806 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5808 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5810 (const_vector:V8HI [(const_int 1) (const_int 1)
5811 (const_int 1) (const_int 1)
5812 (const_int 1) (const_int 1)
5813 (const_int 1) (const_int 1)]))
5815 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5816 "pmulhrsw\t{%2, %0|%0, %2}"
5817 [(set_attr "type" "sseimul")
5818 (set_attr "prefix_data16" "1")
5819 (set_attr "prefix_extra" "1")
5820 (set_attr "mode" "TI")])
;; SSSE3 pmulhrsw, MMX-register form on V4HI.  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands as a MULT.
;; Operand 1 carries "%0" (commutative with operand 2, tied to the
;; destination); the pattern includes an all-ones V4HI const_vector.
5822 (define_insn "ssse3_pmulhrswv4hi3"
5823 [(set (match_operand:V4HI 0 "register_operand" "=y")
5830 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5832 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5834 (const_vector:V4HI [(const_int 1) (const_int 1)
5835 (const_int 1) (const_int 1)]))
5837 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5838 "pmulhrsw\t{%2, %0|%0, %2}"
5839 [(set_attr "type" "sseimul")
5840 (set_attr "prefix_extra" "1")
5841 (set_attr "mode" "DI")])
;; SSSE3 pshufb, SSE-register form on V16QI, modelled as an unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
5843 (define_insn "ssse3_pshufbv16qi3"
5844 [(set (match_operand:V16QI 0 "register_operand" "=x")
5845 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5846 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5849 "pshufb\t{%2, %0|%0, %2}"
5850 [(set_attr "type" "sselog1")
5851 (set_attr "prefix_data16" "1")
5852 (set_attr "prefix_extra" "1")
5853 (set_attr "mode" "TI")])
;; SSSE3 pshufb, MMX-register form on V8QI, modelled as an unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
5855 (define_insn "ssse3_pshufbv8qi3"
5856 [(set (match_operand:V8QI 0 "register_operand" "=y")
5857 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5858 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5861 "pshufb\t{%2, %0|%0, %2}"
5862 [(set_attr "type" "sselog1")
5863 (set_attr "prefix_extra" "1")
5864 (set_attr "mode" "DI")])
;; SSSE3 psignb/psignw/psignd, SSE-register form, iterated over SSEMODE124
;; and modelled as an unspec.  The mnemonic suffix comes from the
;; ssevecsize mode attribute.  Operand 1 is tied to the destination.
5866 (define_insn "ssse3_psign<mode>3"
5867 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5868 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5869 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5872 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5873 [(set_attr "type" "sselog1")
5874 (set_attr "prefix_data16" "1")
5875 (set_attr "prefix_extra" "1")
5876 (set_attr "mode" "TI")])
;; SSSE3 psign, MMX-register form, iterated over MMXMODEI and modelled as
;; an unspec.  The mnemonic suffix comes from the mmxvecsize mode
;; attribute.  Operand 1 is tied to the destination.
5878 (define_insn "ssse3_psign<mode>3"
5879 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5880 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5881 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5884 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5885 [(set_attr "type" "sselog1")
5886 (set_attr "prefix_extra" "1")
5887 (set_attr "mode" "DI")])
;; SSSE3 palignr, SSE-register form on TImode, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the palignr immediate.
5889 (define_insn "ssse3_palignrti"
5890 [(set (match_operand:TI 0 "register_operand" "=x")
5891 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5892 (match_operand:TI 2 "nonimmediate_operand" "xm")
5893 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5897 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5898 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5900 [(set_attr "type" "sseishft")
5901 (set_attr "prefix_data16" "1")
5902 (set_attr "prefix_extra" "1")
5903 (set_attr "mode" "TI")])
;; SSSE3 palignr, MMX-register form on DImode, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the palignr immediate.
5905 (define_insn "ssse3_palignrdi"
5906 [(set (match_operand:DI 0 "register_operand" "=y")
5907 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5908 (match_operand:DI 2 "nonimmediate_operand" "ym")
5909 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5913 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5914 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5916 [(set_attr "type" "sseishft")
5917 (set_attr "prefix_extra" "1")
5918 (set_attr "mode" "DI")])
;; Vector absolute value for the SSEMODE124 integer modes, SSE-register
;; form.  Emits pabsb/pabsw/pabsd (suffix from ssevecsize); the source may
;; be a register or memory and is not tied to the destination.
5920 (define_insn "abs<mode>2"
5921 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5922 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5924 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5925 [(set_attr "type" "sselog1")
5926 (set_attr "prefix_data16" "1")
5927 (set_attr "prefix_extra" "1")
5928 (set_attr "mode" "TI")])
;; Vector absolute value for the MMXMODEI modes, MMX-register form.
;; Emits pabs with the suffix from mmxvecsize; the source may be a
;; register or memory and is not tied to the destination.
5930 (define_insn "abs<mode>2"
5931 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5932 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5934 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5935 [(set_attr "type" "sselog1")
5936 (set_attr "prefix_extra" "1")
5937 (set_attr "mode" "DI")])
5939 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5941 ;; AMD SSE4A instructions
5943 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntsd from a vector source: stores element 0 of the V2DF
;; register operand 1 to the DF memory operand 0.  Modelled as an unspec.
5945 (define_insn "sse4a_vmmovntv2df"
5946 [(set (match_operand:DF 0 "memory_operand" "=m")
5947 (unspec:DF [(vec_select:DF
5948 (match_operand:V2DF 1 "register_operand" "x")
5949 (parallel [(const_int 0)]))]
5952 "movntsd\t{%1, %0|%0, %1}"
5953 [(set_attr "type" "ssemov")
5954 (set_attr "mode" "DF")])
;; SSE4A movntsd from a scalar source: stores the DF value in SSE register
;; operand 1 to the DF memory operand 0.  Modelled as an unspec.
5956 (define_insn "sse4a_movntdf"
5957 [(set (match_operand:DF 0 "memory_operand" "=m")
5958 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5961 "movntsd\t{%1, %0|%0, %1}"
5962 [(set_attr "type" "ssemov")
5963 (set_attr "mode" "DF")])
;; SSE4A movntss from a vector source: stores element 0 of the V4SF
;; register operand 1 to the SF memory operand 0.  Modelled as an unspec.
5965 (define_insn "sse4a_vmmovntv4sf"
5966 [(set (match_operand:SF 0 "memory_operand" "=m")
5967 (unspec:SF [(vec_select:SF
5968 (match_operand:V4SF 1 "register_operand" "x")
5969 (parallel [(const_int 0)]))]
5972 "movntss\t{%1, %0|%0, %1}"
5973 [(set_attr "type" "ssemov")
5974 (set_attr "mode" "SF")])
;; SSE4A movntss from a scalar source: stores the SF value in SSE register
;; operand 1 to the SF memory operand 0.  Modelled as an unspec.
5976 (define_insn "sse4a_movntsf"
5977 [(set (match_operand:SF 0 "memory_operand" "=m")
5978 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5981 "movntss\t{%1, %0|%0, %1}"
5982 [(set_attr "type" "ssemov")
5983 (set_attr "mode" "SF")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are mode-less integer constants printed as the two
;; immediates of the instruction.
5985 (define_insn "sse4a_extrqi"
5986 [(set (match_operand:V2DI 0 "register_operand" "=x")
5987 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5988 (match_operand 2 "const_int_operand" "")
5989 (match_operand 3 "const_int_operand" "")]
5992 "extrq\t{%3, %2, %0|%0, %2, %3}"
5993 [(set_attr "type" "sse")
5994 (set_attr "prefix_data16" "1")
5995 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the destination;
;; operand 2 is a V16QI SSE register (no memory alternative).
5997 (define_insn "sse4a_extrq"
5998 [(set (match_operand:V2DI 0 "register_operand" "=x")
5999 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6000 (match_operand:V16QI 2 "register_operand" "x")]
6003 "extrq\t{%2, %0|%0, %2}"
6004 [(set_attr "type" "sse")
6005 (set_attr "prefix_data16" "1")
6006 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is an SSE register, and operands 3 and 4 are mode-less
;; integer constants printed as the two immediates.
6008 (define_insn "sse4a_insertqi"
6009 [(set (match_operand:V2DI 0 "register_operand" "=x")
6010 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6011 (match_operand:V2DI 2 "register_operand" "x")
6012 (match_operand 3 "const_int_operand" "")
6013 (match_operand 4 "const_int_operand" "")]
6016 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6017 [(set_attr "type" "sseins")
6018 (set_attr "prefix_rep" "1")
6019 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the destination;
;; operand 2 is an SSE register (no memory alternative).
6021 (define_insn "sse4a_insertq"
6022 [(set (match_operand:V2DI 0 "register_operand" "=x")
6023 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6024 (match_operand:V2DI 2 "register_operand" "x")]
6027 "insertq\t{%2, %0|%0, %2}"
6028 [(set_attr "type" "sseins")
6029 (set_attr "prefix_rep" "1")
6030 (set_attr "mode" "TI")])
6032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6034 ;; Intel SSE4.1 instructions
6036 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.1 blendpd on V2DF.  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory; note operand 2 is listed before
;; operand 1 in the RTL.  Operand 3 is an immediate accepted by
;; const_0_to_3_operand.
6038 (define_insn "sse4_1_blendpd"
6039 [(set (match_operand:V2DF 0 "register_operand" "=x")
6041 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6042 (match_operand:V2DF 1 "register_operand" "0")
6043 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6045 "blendpd\t{%3, %2, %0|%0, %2, %3}"
6046 [(set_attr "type" "ssemov")
6047 (set_attr "prefix_extra" "1")
6048 (set_attr "mode" "V2DF")])
;; SSE4.1 blendps on V4SF.  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory; note operand 2 is listed before
;; operand 1 in the RTL.  Operand 3 is an immediate accepted by
;; const_0_to_15_operand.
6050 (define_insn "sse4_1_blendps"
6051 [(set (match_operand:V4SF 0 "register_operand" "=x")
6053 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6054 (match_operand:V4SF 1 "register_operand" "0")
6055 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
6057 "blendps\t{%3, %2, %0|%0, %2, %3}"
6058 [(set_attr "type" "ssemov")
6059 (set_attr "prefix_extra" "1")
6060 (set_attr "mode" "V4SF")])
;; SSE4.1 blendvpd on V2DF, modelled as an unspec.  Operands 0-2 use the
;; *_not_xmm0_operand predicates, while operand 3 uses the "Y0" constraint.
;; NOTE(review): "Y0" presumably pins operand 3 to %xmm0 -- confirm
;; against the constraint definitions.
6062 (define_insn "sse4_1_blendvpd"
6063 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
6064 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
6065 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
6066 (match_operand:V2DF 3 "register_operand" "Y0")]
6069 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
6070 [(set_attr "type" "ssemov")
6071 (set_attr "prefix_extra" "1")
6072 (set_attr "mode" "V2DF")])
;; SSE4.1 blendvps on V4SF, modelled as an unspec.  Operands 0-2 use the
;; *_not_xmm0_operand predicates, while operand 3 uses the "Y0" constraint.
;; NOTE(review): "Y0" presumably pins operand 3 to %xmm0 -- confirm
;; against the constraint definitions.
6074 (define_insn "sse4_1_blendvps"
6075 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
6076 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
6077 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
6078 (match_operand:V4SF 3 "register_operand" "Y0")]
6081 "blendvps\t{%3, %2, %0|%0, %2, %3}"
6082 [(set_attr "type" "ssemov")
6083 (set_attr "prefix_extra" "1")
6084 (set_attr "mode" "V4SF")])
;; SSE4.1 dppd on V2DF, modelled as an unspec.  Operand 1 carries "%0"
;; (commutative with operand 2, tied to the destination); operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6086 (define_insn "sse4_1_dppd"
6087 [(set (match_operand:V2DF 0 "register_operand" "=x")
6088 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
6089 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6090 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6093 "dppd\t{%3, %2, %0|%0, %2, %3}"
6094 [(set_attr "type" "ssemul")
6095 (set_attr "prefix_extra" "1")
6096 (set_attr "mode" "V2DF")])
;; SSE4.1 dpps on V4SF, modelled as an unspec.  Operand 1 carries "%0"
;; (commutative with operand 2, tied to the destination); operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6098 (define_insn "sse4_1_dpps"
6099 [(set (match_operand:V4SF 0 "register_operand" "=x")
6100 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6101 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6102 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6105 "dpps\t{%3, %2, %0|%0, %2, %3}"
6106 [(set_attr "type" "ssemul")
6107 (set_attr "prefix_extra" "1")
6108 (set_attr "mode" "V4SF")])
;; SSE4.1 movntdqa: loads a V2DI value from the memory operand 1 into the
;; SSE register operand 0.  Modelled as an unspec; the source must be
;; memory.
6110 (define_insn "sse4_1_movntdqa"
6111 [(set (match_operand:V2DI 0 "register_operand" "=x")
6112 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6115 "movntdqa\t{%1, %0|%0, %1}"
6116 [(set_attr "type" "ssecvt")
6117 (set_attr "prefix_extra" "1")
6118 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw on V16QI, modelled as an unspec.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory; operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6120 (define_insn "sse4_1_mpsadbw"
6121 [(set (match_operand:V16QI 0 "register_operand" "=x")
6122 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6123 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6124 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6127 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6128 [(set_attr "type" "sselog1")
6129 (set_attr "prefix_extra" "1")
6130 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI sources produce one V8HI result.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
6132 (define_insn "sse4_1_packusdw"
6133 [(set (match_operand:V8HI 0 "register_operand" "=x")
6136 (match_operand:V4SI 1 "register_operand" "0"))
6138 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6140 "packusdw\t{%2, %0|%0, %2}"
6141 [(set_attr "type" "sselog")
6142 (set_attr "prefix_extra" "1")
6143 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb on V16QI, modelled as an unspec.  Operands 0-2 use the
;; *_not_xmm0_operand predicates, while operand 3 uses the "Y0" constraint.
;; NOTE(review): "Y0" presumably pins operand 3 to %xmm0 -- confirm
;; against the constraint definitions.
6145 (define_insn "sse4_1_pblendvb"
6146 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6147 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6148 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6149 (match_operand:V16QI 3 "register_operand" "Y0")]
6152 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6153 [(set_attr "type" "ssemov")
6154 (set_attr "prefix_extra" "1")
6155 (set_attr "mode" "TI")])
;; SSE4.1 pblendw on V8HI.  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory; note operand 2 is listed before
;; operand 1 in the RTL.  Operand 3 is an immediate accepted by
;; const_0_to_255_operand.
6157 (define_insn "sse4_1_pblendw"
6158 [(set (match_operand:V8HI 0 "register_operand" "=x")
6160 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6161 (match_operand:V8HI 1 "register_operand" "0")
6162 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6164 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6165 [(set_attr "type" "ssemov")
6166 (set_attr "prefix_extra" "1")
6167 (set_attr "mode" "TI")])
;; SSE4.1 phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW.  Single
;; source operand (register or memory), not tied to the destination.
6169 (define_insn "sse4_1_phminposuw"
6170 [(set (match_operand:V8HI 0 "register_operand" "=x")
6171 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6172 UNSPEC_PHMINPOSUW))]
6174 "phminposuw\t{%1, %0|%0, %1}"
6175 [(set_attr "type" "sselog1")
6176 (set_attr "prefix_extra" "1")
6177 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, named register-source form: V16QI register in, V8HI
;; register out.  The element selection list starts at element 0.
6179 (define_insn "sse4_1_extendv8qiv8hi2"
6180 [(set (match_operand:V8HI 0 "register_operand" "=x")
6183 (match_operand:V16QI 1 "register_operand" "x")
6184 (parallel [(const_int 0)
6193 "pmovsxbw\t{%1, %0|%0, %1}"
6194 [(set_attr "type" "ssemov")
6195 (set_attr "prefix_extra" "1")
6196 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, unnamed form whose source is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI.
6198 (define_insn "*sse4_1_extendv8qiv8hi2"
6199 [(set (match_operand:V8HI 0 "register_operand" "=x")
6202 (vec_duplicate:V16QI
6203 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6204 (parallel [(const_int 0)
6213 "pmovsxbw\t{%1, %0|%0, %1}"
6214 [(set_attr "type" "ssemov")
6215 (set_attr "prefix_extra" "1")
6216 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, named register-source form: V16QI register in, V4SI
;; register out.  The element selection list starts at element 0.
6218 (define_insn "sse4_1_extendv4qiv4si2"
6219 [(set (match_operand:V4SI 0 "register_operand" "=x")
6222 (match_operand:V16QI 1 "register_operand" "x")
6223 (parallel [(const_int 0)
6228 "pmovsxbd\t{%1, %0|%0, %1}"
6229 [(set_attr "type" "ssemov")
6230 (set_attr "prefix_extra" "1")
6231 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, unnamed form whose source is a V4QI register or memory
;; operand wrapped in vec_duplicate:V16QI.
6233 (define_insn "*sse4_1_extendv4qiv4si2"
6234 [(set (match_operand:V4SI 0 "register_operand" "=x")
6237 (vec_duplicate:V16QI
6238 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6239 (parallel [(const_int 0)
6244 "pmovsxbd\t{%1, %0|%0, %1}"
6245 [(set_attr "type" "ssemov")
6246 (set_attr "prefix_extra" "1")
6247 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, named register-source form: V16QI register in, V2DI
;; register out.  The element selection list starts at element 0.
6249 (define_insn "sse4_1_extendv2qiv2di2"
6250 [(set (match_operand:V2DI 0 "register_operand" "=x")
6253 (match_operand:V16QI 1 "register_operand" "x")
6254 (parallel [(const_int 0)
6257 "pmovsxbq\t{%1, %0|%0, %1}"
6258 [(set_attr "type" "ssemov")
6259 (set_attr "prefix_extra" "1")
6260 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, unnamed form whose source is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI.
6262 (define_insn "*sse4_1_extendv2qiv2di2"
6263 [(set (match_operand:V2DI 0 "register_operand" "=x")
6266 (vec_duplicate:V16QI
6267 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6268 (parallel [(const_int 0)
6271 "pmovsxbq\t{%1, %0|%0, %1}"
6272 [(set_attr "type" "ssemov")
6273 (set_attr "prefix_extra" "1")
6274 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, named register-source form: V8HI register in, V4SI
;; register out.  The element selection list starts at element 0.
6276 (define_insn "sse4_1_extendv4hiv4si2"
6277 [(set (match_operand:V4SI 0 "register_operand" "=x")
6280 (match_operand:V8HI 1 "register_operand" "x")
6281 (parallel [(const_int 0)
6286 "pmovsxwd\t{%1, %0|%0, %1}"
6287 [(set_attr "type" "ssemov")
6288 (set_attr "prefix_extra" "1")
6289 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, unnamed form with a register-or-memory source.
;; Operand 1 is V4HI: the result has four SI elements, so the source holds
;; four HI elements (64 bits), the same mode the zero-extending sibling
;; *sse4_1_zero_extendv4hiv4si2 uses.
6291 (define_insn "*sse4_1_extendv4hiv4si2"
6292 [(set (match_operand:V4SI 0 "register_operand" "=x")
6296 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6297 (parallel [(const_int 0)
6302 "pmovsxwd\t{%1, %0|%0, %1}"
6303 [(set_attr "type" "ssemov")
6304 (set_attr "prefix_extra" "1")
6305 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, named register-source form: V8HI register in, V2DI
;; register out.  The element selection list starts at element 0.
6307 (define_insn "sse4_1_extendv2hiv2di2"
6308 [(set (match_operand:V2DI 0 "register_operand" "=x")
6311 (match_operand:V8HI 1 "register_operand" "x")
6312 (parallel [(const_int 0)
6315 "pmovsxwq\t{%1, %0|%0, %1}"
6316 [(set_attr "type" "ssemov")
6317 (set_attr "prefix_extra" "1")
6318 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, unnamed form with a register-or-memory source.
;; Operand 1 is V2HI: the result has two DI elements, so the source holds
;; two HI elements (32 bits), the same mode the zero-extending sibling
;; *sse4_1_zero_extendv2hiv2di2 uses.
6320 (define_insn "*sse4_1_extendv2hiv2di2"
6321 [(set (match_operand:V2DI 0 "register_operand" "=x")
6325 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6326 (parallel [(const_int 0)
6329 "pmovsxwq\t{%1, %0|%0, %1}"
6330 [(set_attr "type" "ssemov")
6331 (set_attr "prefix_extra" "1")
6332 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, named register-source form: V4SI register in, V2DI
;; register out.  The element selection list starts at element 0.
6334 (define_insn "sse4_1_extendv2siv2di2"
6335 [(set (match_operand:V2DI 0 "register_operand" "=x")
6338 (match_operand:V4SI 1 "register_operand" "x")
6339 (parallel [(const_int 0)
6342 "pmovsxdq\t{%1, %0|%0, %1}"
6343 [(set_attr "type" "ssemov")
6344 (set_attr "prefix_extra" "1")
6345 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, unnamed form whose source is a V2SI register or memory
;; operand.
6347 (define_insn "*sse4_1_extendv2siv2di2"
6348 [(set (match_operand:V2DI 0 "register_operand" "=x")
6352 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6353 (parallel [(const_int 0)
6356 "pmovsxdq\t{%1, %0|%0, %1}"
6357 [(set_attr "type" "ssemov")
6358 (set_attr "prefix_extra" "1")
6359 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, named register-source form: V16QI register in, V8HI
;; register out.  The element selection list starts at element 0.
6361 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6362 [(set (match_operand:V8HI 0 "register_operand" "=x")
6365 (match_operand:V16QI 1 "register_operand" "x")
6366 (parallel [(const_int 0)
6375 "pmovzxbw\t{%1, %0|%0, %1}"
6376 [(set_attr "type" "ssemov")
6377 (set_attr "prefix_extra" "1")
6378 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, unnamed form whose source is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI.
6380 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6381 [(set (match_operand:V8HI 0 "register_operand" "=x")
6384 (vec_duplicate:V16QI
6385 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6386 (parallel [(const_int 0)
6395 "pmovzxbw\t{%1, %0|%0, %1}"
6396 [(set_attr "type" "ssemov")
6397 (set_attr "prefix_extra" "1")
6398 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, named register-source form: V16QI register in, V4SI
;; register out.  The element selection list starts at element 0.
6400 (define_insn "sse4_1_zero_extendv4qiv4si2"
6401 [(set (match_operand:V4SI 0 "register_operand" "=x")
6404 (match_operand:V16QI 1 "register_operand" "x")
6405 (parallel [(const_int 0)
6410 "pmovzxbd\t{%1, %0|%0, %1}"
6411 [(set_attr "type" "ssemov")
6412 (set_attr "prefix_extra" "1")
6413 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, unnamed form whose source is a V4QI register or memory
;; operand wrapped in vec_duplicate:V16QI.
6415 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6416 [(set (match_operand:V4SI 0 "register_operand" "=x")
6419 (vec_duplicate:V16QI
6420 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6421 (parallel [(const_int 0)
6426 "pmovzxbd\t{%1, %0|%0, %1}"
6427 [(set_attr "type" "ssemov")
6428 (set_attr "prefix_extra" "1")
6429 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, named register-source form: V16QI register in, V2DI
;; register out.  The element selection list starts at element 0.
6431 (define_insn "sse4_1_zero_extendv2qiv2di2"
6432 [(set (match_operand:V2DI 0 "register_operand" "=x")
6435 (match_operand:V16QI 1 "register_operand" "x")
6436 (parallel [(const_int 0)
6439 "pmovzxbq\t{%1, %0|%0, %1}"
6440 [(set_attr "type" "ssemov")
6441 (set_attr "prefix_extra" "1")
6442 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, unnamed form whose source is a V2QI register or memory
;; operand wrapped in vec_duplicate:V16QI.
6444 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6445 [(set (match_operand:V2DI 0 "register_operand" "=x")
6448 (vec_duplicate:V16QI
6449 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6450 (parallel [(const_int 0)
6453 "pmovzxbq\t{%1, %0|%0, %1}"
6454 [(set_attr "type" "ssemov")
6455 (set_attr "prefix_extra" "1")
6456 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, named register-source form: V8HI register in, V4SI
;; register out.  The element selection list starts at element 0.
6458 (define_insn "sse4_1_zero_extendv4hiv4si2"
6459 [(set (match_operand:V4SI 0 "register_operand" "=x")
6462 (match_operand:V8HI 1 "register_operand" "x")
6463 (parallel [(const_int 0)
6468 "pmovzxwd\t{%1, %0|%0, %1}"
6469 [(set_attr "type" "ssemov")
6470 (set_attr "prefix_extra" "1")
6471 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, unnamed form whose source is a V4HI register or memory
;; operand.
6473 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6474 [(set (match_operand:V4SI 0 "register_operand" "=x")
6478 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6479 (parallel [(const_int 0)
6484 "pmovzxwd\t{%1, %0|%0, %1}"
6485 [(set_attr "type" "ssemov")
6486 (set_attr "prefix_extra" "1")
6487 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, named register-source form: V8HI register in, V2DI
;; register out.  The element selection list starts at element 0.
6489 (define_insn "sse4_1_zero_extendv2hiv2di2"
6490 [(set (match_operand:V2DI 0 "register_operand" "=x")
6493 (match_operand:V8HI 1 "register_operand" "x")
6494 (parallel [(const_int 0)
6497 "pmovzxwq\t{%1, %0|%0, %1}"
6498 [(set_attr "type" "ssemov")
6499 (set_attr "prefix_extra" "1")
6500 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, unnamed form whose source is a V2HI register or memory
;; operand.
6502 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6503 [(set (match_operand:V2DI 0 "register_operand" "=x")
6507 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6508 (parallel [(const_int 0)
6511 "pmovzxwq\t{%1, %0|%0, %1}"
6512 [(set_attr "type" "ssemov")
6513 (set_attr "prefix_extra" "1")
6514 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, named register-source form: V4SI register in, V2DI
;; register out.  The element selection list starts at element 0.
6516 (define_insn "sse4_1_zero_extendv2siv2di2"
6517 [(set (match_operand:V2DI 0 "register_operand" "=x")
6520 (match_operand:V4SI 1 "register_operand" "x")
6521 (parallel [(const_int 0)
6524 "pmovzxdq\t{%1, %0|%0, %1}"
6525 [(set_attr "type" "ssemov")
6526 (set_attr "prefix_extra" "1")
6527 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, unnamed form whose source is a V2SI register or memory
;; operand.
6529 (define_insn "*sse4_1_zero_extendv2siv2di2"
6530 [(set (match_operand:V2DI 0 "register_operand" "=x")
6534 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6535 (parallel [(const_int 0)
6538 "pmovzxdq\t{%1, %0|%0, %1}"
6539 [(set_attr "type" "ssemov")
6540 (set_attr "prefix_extra" "1")
6541 (set_attr "mode" "TI")])
6543 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6544 ;; But it is not really a compare instruction.
;; SSE4.1 ptest: sets FLAGS_REG (CCmode) from an unspec of two V2DI
;; operands.  Operand 0 is an input here (no "=" modifier); operand 1 may
;; be a register or memory.  No general or vector register is written.
6545 (define_insn "sse4_1_ptest"
6546 [(set (reg:CC FLAGS_REG)
6547 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6548 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6551 "ptest\t{%1, %0|%0, %1}"
6552 [(set_attr "type" "ssecomi")
6553 (set_attr "prefix_extra" "1")
6554 (set_attr "mode" "TI")])
;; SSE4.1 roundpd on V2DF, modelled as an unspec.  Operand 1 (register or
;; memory) is the source; operand 2 is an immediate accepted by
;; const_0_to_15_operand.  The source is not tied to the destination.
6556 (define_insn "sse4_1_roundpd"
6557 [(set (match_operand:V2DF 0 "register_operand" "=x")
6558 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6559 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6562 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6563 [(set_attr "type" "ssecvt")
6564 (set_attr "prefix_extra" "1")
6565 (set_attr "mode" "V2DF")])
;; SSE4.1 roundps on V4SF, modelled as an unspec.  Operand 1 (register or
;; memory) is the source; operand 2 is an immediate accepted by
;; const_0_to_15_operand.  The source is not tied to the destination.
6567 (define_insn "sse4_1_roundps"
6568 [(set (match_operand:V4SF 0 "register_operand" "=x")
6569 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6570 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6573 "roundps\t{%2, %1, %0|%0, %1, %2}"
6574 [(set_attr "type" "ssecvt")
6575 (set_attr "prefix_extra" "1")
6576 (set_attr "mode" "V4SF")])
;; SSE4.1 roundsd on V2DF.  Operand 2 (SSE register) and the immediate
;; operand 3 (const_0_to_15_operand) feed the unspec; operand 1 is tied to
;; the destination ("0").  The template prints operands 3, 2 and 0 only.
6578 (define_insn "sse4_1_roundsd"
6579 [(set (match_operand:V2DF 0 "register_operand" "=x")
6581 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6582 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6584 (match_operand:V2DF 1 "register_operand" "0")
6587 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6588 [(set_attr "type" "ssecvt")
6589 (set_attr "prefix_extra" "1")
6590 (set_attr "mode" "V2DF")])
;; SSE4.1 roundss on V4SF.  Operand 2 (SSE register) and the immediate
;; operand 3 (const_0_to_15_operand) feed the unspec; operand 1 is tied to
;; the destination ("0").  The template prints operands 3, 2 and 0 only.
6592 (define_insn "sse4_1_roundss"
6593 [(set (match_operand:V4SF 0 "register_operand" "=x")
6595 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6596 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6598 (match_operand:V4SF 1 "register_operand" "0")
6601 "roundss\t{%3, %2, %0|%0, %2, %3}"
6602 [(set_attr "type" "ssecvt")
6603 (set_attr "prefix_extra" "1")
6604 (set_attr "mode" "V4SF")])
6606 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6608 ;; Intel SSE4.2 string/text processing instructions
6610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined SSE4.2 pcmpestr pattern with three results: an SI value in
;; the "c" register (operand 0), a V16QI value under the "Y0" constraint
;; (operand 1), and FLAGS_REG.  Its condition requires that reload has
;; neither started nor completed.  The split code checks the insn's
;; REG_UNUSED notes to see which results are live, and emits from
;; gen_sse4_2_pcmpestri, gen_sse4_2_pcmpestrm and
;; gen_sse4_2_pcmpestr_cconly; the cc-only form is emitted when the flags
;; are live and neither register result is.
6612 (define_insn_and_split "sse4_2_pcmpestr"
6613 [(set (match_operand:SI 0 "register_operand" "=c,c")
6615 [(match_operand:V16QI 2 "register_operand" "x,x")
6616 (match_operand:SI 3 "register_operand" "a,a")
6617 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6618 (match_operand:SI 5 "register_operand" "d,d")
6619 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6621 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6629 (set (reg:CC FLAGS_REG)
6638 && !(reload_completed || reload_in_progress)"
6643 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6644 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6645 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6648 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6649 operands[3], operands[4],
6650 operands[5], operands[6]));
6652 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6653 operands[3], operands[4],
6654 operands[5], operands[6]));
6655 if (flags && !(ecx || xmm0))
6656 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6657 operands[4], operands[5],
6661 [(set_attr "type" "sselog")
6662 (set_attr "prefix_data16" "1")
6663 (set_attr "prefix_extra" "1")
6664 (set_attr "memory" "none,load")
6665 (set_attr "mode" "TI")])
;; Instruction pattern that emits "pcmpestri": produces an SI result and
;; also sets FLAGS_REG.
;;   operand 0: SI result register (constraint "=c").
;;   operand 1: V16QI input register (constraint "x").
;;   operand 2: SI input register (constraint "a").
;;   operand 3: V16QI input, register or memory (alternatives "x" / "m").
;;   operand 4: SI input register (constraint "d").
;;   operand 5: SI immediate accepted by const_0_to_255_operand.
;; Only operands 1, 3 and 5 appear in the output template; operands 0, 2
;; and 4 are pinned to their registers purely through constraints.
;; The "memory" attribute is "none" for the register alternative and
;; "load" for the memory alternative.
6667 (define_insn "sse4_2_pcmpestri"
6668 [(set (match_operand:SI 0 "register_operand" "=c,c")
6670 [(match_operand:V16QI 1 "register_operand" "x,x")
6671 (match_operand:SI 2 "register_operand" "a,a")
6672 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6673 (match_operand:SI 4 "register_operand" "d,d")
6674 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6676 (set (reg:CC FLAGS_REG)
6685 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix_data16" "1")
6688 (set_attr "prefix_extra" "1")
6689 (set_attr "memory" "none,load")
6690 (set_attr "mode" "TI")])
;; Instruction pattern that emits "pcmpestrm": produces a V16QI result and
;; also sets FLAGS_REG.
;;   operand 0: V16QI result register (constraint "=Y0").
;;   operand 1: V16QI input register (constraint "x").
;;   operand 2: SI input register (constraint "a").
;;   operand 3: V16QI input, register or memory (alternatives "x" / "m").
;;   operand 4: SI input register (constraint "d").
;;   operand 5: SI immediate accepted by const_0_to_255_operand.
;; Only operands 1, 3 and 5 appear in the output template; operands 0, 2
;; and 4 are pinned to their registers purely through constraints.
;; The "memory" attribute is "none" for the register alternative and
;; "load" for the memory alternative.
6692 (define_insn "sse4_2_pcmpestrm"
6693 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6695 [(match_operand:V16QI 1 "register_operand" "x,x")
6696 (match_operand:SI 2 "register_operand" "a,a")
6697 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6698 (match_operand:SI 4 "register_operand" "d,d")
6699 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6701 (set (reg:CC FLAGS_REG)
6710 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6711 [(set_attr "type" "sselog")
6712 (set_attr "prefix_data16" "1")
6713 (set_attr "prefix_extra" "1")
6714 (set_attr "memory" "none,load")
6715 (set_attr "mode" "TI")])
;; Flags-only pcmpestr pattern: sets FLAGS_REG and merely clobbers the
;; register that the chosen instruction form writes.
;;   operand 0: V16QI input register (constraint "x").
;;   operand 1: SI input register (constraint "a").
;;   operand 2: V16QI input, register or memory.
;;   operand 3: SI input register (constraint "d").
;;   operand 4: SI immediate accepted by const_0_to_255_operand.
;;   operand 5: V16QI scratch, "Y0" in alternatives 1-2, "X" otherwise.
;;   operand 6: SI scratch, "c" in alternatives 3-4, "X" otherwise.
;; Four alternatives:
;;   1, 2: output "pcmpestrm" (operand 2 in a register / in memory).
;;   3, 4: output "pcmpestri" (operand 2 in a register / in memory).
;; The "memory" attribute follows the same order: none,load,none,load.
6717 (define_insn "sse4_2_pcmpestr_cconly"
6718 [(set (reg:CC FLAGS_REG)
6720 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6721 (match_operand:SI 1 "register_operand" "a,a,a,a")
6722 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6723 (match_operand:SI 3 "register_operand" "d,d,d,d")
6724 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6726 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6727 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6730 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6731 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6732 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6733 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6734 [(set_attr "type" "sselog")
6735 (set_attr "prefix_data16" "1")
6736 (set_attr "prefix_extra" "1")
6737 (set_attr "memory" "none,load,none,load")
6738 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern that sets an SI result, a V16QI result and
;; FLAGS_REG, and is later split into the specific instruction forms.
;; Unlike the pcmpestr pattern it takes no SI register inputs.
;;   operand 0: SI result register (constraint "=c").
;;   operand 1: V16QI result register (constraint "=Y0").
;;   operand 2: V16QI input register (constraint "x").
;;   operand 3: V16QI input, register or memory (alternatives "x" / "m").
;;   operand 4: SI immediate accepted by const_0_to_255_operand.
;; The condition requires that reload has neither started nor completed.
;; The split code looks for REG_UNUSED notes on curr_insn for operand 0,
;; operand 1 and FLAGS_REG to decide which results are live.  It can emit
;; sse4_2_pcmpistri (producing operand 0), sse4_2_pcmpistrm (producing
;; operand 1), and sse4_2_pcmpistr_cconly when the flags are used but
;; neither register result is.
;; The "memory" attribute is per alternative: "none" for the register form
;; of operand 3, "load" for the memory form.
6740 (define_insn_and_split "sse4_2_pcmpistr"
6741 [(set (match_operand:SI 0 "register_operand" "=c,c")
6743 [(match_operand:V16QI 2 "register_operand" "x,x")
6744 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6745 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6747 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6753 (set (reg:CC FLAGS_REG)
6760 && !(reload_completed || reload_in_progress)"
6765 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6766 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6767 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6770 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6771 operands[3], operands[4]));
6773 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6774 operands[3], operands[4]));
6775 if (flags && !(ecx || xmm0))
6776 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6780 [(set_attr "type" "sselog")
6781 (set_attr "prefix_data16" "1")
6782 (set_attr "prefix_extra" "1")
6783 (set_attr "memory" "none,load")
6784 (set_attr "mode" "TI")])
;; Instruction pattern that emits "pcmpistri": produces an SI result and
;; also sets FLAGS_REG.
;;   operand 0: SI result register (constraint "=c"); it is not referenced
;;              by the output template.
;;   operand 1: V16QI input register (constraint "x").
;;   operand 2: V16QI input, register or memory (alternatives "x" / "m").
;;   operand 3: SI immediate accepted by const_0_to_255_operand.
;; The "memory" attribute is "none" for the register alternative and
;; "load" for the memory alternative.
6786 (define_insn "sse4_2_pcmpistri"
6787 [(set (match_operand:SI 0 "register_operand" "=c,c")
6789 [(match_operand:V16QI 1 "register_operand" "x,x")
6790 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6791 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6793 (set (reg:CC FLAGS_REG)
6800 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6801 [(set_attr "type" "sselog")
6802 (set_attr "prefix_data16" "1")
6803 (set_attr "prefix_extra" "1")
6804 (set_attr "memory" "none,load")
6805 (set_attr "mode" "TI")])
;; Instruction pattern that emits "pcmpistrm": produces a V16QI result and
;; also sets FLAGS_REG.
;;   operand 0: V16QI result register (constraint "=Y0"); it is not
;;              referenced by the output template.
;;   operand 1: V16QI input register (constraint "x").
;;   operand 2: V16QI input, register or memory (alternatives "x" / "m").
;;   operand 3: SI immediate accepted by const_0_to_255_operand.
;; The "memory" attribute is "none" for the register alternative and
;; "load" for the memory alternative.
6807 (define_insn "sse4_2_pcmpistrm"
6808 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6810 [(match_operand:V16QI 1 "register_operand" "x,x")
6811 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6812 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6814 (set (reg:CC FLAGS_REG)
6821 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6822 [(set_attr "type" "sselog")
6823 (set_attr "prefix_data16" "1")
6824 (set_attr "prefix_extra" "1")
6825 (set_attr "memory" "none,load")
6826 (set_attr "mode" "TI")])
;; Flags-only pcmpistr pattern: sets FLAGS_REG and merely clobbers the
;; register that the chosen instruction form writes.
;;   operand 0: V16QI input register (constraint "x").
;;   operand 1: V16QI input, register or memory.
;;   operand 2: SI immediate accepted by const_0_to_255_operand.
;;   operand 3: V16QI scratch, "Y0" in alternatives 1-2, "X" otherwise.
;;   operand 4: SI scratch, "c" in alternatives 3-4, "X" otherwise.
;; Four alternatives:
;;   1, 2: output "pcmpistrm" (operand 1 in a register / in memory).
;;   3, 4: output "pcmpistri" (operand 1 in a register / in memory).
;; The "memory" attribute follows the same order: none,load,none,load.
6828 (define_insn "sse4_2_pcmpistr_cconly"
6829 [(set (reg:CC FLAGS_REG)
6831 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6832 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6833 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6835 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6836 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6839 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6840 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6841 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6842 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
6843 [(set_attr "type" "sselog")
6844 (set_attr "prefix_data16" "1")
6845 (set_attr "prefix_extra" "1")
6846 (set_attr "memory" "none,load,none,load")
6847 (set_attr "mode" "TI")])