1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name list the
;; element widths in bytes of the modes included (1 = V16QI, 2 = V8HI,
;; 4 = V4SI, 8 = V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Float modes: F4 covers the scalar and packed modes, F2P the packed
;; modes only.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
;; This attribute also supplies the "<sse>" prefix used in pattern names
;; further down, e.g. "<sse>_movnt<mode>".
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; f4 maps each scalar/packed mode to its own suffix, f2s maps every mode to
;; the scalar suffix, and f2c gives only the final letter, which the output
;; templates append after a literal "p" or "s".
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves in the SSEMODEI integer vector modes.
;; The actual expansion is delegated to ix86_expand_vector_move.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  The alternatives are: SSE constant
;; (constraint "C") into a register, register/memory into a register, and
;; register into memory; the condition requires at least one operand to be
;; a register.  The constant path is emitted through
;; standard_sse_constant_opcode; otherwise movaps is used when the insn's
;; "mode" attribute is V4SF and movdqa when it is not.  The "mode" attribute
;; is V4SF when optimizing for size, when TARGET_SSE2 is off, or for the
;; store alternative under TARGET_SSE_TYPELESS_STORES; otherwise it is TI.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 if (get_attr_mode (insn) == MODE_V4SF)
95 return "movaps\t{%1, %0|%0, %1}";
97 return "movdqa\t{%1, %0|%0, %1}";
102 [(set_attr "type" "sselog1,ssemov,ssemov")
105 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
106 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
107 (and (eq_attr "alternative" "2")
108 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
110 (const_string "V4SF")
111 (const_string "TI")))])
113 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
114 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
115 ;; from memory, we'd prefer to load the memory directly into the %xmm
116 ;; register. To facilitate this happy circumstance, this pattern won't
117 ;; split until after register allocation. If the 64-bit value didn't
118 ;; come from memory, this is the best we can do. This is much better
119 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;; Only available for 32-bit SSE2 targets with inter-unit moves enabled, and
;; split only once reload has completed.  For a register source, the two
;; SImode halves of operand 1 are loaded with sse2_loadld into operand 0 and
;; into the scratch operand 2 and then combined with sse2_punpckldq.  For a
;; memory source, a single vec_concatv2di with zero is emitted and no scratch
;; register is needed (constraint "X").
122 (define_insn_and_split "movdi_to_sse"
124 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
125 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
126 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
127 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
129 "&& reload_completed"
132 if (register_operand (operands[1], DImode))
134 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
135 Assemble the 64-bit DImode value in an xmm register. */
136 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
137 gen_rtx_SUBREG (SImode, operands[1], 0)));
138 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
139 gen_rtx_SUBREG (SImode, operands[1], 4)));
140 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
142 else if (memory_operand (operands[1], DImode))
143 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Expander for whole-vector moves in the packed float modes (V4SF, V2DF).
;; The actual expansion is delegated to ix86_expand_vector_move.
148 (define_expand "mov<mode>"
149 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
150 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
153 ix86_expand_vector_move (<MODE>mode, operands);
;; V4SF move insn: SSE constant into register, register/memory into
;; register, or register into memory.  At least one operand must be a
;; register.  The constant path goes through standard_sse_constant_opcode;
;; the remaining path emits movaps.  The mode attribute is always V4SF.
157 (define_insn "*movv4sf_internal"
158 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
159 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
161 && (register_operand (operands[0], V4SFmode)
162 || register_operand (operands[1], V4SFmode))"
164 switch (which_alternative)
167 return standard_sse_constant_opcode (insn, operands[1]);
170 return "movaps\t{%1, %0|%0, %1}";
175 [(set_attr "type" "sselog1,ssemov,ssemov")
176 (set_attr "mode" "V4SF")])
;; After reload, rewrite a V4SF register set whose source matches
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector; the
;; replacement pattern uses a vec_duplicate of operand 1.
179 [(set (match_operand:V4SF 0 "register_operand" "")
180 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
181 "TARGET_SSE && reload_completed"
184 (vec_duplicate:V4SF (match_dup 1))
188 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
189 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move insn: SSE constant into register, register/memory into
;; register, or register into memory.  At least one operand must be a
;; register.  The constant path goes through standard_sse_constant_opcode;
;; otherwise movaps is emitted when the insn's "mode" attribute is V4SF and
;; movapd when it is not.  The "mode" attribute is V4SF when optimizing for
;; size, when TARGET_SSE2 is off, or for the store alternative under
;; TARGET_SSE_TYPELESS_STORES; otherwise it is V2DF.
192 (define_insn "*movv2df_internal"
193 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
194 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
196 && (register_operand (operands[0], V2DFmode)
197 || register_operand (operands[1], V2DFmode))"
199 switch (which_alternative)
202 return standard_sse_constant_opcode (insn, operands[1]);
205 if (get_attr_mode (insn) == MODE_V4SF)
206 return "movaps\t{%1, %0|%0, %1}";
208 return "movapd\t{%1, %0|%0, %1}";
213 [(set_attr "type" "sselog1,ssemov,ssemov")
216 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
217 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
218 (and (eq_attr "alternative" "2")
219 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
221 (const_string "V4SF")
222 (const_string "V2DF")))])
;; After reload, rewrite a V2DF register set whose source matches
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode scalar
;; (operand 1, narrowed via its subreg at offset 0) with DFmode zero
;; (operand 2).
225 [(set (match_operand:V2DF 0 "register_operand" "")
226 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
227 "TARGET_SSE2 && reload_completed"
228 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
230 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
231 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode; the operand must be a
;; register and the expansion is delegated to ix86_expand_push.
234 (define_expand "push<mode>1"
235 [(match_operand:SSEMODE 0 "register_operand" "")]
238 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE vector mode; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
244 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
247 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Emits movups (V4SF) or movupd (V2DF).  Either operand may be memory, but
;; the condition rejects the memory-to-memory combination.
251 (define_insn "<sse>_movup<ssemodesuffixf2c>"
252 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
254 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
256 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
257 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
258 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
259 [(set_attr "type" "ssemov")
260 (set_attr "mode" "<MODE>")])
;; Emits movdqu for a V16QI unspec move.  Requires TARGET_SSE2; either
;; operand may be memory, but memory-to-memory is rejected.
262 (define_insn "sse2_movdqu"
263 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
264 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
266 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
267 "movdqu\t{%1, %0|%0, %1}"
268 [(set_attr "type" "ssemov")
269 (set_attr "prefix_data16" "1")
270 (set_attr "mode" "TI")])
;; Emits movntps (V4SF) or movntpd (V2DF): the source must be a register
;; and the destination must be memory.
272 (define_insn "<sse>_movnt<mode>"
273 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
275 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
277 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
278 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
279 [(set_attr "type" "ssemov")
280 (set_attr "mode" "<MODE>")])
;; Emits movntdq: V2DI register source, memory destination.
;; NOTE(review): the type attribute is "ssecvt" here, while the float
;; "<sse>_movnt<mode>" pattern in this file uses "ssemov" -- confirm that the
;; difference is intentional.
282 (define_insn "sse2_movntv2di"
283 [(set (match_operand:V2DI 0 "memory_operand" "=m")
284 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
287 "movntdq\t{%1, %0|%0, %1}"
288 [(set_attr "type" "ssecvt")
289 (set_attr "prefix_data16" "1")
290 (set_attr "mode" "TI")])
;; Emits movnti: SImode general register (constraint "r") source, memory
;; destination.
;; NOTE(review): the mode attribute is "V2DF" although both operands are
;; SImode, and the type is "ssecvt" -- confirm both are intentional.
292 (define_insn "sse2_movntsi"
293 [(set (match_operand:SI 0 "memory_operand" "=m")
294 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
297 "movnti\t{%1, %0|%0, %1}"
298 [(set_attr "type" "ssecvt")
299 (set_attr "mode" "V2DF")])
;; Emits lddqu: a V16QI load whose source must be a memory operand and
;; whose destination must be a register.
301 (define_insn "sse3_lddqu"
302 [(set (match_operand:V16QI 0 "register_operand" "=x")
303 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
306 "lddqu\t{%1, %0|%0, %1}"
307 [(set_attr "type" "ssecvt")
308 (set_attr "prefix_rep" "1")
309 (set_attr "mode" "TI")])
311 ; Expand patterns for non-temporal stores. At the moment, only those
312 ; that directly map to insns are defined; it would be possible to
313 ; define patterns for other modes that would expand to several insns.
;; storent<mode> for the packed float modes: register source, memory
;; destination.
315 (define_expand "storent<mode>"
316 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
318 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
320 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent<mode> for the MODEF modes (that iterator is not defined in this
;; part of the file): register source, memory destination.
323 (define_expand "storent<mode>"
324 [(set (match_operand:MODEF 0 "memory_operand" "")
326 [(match_operand:MODEF 1 "register_operand" "")]
;; storentv2di: same RTL shape as the sse2_movntv2di insn (V2DI unspec of a
;; register stored to memory).
331 (define_expand "storentv2di"
332 [(set (match_operand:V2DI 0 "memory_operand" "")
333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storentsi: same RTL shape as the sse2_movntsi insn (SI unspec of a
;; register stored to memory).
338 (define_expand "storentsi"
339 [(set (match_operand:SI 0 "memory_operand" "")
340 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
345 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
347 ;; Parallel floating point arithmetic
349 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary-operation expander for the packed float modes.  No RTL from the
;; template is emitted directly: ix86_expand_fp_absneg_operator does the
;; expansion and the preparation statement ends with DONE.
351 (define_expand "<code><mode>2"
352 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
354 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
355 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
356 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed float add/subtract expander (plusminus code iterator).  Operands
;; are adjusted in place by ix86_fixup_binary_operands_no_copy before the
;; template RTL is emitted.
358 (define_expand "<addsub><mode>3"
359 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
360 (plusminus:SSEMODEF2P
361 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
362 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
363 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
364 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract insn.  The mnemonic is assembled as
;; <addsub>p<ssemodesuffixf2c>.  Operand 1 is tied to the destination, with
;; the <comm> attribute prefixed to its constraint; operand 2 may be a
;; register or memory.
366 (define_insn "*<addsub><mode>3"
367 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
368 (plusminus:SSEMODEF2P
369 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
370 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
372 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
373 "<addsub>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
374 [(set_attr "type" "sseadd")
375 (set_attr "mode" "<MODE>")])
;; Scalar form of packed float add/subtract: the plus/minus result is wrapped
;; in a vec_merge and emitted as <addsub>s<ssemodesuffixf2c>.  Operand 1 must
;; be a register tied to the destination; operand 2 may be register or memory.
;; The operator check receives <MODE>mode, matching the other mode-iterated
;; patterns in this file, so the V2DF instance is not checked as V4SFmode.
377 (define_insn "<sse>_vm<addsub><mode>3"
378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
379 (vec_merge:SSEMODEF2P
380 (plusminus:SSEMODEF2P
381 (match_operand:SSEMODEF2P 1 "register_operand" "0")
382 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
385 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
386 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
387 "<addsub>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
388 [(set_attr "type" "sseadd")
389 (set_attr "mode" "<ssescalarmode>")])
;; Packed float multiply expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy (MULT) before the template is emitted.
391 (define_expand "mul<mode>3"
392 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
394 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
395 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
396 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
397 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply insn: emits mulps / mulpd.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be register or
;; memory.
399 (define_insn "*mul<mode>3"
400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
402 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
403 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
404 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
405 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
406 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
407 [(set_attr "type" "ssemul")
408 (set_attr "mode" "<MODE>")])
;; Scalar form of the multiply (vec_merge pattern): emits mulss / mulsd.
;; Operand 1 must be a register tied to the destination.  The mode attribute
;; is the scalar mode of the vector mode.
410 (define_insn "<sse>_vmmul<mode>3"
411 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
412 (vec_merge:SSEMODEF2P
414 (match_operand:SSEMODEF2P 1 "register_operand" "0")
415 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
418 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
419 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
420 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
421 [(set_attr "type" "ssemul")
422 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and TARGET_RECIP
;; are set, the compilation is not optimizing for size, and
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, the division is expanded through
;; ix86_emit_swdivsf instead.
424 (define_expand "divv4sf3"
425 [(set (match_operand:V4SF 0 "register_operand" "")
426 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
427 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
430 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
432 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
433 && flag_finite_math_only && !flag_trapping_math
434 && flag_unsafe_math_optimizations)
436 ix86_emit_swdivsf (operands[0], operands[1],
437 operands[2], V4SFmode);
;; V2DF division expander; the only preparation step is
;; ix86_fixup_binary_operands_no_copy (DIV).
442 (define_expand "divv2df3"
443 [(set (match_operand:V2DF 0 "register_operand" "")
444 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
445 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
447 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed float divide insn: emits divps / divpd.  Operand 1 must be a
;; register tied to the destination; operand 2 may be register or memory.
449 (define_insn "<sse>_div<mode>3"
450 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
452 (match_operand:SSEMODEF2P 1 "register_operand" "0")
453 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
454 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
455 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
456 [(set_attr "type" "ssediv")
457 (set_attr "mode" "<MODE>")])
;; Scalar form of the divide (vec_merge pattern): emits divss / divsd.
;; Operand 1 must be a register tied to the destination.
459 (define_insn "<sse>_vmdiv<mode>3"
460 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
461 (vec_merge:SSEMODEF2P
463 (match_operand:SSEMODEF2P 1 "register_operand" "0")
464 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
467 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
468 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
469 [(set_attr "type" "ssediv")
470 (set_attr "mode" "<ssescalarmode>")])
;; Emits rcpps for the UNSPEC_RCP of operand 1 (register or memory).
472 (define_insn "sse_rcpv4sf2"
473 [(set (match_operand:V4SF 0 "register_operand" "=x")
475 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
477 "rcpps\t{%1, %0|%0, %1}"
478 [(set_attr "type" "sse")
479 (set_attr "mode" "V4SF")])
;; Emits rcpss on operand 1.  Operand 2 is a register tied to the
;; destination (constraint "0") and does not appear in the output template.
481 (define_insn "sse_vmrcpv4sf2"
482 [(set (match_operand:V4SF 0 "register_operand" "=x")
484 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
486 (match_operand:V4SF 2 "register_operand" "0")
489 "rcpss\t{%1, %0|%0, %1}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; the compilation is not optimizing for size, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold, the
;; operation is expanded through ix86_emit_swsqrtsf with a final argument
;; of 0.
493 (define_expand "sqrtv4sf2"
494 [(set (match_operand:V4SF 0 "register_operand" "")
495 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
498 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
499 && flag_finite_math_only && !flag_trapping_math
500 && flag_unsafe_math_optimizations)
502 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits sqrtps for a V4SF sqrt of operand 1 (register or memory).
507 (define_insn "sse_sqrtv4sf2"
508 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
511 "sqrtps\t{%1, %0|%0, %1}"
512 [(set_attr "type" "sse")
513 (set_attr "mode" "V4SF")])
;; Emits sqrtpd for a V2DF sqrt of operand 1 (register or memory).
515 (define_insn "sqrtv2df2"
516 [(set (match_operand:V2DF 0 "register_operand" "=x")
517 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
519 "sqrtpd\t{%1, %0|%0, %1}"
520 [(set_attr "type" "sse")
521 (set_attr "mode" "V2DF")])
;; Scalar square root (vec_merge pattern): emits sqrtss / sqrtsd on
;; operand 1.  Operand 2 is a register tied to the destination and does not
;; appear in the output template.
523 (define_insn "<sse>_vmsqrt<mode>2"
524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
525 (vec_merge:SSEMODEF2P
527 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
528 (match_operand:SSEMODEF2P 2 "register_operand" "0")
530 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
531 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
532 [(set_attr "type" "sse")
533 (set_attr "mode" "<ssescalarmode>")])
;; V4SF expander for UNSPEC_RSQRT; expansion is done by ix86_emit_swsqrtsf
;; with a final argument of 1.
535 (define_expand "rsqrtv4sf2"
536 [(set (match_operand:V4SF 0 "register_operand" "")
538 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
541 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Emits rsqrtps for the UNSPEC_RSQRT of operand 1 (register or memory).
545 (define_insn "sse_rsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
550 "rsqrtps\t{%1, %0|%0, %1}"
551 [(set_attr "type" "sse")
552 (set_attr "mode" "V4SF")])
;; Emits rsqrtss on operand 1.  Operand 2 is a register tied to the
;; destination (constraint "0") and does not appear in the output template.
554 (define_insn "sse_vmrsqrtv4sf2"
555 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
559 (match_operand:V4SF 2 "register_operand" "0")
562 "rsqrtss\t{%1, %0|%0, %1}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "SF")])
566 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
567 ;; isn't really correct, as those rtl operators aren't defined when
568 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed float min/max expander.  Without flag_finite_math_only, operand 1
;; is forced into a register first; then the operands go through
;; ix86_fixup_binary_operands_no_copy.
570 (define_expand "<code><mode>3"
571 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
573 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
574 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
575 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
577 if (!flag_finite_math_only)
578 operands[1] = force_reg (<MODE>mode, operands[1]);
579 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn that is only available under flag_finite_math_only.  In this
;; variant operand 1 is marked commutative ("%0") and may be memory before
;; constraint matching.
582 (define_insn "*<code><mode>3_finite"
583 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
585 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
586 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
587 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
588 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
589 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "mode" "<MODE>")])
;; Min/max insn without the finite-math requirement.  Operand 1 must be a
;; register tied to the destination and carries no commutative marker.
593 (define_insn "*<code><mode>3"
594 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (match_operand:SSEMODEF2P 1 "register_operand" "0")
597 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
598 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
599 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
600 [(set_attr "type" "sseadd")
601 (set_attr "mode" "<MODE>")])
;; Scalar min/max (vec_merge pattern): mnemonic is
;; <maxminfprefix>s<ssemodesuffixf2c>.  Operand 1 must be a register tied to
;; the destination.
;; NOTE(review): the type attribute is "sse" here but "sseadd" on the packed
;; min/max insns in this file -- confirm that the difference is intentional.
603 (define_insn "<sse>_vm<code><mode>3"
604 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
605 (vec_merge:SSEMODEF2P
607 (match_operand:SSEMODEF2P 1 "register_operand" "0")
608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
611 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
612 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
613 [(set_attr "type" "sse")
614 (set_attr "mode" "<ssescalarmode>")])
616 ;; These versions of the min/max patterns implement exactly the operations
617 ;; min = (op1 < op2 ? op1 : op2)
618 ;; max = (!(op1 < op2) ? op1 : op2)
619 ;; Their operands are not commutative, and thus they may be used in the
620 ;; presence of -0.0 and NaN.
;; Non-commutative minimum: emits minps / minpd with operand 1 as a register
;; tied to the destination and operand 2 as register or memory.
622 (define_insn "*ieee_smin<mode>3"
623 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
625 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
626 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
628 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
629 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
630 [(set_attr "type" "sseadd")
631 (set_attr "mode" "<MODE>")])
;; Non-commutative maximum: emits maxps / maxpd with operand 1 as a register
;; tied to the destination and operand 2 as register or memory.
633 (define_insn "*ieee_smax<mode>3"
634 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
636 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
637 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
639 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
640 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
641 [(set_attr "type" "sseadd")
642 (set_attr "mode" "<MODE>")])
;; Emits addsubps.  The visible RTL includes a V4SF minus of operands 1
;; and 2; operand 1 is a register tied to the destination.
644 (define_insn "sse3_addsubv4sf3"
645 [(set (match_operand:V4SF 0 "register_operand" "=x")
648 (match_operand:V4SF 1 "register_operand" "0")
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
650 (minus:V4SF (match_dup 1) (match_dup 2))
653 "addsubps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sseadd")
655 (set_attr "prefix_rep" "1")
656 (set_attr "mode" "V4SF")])
;; Emits addsubpd.  The visible RTL includes a V2DF minus of operands 1
;; and 2; operand 1 is a register tied to the destination.
658 (define_insn "sse3_addsubv2df3"
659 [(set (match_operand:V2DF 0 "register_operand" "=x")
662 (match_operand:V2DF 1 "register_operand" "0")
663 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
664 (minus:V2DF (match_dup 1) (match_dup 2))
667 "addsubpd\t{%2, %0|%0, %2}"
668 [(set_attr "type" "sseadd")
669 (set_attr "mode" "V2DF")])
;; Emits h<addsub>ps.  The RTL combines adjacent SF elements in pairs:
;; elements (0,1) and (2,3) of operand 1, then elements (0,1) and (2,3) of
;; operand 2.  Operand 1 is a register tied to the destination.
671 (define_insn "sse3_h<addsub>v4sf3"
672 [(set (match_operand:V4SF 0 "register_operand" "=x")
677 (match_operand:V4SF 1 "register_operand" "0")
678 (parallel [(const_int 0)]))
679 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
681 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
682 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
686 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
687 (parallel [(const_int 0)]))
688 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
690 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
691 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
693 "h<addsub>ps\t{%2, %0|%0, %2}"
694 [(set_attr "type" "sseadd")
695 (set_attr "prefix_rep" "1")
696 (set_attr "mode" "V4SF")])
;; Emits h<addsub>pd.  The RTL combines DF elements 0 and 1 of operand 1,
;; then DF elements 0 and 1 of operand 2.  Operand 1 is a register tied to
;; the destination.
698 (define_insn "sse3_h<addsub>v2df3"
699 [(set (match_operand:V2DF 0 "register_operand" "=x")
703 (match_operand:V2DF 1 "register_operand" "0")
704 (parallel [(const_int 0)]))
705 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
708 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
709 (parallel [(const_int 0)]))
710 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
712 "h<addsub>pd\t{%2, %0|%0, %2}"
713 [(set_attr "type" "sseadd")
714 (set_attr "mode" "V2DF")])
;; V4SF sum-reduction expander.  Two expansions are visible: a pair of
;; sse3_haddv4sf3 insns chained through a fresh temporary register, and
;; the generic ix86_expand_reduc_v4sf helper driven by gen_addv4sf3.
716 (define_expand "reduc_splus_v4sf"
717 [(match_operand:V4SF 0 "register_operand" "")
718 (match_operand:V4SF 1 "register_operand" "")]
723 rtx tmp = gen_reg_rtx (V4SFmode);
724 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
725 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
728 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum-reduction expander: a single sse3_haddv2df3 with operand 1 used
;; as both sources.
732 (define_expand "reduc_splus_v2df"
733 [(match_operand:V2DF 0 "register_operand" "")
734 (match_operand:V2DF 1 "register_operand" "")]
737 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF max-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_smaxv4sf3.
741 (define_expand "reduc_smax_v4sf"
742 [(match_operand:V4SF 0 "register_operand" "")
743 (match_operand:V4SF 1 "register_operand" "")]
746 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF min-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_sminv4sf3.
750 (define_expand "reduc_smin_v4sf"
751 [(match_operand:V4SF 0 "register_operand" "")
752 (match_operand:V4SF 1 "register_operand" "")]
755 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
761 ;; Parallel floating point comparisons
763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mask-producing compare for the scalar and packed float modes
;; (SSEMODEF4).  Operand 3 is the comparison operator and is printed through
;; the %D3 modifier to form the cmp<cond><suffix> mnemonic.  Operand 1 is a
;; register tied to the destination.
765 (define_insn "<sse>_maskcmp<mode>3"
766 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
767 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
768 [(match_operand:SSEMODEF4 1 "register_operand" "0")
769 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
770 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
772 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
773 [(set_attr "type" "ssecmp")
774 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare (vec_merge pattern) for the packed float
;; modes, disabled when TARGET_SSE5 is set.  Operand 3 is the comparison
;; operator, printed through %D3.
776 (define_insn "<sse>_vmmaskcmp<mode>3"
777 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
778 (vec_merge:SSEMODEF2P
779 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
780 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
781 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
784 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
785 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
786 [(set_attr "type" "ssecmp")
787 (set_attr "mode" "<ssescalarmode>")])
;; Emits comiss / comisd.  Sets FLAGS_REG in CCFP mode from element 0 of
;; operand 0 and element 0 of operand 1 (both taken from the vector mode
;; given by <ssevecmode>).
789 (define_insn "<sse>_comi"
790 [(set (reg:CCFP FLAGS_REG)
793 (match_operand:<ssevecmode> 0 "register_operand" "x")
794 (parallel [(const_int 0)]))
796 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
797 (parallel [(const_int 0)]))))]
798 "SSE_FLOAT_MODE_P (<MODE>mode)"
799 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
800 [(set_attr "type" "ssecomi")
801 (set_attr "mode" "<MODE>")])
;; Emits ucomiss / ucomisd.  Same operand shape as the comi pattern, but
;; FLAGS_REG is set in CCFPU mode.
803 (define_insn "<sse>_ucomi"
804 [(set (reg:CCFPU FLAGS_REG)
807 (match_operand:<ssevecmode> 0 "register_operand" "x")
808 (parallel [(const_int 0)]))
810 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
811 (parallel [(const_int 0)]))))]
812 "SSE_FLOAT_MODE_P (<MODE>mode)"
813 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
814 [(set_attr "type" "ssecomi")
815 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for the packed float modes.
;; Operands 4 and 5 are the values compared; operands 1 and 2 are the two
;; arms of the if_then_else.  Expansion is attempted through
;; ix86_expand_fp_vcond.
817 (define_expand "vcond<mode>"
818 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
819 (if_then_else:SSEMODEF2P
821 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
822 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
823 (match_operand:SSEMODEF2P 1 "general_operand" "")
824 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
825 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
827 if (ix86_expand_fp_vcond (operands))
833 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
835 ;; Parallel floating point logical operations
837 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits andnps / andnpd.  Operand 1 must be a register tied to the
;; destination; operand 2 may be register or memory.
839 (define_insn "<sse>_nand<mode>3"
840 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
843 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
844 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
845 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
846 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
847 [(set_attr "type" "sselog")
848 (set_attr "mode" "<MODE>")])
;; Expander for the packed float logical operations; operands are adjusted
;; in place by ix86_fixup_binary_operands_no_copy.
850 (define_expand "<code><mode>3"
851 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
853 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
854 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
855 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
856 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float logical insn: mnemonic is
;; <plogicprefix>p<ssemodesuffixf2c>.  Operand 1 is tied to the destination
;; and marked commutative ("%0").
858 (define_insn "*<code><mode>3"
859 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
861 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
862 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
863 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
864 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
865 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
866 [(set_attr "type" "sselog")
867 (set_attr "mode" "<MODE>")])
869 ;; Also define scalar versions. These are used for abs, neg, and
870 ;; conditional move. Using subregs into vector modes causes register
871 ;; allocation lossage. These patterns do not allow memory operands
872 ;; because the native instructions read the full 128 bits.
;; Scalar-mode (MODEF) variant: emits the packed andnp instruction on
;; register operands only.  The mode attribute is the vector mode given by
;; <ssevecmode>.
874 (define_insn "*nand<mode>3"
875 [(set (match_operand:MODEF 0 "register_operand" "=x")
878 (match_operand:MODEF 1 "register_operand" "0"))
879 (match_operand:MODEF 2 "register_operand" "x")))]
880 "SSE_FLOAT_MODE_P (<MODE>mode)"
881 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
882 [(set_attr "type" "sselog")
883 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) variant of the logical insns: emits the packed
;; <plogicprefix>p instruction on register operands only.  The mode
;; attribute is the vector mode given by <ssevecmode>.
885 (define_insn "*<code><mode>3"
886 [(set (match_operand:MODEF 0 "register_operand" "=x")
888 (match_operand:MODEF 1 "register_operand" "0")
889 (match_operand:MODEF 2 "register_operand" "x")))]
890 "SSE_FLOAT_MODE_P (<MODE>mode)"
891 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
892 [(set_attr "type" "sselog")
893 (set_attr "mode" "<ssevecmode>")])
895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;; SSE5 floating point multiply/accumulate instructions. This includes the
898 ;; scalar versions of the instructions as well as the vector versions.
900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
902 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
903 ;; combine to generate a multiply/add with two memory references. We then
904 ;; split this insn into a load of one memory operand into the destination
905 ;; register plus the multiply/add itself. If the split does not happen,
906 ;; reload will generate the appropriate moves. This is needed because combine
907 ;; has already folded one of the memory references into both the multiply and
908 ;; add insns, and it can't generate a new pseudo. I.e.:
909 ;; (set (reg1) (mem (addr1)))
910 ;; (set (reg2) (mult (reg1) (mem (addr2))))
911 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add for the scalar and packed float modes.  There are
;; four constraint alternatives; the destination is tied to operand 1 in the
;; first two and to operand 3 in the last two.  The condition requires
;; TARGET_SSE5 and TARGET_FUSED_MADD and checks the operands with
;; ix86_sse5_valid_op_p (last argument 2).
913 (define_insn "sse5_fmadd<mode>4"
914 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
917 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
918 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
919 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
920 "TARGET_SSE5 && TARGET_FUSED_MADD
921 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
922 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
923 [(set_attr "type" "ssemuladd")
924 (set_attr "mode" "<MODE>")])
926 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any source operand.  ix86_expand_sse5_multiple_memory
;; rewrites the operands, then a fresh sse5_fmadd<mode>4 is emitted.
928 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
931 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
932 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
933 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
935 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
936 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
937 && !reg_mentioned_p (operands[0], operands[1])
938 && !reg_mentioned_p (operands[0], operands[2])
939 && !reg_mentioned_p (operands[0], operands[3])"
942 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
943 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
944 operands[2], operands[3]));
948 ;; For the scalar operations, use operand1 for the upper words that aren't
949 ;; modified, so restrict the forms that are generated.
950 ;; Scalar version of fmadd
;; Both alternatives tie operand 1 to the destination.  Operands are checked
;; with ix86_sse5_valid_op_p using a final argument of 1, and the mnemonic
;; takes the scalar suffix from <ssemodesuffixf2s>.
951 (define_insn "sse5_vmfmadd<mode>4"
952 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
953 (vec_merge:SSEMODEF2P
956 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
957 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
958 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
961 "TARGET_SSE5 && TARGET_FUSED_MADD
962 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
963 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
964 [(set_attr "type" "ssemuladd")
965 (set_attr "mode" "<MODE>")])
967 ;; Floating multiply and subtract
968 ;; Allow two memory operands the same as fmadd
;; Uses the same four constraint alternatives and the same condition as
;; sse5_fmadd<mode>4 (ix86_sse5_valid_op_p with a final argument of 2); the
;; emitted mnemonic is fmsub<ssemodesuffixf4>.
969 (define_insn "sse5_fmsub<mode>4"
970 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
973 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
974 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
975 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
976 "TARGET_SSE5 && TARGET_FUSED_MADD
977 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
978 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
979 [(set_attr "type" "ssemuladd")
980 (set_attr "mode" "<MODE>")])
982 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any source operand.  ix86_expand_sse5_multiple_memory
;; rewrites the operands, then a fresh sse5_fmsub<mode>4 is emitted.
984 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
987 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
988 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
989 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
991 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
992 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
993 && !reg_mentioned_p (operands[0], operands[1])
994 && !reg_mentioned_p (operands[0], operands[2])
995 && !reg_mentioned_p (operands[0], operands[3])"
998 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
999 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1000 operands[2], operands[3]));
1004 ;; For the scalar operations, use operand1 for the upper words that aren't
1005 ;; modified, so restrict the forms that are generated.
1006 ;; Scalar version of fmsub
;; Scalar fmsub held in a vector register (vec_merge pattern).  The template
;; prints fmsub<ssemodesuffixf2s>, i.e. fmsubss for V4SF and fmsubsd for V2DF.
;; Operand 1 is tied to the destination (constraint 0) in both alternatives;
;; only one of operands 2 and 3 is xm per alternative.  Enabled under
;; TARGET_SSE5 && TARGET_FUSED_MADD with ix86_sse5_valid_op_p (..., 1).
1007 (define_insn "sse5_vmfmsub<mode>4"
1008 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1009 (vec_merge:SSEMODEF2P
1012 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1013 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1014 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1017 "TARGET_SSE5 && TARGET_FUSED_MADD
1018 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1019 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1020 [(set_attr "type" "ssemuladd")
1021 (set_attr "mode" "<MODE>")])
1023 ;; Floating point negative multiply and add
1024 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1025 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1026 ;; Allow two memory operands to help in optimizing.
;; fnmadd for every SSEMODEF4 mode; the template prints
;; fnmadd<ssemodesuffixf4>.  Operand 3 is written first in the RTL, ahead of
;; operands 1 and 2, while the assembler template still lists the operands
;; in numeric order.  Four alternatives: the destination is tied to operand 1
;; in alternatives 0-1 and to operand 3 in alternatives 2-3; % makes operands
;; 1 and 2 commutative.  The condition passes 2 to ix86_sse5_valid_op_p.
1027 (define_insn "sse5_fnmadd<mode>4"
1028 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1030 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1032 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1033 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1034 "TARGET_SSE5 && TARGET_FUSED_MADD
1035 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1036 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1037 [(set_attr "type" "ssemuladd")
1038 (set_attr "mode" "<MODE>")])
1040 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Splitter for fnmadd.  It fires only when ix86_sse5_valid_op_p rejects the
;; operands with a final argument of 1 but accepts them with 2, and when
;; operand 0 is not mentioned in any of operands 1-3.  The replacement code
;; calls ix86_expand_sse5_multiple_memory and then re-emits the insn through
;; gen_sse5_fnmadd<mode>4.
1042 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1044 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1046 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1047 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1049 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1050 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1051 && !reg_mentioned_p (operands[0], operands[1])
1052 && !reg_mentioned_p (operands[0], operands[2])
1053 && !reg_mentioned_p (operands[0], operands[3])"
1056 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1057 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1058 operands[2], operands[3]));
1062 ;; For the scalar operations, use operand1 for the upper words that aren't
1063 ;; modified, so restrict the forms that are generated.
1064 ;; Scalar version of fnmadd
;; Scalar fnmadd held in a vector register (vec_merge pattern).  The template
;; prints fnmadd<ssemodesuffixf2s> (ss for V4SF, sd for V2DF).  Operand 3 is
;; written first in the RTL; operand 1 is tied to the destination
;; (constraint 0) in both alternatives.  Enabled under
;; TARGET_SSE5 && TARGET_FUSED_MADD with ix86_sse5_valid_op_p (..., 1).
1065 (define_insn "sse5_vmfnmadd<mode>4"
1066 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1067 (vec_merge:SSEMODEF2P
1069 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1071 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1072 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1075 "TARGET_SSE5 && TARGET_FUSED_MADD
1076 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1077 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1078 [(set_attr "type" "ssemuladd")
1079 (set_attr "mode" "<MODE>")])
1081 ;; Floating point negative multiply and subtract
1082 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1083 ;; Allow 2 memory operands to help with optimization
;; fnmsub for every SSEMODEF4 mode; the template prints
;; fnmsub<ssemodesuffixf4>.  Unlike the fmsub and fnmadd patterns, this one
;; has only two alternatives, and operand 1 is tied to the destination
;; (constraint 0) in both of them.  The condition passes 2 as the final
;; argument of ix86_sse5_valid_op_p.
1084 (define_insn "sse5_fnmsub<mode>4"
1085 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1089 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1090 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1091 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1092 "TARGET_SSE5 && TARGET_FUSED_MADD
1093 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1094 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1095 [(set_attr "type" "ssemuladd")
1096 (set_attr "mode" "<MODE>")])
1098 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Splitter for fnmsub.  It fires only when ix86_sse5_valid_op_p rejects the
;; operands with a final argument of 1 but accepts them with 2, and when
;; operand 0 is not mentioned in any of operands 1-3.  The replacement code
;; calls ix86_expand_sse5_multiple_memory and then re-emits the insn through
;; gen_sse5_fnmsub<mode>4.
1100 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1104 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1105 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1106 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1108 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1109 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1110 && !reg_mentioned_p (operands[0], operands[1])
1111 && !reg_mentioned_p (operands[0], operands[2])
1112 && !reg_mentioned_p (operands[0], operands[3])"
1115 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1116 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1117 operands[2], operands[3]));
1121 ;; For the scalar operations, use operand1 for the upper words that aren't
1122 ;; modified, so restrict the forms that are generated.
1123 ;; Scalar version of fnmsub
;; Scalar fnmsub held in a vector register (vec_merge pattern).  The template
;; prints fnmsub<ssemodesuffixf2s> (ss for V4SF, sd for V2DF).  Operand 1 is
;; tied to the destination (constraint 0) in both alternatives.
;; NOTE(review): this condition passes 2 as the final argument of
;; ix86_sse5_valid_op_p, whereas sse5_vmfmadd, sse5_vmfmsub and sse5_vmfnmadd
;; pass 1 -- confirm whether the difference is intentional.
1124 (define_insn "sse5_vmfnmsub<mode>4"
1125 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1126 (vec_merge:SSEMODEF2P
1130 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1131 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1132 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1135 "TARGET_SSE5 && TARGET_FUSED_MADD
1136 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1137 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1138 [(set_attr "type" "ssemuladd")
1139 (set_attr "mode" "<MODE>")])
1141 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1142 ;; even if the user used -mno-fused-madd
1143 ;; Parallel instructions. During instruction generation, just default
1144 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the packed fmadd intrinsic (V4SF and V2DF).  All four
;; operands must be registers, and the RTL is wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the preparation code
;; emits gen_sse5_fmadd<mode>4 instead of the UNSPEC form.
1145 (define_expand "sse5i_fmadd<mode>4"
1146 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1150 (match_operand:SSEMODEF2P 1 "register_operand" "")
1151 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1152 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1153 UNSPEC_SSE5_INTRINSIC))]
1156 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1157 if (TARGET_FUSED_MADD)
1159 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1160 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of packed fmadd.
;; Its condition needs only TARGET_SSE5 (TARGET_FUSED_MADD is not tested) and
;; ix86_sse5_valid_op_p (..., 1).  Four alternatives: the destination is tied
;; to operand 1 in alternatives 0-1 and to operand 3 in alternatives 2-3.
;; With the SSEMODEF2P iterator, <ssemodesuffixf4> yields ps or pd.
1165 (define_insn "*sse5i_fmadd<mode>4"
1166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1170 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1171 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1172 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1173 UNSPEC_SSE5_INTRINSIC))]
1174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1175 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1176 [(set_attr "type" "ssemuladd")
1177 (set_attr "mode" "<MODE>")])
;; Expander for the packed fmsub intrinsic (V4SF and V2DF).  All four
;; operands must be registers, and the RTL is wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the preparation code
;; emits gen_sse5_fmsub<mode>4 instead of the UNSPEC form.
1179 (define_expand "sse5i_fmsub<mode>4"
1180 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1184 (match_operand:SSEMODEF2P 1 "register_operand" "")
1185 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1186 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1187 UNSPEC_SSE5_INTRINSIC))]
1190 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1191 if (TARGET_FUSED_MADD)
1193 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1194 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of packed fmsub.
;; Its condition needs only TARGET_SSE5 (TARGET_FUSED_MADD is not tested) and
;; ix86_sse5_valid_op_p (..., 1).  With the SSEMODEF2P iterator,
;; <ssemodesuffixf4> yields ps or pd.
;; NOTE(review): operand 1 uses the register_operand predicate although its
;; constraint string includes xm, and the corresponding *sse5i_fmadd pattern
;; uses nonimmediate_operand -- confirm whether this is intentional.
1199 (define_insn "*sse5i_fmsub<mode>4"
1200 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1204 (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
1205 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1206 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1207 UNSPEC_SSE5_INTRINSIC))]
1208 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1209 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1210 [(set_attr "type" "ssemuladd")
1211 (set_attr "mode" "<MODE>")])
1213 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1214 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the packed fnmadd intrinsic (V4SF and V2DF).  Operand 3 is
;; written first in the RTL, and all operands must be registers.  When
;; TARGET_FUSED_MADD is set the preparation code emits gen_sse5_fnmadd<mode>4
;; (operands passed in numeric order) instead of the UNSPEC form.
1215 (define_expand "sse5i_fnmadd<mode>4"
1216 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1219 (match_operand:SSEMODEF2P 3 "register_operand" "")
1221 (match_operand:SSEMODEF2P 1 "register_operand" "")
1222 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1223 UNSPEC_SSE5_INTRINSIC))]
1226 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1227 if (TARGET_FUSED_MADD)
1229 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1230 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of packed fnmadd.
;; Operand 3 is written first in the RTL.  The condition needs only
;; TARGET_SSE5 and ix86_sse5_valid_op_p (..., 1).  Four alternatives: the
;; destination is tied to operand 1 in alternatives 0-1 and to operand 3 in
;; alternatives 2-3.  <ssemodesuffixf4> yields ps or pd here.
1235 (define_insn "*sse5i_fnmadd<mode>4"
1236 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1239 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1241 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1242 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1243 UNSPEC_SSE5_INTRINSIC))]
1244 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1245 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1246 [(set_attr "type" "ssemuladd")
1247 (set_attr "mode" "<MODE>")])
1249 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the packed fnmsub intrinsic (V4SF and V2DF).  All four
;; operands must be registers, and the RTL is wrapped in
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the preparation code
;; emits gen_sse5_fnmsub<mode>4 instead of the UNSPEC form.
1250 (define_expand "sse5i_fnmsub<mode>4"
1251 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1256 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1257 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1258 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1259 UNSPEC_SSE5_INTRINSIC))]
1262 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1263 if (TARGET_FUSED_MADD)
1265 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1266 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of packed fnmsub.
;; The condition needs only TARGET_SSE5 and ix86_sse5_valid_op_p (..., 1).
;; Four alternatives are offered here (the destination may be tied to
;; operand 1 or to operand 3), whereas sse5_fnmsub<mode>4 has only two.
;; <ssemodesuffixf4> yields ps or pd here.
1271 (define_insn "*sse5i_fnmsub<mode>4"
1272 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1277 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1278 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1279 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1280 UNSPEC_SSE5_INTRINSIC))]
1281 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1282 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1283 [(set_attr "type" "ssemuladd")
1284 (set_attr "mode" "<MODE>")])
1286 ;; Scalar instructions
;; Expander for the scalar fmadd intrinsic: a vec_merge wrapped in
;; UNSPEC_SSE5_INTRINSIC with all operands required to be registers.  When
;; TARGET_FUSED_MADD is set the preparation code emits
;; gen_sse5_vmfmadd<mode>4 instead of the UNSPEC form.
1287 (define_expand "sse5i_vmfmadd<mode>4"
1288 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1290 [(vec_merge:SSEMODEF2P
1293 (match_operand:SSEMODEF2P 1 "register_operand" "")
1294 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1295 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1298 UNSPEC_SSE5_INTRINSIC))]
1301 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1302 if (TARGET_FUSED_MADD)
1304 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1305 operands[2], operands[3]));
1310 ;; For the scalar operations, use operand1 for the upper words that aren't
1311 ;; modified, so restrict the forms that are accepted.
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fmadd.
;; Operand 1 must be a register tied to the destination (constraint 0); only
;; one of operands 2 and 3 is xm per alternative.  The condition needs only
;; TARGET_SSE5 and ix86_sse5_valid_op_p (..., 1).  The mode attribute is
;; <ssescalarmode> rather than <MODE>.
1312 (define_insn "*sse5i_vmfmadd<mode>4"
1313 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1315 [(vec_merge:SSEMODEF2P
1318 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1319 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1320 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1323 UNSPEC_SSE5_INTRINSIC))]
1324 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1325 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1326 [(set_attr "type" "ssemuladd")
1327 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fmsub intrinsic: a vec_merge wrapped in
;; UNSPEC_SSE5_INTRINSIC with all operands required to be registers.  When
;; TARGET_FUSED_MADD is set the preparation code emits
;; gen_sse5_vmfmsub<mode>4 instead of the UNSPEC form.
1329 (define_expand "sse5i_vmfmsub<mode>4"
1330 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1332 [(vec_merge:SSEMODEF2P
1335 (match_operand:SSEMODEF2P 1 "register_operand" "")
1336 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1337 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1340 UNSPEC_SSE5_INTRINSIC))]
1343 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1344 if (TARGET_FUSED_MADD)
1346 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1347 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fmsub.
;; Operand 1 is tied to the destination (constraint 0); only one of operands
;; 2 and 3 is xm per alternative.  The condition needs only TARGET_SSE5 and
;; ix86_sse5_valid_op_p (..., 1).  The mode attribute is <ssescalarmode>.
1352 (define_insn "*sse5i_vmfmsub<mode>4"
1353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1355 [(vec_merge:SSEMODEF2P
1358 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1359 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1360 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1363 UNSPEC_SSE5_INTRINSIC))]
1364 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1365 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1366 [(set_attr "type" "ssemuladd")
1367 (set_attr "mode" "<ssescalarmode>")])
1369 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar fnmadd intrinsic: a vec_merge wrapped in
;; UNSPEC_SSE5_INTRINSIC, with operand 3 written first in the RTL and all
;; operands required to be registers.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_vmfnmadd<mode>4 instead of the UNSPEC form.
1370 (define_expand "sse5i_vmfnmadd<mode>4"
1371 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1373 [(vec_merge:SSEMODEF2P
1375 (match_operand:SSEMODEF2P 3 "register_operand" "")
1377 (match_operand:SSEMODEF2P 1 "register_operand" "")
1378 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1381 UNSPEC_SSE5_INTRINSIC))]
1384 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1385 if (TARGET_FUSED_MADD)
1387 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1388 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fnmadd.
;; Operand 3 is written first in the RTL; operand 1 is tied to the
;; destination (constraint 0).  The condition needs only TARGET_SSE5 and
;; ix86_sse5_valid_op_p (..., 1).  The mode attribute is <ssescalarmode>.
1393 (define_insn "*sse5i_vmfnmadd<mode>4"
1394 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1396 [(vec_merge:SSEMODEF2P
1398 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1400 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1401 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1404 UNSPEC_SSE5_INTRINSIC))]
1405 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1406 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1407 [(set_attr "type" "ssemuladd")
1408 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fnmsub intrinsic: a vec_merge wrapped in
;; UNSPEC_SSE5_INTRINSIC with all operands required to be registers.  When
;; TARGET_FUSED_MADD is set the preparation code emits
;; gen_sse5_vmfnmsub<mode>4 instead of the UNSPEC form.
1410 (define_expand "sse5i_vmfnmsub<mode>4"
1411 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1413 [(vec_merge:SSEMODEF2P
1417 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1418 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1419 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1422 UNSPEC_SSE5_INTRINSIC))]
1425 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1426 if (TARGET_FUSED_MADD)
1428 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1429 operands[2], operands[3]));
;; Anonymous insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fnmsub.
;; Operand 1 is tied to the destination (constraint 0); only one of operands
;; 2 and 3 is xm per alternative.  The condition needs only TARGET_SSE5 and
;; ix86_sse5_valid_op_p (..., 1).  The mode attribute is <ssescalarmode>.
1434 (define_insn "*sse5i_vmfnmsub<mode>4"
1435 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1437 [(vec_merge:SSEMODEF2P
1441 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1442 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1443 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1446 UNSPEC_SSE5_INTRINSIC))]
1447 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1448 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1449 [(set_attr "type" "ssemuladd")
1450 (set_attr "mode" "<ssescalarmode>")])
1452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1454 ;; Parallel single-precision floating point conversion operations
1456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: applies float to V2SI operand 2 (MMX register or memory,
;; constraint ym) to produce V2SF.  Operand 1 is a V4SF register tied to the
;; V4SF destination (constraint 0), so the assembler template names only
;; operands 2 and 0.
1458 (define_insn "sse_cvtpi2ps"
1459 [(set (match_operand:V4SF 0 "register_operand" "=x")
1462 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1463 (match_operand:V4SF 1 "register_operand" "0")
1466 "cvtpi2ps\t{%2, %0|%0, %2}"
1467 [(set_attr "type" "ssecvt")
1468 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF source (SSE register or memory) goes through a V4SI
;; unspec, and the selector (parallel [0 1]) keeps elements 0 and 1 as the
;; V2SI result, which lands in an MMX register (constraint y).  The unit
;; attribute is mmx.
1470 (define_insn "sse_cvtps2pi"
1471 [(set (match_operand:V2SI 0 "register_operand" "=y")
1473 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1475 (parallel [(const_int 0) (const_int 1)])))]
1477 "cvtps2pi\t{%1, %0|%0, %1}"
1478 [(set_attr "type" "ssecvt")
1479 (set_attr "unit" "mmx")
1480 (set_attr "mode" "DI")])
;; cvttps2pi: truncating variant expressed with fix:V4SI on the V4SF source;
;; the selector (parallel [0 1]) keeps elements 0 and 1 as the V2SI result in
;; an MMX register (constraint y).
;; NOTE(review): the mode attribute here is SF while sse_cvtps2pi uses DI --
;; confirm which is intended.
1482 (define_insn "sse_cvttps2pi"
1483 [(set (match_operand:V2SI 0 "register_operand" "=y")
1485 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1486 (parallel [(const_int 0) (const_int 1)])))]
1488 "cvttps2pi\t{%1, %0|%0, %1}"
1489 [(set_attr "type" "ssecvt")
1490 (set_attr "unit" "mmx")
1491 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 to SF with float.  Alternative 0 takes the
;; source from a general register (r), alternative 1 from memory (m).
;; Operand 1 is a V4SF register tied to the destination (constraint 0).  The
;; decode attributes are listed per alternative.
1493 (define_insn "sse_cvtsi2ss"
1494 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1497 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1498 (match_operand:V4SF 1 "register_operand" "0,0")
1501 "cvtsi2ss\t{%2, %0|%0, %2}"
1502 [(set_attr "type" "sseicvt")
1503 (set_attr "athlon_decode" "vector,double")
1504 (set_attr "amdfam10_decode" "vector,double")
1505 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant that converts DI operand 2 to SF with float.
;; Only enabled for TARGET_SSE && TARGET_64BIT.  Operand 1 is a V4SF register
;; tied to the destination.  Note that the second alternative for operand 2
;; is rm here, whereas sse_cvtsi2ss uses m.
1507 (define_insn "sse_cvtsi2ssq"
1508 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1511 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1512 (match_operand:V4SF 1 "register_operand" "0,0")
1514 "TARGET_SSE && TARGET_64BIT"
1515 "cvtsi2ssq\t{%2, %0|%0, %2}"
1516 [(set_attr "type" "sseicvt")
1517 (set_attr "athlon_decode" "vector,double")
1518 (set_attr "amdfam10_decode" "vector,double")
1519 (set_attr "mode" "SF")])
;; cvtss2si, vector-source form: element 0 of V4SF operand 1 (selector
;; parallel [0]) is converted under UNSPEC_FIX_NOTRUNC into an SI general
;; register.  The source may be an SSE register or memory.  prefix_rep is 1.
1521 (define_insn "sse_cvtss2si"
1522 [(set (match_operand:SI 0 "register_operand" "=r,r")
1525 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1526 (parallel [(const_int 0)]))]
1527 UNSPEC_FIX_NOTRUNC))]
1529 "cvtss2si\t{%1, %0|%0, %1}"
1530 [(set_attr "type" "sseicvt")
1531 (set_attr "athlon_decode" "double,vector")
1532 (set_attr "prefix_rep" "1")
1533 (set_attr "mode" "SI")])
;; cvtss2si, scalar-source form: SF operand 1 (SSE register or memory) is
;; converted under UNSPEC_FIX_NOTRUNC into an SI general register.  Emits
;; the same mnemonic as sse_cvtss2si and additionally sets amdfam10_decode.
1535 (define_insn "sse_cvtss2si_2"
1536 [(set (match_operand:SI 0 "register_operand" "=r,r")
1537 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1538 UNSPEC_FIX_NOTRUNC))]
1540 "cvtss2si\t{%1, %0|%0, %1}"
1541 [(set_attr "type" "sseicvt")
1542 (set_attr "athlon_decode" "double,vector")
1543 (set_attr "amdfam10_decode" "double,double")
1544 (set_attr "prefix_rep" "1")
1545 (set_attr "mode" "SI")])
;; cvtss2siq, vector-source form: element 0 of V4SF operand 1 is converted
;; under UNSPEC_FIX_NOTRUNC into a DI general register.  Only enabled for
;; TARGET_SSE && TARGET_64BIT.
1547 (define_insn "sse_cvtss2siq"
1548 [(set (match_operand:DI 0 "register_operand" "=r,r")
1551 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1552 (parallel [(const_int 0)]))]
1553 UNSPEC_FIX_NOTRUNC))]
1554 "TARGET_SSE && TARGET_64BIT"
1555 "cvtss2siq\t{%1, %0|%0, %1}"
1556 [(set_attr "type" "sseicvt")
1557 (set_attr "athlon_decode" "double,vector")
1558 (set_attr "prefix_rep" "1")
1559 (set_attr "mode" "DI")])
;; cvtss2siq, scalar-source form: SF operand 1 (SSE register or memory) is
;; converted under UNSPEC_FIX_NOTRUNC into a DI general register.  Only
;; enabled for TARGET_SSE && TARGET_64BIT.
1561 (define_insn "sse_cvtss2siq_2"
1562 [(set (match_operand:DI 0 "register_operand" "=r,r")
1563 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1564 UNSPEC_FIX_NOTRUNC))]
1565 "TARGET_SSE && TARGET_64BIT"
1566 "cvtss2siq\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "sseicvt")
1568 (set_attr "athlon_decode" "double,vector")
1569 (set_attr "amdfam10_decode" "double,double")
1570 (set_attr "prefix_rep" "1")
1571 (set_attr "mode" "DI")])
;; cvttss2si: truncating conversion of element 0 of V4SF operand 1 (selector
;; parallel [0]; SSE register or memory) into an SI general register.  No
;; UNSPEC is involved in this pattern.
1573 (define_insn "sse_cvttss2si"
1574 [(set (match_operand:SI 0 "register_operand" "=r,r")
1577 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1578 (parallel [(const_int 0)]))))]
1580 "cvttss2si\t{%1, %0|%0, %1}"
1581 [(set_attr "type" "sseicvt")
1582 (set_attr "athlon_decode" "double,vector")
1583 (set_attr "amdfam10_decode" "double,double")
1584 (set_attr "prefix_rep" "1")
1585 (set_attr "mode" "SI")])
;; cvttss2siq: truncating conversion of element 0 of V4SF operand 1 into a
;; DI general register.  Only enabled for TARGET_SSE && TARGET_64BIT.
1587 (define_insn "sse_cvttss2siq"
1588 [(set (match_operand:DI 0 "register_operand" "=r,r")
1591 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1592 (parallel [(const_int 0)]))))]
1593 "TARGET_SSE && TARGET_64BIT"
1594 "cvttss2siq\t{%1, %0|%0, %1}"
1595 [(set_attr "type" "sseicvt")
1596 (set_attr "athlon_decode" "double,vector")
1597 (set_attr "amdfam10_decode" "double,double")
1598 (set_attr "prefix_rep" "1")
1599 (set_attr "mode" "DI")])
;; cvtdq2ps: float conversion of a whole V4SI operand (SSE register or
;; memory) into a V4SF register.
1601 (define_insn "sse2_cvtdq2ps"
1602 [(set (match_operand:V4SF 0 "register_operand" "=x")
1603 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1605 "cvtdq2ps\t{%1, %0|%0, %1}"
1606 [(set_attr "type" "ssecvt")
1607 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF to V4SI conversion modelled as UNSPEC_FIX_NOTRUNC.  The
;; source may be an SSE register or memory.  prefix_data16 is 1.
1609 (define_insn "sse2_cvtps2dq"
1610 [(set (match_operand:V4SI 0 "register_operand" "=x")
1611 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1612 UNSPEC_FIX_NOTRUNC))]
1614 "cvtps2dq\t{%1, %0|%0, %1}"
1615 [(set_attr "type" "ssecvt")
1616 (set_attr "prefix_data16" "1")
1617 (set_attr "mode" "TI")])
;; cvttps2dq: truncating V4SF to V4SI conversion written with plain fix.
;; The source may be an SSE register or memory.  prefix_rep is 1.
1619 (define_insn "sse2_cvttps2dq"
1620 [(set (match_operand:V4SI 0 "register_operand" "=x")
1621 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1623 "cvttps2dq\t{%1, %0|%0, %1}"
1624 [(set_attr "type" "ssecvt")
1625 (set_attr "prefix_rep" "1")
1626 (set_attr "mode" "TI")])
1628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1630 ;; Parallel double-precision floating point conversion operations
1632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float conversion of V2SI operand 1 into a V2DF register.
;; Alternative 0 reads an MMX register (y) and carries unit mmx; alternative
;; 1 reads memory (m) and leaves the unit attribute at its default (*).
1634 (define_insn "sse2_cvtpi2pd"
1635 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1636 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1638 "cvtpi2pd\t{%1, %0|%0, %1}"
1639 [(set_attr "type" "ssecvt")
1640 (set_attr "unit" "mmx,*")
1641 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF to V2SI conversion modelled as UNSPEC_FIX_NOTRUNC; the
;; result is placed in an MMX register (constraint y).  unit is mmx and
;; prefix_data16 is 1.
1643 (define_insn "sse2_cvtpd2pi"
1644 [(set (match_operand:V2SI 0 "register_operand" "=y")
1645 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1646 UNSPEC_FIX_NOTRUNC))]
1648 "cvtpd2pi\t{%1, %0|%0, %1}"
1649 [(set_attr "type" "ssecvt")
1650 (set_attr "unit" "mmx")
1651 (set_attr "prefix_data16" "1")
1652 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating V2DF to V2SI conversion written with plain fix; the
;; result is placed in an MMX register (constraint y).
;; NOTE(review): the mode attribute here is TI while sse2_cvtpd2pi uses DI --
;; confirm which is intended.
1654 (define_insn "sse2_cvttpd2pi"
1655 [(set (match_operand:V2SI 0 "register_operand" "=y")
1656 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1658 "cvttpd2pi\t{%1, %0|%0, %1}"
1659 [(set_attr "type" "ssecvt")
1660 (set_attr "unit" "mmx")
1661 (set_attr "prefix_data16" "1")
1662 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SI operand 2 (general register or memory) to DF with
;; float.  Operand 1 is a V2DF register tied to the destination
;; (constraint 0).  The decode attributes are listed per alternative.
1664 (define_insn "sse2_cvtsi2sd"
1665 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1668 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1669 (match_operand:V2DF 1 "register_operand" "0,0")
1672 "cvtsi2sd\t{%2, %0|%0, %2}"
1673 [(set_attr "type" "sseicvt")
1674 (set_attr "mode" "DF")
1675 (set_attr "athlon_decode" "double,direct")
1676 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit variant that converts DI operand 2 (general register or
;; memory) to DF with float.  Only enabled for TARGET_SSE2 && TARGET_64BIT.
;; Operand 1 is a V2DF register tied to the destination.
1678 (define_insn "sse2_cvtsi2sdq"
1679 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1682 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1683 (match_operand:V2DF 1 "register_operand" "0,0")
1685 "TARGET_SSE2 && TARGET_64BIT"
1686 "cvtsi2sdq\t{%2, %0|%0, %2}"
1687 [(set_attr "type" "sseicvt")
1688 (set_attr "mode" "DF")
1689 (set_attr "athlon_decode" "double,direct")
1690 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si, vector-source form: element 0 of V2DF operand 1 (selector
;; parallel [0]) is converted under UNSPEC_FIX_NOTRUNC into an SI general
;; register.  The source may be an SSE register or memory.
1692 (define_insn "sse2_cvtsd2si"
1693 [(set (match_operand:SI 0 "register_operand" "=r,r")
1696 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1697 (parallel [(const_int 0)]))]
1698 UNSPEC_FIX_NOTRUNC))]
1700 "cvtsd2si\t{%1, %0|%0, %1}"
1701 [(set_attr "type" "sseicvt")
1702 (set_attr "athlon_decode" "double,vector")
1703 (set_attr "prefix_rep" "1")
1704 (set_attr "mode" "SI")])
;; cvtsd2si, scalar-source form: DF operand 1 (SSE register or memory) is
;; converted under UNSPEC_FIX_NOTRUNC into an SI general register.  Emits
;; the same mnemonic as sse2_cvtsd2si and additionally sets amdfam10_decode.
1706 (define_insn "sse2_cvtsd2si_2"
1707 [(set (match_operand:SI 0 "register_operand" "=r,r")
1708 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1709 UNSPEC_FIX_NOTRUNC))]
1711 "cvtsd2si\t{%1, %0|%0, %1}"
1712 [(set_attr "type" "sseicvt")
1713 (set_attr "athlon_decode" "double,vector")
1714 (set_attr "amdfam10_decode" "double,double")
1715 (set_attr "prefix_rep" "1")
1716 (set_attr "mode" "SI")])
;; cvtsd2siq, vector-source form: element 0 of V2DF operand 1 is converted
;; under UNSPEC_FIX_NOTRUNC into a DI general register.  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
1718 (define_insn "sse2_cvtsd2siq"
1719 [(set (match_operand:DI 0 "register_operand" "=r,r")
1722 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1723 (parallel [(const_int 0)]))]
1724 UNSPEC_FIX_NOTRUNC))]
1725 "TARGET_SSE2 && TARGET_64BIT"
1726 "cvtsd2siq\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "sseicvt")
1728 (set_attr "athlon_decode" "double,vector")
1729 (set_attr "prefix_rep" "1")
1730 (set_attr "mode" "DI")])
;; cvtsd2siq, scalar-source form: DF operand 1 (SSE register or memory) is
;; converted under UNSPEC_FIX_NOTRUNC into a DI general register.  Only
;; enabled for TARGET_SSE2 && TARGET_64BIT.
1732 (define_insn "sse2_cvtsd2siq_2"
1733 [(set (match_operand:DI 0 "register_operand" "=r,r")
1734 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1735 UNSPEC_FIX_NOTRUNC))]
1736 "TARGET_SSE2 && TARGET_64BIT"
1737 "cvtsd2siq\t{%1, %0|%0, %1}"
1738 [(set_attr "type" "sseicvt")
1739 (set_attr "athlon_decode" "double,vector")
1740 (set_attr "amdfam10_decode" "double,double")
1741 (set_attr "prefix_rep" "1")
1742 (set_attr "mode" "DI")])
;; cvttsd2si: truncating conversion of element 0 of V2DF operand 1 (selector
;; parallel [0]; SSE register or memory) into an SI general register.  No
;; UNSPEC is involved in this pattern.
1744 (define_insn "sse2_cvttsd2si"
1745 [(set (match_operand:SI 0 "register_operand" "=r,r")
1748 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1749 (parallel [(const_int 0)]))))]
1751 "cvttsd2si\t{%1, %0|%0, %1}"
1752 [(set_attr "type" "sseicvt")
1753 (set_attr "prefix_rep" "1")
1754 (set_attr "mode" "SI")
1755 (set_attr "athlon_decode" "double,vector")
1756 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: truncating conversion of element 0 of V2DF operand 1 into a
;; DI general register.  Only enabled for TARGET_SSE2 && TARGET_64BIT.
1758 (define_insn "sse2_cvttsd2siq"
1759 [(set (match_operand:DI 0 "register_operand" "=r,r")
1762 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1763 (parallel [(const_int 0)]))))]
1764 "TARGET_SSE2 && TARGET_64BIT"
1765 "cvttsd2siq\t{%1, %0|%0, %1}"
1766 [(set_attr "type" "sseicvt")
1767 (set_attr "prefix_rep" "1")
1768 (set_attr "mode" "DI")
1769 (set_attr "athlon_decode" "double,vector")
1770 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces a V2DF register from V4SI operand 1 (SSE register or
;; memory); the selector (parallel [0 1]) restricts the input to elements 0
;; and 1.
1772 (define_insn "sse2_cvtdq2pd"
1773 [(set (match_operand:V2DF 0 "register_operand" "=x")
1776 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1777 (parallel [(const_int 0) (const_int 1)]))))]
1779 "cvtdq2pd\t{%1, %0|%0, %1}"
1780 [(set_attr "type" "ssecvt")
1781 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: the V4SI destination is built from a V2SI unspec of
;; V2DF operand 1.  The preparation statement sets operands[2] to the V2SI
;; zero constant, which the matching *sse2_cvtpd2dq insn expects as its
;; const0_operand.
1783 (define_expand "sse2_cvtpd2dq"
1784 [(set (match_operand:V4SI 0 "register_operand" "")
1786 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1790 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: a V2SI unspec of V2DF operand 1 (SSE register or
;; memory) paired with operand 2, which must satisfy const0_operand.  The
;; result is a V4SI register.  amdfam10_decode is double.
1792 (define_insn "*sse2_cvtpd2dq"
1793 [(set (match_operand:V4SI 0 "register_operand" "=x")
1795 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1797 (match_operand:V2SI 2 "const0_operand" "")))]
1799 "cvtpd2dq\t{%1, %0|%0, %1}"
1800 [(set_attr "type" "ssecvt")
1801 (set_attr "prefix_rep" "1")
1802 (set_attr "mode" "TI")
1803 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: the V4SI destination is built from fix:V2SI of
;; V2DF operand 1.  The preparation statement sets operands[2] to the V2SI
;; zero constant, which the matching *sse2_cvttpd2dq insn expects as its
;; const0_operand.
1805 (define_expand "sse2_cvttpd2dq"
1806 [(set (match_operand:V4SI 0 "register_operand" "")
1808 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1811 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: fix:V2SI of V2DF operand 1 (SSE register or memory)
;; paired with operand 2, which must satisfy const0_operand.  The result is
;; a V4SI register.  amdfam10_decode is double.
1813 (define_insn "*sse2_cvttpd2dq"
1814 [(set (match_operand:V4SI 0 "register_operand" "=x")
1816 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1817 (match_operand:V2SI 2 "const0_operand" "")))]
1819 "cvttpd2dq\t{%1, %0|%0, %1}"
1820 [(set_attr "type" "ssecvt")
1821 (set_attr "prefix_rep" "1")
1822 (set_attr "mode" "TI")
1823 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate:V2SF of V2DF operand 2 (SSE register or memory).
;; Operand 1 is a V4SF register tied to the V4SF destination (constraint 0),
;; so the assembler template names only operands 2 and 0.
1825 (define_insn "sse2_cvtsd2ss"
1826 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1829 (float_truncate:V2SF
1830 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1831 (match_operand:V4SF 1 "register_operand" "0,0")
1834 "cvtsd2ss\t{%2, %0|%0, %2}"
1835 [(set_attr "type" "ssecvt")
1836 (set_attr "athlon_decode" "vector,double")
1837 (set_attr "amdfam10_decode" "vector,double")
1838 (set_attr "mode" "SF")])
;; cvtss2sd: works on V4SF operand 2 (SSE register or memory) restricted by
;; the selector (parallel [0 1]).  Operand 1 is a V2DF register tied to the
;; V2DF destination (constraint 0), so the template names only operands 2
;; and 0.
1840 (define_insn "sse2_cvtss2sd"
1841 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1845 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1846 (parallel [(const_int 0) (const_int 1)])))
1847 (match_operand:V2DF 1 "register_operand" "0,0")
1850 "cvtss2sd\t{%2, %0|%0, %2}"
1851 [(set_attr "type" "ssecvt")
1852 (set_attr "amdfam10_decode" "vector,double")
1853 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: the V4SF destination is built from
;; float_truncate:V2SF of V2DF operand 1.  The preparation statement sets
;; operands[2] to the V2SF zero constant, which the matching *sse2_cvtpd2ps
;; insn expects as its const0_operand.
1855 (define_expand "sse2_cvtpd2ps"
1856 [(set (match_operand:V4SF 0 "register_operand" "")
1858 (float_truncate:V2SF
1859 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1862 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: float_truncate:V2SF of V2DF operand 1 (SSE register or
;; memory) paired with operand 2, which must satisfy const0_operand.  The
;; result is a V4SF register.  prefix_data16 is 1.
1864 (define_insn "*sse2_cvtpd2ps"
1865 [(set (match_operand:V4SF 0 "register_operand" "=x")
1867 (float_truncate:V2SF
1868 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1869 (match_operand:V2SF 2 "const0_operand" "")))]
1871 "cvtpd2ps\t{%1, %0|%0, %1}"
1872 [(set_attr "type" "ssecvt")
1873 (set_attr "prefix_data16" "1")
1874 (set_attr "mode" "V4SF")
1875 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produces a V2DF register from V4SF operand 1 (SSE register or
;; memory); the selector (parallel [0 1]) restricts the input to elements 0
;; and 1.  amdfam10_decode is direct.
1877 (define_insn "sse2_cvtps2pd"
1878 [(set (match_operand:V2DF 0 "register_operand" "=x")
1881 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1882 (parallel [(const_int 0) (const_int 1)]))))]
1884 "cvtps2pd\t{%1, %0|%0, %1}"
1885 [(set_attr "type" "ssecvt")
1886 (set_attr "mode" "V2DF")
1887 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander from V4SF operand 1 to V2DF operand 0.  The preparation
;; code allocates a fresh V4SF pseudo as operands[2]; the final set then
;; takes elements 0 and 1 (selector parallel [0 1]) for the V2DF result.
;; presumably operands[2] receives the shuffled high half first -- confirm
;; against the full pattern.
1889 (define_expand "vec_unpacks_hi_v4sf"
1894 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1895 (parallel [(const_int 6)
1899 (set (match_operand:V2DF 0 "register_operand" "")
1903 (parallel [(const_int 0) (const_int 1)]))))]
1906 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander from V4SF operand 1 (register or memory) to V2DF operand 0 using
;; elements 0 and 1 of the source (selector parallel [0 1]).  The visible RTL
;; has the same operand shape as the sse2_cvtps2pd insn.
1909 (define_expand "vec_unpacks_lo_v4sf"
1910 [(set (match_operand:V2DF 0 "register_operand" "")
1913 (match_operand:V4SF 1 "nonimmediate_operand" "")
1914 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander from V8HI operand 1 to V4SF operand 0.  It allocates a V4SI
;; temporary, fills it with gen_vec_unpacks_hi_v8hi, then converts the
;; temporary to V4SF with gen_sse2_cvtdq2ps.
1917 (define_expand "vec_unpacks_float_hi_v8hi"
1918 [(match_operand:V4SF 0 "register_operand" "")
1919 (match_operand:V8HI 1 "register_operand" "")]
1922 rtx tmp = gen_reg_rtx (V4SImode);
1924 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1925 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0.  It allocates a V4SI
;; temporary, fills it with gen_vec_unpacks_lo_v8hi, then converts the
;; temporary to V4SF with gen_sse2_cvtdq2ps.
1929 (define_expand "vec_unpacks_float_lo_v8hi"
1930 [(match_operand:V4SF 0 "register_operand" "")
1931 (match_operand:V8HI 1 "register_operand" "")]
1934 rtx tmp = gen_reg_rtx (V4SImode);
1936 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1937 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0.  It allocates a V4SI
;; temporary, fills it with gen_vec_unpacku_hi_v8hi, then converts the
;; temporary to V4SF with gen_sse2_cvtdq2ps.
1941 (define_expand "vec_unpacku_float_hi_v8hi"
1942 [(match_operand:V4SF 0 "register_operand" "")
1943 (match_operand:V8HI 1 "register_operand" "")]
1946 rtx tmp = gen_reg_rtx (V4SImode);
1948 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1949 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0.  It allocates a V4SI
;; temporary, fills it with gen_vec_unpacku_lo_v8hi, then converts the
;; temporary to V4SF with gen_sse2_cvtdq2ps.
1953 (define_expand "vec_unpacku_float_lo_v8hi"
1954 [(match_operand:V4SF 0 "register_operand" "")
1955 (match_operand:V8HI 1 "register_operand" "")]
1958 rtx tmp = gen_reg_rtx (V4SImode);
1960 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1961 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander from V4SI operand 1 to V2DF operand 0.  The preparation
;; code allocates a fresh V4SI pseudo as operands[2]; the final set then
;; takes elements 0 and 1 (selector parallel [0 1]) for the V2DF result.
;; presumably operands[2] receives the shuffled high half first -- confirm
;; against the full pattern.
1965 (define_expand "vec_unpacks_float_hi_v4si"
1968 (match_operand:V4SI 1 "nonimmediate_operand" "")
1969 (parallel [(const_int 2)
1973 (set (match_operand:V2DF 0 "register_operand" "")
1977 (parallel [(const_int 0) (const_int 1)]))))]
1980 operands[2] = gen_reg_rtx (V4SImode);
;; Expander from V4SI operand 1 (register or memory) to V2DF operand 0 using
;; elements 0 and 1 of the source (selector parallel [0 1]).  The visible RTL
;; has the same operand shape as the sse2_cvtdq2pd insn.
1983 (define_expand "vec_unpacks_float_lo_v4si"
1984 [(set (match_operand:V2DF 0 "register_operand" "")
1987 (match_operand:V4SI 1 "nonimmediate_operand" "")
1988 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander packing two V2DF inputs (operands 1 and 2) into V4SF operand 0.
;; Each input is converted with gen_sse2_cvtpd2ps into its own fresh V4SF
;; pseudo (r1, r2), and gen_sse_movlhps then combines r1 and r2 into the
;; destination.
1991 (define_expand "vec_pack_trunc_v2df"
1992 [(match_operand:V4SF 0 "register_operand" "")
1993 (match_operand:V2DF 1 "nonimmediate_operand" "")
1994 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1999 r1 = gen_reg_rtx (V4SFmode);
2000 r2 = gen_reg_rtx (V4SFmode);
2002 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2003 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2004 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF inputs (operands 1 and 2) into V4SI operand 0
;; with truncating conversion.  Each input is converted with
;; gen_sse2_cvttpd2dq into its own fresh V4SI pseudo (r1, r2); the results
;; are then combined by gen_sse2_punpcklqdq, with all three registers viewed
;; as V2DI through gen_lowpart.
2008 (define_expand "vec_pack_sfix_trunc_v2df"
2009 [(match_operand:V4SI 0 "register_operand" "")
2010 (match_operand:V2DF 1 "nonimmediate_operand" "")
2011 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2016 r1 = gen_reg_rtx (V4SImode);
2017 r2 = gen_reg_rtx (V4SImode);
2019 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2020 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2021 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2022 gen_lowpart (V2DImode, r1),
2023 gen_lowpart (V2DImode, r2)));
;; Expander packing two V2DF inputs (operands 1 and 2) into V4SI operand 0
;; using the non-truncating conversion.  Each input is converted with
;; gen_sse2_cvtpd2dq into its own fresh V4SI pseudo (r1, r2); the results
;; are then combined by gen_sse2_punpcklqdq, with all three registers viewed
;; as V2DI through gen_lowpart.
2027 (define_expand "vec_pack_sfix_v2df"
2028 [(match_operand:V4SI 0 "register_operand" "")
2029 (match_operand:V2DF 1 "nonimmediate_operand" "")
2030 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2035 r1 = gen_reg_rtx (V4SImode);
2036 r2 = gen_reg_rtx (V4SImode);
2038 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2039 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2040 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2041 gen_lowpart (V2DImode, r1),
2042 gen_lowpart (V2DImode, r2)));
2046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2048 ;; Parallel single-precision floating point element swizzling
2050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives selected by operand placement: a register-register
;; movhlps, a movlps load from %H2 when operand 2 is offsettable memory,
;; and a movhps store when the destination is memory.  The insn condition
;; rejects the case where operands 1 and 2 are both memory.
2052 (define_insn "sse_movhlps"
2053 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2056 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2057 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2058 (parallel [(const_int 6)
2062 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2064 movhlps\t{%2, %0|%0, %2}
2065 movlps\t{%H2, %0|%0, %H2}
2066 movhps\t{%2, %0|%0, %2}"
2067 [(set_attr "type" "ssemov")
2068 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives selected by operand placement: a register-register
;; movlhps, a movhps load when operand 2 is memory, and a movlps store to
;; %H0 when the destination is offsettable memory.  Operand validity is
;; checked with ix86_binary_operator_ok.
2070 (define_insn "sse_movlhps"
2071 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2074 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2075 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2076 (parallel [(const_int 0)
2080 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2082 movlhps\t{%2, %0|%0, %2}
2083 movhps\t{%2, %0|%0, %2}
2084 movlps\t{%2, %H0|%H0, %2}"
2085 [(set_attr "type" "ssemov")
2086 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the result is picked from operands 1 and 2 with the selector
;; (2, 6, 3, 7).  Operand 1 is tied to the destination register; operand 2
;; may be a register or memory.
2088 (define_insn "sse_unpckhps"
2089 [(set (match_operand:V4SF 0 "register_operand" "=x")
2092 (match_operand:V4SF 1 "register_operand" "0")
2093 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2094 (parallel [(const_int 2) (const_int 6)
2095 (const_int 3) (const_int 7)])))]
2097 "unpckhps\t{%2, %0|%0, %2}"
2098 [(set_attr "type" "sselog")
2099 (set_attr "mode" "V4SF")])
;; unpcklps: the result is picked from operands 1 and 2 with the selector
;; (0, 4, 1, 5).  Operand 1 is tied to the destination register; operand 2
;; may be a register or memory.
2101 (define_insn "sse_unpcklps"
2102 [(set (match_operand:V4SF 0 "register_operand" "=x")
2105 (match_operand:V4SF 1 "register_operand" "0")
2106 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2107 (parallel [(const_int 0) (const_int 4)
2108 (const_int 1) (const_int 5)])))]
2110 "unpcklps\t{%2, %0|%0, %2}"
2111 [(set_attr "type" "sselog")
2112 (set_attr "mode" "V4SF")])
2114 ;; These are modeled with the same vec_concat as the others so that we
2115 ;; capture users of shufps that can use the new instructions
;; movshdup with a register destination and a register-or-memory source;
;; the element selector starts at index 1.  The insn is marked as carrying
;; a rep prefix (prefix_rep).
2116 (define_insn "sse3_movshdup"
2117 [(set (match_operand:V4SF 0 "register_operand" "=x")
2120 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2122 (parallel [(const_int 1)
2127 "movshdup\t{%1, %0|%0, %1}"
2128 [(set_attr "type" "sse")
2129 (set_attr "prefix_rep" "1")
2130 (set_attr "mode" "V4SF")])
;; movsldup with a register destination and a register-or-memory source;
;; the element selector starts at index 0.  The insn is marked as carrying
;; a rep prefix (prefix_rep).
2132 (define_insn "sse3_movsldup"
2133 [(set (match_operand:V4SF 0 "register_operand" "=x")
2136 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2138 (parallel [(const_int 0)
2143 "movsldup\t{%1, %0|%0, %1}"
2144 [(set_attr "type" "sse")
2145 (set_attr "prefix_rep" "1")
2146 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The mask is split
;; into four 2-bit fields that become explicit element selectors for
;; sse_shufps_1; the two upper fields are biased by 4 so that they fall in
;; the range 4..7.
2148 (define_expand "sse_shufps"
2149 [(match_operand:V4SF 0 "register_operand" "")
2150 (match_operand:V4SF 1 "register_operand" "")
2151 (match_operand:V4SF 2 "nonimmediate_operand" "")
2152 (match_operand:SI 3 "const_int_operand" "")]
2155 int mask = INTVAL (operands[3]);
2156 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2157 GEN_INT ((mask >> 0) & 3),
2158 GEN_INT ((mask >> 2) & 3),
2159 GEN_INT (((mask >> 4) & 3) + 4),
2160 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with explicit selectors: operands 3 and 4 must be in 0..3 and
;; operands 5 and 6 in 4..7.  The output code folds the four selectors back
;; into the 8-bit immediate (removing the bias of 4 from operands 5 and 6),
;; stores it in operands[3] and prints the instruction.
2164 (define_insn "sse_shufps_1"
2165 [(set (match_operand:V4SF 0 "register_operand" "=x")
2168 (match_operand:V4SF 1 "register_operand" "0")
2169 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2170 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2171 (match_operand 4 "const_0_to_3_operand" "")
2172 (match_operand 5 "const_4_to_7_operand" "")
2173 (match_operand 6 "const_4_to_7_operand" "")])))]
2177 mask |= INTVAL (operands[3]) << 0;
2178 mask |= INTVAL (operands[4]) << 2;
2179 mask |= (INTVAL (operands[5]) - 4) << 4;
2180 mask |= (INTVAL (operands[6]) - 4) << 6;
2181 operands[3] = GEN_INT (mask);
2183 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2185 [(set_attr "type" "sselog")
2186 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of the V4SF operand 1 into the V2SF operand 0.
;; Alternatives: movhps store to memory, register-register movhlps, and a
;; movlps load from %H1 when the source is offsettable memory.
2188 (define_insn "sse_storehps"
2189 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2191 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2192 (parallel [(const_int 2) (const_int 3)])))]
2195 movhps\t{%1, %0|%0, %1}
2196 movhlps\t{%1, %0|%0, %1}
2197 movlps\t{%H1, %0|%0, %H1}"
2198 [(set_attr "type" "ssemov")
2199 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of the V4SF operand 1 (tied to the destination)
;; with the V2SF operand 2.  Alternatives: movhps load from memory,
;; register-register movlhps, and a movlps store to %H0 when the
;; destination is offsettable memory.
2201 (define_insn "sse_loadhps"
2202 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2205 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2206 (parallel [(const_int 0) (const_int 1)]))
2207 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2210 movhps\t{%2, %0|%0, %2}
2211 movlhps\t{%2, %0|%0, %2}
2212 movlps\t{%2, %H0|%H0, %2}"
2213 [(set_attr "type" "ssemov")
2214 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of the V4SF operand 1 into the V2SF operand 0.
;; Alternatives: movlps store to memory, register-register movaps, and a
;; movlps load from memory.
2216 (define_insn "sse_storelps"
2217 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2219 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2220 (parallel [(const_int 0) (const_int 1)])))]
2223 movlps\t{%1, %0|%0, %1}
2224 movaps\t{%1, %0|%0, %1}
2225 movlps\t{%1, %0|%0, %1}"
2226 [(set_attr "type" "ssemov")
2227 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with elements 2 and 3 of the V4SF operand 1.
;; Alternatives: shufps $0xe4 when operand 2 already sits in the
;; destination register, a movlps load from memory, and a movlps store
;; when the destination is memory.
2229 (define_insn "sse_loadlps"
2230 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2232 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2234 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2235 (parallel [(const_int 2) (const_int 3)]))))]
2238 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2239 movlps\t{%2, %0|%0, %2}
2240 movlps\t{%2, %0|%0, %2}"
2241 [(set_attr "type" "sselog,ssemov,ssemov")
2242 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the destination and operand 2
;; is the source register printed in the template.
2244 (define_insn "sse_movss"
2245 [(set (match_operand:V4SF 0 "register_operand" "=x")
2247 (match_operand:V4SF 2 "register_operand" "x")
2248 (match_operand:V4SF 1 "register_operand" "0")
2251 "movss\t{%2, %0|%0, %2}"
2252 [(set_attr "type" "ssemov")
2253 (set_attr "mode" "SF")])
;; Builds a V4SF from the SF operand 1, which is tied to the destination
;; register; implemented as shufps $0 of the register with itself.
2255 (define_insn "*vec_dupv4sf"
2256 [(set (match_operand:V4SF 0 "register_operand" "=x")
2258 (match_operand:SF 1 "register_operand" "0")))]
2260 "shufps\t{$0, %0, %0|%0, %0, 0}"
2261 [(set_attr "type" "sselog1")
2262 (set_attr "mode" "V4SF")])
2264 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2265 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2266 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SF from two SF operands.  The first two alternatives use SSE
;; registers (unpcklps, or a movss load when operand 2 satisfies the C
;; constraint); the last two use MMX registers (punpckldq, or a movd load).
2267 (define_insn "*sse_concatv2sf"
2268 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2270 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2271 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2274 unpcklps\t{%2, %0|%0, %2}
2275 movss\t{%1, %0|%0, %1}
2276 punpckldq\t{%2, %0|%0, %2}
2277 movd\t{%1, %0|%0, %1}"
2278 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2279 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  Uses movlhps when operand 2 is a register and movhps when
;; it is memory.
2281 (define_insn "*sse_concatv4sf"
2282 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2284 (match_operand:V2SF 1 "register_operand" " 0,0")
2285 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2288 movlhps\t{%2, %0|%0, %2}
2289 movhps\t{%2, %0|%0, %2}"
2290 [(set_attr "type" "ssemov")
2291 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation; all work is delegated to
;; ix86_expand_vector_init with its first argument false.
2293 (define_expand "vec_initv4sf"
2294 [(match_operand:V4SF 0 "register_operand" "")
2295 (match_operand 1 "" "")]
2298 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts the SF operand 2 into a V4SF whose remaining contents come from
;; operand 1 (either tied to the destination or matching the C constraint).
;; The register alternatives print movss (SSE register or memory source)
;; or movd (general-register source).
2302 (define_insn "vec_setv4sf_0"
2303 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2306 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2307 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2311 movss\t{%2, %0|%0, %2}
2312 movss\t{%2, %0|%0, %2}
2313 movd\t{%2, %0|%0, %2}
2315 [(set_attr "type" "ssemov")
2316 (set_attr "mode" "SF")])
2318 ;; A subset is vec_setv4sf.
;; insertps form of vec_set: operand 3 is a power of two in 1..8.  The
;; output code converts it to its log2 and shifts that left by 4 to form
;; the insertps immediate before printing the instruction.
2319 (define_insn "*vec_setv4sf_sse4_1"
2320 [(set (match_operand:V4SF 0 "register_operand" "=x")
2323 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2324 (match_operand:V4SF 1 "register_operand" "0")
2325 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2328 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2329 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2331 [(set_attr "type" "sselog")
2332 (set_attr "prefix_extra" "1")
2333 (set_attr "mode" "V4SF")])
;; insertps with the raw control byte supplied as operand 3 (an immediate
;; in 0..255).  Operand 1 is tied to the destination register and operand 2
;; is the source register.  The operation is modelled as an unspec.
2335 (define_insn "sse4_1_insertps"
2336 [(set (match_operand:V4SF 0 "register_operand" "=x")
2337 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2338 (match_operand:V4SF 1 "register_operand" "0")
2339 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2342 "insertps\t{%3, %2, %0|%0, %2, %3}"
2343 [(set_attr "type" "sselog")
2344 (set_attr "prefix_extra" "1")
2345 (set_attr "mode" "V4SF")])
;; Splitter body: once reload has completed, operand 1 (a non-memory SF
;; value) is stored as an SFmode value at byte offset 0 of the V4SF memory
;; operand 0.
2348 [(set (match_operand:V4SF 0 "memory_operand" "")
2351 (match_operand:SF 1 "nonmemory_operand" ""))
2354 "TARGET_SSE && reload_completed"
2357 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element insertion; delegated to ix86_expand_vector_set, passing the
;; vector (operand 0), the new SF value (operand 1) and the constant
;; element index taken from operand 2.
2361 (define_expand "vec_setv4sf"
2362 [(match_operand:V4SF 0 "register_operand" "")
2363 (match_operand:SF 1 "register_operand" "")
2364 (match_operand 2 "const_int_operand" "")]
2367 ix86_expand_vector_set (false, operands[0], operands[1],
2368 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  The insn condition rejects
;; memory-to-memory.  After reload it is split into an ordinary move:
;; operand 1 is re-expressed in SFmode (via gen_rtx_REG on its register
;; number, or via gen_lowpart) and moved into operand 0.
2372 (define_insn_and_split "*vec_extractv4sf_0"
2373 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2375 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2376 (parallel [(const_int 0)])))]
2377 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2379 "&& reload_completed"
2382 rtx op1 = operands[1];
2384 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2386 op1 = gen_lowpart (SFmode, op1);
2387 emit_move_insn (operands[0], op1);
;; extractps: selects the element of the V4SF register operand 1 whose
;; index (0..3) is given by the immediate operand 2; the SF destination may
;; be a general register or memory.
2391 (define_insn "*sse4_1_extractps"
2392 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2394 (match_operand:V4SF 1 "register_operand" "x")
2395 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2397 "extractps\t{%2, %1, %0|%0, %1, %2}"
2398 [(set_attr "type" "sselog")
2399 (set_attr "prefix_extra" "1")
2400 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF held in offsettable
;; memory.  The split emits a plain SFmode load from the source address
;; advanced by 4 bytes per element index.
2402 (define_insn_and_split "*vec_extract_v4sf_mem"
2403 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2405 (match_operand:V4SF 1 "memory_operand" "o")
2406 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2412 int i = INTVAL (operands[2]);
2414 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; V4SF element extraction; delegated to ix86_expand_vector_extract,
;; passing the SF destination (operand 0), the vector (operand 1) and the
;; constant element index taken from operand 2.
2418 (define_expand "vec_extractv4sf"
2419 [(match_operand:SF 0 "register_operand" "")
2420 (match_operand:V4SF 1 "register_operand" "")
2421 (match_operand 2 "const_int_operand" "")]
2424 ix86_expand_vector_extract (false, operands[0], operands[1],
2425 INTVAL (operands[2]));
2429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2431 ;; Parallel double-precision floating point element swizzling
2433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives selected by operand placement: register-register
;; unpckhpd, a movlpd load from %H1 when operand 1 is offsettable memory,
;; and a movhpd store when the destination is memory.  The insn condition
;; rejects the case where operands 1 and 2 are both memory.
2435 (define_insn "sse2_unpckhpd"
2436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2440 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2441 (parallel [(const_int 1)
2443 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2445 unpckhpd\t{%2, %0|%0, %2}
2446 movlpd\t{%H1, %0|%0, %H1}
2447 movhpd\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sselog,ssemov,ssemov")
2449 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup pattern with two alternatives: a register destination (prints
;; movddup) and an offsettable-memory destination fed from a register.
;; The insn condition rejects memory-to-memory.
2451 (define_insn "*sse3_movddup"
2452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2457 (parallel [(const_int 0)
2459 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2461 movddup\t{%1, %0|%0, %1}
2463 [(set_attr "type" "sselog1,ssemov")
2464 (set_attr "mode" "V2DF")])
;; Splitter body: after reload, the DFmode view of register operand 1 is
;; stored twice into the V2DF memory operand 0, at byte offsets 0 and 8.
2467 [(set (match_operand:V2DF 0 "memory_operand" "")
2470 (match_operand:V2DF 1 "register_operand" "")
2472 (parallel [(const_int 0)
2474 "TARGET_SSE3 && reload_completed"
2477 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2478 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2479 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three alternatives selected by operand placement: register-register
;; unpcklpd, a movhpd load when operand 2 is memory, and a movlpd store to
;; %H0 when the destination is offsettable memory.  The insn condition
;; rejects the case where operands 1 and 2 are both memory.
2483 (define_insn "sse2_unpcklpd"
2484 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2487 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2488 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2489 (parallel [(const_int 0)
2491 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2493 unpcklpd\t{%2, %0|%0, %2}
2494 movhpd\t{%2, %0|%0, %2}
2495 movlpd\t{%2, %H0|%H0, %2}"
2496 [(set_attr "type" "sselog,ssemov,ssemov")
2497 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3.  The mask is decoded
;; into explicit element selectors for sse2_shufpd_1; bit 1 of the mask
;; chooses between selector 3 and selector 2 for the last argument.
2499 (define_expand "sse2_shufpd"
2500 [(match_operand:V2DF 0 "register_operand" "")
2501 (match_operand:V2DF 1 "register_operand" "")
2502 (match_operand:V2DF 2 "nonimmediate_operand" "")
2503 (match_operand:SI 3 "const_int_operand" "")]
2506 int mask = INTVAL (operands[3]);
2507 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2509 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit selectors: operand 3 must be 0 or 1 and operand 4
;; must be 2 or 3.  The output code rebuilds the 2-bit immediate (operand 3
;; in bit 0, operand 4 minus 2 in bit 1), stores it in operands[3] and
;; prints the instruction.
2513 (define_insn "sse2_shufpd_1"
2514 [(set (match_operand:V2DF 0 "register_operand" "=x")
2517 (match_operand:V2DF 1 "register_operand" "0")
2518 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2519 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2520 (match_operand 4 "const_2_to_3_operand" "")])))]
2524 mask = INTVAL (operands[3]);
2525 mask |= (INTVAL (operands[4]) - 2) << 1;
2526 operands[3] = GEN_INT (mask);
2528 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2530 [(set_attr "type" "sselog")
2531 (set_attr "mode" "V2DF")])
;; Extract element 1 of the V2DF operand 1 into the DF operand 0.  The
;; first alternative is a movhpd store to memory; the other alternatives
;; cover a source tied to the destination register and an offsettable
;; memory source.  The insn condition rejects memory-to-memory.
2533 (define_insn "sse2_storehpd"
2534 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2536 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2537 (parallel [(const_int 1)])))]
2538 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2540 movhpd\t{%1, %0|%0, %1}
2543 [(set_attr "type" "ssemov,sselog1,ssemov")
2544 (set_attr "mode" "V1DF,V2DF,DF")])
;; Splitter body: after reload, extracting element 1 of a V2DF in memory
;; into a DF register becomes a plain DFmode load from byte offset 8 of
;; that memory operand.
2547 [(set (match_operand:DF 0 "register_operand" "")
2549 (match_operand:V2DF 1 "memory_operand" "")
2550 (parallel [(const_int 1)])))]
2551 "TARGET_SSE2 && reload_completed"
2552 [(set (match_dup 0) (match_dup 1))]
2554 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of the V2DF operand 1 into the DF operand 0.  The
;; first alternative is a movlpd store to memory; the remaining
;; alternatives cover register and memory sources with a register
;; destination.  The insn condition rejects memory-to-memory.
2557 (define_insn "sse2_storelpd"
2558 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2560 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2561 (parallel [(const_int 0)])))]
2562 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2564 movlpd\t{%1, %0|%0, %1}
2567 [(set_attr "type" "ssemov")
2568 (set_attr "mode" "V1DF,DF,DF")])
;; Splitter body: after reload, extracting element 0 of a V2DF into a DF
;; register becomes an ordinary move; operand 1 is re-expressed in DFmode
;; (via gen_rtx_REG on its register number, or via gen_lowpart) first.
2571 [(set (match_operand:DF 0 "register_operand" "")
2573 (match_operand:V2DF 1 "nonimmediate_operand" "")
2574 (parallel [(const_int 0)])))]
2575 "TARGET_SSE2 && reload_completed"
2578 rtx op1 = operands[1];
2580 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2582 op1 = gen_lowpart (DFmode, op1);
2583 emit_move_insn (operands[0], op1);
;; Combine element 0 of the V2DF operand 1 (always tied to the destination)
;; with the DF operand 2 as the new high element.  Alternatives: movhpd
;; load from memory, register-register unpcklpd, and a memory destination
;; whose template follows the two shown here.  The insn condition rejects
;; the case where operands 1 and 2 are both memory.
;;
;; There is deliberately no alternative with operand 2 tied to the
;; destination: "shufpd $1, %1, %0" in that arrangement would produce
;; { %0[1], %1[0] }, i.e. an undefined low element and operand 1's low
;; element in the high half, rather than { %1[0], %2 }.
2587 (define_insn "sse2_loadhpd"
2588 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2591 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2592 (parallel [(const_int 0)]))
2593 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2594 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2596 movhpd\t{%2, %0|%0, %2}
2597 unpcklpd\t{%2, %0|%0, %2}
2600 [(set_attr "type" "ssemov,sselog,other")
2601 (set_attr "mode" "V1DF,V2DF,DF")])
;; Splitter body: after reload, replacing the high element of a V2DF in
;; memory (element 0 of the same memory is kept) with the DF register
;; operand 1 becomes a plain DFmode store at byte offset 8.
2604 [(set (match_operand:V2DF 0 "memory_operand" "")
2606 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2607 (match_operand:DF 1 "register_operand" "")))]
2608 "TARGET_SSE2 && reload_completed"
2609 [(set (match_dup 0) (match_dup 1))]
2611 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine the DF operand 2 (new low element) with element 1 of the V2DF
;; operand 1.  Alternatives, in order: movsd load with operand 1 matching
;; C; movlpd load; register-register movsd; shufpd $2 when operand 2 is
;; already in the destination register; movhpd load from %H1 when operand 1
;; is offsettable memory; and a memory destination.  The insn condition
;; rejects the case where operands 1 and 2 are both memory.
;;
;; In the shufpd alternative operand 2 is tied to %0, so the shuffle source
;; must be operand 1: "shufpd $2, %1, %0" keeps %0[0] and takes %1[1].
2614 (define_insn "sse2_loadlpd"
2615 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2617 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2619 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2620 (parallel [(const_int 1)]))))]
2621 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2623 movsd\t{%2, %0|%0, %2}
2624 movlpd\t{%2, %0|%0, %2}
2625 movsd\t{%2, %0|%0, %2}
2626 shufpd\t{$2, %1, %0|%0, %1, 2}
2627 movhpd\t{%H1, %0|%0, %H1}
2629 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2630 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Splitter body: after reload, replacing the low element of a V2DF in
;; memory with the DF register operand 1 (element 1 of the same memory is
;; kept) becomes a plain DFmode store.  The new value is the first (low)
;; element, so it is stored at byte offset 0; offset 8 would overwrite the
;; high element that this pattern preserves.
2633 [(set (match_operand:V2DF 0 "memory_operand" "")
2635 (match_operand:DF 1 "register_operand" "")
2636 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2637 "TARGET_SSE2 && reload_completed"
2638 [(set (match_dup 0) (match_dup 1))]
2640 operands[0] = adjust_address (operands[0], DFmode, 0);
2643 ;; Not sure these two are ever used, but it doesn't hurt to have
;; SSE-without-SSE2 extraction of element 1 of a V2DF.  Alternatives:
;; movhps store to memory, register-register movhlps, and a movlps load
;; from %H1 when the source is offsettable memory.  The insn condition
;; rejects memory-to-memory.
2645 (define_insn "*vec_extractv2df_1_sse"
2646 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2648 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2649 (parallel [(const_int 1)])))]
2650 "!TARGET_SSE2 && TARGET_SSE
2651 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2653 movhps\t{%1, %0|%0, %1}
2654 movhlps\t{%1, %0|%0, %1}
2655 movlps\t{%H1, %0|%0, %H1}"
2656 [(set_attr "type" "ssemov")
2657 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF.  Alternatives:
;; movlps store to memory, register-register movaps, and a movlps load
;; from memory.  The insn condition rejects memory-to-memory.
2659 (define_insn "*vec_extractv2df_0_sse"
2660 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2662 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2663 (parallel [(const_int 0)])))]
2664 "!TARGET_SSE2 && TARGET_SSE
2665 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2667 movlps\t{%1, %0|%0, %1}
2668 movaps\t{%1, %0|%0, %1}
2669 movlps\t{%1, %0|%0, %1}"
2670 [(set_attr "type" "ssemov")
2671 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of the V2DF operands 2 and 1.  Alternatives, in
;; order: register-register movsd; movlpd load; movlpd store; shufpd $2
;; when operand 2 is already in the destination register; movhps load from
;; %H1 when operand 1 is offsettable memory; and a movhps store to %H0
;; when the destination is offsettable memory.
;;
;; In the shufpd alternative operand 2 is tied to %0, so the shuffle source
;; must be operand 1: "shufpd $2, %1, %0" keeps %0[0] and takes %1[1],
;; matching what the other alternatives compute.
2673 (define_insn "sse2_movsd"
2674 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2676 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2677 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2681 movsd\t{%2, %0|%0, %2}
2682 movlpd\t{%2, %0|%0, %2}
2683 movlpd\t{%2, %0|%0, %2}
2684 shufpd\t{$2, %1, %0|%0, %1, 2}
2685 movhps\t{%H1, %0|%0, %H1}
2686 movhps\t{%1, %H0|%H0, %1}"
2687 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2688 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from the DF operand 1 (register or memory) with a single
;; movddup.
2690 (define_insn "*vec_dupv2df_sse3"
2691 [(set (match_operand:V2DF 0 "register_operand" "=x")
2693 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2695 "movddup\t{%1, %0|%0, %1}"
2696 [(set_attr "type" "sselog1")
2697 (set_attr "mode" "DF")])
;; Builds a V2DF from the DF register operand 1, which is tied to the
;; destination register.
2699 (define_insn "vec_dupv2df"
2700 [(set (match_operand:V2DF 0 "register_operand" "=x")
2702 (match_operand:DF 1 "register_operand" "0")))]
2705 [(set_attr "type" "sselog1")
2706 (set_attr "mode" "V2DF")])
;; V2DF construction from the DF operand 1 (register or memory) that is
;; implemented with a single movddup.
2708 (define_insn "*vec_concatv2df_sse3"
2709 [(set (match_operand:V2DF 0 "register_operand" "=x")
2711 (match_operand:DF 1 "nonimmediate_operand" "xm")
2714 "movddup\t{%1, %0|%0, %1}"
2715 [(set_attr "type" "sselog1")
2716 (set_attr "mode" "DF")])
;; Forms a V2DF from two DF operands.  The Y2 alternatives print unpcklpd
;; (register operand 2), movhpd (memory operand 2) or a movsd load of
;; operand 1 when operand 2 matches C; the plain x alternatives print
;; movlhps (register operand 2) or movhps (memory operand 2).
2718 (define_insn "*vec_concatv2df"
2719 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2721 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2722 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2725 unpcklpd\t{%2, %0|%0, %2}
2726 movhpd\t{%2, %0|%0, %2}
2727 movsd\t{%1, %0|%0, %1}
2728 movlhps\t{%2, %0|%0, %2}
2729 movhps\t{%2, %0|%0, %2}"
2730 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2731 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; V2DF element insertion; delegated to ix86_expand_vector_set, passing the
;; vector (operand 0), the new DF value (operand 1) and the constant
;; element index taken from operand 2.
2733 (define_expand "vec_setv2df"
2734 [(match_operand:V2DF 0 "register_operand" "")
2735 (match_operand:DF 1 "register_operand" "")
2736 (match_operand 2 "const_int_operand" "")]
2739 ix86_expand_vector_set (false, operands[0], operands[1],
2740 INTVAL (operands[2]));
;; V2DF element extraction; delegated to ix86_expand_vector_extract,
;; passing the DF destination (operand 0), the vector (operand 1) and the
;; constant element index taken from operand 2.
2744 (define_expand "vec_extractv2df"
2745 [(match_operand:DF 0 "register_operand" "")
2746 (match_operand:V2DF 1 "register_operand" "")
2747 (match_operand 2 "const_int_operand" "")]
2750 ix86_expand_vector_extract (false, operands[0], operands[1],
2751 INTVAL (operands[2]));
;; V2DF vector initialisation; all work is delegated to
;; ix86_expand_vector_init with its first argument false.
2755 (define_expand "vec_initv2df"
2756 [(match_operand:V2DF 0 "register_operand" "")
2757 (match_operand 1 "" "")]
2760 ix86_expand_vector_init (false, operands[0], operands[1]);
2764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2766 ;; Parallel integral arithmetic
2768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every mode in SSEMODEI.  The preparation
;; statement forces an all-zero vector constant of the same mode into a
;; register and installs it as operand 2.
2770 (define_expand "neg<mode>2"
2771 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2774 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2776 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander for every mode in SSEMODEI.  Operands
;; are legitimised with ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
2778 (define_expand "add<mode>3"
2779 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2780 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2781 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2783 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is tied to the destination and marked commutative
;; (%); operand 2 may be a register or memory.
2785 (define_insn "*add<mode>3"
2786 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2788 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2789 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2790 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2791 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2792 [(set_attr "type" "sseiadd")
2793 (set_attr "prefix_data16" "1")
2794 (set_attr "mode" "TI")])
;; padds{b,w} for the modes in SSEMODE12 (V16QI, V8HI); validity is checked
;; against SS_PLUS.  Operand 1 is tied to the destination and marked
;; commutative (%).
2796 (define_insn "sse2_ssadd<mode>3"
2797 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2799 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2800 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2801 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2802 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2803 [(set_attr "type" "sseiadd")
2804 (set_attr "prefix_data16" "1")
2805 (set_attr "mode" "TI")])
;; paddus{b,w} for the modes in SSEMODE12 (V16QI, V8HI); validity is
;; checked against US_PLUS.  Operand 1 is tied to the destination and
;; marked commutative (%).
2807 (define_insn "sse2_usadd<mode>3"
2808 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2810 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2811 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2812 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2813 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2814 [(set_attr "type" "sseiadd")
2815 (set_attr "prefix_data16" "1")
2816 (set_attr "mode" "TI")])
;; Integer vector subtraction expander for every mode in SSEMODEI.
;; Operand 1 must be a register; operands are legitimised with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
2818 (define_expand "sub<mode>3"
2819 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2820 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2821 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2823 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is a register tied to the destination (no
;; commutative marker); operand 2 may be a register or memory.
2825 (define_insn "*sub<mode>3"
2826 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2828 (match_operand:SSEMODEI 1 "register_operand" "0")
2829 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2831 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2832 [(set_attr "type" "sseiadd")
2833 (set_attr "prefix_data16" "1")
2834 (set_attr "mode" "TI")])
;; psubs{b,w} for the modes in SSEMODE12 (V16QI, V8HI).  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or memory.
2836 (define_insn "sse2_sssub<mode>3"
2837 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2839 (match_operand:SSEMODE12 1 "register_operand" "0")
2840 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2842 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "sseiadd")
2844 (set_attr "prefix_data16" "1")
2845 (set_attr "mode" "TI")])
;; psubus{b,w} for the modes in SSEMODE12 (V16QI, V8HI).  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or memory.
2847 (define_insn "sse2_ussub<mode>3"
2848 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2850 (match_operand:SSEMODE12 1 "register_operand" "0")
2851 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2853 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2854 [(set_attr "type" "sseiadd")
2855 (set_attr "prefix_data16" "1")
2856 (set_attr "mode" "TI")])
;; V16QI multiply, available only before reload and always split.  Both
;; expansions widen the bytes to 16-bit words, multiply with mulv8hi3 and
;; then gather the low byte of every product back into one vector.  The
;; SSE5 expansion uses ix86_expand_sse5_unpack / ix86_expand_sse5_pack on
;; V8HI temporaries; the other expansion uses twelve V16QI temporaries and
;; punpckhbw / punpcklbw sequences, as pictured in the inline comments.
2858 (define_insn_and_split "mulv16qi3"
2859 [(set (match_operand:V16QI 0 "register_operand" "")
2860 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2861 (match_operand:V16QI 2 "register_operand" "")))]
2863 && !(reload_completed || reload_in_progress)"
2868 rtx t[12], op0, op[3];
2873 /* On SSE5, we can take advantage of the pperm instruction to pack and
2874 unpack the bytes. Unpack data such that we've got a source byte in
2875 each low byte of each word. We don't care what goes into the high
2876 byte, so put 0 there. */
2877 for (i = 0; i < 6; ++i)
2878 t[i] = gen_reg_rtx (V8HImode);
2880 for (i = 0; i < 2; i++)
2883 op[1] = operands[i+1];
2884 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2887 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2890 /* Multiply words. */
2891 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2892 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2894 /* Pack the low byte of each word back into a single xmm */
2895 op[0] = operands[0];
2898 ix86_expand_sse5_pack (op);
2902 for (i = 0; i < 12; ++i)
2903 t[i] = gen_reg_rtx (V16QImode);
2905 /* Unpack data such that we've got a source byte in each low byte of
2906 each word. We don't care what goes into the high byte of each word.
2907 Rather than trying to get zero in there, most convenient is to let
2908 it be a copy of the low byte. */
2909 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2910 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2911 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2912 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2914 /* Multiply words. The end-of-line annotations here give a picture of what
2915 the output of that instruction looks like. Dot means don't care; the
2916 letters are the bytes of the result with A being the most significant. */
2917 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2918 gen_lowpart (V8HImode, t[0]),
2919 gen_lowpart (V8HImode, t[1])));
2920 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2921 gen_lowpart (V8HImode, t[2]),
2922 gen_lowpart (V8HImode, t[3])));
2924 /* Extract the relevant bytes and merge them back together. */
2925 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2926 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2927 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2928 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2929 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2930 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2933 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are legitimised with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
2937 (define_expand "mulv8hi3"
2938 [(set (match_operand:V8HI 0 "register_operand" "")
2939 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2940 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2942 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is tied to the destination and marked
;; commutative (%); operand 2 may be a register or memory.
2944 (define_insn "*mulv8hi3"
2945 [(set (match_operand:V8HI 0 "register_operand" "=x")
2946 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2947 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2948 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2949 "pmullw\t{%2, %0|%0, %2}"
2950 [(set_attr "type" "sseimul")
2951 (set_attr "prefix_data16" "1")
2952 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply; operands are
;; legitimised with ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
2954 (define_expand "smulv8hi3_highpart"
2955 [(set (match_operand:V8HI 0 "register_operand" "")
2960 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2962 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2965 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw: matching insn for smulv8hi3_highpart.  Operand 1 is tied to the
;; destination and marked commutative (%); operand 2 may be a register or
;; memory.
2967 (define_insn "*smulv8hi3_highpart"
2968 [(set (match_operand:V8HI 0 "register_operand" "=x")
2973 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2975 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2977 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2978 "pmulhw\t{%2, %0|%0, %2}"
2979 [(set_attr "type" "sseimul")
2980 (set_attr "prefix_data16" "1")
2981 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply; operands are
;; legitimised with ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
2983 (define_expand "umulv8hi3_highpart"
2984 [(set (match_operand:V8HI 0 "register_operand" "")
2989 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2991 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2994 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw: matching insn for umulv8hi3_highpart.  Operand 1 is tied to the
;; destination and marked commutative (%); operand 2 may be a register or
;; memory.
2996 (define_insn "*umulv8hi3_highpart"
2997 [(set (match_operand:V8HI 0 "register_operand" "=x")
3002 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3004 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3006 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3007 "pmulhuw\t{%2, %0|%0, %2}"
3008 [(set_attr "type" "sseimul")
3009 (set_attr "prefix_data16" "1")
3010 (set_attr "mode" "TI")])
;; pmuludq: produces a V2DI result from elements 0 and 2 of each V4SI
;; input.  Operand 1 is tied to the destination and marked commutative
;; (%); operand 2 may be a register or memory.
3012 (define_insn "sse2_umulv2siv2di3"
3013 [(set (match_operand:V2DI 0 "register_operand" "=x")
3017 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3018 (parallel [(const_int 0) (const_int 2)])))
3021 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3022 (parallel [(const_int 0) (const_int 2)])))))]
3023 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3024 "pmuludq\t{%2, %0|%0, %2}"
3025 [(set_attr "type" "sseimul")
3026 (set_attr "prefix_data16" "1")
3027 (set_attr "mode" "TI")])
;; pmuldq (SSE4.1): produces a V2DI result from elements 0 and 2 of each
;; V4SI input.  Operand 1 is tied to the destination and marked
;; commutative (%); operand 2 may be a register or memory.
3029 (define_insn "sse4_1_mulv2siv2di3"
3030 [(set (match_operand:V2DI 0 "register_operand" "=x")
3034 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3035 (parallel [(const_int 0) (const_int 2)])))
3038 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3039 (parallel [(const_int 0) (const_int 2)])))))]
3040 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3041 "pmuldq\t{%2, %0|%0, %2}"
3042 [(set_attr "type" "sseimul")
3043 (set_attr "prefix_extra" "1")
3044 (set_attr "mode" "TI")])
;; pmaddwd: V4SI result computed from two V8HI inputs.  The RTL selects
;; two V4HI element sets from each input, one selector list starting at
;; index 0 and the other at index 1 (ending at 7), reusing operands 1 and 2
;; through match_dup.  Operand 1 is tied to the destination and marked
;; commutative (%).
3046 (define_insn "sse2_pmaddwd"
3047 [(set (match_operand:V4SI 0 "register_operand" "=x")
3052 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3053 (parallel [(const_int 0)
3059 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3060 (parallel [(const_int 0)
3066 (vec_select:V4HI (match_dup 1)
3067 (parallel [(const_int 1)
3072 (vec_select:V4HI (match_dup 2)
3073 (parallel [(const_int 1)
3076 (const_int 7)]))))))]
3077 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3078 "pmaddwd\t{%2, %0|%0, %2}"
3079 [(set_attr "type" "sseiadd")
3080 (set_attr "prefix_data16" "1")
3081 (set_attr "mode" "TI")])
;; V4SI multiply expander.  Operand fix-up through
;; ix86_fixup_binary_operands_no_copy is performed only when SSE4.1 or
;; SSE5 is enabled.
3083 (define_expand "mulv4si3"
3084 [(set (match_operand:V4SI 0 "register_operand" "")
3085 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3086 (match_operand:V4SI 2 "register_operand" "")))]
3089 if (TARGET_SSE4_1 || TARGET_SSE5)
3090 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; pmulld (SSE4.1): V4SI multiply.  Operand 1 is tied to the destination
;; and marked commutative (%); operand 2 may be a register or memory.
3093 (define_insn "*sse4_1_mulv4si3"
3094 [(set (match_operand:V4SI 0 "register_operand" "=x")
3095 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3096 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3097 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3098 "pmulld\t{%2, %0|%0, %2}"
3099 [(set_attr "type" "sseimul")
3100 (set_attr "prefix_extra" "1")
3101 (set_attr "mode" "TI")])
3103 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3104 ;; multiply/add. In general, we expect the define_split to occur before
3105 ;; register allocation, so we have to handle the corner case where the target
3106 ;; is used as the base or index register in operands 1/2.
;; The destination is an earlyclobber register (=&x).  The split is
;; enabled after reload, or earlier when the destination is mentioned in
;; neither input; it rewrites the multiply as a multiply-add whose addend
;; (operand 3) is the V4SI zero constant.
3107 (define_insn_and_split "*sse5_mulv4si3"
3108 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3109 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3110 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3113 "&& (reload_completed
3114 || (!reg_mentioned_p (operands[0], operands[1])
3115 && !reg_mentioned_p (operands[0], operands[2])))"
3119 (plus:V4SI (mult:V4SI (match_dup 1)
3123 operands[3] = CONST0_RTX (V4SImode);
3125 [(set_attr "type" "ssemuladd")
3126 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 when neither SSE4.1 nor SSE5 is enabled;
;; available only before reload and always split.  The expansion uses two
;; sse2_umulv2siv2di3 multiplies (one on the inputs as given, one on the
;; inputs shifted with sse2_lshrti3), moves the wanted results down with
;; sse2_pshufd_1 and interleaves them with sse2_punpckldq.
3128 (define_insn_and_split "*sse2_mulv4si3"
3129 [(set (match_operand:V4SI 0 "register_operand" "")
3130 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3131 (match_operand:V4SI 2 "register_operand" "")))]
3132 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3133 && !(reload_completed || reload_in_progress)"
3138 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3144 t1 = gen_reg_rtx (V4SImode);
3145 t2 = gen_reg_rtx (V4SImode);
3146 t3 = gen_reg_rtx (V4SImode);
3147 t4 = gen_reg_rtx (V4SImode);
3148 t5 = gen_reg_rtx (V4SImode);
3149 t6 = gen_reg_rtx (V4SImode);
3150 thirtytwo = GEN_INT (32);
3152 /* Multiply elements 2 and 0. */
3153 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3156 /* Shift both input vectors down one element, so that elements 3
3157 and 1 are now in the slots for elements 2 and 0. For K8, at
3158 least, this is faster than using a shuffle. */
3159 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3160 gen_lowpart (TImode, op1),
3162 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3163 gen_lowpart (TImode, op2),
3165 /* Multiply elements 3 and 1. */
3166 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3169 /* Move the results in element 2 down to element 1; we don't care
3170 what goes in elements 2 and 3. */
3171 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3172 const0_rtx, const0_rtx));
3173 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3174 const0_rtx, const0_rtx));
3176 /* Merge the parts back together. */
3177 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply built from 32x32->64 unsigned multiplies
;; (gen_sse2_umulv2siv2di3).  With each 64-bit element written as
;; hi * 2^32 + lo, the result is
;;   lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32).
;; Only matches before reload (neither reload_completed nor
;; reload_in_progress); six V2DI pseudos are allocated.
3181 (define_insn_and_split "mulv2di3"
3182 [(set (match_operand:V2DI 0 "register_operand" "")
3183 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3184 (match_operand:V2DI 2 "register_operand" "")))]
3186 && !(reload_completed || reload_in_progress)"
3191 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3197 t1 = gen_reg_rtx (V2DImode);
3198 t2 = gen_reg_rtx (V2DImode);
3199 t3 = gen_reg_rtx (V2DImode);
3200 t4 = gen_reg_rtx (V2DImode);
3201 t5 = gen_reg_rtx (V2DImode);
3202 t6 = gen_reg_rtx (V2DImode);
3203 thirtytwo = GEN_INT (32);
3205 /* Multiply low parts. */
3206 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3207 gen_lowpart (V4SImode, op2)));
3209 /* Shift input vectors right 32 bits so we can multiply high parts. */
3210 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3211 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3213 /* Multiply high parts by low parts. */
3214 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3215 gen_lowpart (V4SImode, t3)));
3216 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3217 gen_lowpart (V4SImode, t2)));
3219 /* Shift them back. */
3220 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3221 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3223 /* Add the three parts together. */
3224 emit_insn (gen_addv2di3 (t6, t1, t4));
3225 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, V8HI x V8HI -> V4SI, "hi" variant.  t1 receives
;; gen_mulv8hi3 of the inputs and t2 receives gen_smulv8hi3_highpart; the two
;; are combined with gen_vec_interleave_highv8hi into operand 0 viewed as
;; V8HI.
3229 (define_expand "vec_widen_smult_hi_v8hi"
3230 [(match_operand:V4SI 0 "register_operand" "")
3231 (match_operand:V8HI 1 "register_operand" "")
3232 (match_operand:V8HI 2 "register_operand" "")]
3235 rtx op1, op2, t1, t2, dest;
3239 t1 = gen_reg_rtx (V8HImode);
3240 t2 = gen_reg_rtx (V8HImode);
3241 dest = gen_lowpart (V8HImode, operands[0]);
3243 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3244 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3245 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same
;; sequence as the "hi" variant (gen_mulv8hi3 plus gen_smulv8hi3_highpart),
;; but the two results are combined with gen_vec_interleave_lowv8hi.
3249 (define_expand "vec_widen_smult_lo_v8hi"
3250 [(match_operand:V4SI 0 "register_operand" "")
3251 (match_operand:V8HI 1 "register_operand" "")
3252 (match_operand:V8HI 2 "register_operand" "")]
3255 rtx op1, op2, t1, t2, dest;
3259 t1 = gen_reg_rtx (V8HImode);
3260 t2 = gen_reg_rtx (V8HImode);
3261 dest = gen_lowpart (V8HImode, operands[0]);
3263 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3264 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3265 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, "hi" variant.  t1
;; receives gen_mulv8hi3 of the inputs and t2 receives
;; gen_umulv8hi3_highpart; the two are combined with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
3269 (define_expand "vec_widen_umult_hi_v8hi"
3270 [(match_operand:V4SI 0 "register_operand" "")
3271 (match_operand:V8HI 1 "register_operand" "")
3272 (match_operand:V8HI 2 "register_operand" "")]
3275 rtx op1, op2, t1, t2, dest;
3279 t1 = gen_reg_rtx (V8HImode);
3280 t2 = gen_reg_rtx (V8HImode);
3281 dest = gen_lowpart (V8HImode, operands[0]);
3283 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3284 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3285 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same
;; sequence as the "hi" variant (gen_mulv8hi3 plus gen_umulv8hi3_highpart),
;; but the two results are combined with gen_vec_interleave_lowv8hi.
3289 (define_expand "vec_widen_umult_lo_v8hi"
3290 [(match_operand:V4SI 0 "register_operand" "")
3291 (match_operand:V8HI 1 "register_operand" "")
3292 (match_operand:V8HI 2 "register_operand" "")]
3295 rtx op1, op2, t1, t2, dest;
3299 t1 = gen_reg_rtx (V8HImode);
3300 t2 = gen_reg_rtx (V8HImode);
3301 dest = gen_lowpart (V8HImode, operands[0]);
3303 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3304 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3305 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, V4SI x V4SI -> V2DI, "hi" variant.  Each input
;; is interleaved with itself via gen_vec_interleave_highv4si, then the two
;; results are multiplied with gen_sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the final multiply is the same gen_sse2_umulv2siv2di3 used
;; by the vec_widen_umult_*_v4si expanders, so the signed and unsigned
;; sequences are identical -- confirm this is correct for negative elements.
3309 (define_expand "vec_widen_smult_hi_v4si"
3310 [(match_operand:V2DI 0 "register_operand" "")
3311 (match_operand:V4SI 1 "register_operand" "")
3312 (match_operand:V4SI 2 "register_operand" "")]
3315 rtx op1, op2, t1, t2;
3319 t1 = gen_reg_rtx (V4SImode);
3320 t2 = gen_reg_rtx (V4SImode);
3322 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3323 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3324 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, V4SI x V4SI -> V2DI, "lo" variant.  Each input
;; is interleaved with itself via gen_vec_interleave_lowv4si, then the two
;; results are multiplied with gen_sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the final multiply is the same gen_sse2_umulv2siv2di3 used
;; by the vec_widen_umult_*_v4si expanders, so the signed and unsigned
;; sequences are identical -- confirm this is correct for negative elements.
3328 (define_expand "vec_widen_smult_lo_v4si"
3329 [(match_operand:V2DI 0 "register_operand" "")
3330 (match_operand:V4SI 1 "register_operand" "")
3331 (match_operand:V4SI 2 "register_operand" "")]
3334 rtx op1, op2, t1, t2;
3338 t1 = gen_reg_rtx (V4SImode);
3339 t2 = gen_reg_rtx (V4SImode);
3341 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3342 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3343 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, "hi" variant.  Each
;; input is interleaved with itself via gen_vec_interleave_highv4si, then
;; the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
3347 (define_expand "vec_widen_umult_hi_v4si"
3348 [(match_operand:V2DI 0 "register_operand" "")
3349 (match_operand:V4SI 1 "register_operand" "")
3350 (match_operand:V4SI 2 "register_operand" "")]
3353 rtx op1, op2, t1, t2;
3357 t1 = gen_reg_rtx (V4SImode);
3358 t2 = gen_reg_rtx (V4SImode);
3360 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3361 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3362 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, "lo" variant.  Each
;; input is interleaved with itself via gen_vec_interleave_lowv4si, then the
;; two results are multiplied with gen_sse2_umulv2siv2di3 into operand 0.
3366 (define_expand "vec_widen_umult_lo_v4si"
3367 [(match_operand:V2DI 0 "register_operand" "")
3368 (match_operand:V4SI 1 "register_operand" "")
3369 (match_operand:V4SI 2 "register_operand" "")]
3372 rtx op1, op2, t1, t2;
3376 t1 = gen_reg_rtx (V4SImode);
3377 t2 = gen_reg_rtx (V4SImode);
3379 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3380 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3381 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product expander: operand 0 (V4SI) = operand 3 (V4SI) plus the
;; result of gen_sse2_pmaddwd on the two V8HI inputs.  The pmaddwd result is
;; held in a fresh V4SI temporary before the add.
3385 (define_expand "sdot_prodv8hi"
3386 [(match_operand:V4SI 0 "register_operand" "")
3387 (match_operand:V8HI 1 "register_operand" "")
3388 (match_operand:V8HI 2 "register_operand" "")
3389 (match_operand:V4SI 3 "register_operand" "")]
3392 rtx t = gen_reg_rtx (V4SImode);
3393 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3394 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product expander, V4SI x V4SI accumulated into V2DI.
;;   t1 = umul (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted right as TImode values (the shift count is on
;;            lines not shown in this excerpt)
;;   t4 = umul (t2, t3)
;;   op0 = t1 + t4
;; where umul is gen_sse2_umulv2siv2di3.
3398 (define_expand "udot_prodv4si"
3399 [(match_operand:V2DI 0 "register_operand" "")
3400 (match_operand:V4SI 1 "register_operand" "")
3401 (match_operand:V4SI 2 "register_operand" "")
3402 (match_operand:V2DI 3 "register_operand" "")]
3407 t1 = gen_reg_rtx (V2DImode);
3408 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3409 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3411 t2 = gen_reg_rtx (V4SImode);
3412 t3 = gen_reg_rtx (V4SImode);
3413 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3414 gen_lowpart (TImode, operands[1]),
3416 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3417 gen_lowpart (TImode, operands[2]),
3420 t4 = gen_reg_rtx (V2DImode);
3421 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3423 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Vector shift over the SSEMODE24 modes (V8HI, V4SI), emitted as
;; psra<ssevecsize> (psraw / psrad).  Operand 1 is tied to the destination;
;; the SImode count in operand 2 is either an SSE register ("x") or a
;; constant accepted by the "N" constraint.
3427 (define_insn "ashr<mode>3"
3428 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3430 (match_operand:SSEMODE24 1 "register_operand" "0")
3431 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3433 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3434 [(set_attr "type" "sseishft")
3435 (set_attr "prefix_data16" "1")
3436 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) over the SSEMODE248 modes (V8HI, V4SI,
;; V2DI), emitted as psrl<ssevecsize>.  Operand 1 is tied to the
;; destination; the SImode count in operand 2 is either an SSE register
;; ("x") or a constant accepted by the "N" constraint.
3438 (define_insn "lshr<mode>3"
3439 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3440 (lshiftrt:SSEMODE248
3441 (match_operand:SSEMODE248 1 "register_operand" "0")
3442 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3444 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3445 [(set_attr "type" "sseishft")
3446 (set_attr "prefix_data16" "1")
3447 (set_attr "mode" "TI")])
;; Vector shift over the SSEMODE248 modes (V8HI, V4SI, V2DI), emitted as
;; psll<ssevecsize>.  Operand 1 is tied to the destination; the SImode count
;; in operand 2 is either an SSE register ("x") or a constant accepted by
;; the "N" constraint.
3449 (define_insn "ashl<mode>3"
3450 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3452 (match_operand:SSEMODE248 1 "register_operand" "0")
3453 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3455 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3456 [(set_attr "type" "sseishft")
3457 (set_attr "prefix_data16" "1")
3458 (set_attr "mode" "TI")])
;; Whole-register left shift for every SSEMODEI mode.  Operands 0 and 1 are
;; rewritten to their TImode lowparts so the operation becomes a single
;; ashift:TI; the count must satisfy const_0_to_255_mul_8_operand.
3460 (define_expand "vec_shl_<mode>"
3461 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3462 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3463 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3466 operands[0] = gen_lowpart (TImode, operands[0]);
3467 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for every SSEMODEI mode.  Operands 0
;; and 1 are rewritten to their TImode lowparts so the operation becomes a
;; single lshiftrt:TI; the count must satisfy const_0_to_255_mul_8_operand.
3470 (define_expand "vec_shr_<mode>"
3471 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3472 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3473 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3476 operands[0] = gen_lowpart (TImode, operands[0]);
3477 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for the V16QI operations named by the <code> iterator (the
;; iterator itself is on a line not shown in this excerpt).  The only
;; preparation step is ix86_fixup_binary_operands_no_copy on the operands.
3480 (define_expand "<code>v16qi3"
3481 [(set (match_operand:V16QI 0 "register_operand" "")
3483 (match_operand:V16QI 1 "nonimmediate_operand" "")
3484 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3486 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI insn for the <code> iterator, emitted as p<maxminiprefix>b.
;; Operand 1 is tied to the destination and commutative ("%0"); operand 2
;; may be an SSE register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
3488 (define_insn "*<code>v16qi3"
3489 [(set (match_operand:V16QI 0 "register_operand" "=x")
3491 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3492 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3493 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3494 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3495 [(set_attr "type" "sseiadd")
3496 (set_attr "prefix_data16" "1")
3497 (set_attr "mode" "TI")])
;; Expander for the V8HI operations named by the <code> iterator (the
;; iterator itself is on a line not shown in this excerpt).  The only
;; preparation step is ix86_fixup_binary_operands_no_copy on the operands.
3499 (define_expand "<code>v8hi3"
3500 [(set (match_operand:V8HI 0 "register_operand" "")
3502 (match_operand:V8HI 1 "nonimmediate_operand" "")
3503 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3505 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI insn for the <code> iterator, emitted as p<maxminiprefix>w.
;; Operand 1 is tied to the destination and commutative ("%0"); operand 2
;; may be an SSE register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
3507 (define_insn "*<code>v8hi3"
3508 [(set (match_operand:V8HI 0 "register_operand" "=x")
3510 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3511 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3512 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3513 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3514 [(set_attr "type" "sseiadd")
3515 (set_attr "prefix_data16" "1")
3516 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two paths are visible: one only calls
;; ix86_fixup_binary_operands_no_copy for UMAX; the other synthesizes the
;; result as gen_sse2_ussubv8hi3 (op1, op2) followed by gen_addv8hi3 with
;; op2.  The condition selecting between them is on a line not shown here.
;; In the synthesized path the intermediate goes to a fresh register when
;; the destination is rtx_equal_p to operand 2, so operand 2 is still intact
;; for the final add.
3518 (define_expand "umaxv8hi3"
3519 [(set (match_operand:V8HI 0 "register_operand" "")
3520 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3521 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3525 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3528 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3529 if (rtx_equal_p (op3, op2))
3530 op3 = gen_reg_rtx (V8HImode);
3531 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3532 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible: one only calls ix86_fixup_binary_operands_no_copy for SMAX; the
;; other builds a vector conditional for ix86_expand_int_vcond:
;;   dest = (op1 > op2) ? op1 : op2   using a signed GT comparison.
;; The condition selecting between the paths is on a line not shown here.
3537 (define_expand "smax<mode>3"
3538 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3539 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3540 (match_operand:SSEMODE14 2 "register_operand" "")))]
3544 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3550 xops[0] = operands[0];
3551 xops[1] = operands[1];
3552 xops[2] = operands[2];
3553 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3554 xops[4] = operands[1];
3555 xops[5] = operands[2];
3556 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn over the SSEMODE14 modes (V16QI, V4SI) for the <code>
;; iterator, emitted as p<maxminiprefix><ssevecsize>.  Operand 1 is tied to
;; the destination and commutative ("%0"); operand 2 may be an SSE register
;; or memory.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
3562 (define_insn "*sse4_1_<code><mode>3"
3563 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3565 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3566 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3567 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3568 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3569 [(set_attr "type" "sseiadd")
3570 (set_attr "prefix_extra" "1")
3571 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  Two paths are visible: one only calls
;; ix86_fixup_binary_operands_no_copy for UMAX; the other builds a vector
;; conditional for ix86_expand_int_vcond:
;;   dest = (op1 >u op2) ? op1 : op2   using an unsigned GTU comparison.
;; The condition selecting between the paths is on a line not shown here.
3573 (define_expand "umaxv4si3"
3574 [(set (match_operand:V4SI 0 "register_operand" "")
3575 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3576 (match_operand:V4SI 2 "register_operand" "")))]
3580 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3586 xops[0] = operands[0];
3587 xops[1] = operands[1];
3588 xops[2] = operands[2];
3589 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3590 xops[4] = operands[1];
3591 xops[5] = operands[2];
3592 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn over the SSEMODE24 modes (V8HI, V4SI) for the <code>
;; iterator, emitted as p<maxminiprefix><ssevecsize>.  Operand 1 is tied to
;; the destination and commutative ("%0"); operand 2 may be an SSE register
;; or memory.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
3598 (define_insn "*sse4_1_<code><mode>3"
3599 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3601 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3602 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3603 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3604 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3605 [(set_attr "type" "sseiadd")
3606 (set_attr "prefix_extra" "1")
3607 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible: one only calls ix86_fixup_binary_operands_no_copy for SMIN; the
;; other builds a vector conditional for ix86_expand_int_vcond with the
;; select arms swapped relative to smax:
;;   dest = (op1 > op2) ? op2 : op1   using a signed GT comparison.
;; The condition selecting between the paths is on a line not shown here.
3609 (define_expand "smin<mode>3"
3610 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3611 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3612 (match_operand:SSEMODE14 2 "register_operand" "")))]
3616 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3622 xops[0] = operands[0];
3623 xops[1] = operands[2];
3624 xops[2] = operands[1];
3625 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3626 xops[4] = operands[1];
3627 xops[5] = operands[2];
3628 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  Two paths are
;; visible: one only calls ix86_fixup_binary_operands_no_copy for UMIN; the
;; other builds a vector conditional for ix86_expand_int_vcond with the
;; select arms swapped relative to umax:
;;   dest = (op1 >u op2) ? op2 : op1   using an unsigned GTU comparison.
;; The condition selecting between the paths is on a line not shown here.
3634 (define_expand "umin<mode>3"
3635 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3636 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3637 (match_operand:SSEMODE24 2 "register_operand" "")))]
3641 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3647 xops[0] = operands[0];
3648 xops[1] = operands[2];
3649 xops[2] = operands[1];
3650 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3651 xops[4] = operands[1];
3652 xops[5] = operands[2];
3653 ok = ix86_expand_int_vcond (xops);
3659 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3661 ;; Parallel integral comparisons
3663 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare over the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), emitted as pcmpeq<ssevecsize>.  Operand 1 is tied to the
;; destination and commutative ("%0").  Requires TARGET_SSE2 without
;; TARGET_SSE5, plus ix86_binary_operator_ok for EQ.
3665 (define_insn "sse2_eq<mode>3"
3666 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3668 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3669 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3670 "TARGET_SSE2 && !TARGET_SSE5
3671 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3672 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3673 [(set_attr "type" "ssecmp")
3674 (set_attr "prefix_data16" "1")
3675 (set_attr "mode" "TI")])
;; V2DI equality compare, emitted as pcmpeqq.  Operand 1 is tied to the
;; destination and commutative ("%0"); operand 2 may be an SSE register or
;; memory.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok for EQ.
3677 (define_insn "sse4_1_eqv2di3"
3678 [(set (match_operand:V2DI 0 "register_operand" "=x")
3680 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3681 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3682 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3683 "pcmpeqq\t{%2, %0|%0, %2}"
3684 [(set_attr "type" "ssecmp")
3685 (set_attr "prefix_extra" "1")
3686 (set_attr "mode" "TI")])
;; Element-wise compare over the SSEMODE124 modes (V16QI, V8HI, V4SI),
;; emitted as pcmpgt<ssevecsize>.  Operand 1 must be a register tied to the
;; destination ("0", not marked commutative); operand 2 may be an SSE
;; register or memory.  Requires TARGET_SSE2 without TARGET_SSE5.
3688 (define_insn "sse2_gt<mode>3"
3689 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3691 (match_operand:SSEMODE124 1 "register_operand" "0")
3692 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3693 "TARGET_SSE2 && !TARGET_SSE5"
3694 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3695 [(set_attr "type" "ssecmp")
3696 (set_attr "prefix_data16" "1")
3697 (set_attr "mode" "TI")])
;; V2DI compare, emitted as pcmpgtq.  Operand 1 is tied to the destination
;; by the "0" constraint and is not commutative, so it uses
;; register_operand like sse2_gt<mode>3 rather than nonimmediate_operand:
;; a memory operand 1 could never satisfy the constraint without a reload.
;; prefix_extra is set, as on sse4_1_eqv2di3, because pcmpgtq is encoded in
;; the three-byte 0F 38 opcode map and the length computation needs it.
3699 (define_insn "sse4_2_gtv2di3"
3700 [(set (match_operand:V2DI 0 "register_operand" "=x")
3702 (match_operand:V2DI 1 "register_operand" "0")
3703 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3705 "pcmpgtq\t{%2, %0|%0, %2}"
3706 [(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
3707 (set_attr "mode" "TI")])
;; Vector conditional select for every SSEMODEI mode: operand 0 receives
;; operand 1 or operand 2 per element according to comparison operator 3
;; applied to operands 4 and 5.  Expansion is delegated to
;; ix86_expand_int_vcond; what follows its success or failure is on lines
;; not shown in this excerpt.
3709 (define_expand "vcond<mode>"
3710 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3711 (if_then_else:SSEMODEI
3712 (match_operator 3 ""
3713 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3714 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3715 (match_operand:SSEMODEI 1 "general_operand" "")
3716 (match_operand:SSEMODEI 2 "general_operand" "")))]
3719 if (ix86_expand_int_vcond (operands))
;; Companion of vcond<mode> under the "vcondu" name, with the same operand
;; layout: operand 0 receives operand 1 or operand 2 per element according
;; to comparison operator 3 applied to operands 4 and 5.  Expansion is
;; likewise delegated to ix86_expand_int_vcond.
3725 (define_expand "vcondu<mode>"
3726 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3727 (if_then_else:SSEMODEI
3728 (match_operator 3 ""
3729 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3730 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3731 (match_operand:SSEMODEI 1 "general_operand" "")
3732 (match_operand:SSEMODEI 2 "general_operand" "")))]
3735 if (ix86_expand_int_vcond (operands))
3741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3743 ;; Parallel bitwise logical operations
3745 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for every SSEMODEI mode, expressed as an XOR.  The
;; preparation code builds a constant vector with GET_MODE_NUNITS elements,
;; each constm1_rtx (all bits set), forces it into a register, and stores it
;; as operand 2.
3747 (define_expand "one_cmpl<mode>2"
3748 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3749 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3753 int i, n = GET_MODE_NUNITS (<MODE>mode);
3754 rtvec v = rtvec_alloc (n);
3756 for (i = 0; i < n; ++i)
3757 RTVEC_ELT (v, i) = constm1_rtx;
3759 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Integer-vector pattern combining (not operand 1) with operand 2, for
;; targets with SSE but not SSE2.  It is emitted as the single-precision
;; float instruction andnps, hence the V4SF mode attribute.  Operand 1 is
;; tied to the destination.
3762 (define_insn "*sse_nand<mode>3"
3763 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3765 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3766 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3767 "(TARGET_SSE && !TARGET_SSE2)"
3768 "andnps\t{%2, %0|%0, %2}"
3769 [(set_attr "type" "sselog")
3770 (set_attr "mode" "V4SF")])
;; Integer-vector pattern combining (not operand 1) with operand 2, emitted
;; as pandn.  Operand 1 is the complemented input and is tied to the
;; destination; operand 2 may be an SSE register or memory.
3772 (define_insn "sse2_nand<mode>3"
3773 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3775 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3776 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3778 "pandn\t{%2, %0|%0, %2}"
3779 [(set_attr "type" "sselog")
3780 (set_attr "prefix_data16" "1")
3781 (set_attr "mode" "TI")])
;; TFmode counterpart of the pandn pattern: (not operand 1) combined with
;; operand 2, emitted as pandn.  Operand 1 is tied to the destination.
3783 (define_insn "*nandtf3"
3784 [(set (match_operand:TF 0 "register_operand" "=x")
3786 (not:TF (match_operand:TF 1 "register_operand" "0"))
3787 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3789 "pandn\t{%2, %0|%0, %2}"
3790 [(set_attr "type" "sselog")
3791 (set_attr "prefix_data16" "1")
3792 (set_attr "mode" "TI")])
;; Expander over every SSEMODEI mode for the operations named by the <code>
;; iterator (the iterator itself is on a line not shown in this excerpt).
;; The only preparation step is ix86_fixup_binary_operands_no_copy.
3794 (define_expand "<code><mode>3"
3795 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3797 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3798 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3800 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE-without-SSE2 form of the integer-vector operations named by <code>,
;; emitted as the float instruction <plogicprefix>ps, hence the V4SF mode
;; attribute.  Operand 1 is tied to the destination and commutative ("%0").
3802 (define_insn "*sse_<code><mode>3"
3803 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3805 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3806 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3807 "(TARGET_SSE && !TARGET_SSE2)
3808 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3809 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3810 [(set_attr "type" "sselog")
3811 (set_attr "mode" "V4SF")])
;; SSE2 form of the integer-vector operations named by <code>, emitted as
;; p<plogicprefix>.  Operand 1 is tied to the destination and commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
3813 (define_insn "*sse2_<code><mode>3"
3814 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3816 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3817 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3818 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3819 "p<plogicprefix>\t{%2, %0|%0, %2}"
3820 [(set_attr "type" "sselog")
3821 (set_attr "prefix_data16" "1")
3822 (set_attr "mode" "TI")])
;; TFmode expander for the operations named by the <code> iterator (the
;; iterator itself is on a line not shown in this excerpt).  The only
;; preparation step is ix86_fixup_binary_operands_no_copy.
3824 (define_expand "<code>tf3"
3825 [(set (match_operand:TF 0 "register_operand" "")
3827 (match_operand:TF 1 "nonimmediate_operand" "")
3828 (match_operand:TF 2 "nonimmediate_operand" "")))]
3830 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode insn for the operations named by <code>, emitted as the integer
;; instruction p<plogicprefix>.  Only available when TARGET_64BIT is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination and commutative ("%0").
3832 (define_insn "*<code>tf3"
3833 [(set (match_operand:TF 0 "register_operand" "=x")
3835 (match_operand:TF 1 "nonimmediate_operand" "%0")
3836 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3837 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3838 "p<plogicprefix>\t{%2, %0|%0, %2}"
3839 [(set_attr "type" "sselog")
3840 (set_attr "prefix_data16" "1")
3841 (set_attr "mode" "TI")])
3843 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3845 ;; Parallel integral element swizzling
3847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3850 ;; op1 = abcdefghijklmnop
3851 ;; op2 = qrstuvwxyz012345
3852 ;; h1 = aqbrcsdteufvgwhx
3853 ;; l1 = iyjzk0l1m2n3o4p5
3854 ;; h2 = aiqybjrzcks0dlt1
3855 ;; l2 = emu2fnv3gow4hpx5
3856 ;; h3 = aeimquy2bfjnrvz3
3857 ;; l3 = cgkosw04dhlptx15
3858 ;; result = bdfhjlnprtvxz135
;; Pack two V8HI vectors into one V16QI.  Both inputs are viewed as V16QI
;; and run through three rounds of high/low byte interleaves (h1/l1, h2/l2,
;; h3/l3), followed by a final low interleave of l3 and h3 into operand 0.
3859 (define_expand "vec_pack_trunc_v8hi"
3860 [(match_operand:V16QI 0 "register_operand" "")
3861 (match_operand:V8HI 1 "register_operand" "")
3862 (match_operand:V8HI 2 "register_operand" "")]
3865 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3867 op1 = gen_lowpart (V16QImode, operands[1]);
3868 op2 = gen_lowpart (V16QImode, operands[2]);
3869 h1 = gen_reg_rtx (V16QImode);
3870 l1 = gen_reg_rtx (V16QImode);
3871 h2 = gen_reg_rtx (V16QImode);
3872 l2 = gen_reg_rtx (V16QImode);
3873 h3 = gen_reg_rtx (V16QImode);
3874 l3 = gen_reg_rtx (V16QImode);
3876 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3877 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3878 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3879 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3880 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3881 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3882 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3893 ;; result = bdfhjlnp
;; Pack two V4SI vectors into one V8HI.  Both inputs are viewed as V8HI and
;; run through two rounds of high/low halfword interleaves (h1/l1, h2/l2),
;; followed by a final low interleave of l2 and h2 into operand 0.
3894 (define_expand "vec_pack_trunc_v4si"
3895 [(match_operand:V8HI 0 "register_operand" "")
3896 (match_operand:V4SI 1 "register_operand" "")
3897 (match_operand:V4SI 2 "register_operand" "")]
3900 rtx op1, op2, h1, l1, h2, l2;
3902 op1 = gen_lowpart (V8HImode, operands[1]);
3903 op2 = gen_lowpart (V8HImode, operands[2]);
3904 h1 = gen_reg_rtx (V8HImode);
3905 l1 = gen_reg_rtx (V8HImode);
3906 h2 = gen_reg_rtx (V8HImode);
3907 l2 = gen_reg_rtx (V8HImode);
3909 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3910 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3911 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3912 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3913 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI vectors into one V4SI.  Both inputs are viewed as V4SI,
;; interleaved once into h1 (high) and l1 (low), and then l1 and h1 are
;; low-interleaved into operand 0.
3923 (define_expand "vec_pack_trunc_v2di"
3924 [(match_operand:V4SI 0 "register_operand" "")
3925 (match_operand:V2DI 1 "register_operand" "")
3926 (match_operand:V2DI 2 "register_operand" "")]
3929 rtx op1, op2, h1, l1;
3931 op1 = gen_lowpart (V4SImode, operands[1]);
3932 op2 = gen_lowpart (V4SImode, operands[2]);
3933 h1 = gen_reg_rtx (V4SImode);
3934 l1 = gen_reg_rtx (V4SImode);
3936 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3937 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3938 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high bytes of two V16QI vectors.  The selector takes
;; elements 8..15 of operand 1 alternating with elements 24..31, i.e.
;; elements 8..15 of operand 2 in the concatenated index space.  Expansion
;; simply emits gen_sse2_punpckhbw.
3942 (define_expand "vec_interleave_highv16qi"
3943 [(set (match_operand:V16QI 0 "register_operand" "")
3946 (match_operand:V16QI 1 "register_operand" "")
3947 (match_operand:V16QI 2 "nonimmediate_operand" ""))
3948 (parallel [(const_int 8) (const_int 24)
3949 (const_int 9) (const_int 25)
3950 (const_int 10) (const_int 26)
3951 (const_int 11) (const_int 27)
3952 (const_int 12) (const_int 28)
3953 (const_int 13) (const_int 29)
3954 (const_int 14) (const_int 30)
3955 (const_int 15) (const_int 31)])))]
3958 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low bytes of two V16QI vectors.  The selector takes
;; elements 0..7 of operand 1 alternating with elements 16..23, i.e.
;; elements 0..7 of operand 2 in the concatenated index space.  Expansion
;; simply emits gen_sse2_punpcklbw.
3962 (define_expand "vec_interleave_lowv16qi"
3963 [(set (match_operand:V16QI 0 "register_operand" "")
3966 (match_operand:V16QI 1 "register_operand" "")
3967 (match_operand:V16QI 2 "nonimmediate_operand" ""))
3968 (parallel [(const_int 0) (const_int 16)
3969 (const_int 1) (const_int 17)
3970 (const_int 2) (const_int 18)
3971 (const_int 3) (const_int 19)
3972 (const_int 4) (const_int 20)
3973 (const_int 5) (const_int 21)
3974 (const_int 6) (const_int 22)
3975 (const_int 7) (const_int 23)])))]
3978 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high halfwords of two V8HI vectors.  The selector takes
;; elements 4..7 of operand 1 alternating with elements 12..15, i.e.
;; elements 4..7 of operand 2 in the concatenated index space.  Expansion
;; simply emits gen_sse2_punpckhwd.  Operand 0 carries an empty constraint
;; string, matching every other vec_interleave_* expander in this file.
3982 (define_expand "vec_interleave_highv8hi"
3983 [(set (match_operand:V8HI 0 "register_operand" "")
3986 (match_operand:V8HI 1 "register_operand" "")
3987 (match_operand:V8HI 2 "nonimmediate_operand" ""))
3988 (parallel [(const_int 4) (const_int 12)
3989 (const_int 5) (const_int 13)
3990 (const_int 6) (const_int 14)
3991 (const_int 7) (const_int 15)])))]
3994 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low halfwords of two V8HI vectors.  The selector takes
;; elements 0..3 of operand 1 alternating with elements 8..11, i.e.
;; elements 0..3 of operand 2 in the concatenated index space.  Expansion
;; simply emits gen_sse2_punpcklwd.
3998 (define_expand "vec_interleave_lowv8hi"
3999 [(set (match_operand:V8HI 0 "register_operand" "")
4002 (match_operand:V8HI 1 "register_operand" "")
4003 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4004 (parallel [(const_int 0) (const_int 8)
4005 (const_int 1) (const_int 9)
4006 (const_int 2) (const_int 10)
4007 (const_int 3) (const_int 11)])))]
4010 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high dwords of two V4SI vectors: selector elements 2, 6,
;; 3, 7 of the concatenated inputs.  Expansion simply emits
;; gen_sse2_punpckhdq.
4014 (define_expand "vec_interleave_highv4si"
4015 [(set (match_operand:V4SI 0 "register_operand" "")
4018 (match_operand:V4SI 1 "register_operand" "")
4019 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4020 (parallel [(const_int 2) (const_int 6)
4021 (const_int 3) (const_int 7)])))]
4024 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low dwords of two V4SI vectors: selector elements 0, 4,
;; 1, 5 of the concatenated inputs.  Expansion simply emits
;; gen_sse2_punpckldq.
4028 (define_expand "vec_interleave_lowv4si"
4029 [(set (match_operand:V4SI 0 "register_operand" "")
4032 (match_operand:V4SI 1 "register_operand" "")
4033 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4034 (parallel [(const_int 0) (const_int 4)
4035 (const_int 1) (const_int 5)])))]
4038 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high quadwords of two V2DI vectors.  The selector starts
;; with element 1; its second entry is on a line not shown in this excerpt.
;; Expansion simply emits gen_sse2_punpckhqdq.
4042 (define_expand "vec_interleave_highv2di"
4043 [(set (match_operand:V2DI 0 "register_operand" "")
4046 (match_operand:V2DI 1 "register_operand" "")
4047 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4048 (parallel [(const_int 1)
4052 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low quadwords of two V2DI vectors.  The selector starts
;; with element 0; its second entry is on a line not shown in this excerpt.
;; Expansion simply emits gen_sse2_punpcklqdq.
4056 (define_expand "vec_interleave_lowv2di"
4057 [(set (match_operand:V2DI 0 "register_operand" "")
4060 (match_operand:V2DI 1 "register_operand" "")
4061 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4062 (parallel [(const_int 0)
4066 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; packsswb: narrows two V8HI inputs into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
4070 (define_insn "sse2_packsswb"
4071 [(set (match_operand:V16QI 0 "register_operand" "=x")
4074 (match_operand:V8HI 1 "register_operand" "0"))
4076 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4078 "packsswb\t{%2, %0|%0, %2}"
4079 [(set_attr "type" "sselog")
4080 (set_attr "prefix_data16" "1")
4081 (set_attr "mode" "TI")])
;; packssdw: narrows two V4SI inputs into one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
4083 (define_insn "sse2_packssdw"
4084 [(set (match_operand:V8HI 0 "register_operand" "=x")
4087 (match_operand:V4SI 1 "register_operand" "0"))
4089 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4091 "packssdw\t{%2, %0|%0, %2}"
4092 [(set_attr "type" "sselog")
4093 (set_attr "prefix_data16" "1")
4094 (set_attr "mode" "TI")])
;; packuswb: narrows two V8HI inputs into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
4096 (define_insn "sse2_packuswb"
4097 [(set (match_operand:V16QI 0 "register_operand" "=x")
4100 (match_operand:V8HI 1 "register_operand" "0"))
4102 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4104 "packuswb\t{%2, %0|%0, %2}"
4105 [(set_attr "type" "sselog")
4106 (set_attr "prefix_data16" "1")
4107 (set_attr "mode" "TI")])
;; punpckhbw: interleaves bytes 8..15 of operand 1 with bytes 8..15 of
;; operand 2 (selector indices 24..31 in the concatenated index space).
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4109 (define_insn "sse2_punpckhbw"
4110 [(set (match_operand:V16QI 0 "register_operand" "=x")
4113 (match_operand:V16QI 1 "register_operand" "0")
4114 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4115 (parallel [(const_int 8) (const_int 24)
4116 (const_int 9) (const_int 25)
4117 (const_int 10) (const_int 26)
4118 (const_int 11) (const_int 27)
4119 (const_int 12) (const_int 28)
4120 (const_int 13) (const_int 29)
4121 (const_int 14) (const_int 30)
4122 (const_int 15) (const_int 31)])))]
4124 "punpckhbw\t{%2, %0|%0, %2}"
4125 [(set_attr "type" "sselog")
4126 (set_attr "prefix_data16" "1")
4127 (set_attr "mode" "TI")])
;; punpcklbw: interleaves bytes 0..7 of operand 1 with bytes 0..7 of
;; operand 2 (selector indices 16..23 in the concatenated index space).
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4129 (define_insn "sse2_punpcklbw"
4130 [(set (match_operand:V16QI 0 "register_operand" "=x")
4133 (match_operand:V16QI 1 "register_operand" "0")
4134 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4135 (parallel [(const_int 0) (const_int 16)
4136 (const_int 1) (const_int 17)
4137 (const_int 2) (const_int 18)
4138 (const_int 3) (const_int 19)
4139 (const_int 4) (const_int 20)
4140 (const_int 5) (const_int 21)
4141 (const_int 6) (const_int 22)
4142 (const_int 7) (const_int 23)])))]
4144 "punpcklbw\t{%2, %0|%0, %2}"
4145 [(set_attr "type" "sselog")
4146 (set_attr "prefix_data16" "1")
4147 (set_attr "mode" "TI")])
;; punpckhwd: interleaves halfwords 4..7 of operand 1 with halfwords 4..7
;; of operand 2 (selector indices 12..15 in the concatenated index space).
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4149 (define_insn "sse2_punpckhwd"
4150 [(set (match_operand:V8HI 0 "register_operand" "=x")
4153 (match_operand:V8HI 1 "register_operand" "0")
4154 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4155 (parallel [(const_int 4) (const_int 12)
4156 (const_int 5) (const_int 13)
4157 (const_int 6) (const_int 14)
4158 (const_int 7) (const_int 15)])))]
4160 "punpckhwd\t{%2, %0|%0, %2}"
4161 [(set_attr "type" "sselog")
4162 (set_attr "prefix_data16" "1")
4163 (set_attr "mode" "TI")])
;; punpcklwd: interleaves halfwords 0..3 of operand 1 with halfwords 0..3
;; of operand 2 (selector indices 8..11 in the concatenated index space).
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4165 (define_insn "sse2_punpcklwd"
4166 [(set (match_operand:V8HI 0 "register_operand" "=x")
4169 (match_operand:V8HI 1 "register_operand" "0")
4170 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4171 (parallel [(const_int 0) (const_int 8)
4172 (const_int 1) (const_int 9)
4173 (const_int 2) (const_int 10)
4174 (const_int 3) (const_int 11)])))]
4176 "punpcklwd\t{%2, %0|%0, %2}"
4177 [(set_attr "type" "sselog")
4178 (set_attr "prefix_data16" "1")
4179 (set_attr "mode" "TI")])
;; punpckhdq: selects elements 2, 6, 3, 7 of the concatenated V4SI inputs,
;; i.e. the high dwords of operand 1 interleaved with those of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4181 (define_insn "sse2_punpckhdq"
4182 [(set (match_operand:V4SI 0 "register_operand" "=x")
4185 (match_operand:V4SI 1 "register_operand" "0")
4186 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4187 (parallel [(const_int 2) (const_int 6)
4188 (const_int 3) (const_int 7)])))]
4190 "punpckhdq\t{%2, %0|%0, %2}"
4191 [(set_attr "type" "sselog")
4192 (set_attr "prefix_data16" "1")
4193 (set_attr "mode" "TI")])
;; punpckldq: selects elements 0, 4, 1, 5 of the concatenated V4SI inputs,
;; i.e. the low dwords of operand 1 interleaved with those of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4195 (define_insn "sse2_punpckldq"
4196 [(set (match_operand:V4SI 0 "register_operand" "=x")
4199 (match_operand:V4SI 1 "register_operand" "0")
4200 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4201 (parallel [(const_int 0) (const_int 4)
4202 (const_int 1) (const_int 5)])))]
4204 "punpckldq\t{%2, %0|%0, %2}"
4205 [(set_attr "type" "sselog")
4206 (set_attr "prefix_data16" "1")
4207 (set_attr "mode" "TI")])
;; punpckhqdq on V2DI.  The selector starts with element 1; its second
;; entry is on a line not shown in this excerpt.  Operand 1 is tied to the
;; destination; operand 2 may be register or memory.
4209 (define_insn "sse2_punpckhqdq"
4210 [(set (match_operand:V2DI 0 "register_operand" "=x")
4213 (match_operand:V2DI 1 "register_operand" "0")
4214 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4215 (parallel [(const_int 1)
4218 "punpckhqdq\t{%2, %0|%0, %2}"
4219 [(set_attr "type" "sselog")
4220 (set_attr "prefix_data16" "1")
4221 (set_attr "mode" "TI")])
;; punpcklqdq on V2DI.  The selector starts with element 0; its second
;; entry is on a line not shown in this excerpt.  Operand 1 is tied to the
;; destination; operand 2 may be register or memory.
4223 (define_insn "sse2_punpcklqdq"
4224 [(set (match_operand:V2DI 0 "register_operand" "=x")
4227 (match_operand:V2DI 1 "register_operand" "0")
4228 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4229 (parallel [(const_int 0)
4232 "punpcklqdq\t{%2, %0|%0, %2}"
4233 [(set_attr "type" "sselog")
4234 (set_attr "prefix_extra" "1")
4235 (set_attr "mode" "TI")])
;; pinsrb: inserts the QImode value in operand 2 (register or memory) into
;; the V16QI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power of two between 1 and 32768 (a one-bit
;; element mask) and is rewritten to the element index with exact_log2
;; before the instruction is printed.
4237 (define_insn "*sse4_1_pinsrb"
4238 [(set (match_operand:V16QI 0 "register_operand" "=x")
4240 (vec_duplicate:V16QI
4241 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4242 (match_operand:V16QI 1 "register_operand" "0")
4243 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4246 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4247 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4249 [(set_attr "type" "sselog")
4250 (set_attr "prefix_extra" "1")
4251 (set_attr "mode" "TI")])
;; pinsrw: inserts the HImode value in operand 2 (register or memory) into
;; the V8HI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power of two between 1 and 128 (a one-bit element
;; mask) and is rewritten to the element index with exact_log2 before the
;; instruction is printed.
4253 (define_insn "*sse2_pinsrw"
4254 [(set (match_operand:V8HI 0 "register_operand" "=x")
4257 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4258 (match_operand:V8HI 1 "register_operand" "0")
4259 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4262 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4263 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4265 [(set_attr "type" "sselog")
4266 (set_attr "prefix_data16" "1")
4267 (set_attr "mode" "TI")])
;; pinsrd: insert the SImode value in operand 2 (register or memory) into
;; V4SI operand 1, which is tied to the destination.  Operand 3 is matched
;; as a power of two (const_pow2_1_to_8_operand) and converted to the
;; immediate with exact_log2 in the output code.
4269 ;; It must come before sse2_loadld since it is preferred.
4270 (define_insn "*sse4_1_pinsrd"
4271 [(set (match_operand:V4SI 0 "register_operand" "=x")
4274 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4275 (match_operand:V4SI 1 "register_operand" "0")
4276 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4279 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4280 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4282 [(set_attr "type" "sselog")
4283 (set_attr "prefix_extra" "1")
4284 (set_attr "mode" "TI")])
;; pinsrq: insert the DImode value in operand 2 (register or memory) into
;; V2DI operand 1, which is tied to the destination.  Operand 3 is matched
;; as a power of two (const_pow2_1_to_2_operand) and converted to the
;; immediate with exact_log2 in the output code.
;; NOTE(review): the insn condition is not visible in this excerpt; since
;; operand 2 is DImode in a general register, confirm it requires
;; TARGET_64BIT the way *sse4_1_pextrq does.
4286 (define_insn "*sse4_1_pinsrq"
4287 [(set (match_operand:V2DI 0 "register_operand" "=x")
4290 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4291 (match_operand:V2DI 1 "register_operand" "0")
4292 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4295 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4296 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4298 [(set_attr "type" "sselog")
4299 (set_attr "prefix_extra" "1")
4300 (set_attr "mode" "TI")])
;; pextrb to a general register: extracts element operand 2 (0..15) of V16QI
;; operand 1 into SImode operand 0.  The QI-to-SI widening wrapper is not
;; visible in this excerpt (presumably a zero_extend; confirm).
4302 (define_insn "*sse4_1_pextrb"
4303 [(set (match_operand:SI 0 "register_operand" "=r")
4306 (match_operand:V16QI 1 "register_operand" "x")
4307 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4309 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4310 [(set_attr "type" "sselog")
4311 (set_attr "prefix_extra" "1")
4312 (set_attr "mode" "TI")])
;; pextrb to memory: stores element operand 2 (0..15) of V16QI operand 1
;; directly into the QImode memory destination.
4314 (define_insn "*sse4_1_pextrb_memory"
4315 [(set (match_operand:QI 0 "memory_operand" "=m")
4317 (match_operand:V16QI 1 "register_operand" "x")
4318 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4320 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4321 [(set_attr "type" "sselog")
4322 (set_attr "prefix_extra" "1")
4323 (set_attr "mode" "TI")])
;; pextrw to a general register: extracts element operand 2 (0..7) of V8HI
;; operand 1 into SImode operand 0.  The HI-to-SI widening wrapper is not
;; visible in this excerpt (presumably a zero_extend; confirm).
4325 (define_insn "*sse2_pextrw"
4326 [(set (match_operand:SI 0 "register_operand" "=r")
4329 (match_operand:V8HI 1 "register_operand" "x")
4330 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4332 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4333 [(set_attr "type" "sselog")
4334 (set_attr "prefix_data16" "1")
4335 (set_attr "mode" "TI")])
;; pextrw to memory: stores element operand 2 (0..7) of V8HI operand 1
;; directly into the HImode memory destination.
4337 (define_insn "*sse4_1_pextrw_memory"
4338 [(set (match_operand:HI 0 "memory_operand" "=m")
4340 (match_operand:V8HI 1 "register_operand" "x")
4341 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4343 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4344 [(set_attr "type" "sselog")
4345 (set_attr "prefix_extra" "1")
4346 (set_attr "mode" "TI")])
;; pextrd: extracts element operand 2 (0..3) of V4SI operand 1 into an
;; SImode destination that may be a general register or memory ("=rm").
4348 (define_insn "*sse4_1_pextrd"
4349 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4351 (match_operand:V4SI 1 "register_operand" "x")
4352 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4354 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4355 [(set_attr "type" "sselog")
4356 (set_attr "prefix_extra" "1")
4357 (set_attr "mode" "TI")])
;; pextrq: extracts element operand 2 (0..1) of V2DI operand 1 into a DImode
;; general register or memory destination.  Only enabled when both
;; TARGET_SSE4_1 and TARGET_64BIT hold.
4359 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4360 (define_insn "*sse4_1_pextrq"
4361 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4363 (match_operand:V2DI 1 "register_operand" "x")
4364 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4365 "TARGET_SSE4_1 && TARGET_64BIT"
4366 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4367 [(set_attr "type" "sselog")
4368 (set_attr "prefix_extra" "1")
4369 (set_attr "mode" "TI")])
;; Expander for pshufd given the packed 8-bit shuffle immediate in operand 2.
;; It splits the immediate into four 2-bit element selectors (bits 1:0, 3:2,
;; 5:4, 7:6) and hands them to sse2_pshufd_1 as separate operands.
4371 (define_expand "sse2_pshufd"
4372 [(match_operand:V4SI 0 "register_operand" "")
4373 (match_operand:V4SI 1 "nonimmediate_operand" "")
4374 (match_operand:SI 2 "const_int_operand" "")]
4377 int mask = INTVAL (operands[2]);
4378 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4379 GEN_INT ((mask >> 0) & 3),
4380 GEN_INT ((mask >> 2) & 3),
4381 GEN_INT ((mask >> 4) & 3),
4382 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code packs them back into one
;; immediate, two bits per selector, stores it in operands[2] and prints
;; pshufd with that immediate.
4386 (define_insn "sse2_pshufd_1"
4387 [(set (match_operand:V4SI 0 "register_operand" "=x")
4389 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4390 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4391 (match_operand 3 "const_0_to_3_operand" "")
4392 (match_operand 4 "const_0_to_3_operand" "")
4393 (match_operand 5 "const_0_to_3_operand" "")])))]
4397 mask |= INTVAL (operands[2]) << 0;
4398 mask |= INTVAL (operands[3]) << 2;
4399 mask |= INTVAL (operands[4]) << 4;
4400 mask |= INTVAL (operands[5]) << 6;
4401 operands[2] = GEN_INT (mask);
4403 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4405 [(set_attr "type" "sselog1")
4406 (set_attr "prefix_data16" "1")
4407 (set_attr "mode" "TI")])
;; Expander for pshuflw given the packed 8-bit shuffle immediate in operand
;; 2.  It splits the immediate into four 2-bit selectors and passes them to
;; sse2_pshuflw_1 as separate operands.
4409 (define_expand "sse2_pshuflw"
4410 [(match_operand:V8HI 0 "register_operand" "")
4411 (match_operand:V8HI 1 "nonimmediate_operand" "")
4412 (match_operand:SI 2 "const_int_operand" "")]
4415 int mask = INTVAL (operands[2]);
4416 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4417 GEN_INT ((mask >> 0) & 3),
4418 GEN_INT ((mask >> 2) & 3),
4419 GEN_INT ((mask >> 4) & 3),
4420 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle expressed as four explicit selectors (operands
;; 2..5, each 0..3) for the first four elements; the remaining selector
;; entries are not visible in this excerpt.  The output code packs the four
;; selectors into one immediate (two bits each) stored in operands[2].
4424 (define_insn "sse2_pshuflw_1"
4425 [(set (match_operand:V8HI 0 "register_operand" "=x")
4427 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4428 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4429 (match_operand 3 "const_0_to_3_operand" "")
4430 (match_operand 4 "const_0_to_3_operand" "")
4431 (match_operand 5 "const_0_to_3_operand" "")
4439 mask |= INTVAL (operands[2]) << 0;
4440 mask |= INTVAL (operands[3]) << 2;
4441 mask |= INTVAL (operands[4]) << 4;
4442 mask |= INTVAL (operands[5]) << 6;
4443 operands[2] = GEN_INT (mask);
4445 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4447 [(set_attr "type" "sselog")
4448 (set_attr "prefix_rep" "1")
4449 (set_attr "mode" "TI")])
;; Expander for pshufhw given the packed 8-bit shuffle immediate in operand
;; 2.  Each 2-bit selector is biased by 4 before being passed to
;; sse2_pshufhw_1, whose selector operands are matched in the range 4..7.
4451 (define_expand "sse2_pshufhw"
4452 [(match_operand:V8HI 0 "register_operand" "")
4453 (match_operand:V8HI 1 "nonimmediate_operand" "")
4454 (match_operand:SI 2 "const_int_operand" "")]
4457 int mask = INTVAL (operands[2]);
4458 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4459 GEN_INT (((mask >> 0) & 3) + 4),
4460 GEN_INT (((mask >> 2) & 3) + 4),
4461 GEN_INT (((mask >> 4) & 3) + 4),
4462 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with explicit selectors.  The selector begins with constant
;; element 0 (further constant entries are not visible in this excerpt) and
;; ends with operands 2..5, each in 4..7.  The output code subtracts the
;; bias of 4 from each and packs them into one immediate, two bits each.
4466 (define_insn "sse2_pshufhw_1"
4467 [(set (match_operand:V8HI 0 "register_operand" "=x")
4469 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4470 (parallel [(const_int 0)
4474 (match_operand 2 "const_4_to_7_operand" "")
4475 (match_operand 3 "const_4_to_7_operand" "")
4476 (match_operand 4 "const_4_to_7_operand" "")
4477 (match_operand 5 "const_4_to_7_operand" "")])))]
4481 mask |= (INTVAL (operands[2]) - 4) << 0;
4482 mask |= (INTVAL (operands[3]) - 4) << 2;
4483 mask |= (INTVAL (operands[4]) - 4) << 4;
4484 mask |= (INTVAL (operands[5]) - 4) << 6;
4485 operands[2] = GEN_INT (mask);
4487 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4489 [(set_attr "type" "sselog")
4490 (set_attr "prefix_rep" "1")
4491 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operand 2 to the V4SI zero vector
;; (CONST0_RTX); how operand 2 is used in the pattern is not visible here.
4493 (define_expand "sse2_loadd"
4494 [(set (match_operand:V4SI 0 "register_operand" "")
4497 (match_operand:SI 1 "nonimmediate_operand" ""))
4501 "operands[2] = CONST0_RTX (V4SImode);")
;; Place SImode operand 2 into a V4SI register.  Four alternatives:
;;   0: movd from memory, 1: movd from a general register (dest class Yi),
;;   2: movss from memory, 3: movss from an SSE register with operand 1
;;   tied to the destination.
;; In alternatives 0-2 operand 1 must satisfy constraint "C" (with predicate
;; reg_or_0_operand this is presumably the zero vector; confirm).
4503 (define_insn "sse2_loadld"
4504 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4507 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4508 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4512 movd\t{%2, %0|%0, %2}
4513 movd\t{%2, %0|%0, %2}
4514 movss\t{%2, %0|%0, %2}
4515 movss\t{%2, %0|%0, %2}"
4516 [(set_attr "type" "ssemov")
4517 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of V4SI operand 1 into an SImode destination (memory, SSE
;; register, or general register).  After reload, when inter-unit moves are
;; enabled, or the destination is memory, or it is not a general register,
;; the insn is split into a plain SImode move from the same hard register
;; viewed in SImode.
4519 (define_insn_and_split "sse2_stored"
4520 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4522 (match_operand:V4SI 1 "register_operand" "x,Yi")
4523 (parallel [(const_int 0)])))]
4526 "&& reload_completed
4527 && (TARGET_INTER_UNIT_MOVES
4528 || MEM_P (operands [0])
4529 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4530 [(set (match_dup 0) (match_dup 1))]
4532 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (0..3) from a V4SI value that lives in
;; offsettable memory ("o").  The split code replaces the extraction with an
;; ordinary SImode load from the vector's address plus i*4 bytes.
4535 (define_insn_and_split "*vec_ext_v4si_mem"
4536 [(set (match_operand:SI 0 "register_operand" "=r")
4538 (match_operand:V4SI 1 "memory_operand" "o")
4539 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4545 int i = INTVAL (operands[2]);
4547 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of V2DI operand 1 into a DImode
;; destination (register or memory).  Its condition and preparation
;; statements are not visible in this excerpt.
4551 (define_expand "sse_storeq"
4552 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4554 (match_operand:V2DI 1 "register_operand" "")
4555 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DImode store.  Three alternatives:
;; SSE register to memory/SSE register, SSE register (class Yi) to a general
;; register, and offsettable memory to a general register.  Memory-to-memory
;; is rejected by the condition.  Only the third alternative's template
;; (mov{q}) is visible here; its type is imov and its mode DI.
4559 (define_insn "*sse2_storeq_rex64"
4560 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4562 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4563 (parallel [(const_int 0)])))]
4564 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4568 mov{q}\t{%1, %0|%0, %1}"
4569 [(set_attr "type" "*,*,imov")
4570 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode store from a V2DI SSE register to memory or an SSE
;; register, followed by what appears to be a separate split for the same
;; RTL shape (its header line is not visible in this excerpt; confirm).
;; The split's visible condition requires inter-unit moves, a memory
;; destination, or a non-general-register destination; it rewrites the
;; extraction as a plain DImode move from the same hard register viewed in
;; DImode.
4572 (define_insn "*sse2_storeq"
4573 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4575 (match_operand:V2DI 1 "register_operand" "x")
4576 (parallel [(const_int 0)])))]
4581 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4583 (match_operand:V2DI 1 "register_operand" "")
4584 (parallel [(const_int 0)])))]
4587 && (TARGET_INTER_UNIT_MOVES
4588 || MEM_P (operands [0])
4589 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4590 [(set (match_dup 0) (match_dup 1))]
4592 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI value on 64-bit targets.  Alternatives:
;;   0: movhps from an SSE register to memory,
;;   1: psrldq by 8 bytes in place (source tied to destination),
;;   2: movq from memory into an SSE register,
;;   3: mov{q} from memory into a general register.
;; The memory forms print operand 1 with the %H modifier (presumably the
;; upper 8 bytes of the 16-byte memory operand; confirm).  Memory-to-memory
;; is rejected by the condition.
4595 (define_insn "*vec_extractv2di_1_rex64"
4596 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4598 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4599 (parallel [(const_int 1)])))]
4600 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4602 movhps\t{%1, %0|%0, %1}
4603 psrldq\t{$8, %0|%0, 8}
4604 movq\t{%H1, %0|%0, %H1}
4605 mov{q}\t{%H1, %0|%0, %H1}"
4606 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4607 (set_attr "memory" "*,none,*,*")
4608 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI value with SSE2 (the first part of the
;; condition is not visible in this excerpt).  Alternatives: movhps to
;; memory, psrldq by 8 bytes in place, movq from memory via the %H operand
;; modifier.  Memory-to-memory is rejected by the condition.
4610 (define_insn "*vec_extractv2di_1_sse2"
4611 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4613 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4614 (parallel [(const_int 1)])))]
4616 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4618 movhps\t{%1, %0|%0, %1}
4619 psrldq\t{$8, %0|%0, 8}
4620 movq\t{%H1, %0|%0, %H1}"
4621 [(set_attr "type" "ssemov,sseishft,ssemov")
4622 (set_attr "memory" "*,none,*")
4623 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only fallback (enabled when TARGET_SSE but not TARGET_SSE2) for
;; extracting element 1 of a V2DI value.  Alternatives: movhps to memory,
;; movhlps between SSE registers, movlps from memory via the %H operand
;; modifier.  Memory-to-memory is rejected by the condition.
4625 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4626 (define_insn "*vec_extractv2di_1_sse"
4627 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4629 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4630 (parallel [(const_int 1)])))]
4631 "!TARGET_SSE2 && TARGET_SSE
4632 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4634 movhps\t{%1, %0|%0, %1}
4635 movhlps\t{%1, %0|%0, %1}
4636 movlps\t{%H1, %0|%0, %H1}"
4637 [(set_attr "type" "ssemov")
4638 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast SImode operand 1 to all elements of a V4SI register.
;; Alternative 0 uses pshufd with immediate 0 (source in a Y2-class
;; register); alternative 1 uses shufps with immediate 0 on the destination
;; itself, so the source is tied to the destination.
4640 (define_insn "*vec_dupv4si"
4641 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4643 (match_operand:SI 1 "register_operand" " Y2,0")))]
4646 pshufd\t{$0, %1, %0|%0, %1, 0}
4647 shufps\t{$0, %0, %0|%0, %0, 0}"
4648 [(set_attr "type" "sselog1")
4649 (set_attr "mode" "TI,V4SF")])
;; Broadcast DImode operand 1 into a V2DI register; in both alternatives
;; the source is tied to the destination.  The output templates are not
;; visible in this excerpt; the attributes mark the alternatives as
;; sselog1/TI and ssemov/V4SF respectively.
4651 (define_insn "*vec_dupv2di"
4652 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4654 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4659 [(set_attr "type" "sselog1,ssemov")
4660 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI from two SImode values.  Alternatives 0 and 1 target SSE
;; registers (class Y2): punpckldq when operand 1 is tied to the destination
;; and operand 2 is a register, movd when operand 2 satisfies constraint
;; "C".  Alternatives 2 and 3 are the same two forms on MMX registers ("y").
4662 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4663 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4664 ;; alternatives pretty much forces the MMX alternative to be chosen.
4665 (define_insn "*sse2_concatv2si"
4666 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4668 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4669 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4672 punpckldq\t{%2, %0|%0, %2}
4673 movd\t{%1, %0|%0, %1}
4674 punpckldq\t{%2, %0|%0, %2}
4675 movd\t{%1, %0|%0, %1}"
4676 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4677 (set_attr "mode" "TI,TI,DI,DI")])
;; SSE1 counterpart of the V2SI concatenation.  SSE alternatives use
;; unpcklps (register pair, operand 1 tied to the destination) and movss
;; (operand 1 from memory, operand 2 satisfying "C"); MMX alternatives use
;; punpckldq and movd.  The insn condition is not visible in this excerpt.
4679 (define_insn "*sse1_concatv2si"
4680 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4682 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4683 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4686 unpcklps\t{%2, %0|%0, %2}
4687 movss\t{%1, %0|%0, %1}
4688 punpckldq\t{%2, %0|%0, %2}
4689 movd\t{%1, %0|%0, %1}"
4690 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4691 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI halves.  Operand 1 is always tied to the
;; destination; operand 2 supplies the other half via punpcklqdq (Y2-class
;; register), movlhps (SSE register) or movhps (memory).
4693 (define_insn "*vec_concatv4si_1"
4694 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4696 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4697 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4700 punpcklqdq\t{%2, %0|%0, %2}
4701 movlhps\t{%2, %0|%0, %2}
4702 movhps\t{%2, %0|%0, %2}"
4703 [(set_attr "type" "sselog,ssemov,ssemov")
4704 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI from two DImode values on 32-bit targets (condition:
;; !TARGET_64BIT && TARGET_SSE).  Six alternatives:
;;   0: movq from memory, operand 2 satisfying "C",
;;   1: movq2dq from an MMX register, operand 2 satisfying "C",
;;   2: punpcklqdq, 3: movlhps, 4: movhps (operand 1 tied to destination),
;;   5: movlps from memory with operand 2 tied to the destination.
4706 (define_insn "vec_concatv2di"
4707 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4709 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4710 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4711 "!TARGET_64BIT && TARGET_SSE"
4713 movq\t{%1, %0|%0, %1}
4714 movq2dq\t{%1, %0|%0, %1}
4715 punpcklqdq\t{%2, %0|%0, %2}
4716 movlhps\t{%2, %0|%0, %2}
4717 movhps\t{%2, %0|%0, %2}
4718 movlps\t{%1, %0|%0, %1}"
4719 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4720 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Variant of vec_concatv2di with one extra alternative (index 1): movq from
;; a general register into a Yi-class destination.  The other six
;; alternatives mirror vec_concatv2di.  The insn condition is not visible in
;; this excerpt (the "_rex" name suggests 64-bit only; confirm).
4722 (define_insn "*vec_concatv2di_rex"
4723 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4725 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4726 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4729 movq\t{%1, %0|%0, %1}
4730 movq\t{%1, %0|%0, %1}
4731 movq2dq\t{%1, %0|%0, %1}
4732 punpcklqdq\t{%2, %0|%0, %2}
4733 movlhps\t{%2, %0|%0, %2}
4734 movhps\t{%2, %0|%0, %2}
4735 movlps\t{%1, %0|%0, %1}"
4736 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4737 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Standard-name expander: set element operand 2 (a constant index) of V2DI
;; operand 0 to DImode operand 1.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
4739 (define_expand "vec_setv2di"
4740 [(match_operand:V2DI 0 "register_operand" "")
4741 (match_operand:DI 1 "register_operand" "")
4742 (match_operand 2 "const_int_operand" "")]
4745 ix86_expand_vector_set (false, operands[0], operands[1],
4746 INTVAL (operands[2]));
;; Standard-name expander: extract element operand 2 (a constant index) of
;; V2DI operand 1 into DImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
4750 (define_expand "vec_extractv2di"
4751 [(match_operand:DI 0 "register_operand" "")
4752 (match_operand:V2DI 1 "register_operand" "")
4753 (match_operand 2 "const_int_operand" "")]
4756 ix86_expand_vector_extract (false, operands[0], operands[1],
4757 INTVAL (operands[2]));
;; Standard-name expander: initialize V2DI operand 0 from operand 1 (no
;; predicate or mode is imposed on operand 1).  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
4761 (define_expand "vec_initv2di"
4762 [(match_operand:V2DI 0 "register_operand" "")
4763 (match_operand 1 "" "")]
4766 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-name expander: set element operand 2 (a constant index) of V4SI
;; operand 0 to SImode operand 1 via ix86_expand_vector_set (first argument
;; `false').
4770 (define_expand "vec_setv4si"
4771 [(match_operand:V4SI 0 "register_operand" "")
4772 (match_operand:SI 1 "register_operand" "")
4773 (match_operand 2 "const_int_operand" "")]
4776 ix86_expand_vector_set (false, operands[0], operands[1],
4777 INTVAL (operands[2]));
;; Standard-name expander: extract element operand 2 (a constant index) of
;; V4SI operand 1 into SImode operand 0 via ix86_expand_vector_extract
;; (first argument `false').
4781 (define_expand "vec_extractv4si"
4782 [(match_operand:SI 0 "register_operand" "")
4783 (match_operand:V4SI 1 "register_operand" "")
4784 (match_operand 2 "const_int_operand" "")]
4787 ix86_expand_vector_extract (false, operands[0], operands[1],
4788 INTVAL (operands[2]));
;; Standard-name expander: initialize V4SI operand 0 from operand 1 via
;; ix86_expand_vector_init (first argument `false').
4792 (define_expand "vec_initv4si"
4793 [(match_operand:V4SI 0 "register_operand" "")
4794 (match_operand 1 "" "")]
4797 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-name expander: set element operand 2 (a constant index) of V8HI
;; operand 0 to HImode operand 1 via ix86_expand_vector_set (first argument
;; `false').
4801 (define_expand "vec_setv8hi"
4802 [(match_operand:V8HI 0 "register_operand" "")
4803 (match_operand:HI 1 "register_operand" "")
4804 (match_operand 2 "const_int_operand" "")]
4807 ix86_expand_vector_set (false, operands[0], operands[1],
4808 INTVAL (operands[2]));
;; Standard-name expander: extract element operand 2 (a constant index) of
;; V8HI operand 1 into HImode operand 0 via ix86_expand_vector_extract
;; (first argument `false').
4812 (define_expand "vec_extractv8hi"
4813 [(match_operand:HI 0 "register_operand" "")
4814 (match_operand:V8HI 1 "register_operand" "")
4815 (match_operand 2 "const_int_operand" "")]
4818 ix86_expand_vector_extract (false, operands[0], operands[1],
4819 INTVAL (operands[2]));
;; Standard-name expander: initialize V8HI operand 0 from operand 1 via
;; ix86_expand_vector_init (first argument `false').
4823 (define_expand "vec_initv8hi"
4824 [(match_operand:V8HI 0 "register_operand" "")
4825 (match_operand 1 "" "")]
4828 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-name expander: set element operand 2 (a constant index) of
;; V16QI operand 0 to QImode operand 1 via ix86_expand_vector_set (first
;; argument `false').
4832 (define_expand "vec_setv16qi"
4833 [(match_operand:V16QI 0 "register_operand" "")
4834 (match_operand:QI 1 "register_operand" "")
4835 (match_operand 2 "const_int_operand" "")]
4838 ix86_expand_vector_set (false, operands[0], operands[1],
4839 INTVAL (operands[2]));
;; Standard-name expander: extract element operand 2 (a constant index) of
;; V16QI operand 1 into QImode operand 0 via ix86_expand_vector_extract
;; (first argument `false').
4843 (define_expand "vec_extractv16qi"
4844 [(match_operand:QI 0 "register_operand" "")
4845 (match_operand:V16QI 1 "register_operand" "")
4846 (match_operand 2 "const_int_operand" "")]
4849 ix86_expand_vector_extract (false, operands[0], operands[1],
4850 INTVAL (operands[2]));
;; Standard-name expander: initialize V16QI operand 0 from operand 1 via
;; ix86_expand_vector_init (first argument `false').
4854 (define_expand "vec_initv16qi"
4855 [(match_operand:V16QI 0 "register_operand" "")
4856 (match_operand 1 "" "")]
4859 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Unsigned, high-half widening unpack V16QI -> V8HI.  Dispatches to one of
;; three helpers (SSE4, SSE5, or generic SSE) with flags (true, true); the
;; condition guarding the first branch is not visible in this excerpt.
;; Across the sibling expanders the first flag tracks unsigned vs. signed
;; and the second tracks high vs. low half.
4863 (define_expand "vec_unpacku_hi_v16qi"
4864 [(match_operand:V8HI 0 "register_operand" "")
4865 (match_operand:V16QI 1 "register_operand" "")]
4869 ix86_expand_sse4_unpack (operands, true, true);
4870 else if (TARGET_SSE5)
4871 ix86_expand_sse5_unpack (operands, true, true);
4873 ix86_expand_sse_unpack (operands, true, true);
;; Signed, high-half widening unpack V16QI -> V8HI.  Dispatches to the
;; SSE4, SSE5, or generic SSE helper with flags (false, true); the condition
;; guarding the first branch is not visible in this excerpt.
4877 (define_expand "vec_unpacks_hi_v16qi"
4878 [(match_operand:V8HI 0 "register_operand" "")
4879 (match_operand:V16QI 1 "register_operand" "")]
4883 ix86_expand_sse4_unpack (operands, false, true);
4884 else if (TARGET_SSE5)
4885 ix86_expand_sse5_unpack (operands, false, true);
4887 ix86_expand_sse_unpack (operands, false, true);
;; Unsigned, low-half widening unpack V16QI -> V8HI.  Dispatches to the
;; SSE4, SSE5, or generic SSE helper with flags (true, false); the condition
;; guarding the first branch is not visible in this excerpt.
4891 (define_expand "vec_unpacku_lo_v16qi"
4892 [(match_operand:V8HI 0 "register_operand" "")
4893 (match_operand:V16QI 1 "register_operand" "")]
4897 ix86_expand_sse4_unpack (operands, true, false);
4898 else if (TARGET_SSE5)
4899 ix86_expand_sse5_unpack (operands, true, false);
4901 ix86_expand_sse_unpack (operands, true, false);
;; Signed, low-half widening unpack V16QI -> V8HI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (false, false); the condition
;; guarding the first branch is not visible in this excerpt.
4905 (define_expand "vec_unpacks_lo_v16qi"
4906 [(match_operand:V8HI 0 "register_operand" "")
4907 (match_operand:V16QI 1 "register_operand" "")]
4911 ix86_expand_sse4_unpack (operands, false, false);
4912 else if (TARGET_SSE5)
4913 ix86_expand_sse5_unpack (operands, false, false);
4915 ix86_expand_sse_unpack (operands, false, false);
;; Unsigned, high-half widening unpack V8HI -> V4SI.  Dispatches to the
;; SSE4, SSE5, or generic SSE helper with flags (true, true); the condition
;; guarding the first branch is not visible in this excerpt.
4919 (define_expand "vec_unpacku_hi_v8hi"
4920 [(match_operand:V4SI 0 "register_operand" "")
4921 (match_operand:V8HI 1 "register_operand" "")]
4925 ix86_expand_sse4_unpack (operands, true, true);
4926 else if (TARGET_SSE5)
4927 ix86_expand_sse5_unpack (operands, true, true);
4929 ix86_expand_sse_unpack (operands, true, true);
;; Signed, high-half widening unpack V8HI -> V4SI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (false, true); the condition
;; guarding the first branch is not visible in this excerpt.
4933 (define_expand "vec_unpacks_hi_v8hi"
4934 [(match_operand:V4SI 0 "register_operand" "")
4935 (match_operand:V8HI 1 "register_operand" "")]
4939 ix86_expand_sse4_unpack (operands, false, true);
4940 else if (TARGET_SSE5)
4941 ix86_expand_sse5_unpack (operands, false, true);
4943 ix86_expand_sse_unpack (operands, false, true);
;; Unsigned, low-half widening unpack V8HI -> V4SI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (true, false); the condition
;; guarding the first branch is not visible in this excerpt.
4947 (define_expand "vec_unpacku_lo_v8hi"
4948 [(match_operand:V4SI 0 "register_operand" "")
4949 (match_operand:V8HI 1 "register_operand" "")]
4953 ix86_expand_sse4_unpack (operands, true, false);
4954 else if (TARGET_SSE5)
4955 ix86_expand_sse5_unpack (operands, true, false);
4957 ix86_expand_sse_unpack (operands, true, false);
;; Signed, low-half widening unpack V8HI -> V4SI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (false, false); the condition
;; guarding the first branch is not visible in this excerpt.
4961 (define_expand "vec_unpacks_lo_v8hi"
4962 [(match_operand:V4SI 0 "register_operand" "")
4963 (match_operand:V8HI 1 "register_operand" "")]
4967 ix86_expand_sse4_unpack (operands, false, false);
4968 else if (TARGET_SSE5)
4969 ix86_expand_sse5_unpack (operands, false, false);
4971 ix86_expand_sse_unpack (operands, false, false);
;; Unsigned, high-half widening unpack V4SI -> V2DI.  Dispatches to the
;; SSE4, SSE5, or generic SSE helper with flags (true, true); the condition
;; guarding the first branch is not visible in this excerpt.
4975 (define_expand "vec_unpacku_hi_v4si"
4976 [(match_operand:V2DI 0 "register_operand" "")
4977 (match_operand:V4SI 1 "register_operand" "")]
4981 ix86_expand_sse4_unpack (operands, true, true);
4982 else if (TARGET_SSE5)
4983 ix86_expand_sse5_unpack (operands, true, true);
4985 ix86_expand_sse_unpack (operands, true, true);
;; Signed, high-half widening unpack V4SI -> V2DI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (false, true); the condition
;; guarding the first branch is not visible in this excerpt.
4989 (define_expand "vec_unpacks_hi_v4si"
4990 [(match_operand:V2DI 0 "register_operand" "")
4991 (match_operand:V4SI 1 "register_operand" "")]
4995 ix86_expand_sse4_unpack (operands, false, true);
4996 else if (TARGET_SSE5)
4997 ix86_expand_sse5_unpack (operands, false, true);
4999 ix86_expand_sse_unpack (operands, false, true);
;; Unsigned, low-half widening unpack V4SI -> V2DI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (true, false); the condition
;; guarding the first branch is not visible in this excerpt.
5003 (define_expand "vec_unpacku_lo_v4si"
5004 [(match_operand:V2DI 0 "register_operand" "")
5005 (match_operand:V4SI 1 "register_operand" "")]
5009 ix86_expand_sse4_unpack (operands, true, false);
5010 else if (TARGET_SSE5)
5011 ix86_expand_sse5_unpack (operands, true, false);
5013 ix86_expand_sse_unpack (operands, true, false);
;; Signed, low-half widening unpack V4SI -> V2DI.  Dispatches to the SSE4,
;; SSE5, or generic SSE helper with flags (false, false); the condition
;; guarding the first branch is not visible in this excerpt.
5017 (define_expand "vec_unpacks_lo_v4si"
5018 [(match_operand:V2DI 0 "register_operand" "")
5019 (match_operand:V4SI 1 "register_operand" "")]
5023 ix86_expand_sse4_unpack (operands, false, false);
5024 else if (TARGET_SSE5)
5025 ix86_expand_sse5_unpack (operands, false, false);
5027 ix86_expand_sse_unpack (operands, false, false);
5031 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5035 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb on V16QI.  The pattern combines operands 1 and 2 with a constant
;; vector of sixteen 1s (presumably the +1 rounding term of the average; the
;; surrounding arithmetic wrappers are not visible in this excerpt).
;; Operand 1 is marked commutative ("%0"), and the condition checks the
;; operand combination with ix86_binary_operator_ok for PLUS.
5037 (define_insn "sse2_uavgv16qi3"
5038 [(set (match_operand:V16QI 0 "register_operand" "=x")
5044 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5046 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5047 (const_vector:V16QI [(const_int 1) (const_int 1)
5048 (const_int 1) (const_int 1)
5049 (const_int 1) (const_int 1)
5050 (const_int 1) (const_int 1)
5051 (const_int 1) (const_int 1)
5052 (const_int 1) (const_int 1)
5053 (const_int 1) (const_int 1)
5054 (const_int 1) (const_int 1)]))
5056 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5057 "pavgb\t{%2, %0|%0, %2}"
5058 [(set_attr "type" "sseiadd")
5059 (set_attr "prefix_data16" "1")
5060 (set_attr "mode" "TI")])
;; pavgw on V8HI.  Same shape as sse2_uavgv16qi3 with a constant vector of
;; eight 1s (presumably the +1 rounding term; the surrounding arithmetic
;; wrappers are not visible in this excerpt).  Operand 1 is commutative
;; ("%0"); the condition uses ix86_binary_operator_ok for PLUS.
5062 (define_insn "sse2_uavgv8hi3"
5063 [(set (match_operand:V8HI 0 "register_operand" "=x")
5069 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5071 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5072 (const_vector:V8HI [(const_int 1) (const_int 1)
5073 (const_int 1) (const_int 1)
5074 (const_int 1) (const_int 1)
5075 (const_int 1) (const_int 1)]))
5077 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5078 "pavgw\t{%2, %0|%0, %2}"
5079 [(set_attr "type" "sseiadd")
5080 (set_attr "prefix_data16" "1")
5081 (set_attr "mode" "TI")])
;; psadbw, modelled as an opaque unspec over two V16QI inputs producing a
;; V2DI result instead of spelling out the arithmetic in RTL.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
5083 ;; The correct representation for this is absolutely enormous, and
5084 ;; surely not generally useful.
5085 (define_insn "sse2_psadbw"
5086 [(set (match_operand:V2DI 0 "register_operand" "=x")
5087 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5088 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5091 "psadbw\t{%2, %0|%0, %2}"
5092 [(set_attr "type" "sseiadd")
5093 (set_attr "prefix_data16" "1")
5094 (set_attr "mode" "TI")])
;; movmskps / movmskpd, generated from one template by iterating over
;; SSEMODEF2P (V4SF, V2DF).  <sse> expands to "sse" or "sse2" and
;; <ssemodesuffixf2c> to "s" or "d", giving the pattern names and mnemonic
;; suffix.  The result lands in an SImode general register.
5096 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5097 [(set (match_operand:SI 0 "register_operand" "=r")
5099 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5101 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5102 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5103 [(set_attr "type" "ssecvt")
5104 (set_attr "mode" "<MODE>")])
;; pmovmskb: modelled as an unspec taking a V16QI SSE register and
;; producing an SImode value in a general register.
5106 (define_insn "sse2_pmovmskb"
5107 [(set (match_operand:SI 0 "register_operand" "=r")
5108 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5111 "pmovmskb\t{%1, %0|%0, %1}"
5112 [(set_attr "type" "ssecvt")
5113 (set_attr "prefix_data16" "1")
5114 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI store to memory operand 0 expressed as
;; an unspec of two V16QI register operands (the remainder of the unspec and
;; the condition are not visible in this excerpt).
5116 (define_expand "sse2_maskmovdqu"
5117 [(set (match_operand:V16QI 0 "memory_operand" "")
5118 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5119 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination address is an SImode register forced
;; into the "D" register class, and the old memory contents appear as an
;; input of the unspec (mem of match_dup 0), so the store reads as a
;; read-modify-write of that location.  Only operands 1 and 2 are printed;
;; the address register is implicit in the instruction.
5125 (define_insn "*sse2_maskmovdqu"
5126 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5127 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5128 (match_operand:V16QI 2 "register_operand" "x")
5129 (mem:V16QI (match_dup 0))]
5131 "TARGET_SSE2 && !TARGET_64BIT"
5132 ;; @@@ check ordering of operands in intel/nonintel syntax
5133 "maskmovdqu\t{%2, %1|%1, %2}"
5134 [(set_attr "type" "ssecvt")
5135 (set_attr "prefix_data16" "1")
5136 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Identical to *sse2_maskmovdqu except that the address
;; operand is a DImode register (still constrained to class "D") and the
;; condition requires TARGET_64BIT.
5138 (define_insn "*sse2_maskmovdqu_rex64"
5139 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5140 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5141 (match_operand:V16QI 2 "register_operand" "x")
5142 (mem:V16QI (match_dup 0))]
5144 "TARGET_SSE2 && TARGET_64BIT"
5145 ;; @@@ check ordering of operands in intel/nonintel syntax
5146 "maskmovdqu\t{%2, %1|%1, %2}"
5147 [(set_attr "type" "ssecvt")
5148 (set_attr "prefix_data16" "1")
5149 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile that takes an SImode memory operand; the
;; "memory" attribute marks it as a load.  The unspec code, condition and
;; output template are not visible in this excerpt.
5151 (define_insn "sse_ldmxcsr"
5152 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5156 [(set_attr "type" "sse")
5157 (set_attr "memory" "load")])
;; stmxcsr: stores the result of an unspec_volatile (UNSPECV_STMXCSR) into
;; an SImode memory operand; the "memory" attribute marks it as a store.
;; The condition and output template are not visible in this excerpt.
5159 (define_insn "sse_stmxcsr"
5160 [(set (match_operand:SI 0 "memory_operand" "=m")
5161 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5164 [(set_attr "type" "sse")
5165 (set_attr "memory" "store")])
;; Expander for sfence, available with SSE or 3DNow!-A.  The preparation
;; code creates operand 0 as a volatile BLKmode MEM with a scratch address,
;; which the pattern both sets and uses inside UNSPEC_SFENCE.
5167 (define_expand "sse_sfence"
5169 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5170 "TARGET_SSE || TARGET_3DNOW_A"
5172 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5173 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the sfence expander: a BLKmode operand set to
;; UNSPEC_SFENCE of itself.  The "memory" attribute is "unknown".  The
;; output template is not visible in this excerpt.
5176 (define_insn "*sse_sfence"
5177 [(set (match_operand:BLK 0 "" "")
5178 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5179 "TARGET_SSE || TARGET_3DNOW_A"
5181 [(set_attr "type" "sse")
5182 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile over an address operand (predicate
;; address_operand, constraint "p").  The "memory" attribute is "unknown".
;; The unspec code, condition and output template are not visible here.
5184 (define_insn "sse2_clflush"
5185 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5189 [(set_attr "type" "sse")
5190 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM with a scratch address, used inside UNSPEC_MFENCE.
;; The condition is not visible in this excerpt.
5192 (define_expand "sse2_mfence"
5194 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5197 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5198 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the mfence expander: a BLKmode operand set to
;; UNSPEC_MFENCE of itself.  The "memory" attribute is "unknown".  The
;; condition and output template are not visible in this excerpt.
5201 (define_insn "*sse2_mfence"
5202 [(set (match_operand:BLK 0 "" "")
5203 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5206 [(set_attr "type" "sse")
5207 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM with a scratch address, used inside UNSPEC_LFENCE.
;; The condition is not visible in this excerpt.
5209 (define_expand "sse2_lfence"
5211 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5214 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5215 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the lfence expander: a BLKmode operand set to
;; UNSPEC_LFENCE of itself.  The "memory" attribute is "unknown".  The
;; condition and output template are not visible in this excerpt.
5218 (define_insn "*sse2_lfence"
5219 [(set (match_operand:BLK 0 "" "")
5220 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5223 [(set_attr "type" "sse")
5224 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile over two SImode registers pinned to the "a"
;; and "c" register classes.  One pattern serves 32- and 64-bit code, for
;; the reason given in the comment inside the pattern.  Length is fixed at
;; 3 bytes.  The output template is not visible in this excerpt.
5226 (define_insn "sse3_mwait"
5227 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5228 (match_operand:SI 1 "register_operand" "c")]
5231 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5232 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5233 ;; we only need to set up 32bit registers.
5235 [(set_attr "length" "3")])
;; 32-bit monitor: an unspec_volatile over three SImode registers pinned to
;; the "a", "c" and "d" register classes.  Enabled for TARGET_SSE3 when not
;; TARGET_64BIT; length is fixed at 3 bytes.
5237 (define_insn "sse3_monitor"
5238 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5239 (match_operand:SI 1 "register_operand" "c")
5240 (match_operand:SI 2 "register_operand" "d")]
5242 "TARGET_SSE3 && !TARGET_64BIT"
5243 "monitor\t%0, %1, %2"
5244 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is a DImode register in class "a", while
;; operands 1 and 2 stay SImode in classes "c" and "d" for the reason given
;; in the comment inside the pattern.  Length is fixed at 3 bytes.  The
;; output template is not visible in this excerpt.
5246 (define_insn "sse3_monitor64"
5247 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5248 (match_operand:SI 1 "register_operand" "c")
5249 (match_operand:SI 2 "register_operand" "d")]
5251 "TARGET_SSE3 && TARGET_64BIT"
5252 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5253 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5254 ;; zero extended to 64bit, we only need to set up 32bit registers.
5256 [(set_attr "length" "3")])
5258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5260 ;; SSSE3 instructions
5262 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw on V8HI (SSE register form).  The pattern pairs adjacent elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then of operand 2; the
;; operators combining each pair are not visible in this excerpt.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
5264 (define_insn "ssse3_phaddwv8hi3"
5265 [(set (match_operand:V8HI 0 "register_operand" "=x")
5271 (match_operand:V8HI 1 "register_operand" "0")
5272 (parallel [(const_int 0)]))
5273 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5275 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5276 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5279 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5280 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5282 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5283 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5288 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5289 (parallel [(const_int 0)]))
5290 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5292 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5293 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5296 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5297 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5299 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5300 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5302 "phaddw\t{%2, %0|%0, %2}"
5303 [(set_attr "type" "sseiadd")
5304 (set_attr "prefix_data16" "1")
5305 (set_attr "prefix_extra" "1")
5306 (set_attr "mode" "TI")])
;; phaddw on V4HI (MMX register form, constraint "y").  The pattern pairs
;; adjacent elements (0,1) and (2,3) of operand 1 and then of operand 2; the
;; operators combining each pair are not visible in this excerpt.
5308 (define_insn "ssse3_phaddwv4hi3"
5309 [(set (match_operand:V4HI 0 "register_operand" "=y")
5314 (match_operand:V4HI 1 "register_operand" "0")
5315 (parallel [(const_int 0)]))
5316 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5318 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5319 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5323 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5324 (parallel [(const_int 0)]))
5325 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5327 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5328 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5330 "phaddw\t{%2, %0|%0, %2}"
5331 [(set_attr "type" "sseiadd")
5332 (set_attr "prefix_extra" "1")
5333 (set_attr "mode" "DI")])
;; phaddd on V4SI (SSE register form).  The pattern pairs adjacent elements
;; (0,1) and (2,3) of operand 1 and then of operand 2; the operators
;; combining each pair are not visible in this excerpt.
5335 (define_insn "ssse3_phadddv4si3"
5336 [(set (match_operand:V4SI 0 "register_operand" "=x")
5341 (match_operand:V4SI 1 "register_operand" "0")
5342 (parallel [(const_int 0)]))
5343 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5345 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5346 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5350 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5351 (parallel [(const_int 0)]))
5352 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5354 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5355 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5357 "phaddd\t{%2, %0|%0, %2}"
5358 [(set_attr "type" "sseiadd")
5359 (set_attr "prefix_data16" "1")
5360 (set_attr "prefix_extra" "1")
5361 (set_attr "mode" "TI")])
;; phaddd on V2SI (MMX register form, constraint "y").  The pattern pairs
;; elements (0,1) of operand 1 and (0,1) of operand 2; the operators
;; combining each pair are not visible in this excerpt.
5363 (define_insn "ssse3_phadddv2si3"
5364 [(set (match_operand:V2SI 0 "register_operand" "=y")
5368 (match_operand:V2SI 1 "register_operand" "0")
5369 (parallel [(const_int 0)]))
5370 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5373 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5374 (parallel [(const_int 0)]))
5375 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5377 "phaddd\t{%2, %0|%0, %2}"
5378 [(set_attr "type" "sseiadd")
5379 (set_attr "prefix_extra" "1")
5380 (set_attr "mode" "DI")])
;; phaddsw on V8HI (SSE register form).  Same element pairing as
;; ssse3_phaddwv8hi3: (0,1), (2,3), (4,5), (6,7) of operand 1 and then of
;; operand 2.  The operators combining each pair are not visible in this
;; excerpt (presumably a saturating add, given the mnemonic; confirm).
5382 (define_insn "ssse3_phaddswv8hi3"
5383 [(set (match_operand:V8HI 0 "register_operand" "=x")
5389 (match_operand:V8HI 1 "register_operand" "0")
5390 (parallel [(const_int 0)]))
5391 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5393 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5394 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5397 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5398 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5400 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5401 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5406 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5407 (parallel [(const_int 0)]))
5408 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5410 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5411 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5414 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5415 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5417 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5418 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5420 "phaddsw\t{%2, %0|%0, %2}"
5421 [(set_attr "type" "sseiadd")
5422 (set_attr "prefix_data16" "1")
5423 (set_attr "prefix_extra" "1")
5424 (set_attr "mode" "TI")])
;; SSSE3 phaddsw on V4HI, using MMX registers ("y").  Operand 1 is tied to
;; the destination; operand 2 may be an MMX register or memory.
5426 (define_insn "ssse3_phaddswv4hi3"
5427 [(set (match_operand:V4HI 0 "register_operand" "=y")
5432 (match_operand:V4HI 1 "register_operand" "0")
5433 (parallel [(const_int 0)]))
5434 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5436 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5437 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5441 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5442 (parallel [(const_int 0)]))
5443 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5445 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5446 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5448 "phaddsw\t{%2, %0|%0, %2}"
5449 [(set_attr "type" "sseiadd")
5450 (set_attr "prefix_extra" "1")
5451 (set_attr "mode" "DI")])
;; SSSE3 phsubw on V8HI.  Operand 1 is tied to the destination SSE
;; register; operand 2 may be an SSE register or memory.  The selects shown
;; read HI elements 0-7 of operand 1, then of operand 2.
5453 (define_insn "ssse3_phsubwv8hi3"
5454 [(set (match_operand:V8HI 0 "register_operand" "=x")
5460 (match_operand:V8HI 1 "register_operand" "0")
5461 (parallel [(const_int 0)]))
5462 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5464 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5465 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5468 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5469 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5471 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5472 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5477 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5478 (parallel [(const_int 0)]))
5479 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5481 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5482 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5485 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5486 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5488 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5489 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5491 "phsubw\t{%2, %0|%0, %2}"
5492 [(set_attr "type" "sseiadd")
5493 (set_attr "prefix_data16" "1")
5494 (set_attr "prefix_extra" "1")
5495 (set_attr "mode" "TI")])
;; SSSE3 phsubw on V4HI, using MMX registers ("y").  Operand 1 is tied to
;; the destination; operand 2 may be an MMX register or memory.
5497 (define_insn "ssse3_phsubwv4hi3"
5498 [(set (match_operand:V4HI 0 "register_operand" "=y")
5503 (match_operand:V4HI 1 "register_operand" "0")
5504 (parallel [(const_int 0)]))
5505 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5507 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5508 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5512 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5513 (parallel [(const_int 0)]))
5514 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5516 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5517 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5519 "phsubw\t{%2, %0|%0, %2}"
5520 [(set_attr "type" "sseiadd")
5521 (set_attr "prefix_extra" "1")
5522 (set_attr "mode" "DI")])
;; SSSE3 phsubd on V4SI.  Operand 1 is tied to the destination SSE
;; register; operand 2 may be an SSE register or memory.  The selects shown
;; read SI elements 0-3 of operand 1, then of operand 2.
5524 (define_insn "ssse3_phsubdv4si3"
5525 [(set (match_operand:V4SI 0 "register_operand" "=x")
5530 (match_operand:V4SI 1 "register_operand" "0")
5531 (parallel [(const_int 0)]))
5532 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5534 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5535 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5539 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5540 (parallel [(const_int 0)]))
5541 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5543 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5544 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5546 "phsubd\t{%2, %0|%0, %2}"
5547 [(set_attr "type" "sseiadd")
5548 (set_attr "prefix_data16" "1")
5549 (set_attr "prefix_extra" "1")
5550 (set_attr "mode" "TI")])
;; SSSE3 phsubd on V2SI, using MMX registers ("y").  Operand 1 is tied to
;; the destination; operand 2 may be an MMX register or memory.
5552 (define_insn "ssse3_phsubdv2si3"
5553 [(set (match_operand:V2SI 0 "register_operand" "=y")
5557 (match_operand:V2SI 1 "register_operand" "0")
5558 (parallel [(const_int 0)]))
5559 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5562 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5563 (parallel [(const_int 0)]))
5564 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5566 "phsubd\t{%2, %0|%0, %2}"
5567 [(set_attr "type" "sseiadd")
5568 (set_attr "prefix_extra" "1")
5569 (set_attr "mode" "DI")])
;; SSSE3 phsubsw on V8HI.  Operand 1 is tied to the destination SSE
;; register; operand 2 may be an SSE register or memory.  The selects shown
;; read HI elements 0-7 of operand 1, then of operand 2.
5571 (define_insn "ssse3_phsubswv8hi3"
5572 [(set (match_operand:V8HI 0 "register_operand" "=x")
5578 (match_operand:V8HI 1 "register_operand" "0")
5579 (parallel [(const_int 0)]))
5580 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5582 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5583 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5586 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5587 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5589 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5590 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5595 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5596 (parallel [(const_int 0)]))
5597 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5599 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5600 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5603 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5604 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5606 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5607 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5609 "phsubsw\t{%2, %0|%0, %2}"
5610 [(set_attr "type" "sseiadd")
5611 (set_attr "prefix_data16" "1")
5612 (set_attr "prefix_extra" "1")
5613 (set_attr "mode" "TI")])
;; SSSE3 phsubsw on V4HI, using MMX registers ("y").  Operand 1 is tied to
;; the destination; operand 2 may be an MMX register or memory.
5615 (define_insn "ssse3_phsubswv4hi3"
5616 [(set (match_operand:V4HI 0 "register_operand" "=y")
5621 (match_operand:V4HI 1 "register_operand" "0")
5622 (parallel [(const_int 0)]))
5623 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5625 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5626 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5630 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5631 (parallel [(const_int 0)]))
5632 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5634 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5635 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5637 "phsubsw\t{%2, %0|%0, %2}"
5638 [(set_attr "type" "sseiadd")
5639 (set_attr "prefix_extra" "1")
5640 (set_attr "mode" "DI")])
;; SSSE3 pmaddubsw: V16QI inputs, V8HI result.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be an SSE register or memory.  Each
;; input is selected twice: once with an index list starting at element 0
;; and once with a list starting at element 1.
5642 (define_insn "ssse3_pmaddubswv8hi3"
5643 [(set (match_operand:V8HI 0 "register_operand" "=x")
5648 (match_operand:V16QI 1 "nonimmediate_operand" "0")
5649 (parallel [(const_int 0)
5659 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5660 (parallel [(const_int 0)
5670 (vec_select:V16QI (match_dup 1)
5671 (parallel [(const_int 1)
5680 (vec_select:V16QI (match_dup 2)
5681 (parallel [(const_int 1)
5688 (const_int 15)]))))))]
5690 "pmaddubsw\t{%2, %0|%0, %2}"
5691 [(set_attr "type" "sseiadd")
5692 (set_attr "prefix_data16" "1")
5693 (set_attr "prefix_extra" "1")
5694 (set_attr "mode" "TI")])
;; SSSE3 pmaddubsw, MMX form: V8QI inputs, V4HI result.  Operand 1 is tied
;; to the destination; operand 2 may be an MMX register or memory.  Each
;; input is selected twice, with index lists starting at 0 and at 1.
5696 (define_insn "ssse3_pmaddubswv4hi3"
5697 [(set (match_operand:V4HI 0 "register_operand" "=y")
5702 (match_operand:V8QI 1 "nonimmediate_operand" "0")
5703 (parallel [(const_int 0)
5709 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5710 (parallel [(const_int 0)
5716 (vec_select:V8QI (match_dup 1)
5717 (parallel [(const_int 1)
5722 (vec_select:V8QI (match_dup 2)
5723 (parallel [(const_int 1)
5726 (const_int 7)]))))))]
5728 "pmaddubsw\t{%2, %0|%0, %2}"
5729 [(set_attr "type" "sseiadd")
5730 (set_attr "prefix_extra" "1")
5731 (set_attr "mode" "DI")])
;; SSSE3 pmulhrsw on V8HI.  Operands 1 and 2 are marked commutative ("%"),
;; with operand 1 tied to the destination.  Enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands as a MULT.  The pattern
;; contains an all-ones V8HI constant (presumably the rounding increment --
;; confirm against the full RTL).
5733 (define_insn "ssse3_pmulhrswv8hi3"
5734 [(set (match_operand:V8HI 0 "register_operand" "=x")
5741 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5743 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5745 (const_vector:V8HI [(const_int 1) (const_int 1)
5746 (const_int 1) (const_int 1)
5747 (const_int 1) (const_int 1)
5748 (const_int 1) (const_int 1)]))
5750 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5751 "pmulhrsw\t{%2, %0|%0, %2}"
5752 [(set_attr "type" "sseimul")
5753 (set_attr "prefix_data16" "1")
5754 (set_attr "prefix_extra" "1")
5755 (set_attr "mode" "TI")])
;; SSSE3 pmulhrsw on V4HI, using MMX registers ("y").  Operands 1 and 2 are
;; marked commutative ("%"); enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands as a MULT.
5757 (define_insn "ssse3_pmulhrswv4hi3"
5758 [(set (match_operand:V4HI 0 "register_operand" "=y")
5765 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5767 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5769 (const_vector:V4HI [(const_int 1) (const_int 1)
5770 (const_int 1) (const_int 1)]))
5772 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5773 "pmulhrsw\t{%2, %0|%0, %2}"
5774 [(set_attr "type" "sseimul")
5775 (set_attr "prefix_extra" "1")
5776 (set_attr "mode" "DI")])
;; SSSE3 pshufb on V16QI, modelled as an unspec of operands 1 and 2.
;; Operand 1 is tied to the destination SSE register; operand 2 may be an
;; SSE register or memory.
5778 (define_insn "ssse3_pshufbv16qi3"
5779 [(set (match_operand:V16QI 0 "register_operand" "=x")
5780 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5781 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5784 "pshufb\t{%2, %0|%0, %2}"
5785 [(set_attr "type" "sselog1")
5786 (set_attr "prefix_data16" "1")
5787 (set_attr "prefix_extra" "1")
5788 (set_attr "mode" "TI")])
;; SSSE3 pshufb on V8QI, using MMX registers ("y"), modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be an MMX register
;; or memory.
5790 (define_insn "ssse3_pshufbv8qi3"
5791 [(set (match_operand:V8QI 0 "register_operand" "=y")
5792 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5793 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5796 "pshufb\t{%2, %0|%0, %2}"
5797 [(set_attr "type" "sselog1")
5798 (set_attr "prefix_extra" "1")
5799 (set_attr "mode" "DI")])
;; SSSE3 psignb/psignw/psignd for the SSEMODE124 modes (V16QI, V8HI, V4SI);
;; <ssevecsize> supplies the b/w/d mnemonic suffix.  Operand 1 is tied to
;; the destination SSE register; operand 2 may be a register or memory.
5801 (define_insn "ssse3_psign<mode>3"
5802 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5804 [(match_operand:SSEMODE124 1 "register_operand" "0")
5805 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5808 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5809 [(set_attr "type" "sselog1")
5810 (set_attr "prefix_data16" "1")
5811 (set_attr "prefix_extra" "1")
5812 (set_attr "mode" "TI")])
;; SSSE3 psign for the MMXMODEI modes, using MMX registers ("y");
;; <mmxvecsize> supplies the mnemonic suffix.  Operand 1 is tied to the
;; destination; operand 2 may be an MMX register or memory.
5814 (define_insn "ssse3_psign<mode>3"
5815 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5817 [(match_operand:MMXMODEI 1 "register_operand" "0")
5818 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5821 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5822 [(set_attr "type" "sselog1")
5823 (set_attr "prefix_extra" "1")
5824 (set_attr "mode" "DI")])
;; SSSE3 palignr on TImode, modelled as an unspec.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
5826 (define_insn "ssse3_palignrti"
5827 [(set (match_operand:TI 0 "register_operand" "=x")
5828 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5829 (match_operand:TI 2 "nonimmediate_operand" "xm")
5830 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5834 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5835 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5837 [(set_attr "type" "sseishft")
5838 (set_attr "prefix_data16" "1")
5839 (set_attr "prefix_extra" "1")
5840 (set_attr "mode" "TI")])
;; SSSE3 palignr on DImode, using MMX registers ("y").  As in the TImode
;; form, operand 3 is divided by 8 before being printed as the immediate.
5842 (define_insn "ssse3_palignrdi"
5843 [(set (match_operand:DI 0 "register_operand" "=y")
5844 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5845 (match_operand:DI 2 "nonimmediate_operand" "ym")
5846 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5850 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5851 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5853 [(set_attr "type" "sseishft")
5854 (set_attr "prefix_extra" "1")
5855 (set_attr "mode" "DI")])
;; Vector absolute value (pabsb/pabsw/pabsd) for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  The source may be an SSE register or memory; the
;; destination is an SSE register and is not tied to the source.
5857 (define_insn "abs<mode>2"
5858 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5859 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5861 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5862 [(set_attr "type" "sselog1")
5863 (set_attr "prefix_data16" "1")
5864 (set_attr "prefix_extra" "1")
5865 (set_attr "mode" "TI")])
;; Vector absolute value (pabs<mmxvecsize>) for the MMXMODEI modes, using
;; MMX registers ("y").  The source may be an MMX register or memory.
5867 (define_insn "abs<mode>2"
5868 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5869 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5871 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5872 [(set_attr "type" "sselog1")
5873 (set_attr "prefix_extra" "1")
5874 (set_attr "mode" "DI")])
5876 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5878 ;; AMD SSE4A instructions
5880 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntss/movntsd: store a MODEF scalar from an SSE register
;; (operand 1) to memory (operand 0).  Modelled as an unspec.
5882 (define_insn "sse4a_movnt<mode>"
5883 [(set (match_operand:MODEF 0 "memory_operand" "=m")
5885 [(match_operand:MODEF 1 "register_operand" "x")]
5888 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
5889 [(set_attr "type" "ssemov")
5890 (set_attr "mode" "<MODE>")])
;; SSE4A movntss/movntsd taking a vector source: element 0 of the
;; V4SF/V2DF register operand 1 is selected and stored to the scalar-mode
;; memory operand 0.
5892 (define_insn "sse4a_vmmovnt<mode>"
5893 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
5894 (unspec:<ssescalarmode>
5895 [(vec_select:<ssescalarmode>
5896 (match_operand:SSEMODEF2P 1 "register_operand" "x")
5897 (parallel [(const_int 0)]))]
5900 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5901 [(set_attr "type" "ssemov")
5902 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the V2DI
;; destination; operands 2 and 3 are mode-less integer constants with no
;; constraint string, printed as the two immediates.
5904 (define_insn "sse4a_extrqi"
5905 [(set (match_operand:V2DI 0 "register_operand" "=x")
5906 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5907 (match_operand 2 "const_int_operand" "")
5908 (match_operand 3 "const_int_operand" "")]
5911 "extrq\t{%3, %2, %0|%0, %2, %3}"
5912 [(set_attr "type" "sse")
5913 (set_attr "prefix_data16" "1")
5914 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the V2DI destination;
;; operand 2 is a V16QI SSE register (memory is not accepted).
5916 (define_insn "sse4a_extrq"
5917 [(set (match_operand:V2DI 0 "register_operand" "=x")
5918 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5919 (match_operand:V16QI 2 "register_operand" "x")]
5922 "extrq\t{%2, %0|%0, %2}"
5923 [(set_attr "type" "sse")
5924 (set_attr "prefix_data16" "1")
5925 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is an SSE register, and operands 3 and 4 are mode-less
;; integer constants printed as the two immediates.
5927 (define_insn "sse4a_insertqi"
5928 [(set (match_operand:V2DI 0 "register_operand" "=x")
5929 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5930 (match_operand:V2DI 2 "register_operand" "x")
5931 (match_operand 3 "const_int_operand" "")
5932 (match_operand 4 "const_int_operand" "")]
5935 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5936 [(set_attr "type" "sseins")
5937 (set_attr "prefix_rep" "1")
5938 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the destination;
;; operand 2 is a V2DI SSE register (memory is not accepted).
5940 (define_insn "sse4a_insertq"
5941 [(set (match_operand:V2DI 0 "register_operand" "=x")
5942 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5943 (match_operand:V2DI 2 "register_operand" "x")]
5946 "insertq\t{%2, %0|%0, %2}"
5947 [(set_attr "type" "sseins")
5948 (set_attr "prefix_rep" "1")
5949 (set_attr "mode" "TI")])
5951 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5953 ;; Intel SSE4.1 instructions
5955 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.1 blendps/blendpd expressed as a vec_merge.  Note the operand
;; order inside the vec_merge: operand 2 comes first and the tied operand 1
;; second; operand 3 is the immediate selection mask, limited by the
;; const_0_to_<blendbits>_operand predicate.
5957 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
5958 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
5959 (vec_merge:SSEMODEF2P
5960 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
5961 (match_operand:SSEMODEF2P 1 "register_operand" "0")
5962 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
5964 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
5965 [(set_attr "type" "ssemov")
5966 (set_attr "prefix_extra" "1")
5967 (set_attr "mode" "<MODE>")])
;; SSE4.1 blendvps/blendvpd.  Operands 0-2 use the *_not_xmm0 predicates,
;; while operand 3 (the selector) is restricted by the "Yz" constraint
;; (presumably %xmm0 -- confirm in constraints.md).
5969 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
5970 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
5972 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
5973 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
5974 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
5977 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
5978 [(set_attr "type" "ssemov")
5979 (set_attr "prefix_extra" "1")
5980 (set_attr "mode" "<MODE>")])
;; SSE4.1 dpps/dppd.  Operands 1 and 2 are marked commutative ("%");
;; operand 3 is an immediate in the range accepted by
;; const_0_to_255_operand.
5982 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
5983 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
5985 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
5986 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
5987 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5990 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
5991 [(set_attr "type" "ssemul")
5992 (set_attr "prefix_extra" "1")
5993 (set_attr "mode" "<MODE>")])
;; SSE4.1 movntdqa: load a V2DI value from memory (operand 1, memory only)
;; into an SSE register.  Modelled as an unspec.
5995 (define_insn "sse4_1_movntdqa"
5996 [(set (match_operand:V2DI 0 "register_operand" "=x")
5997 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6000 "movntdqa\t{%1, %0|%0, %1}"
6001 [(set_attr "type" "ssecvt")
6002 (set_attr "prefix_extra" "1")
6003 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw on V16QI, modelled as an unspec.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory; operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6005 (define_insn "sse4_1_mpsadbw"
6006 [(set (match_operand:V16QI 0 "register_operand" "=x")
6007 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6008 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6009 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6012 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6013 [(set_attr "type" "sselog1")
6014 (set_attr "prefix_extra" "1")
6015 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI inputs produce one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
6017 (define_insn "sse4_1_packusdw"
6018 [(set (match_operand:V8HI 0 "register_operand" "=x")
6021 (match_operand:V4SI 1 "register_operand" "0"))
6023 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6025 "packusdw\t{%2, %0|%0, %2}"
6026 [(set_attr "type" "sselog")
6027 (set_attr "prefix_extra" "1")
6028 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb on V16QI.  Operands 0-2 use the *_not_xmm0 predicates,
;; while operand 3 (the selector) is restricted by the "Yz" constraint
;; (presumably %xmm0 -- confirm in constraints.md).
6030 (define_insn "sse4_1_pblendvb"
6031 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6032 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6033 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6034 (match_operand:V16QI 3 "register_operand" "Yz")]
6037 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6038 [(set_attr "type" "ssemov")
6039 (set_attr "prefix_extra" "1")
6040 (set_attr "mode" "TI")])
;; SSE4.1 pblendw on V8HI.  Operand 2 (register or memory) is listed before
;; the tied operand 1; operand 3 is the immediate mask accepted by
;; const_0_to_255_operand.
6042 (define_insn "sse4_1_pblendw"
6043 [(set (match_operand:V8HI 0 "register_operand" "=x")
6045 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6046 (match_operand:V8HI 1 "register_operand" "0")
6047 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6049 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6050 [(set_attr "type" "ssemov")
6051 (set_attr "prefix_extra" "1")
6052 (set_attr "mode" "TI")])
;; SSE4.1 phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single
;; register-or-memory source.
6054 (define_insn "sse4_1_phminposuw"
6055 [(set (match_operand:V8HI 0 "register_operand" "=x")
6056 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6057 UNSPEC_PHMINPOSUW))]
6059 "phminposuw\t{%1, %0|%0, %1}"
6060 [(set_attr "type" "sselog1")
6061 (set_attr "prefix_extra" "1")
6062 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, register form: the source is a full V16QI SSE register
;; from which elements starting at index 0 are selected; result is V8HI.
6064 (define_insn "sse4_1_extendv8qiv8hi2"
6065 [(set (match_operand:V8HI 0 "register_operand" "=x")
6068 (match_operand:V16QI 1 "register_operand" "x")
6069 (parallel [(const_int 0)
6078 "pmovsxbw\t{%1, %0|%0, %1}"
6079 [(set_attr "type" "ssemov")
6080 (set_attr "prefix_extra" "1")
6081 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, narrow-source form: operand 1 is a V8QI register or
;; memory operand, wrapped in a vec_duplicate to V16QI so the pattern has
;; the same outer shape as the register form.
6083 (define_insn "*sse4_1_extendv8qiv8hi2"
6084 [(set (match_operand:V8HI 0 "register_operand" "=x")
6087 (vec_duplicate:V16QI
6088 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6089 (parallel [(const_int 0)
6098 "pmovsxbw\t{%1, %0|%0, %1}"
6099 [(set_attr "type" "ssemov")
6100 (set_attr "prefix_extra" "1")
6101 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, register form: V16QI SSE register source, V4SI result.
6103 (define_insn "sse4_1_extendv4qiv4si2"
6104 [(set (match_operand:V4SI 0 "register_operand" "=x")
6107 (match_operand:V16QI 1 "register_operand" "x")
6108 (parallel [(const_int 0)
6113 "pmovsxbd\t{%1, %0|%0, %1}"
6114 [(set_attr "type" "ssemov")
6115 (set_attr "prefix_extra" "1")
6116 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, narrow-source form: operand 1 is a V4QI register or
;; memory operand wrapped in a vec_duplicate to V16QI.
6118 (define_insn "*sse4_1_extendv4qiv4si2"
6119 [(set (match_operand:V4SI 0 "register_operand" "=x")
6122 (vec_duplicate:V16QI
6123 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6124 (parallel [(const_int 0)
6129 "pmovsxbd\t{%1, %0|%0, %1}"
6130 [(set_attr "type" "ssemov")
6131 (set_attr "prefix_extra" "1")
6132 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, register form: V16QI SSE register source, V2DI result.
6134 (define_insn "sse4_1_extendv2qiv2di2"
6135 [(set (match_operand:V2DI 0 "register_operand" "=x")
6138 (match_operand:V16QI 1 "register_operand" "x")
6139 (parallel [(const_int 0)
6142 "pmovsxbq\t{%1, %0|%0, %1}"
6143 [(set_attr "type" "ssemov")
6144 (set_attr "prefix_extra" "1")
6145 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, narrow-source form: operand 1 is a V2QI register or
;; memory operand wrapped in a vec_duplicate to V16QI.
6147 (define_insn "*sse4_1_extendv2qiv2di2"
6148 [(set (match_operand:V2DI 0 "register_operand" "=x")
6151 (vec_duplicate:V16QI
6152 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6153 (parallel [(const_int 0)
6156 "pmovsxbq\t{%1, %0|%0, %1}"
6157 [(set_attr "type" "ssemov")
6158 (set_attr "prefix_extra" "1")
6159 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, register form: V8HI SSE register source, V4SI result.
6161 (define_insn "sse4_1_extendv4hiv4si2"
6162 [(set (match_operand:V4SI 0 "register_operand" "=x")
6165 (match_operand:V8HI 1 "register_operand" "x")
6166 (parallel [(const_int 0)
6171 "pmovsxwd\t{%1, %0|%0, %1}"
6172 [(set_attr "type" "ssemov")
6173 (set_attr "prefix_extra" "1")
6174 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, narrow-source form: operand 1 is a register or memory
;; operand holding the four halfwords that are widened to V4SI.  The source
;; mode is V4HI (four HI elements feed the four SI results), matching the
;; pmovzxwd pattern "*sse4_1_zero_extendv4hiv4si2".
6176 (define_insn "*sse4_1_extendv4hiv4si2"
6177 [(set (match_operand:V4SI 0 "register_operand" "=x")
6181 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6182 (parallel [(const_int 0)
6187 "pmovsxwd\t{%1, %0|%0, %1}"
6188 [(set_attr "type" "ssemov")
6189 (set_attr "prefix_extra" "1")
6190 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, register form: V8HI SSE register source, V2DI result.
6192 (define_insn "sse4_1_extendv2hiv2di2"
6193 [(set (match_operand:V2DI 0 "register_operand" "=x")
6196 (match_operand:V8HI 1 "register_operand" "x")
6197 (parallel [(const_int 0)
6200 "pmovsxwq\t{%1, %0|%0, %1}"
6201 [(set_attr "type" "ssemov")
6202 (set_attr "prefix_extra" "1")
6203 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, narrow-source form: operand 1 is a register or memory
;; operand holding the two halfwords that are widened to V2DI.  The source
;; mode is V2HI (two HI elements feed the two DI results), matching the
;; pmovzxwq pattern "*sse4_1_zero_extendv2hiv2di2"; a V8HI memory operand
;; would describe a 128-bit read.
6205 (define_insn "*sse4_1_extendv2hiv2di2"
6206 [(set (match_operand:V2DI 0 "register_operand" "=x")
6210 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6211 (parallel [(const_int 0)
6214 "pmovsxwq\t{%1, %0|%0, %1}"
6215 [(set_attr "type" "ssemov")
6216 (set_attr "prefix_extra" "1")
6217 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, register form: V4SI SSE register source, V2DI result.
6219 (define_insn "sse4_1_extendv2siv2di2"
6220 [(set (match_operand:V2DI 0 "register_operand" "=x")
6223 (match_operand:V4SI 1 "register_operand" "x")
6224 (parallel [(const_int 0)
6227 "pmovsxdq\t{%1, %0|%0, %1}"
6228 [(set_attr "type" "ssemov")
6229 (set_attr "prefix_extra" "1")
6230 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, narrow-source form: operand 1 is a V2SI register or
;; memory operand; result is V2DI.
6232 (define_insn "*sse4_1_extendv2siv2di2"
6233 [(set (match_operand:V2DI 0 "register_operand" "=x")
6237 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6238 (parallel [(const_int 0)
6241 "pmovsxdq\t{%1, %0|%0, %1}"
6242 [(set_attr "type" "ssemov")
6243 (set_attr "prefix_extra" "1")
6244 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, register form: V16QI SSE register source, V8HI result.
6246 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6247 [(set (match_operand:V8HI 0 "register_operand" "=x")
6250 (match_operand:V16QI 1 "register_operand" "x")
6251 (parallel [(const_int 0)
6260 "pmovzxbw\t{%1, %0|%0, %1}"
6261 [(set_attr "type" "ssemov")
6262 (set_attr "prefix_extra" "1")
6263 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, narrow-source form: operand 1 is a V8QI register or
;; memory operand wrapped in a vec_duplicate to V16QI.
6265 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6266 [(set (match_operand:V8HI 0 "register_operand" "=x")
6269 (vec_duplicate:V16QI
6270 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6271 (parallel [(const_int 0)
6280 "pmovzxbw\t{%1, %0|%0, %1}"
6281 [(set_attr "type" "ssemov")
6282 (set_attr "prefix_extra" "1")
6283 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, register form: V16QI SSE register source, V4SI result.
6285 (define_insn "sse4_1_zero_extendv4qiv4si2"
6286 [(set (match_operand:V4SI 0 "register_operand" "=x")
6289 (match_operand:V16QI 1 "register_operand" "x")
6290 (parallel [(const_int 0)
6295 "pmovzxbd\t{%1, %0|%0, %1}"
6296 [(set_attr "type" "ssemov")
6297 (set_attr "prefix_extra" "1")
6298 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, narrow-source form: operand 1 is a V4QI register or
;; memory operand wrapped in a vec_duplicate to V16QI.
6300 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6301 [(set (match_operand:V4SI 0 "register_operand" "=x")
6304 (vec_duplicate:V16QI
6305 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6306 (parallel [(const_int 0)
6311 "pmovzxbd\t{%1, %0|%0, %1}"
6312 [(set_attr "type" "ssemov")
6313 (set_attr "prefix_extra" "1")
6314 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, register form: V16QI SSE register source, V2DI result.
6316 (define_insn "sse4_1_zero_extendv2qiv2di2"
6317 [(set (match_operand:V2DI 0 "register_operand" "=x")
6320 (match_operand:V16QI 1 "register_operand" "x")
6321 (parallel [(const_int 0)
6324 "pmovzxbq\t{%1, %0|%0, %1}"
6325 [(set_attr "type" "ssemov")
6326 (set_attr "prefix_extra" "1")
6327 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, narrow-source form: operand 1 is a V2QI register or
;; memory operand wrapped in a vec_duplicate to V16QI.
6329 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6330 [(set (match_operand:V2DI 0 "register_operand" "=x")
6333 (vec_duplicate:V16QI
6334 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6335 (parallel [(const_int 0)
6338 "pmovzxbq\t{%1, %0|%0, %1}"
6339 [(set_attr "type" "ssemov")
6340 (set_attr "prefix_extra" "1")
6341 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, register form: V8HI SSE register source, V4SI result.
6343 (define_insn "sse4_1_zero_extendv4hiv4si2"
6344 [(set (match_operand:V4SI 0 "register_operand" "=x")
6347 (match_operand:V8HI 1 "register_operand" "x")
6348 (parallel [(const_int 0)
6353 "pmovzxwd\t{%1, %0|%0, %1}"
6354 [(set_attr "type" "ssemov")
6355 (set_attr "prefix_extra" "1")
6356 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, narrow-source form: operand 1 is a V4HI register or
;; memory operand; result is V4SI.
6358 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6359 [(set (match_operand:V4SI 0 "register_operand" "=x")
6363 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6364 (parallel [(const_int 0)
6369 "pmovzxwd\t{%1, %0|%0, %1}"
6370 [(set_attr "type" "ssemov")
6371 (set_attr "prefix_extra" "1")
6372 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, register form: V8HI SSE register source, V2DI result.
6374 (define_insn "sse4_1_zero_extendv2hiv2di2"
6375 [(set (match_operand:V2DI 0 "register_operand" "=x")
6378 (match_operand:V8HI 1 "register_operand" "x")
6379 (parallel [(const_int 0)
6382 "pmovzxwq\t{%1, %0|%0, %1}"
6383 [(set_attr "type" "ssemov")
6384 (set_attr "prefix_extra" "1")
6385 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, narrow-source form: operand 1 is a V2HI register or
;; memory operand; result is V2DI.
6387 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6388 [(set (match_operand:V2DI 0 "register_operand" "=x")
6392 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6393 (parallel [(const_int 0)
6396 "pmovzxwq\t{%1, %0|%0, %1}"
6397 [(set_attr "type" "ssemov")
6398 (set_attr "prefix_extra" "1")
6399 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, register form: V4SI SSE register source, V2DI result.
6401 (define_insn "sse4_1_zero_extendv2siv2di2"
6402 [(set (match_operand:V2DI 0 "register_operand" "=x")
6405 (match_operand:V4SI 1 "register_operand" "x")
6406 (parallel [(const_int 0)
6409 "pmovzxdq\t{%1, %0|%0, %1}"
6410 [(set_attr "type" "ssemov")
6411 (set_attr "prefix_extra" "1")
6412 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, narrow-source form: operand 1 is a V2SI register or
;; memory operand; result is V2DI.
6414 (define_insn "*sse4_1_zero_extendv2siv2di2"
6415 [(set (match_operand:V2DI 0 "register_operand" "=x")
6419 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6420 (parallel [(const_int 0)
6423 "pmovzxdq\t{%1, %0|%0, %1}"
6424 [(set_attr "type" "ssemov")
6425 (set_attr "prefix_extra" "1")
6426 (set_attr "mode" "TI")])
6428 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6429 ;; But it is not really a compare instruction.
;; The only result is FLAGS_REG (CCmode), set from an unspec of the two
;; V2DI inputs; no vector register is written.  Operand 0 must be an SSE
;; register, operand 1 may be a register or memory.
6430 (define_insn "sse4_1_ptest"
6431 [(set (reg:CC FLAGS_REG)
6432 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6433 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6436 "ptest\t{%1, %0|%0, %1}"
6437 [(set_attr "type" "ssecomi")
6438 (set_attr "prefix_extra" "1")
6439 (set_attr "mode" "TI")])
;; SSE4.1 roundps/roundpd.  Operand 1 (register or memory) is the source
;; and is not tied to the destination; operand 2 is an immediate accepted
;; by const_0_to_15_operand.
6441 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6442 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6444 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6445 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6448 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6449 [(set_attr "type" "ssecvt")
6450 (set_attr "prefix_extra" "1")
6451 (set_attr "mode" "<MODE>")])
;; SSE4.1 roundss/roundsd, expressed as a vec_merge of the rounded value
;; (operand 2 with immediate operand 3) and operand 1, which is tied to the
;; destination.  Operand 2 must be a register here.
6453 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6454 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6455 (vec_merge:SSEMODEF2P
6457 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6458 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6460 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6463 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6464 [(set_attr "type" "ssecvt")
6465 (set_attr "prefix_extra" "1")
6466 (set_attr "mode" "<MODE>")])
6468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6470 ;; Intel SSE4.2 string/text processing instructions
6472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined SSE4.2 pcmpestr pattern with three results: an SI value in
;; operand 0 ("c"), a V16QI value in operand 1 ("Yz") and FLAGS_REG.  It is
;; only valid before reload (see the reload_completed/reload_in_progress
;; test).  The split code reads the REG_UNUSED notes on the insn to find
;; which results are live, then emits pcmpestri and/or pcmpestrm; the
;; flags-only variant is emitted when the flags are live but neither
;; register result is.
6474 (define_insn_and_split "sse4_2_pcmpestr"
6475 [(set (match_operand:SI 0 "register_operand" "=c,c")
6477 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6478 (match_operand:SI 3 "register_operand" "a,a")
6479 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6480 (match_operand:SI 5 "register_operand" "d,d")
6481 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6483 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6491 (set (reg:CC FLAGS_REG)
6500 && !(reload_completed || reload_in_progress)"
6505 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6506 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6507 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6510 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6511 operands[3], operands[4],
6512 operands[5], operands[6]));
6514 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6515 operands[3], operands[4],
6516 operands[5], operands[6]));
6517 if (flags && !(ecx || xmm0))
6518 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6519 operands[2], operands[3],
6520 operands[4], operands[5],
6524 [(set_attr "type" "sselog")
6525 (set_attr "prefix_data16" "1")
6526 (set_attr "prefix_extra" "1")
6527 (set_attr "memory" "none,load")
6528 (set_attr "mode" "TI")])
;; SSE4.2 pcmpestri: the SI result goes to operand 0 ("c") and FLAGS_REG is
;; also set.  Operands 2 and 4 are SI registers constrained to "a" and "d"
;; and are not printed in the template.  Two alternatives: operand 3 in a
;; register, or in memory (see the "memory" attribute).
6530 (define_insn "sse4_2_pcmpestri"
6531 [(set (match_operand:SI 0 "register_operand" "=c,c")
6533 [(match_operand:V16QI 1 "register_operand" "x,x")
6534 (match_operand:SI 2 "register_operand" "a,a")
6535 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6536 (match_operand:SI 4 "register_operand" "d,d")
6537 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6539 (set (reg:CC FLAGS_REG)
6548 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6549 [(set_attr "type" "sselog")
6550 (set_attr "prefix_data16" "1")
6551 (set_attr "prefix_extra" "1")
6552 (set_attr "memory" "none,load")
6553 (set_attr "mode" "TI")])
;; SSE4.2 pcmpestrm: the V16QI result goes to operand 0 ("Yz") and
;; FLAGS_REG is also set.  Operands 2 and 4 are SI registers constrained to
;; "a" and "d" and are not printed in the template.  Two alternatives:
;; operand 3 in a register, or in memory.
6555 (define_insn "sse4_2_pcmpestrm"
6556 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6558 [(match_operand:V16QI 1 "register_operand" "x,x")
6559 (match_operand:SI 2 "register_operand" "a,a")
6560 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6561 (match_operand:SI 4 "register_operand" "d,d")
6562 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6564 (set (reg:CC FLAGS_REG)
6573 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6574 [(set_attr "type" "sselog")
6575 (set_attr "prefix_data16" "1")
6576 (set_attr "prefix_extra" "1")
6577 (set_attr "memory" "none,load")
6578 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; operands 0 and 1 are
;; clobbered scratches.  Four alternatives: 0 and 1 print pcmpestrm with
;; the V16QI scratch in "Yz", 2 and 3 print pcmpestri with the SI scratch
;; in "c"; within each pair operand 4 is a register, then memory.
6580 (define_insn "sse4_2_pcmpestr_cconly"
6581 [(set (reg:CC FLAGS_REG)
6583 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6584 (match_operand:SI 3 "register_operand" "a,a,a,a")
6585 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6586 (match_operand:SI 5 "register_operand" "d,d,d,d")
6587 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6589 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6590 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6593 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6594 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6595 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6596 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6597 [(set_attr "type" "sselog")
6598 (set_attr "prefix_data16" "1")
6599 (set_attr "prefix_extra" "1")
6600 (set_attr "memory" "none,load,none,load")
6601 (set_attr "mode" "TI")])
;; Combined SSE4.2 pcmpistr pattern with three results: an SI value in
;; operand 0 ("c"), a V16QI value in operand 1 ("Yz") and FLAGS_REG.  It is
;; only valid before reload.  The split code reads the REG_UNUSED notes on
;; the insn to find which results are live, then emits pcmpistri and/or
;; pcmpistrm; the flags-only variant is emitted when the flags are live but
;; neither register result is.
6603 (define_insn_and_split "sse4_2_pcmpistr"
6604 [(set (match_operand:SI 0 "register_operand" "=c,c")
6606 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6607 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6608 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6610 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6616 (set (reg:CC FLAGS_REG)
6623 && !(reload_completed || reload_in_progress)"
6628 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6629 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6630 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6633 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6634 operands[3], operands[4]));
6636 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6637 operands[3], operands[4]));
6638 if (flags && !(ecx || xmm0))
6639 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6640 operands[2], operands[3],
6644 [(set_attr "type" "sselog")
6645 (set_attr "prefix_data16" "1")
6646 (set_attr "prefix_extra" "1")
6647 (set_attr "memory" "none,load")
6648 (set_attr "mode" "TI")])
;; pcmpistri: sets SI operand 0 (constraint c) and FLAGS_REG from V16QI
;; operands 1 and 2 and the 0..255 immediate in operand 3.  Alternative 1
;; reads operand 2 from memory.
6650 (define_insn "sse4_2_pcmpistri"
6651 [(set (match_operand:SI 0 "register_operand" "=c,c")
6653 [(match_operand:V16QI 1 "register_operand" "x,x")
6654 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6655 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6657 (set (reg:CC FLAGS_REG)
6664 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6665 [(set_attr "type" "sselog")
6666 (set_attr "prefix_data16" "1")
6667 (set_attr "prefix_extra" "1")
6668 (set_attr "memory" "none,load")
6669 (set_attr "mode" "TI")])
;; pcmpistrm: sets V16QI operand 0 (constraint Yz) and FLAGS_REG from V16QI
;; operands 1 and 2 and the 0..255 immediate in operand 3.  Alternative 1
;; reads operand 2 from memory.
6671 (define_insn "sse4_2_pcmpistrm"
6672 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6674 [(match_operand:V16QI 1 "register_operand" "x,x")
6675 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6676 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6678 (set (reg:CC FLAGS_REG)
6685 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix_data16" "1")
6688 (set_attr "prefix_extra" "1")
6689 (set_attr "memory" "none,load")
6690 (set_attr "mode" "TI")])
;; Flags-only form of the implicit-length string compare: only FLAGS_REG is
;; set, and operands 0 and 1 are scratch clobbers.  Alternatives 0-1 emit
;; pcmpistrm (V16QI scratch constrained to Yz); alternatives 2-3 emit
;; pcmpistri (SI scratch constrained to c).  Odd alternatives read operand 3
;; from memory.
6692 (define_insn "sse4_2_pcmpistr_cconly"
6693 [(set (reg:CC FLAGS_REG)
6695 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6696 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6697 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6699 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6700 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6703 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6704 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6705 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6706 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6707 [(set_attr "type" "sselog")
6708 (set_attr "prefix_data16" "1")
6709 (set_attr "prefix_extra" "1")
6710 (set_attr "memory" "none,load,none,load")
6711 (set_attr "mode" "TI")])
6713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6715 ;; SSE5 instructions
6717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6719 ;; SSE5 parallel integer multiply/add instructions.
6720 ;; Note the instruction does not allow the value being added to be a memory
6721 ;; operand. However, pretending via the nonimmediate_operand predicate that
6722 ;; it does, and splitting it later, allows the following to be recognized:
6723 ;; a[i] = b[i] * c[i] + d[i];
;; V8HI multiply of operands 1 and 2 combined with operand 3, which is tied to
;; the output ("0") in every alternative.  Alternative 2 has operand 1 in
;; memory, so its template swaps %1 and %2 to keep the memory operand in the
;; same assembler position as alternative 1.
6724 (define_insn "sse5_pmacsww"
6725 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6728 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6729 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6730 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6731 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6733 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6734 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6735 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6736 [(set_attr "type" "ssemuladd")
6737 (set_attr "mode" "TI")])
6739 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies only when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass with 2, and operand 0 is not mentioned in any
;; of the three source operands.  ix86_expand_sse5_multiple_memory is called
;; on the operands before sse5_pmacsww is re-emitted.
6741 [(set (match_operand:V8HI 0 "register_operand" "")
6743 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6744 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6745 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6747 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6748 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6749 && !reg_mentioned_p (operands[0], operands[1])
6750 && !reg_mentioned_p (operands[0], operands[2])
6751 && !reg_mentioned_p (operands[0], operands[3])"
6754 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6755 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; Same operand layout as sse5_pmacsww but emits pmacssww, and the condition
;; calls ix86_sse5_valid_op_p with a final argument of 1 rather than 2.
;; Operand 3 is tied to the output in every alternative.
6760 (define_insn "sse5_pmacssww"
6761 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6763 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6764 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6765 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6766 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6768 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6769 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6770 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6771 [(set_attr "type" "ssemuladd")
6772 (set_attr "mode" "TI")])
6774 ;; Note the instruction does not allow the value being added to be a memory
6775 ;; operand. However, pretending via the nonimmediate_operand predicate that
6776 ;; it does, and splitting it later, allows the following to be recognized:
6777 ;; a[i] = b[i] * c[i] + d[i];
;; V4SI multiply of operands 1 and 2 combined with operand 3, which is tied to
;; the output ("0") in every alternative.  Alternative 2 has operand 1 in
;; memory, so its template swaps %1 and %2.
6778 (define_insn "sse5_pmacsdd"
6779 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6782 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6783 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6784 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6785 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6787 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6788 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6789 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6790 [(set_attr "type" "ssemuladd")
6791 (set_attr "mode" "TI")])
6793 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies only when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass with 2, and operand 0 is not mentioned in any
;; of the three source operands.  ix86_expand_sse5_multiple_memory is called
;; on the operands before sse5_pmacsdd is re-emitted.
6795 [(set (match_operand:V4SI 0 "register_operand" "")
6797 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
6798 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6799 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
6801 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6802 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6803 && !reg_mentioned_p (operands[0], operands[1])
6804 && !reg_mentioned_p (operands[0], operands[2])
6805 && !reg_mentioned_p (operands[0], operands[3])"
6808 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
6809 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; Same operand layout as sse5_pmacsdd but emits pmacssdd, and the condition
;; calls ix86_sse5_valid_op_p with a final argument of 1 rather than 2.
;; Operand 3 is tied to the output in every alternative.
6814 (define_insn "sse5_pmacssdd"
6815 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6817 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6818 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6819 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6820 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6822 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6823 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6824 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6825 [(set_attr "type" "ssemuladd")
6826 (set_attr "mode" "TI")])
;; pmacssdql: V4SI operands 1 and 2 with a V2DI accumulator (operand 3, a
;; register tied to the output).  The element selections on both V4SI inputs
;; begin at index 1.  Alternative 2 swaps %1 and %2 in the template.
6828 (define_insn "sse5_pmacssdql"
6829 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6834 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6835 (parallel [(const_int 1)
6838 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6839 (parallel [(const_int 1)
6841 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6842 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6844 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6845 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6846 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6847 [(set_attr "type" "ssemuladd")
6848 (set_attr "mode" "TI")])
;; pmacssdqh: V4SI operands 1 and 2 with a V2DI accumulator (operand 3, a
;; register tied to the output).  The element selections on both V4SI inputs
;; begin at index 0.  Alternative 2 swaps %1 and %2 in the template.
6850 (define_insn "sse5_pmacssdqh"
6851 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6856 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6857 (parallel [(const_int 0)
6861 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6862 (parallel [(const_int 0)
6864 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6865 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6867 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6868 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6869 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6870 [(set_attr "type" "ssemuladd")
6871 (set_attr "mode" "TI")])
;; pmacsdql: V4SI operands 1 and 2 with a V2DI accumulator (operand 3, a
;; register tied to the output).  The element selections on both V4SI inputs
;; begin at index 1.  Alternative 2 swaps %1 and %2 in the template.
6873 (define_insn "sse5_pmacsdql"
6874 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6879 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6880 (parallel [(const_int 1)
6884 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6885 (parallel [(const_int 1)
6887 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6888 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6890 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6891 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6892 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6893 [(set_attr "type" "ssemuladd")
6894 (set_attr "mode" "TI")])
;; pmacsdqh: V4SI operands 1 and 2 with a V2DI accumulator (operand 3, a
;; register tied to the output).  The element selections on both V4SI inputs
;; begin at index 0.  Alternative 2 swaps %1 and %2 in the template.
6896 (define_insn "sse5_pmacsdqh"
6897 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6902 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6903 (parallel [(const_int 0)
6907 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6908 (parallel [(const_int 0)
6910 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6911 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6913 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6914 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6915 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6916 [(set_attr "type" "ssemuladd")
6917 (set_attr "mode" "TI")])
6919 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V8HI operands 1 and 2 with a V4SI accumulator (operand 3, a
;; register tied to the output).  The element selections on both V8HI inputs
;; begin at index 1.  Alternative 2 swaps %1 and %2 in the template.
6920 (define_insn "sse5_pmacsswd"
6921 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6926 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6927 (parallel [(const_int 1)
6933 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6934 (parallel [(const_int 1)
6938 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
6939 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6941 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6942 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6943 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6944 [(set_attr "type" "ssemuladd")
6945 (set_attr "mode" "TI")])
;; pmacswd: V8HI operands 1 and 2 with a V4SI accumulator (operand 3, a
;; register tied to the output).  The element selections on both V8HI inputs
;; begin at index 1.  Alternative 2 swaps %1 and %2 in the template.
6947 (define_insn "sse5_pmacswd"
6948 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6953 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6954 (parallel [(const_int 1)
6960 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6961 (parallel [(const_int 1)
6965 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
6966 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6968 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6969 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6970 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6971 [(set_attr "type" "ssemuladd")
6972 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI operands 1 and 2 with a V4SI accumulator (operand 3, a
;; register tied to the output).  The pattern contains element selections
;; beginning at index 0 and further selections beginning at index 1.
;; Alternative 2 swaps %1 and %2 in the template.
6974 (define_insn "sse5_pmadcsswd"
6975 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6981 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6982 (parallel [(const_int 0)
6988 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6989 (parallel [(const_int 0)
6997 (parallel [(const_int 1)
7004 (parallel [(const_int 1)
7008 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7009 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7011 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7012 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7013 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7014 [(set_attr "type" "ssemuladd")
7015 (set_attr "mode" "TI")])
;; pmadcswd: V8HI operands 1 and 2 with a V4SI accumulator (operand 3, a
;; register tied to the output).  The pattern contains element selections
;; beginning at index 0 and further selections beginning at index 1.
;; Alternative 2 swaps %1 and %2 in the template.
7017 (define_insn "sse5_pmadcswd"
7018 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7024 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7025 (parallel [(const_int 0)
7031 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7032 (parallel [(const_int 0)
7040 (parallel [(const_int 1)
7047 (parallel [(const_int 1)
7051 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7052 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7054 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7055 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7056 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7057 [(set_attr "type" "ssemuladd")
7058 (set_attr "mode" "TI")])
7060 ;; SSE5 parallel XMM conditional moves
;; if_then_else over any SSEMODE vector: operand 3 is the condition, operands
;; 1 and 2 the two arms.  Alternatives 0-3 emit the four-operand pcmov with
;; one input tied to the output.  Alternatives 4 and 5 cover an arm that
;; matches the "C" constraint, with operand 3 tied to the output, and emit
;; the two-operand andps / andnps instead.
7061 (define_insn "sse5_pcmov_<mode>"
7062 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7063 (if_then_else:SSEMODE
7064 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7065 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7066 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7067 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7069 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7070 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7071 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7072 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7073 andps\t{%2, %0|%0, %2}
7074 andnps\t{%1, %0|%0, %1}"
7075 [(set_attr "type" "sse4arg")])
7077 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: horizontal add reading V16QI operand 1 (register or memory) and
;; writing V8HI operand 0.  Element selections begin at indices 0 and 1; the
;; last selected index is 15.
7078 (define_insn "sse5_phaddbw"
7079 [(set (match_operand:V8HI 0 "register_operand" "=x")
7083 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7084 (parallel [(const_int 0)
7095 (parallel [(const_int 1)
7102 (const_int 15)])))))]
7104 "phaddbw\t{%1, %0|%0, %1}"
7105 [(set_attr "type" "sseiadd1")])
;; phaddbd: horizontal add reading V16QI operand 1 (register or memory) and
;; writing V4SI operand 0.  Element selections begin at indices 0, 1, 2 and
;; 3; the last selected index is 15.
7107 (define_insn "sse5_phaddbd"
7108 [(set (match_operand:V4SI 0 "register_operand" "=x")
7113 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7114 (parallel [(const_int 0)
7121 (parallel [(const_int 1)
7129 (parallel [(const_int 2)
7136 (parallel [(const_int 3)
7139 (const_int 15)]))))))]
7141 "phaddbd\t{%1, %0|%0, %1}"
7142 [(set_attr "type" "sseiadd1")])
;; phaddbq: horizontal add reading V16QI operand 1 (register or memory) and
;; writing V2DI operand 0.  Element selections begin at indices 0-3 and
;; 8-11; the last selected index is 15.
7144 (define_insn "sse5_phaddbq"
7145 [(set (match_operand:V2DI 0 "register_operand" "=x")
7151 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7152 (parallel [(const_int 0)
7157 (parallel [(const_int 1)
7163 (parallel [(const_int 2)
7168 (parallel [(const_int 3)
7175 (parallel [(const_int 8)
7180 (parallel [(const_int 9)
7186 (parallel [(const_int 10)
7191 (parallel [(const_int 11)
7192 (const_int 15)])))))))]
7194 "phaddbq\t{%1, %0|%0, %1}"
7195 [(set_attr "type" "sseiadd1")])
;; phaddwd: horizontal add reading V8HI operand 1 (register or memory) and
;; writing V4SI operand 0.  Element selections begin at indices 0 and 1; the
;; last selected index is 7.
7197 (define_insn "sse5_phaddwd"
7198 [(set (match_operand:V4SI 0 "register_operand" "=x")
7202 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7203 (parallel [(const_int 0)
7210 (parallel [(const_int 1)
7213 (const_int 7)])))))]
7215 "phaddwd\t{%1, %0|%0, %1}"
7216 [(set_attr "type" "sseiadd1")])
;; phaddwq: horizontal add reading V8HI operand 1 (register or memory) and
;; writing V2DI operand 0.  Element selections begin at indices 0, 1, 2 and
;; 3; the last selected index is 7.
7218 (define_insn "sse5_phaddwq"
7219 [(set (match_operand:V2DI 0 "register_operand" "=x")
7224 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7225 (parallel [(const_int 0)
7230 (parallel [(const_int 1)
7236 (parallel [(const_int 2)
7241 (parallel [(const_int 3)
7242 (const_int 7)]))))))]
7244 "phaddwq\t{%1, %0|%0, %1}"
7245 [(set_attr "type" "sseiadd1")])
;; phadddq: horizontal add reading V4SI operand 1 (register or memory) and
;; writing V2DI operand 0.  Element selections begin at indices 0 and 1; the
;; last selected index is 3.
7247 (define_insn "sse5_phadddq"
7248 [(set (match_operand:V2DI 0 "register_operand" "=x")
7252 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7253 (parallel [(const_int 0)
7258 (parallel [(const_int 1)
7259 (const_int 3)])))))]
7261 "phadddq\t{%1, %0|%0, %1}"
7262 [(set_attr "type" "sseiadd1")])
;; phaddubw: horizontal add reading V16QI operand 1 (register or memory) and
;; writing V8HI operand 0.  Element selections begin at indices 0 and 1; the
;; last selected index is 15.
7264 (define_insn "sse5_phaddubw"
7265 [(set (match_operand:V8HI 0 "register_operand" "=x")
7269 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7270 (parallel [(const_int 0)
7281 (parallel [(const_int 1)
7288 (const_int 15)])))))]
7290 "phaddubw\t{%1, %0|%0, %1}"
7291 [(set_attr "type" "sseiadd1")])
;; phaddubd: horizontal add reading V16QI operand 1 (register or memory) and
;; writing V4SI operand 0.  Element selections begin at indices 0, 1, 2 and
;; 3; the last selected index is 15.
7293 (define_insn "sse5_phaddubd"
7294 [(set (match_operand:V4SI 0 "register_operand" "=x")
7299 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7300 (parallel [(const_int 0)
7307 (parallel [(const_int 1)
7315 (parallel [(const_int 2)
7322 (parallel [(const_int 3)
7325 (const_int 15)]))))))]
7327 "phaddubd\t{%1, %0|%0, %1}"
7328 [(set_attr "type" "sseiadd1")])
;; phaddubq: horizontal add reading V16QI operand 1 (register or memory) and
;; writing V2DI operand 0.  Element selections begin at indices 0-3 and
;; 8-11; the last selected index is 15.
7330 (define_insn "sse5_phaddubq"
7331 [(set (match_operand:V2DI 0 "register_operand" "=x")
7337 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7338 (parallel [(const_int 0)
7343 (parallel [(const_int 1)
7349 (parallel [(const_int 2)
7354 (parallel [(const_int 3)
7361 (parallel [(const_int 8)
7366 (parallel [(const_int 9)
7372 (parallel [(const_int 10)
7377 (parallel [(const_int 11)
7378 (const_int 15)])))))))]
7380 "phaddubq\t{%1, %0|%0, %1}"
7381 [(set_attr "type" "sseiadd1")])
;; phadduwd: horizontal add reading V8HI operand 1 (register or memory) and
;; writing V4SI operand 0.  Element selections begin at indices 0 and 1; the
;; last selected index is 7.
7383 (define_insn "sse5_phadduwd"
7384 [(set (match_operand:V4SI 0 "register_operand" "=x")
7388 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7389 (parallel [(const_int 0)
7396 (parallel [(const_int 1)
7399 (const_int 7)])))))]
7401 "phadduwd\t{%1, %0|%0, %1}"
7402 [(set_attr "type" "sseiadd1")])
;; phadduwq: horizontal add reading V8HI operand 1 (register or memory) and
;; writing V2DI operand 0.  Element selections begin at indices 0, 1, 2 and
;; 3; the last selected index is 7.
7404 (define_insn "sse5_phadduwq"
7405 [(set (match_operand:V2DI 0 "register_operand" "=x")
7410 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7411 (parallel [(const_int 0)
7416 (parallel [(const_int 1)
7422 (parallel [(const_int 2)
7427 (parallel [(const_int 3)
7428 (const_int 7)]))))))]
7430 "phadduwq\t{%1, %0|%0, %1}"
7431 [(set_attr "type" "sseiadd1")])
;; phaddudq: horizontal add reading V4SI operand 1 (register or memory) and
;; writing V2DI operand 0.  Element selections begin at indices 0 and 1; the
;; last selected index is 3.
7433 (define_insn "sse5_phaddudq"
7434 [(set (match_operand:V2DI 0 "register_operand" "=x")
7438 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7439 (parallel [(const_int 0)
7444 (parallel [(const_int 1)
7445 (const_int 3)])))))]
7447 "phaddudq\t{%1, %0|%0, %1}"
7448 [(set_attr "type" "sseiadd1")])
;; phsubbw: horizontal subtract reading V16QI operand 1 (register or memory)
;; and writing V8HI operand 0.  Element selections begin at indices 0 and 1;
;; the last selected index is 15.
7450 (define_insn "sse5_phsubbw"
7451 [(set (match_operand:V8HI 0 "register_operand" "=x")
7455 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7456 (parallel [(const_int 0)
7467 (parallel [(const_int 1)
7474 (const_int 15)])))))]
7476 "phsubbw\t{%1, %0|%0, %1}"
7477 [(set_attr "type" "sseiadd1")])
;; phsubwd: horizontal subtract reading V8HI operand 1 (register or memory)
;; and writing V4SI operand 0.  Element selections begin at indices 0 and 1;
;; the last selected index is 7.
7479 (define_insn "sse5_phsubwd"
7480 [(set (match_operand:V4SI 0 "register_operand" "=x")
7484 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7485 (parallel [(const_int 0)
7492 (parallel [(const_int 1)
7495 (const_int 7)])))))]
7497 "phsubwd\t{%1, %0|%0, %1}"
7498 [(set_attr "type" "sseiadd1")])
;; phsubdq: horizontal subtract reading V4SI operand 1 (register or memory)
;; and writing V2DI operand 0.  Element selections begin at indices 0 and 1;
;; the last selected index is 3.
7500 (define_insn "sse5_phsubdq"
7501 [(set (match_operand:V2DI 0 "register_operand" "=x")
7505 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7506 (parallel [(const_int 0)
7511 (parallel [(const_int 1)
7512 (const_int 3)])))))]
7514 "phsubdq\t{%1, %0|%0, %1}"
7515 [(set_attr "type" "sseiadd1")])
7517 ;; SSE5 permute instructions
;; Generic pperm modelled as UNSPEC_SSE5_PERMUTE over three V16QI inputs.
;; In alternatives 0-1 operand 1 is tied to the output; in alternatives 2-3
;; operand 3 is.  The constraints allow a different memory operand in each
;; alternative; ix86_sse5_valid_op_p further validates the combination.
7518 (define_insn "sse5_pperm"
7519 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7521 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7522 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7523 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7524 UNSPEC_SSE5_PERMUTE))]
7525 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7526 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7527 [(set_attr "type" "sse4arg")
7528 (set_attr "mode" "TI")])
7530 ;; The following are for the various unpack insns which don't need the first
7531 ;; source operand, so we can just use the output operand for the first operand.
7532 ;; This allows either of the other two operands to be a memory operand. We
7533 ;; can't just use the first operand as an argument to the normal pperm because
7534 ;; then an output only argument, suddenly becomes an input operand.
;; pperm-based unpack ("zero" variant) of V16QI operand 1 into V8HI operand 0.
;; Operand 2 is the const_int selection parallel; V16QI operand 3 is carried
;; in a use.  The output register is passed in the first source slot (%0
;; appears twice in the template).  The condition requires operand 1 or
;; operand 3 to be a register, matching the "xm,x" / "x,xm" constraints; it
;; used to test operand 2, which is never a register, so a memory operand 1
;; could not match.
7535 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7536 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7539 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7540 (match_operand 2 "" "")))) ;; parallel with const_int's
7541 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7543 && (register_operand (operands[1], V16QImode)
7544 || register_operand (operands[3], V16QImode))"
7545 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7546 [(set_attr "type" "sseadd")
7547 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant) of V16QI operand 1 into V8HI operand 0.
;; Operand 2 is the const_int selection parallel; V16QI operand 3 is carried
;; in a use.  The condition requires operand 1 or operand 3 to be a register,
;; matching the "xm,x" / "x,xm" constraints; it used to test operand 2, which
;; is never a register, so a memory operand 1 could not match.
7549 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7550 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7553 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7554 (match_operand 2 "" "")))) ;; parallel with const_int's
7555 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7557 && (register_operand (operands[1], V16QImode)
7558 || register_operand (operands[3], V16QImode))"
7559 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7560 [(set_attr "type" "sseadd")
7561 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant) of V8HI operand 1 into V4SI operand 0.
;; Operand 2 is the const_int selection parallel; V16QI operand 3 is carried
;; in a use.  The condition requires operand 1 or operand 3 to be a register,
;; matching the "xm,x" / "x,xm" constraints; it used to test operand 2, which
;; is never a register, so a memory operand 1 could not match.
7563 (define_insn "sse5_pperm_zero_v8hi_v4si"
7564 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7567 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7568 (match_operand 2 "" "")))) ;; parallel with const_int's
7569 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7571 && (register_operand (operands[1], V8HImode)
7572 || register_operand (operands[3], V16QImode))"
7573 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7574 [(set_attr "type" "sseadd")
7575 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant) of V8HI operand 1 into V4SI operand 0.
;; Operand 2 is the const_int selection parallel; V16QI operand 3 is carried
;; in a use.  The condition requires operand 1 or operand 3 to be a register,
;; matching the "xm,x" / "x,xm" constraints; it used to test operand 2, which
;; is never a register, so a memory operand 1 could not match.
7577 (define_insn "sse5_pperm_sign_v8hi_v4si"
7578 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7581 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7582 (match_operand 2 "" "")))) ;; parallel with const_int's
7583 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7585 && (register_operand (operands[1], V8HImode)
7586 || register_operand (operands[3], V16QImode))"
7587 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7588 [(set_attr "type" "sseadd")
7589 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant) of V4SI operand 1 into V2DI operand 0.
;; Operand 2 is the const_int selection parallel; V16QI operand 3 is carried
;; in a use.  The condition requires operand 1 or operand 3 to be a register,
;; matching the "xm,x" / "x,xm" constraints; it used to test operand 2, which
;; is never a register, so a memory operand 1 could not match.
7591 (define_insn "sse5_pperm_zero_v4si_v2di"
7592 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7595 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7596 (match_operand 2 "" "")))) ;; parallel with const_int's
7597 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7599 && (register_operand (operands[1], V4SImode)
7600 || register_operand (operands[3], V16QImode))"
7601 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7602 [(set_attr "type" "sseadd")
7603 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant) of V4SI operand 1 into V2DI operand 0.
;; Operand 2 is the const_int selection parallel; V16QI operand 3 is carried
;; in a use.  The condition requires operand 1 or operand 3 to be a register,
;; matching the "xm,x" / "x,xm" constraints; it used to test operand 2, which
;; is never a register, so a memory operand 1 could not match.
7605 (define_insn "sse5_pperm_sign_v4si_v2di"
7606 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7609 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7610 (match_operand 2 "" "")))) ;; parallel with const_int's
7611 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7613 && (register_operand (operands[1], V4SImode)
7614 || register_operand (operands[3], V16QImode))"
7615 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7616 [(set_attr "type" "sseadd")
7617 (set_attr "mode" "TI")])
7619 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Packs V2DI operands 1 and 2 into V4SI operand 0 with pperm; V16QI operand
;; 3 is carried in a use.  Operand 1 is tied to the output in alternatives
;; 0-1 and operand 3 in alternatives 2-3.
7620 (define_insn "sse5_pperm_pack_v2di_v4si"
7621 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7624 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7626 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7627 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7628 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7629 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7630 [(set_attr "type" "sse4arg")
7631 (set_attr "mode" "TI")])
;; Packs V4SI operands 1 and 2 into V8HI operand 0 with pperm; V16QI operand
;; 3 is carried in a use.  Operand 1 is tied to the output in alternatives
;; 0-1 and operand 3 in alternatives 2-3.
7633 (define_insn "sse5_pperm_pack_v4si_v8hi"
7634 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7637 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7639 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7640 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7641 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7642 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7643 [(set_attr "type" "sse4arg")
7644 (set_attr "mode" "TI")])
;; Packs V8HI operands 1 and 2 into V16QI operand 0 with pperm; V16QI operand
;; 3 is carried in a use.  Operand 1 is tied to the output in alternatives
;; 0-1 and operand 3 in alternatives 2-3.
7646 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7647 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7650 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7652 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7653 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7654 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7655 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7656 [(set_attr "type" "sse4arg")
7657 (set_attr "mode" "TI")])
7659 ;; Floating point permutation (permps, permpd)
;; UNSPEC_SSE5_PERMUTE over two SSEMODEF2P (V4SF / V2DF) inputs and a V16QI
;; operand 3.  Emits perm<ssemodesuffixf4>, i.e. permps or permpd.  Operand 1
;; is tied to the output in alternatives 0-1 and operand 3 in alternatives
;; 2-3.
7660 (define_insn "sse5_perm<mode>"
7661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7663 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7664 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7665 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7666 UNSPEC_SSE5_PERMUTE))]
7667 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7668 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7669 [(set_attr "type" "sse4arg")
7670 (set_attr "mode" "<MODE>")])
7672 ;; SSE5 packed rotate instructions
;; Rotate by an immediate: operand 2 is an SI constant accepted by
;; const_0_to_<sserotatemax>_operand.  Emits prot<ssevecsize>, where the
;; suffix is b/w/d/q according to the vector mode.
7673 (define_insn "rotl<mode>3"
7674 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7676 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
7677 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
7679 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7680 [(set_attr "type" "sseishft")
7681 (set_attr "mode" "TI")])
;; Rotate whose count (operand 2) is a vector of the same mode as the data.
;; Each alternative allows at most one of operands 1 and 2 to be in memory.
;; Emits prot<ssevecsize>.
7683 (define_insn "sse5_rotl<mode>3"
7684 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7686 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7687 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
7688 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7689 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7690 [(set_attr "type" "sseishft")
7691 (set_attr "mode" "TI")])
7693 ;; SSE5 packed shift instructions. Note negative values for the shift amount
7694 ;; convert this into a right shift instead of left shift. For now, model this
7695 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
7696 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT
;; rtx code with which the logical variant could be expressed directly.
;; Shift modelled as UNSPEC_SSE5_ASHIFT; emits psha<ssevecsize>.  Operand 2 is
;; a vector of the same mode as the data, and each alternative allows at most
;; one of operands 1 and 2 to be in memory.
7697 (define_insn "sse5_ashl<mode>3"
7698 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7700 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7701 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7702 UNSPEC_SSE5_ASHIFT))]
7703 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7704 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7705 [(set_attr "type" "sseishft")
7706 (set_attr "mode" "TI")])
;; Shift modelled as UNSPEC_SSE5_LSHIFT; emits pshl<ssevecsize>.  Operand 2 is
;; a vector of the same mode as the data, and each alternative allows at most
;; one of operands 1 and 2 to be in memory.
7708 (define_insn "sse5_lshl<mode>3"
7709 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7711 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7712 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7713 UNSPEC_SSE5_LSHIFT))]
7714 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7715 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7716 [(set_attr "type" "sseishft")
7717 (set_attr "mode" "TI")])
7719 ;; SSE5 FRCZ support
;; Packed frcz: one V4SF / V2DF input (register or memory), register output.
;; Emits frcz<ssemodesuffixf4>, i.e. frczps or frczpd.
7721 (define_insn "sse5_frcz<mode>2"
7722 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7724 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
7727 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
7728 [(set_attr "type" "ssecvt1")
7729 (set_attr "prefix_extra" "1")
7730 (set_attr "mode" "<MODE>")])
;; vec_merge form of frcz: operand 2 is the input, and operand 1 (tied to the
;; output) supplies the other vec_merge arm.  Emits frcz<ssemodesuffixf2s>,
;; i.e. frczss or frczsd.
7733 (define_insn "sse5_vmfrcz<mode>2"
7734 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7735 (vec_merge:SSEMODEF2P
7737 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
7739 (match_operand:SSEMODEF2P 1 "register_operand" "0")
7742 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
7743 [(set_attr "type" "ssecvt1")
7744 (set_attr "prefix_extra" "1")
7745 (set_attr "mode" "<MODE>")])
;; cvtph2ps: converts V4HI operand 1 (register or memory) to V4SF operand 0.
;; The conversion is modelled as an unspec.
7747 (define_insn "sse5_cvtph2ps"
7748 [(set (match_operand:V4SF 0 "register_operand" "=x")
7749 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
7752 "cvtph2ps\t{%1, %0|%0, %1}"
7753 [(set_attr "type" "ssecvt")
7754 (set_attr "mode" "V4SF")])
;; cvtps2ph: converts V4SF register operand 1 to V4HI operand 0, which may be
;; a register or memory.  The conversion is modelled as an unspec.
7756 (define_insn "sse5_cvtps2ph"
7757 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
7758 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
7761 "cvtps2ph\t{%1, %0|%0, %1}"
7762 [(set_attr "type" "ssecvt")
7763 (set_attr "mode" "V4SF")])
7765 ;; Scalar versions of the com instructions that use vector types that are
7766 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
7767 ;; com instructions fill in 0's in the upper bits instead of leaving them
7768 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the vec_merge compare: the preparation code supplies
;; operands[4] as CONST0_RTX of the vector mode.  Operand 1 is the comparison
;; operator applied to operands 2 and 3.
7769 (define_expand "sse5_vmmaskcmp<mode>3"
7770 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
7771 (vec_merge:SSEMODEF2P
7772 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7773 [(match_operand:SSEMODEF2P 2 "register_operand" "")
7774 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
7779 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn matched by the sse5_vmmaskcmp<mode>3 expansion.  %Y1 prints the
;; condition from operator 1, and the mnemonic is com<cond> followed by
;; <ssemodesuffixf2s> (ss or sd).  Operand 4 has no predicate or constraint.
7782 (define_insn "*sse5_vmmaskcmp<mode>3"
7783 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7784 (vec_merge:SSEMODEF2P
7785 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7786 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7787 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
7788 (match_operand:SSEMODEF2P 4 "")
7791 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
7792 [(set_attr "type" "sse4arg")
7793 (set_attr "mode" "<ssescalarmode>")])
7795 ;; We don't have a comparison operator that always returns true/false, so
7796 ;; handle comfalse and comtrue specially.
;; Emits the always-false / always-true compare selected by the const_int in
;; operand 3 (UNSPEC_SSE5_TRUEFALSE).  The switch has four arms.  The last
;; two used to repeat the comfalse templates, so no comtrue mnemonic could
;; ever be emitted; they now emit comtrues / comtruep.
;; NOTE(review): assumes the arms are ordered false-scalar, false-packed,
;; true-scalar, true-packed -- confirm against the case labels.
7797 (define_insn "sse5_com_tf<mode>3"
7798 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7800 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
7801 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
7802 (match_operand:SI 3 "const_int_operand" "n")]
7803 UNSPEC_SSE5_TRUEFALSE))]
7806 const char *ret = NULL;
7808 switch (INTVAL (operands[3]))
7811 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7815 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7819 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7823 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7832 [(set_attr "type" "ssecmp")
7833 (set_attr "mode" "<MODE>")])
;; Packed floating-point compare: operator 1 (sse5_comparison_float_operator)
;; applied to operands 2 and 3.  %Y1 prints the condition, and the mnemonic is
;; com<cond> followed by <ssemodesuffixf4> (ps or pd).
7835 (define_insn "sse5_maskcmp<mode>3"
7836 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7837 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7838 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7839 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
7841 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
7842 [(set_attr "type" "ssecmp")
7843 (set_attr "mode" "<MODE>")])
;; Packed integer compare: operator 1 (ix86_comparison_int_operator) applied
;; to operands 2 and 3.  The mnemonic is pcom<cond> followed by <ssevecsize>
;; (b/w/d/q).
;; NOTE(review): the type attribute here is sse4arg, whereas the float and
;; unsigned compare patterns use ssecmp -- confirm this is intended.
7845 (define_insn "sse5_maskcmp<mode>3"
7846 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7847 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
7848 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7849 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
7851 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7852 [(set_attr "type" "sse4arg")
7853 (set_attr "mode" "TI")])
;; Packed integer compare using ix86_comparison_uns_operator.  The mnemonic
;; is pcom<cond>u followed by <ssevecsize> (b/w/d/q).
7855 (define_insn "sse5_maskcmp_uns<mode>3"
7856 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7857 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
7858 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7859 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
7861 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7862 [(set_attr "type" "ssecmp")
7863 (set_attr "mode" "TI")])
7865 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
7866 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
7867 ;; the exact instruction generated for the intrinsic.
;; Same comparison operator and output template as sse5_maskcmp_uns<mode>3,
;; but the comparison is wrapped in UNSPEC_SSE5_UNSIGNED_CMP.
7868 (define_insn "sse5_maskcmp_uns2<mode>3"
7869 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7871 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
7872 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7873 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
7874 UNSPEC_SSE5_UNSIGNED_CMP))]
7876 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7877 [(set_attr "type" "ssecmp")
7878 (set_attr "mode" "TI")])
7880 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
7881 ;; being added here to be complete.
;; UNSPEC_SSE5_TRUEFALSE for integer vectors: a nonzero const_int in operand
;; 3 selects pcomtrue<ssevecsize>, and zero selects pcomfalse<ssevecsize>.
7882 (define_insn "sse5_pcom_tf<mode>3"
7883 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7885 [(match_operand:SSEMODE1248 1 "register_operand" "x")
7886 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
7887 (match_operand:SI 3 "const_int_operand" "n")]
7888 UNSPEC_SSE5_TRUEFALSE))]
7891 return ((INTVAL (operands[3]) != 0)
7892 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7893 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
7895 [(set_attr "type" "ssecmp")
7896 (set_attr "mode" "TI")])
;; aesenc as a V2DI unspec: operand 1 is tied to the output register, and
;; operand 2 may be a register or memory.
7898 (define_insn "aesenc"
7899 [(set (match_operand:V2DI 0 "register_operand" "=x")
7900 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7901 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
7904 "aesenc\t{%2, %0|%0, %2}"
7905 [(set_attr "type" "sselog1")
7906 (set_attr "prefix_extra" "1")
7907 (set_attr "mode" "TI")])
;; aesenclast as UNSPEC_AESENCLAST: operand 1 is tied to the output register,
;; and operand 2 may be a register or memory.
7909 (define_insn "aesenclast"
7910 [(set (match_operand:V2DI 0 "register_operand" "=x")
7911 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7912 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
7913 UNSPEC_AESENCLAST))]
7915 "aesenclast\t{%2, %0|%0, %2}"
7916 [(set_attr "type" "sselog1")
7917 (set_attr "prefix_extra" "1")
7918 (set_attr "mode" "TI")])
;; Pattern for the aesdec instruction.
;;
;;   operand 0 - destination, SSE register
;;   operand 1 - first input, tied to operand 0 by the "0" constraint, so the
;;               two-operand output template only prints %0 and %2
;;   operand 2 - second input, SSE register or memory
;;
;; NOTE(review): the unspec code UNSPEC_AESDEC and the TARGET_AES condition
;; are restored by analogy with the aesdeclast pattern -- confirm that both
;; names are defined for this target.
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_AESDEC))]
  "TARGET_AES"
  "aesdec\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
;; Pattern for the aesdeclast instruction.
;;
;;   operand 0 - destination, SSE register
;;   operand 1 - first input, tied to operand 0 by the "0" constraint, so the
;;               two-operand output template only prints %0 and %2
;;   operand 2 - second input, SSE register or memory
;;
;; NOTE(review): the enabling condition is restored as TARGET_AES -- confirm
;; that this is the macro guarding the AES patterns.
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "aesdeclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
;; Pattern for the aesimc instruction.
;;
;;   operand 0 - destination, SSE register
;;   operand 1 - single input, SSE register or memory; it is not tied to the
;;               destination, so the template prints both %1 and %0
;;
;; NOTE(review): the unspec code UNSPEC_AESIMC and the TARGET_AES condition
;; are restored by analogy with the sibling AES patterns -- confirm that both
;; names are defined for this target.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
                     UNSPEC_AESIMC))]
  "TARGET_AES"
  "aesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
;; Pattern for the aeskeygenassist instruction.
;;
;;   operand 0 - destination, SSE register
;;   operand 1 - input, SSE register or memory
;;   operand 2 - immediate accepted by const_0_to_255_operand, printed as the
;;               instruction's immediate operand
;;
;; NOTE(review): the enabling condition is restored as TARGET_AES -- confirm
;; that this is the macro guarding the AES patterns.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
                      (match_operand:SI 2 "const_0_to_255_operand" "n")]
                     UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
;; Pattern for the pclmulqdq instruction.
;;
;;   operand 0 - destination, SSE register
;;   operand 1 - first input, tied to operand 0 by the "0" constraint, so the
;;               output template only prints %0, %2 and %3
;;   operand 2 - second input, SSE register or memory
;;   operand 3 - immediate accepted by const_0_to_255_operand, printed as the
;;               instruction's immediate operand
;;
;; NOTE(review): the unspec code UNSPEC_PCLMULQDQ and the TARGET_PCLMUL
;; condition are restored from the instruction name -- confirm that both
;; names are defined for this target.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "nonimmediate_operand" "xm")
                      (match_operand:SI 3 "const_0_to_255_operand" "n")]
                     UNSPEC_PCLMULQDQ))]
  "TARGET_PCLMUL"
  "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])