1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option) any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; list the element widths, in bytes, of the modes it covers
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI elements).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point iterators: SSEMODEF4 covers the two scalar and the two
;; packed float modes, SSEMODEF2P only the two packed ones.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
;; (used as the "<sse>" prefix of pattern names, e.g. "<sse>_movnt<mode>").
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4:  full two-letter suffix for each of the four float modes.
;; ssemodesuffixf2s: scalar suffix, also for the packed modes.
;; ssemodesuffixf2c: single letter appended to a mnemonic stem that already
;;                   ends in "p" or "s" (e.g. "addp<ssemodesuffixf2c>").
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")])
47 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd")])
48 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
50 ;; Mapping of the max integer size for sse5 rotate immediate constraint
51 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
53 ;; Mapping of vector modes back to the scalar modes
;; (used for the "mode" attribute of the scalar "vm" patterns).
54 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
56 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
58 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
62 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
64 ;; All of these patterns are enabled for SSE1 as well as SSE2.
65 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 128-bit integer vector modes in SSEMODEI.
;; The RTL template only names the two operands; the expansion itself is
;; performed by ix86_expand_vector_move.
67 (define_expand "mov<mode>"
68 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
69 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
72 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI integer vector modes.
;; Alternatives: 0 = SSE constant ("C") -> xmm, 1 = xmm/mem -> xmm,
;; 2 = xmm -> mem.  The condition requires at least one operand to be a
;; register.  Constants are emitted through standard_sse_constant_opcode;
;; the other moves use movaps when the insn's "mode" attribute is V4SF and
;; movdqa otherwise.
;; The "mode" attribute is V4SF when optimize_size is nonzero, when
;; TARGET_SSE2 is zero, or for alternative 2 when
;; TARGET_SSE_TYPELESS_STORES is nonzero; otherwise it is TI.
76 (define_insn "*mov<mode>_internal"
77 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
78 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
80 && (register_operand (operands[0], <MODE>mode)
81 || register_operand (operands[1], <MODE>mode))"
83 switch (which_alternative)
86 return standard_sse_constant_opcode (insn, operands[1]);
89 if (get_attr_mode (insn) == MODE_V4SF)
90 return "movaps\t{%1, %0|%0, %1}";
92 return "movdqa\t{%1, %0|%0, %1}";
97 [(set_attr "type" "sselog1,ssemov,ssemov")
100 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
101 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
102 (and (eq_attr "alternative" "2")
103 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
105 (const_string "V4SF")
106 (const_string "TI")))])
108 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
109 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
110 ;; from memory, we'd prefer to load the memory directly into the %xmm
111 ;; register. To facilitate this happy circumstance, this pattern won't
112 ;; split until after register allocation. If the 64-bit value didn't
113 ;; come from memory, this is the best we can do. This is much better
114 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves; the split
;; runs once reload has completed.
;; Alternative 0 (DImode source in general registers): each 32-bit half is
;; loaded with sse2_loadld -- the low half (subreg offset 0) into operand 0,
;; the high half (subreg offset 4) into the xmm scratch operand 2 -- and the
;; two are then interleaved with sse2_punpckldq.
;; Alternative 1 (memory source): a single vec_concatv2di of the DImode
;; memory operand with const0_rtx; no xmm scratch is required ("X").
117 (define_insn_and_split "movdi_to_sse"
119 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
120 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
121 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
122 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
124 "&& reload_completed"
127 if (register_operand (operands[1], DImode))
129 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
130 Assemble the 64-bit DImode value in an xmm register. */
131 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
132 gen_rtx_SUBREG (SImode, operands[1], 0)));
133 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
134 gen_rtx_SUBREG (SImode, operands[1], 4)));
135 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
137 else if (memory_operand (operands[1], DImode))
138 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for the packed float modes (V4SF, V2DF); like the integer
;; vector version, it hands the operands to ix86_expand_vector_move.
143 (define_expand "mov<mode>"
144 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
145 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
148 ix86_expand_vector_move (<MODE>mode, operands);
;; V4SF move insn.  Alternatives: SSE constant -> xmm, xmm/mem -> xmm,
;; xmm -> mem; at least one operand must be a register.  Constants go
;; through standard_sse_constant_opcode, everything else is a movaps.
152 (define_insn "*movv4sf_internal"
153 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
154 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
156 && (register_operand (operands[0], V4SFmode)
157 || register_operand (operands[1], V4SFmode))"
159 switch (which_alternative)
162 return standard_sse_constant_opcode (insn, operands[1]);
165 return "movaps\t{%1, %0|%0, %1}";
170 [(set_attr "type" "sselog1,ssemov,ssemov")
171 (set_attr "mode" "V4SF")])
;; After reload (and with TARGET_SSE), rewrite a V4SF register load whose
;; source satisfies zero_extended_scalar_load_operand.  The preparation code
;; narrows operand 1 to its SFmode part at byte offset 0 and sets operand 2
;; to the V4SF zero vector; the replacement duplicates the scalar with
;; vec_duplicate.
174 [(set (match_operand:V4SF 0 "register_operand" "")
175 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
176 "TARGET_SSE && reload_completed"
179 (vec_duplicate:V4SF (match_dup 1))
183 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
184 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move insn.  Alternatives: SSE constant -> xmm, xmm/mem -> xmm,
;; xmm -> mem; at least one operand must be a register.  Constants go
;; through standard_sse_constant_opcode; other moves use movaps when the
;; "mode" attribute is V4SF and movapd otherwise.
;; The "mode" attribute is V4SF when optimize_size is nonzero, when
;; TARGET_SSE2 is zero, or for alternative 2 when
;; TARGET_SSE_TYPELESS_STORES is nonzero; otherwise it is V2DF.
187 (define_insn "*movv2df_internal"
188 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
189 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
191 && (register_operand (operands[0], V2DFmode)
192 || register_operand (operands[1], V2DFmode))"
194 switch (which_alternative)
197 return standard_sse_constant_opcode (insn, operands[1]);
200 if (get_attr_mode (insn) == MODE_V4SF)
201 return "movaps\t{%1, %0|%0, %1}";
203 return "movapd\t{%1, %0|%0, %1}";
208 [(set_attr "type" "sselog1,ssemov,ssemov")
211 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
212 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
213 (and (eq_attr "alternative" "2")
214 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
216 (const_string "V4SF")
217 (const_string "V2DF")))])
;; After reload (and with TARGET_SSE2), rewrite a V2DF register load whose
;; source satisfies zero_extended_scalar_load_operand as a vec_concat of the
;; DFmode scalar (operand 1 narrowed at byte offset 0) with a DFmode zero
;; (operand 2).
220 [(set (match_operand:V2DF 0 "register_operand" "")
221 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
222 "TARGET_SSE2 && reload_completed"
223 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
225 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
226 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every 16-byte vector mode in SSEMODE; the register
;; operand is handed to ix86_expand_push.
229 (define_expand "push<mode>1"
230 [(match_operand:SSEMODE 0 "register_operand" "")]
233 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; delegates to
;; ix86_expand_vector_move_misalign.
237 (define_expand "movmisalign<mode>"
238 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
239 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
242 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned packed-float move: emits movups (V4SF) or movupd (V2DF).
;; Alternatives: xmm/mem -> xmm and xmm -> mem; the condition rejects a
;; memory-to-memory move.
246 (define_insn "<sse>_movup<ssemodesuffixf2c>"
247 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
249 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
251 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
252 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
253 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssemov")
255 (set_attr "mode" "<MODE>")])
;; Unaligned V16QI move (movdqu), expressed as an unspec so it is kept
;; distinct from the ordinary move patterns.  Requires SSE2 and rejects a
;; memory-to-memory move.
257 (define_insn "sse2_movdqu"
258 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
259 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
261 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
262 "movdqu\t{%1, %0|%0, %1}"
263 [(set_attr "type" "ssemov")
264 (set_attr "prefix_data16" "1")
265 (set_attr "mode" "TI")])
;; Non-temporal store of a packed-float xmm register to memory:
;; emits movntps (V4SF) or movntpd (V2DF).
267 (define_insn "<sse>_movnt<mode>"
268 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
270 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
272 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
273 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
274 [(set_attr "type" "ssemov")
275 (set_attr "mode" "<MODE>")])
;; Non-temporal store of a V2DI xmm register to memory (movntdq).
277 (define_insn "sse2_movntv2di"
278 [(set (match_operand:V2DI 0 "memory_operand" "=m")
279 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
282 "movntdq\t{%1, %0|%0, %1}"
283 [(set_attr "type" "ssecvt")
284 (set_attr "prefix_data16" "1")
285 (set_attr "mode" "TI")])
;; Non-temporal store of an SImode general register ("r") to memory (movnti).
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intentional.
287 (define_insn "sse2_movntsi"
288 [(set (match_operand:SI 0 "memory_operand" "=m")
289 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
292 "movnti\t{%1, %0|%0, %1}"
293 [(set_attr "type" "ssecvt")
294 (set_attr "mode" "V2DF")])
;; lddqu: load a V16QI value from memory into an xmm register.  Only a
;; memory source is accepted.
296 (define_insn "sse3_lddqu"
297 [(set (match_operand:V16QI 0 "register_operand" "=x")
298 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
301 "lddqu\t{%1, %0|%0, %1}"
302 [(set_attr "type" "ssecvt")
303 (set_attr "prefix_rep" "1")
304 (set_attr "mode" "TI")])
306 ; Expand patterns for non-temporal stores. At the moment, only those
307 ; that directly map to insns are defined; it would be possible to
308 ; define patterns for other modes that would expand to several insns.
; Each expander below takes a memory destination (operand 0) and a
; register source (operand 1).

; Packed float modes (V4SF, V2DF).
310 (define_expand "storent<mode>"
311 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
313 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
315 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
; Scalar float modes iterated by MODEF.
318 (define_expand "storent<mode>"
319 [(set (match_operand:MODEF 0 "memory_operand" "")
321 [(match_operand:MODEF 1 "register_operand" "")]
; V2DI.
326 (define_expand "storentv2di"
327 [(set (match_operand:V2DI 0 "memory_operand" "")
328 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
; SImode.
333 (define_expand "storentsi"
334 [(set (match_operand:SI 0 "memory_operand" "")
335 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
340 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
342 ;; Parallel floating point arithmetic
344 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-float negation.  The whole expansion is done by
;; ix86_expand_fp_absneg_operator (NEG, ...); DONE suppresses the template.
346 (define_expand "neg<mode>2"
347 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
348 (neg:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
349 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
350 "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;")
;; Packed-float absolute value.  The whole expansion is done by
;; ix86_expand_fp_absneg_operator (ABS, ...); DONE suppresses the template.
352 (define_expand "abs<mode>2"
353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
354 (abs:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
355 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
356 "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;")
;; Packed-float addition expander.  Both inputs may be memory here;
;; ix86_fixup_binary_operands_no_copy adjusts them before the template
;; is emitted.
358 (define_expand "add<mode>3"
359 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
361 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
362 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
363 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
364 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Packed-float addition: addps (V4SF) / addpd (V2DF).  Operand 1 is tied
;; to the destination and marked commutative ("%0"); operand 2 may be an
;; xmm register or memory.
366 (define_insn "*add<mode>3"
367 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
369 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
370 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
372 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
373 "addp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
374 [(set_attr "type" "sseadd")
375 (set_attr "mode" "<MODE>")])
;; Scalar addition inside a vector register: addss (V4SF) / addsd (V2DF).
;; The result is a vec_merge, so only part of the destination receives the
;; sum; operand 1 is tied to the destination, operand 2 may be an xmm
;; register or memory.  The "mode" attribute is the scalar mode (SF / DF).
;; The operand check is passed <MODE>mode rather than a hard-coded
;; V4SFmode, since this pattern is also instantiated for V2DF; this matches
;; *add<mode>3, *mul<mode>3 and <sse>_vmmul<mode>3.
377 (define_insn "<sse>_vmadd<mode>3"
378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
379 (vec_merge:SSEMODEF2P
381 (match_operand:SSEMODEF2P 1 "register_operand" "0")
382 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
385 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
386 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
387 "adds<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
388 [(set_attr "type" "sseadd")
389 (set_attr "mode" "<ssescalarmode>")])
;; Packed-float subtraction expander.  Operand 1 must already be a
;; register; ix86_fixup_binary_operands_no_copy adjusts the operands.
391 (define_expand "sub<mode>3"
392 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
394 (match_operand:SSEMODEF2P 1 "register_operand" "")
395 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
396 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
397 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Packed-float subtraction: subps / subpd.  Not commutative, so operand 1
;; is a register tied to the destination ("0", no "%").
399 (define_insn "*sub<mode>3"
400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
402 (match_operand:SSEMODEF2P 1 "register_operand" "0")
403 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
404 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
405 "subp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
406 [(set_attr "type" "sseadd")
407 (set_attr "mode" "<MODE>")])
;; Scalar subtraction inside a vector register: subss / subsd, expressed as
;; a vec_merge.  The "mode" attribute is the scalar mode (SF / DF).
409 (define_insn "<sse>_vmsub<mode>3"
410 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
411 (vec_merge:SSEMODEF2P
413 (match_operand:SSEMODEF2P 1 "register_operand" "0")
414 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
417 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
418 "subs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
419 [(set_attr "type" "sseadd")
420 (set_attr "mode" "<ssescalarmode>")])
;; Packed-float multiplication expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
422 (define_expand "mul<mode>3"
423 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
425 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
426 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
427 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
428 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed-float multiplication: mulps / mulpd.  Operand 1 is commutative
;; and tied to the destination ("%0").
430 (define_insn "*mul<mode>3"
431 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
433 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
434 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
435 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
436 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
437 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
438 [(set_attr "type" "ssemul")
439 (set_attr "mode" "<MODE>")])
;; Scalar multiplication inside a vector register: mulss / mulsd, expressed
;; as a vec_merge.  The "mode" attribute is the scalar mode (SF / DF).
441 (define_insn "<sse>_vmmul<mode>3"
442 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
443 (vec_merge:SSEMODEF2P
445 (match_operand:SSEMODEF2P 1 "register_operand" "0")
446 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
449 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
450 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
451 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
452 [(set_attr "type" "ssemul")
453 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  After fixing up the operands, it calls
;; ix86_emit_swdivsf when all of the following hold: TARGET_SSE_MATH,
;; TARGET_RECIP, not optimizing for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
455 (define_expand "divv4sf3"
456 [(set (match_operand:V4SF 0 "register_operand" "")
457 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
458 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
461 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
463 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
464 && flag_finite_math_only && !flag_trapping_math
465 && flag_unsafe_math_optimizations)
467 ix86_emit_swdivsf (operands[0], operands[1],
468 operands[2], V4SFmode);
;; V2DF division expander; only fixes up the operands (no software
;; reciprocal path as in divv4sf3).
473 (define_expand "divv2df3"
474 [(set (match_operand:V2DF 0 "register_operand" "")
475 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
476 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
478 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed-float division: divps / divpd.  Operand 1 is a register tied to
;; the destination; operand 2 may be an xmm register or memory.
480 (define_insn "<sse>_div<mode>3"
481 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
483 (match_operand:SSEMODEF2P 1 "register_operand" "0")
484 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
485 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
486 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
487 [(set_attr "type" "ssediv")
488 (set_attr "mode" "<MODE>")])
;; Scalar division inside a vector register: divss / divsd, expressed as a
;; vec_merge.  The "mode" attribute is the scalar mode (SF / DF).
490 (define_insn "<sse>_vmdiv<mode>3"
491 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
492 (vec_merge:SSEMODEF2P
494 (match_operand:SSEMODEF2P 1 "register_operand" "0")
495 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
498 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
499 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
500 [(set_attr "type" "ssediv")
501 (set_attr "mode" "<ssescalarmode>")])
;; rcpps on a V4SF operand, modelled as UNSPEC_RCP.  The source may be an
;; xmm register or memory.
503 (define_insn "sse_rcpv4sf2"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
506 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
508 "rcpps\t{%1, %0|%0, %1}"
509 [(set_attr "type" "sse")
510 (set_attr "mode" "V4SF")])
;; Scalar form: rcpss.  Operand 1 is the source of the unspec; operand 2 is
;; a register tied to the destination ("0").
512 (define_insn "sse_vmrcpv4sf2"
513 [(set (match_operand:V4SF 0 "register_operand" "=x")
515 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
517 (match_operand:V4SF 2 "register_operand" "0")
520 "rcpss\t{%1, %0|%0, %1}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH, TARGET_RECIP,
;; !optimize_size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, it calls ix86_emit_swsqrtsf
;; with a final argument of 0.
524 (define_expand "sqrtv4sf2"
525 [(set (match_operand:V4SF 0 "register_operand" "")
526 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
529 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
530 && flag_finite_math_only && !flag_trapping_math
531 && flag_unsafe_math_optimizations)
533 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Packed single-precision square root: sqrtps.
538 (define_insn "sse_sqrtv4sf2"
539 [(set (match_operand:V4SF 0 "register_operand" "=x")
540 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
542 "sqrtps\t{%1, %0|%0, %1}"
543 [(set_attr "type" "sse")
544 (set_attr "mode" "V4SF")])
;; Packed double-precision square root: sqrtpd.
546 (define_insn "sqrtv2df2"
547 [(set (match_operand:V2DF 0 "register_operand" "=x")
548 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
550 "sqrtpd\t{%1, %0|%0, %1}"
551 [(set_attr "type" "sse")
552 (set_attr "mode" "V2DF")])
;; Scalar square root inside a vector register: sqrtss / sqrtsd, expressed
;; as a vec_merge.  Operand 1 is the source (xmm or memory); operand 2 is a
;; register tied to the destination.
554 (define_insn "<sse>_vmsqrt<mode>2"
555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
556 (vec_merge:SSEMODEF2P
558 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
559 (match_operand:SSEMODEF2P 2 "register_operand" "0")
561 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
562 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  Expansion is done
;; by ix86_emit_swsqrtsf with a final argument of 1.
566 (define_expand "rsqrtv4sf2"
567 [(set (match_operand:V4SF 0 "register_operand" "")
569 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
572 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on a V4SF operand, modelled as UNSPEC_RSQRT.
576 (define_insn "sse_rsqrtv4sf2"
577 [(set (match_operand:V4SF 0 "register_operand" "=x")
579 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
581 "rsqrtps\t{%1, %0|%0, %1}"
582 [(set_attr "type" "sse")
583 (set_attr "mode" "V4SF")])
;; Scalar form: rsqrtss.  Operand 1 is the source of the unspec; operand 2
;; is a register tied to the destination ("0").
585 (define_insn "sse_vmrsqrtv4sf2"
586 [(set (match_operand:V4SF 0 "register_operand" "=x")
588 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
590 (match_operand:V4SF 2 "register_operand" "0")
593 "rsqrtss\t{%1, %0|%0, %1}"
594 [(set_attr "type" "sse")
595 (set_attr "mode" "SF")])
597 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
598 ;; isn't really correct, as those rtl operators aren't defined when
599 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed-float minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
601 (define_expand "smin<mode>3"
602 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
604 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
605 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
606 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
608 if (!flag_finite_math_only)
609 operands[1] = force_reg (<MODE>mode, operands[1]);
610 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
;; minps / minpd, variant used only with flag_finite_math_only: operand 1
;; is treated as commutative ("%0").
613 (define_insn "*smin<mode>3_finite"
614 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
616 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
617 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
618 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
619 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
620 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
621 [(set_attr "type" "sseadd")
622 (set_attr "mode" "<MODE>")])
;; minps / minpd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
624 (define_insn "*smin<mode>3"
625 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
627 (match_operand:SSEMODEF2P 1 "register_operand" "0")
628 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
629 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
630 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "<MODE>")])
;; Scalar minimum inside a vector register: minss / minsd, expressed as a
;; vec_merge.  Operand 1 is tied to the destination; operand 2 may be an
;; xmm register or memory.  The "mode" attribute is the scalar mode.
;; The "type" attribute is "sseadd", the same classification used by
;; <sse>_vmsmax<mode>3 and by the packed min/max patterns, so that both
;; scalar min and max are scheduled alike.
634 (define_insn "<sse>_vmsmin<mode>3"
635 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
636 (vec_merge:SSEMODEF2P
638 (match_operand:SSEMODEF2P 1 "register_operand" "0")
639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
642 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
643 "mins<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
644 [(set_attr "type" "sseadd")
645 (set_attr "mode" "<ssescalarmode>")])
;; Packed-float maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
647 (define_expand "smax<mode>3"
648 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
650 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
652 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
654 if (!flag_finite_math_only)
655 operands[1] = force_reg (<MODE>mode, operands[1]);
656 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
;; maxps / maxpd, variant used only with flag_finite_math_only: operand 1
;; is treated as commutative ("%0").
659 (define_insn "*smax<mode>3_finite"
660 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
662 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
663 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
664 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
665 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
666 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
667 [(set_attr "type" "sseadd")
668 (set_attr "mode" "<MODE>")])
;; maxps / maxpd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
670 (define_insn "*smax<mode>3"
671 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
673 (match_operand:SSEMODEF2P 1 "register_operand" "0")
674 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
675 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
676 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
677 [(set_attr "type" "sseadd")
678 (set_attr "mode" "<MODE>")])
;; Scalar maximum inside a vector register: maxss / maxsd, expressed as a
;; vec_merge.  The "mode" attribute is the scalar mode (SF / DF).
680 (define_insn "<sse>_vmsmax<mode>3"
681 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
682 (vec_merge:SSEMODEF2P
684 (match_operand:SSEMODEF2P 1 "register_operand" "0")
685 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
688 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
689 "maxs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
690 [(set_attr "type" "sseadd")
691 (set_attr "mode" "<ssescalarmode>")])
693 ;; These versions of the min/max patterns implement exactly the operations
694 ;; min = (op1 < op2 ? op1 : op2)
695 ;; max = (!(op1 < op2) ? op1 : op2)
696 ;; Their operands are not commutative, and thus they may be used in the
697 ;; presence of -0.0 and NaN.
;; Minimum variant: emits minps / minpd with operand 1 tied to the
;; destination and operand 2 in an xmm register or memory.
699 (define_insn "*ieee_smin<mode>3"
700 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
702 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
703 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
705 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
706 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
707 [(set_attr "type" "sseadd")
708 (set_attr "mode" "<MODE>")])
;; Non-commutative maximum: emits maxps / maxpd with operand 1 tied to the
;; destination and operand 2 in an xmm register or memory.
710 (define_insn "*ieee_smax<mode>3"
711 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
713 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
714 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
716 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
717 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
718 [(set_attr "type" "sseadd")
719 (set_attr "mode" "<MODE>")])
;; addsubps: one instruction that combines an operation on operands 1 and 2
;; with a subtraction of the same two operands (the minus reuses them via
;; match_dup).
721 (define_insn "sse3_addsubv4sf3"
722 [(set (match_operand:V4SF 0 "register_operand" "=x")
725 (match_operand:V4SF 1 "register_operand" "0")
726 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
727 (minus:V4SF (match_dup 1) (match_dup 2))
730 "addsubps\t{%2, %0|%0, %2}"
731 [(set_attr "type" "sseadd")
732 (set_attr "prefix_rep" "1")
733 (set_attr "mode" "V4SF")])
;; addsubpd: V2DF counterpart of sse3_addsubv4sf3; the minus reuses
;; operands 1 and 2 via match_dup.
735 (define_insn "sse3_addsubv2df3"
736 [(set (match_operand:V2DF 0 "register_operand" "=x")
739 (match_operand:V2DF 1 "register_operand" "0")
740 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
741 (minus:V2DF (match_dup 1) (match_dup 2))
744 "addsubpd\t{%2, %0|%0, %2}"
745 [(set_attr "type" "sseadd")
746 (set_attr "mode" "V2DF")])
;; haddps.  The pattern selects SF elements 0,1 and 2,3 of operand 1
;; (tied to the destination), followed by elements 0,1 and 2,3 of
;; operand 2 (xmm or memory).
748 (define_insn "sse3_haddv4sf3"
749 [(set (match_operand:V4SF 0 "register_operand" "=x")
754 (match_operand:V4SF 1 "register_operand" "0")
755 (parallel [(const_int 0)]))
756 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
758 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
759 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
763 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
764 (parallel [(const_int 0)]))
765 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
767 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
768 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
770 "haddps\t{%2, %0|%0, %2}"
771 [(set_attr "type" "sseadd")
772 (set_attr "prefix_rep" "1")
773 (set_attr "mode" "V4SF")])
;; haddpd.  The pattern selects DF elements 0 and 1 of operand 1 (tied to
;; the destination), followed by elements 0 and 1 of operand 2.
775 (define_insn "sse3_haddv2df3"
776 [(set (match_operand:V2DF 0 "register_operand" "=x")
780 (match_operand:V2DF 1 "register_operand" "0")
781 (parallel [(const_int 0)]))
782 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
785 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
786 (parallel [(const_int 0)]))
787 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
789 "haddpd\t{%2, %0|%0, %2}"
790 [(set_attr "type" "sseadd")
791 (set_attr "mode" "V2DF")])
;; hsubps.  Same element selection as sse3_haddv4sf3: SF elements 0,1 and
;; 2,3 of operand 1, followed by the same elements of operand 2.
793 (define_insn "sse3_hsubv4sf3"
794 [(set (match_operand:V4SF 0 "register_operand" "=x")
799 (match_operand:V4SF 1 "register_operand" "0")
800 (parallel [(const_int 0)]))
801 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
803 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
804 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
808 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
809 (parallel [(const_int 0)]))
810 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
812 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
813 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
815 "hsubps\t{%2, %0|%0, %2}"
816 [(set_attr "type" "sseadd")
817 (set_attr "prefix_rep" "1")
818 (set_attr "mode" "V4SF")])
;; hsubpd.  Same element selection as sse3_haddv2df3: DF elements 0 and 1
;; of operand 1, followed by elements 0 and 1 of operand 2.
820 (define_insn "sse3_hsubv2df3"
821 [(set (match_operand:V2DF 0 "register_operand" "=x")
825 (match_operand:V2DF 1 "register_operand" "0")
826 (parallel [(const_int 0)]))
827 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
830 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
831 (parallel [(const_int 0)]))
832 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
834 "hsubpd\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sseadd")
836 (set_attr "mode" "V2DF")])
;; V4SF sum reduction.  Two code paths: two chained haddps instructions
;; through a fresh temporary register, or the generic helper
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
838 (define_expand "reduc_splus_v4sf"
839 [(match_operand:V4SF 0 "register_operand" "")
840 (match_operand:V4SF 1 "register_operand" "")]
845 rtx tmp = gen_reg_rtx (V4SFmode);
846 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
847 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
850 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum reduction: a single haddpd of operand 1 with itself.
854 (define_expand "reduc_splus_v2df"
855 [(match_operand:V2DF 0 "register_operand" "")
856 (match_operand:V2DF 1 "register_operand" "")]
859 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF maximum reduction via ix86_expand_reduc_v4sf with gen_smaxv4sf3.
863 (define_expand "reduc_smax_v4sf"
864 [(match_operand:V4SF 0 "register_operand" "")
865 (match_operand:V4SF 1 "register_operand" "")]
868 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction via ix86_expand_reduc_v4sf with gen_sminv4sf3.
872 (define_expand "reduc_smin_v4sf"
873 [(match_operand:V4SF 0 "register_operand" "")
874 (match_operand:V4SF 1 "register_operand" "")]
877 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
883 ;; Parallel floating point comparisons
885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mask-producing compare for scalar and packed float modes (SSEMODEF4).
;; The comparison code comes from operator 3 (sse_comparison_operator) and
;; is printed with %D3; the mnemonic suffix is ss/sd/ps/pd per mode.
887 (define_insn "<sse>_maskcmp<mode>3"
888 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
889 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
890 [(match_operand:SSEMODEF4 1 "register_operand" "0")
891 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
892 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
894 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
895 [(set_attr "type" "ssecmp")
896 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare inside a vector register (cmp<cc>ss /
;; cmp<cc>sd), expressed as a vec_merge.  Disabled when TARGET_SSE5 is set.
898 (define_insn "<sse>_vmmaskcmp<mode>3"
899 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
900 (vec_merge:SSEMODEF2P
901 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
902 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
903 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
906 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
907 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
908 [(set_attr "type" "ssecmp")
909 (set_attr "mode" "<ssescalarmode>")])
;; comiss / comisd: compares element 0 of operand 0 with element 0 of
;; operand 1 and sets the flags register in CCFP mode.
911 (define_insn "<sse>_comi"
912 [(set (reg:CCFP FLAGS_REG)
915 (match_operand:<ssevecmode> 0 "register_operand" "x")
916 (parallel [(const_int 0)]))
918 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
919 (parallel [(const_int 0)]))))]
920 "SSE_FLOAT_MODE_P (<MODE>mode)"
921 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
922 [(set_attr "type" "ssecomi")
923 (set_attr "mode" "<MODE>")])
;; ucomiss / ucomisd: same operand shape as <sse>_comi, but the flags
;; register is set in CCFPU mode.
925 (define_insn "<sse>_ucomi"
926 [(set (reg:CCFPU FLAGS_REG)
929 (match_operand:<ssevecmode> 0 "register_operand" "x")
930 (parallel [(const_int 0)]))
932 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
933 (parallel [(const_int 0)]))))]
934 "SSE_FLOAT_MODE_P (<MODE>mode)"
935 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
936 [(set_attr "type" "ssecomi")
937 (set_attr "mode" "<MODE>")])
;; Vector conditional select for packed float modes.  Operands 4 and 5 are
;; the values being compared, operands 1 and 2 the values selected between.
;; Expansion is attempted by ix86_expand_fp_vcond, whose return value is
;; tested.
939 (define_expand "vcond<mode>"
940 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
941 (if_then_else:SSEMODEF2P
943 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
944 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
945 (match_operand:SSEMODEF2P 1 "general_operand" "")
946 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
947 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
949 if (ix86_expand_fp_vcond (operands))
955 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
957 ;; Parallel floating point logical operations
959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise AND expander for packed float modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
961 (define_expand "and<mode>3"
962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
964 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
965 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
966 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
967 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND on packed float modes: andps (V4SF) / andpd (V2DF).
;; Operand 1 is commutative and tied to the destination ("%0"); operand 2
;; may be an xmm register or memory.
;; The operand check is passed <MODE>mode rather than a hard-coded
;; V4SFmode, since this pattern is also instantiated for V2DF; this matches
;; the sibling *ior<mode>3 and *xor<mode>3 patterns.
969 (define_insn "*and<mode>3"
970 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
972 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
973 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
974 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
975 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
976 "andp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
977 [(set_attr "type" "sselog")
978 (set_attr "mode" "<MODE>")])
;; andnps / andnpd on packed float modes.  Operand 1 is a register tied to
;; the destination; operand 2 may be an xmm register or memory.
980 (define_insn "<sse>_nand<mode>3"
981 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
984 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
985 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
986 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
987 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
988 [(set_attr "type" "sselog")
989 (set_attr "mode" "<MODE>")])
;; Bitwise OR expander for packed float modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
991 (define_expand "ior<mode>3"
992 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
994 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
995 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
996 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
997 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise OR on packed float modes: orps / orpd.  Operand 1 is commutative
;; and tied to the destination ("%0").
999 (define_insn "*ior<mode>3"
1000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1002 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1003 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1005 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
1006 "orp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1007 [(set_attr "type" "sselog")
1008 (set_attr "mode" "<MODE>")])
;; Bitwise XOR expander for packed float modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
1010 (define_expand "xor<mode>3"
1011 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1013 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1014 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1015 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1016 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Vector-float exclusive OR insn.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be register or memory.  The condition
;; also requires ix86_binary_operator_ok for XOR.  Emits xorps/xorpd.
1018 (define_insn "*xor<mode>3"
1019 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1021 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1023 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1024 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
1025 "xorp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1026 [(set_attr "type" "sselog")
1027 (set_attr "mode" "<MODE>")])
1029 ;; Also define scalar versions. These are used for abs, neg, and
1030 ;; conditional move. Using subregs into vector modes causes register
1031 ;; allocation lossage. These patterns do not allow memory operands
1032 ;; because the native instructions read the full 128-bits.
;; Scalar-mode AND over the MODEF iterator (defined outside this chunk).
;; All three operands are SSE registers; operand 1 is tied to the
;; destination.  The "mode" attribute is the vector mode <ssevecmode>.
1034 (define_insn "*and<mode>3"
1035 [(set (match_operand:MODEF 0 "register_operand" "=x")
1037 (match_operand:MODEF 1 "register_operand" "0")
1038 (match_operand:MODEF 2 "register_operand" "x")))]
1039 "SSE_FLOAT_MODE_P (<MODE>mode)"
1040 "andp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1041 [(set_attr "type" "sselog")
1042 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode AND-NOT over the MODEF iterator.  Register operands only;
;; operand 1 is tied to the destination.  Emits the packed andnp form with
;; the <ssemodefsuffix> suffix.
1044 (define_insn "*nand<mode>3"
1045 [(set (match_operand:MODEF 0 "register_operand" "=x")
1048 (match_operand:MODEF 1 "register_operand" "0"))
1049 (match_operand:MODEF 2 "register_operand" "x")))]
1050 "SSE_FLOAT_MODE_P (<MODE>mode)"
1051 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sselog")
1053 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode inclusive OR over the MODEF iterator.  Register operands
;; only; operand 1 is tied to the destination.  Emits the packed orp form
;; with the <ssemodefsuffix> suffix.
1055 (define_insn "*ior<mode>3"
1056 [(set (match_operand:MODEF 0 "register_operand" "=x")
1058 (match_operand:MODEF 1 "register_operand" "0")
1059 (match_operand:MODEF 2 "register_operand" "x")))]
1060 "SSE_FLOAT_MODE_P (<MODE>mode)"
1061 "orp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1062 [(set_attr "type" "sselog")
1063 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode exclusive OR over the MODEF iterator.  Register operands
;; only; operand 1 is tied to the destination.  Emits the packed xorp form
;; with the <ssemodefsuffix> suffix.
1065 (define_insn "*xor<mode>3"
1066 [(set (match_operand:MODEF 0 "register_operand" "=x")
1068 (match_operand:MODEF 1 "register_operand" "0")
1069 (match_operand:MODEF 2 "register_operand" "x")))]
1070 "SSE_FLOAT_MODE_P (<MODE>mode)"
1071 "xorp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1072 [(set_attr "type" "sselog")
1073 (set_attr "mode" "<ssevecmode>")])
1075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1077 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1078 ;; scalar versions of the instructions as well as the vector versions.
1080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1082 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1083 ;; combine to generate a multiply/add with two memory references. We then
1084 ;; split this insn, into loading up the destination register with one of the
1085 ;; memory operations. If we don't manage to split the insn, reload will
1086 ;; generate the appropriate moves. The reason this is needed, is that combine
1087 ;; has already folded one of the memory references into both the multiply and
1088 ;; add insns, and it can't generate a new pseudo. I.e.:
1089 ;; (set (reg1) (mem (addr1)))
1090 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1091 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 multiply-add over SSEMODEF4 (SF, DF, V4SF, V2DF) with four
;; constraint alternatives.  Operands 1 and 2 are grouped together
;; (operand 1 is marked commutative) and operand 3 follows them.
;; Needs TARGET_SSE5 and TARGET_FUSED_MADD.  ix86_sse5_valid_op_p is called
;; with a last argument of 2 -- presumably the number of memory operands
;; tolerated, per the comment above about two memory references.
1093 (define_insn "sse5_fmadd<mode>4"
1094 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1097 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1098 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1099 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1100 "TARGET_SSE5 && TARGET_FUSED_MADD
1101 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1102 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1103 [(set_attr "type" "ssemuladd")
1104 (set_attr "mode" "<MODE>")])
1106 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Splitter for the fmadd shape.  The visible condition accepts operands
;; that fail ix86_sse5_valid_op_p with a limit of 1 but pass with a limit
;; of 2, and only when the destination register is not mentioned in any
;; source operand.  The body calls ix86_expand_sse5_multiple_memory on the
;; operands and then re-emits the operation via gen_sse5_fmadd<mode>4.
1108 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1111 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1112 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1113 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1115 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1116 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1117 && !reg_mentioned_p (operands[0], operands[1])
1118 && !reg_mentioned_p (operands[0], operands[2])
1119 && !reg_mentioned_p (operands[0], operands[3])"
1122 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1123 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1124 operands[2], operands[3]));
1128 ;; For the scalar operations, use operand1 for the upper words that aren't
1129 ;; modified, so restrict the forms that are generated.
1130 ;; Scalar version of fmadd
;; Scalar (vec_merge) multiply-add on V4SF/V2DF.  Operand 1 is tied to the
;; destination in both alternatives; exactly one of operands 2 and 3 may be
;; memory.  Uses the scalar suffix from <ssemodesuffixf2s> (ss/sd) and
;; calls ix86_sse5_valid_op_p with a last argument of 1.
1131 (define_insn "sse5_vmfmadd<mode>4"
1132 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1133 (vec_merge:SSEMODEF2P
1136 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1137 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1138 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1141 "TARGET_SSE5 && TARGET_FUSED_MADD
1142 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1143 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1144 [(set_attr "type" "ssemuladd")
1145 (set_attr "mode" "<MODE>")])
1147 ;; Floating multiply and subtract
1148 ;; Allow two memory operands the same as fmadd
;; SSE5 multiply-subtract over SSEMODEF4, with the same operand layout and
;; four constraint alternatives as sse5_fmadd<mode>4.  Needs TARGET_SSE5
;; and TARGET_FUSED_MADD; ix86_sse5_valid_op_p is called with a last
;; argument of 2.
1149 (define_insn "sse5_fmsub<mode>4"
1150 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1153 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1154 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1155 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1156 "TARGET_SSE5 && TARGET_FUSED_MADD
1157 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1158 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1159 [(set_attr "type" "ssemuladd")
1160 (set_attr "mode" "<MODE>")])
1162 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Splitter for the fmsub shape.  Accepts operands that fail
;; ix86_sse5_valid_op_p with a limit of 1 but pass with a limit of 2, and
;; whose destination register is not mentioned in any source operand.  The
;; body calls ix86_expand_sse5_multiple_memory, then re-emits through
;; gen_sse5_fmsub<mode>4.
1164 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1167 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1168 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1169 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1171 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1172 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1173 && !reg_mentioned_p (operands[0], operands[1])
1174 && !reg_mentioned_p (operands[0], operands[2])
1175 && !reg_mentioned_p (operands[0], operands[3])"
1178 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1179 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1180 operands[2], operands[3]));
1184 ;; For the scalar operations, use operand1 for the upper words that aren't
1185 ;; modified, so restrict the forms that are generated.
1186 ;; Scalar version of fmsub
;; Scalar (vec_merge) multiply-subtract on V4SF/V2DF.  Operand 1 is tied to
;; the destination; exactly one of operands 2 and 3 may be memory.  Uses
;; the ss/sd suffix and calls ix86_sse5_valid_op_p with a last argument
;; of 1.
1187 (define_insn "sse5_vmfmsub<mode>4"
1188 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1189 (vec_merge:SSEMODEF2P
1192 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1193 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1194 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1197 "TARGET_SSE5 && TARGET_FUSED_MADD
1198 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1199 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1200 [(set_attr "type" "ssemuladd")
1201 (set_attr "mode" "<MODE>")])
1203 ;; Floating point negative multiply and add
1204 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1205 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1206 ;; Allow two memory operands to help in optimizing.
;; SSE5 negated multiply-add over SSEMODEF4.  Operand 3 appears first in
;; the pattern, ahead of operands 1 and 2, but the output template still
;; prints the operands in numeric order.  Four constraint alternatives;
;; ix86_sse5_valid_op_p is called with a last argument of 2.
1207 (define_insn "sse5_fnmadd<mode>4"
1208 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1210 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1212 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1213 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1214 "TARGET_SSE5 && TARGET_FUSED_MADD
1215 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1216 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1217 [(set_attr "type" "ssemuladd")
1218 (set_attr "mode" "<MODE>")])
1220 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Splitter for the fnmadd shape (operand 3 listed before operands 1 and
;; 2).  Accepts operands that fail ix86_sse5_valid_op_p with a limit of 1
;; but pass with 2, and whose destination is not mentioned in any source.
;; The body calls ix86_expand_sse5_multiple_memory, then re-emits through
;; gen_sse5_fnmadd<mode>4.
1222 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1224 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1226 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1227 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1229 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1230 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1231 && !reg_mentioned_p (operands[0], operands[1])
1232 && !reg_mentioned_p (operands[0], operands[2])
1233 && !reg_mentioned_p (operands[0], operands[3])"
1236 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1237 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1238 operands[2], operands[3]));
1242 ;; For the scalar operations, use operand1 for the upper words that aren't
1243 ;; modified, so restrict the forms that are generated.
1244 ;; Scalar version of fnmadd
;; Scalar (vec_merge) negated multiply-add on V4SF/V2DF.  Operand 3 is
;; listed first; operand 1 is tied to the destination; exactly one of
;; operands 2 and 3 may be memory.  Calls ix86_sse5_valid_op_p with a last
;; argument of 1.
1245 (define_insn "sse5_vmfnmadd<mode>4"
1246 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1247 (vec_merge:SSEMODEF2P
1249 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1251 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1252 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1255 "TARGET_SSE5 && TARGET_FUSED_MADD
1256 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1257 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1258 [(set_attr "type" "ssemuladd")
1259 (set_attr "mode" "<MODE>")])
1261 ;; Floating point negative multiply and subtract
1262 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1263 ;; Allow 2 memory operands to help with optimization
;; SSE5 negated multiply-subtract over SSEMODEF4.  Unlike fmadd/fmsub,
;; there are only two constraint alternatives and operand 1 is always tied
;; to the destination, with no commutative marker.  ix86_sse5_valid_op_p
;; is called with a last argument of 2.
1264 (define_insn "sse5_fnmsub<mode>4"
1265 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1269 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1270 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1271 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1272 "TARGET_SSE5 && TARGET_FUSED_MADD
1273 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1274 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1275 [(set_attr "type" "ssemuladd")
1276 (set_attr "mode" "<MODE>")])
1278 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Splitter for the fnmsub shape.  Accepts operands that fail
;; ix86_sse5_valid_op_p with a limit of 1 but pass with 2, and whose
;; destination is not mentioned in any source operand.  The body calls
;; ix86_expand_sse5_multiple_memory, then re-emits through
;; gen_sse5_fnmsub<mode>4.
1280 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1284 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1285 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1286 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1288 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1289 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1290 && !reg_mentioned_p (operands[0], operands[1])
1291 && !reg_mentioned_p (operands[0], operands[2])
1292 && !reg_mentioned_p (operands[0], operands[3])"
1295 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1296 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1297 operands[2], operands[3]));
1301 ;; For the scalar operations, use operand1 for the upper words that aren't
1302 ;; modified, so restrict the forms that are generated.
1303 ;; Scalar version of fnmsub
;; Scalar (vec_merge) negated multiply-subtract on V4SF/V2DF.  Operand 1 is
;; tied to the destination; exactly one of operands 2 and 3 may be memory
;; in either constraint alternative.
;; ix86_sse5_valid_op_p is called with a last argument of 1, like the other
;; scalar forms (vmfmadd, vmfmsub, vmfnmadd): the alternatives admit a
;; single memory operand and the scalar forms have no splitter.
1304 (define_insn "sse5_vmfnmsub<mode>4"
1305 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1306 (vec_merge:SSEMODEF2P
1310 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1311 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1312 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1315 "TARGET_SSE5 && TARGET_FUSED_MADD
1316 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1317 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1318 [(set_attr "type" "ssemuladd")
1319 (set_attr "mode" "<MODE>")])
1321 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1322 ;; even if the user used -mno-fused-madd
1323 ;; Parallel instructions. During instruction generation, just default
1324 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for packed fmadd (V4SF/V2DF), wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.
;; When TARGET_FUSED_MADD is set, the C body emits the plain
;; sse5_fmadd<mode>4 insn instead of the UNSPEC form.
1325 (define_expand "sse5i_fmadd<mode>4"
1326 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1330 (match_operand:SSEMODEF2P 1 "register_operand" "")
1331 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1332 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1333 UNSPEC_SSE5_INTRINSIC))]
1336 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1337 if (TARGET_FUSED_MADD)
1339 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1340 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fmadd.  The condition needs only
;; TARGET_SSE5 (no TARGET_FUSED_MADD test) and calls ix86_sse5_valid_op_p
;; with a last argument of 1.  Four constraint alternatives; emits
;; fmaddps/fmaddpd.
1345 (define_insn "*sse5i_fmadd<mode>4"
1346 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1350 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1351 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1352 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1353 UNSPEC_SSE5_INTRINSIC))]
1354 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1355 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1356 [(set_attr "type" "ssemuladd")
1357 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for packed fmsub (V4SF/V2DF), wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.
;; When TARGET_FUSED_MADD is set, the C body emits the plain
;; sse5_fmsub<mode>4 insn instead of the UNSPEC form.
1359 (define_expand "sse5i_fmsub<mode>4"
1360 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1364 (match_operand:SSEMODEF2P 1 "register_operand" "")
1365 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1366 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1367 UNSPEC_SSE5_INTRINSIC))]
1370 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1371 if (TARGET_FUSED_MADD)
1373 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1374 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fmsub.  The condition needs only
;; TARGET_SSE5 and calls ix86_sse5_valid_op_p with a last argument of 1.
;; Four constraint alternatives; emits fmsubps/fmsubpd.
;; Operand 1 uses nonimmediate_operand so that its fourth alternative
;; ("xm") can match memory, the same predicate/constraint pairing used by
;; the *sse5i_fmadd, *sse5i_fnmadd and *sse5i_fnmsub insns.
1379 (define_insn "*sse5i_fmsub<mode>4"
1380 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1384 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1385 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1386 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1387 UNSPEC_SSE5_INTRINSIC))]
1388 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1389 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1390 [(set_attr "type" "ssemuladd")
1391 (set_attr "mode" "<MODE>")])
1393 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1394 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for packed fnmadd, wrapped in UNSPEC_SSE5_INTRINSIC.
;; Operand 3 is listed before operands 1 and 2 in the pattern; the call to
;; gen_sse5_fnmadd<mode>4 still passes operands in numeric order.  That
;; plain insn is emitted when TARGET_FUSED_MADD is set.
1395 (define_expand "sse5i_fnmadd<mode>4"
1396 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1399 (match_operand:SSEMODEF2P 3 "register_operand" "")
1401 (match_operand:SSEMODEF2P 1 "register_operand" "")
1402 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1403 UNSPEC_SSE5_INTRINSIC))]
1406 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1407 if (TARGET_FUSED_MADD)
1409 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1410 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fnmadd.  Operand 3 is listed first.
;; The condition needs only TARGET_SSE5 and calls ix86_sse5_valid_op_p
;; with a last argument of 1.  Emits fnmaddps/fnmaddpd.
1415 (define_insn "*sse5i_fnmadd<mode>4"
1416 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1419 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1421 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1422 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1423 UNSPEC_SSE5_INTRINSIC))]
1424 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1425 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1426 [(set_attr "type" "ssemuladd")
1427 (set_attr "mode" "<MODE>")])
1429 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander for packed fnmsub, wrapped in UNSPEC_SSE5_INTRINSIC.
;; All operands are registers at expand time.  When TARGET_FUSED_MADD is
;; set, the C body emits the plain sse5_fnmsub<mode>4 insn instead.
1430 (define_expand "sse5i_fnmsub<mode>4"
1431 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1436 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1437 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1438 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1439 UNSPEC_SSE5_INTRINSIC))]
1442 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1443 if (TARGET_FUSED_MADD)
1445 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1446 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fnmsub.  Four constraint
;; alternatives with operand 1 marked commutative.  The condition needs
;; only TARGET_SSE5 and calls ix86_sse5_valid_op_p with a last argument
;; of 1.  Emits fnmsubps/fnmsubpd.
1451 (define_insn "*sse5i_fnmsub<mode>4"
1452 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1457 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1458 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1459 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1460 UNSPEC_SSE5_INTRINSIC))]
1461 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1462 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1463 [(set_attr "type" "ssemuladd")
1464 (set_attr "mode" "<MODE>")])
1466 ;; Scalar instructions
;; Intrinsic expander for scalar (vec_merge) fmadd, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.
;; When TARGET_FUSED_MADD is set, the C body emits the plain
;; sse5_vmfmadd<mode>4 insn instead.
1467 (define_expand "sse5i_vmfmadd<mode>4"
1468 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1470 [(vec_merge:SSEMODEF2P
1473 (match_operand:SSEMODEF2P 1 "register_operand" "")
1474 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1475 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1478 UNSPEC_SSE5_INTRINSIC))]
1481 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1482 if (TARGET_FUSED_MADD)
1484 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1485 operands[2], operands[3]));
1490 ;; For the scalar operations, use operand1 for the upper words that aren't
1491 ;; modified, so restrict the forms that are accepted.
;; UNSPEC_SSE5_INTRINSIC form of scalar fmadd.  Operand 1 is a register
;; tied to the destination; exactly one of operands 2 and 3 may be memory.
;; Emits fmaddss/fmaddsd; the "mode" attribute is <ssescalarmode>.
;; NOTE(review): operand 1 uses register_operand here while the sibling
;; *sse5i_vm* insns use nonimmediate_operand with the same "0,0"
;; constraint -- confirm whether the difference is intended.
1492 (define_insn "*sse5i_vmfmadd<mode>4"
1493 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1495 [(vec_merge:SSEMODEF2P
1498 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1499 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1500 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1503 UNSPEC_SSE5_INTRINSIC))]
1504 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1505 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1506 [(set_attr "type" "ssemuladd")
1507 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for scalar (vec_merge) fmsub, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.
;; When TARGET_FUSED_MADD is set, the C body emits the plain
;; sse5_vmfmsub<mode>4 insn instead.
1509 (define_expand "sse5i_vmfmsub<mode>4"
1510 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1512 [(vec_merge:SSEMODEF2P
1515 (match_operand:SSEMODEF2P 1 "register_operand" "")
1516 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1517 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1520 UNSPEC_SSE5_INTRINSIC))]
1523 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1524 if (TARGET_FUSED_MADD)
1526 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1527 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fmsub.  Operand 1 is tied to the
;; destination; exactly one of operands 2 and 3 may be memory.  Emits
;; fmsubss/fmsubsd; the "mode" attribute is <ssescalarmode>.
1532 (define_insn "*sse5i_vmfmsub<mode>4"
1533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1535 [(vec_merge:SSEMODEF2P
1538 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1539 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1540 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1543 UNSPEC_SSE5_INTRINSIC))]
1544 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1545 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1546 [(set_attr "type" "ssemuladd")
1547 (set_attr "mode" "<ssescalarmode>")])
1549 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for scalar (vec_merge) fnmadd, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  Operand 3 is listed before operands 1 and 2.
;; When TARGET_FUSED_MADD is set, the C body emits the plain
;; sse5_vmfnmadd<mode>4 insn, passing operands in numeric order.
1550 (define_expand "sse5i_vmfnmadd<mode>4"
1551 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1553 [(vec_merge:SSEMODEF2P
1555 (match_operand:SSEMODEF2P 3 "register_operand" "")
1557 (match_operand:SSEMODEF2P 1 "register_operand" "")
1558 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1561 UNSPEC_SSE5_INTRINSIC))]
1564 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1565 if (TARGET_FUSED_MADD)
1567 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1568 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fnmadd.  Operand 3 is listed
;; first; operand 1 is tied to the destination; exactly one of operands 2
;; and 3 may be memory.  Emits fnmaddss/fnmaddsd.
1573 (define_insn "*sse5i_vmfnmadd<mode>4"
1574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1576 [(vec_merge:SSEMODEF2P
1578 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1580 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1581 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1584 UNSPEC_SSE5_INTRINSIC))]
1585 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1586 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1587 [(set_attr "type" "ssemuladd")
1588 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for scalar (vec_merge) fnmsub, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.
;; When TARGET_FUSED_MADD is set, the C body emits the plain
;; sse5_vmfnmsub<mode>4 insn instead.
1590 (define_expand "sse5i_vmfnmsub<mode>4"
1591 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1593 [(vec_merge:SSEMODEF2P
1597 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1598 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1599 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1602 UNSPEC_SSE5_INTRINSIC))]
1605 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1606 if (TARGET_FUSED_MADD)
1608 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1609 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fnmsub.  Operand 1 is tied to the
;; destination; exactly one of operands 2 and 3 may be memory.  Emits
;; fnmsubss/fnmsubsd; the "mode" attribute is <ssescalarmode>.
1614 (define_insn "*sse5i_vmfnmsub<mode>4"
1615 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1617 [(vec_merge:SSEMODEF2P
1621 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1623 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1626 UNSPEC_SSE5_INTRINSIC))]
1627 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1628 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1629 [(set_attr "type" "ssemuladd")
1630 (set_attr "mode" "<ssescalarmode>")])
1632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1634 ;; Parallel single-precision floating point conversion operations
1636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: operand 2 (V2SI, MMX register or memory, "ym") is converted
;; with float:V2SF; operand 1 is a V4SF register tied to the destination.
1638 (define_insn "sse_cvtpi2ps"
1639 [(set (match_operand:V4SF 0 "register_operand" "=x")
1642 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1643 (match_operand:V4SF 1 "register_operand" "0")
1646 "cvtpi2ps\t{%2, %0|%0, %2}"
1647 [(set_attr "type" "ssecvt")
1648 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF source (SSE register or memory) goes through an
;; unspec:V4SI and elements 0 and 1 are selected into a V2SI MMX register
;; ("=y").  Marked unit "mmx".
1650 (define_insn "sse_cvtps2pi"
1651 [(set (match_operand:V2SI 0 "register_operand" "=y")
1653 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1655 (parallel [(const_int 0) (const_int 1)])))]
1657 "cvtps2pi\t{%1, %0|%0, %1}"
1658 [(set_attr "type" "ssecvt")
1659 (set_attr "unit" "mmx")
1660 (set_attr "mode" "DI")])
;; cvttps2pi: same shape as sse_cvtps2pi but the conversion is expressed
;; with fix:V4SI rather than an unspec.  Elements 0 and 1 go to a V2SI MMX
;; register.
;; NOTE(review): "mode" is "SF" here but "DI" on sse_cvtps2pi -- confirm
;; which is intended.
1662 (define_insn "sse_cvttps2pi"
1663 [(set (match_operand:V2SI 0 "register_operand" "=y")
1665 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1666 (parallel [(const_int 0) (const_int 1)])))]
1668 "cvttps2pi\t{%1, %0|%0, %1}"
1669 [(set_attr "type" "ssecvt")
1670 (set_attr "unit" "mmx")
1671 (set_attr "mode" "SF")])
;; cvtsi2ss: operand 2 (SI, general register or memory) is converted with
;; float:SF; operand 1 is a V4SF register tied to the destination.  The
;; two alternatives (r / m) carry separate decode attributes.
1673 (define_insn "sse_cvtsi2ss"
1674 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1677 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1678 (match_operand:V4SF 1 "register_operand" "0,0")
1681 "cvtsi2ss\t{%2, %0|%0, %2}"
1682 [(set_attr "type" "sseicvt")
1683 (set_attr "athlon_decode" "vector,double")
1684 (set_attr "amdfam10_decode" "vector,double")
1685 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant of sse_cvtsi2ss with a DI source; needs
;; TARGET_SSE and TARGET_64BIT.
;; NOTE(review): the second alternative is "rm" here, whereas the SI form
;; and sse2_cvtsi2sdq use plain "m" -- confirm this is deliberate.
1687 (define_insn "sse_cvtsi2ssq"
1688 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1691 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1692 (match_operand:V4SF 1 "register_operand" "0,0")
1694 "TARGET_SSE && TARGET_64BIT"
1695 "cvtsi2ssq\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "sseicvt")
1697 (set_attr "athlon_decode" "vector,double")
1698 (set_attr "amdfam10_decode" "vector,double")
1699 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of a V4SF operand (SSE register or memory) is
;; converted to an SI general register under UNSPEC_FIX_NOTRUNC.
;; No amdfam10_decode attribute is set here, unlike sse_cvtss2si_2.
1701 (define_insn "sse_cvtss2si"
1702 [(set (match_operand:SI 0 "register_operand" "=r,r")
1705 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1706 (parallel [(const_int 0)]))]
1707 UNSPEC_FIX_NOTRUNC))]
1709 "cvtss2si\t{%1, %0|%0, %1}"
1710 [(set_attr "type" "sseicvt")
1711 (set_attr "athlon_decode" "double,vector")
1712 (set_attr "prefix_rep" "1")
1713 (set_attr "mode" "SI")])
;; cvtss2si on a plain SF operand (no vector element selection), converted
;; to SI under UNSPEC_FIX_NOTRUNC.
1715 (define_insn "sse_cvtss2si_2"
1716 [(set (match_operand:SI 0 "register_operand" "=r,r")
1717 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1718 UNSPEC_FIX_NOTRUNC))]
1720 "cvtss2si\t{%1, %0|%0, %1}"
1721 [(set_attr "type" "sseicvt")
1722 (set_attr "athlon_decode" "double,vector")
1723 (set_attr "amdfam10_decode" "double,double")
1724 (set_attr "prefix_rep" "1")
1725 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit variant of sse_cvtss2si producing a DI result from
;; element 0 of a V4SF operand; needs TARGET_SSE and TARGET_64BIT.
1727 (define_insn "sse_cvtss2siq"
1728 [(set (match_operand:DI 0 "register_operand" "=r,r")
1731 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1732 (parallel [(const_int 0)]))]
1733 UNSPEC_FIX_NOTRUNC))]
1734 "TARGET_SSE && TARGET_64BIT"
1735 "cvtss2siq\t{%1, %0|%0, %1}"
1736 [(set_attr "type" "sseicvt")
1737 (set_attr "athlon_decode" "double,vector")
1738 (set_attr "prefix_rep" "1")
1739 (set_attr "mode" "DI")])
;; cvtss2siq on a plain SF operand, converted to DI under
;; UNSPEC_FIX_NOTRUNC; needs TARGET_SSE and TARGET_64BIT.
1741 (define_insn "sse_cvtss2siq_2"
1742 [(set (match_operand:DI 0 "register_operand" "=r,r")
1743 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1744 UNSPEC_FIX_NOTRUNC))]
1745 "TARGET_SSE && TARGET_64BIT"
1746 "cvtss2siq\t{%1, %0|%0, %1}"
1747 [(set_attr "type" "sseicvt")
1748 (set_attr "athlon_decode" "double,vector")
1749 (set_attr "amdfam10_decode" "double,double")
1750 (set_attr "prefix_rep" "1")
1751 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of a V4SF operand (SSE register or memory) is
;; converted to an SI general register.  No unspec wrapper is visible
;; here, unlike sse_cvtss2si.
1753 (define_insn "sse_cvttss2si"
1754 [(set (match_operand:SI 0 "register_operand" "=r,r")
1757 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1758 (parallel [(const_int 0)]))))]
1760 "cvttss2si\t{%1, %0|%0, %1}"
1761 [(set_attr "type" "sseicvt")
1762 (set_attr "athlon_decode" "double,vector")
1763 (set_attr "amdfam10_decode" "double,double")
1764 (set_attr "prefix_rep" "1")
1765 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit variant of sse_cvttss2si producing a DI result;
;; needs TARGET_SSE and TARGET_64BIT.
1767 (define_insn "sse_cvttss2siq"
1768 [(set (match_operand:DI 0 "register_operand" "=r,r")
1771 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1772 (parallel [(const_int 0)]))))]
1773 "TARGET_SSE && TARGET_64BIT"
1774 "cvttss2siq\t{%1, %0|%0, %1}"
1775 [(set_attr "type" "sseicvt")
1776 (set_attr "athlon_decode" "double,vector")
1777 (set_attr "amdfam10_decode" "double,double")
1778 (set_attr "prefix_rep" "1")
1779 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of a V4SI operand (SSE register or memory) into a
;; V4SF register.
1781 (define_insn "sse2_cvtdq2ps"
1782 [(set (match_operand:V4SF 0 "register_operand" "=x")
1783 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1785 "cvtdq2ps\t{%1, %0|%0, %1}"
1786 [(set_attr "type" "ssecvt")
1787 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF to V4SI conversion expressed as unspec:V4SI with
;; UNSPEC_FIX_NOTRUNC.  Sets prefix_data16.
1789 (define_insn "sse2_cvtps2dq"
1790 [(set (match_operand:V4SI 0 "register_operand" "=x")
1791 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1792 UNSPEC_FIX_NOTRUNC))]
1794 "cvtps2dq\t{%1, %0|%0, %1}"
1795 [(set_attr "type" "ssecvt")
1796 (set_attr "prefix_data16" "1")
1797 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF to V4SI conversion expressed with fix:V4SI.  Sets
;; prefix_rep.
1799 (define_insn "sse2_cvttps2dq"
1800 [(set (match_operand:V4SI 0 "register_operand" "=x")
1801 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1803 "cvttps2dq\t{%1, %0|%0, %1}"
1804 [(set_attr "type" "ssecvt")
1805 (set_attr "prefix_rep" "1")
1806 (set_attr "mode" "TI")])
1808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1810 ;; Parallel double-precision floating point conversion operations
1812 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of a V2SI operand.  Two alternatives: MMX register
;; ("y", unit "mmx") or memory ("m", unit "*").
1814 (define_insn "sse2_cvtpi2pd"
1815 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1816 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1818 "cvtpi2pd\t{%1, %0|%0, %1}"
1819 [(set_attr "type" "ssecvt")
1820 (set_attr "unit" "mmx,*")
1821 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF to V2SI conversion into an MMX register, expressed as
;; unspec:V2SI with UNSPEC_FIX_NOTRUNC.  Unit "mmx", prefix_data16 set.
1823 (define_insn "sse2_cvtpd2pi"
1824 [(set (match_operand:V2SI 0 "register_operand" "=y")
1825 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1826 UNSPEC_FIX_NOTRUNC))]
1828 "cvtpd2pi\t{%1, %0|%0, %1}"
1829 [(set_attr "type" "ssecvt")
1830 (set_attr "unit" "mmx")
1831 (set_attr "prefix_data16" "1")
1832 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF to V2SI conversion into an MMX register, expressed with
;; fix:V2SI.  Unit "mmx", prefix_data16 set.
;; NOTE(review): "mode" is "TI" here but "DI" on sse2_cvtpd2pi -- confirm.
1834 (define_insn "sse2_cvttpd2pi"
1835 [(set (match_operand:V2SI 0 "register_operand" "=y")
1836 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1838 "cvttpd2pi\t{%1, %0|%0, %1}"
1839 [(set_attr "type" "ssecvt")
1840 (set_attr "unit" "mmx")
1841 (set_attr "prefix_data16" "1")
1842 (set_attr "mode" "TI")])
;; cvtsi2sd: operand 2 (SI, general register or memory) is converted with
;; float:DF; operand 1 is a V2DF register tied to the destination.
1844 (define_insn "sse2_cvtsi2sd"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1848 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1849 (match_operand:V2DF 1 "register_operand" "0,0")
1852 "cvtsi2sd\t{%2, %0|%0, %2}"
1853 [(set_attr "type" "sseicvt")
1854 (set_attr "mode" "DF")
1855 (set_attr "athlon_decode" "double,direct")
1856 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit variant of sse2_cvtsi2sd with a DI source; needs
;; TARGET_SSE2 and TARGET_64BIT.
1858 (define_insn "sse2_cvtsi2sdq"
1859 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1862 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1863 (match_operand:V2DF 1 "register_operand" "0,0")
1865 "TARGET_SSE2 && TARGET_64BIT"
1866 "cvtsi2sdq\t{%2, %0|%0, %2}"
1867 [(set_attr "type" "sseicvt")
1868 (set_attr "mode" "DF")
1869 (set_attr "athlon_decode" "double,direct")
1870 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of a V2DF operand (SSE register or memory) is
;; converted to an SI general register under UNSPEC_FIX_NOTRUNC.
;; No amdfam10_decode attribute is set here, unlike sse2_cvtsd2si_2.
1872 (define_insn "sse2_cvtsd2si"
1873 [(set (match_operand:SI 0 "register_operand" "=r,r")
1876 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1877 (parallel [(const_int 0)]))]
1878 UNSPEC_FIX_NOTRUNC))]
1880 "cvtsd2si\t{%1, %0|%0, %1}"
1881 [(set_attr "type" "sseicvt")
1882 (set_attr "athlon_decode" "double,vector")
1883 (set_attr "prefix_rep" "1")
1884 (set_attr "mode" "SI")])
;; cvtsd2si on a plain DF operand (no vector element selection), converted
;; to SI under UNSPEC_FIX_NOTRUNC.
1886 (define_insn "sse2_cvtsd2si_2"
1887 [(set (match_operand:SI 0 "register_operand" "=r,r")
1888 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1889 UNSPEC_FIX_NOTRUNC))]
1891 "cvtsd2si\t{%1, %0|%0, %1}"
1892 [(set_attr "type" "sseicvt")
1893 (set_attr "athlon_decode" "double,vector")
1894 (set_attr "amdfam10_decode" "double,double")
1895 (set_attr "prefix_rep" "1")
1896 (set_attr "mode" "SI")])
;; cvtsd2siq: 64-bit variant of sse2_cvtsd2si producing a DI result from
;; element 0 of a V2DF operand; needs TARGET_SSE2 and TARGET_64BIT.
1898 (define_insn "sse2_cvtsd2siq"
1899 [(set (match_operand:DI 0 "register_operand" "=r,r")
1902 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1903 (parallel [(const_int 0)]))]
1904 UNSPEC_FIX_NOTRUNC))]
1905 "TARGET_SSE2 && TARGET_64BIT"
1906 "cvtsd2siq\t{%1, %0|%0, %1}"
1907 [(set_attr "type" "sseicvt")
1908 (set_attr "athlon_decode" "double,vector")
1909 (set_attr "prefix_rep" "1")
1910 (set_attr "mode" "DI")])
;; cvtsd2siq on a plain DF operand, converted to DI under
;; UNSPEC_FIX_NOTRUNC; needs TARGET_SSE2 and TARGET_64BIT.
1912 (define_insn "sse2_cvtsd2siq_2"
1913 [(set (match_operand:DI 0 "register_operand" "=r,r")
1914 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1915 UNSPEC_FIX_NOTRUNC))]
1916 "TARGET_SSE2 && TARGET_64BIT"
1917 "cvtsd2siq\t{%1, %0|%0, %1}"
1918 [(set_attr "type" "sseicvt")
1919 (set_attr "athlon_decode" "double,vector")
1920 (set_attr "amdfam10_decode" "double,double")
1921 (set_attr "prefix_rep" "1")
1922 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of a V2DF operand (SSE register or memory) is
;; converted to an SI general register.  No unspec wrapper is visible
;; here, unlike sse2_cvtsd2si.
1924 (define_insn "sse2_cvttsd2si"
1925 [(set (match_operand:SI 0 "register_operand" "=r,r")
1928 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1929 (parallel [(const_int 0)]))))]
1931 "cvttsd2si\t{%1, %0|%0, %1}"
1932 [(set_attr "type" "sseicvt")
1933 (set_attr "prefix_rep" "1")
1934 (set_attr "mode" "SI")
1935 (set_attr "athlon_decode" "double,vector")
1936 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: 64-bit variant of sse2_cvttsd2si producing a DI result;
;; needs TARGET_SSE2 and TARGET_64BIT.
1938 (define_insn "sse2_cvttsd2siq"
1939 [(set (match_operand:DI 0 "register_operand" "=r,r")
1942 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1943 (parallel [(const_int 0)]))))]
1944 "TARGET_SSE2 && TARGET_64BIT"
1945 "cvttsd2siq\t{%1, %0|%0, %1}"
1946 [(set_attr "type" "sseicvt")
1947 (set_attr "prefix_rep" "1")
1948 (set_attr "mode" "DI")
1949 (set_attr "athlon_decode" "double,vector")
1950 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: elements 0 and 1 of a V4SI operand (SSE register or memory)
;; are converted into a V2DF register.
1952 (define_insn "sse2_cvtdq2pd"
1953 [(set (match_operand:V2DF 0 "register_operand" "=x")
1956 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1957 (parallel [(const_int 0) (const_int 1)]))))]
1959 "cvtdq2pd\t{%1, %0|%0, %1}"
1960 [(set_attr "type" "ssecvt")
1961 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to
;; the V2SI zero constant, which the *sse2_cvtpd2dq insn matches with
;; const0_operand.
1963 (define_expand "sse2_cvtpd2dq"
1964 [(set (match_operand:V4SI 0 "register_operand" "")
1966 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1970 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq insn: a V2DF source converted via unspec:V2SI, combined with a
;; V2SI const0 operand (operand 2) to form the V4SI destination.
1972 (define_insn "*sse2_cvtpd2dq"
1973 [(set (match_operand:V4SI 0 "register_operand" "=x")
1975 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1977 (match_operand:V2SI 2 "const0_operand" "")))]
1979 "cvtpd2dq\t{%1, %0|%0, %1}"
1980 [(set_attr "type" "ssecvt")
1981 (set_attr "prefix_rep" "1")
1982 (set_attr "mode" "TI")
1983 (set_attr "amdfam10_decode" "double")])
;; Expander for the fix (truncating) V2DF -> V2SI conversion placed into a
;; V4SI destination.  Operand 2 is set to an all-zero V2SI constant for the
;; matching *sse2_cvttpd2dq insn.
1985 (define_expand "sse2_cvttpd2dq"
1986 [(set (match_operand:V4SI 0 "register_operand" "")
1988 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1991 "operands[2] = CONST0_RTX (V2SImode);")
;; Emits cvttpd2dq: fix:V2SI of V2DF operand 1, combined with a V2SI zero
;; constant (operand 2) to fill the V4SI destination.
1993 (define_insn "*sse2_cvttpd2dq"
1994 [(set (match_operand:V4SI 0 "register_operand" "=x")
1996 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1997 (match_operand:V2SI 2 "const0_operand" "")))]
1999 "cvttpd2dq\t{%1, %0|%0, %1}"
2000 [(set_attr "type" "ssecvt")
2001 (set_attr "prefix_rep" "1")
2002 (set_attr "mode" "TI")
2003 (set_attr "amdfam10_decode" "double")])
;; Emits cvtsd2ss.  Operand 2 (V2DF, register or memory) is the value being
;; float-truncated; operand 1 is tied to the destination ("0") and supplies
;; the V4SF contents the result is merged with.
;; NOTE(review): the merge rtx and insn condition are not visible here.
2005 (define_insn "sse2_cvtsd2ss"
2006 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2009 (float_truncate:V2SF
2010 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2011 (match_operand:V4SF 1 "register_operand" "0,0")
2014 "cvtsd2ss\t{%2, %0|%0, %2}"
2015 [(set_attr "type" "ssecvt")
2016 (set_attr "athlon_decode" "vector,double")
2017 (set_attr "amdfam10_decode" "vector,double")
2018 (set_attr "mode" "SF")])
;; Emits cvtss2sd.  Elements 0 and 1 of V4SF operand 2 are selected as the
;; conversion source; operand 1 is tied to the destination ("0") and
;; provides the V2DF value the result is combined with.
;; NOTE(review): the merge rtx and insn condition are not visible here.
2020 (define_insn "sse2_cvtss2sd"
2021 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2025 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2026 (parallel [(const_int 0) (const_int 1)])))
2027 (match_operand:V2DF 1 "register_operand" "0,0")
2030 "cvtss2sd\t{%2, %0|%0, %2}"
2031 [(set_attr "type" "ssecvt")
2032 (set_attr "amdfam10_decode" "vector,double")
2033 (set_attr "mode" "DF")])
;; Expander for float_truncate of a V2DF into the V4SF destination.
;; Operand 2 is initialised to an all-zero V2SF constant, as required by
;; the const0_operand in *sse2_cvtpd2ps.
2035 (define_expand "sse2_cvtpd2ps"
2036 [(set (match_operand:V4SF 0 "register_operand" "")
2038 (float_truncate:V2SF
2039 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2042 "operands[2] = CONST0_RTX (V2SFmode);")
;; Emits cvtpd2ps: float_truncate:V2SF of V2DF operand 1, paired with a V2SF
;; zero constant (operand 2) to make up the V4SF destination.
2044 (define_insn "*sse2_cvtpd2ps"
2045 [(set (match_operand:V4SF 0 "register_operand" "=x")
2047 (float_truncate:V2SF
2048 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2049 (match_operand:V2SF 2 "const0_operand" "")))]
2051 "cvtpd2ps\t{%1, %0|%0, %1}"
2052 [(set_attr "type" "ssecvt")
2053 (set_attr "prefix_data16" "1")
2054 (set_attr "mode" "V4SF")
2055 (set_attr "amdfam10_decode" "double")])
;; Emits cvtps2pd: elements 0 and 1 of V4SF operand 1 (register or memory)
;; are converted to give the V2DF result in operand 0.
2057 (define_insn "sse2_cvtps2pd"
2058 [(set (match_operand:V2DF 0 "register_operand" "=x")
2061 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2062 (parallel [(const_int 0) (const_int 1)]))))]
2064 "cvtps2pd\t{%1, %0|%0, %1}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "mode" "V2DF")
2067 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF (operand 0) from the high part of V4SF
;; operand 1.  A fresh V4SF pseudo is allocated as operand 2 for the
;; intermediate step; the final set selects elements 0 and 1.
;; NOTE(review): several rtx lines of this two-step pattern are missing from
;; this excerpt -- presumably a shuffle followed by a float_extend; confirm.
2069 (define_expand "vec_unpacks_hi_v4sf"
2074 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2075 (parallel [(const_int 6)
2079 (set (match_operand:V2DF 0 "register_operand" "")
2083 (parallel [(const_int 0) (const_int 1)]))))]
2086 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF (operand 0) from elements 0 and 1 of V4SF
;; operand 1.
2089 (define_expand "vec_unpacks_lo_v4sf"
2090 [(set (match_operand:V2DF 0 "register_operand" "")
2093 (match_operand:V4SF 1 "nonimmediate_operand" "")
2094 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  The input is
;; first widened into a fresh V4SI pseudo via gen_vec_unpacks_hi_v8hi, then
;; that temporary is converted with gen_sse2_cvtdq2ps.
2097 (define_expand "vec_unpacks_float_hi_v8hi"
2098 [(match_operand:V4SF 0 "register_operand" "")
2099 (match_operand:V8HI 1 "register_operand" "")]
2102 rtx tmp = gen_reg_rtx (V4SImode);
2104 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2105 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  The input is
;; first widened into a fresh V4SI pseudo via gen_vec_unpacks_lo_v8hi, then
;; that temporary is converted with gen_sse2_cvtdq2ps.
2109 (define_expand "vec_unpacks_float_lo_v8hi"
2110 [(match_operand:V4SF 0 "register_operand" "")
2111 (match_operand:V8HI 1 "register_operand" "")]
2114 rtx tmp = gen_reg_rtx (V4SImode);
2116 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2117 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  The input is
;; first widened into a fresh V4SI pseudo via gen_vec_unpacku_hi_v8hi, then
;; that temporary is converted with gen_sse2_cvtdq2ps.
2121 (define_expand "vec_unpacku_float_hi_v8hi"
2122 [(match_operand:V4SF 0 "register_operand" "")
2123 (match_operand:V8HI 1 "register_operand" "")]
2126 rtx tmp = gen_reg_rtx (V4SImode);
2128 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2129 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  The input is
;; first widened into a fresh V4SI pseudo via gen_vec_unpacku_lo_v8hi, then
;; that temporary is converted with gen_sse2_cvtdq2ps.
2133 (define_expand "vec_unpacku_float_lo_v8hi"
2134 [(match_operand:V4SF 0 "register_operand" "")
2135 (match_operand:V8HI 1 "register_operand" "")]
2138 rtx tmp = gen_reg_rtx (V4SImode);
2140 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2141 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF (operand 0) from the high part of V4SI
;; operand 1.  A fresh V4SI pseudo is allocated as operand 2 for the
;; intermediate step; the final set selects elements 0 and 1.
;; NOTE(review): several rtx lines of this two-step pattern are missing from
;; this excerpt -- confirm the exact shuffle/conversion in the full file.
2145 (define_expand "vec_unpacks_float_hi_v4si"
2148 (match_operand:V4SI 1 "nonimmediate_operand" "")
2149 (parallel [(const_int 2)
2153 (set (match_operand:V2DF 0 "register_operand" "")
2157 (parallel [(const_int 0) (const_int 1)]))))]
2160 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF (operand 0) from elements 0 and 1 of V4SI
;; operand 1.
2163 (define_expand "vec_unpacks_float_lo_v4si"
2164 [(set (match_operand:V2DF 0 "register_operand" "")
2167 (match_operand:V4SI 1 "nonimmediate_operand" "")
2168 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander packing two V2DF inputs (operands 1 and 2) into one V4SF
;; (operand 0).  Each input is converted with gen_sse2_cvtpd2ps into its own
;; V4SF temporary (r1, r2); gen_sse_movlhps then combines the two
;; temporaries into the destination.
2171 (define_expand "vec_pack_trunc_v2df"
2172 [(match_operand:V4SF 0 "register_operand" "")
2173 (match_operand:V2DF 1 "nonimmediate_operand" "")
2174 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2179 r1 = gen_reg_rtx (V4SFmode);
2180 r2 = gen_reg_rtx (V4SFmode);
2182 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2183 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2184 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF inputs into one V4SI using the truncating
;; conversion.  Each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary; the temporaries and the destination are then viewed as V2DI
;; (gen_lowpart) so gen_sse2_punpcklqdq can merge them.
2188 (define_expand "vec_pack_sfix_trunc_v2df"
2189 [(match_operand:V4SI 0 "register_operand" "")
2190 (match_operand:V2DF 1 "nonimmediate_operand" "")
2191 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2196 r1 = gen_reg_rtx (V4SImode);
2197 r2 = gen_reg_rtx (V4SImode);
2199 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2200 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2201 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2202 gen_lowpart (V2DImode, r1),
2203 gen_lowpart (V2DImode, r2)));
;; Expander packing two V2DF inputs into one V4SI using gen_sse2_cvtpd2dq
;; (the non-truncating counterpart of vec_pack_sfix_trunc_v2df).  The two
;; V4SI temporaries and the destination are viewed as V2DI (gen_lowpart) so
;; gen_sse2_punpcklqdq can merge them.
2207 (define_expand "vec_pack_sfix_v2df"
2208 [(match_operand:V4SI 0 "register_operand" "")
2209 (match_operand:V2DF 1 "nonimmediate_operand" "")
2210 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2215 r1 = gen_reg_rtx (V4SImode);
2216 r2 = gen_reg_rtx (V4SImode);
2218 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2219 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2220 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2221 gen_lowpart (V2DImode, r1),
2222 gen_lowpart (V2DImode, r2)));
2226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2228 ;; Parallel single-precision floating point element swizzling
2230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative pattern built on operands 1 and 2 (both V4SF), with
;; operand 1 always tied to the destination.  Depending on where the
;; operands live it emits movhlps (reg,reg), movlps from the %H2 form of an
;; offsettable memory operand 2, or movhps storing operand 2 to a memory
;; destination.  Rejected when operands 1 and 2 are both memory.
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm in the operand-printing code.
2232 (define_insn "sse_movhlps"
2233 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2236 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2237 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2238 (parallel [(const_int 6)
2242 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2244 movhlps\t{%2, %0|%0, %2}
2245 movlps\t{%H2, %0|%0, %H2}
2246 movhps\t{%2, %0|%0, %2}"
2247 [(set_attr "type" "ssemov")
2248 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three-alternative pattern on V4SF operands 1 and 2, operand 1 tied to
;; the destination.  Emits movlhps (reg,reg), movhps loading from memory
;; operand 2, or movlps storing operand 2 into the %H0 form of an
;; offsettable memory destination.  Operand legality is delegated to
;; ix86_binary_operator_ok.
2250 (define_insn "sse_movlhps"
2251 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2254 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2255 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2256 (parallel [(const_int 0)
2260 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2262 movlhps\t{%2, %0|%0, %2}
2263 movhps\t{%2, %0|%0, %2}
2264 movlps\t{%2, %H0|%H0, %2}"
2265 [(set_attr "type" "ssemov")
2266 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  The result takes elements 2, 6, 3, 7 (in that order)
;; from the pairing of operand 1 (tied to the destination) with operand 2.
2268 (define_insn "sse_unpckhps"
2269 [(set (match_operand:V4SF 0 "register_operand" "=x")
2272 (match_operand:V4SF 1 "register_operand" "0")
2273 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2274 (parallel [(const_int 2) (const_int 6)
2275 (const_int 3) (const_int 7)])))]
2277 "unpckhps\t{%2, %0|%0, %2}"
2278 [(set_attr "type" "sselog")
2279 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  The result takes elements 0, 4, 1, 5 (in that order)
;; from the pairing of operand 1 (tied to the destination) with operand 2.
2281 (define_insn "sse_unpcklps"
2282 [(set (match_operand:V4SF 0 "register_operand" "=x")
2285 (match_operand:V4SF 1 "register_operand" "0")
2286 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2287 (parallel [(const_int 0) (const_int 4)
2288 (const_int 1) (const_int 5)])))]
2290 "unpcklps\t{%2, %0|%0, %2}"
2291 [(set_attr "type" "sselog")
2292 (set_attr "mode" "V4SF")])
2294 ;; These are modeled with the same vec_concat as the others so that we
2295 ;; capture users of shufps that can use the new instructions
;; Emits movshdup from V4SF operand 1 (register or memory) into operand 0.
;; The element selection starts with index 1.
;; NOTE(review): the remaining selector indices and the insn condition are
;; not visible in this excerpt.
2296 (define_insn "sse3_movshdup"
2297 [(set (match_operand:V4SF 0 "register_operand" "=x")
2300 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2302 (parallel [(const_int 1)
2307 "movshdup\t{%1, %0|%0, %1}"
2308 [(set_attr "type" "sse")
2309 (set_attr "prefix_rep" "1")
2310 (set_attr "mode" "V4SF")])
;; Emits movsldup from V4SF operand 1 (register or memory) into operand 0.
;; The element selection starts with index 0.
;; NOTE(review): the remaining selector indices and the insn condition are
;; not visible in this excerpt.
2312 (define_insn "sse3_movsldup"
2313 [(set (match_operand:V4SF 0 "register_operand" "=x")
2316 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2318 (parallel [(const_int 0)
2323 "movsldup\t{%1, %0|%0, %1}"
2324 [(set_attr "type" "sse")
2325 (set_attr "prefix_rep" "1")
2326 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle control as a single constant (operand 3) and
;; forwarding to sse_shufps_1.  The mask is split into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7); the upper two fields are biased by 4 so they
;; fall in the 4..7 range sse_shufps_1 expects for its last two selectors.
2328 (define_expand "sse_shufps"
2329 [(match_operand:V4SF 0 "register_operand" "")
2330 (match_operand:V4SF 1 "register_operand" "")
2331 (match_operand:V4SF 2 "nonimmediate_operand" "")
2332 (match_operand:SI 3 "const_int_operand" "")]
2335 int mask = INTVAL (operands[3]);
2336 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2337 GEN_INT ((mask >> 0) & 3),
2338 GEN_INT ((mask >> 2) & 3),
2339 GEN_INT (((mask >> 4) & 3) + 4),
2340 GEN_INT (((mask >> 6) & 3) + 4)));
;; Emits shufps with four explicit element selectors: operands 3 and 4 must
;; be in 0..3, operands 5 and 6 in 4..7.  At output time the selectors are
;; folded back into one immediate (two bits each, with the bias of 4
;; removed from operands 5 and 6) and stored in operands[3] for printing.
;; NOTE(review): the declaration/initialisation of `mask` is not visible in
;; this excerpt.
2344 (define_insn "sse_shufps_1"
2345 [(set (match_operand:V4SF 0 "register_operand" "=x")
2348 (match_operand:V4SF 1 "register_operand" "0")
2349 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2350 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2351 (match_operand 4 "const_0_to_3_operand" "")
2352 (match_operand 5 "const_4_to_7_operand" "")
2353 (match_operand 6 "const_4_to_7_operand" "")])))]
2357 mask |= INTVAL (operands[3]) << 0;
2358 mask |= INTVAL (operands[4]) << 2;
2359 mask |= (INTVAL (operands[5]) - 4) << 4;
2360 mask |= (INTVAL (operands[6]) - 4) << 6;
2361 operands[3] = GEN_INT (mask);
2363 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2365 [(set_attr "type" "sselog")
2366 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: movhps to a memory destination, movhlps between registers,
;; or movlps loading from the %H1 form of an offsettable memory source.
2368 (define_insn "sse_storehps"
2369 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2371 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2372 (parallel [(const_int 2) (const_int 3)])))]
2375 movhps\t{%1, %0|%0, %1}
2376 movhlps\t{%1, %0|%0, %1}
2377 movlps\t{%H1, %0|%0, %H1}"
2378 [(set_attr "type" "ssemov")
2379 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of V4SF operand 1 (tied to the destination)
;; with V2SF operand 2.  Alternatives: movhps from memory, movlhps between
;; registers, or movlps storing operand 2 into the %H0 form of an
;; offsettable memory destination.
2381 (define_insn "sse_loadhps"
2382 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2385 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2386 (parallel [(const_int 0) (const_int 1)]))
2387 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2390 movhps\t{%2, %0|%0, %2}
2391 movlhps\t{%2, %0|%0, %2}
2392 movlps\t{%2, %H0|%H0, %2}"
2393 [(set_attr "type" "ssemov")
2394 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: movlps to memory, movaps between registers (a whole
;; register copy), or movlps from memory.
2396 (define_insn "sse_storelps"
2397 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2399 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2400 (parallel [(const_int 0) (const_int 1)])))]
2403 movlps\t{%1, %0|%0, %1}
2404 movaps\t{%1, %0|%0, %1}
2405 movlps\t{%1, %0|%0, %1}"
2406 [(set_attr "type" "ssemov")
2407 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternative 0 has operand 2 tied to the destination and uses shufps with
;; immediate 0xe4 against operand 1; alternatives 1 and 2 have operand 1
;; tied to the destination and use movlps (load from memory, or store to a
;; memory destination).
2409 (define_insn "sse_loadlps"
2410 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2412 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2414 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2415 (parallel [(const_int 2) (const_int 3)]))))]
2418 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2419 movlps\t{%2, %0|%0, %2}
2420 movlps\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sselog,ssemov,ssemov")
2422 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits movss between SSE registers: operand 2 is the source register and
;; operand 1 is tied to the destination.
;; NOTE(review): the merge rtx, its mask and the insn condition are not
;; visible in this excerpt.
2424 (define_insn "sse_movss"
2425 [(set (match_operand:V4SF 0 "register_operand" "=x")
2427 (match_operand:V4SF 2 "register_operand" "x")
2428 (match_operand:V4SF 1 "register_operand" "0")
2431 "movss\t{%2, %0|%0, %2}"
2432 [(set_attr "type" "ssemov")
2433 (set_attr "mode" "SF")])
;; Builds a V4SF from SF operand 1, which is tied to the destination
;; register, by emitting shufps with immediate 0 of the register against
;; itself.
2435 (define_insn "*vec_dupv4sf"
2436 [(set (match_operand:V4SF 0 "register_operand" "=x")
2438 (match_operand:SF 1 "register_operand" "0")))]
2440 "shufps\t{$0, %0, %0|%0, %0, 0}"
2441 [(set_attr "type" "sselog1")
2442 (set_attr "mode" "V4SF")])
2444 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2445 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2446 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands, with SSE ("x") and MMX ("*y")
;; alternatives.  When operand 2 is a register the pair is interleaved
;; (unpcklps / punpckldq); when operand 2 is the zero constant ("C") the
;; value is simply loaded from memory operand 1 (movss / movd).
2447 (define_insn "*sse_concatv2sf"
2448 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2450 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2451 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2454 unpcklps\t{%2, %0|%0, %2}
2455 movss\t{%1, %0|%0, %1}
2456 punpckldq\t{%2, %0|%0, %2}
2457 movd\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2459 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF halves.  Operand 1 is tied to the
;; destination; operand 2 is added with movlhps when it is in a register or
;; movhps when it is in memory.
2461 (define_insn "*sse_concatv4sf"
2462 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2464 (match_operand:V2SF 1 "register_operand" " 0,0")
2465 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2468 movlhps\t{%2, %0|%0, %2}
2469 movhps\t{%2, %0|%0, %2}"
2470 [(set_attr "type" "ssemov")
2471 (set_attr "mode" "V4SF,V2SF")])
;; Expander for V4SF initialisation: all work is delegated to
;; ix86_expand_vector_init, called with `false` as its first argument,
;; the destination (operand 0) and the initialiser (operand 1).
2473 (define_expand "vec_initv4sf"
2474 [(match_operand:V4SF 0 "register_operand" "")
2475 (match_operand 1 "" "")]
2478 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into a V4SF whose remaining contents come from
;; operand 1 (either tied to the destination or the zero constant "C").
;; The three visible templates cover movss from a register, movss from
;; memory and movd from a general register.
;; NOTE(review): the template for the fourth (memory destination)
;; alternative and the insn condition are not visible in this excerpt.
2482 (define_insn "vec_setv4sf_0"
2483 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2486 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2487 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2491 movss\t{%2, %0|%0, %2}
2492 movss\t{%2, %0|%0, %2}
2493 movd\t{%2, %0|%0, %2}
2495 [(set_attr "type" "ssemov")
2496 (set_attr "mode" "SF")])
2498 ;; A subset is vec_setv4sf.
;; Inserts SF operand 2 into V4SF operand 1 (tied to the destination) using
;; insertps.  Operand 3 arrives as a power of two in 1..8; at output time
;; it is rewritten to its log2 shifted left by 4, which is the immediate
;; actually printed.
2499 (define_insn "*vec_setv4sf_sse4_1"
2500 [(set (match_operand:V4SF 0 "register_operand" "=x")
2503 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2504 (match_operand:V4SF 1 "register_operand" "0")
2505 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2508 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2509 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2511 [(set_attr "type" "sselog")
2512 (set_attr "prefix_extra" "1")
2513 (set_attr "mode" "V4SF")])
;; Emits insertps with a caller-supplied 8-bit immediate (operand 3,
;; 0..255).  Operand 2 is the source register and operand 1 is tied to the
;; destination; the operation is modelled as an opaque V4SF unspec.
;; The ';' after the template string starts an (empty) comment in this file
;; format, so it has no effect.
2515 (define_insn "sse4_1_insertps"
2516 [(set (match_operand:V4SF 0 "register_operand" "=x")
2517 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2518 (match_operand:V4SF 1 "register_operand" "0")
2519 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2522 "insertps\t{%3, %2, %0|%0, %2, %3}";
2523 [(set_attr "type" "sselog")
2524 (set_attr "prefix_extra" "1")
2525 (set_attr "mode" "V4SF")])
;; Post-reload splitter for a V4SF memory destination receiving an SF
;; value: it is replaced by a plain SF move of operand 1 to byte offset 0
;; of the memory operand.
;; NOTE(review): the define_split header and part of the matched rtx are
;; not visible in this excerpt.
2528 [(set (match_operand:V4SF 0 "memory_operand" "")
2531 (match_operand:SF 1 "nonmemory_operand" ""))
2534 "TARGET_SSE && reload_completed"
2537 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander setting one element of V4SF operand 0 to SF operand 1.  The
;; element index is the constant operand 2; the work is delegated to
;; ix86_expand_vector_set (first argument `false`).
2541 (define_expand "vec_setv4sf"
2542 [(match_operand:V4SF 0 "register_operand" "")
2543 (match_operand:SF 1 "register_operand" "")
2544 (match_operand 2 "const_int_operand" "")]
2547 ix86_expand_vector_set (false, operands[0], operands[1],
2548 INTVAL (operands[2]));
;; Extraction of element 0 of V4SF operand 1 into SF operand 0; not
;; matched when both operands are memory.  After reload it is split into an
;; ordinary SF move: the source is re-expressed in SFmode, either as the
;; same hard register number or through gen_lowpart.
;; NOTE(review): the test choosing between the two forms is not visible in
;; this excerpt -- presumably a REG_P check; confirm.
2552 (define_insn_and_split "*vec_extractv4sf_0"
2553 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2555 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2556 (parallel [(const_int 0)])))]
2557 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2559 "&& reload_completed"
2562 rtx op1 = operands[1];
2564 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2566 op1 = gen_lowpart (SFmode, op1);
2567 emit_move_insn (operands[0], op1);
;; Emits extractps: the element of V4SF register operand 1 chosen by the
;; constant operand 2 (0..3) is written to SF operand 0, which may be a
;; general register or memory.
2571 (define_insn "*sse4_1_extractps"
2572 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2574 (match_operand:V4SF 1 "register_operand" "x")
2575 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2577 "extractps\t{%2, %1, %0|%0, %1, %2}"
2578 [(set_attr "type" "sselog")
2579 (set_attr "prefix_extra" "1")
2580 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF in offsettable
;; memory.  The split turns it into a plain SF load from the memory operand
;; at byte offset 4 * index.
;; NOTE(review): the insn and split conditions are not visible in this
;; excerpt.
2582 (define_insn_and_split "*vec_extract_v4sf_mem"
2583 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2585 (match_operand:V4SF 1 "memory_operand" "o")
2586 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2592 int i = INTVAL (operands[2]);
2594 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander extracting one element of V4SF operand 1 into SF operand 0.
;; The element index is the constant operand 2; the work is delegated to
;; ix86_expand_vector_extract (first argument `false`).
2598 (define_expand "vec_extractv4sf"
2599 [(match_operand:SF 0 "register_operand" "")
2600 (match_operand:V4SF 1 "register_operand" "")
2601 (match_operand 2 "const_int_operand" "")]
2604 ix86_expand_vector_extract (false, operands[0], operands[1],
2605 INTVAL (operands[2]));
2609 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2611 ;; Parallel double-precision floating point element swizzling
2613 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative pattern on V2DF operands 1 and 2; not matched when
;; both are memory.  Emits unpckhpd (operand 1 tied to the destination),
;; movlpd loading from the %H1 form of offsettable memory operand 1
;; (operand 2 tied), or movhpd storing operand 1 to a memory destination.
2615 (define_insn "sse2_unpckhpd"
2616 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2619 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2620 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2621 (parallel [(const_int 1)
2623 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2625 unpckhpd\t{%2, %0|%0, %2}
2626 movlpd\t{%H1, %0|%0, %H1}
2627 movhpd\t{%1, %0|%0, %1}"
2628 [(set_attr "type" "sselog,ssemov,ssemov")
2629 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup on V2DF operand 1 with a register destination; a second
;; alternative allows an offsettable memory destination fed from a
;; register.  Not matched when both operands are memory.
;; NOTE(review): the template for the memory alternative is not visible in
;; this excerpt -- presumably "#" (handled by a splitter); confirm.
2631 (define_insn "*sse3_movddup"
2632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2635 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2637 (parallel [(const_int 0)
2639 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2641 movddup\t{%1, %0|%0, %1}
2643 [(set_attr "type" "sselog1,ssemov")
2644 (set_attr "mode" "V2DF")])
;; Post-reload splitter for a V2DF memory destination fed from register
;; operand 1: the register is viewed in DFmode under the same register
;; number and that DF value is stored twice, at byte offsets 0 and 8 of the
;; destination.
2647 [(set (match_operand:V2DF 0 "memory_operand" "")
2650 (match_operand:V2DF 1 "register_operand" "")
2652 (parallel [(const_int 0)
2654 "TARGET_SSE3 && reload_completed"
2657 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2658 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2659 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three-alternative pattern on V2DF operands 1 and 2, operand 1 tied to
;; the destination; not matched when both are memory.  Emits unpcklpd
;; (reg,reg), movhpd loading from memory operand 2, or movlpd storing
;; operand 2 into the %H0 form of an offsettable memory destination.
2663 (define_insn "sse2_unpcklpd"
2664 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2667 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2668 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2669 (parallel [(const_int 0)
2671 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2673 unpcklpd\t{%2, %0|%0, %2}
2674 movhpd\t{%2, %0|%0, %2}
2675 movlpd\t{%2, %H0|%H0, %2}"
2676 [(set_attr "type" "sselog,ssemov,ssemov")
2677 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shuffle control as one constant (operand 3) and
;; forwarding to sse2_shufpd_1.  Bit 1 of the mask selects 3 or 2 as the
;; last selector argument.
;; NOTE(review): the argument derived from bit 0 is on a line not visible
;; in this excerpt.
2679 (define_expand "sse2_shufpd"
2680 [(match_operand:V2DF 0 "register_operand" "")
2681 (match_operand:V2DF 1 "register_operand" "")
2682 (match_operand:V2DF 2 "nonimmediate_operand" "")
2683 (match_operand:SI 3 "const_int_operand" "")]
2686 int mask = INTVAL (operands[3]);
2687 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2689 GEN_INT (mask & 2 ? 3 : 2)));
;; Emits shufpd with two explicit selectors: operand 3 in 0..1 and operand
;; 4 in 2..3.  At output time they are folded into one immediate (operand 3
;; in bit 0, operand 4 minus 2 in bit 1), stored in operands[3] for
;; printing.
2693 (define_insn "sse2_shufpd_1"
2694 [(set (match_operand:V2DF 0 "register_operand" "=x")
2697 (match_operand:V2DF 1 "register_operand" "0")
2698 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2699 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2700 (match_operand 4 "const_2_to_3_operand" "")])))]
2704 mask = INTVAL (operands[3]);
2705 mask |= (INTVAL (operands[4]) - 2) << 1;
2706 operands[3] = GEN_INT (mask);
2708 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2710 [(set_attr "type" "sselog")
2711 (set_attr "mode" "V2DF")])
;; Extracts element 1 of V2DF operand 1 into DF operand 0; not matched when
;; both operands are memory.  The memory-destination alternative uses
;; movhpd.
;; NOTE(review): the templates for the two register-destination
;; alternatives are not visible in this excerpt.
2713 (define_insn "sse2_storehpd"
2714 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2716 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2717 (parallel [(const_int 1)])))]
2718 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2720 movhpd\t{%1, %0|%0, %1}
2723 [(set_attr "type" "ssemov,sselog1,ssemov")
2724 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload splitter: extracting element 1 of a V2DF held in memory
;; becomes a plain DF load from byte offset 8 of that memory operand.
2727 [(set (match_operand:DF 0 "register_operand" "")
2729 (match_operand:V2DF 1 "memory_operand" "")
2730 (parallel [(const_int 1)])))]
2731 "TARGET_SSE2 && reload_completed"
2732 [(set (match_dup 0) (match_dup 1))]
2734 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of V2DF operand 1 into DF operand 0; not matched when
;; both operands are memory.  The memory-destination alternative uses
;; movlpd.
;; NOTE(review): the templates for the two register-destination
;; alternatives are not visible in this excerpt.
2737 (define_insn "sse2_storelpd"
2738 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2740 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2741 (parallel [(const_int 0)])))]
2742 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2744 movlpd\t{%1, %0|%0, %1}
2747 [(set_attr "type" "ssemov")
2748 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload splitter: extracting element 0 of V2DF operand 1 into a DF
;; register becomes an ordinary DF move.  The source is re-expressed in
;; DFmode, either as the same hard register number or through gen_lowpart.
;; NOTE(review): the test choosing between the two forms is not visible in
;; this excerpt -- presumably a REG_P check; confirm.
2751 [(set (match_operand:DF 0 "register_operand" "")
2753 (match_operand:V2DF 1 "nonimmediate_operand" "")
2754 (parallel [(const_int 0)])))]
2755 "TARGET_SSE2 && reload_completed"
2758 rtx op1 = operands[1];
2760 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2762 op1 = gen_lowpart (DFmode, op1);
2763 emit_move_insn (operands[0], op1);
;; Combines element 0 of V2DF operand 1 with DF operand 2; not matched when
;; both are memory.  Register alternatives use movhpd (operand 2 in
;; memory), unpcklpd (operand 2 in a register) or shufpd $1 (operand 2 tied
;; to the destination).
;; NOTE(review): the template for the fourth (memory destination)
;; alternative is not visible in this excerpt -- presumably "#", handled by
;; the splitter that follows; confirm.
2767 (define_insn "sse2_loadhpd"
2768 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2771 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2772 (parallel [(const_int 0)]))
2773 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2774 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2776 movhpd\t{%2, %0|%0, %2}
2777 unpcklpd\t{%2, %0|%0, %2}
2778 shufpd\t{$1, %1, %0|%0, %1, 1}
2780 [(set_attr "type" "ssemov,sselog,sselog,other")
2781 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload splitter for a V2DF memory destination whose element 0 is
;; kept (the vec_select of (match_dup 0)) and whose other element is
;; replaced by DF register operand 1.  It reduces to a plain DF store at
;; byte offset 8 of the memory operand.
2784 [(set (match_operand:V2DF 0 "memory_operand" "")
2786 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2787 (match_operand:DF 1 "register_operand" "")))]
2788 "TARGET_SSE2 && reload_completed"
2789 [(set (match_dup 0) (match_dup 1))]
2791 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combines DF operand 2 with element 1 of V2DF operand 1 (which may also
;; be the zero constant "C"); not matched when both are memory.  Register
;; alternatives use movsd, movlpd, shufpd $2, or movhpd from the %H1 form
;; of an offsettable memory operand 1.
;; NOTE(review): the template for the sixth (memory destination)
;; alternative is not visible in this excerpt -- presumably "#", handled by
;; the splitter that follows; confirm.
2794 (define_insn "sse2_loadlpd"
2795 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2797 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2799 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2800 (parallel [(const_int 1)]))))]
2801 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2803 movsd\t{%2, %0|%0, %2}
2804 movlpd\t{%2, %0|%0, %2}
2805 movsd\t{%2, %0|%0, %2}
2806 shufpd\t{$2, %2, %0|%0, %2, 2}
2807 movhpd\t{%H1, %0|%0, %H1}
2809 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2810 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload splitter for a V2DF memory destination in which DF register
;; operand 1 is listed first (the new low element) and element 1 of the
;; existing memory contents is retained (the vec_select of (match_dup 0)).
;; It reduces to a plain DF store of operand 1 into the memory operand.
;; The store must go to byte offset 0: offset 8 would overwrite the very
;; element the pattern says is preserved and leave the low element stale.
;; (Offset 8 is correct only for the loadhpd splitter, which retains
;; element 0 instead.)
2813 [(set (match_operand:V2DF 0 "memory_operand" "")
2815 (match_operand:DF 1 "register_operand" "")
2816 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2817 "TARGET_SSE2 && reload_completed"
2818 [(set (match_dup 0) (match_dup 1))]
2820 operands[0] = adjust_address (operands[0], DFmode, 0);
2823 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extraction of element 1 of a V2DF for targets with SSE but without SSE2
;; (see the condition), using single-precision move instructions: movhps to
;; memory, movhlps between registers, or movlps from the %H1 form of an
;; offsettable memory source.  Not matched when both operands are memory.
2825 (define_insn "*vec_extractv2df_1_sse"
2826 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2828 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2829 (parallel [(const_int 1)])))]
2830 "!TARGET_SSE2 && TARGET_SSE
2831 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2833 movhps\t{%1, %0|%0, %1}
2834 movhlps\t{%1, %0|%0, %1}
2835 movlps\t{%H1, %0|%0, %H1}"
2836 [(set_attr "type" "ssemov")
2837 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF for targets with SSE but without SSE2
;; (see the condition), using single-precision move instructions: movlps to
;; memory, movaps between registers, or movlps from memory.  Not matched
;; when both operands are memory.
2839 (define_insn "*vec_extractv2df_0_sse"
2840 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2842 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2843 (parallel [(const_int 0)])))]
2844 "!TARGET_SSE2 && TARGET_SSE
2845 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2847 movlps\t{%1, %0|%0, %1}
2848 movaps\t{%1, %0|%0, %1}
2849 movlps\t{%1, %0|%0, %1}"
2850 [(set_attr "type" "ssemov")
2851 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six-alternative pattern combining V2DF operands 2 and 1.  In the first
;; three alternatives operand 1 is tied to the destination (movsd / movlpd
;; load / movlpd store); in the last three operand 2 is tied (shufpd $2,
;; movhps from the %H1 form of memory operand 1, movhps into the %H0 form
;; of a memory destination).
;; NOTE(review): the combining rtx, its mask and the insn condition are not
;; visible in this excerpt.
2853 (define_insn "sse2_movsd"
2854 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2856 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2857 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2861 movsd\t{%2, %0|%0, %2}
2862 movlpd\t{%2, %0|%0, %2}
2863 movlpd\t{%2, %0|%0, %2}
2864 shufpd\t{$2, %2, %0|%0, %2, 2}
2865 movhps\t{%H1, %0|%0, %H1}
2866 movhps\t{%1, %H0|%H0, %1}"
2867 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2868 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from DF operand 1 (register or memory) with movddup.
;; NOTE(review): the insn condition is not visible in this excerpt.
2870 (define_insn "*vec_dupv2df_sse3"
2871 [(set (match_operand:V2DF 0 "register_operand" "=x")
2873 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2875 "movddup\t{%1, %0|%0, %1}"
2876 [(set_attr "type" "sselog1")
2877 (set_attr "mode" "DF")])
;; Builds a V2DF from DF operand 1, which must be in the destination
;; register (constraint "0").
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
2879 (define_insn "vec_dupv2df"
2880 [(set (match_operand:V2DF 0 "register_operand" "=x")
2882 (match_operand:DF 1 "register_operand" "0")))]
2885 [(set_attr "type" "sselog1")
2886 (set_attr "mode" "V2DF")])
;; V2DF construction from DF operand 1 (register or memory) implemented
;; with movddup.
;; NOTE(review): the second half of the concatenation and the insn
;; condition are not visible in this excerpt -- presumably (match_dup 1).
2888 (define_insn "*vec_concatv2df_sse3"
2889 [(set (match_operand:V2DF 0 "register_operand" "=x")
2891 (match_operand:DF 1 "nonimmediate_operand" "xm")
2894 "movddup\t{%1, %0|%0, %1}"
2895 [(set_attr "type" "sselog1")
2896 (set_attr "mode" "DF")])
;; Builds a V2DF from DF operands 1 and 2.  The "Y2" alternatives use
;; double-precision instructions (unpcklpd, movhpd, and movsd when operand
;; 2 is the zero constant "C"); the plain "x" alternatives use movlhps and
;; movhps instead.
2898 (define_insn "*vec_concatv2df"
2899 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2901 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2902 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2905 unpcklpd\t{%2, %0|%0, %2}
2906 movhpd\t{%2, %0|%0, %2}
2907 movsd\t{%1, %0|%0, %1}
2908 movlhps\t{%2, %0|%0, %2}
2909 movhps\t{%2, %0|%0, %2}"
2910 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2911 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander setting one element of V2DF operand 0 to DF operand 1.  The
;; element index is the constant operand 2; the work is delegated to
;; ix86_expand_vector_set (first argument `false`).
2913 (define_expand "vec_setv2df"
2914 [(match_operand:V2DF 0 "register_operand" "")
2915 (match_operand:DF 1 "register_operand" "")
2916 (match_operand 2 "const_int_operand" "")]
2919 ix86_expand_vector_set (false, operands[0], operands[1],
2920 INTVAL (operands[2]));
;; Expander extracting one element of V2DF operand 1 into DF operand 0.
;; The element index is the constant operand 2; the work is delegated to
;; ix86_expand_vector_extract (first argument `false`).
2924 (define_expand "vec_extractv2df"
2925 [(match_operand:DF 0 "register_operand" "")
2926 (match_operand:V2DF 1 "register_operand" "")
2927 (match_operand 2 "const_int_operand" "")]
2930 ix86_expand_vector_extract (false, operands[0], operands[1],
2931 INTVAL (operands[2]));
;; Expander for V2DF initialisation: all work is delegated to
;; ix86_expand_vector_init, called with `false` as its first argument,
;; the destination (operand 0) and the initialiser (operand 1).
2935 (define_expand "vec_initv2df"
2936 [(match_operand:V2DF 0 "register_operand" "")
2937 (match_operand 1 "" "")]
2940 ix86_expand_vector_init (false, operands[0], operands[1]);
2944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2946 ;; Parallel integral arithmetic
2948 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every mode in SSEMODEI.  The preparation statement
;; forces a zero vector of the current mode into a register as operand 2.
;; NOTE(review): the rtx that consumes operand 2 is not visible in this
;; excerpt -- presumably a minus from zero; confirm.
2950 (define_expand "neg<mode>2"
2951 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2954 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2956 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for every mode in SSEMODEI.  The operands are
;; legitimised by ix86_fixup_binary_operands_no_copy for PLUS before the
;; plus rtx is emitted.
2958 (define_expand "add<mode>3"
2959 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2960 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2961 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2963 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Integer vector add for every mode in SSEMODEI: emits padd with the
;; element-size suffix from <ssevecsize> (b/w/d/q).  Operand 1 is
;; commutative ("%") and tied to the destination; operand validity is
;; checked by ix86_binary_operator_ok.
2965 (define_insn "*add<mode>3"
2966 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2968 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2969 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2970 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2971 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2972 [(set_attr "type" "sseiadd")
2973 (set_attr "prefix_data16" "1")
2974 (set_attr "mode" "TI")])
;; Add for the SSEMODE12 modes (V16QI, V8HI), checked against SS_PLUS:
;; emits padds with the b/w suffix.  Operand 1 is commutative and tied to
;; the destination.
2976 (define_insn "sse2_ssadd<mode>3"
2977 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2979 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2980 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2981 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2982 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2983 [(set_attr "type" "sseiadd")
2984 (set_attr "prefix_data16" "1")
2985 (set_attr "mode" "TI")])
;; Add for the SSEMODE12 modes (V16QI, V8HI), checked against US_PLUS:
;; emits paddus with the b/w suffix.  Operand 1 is commutative and tied to
;; the destination.
2987 (define_insn "sse2_usadd<mode>3"
2988 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2990 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2991 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2992 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2993 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2994 [(set_attr "type" "sseiadd")
2995 (set_attr "prefix_data16" "1")
2996 (set_attr "mode" "TI")])
;; Subtraction expander for every mode in SSEMODEI.  Operand 1 must already
;; be a register; the operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for MINUS.
2998 (define_expand "sub<mode>3"
2999 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3000 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
3001 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3003 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Integer vector subtract for every mode in SSEMODEI: emits psub with the
;; element-size suffix from <ssevecsize>.  Operand 1 is tied to the
;; destination and, unlike the add patterns, is not marked commutative.
3005 (define_insn "*sub<mode>3"
3006 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3008 (match_operand:SSEMODEI 1 "register_operand" "0")
3009 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3011 "psub<ssevecsize>\t{%2, %0|%0, %2}"
3012 [(set_attr "type" "sseiadd")
3013 (set_attr "prefix_data16" "1")
3014 (set_attr "mode" "TI")])
;; Subtract for the SSEMODE12 modes (V16QI, V8HI) emitting psubs with the
;; b/w suffix.  Operand 1 is tied to the destination.
;; NOTE(review): the rtx code and insn condition are not visible in this
;; excerpt.
3016 (define_insn "sse2_sssub<mode>3"
3017 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3019 (match_operand:SSEMODE12 1 "register_operand" "0")
3020 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3022 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
3023 [(set_attr "type" "sseiadd")
3024 (set_attr "prefix_data16" "1")
3025 (set_attr "mode" "TI")])
;; Subtract for the SSEMODE12 modes (V16QI, V8HI) emitting psubus with the
;; b/w suffix.  Operand 1 is tied to the destination.
;; NOTE(review): the rtx code and insn condition are not visible in this
;; excerpt.
3027 (define_insn "sse2_ussub<mode>3"
3028 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3030 (match_operand:SSEMODE12 1 "register_operand" "0")
3031 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3033 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
3034 [(set_attr "type" "sseiadd")
3035 (set_attr "prefix_data16" "1")
3036 (set_attr "mode" "TI")])
;; V16QI multiply, implemented by splitting into word multiplies; the
;; pattern is only valid before reload (see the condition).
;; Two code paths are visible in the split body:
;;  - an SSE5 path that unpacks both inputs to V8HI with
;;    ix86_expand_sse5_unpack, multiplies high and low halves with
;;    gen_mulv8hi3 and repacks with ix86_expand_sse5_pack;
;;  - a generic path that interleaves each input with itself
;;    (punpckhbw/punpcklbw), multiplies as V8HI, then uses a sequence of
;;    punpck{h,l}bw steps to gather the low byte of every product.
;; NOTE(review): the branch selecting between the paths, several operand
;; assignments and the definition of op0 are not visible in this excerpt.
3038 (define_insn_and_split "mulv16qi3"
3039 [(set (match_operand:V16QI 0 "register_operand" "")
3040 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
3041 (match_operand:V16QI 2 "register_operand" "")))]
3043 && !(reload_completed || reload_in_progress)"
3048 rtx t[12], op0, op[3];
3053 /* On SSE5, we can take advantage of the pperm instruction to pack and
3054 unpack the bytes. Unpack data such that we've got a source byte in
3055 each low byte of each word. We don't care what goes into the high
3056 byte, so put 0 there. */
3057 for (i = 0; i < 6; ++i)
3058 t[i] = gen_reg_rtx (V8HImode);
3060 for (i = 0; i < 2; i++)
3063 op[1] = operands[i+1];
3064 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
3067 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
3070 /* Multiply words. */
3071 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
3072 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
3074 /* Pack the low byte of each word back into a single xmm */
3075 op[0] = operands[0];
3078 ix86_expand_sse5_pack (op);
3082 for (i = 0; i < 12; ++i)
3083 t[i] = gen_reg_rtx (V16QImode);
3085 /* Unpack data such that we've got a source byte in each low byte of
3086 each word. We don't care what goes into the high byte of each word.
3087 Rather than trying to get zero in there, most convenient is to let
3088 it be a copy of the low byte. */
3089 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
3090 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
3091 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
3092 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
3094 /* Multiply words. The end-of-line annotations here give a picture of what
3095 the output of that instruction looks like. Dot means don't care; the
3096 letters are the bytes of the result with A being the most significant. */
3097 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
3098 gen_lowpart (V8HImode, t[0]),
3099 gen_lowpart (V8HImode, t[1])));
3100 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
3101 gen_lowpart (V8HImode, t[2]),
3102 gen_lowpart (V8HImode, t[3])));
3104 /* Extract the relevant bytes and merge them back together. */
3105 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
3106 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
3107 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
3108 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
3109 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
3110 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
3113 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Expander for element-wise V8HI multiplication.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the three operands.
;; NOTE(review): that helper is defined outside this file; presumably it
;; puts the operands in a form ix86_binary_operator_ok accepts -- confirm.
3117 (define_expand "mulv8hi3"
3118 [(set (match_operand:V8HI 0 "register_operand" "")
3119 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3120 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3122 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI element-wise multiply, output as pmullw.  Operand 1 is tied to the
;; destination register ("0") and "%" marks operands 1 and 2 as commutative;
;; operand 2 may be a register or memory ("xm").
3124 (define_insn "*mulv8hi3"
3125 [(set (match_operand:V8HI 0 "register_operand" "=x")
3126 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3127 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3128 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3129 "pmullw\t{%2, %0|%0, %2}"
3130 [(set_attr "type" "sseimul")
3131 (set_attr "prefix_data16" "1")
3132 (set_attr "mode" "TI")])
;; Expander for the smulv8hi3_highpart pattern name on V8HI operands.  Its
;; preparation step only runs ix86_fixup_binary_operands_no_copy; the insn
;; "*smulv8hi3_highpart" emits the actual pmulhw.
3134 (define_expand "smulv8hi3_highpart"
3135 [(set (match_operand:V8HI 0 "register_operand" "")
3140 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3142 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3145 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Output as pmulhw, which keeps the high 16 bits of each signed 16x16
;; product.  Operand 1 is tied to the destination and commutative with
;; operand 2 ("%0").
3147 (define_insn "*smulv8hi3_highpart"
3148 [(set (match_operand:V8HI 0 "register_operand" "=x")
3153 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3155 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3157 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3158 "pmulhw\t{%2, %0|%0, %2}"
3159 [(set_attr "type" "sseimul")
3160 (set_attr "prefix_data16" "1")
3161 (set_attr "mode" "TI")])
;; Expander for the umulv8hi3_highpart pattern name on V8HI operands.  Its
;; preparation step only runs ix86_fixup_binary_operands_no_copy; the insn
;; "*umulv8hi3_highpart" emits the actual pmulhuw.
3163 (define_expand "umulv8hi3_highpart"
3164 [(set (match_operand:V8HI 0 "register_operand" "")
3169 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3171 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3174 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Output as pmulhuw, which keeps the high 16 bits of each unsigned 16x16
;; product.  Operand 1 is tied to the destination and commutative with
;; operand 2 ("%0").
3176 (define_insn "*umulv8hi3_highpart"
3177 [(set (match_operand:V8HI 0 "register_operand" "=x")
3182 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3184 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3186 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3187 "pmulhuw\t{%2, %0|%0, %2}"
3188 [(set_attr "type" "sseimul")
3189 (set_attr "prefix_data16" "1")
3190 (set_attr "mode" "TI")])
;; pmuludq: reads elements 0 and 2 of each V4SI input (the two
;; (parallel [...]) selectors) and writes a V2DI result, i.e. two unsigned
;; 32x32->64 products.  Operand 1 is tied to the destination and commutative
;; with operand 2.
3192 (define_insn "sse2_umulv2siv2di3"
3193 [(set (match_operand:V2DI 0 "register_operand" "=x")
3197 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3198 (parallel [(const_int 0) (const_int 2)])))
3201 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3202 (parallel [(const_int 0) (const_int 2)])))))]
3203 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3204 "pmuludq\t{%2, %0|%0, %2}"
3205 [(set_attr "type" "sseimul")
3206 (set_attr "prefix_data16" "1")
3207 (set_attr "mode" "TI")])
;; pmuldq (SSE4.1): the signed counterpart of pmuludq.  Reads elements 0 and
;; 2 of each V4SI input and writes a V2DI result.  Operand 1 is tied to the
;; destination and commutative with operand 2.
3209 (define_insn "sse4_1_mulv2siv2di3"
3210 [(set (match_operand:V2DI 0 "register_operand" "=x")
3214 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3215 (parallel [(const_int 0) (const_int 2)])))
3218 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3219 (parallel [(const_int 0) (const_int 2)])))))]
3220 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3221 "pmuldq\t{%2, %0|%0, %2}"
3222 [(set_attr "type" "sseimul")
3223 (set_attr "prefix_extra" "1")
3224 (set_attr "mode" "TI")])
;; pmaddwd: multiplies corresponding signed words of the two V8HI inputs and
;; adds adjacent pairs of products, giving a V4SI result.  The pattern
;; expresses this with V4HI selections of each input, one starting at
;; element 0 and one starting at element 1.  Note the "sseiadd" type.
3226 (define_insn "sse2_pmaddwd"
3227 [(set (match_operand:V4SI 0 "register_operand" "=x")
3232 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3233 (parallel [(const_int 0)
3239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3240 (parallel [(const_int 0)
3246 (vec_select:V4HI (match_dup 1)
3247 (parallel [(const_int 1)
3252 (vec_select:V4HI (match_dup 2)
3253 (parallel [(const_int 1)
3256 (const_int 7)]))))))]
3257 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3258 "pmaddwd\t{%2, %0|%0, %2}"
3259 [(set_attr "type" "sseiadd")
3260 (set_attr "prefix_data16" "1")
3261 (set_attr "mode" "TI")])
;; Expander for element-wise V4SI multiplication.  Both inputs must be
;; registers.  The operand fixup runs only when SSE4.1 or SSE5 is enabled;
;; the insn conditions below show "*sse2_mulv4si3" covering the remaining
;; SSE2-only case.
3263 (define_expand "mulv4si3"
3264 [(set (match_operand:V4SI 0 "register_operand" "")
3265 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3266 (match_operand:V4SI 2 "register_operand" "")))]
3269 if (TARGET_SSE4_1 || TARGET_SSE5)
3270 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; SSE4.1 V4SI element-wise multiply, output as pmulld.  Operand 1 is tied to
;; the destination and commutative with operand 2 ("%0").
3273 (define_insn "*sse4_1_mulv4si3"
3274 [(set (match_operand:V4SI 0 "register_operand" "=x")
3275 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3276 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3277 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3278 "pmulld\t{%2, %0|%0, %2}"
3279 [(set_attr "type" "sseimul")
3280 (set_attr "prefix_extra" "1")
3281 (set_attr "mode" "TI")])
3283 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3284 ;; multiply/add. In general, we expect the define_split to occur before
3285 ;; register allocation, so we have to handle the corner case where the target
3286 ;; is used as the base or index register in operands 1/2.
;;
;; Details visible in the pattern: the destination is earlyclobber ("=&x");
;; the split rewrites the multiply as a multiply-add whose addend,
;; operands[3], is the V4SI zero vector; and before reload the split is taken
;; only when operand 0 is not mentioned in operand 1 or operand 2.
3287 (define_insn_and_split "*sse5_mulv4si3"
3288 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3289 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3290 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3293 "&& (reload_completed
3294 || (!reg_mentioned_p (operands[0], operands[1])
3295 && !reg_mentioned_p (operands[0], operands[2])))"
3299 (plus:V4SI (mult:V4SI (match_dup 1)
3303 operands[3] = CONST0_RTX (V4SImode);
3305 [(set_attr "type" "ssemuladd")
3306 (set_attr "mode" "TI")])
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor SSE5), valid only before
;; reload because the split creates new pseudo registers.  It is built from
;; two pmuludq products: one on elements 0/2 of the inputs, and one on
;; elements 1/3 after shifting both inputs down as whole 128-bit values.
;; The low 32 bits of each 64-bit product are then gathered with pshufd and
;; merged with punpckldq.
3308 (define_insn_and_split "*sse2_mulv4si3"
3309 [(set (match_operand:V4SI 0 "register_operand" "")
3310 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3311 (match_operand:V4SI 2 "register_operand" "")))]
3312 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3313 && !(reload_completed || reload_in_progress)"
3318 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3324 t1 = gen_reg_rtx (V4SImode);
3325 t2 = gen_reg_rtx (V4SImode);
3326 t3 = gen_reg_rtx (V4SImode);
3327 t4 = gen_reg_rtx (V4SImode);
3328 t5 = gen_reg_rtx (V4SImode);
3329 t6 = gen_reg_rtx (V4SImode);
3330 thirtytwo = GEN_INT (32);
3332 /* Multiply elements 2 and 0. */
3333 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3336 /* Shift both input vectors down one element, so that elements 3
3337 and 1 are now in the slots for elements 2 and 0. For K8, at
3338 least, this is faster than using a shuffle. */
3339 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3340 gen_lowpart (TImode, op1),
3342 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3343 gen_lowpart (TImode, op2),
3345 /* Multiply elements 3 and 1. */
3346 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3349 /* Move the results in element 2 down to element 1; we don't care
3350 what goes in elements 2 and 3. */
3351 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3352 const0_rtx, const0_rtx));
3353 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3354 const0_rtx, const0_rtx));
3356 /* Merge the parts back together. */
3357 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI element-wise multiply, split before reload into 32-bit partial
;; products.  With each 64-bit element written as hi:lo, the result is
;;   lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32)
;; The hi1*hi2 term is not computed because it only affects bits above 63.
3361 (define_insn_and_split "mulv2di3"
3362 [(set (match_operand:V2DI 0 "register_operand" "")
3363 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3364 (match_operand:V2DI 2 "register_operand" "")))]
3366 && !(reload_completed || reload_in_progress)"
3371 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3377 t1 = gen_reg_rtx (V2DImode);
3378 t2 = gen_reg_rtx (V2DImode);
3379 t3 = gen_reg_rtx (V2DImode);
3380 t4 = gen_reg_rtx (V2DImode);
3381 t5 = gen_reg_rtx (V2DImode);
3382 t6 = gen_reg_rtx (V2DImode);
3383 thirtytwo = GEN_INT (32);
3385 /* Multiply low parts. */
3386 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3387 gen_lowpart (V4SImode, op2)));
3389 /* Shift input vectors right 32 bits so the high parts land in the low halves, where pmuludq reads them. */
3390 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3391 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3393 /* Multiply high parts by low parts. */
3394 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3395 gen_lowpart (V4SImode, t3)));
3396 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3397 gen_lowpart (V4SImode, t2)));
3399 /* Shift the cross products left 32 bits, back into position. */
3400 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3401 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3403 /* Add the three parts together. */
3404 emit_insn (gen_addv2di3 (t6, t1, t4));
3405 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply V8HI x V8HI -> V4SI, "hi" variant.  The low
;; 16 bits of every product (mulv8hi3) and the signed high 16 bits
;; (smulv8hi3_highpart) are computed separately, then combined with
;; vec_interleave_highv8hi into the destination viewed as V8HI.
3409 (define_expand "vec_widen_smult_hi_v8hi"
3410 [(match_operand:V4SI 0 "register_operand" "")
3411 (match_operand:V8HI 1 "register_operand" "")
3412 (match_operand:V8HI 2 "register_operand" "")]
3415 rtx op1, op2, t1, t2, dest;
3419 t1 = gen_reg_rtx (V8HImode);
3420 t2 = gen_reg_rtx (V8HImode);
3421 dest = gen_lowpart (V8HImode, operands[0]);
3423 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3424 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3425 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same scheme
;; as the "hi" variant (mulv8hi3 plus smulv8hi3_highpart), but the halves are
;; combined with vec_interleave_lowv8hi.
3429 (define_expand "vec_widen_smult_lo_v8hi"
3430 [(match_operand:V4SI 0 "register_operand" "")
3431 (match_operand:V8HI 1 "register_operand" "")
3432 (match_operand:V8HI 2 "register_operand" "")]
3435 rtx op1, op2, t1, t2, dest;
3439 t1 = gen_reg_rtx (V8HImode);
3440 t2 = gen_reg_rtx (V8HImode);
3441 dest = gen_lowpart (V8HImode, operands[0]);
3443 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3444 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3445 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "hi" variant.  The low
;; 16 bits of every product (mulv8hi3) and the unsigned high 16 bits
;; (umulv8hi3_highpart) are computed separately, then combined with
;; vec_interleave_highv8hi into the destination viewed as V8HI.
3449 (define_expand "vec_widen_umult_hi_v8hi"
3450 [(match_operand:V4SI 0 "register_operand" "")
3451 (match_operand:V8HI 1 "register_operand" "")
3452 (match_operand:V8HI 2 "register_operand" "")]
3455 rtx op1, op2, t1, t2, dest;
3459 t1 = gen_reg_rtx (V8HImode);
3460 t2 = gen_reg_rtx (V8HImode);
3461 dest = gen_lowpart (V8HImode, operands[0]);
3463 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3464 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3465 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same scheme
;; as the "hi" variant (mulv8hi3 plus umulv8hi3_highpart), but the halves are
;; combined with vec_interleave_lowv8hi.
3469 (define_expand "vec_widen_umult_lo_v8hi"
3470 [(match_operand:V4SI 0 "register_operand" "")
3471 (match_operand:V8HI 1 "register_operand" "")
3472 (match_operand:V8HI 2 "register_operand" "")]
3475 rtx op1, op2, t1, t2, dest;
3479 t1 = gen_reg_rtx (V8HImode);
3480 t2 = gen_reg_rtx (V8HImode);
3481 dest = gen_lowpart (V8HImode, operands[0]);
3483 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3484 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3485 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, "hi" variant, named as the signed
;; operation.  Each input is interleaved with itself (high variant) and the
;; results are fed to sse2_umulv2siv2di3, which reads elements 0 and 2.
;;
;; NOTE(review): this *signed* pattern finishes with sse2_umulv2siv2di3, the
;; unsigned pmuludq insn -- the same sequence as vec_widen_umult_hi_v4si.
;; That looks wrong when either input element is negative.  Confirm, and
;; consider the signed sse4_1_mulv2siv2di3 (pmuldq) where SSE4.1 is available.
3489 (define_expand "vec_widen_smult_hi_v4si"
3490 [(match_operand:V2DI 0 "register_operand" "")
3491 (match_operand:V4SI 1 "register_operand" "")
3492 (match_operand:V4SI 2 "register_operand" "")]
3495 rtx op1, op2, t1, t2;
3499 t1 = gen_reg_rtx (V4SImode);
3500 t2 = gen_reg_rtx (V4SImode);
3502 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3503 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3504 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, "lo" variant, named as the signed
;; operation.  Each input is interleaved with itself (low variant) and the
;; results are fed to sse2_umulv2siv2di3, which reads elements 0 and 2.
;;
;; NOTE(review): this *signed* pattern finishes with sse2_umulv2siv2di3, the
;; unsigned pmuludq insn -- the same sequence as vec_widen_umult_lo_v4si.
;; That looks wrong when either input element is negative.  Confirm, and
;; consider the signed sse4_1_mulv2siv2di3 (pmuldq) where SSE4.1 is available.
3508 (define_expand "vec_widen_smult_lo_v4si"
3509 [(match_operand:V2DI 0 "register_operand" "")
3510 (match_operand:V4SI 1 "register_operand" "")
3511 (match_operand:V4SI 2 "register_operand" "")]
3514 rtx op1, op2, t1, t2;
3518 t1 = gen_reg_rtx (V4SImode);
3519 t2 = gen_reg_rtx (V4SImode);
3521 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3522 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3523 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "hi" variant.  Each input
;; is interleaved with itself (high variant) and the results are multiplied
;; with sse2_umulv2siv2di3 (pmuludq), which reads elements 0 and 2.
3527 (define_expand "vec_widen_umult_hi_v4si"
3528 [(match_operand:V2DI 0 "register_operand" "")
3529 (match_operand:V4SI 1 "register_operand" "")
3530 (match_operand:V4SI 2 "register_operand" "")]
3533 rtx op1, op2, t1, t2;
3537 t1 = gen_reg_rtx (V4SImode);
3538 t2 = gen_reg_rtx (V4SImode);
3540 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3541 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3542 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "lo" variant.  Each input
;; is interleaved with itself (low variant) and the results are multiplied
;; with sse2_umulv2siv2di3 (pmuludq), which reads elements 0 and 2.
3546 (define_expand "vec_widen_umult_lo_v4si"
3547 [(match_operand:V2DI 0 "register_operand" "")
3548 (match_operand:V4SI 1 "register_operand" "")
3549 (match_operand:V4SI 2 "register_operand" "")]
3552 rtx op1, op2, t1, t2;
3556 t1 = gen_reg_rtx (V4SImode);
3557 t2 = gen_reg_rtx (V4SImode);
3559 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3560 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3561 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes into a fresh V4SI temporary before
;; the V4SI addition with the accumulator.
3565 (define_expand "sdot_prodv8hi"
3566 [(match_operand:V4SI 0 "register_operand" "")
3567 (match_operand:V8HI 1 "register_operand" "")
3568 (match_operand:V8HI 2 "register_operand" "")
3569 (match_operand:V4SI 3 "register_operand" "")]
3572 rtx t = gen_reg_rtx (V4SImode);
3573 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3574 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product step on V4SI inputs with a V2DI accumulator
;; (operand 3).  t1 holds the pmuludq products of elements 0/2 plus the
;; accumulator.  Both inputs are then shifted right as whole TImode values
;; into t2/t3, a second pmuludq forms t4, and operand 0 = t1 + t4.
3578 (define_expand "udot_prodv4si"
3579 [(match_operand:V2DI 0 "register_operand" "")
3580 (match_operand:V4SI 1 "register_operand" "")
3581 (match_operand:V4SI 2 "register_operand" "")
3582 (match_operand:V2DI 3 "register_operand" "")]
3587 t1 = gen_reg_rtx (V2DImode);
3588 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3589 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3591 t2 = gen_reg_rtx (V4SImode);
3592 t3 = gen_reg_rtx (V4SImode);
3593 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3594 gen_lowpart (TImode, operands[1]),
3596 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3597 gen_lowpart (TImode, operands[2]),
3600 t4 = gen_reg_rtx (V2DImode);
3601 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3603 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI),
;; output as psraw/psrad.  The source is tied to the destination; the SImode
;; count is a non-memory operand (constraint "xN").
3607 (define_insn "ashr<mode>3"
3608 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3610 (match_operand:SSEMODE24 1 "register_operand" "0")
3611 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3613 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3614 [(set_attr "type" "sseishft")
3615 (set_attr "prefix_data16" "1")
3616 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI), output as psrlw/psrld/psrlq.  The source is tied to the
;; destination; the SImode count is a non-memory operand (constraint "xN").
3618 (define_insn "lshr<mode>3"
3619 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3620 (lshiftrt:SSEMODE248
3621 (match_operand:SSEMODE248 1 "register_operand" "0")
3622 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3624 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3625 [(set_attr "type" "sseishft")
3626 (set_attr "prefix_data16" "1")
3627 (set_attr "mode" "TI")])
;; Per-element left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI),
;; output as psllw/pslld/psllq.  The source is tied to the destination; the
;; SImode count is a non-memory operand (constraint "xN").
3629 (define_insn "ashl<mode>3"
3630 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3632 (match_operand:SSEMODE248 1 "register_operand" "0")
3633 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3635 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3636 [(set_attr "type" "sseishft")
3637 (set_attr "prefix_data16" "1")
3638 (set_attr "mode" "TI")])
;; Whole-vector left shift for the integer vector modes.  Operands 0 and 1
;; are replaced by their TImode views so the shift is expressed on the full
;; 128-bit value (ashift:TI).  The count must satisfy
;; const_0_to_255_mul_8_operand.
3640 (define_expand "vec_shl_<mode>"
3641 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3642 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3643 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3646 operands[0] = gen_lowpart (TImode, operands[0]);
3647 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the integer vector modes.  Operands
;; 0 and 1 are replaced by their TImode views so the shift is expressed on
;; the full 128-bit value (lshiftrt:TI).  The count must satisfy
;; const_0_to_255_mul_8_operand.
3650 (define_expand "vec_shr_<mode>"
3651 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3652 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3653 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3656 operands[0] = gen_lowpart (TImode, operands[0]);
3657 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for unsigned byte maximum (V16QI).  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands before emitting the
;; umax pattern.
3660 (define_expand "umaxv16qi3"
3661 [(set (match_operand:V16QI 0 "register_operand" "")
3662 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3663 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3665 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned byte maximum, output as pmaxub.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3667 (define_insn "*umaxv16qi3"
3668 [(set (match_operand:V16QI 0 "register_operand" "=x")
3669 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3670 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3671 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3672 "pmaxub\t{%2, %0|%0, %2}"
3673 [(set_attr "type" "sseiadd")
3674 (set_attr "prefix_data16" "1")
3675 (set_attr "mode" "TI")])
;; Expander for signed word maximum (V8HI).  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands before emitting the
;; smax pattern.
3677 (define_expand "smaxv8hi3"
3678 [(set (match_operand:V8HI 0 "register_operand" "")
3679 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3680 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3682 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed word maximum, output as pmaxsw.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3684 (define_insn "*smaxv8hi3"
3685 [(set (match_operand:V8HI 0 "register_operand" "=x")
3686 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3687 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3688 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3689 "pmaxsw\t{%2, %0|%0, %2}"
3690 [(set_attr "type" "sseiadd")
3691 (set_attr "prefix_data16" "1")
3692 (set_attr "mode" "TI")])
;; Expander for unsigned word maximum (V8HI).  One path just fixes up the
;; operands for a direct umax insn.  The other path synthesizes the result
;; as (op1 -us op2) + op2, using the unsigned saturating subtract
;; sse2_ussubv8hi3: the difference is op1 - op2 when op1 > op2 and 0
;; otherwise.  If the destination is the same rtx as op2, the intermediate
;; goes to a fresh register so op2 is still intact for the addition.
;; NOTE(review): presumably the first path is the SSE4.1 case (pmaxuw);
;; the selecting condition should be confirmed.
3694 (define_expand "umaxv8hi3"
3695 [(set (match_operand:V8HI 0 "register_operand" "")
3696 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3697 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3701 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3704 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3705 if (rtx_equal_p (op3, op2))
3706 op3 = gen_reg_rtx (V8HImode);
3707 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3708 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed maximum on the SSEMODE14 modes (V16QI, V4SI).  One
;; path fixes up the operands for a direct smax insn.  The other builds a
;; six-operand vector conditional in the vcond<mode> layout (dest,
;; value-if-true, value-if-false, comparison, compared operands) and hands it
;; to ix86_expand_int_vcond: dest = (op1 > op2) ? op1 : op2.
;; NOTE(review): presumably the direct path is the SSE4.1 case; confirm the
;; selecting condition.
3713 (define_expand "smax<mode>3"
3714 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3715 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3716 (match_operand:SSEMODE14 2 "register_operand" "")))]
3720 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3726 xops[0] = operands[0];
3727 xops[1] = operands[1];
3728 xops[2] = operands[2];
3729 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3730 xops[4] = operands[1];
3731 xops[5] = operands[2];
3732 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for V16QI and V4SI, output as pmaxsb/pmaxsd.
;; Operand 1 is tied to the destination and commutative with operand 2.
3738 (define_insn "*sse4_1_smax<mode>3"
3739 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3741 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3742 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3743 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3744 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3745 [(set_attr "type" "sseiadd")
3746 (set_attr "prefix_extra" "1")
3747 (set_attr "mode" "TI")])
;; Expander for unsigned V4SI maximum.  One path fixes up the operands for a
;; direct umax insn.  The other builds a six-operand vector conditional in
;; the vcond layout and hands it to ix86_expand_int_vcond:
;; dest = (op1 >u op2) ? op1 : op2.
;; NOTE(review): presumably the direct path is the SSE4.1 case; confirm the
;; selecting condition.
3749 (define_expand "umaxv4si3"
3750 [(set (match_operand:V4SI 0 "register_operand" "")
3751 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3752 (match_operand:V4SI 2 "register_operand" "")))]
3756 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3762 xops[0] = operands[0];
3763 xops[1] = operands[1];
3764 xops[2] = operands[2];
3765 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3766 xops[4] = operands[1];
3767 xops[5] = operands[2];
3768 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for V8HI and V4SI, output as pmaxuw/pmaxud.
;; Operand 1 is tied to the destination and commutative with operand 2.
3774 (define_insn "*sse4_1_umax<mode>3"
3775 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3777 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3778 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3779 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3780 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3781 [(set_attr "type" "sseiadd")
3782 (set_attr "prefix_extra" "1")
3783 (set_attr "mode" "TI")])
;; Expander for unsigned byte minimum (V16QI).  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands before emitting the
;; umin pattern.
3785 (define_expand "uminv16qi3"
3786 [(set (match_operand:V16QI 0 "register_operand" "")
3787 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3788 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3790 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned byte minimum, output as pminub.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3792 (define_insn "*uminv16qi3"
3793 [(set (match_operand:V16QI 0 "register_operand" "=x")
3794 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3795 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3796 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3797 "pminub\t{%2, %0|%0, %2}"
3798 [(set_attr "type" "sseiadd")
3799 (set_attr "prefix_data16" "1")
3800 (set_attr "mode" "TI")])
;; Expander for signed word minimum (V8HI).  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands before emitting the
;; smin pattern.
3802 (define_expand "sminv8hi3"
3803 [(set (match_operand:V8HI 0 "register_operand" "")
3804 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3805 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3807 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed word minimum, output as pminsw.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
3809 (define_insn "*sminv8hi3"
3810 [(set (match_operand:V8HI 0 "register_operand" "=x")
3811 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3812 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3813 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3814 "pminsw\t{%2, %0|%0, %2}"
3815 [(set_attr "type" "sseiadd")
3816 (set_attr "prefix_data16" "1")
3817 (set_attr "mode" "TI")])
;; Expander for signed minimum on the SSEMODE14 modes (V16QI, V4SI).  One
;; path fixes up the operands for a direct smin insn.  The other builds a
;; six-operand vector conditional for ix86_expand_int_vcond.  Compared with
;; smax<mode>3 the two selected values are swapped:
;; dest = (op1 > op2) ? op2 : op1.
;; NOTE(review): presumably the direct path is the SSE4.1 case; confirm the
;; selecting condition.
3819 (define_expand "smin<mode>3"
3820 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3821 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3822 (match_operand:SSEMODE14 2 "register_operand" "")))]
3826 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3832 xops[0] = operands[0];
3833 xops[1] = operands[2];
3834 xops[2] = operands[1];
3835 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3836 xops[4] = operands[1];
3837 xops[5] = operands[2];
3838 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for V16QI and V4SI, output as pminsb/pminsd.
;; Operand 1 is tied to the destination and commutative with operand 2.
3844 (define_insn "*sse4_1_smin<mode>3"
3845 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3847 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3848 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3849 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3850 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3851 [(set_attr "type" "sseiadd")
3852 (set_attr "prefix_extra" "1")
3853 (set_attr "mode" "TI")])
;; Expander for unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).  One
;; path fixes up the operands for a direct umin insn.  The other builds a
;; six-operand vector conditional for ix86_expand_int_vcond with the selected
;; values swapped relative to a maximum: dest = (op1 >u op2) ? op2 : op1.
;; NOTE(review): presumably the direct path is the SSE4.1 case; confirm the
;; selecting condition.
3855 (define_expand "umin<mode>3"
3856 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3857 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3858 (match_operand:SSEMODE24 2 "register_operand" "")))]
3862 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3868 xops[0] = operands[0];
3869 xops[1] = operands[2];
3870 xops[2] = operands[1];
3871 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3872 xops[4] = operands[1];
3873 xops[5] = operands[2];
3874 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for V8HI and V4SI, output as pminuw/pminud.
;; Operand 1 is tied to the destination and commutative with operand 2.
3880 (define_insn "*sse4_1_umin<mode>3"
3881 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3883 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3884 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3885 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3886 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3887 [(set_attr "type" "sseiadd")
3888 (set_attr "prefix_extra" "1")
3889 (set_attr "mode" "TI")])
3891 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3893 ;; Parallel integral comparisons
3895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for V16QI/V8HI/V4SI, output as
;; pcmpeqb/pcmpeqw/pcmpeqd.  Disabled when SSE5 is enabled (!TARGET_SSE5).
;; Operand 1 is tied to the destination and commutative with operand 2.
3897 (define_insn "sse2_eq<mode>3"
3898 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3900 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3901 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3902 "TARGET_SSE2 && !TARGET_SSE5
3903 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3904 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3905 [(set_attr "type" "ssecmp")
3906 (set_attr "prefix_data16" "1")
3907 (set_attr "mode" "TI")])
;; SSE4.1 element-wise equality compare for V2DI, output as pcmpeqq.
;; Operand 1 is tied to the destination and commutative with operand 2.
3909 (define_insn "sse4_1_eqv2di3"
3910 [(set (match_operand:V2DI 0 "register_operand" "=x")
3912 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3913 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3914 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3915 "pcmpeqq\t{%2, %0|%0, %2}"
3916 [(set_attr "type" "ssecmp")
3917 (set_attr "prefix_extra" "1")
3918 (set_attr "mode" "TI")])
;; Element-wise signed greater-than compare for V16QI/V8HI/V4SI, output as
;; pcmpgtb/pcmpgtw/pcmpgtd.  Disabled when SSE5 is enabled.  Operand 1 must
;; be a register tied to the destination; there is no "%" because
;; greater-than is not commutative.
3920 (define_insn "sse2_gt<mode>3"
3921 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3923 (match_operand:SSEMODE124 1 "register_operand" "0")
3924 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3925 "TARGET_SSE2 && !TARGET_SSE5"
3926 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3927 [(set_attr "type" "ssecmp")
3928 (set_attr "prefix_data16" "1")
3929 (set_attr "mode" "TI")])
;; Element-wise signed greater-than compare for V2DI, output as pcmpgtq.
;; Operand 1 is tied to the destination with no "%" (not commutative).
;;
;; NOTE(review): operand 1 uses the nonimmediate_operand predicate although
;; its constraint "0" ties it to the register destination; sse2_gt<mode>3
;; uses register_operand for the same role.  This insn also has no
;; "prefix_extra" attribute, unlike sse4_1_eqv2di3.  Confirm whether both
;; should be aligned with those patterns.
3931 (define_insn "sse4_2_gtv2di3"
3932 [(set (match_operand:V2DI 0 "register_operand" "=x")
3934 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3935 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3937 "pcmpgtq\t{%2, %0|%0, %2}"
3938 [(set_attr "type" "ssecmp")
3939 (set_attr "mode" "TI")])
;; Vector conditional select for the integer vector modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; All the work is delegated to ix86_expand_int_vcond, whose return value
;; decides the outcome of the expansion.
3941 (define_expand "vcond<mode>"
3942 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3943 (if_then_else:SSEMODEI
3944 (match_operator 3 ""
3945 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3946 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3947 (match_operand:SSEMODEI 1 "general_operand" "")
3948 (match_operand:SSEMODEI 2 "general_operand" "")))]
3951 if (ix86_expand_int_vcond (operands))
;; Vector conditional select under the vcondu name; same operand layout as
;; vcond<mode>:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Delegated to the same ix86_expand_int_vcond helper.
3957 (define_expand "vcondu<mode>"
3958 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3959 (if_then_else:SSEMODEI
3960 (match_operator 3 ""
3961 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3962 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3963 (match_operand:SSEMODEI 1 "general_operand" "")
3964 (match_operand:SSEMODEI 2 "general_operand" "")))]
3967 if (ix86_expand_int_vcond (operands))
3973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3975 ;; Parallel bitwise logical operations
3977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement for the integer vector modes, expressed as XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR with
;; constm1_rtx in each of the mode's elements and forces it into a register
;; as operands[2].
3979 (define_expand "one_cmpl<mode>2"
3980 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3981 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3985 int i, n = GET_MODE_NUNITS (<MODE>mode);
3986 rtvec v = rtvec_alloc (n);
3988 for (i = 0; i < n; ++i)
3989 RTVEC_ELT (v, i) = constm1_rtx;
3991 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the integer vector modes.  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
3994 (define_expand "and<mode>3"
3995 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3996 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3997 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3999 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Integer-vector AND for SSE without SSE2: output as the float-domain andps
;; (mode attribute V4SF).  Operand 1 is tied to the destination and
;; commutative with operand 2.
4001 (define_insn "*sse_and<mode>3"
4002 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4004 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4005 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4006 "(TARGET_SSE && !TARGET_SSE2)
4007 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4008 "andps\t{%2, %0|%0, %2}"
4009 [(set_attr "type" "sselog")
4010 (set_attr "mode" "V4SF")])
;; Integer-vector AND for SSE2, output as pand.  Operand 1 is tied to the
;; destination and commutative with operand 2.
4012 (define_insn "*sse2_and<mode>3"
4013 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4015 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4016 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4017 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4018 "pand\t{%2, %0|%0, %2}"
4019 [(set_attr "type" "sselog")
4020 (set_attr "prefix_data16" "1")
4021 (set_attr "mode" "TI")])
;; And-not for SSE without SSE2, output as andnps: the complement of
;; operand 1 (tied to the destination) is combined with operand 2.  Despite
;; the "nand" name this is (~op1) & op2, not ~(op1 & op2).
4023 (define_insn "*sse_nand<mode>3"
4024 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4026 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4027 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4028 "(TARGET_SSE && !TARGET_SSE2)"
4029 "andnps\t{%2, %0|%0, %2}"
4030 [(set_attr "type" "sselog")
4031 (set_attr "mode" "V4SF")])
;; And-not for the integer vector modes, output as pandn: the complement of
;; operand 1 (tied to the destination) is combined with operand 2.  Despite
;; the "nand" name this is (~op1) & op2, not ~(op1 & op2).
4033 (define_insn "sse2_nand<mode>3"
4034 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4036 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4037 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4039 "pandn\t{%2, %0|%0, %2}"
4040 [(set_attr "type" "sselog")
4041 (set_attr "prefix_data16" "1")
4042 (set_attr "mode" "TI")])
;; Expander for bitwise AND on TFmode values.  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
4044 (define_expand "andtf3"
4045 [(set (match_operand:TF 0 "register_operand" "")
4046 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
4047 (match_operand:TF 2 "nonimmediate_operand" "")))]
4049 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; TFmode bitwise AND in an SSE register, output as pand; 64-bit targets
;; only.  Operand 1 is tied to the destination and commutative with
;; operand 2.
4051 (define_insn "*andtf3"
4052 [(set (match_operand:TF 0 "register_operand" "=x")
4054 (match_operand:TF 1 "nonimmediate_operand" "%0")
4055 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4056 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
4057 "pand\t{%2, %0|%0, %2}"
4058 [(set_attr "type" "sselog")
4059 (set_attr "prefix_data16" "1")
4060 (set_attr "mode" "TI")])
;; TFmode and-not, output as pandn: the complement of operand 1 (tied to the
;; destination) is combined with operand 2, i.e. (~op1) & op2.
4062 (define_insn "*nandtf3"
4063 [(set (match_operand:TF 0 "register_operand" "=x")
4065 (not:TF (match_operand:TF 1 "register_operand" "0"))
4066 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4068 "pandn\t{%2, %0|%0, %2}"
4069 [(set_attr "type" "sselog")
4070 (set_attr "prefix_data16" "1")
4071 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on the integer vector modes.  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
4073 (define_expand "ior<mode>3"
4074 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4075 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4076 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4078 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Integer-vector OR for SSE without SSE2: output as the float-domain orps
;; (mode attribute V4SF).  Operand 1 is tied to the destination and
;; commutative with operand 2.
4080 (define_insn "*sse_ior<mode>3"
4081 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4083 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4084 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4085 "(TARGET_SSE && !TARGET_SSE2)
4086 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4087 "orps\t{%2, %0|%0, %2}"
4088 [(set_attr "type" "sselog")
4089 (set_attr "mode" "V4SF")])
;; Integer-vector OR for SSE2, output as por.  Operand 1 is tied to the
;; destination and commutative with operand 2.
4091 (define_insn "*sse2_ior<mode>3"
4092 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4094 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4095 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4096 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4097 "por\t{%2, %0|%0, %2}"
4098 [(set_attr "type" "sselog")
4099 (set_attr "prefix_data16" "1")
4100 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on TFmode values.  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
4102 (define_expand "iortf3"
4103 [(set (match_operand:TF 0 "register_operand" "")
4104 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
4105 (match_operand:TF 2 "nonimmediate_operand" "")))]
4107 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; TFmode bitwise OR in an SSE register, output as por; 64-bit targets only.
;; Operand 1 is tied to the destination and commutative with operand 2.
4109 (define_insn "*iortf3"
4110 [(set (match_operand:TF 0 "register_operand" "=x")
4112 (match_operand:TF 1 "nonimmediate_operand" "%0")
4113 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4114 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
4115 "por\t{%2, %0|%0, %2}"
4116 [(set_attr "type" "sselog")
4117 (set_attr "prefix_data16" "1")
4118 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on the integer vector modes.  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
4120 (define_expand "xor<mode>3"
4121 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4122 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4123 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4125 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Integer-vector XOR for SSE without SSE2: output as the float-domain xorps
;; (mode attribute V4SF).  Operand 1 is tied to the destination and
;; commutative with operand 2.
4127 (define_insn "*sse_xor<mode>3"
4128 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4130 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4131 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4132 "(TARGET_SSE && !TARGET_SSE2)
4133 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4134 "xorps\t{%2, %0|%0, %2}"
4135 [(set_attr "type" "sselog")
4136 (set_attr "mode" "V4SF")])
;; Integer-vector XOR for SSE2, output as pxor.  Operand 1 is tied to the
;; destination and commutative with operand 2.
4138 (define_insn "*sse2_xor<mode>3"
4139 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4141 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4142 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4143 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4144 "pxor\t{%2, %0|%0, %2}"
4145 [(set_attr "type" "sselog")
4146 (set_attr "prefix_data16" "1")
4147 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on TFmode values.  Only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
4149 (define_expand "xortf3"
4150 [(set (match_operand:TF 0 "register_operand" "")
4151 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
4152 (match_operand:TF 2 "nonimmediate_operand" "")))]
4154 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; Matching insn for the TFmode exclusive-or.  Enabled when TARGET_64BIT
;; holds and ix86_binary_operator_ok accepts the operands; prints pxor
;; with operand 1 tied to the destination.
4156 (define_insn "*xortf3"
4157 [(set (match_operand:TF 0 "register_operand" "=x")
4159 (match_operand:TF 1 "nonimmediate_operand" "%0")
4160 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4161 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
4162 "pxor\t{%2, %0|%0, %2}"
4163 [(set_attr "type" "sselog")
4164 (set_attr "prefix_data16" "1")
4165 (set_attr "mode" "TI")])
4167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4169 ;; Parallel integral element swizzling
4171 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4174 ;; op1 = abcdefghijklmnop
4175 ;; op2 = qrstuvwxyz012345
4176 ;; h1 = aqbrcsdteufvgwhx
4177 ;; l1 = iyjzk0l1m2n3o4p5
4178 ;; h2 = aiqybjrzcks0dlt1
4179 ;; l2 = emu2fnv3gow4hpx5
4180 ;; h3 = aeimquy2bfjnrvz3
4181 ;; l3 = cgkosw04dhlptx15
4182 ;; result = bdfhjlnprtvxz135
;; Expander producing a V16QI result (operand 0) from two V8HI inputs.
;; Both inputs are reinterpreted as V16QI with gen_lowpart, then three
;; rounds of high/low byte interleaves are applied through six fresh
;; V16QI temporaries (h1/l1, h2/l2, h3/l3); a final low interleave of
;; l3 and h3 writes operand 0.
4183 (define_expand "vec_pack_trunc_v8hi"
4184 [(match_operand:V16QI 0 "register_operand" "")
4185 (match_operand:V8HI 1 "register_operand" "")
4186 (match_operand:V8HI 2 "register_operand" "")]
4189 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4191 op1 = gen_lowpart (V16QImode, operands[1]);
4192 op2 = gen_lowpart (V16QImode, operands[2]);
4193 h1 = gen_reg_rtx (V16QImode);
4194 l1 = gen_reg_rtx (V16QImode);
4195 h2 = gen_reg_rtx (V16QImode);
4196 l2 = gen_reg_rtx (V16QImode);
4197 h3 = gen_reg_rtx (V16QImode);
4198 l3 = gen_reg_rtx (V16QImode);
4200 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4201 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4202 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4203 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4204 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4205 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4206 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4217 ;; result = bdfhjlnp
;; Expander producing a V8HI result (operand 0) from two V4SI inputs.
;; Both inputs are reinterpreted as V8HI with gen_lowpart, then two
;; rounds of high/low word interleaves are applied through four fresh
;; V8HI temporaries (h1/l1, h2/l2); a final low interleave of l2 and h2
;; writes operand 0.
4218 (define_expand "vec_pack_trunc_v4si"
4219 [(match_operand:V8HI 0 "register_operand" "")
4220 (match_operand:V4SI 1 "register_operand" "")
4221 (match_operand:V4SI 2 "register_operand" "")]
4224 rtx op1, op2, h1, l1, h2, l2;
4226 op1 = gen_lowpart (V8HImode, operands[1]);
4227 op2 = gen_lowpart (V8HImode, operands[2]);
4228 h1 = gen_reg_rtx (V8HImode);
4229 l1 = gen_reg_rtx (V8HImode);
4230 h2 = gen_reg_rtx (V8HImode);
4231 l2 = gen_reg_rtx (V8HImode);
4233 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4234 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4235 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4236 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4237 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander producing a V4SI result (operand 0) from two V2DI inputs.
;; Both inputs are reinterpreted as V4SI with gen_lowpart, interleaved
;; once into the temporaries h1 (high) and l1 (low), and a final low
;; interleave of l1 and h1 writes operand 0.
4247 (define_expand "vec_pack_trunc_v2di"
4248 [(match_operand:V4SI 0 "register_operand" "")
4249 (match_operand:V2DI 1 "register_operand" "")
4250 (match_operand:V2DI 2 "register_operand" "")]
4253 rtx op1, op2, h1, l1;
4255 op1 = gen_lowpart (V4SImode, operands[1]);
4256 op2 = gen_lowpart (V4SImode, operands[2]);
4257 h1 = gen_reg_rtx (V4SImode);
4258 l1 = gen_reg_rtx (V4SImode);
4260 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4261 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4262 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the high-half byte interleave of two V16QI
;; operands.  The selection list pairs index 8+i with index 24+i for
;; i = 0..7.  The expansion code emits gen_sse2_punpckhbw on the three
;; operands.
4266 (define_expand "vec_interleave_highv16qi"
4267 [(set (match_operand:V16QI 0 "register_operand" "")
4270 (match_operand:V16QI 1 "register_operand" "")
4271 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4272 (parallel [(const_int 8) (const_int 24)
4273 (const_int 9) (const_int 25)
4274 (const_int 10) (const_int 26)
4275 (const_int 11) (const_int 27)
4276 (const_int 12) (const_int 28)
4277 (const_int 13) (const_int 29)
4278 (const_int 14) (const_int 30)
4279 (const_int 15) (const_int 31)])))]
4282 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the low-half byte interleave of two V16QI
;; operands.  The selection list pairs index i with index 16+i for
;; i = 0..7.  The expansion code emits gen_sse2_punpcklbw on the three
;; operands.
4286 (define_expand "vec_interleave_lowv16qi"
4287 [(set (match_operand:V16QI 0 "register_operand" "")
4290 (match_operand:V16QI 1 "register_operand" "")
4291 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4292 (parallel [(const_int 0) (const_int 16)
4293 (const_int 1) (const_int 17)
4294 (const_int 2) (const_int 18)
4295 (const_int 3) (const_int 19)
4296 (const_int 4) (const_int 20)
4297 (const_int 5) (const_int 21)
4298 (const_int 6) (const_int 22)
4299 (const_int 7) (const_int 23)])))]
4302 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the high-half word interleave of two V8HI
;; operands.  The selection list pairs index 4+i with index 12+i for
;; i = 0..3.  The expansion code emits gen_sse2_punpckhwd on the three
;; operands.
;; Operand 0 uses an empty constraint string, like every sibling
;; vec_interleave_* expander: constraints carry no meaning in a
;; define_expand, so the former "=" was only an inconsistency.
4306 (define_expand "vec_interleave_highv8hi"
4307 [(set (match_operand:V8HI 0 "register_operand" "")
4310 (match_operand:V8HI 1 "register_operand" "")
4311 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4312 (parallel [(const_int 4) (const_int 12)
4313 (const_int 5) (const_int 13)
4314 (const_int 6) (const_int 14)
4315 (const_int 7) (const_int 15)])))]
4318 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the low-half word interleave of two V8HI operands.
;; The selection list pairs index i with index 8+i for i = 0..3.  The
;; expansion code emits gen_sse2_punpcklwd on the three operands.
4322 (define_expand "vec_interleave_lowv8hi"
4323 [(set (match_operand:V8HI 0 "register_operand" "")
4326 (match_operand:V8HI 1 "register_operand" "")
4327 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4328 (parallel [(const_int 0) (const_int 8)
4329 (const_int 1) (const_int 9)
4330 (const_int 2) (const_int 10)
4331 (const_int 3) (const_int 11)])))]
4334 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the high-half doubleword interleave of two V4SI
;; operands (selection indices 2,6,3,7).  The expansion code emits
;; gen_sse2_punpckhdq on the three operands.
4338 (define_expand "vec_interleave_highv4si"
4339 [(set (match_operand:V4SI 0 "register_operand" "")
4342 (match_operand:V4SI 1 "register_operand" "")
4343 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4344 (parallel [(const_int 2) (const_int 6)
4345 (const_int 3) (const_int 7)])))]
4348 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half doubleword interleave of two V4SI
;; operands (selection indices 0,4,1,5).  The expansion code emits
;; gen_sse2_punpckldq on the three operands.
4352 (define_expand "vec_interleave_lowv4si"
4353 [(set (match_operand:V4SI 0 "register_operand" "")
4356 (match_operand:V4SI 1 "register_operand" "")
4357 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4358 (parallel [(const_int 0) (const_int 4)
4359 (const_int 1) (const_int 5)])))]
4362 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high-half quadword interleave of two V2DI
;; operands.  The expansion code emits gen_sse2_punpckhqdq on the three
;; operands.
4366 (define_expand "vec_interleave_highv2di"
4367 [(set (match_operand:V2DI 0 "register_operand" "")
4370 (match_operand:V2DI 1 "register_operand" "")
4371 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4372 (parallel [(const_int 1)
4376 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half quadword interleave of two V2DI
;; operands.  The expansion code emits gen_sse2_punpcklqdq on the three
;; operands.
4380 (define_expand "vec_interleave_lowv2di"
4381 [(set (match_operand:V2DI 0 "register_operand" "")
4384 (match_operand:V2DI 1 "register_operand" "")
4385 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4386 (parallel [(const_int 0)
4390 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Insn that builds a V16QI destination from two V8HI sources and prints
;; packsswb.  Operand 1 must be the destination register ("0"); operand
;; 2 may be an SSE register or memory.
4394 (define_insn "sse2_packsswb"
4395 [(set (match_operand:V16QI 0 "register_operand" "=x")
4398 (match_operand:V8HI 1 "register_operand" "0"))
4400 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4402 "packsswb\t{%2, %0|%0, %2}"
4403 [(set_attr "type" "sselog")
4404 (set_attr "prefix_data16" "1")
4405 (set_attr "mode" "TI")])
;; Insn that builds a V8HI destination from two V4SI sources and prints
;; packssdw.  Operand 1 must be the destination register ("0"); operand
;; 2 may be an SSE register or memory.
4407 (define_insn "sse2_packssdw"
4408 [(set (match_operand:V8HI 0 "register_operand" "=x")
4411 (match_operand:V4SI 1 "register_operand" "0"))
4413 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4415 "packssdw\t{%2, %0|%0, %2}"
4416 [(set_attr "type" "sselog")
4417 (set_attr "prefix_data16" "1")
4418 (set_attr "mode" "TI")])
;; Insn that builds a V16QI destination from two V8HI sources and prints
;; packuswb.  Operand 1 must be the destination register ("0"); operand
;; 2 may be an SSE register or memory.
4420 (define_insn "sse2_packuswb"
4421 [(set (match_operand:V16QI 0 "register_operand" "=x")
4424 (match_operand:V8HI 1 "register_operand" "0"))
4426 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4428 "packuswb\t{%2, %0|%0, %2}"
4429 [(set_attr "type" "sselog")
4430 (set_attr "prefix_data16" "1")
4431 (set_attr "mode" "TI")])
;; Insn for the high-half byte interleave of two V16QI operands; the
;; selection list pairs index 8+i with index 24+i for i = 0..7.
;; Operand 1 is tied to the destination; prints punpckhbw.
4433 (define_insn "sse2_punpckhbw"
4434 [(set (match_operand:V16QI 0 "register_operand" "=x")
4437 (match_operand:V16QI 1 "register_operand" "0")
4438 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4439 (parallel [(const_int 8) (const_int 24)
4440 (const_int 9) (const_int 25)
4441 (const_int 10) (const_int 26)
4442 (const_int 11) (const_int 27)
4443 (const_int 12) (const_int 28)
4444 (const_int 13) (const_int 29)
4445 (const_int 14) (const_int 30)
4446 (const_int 15) (const_int 31)])))]
4448 "punpckhbw\t{%2, %0|%0, %2}"
4449 [(set_attr "type" "sselog")
4450 (set_attr "prefix_data16" "1")
4451 (set_attr "mode" "TI")])
;; Insn for the low-half byte interleave of two V16QI operands; the
;; selection list pairs index i with index 16+i for i = 0..7.
;; Operand 1 is tied to the destination; prints punpcklbw.
4453 (define_insn "sse2_punpcklbw"
4454 [(set (match_operand:V16QI 0 "register_operand" "=x")
4457 (match_operand:V16QI 1 "register_operand" "0")
4458 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4459 (parallel [(const_int 0) (const_int 16)
4460 (const_int 1) (const_int 17)
4461 (const_int 2) (const_int 18)
4462 (const_int 3) (const_int 19)
4463 (const_int 4) (const_int 20)
4464 (const_int 5) (const_int 21)
4465 (const_int 6) (const_int 22)
4466 (const_int 7) (const_int 23)])))]
4468 "punpcklbw\t{%2, %0|%0, %2}"
4469 [(set_attr "type" "sselog")
4470 (set_attr "prefix_data16" "1")
4471 (set_attr "mode" "TI")])
;; Insn for the high-half word interleave of two V8HI operands; the
;; selection list pairs index 4+i with index 12+i for i = 0..3.
;; Operand 1 is tied to the destination; prints punpckhwd.
4473 (define_insn "sse2_punpckhwd"
4474 [(set (match_operand:V8HI 0 "register_operand" "=x")
4477 (match_operand:V8HI 1 "register_operand" "0")
4478 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4479 (parallel [(const_int 4) (const_int 12)
4480 (const_int 5) (const_int 13)
4481 (const_int 6) (const_int 14)
4482 (const_int 7) (const_int 15)])))]
4484 "punpckhwd\t{%2, %0|%0, %2}"
4485 [(set_attr "type" "sselog")
4486 (set_attr "prefix_data16" "1")
4487 (set_attr "mode" "TI")])
;; Insn for the low-half word interleave of two V8HI operands; the
;; selection list pairs index i with index 8+i for i = 0..3.
;; Operand 1 is tied to the destination; prints punpcklwd.
4489 (define_insn "sse2_punpcklwd"
4490 [(set (match_operand:V8HI 0 "register_operand" "=x")
4493 (match_operand:V8HI 1 "register_operand" "0")
4494 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4495 (parallel [(const_int 0) (const_int 8)
4496 (const_int 1) (const_int 9)
4497 (const_int 2) (const_int 10)
4498 (const_int 3) (const_int 11)])))]
4500 "punpcklwd\t{%2, %0|%0, %2}"
4501 [(set_attr "type" "sselog")
4502 (set_attr "prefix_data16" "1")
4503 (set_attr "mode" "TI")])
;; Insn for the high-half doubleword interleave of two V4SI operands
;; (selection indices 2,6,3,7).  Operand 1 is tied to the destination;
;; prints punpckhdq.
4505 (define_insn "sse2_punpckhdq"
4506 [(set (match_operand:V4SI 0 "register_operand" "=x")
4509 (match_operand:V4SI 1 "register_operand" "0")
4510 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4511 (parallel [(const_int 2) (const_int 6)
4512 (const_int 3) (const_int 7)])))]
4514 "punpckhdq\t{%2, %0|%0, %2}"
4515 [(set_attr "type" "sselog")
4516 (set_attr "prefix_data16" "1")
4517 (set_attr "mode" "TI")])
;; Insn for the low-half doubleword interleave of two V4SI operands
;; (selection indices 0,4,1,5).  Operand 1 is tied to the destination;
;; prints punpckldq.
4519 (define_insn "sse2_punpckldq"
4520 [(set (match_operand:V4SI 0 "register_operand" "=x")
4523 (match_operand:V4SI 1 "register_operand" "0")
4524 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4525 (parallel [(const_int 0) (const_int 4)
4526 (const_int 1) (const_int 5)])))]
4528 "punpckldq\t{%2, %0|%0, %2}"
4529 [(set_attr "type" "sselog")
4530 (set_attr "prefix_data16" "1")
4531 (set_attr "mode" "TI")])
;; Insn for the high-half quadword interleave of two V2DI operands.
;; Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory; prints punpckhqdq.
4533 (define_insn "sse2_punpckhqdq"
4534 [(set (match_operand:V2DI 0 "register_operand" "=x")
4537 (match_operand:V2DI 1 "register_operand" "0")
4538 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4539 (parallel [(const_int 1)
4542 "punpckhqdq\t{%2, %0|%0, %2}"
4543 [(set_attr "type" "sselog")
4544 (set_attr "prefix_data16" "1")
4545 (set_attr "mode" "TI")])
;; Insn for the low-half quadword interleave of two V2DI operands.
;; Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory; prints punpcklqdq.
4547 (define_insn "sse2_punpcklqdq"
4548 [(set (match_operand:V2DI 0 "register_operand" "=x")
4551 (match_operand:V2DI 1 "register_operand" "0")
4552 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4553 (parallel [(const_int 0)
4556 "punpcklqdq\t{%2, %0|%0, %2}"
4557 [(set_attr "type" "sselog")
4558 (set_attr "prefix_data16" "1")
4559 (set_attr "mode" "TI")])
;; Insn inserting a QImode value (operand 2, general register or memory)
;; into the V16QI vector tied to the destination (operand 1).  Operand 3
;; is matched as a power of two by const_pow2_1_to_32768_operand; the
;; output code replaces it with exact_log2 of that value before printing
;; pinsrb.
4561 (define_insn "*sse4_1_pinsrb"
4562 [(set (match_operand:V16QI 0 "register_operand" "=x")
4564 (vec_duplicate:V16QI
4565 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4566 (match_operand:V16QI 1 "register_operand" "0")
4567 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4570 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4571 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4573 [(set_attr "type" "sselog")
4574 (set_attr "prefix_extra" "1")
4575 (set_attr "mode" "TI")])
;; Insn inserting an HImode value (operand 2, general register or
;; memory) into the V8HI vector tied to the destination (operand 1).
;; Operand 3 is matched as a power of two by const_pow2_1_to_128_operand;
;; the output code replaces it with exact_log2 of that value before
;; printing pinsrw.
4577 (define_insn "*sse2_pinsrw"
4578 [(set (match_operand:V8HI 0 "register_operand" "=x")
4581 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4582 (match_operand:V8HI 1 "register_operand" "0")
4583 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4586 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4587 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4589 [(set_attr "type" "sselog")
4590 (set_attr "prefix_data16" "1")
4591 (set_attr "mode" "TI")])
;; Insn inserting an SImode value (operand 2, general register or
;; memory) into the V4SI vector tied to the destination (operand 1).
;; Operand 3 is matched as a power of two by const_pow2_1_to_8_operand;
;; the output code replaces it with exact_log2 of that value before
;; printing pinsrd.
4593 ;; It must come before sse2_loadld since it is preferred.
4594 (define_insn "*sse4_1_pinsrd"
4595 [(set (match_operand:V4SI 0 "register_operand" "=x")
4598 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4599 (match_operand:V4SI 1 "register_operand" "0")
4600 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4603 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4604 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4606 [(set_attr "type" "sselog")
4607 (set_attr "prefix_extra" "1")
4608 (set_attr "mode" "TI")])
;; Insn inserting a DImode value (operand 2, general register or memory)
;; into the V2DI vector tied to the destination (operand 1).  Operand 3
;; is matched as a power of two by const_pow2_1_to_2_operand; the output
;; code replaces it with exact_log2 of that value before printing pinsrq.
4610 (define_insn "*sse4_1_pinsrq"
4611 [(set (match_operand:V2DI 0 "register_operand" "=x")
4614 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4615 (match_operand:V2DI 1 "register_operand" "0")
4616 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4619 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4620 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4622 [(set_attr "type" "sselog")
4623 (set_attr "prefix_extra" "1")
4624 (set_attr "mode" "TI")])
;; Insn extracting the V16QI element selected by operand 2 (a constant
;; in 0..15) from SSE register operand 1 into an SImode general
;; register; prints pextrb.
4626 (define_insn "*sse4_1_pextrb"
4627 [(set (match_operand:SI 0 "register_operand" "=r")
4630 (match_operand:V16QI 1 "register_operand" "x")
4631 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4633 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4634 [(set_attr "type" "sselog")
4635 (set_attr "prefix_extra" "1")
4636 (set_attr "mode" "TI")])
;; Variant of the byte extract whose destination is a QImode memory
;; operand: stores the V16QI element selected by operand 2 (0..15) from
;; SSE register operand 1; prints pextrb.
4638 (define_insn "*sse4_1_pextrb_memory"
4639 [(set (match_operand:QI 0 "memory_operand" "=m")
4641 (match_operand:V16QI 1 "register_operand" "x")
4642 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4644 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4645 [(set_attr "type" "sselog")
4646 (set_attr "prefix_extra" "1")
4647 (set_attr "mode" "TI")])
;; Insn extracting the V8HI element selected by operand 2 (a constant in
;; 0..7) from SSE register operand 1 into an SImode general register;
;; prints pextrw.
4649 (define_insn "*sse2_pextrw"
4650 [(set (match_operand:SI 0 "register_operand" "=r")
4653 (match_operand:V8HI 1 "register_operand" "x")
4654 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4656 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4657 [(set_attr "type" "sselog")
4658 (set_attr "prefix_data16" "1")
4659 (set_attr "mode" "TI")])
;; Variant of the word extract whose destination is an HImode memory
;; operand: stores the V8HI element selected by operand 2 (0..7) from
;; SSE register operand 1; prints pextrw.
4661 (define_insn "*sse4_1_pextrw_memory"
4662 [(set (match_operand:HI 0 "memory_operand" "=m")
4664 (match_operand:V8HI 1 "register_operand" "x")
4665 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4667 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4668 [(set_attr "type" "sselog")
4669 (set_attr "prefix_extra" "1")
4670 (set_attr "mode" "TI")])
;; Insn extracting the V4SI element selected by operand 2 (a constant in
;; 0..3) from SSE register operand 1 into an SImode general register or
;; memory ("=rm"); prints pextrd.
4672 (define_insn "*sse4_1_pextrd"
4673 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4675 (match_operand:V4SI 1 "register_operand" "x")
4676 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4678 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4679 [(set_attr "type" "sselog")
4680 (set_attr "prefix_extra" "1")
4681 (set_attr "mode" "TI")])
;; Insn extracting the V2DI element selected by operand 2 (0 or 1) from
;; SSE register operand 1 into a DImode general register or memory;
;; prints pextrq.  Enabled only for TARGET_SSE4_1 && TARGET_64BIT.
4683 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4684 (define_insn "*sse4_1_pextrq"
4685 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4687 (match_operand:V2DI 1 "register_operand" "x")
4688 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4689 "TARGET_SSE4_1 && TARGET_64BIT"
4690 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4691 [(set_attr "type" "sselog")
4692 (set_attr "prefix_extra" "1")
4693 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits the value into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) and passes them as separate constants to
;; gen_sse2_pshufd_1.
4695 (define_expand "sse2_pshufd"
4696 [(match_operand:V4SI 0 "register_operand" "")
4697 (match_operand:V4SI 1 "nonimmediate_operand" "")
4698 (match_operand:SI 2 "const_int_operand" "")]
4701 int mask = INTVAL (operands[2]);
4702 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4703 GEN_INT ((mask >> 0) & 3),
4704 GEN_INT ((mask >> 2) & 3),
4705 GEN_INT ((mask >> 4) & 3),
4706 GEN_INT ((mask >> 6) & 3)));
;; Insn form of the V4SI shuffle with one selector operand per result
;; element (operands 2..5, each a constant in 0..3).  The output code
;; packs the four selectors back into a single immediate (2 bits each,
;; operand 2 in the lowest bits), stores it in operands[2], and prints
;; pshufd.
4710 (define_insn "sse2_pshufd_1"
4711 [(set (match_operand:V4SI 0 "register_operand" "=x")
4713 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4714 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4715 (match_operand 3 "const_0_to_3_operand" "")
4716 (match_operand 4 "const_0_to_3_operand" "")
4717 (match_operand 5 "const_0_to_3_operand" "")])))]
4721 mask |= INTVAL (operands[2]) << 0;
4722 mask |= INTVAL (operands[3]) << 2;
4723 mask |= INTVAL (operands[4]) << 4;
4724 mask |= INTVAL (operands[5]) << 6;
4725 operands[2] = GEN_INT (mask);
4727 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4729 [(set_attr "type" "sselog1")
4730 (set_attr "prefix_data16" "1")
4731 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits the value into four 2-bit fields and passes
;; them as separate constants to gen_sse2_pshuflw_1.
4733 (define_expand "sse2_pshuflw"
4734 [(match_operand:V8HI 0 "register_operand" "")
4735 (match_operand:V8HI 1 "nonimmediate_operand" "")
4736 (match_operand:SI 2 "const_int_operand" "")]
4739 int mask = INTVAL (operands[2]);
4740 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4741 GEN_INT ((mask >> 0) & 3),
4742 GEN_INT ((mask >> 2) & 3),
4743 GEN_INT ((mask >> 4) & 3),
4744 GEN_INT ((mask >> 6) & 3)));
;; Insn form of the V8HI low-word shuffle.  Operands 2..5 are the four
;; variable selectors, each a constant in 0..3.  The output code packs
;; them into a single immediate (2 bits each, operand 2 in the lowest
;; bits), stores it in operands[2], and prints pshuflw.
4748 (define_insn "sse2_pshuflw_1"
4749 [(set (match_operand:V8HI 0 "register_operand" "=x")
4751 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4752 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4753 (match_operand 3 "const_0_to_3_operand" "")
4754 (match_operand 4 "const_0_to_3_operand" "")
4755 (match_operand 5 "const_0_to_3_operand" "")
4763 mask |= INTVAL (operands[2]) << 0;
4764 mask |= INTVAL (operands[3]) << 2;
4765 mask |= INTVAL (operands[4]) << 4;
4766 mask |= INTVAL (operands[5]) << 6;
4767 operands[2] = GEN_INT (mask);
4769 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4771 [(set_attr "type" "sselog")
4772 (set_attr "prefix_rep" "1")
4773 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits the value into four 2-bit fields, adds 4 to
;; each so the selectors fall in 4..7, and passes them to
;; gen_sse2_pshufhw_1.
4775 (define_expand "sse2_pshufhw"
4776 [(match_operand:V8HI 0 "register_operand" "")
4777 (match_operand:V8HI 1 "nonimmediate_operand" "")
4778 (match_operand:SI 2 "const_int_operand" "")]
4781 int mask = INTVAL (operands[2]);
4782 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4783 GEN_INT (((mask >> 0) & 3) + 4),
4784 GEN_INT (((mask >> 2) & 3) + 4),
4785 GEN_INT (((mask >> 4) & 3) + 4),
4786 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of the V8HI high-word shuffle.  Operands 2..5 are the four
;; variable selectors, each a constant in 4..7.  The output code
;; subtracts 4 from each, packs them into a single immediate (2 bits
;; each, operand 2 in the lowest bits), stores it in operands[2], and
;; prints pshufhw.
4790 (define_insn "sse2_pshufhw_1"
4791 [(set (match_operand:V8HI 0 "register_operand" "=x")
4793 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4794 (parallel [(const_int 0)
4798 (match_operand 2 "const_4_to_7_operand" "")
4799 (match_operand 3 "const_4_to_7_operand" "")
4800 (match_operand 4 "const_4_to_7_operand" "")
4801 (match_operand 5 "const_4_to_7_operand" "")])))]
4805 mask |= (INTVAL (operands[2]) - 4) << 0;
4806 mask |= (INTVAL (operands[3]) - 4) << 2;
4807 mask |= (INTVAL (operands[4]) - 4) << 4;
4808 mask |= (INTVAL (operands[5]) - 4) << 6;
4809 operands[2] = GEN_INT (mask);
4811 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4813 [(set_attr "type" "sselog")
4814 (set_attr "prefix_rep" "1")
4815 (set_attr "mode" "TI")])
;; Expander that places an SImode value (operand 1) into a V4SI
;; register.  The preparation statement sets operands[2] to the V4SImode
;; zero constant.
4817 (define_expand "sse2_loadd"
4818 [(set (match_operand:V4SI 0 "register_operand" "")
4821 (match_operand:SI 1 "nonimmediate_operand" ""))
4825 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn combining an SImode value (operand 2) with a V4SI operand 1 that
;; is either zero ("C") or tied to the destination ("0").  There are
;; four alternatives: the first two print movd, the last two print
;; movss.  The per-alternative mode attribute is TI,TI,V4SF,SF.
4827 (define_insn "sse2_loadld"
4828 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4831 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4832 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4836 movd\t{%2, %0|%0, %2}
4837 movd\t{%2, %0|%0, %2}
4838 movss\t{%2, %0|%0, %2}
4839 movss\t{%2, %0|%0, %2}"
4840 [(set_attr "type" "ssemov")
4841 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Insn-and-split storing element 0 of a V4SI register (operand 1) into
;; an SImode destination.  After reload, when inter-unit moves are
;; enabled, or the destination is memory, or it is not a general
;; register, the pattern is replaced by a plain (set op0 op1) with
;; operand 1 rewritten as an SImode REG of the same register number.
4843 (define_insn_and_split "sse2_stored"
4844 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4846 (match_operand:V4SI 1 "register_operand" "x,Yi")
4847 (parallel [(const_int 0)])))]
4850 "&& reload_completed
4851 && (TARGET_INTER_UNIT_MOVES
4852 || MEM_P (operands [0])
4853 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4854 [(set (match_dup 0) (match_dup 1))]
4856 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Insn-and-split extracting element operand 2 (a constant in 0..3) of a
;; V4SI held in offsettable memory ("o") into an SImode general
;; register.  The split code emits a move from the same memory viewed as
;; SImode at byte offset i*4.
4859 (define_insn_and_split "*vec_ext_v4si_mem"
4860 [(set (match_operand:SI 0 "register_operand" "=r")
4862 (match_operand:V4SI 1 "memory_operand" "o")
4863 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4869 int i = INTVAL (operands[2]);
4871 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander storing element 0 of a V2DI register (operand 1) into a
;; DImode register or memory destination (operand 0).
4875 (define_expand "sse_storeq"
4876 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4878 (match_operand:V2DI 1 "register_operand" "")
4879 (parallel [(const_int 0)])))]
;; 64-bit insn storing element 0 of a V2DI operand into a DImode
;; destination.  It has three alternatives; the third takes the source
;; from offsettable memory and is typed imov with mode DI (mov{q}).
;; Memory-to-memory combinations are rejected by the insn condition.
4883 (define_insn "*sse2_storeq_rex64"
4884 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4886 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4887 (parallel [(const_int 0)])))]
4888 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4892 mov{q}\t{%1, %0|%0, %1}"
4893 [(set_attr "type" "*,*,imov")
4894 (set_attr "mode" "*,*,DI")])
;; Insn storing element 0 of a V2DI SSE register into a DImode memory or
;; SSE-register destination ("=mx").  It is followed by a split for the
;; same element-0 store: when inter-unit moves are enabled, or the
;; destination is memory, or it is not a general register, the store
;; becomes a plain (set op0 op1) with operand 1 rewritten as a DImode REG
;; of the same register number.
4896 (define_insn "*sse2_storeq"
4897 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4899 (match_operand:V2DI 1 "register_operand" "x")
4900 (parallel [(const_int 0)])))]
4905 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4907 (match_operand:V2DI 1 "register_operand" "")
4908 (parallel [(const_int 0)])))]
4911 && (TARGET_INTER_UNIT_MOVES
4912 || MEM_P (operands [0])
4913 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4914 [(set (match_dup 0) (match_dup 1))]
4916 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; 64-bit insn extracting element 1 of a V2DI operand into a DImode
;; destination.  Four alternatives: movhps to memory, psrldq by 8 in
;; place, movq from the high half of a memory source (%H1), and mov{q}
;; from that high half into a general register.  Memory-to-memory is
;; rejected by the insn condition.
4919 (define_insn "*vec_extractv2di_1_rex64"
4920 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4922 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4923 (parallel [(const_int 1)])))]
4924 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4926 movhps\t{%1, %0|%0, %1}
4927 psrldq\t{$8, %0|%0, 8}
4928 movq\t{%H1, %0|%0, %H1}
4929 mov{q}\t{%H1, %0|%0, %H1}"
4930 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4931 (set_attr "memory" "*,none,*,*")
4932 (set_attr "mode" "V2SF,TI,TI,DI")])
;; SSE2 insn extracting element 1 of a V2DI operand into a DImode
;; destination.  Three alternatives: movhps to memory, psrldq by 8 in
;; place, and movq from the high half of a memory source (%H1).  The
;; visible part of the condition requires TARGET_SSE2 and rejects
;; memory-to-memory.
4934 (define_insn "*vec_extractv2di_1_sse2"
4935 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4937 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4938 (parallel [(const_int 1)])))]
4940 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4942 movhps\t{%1, %0|%0, %1}
4943 psrldq\t{$8, %0|%0, 8}
4944 movq\t{%H1, %0|%0, %H1}"
4945 [(set_attr "type" "ssemov,sseishft,ssemov")
4946 (set_attr "memory" "*,none,*")
4947 (set_attr "mode" "V2SF,TI,TI")])
;; SSE-without-SSE2 insn extracting element 1 of a V2DI operand into a
;; DImode destination.  Three alternatives: movhps to memory, movhlps
;; between SSE registers, and movlps from the high half of a memory
;; source (%H1).  Memory-to-memory is rejected by the insn condition.
4949 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4950 (define_insn "*vec_extractv2di_1_sse"
4951 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4953 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4954 (parallel [(const_int 1)])))]
4955 "!TARGET_SSE2 && TARGET_SSE
4956 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4958 movhps\t{%1, %0|%0, %1}
4959 movhlps\t{%1, %0|%0, %1}
4960 movlps\t{%H1, %0|%0, %H1}"
4961 [(set_attr "type" "ssemov")
4962 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn filling a V4SI register from an SImode register operand.  Two
;; alternatives: pshufd with immediate 0 from operand 1, or shufps with
;; immediate 0 applied in place when operand 1 is tied to the
;; destination.
4964 (define_insn "*vec_dupv4si"
4965 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4967 (match_operand:SI 1 "register_operand" " Y2,0")))]
4970 pshufd\t{$0, %1, %0|%0, %1, 0}
4971 shufps\t{$0, %0, %0|%0, %0, 0}"
4972 [(set_attr "type" "sselog1")
4973 (set_attr "mode" "TI,V4SF")])
;; Insn filling a V2DI register from a DImode register operand that is
;; tied to the destination in both alternatives.  The alternatives are
;; typed sselog1 and ssemov, with modes TI and V4SF respectively.
4975 (define_insn "*vec_dupv2di"
4976 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4978 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4983 [(set_attr "type" "sselog1,ssemov")
4984 (set_attr "mode" "TI,V4SF")])
;; Insn building a V2SI value from two SImode operands.  Operand 2 is a
;; register or zero.  Four alternatives: punpckldq / movd typed as
;; sselog / ssemov, followed by the same two mnemonics on the "*y"
;; register alternatives typed as mmxcvt / mmxmov.
4986 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4987 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4988 ;; alternatives pretty much forces the MMX alternative to be chosen.
4989 (define_insn "*sse2_concatv2si"
4990 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4992 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4993 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4996 punpckldq\t{%2, %0|%0, %2}
4997 movd\t{%1, %0|%0, %1}
4998 punpckldq\t{%2, %0|%0, %2}
4999 movd\t{%1, %0|%0, %1}"
5000 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5001 (set_attr "mode" "TI,TI,DI,DI")])
;; Insn building a V2SI value from two SImode operands, using unpcklps
;; and movss for the "x" register alternatives and punpckldq and movd
;; for the "*y" register alternatives.  Operand 2 is a register or zero.
5003 (define_insn "*sse1_concatv2si"
5004 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
5006 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
5007 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
5010 unpcklps\t{%2, %0|%0, %2}
5011 movss\t{%1, %0|%0, %1}
5012 punpckldq\t{%2, %0|%0, %2}
5013 movd\t{%1, %0|%0, %1}"
5014 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5015 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Insn building a V4SI value from two V2SI halves.  Operand 1 is always
;; tied to the destination.  Three alternatives, chosen by where
;; operand 2 lives: punpcklqdq, movlhps (register), or movhps (memory).
5017 (define_insn "*vec_concatv4si_1"
5018 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
5020 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
5021 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
5024 punpcklqdq\t{%2, %0|%0, %2}
5025 movlhps\t{%2, %0|%0, %2}
5026 movhps\t{%2, %0|%0, %2}"
5027 [(set_attr "type" "sselog,ssemov,ssemov")
5028 (set_attr "mode" "TI,V4SF,V2SF")])
;; Insn building a V2DI value from two DImode operands on non-64-bit
;; SSE targets (!TARGET_64BIT && TARGET_SSE).  Six alternatives print,
;; in order: movq, movq2dq, punpcklqdq, movlhps, movhps, movlps.  In the
;; first two, operand 2 is the "C" constant; in the last, operand 2 is
;; tied to the destination and operand 1 comes from memory.
5030 (define_insn "vec_concatv2di"
5031 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
5033 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
5034 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
5035 "!TARGET_64BIT && TARGET_SSE"
5037 movq\t{%1, %0|%0, %1}
5038 movq2dq\t{%1, %0|%0, %1}
5039 punpcklqdq\t{%2, %0|%0, %2}
5040 movlhps\t{%2, %0|%0, %2}
5041 movhps\t{%2, %0|%0, %2}
5042 movlps\t{%1, %0|%0, %1}"
5043 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5044 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Insn building a V2DI value from two DImode operands, with seven
;; alternatives.  Compared with vec_concatv2di it has an additional
;; second alternative that takes operand 1 from a general register ("r")
;; and prints movq.
5046 (define_insn "*vec_concatv2di_rex"
5047 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
5049 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
5050 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
5053 movq\t{%1, %0|%0, %1}
5054 movq\t{%1, %0|%0, %1}
5055 movq2dq\t{%1, %0|%0, %1}
5056 punpcklqdq\t{%2, %0|%0, %2}
5057 movlhps\t{%2, %0|%0, %2}
5058 movhps\t{%2, %0|%0, %2}
5059 movlps\t{%1, %0|%0, %1}"
5060 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5061 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander storing a DImode register (operand 1) into the V2DI vector
;; operand 0 at the constant index operand 2; delegates to
;; ix86_expand_vector_set with a false first argument.
5063 (define_expand "vec_setv2di"
5064 [(match_operand:V2DI 0 "register_operand" "")
5065 (match_operand:DI 1 "register_operand" "")
5066 (match_operand 2 "const_int_operand" "")]
5069 ix86_expand_vector_set (false, operands[0], operands[1],
5070 INTVAL (operands[2]));
;; Expander reading the element at constant index operand 2 of the V2DI
;; vector operand 1 into the DImode register operand 0; delegates to
;; ix86_expand_vector_extract with a false first argument.
5074 (define_expand "vec_extractv2di"
5075 [(match_operand:DI 0 "register_operand" "")
5076 (match_operand:V2DI 1 "register_operand" "")
5077 (match_operand 2 "const_int_operand" "")]
5080 ix86_expand_vector_extract (false, operands[0], operands[1],
5081 INTVAL (operands[2]));
;; Expander initializing the V2DI register operand 0 from operand 1
;; (no predicate or mode given); delegates to ix86_expand_vector_init
;; with a false first argument.
5085 (define_expand "vec_initv2di"
5086 [(match_operand:V2DI 0 "register_operand" "")
5087 (match_operand 1 "" "")]
5090 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander storing an SImode register (operand 1) into the V4SI vector
;; operand 0 at the constant index operand 2; delegates to
;; ix86_expand_vector_set with a false first argument.
5094 (define_expand "vec_setv4si"
5095 [(match_operand:V4SI 0 "register_operand" "")
5096 (match_operand:SI 1 "register_operand" "")
5097 (match_operand 2 "const_int_operand" "")]
5100 ix86_expand_vector_set (false, operands[0], operands[1],
5101 INTVAL (operands[2]));
;; Expander reading the element at constant index operand 2 of the V4SI
;; vector operand 1 into the SImode register operand 0; delegates to
;; ix86_expand_vector_extract with a false first argument.
5105 (define_expand "vec_extractv4si"
5106 [(match_operand:SI 0 "register_operand" "")
5107 (match_operand:V4SI 1 "register_operand" "")
5108 (match_operand 2 "const_int_operand" "")]
5111 ix86_expand_vector_extract (false, operands[0], operands[1],
5112 INTVAL (operands[2]));
;; Expander initializing the V4SI register operand 0 from operand 1
;; (no predicate or mode given); delegates to ix86_expand_vector_init
;; with a false first argument.
5116 (define_expand "vec_initv4si"
5117 [(match_operand:V4SI 0 "register_operand" "")
5118 (match_operand 1 "" "")]
5121 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander storing an HImode register (operand 1) into the V8HI vector
;; operand 0 at the constant index operand 2; delegates to
;; ix86_expand_vector_set with a false first argument.
5125 (define_expand "vec_setv8hi"
5126 [(match_operand:V8HI 0 "register_operand" "")
5127 (match_operand:HI 1 "register_operand" "")
5128 (match_operand 2 "const_int_operand" "")]
5131 ix86_expand_vector_set (false, operands[0], operands[1],
5132 INTVAL (operands[2]));
;; Expander reading the element at constant index operand 2 of the V8HI
;; vector operand 1 into the HImode register operand 0; delegates to
;; ix86_expand_vector_extract with a false first argument.
5136 (define_expand "vec_extractv8hi"
5137 [(match_operand:HI 0 "register_operand" "")
5138 (match_operand:V8HI 1 "register_operand" "")
5139 (match_operand 2 "const_int_operand" "")]
5142 ix86_expand_vector_extract (false, operands[0], operands[1],
5143 INTVAL (operands[2]));
;; Expander initializing the V8HI register operand 0 from operand 1
;; (no predicate or mode given); delegates to ix86_expand_vector_init
;; with a false first argument.
5147 (define_expand "vec_initv8hi"
5148 [(match_operand:V8HI 0 "register_operand" "")
5149 (match_operand 1 "" "")]
5152 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander storing a QImode register (operand 1) into the V16QI vector
;; operand 0 at the constant index operand 2; delegates to
;; ix86_expand_vector_set with a false first argument.
5156 (define_expand "vec_setv16qi"
5157 [(match_operand:V16QI 0 "register_operand" "")
5158 (match_operand:QI 1 "register_operand" "")
5159 (match_operand 2 "const_int_operand" "")]
5162 ix86_expand_vector_set (false, operands[0], operands[1],
5163 INTVAL (operands[2]));
;; Expander reading the element at constant index operand 2 of the V16QI
;; vector operand 1 into the QImode register operand 0; delegates to
;; ix86_expand_vector_extract with a false first argument.
5167 (define_expand "vec_extractv16qi"
5168 [(match_operand:QI 0 "register_operand" "")
5169 (match_operand:V16QI 1 "register_operand" "")
5170 (match_operand 2 "const_int_operand" "")]
5173 ix86_expand_vector_extract (false, operands[0], operands[1],
5174 INTVAL (operands[2]));
;; Expander initializing the V16QI register operand 0 from operand 1
;; (no predicate or mode given); delegates to ix86_expand_vector_init
;; with a false first argument.
5178 (define_expand "vec_initv16qi"
5179 [(match_operand:V16QI 0 "register_operand" "")
5180 (match_operand 1 "" "")]
5183 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander widening V16QI (operand 1) to V8HI (operand 0): "u" variant,
;; "hi" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, true).  Across the sibling expanders
;; the first flag is true for "u" names and the second for "hi" names.
5187 (define_expand "vec_unpacku_hi_v16qi"
5188 [(match_operand:V8HI 0 "register_operand" "")
5189 (match_operand:V16QI 1 "register_operand" "")]
5193 ix86_expand_sse4_unpack (operands, true, true);
5194 else if (TARGET_SSE5)
5195 ix86_expand_sse5_unpack (operands, true, true);
5197 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening V16QI (operand 1) to V8HI (operand 0): "s" variant,
;; "hi" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, true).
5201 (define_expand "vec_unpacks_hi_v16qi"
5202 [(match_operand:V8HI 0 "register_operand" "")
5203 (match_operand:V16QI 1 "register_operand" "")]
5207 ix86_expand_sse4_unpack (operands, false, true);
5208 else if (TARGET_SSE5)
5209 ix86_expand_sse5_unpack (operands, false, true);
5211 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening V16QI (operand 1) to V8HI (operand 0): "u" variant,
;; "lo" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, false).
5215 (define_expand "vec_unpacku_lo_v16qi"
5216 [(match_operand:V8HI 0 "register_operand" "")
5217 (match_operand:V16QI 1 "register_operand" "")]
5221 ix86_expand_sse4_unpack (operands, true, false);
5222 else if (TARGET_SSE5)
5223 ix86_expand_sse5_unpack (operands, true, false);
5225 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening V16QI (operand 1) to V8HI (operand 0): "s" variant,
;; "lo" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, false).
5229 (define_expand "vec_unpacks_lo_v16qi"
5230 [(match_operand:V8HI 0 "register_operand" "")
5231 (match_operand:V16QI 1 "register_operand" "")]
5235 ix86_expand_sse4_unpack (operands, false, false);
5236 else if (TARGET_SSE5)
5237 ix86_expand_sse5_unpack (operands, false, false);
5239 ix86_expand_sse_unpack (operands, false, false);
;; Expander widening V8HI (operand 1) to V4SI (operand 0): "u" variant,
;; "hi" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, true).
5243 (define_expand "vec_unpacku_hi_v8hi"
5244 [(match_operand:V4SI 0 "register_operand" "")
5245 (match_operand:V8HI 1 "register_operand" "")]
5249 ix86_expand_sse4_unpack (operands, true, true);
5250 else if (TARGET_SSE5)
5251 ix86_expand_sse5_unpack (operands, true, true);
5253 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening V8HI (operand 1) to V4SI (operand 0): "s" variant,
;; "hi" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, true).
5257 (define_expand "vec_unpacks_hi_v8hi"
5258 [(match_operand:V4SI 0 "register_operand" "")
5259 (match_operand:V8HI 1 "register_operand" "")]
5263 ix86_expand_sse4_unpack (operands, false, true);
5264 else if (TARGET_SSE5)
5265 ix86_expand_sse5_unpack (operands, false, true);
5267 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening V8HI (operand 1) to V4SI (operand 0): "u" variant,
;; "lo" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, false).
5271 (define_expand "vec_unpacku_lo_v8hi"
5272 [(match_operand:V4SI 0 "register_operand" "")
5273 (match_operand:V8HI 1 "register_operand" "")]
5277 ix86_expand_sse4_unpack (operands, true, false);
5278 else if (TARGET_SSE5)
5279 ix86_expand_sse5_unpack (operands, true, false);
5281 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening V8HI (operand 1) to V4SI (operand 0): "s" variant,
;; "lo" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, false).
5285 (define_expand "vec_unpacks_lo_v8hi"
5286 [(match_operand:V4SI 0 "register_operand" "")
5287 (match_operand:V8HI 1 "register_operand" "")]
5291 ix86_expand_sse4_unpack (operands, false, false);
5292 else if (TARGET_SSE5)
5293 ix86_expand_sse5_unpack (operands, false, false);
5295 ix86_expand_sse_unpack (operands, false, false);
;; Expander widening V4SI (operand 1) to V2DI (operand 0): "u" variant,
;; "hi" half.  It dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (when TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, true).
5299 (define_expand "vec_unpacku_hi_v4si"
5300 [(match_operand:V2DI 0 "register_operand" "")
5301 (match_operand:V4SI 1 "register_operand" "")]
5305 ix86_expand_sse4_unpack (operands, true, true);
5306 else if (TARGET_SSE5)
5307 ix86_expand_sse5_unpack (operands, true, true);
5309 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: expander taking a V4SI register (operand 1) and
;; producing a V2DI register (operand 0).  The work is handed off to
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, true).
;; NOTE(review): the two flags presumably mean unsigned / high-half, matching
;; the "unpacks" / "hi" parts of the pattern name -- confirm.
5313 (define_expand "vec_unpacks_hi_v4si"
5314 [(match_operand:V2DI 0 "register_operand" "")
5315 (match_operand:V4SI 1 "register_operand" "")]
5319 ix86_expand_sse4_unpack (operands, false, true);
5320 else if (TARGET_SSE5)
5321 ix86_expand_sse5_unpack (operands, false, true);
5323 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: expander taking a V4SI register (operand 1) and
;; producing a V2DI register (operand 0).  The work is handed off to
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (true, false).
;; NOTE(review): the two flags presumably mean unsigned / high-half, matching
;; the "unpacku" / "lo" parts of the pattern name -- confirm.
5327 (define_expand "vec_unpacku_lo_v4si"
5328 [(match_operand:V2DI 0 "register_operand" "")
5329 (match_operand:V4SI 1 "register_operand" "")]
5333 ix86_expand_sse4_unpack (operands, true, false);
5334 else if (TARGET_SSE5)
5335 ix86_expand_sse5_unpack (operands, true, false);
5337 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: expander taking a V4SI register (operand 1) and
;; producing a V2DI register (operand 0).  The work is handed off to
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flag pair (false, false).
;; NOTE(review): the two flags presumably mean unsigned / high-half, matching
;; the "unpacks" / "lo" parts of the pattern name -- confirm.
5341 (define_expand "vec_unpacks_lo_v4si"
5342 [(match_operand:V2DI 0 "register_operand" "")
5343 (match_operand:V4SI 1 "register_operand" "")]
5347 ix86_expand_sse4_unpack (operands, false, false);
5348 else if (TARGET_SSE5)
5349 ix86_expand_sse5_unpack (operands, false, false);
5351 ix86_expand_sse_unpack (operands, false, false);
5355 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: emits pavgb on V16QI operands.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE register
;; or memory ("xm").  The insn is only enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the all-ones V16QI const_vector is presumably the rounding
;; term of the average -- confirm against the full RTL expression.
5361 (define_insn "sse2_uavgv16qi3"
5362 [(set (match_operand:V16QI 0 "register_operand" "=x")
5368 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5370 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5371 (const_vector:V16QI [(const_int 1) (const_int 1)
5372 (const_int 1) (const_int 1)
5373 (const_int 1) (const_int 1)
5374 (const_int 1) (const_int 1)
5375 (const_int 1) (const_int 1)
5376 (const_int 1) (const_int 1)
5377 (const_int 1) (const_int 1)
5378 (const_int 1) (const_int 1)]))
5380 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5381 "pavgb\t{%2, %0|%0, %2}"
5382 [(set_attr "type" "sseiadd")
5383 (set_attr "prefix_data16" "1")
5384 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: emits pavgw on V8HI operands.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE register
;; or memory ("xm").  The insn is only enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the all-ones V8HI const_vector is presumably the rounding
;; term of the average -- confirm against the full RTL expression.
5386 (define_insn "sse2_uavgv8hi3"
5387 [(set (match_operand:V8HI 0 "register_operand" "=x")
5393 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5395 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5396 (const_vector:V8HI [(const_int 1) (const_int 1)
5397 (const_int 1) (const_int 1)
5398 (const_int 1) (const_int 1)
5399 (const_int 1) (const_int 1)]))
5401 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5402 "pavgw\t{%2, %0|%0, %2}"
5403 [(set_attr "type" "sseiadd")
5404 (set_attr "prefix_data16" "1")
5405 (set_attr "mode" "TI")])
5407 ;; The correct representation for this is absolutely enormous, and
5408 ;; surely not generally useful.
;; For that reason the operation is modelled as an opaque unspec: two V16QI
;; inputs (operand 1 tied to the destination, operand 2 register or memory)
;; producing a V2DI result.  The template emits psadbw.
5409 (define_insn "sse2_psadbw"
5410 [(set (match_operand:V2DI 0 "register_operand" "=x")
5411 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5412 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5415 "psadbw\t{%2, %0|%0, %2}"
5416 [(set_attr "type" "sseiadd")
5417 (set_attr "prefix_data16" "1")
5418 (set_attr "mode" "TI")])
;; <sse>_movmskp<ssemodesuffixf2c>: iterated over SSEMODEF2P (V4SF, V2DF), so
;; it yields movmskps and movmskpd.  Operand 1 is an SSE register ("x") and
;; the SImode result goes to a general register ("r").  Enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the iterated mode.
5420 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5421 [(set (match_operand:SI 0 "register_operand" "=r")
5423 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5425 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5426 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5427 [(set_attr "type" "ssecvt")
5428 (set_attr "mode" "<MODE>")])
;; sse2_pmovmskb: emits pmovmskb.  Modelled as an SImode unspec of one V16QI
;; SSE register (operand 1, "x"); the result is written to a general register
;; (operand 0, "r").
5430 (define_insn "sse2_pmovmskb"
5431 [(set (match_operand:SI 0 "register_operand" "=r")
5432 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5435 "pmovmskb\t{%1, %0|%0, %1}"
5436 [(set_attr "type" "ssecvt")
5437 (set_attr "prefix_data16" "1")
5438 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: named expander whose destination (operand 0) is a V16QI
;; memory reference and whose value is a V16QI unspec of two V16QI register
;; operands (1 and 2).
;; NOTE(review): presumably matched by the "*sse2_maskmovdqu" insns that
;; follow in this file -- confirm.
5440 (define_expand "sse2_maskmovdqu"
5441 [(set (match_operand:V16QI 0 "memory_operand" "")
5442 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5443 (match_operand:V16QI 2 "register_operand" "")
;; *sse2_maskmovdqu: 32-bit form (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is an SImode register with constraint "D"; the memory at that
;; address is also an input of the unspec (match_dup 0), alongside the two
;; V16QI SSE register operands.  Only operands 1 and 2 appear in the
;; maskmovdqu template; the address register is not printed.
5449 (define_insn "*sse2_maskmovdqu"
5450 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5451 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5452 (match_operand:V16QI 2 "register_operand" "x")
5453 (mem:V16QI (match_dup 0))]
5455 "TARGET_SSE2 && !TARGET_64BIT"
5456 ;; @@@ check ordering of operands in intel/nonintel syntax
5457 "maskmovdqu\t{%2, %1|%1, %2}"
5458 [(set_attr "type" "ssecvt")
5459 (set_attr "prefix_data16" "1")
5460 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (TARGET_SSE2 && TARGET_64BIT).  Same
;; shape as the 32-bit pattern except that the address register (operand 0,
;; constraint "D") is DImode.  Only operands 1 and 2 appear in the
;; maskmovdqu template; the address register is not printed.
5462 (define_insn "*sse2_maskmovdqu_rex64"
5463 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5464 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5465 (match_operand:V16QI 2 "register_operand" "x")
5466 (mem:V16QI (match_dup 0))]
5468 "TARGET_SSE2 && TARGET_64BIT"
5469 ;; @@@ check ordering of operands in intel/nonintel syntax
5470 "maskmovdqu\t{%2, %1|%1, %2}"
5471 [(set_attr "type" "ssecvt")
5472 (set_attr "prefix_data16" "1")
5473 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile with a single SImode memory operand.  The
;; attributes classify it as an SSE insn that loads from memory.
5475 (define_insn "sse_ldmxcsr"
5476 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5480 [(set_attr "type" "sse")
5481 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR (which takes no real input, only const_int 0).  The
;; attributes classify it as an SSE insn that stores to memory.
5483 (define_insn "sse_stmxcsr"
5484 [(set (match_operand:SI 0 "memory_operand" "=m")
5485 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5488 [(set_attr "type" "sse")
5489 (set_attr "memory" "store")])
;; sse_sfence: expander enabled for TARGET_SSE or TARGET_3DNOW_A.  The
;; preparation code builds operand 0 itself: a BLKmode MEM whose address is a
;; Pmode SCRATCH, marked volatile.  That MEM is what the UNSPEC_SFENCE unspec
;; refers to through match_dup 0.
5491 (define_expand "sse_sfence"
5493 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5494 "TARGET_SSE || TARGET_3DNOW_A"
5496 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5497 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn for the BLKmode set of operand 0 from an UNSPEC_SFENCE
;; unspec of that same operand.  Operand 0 has an empty predicate and
;; constraint.  Enabled for TARGET_SSE or TARGET_3DNOW_A; its memory effect is
;; declared "unknown".
5500 (define_insn "*sse_sfence"
5501 [(set (match_operand:BLK 0 "" "")
5502 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5503 "TARGET_SSE || TARGET_3DNOW_A"
5505 [(set_attr "type" "sse")
5506 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile whose single operand is an address
;; (address_operand, constraint "p"), not a memory reference.  Its memory
;; effect is declared "unknown".
5508 (define_insn "sse2_clflush"
5509 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5513 [(set_attr "type" "sse")
5514 (set_attr "memory" "unknown")])
;; sse2_mfence: expander whose preparation code builds operand 0 itself: a
;; BLKmode MEM whose address is a Pmode SCRATCH, marked volatile.  That MEM is
;; what the UNSPEC_MFENCE unspec refers to through match_dup 0.
5516 (define_expand "sse2_mfence"
5518 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5521 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5522 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn for the BLKmode set of operand 0 from an UNSPEC_MFENCE
;; unspec of that same operand.  Operand 0 has an empty predicate and
;; constraint; the memory effect is declared "unknown".
5525 (define_insn "*sse2_mfence"
5526 [(set (match_operand:BLK 0 "" "")
5527 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5530 [(set_attr "type" "sse")
5531 (set_attr "memory" "unknown")])
;; sse2_lfence: expander whose preparation code builds operand 0 itself: a
;; BLKmode MEM whose address is a Pmode SCRATCH, marked volatile.  That MEM is
;; what the UNSPEC_LFENCE unspec refers to through match_dup 0.
5533 (define_expand "sse2_lfence"
5535 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5538 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5539 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn for the BLKmode set of operand 0 from an UNSPEC_LFENCE
;; unspec of that same operand.  Operand 0 has an empty predicate and
;; constraint; the memory effect is declared "unknown".
5542 (define_insn "*sse2_lfence"
5543 [(set (match_operand:BLK 0 "" "")
5544 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5547 [(set_attr "type" "sse")
5548 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile of two SImode register operands pinned by
;; constraint to "a" (operand 0) and "c" (operand 1).  The insn length is
;; fixed at 3 bytes.  A single SImode pattern is used; the comments below
;; explain why that is sufficient for 64-bit code as well.
5550 (define_insn "sse3_mwait"
5551 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5552 (match_operand:SI 1 "register_operand" "c")]
5555 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5556 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5557 ;; we only need to set up 32bit registers.
5559 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form (TARGET_SSE3 && !TARGET_64BIT).  An
;; unspec_volatile of three SImode register operands pinned by constraint to
;; "a", "c" and "d".  The template prints all three operands after the
;; monitor mnemonic; the insn length is fixed at 3 bytes.
5561 (define_insn "sse3_monitor"
5562 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5563 (match_operand:SI 1 "register_operand" "c")
5564 (match_operand:SI 2 "register_operand" "d")]
5566 "TARGET_SSE3 && !TARGET_64BIT"
5567 "monitor\t%0, %1, %2"
5568 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form (TARGET_SSE3 && TARGET_64BIT).  Differs from
;; the 32-bit pattern only in that operand 0 (constraint "a") is DImode;
;; operands 1 ("c") and 2 ("d") stay SImode for the reason given in the
;; comments below.  The insn length is fixed at 3 bytes.
5570 (define_insn "sse3_monitor64"
5571 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5572 (match_operand:SI 1 "register_operand" "c")
5573 (match_operand:SI 2 "register_operand" "d")]
5575 "TARGET_SSE3 && TARGET_64BIT"
5576 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5577 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5578 ;; zero extended to 64bit, we only need to set up 32bit registers.
5580 [(set_attr "length" "3")])
5582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5584 ;; SSSE3 instructions
5586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ssse3_phaddwv8hi3: emits phaddw on SSE registers (V8HI).  The RTL spells
;; the operation out element by element: HImode elements 0..7 of operand 1
;; (tied to the destination) followed by elements 0..7 of operand 2 (SSE
;; register or memory), each picked with vec_select.
5588 (define_insn "ssse3_phaddwv8hi3"
5589 [(set (match_operand:V8HI 0 "register_operand" "=x")
5595 (match_operand:V8HI 1 "register_operand" "0")
5596 (parallel [(const_int 0)]))
5597 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5599 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5600 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5603 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5604 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5606 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5607 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5612 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5613 (parallel [(const_int 0)]))
5614 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5616 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5617 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5620 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5621 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5623 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5624 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5626 "phaddw\t{%2, %0|%0, %2}"
5627 [(set_attr "type" "sseiadd")
5628 (set_attr "prefix_data16" "1")
5629 (set_attr "prefix_extra" "1")
5630 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: emits phaddw on MMX registers (V4HI, constraint "y").
;; The RTL selects HImode elements 0..3 of operand 1 (tied to the
;; destination) followed by elements 0..3 of operand 2 (MMX register or
;; memory).  Unlike the V8HI form, no prefix_data16 attribute is set.
5632 (define_insn "ssse3_phaddwv4hi3"
5633 [(set (match_operand:V4HI 0 "register_operand" "=y")
5638 (match_operand:V4HI 1 "register_operand" "0")
5639 (parallel [(const_int 0)]))
5640 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5642 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5643 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5647 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5648 (parallel [(const_int 0)]))
5649 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5651 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5652 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5654 "phaddw\t{%2, %0|%0, %2}"
5655 [(set_attr "type" "sseiadd")
5656 (set_attr "prefix_extra" "1")
5657 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: emits phaddd on SSE registers (V4SI).  The RTL selects
;; SImode elements 0..3 of operand 1 (tied to the destination) followed by
;; elements 0..3 of operand 2 (SSE register or memory).
5659 (define_insn "ssse3_phadddv4si3"
5660 [(set (match_operand:V4SI 0 "register_operand" "=x")
5665 (match_operand:V4SI 1 "register_operand" "0")
5666 (parallel [(const_int 0)]))
5667 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5669 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5670 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5674 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5675 (parallel [(const_int 0)]))
5676 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5678 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5679 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5681 "phaddd\t{%2, %0|%0, %2}"
5682 [(set_attr "type" "sseiadd")
5683 (set_attr "prefix_data16" "1")
5684 (set_attr "prefix_extra" "1")
5685 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: emits phaddd on MMX registers (V2SI, constraint "y").
;; The RTL selects SImode elements 0 and 1 of operand 1 (tied to the
;; destination) followed by elements 0 and 1 of operand 2 (MMX register or
;; memory).
5687 (define_insn "ssse3_phadddv2si3"
5688 [(set (match_operand:V2SI 0 "register_operand" "=y")
5692 (match_operand:V2SI 1 "register_operand" "0")
5693 (parallel [(const_int 0)]))
5694 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5697 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5698 (parallel [(const_int 0)]))
5699 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5701 "phaddd\t{%2, %0|%0, %2}"
5702 [(set_attr "type" "sseiadd")
5703 (set_attr "prefix_extra" "1")
5704 (set_attr "mode" "DI")])
;; ssse3_phaddswv8hi3: emits phaddsw on SSE registers (V8HI).  The RTL selects
;; HImode elements 0..7 of operand 1 (tied to the destination) followed by
;; elements 0..7 of operand 2 (SSE register or memory).
5706 (define_insn "ssse3_phaddswv8hi3"
5707 [(set (match_operand:V8HI 0 "register_operand" "=x")
5713 (match_operand:V8HI 1 "register_operand" "0")
5714 (parallel [(const_int 0)]))
5715 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5717 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5718 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5721 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5722 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5724 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5725 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5730 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5731 (parallel [(const_int 0)]))
5732 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5734 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5735 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5738 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5739 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5741 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5742 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5744 "phaddsw\t{%2, %0|%0, %2}"
5745 [(set_attr "type" "sseiadd")
5746 (set_attr "prefix_data16" "1")
5747 (set_attr "prefix_extra" "1")
5748 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: emits phaddsw on MMX registers (V4HI, constraint "y").
;; The RTL selects HImode elements 0..3 of operand 1 (tied to the
;; destination) followed by elements 0..3 of operand 2 (MMX register or
;; memory).
5750 (define_insn "ssse3_phaddswv4hi3"
5751 [(set (match_operand:V4HI 0 "register_operand" "=y")
5756 (match_operand:V4HI 1 "register_operand" "0")
5757 (parallel [(const_int 0)]))
5758 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5760 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5761 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5765 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5766 (parallel [(const_int 0)]))
5767 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5769 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5770 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5772 "phaddsw\t{%2, %0|%0, %2}"
5773 [(set_attr "type" "sseiadd")
5774 (set_attr "prefix_extra" "1")
5775 (set_attr "mode" "DI")])
;; ssse3_phsubwv8hi3: emits phsubw on SSE registers (V8HI).  The RTL selects
;; HImode elements 0..7 of operand 1 (tied to the destination) followed by
;; elements 0..7 of operand 2 (SSE register or memory).
5777 (define_insn "ssse3_phsubwv8hi3"
5778 [(set (match_operand:V8HI 0 "register_operand" "=x")
5784 (match_operand:V8HI 1 "register_operand" "0")
5785 (parallel [(const_int 0)]))
5786 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5788 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5789 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5792 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5793 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5795 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5796 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5801 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5802 (parallel [(const_int 0)]))
5803 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5805 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5806 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5809 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5810 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5812 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5813 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5815 "phsubw\t{%2, %0|%0, %2}"
5816 [(set_attr "type" "sseiadd")
5817 (set_attr "prefix_data16" "1")
5818 (set_attr "prefix_extra" "1")
5819 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: emits phsubw on MMX registers (V4HI, constraint "y").
;; The RTL selects HImode elements 0..3 of operand 1 (tied to the
;; destination) followed by elements 0..3 of operand 2 (MMX register or
;; memory).
5821 (define_insn "ssse3_phsubwv4hi3"
5822 [(set (match_operand:V4HI 0 "register_operand" "=y")
5827 (match_operand:V4HI 1 "register_operand" "0")
5828 (parallel [(const_int 0)]))
5829 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5831 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5832 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5836 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5837 (parallel [(const_int 0)]))
5838 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5840 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5841 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5843 "phsubw\t{%2, %0|%0, %2}"
5844 [(set_attr "type" "sseiadd")
5845 (set_attr "prefix_extra" "1")
5846 (set_attr "mode" "DI")])
;; ssse3_phsubdv4si3: emits phsubd on SSE registers (V4SI).  The RTL selects
;; SImode elements 0..3 of operand 1 (tied to the destination) followed by
;; elements 0..3 of operand 2 (SSE register or memory).
5848 (define_insn "ssse3_phsubdv4si3"
5849 [(set (match_operand:V4SI 0 "register_operand" "=x")
5854 (match_operand:V4SI 1 "register_operand" "0")
5855 (parallel [(const_int 0)]))
5856 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5858 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5859 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5863 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5864 (parallel [(const_int 0)]))
5865 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5867 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5868 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5870 "phsubd\t{%2, %0|%0, %2}"
5871 [(set_attr "type" "sseiadd")
5872 (set_attr "prefix_data16" "1")
5873 (set_attr "prefix_extra" "1")
5874 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: emits phsubd on MMX registers (V2SI, constraint "y").
;; The RTL selects SImode elements 0 and 1 of operand 1 (tied to the
;; destination) followed by elements 0 and 1 of operand 2 (MMX register or
;; memory).
5876 (define_insn "ssse3_phsubdv2si3"
5877 [(set (match_operand:V2SI 0 "register_operand" "=y")
5881 (match_operand:V2SI 1 "register_operand" "0")
5882 (parallel [(const_int 0)]))
5883 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5886 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5887 (parallel [(const_int 0)]))
5888 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5890 "phsubd\t{%2, %0|%0, %2}"
5891 [(set_attr "type" "sseiadd")
5892 (set_attr "prefix_extra" "1")
5893 (set_attr "mode" "DI")])
;; ssse3_phsubswv8hi3: emits phsubsw on SSE registers (V8HI).  The RTL selects
;; HImode elements 0..7 of operand 1 (tied to the destination) followed by
;; elements 0..7 of operand 2 (SSE register or memory).
5895 (define_insn "ssse3_phsubswv8hi3"
5896 [(set (match_operand:V8HI 0 "register_operand" "=x")
5902 (match_operand:V8HI 1 "register_operand" "0")
5903 (parallel [(const_int 0)]))
5904 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5906 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5907 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5910 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5911 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5913 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5914 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5919 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5920 (parallel [(const_int 0)]))
5921 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5923 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5924 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5927 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5928 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5930 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5931 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5933 "phsubsw\t{%2, %0|%0, %2}"
5934 [(set_attr "type" "sseiadd")
5935 (set_attr "prefix_data16" "1")
5936 (set_attr "prefix_extra" "1")
5937 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: emits phsubsw on MMX registers (V4HI, constraint "y").
;; The RTL selects HImode elements 0..3 of operand 1 (tied to the
;; destination) followed by elements 0..3 of operand 2 (MMX register or
;; memory).
5939 (define_insn "ssse3_phsubswv4hi3"
5940 [(set (match_operand:V4HI 0 "register_operand" "=y")
5945 (match_operand:V4HI 1 "register_operand" "0")
5946 (parallel [(const_int 0)]))
5947 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5949 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5950 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5954 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5955 (parallel [(const_int 0)]))
5956 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5958 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5959 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5961 "phsubsw\t{%2, %0|%0, %2}"
5962 [(set_attr "type" "sseiadd")
5963 (set_attr "prefix_extra" "1")
5964 (set_attr "mode" "DI")])
;; ssse3_pmaddubswv8hi3: emits pmaddubsw on SSE registers.  Two V16QI inputs
;; (operand 1 tied to the destination, operand 2 register or memory) produce
;; a V8HI result.  Each input is referenced twice: once through a selection
;; whose index list starts at 0 and once through one whose list starts at 1.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand while its only
;; constraint is "0" -- confirm that is intentional.
5966 (define_insn "ssse3_pmaddubswv8hi3"
5967 [(set (match_operand:V8HI 0 "register_operand" "=x")
5972 (match_operand:V16QI 1 "nonimmediate_operand" "0")
5973 (parallel [(const_int 0)
5983 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5984 (parallel [(const_int 0)
5994 (vec_select:V16QI (match_dup 1)
5995 (parallel [(const_int 1)
6004 (vec_select:V16QI (match_dup 2)
6005 (parallel [(const_int 1)
6012 (const_int 15)]))))))]
6014 "pmaddubsw\t{%2, %0|%0, %2}"
6015 [(set_attr "type" "sseiadd")
6016 (set_attr "prefix_data16" "1")
6017 (set_attr "prefix_extra" "1")
6018 (set_attr "mode" "TI")])
;; ssse3_pmaddubswv4hi3: emits pmaddubsw on MMX registers (constraint "y").
;; Two V8QI inputs (operand 1 tied to the destination, operand 2 register or
;; memory) produce a V4HI result.  Each input is referenced twice: once
;; through a selection whose index list starts at 0 and once through one
;; whose list starts at 1.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand while its only
;; constraint is "0" -- confirm that is intentional.
6020 (define_insn "ssse3_pmaddubswv4hi3"
6021 [(set (match_operand:V4HI 0 "register_operand" "=y")
6026 (match_operand:V8QI 1 "nonimmediate_operand" "0")
6027 (parallel [(const_int 0)
6033 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
6034 (parallel [(const_int 0)
6040 (vec_select:V8QI (match_dup 1)
6041 (parallel [(const_int 1)
6046 (vec_select:V8QI (match_dup 2)
6047 (parallel [(const_int 1)
6050 (const_int 7)]))))))]
6052 "pmaddubsw\t{%2, %0|%0, %2}"
6053 [(set_attr "type" "sseiadd")
6054 (set_attr "prefix_extra" "1")
6055 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3: emits pmulhrsw on SSE registers (V8HI).  Operand 1 is
;; tied to the destination and marked commutative ("%0"); operand 2 may be an
;; SSE register or memory.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.
;; NOTE(review): the all-ones V8HI const_vector is presumably the rounding
;; term -- confirm against the full RTL expression.
6057 (define_insn "ssse3_pmulhrswv8hi3"
6058 [(set (match_operand:V8HI 0 "register_operand" "=x")
6065 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
6067 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
6069 (const_vector:V8HI [(const_int 1) (const_int 1)
6070 (const_int 1) (const_int 1)
6071 (const_int 1) (const_int 1)
6072 (const_int 1) (const_int 1)]))
6074 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
6075 "pmulhrsw\t{%2, %0|%0, %2}"
6076 [(set_attr "type" "sseimul")
6077 (set_attr "prefix_data16" "1")
6078 (set_attr "prefix_extra" "1")
6079 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: emits pmulhrsw on MMX registers (V4HI, constraint
;; "y").  Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be an MMX register or memory.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.
;; NOTE(review): the all-ones V4HI const_vector is presumably the rounding
;; term -- confirm against the full RTL expression.
6081 (define_insn "ssse3_pmulhrswv4hi3"
6082 [(set (match_operand:V4HI 0 "register_operand" "=y")
6089 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
6091 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
6093 (const_vector:V4HI [(const_int 1) (const_int 1)
6094 (const_int 1) (const_int 1)]))
6096 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
6097 "pmulhrsw\t{%2, %0|%0, %2}"
6098 [(set_attr "type" "sseimul")
6099 (set_attr "prefix_extra" "1")
6100 (set_attr "mode" "DI")])
;; ssse3_pshufbv16qi3: emits pshufb on SSE registers.  Modelled as a V16QI
;; unspec of operand 1 (tied to the destination) and operand 2 (SSE register
;; or memory).  The stray C-style ';' that used to follow the output template
;; has been dropped; in .md syntax it only opened an empty comment.
6102 (define_insn "ssse3_pshufbv16qi3"
6103 [(set (match_operand:V16QI 0 "register_operand" "=x")
6104 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6105 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6108 "pshufb\t{%2, %0|%0, %2}"
6109 [(set_attr "type" "sselog1")
6110 (set_attr "prefix_data16" "1")
6111 (set_attr "prefix_extra" "1")
6112 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: emits pshufb on MMX registers (constraint "y").
;; Modelled as a V8QI unspec of operand 1 (tied to the destination) and
;; operand 2 (MMX register or memory).  The stray C-style ';' that used to
;; follow the output template has been dropped; in .md syntax it only opened
;; an empty comment.
6114 (define_insn "ssse3_pshufbv8qi3"
6115 [(set (match_operand:V8QI 0 "register_operand" "=y")
6116 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6117 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6120 "pshufb\t{%2, %0|%0, %2}"
6121 [(set_attr "type" "sselog1")
6122 (set_attr "prefix_extra" "1")
6123 (set_attr "mode" "DI")])
;; ssse3_psign<mode>3 (SSE form): iterated over SSEMODE124 (V16QI, V8HI,
;; V4SI), so it yields psignb, psignw and psignd via <ssevecsize>.  Operand 1
;; is tied to the destination; operand 2 may be an SSE register or memory.
;; The stray C-style ';' that used to follow the output template has been
;; dropped; in .md syntax it only opened an empty comment.
6125 (define_insn "ssse3_psign<mode>3"
6126 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6128 [(match_operand:SSEMODE124 1 "register_operand" "0")
6129 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6132 "psign<ssevecsize>\t{%2, %0|%0, %2}"
6133 [(set_attr "type" "sselog1")
6134 (set_attr "prefix_data16" "1")
6135 (set_attr "prefix_extra" "1")
6136 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (MMX form): iterated over MMXMODEI with the mnemonic
;; suffix taken from <mmxvecsize>.  Operand 1 is tied to the destination;
;; operand 2 may be an MMX register ("y") or memory.  The stray C-style ';'
;; that used to follow the output template has been dropped; in .md syntax it
;; only opened an empty comment.
6138 (define_insn "ssse3_psign<mode>3"
6139 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6141 [(match_operand:MMXMODEI 1 "register_operand" "0")
6142 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6145 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
6146 [(set_attr "type" "sselog1")
6147 (set_attr "prefix_extra" "1")
6148 (set_attr "mode" "DI")])
;; ssse3_palignrti: emits palignr on SSE registers, modelled as a TImode
;; unspec.  Operand 1 is tied to the destination, operand 2 may be an SSE
;; register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_mul_8_operand.  The output code divides operand 3 by 8
;; before printing, so the instruction immediate is one eighth of the RTL
;; value.
6150 (define_insn "ssse3_palignrti"
6151 [(set (match_operand:TI 0 "register_operand" "=x")
6152 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6153 (match_operand:TI 2 "nonimmediate_operand" "xm")
6154 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6158 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6159 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6161 [(set_attr "type" "sseishft")
6162 (set_attr "prefix_data16" "1")
6163 (set_attr "prefix_extra" "1")
6164 (set_attr "mode" "TI")])
;; ssse3_palignrdi: emits palignr on MMX registers (constraint "y"), modelled
;; as a DImode unspec.  Operand 1 is tied to the destination, operand 2 may
;; be an MMX register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_mul_8_operand.  The output code divides operand 3 by 8
;; before printing, so the instruction immediate is one eighth of the RTL
;; value.
6166 (define_insn "ssse3_palignrdi"
6167 [(set (match_operand:DI 0 "register_operand" "=y")
6168 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6169 (match_operand:DI 2 "nonimmediate_operand" "ym")
6170 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6174 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6175 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6177 [(set_attr "type" "sseishft")
6178 (set_attr "prefix_extra" "1")
6179 (set_attr "mode" "DI")])
;; abs<mode>2 (SSE form): element-wise abs over SSEMODE124 (V16QI, V8HI,
;; V4SI), emitted as pabsb, pabsw or pabsd via <ssevecsize>.  The source
;; (operand 1) may be an SSE register or memory; the destination is not tied
;; to it.  The stray C-style ';' that used to follow the output template has
;; been dropped; in .md syntax it only opened an empty comment.
6181 (define_insn "abs<mode>2"
6182 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6183 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6185 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
6186 [(set_attr "type" "sselog1")
6187 (set_attr "prefix_data16" "1")
6188 (set_attr "prefix_extra" "1")
6189 (set_attr "mode" "TI")])
;; abs<mode>2 (MMX form): element-wise abs over MMXMODEI, with the mnemonic
;; suffix taken from <mmxvecsize>.  The source (operand 1) may be an MMX
;; register ("y") or memory; the destination is not tied to it.  The stray
;; C-style ';' that used to follow the output template has been dropped; in
;; .md syntax it only opened an empty comment.
6191 (define_insn "abs<mode>2"
6192 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6193 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6195 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
6196 [(set_attr "type" "sselog1")
6197 (set_attr "prefix_extra" "1")
6198 (set_attr "mode" "DI")])
6200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6202 ;; AMD SSE4A instructions
6204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: iterated over MODEF.  Stores an SSE register (operand
;; 1, "x") to memory (operand 0) with a movnts<ssemodefsuffix> instruction.
6206 (define_insn "sse4a_movnt<mode>"
6207 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6209 [(match_operand:MODEF 1 "register_operand" "x")]
6212 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6213 [(set_attr "type" "ssemov")
6214 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: vector-register variant iterated over SSEMODEF2P
;; (V4SF, V2DF).  Element 0 of operand 1 is selected with vec_select and
;; stored to a scalar-mode memory operand; the template emits movntss or
;; movntsd via <ssemodesuffixf2c>.
6216 (define_insn "sse4a_vmmovnt<mode>"
6217 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6218 (unspec:<ssescalarmode>
6219 [(vec_select:<ssescalarmode>
6220 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6221 (parallel [(const_int 0)]))]
6224 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6225 [(set_attr "type" "ssemov")
6226 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: immediate form of extrq, modelled as a V2DI unspec.  Operand
;; 1 is tied to the destination; operands 2 and 3 are integer constants
;; (const_int_operand, no constraint) printed as the two immediates.
6228 (define_insn "sse4a_extrqi"
6229 [(set (match_operand:V2DI 0 "register_operand" "=x")
6230 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6231 (match_operand 2 "const_int_operand" "")
6232 (match_operand 3 "const_int_operand" "")]
6235 "extrq\t{%3, %2, %0|%0, %2, %3}"
6236 [(set_attr "type" "sse")
6237 (set_attr "prefix_data16" "1")
6238 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of extrq, modelled as a V2DI unspec.  Operand 1
;; is tied to the destination; operand 2 is a V16QI SSE register.
6240 (define_insn "sse4a_extrq"
6241 [(set (match_operand:V2DI 0 "register_operand" "=x")
6242 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6243 (match_operand:V16QI 2 "register_operand" "x")]
6246 "extrq\t{%2, %0|%0, %2}"
6247 [(set_attr "type" "sse")
6248 (set_attr "prefix_data16" "1")
6249 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of insertq, modelled as a V2DI unspec.
;; Operand 1 is tied to the destination, operand 2 is a V2DI SSE register,
;; and operands 3 and 4 are integer constants printed as the two immediates.
;; Uses prefix_rep rather than prefix_data16.
6251 (define_insn "sse4a_insertqi"
6252 [(set (match_operand:V2DI 0 "register_operand" "=x")
6253 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6254 (match_operand:V2DI 2 "register_operand" "x")
6255 (match_operand 3 "const_int_operand" "")
6256 (match_operand 4 "const_int_operand" "")]
6259 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6260 [(set_attr "type" "sseins")
6261 (set_attr "prefix_rep" "1")
6262 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of insertq, modelled as a V2DI unspec.
;; Operand 1 is tied to the destination; operand 2 is a V2DI SSE register.
;; Uses prefix_rep rather than prefix_data16.
6264 (define_insn "sse4a_insertq"
6265 [(set (match_operand:V2DI 0 "register_operand" "=x")
6266 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6267 (match_operand:V2DI 2 "register_operand" "x")]
6270 "insertq\t{%2, %0|%0, %2}"
6271 [(set_attr "type" "sseins")
6272 (set_attr "prefix_rep" "1")
6273 (set_attr "mode" "TI")])
6275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6277 ;; Intel SSE4.1 instructions
6279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4_1_blendp<ssemodesuffixf2c>: iterated over SSEMODEF2P, yielding
;; blendps and blendpd.  Expressed as a vec_merge of operand 2 (SSE register
;; or memory) and operand 1 (tied to the destination) under the immediate
;; mask in operand 3, which must satisfy const_0_to_3_operand.
6281 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6282 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6283 (vec_merge:SSEMODEF2P
6284 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6285 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6286 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6288 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6289 [(set_attr "type" "ssemov")
6290 (set_attr "prefix_extra" "1")
6291 (set_attr "mode" "<MODE>")])
;; sse4_1_blendvp<ssemodesuffixf2c>: iterated over SSEMODEF2P, yielding
;; blendvps and blendvpd.  The destination and operand 1 (tied together) use
;; reg_not_xmm0_operand, and operand 2 uses nonimm_not_xmm0_operand.
;; Operand 3 is a register with constraint "Yz".
;; NOTE(review): "Yz" presumably pins operand 3 to xmm0, which the "not_xmm0"
;; predicates on the other operands then avoid -- confirm against the
;; constraint definitions.
6293 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6294 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6296 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6297 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6298 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6301 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6302 [(set_attr "type" "ssemov")
6303 (set_attr "prefix_extra" "1")
6304 (set_attr "mode" "<MODE>")])
;; sse4_1_dpp<ssemodesuffixf2c>: iterated over SSEMODEF2P, yielding dpps and
;; dppd.  Operand 1 is tied to the destination and marked commutative ("%0"),
;; operand 2 may be an SSE register or memory, and operand 3 is an immediate
;; in the range accepted by const_0_to_255_operand.
6306 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6307 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6309 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6310 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6311 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6314 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6315 [(set_attr "type" "ssemul")
6316 (set_attr "prefix_extra" "1")
6317 (set_attr "mode" "<MODE>")])
;; sse4_1_movntdqa: emits movntdqa.  Modelled as a V2DI unspec of a memory
;; operand (operand 1, "m"), with the result placed in an SSE register.
6319 (define_insn "sse4_1_movntdqa"
6320 [(set (match_operand:V2DI 0 "register_operand" "=x")
6321 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6324 "movntdqa\t{%1, %0|%0, %1}"
6325 [(set_attr "type" "ssecvt")
6326 (set_attr "prefix_extra" "1")
6327 (set_attr "mode" "TI")])
;; sse4_1_mpsadbw: emits mpsadbw, modelled as a V16QI unspec.  Operand 1 is
;; tied to the destination, operand 2 may be an SSE register or memory, and
;; operand 3 is an immediate accepted by const_0_to_255_operand.
6329 (define_insn "sse4_1_mpsadbw"
6330 [(set (match_operand:V16QI 0 "register_operand" "=x")
6331 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6332 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6333 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6336 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6337 [(set_attr "type" "sselog1")
6338 (set_attr "prefix_extra" "1")
6339 (set_attr "mode" "TI")])
;; sse4_1_packusdw: emits packusdw.  Two V4SI inputs (operand 1 tied to the
;; destination, operand 2 SSE register or memory) are combined into a V8HI
;; result.
6341 (define_insn "sse4_1_packusdw"
6342 [(set (match_operand:V8HI 0 "register_operand" "=x")
6345 (match_operand:V4SI 1 "register_operand" "0"))
6347 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6349 "packusdw\t{%2, %0|%0, %2}"
6350 [(set_attr "type" "sselog")
6351 (set_attr "prefix_extra" "1")
6352 (set_attr "mode" "TI")])
;; sse4_1_pblendvb: emits pblendvb, modelled as a V16QI unspec.  The
;; destination and operand 1 (tied together) use reg_not_xmm0_operand, and
;; operand 2 uses nonimm_not_xmm0_operand.  Operand 3 is a register with
;; constraint "Yz".
;; NOTE(review): "Yz" presumably pins operand 3 to xmm0, which the "not_xmm0"
;; predicates on the other operands then avoid -- confirm against the
;; constraint definitions.
6354 (define_insn "sse4_1_pblendvb"
6355 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6356 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6357 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6358 (match_operand:V16QI 3 "register_operand" "Yz")]
6361 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6362 [(set_attr "type" "ssemov")
6363 (set_attr "prefix_extra" "1")
6364 (set_attr "mode" "TI")])
;; sse4_1_pblendw: emits pblendw on V8HI operands.  Operand 2 (SSE register
;; or memory) is listed before operand 1 (tied to the destination) in the
;; RTL; operand 3 is the immediate mask, accepted by const_0_to_255_operand.
6366 (define_insn "sse4_1_pblendw"
6367 [(set (match_operand:V8HI 0 "register_operand" "=x")
6369 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6370 (match_operand:V8HI 1 "register_operand" "0")
6371 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6373 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6374 [(set_attr "type" "ssemov")
6375 (set_attr "prefix_extra" "1")
6376 (set_attr "mode" "TI")])
;; sse4_1_phminposuw: emits phminposuw.  Modelled as a V8HI unspec
;; (UNSPEC_PHMINPOSUW) of a single V8HI source that may be an SSE register or
;; memory; the destination is not tied to the source.
6378 (define_insn "sse4_1_phminposuw"
6379 [(set (match_operand:V8HI 0 "register_operand" "=x")
6380 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6381 UNSPEC_PHMINPOSUW))]
6383 "phminposuw\t{%1, %0|%0, %1}"
6384 [(set_attr "type" "sselog1")
6385 (set_attr "prefix_extra" "1")
6386 (set_attr "mode" "TI")])
;; sse4_1_extendv8qiv8hi2: named register-source form of pmovsxbw.  Operand 1
;; is a full V16QI SSE register from which elements are selected starting at
;; index 0; the result is V8HI.
6388 (define_insn "sse4_1_extendv8qiv8hi2"
6389 [(set (match_operand:V8HI 0 "register_operand" "=x")
6392 (match_operand:V16QI 1 "register_operand" "x")
6393 (parallel [(const_int 0)
6402 "pmovsxbw\t{%1, %0|%0, %1}"
6403 [(set_attr "type" "ssemov")
6404 (set_attr "prefix_extra" "1")
6405 (set_attr "mode" "TI")])
;; *sse4_1_extendv8qiv8hi2: anonymous variant of pmovsxbw whose source may be
;; memory.  Operand 1 is a V8QI value (SSE register or memory) wrapped in
;; vec_duplicate:V16QI, from which elements are selected starting at index 0.
6407 (define_insn "*sse4_1_extendv8qiv8hi2"
6408 [(set (match_operand:V8HI 0 "register_operand" "=x")
6411 (vec_duplicate:V16QI
6412 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6413 (parallel [(const_int 0)
6422 "pmovsxbw\t{%1, %0|%0, %1}"
6423 [(set_attr "type" "ssemov")
6424 (set_attr "prefix_extra" "1")
6425 (set_attr "mode" "TI")])
;; sse4_1_extendv4qiv4si2: named register-source form of pmovsxbd.  Operand 1
;; is a full V16QI SSE register from which elements are selected starting at
;; index 0; the result is V4SI.
6427 (define_insn "sse4_1_extendv4qiv4si2"
6428 [(set (match_operand:V4SI 0 "register_operand" "=x")
6431 (match_operand:V16QI 1 "register_operand" "x")
6432 (parallel [(const_int 0)
6437 "pmovsxbd\t{%1, %0|%0, %1}"
6438 [(set_attr "type" "ssemov")
6439 (set_attr "prefix_extra" "1")
6440 (set_attr "mode" "TI")])
;; *sse4_1_extendv4qiv4si2: anonymous variant of pmovsxbd whose source may be
;; memory.  Operand 1 is a V4QI value (SSE register or memory) wrapped in
;; vec_duplicate:V16QI, from which elements are selected starting at index 0.
6442 (define_insn "*sse4_1_extendv4qiv4si2"
6443 [(set (match_operand:V4SI 0 "register_operand" "=x")
6446 (vec_duplicate:V16QI
6447 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6448 (parallel [(const_int 0)
6453 "pmovsxbd\t{%1, %0|%0, %1}"
6454 [(set_attr "type" "ssemov")
6455 (set_attr "prefix_extra" "1")
6456 (set_attr "mode" "TI")])
;; sse4_1_extendv2qiv2di2: named register-source form of pmovsxbq.  Operand 1
;; is a full V16QI SSE register from which elements are selected starting at
;; index 0; the result is V2DI.
6458 (define_insn "sse4_1_extendv2qiv2di2"
6459 [(set (match_operand:V2DI 0 "register_operand" "=x")
6462 (match_operand:V16QI 1 "register_operand" "x")
6463 (parallel [(const_int 0)
6466 "pmovsxbq\t{%1, %0|%0, %1}"
6467 [(set_attr "type" "ssemov")
6468 (set_attr "prefix_extra" "1")
6469 (set_attr "mode" "TI")])
;; pmovsxbq variant that also accepts memory: operand 1 is a V2QI
;; register-or-memory value ("xm") wrapped in vec_duplicate:V16QI.
6471 (define_insn "*sse4_1_extendv2qiv2di2"
6472 [(set (match_operand:V2DI 0 "register_operand" "=x")
6475 (vec_duplicate:V16QI
6476 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6477 (parallel [(const_int 0)
6480 "pmovsxbq\t{%1, %0|%0, %1}"
6481 [(set_attr "type" "ssemov")
6482 (set_attr "prefix_extra" "1")
6483 (set_attr "mode" "TI")])
;; Named pmovsxwd pattern with a register source: operand 1 is a whole
;; V8HI register, operand 0 the V4SI result register.
6485 (define_insn "sse4_1_extendv4hiv4si2"
6486 [(set (match_operand:V4SI 0 "register_operand" "=x")
6489 (match_operand:V8HI 1 "register_operand" "x")
6490 (parallel [(const_int 0)
6495 "pmovsxwd\t{%1, %0|%0, %1}"
6496 [(set_attr "type" "ssemov")
6497 (set_attr "prefix_extra" "1")
6498 (set_attr "mode" "TI")])
;; pmovsxwd variant that also accepts memory: operand 1 is a V4HI
;; register-or-memory value ("xm"), operand 0 the V4SI result register.
;; Operand 1 must be V4HI (four halfwords), matching both the v4hi->v4si
;; pattern name and the zero-extending counterpart
;; *sse4_1_zero_extendv4hiv4si2; a V2HI source would describe only half
;; of the elements the instruction converts.
6500 (define_insn "*sse4_1_extendv4hiv4si2"
6501 [(set (match_operand:V4SI 0 "register_operand" "=x")
6505 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6506 (parallel [(const_int 0)
6511 "pmovsxwd\t{%1, %0|%0, %1}"
6512 [(set_attr "type" "ssemov")
6513 (set_attr "prefix_extra" "1")
6514 (set_attr "mode" "TI")])
;; Named pmovsxwq pattern with a register source: operand 1 is a whole
;; V8HI register, operand 0 the V2DI result register.
6516 (define_insn "sse4_1_extendv2hiv2di2"
6517 [(set (match_operand:V2DI 0 "register_operand" "=x")
6520 (match_operand:V8HI 1 "register_operand" "x")
6521 (parallel [(const_int 0)
6524 "pmovsxwq\t{%1, %0|%0, %1}"
6525 [(set_attr "type" "ssemov")
6526 (set_attr "prefix_extra" "1")
6527 (set_attr "mode" "TI")])
;; pmovsxwq variant that also accepts memory: operand 1 is a V2HI
;; register-or-memory value ("xm"), operand 0 the V2DI result register.
;; Operand 1 must be V2HI (two halfwords), matching both the v2hi->v2di
;; pattern name and the zero-extending counterpart
;; *sse4_1_zero_extendv2hiv2di2; a full V8HI source would make this
;; memory form describe a 16-byte operand instead of the narrow one.
6529 (define_insn "*sse4_1_extendv2hiv2di2"
6530 [(set (match_operand:V2DI 0 "register_operand" "=x")
6534 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6535 (parallel [(const_int 0)
6538 "pmovsxwq\t{%1, %0|%0, %1}"
6539 [(set_attr "type" "ssemov")
6540 (set_attr "prefix_extra" "1")
6541 (set_attr "mode" "TI")])
;; Named pmovsxdq pattern with a register source: operand 1 is a whole
;; V4SI register, operand 0 the V2DI result register.
6543 (define_insn "sse4_1_extendv2siv2di2"
6544 [(set (match_operand:V2DI 0 "register_operand" "=x")
6547 (match_operand:V4SI 1 "register_operand" "x")
6548 (parallel [(const_int 0)
6551 "pmovsxdq\t{%1, %0|%0, %1}"
6552 [(set_attr "type" "ssemov")
6553 (set_attr "prefix_extra" "1")
6554 (set_attr "mode" "TI")])
;; pmovsxdq variant that also accepts memory: operand 1 is a V2SI
;; register-or-memory value ("xm"), operand 0 the V2DI result register.
6556 (define_insn "*sse4_1_extendv2siv2di2"
6557 [(set (match_operand:V2DI 0 "register_operand" "=x")
6561 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6562 (parallel [(const_int 0)
6565 "pmovsxdq\t{%1, %0|%0, %1}"
6566 [(set_attr "type" "ssemov")
6567 (set_attr "prefix_extra" "1")
6568 (set_attr "mode" "TI")])
;; Named pmovzxbw pattern with a register source: operand 1 is a whole
;; V16QI register, operand 0 the V8HI result register.
6570 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6571 [(set (match_operand:V8HI 0 "register_operand" "=x")
6574 (match_operand:V16QI 1 "register_operand" "x")
6575 (parallel [(const_int 0)
6584 "pmovzxbw\t{%1, %0|%0, %1}"
6585 [(set_attr "type" "ssemov")
6586 (set_attr "prefix_extra" "1")
6587 (set_attr "mode" "TI")])
;; pmovzxbw variant that also accepts memory: operand 1 is a V8QI
;; register-or-memory value ("xm") wrapped in vec_duplicate:V16QI.
6589 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6590 [(set (match_operand:V8HI 0 "register_operand" "=x")
6593 (vec_duplicate:V16QI
6594 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6595 (parallel [(const_int 0)
6604 "pmovzxbw\t{%1, %0|%0, %1}"
6605 [(set_attr "type" "ssemov")
6606 (set_attr "prefix_extra" "1")
6607 (set_attr "mode" "TI")])
;; Named pmovzxbd pattern with a register source: operand 1 is a whole
;; V16QI register, operand 0 the V4SI result register.
6609 (define_insn "sse4_1_zero_extendv4qiv4si2"
6610 [(set (match_operand:V4SI 0 "register_operand" "=x")
6613 (match_operand:V16QI 1 "register_operand" "x")
6614 (parallel [(const_int 0)
6619 "pmovzxbd\t{%1, %0|%0, %1}"
6620 [(set_attr "type" "ssemov")
6621 (set_attr "prefix_extra" "1")
6622 (set_attr "mode" "TI")])
;; pmovzxbd variant that also accepts memory: operand 1 is a V4QI
;; register-or-memory value ("xm") wrapped in vec_duplicate:V16QI.
6624 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6625 [(set (match_operand:V4SI 0 "register_operand" "=x")
6628 (vec_duplicate:V16QI
6629 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6630 (parallel [(const_int 0)
6635 "pmovzxbd\t{%1, %0|%0, %1}"
6636 [(set_attr "type" "ssemov")
6637 (set_attr "prefix_extra" "1")
6638 (set_attr "mode" "TI")])
;; Named pmovzxbq pattern with a register source: operand 1 is a whole
;; V16QI register, operand 0 the V2DI result register.
6640 (define_insn "sse4_1_zero_extendv2qiv2di2"
6641 [(set (match_operand:V2DI 0 "register_operand" "=x")
6644 (match_operand:V16QI 1 "register_operand" "x")
6645 (parallel [(const_int 0)
6648 "pmovzxbq\t{%1, %0|%0, %1}"
6649 [(set_attr "type" "ssemov")
6650 (set_attr "prefix_extra" "1")
6651 (set_attr "mode" "TI")])
;; pmovzxbq variant that also accepts memory: operand 1 is a V2QI
;; register-or-memory value ("xm") wrapped in vec_duplicate:V16QI.
6653 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6654 [(set (match_operand:V2DI 0 "register_operand" "=x")
6657 (vec_duplicate:V16QI
6658 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6659 (parallel [(const_int 0)
6662 "pmovzxbq\t{%1, %0|%0, %1}"
6663 [(set_attr "type" "ssemov")
6664 (set_attr "prefix_extra" "1")
6665 (set_attr "mode" "TI")])
;; Named pmovzxwd pattern with a register source: operand 1 is a whole
;; V8HI register, operand 0 the V4SI result register.
6667 (define_insn "sse4_1_zero_extendv4hiv4si2"
6668 [(set (match_operand:V4SI 0 "register_operand" "=x")
6671 (match_operand:V8HI 1 "register_operand" "x")
6672 (parallel [(const_int 0)
6677 "pmovzxwd\t{%1, %0|%0, %1}"
6678 [(set_attr "type" "ssemov")
6679 (set_attr "prefix_extra" "1")
6680 (set_attr "mode" "TI")])
;; pmovzxwd variant that also accepts memory: operand 1 is a V4HI
;; register-or-memory value ("xm"), operand 0 the V4SI result register.
6682 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6683 [(set (match_operand:V4SI 0 "register_operand" "=x")
6687 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6688 (parallel [(const_int 0)
6693 "pmovzxwd\t{%1, %0|%0, %1}"
6694 [(set_attr "type" "ssemov")
6695 (set_attr "prefix_extra" "1")
6696 (set_attr "mode" "TI")])
;; Named pmovzxwq pattern with a register source: operand 1 is a whole
;; V8HI register, operand 0 the V2DI result register.
6698 (define_insn "sse4_1_zero_extendv2hiv2di2"
6699 [(set (match_operand:V2DI 0 "register_operand" "=x")
6702 (match_operand:V8HI 1 "register_operand" "x")
6703 (parallel [(const_int 0)
6706 "pmovzxwq\t{%1, %0|%0, %1}"
6707 [(set_attr "type" "ssemov")
6708 (set_attr "prefix_extra" "1")
6709 (set_attr "mode" "TI")])
;; pmovzxwq variant that also accepts memory: operand 1 is a V2HI
;; register-or-memory value ("xm"), operand 0 the V2DI result register.
6711 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6712 [(set (match_operand:V2DI 0 "register_operand" "=x")
6716 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6717 (parallel [(const_int 0)
6720 "pmovzxwq\t{%1, %0|%0, %1}"
6721 [(set_attr "type" "ssemov")
6722 (set_attr "prefix_extra" "1")
6723 (set_attr "mode" "TI")])
;; Named pmovzxdq pattern with a register source: operand 1 is a whole
;; V4SI register, operand 0 the V2DI result register.
6725 (define_insn "sse4_1_zero_extendv2siv2di2"
6726 [(set (match_operand:V2DI 0 "register_operand" "=x")
6729 (match_operand:V4SI 1 "register_operand" "x")
6730 (parallel [(const_int 0)
6733 "pmovzxdq\t{%1, %0|%0, %1}"
6734 [(set_attr "type" "ssemov")
6735 (set_attr "prefix_extra" "1")
6736 (set_attr "mode" "TI")])
;; pmovzxdq variant that also accepts memory: operand 1 is a V2SI
;; register-or-memory value ("xm"), operand 0 the V2DI result register.
6738 (define_insn "*sse4_1_zero_extendv2siv2di2"
6739 [(set (match_operand:V2DI 0 "register_operand" "=x")
6743 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6744 (parallel [(const_int 0)
6747 "pmovzxdq\t{%1, %0|%0, %1}"
6748 [(set_attr "type" "ssemov")
6749 (set_attr "prefix_extra" "1")
6750 (set_attr "mode" "TI")])
6752 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6753 ;; But it is not really a compare instruction.
;; ptest: sets FLAGS_REG (mode CC) from an unspec over two V2DI inputs;
;; operand 0 must be a register, operand 1 may be a register or memory.
;; There is no register output, only the flags result.
6754 (define_insn "sse4_1_ptest"
6755 [(set (reg:CC FLAGS_REG)
6756 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6757 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6760 "ptest\t{%1, %0|%0, %1}"
6761 [(set_attr "type" "ssecomi")
6762 (set_attr "prefix_extra" "1")
6763 (set_attr "mode" "TI")])
;; roundps / roundpd, generated for each mode in SSEMODEF2P (V4SF, V2DF)
;; with the "s"/"d" suffix taken from ssemodesuffixf2c.  Operand 1 is the
;; register-or-memory source; operand 2 is an immediate restricted to
;; 0..15 by const_0_to_15_operand.
6765 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6766 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6768 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6769 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6772 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6773 [(set_attr "type" "ssecvt")
6774 (set_attr "prefix_extra" "1")
6775 (set_attr "mode" "<MODE>")])
;; roundss / roundsd, generated for each mode in SSEMODEF2P with the
;; "s"/"d" suffix taken from ssemodesuffixf2c.  Operand 2 (register) and
;; the 0..15 immediate operand 3 form the first vec_merge arm; operand 1
;; is the other arm and is tied to the destination register ("0").
6777 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6778 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6779 (vec_merge:SSEMODEF2P
6781 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6782 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6784 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6787 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6788 [(set_attr "type" "ssecvt")
6789 (set_attr "prefix_extra" "1")
6790 (set_attr "mode" "<MODE>")])
6792 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6794 ;; Intel SSE4.2 string/text processing instructions
6796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing three results at once: an SI value
;; in operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  It is only accepted before reload (see the
;; !(reload_completed || reload_in_progress) test).
;; The split code looks for REG_UNUSED notes on curr_insn to learn which
;; of the three results are live (ecx, xmm0, flags) and emits the
;; single-result insns pcmpestri / pcmpestrm accordingly; the flags-only
;; insn is emitted only when the flags are live and neither register
;; result is.
;; Operands 2 and 4 use the *_not_xmm0 predicates.
6798 (define_insn_and_split "sse4_2_pcmpestr"
6799 [(set (match_operand:SI 0 "register_operand" "=c,c")
6801 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6802 (match_operand:SI 3 "register_operand" "a,a")
6803 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6804 (match_operand:SI 5 "register_operand" "d,d")
6805 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6807 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6815 (set (reg:CC FLAGS_REG)
6824 && !(reload_completed || reload_in_progress)"
6829 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6830 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6831 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6834 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6835 operands[3], operands[4],
6836 operands[5], operands[6]));
6838 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6839 operands[3], operands[4],
6840 operands[5], operands[6]));
6841 if (flags && !(ecx || xmm0))
6842 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6843 operands[2], operands[3],
6844 operands[4], operands[5],
6848 [(set_attr "type" "sselog")
6849 (set_attr "prefix_data16" "1")
6850 (set_attr "prefix_extra" "1")
6851 (set_attr "memory" "none,load")
6852 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c") plus FLAGS_REG.
;; Inputs: V16QI register operand 1, SI operand 2 (constraint "a"),
;; V16QI register-or-memory operand 3, SI operand 4 (constraint "d") and
;; an 8-bit immediate operand 5.  Only operands 1, 3 and 5 appear in the
;; output template.  The "memory" attribute follows the alternative:
;; none for the register form, load for the memory form.
6854 (define_insn "sse4_2_pcmpestri"
6855 [(set (match_operand:SI 0 "register_operand" "=c,c")
6857 [(match_operand:V16QI 1 "register_operand" "x,x")
6858 (match_operand:SI 2 "register_operand" "a,a")
6859 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6860 (match_operand:SI 4 "register_operand" "d,d")
6861 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6863 (set (reg:CC FLAGS_REG)
6872 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6873 [(set_attr "type" "sselog")
6874 (set_attr "prefix_data16" "1")
6875 (set_attr "prefix_extra" "1")
6876 (set_attr "memory" "none,load")
6877 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; The inputs are laid out exactly as in sse4_2_pcmpestri: V16QI register
;; operand 1, SI operand 2 ("a"), V16QI register-or-memory operand 3,
;; SI operand 4 ("d") and an 8-bit immediate operand 5.
6879 (define_insn "sse4_2_pcmpestrm"
6880 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6882 [(match_operand:V16QI 1 "register_operand" "x,x")
6883 (match_operand:SI 2 "register_operand" "a,a")
6884 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6885 (match_operand:SI 4 "register_operand" "d,d")
6886 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6888 (set (reg:CC FLAGS_REG)
6897 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6898 [(set_attr "type" "sselog")
6899 (set_attr "prefix_data16" "1")
6900 (set_attr "prefix_extra" "1")
6901 (set_attr "memory" "none,load")
6902 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: the sole set is FLAGS_REG; operands 0 and
;; 1 are scratch clobbers.  Alternatives 0-1 clobber a V16QI "Yz" scratch
;; and emit pcmpestrm; alternatives 2-3 clobber an SI "c" scratch and
;; emit pcmpestri.  Within each pair the second alternative takes
;; operand 4 from memory (memory attribute "load").
6904 (define_insn "sse4_2_pcmpestr_cconly"
6905 [(set (reg:CC FLAGS_REG)
6907 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6908 (match_operand:SI 3 "register_operand" "a,a,a,a")
6909 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6910 (match_operand:SI 5 "register_operand" "d,d,d,d")
6911 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6913 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6914 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6917 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6918 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6919 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6920 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6921 [(set_attr "type" "sselog")
6922 (set_attr "prefix_data16" "1")
6923 (set_attr "prefix_extra" "1")
6924 (set_attr "memory" "none,load,none,load")
6925 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern producing three results at once: an SI value
;; in operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  It is only accepted before reload (see the
;; !(reload_completed || reload_in_progress) test).
;; The split code looks for REG_UNUSED notes on curr_insn to learn which
;; of the three results are live (ecx, xmm0, flags) and emits the
;; single-result insns pcmpistri / pcmpistrm accordingly; the flags-only
;; insn is emitted only when the flags are live and neither register
;; result is.
;; Operands 2 and 3 use the *_not_xmm0 predicates.
6927 (define_insn_and_split "sse4_2_pcmpistr"
6928 [(set (match_operand:SI 0 "register_operand" "=c,c")
6930 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6931 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6932 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6934 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6940 (set (reg:CC FLAGS_REG)
6947 && !(reload_completed || reload_in_progress)"
6952 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6953 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6954 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6957 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6958 operands[3], operands[4]));
6960 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6961 operands[3], operands[4]));
6962 if (flags && !(ecx || xmm0))
6963 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6964 operands[2], operands[3],
6968 [(set_attr "type" "sselog")
6969 (set_attr "prefix_data16" "1")
6970 (set_attr "prefix_extra" "1")
6971 (set_attr "memory" "none,load")
6972 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint "c") plus FLAGS_REG.
;; Inputs: V16QI register operand 1, V16QI register-or-memory operand 2
;; and an 8-bit immediate operand 3.  The "memory" attribute is none for
;; the register alternative and load for the memory alternative.
6974 (define_insn "sse4_2_pcmpistri"
6975 [(set (match_operand:SI 0 "register_operand" "=c,c")
6977 [(match_operand:V16QI 1 "register_operand" "x,x")
6978 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6979 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6981 (set (reg:CC FLAGS_REG)
6988 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6989 [(set_attr "type" "sselog")
6990 (set_attr "prefix_data16" "1")
6991 (set_attr "prefix_extra" "1")
6992 (set_attr "memory" "none,load")
6993 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; Inputs mirror sse4_2_pcmpistri: V16QI register operand 1, V16QI
;; register-or-memory operand 2 and an 8-bit immediate operand 3.
6995 (define_insn "sse4_2_pcmpistrm"
6996 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6998 [(match_operand:V16QI 1 "register_operand" "x,x")
6999 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7000 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7002 (set (reg:CC FLAGS_REG)
7009 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
7010 [(set_attr "type" "sselog")
7011 (set_attr "prefix_data16" "1")
7012 (set_attr "prefix_extra" "1")
7013 (set_attr "memory" "none,load")
7014 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: the sole set is FLAGS_REG; operands 0 and
;; 1 are scratch clobbers.  Alternatives 0-1 clobber a V16QI "Yz" scratch
;; and emit pcmpistrm; alternatives 2-3 clobber an SI "c" scratch and
;; emit pcmpistri.  Within each pair the second alternative takes
;; operand 3 from memory (memory attribute "load").
7016 (define_insn "sse4_2_pcmpistr_cconly"
7017 [(set (reg:CC FLAGS_REG)
7019 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
7020 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
7021 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
7023 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
7024 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
7027 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7028 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7029 pcmpistri\t{%4, %3, %2|%2, %3, %4}
7030 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
7031 [(set_attr "type" "sselog")
7032 (set_attr "prefix_data16" "1")
7033 (set_attr "prefix_extra" "1")
7034 (set_attr "memory" "none,load,none,load")
7035 (set_attr "mode" "TI")])
7037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7039 ;; SSE5 instructions
7041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7043 ;; SSE5 parallel integer multiply/add instructions.
7044 ;; Note the instruction does not allow the value being added to be a memory
7045 ;; operation. However by pretending via the nonimmediate_operand predicate
7046 ;; that it does and splitting it later allows the following to be recognized:
7047 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI.  Operands 1 and 2 are commutative ("%") and either
;; may be memory; operand 3 is tied to the destination ("0") in every
;; alternative.  The third alternative (operand 1 in memory) swaps %1 and
;; %2 in the output template.
;; NOTE(review): the insn condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p where sse5_pmacssww passes 1 -- presumably so
;; that a two-memory-operand form is recognized first and split later;
;; confirm against ix86_sse5_valid_op_p.
7048 (define_insn "sse5_pmacsww"
7049 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7052 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7053 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7054 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7055 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7057 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7058 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7059 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7060 [(set_attr "type" "ssemuladd")
7061 (set_attr "mode" "TI")])
7063 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Applies when the operands fail ix86_sse5_valid_op_p with a final
;; argument of 1 but pass it with 2, and the destination register is not
;; mentioned in any of the three inputs.  The replacement sequence calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; sse5_pmacsww.
7065 [(set (match_operand:V8HI 0 "register_operand" "")
7067 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
7068 (match_operand:V8HI 2 "nonimmediate_operand" ""))
7069 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
7071 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7072 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7073 && !reg_mentioned_p (operands[0], operands[1])
7074 && !reg_mentioned_p (operands[0], operands[2])
7075 && !reg_mentioned_p (operands[0], operands[3])"
7078 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
7079 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI: a mult:V8HI of operands 1 and 2 combined with
;; operand 3, which is tied to the destination ("0").  Operands 1 and 2
;; are commutative ("%"); the third alternative (operand 1 in memory)
;; swaps %1 and %2 in the output template.
7084 (define_insn "sse5_pmacssww"
7085 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7087 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7088 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7089 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7090 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7092 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7093 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7094 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7095 [(set_attr "type" "ssemuladd")
7096 (set_attr "mode" "TI")])
7098 ;; Note the instruction does not allow the value being added to be a memory
7099 ;; operation. However by pretending via the nonimmediate_operand predicate
7100 ;; that it does and splitting it later allows the following to be recognized:
7101 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI.  Operands 1 and 2 are commutative ("%") and either
;; may be memory; operand 3 is tied to the destination ("0") in every
;; alternative.  The third alternative (operand 1 in memory) swaps %1 and
;; %2 in the output template.
;; NOTE(review): the insn condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p where sse5_pmacssdd passes 1 -- presumably so
;; that a two-memory-operand form is recognized first and split later;
;; confirm against ix86_sse5_valid_op_p.
7102 (define_insn "sse5_pmacsdd"
7103 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7106 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7107 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7108 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7109 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7111 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7112 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7113 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7114 [(set_attr "type" "ssemuladd")
7115 (set_attr "mode" "TI")])
7117 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; Applies when the operands fail ix86_sse5_valid_op_p with a final
;; argument of 1 but pass it with 2, and the destination register is not
;; mentioned in any of the three inputs.  The replacement sequence calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; sse5_pmacsdd.
7119 [(set (match_operand:V4SI 0 "register_operand" "")
7121 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7122 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7123 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7125 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7126 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7127 && !reg_mentioned_p (operands[0], operands[1])
7128 && !reg_mentioned_p (operands[0], operands[2])
7129 && !reg_mentioned_p (operands[0], operands[3])"
7132 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7133 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI: a mult:V4SI of operands 1 and 2 combined with
;; operand 3, which is tied to the destination ("0").  Operands 1 and 2
;; are commutative ("%"); the third alternative (operand 1 in memory)
;; swaps %1 and %2 in the output template.
7138 (define_insn "sse5_pmacssdd"
7139 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7141 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7142 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7143 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7144 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7146 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7147 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7148 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7149 [(set_attr "type" "ssemuladd")
7150 (set_attr "mode" "TI")])
;; pmacssdql: V2DI result from V4SI operands 1 and 2 and a V2DI register
;; operand 3 tied to the destination ("0").  The element selectors for
;; operands 1 and 2 both start at (const_int 1).  Either of operands 1
;; and 2 may be memory; the third alternative swaps %1 and %2 in the
;; output template.
7152 (define_insn "sse5_pmacssdql"
7153 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7158 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7159 (parallel [(const_int 1)
7162 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7163 (parallel [(const_int 1)
7165 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7166 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7168 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7169 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7170 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7171 [(set_attr "type" "ssemuladd")
7172 (set_attr "mode" "TI")])
;; pmacssdqh: V2DI result from V4SI operands 1 and 2 and a V2DI register
;; operand 3 tied to the destination ("0").  The element selectors for
;; operands 1 and 2 both start at (const_int 0).  Either of operands 1
;; and 2 may be memory; the third alternative swaps %1 and %2 in the
;; output template.
7174 (define_insn "sse5_pmacssdqh"
7175 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7180 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7181 (parallel [(const_int 0)
7185 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7186 (parallel [(const_int 0)
7188 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7189 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7191 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7192 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7193 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7194 [(set_attr "type" "ssemuladd")
7195 (set_attr "mode" "TI")])
;; pmacsdql: V2DI result from V4SI operands 1 and 2 and a V2DI register
;; operand 3 tied to the destination ("0").  The element selectors for
;; operands 1 and 2 both start at (const_int 1).  Either of operands 1
;; and 2 may be memory; the third alternative swaps %1 and %2 in the
;; output template.
7197 (define_insn "sse5_pmacsdql"
7198 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7203 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7204 (parallel [(const_int 1)
7208 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7209 (parallel [(const_int 1)
7211 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7212 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7214 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7215 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7216 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7217 [(set_attr "type" "ssemuladd")
7218 (set_attr "mode" "TI")])
;; pmacsdqh: V2DI result from V4SI operands 1 and 2 and a V2DI register
;; operand 3 tied to the destination ("0").  The element selectors for
;; operands 1 and 2 both start at (const_int 0).  Either of operands 1
;; and 2 may be memory; the third alternative swaps %1 and %2 in the
;; output template.
7220 (define_insn "sse5_pmacsdqh"
7221 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7226 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7227 (parallel [(const_int 0)
7231 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7232 (parallel [(const_int 0)
7234 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7235 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7237 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7238 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7239 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7240 [(set_attr "type" "ssemuladd")
7241 (set_attr "mode" "TI")])
7243 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V4SI result from V8HI operands 1 and 2 and a V4SI register
;; operand 3 tied to the destination ("0").  The element selectors for
;; operands 1 and 2 both start at (const_int 1).  Either of operands 1
;; and 2 may be memory; the third alternative swaps %1 and %2 in the
;; output template.
7244 (define_insn "sse5_pmacsswd"
7245 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7250 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7251 (parallel [(const_int 1)
7257 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7258 (parallel [(const_int 1)
7262 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7263 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7265 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7266 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7267 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7268 [(set_attr "type" "ssemuladd")
7269 (set_attr "mode" "TI")])
;; pmacswd: V4SI result from V8HI operands 1 and 2 and a V4SI register
;; operand 3 tied to the destination ("0").  The element selectors for
;; operands 1 and 2 both start at (const_int 1).  Either of operands 1
;; and 2 may be memory; the third alternative swaps %1 and %2 in the
;; output template.
7271 (define_insn "sse5_pmacswd"
7272 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7277 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7278 (parallel [(const_int 1)
7284 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7285 (parallel [(const_int 1)
7289 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7290 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7292 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7293 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7294 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7295 [(set_attr "type" "ssemuladd")
7296 (set_attr "mode" "TI")])
;; pmadcsswd: V4SI result from V8HI operands 1 and 2 and a V4SI register
;; operand 3 tied to the destination ("0").  The pattern uses two groups
;; of element selectors, one starting at (const_int 0) and one starting
;; at (const_int 1).  Either of operands 1 and 2 may be memory; the third
;; alternative swaps %1 and %2 in the output template.
7298 (define_insn "sse5_pmadcsswd"
7299 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7305 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7306 (parallel [(const_int 0)
7312 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7313 (parallel [(const_int 0)
7321 (parallel [(const_int 1)
7328 (parallel [(const_int 1)
7332 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7333 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7335 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7336 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7337 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7338 [(set_attr "type" "ssemuladd")
7339 (set_attr "mode" "TI")])
;; pmadcswd: V4SI result from V8HI operands 1 and 2 and a V4SI register
;; operand 3 tied to the destination ("0").  The pattern uses two groups
;; of element selectors, one starting at (const_int 0) and one starting
;; at (const_int 1).  Either of operands 1 and 2 may be memory; the third
;; alternative swaps %1 and %2 in the output template.
7341 (define_insn "sse5_pmadcswd"
7342 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7348 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7349 (parallel [(const_int 0)
7355 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7356 (parallel [(const_int 0)
7364 (parallel [(const_int 1)
7371 (parallel [(const_int 1)
7375 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7376 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7378 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7379 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7380 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7381 [(set_attr "type" "ssemuladd")
7382 (set_attr "mode" "TI")])
7384 ;; SSE5 parallel XMM conditional moves
;; Conditional move generated for every mode in SSEMODE: an if_then_else
;; choosing between operands 1 and 2 under operand 3.  Alternatives 0-3
;; emit pcmov, with either operand 3 or operand 1 tied to the destination
;; ("0").  Alternatives 4 and 5, where one arm carries the "C"
;; constraint, emit andps / andnps on the other arm instead.
7385 (define_insn "sse5_pcmov_<mode>"
7386 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7387 (if_then_else:SSEMODE
7388 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7389 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7390 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7391 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7393 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7394 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7395 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7396 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7397 andps\t{%2, %0|%0, %2}
7398 andnps\t{%1, %0|%0, %1}"
7399 [(set_attr "type" "sse4arg")])
7401 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: V8HI result from a single V16QI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 15.
7402 (define_insn "sse5_phaddbw"
7403 [(set (match_operand:V8HI 0 "register_operand" "=x")
7407 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7408 (parallel [(const_int 0)
7419 (parallel [(const_int 1)
7426 (const_int 15)])))))]
7428 "phaddbw\t{%1, %0|%0, %1}"
7429 [(set_attr "type" "sseiadd1")])
;; phaddbd: V4SI result from a single V16QI register-or-memory source.
;; The pattern has four element selector lists, starting at elements 0,
;; 1, 2 and 3; the last one ends at element 15.
7431 (define_insn "sse5_phaddbd"
7432 [(set (match_operand:V4SI 0 "register_operand" "=x")
7437 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7438 (parallel [(const_int 0)
7445 (parallel [(const_int 1)
7453 (parallel [(const_int 2)
7460 (parallel [(const_int 3)
7463 (const_int 15)]))))))]
7465 "phaddbd\t{%1, %0|%0, %1}"
7466 [(set_attr "type" "sseiadd1")])
;; phaddbq: V2DI result from a single V16QI register-or-memory source.
;; The pattern has eight element selector lists, starting at elements
;; 0-3 and 8-11; the last one ends at element 15.
7468 (define_insn "sse5_phaddbq"
7469 [(set (match_operand:V2DI 0 "register_operand" "=x")
7475 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7476 (parallel [(const_int 0)
7481 (parallel [(const_int 1)
7487 (parallel [(const_int 2)
7492 (parallel [(const_int 3)
7499 (parallel [(const_int 8)
7504 (parallel [(const_int 9)
7510 (parallel [(const_int 10)
7515 (parallel [(const_int 11)
7516 (const_int 15)])))))))]
7518 "phaddbq\t{%1, %0|%0, %1}"
7519 [(set_attr "type" "sseiadd1")])
;; phaddwd: V4SI result from a single V8HI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 7.
7521 (define_insn "sse5_phaddwd"
7522 [(set (match_operand:V4SI 0 "register_operand" "=x")
7526 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7527 (parallel [(const_int 0)
7534 (parallel [(const_int 1)
7537 (const_int 7)])))))]
7539 "phaddwd\t{%1, %0|%0, %1}"
7540 [(set_attr "type" "sseiadd1")])
;; phaddwq: V2DI result from a single V8HI register-or-memory source.
;; The pattern has four element selector lists, starting at elements 0,
;; 1, 2 and 3; the last one ends at element 7.
7542 (define_insn "sse5_phaddwq"
7543 [(set (match_operand:V2DI 0 "register_operand" "=x")
7548 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7549 (parallel [(const_int 0)
7554 (parallel [(const_int 1)
7560 (parallel [(const_int 2)
7565 (parallel [(const_int 3)
7566 (const_int 7)]))))))]
7568 "phaddwq\t{%1, %0|%0, %1}"
7569 [(set_attr "type" "sseiadd1")])
;; phadddq: V2DI result from a single V4SI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 3.
7571 (define_insn "sse5_phadddq"
7572 [(set (match_operand:V2DI 0 "register_operand" "=x")
7576 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7577 (parallel [(const_int 0)
7582 (parallel [(const_int 1)
7583 (const_int 3)])))))]
7585 "phadddq\t{%1, %0|%0, %1}"
7586 [(set_attr "type" "sseiadd1")])
;; phaddubw: V8HI result from a single V16QI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 15.
7588 (define_insn "sse5_phaddubw"
7589 [(set (match_operand:V8HI 0 "register_operand" "=x")
7593 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7594 (parallel [(const_int 0)
7605 (parallel [(const_int 1)
7612 (const_int 15)])))))]
7614 "phaddubw\t{%1, %0|%0, %1}"
7615 [(set_attr "type" "sseiadd1")])
;; phaddubd: V4SI result from a single V16QI register-or-memory source.
;; The pattern has four element selector lists, starting at elements 0,
;; 1, 2 and 3; the last one ends at element 15.
7617 (define_insn "sse5_phaddubd"
7618 [(set (match_operand:V4SI 0 "register_operand" "=x")
7623 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7624 (parallel [(const_int 0)
7631 (parallel [(const_int 1)
7639 (parallel [(const_int 2)
7646 (parallel [(const_int 3)
7649 (const_int 15)]))))))]
7651 "phaddubd\t{%1, %0|%0, %1}"
7652 [(set_attr "type" "sseiadd1")])
;; phaddubq: V2DI result from a single V16QI register-or-memory source.
;; The pattern has eight element selector lists, starting at elements
;; 0-3 and 8-11; the last one ends at element 15.
7654 (define_insn "sse5_phaddubq"
7655 [(set (match_operand:V2DI 0 "register_operand" "=x")
7661 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7662 (parallel [(const_int 0)
7667 (parallel [(const_int 1)
7673 (parallel [(const_int 2)
7678 (parallel [(const_int 3)
7685 (parallel [(const_int 8)
7690 (parallel [(const_int 9)
7696 (parallel [(const_int 10)
7701 (parallel [(const_int 11)
7702 (const_int 15)])))))))]
7704 "phaddubq\t{%1, %0|%0, %1}"
7705 [(set_attr "type" "sseiadd1")])
;; phadduwd: V4SI result from a single V8HI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 7.
7707 (define_insn "sse5_phadduwd"
7708 [(set (match_operand:V4SI 0 "register_operand" "=x")
7712 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7713 (parallel [(const_int 0)
7720 (parallel [(const_int 1)
7723 (const_int 7)])))))]
7725 "phadduwd\t{%1, %0|%0, %1}"
7726 [(set_attr "type" "sseiadd1")])
;; phadduwq: V2DI result from a single V8HI register-or-memory source.
;; The pattern has four element selector lists, starting at elements 0,
;; 1, 2 and 3; the last one ends at element 7.
7728 (define_insn "sse5_phadduwq"
7729 [(set (match_operand:V2DI 0 "register_operand" "=x")
7734 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7735 (parallel [(const_int 0)
7740 (parallel [(const_int 1)
7746 (parallel [(const_int 2)
7751 (parallel [(const_int 3)
7752 (const_int 7)]))))))]
7754 "phadduwq\t{%1, %0|%0, %1}"
7755 [(set_attr "type" "sseiadd1")])
;; phaddudq: V2DI result from a single V4SI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 3.
7757 (define_insn "sse5_phaddudq"
7758 [(set (match_operand:V2DI 0 "register_operand" "=x")
7762 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7763 (parallel [(const_int 0)
7768 (parallel [(const_int 1)
7769 (const_int 3)])))))]
7771 "phaddudq\t{%1, %0|%0, %1}"
7772 [(set_attr "type" "sseiadd1")])
;; phsubbw: V8HI result from a single V16QI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 15.
7774 (define_insn "sse5_phsubbw"
7775 [(set (match_operand:V8HI 0 "register_operand" "=x")
7779 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7780 (parallel [(const_int 0)
7791 (parallel [(const_int 1)
7798 (const_int 15)])))))]
7800 "phsubbw\t{%1, %0|%0, %1}"
7801 [(set_attr "type" "sseiadd1")])
;; phsubwd: V4SI result from a single V8HI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 7.
7803 (define_insn "sse5_phsubwd"
7804 [(set (match_operand:V4SI 0 "register_operand" "=x")
7808 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7809 (parallel [(const_int 0)
7816 (parallel [(const_int 1)
7819 (const_int 7)])))))]
7821 "phsubwd\t{%1, %0|%0, %1}"
7822 [(set_attr "type" "sseiadd1")])
;; phsubdq: V2DI result from a single V4SI register-or-memory source.
;; The pattern has two element selector lists, starting at elements 0
;; and 1; the last one ends at element 3.
7824 (define_insn "sse5_phsubdq"
7825 [(set (match_operand:V2DI 0 "register_operand" "=x")
7829 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7830 (parallel [(const_int 0)
7835 (parallel [(const_int 1)
7836 (const_int 3)])))))]
7838 "phsubdq\t{%1, %0|%0, %1}"
7839 [(set_attr "type" "sseiadd1")])
7841 ;; SSE5 permute instructions
;; pperm: V16QI result from three V16QI inputs, represented opaquely as
;; UNSPEC_SSE5_PERMUTE.  In each of the four alternatives either operand
;; 1 or operand 3 is tied to the destination ("0"), and at most one of
;; the remaining inputs carries an "xm" constraint.  The insn condition
;; additionally checks the operands with ix86_sse5_valid_op_p.
7842 (define_insn "sse5_pperm"
7843 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7845 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7846 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7847 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7848 UNSPEC_SSE5_PERMUTE))]
7849 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7850 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7851 [(set_attr "type" "sse4arg")
7852 (set_attr "mode" "TI")])
7854 ;; The following are for the various unpack insns which don't need the first
7855 ;; source operand, so we can just use the output operand for the first operand.
7856 ;; This allows either of the other two operands to be a memory operand. We
7857 ;; can't just use the first operand as an argument to the normal pperm because
7858 ;; then an output only argument, suddenly becomes an input operand.
;; pperm used as a zero-extending unpack from V16QI to V8HI.  Operand 1
;; is the data source, operand 2 the constant element-selector parallel,
;; and operand 3 (a "use") the V16QI pperm control vector.  The output
;; template passes the destination as the first source (%0 twice).
;; The constraints allow either operand 1 or operand 3 to be memory, so
;; the insn condition requires at least one of those two to be a
;; register.  Operand 2 is a parallel of const_ints and can never be a
;; register, so the check must be on operand 3.
7859 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7860 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7863 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7864 (match_operand 2 "" "")))) ;; parallel with const_int's
7865 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7867 && (register_operand (operands[1], V16QImode)
7868 || register_operand (operands[3], V16QImode))"
7869 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7870 [(set_attr "type" "sseadd")
7871 (set_attr "mode" "TI")])
;; pperm used as a sign-extending unpack from V16QI to V8HI.  Operand 1
;; is the data source, operand 2 the constant element-selector parallel,
;; and operand 3 (a "use") the V16QI pperm control vector.  The output
;; template passes the destination as the first source (%0 twice).
;; The constraints allow either operand 1 or operand 3 to be memory, so
;; the insn condition requires at least one of those two to be a
;; register.  Operand 2 is a parallel of const_ints and can never be a
;; register, so the check must be on operand 3.
7873 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7874 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7877 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7878 (match_operand 2 "" "")))) ;; parallel with const_int's
7879 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7881 && (register_operand (operands[1], V16QImode)
7882 || register_operand (operands[3], V16QImode))"
7883 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7884 [(set_attr "type" "sseadd")
7885 (set_attr "mode" "TI")])
;; pperm used as a zero-extending unpack from V8HI to V4SI.  Operand 1
;; is the data source, operand 2 the constant element-selector parallel,
;; and operand 3 (a "use") the V16QI pperm control vector.  The output
;; template passes the destination as the first source (%0 twice).
;; The constraints allow either operand 1 or operand 3 to be memory, so
;; the insn condition requires at least one of those two to be a
;; register.  Operand 2 is a parallel of const_ints and can never be a
;; register, so the check must be on operand 3.
7887 (define_insn "sse5_pperm_zero_v8hi_v4si"
7888 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7891 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7892 (match_operand 2 "" "")))) ;; parallel with const_int's
7893 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7895 && (register_operand (operands[1], V8HImode)
7896 || register_operand (operands[3], V16QImode))"
7897 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7898 [(set_attr "type" "sseadd")
7899 (set_attr "mode" "TI")])
;; Sign-named counterpart of the V8HI -> V4SI pperm pattern.  The output
;; template repeats %0 where a separate first source would normally go.
;; Operand 2 is the parallel of const_ints; operand 3 is the V16QI value
;; attached with (use).  The two constraint alternatives put either operand 1
;; or operand 3 in memory, never both, so the insn condition checks that at
;; least one of operands 1 and 3 is a register.  Operand 2 must not be tested
;; here: it is the parallel, not a V16QI value.
7901 (define_insn "sse5_pperm_sign_v8hi_v4si"
7902 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7905 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7906 (match_operand 2 "" "")))) ;; parallel with const_int's
7907 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7909 && (register_operand (operands[1], V8HImode)
7910 || register_operand (operands[3], V16QImode))"
7911 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7912 [(set_attr "type" "sseadd")
7913 (set_attr "mode" "TI")])
;; pperm form producing a V2DI result from the V4SI operand 1.  The output
;; template repeats %0 where a separate first source would normally go.
;; Operand 2 is the parallel of const_ints; operand 3 is the V16QI value
;; attached with (use).  The two constraint alternatives put either operand 1
;; or operand 3 in memory, never both, so the insn condition checks that at
;; least one of operands 1 and 3 is a register.  Operand 2 must not be tested
;; here: it is the parallel, not a V16QI value.
7915 (define_insn "sse5_pperm_zero_v4si_v2di"
7916 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7919 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7920 (match_operand 2 "" "")))) ;; parallel with const_int's
7921 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7923 && (register_operand (operands[1], V4SImode)
7924 || register_operand (operands[3], V16QImode))"
7925 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7926 [(set_attr "type" "sseadd")
7927 (set_attr "mode" "TI")])
;; Sign-named counterpart of the V4SI -> V2DI pperm pattern.  The output
;; template repeats %0 where a separate first source would normally go.
;; Operand 2 is the parallel of const_ints; operand 3 is the V16QI value
;; attached with (use).  The two constraint alternatives put either operand 1
;; or operand 3 in memory, never both, so the insn condition checks that at
;; least one of operands 1 and 3 is a register.  Operand 2 must not be tested
;; here: it is the parallel, not a V16QI value.
7929 (define_insn "sse5_pperm_sign_v4si_v2di"
7930 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7933 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7934 (match_operand 2 "" "")))) ;; parallel with const_int's
7935 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7937 && (register_operand (operands[1], V4SImode)
7938 || register_operand (operands[3], V16QImode))"
7939 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7940 [(set_attr "type" "sseadd")
7941 (set_attr "mode" "TI")])
7943 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; pperm pattern with a V4SI output and two V2DI inputs (operands 1 and 2);
;; operand 3 is a V16QI value attached with (use).  There are four constraint
;; alternatives: the first two tie operand 1 to the output register, the last
;; two tie operand 3 to it.  Besides TARGET_SSE5 the insn condition defers to
;; ix86_sse5_valid_op_p (called with 4 operands) to accept or reject the
;; operand combination.
7944 (define_insn "sse5_pperm_pack_v2di_v4si"
7945 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7948 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7950 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7951 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7952 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7953 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7954 [(set_attr "type" "sse4arg")
7955 (set_attr "mode" "TI")])
;; pperm pattern with a V8HI output and two V4SI inputs (operands 1 and 2);
;; operand 3 is a V16QI value attached with (use).  There are four constraint
;; alternatives: the first two tie operand 1 to the output register, the last
;; two tie operand 3 to it.  Besides TARGET_SSE5 the insn condition defers to
;; ix86_sse5_valid_op_p (called with 4 operands) to accept or reject the
;; operand combination.
7957 (define_insn "sse5_pperm_pack_v4si_v8hi"
7958 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7961 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7963 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7964 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7965 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7966 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7967 [(set_attr "type" "sse4arg")
7968 (set_attr "mode" "TI")])
;; pperm pattern with a V16QI output and two V8HI inputs (operands 1 and 2);
;; operand 3 is a V16QI value attached with (use).  There are four constraint
;; alternatives: the first two tie operand 1 to the output register, the last
;; two tie operand 3 to it.  Besides TARGET_SSE5 the insn condition defers to
;; ix86_sse5_valid_op_p (called with 4 operands) to accept or reject the
;; operand combination.
7970 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7971 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7974 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7976 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7977 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7978 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7979 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7980 [(set_attr "type" "sse4arg")
7981 (set_attr "mode" "TI")])
7983 ;; Floating point permutation (permps, permpd)
;; Instantiated for each SSEMODEF2P mode (V4SF and V2DF).  Operands 1 and 2
;; have the same mode as the result and operand 3 is a V16QI value; all three
;; are wrapped in UNSPEC_SSE5_PERMUTE.  The mnemonic is "perm" followed by the
;; ssemodesuffixf4 suffix ("ps" for V4SF, "pd" for V2DF).  The first two
;; constraint alternatives tie operand 1 to the output, the last two tie
;; operand 3 to it; ix86_sse5_valid_op_p makes the final validity decision.
7984 (define_insn "sse5_perm<mode>"
7985 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7987 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7988 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7989 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7990 UNSPEC_SSE5_PERMUTE))]
7991 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7992 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7993 [(set_attr "type" "sse4arg")
7994 (set_attr "mode" "<MODE>")])
7996 ;; SSE5 packed rotate instructions
;; Named rotl<mode>3 pattern for each SSEMODE1248 mode (V16QI, V8HI, V4SI,
;; V2DI).  Operand 1 may be a register or memory; operand 2 is an SImode
;; constant ("n" constraint) that must satisfy the
;; const_0_to_<sserotatemax>_operand predicate.  Emits prot with the
;; ssevecsize suffix (b, w, d or q).
7997 (define_insn "rotl<mode>3"
7998 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8000 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8001 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8003 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8004 [(set_attr "type" "sseishft")
8005 (set_attr "mode" "TI")])
;; Form of prot in which operand 2 is a vector of the same mode as the data
;; rather than an immediate.  The two constraint alternatives allow either
;; operand 2 or operand 1 to be in memory; ix86_sse5_valid_op_p (called with
;; 3 operands) makes the final validity decision.
8007 (define_insn "sse5_rotl<mode>3"
8008 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8010 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8011 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
8012 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8013 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8014 [(set_attr "type" "sseishft")
8015 (set_attr "mode" "TI")])
8017 ;; SSE5 packed shift instructions. Note negative values for the shift amount
8018 ;; convert this into a right shift instead of left shift. For now, model this
8019 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
8020 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT rtx code.
;; psha pattern for each SSEMODE1248 mode, modeled as UNSPEC_SSE5_ASHIFT of
;; operands 1 and 2 (both vectors of the result mode).  The two constraint
;; alternatives allow either operand 2 or operand 1 to be in memory;
;; ix86_sse5_valid_op_p (called with 3 operands) makes the final validity
;; decision.
8021 (define_insn "sse5_ashl<mode>3"
8022 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8024 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8025 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8026 UNSPEC_SSE5_ASHIFT))]
8027 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8028 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8029 [(set_attr "type" "sseishft")
8030 (set_attr "mode" "TI")])
;; pshl pattern for each SSEMODE1248 mode, modeled as UNSPEC_SSE5_LSHIFT of
;; operands 1 and 2 (both vectors of the result mode).  The two constraint
;; alternatives allow either operand 2 or operand 1 to be in memory;
;; ix86_sse5_valid_op_p (called with 3 operands) makes the final validity
;; decision.
8032 (define_insn "sse5_lshl<mode>3"
8033 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8035 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8036 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8037 UNSPEC_SSE5_LSHIFT))]
8038 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8039 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8040 [(set_attr "type" "sseishft")
8041 (set_attr "mode" "TI")])
8043 ;; SSE5 FRCZ support
;; Packed frcz for each SSEMODEF2P mode (V4SF and V2DF).  The single source
;; operand may be a register or memory.  The mnemonic is "frcz" followed by
;; the ssemodesuffixf4 suffix ("ps" or "pd").
8045 (define_insn "sse5_frcz<mode>2"
8046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8048 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8051 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8052 [(set_attr "type" "ssecvt1")
8053 (set_attr "prefix_extra" "1")
8054 (set_attr "mode" "<MODE>")])
;; vec_merge form of frcz.  Operand 2 is the source (register or memory) and
;; operand 1 is a register tied to the output by the "0" constraint.  The
;; mnemonic uses the ssemodesuffixf2s suffix, which maps V4SF to "ss" and
;; V2DF to "sd"; only operands 2 and 0 appear in the output template.
8057 (define_insn "sse5_vmfrcz<mode>2"
8058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8059 (vec_merge:SSEMODEF2P
8061 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8063 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8066 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8067 [(set_attr "type" "ssecvt1")
8068 (set_attr "prefix_extra" "1")
8069 (set_attr "mode" "<MODE>")])
;; cvtph2ps: produces a V4SF register result from a V4HI source that may be a
;; register or memory.  The conversion is represented as an unspec.
8071 (define_insn "sse5_cvtph2ps"
8072 [(set (match_operand:V4SF 0 "register_operand" "=x")
8073 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8076 "cvtph2ps\t{%1, %0|%0, %1}"
8077 [(set_attr "type" "ssecvt")
8078 (set_attr "mode" "V4SF")])
;; cvtps2ph: produces a V4HI result from a V4SF register source.  Unlike
;; cvtph2ps, it is the destination that may be a register or memory ("=xm").
;; The conversion is represented as an unspec.
8080 (define_insn "sse5_cvtps2ph"
8081 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8082 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8085 "cvtps2ph\t{%1, %0|%0, %1}"
8086 [(set_attr "type" "ssecvt")
8087 (set_attr "mode" "V4SF")])
8089 ;; Scalar versions of the com instructions that use vector types that are
8090 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8091 ;; com instructions fill in 0's in the upper bits instead of leaving them
8092 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the vec_merge comparison.  Operand 1 is the comparison
;; operator (sse5_comparison_float_operator) applied to operands 2 and 3.
;; The preparation statement sets operands[4] to the all-zero constant of the
;; vector mode.
8093 (define_expand "sse5_vmmaskcmp<mode>3"
8094 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8095 (vec_merge:SSEMODEF2P
8096 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8097 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8098 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8103 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn matching the vec_merge comparison form.  Operand 2 must be a register
;; and operand 3 may be a register or memory; operand 4 is matched with an
;; empty predicate.  The mnemonic is "com", then the %Y1 expansion of the
;; comparison operator, then the ssemodesuffixf2s suffix ("ss" or "sd").
8106 (define_insn "*sse5_vmmaskcmp<mode>3"
8107 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8108 (vec_merge:SSEMODEF2P
8109 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8110 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8111 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8112 (match_operand:SSEMODEF2P 4 "")
8115 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8116 [(set_attr "type" "sse4arg")
8117 (set_attr "mode" "<ssescalarmode>")])
8119 ;; We don't have a comparison operator that always returns true/false, so
8120 ;; handle comfalse and comtrue specially.
;; Operand 3 is an integer constant that selects which of four templates is
;; emitted: the scalar and packed comfalse forms, then the scalar and packed
;; comtrue forms.  The ssemodesuffixf2c attribute supplies the final "s" or
;; "d".  The two comtrue arms must use the comtrue mnemonics; repeating the
;; comfalse templates there would make the true variants emit comfalse.
8121 (define_insn "sse5_com_tf<mode>3"
8122 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8124 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8125 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8126 (match_operand:SI 3 "const_int_operand" "n")]
8127 UNSPEC_SSE5_TRUEFALSE))]
8130 const char *ret = NULL;
8132 switch (INTVAL (operands[3]))
8135 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8139 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8143 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8147 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8156 [(set_attr "type" "ssecmp")
8157 (set_attr "mode" "<MODE>")])
;; Floating-point comparison for each SSEMODEF2P mode (V4SF and V2DF).
;; Operand 1 is the comparison operator (sse5_comparison_float_operator)
;; applied to register operand 2 and register-or-memory operand 3.  The
;; mnemonic is "com", then the %Y1 expansion of the operator, then the
;; ssemodesuffixf4 suffix ("ps" or "pd").
8159 (define_insn "sse5_maskcmp<mode>3"
8160 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8161 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8162 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8163 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8165 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8166 [(set_attr "type" "ssecmp")
8167 (set_attr "mode" "<MODE>")])
;; Integer comparison for each SSEMODE1248 mode.  It shares its name template
;; with the floating-point pattern but iterates over different modes, so the
;; generated names do not collide.  Operand 1 is the comparison operator
;; (ix86_comparison_int_operator) applied to register operand 2 and
;; register-or-memory operand 3.  The mnemonic is "pcom", then the %Y1
;; expansion of the operator, then the ssevecsize suffix (b, w, d or q).
8169 (define_insn "sse5_maskcmp<mode>3"
8170 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8171 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8172 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8173 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8175 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8176 [(set_attr "type" "sse4arg")
8177 (set_attr "mode" "TI")])
;; Integer comparison using the ix86_comparison_uns_operator predicate for
;; operand 1.  Same operand layout as the sse5_maskcmp integer pattern, but
;; the template inserts a "u" between the %Y1 expansion and the ssevecsize
;; suffix.
8179 (define_insn "sse5_maskcmp_uns<mode>3"
8180 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8181 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8182 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8183 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8185 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8186 [(set_attr "type" "ssecmp")
8187 (set_attr "mode" "TI")])
8189 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8190 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8191 ;; the exact instruction generated for the intrinsic.
;; Same comparison operator, operands and output template as
;; sse5_maskcmp_uns<mode>3, but the comparison is wrapped in
;; UNSPEC_SSE5_UNSIGNED_CMP instead of appearing directly as the set source.
8192 (define_insn "sse5_maskcmp_uns2<mode>3"
8193 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8195 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8196 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8197 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8198 UNSPEC_SSE5_UNSIGNED_CMP))]
8200 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8201 [(set_attr "type" "ssecmp")
8202 (set_attr "mode" "TI")])
8204 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8205 ;; being added here to be complete.
;; Operand 3 is an integer constant selecting the template: a nonzero value
;; emits pcomtrue, zero emits pcomfalse, each followed by the ssevecsize
;; suffix (b, w, d or q).  Operands 1 and 2 are wrapped, together with the
;; selector, in UNSPEC_SSE5_TRUEFALSE.
8206 (define_insn "sse5_pcom_tf<mode>3"
8207 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8209 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8210 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8211 (match_operand:SI 3 "const_int_operand" "n")]
8212 UNSPEC_SSE5_TRUEFALSE))]
8215 return ((INTVAL (operands[3]) != 0)
8216 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8217 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8219 [(set_attr "type" "ssecmp")
8220 (set_attr "mode" "TI")])