1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name match the
;; element widths in bytes of the modes listed (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and packed floating-point modes.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; Packed floating-point modes only.
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4:  full two-letter suffix, scalar ("ss"/"sd") or packed
;;                   ("ps"/"pd") according to the mode.
;; ssemodesuffixf2s: scalar two-letter suffix even for the vector modes.
;; ssemodesuffixf2c: precision letter only ("s" or "d"); the patterns below
;;                   append it to stems such as "addp" or "adds".
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")])
47 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd")])
48 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
50 ;; Mapping of the max integer size for sse5 rotate immediate constraint
51 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
53 ;; Mapping of vector modes back to the scalar modes
54 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
56 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
58 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
62 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
64 ;; All of these patterns are enabled for SSE1 as well as SSE2.
65 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the integer vector modes in SSEMODEI.  Both operands
;; are nonimmediate_operand; the expansion is delegated to
;; ix86_expand_vector_move.
67 (define_expand "mov<mode>"
68 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
69 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
72 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Alternatives:
;;   0: SSE constant ("C") -> xmm   (type sselog1)
;;   1: xmm or memory     -> xmm    (type ssemov)
;;   2: xmm               -> memory (type ssemov)
;; The condition requires at least one operand to be a register.
;; The non-constant alternatives emit movaps when the insn's "mode"
;; attribute is V4SF and movdqa otherwise.
76 (define_insn "*mov<mode>_internal"
77 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
78 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
80 && (register_operand (operands[0], <MODE>mode)
81 || register_operand (operands[1], <MODE>mode))"
83 switch (which_alternative)
86 return standard_sse_constant_opcode (insn, operands[1]);
89 if (get_attr_mode (insn) == MODE_V4SF)
90 return "movaps\t{%1, %0|%0, %1}";
92 return "movdqa\t{%1, %0|%0, %1}";
97 [(set_attr "type" "sselog1,ssemov,ssemov")
;; "mode" is V4SF when optimizing for size, when SSE2 is not available, or
;; for the store alternative when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise it is TI.
100 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
101 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
102 (and (eq_attr "alternative" "2")
103 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
105 (const_string "V4SF")
106 (const_string "TI")))])
108 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
109 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
110 ;; from memory, we'd prefer to load the memory directly into the %xmm
111 ;; register. To facilitate this happy circumstance, this pattern won't
112 ;; split until after register allocation. If the 64-bit value didn't
113 ;; come from memory, this is the best we can do. This is much better
114 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
;; Only available for 32-bit targets with SSE2 and inter-unit moves, and
;; only split once reload has completed.
;; Alternative 0 takes the DImode source in general registers and needs an
;; early-clobbered xmm scratch (operand 2); alternative 1 takes it from
;; memory and needs no scratch ("X").
117 (define_insn_and_split "movdi_to_sse"
119 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
120 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
121 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
122 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
124 "&& reload_completed"
127 if (register_operand (operands[1], DImode))
129 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
130 Assemble the 64-bit DImode value in an xmm register. */
/* The SImode subreg at byte 0 goes to the destination, the one at byte 4
   goes to the scratch, and punpckldq then interleaves the two.  */
131 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
132 gen_rtx_SUBREG (SImode, operands[1], 0)));
133 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
134 gen_rtx_SUBREG (SImode, operands[1], 4)));
135 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
/* Memory source: concatenate the DImode value with zero, viewing the
   destination as V2DImode.  */
137 else if (memory_operand (operands[1], DImode))
138 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for the packed floating-point modes (V4SF, V2DF); the
;; expansion is delegated to ix86_expand_vector_move.
143 (define_expand "mov<mode>"
144 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
145 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
148 ix86_expand_vector_move (<MODE>mode, operands);
;; V4SF move insn.  Alternative 0 loads an SSE constant ("C") through
;; standard_sse_constant_opcode; a movaps template is returned for register
;; or memory moves.  At least one operand must be a register.
152 (define_insn "*movv4sf_internal"
153 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
154 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
156 && (register_operand (operands[0], V4SFmode)
157 || register_operand (operands[1], V4SFmode))"
159 switch (which_alternative)
162 return standard_sse_constant_opcode (insn, operands[1]);
165 return "movaps\t{%1, %0|%0, %1}";
170 [(set_attr "type" "sselog1,ssemov,ssemov")
171 (set_attr "mode" "V4SF")])
;; NOTE(review): the opening line of this definition is not visible here.
;; After reload, a V4SF register load from a
;; zero_extended_scalar_load_operand is rewritten in terms of a
;; vec_duplicate of the SFmode subreg of the source; operand 2 is set to the
;; V4SF zero vector.
174 [(set (match_operand:V4SF 0 "register_operand" "")
175 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
176 "TARGET_SSE && reload_completed"
179 (vec_duplicate:V4SF (match_dup 1))
183 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
184 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move insn.  Alternative 0 loads an SSE constant ("C"), alternative 1
;; moves xmm/memory to xmm and alternative 2 stores xmm to memory.  At least
;; one operand must be a register.  The non-constant alternatives emit
;; movaps when the insn's "mode" attribute is V4SF and movapd otherwise.
187 (define_insn "*movv2df_internal"
188 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
189 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
191 && (register_operand (operands[0], V2DFmode)
192 || register_operand (operands[1], V2DFmode))"
194 switch (which_alternative)
197 return standard_sse_constant_opcode (insn, operands[1]);
200 if (get_attr_mode (insn) == MODE_V4SF)
201 return "movaps\t{%1, %0|%0, %1}";
203 return "movapd\t{%1, %0|%0, %1}";
208 [(set_attr "type" "sselog1,ssemov,ssemov")
;; "mode" is V4SF when optimizing for size, when SSE2 is not available, or
;; for the store alternative when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise it is V2DF.
211 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
212 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
213 (and (eq_attr "alternative" "2")
214 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
216 (const_string "V4SF")
217 (const_string "V2DF")))])
;; NOTE(review): the opening line of this definition is not visible here.
;; After reload (SSE2 only), a V2DF register load from a
;; zero_extended_scalar_load_operand becomes a vec_concat of the DFmode
;; subreg of the source (operand 1) with a DFmode zero (operand 2).
220 [(set (match_operand:V2DF 0 "register_operand" "")
221 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
222 "TARGET_SSE2 && reload_completed"
223 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
225 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
226 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes a register operand and
;; hands it to ix86_expand_push.
229 (define_expand "push<mode>1"
230 [(match_operand:SSEMODE 0 "register_operand" "")]
233 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for every mode in SSEMODE; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
237 (define_expand "movmisalign<mode>"
238 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
239 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
242 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups: V4SF move wrapped in an unspec.  Alternative 0 loads xmm from
;; xmm or memory, alternative 1 stores xmm to memory; memory-to-memory is
;; rejected by the condition.
;; The "mode" attribute is V4SF to match the operand mode and the
;; single-precision mnemonic, as *movv4sf_internal does for movaps; it was
;; previously "V2DF", the value sse2_movupd uses for movupd.
246 (define_insn "sse_movups"
247 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
248 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
250 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
251 "movups\t{%1, %0|%0, %1}"
252 [(set_attr "type" "ssemov")
253 (set_attr "mode" "V4SF")])
;; movupd: V2DF move wrapped in an unspec (SSE2).  Alternative 0 loads xmm
;; from xmm or memory, alternative 1 stores xmm to memory; memory-to-memory
;; is rejected by the condition.
255 (define_insn "sse2_movupd"
256 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
257 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
259 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
260 "movupd\t{%1, %0|%0, %1}"
261 [(set_attr "type" "ssemov")
262 (set_attr "mode" "V2DF")])
;; movdqu: V16QI move wrapped in an unspec (SSE2).  Alternative 0 loads xmm
;; from xmm or memory, alternative 1 stores xmm to memory; memory-to-memory
;; is rejected by the condition.
264 (define_insn "sse2_movdqu"
265 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
266 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
268 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
269 "movdqu\t{%1, %0|%0, %1}"
270 [(set_attr "type" "ssemov")
271 (set_attr "prefix_data16" "1")
272 (set_attr "mode" "TI")])
;; movntps / movntpd: store of an xmm register (operand 1) to memory
;; (operand 0) for the packed floating-point modes.  The pattern name is
;; prefixed with the required SSE level through the <sse> attribute.
274 (define_insn "<sse>_movnt<mode>"
275 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
277 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
279 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
280 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
281 [(set_attr "type" "ssemov")
282 (set_attr "mode" "<MODE>")])
;; movntdq: store of a V2DI xmm register (operand 1) to memory (operand 0).
284 (define_insn "sse2_movntv2di"
285 [(set (match_operand:V2DI 0 "memory_operand" "=m")
286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
289 "movntdq\t{%1, %0|%0, %1}"
290 [(set_attr "type" "ssecvt")
291 (set_attr "prefix_data16" "1")
292 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intentional.
294 (define_insn "sse2_movntsi"
295 [(set (match_operand:SI 0 "memory_operand" "=m")
296 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
299 "movnti\t{%1, %0|%0, %1}"
300 [(set_attr "type" "ssecvt")
301 (set_attr "mode" "V2DF")])
;; lddqu: load of a V16QI value from memory (operand 1) into an xmm
;; register (operand 0), wrapped in an unspec.
303 (define_insn "sse3_lddqu"
304 [(set (match_operand:V16QI 0 "register_operand" "=x")
305 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
308 "lddqu\t{%1, %0|%0, %1}"
309 [(set_attr "type" "ssecvt")
310 (set_attr "prefix_rep" "1")
311 (set_attr "mode" "TI")])
313 ; Expand patterns for non-temporal stores. At the moment, only those
314 ; that directly map to insns are defined; it would be possible to
315 ; define patterns for other modes that would expand to several insns.
; Non-temporal store expander for the packed floating-point modes:
; register source (operand 1), memory destination (operand 0).
317 (define_expand "storent<mode>"
318 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
320 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
322 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
; Non-temporal store expander iterated over MODEF (an iterator not defined
; in the visible part of this file): register source, memory destination.
; NOTE(review): the enabling condition is not visible here.
325 (define_expand "storent<mode>"
326 [(set (match_operand:MODEF 0 "memory_operand" "")
328 [(match_operand:MODEF 1 "register_operand" "")]
; Non-temporal store expander for V2DI: register source, memory destination.
333 (define_expand "storentv2di"
334 [(set (match_operand:V2DI 0 "memory_operand" "")
335 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
; Non-temporal store expander for SImode: register source, memory destination.
340 (define_expand "storentsi"
341 [(set (match_operand:SI 0 "memory_operand" "")
342 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
347 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
349 ;; Parallel floating point arithmetic
351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation of a packed floating-point vector; expansion is done entirely
;; by ix86_expand_fp_absneg_operator (the preparation statement ends in
;; DONE).
353 (define_expand "neg<mode>2"
354 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
355 (neg:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
356 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
357 "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;")
;; Absolute value of a packed floating-point vector; expansion is done
;; entirely by ix86_expand_fp_absneg_operator (the preparation statement
;; ends in DONE).
359 (define_expand "abs<mode>2"
360 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
361 (abs:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
362 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
363 "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;")
;; Packed floating-point addition expander.  Both inputs may be memory;
;; ix86_fixup_binary_operands_no_copy adjusts the operands for PLUS before
;; the pattern is emitted.
365 (define_expand "add<mode>3"
366 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
368 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
369 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
370 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
371 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; addps / addpd.  Operand 1 is tied to the destination ("0") and marked
;; commutative with operand 2 ("%"); operand 2 may be an xmm register or
;; memory.
373 (define_insn "*add<mode>3"
374 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
376 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
377 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
378 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
379 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
380 "addp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
381 [(set_attr "type" "sseadd")
382 (set_attr "mode" "<MODE>")])
;; addss / addsd, expressed as a vec_merge.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.  The "mode"
;; attribute is the scalar element mode.
;; The operand check passes <MODE>mode, as *add<mode>3 and
;; <sse>_vmmul<mode>3 do, rather than a hard-coded V4SFmode, because the
;; pattern is also instantiated for V2DF.
384 (define_insn "<sse>_vmadd<mode>3"
385 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
386 (vec_merge:SSEMODEF2P
388 (match_operand:SSEMODEF2P 1 "register_operand" "0")
389 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
392 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
393 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
394 "adds<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
395 [(set_attr "type" "sseadd")
396 (set_attr "mode" "<ssescalarmode>")])
;; Packed floating-point subtraction expander.  Operand 1 must be a
;; register, operand 2 may be memory; ix86_fixup_binary_operands_no_copy
;; adjusts the operands for MINUS.
398 (define_expand "sub<mode>3"
399 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
401 (match_operand:SSEMODEF2P 1 "register_operand" "")
402 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
403 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
404 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; subps / subpd.  Operand 1 is tied to the destination and is not marked
;; commutative; operand 2 may be an xmm register or memory.
406 (define_insn "*sub<mode>3"
407 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
409 (match_operand:SSEMODEF2P 1 "register_operand" "0")
410 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
411 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
412 "subp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
413 [(set_attr "type" "sseadd")
414 (set_attr "mode" "<MODE>")])
;; subss / subsd, expressed as a vec_merge.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.  The "mode"
;; attribute is the scalar element mode.
416 (define_insn "<sse>_vmsub<mode>3"
417 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
418 (vec_merge:SSEMODEF2P
420 (match_operand:SSEMODEF2P 1 "register_operand" "0")
421 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
424 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
425 "subs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
426 [(set_attr "type" "sseadd")
427 (set_attr "mode" "<ssescalarmode>")])
;; Packed floating-point multiplication expander.  Both inputs may be
;; memory; ix86_fixup_binary_operands_no_copy adjusts the operands for MULT.
429 (define_expand "mul<mode>3"
430 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
432 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
433 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
434 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
435 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; mulps / mulpd.  Operand 1 is tied to the destination ("0") and marked
;; commutative with operand 2 ("%"); operand 2 may be an xmm register or
;; memory.
437 (define_insn "*mul<mode>3"
438 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
440 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
441 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
442 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
443 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
444 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
445 [(set_attr "type" "ssemul")
446 (set_attr "mode" "<MODE>")])
;; mulss / mulsd, expressed as a vec_merge.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.  The "mode"
;; attribute is the scalar element mode.
;; The mnemonic suffix comes from <ssemodesuffixf2c> ("s" for V4SF, "d"
;; for V2DF).  The template previously named <ssemodesuffix2c>, which is
;; not one of the mode attributes defined at the top of this file.
448 (define_insn "<sse>_vmmul<mode>3"
449 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
450 (vec_merge:SSEMODEF2P
452 (match_operand:SSEMODEF2P 1 "register_operand" "0")
453 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
456 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
457 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
458 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
459 [(set_attr "type" "ssemul")
460 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  After fixing up the operands for DIV, the
;; expansion calls ix86_emit_swdivsf when SSE math and TARGET_RECIP are
;; enabled, the function is not optimized for size, and
;; -ffinite-math-only, -fno-trapping-math and -funsafe-math-optimizations
;; are all in effect.
462 (define_expand "divv4sf3"
463 [(set (match_operand:V4SF 0 "register_operand" "")
464 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
465 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
468 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
470 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
471 && flag_finite_math_only && !flag_trapping_math
472 && flag_unsafe_math_optimizations)
474 ix86_emit_swdivsf (operands[0], operands[1],
475 operands[2], V4SFmode);
;; V2DF division expander; only fixes up the operands for DIV before the
;; pattern is emitted.
480 (define_expand "divv2df3"
481 [(set (match_operand:V2DF 0 "register_operand" "")
482 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
483 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
485 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divps / divpd.  Operand 1 is tied to the destination; operand 2 may be
;; an xmm register or memory.
;; The mnemonic suffix comes from <ssemodesuffixf2c> ("s" for V4SF, "d"
;; for V2DF).  The template previously named <ssemodesuffix2c>, which is
;; not one of the mode attributes defined at the top of this file.
487 (define_insn "<sse>_div<mode>3"
488 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
490 (match_operand:SSEMODEF2P 1 "register_operand" "0")
491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
492 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
493 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
494 [(set_attr "type" "ssediv")
495 (set_attr "mode" "<MODE>")])
;; divss / divsd, expressed as a vec_merge.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.  The "mode"
;; attribute is the scalar element mode.
;; The mnemonic suffix comes from <ssemodesuffixf2c> ("s" for V4SF, "d"
;; for V2DF).  The template previously named <ssemodesuffix2c>, which is
;; not one of the mode attributes defined at the top of this file.
497 (define_insn "<sse>_vmdiv<mode>3"
498 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
499 (vec_merge:SSEMODEF2P
501 (match_operand:SSEMODEF2P 1 "register_operand" "0")
502 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
505 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
507 [(set_attr "type" "ssediv")
508 (set_attr "mode" "<ssescalarmode>")])
;; rcpps: UNSPEC_RCP applied to a V4SF operand taken from an xmm register
;; or memory.
510 (define_insn "sse_rcpv4sf2"
511 [(set (match_operand:V4SF 0 "register_operand" "=x")
513 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
515 "rcpps\t{%1, %0|%0, %1}"
516 [(set_attr "type" "sse")
517 (set_attr "mode" "V4SF")])
;; rcpss: scalar form.  Operand 1 is the source (xmm or memory); operand 2
;; is tied to the destination ("0").  The "mode" attribute is SF.
519 (define_insn "sse_vmrcpv4sf2"
520 [(set (match_operand:V4SF 0 "register_operand" "=x")
522 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
524 (match_operand:V4SF 2 "register_operand" "0")
527 "rcpss\t{%1, %0|%0, %1}"
528 [(set_attr "type" "sse")
529 (set_attr "mode" "SF")])
;; V4SF square-root expander.  Calls ix86_emit_swsqrtsf (final argument 0)
;; when SSE math and TARGET_RECIP are enabled, the function is not
;; optimized for size, and -ffinite-math-only, -fno-trapping-math and
;; -funsafe-math-optimizations are all in effect.
531 (define_expand "sqrtv4sf2"
532 [(set (match_operand:V4SF 0 "register_operand" "")
533 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
536 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
537 && flag_finite_math_only && !flag_trapping_math
538 && flag_unsafe_math_optimizations)
540 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps: V4SF square root of an xmm register or memory operand.
545 (define_insn "sse_sqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
547 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
549 "sqrtps\t{%1, %0|%0, %1}"
550 [(set_attr "type" "sse")
551 (set_attr "mode" "V4SF")])
;; sqrtpd: V2DF square root of an xmm register or memory operand.
553 (define_insn "sqrtv2df2"
554 [(set (match_operand:V2DF 0 "register_operand" "=x")
555 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
557 "sqrtpd\t{%1, %0|%0, %1}"
558 [(set_attr "type" "sse")
559 (set_attr "mode" "V2DF")])
;; sqrtss / sqrtsd, expressed as a vec_merge.  Operand 1 is the source
;; (xmm or memory); operand 2 is tied to the destination ("0").  The
;; "mode" attribute is the scalar element mode.
;; The mnemonic suffix comes from <ssemodesuffixf2c> ("s" for V4SF, "d"
;; for V2DF).  The template previously named <ssemodesuffix2c>, which is
;; not one of the mode attributes defined at the top of this file.
561 (define_insn "<sse>_vmsqrt<mode>2"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
563 (vec_merge:SSEMODEF2P
565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
566 (match_operand:SSEMODEF2P 2 "register_operand" "0")
568 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
569 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
570 [(set_attr "type" "sse")
571 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT); calls
;; ix86_emit_swsqrtsf with final argument 1.
573 (define_expand "rsqrtv4sf2"
574 [(set (match_operand:V4SF 0 "register_operand" "")
576 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
579 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps: UNSPEC_RSQRT applied to a V4SF operand taken from an xmm
;; register or memory.
583 (define_insn "sse_rsqrtv4sf2"
584 [(set (match_operand:V4SF 0 "register_operand" "=x")
586 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
588 "rsqrtps\t{%1, %0|%0, %1}"
589 [(set_attr "type" "sse")
590 (set_attr "mode" "V4SF")])
;; rsqrtss: scalar form.  Operand 1 is the source (xmm or memory); operand
;; 2 is tied to the destination ("0").  The "mode" attribute is SF.
592 (define_insn "sse_vmrsqrtv4sf2"
593 [(set (match_operand:V4SF 0 "register_operand" "=x")
595 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
597 (match_operand:V4SF 2 "register_operand" "0")
600 "rsqrtss\t{%1, %0|%0, %1}"
601 [(set_attr "type" "sse")
602 (set_attr "mode" "SF")])
604 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
605 ;; isn't really correct, as those rtl operators aren't defined when
606 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed floating-point minimum expander.  Without -ffinite-math-only,
;; operand 1 is forced into a register first, which matches the
;; register-only operand 1 of the non-finite *smin<mode>3 insn; the
;; operands are then fixed up for SMIN.
608 (define_expand "smin<mode>3"
609 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
611 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
612 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
613 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
615 if (!flag_finite_math_only)
616 operands[1] = force_reg (<MODE>mode, operands[1]);
617 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
;; minps / minpd, -ffinite-math-only variant: operand 1 is marked
;; commutative with operand 2 ("%0").
620 (define_insn "*smin<mode>3_finite"
621 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
623 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
624 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
625 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
626 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
627 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
628 [(set_attr "type" "sseadd")
629 (set_attr "mode" "<MODE>")])
;; minps / minpd, general variant: operand 1 must be a register tied to
;; the destination and is not marked commutative.
631 (define_insn "*smin<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
634 (match_operand:SSEMODEF2P 1 "register_operand" "0")
635 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
636 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
638 [(set_attr "type" "sseadd")
639 (set_attr "mode" "<MODE>")])
;; minss / minsd, expressed as a vec_merge.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.
;; NOTE(review): "type" is "sse" here but "sseadd" in
;; <sse>_vmsmax<mode>3 -- confirm which is intended.
641 (define_insn "<sse>_vmsmin<mode>3"
642 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
643 (vec_merge:SSEMODEF2P
645 (match_operand:SSEMODEF2P 1 "register_operand" "0")
646 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
649 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
650 "mins<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
651 [(set_attr "type" "sse")
652 (set_attr "mode" "<ssescalarmode>")])
;; Packed floating-point maximum expander.  Without -ffinite-math-only,
;; operand 1 is forced into a register first, which matches the
;; register-only operand 1 of the non-finite *smax<mode>3 insn; the
;; operands are then fixed up for SMAX.
654 (define_expand "smax<mode>3"
655 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
657 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
658 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
659 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
661 if (!flag_finite_math_only)
662 operands[1] = force_reg (<MODE>mode, operands[1]);
663 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
;; maxps / maxpd, -ffinite-math-only variant: operand 1 is marked
;; commutative with operand 2 ("%0").
666 (define_insn "*smax<mode>3_finite"
667 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
669 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
670 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
671 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
672 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
673 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
674 [(set_attr "type" "sseadd")
675 (set_attr "mode" "<MODE>")])
;; maxps / maxpd, general variant: operand 1 must be a register tied to
;; the destination and is not marked commutative.
;; The mnemonic suffix comes from <ssemodesuffixf2c> ("s" for V4SF, "d"
;; for V2DF), as in *smax<mode>3_finite.  The template previously named
;; <ssemodesuffix2c>, which is not one of the mode attributes defined at
;; the top of this file.
677 (define_insn "*smax<mode>3"
678 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
680 (match_operand:SSEMODEF2P 1 "register_operand" "0")
681 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
682 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
683 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
684 [(set_attr "type" "sseadd")
685 (set_attr "mode" "<MODE>")])
;; maxss / maxsd, expressed as a vec_merge.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.  The "mode"
;; attribute is the scalar element mode.
;; The mnemonic suffix comes from <ssemodesuffixf2c> ("s" for V4SF, "d"
;; for V2DF), as in <sse>_vmsmin<mode>3.  The template previously named
;; <ssemodesuffix2c>, which is not one of the mode attributes defined at
;; the top of this file.
687 (define_insn "<sse>_vmsmax<mode>3"
688 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
689 (vec_merge:SSEMODEF2P
691 (match_operand:SSEMODEF2P 1 "register_operand" "0")
692 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
695 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
696 "maxs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
697 [(set_attr "type" "sseadd")
698 (set_attr "mode" "<ssescalarmode>")])
700 ;; These versions of the min/max patterns implement exactly the operations
701 ;; min = (op1 < op2 ? op1 : op2)
702 ;; max = (!(op1 < op2) ? op1 : op2)
703 ;; Their operands are not commutative, and thus they may be used in the
704 ;; presence of -0.0 and NaN.
;; minps / minpd with a fixed operand order: operand 1 is a register tied
;; to the destination, operand 2 may be an xmm register or memory, and
;; neither is marked commutative.
706 (define_insn "*ieee_smin<mode>3"
707 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
709 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
710 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
712 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
713 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
714 [(set_attr "type" "sseadd")
715 (set_attr "mode" "<MODE>")])
;; maxps / maxpd with a fixed operand order: operand 1 is a register tied
;; to the destination, operand 2 may be an xmm register or memory, and
;; neither is marked commutative.
717 (define_insn "*ieee_smax<mode>3"
718 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
720 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
721 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
723 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
724 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
725 [(set_attr "type" "sseadd")
726 (set_attr "mode" "<MODE>")])
;; addsubps: combines an operation on operands 1 and 2 with their
;; difference (the minus of the same two operands is visible below).
;; Operand 1 is tied to the destination; operand 2 may be xmm or memory.
728 (define_insn "sse3_addsubv4sf3"
729 [(set (match_operand:V4SF 0 "register_operand" "=x")
732 (match_operand:V4SF 1 "register_operand" "0")
733 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
734 (minus:V4SF (match_dup 1) (match_dup 2))
737 "addsubps\t{%2, %0|%0, %2}"
738 [(set_attr "type" "sseadd")
739 (set_attr "prefix_rep" "1")
740 (set_attr "mode" "V4SF")])
;; addsubpd: V2DF counterpart of sse3_addsubv4sf3.  Operand 1 is tied to
;; the destination; operand 2 may be xmm or memory.
742 (define_insn "sse3_addsubv2df3"
743 [(set (match_operand:V2DF 0 "register_operand" "=x")
746 (match_operand:V2DF 1 "register_operand" "0")
747 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
748 (minus:V2DF (match_dup 1) (match_dup 2))
751 "addsubpd\t{%2, %0|%0, %2}"
752 [(set_attr "type" "sseadd")
753 (set_attr "mode" "V2DF")])
;; haddps: the RTL selects elements 0..3 of operand 1 and then elements
;; 0..3 of operand 2, paired as (0,1) and (2,3) within each operand.
;; Operand 1 is tied to the destination; operand 2 may be xmm or memory.
755 (define_insn "sse3_haddv4sf3"
756 [(set (match_operand:V4SF 0 "register_operand" "=x")
761 (match_operand:V4SF 1 "register_operand" "0")
762 (parallel [(const_int 0)]))
763 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
765 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
766 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
770 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
771 (parallel [(const_int 0)]))
772 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
774 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
775 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
777 "haddps\t{%2, %0|%0, %2}"
778 [(set_attr "type" "sseadd")
779 (set_attr "prefix_rep" "1")
780 (set_attr "mode" "V4SF")])
;; haddpd: the RTL selects elements 0 and 1 of operand 1 and then elements
;; 0 and 1 of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be xmm or memory.
782 (define_insn "sse3_haddv2df3"
783 [(set (match_operand:V2DF 0 "register_operand" "=x")
787 (match_operand:V2DF 1 "register_operand" "0")
788 (parallel [(const_int 0)]))
789 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
792 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
793 (parallel [(const_int 0)]))
794 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
796 "haddpd\t{%2, %0|%0, %2}"
797 [(set_attr "type" "sseadd")
798 (set_attr "mode" "V2DF")])
;; hsubps: the RTL selects elements 0..3 of operand 1 and then elements
;; 0..3 of operand 2, paired as (0,1) and (2,3) within each operand.
;; Operand 1 is tied to the destination; operand 2 may be xmm or memory.
800 (define_insn "sse3_hsubv4sf3"
801 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (match_operand:V4SF 1 "register_operand" "0")
807 (parallel [(const_int 0)]))
808 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
810 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
811 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
815 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
816 (parallel [(const_int 0)]))
817 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
819 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
820 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
822 "hsubps\t{%2, %0|%0, %2}"
823 [(set_attr "type" "sseadd")
824 (set_attr "prefix_rep" "1")
825 (set_attr "mode" "V4SF")])
;; hsubpd: the RTL selects elements 0 and 1 of operand 1 and then elements
;; 0 and 1 of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be xmm or memory.
827 (define_insn "sse3_hsubv2df3"
828 [(set (match_operand:V2DF 0 "register_operand" "=x")
832 (match_operand:V2DF 1 "register_operand" "0")
833 (parallel [(const_int 0)]))
834 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
837 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
838 (parallel [(const_int 0)]))
839 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
841 "hsubpd\t{%2, %0|%0, %2}"
842 [(set_attr "type" "sseadd")
843 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector.  Two expansions are visible: haddps
;; applied twice through a temporary register, or the generic
;; ix86_expand_reduc_v4sf helper driven by gen_addv4sf3.
;; NOTE(review): the test selecting between them is not visible here;
;; presumably SSE3 availability -- confirm.
845 (define_expand "reduc_splus_v4sf"
846 [(match_operand:V4SF 0 "register_operand" "")
847 (match_operand:V4SF 1 "register_operand" "")]
852 rtx tmp = gen_reg_rtx (V4SFmode);
853 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
854 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
857 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single haddpd of operand 1 with
;; itself.
861 (define_expand "reduc_splus_v2df"
862 [(match_operand:V2DF 0 "register_operand" "")
863 (match_operand:V2DF 1 "register_operand" "")]
866 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector via ix86_expand_reduc_v4sf driven by
;; gen_smaxv4sf3.
870 (define_expand "reduc_smax_v4sf"
871 [(match_operand:V4SF 0 "register_operand" "")
872 (match_operand:V4SF 1 "register_operand" "")]
875 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector via ix86_expand_reduc_v4sf driven by
;; gen_sminv4sf3.
879 (define_expand "reduc_smin_v4sf"
880 [(match_operand:V4SF 0 "register_operand" "")
881 (match_operand:V4SF 1 "register_operand" "")]
884 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
888 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
890 ;; Parallel floating point comparisons
892 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>{ss,sd,ps,pd}: mask-producing comparison for scalar and packed
;; floating-point modes.  Operator 3 must satisfy sse_comparison_operator
;; and is printed through %D3; operand 1 is tied to the destination and
;; operand 2 may be xmm or memory.
894 (define_insn "<sse>_maskcmp<mode>3"
895 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
896 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
897 [(match_operand:SSEMODEF4 1 "register_operand" "0")
898 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
899 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
901 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
902 [(set_attr "type" "ssecmp")
903 (set_attr "mode" "<MODE>")])
;; cmp<cond>ss / cmp<cond>sd expressed as a vec_merge; disabled when
;; TARGET_SSE5 is set.  Operator 3 must satisfy sse_comparison_operator and
;; is printed through %D3.
905 (define_insn "<sse>_vmmaskcmp<mode>3"
906 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907 (vec_merge:SSEMODEF2P
908 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
909 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
910 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
913 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
914 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
915 [(set_attr "type" "ssecmp")
916 (set_attr "mode" "<ssescalarmode>")])
;; comiss / comisd: sets the flags register in CCFP mode from element 0 of
;; operand 0 (xmm) and element 0 of operand 1 (xmm or memory).
;; <ssevecmode> and <ssemodefsuffix> are mode attributes that are not
;; defined in the visible part of this file.
918 (define_insn "<sse>_comi"
919 [(set (reg:CCFP FLAGS_REG)
922 (match_operand:<ssevecmode> 0 "register_operand" "x")
923 (parallel [(const_int 0)]))
925 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
926 (parallel [(const_int 0)]))))]
927 "SSE_FLOAT_MODE_P (<MODE>mode)"
928 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
929 [(set_attr "type" "ssecomi")
930 (set_attr "mode" "<MODE>")])
;; ucomiss / ucomisd: sets the flags register in CCFPU mode from element 0
;; of operand 0 (xmm) and element 0 of operand 1 (xmm or memory).
;; <ssevecmode> and <ssemodefsuffix> are mode attributes that are not
;; defined in the visible part of this file.
932 (define_insn "<sse>_ucomi"
933 [(set (reg:CCFPU FLAGS_REG)
936 (match_operand:<ssevecmode> 0 "register_operand" "x")
937 (parallel [(const_int 0)]))
939 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
940 (parallel [(const_int 0)]))))]
941 "SSE_FLOAT_MODE_P (<MODE>mode)"
942 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
943 [(set_attr "type" "ssecomi")
944 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the packed floating-point modes:
;; operands 4 and 5 are the values compared, operands 1 and 2 the two
;; alternatives.  The work is delegated to ix86_expand_fp_vcond, whose
;; result is tested below.
946 (define_expand "vcond<mode>"
947 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
948 (if_then_else:SSEMODEF2P
950 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
951 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
952 (match_operand:SSEMODEF2P 1 "general_operand" "")
953 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
954 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
956 if (ix86_expand_fp_vcond (operands))
962 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
964 ;; Parallel floating point logical operations
966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise AND expander for the packed floating-point modes; fixes up the
;; operands for AND before the pattern is emitted.
968 (define_expand "and<mode>3"
969 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
971 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
972 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
973 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
974 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; andps / andpd.  Operand 1 is tied to the destination ("0") and marked
;; commutative with operand 2 ("%"); operand 2 may be an xmm register or
;; memory.
;; The operand check passes <MODE>mode, as *ior<mode>3 and *xor<mode>3 do,
;; rather than a hard-coded V4SFmode, because the pattern is also
;; instantiated for V2DF.
976 (define_insn "*and<mode>3"
977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
979 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
980 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
981 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
982 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
983 "andp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
984 [(set_attr "type" "sselog")
985 (set_attr "mode" "<MODE>")])
;; andnps / andnpd.  Operand 1 is a register tied to the destination;
;; operand 2 may be an xmm register or memory.
987 (define_insn "<sse>_nand<mode>3"
988 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
991 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
992 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
993 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
994 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
995 [(set_attr "type" "sselog")
996 (set_attr "mode" "<MODE>")])
;; Bitwise OR expander for the packed floating-point modes; fixes up the
;; operands for IOR before the pattern is emitted.
998 (define_expand "ior<mode>3"
999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1001 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1002 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1003 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1004 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; orps / orpd.  Operand 1 is tied to the destination ("0") and marked
;; commutative with operand 2 ("%"); operand 2 may be an xmm register or
;; memory.
1006 (define_insn "*ior<mode>3"
1007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1009 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1010 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1011 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1012 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
1013 "orp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1014 [(set_attr "type" "sselog")
1015 (set_attr "mode" "<MODE>")])
;; Expander for the named pattern xor<mode>3 over V4SF/V2DF (SSEMODEF2P).
;; Operands are handed to ix86_fixup_binary_operands_no_copy with code XOR
;; before the matching insn is generated.
1017 (define_expand "xor<mode>3"
1018 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1020 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1021 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1022 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1023 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Packed float bitwise XOR: emits xorps (V4SF) or xorpd (V2DF).
;; Operand 1 is tied to the destination and commutative ("%0"); operand 2
;; may be register or memory.  Additionally gated on
;; ix86_binary_operator_ok for code XOR.
1025 (define_insn "*xor<mode>3"
1026 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1028 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1029 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1030 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1031 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
1032 "xorp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1033 [(set_attr "type" "sselog")
1034 (set_attr "mode" "<MODE>")])
1036 ;; Also define scalar versions. These are used for abs, neg, and
1037 ;; conditional move. Using subregs into vector modes causes register
1038 ;; allocation lossage. These patterns do not allow memory operands
1039 ;; because the native instructions read the full 128-bits.
;; Scalar-mode AND implemented with the packed andp* instruction.  All
;; operands are SSE registers; operand 1 is tied to the destination.
;; The MODEF iterator and the ssemodefsuffix / ssevecmode attributes are
;; defined outside this part of the file.
1041 (define_insn "*and<mode>3"
1042 [(set (match_operand:MODEF 0 "register_operand" "=x")
1044 (match_operand:MODEF 1 "register_operand" "0")
1045 (match_operand:MODEF 2 "register_operand" "x")))]
1046 "SSE_FLOAT_MODE_P (<MODE>mode)"
1047 "andp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1048 [(set_attr "type" "sselog")
1049 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode and-not implemented with the packed andnp* instruction.
;; Register operands only; operand 1 is tied to the destination.
;; NOTE(review): the operator wrapping operand 1 is not visible in this
;; extract (presumably `not`) -- confirm against the full file.
1051 (define_insn "*nand<mode>3"
1052 [(set (match_operand:MODEF 0 "register_operand" "=x")
1055 (match_operand:MODEF 1 "register_operand" "0"))
1056 (match_operand:MODEF 2 "register_operand" "x")))]
1057 "SSE_FLOAT_MODE_P (<MODE>mode)"
1058 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1059 [(set_attr "type" "sselog")
1060 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode OR implemented with the packed orp* instruction.
;; Register operands only; operand 1 is tied to the destination.
1062 (define_insn "*ior<mode>3"
1063 [(set (match_operand:MODEF 0 "register_operand" "=x")
1065 (match_operand:MODEF 1 "register_operand" "0")
1066 (match_operand:MODEF 2 "register_operand" "x")))]
1067 "SSE_FLOAT_MODE_P (<MODE>mode)"
1068 "orp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1069 [(set_attr "type" "sselog")
1070 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode XOR implemented with the packed xorp* instruction.
;; Register operands only; operand 1 is tied to the destination.
1072 (define_insn "*xor<mode>3"
1073 [(set (match_operand:MODEF 0 "register_operand" "=x")
1075 (match_operand:MODEF 1 "register_operand" "0")
1076 (match_operand:MODEF 2 "register_operand" "x")))]
1077 "SSE_FLOAT_MODE_P (<MODE>mode)"
1078 "xorp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1079 [(set_attr "type" "sselog")
1080 (set_attr "mode" "<ssevecmode>")])
1082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1084 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1085 ;; scalar version of the instructions as well as the vector
1087 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1089 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1090 ;; combine to generate a multiply/add with two memory references. We then
1091 ;; split this insn, into loading up the destination register with one of the
1092 ;; memory operations. If we don't manage to split the insn, reload will
1093 ;; generate the appropriate moves. The reason this is needed, is that combine
1094 ;; has already folded one of the memory references into both the multiply and
1095 ;; add insns, and it can't generate a new pseudo. I.e.:
1096 ;; (set (reg1) (mem (addr1)))
1097 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1098 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add over SSEMODEF4 (SF, DF, V4SF, V2DF); emits
;; fmaddss/fmaddsd/fmaddps/fmaddpd with operands 1 and 2 as the factors and
;; operand 3 as the addend.  Four constraint alternatives tie the
;; destination either to operand 1 or to operand 3.  Requires TARGET_SSE5
;; and TARGET_FUSED_MADD; ix86_sse5_valid_op_p is called with a final
;; argument of 2 (presumably the permitted number of memory operands, as
;; the comment above describes -- confirm in i386.c).
1100 (define_insn "sse5_fmadd<mode>4"
1101 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1104 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1105 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1106 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1107 "TARGET_SSE5 && TARGET_FUSED_MADD
1108 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1109 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1110 [(set_attr "type" "ssemuladd")
1111 (set_attr "mode" "<MODE>")])
;; Applies when the operands fail ix86_sse5_valid_op_p with a final argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any source operand.  The replacement calls
;; ix86_expand_sse5_multiple_memory on the operands and then re-emits
;; sse5_fmadd<mode>4.
1113 ;; Split fmadd with two memory operands into a load and the fmadd.
1115 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1118 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1119 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1120 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1122 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1123 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1124 && !reg_mentioned_p (operands[0], operands[1])
1125 && !reg_mentioned_p (operands[0], operands[2])
1126 && !reg_mentioned_p (operands[0], operands[3])"
1129 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1130 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1131 operands[2], operands[3]));
1135 ;; For the scalar operations, use operand1 for the upper words that aren't
1136 ;; modified, so restrict the forms that are generated.
1137 ;; Scalar version of fmadd
;; Emits fmaddss (V4SF) or fmaddsd (V2DF) on vector-mode operands, with the
;; result expressed as a vec_merge.  Operand 1 is tied to the destination
;; in both alternatives; exactly one of operands 2 and 3 may be memory.
;; ix86_sse5_valid_op_p is called with a final argument of 1.
1138 (define_insn "sse5_vmfmadd<mode>4"
1139 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1140 (vec_merge:SSEMODEF2P
1143 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1144 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1145 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1148 "TARGET_SSE5 && TARGET_FUSED_MADD
1149 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1150 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1151 [(set_attr "type" "ssemuladd")
1152 (set_attr "mode" "<MODE>")])
1154 ;; Floating multiply and subtract
1155 ;; Allow two memory operands the same as fmadd
;; SSE5 fused multiply-subtract over SSEMODEF4; emits
;; fmsubss/fmsubsd/fmsubps/fmsubpd.  Same operand layout and constraint
;; alternatives as sse5_fmadd<mode>4: operands 1 and 2 are the factors and
;; the destination is tied to operand 1 or operand 3.
1156 (define_insn "sse5_fmsub<mode>4"
1157 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1160 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1161 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1162 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1163 "TARGET_SSE5 && TARGET_FUSED_MADD
1164 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1165 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1166 [(set_attr "type" "ssemuladd")
1167 (set_attr "mode" "<MODE>")])
;; Applies when the operands fail ix86_sse5_valid_op_p with a final argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any source operand.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmsub<mode>4.
1169 ;; Split fmsub with two memory operands into a load and the fmsub.
1171 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1174 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1175 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1176 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1178 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1179 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1180 && !reg_mentioned_p (operands[0], operands[1])
1181 && !reg_mentioned_p (operands[0], operands[2])
1182 && !reg_mentioned_p (operands[0], operands[3])"
1185 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1186 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1187 operands[2], operands[3]));
1191 ;; For the scalar operations, use operand1 for the upper words that aren't
1192 ;; modified, so restrict the forms that are generated.
1193 ;; Scalar version of fmsub
;; Emits fmsubss (V4SF) or fmsubsd (V2DF) on vector-mode operands, with the
;; result expressed as a vec_merge.  Operand 1 is tied to the destination
;; in both alternatives; exactly one of operands 2 and 3 may be memory.
1194 (define_insn "sse5_vmfmsub<mode>4"
1195 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1196 (vec_merge:SSEMODEF2P
1199 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1200 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1201 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1204 "TARGET_SSE5 && TARGET_FUSED_MADD
1205 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1206 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1207 [(set_attr "type" "ssemuladd")
1208 (set_attr "mode" "<MODE>")])
1210 ;; Floating point negative multiply and add
1211 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1212 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1213 ;; Allow two memory operands to help in optimizing.
;; SSE5 negated multiply-add over SSEMODEF4; emits
;; fnmaddss/fnmaddsd/fnmaddps/fnmaddpd.  In the RTL template operand 3
;; appears first and operands 1 and 2 (the factors) follow; the operand
;; numbers and constraint alternatives are the same as for
;; sse5_fmadd<mode>4, so the assembler template is unchanged in shape.
1214 (define_insn "sse5_fnmadd<mode>4"
1215 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1217 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1219 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1220 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1221 "TARGET_SSE5 && TARGET_FUSED_MADD
1222 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1223 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1224 [(set_attr "type" "ssemuladd")
1225 (set_attr "mode" "<MODE>")])
;; Applies when the operands fail ix86_sse5_valid_op_p with a final argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any source operand.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmadd<mode>4.
1227 ;; Split fnmadd with two memory operands into a load and the fnmadd.
1229 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1231 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1233 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1234 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1236 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1237 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1238 && !reg_mentioned_p (operands[0], operands[1])
1239 && !reg_mentioned_p (operands[0], operands[2])
1240 && !reg_mentioned_p (operands[0], operands[3])"
1243 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1244 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1245 operands[2], operands[3]));
1249 ;; For the scalar operations, use operand1 for the upper words that aren't
1250 ;; modified, so restrict the forms that are generated.
1251 ;; Scalar version of fnmadd
;; Emits fnmaddss (V4SF) or fnmaddsd (V2DF) on vector-mode operands, with
;; the result expressed as a vec_merge.  Operand 3 appears first in the
;; template; operand 1 is tied to the destination in both alternatives.
1252 (define_insn "sse5_vmfnmadd<mode>4"
1253 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1254 (vec_merge:SSEMODEF2P
1256 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1258 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1259 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1262 "TARGET_SSE5 && TARGET_FUSED_MADD
1263 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1264 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1265 [(set_attr "type" "ssemuladd")
1266 (set_attr "mode" "<MODE>")])
1268 ;; Floating point negative multiply and subtract
1269 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1270 ;; Allow 2 memory operands to help with optimization
;; SSE5 negated multiply-subtract over SSEMODEF4; emits
;; fnmsubss/fnmsubsd/fnmsubps/fnmsubpd.  Unlike the fmadd/fmsub/fnmadd
;; patterns this one has only two constraint alternatives, both tying
;; operand 1 to the destination, and operand 1 carries no "%" commutative
;; marker.
1271 (define_insn "sse5_fnmsub<mode>4"
1272 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1276 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1277 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1278 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1279 "TARGET_SSE5 && TARGET_FUSED_MADD
1280 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1281 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1282 [(set_attr "type" "ssemuladd")
1283 (set_attr "mode" "<MODE>")])
;; Applies when the operands fail ix86_sse5_valid_op_p with a final argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any source operand.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmsub<mode>4.
1285 ;; Split fnmsub with two memory operands into a load and the fnmsub.
1287 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1291 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1292 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1293 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1295 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1296 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1297 && !reg_mentioned_p (operands[0], operands[1])
1298 && !reg_mentioned_p (operands[0], operands[2])
1299 && !reg_mentioned_p (operands[0], operands[3])"
1302 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1303 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1304 operands[2], operands[3]));
1308 ;; For the scalar operations, use operand1 for the upper words that aren't
1309 ;; modified, so restrict the forms that are generated.
1310 ;; Scalar version of fnmsub
;; Emits fnmsubss (V4SF) or fnmsubsd (V2DF) on vector-mode operands, with
;; the result expressed as a vec_merge.  Operand 1 is tied to the
;; destination in both alternatives.
;; NOTE(review): this condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p, whereas sse5_vmfmadd, sse5_vmfmsub and
;; sse5_vmfnmadd pass 1, and each alternative here allows only one memory
;; operand.  Possibly an oversight -- confirm before changing.
1311 (define_insn "sse5_vmfnmsub<mode>4"
1312 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1313 (vec_merge:SSEMODEF2P
1317 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1318 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1319 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1322 "TARGET_SSE5 && TARGET_FUSED_MADD
1323 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1324 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1325 [(set_attr "type" "ssemuladd")
1326 (set_attr "mode" "<MODE>")])
1328 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1329 ;; even if the user used -mno-fused-madd
1330 ;; Parallel instructions. During instruction generation, just default
1331 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for packed fmadd (V4SF/V2DF).  All inputs are
;; required to be registers.  The default expansion wraps the operation in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation
;; code emits the plain sse5_fmadd<mode>4 insn instead.
1332 (define_expand "sse5i_fmadd<mode>4"
1333 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1337 (match_operand:SSEMODEF2P 1 "register_operand" "")
1338 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1339 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1340 UNSPEC_SSE5_INTRINSIC))]
1343 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1344 if (TARGET_FUSED_MADD)
1346 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1347 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fmadd; emits fmaddps/fmaddpd.
;; Gated on TARGET_SSE5 only (TARGET_FUSED_MADD is not tested), with
;; ix86_sse5_valid_op_p called with a final argument of 1.  Constraint
;; alternatives tie the destination to operand 1 or operand 3.
1352 (define_insn "*sse5i_fmadd<mode>4"
1353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1357 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1358 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1359 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1360 UNSPEC_SSE5_INTRINSIC))]
1361 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1362 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1363 [(set_attr "type" "ssemuladd")
1364 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for packed fmsub (V4SF/V2DF).  All inputs are
;; required to be registers.  The default expansion wraps the operation in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation
;; code emits the plain sse5_fmsub<mode>4 insn instead.
1366 (define_expand "sse5i_fmsub<mode>4"
1367 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1371 (match_operand:SSEMODEF2P 1 "register_operand" "")
1372 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1373 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1374 UNSPEC_SSE5_INTRINSIC))]
1377 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1378 if (TARGET_FUSED_MADD)
1380 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1381 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fmsub; emits fmsubps/fmsubpd.
;; Gated on TARGET_SSE5 only, with ix86_sse5_valid_op_p called with a final
;; argument of 1.  Constraint alternatives tie the destination to operand 1
;; or operand 3.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint alternative and with the other *sse5i_* packed
;; insns; it was previously register_operand.
1386 (define_insn "*sse5i_fmsub<mode>4"
1387 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1391 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1392 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1393 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1394 UNSPEC_SSE5_INTRINSIC))]
1395 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1396 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1397 [(set_attr "type" "ssemuladd")
1398 (set_attr "mode" "<MODE>")])
1400 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1401 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for packed fnmadd (V4SF/V2DF).  Operand 3 appears
;; first in the template, followed by the factors (operands 1 and 2).  The
;; default expansion uses UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is
;; set the preparation code emits sse5_fnmadd<mode>4 instead.
1402 (define_expand "sse5i_fnmadd<mode>4"
1403 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1406 (match_operand:SSEMODEF2P 3 "register_operand" "")
1408 (match_operand:SSEMODEF2P 1 "register_operand" "")
1409 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1410 UNSPEC_SSE5_INTRINSIC))]
1413 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1414 if (TARGET_FUSED_MADD)
1416 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1417 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fnmadd; emits fnmaddps/fnmaddpd.
;; Gated on TARGET_SSE5 only, with ix86_sse5_valid_op_p called with a final
;; argument of 1.  Operand 3 is listed first in the template.
1422 (define_insn "*sse5i_fnmadd<mode>4"
1423 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1426 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1428 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1429 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1430 UNSPEC_SSE5_INTRINSIC))]
1431 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1432 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1433 [(set_attr "type" "ssemuladd")
1434 (set_attr "mode" "<MODE>")])
1436 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander for packed fnmsub (V4SF/V2DF).  All inputs are
;; required to be registers.  The default expansion uses
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation
;; code emits sse5_fnmsub<mode>4 instead.
1437 (define_expand "sse5i_fnmsub<mode>4"
1438 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1443 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1444 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1445 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1446 UNSPEC_SSE5_INTRINSIC))]
1449 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1450 if (TARGET_FUSED_MADD)
1452 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1453 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of packed fnmsub; emits fnmsubps/fnmsubpd.
;; Gated on TARGET_SSE5 only, with ix86_sse5_valid_op_p called with a final
;; argument of 1.  Four constraint alternatives tie the destination to
;; operand 1 or operand 3.
1458 (define_insn "*sse5i_fnmsub<mode>4"
1459 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1464 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1465 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1466 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1467 UNSPEC_SSE5_INTRINSIC))]
1468 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1469 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1470 [(set_attr "type" "ssemuladd")
1471 (set_attr "mode" "<MODE>")])
1473 ;; Scalar instructions
;; Intrinsic expander for the vec_merge (scalar-element) fmadd on
;; V4SF/V2DF registers.  The default expansion uses UNSPEC_SSE5_INTRINSIC;
;; when TARGET_FUSED_MADD is set the preparation code emits
;; sse5_vmfmadd<mode>4 instead.
1474 (define_expand "sse5i_vmfmadd<mode>4"
1475 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1477 [(vec_merge:SSEMODEF2P
1480 (match_operand:SSEMODEF2P 1 "register_operand" "")
1481 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1482 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1485 UNSPEC_SSE5_INTRINSIC))]
1488 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1489 if (TARGET_FUSED_MADD)
1491 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1492 operands[2], operands[3]));
1497 ;; For the scalar operations, use operand1 for the upper words that aren't
1498 ;; modified, so restrict the forms that are accepted.
;; UNSPEC_SSE5_INTRINSIC form of the vec_merge fmadd; emits
;; fmaddss/fmaddsd.  Operand 1 must be a register tied to the destination;
;; one of operands 2 and 3 may be memory.  The "mode" attribute uses
;; <ssescalarmode>, which is defined outside this part of the file.
1499 (define_insn "*sse5i_vmfmadd<mode>4"
1500 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1502 [(vec_merge:SSEMODEF2P
1505 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1506 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1507 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1510 UNSPEC_SSE5_INTRINSIC))]
1511 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1512 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1513 [(set_attr "type" "ssemuladd")
1514 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge (scalar-element) fmsub on
;; V4SF/V2DF registers.  The default expansion uses UNSPEC_SSE5_INTRINSIC;
;; when TARGET_FUSED_MADD is set the preparation code emits
;; sse5_vmfmsub<mode>4 instead.
1516 (define_expand "sse5i_vmfmsub<mode>4"
1517 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1519 [(vec_merge:SSEMODEF2P
1522 (match_operand:SSEMODEF2P 1 "register_operand" "")
1523 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1524 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1527 UNSPEC_SSE5_INTRINSIC))]
1530 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1531 if (TARGET_FUSED_MADD)
1533 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1534 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the vec_merge fmsub; emits
;; fmsubss/fmsubsd.  Operand 1 is tied to the destination in both
;; alternatives; one of operands 2 and 3 may be memory.
1539 (define_insn "*sse5i_vmfmsub<mode>4"
1540 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1542 [(vec_merge:SSEMODEF2P
1545 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1546 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1547 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1550 UNSPEC_SSE5_INTRINSIC))]
1551 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1552 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1553 [(set_attr "type" "ssemuladd")
1554 (set_attr "mode" "<ssescalarmode>")])
1556 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for the vec_merge (scalar-element) fnmadd on
;; V4SF/V2DF registers; operand 3 appears first in the template.  The
;; default expansion uses UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is
;; set the preparation code emits sse5_vmfnmadd<mode>4 instead.
1557 (define_expand "sse5i_vmfnmadd<mode>4"
1558 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1560 [(vec_merge:SSEMODEF2P
1562 (match_operand:SSEMODEF2P 3 "register_operand" "")
1564 (match_operand:SSEMODEF2P 1 "register_operand" "")
1565 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1568 UNSPEC_SSE5_INTRINSIC))]
1571 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1572 if (TARGET_FUSED_MADD)
1574 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1575 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the vec_merge fnmadd; emits
;; fnmaddss/fnmaddsd.  Operand 3 is listed first in the template; operand 1
;; is tied to the destination in both alternatives.
1580 (define_insn "*sse5i_vmfnmadd<mode>4"
1581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1583 [(vec_merge:SSEMODEF2P
1585 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1587 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1588 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1591 UNSPEC_SSE5_INTRINSIC))]
1592 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1593 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1594 [(set_attr "type" "ssemuladd")
1595 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge (scalar-element) fnmsub on
;; V4SF/V2DF registers.  The default expansion uses UNSPEC_SSE5_INTRINSIC;
;; when TARGET_FUSED_MADD is set the preparation code emits
;; sse5_vmfnmsub<mode>4 instead.
1597 (define_expand "sse5i_vmfnmsub<mode>4"
1598 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1600 [(vec_merge:SSEMODEF2P
1604 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1605 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1606 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1609 UNSPEC_SSE5_INTRINSIC))]
1612 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1613 if (TARGET_FUSED_MADD)
1615 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1616 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the vec_merge fnmsub; emits
;; fnmsubss/fnmsubsd.  Operand 1 is tied to the destination in both
;; alternatives; one of operands 2 and 3 may be memory.
1621 (define_insn "*sse5i_vmfnmsub<mode>4"
1622 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1624 [(vec_merge:SSEMODEF2P
1628 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1629 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1630 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1633 UNSPEC_SSE5_INTRINSIC))]
1634 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1635 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1636 [(set_attr "type" "ssemuladd")
1637 (set_attr "mode" "<ssescalarmode>")])
1639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1641 ;; Parallel single-precision floating point conversion operations
1643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory, "ym") to
;; V2SF and combines it with V4SF operand 1, which is tied to the
;; destination.
1645 (define_insn "sse_cvtpi2ps"
1646 [(set (match_operand:V4SF 0 "register_operand" "=x")
1649 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1650 (match_operand:V4SF 1 "register_operand" "0")
1653 "cvtpi2ps\t{%2, %0|%0, %2}"
1654 [(set_attr "type" "ssecvt")
1655 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec V4SI conversion of V4SF operand 1 (SSE register or
;; memory), from which elements 0 and 1 are selected into a V2SI MMX
;; register ("=y").  Marked as using the mmx unit.
1657 (define_insn "sse_cvtps2pi"
1658 [(set (match_operand:V2SI 0 "register_operand" "=y")
1660 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1662 (parallel [(const_int 0) (const_int 1)])))]
1664 "cvtps2pi\t{%1, %0|%0, %1}"
1665 [(set_attr "type" "ssecvt")
1666 (set_attr "unit" "mmx")
1667 (set_attr "mode" "DI")])
;; cvttps2pi: `fix` conversion of V4SF operand 1 to V4SI, from which
;; elements 0 and 1 are selected into a V2SI MMX register ("=y").
;; NOTE(review): the "mode" attribute is "SF" here but "DI" on the
;; otherwise parallel sse_cvtps2pi -- confirm whether that is intended.
1669 (define_insn "sse_cvttps2pi"
1670 [(set (match_operand:V2SI 0 "register_operand" "=y")
1672 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1673 (parallel [(const_int 0) (const_int 1)])))]
1675 "cvttps2pi\t{%1, %0|%0, %1}"
1676 [(set_attr "type" "ssecvt")
1677 (set_attr "unit" "mmx")
1678 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 (general register or memory) to SF and
;; combines it with V4SF operand 1, which is tied to the destination.
;; The decode attributes differ per alternative (register vs. memory).
1680 (define_insn "sse_cvtsi2ss"
1681 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1684 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1685 (match_operand:V4SF 1 "register_operand" "0,0")
1688 "cvtsi2ss\t{%2, %0|%0, %2}"
1689 [(set_attr "type" "sseicvt")
1690 (set_attr "athlon_decode" "vector,double")
1691 (set_attr "amdfam10_decode" "vector,double")
1692 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant of cvtsi2ss; converts DI operand 2 to SF and
;; combines it with V4SF operand 1 (tied to the destination).  Requires
;; TARGET_SSE and TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm" here but "m"
;; in sse_cvtsi2ss, although the per-alternative decode attributes are the
;; same -- confirm whether "rm" is intended.
1694 (define_insn "sse_cvtsi2ssq"
1695 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1698 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1699 (match_operand:V4SF 1 "register_operand" "0,0")
1701 "TARGET_SSE && TARGET_64BIT"
1702 "cvtsi2ssq\t{%2, %0|%0, %2}"
1703 [(set_attr "type" "sseicvt")
1704 (set_attr "athlon_decode" "vector,double")
1705 (set_attr "amdfam10_decode" "vector,double")
1706 (set_attr "mode" "SF")])
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of V4SF operand 1
;; (SSE register or memory) to an SI general register.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; sse_cvtss2si_2 -- confirm whether the omission is intended.
1708 (define_insn "sse_cvtss2si"
1709 [(set (match_operand:SI 0 "register_operand" "=r,r")
1712 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1713 (parallel [(const_int 0)]))]
1714 UNSPEC_FIX_NOTRUNC))]
1716 "cvtss2si\t{%1, %0|%0, %1}"
1717 [(set_attr "type" "sseicvt")
1718 (set_attr "athlon_decode" "double,vector")
1719 (set_attr "prefix_rep" "1")
1720 (set_attr "mode" "SI")])
;; cvtss2si on a scalar operand: UNSPEC_FIX_NOTRUNC conversion of SF
;; operand 1 (SSE register or memory) to an SI general register.
1722 (define_insn "sse_cvtss2si_2"
1723 [(set (match_operand:SI 0 "register_operand" "=r,r")
1724 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1725 UNSPEC_FIX_NOTRUNC))]
1727 "cvtss2si\t{%1, %0|%0, %1}"
1728 [(set_attr "type" "sseicvt")
1729 (set_attr "athlon_decode" "double,vector")
1730 (set_attr "amdfam10_decode" "double,double")
1731 (set_attr "prefix_rep" "1")
1732 (set_attr "mode" "SI")])
;; cvtss2siq: UNSPEC_FIX_NOTRUNC conversion of element 0 of V4SF operand 1
;; to a DI general register.  Requires TARGET_SSE and TARGET_64BIT.
1734 (define_insn "sse_cvtss2siq"
1735 [(set (match_operand:DI 0 "register_operand" "=r,r")
1738 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1739 (parallel [(const_int 0)]))]
1740 UNSPEC_FIX_NOTRUNC))]
1741 "TARGET_SSE && TARGET_64BIT"
1742 "cvtss2siq\t{%1, %0|%0, %1}"
1743 [(set_attr "type" "sseicvt")
1744 (set_attr "athlon_decode" "double,vector")
1745 (set_attr "prefix_rep" "1")
1746 (set_attr "mode" "DI")])
;; cvtss2siq on a scalar operand: UNSPEC_FIX_NOTRUNC conversion of SF
;; operand 1 to a DI general register.  Requires TARGET_SSE and
;; TARGET_64BIT.
1748 (define_insn "sse_cvtss2siq_2"
1749 [(set (match_operand:DI 0 "register_operand" "=r,r")
1750 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1751 UNSPEC_FIX_NOTRUNC))]
1752 "TARGET_SSE && TARGET_64BIT"
1753 "cvtss2siq\t{%1, %0|%0, %1}"
1754 [(set_attr "type" "sseicvt")
1755 (set_attr "athlon_decode" "double,vector")
1756 (set_attr "amdfam10_decode" "double,double")
1757 (set_attr "prefix_rep" "1")
1758 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 (SSE register or memory)
;; to an SI general register.  The conversion operator line is not visible
;; in this extract (presumably `fix`, i.e. truncating -- confirm).
1760 (define_insn "sse_cvttss2si"
1761 [(set (match_operand:SI 0 "register_operand" "=r,r")
1764 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1765 (parallel [(const_int 0)]))))]
1767 "cvttss2si\t{%1, %0|%0, %1}"
1768 [(set_attr "type" "sseicvt")
1769 (set_attr "athlon_decode" "double,vector")
1770 (set_attr "amdfam10_decode" "double,double")
1771 (set_attr "prefix_rep" "1")
1772 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit variant of cvttss2si; converts element 0 of V4SF
;; operand 1 to a DI general register.  Requires TARGET_SSE and
;; TARGET_64BIT.
1774 (define_insn "sse_cvttss2siq"
1775 [(set (match_operand:DI 0 "register_operand" "=r,r")
1778 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1779 (parallel [(const_int 0)]))))]
1780 "TARGET_SSE && TARGET_64BIT"
1781 "cvttss2siq\t{%1, %0|%0, %1}"
1782 [(set_attr "type" "sseicvt")
1783 (set_attr "athlon_decode" "double,vector")
1784 (set_attr "amdfam10_decode" "double,double")
1785 (set_attr "prefix_rep" "1")
1786 (set_attr "mode" "DI")])
;; cvtdq2ps: converts V4SI operand 1 (SSE register or memory) to V4SF.
1788 (define_insn "sse2_cvtdq2ps"
1789 [(set (match_operand:V4SF 0 "register_operand" "=x")
1790 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1792 "cvtdq2ps\t{%1, %0|%0, %1}"
1793 [(set_attr "type" "ssecvt")
1794 (set_attr "mode" "V4SF")])
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of V4SF operand 1 (SSE register
;; or memory) to V4SI.  Sets the prefix_data16 attribute.
1796 (define_insn "sse2_cvtps2dq"
1797 [(set (match_operand:V4SI 0 "register_operand" "=x")
1798 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1799 UNSPEC_FIX_NOTRUNC))]
1801 "cvtps2dq\t{%1, %0|%0, %1}"
1802 [(set_attr "type" "ssecvt")
1803 (set_attr "prefix_data16" "1")
1804 (set_attr "mode" "TI")])
;; cvttps2dq: `fix` conversion of V4SF operand 1 (SSE register or memory)
;; to V4SI.  Sets the prefix_rep attribute.
1806 (define_insn "sse2_cvttps2dq"
1807 [(set (match_operand:V4SI 0 "register_operand" "=x")
1808 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1810 "cvttps2dq\t{%1, %0|%0, %1}"
1811 [(set_attr "type" "ssecvt")
1812 (set_attr "prefix_rep" "1")
1813 (set_attr "mode" "TI")])
1815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1817 ;; Parallel double-precision floating point conversion operations
1819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: converts V2SI operand 1 (MMX register or memory) to V2DF.
;; Only the MMX-register alternative is attributed to the mmx unit.
1821 (define_insn "sse2_cvtpi2pd"
1822 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1823 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1825 "cvtpi2pd\t{%1, %0|%0, %1}"
1826 [(set_attr "type" "ssecvt")
1827 (set_attr "unit" "mmx,*")
1828 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of V2DF operand 1 (SSE register
;; or memory) to a V2SI MMX register ("=y").
1830 (define_insn "sse2_cvtpd2pi"
1831 [(set (match_operand:V2SI 0 "register_operand" "=y")
1832 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1833 UNSPEC_FIX_NOTRUNC))]
1835 "cvtpd2pi\t{%1, %0|%0, %1}"
1836 [(set_attr "type" "ssecvt")
1837 (set_attr "unit" "mmx")
1838 (set_attr "prefix_data16" "1")
1839 (set_attr "mode" "DI")])
;; cvttpd2pi: `fix` conversion of V2DF operand 1 (SSE register or memory)
;; to a V2SI MMX register ("=y").
;; NOTE(review): the "mode" attribute is "TI" here but "DI" on the
;; otherwise parallel sse2_cvtpd2pi -- confirm whether that is intended.
1841 (define_insn "sse2_cvttpd2pi"
1842 [(set (match_operand:V2SI 0 "register_operand" "=y")
1843 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1845 "cvttpd2pi\t{%1, %0|%0, %1}"
1846 [(set_attr "type" "ssecvt")
1847 (set_attr "unit" "mmx")
1848 (set_attr "prefix_data16" "1")
1849 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SI operand 2 (general register or memory) to DF and
;; combines it with V2DF operand 1, which is tied to the destination.
1851 (define_insn "sse2_cvtsi2sd"
1852 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1855 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1856 (match_operand:V2DF 1 "register_operand" "0,0")
1859 "cvtsi2sd\t{%2, %0|%0, %2}"
1860 [(set_attr "type" "sseicvt")
1861 (set_attr "mode" "DF")
1862 (set_attr "athlon_decode" "double,direct")
1863 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit variant of cvtsi2sd; converts DI operand 2 to DF and
;; combines it with V2DF operand 1 (tied to the destination).  Requires
;; TARGET_SSE2 and TARGET_64BIT.
1865 (define_insn "sse2_cvtsi2sdq"
1866 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1869 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1870 (match_operand:V2DF 1 "register_operand" "0,0")
1872 "TARGET_SSE2 && TARGET_64BIT"
1873 "cvtsi2sdq\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "sseicvt")
1875 (set_attr "mode" "DF")
1876 (set_attr "athlon_decode" "double,direct")
1877 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of V2DF operand 1
;; (SSE register or memory) to an SI general register.
1879 (define_insn "sse2_cvtsd2si"
1880 [(set (match_operand:SI 0 "register_operand" "=r,r")
1883 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1884 (parallel [(const_int 0)]))]
1885 UNSPEC_FIX_NOTRUNC))]
1887 "cvtsd2si\t{%1, %0|%0, %1}"
1888 [(set_attr "type" "sseicvt")
1889 (set_attr "athlon_decode" "double,vector")
1890 (set_attr "prefix_rep" "1")
1891 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar operand: UNSPEC_FIX_NOTRUNC conversion of DF
;; operand 1 (SSE register or memory) to an SI general register.
1893 (define_insn "sse2_cvtsd2si_2"
1894 [(set (match_operand:SI 0 "register_operand" "=r,r")
1895 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1896 UNSPEC_FIX_NOTRUNC))]
1898 "cvtsd2si\t{%1, %0|%0, %1}"
1899 [(set_attr "type" "sseicvt")
1900 (set_attr "athlon_decode" "double,vector")
1901 (set_attr "amdfam10_decode" "double,double")
1902 (set_attr "prefix_rep" "1")
1903 (set_attr "mode" "SI")])
;; cvtsd2siq: UNSPEC_FIX_NOTRUNC conversion of element 0 of V2DF operand 1
;; to a DI general register.  Requires TARGET_SSE2 and TARGET_64BIT.
1905 (define_insn "sse2_cvtsd2siq"
1906 [(set (match_operand:DI 0 "register_operand" "=r,r")
1909 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1910 (parallel [(const_int 0)]))]
1911 UNSPEC_FIX_NOTRUNC))]
1912 "TARGET_SSE2 && TARGET_64BIT"
1913 "cvtsd2siq\t{%1, %0|%0, %1}"
1914 [(set_attr "type" "sseicvt")
1915 (set_attr "athlon_decode" "double,vector")
1916 (set_attr "prefix_rep" "1")
1917 (set_attr "mode" "DI")])
;; cvtsd2siq on a scalar operand: UNSPEC_FIX_NOTRUNC conversion of DF
;; operand 1 to a DI general register.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
1919 (define_insn "sse2_cvtsd2siq_2"
1920 [(set (match_operand:DI 0 "register_operand" "=r,r")
1921 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1922 UNSPEC_FIX_NOTRUNC))]
1923 "TARGET_SSE2 && TARGET_64BIT"
1924 "cvtsd2siq\t{%1, %0|%0, %1}"
1925 [(set_attr "type" "sseicvt")
1926 (set_attr "athlon_decode" "double,vector")
1927 (set_attr "amdfam10_decode" "double,double")
1928 (set_attr "prefix_rep" "1")
1929 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of V2DF operand 1 (SSE register or memory)
;; to an SI general register.  The conversion operator line is not visible
;; in this extract (presumably `fix`, i.e. truncating -- confirm).
1931 (define_insn "sse2_cvttsd2si"
1932 [(set (match_operand:SI 0 "register_operand" "=r,r")
1935 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1936 (parallel [(const_int 0)]))))]
1938 "cvttsd2si\t{%1, %0|%0, %1}"
1939 [(set_attr "type" "sseicvt")
1940 (set_attr "prefix_rep" "1")
1941 (set_attr "mode" "SI")
1942 (set_attr "athlon_decode" "double,vector")
1943 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: 64-bit variant of cvttsd2si; converts element 0 of V2DF
;; operand 1 to a DI general register.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
1945 (define_insn "sse2_cvttsd2siq"
1946 [(set (match_operand:DI 0 "register_operand" "=r,r")
1949 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1950 (parallel [(const_int 0)]))))]
1951 "TARGET_SSE2 && TARGET_64BIT"
1952 "cvttsd2siq\t{%1, %0|%0, %1}"
1953 [(set_attr "type" "sseicvt")
1954 (set_attr "prefix_rep" "1")
1955 (set_attr "mode" "DI")
1956 (set_attr "athlon_decode" "double,vector")
1957 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces a V2DF result from elements 0 and 1 of V4SI operand 1
;; (SSE register or memory).
1959 (define_insn "sse2_cvtdq2pd"
1960 [(set (match_operand:V2DF 0 "register_operand" "=x")
1963 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1964 (parallel [(const_int 0) (const_int 1)]))))]
1966 "cvtdq2pd\t{%1, %0|%0, %1}"
1967 [(set_attr "type" "ssecvt")
1968 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The V2DF operand 1 is converted to V2SI through
;; an unspec; the preparation statement supplies operands[2] as the
;; V2SImode zero constant.
1970 (define_expand "sse2_cvtpd2dq"
1971 [(set (match_operand:V4SI 0 "register_operand" "")
1973 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1977 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvtpd2dq.  Operand 2 must satisfy const0_operand
;; (a V2SI zero) and is not referenced by the output template, which only
;; names the V2DF source and the V4SI destination.
1979 (define_insn "*sse2_cvtpd2dq"
1980 [(set (match_operand:V4SI 0 "register_operand" "=x")
1982 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1984 (match_operand:V2SI 2 "const0_operand" "")))]
1986 "cvtpd2dq\t{%1, %0|%0, %1}"
1987 [(set_attr "type" "ssecvt")
1988 (set_attr "prefix_rep" "1")
1989 (set_attr "mode" "TI")
1990 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The V2DF operand 1 is converted to V2SI with
;; the `fix' RTL code; the preparation statement supplies operands[2] as
;; the V2SImode zero constant.
1992 (define_expand "sse2_cvttpd2dq"
1993 [(set (match_operand:V4SI 0 "register_operand" "")
1995 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1998 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for cvttpd2dq.  Operand 2 must satisfy const0_operand
;; (a V2SI zero) and is not referenced by the output template.
2000 (define_insn "*sse2_cvttpd2dq"
2001 [(set (match_operand:V4SI 0 "register_operand" "=x")
2003 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2004 (match_operand:V2SI 2 "const0_operand" "")))]
2006 "cvttpd2dq\t{%1, %0|%0, %1}"
2007 [(set_attr "type" "ssecvt")
2008 (set_attr "prefix_rep" "1")
2009 (set_attr "mode" "TI")
2010 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: operand 2 (V2DF, XMM register or memory) is narrowed with
;; float_truncate.  Operand 1 is a V4SF register tied to the destination
;; by the "0" constraint in both alternatives.
2012 (define_insn "sse2_cvtsd2ss"
2013 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2016 (float_truncate:V2SF
2017 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2018 (match_operand:V4SF 1 "register_operand" "0,0")
2021 "cvtsd2ss\t{%2, %0|%0, %2}"
2022 [(set_attr "type" "ssecvt")
2023 (set_attr "athlon_decode" "vector,double")
2024 (set_attr "amdfam10_decode" "vector,double")
2025 (set_attr "mode" "SF")])
;; cvtss2sd: operand 2 is the V4SF source (XMM register or memory), from
;; which elements 0 and 1 are selected.  Operand 1 is a V2DF register tied
;; to the destination by the "0" constraint in both alternatives.
2027 (define_insn "sse2_cvtss2sd"
2028 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2032 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2033 (parallel [(const_int 0) (const_int 1)])))
2034 (match_operand:V2DF 1 "register_operand" "0,0")
2037 "cvtss2sd\t{%2, %0|%0, %2}"
2038 [(set_attr "type" "ssecvt")
2039 (set_attr "amdfam10_decode" "vector,double")
2040 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The V2DF operand 1 is narrowed to V2SF with
;; float_truncate; the preparation statement supplies operands[2] as the
;; V2SFmode zero constant.
2042 (define_expand "sse2_cvtpd2ps"
2043 [(set (match_operand:V4SF 0 "register_operand" "")
2045 (float_truncate:V2SF
2046 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2049 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for cvtpd2ps.  Operand 2 must satisfy const0_operand
;; (a V2SF zero) and is not referenced by the output template.
2051 (define_insn "*sse2_cvtpd2ps"
2052 [(set (match_operand:V4SF 0 "register_operand" "=x")
2054 (float_truncate:V2SF
2055 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2056 (match_operand:V2SF 2 "const0_operand" "")))]
2058 "cvtpd2ps\t{%1, %0|%0, %1}"
2059 [(set_attr "type" "ssecvt")
2060 (set_attr "prefix_data16" "1")
2061 (set_attr "mode" "V4SF")
2062 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand 1 (XMM register or
;; memory) are the inputs; the result is a V2DF in an XMM register.
2064 (define_insn "sse2_cvtps2pd"
2065 [(set (match_operand:V2DF 0 "register_operand" "=x")
2068 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2069 (parallel [(const_int 0) (const_int 1)]))))]
2071 "cvtps2pd\t{%1, %0|%0, %1}"
2072 [(set_attr "type" "ssecvt")
2073 (set_attr "mode" "V2DF")
2074 (set_attr "amdfam10_decode" "direct")])
;; Two-step expansion from V4SF operand 1 to V2DF operand 0.  The first
;; step shuffles operand 1 (its selector starts at index 6); the second
;; produces operand 0 from elements 0 and 1.  operands[2] is a fresh V4SF
;; pseudo created by the preparation code -- presumably the intermediate
;; value that links the two steps; confirm against the full pattern.
2076 (define_expand "vec_unpacks_hi_v4sf"
2081 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2082 (parallel [(const_int 6)
2086 (set (match_operand:V2DF 0 "register_operand" "")
2090 (parallel [(const_int 0) (const_int 1)]))))]
2093 operands[2] = gen_reg_rtx (V4SFmode);
;; Produces the V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1 (register or memory).
2096 (define_expand "vec_unpacks_lo_v4sf"
2097 [(set (match_operand:V2DF 0 "register_operand" "")
2100 (match_operand:V4SF 1 "nonimmediate_operand" "")
2101 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two emitted insns: vec_unpacks_hi_v8hi writes a V4SI
;; temporary from operand 1, then sse2_cvtdq2ps converts that temporary
;; into operand 0.
2104 (define_expand "vec_unpacks_float_hi_v8hi"
2105 [(match_operand:V4SF 0 "register_operand" "")
2106 (match_operand:V8HI 1 "register_operand" "")]
2109 rtx tmp = gen_reg_rtx (V4SImode);
2111 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2112 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacks_lo_v8hi writes a V4SI
;; temporary from operand 1, then sse2_cvtdq2ps converts that temporary
;; into operand 0.
2116 (define_expand "vec_unpacks_float_lo_v8hi"
2117 [(match_operand:V4SF 0 "register_operand" "")
2118 (match_operand:V8HI 1 "register_operand" "")]
2121 rtx tmp = gen_reg_rtx (V4SImode);
2123 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2124 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacku_hi_v8hi writes a V4SI
;; temporary from operand 1, then sse2_cvtdq2ps converts that temporary
;; into operand 0.
2128 (define_expand "vec_unpacku_float_hi_v8hi"
2129 [(match_operand:V4SF 0 "register_operand" "")
2130 (match_operand:V8HI 1 "register_operand" "")]
2133 rtx tmp = gen_reg_rtx (V4SImode);
2135 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2136 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacku_lo_v8hi writes a V4SI
;; temporary from operand 1, then sse2_cvtdq2ps converts that temporary
;; into operand 0.
2140 (define_expand "vec_unpacku_float_lo_v8hi"
2141 [(match_operand:V4SF 0 "register_operand" "")
2142 (match_operand:V8HI 1 "register_operand" "")]
2145 rtx tmp = gen_reg_rtx (V4SImode);
2147 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2148 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion from V4SI operand 1 to V2DF operand 0.  The first
;; step shuffles operand 1 (its selector starts at index 2); the second
;; produces operand 0 from elements 0 and 1.  operands[2] is a fresh V4SI
;; pseudo created by the preparation code -- presumably the intermediate
;; value that links the two steps; confirm against the full pattern.
2152 (define_expand "vec_unpacks_float_hi_v4si"
2155 (match_operand:V4SI 1 "nonimmediate_operand" "")
2156 (parallel [(const_int 2)
2160 (set (match_operand:V2DF 0 "register_operand" "")
2164 (parallel [(const_int 0) (const_int 1)]))))]
2167 operands[2] = gen_reg_rtx (V4SImode);
;; Produces the V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1 (register or memory).
2170 (define_expand "vec_unpacks_float_lo_v4si"
2171 [(set (match_operand:V2DF 0 "register_operand" "")
2174 (match_operand:V4SI 1 "nonimmediate_operand" "")
2175 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF.  Each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and sse_movlhps
;; combines the two temporaries into operand 0.
2178 (define_expand "vec_pack_trunc_v2df"
2179 [(match_operand:V4SF 0 "register_operand" "")
2180 (match_operand:V2DF 1 "nonimmediate_operand" "")
2181 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2186 r1 = gen_reg_rtx (V4SFmode);
2187 r2 = gen_reg_rtx (V4SFmode);
2189 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2190 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2191 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI.  Each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary (r1, r2); the temporaries
;; and the destination are then viewed as V2DI and merged with
;; sse2_punpcklqdq.
2195 (define_expand "vec_pack_sfix_trunc_v2df"
2196 [(match_operand:V4SI 0 "register_operand" "")
2197 (match_operand:V2DF 1 "nonimmediate_operand" "")
2198 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2203 r1 = gen_reg_rtx (V4SImode);
2204 r2 = gen_reg_rtx (V4SImode);
2206 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2207 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2208 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2209 gen_lowpart (V2DImode, r1),
2210 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI.  Each input is converted with
;; sse2_cvtpd2dq into its own V4SI temporary (r1, r2); the temporaries
;; and the destination are then viewed as V2DI and merged with
;; sse2_punpcklqdq.
2214 (define_expand "vec_pack_sfix_v2df"
2215 [(match_operand:V4SI 0 "register_operand" "")
2216 (match_operand:V2DF 1 "nonimmediate_operand" "")
2217 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2222 r1 = gen_reg_rtx (V4SImode);
2223 r2 = gen_reg_rtx (V4SImode);
2225 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2226 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2227 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2228 gen_lowpart (V2DImode, r1),
2229 gen_lowpart (V2DImode, r2)));
2233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2235 ;; Parallel single-precision floating point element swizzling
2237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms; operand 1 is tied to the destination in
;; every alternative.
;;   alt 0: register-register movhlps
;;   alt 1: operand 2 in offsettable memory, read with movlps through %H2
;;   alt 2: destination in memory, written with movhps from operand 2
;; The insn condition rejects operand 1 and operand 2 both being memory.
2239 (define_insn "sse_movhlps"
2240 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2243 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2244 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2245 (parallel [(const_int 6)
2249 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2251 movhlps\t{%2, %0|%0, %2}
2252 movlps\t{%H2, %0|%0, %H2}
2253 movhps\t{%2, %0|%0, %2}"
2254 [(set_attr "type" "ssemov")
2255 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms; operand 1 is tied to the destination in
;; every alternative.
;;   alt 0: register-register movlhps
;;   alt 1: operand 2 in memory, loaded with movhps
;;   alt 2: destination in offsettable memory, written with movlps
;;          through %H0
;; Operand legality is delegated to ix86_binary_operator_ok.
2257 (define_insn "sse_movlhps"
2258 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2261 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2262 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2263 (parallel [(const_int 0)
2267 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2269 movlhps\t{%2, %0|%0, %2}
2270 movhps\t{%2, %0|%0, %2}
2271 movlps\t{%2, %H0|%H0, %2}"
2272 [(set_attr "type" "ssemov")
2273 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: selects elements 2, 6, 3, 7 from the operand 1 / operand 2
;; pair.  Operand 1 is tied to the destination; operand 2 may be an XMM
;; register or memory.
2275 (define_insn "sse_unpckhps"
2276 [(set (match_operand:V4SF 0 "register_operand" "=x")
2279 (match_operand:V4SF 1 "register_operand" "0")
2280 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2281 (parallel [(const_int 2) (const_int 6)
2282 (const_int 3) (const_int 7)])))]
2284 "unpckhps\t{%2, %0|%0, %2}"
2285 [(set_attr "type" "sselog")
2286 (set_attr "mode" "V4SF")])
;; unpcklps: selects elements 0, 4, 1, 5 from the operand 1 / operand 2
;; pair.  Operand 1 is tied to the destination; operand 2 may be an XMM
;; register or memory.
2288 (define_insn "sse_unpcklps"
2289 [(set (match_operand:V4SF 0 "register_operand" "=x")
2292 (match_operand:V4SF 1 "register_operand" "0")
2293 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2294 (parallel [(const_int 0) (const_int 4)
2295 (const_int 1) (const_int 5)])))]
2297 "unpcklps\t{%2, %0|%0, %2}"
2298 [(set_attr "type" "sselog")
2299 (set_attr "mode" "V4SF")])
2301 ;; These are modeled with the same vec_concat as the others so that we
2302 ;; capture users of shufps that can use the new instructions
;; movshdup: one V4SF source (XMM register or memory) and an XMM
;; destination; the selector shown here begins with element 1.
2303 (define_insn "sse3_movshdup"
2304 [(set (match_operand:V4SF 0 "register_operand" "=x")
2307 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2309 (parallel [(const_int 1)
2314 "movshdup\t{%1, %0|%0, %1}"
2315 [(set_attr "type" "sse")
2316 (set_attr "prefix_rep" "1")
2317 (set_attr "mode" "V4SF")])
;; movsldup: one V4SF source (XMM register or memory) and an XMM
;; destination; the selector shown here begins with element 0.
2319 (define_insn "sse3_movsldup"
2320 [(set (match_operand:V4SF 0 "register_operand" "=x")
2323 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2325 (parallel [(const_int 0)
2330 "movsldup\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "sse")
2332 (set_attr "prefix_rep" "1")
2333 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit selectors and handed to sse_shufps_1; the two
;; upper selectors are biased by 4 so that they fall in the 4..7 range
;; that sse_shufps_1 expects for its last two selector operands.
2335 (define_expand "sse_shufps"
2336 [(match_operand:V4SF 0 "register_operand" "")
2337 (match_operand:V4SF 1 "register_operand" "")
2338 (match_operand:V4SF 2 "nonimmediate_operand" "")
2339 (match_operand:SI 3 "const_int_operand" "")]
2342 int mask = INTVAL (operands[3]);
2343 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2344 GEN_INT ((mask >> 0) & 3),
2345 GEN_INT ((mask >> 2) & 3),
2346 GEN_INT (((mask >> 4) & 3) + 4),
2347 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four element indices:
;; operands 3 and 4 are in 0..3, operands 5 and 6 are in 4..7.  The output
;; code folds the four indices back into the 8-bit immediate (removing
;; the bias of 4 from the last two) and stores it in operands[3] for the
;; template.
2351 (define_insn "sse_shufps_1"
2352 [(set (match_operand:V4SF 0 "register_operand" "=x")
2355 (match_operand:V4SF 1 "register_operand" "0")
2356 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2357 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2358 (match_operand 4 "const_0_to_3_operand" "")
2359 (match_operand 5 "const_4_to_7_operand" "")
2360 (match_operand 6 "const_4_to_7_operand" "")])))]
2364 mask |= INTVAL (operands[3]) << 0;
2365 mask |= INTVAL (operands[4]) << 2;
2366 mask |= (INTVAL (operands[5]) - 4) << 4;
2367 mask |= (INTVAL (operands[6]) - 4) << 6;
2368 operands[3] = GEN_INT (mask);
2370 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2372 [(set_attr "type" "sselog")
2373 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of the V4SF operand 1 as a V2SF.
;;   alt 0: destination in memory, written with movhps
;;   alt 1: register to register, movhlps
;;   alt 2: source in offsettable memory, read with movlps through %H1
2375 (define_insn "sse_storehps"
2376 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2378 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2379 (parallel [(const_int 2) (const_int 3)])))]
2382 movhps\t{%1, %0|%0, %1}
2383 movhlps\t{%1, %0|%0, %1}
2384 movlps\t{%H1, %0|%0, %H1}"
2385 [(set_attr "type" "ssemov")
2386 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; the V2SF operand 2.
;;   alt 0: operand 2 in memory, loaded with movhps
;;   alt 1: operand 2 in a register, movlhps
;;   alt 2: destination in offsettable memory, written with movlps
;;          through %H0
2388 (define_insn "sse_loadhps"
2389 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2392 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2393 (parallel [(const_int 0) (const_int 1)]))
2394 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2397 movhps\t{%2, %0|%0, %2}
2398 movlhps\t{%2, %0|%0, %2}
2399 movlps\t{%2, %H0|%H0, %2}"
2400 [(set_attr "type" "ssemov")
2401 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of the V4SF operand 1 as a V2SF.
;;   alt 0: destination in memory, written with movlps
;;   alt 1: register to register, a full movaps copy
;;   alt 2: source in memory, loaded with movlps
2403 (define_insn "sse_storelps"
2404 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2406 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2407 (parallel [(const_int 0) (const_int 1)])))]
2410 movlps\t{%1, %0|%0, %1}
2411 movaps\t{%1, %0|%0, %1}
2412 movlps\t{%1, %0|%0, %1}"
2413 [(set_attr "type" "ssemov")
2414 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines the V2SF operand 2 with elements 2 and 3 of operand 1.
;;   alt 0: operand 2 tied to the destination, operand 1 in a register;
;;          shufps with immediate 0xe4 takes the remaining elements
;;          from operand 1
;;   alt 1: operand 2 in memory, loaded with movlps
;;   alt 2: destination in memory, written with movlps
2416 (define_insn "sse_loadlps"
2417 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2419 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2421 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2422 (parallel [(const_int 2) (const_int 3)]))))]
2425 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2426 movlps\t{%2, %0|%0, %2}
2427 movlps\t{%2, %0|%0, %2}"
2428 [(set_attr "type" "sselog,ssemov,ssemov")
2429 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 2 is the source register and
;; operand 1 is tied to the destination by the "0" constraint.
2431 (define_insn "sse_movss"
2432 [(set (match_operand:V4SF 0 "register_operand" "=x")
2434 (match_operand:V4SF 2 "register_operand" "x")
2435 (match_operand:V4SF 1 "register_operand" "0")
2438 "movss\t{%2, %0|%0, %2}"
2439 [(set_attr "type" "ssemov")
2440 (set_attr "mode" "SF")])
;; The SF operand 1 is tied to the destination register; the template
;; shuffles the destination with itself using a shufps immediate of 0.
2442 (define_insn "*vec_dupv4sf"
2443 [(set (match_operand:V4SF 0 "register_operand" "=x")
2445 (match_operand:SF 1 "register_operand" "0")))]
2447 "shufps\t{$0, %0, %0|%0, %0, 0}"
2448 [(set_attr "type" "sselog1")
2449 (set_attr "mode" "V4SF")])
2451 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2452 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2453 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands, in either an XMM ("x") or an MMX
;; ("y") register.
;;   alt 0: XMM, operand 1 tied to the destination, unpcklps with operand 2
;;   alt 1: XMM, operand 1 in memory and operand 2 the "C" constant, movss
;;   alt 2: MMX, operand 1 tied to the destination, punpckldq
;;   alt 3: MMX, operand 1 in memory and operand 2 the "C" constant, movd
2454 (define_insn "*sse_concatv2sf"
2455 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2457 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2458 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2461 unpcklps\t{%2, %0|%0, %2}
2462 movss\t{%1, %0|%0, %1}
2463 punpckldq\t{%2, %0|%0, %2}
2464 movd\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2466 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  Operand 2 in a register uses movlhps, operand 2 in memory
;; uses movhps.
2468 (define_insn "*sse_concatv4sf"
2469 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2471 (match_operand:V2SF 1 "register_operand" " 0,0")
2472 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2475 movlhps\t{%2, %0|%0, %2}
2476 movhps\t{%2, %0|%0, %2}"
2477 [(set_attr "type" "ssemov")
2478 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
2480 (define_expand "vec_initv4sf"
2481 [(match_operand:V4SF 0 "register_operand" "")
2482 (match_operand 1 "" "")]
2485 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts the SF operand 2 into a V4SF whose remaining contents come from
;; operand 1, which is either tied to the destination ("0") or the "C"
;; constant.  The templates shown use movss for an XMM or memory source
;; and movd for a general-register source.
2489 (define_insn "vec_setv4sf_0"
2490 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2493 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2494 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2498 movss\t{%2, %0|%0, %2}
2499 movss\t{%2, %0|%0, %2}
2500 movd\t{%2, %0|%0, %2}
2502 [(set_attr "type" "ssemov")
2503 (set_attr "mode" "SF")])
2505 ;; A subset is vec_setv4sf.
;; insertps form of element insertion.  Operand 3 arrives as a power of
;; two in 1..8; the output code rewrites it as log2 of that value shifted
;; left by 4 before printing it as the insertps immediate.
2506 (define_insn "*vec_setv4sf_sse4_1"
2507 [(set (match_operand:V4SF 0 "register_operand" "=x")
2510 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2511 (match_operand:V4SF 1 "register_operand" "0")
2512 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2515 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2516 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2518 [(set_attr "type" "sselog")
2519 (set_attr "prefix_extra" "1")
2520 (set_attr "mode" "V4SF")])
;; insertps modelled as an unspec.  Operand 2 is the source register,
;; operand 1 is tied to the destination, and operand 3 is the immediate
;; (0..255) printed unchanged into the instruction.
2522 (define_insn "sse4_1_insertps"
2523 [(set (match_operand:V4SF 0 "register_operand" "=x")
2524 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2525 (match_operand:V4SF 1 "register_operand" "0")
2526 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2529 "insertps\t{%3, %2, %0|%0, %2, %3}";
2530 [(set_attr "type" "sselog")
2531 (set_attr "prefix_extra" "1")
2532 (set_attr "mode" "V4SF")])
;; Post-reload split for a V4SF destination in memory: the SF operand 1 is
;; stored with an ordinary move into the SFmode slot at byte offset 0 of
;; operand 0.
2535 [(set (match_operand:V4SF 0 "memory_operand" "")
2538 (match_operand:SF 1 "nonmemory_operand" ""))
2541 "TARGET_SSE && reload_completed"
2544 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element store: operand 2 is the constant element index.  All work
;; is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
2548 (define_expand "vec_setv4sf"
2549 [(match_operand:V4SF 0 "register_operand" "")
2550 (match_operand:SF 1 "register_operand" "")
2551 (match_operand 2 "const_int_operand" "")]
2554 ix86_expand_vector_set (false, operands[0], operands[1],
2555 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF as an SF.  The insn condition rejects
;; a memory-to-memory form.  After reload the pattern is split: operand 1
;; is re-expressed in SFmode (gen_rtx_REG on its register number, or
;; gen_lowpart) and an ordinary move into operand 0 is emitted.
2559 (define_insn_and_split "*vec_extractv4sf_0"
2560 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2562 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2563 (parallel [(const_int 0)])))]
2564 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2566 "&& reload_completed"
2569 rtx op1 = operands[1];
2571 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2573 op1 = gen_lowpart (SFmode, op1);
2574 emit_move_insn (operands[0], op1);
;; extractps: the element of the V4SF register operand 1 chosen by the
;; constant index operand 2 (0..3) is written to a general register or
;; to memory ("=rm").
2578 (define_insn "*sse4_1_extractps"
2579 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2581 (match_operand:V4SF 1 "register_operand" "x")
2582 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2584 "extractps\t{%2, %1, %0|%0, %1, %2}"
2585 [(set_attr "type" "sselog")
2586 (set_attr "prefix_extra" "1")
2587 (set_attr "mode" "V4SF")])
;; Extraction of a constant-indexed element from a V4SF in offsettable
;; memory.  The split turns it into a plain SFmode load from operand 1 at
;; byte offset 4 * index.
2589 (define_insn_and_split "*vec_extract_v4sf_mem"
2590 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2592 (match_operand:V4SF 1 "memory_operand" "o")
2593 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2599 int i = INTVAL (operands[2]);
2601 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; V4SF element extraction: operand 2 is the constant element index.  All
;; work is delegated to ix86_expand_vector_extract, called with `false' as
;; its first argument.
2605 (define_expand "vec_extractv4sf"
2606 [(match_operand:SF 0 "register_operand" "")
2607 (match_operand:V4SF 1 "register_operand" "")
2608 (match_operand 2 "const_int_operand" "")]
2611 ix86_expand_vector_extract (false, operands[0], operands[1],
2612 INTVAL (operands[2]));
2616 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2618 ;; Parallel double-precision floating point element swizzling
2620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unpckhpd and its memory forms.
;;   alt 0: operand 1 tied to the destination, register/register unpckhpd
;;   alt 1: operand 2 tied to the destination, operand 1 in offsettable
;;          memory and read with movlpd through %H1
;;   alt 2: destination in memory (tied to operand 2), written with
;;          movhpd from operand 1
;; The insn condition rejects operand 1 and operand 2 both being memory.
2622 (define_insn "sse2_unpckhpd"
2623 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2626 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2627 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2628 (parallel [(const_int 1)
2630 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2632 unpckhpd\t{%2, %0|%0, %2}
2633 movlpd\t{%H1, %0|%0, %H1}
2634 movhpd\t{%1, %0|%0, %1}"
2635 [(set_attr "type" "sselog,ssemov,ssemov")
2636 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup on a V2DF operand.  Alternative 0 has an XMM destination and a
;; register or memory source; alternative 1 has an offsettable memory
;; destination and a register source.  The insn condition rejects a
;; memory-to-memory form.
2638 (define_insn "*sse3_movddup"
2639 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2642 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2644 (parallel [(const_int 0)
2646 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2648 movddup\t{%1, %0|%0, %1}
2650 [(set_attr "type" "sselog1,ssemov")
2651 (set_attr "mode" "V2DF")])
;; Post-reload split for a V2DF memory destination and a register source:
;; the source register is viewed as DFmode and stored twice, at byte
;; offsets 0 and 8 of operand 0.
2654 [(set (match_operand:V2DF 0 "memory_operand" "")
2657 (match_operand:V2DF 1 "register_operand" "")
2659 (parallel [(const_int 0)
2661 "TARGET_SSE3 && reload_completed"
2664 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2665 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2666 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; unpcklpd and its memory forms; operand 1 is tied to the destination in
;; every alternative.
;;   alt 0: register/register unpcklpd
;;   alt 1: operand 2 in memory, loaded with movhpd
;;   alt 2: destination in offsettable memory, written with movlpd
;;          through %H0
;; The insn condition rejects operand 1 and operand 2 both being memory.
2670 (define_insn "sse2_unpcklpd"
2671 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2674 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2675 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2676 (parallel [(const_int 0)
2678 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2680 unpcklpd\t{%2, %0|%0, %2}
2681 movhpd\t{%2, %0|%0, %2}
2682 movlpd\t{%2, %H0|%H0, %2}"
2683 [(set_attr "type" "sselog,ssemov,ssemov")
2684 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 and forwarding to
;; sse2_shufpd_1 with the selection spelled out as element indices.  Bit 1
;; of the mask becomes index 3 when set and index 2 when clear.
2686 (define_expand "sse2_shufpd"
2687 [(match_operand:V2DF 0 "register_operand" "")
2688 (match_operand:V2DF 1 "register_operand" "")
2689 (match_operand:V2DF 2 "nonimmediate_operand" "")
2690 (match_operand:SI 3 "const_int_operand" "")]
2693 int mask = INTVAL (operands[3]);
2694 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2696 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two element indices:
;; operand 3 is in 0..1 and operand 4 is in 2..3.  The output code folds
;; them back into the 2-bit immediate (operand 3 in bit 0, operand 4 less
;; its bias of 2 in bit 1) and stores it in operands[3] for the template.
2700 (define_insn "sse2_shufpd_1"
2701 [(set (match_operand:V2DF 0 "register_operand" "=x")
2704 (match_operand:V2DF 1 "register_operand" "0")
2705 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2706 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2707 (match_operand 4 "const_2_to_3_operand" "")])))]
2711 mask = INTVAL (operands[3]);
2712 mask |= (INTVAL (operands[4]) - 2) << 1;
2713 operands[3] = GEN_INT (mask);
2715 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2717 [(set_attr "type" "sselog")
2718 (set_attr "mode" "V2DF")])
;; Extracts element 1 of the V2DF operand 1 as a DF.  The memory
;; destination alternative is written with movhpd.  The insn condition
;; rejects a memory-to-memory form.
2720 (define_insn "sse2_storehpd"
2721 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2723 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2724 (parallel [(const_int 1)])))]
2725 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2727 movhpd\t{%1, %0|%0, %1}
2730 [(set_attr "type" "ssemov,sselog1,ssemov")
2731 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: loading element 1 of a V2DF in memory into a DF
;; register becomes a plain DF move from operand 1 at byte offset 8.
2734 [(set (match_operand:DF 0 "register_operand" "")
2736 (match_operand:V2DF 1 "memory_operand" "")
2737 (parallel [(const_int 1)])))]
2738 "TARGET_SSE2 && reload_completed"
2739 [(set (match_dup 0) (match_dup 1))]
2741 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of the V2DF operand 1 as a DF.  The memory
;; destination alternative is written with movlpd.  The insn condition
;; rejects a memory-to-memory form.
2744 (define_insn "sse2_storelpd"
2745 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2747 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2748 (parallel [(const_int 0)])))]
2749 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2751 movlpd\t{%1, %0|%0, %1}
2754 [(set_attr "type" "ssemov")
2755 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of a V2DF into a DF register
;; becomes an ordinary move once operand 1 is re-expressed in DFmode
;; (gen_rtx_REG on its register number, or gen_lowpart).
2758 [(set (match_operand:DF 0 "register_operand" "")
2760 (match_operand:V2DF 1 "nonimmediate_operand" "")
2761 (parallel [(const_int 0)])))]
2762 "TARGET_SSE2 && reload_completed"
2765 rtx op1 = operands[1];
2767 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2769 op1 = gen_lowpart (DFmode, op1);
2770 emit_move_insn (operands[0], op1);
;; Replaces the high element of a V2DF: the result is element 0 of
;; operand 1 followed by the DF operand 2.  Operand 1 is tied to the
;; destination in every alternative.
;;   alt 0: operand 2 in memory, loaded into the upper half with movhpd
;;   alt 1: operand 2 in an XMM register, unpcklpd
;;   alt 2: destination in memory (type "other")
;; An earlier alternative tied operand 2 to the destination and emitted
;; "shufpd $1, %1, %0".  shufpd takes its low result from the destination
;; and its high result from the source, so that produced
;; { %0[1], %1[0] } instead of the required { %1[0], %0[0] }.  No single
;; instruction performs that operation in place, so the alternative is removed;
;; the register allocator copies operand 1 into the destination and uses
;; alt 1 instead.
2774 (define_insn "sse2_loadhpd"
2775 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2778 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2779 (parallel [(const_int 0)]))
2780 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2781 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2783 movhpd\t{%2, %0|%0, %2}
2784 unpcklpd\t{%2, %0|%0, %2}
2787 [(set_attr "type" "ssemov,sselog,other")
2788 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split for a V2DF in memory whose element 0 is kept
;; (match_dup 0) while the DF register operand 1 supplies the other
;; element: only a DF store at byte offset 8 of operand 0 is needed.
2791 [(set (match_operand:V2DF 0 "memory_operand" "")
2793 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2794 (match_operand:DF 1 "register_operand" "")))]
2795 "TARGET_SSE2 && reload_completed"
2796 [(set (match_dup 0) (match_dup 1))]
2798 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Replaces the low element of a V2DF: the result is the DF operand 2
;; followed by element 1 of operand 1.
;;   alt 0: operand 2 in memory, operand 1 the "C" constant, movsd load
;;   alt 1: operand 2 in memory, operand 1 tied to the destination, movlpd
;;   alt 2: operand 2 in a register, operand 1 tied to the destination,
;;          movsd
;;   alt 3: operand 2 tied to the destination, operand 1 in a register,
;;          shufpd $2
;;   alt 4: operand 2 tied to the destination, operand 1 in offsettable
;;          memory, movhpd through %H1
;;   alt 5: destination in memory (type "other")
;; In alt 3 the shufpd source must be operand 1: the low half is kept from
;; the destination (which already holds operand 2) and the high half is
;; taken from the source.  Naming %2 there would shuffle the destination
;; with itself and leave the high half unchanged.
2801 (define_insn "sse2_loadlpd"
2802 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2804 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2806 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2807 (parallel [(const_int 1)]))))]
2808 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2810 movsd\t{%2, %0|%0, %2}
2811 movlpd\t{%2, %0|%0, %2}
2812 movsd\t{%2, %0|%0, %2}
2813 shufpd\t{$2, %1, %0|%0, %1, 2}
2814 movhpd\t{%H1, %0|%0, %H1}
2816 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2817 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF in memory whose element 1 is kept
;; (match_dup 0) while the DF register operand 1 becomes the new
;; element 0.  Only element 0 changes, so the DF store goes to byte
;; offset 0 of operand 0; storing at offset 8 would overwrite the element
;; that is meant to be preserved.
2820 [(set (match_operand:V2DF 0 "memory_operand" "")
2822 (match_operand:DF 1 "register_operand" "")
2823 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2824 "TARGET_SSE2 && reload_completed"
2825 [(set (match_dup 0) (match_dup 1))]
2827 operands[0] = adjust_address (operands[0], DFmode, 0);
2830 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF, using the
;; single-precision move instructions.
;;   alt 0: destination in memory, written with movhps
;;   alt 1: register to register, movhlps
;;   alt 2: source in offsettable memory, read with movlps through %H1
;; The insn condition rejects a memory-to-memory form.
2832 (define_insn "*vec_extractv2df_1_sse"
2833 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2835 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2836 (parallel [(const_int 1)])))]
2837 "!TARGET_SSE2 && TARGET_SSE
2838 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2840 movhps\t{%1, %0|%0, %1}
2841 movhlps\t{%1, %0|%0, %1}
2842 movlps\t{%H1, %0|%0, %H1}"
2843 [(set_attr "type" "ssemov")
2844 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF, using the
;; single-precision move instructions.
;;   alt 0: destination in memory, written with movlps
;;   alt 1: register to register, a full movaps copy
;;   alt 2: source in memory, loaded with movlps
;; The insn condition rejects a memory-to-memory form.
2846 (define_insn "*vec_extractv2df_0_sse"
2847 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2849 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2850 (parallel [(const_int 0)])))]
2851 "!TARGET_SSE2 && TARGET_SSE
2852 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2854 movlps\t{%1, %0|%0, %1}
2855 movaps\t{%1, %0|%0, %1}
2856 movlps\t{%1, %0|%0, %1}"
2857 [(set_attr "type" "ssemov")
2858 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of two V2DF operands.
;;   alt 0: operand 1 tied to the destination, operand 2 in a register,
;;          movsd
;;   alt 1: operand 1 tied to the destination, operand 2 in memory, movlpd
;;   alt 2: destination in memory (tied to operand 1), movlpd store
;;   alt 3: operand 2 tied to the destination, operand 1 in a register,
;;          shufpd $2
;;   alt 4: operand 2 tied to the destination, operand 1 in offsettable
;;          memory, movhps through %H1
;;   alt 5: destination in offsettable memory (tied to operand 2), movhps
;;          store through %H0
;; In alt 3 the shufpd source must be operand 1: the low half is kept from
;; the destination (which already holds operand 2) and the high half is
;; taken from the source.  Naming %2 there would shuffle the destination
;; with itself and never bring in operand 1.
2860 (define_insn "sse2_movsd"
2861 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2863 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2864 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2868 movsd\t{%2, %0|%0, %2}
2869 movlpd\t{%2, %0|%0, %2}
2870 movlpd\t{%2, %0|%0, %2}
2871 shufpd\t{$2, %1, %0|%0, %1, 2}
2872 movhps\t{%H1, %0|%0, %H1}
2873 movhps\t{%1, %H0|%H0, %1}"
2874 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2875 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; movddup with a scalar DF source (XMM register or memory) and a V2DF
;; result in an XMM register.
2877 (define_insn "*vec_dupv2df_sse3"
2878 [(set (match_operand:V2DF 0 "register_operand" "=x")
2880 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2882 "movddup\t{%1, %0|%0, %1}"
2883 [(set_attr "type" "sselog1")
2884 (set_attr "mode" "DF")])
;; V2DF duplicate pattern (going by its name); the DF source register is
;; tied to the destination by the "0" constraint.
2886 (define_insn "vec_dupv2df"
2887 [(set (match_operand:V2DF 0 "register_operand" "=x")
2889 (match_operand:DF 1 "register_operand" "0")))]
2892 [(set_attr "type" "sselog1")
2893 (set_attr "mode" "V2DF")])
;; movddup form of a V2DF concatenation; the template reads only the DF
;; operand 1 (XMM register or memory).
2895 (define_insn "*vec_concatv2df_sse3"
2896 [(set (match_operand:V2DF 0 "register_operand" "=x")
2898 (match_operand:DF 1 "nonimmediate_operand" "xm")
2901 "movddup\t{%1, %0|%0, %1}"
2902 [(set_attr "type" "sselog1")
2903 (set_attr "mode" "DF")])
;; Builds a V2DF from two DF operands.
;;   alt 0: Y2 registers, operand 1 tied to the destination, unpcklpd
;;   alt 1: Y2 destination, operand 2 in memory, movhpd
;;   alt 2: Y2 destination, operand 1 in memory and operand 2 the "C"
;;          constant, movsd load
;;   alt 3: "x" registers, operand 1 tied to the destination, movlhps
;;   alt 4: "x" destination, operand 2 in memory, movhps
2905 (define_insn "*vec_concatv2df"
2906 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2908 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2909 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2912 unpcklpd\t{%2, %0|%0, %2}
2913 movhpd\t{%2, %0|%0, %2}
2914 movsd\t{%1, %0|%0, %1}
2915 movlhps\t{%2, %0|%0, %2}
2916 movhps\t{%2, %0|%0, %2}"
2917 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2918 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; V2DF element store: operand 2 is the constant element index.  All work
;; is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
2920 (define_expand "vec_setv2df"
2921 [(match_operand:V2DF 0 "register_operand" "")
2922 (match_operand:DF 1 "register_operand" "")
2923 (match_operand 2 "const_int_operand" "")]
2926 ix86_expand_vector_set (false, operands[0], operands[1],
2927 INTVAL (operands[2]));
;; V2DF element extraction: operand 2 is the constant element index.  All
;; work is delegated to ix86_expand_vector_extract, called with `false' as
;; its first argument.
2931 (define_expand "vec_extractv2df"
2932 [(match_operand:DF 0 "register_operand" "")
2933 (match_operand:V2DF 1 "register_operand" "")
2934 (match_operand 2 "const_int_operand" "")]
2937 ix86_expand_vector_extract (false, operands[0], operands[1],
2938 INTVAL (operands[2]));
;; V2DF vector initialisation: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
2942 (define_expand "vec_initv2df"
2943 [(match_operand:V2DF 0 "register_operand" "")
2944 (match_operand 1 "" "")]
2947 ix86_expand_vector_init (false, operands[0], operands[1]);
2951 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2953 ;; Parallel integral arithmetic
2955 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the SSEMODEI integer vector modes.  The
;; preparation statement forces a zero vector of the same mode into a
;; register and supplies it as operands[2].
2957 (define_expand "neg<mode>2"
2958 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2961 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2963 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for the SSEMODEI integer vector modes.  Both inputs
;; may be registers or memory; ix86_fixup_binary_operands_no_copy is
;; called on the operands for PLUS before the insn is generated.
2965 (define_expand "add<mode>3"
2966 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2967 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2968 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2970 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an XMM register or memory.
2972 (define_insn "*add<mode>3"
2973 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2975 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2976 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2977 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2978 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2979 [(set_attr "type" "sseiadd")
2980 (set_attr "prefix_data16" "1")
2981 (set_attr "mode" "TI")])
;; padds{b,w} for the SSEMODE12 modes (V16QI, V8HI); the insn condition
;; checks the operands against SS_PLUS.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2983 (define_insn "sse2_ssadd<mode>3"
2984 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2986 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2987 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2988 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2989 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2990 [(set_attr "type" "sseiadd")
2991 (set_attr "prefix_data16" "1")
2992 (set_attr "mode" "TI")])
;; paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI); the insn condition
;; checks the operands against US_PLUS.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2994 (define_insn "sse2_usadd<mode>3"
2995 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2997 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2998 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2999 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
3000 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
3001 [(set_attr "type" "sseiadd")
3002 (set_attr "prefix_data16" "1")
3003 (set_attr "mode" "TI")])
;; Subtraction expander for the SSEMODEI integer vector modes.  Operand 1
;; must be a register, operand 2 may be memory;
;; ix86_fixup_binary_operands_no_copy is called on the operands for MINUS.
3005 (define_expand "sub<mode>3"
3006 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3007 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
3008 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3010 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is tied to the destination with a plain "0"
;; constraint (no commutativity marker); operand 2 may be an XMM register
;; or memory.
3012 (define_insn "*sub<mode>3"
3013 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3015 (match_operand:SSEMODEI 1 "register_operand" "0")
3016 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3018 "psub<ssevecsize>\t{%2, %0|%0, %2}"
3019 [(set_attr "type" "sseiadd")
3020 (set_attr "prefix_data16" "1")
3021 (set_attr "mode" "TI")])
;; psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is tied to
;; the destination; operand 2 may be an XMM register or memory.
3023 (define_insn "sse2_sssub<mode>3"
3024 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3026 (match_operand:SSEMODE12 1 "register_operand" "0")
3027 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3029 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
3030 [(set_attr "type" "sseiadd")
3031 (set_attr "prefix_data16" "1")
3032 (set_attr "mode" "TI")])
;; psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is tied
;; to the destination; operand 2 may be an XMM register or memory.
3034 (define_insn "sse2_ussub<mode>3"
3035 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3037 (match_operand:SSEMODE12 1 "register_operand" "0")
3038 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3040 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
3041 [(set_attr "type" "sseiadd")
3042 (set_attr "prefix_data16" "1")
3043 (set_attr "mode" "TI")])
;; V16QI multiply, available only before reload (the condition excludes
;; reload_in_progress and reload_completed) and implemented by splitting
;; into word multiplies: the bytes are widened into words, multiplied with
;; mulv8hi3, and the low byte of every product is gathered back into one
;; vector.  Two code sequences are shown: one built on the
;; ix86_expand_sse5_unpack / ix86_expand_sse5_pack helpers with V8HI
;; temporaries, and one built from punpckhbw / punpcklbw interleaves with
;; twelve V16QI temporaries.
3045 (define_insn_and_split "mulv16qi3"
3046 [(set (match_operand:V16QI 0 "register_operand" "")
3047 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
3048 (match_operand:V16QI 2 "register_operand" "")))]
3050 && !(reload_completed || reload_in_progress)"
3055 rtx t[12], op0, op[3];
3060 /* On SSE5, we can take advantage of the pperm instruction to pack and
3061 unpack the bytes. Unpack data such that we've got a source byte in
3062 each low byte of each word. We don't care what goes into the high
3063 byte, so put 0 there. */
3064 for (i = 0; i < 6; ++i)
3065 t[i] = gen_reg_rtx (V8HImode);
3067 for (i = 0; i < 2; i++)
3070 op[1] = operands[i+1];
3071 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
3074 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
3077 /* Multiply words. */
3078 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
3079 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
3081 /* Pack the low byte of each word back into a single xmm */
3082 op[0] = operands[0];
3085 ix86_expand_sse5_pack (op);
3089 for (i = 0; i < 12; ++i)
3090 t[i] = gen_reg_rtx (V16QImode);
3092 /* Unpack data such that we've got a source byte in each low byte of
3093 each word. We don't care what goes into the high byte of each word.
3094 Rather than trying to get zero in there, most convenient is to let
3095 it be a copy of the low byte. */
3096 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
3097 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
3098 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
3099 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
3101 /* Multiply words. The end-of-line annotations here give a picture of what
3102 the output of that instruction looks like. Dot means don't care; the
3103 letters are the bytes of the result with A being the most significant. */
3104 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
3105 gen_lowpart (V8HImode, t[0]),
3106 gen_lowpart (V8HImode, t[1])));
3107 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
3108 gen_lowpart (V8HImode, t[2]),
3109 gen_lowpart (V8HImode, t[3])));
3111 /* Extract the relevant bytes and merge them back together. */
3112 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
3113 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
3114 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
3115 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
3116 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
3117 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
3120 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Expander for the element-wise V8HI multiply.  Its preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy for MULT.
3124 (define_expand "mulv8hi3"
3125 [(set (match_operand:V8HI 0 "register_operand" "")
3126 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3127 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3129 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply, emitted as pmullw.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be a register or memory.
;; Only matches when ix86_binary_operator_ok accepts the operands.
3131 (define_insn "*mulv8hi3"
3132 [(set (match_operand:V8HI 0 "register_operand" "=x")
3133 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3134 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3135 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3136 "pmullw\t{%2, %0|%0, %2}"
3137 [(set_attr "type" "sseimul")
3138 (set_attr "prefix_data16" "1")
3139 (set_attr "mode" "TI")])
;; Expander for the smulv8hi3_highpart named pattern on V8HI operands.  The
;; preparation statement fixes up the operands as for a MULT binary operation.
3141 (define_expand "smulv8hi3_highpart"
3142 [(set (match_operand:V8HI 0 "register_operand" "")
3147 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3149 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3152 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn behind smulv8hi3_highpart, emitted as pmulhw.  Operand 1 is tied to
;; the destination and commutative ("%0"); operand 2 is register or memory.
3154 (define_insn "*smulv8hi3_highpart"
3155 [(set (match_operand:V8HI 0 "register_operand" "=x")
3160 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3162 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3164 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3165 "pmulhw\t{%2, %0|%0, %2}"
3166 [(set_attr "type" "sseimul")
3167 (set_attr "prefix_data16" "1")
3168 (set_attr "mode" "TI")])
;; Expander for the umulv8hi3_highpart named pattern on V8HI operands.  The
;; preparation statement fixes up the operands as for a MULT binary operation.
3170 (define_expand "umulv8hi3_highpart"
3171 [(set (match_operand:V8HI 0 "register_operand" "")
3176 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3178 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3181 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn behind umulv8hi3_highpart, emitted as pmulhuw.  Operand 1 is tied to
;; the destination and commutative ("%0"); operand 2 is register or memory.
3183 (define_insn "*umulv8hi3_highpart"
3184 [(set (match_operand:V8HI 0 "register_operand" "=x")
3189 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3191 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3193 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3194 "pmulhuw\t{%2, %0|%0, %2}"
3195 [(set_attr "type" "sseimul")
3196 (set_attr "prefix_data16" "1")
3197 (set_attr "mode" "TI")])
;; Widening multiply producing a V2DI result from elements 0 and 2 of two
;; V4SI operands (see the vec_select parallels); emitted as pmuludq.
;; Operand 1 is tied to the destination and commutative ("%0").
3199 (define_insn "sse2_umulv2siv2di3"
3200 [(set (match_operand:V2DI 0 "register_operand" "=x")
3204 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3205 (parallel [(const_int 0) (const_int 2)])))
3208 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3209 (parallel [(const_int 0) (const_int 2)])))))]
3210 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3211 "pmuludq\t{%2, %0|%0, %2}"
3212 [(set_attr "type" "sseimul")
3213 (set_attr "prefix_data16" "1")
3214 (set_attr "mode" "TI")])
;; SSE4.1 widening multiply producing a V2DI result from elements 0 and 2 of
;; two V4SI operands (see the vec_select parallels); emitted as pmuldq.
;; Operand 1 is tied to the destination and commutative ("%0").
3216 (define_insn "sse4_1_mulv2siv2di3"
3217 [(set (match_operand:V2DI 0 "register_operand" "=x")
3221 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3222 (parallel [(const_int 0) (const_int 2)])))
3225 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3226 (parallel [(const_int 0) (const_int 2)])))))]
3227 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3228 "pmuldq\t{%2, %0|%0, %2}"
3229 [(set_attr "type" "sseimul")
3230 (set_attr "prefix_extra" "1")
3231 (set_attr "mode" "TI")])
;; pmaddwd: V4SI result computed from two V8HI operands.  The RTL selects
;; V4HI sub-vectors of each operand (one selection starting at element 0,
;; one starting at element 1); operands 1 and 2 are reused via match_dup.
;; Operand 1 is tied to the destination and commutative ("%0").
3233 (define_insn "sse2_pmaddwd"
3234 [(set (match_operand:V4SI 0 "register_operand" "=x")
3239 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3240 (parallel [(const_int 0)
3246 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3247 (parallel [(const_int 0)
3253 (vec_select:V4HI (match_dup 1)
3254 (parallel [(const_int 1)
3259 (vec_select:V4HI (match_dup 2)
3260 (parallel [(const_int 1)
3263 (const_int 7)]))))))]
3264 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3265 "pmaddwd\t{%2, %0|%0, %2}"
3266 [(set_attr "type" "sseiadd")
3267 (set_attr "prefix_data16" "1")
3268 (set_attr "mode" "TI")])
;; Expander for the element-wise V4SI multiply; all operands are registers.
;; The operand fix-up below is applied only for SSE4.1 or SSE5 targets.
3270 (define_expand "mulv4si3"
3271 [(set (match_operand:V4SI 0 "register_operand" "")
3272 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3273 (match_operand:V4SI 2 "register_operand" "")))]
3276 if (TARGET_SSE4_1 || TARGET_SSE5)
3277 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; SSE4.1 V4SI multiply, emitted as pmulld.  Operand 1 is tied to the
;; destination and commutative ("%0"); operand 2 is register or memory.
3280 (define_insn "*sse4_1_mulv4si3"
3281 [(set (match_operand:V4SI 0 "register_operand" "=x")
3282 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3283 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3284 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3285 "pmulld\t{%2, %0|%0, %2}"
3286 [(set_attr "type" "sseimul")
3287 (set_attr "prefix_extra" "1")
3288 (set_attr "mode" "TI")])
3290 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3291 ;; multiply/add. In general, we expect the define_split to occur before
3292 ;; register allocation, so we have to handle the corner case where the target
3293 ;; is used as the base or index register in operands 1/2.
;; The destination is early-clobber ("=&x").  The split condition allows
;; splitting after reload, or earlier only when operand 0 is not mentioned in
;; operand 1 or operand 2.  operands[3] is set to the V4SI zero vector and
;; serves as the addend of the replacement multiply/add.
3294 (define_insn_and_split "*sse5_mulv4si3"
3295 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3296 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3297 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3300 "&& (reload_completed
3301 || (!reg_mentioned_p (operands[0], operands[1])
3302 && !reg_mentioned_p (operands[0], operands[2])))"
3306 (plus:V4SI (mult:V4SI (match_dup 1)
3310 operands[3] = CONST0_RTX (V4SImode);
3312 [(set_attr "type" "ssemuladd")
3313 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor SSE5.  The
;; pattern is only valid before reload because the split allocates fresh
;; pseudo registers.  It is built from two gen_sse2_umulv2siv2di3 multiplies
;; (elements 0/2, then elements 1/3 after a whole-register right shift),
;; followed by pshufd and punpckldq to gather the four results.
3315 (define_insn_and_split "*sse2_mulv4si3"
3316 [(set (match_operand:V4SI 0 "register_operand" "")
3317 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3318 (match_operand:V4SI 2 "register_operand" "")))]
3319 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3320 && !(reload_completed || reload_in_progress)"
3325 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
/* Six V4SI temporaries; t1 and t4 are written through V2DI lowparts,
   t2 and t3 through TImode lowparts.  */
3331 t1 = gen_reg_rtx (V4SImode);
3332 t2 = gen_reg_rtx (V4SImode);
3333 t3 = gen_reg_rtx (V4SImode);
3334 t4 = gen_reg_rtx (V4SImode);
3335 t5 = gen_reg_rtx (V4SImode);
3336 t6 = gen_reg_rtx (V4SImode);
3337 thirtytwo = GEN_INT (32);
3339 /* Multiply elements 2 and 0. */
3340 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3343 /* Shift both input vectors down one element, so that elements 3
3344 and 1 are now in the slots for elements 2 and 0. For K8, at
3345 least, this is faster than using a shuffle. */
3346 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3347 gen_lowpart (TImode, op1),
3349 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3350 gen_lowpart (TImode, op2),
3352 /* Multiply elements 3 and 1. */
3353 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3356 /* Move the results in element 2 down to element 1; we don't care
3357 what goes in elements 2 and 3. */
3358 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3359 const0_rtx, const0_rtx));
3360 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3361 const0_rtx, const0_rtx));
3363 /* Merge the parts back together. */
3364 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from 32x32->64 multiplies; only valid before
;; reload because the split allocates fresh pseudo registers.  The result is
;; lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32), where lo/hi are the low
;; and high 32-bit halves of each 64-bit element.
3368 (define_insn_and_split "mulv2di3"
3369 [(set (match_operand:V2DI 0 "register_operand" "")
3370 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3371 (match_operand:V2DI 2 "register_operand" "")))]
3373 && !(reload_completed || reload_in_progress)"
3378 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3384 t1 = gen_reg_rtx (V2DImode);
3385 t2 = gen_reg_rtx (V2DImode);
3386 t3 = gen_reg_rtx (V2DImode);
3387 t4 = gen_reg_rtx (V2DImode);
3388 t5 = gen_reg_rtx (V2DImode);
3389 t6 = gen_reg_rtx (V2DImode);
3390 thirtytwo = GEN_INT (32);
3392 /* Multiply low parts. */
3393 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3394 gen_lowpart (V4SImode, op2)));
3396 /* Shift input vectors right 32 bits so we can multiply high parts. */
3397 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3398 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3400 /* Multiply high parts by low parts. */
3401 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3402 gen_lowpart (V4SImode, t3)));
3403 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3404 gen_lowpart (V4SImode, t2)));
3406 /* Shift them back. */
3407 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3408 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3410 /* Add the three parts together. */
3411 emit_insn (gen_addv2di3 (t6, t1, t4));
3412 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI -> V4SI multiply, "hi" variant.  Emits mulv8hi3 into t1 and
;; smulv8hi3_highpart into t2, then interleaves the high elements of t1 and
;; t2 into operand 0 viewed as V8HI.
3416 (define_expand "vec_widen_smult_hi_v8hi"
3417 [(match_operand:V4SI 0 "register_operand" "")
3418 (match_operand:V8HI 1 "register_operand" "")
3419 (match_operand:V8HI 2 "register_operand" "")]
3422 rtx op1, op2, t1, t2, dest;
3426 t1 = gen_reg_rtx (V8HImode);
3427 t2 = gen_reg_rtx (V8HImode);
3428 dest = gen_lowpart (V8HImode, operands[0]);
3430 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3431 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3432 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI -> V4SI multiply, "lo" variant.  Emits mulv8hi3 into t1 and
;; smulv8hi3_highpart into t2, then interleaves the low elements of t1 and
;; t2 into operand 0 viewed as V8HI.
3436 (define_expand "vec_widen_smult_lo_v8hi"
3437 [(match_operand:V4SI 0 "register_operand" "")
3438 (match_operand:V8HI 1 "register_operand" "")
3439 (match_operand:V8HI 2 "register_operand" "")]
3442 rtx op1, op2, t1, t2, dest;
3446 t1 = gen_reg_rtx (V8HImode);
3447 t2 = gen_reg_rtx (V8HImode);
3448 dest = gen_lowpart (V8HImode, operands[0]);
3450 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3451 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3452 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V8HI -> V4SI multiply, unsigned "hi" variant.  Emits mulv8hi3
;; into t1 and umulv8hi3_highpart into t2, then interleaves the high elements
;; of t1 and t2 into operand 0 viewed as V8HI.
3456 (define_expand "vec_widen_umult_hi_v8hi"
3457 [(match_operand:V4SI 0 "register_operand" "")
3458 (match_operand:V8HI 1 "register_operand" "")
3459 (match_operand:V8HI 2 "register_operand" "")]
3462 rtx op1, op2, t1, t2, dest;
3466 t1 = gen_reg_rtx (V8HImode);
3467 t2 = gen_reg_rtx (V8HImode);
3468 dest = gen_lowpart (V8HImode, operands[0]);
3470 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3471 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3472 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI -> V4SI multiply, unsigned "lo" variant.  Emits mulv8hi3
;; into t1 and umulv8hi3_highpart into t2, then interleaves the low elements
;; of t1 and t2 into operand 0 viewed as V8HI.
3476 (define_expand "vec_widen_umult_lo_v8hi"
3477 [(match_operand:V4SI 0 "register_operand" "")
3478 (match_operand:V8HI 1 "register_operand" "")
3479 (match_operand:V8HI 2 "register_operand" "")]
3482 rtx op1, op2, t1, t2, dest;
3486 t1 = gen_reg_rtx (V8HImode);
3487 t2 = gen_reg_rtx (V8HImode);
3488 dest = gen_lowpart (V8HImode, operands[0]);
3490 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3491 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3492 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI -> V2DI multiply, signed "hi" variant.  Each input is
;; interleaved with itself (high elements) so the wanted elements land in
;; positions 0 and 2, which sse2_umulv2siv2di3 then multiplies.
;; NOTE(review): the final multiply is gen_sse2_umulv2siv2di3, whose insn
;; emits pmuludq (the unsigned multiply), although this is the signed
;; expander -- confirm results are correct for negative elements.
3496 (define_expand "vec_widen_smult_hi_v4si"
3497 [(match_operand:V2DI 0 "register_operand" "")
3498 (match_operand:V4SI 1 "register_operand" "")
3499 (match_operand:V4SI 2 "register_operand" "")]
3502 rtx op1, op2, t1, t2;
3506 t1 = gen_reg_rtx (V4SImode);
3507 t2 = gen_reg_rtx (V4SImode);
3509 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3510 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3511 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI -> V2DI multiply, signed "lo" variant.  Each input is
;; interleaved with itself (low elements) so the wanted elements land in
;; positions 0 and 2, which sse2_umulv2siv2di3 then multiplies.
;; NOTE(review): the final multiply is gen_sse2_umulv2siv2di3, whose insn
;; emits pmuludq (the unsigned multiply), although this is the signed
;; expander -- confirm results are correct for negative elements.
3515 (define_expand "vec_widen_smult_lo_v4si"
3516 [(match_operand:V2DI 0 "register_operand" "")
3517 (match_operand:V4SI 1 "register_operand" "")
3518 (match_operand:V4SI 2 "register_operand" "")]
3521 rtx op1, op2, t1, t2;
3525 t1 = gen_reg_rtx (V4SImode);
3526 t2 = gen_reg_rtx (V4SImode);
3528 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3529 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3530 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI -> V2DI multiply, unsigned "hi" variant.  Each input is
;; interleaved with itself (high elements) so the wanted elements land in
;; positions 0 and 2, which sse2_umulv2siv2di3 (pmuludq) then multiplies.
3534 (define_expand "vec_widen_umult_hi_v4si"
3535 [(match_operand:V2DI 0 "register_operand" "")
3536 (match_operand:V4SI 1 "register_operand" "")
3537 (match_operand:V4SI 2 "register_operand" "")]
3540 rtx op1, op2, t1, t2;
3544 t1 = gen_reg_rtx (V4SImode);
3545 t2 = gen_reg_rtx (V4SImode);
3547 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3548 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3549 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI -> V2DI multiply, unsigned "lo" variant.  Each input is
;; interleaved with itself (low elements) so the wanted elements land in
;; positions 0 and 2, which sse2_umulv2siv2di3 (pmuludq) then multiplies.
3553 (define_expand "vec_widen_umult_lo_v4si"
3554 [(match_operand:V2DI 0 "register_operand" "")
3555 (match_operand:V4SI 1 "register_operand" "")
3556 (match_operand:V4SI 2 "register_operand" "")]
3559 rtx op1, op2, t1, t2;
3563 t1 = gen_reg_rtx (V4SImode);
3564 t2 = gen_reg_rtx (V4SImode);
3566 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3567 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3568 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander on V8HI inputs with a V4SI accumulator:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2).
3572 (define_expand "sdot_prodv8hi"
3573 [(match_operand:V4SI 0 "register_operand" "")
3574 (match_operand:V8HI 1 "register_operand" "")
3575 (match_operand:V8HI 2 "register_operand" "")
3576 (match_operand:V4SI 3 "register_operand" "")]
3579 rtx t = gen_reg_rtx (V4SImode);
3580 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3581 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander on V4SI inputs with a V2DI accumulator.  Two
;; sse2_umulv2siv2di3 multiplies are used: one on the inputs as given
;; (elements 0 and 2) and one on copies shifted right as whole TImode values
;; (t2, t3).  Both products and the accumulator (operand 3) are summed into
;; operand 0.
3585 (define_expand "udot_prodv4si"
3586 [(match_operand:V2DI 0 "register_operand" "")
3587 (match_operand:V4SI 1 "register_operand" "")
3588 (match_operand:V4SI 2 "register_operand" "")
3589 (match_operand:V2DI 3 "register_operand" "")]
/* t1 = product of the unshifted inputs, plus the accumulator.  */
3594 t1 = gen_reg_rtx (V2DImode);
3595 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3596 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
/* t2, t3 = inputs shifted right as 128-bit values.  */
3598 t2 = gen_reg_rtx (V4SImode);
3599 t3 = gen_reg_rtx (V4SImode);
3600 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3601 gen_lowpart (TImode, operands[1]),
3603 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3604 gen_lowpart (TImode, operands[2]),
/* t4 = product of the shifted inputs; final result is t1 + t4.  */
3607 t4 = gen_reg_rtx (V2DImode);
3608 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3610 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Shift for V8HI and V4SI (SSEMODE24), emitted as psraw / psrad.  The value
;; being shifted is tied to the destination; the SImode count is accepted
;; under the "x" or "N" constraints.
3614 (define_insn "ashr<mode>3"
3615 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3617 (match_operand:SSEMODE24 1 "register_operand" "0")
3618 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3620 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3621 [(set_attr "type" "sseishft")
3622 (set_attr "prefix_data16" "1")
3623 (set_attr "mode" "TI")])
;; Logical right shift for V8HI, V4SI and V2DI (SSEMODE248), emitted as
;; psrlw / psrld / psrlq.  The value being shifted is tied to the
;; destination; the SImode count is accepted under "x" or "N".
3625 (define_insn "lshr<mode>3"
3626 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3627 (lshiftrt:SSEMODE248
3628 (match_operand:SSEMODE248 1 "register_operand" "0")
3629 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3631 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3632 [(set_attr "type" "sseishft")
3633 (set_attr "prefix_data16" "1")
3634 (set_attr "mode" "TI")])
;; Shift for V8HI, V4SI and V2DI (SSEMODE248), emitted as psllw / pslld /
;; psllq.  The value being shifted is tied to the destination; the SImode
;; count is accepted under "x" or "N".
3636 (define_insn "ashl<mode>3"
3637 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3639 (match_operand:SSEMODE248 1 "register_operand" "0")
3640 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3642 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3643 [(set_attr "type" "sseishft")
3644 (set_attr "prefix_data16" "1")
3645 (set_attr "mode" "TI")])
;; Whole-vector left shift for the 16-byte integer vector modes, expressed
;; as a TImode ashift.  The count must satisfy const_0_to_255_mul_8_operand.
;; Operands 0 and 1 are replaced by their TImode lowparts so the TImode
;; shift RTL is well formed.
3647 (define_expand "vec_shl_<mode>"
3648 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3649 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3650 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3653 operands[0] = gen_lowpart (TImode, operands[0]);
3654 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector right shift for the 16-byte integer vector modes, expressed
;; as a TImode lshiftrt.  The count must satisfy
;; const_0_to_255_mul_8_operand.  Operands 0 and 1 are replaced by their
;; TImode lowparts so the TImode shift RTL is well formed.
3657 (define_expand "vec_shr_<mode>"
3658 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3659 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3660 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3663 operands[0] = gen_lowpart (TImode, operands[0]);
3664 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for the unsigned maximum of V16QI vectors; the preparation
;; statement fixes up the operands as for a UMAX binary operation.
3667 (define_expand "umaxv16qi3"
3668 [(set (match_operand:V16QI 0 "register_operand" "")
3669 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3670 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3672 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned maximum of V16QI vectors, emitted as pmaxub.  Operand 1 is tied
;; to the destination and commutative ("%0").
3674 (define_insn "*umaxv16qi3"
3675 [(set (match_operand:V16QI 0 "register_operand" "=x")
3676 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3677 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3678 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3679 "pmaxub\t{%2, %0|%0, %2}"
3680 [(set_attr "type" "sseiadd")
3681 (set_attr "prefix_data16" "1")
3682 (set_attr "mode" "TI")])
;; Expander for the signed maximum of V8HI vectors; the preparation
;; statement fixes up the operands as for an SMAX binary operation.
3684 (define_expand "smaxv8hi3"
3685 [(set (match_operand:V8HI 0 "register_operand" "")
3686 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3687 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3689 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed maximum of V8HI vectors, emitted as pmaxsw.  Operand 1 is tied to
;; the destination and commutative ("%0").
3691 (define_insn "*smaxv8hi3"
3692 [(set (match_operand:V8HI 0 "register_operand" "=x")
3693 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3694 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3695 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3696 "pmaxsw\t{%2, %0|%0, %2}"
3697 [(set_attr "type" "sseiadd")
3698 (set_attr "prefix_data16" "1")
3699 (set_attr "mode" "TI")])
;; Expander for the unsigned maximum of V8HI vectors.  Two strategies are
;; visible: fixing up the operands so a UMAX insn can match, or synthesizing
;; the result as ussub (op1, op2) + op2.
;; NOTE(review): the test selecting between the two is not visible in this
;; excerpt -- presumably a target check; confirm.
3701 (define_expand "umaxv8hi3"
3702 [(set (match_operand:V8HI 0 "register_operand" "")
3703 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3704 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3708 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3711 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
/* If the destination is also operand 2, subtract into a fresh register so
   operand 2 is still intact for the following add.  */
3712 if (rtx_equal_p (op3, op2))
3713 op3 = gen_reg_rtx (V8HImode);
3714 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3715 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for the signed maximum of V16QI and V4SI vectors (SSEMODE14).
;; Two strategies are visible: fixing up the operands so an SMAX insn can
;; match, or expanding through ix86_expand_int_vcond as
;; "op1 > op2 ? op1 : op2".
;; NOTE(review): the test selecting between the two is not visible in this
;; excerpt -- presumably a target check; confirm.
3720 (define_expand "smax<mode>3"
3721 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3722 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3723 (match_operand:SSEMODE14 2 "register_operand" "")))]
3727 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
/* xops: [0] destination, [1] value if true, [2] value if false,
   [3] comparison, [4]/[5] comparison operands.  */
3733 xops[0] = operands[0];
3734 xops[1] = operands[1];
3735 xops[2] = operands[2];
3736 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3737 xops[4] = operands[1];
3738 xops[5] = operands[2];
3739 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for V16QI and V4SI (SSEMODE14), emitted as pmaxsb /
;; pmaxsd.  Operand 1 is tied to the destination and commutative ("%0").
3745 (define_insn "*sse4_1_smax<mode>3"
3746 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3748 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3749 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3750 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3751 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3752 [(set_attr "type" "sseiadd")
3753 (set_attr "prefix_extra" "1")
3754 (set_attr "mode" "TI")])
;; Expander for the unsigned maximum of V4SI vectors.  Two strategies are
;; visible: fixing up the operands so a UMAX insn can match, or expanding
;; through ix86_expand_int_vcond as "op1 >u op2 ? op1 : op2".
;; NOTE(review): the test selecting between the two is not visible in this
;; excerpt -- presumably a target check; confirm.
3756 (define_expand "umaxv4si3"
3757 [(set (match_operand:V4SI 0 "register_operand" "")
3758 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3759 (match_operand:V4SI 2 "register_operand" "")))]
3763 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
/* xops: [0] destination, [1] value if true, [2] value if false,
   [3] comparison, [4]/[5] comparison operands.  */
3769 xops[0] = operands[0];
3770 xops[1] = operands[1];
3771 xops[2] = operands[2];
3772 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3773 xops[4] = operands[1];
3774 xops[5] = operands[2];
3775 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for V8HI and V4SI (SSEMODE24), emitted as pmaxuw /
;; pmaxud.  Operand 1 is tied to the destination and commutative ("%0").
3781 (define_insn "*sse4_1_umax<mode>3"
3782 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3784 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3785 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3786 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3787 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3788 [(set_attr "type" "sseiadd")
3789 (set_attr "prefix_extra" "1")
3790 (set_attr "mode" "TI")])
;; Expander for the unsigned minimum of V16QI vectors; the preparation
;; statement fixes up the operands as for a UMIN binary operation.
3792 (define_expand "uminv16qi3"
3793 [(set (match_operand:V16QI 0 "register_operand" "")
3794 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3795 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3797 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned minimum of V16QI vectors, emitted as pminub.  Operand 1 is tied
;; to the destination and commutative ("%0").
3799 (define_insn "*uminv16qi3"
3800 [(set (match_operand:V16QI 0 "register_operand" "=x")
3801 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3802 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3803 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3804 "pminub\t{%2, %0|%0, %2}"
3805 [(set_attr "type" "sseiadd")
3806 (set_attr "prefix_data16" "1")
3807 (set_attr "mode" "TI")])
;; Expander for the signed minimum of V8HI vectors; the preparation
;; statement fixes up the operands as for an SMIN binary operation.
3809 (define_expand "sminv8hi3"
3810 [(set (match_operand:V8HI 0 "register_operand" "")
3811 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3812 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3814 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed minimum of V8HI vectors, emitted as pminsw.  Operand 1 is tied to
;; the destination and commutative ("%0").
3816 (define_insn "*sminv8hi3"
3817 [(set (match_operand:V8HI 0 "register_operand" "=x")
3818 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3819 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3820 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3821 "pminsw\t{%2, %0|%0, %2}"
3822 [(set_attr "type" "sseiadd")
3823 (set_attr "prefix_data16" "1")
3824 (set_attr "mode" "TI")])
;; Expander for the signed minimum of V16QI and V4SI vectors (SSEMODE14).
;; Two strategies are visible: fixing up the operands so an SMIN insn can
;; match, or expanding through ix86_expand_int_vcond as
;; "op1 > op2 ? op2 : op1".
;; NOTE(review): the test selecting between the two is not visible in this
;; excerpt -- presumably a target check; confirm.
3826 (define_expand "smin<mode>3"
3827 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3828 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3829 (match_operand:SSEMODE14 2 "register_operand" "")))]
3833 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
/* Same comparison as the max expanders, but the selected values are
   swapped: xops[1] (if true) is operand 2, xops[2] (if false) is
   operand 1.  */
3839 xops[0] = operands[0];
3840 xops[1] = operands[2];
3841 xops[2] = operands[1];
3842 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3843 xops[4] = operands[1];
3844 xops[5] = operands[2];
3845 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for V16QI and V4SI (SSEMODE14), emitted as pminsb /
;; pminsd.  Operand 1 is tied to the destination and commutative ("%0").
3851 (define_insn "*sse4_1_smin<mode>3"
3852 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3854 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3855 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3856 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3857 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3858 [(set_attr "type" "sseiadd")
3859 (set_attr "prefix_extra" "1")
3860 (set_attr "mode" "TI")])
;; Expander for the unsigned minimum of V8HI and V4SI vectors (SSEMODE24).
;; Two strategies are visible: fixing up the operands so a UMIN insn can
;; match, or expanding through ix86_expand_int_vcond as
;; "op1 >u op2 ? op2 : op1".
;; NOTE(review): the test selecting between the two is not visible in this
;; excerpt -- presumably a target check; confirm.
3862 (define_expand "umin<mode>3"
3863 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3864 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3865 (match_operand:SSEMODE24 2 "register_operand" "")))]
3869 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
/* Same comparison as the max expanders, but the selected values are
   swapped: xops[1] (if true) is operand 2, xops[2] (if false) is
   operand 1.  */
3875 xops[0] = operands[0];
3876 xops[1] = operands[2];
3877 xops[2] = operands[1];
3878 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3879 xops[4] = operands[1];
3880 xops[5] = operands[2];
3881 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for V8HI and V4SI (SSEMODE24), emitted as pminuw /
;; pminud.  Operand 1 is tied to the destination and commutative ("%0").
3887 (define_insn "*sse4_1_umin<mode>3"
3888 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3890 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3891 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3892 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3893 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3894 [(set_attr "type" "sseiadd")
3895 (set_attr "prefix_extra" "1")
3896 (set_attr "mode" "TI")])
3898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3900 ;; Parallel integral comparisons
3902 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for V16QI, V8HI and V4SI (SSEMODE124),
;; emitted as pcmpeqb / pcmpeqw / pcmpeqd.  Disabled when TARGET_SSE5 is set.
;; Operand 1 is tied to the destination and commutative ("%0").
3904 (define_insn "sse2_eq<mode>3"
3905 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3907 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3908 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3909 "TARGET_SSE2 && !TARGET_SSE5
3910 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3911 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3912 [(set_attr "type" "ssecmp")
3913 (set_attr "prefix_data16" "1")
3914 (set_attr "mode" "TI")])
;; SSE4.1 element-wise equality compare for V2DI, emitted as pcmpeqq.
;; Operand 1 is tied to the destination and commutative ("%0").
3916 (define_insn "sse4_1_eqv2di3"
3917 [(set (match_operand:V2DI 0 "register_operand" "=x")
3919 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3920 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3921 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3922 "pcmpeqq\t{%2, %0|%0, %2}"
3923 [(set_attr "type" "ssecmp")
3924 (set_attr "prefix_extra" "1")
3925 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V16QI, V8HI and V4SI (SSEMODE124),
;; emitted as pcmpgtb / pcmpgtw / pcmpgtd.  Disabled when TARGET_SSE5 is set.
;; Operand 1 has no commutative marker: it must be a register and is tied
;; to the destination.
3927 (define_insn "sse2_gt<mode>3"
3928 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3930 (match_operand:SSEMODE124 1 "register_operand" "0")
3931 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3932 "TARGET_SSE2 && !TARGET_SSE5"
3933 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3934 [(set_attr "type" "ssecmp")
3935 (set_attr "prefix_data16" "1")
3936 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V2DI, emitted as pcmpgtq.
;; The operands are not commutative and operand 1 is tied to the destination
;; by the "0" constraint, so its predicate is register_operand (as in
;; sse2_gt<mode>3) rather than nonimmediate_operand: a memory operand 1 could
;; never satisfy the constraint without a reload.
3938 (define_insn "sse4_2_gtv2di3"
3939 [(set (match_operand:V2DI 0 "register_operand" "=x")
3941 (match_operand:V2DI 1 "register_operand" "0")
3942 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3944 "pcmpgtq\t{%2, %0|%0, %2}"
3945 [(set_attr "type" "ssecmp")
3946 (set_attr "mode" "TI")])
;; Vector conditional select for the 16-byte integer vector modes:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; Expansion is delegated to ix86_expand_int_vcond, whose return value is
;; tested to decide how the expander finishes.
3948 (define_expand "vcond<mode>"
3949 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3950 (if_then_else:SSEMODEI
3951 (match_operator 3 ""
3952 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3953 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3954 (match_operand:SSEMODEI 1 "general_operand" "")
3955 (match_operand:SSEMODEI 2 "general_operand" "")))]
3958 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> under the vcondu name, with the same operand
;; layout: operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 :
;; operand 2.  Expansion is delegated to ix86_expand_int_vcond.
3964 (define_expand "vcondu<mode>"
3965 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3966 (if_then_else:SSEMODEI
3967 (match_operator 3 ""
3968 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3969 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3970 (match_operand:SSEMODEI 1 "general_operand" "")
3971 (match_operand:SSEMODEI 2 "general_operand" "")))]
3974 if (ix86_expand_int_vcond (operands))
3980 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3982 ;; Parallel bitwise logical operations
3984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer vector modes, expressed as an
;; XOR of operand 1 with an all-ones vector.  The preparation code builds
;; that constant (every element constm1_rtx) and forces it into a register
;; as operands[2].
3986 (define_expand "one_cmpl<mode>2"
3987 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3988 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3992 int i, n = GET_MODE_NUNITS (<MODE>mode);
3993 rtvec v = rtvec_alloc (n);
3995 for (i = 0; i < n; ++i)
3996 RTVEC_ELT (v, i) = constm1_rtx;
3998 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the 16-byte integer vector modes; the
;; preparation statement fixes up the operands as for an AND operation.
4001 (define_expand "and<mode>3"
4002 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4003 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4004 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4006 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND of integer vectors for SSE-only targets (TARGET_SSE without
;; TARGET_SSE2), emitted as andps.
4008 (define_insn "*sse_and<mode>3"
4009 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4011 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4012 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4013 "(TARGET_SSE && !TARGET_SSE2)
4014 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4015 "andps\t{%2, %0|%0, %2}"
4016 [(set_attr "type" "sselog")
4017 (set_attr "mode" "V4SF")])
;; Bitwise AND of integer vectors for SSE2 targets, emitted as pand.
;; Operand 1 is tied to the destination and commutative ("%0").
4019 (define_insn "*sse2_and<mode>3"
4020 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4022 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4023 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4024 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4025 "pand\t{%2, %0|%0, %2}"
4026 [(set_attr "type" "sselog")
4027 (set_attr "prefix_data16" "1")
4028 (set_attr "mode" "TI")])
;; AND-NOT of integer vectors for SSE-only targets, emitted as andnps.
;; Operand 1 is the complemented input (wrapped in "not"); it must be a
;; register and is tied to the destination.
4030 (define_insn "*sse_nand<mode>3"
4031 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4033 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4034 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4035 "(TARGET_SSE && !TARGET_SSE2)"
4036 "andnps\t{%2, %0|%0, %2}"
4037 [(set_attr "type" "sselog")
4038 (set_attr "mode" "V4SF")])
;; AND-NOT of integer vectors, emitted as pandn.  Operand 1 is the
;; complemented input (wrapped in "not"); it must be a register and is tied
;; to the destination.
4040 (define_insn "sse2_nand<mode>3"
4041 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4043 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4044 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4046 "pandn\t{%2, %0|%0, %2}"
4047 [(set_attr "type" "sselog")
4048 (set_attr "prefix_data16" "1")
4049 (set_attr "mode" "TI")])
;; Expander for bitwise AND on TFmode values; the preparation statement
;; fixes up the operands as for an AND operation.
4051 (define_expand "andtf3"
4052 [(set (match_operand:TF 0 "register_operand" "")
4053 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
4054 (match_operand:TF 2 "nonimmediate_operand" "")))]
4056 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; Bitwise AND of TFmode values on 64-bit targets, emitted as pand.
;; Operand 1 is tied to the destination and commutative ("%0").
4058 (define_insn "*andtf3"
4059 [(set (match_operand:TF 0 "register_operand" "=x")
4061 (match_operand:TF 1 "nonimmediate_operand" "%0")
4062 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4063 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
4064 "pand\t{%2, %0|%0, %2}"
4065 [(set_attr "type" "sselog")
4066 (set_attr "prefix_data16" "1")
4067 (set_attr "mode" "TI")])
;; AND-NOT of TFmode values, emitted as pandn.  Operand 1 is the
;; complemented input (wrapped in "not"); it must be a register and is tied
;; to the destination.
4069 (define_insn "*nandtf3"
4070 [(set (match_operand:TF 0 "register_operand" "=x")
4072 (not:TF (match_operand:TF 1 "register_operand" "0"))
4073 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4075 "pandn\t{%2, %0|%0, %2}"
4076 [(set_attr "type" "sselog")
4077 (set_attr "prefix_data16" "1")
4078 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on the 16-byte integer vector modes; the
;; preparation statement fixes up the operands as for an IOR operation.
4080 (define_expand "ior<mode>3"
4081 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4082 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4083 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4085 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise OR of integer vectors for SSE-only targets (TARGET_SSE without
;; TARGET_SSE2), emitted as orps.
4087 (define_insn "*sse_ior<mode>3"
4088 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4090 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4091 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4092 "(TARGET_SSE && !TARGET_SSE2)
4093 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4094 "orps\t{%2, %0|%0, %2}"
4095 [(set_attr "type" "sselog")
4096 (set_attr "mode" "V4SF")])
;; Bitwise OR of integer vectors for SSE2 targets, emitted as por.
;; Operand 1 is tied to the destination and commutative ("%0").
4098 (define_insn "*sse2_ior<mode>3"
4099 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4101 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4102 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4103 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4104 "por\t{%2, %0|%0, %2}"
4105 [(set_attr "type" "sselog")
4106 (set_attr "prefix_data16" "1")
4107 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on TFmode values; the preparation
;; statement fixes up the operands as for an IOR operation.
4109 (define_expand "iortf3"
4110 [(set (match_operand:TF 0 "register_operand" "")
4111 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
4112 (match_operand:TF 2 "nonimmediate_operand" "")))]
4114 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; Bitwise OR of TFmode values on 64-bit targets, emitted as por.
;; Operand 1 is tied to the destination and commutative ("%0").
4116 (define_insn "*iortf3"
4117 [(set (match_operand:TF 0 "register_operand" "=x")
4119 (match_operand:TF 1 "nonimmediate_operand" "%0")
4120 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4121 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
4122 "por\t{%2, %0|%0, %2}"
4123 [(set_attr "type" "sselog")
4124 (set_attr "prefix_data16" "1")
4125 (set_attr "mode" "TI")])
;; Expander for bitwise XOR on the 16-byte integer vector modes; the
;; preparation statement fixes up the operands as for an XOR operation.
4127 (define_expand "xor<mode>3"
4128 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4129 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4130 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4132 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise XOR of integer vectors for SSE-only targets (TARGET_SSE without
;; TARGET_SSE2), emitted as xorps.
4134 (define_insn "*sse_xor<mode>3"
4135 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4137 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4138 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4139 "(TARGET_SSE && !TARGET_SSE2)
4140 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4141 "xorps\t{%2, %0|%0, %2}"
4142 [(set_attr "type" "sselog")
4143 (set_attr "mode" "V4SF")])
;; Bitwise XOR of integer vectors for SSE2 targets, emitted as pxor.
;; Operand 1 is tied to the destination and commutative ("%0").
4145 (define_insn "*sse2_xor<mode>3"
4146 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4148 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4149 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4150 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4151 "pxor\t{%2, %0|%0, %2}"
4152 [(set_attr "type" "sselog")
4153 (set_attr "prefix_data16" "1")
4154 (set_attr "mode" "TI")])
;; Expander for bitwise XOR on TFmode values.  Its preparation statement
;; hands the operand array to ix86_fixup_binary_operands_no_copy with
;; the XOR code and TFmode.
4156 (define_expand "xortf3"
4157 [(set (match_operand:TF 0 "register_operand" "")
4158 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
4159 (match_operand:TF 2 "nonimmediate_operand" "")))]
4161 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; TFmode XOR insn, emitted as "pxor".  Enabled for TARGET_64BIT when
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("%0"); operand 2 may be an SSE register or memory.
4163 (define_insn "*xortf3"
4164 [(set (match_operand:TF 0 "register_operand" "=x")
4166 (match_operand:TF 1 "nonimmediate_operand" "%0")
4167 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4168 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
4169 "pxor\t{%2, %0|%0, %2}"
4170 [(set_attr "type" "sselog")
4171 (set_attr "prefix_data16" "1")
4172 (set_attr "mode" "TI")])
4174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4176 ;; Parallel integral element swizzling
4178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI vectors into one V16QI vector using only byte
;; interleaves.  Both inputs are viewed as V16QI (gen_lowpart), then three
;; rounds of high/low interleave are applied to fresh pseudo registers and a
;; final low interleave writes operand 0.  The letter diagram below traces
;; the intermediate values.
;; NOTE(review): the element ordering convention used by the diagram is not
;; stated here -- confirm before relying on it.
4181 ;; op1 = abcdefghijklmnop
4182 ;; op2 = qrstuvwxyz012345
4183 ;; h1 = aqbrcsdteufvgwhx
4184 ;; l1 = iyjzk0l1m2n3o4p5
4185 ;; h2 = aiqybjrzcks0dlt1
4186 ;; l2 = emu2fnv3gow4hpx5
4187 ;; h3 = aeimquy2bfjnrvz3
4188 ;; l3 = cgkosw04dhlptx15
4189 ;; result = bdfhjlnprtvxz135
4190 (define_expand "vec_pack_trunc_v8hi"
4191 [(match_operand:V16QI 0 "register_operand" "")
4192 (match_operand:V8HI 1 "register_operand" "")
4193 (match_operand:V8HI 2 "register_operand" "")]
4196 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4198 op1 = gen_lowpart (V16QImode, operands[1]);
4199 op2 = gen_lowpart (V16QImode, operands[2]);
4200 h1 = gen_reg_rtx (V16QImode);
4201 l1 = gen_reg_rtx (V16QImode);
4202 h2 = gen_reg_rtx (V16QImode);
4203 l2 = gen_reg_rtx (V16QImode);
4204 h3 = gen_reg_rtx (V16QImode);
4205 l3 = gen_reg_rtx (V16QImode);
4207 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4208 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4209 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4210 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4211 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4212 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4213 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Pack two V4SI vectors into one V8HI vector using only halfword
;; interleaves.  Both inputs are viewed as V8HI (gen_lowpart), two rounds of
;; high/low interleave go through fresh pseudo registers, and a final low
;; interleave writes operand 0.
4224 ;; result = bdfhjlnp
4225 (define_expand "vec_pack_trunc_v4si"
4226 [(match_operand:V8HI 0 "register_operand" "")
4227 (match_operand:V4SI 1 "register_operand" "")
4228 (match_operand:V4SI 2 "register_operand" "")]
4231 rtx op1, op2, h1, l1, h2, l2;
4233 op1 = gen_lowpart (V8HImode, operands[1]);
4234 op2 = gen_lowpart (V8HImode, operands[2]);
4235 h1 = gen_reg_rtx (V8HImode);
4236 l1 = gen_reg_rtx (V8HImode);
4237 h2 = gen_reg_rtx (V8HImode);
4238 l2 = gen_reg_rtx (V8HImode);
4240 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4241 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4242 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4243 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4244 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI vectors into one V4SI vector using only doubleword
;; interleaves.  Both inputs are viewed as V4SI (gen_lowpart); one high and
;; one low interleave fill two pseudo registers, and a final low interleave
;; of those writes operand 0.
4254 (define_expand "vec_pack_trunc_v2di"
4255 [(match_operand:V4SI 0 "register_operand" "")
4256 (match_operand:V2DI 1 "register_operand" "")
4257 (match_operand:V2DI 2 "register_operand" "")]
4260 rtx op1, op2, h1, l1;
4262 op1 = gen_lowpart (V4SImode, operands[1]);
4263 op2 = gen_lowpart (V4SImode, operands[2]);
4264 h1 = gen_reg_rtx (V4SImode);
4265 l1 = gen_reg_rtx (V4SImode);
4267 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4268 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4269 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; High-half byte interleave of two V16QI vectors.  The selection vector
;; alternates indices 8..15 with 24..31; the expander body simply emits the
;; sse2_punpckhbw insn on the three operands.
4273 (define_expand "vec_interleave_highv16qi"
4274 [(set (match_operand:V16QI 0 "register_operand" "")
4277 (match_operand:V16QI 1 "register_operand" "")
4278 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4279 (parallel [(const_int 8) (const_int 24)
4280 (const_int 9) (const_int 25)
4281 (const_int 10) (const_int 26)
4282 (const_int 11) (const_int 27)
4283 (const_int 12) (const_int 28)
4284 (const_int 13) (const_int 29)
4285 (const_int 14) (const_int 30)
4286 (const_int 15) (const_int 31)])))]
4289 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Low-half byte interleave of two V16QI vectors.  The selection vector
;; alternates indices 0..7 with 16..23; the expander body simply emits the
;; sse2_punpcklbw insn on the three operands.
4293 (define_expand "vec_interleave_lowv16qi"
4294 [(set (match_operand:V16QI 0 "register_operand" "")
4297 (match_operand:V16QI 1 "register_operand" "")
4298 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4299 (parallel [(const_int 0) (const_int 16)
4300 (const_int 1) (const_int 17)
4301 (const_int 2) (const_int 18)
4302 (const_int 3) (const_int 19)
4303 (const_int 4) (const_int 20)
4304 (const_int 5) (const_int 21)
4305 (const_int 6) (const_int 22)
4306 (const_int 7) (const_int 23)])))]
4309 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; High-half halfword interleave of two V8HI vectors.  The selection vector
;; alternates indices 4..7 with 12..15; the expander body simply emits the
;; sse2_punpckhwd insn on the three operands.
;; Operand 0 uses an empty constraint string, matching the other
;; vec_interleave_* expanders (a lone "=" modifier carries no register
;; class and is out of place in an expander).
4313 (define_expand "vec_interleave_highv8hi"
4314 [(set (match_operand:V8HI 0 "register_operand" "")
4317 (match_operand:V8HI 1 "register_operand" "")
4318 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4319 (parallel [(const_int 4) (const_int 12)
4320 (const_int 5) (const_int 13)
4321 (const_int 6) (const_int 14)
4322 (const_int 7) (const_int 15)])))]
4325 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Low-half halfword interleave of two V8HI vectors.  The selection vector
;; alternates indices 0..3 with 8..11; the expander body simply emits the
;; sse2_punpcklwd insn on the three operands.
4329 (define_expand "vec_interleave_lowv8hi"
4330 [(set (match_operand:V8HI 0 "register_operand" "")
4333 (match_operand:V8HI 1 "register_operand" "")
4334 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4335 (parallel [(const_int 0) (const_int 8)
4336 (const_int 1) (const_int 9)
4337 (const_int 2) (const_int 10)
4338 (const_int 3) (const_int 11)])))]
4341 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; High-half doubleword interleave of two V4SI vectors (selection indices
;; 2, 6, 3, 7); the expander body emits the sse2_punpckhdq insn.
4345 (define_expand "vec_interleave_highv4si"
4346 [(set (match_operand:V4SI 0 "register_operand" "")
4349 (match_operand:V4SI 1 "register_operand" "")
4350 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4351 (parallel [(const_int 2) (const_int 6)
4352 (const_int 3) (const_int 7)])))]
4355 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Low-half doubleword interleave of two V4SI vectors (selection indices
;; 0, 4, 1, 5); the expander body emits the sse2_punpckldq insn.
4359 (define_expand "vec_interleave_lowv4si"
4360 [(set (match_operand:V4SI 0 "register_operand" "")
4363 (match_operand:V4SI 1 "register_operand" "")
4364 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4365 (parallel [(const_int 0) (const_int 4)
4366 (const_int 1) (const_int 5)])))]
4369 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; High-half quadword interleave of two V2DI vectors; the expander body
;; emits the sse2_punpckhqdq insn on the three operands.
4373 (define_expand "vec_interleave_highv2di"
4374 [(set (match_operand:V2DI 0 "register_operand" "")
4377 (match_operand:V2DI 1 "register_operand" "")
4378 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4379 (parallel [(const_int 1)
4383 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Low-half quadword interleave of two V2DI vectors; the expander body
;; emits the sse2_punpcklqdq insn on the three operands.
4387 (define_expand "vec_interleave_lowv2di"
4388 [(set (match_operand:V2DI 0 "register_operand" "")
4391 (match_operand:V2DI 1 "register_operand" "")
4392 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4393 (parallel [(const_int 0)
4397 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Packs two V8HI operands into one V16QI result with "packsswb".
;; Operand 1 must be the destination register; operand 2 may be an SSE
;; register or memory.
4401 (define_insn "sse2_packsswb"
4402 [(set (match_operand:V16QI 0 "register_operand" "=x")
4405 (match_operand:V8HI 1 "register_operand" "0"))
4407 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4409 "packsswb\t{%2, %0|%0, %2}"
4410 [(set_attr "type" "sselog")
4411 (set_attr "prefix_data16" "1")
4412 (set_attr "mode" "TI")])
;; Packs two V4SI operands into one V8HI result with "packssdw".
;; Operand 1 must be the destination register; operand 2 may be an SSE
;; register or memory.
4414 (define_insn "sse2_packssdw"
4415 [(set (match_operand:V8HI 0 "register_operand" "=x")
4418 (match_operand:V4SI 1 "register_operand" "0"))
4420 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4422 "packssdw\t{%2, %0|%0, %2}"
4423 [(set_attr "type" "sselog")
4424 (set_attr "prefix_data16" "1")
4425 (set_attr "mode" "TI")])
;; Packs two V8HI operands into one V16QI result with "packuswb".
;; Operand 1 must be the destination register; operand 2 may be an SSE
;; register or memory.
4427 (define_insn "sse2_packuswb"
4428 [(set (match_operand:V16QI 0 "register_operand" "=x")
4431 (match_operand:V8HI 1 "register_operand" "0"))
4433 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4435 "packuswb\t{%2, %0|%0, %2}"
4436 [(set_attr "type" "sselog")
4437 (set_attr "prefix_data16" "1")
4438 (set_attr "mode" "TI")])
;; "punpckhbw": interleaves the high bytes of two V16QI operands; the
;; selection vector alternates indices 8..15 with 24..31.  Operand 1 must be
;; the destination register; operand 2 may be an SSE register or memory.
4440 (define_insn "sse2_punpckhbw"
4441 [(set (match_operand:V16QI 0 "register_operand" "=x")
4444 (match_operand:V16QI 1 "register_operand" "0")
4445 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4446 (parallel [(const_int 8) (const_int 24)
4447 (const_int 9) (const_int 25)
4448 (const_int 10) (const_int 26)
4449 (const_int 11) (const_int 27)
4450 (const_int 12) (const_int 28)
4451 (const_int 13) (const_int 29)
4452 (const_int 14) (const_int 30)
4453 (const_int 15) (const_int 31)])))]
4455 "punpckhbw\t{%2, %0|%0, %2}"
4456 [(set_attr "type" "sselog")
4457 (set_attr "prefix_data16" "1")
4458 (set_attr "mode" "TI")])
;; "punpcklbw": interleaves the low bytes of two V16QI operands; the
;; selection vector alternates indices 0..7 with 16..23.  Operand 1 must be
;; the destination register; operand 2 may be an SSE register or memory.
4460 (define_insn "sse2_punpcklbw"
4461 [(set (match_operand:V16QI 0 "register_operand" "=x")
4464 (match_operand:V16QI 1 "register_operand" "0")
4465 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4466 (parallel [(const_int 0) (const_int 16)
4467 (const_int 1) (const_int 17)
4468 (const_int 2) (const_int 18)
4469 (const_int 3) (const_int 19)
4470 (const_int 4) (const_int 20)
4471 (const_int 5) (const_int 21)
4472 (const_int 6) (const_int 22)
4473 (const_int 7) (const_int 23)])))]
4475 "punpcklbw\t{%2, %0|%0, %2}"
4476 [(set_attr "type" "sselog")
4477 (set_attr "prefix_data16" "1")
4478 (set_attr "mode" "TI")])
;; "punpckhwd": interleaves the high halfwords of two V8HI operands; the
;; selection vector alternates indices 4..7 with 12..15.  Operand 1 must be
;; the destination register; operand 2 may be an SSE register or memory.
4480 (define_insn "sse2_punpckhwd"
4481 [(set (match_operand:V8HI 0 "register_operand" "=x")
4484 (match_operand:V8HI 1 "register_operand" "0")
4485 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4486 (parallel [(const_int 4) (const_int 12)
4487 (const_int 5) (const_int 13)
4488 (const_int 6) (const_int 14)
4489 (const_int 7) (const_int 15)])))]
4491 "punpckhwd\t{%2, %0|%0, %2}"
4492 [(set_attr "type" "sselog")
4493 (set_attr "prefix_data16" "1")
4494 (set_attr "mode" "TI")])
;; "punpcklwd": interleaves the low halfwords of two V8HI operands; the
;; selection vector alternates indices 0..3 with 8..11.  Operand 1 must be
;; the destination register; operand 2 may be an SSE register or memory.
4496 (define_insn "sse2_punpcklwd"
4497 [(set (match_operand:V8HI 0 "register_operand" "=x")
4500 (match_operand:V8HI 1 "register_operand" "0")
4501 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4502 (parallel [(const_int 0) (const_int 8)
4503 (const_int 1) (const_int 9)
4504 (const_int 2) (const_int 10)
4505 (const_int 3) (const_int 11)])))]
4507 "punpcklwd\t{%2, %0|%0, %2}"
4508 [(set_attr "type" "sselog")
4509 (set_attr "prefix_data16" "1")
4510 (set_attr "mode" "TI")])
;; "punpckhdq": interleaves the high doublewords of two V4SI operands
;; (selection indices 2, 6, 3, 7).  Operand 1 must be the destination
;; register; operand 2 may be an SSE register or memory.
4512 (define_insn "sse2_punpckhdq"
4513 [(set (match_operand:V4SI 0 "register_operand" "=x")
4516 (match_operand:V4SI 1 "register_operand" "0")
4517 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4518 (parallel [(const_int 2) (const_int 6)
4519 (const_int 3) (const_int 7)])))]
4521 "punpckhdq\t{%2, %0|%0, %2}"
4522 [(set_attr "type" "sselog")
4523 (set_attr "prefix_data16" "1")
4524 (set_attr "mode" "TI")])
;; "punpckldq": interleaves the low doublewords of two V4SI operands
;; (selection indices 0, 4, 1, 5).  Operand 1 must be the destination
;; register; operand 2 may be an SSE register or memory.
4526 (define_insn "sse2_punpckldq"
4527 [(set (match_operand:V4SI 0 "register_operand" "=x")
4530 (match_operand:V4SI 1 "register_operand" "0")
4531 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4532 (parallel [(const_int 0) (const_int 4)
4533 (const_int 1) (const_int 5)])))]
4535 "punpckldq\t{%2, %0|%0, %2}"
4536 [(set_attr "type" "sselog")
4537 (set_attr "prefix_data16" "1")
4538 (set_attr "mode" "TI")])
;; "punpckhqdq" on two V2DI operands.  Operand 1 must be the destination
;; register; operand 2 may be an SSE register or memory.
4540 (define_insn "sse2_punpckhqdq"
4541 [(set (match_operand:V2DI 0 "register_operand" "=x")
4544 (match_operand:V2DI 1 "register_operand" "0")
4545 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4546 (parallel [(const_int 1)
4549 "punpckhqdq\t{%2, %0|%0, %2}"
4550 [(set_attr "type" "sselog")
4551 (set_attr "prefix_data16" "1")
4552 (set_attr "mode" "TI")])
;; "punpcklqdq" on two V2DI operands.  Operand 1 must be the destination
;; register; operand 2 may be an SSE register or memory.
4554 (define_insn "sse2_punpcklqdq"
4555 [(set (match_operand:V2DI 0 "register_operand" "=x")
4558 (match_operand:V2DI 1 "register_operand" "0")
4559 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4560 (parallel [(const_int 0)
4563 "punpcklqdq\t{%2, %0|%0, %2}"
4564 [(set_attr "type" "sselog")
4565 (set_attr "prefix_data16" "1")
4566 (set_attr "mode" "TI")])
;; "pinsrb": inserts the QImode operand 2 (general register or memory) into
;; one byte of the V16QI operand 1, which must be the destination register.
;; Operand 3 arrives as a power-of-two constant and is converted to an
;; element index with exact_log2 just before the template is printed.
4568 (define_insn "*sse4_1_pinsrb"
4569 [(set (match_operand:V16QI 0 "register_operand" "=x")
4571 (vec_duplicate:V16QI
4572 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4573 (match_operand:V16QI 1 "register_operand" "0")
4574 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4577 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4578 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4580 [(set_attr "type" "sselog")
4581 (set_attr "prefix_extra" "1")
4582 (set_attr "mode" "TI")])
;; "pinsrw": inserts the HImode operand 2 (general register or memory) into
;; one halfword of the V8HI operand 1, which must be the destination
;; register.  Operand 3 arrives as a power-of-two constant and is converted
;; to an element index with exact_log2 just before the template is printed.
4584 (define_insn "*sse2_pinsrw"
4585 [(set (match_operand:V8HI 0 "register_operand" "=x")
4588 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4589 (match_operand:V8HI 1 "register_operand" "0")
4590 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4593 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4594 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4596 [(set_attr "type" "sselog")
4597 (set_attr "prefix_data16" "1")
4598 (set_attr "mode" "TI")])
;; "pinsrd": inserts the SImode operand 2 (general register or memory) into
;; one element of the V4SI operand 1, which must be the destination
;; register.  Operand 3 arrives as a power-of-two constant and is converted
;; to an element index with exact_log2 just before the template is printed.
4600 ;; It must come before sse2_loadld since it is preferred.
4601 (define_insn "*sse4_1_pinsrd"
4602 [(set (match_operand:V4SI 0 "register_operand" "=x")
4605 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4606 (match_operand:V4SI 1 "register_operand" "0")
4607 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4610 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4611 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4613 [(set_attr "type" "sselog")
4614 (set_attr "prefix_extra" "1")
4615 (set_attr "mode" "TI")])
;; "pinsrq": inserts the DImode operand 2 (general register or memory) into
;; one element of the V2DI operand 1, which must be the destination
;; register.  Operand 3 arrives as a power-of-two constant and is converted
;; to an element index with exact_log2 just before the template is printed.
4617 (define_insn "*sse4_1_pinsrq"
4618 [(set (match_operand:V2DI 0 "register_operand" "=x")
4621 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4622 (match_operand:V2DI 1 "register_operand" "0")
4623 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4626 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4627 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4629 [(set_attr "type" "sselog")
4630 (set_attr "prefix_extra" "1")
4631 (set_attr "mode" "TI")])
;; "pextrb" with a general-register destination: extracts the byte of the
;; V16QI operand 1 selected by the constant operand 2 (0..15) into an SImode
;; register.
4633 (define_insn "*sse4_1_pextrb"
4634 [(set (match_operand:SI 0 "register_operand" "=r")
4637 (match_operand:V16QI 1 "register_operand" "x")
4638 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4640 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4641 [(set_attr "type" "sselog")
4642 (set_attr "prefix_extra" "1")
4643 (set_attr "mode" "TI")])
;; "pextrb" with a memory destination: stores the byte of the V16QI
;; operand 1 selected by the constant operand 2 (0..15) to a QImode memory
;; location.
4645 (define_insn "*sse4_1_pextrb_memory"
4646 [(set (match_operand:QI 0 "memory_operand" "=m")
4648 (match_operand:V16QI 1 "register_operand" "x")
4649 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4651 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4652 [(set_attr "type" "sselog")
4653 (set_attr "prefix_extra" "1")
4654 (set_attr "mode" "TI")])
;; "pextrw" with a general-register destination: extracts the halfword of
;; the V8HI operand 1 selected by the constant operand 2 (0..7) into an
;; SImode register.
4656 (define_insn "*sse2_pextrw"
4657 [(set (match_operand:SI 0 "register_operand" "=r")
4660 (match_operand:V8HI 1 "register_operand" "x")
4661 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4663 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4664 [(set_attr "type" "sselog")
4665 (set_attr "prefix_data16" "1")
4666 (set_attr "mode" "TI")])
;; "pextrw" with a memory destination: stores the halfword of the V8HI
;; operand 1 selected by the constant operand 2 (0..7) to an HImode memory
;; location.
4668 (define_insn "*sse4_1_pextrw_memory"
4669 [(set (match_operand:HI 0 "memory_operand" "=m")
4671 (match_operand:V8HI 1 "register_operand" "x")
4672 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4674 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4675 [(set_attr "type" "sselog")
4676 (set_attr "prefix_extra" "1")
4677 (set_attr "mode" "TI")])
;; "pextrd": extracts the element of the V4SI operand 1 selected by the
;; constant operand 2 (0..3) into an SImode general register or memory.
4679 (define_insn "*sse4_1_pextrd"
4680 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4682 (match_operand:V4SI 1 "register_operand" "x")
4683 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4685 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4686 [(set_attr "type" "sselog")
4687 (set_attr "prefix_extra" "1")
4688 (set_attr "mode" "TI")])
;; "pextrq": extracts the element of the V2DI operand 1 selected by the
;; constant operand 2 (0..1) into a DImode general register or memory.
;; Requires both TARGET_SSE4_1 and TARGET_64BIT.
4690 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4691 (define_insn "*sse4_1_pextrq"
4692 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4694 (match_operand:V2DI 1 "register_operand" "x")
4695 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4696 "TARGET_SSE4_1 && TARGET_64BIT"
4697 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4698 [(set_attr "type" "sselog")
4699 (set_attr "prefix_extra" "1")
4700 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer (operand 2).
;; The value is split into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7),
;; which are passed as separate constants to sse2_pshufd_1.
4702 (define_expand "sse2_pshufd"
4703 [(match_operand:V4SI 0 "register_operand" "")
4704 (match_operand:V4SI 1 "nonimmediate_operand" "")
4705 (match_operand:SI 2 "const_int_operand" "")]
4708 int mask = INTVAL (operands[2]);
4709 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4710 GEN_INT ((mask >> 0) & 3),
4711 GEN_INT ((mask >> 2) & 3),
4712 GEN_INT ((mask >> 4) & 3),
4713 GEN_INT ((mask >> 6) & 3)));
;; "pshufd" with the four element selectors as separate operands 2..5
;; (each 0..3).  At output time the selectors are packed back into one
;; immediate, two bits each starting from bit 0, and stored in operands[2]
;; for the template.
4717 (define_insn "sse2_pshufd_1"
4718 [(set (match_operand:V4SI 0 "register_operand" "=x")
4720 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4721 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4722 (match_operand 3 "const_0_to_3_operand" "")
4723 (match_operand 4 "const_0_to_3_operand" "")
4724 (match_operand 5 "const_0_to_3_operand" "")])))]
4728 mask |= INTVAL (operands[2]) << 0;
4729 mask |= INTVAL (operands[3]) << 2;
4730 mask |= INTVAL (operands[4]) << 4;
4731 mask |= INTVAL (operands[5]) << 6;
4732 operands[2] = GEN_INT (mask);
4734 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4736 [(set_attr "type" "sselog1")
4737 (set_attr "prefix_data16" "1")
4738 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer (operand 2).
;; The value is split into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7),
;; which are passed as separate constants to sse2_pshuflw_1.
4740 (define_expand "sse2_pshuflw"
4741 [(match_operand:V8HI 0 "register_operand" "")
4742 (match_operand:V8HI 1 "nonimmediate_operand" "")
4743 (match_operand:SI 2 "const_int_operand" "")]
4746 int mask = INTVAL (operands[2]);
4747 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4748 GEN_INT ((mask >> 0) & 3),
4749 GEN_INT ((mask >> 2) & 3),
4750 GEN_INT ((mask >> 4) & 3),
4751 GEN_INT ((mask >> 6) & 3)));
;; "pshuflw" with four element selectors as separate operands 2..5
;; (each 0..3).  At output time the selectors are packed back into one
;; immediate, two bits each starting from bit 0, and stored in operands[2]
;; for the template.
4755 (define_insn "sse2_pshuflw_1"
4756 [(set (match_operand:V8HI 0 "register_operand" "=x")
4758 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4759 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4760 (match_operand 3 "const_0_to_3_operand" "")
4761 (match_operand 4 "const_0_to_3_operand" "")
4762 (match_operand 5 "const_0_to_3_operand" "")
4770 mask |= INTVAL (operands[2]) << 0;
4771 mask |= INTVAL (operands[3]) << 2;
4772 mask |= INTVAL (operands[4]) << 4;
4773 mask |= INTVAL (operands[5]) << 6;
4774 operands[2] = GEN_INT (mask);
4776 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4778 [(set_attr "type" "sselog")
4779 (set_attr "prefix_rep" "1")
4780 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer (operand 2).
;; The value is split into four 2-bit selectors and 4 is added to each, so
;; sse2_pshufhw_1 receives element indices in the range 4..7.
4782 (define_expand "sse2_pshufhw"
4783 [(match_operand:V8HI 0 "register_operand" "")
4784 (match_operand:V8HI 1 "nonimmediate_operand" "")
4785 (match_operand:SI 2 "const_int_operand" "")]
4788 int mask = INTVAL (operands[2]);
4789 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4790 GEN_INT (((mask >> 0) & 3) + 4),
4791 GEN_INT (((mask >> 2) & 3) + 4),
4792 GEN_INT (((mask >> 4) & 3) + 4),
4793 GEN_INT (((mask >> 6) & 3) + 4)));
;; "pshufhw" with four element selectors as separate operands 2..5
;; (each 4..7).  At output time 4 is subtracted from each selector and the
;; results are packed into one immediate, two bits each starting from bit 0,
;; which is stored in operands[2] for the template.
4797 (define_insn "sse2_pshufhw_1"
4798 [(set (match_operand:V8HI 0 "register_operand" "=x")
4800 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4801 (parallel [(const_int 0)
4805 (match_operand 2 "const_4_to_7_operand" "")
4806 (match_operand 3 "const_4_to_7_operand" "")
4807 (match_operand 4 "const_4_to_7_operand" "")
4808 (match_operand 5 "const_4_to_7_operand" "")])))]
4812 mask |= (INTVAL (operands[2]) - 4) << 0;
4813 mask |= (INTVAL (operands[3]) - 4) << 2;
4814 mask |= (INTVAL (operands[4]) - 4) << 4;
4815 mask |= (INTVAL (operands[5]) - 4) << 6;
4816 operands[2] = GEN_INT (mask);
4818 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4820 [(set_attr "type" "sselog")
4821 (set_attr "prefix_rep" "1")
4822 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SImode operand 1.  The
;; preparation statement supplies the all-zero V4SI constant as operands[2].
4824 (define_expand "sse2_loadd"
4825 [(set (match_operand:V4SI 0 "register_operand" "")
4828 (match_operand:SI 1 "nonimmediate_operand" ""))
4832 "operands[2] = CONST0_RTX (V4SImode);")
;; Places the SImode operand 2 into a V4SI destination, with operand 1
;; being either zero ("C") or the destination itself ("0").  Four
;; alternatives: the first two emit "movd" (memory source, then
;; general-register source); the last two emit "movss" (memory source, then
;; SSE-register source merged into the tied operand 1).
4834 (define_insn "sse2_loadld"
4835 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4838 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4839 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4843 movd\t{%2, %0|%0, %2}
4844 movd\t{%2, %0|%0, %2}
4845 movss\t{%2, %0|%0, %2}
4846 movss\t{%2, %0|%0, %2}"
4847 [(set_attr "type" "ssemov")
4848 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Stores element 0 of the V4SI operand 1 to an SImode destination.
;; After reload the pattern is split into a plain SImode move, with
;; operand 1 rewritten as the same hard register in SImode.  The split only
;; applies when TARGET_INTER_UNIT_MOVES is set, the destination is memory,
;; or the destination is not a general register.
4850 (define_insn_and_split "sse2_stored"
4851 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4853 (match_operand:V4SI 1 "register_operand" "x,Yi")
4854 (parallel [(const_int 0)])))]
4857 "&& reload_completed
4858 && (TARGET_INTER_UNIT_MOVES
4859 || MEM_P (operands [0])
4860 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4861 [(set (match_dup 0) (match_dup 1))]
4863 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extracts element operand 2 (0..3) of a V4SI value that lives in
;; offsettable memory.  The split replaces the extraction with an SImode
;; move from the same address advanced by 4 * index bytes.
4866 (define_insn_and_split "*vec_ext_v4si_mem"
4867 [(set (match_operand:SI 0 "register_operand" "=r")
4869 (match_operand:V4SI 1 "memory_operand" "o")
4870 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4876 int i = INTVAL (operands[2]);
4878 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of the V2DI operand 1 to the DImode
;; operand 0 (register or memory).
4882 (define_expand "sse_storeq"
4883 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4885 (match_operand:V2DI 1 "register_operand" "")
4886 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 store from a V2DI source.  Rejected when
;; both operands are memory.  The third alternative (general-register
;; destination, offsettable-memory source) is a plain "mov{q}" and is typed
;; imov with mode DI.
4890 (define_insn "*sse2_storeq_rex64"
4891 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4893 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4894 (parallel [(const_int 0)])))]
4895 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4899 mov{q}\t{%1, %0|%0, %1}"
4900 [(set_attr "type" "*,*,imov")
4901 (set_attr "mode" "*,*,DI")])
;; Element-0 store from a V2DI SSE register to a DImode destination that is
;; either memory or an SSE register.
4903 (define_insn "*sse2_storeq"
4904 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4906 (match_operand:V2DI 1 "register_operand" "x")
4907 (parallel [(const_int 0)])))]
;; Split for the element-0 extraction of a V2DI register into a DImode
;; destination: it becomes a plain DImode move, with operand 1 rewritten as
;; the same hard register in DImode.  Applies when TARGET_INTER_UNIT_MOVES
;; is set, the destination is memory, or the destination is not a general
;; register.
4912 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4914 (match_operand:V2DI 1 "register_operand" "")
4915 (parallel [(const_int 0)])))]
4918 && (TARGET_INTER_UNIT_MOVES
4919 || MEM_P (operands [0])
4920 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4921 [(set (match_dup 0) (match_dup 1))]
4923 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 of a V2DI value on 64-bit targets; rejected when both
;; operands are memory.  Alternatives, in order:
;;   register -> memory        movhps
;;   in place (tied register)  psrldq by 8 bytes
;;   memory -> SSE register    movq from the %H1 address
;;   memory -> general reg     mov{q} from the %H1 address
4926 (define_insn "*vec_extractv2di_1_rex64"
4927 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4929 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4930 (parallel [(const_int 1)])))]
4931 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4933 movhps\t{%1, %0|%0, %1}
4934 psrldq\t{$8, %0|%0, 8}
4935 movq\t{%H1, %0|%0, %H1}
4936 mov{q}\t{%H1, %0|%0, %H1}"
4937 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4938 (set_attr "memory" "*,none,*,*")
4939 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extracts element 1 of a V2DI value with SSE2; rejected when both
;; operands are memory.  Alternatives, in order:
;;   register -> memory        movhps
;;   in place (tied register)  psrldq by 8 bytes
;;   memory -> SSE register    movq from the %H1 address
4941 (define_insn "*vec_extractv2di_1_sse2"
4942 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4944 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4945 (parallel [(const_int 1)])))]
4947 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4949 movhps\t{%1, %0|%0, %1}
4950 psrldq\t{$8, %0|%0, 8}
4951 movq\t{%H1, %0|%0, %H1}"
4952 [(set_attr "type" "ssemov,sseishft,ssemov")
4953 (set_attr "memory" "*,none,*")
4954 (set_attr "mode" "V2SF,TI,TI")])
;; Extracts element 1 of a V2DI value when only SSE (not SSE2) is
;; available; rejected when both operands are memory.  Alternatives, in
;; order: movhps (register -> memory), movhlps (register -> register),
;; movlps from the %H1 address (memory -> register).
4956 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4957 (define_insn "*vec_extractv2di_1_sse"
4958 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4960 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4961 (parallel [(const_int 1)])))]
4962 "!TARGET_SSE2 && TARGET_SSE
4963 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4965 movhps\t{%1, %0|%0, %1}
4966 movhlps\t{%1, %0|%0, %1}
4967 movlps\t{%H1, %0|%0, %H1}"
4968 [(set_attr "type" "ssemov")
4969 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcasts an SImode register into all elements of a V4SI register.
;; First alternative: "pshufd" with immediate 0 from a separate source
;; register.  Second alternative: "shufps" with immediate 0, in place on
;; the tied register.
4971 (define_insn "*vec_dupv4si"
4972 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4974 (match_operand:SI 1 "register_operand" " Y2,0")))]
4977 pshufd\t{$0, %1, %0|%0, %1, 0}
4978 shufps\t{$0, %0, %0|%0, %0, 0}"
4979 [(set_attr "type" "sselog1")
4980 (set_attr "mode" "TI,V4SF")])
;; V2DI pattern built from a DImode register that is tied to the
;; destination in both alternatives.  The first alternative is typed
;; sselog1 with mode TI, the second ssemov with mode V4SF.
4982 (define_insn "*vec_dupv2di"
4983 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4985 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4990 [(set_attr "type" "sselog1,ssemov")
4991 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from two SImode operands.  Alternatives, in order:
;; "punpckldq" with operand 1 tied to the destination (SSE register),
;; "movd" when operand 2 is zero (SSE register), then the same two forms
;; for MMX registers.
4993 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4994 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4995 ;; alternatives pretty much forces the MMX alternative to be chosen.
4996 (define_insn "*sse2_concatv2si"
4997 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4999 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
5000 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
5003 punpckldq\t{%2, %0|%0, %2}
5004 movd\t{%1, %0|%0, %1}
5005 punpckldq\t{%2, %0|%0, %2}
5006 movd\t{%1, %0|%0, %1}"
5007 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5008 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI value from two SImode operands using SSE1-era
;; instructions.  Alternatives, in order: "unpcklps" with operand 1 tied to
;; the destination, "movss" from memory when operand 2 is zero, then
;; "punpckldq" and "movd" for MMX registers.
5010 (define_insn "*sse1_concatv2si"
5011 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
5013 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
5014 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
5017 unpcklps\t{%2, %0|%0, %2}
5018 movss\t{%1, %0|%0, %1}
5019 punpckldq\t{%2, %0|%0, %2}
5020 movd\t{%1, %0|%0, %1}"
5021 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5022 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI value from two V2SI operands; operand 1 is always tied to
;; the destination.  Alternatives, in order: "punpcklqdq" (register),
;; "movlhps" (register), "movhps" (memory operand 2).
5024 (define_insn "*vec_concatv4si_1"
5025 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
5027 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
5028 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
5031 punpcklqdq\t{%2, %0|%0, %2}
5032 movlhps\t{%2, %0|%0, %2}
5033 movhps\t{%2, %0|%0, %2}"
5034 [(set_attr "type" "sselog,ssemov,ssemov")
5035 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI value from two DImode operands on 32-bit SSE targets.
;; Alternatives, in order:
;;   movq        memory operand 1, operand 2 zero
;;   movq2dq     MMX-register operand 1, operand 2 zero
;;   punpcklqdq  operand 1 tied, register operand 2
;;   movlhps     operand 1 tied, register operand 2
;;   movhps      operand 1 tied, memory operand 2
;;   movlps      memory operand 1, operand 2 tied to the destination
5037 (define_insn "vec_concatv2di"
5038 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
5040 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
5041 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
5042 "!TARGET_64BIT && TARGET_SSE"
5044 movq\t{%1, %0|%0, %1}
5045 movq2dq\t{%1, %0|%0, %1}
5046 punpcklqdq\t{%2, %0|%0, %2}
5047 movlhps\t{%2, %0|%0, %2}
5048 movhps\t{%2, %0|%0, %2}
5049 movlps\t{%1, %0|%0, %1}"
5050 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5051 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Variant of vec_concatv2di with one extra alternative: a "movq" from a
;; general register (operand 1 "r", destination "Yi") when operand 2 is
;; zero.  The remaining alternatives mirror vec_concatv2di: movq from
;; memory, movq2dq, punpcklqdq, movlhps, movhps, movlps.
5053 (define_insn "*vec_concatv2di_rex"
5054 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
5056 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
5057 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
5060 movq\t{%1, %0|%0, %1}
5061 movq\t{%1, %0|%0, %1}
5062 movq2dq\t{%1, %0|%0, %1}
5063 punpcklqdq\t{%2, %0|%0, %2}
5064 movlhps\t{%2, %0|%0, %2}
5065 movhps\t{%2, %0|%0, %2}
5066 movlps\t{%1, %0|%0, %1}"
5067 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5068 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Element-set expander for V2DI: delegates to ix86_expand_vector_set with
;; the vector (operand 0), the DImode value (operand 1) and the constant
;; element index (operand 2).  The leading argument is passed as false.
5070 (define_expand "vec_setv2di"
5071 [(match_operand:V2DI 0 "register_operand" "")
5072 (match_operand:DI 1 "register_operand" "")
5073 (match_operand 2 "const_int_operand" "")]
5076 ix86_expand_vector_set (false, operands[0], operands[1],
5077 INTVAL (operands[2]));
;; Element-extract expander for V2DI: delegates to
;; ix86_expand_vector_extract with the DImode destination (operand 0), the
;; vector (operand 1) and the constant element index (operand 2).  The
;; leading argument is passed as false.
5081 (define_expand "vec_extractv2di"
5082 [(match_operand:DI 0 "register_operand" "")
5083 (match_operand:V2DI 1 "register_operand" "")
5084 (match_operand 2 "const_int_operand" "")]
5087 ix86_expand_vector_extract (false, operands[0], operands[1],
5088 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DI: delegates to
;; ix86_expand_vector_init with the destination (operand 0) and the
;; unconstrained operand 1.  The leading argument is passed as false.
5092 (define_expand "vec_initv2di"
5093 [(match_operand:V2DI 0 "register_operand" "")
5094 (match_operand 1 "" "")]
5097 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V4SI: delegates to ix86_expand_vector_set with
;; the vector (operand 0), the SImode value (operand 1) and the constant
;; element index (operand 2).  The leading argument is passed as false.
5101 (define_expand "vec_setv4si"
5102 [(match_operand:V4SI 0 "register_operand" "")
5103 (match_operand:SI 1 "register_operand" "")
5104 (match_operand 2 "const_int_operand" "")]
5107 ix86_expand_vector_set (false, operands[0], operands[1],
5108 INTVAL (operands[2]));
;; Element-extract expander for V4SI: delegates to
;; ix86_expand_vector_extract with the SImode destination (operand 0), the
;; vector (operand 1) and the constant element index (operand 2).  The
;; leading argument is passed as false.
5112 (define_expand "vec_extractv4si"
5113 [(match_operand:SI 0 "register_operand" "")
5114 (match_operand:V4SI 1 "register_operand" "")
5115 (match_operand 2 "const_int_operand" "")]
5118 ix86_expand_vector_extract (false, operands[0], operands[1],
5119 INTVAL (operands[2]));
;; Vector-initialisation expander for V4SI: delegates to
;; ix86_expand_vector_init with the destination (operand 0) and the
;; unconstrained operand 1.  The leading argument is passed as false.
5123 (define_expand "vec_initv4si"
5124 [(match_operand:V4SI 0 "register_operand" "")
5125 (match_operand 1 "" "")]
5128 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V8HI: delegates to ix86_expand_vector_set with
;; the vector (operand 0), the HImode value (operand 1) and the constant
;; element index (operand 2).  The leading argument is passed as false.
5132 (define_expand "vec_setv8hi"
5133 [(match_operand:V8HI 0 "register_operand" "")
5134 (match_operand:HI 1 "register_operand" "")
5135 (match_operand 2 "const_int_operand" "")]
5138 ix86_expand_vector_set (false, operands[0], operands[1],
5139 INTVAL (operands[2]));
;; Element-extract expander for V8HI: delegates to
;; ix86_expand_vector_extract with the HImode destination (operand 0), the
;; vector (operand 1) and the constant element index (operand 2).  The
;; leading argument is passed as false.
5143 (define_expand "vec_extractv8hi"
5144 [(match_operand:HI 0 "register_operand" "")
5145 (match_operand:V8HI 1 "register_operand" "")
5146 (match_operand 2 "const_int_operand" "")]
5149 ix86_expand_vector_extract (false, operands[0], operands[1],
5150 INTVAL (operands[2]));
;; Vector-initialisation expander for V8HI: delegates to
;; ix86_expand_vector_init with the destination (operand 0) and the
;; unconstrained operand 1.  The leading argument is passed as false.
5154 (define_expand "vec_initv8hi"
5155 [(match_operand:V8HI 0 "register_operand" "")
5156 (match_operand 1 "" "")]
5159 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V16QI: delegates to ix86_expand_vector_set with
;; the vector (operand 0), the QImode value (operand 1) and the constant
;; element index (operand 2).  The leading argument is passed as false.
5163 (define_expand "vec_setv16qi"
5164 [(match_operand:V16QI 0 "register_operand" "")
5165 (match_operand:QI 1 "register_operand" "")
5166 (match_operand 2 "const_int_operand" "")]
5169 ix86_expand_vector_set (false, operands[0], operands[1],
5170 INTVAL (operands[2]));
;; Element-extract expander for V16QI: delegates to
;; ix86_expand_vector_extract with the QImode destination (operand 0), the
;; vector (operand 1) and the constant element index (operand 2).  The
;; leading argument is passed as false.
5174 (define_expand "vec_extractv16qi"
5175 [(match_operand:QI 0 "register_operand" "")
5176 (match_operand:V16QI 1 "register_operand" "")
5177 (match_operand 2 "const_int_operand" "")]
5180 ix86_expand_vector_extract (false, operands[0], operands[1],
5181 INTVAL (operands[2]));
;; Vector-initialisation expander for V16QI: delegates to
;; ix86_expand_vector_init with the destination (operand 0) and the
;; unconstrained operand 1.  The leading argument is passed as false.
5185 (define_expand "vec_initv16qi"
5186 [(match_operand:V16QI 0 "register_operand" "")
5187 (match_operand 1 "" "")]
5190 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Widening unpack V16QI -> V8HI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5194 (define_expand "vec_unpacku_hi_v16qi"
5195 [(match_operand:V8HI 0 "register_operand" "")
5196 (match_operand:V16QI 1 "register_operand" "")]
5200 ix86_expand_sse4_unpack (operands, true, true);
5201 else if (TARGET_SSE5)
5202 ix86_expand_sse5_unpack (operands, true, true);
5204 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V16QI -> V8HI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5208 (define_expand "vec_unpacks_hi_v16qi"
5209 [(match_operand:V8HI 0 "register_operand" "")
5210 (match_operand:V16QI 1 "register_operand" "")]
5214 ix86_expand_sse4_unpack (operands, false, true);
5215 else if (TARGET_SSE5)
5216 ix86_expand_sse5_unpack (operands, false, true);
5218 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V16QI -> V8HI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5222 (define_expand "vec_unpacku_lo_v16qi"
5223 [(match_operand:V8HI 0 "register_operand" "")
5224 (match_operand:V16QI 1 "register_operand" "")]
5228 ix86_expand_sse4_unpack (operands, true, false);
5229 else if (TARGET_SSE5)
5230 ix86_expand_sse5_unpack (operands, true, false);
5232 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V16QI -> V8HI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5236 (define_expand "vec_unpacks_lo_v16qi"
5237 [(match_operand:V8HI 0 "register_operand" "")
5238 (match_operand:V16QI 1 "register_operand" "")]
5242 ix86_expand_sse4_unpack (operands, false, false);
5243 else if (TARGET_SSE5)
5244 ix86_expand_sse5_unpack (operands, false, false);
5246 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack V8HI -> V4SI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5250 (define_expand "vec_unpacku_hi_v8hi"
5251 [(match_operand:V4SI 0 "register_operand" "")
5252 (match_operand:V8HI 1 "register_operand" "")]
5256 ix86_expand_sse4_unpack (operands, true, true);
5257 else if (TARGET_SSE5)
5258 ix86_expand_sse5_unpack (operands, true, true);
5260 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V8HI -> V4SI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5264 (define_expand "vec_unpacks_hi_v8hi"
5265 [(match_operand:V4SI 0 "register_operand" "")
5266 (match_operand:V8HI 1 "register_operand" "")]
5270 ix86_expand_sse4_unpack (operands, false, true);
5271 else if (TARGET_SSE5)
5272 ix86_expand_sse5_unpack (operands, false, true);
5274 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V8HI -> V4SI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5278 (define_expand "vec_unpacku_lo_v8hi"
5279 [(match_operand:V4SI 0 "register_operand" "")
5280 (match_operand:V8HI 1 "register_operand" "")]
5284 ix86_expand_sse4_unpack (operands, true, false);
5285 else if (TARGET_SSE5)
5286 ix86_expand_sse5_unpack (operands, true, false);
5288 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V8HI -> V4SI.  Dispatches to one of the
;; ix86_expand_sse4_unpack / ix86_expand_sse5_unpack /
;; ix86_expand_sse_unpack helpers, always with flags (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), judging by
;; the pattern name -- confirm against the helper definitions.
5292 (define_expand "vec_unpacks_lo_v8hi"
5293 [(match_operand:V4SI 0 "register_operand" "")
5294 (match_operand:V8HI 1 "register_operand" "")]
5298 ix86_expand_sse4_unpack (operands, false, false);
5299 else if (TARGET_SSE5)
5300 ix86_expand_sse5_unpack (operands, false, false);
5302 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: operand 0 is a V2DI register, operand 1 a V4SI
;; register.  Every visible helper call passes (operands, true, true);
;; the sse5 helper is used under TARGET_SSE5.
;; NOTE(review): flags presumably mean (unsigned, high half) -- confirm.
5306 (define_expand "vec_unpacku_hi_v4si"
5307 [(match_operand:V2DI 0 "register_operand" "")
5308 (match_operand:V4SI 1 "register_operand" "")]
5312 ix86_expand_sse4_unpack (operands, true, true);
5313 else if (TARGET_SSE5)
5314 ix86_expand_sse5_unpack (operands, true, true);
5316 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: operand 0 is a V2DI register, operand 1 a V4SI
;; register.  Every visible helper call passes (operands, false, true);
;; the sse5 helper is used under TARGET_SSE5.
;; NOTE(review): flags presumably mean (unsigned, high half) -- confirm.
5320 (define_expand "vec_unpacks_hi_v4si"
5321 [(match_operand:V2DI 0 "register_operand" "")
5322 (match_operand:V4SI 1 "register_operand" "")]
5326 ix86_expand_sse4_unpack (operands, false, true);
5327 else if (TARGET_SSE5)
5328 ix86_expand_sse5_unpack (operands, false, true);
5330 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: operand 0 is a V2DI register, operand 1 a V4SI
;; register.  Every visible helper call passes (operands, true, false);
;; the sse5 helper is used under TARGET_SSE5.
;; NOTE(review): flags presumably mean (unsigned, high half) -- confirm.
5334 (define_expand "vec_unpacku_lo_v4si"
5335 [(match_operand:V2DI 0 "register_operand" "")
5336 (match_operand:V4SI 1 "register_operand" "")]
5340 ix86_expand_sse4_unpack (operands, true, false);
5341 else if (TARGET_SSE5)
5342 ix86_expand_sse5_unpack (operands, true, false);
5344 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: operand 0 is a V2DI register, operand 1 a V4SI
;; register.  Every visible helper call passes (operands, false, false);
;; the sse5 helper is used under TARGET_SSE5.
;; NOTE(review): flags presumably mean (unsigned, high half) -- confirm.
5348 (define_expand "vec_unpacks_lo_v4si"
5349 [(match_operand:V2DI 0 "register_operand" "")
5350 (match_operand:V4SI 1 "register_operand" "")]
5354 ix86_expand_sse4_unpack (operands, false, false);
5355 else if (TARGET_SSE5)
5356 ix86_expand_sse5_unpack (operands, false, false);
5358 ix86_expand_sse_unpack (operands, false, false);
5362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: emits pavgb.  Operand 0 is an XMM register; operand 1 is
;; tied to it and marked commutative ("%0"); operand 2 may be an XMM register
;; or memory.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
;; NOTE(review): the all-ones V16QI const_vector is presumably the rounding
;; term of the average; the enclosing RTL operators are not visible here.
5368 (define_insn "sse2_uavgv16qi3"
5369 [(set (match_operand:V16QI 0 "register_operand" "=x")
5375 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5377 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5378 (const_vector:V16QI [(const_int 1) (const_int 1)
5379 (const_int 1) (const_int 1)
5380 (const_int 1) (const_int 1)
5381 (const_int 1) (const_int 1)
5382 (const_int 1) (const_int 1)
5383 (const_int 1) (const_int 1)
5384 (const_int 1) (const_int 1)
5385 (const_int 1) (const_int 1)]))
5387 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5388 "pavgb\t{%2, %0|%0, %2}"
5389 [(set_attr "type" "sseiadd")
5390 (set_attr "prefix_data16" "1")
5391 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: emits pavgw.  Operand 0 is an XMM register; operand 1 is
;; tied to it and marked commutative ("%0"); operand 2 may be an XMM register
;; or memory.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
;; NOTE(review): the all-ones V8HI const_vector is presumably the rounding
;; term of the average; the enclosing RTL operators are not visible here.
5393 (define_insn "sse2_uavgv8hi3"
5394 [(set (match_operand:V8HI 0 "register_operand" "=x")
5400 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5402 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5403 (const_vector:V8HI [(const_int 1) (const_int 1)
5404 (const_int 1) (const_int 1)
5405 (const_int 1) (const_int 1)
5406 (const_int 1) (const_int 1)]))
5408 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5409 "pavgw\t{%2, %0|%0, %2}"
5410 [(set_attr "type" "sseiadd")
5411 (set_attr "prefix_data16" "1")
5412 (set_attr "mode" "TI")])
5414 ;; The correct representation for this is absolutely enormous, and
5415 ;; surely not generally useful.
;; sse2_psadbw: emits psadbw.  Modelled as an opaque V2DI unspec over two
;; V16QI inputs: operand 1 is tied to the destination register, operand 2
;; may be an XMM register or memory.
5416 (define_insn "sse2_psadbw"
5417 [(set (match_operand:V2DI 0 "register_operand" "=x")
5418 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5419 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5422 "psadbw\t{%2, %0|%0, %2}"
5423 [(set_attr "type" "sseiadd")
5424 (set_attr "prefix_data16" "1")
5425 (set_attr "mode" "TI")])
;; sse_movmskps: emits movmskps.  The SI result goes to a general register
;; ("=r"); the V4SF source must be an XMM register.  Modelled as an unspec.
5427 (define_insn "sse_movmskps"
5428 [(set (match_operand:SI 0 "register_operand" "=r")
5429 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5432 "movmskps\t{%1, %0|%0, %1}"
5433 [(set_attr "type" "ssecvt")
5434 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: emits movmskpd.  The SI result goes to a general register
;; ("=r"); the V2DF source must be an XMM register.  Modelled as an unspec.
5436 (define_insn "sse2_movmskpd"
5437 [(set (match_operand:SI 0 "register_operand" "=r")
5438 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5441 "movmskpd\t{%1, %0|%0, %1}"
5442 [(set_attr "type" "ssecvt")
5443 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: emits pmovmskb.  The SI result goes to a general register
;; ("=r"); the V16QI source must be an XMM register.  Modelled as an unspec.
5445 (define_insn "sse2_pmovmskb"
5446 [(set (match_operand:SI 0 "register_operand" "=r")
5447 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5450 "pmovmskb\t{%1, %0|%0, %1}"
5451 [(set_attr "type" "ssecvt")
5452 (set_attr "prefix_data16" "1")
5453 (set_attr "mode" "SI")])
;; sse2_maskmovdqu expander: operand 0 is a V16QI memory destination and
;; operands 1 and 2 are V16QI registers fed to an unspec.  The rest of the
;; pattern and its condition are not visible in this excerpt.
5455 (define_expand "sse2_maskmovdqu"
5456 [(set (match_operand:V16QI 0 "memory_operand" "")
5457 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5458 (match_operand:V16QI 2 "register_operand" "")
;; *sse2_maskmovdqu (32-bit only: TARGET_SSE2 && !TARGET_64BIT): emits
;; maskmovdqu.  The store address is SI operand 0, constrained to class "D".
;; The previous memory contents appear as an unspec input through
;; (mem (match_dup 0)), alongside XMM operands 1 and 2.
5464 (define_insn "*sse2_maskmovdqu"
5465 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5466 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5467 (match_operand:V16QI 2 "register_operand" "x")
5468 (mem:V16QI (match_dup 0))]
5470 "TARGET_SSE2 && !TARGET_64BIT"
5471 ;; @@@ check ordering of operands in intel/nonintel syntax
5472 "maskmovdqu\t{%2, %1|%1, %2}"
5473 [(set_attr "type" "ssecvt")
5474 (set_attr "prefix_data16" "1")
5475 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64 (64-bit only: TARGET_SSE2 && TARGET_64BIT): same
;; shape as the 32-bit pattern, but the address operand 0 is DImode.  Emits
;; maskmovdqu with XMM operands 1 and 2.
5477 (define_insn "*sse2_maskmovdqu_rex64"
5478 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5479 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5480 (match_operand:V16QI 2 "register_operand" "x")
5481 (mem:V16QI (match_dup 0))]
5483 "TARGET_SSE2 && TARGET_64BIT"
5484 ;; @@@ check ordering of operands in intel/nonintel syntax
5485 "maskmovdqu\t{%2, %1|%1, %2}"
5486 [(set_attr "type" "ssecvt")
5487 (set_attr "prefix_data16" "1")
5488 (set_attr "mode" "TI")])
;; sse_ldmxcsr: unspec_volatile over an SI memory operand, classified as an
;; "sse" insn with a memory load.  The condition and output template are not
;; visible in this excerpt.
5490 (define_insn "sse_ldmxcsr"
5491 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5495 [(set_attr "type" "sse")
5496 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SI memory operand from the UNSPECV_STMXCSR
;; unspec_volatile; classified as an "sse" insn with a memory store.  The
;; condition and output template are not visible in this excerpt.
5498 (define_insn "sse_stmxcsr"
5499 [(set (match_operand:SI 0 "memory_operand" "=m")
5500 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5503 [(set_attr "type" "sse")
5504 (set_attr "memory" "store")])
;; sse_sfence expander (TARGET_SSE || TARGET_3DNOW_A): builds operands[0] as
;; a BLKmode MEM on a Pmode SCRATCH address, marks it volatile, and uses it
;; as the input of the UNSPEC_SFENCE unspec.
5506 (define_expand "sse_sfence"
5508 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5509 "TARGET_SSE || TARGET_3DNOW_A"
5511 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5512 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: matches a BLKmode set of operand 0 from UNSPEC_SFENCE of
;; itself (TARGET_SSE || TARGET_3DNOW_A).  Memory effect is "unknown".  The
;; output template is not visible in this excerpt.
5515 (define_insn "*sse_sfence"
5516 [(set (match_operand:BLK 0 "" "")
5517 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5518 "TARGET_SSE || TARGET_3DNOW_A"
5520 [(set_attr "type" "sse")
5521 (set_attr "memory" "unknown")])
;; sse2_clflush: unspec_volatile over an address operand (constraint "p");
;; memory effect is "unknown".  The condition and output template are not
;; visible in this excerpt.
5523 (define_insn "sse2_clflush"
5524 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5528 [(set_attr "type" "sse")
5529 (set_attr "memory" "unknown")])
;; sse2_mfence expander: builds operands[0] as a volatile BLKmode MEM on a
;; Pmode SCRATCH address and uses it as the input of UNSPEC_MFENCE.  The
;; condition is not visible in this excerpt.
5531 (define_expand "sse2_mfence"
5533 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5536 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5537 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: matches a BLKmode set of operand 0 from UNSPEC_MFENCE of
;; itself; memory effect is "unknown".  The condition and output template
;; are not visible in this excerpt.
5540 (define_insn "*sse2_mfence"
5541 [(set (match_operand:BLK 0 "" "")
5542 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5545 [(set_attr "type" "sse")
5546 (set_attr "memory" "unknown")])
;; sse2_lfence expander: builds operands[0] as a volatile BLKmode MEM on a
;; Pmode SCRATCH address and uses it as the input of UNSPEC_LFENCE.  The
;; condition is not visible in this excerpt.
5548 (define_expand "sse2_lfence"
5550 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5553 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5554 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: matches a BLKmode set of operand 0 from UNSPEC_LFENCE of
;; itself; memory effect is "unknown".  The condition and output template
;; are not visible in this excerpt.
5557 (define_insn "*sse2_lfence"
5558 [(set (match_operand:BLK 0 "" "")
5559 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5562 [(set_attr "type" "sse")
5563 (set_attr "memory" "unknown")])
;; sse3_mwait: unspec_volatile over two SI register operands pinned by the
;; "a" and "c" constraints.  Encoded length is 3 bytes.  The condition and
;; output template are not visible in this excerpt.
5565 (define_insn "sse3_mwait"
5566 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5567 (match_operand:SI 1 "register_operand" "c")]
5570 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5571 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5572 ;; we only need to set up 32bit registers.
5574 [(set_attr "length" "3")])
;; sse3_monitor (32-bit only: TARGET_SSE3 && !TARGET_64BIT): emits
;; "monitor %0, %1, %2" with three SI register operands pinned by the "a",
;; "c" and "d" constraints.  Encoded length is 3 bytes.
5576 (define_insn "sse3_monitor"
5577 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5578 (match_operand:SI 1 "register_operand" "c")
5579 (match_operand:SI 2 "register_operand" "d")]
5581 "TARGET_SSE3 && !TARGET_64BIT"
5582 "monitor\t%0, %1, %2"
5583 [(set_attr "length" "3")])
;; sse3_monitor64 (64-bit only: TARGET_SSE3 && TARGET_64BIT): operand 0 is a
;; DI register ("a"); operands 1 and 2 stay SI ("c", "d") for the reason in
;; the comment below.  The output template is not visible in this excerpt.
5585 (define_insn "sse3_monitor64"
5586 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5587 (match_operand:SI 1 "register_operand" "c")
5588 (match_operand:SI 2 "register_operand" "d")]
5590 "TARGET_SSE3 && TARGET_64BIT"
5591 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5592 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5593 ;; zero extended to 64bit, we only need to set up 32bit registers.
5595 [(set_attr "length" "3")])
;; ssse3_phaddwv8hi3: emits phaddw on XMM registers.  The visible RTL selects
;; HI elements 0..7 of operand 1 (tied to the destination) and then elements
;; 0..7 of operand 2 (register or memory), grouped in adjacent pairs.  The
;; operators combining each pair are on lines not visible in this excerpt.
5598 (define_insn "ssse3_phaddwv8hi3"
5599 [(set (match_operand:V8HI 0 "register_operand" "=x")
5605 (match_operand:V8HI 1 "register_operand" "0")
5606 (parallel [(const_int 0)]))
5607 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5609 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5610 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5613 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5614 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5616 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5617 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5622 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5623 (parallel [(const_int 0)]))
5624 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5626 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5627 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5630 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5631 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5633 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5634 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5636 "phaddw\t{%2, %0|%0, %2}"
5637 [(set_attr "type" "sseiadd")
5638 (set_attr "prefix_data16" "1")
5639 (set_attr "prefix_extra" "1")
5640 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: emits phaddw on MMX registers ("y" constraints).  The
;; visible RTL selects HI elements 0..3 of operand 1 (tied to the
;; destination) and then of operand 2 (register or memory), in adjacent
;; pairs.  The combining operators are not visible in this excerpt.
5642 (define_insn "ssse3_phaddwv4hi3"
5643 [(set (match_operand:V4HI 0 "register_operand" "=y")
5648 (match_operand:V4HI 1 "register_operand" "0")
5649 (parallel [(const_int 0)]))
5650 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5652 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5653 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5657 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5658 (parallel [(const_int 0)]))
5659 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5661 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5662 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5664 "phaddw\t{%2, %0|%0, %2}"
5665 [(set_attr "type" "sseiadd")
5666 (set_attr "prefix_extra" "1")
5667 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: emits phaddd on XMM registers.  The visible RTL selects
;; SI elements 0..3 of operand 1 (tied to the destination) and then of
;; operand 2 (register or memory), in adjacent pairs.  The combining
;; operators are not visible in this excerpt.
5669 (define_insn "ssse3_phadddv4si3"
5670 [(set (match_operand:V4SI 0 "register_operand" "=x")
5675 (match_operand:V4SI 1 "register_operand" "0")
5676 (parallel [(const_int 0)]))
5677 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5679 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5680 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5684 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5685 (parallel [(const_int 0)]))
5686 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5688 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5689 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5691 "phaddd\t{%2, %0|%0, %2}"
5692 [(set_attr "type" "sseiadd")
5693 (set_attr "prefix_data16" "1")
5694 (set_attr "prefix_extra" "1")
5695 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: emits phaddd on MMX registers ("y" constraints).  The
;; visible RTL selects SI elements 0 and 1 of operand 1 (tied to the
;; destination) and of operand 2 (register or memory).  The combining
;; operators are not visible in this excerpt.
5697 (define_insn "ssse3_phadddv2si3"
5698 [(set (match_operand:V2SI 0 "register_operand" "=y")
5702 (match_operand:V2SI 1 "register_operand" "0")
5703 (parallel [(const_int 0)]))
5704 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5707 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5708 (parallel [(const_int 0)]))
5709 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5711 "phaddd\t{%2, %0|%0, %2}"
5712 [(set_attr "type" "sseiadd")
5713 (set_attr "prefix_extra" "1")
5714 (set_attr "mode" "DI")])
;; ssse3_phaddswv8hi3: emits phaddsw on XMM registers.  The visible RTL
;; selects HI elements 0..7 of operand 1 (tied to the destination) and then
;; of operand 2 (register or memory), in adjacent pairs.  The combining
;; operators are not visible in this excerpt.
5716 (define_insn "ssse3_phaddswv8hi3"
5717 [(set (match_operand:V8HI 0 "register_operand" "=x")
5723 (match_operand:V8HI 1 "register_operand" "0")
5724 (parallel [(const_int 0)]))
5725 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5727 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5728 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5731 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5732 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5734 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5735 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5740 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5741 (parallel [(const_int 0)]))
5742 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5744 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5745 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5748 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5749 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5751 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5752 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5754 "phaddsw\t{%2, %0|%0, %2}"
5755 [(set_attr "type" "sseiadd")
5756 (set_attr "prefix_data16" "1")
5757 (set_attr "prefix_extra" "1")
5758 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: emits phaddsw on MMX registers ("y" constraints).  The
;; visible RTL selects HI elements 0..3 of operand 1 (tied to the
;; destination) and then of operand 2 (register or memory), in adjacent
;; pairs.  The combining operators are not visible in this excerpt.
5760 (define_insn "ssse3_phaddswv4hi3"
5761 [(set (match_operand:V4HI 0 "register_operand" "=y")
5766 (match_operand:V4HI 1 "register_operand" "0")
5767 (parallel [(const_int 0)]))
5768 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5770 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5771 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5775 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5776 (parallel [(const_int 0)]))
5777 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5779 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5780 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5782 "phaddsw\t{%2, %0|%0, %2}"
5783 [(set_attr "type" "sseiadd")
5784 (set_attr "prefix_extra" "1")
5785 (set_attr "mode" "DI")])
;; ssse3_phsubwv8hi3: emits phsubw on XMM registers.  The visible RTL selects
;; HI elements 0..7 of operand 1 (tied to the destination) and then of
;; operand 2 (register or memory), in adjacent pairs.  The combining
;; operators are not visible in this excerpt.
5787 (define_insn "ssse3_phsubwv8hi3"
5788 [(set (match_operand:V8HI 0 "register_operand" "=x")
5794 (match_operand:V8HI 1 "register_operand" "0")
5795 (parallel [(const_int 0)]))
5796 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5798 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5799 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5802 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5803 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5805 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5806 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5811 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5812 (parallel [(const_int 0)]))
5813 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5815 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5816 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5819 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5820 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5822 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5823 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5825 "phsubw\t{%2, %0|%0, %2}"
5826 [(set_attr "type" "sseiadd")
5827 (set_attr "prefix_data16" "1")
5828 (set_attr "prefix_extra" "1")
5829 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: emits phsubw on MMX registers ("y" constraints).  The
;; visible RTL selects HI elements 0..3 of operand 1 (tied to the
;; destination) and then of operand 2 (register or memory), in adjacent
;; pairs.  The combining operators are not visible in this excerpt.
5831 (define_insn "ssse3_phsubwv4hi3"
5832 [(set (match_operand:V4HI 0 "register_operand" "=y")
5837 (match_operand:V4HI 1 "register_operand" "0")
5838 (parallel [(const_int 0)]))
5839 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5841 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5842 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5846 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5847 (parallel [(const_int 0)]))
5848 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5850 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5851 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5853 "phsubw\t{%2, %0|%0, %2}"
5854 [(set_attr "type" "sseiadd")
5855 (set_attr "prefix_extra" "1")
5856 (set_attr "mode" "DI")])
;; ssse3_phsubdv4si3: emits phsubd on XMM registers.  The visible RTL selects
;; SI elements 0..3 of operand 1 (tied to the destination) and then of
;; operand 2 (register or memory), in adjacent pairs.  The combining
;; operators are not visible in this excerpt.
5858 (define_insn "ssse3_phsubdv4si3"
5859 [(set (match_operand:V4SI 0 "register_operand" "=x")
5864 (match_operand:V4SI 1 "register_operand" "0")
5865 (parallel [(const_int 0)]))
5866 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5868 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5869 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5873 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5874 (parallel [(const_int 0)]))
5875 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5877 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5878 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5880 "phsubd\t{%2, %0|%0, %2}"
5881 [(set_attr "type" "sseiadd")
5882 (set_attr "prefix_data16" "1")
5883 (set_attr "prefix_extra" "1")
5884 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: emits phsubd on MMX registers ("y" constraints).  The
;; visible RTL selects SI elements 0 and 1 of operand 1 (tied to the
;; destination) and of operand 2 (register or memory).  The combining
;; operators are not visible in this excerpt.
5886 (define_insn "ssse3_phsubdv2si3"
5887 [(set (match_operand:V2SI 0 "register_operand" "=y")
5891 (match_operand:V2SI 1 "register_operand" "0")
5892 (parallel [(const_int 0)]))
5893 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5896 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5897 (parallel [(const_int 0)]))
5898 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5900 "phsubd\t{%2, %0|%0, %2}"
5901 [(set_attr "type" "sseiadd")
5902 (set_attr "prefix_extra" "1")
5903 (set_attr "mode" "DI")])
;; ssse3_phsubswv8hi3: emits phsubsw on XMM registers.  The visible RTL
;; selects HI elements 0..7 of operand 1 (tied to the destination) and then
;; of operand 2 (register or memory), in adjacent pairs.  The combining
;; operators are not visible in this excerpt.
5905 (define_insn "ssse3_phsubswv8hi3"
5906 [(set (match_operand:V8HI 0 "register_operand" "=x")
5912 (match_operand:V8HI 1 "register_operand" "0")
5913 (parallel [(const_int 0)]))
5914 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5916 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5917 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5920 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5921 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5923 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5924 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5929 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5930 (parallel [(const_int 0)]))
5931 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5933 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5934 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5937 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5938 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5940 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5941 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5943 "phsubsw\t{%2, %0|%0, %2}"
5944 [(set_attr "type" "sseiadd")
5945 (set_attr "prefix_data16" "1")
5946 (set_attr "prefix_extra" "1")
5947 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: emits phsubsw on MMX registers ("y" constraints).  The
;; visible RTL selects HI elements 0..3 of operand 1 (tied to the
;; destination) and then of operand 2 (register or memory), in adjacent
;; pairs.  The combining operators are not visible in this excerpt.
5949 (define_insn "ssse3_phsubswv4hi3"
5950 [(set (match_operand:V4HI 0 "register_operand" "=y")
5955 (match_operand:V4HI 1 "register_operand" "0")
5956 (parallel [(const_int 0)]))
5957 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5959 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5960 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5964 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5965 (parallel [(const_int 0)]))
5966 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5968 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5969 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5971 "phsubsw\t{%2, %0|%0, %2}"
5972 [(set_attr "type" "sseiadd")
5973 (set_attr "prefix_extra" "1")
5974 (set_attr "mode" "DI")])
;; ssse3_pmaddubswv8hi3: emits pmaddubsw on XMM registers, producing V8HI
;; from two V16QI inputs.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  The visible
;; selections start at element 0 and at element 1 of each input; most of the
;; index lists and the arithmetic operators are not visible in this excerpt.
5976 (define_insn "ssse3_pmaddubswv8hi3"
5977 [(set (match_operand:V8HI 0 "register_operand" "=x")
5982 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5983 (parallel [(const_int 0)
5993 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5994 (parallel [(const_int 0)
6004 (vec_select:V16QI (match_dup 1)
6005 (parallel [(const_int 1)
6014 (vec_select:V16QI (match_dup 2)
6015 (parallel [(const_int 1)
6022 (const_int 15)]))))))]
6024 "pmaddubsw\t{%2, %0|%0, %2}"
6025 [(set_attr "type" "sseiadd")
6026 (set_attr "prefix_data16" "1")
6027 (set_attr "prefix_extra" "1")
6028 (set_attr "mode" "TI")])
;; ssse3_pmaddubswv4hi3: emits pmaddubsw on MMX registers ("y" constraints),
;; producing V4HI from two V8QI inputs.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be a register or memory.
;; Most of the index lists and the arithmetic operators are not visible in
;; this excerpt.
6030 (define_insn "ssse3_pmaddubswv4hi3"
6031 [(set (match_operand:V4HI 0 "register_operand" "=y")
6036 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
6037 (parallel [(const_int 0)
6043 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
6044 (parallel [(const_int 0)
6050 (vec_select:V8QI (match_dup 1)
6051 (parallel [(const_int 1)
6056 (vec_select:V8QI (match_dup 2)
6057 (parallel [(const_int 1)
6060 (const_int 7)]))))))]
6062 "pmaddubsw\t{%2, %0|%0, %2}"
6063 [(set_attr "type" "sseiadd")
6064 (set_attr "prefix_extra" "1")
6065 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3: emits pmulhrsw on XMM registers.  Operand 1 is tied
;; to the destination and marked commutative ("%0"); operand 2 may be a
;; register or memory.  Requires TARGET_SSSE3 and ix86_binary_operator_ok
;; for MULT.  An all-ones V8HI const_vector takes part in the expression;
;; the enclosing operators are not visible in this excerpt.
6067 (define_insn "ssse3_pmulhrswv8hi3"
6068 [(set (match_operand:V8HI 0 "register_operand" "=x")
6075 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
6077 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
6079 (const_vector:V8HI [(const_int 1) (const_int 1)
6080 (const_int 1) (const_int 1)
6081 (const_int 1) (const_int 1)
6082 (const_int 1) (const_int 1)]))
6084 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
6085 "pmulhrsw\t{%2, %0|%0, %2}"
6086 [(set_attr "type" "sseimul")
6087 (set_attr "prefix_data16" "1")
6088 (set_attr "prefix_extra" "1")
6089 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: emits pmulhrsw on MMX registers ("y" constraints).
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be a register or memory.  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok for MULT.  An all-ones V4HI const_vector takes
;; part in the expression; the enclosing operators are not visible here.
6091 (define_insn "ssse3_pmulhrswv4hi3"
6092 [(set (match_operand:V4HI 0 "register_operand" "=y")
6099 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
6101 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
6103 (const_vector:V4HI [(const_int 1) (const_int 1)
6104 (const_int 1) (const_int 1)]))
6106 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
6107 "pmulhrsw\t{%2, %0|%0, %2}"
6108 [(set_attr "type" "sseimul")
6109 (set_attr "prefix_extra" "1")
6110 (set_attr "mode" "DI")])
;; ssse3_pshufbv16qi3: emits pshufb on XMM registers.  Modelled as a V16QI
;; unspec; operand 1 is tied to the destination, operand 2 may be a register
;; or memory.  The unspec name and condition are not visible in this excerpt.
6112 (define_insn "ssse3_pshufbv16qi3"
6113 [(set (match_operand:V16QI 0 "register_operand" "=x")
6114 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6115 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6118 "pshufb\t{%2, %0|%0, %2}"
6119 [(set_attr "type" "sselog1")
6120 (set_attr "prefix_data16" "1")
6121 (set_attr "prefix_extra" "1")
6122 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: emits pshufb on MMX registers ("y" constraints).
;; Modelled as a V8QI unspec; operand 1 is tied to the destination, operand 2
;; may be a register or memory.  The unspec name and condition are not
;; visible in this excerpt.
6124 (define_insn "ssse3_pshufbv8qi3"
6125 [(set (match_operand:V8QI 0 "register_operand" "=y")
6126 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6127 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6130 "pshufb\t{%2, %0|%0, %2}"
6131 [(set_attr "type" "sselog1")
6132 (set_attr "prefix_extra" "1")
6133 (set_attr "mode" "DI")])
;; ssse3_psign<mode>3 over SSEMODE124 (V16QI, V8HI, V4SI): emits
;; psign<ssevecsize> (b/w/d suffix) on XMM registers.  Operand 1 is tied to
;; the destination, operand 2 may be a register or memory.  The unspec header
;; and condition are not visible in this excerpt.
6135 (define_insn "ssse3_psign<mode>3"
6136 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6138 [(match_operand:SSEMODE124 1 "register_operand" "0")
6139 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6142 "psign<ssevecsize>\t{%2, %0|%0, %2}"
6143 [(set_attr "type" "sselog1")
6144 (set_attr "prefix_data16" "1")
6145 (set_attr "prefix_extra" "1")
6146 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 over MMXMODEI: emits psign<mmxvecsize> on MMX registers
;; ("y" constraints).  Operand 1 is tied to the destination, operand 2 may be
;; a register or memory.  The unspec header and condition are not visible in
;; this excerpt.
6148 (define_insn "ssse3_psign<mode>3"
6149 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6151 [(match_operand:MMXMODEI 1 "register_operand" "0")
6152 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6155 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
6156 [(set_attr "type" "sselog1")
6157 (set_attr "prefix_extra" "1")
6158 (set_attr "mode" "DI")])
;; ssse3_palignrti: emits palignr on XMM registers (TImode unspec).  Operand
;; 1 is tied to the destination, operand 2 may be a register or memory.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing the immediate.
6160 (define_insn "ssse3_palignrti"
6161 [(set (match_operand:TI 0 "register_operand" "=x")
6162 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6163 (match_operand:TI 2 "nonimmediate_operand" "xm")
6164 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6168 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6169 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6171 [(set_attr "type" "sseishft")
6172 (set_attr "prefix_data16" "1")
6173 (set_attr "prefix_extra" "1")
6174 (set_attr "mode" "TI")])
;; ssse3_palignrdi: emits palignr on MMX registers (DImode unspec, "y"
;; constraints).  Operand 1 is tied to the destination, operand 2 may be a
;; register or memory.  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing the immediate.
6176 (define_insn "ssse3_palignrdi"
6177 [(set (match_operand:DI 0 "register_operand" "=y")
6178 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6179 (match_operand:DI 2 "nonimmediate_operand" "ym")
6180 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6184 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6185 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6187 [(set_attr "type" "sseishft")
6188 (set_attr "prefix_extra" "1")
6189 (set_attr "mode" "DI")])
;; abs<mode>2 over SSEMODE124 (V16QI, V8HI, V4SI): RTL abs of operand 1
;; (register or memory) into XMM register operand 0, emitted as
;; pabs<ssevecsize> (b/w/d suffix).  The condition is not visible in this
;; excerpt.
6191 (define_insn "abs<mode>2"
6192 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6193 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6195 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
6196 [(set_attr "type" "sselog1")
6197 (set_attr "prefix_data16" "1")
6198 (set_attr "prefix_extra" "1")
6199 (set_attr "mode" "TI")])
;; abs<mode>2 over MMXMODEI: RTL abs of operand 1 (register or memory) into
;; MMX register operand 0 ("y" constraints), emitted as pabs<mmxvecsize>.
;; The condition is not visible in this excerpt.
6201 (define_insn "abs<mode>2"
6202 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6203 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6205 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
6206 [(set_attr "type" "sselog1")
6207 (set_attr "prefix_extra" "1")
6208 (set_attr "mode" "DI")])
6210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6212 ;; AMD SSE4A instructions
6214 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_vmmovntv2df: emits movntsd.  Stores to DF memory operand 0 an unspec
;; of element 0 selected from V2DF XMM register operand 1.
6216 (define_insn "sse4a_vmmovntv2df"
6217 [(set (match_operand:DF 0 "memory_operand" "=m")
6218 (unspec:DF [(vec_select:DF
6219 (match_operand:V2DF 1 "register_operand" "x")
6220 (parallel [(const_int 0)]))]
6223 "movntsd\t{%1, %0|%0, %1}"
6224 [(set_attr "type" "ssemov")
6225 (set_attr "mode" "DF")])
;; sse4a_movntdf: emits movntsd.  Stores to DF memory operand 0 an unspec of
;; scalar DF XMM register operand 1.
6227 (define_insn "sse4a_movntdf"
6228 [(set (match_operand:DF 0 "memory_operand" "=m")
6229 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
6232 "movntsd\t{%1, %0|%0, %1}"
6233 [(set_attr "type" "ssemov")
6234 (set_attr "mode" "DF")])
;; sse4a_vmmovntv4sf: emits movntss.  Stores to SF memory operand 0 an unspec
;; of element 0 selected from V4SF XMM register operand 1.
6236 (define_insn "sse4a_vmmovntv4sf"
6237 [(set (match_operand:SF 0 "memory_operand" "=m")
6238 (unspec:SF [(vec_select:SF
6239 (match_operand:V4SF 1 "register_operand" "x")
6240 (parallel [(const_int 0)]))]
6243 "movntss\t{%1, %0|%0, %1}"
6244 [(set_attr "type" "ssemov")
6245 (set_attr "mode" "SF")])
;; sse4a_movntsf: emits movntss.  Stores to SF memory operand 0 an unspec of
;; scalar SF XMM register operand 1.
6247 (define_insn "sse4a_movntsf"
6248 [(set (match_operand:SF 0 "memory_operand" "=m")
6249 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
6252 "movntss\t{%1, %0|%0, %1}"
6253 [(set_attr "type" "ssemov")
6254 (set_attr "mode" "SF")])
;; sse4a_extrqi: immediate form of extrq.  V2DI operand 1 is tied to the
;; destination; operands 2 and 3 are integer constants printed as the two
;; immediates of the instruction.
6256 (define_insn "sse4a_extrqi"
6257 [(set (match_operand:V2DI 0 "register_operand" "=x")
6258 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6259 (match_operand 2 "const_int_operand" "")
6260 (match_operand 3 "const_int_operand" "")]
6263 "extrq\t{%3, %2, %0|%0, %2, %3}"
6264 [(set_attr "type" "sse")
6265 (set_attr "prefix_data16" "1")
6266 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of extrq.  V2DI operand 1 is tied to the
;; destination; operand 2 is a V16QI XMM register.
6268 (define_insn "sse4a_extrq"
6269 [(set (match_operand:V2DI 0 "register_operand" "=x")
6270 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6271 (match_operand:V16QI 2 "register_operand" "x")]
6274 "extrq\t{%2, %0|%0, %2}"
6275 [(set_attr "type" "sse")
6276 (set_attr "prefix_data16" "1")
6277 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of insertq.  V2DI operand 1 is tied to the
;; destination, operand 2 is a V2DI XMM register, and operands 3 and 4 are
;; integer constants printed as the two immediates of the instruction.
6279 (define_insn "sse4a_insertqi"
6280 [(set (match_operand:V2DI 0 "register_operand" "=x")
6281 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6282 (match_operand:V2DI 2 "register_operand" "x")
6283 (match_operand 3 "const_int_operand" "")
6284 (match_operand 4 "const_int_operand" "")]
6287 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6288 [(set_attr "type" "sseins")
6289 (set_attr "prefix_rep" "1")
6290 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of insertq.  V2DI operand 1 is tied to the
;; destination; operand 2 is a V2DI XMM register.
6292 (define_insn "sse4a_insertq"
6293 [(set (match_operand:V2DI 0 "register_operand" "=x")
6294 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6295 (match_operand:V2DI 2 "register_operand" "x")]
6298 "insertq\t{%2, %0|%0, %2}"
6299 [(set_attr "type" "sseins")
6300 (set_attr "prefix_rep" "1")
6301 (set_attr "mode" "TI")])
6303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6305 ;; Intel SSE4.1 instructions
6307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4_1_blendpd: emits blendpd.  Note the operand order in the RTL:
;; operand 2 (register or memory) comes first and operand 1 (tied to the
;; destination) second, followed by the selector operand 3, which must
;; satisfy const_0_to_3_operand.  The enclosing RTL operator and condition
;; are not visible in this excerpt.
6309 (define_insn "sse4_1_blendpd"
6310 [(set (match_operand:V2DF 0 "register_operand" "=x")
6312 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6313 (match_operand:V2DF 1 "register_operand" "0")
6314 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6316 "blendpd\t{%3, %2, %0|%0, %2, %3}"
6317 [(set_attr "type" "ssemov")
6318 (set_attr "prefix_extra" "1")
6319 (set_attr "mode" "V2DF")])
;; sse4_1_blendps: emits blendps.  Operand 2 (register or memory) comes first
;; in the RTL and operand 1 (tied to the destination) second, followed by
;; the selector operand 3, which must satisfy const_0_to_15_operand.  The
;; enclosing RTL operator and condition are not visible in this excerpt.
6321 (define_insn "sse4_1_blendps"
6322 [(set (match_operand:V4SF 0 "register_operand" "=x")
6324 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6325 (match_operand:V4SF 1 "register_operand" "0")
6326 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
6328 "blendps\t{%3, %2, %0|%0, %2, %3}"
6329 [(set_attr "type" "ssemov")
6330 (set_attr "prefix_extra" "1")
6331 (set_attr "mode" "V4SF")])
;; sse4_1_blendvpd: emits blendvpd.  Operands 0, 1 and 2 use the
;; *_not_xmm0_operand predicates, while the third input (operand 3) carries
;; the "Yz" constraint.  Operand 1 is tied to the destination.
;; NOTE(review): "Yz" is presumably the xmm0-only register class -- confirm
;; in constraints.md.
6333 (define_insn "sse4_1_blendvpd"
6334 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
6335 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
6336 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
6337 (match_operand:V2DF 3 "register_operand" "Yz")]
6340 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
6341 [(set_attr "type" "ssemov")
6342 (set_attr "prefix_extra" "1")
6343 (set_attr "mode" "V2DF")])
;; sse4_1_blendvps: emits blendvps.  Operands 0, 1 and 2 use the
;; *_not_xmm0_operand predicates, while the third input (operand 3) carries
;; the "Yz" constraint.  Operand 1 is tied to the destination.
;; NOTE(review): "Yz" is presumably the xmm0-only register class -- confirm
;; in constraints.md.
6345 (define_insn "sse4_1_blendvps"
6346 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
6347 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
6348 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
6349 (match_operand:V4SF 3 "register_operand" "Yz")]
6352 "blendvps\t{%3, %2, %0|%0, %2, %3}"
6353 [(set_attr "type" "ssemov")
6354 (set_attr "prefix_extra" "1")
6355 (set_attr "mode" "V4SF")])
;; sse4_1_dppd: emits dppd.  Modelled as a V2DF unspec; operand 1 is tied to
;; the destination and marked commutative ("%0"), operand 2 may be a register
;; or memory, and operand 3 must satisfy const_0_to_255_operand.
6357 (define_insn "sse4_1_dppd"
6358 [(set (match_operand:V2DF 0 "register_operand" "=x")
6359 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
6360 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6361 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6364 "dppd\t{%3, %2, %0|%0, %2, %3}"
6365 [(set_attr "type" "ssemul")
6366 (set_attr "prefix_extra" "1")
6367 (set_attr "mode" "V2DF")])
;; sse4_1_dpps: emits dpps.  Modelled as a V4SF unspec; operand 1 is tied to
;; the destination and marked commutative ("%0"), operand 2 may be a register
;; or memory, and operand 3 must satisfy const_0_to_255_operand.
6369 (define_insn "sse4_1_dpps"
6370 [(set (match_operand:V4SF 0 "register_operand" "=x")
6371 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6372 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6373 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6376 "dpps\t{%3, %2, %0|%0, %2, %3}"
6377 [(set_attr "type" "ssemul")
6378 (set_attr "prefix_extra" "1")
6379 (set_attr "mode" "V4SF")])
;; sse4_1_movntdqa: emits movntdqa.  Loads a V2DI XMM register from an
;; unspec of V2DI memory operand 1 (memory only, constraint "m").
6381 (define_insn "sse4_1_movntdqa"
6382 [(set (match_operand:V2DI 0 "register_operand" "=x")
6383 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6386 "movntdqa\t{%1, %0|%0, %1}"
6387 [(set_attr "type" "ssecvt")
6388 (set_attr "prefix_extra" "1")
6389 (set_attr "mode" "TI")])
;; sse4_1_mpsadbw: emits mpsadbw.  Modelled as a V16QI unspec; operand 1 is
;; tied to the destination, operand 2 may be a register or memory, and
;; operand 3 must satisfy const_0_to_255_operand.
6391 (define_insn "sse4_1_mpsadbw"
6392 [(set (match_operand:V16QI 0 "register_operand" "=x")
6393 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6394 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6395 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6398 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6399 [(set_attr "type" "sselog1")
6400 (set_attr "prefix_extra" "1")
6401 (set_attr "mode" "TI")])
;; sse4_1_packusdw: emits packusdw.  Produces V8HI from two V4SI inputs;
;; operand 1 is tied to the destination and operand 2 may be a register or
;; memory.  The RTL operators wrapping the inputs are not visible in this
;; excerpt.
6403 (define_insn "sse4_1_packusdw"
6404 [(set (match_operand:V8HI 0 "register_operand" "=x")
6407 (match_operand:V4SI 1 "register_operand" "0"))
6409 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6411 "packusdw\t{%2, %0|%0, %2}"
6412 [(set_attr "type" "sselog")
6413 (set_attr "prefix_extra" "1")
6414 (set_attr "mode" "TI")])
;; sse4_1_pblendvb: emits pblendvb.  Operands 0, 1 and 2 use the
;; *_not_xmm0_operand predicates, while the third input (operand 3) carries
;; the "Yz" constraint.  Operand 1 is tied to the destination.
;; NOTE(review): "Yz" is presumably the xmm0-only register class -- confirm
;; in constraints.md.
6416 (define_insn "sse4_1_pblendvb"
6417 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6418 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6419 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6420 (match_operand:V16QI 3 "register_operand" "Yz")]
6423 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6424 [(set_attr "type" "ssemov")
6425 (set_attr "prefix_extra" "1")
6426 (set_attr "mode" "TI")])
;; sse4_1_pblendw: emits pblendw.  Operand 2 (register or memory) comes first
;; in the RTL and operand 1 (tied to the destination) second, followed by
;; the selector operand 3, which must satisfy const_0_to_255_operand.  The
;; enclosing RTL operator and condition are not visible in this excerpt.
6428 (define_insn "sse4_1_pblendw"
6429 [(set (match_operand:V8HI 0 "register_operand" "=x")
6431 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6432 (match_operand:V8HI 1 "register_operand" "0")
6433 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6435 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6436 [(set_attr "type" "ssemov")
6437 (set_attr "prefix_extra" "1")
6438 (set_attr "mode" "TI")])
;; SSE4.1 PHMINPOSUW: a one-input V8HI operation expressed as
;; UNSPEC_PHMINPOSUW.  The source may be a register or memory.
6440 (define_insn "sse4_1_phminposuw"
6441 [(set (match_operand:V8HI 0 "register_operand" "=x")
6442 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6443 UNSPEC_PHMINPOSUW))]
6445 "phminposuw\t{%1, %0|%0, %1}"
6446 [(set_attr "type" "sselog1")
6447 (set_attr "prefix_extra" "1")
6448 (set_attr "mode" "TI")])
;; SSE4.1 PMOVSXBW, register form: operand 1 is a full V16QI register and
;; the result is V8HI.  The element selection is a parallel that starts at
;; index 0.
6450 (define_insn "sse4_1_extendv8qiv8hi2"
6451 [(set (match_operand:V8HI 0 "register_operand" "=x")
6454 (match_operand:V16QI 1 "register_operand" "x")
6455 (parallel [(const_int 0)
6464 "pmovsxbw\t{%1, %0|%0, %1}"
6465 [(set_attr "type" "ssemov")
6466 (set_attr "prefix_extra" "1")
6467 (set_attr "mode" "TI")])
;; Unnamed PMOVSXBW variant: operand 1 is a V8QI register or memory operand
;; placed under vec_duplicate:V16QI, so a narrower memory source can be
;; matched directly by the instruction.
6469 (define_insn "*sse4_1_extendv8qiv8hi2"
6470 [(set (match_operand:V8HI 0 "register_operand" "=x")
6473 (vec_duplicate:V16QI
6474 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6475 (parallel [(const_int 0)
6484 "pmovsxbw\t{%1, %0|%0, %1}"
6485 [(set_attr "type" "ssemov")
6486 (set_attr "prefix_extra" "1")
6487 (set_attr "mode" "TI")])
;; SSE4.1 PMOVSXBD, register form: operand 1 is a full V16QI register and
;; the result is V4SI.  The element selection is a parallel that starts at
;; index 0.
6489 (define_insn "sse4_1_extendv4qiv4si2"
6490 [(set (match_operand:V4SI 0 "register_operand" "=x")
6493 (match_operand:V16QI 1 "register_operand" "x")
6494 (parallel [(const_int 0)
6499 "pmovsxbd\t{%1, %0|%0, %1}"
6500 [(set_attr "type" "ssemov")
6501 (set_attr "prefix_extra" "1")
6502 (set_attr "mode" "TI")])
;; Unnamed PMOVSXBD variant: operand 1 is a V4QI register or memory operand
;; placed under vec_duplicate:V16QI, so a narrower memory source can be
;; matched directly by the instruction.
6504 (define_insn "*sse4_1_extendv4qiv4si2"
6505 [(set (match_operand:V4SI 0 "register_operand" "=x")
6508 (vec_duplicate:V16QI
6509 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6510 (parallel [(const_int 0)
6515 "pmovsxbd\t{%1, %0|%0, %1}"
6516 [(set_attr "type" "ssemov")
6517 (set_attr "prefix_extra" "1")
6518 (set_attr "mode" "TI")])
;; SSE4.1 PMOVSXBQ, register form: operand 1 is a full V16QI register and
;; the result is V2DI.  The element selection is a parallel that starts at
;; index 0.
6520 (define_insn "sse4_1_extendv2qiv2di2"
6521 [(set (match_operand:V2DI 0 "register_operand" "=x")
6524 (match_operand:V16QI 1 "register_operand" "x")
6525 (parallel [(const_int 0)
6528 "pmovsxbq\t{%1, %0|%0, %1}"
6529 [(set_attr "type" "ssemov")
6530 (set_attr "prefix_extra" "1")
6531 (set_attr "mode" "TI")])
;; Unnamed PMOVSXBQ variant: operand 1 is a V2QI register or memory operand
;; placed under vec_duplicate:V16QI, so a narrower memory source can be
;; matched directly by the instruction.
6533 (define_insn "*sse4_1_extendv2qiv2di2"
6534 [(set (match_operand:V2DI 0 "register_operand" "=x")
6537 (vec_duplicate:V16QI
6538 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6539 (parallel [(const_int 0)
6542 "pmovsxbq\t{%1, %0|%0, %1}"
6543 [(set_attr "type" "ssemov")
6544 (set_attr "prefix_extra" "1")
6545 (set_attr "mode" "TI")])
;; SSE4.1 PMOVSXWD, register form: operand 1 is a full V8HI register and
;; the result is V4SI.  The element selection is a parallel that starts at
;; index 0.
6547 (define_insn "sse4_1_extendv4hiv4si2"
6548 [(set (match_operand:V4SI 0 "register_operand" "=x")
6551 (match_operand:V8HI 1 "register_operand" "x")
6552 (parallel [(const_int 0)
6557 "pmovsxwd\t{%1, %0|%0, %1}"
6558 [(set_attr "type" "ssemov")
6559 (set_attr "prefix_extra" "1")
6560 (set_attr "mode" "TI")])
;; Unnamed PMOVSXWD variant that accepts a register or memory source.
;; PMOVSXWD consumes four 16-bit elements (a 64-bit source), so operand 1
;; is declared V4HI, in line with *sse4_1_zero_extendv4hiv4si2.  It used to
;; be declared V2HI, which only describes 32 bits of source data.
6562 (define_insn "*sse4_1_extendv4hiv4si2"
6563 [(set (match_operand:V4SI 0 "register_operand" "=x")
6567 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6568 (parallel [(const_int 0)
6573 "pmovsxwd\t{%1, %0|%0, %1}"
6574 [(set_attr "type" "ssemov")
6575 (set_attr "prefix_extra" "1")
6576 (set_attr "mode" "TI")])
;; SSE4.1 PMOVSXWQ, register form: operand 1 is a full V8HI register and
;; the result is V2DI.  The element selection is a parallel that starts at
;; index 0.
6578 (define_insn "sse4_1_extendv2hiv2di2"
6579 [(set (match_operand:V2DI 0 "register_operand" "=x")
6582 (match_operand:V8HI 1 "register_operand" "x")
6583 (parallel [(const_int 0)
6586 "pmovsxwq\t{%1, %0|%0, %1}"
6587 [(set_attr "type" "ssemov")
6588 (set_attr "prefix_extra" "1")
6589 (set_attr "mode" "TI")])
;; Unnamed PMOVSXWQ variant that accepts a register or memory source.
;; PMOVSXWQ consumes two 16-bit elements (a 32-bit source), so operand 1
;; is declared V2HI, in line with *sse4_1_zero_extendv2hiv2di2.  It used to
;; be declared V8HI, which would describe a full 128-bit memory access.
6591 (define_insn "*sse4_1_extendv2hiv2di2"
6592 [(set (match_operand:V2DI 0 "register_operand" "=x")
6596 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6597 (parallel [(const_int 0)
6600 "pmovsxwq\t{%1, %0|%0, %1}"
6601 [(set_attr "type" "ssemov")
6602 (set_attr "prefix_extra" "1")
6603 (set_attr "mode" "TI")])
;; SSE4.1 PMOVSXDQ, register form: operand 1 is a full V4SI register and
;; the result is V2DI.  The element selection is a parallel that starts at
;; index 0.
6605 (define_insn "sse4_1_extendv2siv2di2"
6606 [(set (match_operand:V2DI 0 "register_operand" "=x")
6609 (match_operand:V4SI 1 "register_operand" "x")
6610 (parallel [(const_int 0)
6613 "pmovsxdq\t{%1, %0|%0, %1}"
6614 [(set_attr "type" "ssemov")
6615 (set_attr "prefix_extra" "1")
6616 (set_attr "mode" "TI")])
;; Unnamed PMOVSXDQ variant: operand 1 is a V2SI register or memory operand,
;; so a narrower memory source can be matched directly by the instruction.
6618 (define_insn "*sse4_1_extendv2siv2di2"
6619 [(set (match_operand:V2DI 0 "register_operand" "=x")
6623 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6624 (parallel [(const_int 0)
6627 "pmovsxdq\t{%1, %0|%0, %1}"
6628 [(set_attr "type" "ssemov")
6629 (set_attr "prefix_extra" "1")
6630 (set_attr "mode" "TI")])
;; SSE4.1 PMOVZXBW, register form: operand 1 is a full V16QI register and
;; the result is V8HI.  The element selection is a parallel that starts at
;; index 0.
6632 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6633 [(set (match_operand:V8HI 0 "register_operand" "=x")
6636 (match_operand:V16QI 1 "register_operand" "x")
6637 (parallel [(const_int 0)
6646 "pmovzxbw\t{%1, %0|%0, %1}"
6647 [(set_attr "type" "ssemov")
6648 (set_attr "prefix_extra" "1")
6649 (set_attr "mode" "TI")])
;; Unnamed PMOVZXBW variant: operand 1 is a V8QI register or memory operand
;; placed under vec_duplicate:V16QI, so a narrower memory source can be
;; matched directly by the instruction.
6651 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6652 [(set (match_operand:V8HI 0 "register_operand" "=x")
6655 (vec_duplicate:V16QI
6656 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6657 (parallel [(const_int 0)
6666 "pmovzxbw\t{%1, %0|%0, %1}"
6667 [(set_attr "type" "ssemov")
6668 (set_attr "prefix_extra" "1")
6669 (set_attr "mode" "TI")])
;; SSE4.1 PMOVZXBD, register form: operand 1 is a full V16QI register and
;; the result is V4SI.  The element selection is a parallel that starts at
;; index 0.
6671 (define_insn "sse4_1_zero_extendv4qiv4si2"
6672 [(set (match_operand:V4SI 0 "register_operand" "=x")
6675 (match_operand:V16QI 1 "register_operand" "x")
6676 (parallel [(const_int 0)
6681 "pmovzxbd\t{%1, %0|%0, %1}"
6682 [(set_attr "type" "ssemov")
6683 (set_attr "prefix_extra" "1")
6684 (set_attr "mode" "TI")])
;; Unnamed PMOVZXBD variant: operand 1 is a V4QI register or memory operand
;; placed under vec_duplicate:V16QI, so a narrower memory source can be
;; matched directly by the instruction.
6686 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6687 [(set (match_operand:V4SI 0 "register_operand" "=x")
6690 (vec_duplicate:V16QI
6691 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6692 (parallel [(const_int 0)
6697 "pmovzxbd\t{%1, %0|%0, %1}"
6698 [(set_attr "type" "ssemov")
6699 (set_attr "prefix_extra" "1")
6700 (set_attr "mode" "TI")])
;; SSE4.1 PMOVZXBQ, register form: operand 1 is a full V16QI register and
;; the result is V2DI.  The element selection is a parallel that starts at
;; index 0.
6702 (define_insn "sse4_1_zero_extendv2qiv2di2"
6703 [(set (match_operand:V2DI 0 "register_operand" "=x")
6706 (match_operand:V16QI 1 "register_operand" "x")
6707 (parallel [(const_int 0)
6710 "pmovzxbq\t{%1, %0|%0, %1}"
6711 [(set_attr "type" "ssemov")
6712 (set_attr "prefix_extra" "1")
6713 (set_attr "mode" "TI")])
;; Unnamed PMOVZXBQ variant: operand 1 is a V2QI register or memory operand
;; placed under vec_duplicate:V16QI, so a narrower memory source can be
;; matched directly by the instruction.
6715 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6716 [(set (match_operand:V2DI 0 "register_operand" "=x")
6719 (vec_duplicate:V16QI
6720 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6721 (parallel [(const_int 0)
6724 "pmovzxbq\t{%1, %0|%0, %1}"
6725 [(set_attr "type" "ssemov")
6726 (set_attr "prefix_extra" "1")
6727 (set_attr "mode" "TI")])
;; SSE4.1 PMOVZXWD, register form: operand 1 is a full V8HI register and
;; the result is V4SI.  The element selection is a parallel that starts at
;; index 0.
6729 (define_insn "sse4_1_zero_extendv4hiv4si2"
6730 [(set (match_operand:V4SI 0 "register_operand" "=x")
6733 (match_operand:V8HI 1 "register_operand" "x")
6734 (parallel [(const_int 0)
6739 "pmovzxwd\t{%1, %0|%0, %1}"
6740 [(set_attr "type" "ssemov")
6741 (set_attr "prefix_extra" "1")
6742 (set_attr "mode" "TI")])
;; Unnamed PMOVZXWD variant: operand 1 is a V4HI register or memory operand,
;; so a narrower memory source can be matched directly by the instruction.
6744 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6745 [(set (match_operand:V4SI 0 "register_operand" "=x")
6749 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6750 (parallel [(const_int 0)
6755 "pmovzxwd\t{%1, %0|%0, %1}"
6756 [(set_attr "type" "ssemov")
6757 (set_attr "prefix_extra" "1")
6758 (set_attr "mode" "TI")])
;; SSE4.1 PMOVZXWQ, register form: operand 1 is a full V8HI register and
;; the result is V2DI.  The element selection is a parallel that starts at
;; index 0.
6760 (define_insn "sse4_1_zero_extendv2hiv2di2"
6761 [(set (match_operand:V2DI 0 "register_operand" "=x")
6764 (match_operand:V8HI 1 "register_operand" "x")
6765 (parallel [(const_int 0)
6768 "pmovzxwq\t{%1, %0|%0, %1}"
6769 [(set_attr "type" "ssemov")
6770 (set_attr "prefix_extra" "1")
6771 (set_attr "mode" "TI")])
;; Unnamed PMOVZXWQ variant: operand 1 is a V2HI register or memory operand,
;; so a narrower memory source can be matched directly by the instruction.
6773 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6774 [(set (match_operand:V2DI 0 "register_operand" "=x")
6778 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6779 (parallel [(const_int 0)
6782 "pmovzxwq\t{%1, %0|%0, %1}"
6783 [(set_attr "type" "ssemov")
6784 (set_attr "prefix_extra" "1")
6785 (set_attr "mode" "TI")])
;; SSE4.1 PMOVZXDQ, register form: operand 1 is a full V4SI register and
;; the result is V2DI.  The element selection is a parallel that starts at
;; index 0.
6787 (define_insn "sse4_1_zero_extendv2siv2di2"
6788 [(set (match_operand:V2DI 0 "register_operand" "=x")
6791 (match_operand:V4SI 1 "register_operand" "x")
6792 (parallel [(const_int 0)
6795 "pmovzxdq\t{%1, %0|%0, %1}"
6796 [(set_attr "type" "ssemov")
6797 (set_attr "prefix_extra" "1")
6798 (set_attr "mode" "TI")])
;; Unnamed PMOVZXDQ variant: operand 1 is a V2SI register or memory operand,
;; so a narrower memory source can be matched directly by the instruction.
6800 (define_insn "*sse4_1_zero_extendv2siv2di2"
6801 [(set (match_operand:V2DI 0 "register_operand" "=x")
6805 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6806 (parallel [(const_int 0)
6809 "pmovzxdq\t{%1, %0|%0, %1}"
6810 [(set_attr "type" "ssemov")
6811 (set_attr "prefix_extra" "1")
6812 (set_attr "mode" "TI")])
6814 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6815 ;; But it is not really a compare instruction.
;; SSE4.1 PTEST: the only output is FLAGS_REG in CC mode, computed as an
;; unspec of two V2DI inputs.  Operand 0 must be a register; operand 1 may
;; be a register or memory.
6816 (define_insn "sse4_1_ptest"
6817 [(set (reg:CC FLAGS_REG)
6818 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6819 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6822 "ptest\t{%1, %0|%0, %1}"
6823 [(set_attr "type" "ssecomi")
6824 (set_attr "prefix_extra" "1")
6825 (set_attr "mode" "TI")])
;; SSE4.1 ROUNDPD on V2DF, modelled as an unspec.  Operand 1 is the source
;; (register or memory) and operand 2 is an immediate accepted by
;; const_0_to_15_operand.
6827 (define_insn "sse4_1_roundpd"
6828 [(set (match_operand:V2DF 0 "register_operand" "=x")
6829 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6830 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6833 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6834 [(set_attr "type" "ssecvt")
6835 (set_attr "prefix_extra" "1")
6836 (set_attr "mode" "V2DF")])
;; SSE4.1 ROUNDPS on V4SF, modelled as an unspec.  Operand 1 is the source
;; (register or memory) and operand 2 is an immediate accepted by
;; const_0_to_15_operand.
6838 (define_insn "sse4_1_roundps"
6839 [(set (match_operand:V4SF 0 "register_operand" "=x")
6840 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6841 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6844 "roundps\t{%2, %1, %0|%0, %1, %2}"
6845 [(set_attr "type" "ssecvt")
6846 (set_attr "prefix_extra" "1")
6847 (set_attr "mode" "V4SF")])
;; SSE4.1 ROUNDSD.  The rounding unspec takes V2DF register operand 2 and an
;; immediate (operand 3, const_0_to_15_operand); operand 1 is a separate
;; V2DF input tied to the destination.
6849 (define_insn "sse4_1_roundsd"
6850 [(set (match_operand:V2DF 0 "register_operand" "=x")
6852 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6853 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6855 (match_operand:V2DF 1 "register_operand" "0")
6858 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6859 [(set_attr "type" "ssecvt")
6860 (set_attr "prefix_extra" "1")
6861 (set_attr "mode" "V2DF")])
;; SSE4.1 ROUNDSS.  The rounding unspec takes V4SF register operand 2 and an
;; immediate (operand 3, const_0_to_15_operand); operand 1 is a separate
;; V4SF input tied to the destination.
6863 (define_insn "sse4_1_roundss"
6864 [(set (match_operand:V4SF 0 "register_operand" "=x")
6866 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6867 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6869 (match_operand:V4SF 1 "register_operand" "0")
6872 "roundss\t{%3, %2, %0|%0, %2, %3}"
6873 [(set_attr "type" "ssecvt")
6874 (set_attr "prefix_extra" "1")
6875 (set_attr "mode" "V4SF")])
6877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6879 ;; Intel SSE4.2 string/text processing instructions
6881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.2 PCMPESTR combined pattern.  It sets three things at once: an SI
;; result (operand 0, constraint c), a V16QI result (operand 1, constraint
;; Yz) and FLAGS_REG.  Inputs are two V16QI operands (2 and 4, checked with
;; the *_not_xmm0 predicates), two SI registers (operands 3 and 5,
;; constraints a and d) and an immediate (operand 6, 0..255).
;; The split condition excludes reload_completed and reload_in_progress.
;; The split code looks up REG_UNUSED notes on curr_insn for operand 0,
;; operand 1 and FLAGS_REG, then emits gen_sse4_2_pcmpestri,
;; gen_sse4_2_pcmpestrm and, when the flags are live but neither register
;; result is, gen_sse4_2_pcmpestr_cconly.
6883 (define_insn_and_split "sse4_2_pcmpestr"
6884 [(set (match_operand:SI 0 "register_operand" "=c,c")
6886 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6887 (match_operand:SI 3 "register_operand" "a,a")
6888 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6889 (match_operand:SI 5 "register_operand" "d,d")
6890 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6892 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6900 (set (reg:CC FLAGS_REG)
6909 && !(reload_completed || reload_in_progress)"
6914 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6915 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6916 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6919 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6920 operands[3], operands[4],
6921 operands[5], operands[6]));
6923 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6924 operands[3], operands[4],
6925 operands[5], operands[6]));
6926 if (flags && !(ecx || xmm0))
6927 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6928 operands[2], operands[3],
6929 operands[4], operands[5],
6933 [(set_attr "type" "sselog")
6934 (set_attr "prefix_data16" "1")
6935 (set_attr "prefix_extra" "1")
6936 (set_attr "memory" "none,load")
6937 (set_attr "mode" "TI")])
;; SSE4.2 PCMPESTRI: produces the SI result in operand 0 (constraint c) and
;; also sets FLAGS_REG.  Operand 3 may be a register or memory (the two
;; alternatives; the memory attribute is none,load accordingly).
6939 (define_insn "sse4_2_pcmpestri"
6940 [(set (match_operand:SI 0 "register_operand" "=c,c")
6942 [(match_operand:V16QI 1 "register_operand" "x,x")
6943 (match_operand:SI 2 "register_operand" "a,a")
6944 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6945 (match_operand:SI 4 "register_operand" "d,d")
6946 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6948 (set (reg:CC FLAGS_REG)
6957 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6958 [(set_attr "type" "sselog")
6959 (set_attr "prefix_data16" "1")
6960 (set_attr "prefix_extra" "1")
6961 (set_attr "memory" "none,load")
6962 (set_attr "mode" "TI")])
;; SSE4.2 PCMPESTRM: produces the V16QI result in operand 0 (constraint Yz)
;; and also sets FLAGS_REG.  Operand 3 may be a register or memory (the two
;; alternatives; the memory attribute is none,load accordingly).
6964 (define_insn "sse4_2_pcmpestrm"
6965 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6967 [(match_operand:V16QI 1 "register_operand" "x,x")
6968 (match_operand:SI 2 "register_operand" "a,a")
6969 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6970 (match_operand:SI 4 "register_operand" "d,d")
6971 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6973 (set (reg:CC FLAGS_REG)
6982 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6983 [(set_attr "type" "sselog")
6984 (set_attr "prefix_data16" "1")
6985 (set_attr "prefix_extra" "1")
6986 (set_attr "memory" "none,load")
6987 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR form: only FLAGS_REG is set, and the register result
;; is declared as a clobbered scratch.  Alternatives 0-1 clobber the V16QI
;; scratch (constraint Yz) and print pcmpestrm; alternatives 2-3 clobber
;; the SI scratch (constraint c) and print pcmpestri.  Within each pair the
;; second alternative takes operand 4 from memory.
6989 (define_insn "sse4_2_pcmpestr_cconly"
6990 [(set (reg:CC FLAGS_REG)
6992 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6993 (match_operand:SI 3 "register_operand" "a,a,a,a")
6994 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6995 (match_operand:SI 5 "register_operand" "d,d,d,d")
6996 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6998 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6999 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
7002 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
7003 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
7004 pcmpestri\t{%6, %4, %2|%2, %4, %6}
7005 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
7006 [(set_attr "type" "sselog")
7007 (set_attr "prefix_data16" "1")
7008 (set_attr "prefix_extra" "1")
7009 (set_attr "memory" "none,load,none,load")
7010 (set_attr "mode" "TI")])
;; SSE4.2 PCMPISTR combined pattern.  It sets an SI result (operand 0,
;; constraint c), a V16QI result (operand 1, constraint Yz) and FLAGS_REG
;; from two V16QI inputs (operands 2 and 3, checked with the *_not_xmm0
;; predicates) and an immediate (operand 4, 0..255).
;; The split condition excludes reload_completed and reload_in_progress.
;; The split code looks up REG_UNUSED notes on curr_insn for operand 0,
;; operand 1 and FLAGS_REG, then emits gen_sse4_2_pcmpistri,
;; gen_sse4_2_pcmpistrm and, when the flags are live but neither register
;; result is, gen_sse4_2_pcmpistr_cconly.
7012 (define_insn_and_split "sse4_2_pcmpistr"
7013 [(set (match_operand:SI 0 "register_operand" "=c,c")
7015 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
7016 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
7017 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
7019 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
7025 (set (reg:CC FLAGS_REG)
7032 && !(reload_completed || reload_in_progress)"
7037 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
7038 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
7039 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
7042 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
7043 operands[3], operands[4]));
7045 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
7046 operands[3], operands[4]));
7047 if (flags && !(ecx || xmm0))
7048 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
7049 operands[2], operands[3],
7053 [(set_attr "type" "sselog")
7054 (set_attr "prefix_data16" "1")
7055 (set_attr "prefix_extra" "1")
7056 (set_attr "memory" "none,load")
7057 (set_attr "mode" "TI")])
;; SSE4.2 PCMPISTRI: produces the SI result in operand 0 (constraint c) and
;; also sets FLAGS_REG.  Operand 2 may be a register or memory (the two
;; alternatives; the memory attribute is none,load accordingly).
7059 (define_insn "sse4_2_pcmpistri"
7060 [(set (match_operand:SI 0 "register_operand" "=c,c")
7062 [(match_operand:V16QI 1 "register_operand" "x,x")
7063 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7064 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7066 (set (reg:CC FLAGS_REG)
7073 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
7074 [(set_attr "type" "sselog")
7075 (set_attr "prefix_data16" "1")
7076 (set_attr "prefix_extra" "1")
7077 (set_attr "memory" "none,load")
7078 (set_attr "mode" "TI")])
;; SSE4.2 PCMPISTRM: produces the V16QI result in operand 0 (constraint Yz)
;; and also sets FLAGS_REG.  Operand 2 may be a register or memory (the two
;; alternatives; the memory attribute is none,load accordingly).
7080 (define_insn "sse4_2_pcmpistrm"
7081 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
7083 [(match_operand:V16QI 1 "register_operand" "x,x")
7084 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7085 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7087 (set (reg:CC FLAGS_REG)
7094 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
7095 [(set_attr "type" "sselog")
7096 (set_attr "prefix_data16" "1")
7097 (set_attr "prefix_extra" "1")
7098 (set_attr "memory" "none,load")
7099 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR form: only FLAGS_REG is set, and the register result
;; is declared as a clobbered scratch.  Alternatives 0-1 clobber the V16QI
;; scratch (constraint Yz) and print pcmpistrm; alternatives 2-3 clobber
;; the SI scratch (constraint c) and print pcmpistri.  Within each pair the
;; second alternative takes operand 3 from memory.
7101 (define_insn "sse4_2_pcmpistr_cconly"
7102 [(set (reg:CC FLAGS_REG)
7104 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
7105 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
7106 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
7108 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
7109 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
7112 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7113 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7114 pcmpistri\t{%4, %3, %2|%2, %3, %4}
7115 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
7116 [(set_attr "type" "sselog")
7117 (set_attr "prefix_data16" "1")
7118 (set_attr "prefix_extra" "1")
7119 (set_attr "memory" "none,load,none,load")
7120 (set_attr "mode" "TI")])
7122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7124 ;; SSE5 instructions
7126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7128 ;; SSE5 parallel integer multiply/add instructions.
7129 ;; Note the instruction does not allow the value being added to be a memory
7130 ;; operation. However by pretending via the nonimmediate_operand predicate
7131 ;; that it does and splitting it later allows the following to be recognized:
7132 ;; a[i] = b[i] * c[i] + d[i];
;; PMACSWW on V8HI.  Operands 1 and 2 are the multiplicands (operand 1 is
;; marked commutative with %), operand 3 is the addend and is tied to the
;; destination in every alternative.  The insn condition passes 2 as the
;; last argument of ix86_sse5_valid_op_p, unlike sse5_pmacssww which passes
;; 1.  The third alternative (memory in operand 1) prints operands 1 and 2
;; in swapped order.
7133 (define_insn "sse5_pmacsww"
7134 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7137 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7138 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7139 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7140 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7142 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7143 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7144 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7145 [(set_attr "type" "ssemuladd")
7146 (set_attr "mode" "TI")])
7148 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and when the destination
;; register is not mentioned in any of operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory on the operands and then re-emits the
;; insn through gen_sse5_pmacsww.
7150 [(set (match_operand:V8HI 0 "register_operand" "")
7152 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
7153 (match_operand:V8HI 2 "nonimmediate_operand" ""))
7154 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
7156 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7157 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7158 && !reg_mentioned_p (operands[0], operands[1])
7159 && !reg_mentioned_p (operands[0], operands[2])
7160 && !reg_mentioned_p (operands[0], operands[3])"
7163 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
7164 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; PMACSSWW on V8HI: a mult:V8HI of operands 1 and 2 combined with operand
;; 3, which is tied to the destination.  Exactly one of operands 1/2 may be
;; memory per alternative; the third alternative prints operands 1 and 2 in
;; swapped order.
7169 (define_insn "sse5_pmacssww"
7170 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7172 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7173 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7174 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7175 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7177 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7178 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7179 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7180 [(set_attr "type" "ssemuladd")
7181 (set_attr "mode" "TI")])
7183 ;; Note the instruction does not allow the value being added to be a memory
7184 ;; operation. However by pretending via the nonimmediate_operand predicate
7185 ;; that it does and splitting it later allows the following to be recognized:
7186 ;; a[i] = b[i] * c[i] + d[i];
;; PMACSDD on V4SI.  Operands 1 and 2 are the multiplicands (operand 1 is
;; marked commutative with %), operand 3 is the addend and is tied to the
;; destination in every alternative.  The insn condition passes 2 as the
;; last argument of ix86_sse5_valid_op_p, unlike sse5_pmacssdd which passes
;; 1.  The third alternative (memory in operand 1) prints operands 1 and 2
;; in swapped order.
7187 (define_insn "sse5_pmacsdd"
7188 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7191 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7192 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7193 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7194 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7196 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7197 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7198 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7199 [(set_attr "type" "ssemuladd")
7200 (set_attr "mode" "TI")])
7202 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and when the destination
;; register is not mentioned in any of operands 1-3.  The body calls
;; ix86_expand_sse5_multiple_memory on the operands and then re-emits the
;; insn through gen_sse5_pmacsdd.
7204 [(set (match_operand:V4SI 0 "register_operand" "")
7206 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7207 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7208 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7210 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7211 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7212 && !reg_mentioned_p (operands[0], operands[1])
7213 && !reg_mentioned_p (operands[0], operands[2])
7214 && !reg_mentioned_p (operands[0], operands[3])"
7217 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7218 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; PMACSSDD on V4SI: a mult:V4SI of operands 1 and 2 combined with operand
;; 3, which is tied to the destination.  Exactly one of operands 1/2 may be
;; memory per alternative; the third alternative prints operands 1 and 2 in
;; swapped order.
7223 (define_insn "sse5_pmacssdd"
7224 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7226 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7227 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7228 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7229 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7231 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7232 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7233 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7234 [(set_attr "type" "ssemuladd")
7235 (set_attr "mode" "TI")])
;; PMACSSDQL: V4SI inputs (operands 1 and 2), each under an element
;; selection whose parallel starts at index 1, and a V2DI register operand 3
;; tied to the V2DI destination.  The third alternative prints operands 1
;; and 2 in swapped order.
7237 (define_insn "sse5_pmacssdql"
7238 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7243 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7244 (parallel [(const_int 1)
7247 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7248 (parallel [(const_int 1)
7250 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7251 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7253 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7254 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7255 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7256 [(set_attr "type" "ssemuladd")
7257 (set_attr "mode" "TI")])
;; PMACSSDQH: V4SI inputs (operands 1 and 2), each under an element
;; selection whose parallel starts at index 0, and a V2DI register operand 3
;; tied to the V2DI destination.  The third alternative prints operands 1
;; and 2 in swapped order.
7259 (define_insn "sse5_pmacssdqh"
7260 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7265 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7266 (parallel [(const_int 0)
7270 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7271 (parallel [(const_int 0)
7273 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7274 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7276 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7277 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7278 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7279 [(set_attr "type" "ssemuladd")
7280 (set_attr "mode" "TI")])
;; PMACSDQL: V4SI inputs (operands 1 and 2), each under an element
;; selection whose parallel starts at index 1, and a V2DI register operand 3
;; tied to the V2DI destination.  The third alternative prints operands 1
;; and 2 in swapped order.
7282 (define_insn "sse5_pmacsdql"
7283 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7288 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7289 (parallel [(const_int 1)
7293 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7294 (parallel [(const_int 1)
7296 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7297 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7299 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7300 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7301 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7302 [(set_attr "type" "ssemuladd")
7303 (set_attr "mode" "TI")])
;; PMACSDQH: V4SI inputs (operands 1 and 2), each under an element
;; selection whose parallel starts at index 0, and a V2DI register operand 3
;; tied to the V2DI destination.  The third alternative prints operands 1
;; and 2 in swapped order.
7305 (define_insn "sse5_pmacsdqh"
7306 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7311 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7312 (parallel [(const_int 0)
7316 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7317 (parallel [(const_int 0)
7319 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7320 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7322 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7323 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7324 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7325 [(set_attr "type" "ssemuladd")
7326 (set_attr "mode" "TI")])
7328 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; PMACSSWD: V8HI inputs (operands 1 and 2), each under an element
;; selection whose parallel starts at index 1, and a V4SI register operand 3
;; tied to the V4SI destination.  The third alternative prints operands 1
;; and 2 in swapped order.
7329 (define_insn "sse5_pmacsswd"
7330 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7335 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7336 (parallel [(const_int 1)
7342 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7343 (parallel [(const_int 1)
7347 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7348 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7350 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7351 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7352 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7353 [(set_attr "type" "ssemuladd")
7354 (set_attr "mode" "TI")])
;; PMACSWD: V8HI inputs (operands 1 and 2), each under an element
;; selection whose parallel starts at index 1, and a V4SI register operand 3
;; tied to the V4SI destination.  The third alternative prints operands 1
;; and 2 in swapped order.
7356 (define_insn "sse5_pmacswd"
7357 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7362 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7363 (parallel [(const_int 1)
7369 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7370 (parallel [(const_int 1)
7374 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7375 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7377 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7378 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7379 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7380 [(set_attr "type" "ssemuladd")
7381 (set_attr "mode" "TI")])
;; PMADCSSWD: V8HI inputs (operands 1 and 2) are used under element
;; selections starting at index 0 and again under selections starting at
;; index 1; V4SI register operand 3 is tied to the V4SI destination.  The
;; third alternative prints operands 1 and 2 in swapped order.
7383 (define_insn "sse5_pmadcsswd"
7384 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7390 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7391 (parallel [(const_int 0)
7397 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7398 (parallel [(const_int 0)
7406 (parallel [(const_int 1)
7413 (parallel [(const_int 1)
7417 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7418 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7420 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7421 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7422 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7423 [(set_attr "type" "ssemuladd")
7424 (set_attr "mode" "TI")])
;; PMADCSWD: V8HI inputs (operands 1 and 2) are used under element
;; selections starting at index 0 and again under selections starting at
;; index 1; V4SI register operand 3 is tied to the V4SI destination.  The
;; third alternative prints operands 1 and 2 in swapped order.
7426 (define_insn "sse5_pmadcswd"
7427 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7433 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7434 (parallel [(const_int 0)
7440 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7441 (parallel [(const_int 0)
7449 (parallel [(const_int 1)
7456 (parallel [(const_int 1)
7460 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7461 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7463 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7464 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7465 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7466 [(set_attr "type" "ssemuladd")
7467 (set_attr "mode" "TI")])
7469 ;; SSE5 parallel XMM conditional moves
;; PCMOV for every mode in the SSEMODE iterator, written as if_then_else
;; with operand 3 as the selector and operands 1/2 as the two arms.
;; The first four alternatives print pcmov with different placements of the
;; destination-tied operand and the memory-capable operand.  The last two
;; alternatives cover an arm matching the C constraint (a constant operand)
;; and print andps or andnps instead.  No mode attribute is set here.
7470 (define_insn "sse5_pcmov_<mode>"
7471 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7472 (if_then_else:SSEMODE
7473 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7474 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7475 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7476 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7478 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7479 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7480 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7481 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7482 andps\t{%2, %0|%0, %2}
7483 andnps\t{%1, %0|%0, %1}"
7484 [(set_attr "type" "sse4arg")])
7486 ;; SSE5 horizontal add/subtract instructions
;; PHADDBW: V16QI source (register or memory) to V8HI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 15.
7487 (define_insn "sse5_phaddbw"
7488 [(set (match_operand:V8HI 0 "register_operand" "=x")
7492 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7493 (parallel [(const_int 0)
7504 (parallel [(const_int 1)
7511 (const_int 15)])))))]
7513 "phaddbw\t{%1, %0|%0, %1}"
7514 [(set_attr "type" "sseiadd1")])
;; PHADDBD: V16QI source (register or memory) to V4SI result.  Four element
;; selections are used, starting at indices 0, 1, 2 and 3; the last selected
;; index is 15.
7516 (define_insn "sse5_phaddbd"
7517 [(set (match_operand:V4SI 0 "register_operand" "=x")
7522 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7523 (parallel [(const_int 0)
7530 (parallel [(const_int 1)
7538 (parallel [(const_int 2)
7545 (parallel [(const_int 3)
7548 (const_int 15)]))))))]
7550 "phaddbd\t{%1, %0|%0, %1}"
7551 [(set_attr "type" "sseiadd1")])
;; PHADDBQ: V16QI source (register or memory) to V2DI result.  Eight element
;; selections are used, starting at indices 0-3 and 8-11; the last selected
;; index is 15.
7553 (define_insn "sse5_phaddbq"
7554 [(set (match_operand:V2DI 0 "register_operand" "=x")
7560 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7561 (parallel [(const_int 0)
7566 (parallel [(const_int 1)
7572 (parallel [(const_int 2)
7577 (parallel [(const_int 3)
7584 (parallel [(const_int 8)
7589 (parallel [(const_int 9)
7595 (parallel [(const_int 10)
7600 (parallel [(const_int 11)
7601 (const_int 15)])))))))]
7603 "phaddbq\t{%1, %0|%0, %1}"
7604 [(set_attr "type" "sseiadd1")])
;; PHADDWD: V8HI source (register or memory) to V4SI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 7.
7606 (define_insn "sse5_phaddwd"
7607 [(set (match_operand:V4SI 0 "register_operand" "=x")
7611 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7612 (parallel [(const_int 0)
7619 (parallel [(const_int 1)
7622 (const_int 7)])))))]
7624 "phaddwd\t{%1, %0|%0, %1}"
7625 [(set_attr "type" "sseiadd1")])
;; PHADDWQ: V8HI source (register or memory) to V2DI result.  Four element
;; selections are used, starting at indices 0, 1, 2 and 3; the last selected
;; index is 7.
7627 (define_insn "sse5_phaddwq"
7628 [(set (match_operand:V2DI 0 "register_operand" "=x")
7633 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7634 (parallel [(const_int 0)
7639 (parallel [(const_int 1)
7645 (parallel [(const_int 2)
7650 (parallel [(const_int 3)
7651 (const_int 7)]))))))]
7653 "phaddwq\t{%1, %0|%0, %1}"
7654 [(set_attr "type" "sseiadd1")])
;; PHADDDQ: V4SI source (register or memory) to V2DI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 3.
7656 (define_insn "sse5_phadddq"
7657 [(set (match_operand:V2DI 0 "register_operand" "=x")
7661 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7662 (parallel [(const_int 0)
7667 (parallel [(const_int 1)
7668 (const_int 3)])))))]
7670 "phadddq\t{%1, %0|%0, %1}"
7671 [(set_attr "type" "sseiadd1")])
;; PHADDUBW: V16QI source (register or memory) to V8HI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 15.
7673 (define_insn "sse5_phaddubw"
7674 [(set (match_operand:V8HI 0 "register_operand" "=x")
7678 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7679 (parallel [(const_int 0)
7690 (parallel [(const_int 1)
7697 (const_int 15)])))))]
7699 "phaddubw\t{%1, %0|%0, %1}"
7700 [(set_attr "type" "sseiadd1")])
;; PHADDUBD: V16QI source (register or memory) to V4SI result.  Four element
;; selections are used, starting at indices 0, 1, 2 and 3; the last selected
;; index is 15.
7702 (define_insn "sse5_phaddubd"
7703 [(set (match_operand:V4SI 0 "register_operand" "=x")
7708 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7709 (parallel [(const_int 0)
7716 (parallel [(const_int 1)
7724 (parallel [(const_int 2)
7731 (parallel [(const_int 3)
7734 (const_int 15)]))))))]
7736 "phaddubd\t{%1, %0|%0, %1}"
7737 [(set_attr "type" "sseiadd1")])
;; PHADDUBQ: V16QI source (register or memory) to V2DI result.  Eight
;; element selections are used, starting at indices 0-3 and 8-11; the last
;; selected index is 15.
7739 (define_insn "sse5_phaddubq"
7740 [(set (match_operand:V2DI 0 "register_operand" "=x")
7746 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7747 (parallel [(const_int 0)
7752 (parallel [(const_int 1)
7758 (parallel [(const_int 2)
7763 (parallel [(const_int 3)
7770 (parallel [(const_int 8)
7775 (parallel [(const_int 9)
7781 (parallel [(const_int 10)
7786 (parallel [(const_int 11)
7787 (const_int 15)])))))))]
7789 "phaddubq\t{%1, %0|%0, %1}"
7790 [(set_attr "type" "sseiadd1")])
;; PHADDUWD: V8HI source (register or memory) to V4SI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 7.
7792 (define_insn "sse5_phadduwd"
7793 [(set (match_operand:V4SI 0 "register_operand" "=x")
7797 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7798 (parallel [(const_int 0)
7805 (parallel [(const_int 1)
7808 (const_int 7)])))))]
7810 "phadduwd\t{%1, %0|%0, %1}"
7811 [(set_attr "type" "sseiadd1")])
;; PHADDUWQ: V8HI source (register or memory) to V2DI result.  Four element
;; selections are used, starting at indices 0, 1, 2 and 3; the last selected
;; index is 7.
7813 (define_insn "sse5_phadduwq"
7814 [(set (match_operand:V2DI 0 "register_operand" "=x")
7819 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7820 (parallel [(const_int 0)
7825 (parallel [(const_int 1)
7831 (parallel [(const_int 2)
7836 (parallel [(const_int 3)
7837 (const_int 7)]))))))]
7839 "phadduwq\t{%1, %0|%0, %1}"
7840 [(set_attr "type" "sseiadd1")])
;; PHADDUDQ: V4SI source (register or memory) to V2DI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 3.
7842 (define_insn "sse5_phaddudq"
7843 [(set (match_operand:V2DI 0 "register_operand" "=x")
7847 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7848 (parallel [(const_int 0)
7853 (parallel [(const_int 1)
7854 (const_int 3)])))))]
7856 "phaddudq\t{%1, %0|%0, %1}"
7857 [(set_attr "type" "sseiadd1")])
;; PHSUBBW: V16QI source (register or memory) to V8HI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 15.
7859 (define_insn "sse5_phsubbw"
7860 [(set (match_operand:V8HI 0 "register_operand" "=x")
7864 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7865 (parallel [(const_int 0)
7876 (parallel [(const_int 1)
7883 (const_int 15)])))))]
7885 "phsubbw\t{%1, %0|%0, %1}"
7886 [(set_attr "type" "sseiadd1")])
;; PHSUBWD: V8HI source (register or memory) to V4SI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 7.
7888 (define_insn "sse5_phsubwd"
7889 [(set (match_operand:V4SI 0 "register_operand" "=x")
7893 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7894 (parallel [(const_int 0)
7901 (parallel [(const_int 1)
7904 (const_int 7)])))))]
7906 "phsubwd\t{%1, %0|%0, %1}"
7907 [(set_attr "type" "sseiadd1")])
;; PHSUBDQ: V4SI source (register or memory) to V2DI result.  The two
;; element selections visible here start at indices 0 and 1 and the last
;; selected index is 3.
7909 (define_insn "sse5_phsubdq"
7910 [(set (match_operand:V2DI 0 "register_operand" "=x")
7914 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7915 (parallel [(const_int 0)
7920 (parallel [(const_int 1)
7921 (const_int 3)])))))]
7923 "phsubdq\t{%1, %0|%0, %1}"
7924 [(set_attr "type" "sseiadd1")])
7926 ;; SSE5 permute instructions
;; General four-operand pperm, modelled as an UNSPEC_SSE5_PERMUTE of three
;; V16QI inputs (operands 1-3).  Each of the four constraint alternatives
;; ties either operand 1 or operand 3 to the destination ("0") and lets
;; exactly one of the remaining inputs be in memory.  The insn condition
;; additionally validates the operands through ix86_sse5_valid_op_p.
7927 (define_insn "sse5_pperm"
7928 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7929 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7930 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7931 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7932 UNSPEC_SSE5_PERMUTE))]
7933 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7934 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7935 [(set_attr "type" "sse4arg")
7936 (set_attr "mode" "TI")])
7938 ;; The following are for the various unpack insns that don't need the first
7939 ;; source operand, so we can just use the output operand for the first operand.
7940 ;; This allows either of the other two operands to be a memory operand. We
7941 ;; can't just use the first operand as an argument to the normal pperm because
7942 ;; then an output-only argument suddenly becomes an input operand.
;; pperm-based unpack ("zero" variant) from V16QI to V8HI.  Operand 1 is the
;; V16QI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is the V16QI operand attached with (use ...).  The two
;; constraint alternatives let either operand 1 or operand 3 live in memory,
;; so the condition requires at least one of *those two* to be a register.
;; Operand 2 is a parallel and can never satisfy register_operand, so it
;; must not be the one tested here.
7943 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7944 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7947 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7948 (match_operand 2 "" "")))) ;; parallel with const_int's
7949 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7951 && (register_operand (operands[1], V16QImode)
7952 || register_operand (operands[3], V16QImode))"
7953 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7954 [(set_attr "type" "sseadd")
7955 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant) from V16QI to V8HI.  Operand 1 is the
;; V16QI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is the V16QI operand attached with (use ...).  Either
;; operand 1 or operand 3 may be in memory (see the constraint alternatives),
;; so the condition checks that at least one of those two is a register.
;; Operand 2 is a parallel and is never a register.
7957 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7958 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7961 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7962 (match_operand 2 "" "")))) ;; parallel with const_int's
7963 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7965 && (register_operand (operands[1], V16QImode)
7966 || register_operand (operands[3], V16QImode))"
7967 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7968 [(set_attr "type" "sseadd")
7969 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant) from V8HI to V4SI.  Operand 1 is the
;; V8HI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is the V16QI operand attached with (use ...).  Either
;; operand 1 or operand 3 may be in memory, so the condition checks that at
;; least one of those two is a register.  Operand 2 is a parallel and is
;; never a register.
7971 (define_insn "sse5_pperm_zero_v8hi_v4si"
7972 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7975 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7976 (match_operand 2 "" "")))) ;; parallel with const_int's
7977 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7979 && (register_operand (operands[1], V8HImode)
7980 || register_operand (operands[3], V16QImode))"
7981 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7982 [(set_attr "type" "sseadd")
7983 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant) from V8HI to V4SI.  Operand 1 is the
;; V8HI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is the V16QI operand attached with (use ...).  Either
;; operand 1 or operand 3 may be in memory, so the condition checks that at
;; least one of those two is a register.  Operand 2 is a parallel and is
;; never a register.
7985 (define_insn "sse5_pperm_sign_v8hi_v4si"
7986 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7989 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7990 (match_operand 2 "" "")))) ;; parallel with const_int's
7991 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7993 && (register_operand (operands[1], V8HImode)
7994 || register_operand (operands[3], V16QImode))"
7995 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7996 [(set_attr "type" "sseadd")
7997 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant) from V4SI to V2DI.  Operand 1 is the
;; V4SI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is the V16QI operand attached with (use ...).  Either
;; operand 1 or operand 3 may be in memory, so the condition checks that at
;; least one of those two is a register.  Operand 2 is a parallel and is
;; never a register.
7999 (define_insn "sse5_pperm_zero_v4si_v2di"
8000 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8003 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8004 (match_operand 2 "" "")))) ;; parallel with const_int's
8005 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8007 && (register_operand (operands[1], V4SImode)
8008 || register_operand (operands[3], V16QImode))"
8009 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8010 [(set_attr "type" "sseadd")
8011 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant) from V4SI to V2DI.  Operand 1 is the
;; V4SI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is the V16QI operand attached with (use ...).  Either
;; operand 1 or operand 3 may be in memory, so the condition checks that at
;; least one of those two is a register.  Operand 2 is a parallel and is
;; never a register.
8013 (define_insn "sse5_pperm_sign_v4si_v2di"
8014 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8017 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8018 (match_operand 2 "" "")))) ;; parallel with const_int's
8019 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8021 && (register_operand (operands[1], V4SImode)
8022 || register_operand (operands[3], V16QImode))"
8023 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8024 [(set_attr "type" "sseadd")
8025 (set_attr "mode" "TI")])
8027 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into a V4SI result using pperm.
;; Operand 3 is a V16QI operand attached with (use ...).  As with the general
;; pperm pattern, the alternatives tie operand 1 or operand 3 to the
;; destination and the condition defers to ix86_sse5_valid_op_p.
8028 (define_insn "sse5_pperm_pack_v2di_v4si"
8029 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
8032 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
8034 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8035 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8036 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8037 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8038 [(set_attr "type" "sse4arg")
8039 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into a V8HI result using pperm.
;; Operand 3 is a V16QI operand attached with (use ...); operand validity is
;; checked by ix86_sse5_valid_op_p.
8041 (define_insn "sse5_pperm_pack_v4si_v8hi"
8042 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
8045 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
8047 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8048 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8049 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8050 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8051 [(set_attr "type" "sse4arg")
8052 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into a V16QI result using pperm.
;; Operand 3 is a V16QI operand attached with (use ...); operand validity is
;; checked by ix86_sse5_valid_op_p.
8054 (define_insn "sse5_pperm_pack_v8hi_v16qi"
8055 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8058 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
8060 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8061 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8062 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8063 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8064 [(set_attr "type" "sse4arg")
8065 (set_attr "mode" "TI")])
8067 ;; Floating point permutation (permps, permpd)
;; Floating-point permute for each SSEMODEF2P mode (V4SF, V2DF).  Operands 1
;; and 2 are vectors of the iterated mode, operand 3 is a V16QI operand, and
;; all three feed an UNSPEC_SSE5_PERMUTE.  The mnemonic is "perm" followed by
;; the ssemodesuffixf4 suffix ("ps" for V4SF, "pd" for V2DF).
8068 (define_insn "sse5_perm<mode>"
8069 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
8071 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
8072 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
8073 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
8074 UNSPEC_SSE5_PERMUTE))]
8075 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8076 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8077 [(set_attr "type" "sse4arg")
8078 (set_attr "mode" "<MODE>")])
8080 ;; SSE5 packed rotate instructions
;; Rotate by an immediate count.  Operand 1 is the vector source (register or
;; memory) and operand 2 is an SI constant accepted by the per-mode
;; const_0_to_<sserotatemax>_operand predicate.  Emits prot{b,w,d,q}
;; according to the ssevecsize attribute.
8081 (define_insn "rotl<mode>3"
8082 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8084 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8085 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8087 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8088 [(set_attr "type" "sseishft")
8089 (set_attr "mode" "TI")])
;; Rotate where the count (operand 2) is a vector of the same mode as the
;; data (operand 1).  The two alternatives allow either operand 1 or operand 2
;; to be in memory; ix86_sse5_valid_op_p validates the combination.  Emits
;; prot{b,w,d,q}.
8091 (define_insn "sse5_rotl<mode>3"
8092 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8094 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8095 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
8096 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8097 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8098 [(set_attr "type" "sseishft")
8099 (set_attr "mode" "TI")])
8101 ;; SSE5 packed shift instructions. Note negative values for the shift amount
8102 ;; convert this into a right shift instead of left shift. For now, model this
8103 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
8104 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT rtx code.
;; Packed arithmetic shift, modelled as UNSPEC_SSE5_ASHIFT of the data
;; (operand 1) and a same-mode vector of shift counts (operand 2).  Either
;; input may be in memory.  Emits psha{b,w,d,q}.
8105 (define_insn "sse5_ashl<mode>3"
8106 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8108 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8109 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8110 UNSPEC_SSE5_ASHIFT))]
8111 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8112 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8113 [(set_attr "type" "sseishft")
8114 (set_attr "mode" "TI")])
;; Packed logical shift, modelled as UNSPEC_SSE5_LSHIFT of the data
;; (operand 1) and a same-mode vector of shift counts (operand 2).  Either
;; input may be in memory.  Emits pshl{b,w,d,q}.
8116 (define_insn "sse5_lshl<mode>3"
8117 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8119 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8120 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8121 UNSPEC_SSE5_LSHIFT))]
8122 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8123 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8124 [(set_attr "type" "sseishft")
8125 (set_attr "mode" "TI")])
8127 ;; SSE5 FRCZ support
;; Packed FRCZ for each SSEMODEF2P mode.  Operand 0 is the register result
;; and operand 1 the register-or-memory source.  The mnemonic suffix comes
;; from the ssemodesuffixf4 mode attribute (V4SF -> "ps", V2DF -> "pd").
;; The attribute name must match a define_mode_attr exactly, because an
;; unknown <name> is not substituted and would appear verbatim in the
;; assembler output.
8129 (define_insn "sse5_frcz<mode>2"
8130 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8132 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8135 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8136 [(set_attr "type" "ssecvt1")
8137 (set_attr "prefix_extra" "1")
8138 (set_attr "mode" "<MODE>")])
;; Scalar (vec_merge) form of FRCZ.  Operand 2 is the register-or-memory
;; source and operand 1 is a register tied to the destination ("0").  The
;; mnemonic suffix comes from the ssemodesuffixf2s mode attribute
;; (V4SF -> "ss", V2DF -> "sd").  The attribute name must match a
;; define_mode_attr exactly, because an unknown <name> is not substituted.
8141 (define_insn "sse5_vmfrcz<mode>2"
8142 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8143 (vec_merge:SSEMODEF2P
8145 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8147 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8150 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8151 [(set_attr "type" "ssecvt1")
8152 (set_attr "prefix_extra" "1")
8153 (set_attr "mode" "<MODE>")])
;; cvtph2ps: produces a V4SF register result from a V4HI source (register or
;; memory), modelled as an unspec.
8155 (define_insn "sse5_cvtph2ps"
8156 [(set (match_operand:V4SF 0 "register_operand" "=x")
8157 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8160 "cvtph2ps\t{%1, %0|%0, %1}"
8161 [(set_attr "type" "ssecvt")
8162 (set_attr "mode" "V4SF")])
;; cvtps2ph: produces a V4HI result, which may be a register or memory
;; ("=xm"), from a V4SF register source, modelled as an unspec.
8164 (define_insn "sse5_cvtps2ph"
8165 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8166 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8169 "cvtps2ph\t{%1, %0|%0, %1}"
8170 [(set_attr "type" "ssecvt")
8171 (set_attr "mode" "V4SF")])
8173 ;; Scalar versions of the com instructions that use vector types that are
8174 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8175 ;; com instructions fill in 0's in the upper bits instead of leaving them
8176 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com comparison.  Operand 1 is the comparison
;; operator (sse5_comparison_float_operator) applied to operands 2 and 3;
;; the preparation code sets operands[4] to the all-zero constant of the
;; vector mode for use in the vec_merge.
8177 (define_expand "sse5_vmmaskcmp<mode>3"
8178 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8179 (vec_merge:SSEMODEF2P
8180 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8181 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8182 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8187 operands[4] = CONST0_RTX (<MODE>mode);
;; Matcher for the scalar com comparison generated by the expander of the
;; same name.  Operand 2 must be a register, operand 3 may be memory, and
;; operand 4 is the other vec_merge input.  The mnemonic is "com", the
;; condition name printed by %Y1, and the scalar suffix ("ss" or "sd").
8190 (define_insn "*sse5_vmmaskcmp<mode>3"
8191 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8192 (vec_merge:SSEMODEF2P
8193 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8194 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8195 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8196 (match_operand:SSEMODEF2P 4 "")
8199 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8200 [(set_attr "type" "sse4arg")
8201 (set_attr "mode" "<ssescalarmode>")])
8203 ;; We don't have a comparison operator that always returns true/false, so
8204 ;; handle comfalse and comtrue specially.
;; Always-false / always-true floating-point compares, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  Operand 3 is an integer constant selecting which
;; of the four instructions to emit: scalar or packed, false or true.  The
;; first two arms emit comfalses*/comfalsep*, and the last two emit
;; comtrues*/comtruep*, so that both families named in the comment above
;; are actually generated.
8205 (define_insn "sse5_com_tf<mode>3"
8206 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8208 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8209 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8210 (match_operand:SI 3 "const_int_operand" "n")]
8211 UNSPEC_SSE5_TRUEFALSE))]
8214 const char *ret = NULL;
8216 switch (INTVAL (operands[3]))
8219 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8223 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8227 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8231 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8240 [(set_attr "type" "ssecmp")
8241 (set_attr "mode" "<MODE>")])
;; Packed floating-point com comparison for V4SF/V2DF.  Operand 1 is the
;; comparison operator applied to operand 2 (register) and operand 3
;; (register or memory).  The mnemonic is "com", the condition name printed
;; by %Y1, and the packed suffix ("ps" or "pd").
8243 (define_insn "sse5_maskcmp<mode>3"
8244 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8245 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8246 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8247 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8249 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8250 [(set_attr "type" "ssecmp")
8251 (set_attr "mode" "<MODE>")])
;; Packed integer pcom comparison for the SSEMODE1248 modes, using operators
;; accepted by ix86_comparison_int_operator.  The name template is shared
;; with the floating-point pattern, but the mode iterator differs, so the
;; expanded names do not collide.  Emits pcom<cond>{b,w,d,q}.
;; NOTE(review): the "type" attribute here is "sse4arg" while the unsigned
;; variants use "ssecmp" -- confirm this difference is intentional.
8253 (define_insn "sse5_maskcmp<mode>3"
8254 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8255 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8256 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8257 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8259 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8260 [(set_attr "type" "sse4arg")
8261 (set_attr "mode" "TI")])
;; Packed unsigned integer pcom comparison, using operators accepted by
;; ix86_comparison_uns_operator.  Emits pcom<cond>u{b,w,d,q}.
8263 (define_insn "sse5_maskcmp_uns<mode>3"
8264 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8265 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8266 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8267 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8269 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8270 [(set_attr "type" "ssecmp")
8271 (set_attr "mode" "TI")])
8273 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8274 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8275 ;; the exact instruction generated for the intrinsic.
;; Same unsigned comparison as sse5_maskcmp_uns<mode>3, but with the
;; comparison wrapped in UNSPEC_SSE5_UNSIGNED_CMP.  The output template is
;; identical (pcom<cond>u{b,w,d,q}).
8276 (define_insn "sse5_maskcmp_uns2<mode>3"
8277 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8279 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8280 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8281 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8282 UNSPEC_SSE5_UNSIGNED_CMP))]
8284 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8285 [(set_attr "type" "ssecmp")
8286 (set_attr "mode" "TI")])
8288 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8289 ;; being added here to be complete.
;; Integer always-true / always-false compare, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  Operand 3 is an integer constant: a non-zero
;; value selects pcomtrue{b,w,d,q}, zero selects pcomfalse{b,w,d,q}.
8290 (define_insn "sse5_pcom_tf<mode>3"
8291 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8292 (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8293 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8294 (match_operand:SI 3 "const_int_operand" "n")]
8295 UNSPEC_SSE5_TRUEFALSE))]
8298 return ((INTVAL (operands[3]) != 0)
8299 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8300 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8302 [(set_attr "type" "ssecmp")
8303 (set_attr "mode" "TI")])