1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the vector modes, used to instantiate a pattern for only
;; some element sizes.  In the SSEMODE<digits> names each digit matches
;; the element width in bytes of a listed mode (1 = V16QI, 2 = V8HI,
;; 4 = V4SI, 8 = V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF4 lists the scalar and packed SF/DF
;; modes, SSEMODEF2P only the two packed ones.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")])
47 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd")])
48 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
50 ;; Mapping of the max integer size for sse5 rotate immediate constraint
51 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
53 ;; Mapping of vector modes back to the scalar modes
54 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
56 ;; Mapping of immediate bits for blend instructions
57 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
59 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
61 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
65 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;; All of these patterns are enabled for SSE1 as well as SSE2.
68 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte integer vector modes (SSEMODEI).  The
;; expansion is handed to ix86_expand_vector_move.
70 (define_expand "mov<mode>"
71 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
72 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
75 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the 16-byte integer vector modes.  By the constraints the
;; alternatives are: 0 = SSE constant ("C") into a register, 1 = register
;; or memory into a register, 2 = register into memory.  The condition
;; requires at least one operand to be a register.
;; The "mode" attribute is V4SF (so movaps is printed instead of movdqa)
;; when optimizing for size, when SSE2 is not enabled, or for the store
;; alternative when TARGET_SSE_TYPELESS_STORES is set; otherwise it is TI.
79 (define_insn "*mov<mode>_internal"
80 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
81 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
83 && (register_operand (operands[0], <MODE>mode)
84 || register_operand (operands[1], <MODE>mode))"
86 switch (which_alternative)
89 return standard_sse_constant_opcode (insn, operands[1]);
92 if (get_attr_mode (insn) == MODE_V4SF)
93 return "movaps\t{%1, %0|%0, %1}";
95 return "movdqa\t{%1, %0|%0, %1}";
100 [(set_attr "type" "sselog1,ssemov,ssemov")
103 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
104 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
105 (and (eq_attr "alternative" "2")
106 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
108 (const_string "V4SF")
109 (const_string "TI")))])
111 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
112 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
113 ;; from memory, we'd prefer to load the memory directly into the %xmm
114 ;; register. To facilitate this happy circumstance, this pattern won't
115 ;; split until after register allocation. If the 64-bit value didn't
116 ;; come from memory, this is the best we can do. This is much better
117 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Only for 32-bit targets with SSE2 and inter-unit moves.  Operand 2 is
;; an SSE scratch that only the register alternative needs ("=&x"; the
;; memory alternative uses "X").  The split runs after reload:
;;  - register source: each SImode half (subreg offsets 0 and 4) is loaded
;;    with sse2_loadld, into operand 0 and the scratch respectively, and
;;    the two are combined with sse2_punpckldq;
;;  - memory source: vec_concatv2di is emitted on the V2DI lowpart of
;;    operand 0, with const0_rtx as the second element.
120 (define_insn_and_split "movdi_to_sse"
122 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
123 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
124 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
125 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
127 "&& reload_completed"
130 if (register_operand (operands[1], DImode))
132 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
133 Assemble the 64-bit DImode value in an xmm register. */
134 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
135 gen_rtx_SUBREG (SImode, operands[1], 0)));
136 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
137 gen_rtx_SUBREG (SImode, operands[1], 4)));
138 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
140 else if (memory_operand (operands[1], DImode))
141 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for the packed float modes (V4SF, V2DF); like the integer
;; vector expander it hands the work to ix86_expand_vector_move.
146 (define_expand "mov<mode>"
147 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
148 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))]
151 ix86_expand_vector_move (<MODE>mode, operands);
;; V4SF move insn.  Alternatives by constraint: SSE constant into a
;; register, register/memory into a register, register into memory.  At
;; least one operand must be a register.  Non-constant moves are printed
;; as movaps and the "mode" attribute is always V4SF.
155 (define_insn "*movv4sf_internal"
156 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
157 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
159 && (register_operand (operands[0], V4SFmode)
160 || register_operand (operands[1], V4SFmode))"
162 switch (which_alternative)
165 return standard_sse_constant_opcode (insn, operands[1]);
168 return "movaps\t{%1, %0|%0, %1}";
173 [(set_attr "type" "sselog1,ssemov,ssemov")
174 (set_attr "mode" "V4SF")])
;; Post-reload split of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector; the
;; replacement pattern uses a vec_duplicate of that scalar.
177 [(set (match_operand:V4SF 0 "register_operand" "")
178 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
179 "TARGET_SSE && reload_completed"
182 (vec_duplicate:V4SF (match_dup 1))
186 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
187 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move insn.  Alternatives by constraint: SSE constant into a
;; register, register/memory into a register, register into memory.  At
;; least one operand must be a register.
;; The "mode" attribute is V4SF (movaps is printed instead of movapd) when
;; optimizing for size, when SSE2 is not enabled, or for the store
;; alternative when TARGET_SSE_TYPELESS_STORES is set; otherwise V2DF.
190 (define_insn "*movv2df_internal"
191 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
192 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
194 && (register_operand (operands[0], V2DFmode)
195 || register_operand (operands[1], V2DFmode))"
197 switch (which_alternative)
200 return standard_sse_constant_opcode (insn, operands[1]);
203 if (get_attr_mode (insn) == MODE_V4SF)
204 return "movaps\t{%1, %0|%0, %1}";
206 return "movapd\t{%1, %0|%0, %1}";
211 [(set_attr "type" "sselog1,ssemov,ssemov")
214 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
215 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
216 (and (eq_attr "alternative" "2")
217 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
219 (const_string "V4SF")
220 (const_string "V2DF")))])
;; Post-reload split of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand: the load becomes a vec_concat of the
;; DFmode subreg (offset 0) of operand 1 with a DFmode zero (operand 2).
223 [(set (match_operand:V2DF 0 "register_operand" "")
224 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
225 "TARGET_SSE2 && reload_completed"
226 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
228 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
229 operands[2] = CONST0_RTX (DFmode);
;; Push of a 16-byte vector register, for every mode in SSEMODE; the
;; expansion is done by ix86_expand_push.
232 (define_expand "push<mode>1"
233 [(match_operand:SSEMODE 0 "register_operand" "")]
236 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned vector move for every mode in SSEMODE; the expansion is done
;; by ix86_expand_vector_move_misalign.
240 (define_expand "movmisalign<mode>"
241 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
242 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
245 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups / movupd for the packed float modes.  Either a register or
;; memory load into a register, or a register store to memory; the
;; condition rejects memory-to-memory.
249 (define_insn "<sse>_movup<ssemodesuffixf2c>"
250 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
252 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
254 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
255 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
256 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
257 [(set_attr "type" "ssemov")
258 (set_attr "mode" "<MODE>")])
;; movdqu on V16QI, represented as an unspec of the source.  Requires
;; SSE2; memory-to-memory is rejected by the condition.
260 (define_insn "sse2_movdqu"
261 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
262 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
264 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
265 "movdqu\t{%1, %0|%0, %1}"
266 [(set_attr "type" "ssemov")
267 (set_attr "prefix_data16" "1")
268 (set_attr "mode" "TI")])
;; movntps / movntpd: store of a packed float SSE register to memory.
;; The destination must be memory and the source an SSE register.
270 (define_insn "<sse>_movnt<mode>"
271 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
273 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
275 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
276 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
277 [(set_attr "type" "ssemov")
278 (set_attr "mode" "<MODE>")])
;; movntdq: store of a V2DI SSE register to memory, represented as an
;; unspec of the source register.
;; NOTE(review): "type" is ssecvt here but ssemov on the float movnt
;; pattern -- confirm whether the difference is intended.
280 (define_insn "sse2_movntv2di"
281 [(set (match_operand:V2DI 0 "memory_operand" "=m")
282 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
285 "movntdq\t{%1, %0|%0, %1}"
286 [(set_attr "type" "ssecvt")
287 (set_attr "prefix_data16" "1")
288 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory,
;; represented as an unspec of the source register.  The "mode" attribute
;; is SI to match the operands; no V2DF value is involved in this insn.
290 (define_insn "sse2_movntsi"
291 [(set (match_operand:SI 0 "memory_operand" "=m")
292 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
295 "movnti\t{%1, %0|%0, %1}"
296 [(set_attr "type" "ssecvt")
297 (set_attr "mode" "SI")])
;; lddqu: load of a V16QI value from memory into an SSE register,
;; represented as an unspec of the memory operand.
299 (define_insn "sse3_lddqu"
300 [(set (match_operand:V16QI 0 "register_operand" "=x")
301 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
304 "lddqu\t{%1, %0|%0, %1}"
305 [(set_attr "type" "ssecvt")
306 (set_attr "prefix_rep" "1")
307 (set_attr "mode" "TI")])
309 ; Expand patterns for non-temporal stores. At the moment, only those
310 ; that directly map to insns are defined; it would be possible to
311 ; define patterns for other modes that would expand to several insns.
;; storent expander for the packed float modes: memory destination,
;; register source.
313 (define_expand "storent<mode>"
314 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
316 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
318 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander for the scalar float modes in MODEF: memory
;; destination, register source.
321 (define_expand "storent<mode>"
322 [(set (match_operand:MODEF 0 "memory_operand" "")
324 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: memory destination, register source wrapped
;; in an unspec.
329 (define_expand "storentv2di"
330 [(set (match_operand:V2DI 0 "memory_operand" "")
331 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SImode: memory destination, register source
;; wrapped in an unspec.
336 (define_expand "storentsi"
337 [(set (match_operand:SI 0 "memory_operand" "")
338 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
343 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
345 ;; Parallel floating point arithmetic
347 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed float negation; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...), after which DONE ends it.
349 (define_expand "neg<mode>2"
350 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
351 (neg:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
352 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
353 "ix86_expand_fp_absneg_operator (NEG, <MODE>mode, operands); DONE;")
;; Packed float absolute value; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...), after which DONE ends it.
355 (define_expand "abs<mode>2"
356 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
357 (abs:SSEMODEF2P (match_operand:SSEMODEF2P 1 "register_operand" "")))]
358 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
359 "ix86_expand_fp_absneg_operator (ABS, <MODE>mode, operands); DONE;")
;; Packed float addition expander.  Both inputs may be memory here;
;; ix86_fixup_binary_operands_no_copy (PLUS, ...) adjusts the operands
;; before the pattern is emitted.
361 (define_expand "add<mode>3"
362 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
364 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
365 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
366 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
367 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; addps / addpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  The
;; operand combination is validated by ix86_binary_operator_ok.
369 (define_insn "*add<mode>3"
370 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
372 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
373 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
374 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
375 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
376 "addp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
377 [(set_attr "type" "sseadd")
378 (set_attr "mode" "<MODE>")])
;; addss / addsd, expressed as a vec_merge on the packed mode.  Operand 1
;; is a register tied to the destination; operand 2 may be a register or
;; memory.  The "mode" attribute is the scalar element mode.
;; The operand check is instantiated with <MODE>mode, matching the mode
;; the pattern is instantiated for, rather than a fixed V4SFmode.
380 (define_insn "<sse>_vmadd<mode>3"
381 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
382 (vec_merge:SSEMODEF2P
384 (match_operand:SSEMODEF2P 1 "register_operand" "0")
385 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
389 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
390 "adds<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
391 [(set_attr "type" "sseadd")
392 (set_attr "mode" "<ssescalarmode>")])
;; Packed float subtraction expander.  Operand 1 must already be a
;; register; ix86_fixup_binary_operands_no_copy (MINUS, ...) adjusts the
;; operands before the pattern is emitted.
394 (define_expand "sub<mode>3"
395 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
397 (match_operand:SSEMODEF2P 1 "register_operand" "")
398 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
399 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
400 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; subps / subpd.  Operand 1 is a register tied to the destination (no
;; commutative marker); operand 2 may be a register or memory.
402 (define_insn "*sub<mode>3"
403 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
405 (match_operand:SSEMODEF2P 1 "register_operand" "0")
406 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
407 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
408 "subp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
409 [(set_attr "type" "sseadd")
410 (set_attr "mode" "<MODE>")])
;; subss / subsd, expressed as a vec_merge on the packed mode.  Operand 1
;; is a register tied to the destination; the "mode" attribute is the
;; scalar element mode.
412 (define_insn "<sse>_vmsub<mode>3"
413 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
414 (vec_merge:SSEMODEF2P
416 (match_operand:SSEMODEF2P 1 "register_operand" "0")
417 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
420 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
421 "subs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
422 [(set_attr "type" "sseadd")
423 (set_attr "mode" "<ssescalarmode>")])
;; Packed float multiplication expander.  Both inputs may be memory here;
;; ix86_fixup_binary_operands_no_copy (MULT, ...) adjusts the operands
;; before the pattern is emitted.
425 (define_expand "mul<mode>3"
426 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
428 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
429 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
430 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
431 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; mulps / mulpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  The
;; operand combination is validated by ix86_binary_operator_ok.
433 (define_insn "*mul<mode>3"
434 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
436 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
437 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
438 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
439 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
440 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
441 [(set_attr "type" "ssemul")
442 (set_attr "mode" "<MODE>")])
;; mulss / mulsd, expressed as a vec_merge on the packed mode.  Operand 1
;; is a register tied to the destination; the operand combination is
;; validated by ix86_binary_operator_ok.  The "mode" attribute is the
;; scalar element mode.
444 (define_insn "<sse>_vmmul<mode>3"
445 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
446 (vec_merge:SSEMODEF2P
448 (match_operand:SSEMODEF2P 1 "register_operand" "0")
449 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
452 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
453 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
454 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
455 [(set_attr "type" "ssemul")
456 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  ix86_emit_swdivsf is then called
;; when TARGET_SSE_MATH and TARGET_RECIP are enabled, the code is not
;; optimized for size, and finite-math-only, non-trapping math and unsafe
;; math optimizations are all in effect.
458 (define_expand "divv4sf3"
459 [(set (match_operand:V4SF 0 "register_operand" "")
460 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
461 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
464 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
466 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
467 && flag_finite_math_only && !flag_trapping_math
468 && flag_unsafe_math_optimizations)
470 ix86_emit_swdivsf (operands[0], operands[1],
471 operands[2], V4SFmode);
;; V2DF division expander; only adjusts the operands through
;; ix86_fixup_binary_operands_no_copy (DIV, ...).
476 (define_expand "divv2df3"
477 [(set (match_operand:V2DF 0 "register_operand" "")
478 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
479 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
481 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divps / divpd.  Operand 1 is a register tied to the destination;
;; operand 2 may be a register or memory.
483 (define_insn "<sse>_div<mode>3"
484 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
486 (match_operand:SSEMODEF2P 1 "register_operand" "0")
487 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
488 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
489 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
490 [(set_attr "type" "ssediv")
491 (set_attr "mode" "<MODE>")])
;; divss / divsd, expressed as a vec_merge on the packed mode.  Operand 1
;; is a register tied to the destination; the "mode" attribute is the
;; scalar element mode.
493 (define_insn "<sse>_vmdiv<mode>3"
494 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
495 (vec_merge:SSEMODEF2P
497 (match_operand:SSEMODEF2P 1 "register_operand" "0")
498 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
501 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
502 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
503 [(set_attr "type" "ssediv")
504 (set_attr "mode" "<ssescalarmode>")])
;; rcpps: V4SF reciprocal, represented as UNSPEC_RCP of operand 1, which
;; may be a register or memory.
506 (define_insn "sse_rcpv4sf2"
507 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
511 "rcpps\t{%1, %0|%0, %1}"
512 [(set_attr "type" "sse")
513 (set_attr "mode" "V4SF")])
;; rcpss: scalar reciprocal of operand 1 (register or memory).  Operand 2
;; is a register tied to the destination; presumably it supplies the
;; elements the scalar operation leaves alone -- confirm against the full
;; pattern.
515 (define_insn "sse_vmrcpv4sf2"
516 [(set (match_operand:V4SF 0 "register_operand" "=x")
518 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
520 (match_operand:V4SF 2 "register_operand" "0")
523 "rcpss\t{%1, %0|%0, %1}"
524 [(set_attr "type" "sse")
525 (set_attr "mode" "SF")])
;; V4SF square root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called when TARGET_SSE_MATH and TARGET_RECIP are enabled, the code is
;; not optimized for size, and finite-math-only, non-trapping math and
;; unsafe math optimizations are all in effect.
527 (define_expand "sqrtv4sf2"
528 [(set (match_operand:V4SF 0 "register_operand" "")
529 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
532 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
533 && flag_finite_math_only && !flag_trapping_math
534 && flag_unsafe_math_optimizations)
536 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps: V4SF square root of a register or memory operand.
541 (define_insn "sse_sqrtv4sf2"
542 [(set (match_operand:V4SF 0 "register_operand" "=x")
543 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
545 "sqrtps\t{%1, %0|%0, %1}"
546 [(set_attr "type" "sse")
547 (set_attr "mode" "V4SF")])
;; sqrtpd: V2DF square root of a register or memory operand.
549 (define_insn "sqrtv2df2"
550 [(set (match_operand:V2DF 0 "register_operand" "=x")
551 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
553 "sqrtpd\t{%1, %0|%0, %1}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "V2DF")])
;; sqrtss / sqrtsd, expressed as a vec_merge.  Operand 1 (register or
;; memory) is the value whose root is taken; operand 2 is a register tied
;; to the destination.  The "mode" attribute is the scalar element mode.
557 (define_insn "<sse>_vmsqrt<mode>2"
558 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
559 (vec_merge:SSEMODEF2P
561 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
562 (match_operand:SSEMODEF2P 2 "register_operand" "0")
564 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
565 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
566 [(set_attr "type" "sse")
567 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal square root expander (UNSPEC_RSQRT); the code is
;; emitted by ix86_emit_swsqrtsf with 1 as its last argument.
569 (define_expand "rsqrtv4sf2"
570 [(set (match_operand:V4SF 0 "register_operand" "")
572 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
575 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps: V4SF reciprocal square root, represented as UNSPEC_RSQRT of
;; operand 1, which may be a register or memory.
579 (define_insn "sse_rsqrtv4sf2"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
584 "rsqrtps\t{%1, %0|%0, %1}"
585 [(set_attr "type" "sse")
586 (set_attr "mode" "V4SF")])
;; rsqrtss: scalar reciprocal square root of operand 1 (register or
;; memory).  Operand 2 is a register tied to the destination; presumably
;; it supplies the elements the scalar operation leaves alone -- confirm
;; against the full pattern.
588 (define_insn "sse_vmrsqrtv4sf2"
589 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
593 (match_operand:V4SF 2 "register_operand" "0")
596 "rsqrtss\t{%1, %0|%0, %1}"
597 [(set_attr "type" "sse")
598 (set_attr "mode" "SF")])
600 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
601 ;; isn't really correct, as those rtl operators aren't defined when
602 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed float minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy (SMIN, ...).
604 (define_expand "smin<mode>3"
605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
607 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
611 if (!flag_finite_math_only)
612 operands[1] = force_reg (<MODE>mode, operands[1]);
613 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
;; minps / minpd, commutative form ("%0" on operand 1).  Only enabled
;; under flag_finite_math_only, and validated by ix86_binary_operator_ok.
616 (define_insn "*smin<mode>3_finite"
617 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
619 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
620 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
621 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
622 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
623 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
624 [(set_attr "type" "sseadd")
625 (set_attr "mode" "<MODE>")])
;; minps / minpd, non-commutative form: operand 1 must be a register tied
;; to the destination and carries no "%" marker.  Its condition does not
;; depend on flag_finite_math_only.
627 (define_insn "*smin<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
630 (match_operand:SSEMODEF2P 1 "register_operand" "0")
631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
633 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
634 [(set_attr "type" "sseadd")
635 (set_attr "mode" "<MODE>")])
;; minss / minsd, expressed as a vec_merge on the packed mode.  Operand 1
;; is a register tied to the destination; the "mode" attribute is the
;; scalar element mode.  The "type" attribute is sseadd, the same
;; classification the packed min/max insns and the scalar max insn use.
637 (define_insn "<sse>_vmsmin<mode>3"
638 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
639 (vec_merge:SSEMODEF2P
641 (match_operand:SSEMODEF2P 1 "register_operand" "0")
642 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
645 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
646 "mins<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
647 [(set_attr "type" "sseadd")
648 (set_attr "mode" "<ssescalarmode>")])
;; Packed float maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy (SMAX, ...).
650 (define_expand "smax<mode>3"
651 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
653 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
654 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
655 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
657 if (!flag_finite_math_only)
658 operands[1] = force_reg (<MODE>mode, operands[1]);
659 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
;; maxps / maxpd, commutative form ("%0" on operand 1).  Only enabled
;; under flag_finite_math_only, and validated by ix86_binary_operator_ok.
662 (define_insn "*smax<mode>3_finite"
663 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
665 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
666 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
667 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
668 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
669 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
670 [(set_attr "type" "sseadd")
671 (set_attr "mode" "<MODE>")])
;; maxps / maxpd, non-commutative form: operand 1 must be a register tied
;; to the destination and carries no "%" marker.  Its condition does not
;; depend on flag_finite_math_only.
673 (define_insn "*smax<mode>3"
674 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
676 (match_operand:SSEMODEF2P 1 "register_operand" "0")
677 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
678 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
679 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
680 [(set_attr "type" "sseadd")
681 (set_attr "mode" "<MODE>")])
;; maxss / maxsd, expressed as a vec_merge on the packed mode.  Operand 1
;; is a register tied to the destination; the "mode" attribute is the
;; scalar element mode.
683 (define_insn "<sse>_vmsmax<mode>3"
684 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
685 (vec_merge:SSEMODEF2P
687 (match_operand:SSEMODEF2P 1 "register_operand" "0")
688 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
691 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
692 "maxs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
693 [(set_attr "type" "sseadd")
694 (set_attr "mode" "<ssescalarmode>")])
696 ;; These versions of the min/max patterns implement exactly the operations
697 ;; min = (op1 < op2 ? op1 : op2)
698 ;; max = (op1 > op2 ? op1 : op2)
699 ;; Their operands are not commutative, and thus they may be used in the
700 ;; presence of -0.0 and NaN.
;; minps / minpd with a fixed operand order: operand 1 is a register tied
;; to the destination (no "%" marker) and operand 2 may be a register or
;; memory.
702 (define_insn "*ieee_smin<mode>3"
703 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
705 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
706 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
708 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
710 [(set_attr "type" "sseadd")
711 (set_attr "mode" "<MODE>")])
;; maxps / maxpd with a fixed operand order: operand 1 is a register tied
;; to the destination (no "%" marker) and operand 2 may be a register or
;; memory.
713 (define_insn "*ieee_smax<mode>3"
714 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
716 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
717 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
719 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "mode" "<MODE>")])
;; addsubps.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.  The pattern reuses both operands in a
;; minus rtx via match_dup.
724 (define_insn "sse3_addsubv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operand:V4SF 1 "register_operand" "0")
729 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
730 (minus:V4SF (match_dup 1) (match_dup 2))
733 "addsubps\t{%2, %0|%0, %2}"
734 [(set_attr "type" "sseadd")
735 (set_attr "prefix_rep" "1")
736 (set_attr "mode" "V4SF")])
;; addsubpd.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.  The pattern reuses both operands in a
;; minus rtx via match_dup.
738 (define_insn "sse3_addsubv2df3"
739 [(set (match_operand:V2DF 0 "register_operand" "=x")
742 (match_operand:V2DF 1 "register_operand" "0")
743 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
744 (minus:V2DF (match_dup 1) (match_dup 2))
747 "addsubpd\t{%2, %0|%0, %2}"
748 [(set_attr "type" "sseadd")
749 (set_attr "mode" "V2DF")])
;; haddps.  The pattern selects the element pairs (0,1) and (2,3) of
;; operand 1, then the same pairs of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
751 (define_insn "sse3_haddv4sf3"
752 [(set (match_operand:V4SF 0 "register_operand" "=x")
757 (match_operand:V4SF 1 "register_operand" "0")
758 (parallel [(const_int 0)]))
759 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
761 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
762 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
766 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
767 (parallel [(const_int 0)]))
768 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
770 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
771 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
773 "haddps\t{%2, %0|%0, %2}"
774 [(set_attr "type" "sseadd")
775 (set_attr "prefix_rep" "1")
776 (set_attr "mode" "V4SF")])
;; haddpd.  The pattern selects elements 0 and 1 of operand 1, then
;; elements 0 and 1 of operand 2.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
778 (define_insn "sse3_haddv2df3"
779 [(set (match_operand:V2DF 0 "register_operand" "=x")
783 (match_operand:V2DF 1 "register_operand" "0")
784 (parallel [(const_int 0)]))
785 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
788 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
789 (parallel [(const_int 0)]))
790 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
792 "haddpd\t{%2, %0|%0, %2}"
793 [(set_attr "type" "sseadd")
794 (set_attr "mode" "V2DF")])
;; hsubps.  The pattern selects the element pairs (0,1) and (2,3) of
;; operand 1, then the same pairs of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
796 (define_insn "sse3_hsubv4sf3"
797 [(set (match_operand:V4SF 0 "register_operand" "=x")
802 (match_operand:V4SF 1 "register_operand" "0")
803 (parallel [(const_int 0)]))
804 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
806 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
807 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
811 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
812 (parallel [(const_int 0)]))
813 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
815 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
816 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
818 "hsubps\t{%2, %0|%0, %2}"
819 [(set_attr "type" "sseadd")
820 (set_attr "prefix_rep" "1")
821 (set_attr "mode" "V4SF")])
;; hsubpd.  The pattern selects elements 0 and 1 of operand 1, then
;; elements 0 and 1 of operand 2.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
823 (define_insn "sse3_hsubv2df3"
824 [(set (match_operand:V2DF 0 "register_operand" "=x")
828 (match_operand:V2DF 1 "register_operand" "0")
829 (parallel [(const_int 0)]))
830 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
833 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
834 (parallel [(const_int 0)]))
835 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
837 "hsubpd\t{%2, %0|%0, %2}"
838 [(set_attr "type" "sseadd")
839 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector into operand 0.  Two code paths are
;; present: one applies sse3_haddv4sf3 twice, going through a fresh
;; temporary register; the other calls ix86_expand_reduc_v4sf with
;; gen_addv4sf3 as the combining operation.
841 (define_expand "reduc_splus_v4sf"
842 [(match_operand:V4SF 0 "register_operand" "")
843 (match_operand:V4SF 1 "register_operand" "")]
848 rtx tmp = gen_reg_rtx (V4SFmode);
849 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
850 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
853 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single sse3_haddv2df3 with operand 1
;; used as both inputs.
857 (define_expand "reduc_splus_v2df"
858 [(match_operand:V2DF 0 "register_operand" "")
859 (match_operand:V2DF 1 "register_operand" "")]
862 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector, done by ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
866 (define_expand "reduc_smax_v4sf"
867 [(match_operand:V4SF 0 "register_operand" "")
868 (match_operand:V4SF 1 "register_operand" "")]
871 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector, done by ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
875 (define_expand "reduc_smin_v4sf"
876 [(match_operand:V4SF 0 "register_operand" "")
877 (match_operand:V4SF 1 "register_operand" "")]
880 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
884 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
886 ;; Parallel floating point comparisons
888 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>{ss,sd,ps,pd}: mask-producing comparison for both scalar and
;; packed float modes (SSEMODEF4).  Operand 3 is the comparison operator,
;; printed through %D3; operand 1 is a register tied to the destination
;; and operand 2 may be a register or memory.
890 (define_insn "<sse>_maskcmp<mode>3"
891 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
892 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
893 [(match_operand:SSEMODEF4 1 "register_operand" "0")
894 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
895 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
897 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
898 [(set_attr "type" "ssecmp")
899 (set_attr "mode" "<MODE>")])
;; cmp<cond>ss / cmp<cond>sd, expressed as a vec_merge on the packed
;; mode.  Operand 3 is the comparison operator, printed through %D3;
;; operand 1 is a register tied to the destination.  Disabled when
;; TARGET_SSE5 is set.
901 (define_insn "<sse>_vmmaskcmp<mode>3"
902 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
903 (vec_merge:SSEMODEF2P
904 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
905 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
906 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
909 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
910 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
911 [(set_attr "type" "ssecmp")
912 (set_attr "mode" "<ssescalarmode>")])
;; comis<suffix>: sets FLAGS_REG (CCFP mode) from element 0 of each of
;; the two vector operands.  Operand 0 must be an SSE register; operand 1
;; may be a register or memory.
914 (define_insn "<sse>_comi"
915 [(set (reg:CCFP FLAGS_REG)
918 (match_operand:<ssevecmode> 0 "register_operand" "x")
919 (parallel [(const_int 0)]))
921 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
922 (parallel [(const_int 0)]))))]
923 "SSE_FLOAT_MODE_P (<MODE>mode)"
924 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
925 [(set_attr "type" "ssecomi")
926 (set_attr "mode" "<MODE>")])
;; ucomis<suffix>: sets FLAGS_REG (CCFPU mode) from element 0 of each of
;; the two vector operands.  Operand 0 must be an SSE register; operand 1
;; may be a register or memory.
928 (define_insn "<sse>_ucomi"
929 [(set (reg:CCFPU FLAGS_REG)
932 (match_operand:<ssevecmode> 0 "register_operand" "x")
933 (parallel [(const_int 0)]))
935 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
936 (parallel [(const_int 0)]))))]
937 "SSE_FLOAT_MODE_P (<MODE>mode)"
938 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
939 [(set_attr "type" "ssecomi")
940 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the packed float modes.  Operands 4 and 5
;; are the values compared; operands 1 and 2 are the two arms of the
;; if_then_else.  The expansion is attempted by ix86_expand_fp_vcond,
;; whose return value is tested.
942 (define_expand "vcond<mode>"
943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
944 (if_then_else:SSEMODEF2P
946 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
947 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
948 (match_operand:SSEMODEF2P 1 "general_operand" "")
949 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
950 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
952 if (ix86_expand_fp_vcond (operands))
958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
960 ;; Parallel floating point logical operations
962 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise AND expander for the packed float modes; the operands are
;; adjusted by ix86_fixup_binary_operands_no_copy (AND, ...).
964 (define_expand "and<mode>3"
965 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
967 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
968 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
969 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
970 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; andps / andpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  The
;; operand combination is validated by ix86_binary_operator_ok.
972 (define_insn "*and<mode>3"
973 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
975 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
976 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
977 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
978 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
979 "andp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
980 [(set_attr "type" "sselog")
981 (set_attr "mode" "<MODE>")])
;; andnps / andnpd.  Operand 1 is a register tied to the destination and
;; sits inside an inner rtx of its own; operand 2 may be a register or
;; memory.
;; NOTE(review): presumably the pattern is (and (not op1) op2), as the
;; mnemonic suggests -- confirm against the full pattern.
983 (define_insn "<sse>_nand<mode>3"
984 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
987 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
988 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
989 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
990 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
991 [(set_attr "type" "sselog")
992 (set_attr "mode" "<MODE>")])
;; Bitwise OR expander for the packed float modes; the operands are
;; adjusted by ix86_fixup_binary_operands_no_copy (IOR, ...).
994 (define_expand "ior<mode>3"
995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
997 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
998 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
999 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1000 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; orps / orpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  The
;; operand combination is validated by ix86_binary_operator_ok.
1002 (define_insn "*ior<mode>3"
1003 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1005 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1006 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1007 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1008 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
1009 "orp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1010 [(set_attr "type" "sselog")
1011 (set_attr "mode" "<MODE>")])
;; Bitwise XOR expander for the packed float modes; the operands are
;; adjusted by ix86_fixup_binary_operands_no_copy (XOR, ...).
1013 (define_expand "xor<mode>3"
1014 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1016 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1017 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1018 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1019 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Insn for the vector float exclusive OR; emits xorps/xorpd.  Operand 1 is
;; tied to the destination and commutative with operand 2 ("%0"); operand 2
;; may be an "x" register or memory.  The condition also requires
;; ix86_binary_operator_ok for XOR.
1021 (define_insn "*xor<mode>3"
1022 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1024 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1025 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1026 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1027 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
1028 "xorp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1029 [(set_attr "type" "sselog")
1030 (set_attr "mode" "<MODE>")])
1032 ;; Also define scalar versions. These are used for abs, neg, and
1033 ;; conditional move. Using subregs into vector modes causes register
1034 ;; allocation lossage. These patterns do not allow memory operands
1035 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) AND.  All three operands are registers; operand 1 is tied
;; to the destination.  Emits the packed andp<ssemodefsuffix> instruction,
;; and the "mode" attribute is the <ssevecmode> mapping of the scalar mode.
1037 (define_insn "*and<mode>3"
1038 [(set (match_operand:MODEF 0 "register_operand" "=x")
1040 (match_operand:MODEF 1 "register_operand" "0")
1041 (match_operand:MODEF 2 "register_operand" "x")))]
1042 "SSE_FLOAT_MODE_P (<MODE>mode)"
1043 "andp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1044 [(set_attr "type" "sselog")
1045 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not.  Register operands only; operand 1 is tied to the
;; destination.  Emits the packed andnp<ssemodefsuffix> instruction with the
;; "mode" attribute set from <ssevecmode>.
1047 (define_insn "*nand<mode>3"
1048 [(set (match_operand:MODEF 0 "register_operand" "=x")
1051 (match_operand:MODEF 1 "register_operand" "0"))
1052 (match_operand:MODEF 2 "register_operand" "x")))]
1053 "SSE_FLOAT_MODE_P (<MODE>mode)"
1054 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1055 [(set_attr "type" "sselog")
1056 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) inclusive OR.  Register operands only; operand 1 is tied
;; to the destination.  Emits the packed orp<ssemodefsuffix> instruction
;; with the "mode" attribute set from <ssevecmode>.
1058 (define_insn "*ior<mode>3"
1059 [(set (match_operand:MODEF 0 "register_operand" "=x")
1061 (match_operand:MODEF 1 "register_operand" "0")
1062 (match_operand:MODEF 2 "register_operand" "x")))]
1063 "SSE_FLOAT_MODE_P (<MODE>mode)"
1064 "orp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1065 [(set_attr "type" "sselog")
1066 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) exclusive OR.  Register operands only; operand 1 is tied
;; to the destination.  Emits the packed xorp<ssemodefsuffix> instruction
;; with the "mode" attribute set from <ssevecmode>.
1068 (define_insn "*xor<mode>3"
1069 [(set (match_operand:MODEF 0 "register_operand" "=x")
1071 (match_operand:MODEF 1 "register_operand" "0")
1072 (match_operand:MODEF 2 "register_operand" "x")))]
1073 "SSE_FLOAT_MODE_P (<MODE>mode)"
1074 "xorp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1075 [(set_attr "type" "sselog")
1076 (set_attr "mode" "<ssevecmode>")])
1078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1080 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1081 ;; scalar versions of the instructions as well as the vector versions.
1083 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1085 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1086 ;; combine to generate a multiply/add with two memory references. We then
1087 ;; split this insn, into loading up the destination register with one of the
1088 ;; memory operations. If we don't manage to split the insn, reload will
1089 ;; generate the appropriate moves. The reason this is needed, is that combine
1090 ;; has already folded one of the memory references into both the multiply and
1091 ;; add insns, and it can't generate a new pseudo. I.e.:
1092 ;; (set (reg1) (mem (addr1)))
1093 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1094 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; Fused multiply/add for SSEMODEF4 (SF, DF, V4SF, V2DF); emits
;; fmaddss/fmaddsd/fmaddps/fmaddpd.  Four alternatives: the destination is
;; tied either to operand 1 (alternatives 1-2) or to operand 3
;; (alternatives 3-4), and each alternative permits one "xm" operand.
;; Requires TARGET_SSE5 && TARGET_FUSED_MADD.  The final argument (2) to
;; ix86_sse5_valid_op_p is presumably the number of memory operands
;; tolerated -- TODO confirm against its definition.
1096 (define_insn "sse5_fmadd<mode>4"
1097 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1100 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1101 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1102 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1103 "TARGET_SSE5 && TARGET_FUSED_MADD
1104 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1105 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1106 [(set_attr "type" "ssemuladd")
1107 (set_attr "mode" "<MODE>")])
1109 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Splitter body for fmadd.  It applies only when the operands fail
;; ix86_sse5_valid_op_p with a final argument of 1 but pass with 2, and
;; operand 0 is not mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; gen_sse5_fmadd<mode>4.
1111 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1114 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1115 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1116 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1118 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1119 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1120 && !reg_mentioned_p (operands[0], operands[1])
1121 && !reg_mentioned_p (operands[0], operands[2])
1122 && !reg_mentioned_p (operands[0], operands[3])"
1125 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1126 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1127 operands[2], operands[3]));
1131 ;; For the scalar operations, use operand1 for the upper words that aren't
1132 ;; modified, so restrict the forms that are generated.
1133 ;; Scalar version of fmadd
;; vec_merge form of fmadd over SSEMODEF2P; emits fmaddss/fmaddsd (suffix
;; from <ssemodesuffixf2s>).  Operand 1 is tied to the destination in both
;; alternatives; either operand 3 or operand 2 may be memory.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD and ix86_sse5_valid_op_p with a final
;; argument of 1.
1134 (define_insn "sse5_vmfmadd<mode>4"
1135 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1136 (vec_merge:SSEMODEF2P
1139 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1140 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1141 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1144 "TARGET_SSE5 && TARGET_FUSED_MADD
1145 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1146 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1147 [(set_attr "type" "ssemuladd")
1148 (set_attr "mode" "<MODE>")])
1150 ;; Floating multiply and subtract
1151 ;; Allow two memory operands the same as fmadd
;; Fused multiply/subtract for SSEMODEF4; emits
;; fmsubss/fmsubsd/fmsubps/fmsubpd.  Same four constraint alternatives as
;; sse5_fmadd<mode>4: the destination is tied to operand 1 or operand 3, and
;; each alternative permits one "xm" operand.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD.
1152 (define_insn "sse5_fmsub<mode>4"
1153 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1156 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1157 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1158 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1159 "TARGET_SSE5 && TARGET_FUSED_MADD
1160 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1161 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1162 [(set_attr "type" "ssemuladd")
1163 (set_attr "mode" "<MODE>")])
1165 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Splitter body for fmsub.  It applies only when the operands fail
;; ix86_sse5_valid_op_p with a final argument of 1 but pass with 2, and
;; operand 0 is not mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; gen_sse5_fmsub<mode>4.
1167 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1170 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1171 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1172 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1174 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1175 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1176 && !reg_mentioned_p (operands[0], operands[1])
1177 && !reg_mentioned_p (operands[0], operands[2])
1178 && !reg_mentioned_p (operands[0], operands[3])"
1181 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1182 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1183 operands[2], operands[3]));
1187 ;; For the scalar operations, use operand1 for the upper words that aren't
1188 ;; modified, so restrict the forms that are generated.
1189 ;; Scalar version of fmsub
;; vec_merge form of fmsub over SSEMODEF2P; emits fmsubss/fmsubsd.
;; Operand 1 is tied to the destination in both alternatives; either
;; operand 3 or operand 2 may be memory.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD and ix86_sse5_valid_op_p with a final
;; argument of 1.
1190 (define_insn "sse5_vmfmsub<mode>4"
1191 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1192 (vec_merge:SSEMODEF2P
1195 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1196 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1197 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1200 "TARGET_SSE5 && TARGET_FUSED_MADD
1201 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1202 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1203 [(set_attr "type" "ssemuladd")
1204 (set_attr "mode" "<MODE>")])
1206 ;; Floating point negative multiply and add
1207 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1208 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1209 ;; Allow two memory operands to help in optimizing.
;; Fused negative multiply/add for SSEMODEF4; emits
;; fnmaddss/fnmaddsd/fnmaddps/fnmaddpd.  Operand 3 is written first in the
;; pattern, ahead of operands 1 and 2.  The constraint alternatives per
;; operand number are the same as in sse5_fmadd<mode>4.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD.
1210 (define_insn "sse5_fnmadd<mode>4"
1211 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1213 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1215 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1216 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1217 "TARGET_SSE5 && TARGET_FUSED_MADD
1218 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1219 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1220 [(set_attr "type" "ssemuladd")
1221 (set_attr "mode" "<MODE>")])
1223 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Splitter body for fnmadd (operand 3 is written first, as in the insn).
;; It applies only when the operands fail ix86_sse5_valid_op_p with a final
;; argument of 1 but pass with 2, and operand 0 is not mentioned in
;; operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmadd<mode>4.
1225 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1227 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1229 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1230 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1232 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1233 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1234 && !reg_mentioned_p (operands[0], operands[1])
1235 && !reg_mentioned_p (operands[0], operands[2])
1236 && !reg_mentioned_p (operands[0], operands[3])"
1239 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1240 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1241 operands[2], operands[3]));
1245 ;; For the scalar operations, use operand1 for the upper words that aren't
1246 ;; modified, so restrict the forms that are generated.
1247 ;; Scalar version of fnmadd
;; vec_merge form of fnmadd over SSEMODEF2P; emits fnmaddss/fnmaddsd.
;; Operand 3 is written first in the pattern.  Operand 1 is tied to the
;; destination in both alternatives; either operand 3 or operand 2 may be
;; memory.  Requires TARGET_SSE5 && TARGET_FUSED_MADD and
;; ix86_sse5_valid_op_p with a final argument of 1.
1248 (define_insn "sse5_vmfnmadd<mode>4"
1249 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1250 (vec_merge:SSEMODEF2P
1252 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1254 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1255 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1258 "TARGET_SSE5 && TARGET_FUSED_MADD
1259 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1260 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1261 [(set_attr "type" "ssemuladd")
1262 (set_attr "mode" "<MODE>")])
1264 ;; Floating point negative multiply and subtract
1265 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1266 ;; Allow 2 memory operands to help with optimization
;; Fused negative multiply/subtract for SSEMODEF4; emits
;; fnmsubss/fnmsubsd/fnmsubps/fnmsubpd.  Unlike fmadd/fmsub/fnmadd there are
;; only two alternatives, and operand 1 is tied to the destination in both
;; without a commutative marker.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD.
1267 (define_insn "sse5_fnmsub<mode>4"
1268 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1272 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1273 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1274 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1275 "TARGET_SSE5 && TARGET_FUSED_MADD
1276 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1277 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1278 [(set_attr "type" "ssemuladd")
1279 (set_attr "mode" "<MODE>")])
1281 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Splitter body for fnmsub.  It applies only when the operands fail
;; ix86_sse5_valid_op_p with a final argument of 1 but pass with 2, and
;; operand 0 is not mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; gen_sse5_fnmsub<mode>4.
1283 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1287 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1288 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1289 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1291 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1292 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1293 && !reg_mentioned_p (operands[0], operands[1])
1294 && !reg_mentioned_p (operands[0], operands[2])
1295 && !reg_mentioned_p (operands[0], operands[3])"
1298 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1299 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1300 operands[2], operands[3]));
1304 ;; For the scalar operations, use operand1 for the upper words that aren't
1305 ;; modified, so restrict the forms that are generated.
1306 ;; Scalar version of fnmsub
;; vec_merge form of fnmsub over SSEMODEF2P; emits fnmsubss/fnmsubsd.
;; Operand 1 is tied to the destination in both alternatives; either
;; operand 3 or operand 2 may be memory.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD.
;; NOTE(review): the condition passes 2 as the final argument of
;; ix86_sse5_valid_op_p, whereas sse5_vmfmadd, sse5_vmfmsub and
;; sse5_vmfnmadd pass 1 -- confirm whether this difference is intended.
1307 (define_insn "sse5_vmfnmsub<mode>4"
1308 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1309 (vec_merge:SSEMODEF2P
1313 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1314 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1315 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1318 "TARGET_SSE5 && TARGET_FUSED_MADD
1319 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1320 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1321 [(set_attr "type" "ssemuladd")
1322 (set_attr "mode" "<MODE>")])
1324 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1325 ;; even if the user used -mno-fused-madd
1326 ;; Parallel instructions. During instruction generation, just default
1327 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for vector fmadd (SSEMODEF2P).  All operands are
;; registers and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_fmadd<mode>4 on the same operands instead.
1328 (define_expand "sse5i_fmadd<mode>4"
1329 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1333 (match_operand:SSEMODEF2P 1 "register_operand" "")
1334 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1335 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1336 UNSPEC_SSE5_INTRINSIC))]
1339 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1340 if (TARGET_FUSED_MADD)
1342 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1343 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of vector fmadd; emits
;; fmaddps/fmaddpd.  The condition requires only TARGET_SSE5 (not
;; TARGET_FUSED_MADD) plus ix86_sse5_valid_op_p with a final argument of 1.
;; Constraint alternatives are the same as in sse5_fmadd<mode>4.
1348 (define_insn "*sse5i_fmadd<mode>4"
1349 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1353 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1354 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1355 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1356 UNSPEC_SSE5_INTRINSIC))]
1357 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1358 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1359 [(set_attr "type" "ssemuladd")
1360 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for vector fmsub (SSEMODEF2P).  All operands are
;; registers and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_fmsub<mode>4 on the same operands instead.
1362 (define_expand "sse5i_fmsub<mode>4"
1363 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1367 (match_operand:SSEMODEF2P 1 "register_operand" "")
1368 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1369 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1370 UNSPEC_SSE5_INTRINSIC))]
1373 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1374 if (TARGET_FUSED_MADD)
1376 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1377 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of vector fmsub; emits
;; fmsubps/fmsubpd.  The condition requires only TARGET_SSE5 (not
;; TARGET_FUSED_MADD) plus ix86_sse5_valid_op_p with a final argument of 1.
;; Operand 1 uses nonimmediate_operand, as operand 1 of *sse5i_fmadd,
;; *sse5i_fnmadd and *sse5i_fnmsub does: its fourth constraint alternative
;; is "xm", and a register_operand predicate would keep the memory half of
;; that alternative from ever matching.
1382 (define_insn "*sse5i_fmsub<mode>4"
1383 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1387 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1388 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1389 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1390 UNSPEC_SSE5_INTRINSIC))]
1391 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1392 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1393 [(set_attr "type" "ssemuladd")
1394 (set_attr "mode" "<MODE>")])
1396 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1397 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for vector fnmadd (SSEMODEF2P); operand 3 is written
;; first in the pattern.  All operands are registers and the pattern is
;; wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the
;; preparation code emits gen_sse5_fnmadd<mode>4 on the same operands
;; instead.
1398 (define_expand "sse5i_fnmadd<mode>4"
1399 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1402 (match_operand:SSEMODEF2P 3 "register_operand" "")
1404 (match_operand:SSEMODEF2P 1 "register_operand" "")
1405 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1406 UNSPEC_SSE5_INTRINSIC))]
1409 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1410 if (TARGET_FUSED_MADD)
1412 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1413 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of vector fnmadd; emits
;; fnmaddps/fnmaddpd.  Operand 3 is written first in the pattern.  The
;; condition requires only TARGET_SSE5 plus ix86_sse5_valid_op_p with a
;; final argument of 1.
1418 (define_insn "*sse5i_fnmadd<mode>4"
1419 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1422 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1424 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1425 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1426 UNSPEC_SSE5_INTRINSIC))]
1427 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1428 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1429 [(set_attr "type" "ssemuladd")
1430 (set_attr "mode" "<MODE>")])
1432 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander for vector fnmsub (SSEMODEF2P).  All operands are
;; registers and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_fnmsub<mode>4 on the same operands instead.
1433 (define_expand "sse5i_fnmsub<mode>4"
1434 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1439 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1440 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1441 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1442 UNSPEC_SSE5_INTRINSIC))]
1445 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1446 if (TARGET_FUSED_MADD)
1448 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1449 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of vector fnmsub; emits
;; fnmsubps/fnmsubpd.  Unlike sse5_fnmsub<mode>4, this form lists four
;; alternatives with a commutative marker on operand 1.  The condition
;; requires only TARGET_SSE5 plus ix86_sse5_valid_op_p with a final
;; argument of 1.
1454 (define_insn "*sse5i_fnmsub<mode>4"
1455 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1460 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1461 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1462 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1463 UNSPEC_SSE5_INTRINSIC))]
1464 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1465 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1466 [(set_attr "type" "ssemuladd")
1467 (set_attr "mode" "<MODE>")])
1469 ;; Scalar instructions
;; Intrinsic expander for the vec_merge form of fmadd.  All operands are
;; registers and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_vmfmadd<mode>4 on the same operands instead.
1470 (define_expand "sse5i_vmfmadd<mode>4"
1471 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1473 [(vec_merge:SSEMODEF2P
1476 (match_operand:SSEMODEF2P 1 "register_operand" "")
1477 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1478 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1481 UNSPEC_SSE5_INTRINSIC))]
1484 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1485 if (TARGET_FUSED_MADD)
1487 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1488 operands[2], operands[3]));
1493 ;; For the scalar operations, use operand1 for the upper words that aren't
1494 ;; modified, so restrict the forms that are accepted.
;; Insn matching the UNSPEC_SSE5_INTRINSIC vec_merge form of fmadd; emits
;; fmaddss/fmaddsd.  Operand 1 must be a register tied to the destination;
;; either operand 3 or operand 2 may be memory.  The "mode" attribute is
;; the <ssescalarmode> mapping rather than <MODE>.
1495 (define_insn "*sse5i_vmfmadd<mode>4"
1496 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1498 [(vec_merge:SSEMODEF2P
1501 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1502 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1503 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1506 UNSPEC_SSE5_INTRINSIC))]
1507 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1508 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1509 [(set_attr "type" "ssemuladd")
1510 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge form of fmsub.  All operands are
;; registers and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_vmfmsub<mode>4 on the same operands instead.
1512 (define_expand "sse5i_vmfmsub<mode>4"
1513 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1515 [(vec_merge:SSEMODEF2P
1518 (match_operand:SSEMODEF2P 1 "register_operand" "")
1519 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1520 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1523 UNSPEC_SSE5_INTRINSIC))]
1526 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1527 if (TARGET_FUSED_MADD)
1529 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1530 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC vec_merge form of fmsub; emits
;; fmsubss/fmsubsd.  Operand 1 is tied to the destination in both
;; alternatives; either operand 3 or operand 2 may be memory.  The "mode"
;; attribute is the <ssescalarmode> mapping.
1535 (define_insn "*sse5i_vmfmsub<mode>4"
1536 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1538 [(vec_merge:SSEMODEF2P
1541 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1542 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1543 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1546 UNSPEC_SSE5_INTRINSIC))]
1547 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1548 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1549 [(set_attr "type" "ssemuladd")
1550 (set_attr "mode" "<ssescalarmode>")])
1552 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for the vec_merge form of fnmadd; operand 3 is
;; written first in the pattern.  All operands are registers and the
;; pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set, the preparation code emits gen_sse5_vmfnmadd<mode>4 instead.
1553 (define_expand "sse5i_vmfnmadd<mode>4"
1554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1556 [(vec_merge:SSEMODEF2P
1558 (match_operand:SSEMODEF2P 3 "register_operand" "")
1560 (match_operand:SSEMODEF2P 1 "register_operand" "")
1561 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1564 UNSPEC_SSE5_INTRINSIC))]
1567 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1568 if (TARGET_FUSED_MADD)
1570 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1571 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC vec_merge form of fnmadd; emits
;; fnmaddss/fnmaddsd.  Operand 3 is written first in the pattern; operand 1
;; is tied to the destination in both alternatives.  The "mode" attribute
;; is the <ssescalarmode> mapping.
1576 (define_insn "*sse5i_vmfnmadd<mode>4"
1577 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1579 [(vec_merge:SSEMODEF2P
1581 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1583 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1584 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1587 UNSPEC_SSE5_INTRINSIC))]
1588 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1589 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1590 [(set_attr "type" "ssemuladd")
1591 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge form of fnmsub.  All operands are
;; registers and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_vmfnmsub<mode>4 on the same operands instead.
1593 (define_expand "sse5i_vmfnmsub<mode>4"
1594 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1596 [(vec_merge:SSEMODEF2P
1600 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1601 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1602 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1605 UNSPEC_SSE5_INTRINSIC))]
1608 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1609 if (TARGET_FUSED_MADD)
1611 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1612 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC vec_merge form of fnmsub; emits
;; fnmsubss/fnmsubsd.  Operand 1 is tied to the destination in both
;; alternatives; either operand 3 or operand 2 may be memory.  The "mode"
;; attribute is the <ssescalarmode> mapping.
1617 (define_insn "*sse5i_vmfnmsub<mode>4"
1618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1620 [(vec_merge:SSEMODEF2P
1624 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1625 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1626 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1629 UNSPEC_SSE5_INTRINSIC))]
1630 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1631 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1632 [(set_attr "type" "ssemuladd")
1633 (set_attr "mode" "<ssescalarmode>")])
1635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1637 ;; Parallel single-precision floating point conversion operations
1639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: operand 2 is a V2SI value in a "y" register or memory,
;; converted to V2SF with (float).  Operand 1 is a V4SF register tied to
;; the V4SF destination ("0").
1641 (define_insn "sse_cvtpi2ps"
1642 [(set (match_operand:V4SF 0 "register_operand" "=x")
1645 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1646 (match_operand:V4SF 1 "register_operand" "0")
1649 "cvtpi2ps\t{%2, %0|%0, %2}"
1650 [(set_attr "type" "ssecvt")
1651 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF source ("x" register or memory) is wrapped in a V4SI
;; unspec, and the selector (parallel [0 1]) names elements 0 and 1 for the
;; V2SI result, which lives in a "y" register.  Tagged with unit "mmx".
1653 (define_insn "sse_cvtps2pi"
1654 [(set (match_operand:V2SI 0 "register_operand" "=y")
1656 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1658 (parallel [(const_int 0) (const_int 1)])))]
1660 "cvtps2pi\t{%1, %0|%0, %1}"
1661 [(set_attr "type" "ssecvt")
1662 (set_attr "unit" "mmx")
1663 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but expressed with (fix:V4SI ...) instead of
;; an unspec; the selector names elements 0 and 1 for the V2SI result in a
;; "y" register.  Tagged with unit "mmx".
1665 (define_insn "sse_cvttps2pi"
1666 [(set (match_operand:V2SI 0 "register_operand" "=y")
1668 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1669 (parallel [(const_int 0) (const_int 1)])))]
1671 "cvttps2pi\t{%1, %0|%0, %1}"
1672 [(set_attr "type" "ssecvt")
1673 (set_attr "unit" "mmx")
1674 (set_attr "mode" "SF")])
;; cvtsi2ss: operand 2 is an SImode value ("r" register in alternative 1,
;; memory in alternative 2) converted to SF with (float).  Operand 1 is a
;; V4SF register tied to the destination.  The decode attributes are given
;; per alternative.
1676 (define_insn "sse_cvtsi2ss"
1677 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1680 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1681 (match_operand:V4SF 1 "register_operand" "0,0")
1684 "cvtsi2ss\t{%2, %0|%0, %2}"
1685 [(set_attr "type" "sseicvt")
1686 (set_attr "athlon_decode" "vector,double")
1687 (set_attr "amdfam10_decode" "vector,double")
1688 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode variant of sse_cvtsi2ss, enabled only for
;; TARGET_SSE && TARGET_64BIT.  Operand 2 is a DImode value converted to SF
;; with (float); operand 1 is a V4SF register tied to the destination.
;; Note that the second alternative of operand 2 is "rm", not plain "m".
1690 (define_insn "sse_cvtsi2ssq"
1691 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1694 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1695 (match_operand:V4SF 1 "register_operand" "0,0")
1697 "TARGET_SSE && TARGET_64BIT"
1698 "cvtsi2ssq\t{%2, %0|%0, %2}"
1699 [(set_attr "type" "sseicvt")
1700 (set_attr "athlon_decode" "vector,double")
1701 (set_attr "amdfam10_decode" "vector,double")
1702 (set_attr "mode" "SF")])
;; cvtss2si from a V4SF operand: element 0 (parallel [0]) of operand 1 is
;; converted to an SImode "r" register under UNSPEC_FIX_NOTRUNC.  Operand 1
;; is an "x" register or memory.  No amdfam10_decode attribute is set here,
;; unlike sse_cvtss2si_2.
1704 (define_insn "sse_cvtss2si"
1705 [(set (match_operand:SI 0 "register_operand" "=r,r")
1708 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1709 (parallel [(const_int 0)]))]
1710 UNSPEC_FIX_NOTRUNC))]
1712 "cvtss2si\t{%1, %0|%0, %1}"
1713 [(set_attr "type" "sseicvt")
1714 (set_attr "athlon_decode" "double,vector")
1715 (set_attr "prefix_rep" "1")
1716 (set_attr "mode" "SI")])
;; cvtss2si from a scalar SF operand ("x" register or memory), producing an
;; SImode "r" register under UNSPEC_FIX_NOTRUNC.  Same output template as
;; sse_cvtss2si.
1718 (define_insn "sse_cvtss2si_2"
1719 [(set (match_operand:SI 0 "register_operand" "=r,r")
1720 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1721 UNSPEC_FIX_NOTRUNC))]
1723 "cvtss2si\t{%1, %0|%0, %1}"
1724 [(set_attr "type" "sseicvt")
1725 (set_attr "athlon_decode" "double,vector")
1726 (set_attr "amdfam10_decode" "double,double")
1727 (set_attr "prefix_rep" "1")
1728 (set_attr "mode" "SI")])
;; cvtss2siq from a V4SF operand: DImode-result variant of sse_cvtss2si,
;; enabled only for TARGET_SSE && TARGET_64BIT.  Element 0 of operand 1 is
;; converted under UNSPEC_FIX_NOTRUNC.
1730 (define_insn "sse_cvtss2siq"
1731 [(set (match_operand:DI 0 "register_operand" "=r,r")
1734 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1735 (parallel [(const_int 0)]))]
1736 UNSPEC_FIX_NOTRUNC))]
1737 "TARGET_SSE && TARGET_64BIT"
1738 "cvtss2siq\t{%1, %0|%0, %1}"
1739 [(set_attr "type" "sseicvt")
1740 (set_attr "athlon_decode" "double,vector")
1741 (set_attr "prefix_rep" "1")
1742 (set_attr "mode" "DI")])
;; cvtss2siq from a scalar SF operand ("x" register or memory), producing a
;; DImode "r" register under UNSPEC_FIX_NOTRUNC.  Enabled only for
;; TARGET_SSE && TARGET_64BIT.
1744 (define_insn "sse_cvtss2siq_2"
1745 [(set (match_operand:DI 0 "register_operand" "=r,r")
1746 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1747 UNSPEC_FIX_NOTRUNC))]
1748 "TARGET_SSE && TARGET_64BIT"
1749 "cvtss2siq\t{%1, %0|%0, %1}"
1750 [(set_attr "type" "sseicvt")
1751 (set_attr "athlon_decode" "double,vector")
1752 (set_attr "amdfam10_decode" "double,double")
1753 (set_attr "prefix_rep" "1")
1754 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 (parallel [0]) of the V4SF operand 1
;; ("x" register or memory) to an SImode "r" register.  No
;; UNSPEC_FIX_NOTRUNC wrapper appears in this pattern.
1756 (define_insn "sse_cvttss2si"
1757 [(set (match_operand:SI 0 "register_operand" "=r,r")
1760 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1761 (parallel [(const_int 0)]))))]
1763 "cvttss2si\t{%1, %0|%0, %1}"
1764 [(set_attr "type" "sseicvt")
1765 (set_attr "athlon_decode" "double,vector")
1766 (set_attr "amdfam10_decode" "double,double")
1767 (set_attr "prefix_rep" "1")
1768 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result variant of sse_cvttss2si, enabled only for
;; TARGET_SSE && TARGET_64BIT.  Converts element 0 of the V4SF operand 1.
1770 (define_insn "sse_cvttss2siq"
1771 [(set (match_operand:DI 0 "register_operand" "=r,r")
1774 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1775 (parallel [(const_int 0)]))))]
1776 "TARGET_SSE && TARGET_64BIT"
1777 "cvttss2siq\t{%1, %0|%0, %1}"
1778 [(set_attr "type" "sseicvt")
1779 (set_attr "athlon_decode" "double,vector")
1780 (set_attr "amdfam10_decode" "double,double")
1781 (set_attr "prefix_rep" "1")
1782 (set_attr "mode" "DI")])
;; cvtdq2ps: (float:V4SF) of a V4SI operand ("x" register or memory) into a
;; V4SF "x" register.
1784 (define_insn "sse2_cvtdq2ps"
1785 [(set (match_operand:V4SF 0 "register_operand" "=x")
1786 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1788 "cvtdq2ps\t{%1, %0|%0, %1}"
1789 [(set_attr "type" "ssecvt")
1790 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand ("x" register or memory) to a V4SI "x" register,
;; expressed as an UNSPEC_FIX_NOTRUNC unspec.  Sets prefix_data16.
1792 (define_insn "sse2_cvtps2dq"
1793 [(set (match_operand:V4SI 0 "register_operand" "=x")
1794 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1795 UNSPEC_FIX_NOTRUNC))]
1797 "cvtps2dq\t{%1, %0|%0, %1}"
1798 [(set_attr "type" "ssecvt")
1799 (set_attr "prefix_data16" "1")
1800 (set_attr "mode" "TI")])
;; cvttps2dq: (fix:V4SI) of a V4SF operand ("x" register or memory) into a
;; V4SI "x" register.  Sets prefix_rep.
1802 (define_insn "sse2_cvttps2dq"
1803 [(set (match_operand:V4SI 0 "register_operand" "=x")
1804 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1806 "cvttps2dq\t{%1, %0|%0, %1}"
1807 [(set_attr "type" "ssecvt")
1808 (set_attr "prefix_rep" "1")
1809 (set_attr "mode" "TI")])
1811 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1813 ;; Parallel double-precision floating point conversion operations
1815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF) of a V2SI operand into a V2DF "x" register.
;; Alternative 1 takes the source from a "y" register (unit "mmx");
;; alternative 2 takes it from memory (unit "*").
1817 (define_insn "sse2_cvtpi2pd"
1818 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1819 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1821 "cvtpi2pd\t{%1, %0|%0, %1}"
1822 [(set_attr "type" "ssecvt")
1823 (set_attr "unit" "mmx,*")
1824 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand ("x" register or memory) to a V2SI "y" register,
;; expressed as an UNSPEC_FIX_NOTRUNC unspec.  Tagged with unit "mmx" and
;; prefix_data16.
1826 (define_insn "sse2_cvtpd2pi"
1827 [(set (match_operand:V2SI 0 "register_operand" "=y")
1828 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1829 UNSPEC_FIX_NOTRUNC))]
1831 "cvtpd2pi\t{%1, %0|%0, %1}"
1832 [(set_attr "type" "ssecvt")
1833 (set_attr "unit" "mmx")
1834 (set_attr "prefix_data16" "1")
1835 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI) of a V2DF operand ("x" register or memory) into a
;; V2SI "y" register.  Tagged with unit "mmx" and prefix_data16.
1837 (define_insn "sse2_cvttpd2pi"
1838 [(set (match_operand:V2SI 0 "register_operand" "=y")
1839 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1841 "cvttpd2pi\t{%1, %0|%0, %1}"
1842 [(set_attr "type" "ssecvt")
1843 (set_attr "unit" "mmx")
1844 (set_attr "prefix_data16" "1")
1845 (set_attr "mode" "TI")])
;; cvtsi2sd: operand 2 is an SImode value ("r" register or memory)
;; converted to DF with (float).  Operand 1 is a V2DF register tied to the
;; destination.  The decode attributes are given per alternative.
1847 (define_insn "sse2_cvtsi2sd"
1848 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1851 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1852 (match_operand:V2DF 1 "register_operand" "0,0")
1855 "cvtsi2sd\t{%2, %0|%0, %2}"
1856 [(set_attr "type" "sseicvt")
1857 (set_attr "mode" "DF")
1858 (set_attr "athlon_decode" "double,direct")
1859 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode variant of sse2_cvtsi2sd, enabled only for
;; TARGET_SSE2 && TARGET_64BIT.  Operand 2 is a DImode value converted to
;; DF with (float); operand 1 is a V2DF register tied to the destination.
1861 (define_insn "sse2_cvtsi2sdq"
1862 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1865 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1866 (match_operand:V2DF 1 "register_operand" "0,0")
1868 "TARGET_SSE2 && TARGET_64BIT"
1869 "cvtsi2sdq\t{%2, %0|%0, %2}"
1870 [(set_attr "type" "sseicvt")
1871 (set_attr "mode" "DF")
1872 (set_attr "athlon_decode" "double,direct")
1873 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si from a V2DF operand: element 0 (parallel [0]) of operand 1 is
;; converted to an SImode "r" register under UNSPEC_FIX_NOTRUNC.  No
;; amdfam10_decode attribute is set here, unlike sse2_cvtsd2si_2.
1875 (define_insn "sse2_cvtsd2si"
1876 [(set (match_operand:SI 0 "register_operand" "=r,r")
1879 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1880 (parallel [(const_int 0)]))]
1881 UNSPEC_FIX_NOTRUNC))]
1883 "cvtsd2si\t{%1, %0|%0, %1}"
1884 [(set_attr "type" "sseicvt")
1885 (set_attr "athlon_decode" "double,vector")
1886 (set_attr "prefix_rep" "1")
1887 (set_attr "mode" "SI")])
;; cvtsd2si from a scalar DF operand ("x" register or memory), producing an
;; SImode "r" register under UNSPEC_FIX_NOTRUNC.  Same output template as
;; sse2_cvtsd2si.
1889 (define_insn "sse2_cvtsd2si_2"
1890 [(set (match_operand:SI 0 "register_operand" "=r,r")
1891 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1892 UNSPEC_FIX_NOTRUNC))]
1894 "cvtsd2si\t{%1, %0|%0, %1}"
1895 [(set_attr "type" "sseicvt")
1896 (set_attr "athlon_decode" "double,vector")
1897 (set_attr "amdfam10_decode" "double,double")
1898 (set_attr "prefix_rep" "1")
1899 (set_attr "mode" "SI")])
;; cvtsd2siq from a V2DF operand: DImode-result variant of sse2_cvtsd2si,
;; enabled only for TARGET_SSE2 && TARGET_64BIT.  Element 0 of operand 1 is
;; converted under UNSPEC_FIX_NOTRUNC.
1901 (define_insn "sse2_cvtsd2siq"
1902 [(set (match_operand:DI 0 "register_operand" "=r,r")
1905 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1906 (parallel [(const_int 0)]))]
1907 UNSPEC_FIX_NOTRUNC))]
1908 "TARGET_SSE2 && TARGET_64BIT"
1909 "cvtsd2siq\t{%1, %0|%0, %1}"
1910 [(set_attr "type" "sseicvt")
1911 (set_attr "athlon_decode" "double,vector")
1912 (set_attr "prefix_rep" "1")
1913 (set_attr "mode" "DI")])
;; cvtsd2siq from a scalar DF operand ("x" register or memory), producing a
;; DImode "r" register under UNSPEC_FIX_NOTRUNC.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
1915 (define_insn "sse2_cvtsd2siq_2"
1916 [(set (match_operand:DI 0 "register_operand" "=r,r")
1917 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1918 UNSPEC_FIX_NOTRUNC))]
1919 "TARGET_SSE2 && TARGET_64BIT"
1920 "cvtsd2siq\t{%1, %0|%0, %1}"
1921 [(set_attr "type" "sseicvt")
1922 (set_attr "athlon_decode" "double,vector")
1923 (set_attr "amdfam10_decode" "double,double")
1924 (set_attr "prefix_rep" "1")
1925 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 (parallel [0]) of the V2DF operand 1
;; ("x" register or memory) to an SImode "r" register.  No
;; UNSPEC_FIX_NOTRUNC wrapper appears in this pattern.
1927 (define_insn "sse2_cvttsd2si"
1928 [(set (match_operand:SI 0 "register_operand" "=r,r")
1931 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1932 (parallel [(const_int 0)]))))]
1934 "cvttsd2si\t{%1, %0|%0, %1}"
1935 [(set_attr "type" "sseicvt")
1936 (set_attr "prefix_rep" "1")
1937 (set_attr "mode" "SI")
1938 (set_attr "athlon_decode" "double,vector")
1939 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si, enabled only for
;; TARGET_SSE2 && TARGET_64BIT.  Converts element 0 of the V2DF operand 1.
1941 (define_insn "sse2_cvttsd2siq"
1942 [(set (match_operand:DI 0 "register_operand" "=r,r")
1945 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1946 (parallel [(const_int 0)]))))]
1947 "TARGET_SSE2 && TARGET_64BIT"
1948 "cvttsd2siq\t{%1, %0|%0, %1}"
1949 [(set_attr "type" "sseicvt")
1950 (set_attr "prefix_rep" "1")
1951 (set_attr "mode" "DI")
1952 (set_attr "athlon_decode" "double,vector")
1953 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: the selector (parallel [0 1]) names elements 0 and 1 of the
;; V4SI operand 1 ("x" register or memory); the result is a V2DF "x"
;; register.
1955 (define_insn "sse2_cvtdq2pd"
1956 [(set (match_operand:V2DF 0 "register_operand" "=x")
1959 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1960 (parallel [(const_int 0) (const_int 1)]))))]
1962 "cvtdq2pd\t{%1, %0|%0, %1}"
1963 [(set_attr "type" "ssecvt")
1964 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The V2DF operand 1 is wrapped in a V2SI unspec
;; and the destination is V4SI.  The preparation code sets operands[2] to
;; the V2SImode zero constant, which the *sse2_cvtpd2dq insn requires via
;; its const0_operand predicate.
1966 (define_expand "sse2_cvtpd2dq"
1967 [(set (match_operand:V4SI 0 "register_operand" "")
1969 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1973 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher that emits cvtpd2dq.  Operand 1 is the V2DF source (SSE register
;; or memory); operand 2 must satisfy const0_operand in V2SI mode.
1975 (define_insn "*sse2_cvtpd2dq"
1976 [(set (match_operand:V4SI 0 "register_operand" "=x")
1978 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1980 (match_operand:V2SI 2 "const0_operand" "")))]
1982 "cvtpd2dq\t{%1, %0|%0, %1}"
1983 [(set_attr "type" "ssecvt")
1984 (set_attr "prefix_rep" "1")
1985 (set_attr "mode" "TI")
1986 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The V2DF source is converted with fix:V2SI and
;; operands[2] is set to the V2SImode zero constant.
1988 (define_expand "sse2_cvttpd2dq"
1989 [(set (match_operand:V4SI 0 "register_operand" "")
1991 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1994 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher that emits cvttpd2dq.  Operand 1 is the V2DF source (SSE register
;; or memory), converted with fix:V2SI; operand 2 must satisfy const0_operand
;; in V2SI mode.
;; cvttpd2dq is encoded with the 0x66 operand-size prefix (66 0F E6), not
;; with an F2/F3 prefix, so the prefix byte is described with prefix_data16
;; rather than prefix_rep.  The length contribution is one byte either way.
1996 (define_insn "*sse2_cvttpd2dq"
1997 [(set (match_operand:V4SI 0 "register_operand" "=x")
1999 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2000 (match_operand:V2SI 2 "const0_operand" "")))]
2002 "cvttpd2dq\t{%1, %0|%0, %1}"
2003 [(set_attr "type" "ssecvt")
2004 (set_attr "prefix_data16" "1")
2005 (set_attr "mode" "TI")
2006 (set_attr "amdfam10_decode" "double")])
;; Emits cvtsd2ss.  Operand 2 (V2DF, SSE register or memory) is
;; float_truncated to V2SF; operand 1 is a V4SF register tied to the
;; destination (constraint "0").
2008 (define_insn "sse2_cvtsd2ss"
2009 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2012 (float_truncate:V2SF
2013 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2014 (match_operand:V4SF 1 "register_operand" "0,0")
2017 "cvtsd2ss\t{%2, %0|%0, %2}"
2018 [(set_attr "type" "ssecvt")
2019 (set_attr "athlon_decode" "vector,double")
2020 (set_attr "amdfam10_decode" "vector,double")
2021 (set_attr "mode" "SF")])
;; Emits cvtss2sd.  Operand 2 is the V4SF source (SSE register or memory)
;; with indices 0 and 1 selected; operand 1 is a V2DF register tied to the
;; destination (constraint "0").
2023 (define_insn "sse2_cvtss2sd"
2024 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2028 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2029 (parallel [(const_int 0) (const_int 1)])))
2030 (match_operand:V2DF 1 "register_operand" "0,0")
2033 "cvtss2sd\t{%2, %0|%0, %2}"
2034 [(set_attr "type" "ssecvt")
2035 (set_attr "amdfam10_decode" "vector,double")
2036 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The V2DF source is float_truncated to V2SF and
;; operands[2] is set to the V2SFmode zero constant.
2038 (define_expand "sse2_cvtpd2ps"
2039 [(set (match_operand:V4SF 0 "register_operand" "")
2041 (float_truncate:V2SF
2042 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2045 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher that emits cvtpd2ps.  Operand 1 is the V2DF source (SSE register
;; or memory); operand 2 must satisfy const0_operand in V2SF mode.
2047 (define_insn "*sse2_cvtpd2ps"
2048 [(set (match_operand:V4SF 0 "register_operand" "=x")
2050 (float_truncate:V2SF
2051 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2052 (match_operand:V2SF 2 "const0_operand" "")))]
2054 "cvtpd2ps\t{%1, %0|%0, %1}"
2055 [(set_attr "type" "ssecvt")
2056 (set_attr "prefix_data16" "1")
2057 (set_attr "mode" "V4SF")
2058 (set_attr "amdfam10_decode" "double")])
;; Emits cvtps2pd.  The V2DF register result is built from indices 0 and 1
;; of the V4SF operand (SSE register or memory).
2060 (define_insn "sse2_cvtps2pd"
2061 [(set (match_operand:V2DF 0 "register_operand" "=x")
2064 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2065 (parallel [(const_int 0) (const_int 1)]))))]
2067 "cvtps2pd\t{%1, %0|%0, %1}"
2068 [(set_attr "type" "ssecvt")
2069 (set_attr "mode" "V2DF")
2070 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander from a V4SF input (operand 1) to a V2DF result
;; (operand 0).  A fresh V4SF pseudo is allocated in operands[2] for the
;; intermediate value; the second step selects indices 0 and 1.
2072 (define_expand "vec_unpacks_hi_v4sf"
2077 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2078 (parallel [(const_int 6)
2082 (set (match_operand:V2DF 0 "register_operand" "")
2086 (parallel [(const_int 0) (const_int 1)]))))]
2089 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF register from indices 0 and 1 of a V4SF
;; operand (register or memory).
2092 (define_expand "vec_unpacks_lo_v4sf"
2093 [(set (match_operand:V2DF 0 "register_operand" "")
2096 (match_operand:V4SF 1 "nonimmediate_operand" "")
2097 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then converts the temporary into operand 0 with
;; gen_sse2_cvtdq2ps.
2100 (define_expand "vec_unpacks_float_hi_v8hi"
2101 [(match_operand:V4SF 0 "register_operand" "")
2102 (match_operand:V8HI 1 "register_operand" "")]
2105 rtx tmp = gen_reg_rtx (V4SImode);
2107 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2108 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then converts the temporary into operand 0 with
;; gen_sse2_cvtdq2ps.
2112 (define_expand "vec_unpacks_float_lo_v8hi"
2113 [(match_operand:V4SF 0 "register_operand" "")
2114 (match_operand:V8HI 1 "register_operand" "")]
2117 rtx tmp = gen_reg_rtx (V4SImode);
2119 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2120 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then converts the temporary into operand 0 with
;; gen_sse2_cvtdq2ps.
2124 (define_expand "vec_unpacku_float_hi_v8hi"
2125 [(match_operand:V4SF 0 "register_operand" "")
2126 (match_operand:V8HI 1 "register_operand" "")]
2129 rtx tmp = gen_reg_rtx (V4SImode);
2131 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2132 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then converts the temporary into operand 0 with
;; gen_sse2_cvtdq2ps.
2136 (define_expand "vec_unpacku_float_lo_v8hi"
2137 [(match_operand:V4SF 0 "register_operand" "")
2138 (match_operand:V8HI 1 "register_operand" "")]
2141 rtx tmp = gen_reg_rtx (V4SImode);
2143 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2144 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander from a V4SI input (operand 1) to a V2DF result
;; (operand 0).  A fresh V4SI pseudo is allocated in operands[2] for the
;; intermediate value; the second step selects indices 0 and 1.
2148 (define_expand "vec_unpacks_float_hi_v4si"
2151 (match_operand:V4SI 1 "nonimmediate_operand" "")
2152 (parallel [(const_int 2)
2156 (set (match_operand:V2DF 0 "register_operand" "")
2160 (parallel [(const_int 0) (const_int 1)]))))]
2163 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF register from indices 0 and 1 of a V4SI
;; operand (register or memory).
2166 (define_expand "vec_unpacks_float_lo_v4si"
2167 [(set (match_operand:V2DF 0 "register_operand" "")
2170 (match_operand:V4SI 1 "nonimmediate_operand" "")
2171 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF result.  Each input is converted
;; with gen_sse2_cvtpd2ps into its own V4SF temporary, and the temporaries
;; are merged into operand 0 with gen_sse_movlhps.
2174 (define_expand "vec_pack_trunc_v2df"
2175 [(match_operand:V4SF 0 "register_operand" "")
2176 (match_operand:V2DF 1 "nonimmediate_operand" "")
2177 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2182 r1 = gen_reg_rtx (V4SFmode);
2183 r2 = gen_reg_rtx (V4SFmode);
2185 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2186 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2187 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result.  Each input is converted
;; with gen_sse2_cvttpd2dq into its own V4SI temporary; the temporaries are
;; then merged with gen_sse2_punpcklqdq, with all three operands viewed as
;; V2DImode through gen_lowpart.
2191 (define_expand "vec_pack_sfix_trunc_v2df"
2192 [(match_operand:V4SI 0 "register_operand" "")
2193 (match_operand:V2DF 1 "nonimmediate_operand" "")
2194 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2199 r1 = gen_reg_rtx (V4SImode);
2200 r2 = gen_reg_rtx (V4SImode);
2202 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2203 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2204 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2205 gen_lowpart (V2DImode, r1),
2206 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI result.  Each input is converted
;; with gen_sse2_cvtpd2dq into its own V4SI temporary; the temporaries are
;; then merged with gen_sse2_punpcklqdq, with all three operands viewed as
;; V2DImode through gen_lowpart.
2210 (define_expand "vec_pack_sfix_v2df"
2211 [(match_operand:V4SI 0 "register_operand" "")
2212 (match_operand:V2DF 1 "nonimmediate_operand" "")
2213 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2218 r1 = gen_reg_rtx (V4SImode);
2219 r2 = gen_reg_rtx (V4SImode);
2221 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2222 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2223 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2224 gen_lowpart (V2DImode, r1),
2225 gen_lowpart (V2DImode, r2)));
2229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2231 ;; Parallel single-precision floating point element swizzling
2233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives over V4SF operands, with operand 1 tied to the
;; destination: movhlps (register source), movlps using the %H2 form of an
;; offsettable memory source, and movhps with a memory destination.
;; Rejected when operands 1 and 2 are both memory.
2235 (define_insn "sse_movhlps"
2236 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2239 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2240 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2241 (parallel [(const_int 6)
2245 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2247 movhlps\t{%2, %0|%0, %2}
2248 movlps\t{%H2, %0|%0, %H2}
2249 movhps\t{%2, %0|%0, %2}"
2250 [(set_attr "type" "ssemov")
2251 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives over V4SF operands, with operand 1 tied to the
;; destination: movlhps (register source), movhps (memory source), and
;; movlps writing the %H0 form of an offsettable memory destination.
;; Operand validity is checked with ix86_binary_operator_ok.
2253 (define_insn "sse_movlhps"
2254 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2257 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2258 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2259 (parallel [(const_int 0)
2263 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2265 movlhps\t{%2, %0|%0, %2}
2266 movhps\t{%2, %0|%0, %2}
2267 movlps\t{%2, %H0|%H0, %2}"
2268 [(set_attr "type" "ssemov")
2269 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  With operands 1 and 2 taken together, the result
;; selects indices 2, 6, 3, 7.  Operand 1 is tied to the destination.
2271 (define_insn "sse_unpckhps"
2272 [(set (match_operand:V4SF 0 "register_operand" "=x")
2275 (match_operand:V4SF 1 "register_operand" "0")
2276 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2277 (parallel [(const_int 2) (const_int 6)
2278 (const_int 3) (const_int 7)])))]
2280 "unpckhps\t{%2, %0|%0, %2}"
2281 [(set_attr "type" "sselog")
2282 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  With operands 1 and 2 taken together, the result
;; selects indices 0, 4, 1, 5.  Operand 1 is tied to the destination.
2284 (define_insn "sse_unpcklps"
2285 [(set (match_operand:V4SF 0 "register_operand" "=x")
2288 (match_operand:V4SF 1 "register_operand" "0")
2289 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2290 (parallel [(const_int 0) (const_int 4)
2291 (const_int 1) (const_int 5)])))]
2293 "unpcklps\t{%2, %0|%0, %2}"
2294 [(set_attr "type" "sselog")
2295 (set_attr "mode" "V4SF")])
2297 ;; These are modeled with the same vec_concat as the others so that we
2298 ;; capture users of shufps that can use the new instructions
;; Emits movshdup from a single V4SF source (SSE register or memory) into
;; a V4SF register.
2299 (define_insn "sse3_movshdup"
2300 [(set (match_operand:V4SF 0 "register_operand" "=x")
2303 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2305 (parallel [(const_int 1)
2310 "movshdup\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "sse")
2312 (set_attr "prefix_rep" "1")
2313 (set_attr "mode" "V4SF")])
;; Emits movsldup from a single V4SF source (SSE register or memory) into
;; a V4SF register.
2315 (define_insn "sse3_movsldup"
2316 [(set (match_operand:V4SF 0 "register_operand" "=x")
2319 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2321 (parallel [(const_int 0)
2326 "movsldup\t{%1, %0|%0, %1}"
2327 [(set_attr "type" "sse")
2328 (set_attr "prefix_rep" "1")
2329 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The immediate is
;; split into four 2-bit fields that are passed to gen_sse_shufps_1 as
;; separate selectors: bits 0-1 and 2-3 unchanged (values 0..3), bits 4-5
;; and 6-7 with 4 added (values 4..7).
2331 (define_expand "sse_shufps"
2332 [(match_operand:V4SF 0 "register_operand" "")
2333 (match_operand:V4SF 1 "register_operand" "")
2334 (match_operand:V4SF 2 "nonimmediate_operand" "")
2335 (match_operand:SI 3 "const_int_operand" "")]
2338 int mask = INTVAL (operands[3]);
2339 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2340 GEN_INT ((mask >> 0) & 3),
2341 GEN_INT ((mask >> 2) & 3),
2342 GEN_INT (((mask >> 4) & 3) + 4),
2343 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the four element selectors as separate operands: operands 3
;; and 4 in 0..3, operands 5 and 6 in 4..7.  The output code re-encodes them
;; into one immediate, op3 | op4 << 2 | (op5 - 4) << 4 | (op6 - 4) << 6,
;; stores it back into operands[3] and prints it as the shufps immediate.
2347 (define_insn "sse_shufps_1"
2348 [(set (match_operand:V4SF 0 "register_operand" "=x")
2351 (match_operand:V4SF 1 "register_operand" "0")
2352 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2353 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2354 (match_operand 4 "const_0_to_3_operand" "")
2355 (match_operand 5 "const_4_to_7_operand" "")
2356 (match_operand 6 "const_4_to_7_operand" "")])))]
2360 mask |= INTVAL (operands[3]) << 0;
2361 mask |= INTVAL (operands[4]) << 2;
2362 mask |= (INTVAL (operands[5]) - 4) << 4;
2363 mask |= (INTVAL (operands[6]) - 4) << 6;
2364 operands[3] = GEN_INT (mask);
2366 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2368 [(set_attr "type" "sselog")
2369 (set_attr "mode" "V4SF")])
;; Moves indices 2 and 3 of a V4SF operand into a V2SF destination.
;; Alternatives: movhps to memory, movhlps between registers, and movlps
;; from the %H1 form of an offsettable memory source.
2371 (define_insn "sse_storehps"
2372 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2374 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2375 (parallel [(const_int 2) (const_int 3)])))]
2378 movhps\t{%1, %0|%0, %1}
2379 movhlps\t{%1, %0|%0, %1}
2380 movlps\t{%H1, %0|%0, %H1}"
2381 [(set_attr "type" "ssemov")
2382 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines indices 0 and 1 of operand 1 (V4SF, tied to the destination)
;; with the V2SF operand 2.  Alternatives: movhps from memory, movlhps from
;; a register, and movlps into the %H0 form of an offsettable memory
;; destination.
2384 (define_insn "sse_loadhps"
2385 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2388 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2389 (parallel [(const_int 0) (const_int 1)]))
2390 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2393 movhps\t{%2, %0|%0, %2}
2394 movlhps\t{%2, %0|%0, %2}
2395 movlps\t{%2, %H0|%H0, %2}"
2396 [(set_attr "type" "ssemov")
2397 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Moves indices 0 and 1 of a V4SF operand into a V2SF destination.
;; Alternatives: movlps to memory, movaps between registers, and movlps
;; from memory.
2399 (define_insn "sse_storelps"
2400 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2402 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2403 (parallel [(const_int 0) (const_int 1)])))]
2406 movlps\t{%1, %0|%0, %1}
2407 movaps\t{%1, %0|%0, %1}
2408 movlps\t{%1, %0|%0, %1}"
2409 [(set_attr "type" "ssemov")
2410 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines the V2SF operand 2 with indices 2 and 3 of the V4SF operand 1.
;; Alternatives: shufps with immediate 0xe4 when operand 2 is tied to the
;; destination, movlps from memory, and movlps to a memory destination.
2412 (define_insn "sse_loadlps"
2413 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2415 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2417 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2418 (parallel [(const_int 2) (const_int 3)]))))]
2421 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2422 movlps\t{%2, %0|%0, %2}
2423 movlps\t{%2, %0|%0, %2}"
2424 [(set_attr "type" "sselog,ssemov,ssemov")
2425 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 2 is the source register and
;; operand 1 is tied to the destination.
2427 (define_insn "sse_movss"
2428 [(set (match_operand:V4SF 0 "register_operand" "=x")
2430 (match_operand:V4SF 2 "register_operand" "x")
2431 (match_operand:V4SF 1 "register_operand" "0")
2434 "movss\t{%2, %0|%0, %2}"
2435 [(set_attr "type" "ssemov")
2436 (set_attr "mode" "SF")])
;; Builds a V4SF from an SF register that is tied to the destination,
;; using shufps with immediate 0 on the destination register itself.
2438 (define_insn "*vec_dupv4sf"
2439 [(set (match_operand:V4SF 0 "register_operand" "=x")
2441 (match_operand:SF 1 "register_operand" "0")))]
2443 "shufps\t{$0, %0, %0|%0, %0, 0}"
2444 [(set_attr "type" "sselog1")
2445 (set_attr "mode" "V4SF")])
2447 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2448 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2449 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands.  There are two SSE alternatives
;; (unpcklps with a register operand 2, movss when operand 2 matches "C")
;; and two MMX alternatives (punpckldq and movd) with the same shape.
2450 (define_insn "*sse_concatv2sf"
2451 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2453 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2454 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2457 unpcklps\t{%2, %0|%0, %2}
2458 movss\t{%1, %0|%0, %1}
2459 punpckldq\t{%2, %0|%0, %2}
2460 movd\t{%1, %0|%0, %1}"
2461 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2462 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands, with operand 1 tied to the
;; destination: movlhps when operand 2 is a register, movhps when it is in
;; memory.
2464 (define_insn "*sse_concatv4sf"
2465 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2467 (match_operand:V2SF 1 "register_operand" " 0,0")
2468 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2471 movlhps\t{%2, %0|%0, %2}
2472 movhps\t{%2, %0|%0, %2}"
2473 [(set_attr "type" "ssemov")
2474 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation; hands operands 0 and 1 to
;; ix86_expand_vector_init with a first argument of false.
2476 (define_expand "vec_initv4sf"
2477 [(match_operand:V4SF 0 "register_operand" "")
2478 (match_operand 1 "" "")]
2481 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Merges an SF value (operand 2) into a V4SF destination, where operand 1
;; is either tied to the destination or matches "C".  The register
;; alternatives emit movss (SSE register or memory source) or movd
;; (general register source, "Y2" destination).
2485 (define_insn "vec_setv4sf_0"
2486 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2489 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2490 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2494 movss\t{%2, %0|%0, %2}
2495 movss\t{%2, %0|%0, %2}
2496 movd\t{%2, %0|%0, %2}
2498 [(set_attr "type" "ssemov")
2499 (set_attr "mode" "SF")])
2501 ;; A subset is vec_setv4sf.
;; insertps form of an element set.  Operand 3 is a power of two in 1..8;
;; the output code turns it into the insertps immediate by taking
;; exact_log2 of it and shifting the result left by 4.
2502 (define_insn "*vec_setv4sf_sse4_1"
2503 [(set (match_operand:V4SF 0 "register_operand" "=x")
2506 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2507 (match_operand:V4SF 1 "register_operand" "0")
2508 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2511 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2512 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2514 [(set_attr "type" "sselog")
2515 (set_attr "prefix_extra" "1")
2516 (set_attr "mode" "V4SF")])
;; insertps expressed as a V4SF unspec over the source register (operand
;; 2), the destination-tied register (operand 1) and an immediate in 0..255
;; (operand 3), which is printed unchanged.
;; The ';' after the output template starts a machine-description comment,
;; so it has no effect on the pattern.
2518 (define_insn "sse4_1_insertps"
2519 [(set (match_operand:V4SF 0 "register_operand" "=x")
2520 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2521 (match_operand:V4SF 1 "register_operand" "0")
2522 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2525 "insertps\t{%3, %2, %0|%0, %2, %3}";
2526 [(set_attr "type" "sselog")
2527 (set_attr "prefix_extra" "1")
2528 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a V4SF memory
;; destination and a non-memory SF operand 1.  It becomes an SFmode move of
;; operand 1 to byte offset 0 of the memory operand.
2531 [(set (match_operand:V4SF 0 "memory_operand" "")
2534 (match_operand:SF 1 "nonmemory_operand" ""))
2537 "TARGET_SSE && reload_completed"
2540 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element set; hands the vector, the SF value and the constant
;; element index (operand 2) to ix86_expand_vector_set with a first
;; argument of false.
2544 (define_expand "vec_setv4sf"
2545 [(match_operand:V4SF 0 "register_operand" "")
2546 (match_operand:SF 1 "register_operand" "")
2547 (match_operand 2 "const_int_operand" "")]
2550 ix86_expand_vector_set (false, operands[0], operands[1],
2551 INTVAL (operands[2]));
;; Extracts index 0 of a V4SF operand into an SF destination; rejected when
;; both operands are memory.  After reload it is split into a plain move
;; from an SFmode view of operand 1, built with gen_rtx_REG on its register
;; number or with gen_lowpart.
2555 (define_insn_and_split "*vec_extractv4sf_0"
2556 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2558 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2559 (parallel [(const_int 0)])))]
2560 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2562 "&& reload_completed"
2565 rtx op1 = operands[1];
2567 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2569 op1 = gen_lowpart (SFmode, op1);
2570 emit_move_insn (operands[0], op1);
;; Emits extractps.  The element index (operand 2, 0..3) is printed as the
;; immediate; the SF destination may be a general register or memory.
2574 (define_insn "*sse4_1_extractps"
2575 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2577 (match_operand:V4SF 1 "register_operand" "x")
2578 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2580 "extractps\t{%2, %1, %0|%0, %1, %2}"
2581 [(set_attr "type" "sselog")
2582 (set_attr "prefix_extra" "1")
2583 (set_attr "mode" "V4SF")])
;; Extracts element i (operand 2, 0..3) from a V4SF in offsettable memory.
;; The split replaces it with an SFmode load from byte offset i*4 of the
;; memory operand.
2585 (define_insn_and_split "*vec_extract_v4sf_mem"
2586 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2588 (match_operand:V4SF 1 "memory_operand" "o")
2589 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2595 int i = INTVAL (operands[2]);
2597 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; V4SF element extraction; hands the SF destination, the vector and the
;; constant element index (operand 2) to ix86_expand_vector_extract with a
;; first argument of false.
2601 (define_expand "vec_extractv4sf"
2602 [(match_operand:SF 0 "register_operand" "")
2603 (match_operand:V4SF 1 "register_operand" "")
2604 (match_operand 2 "const_int_operand" "")]
2607 ix86_expand_vector_extract (false, operands[0], operands[1],
2608 INTVAL (operands[2]));
2612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2614 ;; Parallel double-precision floating point element swizzling
2616 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives over V2DF operands: unpckhpd between registers,
;; movlpd from the %H1 form of an offsettable memory operand 1, and movhpd
;; to a memory destination.  Rejected when operands 1 and 2 are both
;; memory.
2618 (define_insn "sse2_unpckhpd"
2619 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2622 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2623 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2624 (parallel [(const_int 1)
2626 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2628 unpckhpd\t{%2, %0|%0, %2}
2629 movlpd\t{%H1, %0|%0, %H1}
2630 movhpd\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "sselog,ssemov,ssemov")
2632 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup on a V2DF operand, enabled for TARGET_SSE3 and rejected when
;; both operands are memory.  The register-destination alternative emits
;; movddup; there is a second alternative with an offsettable memory
;; destination.
2634 (define_insn "*sse3_movddup"
2635 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2638 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2640 (parallel [(const_int 0)
2642 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2644 movddup\t{%1, %0|%0, %1}
2646 [(set_attr "type" "sselog1,ssemov")
2647 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination and a register source.  The DFmode view of the source
;; register is stored twice, at byte offsets 0 and 8 of the memory operand.
2650 [(set (match_operand:V2DF 0 "memory_operand" "")
2653 (match_operand:V2DF 1 "register_operand" "")
2655 (parallel [(const_int 0)
2657 "TARGET_SSE3 && reload_completed"
2660 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2661 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2662 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three alternatives over V2DF operands, with operand 1 tied to the
;; destination: unpcklpd (register source), movhpd (memory source), and
;; movlpd into the %H0 form of an offsettable memory destination.
;; Rejected when operands 1 and 2 are both memory.
2666 (define_insn "sse2_unpcklpd"
2667 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2670 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2671 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2672 (parallel [(const_int 0)
2674 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2676 unpcklpd\t{%2, %0|%0, %2}
2677 movhpd\t{%2, %0|%0, %2}
2678 movlpd\t{%2, %H0|%H0, %2}"
2679 [(set_attr "type" "sselog,ssemov,ssemov")
2680 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 and forwarding
;; decoded selectors to gen_sse2_shufpd_1.  Bit 1 of the immediate chooses
;; index 3 (set) or index 2 (clear) for the last selector.
2682 (define_expand "sse2_shufpd"
2683 [(match_operand:V2DF 0 "register_operand" "")
2684 (match_operand:V2DF 1 "register_operand" "")
2685 (match_operand:V2DF 2 "nonimmediate_operand" "")
2686 (match_operand:SI 3 "const_int_operand" "")]
2689 int mask = INTVAL (operands[3]);
2690 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2692 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the two element selectors as separate operands: operand 3
;; in 0..1 and operand 4 in 2..3.  The output code re-encodes them into one
;; immediate, op3 | (op4 - 2) << 1, stores it back into operands[3] and
;; prints it as the shufpd immediate.
2696 (define_insn "sse2_shufpd_1"
2697 [(set (match_operand:V2DF 0 "register_operand" "=x")
2700 (match_operand:V2DF 1 "register_operand" "0")
2701 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2702 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2703 (match_operand 4 "const_2_to_3_operand" "")])))]
2707 mask = INTVAL (operands[3]);
2708 mask |= (INTVAL (operands[4]) - 2) << 1;
2709 operands[3] = GEN_INT (mask);
2711 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2713 [(set_attr "type" "sselog")
2714 (set_attr "mode" "V2DF")])
;; Extracts index 1 of a V2DF operand into a DF destination; rejected when
;; both operands are memory.  The memory-destination alternative emits
;; movhpd.
2716 (define_insn "sse2_storehpd"
2717 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2719 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2720 (parallel [(const_int 1)])))]
2721 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2723 movhpd\t{%1, %0|%0, %1}
2726 [(set_attr "type" "ssemov,sselog1,ssemov")
2727 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting index 1 from a V2DF memory operand into a
;; DF register becomes a plain move from that memory re-addressed as DFmode
;; at byte offset 8.
2730 [(set (match_operand:DF 0 "register_operand" "")
2732 (match_operand:V2DF 1 "memory_operand" "")
2733 (parallel [(const_int 1)])))]
2734 "TARGET_SSE2 && reload_completed"
2735 [(set (match_dup 0) (match_dup 1))]
2737 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts index 0 of a V2DF operand into a DF destination; rejected when
;; both operands are memory.  The memory-destination alternative emits
;; movlpd.
2740 (define_insn "sse2_storelpd"
2741 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2743 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2744 (parallel [(const_int 0)])))]
2745 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2747 movlpd\t{%1, %0|%0, %1}
2750 [(set_attr "type" "ssemov")
2751 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting index 0 from a V2DF operand into a DF
;; register becomes a plain move from a DFmode view of operand 1, built
;; with gen_rtx_REG on its register number or with gen_lowpart.
2754 [(set (match_operand:DF 0 "register_operand" "")
2756 (match_operand:V2DF 1 "nonimmediate_operand" "")
2757 (parallel [(const_int 0)])))]
2758 "TARGET_SSE2 && reload_completed"
2761 rtx op1 = operands[1];
2763 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2765 op1 = gen_lowpart (DFmode, op1);
2766 emit_move_insn (operands[0], op1);
;; Combines index 0 of the V2DF operand 1 with the DF operand 2.  Register
;; alternatives: movhpd (operand 2 in memory), unpcklpd (operand 2 in a
;; register), and shufpd with immediate 1 (operand 2 tied to the
;; destination).  A fourth alternative has an offsettable memory
;; destination.  Rejected when operands 1 and 2 are both memory.
2770 (define_insn "sse2_loadhpd"
2771 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2774 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2775 (parallel [(const_int 0)]))
2776 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2777 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2779 movhpd\t{%2, %0|%0, %2}
2780 unpcklpd\t{%2, %0|%0, %2}
2781 shufpd\t{$1, %1, %0|%0, %1, 1}
2783 [(set_attr "type" "ssemov,sselog,sselog,other")
2784 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for a V2DF memory destination that keeps its own
;; index-0 element and takes the DF register operand 1 as the other
;; element.  It becomes a DFmode store of operand 1 at byte offset 8.
2787 [(set (match_operand:V2DF 0 "memory_operand" "")
2789 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2790 (match_operand:DF 1 "register_operand" "")))]
2791 "TARGET_SSE2 && reload_completed"
2792 [(set (match_dup 0) (match_dup 1))]
2794 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combines the DF operand 2 with index 1 of the V2DF operand 1.  Register
;; alternatives emit movsd, movlpd, movsd, shufpd with immediate 2, and
;; movhpd from the %H1 form of an offsettable memory operand 1.  A sixth
;; alternative has a memory destination.  Rejected when operands 1 and 2
;; are both memory.
2797 (define_insn "sse2_loadlpd"
2798 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2800 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2802 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2803 (parallel [(const_int 1)]))))]
2804 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2806 movsd\t{%2, %0|%0, %2}
2807 movlpd\t{%2, %0|%0, %2}
2808 movsd\t{%2, %0|%0, %2}
2809 shufpd\t{$2, %2, %0|%0, %2, 2}
2810 movhpd\t{%H1, %0|%0, %H1}
2812 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2813 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF memory destination that takes the DF
;; register operand 1 as its new first element and keeps its own index-1
;; element.  Only the first element changes, so the replacement is a DFmode
;; store of operand 1 at byte offset 0.  Offset 8 would overwrite the
;; element the pattern says is preserved.
2816 [(set (match_operand:V2DF 0 "memory_operand" "")
2818 (match_operand:DF 1 "register_operand" "")
2819 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2820 "TARGET_SSE2 && reload_completed"
2821 [(set (match_dup 0) (match_dup 1))]
2823 operands[0] = adjust_address (operands[0], DFmode, 0);
2826 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of index 1 of a V2DF operand into a DF
;; destination: movhps to memory, movhlps between registers, or movlps from
;; the %H1 form of an offsettable memory source.  Rejected when both
;; operands are memory.
2828 (define_insn "*vec_extractv2df_1_sse"
2829 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2831 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2832 (parallel [(const_int 1)])))]
2833 "!TARGET_SSE2 && TARGET_SSE
2834 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2836 movhps\t{%1, %0|%0, %1}
2837 movhlps\t{%1, %0|%0, %1}
2838 movlps\t{%H1, %0|%0, %H1}"
2839 [(set_attr "type" "ssemov")
2840 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of index 0 of a V2DF operand into a DF
;; destination: movlps to memory, movaps between registers, or movlps from
;; memory.  Rejected when both operands are memory.
2842 (define_insn "*vec_extractv2df_0_sse"
2843 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2845 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2846 (parallel [(const_int 0)])))]
2847 "!TARGET_SSE2 && TARGET_SSE
2848 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2850 movlps\t{%1, %0|%0, %1}
2851 movaps\t{%1, %0|%0, %1}
2852 movlps\t{%1, %0|%0, %1}"
2853 [(set_attr "type" "ssemov")
2854 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six alternatives over V2DF operands 1 and 2: movsd (register source),
;; movlpd from or to memory, shufpd with immediate 2, and two movhps forms
;; that use the %H1 source or %H0 destination of an offsettable memory
;; operand.
2856 (define_insn "sse2_movsd"
2857 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2859 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2860 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2864 movsd\t{%2, %0|%0, %2}
2865 movlpd\t{%2, %0|%0, %2}
2866 movlpd\t{%2, %0|%0, %2}
2867 shufpd\t{$2, %2, %0|%0, %2, 2}
2868 movhps\t{%H1, %0|%0, %H1}
2869 movhps\t{%1, %H0|%H0, %1}"
2870 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2871 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF register from a single DF operand (SSE register or
;; memory) with movddup.
2873 (define_insn "*vec_dupv2df_sse3"
2874 [(set (match_operand:V2DF 0 "register_operand" "=x")
2876 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2878 "movddup\t{%1, %0|%0, %1}"
2879 [(set_attr "type" "sselog1")
2880 (set_attr "mode" "DF")])
;; Builds a V2DF register from a DF register that is tied to the
;; destination (constraint "0").
2882 (define_insn "vec_dupv2df"
2883 [(set (match_operand:V2DF 0 "register_operand" "=x")
2885 (match_operand:DF 1 "register_operand" "0")))]
2888 [(set_attr "type" "sselog1")
2889 (set_attr "mode" "V2DF")])
;; movddup form of a V2DF construction from one DF operand (SSE register
;; or memory).
2891 (define_insn "*vec_concatv2df_sse3"
2892 [(set (match_operand:V2DF 0 "register_operand" "=x")
2894 (match_operand:DF 1 "nonimmediate_operand" "xm")
2897 "movddup\t{%1, %0|%0, %1}"
2898 [(set_attr "type" "sselog1")
2899 (set_attr "mode" "DF")])
;; Builds a V2DF from two DF operands.  The "Y2" alternatives emit
;; unpcklpd, movhpd, or movsd (operand 1 in memory, operand 2 matching
;; "C"); the "x" alternatives emit movlhps or movhps.
2901 (define_insn "*vec_concatv2df"
2902 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2904 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2905 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2908 unpcklpd\t{%2, %0|%0, %2}
2909 movhpd\t{%2, %0|%0, %2}
2910 movsd\t{%1, %0|%0, %1}
2911 movlhps\t{%2, %0|%0, %2}
2912 movhps\t{%2, %0|%0, %2}"
2913 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2914 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; V2DF element set; hands the vector, the DF value and the constant
;; element index (operand 2) to ix86_expand_vector_set with a first
;; argument of false.
2916 (define_expand "vec_setv2df"
2917 [(match_operand:V2DF 0 "register_operand" "")
2918 (match_operand:DF 1 "register_operand" "")
2919 (match_operand 2 "const_int_operand" "")]
2922 ix86_expand_vector_set (false, operands[0], operands[1],
2923 INTVAL (operands[2]));
;; V2DF element extraction; hands the DF destination, the vector and the
;; constant element index (operand 2) to ix86_expand_vector_extract with a
;; first argument of false.
2927 (define_expand "vec_extractv2df"
2928 [(match_operand:DF 0 "register_operand" "")
2929 (match_operand:V2DF 1 "register_operand" "")
2930 (match_operand 2 "const_int_operand" "")]
2933 ix86_expand_vector_extract (false, operands[0], operands[1],
2934 INTVAL (operands[2]));
;; V2DF vector initialisation; hands operands 0 and 1 to
;; ix86_expand_vector_init with a first argument of false.
2938 (define_expand "vec_initv2df"
2939 [(match_operand:V2DF 0 "register_operand" "")
2940 (match_operand 1 "" "")]
2943 ix86_expand_vector_init (false, operands[0], operands[1]);
2947 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2949 ;; Parallel integral arithmetic
2951 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI mode.  The preparation statement
;; forces the mode's zero constant into a register and stores it as
;; operands[2].
2953 (define_expand "neg<mode>2"
2954 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2957 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2959 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for every SSEMODEI mode.  The operands are
;; legitimised with ix86_fixup_binary_operands_no_copy (PLUS, ...).
2961 (define_expand "add<mode>3"
2962 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2963 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2964 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2966 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matcher for SSEMODEI addition.  It emits padd with the per-mode
;; <ssevecsize> suffix (b/w/d/q); operand 1 is commutative ("%0").
2968 (define_insn "*add<mode>3"
2969 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2971 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2972 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2973 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2974 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2975 [(set_attr "type" "sseiadd")
2976 (set_attr "prefix_data16" "1")
2977 (set_attr "mode" "TI")])
;; SS_PLUS addition for the SSEMODE12 modes (V16QI, V8HI).  It emits padds
;; with the per-mode suffix; operand 1 is commutative ("%0").
2979 (define_insn "sse2_ssadd<mode>3"
2980 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2982 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2983 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2984 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2985 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2986 [(set_attr "type" "sseiadd")
2987 (set_attr "prefix_data16" "1")
2988 (set_attr "mode" "TI")])
;; US_PLUS addition for the SSEMODE12 modes (V16QI, V8HI).  It emits
;; paddus with the per-mode suffix; operand 1 is commutative ("%0").
2990 (define_insn "sse2_usadd<mode>3"
2991 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2993 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2994 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2995 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2996 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2997 [(set_attr "type" "sseiadd")
2998 (set_attr "prefix_data16" "1")
2999 (set_attr "mode" "TI")])
;; Subtraction expander for every SSEMODEI mode.  Operand 1 must be a
;; register; the operands are legitimised with
;; ix86_fixup_binary_operands_no_copy (MINUS, ...).
3001 (define_expand "sub<mode>3"
3002 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3003 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
3004 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3006 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matcher for SSEMODEI subtraction.  It emits psub with the per-mode
;; <ssevecsize> suffix; operand 1 is tied to the destination.
3008 (define_insn "*sub<mode>3"
3009 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3011 (match_operand:SSEMODEI 1 "register_operand" "0")
3012 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3014 "psub<ssevecsize>\t{%2, %0|%0, %2}"
3015 [(set_attr "type" "sseiadd")
3016 (set_attr "prefix_data16" "1")
3017 (set_attr "mode" "TI")])
;; psubs for the SSEMODE12 modes (V16QI, V8HI), using the per-mode suffix;
;; operand 1 is tied to the destination.
3019 (define_insn "sse2_sssub<mode>3"
3020 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3022 (match_operand:SSEMODE12 1 "register_operand" "0")
3023 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3025 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
3026 [(set_attr "type" "sseiadd")
3027 (set_attr "prefix_data16" "1")
3028 (set_attr "mode" "TI")])
;; psubus for the SSEMODE12 modes (V16QI, V8HI), using the per-mode
;; suffix; operand 1 is tied to the destination.
3030 (define_insn "sse2_ussub<mode>3"
3031 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3033 (match_operand:SSEMODE12 1 "register_operand" "0")
3034 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3036 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
3037 [(set_attr "type" "sseiadd")
3038 (set_attr "prefix_data16" "1")
3039 (set_attr "mode" "TI")])
;; V16QI multiply on register operands, implemented as an insn-and-split
;; whose condition requires that reload has neither started nor completed.
;; Two expansion sequences are visible:
;;  - the SSE5 one (per its own comment) unpacks both inputs into V8HI
;;    temporaries with ix86_expand_sse5_unpack, multiplies the halves with
;;    gen_mulv8hi3 and repacks with ix86_expand_sse5_pack;
;;  - the other unpacks with punpckhbw/punpcklbw into V16QI temporaries,
;;    multiplies them viewed as V8HI with gen_mulv8hi3, and then merges the
;;    result bytes back together with a chain of punpckhbw/punpcklbw.
3041 (define_insn_and_split "mulv16qi3"
3042 [(set (match_operand:V16QI 0 "register_operand" "")
3043 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
3044 (match_operand:V16QI 2 "register_operand" "")))]
3046 && !(reload_completed || reload_in_progress)"
3051 rtx t[12], op0, op[3];
3056 /* On SSE5, we can take advantage of the pperm instruction to pack and
3057 unpack the bytes. Unpack data such that we've got a source byte in
3058 each low byte of each word. We don't care what goes into the high
3059 byte, so put 0 there. */
3060 for (i = 0; i < 6; ++i)
3061 t[i] = gen_reg_rtx (V8HImode);
3063 for (i = 0; i < 2; i++)
3066 op[1] = operands[i+1];
3067 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
3070 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
3073 /* Multiply words. */
3074 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
3075 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
3077 /* Pack the low byte of each word back into a single xmm */
3078 op[0] = operands[0];
3081 ix86_expand_sse5_pack (op);
3085 for (i = 0; i < 12; ++i)
3086 t[i] = gen_reg_rtx (V16QImode);
3088 /* Unpack data such that we've got a source byte in each low byte of
3089 each word. We don't care what goes into the high byte of each word.
3090 Rather than trying to get zero in there, most convenient is to let
3091 it be a copy of the low byte. */
3092 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
3093 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
3094 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
3095 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
3097 /* Multiply words. The end-of-line annotations here give a picture of what
3098 the output of that instruction looks like. Dot means don't care; the
3099 letters are the bytes of the result with A being the most significant. */
3100 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
3101 gen_lowpart (V8HImode, t[0]),
3102 gen_lowpart (V8HImode, t[1])));
3103 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
3104 gen_lowpart (V8HImode, t[2]),
3105 gen_lowpart (V8HImode, t[3])));
3107 /* Extract the relevant bytes and merge them back together. */
3108 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
3109 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
3110 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
3111 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
3112 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
3113 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
3116 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Named expander for the V8HI vector multiply.  Both inputs may be registers
;; or memory; the preparation statement passes the operand array to
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...).
3120 (define_expand "mulv8hi3"
3121 [(set (match_operand:V8HI 0 "register_operand" "")
3122 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3123 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3125 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the V8HI multiply; emits pmullw.  Operand 1 is tied to the
;; destination ("0") and marked commutative ("%"); operand 2 may be an "x"
;; register or memory.  Needs TARGET_SSE2 and an operand layout accepted by
;; ix86_binary_operator_ok.
3127 (define_insn "*mulv8hi3"
3128 [(set (match_operand:V8HI 0 "register_operand" "=x")
3129 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3130 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3131 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3132 "pmullw\t{%2, %0|%0, %2}"
3133 [(set_attr "type" "sseimul")
3134 (set_attr "prefix_data16" "1")
3135 (set_attr "mode" "TI")])
;; Named expander smulv8hi3_highpart: V8HI destination register and two V8HI
;; register-or-memory inputs.  The preparation statement passes the operands
;; to ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...).
3137 (define_expand "smulv8hi3_highpart"
3138 [(set (match_operand:V8HI 0 "register_operand" "")
3143 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3145 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3148 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher paired with the smulv8hi3_highpart expander; emits pmulhw.
;; Operand 1 is commutative and tied to the destination; operand 2 may be a
;; register or memory.  Needs TARGET_SSE2 and ix86_binary_operator_ok.
3150 (define_insn "*smulv8hi3_highpart"
3151 [(set (match_operand:V8HI 0 "register_operand" "=x")
3156 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3158 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3160 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3161 "pmulhw\t{%2, %0|%0, %2}"
3162 [(set_attr "type" "sseimul")
3163 (set_attr "prefix_data16" "1")
3164 (set_attr "mode" "TI")])
;; Named expander umulv8hi3_highpart: same operand shape as the signed
;; variant (V8HI register destination, two V8HI register-or-memory inputs).
;; Operands go through ix86_fixup_binary_operands_no_copy for MULT.
3166 (define_expand "umulv8hi3_highpart"
3167 [(set (match_operand:V8HI 0 "register_operand" "")
3172 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3174 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3177 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher paired with the umulv8hi3_highpart expander; emits pmulhuw.
;; Operand 1 is commutative and tied to the destination; operand 2 may be a
;; register or memory.  Needs TARGET_SSE2 and ix86_binary_operator_ok.
3179 (define_insn "*umulv8hi3_highpart"
3180 [(set (match_operand:V8HI 0 "register_operand" "=x")
3185 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3187 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3189 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3190 "pmulhuw\t{%2, %0|%0, %2}"
3191 [(set_attr "type" "sseimul")
3192 (set_attr "prefix_data16" "1")
3193 (set_attr "mode" "TI")])
;; pmuludq: takes elements 0 and 2 of each V4SI input (see the two
;; "parallel" selectors) and produces a V2DI result.  Operand 1 is
;; commutative and tied to the destination.  Needs TARGET_SSE2.
3195 (define_insn "sse2_umulv2siv2di3"
3196 [(set (match_operand:V2DI 0 "register_operand" "=x")
3200 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3201 (parallel [(const_int 0) (const_int 2)])))
3204 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3205 (parallel [(const_int 0) (const_int 2)])))))]
3206 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3207 "pmuludq\t{%2, %0|%0, %2}"
3208 [(set_attr "type" "sseimul")
3209 (set_attr "prefix_data16" "1")
3210 (set_attr "mode" "TI")])
;; pmuldq: same operand shape as sse2_umulv2siv2di3 (elements 0 and 2 of each
;; V4SI input, V2DI result) but gated on TARGET_SSE4_1 and emitting the
;; pmuldq mnemonic.
3212 (define_insn "sse4_1_mulv2siv2di3"
3213 [(set (match_operand:V2DI 0 "register_operand" "=x")
3217 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3218 (parallel [(const_int 0) (const_int 2)])))
3221 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3222 (parallel [(const_int 0) (const_int 2)])))))]
3223 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3224 "pmuldq\t{%2, %0|%0, %2}"
3225 [(set_attr "type" "sseimul")
3226 (set_attr "prefix_extra" "1")
3227 (set_attr "mode" "TI")])
;; pmaddwd: V8HI inputs, V4SI result.  The template selects V4HI sub-vectors
;; of both inputs twice: once starting at element 0 and once (via match_dup)
;; starting at element 1 and ending at element 7.  Operand 1 is commutative
;; and tied to the destination.  Needs TARGET_SSE2.
3229 (define_insn "sse2_pmaddwd"
3230 [(set (match_operand:V4SI 0 "register_operand" "=x")
3235 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3236 (parallel [(const_int 0)
3242 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3243 (parallel [(const_int 0)
3249 (vec_select:V4HI (match_dup 1)
3250 (parallel [(const_int 1)
3255 (vec_select:V4HI (match_dup 2)
3256 (parallel [(const_int 1)
3259 (const_int 7)]))))))]
3260 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3261 "pmaddwd\t{%2, %0|%0, %2}"
3262 [(set_attr "type" "sseiadd")
3263 (set_attr "prefix_data16" "1")
3264 (set_attr "mode" "TI")])
;; Named expander for the V4SI multiply; all three operands are registers.
;; Only when SSE4.1 or SSE5 is enabled are the operands passed through
;; ix86_fixup_binary_operands_no_copy; otherwise they are left as given for
;; one of the matchers/splitters that follow.
3266 (define_expand "mulv4si3"
3267 [(set (match_operand:V4SI 0 "register_operand" "")
3268 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3269 (match_operand:V4SI 2 "register_operand" "")))]
3272 if (TARGET_SSE4_1 || TARGET_SSE5)
3273 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; SSE4.1 matcher for the V4SI multiply; emits pmulld.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.
3276 (define_insn "*sse4_1_mulv4si3"
3277 [(set (match_operand:V4SI 0 "register_operand" "=x")
3278 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3279 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3280 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3281 "pmulld\t{%2, %0|%0, %2}"
3282 [(set_attr "type" "sseimul")
3283 (set_attr "prefix_extra" "1")
3284 (set_attr "mode" "TI")])
3286 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3287 ;; multiply/add. In general, we expect the define_split to occur before
3288 ;; register allocation, so we have to handle the corner case where the target
3289 ;; is used as the base or index register in operands 1/2.
;;
;; The destination is early-clobbered ("=&x").  The split is allowed once
;; reload has completed, or earlier when operand 0 is not mentioned in
;; operand 1 or 2.  The replacement is a plus of the mult, and the
;; preparation code sets operand 3 to the V4SI zero vector.
3290 (define_insn_and_split "*sse5_mulv4si3"
3291 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3292 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3293 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3296 "&& (reload_completed
3297 || (!reg_mentioned_p (operands[0], operands[1])
3298 && !reg_mentioned_p (operands[0], operands[2])))"
3302 (plus:V4SI (mult:V4SI (match_dup 1)
3306 operands[3] = CONST0_RTX (V4SImode);
3308 [(set_attr "type" "ssemuladd")
3309 (set_attr "mode" "TI")])
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor SSE5), usable only before
;; reload because the split allocates fresh pseudo registers.  The product is
;; built from two sse2_umulv2siv2di3 multiplies (elements 0/2, then elements
;; 1/3 after a whole-register right shift of both inputs), two pshufd
;; shuffles, and a final punpckldq merge into the destination.
3311 (define_insn_and_split "*sse2_mulv4si3"
3312 [(set (match_operand:V4SI 0 "register_operand" "")
3313 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3314 (match_operand:V4SI 2 "register_operand" "")))]
3315 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3316 && !(reload_completed || reload_in_progress)"
3321 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3327 t1 = gen_reg_rtx (V4SImode);
3328 t2 = gen_reg_rtx (V4SImode);
3329 t3 = gen_reg_rtx (V4SImode);
3330 t4 = gen_reg_rtx (V4SImode);
3331 t5 = gen_reg_rtx (V4SImode);
3332 t6 = gen_reg_rtx (V4SImode);
3333 thirtytwo = GEN_INT (32);
3335 /* Multiply elements 2 and 0. */
3336 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3339 /* Shift both input vectors down one element, so that elements 3
3340 and 1 are now in the slots for elements 2 and 0. For K8, at
3341 least, this is faster than using a shuffle. */
3342 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3343 gen_lowpart (TImode, op1),
3345 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3346 gen_lowpart (TImode, op2),
3348 /* Multiply elements 3 and 1. */
3349 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3352 /* Move the results in element 2 down to element 1; we don't care
3353 what goes in elements 2 and 3. */
3354 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3355 const0_rtx, const0_rtx));
3356 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3357 const0_rtx, const0_rtx));
3359 /* Merge the parts back together. */
3360 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized before reload from 32x32->64 multiplies
;; (sse2_umulv2siv2di3).  With op1/op2 denoting the inputs, the emitted
;; sequence computes
;;   t1 = lo(op1) * lo(op2)
;;   t4 = (lo(op1) * hi(op2)) << 32
;;   t5 = (lo(op2) * hi(op1)) << 32
;;   result = t1 + t4 + t5
;; where hi(x) is obtained with a 32-bit logical right shift of each element.
3364 (define_insn_and_split "mulv2di3"
3365 [(set (match_operand:V2DI 0 "register_operand" "")
3366 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3367 (match_operand:V2DI 2 "register_operand" "")))]
3369 && !(reload_completed || reload_in_progress)"
3374 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3380 t1 = gen_reg_rtx (V2DImode);
3381 t2 = gen_reg_rtx (V2DImode);
3382 t3 = gen_reg_rtx (V2DImode);
3383 t4 = gen_reg_rtx (V2DImode);
3384 t5 = gen_reg_rtx (V2DImode);
3385 t6 = gen_reg_rtx (V2DImode);
3386 thirtytwo = GEN_INT (32);
3388 /* Multiply low parts. */
3389 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3390 gen_lowpart (V4SImode, op2)));
3392 /* Shift input vectors right 32 bits so we can multiply high parts. */
3393 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3394 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3396 /* Multiply high parts by low parts. */
3397 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3398 gen_lowpart (V4SImode, t3)));
3399 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3400 gen_lowpart (V4SImode, t2)));
3402 /* Shift them back. */
3403 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3404 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3406 /* Add the three parts together. */
3407 emit_insn (gen_addv2di3 (t6, t1, t4));
3408 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, "hi" variant: V8HI inputs, V4SI result.
;; t1 receives the mulv8hi3 product and t2 the smulv8hi3_highpart product;
;; vec_interleave_highv8hi then combines t1 and t2 into the destination,
;; which is accessed as V8HI through gen_lowpart.
3412 (define_expand "vec_widen_smult_hi_v8hi"
3413 [(match_operand:V4SI 0 "register_operand" "")
3414 (match_operand:V8HI 1 "register_operand" "")
3415 (match_operand:V8HI 2 "register_operand" "")]
3418 rtx op1, op2, t1, t2, dest;
3422 t1 = gen_reg_rtx (V8HImode);
3423 t2 = gen_reg_rtx (V8HImode);
3424 dest = gen_lowpart (V8HImode, operands[0]);
3426 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3427 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3428 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, "lo" variant: V8HI inputs, V4SI result.
;; Same sequence as the "hi" variant (mulv8hi3 into t1, smulv8hi3_highpart
;; into t2) but the final combine uses vec_interleave_lowv8hi.
3432 (define_expand "vec_widen_smult_lo_v8hi"
3433 [(match_operand:V4SI 0 "register_operand" "")
3434 (match_operand:V8HI 1 "register_operand" "")
3435 (match_operand:V8HI 2 "register_operand" "")]
3438 rtx op1, op2, t1, t2, dest;
3442 t1 = gen_reg_rtx (V8HImode);
3443 t2 = gen_reg_rtx (V8HImode);
3444 dest = gen_lowpart (V8HImode, operands[0]);
3446 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3447 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3448 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, "hi" variant: V8HI inputs, V4SI result.
;; t1 receives the mulv8hi3 product and t2 the umulv8hi3_highpart product;
;; vec_interleave_highv8hi combines them into the destination viewed as V8HI.
3452 (define_expand "vec_widen_umult_hi_v8hi"
3453 [(match_operand:V4SI 0 "register_operand" "")
3454 (match_operand:V8HI 1 "register_operand" "")
3455 (match_operand:V8HI 2 "register_operand" "")]
3458 rtx op1, op2, t1, t2, dest;
3462 t1 = gen_reg_rtx (V8HImode);
3463 t2 = gen_reg_rtx (V8HImode);
3464 dest = gen_lowpart (V8HImode, operands[0]);
3466 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3467 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3468 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, "lo" variant: V8HI inputs, V4SI result.
;; Same sequence as the "hi" variant (mulv8hi3 into t1, umulv8hi3_highpart
;; into t2) but the final combine uses vec_interleave_lowv8hi.
3472 (define_expand "vec_widen_umult_lo_v8hi"
3473 [(match_operand:V4SI 0 "register_operand" "")
3474 (match_operand:V8HI 1 "register_operand" "")
3475 (match_operand:V8HI 2 "register_operand" "")]
3478 rtx op1, op2, t1, t2, dest;
3482 t1 = gen_reg_rtx (V8HImode);
3483 t2 = gen_reg_rtx (V8HImode);
3484 dest = gen_lowpart (V8HImode, operands[0]);
3486 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3487 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3488 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, signed "hi" variant: V4SI inputs, V2DI result.
;; Each input is interleaved with itself (vec_interleave_highv4si) into a
;; temporary, and the two temporaries are multiplied with
;; sse2_umulv2siv2di3.
;; NOTE(review): the final multiply is the pmuludq pattern, exactly as in
;; vec_widen_umult_hi_v4si.  pmuludq is an unsigned multiply, so negative
;; elements look mis-handled for a signed widening multiply; a signed
;; multiply such as sse4_1_mulv2siv2di3 (pmuldq) would need a matching
;; target condition.  Confirm before relying on this expander.
3492 (define_expand "vec_widen_smult_hi_v4si"
3493 [(match_operand:V2DI 0 "register_operand" "")
3494 (match_operand:V4SI 1 "register_operand" "")
3495 (match_operand:V4SI 2 "register_operand" "")]
3498 rtx op1, op2, t1, t2;
3502 t1 = gen_reg_rtx (V4SImode);
3503 t2 = gen_reg_rtx (V4SImode);
3505 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3506 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3507 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, signed "lo" variant: V4SI inputs, V2DI result.
;; Each input is interleaved with itself (vec_interleave_lowv4si) into a
;; temporary, and the two temporaries are multiplied with
;; sse2_umulv2siv2di3.
;; NOTE(review): the final multiply is the pmuludq pattern, exactly as in
;; vec_widen_umult_lo_v4si.  pmuludq is an unsigned multiply, so negative
;; elements look mis-handled for a signed widening multiply; a signed
;; multiply such as sse4_1_mulv2siv2di3 (pmuldq) would need a matching
;; target condition.  Confirm before relying on this expander.
3511 (define_expand "vec_widen_smult_lo_v4si"
3512 [(match_operand:V2DI 0 "register_operand" "")
3513 (match_operand:V4SI 1 "register_operand" "")
3514 (match_operand:V4SI 2 "register_operand" "")]
3517 rtx op1, op2, t1, t2;
3521 t1 = gen_reg_rtx (V4SImode);
3522 t2 = gen_reg_rtx (V4SImode);
3524 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3525 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3526 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, "hi" variant: V4SI inputs, V2DI result.
;; Each input is interleaved with itself (vec_interleave_highv4si) so that
;; the wanted elements sit in the slots read by sse2_umulv2siv2di3
;; (elements 0 and 2), which then produces the V2DI result.
3530 (define_expand "vec_widen_umult_hi_v4si"
3531 [(match_operand:V2DI 0 "register_operand" "")
3532 (match_operand:V4SI 1 "register_operand" "")
3533 (match_operand:V4SI 2 "register_operand" "")]
3536 rtx op1, op2, t1, t2;
3540 t1 = gen_reg_rtx (V4SImode);
3541 t2 = gen_reg_rtx (V4SImode);
3543 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3544 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3545 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, "lo" variant: V4SI inputs, V2DI result.
;; Same shape as the "hi" variant but the self-interleave uses
;; vec_interleave_lowv4si before the sse2_umulv2siv2di3 multiply.
3549 (define_expand "vec_widen_umult_lo_v4si"
3550 [(match_operand:V2DI 0 "register_operand" "")
3551 (match_operand:V4SI 1 "register_operand" "")
3552 (match_operand:V4SI 2 "register_operand" "")]
3555 rtx op1, op2, t1, t2;
3559 t1 = gen_reg_rtx (V4SImode);
3560 t2 = gen_reg_rtx (V4SImode);
3562 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3563 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3564 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander with accumulator: operands 1 and 2 are the V8HI
;; inputs, operand 3 the V4SI accumulator, operand 0 the V4SI result.
;; Emits sse2_pmaddwd into a fresh temporary and then adds the accumulator
;; with addv4si3.
3568 (define_expand "sdot_prodv8hi"
3569 [(match_operand:V4SI 0 "register_operand" "")
3570 (match_operand:V8HI 1 "register_operand" "")
3571 (match_operand:V8HI 2 "register_operand" "")
3572 (match_operand:V4SI 3 "register_operand" "")]
3575 rtx t = gen_reg_rtx (V4SImode);
3576 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3577 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander with accumulator: operands 1 and 2 are the V4SI
;; inputs, operand 3 the V2DI accumulator, operand 0 the V2DI result.
;; Sequence: t1 = sse2_umulv2siv2di3 (op1, op2), then t1 += op3; t2 and t3
;; are whole-register (TImode) right shifts of the inputs; t4 =
;; sse2_umulv2siv2di3 (t2, t3); result = t1 + t4.
3581 (define_expand "udot_prodv4si"
3582 [(match_operand:V2DI 0 "register_operand" "")
3583 (match_operand:V4SI 1 "register_operand" "")
3584 (match_operand:V4SI 2 "register_operand" "")
3585 (match_operand:V2DI 3 "register_operand" "")]
3590 t1 = gen_reg_rtx (V2DImode);
3591 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3592 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3594 t2 = gen_reg_rtx (V4SImode);
3595 t3 = gen_reg_rtx (V4SImode);
3596 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3597 gen_lowpart (TImode, operands[1]),
3599 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3600 gen_lowpart (TImode, operands[2]),
3603 t4 = gen_reg_rtx (V2DImode);
3604 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3606 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3 for the SSEMODE24 modes (V8HI, V4SI); emits psraw/psrad via
;; the <ssevecsize> suffix.  The source is tied to the destination; the SImode
;; count operand uses constraint "xN" and must not be a memory operand.
3610 (define_insn "ashr<mode>3"
3611 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3613 (match_operand:SSEMODE24 1 "register_operand" "0")
3614 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3616 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3617 [(set_attr "type" "sseishft")
3618 (set_attr "prefix_data16" "1")
3619 (set_attr "mode" "TI")])
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI); emits
;; psrlw/psrld/psrlq via the <ssevecsize> suffix.  The source is tied to the
;; destination; the SImode count operand uses constraint "xN".
3621 (define_insn "lshr<mode>3"
3622 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3623 (lshiftrt:SSEMODE248
3624 (match_operand:SSEMODE248 1 "register_operand" "0")
3625 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3627 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3628 [(set_attr "type" "sseishft")
3629 (set_attr "prefix_data16" "1")
3630 (set_attr "mode" "TI")])
;; ashl<mode>3 for the SSEMODE248 modes (V8HI, V4SI, V2DI); emits
;; psllw/pslld/psllq via the <ssevecsize> suffix.  The source is tied to the
;; destination; the SImode count operand uses constraint "xN".
3632 (define_insn "ashl<mode>3"
3633 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3635 (match_operand:SSEMODE248 1 "register_operand" "0")
3636 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3638 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3639 [(set_attr "type" "sseishft")
3640 (set_attr "prefix_data16" "1")
3641 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a TImode
;; ashift.  The count must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code re-views operands 0 and 1 as TImode with gen_lowpart so
;; that they fit the ashift:TI template.
3643 (define_expand "vec_shl_<mode>"
3644 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3645 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3646 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3649 operands[0] = gen_lowpart (TImode, operands[0]);
3650 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for every SSEMODEI mode, expressed as a
;; TImode lshiftrt.  The count must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as TImode with gen_lowpart
;; so that they fit the lshiftrt:TI template.
3653 (define_expand "vec_shr_<mode>"
3654 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3655 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3656 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3659 operands[0] = gen_lowpart (TImode, operands[0]);
3660 operands[1] = gen_lowpart (TImode, operands[1]);
;; Named expander for the unsigned V16QI maximum.  Inputs may be registers
;; or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, ...).
3663 (define_expand "umaxv16qi3"
3664 [(set (match_operand:V16QI 0 "register_operand" "")
3665 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3666 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3668 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matcher for the unsigned V16QI maximum; emits pmaxub.  Operand 1 is
;; commutative and tied to the destination.  Needs TARGET_SSE2 and
;; ix86_binary_operator_ok.
3670 (define_insn "*umaxv16qi3"
3671 [(set (match_operand:V16QI 0 "register_operand" "=x")
3672 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3673 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3674 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3675 "pmaxub\t{%2, %0|%0, %2}"
3676 [(set_attr "type" "sseiadd")
3677 (set_attr "prefix_data16" "1")
3678 (set_attr "mode" "TI")])
;; Named expander for the signed V8HI maximum.  Inputs may be registers or
;; memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, ...).
3680 (define_expand "smaxv8hi3"
3681 [(set (match_operand:V8HI 0 "register_operand" "")
3682 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3683 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3685 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matcher for the signed V8HI maximum; emits pmaxsw.  Operand 1 is
;; commutative and tied to the destination.  Needs TARGET_SSE2 and
;; ix86_binary_operator_ok.
3687 (define_insn "*smaxv8hi3"
3688 [(set (match_operand:V8HI 0 "register_operand" "=x")
3689 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3690 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3691 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3692 "pmaxsw\t{%2, %0|%0, %2}"
3693 [(set_attr "type" "sseiadd")
3694 (set_attr "prefix_data16" "1")
3695 (set_attr "mode" "TI")])
;; Named expander for the unsigned V8HI maximum.  One path simply passes the
;; operands through ix86_fixup_binary_operands_no_copy for UMAX.  The other
;; path synthesizes the result as ussub (op1, op2) + op2 using
;; sse2_ussubv8hi3 and addv8hi3.  The intermediate goes straight into
;; operand 0 unless operand 0 is the same rtx as operand 2, in which case a
;; fresh temporary is used so operand 2 is still intact for the add.
3697 (define_expand "umaxv8hi3"
3698 [(set (match_operand:V8HI 0 "register_operand" "")
3699 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3700 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3704 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3707 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3708 if (rtx_equal_p (op3, op2))
3709 op3 = gen_reg_rtx (V8HImode);
3710 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3711 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Named expander for the signed maximum on the SSEMODE14 modes (V16QI,
;; V4SI).  One path passes the operands through
;; ix86_fixup_binary_operands_no_copy for SMAX.  The other builds a
;; vcond-style operand array: destination, value-if-true = op1,
;; value-if-false = op2, condition GT (op1, op2) with comparison operands
;; op1 and op2, and hands it to ix86_expand_int_vcond.
3716 (define_expand "smax<mode>3"
3717 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3718 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3719 (match_operand:SSEMODE14 2 "register_operand" "")))]
3723 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3729 xops[0] = operands[0];
3730 xops[1] = operands[1];
3731 xops[2] = operands[2];
3732 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3733 xops[4] = operands[1];
3734 xops[5] = operands[2];
3735 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 matcher for the signed maximum on V16QI/V4SI; emits pmaxsb/pmaxsd
;; via the <ssevecsize> suffix.  Operand 1 is commutative and tied to the
;; destination.
3741 (define_insn "*sse4_1_smax<mode>3"
3742 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3744 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3745 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3746 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3747 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3748 [(set_attr "type" "sseiadd")
3749 (set_attr "prefix_extra" "1")
3750 (set_attr "mode" "TI")])
;; Named expander for the unsigned V4SI maximum.  One path passes the
;; operands through ix86_fixup_binary_operands_no_copy for UMAX.  The other
;; builds a vcond-style operand array selecting op1 when GTU (op1, op2)
;; holds and op2 otherwise, and hands it to ix86_expand_int_vcond.
3752 (define_expand "umaxv4si3"
3753 [(set (match_operand:V4SI 0 "register_operand" "")
3754 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3755 (match_operand:V4SI 2 "register_operand" "")))]
3759 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3765 xops[0] = operands[0];
3766 xops[1] = operands[1];
3767 xops[2] = operands[2];
3768 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3769 xops[4] = operands[1];
3770 xops[5] = operands[2];
3771 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 matcher for the unsigned maximum on the SSEMODE24 modes (V8HI,
;; V4SI); emits pmaxuw/pmaxud via the <ssevecsize> suffix.  Operand 1 is
;; commutative and tied to the destination.
3777 (define_insn "*sse4_1_umax<mode>3"
3778 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3780 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3781 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3782 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3783 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3784 [(set_attr "type" "sseiadd")
3785 (set_attr "prefix_extra" "1")
3786 (set_attr "mode" "TI")])
;; Named expander for the unsigned V16QI minimum.  Inputs may be registers
;; or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, ...).
3788 (define_expand "uminv16qi3"
3789 [(set (match_operand:V16QI 0 "register_operand" "")
3790 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3791 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3793 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matcher for the unsigned V16QI minimum; emits pminub.  Operand 1 is
;; commutative and tied to the destination.  Needs TARGET_SSE2 and
;; ix86_binary_operator_ok.
3795 (define_insn "*uminv16qi3"
3796 [(set (match_operand:V16QI 0 "register_operand" "=x")
3797 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3798 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3799 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3800 "pminub\t{%2, %0|%0, %2}"
3801 [(set_attr "type" "sseiadd")
3802 (set_attr "prefix_data16" "1")
3803 (set_attr "mode" "TI")])
;; Named expander for the signed V8HI minimum.  Inputs may be registers or
;; memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, ...).
3805 (define_expand "sminv8hi3"
3806 [(set (match_operand:V8HI 0 "register_operand" "")
3807 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3808 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3810 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matcher for the signed V8HI minimum; emits pminsw.  Operand 1 is
;; commutative and tied to the destination.  Needs TARGET_SSE2 and
;; ix86_binary_operator_ok.
3812 (define_insn "*sminv8hi3"
3813 [(set (match_operand:V8HI 0 "register_operand" "=x")
3814 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3815 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3816 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3817 "pminsw\t{%2, %0|%0, %2}"
3818 [(set_attr "type" "sseiadd")
3819 (set_attr "prefix_data16" "1")
3820 (set_attr "mode" "TI")])
;; Named expander for the signed minimum on the SSEMODE14 modes (V16QI,
;; V4SI).  One path passes the operands through
;; ix86_fixup_binary_operands_no_copy for SMIN.  The other reuses the
;; GT (op1, op2) comparison of smax<mode>3 but swaps the two selected values
;; (value-if-true = op2, value-if-false = op1) before calling
;; ix86_expand_int_vcond.
3822 (define_expand "smin<mode>3"
3823 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3824 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3825 (match_operand:SSEMODE14 2 "register_operand" "")))]
3829 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3835 xops[0] = operands[0];
3836 xops[1] = operands[2];
3837 xops[2] = operands[1];
3838 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3839 xops[4] = operands[1];
3840 xops[5] = operands[2];
3841 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 matcher for the signed minimum on V16QI/V4SI; emits pminsb/pminsd
;; via the <ssevecsize> suffix.  Operand 1 is commutative and tied to the
;; destination.
3847 (define_insn "*sse4_1_smin<mode>3"
3848 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3850 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3851 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3852 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3853 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3854 [(set_attr "type" "sseiadd")
3855 (set_attr "prefix_extra" "1")
3856 (set_attr "mode" "TI")])
;; Named expander for the unsigned minimum on the SSEMODE24 modes (V8HI,
;; V4SI).  One path passes the operands through
;; ix86_fixup_binary_operands_no_copy for UMIN.  The other builds a
;; vcond-style operand array with condition GTU (op1, op2) and swapped
;; selected values (value-if-true = op2, value-if-false = op1) and calls
;; ix86_expand_int_vcond.
3858 (define_expand "umin<mode>3"
3859 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3860 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3861 (match_operand:SSEMODE24 2 "register_operand" "")))]
3865 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3871 xops[0] = operands[0];
3872 xops[1] = operands[2];
3873 xops[2] = operands[1];
3874 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3875 xops[4] = operands[1];
3876 xops[5] = operands[2];
3877 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 matcher for the unsigned minimum on V8HI/V4SI; emits pminuw/pminud
;; via the <ssevecsize> suffix.  Operand 1 is commutative and tied to the
;; destination.
3883 (define_insn "*sse4_1_umin<mode>3"
3884 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3886 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3887 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3888 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3889 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3890 [(set_attr "type" "sseiadd")
3891 (set_attr "prefix_extra" "1")
3892 (set_attr "mode" "TI")])
3894 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3896 ;; Parallel integral comparisons
3898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpeqb/pcmpeqw/pcmpeqd.  Operand 1 is commutative and tied
;; to the destination.  Enabled for SSE2 only when SSE5 is not selected.
3900 (define_insn "sse2_eq<mode>3"
3901 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3903 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3904 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3905 "TARGET_SSE2 && !TARGET_SSE5
3906 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3907 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3908 [(set_attr "type" "ssecmp")
3909 (set_attr "prefix_data16" "1")
3910 (set_attr "mode" "TI")])
;; V2DI equality compare; emits pcmpeqq.  Operand 1 is commutative and tied
;; to the destination.  Needs TARGET_SSE4_1 and ix86_binary_operator_ok.
3912 (define_insn "sse4_1_eqv2di3"
3913 [(set (match_operand:V2DI 0 "register_operand" "=x")
3915 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3916 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3917 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3918 "pcmpeqq\t{%2, %0|%0, %2}"
3919 [(set_attr "type" "ssecmp")
3920 (set_attr "prefix_extra" "1")
3921 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpgtb/pcmpgtw/pcmpgtd.  Operand 1 must be a register tied
;; to the destination and carries no "%" marker, so the operands are not
;; interchangeable.  Enabled for SSE2 only when SSE5 is not selected.
3923 (define_insn "sse2_gt<mode>3"
3924 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3926 (match_operand:SSEMODE124 1 "register_operand" "0")
3927 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3928 "TARGET_SSE2 && !TARGET_SSE5"
3929 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3930 [(set_attr "type" "ssecmp")
3931 (set_attr "prefix_data16" "1")
3932 (set_attr "mode" "TI")])
;; V2DI greater-than compare; emits pcmpgtq.  Operand 1 is tied to the
;; destination with no "%" marker.
;; NOTE(review): operand 1 uses the nonimmediate_operand predicate here,
;; whereas sse2_gt<mode>3 uses register_operand for the same tied "0"
;; operand -- confirm whether the looser predicate is intended.
3934 (define_insn "sse4_2_gtv2di3"
3935 [(set (match_operand:V2DI 0 "register_operand" "=x")
3937 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3938 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3940 "pcmpgtq\t{%2, %0|%0, %2}"
3941 [(set_attr "type" "ssecmp")
3942 (set_attr "mode" "TI")])
;; Vector conditional select for every SSEMODEI mode.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operand 1 is the value
;; chosen when it holds and operand 2 the value otherwise.  The whole
;; operand array is handed to ix86_expand_int_vcond, whose return value
;; decides the outcome of the expansion.
3944 (define_expand "vcond<mode>"
3945 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3946 (if_then_else:SSEMODEI
3947 (match_operator 3 ""
3948 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3949 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3950 (match_operand:SSEMODEI 1 "general_operand" "")
3951 (match_operand:SSEMODEI 2 "general_operand" "")))]
3954 if (ix86_expand_int_vcond (operands))
;; Vector conditional select under the vcondu name for every SSEMODEI mode.
;; The operand layout matches vcond<mode>: operator 3 over operands 4 and 5
;; selects between operands 1 and 2.  Expansion is delegated to
;; ix86_expand_int_vcond.
3960 (define_expand "vcondu<mode>"
3961 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3962 (if_then_else:SSEMODEI
3963 (match_operator 3 ""
3964 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3965 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3966 (match_operand:SSEMODEI 1 "general_operand" "")
3967 (match_operand:SSEMODEI 2 "general_operand" "")))]
3970 if (ix86_expand_int_vcond (operands))
3976 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3978 ;; Parallel bitwise logical operations
3980 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for every SSEMODEI mode, expressed as an xor.  The
;; preparation code builds a constant vector with constm1_rtx in each of the
;; mode's GET_MODE_NUNITS elements, forces it into a register, and installs
;; it as operand 2.
3982 (define_expand "one_cmpl<mode>2"
3983 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3984 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3988 int i, n = GET_MODE_NUNITS (<MODE>mode);
3989 rtvec v = rtvec_alloc (n);
3991 for (i = 0; i < n; ++i)
3992 RTVEC_ELT (v, i) = constm1_rtx;
3994 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND on every SSEMODEI mode.  Inputs may be
;; registers or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, ...).
3997 (define_expand "and<mode>3"
3998 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3999 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4000 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4002 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Integer-vector AND matcher used when SSE is available but SSE2 is not;
;; emits andps and is tagged with mode V4SF.  Operand 1 is commutative and
;; tied to the destination.
4004 (define_insn "*sse_and<mode>3"
4005 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4007 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4008 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4009 "(TARGET_SSE && !TARGET_SSE2)
4010 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4011 "andps\t{%2, %0|%0, %2}"
4012 [(set_attr "type" "sselog")
4013 (set_attr "mode" "V4SF")])
;; SSE2 integer-vector AND matcher; emits pand.  Operand 1 is commutative
;; and tied to the destination.
4015 (define_insn "*sse2_and<mode>3"
4016 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4018 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4019 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4020 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4021 "pand\t{%2, %0|%0, %2}"
4022 [(set_attr "type" "sselog")
4023 (set_attr "prefix_data16" "1")
4024 (set_attr "mode" "TI")])
;; AND-NOT matcher for SSE without SSE2; emits andnps.  Operand 1 is wrapped
;; in (not ...), must be a register, and is tied to the destination;
;; operand 2 may be a register or memory.
4026 (define_insn "*sse_nand<mode>3"
4027 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4029 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4030 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4031 "(TARGET_SSE && !TARGET_SSE2)"
4032 "andnps\t{%2, %0|%0, %2}"
4033 [(set_attr "type" "sselog")
4034 (set_attr "mode" "V4SF")])
;; AND-NOT pattern emitting pandn.  Operand 1 is wrapped in (not ...), must
;; be a register, and is tied to the destination; operand 2 may be a
;; register or memory.
4036 (define_insn "sse2_nand<mode>3"
4037 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4039 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4040 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4042 "pandn\t{%2, %0|%0, %2}"
4043 [(set_attr "type" "sselog")
4044 (set_attr "prefix_data16" "1")
4045 (set_attr "mode" "TI")])
;; Named expander for bitwise AND on TFmode values.  Inputs may be registers
;; or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (AND, TFmode, ...).
4047 (define_expand "andtf3"
4048 [(set (match_operand:TF 0 "register_operand" "")
4049 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
4050 (match_operand:TF 2 "nonimmediate_operand" "")))]
4052 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; TFmode AND matcher; emits pand.  Operand 1 is commutative and tied to the
;; destination.  Needs TARGET_64BIT and ix86_binary_operator_ok.
4054 (define_insn "*andtf3"
4055 [(set (match_operand:TF 0 "register_operand" "=x")
4057 (match_operand:TF 1 "nonimmediate_operand" "%0")
4058 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4059 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
4060 "pand\t{%2, %0|%0, %2}"
4061 [(set_attr "type" "sselog")
4062 (set_attr "prefix_data16" "1")
4063 (set_attr "mode" "TI")])
;; TFmode AND-NOT matcher; emits pandn.  Operand 1 is wrapped in (not ...),
;; must be a register, and is tied to the destination; operand 2 may be a
;; register or memory.
4065 (define_insn "*nandtf3"
4066 [(set (match_operand:TF 0 "register_operand" "=x")
4068 (not:TF (match_operand:TF 1 "register_operand" "0"))
4069 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4071 "pandn\t{%2, %0|%0, %2}"
4072 [(set_attr "type" "sselog")
4073 (set_attr "prefix_data16" "1")
4074 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on every SSEMODEI mode.  Inputs
;; may be registers or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, ...).
4076 (define_expand "ior<mode>3"
4077 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4078 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4079 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4081 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Integer-vector OR matcher used when SSE is available but SSE2 is not;
;; emits orps and is tagged with mode V4SF.  Operand 1 is commutative and
;; tied to the destination.
4083 (define_insn "*sse_ior<mode>3"
4084 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4086 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4087 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4088 "(TARGET_SSE && !TARGET_SSE2)
4089 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4090 "orps\t{%2, %0|%0, %2}"
4091 [(set_attr "type" "sselog")
4092 (set_attr "mode" "V4SF")])
;; SSE2 integer-vector OR matcher; emits por.  Operand 1 is commutative and
;; tied to the destination.
4094 (define_insn "*sse2_ior<mode>3"
4095 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4097 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4098 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4099 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4100 "por\t{%2, %0|%0, %2}"
4101 [(set_attr "type" "sselog")
4102 (set_attr "prefix_data16" "1")
4103 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on TFmode values.  Inputs may be
;; registers or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (IOR, TFmode, ...).
4105 (define_expand "iortf3"
4106 [(set (match_operand:TF 0 "register_operand" "")
4107 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
4108 (match_operand:TF 2 "nonimmediate_operand" "")))]
4110 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; TFmode OR matcher; emits por.  Operand 1 is commutative and tied to the
;; destination.  Needs TARGET_64BIT and ix86_binary_operator_ok.
4112 (define_insn "*iortf3"
4113 [(set (match_operand:TF 0 "register_operand" "=x")
4115 (match_operand:TF 1 "nonimmediate_operand" "%0")
4116 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4117 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
4118 "por\t{%2, %0|%0, %2}"
4119 [(set_attr "type" "sselog")
4120 (set_attr "prefix_data16" "1")
4121 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR on every SSEMODEI mode.  Inputs
;; may be registers or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, ...).
4123 (define_expand "xor<mode>3"
4124 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4125 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4126 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4128 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Integer-vector XOR matcher used when SSE is available but SSE2 is not;
;; emits xorps and is tagged with mode V4SF.  Operand 1 is commutative and
;; tied to the destination.
4130 (define_insn "*sse_xor<mode>3"
4131 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4133 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4134 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4135 "(TARGET_SSE && !TARGET_SSE2)
4136 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4137 "xorps\t{%2, %0|%0, %2}"
4138 [(set_attr "type" "sselog")
4139 (set_attr "mode" "V4SF")])
;; SSE2 integer-vector XOR matcher; emits pxor.  Operand 1 is commutative
;; and tied to the destination.
4141 (define_insn "*sse2_xor<mode>3"
4142 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4144 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4145 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4146 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4147 "pxor\t{%2, %0|%0, %2}"
4148 [(set_attr "type" "sselog")
4149 (set_attr "prefix_data16" "1")
4150 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR on TFmode values.  Inputs may be
;; registers or memory; operands are passed through
;; ix86_fixup_binary_operands_no_copy (XOR, TFmode, ...).
4152 (define_expand "xortf3"
4153 [(set (match_operand:TF 0 "register_operand" "")
4154 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
4155 (match_operand:TF 2 "nonimmediate_operand" "")))]
4157 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; Matcher for the TFmode XOR, printed as "pxor".  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory ("xm").  Enabled for TARGET_64BIT when
;; ix86_binary_operator_ok (XOR, TFmode, operands) holds.
4159 (define_insn "*xortf3"
4160 [(set (match_operand:TF 0 "register_operand" "=x")
4162 (match_operand:TF 1 "nonimmediate_operand" "%0")
4163 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4164 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
4165 "pxor\t{%2, %0|%0, %2}"
4166 [(set_attr "type" "sselog")
4167 (set_attr "prefix_data16" "1")
4168 (set_attr "mode" "TI")])
4170 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4172 ;; Parallel integral element swizzling
4174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4177 ;; op1 = abcdefghijklmnop
4178 ;; op2 = qrstuvwxyz012345
4179 ;; h1 = aqbrcsdteufvgwhx
4180 ;; l1 = iyjzk0l1m2n3o4p5
4181 ;; h2 = aiqybjrzcks0dlt1
4182 ;; l2 = emu2fnv3gow4hpx5
4183 ;; h3 = aeimquy2bfjnrvz3
4184 ;; l3 = cgkosw04dhlptx15
4185 ;; result = bdfhjlnprtvxz135
;; Expander taking two V8HI registers and producing one V16QI register.
;; Both inputs are re-viewed as V16QI with gen_lowpart; three rounds of
;; high/low byte interleaves are emitted into fresh pseudo registers
;; (h1/l1, h2/l2, h3/l3), and a final low interleave of l3 and h3 writes
;; operands[0].
4186 (define_expand "vec_pack_trunc_v8hi"
4187 [(match_operand:V16QI 0 "register_operand" "")
4188 (match_operand:V8HI 1 "register_operand" "")
4189 (match_operand:V8HI 2 "register_operand" "")]
4192 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4194 op1 = gen_lowpart (V16QImode, operands[1]);
4195 op2 = gen_lowpart (V16QImode, operands[2]);
4196 h1 = gen_reg_rtx (V16QImode);
4197 l1 = gen_reg_rtx (V16QImode);
4198 h2 = gen_reg_rtx (V16QImode);
4199 l2 = gen_reg_rtx (V16QImode);
4200 h3 = gen_reg_rtx (V16QImode);
4201 l3 = gen_reg_rtx (V16QImode);
4203 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4204 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4205 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4206 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4207 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4208 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4209 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4220 ;; result = bdfhjlnp
;; Expander taking two V4SI registers and producing one V8HI register.
;; Both inputs are re-viewed as V8HI with gen_lowpart; two rounds of
;; high/low word interleaves are emitted into fresh pseudo registers
;; (h1/l1, h2/l2), and a final low interleave of l2 and h2 writes
;; operands[0].
4221 (define_expand "vec_pack_trunc_v4si"
4222 [(match_operand:V8HI 0 "register_operand" "")
4223 (match_operand:V4SI 1 "register_operand" "")
4224 (match_operand:V4SI 2 "register_operand" "")]
4227 rtx op1, op2, h1, l1, h2, l2;
4229 op1 = gen_lowpart (V8HImode, operands[1]);
4230 op2 = gen_lowpart (V8HImode, operands[2]);
4231 h1 = gen_reg_rtx (V8HImode);
4232 l1 = gen_reg_rtx (V8HImode);
4233 h2 = gen_reg_rtx (V8HImode);
4234 l2 = gen_reg_rtx (V8HImode);
4236 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4237 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4238 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4239 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4240 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander taking two V2DI registers and producing one V4SI register.
;; Both inputs are re-viewed as V4SI with gen_lowpart; one high and one
;; low doubleword interleave are emitted into fresh pseudo registers
;; (h1, l1), and a final low interleave of l1 and h1 writes operands[0].
4250 (define_expand "vec_pack_trunc_v2di"
4251 [(match_operand:V4SI 0 "register_operand" "")
4252 (match_operand:V2DI 1 "register_operand" "")
4253 (match_operand:V2DI 2 "register_operand" "")]
4256 rtx op1, op2, h1, l1;
4258 op1 = gen_lowpart (V4SImode, operands[1]);
4259 op2 = gen_lowpart (V4SImode, operands[2]);
4260 h1 = gen_reg_rtx (V4SImode);
4261 l1 = gen_reg_rtx (V4SImode);
4263 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4264 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4265 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the V16QI high interleave.  The index list
;; alternates 8..15 with 24..31; the body simply emits
;; gen_sse2_punpckhbw on the three operands.
;; NOTE(review): the indices presumably select from the concatenation of
;; operands 1 and 2 -- the enclosing rtx heads are not visible here.
4269 (define_expand "vec_interleave_highv16qi"
4270 [(set (match_operand:V16QI 0 "register_operand" "")
4273 (match_operand:V16QI 1 "register_operand" "")
4274 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4275 (parallel [(const_int 8) (const_int 24)
4276 (const_int 9) (const_int 25)
4277 (const_int 10) (const_int 26)
4278 (const_int 11) (const_int 27)
4279 (const_int 12) (const_int 28)
4280 (const_int 13) (const_int 29)
4281 (const_int 14) (const_int 30)
4282 (const_int 15) (const_int 31)])))]
4285 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the V16QI low interleave.  The index list
;; alternates 0..7 with 16..23; the body simply emits
;; gen_sse2_punpcklbw on the three operands.
;; NOTE(review): the indices presumably select from the concatenation of
;; operands 1 and 2 -- the enclosing rtx heads are not visible here.
4289 (define_expand "vec_interleave_lowv16qi"
4290 [(set (match_operand:V16QI 0 "register_operand" "")
4293 (match_operand:V16QI 1 "register_operand" "")
4294 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4295 (parallel [(const_int 0) (const_int 16)
4296 (const_int 1) (const_int 17)
4297 (const_int 2) (const_int 18)
4298 (const_int 3) (const_int 19)
4299 (const_int 4) (const_int 20)
4300 (const_int 5) (const_int 21)
4301 (const_int 6) (const_int 22)
4302 (const_int 7) (const_int 23)])))]
4305 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the V8HI high interleave.  The index list
;; alternates 4..7 with 12..15; the body simply emits
;; gen_sse2_punpckhwd on the three operands.
;; Operand 0 uses an empty constraint string like every sibling
;; vec_interleave_* expander: constraints carry no meaning in a
;; define_expand, so the former stray "=" was only an inconsistency.
;; NOTE(review): the indices presumably select from the concatenation of
;; operands 1 and 2 -- the enclosing rtx heads are not visible here.
4309 (define_expand "vec_interleave_highv8hi"
4310 [(set (match_operand:V8HI 0 "register_operand" "")
4313 (match_operand:V8HI 1 "register_operand" "")
4314 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4315 (parallel [(const_int 4) (const_int 12)
4316 (const_int 5) (const_int 13)
4317 (const_int 6) (const_int 14)
4318 (const_int 7) (const_int 15)])))]
4321 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the V8HI low interleave.  The index list
;; alternates 0..3 with 8..11; the body simply emits
;; gen_sse2_punpcklwd on the three operands.
;; NOTE(review): the indices presumably select from the concatenation of
;; operands 1 and 2 -- the enclosing rtx heads are not visible here.
4325 (define_expand "vec_interleave_lowv8hi"
4326 [(set (match_operand:V8HI 0 "register_operand" "")
4329 (match_operand:V8HI 1 "register_operand" "")
4330 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4331 (parallel [(const_int 0) (const_int 8)
4332 (const_int 1) (const_int 9)
4333 (const_int 2) (const_int 10)
4334 (const_int 3) (const_int 11)])))]
4337 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the V4SI high interleave.  The index list is
;; 2, 6, 3, 7; the body simply emits gen_sse2_punpckhdq on the three
;; operands.
4341 (define_expand "vec_interleave_highv4si"
4342 [(set (match_operand:V4SI 0 "register_operand" "")
4345 (match_operand:V4SI 1 "register_operand" "")
4346 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4347 (parallel [(const_int 2) (const_int 6)
4348 (const_int 3) (const_int 7)])))]
4351 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the V4SI low interleave.  The index list is
;; 0, 4, 1, 5; the body simply emits gen_sse2_punpckldq on the three
;; operands.
4355 (define_expand "vec_interleave_lowv4si"
4356 [(set (match_operand:V4SI 0 "register_operand" "")
4359 (match_operand:V4SI 1 "register_operand" "")
4360 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4361 (parallel [(const_int 0) (const_int 4)
4362 (const_int 1) (const_int 5)])))]
4365 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the V2DI high interleave.  The index list starts
;; with element 1; the body simply emits gen_sse2_punpckhqdq on the
;; three operands.
4369 (define_expand "vec_interleave_highv2di"
4370 [(set (match_operand:V2DI 0 "register_operand" "")
4373 (match_operand:V2DI 1 "register_operand" "")
4374 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4375 (parallel [(const_int 1)
4379 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the V2DI low interleave.  The index list starts
;; with element 0; the body simply emits gen_sse2_punpcklqdq on the
;; three operands.
4383 (define_expand "vec_interleave_lowv2di"
4384 [(set (match_operand:V2DI 0 "register_operand" "")
4387 (match_operand:V2DI 1 "register_operand" "")
4388 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4389 (parallel [(const_int 0)
4393 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; "packsswb": two V8HI inputs produce one V16QI result.  Operand 1 must
;; be a register tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4397 (define_insn "sse2_packsswb"
4398 [(set (match_operand:V16QI 0 "register_operand" "=x")
4401 (match_operand:V8HI 1 "register_operand" "0"))
4403 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4405 "packsswb\t{%2, %0|%0, %2}"
4406 [(set_attr "type" "sselog")
4407 (set_attr "prefix_data16" "1")
4408 (set_attr "mode" "TI")])
;; "packssdw": two V4SI inputs produce one V8HI result.  Operand 1 must
;; be a register tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4410 (define_insn "sse2_packssdw"
4411 [(set (match_operand:V8HI 0 "register_operand" "=x")
4414 (match_operand:V4SI 1 "register_operand" "0"))
4416 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4418 "packssdw\t{%2, %0|%0, %2}"
4419 [(set_attr "type" "sselog")
4420 (set_attr "prefix_data16" "1")
4421 (set_attr "mode" "TI")])
;; "packuswb": two V8HI inputs produce one V16QI result.  Operand 1 must
;; be a register tied to the destination ("0"); operand 2 may be an SSE
;; register or memory ("xm").
4423 (define_insn "sse2_packuswb"
4424 [(set (match_operand:V16QI 0 "register_operand" "=x")
4427 (match_operand:V8HI 1 "register_operand" "0"))
4429 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4431 "packuswb\t{%2, %0|%0, %2}"
4432 [(set_attr "type" "sselog")
4433 (set_attr "prefix_data16" "1")
4434 (set_attr "mode" "TI")])
;; "punpckhbw" on V16QI.  The index list alternates 8..15 with 24..31,
;; the same list used by the vec_interleave_highv16qi expander.  Operand 1
;; is tied to the destination; operand 2 is an SSE register or memory.
4436 (define_insn "sse2_punpckhbw"
4437 [(set (match_operand:V16QI 0 "register_operand" "=x")
4440 (match_operand:V16QI 1 "register_operand" "0")
4441 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4442 (parallel [(const_int 8) (const_int 24)
4443 (const_int 9) (const_int 25)
4444 (const_int 10) (const_int 26)
4445 (const_int 11) (const_int 27)
4446 (const_int 12) (const_int 28)
4447 (const_int 13) (const_int 29)
4448 (const_int 14) (const_int 30)
4449 (const_int 15) (const_int 31)])))]
4451 "punpckhbw\t{%2, %0|%0, %2}"
4452 [(set_attr "type" "sselog")
4453 (set_attr "prefix_data16" "1")
4454 (set_attr "mode" "TI")])
;; "punpcklbw" on V16QI.  The index list alternates 0..7 with 16..23,
;; the same list used by the vec_interleave_lowv16qi expander.  Operand 1
;; is tied to the destination; operand 2 is an SSE register or memory.
4456 (define_insn "sse2_punpcklbw"
4457 [(set (match_operand:V16QI 0 "register_operand" "=x")
4460 (match_operand:V16QI 1 "register_operand" "0")
4461 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4462 (parallel [(const_int 0) (const_int 16)
4463 (const_int 1) (const_int 17)
4464 (const_int 2) (const_int 18)
4465 (const_int 3) (const_int 19)
4466 (const_int 4) (const_int 20)
4467 (const_int 5) (const_int 21)
4468 (const_int 6) (const_int 22)
4469 (const_int 7) (const_int 23)])))]
4471 "punpcklbw\t{%2, %0|%0, %2}"
4472 [(set_attr "type" "sselog")
4473 (set_attr "prefix_data16" "1")
4474 (set_attr "mode" "TI")])
;; "punpckhwd" on V8HI.  The index list alternates 4..7 with 12..15.
;; Operand 1 is tied to the destination; operand 2 is an SSE register or
;; memory.
4476 (define_insn "sse2_punpckhwd"
4477 [(set (match_operand:V8HI 0 "register_operand" "=x")
4480 (match_operand:V8HI 1 "register_operand" "0")
4481 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4482 (parallel [(const_int 4) (const_int 12)
4483 (const_int 5) (const_int 13)
4484 (const_int 6) (const_int 14)
4485 (const_int 7) (const_int 15)])))]
4487 "punpckhwd\t{%2, %0|%0, %2}"
4488 [(set_attr "type" "sselog")
4489 (set_attr "prefix_data16" "1")
4490 (set_attr "mode" "TI")])
;; "punpcklwd" on V8HI.  The index list alternates 0..3 with 8..11.
;; Operand 1 is tied to the destination; operand 2 is an SSE register or
;; memory.
4492 (define_insn "sse2_punpcklwd"
4493 [(set (match_operand:V8HI 0 "register_operand" "=x")
4496 (match_operand:V8HI 1 "register_operand" "0")
4497 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4498 (parallel [(const_int 0) (const_int 8)
4499 (const_int 1) (const_int 9)
4500 (const_int 2) (const_int 10)
4501 (const_int 3) (const_int 11)])))]
4503 "punpcklwd\t{%2, %0|%0, %2}"
4504 [(set_attr "type" "sselog")
4505 (set_attr "prefix_data16" "1")
4506 (set_attr "mode" "TI")])
;; "punpckhdq" on V4SI.  The index list is 2, 6, 3, 7.  Operand 1 is
;; tied to the destination; operand 2 is an SSE register or memory.
4508 (define_insn "sse2_punpckhdq"
4509 [(set (match_operand:V4SI 0 "register_operand" "=x")
4512 (match_operand:V4SI 1 "register_operand" "0")
4513 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4514 (parallel [(const_int 2) (const_int 6)
4515 (const_int 3) (const_int 7)])))]
4517 "punpckhdq\t{%2, %0|%0, %2}"
4518 [(set_attr "type" "sselog")
4519 (set_attr "prefix_data16" "1")
4520 (set_attr "mode" "TI")])
;; "punpckldq" on V4SI.  The index list is 0, 4, 1, 5.  Operand 1 is
;; tied to the destination; operand 2 is an SSE register or memory.
4522 (define_insn "sse2_punpckldq"
4523 [(set (match_operand:V4SI 0 "register_operand" "=x")
4526 (match_operand:V4SI 1 "register_operand" "0")
4527 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4528 (parallel [(const_int 0) (const_int 4)
4529 (const_int 1) (const_int 5)])))]
4531 "punpckldq\t{%2, %0|%0, %2}"
4532 [(set_attr "type" "sselog")
4533 (set_attr "prefix_data16" "1")
4534 (set_attr "mode" "TI")])
;; "punpckhqdq" on V2DI.  The index list starts with element 1.
;; Operand 1 is tied to the destination; operand 2 is an SSE register or
;; memory.
4536 (define_insn "sse2_punpckhqdq"
4537 [(set (match_operand:V2DI 0 "register_operand" "=x")
4540 (match_operand:V2DI 1 "register_operand" "0")
4541 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4542 (parallel [(const_int 1)
4545 "punpckhqdq\t{%2, %0|%0, %2}"
4546 [(set_attr "type" "sselog")
4547 (set_attr "prefix_data16" "1")
4548 (set_attr "mode" "TI")])
;; "punpcklqdq" on V2DI.  The index list starts with element 0.
;; Operand 1 is tied to the destination; operand 2 is an SSE register or
;; memory.
4550 (define_insn "sse2_punpcklqdq"
4551 [(set (match_operand:V2DI 0 "register_operand" "=x")
4554 (match_operand:V2DI 1 "register_operand" "0")
4555 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4556 (parallel [(const_int 0)
4559 "punpcklqdq\t{%2, %0|%0, %2}"
4560 [(set_attr "type" "sselog")
4561 (set_attr "prefix_data16" "1")
4562 (set_attr "mode" "TI")])
;; "pinsrb": inserts the QImode operand 2 (general register or memory)
;; into the V16QI operand 1, which is tied to the destination.
;; Operand 3 must satisfy const_pow2_1_to_32768_operand; the output code
;; rewrites it with exact_log2 before it is printed as the immediate.
4564 (define_insn "*sse4_1_pinsrb"
4565 [(set (match_operand:V16QI 0 "register_operand" "=x")
4567 (vec_duplicate:V16QI
4568 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4569 (match_operand:V16QI 1 "register_operand" "0")
4570 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4573 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4574 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4576 [(set_attr "type" "sselog")
4577 (set_attr "prefix_extra" "1")
4578 (set_attr "mode" "TI")])
;; "pinsrw": inserts the HImode operand 2 (general register or memory)
;; into the V8HI operand 1, which is tied to the destination.
;; Operand 3 must satisfy const_pow2_1_to_128_operand; the output code
;; rewrites it with exact_log2 before it is printed as the immediate.
4580 (define_insn "*sse2_pinsrw"
4581 [(set (match_operand:V8HI 0 "register_operand" "=x")
4584 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4585 (match_operand:V8HI 1 "register_operand" "0")
4586 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4589 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4590 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4592 [(set_attr "type" "sselog")
4593 (set_attr "prefix_data16" "1")
4594 (set_attr "mode" "TI")])
4596 ;; It must come before sse2_loadld since it is preferred.
;; "pinsrd": inserts the SImode operand 2 (general register or memory)
;; into the V4SI operand 1, which is tied to the destination.
;; Operand 3 must satisfy const_pow2_1_to_8_operand; the output code
;; rewrites it with exact_log2 before it is printed as the immediate.
4597 (define_insn "*sse4_1_pinsrd"
4598 [(set (match_operand:V4SI 0 "register_operand" "=x")
4601 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4602 (match_operand:V4SI 1 "register_operand" "0")
4603 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4606 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4607 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4609 [(set_attr "type" "sselog")
4610 (set_attr "prefix_extra" "1")
4611 (set_attr "mode" "TI")])
;; "pinsrq": inserts the DImode operand 2 (general register or memory)
;; into the V2DI operand 1, which is tied to the destination.
;; Operand 3 must satisfy const_pow2_1_to_2_operand; the output code
;; rewrites it with exact_log2 before it is printed as the immediate.
4613 (define_insn "*sse4_1_pinsrq"
4614 [(set (match_operand:V2DI 0 "register_operand" "=x")
4617 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4618 (match_operand:V2DI 1 "register_operand" "0")
4619 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4622 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4623 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4625 [(set_attr "type" "sselog")
4626 (set_attr "prefix_extra" "1")
4627 (set_attr "mode" "TI")])
;; "pextrb" into a general register: selects the element of the V16QI
;; register operand 1 whose index is the constant operand 2 (0..15) and
;; writes an SImode destination.
4629 (define_insn "*sse4_1_pextrb"
4630 [(set (match_operand:SI 0 "register_operand" "=r")
4633 (match_operand:V16QI 1 "register_operand" "x")
4634 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4636 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4637 [(set_attr "type" "sselog")
4638 (set_attr "prefix_extra" "1")
4639 (set_attr "mode" "TI")])
;; "pextrb" straight to memory: selects the element of the V16QI
;; register operand 1 whose index is the constant operand 2 (0..15) and
;; stores it to a QImode memory destination.
4641 (define_insn "*sse4_1_pextrb_memory"
4642 [(set (match_operand:QI 0 "memory_operand" "=m")
4644 (match_operand:V16QI 1 "register_operand" "x")
4645 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4647 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4648 [(set_attr "type" "sselog")
4649 (set_attr "prefix_extra" "1")
4650 (set_attr "mode" "TI")])
;; "pextrw" into a general register: selects the element of the V8HI
;; register operand 1 whose index is the constant operand 2 (0..7) and
;; writes an SImode destination.
4652 (define_insn "*sse2_pextrw"
4653 [(set (match_operand:SI 0 "register_operand" "=r")
4656 (match_operand:V8HI 1 "register_operand" "x")
4657 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4659 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4660 [(set_attr "type" "sselog")
4661 (set_attr "prefix_data16" "1")
4662 (set_attr "mode" "TI")])
;; "pextrw" straight to memory: selects the element of the V8HI
;; register operand 1 whose index is the constant operand 2 (0..7) and
;; stores it to an HImode memory destination.
4664 (define_insn "*sse4_1_pextrw_memory"
4665 [(set (match_operand:HI 0 "memory_operand" "=m")
4667 (match_operand:V8HI 1 "register_operand" "x")
4668 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4670 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4671 [(set_attr "type" "sselog")
4672 (set_attr "prefix_extra" "1")
4673 (set_attr "mode" "TI")])
;; "pextrd": selects the element of the V4SI register operand 1 whose
;; index is the constant operand 2 (0..3); the SImode destination may be
;; a general register or memory ("=rm").
4675 (define_insn "*sse4_1_pextrd"
4676 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4678 (match_operand:V4SI 1 "register_operand" "x")
4679 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4681 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4682 [(set_attr "type" "sselog")
4683 (set_attr "prefix_extra" "1")
4684 (set_attr "mode" "TI")])
4686 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; "pextrq": selects the element of the V2DI register operand 1 whose
;; index is the constant operand 2 (0..1); the DImode destination may be
;; a general register or memory ("=rm").  Enabled only when both
;; TARGET_SSE4_1 and TARGET_64BIT hold.
4687 (define_insn "*sse4_1_pextrq"
4688 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4690 (match_operand:V2DI 1 "register_operand" "x")
4691 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4692 "TARGET_SSE4_1 && TARGET_64BIT"
4693 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4694 [(set_attr "type" "sselog")
4695 (set_attr "prefix_extra" "1")
4696 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  The
;; value is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) and
;; each field is passed as a separate constant to gen_sse2_pshufd_1.
4698 (define_expand "sse2_pshufd"
4699 [(match_operand:V4SI 0 "register_operand" "")
4700 (match_operand:V4SI 1 "nonimmediate_operand" "")
4701 (match_operand:SI 2 "const_int_operand" "")]
4704 int mask = INTVAL (operands[2]);
4705 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4706 GEN_INT ((mask >> 0) & 3),
4707 GEN_INT ((mask >> 2) & 3),
4708 GEN_INT ((mask >> 4) & 3),
4709 GEN_INT ((mask >> 6) & 3)));
;; "pshufd" with the selection spelled out as four separate indices
;; (operands 2..5, each 0..3).  The output code packs them back into one
;; immediate, two bits per index starting at bit 0, and stores that
;; value in operands[2] for printing.
4713 (define_insn "sse2_pshufd_1"
4714 [(set (match_operand:V4SI 0 "register_operand" "=x")
4716 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4717 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4718 (match_operand 3 "const_0_to_3_operand" "")
4719 (match_operand 4 "const_0_to_3_operand" "")
4720 (match_operand 5 "const_0_to_3_operand" "")])))]
4724 mask |= INTVAL (operands[2]) << 0;
4725 mask |= INTVAL (operands[3]) << 2;
4726 mask |= INTVAL (operands[4]) << 4;
4727 mask |= INTVAL (operands[5]) << 6;
4728 operands[2] = GEN_INT (mask);
4730 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4732 [(set_attr "type" "sselog1")
4733 (set_attr "prefix_data16" "1")
4734 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  The
;; value is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) and
;; each field is passed as a separate constant to gen_sse2_pshuflw_1.
4736 (define_expand "sse2_pshuflw"
4737 [(match_operand:V8HI 0 "register_operand" "")
4738 (match_operand:V8HI 1 "nonimmediate_operand" "")
4739 (match_operand:SI 2 "const_int_operand" "")]
4742 int mask = INTVAL (operands[2]);
4743 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4744 GEN_INT ((mask >> 0) & 3),
4745 GEN_INT ((mask >> 2) & 3),
4746 GEN_INT ((mask >> 4) & 3),
4747 GEN_INT ((mask >> 6) & 3)));
;; "pshuflw" with the variable part of the selection spelled out as four
;; indices (operands 2..5, each 0..3).  The output code packs them back
;; into one immediate, two bits per index starting at bit 0, and stores
;; that value in operands[2] for printing.
4751 (define_insn "sse2_pshuflw_1"
4752 [(set (match_operand:V8HI 0 "register_operand" "=x")
4754 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4755 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4756 (match_operand 3 "const_0_to_3_operand" "")
4757 (match_operand 4 "const_0_to_3_operand" "")
4758 (match_operand 5 "const_0_to_3_operand" "")
4766 mask |= INTVAL (operands[2]) << 0;
4767 mask |= INTVAL (operands[3]) << 2;
4768 mask |= INTVAL (operands[4]) << 4;
4769 mask |= INTVAL (operands[5]) << 6;
4770 operands[2] = GEN_INT (mask);
4772 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4774 [(set_attr "type" "sselog")
4775 (set_attr "prefix_rep" "1")
4776 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  The
;; value is split into four 2-bit fields and 4 is added to each, so the
;; constants handed to gen_sse2_pshufhw_1 lie in the range 4..7.
4778 (define_expand "sse2_pshufhw"
4779 [(match_operand:V8HI 0 "register_operand" "")
4780 (match_operand:V8HI 1 "nonimmediate_operand" "")
4781 (match_operand:SI 2 "const_int_operand" "")]
4784 int mask = INTVAL (operands[2]);
4785 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4786 GEN_INT (((mask >> 0) & 3) + 4),
4787 GEN_INT (((mask >> 2) & 3) + 4),
4788 GEN_INT (((mask >> 4) & 3) + 4),
4789 GEN_INT (((mask >> 6) & 3) + 4)));
;; "pshufhw" with the variable part of the selection spelled out as four
;; indices (operands 2..5, each 4..7).  The output code subtracts 4 from
;; each index, packs the results two bits apiece starting at bit 0, and
;; stores the immediate in operands[2] for printing.
4793 (define_insn "sse2_pshufhw_1"
4794 [(set (match_operand:V8HI 0 "register_operand" "=x")
4796 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4797 (parallel [(const_int 0)
4801 (match_operand 2 "const_4_to_7_operand" "")
4802 (match_operand 3 "const_4_to_7_operand" "")
4803 (match_operand 4 "const_4_to_7_operand" "")
4804 (match_operand 5 "const_4_to_7_operand" "")])))]
4808 mask |= (INTVAL (operands[2]) - 4) << 0;
4809 mask |= (INTVAL (operands[3]) - 4) << 2;
4810 mask |= (INTVAL (operands[4]) - 4) << 4;
4811 mask |= (INTVAL (operands[5]) - 4) << 6;
4812 operands[2] = GEN_INT (mask);
4814 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4816 [(set_attr "type" "sselog")
4817 (set_attr "prefix_rep" "1")
4818 (set_attr "mode" "TI")])
;; Expander that places an SImode value (register or memory) into a
;; V4SI register.  The preparation statement supplies operands[2] as the
;; all-zero V4SI constant, CONST0_RTX (V4SImode).
4820 (define_expand "sse2_loadd"
4821 [(set (match_operand:V4SI 0 "register_operand" "")
4824 (match_operand:SI 1 "nonimmediate_operand" ""))
4828 "operands[2] = CONST0_RTX (V4SImode);")
;; Places the SImode operand 2 into a V4SI register; operand 1 is either
;; zero ("C") or, in the last alternative, tied to the destination.
;; Four alternatives, in order: movd from memory, movd from a general
;; register, movss from memory, movss from an SSE register.
4830 (define_insn "sse2_loadld"
4831 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4834 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4835 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4839 movd\t{%2, %0|%0, %2}
4840 movd\t{%2, %0|%0, %2}
4841 movss\t{%2, %0|%0, %2}
4842 movss\t{%2, %0|%0, %2}"
4843 [(set_attr "type" "ssemov")
4844 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Stores element 0 of a V4SI register into an SImode destination
;; (memory, SSE register, or general register).  After reload the insn
;; is split into a plain move when TARGET_INTER_UNIT_MOVES is set, the
;; destination is memory, or the destination is not a general register;
;; the split re-creates operand 1 as an SImode REG with the same REGNO.
4846 (define_insn_and_split "sse2_stored"
4847 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4849 (match_operand:V4SI 1 "register_operand" "x,Yi")
4850 (parallel [(const_int 0)])))]
4853 "&& reload_completed
4854 && (TARGET_INTER_UNIT_MOVES
4855 || MEM_P (operands [0])
4856 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4857 [(set (match_dup 0) (match_dup 1))]
4859 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extraction of one SImode element from a V4SI held in offsettable
;; memory ("o") into a general register.  The split code emits an
;; ordinary move from the memory operand re-addressed as SImode at byte
;; offset 4 * INTVAL (operands[2]).
4862 (define_insn_and_split "*vec_ext_v4si_mem"
4863 [(set (match_operand:SI 0 "register_operand" "=r")
4865 (match_operand:V4SI 1 "memory_operand" "o")
4866 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4872 int i = INTVAL (operands[2]);
4874 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander storing element 0 of a V2DI register into a DImode
;; destination that may be a register or memory.
4878 (define_expand "sse_storeq"
4879 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4881 (match_operand:V2DI 1 "register_operand" "")
4882 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DImode store from a V2DI operand.
;; Requires TARGET_64BIT and rejects the case where both operands are
;; memory.  Three alternatives; the third reads offsettable memory into
;; a general register with "mov{q}" and has type "imov", mode DI.
4886 (define_insn "*sse2_storeq_rex64"
4887 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4889 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4890 (parallel [(const_int 0)])))]
4891 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4895 mov{q}\t{%1, %0|%0, %1}"
4896 [(set_attr "type" "*,*,imov")
4897 (set_attr "mode" "*,*,DI")])
;; Element-0 store of a V2DI register into a DImode destination that is
;; memory or an SSE register ("=mx").  The pattern that follows repeats
;; the same set without constraints and replaces it with a plain move
;; whose source is operand 1 re-created as a DImode REG with the same
;; REGNO; the visible part of its condition requires
;; TARGET_INTER_UNIT_MOVES, a memory destination, or a destination that
;; is not a general register.
4899 (define_insn "*sse2_storeq"
4900 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4902 (match_operand:V2DI 1 "register_operand" "x")
4903 (parallel [(const_int 0)])))]
4908 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4910 (match_operand:V2DI 1 "register_operand" "")
4911 (parallel [(const_int 0)])))]
4914 && (TARGET_INTER_UNIT_MOVES
4915 || MEM_P (operands [0])
4916 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4917 [(set (match_dup 0) (match_dup 1))]
4919 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extraction of element 1 of a V2DI into a DImode destination on
;; TARGET_64BIT, rejecting memory-to-memory.  Four alternatives, in
;; order: movhps to memory, psrldq by 8 in place, movq from memory into
;; an SSE register, mov{q} from memory into a general register.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; memory operand -- confirm against the operand-printing code.
4922 (define_insn "*vec_extractv2di_1_rex64"
4923 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4925 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4926 (parallel [(const_int 1)])))]
4927 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4929 movhps\t{%1, %0|%0, %1}
4930 psrldq\t{$8, %0|%0, 8}
4931 movq\t{%H1, %0|%0, %H1}
4932 mov{q}\t{%H1, %0|%0, %H1}"
4933 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4934 (set_attr "memory" "*,none,*,*")
4935 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extraction of element 1 of a V2DI into a DImode destination for
;; TARGET_SSE2, rejecting memory-to-memory (the start of the condition
;; is not visible here).  Three alternatives, in order: movhps to
;; memory, psrldq by 8 in place, movq from memory into an SSE register.
4937 (define_insn "*vec_extractv2di_1_sse2"
4938 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4940 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4941 (parallel [(const_int 1)])))]
4943 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4945 movhps\t{%1, %0|%0, %1}
4946 psrldq\t{$8, %0|%0, 8}
4947 movq\t{%H1, %0|%0, %H1}"
4948 [(set_attr "type" "ssemov,sseishft,ssemov")
4949 (set_attr "memory" "*,none,*")
4950 (set_attr "mode" "V2SF,TI,TI")])
4952 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extraction of element 1 of a V2DI into a DImode destination for SSE
;; without SSE2, rejecting memory-to-memory.  Three alternatives, in
;; order: movhps to memory, movhlps between SSE registers, movlps from
;; memory into an SSE register.
4953 (define_insn "*vec_extractv2di_1_sse"
4954 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4956 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4957 (parallel [(const_int 1)])))]
4958 "!TARGET_SSE2 && TARGET_SSE
4959 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4961 movhps\t{%1, %0|%0, %1}
4962 movhlps\t{%1, %0|%0, %1}
4963 movlps\t{%H1, %0|%0, %H1}"
4964 [(set_attr "type" "ssemov")
4965 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produces a V4SI from an SImode register operand.  Two alternatives:
;; "pshufd $0" from a separate source register, or "shufps $0" applied
;; in place when the source is tied to the destination.
4967 (define_insn "*vec_dupv4si"
4968 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4970 (match_operand:SI 1 "register_operand" " Y2,0")))]
4973 pshufd\t{$0, %1, %0|%0, %1, 0}
4974 shufps\t{$0, %0, %0|%0, %0, 0}"
4975 [(set_attr "type" "sselog1")
4976 (set_attr "mode" "TI,V4SF")])
;; Produces a V2DI from a DImode register operand that is tied to the
;; destination in both alternatives.  The alternatives are typed
;; sselog1 (mode TI) and ssemov (mode V4SF); the output templates are
;; not visible here.
4978 (define_insn "*vec_dupv2di"
4979 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4981 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4986 [(set_attr "type" "sselog1,ssemov")
4987 (set_attr "mode" "TI,V4SF")])
4989 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4990 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4991 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI from two SImode operands.  Four alternatives: the first
;; two target "Y2" registers (punpckldq with operand 1 tied to the
;; destination, or movd when operand 2 is zero), the last two target
;; "*y" registers with the same two instructions and MMX insn types.
4992 (define_insn "*sse2_concatv2si"
4993 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4995 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4996 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4999 punpckldq\t{%2, %0|%0, %2}
5000 movd\t{%1, %0|%0, %1}
5001 punpckldq\t{%2, %0|%0, %2}
5002 movd\t{%1, %0|%0, %1}"
5003 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5004 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI from two SImode operands.  Four alternatives: unpcklps
;; or movss (operand 2 zero, operand 1 in memory) for "x" registers, and
;; punpckldq or movd for "*y" registers.
5006 (define_insn "*sse1_concatv2si"
5007 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
5009 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
5010 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
5013 unpcklps\t{%2, %0|%0, %2}
5014 movss\t{%1, %0|%0, %1}
5015 punpckldq\t{%2, %0|%0, %2}
5016 movd\t{%1, %0|%0, %1}"
5017 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5018 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI from two V2SI operands; operand 1 is always tied to the
;; destination.  Three alternatives by where operand 2 lives:
;; punpcklqdq ("Y2" register), movlhps ("x" register), movhps (memory).
5020 (define_insn "*vec_concatv4si_1"
5021 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
5023 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
5024 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
5027 punpcklqdq\t{%2, %0|%0, %2}
5028 movlhps\t{%2, %0|%0, %2}
5029 movhps\t{%2, %0|%0, %2}"
5030 [(set_attr "type" "sselog,ssemov,ssemov")
5031 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI from two DImode operands on 32-bit SSE targets
;; (!TARGET_64BIT && TARGET_SSE).  Six alternatives, in order: movq from
;; memory with operand 2 zero, movq2dq from a "y" register with operand 2
;; zero, punpcklqdq, movlhps, movhps from memory, and movlps from memory
;; when operand 2 is tied to the destination.
5033 (define_insn "vec_concatv2di"
5034 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
5036 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
5037 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
5038 "!TARGET_64BIT && TARGET_SSE"
5040 movq\t{%1, %0|%0, %1}
5041 movq2dq\t{%1, %0|%0, %1}
5042 punpcklqdq\t{%2, %0|%0, %2}
5043 movlhps\t{%2, %0|%0, %2}
5044 movhps\t{%2, %0|%0, %2}
5045 movlps\t{%1, %0|%0, %1}"
5046 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5047 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Builds a V2DI from two DImode operands.  Seven alternatives: the same
;; six instruction choices as vec_concatv2di plus a second "movq" form
;; whose source is a general register ("r" into "Yi") with operand 2
;; zero.  The enabling condition is not visible here.
5049 (define_insn "*vec_concatv2di_rex"
5050 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
5052 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
5053 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
5056 movq\t{%1, %0|%0, %1}
5057 movq\t{%1, %0|%0, %1}
5058 movq2dq\t{%1, %0|%0, %1}
5059 punpcklqdq\t{%2, %0|%0, %2}
5060 movlhps\t{%2, %0|%0, %2}
5061 movhps\t{%2, %0|%0, %2}
5062 movlps\t{%1, %0|%0, %1}"
5063 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5064 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Element-set expander for V2DI: operand 0 is the vector register,
;; operand 1 the DImode value, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_set with its
;; first argument false.
5066 (define_expand "vec_setv2di"
5067 [(match_operand:V2DI 0 "register_operand" "")
5068 (match_operand:DI 1 "register_operand" "")
5069 (match_operand 2 "const_int_operand" "")]
5072 ix86_expand_vector_set (false, operands[0], operands[1],
5073 INTVAL (operands[2]));
;; Element-extract expander for V2DI: operand 0 is the DImode result,
;; operand 1 the vector register, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_extract with its
;; first argument false.
5077 (define_expand "vec_extractv2di"
5078 [(match_operand:DI 0 "register_operand" "")
5079 (match_operand:V2DI 1 "register_operand" "")
5080 (match_operand 2 "const_int_operand" "")]
5083 ix86_expand_vector_extract (false, operands[0], operands[1],
5084 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DI.  Operand 1 has no predicate
;; or mode; both operands are handed to ix86_expand_vector_init with its
;; first argument false.
5088 (define_expand "vec_initv2di"
5089 [(match_operand:V2DI 0 "register_operand" "")
5090 (match_operand 1 "" "")]
5093 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V4SI: operand 0 is the vector register,
;; operand 1 the SImode value, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_set with its
;; first argument false.
5097 (define_expand "vec_setv4si"
5098 [(match_operand:V4SI 0 "register_operand" "")
5099 (match_operand:SI 1 "register_operand" "")
5100 (match_operand 2 "const_int_operand" "")]
5103 ix86_expand_vector_set (false, operands[0], operands[1],
5104 INTVAL (operands[2]));
;; Element-extract expander for V4SI: operand 0 is the SImode result,
;; operand 1 the vector register, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_extract with its
;; first argument false.
5108 (define_expand "vec_extractv4si"
5109 [(match_operand:SI 0 "register_operand" "")
5110 (match_operand:V4SI 1 "register_operand" "")
5111 (match_operand 2 "const_int_operand" "")]
5114 ix86_expand_vector_extract (false, operands[0], operands[1],
5115 INTVAL (operands[2]));
;; Vector-initialisation expander for V4SI.  Operand 1 has no predicate
;; or mode; both operands are handed to ix86_expand_vector_init with its
;; first argument false.
5119 (define_expand "vec_initv4si"
5120 [(match_operand:V4SI 0 "register_operand" "")
5121 (match_operand 1 "" "")]
5124 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V8HI: operand 0 is the vector register,
;; operand 1 the HImode value, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_set with its
;; first argument false.
5128 (define_expand "vec_setv8hi"
5129 [(match_operand:V8HI 0 "register_operand" "")
5130 (match_operand:HI 1 "register_operand" "")
5131 (match_operand 2 "const_int_operand" "")]
5134 ix86_expand_vector_set (false, operands[0], operands[1],
5135 INTVAL (operands[2]));
;; Element-extract expander for V8HI: operand 0 is the HImode result,
;; operand 1 the vector register, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_extract with its
;; first argument false.
5139 (define_expand "vec_extractv8hi"
5140 [(match_operand:HI 0 "register_operand" "")
5141 (match_operand:V8HI 1 "register_operand" "")
5142 (match_operand 2 "const_int_operand" "")]
5145 ix86_expand_vector_extract (false, operands[0], operands[1],
5146 INTVAL (operands[2]));
;; Vector-initialisation expander for V8HI.  Operand 1 has no predicate
;; or mode; both operands are handed to ix86_expand_vector_init with its
;; first argument false.
5150 (define_expand "vec_initv8hi"
5151 [(match_operand:V8HI 0 "register_operand" "")
5152 (match_operand 1 "" "")]
5155 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V16QI: operand 0 is the vector register,
;; operand 1 the QImode value, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_set with its
;; first argument false.
5159 (define_expand "vec_setv16qi"
5160 [(match_operand:V16QI 0 "register_operand" "")
5161 (match_operand:QI 1 "register_operand" "")
5162 (match_operand 2 "const_int_operand" "")]
5165 ix86_expand_vector_set (false, operands[0], operands[1],
5166 INTVAL (operands[2]));
;; Element-extract expander for V16QI: operand 0 is the QImode result,
;; operand 1 the vector register, operand 2 a constant passed on as
;; INTVAL.  All work is delegated to ix86_expand_vector_extract with its
;; first argument false.
5170 (define_expand "vec_extractv16qi"
5171 [(match_operand:QI 0 "register_operand" "")
5172 (match_operand:V16QI 1 "register_operand" "")
5173 (match_operand 2 "const_int_operand" "")]
5176 ix86_expand_vector_extract (false, operands[0], operands[1],
5177 INTVAL (operands[2]));
;; Vector-initialisation expander for V16QI.  Operand 1 has no predicate
;; or mode; both operands are handed to ix86_expand_vector_init with its
;; first argument false.
5181 (define_expand "vec_initv16qi"
5182 [(match_operand:V16QI 0 "register_operand" "")
5183 (match_operand 1 "" "")]
5186 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Unpack expander from V16QI to V8HI ("unsigned, high" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (true, true).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5190 (define_expand "vec_unpacku_hi_v16qi"
5191 [(match_operand:V8HI 0 "register_operand" "")
5192 (match_operand:V16QI 1 "register_operand" "")]
5196 ix86_expand_sse4_unpack (operands, true, true);
5197 else if (TARGET_SSE5)
5198 ix86_expand_sse5_unpack (operands, true, true);
5200 ix86_expand_sse_unpack (operands, true, true);
;; Unpack expander from V16QI to V8HI ("signed, high" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (false, true).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5204 (define_expand "vec_unpacks_hi_v16qi"
5205 [(match_operand:V8HI 0 "register_operand" "")
5206 (match_operand:V16QI 1 "register_operand" "")]
5210 ix86_expand_sse4_unpack (operands, false, true);
5211 else if (TARGET_SSE5)
5212 ix86_expand_sse5_unpack (operands, false, true);
5214 ix86_expand_sse_unpack (operands, false, true);
;; Unpack expander from V16QI to V8HI ("unsigned, low" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (true, false).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5218 (define_expand "vec_unpacku_lo_v16qi"
5219 [(match_operand:V8HI 0 "register_operand" "")
5220 (match_operand:V16QI 1 "register_operand" "")]
5224 ix86_expand_sse4_unpack (operands, true, false);
5225 else if (TARGET_SSE5)
5226 ix86_expand_sse5_unpack (operands, true, false);
5228 ix86_expand_sse_unpack (operands, true, false);
;; Unpack expander from V16QI to V8HI ("signed, low" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (false, false).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5232 (define_expand "vec_unpacks_lo_v16qi"
5233 [(match_operand:V8HI 0 "register_operand" "")
5234 (match_operand:V16QI 1 "register_operand" "")]
5238 ix86_expand_sse4_unpack (operands, false, false);
5239 else if (TARGET_SSE5)
5240 ix86_expand_sse5_unpack (operands, false, false);
5242 ix86_expand_sse_unpack (operands, false, false);
;; Unpack expander from V8HI to V4SI ("unsigned, high" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (true, true).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5246 (define_expand "vec_unpacku_hi_v8hi"
5247 [(match_operand:V4SI 0 "register_operand" "")
5248 (match_operand:V8HI 1 "register_operand" "")]
5252 ix86_expand_sse4_unpack (operands, true, true);
5253 else if (TARGET_SSE5)
5254 ix86_expand_sse5_unpack (operands, true, true);
5256 ix86_expand_sse_unpack (operands, true, true);
;; Unpack expander from V8HI to V4SI ("signed, high" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (false, true).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5260 (define_expand "vec_unpacks_hi_v8hi"
5261 [(match_operand:V4SI 0 "register_operand" "")
5262 (match_operand:V8HI 1 "register_operand" "")]
5266 ix86_expand_sse4_unpack (operands, false, true);
5267 else if (TARGET_SSE5)
5268 ix86_expand_sse5_unpack (operands, false, true);
5270 ix86_expand_sse_unpack (operands, false, true);
;; Unpack expander from V8HI to V4SI ("unsigned, low" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (true, false).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5274 (define_expand "vec_unpacku_lo_v8hi"
5275 [(match_operand:V4SI 0 "register_operand" "")
5276 (match_operand:V8HI 1 "register_operand" "")]
5280 ix86_expand_sse4_unpack (operands, true, false);
5281 else if (TARGET_SSE5)
5282 ix86_expand_sse5_unpack (operands, true, false);
5284 ix86_expand_sse_unpack (operands, true, false);
;; Unpack expander from V8HI to V4SI ("signed, low" per its name).
;; Calls one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always with the flag pair
;; (false, false).
;; NOTE(review): the guard on the first call is not visible here; the
;; flags presumably mean unsigned / high half -- confirm in the helpers.
5288 (define_expand "vec_unpacks_lo_v8hi"
5289 [(match_operand:V4SI 0 "register_operand" "")
5290 (match_operand:V8HI 1 "register_operand" "")]
5294 ix86_expand_sse4_unpack (operands, false, false);
5295 else if (TARGET_SSE5)
5296 ix86_expand_sse5_unpack (operands, false, false);
5298 ix86_expand_sse_unpack (operands, false, false);
;; Standard-name expander vec_unpacku_hi_v4si: V4SI register operand 1,
;; V2DI register operand 0.  The body hands the operands to the SSE4,
;; SSE5 (TARGET_SSE5) or generic SSE unpack helper with flags (true, true).
;; NOTE(review): in the sibling expanders the first flag follows
;; "unpacku"/"unpacks" and the second follows "_hi_"/"_lo_" -- presumably
;; (unsigned_p, high_p); confirm against the helper definitions.
5302 (define_expand "vec_unpacku_hi_v4si"
5303 [(match_operand:V2DI 0 "register_operand" "")
5304 (match_operand:V4SI 1 "register_operand" "")]
5308 ix86_expand_sse4_unpack (operands, true, true);
5309 else if (TARGET_SSE5)
5310 ix86_expand_sse5_unpack (operands, true, true);
5312 ix86_expand_sse_unpack (operands, true, true);
;; Standard-name expander vec_unpacks_hi_v4si: V4SI register operand 1,
;; V2DI register operand 0.  The body hands the operands to the SSE4,
;; SSE5 (TARGET_SSE5) or generic SSE unpack helper with flags (false, true).
;; NOTE(review): in the sibling expanders the first flag follows
;; "unpacku"/"unpacks" and the second follows "_hi_"/"_lo_" -- presumably
;; (unsigned_p, high_p); confirm against the helper definitions.
5316 (define_expand "vec_unpacks_hi_v4si"
5317 [(match_operand:V2DI 0 "register_operand" "")
5318 (match_operand:V4SI 1 "register_operand" "")]
5322 ix86_expand_sse4_unpack (operands, false, true);
5323 else if (TARGET_SSE5)
5324 ix86_expand_sse5_unpack (operands, false, true);
5326 ix86_expand_sse_unpack (operands, false, true);
;; Standard-name expander vec_unpacku_lo_v4si: V4SI register operand 1,
;; V2DI register operand 0.  The body hands the operands to the SSE4,
;; SSE5 (TARGET_SSE5) or generic SSE unpack helper with flags (true, false).
;; NOTE(review): in the sibling expanders the first flag follows
;; "unpacku"/"unpacks" and the second follows "_hi_"/"_lo_" -- presumably
;; (unsigned_p, high_p); confirm against the helper definitions.
5330 (define_expand "vec_unpacku_lo_v4si"
5331 [(match_operand:V2DI 0 "register_operand" "")
5332 (match_operand:V4SI 1 "register_operand" "")]
5336 ix86_expand_sse4_unpack (operands, true, false);
5337 else if (TARGET_SSE5)
5338 ix86_expand_sse5_unpack (operands, true, false);
5340 ix86_expand_sse_unpack (operands, true, false);
;; Standard-name expander vec_unpacks_lo_v4si: V4SI register operand 1,
;; V2DI register operand 0.  The body hands the operands to the SSE4,
;; SSE5 (TARGET_SSE5) or generic SSE unpack helper with flags (false, false).
;; NOTE(review): in the sibling expanders the first flag follows
;; "unpacku"/"unpacks" and the second follows "_hi_"/"_lo_" -- presumably
;; (unsigned_p, high_p); confirm against the helper definitions.
5344 (define_expand "vec_unpacks_lo_v4si"
5345 [(match_operand:V2DI 0 "register_operand" "")
5346 (match_operand:V4SI 1 "register_operand" "")]
5350 ix86_expand_sse4_unpack (operands, false, false);
5351 else if (TARGET_SSE5)
5352 ix86_expand_sse5_unpack (operands, false, false);
5354 ix86_expand_sse_unpack (operands, false, false);
5358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb on V16QI.  The pattern combines operands 1 and 2 with an all-ones
;; V16QI constant vector.  Operand 1 is tied to the destination and marked
;; commutative with operand 2 ("%0"); operand 2 may be an SSE register or
;; memory ("xm").  Enabled for TARGET_SSE2 when ix86_binary_operator_ok
;; accepts the operands as a PLUS.
5364 (define_insn "sse2_uavgv16qi3"
5365 [(set (match_operand:V16QI 0 "register_operand" "=x")
5371 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5373 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5374 (const_vector:V16QI [(const_int 1) (const_int 1)
5375 (const_int 1) (const_int 1)
5376 (const_int 1) (const_int 1)
5377 (const_int 1) (const_int 1)
5378 (const_int 1) (const_int 1)
5379 (const_int 1) (const_int 1)
5380 (const_int 1) (const_int 1)
5381 (const_int 1) (const_int 1)]))
5383 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5384 "pavgb\t{%2, %0|%0, %2}"
5385 [(set_attr "type" "sseiadd")
5386 (set_attr "prefix_data16" "1")
5387 (set_attr "mode" "TI")])
;; pavgw on V8HI.  The pattern combines operands 1 and 2 with an all-ones
;; V8HI constant vector.  Operand 1 is tied to the destination and marked
;; commutative with operand 2 ("%0"); operand 2 may be an SSE register or
;; memory ("xm").  Enabled for TARGET_SSE2 when ix86_binary_operator_ok
;; accepts the operands as a PLUS.
5389 (define_insn "sse2_uavgv8hi3"
5390 [(set (match_operand:V8HI 0 "register_operand" "=x")
5396 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5398 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5399 (const_vector:V8HI [(const_int 1) (const_int 1)
5400 (const_int 1) (const_int 1)
5401 (const_int 1) (const_int 1)
5402 (const_int 1) (const_int 1)]))
5404 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5405 "pavgw\t{%2, %0|%0, %2}"
5406 [(set_attr "type" "sseiadd")
5407 (set_attr "prefix_data16" "1")
5408 (set_attr "mode" "TI")])
5410 ;; The correct representation for this is absolutely enormous, and
5411 ;; surely not generally useful.
;; The operation is therefore modelled as an opaque unspec: two V16QI
;; inputs (operand 1 tied to the destination, operand 2 register or
;; memory) producing a V2DI result, emitted as psadbw.
5412 (define_insn "sse2_psadbw"
5413 [(set (match_operand:V2DI 0 "register_operand" "=x")
5414 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5415 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5418 "psadbw\t{%2, %0|%0, %2}"
5419 [(set_attr "type" "sseiadd")
5420 (set_attr "prefix_data16" "1")
5421 (set_attr "mode" "TI")])
;; movmskps / movmskpd.  Iterates over SSEMODEF2P (V4SF, V2DF); the <sse>
;; and <ssemodesuffixf2c> attributes yield the names sse_movmskps and
;; sse2_movmskpd.  Source is an SSE register, destination a general SI
;; register ("=r").  Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
5423 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5424 [(set (match_operand:SI 0 "register_operand" "=r")
5426 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5428 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5429 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5430 [(set_attr "type" "ssecvt")
5431 (set_attr "mode" "<MODE>")])
;; pmovmskb: unspec taking a V16QI SSE register (operand 1) and writing a
;; general SI register (operand 0, "=r").
5433 (define_insn "sse2_pmovmskb"
5434 [(set (match_operand:SI 0 "register_operand" "=r")
5435 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5438 "pmovmskb\t{%1, %0|%0, %1}"
5439 [(set_attr "type" "ssecvt")
5440 (set_attr "prefix_data16" "1")
5441 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu: a V16QI memory destination (operand 0)
;; set from an unspec over two V16QI register operands (1 and 2).
;; NOTE(review): presumably matched by the *sse2_maskmovdqu and
;; *sse2_maskmovdqu_rex64 insns that follow -- confirm.
5443 (define_expand "sse2_maskmovdqu"
5444 [(set (match_operand:V16QI 0 "memory_operand" "")
5445 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5446 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store address is
;; an SI register constrained to "D"; it does not appear in the output
;; template.  The unspec also reads the previous contents of the
;; destination, (mem:V16QI (match_dup 0)), alongside the two SSE register
;; operands 1 and 2.
5452 (define_insn "*sse2_maskmovdqu"
5453 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5454 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5455 (match_operand:V16QI 2 "register_operand" "x")
5456 (mem:V16QI (match_dup 0))]
5458 "TARGET_SSE2 && !TARGET_64BIT"
5459 ;; @@@ check ordering of operands in intel/nonintel syntax
5460 "maskmovdqu\t{%2, %1|%1, %2}"
5461 [(set_attr "type" "ssecvt")
5462 (set_attr "prefix_data16" "1")
5463 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the
;; 32-bit pattern except that the address register (operand 0, constraint
;; "D") is DImode.  The unspec reads the previous destination contents via
;; (mem:V16QI (match_dup 0)) together with SSE register operands 1 and 2.
5465 (define_insn "*sse2_maskmovdqu_rex64"
5466 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5467 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5468 (match_operand:V16QI 2 "register_operand" "x")
5469 (mem:V16QI (match_dup 0))]
5471 "TARGET_SSE2 && TARGET_64BIT"
5472 ;; @@@ check ordering of operands in intel/nonintel syntax
5473 "maskmovdqu\t{%2, %1|%1, %2}"
5474 [(set_attr "type" "ssecvt")
5475 (set_attr "prefix_data16" "1")
5476 (set_attr "mode" "TI")])
;; ldmxcsr: volatile unspec whose only operand is an SI memory location
;; ("m"); the "memory" attribute marks the insn as a load.
5478 (define_insn "sse_ldmxcsr"
5479 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5483 [(set_attr "type" "sse")
5484 (set_attr "memory" "load")])
;; stmxcsr: sets an SI memory operand ("=m") from the volatile unspec
;; UNSPECV_STMXCSR; the "memory" attribute marks the insn as a store.
5486 (define_insn "sse_stmxcsr"
5487 [(set (match_operand:SI 0 "memory_operand" "=m")
5488 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5491 [(set_attr "type" "sse")
5492 (set_attr "memory" "store")])
;; Expander for sfence (TARGET_SSE || TARGET_3DNOW_A).  The preparation
;; code builds operand 0 itself: a BLKmode MEM whose address is a Pmode
;; SCRATCH, flagged volatile, which the UNSPEC_SFENCE pattern both reads
;; (match_dup 0) and sets.
5494 (define_expand "sse_sfence"
5496 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5497 "TARGET_SSE || TARGET_3DNOW_A"
5499 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5500 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence pattern: a BLK operand (no predicate, no
;; constraint) set from UNSPEC_SFENCE of itself.  The "memory" attribute
;; is "unknown".
5503 (define_insn "*sse_sfence"
5504 [(set (match_operand:BLK 0 "" "")
5505 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5506 "TARGET_SSE || TARGET_3DNOW_A"
5508 [(set_attr "type" "sse")
5509 (set_attr "memory" "unknown")])
;; clflush: volatile unspec over a modeless address operand
;; (address_operand, constraint "p").  The "memory" attribute is "unknown".
5511 (define_insn "sse2_clflush"
5512 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5516 [(set_attr "type" "sse")
5517 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code builds operand 0 itself: a
;; BLKmode MEM whose address is a Pmode SCRATCH, flagged volatile, which
;; the UNSPEC_MFENCE pattern reads via (match_dup 0).
5519 (define_expand "sse2_mfence"
5521 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5524 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5525 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence pattern: a BLK operand (no predicate, no
;; constraint) set from UNSPEC_MFENCE of itself.  The "memory" attribute
;; is "unknown".
5528 (define_insn "*sse2_mfence"
5529 [(set (match_operand:BLK 0 "" "")
5530 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5533 [(set_attr "type" "sse")
5534 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code builds operand 0 itself: a
;; BLKmode MEM whose address is a Pmode SCRATCH, flagged volatile, which
;; the UNSPEC_LFENCE pattern reads via (match_dup 0).
5536 (define_expand "sse2_lfence"
5538 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5541 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5542 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence pattern: a BLK operand (no predicate, no
;; constraint) set from UNSPEC_LFENCE of itself.  The "memory" attribute
;; is "unknown".
5545 (define_insn "*sse2_lfence"
5546 [(set (match_operand:BLK 0 "" "")
5547 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5550 [(set_attr "type" "sse")
5551 (set_attr "memory" "unknown")])
;; mwait: volatile unspec over two SI register operands pinned to the "a"
;; and "c" register classes.  The same SImode pattern serves 64-bit code,
;; for the reason given in the comment below.  Encoded length is 3 bytes.
5553 (define_insn "sse3_mwait"
5554 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5555 (match_operand:SI 1 "register_operand" "c")]
5558 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5559 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5560 ;; we only need to set up 32bit registers.
5562 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): volatile unspec over
;; three SI register operands pinned to the "a", "c" and "d" register
;; classes.  Encoded length is 3 bytes.
5564 (define_insn "sse3_monitor"
5565 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5566 (match_operand:SI 1 "register_operand" "c")
5567 (match_operand:SI 2 "register_operand" "d")]
5569 "TARGET_SSE3 && !TARGET_64BIT"
5570 "monitor\t%0, %1, %2"
5571 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Only operand 0 (the "a"
;; register) is DImode; operands 1 and 2 ("c", "d") stay SImode for the
;; reason given in the comment below.  Encoded length is 3 bytes.
5573 (define_insn "sse3_monitor64"
5574 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5575 (match_operand:SI 1 "register_operand" "c")
5576 (match_operand:SI 2 "register_operand" "d")]
5578 "TARGET_SSE3 && TARGET_64BIT"
5579 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5580 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5581 ;; zero extended to 64bit, we only need to set up 32bit registers.
5583 [(set_attr "length" "3")])
5585 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5587 ;; SSSE3 instructions
5589 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw, 128-bit form.  The RTL selects the HI elements of operand 1 in
;; adjacent pairs (0,1) (2,3) (4,5) (6,7), then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
5591 (define_insn "ssse3_phaddwv8hi3"
5592 [(set (match_operand:V8HI 0 "register_operand" "=x")
5598 (match_operand:V8HI 1 "register_operand" "0")
5599 (parallel [(const_int 0)]))
5600 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5602 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5603 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5606 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5607 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5609 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5610 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5615 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5616 (parallel [(const_int 0)]))
5617 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5619 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5620 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5623 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5624 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5626 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5627 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5629 "phaddw\t{%2, %0|%0, %2}"
5630 [(set_attr "type" "sseiadd")
5631 (set_attr "prefix_data16" "1")
5632 (set_attr "prefix_extra" "1")
5633 (set_attr "mode" "TI")])
;; phaddw, 64-bit form on MMX registers ("y").  The RTL selects the HI
;; elements of operand 1 in adjacent pairs (0,1) (2,3), then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be an MMX register or memory.
5635 (define_insn "ssse3_phaddwv4hi3"
5636 [(set (match_operand:V4HI 0 "register_operand" "=y")
5641 (match_operand:V4HI 1 "register_operand" "0")
5642 (parallel [(const_int 0)]))
5643 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5645 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5646 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5650 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5651 (parallel [(const_int 0)]))
5652 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5654 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5655 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5657 "phaddw\t{%2, %0|%0, %2}"
5658 [(set_attr "type" "sseiadd")
5659 (set_attr "prefix_extra" "1")
5660 (set_attr "mode" "DI")])
;; phaddd, 128-bit form.  The RTL selects the SI elements of operand 1 in
;; adjacent pairs (0,1) (2,3), then the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.
5662 (define_insn "ssse3_phadddv4si3"
5663 [(set (match_operand:V4SI 0 "register_operand" "=x")
5668 (match_operand:V4SI 1 "register_operand" "0")
5669 (parallel [(const_int 0)]))
5670 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5672 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5673 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5677 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5678 (parallel [(const_int 0)]))
5679 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5681 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5682 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5684 "phaddd\t{%2, %0|%0, %2}"
5685 [(set_attr "type" "sseiadd")
5686 (set_attr "prefix_data16" "1")
5687 (set_attr "prefix_extra" "1")
5688 (set_attr "mode" "TI")])
;; phaddd, 64-bit form on MMX registers ("y").  The RTL selects SI
;; elements 0 and 1 of operand 1, then elements 0 and 1 of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be an MMX register
;; or memory.
5690 (define_insn "ssse3_phadddv2si3"
5691 [(set (match_operand:V2SI 0 "register_operand" "=y")
5695 (match_operand:V2SI 1 "register_operand" "0")
5696 (parallel [(const_int 0)]))
5697 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5700 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5701 (parallel [(const_int 0)]))
5702 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5704 "phaddd\t{%2, %0|%0, %2}"
5705 [(set_attr "type" "sseiadd")
5706 (set_attr "prefix_extra" "1")
5707 (set_attr "mode" "DI")])
;; phaddsw, 128-bit form.  The RTL selects the HI elements of operand 1
;; in adjacent pairs (0,1) (2,3) (4,5) (6,7), then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
5709 (define_insn "ssse3_phaddswv8hi3"
5710 [(set (match_operand:V8HI 0 "register_operand" "=x")
5716 (match_operand:V8HI 1 "register_operand" "0")
5717 (parallel [(const_int 0)]))
5718 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5720 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5721 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5724 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5725 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5727 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5728 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5733 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5734 (parallel [(const_int 0)]))
5735 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5737 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5738 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5741 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5742 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5744 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5745 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5747 "phaddsw\t{%2, %0|%0, %2}"
5748 [(set_attr "type" "sseiadd")
5749 (set_attr "prefix_data16" "1")
5750 (set_attr "prefix_extra" "1")
5751 (set_attr "mode" "TI")])
;; phaddsw, 64-bit form on MMX registers ("y").  The RTL selects the HI
;; elements of operand 1 in adjacent pairs (0,1) (2,3), then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be an MMX register or memory.
5753 (define_insn "ssse3_phaddswv4hi3"
5754 [(set (match_operand:V4HI 0 "register_operand" "=y")
5759 (match_operand:V4HI 1 "register_operand" "0")
5760 (parallel [(const_int 0)]))
5761 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5763 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5764 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5768 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5769 (parallel [(const_int 0)]))
5770 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5772 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5773 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5775 "phaddsw\t{%2, %0|%0, %2}"
5776 [(set_attr "type" "sseiadd")
5777 (set_attr "prefix_extra" "1")
5778 (set_attr "mode" "DI")])
;; phsubw, 128-bit form.  The RTL selects the HI elements of operand 1 in
;; adjacent pairs (0,1) (2,3) (4,5) (6,7), then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
5780 (define_insn "ssse3_phsubwv8hi3"
5781 [(set (match_operand:V8HI 0 "register_operand" "=x")
5787 (match_operand:V8HI 1 "register_operand" "0")
5788 (parallel [(const_int 0)]))
5789 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5791 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5792 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5795 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5796 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5798 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5799 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5804 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5805 (parallel [(const_int 0)]))
5806 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5808 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5809 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5812 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5813 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5815 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5816 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5818 "phsubw\t{%2, %0|%0, %2}"
5819 [(set_attr "type" "sseiadd")
5820 (set_attr "prefix_data16" "1")
5821 (set_attr "prefix_extra" "1")
5822 (set_attr "mode" "TI")])
;; phsubw, 64-bit form on MMX registers ("y").  The RTL selects the HI
;; elements of operand 1 in adjacent pairs (0,1) (2,3), then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be an MMX register or memory.
5824 (define_insn "ssse3_phsubwv4hi3"
5825 [(set (match_operand:V4HI 0 "register_operand" "=y")
5830 (match_operand:V4HI 1 "register_operand" "0")
5831 (parallel [(const_int 0)]))
5832 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5834 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5835 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5839 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5840 (parallel [(const_int 0)]))
5841 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5843 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5844 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5846 "phsubw\t{%2, %0|%0, %2}"
5847 [(set_attr "type" "sseiadd")
5848 (set_attr "prefix_extra" "1")
5849 (set_attr "mode" "DI")])
;; phsubd, 128-bit form.  The RTL selects the SI elements of operand 1 in
;; adjacent pairs (0,1) (2,3), then the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.
5851 (define_insn "ssse3_phsubdv4si3"
5852 [(set (match_operand:V4SI 0 "register_operand" "=x")
5857 (match_operand:V4SI 1 "register_operand" "0")
5858 (parallel [(const_int 0)]))
5859 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5861 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5862 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5866 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5867 (parallel [(const_int 0)]))
5868 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5870 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5871 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5873 "phsubd\t{%2, %0|%0, %2}"
5874 [(set_attr "type" "sseiadd")
5875 (set_attr "prefix_data16" "1")
5876 (set_attr "prefix_extra" "1")
5877 (set_attr "mode" "TI")])
;; phsubd, 64-bit form on MMX registers ("y").  The RTL selects SI
;; elements 0 and 1 of operand 1, then elements 0 and 1 of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be an MMX register
;; or memory.
5879 (define_insn "ssse3_phsubdv2si3"
5880 [(set (match_operand:V2SI 0 "register_operand" "=y")
5884 (match_operand:V2SI 1 "register_operand" "0")
5885 (parallel [(const_int 0)]))
5886 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5889 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5890 (parallel [(const_int 0)]))
5891 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5893 "phsubd\t{%2, %0|%0, %2}"
5894 [(set_attr "type" "sseiadd")
5895 (set_attr "prefix_extra" "1")
5896 (set_attr "mode" "DI")])
;; phsubsw, 128-bit form.  The RTL selects the HI elements of operand 1
;; in adjacent pairs (0,1) (2,3) (4,5) (6,7), then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
5898 (define_insn "ssse3_phsubswv8hi3"
5899 [(set (match_operand:V8HI 0 "register_operand" "=x")
5905 (match_operand:V8HI 1 "register_operand" "0")
5906 (parallel [(const_int 0)]))
5907 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5909 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5910 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5913 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5914 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5916 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5917 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5922 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5923 (parallel [(const_int 0)]))
5924 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5926 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5927 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5930 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5931 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5933 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5934 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5936 "phsubsw\t{%2, %0|%0, %2}"
5937 [(set_attr "type" "sseiadd")
5938 (set_attr "prefix_data16" "1")
5939 (set_attr "prefix_extra" "1")
5940 (set_attr "mode" "TI")])
;; phsubsw, 64-bit form on MMX registers ("y").  The RTL selects the HI
;; elements of operand 1 in adjacent pairs (0,1) (2,3), then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be an MMX register or memory.
5942 (define_insn "ssse3_phsubswv4hi3"
5943 [(set (match_operand:V4HI 0 "register_operand" "=y")
5948 (match_operand:V4HI 1 "register_operand" "0")
5949 (parallel [(const_int 0)]))
5950 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5952 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5953 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5957 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5958 (parallel [(const_int 0)]))
5959 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5961 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5962 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5964 "phsubsw\t{%2, %0|%0, %2}"
5965 [(set_attr "type" "sseiadd")
5966 (set_attr "prefix_extra" "1")
5967 (set_attr "mode" "DI")])
;; pmaddubsw, 128-bit form: V16QI operands 1 and 2, V8HI result.  The RTL
;; uses two selection vectors over each input, one starting at element 0
;; and one starting at element 1 (the latter ending at 15).
;; NOTE(review): operand 1 has predicate nonimmediate_operand but the
;; matching constraint "0" (no "%"), whereas the ssse3_ph* patterns use
;; register_operand for their tied input -- confirm the looser predicate
;; is intended.
5969 (define_insn "ssse3_pmaddubswv8hi3"
5970 [(set (match_operand:V8HI 0 "register_operand" "=x")
5975 (match_operand:V16QI 1 "nonimmediate_operand" "0")
5976 (parallel [(const_int 0)
5986 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5987 (parallel [(const_int 0)
5997 (vec_select:V16QI (match_dup 1)
5998 (parallel [(const_int 1)
6007 (vec_select:V16QI (match_dup 2)
6008 (parallel [(const_int 1)
6015 (const_int 15)]))))))]
6017 "pmaddubsw\t{%2, %0|%0, %2}"
6018 [(set_attr "type" "sseiadd")
6019 (set_attr "prefix_data16" "1")
6020 (set_attr "prefix_extra" "1")
6021 (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit form on MMX registers ("y"): V8QI operands 1 and 2,
;; V4HI result.  The RTL uses two selection vectors over each input, one
;; starting at element 0 and one starting at element 1 (the latter ending
;; at 7).
;; NOTE(review): operand 1 has predicate nonimmediate_operand but the
;; matching constraint "0" (no "%"), whereas the ssse3_ph* patterns use
;; register_operand for their tied input -- confirm the looser predicate
;; is intended.
6023 (define_insn "ssse3_pmaddubswv4hi3"
6024 [(set (match_operand:V4HI 0 "register_operand" "=y")
6029 (match_operand:V8QI 1 "nonimmediate_operand" "0")
6030 (parallel [(const_int 0)
6036 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
6037 (parallel [(const_int 0)
6043 (vec_select:V8QI (match_dup 1)
6044 (parallel [(const_int 1)
6049 (vec_select:V8QI (match_dup 2)
6050 (parallel [(const_int 1)
6053 (const_int 7)]))))))]
6055 "pmaddubsw\t{%2, %0|%0, %2}"
6056 [(set_attr "type" "sseiadd")
6057 (set_attr "prefix_extra" "1")
6058 (set_attr "mode" "DI")])
;; pmulhrsw, 128-bit form on V8HI.  The RTL includes an all-ones V8HI
;; constant vector.  Operand 1 is tied to the destination and marked
;; commutative with operand 2 ("%0"); operand 2 may be an SSE register or
;; memory.  Enabled for TARGET_SSSE3 when ix86_binary_operator_ok accepts
;; the operands as a MULT.
6060 (define_insn "ssse3_pmulhrswv8hi3"
6061 [(set (match_operand:V8HI 0 "register_operand" "=x")
6068 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
6070 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
6072 (const_vector:V8HI [(const_int 1) (const_int 1)
6073 (const_int 1) (const_int 1)
6074 (const_int 1) (const_int 1)
6075 (const_int 1) (const_int 1)]))
6077 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
6078 "pmulhrsw\t{%2, %0|%0, %2}"
6079 [(set_attr "type" "sseimul")
6080 (set_attr "prefix_data16" "1")
6081 (set_attr "prefix_extra" "1")
6082 (set_attr "mode" "TI")])
;; pmulhrsw, 64-bit form on MMX registers ("y"), V4HI.  The RTL includes
;; an all-ones V4HI constant vector.  Operand 1 is tied to the destination
;; and marked commutative with operand 2 ("%0"); operand 2 may be an MMX
;; register or memory.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands as a MULT.
6084 (define_insn "ssse3_pmulhrswv4hi3"
6085 [(set (match_operand:V4HI 0 "register_operand" "=y")
6092 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
6094 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
6096 (const_vector:V4HI [(const_int 1) (const_int 1)
6097 (const_int 1) (const_int 1)]))
6099 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
6100 "pmulhrsw\t{%2, %0|%0, %2}"
6101 [(set_attr "type" "sseimul")
6102 (set_attr "prefix_extra" "1")
6103 (set_attr "mode" "DI")])
;; pshufb, 128-bit form: unspec over V16QI operand 1 (tied to the
;; destination) and V16QI operand 2 (SSE register or memory).
;; NOTE(review): the ';' after the output template starts an empty comment
;; in machine-description syntax; it is harmless but looks like a stray
;; statement terminator.
6105 (define_insn "ssse3_pshufbv16qi3"
6106 [(set (match_operand:V16QI 0 "register_operand" "=x")
6107 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6108 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6111 "pshufb\t{%2, %0|%0, %2}";
6112 [(set_attr "type" "sselog1")
6113 (set_attr "prefix_data16" "1")
6114 (set_attr "prefix_extra" "1")
6115 (set_attr "mode" "TI")])
;; pshufb, 64-bit form on MMX registers ("y"): unspec over V8QI operand 1
;; (tied to the destination) and V8QI operand 2 (MMX register or memory).
;; NOTE(review): the ';' after the output template starts an empty comment
;; in machine-description syntax; it is harmless but looks like a stray
;; statement terminator.
6117 (define_insn "ssse3_pshufbv8qi3"
6118 [(set (match_operand:V8QI 0 "register_operand" "=y")
6119 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6120 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6123 "pshufb\t{%2, %0|%0, %2}";
6124 [(set_attr "type" "sselog1")
6125 (set_attr "prefix_extra" "1")
6126 (set_attr "mode" "DI")])
;; psignb / psignw / psignd on SSE registers.  Iterates over SSEMODE124
;; (V16QI, V8HI, V4SI); <ssevecsize> supplies the b/w/d mnemonic suffix.
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.
;; NOTE(review): the ';' after the output template starts an empty comment
;; in machine-description syntax; harmless but stray.
6128 (define_insn "ssse3_psign<mode>3"
6129 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6131 [(match_operand:SSEMODE124 1 "register_operand" "0")
6132 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6135 "psign<ssevecsize>\t{%2, %0|%0, %2}";
6136 [(set_attr "type" "sselog1")
6137 (set_attr "prefix_data16" "1")
6138 (set_attr "prefix_extra" "1")
6139 (set_attr "mode" "TI")])
;; psign on MMX registers ("y").  Iterates over MMXMODEI and takes the
;; mnemonic suffix from <mmxvecsize>; both are defined outside this part
;; of the file.  Operand 1 is tied to the destination; operand 2 may be an
;; MMX register or memory.
;; NOTE(review): the ';' after the output template starts an empty comment
;; in machine-description syntax; harmless but stray.
6141 (define_insn "ssse3_psign<mode>3"
6142 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6144 [(match_operand:MMXMODEI 1 "register_operand" "0")
6145 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6148 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
6149 [(set_attr "type" "sselog1")
6150 (set_attr "prefix_extra" "1")
6151 (set_attr "mode" "DI")])
;; palignr, 128-bit form on TImode: unspec over operand 1 (tied to the
;; destination), operand 2 (SSE register or memory) and an immediate
;; operand 3 accepted by const_0_to_255_mul_8_operand.  The output code
;; divides operand 3 by 8 before printing it.
;; NOTE(review): presumably the RTL carries a bit count and the
;; instruction expects a byte count -- confirm.
6153 (define_insn "ssse3_palignrti"
6154 [(set (match_operand:TI 0 "register_operand" "=x")
6155 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6156 (match_operand:TI 2 "nonimmediate_operand" "xm")
6157 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6161 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6162 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6164 [(set_attr "type" "sseishft")
6165 (set_attr "prefix_data16" "1")
6166 (set_attr "prefix_extra" "1")
6167 (set_attr "mode" "TI")])
;; palignr, 64-bit form on DImode in MMX registers ("y"): unspec over
;; operand 1 (tied to the destination), operand 2 (MMX register or memory)
;; and an immediate operand 3 accepted by const_0_to_255_mul_8_operand.
;; The output code divides operand 3 by 8 before printing it.
;; NOTE(review): presumably the RTL carries a bit count and the
;; instruction expects a byte count -- confirm.
6169 (define_insn "ssse3_palignrdi"
6170 [(set (match_operand:DI 0 "register_operand" "=y")
6171 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6172 (match_operand:DI 2 "nonimmediate_operand" "ym")
6173 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6177 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6178 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6180 [(set_attr "type" "sseishft")
6181 (set_attr "prefix_extra" "1")
6182 (set_attr "mode" "DI")])
;; Standard-name abs<mode>2 for SSEMODE124 (V16QI, V8HI, V4SI): an `abs'
;; rtx emitted as pabsb / pabsw / pabsd via <ssevecsize>.  The source may
;; be an SSE register or memory and is not tied to the destination.
;; NOTE(review): the ';' after the output template starts an empty comment
;; in machine-description syntax; harmless but stray.
6184 (define_insn "abs<mode>2"
6185 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6186 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6188 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
6189 [(set_attr "type" "sselog1")
6190 (set_attr "prefix_data16" "1")
6191 (set_attr "prefix_extra" "1")
6192 (set_attr "mode" "TI")])
;; Standard-name abs<mode>2 on MMX registers ("y"): an `abs' rtx emitted
;; as pabs plus the <mmxvecsize> suffix.  MMXMODEI and <mmxvecsize> are
;; defined outside this part of the file.  The source may be an MMX
;; register or memory and is not tied to the destination.
;; NOTE(review): the ';' after the output template starts an empty comment
;; in machine-description syntax; harmless but stray.
6194 (define_insn "abs<mode>2"
6195 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6196 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6198 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
6199 [(set_attr "type" "sselog1")
6200 (set_attr "prefix_extra" "1")
6201 (set_attr "mode" "DI")])
6203 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6205 ;; AMD SSE4A instructions
6207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnts<suffix>: stores an SSE register (operand 1) to a memory
;; operand 0 of the same MODEF mode.  The MODEF iterator and the
;; <ssemodefsuffix> attribute are defined outside this part of the file.
6209 (define_insn "sse4a_movnt<mode>"
6210 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6212 [(match_operand:MODEF 1 "register_operand" "x")]
6215 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6216 [(set_attr "type" "ssemov")
6217 (set_attr "mode" "<MODE>")])
;; Vector-register form of movntss / movntsd.  Iterates over SSEMODEF2P
;; (V4SF, V2DF); the unspec wraps element 0 of the vector operand 1, and
;; the result is stored to a memory operand in <ssescalarmode> (an
;; attribute defined outside this part of the file).
6219 (define_insn "sse4a_vmmovnt<mode>"
6220 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6221 (unspec:<ssescalarmode>
6222 [(vec_select:<ssescalarmode>
6223 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6224 (parallel [(const_int 0)]))]
6227 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6228 [(set_attr "type" "ssemov")
6229 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form: unspec over V2DI operand 1 (tied to the
;; destination) and two modeless const_int operands 2 and 3, which are
;; printed as the instruction's two immediates.
6231 (define_insn "sse4a_extrqi"
6232 [(set (match_operand:V2DI 0 "register_operand" "=x")
6233 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6234 (match_operand 2 "const_int_operand" "")
6235 (match_operand 3 "const_int_operand" "")]
6238 "extrq\t{%3, %2, %0|%0, %2, %3}"
6239 [(set_attr "type" "sse")
6240 (set_attr "prefix_data16" "1")
6241 (set_attr "mode" "TI")])
;; extrq, register form: unspec over V2DI operand 1 (tied to the
;; destination) and a V16QI SSE register operand 2.
6243 (define_insn "sse4a_extrq"
6244 [(set (match_operand:V2DI 0 "register_operand" "=x")
6245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6246 (match_operand:V16QI 2 "register_operand" "x")]
6249 "extrq\t{%2, %0|%0, %2}"
6250 [(set_attr "type" "sse")
6251 (set_attr "prefix_data16" "1")
6252 (set_attr "mode" "TI")])
;; insertq, immediate form: unspec over V2DI operand 1 (tied to the
;; destination), V2DI SSE register operand 2 and two modeless const_int
;; operands 3 and 4, which are printed as the instruction's two immediates.
6254 (define_insn "sse4a_insertqi"
6255 [(set (match_operand:V2DI 0 "register_operand" "=x")
6256 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6257 (match_operand:V2DI 2 "register_operand" "x")
6258 (match_operand 3 "const_int_operand" "")
6259 (match_operand 4 "const_int_operand" "")]
6262 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6263 [(set_attr "type" "sseins")
6264 (set_attr "prefix_rep" "1")
6265 (set_attr "mode" "TI")])
;; insertq, register form: unspec over V2DI operand 1 (tied to the
;; destination) and V2DI SSE register operand 2.
6267 (define_insn "sse4a_insertq"
6268 [(set (match_operand:V2DI 0 "register_operand" "=x")
6269 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6270 (match_operand:V2DI 2 "register_operand" "x")]
6273 "insertq\t{%2, %0|%0, %2}"
6274 [(set_attr "type" "sseins")
6275 (set_attr "prefix_rep" "1")
6276 (set_attr "mode" "TI")])
6278 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6280 ;; Intel SSE4.1 instructions
6282 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendps / blendpd.  Iterates over SSEMODEF2P (V4SF, V2DF).  Expressed
;; as a vec_merge of operand 2 (SSE register or memory) and operand 1
;; (tied to the destination) under the immediate mask operand 3.  The mask
;; predicate name is built from <blendbits>, an attribute defined outside
;; this part of the file.
6284 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6285 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6286 (vec_merge:SSEMODEF2P
6287 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6288 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6289 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6291 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6292 [(set_attr "type" "ssemov")
6293 (set_attr "prefix_extra" "1")
6294 (set_attr "mode" "<MODE>")])
;; blendvps / blendvpd.  Iterates over SSEMODEF2P (V4SF, V2DF).  Operand 3
;; (the selector) is constrained to "Yz", while operands 0-2 use the
;; *_not_xmm0 predicates.
;; NOTE(review): presumably this keeps the other operands out of the
;; register the selector must occupy -- confirm against the predicate and
;; constraint definitions.
6296 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6297 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6299 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6300 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6301 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6304 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6305 [(set_attr "type" "ssemov")
6306 (set_attr "prefix_extra" "1")
6307 (set_attr "mode" "<MODE>")])
;; dpps / dppd.  Iterates over SSEMODEF2P (V4SF, V2DF).  Operand 1 is tied
;; to the destination and marked commutative with operand 2 ("%0");
;; operand 2 may be an SSE register or memory; operand 3 is an immediate
;; accepted by const_0_to_255_operand.
6309 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6310 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6312 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6313 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6314 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6317 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6318 [(set_attr "type" "ssemul")
6319 (set_attr "prefix_extra" "1")
6320 (set_attr "mode" "<MODE>")])
;; movntdqa: unspec load of a V2DI memory operand (operand 1, "m") into an
;; SSE register (operand 0).
6322 (define_insn "sse4_1_movntdqa"
6323 [(set (match_operand:V2DI 0 "register_operand" "=x")
6324 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6327 "movntdqa\t{%1, %0|%0, %1}"
6328 [(set_attr "type" "ssecvt")
6329 (set_attr "prefix_extra" "1")
6330 (set_attr "mode" "TI")])
;; mpsadbw: unspec over V16QI operand 1 (tied to the destination), V16QI
;; operand 2 (SSE register or memory) and an immediate operand 3 accepted
;; by const_0_to_255_operand.
6332 (define_insn "sse4_1_mpsadbw"
6333 [(set (match_operand:V16QI 0 "register_operand" "=x")
6334 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6335 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6336 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6339 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6340 [(set_attr "type" "sselog1")
6341 (set_attr "prefix_extra" "1")
6342 (set_attr "mode" "TI")])
;; packusdw: two V4SI inputs produce one V8HI result.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.
6344 (define_insn "sse4_1_packusdw"
6345 [(set (match_operand:V8HI 0 "register_operand" "=x")
6348 (match_operand:V4SI 1 "register_operand" "0"))
6350 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6352 "packusdw\t{%2, %0|%0, %2}"
6353 [(set_attr "type" "sselog")
6354 (set_attr "prefix_extra" "1")
6355 (set_attr "mode" "TI")])
;; pblendvb: unspec over three V16QI operands.  Operand 3 (the selector)
;; is constrained to "Yz", while operands 0-2 use the *_not_xmm0
;; predicates.
;; NOTE(review): presumably this keeps the other operands out of the
;; register the selector must occupy -- confirm against the predicate and
;; constraint definitions.
6357 (define_insn "sse4_1_pblendvb"
6358 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6359 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6360 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6361 (match_operand:V16QI 3 "register_operand" "Yz")]
6364 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6365 [(set_attr "type" "ssemov")
6366 (set_attr "prefix_extra" "1")
6367 (set_attr "mode" "TI")])
;; pblendw on V8HI: operand 2 (SSE register or memory) is listed before
;; operand 1 (tied to the destination), followed by the immediate mask
;; operand 3 accepted by const_0_to_255_operand.
6369 (define_insn "sse4_1_pblendw"
6370 [(set (match_operand:V8HI 0 "register_operand" "=x")
6372 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6373 (match_operand:V8HI 1 "register_operand" "0")
6374 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6376 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6377 [(set_attr "type" "ssemov")
6378 (set_attr "prefix_extra" "1")
6379 (set_attr "mode" "TI")])
;; phminposuw: modelled as UNSPEC_PHMINPOSUW over a single V8HI source
;; (SSE register or memory), writing a V8HI SSE register.
6381 (define_insn "sse4_1_phminposuw"
6382 [(set (match_operand:V8HI 0 "register_operand" "=x")
6383 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6384 UNSPEC_PHMINPOSUW))]
6386 "phminposuw\t{%1, %0|%0, %1}"
6387 [(set_attr "type" "sselog1")
6388 (set_attr "prefix_extra" "1")
6389 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: operand 1 is a full V16QI SSE register
;; and the element selection starts at index 0; the result is V8HI.
6391 (define_insn "sse4_1_extendv8qiv8hi2"
6392 [(set (match_operand:V8HI 0 "register_operand" "=x")
6395 (match_operand:V16QI 1 "register_operand" "x")
6396 (parallel [(const_int 0)
6405 "pmovsxbw\t{%1, %0|%0, %1}"
6406 [(set_attr "type" "ssemov")
6407 (set_attr "prefix_extra" "1")
6408 (set_attr "mode" "TI")])
;; pmovsxbw, unnamed companion pattern: operand 1 is a V8QI operand that
;; may be an SSE register or memory ("xm"), wrapped in
;; vec_duplicate:V16QI before the element selection; the result is V8HI.
6410 (define_insn "*sse4_1_extendv8qiv8hi2"
6411 [(set (match_operand:V8HI 0 "register_operand" "=x")
6414 (vec_duplicate:V16QI
6415 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6416 (parallel [(const_int 0)
6425 "pmovsxbw\t{%1, %0|%0, %1}"
6426 [(set_attr "type" "ssemov")
6427 (set_attr "prefix_extra" "1")
6428 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: operand 1 is a full V16QI SSE register
;; and the element selection starts at index 0; the result is V4SI.
6430 (define_insn "sse4_1_extendv4qiv4si2"
6431 [(set (match_operand:V4SI 0 "register_operand" "=x")
6434 (match_operand:V16QI 1 "register_operand" "x")
6435 (parallel [(const_int 0)
6440 "pmovsxbd\t{%1, %0|%0, %1}"
6441 [(set_attr "type" "ssemov")
6442 (set_attr "prefix_extra" "1")
6443 (set_attr "mode" "TI")])
;; pmovsxbd, unnamed companion pattern: operand 1 is a V4QI operand that
;; may be an SSE register or memory ("xm"), wrapped in
;; vec_duplicate:V16QI before the element selection; the result is V4SI.
6445 (define_insn "*sse4_1_extendv4qiv4si2"
6446 [(set (match_operand:V4SI 0 "register_operand" "=x")
6449 (vec_duplicate:V16QI
6450 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6451 (parallel [(const_int 0)
6456 "pmovsxbd\t{%1, %0|%0, %1}"
6457 [(set_attr "type" "ssemov")
6458 (set_attr "prefix_extra" "1")
6459 (set_attr "mode" "TI")])
;; pmovsxbq, named register-source form: operand 1 must be a V16QI register
;; and the element-selection parallel starts at index 0.  Result: V2DI.
6461 (define_insn "sse4_1_extendv2qiv2di2"
6462 [(set (match_operand:V2DI 0 "register_operand" "=x")
6465 (match_operand:V16QI 1 "register_operand" "x")
6466 (parallel [(const_int 0)
6469 "pmovsxbq\t{%1, %0|%0, %1}"
6470 [(set_attr "type" "ssemov")
6471 (set_attr "prefix_extra" "1")
6472 (set_attr "mode" "TI")])
;; pmovsxbq, unnamed variant: the source is a V2QI register or memory operand
;; ("xm") wrapped in vec_duplicate:V16QI.  Result: V2DI register.
6474 (define_insn "*sse4_1_extendv2qiv2di2"
6475 [(set (match_operand:V2DI 0 "register_operand" "=x")
6478 (vec_duplicate:V16QI
6479 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6480 (parallel [(const_int 0)
6483 "pmovsxbq\t{%1, %0|%0, %1}"
6484 [(set_attr "type" "ssemov")
6485 (set_attr "prefix_extra" "1")
6486 (set_attr "mode" "TI")])
;; pmovsxwd, named register-source form: operand 1 must be a V8HI register
;; and the element-selection parallel starts at index 0.  Result: V4SI.
6488 (define_insn "sse4_1_extendv4hiv4si2"
6489 [(set (match_operand:V4SI 0 "register_operand" "=x")
6492 (match_operand:V8HI 1 "register_operand" "x")
6493 (parallel [(const_int 0)
6498 "pmovsxwd\t{%1, %0|%0, %1}"
6499 [(set_attr "type" "ssemov")
6500 (set_attr "prefix_extra" "1")
6501 (set_attr "mode" "TI")])
;; pmovsxwd, unnamed variant with a register-or-memory source ("xm").
;; The source holds the four words that are widened into the V4SI result,
;; so its mode is V4HI, the same mode *sse4_1_zero_extendv4hiv4si2 uses for
;; pmovzxwd.  (It was previously V2HI, which describes only two words.)
6503 (define_insn "*sse4_1_extendv4hiv4si2"
6504 [(set (match_operand:V4SI 0 "register_operand" "=x")
6508 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6509 (parallel [(const_int 0)
6514 "pmovsxwd\t{%1, %0|%0, %1}"
6515 [(set_attr "type" "ssemov")
6516 (set_attr "prefix_extra" "1")
6517 (set_attr "mode" "TI")])
;; pmovsxwq, named register-source form: operand 1 must be a V8HI register
;; and the element-selection parallel starts at index 0.  Result: V2DI.
6519 (define_insn "sse4_1_extendv2hiv2di2"
6520 [(set (match_operand:V2DI 0 "register_operand" "=x")
6523 (match_operand:V8HI 1 "register_operand" "x")
6524 (parallel [(const_int 0)
6527 "pmovsxwq\t{%1, %0|%0, %1}"
6528 [(set_attr "type" "ssemov")
6529 (set_attr "prefix_extra" "1")
6530 (set_attr "mode" "TI")])
;; pmovsxwq, unnamed variant with a register-or-memory source ("xm").
;; The source holds the two words that are widened into the V2DI result,
;; so its mode is V2HI, the same mode *sse4_1_zero_extendv2hiv2di2 uses for
;; pmovzxwq.  (It was previously V8HI, i.e. a full 128-bit operand, which
;; made this variant no narrower than the named register form.)
6532 (define_insn "*sse4_1_extendv2hiv2di2"
6533 [(set (match_operand:V2DI 0 "register_operand" "=x")
6537 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6538 (parallel [(const_int 0)
6541 "pmovsxwq\t{%1, %0|%0, %1}"
6542 [(set_attr "type" "ssemov")
6543 (set_attr "prefix_extra" "1")
6544 (set_attr "mode" "TI")])
;; pmovsxdq, named register-source form: operand 1 must be a V4SI register
;; and the element-selection parallel starts at index 0.  Result: V2DI.
6546 (define_insn "sse4_1_extendv2siv2di2"
6547 [(set (match_operand:V2DI 0 "register_operand" "=x")
6550 (match_operand:V4SI 1 "register_operand" "x")
6551 (parallel [(const_int 0)
6554 "pmovsxdq\t{%1, %0|%0, %1}"
6555 [(set_attr "type" "ssemov")
6556 (set_attr "prefix_extra" "1")
6557 (set_attr "mode" "TI")])
;; pmovsxdq, unnamed variant: the source is a V2SI register or memory
;; operand ("xm").  Result: V2DI register.
6559 (define_insn "*sse4_1_extendv2siv2di2"
6560 [(set (match_operand:V2DI 0 "register_operand" "=x")
6564 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6565 (parallel [(const_int 0)
6568 "pmovsxdq\t{%1, %0|%0, %1}"
6569 [(set_attr "type" "ssemov")
6570 (set_attr "prefix_extra" "1")
6571 (set_attr "mode" "TI")])
;; pmovzxbw, named register-source form: operand 1 must be a V16QI register
;; and the element-selection parallel starts at index 0.  Result: V8HI.
6573 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6574 [(set (match_operand:V8HI 0 "register_operand" "=x")
6577 (match_operand:V16QI 1 "register_operand" "x")
6578 (parallel [(const_int 0)
6587 "pmovzxbw\t{%1, %0|%0, %1}"
6588 [(set_attr "type" "ssemov")
6589 (set_attr "prefix_extra" "1")
6590 (set_attr "mode" "TI")])
;; pmovzxbw, unnamed variant: the source is a V8QI register or memory operand
;; ("xm") wrapped in vec_duplicate:V16QI.  Result: V8HI register.
6592 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6593 [(set (match_operand:V8HI 0 "register_operand" "=x")
6596 (vec_duplicate:V16QI
6597 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6598 (parallel [(const_int 0)
6607 "pmovzxbw\t{%1, %0|%0, %1}"
6608 [(set_attr "type" "ssemov")
6609 (set_attr "prefix_extra" "1")
6610 (set_attr "mode" "TI")])
;; pmovzxbd, named register-source form: operand 1 must be a V16QI register
;; and the element-selection parallel starts at index 0.  Result: V4SI.
6612 (define_insn "sse4_1_zero_extendv4qiv4si2"
6613 [(set (match_operand:V4SI 0 "register_operand" "=x")
6616 (match_operand:V16QI 1 "register_operand" "x")
6617 (parallel [(const_int 0)
6622 "pmovzxbd\t{%1, %0|%0, %1}"
6623 [(set_attr "type" "ssemov")
6624 (set_attr "prefix_extra" "1")
6625 (set_attr "mode" "TI")])
;; pmovzxbd, unnamed variant: the source is a V4QI register or memory operand
;; ("xm") wrapped in vec_duplicate:V16QI.  Result: V4SI register.
6627 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6628 [(set (match_operand:V4SI 0 "register_operand" "=x")
6631 (vec_duplicate:V16QI
6632 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6633 (parallel [(const_int 0)
6638 "pmovzxbd\t{%1, %0|%0, %1}"
6639 [(set_attr "type" "ssemov")
6640 (set_attr "prefix_extra" "1")
6641 (set_attr "mode" "TI")])
;; pmovzxbq, named register-source form: operand 1 must be a V16QI register
;; and the element-selection parallel starts at index 0.  Result: V2DI.
6643 (define_insn "sse4_1_zero_extendv2qiv2di2"
6644 [(set (match_operand:V2DI 0 "register_operand" "=x")
6647 (match_operand:V16QI 1 "register_operand" "x")
6648 (parallel [(const_int 0)
6651 "pmovzxbq\t{%1, %0|%0, %1}"
6652 [(set_attr "type" "ssemov")
6653 (set_attr "prefix_extra" "1")
6654 (set_attr "mode" "TI")])
;; pmovzxbq, unnamed variant: the source is a V2QI register or memory operand
;; ("xm") wrapped in vec_duplicate:V16QI.  Result: V2DI register.
6656 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6657 [(set (match_operand:V2DI 0 "register_operand" "=x")
6660 (vec_duplicate:V16QI
6661 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6662 (parallel [(const_int 0)
6665 "pmovzxbq\t{%1, %0|%0, %1}"
6666 [(set_attr "type" "ssemov")
6667 (set_attr "prefix_extra" "1")
6668 (set_attr "mode" "TI")])
;; pmovzxwd, named register-source form: operand 1 must be a V8HI register
;; and the element-selection parallel starts at index 0.  Result: V4SI.
6670 (define_insn "sse4_1_zero_extendv4hiv4si2"
6671 [(set (match_operand:V4SI 0 "register_operand" "=x")
6674 (match_operand:V8HI 1 "register_operand" "x")
6675 (parallel [(const_int 0)
6680 "pmovzxwd\t{%1, %0|%0, %1}"
6681 [(set_attr "type" "ssemov")
6682 (set_attr "prefix_extra" "1")
6683 (set_attr "mode" "TI")])
;; pmovzxwd, unnamed variant: the source is a V4HI register or memory
;; operand ("xm").  Result: V4SI register.
6685 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6686 [(set (match_operand:V4SI 0 "register_operand" "=x")
6690 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6691 (parallel [(const_int 0)
6696 "pmovzxwd\t{%1, %0|%0, %1}"
6697 [(set_attr "type" "ssemov")
6698 (set_attr "prefix_extra" "1")
6699 (set_attr "mode" "TI")])
;; pmovzxwq, named register-source form: operand 1 must be a V8HI register
;; and the element-selection parallel starts at index 0.  Result: V2DI.
6701 (define_insn "sse4_1_zero_extendv2hiv2di2"
6702 [(set (match_operand:V2DI 0 "register_operand" "=x")
6705 (match_operand:V8HI 1 "register_operand" "x")
6706 (parallel [(const_int 0)
6709 "pmovzxwq\t{%1, %0|%0, %1}"
6710 [(set_attr "type" "ssemov")
6711 (set_attr "prefix_extra" "1")
6712 (set_attr "mode" "TI")])
;; pmovzxwq, unnamed variant: the source is a V2HI register or memory
;; operand ("xm").  Result: V2DI register.
6714 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6715 [(set (match_operand:V2DI 0 "register_operand" "=x")
6719 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6720 (parallel [(const_int 0)
6723 "pmovzxwq\t{%1, %0|%0, %1}"
6724 [(set_attr "type" "ssemov")
6725 (set_attr "prefix_extra" "1")
6726 (set_attr "mode" "TI")])
;; pmovzxdq, named register-source form: operand 1 must be a V4SI register
;; and the element-selection parallel starts at index 0.  Result: V2DI.
6728 (define_insn "sse4_1_zero_extendv2siv2di2"
6729 [(set (match_operand:V2DI 0 "register_operand" "=x")
6732 (match_operand:V4SI 1 "register_operand" "x")
6733 (parallel [(const_int 0)
6736 "pmovzxdq\t{%1, %0|%0, %1}"
6737 [(set_attr "type" "ssemov")
6738 (set_attr "prefix_extra" "1")
6739 (set_attr "mode" "TI")])
;; pmovzxdq, unnamed variant: the source is a V2SI register or memory
;; operand ("xm").  Result: V2DI register.
6741 (define_insn "*sse4_1_zero_extendv2siv2di2"
6742 [(set (match_operand:V2DI 0 "register_operand" "=x")
6746 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6747 (parallel [(const_int 0)
6750 "pmovzxdq\t{%1, %0|%0, %1}"
6751 [(set_attr "type" "ssemov")
6752 (set_attr "prefix_extra" "1")
6753 (set_attr "mode" "TI")])
6755 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6756 ;; But it is not really a compare instruction.
;; PTEST.  The only visible effect is a set of FLAGS_REG (CC mode) from an
;; unspec over operand 0 (V2DI register) and operand 1 (V2DI register or
;; memory); no vector register is written.
6757 (define_insn "sse4_1_ptest"
6758 [(set (reg:CC FLAGS_REG)
6759 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6760 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6763 "ptest\t{%1, %0|%0, %1}"
6764 [(set_attr "type" "ssecomi")
6765 (set_attr "prefix_extra" "1")
6766 (set_attr "mode" "TI")])
;; ROUNDPS / ROUNDPD, iterated over SSEMODEF2P; <ssemodesuffixf2c> supplies
;; "s" for V4SF and "d" for V2DF.  Operand 1 is the register-or-memory
;; source and operand 2 an immediate accepted by const_0_to_15_operand.
6768 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6769 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6771 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6772 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6775 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6776 [(set_attr "type" "ssecvt")
6777 (set_attr "prefix_extra" "1")
6778 (set_attr "mode" "<MODE>")])
;; ROUNDSS / ROUNDSD, iterated over SSEMODEF2P.  The pattern is a vec_merge:
;; operand 2 (register) and the const_0_to_15_operand immediate (operand 3)
;; feed one arm, while operand 1, tied to the destination by "0", is the
;; other arm.
6780 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6781 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6782 (vec_merge:SSEMODEF2P
6784 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6785 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6787 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6790 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6791 [(set_attr "type" "ssecvt")
6792 (set_attr "prefix_extra" "1")
6793 (set_attr "mode" "<MODE>")])
6795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6797 ;; Intel SSE4.2 string/text processing instructions
6799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTR pattern with three results: operand 0 (SI, "c"),
;; operand 1 (V16QI, "Yz") and FLAGS_REG.  Inputs are the two strings
;; (operands 2 and 4, restricted by the *_not_xmm0 predicates), their
;; lengths in "a" and "d" (operands 3 and 5) and the control immediate
;; (operand 6).  It is split only before reload.  The split body reads the
;; REG_UNUSED notes on curr_insn to see which results are live and emits
;; pcmpestri and/or pcmpestrm accordingly; the flags-only form is emitted
;; when the flags are live but neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible in this listing; presumably "if (ecx)" and "if (xmm0)".
6801 (define_insn_and_split "sse4_2_pcmpestr"
6802 [(set (match_operand:SI 0 "register_operand" "=c,c")
6804 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6805 (match_operand:SI 3 "register_operand" "a,a")
6806 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6807 (match_operand:SI 5 "register_operand" "d,d")
6808 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6810 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6818 (set (reg:CC FLAGS_REG)
6827 && !(reload_completed || reload_in_progress)"
6832 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6833 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6834 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6837 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6838 operands[3], operands[4],
6839 operands[5], operands[6]));
6841 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6842 operands[3], operands[4],
6843 operands[5], operands[6]));
6844 if (flags && !(ecx || xmm0))
6845 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6846 operands[2], operands[3],
6847 operands[4], operands[5],
6851 [(set_attr "type" "sselog")
6852 (set_attr "prefix_data16" "1")
6853 (set_attr "prefix_extra" "1")
6854 (set_attr "memory" "none,load")
6855 (set_attr "mode" "TI")])
;; PCMPESTRI: the register result is operand 0 (SI, constraint "c"), and
;; FLAGS_REG is set as well.  Operands 2 and 4 are pinned to "a" and "d";
;; they do not appear in the output template, which prints only the two
;; string operands (1 and 3) and the immediate (5).
6857 (define_insn "sse4_2_pcmpestri"
6858 [(set (match_operand:SI 0 "register_operand" "=c,c")
6860 [(match_operand:V16QI 1 "register_operand" "x,x")
6861 (match_operand:SI 2 "register_operand" "a,a")
6862 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6863 (match_operand:SI 4 "register_operand" "d,d")
6864 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6866 (set (reg:CC FLAGS_REG)
6875 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6876 [(set_attr "type" "sselog")
6877 (set_attr "prefix_data16" "1")
6878 (set_attr "prefix_extra" "1")
6879 (set_attr "memory" "none,load")
6880 (set_attr "mode" "TI")])
;; PCMPESTRM: the register result is operand 0 (V16QI, constraint "Yz"),
;; and FLAGS_REG is set as well.  Operands 2 and 4 are pinned to "a" and
;; "d"; the output template prints only operands 1, 3 and the immediate 5.
6882 (define_insn "sse4_2_pcmpestrm"
6883 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6885 [(match_operand:V16QI 1 "register_operand" "x,x")
6886 (match_operand:SI 2 "register_operand" "a,a")
6887 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6888 (match_operand:SI 4 "register_operand" "d,d")
6889 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6891 (set (reg:CC FLAGS_REG)
6900 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6901 [(set_attr "type" "sselog")
6902 (set_attr "prefix_data16" "1")
6903 (set_attr "prefix_extra" "1")
6904 (set_attr "memory" "none,load")
6905 (set_attr "mode" "TI")])
;; Flags-only PCMPESTR: only FLAGS_REG is set; the register results are
;; modelled as clobbered scratches.  Alternatives 0 and 1 clobber a "Yz"
;; V16QI scratch and emit pcmpestrm; alternatives 2 and 3 clobber a "c" SI
;; scratch and emit pcmpestri.  Within each pair the string operand 4 is a
;; register, then memory.
6907 (define_insn "sse4_2_pcmpestr_cconly"
6908 [(set (reg:CC FLAGS_REG)
6910 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6911 (match_operand:SI 3 "register_operand" "a,a,a,a")
6912 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6913 (match_operand:SI 5 "register_operand" "d,d,d,d")
6914 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6916 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6917 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6920 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6921 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6922 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6923 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6924 [(set_attr "type" "sselog")
6925 (set_attr "prefix_data16" "1")
6926 (set_attr "prefix_extra" "1")
6927 (set_attr "memory" "none,load,none,load")
6928 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern with three results: operand 0 (SI, "c"),
;; operand 1 (V16QI, "Yz") and FLAGS_REG.  Inputs are the two strings
;; (operands 2 and 3, restricted by the *_not_xmm0 predicates) and the
;; control immediate (operand 4); there are no length operands.  It is
;; split only before reload.  The split body reads the REG_UNUSED notes on
;; curr_insn and emits pcmpistri and/or pcmpistrm; the flags-only form is
;; emitted when the flags are live but neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible in this listing; presumably "if (ecx)" and "if (xmm0)".
6930 (define_insn_and_split "sse4_2_pcmpistr"
6931 [(set (match_operand:SI 0 "register_operand" "=c,c")
6933 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6934 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6935 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6937 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6943 (set (reg:CC FLAGS_REG)
6950 && !(reload_completed || reload_in_progress)"
6955 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6956 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6957 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6960 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6961 operands[3], operands[4]));
6963 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6964 operands[3], operands[4]));
6965 if (flags && !(ecx || xmm0))
6966 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6967 operands[2], operands[3],
6971 [(set_attr "type" "sselog")
6972 (set_attr "prefix_data16" "1")
6973 (set_attr "prefix_extra" "1")
6974 (set_attr "memory" "none,load")
6975 (set_attr "mode" "TI")])
;; PCMPISTRI: the register result is operand 0 (SI, constraint "c"), and
;; FLAGS_REG is set as well.  Operand 1 is a register string, operand 2 a
;; register-or-memory string, operand 3 the control immediate.
6977 (define_insn "sse4_2_pcmpistri"
6978 [(set (match_operand:SI 0 "register_operand" "=c,c")
6980 [(match_operand:V16QI 1 "register_operand" "x,x")
6981 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6982 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6984 (set (reg:CC FLAGS_REG)
6991 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6992 [(set_attr "type" "sselog")
6993 (set_attr "prefix_data16" "1")
6994 (set_attr "prefix_extra" "1")
6995 (set_attr "memory" "none,load")
6996 (set_attr "mode" "TI")])
;; PCMPISTRM: the register result is operand 0 (V16QI, constraint "Yz"),
;; and FLAGS_REG is set as well.  Operand 1 is a register string, operand 2
;; a register-or-memory string, operand 3 the control immediate.
6998 (define_insn "sse4_2_pcmpistrm"
6999 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
7001 [(match_operand:V16QI 1 "register_operand" "x,x")
7002 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7003 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7005 (set (reg:CC FLAGS_REG)
7012 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
7013 [(set_attr "type" "sselog")
7014 (set_attr "prefix_data16" "1")
7015 (set_attr "prefix_extra" "1")
7016 (set_attr "memory" "none,load")
7017 (set_attr "mode" "TI")])
;; Flags-only PCMPISTR: only FLAGS_REG is set; the register results are
;; modelled as clobbered scratches.  Alternatives 0 and 1 clobber a "Yz"
;; V16QI scratch and emit pcmpistrm; alternatives 2 and 3 clobber a "c" SI
;; scratch and emit pcmpistri.  Within each pair the string operand 3 is a
;; register, then memory.
7019 (define_insn "sse4_2_pcmpistr_cconly"
7020 [(set (reg:CC FLAGS_REG)
7022 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
7023 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
7024 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
7026 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
7027 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
7030 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7031 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7032 pcmpistri\t{%4, %3, %2|%2, %3, %4}
7033 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
7034 [(set_attr "type" "sselog")
7035 (set_attr "prefix_data16" "1")
7036 (set_attr "prefix_extra" "1")
7037 (set_attr "memory" "none,load,none,load")
7038 (set_attr "mode" "TI")])
7040 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7042 ;; SSE5 instructions
7044 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7046 ;; SSE5 parallel integer multiply/add instructions.
7047 ;; Note the instruction does not allow the value being added to be a memory
7048 ;; operation. However by pretending via the nonimmediate_operand predicate
7049 ;; that it does and splitting it later allows the following to be recognized:
7050 ;; a[i] = b[i] * c[i] + d[i];
;; PMACSWW on V8HI.  Operands 1 and 2 are marked commutative ("%") and at
;; most one of them is memory in any alternative; operand 3 is tied to the
;; destination.  The third alternative (operand 1 in memory) prints %1 and
;; %2 swapped.  The condition passes 2 as the last ix86_sse5_valid_op_p
;; argument, where sse5_pmacssww passes 1.
7051 (define_insn "sse5_pmacsww"
7052 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7055 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7056 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7057 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7058 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7060 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7061 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7062 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7063 [(set_attr "type" "ssemuladd")
7064 (set_attr "mode" "TI")])
7066 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; last argument of 1 but pass with 2, and operand 0 is not mentioned in
;; operands 1, 2 or 3.  It calls ix86_expand_sse5_multiple_memory on the
;; operands and then emits sse5_pmacsww.
7068 [(set (match_operand:V8HI 0 "register_operand" "")
7070 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
7071 (match_operand:V8HI 2 "nonimmediate_operand" ""))
7072 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
7074 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7075 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7076 && !reg_mentioned_p (operands[0], operands[1])
7077 && !reg_mentioned_p (operands[0], operands[2])
7078 && !reg_mentioned_p (operands[0], operands[3])"
7081 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
7082 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; PMACSSWW on V8HI: a mult:V8HI of operands 1 and 2 (commutative, at most
;; one in memory per alternative) combined with operand 3, which is tied to
;; the destination.  The third alternative prints %1 and %2 swapped.
7087 (define_insn "sse5_pmacssww"
7088 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7090 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7091 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7092 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7093 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7095 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7096 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7097 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7098 [(set_attr "type" "ssemuladd")
7099 (set_attr "mode" "TI")])
7101 ;; Note the instruction does not allow the value being added to be a memory
7102 ;; operation. However by pretending via the nonimmediate_operand predicate
7103 ;; that it does and splitting it later allows the following to be recognized:
7104 ;; a[i] = b[i] * c[i] + d[i];
;; PMACSDD on V4SI.  Operands 1 and 2 are marked commutative ("%") and at
;; most one of them is memory in any alternative; operand 3 is tied to the
;; destination.  The third alternative (operand 1 in memory) prints %1 and
;; %2 swapped.  The condition passes 2 as the last ix86_sse5_valid_op_p
;; argument, where sse5_pmacssdd passes 1.
7105 (define_insn "sse5_pmacsdd"
7106 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7109 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7110 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7111 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7112 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7114 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7115 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7116 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7117 [(set_attr "type" "ssemuladd")
7118 (set_attr "mode" "TI")])
7120 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; last argument of 1 but pass with 2, and operand 0 is not mentioned in
;; operands 1, 2 or 3.  It calls ix86_expand_sse5_multiple_memory on the
;; operands and then emits sse5_pmacsdd.
7122 [(set (match_operand:V4SI 0 "register_operand" "")
7124 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7125 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7126 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7128 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7129 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7130 && !reg_mentioned_p (operands[0], operands[1])
7131 && !reg_mentioned_p (operands[0], operands[2])
7132 && !reg_mentioned_p (operands[0], operands[3])"
7135 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7136 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; PMACSSDD on V4SI: a mult:V4SI of operands 1 and 2 (commutative, at most
;; one in memory per alternative) combined with operand 3, which is tied to
;; the destination.  The third alternative prints %1 and %2 swapped.
7141 (define_insn "sse5_pmacssdd"
7142 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7144 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7145 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7146 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7147 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7149 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7150 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7151 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7152 [(set_attr "type" "ssemuladd")
7153 (set_attr "mode" "TI")])
;; PMACSSDQL: V4SI sources (operands 1 and 2, at most one in memory per
;; alternative), V2DI accumulator (operand 3, a register tied to the
;; destination), V2DI result.  Both visible selection parallels start at
;; element 1.  The third alternative prints %1 and %2 swapped.
7155 (define_insn "sse5_pmacssdql"
7156 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7161 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7162 (parallel [(const_int 1)
7165 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7166 (parallel [(const_int 1)
7168 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7169 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7171 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7172 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7173 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7174 [(set_attr "type" "ssemuladd")
7175 (set_attr "mode" "TI")])
;; PMACSSDQH: V4SI sources (operands 1 and 2, at most one in memory per
;; alternative), V2DI accumulator (operand 3, a register tied to the
;; destination), V2DI result.  Both visible selection parallels start at
;; element 0.  The third alternative prints %1 and %2 swapped.
7177 (define_insn "sse5_pmacssdqh"
7178 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7183 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7184 (parallel [(const_int 0)
7188 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7189 (parallel [(const_int 0)
7191 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7192 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7194 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7195 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7196 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7197 [(set_attr "type" "ssemuladd")
7198 (set_attr "mode" "TI")])
;; PMACSDQL: V4SI sources (operands 1 and 2, at most one in memory per
;; alternative), V2DI accumulator (operand 3, a register tied to the
;; destination), V2DI result.  Both visible selection parallels start at
;; element 1.  The third alternative prints %1 and %2 swapped.
7200 (define_insn "sse5_pmacsdql"
7201 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7206 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7207 (parallel [(const_int 1)
7211 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7212 (parallel [(const_int 1)
7214 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7215 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7217 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7218 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7219 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7220 [(set_attr "type" "ssemuladd")
7221 (set_attr "mode" "TI")])
;; PMACSDQH: V4SI sources (operands 1 and 2, at most one in memory per
;; alternative), V2DI accumulator (operand 3, a register tied to the
;; destination), V2DI result.  Both visible selection parallels start at
;; element 0.  The third alternative prints %1 and %2 swapped.
7223 (define_insn "sse5_pmacsdqh"
7224 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7229 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7230 (parallel [(const_int 0)
7234 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7235 (parallel [(const_int 0)
7237 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7238 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7240 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7241 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7242 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7243 [(set_attr "type" "ssemuladd")
7244 (set_attr "mode" "TI")])
7246 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; PMACSSWD: V8HI sources (operands 1 and 2, at most one in memory per
;; alternative), V4SI accumulator (operand 3, a register tied to the
;; destination), V4SI result.  Both visible selection parallels start at
;; element 1.  The third alternative prints %1 and %2 swapped.
7247 (define_insn "sse5_pmacsswd"
7248 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7253 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7254 (parallel [(const_int 1)
7260 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7261 (parallel [(const_int 1)
7265 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7266 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7268 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7269 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7270 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7271 [(set_attr "type" "ssemuladd")
7272 (set_attr "mode" "TI")])
;; PMACSWD: V8HI sources (operands 1 and 2, at most one in memory per
;; alternative), V4SI accumulator (operand 3, a register tied to the
;; destination), V4SI result.  Both visible selection parallels start at
;; element 1.  The third alternative prints %1 and %2 swapped.
7274 (define_insn "sse5_pmacswd"
7275 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7280 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7281 (parallel [(const_int 1)
7287 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7288 (parallel [(const_int 1)
7292 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7293 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7295 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7296 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7297 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7298 [(set_attr "type" "ssemuladd")
7299 (set_attr "mode" "TI")])
;; PMADCSSWD: V8HI sources (operands 1 and 2, at most one in memory per
;; alternative), V4SI accumulator (operand 3, a register tied to the
;; destination), V4SI result.  Four selection parallels are visible: two
;; starting at element 0 and two starting at element 1.  The third
;; alternative prints %1 and %2 swapped.
7301 (define_insn "sse5_pmadcsswd"
7302 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7308 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7309 (parallel [(const_int 0)
7315 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7316 (parallel [(const_int 0)
7324 (parallel [(const_int 1)
7331 (parallel [(const_int 1)
7335 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7336 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7338 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7339 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7340 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7341 [(set_attr "type" "ssemuladd")
7342 (set_attr "mode" "TI")])
;; PMADCSWD: V8HI sources (operands 1 and 2, at most one in memory per
;; alternative), V4SI accumulator (operand 3, a register tied to the
;; destination), V4SI result.  Four selection parallels are visible: two
;; starting at element 0 and two starting at element 1.  The third
;; alternative prints %1 and %2 swapped.
7344 (define_insn "sse5_pmadcswd"
7345 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7351 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7352 (parallel [(const_int 0)
7358 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7359 (parallel [(const_int 0)
7367 (parallel [(const_int 1)
7374 (parallel [(const_int 1)
7378 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7379 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7381 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7382 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7383 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7384 [(set_attr "type" "ssemuladd")
7385 (set_attr "mode" "TI")])
7387 ;; SSE5 parallel XMM conditional moves
;; Bitwise select over every SSEMODE: operand 3 is the selector, operand 1
;; the value taken where it is set and operand 2 the value taken elsewhere.
;; Alternatives 0-3 emit pcmov, with the destination tied to operand 3
;; (alternatives 0, 1) or to operand 1 (alternatives 2, 3).
;; Alternatives 4 and 5 handle a "C" constant arm with the selector tied to
;; the destination (the constant is assumed to be the all-zero vector,
;; which is the only constant vector_move_operand is expected to accept):
;;   alt 4: operand 1 is the constant, so the result is ~selector & operand 2
;;          and the instruction must be andnps with %2;
;;   alt 5: operand 2 is the constant, so the result is selector & operand 1
;;          and the instruction must be andps with %1.
;; The two mnemonics were previously the other way round.
7388 (define_insn "sse5_pcmov_<mode>"
7389 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7390 (if_then_else:SSEMODE
7391 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7392 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7393 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7394 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7396 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7397 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7398 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7399 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7400 andnps\t{%2, %0|%0, %2}
7401 andps\t{%1, %0|%0, %1}"
7402 [(set_attr "type" "sse4arg")])
7404 ;; SSE5 horizontal add/subtract instructions
;; PHADDBW: one V16QI register-or-memory input, V8HI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 15.
7405 (define_insn "sse5_phaddbw"
7406 [(set (match_operand:V8HI 0 "register_operand" "=x")
7410 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7411 (parallel [(const_int 0)
7422 (parallel [(const_int 1)
7429 (const_int 15)])))))]
7431 "phaddbw\t{%1, %0|%0, %1}"
7432 [(set_attr "type" "sseiadd1")])
;; PHADDBD: one V16QI register-or-memory input, V4SI register result.  Four
;; selection parallels are visible, starting at elements 0, 1, 2 and 3; the
;; last one ends at element 15.
7434 (define_insn "sse5_phaddbd"
7435 [(set (match_operand:V4SI 0 "register_operand" "=x")
7440 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7441 (parallel [(const_int 0)
7448 (parallel [(const_int 1)
7456 (parallel [(const_int 2)
7463 (parallel [(const_int 3)
7466 (const_int 15)]))))))]
7468 "phaddbd\t{%1, %0|%0, %1}"
7469 [(set_attr "type" "sseiadd1")])
;; PHADDBQ: one V16QI register-or-memory input, V2DI register result.
;; Eight selection parallels are visible, starting at elements 0-3 and
;; 8-11; the last one ends at element 15.
7471 (define_insn "sse5_phaddbq"
7472 [(set (match_operand:V2DI 0 "register_operand" "=x")
7478 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7479 (parallel [(const_int 0)
7484 (parallel [(const_int 1)
7490 (parallel [(const_int 2)
7495 (parallel [(const_int 3)
7502 (parallel [(const_int 8)
7507 (parallel [(const_int 9)
7513 (parallel [(const_int 10)
7518 (parallel [(const_int 11)
7519 (const_int 15)])))))))]
7521 "phaddbq\t{%1, %0|%0, %1}"
7522 [(set_attr "type" "sseiadd1")])
;; PHADDWD: one V8HI register-or-memory input, V4SI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 7.
7524 (define_insn "sse5_phaddwd"
7525 [(set (match_operand:V4SI 0 "register_operand" "=x")
7529 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7530 (parallel [(const_int 0)
7537 (parallel [(const_int 1)
7540 (const_int 7)])))))]
7542 "phaddwd\t{%1, %0|%0, %1}"
7543 [(set_attr "type" "sseiadd1")])
;; PHADDWQ: one V8HI register-or-memory input, V2DI register result.  Four
;; selection parallels are visible, starting at elements 0, 1, 2 and 3; the
;; last one ends at element 7.
7545 (define_insn "sse5_phaddwq"
7546 [(set (match_operand:V2DI 0 "register_operand" "=x")
7551 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7552 (parallel [(const_int 0)
7557 (parallel [(const_int 1)
7563 (parallel [(const_int 2)
7568 (parallel [(const_int 3)
7569 (const_int 7)]))))))]
7571 "phaddwq\t{%1, %0|%0, %1}"
7572 [(set_attr "type" "sseiadd1")])
;; PHADDDQ: one V4SI register-or-memory input, V2DI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 3.
7574 (define_insn "sse5_phadddq"
7575 [(set (match_operand:V2DI 0 "register_operand" "=x")
7579 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7580 (parallel [(const_int 0)
7585 (parallel [(const_int 1)
7586 (const_int 3)])))))]
7588 "phadddq\t{%1, %0|%0, %1}"
7589 [(set_attr "type" "sseiadd1")])
;; PHADDUBW: one V16QI register-or-memory input, V8HI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 15.
7591 (define_insn "sse5_phaddubw"
7592 [(set (match_operand:V8HI 0 "register_operand" "=x")
7596 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7597 (parallel [(const_int 0)
7608 (parallel [(const_int 1)
7615 (const_int 15)])))))]
7617 "phaddubw\t{%1, %0|%0, %1}"
7618 [(set_attr "type" "sseiadd1")])
;; PHADDUBD: one V16QI register-or-memory input, V4SI register result.
;; Four selection parallels are visible, starting at elements 0, 1, 2 and
;; 3; the last one ends at element 15.
7620 (define_insn "sse5_phaddubd"
7621 [(set (match_operand:V4SI 0 "register_operand" "=x")
7626 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7627 (parallel [(const_int 0)
7634 (parallel [(const_int 1)
7642 (parallel [(const_int 2)
7649 (parallel [(const_int 3)
7652 (const_int 15)]))))))]
7654 "phaddubd\t{%1, %0|%0, %1}"
7655 [(set_attr "type" "sseiadd1")])
;; PHADDUBQ: one V16QI register-or-memory input, V2DI register result.
;; Eight selection parallels are visible, starting at elements 0-3 and
;; 8-11; the last one ends at element 15.
7657 (define_insn "sse5_phaddubq"
7658 [(set (match_operand:V2DI 0 "register_operand" "=x")
7664 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7665 (parallel [(const_int 0)
7670 (parallel [(const_int 1)
7676 (parallel [(const_int 2)
7681 (parallel [(const_int 3)
7688 (parallel [(const_int 8)
7693 (parallel [(const_int 9)
7699 (parallel [(const_int 10)
7704 (parallel [(const_int 11)
7705 (const_int 15)])))))))]
7707 "phaddubq\t{%1, %0|%0, %1}"
7708 [(set_attr "type" "sseiadd1")])
;; PHADDUWD: one V8HI register-or-memory input, V4SI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 7.
7710 (define_insn "sse5_phadduwd"
7711 [(set (match_operand:V4SI 0 "register_operand" "=x")
7715 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7716 (parallel [(const_int 0)
7723 (parallel [(const_int 1)
7726 (const_int 7)])))))]
7728 "phadduwd\t{%1, %0|%0, %1}"
7729 [(set_attr "type" "sseiadd1")])
;; PHADDUWQ: one V8HI register-or-memory input, V2DI register result.  Four
;; selection parallels are visible, starting at elements 0, 1, 2 and 3; the
;; last one ends at element 7.
7731 (define_insn "sse5_phadduwq"
7732 [(set (match_operand:V2DI 0 "register_operand" "=x")
7737 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7738 (parallel [(const_int 0)
7743 (parallel [(const_int 1)
7749 (parallel [(const_int 2)
7754 (parallel [(const_int 3)
7755 (const_int 7)]))))))]
7757 "phadduwq\t{%1, %0|%0, %1}"
7758 [(set_attr "type" "sseiadd1")])
;; PHADDUDQ: one V4SI register-or-memory input, V2DI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 3.
7760 (define_insn "sse5_phaddudq"
7761 [(set (match_operand:V2DI 0 "register_operand" "=x")
7765 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7766 (parallel [(const_int 0)
7771 (parallel [(const_int 1)
7772 (const_int 3)])))))]
7774 "phaddudq\t{%1, %0|%0, %1}"
7775 [(set_attr "type" "sseiadd1")])
;; PHSUBBW: one V16QI register-or-memory input, V8HI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 15.
7777 (define_insn "sse5_phsubbw"
7778 [(set (match_operand:V8HI 0 "register_operand" "=x")
7782 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7783 (parallel [(const_int 0)
7794 (parallel [(const_int 1)
7801 (const_int 15)])))))]
7803 "phsubbw\t{%1, %0|%0, %1}"
7804 [(set_attr "type" "sseiadd1")])
;; PHSUBWD: one V8HI register-or-memory input, V4SI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 7.
7806 (define_insn "sse5_phsubwd"
7807 [(set (match_operand:V4SI 0 "register_operand" "=x")
7811 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7812 (parallel [(const_int 0)
7819 (parallel [(const_int 1)
7822 (const_int 7)])))))]
7824 "phsubwd\t{%1, %0|%0, %1}"
7825 [(set_attr "type" "sseiadd1")])
;; PHSUBDQ: one V4SI register-or-memory input, V2DI register result.  The
;; visible selection parallels start at elements 0 and 1; the last one ends
;; at element 3.
7827 (define_insn "sse5_phsubdq"
7828 [(set (match_operand:V2DI 0 "register_operand" "=x")
7832 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7833 (parallel [(const_int 0)
7838 (parallel [(const_int 1)
7839 (const_int 3)])))))]
7841 "phsubdq\t{%1, %0|%0, %1}"
7842 [(set_attr "type" "sseiadd1")])
7844 ;; SSE5 permute instructions
;; PPERM, modelled as an opaque unspec (UNSPEC_SSE5_PERMUTE) over three
;; V16QI operands.  There are four alternatives: the destination is tied to
;; operand 1 in the first two and to operand 3 in the last two, and at most
;; one of the remaining operands may be memory in each.
7845 (define_insn "sse5_pperm"
7846 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7848 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7849 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7850 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7851 UNSPEC_SSE5_PERMUTE))]
7852 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7853 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7854 [(set_attr "type" "sse4arg")
7855 (set_attr "mode" "TI")])
7857 ;; The following are for the various unpack insns, which don't need the first
7858 ;; source operand, so we can just use the output operand for the first operand.
7859 ;; This allows either of the other two operands to be a memory operand. We
7860 ;; can't just use the first operand as an argument to the normal pperm because
7861 ;; then an output-only argument suddenly becomes an input operand.
;; pperm with the destination printed in the first-source slot (%0 appears
;; twice in the template).  Operand 1 is the V16QI data source, operand 2
;; the const_int selection parallel and operand 3 the V16QI value named in
;; the (use ...).  The constraints allow memory for operand 1 or for
;; operand 3 but not both, so the condition requires one of those two to be
;; a register.  It previously tested operands[2], which is the const_int
;; parallel and never a register, so a memory operand 1 was always rejected.
7862 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7863 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7866 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7867 (match_operand 2 "" "")))) ;; parallel with const_int's
7868 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7870 && (register_operand (operands[1], V16QImode)
7871 || register_operand (operands[3], V16QImode))"
7872 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7873 [(set_attr "type" "sseadd")
7874 (set_attr "mode" "TI")])
;; pperm with the destination printed in the first-source slot (%0 appears
;; twice in the template).  Operand 1 is the V16QI data source, operand 2
;; the const_int selection parallel and operand 3 the V16QI value named in
;; the (use ...).  The constraints allow memory for operand 1 or for
;; operand 3 but not both, so the condition requires one of those two to be
;; a register.  It previously tested operands[2], which is the const_int
;; parallel and never a register, so a memory operand 1 was always rejected.
7876 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7877 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7880 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7881 (match_operand 2 "" "")))) ;; parallel with const_int's
7882 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7884 && (register_operand (operands[1], V16QImode)
7885 || register_operand (operands[3], V16QImode))"
7886 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7887 [(set_attr "type" "sseadd")
7888 (set_attr "mode" "TI")])
;; pperm with the destination printed in the first-source slot (%0 appears
;; twice in the template).  Operand 1 is the V8HI data source, operand 2
;; the const_int selection parallel and operand 3 the V16QI value named in
;; the (use ...).  The constraints allow memory for operand 1 or for
;; operand 3 but not both, so the condition requires one of those two to be
;; a register.  It previously tested operands[2], which is the const_int
;; parallel and never a register, so a memory operand 1 was always rejected.
7890 (define_insn "sse5_pperm_zero_v8hi_v4si"
7891 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7894 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7895 (match_operand 2 "" "")))) ;; parallel with const_int's
7896 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7898 && (register_operand (operands[1], V8HImode)
7899 || register_operand (operands[3], V16QImode))"
7900 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7901 [(set_attr "type" "sseadd")
7902 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant, per the pattern name) producing a V4SI
;; result from V8HI operand 1.  The destination %0 doubles as pperm's first
;; source, operand 2 is the parallel of const_int's, and operand 3 is the
;; V16QI operand attached with a `use'.
;; The alternatives allow either operand 1 or operand 3 to be in memory, so
;; the condition must require that one of *those two* is a register.
;; Operand 2 is the const_int parallel and can never be a register_operand,
;; so testing it would reject every memory operand 1.
7904 (define_insn "sse5_pperm_sign_v8hi_v4si"
7905 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7908 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7909 (match_operand 2 "" "")))) ;; parallel with const_int's
7910 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7912 && (register_operand (operands[1], V8HImode)
7913 || register_operand (operands[3], V16QImode))"
7914 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7915 [(set_attr "type" "sseadd")
7916 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant, per the pattern name) producing a V2DI
;; result from V4SI operand 1.  The destination %0 doubles as pperm's first
;; source, operand 2 is the parallel of const_int's, and operand 3 is the
;; V16QI operand attached with a `use'.
;; The alternatives allow either operand 1 or operand 3 to be in memory, so
;; the condition must require that one of *those two* is a register.
;; Operand 2 is the const_int parallel and can never be a register_operand,
;; so testing it would reject every memory operand 1.
7918 (define_insn "sse5_pperm_zero_v4si_v2di"
7919 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7922 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7923 (match_operand 2 "" "")))) ;; parallel with const_int's
7924 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7926 && (register_operand (operands[1], V4SImode)
7927 || register_operand (operands[3], V16QImode))"
7928 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7929 [(set_attr "type" "sseadd")
7930 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant, per the pattern name) producing a V2DI
;; result from V4SI operand 1.  The destination %0 doubles as pperm's first
;; source, operand 2 is the parallel of const_int's, and operand 3 is the
;; V16QI operand attached with a `use'.
;; The alternatives allow either operand 1 or operand 3 to be in memory, so
;; the condition must require that one of *those two* is a register.
;; Operand 2 is the const_int parallel and can never be a register_operand,
;; so testing it would reject every memory operand 1.
7932 (define_insn "sse5_pperm_sign_v4si_v2di"
7933 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7936 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7937 (match_operand 2 "" "")))) ;; parallel with const_int's
7938 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7940 && (register_operand (operands[1], V4SImode)
7941 || register_operand (operands[3], V16QImode))"
7942 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7943 [(set_attr "type" "sseadd")
7944 (set_attr "mode" "TI")])
7946 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; V2DI x V2DI -> V4SI.  Operands 1 and 2 are the two V2DI inputs; operand 3
;; is a V16QI operand attached with a `use' and printed as pperm's last
;; source.  The four alternatives tie either operand 1 or operand 3 to the
;; destination ("0"); operand validity is delegated to
;; ix86_sse5_valid_op_p.
7947 (define_insn "sse5_pperm_pack_v2di_v4si"
7948 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7951 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7953 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7954 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7955 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7956 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7957 [(set_attr "type" "sse4arg")
7958 (set_attr "mode" "TI")])
;; pperm pack, V4SI x V4SI -> V8HI.  Operands 1 and 2 are the two V4SI
;; inputs; operand 3 is a V16QI operand attached with a `use' and printed as
;; pperm's last source.  The four alternatives tie either operand 1 or
;; operand 3 to the destination ("0"); operand validity is delegated to
;; ix86_sse5_valid_op_p.
7960 (define_insn "sse5_pperm_pack_v4si_v8hi"
7961 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7964 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7966 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7967 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7968 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7969 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7970 [(set_attr "type" "sse4arg")
7971 (set_attr "mode" "TI")])
;; pperm pack, V8HI x V8HI -> V16QI.  Operands 1 and 2 are the two V8HI
;; inputs; operand 3 is a V16QI operand attached with a `use' and printed as
;; pperm's last source.  The four alternatives tie either operand 1 or
;; operand 3 to the destination ("0"); operand validity is delegated to
;; ix86_sse5_valid_op_p.
7973 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7974 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7977 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7979 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7980 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7981 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7982 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7983 [(set_attr "type" "sse4arg")
7984 (set_attr "mode" "TI")])
7986 ;; Floating point permutation (permps, permpd)
;; Instantiated for SSEMODEF2P (V4SF, V2DF); <ssemodesuffixf4> supplies the
;; "ps"/"pd" suffix.  Operands 1 and 2 are the data vectors and operand 3 is
;; a V16QI operand, all wrapped in UNSPEC_SSE5_PERMUTE.  The alternatives tie
;; either operand 1 or operand 3 to the destination; operand validity is
;; delegated to ix86_sse5_valid_op_p.
7987 (define_insn "sse5_perm<mode>"
7988 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7990 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7991 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7992 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7993 UNSPEC_SSE5_PERMUTE))]
7994 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7995 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7996 [(set_attr "type" "sse4arg")
7997 (set_attr "mode" "<MODE>")])
7999 ;; SSE5 packed rotate instructions
;; Rotate by an immediate: operand 2 is an SI constant ("n") accepted by the
;; const_0_to_<sserotatemax>_operand predicate.  Instantiated for SSEMODE1248;
;; <ssevecsize> selects the b/w/d/q suffix of the prot mnemonic.
8000 (define_insn "rotl<mode>3"
8001 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8003 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8004 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8006 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8007 [(set_attr "type" "sseishft")
8008 (set_attr "mode" "TI")])
;; Rotate where the count (operand 2) is a vector of the same mode as the
;; data (operand 1).  The two alternatives allow either operand 1 or
;; operand 2 to be in memory; operand validity is delegated to
;; ix86_sse5_valid_op_p.  Emits the same prot{b,w,d,q} mnemonic as the
;; immediate form.
8010 (define_insn "sse5_rotl<mode>3"
8011 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8013 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8014 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
8015 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8016 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8017 [(set_attr "type" "sseishft")
8018 (set_attr "mode" "TI")])
8020 ;; SSE5 packed shift instructions. Note that negative values for the shift
8021 ;; amount convert this into a right shift instead of a left shift. For now,
8022 ;; model this with an UNSPEC instead of using ashift/lshift since the rest of
8023 ;; the x86 does not have the concept of negating the shift amount. Also, there
;; is no LSHIFT rtx code (the original sentence was cut off here; presumably
;; this is what was meant -- confirm).
;;
;; psha{b,w,d,q}: operand 1 is the data vector and operand 2 the vector of
;; shift counts, wrapped in UNSPEC_SSE5_ASHIFT.  Either operand may be in
;; memory, subject to ix86_sse5_valid_op_p.
8024 (define_insn "sse5_ashl<mode>3"
8025 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8027 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8028 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8029 UNSPEC_SSE5_ASHIFT))]
8030 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8031 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8032 [(set_attr "type" "sseishft")
8033 (set_attr "mode" "TI")])
;; pshl{b,w,d,q}: same operand layout as sse5_ashl<mode>3 but wrapped in
;; UNSPEC_SSE5_LSHIFT.  Operand 1 is the data vector and operand 2 the vector
;; of shift counts; either may be in memory, subject to ix86_sse5_valid_op_p.
8035 (define_insn "sse5_lshl<mode>3"
8036 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8038 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8039 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8040 UNSPEC_SSE5_LSHIFT))]
8041 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8042 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8043 [(set_attr "type" "sseishft")
8044 (set_attr "mode" "TI")])
8046 ;; SSE5 FRCZ support
;; Packed form: instantiated for SSEMODEF2P (V4SF, V2DF), emitting
;; frczps/frczpd via <ssemodesuffixf4>.  The single source operand may be a
;; register or memory; the destination is a register.
8048 (define_insn "sse5_frcz<mode>2"
8049 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8051 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8054 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8055 [(set_attr "type" "ssecvt1")
8056 (set_attr "prefix_extra" "1")
8057 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector form of frcz: emits frczss/frczsd via <ssemodesuffixf2s>.
;; The result is a vec_merge of the operation on operand 2 with operand 1,
;; and operand 1 is tied to the destination (constraint "0"), so only
;; operand 2 appears in the assembler template.
8060 (define_insn "sse5_vmfrcz<mode>2"
8061 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8062 (vec_merge:SSEMODEF2P
8064 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8066 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8069 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8070 [(set_attr "type" "ssecvt1")
8071 (set_attr "prefix_extra" "1")
8072 (set_attr "mode" "<MODE>")])
;; cvtph2ps: converts a V4HI source (register or memory) to a V4SF register
;; result; modelled as an unspec.
8074 (define_insn "sse5_cvtph2ps"
8075 [(set (match_operand:V4SF 0 "register_operand" "=x")
8076 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8079 "cvtph2ps\t{%1, %0|%0, %1}"
8080 [(set_attr "type" "ssecvt")
8081 (set_attr "mode" "V4SF")])
;; cvtps2ph: converts a V4SF register source to a V4HI result; modelled as an
;; unspec.  Unlike cvtph2ps, it is the destination that may be in memory
;; ("=xm") while the source must be a register.
8083 (define_insn "sse5_cvtps2ph"
8084 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8085 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8088 "cvtps2ph\t{%1, %0|%0, %1}"
8089 [(set_attr "type" "ssecvt")
8090 (set_attr "mode" "V4SF")])
8092 ;; Scalar versions of the com instructions that use vector types and are
8093 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8094 ;; com instructions fill in 0's in the upper bits instead of leaving them
8095 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; The expander's preparation code sets operands[4] to the all-zero vector of
;; the pattern's mode (CONST0_RTX), which becomes the vec_merge's second arm.
8096 (define_expand "sse5_vmmaskcmp<mode>3"
8097 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8098 (vec_merge:SSEMODEF2P
8099 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8100 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8101 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8106 operands[4] = CONST0_RTX (<MODE>mode);
;; Matcher for the scalar com comparison: the comparison (operator 1 on
;; operands 2 and 3) is vec_merged with operand 4.  The mnemonic is built
;; from "com", %Y1 (presumably the condition name printed from operator 1 --
;; confirm against the operand-printing code) and the ss/sd suffix from
;; <ssemodesuffixf2s>.
8109 (define_insn "*sse5_vmmaskcmp<mode>3"
8110 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8111 (vec_merge:SSEMODEF2P
8112 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8113 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8114 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8115 (match_operand:SSEMODEF2P 4 "")
8118 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8119 [(set_attr "type" "sse4arg")
8120 (set_attr "mode" "<ssescalarmode>")])
8122 ;; We don't have a comparison operator that always returns true/false, so
8123 ;; handle comfalse and comtrue specially.
;; Operand 3 is a const_int selector that picks one of four mnemonics; the
;; s/d suffix comes from <ssemodesuffixf2c>.  The four switch arms are, in
;; order: comfalse scalar, comfalse packed, comtrue scalar, comtrue packed.
;; The last two arms previously repeated the comfalse strings, so comtrue
;; could never be emitted.
;; NOTE(review): the arm order is inferred from the s/p/s/p string pattern;
;; confirm against the case labels.
8124 (define_insn "sse5_com_tf<mode>3"
8125 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8127 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8129 (match_operand:SI 3 "const_int_operand" "n")]
8130 UNSPEC_SSE5_TRUEFALSE))]
8133 const char *ret = NULL;
8135 switch (INTVAL (operands[3]))
8138 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8142 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8146 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8150 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8159 [(set_attr "type" "ssecmp")
8160 (set_attr "mode" "<MODE>")])
;; Packed floating-point mask compare for SSEMODEF2P (V4SF, V2DF).
;; Operator 1 must satisfy sse5_comparison_float_operator; operand 2 is a
;; register and operand 3 may be memory.  The mnemonic is "com", then %Y1
;; (presumably the condition name printed from operator 1 -- confirm), then
;; the ps/pd suffix from <ssemodesuffixf4>.
8162 (define_insn "sse5_maskcmp<mode>3"
8163 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8164 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8165 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8166 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8168 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8169 [(set_attr "type" "ssecmp")
8170 (set_attr "mode" "<MODE>")])
;; Integer mask compare for SSEMODE1248; it shares the sse5_maskcmp<mode>3
;; name template with the floating-point pattern but iterates over the
;; integer vector modes.  Operator 1 must satisfy
;; ix86_comparison_int_operator; the mnemonic is "pcom", then %Y1 (presumably
;; the condition name -- confirm), then the b/w/d/q suffix from <ssevecsize>.
;; NOTE(review): "type" is sse4arg here but ssecmp on the unsigned variant of
;; this pattern -- confirm that the difference is intended.
8172 (define_insn "sse5_maskcmp<mode>3"
8173 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8174 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8175 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8176 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8178 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8179 [(set_attr "type" "sse4arg")
8180 (set_attr "mode" "TI")])
;; Unsigned integer mask compare for SSEMODE1248.  Operator 1 must satisfy
;; ix86_comparison_uns_operator; the mnemonic is "pcom", then %Y1 (presumably
;; the condition name -- confirm), then "u" and the b/w/d/q suffix from
;; <ssevecsize>.
8182 (define_insn "sse5_maskcmp_uns<mode>3"
8183 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8184 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8185 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8186 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8188 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8189 [(set_attr "type" "ssecmp")
8190 (set_attr "mode" "TI")])
8192 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8193 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8194 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_SSE5_UNSIGNED_CMP; the operands and
;; the assembler template are the same as in sse5_maskcmp_uns<mode>3.
8195 (define_insn "sse5_maskcmp_uns2<mode>3"
8196 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8198 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8199 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8200 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8201 UNSPEC_SSE5_UNSIGNED_CMP))]
8203 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8204 [(set_attr "type" "ssecmp")
8205 (set_attr "mode" "TI")])
8207 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8208 ;; being added here to be complete.
;; Operand 3 is a const_int selector: a nonzero value emits
;; pcomtrue{b,w,d,q} and zero emits pcomfalse{b,w,d,q}.  Operands 1 and 2 are
;; still printed as the instruction's sources.
8209 (define_insn "sse5_pcom_tf<mode>3"
8210 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8212 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8213 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8214 (match_operand:SI 3 "const_int_operand" "n")]
8215 UNSPEC_SSE5_TRUEFALSE))]
8218 return ((INTVAL (operands[3]) != 0)
8219 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8220 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8222 [(set_attr "type" "ssecmp")
8223 (set_attr "mode" "TI")])