1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes above.  The digits in each name line
;; up with the element widths in bytes of the listed modes (1 for V16QI,
;; 2 for V8HI, 4 for V4SI, 8 for V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; SSEMODEF4 covers the scalar and the vector float modes; SSEMODEF2P covers
;; only the two vector float modes.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; Same four modes, but the vector modes map to the scalar suffixes "ss"/"sd".
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; Single-letter suffix for the two vector float modes ("s" or "d").
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in SSEMODE.  The expansion is delegated to
;; ix86_expand_vector_move.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for SSEMODE with three alternatives: constant ("C") into an
;; "x" register, register-or-memory into an "x" register, and "x" register
;; into memory.  The condition requires at least one operand to be a
;; register.  Constants are emitted through standard_sse_constant_opcode;
;; otherwise the insn's "mode" attribute chooses between the movaps, movapd
;; and movdqa templates.  The "mode" attribute is V4SF when optimizing for
;; size or without TARGET_SSE2 (TARGET_SSE_TYPELESS_STORES is also tested
;; for alternative 2), follows V4SFmode/V2DFmode for the float vector modes,
;; and is TI otherwise.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 switch (get_attr_mode (insn))
97 return "movaps\t{%1, %0|%0, %1}";
99 return "movapd\t{%1, %0|%0, %1}";
101 return "movdqa\t{%1, %0|%0, %1}";
107 [(set_attr "type" "sselog1,ssemov,ssemov")
109 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
110 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
111 (and (eq_attr "alternative" "2")
112 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
114 (const_string "V4SF")
115 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
116 (const_string "V4SF")
117 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
118 (const_string "V2DF")
120 (const_string "TI")))])
122 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
123 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
124 ;; from memory, we'd prefer to load the memory directly into the %xmm
125 ;; register. To facilitate this happy circumstance, this pattern won't
126 ;; split until after register allocation. If the 64-bit value didn't
127 ;; come from memory, this is the best we can do. This is much better
128 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from it.
;; The split runs only once reload_completed holds and handles two cases.
;; A register source is assembled from its two SImode halves (subreg byte
;; offsets 0 and 4): each half is loaded with sse2_loadld, the second one
;; into scratch operand 2, and the two are combined with sse2_punpckldq.
;; A memory source is loaded with vec_concatv2di, with const0_rtx as the
;; second element.
131 (define_insn_and_split "movdi_to_sse"
133 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
134 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
135 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
136 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
138 "&& reload_completed"
141 if (register_operand (operands[1], DImode))
143 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
144 Assemble the 64-bit DImode value in an xmm register. */
145 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
146 gen_rtx_SUBREG (SImode, operands[1], 0)));
147 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
148 gen_rtx_SUBREG (SImode, operands[1], 4)));
149 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
151 else if (memory_operand (operands[1], DImode))
152 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; After reload, rewrite a V4SF move whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector for the
;; replacement pattern, which duplicates the scalar.
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
;; After reload, rewrite a V2DF move whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode scalar
;; (subreg of operand 1 at offset 0) with a DFmode zero in operand 2.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register of any SSEMODE mode; delegates to
;; ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for every SSEMODE mode; delegates to
;; ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Emits movups/movupd (suffix from ssemodesuffixf2c) for the vector float
;; modes.  Alternatives: register-or-memory into register, or register into
;; memory; the condition rejects a memory-to-memory move.
198 (define_insn "<sse>_movup<ssemodesuffixf2c>"
199 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
201 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
203 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
204 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
205 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
206 [(set_attr "type" "ssemov")
207 (set_attr "mode" "<MODE>")])
;; Emits movdqu for V16QI.  The source is wrapped in an unspec; the
;; condition requires TARGET_SSE2 and rejects a memory-to-memory move.
209 (define_insn "sse2_movdqu"
210 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
211 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
213 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
214 "movdqu\t{%1, %0|%0, %1}"
215 [(set_attr "type" "ssemov")
216 (set_attr "prefix_data16" "1")
217 (set_attr "mode" "TI")])
;; Emits movntps/movntpd (suffix from ssemodesuffixf2c): stores register
;; operand 1 of a vector float mode to memory operand 0.
219 (define_insn "<sse>_movnt<mode>"
220 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
222 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
224 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
225 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
226 [(set_attr "type" "ssemov")
227 (set_attr "mode" "<MODE>")])
;; Emits movntdq: stores V2DI register operand 1 (wrapped in an unspec) to
;; memory operand 0.
229 (define_insn "sse2_movntv2di"
230 [(set (match_operand:V2DI 0 "memory_operand" "=m")
231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
234 "movntdq\t{%1, %0|%0, %1}"
235 [(set_attr "type" "ssecvt")
236 (set_attr "prefix_data16" "1")
237 (set_attr "mode" "TI")])
;; Emits movnti: stores SImode register operand 1 (constraint "r", wrapped
;; in an unspec) to memory operand 0.
239 (define_insn "sse2_movntsi"
240 [(set (match_operand:SI 0 "memory_operand" "=m")
241 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
244 "movnti\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssecvt")
246 (set_attr "mode" "V2DF")])
;; Emits lddqu: loads V16QI memory operand 1 (wrapped in an unspec) into
;; register operand 0.
248 (define_insn "sse3_lddqu"
249 [(set (match_operand:V16QI 0 "register_operand" "=x")
250 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
253 "lddqu\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssecvt")
255 (set_attr "prefix_rep" "1")
256 (set_attr "mode" "TI")])
258 ; Expand patterns for non-temporal stores. At the moment, only those
259 ; that directly map to insns are defined; it would be possible to
260 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for the vector float modes (SSEMODEF2P):
;; register operand 1 is stored to memory operand 0.
262 (define_expand "storent<mode>"
263 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
265 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
267 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander for the modes of the MODEF iterator:
;; register operand 1 is stored to memory operand 0.
270 (define_expand "storent<mode>"
271 [(set (match_operand:MODEF 0 "memory_operand" "")
273 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI; the RTL has the same shape as the
;; sse2_movntv2di insn (unspec of a register stored to memory).
278 (define_expand "storentv2di"
279 [(set (match_operand:V2DI 0 "memory_operand" "")
280 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode; the RTL has the same shape as the
;; sse2_movntsi insn (unspec of a register stored to memory).
285 (define_expand "storentsi"
286 [(set (match_operand:SI 0 "memory_operand" "")
287 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294 ;; Parallel floating point arithmetic
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on the vector float modes for the codes iterated by
;; <code>.  All work is done by ix86_expand_fp_absneg_operator, after which
;; the expander finishes with DONE.
298 (define_expand "<code><mode>2"
299 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
301 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
302 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
303 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Vector float add/subtract expander.  Both sources may be register or
;; memory; ix86_fixup_binary_operands_no_copy adjusts the operands before
;; the pattern is emitted.
305 (define_expand "<plusminus_insn><mode>3"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
307 (plusminus:SSEMODEF2P
308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
310 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
311 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract insn: emits <plusminus_mnemonic>ps/pd.  Operand 1 is
;; tied to the destination (constraint "<comm>0"); operand 2 may be a
;; register or memory.  The condition also checks ix86_binary_operator_ok.
313 (define_insn "*<plusminus_insn><mode>3"
314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
315 (plusminus:SSEMODEF2P
316 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
317 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
318 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
319 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
320 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
321 [(set_attr "type" "sseadd")
322 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") add/subtract on the vector float modes: the plusminus
;; result is the first arm of a vec_merge and the emitted instruction is
;; <plusminus_mnemonic>ss/sd.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
;; The operator check is given <MODE>mode, so that it sees the mode being
;; instantiated (V4SF or V2DF) rather than a fixed V4SFmode, in line with
;; the other patterns in this file that call ix86_binary_operator_ok.
324 (define_insn "<sse>_vm<plusminus_insn><mode>3"
325 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
326 (vec_merge:SSEMODEF2P
327 (plusminus:SSEMODEF2P
328 (match_operand:SSEMODEF2P 1 "register_operand" "0")
329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
332 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
333 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
334 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
335 [(set_attr "type" "sseadd")
336 (set_attr "mode" "<ssescalarmode>")])
;; Vector float multiply expander; ix86_fixup_binary_operands_no_copy
;; adjusts the operands for MULT before the pattern is emitted.
338 (define_expand "mul<mode>3"
339 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
341 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
342 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
343 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
344 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed multiply insn: emits mulps/mulpd.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.
346 (define_insn "*mul<mode>3"
347 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
349 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
350 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
351 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
352 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
353 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
354 [(set_attr "type" "ssemul")
355 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply inside a vec_merge: emits mulss/mulsd.  Operand 1
;; is a register tied to the destination; the insn mode is the scalar mode.
357 (define_insn "<sse>_vmmul<mode>3"
358 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
359 (vec_merge:SSEMODEF2P
361 (match_operand:SSEMODEF2P 1 "register_operand" "0")
362 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
365 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
366 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
367 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
368 [(set_attr "type" "ssemul")
369 (set_attr "mode" "<ssescalarmode>")])
;; V4SF divide expander.  Operands are first adjusted with
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, the code is not optimized for size, and the
;; finite-math-only, no-trapping-math and unsafe-math flags all hold, the
;; division is expanded through ix86_emit_swdivsf.
371 (define_expand "divv4sf3"
372 [(set (match_operand:V4SF 0 "register_operand" "")
373 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
374 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
379 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
380 && flag_finite_math_only && !flag_trapping_math
381 && flag_unsafe_math_optimizations)
383 ix86_emit_swdivsf (operands[0], operands[1],
384 operands[2], V4SFmode);
;; V2DF divide expander; only adjusts the operands with
;; ix86_fixup_binary_operands_no_copy.
389 (define_expand "divv2df3"
390 [(set (match_operand:V2DF 0 "register_operand" "")
391 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
392 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
394 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed divide insn: emits divps/divpd.  Operand 1 is a register tied to
;; the destination; operand 2 may be a register or memory.
396 (define_insn "<sse>_div<mode>3"
397 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
399 (match_operand:SSEMODEF2P 1 "register_operand" "0")
400 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
401 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
402 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
403 [(set_attr "type" "ssediv")
404 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") divide inside a vec_merge: emits divss/divsd.  Operand 1
;; is a register tied to the destination; the insn mode is the scalar mode.
406 (define_insn "<sse>_vmdiv<mode>3"
407 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
408 (vec_merge:SSEMODEF2P
410 (match_operand:SSEMODEF2P 1 "register_operand" "0")
411 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
414 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
415 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
416 [(set_attr "type" "ssediv")
417 (set_attr "mode" "<ssescalarmode>")])
;; Emits rcpps: the V4SF source (register or memory) is wrapped in
;; UNSPEC_RCP and the result goes to register operand 0.
419 (define_insn "sse_rcpv4sf2"
420 [(set (match_operand:V4SF 0 "register_operand" "=x")
422 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
424 "rcpps\t{%1, %0|%0, %1}"
425 [(set_attr "type" "sse")
426 (set_attr "mode" "V4SF")])
;; Scalar form: emits rcpss on operand 1.  Operand 2 is a register tied to
;; the destination; the insn mode is SF.
428 (define_insn "sse_vmrcpv4sf2"
429 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
433 (match_operand:V4SF 2 "register_operand" "0")
436 "rcpss\t{%1, %0|%0, %1}"
437 [(set_attr "type" "sse")
438 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the code is not optimized for size, and the finite-math-only,
;; no-trapping-math and unsafe-math flags all hold, the expansion goes
;; through ix86_emit_swsqrtsf with a final argument of 0.
440 (define_expand "sqrtv4sf2"
441 [(set (match_operand:V4SF 0 "register_operand" "")
442 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
445 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
446 && flag_finite_math_only && !flag_trapping_math
447 && flag_unsafe_math_optimizations)
449 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits sqrtps for a V4SF source in a register or in memory.
454 (define_insn "sse_sqrtv4sf2"
455 [(set (match_operand:V4SF 0 "register_operand" "=x")
456 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
458 "sqrtps\t{%1, %0|%0, %1}"
459 [(set_attr "type" "sse")
460 (set_attr "mode" "V4SF")])
;; Emits sqrtpd for a V2DF source in a register or in memory.
462 (define_insn "sqrtv2df2"
463 [(set (match_operand:V2DF 0 "register_operand" "=x")
464 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
466 "sqrtpd\t{%1, %0|%0, %1}"
467 [(set_attr "type" "sse")
468 (set_attr "mode" "V2DF")])
;; Scalar ("vm") square root inside a vec_merge: emits sqrtss/sqrtsd on
;; operand 1.  Operand 2 is a register tied to the destination.
470 (define_insn "<sse>_vmsqrt<mode>2"
471 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
472 (vec_merge:SSEMODEF2P
474 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
475 (match_operand:SSEMODEF2P 2 "register_operand" "0")
477 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
478 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
479 [(set_attr "type" "sse")
480 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT); expands through
;; ix86_emit_swsqrtsf with a final argument of 1.
482 (define_expand "rsqrtv4sf2"
483 [(set (match_operand:V4SF 0 "register_operand" "")
485 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
488 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Emits rsqrtps: the V4SF source (register or memory) is wrapped in
;; UNSPEC_RSQRT.
492 (define_insn "sse_rsqrtv4sf2"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
497 "rsqrtps\t{%1, %0|%0, %1}"
498 [(set_attr "type" "sse")
499 (set_attr "mode" "V4SF")])
;; Scalar form: emits rsqrtss on operand 1.  Operand 2 is a register tied
;; to the destination; the insn mode is SF.
501 (define_insn "sse_vmrsqrtv4sf2"
502 [(set (match_operand:V4SF 0 "register_operand" "=x")
504 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
506 (match_operand:V4SF 2 "register_operand" "0")
509 "rsqrtss\t{%1, %0|%0, %1}"
510 [(set_attr "type" "sse")
511 (set_attr "mode" "SF")])
513 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
514 ;; isn't really correct, as those rtl operators aren't defined when
515 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector float min/max expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands are then adjusted
;; with ix86_fixup_binary_operands_no_copy.
517 (define_expand "<code><mode>3"
518 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
520 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
521 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
522 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
524 if (!flag_finite_math_only)
525 operands[1] = force_reg (<MODE>mode, operands[1]);
526 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed min/max insn used only when flag_finite_math_only is set.
;; Operand 1 is marked commutative ("%0") and may be register or memory,
;; subject to ix86_binary_operator_ok.
529 (define_insn "*<code><mode>3_finite"
530 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
532 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
533 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
534 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
535 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
536 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
537 [(set_attr "type" "sseadd")
538 (set_attr "mode" "<MODE>")])
;; Packed min/max insn without the finite-math requirement.  Operand 1 must
;; be a register tied to the destination and is not marked commutative.
540 (define_insn "*<code><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
543 (match_operand:SSEMODEF2P 1 "register_operand" "0")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
546 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
547 [(set_attr "type" "sseadd")
548 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") min/max inside a vec_merge: emits <maxminfprefix>ss/sd.
;; Operand 1 is a register tied to the destination.
550 (define_insn "<sse>_vm<code><mode>3"
551 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
552 (vec_merge:SSEMODEF2P
554 (match_operand:SSEMODEF2P 1 "register_operand" "0")
555 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
558 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
559 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sse")
561 (set_attr "mode" "<ssescalarmode>")])
563 ;; These versions of the min/max patterns implement exactly the operations
564 ;; min = (op1 < op2 ? op1 : op2)
565 ;; max = (!(op1 < op2) ? op1 : op2)
566 ;; Their operands are not commutative, and thus they may be used in the
567 ;; presence of -0.0 and NaN.
;; Non-commutative packed minimum: emits minps/minpd with operand 1 in a
;; register tied to the destination.
569 (define_insn "*ieee_smin<mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
572 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
575 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
576 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
;; Non-commutative packed maximum: emits maxps/maxpd with operand 1 in a
;; register tied to the destination.
580 (define_insn "*ieee_smax<mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
583 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
584 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
586 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
587 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "<MODE>")])
;; Emits addsubps.  The RTL combines an operation on operands 1 and 2 with
;; a minus of the same two operands (via match_dup); operand 1 is tied to
;; the destination.
591 (define_insn "sse3_addsubv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
595 (match_operand:V4SF 1 "register_operand" "0")
596 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
597 (minus:V4SF (match_dup 1) (match_dup 2))
600 "addsubps\t{%2, %0|%0, %2}"
601 [(set_attr "type" "sseadd")
602 (set_attr "prefix_rep" "1")
603 (set_attr "mode" "V4SF")])
;; Emits addsubpd.  Same structure as the V4SF variant: an operation on
;; operands 1 and 2 combined with a minus of the same two operands.
605 (define_insn "sse3_addsubv2df3"
606 [(set (match_operand:V2DF 0 "register_operand" "=x")
609 (match_operand:V2DF 1 "register_operand" "0")
610 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
611 (minus:V2DF (match_dup 1) (match_dup 2))
614 "addsubpd\t{%2, %0|%0, %2}"
615 [(set_attr "type" "sseadd")
616 (set_attr "mode" "V2DF")])
;; Horizontal add/subtract on V4SF: emits h<plusminus_mnemonic>ps.  The RTL
;; selects elements 0..3 of operand 1 and elements 0..3 of operand 2 as
;; individual SF values; operand 1 is tied to the destination.
618 (define_insn "sse3_h<plusminus_insn>v4sf3"
619 [(set (match_operand:V4SF 0 "register_operand" "=x")
624 (match_operand:V4SF 1 "register_operand" "0")
625 (parallel [(const_int 0)]))
626 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
628 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
629 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
633 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
634 (parallel [(const_int 0)]))
635 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
637 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
638 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
640 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
641 [(set_attr "type" "sseadd")
642 (set_attr "prefix_rep" "1")
643 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V2DF: emits h<plusminus_mnemonic>pd.  The RTL
;; selects elements 0 and 1 of operand 1 and of operand 2 as individual DF
;; values; operand 1 is tied to the destination.
645 (define_insn "sse3_h<plusminus_insn>v2df3"
646 [(set (match_operand:V2DF 0 "register_operand" "=x")
650 (match_operand:V2DF 1 "register_operand" "0")
651 (parallel [(const_int 0)]))
652 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
655 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
656 (parallel [(const_int 0)]))
657 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
659 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
660 [(set_attr "type" "sseadd")
661 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V4SF with two code paths.  One applies
;; sse3_haddv4sf3 twice, going through a fresh temporary register; the
;; other calls ix86_expand_reduc_v4sf with gen_addv4sf3.
663 (define_expand "reduc_splus_v4sf"
664 [(match_operand:V4SF 0 "register_operand" "")
665 (match_operand:V4SF 1 "register_operand" "")]
670 rtx tmp = gen_reg_rtx (V4SFmode);
671 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
672 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
675 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum-reduction expander for V2DF: a single sse3_haddv2df3 with operand 1
;; used as both sources.
679 (define_expand "reduc_splus_v2df"
680 [(match_operand:V2DF 0 "register_operand" "")
681 (match_operand:V2DF 1 "register_operand" "")]
684 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum-reduction expander for V4SF; calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3.
688 (define_expand "reduc_smax_v4sf"
689 [(match_operand:V4SF 0 "register_operand" "")
690 (match_operand:V4SF 1 "register_operand" "")]
693 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF; calls ix86_expand_reduc_v4sf with
;; gen_sminv4sf3.
697 (define_expand "reduc_smin_v4sf"
698 [(match_operand:V4SF 0 "register_operand" "")
699 (match_operand:V4SF 1 "register_operand" "")]
702 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
708 ;; Parallel floating point comparisons
710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mask-producing compare for scalar and vector float modes (SSEMODEF4).
;; The comparison is matched by sse_comparison_operator (operand 3) and is
;; printed into the mnemonic through %D3, giving cmp<cond>ss/sd/ps/pd.
;; Operand 1 is a register tied to the destination.
712 (define_insn "<sse>_maskcmp<mode>3"
713 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
714 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
715 [(match_operand:SSEMODEF4 1 "register_operand" "0")
716 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
717 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
719 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
720 [(set_attr "type" "ssecmp")
721 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") mask compare inside a vec_merge: emits cmp<cond>ss/sd.
;; Not available when TARGET_SSE5 is set.
723 (define_insn "<sse>_vmmaskcmp<mode>3"
724 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
725 (vec_merge:SSEMODEF2P
726 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
727 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
728 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
731 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
732 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssecmp")
734 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 and element 0 of
;; operand 1; emits comis<ssemodefsuffix>.
736 (define_insn "<sse>_comi"
737 [(set (reg:CCFP FLAGS_REG)
740 (match_operand:<ssevecmode> 0 "register_operand" "x")
741 (parallel [(const_int 0)]))
743 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
744 (parallel [(const_int 0)]))))]
745 "SSE_FLOAT_MODE_P (<MODE>mode)"
746 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
747 [(set_attr "type" "ssecomi")
748 (set_attr "mode" "<MODE>")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and element 0 of
;; operand 1; emits ucomis<ssemodefsuffix>.
750 (define_insn "<sse>_ucomi"
751 [(set (reg:CCFPU FLAGS_REG)
754 (match_operand:<ssevecmode> 0 "register_operand" "x")
755 (parallel [(const_int 0)]))
757 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
758 (parallel [(const_int 0)]))))]
759 "SSE_FLOAT_MODE_P (<MODE>mode)"
760 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
761 [(set_attr "type" "ssecomi")
762 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for the vector float modes.
;; Operands 4 and 5 are the values compared; operands 1 and 2 are the two
;; arms of the if_then_else.  The expansion is attempted with
;; ix86_expand_fp_vcond.
764 (define_expand "vcond<mode>"
765 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
766 (if_then_else:SSEMODEF2P
768 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
769 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
770 (match_operand:SSEMODEF2P 1 "general_operand" "")
771 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
772 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
774 if (ix86_expand_fp_vcond (operands))
780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
782 ;; Parallel floating point logical operations
784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits andnps/andnpd on the vector float modes.  Operand 1 is a register
;; tied to the destination; operand 2 may be a register or memory.
786 (define_insn "<sse>_nand<mode>3"
787 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
790 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
791 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
792 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
793 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
794 [(set_attr "type" "sselog")
795 (set_attr "mode" "<MODE>")])
;; Expander for the binary logical codes iterated by <code> on the vector
;; float modes; operands are adjusted with
;; ix86_fixup_binary_operands_no_copy.
797 (define_expand "<code><mode>3"
798 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
800 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
801 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
802 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
803 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed logical insn: emits <plogicprefix>ps/pd.  Operand 1 is marked
;; commutative and tied to the destination ("%0"), subject to
;; ix86_binary_operator_ok.
805 (define_insn "*<code><mode>3"
806 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
808 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
809 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
810 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
811 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
812 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
813 [(set_attr "type" "sselog")
814 (set_attr "mode" "<MODE>")])
816 ;; Also define scalar versions. These are used for abs, neg, and
817 ;; conditional move. Using subregs into vector modes causes register
818 ;; allocation lossage. These patterns do not allow memory operands
819 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) variant: emits andnp<ssemodefsuffix> with all
;; operands in registers; the insn mode is the corresponding vector mode.
821 (define_insn "*nand<mode>3"
822 [(set (match_operand:MODEF 0 "register_operand" "=x")
825 (match_operand:MODEF 1 "register_operand" "0"))
826 (match_operand:MODEF 2 "register_operand" "x")))]
827 "SSE_FLOAT_MODE_P (<MODE>mode)"
828 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
829 [(set_attr "type" "sselog")
830 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) variant of the logical insns: emits
;; <plogicprefix>p<ssemodefsuffix> with all operands in registers; the insn
;; mode is the corresponding vector mode.
832 (define_insn "*<code><mode>3"
833 [(set (match_operand:MODEF 0 "register_operand" "=x")
835 (match_operand:MODEF 1 "register_operand" "0")
836 (match_operand:MODEF 2 "register_operand" "x")))]
837 "SSE_FLOAT_MODE_P (<MODE>mode)"
838 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
839 [(set_attr "type" "sselog")
840 (set_attr "mode" "<ssevecmode>")])
842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
844 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
845 ;; scalar versions of the instructions as well as the vector versions.
847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
849 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
850 ;; combine to generate a multiply/add with two memory references. We then
851 ;; split this insn, into loading up the destination register with one of the
852 ;; memory operations. If we don't manage to split the insn, reload will
853 ;; generate the appropriate moves. The reason this is needed, is that combine
854 ;; has already folded one of the memory references into both the multiply and
855 ;; add insns, and it can't generate a new pseudo. I.e.:
856 ;; (set (reg1) (mem (addr1)))
857 ;; (set (reg2) (mult (reg1) (mem (addr2))))
858 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; Four-operand multiply/add for scalar and vector float modes: emits
;; fmadd<ssemodesuffixf4>.  Four constraint alternatives are offered; in
;; each of them one of operands 1 and 3 is tied to the destination.  The
;; condition requires TARGET_SSE5 and TARGET_FUSED_MADD and validates the
;; operands with ix86_sse5_valid_op_p (last argument 2).
860 (define_insn "sse5_fmadd<mode>4"
861 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
864 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
865 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
866 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
867 "TARGET_SSE5 && TARGET_FUSED_MADD
868 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
869 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
870 [(set_attr "type" "ssemuladd")
871 (set_attr "mode" "<MODE>")])
873 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with a last argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any of the three sources.  ix86_expand_sse5_multiple_memory is called
;; on the operands first, then sse5_fmadd<mode>4 is emitted.
875 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
878 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
879 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
880 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
882 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
883 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
884 && !reg_mentioned_p (operands[0], operands[1])
885 && !reg_mentioned_p (operands[0], operands[2])
886 && !reg_mentioned_p (operands[0], operands[3])"
889 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
890 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
891 operands[2], operands[3]));
895 ;; For the scalar operations, use operand1 for the upper words that aren't
896 ;; modified, so restrict the forms that are generated.
897 ;; Scalar version of fmadd
;; Emits fmadd with the scalar suffix from ssemodesuffixf2s (ss/sd) inside
;; a vec_merge.  Operand 1 is tied to the destination in both alternatives;
;; operands are validated with ix86_sse5_valid_op_p (last argument 1).
898 (define_insn "sse5_vmfmadd<mode>4"
899 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
900 (vec_merge:SSEMODEF2P
903 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
904 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
905 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
908 "TARGET_SSE5 && TARGET_FUSED_MADD
909 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
910 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
911 [(set_attr "type" "ssemuladd")
912 (set_attr "mode" "<MODE>")])
914 ;; Floating multiply and subtract
915 ;; Allow two memory operands the same as fmadd
;; Emits fmsub<ssemodesuffixf4>.  Operand constraints and condition mirror
;; sse5_fmadd<mode>4: four alternatives, validated with
;; ix86_sse5_valid_op_p (last argument 2).
916 (define_insn "sse5_fmsub<mode>4"
917 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
920 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
921 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
922 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
923 "TARGET_SSE5 && TARGET_FUSED_MADD
924 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
925 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
926 [(set_attr "type" "ssemuladd")
927 (set_attr "mode" "<MODE>")])
929 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with a last argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any of the three sources.  ix86_expand_sse5_multiple_memory is called
;; on the operands first, then sse5_fmsub<mode>4 is emitted.
931 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
934 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
935 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
936 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
938 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
939 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
940 && !reg_mentioned_p (operands[0], operands[1])
941 && !reg_mentioned_p (operands[0], operands[2])
942 && !reg_mentioned_p (operands[0], operands[3])"
945 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
946 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
947 operands[2], operands[3]));
951 ;; For the scalar operations, use operand1 for the upper words that aren't
952 ;; modified, so restrict the forms that are generated.
953 ;; Scalar version of fmsub
;; Emits fmsub with the scalar suffix from ssemodesuffixf2s (ss/sd) inside
;; a vec_merge.  Operand 1 is tied to the destination in both alternatives;
;; operands are validated with ix86_sse5_valid_op_p (last argument 1).
954 (define_insn "sse5_vmfmsub<mode>4"
955 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
956 (vec_merge:SSEMODEF2P
959 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
960 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
961 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
964 "TARGET_SSE5 && TARGET_FUSED_MADD
965 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
966 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
967 [(set_attr "type" "ssemuladd")
968 (set_attr "mode" "<MODE>")])
970 ;; Floating point negative multiply and add
971 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
972 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
973 ;; Allow two memory operands to help in optimizing.
;; Pattern summary: fnmadd over the SSEMODEF4 modes with four constraint
;; alternatives.  Operand 3 is written first in the RTL, ahead of operands
;; 1 and 2.  Alternatives 1-2 tie operand 1 to the destination; alternatives
;; 3-4 tie operand 3 instead.  Requires TARGET_SSE5 and TARGET_FUSED_MADD;
;; the validity check is called with a final argument of 2.
974 (define_insn "sse5_fnmadd<mode>4"
975 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
977 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
979 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
980 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
981 "TARGET_SSE5 && TARGET_FUSED_MADD
982 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
983 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
984 [(set_attr "type" "ssemuladd")
985 (set_attr "mode" "<MODE>")])
987 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Splitter pattern for fnmadd.  Operand 0 is a register; operands 1-3 are
;; unconstrained nonimmediate SSEMODEF4 values, with operand 3 written
;; first.  The visible condition requires ix86_sse5_valid_op_p to fail with
;; a final argument of 1 but succeed with 2 (presumably the permitted number
;; of memory operands -- confirm against i386.c), and operand 0 not to be
;; mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmadd<mode>4.
989 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
991 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
993 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
994 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
996 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
997 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
998 && !reg_mentioned_p (operands[0], operands[1])
999 && !reg_mentioned_p (operands[0], operands[2])
1000 && !reg_mentioned_p (operands[0], operands[3])"
1003 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1004 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1005 operands[2], operands[3]));
1009 ;; For the scalar operations, use operand1 for the upper words that aren't
1010 ;; modified, so restrict the forms that are generated.
1011 ;; Scalar version of fnmadd
;; Pattern summary: the result is a vec_merge in mode SSEMODEF2P, with
;; operand 3 written first in the RTL.  Operand 1 is tied to the
;; destination in both alternatives; at most one of operands 2 and 3 offers
;; a memory option per alternative.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD; emits fnmadd with the <ssemodesuffixf2s> suffix.
1012 (define_insn "sse5_vmfnmadd<mode>4"
1013 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1014 (vec_merge:SSEMODEF2P
1016 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1018 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1019 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1022 "TARGET_SSE5 && TARGET_FUSED_MADD
1023 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1024 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1025 [(set_attr "type" "ssemuladd")
1026 (set_attr "mode" "<MODE>")])
1028 ;; Floating point negative multiply and subtract
1029 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1030 ;; Allow 2 memory operands to help with optimization
;; Pattern summary: fnmsub over the SSEMODEF4 modes with two constraint
;; alternatives.  Operand 1 is tied to the destination in both; at most one
;; of operands 2 and 3 offers a memory option per alternative.  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD; the validity check is called with a
;; final argument of 2.
1031 (define_insn "sse5_fnmsub<mode>4"
1032 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1036 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1037 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1038 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1039 "TARGET_SSE5 && TARGET_FUSED_MADD
1040 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1041 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1042 [(set_attr "type" "ssemuladd")
1043 (set_attr "mode" "<MODE>")])
1045 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Splitter pattern for fnmsub.  Operand 0 is a register; operands 1-3 are
;; unconstrained nonimmediate SSEMODEF4 values.  The visible condition
;; requires ix86_sse5_valid_op_p to fail with a final argument of 1 but
;; succeed with 2 (presumably the permitted number of memory operands --
;; confirm against i386.c), and operand 0 not to be mentioned in operands
;; 1-3.  The replacement code calls ix86_expand_sse5_multiple_memory and
;; then emits gen_sse5_fnmsub<mode>4.
1047 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1051 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1052 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1053 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1055 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1056 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1057 && !reg_mentioned_p (operands[0], operands[1])
1058 && !reg_mentioned_p (operands[0], operands[2])
1059 && !reg_mentioned_p (operands[0], operands[3])"
1062 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1063 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1064 operands[2], operands[3]));
1068 ;; For the scalar operations, use operand1 for the upper words that aren't
1069 ;; modified, so restrict the forms that are generated.
1070 ;; Scalar version of fnmsub
;; Pattern summary: the result is a vec_merge in mode SSEMODEF2P.
;; Operand 1 is tied to the destination in both alternatives; at most one
;; of operands 2 and 3 offers a memory option per alternative.  Emits
;; fnmsub with the <ssemodesuffixf2s> suffix.
;; NOTE(review): the validity check here passes 2 as its final argument,
;; whereas sse5_vmfmsub<mode>4 and sse5_vmfnmadd<mode>4 pass 1 -- confirm
;; whether the difference is intentional.
1071 (define_insn "sse5_vmfnmsub<mode>4"
1072 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1073 (vec_merge:SSEMODEF2P
1077 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1078 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1079 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1082 "TARGET_SSE5 && TARGET_FUSED_MADD
1083 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1084 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1085 [(set_attr "type" "ssemuladd")
1086 (set_attr "mode" "<MODE>")])
1088 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1089 ;; even if the user used -mno-fused-madd
1090 ;; Parallel instructions. During instruction generation, just default
1091 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the intrinsic form of fmadd: all four operands must be
;; registers and the pattern is tagged UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_fmadd<mode>4 with the same operands.
1092 (define_expand "sse5i_fmadd<mode>4"
1093 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1097 (match_operand:SSEMODEF2P 1 "register_operand" "")
1098 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1099 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1100 UNSPEC_SSE5_INTRINSIC))]
1103 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1104 if (TARGET_FUSED_MADD)
1106 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1107 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of fmadd.  It has four
;; constraint alternatives: the first two tie operand 1 to the destination,
;; the last two tie operand 3.  The condition needs only TARGET_SSE5 (no
;; TARGET_FUSED_MADD test) plus the validity check with a final argument
;; of 1.
1112 (define_insn "*sse5i_fmadd<mode>4"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1117 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1118 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1119 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1120 UNSPEC_SSE5_INTRINSIC))]
1121 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1122 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1123 [(set_attr "type" "ssemuladd")
1124 (set_attr "mode" "<MODE>")])
;; Expander for the intrinsic form of fmsub: all four operands must be
;; registers and the pattern is tagged UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_fmsub<mode>4 with the same operands.
1126 (define_expand "sse5i_fmsub<mode>4"
1127 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1131 (match_operand:SSEMODEF2P 1 "register_operand" "")
1132 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1133 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1134 UNSPEC_SSE5_INTRINSIC))]
1137 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1138 if (TARGET_FUSED_MADD)
1140 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1141 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of fmsub.  It has four
;; constraint alternatives: the first two tie operand 1 to the destination,
;; the last two tie operand 3.  The condition needs only TARGET_SSE5 (no
;; TARGET_FUSED_MADD test) plus the validity check with a final argument
;; of 1.  Operand 1 uses nonimmediate_operand, like the other *sse5i_
;; patterns, so that the "xm" alternative of its constraint string can
;; actually match a memory operand.
1146 (define_insn "*sse5i_fmsub<mode>4"
1147 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1151 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1152 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1153 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1154 UNSPEC_SSE5_INTRINSIC))]
1155 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1156 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1157 [(set_attr "type" "ssemuladd")
1158 (set_attr "mode" "<MODE>")])
1160 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1161 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the intrinsic form of fnmadd: all four operands must be
;; registers, operand 3 is written first in the RTL, and the pattern is
;; tagged UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the
;; preparation code emits gen_sse5_fnmadd<mode>4 with the same operands.
1162 (define_expand "sse5i_fnmadd<mode>4"
1163 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1166 (match_operand:SSEMODEF2P 3 "register_operand" "")
1168 (match_operand:SSEMODEF2P 1 "register_operand" "")
1169 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1170 UNSPEC_SSE5_INTRINSIC))]
1173 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1174 if (TARGET_FUSED_MADD)
1176 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1177 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of fnmadd, with operand 3
;; written first in the RTL.  It has four constraint alternatives: the
;; first two tie operand 1 to the destination, the last two tie operand 3.
;; The condition needs only TARGET_SSE5 plus the validity check with a
;; final argument of 1.
1182 (define_insn "*sse5i_fnmadd<mode>4"
1183 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1186 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1188 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1189 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1190 UNSPEC_SSE5_INTRINSIC))]
1191 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1192 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1193 [(set_attr "type" "ssemuladd")
1194 (set_attr "mode" "<MODE>")])
1196 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the intrinsic form of fnmsub: all four operands must be
;; registers and the pattern is tagged UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_fnmsub<mode>4 with the same operands.
1197 (define_expand "sse5i_fnmsub<mode>4"
1198 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1203 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1204 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1205 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1206 UNSPEC_SSE5_INTRINSIC))]
1209 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1210 if (TARGET_FUSED_MADD)
1212 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1213 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of fnmsub.  It has four
;; constraint alternatives: the first two tie operand 1 to the destination,
;; the last two tie operand 3.  The condition needs only TARGET_SSE5 plus
;; the validity check with a final argument of 1.
1218 (define_insn "*sse5i_fnmsub<mode>4"
1219 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1224 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1225 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1226 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1227 UNSPEC_SSE5_INTRINSIC))]
1228 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1229 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1230 [(set_attr "type" "ssemuladd")
1231 (set_attr "mode" "<MODE>")])
1233 ;; Scalar instructions
;; Expander for the scalar intrinsic form of fmadd: a vec_merge of
;; register operands tagged UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD
;; is set, the preparation code emits gen_sse5_vmfmadd<mode>4 with the same
;; operands.
1234 (define_expand "sse5i_vmfmadd<mode>4"
1235 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1237 [(vec_merge:SSEMODEF2P
1240 (match_operand:SSEMODEF2P 1 "register_operand" "")
1241 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1242 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1245 UNSPEC_SSE5_INTRINSIC))]
1248 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1249 if (TARGET_FUSED_MADD)
1251 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1252 operands[2], operands[3]));
1257 ;; For the scalar operations, use operand1 for the upper words that aren't
1258 ;; modified, so restrict the forms that are accepted.
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC form of fmadd.  Operand 1
;; must be a register and is tied to the destination in both alternatives;
;; at most one of operands 2 and 3 offers a memory option per alternative.
;; The "mode" attribute is <ssescalarmode> rather than <MODE>.
1259 (define_insn "*sse5i_vmfmadd<mode>4"
1260 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1262 [(vec_merge:SSEMODEF2P
1265 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1266 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1267 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1270 UNSPEC_SSE5_INTRINSIC))]
1271 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1272 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1273 [(set_attr "type" "ssemuladd")
1274 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar intrinsic form of fmsub: a vec_merge of
;; register operands tagged UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD
;; is set, the preparation code emits gen_sse5_vmfmsub<mode>4 with the same
;; operands.
1276 (define_expand "sse5i_vmfmsub<mode>4"
1277 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1279 [(vec_merge:SSEMODEF2P
1282 (match_operand:SSEMODEF2P 1 "register_operand" "")
1283 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1284 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1287 UNSPEC_SSE5_INTRINSIC))]
1290 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1291 if (TARGET_FUSED_MADD)
1293 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1294 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC form of fmsub.  Operand 1
;; is tied to the destination in both alternatives; at most one of
;; operands 2 and 3 offers a memory option per alternative.  The "mode"
;; attribute is <ssescalarmode> rather than <MODE>.
1299 (define_insn "*sse5i_vmfmsub<mode>4"
1300 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1302 [(vec_merge:SSEMODEF2P
1305 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1306 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1307 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1310 UNSPEC_SSE5_INTRINSIC))]
1311 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1312 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1313 [(set_attr "type" "ssemuladd")
1314 (set_attr "mode" "<ssescalarmode>")])
1316 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar intrinsic form of fnmadd: a vec_merge of
;; register operands (operand 3 written first) tagged
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the preparation
;; code emits gen_sse5_vmfnmadd<mode>4 with the same operands.
1317 (define_expand "sse5i_vmfnmadd<mode>4"
1318 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1320 [(vec_merge:SSEMODEF2P
1322 (match_operand:SSEMODEF2P 3 "register_operand" "")
1324 (match_operand:SSEMODEF2P 1 "register_operand" "")
1325 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1328 UNSPEC_SSE5_INTRINSIC))]
1331 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1332 if (TARGET_FUSED_MADD)
1334 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1335 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC form of fnmadd, with
;; operand 3 written first.  Operand 1 is tied to the destination in both
;; alternatives; at most one of operands 2 and 3 offers a memory option per
;; alternative.  The "mode" attribute is <ssescalarmode>.
1340 (define_insn "*sse5i_vmfnmadd<mode>4"
1341 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1343 [(vec_merge:SSEMODEF2P
1345 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1347 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1348 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1351 UNSPEC_SSE5_INTRINSIC))]
1352 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1353 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1354 [(set_attr "type" "ssemuladd")
1355 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar intrinsic form of fnmsub: a vec_merge of
;; register operands tagged UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD
;; is set, the preparation code emits gen_sse5_vmfnmsub<mode>4 with the
;; same operands.
1357 (define_expand "sse5i_vmfnmsub<mode>4"
1358 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1360 [(vec_merge:SSEMODEF2P
1364 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1365 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1366 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1369 UNSPEC_SSE5_INTRINSIC))]
1372 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1373 if (TARGET_FUSED_MADD)
1375 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1376 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC form of fnmsub.  Operand 1
;; is tied to the destination in both alternatives; at most one of
;; operands 2 and 3 offers a memory option per alternative.  The "mode"
;; attribute is <ssescalarmode>.
1381 (define_insn "*sse5i_vmfnmsub<mode>4"
1382 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1384 [(vec_merge:SSEMODEF2P
1388 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1389 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1390 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1393 UNSPEC_SSE5_INTRINSIC))]
1394 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1395 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1396 [(set_attr "type" "ssemuladd")
1397 (set_attr "mode" "<ssescalarmode>")])
1399 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1401 ;; Parallel single-precision floating point conversion operations
1403 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: operand 2 (V2SI, constraint "ym") is converted with
;; float:V2SF; operand 1 (V4SF) is tied to the V4SF destination.
1405 (define_insn "sse_cvtpi2ps"
1406 [(set (match_operand:V4SF 0 "register_operand" "=x")
1409 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1410 (match_operand:V4SF 1 "register_operand" "0")
1413 "cvtpi2ps\t{%2, %0|%0, %2}"
1414 [(set_attr "type" "ssecvt")
1415 (set_attr "mode" "V4SF")])
;; cvtps2pi: a V4SF source (register or memory) goes through an
;; unspec:V4SI, and elements 0 and 1 are selected into a V2SI destination
;; with constraint "y".  Marked as using the mmx unit.
1417 (define_insn "sse_cvtps2pi"
1418 [(set (match_operand:V2SI 0 "register_operand" "=y")
1420 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1422 (parallel [(const_int 0) (const_int 1)])))]
1424 "cvtps2pi\t{%1, %0|%0, %1}"
1425 [(set_attr "type" "ssecvt")
1426 (set_attr "unit" "mmx")
1427 (set_attr "mode" "DI")])
;; cvttps2pi: a V4SF source is converted with fix:V4SI, and elements 0 and
;; 1 are selected into a V2SI destination with constraint "y".
;; NOTE(review): the "mode" attribute is "SF" here but "DI" on
;; sse_cvtps2pi -- confirm which is intended.
1429 (define_insn "sse_cvttps2pi"
1430 [(set (match_operand:V2SI 0 "register_operand" "=y")
1432 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1433 (parallel [(const_int 0) (const_int 1)])))]
1435 "cvttps2pi\t{%1, %0|%0, %1}"
1436 [(set_attr "type" "ssecvt")
1437 (set_attr "unit" "mmx")
1438 (set_attr "mode" "SF")])
;; cvtsi2ss: operand 2 (SI, general register or memory) is converted with
;; float:SF; operand 1 (V4SF) is tied to the V4SF destination.  The two
;; alternatives carry different decode attributes.
1440 (define_insn "sse_cvtsi2ss"
1441 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1444 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1445 (match_operand:V4SF 1 "register_operand" "0,0")
1448 "cvtsi2ss\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "sseicvt")
1450 (set_attr "athlon_decode" "vector,double")
1451 (set_attr "amdfam10_decode" "vector,double")
1452 (set_attr "mode" "SF")])
;; cvtsi2ssq: the DImode-source variant, enabled only when TARGET_SSE and
;; TARGET_64BIT hold.  Operand 1 (V4SF) is tied to the destination.
;; NOTE(review): the second alternative of operand 2 is "rm" here but "m"
;; in sse_cvtsi2ss -- confirm whether the extra "r" is intentional.
1454 (define_insn "sse_cvtsi2ssq"
1455 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1458 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1459 (match_operand:V4SF 1 "register_operand" "0,0")
1461 "TARGET_SSE && TARGET_64BIT"
1462 "cvtsi2ssq\t{%2, %0|%0, %2}"
1463 [(set_attr "type" "sseicvt")
1464 (set_attr "athlon_decode" "vector,double")
1465 (set_attr "amdfam10_decode" "vector,double")
1466 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of a V4SF source (register or memory) is converted
;; to an SImode general register under UNSPEC_FIX_NOTRUNC.  No
;; amdfam10_decode attribute is set on this pattern.
1468 (define_insn "sse_cvtss2si"
1469 [(set (match_operand:SI 0 "register_operand" "=r,r")
1472 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1473 (parallel [(const_int 0)]))]
1474 UNSPEC_FIX_NOTRUNC))]
1476 "cvtss2si\t{%1, %0|%0, %1}"
1477 [(set_attr "type" "sseicvt")
1478 (set_attr "athlon_decode" "double,vector")
1479 (set_attr "prefix_rep" "1")
1480 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a plain SFmode operand (register or
;; memory) rather than a selected vector element; the result is SImode
;; under UNSPEC_FIX_NOTRUNC.
1482 (define_insn "sse_cvtss2si_2"
1483 [(set (match_operand:SI 0 "register_operand" "=r,r")
1484 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1485 UNSPEC_FIX_NOTRUNC))]
1487 "cvtss2si\t{%1, %0|%0, %1}"
1488 [(set_attr "type" "sseicvt")
1489 (set_attr "athlon_decode" "double,vector")
1490 (set_attr "amdfam10_decode" "double,double")
1491 (set_attr "prefix_rep" "1")
1492 (set_attr "mode" "SI")])
;; cvtss2siq: element 0 of a V4SF source is converted to a DImode general
;; register under UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE and
;; TARGET_64BIT hold.
1494 (define_insn "sse_cvtss2siq"
1495 [(set (match_operand:DI 0 "register_operand" "=r,r")
1498 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1499 (parallel [(const_int 0)]))]
1500 UNSPEC_FIX_NOTRUNC))]
1501 "TARGET_SSE && TARGET_64BIT"
1502 "cvtss2siq\t{%1, %0|%0, %1}"
1503 [(set_attr "type" "sseicvt")
1504 (set_attr "athlon_decode" "double,vector")
1505 (set_attr "prefix_rep" "1")
1506 (set_attr "mode" "DI")])
;; Variant of cvtss2siq whose source is a plain SFmode operand; the result
;; is DImode under UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE and
;; TARGET_64BIT hold.
1508 (define_insn "sse_cvtss2siq_2"
1509 [(set (match_operand:DI 0 "register_operand" "=r,r")
1510 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1511 UNSPEC_FIX_NOTRUNC))]
1512 "TARGET_SSE && TARGET_64BIT"
1513 "cvtss2siq\t{%1, %0|%0, %1}"
1514 [(set_attr "type" "sseicvt")
1515 (set_attr "athlon_decode" "double,vector")
1516 (set_attr "amdfam10_decode" "double,double")
1517 (set_attr "prefix_rep" "1")
1518 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of a V4SF source (register or memory) is converted
;; to an SImode general register.
1520 (define_insn "sse_cvttss2si"
1521 [(set (match_operand:SI 0 "register_operand" "=r,r")
1524 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1525 (parallel [(const_int 0)]))))]
1527 "cvttss2si\t{%1, %0|%0, %1}"
1528 [(set_attr "type" "sseicvt")
1529 (set_attr "athlon_decode" "double,vector")
1530 (set_attr "amdfam10_decode" "double,double")
1531 (set_attr "prefix_rep" "1")
1532 (set_attr "mode" "SI")])
;; cvttss2siq: element 0 of a V4SF source is converted to a DImode general
;; register.  Enabled only when TARGET_SSE and TARGET_64BIT hold.
1534 (define_insn "sse_cvttss2siq"
1535 [(set (match_operand:DI 0 "register_operand" "=r,r")
1538 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1539 (parallel [(const_int 0)]))))]
1540 "TARGET_SSE && TARGET_64BIT"
1541 "cvttss2siq\t{%1, %0|%0, %1}"
1542 [(set_attr "type" "sseicvt")
1543 (set_attr "athlon_decode" "double,vector")
1544 (set_attr "amdfam10_decode" "double,double")
1545 (set_attr "prefix_rep" "1")
1546 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of a V4SI source (register or memory) into a V4SF
;; register.
1548 (define_insn "sse2_cvtdq2ps"
1549 [(set (match_operand:V4SF 0 "register_operand" "=x")
1550 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1552 "cvtdq2ps\t{%1, %0|%0, %1}"
1553 [(set_attr "type" "ssecvt")
1554 (set_attr "mode" "V4SF")])
;; cvtps2dq: a V4SF source (register or memory) is converted to a V4SI
;; register under UNSPEC_FIX_NOTRUNC.  Sets the prefix_data16 attribute.
1556 (define_insn "sse2_cvtps2dq"
1557 [(set (match_operand:V4SI 0 "register_operand" "=x")
1558 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1559 UNSPEC_FIX_NOTRUNC))]
1561 "cvtps2dq\t{%1, %0|%0, %1}"
1562 [(set_attr "type" "ssecvt")
1563 (set_attr "prefix_data16" "1")
1564 (set_attr "mode" "TI")])
;; cvttps2dq: fix:V4SI of a V4SF source (register or memory) into a V4SI
;; register.  Sets the prefix_rep attribute.
1566 (define_insn "sse2_cvttps2dq"
1567 [(set (match_operand:V4SI 0 "register_operand" "=x")
1568 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1570 "cvttps2dq\t{%1, %0|%0, %1}"
1571 [(set_attr "type" "ssecvt")
1572 (set_attr "prefix_rep" "1")
1573 (set_attr "mode" "TI")])
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1577 ;; Parallel double-precision floating point conversion operations
1579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of a V2SI source.  Two alternatives: source in a
;; "y" register (unit mmx) or in memory (unit left as "*").
1581 (define_insn "sse2_cvtpi2pd"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1583 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1585 "cvtpi2pd\t{%1, %0|%0, %1}"
1586 [(set_attr "type" "ssecvt")
1587 (set_attr "unit" "mmx,*")
1588 (set_attr "mode" "V2DF")])
;; cvtpd2pi: a V2DF source (register or memory) is converted to a V2SI
;; destination with constraint "y" under UNSPEC_FIX_NOTRUNC.  Marked as
;; using the mmx unit.
1590 (define_insn "sse2_cvtpd2pi"
1591 [(set (match_operand:V2SI 0 "register_operand" "=y")
1592 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1593 UNSPEC_FIX_NOTRUNC))]
1595 "cvtpd2pi\t{%1, %0|%0, %1}"
1596 [(set_attr "type" "ssecvt")
1597 (set_attr "unit" "mmx")
1598 (set_attr "prefix_data16" "1")
1599 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI of a V2DF source (register or memory) into a V2SI
;; destination with constraint "y".
;; NOTE(review): the "mode" attribute is "TI" here but "DI" on
;; sse2_cvtpd2pi -- confirm which is intended.
1601 (define_insn "sse2_cvttpd2pi"
1602 [(set (match_operand:V2SI 0 "register_operand" "=y")
1603 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1605 "cvttpd2pi\t{%1, %0|%0, %1}"
1606 [(set_attr "type" "ssecvt")
1607 (set_attr "unit" "mmx")
1608 (set_attr "prefix_data16" "1")
1609 (set_attr "mode" "TI")])
;; cvtsi2sd: operand 2 (SI, general register or memory) is converted with
;; float:DF; operand 1 (V2DF) is tied to the V2DF destination.
1611 (define_insn "sse2_cvtsi2sd"
1612 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1615 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1616 (match_operand:V2DF 1 "register_operand" "0,0")
1619 "cvtsi2sd\t{%2, %0|%0, %2}"
1620 [(set_attr "type" "sseicvt")
1621 (set_attr "mode" "DF")
1622 (set_attr "athlon_decode" "double,direct")
1623 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: the DImode-source variant, enabled only when TARGET_SSE2 and
;; TARGET_64BIT hold.  Operand 1 (V2DF) is tied to the destination.
1625 (define_insn "sse2_cvtsi2sdq"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1629 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1630 (match_operand:V2DF 1 "register_operand" "0,0")
1632 "TARGET_SSE2 && TARGET_64BIT"
1633 "cvtsi2sdq\t{%2, %0|%0, %2}"
1634 [(set_attr "type" "sseicvt")
1635 (set_attr "mode" "DF")
1636 (set_attr "athlon_decode" "double,direct")
1637 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of a V2DF source (register or memory) is converted
;; to an SImode general register under UNSPEC_FIX_NOTRUNC.  No
;; amdfam10_decode attribute is set on this pattern.
1639 (define_insn "sse2_cvtsd2si"
1640 [(set (match_operand:SI 0 "register_operand" "=r,r")
1643 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1644 (parallel [(const_int 0)]))]
1645 UNSPEC_FIX_NOTRUNC))]
1647 "cvtsd2si\t{%1, %0|%0, %1}"
1648 [(set_attr "type" "sseicvt")
1649 (set_attr "athlon_decode" "double,vector")
1650 (set_attr "prefix_rep" "1")
1651 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a plain DFmode operand (register or
;; memory); the result is SImode under UNSPEC_FIX_NOTRUNC.
1653 (define_insn "sse2_cvtsd2si_2"
1654 [(set (match_operand:SI 0 "register_operand" "=r,r")
1655 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1656 UNSPEC_FIX_NOTRUNC))]
1658 "cvtsd2si\t{%1, %0|%0, %1}"
1659 [(set_attr "type" "sseicvt")
1660 (set_attr "athlon_decode" "double,vector")
1661 (set_attr "amdfam10_decode" "double,double")
1662 (set_attr "prefix_rep" "1")
1663 (set_attr "mode" "SI")])
;; cvtsd2siq: element 0 of a V2DF source is converted to a DImode general
;; register under UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE2 and
;; TARGET_64BIT hold.
1665 (define_insn "sse2_cvtsd2siq"
1666 [(set (match_operand:DI 0 "register_operand" "=r,r")
1669 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1670 (parallel [(const_int 0)]))]
1671 UNSPEC_FIX_NOTRUNC))]
1672 "TARGET_SSE2 && TARGET_64BIT"
1673 "cvtsd2siq\t{%1, %0|%0, %1}"
1674 [(set_attr "type" "sseicvt")
1675 (set_attr "athlon_decode" "double,vector")
1676 (set_attr "prefix_rep" "1")
1677 (set_attr "mode" "DI")])
;; Variant of cvtsd2siq whose source is a plain DFmode operand; the result
;; is DImode under UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE2 and
;; TARGET_64BIT hold.
1679 (define_insn "sse2_cvtsd2siq_2"
1680 [(set (match_operand:DI 0 "register_operand" "=r,r")
1681 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1682 UNSPEC_FIX_NOTRUNC))]
1683 "TARGET_SSE2 && TARGET_64BIT"
1684 "cvtsd2siq\t{%1, %0|%0, %1}"
1685 [(set_attr "type" "sseicvt")
1686 (set_attr "athlon_decode" "double,vector")
1687 (set_attr "amdfam10_decode" "double,double")
1688 (set_attr "prefix_rep" "1")
1689 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of a V2DF source (register or memory) is converted
;; to an SImode general register.
1691 (define_insn "sse2_cvttsd2si"
1692 [(set (match_operand:SI 0 "register_operand" "=r,r")
1695 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1696 (parallel [(const_int 0)]))))]
1698 "cvttsd2si\t{%1, %0|%0, %1}"
1699 [(set_attr "type" "sseicvt")
1700 (set_attr "prefix_rep" "1")
1701 (set_attr "mode" "SI")
1702 (set_attr "athlon_decode" "double,vector")
1703 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: element 0 of a V2DF source is converted to a DImode general
;; register.  Enabled only when TARGET_SSE2 and TARGET_64BIT hold.
1705 (define_insn "sse2_cvttsd2siq"
1706 [(set (match_operand:DI 0 "register_operand" "=r,r")
1709 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1710 (parallel [(const_int 0)]))))]
1711 "TARGET_SSE2 && TARGET_64BIT"
1712 "cvttsd2siq\t{%1, %0|%0, %1}"
1713 [(set_attr "type" "sseicvt")
1714 (set_attr "prefix_rep" "1")
1715 (set_attr "mode" "DI")
1716 (set_attr "athlon_decode" "double,vector")
1717 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: elements 0 and 1 of a V4SI source (register or memory) are
;; selected and the result is stored in a V2DF register.
1719 (define_insn "sse2_cvtdq2pd"
1720 [(set (match_operand:V2DF 0 "register_operand" "=x")
1723 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1724 (parallel [(const_int 0) (const_int 1)]))))]
1726 "cvtdq2pd\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "ssecvt")
1728 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: a V4SI destination built from an unspec:V2SI of
;; a V2DF source.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
1730 (define_expand "sse2_cvtpd2dq"
1731 [(set (match_operand:V4SI 0 "register_operand" "")
1733 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1737 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: an unspec:V2SI of a V2DF source (register or
;; memory) appears alongside operand 2, which must satisfy const0_operand.
;; The destination is a V4SI register.
1739 (define_insn "*sse2_cvtpd2dq"
1740 [(set (match_operand:V4SI 0 "register_operand" "=x")
1742 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1744 (match_operand:V2SI 2 "const0_operand" "")))]
1746 "cvtpd2dq\t{%1, %0|%0, %1}"
1747 [(set_attr "type" "ssecvt")
1748 (set_attr "prefix_rep" "1")
1749 (set_attr "mode" "TI")
1750 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: a V4SI destination built from fix:V2SI of a
;; V2DF source.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
1752 (define_expand "sse2_cvttpd2dq"
1753 [(set (match_operand:V4SI 0 "register_operand" "")
1755 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1758 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: fix:V2SI of a V2DF source (register or memory)
;; appears alongside operand 2, which must satisfy const0_operand.  The
;; destination is a V4SI register.
1760 (define_insn "*sse2_cvttpd2dq"
1761 [(set (match_operand:V4SI 0 "register_operand" "=x")
1763 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1764 (match_operand:V2SI 2 "const0_operand" "")))]
1766 "cvttpd2dq\t{%1, %0|%0, %1}"
1767 [(set_attr "type" "ssecvt")
1768 (set_attr "prefix_rep" "1")
1769 (set_attr "mode" "TI")
1770 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: operand 2 (V2DF, register or memory) goes through
;; float_truncate:V2SF; operand 1 (V4SF) is tied to the V4SF destination.
1772 (define_insn "sse2_cvtsd2ss"
1773 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1776 (float_truncate:V2SF
1777 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1778 (match_operand:V4SF 1 "register_operand" "0,0")
1781 "cvtsd2ss\t{%2, %0|%0, %2}"
1782 [(set_attr "type" "ssecvt")
1783 (set_attr "athlon_decode" "vector,double")
1784 (set_attr "amdfam10_decode" "vector,double")
1785 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of operand 2 (V4SF, register or memory) are
;; selected; operand 1 (V2DF) is tied to the V2DF destination.
1787 (define_insn "sse2_cvtss2sd"
1788 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1792 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1793 (parallel [(const_int 0) (const_int 1)])))
1794 (match_operand:V2DF 1 "register_operand" "0,0")
1797 "cvtss2sd\t{%2, %0|%0, %2}"
1798 [(set_attr "type" "ssecvt")
1799 (set_attr "amdfam10_decode" "vector,double")
1800 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: a V4SF destination built from
;; float_truncate:V2SF of a V2DF source.  The preparation statement sets
;; operands[2] to the V2SFmode zero constant.
1802 (define_expand "sse2_cvtpd2ps"
1803 [(set (match_operand:V4SF 0 "register_operand" "")
1805 (float_truncate:V2SF
1806 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1809 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: float_truncate:V2SF of a V2DF source (register or
;; memory) appears alongside operand 2, which must satisfy const0_operand.
;; The destination is a V4SF register.
1811 (define_insn "*sse2_cvtpd2ps"
1812 [(set (match_operand:V4SF 0 "register_operand" "=x")
1814 (float_truncate:V2SF
1815 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1816 (match_operand:V2SF 2 "const0_operand" "")))]
1818 "cvtpd2ps\t{%1, %0|%0, %1}"
1819 [(set_attr "type" "ssecvt")
1820 (set_attr "prefix_data16" "1")
1821 (set_attr "mode" "V4SF")
1822 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: elements 0 and 1 of a V4SF source (register or memory) are
;; selected and the result is stored in a V2DF register.
1824 (define_insn "sse2_cvtps2pd"
1825 [(set (match_operand:V2DF 0 "register_operand" "=x")
1828 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1829 (parallel [(const_int 0) (const_int 1)]))))]
1831 "cvtps2pd\t{%1, %0|%0, %1}"
1832 [(set_attr "type" "ssecvt")
1833 (set_attr "mode" "V2DF")
1834 (set_attr "amdfam10_decode" "direct")])
;; Expander taking a V4SF operand 1 and producing a V2DF operand 0.  The
;; visible pattern has a selection whose index list begins with 6 and a
;; later selection of elements 0 and 1 feeding the V2DF set.  The
;; preparation code allocates operands[2] as a fresh V4SFmode pseudo.
1836 (define_expand "vec_unpacks_hi_v4sf"
1841 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1842 (parallel [(const_int 6)
1846 (set (match_operand:V2DF 0 "register_operand" "")
1850 (parallel [(const_int 0) (const_int 1)]))))]
1853 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1 (register or memory).
1856 (define_expand "vec_unpacks_lo_v4sf"
1857 [(set (match_operand:V2DF 0 "register_operand" "")
1860 (match_operand:V4SF 1 "nonimmediate_operand" "")
1861 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander from a V8HI register to a V4SF register in two steps:
;; gen_vec_unpacks_hi_v8hi into a V4SImode temporary, then
;; gen_sse2_cvtdq2ps from the temporary into operand 0.
1864 (define_expand "vec_unpacks_float_hi_v8hi"
1865 [(match_operand:V4SF 0 "register_operand" "")
1866 (match_operand:V8HI 1 "register_operand" "")]
1869 rtx tmp = gen_reg_rtx (V4SImode);
1871 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1872 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from a V8HI register to a V4SF register in two steps:
;; gen_vec_unpacks_lo_v8hi into a V4SImode temporary, then
;; gen_sse2_cvtdq2ps from the temporary into operand 0.
1876 (define_expand "vec_unpacks_float_lo_v8hi"
1877 [(match_operand:V4SF 0 "register_operand" "")
1878 (match_operand:V8HI 1 "register_operand" "")]
1881 rtx tmp = gen_reg_rtx (V4SImode);
1883 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1884 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from a V8HI register to a V4SF register in two steps:
;; gen_vec_unpacku_hi_v8hi into a V4SImode temporary, then
;; gen_sse2_cvtdq2ps from the temporary into operand 0.
1888 (define_expand "vec_unpacku_float_hi_v8hi"
1889 [(match_operand:V4SF 0 "register_operand" "")
1890 (match_operand:V8HI 1 "register_operand" "")]
1893 rtx tmp = gen_reg_rtx (V4SImode);
1895 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1896 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from a V8HI register to a V4SF register in two steps:
;; gen_vec_unpacku_lo_v8hi into a V4SImode temporary, then
;; gen_sse2_cvtdq2ps from the temporary into operand 0.
1900 (define_expand "vec_unpacku_float_lo_v8hi"
1901 [(match_operand:V4SF 0 "register_operand" "")
1902 (match_operand:V8HI 1 "register_operand" "")]
1905 rtx tmp = gen_reg_rtx (V4SImode);
1907 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1908 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander taking a V4SI operand 1 and producing a V2DF operand 0.  The
;; visible pattern has a selection whose index list begins with 2 and a
;; later selection of elements 0 and 1 feeding the V2DF set.  The
;; preparation code allocates operands[2] as a fresh V4SImode pseudo.
1912 (define_expand "vec_unpacks_float_hi_v4si"
1915 (match_operand:V4SI 1 "nonimmediate_operand" "")
1916 (parallel [(const_int 2)
1920 (set (match_operand:V2DF 0 "register_operand" "")
1924 (parallel [(const_int 0) (const_int 1)]))))]
1927 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1 (register or memory).
1930 (define_expand "vec_unpacks_float_lo_v4si"
1931 [(set (match_operand:V2DF 0 "register_operand" "")
1934 (match_operand:V4SI 1 "nonimmediate_operand" "")
1935 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander packing two V2DF inputs into one V4SF register.  Each input is
;; converted with gen_sse2_cvtpd2ps into its own V4SFmode temporary (r1,
;; r2), and gen_sse_movlhps then combines r1 and r2 into operand 0.
1938 (define_expand "vec_pack_trunc_v2df"
1939 [(match_operand:V4SF 0 "register_operand" "")
1940 (match_operand:V2DF 1 "nonimmediate_operand" "")
1941 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1946 r1 = gen_reg_rtx (V4SFmode);
1947 r2 = gen_reg_rtx (V4SFmode);
1949 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1950 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1951 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF inputs into one V4SI register.  Each input is
;; converted with gen_sse2_cvttpd2dq into its own V4SImode temporary (r1,
;; r2); gen_sse2_punpcklqdq then combines them, with all three registers
;; viewed as V2DImode through gen_lowpart.
1955 (define_expand "vec_pack_sfix_trunc_v2df"
1956 [(match_operand:V4SI 0 "register_operand" "")
1957 (match_operand:V2DF 1 "nonimmediate_operand" "")
1958 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1963 r1 = gen_reg_rtx (V4SImode);
1964 r2 = gen_reg_rtx (V4SImode);
1966 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1967 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1968 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1969 gen_lowpart (V2DImode, r1),
1970 gen_lowpart (V2DImode, r2)));
;; Expander packing two V2DF inputs into one V4SI register.  Each input is
;; converted with gen_sse2_cvtpd2dq into its own V4SImode temporary (r1,
;; r2); gen_sse2_punpcklqdq then combines them, with all three registers
;; viewed as V2DImode through gen_lowpart.
1974 (define_expand "vec_pack_sfix_v2df"
1975 [(match_operand:V4SI 0 "register_operand" "")
1976 (match_operand:V2DF 1 "nonimmediate_operand" "")
1977 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1982 r1 = gen_reg_rtx (V4SImode);
1983 r2 = gen_reg_rtx (V4SImode);
1985 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1986 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1987 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1988 gen_lowpart (V2DImode, r1),
1989 gen_lowpart (V2DImode, r2)));
1993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1995 ;; Parallel single-precision floating point element swizzling
1997 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, each with operand 1 tied to the destination:
;; register/register emits movhlps, register with an "o" memory source
;; emits movlps on %H2, and a memory destination emits movhps.  The
;; condition rejects operands 1 and 2 both being memory.
1999 (define_insn "sse_movhlps"
2000 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2003 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2004 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2005 (parallel [(const_int 6)
2009 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2011 movhlps\t{%2, %0|%0, %2}
2012 movlps\t{%H2, %0|%0, %H2}
2013 movhps\t{%2, %0|%0, %2}"
2014 [(set_attr "type" "ssemov")
2015 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives, each with operand 1 tied to the destination:
;; register/register emits movlhps, register with a memory source emits
;; movhps, and an "o" memory destination emits movlps on %H0.  The
;; condition requires TARGET_SSE and ix86_binary_operator_ok.
2017 (define_insn "sse_movlhps"
2018 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2021 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2022 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2023 (parallel [(const_int 0)
2027 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2029 movlhps\t{%2, %0|%0, %2}
2030 movhps\t{%2, %0|%0, %2}
2031 movlps\t{%2, %H0|%H0, %2}"
2032 [(set_attr "type" "ssemov")
2033 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_unpckhps: emits unpckhps.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  The selection indices are
;; 2, 6, 3, 7.
2035 (define_insn "sse_unpckhps"
2036 [(set (match_operand:V4SF 0 "register_operand" "=x")
2039 (match_operand:V4SF 1 "register_operand" "0")
2040 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2041 (parallel [(const_int 2) (const_int 6)
2042 (const_int 3) (const_int 7)])))]
2044 "unpckhps\t{%2, %0|%0, %2}"
2045 [(set_attr "type" "sselog")
2046 (set_attr "mode" "V4SF")])
;; sse_unpcklps: emits unpcklps.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  The selection indices are
;; 0, 4, 1, 5.
2048 (define_insn "sse_unpcklps"
2049 [(set (match_operand:V4SF 0 "register_operand" "=x")
2052 (match_operand:V4SF 1 "register_operand" "0")
2053 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2054 (parallel [(const_int 0) (const_int 4)
2055 (const_int 1) (const_int 5)])))]
2057 "unpcklps\t{%2, %0|%0, %2}"
2058 [(set_attr "type" "sselog")
2059 (set_attr "mode" "V4SF")])
2061 ;; These are modeled with the same vec_concat as the others so that we
2062 ;; capture users of shufps that can use the new instructions
;; sse3_movshdup: emits movshdup from operand 1 (register or memory) into
;; register operand 0; the instruction is marked as carrying a rep prefix.
2063 (define_insn "sse3_movshdup"
2064 [(set (match_operand:V4SF 0 "register_operand" "=x")
2067 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2069 (parallel [(const_int 1)
2074 "movshdup\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "sse")
2076 (set_attr "prefix_rep" "1")
2077 (set_attr "mode" "V4SF")])
;; sse3_movsldup: emits movsldup from operand 1 (register or memory) into
;; register operand 0; the instruction is marked as carrying a rep prefix.
2079 (define_insn "sse3_movsldup"
2080 [(set (match_operand:V4SF 0 "register_operand" "=x")
2083 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2085 (parallel [(const_int 0)
2090 "movsldup\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "sse")
2092 (set_attr "prefix_rep" "1")
2093 (set_attr "mode" "V4SF")])
;; sse_shufps: split the immediate in operand 3 into four 2-bit selectors
;; and pass them to sse_shufps_1.  The two upper selectors are biased by 4,
;; which matches the const_4_to_7_operand predicates that sse_shufps_1 uses
;; for its last two selector operands.
2095 (define_expand "sse_shufps"
2096 [(match_operand:V4SF 0 "register_operand" "")
2097 (match_operand:V4SF 1 "register_operand" "")
2098 (match_operand:V4SF 2 "nonimmediate_operand" "")
2099 (match_operand:SI 3 "const_int_operand" "")]
2102 int mask = INTVAL (operands[3]);
2103 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2104 GEN_INT ((mask >> 0) & 3),
2105 GEN_INT ((mask >> 2) & 3),
2106 GEN_INT (((mask >> 4) & 3) + 4),
2107 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_1: shufps with its four selectors exposed as operands 3-6
;; (operands 3/4 in 0..3, operands 5/6 in 4..7).  The output code packs them
;; back into one immediate, subtracting the bias of 4 from operands 5 and 6,
;; and overwrites operands[3] with it before printing the instruction.
2111 (define_insn "sse_shufps_1"
2112 [(set (match_operand:V4SF 0 "register_operand" "=x")
2115 (match_operand:V4SF 1 "register_operand" "0")
2116 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2117 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2118 (match_operand 4 "const_0_to_3_operand" "")
2119 (match_operand 5 "const_4_to_7_operand" "")
2120 (match_operand 6 "const_4_to_7_operand" "")])))]
2124 mask |= INTVAL (operands[3]) << 0;
2125 mask |= INTVAL (operands[4]) << 2;
2126 mask |= (INTVAL (operands[5]) - 4) << 4;
2127 mask |= (INTVAL (operands[6]) - 4) << 6;
2128 operands[3] = GEN_INT (mask);
2130 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2132 [(set_attr "type" "sselog")
2133 (set_attr "mode" "V4SF")])
;; sse_storehps: take elements 2 and 3 of V4SF operand 1 as a V2SF.
;; movhps stores to memory, movhlps handles register to register, and
;; movlps loads from the high half (%H1) of an offsettable memory source.
2135 (define_insn "sse_storehps"
2136 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2138 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2139 (parallel [(const_int 2) (const_int 3)])))]
2142 movhps\t{%1, %0|%0, %1}
2143 movhlps\t{%1, %0|%0, %1}
2144 movlps\t{%H1, %0|%0, %H1}"
2145 [(set_attr "type" "ssemov")
2146 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps: combine elements 0 and 1 of operand 1 (tied to the
;; destination) with V2SF operand 2.  movhps loads operand 2 from memory,
;; movlhps takes it from a register, and movlps stores it into the high half
;; (%H0) of an offsettable memory destination.
2148 (define_insn "sse_loadhps"
2149 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2152 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2153 (parallel [(const_int 0) (const_int 1)]))
2154 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2157 movhps\t{%2, %0|%0, %2}
2158 movlhps\t{%2, %0|%0, %2}
2159 movlps\t{%2, %H0|%H0, %2}"
2160 [(set_attr "type" "ssemov")
2161 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_storelps: take elements 0 and 1 of V4SF operand 1 as a V2SF.
;; movlps is used for both the store and the load alternative; the
;; register-to-register alternative copies the register with movaps.
2163 (define_insn "sse_storelps"
2164 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2166 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2167 (parallel [(const_int 0) (const_int 1)])))]
2170 movlps\t{%1, %0|%0, %1}
2171 movaps\t{%1, %0|%0, %1}
2172 movlps\t{%1, %0|%0, %1}"
2173 [(set_attr "type" "ssemov")
2174 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps: combine V2SF operand 2 with elements 2 and 3 of operand 1.
;; When operand 2 already lives in the destination register, shufps $0xe4
;; brings in the upper elements from operand 1; otherwise operand 1 is tied
;; to the destination and movlps loads or stores operand 2.
2176 (define_insn "sse_loadlps"
2177 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2179 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2181 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2182 (parallel [(const_int 2) (const_int 3)]))))]
2185 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2186 movlps\t{%2, %0|%0, %2}
2187 movlps\t{%2, %0|%0, %2}"
2188 [(set_attr "type" "sselog,ssemov,ssemov")
2189 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movss: emits movss from register operand 2 into operand 0, which is
;; tied to operand 1.  All three operands must be registers.
2191 (define_insn "sse_movss"
2192 [(set (match_operand:V4SF 0 "register_operand" "=x")
2194 (match_operand:V4SF 2 "register_operand" "x")
2195 (match_operand:V4SF 1 "register_operand" "0")
2198 "movss\t{%2, %0|%0, %2}"
2199 [(set_attr "type" "ssemov")
2200 (set_attr "mode" "SF")])
;; *vec_dupv4sf: SF operand 1 is tied to the destination register, and the
;; pattern emits shufps $0 of that register with itself.
2202 (define_insn "*vec_dupv4sf"
2203 [(set (match_operand:V4SF 0 "register_operand" "=x")
2205 (match_operand:SF 1 "register_operand" "0")))]
2207 "shufps\t{$0, %0, %0|%0, %0, 0}"
2208 [(set_attr "type" "sselog1")
2209 (set_attr "mode" "V4SF")])
2211 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2212 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2213 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse_concatv2sf: build a V2SF from SF operands 1 and 2 (operand 2 is a
;; register or zero, per reg_or_0_operand).  The xmm alternatives use
;; unpcklps (register operand 2) or movss (memory operand 1, constraint C
;; for operand 2); the MMX (*y) alternatives use punpckldq and movd.
2214 (define_insn "*sse_concatv2sf"
2215 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2217 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2218 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2221 unpcklps\t{%2, %0|%0, %2}
2222 movss\t{%1, %0|%0, %1}
2223 punpckldq\t{%2, %0|%0, %2}
2224 movd\t{%1, %0|%0, %1}"
2225 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2226 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *sse_concatv4sf: build a V4SF from two V2SF operands.  Operand 1 is tied
;; to the destination; operand 2 is brought in with movlhps when it is a
;; register and movhps when it is memory.
2228 (define_insn "*sse_concatv4sf"
2229 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2231 (match_operand:V2SF 1 "register_operand" " 0,0")
2232 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2235 movlhps\t{%2, %0|%0, %2}
2236 movhps\t{%2, %0|%0, %2}"
2237 [(set_attr "type" "ssemov")
2238 (set_attr "mode" "V4SF,V2SF")])
;; vec_initv4sf: hands operands 0 and 1 to ix86_expand_vector_init (first
;; argument false).
2240 (define_expand "vec_initv4sf"
2241 [(match_operand:V4SF 0 "register_operand" "")
2242 (match_operand 1 "" "")]
2245 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4sf_0: combine SF operand 2 with V4SF operand 1, which is either
;; tied to the destination or the constant accepted by constraint C.
;; movss handles an xmm or memory source and movd a general-register source.
2249 (define_insn "vec_setv4sf_0"
2250 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2253 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2254 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2258 movss\t{%2, %0|%0, %2}
2259 movss\t{%2, %0|%0, %2}
2260 movd\t{%2, %0|%0, %2}
2262 [(set_attr "type" "ssemov")
2263 (set_attr "mode" "SF")])
2265 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: operand 3 is a power of two in 1..8.  The output
;; code rewrites it as exact_log2 (operand 3) << 4, which is the immediate
;; passed to insertps.
2266 (define_insn "*vec_setv4sf_sse4_1"
2267 [(set (match_operand:V4SF 0 "register_operand" "=x")
2270 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2271 (match_operand:V4SF 1 "register_operand" "0")
2272 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2275 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2276 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2278 [(set_attr "type" "sselog")
2279 (set_attr "prefix_extra" "1")
2280 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec form of insertps.  Operand 1 is tied to the
;; destination, operand 2 is a register, and operand 3 is passed through
;; unchanged as the 0..255 immediate.
2282 (define_insn "sse4_1_insertps"
2283 [(set (match_operand:V4SF 0 "register_operand" "=x")
2284 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2285 (match_operand:V4SF 1 "register_operand" "0")
2286 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2289 "insertps\t{%3, %2, %0|%0, %2, %3}";
2290 [(set_attr "type" "sselog")
2291 (set_attr "prefix_extra" "1")
2292 (set_attr "mode" "V4SF")])
;; Post-reload split: with a V4SF memory destination and a non-memory SF
;; operand 1, emit a plain SFmode move of operand 1 to byte offset 0 of the
;; destination.
2295 [(set (match_operand:V4SF 0 "memory_operand" "")
2298 (match_operand:SF 1 "nonmemory_operand" ""))
2301 "TARGET_SSE && reload_completed"
2304 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_setv4sf: hands the vector (operand 0), the SF value (operand 1) and
;; the constant integer in operand 2 to ix86_expand_vector_set.
2308 (define_expand "vec_setv4sf"
2309 [(match_operand:V4SF 0 "register_operand" "")
2310 (match_operand:SF 1 "register_operand" "")
2311 (match_operand 2 "const_int_operand" "")]
2314 ix86_expand_vector_set (false, operands[0], operands[1],
2315 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extract element 0 of V4SF operand 1 as SF.  After
;; reload it is split into a plain SFmode move whose source is operand 1
;; viewed in SFmode (the same hard register, or gen_lowpart of it).
;; Not valid when both operands are memory.
2319 (define_insn_and_split "*vec_extractv4sf_0"
2320 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2322 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2323 (parallel [(const_int 0)])))]
2324 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2326 "&& reload_completed"
2329 rtx op1 = operands[1];
2331 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2333 op1 = gen_lowpart (SFmode, op1);
2334 emit_move_insn (operands[0], op1);
;; *sse4_1_extractps: extractps of the element selected by operand 2 (0..3)
;; from xmm operand 1 into a general register or memory.
2338 (define_insn "*sse4_1_extractps"
2339 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2341 (match_operand:V4SF 1 "register_operand" "x")
2342 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2344 "extractps\t{%2, %1, %0|%0, %1, %2}"
2345 [(set_attr "type" "sselog")
2346 (set_attr "prefix_extra" "1")
2347 (set_attr "mode" "V4SF")])
;; *vec_extract_v4sf_mem: extracting the element selected by operand 2 from
;; a V4SF in offsettable memory is turned into an SFmode move from byte
;; offset 4 * index of that memory.
2349 (define_insn_and_split "*vec_extract_v4sf_mem"
2350 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2352 (match_operand:V4SF 1 "memory_operand" "o")
2353 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2359 int i = INTVAL (operands[2]);
2361 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extractv4sf: hands the SF destination (operand 0), the vector
;; (operand 1) and the constant integer in operand 2 to
;; ix86_expand_vector_extract.
2365 (define_expand "vec_extractv4sf"
2366 [(match_operand:SF 0 "register_operand" "")
2367 (match_operand:V4SF 1 "register_operand" "")
2368 (match_operand 2 "const_int_operand" "")]
2371 ix86_expand_vector_extract (false, operands[0], operands[1],
2372 INTVAL (operands[2]));
2376 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2378 ;; Parallel double-precision floating point element swizzling
2380 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_unpckhpd: three alternatives -- unpckhpd between registers, movlpd
;; loading the high half (%H1) of offsettable memory operand 1 when operand 2
;; is the destination register, and movhpd storing operand 1 when the
;; destination is memory.  Not valid when operands 1 and 2 are both memory.
2382 (define_insn "sse2_unpckhpd"
2383 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2386 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2387 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2388 (parallel [(const_int 1)
2390 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2392 unpckhpd\t{%2, %0|%0, %2}
2393 movlpd\t{%H1, %0|%0, %H1}
2394 movhpd\t{%1, %0|%0, %1}"
2395 [(set_attr "type" "sselog,ssemov,ssemov")
2396 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; *sse3_movddup: movddup from a register or memory into an xmm register.
;; A second alternative accepts an offsettable memory destination with a
;; register source.  Not valid when both operands are memory.
2398 (define_insn "*sse3_movddup"
2399 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2402 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2404 (parallel [(const_int 0)
2406 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2408 movddup\t{%1, %0|%0, %1}
2410 [(set_attr "type" "sselog1,ssemov")
2411 (set_attr "mode" "V2DF")])
;; Post-reload SSE3 split for a V2DF memory destination and register
;; source: the register is viewed in DFmode and stored twice, at byte
;; offsets 0 and 8 of the destination.
2414 [(set (match_operand:V2DF 0 "memory_operand" "")
2417 (match_operand:V2DF 1 "register_operand" "")
2419 (parallel [(const_int 0)
2421 "TARGET_SSE3 && reload_completed"
2424 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2425 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2426 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; sse2_unpcklpd: three alternatives -- unpcklpd between registers, movhpd
;; loading memory operand 2, and movlpd storing operand 2 into the high half
;; (%H0) of an offsettable memory destination.  Operand 1 is always tied to
;; the destination.  Not valid when operands 1 and 2 are both memory.
2430 (define_insn "sse2_unpcklpd"
2431 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2434 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2435 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2436 (parallel [(const_int 0)
2438 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2440 unpcklpd\t{%2, %0|%0, %2}
2441 movhpd\t{%2, %0|%0, %2}
2442 movlpd\t{%2, %H0|%H0, %2}"
2443 [(set_attr "type" "sselog,ssemov,ssemov")
2444 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; sse2_shufpd: turn the immediate in operand 3 into the selector operands
;; of sse2_shufpd_1.  Bit 1 of the mask picks 3 or 2 for the last selector,
;; matching the const_2_to_3_operand predicate used by sse2_shufpd_1.
2446 (define_expand "sse2_shufpd"
2447 [(match_operand:V2DF 0 "register_operand" "")
2448 (match_operand:V2DF 1 "register_operand" "")
2449 (match_operand:V2DF 2 "nonimmediate_operand" "")
2450 (match_operand:SI 3 "const_int_operand" "")]
2453 int mask = INTVAL (operands[3]);
2454 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2456 GEN_INT (mask & 2 ? 3 : 2)));
;; sse2_shufpd_1: shufpd with its selectors exposed as operand 3 (0..1) and
;; operand 4 (2..3).  The output code folds them back into one immediate,
;; subtracting the bias of 2 from operand 4, and overwrites operands[3] with
;; it before printing the instruction.
2460 (define_insn "sse2_shufpd_1"
2461 [(set (match_operand:V2DF 0 "register_operand" "=x")
2464 (match_operand:V2DF 1 "register_operand" "0")
2465 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2466 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2467 (match_operand 4 "const_2_to_3_operand" "")])))]
2471 mask = INTVAL (operands[3]);
2472 mask |= (INTVAL (operands[4]) - 2) << 1;
2473 operands[3] = GEN_INT (mask);
2475 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2477 [(set_attr "type" "sselog")
2478 (set_attr "mode" "V2DF")])
;; sse2_storehpd: extract element 1 of V2DF operand 1 as DF.  The
;; memory-destination alternative emits movhpd.  Not valid when both
;; operands are memory.
2480 (define_insn "sse2_storehpd"
2481 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2483 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2484 (parallel [(const_int 1)])))]
2485 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2487 movhpd\t{%1, %0|%0, %1}
2490 [(set_attr "type" "ssemov,sselog1,ssemov")
2491 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF in memory into a
;; register becomes a DFmode move from byte offset 8 of that memory.
2494 [(set (match_operand:DF 0 "register_operand" "")
2496 (match_operand:V2DF 1 "memory_operand" "")
2497 (parallel [(const_int 1)])))]
2498 "TARGET_SSE2 && reload_completed"
2499 [(set (match_dup 0) (match_dup 1))]
2501 operands[1] = adjust_address (operands[1], DFmode, 8);
;; sse2_storelpd: extract element 0 of V2DF operand 1 as DF.  The
;; memory-destination alternative emits movlpd.  Not valid when both
;; operands are memory.
2504 (define_insn "sse2_storelpd"
2505 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2507 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2508 (parallel [(const_int 0)])))]
2509 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2511 movlpd\t{%1, %0|%0, %1}
2514 [(set_attr "type" "ssemov")
2515 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of V2DF operand 1 into a register
;; becomes a plain DFmode move whose source is operand 1 viewed in DFmode
;; (the same hard register, or gen_lowpart of it).
2518 [(set (match_operand:DF 0 "register_operand" "")
2520 (match_operand:V2DF 1 "nonimmediate_operand" "")
2521 (parallel [(const_int 0)])))]
2522 "TARGET_SSE2 && reload_completed"
2525 rtx op1 = operands[1];
2527 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2529 op1 = gen_lowpart (DFmode, op1);
2530 emit_move_insn (operands[0], op1);
;; sse2_loadhpd: build a V2DF whose element 0 is element 0 of operand 1 and
;; whose element 1 is the DF operand 2.  Operand 1 is tied to the destination
;; in every alternative: movhpd loads operand 2 from memory, unpcklpd takes
;; it from an xmm register, and the memory-destination alternative is left
;; to a post-reload split.
;;
;; The former alternative with operand 2 tied to the destination and
;; operand 1 in another register (emitting shufpd $1, %1, %0) was dropped:
;; that instruction yields { dest[1], op1[0] }, not the required
;; { op1[0], op2 }, and no single instruction can produce the required
;; value with that operand tying.
2534 (define_insn "sse2_loadhpd"
2535 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2538 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2539 (parallel [(const_int 0)]))
2540 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2541 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2543 movhpd\t{%2, %0|%0, %2}
2544 unpcklpd\t{%2, %0|%0, %2}
2547 [(set_attr "type" "ssemov,sselog,other")
2548 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: the pattern keeps element 0 of the V2DF in memory and
;; takes register operand 1 as the other element, so it is rewritten as a
;; DFmode store of operand 1 to byte offset 8 of the destination.
2551 [(set (match_operand:V2DF 0 "memory_operand" "")
2553 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2554 (match_operand:DF 1 "register_operand" "")))]
2555 "TARGET_SSE2 && reload_completed"
2556 [(set (match_dup 0) (match_dup 1))]
2558 operands[0] = adjust_address (operands[0], DFmode, 8);
;; sse2_loadlpd: build a V2DF whose element 0 is the DF operand 2 and whose
;; element 1 is element 1 of operand 1.  movsd/movlpd bring operand 2 into a
;; destination tied to operand 1 (or to the constant C); when operand 2 is
;; instead tied to the destination, element 1 is fetched from operand 1 with
;; shufpd (register) or movhpd from %H1 (memory).
;;
;; The shufpd alternative must name operand 1 as its source.  Operand 2 is
;; tied to the destination there, so using %2 made the instruction an
;; identity shuffle that never read operand 1.
2561 (define_insn "sse2_loadlpd"
2562 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2564 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2566 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2567 (parallel [(const_int 1)]))))]
2568 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2570 movsd\t{%2, %0|%0, %2}
2571 movlpd\t{%2, %0|%0, %2}
2572 movsd\t{%2, %0|%0, %2}
2573 shufpd\t{$2, %1, %0|%0, %1, 2}
2574 movhpd\t{%H1, %0|%0, %H1}
2576 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2577 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split: the pattern puts register operand 1 first and keeps
;; element 1 of the V2DF in memory, i.e. it replaces element 0.  It is
;; therefore rewritten as a DFmode store of operand 1 to byte offset 0 of
;; the destination.  (Offset 8 would overwrite element 1, the element the
;; pattern says is preserved.)
2580 [(set (match_operand:V2DF 0 "memory_operand" "")
2582 (match_operand:DF 1 "register_operand" "")
2583 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2584 "TARGET_SSE2 && reload_completed"
2585 [(set (match_dup 0) (match_dup 1))]
2587 operands[0] = adjust_address (operands[0], DFmode, 0);
2590 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; *vec_extractv2df_1_sse: extract element 1 of a V2DF when SSE is available
;; but SSE2 is not, using single-precision moves: movhps stores to memory,
;; movhlps handles register to register, and movlps loads from the high
;; half (%H1) of an offsettable memory source.
2592 (define_insn "*vec_extractv2df_1_sse"
2593 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2595 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2596 (parallel [(const_int 1)])))]
2597 "!TARGET_SSE2 && TARGET_SSE
2598 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2600 movhps\t{%1, %0|%0, %1}
2601 movhlps\t{%1, %0|%0, %1}
2602 movlps\t{%H1, %0|%0, %H1}"
2603 [(set_attr "type" "ssemov")
2604 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_extractv2df_0_sse: extract element 0 of a V2DF when SSE is available
;; but SSE2 is not.  movlps is used for the store and load alternatives and
;; movaps for register to register.
2606 (define_insn "*vec_extractv2df_0_sse"
2607 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2609 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2610 (parallel [(const_int 0)])))]
2611 "!TARGET_SSE2 && TARGET_SSE
2612 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2614 movlps\t{%1, %0|%0, %1}
2615 movaps\t{%1, %0|%0, %1}
2616 movlps\t{%1, %0|%0, %1}"
2617 [(set_attr "type" "ssemov")
2618 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: six alternatives.  In the first three operand 1 is tied to
;; the destination and operand 2 is moved in with movsd/movlpd.  In the last
;; three operand 2 is tied to the destination and operand 1 supplies the
;; other element through shufpd (register), movhps from %H1 (memory), or a
;; movhps store into %H0 (memory destination).
;;
;; The shufpd alternative must name operand 1 as its source.  Operand 2 is
;; tied to the destination there, so using %2 made the instruction an
;; identity shuffle that never read operand 1.
2620 (define_insn "sse2_movsd"
2621 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2623 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2624 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2628 movsd\t{%2, %0|%0, %2}
2629 movlpd\t{%2, %0|%0, %2}
2630 movlpd\t{%2, %0|%0, %2}
2631 shufpd\t{$2, %1, %0|%0, %1, 2}
2632 movhps\t{%H1, %0|%0, %H1}
2633 movhps\t{%1, %H0|%H0, %1}"
2634 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2635 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; *vec_dupv2df_sse3: emits movddup from DF operand 1 (register or memory)
;; into the V2DF destination register.
2637 (define_insn "*vec_dupv2df_sse3"
2638 [(set (match_operand:V2DF 0 "register_operand" "=x")
2640 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2642 "movddup\t{%1, %0|%0, %1}"
2643 [(set_attr "type" "sselog1")
2644 (set_attr "mode" "DF")])
;; vec_dupv2df: register-only form; DF operand 1 is tied to the V2DF
;; destination register.
2646 (define_insn "vec_dupv2df"
2647 [(set (match_operand:V2DF 0 "register_operand" "=x")
2649 (match_operand:DF 1 "register_operand" "0")))]
2652 [(set_attr "type" "sselog1")
2653 (set_attr "mode" "V2DF")])
;; *vec_concatv2df_sse3: emits movddup from DF operand 1 (register or
;; memory) into the V2DF destination register.
2655 (define_insn "*vec_concatv2df_sse3"
2656 [(set (match_operand:V2DF 0 "register_operand" "=x")
2658 (match_operand:DF 1 "nonimmediate_operand" "xm")
2661 "movddup\t{%1, %0|%0, %1}"
2662 [(set_attr "type" "sselog1")
2663 (set_attr "mode" "DF")])
;; *vec_concatv2df: build a V2DF from DF operands 1 and 2.  The Y2
;; alternatives use unpcklpd (register operand 2), movhpd (memory operand 2)
;; or movsd (memory operand 1 with constraint C for operand 2).  The plain x
;; alternatives use the single-precision moves movlhps and movhps instead.
2665 (define_insn "*vec_concatv2df"
2666 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2668 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2669 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2672 unpcklpd\t{%2, %0|%0, %2}
2673 movhpd\t{%2, %0|%0, %2}
2674 movsd\t{%1, %0|%0, %1}
2675 movlhps\t{%2, %0|%0, %2}
2676 movhps\t{%2, %0|%0, %2}"
2677 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2678 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; vec_setv2df: hands the vector (operand 0), the DF value (operand 1) and
;; the constant integer in operand 2 to ix86_expand_vector_set.
2680 (define_expand "vec_setv2df"
2681 [(match_operand:V2DF 0 "register_operand" "")
2682 (match_operand:DF 1 "register_operand" "")
2683 (match_operand 2 "const_int_operand" "")]
2686 ix86_expand_vector_set (false, operands[0], operands[1],
2687 INTVAL (operands[2]));
;; vec_extractv2df: hands the DF destination (operand 0), the vector
;; (operand 1) and the constant integer in operand 2 to
;; ix86_expand_vector_extract.
2691 (define_expand "vec_extractv2df"
2692 [(match_operand:DF 0 "register_operand" "")
2693 (match_operand:V2DF 1 "register_operand" "")
2694 (match_operand 2 "const_int_operand" "")]
2697 ix86_expand_vector_extract (false, operands[0], operands[1],
2698 INTVAL (operands[2]));
;; vec_initv2df: hands operands 0 and 1 to ix86_expand_vector_init (first
;; argument false).
2702 (define_expand "vec_initv2df"
2703 [(match_operand:V2DF 0 "register_operand" "")
2704 (match_operand 1 "" "")]
2707 ix86_expand_vector_init (false, operands[0], operands[1]);
2711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2713 ;; Parallel integral arithmetic
2715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2: for every SSEMODEI mode, the preparation statement sets
;; operand 2 to a register holding the zero vector of that mode.
;; NOTE(review): presumably operand 2 is the minuend of a subtraction from
;; zero -- confirm against the full pattern.
2717 (define_expand "neg<mode>2"
2718 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2721 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2723 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; <plusminus_insn><mode>3: expander for the SSEMODEI integer vector modes;
;; the operands are legitimized with ix86_fixup_binary_operands_no_copy.
2725 (define_expand "<plusminus_insn><mode>3"
2726 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2728 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2729 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2731 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *<plusminus_insn><mode>3: matching insn for the SSEMODEI modes.  It emits
;; p<plusminus_mnemonic> with the element-size suffix from ssevecsize
;; (b/w/d/q).  Operand 1 is tied to the destination, with the <comm>
;; constraint prefix.
2733 (define_insn "*<plusminus_insn><mode>3"
2734 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2736 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2737 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2738 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2739 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2740 [(set_attr "type" "sseiadd")
2741 (set_attr "prefix_data16" "1")
2742 (set_attr "mode" "TI")])
;; sse2_<plusminus_insn><mode>3: the sat_plusminus operations on the
;; SSEMODE12 modes (V16QI and V8HI).  It emits p<sat_plusminus_mnemonic>
;; with the b/w suffix from ssevecsize.
2744 (define_insn "sse2_<plusminus_insn><mode>3"
2745 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2746 (sat_plusminus:SSEMODE12
2747 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2748 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2749 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2750 "p<sat_plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2751 [(set_attr "type" "sseiadd")
2752 (set_attr "prefix_data16" "1")
2753 (set_attr "mode" "TI")])
;; mulv16qi3: V16QI multiply on register operands; the condition requires
;; that reload has neither started nor completed.  Two code paths follow.
;; The first unpacks the bytes into words with ix86_expand_sse5_unpack,
;; multiplies with mulv8hi3 and packs with ix86_expand_sse5_pack.  The
;; second widens with punpckhbw/punpcklbw, multiplies as V8HI and
;; re-interleaves the low byte of every product.
2755 (define_insn_and_split "mulv16qi3"
2756 [(set (match_operand:V16QI 0 "register_operand" "")
2757 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2758 (match_operand:V16QI 2 "register_operand" "")))]
2760 && !(reload_completed || reload_in_progress)"
2765 rtx t[12], op0, op[3];
2770 /* On SSE5, we can take advantage of the pperm instruction to pack and
2771 unpack the bytes. Unpack data such that we've got a source byte in
2772 each low byte of each word. We don't care what goes into the high
2773 byte, so put 0 there. */
2774 for (i = 0; i < 6; ++i)
2775 t[i] = gen_reg_rtx (V8HImode);
2777 for (i = 0; i < 2; i++)
2780 op[1] = operands[i+1];
2781 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2784 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2787 /* Multiply words. */
2788 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2789 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2791 /* Pack the low byte of each word back into a single xmm */
2792 op[0] = operands[0];
2795 ix86_expand_sse5_pack (op);
2799 for (i = 0; i < 12; ++i)
2800 t[i] = gen_reg_rtx (V16QImode);
2802 /* Unpack data such that we've got a source byte in each low byte of
2803 each word. We don't care what goes into the high byte of each word.
2804 Rather than trying to get zero in there, most convenient is to let
2805 it be a copy of the low byte. */
2806 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2807 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2808 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2809 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2811 /* Multiply words. The end-of-line annotations here give a picture of what
2812 the output of that instruction looks like. Dot means don't care; the
2813 letters are the bytes of the result with A being the most significant. */
2814 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2815 gen_lowpart (V8HImode, t[0]),
2816 gen_lowpart (V8HImode, t[1])));
2817 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2818 gen_lowpart (V8HImode, t[2]),
2819 gen_lowpart (V8HImode, t[3])));
2821 /* Extract the relevant bytes and merge them back together. */
2822 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2823 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2824 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2825 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2826 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2827 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2830 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; mulv8hi3: V8HI multiply expander; the operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
2834 (define_expand "mulv8hi3"
2835 [(set (match_operand:V8HI 0 "register_operand" "")
2836 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2837 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2839 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *mulv8hi3: V8HI multiply emitted as pmullw.  Operand 1 is tied to the
;; destination and marked commutative (%); operand 2 may be a register or
;; memory.
2841 (define_insn "*mulv8hi3"
2842 [(set (match_operand:V8HI 0 "register_operand" "=x")
2843 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2844 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2845 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2846 "pmullw\t{%2, %0|%0, %2}"
2847 [(set_attr "type" "sseimul")
2848 (set_attr "prefix_data16" "1")
2849 (set_attr "mode" "TI")])
;; smulv8hi3_highpart: expander for the V8HI signed multiply-high pattern;
;; the operands are legitimized with ix86_fixup_binary_operands_no_copy.
2851 (define_expand "smulv8hi3_highpart"
2852 [(set (match_operand:V8HI 0 "register_operand" "")
2857 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2859 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2862 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *smulv8hi3_highpart: matching insn, emitted as pmulhw.  Operand 1 is
;; tied to the destination and marked commutative (%).
2864 (define_insn "*smulv8hi3_highpart"
2865 [(set (match_operand:V8HI 0 "register_operand" "=x")
2870 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2872 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2874 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2875 "pmulhw\t{%2, %0|%0, %2}"
2876 [(set_attr "type" "sseimul")
2877 (set_attr "prefix_data16" "1")
2878 (set_attr "mode" "TI")])
;; umulv8hi3_highpart: expander for the V8HI unsigned multiply-high pattern;
;; the operands are legitimized with ix86_fixup_binary_operands_no_copy.
2880 (define_expand "umulv8hi3_highpart"
2881 [(set (match_operand:V8HI 0 "register_operand" "")
2886 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2888 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2891 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *umulv8hi3_highpart: matching insn, emitted as pmulhuw.  Operand 1 is
;; tied to the destination and marked commutative (%).
2893 (define_insn "*umulv8hi3_highpart"
2894 [(set (match_operand:V8HI 0 "register_operand" "=x")
2899 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2901 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2903 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2904 "pmulhuw\t{%2, %0|%0, %2}"
2905 [(set_attr "type" "sseimul")
2906 (set_attr "prefix_data16" "1")
2907 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: pmuludq.  Elements 0 and 2 are selected from each
;; V4SI input and the result is a V2DI.  Operand 1 is tied to the
;; destination and marked commutative (%).
2909 (define_insn "sse2_umulv2siv2di3"
2910 [(set (match_operand:V2DI 0 "register_operand" "=x")
2914 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2915 (parallel [(const_int 0) (const_int 2)])))
2918 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2919 (parallel [(const_int 0) (const_int 2)])))))]
2920 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2921 "pmuludq\t{%2, %0|%0, %2}"
2922 [(set_attr "type" "sseimul")
2923 (set_attr "prefix_data16" "1")
2924 (set_attr "mode" "TI")])
;; sse4_1_mulv2siv2di3: pmuldq, available with SSE4.1.  Elements 0 and 2 are
;; selected from each V4SI input and the result is a V2DI.
2926 (define_insn "sse4_1_mulv2siv2di3"
2927 [(set (match_operand:V2DI 0 "register_operand" "=x")
2931 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2932 (parallel [(const_int 0) (const_int 2)])))
2935 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2936 (parallel [(const_int 0) (const_int 2)])))))]
2937 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2938 "pmuldq\t{%2, %0|%0, %2}"
2939 [(set_attr "type" "sseimul")
2940 (set_attr "prefix_extra" "1")
2941 (set_attr "mode" "TI")])
;; sse2_pmaddwd: pmaddwd on two V8HI inputs producing a V4SI.  The pattern
;; selects two V4HI groups from each input: one whose index list starts at
;; 0, and one (via match_dup) whose list starts at 1 and ends at 7.
2943 (define_insn "sse2_pmaddwd"
2944 [(set (match_operand:V4SI 0 "register_operand" "=x")
2949 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2950 (parallel [(const_int 0)
2956 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2957 (parallel [(const_int 0)
2963 (vec_select:V4HI (match_dup 1)
2964 (parallel [(const_int 1)
2969 (vec_select:V4HI (match_dup 2)
2970 (parallel [(const_int 1)
2973 (const_int 7)]))))))]
2974 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2975 "pmaddwd\t{%2, %0|%0, %2}"
2976 [(set_attr "type" "sseiadd")
2977 (set_attr "prefix_data16" "1")
2978 (set_attr "mode" "TI")])
;; mulv4si3: V4SI multiply expander on register operands.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy only when SSE4.1 or
;; SSE5 is enabled.
2980 (define_expand "mulv4si3"
2981 [(set (match_operand:V4SI 0 "register_operand" "")
2982 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2983 (match_operand:V4SI 2 "register_operand" "")))]
2986 if (TARGET_SSE4_1 || TARGET_SSE5)
2987 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; *sse4_1_mulv4si3: V4SI multiply emitted as pmulld when SSE4.1 is
;; enabled.  Operand 1 is tied to the destination and marked commutative.
2990 (define_insn "*sse4_1_mulv4si3"
2991 [(set (match_operand:V4SI 0 "register_operand" "=x")
2992 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2993 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
2994 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2995 "pmulld\t{%2, %0|%0, %2}"
2996 [(set_attr "type" "sseimul")
2997 (set_attr "prefix_extra" "1")
2998 (set_attr "mode" "TI")])
3000 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3001 ;; multiply/add. In general, we expect the define_split to occur before
3002 ;; register allocation, so we have to handle the corner case where the target
3003 ;; is used as the base or index register in operands 1/2.
;; The destination is earlyclobber (=&x).  The split runs after reload, or
;; earlier when operand 0 is mentioned in neither operand 1 nor operand 2.
;; Operand 3 is set to the V4SI zero vector for the replacement
;; multiply-plus pattern.
3004 (define_insn_and_split "*sse5_mulv4si3"
3005 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3006 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3007 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3010 "&& (reload_completed
3011 || (!reg_mentioned_p (operands[0], operands[1])
3012 && !reg_mentioned_p (operands[0], operands[2])))"
3016 (plus:V4SI (mult:V4SI (match_dup 1)
3020 operands[3] = CONST0_RTX (V4SImode);
3022 [(set_attr "type" "ssemuladd")
3023 (set_attr "mode" "TI")])
;; *sse2_mulv4si3: V4SI multiply for SSE2 when neither SSE4.1 nor SSE5 is
;; enabled; the condition requires that reload has neither started nor
;; completed.  Elements 2 and 0 are multiplied with sse2_umulv2siv2di3, both
;; inputs are shifted down one element so that elements 3 and 1 can be
;; multiplied the same way, and the results are gathered with pshufd and
;; merged with punpckldq.
3025 (define_insn_and_split "*sse2_mulv4si3"
3026 [(set (match_operand:V4SI 0 "register_operand" "")
3027 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3028 (match_operand:V4SI 2 "register_operand" "")))]
3029 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3030 && !(reload_completed || reload_in_progress)"
3035 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3041 t1 = gen_reg_rtx (V4SImode);
3042 t2 = gen_reg_rtx (V4SImode);
3043 t3 = gen_reg_rtx (V4SImode);
3044 t4 = gen_reg_rtx (V4SImode);
3045 t5 = gen_reg_rtx (V4SImode);
3046 t6 = gen_reg_rtx (V4SImode);
3047 thirtytwo = GEN_INT (32);
3049 /* Multiply elements 2 and 0. */
3050 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3053 /* Shift both input vectors down one element, so that elements 3
3054 and 1 are now in the slots for elements 2 and 0. For K8, at
3055 least, this is faster than using a shuffle. */
3056 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3057 gen_lowpart (TImode, op1),
3059 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3060 gen_lowpart (TImode, op2),
3062 /* Multiply elements 3 and 1. */
3063 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3066 /* Move the results in element 2 down to element 1; we don't care
3067 what goes in elements 2 and 3. */
3068 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3069 const0_rtx, const0_rtx));
3070 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3071 const0_rtx, const0_rtx));
3073 /* Merge the parts back together. */
3074 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; mulv2di3: V2DI multiply on register operands; the condition requires
;; that reload has neither started nor completed.  The product is assembled
;; from three sse2_umulv2siv2di3 multiplies: low part times low part, plus
;; the two cross products (low part of one input times the shifted-down
;; high part of the other), each shifted left by 32 before the final adds.
3078 (define_insn_and_split "mulv2di3"
3079 [(set (match_operand:V2DI 0 "register_operand" "")
3080 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3081 (match_operand:V2DI 2 "register_operand" "")))]
3083 && !(reload_completed || reload_in_progress)"
3088 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3094 t1 = gen_reg_rtx (V2DImode);
3095 t2 = gen_reg_rtx (V2DImode);
3096 t3 = gen_reg_rtx (V2DImode);
3097 t4 = gen_reg_rtx (V2DImode);
3098 t5 = gen_reg_rtx (V2DImode);
3099 t6 = gen_reg_rtx (V2DImode);
3100 thirtytwo = GEN_INT (32);
3102 /* Multiply low parts. */
3103 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3104 gen_lowpart (V4SImode, op2)));
3106 /* Shift input vectors right 32 bits so we can multiply high parts. */
3107 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3108 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3110 /* Multiply high parts by low parts. */
3111 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3112 gen_lowpart (V4SImode, t3)));
3113 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3114 gen_lowpart (V4SImode, t2)));
3116 /* Shift them back. */
3117 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3118 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3120 /* Add the three parts together. */
3121 emit_insn (gen_addv2di3 (t6, t1, t4));
3122 emit_insn (gen_addv2di3 (op0, t6, t5));
;; vec_widen_smult_hi_v8hi: compute the low 16 bits (mulv8hi3) and the
;; signed high 16 bits (smulv8hi3_highpart) of each product, then combine
;; the two with vec_interleave_highv8hi into the V4SI destination viewed as
;; V8HI.
3126 (define_expand "vec_widen_smult_hi_v8hi"
3127 [(match_operand:V4SI 0 "register_operand" "")
3128 (match_operand:V8HI 1 "register_operand" "")
3129 (match_operand:V8HI 2 "register_operand" "")]
3132 rtx op1, op2, t1, t2, dest;
3136 t1 = gen_reg_rtx (V8HImode);
3137 t2 = gen_reg_rtx (V8HImode);
3138 dest = gen_lowpart (V8HImode, operands[0]);
3140 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3141 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3142 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the "hi" variant (mulv8hi3 into t1, smulv8hi3_highpart
;; into t2), but the low halves of t1 and t2 are interleaved into the
;; destination, which is accessed as V8HI through gen_lowpart.
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3146 (define_expand "vec_widen_smult_lo_v8hi"
3147 [(match_operand:V4SI 0 "register_operand" "")
3148 (match_operand:V8HI 1 "register_operand" "")
3149 (match_operand:V8HI 2 "register_operand" "")]
3152 rtx op1, op2, t1, t2, dest;
3156 t1 = gen_reg_rtx (V8HImode);
3157 t2 = gen_reg_rtx (V8HImode);
3158 dest = gen_lowpart (V8HImode, operands[0]);
3160 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3161 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3162 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the mulv8hi3 result and t2 the umulv8hi3_highpart result;
;; the high halves of t1 and t2 are interleaved into the destination,
;; which is accessed as V8HI through gen_lowpart.
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3166 (define_expand "vec_widen_umult_hi_v8hi"
3167 [(match_operand:V4SI 0 "register_operand" "")
3168 (match_operand:V8HI 1 "register_operand" "")
3169 (match_operand:V8HI 2 "register_operand" "")]
3172 rtx op1, op2, t1, t2, dest;
3176 t1 = gen_reg_rtx (V8HImode);
3177 t2 = gen_reg_rtx (V8HImode);
3178 dest = gen_lowpart (V8HImode, operands[0]);
3180 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3181 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3182 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the "hi" variant (mulv8hi3 into t1, umulv8hi3_highpart
;; into t2), but the low halves of t1 and t2 are interleaved into the
;; destination, which is accessed as V8HI through gen_lowpart.
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3186 (define_expand "vec_widen_umult_lo_v8hi"
3187 [(match_operand:V4SI 0 "register_operand" "")
3188 (match_operand:V8HI 1 "register_operand" "")
3189 (match_operand:V8HI 2 "register_operand" "")]
3192 rtx op1, op2, t1, t2, dest;
3196 t1 = gen_reg_rtx (V8HImode);
3197 t2 = gen_reg_rtx (V8HImode);
3198 dest = gen_lowpart (V8HImode, operands[0]);
3200 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3201 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3202 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (high half), so every selected
;; source element appears twice, and sse2_umulv2siv2di3 then produces the
;; V2DI result directly in operands[0].
;; NOTE(review): this signed expander calls the same sse2_umulv2siv2di3
;; generator as the vec_widen_umult_* expanders; confirm that an unsigned
;; multiply is really intended for the signed case.
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3206 (define_expand "vec_widen_smult_hi_v4si"
3207 [(match_operand:V2DI 0 "register_operand" "")
3208 (match_operand:V4SI 1 "register_operand" "")
3209 (match_operand:V4SI 2 "register_operand" "")]
3212 rtx op1, op2, t1, t2;
3216 t1 = gen_reg_rtx (V4SImode);
3217 t2 = gen_reg_rtx (V4SImode);
3219 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3220 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3221 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (low half), so every selected
;; source element appears twice, and sse2_umulv2siv2di3 then produces the
;; V2DI result directly in operands[0].
;; NOTE(review): this signed expander calls the same sse2_umulv2siv2di3
;; generator as the vec_widen_umult_* expanders; confirm that an unsigned
;; multiply is really intended for the signed case.
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3225 (define_expand "vec_widen_smult_lo_v4si"
3226 [(match_operand:V2DI 0 "register_operand" "")
3227 (match_operand:V4SI 1 "register_operand" "")
3228 (match_operand:V4SI 2 "register_operand" "")]
3231 rtx op1, op2, t1, t2;
3235 t1 = gen_reg_rtx (V4SImode);
3236 t2 = gen_reg_rtx (V4SImode);
3238 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3239 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3240 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (high half), so every selected
;; source element appears twice, and sse2_umulv2siv2di3 then produces the
;; V2DI result directly in operands[0].
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3244 (define_expand "vec_widen_umult_hi_v4si"
3245 [(match_operand:V2DI 0 "register_operand" "")
3246 (match_operand:V4SI 1 "register_operand" "")
3247 (match_operand:V4SI 2 "register_operand" "")]
3250 rtx op1, op2, t1, t2;
3254 t1 = gen_reg_rtx (V4SImode);
3255 t2 = gen_reg_rtx (V4SImode);
3257 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3258 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3259 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (low half), so every selected
;; source element appears twice, and sse2_umulv2siv2di3 then produces the
;; V2DI result directly in operands[0].
;; op1/op2 are presumably operands[1]/operands[2] (assignment not shown here).
3263 (define_expand "vec_widen_umult_lo_v4si"
3264 [(match_operand:V2DI 0 "register_operand" "")
3265 (match_operand:V4SI 1 "register_operand" "")
3266 (match_operand:V4SI 2 "register_operand" "")]
3269 rtx op1, op2, t1, t2;
3273 t1 = gen_reg_rtx (V4SImode);
3274 t2 = gen_reg_rtx (V4SImode);
3276 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3277 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3278 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product with accumulate: V8HI x V8HI + V4SI -> V4SI.
;; sse2_pmaddwd combines operands 1 and 2 into a V4SI temporary, which is
;; then added to the accumulator (operand 3) to give operand 0.
3282 (define_expand "sdot_prodv8hi"
3283 [(match_operand:V4SI 0 "register_operand" "")
3284 (match_operand:V8HI 1 "register_operand" "")
3285 (match_operand:V8HI 2 "register_operand" "")
3286 (match_operand:V4SI 3 "register_operand" "")]
3289 rtx t = gen_reg_rtx (V4SImode);
3290 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3291 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product with accumulate: V4SI x V4SI + V2DI -> V2DI.
;;   t1 = sse2_umulv2siv2di3 (op1, op2) + accumulator (operand 3)
;;   t2, t3 = op1, op2 shifted right as whole 128-bit (TImode) values
;;            (the shift count is on lines not shown in this chunk)
;;   t4 = sse2_umulv2siv2di3 (t2, t3)
;;   operand 0 = t1 + t4
3295 (define_expand "udot_prodv4si"
3296 [(match_operand:V2DI 0 "register_operand" "")
3297 (match_operand:V4SI 1 "register_operand" "")
3298 (match_operand:V4SI 2 "register_operand" "")
3299 (match_operand:V2DI 3 "register_operand" "")]
3304 t1 = gen_reg_rtx (V2DImode);
3305 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3306 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3308 t2 = gen_reg_rtx (V4SImode);
3309 t3 = gen_reg_rtx (V4SImode);
3310 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3311 gen_lowpart (TImode, operands[1]),
3313 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3314 gen_lowpart (TImode, operands[2]),
3317 t4 = gen_reg_rtx (V2DImode);
3318 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3320 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI);
;; prints psra<ssevecsize>.  Operand 1 is tied to the destination
;; (constraint "0"); the shift count is an SImode nonmemory operand with
;; constraints "xN".
3324 (define_insn "ashr<mode>3"
3325 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3327 (match_operand:SSEMODE24 1 "register_operand" "0")
3328 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3330 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3331 [(set_attr "type" "sseishft")
3332 (set_attr "prefix_data16" "1")
3333 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI); prints psrl<ssevecsize>.  Operand 1 is tied to the destination
;; (constraint "0"); the shift count is an SImode nonmemory operand with
;; constraints "xN".
3335 (define_insn "lshr<mode>3"
3336 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3337 (lshiftrt:SSEMODE248
3338 (match_operand:SSEMODE248 1 "register_operand" "0")
3339 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3341 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3342 [(set_attr "type" "sseishft")
3343 (set_attr "prefix_data16" "1")
3344 (set_attr "mode" "TI")])
;; Per-element left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI);
;; prints psll<ssevecsize>.  Operand 1 is tied to the destination
;; (constraint "0"); the shift count is an SImode nonmemory operand with
;; constraints "xN".
3346 (define_insn "ashl<mode>3"
3347 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3349 (match_operand:SSEMODE248 1 "register_operand" "0")
3350 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3352 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3353 [(set_attr "type" "sseishft")
3354 (set_attr "prefix_data16" "1")
3355 (set_attr "mode" "TI")])
;; Whole-vector left shift for the 16-byte integer modes (SSEMODEI).
;; The vector operands are re-viewed as TImode with gen_lowpart so that the
;; operation becomes a single 128-bit ashift; the count must satisfy
;; const_0_to_255_mul_8_operand.
3357 (define_expand "vec_shl_<mode>"
3358 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3359 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3360 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3363 operands[0] = gen_lowpart (TImode, operands[0]);
3364 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the 16-byte integer modes
;; (SSEMODEI).  The vector operands are re-viewed as TImode with gen_lowpart
;; so that the operation becomes a single 128-bit lshiftrt; the count must
;; satisfy const_0_to_255_mul_8_operand.
3367 (define_expand "vec_shr_<mode>"
3368 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3369 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3370 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3373 operands[0] = gen_lowpart (TImode, operands[0]);
3374 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for a V16QI binary operation selected by <code> (the line
;; naming the code iterator is not shown in this chunk; presumably the
;; max/min codes handled by the matching insn).  It only canonicalizes the
;; operands through ix86_fixup_binary_operands_no_copy.
3377 (define_expand "<code>v16qi3"
3378 [(set (match_operand:V16QI 0 "register_operand" "")
3380 (match_operand:V16QI 1 "nonimmediate_operand" "")
3381 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3383 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; V16QI insn for the <code> operation; requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok.  Prints p<maxminiprefix>b.
;; Operand 1 is commutative with operand 2 ("%0") and tied to the output.
3385 (define_insn "*<code>v16qi3"
3386 [(set (match_operand:V16QI 0 "register_operand" "=x")
3388 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3389 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3390 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3391 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3392 [(set_attr "type" "sseiadd")
3393 (set_attr "prefix_data16" "1")
3394 (set_attr "mode" "TI")])
;; Expander for a V8HI binary operation selected by <code> (the line
;; naming the code iterator is not shown in this chunk; presumably the
;; max/min codes handled by the matching insn).  It only canonicalizes the
;; operands through ix86_fixup_binary_operands_no_copy.
3396 (define_expand "<code>v8hi3"
3397 [(set (match_operand:V8HI 0 "register_operand" "")
3399 (match_operand:V8HI 1 "nonimmediate_operand" "")
3400 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3402 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; V8HI insn for the <code> operation; requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok.  Prints p<maxminiprefix>w.
;; Operand 1 is commutative with operand 2 ("%0") and tied to the output.
3404 (define_insn "*<code>v8hi3"
3405 [(set (match_operand:V8HI 0 "register_operand" "=x")
3407 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3408 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3409 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3410 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3411 [(set_attr "type" "sseiadd")
3412 (set_attr "prefix_data16" "1")
3413 (set_attr "mode" "TI")])
;; Unsigned maximum on V8HI.  Two paths are visible (the condition that
;; selects between them is on lines not shown in this chunk):
;;  - canonicalize the operands with ix86_fixup_binary_operands_no_copy and
;;    let the umax template be emitted, or
;;  - compute the result as sse2_ussubv8hi3 (op1, op2) followed by an add
;;    of op2.  When the destination is the same rtx as operand 2, a fresh
;;    register holds the intermediate so op2 is still intact for the add.
3415 (define_expand "umaxv8hi3"
3416 [(set (match_operand:V8HI 0 "register_operand" "")
3417 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3418 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3422 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3425 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3426 if (rtx_equal_p (op3, op2))
3427 op3 = gen_reg_rtx (V8HImode);
3428 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3429 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible (the selecting condition is on lines not shown in this chunk):
;;  - canonicalize the operands with ix86_fixup_binary_operands_no_copy, or
;;  - expand through ix86_expand_int_vcond with xops laid out like the
;;    vcond pattern: dest = (op1 > op2) ? op1 : op2.
3434 (define_expand "smax<mode>3"
3435 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3436 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3437 (match_operand:SSEMODE14 2 "register_operand" "")))]
3441 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3447 xops[0] = operands[0];
3448 xops[1] = operands[1];
3449 xops[2] = operands[2];
3450 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3451 xops[4] = operands[1];
3452 xops[5] = operands[2];
3453 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operation on the SSEMODE14 modes (V16QI,
;; V4SI); requires TARGET_SSE4_1 and operands accepted by
;; ix86_binary_operator_ok.  Prints p<maxminiprefix><ssevecsize>.
3459 (define_insn "*sse4_1_<code><mode>3"
3460 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3462 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3463 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3464 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3465 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3466 [(set_attr "type" "sseiadd")
3467 (set_attr "prefix_extra" "1")
3468 (set_attr "mode" "TI")])
;; Unsigned maximum on V4SI.  Two paths are visible (the selecting
;; condition is on lines not shown in this chunk):
;;  - canonicalize the operands with ix86_fixup_binary_operands_no_copy, or
;;  - expand through ix86_expand_int_vcond with xops laid out like the
;;    vcond pattern: dest = (op1 >u op2) ? op1 : op2  (GTU comparison).
3470 (define_expand "umaxv4si3"
3471 [(set (match_operand:V4SI 0 "register_operand" "")
3472 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3473 (match_operand:V4SI 2 "register_operand" "")))]
3477 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3483 xops[0] = operands[0];
3484 xops[1] = operands[1];
3485 xops[2] = operands[2];
3486 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3487 xops[4] = operands[1];
3488 xops[5] = operands[2];
3489 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operation on the SSEMODE24 modes (V8HI,
;; V4SI); requires TARGET_SSE4_1 and operands accepted by
;; ix86_binary_operator_ok.  Prints p<maxminiprefix><ssevecsize>.
3495 (define_insn "*sse4_1_<code><mode>3"
3496 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3498 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3499 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3500 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3501 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3502 [(set_attr "type" "sseiadd")
3503 (set_attr "prefix_extra" "1")
3504 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible (the selecting condition is on lines not shown in this chunk):
;;  - canonicalize the operands with ix86_fixup_binary_operands_no_copy, or
;;  - expand through ix86_expand_int_vcond with the select arms swapped
;;    relative to smax: dest = (op1 > op2) ? op2 : op1.
3506 (define_expand "smin<mode>3"
3507 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3508 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3509 (match_operand:SSEMODE14 2 "register_operand" "")))]
3513 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3519 xops[0] = operands[0];
3520 xops[1] = operands[2];
3521 xops[2] = operands[1];
3522 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3523 xops[4] = operands[1];
3524 xops[5] = operands[2];
3525 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  Two paths are
;; visible (the selecting condition is on lines not shown in this chunk):
;;  - canonicalize the operands with ix86_fixup_binary_operands_no_copy, or
;;  - expand through ix86_expand_int_vcond with the select arms swapped
;;    relative to umax: dest = (op1 >u op2) ? op2 : op1  (GTU comparison).
3531 (define_expand "umin<mode>3"
3532 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3533 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3534 (match_operand:SSEMODE24 2 "register_operand" "")))]
3538 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3544 xops[0] = operands[0];
3545 xops[1] = operands[2];
3546 xops[2] = operands[1];
3547 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3548 xops[4] = operands[1];
3549 xops[5] = operands[2];
3550 ok = ix86_expand_int_vcond (xops);
3556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3558 ;; Parallel integral comparisons
3560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); prints pcmpeq<ssevecsize>.  Enabled for TARGET_SSE2 without
;; TARGET_SSE5, and only when ix86_binary_operator_ok accepts the operands.
;; The operands are commutative ("%0").
3562 (define_insn "sse2_eq<mode>3"
3563 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3565 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3566 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3567 "TARGET_SSE2 && !TARGET_SSE5
3568 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3569 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3570 [(set_attr "type" "ssecmp")
3571 (set_attr "prefix_data16" "1")
3572 (set_attr "mode" "TI")])
;; V2DI element-wise equality compare; prints pcmpeqq.  Requires
;; TARGET_SSE4_1 and operands accepted by ix86_binary_operator_ok.
;; The operands are commutative ("%0").
3574 (define_insn "sse4_1_eqv2di3"
3575 [(set (match_operand:V2DI 0 "register_operand" "=x")
3577 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3578 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3579 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3580 "pcmpeqq\t{%2, %0|%0, %2}"
3581 [(set_attr "type" "ssecmp")
3582 (set_attr "prefix_extra" "1")
3583 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); prints pcmpgt<ssevecsize>.  Enabled for TARGET_SSE2 without
;; TARGET_SSE5.  Not commutative: operand 1 must be a register tied to the
;; destination ("0", no "%").
3585 (define_insn "sse2_gt<mode>3"
3586 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3588 (match_operand:SSEMODE124 1 "register_operand" "0")
3589 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3590 "TARGET_SSE2 && !TARGET_SSE5"
3591 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3592 [(set_attr "type" "ssecmp")
3593 (set_attr "prefix_data16" "1")
3594 (set_attr "mode" "TI")])
;; V2DI element-wise greater-than compare; prints pcmpgtq.  The enabling
;; condition is on a line not shown in this chunk.
;; NOTE(review): operand 1 is tied to the destination ("0") but uses the
;; nonimmediate_operand predicate, whereas sse2_gt<mode>3 uses
;; register_operand for the same operand; confirm whether a memory
;; operand 1 is meant to be accepted here.
3596 (define_insn "sse4_2_gtv2di3"
3597 [(set (match_operand:V2DI 0 "register_operand" "=x")
3599 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3600 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3602 "pcmpgtq\t{%2, %0|%0, %2}"
3603 [(set_attr "type" "ssecmp")
3604 (set_attr "mode" "TI")])
;; Vector conditional select for the 16-byte integer modes (SSEMODEI):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The work is delegated to ix86_expand_int_vcond; what happens on its
;; success/failure is on lines not shown in this chunk.
3606 (define_expand "vcond<mode>"
3607 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3608 (if_then_else:SSEMODEI
3609 (match_operator 3 ""
3610 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3611 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3612 (match_operand:SSEMODEI 1 "general_operand" "")
3613 (match_operand:SSEMODEI 2 "general_operand" "")))]
3616 if (ix86_expand_int_vcond (operands))
;; Vector conditional select, "u" variant, for the 16-byte integer modes
;; (SSEMODEI):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Like vcond<mode>, it delegates to ix86_expand_int_vcond; what happens on
;; its success/failure is on lines not shown in this chunk.
3622 (define_expand "vcondu<mode>"
3623 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3624 (if_then_else:SSEMODEI
3625 (match_operator 3 ""
3626 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3627 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3628 (match_operand:SSEMODEI 1 "general_operand" "")
3629 (match_operand:SSEMODEI 2 "general_operand" "")))]
3632 if (ix86_expand_int_vcond (operands))
3638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3640 ;; Parallel bitwise logical operations
3642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer modes, expressed as an XOR of
;; operand 1 with an all-ones vector.  The preparation code builds a
;; CONST_VECTOR holding constm1_rtx in each of the mode's elements and
;; forces it into a register as operands[2].
3644 (define_expand "one_cmpl<mode>2"
3645 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3646 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3650 int i, n = GET_MODE_NUNITS (<MODE>mode);
3651 rtvec v = rtvec_alloc (n);
3653 for (i = 0; i < n; ++i)
3654 RTVEC_ELT (v, i) = constm1_rtx;
3656 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 16-byte integer modes on SSE-only targets
;; (TARGET_SSE && !TARGET_SSE2): prints the float-domain andnps, with the
;; insn "mode" attribute set to V4SF.  Operand 1, tied to the destination,
;; is the complemented input (the enclosing operator line is not shown in
;; this chunk).
3659 (define_insn "*sse_nand<mode>3"
3660 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3662 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3663 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3664 "(TARGET_SSE && !TARGET_SSE2)"
3665 "andnps\t{%2, %0|%0, %2}"
3666 [(set_attr "type" "sselog")
3667 (set_attr "mode" "V4SF")])
;; And-not for the 16-byte integer modes; prints pandn.  Operand 1, tied
;; to the destination, is the complemented input.  The enclosing operator
;; line and the enabling condition are not shown in this chunk.
3669 (define_insn "sse2_nand<mode>3"
3670 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3672 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3673 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3675 "pandn\t{%2, %0|%0, %2}"
3676 [(set_attr "type" "sselog")
3677 (set_attr "prefix_data16" "1")
3678 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers; prints pandn.
;; Operand 1, tied to the destination, is the complemented input.  The
;; enclosing operator line and the enabling condition are not shown in
;; this chunk.
3680 (define_insn "*nandtf3"
3681 [(set (match_operand:TF 0 "register_operand" "=x")
3683 (not:TF (match_operand:TF 1 "register_operand" "0"))
3684 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3686 "pandn\t{%2, %0|%0, %2}"
3687 [(set_attr "type" "sselog")
3688 (set_attr "prefix_data16" "1")
3689 (set_attr "mode" "TI")])
;; Expander for a binary operation selected by <code> on the 16-byte
;; integer modes (the line naming the code iterator is not shown in this
;; chunk; presumably the logical codes handled by the *sse_/*sse2_ insns
;; that follow).  It only canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy.
3691 (define_expand "<code><mode>3"
3692 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3694 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3695 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3697 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; <code> operation on the 16-byte integer modes for SSE-only targets
;; (TARGET_SSE && !TARGET_SSE2): prints the float-domain
;; <plogicprefix>ps form, with the insn "mode" attribute set to V4SF.
;; Operands must be accepted by ix86_binary_operator_ok and are
;; commutative ("%0").
3699 (define_insn "*sse_<code><mode>3"
3700 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3702 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3703 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3704 "(TARGET_SSE && !TARGET_SSE2)
3705 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3706 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3707 [(set_attr "type" "sselog")
3708 (set_attr "mode" "V4SF")])
;; <code> operation on the 16-byte integer modes for TARGET_SSE2: prints
;; the integer-domain p<plogicprefix> form.  Operands must be accepted by
;; ix86_binary_operator_ok and are commutative ("%0").
3710 (define_insn "*sse2_<code><mode>3"
3711 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3713 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3714 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3715 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3716 "p<plogicprefix>\t{%2, %0|%0, %2}"
3717 [(set_attr "type" "sselog")
3718 (set_attr "prefix_data16" "1")
3719 (set_attr "mode" "TI")])
;; Expander for a binary operation selected by <code> on TFmode (the line
;; naming the code iterator is not shown in this chunk).  It only
;; canonicalizes the operands through ix86_fixup_binary_operands_no_copy.
3721 (define_expand "<code>tf3"
3722 [(set (match_operand:TF 0 "register_operand" "")
3724 (match_operand:TF 1 "nonimmediate_operand" "")
3725 (match_operand:TF 2 "nonimmediate_operand" "")))]
3727 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; <code> operation on TFmode values in SSE registers; prints
;; p<plogicprefix>.  Requires TARGET_64BIT and operands accepted by
;; ix86_binary_operator_ok; the operands are commutative ("%0").
3729 (define_insn "*<code>tf3"
3730 [(set (match_operand:TF 0 "register_operand" "=x")
3732 (match_operand:TF 1 "nonimmediate_operand" "%0")
3733 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3734 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3735 "p<plogicprefix>\t{%2, %0|%0, %2}"
3736 [(set_attr "type" "sselog")
3737 (set_attr "prefix_data16" "1")
3738 (set_attr "mode" "TI")])
3740 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3742 ;; Parallel integral element swizzling
3744 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI vectors into one V16QI, keeping every other byte of the
;; inputs.  The inputs are viewed as V16QI and passed through three rounds
;; of high/low byte interleaves followed by a final low interleave into
;; operands[0].  The letter diagram below traces where each input byte
;; ends up after every round.
3747 ;; op1 = abcdefghijklmnop
3748 ;; op2 = qrstuvwxyz012345
3749 ;; h1 = aqbrcsdteufvgwhx
3750 ;; l1 = iyjzk0l1m2n3o4p5
3751 ;; h2 = aiqybjrzcks0dlt1
3752 ;; l2 = emu2fnv3gow4hpx5
3753 ;; h3 = aeimquy2bfjnrvz3
3754 ;; l3 = cgkosw04dhlptx15
3755 ;; result = bdfhjlnprtvxz135
3756 (define_expand "vec_pack_trunc_v8hi"
3757 [(match_operand:V16QI 0 "register_operand" "")
3758 (match_operand:V8HI 1 "register_operand" "")
3759 (match_operand:V8HI 2 "register_operand" "")]
3762 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3764 op1 = gen_lowpart (V16QImode, operands[1]);
3765 op2 = gen_lowpart (V16QImode, operands[2]);
3766 h1 = gen_reg_rtx (V16QImode);
3767 l1 = gen_reg_rtx (V16QImode);
3768 h2 = gen_reg_rtx (V16QImode);
3769 l2 = gen_reg_rtx (V16QImode);
3770 h3 = gen_reg_rtx (V16QImode);
3771 l3 = gen_reg_rtx (V16QImode);
3773 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3774 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3775 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3776 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3777 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3778 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3779 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Pack two V4SI vectors into one V8HI using the same interleave scheme as
;; vec_pack_trunc_v8hi, one round shorter: the inputs are viewed as V8HI,
;; interleaved high/low twice, and a final low interleave writes
;; operands[0].
3790 ;; result = bdfhjlnp
3791 (define_expand "vec_pack_trunc_v4si"
3792 [(match_operand:V8HI 0 "register_operand" "")
3793 (match_operand:V4SI 1 "register_operand" "")
3794 (match_operand:V4SI 2 "register_operand" "")]
3797 rtx op1, op2, h1, l1, h2, l2;
3799 op1 = gen_lowpart (V8HImode, operands[1]);
3800 op2 = gen_lowpart (V8HImode, operands[2]);
3801 h1 = gen_reg_rtx (V8HImode);
3802 l1 = gen_reg_rtx (V8HImode);
3803 h2 = gen_reg_rtx (V8HImode);
3804 l2 = gen_reg_rtx (V8HImode);
3806 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3807 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3808 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3809 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3810 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI vectors into one V4SI using the interleave scheme of the
;; other vec_pack_trunc expanders: the inputs are viewed as V4SI,
;; interleaved high/low once, and a final low interleave writes
;; operands[0].
3820 (define_expand "vec_pack_trunc_v2di"
3821 [(match_operand:V4SI 0 "register_operand" "")
3822 (match_operand:V2DI 1 "register_operand" "")
3823 (match_operand:V2DI 2 "register_operand" "")]
3826 rtx op1, op2, h1, l1;
3828 op1 = gen_lowpart (V4SImode, operands[1]);
3829 op2 = gen_lowpart (V4SImode, operands[2]);
3830 h1 = gen_reg_rtx (V4SImode);
3831 l1 = gen_reg_rtx (V4SImode);
3833 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3834 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3835 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high halves of two V16QI vectors.  The selection list
;; alternates indices 8..15 with 24..31 (presumably indices into the
;; concatenation of operands 1 and 2; the enclosing RTL lines are not
;; shown in this chunk).  Expands to the sse2_punpckhbw insn.
3839 (define_expand "vec_interleave_highv16qi"
3840 [(set (match_operand:V16QI 0 "register_operand" "")
3843 (match_operand:V16QI 1 "register_operand" "")
3844 (match_operand:V16QI 2 "nonimmediate_operand" ""))
3845 (parallel [(const_int 8) (const_int 24)
3846 (const_int 9) (const_int 25)
3847 (const_int 10) (const_int 26)
3848 (const_int 11) (const_int 27)
3849 (const_int 12) (const_int 28)
3850 (const_int 13) (const_int 29)
3851 (const_int 14) (const_int 30)
3852 (const_int 15) (const_int 31)])))]
3855 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V16QI vectors.  The selection list
;; alternates indices 0..7 with 16..23 (presumably indices into the
;; concatenation of operands 1 and 2; the enclosing RTL lines are not
;; shown in this chunk).  Expands to the sse2_punpcklbw insn.
3859 (define_expand "vec_interleave_lowv16qi"
3860 [(set (match_operand:V16QI 0 "register_operand" "")
3863 (match_operand:V16QI 1 "register_operand" "")
3864 (match_operand:V16QI 2 "nonimmediate_operand" ""))
3865 (parallel [(const_int 0) (const_int 16)
3866 (const_int 1) (const_int 17)
3867 (const_int 2) (const_int 18)
3868 (const_int 3) (const_int 19)
3869 (const_int 4) (const_int 20)
3870 (const_int 5) (const_int 21)
3871 (const_int 6) (const_int 22)
3872 (const_int 7) (const_int 23)])))]
3875 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V8HI vectors.  The selection list
;; alternates indices 4..7 with 12..15 (presumably indices into the
;; concatenation of operands 1 and 2; the enclosing RTL lines are not
;; shown in this chunk).  Expands to the sse2_punpckhwd insn.
;; Operand 0 carries an empty constraint string like every other
;; vec_interleave_* expander; a stray "=" was removed for consistency.
3879 (define_expand "vec_interleave_highv8hi"
3880 [(set (match_operand:V8HI 0 "register_operand" "")
3883 (match_operand:V8HI 1 "register_operand" "")
3884 (match_operand:V8HI 2 "nonimmediate_operand" ""))
3885 (parallel [(const_int 4) (const_int 12)
3886 (const_int 5) (const_int 13)
3887 (const_int 6) (const_int 14)
3888 (const_int 7) (const_int 15)])))]
3891 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V8HI vectors.  The selection list
;; alternates indices 0..3 with 8..11 (presumably indices into the
;; concatenation of operands 1 and 2; the enclosing RTL lines are not
;; shown in this chunk).  Expands to the sse2_punpcklwd insn.
3895 (define_expand "vec_interleave_lowv8hi"
3896 [(set (match_operand:V8HI 0 "register_operand" "")
3899 (match_operand:V8HI 1 "register_operand" "")
3900 (match_operand:V8HI 2 "nonimmediate_operand" ""))
3901 (parallel [(const_int 0) (const_int 8)
3902 (const_int 1) (const_int 9)
3903 (const_int 2) (const_int 10)
3904 (const_int 3) (const_int 11)])))]
3907 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V4SI vectors.  The selection list is
;; 2, 6, 3, 7 (presumably indices into the concatenation of operands 1 and
;; 2; the enclosing RTL lines are not shown in this chunk).  Expands to the
;; sse2_punpckhdq insn.
3911 (define_expand "vec_interleave_highv4si"
3912 [(set (match_operand:V4SI 0 "register_operand" "")
3915 (match_operand:V4SI 1 "register_operand" "")
3916 (match_operand:V4SI 2 "nonimmediate_operand" ""))
3917 (parallel [(const_int 2) (const_int 6)
3918 (const_int 3) (const_int 7)])))]
3921 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V4SI vectors.  The selection list is
;; 0, 4, 1, 5 (presumably indices into the concatenation of operands 1 and
;; 2; the enclosing RTL lines are not shown in this chunk).  Expands to the
;; sse2_punpckldq insn.
3925 (define_expand "vec_interleave_lowv4si"
3926 [(set (match_operand:V4SI 0 "register_operand" "")
3929 (match_operand:V4SI 1 "register_operand" "")
3930 (match_operand:V4SI 2 "nonimmediate_operand" ""))
3931 (parallel [(const_int 0) (const_int 4)
3932 (const_int 1) (const_int 5)])))]
3935 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V2DI vectors.  The selection list
;; starts with index 1; its remaining entry is on a line not shown in this
;; chunk.  Expands to the sse2_punpckhqdq insn.
3939 (define_expand "vec_interleave_highv2di"
3940 [(set (match_operand:V2DI 0 "register_operand" "")
3943 (match_operand:V2DI 1 "register_operand" "")
3944 (match_operand:V2DI 2 "nonimmediate_operand" ""))
3945 (parallel [(const_int 1)
3949 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V2DI vectors.  The selection list
;; starts with index 0; its remaining entry is on a line not shown in this
;; chunk.  Expands to the sse2_punpcklqdq insn.
3953 (define_expand "vec_interleave_lowv2di"
3954 [(set (match_operand:V2DI 0 "register_operand" "")
3957 (match_operand:V2DI 1 "register_operand" "")
3958 (match_operand:V2DI 2 "nonimmediate_operand" ""))
3959 (parallel [(const_int 0)
3963 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Narrow two V8HI inputs into one V16QI result; prints packsswb.
;; Operand 1 is tied to the destination.  The RTL codes describing the
;; narrowing are on lines not shown in this chunk (per the mnemonic this is
;; presumably signed saturation; confirm against the full pattern).
3967 (define_insn "sse2_packsswb"
3968 [(set (match_operand:V16QI 0 "register_operand" "=x")
3971 (match_operand:V8HI 1 "register_operand" "0"))
3973 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3975 "packsswb\t{%2, %0|%0, %2}"
3976 [(set_attr "type" "sselog")
3977 (set_attr "prefix_data16" "1")
3978 (set_attr "mode" "TI")])
;; Narrow two V4SI inputs into one V8HI result; prints packssdw.
;; Operand 1 is tied to the destination.  The RTL codes describing the
;; narrowing are on lines not shown in this chunk (per the mnemonic this is
;; presumably signed saturation; confirm against the full pattern).
3980 (define_insn "sse2_packssdw"
3981 [(set (match_operand:V8HI 0 "register_operand" "=x")
3984 (match_operand:V4SI 1 "register_operand" "0"))
3986 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3988 "packssdw\t{%2, %0|%0, %2}"
3989 [(set_attr "type" "sselog")
3990 (set_attr "prefix_data16" "1")
3991 (set_attr "mode" "TI")])
;; Narrow two V8HI inputs into one V16QI result; prints packuswb.
;; Operand 1 is tied to the destination.  The RTL codes describing the
;; narrowing are on lines not shown in this chunk (per the mnemonic this is
;; presumably unsigned saturation; confirm against the full pattern).
3993 (define_insn "sse2_packuswb"
3994 [(set (match_operand:V16QI 0 "register_operand" "=x")
3997 (match_operand:V8HI 1 "register_operand" "0"))
3999 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4001 "packuswb\t{%2, %0|%0, %2}"
4002 [(set_attr "type" "sselog")
4003 (set_attr "prefix_data16" "1")
4004 (set_attr "mode" "TI")])
;; punpckhbw: interleave the high bytes of two V16QI vectors.  The
;; selection list alternates indices 8..15 with 24..31, matching the
;; vec_interleave_highv16qi expander.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4006 (define_insn "sse2_punpckhbw"
4007 [(set (match_operand:V16QI 0 "register_operand" "=x")
4010 (match_operand:V16QI 1 "register_operand" "0")
4011 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4012 (parallel [(const_int 8) (const_int 24)
4013 (const_int 9) (const_int 25)
4014 (const_int 10) (const_int 26)
4015 (const_int 11) (const_int 27)
4016 (const_int 12) (const_int 28)
4017 (const_int 13) (const_int 29)
4018 (const_int 14) (const_int 30)
4019 (const_int 15) (const_int 31)])))]
4021 "punpckhbw\t{%2, %0|%0, %2}"
4022 [(set_attr "type" "sselog")
4023 (set_attr "prefix_data16" "1")
4024 (set_attr "mode" "TI")])
;; punpcklbw: interleave the low bytes of two V16QI vectors.  The
;; selection list alternates indices 0..7 with 16..23, matching the
;; vec_interleave_lowv16qi expander.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4026 (define_insn "sse2_punpcklbw"
4027 [(set (match_operand:V16QI 0 "register_operand" "=x")
4030 (match_operand:V16QI 1 "register_operand" "0")
4031 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4032 (parallel [(const_int 0) (const_int 16)
4033 (const_int 1) (const_int 17)
4034 (const_int 2) (const_int 18)
4035 (const_int 3) (const_int 19)
4036 (const_int 4) (const_int 20)
4037 (const_int 5) (const_int 21)
4038 (const_int 6) (const_int 22)
4039 (const_int 7) (const_int 23)])))]
4041 "punpcklbw\t{%2, %0|%0, %2}"
4042 [(set_attr "type" "sselog")
4043 (set_attr "prefix_data16" "1")
4044 (set_attr "mode" "TI")])
;; punpckhwd: interleave the high words of two V8HI vectors.  The
;; selection list alternates indices 4..7 with 12..15, matching the
;; vec_interleave_highv8hi expander.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4046 (define_insn "sse2_punpckhwd"
4047 [(set (match_operand:V8HI 0 "register_operand" "=x")
4050 (match_operand:V8HI 1 "register_operand" "0")
4051 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4052 (parallel [(const_int 4) (const_int 12)
4053 (const_int 5) (const_int 13)
4054 (const_int 6) (const_int 14)
4055 (const_int 7) (const_int 15)])))]
4057 "punpckhwd\t{%2, %0|%0, %2}"
4058 [(set_attr "type" "sselog")
4059 (set_attr "prefix_data16" "1")
4060 (set_attr "mode" "TI")])
;; punpcklwd: interleave the low words of two V8HI vectors.  The
;; selection list alternates indices 0..3 with 8..11, matching the
;; vec_interleave_lowv8hi expander.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4062 (define_insn "sse2_punpcklwd"
4063 [(set (match_operand:V8HI 0 "register_operand" "=x")
4066 (match_operand:V8HI 1 "register_operand" "0")
4067 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4068 (parallel [(const_int 0) (const_int 8)
4069 (const_int 1) (const_int 9)
4070 (const_int 2) (const_int 10)
4071 (const_int 3) (const_int 11)])))]
4073 "punpcklwd\t{%2, %0|%0, %2}"
4074 [(set_attr "type" "sselog")
4075 (set_attr "prefix_data16" "1")
4076 (set_attr "mode" "TI")])
;; punpckhdq: interleave the high doublewords of two V4SI vectors
;; (selection list 2, 6, 3, 7, matching vec_interleave_highv4si).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
4078 (define_insn "sse2_punpckhdq"
4079 [(set (match_operand:V4SI 0 "register_operand" "=x")
4082 (match_operand:V4SI 1 "register_operand" "0")
4083 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4084 (parallel [(const_int 2) (const_int 6)
4085 (const_int 3) (const_int 7)])))]
4087 "punpckhdq\t{%2, %0|%0, %2}"
4088 [(set_attr "type" "sselog")
4089 (set_attr "prefix_data16" "1")
4090 (set_attr "mode" "TI")])
;; punpckldq: interleave the low doublewords of two V4SI vectors
;; (selection list 0, 4, 1, 5, matching vec_interleave_lowv4si).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
4092 (define_insn "sse2_punpckldq"
4093 [(set (match_operand:V4SI 0 "register_operand" "=x")
4096 (match_operand:V4SI 1 "register_operand" "0")
4097 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4098 (parallel [(const_int 0) (const_int 4)
4099 (const_int 1) (const_int 5)])))]
4101 "punpckldq\t{%2, %0|%0, %2}"
4102 [(set_attr "type" "sselog")
4103 (set_attr "prefix_data16" "1")
4104 (set_attr "mode" "TI")])
;; punpckhqdq: interleave the high quadwords of two V2DI vectors.  The
;; selection list starts with index 1; its remaining entry is on a line
;; not shown in this chunk.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4106 (define_insn "sse2_punpckhqdq"
4107 [(set (match_operand:V2DI 0 "register_operand" "=x")
4110 (match_operand:V2DI 1 "register_operand" "0")
4111 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4112 (parallel [(const_int 1)
4115 "punpckhqdq\t{%2, %0|%0, %2}"
4116 [(set_attr "type" "sselog")
4117 (set_attr "prefix_data16" "1")
4118 (set_attr "mode" "TI")])
;; punpcklqdq: interleave the low quadwords of two V2DI vectors.  The
;; selection list starts with index 0; its remaining entry is on a line
;; not shown in this chunk.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4120 (define_insn "sse2_punpcklqdq"
4121 [(set (match_operand:V2DI 0 "register_operand" "=x")
4124 (match_operand:V2DI 1 "register_operand" "0")
4125 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4126 (parallel [(const_int 0)
4129 "punpcklqdq\t{%2, %0|%0, %2}"
4130 [(set_attr "type" "sselog")
4131 (set_attr "prefix_data16" "1")
4132 (set_attr "mode" "TI")])
;; Insert a QImode value (register or memory) into one element of a V16QI
;; vector; prints pinsrb.  Operand 3 arrives as a power-of-two mask
;; (const_pow2_1_to_32768_operand); the output code converts it to an
;; element index with exact_log2 before printing.  Operand 1, the vector
;; being modified, is tied to the destination.
4134 (define_insn "*sse4_1_pinsrb"
4135 [(set (match_operand:V16QI 0 "register_operand" "=x")
4137 (vec_duplicate:V16QI
4138 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4139 (match_operand:V16QI 1 "register_operand" "0")
4140 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4143 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4144 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4146 [(set_attr "type" "sselog")
4147 (set_attr "prefix_extra" "1")
4148 (set_attr "mode" "TI")])
;; Insert an HImode value (register or memory) into one element of a V8HI
;; vector; prints pinsrw.  Operand 3 arrives as a power-of-two mask
;; (const_pow2_1_to_128_operand); the output code converts it to an
;; element index with exact_log2 before printing.  Operand 1, the vector
;; being modified, is tied to the destination.
4150 (define_insn "*sse2_pinsrw"
4151 [(set (match_operand:V8HI 0 "register_operand" "=x")
4154 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4155 (match_operand:V8HI 1 "register_operand" "0")
4156 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4159 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4160 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4162 [(set_attr "type" "sselog")
4163 (set_attr "prefix_data16" "1")
4164 (set_attr "mode" "TI")])
;; Insert an SImode value (register or memory) into one element of a V4SI
;; vector; prints pinsrd.  Operand 3 arrives as a power-of-two mask
;; (const_pow2_1_to_8_operand); the output code converts it to an element
;; index with exact_log2 before printing.  Operand 1, the vector being
;; modified, is tied to the destination.
4166 ;; It must come before sse2_loadld since it is preferred.
4167 (define_insn "*sse4_1_pinsrd"
4168 [(set (match_operand:V4SI 0 "register_operand" "=x")
4171 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4172 (match_operand:V4SI 1 "register_operand" "0")
4173 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4176 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4177 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4179 [(set_attr "type" "sselog")
4180 (set_attr "prefix_extra" "1")
4181 (set_attr "mode" "TI")])
;; Insert a DImode value (register or memory) into one element of a V2DI
;; vector; prints pinsrq.  Operand 3 arrives as a power-of-two mask
;; (const_pow2_1_to_2_operand); the output code converts it to an element
;; index with exact_log2 before printing.  Operand 1, the vector being
;; modified, is tied to the destination.
4183 (define_insn "*sse4_1_pinsrq"
4184 [(set (match_operand:V2DI 0 "register_operand" "=x")
4187 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4188 (match_operand:V2DI 1 "register_operand" "0")
4189 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4192 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4193 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4195 [(set_attr "type" "sselog")
4196 (set_attr "prefix_extra" "1")
4197 (set_attr "mode" "TI")])
;; Extract the V16QI element selected by operand 2 (constant 0..15) into
;; an SImode general register; prints pextrb.
4199 (define_insn "*sse4_1_pextrb"
4200 [(set (match_operand:SI 0 "register_operand" "=r")
4203 (match_operand:V16QI 1 "register_operand" "x")
4204 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4206 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4207 [(set_attr "type" "sselog")
4208 (set_attr "prefix_extra" "1")
4209 (set_attr "mode" "TI")])
;; Extract the V16QI element selected by operand 2 (constant 0..15) and
;; store it directly to a QImode memory destination; prints pextrb.
4211 (define_insn "*sse4_1_pextrb_memory"
4212 [(set (match_operand:QI 0 "memory_operand" "=m")
4214 (match_operand:V16QI 1 "register_operand" "x")
4215 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4217 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4218 [(set_attr "type" "sselog")
4219 (set_attr "prefix_extra" "1")
4220 (set_attr "mode" "TI")])
;; Extract the V8HI element selected by operand 2 (constant 0..7) into an
;; SImode general register; prints pextrw.
4222 (define_insn "*sse2_pextrw"
4223 [(set (match_operand:SI 0 "register_operand" "=r")
4226 (match_operand:V8HI 1 "register_operand" "x")
4227 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4229 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4230 [(set_attr "type" "sselog")
4231 (set_attr "prefix_data16" "1")
4232 (set_attr "mode" "TI")])
;; Extract the V8HI element selected by operand 2 (constant 0..7) and
;; store it directly to an HImode memory destination; prints pextrw.
4234 (define_insn "*sse4_1_pextrw_memory"
4235 [(set (match_operand:HI 0 "memory_operand" "=m")
4237 (match_operand:V8HI 1 "register_operand" "x")
4238 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4240 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4241 [(set_attr "type" "sselog")
4242 (set_attr "prefix_extra" "1")
4243 (set_attr "mode" "TI")])
;; Extract the V4SI element selected by operand 2 (constant 0..3) into an
;; SImode general register or memory destination; prints pextrd.
4245 (define_insn "*sse4_1_pextrd"
4246 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4248 (match_operand:V4SI 1 "register_operand" "x")
4249 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4251 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4252 [(set_attr "type" "sselog")
4253 (set_attr "prefix_extra" "1")
4254 (set_attr "mode" "TI")])
4256 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq: the DImode destination may be a general register or memory.
;; Operand 1 is the V2DI source register and operand 2 the constant
;; element index (0..1).  Only enabled for TARGET_SSE4_1 in 64-bit mode.
4257 (define_insn "*sse4_1_pextrq"
4258 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4260 (match_operand:V2DI 1 "register_operand" "x")
4261 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4262 "TARGET_SSE4_1 && TARGET_64BIT"
4263 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4264 [(set_attr "type" "sselog")
4265 (set_attr "prefix_extra" "1")
4266 (set_attr "mode" "TI")])
;; Expander for pshufd taking the shuffle control as one integer
;; (operand 2).  The integer is cut into four 2-bit fields, lowest bits
;; first, and each field is handed to gen_sse2_pshufd_1 as a separate
;; selector operand.
4268 (define_expand "sse2_pshufd"
4269 [(match_operand:V4SI 0 "register_operand" "")
4270 (match_operand:V4SI 1 "nonimmediate_operand" "")
4271 (match_operand:SI 2 "const_int_operand" "")]
4274 int mask = INTVAL (operands[2]);
4275 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4276 GEN_INT ((mask >> 0) & 3),
4277 GEN_INT ((mask >> 2) & 3),
4278 GEN_INT ((mask >> 4) & 3),
4279 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit selectors
;; (operands 2..5, each 0..3).  At output time the selectors are packed
;; back into a single immediate, two bits apiece starting at bit 0, and
;; stored in operands[2] so the template can print it as %2.
4283 (define_insn "sse2_pshufd_1"
4284 [(set (match_operand:V4SI 0 "register_operand" "=x")
4286 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4287 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4288 (match_operand 3 "const_0_to_3_operand" "")
4289 (match_operand 4 "const_0_to_3_operand" "")
4290 (match_operand 5 "const_0_to_3_operand" "")])))]
4294 mask |= INTVAL (operands[2]) << 0;
4295 mask |= INTVAL (operands[3]) << 2;
4296 mask |= INTVAL (operands[4]) << 4;
4297 mask |= INTVAL (operands[5]) << 6;
4298 operands[2] = GEN_INT (mask);
4300 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4302 [(set_attr "type" "sselog1")
4303 (set_attr "prefix_data16" "1")
4304 (set_attr "mode" "TI")])
;; Expander for pshuflw taking the shuffle control as one integer
;; (operand 2).  The integer is cut into four 2-bit fields, lowest bits
;; first, and each field is passed to gen_sse2_pshuflw_1 unchanged.
4306 (define_expand "sse2_pshuflw"
4307 [(match_operand:V8HI 0 "register_operand" "")
4308 (match_operand:V8HI 1 "nonimmediate_operand" "")
4309 (match_operand:SI 2 "const_int_operand" "")]
4312 int mask = INTVAL (operands[2]);
4313 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4314 GEN_INT ((mask >> 0) & 3),
4315 GEN_INT ((mask >> 2) & 3),
4316 GEN_INT ((mask >> 4) & 3),
4317 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit selectors (operands 2..5, each 0..3) at the
;; head of the selection vector.  At output time the selectors are packed
;; into one immediate, two bits apiece starting at bit 0, and stored in
;; operands[2] for the template.
4321 (define_insn "sse2_pshuflw_1"
4322 [(set (match_operand:V8HI 0 "register_operand" "=x")
4324 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4325 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4326 (match_operand 3 "const_0_to_3_operand" "")
4327 (match_operand 4 "const_0_to_3_operand" "")
4328 (match_operand 5 "const_0_to_3_operand" "")
4336 mask |= INTVAL (operands[2]) << 0;
4337 mask |= INTVAL (operands[3]) << 2;
4338 mask |= INTVAL (operands[4]) << 4;
4339 mask |= INTVAL (operands[5]) << 6;
4340 operands[2] = GEN_INT (mask);
4342 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4344 [(set_attr "type" "sselog")
4345 (set_attr "prefix_rep" "1")
4346 (set_attr "mode" "TI")])
;; Expander for pshufhw taking the shuffle control as one integer
;; (operand 2).  Each 2-bit field is biased by +4 before being passed to
;; gen_sse2_pshufhw_1, whose selector operands are required to lie in
;; the range 4..7.
4348 (define_expand "sse2_pshufhw"
4349 [(match_operand:V8HI 0 "register_operand" "")
4350 (match_operand:V8HI 1 "nonimmediate_operand" "")
4351 (match_operand:SI 2 "const_int_operand" "")]
4354 int mask = INTVAL (operands[2]);
4355 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4356 GEN_INT (((mask >> 0) & 3) + 4),
4357 GEN_INT (((mask >> 2) & 3) + 4),
4358 GEN_INT (((mask >> 4) & 3) + 4),
4359 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit selectors (operands 2..5, each 4..7) at the
;; tail of the selection vector, which begins with fixed indices.  At
;; output time 4 is subtracted from each selector and the results are
;; packed, two bits apiece starting at bit 0, into operands[2] for the
;; template.
4363 (define_insn "sse2_pshufhw_1"
4364 [(set (match_operand:V8HI 0 "register_operand" "=x")
4366 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4367 (parallel [(const_int 0)
4371 (match_operand 2 "const_4_to_7_operand" "")
4372 (match_operand 3 "const_4_to_7_operand" "")
4373 (match_operand 4 "const_4_to_7_operand" "")
4374 (match_operand 5 "const_4_to_7_operand" "")])))]
4378 mask |= (INTVAL (operands[2]) - 4) << 0;
4379 mask |= (INTVAL (operands[3]) - 4) << 2;
4380 mask |= (INTVAL (operands[4]) - 4) << 4;
4381 mask |= (INTVAL (operands[5]) - 4) << 6;
4382 operands[2] = GEN_INT (mask);
4384 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4386 [(set_attr "type" "sselog")
4387 (set_attr "prefix_rep" "1")
4388 (set_attr "mode" "TI")])
;; Expander that builds a V4SI register value from an SImode operand
;; (register or memory).  The preparation statement supplies operand 2
;; as the all-zero V4SI constant.
4390 (define_expand "sse2_loadd"
4391 [(set (match_operand:V4SI 0 "register_operand" "")
4394 (match_operand:SI 1 "nonimmediate_operand" ""))
4398 "operands[2] = CONST0_RTX (V4SImode);")
;; Put the SImode operand 2 into a V4SI register alongside operand 1,
;; which is either the constant "C" (first three alternatives) or tied to
;; the destination (last alternative).  Alternatives, in order:
;;   movd from memory, movd from a general register,
;;   movss from memory, movss from an SSE register.
4400 (define_insn "sse2_loadld"
4401 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4404 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4405 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4409 movd\t{%2, %0|%0, %2}
4410 movd\t{%2, %0|%0, %2}
4411 movss\t{%2, %0|%0, %2}
4412 movss\t{%2, %0|%0, %2}"
4413 [(set_attr "type" "ssemov")
4414 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of a V4SI register into an SImode destination.
;; After reload, provided inter-unit moves are allowed or the destination
;; is memory or not a general register, the insn is split into a plain
;; SImode move whose source is the same hard register viewed in SImode.
4416 (define_insn_and_split "sse2_stored"
4417 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4419 (match_operand:V4SI 1 "register_operand" "x,Yi")
4420 (parallel [(const_int 0)])))]
4423 "&& reload_completed
4424 && (TARGET_INTER_UNIT_MOVES
4425 || MEM_P (operands [0])
4426 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4427 [(set (match_dup 0) (match_dup 1))]
4429 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (0..3) from a V4SI value that lives in
;; offsettable memory.  The split replaces the extraction with an
;; ordinary SImode load from the vector's address plus 4 * index.
4432 (define_insn_and_split "*vec_ext_v4si_mem"
4433 [(set (match_operand:SI 0 "register_operand" "=r")
4435 (match_operand:V4SI 1 "memory_operand" "o")
4436 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4442 int i = INTVAL (operands[2]);
4444 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for extracting element 0 of a V2DI register into a DImode
;; register or memory destination.
4448 (define_expand "sse_storeq"
4449 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4451 (match_operand:V2DI 1 "register_operand" "")
4452 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 DImode extraction from V2DI.  Rejects the
;; memory-to-memory combination.  The third alternative (general register
;; from offsettable memory) is an integer mov{q}; the first two
;; alternatives leave their type and mode attributes as "*".
4456 (define_insn "*sse2_storeq_rex64"
4457 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4459 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4460 (parallel [(const_int 0)])))]
4461 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4465 mov{q}\t{%1, %0|%0, %1}"
4466 [(set_attr "type" "*,*,imov")
4467 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode extraction from a V2DI register into memory or an SSE
;; register.  The second pattern below repeats the same extraction with
;; unconstrained operands and, when inter-unit moves are allowed or the
;; destination is memory or not a general register, rewrites it as a
;; plain DImode move from the same hard register viewed in DImode.
;; NOTE(review): the opening form of that second pattern is not visible
;; here; its shape suggests a split -- confirm against the full file.
4469 (define_insn "*sse2_storeq"
4470 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4472 (match_operand:V2DI 1 "register_operand" "x")
4473 (parallel [(const_int 0)])))]
4478 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4480 (match_operand:V2DI 1 "register_operand" "")
4481 (parallel [(const_int 0)])))]
4484 && (TARGET_INTER_UNIT_MOVES
4485 || MEM_P (operands [0])
4486 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4487 [(set (match_dup 0) (match_dup 1))]
4489 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; 64-bit extraction of element 1 of a V2DI value.  Rejects the
;; memory-to-memory combination.  Alternatives, in order:
;;   movhps  - SSE register to memory
;;   psrldq  - in place, shifting the tied register right by 8 bytes
;;   movq    - SSE register from offsettable memory, via %H1
;;   mov{q}  - general register from offsettable memory, via %H1
4492 (define_insn "*vec_extractv2di_1_rex64"
4493 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4495 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4496 (parallel [(const_int 1)])))]
4497 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4499 movhps\t{%1, %0|%0, %1}
4500 psrldq\t{$8, %0|%0, 8}
4501 movq\t{%H1, %0|%0, %H1}
4502 mov{q}\t{%H1, %0|%0, %H1}"
4503 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4504 (set_attr "memory" "*,none,*,*")
4505 (set_attr "mode" "V2SF,TI,TI,DI")])
;; SSE2 extraction of element 1 of a V2DI value.  Rejects the
;; memory-to-memory combination.  Alternatives, in order:
;;   movhps  - SSE register to memory
;;   psrldq  - in place, shifting the tied register right by 8 bytes
;;   movq    - SSE register from offsettable memory, via %H1
4507 (define_insn "*vec_extractv2di_1_sse2"
4508 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4510 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4511 (parallel [(const_int 1)])))]
4513 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4515 movhps\t{%1, %0|%0, %1}
4516 psrldq\t{$8, %0|%0, 8}
4517 movq\t{%H1, %0|%0, %H1}"
4518 [(set_attr "type" "ssemov,sseishft,ssemov")
4519 (set_attr "memory" "*,none,*")
4520 (set_attr "mode" "V2SF,TI,TI")])
4522 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extraction of element 1 of a V2DI value for SSE without SSE2.  Rejects
;; the memory-to-memory combination.  Alternatives, in order:
;;   movhps   - SSE register to memory
;;   movhlps  - SSE register to SSE register
;;   movlps   - SSE register from offsettable memory, via %H1
4523 (define_insn "*vec_extractv2di_1_sse"
4524 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4526 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4527 (parallel [(const_int 1)])))]
4528 "!TARGET_SSE2 && TARGET_SSE
4529 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4531 movhps\t{%1, %0|%0, %1}
4532 movhlps\t{%1, %0|%0, %1}
4533 movlps\t{%H1, %0|%0, %H1}"
4534 [(set_attr "type" "ssemov")
4535 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SI register from an SImode register operand using a shuffle
;; with immediate 0: pshufd in the first alternative, or shufps of the
;; destination with itself when the source is tied to it.
4537 (define_insn "*vec_dupv4si"
4538 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4540 (match_operand:SI 1 "register_operand" " Y2,0")))]
4543 pshufd\t{$0, %1, %0|%0, %1, 0}
4544 shufps\t{$0, %0, %0|%0, %0, 0}"
4545 [(set_attr "type" "sselog1")
4546 (set_attr "mode" "TI,V4SF")])
;; Build a V2DI register from a DImode register operand that is tied to
;; the destination in both alternatives.  The first alternative is typed
;; sselog1 in TI mode, the second ssemov in V4SF mode.
4548 (define_insn "*vec_dupv2di"
4549 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4551 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4556 [(set_attr "type" "sselog1,ssemov")
4557 (set_attr "mode" "TI,V4SF")])
4559 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4560 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4561 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SI value from two SImode operands.  Alternatives, in order:
;;   punpckldq - destination tied to operand 1, operand 2 in a Y2 register
;;   movd      - operand 1 from register/memory, operand 2 the constant C
;;   punpckldq - the same pairing on MMX ("y") registers
;;   movd      - the same load into an MMX register
4562 (define_insn "*sse2_concatv2si"
4563 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4565 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4566 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4569 punpckldq\t{%2, %0|%0, %2}
4570 movd\t{%1, %0|%0, %1}
4571 punpckldq\t{%2, %0|%0, %2}
4572 movd\t{%1, %0|%0, %1}"
4573 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4574 (set_attr "mode" "TI,TI,DI,DI")])
;; Form a V2SI value from two SImode operands using SSE1-style moves.
;; Alternatives, in order:
;;   unpcklps  - destination tied to operand 1, operand 2 in an SSE register
;;   movss     - operand 1 from memory, operand 2 the constant C
;;   punpckldq - the register pairing on MMX ("y") registers
;;   movd      - operand 1 from register/memory into an MMX register
4576 (define_insn "*sse1_concatv2si"
4577 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4579 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4580 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4583 unpcklps\t{%2, %0|%0, %2}
4584 movss\t{%1, %0|%0, %1}
4585 punpckldq\t{%2, %0|%0, %2}
4586 movd\t{%1, %0|%0, %1}"
4587 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4588 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Form a V4SI register from two V2SI operands, with operand 1 always
;; tied to the destination.  Alternatives, in order:
;;   punpcklqdq - operand 2 in a Y2 register
;;   movlhps    - operand 2 in an SSE register
;;   movhps     - operand 2 in memory
4590 (define_insn "*vec_concatv4si_1"
4591 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4593 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4594 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4597 punpcklqdq\t{%2, %0|%0, %2}
4598 movlhps\t{%2, %0|%0, %2}
4599 movhps\t{%2, %0|%0, %2}"
4600 [(set_attr "type" "sselog,ssemov,ssemov")
4601 (set_attr "mode" "TI,V4SF,V2SF")])
;; Form a V2DI register from two DImode operands; 32-bit targets with SSE
;; only.  Alternatives, in order:
;;   movq       - operand 1 from memory, operand 2 the constant C
;;   movq2dq    - operand 1 from an MMX register, operand 2 the constant C
;;   punpcklqdq - operand 1 tied to destination, operand 2 in a Y2 register
;;   movlhps    - operand 1 tied to destination, operand 2 in an SSE register
;;   movhps     - operand 1 tied to destination, operand 2 in memory
;;   movlps     - operand 1 from memory, operand 2 tied to destination
4603 (define_insn "vec_concatv2di"
4604 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4606 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4607 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4608 "!TARGET_64BIT && TARGET_SSE"
4610 movq\t{%1, %0|%0, %1}
4611 movq2dq\t{%1, %0|%0, %1}
4612 punpcklqdq\t{%2, %0|%0, %2}
4613 movlhps\t{%2, %0|%0, %2}
4614 movhps\t{%2, %0|%0, %2}
4615 movlps\t{%1, %0|%0, %1}"
4616 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4617 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Form a V2DI register from two DImode operands, with an extra
;; alternative that accepts operand 1 in a general register.
;; Alternatives, in order:
;;   movq       - operand 1 from memory, operand 2 the constant C
;;   movq       - operand 1 from a general register, operand 2 the constant C
;;   movq2dq    - operand 1 from an MMX register, operand 2 the constant C
;;   punpcklqdq - operand 1 tied to destination, operand 2 in a Y2 register
;;   movlhps    - operand 1 tied to destination, operand 2 in an SSE register
;;   movhps     - operand 1 tied to destination, operand 2 in memory
;;   movlps     - operand 1 from memory, operand 2 tied to destination
4619 (define_insn "*vec_concatv2di_rex"
4620 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4622 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4623 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4626 movq\t{%1, %0|%0, %1}
4627 movq\t{%1, %0|%0, %1}
4628 movq2dq\t{%1, %0|%0, %1}
4629 punpcklqdq\t{%2, %0|%0, %2}
4630 movlhps\t{%2, %0|%0, %2}
4631 movhps\t{%2, %0|%0, %2}
4632 movlps\t{%1, %0|%0, %1}"
4633 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4634 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one element of a V2DI register.  Operand 0 is the
;; vector, operand 1 the DImode value, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_set with a false
;; first argument.
4636 (define_expand "vec_setv2di"
4637 [(match_operand:V2DI 0 "register_operand" "")
4638 (match_operand:DI 1 "register_operand" "")
4639 (match_operand 2 "const_int_operand" "")]
4642 ix86_expand_vector_set (false, operands[0], operands[1],
4643 INTVAL (operands[2]));
;; Expander for reading one element of a V2DI register.  Operand 0 is the
;; DImode result, operand 1 the vector, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_extract with a
;; false first argument.
4647 (define_expand "vec_extractv2di"
4648 [(match_operand:DI 0 "register_operand" "")
4649 (match_operand:V2DI 1 "register_operand" "")
4650 (match_operand 2 "const_int_operand" "")]
4653 ix86_expand_vector_extract (false, operands[0], operands[1],
4654 INTVAL (operands[2]));
;; Expander for initializing a V2DI register from operand 1, which has no
;; predicate or mode here; the work is delegated to
;; ix86_expand_vector_init with a false first argument.
4658 (define_expand "vec_initv2di"
4659 [(match_operand:V2DI 0 "register_operand" "")
4660 (match_operand 1 "" "")]
4663 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V4SI register.  Operand 0 is the
;; vector, operand 1 the SImode value, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_set with a false
;; first argument.
4667 (define_expand "vec_setv4si"
4668 [(match_operand:V4SI 0 "register_operand" "")
4669 (match_operand:SI 1 "register_operand" "")
4670 (match_operand 2 "const_int_operand" "")]
4673 ix86_expand_vector_set (false, operands[0], operands[1],
4674 INTVAL (operands[2]));
;; Expander for reading one element of a V4SI register.  Operand 0 is the
;; SImode result, operand 1 the vector, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_extract with a
;; false first argument.
4678 (define_expand "vec_extractv4si"
4679 [(match_operand:SI 0 "register_operand" "")
4680 (match_operand:V4SI 1 "register_operand" "")
4681 (match_operand 2 "const_int_operand" "")]
4684 ix86_expand_vector_extract (false, operands[0], operands[1],
4685 INTVAL (operands[2]));
;; Expander for initializing a V4SI register from operand 1, which has no
;; predicate or mode here; the work is delegated to
;; ix86_expand_vector_init with a false first argument.
4689 (define_expand "vec_initv4si"
4690 [(match_operand:V4SI 0 "register_operand" "")
4691 (match_operand 1 "" "")]
4694 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V8HI register.  Operand 0 is the
;; vector, operand 1 the HImode value, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_set with a false
;; first argument.
4698 (define_expand "vec_setv8hi"
4699 [(match_operand:V8HI 0 "register_operand" "")
4700 (match_operand:HI 1 "register_operand" "")
4701 (match_operand 2 "const_int_operand" "")]
4704 ix86_expand_vector_set (false, operands[0], operands[1],
4705 INTVAL (operands[2]));
;; Expander for reading one element of a V8HI register.  Operand 0 is the
;; HImode result, operand 1 the vector, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_extract with a
;; false first argument.
4709 (define_expand "vec_extractv8hi"
4710 [(match_operand:HI 0 "register_operand" "")
4711 (match_operand:V8HI 1 "register_operand" "")
4712 (match_operand 2 "const_int_operand" "")]
4715 ix86_expand_vector_extract (false, operands[0], operands[1],
4716 INTVAL (operands[2]));
;; Expander for initializing a V8HI register from operand 1, which has no
;; predicate or mode here; the work is delegated to
;; ix86_expand_vector_init with a false first argument.
4720 (define_expand "vec_initv8hi"
4721 [(match_operand:V8HI 0 "register_operand" "")
4722 (match_operand 1 "" "")]
4725 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V16QI register.  Operand 0 is
;; the vector, operand 1 the QImode value, operand 2 the constant element
;; index; the work is delegated to ix86_expand_vector_set with a false
;; first argument.
4729 (define_expand "vec_setv16qi"
4730 [(match_operand:V16QI 0 "register_operand" "")
4731 (match_operand:QI 1 "register_operand" "")
4732 (match_operand 2 "const_int_operand" "")]
4735 ix86_expand_vector_set (false, operands[0], operands[1],
4736 INTVAL (operands[2]));
;; Expander for reading one element of a V16QI register.  Operand 0 is
;; the QImode result, operand 1 the vector, operand 2 the constant
;; element index; the work is delegated to ix86_expand_vector_extract
;; with a false first argument.
4740 (define_expand "vec_extractv16qi"
4741 [(match_operand:QI 0 "register_operand" "")
4742 (match_operand:V16QI 1 "register_operand" "")
4743 (match_operand 2 "const_int_operand" "")]
4746 ix86_expand_vector_extract (false, operands[0], operands[1],
4747 INTVAL (operands[2]));
;; Expander for initializing a V16QI register from operand 1, which has
;; no predicate or mode here; the work is delegated to
;; ix86_expand_vector_init with a false first argument.
4751 (define_expand "vec_initv16qi"
4752 [(match_operand:V16QI 0 "register_operand" "")
4753 (match_operand 1 "" "")]
4756 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander widening V16QI (operand 1) to V8HI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4760 (define_expand "vec_unpacku_hi_v16qi"
4761 [(match_operand:V8HI 0 "register_operand" "")
4762 (match_operand:V16QI 1 "register_operand" "")]
4766 ix86_expand_sse4_unpack (operands, true, true);
4767 else if (TARGET_SSE5)
4768 ix86_expand_sse5_unpack (operands, true, true);
4770 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening V16QI (operand 1) to V8HI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4774 (define_expand "vec_unpacks_hi_v16qi"
4775 [(match_operand:V8HI 0 "register_operand" "")
4776 (match_operand:V16QI 1 "register_operand" "")]
4780 ix86_expand_sse4_unpack (operands, false, true);
4781 else if (TARGET_SSE5)
4782 ix86_expand_sse5_unpack (operands, false, true);
4784 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening V16QI (operand 1) to V8HI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4788 (define_expand "vec_unpacku_lo_v16qi"
4789 [(match_operand:V8HI 0 "register_operand" "")
4790 (match_operand:V16QI 1 "register_operand" "")]
4794 ix86_expand_sse4_unpack (operands, true, false);
4795 else if (TARGET_SSE5)
4796 ix86_expand_sse5_unpack (operands, true, false);
4798 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening V16QI (operand 1) to V8HI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4802 (define_expand "vec_unpacks_lo_v16qi"
4803 [(match_operand:V8HI 0 "register_operand" "")
4804 (match_operand:V16QI 1 "register_operand" "")]
4808 ix86_expand_sse4_unpack (operands, false, false);
4809 else if (TARGET_SSE5)
4810 ix86_expand_sse5_unpack (operands, false, false);
4812 ix86_expand_sse_unpack (operands, false, false);
;; Expander widening V8HI (operand 1) to V4SI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4816 (define_expand "vec_unpacku_hi_v8hi"
4817 [(match_operand:V4SI 0 "register_operand" "")
4818 (match_operand:V8HI 1 "register_operand" "")]
4822 ix86_expand_sse4_unpack (operands, true, true);
4823 else if (TARGET_SSE5)
4824 ix86_expand_sse5_unpack (operands, true, true);
4826 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening V8HI (operand 1) to V4SI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4830 (define_expand "vec_unpacks_hi_v8hi"
4831 [(match_operand:V4SI 0 "register_operand" "")
4832 (match_operand:V8HI 1 "register_operand" "")]
4836 ix86_expand_sse4_unpack (operands, false, true);
4837 else if (TARGET_SSE5)
4838 ix86_expand_sse5_unpack (operands, false, true);
4840 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening V8HI (operand 1) to V4SI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4844 (define_expand "vec_unpacku_lo_v8hi"
4845 [(match_operand:V4SI 0 "register_operand" "")
4846 (match_operand:V8HI 1 "register_operand" "")]
4850 ix86_expand_sse4_unpack (operands, true, false);
4851 else if (TARGET_SSE5)
4852 ix86_expand_sse5_unpack (operands, true, false);
4854 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening V8HI (operand 1) to V4SI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4858 (define_expand "vec_unpacks_lo_v8hi"
4859 [(match_operand:V4SI 0 "register_operand" "")
4860 (match_operand:V8HI 1 "register_operand" "")]
4864 ix86_expand_sse4_unpack (operands, false, false);
4865 else if (TARGET_SSE5)
4866 ix86_expand_sse5_unpack (operands, false, false);
4868 ix86_expand_sse_unpack (operands, false, false);
;; Expander widening V4SI (operand 1) to V2DI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4872 (define_expand "vec_unpacku_hi_v4si"
4873 [(match_operand:V2DI 0 "register_operand" "")
4874 (match_operand:V4SI 1 "register_operand" "")]
4878 ix86_expand_sse4_unpack (operands, true, true);
4879 else if (TARGET_SSE5)
4880 ix86_expand_sse5_unpack (operands, true, true);
4882 ix86_expand_sse_unpack (operands, true, true);
;; Expander widening V4SI (operand 1) to V2DI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4886 (define_expand "vec_unpacks_hi_v4si"
4887 [(match_operand:V2DI 0 "register_operand" "")
4888 (match_operand:V4SI 1 "register_operand" "")]
4892 ix86_expand_sse4_unpack (operands, false, true);
4893 else if (TARGET_SSE5)
4894 ix86_expand_sse5_unpack (operands, false, true);
4896 ix86_expand_sse_unpack (operands, false, true);
;; Expander widening V4SI (operand 1) to V2DI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4900 (define_expand "vec_unpacku_lo_v4si"
4901 [(match_operand:V2DI 0 "register_operand" "")
4902 (match_operand:V4SI 1 "register_operand" "")]
4906 ix86_expand_sse4_unpack (operands, true, false);
4907 else if (TARGET_SSE5)
4908 ix86_expand_sse5_unpack (operands, true, false);
4910 ix86_expand_sse_unpack (operands, true, false);
;; Expander widening V4SI (operand 1) to V2DI (operand 0).  Chooses among
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; and ix86_expand_sse_unpack, always passing the flag pair (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half), going
;; by the pattern name -- confirm against the helper definitions.
4914 (define_expand "vec_unpacks_lo_v4si"
4915 [(match_operand:V2DI 0 "register_operand" "")
4916 (match_operand:V4SI 1 "register_operand" "")]
4920 ix86_expand_sse4_unpack (operands, false, false);
4921 else if (TARGET_SSE5)
4922 ix86_expand_sse5_unpack (operands, false, false);
4924 ix86_expand_sse_unpack (operands, false, false);
4928 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4932 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb on V16QI.  The RTL combines operands 1 and 2 with a constant
;; vector of sixteen ones; operand 1 is tied to the destination and
;; marked commutative ("%0"), operand 2 may be a register or memory.
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands as a PLUS.
4934 (define_insn "sse2_uavgv16qi3"
4935 [(set (match_operand:V16QI 0 "register_operand" "=x")
4941 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4943 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4944 (const_vector:V16QI [(const_int 1) (const_int 1)
4945 (const_int 1) (const_int 1)
4946 (const_int 1) (const_int 1)
4947 (const_int 1) (const_int 1)
4948 (const_int 1) (const_int 1)
4949 (const_int 1) (const_int 1)
4950 (const_int 1) (const_int 1)
4951 (const_int 1) (const_int 1)]))
4953 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4954 "pavgb\t{%2, %0|%0, %2}"
4955 [(set_attr "type" "sseiadd")
4956 (set_attr "prefix_data16" "1")
4957 (set_attr "mode" "TI")])
;; pavgw on V8HI.  The RTL combines operands 1 and 2 with a constant
;; vector of eight ones; operand 1 is tied to the destination and marked
;; commutative ("%0"), operand 2 may be a register or memory.  Enabled
;; for TARGET_SSE2 when ix86_binary_operator_ok accepts the operands as
;; a PLUS.
4959 (define_insn "sse2_uavgv8hi3"
4960 [(set (match_operand:V8HI 0 "register_operand" "=x")
4966 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4968 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4969 (const_vector:V8HI [(const_int 1) (const_int 1)
4970 (const_int 1) (const_int 1)
4971 (const_int 1) (const_int 1)
4972 (const_int 1) (const_int 1)]))
4974 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4975 "pavgw\t{%2, %0|%0, %2}"
4976 [(set_attr "type" "sseiadd")
4977 (set_attr "prefix_data16" "1")
4978 (set_attr "mode" "TI")])
4980 ;; The correct representation for this is absolutely enormous, and
4981 ;; surely not generally useful.
;; psadbw modelled as an opaque unspec: two V16QI inputs (operand 1 tied
;; to the destination, operand 2 register or memory) yield a V2DI result.
4982 (define_insn "sse2_psadbw"
4983 [(set (match_operand:V2DI 0 "register_operand" "=x")
4984 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4985 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4988 "psadbw\t{%2, %0|%0, %2}"
4989 [(set_attr "type" "sseiadd")
4990 (set_attr "prefix_data16" "1")
4991 (set_attr "mode" "TI")])
;; movmskps / movmskpd, generated once per mode of the SSEMODEF2P
;; iterator (V4SF and V2DF).  The SSE vector register operand 1 produces
;; an SImode result in a general register; the mnemonic suffix and the
;; pattern name prefix come from the ssemodesuffixf2c and sse mode
;; attributes.
4993 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
4994 [(set (match_operand:SI 0 "register_operand" "=r")
4996 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
4998 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
4999 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5000 [(set_attr "type" "ssecvt")
5001 (set_attr "mode" "<MODE>")])
;; pmovmskb modelled as an unspec: the V16QI SSE register operand 1
;; produces an SImode result in a general register.
5003 (define_insn "sse2_pmovmskb"
5004 [(set (match_operand:SI 0 "register_operand" "=r")
5005 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5008 "pmovmskb\t{%1, %0|%0, %1}"
5009 [(set_attr "type" "ssecvt")
5010 (set_attr "prefix_data16" "1")
5011 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) is set
;; from an unspec over two V16QI register operands (1 and 2).
5013 (define_expand "sse2_maskmovdqu"
5014 [(set (match_operand:V16QI 0 "memory_operand" "")
5015 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5016 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination is the V16QI memory addressed by
;; the SImode register operand 0, which must satisfy the "D" constraint;
;; the unspec also reads that same memory, together with the two V16QI
;; register operands.  The address register is implicit and does not
;; appear in the output template.
5022 (define_insn "*sse2_maskmovdqu"
5023 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5024 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5025 (match_operand:V16QI 2 "register_operand" "x")
5026 (mem:V16QI (match_dup 0))]
5028 "TARGET_SSE2 && !TARGET_64BIT"
5029 ;; @@@ check ordering of operands in intel/nonintel syntax
5030 "maskmovdqu\t{%2, %1|%1, %2}"
5031 [(set_attr "type" "ssecvt")
5032 (set_attr "prefix_data16" "1")
5033 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  The destination is the V16QI memory addressed by
;; the DImode register operand 0, which must satisfy the "D" constraint;
;; the unspec also reads that same memory, together with the two V16QI
;; register operands.  The address register is implicit and does not
;; appear in the output template.
5035 (define_insn "*sse2_maskmovdqu_rex64"
5036 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5037 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5038 (match_operand:V16QI 2 "register_operand" "x")
5039 (mem:V16QI (match_dup 0))]
5041 "TARGET_SSE2 && TARGET_64BIT"
5042 ;; @@@ check ordering of operands in intel/nonintel syntax
5043 "maskmovdqu\t{%2, %1|%1, %2}"
5044 [(set_attr "type" "ssecvt")
5045 (set_attr "prefix_data16" "1")
5046 (set_attr "mode" "TI")])
;; ldmxcsr: a volatile unspec that consumes an SImode memory operand.
;; The memory attribute marks the insn as a load.
5048 (define_insn "sse_ldmxcsr"
5049 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5053 [(set_attr "type" "sse")
5054 (set_attr "memory" "load")])
;; stmxcsr: an SImode memory operand is set from the volatile unspec
;; UNSPECV_STMXCSR.  The memory attribute marks the insn as a store.
5056 (define_insn "sse_stmxcsr"
5057 [(set (match_operand:SI 0 "memory_operand" "=m")
5058 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5061 [(set_attr "type" "sse")
5062 (set_attr "memory" "store")])
;; Expander for sfence (SSE or extended 3DNow!).  Operand 0 is created
;; here as a volatile BLKmode memory reference with a scratch address,
;; and the UNSPEC_SFENCE takes that same reference as its input.
5064 (define_expand "sse_sfence"
5066 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5067 "TARGET_SSE || TARGET_3DNOW_A"
5069 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5070 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence RTL: a BLKmode operand set from UNSPEC_SFENCE
;; of itself.  Memory effects are declared "unknown".
5073 (define_insn "*sse_sfence"
5074 [(set (match_operand:BLK 0 "" "")
5075 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5076 "TARGET_SSE || TARGET_3DNOW_A"
5078 [(set_attr "type" "sse")
5079 (set_attr "memory" "unknown")])
;; clflush: a volatile unspec over an address operand ("p").  Memory
;; effects are declared "unknown".
5081 (define_insn "sse2_clflush"
5082 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5086 [(set_attr "type" "sse")
5087 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is created here as a volatile BLKmode
;; memory reference with a scratch address, and the UNSPEC_MFENCE takes
;; that same reference as its input.
5089 (define_expand "sse2_mfence"
5091 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5094 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5095 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence RTL: a BLKmode operand set from UNSPEC_MFENCE
;; of itself.  Memory effects are declared "unknown".
5098 (define_insn "*sse2_mfence"
5099 [(set (match_operand:BLK 0 "" "")
5100 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5103 [(set_attr "type" "sse")
5104 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is created here as a volatile BLKmode
;; memory reference with a scratch address, and the UNSPEC_LFENCE takes
;; that same reference as its input.
5106 (define_expand "sse2_lfence"
5108 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5111 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5112 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence RTL: a BLKmode operand set from UNSPEC_LFENCE
;; of itself.  Memory effects are declared "unknown".
5115 (define_insn "*sse2_lfence"
5116 [(set (match_operand:BLK 0 "" "")
5117 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5120 [(set_attr "type" "sse")
5121 (set_attr "memory" "unknown")])
;; mwait: a volatile unspec over two SImode register operands pinned to
;; the "a" and "c" register constraints.  The insn length is fixed at 3.
5123 (define_insn "sse3_mwait"
5124 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5125 (match_operand:SI 1 "register_operand" "c")]
5128 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5129 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5130 ;; we only need to set up 32bit registers.
5132 [(set_attr "length" "3")])
;; 32-bit monitor: a volatile unspec over three SImode register operands
;; pinned to the "a", "c" and "d" register constraints, printed
;; explicitly in the template.  The insn length is fixed at 3.
5134 (define_insn "sse3_monitor"
5135 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5136 (match_operand:SI 1 "register_operand" "c")
5137 (match_operand:SI 2 "register_operand" "d")]
5139 "TARGET_SSE3 && !TARGET_64BIT"
5140 "monitor\t%0, %1, %2"
5141 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is DImode in the "a" register, while
;; operands 1 and 2 stay SImode in the "c" and "d" registers.  The insn
;; length is fixed at 3.
5143 (define_insn "sse3_monitor64"
5144 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5145 (match_operand:SI 1 "register_operand" "c")
5146 (match_operand:SI 2 "register_operand" "d")]
5148 "TARGET_SSE3 && TARGET_64BIT"
5149 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5150 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5151 ;; zero extended to 64bit, we only need to set up 32bit registers.
5153 [(set_attr "length" "3")])
5155 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5157 ;; SSSE3 instructions
5159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw on V8HI (SSE registers).  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7), first from operand 1, which
;; is tied to the destination, and then from operand 2, which may be a
;; register or memory.
5161 (define_insn "ssse3_phaddwv8hi3"
5162 [(set (match_operand:V8HI 0 "register_operand" "=x")
5168 (match_operand:V8HI 1 "register_operand" "0")
5169 (parallel [(const_int 0)]))
5170 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5172 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5173 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5176 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5177 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5179 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5180 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5185 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5186 (parallel [(const_int 0)]))
5187 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5189 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5190 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5193 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5194 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5196 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5197 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5199 "phaddw\t{%2, %0|%0, %2}"
5200 [(set_attr "type" "sseiadd")
5201 (set_attr "prefix_data16" "1")
5202 (set_attr "prefix_extra" "1")
5203 (set_attr "mode" "TI")])
;; phaddw on V4HI (MMX "y" registers).  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), first from operand 1, which is tied to the
;; destination, and then from operand 2, which may be a register or
;; memory.
5205 (define_insn "ssse3_phaddwv4hi3"
5206 [(set (match_operand:V4HI 0 "register_operand" "=y")
5211 (match_operand:V4HI 1 "register_operand" "0")
5212 (parallel [(const_int 0)]))
5213 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5215 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5216 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5220 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5221 (parallel [(const_int 0)]))
5222 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5224 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5225 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5227 "phaddw\t{%2, %0|%0, %2}"
5228 [(set_attr "type" "sseiadd")
5229 (set_attr "prefix_extra" "1")
5230 (set_attr "mode" "DI")])
;; phaddd on V4SI (SSE registers).  The RTL selects adjacent SImode
;; element pairs (0,1), (2,3), first from operand 1, which is tied to the
;; destination, and then from operand 2, which may be a register or
;; memory.
5232 (define_insn "ssse3_phadddv4si3"
5233 [(set (match_operand:V4SI 0 "register_operand" "=x")
5238 (match_operand:V4SI 1 "register_operand" "0")
5239 (parallel [(const_int 0)]))
5240 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5242 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5243 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5247 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5248 (parallel [(const_int 0)]))
5249 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5251 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5252 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5254 "phaddd\t{%2, %0|%0, %2}"
5255 [(set_attr "type" "sseiadd")
5256 (set_attr "prefix_data16" "1")
5257 (set_attr "prefix_extra" "1")
5258 (set_attr "mode" "TI")])
;; phaddd on V2SI (MMX "y" registers).  The RTL selects the SImode
;; element pair (0,1), first from operand 1, which is tied to the
;; destination, and then from operand 2, which may be a register or
;; memory.
5260 (define_insn "ssse3_phadddv2si3"
5261 [(set (match_operand:V2SI 0 "register_operand" "=y")
5265 (match_operand:V2SI 1 "register_operand" "0")
5266 (parallel [(const_int 0)]))
5267 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5270 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5271 (parallel [(const_int 0)]))
5272 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5274 "phaddd\t{%2, %0|%0, %2}"
5275 [(set_attr "type" "sseiadd")
5276 (set_attr "prefix_extra" "1")
5277 (set_attr "mode" "DI")])
;; phaddsw on V8HI (SSE registers).  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7), first from operand 1, which
;; is tied to the destination, and then from operand 2, which may be a
;; register or memory.
5279 (define_insn "ssse3_phaddswv8hi3"
5280 [(set (match_operand:V8HI 0 "register_operand" "=x")
5286 (match_operand:V8HI 1 "register_operand" "0")
5287 (parallel [(const_int 0)]))
5288 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5290 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5291 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5294 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5295 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5297 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5298 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5303 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5304 (parallel [(const_int 0)]))
5305 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5307 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5308 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5311 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5312 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5314 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5315 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5317 "phaddsw\t{%2, %0|%0, %2}"
5318 [(set_attr "type" "sseiadd")
5319 (set_attr "prefix_data16" "1")
5320 (set_attr "prefix_extra" "1")
5321 (set_attr "mode" "TI")])
;; phaddsw on V4HI (MMX "y" registers).  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), first from operand 1, which is tied to the
;; destination, and then from operand 2, which may be a register or
;; memory.
5323 (define_insn "ssse3_phaddswv4hi3"
5324 [(set (match_operand:V4HI 0 "register_operand" "=y")
5329 (match_operand:V4HI 1 "register_operand" "0")
5330 (parallel [(const_int 0)]))
5331 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5333 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5334 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5338 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5339 (parallel [(const_int 0)]))
5340 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5342 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5343 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5345 "phaddsw\t{%2, %0|%0, %2}"
5346 [(set_attr "type" "sseiadd")
5347 (set_attr "prefix_extra" "1")
5348 (set_attr "mode" "DI")])
;; phsubw on V8HI (SSE registers).  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7), first from operand 1, which
;; is tied to the destination, and then from operand 2, which may be a
;; register or memory.
5350 (define_insn "ssse3_phsubwv8hi3"
5351 [(set (match_operand:V8HI 0 "register_operand" "=x")
5357 (match_operand:V8HI 1 "register_operand" "0")
5358 (parallel [(const_int 0)]))
5359 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5361 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5362 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5365 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5366 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5368 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5369 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5374 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5375 (parallel [(const_int 0)]))
5376 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5378 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5379 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5382 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5383 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5385 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5386 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5388 "phsubw\t{%2, %0|%0, %2}"
5389 [(set_attr "type" "sseiadd")
5390 (set_attr "prefix_data16" "1")
5391 (set_attr "prefix_extra" "1")
5392 (set_attr "mode" "TI")])
;; phsubw on V4HI (MMX "y" registers).  The RTL selects adjacent HImode
;; element pairs (0,1), (2,3), first from operand 1, which is tied to the
;; destination, and then from operand 2, which may be a register or
;; memory.
5394 (define_insn "ssse3_phsubwv4hi3"
5395 [(set (match_operand:V4HI 0 "register_operand" "=y")
5400 (match_operand:V4HI 1 "register_operand" "0")
5401 (parallel [(const_int 0)]))
5402 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5404 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5405 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5409 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5410 (parallel [(const_int 0)]))
5411 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5413 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5414 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5416 "phsubw\t{%2, %0|%0, %2}"
5417 [(set_attr "type" "sseiadd")
5418 (set_attr "prefix_extra" "1")
5419 (set_attr "mode" "DI")])
;; Horizontal subtract of 32-bit elements, 128-bit form (V4SI, "x" registers).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
;; SI elements 0..3 of each input are selected in adjacent pairs; emits phsubd.
5421 (define_insn "ssse3_phsubdv4si3"
5422 [(set (match_operand:V4SI 0 "register_operand" "=x")
5427 (match_operand:V4SI 1 "register_operand" "0")
5428 (parallel [(const_int 0)]))
5429 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5431 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5432 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5436 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5437 (parallel [(const_int 0)]))
5438 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5440 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5441 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5443 "phsubd\t{%2, %0|%0, %2}"
5444 [(set_attr "type" "sseiadd")
5445 (set_attr "prefix_data16" "1")
5446 (set_attr "prefix_extra" "1")
5447 (set_attr "mode" "TI")])
;; Horizontal subtract of 32-bit elements, 64-bit form (V2SI, "y" registers).
;; Uses SI elements 0 and 1 of each input; emits phsubd with insn mode DI.
5449 (define_insn "ssse3_phsubdv2si3"
5450 [(set (match_operand:V2SI 0 "register_operand" "=y")
5454 (match_operand:V2SI 1 "register_operand" "0")
5455 (parallel [(const_int 0)]))
5456 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5459 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5460 (parallel [(const_int 0)]))
5461 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5463 "phsubd\t{%2, %0|%0, %2}"
5464 [(set_attr "type" "sseiadd")
5465 (set_attr "prefix_extra" "1")
5466 (set_attr "mode" "DI")])
;; Horizontal subtract of 16-bit elements emitting phsubsw (the saturating
;; mnemonic), 128-bit form (V8HI, "x" registers).  Operand layout matches
;; ssse3_phsubwv8hi3: operand 1 tied to the output, operand 2 reg-or-mem,
;; HI elements 0..7 of each input taken in adjacent pairs.
5468 (define_insn "ssse3_phsubswv8hi3"
5469 [(set (match_operand:V8HI 0 "register_operand" "=x")
5475 (match_operand:V8HI 1 "register_operand" "0")
5476 (parallel [(const_int 0)]))
5477 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5479 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5480 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5483 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5484 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5486 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5487 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5492 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5493 (parallel [(const_int 0)]))
5494 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5496 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5497 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5500 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5501 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5503 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5504 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5506 "phsubsw\t{%2, %0|%0, %2}"
5507 [(set_attr "type" "sseiadd")
5508 (set_attr "prefix_data16" "1")
5509 (set_attr "prefix_extra" "1")
5510 (set_attr "mode" "TI")])
;; 64-bit form (V4HI, "y" registers) of the phsubsw pattern: HI elements
;; 0..3 of each input are taken in adjacent pairs; insn mode is DI.
5512 (define_insn "ssse3_phsubswv4hi3"
5513 [(set (match_operand:V4HI 0 "register_operand" "=y")
5518 (match_operand:V4HI 1 "register_operand" "0")
5519 (parallel [(const_int 0)]))
5520 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5522 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5523 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5527 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5528 (parallel [(const_int 0)]))
5529 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5531 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5532 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5534 "phsubsw\t{%2, %0|%0, %2}"
5535 [(set_attr "type" "sseiadd")
5536 (set_attr "prefix_extra" "1")
5537 (set_attr "mode" "DI")])
;; pmaddubsw, 128-bit form: two V16QI inputs produce a V8HI result.
;; Each input is referenced twice, through one selector list beginning at
;; element 0 and one beginning at element 1.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
5539 (define_insn "ssse3_pmaddubsw128"
5540 [(set (match_operand:V8HI 0 "register_operand" "=x")
5545 (match_operand:V16QI 1 "nonimmediate_operand" "0")
5546 (parallel [(const_int 0)
5556 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5557 (parallel [(const_int 0)
5567 (vec_select:V16QI (match_dup 1)
5568 (parallel [(const_int 1)
5577 (vec_select:V16QI (match_dup 2)
5578 (parallel [(const_int 1)
5585 (const_int 15)]))))))]
5587 "pmaddubsw\t{%2, %0|%0, %2}"
5588 [(set_attr "type" "sseiadd")
5589 (set_attr "prefix_data16" "1")
5590 (set_attr "prefix_extra" "1")
5591 (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit form: two V8QI inputs produce a V4HI result in a "y"
;; register.  Structure mirrors ssse3_pmaddubsw128 with selector lists
;; beginning at element 0 and element 1 of each input.
5593 (define_insn "ssse3_pmaddubsw"
5594 [(set (match_operand:V4HI 0 "register_operand" "=y")
5599 (match_operand:V8QI 1 "nonimmediate_operand" "0")
5600 (parallel [(const_int 0)
5606 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5607 (parallel [(const_int 0)
5613 (vec_select:V8QI (match_dup 1)
5614 (parallel [(const_int 1)
5619 (vec_select:V8QI (match_dup 2)
5620 (parallel [(const_int 1)
5623 (const_int 7)]))))))]
5625 "pmaddubsw\t{%2, %0|%0, %2}"
5626 [(set_attr "type" "sseiadd")
5627 (set_attr "prefix_extra" "1")
5628 (set_attr "mode" "DI")])
;; pmulhrsw, 128-bit form (V8HI).  Operands 1 and 2 are commutative ("%0")
;; and the pattern is only valid when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands as a MULT.  The RTL includes
;; an all-ones V8HI constant vector (presumably the rounding increment).
5630 (define_insn "ssse3_pmulhrswv8hi3"
5631 [(set (match_operand:V8HI 0 "register_operand" "=x")
5638 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5640 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5642 (const_vector:V8HI [(const_int 1) (const_int 1)
5643 (const_int 1) (const_int 1)
5644 (const_int 1) (const_int 1)
5645 (const_int 1) (const_int 1)]))
5647 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5648 "pmulhrsw\t{%2, %0|%0, %2}"
5649 [(set_attr "type" "sseimul")
5650 (set_attr "prefix_data16" "1")
5651 (set_attr "prefix_extra" "1")
5652 (set_attr "mode" "TI")])
;; pmulhrsw, 64-bit form (V4HI, "y" registers).  Same structure as the V8HI
;; pattern: commutative operands, an all-ones V4HI constant vector, and a
;; condition of TARGET_SSSE3 plus ix86_binary_operator_ok for MULT.
5654 (define_insn "ssse3_pmulhrswv4hi3"
5655 [(set (match_operand:V4HI 0 "register_operand" "=y")
5662 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5664 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5666 (const_vector:V4HI [(const_int 1) (const_int 1)
5667 (const_int 1) (const_int 1)]))
5669 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5670 "pmulhrsw\t{%2, %0|%0, %2}"
5671 [(set_attr "type" "sseimul")
5672 (set_attr "prefix_extra" "1")
5673 (set_attr "mode" "DI")])
;; pshufb, 128-bit form: an unspec over two V16QI operands.  Operand 1 is
;; tied to the destination; operand 2 (register or memory) is the second
;; source.  Emits pshufb.
5675 (define_insn "ssse3_pshufbv16qi3"
5676 [(set (match_operand:V16QI 0 "register_operand" "=x")
5677 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5678 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5681 "pshufb\t{%2, %0|%0, %2}"
5682 [(set_attr "type" "sselog1")
5683 (set_attr "prefix_data16" "1")
5684 (set_attr "prefix_extra" "1")
5685 (set_attr "mode" "TI")])
;; pshufb, 64-bit form: an unspec over two V8QI operands in "y" registers.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
5687 (define_insn "ssse3_pshufbv8qi3"
5688 [(set (match_operand:V8QI 0 "register_operand" "=y")
5689 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5690 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5693 "pshufb\t{%2, %0|%0, %2}"
5694 [(set_attr "type" "sselog1")
5695 (set_attr "prefix_extra" "1")
5696 (set_attr "mode" "DI")])
;; psign{b,w,d}, 128-bit forms, iterated over SSEMODE124 (V16QI/V8HI/V4SI).
;; The mnemonic suffix comes from the ssevecsize mode attribute.  Operand 1
;; is tied to the destination; operand 2 is register or memory.
5698 (define_insn "ssse3_psign<mode>3"
5699 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5701 [(match_operand:SSEMODE124 1 "register_operand" "0")
5702 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5705 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5706 [(set_attr "type" "sselog1")
5707 (set_attr "prefix_data16" "1")
5708 (set_attr "prefix_extra" "1")
5709 (set_attr "mode" "TI")])
;; psign, 64-bit forms, iterated over MMXMODEI with "y" registers.  The
;; mnemonic suffix comes from the mmxvecsize mode attribute.  Operand 1 is
;; tied to the destination; operand 2 is register or memory.
5711 (define_insn "ssse3_psign<mode>3"
5712 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5714 [(match_operand:MMXMODEI 1 "register_operand" "0")
5715 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5718 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5719 [(set_attr "type" "sselog1")
5720 (set_attr "prefix_extra" "1")
5721 (set_attr "mode" "DI")])
;; palignr, 128-bit form on TImode values.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the immediate in the emitted instruction is 1/8 of the RTL
;; value (a bit count converted to a byte count, presumably).
5723 (define_insn "ssse3_palignrti"
5724 [(set (match_operand:TI 0 "register_operand" "=x")
5725 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5726 (match_operand:TI 2 "nonimmediate_operand" "xm")
5727 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5731 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5732 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5734 [(set_attr "type" "sseishft")
5735 (set_attr "prefix_data16" "1")
5736 (set_attr "prefix_extra" "1")
5737 (set_attr "mode" "TI")])
;; palignr, 64-bit form on DImode values in "y" registers.  As in the TImode
;; pattern, operand 3 is divided by 8 in the output code before the
;; instruction is printed.
5739 (define_insn "ssse3_palignrdi"
5740 [(set (match_operand:DI 0 "register_operand" "=y")
5741 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5742 (match_operand:DI 2 "nonimmediate_operand" "ym")
5743 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5747 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5748 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5750 [(set_attr "type" "sseishft")
5751 (set_attr "prefix_extra" "1")
5752 (set_attr "mode" "DI")])
;; Vector absolute value for SSEMODE124 (V16QI/V8HI/V4SI) via pabs{b,w,d}.
;; Two-operand form: the source (register or memory) is not tied to the
;; destination.  The suffix comes from the ssevecsize mode attribute.
5754 (define_insn "abs<mode>2"
5755 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5756 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5758 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5759 [(set_attr "type" "sselog1")
5760 (set_attr "prefix_data16" "1")
5761 (set_attr "prefix_extra" "1")
5762 (set_attr "mode" "TI")])
;; Vector absolute value for MMXMODEI in "y" registers via pabs with the
;; mmxvecsize suffix.  The source may be a register or memory and is not
;; tied to the destination.
5764 (define_insn "abs<mode>2"
5765 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5766 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5768 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5769 [(set_attr "type" "sselog1")
5770 (set_attr "prefix_extra" "1")
5771 (set_attr "mode" "DI")])
5773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5775 ;; AMD SSE4A instructions
5777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store movnts{s,d}: stores a MODEF value from an SSE register
;; (operand 1) to memory (operand 0).  Modelled as an unspec of the source.
5779 (define_insn "sse4a_movnt<mode>"
5780 [(set (match_operand:MODEF 0 "memory_operand" "=m")
5782 [(match_operand:MODEF 1 "register_operand" "x")]
5785 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
5786 [(set_attr "type" "ssemov")
5787 (set_attr "mode" "<MODE>")])
;; Vector-operand variant of movnts{s,d}: stores element 0 of a V4SF/V2DF
;; register (SSEMODEF2P) to a scalar-mode memory destination.
5789 (define_insn "sse4a_vmmovnt<mode>"
5790 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
5791 (unspec:<ssescalarmode>
5792 [(vec_select:<ssescalarmode>
5793 (match_operand:SSEMODEF2P 1 "register_operand" "x")
5794 (parallel [(const_int 0)]))]
5797 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5798 [(set_attr "type" "ssemov")
5799 (set_attr "mode" "<ssescalarmode>")])
;; extrq with two immediate operands.  Operand 1 is tied to the destination;
;; operands 2 and 3 are mode-less integer constants that are printed as the
;; instruction's immediates.
5801 (define_insn "sse4a_extrqi"
5802 [(set (match_operand:V2DI 0 "register_operand" "=x")
5803 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5804 (match_operand 2 "const_int_operand" "")
5805 (match_operand 3 "const_int_operand" "")]
5808 "extrq\t{%3, %2, %0|%0, %2, %3}"
5809 [(set_attr "type" "sse")
5810 (set_attr "prefix_data16" "1")
5811 (set_attr "mode" "TI")])
;; extrq, register-controlled form: operand 2 is a V16QI SSE register rather
;; than a pair of immediates.  Operand 1 is tied to the destination.
5813 (define_insn "sse4a_extrq"
5814 [(set (match_operand:V2DI 0 "register_operand" "=x")
5815 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5816 (match_operand:V16QI 2 "register_operand" "x")]
5819 "extrq\t{%2, %0|%0, %2}"
5820 [(set_attr "type" "sse")
5821 (set_attr "prefix_data16" "1")
5822 (set_attr "mode" "TI")])
;; insertq with two immediate operands.  Operand 1 is tied to the
;; destination, operand 2 is a second V2DI register, and operands 3 and 4
;; are mode-less integer constants printed as immediates.
5824 (define_insn "sse4a_insertqi"
5825 [(set (match_operand:V2DI 0 "register_operand" "=x")
5826 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5827 (match_operand:V2DI 2 "register_operand" "x")
5828 (match_operand 3 "const_int_operand" "")
5829 (match_operand 4 "const_int_operand" "")]
5832 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5833 [(set_attr "type" "sseins")
5834 (set_attr "prefix_rep" "1")
5835 (set_attr "mode" "TI")])
;; insertq, register-only form: both sources are V2DI SSE registers and
;; operand 1 is tied to the destination.
5837 (define_insn "sse4a_insertq"
5838 [(set (match_operand:V2DI 0 "register_operand" "=x")
5839 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5840 (match_operand:V2DI 2 "register_operand" "x")]
5843 "insertq\t{%2, %0|%0, %2}"
5844 [(set_attr "type" "sseins")
5845 (set_attr "prefix_rep" "1")
5846 (set_attr "mode" "TI")])
5848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5850 ;; Intel SSE4.1 instructions
5852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendps/blendpd expressed as a vec_merge for V4SF/V2DF.  Note the operand
;; order inside vec_merge: operand 2 (reg-or-mem) comes first, operand 1
;; (tied to the destination) second, and operand 3 is the immediate mask
;; whose allowed range is given by the <blendbits> mode attribute.
5854 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
5855 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
5856 (vec_merge:SSEMODEF2P
5857 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
5858 (match_operand:SSEMODEF2P 1 "register_operand" "0")
5859 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
5861 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
5862 [(set_attr "type" "ssemov")
5863 (set_attr "prefix_extra" "1")
5864 (set_attr "mode" "<MODE>")])
;; blendvps/blendvpd for V4SF/V2DF.  Operand 3 is the selector register and
;; uses the "Yz" constraint; operands 0, 1 and 2 use *_not_xmm0 predicates
;; (so presumably "Yz" denotes xmm0 and the others must avoid it).
5866 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
5867 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
5869 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
5870 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
5871 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
5874 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
5875 [(set_attr "type" "ssemov")
5876 (set_attr "prefix_extra" "1")
5877 (set_attr "mode" "<MODE>")])
;; dpps/dppd for V4SF/V2DF.  Operands 1 and 2 are marked commutative ("%0");
;; operand 3 is an 8-bit immediate (const_0_to_255_operand).
5879 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
5880 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
5882 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
5883 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
5884 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5887 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
5888 [(set_attr "type" "ssemul")
5889 (set_attr "prefix_extra" "1")
5890 (set_attr "mode" "<MODE>")])
;; movntdqa: loads a V2DI value from memory (operand 1 must be a memory
;; operand) into an SSE register.  Modelled as an unspec of the source.
5892 (define_insn "sse4_1_movntdqa"
5893 [(set (match_operand:V2DI 0 "register_operand" "=x")
5894 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
5897 "movntdqa\t{%1, %0|%0, %1}"
5898 [(set_attr "type" "ssecvt")
5899 (set_attr "prefix_extra" "1")
5900 (set_attr "mode" "TI")])
;; mpsadbw on V16QI operands with an 8-bit immediate (operand 3).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
5902 (define_insn "sse4_1_mpsadbw"
5903 [(set (match_operand:V16QI 0 "register_operand" "=x")
5904 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5905 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5906 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5909 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
5910 [(set_attr "type" "sselog1")
5911 (set_attr "prefix_extra" "1")
5912 (set_attr "mode" "TI")])
;; packusdw: combines two V4SI inputs into one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 is register or memory.
5914 (define_insn "sse4_1_packusdw"
5915 [(set (match_operand:V8HI 0 "register_operand" "=x")
5918 (match_operand:V4SI 1 "register_operand" "0"))
5920 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
5922 "packusdw\t{%2, %0|%0, %2}"
5923 [(set_attr "type" "sselog")
5924 (set_attr "prefix_extra" "1")
5925 (set_attr "mode" "TI")])
;; pblendvb on V16QI.  Operand 3 is the selector register with the "Yz"
;; constraint; operands 0, 1 and 2 use *_not_xmm0 predicates (so "Yz" is
;; presumably xmm0, which the other operands must not occupy).
5927 (define_insn "sse4_1_pblendvb"
5928 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
5929 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
5930 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
5931 (match_operand:V16QI 3 "register_operand" "Yz")]
5934 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
5935 [(set_attr "type" "ssemov")
5936 (set_attr "prefix_extra" "1")
5937 (set_attr "mode" "TI")])
;; pblendw on V8HI with an 8-bit immediate mask (operand 3).  Operand 2
;; (reg-or-mem) is listed before operand 1 (tied to the destination) in the
;; RTL, matching the ordering used by sse4_1_blendp.
5939 (define_insn "sse4_1_pblendw"
5940 [(set (match_operand:V8HI 0 "register_operand" "=x")
5942 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5943 (match_operand:V8HI 1 "register_operand" "0")
5944 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
5946 "pblendw\t{%3, %2, %0|%0, %2, %3}"
5947 [(set_attr "type" "ssemov")
5948 (set_attr "prefix_extra" "1")
5949 (set_attr "mode" "TI")])
;; phminposuw: unary unspec (UNSPEC_PHMINPOSUW) on a V8HI source that may be
;; a register or memory; the result goes to an SSE register.
5951 (define_insn "sse4_1_phminposuw"
5952 [(set (match_operand:V8HI 0 "register_operand" "=x")
5953 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
5954 UNSPEC_PHMINPOSUW))]
5956 "phminposuw\t{%1, %0|%0, %1}"
5957 [(set_attr "type" "ssemov")
5958 (set_attr "prefix_extra" "1")
5959 (set_attr "mode" "TI")])
;; pmovsxbw, named pattern: the source is a full V16QI SSE register
;; (register only) from which elements starting at index 0 are selected;
;; the result is V8HI.
5961 (define_insn "sse4_1_extendv8qiv8hi2"
5962 [(set (match_operand:V8HI 0 "register_operand" "=x")
5965 (match_operand:V16QI 1 "register_operand" "x")
5966 (parallel [(const_int 0)
5975 "pmovsxbw\t{%1, %0|%0, %1}"
5976 [(set_attr "type" "ssemov")
5977 (set_attr "prefix_extra" "1")
5978 (set_attr "mode" "TI")])
;; pmovsxbw, unnamed ("*") variant: the source is a V8QI register or memory
;; operand wrapped in vec_duplicate:V16QI, which lets the narrow memory form
;; of the instruction be matched.
5980 (define_insn "*sse4_1_extendv8qiv8hi2"
5981 [(set (match_operand:V8HI 0 "register_operand" "=x")
5984 (vec_duplicate:V16QI
5985 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
5986 (parallel [(const_int 0)
5995 "pmovsxbw\t{%1, %0|%0, %1}"
5996 [(set_attr "type" "ssemov")
5997 (set_attr "prefix_extra" "1")
5998 (set_attr "mode" "TI")])
;; pmovsxbd, named pattern: V16QI register source, V4SI result.
6000 (define_insn "sse4_1_extendv4qiv4si2"
6001 [(set (match_operand:V4SI 0 "register_operand" "=x")
6004 (match_operand:V16QI 1 "register_operand" "x")
6005 (parallel [(const_int 0)
6010 "pmovsxbd\t{%1, %0|%0, %1}"
6011 [(set_attr "type" "ssemov")
6012 (set_attr "prefix_extra" "1")
6013 (set_attr "mode" "TI")])
;; pmovsxbd, unnamed variant: V4QI register-or-memory source wrapped in
;; vec_duplicate:V16QI; V4SI result.
6015 (define_insn "*sse4_1_extendv4qiv4si2"
6016 [(set (match_operand:V4SI 0 "register_operand" "=x")
6019 (vec_duplicate:V16QI
6020 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6021 (parallel [(const_int 0)
6026 "pmovsxbd\t{%1, %0|%0, %1}"
6027 [(set_attr "type" "ssemov")
6028 (set_attr "prefix_extra" "1")
6029 (set_attr "mode" "TI")])
;; pmovsxbq, named pattern: V16QI register source, V2DI result.
6031 (define_insn "sse4_1_extendv2qiv2di2"
6032 [(set (match_operand:V2DI 0 "register_operand" "=x")
6035 (match_operand:V16QI 1 "register_operand" "x")
6036 (parallel [(const_int 0)
6039 "pmovsxbq\t{%1, %0|%0, %1}"
6040 [(set_attr "type" "ssemov")
6041 (set_attr "prefix_extra" "1")
6042 (set_attr "mode" "TI")])
;; pmovsxbq, unnamed variant: V2QI register-or-memory source wrapped in
;; vec_duplicate:V16QI; V2DI result.
6044 (define_insn "*sse4_1_extendv2qiv2di2"
6045 [(set (match_operand:V2DI 0 "register_operand" "=x")
6048 (vec_duplicate:V16QI
6049 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6050 (parallel [(const_int 0)
6053 "pmovsxbq\t{%1, %0|%0, %1}"
6054 [(set_attr "type" "ssemov")
6055 (set_attr "prefix_extra" "1")
6056 (set_attr "mode" "TI")])
;; pmovsxwd, named pattern: V8HI register source, V4SI result.
6058 (define_insn "sse4_1_extendv4hiv4si2"
6059 [(set (match_operand:V4SI 0 "register_operand" "=x")
6062 (match_operand:V8HI 1 "register_operand" "x")
6063 (parallel [(const_int 0)
6068 "pmovsxwd\t{%1, %0|%0, %1}"
6069 [(set_attr "type" "ssemov")
6070 (set_attr "prefix_extra" "1")
6071 (set_attr "mode" "TI")])
;; pmovsxwd, unnamed variant taking a narrow register-or-memory source.
;; The source is V4HI (four 16-bit elements widened to V4SI), matching the
;; operand mode used by *sse4_1_zero_extendv4hiv4si2.
6073 (define_insn "*sse4_1_extendv4hiv4si2"
6074 [(set (match_operand:V4SI 0 "register_operand" "=x")
6078 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6079 (parallel [(const_int 0)
6084 "pmovsxwd\t{%1, %0|%0, %1}"
6085 [(set_attr "type" "ssemov")
6086 (set_attr "prefix_extra" "1")
6087 (set_attr "mode" "TI")])
;; pmovsxwq, named pattern: V8HI register source, V2DI result.
6089 (define_insn "sse4_1_extendv2hiv2di2"
6090 [(set (match_operand:V2DI 0 "register_operand" "=x")
6093 (match_operand:V8HI 1 "register_operand" "x")
6094 (parallel [(const_int 0)
6097 "pmovsxwq\t{%1, %0|%0, %1}"
6098 [(set_attr "type" "ssemov")
6099 (set_attr "prefix_extra" "1")
6100 (set_attr "mode" "TI")])
;; pmovsxwq, unnamed variant taking a narrow register-or-memory source.
;; The source is V2HI (two 16-bit elements widened to V2DI), matching the
;; operand mode used by *sse4_1_zero_extendv2hiv2di2.
6102 (define_insn "*sse4_1_extendv2hiv2di2"
6103 [(set (match_operand:V2DI 0 "register_operand" "=x")
6107 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6108 (parallel [(const_int 0)
6111 "pmovsxwq\t{%1, %0|%0, %1}"
6112 [(set_attr "type" "ssemov")
6113 (set_attr "prefix_extra" "1")
6114 (set_attr "mode" "TI")])
;; pmovsxdq, named pattern: V4SI register source, V2DI result.
6116 (define_insn "sse4_1_extendv2siv2di2"
6117 [(set (match_operand:V2DI 0 "register_operand" "=x")
6120 (match_operand:V4SI 1 "register_operand" "x")
6121 (parallel [(const_int 0)
6124 "pmovsxdq\t{%1, %0|%0, %1}"
6125 [(set_attr "type" "ssemov")
6126 (set_attr "prefix_extra" "1")
6127 (set_attr "mode" "TI")])
;; pmovsxdq, unnamed variant: V2SI register-or-memory source, V2DI result.
6129 (define_insn "*sse4_1_extendv2siv2di2"
6130 [(set (match_operand:V2DI 0 "register_operand" "=x")
6134 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6135 (parallel [(const_int 0)
6138 "pmovsxdq\t{%1, %0|%0, %1}"
6139 [(set_attr "type" "ssemov")
6140 (set_attr "prefix_extra" "1")
6141 (set_attr "mode" "TI")])
;; pmovzxbw, named pattern: V16QI register source, V8HI result.
6143 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6144 [(set (match_operand:V8HI 0 "register_operand" "=x")
6147 (match_operand:V16QI 1 "register_operand" "x")
6148 (parallel [(const_int 0)
6157 "pmovzxbw\t{%1, %0|%0, %1}"
6158 [(set_attr "type" "ssemov")
6159 (set_attr "prefix_extra" "1")
6160 (set_attr "mode" "TI")])
;; pmovzxbw, unnamed variant: V8QI register-or-memory source wrapped in
;; vec_duplicate:V16QI; V8HI result.
6162 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6163 [(set (match_operand:V8HI 0 "register_operand" "=x")
6166 (vec_duplicate:V16QI
6167 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6168 (parallel [(const_int 0)
6177 "pmovzxbw\t{%1, %0|%0, %1}"
6178 [(set_attr "type" "ssemov")
6179 (set_attr "prefix_extra" "1")
6180 (set_attr "mode" "TI")])
;; pmovzxbd, named pattern: V16QI register source, V4SI result.
6182 (define_insn "sse4_1_zero_extendv4qiv4si2"
6183 [(set (match_operand:V4SI 0 "register_operand" "=x")
6186 (match_operand:V16QI 1 "register_operand" "x")
6187 (parallel [(const_int 0)
6192 "pmovzxbd\t{%1, %0|%0, %1}"
6193 [(set_attr "type" "ssemov")
6194 (set_attr "prefix_extra" "1")
6195 (set_attr "mode" "TI")])
;; pmovzxbd, unnamed variant: V4QI register-or-memory source wrapped in
;; vec_duplicate:V16QI; V4SI result.
6197 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6198 [(set (match_operand:V4SI 0 "register_operand" "=x")
6201 (vec_duplicate:V16QI
6202 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6203 (parallel [(const_int 0)
6208 "pmovzxbd\t{%1, %0|%0, %1}"
6209 [(set_attr "type" "ssemov")
6210 (set_attr "prefix_extra" "1")
6211 (set_attr "mode" "TI")])
;; pmovzxbq, named pattern: V16QI register source, V2DI result.
6213 (define_insn "sse4_1_zero_extendv2qiv2di2"
6214 [(set (match_operand:V2DI 0 "register_operand" "=x")
6217 (match_operand:V16QI 1 "register_operand" "x")
6218 (parallel [(const_int 0)
6221 "pmovzxbq\t{%1, %0|%0, %1}"
6222 [(set_attr "type" "ssemov")
6223 (set_attr "prefix_extra" "1")
6224 (set_attr "mode" "TI")])
;; pmovzxbq, unnamed variant: V2QI register-or-memory source wrapped in
;; vec_duplicate:V16QI; V2DI result.
6226 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6227 [(set (match_operand:V2DI 0 "register_operand" "=x")
6230 (vec_duplicate:V16QI
6231 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6232 (parallel [(const_int 0)
6235 "pmovzxbq\t{%1, %0|%0, %1}"
6236 [(set_attr "type" "ssemov")
6237 (set_attr "prefix_extra" "1")
6238 (set_attr "mode" "TI")])
;; pmovzxwd, named pattern: V8HI register source, V4SI result.
6240 (define_insn "sse4_1_zero_extendv4hiv4si2"
6241 [(set (match_operand:V4SI 0 "register_operand" "=x")
6244 (match_operand:V8HI 1 "register_operand" "x")
6245 (parallel [(const_int 0)
6250 "pmovzxwd\t{%1, %0|%0, %1}"
6251 [(set_attr "type" "ssemov")
6252 (set_attr "prefix_extra" "1")
6253 (set_attr "mode" "TI")])
;; pmovzxwd, unnamed variant: V4HI register-or-memory source, V4SI result.
6255 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6256 [(set (match_operand:V4SI 0 "register_operand" "=x")
6260 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6261 (parallel [(const_int 0)
6266 "pmovzxwd\t{%1, %0|%0, %1}"
6267 [(set_attr "type" "ssemov")
6268 (set_attr "prefix_extra" "1")
6269 (set_attr "mode" "TI")])
;; pmovzxwq, named pattern: V8HI register source, V2DI result.
6271 (define_insn "sse4_1_zero_extendv2hiv2di2"
6272 [(set (match_operand:V2DI 0 "register_operand" "=x")
6275 (match_operand:V8HI 1 "register_operand" "x")
6276 (parallel [(const_int 0)
6279 "pmovzxwq\t{%1, %0|%0, %1}"
6280 [(set_attr "type" "ssemov")
6281 (set_attr "prefix_extra" "1")
6282 (set_attr "mode" "TI")])
;; pmovzxwq, unnamed variant: V2HI register-or-memory source, V2DI result.
6284 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6285 [(set (match_operand:V2DI 0 "register_operand" "=x")
6289 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6290 (parallel [(const_int 0)
6293 "pmovzxwq\t{%1, %0|%0, %1}"
6294 [(set_attr "type" "ssemov")
6295 (set_attr "prefix_extra" "1")
6296 (set_attr "mode" "TI")])
;; pmovzxdq, named pattern: V4SI register source, V2DI result.
6298 (define_insn "sse4_1_zero_extendv2siv2di2"
6299 [(set (match_operand:V2DI 0 "register_operand" "=x")
6302 (match_operand:V4SI 1 "register_operand" "x")
6303 (parallel [(const_int 0)
6306 "pmovzxdq\t{%1, %0|%0, %1}"
6307 [(set_attr "type" "ssemov")
6308 (set_attr "prefix_extra" "1")
6309 (set_attr "mode" "TI")])
;; pmovzxdq, unnamed variant: V2SI register-or-memory source, V2DI result.
6311 (define_insn "*sse4_1_zero_extendv2siv2di2"
6312 [(set (match_operand:V2DI 0 "register_operand" "=x")
6316 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6317 (parallel [(const_int 0)
6320 "pmovzxdq\t{%1, %0|%0, %1}"
6321 [(set_attr "type" "ssemov")
6322 (set_attr "prefix_extra" "1")
6323 (set_attr "mode" "TI")])
6325 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6326 ;; But it is not really a compare instruction.
;; ptest: sets FLAGS_REG (CC mode) from an unspec of two V2DI operands.
;; There is no register result; operand 0 must be a register and operand 1
;; may be a register or memory.
6327 (define_insn "sse4_1_ptest"
6328 [(set (reg:CC FLAGS_REG)
6329 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6330 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6333 "ptest\t{%1, %0|%0, %1}"
6334 [(set_attr "type" "ssecomi")
6335 (set_attr "prefix_extra" "1")
6336 (set_attr "mode" "TI")])
;; roundps/roundpd for V4SF/V2DF.  Operand 1 (register or memory) is the
;; source and is not tied to the destination; operand 2 is an immediate in
;; the range accepted by const_0_to_15_operand.
6338 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6339 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6341 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6342 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6345 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6346 [(set_attr "type" "ssecvt")
6347 (set_attr "prefix_extra" "1")
6348 (set_attr "mode" "<MODE>")])
;; roundss/roundsd expressed as a vec_merge: the rounded value is computed
;; from operand 2 with immediate operand 3 and merged with operand 1, which
;; is tied to the destination.  Operand 2 is register-only here.
6350 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6351 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6352 (vec_merge:SSEMODEF2P
6354 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6355 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6357 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6360 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6361 [(set_attr "type" "ssecvt")
6362 (set_attr "prefix_extra" "1")
6363 (set_attr "mode" "<MODE>")])
6365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6367 ;; Intel SSE4.2 string/text processing instructions
6369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern that is split before reload (the condition
;; requires !(reload_completed || reload_in_progress)).  It sets three
;; results: operand 0 in ECX ("c"), operand 1 in the "Yz" register, and
;; FLAGS_REG.  Inputs: operand 2 (V16QI register), operand 3 in EAX ("a"),
;; operand 4 (V16QI register or memory), operand 5 in EDX ("d") and the
;; 8-bit immediate operand 6.
;; The split code treats a result as live when the insn carries no
;; REG_UNUSED note for it, then emits gen_sse4_2_pcmpestri and/or
;; gen_sse4_2_pcmpestrm (presumably guarded by the ecx/xmm0 liveness flags);
;; when only the flags are live it emits gen_sse4_2_pcmpestr_cconly.
6371 (define_insn_and_split "sse4_2_pcmpestr"
6372 [(set (match_operand:SI 0 "register_operand" "=c,c")
6374 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6375 (match_operand:SI 3 "register_operand" "a,a")
6376 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6377 (match_operand:SI 5 "register_operand" "d,d")
6378 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6380 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6388 (set (reg:CC FLAGS_REG)
6397 && !(reload_completed || reload_in_progress)"
6402 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6403 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6404 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6407 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6408 operands[3], operands[4],
6409 operands[5], operands[6]));
6411 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6412 operands[3], operands[4],
6413 operands[5], operands[6]));
6414 if (flags && !(ecx || xmm0))
6415 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6416 operands[2], operands[3],
6417 operands[4], operands[5],
6421 [(set_attr "type" "sselog")
6422 (set_attr "prefix_data16" "1")
6423 (set_attr "prefix_extra" "1")
6424 (set_attr "memory" "none,load")
6425 (set_attr "mode" "TI")])
;; pcmpestri: index result in ECX (operand 0, "c") plus FLAGS_REG.
;; Inputs are a V16QI register (operand 1), EAX (operand 2), a V16QI
;; register or memory (operand 3), EDX (operand 4) and an 8-bit immediate.
;; The two alternatives differ only in operand 3 being register vs memory,
;; which is reflected in the "memory" attribute.
6427 (define_insn "sse4_2_pcmpestri"
6428 [(set (match_operand:SI 0 "register_operand" "=c,c")
6430 [(match_operand:V16QI 1 "register_operand" "x,x")
6431 (match_operand:SI 2 "register_operand" "a,a")
6432 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6433 (match_operand:SI 4 "register_operand" "d,d")
6434 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6436 (set (reg:CC FLAGS_REG)
6445 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6446 [(set_attr "type" "sselog")
6447 (set_attr "prefix_data16" "1")
6448 (set_attr "prefix_extra" "1")
6449 (set_attr "memory" "none,load")
6450 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI mask result in the "Yz" register (operand 0) plus
;; FLAGS_REG.  Input operands and alternatives match sse4_2_pcmpestri.
6452 (define_insn "sse4_2_pcmpestrm"
6453 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6455 [(match_operand:V16QI 1 "register_operand" "x,x")
6456 (match_operand:SI 2 "register_operand" "a,a")
6457 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6458 (match_operand:SI 4 "register_operand" "d,d")
6459 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6461 (set (reg:CC FLAGS_REG)
6470 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6471 [(set_attr "type" "sselog")
6472 (set_attr "prefix_data16" "1")
6473 (set_attr "prefix_extra" "1")
6474 (set_attr "memory" "none,load")
6475 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; the data results are declared
;; as clobbered scratches (operand 0 V16QI, operand 1 SI).  Four
;; alternatives: the first two clobber the "Yz" register and emit pcmpestrm,
;; the last two clobber ECX ("c") and emit pcmpestri; within each pair,
;; operand 4 is a register or memory respectively.
6477 (define_insn "sse4_2_pcmpestr_cconly"
6478 [(set (reg:CC FLAGS_REG)
6480 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6481 (match_operand:SI 3 "register_operand" "a,a,a,a")
6482 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6483 (match_operand:SI 5 "register_operand" "d,d,d,d")
6484 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6486 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6487 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6490 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6491 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6492 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6493 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6494 [(set_attr "type" "sselog")
6495 (set_attr "prefix_data16" "1")
6496 (set_attr "prefix_extra" "1")
6497 (set_attr "memory" "none,load,none,load")
6498 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern, split before reload (the condition requires
;; !(reload_completed || reload_in_progress)).  Results: operand 0 in ECX,
;; operand 1 in the "Yz" register, and FLAGS_REG.  Inputs: operand 2 (V16QI
;; register), operand 3 (V16QI register or memory), immediate operand 4.
;; The split code checks REG_UNUSED notes to see which results are live and
;; emits gen_sse4_2_pcmpistri and/or gen_sse4_2_pcmpistrm (presumably guarded
;; by the ecx/xmm0 flags); when only the flags are live it emits
;; gen_sse4_2_pcmpistr_cconly.
6500 (define_insn_and_split "sse4_2_pcmpistr"
6501 [(set (match_operand:SI 0 "register_operand" "=c,c")
6503 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6504 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6505 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6507 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6513 (set (reg:CC FLAGS_REG)
6520 && !(reload_completed || reload_in_progress)"
6525 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6526 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6527 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6530 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6531 operands[3], operands[4]));
6533 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6534 operands[3], operands[4]));
6535 if (flags && !(ecx || xmm0))
6536 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6537 operands[2], operands[3],
6541 [(set_attr "type" "sselog")
6542 (set_attr "prefix_data16" "1")
6543 (set_attr "prefix_extra" "1")
6544 (set_attr "memory" "none,load")
6545 (set_attr "mode" "TI")])
;; pcmpistri: index result in ECX (operand 0) plus FLAGS_REG.  Inputs are a
;; V16QI register, a V16QI register or memory, and an 8-bit immediate.
6547 (define_insn "sse4_2_pcmpistri"
6548 [(set (match_operand:SI 0 "register_operand" "=c,c")
6550 [(match_operand:V16QI 1 "register_operand" "x,x")
6551 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6552 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6554 (set (reg:CC FLAGS_REG)
6561 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6562 [(set_attr "type" "sselog")
6563 (set_attr "prefix_data16" "1")
6564 (set_attr "prefix_extra" "1")
6565 (set_attr "memory" "none,load")
6566 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI mask result in the "Yz" register (operand 0) plus
;; FLAGS_REG.  Input operands and alternatives match sse4_2_pcmpistri.
6568 (define_insn "sse4_2_pcmpistrm"
6569 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6571 [(match_operand:V16QI 1 "register_operand" "x,x")
6572 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6573 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6575 (set (reg:CC FLAGS_REG)
6582 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6583 [(set_attr "type" "sselog")
6584 (set_attr "prefix_data16" "1")
6585 (set_attr "prefix_extra" "1")
6586 (set_attr "memory" "none,load")
6587 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: only FLAGS_REG is set; the data results are clobbered
;; scratches.  Alternatives 1-2 clobber the "Yz" register and emit
;; pcmpistrm; alternatives 3-4 clobber ECX and emit pcmpistri.  Within each
;; pair operand 3 is a register or memory respectively.
6589 (define_insn "sse4_2_pcmpistr_cconly"
6590 [(set (reg:CC FLAGS_REG)
6592 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6593 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6594 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6596 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6597 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6600 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6601 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6602 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6603 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6604 [(set_attr "type" "sselog")
6605 (set_attr "prefix_data16" "1")
6606 (set_attr "prefix_extra" "1")
6607 (set_attr "memory" "none,load,none,load")
6608 (set_attr "mode" "TI")])
6610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6612 ;; SSE5 instructions
6614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6616 ;; SSE5 parallel integer multiply/add instructions.
6617 ;; Note the instruction does not allow the value being added to be a memory
6618 ;; operand.  However, pretending via the nonimmediate_operand predicate
6619 ;; that it does, and splitting it later, allows the following to be recognized:
6620 ;; a[i] = b[i] * c[i] + d[i];
;; SSE5 pmacsww on V8HI: multiply operands 1 and 2 and accumulate with
;; operand 3, which is tied to the destination in every alternative.
;; Three alternatives: reg*reg, reg*mem, and mem*reg (the last prints
;; operands 1 and 2 in swapped order).  The condition passes 2 as the final
;; argument of ix86_sse5_valid_op_p, whereas sse5_pmacssww passes 1.
6621 (define_insn "sse5_pmacsww"
6622 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6625 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6626 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6627 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6628 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6630 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6631 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6632 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6633 [(set_attr "type" "ssemuladd")
6634 (set_attr "mode" "TI")])
6636 ;; Split pmacsww with two memory operands into a load and the pmacsww.
6638 [(set (match_operand:V8HI 0 "register_operand" "")
6640 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6641 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6642 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6644 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6645 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6646 && !reg_mentioned_p (operands[0], operands[1])
6647 && !reg_mentioned_p (operands[0], operands[2])
6648 && !reg_mentioned_p (operands[0], operands[3])"
6651 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6652 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
6657 (define_insn "sse5_pmacssww"
6658 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6660 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6661 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6662 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6663 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6665 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6666 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6667 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6668 [(set_attr "type" "ssemuladd")
6669 (set_attr "mode" "TI")])
6671 ;; Note the instruction does not allow the value being added to be a memory
6672 ;; operation. However by pretending via the nonimmediate_operand predicate
6673 ;; that it does and splitting it later allows the following to be recognized:
6674 ;; a[i] = b[i] * c[i] + d[i];
6675 (define_insn "sse5_pmacsdd"
6676 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6679 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6680 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6681 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6682 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6684 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6685 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6686 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6687 [(set_attr "type" "ssemuladd")
6688 (set_attr "mode" "TI")])
6690 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
6692 [(set (match_operand:V4SI 0 "register_operand" "")
6694 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
6695 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6696 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
6698 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6699 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6700 && !reg_mentioned_p (operands[0], operands[1])
6701 && !reg_mentioned_p (operands[0], operands[2])
6702 && !reg_mentioned_p (operands[0], operands[3])"
6705 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
6706 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
6711 (define_insn "sse5_pmacssdd"
6712 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6714 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6715 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6716 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6717 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6719 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6720 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6721 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6722 [(set_attr "type" "ssemuladd")
6723 (set_attr "mode" "TI")])
6725 (define_insn "sse5_pmacssdql"
6726 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6731 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6732 (parallel [(const_int 1)
6735 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6736 (parallel [(const_int 1)
6738 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6739 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6741 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6742 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6743 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6744 [(set_attr "type" "ssemuladd")
6745 (set_attr "mode" "TI")])
6747 (define_insn "sse5_pmacssdqh"
6748 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6753 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6754 (parallel [(const_int 0)
6758 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6759 (parallel [(const_int 0)
6761 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6762 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6764 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6765 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6766 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6767 [(set_attr "type" "ssemuladd")
6768 (set_attr "mode" "TI")])
6770 (define_insn "sse5_pmacsdql"
6771 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6776 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6777 (parallel [(const_int 1)
6781 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6782 (parallel [(const_int 1)
6784 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6785 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6787 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6788 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6789 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6790 [(set_attr "type" "ssemuladd")
6791 (set_attr "mode" "TI")])
6793 (define_insn "sse5_pmacsdqh"
6794 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6799 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6800 (parallel [(const_int 0)
6804 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6805 (parallel [(const_int 0)
6807 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6808 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6810 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6811 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6812 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6813 [(set_attr "type" "ssemuladd")
6814 (set_attr "mode" "TI")])
6816 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
6817 (define_insn "sse5_pmacsswd"
6818 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6823 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6824 (parallel [(const_int 1)
6830 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6831 (parallel [(const_int 1)
6835 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
6836 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6838 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6839 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6840 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6841 [(set_attr "type" "ssemuladd")
6842 (set_attr "mode" "TI")])
6844 (define_insn "sse5_pmacswd"
6845 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6850 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6851 (parallel [(const_int 1)
6857 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6858 (parallel [(const_int 1)
6862 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
6863 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6865 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6866 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6867 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6868 [(set_attr "type" "ssemuladd")
6869 (set_attr "mode" "TI")])
6871 (define_insn "sse5_pmadcsswd"
6872 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6878 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6879 (parallel [(const_int 0)
6885 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6886 (parallel [(const_int 0)
6894 (parallel [(const_int 1)
6901 (parallel [(const_int 1)
6905 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
6906 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6908 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6909 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6910 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6911 [(set_attr "type" "ssemuladd")
6912 (set_attr "mode" "TI")])
6914 (define_insn "sse5_pmadcswd"
6915 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6921 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
6922 (parallel [(const_int 0)
6928 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
6929 (parallel [(const_int 0)
6937 (parallel [(const_int 1)
6944 (parallel [(const_int 1)
6948 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
6949 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6951 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6952 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6953 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6954 [(set_attr "type" "ssemuladd")
6955 (set_attr "mode" "TI")])
6957 ;; SSE5 parallel XMM conditional moves
;; Operand 3 is the bit selector: the result is
;;   (operand 1 & operand 3) | (operand 2 & ~operand 3).
;; The first four alternatives emit pcmov itself.  The last two handle a
;; constant-zero arm ("C") with operand 3 tied to the destination:
;;   - operand 1 is zero: result = ~operand 3 & operand 2, i.e. andnps %2;
;;   - operand 2 is zero: result =  operand 3 & operand 1, i.e. andps %1.
;; (andnps computes ~dest & src, andps computes dest & src.)
6958 (define_insn "sse5_pcmov_<mode>"
6959 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
6960 (if_then_else:SSEMODE
6961 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
6962 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
6963 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
6964 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
6966 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
6967 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
6968 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
6969 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
6970 andnps\t{%2, %0|%0, %2}
6971 andps\t{%1, %0|%0, %1}"
6972 [(set_attr "type" "sse4arg")])
6974 ;; SSE5 horizontal add/subtract instructions
6975 (define_insn "sse5_phaddbw"
6976 [(set (match_operand:V8HI 0 "register_operand" "=x")
6980 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
6981 (parallel [(const_int 0)
6992 (parallel [(const_int 1)
6999 (const_int 15)])))))]
7001 "phaddbw\t{%1, %0|%0, %1}"
7002 [(set_attr "type" "sseiadd1")])
7004 (define_insn "sse5_phaddbd"
7005 [(set (match_operand:V4SI 0 "register_operand" "=x")
7010 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7011 (parallel [(const_int 0)
7018 (parallel [(const_int 1)
7026 (parallel [(const_int 2)
7033 (parallel [(const_int 3)
7036 (const_int 15)]))))))]
7038 "phaddbd\t{%1, %0|%0, %1}"
7039 [(set_attr "type" "sseiadd1")])
7041 (define_insn "sse5_phaddbq"
7042 [(set (match_operand:V2DI 0 "register_operand" "=x")
7048 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7049 (parallel [(const_int 0)
7054 (parallel [(const_int 1)
7060 (parallel [(const_int 2)
7065 (parallel [(const_int 3)
7072 (parallel [(const_int 8)
7077 (parallel [(const_int 9)
7083 (parallel [(const_int 10)
7088 (parallel [(const_int 11)
7089 (const_int 15)])))))))]
7091 "phaddbq\t{%1, %0|%0, %1}"
7092 [(set_attr "type" "sseiadd1")])
7094 (define_insn "sse5_phaddwd"
7095 [(set (match_operand:V4SI 0 "register_operand" "=x")
7099 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7100 (parallel [(const_int 0)
7107 (parallel [(const_int 1)
7110 (const_int 7)])))))]
7112 "phaddwd\t{%1, %0|%0, %1}"
7113 [(set_attr "type" "sseiadd1")])
7115 (define_insn "sse5_phaddwq"
7116 [(set (match_operand:V2DI 0 "register_operand" "=x")
7121 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7122 (parallel [(const_int 0)
7127 (parallel [(const_int 1)
7133 (parallel [(const_int 2)
7138 (parallel [(const_int 3)
7139 (const_int 7)]))))))]
7141 "phaddwq\t{%1, %0|%0, %1}"
7142 [(set_attr "type" "sseiadd1")])
7144 (define_insn "sse5_phadddq"
7145 [(set (match_operand:V2DI 0 "register_operand" "=x")
7149 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7150 (parallel [(const_int 0)
7155 (parallel [(const_int 1)
7156 (const_int 3)])))))]
7158 "phadddq\t{%1, %0|%0, %1}"
7159 [(set_attr "type" "sseiadd1")])
7161 (define_insn "sse5_phaddubw"
7162 [(set (match_operand:V8HI 0 "register_operand" "=x")
7166 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7167 (parallel [(const_int 0)
7178 (parallel [(const_int 1)
7185 (const_int 15)])))))]
7187 "phaddubw\t{%1, %0|%0, %1}"
7188 [(set_attr "type" "sseiadd1")])
7190 (define_insn "sse5_phaddubd"
7191 [(set (match_operand:V4SI 0 "register_operand" "=x")
7196 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7197 (parallel [(const_int 0)
7204 (parallel [(const_int 1)
7212 (parallel [(const_int 2)
7219 (parallel [(const_int 3)
7222 (const_int 15)]))))))]
7224 "phaddubd\t{%1, %0|%0, %1}"
7225 [(set_attr "type" "sseiadd1")])
7227 (define_insn "sse5_phaddubq"
7228 [(set (match_operand:V2DI 0 "register_operand" "=x")
7234 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7235 (parallel [(const_int 0)
7240 (parallel [(const_int 1)
7246 (parallel [(const_int 2)
7251 (parallel [(const_int 3)
7258 (parallel [(const_int 8)
7263 (parallel [(const_int 9)
7269 (parallel [(const_int 10)
7274 (parallel [(const_int 11)
7275 (const_int 15)])))))))]
7277 "phaddubq\t{%1, %0|%0, %1}"
7278 [(set_attr "type" "sseiadd1")])
7280 (define_insn "sse5_phadduwd"
7281 [(set (match_operand:V4SI 0 "register_operand" "=x")
7285 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7286 (parallel [(const_int 0)
7293 (parallel [(const_int 1)
7296 (const_int 7)])))))]
7298 "phadduwd\t{%1, %0|%0, %1}"
7299 [(set_attr "type" "sseiadd1")])
7301 (define_insn "sse5_phadduwq"
7302 [(set (match_operand:V2DI 0 "register_operand" "=x")
7307 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7308 (parallel [(const_int 0)
7313 (parallel [(const_int 1)
7319 (parallel [(const_int 2)
7324 (parallel [(const_int 3)
7325 (const_int 7)]))))))]
7327 "phadduwq\t{%1, %0|%0, %1}"
7328 [(set_attr "type" "sseiadd1")])
7330 (define_insn "sse5_phaddudq"
7331 [(set (match_operand:V2DI 0 "register_operand" "=x")
7335 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7336 (parallel [(const_int 0)
7341 (parallel [(const_int 1)
7342 (const_int 3)])))))]
7344 "phaddudq\t{%1, %0|%0, %1}"
7345 [(set_attr "type" "sseiadd1")])
7347 (define_insn "sse5_phsubbw"
7348 [(set (match_operand:V8HI 0 "register_operand" "=x")
7352 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7353 (parallel [(const_int 0)
7364 (parallel [(const_int 1)
7371 (const_int 15)])))))]
7373 "phsubbw\t{%1, %0|%0, %1}"
7374 [(set_attr "type" "sseiadd1")])
7376 (define_insn "sse5_phsubwd"
7377 [(set (match_operand:V4SI 0 "register_operand" "=x")
7381 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7382 (parallel [(const_int 0)
7389 (parallel [(const_int 1)
7392 (const_int 7)])))))]
7394 "phsubwd\t{%1, %0|%0, %1}"
7395 [(set_attr "type" "sseiadd1")])
7397 (define_insn "sse5_phsubdq"
7398 [(set (match_operand:V2DI 0 "register_operand" "=x")
7402 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7403 (parallel [(const_int 0)
7408 (parallel [(const_int 1)
7409 (const_int 3)])))))]
7411 "phsubdq\t{%1, %0|%0, %1}"
7412 [(set_attr "type" "sseiadd1")])
7414 ;; SSE5 permute instructions
7415 (define_insn "sse5_pperm"
7416 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7418 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7419 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7420 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7421 UNSPEC_SSE5_PERMUTE))]
7422 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7423 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7424 [(set_attr "type" "sse4arg")
7425 (set_attr "mode" "TI")])
7427 ;; The following are for the various unpack insns which don't need the first
7428 ;; source operand, so we can just use the output operand for the first operand.
7429 ;; This allows either of the other two operands to be a memory operand. We
7430 ;; can't just use the first operand as an argument to the normal pperm because
7431 ;; then an output only argument, suddenly becomes an input operand.
;; At most one of operands 1 and 3 may be in memory.  Operand 2 is the
;; parallel of const_ints and is never a register, so the register test is
;; made on operand 3 (the V16QI operand passed last to pperm).
7432 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7433 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7436 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7437 (match_operand 2 "" "")))) ;; parallel with const_int's
7438 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7440 && (register_operand (operands[1], V16QImode)
7441 || register_operand (operands[3], V16QImode))"
7442 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7443 [(set_attr "type" "sseadd")
7444 (set_attr "mode" "TI")])
;; At most one of operands 1 and 3 may be in memory.  Operand 2 is the
;; parallel of const_ints and is never a register, so the register test is
;; made on operand 3 (the V16QI operand passed last to pperm).
7446 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7447 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7450 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7451 (match_operand 2 "" "")))) ;; parallel with const_int's
7452 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7454 && (register_operand (operands[1], V16QImode)
7455 || register_operand (operands[3], V16QImode))"
7456 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7457 [(set_attr "type" "sseadd")
7458 (set_attr "mode" "TI")])
;; At most one of operands 1 and 3 may be in memory.  Operand 2 is the
;; parallel of const_ints and is never a register, so the register test is
;; made on operand 3 (the V16QI operand passed last to pperm).
7460 (define_insn "sse5_pperm_zero_v8hi_v4si"
7461 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7464 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7465 (match_operand 2 "" "")))) ;; parallel with const_int's
7466 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7468 && (register_operand (operands[1], V8HImode)
7469 || register_operand (operands[3], V16QImode))"
7470 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7471 [(set_attr "type" "sseadd")
7472 (set_attr "mode" "TI")])
;; At most one of operands 1 and 3 may be in memory.  Operand 2 is the
;; parallel of const_ints and is never a register, so the register test is
;; made on operand 3 (the V16QI operand passed last to pperm).
7474 (define_insn "sse5_pperm_sign_v8hi_v4si"
7475 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7478 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7479 (match_operand 2 "" "")))) ;; parallel with const_int's
7480 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7482 && (register_operand (operands[1], V8HImode)
7483 || register_operand (operands[3], V16QImode))"
7484 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7485 [(set_attr "type" "sseadd")
7486 (set_attr "mode" "TI")])
;; At most one of operands 1 and 3 may be in memory.  Operand 2 is the
;; parallel of const_ints and is never a register, so the register test is
;; made on operand 3 (the V16QI operand passed last to pperm).
7488 (define_insn "sse5_pperm_zero_v4si_v2di"
7489 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7492 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7493 (match_operand 2 "" "")))) ;; parallel with const_int's
7494 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7496 && (register_operand (operands[1], V4SImode)
7497 || register_operand (operands[3], V16QImode))"
7498 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7499 [(set_attr "type" "sseadd")
7500 (set_attr "mode" "TI")])
;; At most one of operands 1 and 3 may be in memory.  Operand 2 is the
;; parallel of const_ints and is never a register, so the register test is
;; made on operand 3 (the V16QI operand passed last to pperm).
7502 (define_insn "sse5_pperm_sign_v4si_v2di"
7503 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7506 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7507 (match_operand 2 "" "")))) ;; parallel with const_int's
7508 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7510 && (register_operand (operands[1], V4SImode)
7511 || register_operand (operands[3], V16QImode))"
7512 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7513 [(set_attr "type" "sseadd")
7514 (set_attr "mode" "TI")])
7516 ;; SSE5 pack instructions that combine two vectors into a smaller vector
7517 (define_insn "sse5_pperm_pack_v2di_v4si"
7518 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7521 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7523 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7524 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7525 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7526 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7527 [(set_attr "type" "sse4arg")
7528 (set_attr "mode" "TI")])
7530 (define_insn "sse5_pperm_pack_v4si_v8hi"
7531 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7534 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7536 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7537 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7538 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7539 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7540 [(set_attr "type" "sse4arg")
7541 (set_attr "mode" "TI")])
7543 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7544 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7547 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7549 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7550 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7551 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7552 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7553 [(set_attr "type" "sse4arg")
7554 (set_attr "mode" "TI")])
7556 ;; Floating point permutation (permps, permpd)
7557 (define_insn "sse5_perm<mode>"
7558 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7560 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7561 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7562 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7563 UNSPEC_SSE5_PERMUTE))]
7564 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7565 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7566 [(set_attr "type" "sse4arg")
7567 (set_attr "mode" "<MODE>")])
7569 ;; SSE5 packed rotate instructions
7570 (define_insn "rotl<mode>3"
7571 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7573 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
7574 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
7576 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7577 [(set_attr "type" "sseishft")
7578 (set_attr "mode" "TI")])
7580 (define_insn "sse5_rotl<mode>3"
7581 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7583 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7584 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
7585 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7586 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7587 [(set_attr "type" "sseishft")
7588 (set_attr "mode" "TI")])
7590 ;; SSE5 packed shift instructions. Note negative values for the shift amount
7591 ;; convert this into a right shift instead of left shift. For now, model this
7592 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
7593 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT rtx code.
7594 (define_insn "sse5_ashl<mode>3"
7595 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7597 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7598 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7599 UNSPEC_SSE5_ASHIFT))]
7600 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7601 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7602 [(set_attr "type" "sseishft")
7603 (set_attr "mode" "TI")])
7605 (define_insn "sse5_lshl<mode>3"
7606 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7608 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7609 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7610 UNSPEC_SSE5_LSHIFT))]
7611 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7612 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7613 [(set_attr "type" "sseishft")
7614 (set_attr "mode" "TI")])
7616 ;; SSE5 FRCZ support
7618 (define_insn "sse5_frcz<mode>2"
7619 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7621 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
7624 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
7625 [(set_attr "type" "ssecvt1")
7626 (set_attr "prefix_extra" "1")
7627 (set_attr "mode" "<MODE>")])
7630 (define_insn "sse5_vmfrcz<mode>2"
7631 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7632 (vec_merge:SSEMODEF2P
7634 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
7636 (match_operand:SSEMODEF2P 1 "register_operand" "0")
7639 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
7640 [(set_attr "type" "ssecvt1")
7641 (set_attr "prefix_extra" "1")
7642 (set_attr "mode" "<MODE>")])
7644 (define_insn "sse5_cvtph2ps"
7645 [(set (match_operand:V4SF 0 "register_operand" "=x")
7646 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
7649 "cvtph2ps\t{%1, %0|%0, %1}"
7650 [(set_attr "type" "ssecvt")
7651 (set_attr "mode" "V4SF")])
7653 (define_insn "sse5_cvtps2ph"
7654 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
7655 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
7658 "cvtps2ph\t{%1, %0|%0, %1}"
7659 [(set_attr "type" "ssecvt")
7660 (set_attr "mode" "V4SF")])
7662 ;; Scalar versions of the com instructions that use vector types that are
7663 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
7664 ;; com instructions fill in 0's in the upper bits instead of leaving them
7665 ;; unmodified, so we use const_vector of 0 instead of match_dup.
7666 (define_expand "sse5_vmmaskcmp<mode>3"
7667 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
7668 (vec_merge:SSEMODEF2P
7669 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7670 [(match_operand:SSEMODEF2P 2 "register_operand" "")
7671 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
7676 operands[4] = CONST0_RTX (<MODE>mode);
7679 (define_insn "*sse5_vmmaskcmp<mode>3"
7680 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7681 (vec_merge:SSEMODEF2P
7682 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7683 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7684 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
7685 (match_operand:SSEMODEF2P 4 "")
7688 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
7689 [(set_attr "type" "sse4arg")
7690 (set_attr "mode" "<ssescalarmode>")])
7692 ;; We don't have a comparison operator that always returns true/false, so
7693 ;; handle comfalse and comtrue specially.
;; Operand 3 is a constant that selects one of four mnemonics: the scalar and
;; packed comfalse forms, then the scalar and packed comtrue forms, each
;; completed with the <ssemodesuffixf2c> suffix.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
7694 (define_insn "sse5_com_tf<mode>3"
7695 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7697 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
7698 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
7699 (match_operand:SI 3 "const_int_operand" "n")]
7700 UNSPEC_SSE5_TRUEFALSE))]
7703 const char *ret = NULL;
7705 switch (INTVAL (operands[3]))
7708 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7712 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7716 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7720 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7729 [(set_attr "type" "ssecmp")
7730 (set_attr "mode" "<MODE>")])
7732 (define_insn "sse5_maskcmp<mode>3"
7733 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7734 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7735 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7736 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
7738 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
7739 [(set_attr "type" "ssecmp")
7740 (set_attr "mode" "<MODE>")])
7742 (define_insn "sse5_maskcmp<mode>3"
7743 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7744 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
7745 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7746 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
7748 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7749 [(set_attr "type" "sse4arg")
7750 (set_attr "mode" "TI")])
7752 (define_insn "sse5_maskcmp_uns<mode>3"
7753 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7754 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
7755 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7756 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
7758 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7759 [(set_attr "type" "ssecmp")
7760 (set_attr "mode" "TI")])
7762 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
7763 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
7764 ;; the exact instruction generated for the intrinsic.
7765 (define_insn "sse5_maskcmp_uns2<mode>3"
7766 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7768 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
7769 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7770 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
7771 UNSPEC_SSE5_UNSIGNED_CMP))]
7773 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7774 [(set_attr "type" "ssecmp")
7775 (set_attr "mode" "TI")])
7777 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
7778 ;; being added here to be complete.
;; Operand 3 is a constant selector: a nonzero value emits
;; pcomtrue<ssevecsize>, zero emits pcomfalse<ssevecsize>.  Operand 1 must be
;; a register; operand 2 may be a register or memory.
7779 (define_insn "sse5_pcom_tf<mode>3"
7780 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7782 [(match_operand:SSEMODE1248 1 "register_operand" "x")
7783 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
7784 (match_operand:SI 3 "const_int_operand" "n")]
7785 UNSPEC_SSE5_TRUEFALSE))]
7788 return ((INTVAL (operands[3]) != 0)
7789 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7790 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
7792 [(set_attr "type" "ssecmp")
7793 (set_attr "mode" "TI")])
7795 (define_insn "aesenc"
7796 [(set (match_operand:V2DI 0 "register_operand" "=x")
7797 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7798 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
7801 "aesenc\t{%2, %0|%0, %2}"
7802 [(set_attr "type" "sselog1")
7803 (set_attr "prefix_extra" "1")
7804 (set_attr "mode" "TI")])
7806 (define_insn "aesenclast"
7807 [(set (match_operand:V2DI 0 "register_operand" "=x")
7808 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7809 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
7810 UNSPEC_AESENCLAST))]
7812 "aesenclast\t{%2, %0|%0, %2}"
7813 [(set_attr "type" "sselog1")
7814 (set_attr "prefix_extra" "1")
7815 (set_attr "mode" "TI")])
7817 (define_insn "aesdec"
7818 [(set (match_operand:V2DI 0 "register_operand" "=x")
7819 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7820 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
7823 "aesdec\t{%2, %0|%0, %2}"
7824 [(set_attr "type" "sselog1")
7825 (set_attr "prefix_extra" "1")
7826 (set_attr "mode" "TI")])
7828 (define_insn "aesdeclast"
7829 [(set (match_operand:V2DI 0 "register_operand" "=x")
7830 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7831 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
7832 UNSPEC_AESDECLAST))]
7834 "aesdeclast\t{%2, %0|%0, %2}"
7835 [(set_attr "type" "sselog1")
7836 (set_attr "prefix_extra" "1")
7837 (set_attr "mode" "TI")])
7839 (define_insn "aesimc"
7840 [(set (match_operand:V2DI 0 "register_operand" "=x")
7841 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
7844 "aesimc\t{%1, %0|%0, %1}"
7845 [(set_attr "type" "sselog1")
7846 (set_attr "prefix_extra" "1")
7847 (set_attr "mode" "TI")])
7849 (define_insn "aeskeygenassist"
7850 [(set (match_operand:V2DI 0 "register_operand" "=x")
7851 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
7852 (match_operand:SI 2 "const_0_to_255_operand" "n")]
7853 UNSPEC_AESKEYGENASSIST))]
7855 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
7856 [(set_attr "type" "sselog1")
7857 (set_attr "prefix_extra" "1")
7858 (set_attr "mode" "TI")])
7860 (define_insn "pclmulqdq"
7861 [(set (match_operand:V2DI 0 "register_operand" "=x")
7862 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7863 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
7864 (match_operand:SI 3 "const_0_to_255_operand" "n")]
7867 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
7868 [(set_attr "type" "sselog1")
7869 (set_attr "prefix_extra" "1")
7870 (set_attr "mode" "TI")])