1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; line up with the element widths, in bytes, of the modes it lists
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF4 covers the scalar and the vector modes,
;; SSEMODEF2P the vector modes only.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4 gives the full two-letter suffix for scalar and vector
;; modes.  ssemodesuffixf2s always gives the scalar suffix, even for the
;; vector modes.  ssemodesuffixf2c gives only the element letter, which the
;; output templates append to a "p" or "s" (e.g. movup<...>, sqrts<...>).
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
;; Each value is the element width in bits minus one.
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
;; 15 and 3 are all-ones masks with one bit per vector element (4 and 2).
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in SSEMODE.  The work is handed to
;; ix86_expand_vector_move together with the mode being moved.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODE vector modes.  Three alternatives:
;;   0: xmm <- SSE constant (constraint C)
;;   1: xmm <- xmm or memory
;;   2: memory <- xmm
;; The condition requires at least one of the two operands to be a register.
;; The constant load takes its opcode from standard_sse_constant_opcode; the
;; other moves emit movaps, movapd or movdqa depending on get_attr_mode.
;; The "mode" attribute computed at the end is V4SF when optimizing for
;; size, when SSE2 is not available, or for alternative 2 under the
;; TARGET_SSE_TYPELESS_STORES test; otherwise V4SF or V2DF for the matching
;; float vector modes and TI for everything else.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 switch (get_attr_mode (insn))
97 return "movaps\t{%1, %0|%0, %1}";
99 return "movapd\t{%1, %0|%0, %1}";
101 return "movdqa\t{%1, %0|%0, %1}";
107 [(set_attr "type" "sselog1,ssemov,ssemov")
109 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
110 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
111 (and (eq_attr "alternative" "2")
112 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
114 (const_string "V4SF")
115 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
116 (const_string "V4SF")
117 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
118 (const_string "V2DF")
120 (const_string "TI")))])
122 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
123 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
124 ;; from memory, we'd prefer to load the memory directly into the %xmm
125 ;; register. To facilitate this happy circumstance, this pattern won't
126 ;; split until after register allocation. If the 64-bit value didn't
127 ;; come from memory, this is the best we can do. This is much better
128 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves; the split
;; condition adds reload_completed.  Operand 2 is a V4SI scratch that is an
;; early-clobbered xmm register for the register source and unused (X) for
;; the memory source.
;; Register source: the SImode subregs at byte offsets 0 and 4 of operand 1
;; are loaded into operand 0 and the scratch with sse2_loadld, then combined
;; with sse2_punpckldq.
;; Memory source: a single vec_concatv2di of operand 1 with const0_rtx is
;; emitted into the V2DImode lowpart of operand 0.
131 (define_insn_and_split "movdi_to_sse"
133 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
134 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
135 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
136 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
138 "&& reload_completed"
141 if (register_operand (operands[1], DImode))
143 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
144 Assemble the 64-bit DImode value in an xmm register. */
145 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
146 gen_rtx_SUBREG (SImode, operands[1], 0)));
147 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
148 gen_rtx_SUBREG (SImode, operands[1], 4)));
149 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
151 else if (memory_operand (operands[1], DImode))
152 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector for the
;; replacement pattern, which duplicates operand 1 with vec_duplicate.
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
;; After reload (SSE2 only), rewrite a V2DF register load whose source
;; satisfies zero_extended_scalar_load_operand as a vec_concat of the DFmode
;; subreg of operand 1 (offset 0) with a DFmode zero held in operand 2.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for the SSEMODE vector modes.  The single register operand
;; and its mode are passed straight to ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for the SSEMODE vector modes; the work is done
;; by ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Emits movups (V4SF) or movupd (V2DF).  Alternatives are xmm <- xmm/memory
;; and memory <- xmm; the condition rejects a memory-to-memory move.
198 (define_insn "<sse>_movup<ssemodesuffixf2c>"
199 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
201 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
203 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
204 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
205 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
206 [(set_attr "type" "ssemov")
207 (set_attr "mode" "<MODE>")])
;; Emits movdqu for V16QI (SSE2).  The source is wrapped in an unspec.
;; Alternatives are xmm <- xmm/memory and memory <- xmm; a memory-to-memory
;; move is rejected by the condition.
209 (define_insn "sse2_movdqu"
210 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
211 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
213 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
214 "movdqu\t{%1, %0|%0, %1}"
215 [(set_attr "type" "ssemov")
216 (set_attr "prefix_data16" "1")
217 (set_attr "mode" "TI")])
;; Emits movntps (V4SF) or movntpd (V2DF).  Store only: the destination must
;; be memory and the source an xmm register.
219 (define_insn "<sse>_movnt<mode>"
220 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
222 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
224 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
225 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
226 [(set_attr "type" "ssemov")
227 (set_attr "mode" "<MODE>")])
;; Emits movntdq: stores a V2DI xmm register to memory.  The source is
;; wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here, whereas <sse>_movnt<mode> uses
;; ssemov for the same kind of store -- confirm this is intended.
229 (define_insn "sse2_movntv2di"
230 [(set (match_operand:V2DI 0 "memory_operand" "=m")
231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
234 "movntdq\t{%1, %0|%0, %1}"
235 [(set_attr "type" "ssecvt")
236 (set_attr "prefix_data16" "1")
237 (set_attr "mode" "TI")])
;; Emits movnti: stores an SImode general register (constraint r) to memory.
;; The source is wrapped in an unspec.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intended.
239 (define_insn "sse2_movntsi"
240 [(set (match_operand:SI 0 "memory_operand" "=m")
241 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
244 "movnti\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssecvt")
246 (set_attr "mode" "V2DF")])
;; Emits lddqu: loads V16QI from memory into an xmm register.  The source
;; must be a memory operand and is wrapped in an unspec.
248 (define_insn "sse3_lddqu"
249 [(set (match_operand:V16QI 0 "register_operand" "=x")
250 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
253 "lddqu\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssecvt")
255 (set_attr "prefix_rep" "1")
256 (set_attr "mode" "TI")])
258 ; Expand patterns for non-temporal stores. At the moment, only those
259 ; that directly map to insns are defined; it would be possible to
260 ; define patterns for other modes that would expand to several insns.
;; storent<mode> for the vector float modes (V4SF, V2DF): memory
;; destination, register source.
262 (define_expand "storent<mode>"
263 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
265 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
267 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent<mode> for the modes in the MODEF iterator: memory destination,
;; register source.
270 (define_expand "storent<mode>"
271 [(set (match_operand:MODEF 0 "memory_operand" "")
273 [(match_operand:MODEF 1 "register_operand" "")]
;; storentv2di: V2DI memory destination, register source wrapped in an
;; unspec, the same RTL shape as the sse2_movntv2di insn.
278 (define_expand "storentv2di"
279 [(set (match_operand:V2DI 0 "memory_operand" "")
280 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storentsi: SImode memory destination, register source wrapped in an
;; unspec, the same RTL shape as the sse2_movntsi insn.
285 (define_expand "storentsi"
286 [(set (match_operand:SI 0 "memory_operand" "")
287 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294 ;; Parallel floating point arithmetic
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for the vector float modes.  The pattern itself is never
;; emitted: ix86_expand_fp_absneg_operator does all the work and the
;; preparation statement ends with DONE.  Judging by that helper's name,
;; <code> presumably ranges over abs and neg -- confirm against the iterator.
298 (define_expand "<code><mode>2"
299 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
301 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
302 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
303 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Vector float add/subtract expander.  Both inputs may be memory at this
;; point; ix86_fixup_binary_operands_no_copy adjusts the operands before the
;; pattern is emitted.
305 (define_expand "<plusminus_insn><mode>3"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
307 (plusminus:SSEMODEF2P
308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
310 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
311 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector float add/subtract insn (<plusminus_mnemonic>ps / ...pd).
;; Two-address form: operand 1 is tied to the destination (the <comm>
;; attribute supplies any commutativity marker), operand 2 may be an xmm
;; register or memory.  ix86_binary_operator_ok must accept the operands.
313 (define_insn "*<plusminus_insn><mode>3"
314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
315 (plusminus:SSEMODEF2P
316 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
317 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
318 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
319 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
320 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
321 [(set_attr "type" "sseadd")
322 (set_attr "mode" "<MODE>")])
;; Scalar add/subtract on vector registers (<plusminus_mnemonic>ss / ...sd).
;; The arithmetic is wrapped in a vec_merge; operand 1 must be a register
;; tied to the destination.  The "mode" attribute is the scalar mode.
324 (define_insn "<sse>_vm<plusminus_insn><mode>3"
325 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
326 (vec_merge:SSEMODEF2P
327 (plusminus:SSEMODEF2P
328 (match_operand:SSEMODEF2P 1 "register_operand" "0")
329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
332 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
333 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
334 [(set_attr "type" "sseadd")
335 (set_attr "mode" "<ssescalarmode>")])
;; Vector float multiply expander.  ix86_fixup_binary_operands_no_copy is
;; called with MULT to adjust the operands before the pattern is emitted.
337 (define_expand "mul<mode>3"
338 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
340 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
341 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
342 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
343 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector float multiply insn (mulps / mulpd).  Operands 1 and 2 are marked
;; commutative ("%0"); operand 1 is tied to the destination and operand 2
;; may be an xmm register or memory.
345 (define_insn "*mul<mode>3"
346 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
348 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
349 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
350 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
351 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
352 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
353 [(set_attr "type" "ssemul")
354 (set_attr "mode" "<MODE>")])
;; Scalar multiply on vector registers (mulss / mulsd), expressed through a
;; vec_merge.  Operand 1 must be a register tied to the destination; the
;; "mode" attribute is the scalar mode.
356 (define_insn "<sse>_vmmul<mode>3"
357 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
358 (vec_merge:SSEMODEF2P
360 (match_operand:SSEMODEF2P 1 "register_operand" "0")
361 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
364 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
365 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
366 [(set_attr "type" "ssemul")
367 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; the code is not optimized for size, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold,
;; ix86_emit_swdivsf is called with the three operands and V4SFmode.
369 (define_expand "divv4sf3"
370 [(set (match_operand:V4SF 0 "register_operand" "")
371 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
372 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
376 && flag_finite_math_only && !flag_trapping_math
377 && flag_unsafe_math_optimizations)
379 ix86_emit_swdivsf (operands[0], operands[1],
380 operands[2], V4SFmode);
;; V2DF division expander.  The dividend must be a register; the divisor
;; may be a register or memory.
385 (define_expand "divv2df3"
386 [(set (match_operand:V2DF 0 "register_operand" "")
387 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
388 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Vector float divide insn (divps / divpd).  Operand 1 must be a register
;; tied to the destination; operand 2 may be an xmm register or memory.
392 (define_insn "<sse>_div<mode>3"
393 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
395 (match_operand:SSEMODEF2P 1 "register_operand" "0")
396 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
397 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
398 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
399 [(set_attr "type" "ssediv")
400 (set_attr "mode" "<MODE>")])
;; Scalar divide on vector registers (divss / divsd), expressed through a
;; vec_merge.  Operand 1 is tied to the destination; the "mode" attribute is
;; the scalar mode.
402 (define_insn "<sse>_vmdiv<mode>3"
403 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
404 (vec_merge:SSEMODEF2P
406 (match_operand:SSEMODEF2P 1 "register_operand" "0")
407 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
410 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
411 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
412 [(set_attr "type" "ssediv")
413 (set_attr "mode" "<ssescalarmode>")])
;; Emits rcpps.  The operation is represented as UNSPEC_RCP applied to
;; operand 1, which may be an xmm register or memory.
415 (define_insn "sse_rcpv4sf2"
416 [(set (match_operand:V4SF 0 "register_operand" "=x")
418 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
420 "rcpps\t{%1, %0|%0, %1}"
421 [(set_attr "type" "sse")
422 (set_attr "mode" "V4SF")])
;; Emits rcpss.  Operand 1 is the unspec input (xmm or memory); operand 2 is
;; a register tied to the destination and does not appear in the output
;; template.
424 (define_insn "sse_vmrcpv4sf2"
425 [(set (match_operand:V4SF 0 "register_operand" "=x")
427 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
429 (match_operand:V4SF 2 "register_operand" "0")
432 "rcpss\t{%1, %0|%0, %1}"
433 [(set_attr "type" "sse")
434 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the code is not optimized for size, and flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations all hold,
;; ix86_emit_swsqrtsf is called with a final argument of 0.
436 (define_expand "sqrtv4sf2"
437 [(set (match_operand:V4SF 0 "register_operand" "")
438 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
441 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
442 && flag_finite_math_only && !flag_trapping_math
443 && flag_unsafe_math_optimizations)
445 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits sqrtps; the source may be an xmm register or memory.
450 (define_insn "sse_sqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
454 "sqrtps\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "V4SF")])
;; Emits sqrtpd; the source may be an xmm register or memory.
458 (define_insn "sqrtv2df2"
459 [(set (match_operand:V2DF 0 "register_operand" "=x")
460 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
462 "sqrtpd\t{%1, %0|%0, %1}"
463 [(set_attr "type" "sse")
464 (set_attr "mode" "V2DF")])
;; Scalar square root on vector registers (sqrtss / sqrtsd), expressed
;; through a vec_merge.  Operand 1 is the input (xmm or memory); operand 2 is
;; a register tied to the destination and does not appear in the template.
466 (define_insn "<sse>_vmsqrt<mode>2"
467 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
468 (vec_merge:SSEMODEF2P
470 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
471 (match_operand:SSEMODEF2P 2 "register_operand" "0")
473 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
474 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
475 [(set_attr "type" "sse")
476 (set_attr "mode" "<ssescalarmode>")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  It calls
;; ix86_emit_swsqrtsf with a final argument of 1, where the sqrtv4sf2
;; expander passes 0.
478 (define_expand "rsqrtv4sf2"
479 [(set (match_operand:V4SF 0 "register_operand" "")
481 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
484 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Emits rsqrtps.  The operation is represented as UNSPEC_RSQRT applied to
;; operand 1, which may be an xmm register or memory.
488 (define_insn "sse_rsqrtv4sf2"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
493 "rsqrtps\t{%1, %0|%0, %1}"
494 [(set_attr "type" "sse")
495 (set_attr "mode" "V4SF")])
;; Emits rsqrtss.  Operand 1 is the unspec input (xmm or memory); operand 2
;; is a register tied to the destination and does not appear in the output
;; template.
497 (define_insn "sse_vmrsqrtv4sf2"
498 [(set (match_operand:V4SF 0 "register_operand" "=x")
500 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
502 (match_operand:V4SF 2 "register_operand" "0")
505 "rsqrtss\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "SF")])
509 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
510 ;; isn't really correct, as those rtl operators aren't defined when
511 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector float min/max expander.  Without flag_finite_math_only, operand 1
;; is forced into a register first, which is what the non-commutative
;; *<code><mode>3 insn requires of its operand 1; then
;; ix86_fixup_binary_operands_no_copy adjusts the operands.
513 (define_expand "<code><mode>3"
514 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
516 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
517 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
518 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
520 if (!flag_finite_math_only)
521 operands[1] = force_reg (<MODE>mode, operands[1]);
522 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only under flag_finite_math_only.  Here the operands
;; are marked commutative ("%0") and operand 1 may be memory, subject to
;; ix86_binary_operator_ok.
525 (define_insn "*<code><mode>3_finite"
526 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
528 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
529 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
530 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
531 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
532 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sseadd")
534 (set_attr "mode" "<MODE>")])
;; General min/max insn.  It has no commutativity marker: operand 1 must be
;; a register tied to the destination, operand 2 may be xmm or memory.
536 (define_insn "*<code><mode>3"
537 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
539 (match_operand:SSEMODEF2P 1 "register_operand" "0")
540 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
541 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
542 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
543 [(set_attr "type" "sseadd")
544 (set_attr "mode" "<MODE>")])
;; Scalar min/max on vector registers (<maxminfprefix>ss / ...sd), expressed
;; through a vec_merge.  The "mode" attribute is the scalar mode.
;; NOTE(review): "type" is sse here while the vector min/max insns use
;; sseadd -- confirm this is intended.
546 (define_insn "<sse>_vm<code><mode>3"
547 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
548 (vec_merge:SSEMODEF2P
550 (match_operand:SSEMODEF2P 1 "register_operand" "0")
551 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
554 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
555 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
556 [(set_attr "type" "sse")
557 (set_attr "mode" "<ssescalarmode>")])
559 ;; These versions of the min/max patterns implement exactly the operations
560 ;; min = (op1 < op2 ? op1 : op2)
561 ;; max = (!(op1 < op2) ? op1 : op2)
562 ;; Their operands are not commutative, and thus they may be used in the
563 ;; presence of -0.0 and NaN.
;; Emits minps / minpd with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 may be xmm or memory.  No
;; commutativity marker is present.
565 (define_insn "*ieee_smin<mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
568 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
569 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
571 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
573 [(set_attr "type" "sseadd")
574 (set_attr "mode" "<MODE>")])
;; Emits maxps / maxpd with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 may be xmm or memory.  No
;; commutativity marker is present.
576 (define_insn "*ieee_smax<mode>3"
577 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
579 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
580 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
582 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
583 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
584 [(set_attr "type" "sseadd")
585 (set_attr "mode" "<MODE>")])
;; Emits addsubps.  Operand 1 is a register tied to the destination and
;; operand 2 may be xmm or memory; the minus arm of the pattern reuses both
;; through match_dup.
587 (define_insn "sse3_addsubv4sf3"
588 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
593 (minus:V4SF (match_dup 1) (match_dup 2))
596 "addsubps\t{%2, %0|%0, %2}"
597 [(set_attr "type" "sseadd")
598 (set_attr "prefix_rep" "1")
599 (set_attr "mode" "V4SF")])
;; Emits addsubpd.  Operand 1 is a register tied to the destination and
;; operand 2 may be xmm or memory; the minus arm of the pattern reuses both
;; through match_dup.
601 (define_insn "sse3_addsubv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
605 (match_operand:V2DF 1 "register_operand" "0")
606 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
607 (minus:V2DF (match_dup 1) (match_dup 2))
610 "addsubpd\t{%2, %0|%0, %2}"
611 [(set_attr "type" "sseadd")
612 (set_attr "mode" "V2DF")])
;; Emits h<plusminus_mnemonic>ps.  The pattern selects elements 0, 1, 2 and
;; 3 of operand 1 and then the same four elements of operand 2, each with a
;; single-element vec_select.  Operand 1 is tied to the destination.
614 (define_insn "sse3_h<plusminus_insn>v4sf3"
615 [(set (match_operand:V4SF 0 "register_operand" "=x")
620 (match_operand:V4SF 1 "register_operand" "0")
621 (parallel [(const_int 0)]))
622 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
624 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
625 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
629 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
630 (parallel [(const_int 0)]))
631 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
633 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
634 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
636 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
637 [(set_attr "type" "sseadd")
638 (set_attr "prefix_rep" "1")
639 (set_attr "mode" "V4SF")])
;; Emits h<plusminus_mnemonic>pd.  The pattern selects elements 0 and 1 of
;; operand 1 and then elements 0 and 1 of operand 2.  Operand 1 is tied to
;; the destination.
641 (define_insn "sse3_h<plusminus_insn>v2df3"
642 [(set (match_operand:V2DF 0 "register_operand" "=x")
646 (match_operand:V2DF 1 "register_operand" "0")
647 (parallel [(const_int 0)]))
648 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
651 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
652 (parallel [(const_int 0)]))
653 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
655 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
656 [(set_attr "type" "sseadd")
657 (set_attr "mode" "V2DF")])
;; V4SF sum reduction.  Two strategies appear in the body: two chained
;; sse3_haddv4sf3 insns through a fresh temporary (operand 1 with itself,
;; then the temporary with itself), or ix86_expand_reduc_v4sf driven by
;; gen_addv4sf3.  Presumably the hadd sequence is chosen when SSE3 is
;; available -- confirm against the selecting condition.
659 (define_expand "reduc_splus_v4sf"
660 [(match_operand:V4SF 0 "register_operand" "")
661 (match_operand:V4SF 1 "register_operand" "")]
666 rtx tmp = gen_reg_rtx (V4SFmode);
667 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
668 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
671 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum reduction: a single sse3_haddv2df3 of operand 1 with itself,
;; written to operand 0.
675 (define_expand "reduc_splus_v2df"
676 [(match_operand:V2DF 0 "register_operand" "")
677 (match_operand:V2DF 1 "register_operand" "")]
680 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF maximum reduction, delegated to ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
684 (define_expand "reduc_smax_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
689 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction, delegated to ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
693 (define_expand "reduc_smin_v4sf"
694 [(match_operand:V4SF 0 "register_operand" "")
695 (match_operand:V4SF 1 "register_operand" "")]
698 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
704 ;; Parallel floating point comparisons
706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Compare insn for scalar and vector float modes (SSEMODEF4).  Operator 3
;; must satisfy sse_comparison_operator; the %D3 modifier presumably prints
;; its condition name between "cmp" and the mode suffix -- confirm against
;; the operand printer.  Operand 1 is tied to the destination.
708 (define_insn "<sse>_maskcmp<mode>3"
709 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
710 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
711 [(match_operand:SSEMODEF4 1 "register_operand" "0")
712 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
713 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
715 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
716 [(set_attr "type" "ssecmp")
717 (set_attr "mode" "<MODE>")])
;; Scalar compare on vector registers (cmp<cond>ss / cmp<cond>sd), with the
;; comparison wrapped in a vec_merge.  Disabled when TARGET_SSE5 is set.
;; The "mode" attribute is the scalar mode.
719 (define_insn "<sse>_vmmaskcmp<mode>3"
720 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
721 (vec_merge:SSEMODEF2P
722 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
723 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
724 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
727 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
728 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
729 [(set_attr "type" "ssecmp")
730 (set_attr "mode" "<ssescalarmode>")])
;; Emits comis<suffix>.  Sets FLAGS_REG in CCFP mode from element 0 of the
;; two vector operands (each is selected with parallel [(const_int 0)]).
;; Operand 0 must be a register; operand 1 may be xmm or memory.
732 (define_insn "<sse>_comi"
733 [(set (reg:CCFP FLAGS_REG)
736 (match_operand:<ssevecmode> 0 "register_operand" "x")
737 (parallel [(const_int 0)]))
739 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
740 (parallel [(const_int 0)]))))]
741 "SSE_FLOAT_MODE_P (<MODE>mode)"
742 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
743 [(set_attr "type" "ssecomi")
744 (set_attr "mode" "<MODE>")])
;; Emits ucomis<suffix>.  Sets FLAGS_REG in CCFPU mode from element 0 of the
;; two vector operands (each is selected with parallel [(const_int 0)]).
;; Operand 0 must be a register; operand 1 may be xmm or memory.
746 (define_insn "<sse>_ucomi"
747 [(set (reg:CCFPU FLAGS_REG)
750 (match_operand:<ssevecmode> 0 "register_operand" "x")
751 (parallel [(const_int 0)]))
753 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))))]
755 "SSE_FLOAT_MODE_P (<MODE>mode)"
756 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
757 [(set_attr "type" "ssecomi")
758 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for the vector float modes.
;; Operands 4 and 5 are the values compared; operands 1 and 2 are the two
;; arms of the if_then_else.  Expansion is attempted through
;; ix86_expand_fp_vcond, whose return value is tested.
760 (define_expand "vcond<mode>"
761 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
762 (if_then_else:SSEMODEF2P
764 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
765 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
766 (match_operand:SSEMODEF2P 1 "general_operand" "")
767 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
768 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
770 if (ix86_expand_fp_vcond (operands))
776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
778 ;; Parallel floating point logical operations
780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits andnps / andnpd.  Operand 1 is a register tied to the destination;
;; operand 2 may be xmm or memory.
782 (define_insn "<sse>_nand<mode>3"
783 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
786 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
787 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
788 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
789 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
790 [(set_attr "type" "sselog")
791 (set_attr "mode" "<MODE>")])
;; Expander for the bitwise logical operations on vector float modes.
;; ix86_fixup_binary_operands_no_copy adjusts the operands before the
;; pattern is emitted.
793 (define_expand "<code><mode>3"
794 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
796 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
797 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
798 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
799 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logical insn on vector float modes (<plogicprefix>ps / ...pd).
;; The operands are marked commutative ("%0") and must pass
;; ix86_binary_operator_ok.
801 (define_insn "*<code><mode>3"
802 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
804 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
805 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
806 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
807 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
808 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
809 [(set_attr "type" "sselog")
810 (set_attr "mode" "<MODE>")])
812 ;; Also define scalar versions. These are used for abs, neg, and
813 ;; conditional move. Using subregs into vector modes causes register
814 ;; allocation lossage. These patterns do not allow memory operands
815 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not.  Both inputs must be registers.  The
;; template is andnp<suffix> and the "mode" attribute is the corresponding
;; vector mode.
817 (define_insn "*nand<mode>3"
818 [(set (match_operand:MODEF 0 "register_operand" "=x")
821 (match_operand:MODEF 1 "register_operand" "0"))
822 (match_operand:MODEF 2 "register_operand" "x")))]
823 "SSE_FLOAT_MODE_P (<MODE>mode)"
824 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
825 [(set_attr "type" "sselog")
826 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) bitwise logical operations.  Both inputs must be
;; registers.  The template is <plogicprefix>p<suffix> and the "mode"
;; attribute is the corresponding vector mode.
828 (define_insn "*<code><mode>3"
829 [(set (match_operand:MODEF 0 "register_operand" "=x")
831 (match_operand:MODEF 1 "register_operand" "0")
832 (match_operand:MODEF 2 "register_operand" "x")))]
833 "SSE_FLOAT_MODE_P (<MODE>mode)"
834 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "<ssevecmode>")])
838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
840 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
841 ;; scalar versions of the instructions as well as the vector versions.
843 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
845 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
846 ;; combine to generate a multiply/add with two memory references.  We then
847 ;; split this insn into a load of one of the memory operands into the
848 ;; destination register, followed by the multiply/add.  If we don't manage to
849 ;; split the insn, reload will generate the appropriate moves.  This is needed
850 ;; because combine has already folded one of the memory references into both
851 ;; the multiply and add insns, and it can't generate a new pseudo.  I.e.:
852 ;; (set (reg1) (mem (addr1)))
853 ;; (set (reg2) (mult (reg1) (mem (addr2))))
854 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 multiply-add for scalar and vector float modes (fmadd<suffix>).
;; There are four alternatives: in the first two operand 1 is tied to the
;; destination, in the last two operand 3 is.  The condition calls
;; ix86_sse5_valid_op_p with a final argument of 2, where the vm (scalar)
;; variant of this insn passes 1.
856 (define_insn "sse5_fmadd<mode>4"
857 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
860 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
861 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
862 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
863 "TARGET_SSE5 && TARGET_FUSED_MADD
864 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
865 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
866 [(set_attr "type" "ssemuladd")
867 (set_attr "mode" "<MODE>")])
869 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with a final argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any of the three inputs.  ix86_expand_sse5_multiple_memory rewrites
;; the operand array, then a fresh sse5_fmadd<mode>4 is emitted.
871 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
874 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
875 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
876 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
878 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
879 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
880 && !reg_mentioned_p (operands[0], operands[1])
881 && !reg_mentioned_p (operands[0], operands[2])
882 && !reg_mentioned_p (operands[0], operands[3])"
885 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
887 operands[2], operands[3]));
891 ;; For the scalar operations, use operand1 for the upper words that aren't
892 ;; modified, so restrict the forms that are generated.
893 ;; Scalar version of fmadd
;; Operand 1 is tied to the destination in both alternatives; the memory
;; operand, if any, is operand 3 (first alternative) or operand 2 (second).
;; The template uses the scalar suffix (ssemodesuffixf2s).
;; NOTE(review): "mode" is <MODE> here, whereas the other vec_merge scalar
;; patterns in this file use <ssescalarmode> -- confirm this is intended.
894 (define_insn "sse5_vmfmadd<mode>4"
895 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
896 (vec_merge:SSEMODEF2P
899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
900 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
901 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
904 "TARGET_SSE5 && TARGET_FUSED_MADD
905 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
906 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
907 [(set_attr "type" "ssemuladd")
908 (set_attr "mode" "<MODE>")])
910 ;; Floating multiply and subtract
911 ;; Allow two memory operands the same as fmadd
;; SSE5 multiply-subtract (fmsub<suffix>).  It has the same four
;; alternatives and the same ix86_sse5_valid_op_p check (final argument 2)
;; as sse5_fmadd<mode>4.
912 (define_insn "sse5_fmsub<mode>4"
913 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
916 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
917 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
918 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
919 "TARGET_SSE5 && TARGET_FUSED_MADD
920 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
921 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
922 [(set_attr "type" "ssemuladd")
923 (set_attr "mode" "<MODE>")])
925 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with a final argument
;; of 1 but pass it with 2, and the destination register is not mentioned
;; in any of the three inputs.  ix86_expand_sse5_multiple_memory rewrites
;; the operand array, then a fresh sse5_fmsub<mode>4 is emitted.
927 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
930 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
931 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
932 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
934 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
935 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
936 && !reg_mentioned_p (operands[0], operands[1])
937 && !reg_mentioned_p (operands[0], operands[2])
938 && !reg_mentioned_p (operands[0], operands[3])"
941 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
942 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
943 operands[2], operands[3]));
947 ;; For the scalar operations, use operand1 for the upper words that aren't
948 ;; modified, so restrict the forms that are generated.
949 ;; Scalar version of fmsub
;; Operand 1 is tied to the destination in both alternatives; the memory
;; operand, if any, is operand 3 (first alternative) or operand 2 (second).
;; The template uses the scalar suffix (ssemodesuffixf2s).
950 (define_insn "sse5_vmfmsub<mode>4"
951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
952 (vec_merge:SSEMODEF2P
955 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
956 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
957 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
960 "TARGET_SSE5 && TARGET_FUSED_MADD
961 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
962 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
963 [(set_attr "type" "ssemuladd")
964 (set_attr "mode" "<MODE>")])
966 ;; Floating point negative multiply and add
967 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
968 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
969 ;; Allow two memory operands to help in optimizing.
;; SSE5 negated multiply-add (fnmadd<suffix>).  Operand 3 appears first in
;; the RTL, ahead of operands 1 and 2, but the operand numbers, constraints
;; and template order are the same as in sse5_fmadd<mode>4.
970 (define_insn "sse5_fnmadd<mode>4"
971 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
973 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
975 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
976 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
977 "TARGET_SSE5 && TARGET_FUSED_MADD
978 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
979 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
980 [(set_attr "type" "ssemuladd")
981 (set_attr "mode" "<MODE>")])
983 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The visible part of the condition requires that the operands fail
;; ix86_sse5_valid_op_p with a final argument of 1 but pass it with 2
;; (presumably the memory-operand limit -- confirm against i386.c), and that
;; operand 0 is not mentioned in operands 1-3.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then emits one sse5_fnmadd<mode>4.
985 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
987 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
989 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
990 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
992 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
993 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
994 && !reg_mentioned_p (operands[0], operands[1])
995 && !reg_mentioned_p (operands[0], operands[2])
996 && !reg_mentioned_p (operands[0], operands[3])"
999 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1000 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1001 operands[2], operands[3]));
1005 ;; For the scalar operations, use operand1 for the upper words that aren't
1006 ;; modified, so restrict the forms that are generated.
1007 ;; Scalar version of fnmadd
;; Operand 3 is listed first in the template; operand 1 is tied to the
;; destination (constraint "0") and at most one of operands 2 and 3 is
;; memory per alternative.  Emits fnmaddss (V4SF) or fnmaddsd (V2DF).
1008 (define_insn "sse5_vmfnmadd<mode>4"
1009 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1010 (vec_merge:SSEMODEF2P
1012 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1014 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1015 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1018 "TARGET_SSE5 && TARGET_FUSED_MADD
1019 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1020 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1021 [(set_attr "type" "ssemuladd")
1022 (set_attr "mode" "<MODE>")])
1024 ;; Floating point negative multiply and subtract
1025 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1026 ;; Allow 2 memory operands to help with optimization
;; Only two alternatives are provided: operand 1 (the negated factor a) is
;; always tied to the destination, and one of operands 2 / 3 may be memory.
;; Emits fnmsub{ss,sd,ps,pd} via <ssemodesuffixf4>.
1027 (define_insn "sse5_fnmsub<mode>4"
1028 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1032 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1033 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1034 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1035 "TARGET_SSE5 && TARGET_FUSED_MADD
1036 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1037 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1038 [(set_attr "type" "ssemuladd")
1039 (set_attr "mode" "<MODE>")])
1041 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The visible part of the condition requires that the operands fail
;; ix86_sse5_valid_op_p with a final argument of 1 but pass it with 2
;; (presumably the memory-operand limit -- confirm against i386.c), and that
;; operand 0 is not mentioned in operands 1-3.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then emits one sse5_fnmsub<mode>4.
1043 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1047 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1048 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1049 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1051 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1052 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1053 && !reg_mentioned_p (operands[0], operands[1])
1054 && !reg_mentioned_p (operands[0], operands[2])
1055 && !reg_mentioned_p (operands[0], operands[3])"
1058 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1059 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1060 operands[2], operands[3]));
1064 ;; For the scalar operations, use operand1 for the upper words that aren't
1065 ;; modified, so restrict the forms that are generated.
1066 ;; Scalar version of fnmsub
;; Operand 1 is tied to the destination; at most one of operands 2 and 3 is
;; memory per alternative.  Emits fnmsubss (V4SF) or fnmsubsd (V2DF).
;; NOTE(review): the condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p, whereas sse5_vmfmsub and sse5_vmfnmadd pass 1 --
;; confirm whether this difference is intentional.
1067 (define_insn "sse5_vmfnmsub<mode>4"
1068 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1069 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1075 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1078 "TARGET_SSE5 && TARGET_FUSED_MADD
1079 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1080 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1081 [(set_attr "type" "ssemuladd")
1082 (set_attr "mode" "<MODE>")])
1084 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1085 ;; even if the user used -mno-fused-madd
1086 ;; Parallel instructions. During instruction generation, just default
1087 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the packed fmadd intrinsic: all four operands are
;; register_operand and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.
;; When TARGET_FUSED_MADD is set, the preparation code emits the plain
;; sse5_fmadd<mode>4 insn instead.
1088 (define_expand "sse5i_fmadd<mode>4"
1089 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1093 (match_operand:SSEMODEF2P 1 "register_operand" "")
1094 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1095 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1096 UNSPEC_SSE5_INTRINSIC))]
1099 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1100 if (TARGET_FUSED_MADD)
1102 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1103 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the packed fmadd.  The condition tests only
;; TARGET_SSE5 (not TARGET_FUSED_MADD).  Operands 1 and 2 are commutative
;; ("%"); the destination is tied to operand 1 in alternatives 0-1 and to
;; operand 3 in alternatives 2-3.  Emits fmaddps (V4SF) or fmaddpd (V2DF).
1108 (define_insn "*sse5i_fmadd<mode>4"
1109 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1113 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1114 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1115 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1116 UNSPEC_SSE5_INTRINSIC))]
1117 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1118 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1119 [(set_attr "type" "ssemuladd")
1120 (set_attr "mode" "<MODE>")])
;; Expander for the packed fmsub intrinsic: register-only operands wrapped
;; in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the preparation
;; code emits the plain sse5_fmsub<mode>4 insn instead.
1122 (define_expand "sse5i_fmsub<mode>4"
1123 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1127 (match_operand:SSEMODEF2P 1 "register_operand" "")
1128 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1129 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1130 UNSPEC_SSE5_INTRINSIC))]
1133 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1134 if (TARGET_FUSED_MADD)
1136 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1137 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the packed fmsub.  The condition tests only
;; TARGET_SSE5 (not TARGET_FUSED_MADD).  Operands 1 and 2 are commutative
;; ("%"); the destination is tied to operand 1 in alternatives 0-1 and to
;; operand 3 in alternatives 2-3.  Emits fmsubps (V4SF) or fmsubpd (V2DF).
;; Operand 1 uses nonimmediate_operand so that its predicate agrees with the
;; "xm" alternative in its constraint string and with the sibling
;; *sse5i_fmadd / *sse5i_fnmadd / *sse5i_fnmsub patterns.
1142 (define_insn "*sse5i_fmsub<mode>4"
1143 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1147 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1148 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1149 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1150 UNSPEC_SSE5_INTRINSIC))]
1151 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1152 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1153 [(set_attr "type" "ssemuladd")
1154 (set_attr "mode" "<MODE>")])
1156 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1157 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the packed fnmadd intrinsic: register-only operands wrapped
;; in UNSPEC_SSE5_INTRINSIC, with operand 3 listed before operands 1 and 2.
;; When TARGET_FUSED_MADD is set, the preparation code emits the plain
;; sse5_fnmadd<mode>4 insn instead.
1158 (define_expand "sse5i_fnmadd<mode>4"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1162 (match_operand:SSEMODEF2P 3 "register_operand" "")
1164 (match_operand:SSEMODEF2P 1 "register_operand" "")
1165 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1166 UNSPEC_SSE5_INTRINSIC))]
1169 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1170 if (TARGET_FUSED_MADD)
1172 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1173 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the packed fnmadd.  The condition tests
;; only TARGET_SSE5.  Operand 3 is listed first; operands 1 and 2 are
;; commutative ("%").  Emits fnmaddps (V4SF) or fnmaddpd (V2DF).
1178 (define_insn "*sse5i_fnmadd<mode>4"
1179 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1182 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1184 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1185 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1186 UNSPEC_SSE5_INTRINSIC))]
1187 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1188 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1189 [(set_attr "type" "ssemuladd")
1190 (set_attr "mode" "<MODE>")])
1192 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the packed fnmsub intrinsic: register-only operands wrapped
;; in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the preparation
;; code emits the plain sse5_fnmsub<mode>4 insn instead.
1193 (define_expand "sse5i_fnmsub<mode>4"
1194 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1199 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1200 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1201 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1202 UNSPEC_SSE5_INTRINSIC))]
1205 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1206 if (TARGET_FUSED_MADD)
1208 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1209 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the packed fnmsub.  The condition tests
;; only TARGET_SSE5.  Unlike sse5_fnmsub<mode>4, which has two alternatives,
;; this pattern lists four and marks operands 1 and 2 commutative ("%").
;; Emits fnmsubps (V4SF) or fnmsubpd (V2DF).
1214 (define_insn "*sse5i_fnmsub<mode>4"
1215 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1220 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1221 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1222 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1223 UNSPEC_SSE5_INTRINSIC))]
1224 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1225 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1226 [(set_attr "type" "ssemuladd")
1227 (set_attr "mode" "<MODE>")])
1229 ;; Scalar instructions
;; Expander for the scalar fmadd intrinsic: a vec_merge of register-only
;; operands wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set, the preparation code emits the plain sse5_vmfmadd<mode>4 insn.
1230 (define_expand "sse5i_vmfmadd<mode>4"
1231 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1233 [(vec_merge:SSEMODEF2P
1236 (match_operand:SSEMODEF2P 1 "register_operand" "")
1237 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1238 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1241 UNSPEC_SSE5_INTRINSIC))]
1244 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1245 if (TARGET_FUSED_MADD)
1247 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1248 operands[2], operands[3]));
1253 ;; For the scalar operations, use operand1 for the upper words that aren't
1254 ;; modified, so restrict the forms that are accepted.
;; UNSPEC_SSE5_INTRINSIC form of the scalar fmadd; the condition tests only
;; TARGET_SSE5.  Operand 1 must be a register tied to the destination.
;; Emits fmaddss (V4SF) or fmaddsd (V2DF); the "mode" attribute is the
;; scalar element mode (<ssescalarmode>).
1255 (define_insn "*sse5i_vmfmadd<mode>4"
1256 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1258 [(vec_merge:SSEMODEF2P
1261 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1262 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1263 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1266 UNSPEC_SSE5_INTRINSIC))]
1267 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1268 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1269 [(set_attr "type" "ssemuladd")
1270 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fmsub intrinsic: a vec_merge of register-only
;; operands wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set, the preparation code emits the plain sse5_vmfmsub<mode>4 insn.
1272 (define_expand "sse5i_vmfmsub<mode>4"
1273 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1275 [(vec_merge:SSEMODEF2P
1278 (match_operand:SSEMODEF2P 1 "register_operand" "")
1279 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1280 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1283 UNSPEC_SSE5_INTRINSIC))]
1286 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1287 if (TARGET_FUSED_MADD)
1289 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1290 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the scalar fmsub; the condition tests only
;; TARGET_SSE5.  Operand 1 is tied to the destination (constraint "0"), so
;; only operand 2 or operand 3 can actually be memory.
;; Emits fmsubss (V4SF) or fmsubsd (V2DF).
1295 (define_insn "*sse5i_vmfmsub<mode>4"
1296 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1298 [(vec_merge:SSEMODEF2P
1301 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1302 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1303 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1306 UNSPEC_SSE5_INTRINSIC))]
1307 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1308 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1309 [(set_attr "type" "ssemuladd")
1310 (set_attr "mode" "<ssescalarmode>")])
1312 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar fnmadd intrinsic: a vec_merge of register-only
;; operands (operand 3 listed first) wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits the plain
;; sse5_vmfnmadd<mode>4 insn.
1313 (define_expand "sse5i_vmfnmadd<mode>4"
1314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1316 [(vec_merge:SSEMODEF2P
1318 (match_operand:SSEMODEF2P 3 "register_operand" "")
1320 (match_operand:SSEMODEF2P 1 "register_operand" "")
1321 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1324 UNSPEC_SSE5_INTRINSIC))]
1327 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1328 if (TARGET_FUSED_MADD)
1330 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1331 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the scalar fnmadd; the condition tests only
;; TARGET_SSE5.  Operand 3 is listed first; operand 1 is tied to the
;; destination.  Emits fnmaddss (V4SF) or fnmaddsd (V2DF).
1336 (define_insn "*sse5i_vmfnmadd<mode>4"
1337 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1339 [(vec_merge:SSEMODEF2P
1341 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1343 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1344 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1347 UNSPEC_SSE5_INTRINSIC))]
1348 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1349 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1350 [(set_attr "type" "ssemuladd")
1351 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fnmsub intrinsic: a vec_merge of register-only
;; operands wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set, the preparation code emits the plain sse5_vmfnmsub<mode>4 insn.
1353 (define_expand "sse5i_vmfnmsub<mode>4"
1354 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1356 [(vec_merge:SSEMODEF2P
1360 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1361 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1362 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1365 UNSPEC_SSE5_INTRINSIC))]
1368 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1369 if (TARGET_FUSED_MADD)
1371 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1372 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the scalar fnmsub; the condition tests only
;; TARGET_SSE5.  Operand 1 is tied to the destination; at most one of
;; operands 2 and 3 is memory.  Emits fnmsubss (V4SF) or fnmsubsd (V2DF).
1377 (define_insn "*sse5i_vmfnmsub<mode>4"
1378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1380 [(vec_merge:SSEMODEF2P
1384 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1385 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1386 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1389 UNSPEC_SSE5_INTRINSIC))]
1390 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1391 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1392 [(set_attr "type" "ssemuladd")
1393 (set_attr "mode" "<ssescalarmode>")])
1395 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1397 ;; Parallel single-precision floating point conversion operations
1399 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert a V2SI value held in an MMX register or memory
;; (operand 2, "ym") to V2SF.  Operand 1 is a V4SF register tied to the
;; destination (constraint "0").
1401 (define_insn "sse_cvtpi2ps"
1402 [(set (match_operand:V4SF 0 "register_operand" "=x")
1405 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1406 (match_operand:V4SF 1 "register_operand" "0")
1409 "cvtpi2ps\t{%2, %0|%0, %2}"
1410 [(set_attr "type" "ssecvt")
1411 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (SSE register or memory) converted through an
;; unspec in V4SI mode, of which elements 0 and 1 are selected; the V2SI
;; result lives in an MMX register ("y"), hence "unit" is "mmx".
1413 (define_insn "sse_cvtps2pi"
1414 [(set (match_operand:V2SI 0 "register_operand" "=y")
1416 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1418 (parallel [(const_int 0) (const_int 1)])))]
1420 "cvtps2pi\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "ssecvt")
1422 (set_attr "unit" "mmx")
1423 (set_attr "mode" "DI")])
;; cvttps2pi: truncating (RTL "fix") conversion of a V4SF source to V4SI,
;; of which elements 0 and 1 are selected; the V2SI result lives in an MMX
;; register ("y").
;; NOTE(review): "mode" is "SF" here but "DI" on sse_cvtps2pi -- confirm
;; which is intended.
1425 (define_insn "sse_cvttps2pi"
1426 [(set (match_operand:V2SI 0 "register_operand" "=y")
1428 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1429 (parallel [(const_int 0) (const_int 1)])))]
1431 "cvttps2pi\t{%1, %0|%0, %1}"
1432 [(set_attr "type" "ssecvt")
1433 (set_attr "unit" "mmx")
1434 (set_attr "mode" "SF")])
;; cvtsi2ss: convert a 32-bit integer from a general register
;; (alternative 0) or memory (alternative 1) to SF.  Operand 1 is a V4SF
;; register tied to the destination.  The decode attributes are given per
;; alternative.
1436 (define_insn "sse_cvtsi2ss"
1437 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1440 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1441 (match_operand:V4SF 1 "register_operand" "0,0")
1444 "cvtsi2ss\t{%2, %0|%0, %2}"
1445 [(set_attr "type" "sseicvt")
1446 (set_attr "athlon_decode" "vector,double")
1447 (set_attr "amdfam10_decode" "vector,double")
1448 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-integer variant of cvtsi2ss; requires TARGET_64BIT.
;; Operand 2 is DImode; operand 1 is a V4SF register tied to the
;; destination.
;; NOTE(review): the second alternative is "rm" here, whereas sse_cvtsi2ss
;; uses "m" -- confirm this is intentional.
1450 (define_insn "sse_cvtsi2ssq"
1451 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1454 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1455 (match_operand:V4SF 1 "register_operand" "0,0")
1457 "TARGET_SSE && TARGET_64BIT"
1458 "cvtsi2ssq\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "sseicvt")
1460 (set_attr "athlon_decode" "vector,double")
1461 (set_attr "amdfam10_decode" "vector,double")
1462 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of a V4SF operand (SSE register or memory) is
;; converted to a 32-bit integer in a general register, expressed with
;; UNSPEC_FIX_NOTRUNC (i.e. not the truncating "fix" form).
1464 (define_insn "sse_cvtss2si"
1465 [(set (match_operand:SI 0 "register_operand" "=r,r")
1468 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1469 (parallel [(const_int 0)]))]
1470 UNSPEC_FIX_NOTRUNC))]
1472 "cvtss2si\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "sseicvt")
1474 (set_attr "athlon_decode" "double,vector")
1475 (set_attr "prefix_rep" "1")
1476 (set_attr "mode" "SI")])
;; cvtss2si taking a scalar SF operand directly (no element selection):
;; UNSPEC_FIX_NOTRUNC conversion of SF to SI in a general register.
1478 (define_insn "sse_cvtss2si_2"
1479 [(set (match_operand:SI 0 "register_operand" "=r,r")
1480 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1481 UNSPEC_FIX_NOTRUNC))]
1483 "cvtss2si\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "sseicvt")
1485 (set_attr "athlon_decode" "double,vector")
1486 (set_attr "amdfam10_decode" "double,double")
1487 (set_attr "prefix_rep" "1")
1488 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit-result variant of sse_cvtss2si; requires TARGET_64BIT.
;; Element 0 of the V4SF operand is converted via UNSPEC_FIX_NOTRUNC to a
;; DImode general register.
1490 (define_insn "sse_cvtss2siq"
1491 [(set (match_operand:DI 0 "register_operand" "=r,r")
1494 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1495 (parallel [(const_int 0)]))]
1496 UNSPEC_FIX_NOTRUNC))]
1497 "TARGET_SSE && TARGET_64BIT"
1498 "cvtss2siq\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "sseicvt")
1500 (set_attr "athlon_decode" "double,vector")
1501 (set_attr "prefix_rep" "1")
1502 (set_attr "mode" "DI")])
;; cvtss2siq taking a scalar SF operand directly: UNSPEC_FIX_NOTRUNC
;; conversion of SF to DI in a general register; requires TARGET_64BIT.
1504 (define_insn "sse_cvtss2siq_2"
1505 [(set (match_operand:DI 0 "register_operand" "=r,r")
1506 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1507 UNSPEC_FIX_NOTRUNC))]
1508 "TARGET_SSE && TARGET_64BIT"
1509 "cvtss2siq\t{%1, %0|%0, %1}"
1510 [(set_attr "type" "sseicvt")
1511 (set_attr "athlon_decode" "double,vector")
1512 (set_attr "amdfam10_decode" "double,double")
1513 (set_attr "prefix_rep" "1")
1514 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of a V4SF operand (SSE register or memory) is
;; converted to a 32-bit integer in a general register.
1516 (define_insn "sse_cvttss2si"
1517 [(set (match_operand:SI 0 "register_operand" "=r,r")
1520 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1521 (parallel [(const_int 0)]))))]
1523 "cvttss2si\t{%1, %0|%0, %1}"
1524 [(set_attr "type" "sseicvt")
1525 (set_attr "athlon_decode" "double,vector")
1526 (set_attr "amdfam10_decode" "double,double")
1527 (set_attr "prefix_rep" "1")
1528 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit-result variant of sse_cvttss2si; requires
;; TARGET_64BIT.  Element 0 of the V4SF operand is converted to a DImode
;; general register.
1530 (define_insn "sse_cvttss2siq"
1531 [(set (match_operand:DI 0 "register_operand" "=r,r")
1534 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1535 (parallel [(const_int 0)]))))]
1536 "TARGET_SSE && TARGET_64BIT"
1537 "cvttss2siq\t{%1, %0|%0, %1}"
1538 [(set_attr "type" "sseicvt")
1539 (set_attr "athlon_decode" "double,vector")
1540 (set_attr "amdfam10_decode" "double,double")
1541 (set_attr "prefix_rep" "1")
1542 (set_attr "mode" "DI")])
;; cvtdq2ps: convert all four SI elements of a V4SI operand (SSE register
;; or memory) to V4SF.
1544 (define_insn "sse2_cvtdq2ps"
1545 [(set (match_operand:V4SF 0 "register_operand" "=x")
1546 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1548 "cvtdq2ps\t{%1, %0|%0, %1}"
1549 [(set_attr "type" "ssecvt")
1550 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF to V4SI conversion expressed with UNSPEC_FIX_NOTRUNC
;; (i.e. not the truncating "fix" form used by sse2_cvttps2dq).
1552 (define_insn "sse2_cvtps2dq"
1553 [(set (match_operand:V4SI 0 "register_operand" "=x")
1554 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1555 UNSPEC_FIX_NOTRUNC))]
1557 "cvtps2dq\t{%1, %0|%0, %1}"
1558 [(set_attr "type" "ssecvt")
1559 (set_attr "prefix_data16" "1")
1560 (set_attr "mode" "TI")])
;; cvttps2dq: truncating (RTL "fix") conversion of V4SF to V4SI.
1562 (define_insn "sse2_cvttps2dq"
1563 [(set (match_operand:V4SI 0 "register_operand" "=x")
1564 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1566 "cvttps2dq\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "ssecvt")
1568 (set_attr "prefix_rep" "1")
1569 (set_attr "mode" "TI")])
1571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1573 ;; Parallel double-precision floating point conversion operations
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert a V2SI value from an MMX register (alternative 0) or
;; memory (alternative 1) to V2DF.  Only the MMX-register alternative is
;; tagged with unit "mmx".
1577 (define_insn "sse2_cvtpi2pd"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1579 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1581 "cvtpi2pd\t{%1, %0|%0, %1}"
1582 [(set_attr "type" "ssecvt")
1583 (set_attr "unit" "mmx,*")
1584 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF to V2SI conversion expressed with UNSPEC_FIX_NOTRUNC; the
;; result lives in an MMX register ("y").
1586 (define_insn "sse2_cvtpd2pi"
1587 [(set (match_operand:V2SI 0 "register_operand" "=y")
1588 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1589 UNSPEC_FIX_NOTRUNC))]
1591 "cvtpd2pi\t{%1, %0|%0, %1}"
1592 [(set_attr "type" "ssecvt")
1593 (set_attr "unit" "mmx")
1594 (set_attr "prefix_data16" "1")
1595 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating (RTL "fix") conversion of V2DF to V2SI; the result
;; lives in an MMX register ("y").
;; NOTE(review): "mode" is "TI" here but "DI" on sse2_cvtpd2pi -- confirm
;; which is intended.
1597 (define_insn "sse2_cvttpd2pi"
1598 [(set (match_operand:V2SI 0 "register_operand" "=y")
1599 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1601 "cvttpd2pi\t{%1, %0|%0, %1}"
1602 [(set_attr "type" "ssecvt")
1603 (set_attr "unit" "mmx")
1604 (set_attr "prefix_data16" "1")
1605 (set_attr "mode" "TI")])
;; cvtsi2sd: convert a 32-bit integer from a general register
;; (alternative 0) or memory (alternative 1) to DF.  Operand 1 is a V2DF
;; register tied to the destination.
1607 (define_insn "sse2_cvtsi2sd"
1608 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1611 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1612 (match_operand:V2DF 1 "register_operand" "0,0")
1615 "cvtsi2sd\t{%2, %0|%0, %2}"
1616 [(set_attr "type" "sseicvt")
1617 (set_attr "mode" "DF")
1618 (set_attr "athlon_decode" "double,direct")
1619 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit-integer variant of cvtsi2sd; requires TARGET_64BIT.
;; Operand 2 is DImode; operand 1 is a V2DF register tied to the
;; destination.
1621 (define_insn "sse2_cvtsi2sdq"
1622 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1625 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1626 (match_operand:V2DF 1 "register_operand" "0,0")
1628 "TARGET_SSE2 && TARGET_64BIT"
1629 "cvtsi2sdq\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "sseicvt")
1631 (set_attr "mode" "DF")
1632 (set_attr "athlon_decode" "double,direct")
1633 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of a V2DF operand (SSE register or memory) is
;; converted to a 32-bit integer in a general register, expressed with
;; UNSPEC_FIX_NOTRUNC.
1635 (define_insn "sse2_cvtsd2si"
1636 [(set (match_operand:SI 0 "register_operand" "=r,r")
1639 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1640 (parallel [(const_int 0)]))]
1641 UNSPEC_FIX_NOTRUNC))]
1643 "cvtsd2si\t{%1, %0|%0, %1}"
1644 [(set_attr "type" "sseicvt")
1645 (set_attr "athlon_decode" "double,vector")
1646 (set_attr "prefix_rep" "1")
1647 (set_attr "mode" "SI")])
;; cvtsd2si taking a scalar DF operand directly (no element selection):
;; UNSPEC_FIX_NOTRUNC conversion of DF to SI in a general register.
1649 (define_insn "sse2_cvtsd2si_2"
1650 [(set (match_operand:SI 0 "register_operand" "=r,r")
1651 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1652 UNSPEC_FIX_NOTRUNC))]
1654 "cvtsd2si\t{%1, %0|%0, %1}"
1655 [(set_attr "type" "sseicvt")
1656 (set_attr "athlon_decode" "double,vector")
1657 (set_attr "amdfam10_decode" "double,double")
1658 (set_attr "prefix_rep" "1")
1659 (set_attr "mode" "SI")])
;; cvtsd2siq: 64-bit-result variant of sse2_cvtsd2si; requires
;; TARGET_64BIT.  Element 0 of the V2DF operand is converted via
;; UNSPEC_FIX_NOTRUNC to a DImode general register.
1661 (define_insn "sse2_cvtsd2siq"
1662 [(set (match_operand:DI 0 "register_operand" "=r,r")
1665 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1666 (parallel [(const_int 0)]))]
1667 UNSPEC_FIX_NOTRUNC))]
1668 "TARGET_SSE2 && TARGET_64BIT"
1669 "cvtsd2siq\t{%1, %0|%0, %1}"
1670 [(set_attr "type" "sseicvt")
1671 (set_attr "athlon_decode" "double,vector")
1672 (set_attr "prefix_rep" "1")
1673 (set_attr "mode" "DI")])
;; cvtsd2siq taking a scalar DF operand directly: UNSPEC_FIX_NOTRUNC
;; conversion of DF to DI in a general register; requires TARGET_64BIT.
1675 (define_insn "sse2_cvtsd2siq_2"
1676 [(set (match_operand:DI 0 "register_operand" "=r,r")
1677 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1678 UNSPEC_FIX_NOTRUNC))]
1679 "TARGET_SSE2 && TARGET_64BIT"
1680 "cvtsd2siq\t{%1, %0|%0, %1}"
1681 [(set_attr "type" "sseicvt")
1682 (set_attr "athlon_decode" "double,vector")
1683 (set_attr "amdfam10_decode" "double,double")
1684 (set_attr "prefix_rep" "1")
1685 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of a V2DF operand (SSE register or memory) is
;; converted to a 32-bit integer in a general register.
1687 (define_insn "sse2_cvttsd2si"
1688 [(set (match_operand:SI 0 "register_operand" "=r,r")
1691 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1692 (parallel [(const_int 0)]))))]
1694 "cvttsd2si\t{%1, %0|%0, %1}"
1695 [(set_attr "type" "sseicvt")
1696 (set_attr "prefix_rep" "1")
1697 (set_attr "mode" "SI")
1698 (set_attr "athlon_decode" "double,vector")
1699 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: 64-bit-result variant of sse2_cvttsd2si; requires
;; TARGET_64BIT.  Element 0 of the V2DF operand is converted to a DImode
;; general register.
1701 (define_insn "sse2_cvttsd2siq"
1702 [(set (match_operand:DI 0 "register_operand" "=r,r")
1705 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1706 (parallel [(const_int 0)]))))]
1707 "TARGET_SSE2 && TARGET_64BIT"
1708 "cvttsd2siq\t{%1, %0|%0, %1}"
1709 [(set_attr "type" "sseicvt")
1710 (set_attr "prefix_rep" "1")
1711 (set_attr "mode" "DI")
1712 (set_attr "athlon_decode" "double,vector")
1713 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: elements 0 and 1 of a V4SI operand (SSE register or memory)
;; are selected and converted, giving a V2DF result.
1715 (define_insn "sse2_cvtdq2pd"
1716 [(set (match_operand:V2DF 0 "register_operand" "=x")
1719 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0) (const_int 1)]))))]
1722 "cvtdq2pd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecvt")
1724 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation code supplies operand 2 as the
;; V2SImode zero constant, which the matching *sse2_cvtpd2dq insn requires
;; as a const0_operand.
1726 (define_expand "sse2_cvtpd2dq"
1727 [(set (match_operand:V4SI 0 "register_operand" "")
1729 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1733 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: a V2DF source is converted through an unspec to V2SI and
;; paired with a zero V2SI (operand 2, const0_operand) to form the V4SI
;; destination.
1735 (define_insn "*sse2_cvtpd2dq"
1736 [(set (match_operand:V4SI 0 "register_operand" "=x")
1738 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1740 (match_operand:V2SI 2 "const0_operand" "")))]
1742 "cvtpd2dq\t{%1, %0|%0, %1}"
1743 [(set_attr "type" "ssecvt")
1744 (set_attr "prefix_rep" "1")
1745 (set_attr "mode" "TI")
1746 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation code supplies operand 2 as the
;; V2SImode zero constant, which the matching *sse2_cvttpd2dq insn requires
;; as a const0_operand.
1748 (define_expand "sse2_cvttpd2dq"
1749 [(set (match_operand:V4SI 0 "register_operand" "")
1751 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1754 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: truncating (RTL "fix") conversion of V2DF to V2SI, paired
;; with a zero V2SI (operand 2, const0_operand) to form the V4SI
;; destination.
1756 (define_insn "*sse2_cvttpd2dq"
1757 [(set (match_operand:V4SI 0 "register_operand" "=x")
1759 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1760 (match_operand:V2SI 2 "const0_operand" "")))]
1762 "cvttpd2dq\t{%1, %0|%0, %1}"
1763 [(set_attr "type" "ssecvt")
1764 (set_attr "prefix_rep" "1")
1765 (set_attr "mode" "TI")
1766 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of a V2DF operand (SSE register or memory,
;; operand 2) to V2SF.  Operand 1 is a V4SF register tied to the
;; destination.
1768 (define_insn "sse2_cvtsd2ss"
1769 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1772 (float_truncate:V2SF
1773 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1774 (match_operand:V4SF 1 "register_operand" "0,0")
1777 "cvtsd2ss\t{%2, %0|%0, %2}"
1778 [(set_attr "type" "ssecvt")
1779 (set_attr "athlon_decode" "vector,double")
1780 (set_attr "amdfam10_decode" "vector,double")
1781 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of a V4SF operand (SSE register or memory,
;; operand 2) are selected for conversion.  Operand 1 is a V2DF register
;; tied to the destination.
1783 (define_insn "sse2_cvtss2sd"
1784 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1788 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1789 (parallel [(const_int 0) (const_int 1)])))
1790 (match_operand:V2DF 1 "register_operand" "0,0")
1793 "cvtss2sd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "ssecvt")
1795 (set_attr "amdfam10_decode" "vector,double")
1796 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation code supplies operand 2 as the
;; V2SFmode zero constant, which the matching *sse2_cvtpd2ps insn requires
;; as a const0_operand.
1798 (define_expand "sse2_cvtpd2ps"
1799 [(set (match_operand:V4SF 0 "register_operand" "")
1801 (float_truncate:V2SF
1802 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1805 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate of a V2DF source to V2SF, paired with a zero
;; V2SF (operand 2, const0_operand) to form the V4SF destination.
1807 (define_insn "*sse2_cvtpd2ps"
1808 [(set (match_operand:V4SF 0 "register_operand" "=x")
1810 (float_truncate:V2SF
1811 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1812 (match_operand:V2SF 2 "const0_operand" "")))]
1814 "cvtpd2ps\t{%1, %0|%0, %1}"
1815 [(set_attr "type" "ssecvt")
1816 (set_attr "prefix_data16" "1")
1817 (set_attr "mode" "V4SF")
1818 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: elements 0 and 1 of a V4SF operand (SSE register or memory)
;; are selected and converted, giving a V2DF result.
1820 (define_insn "sse2_cvtps2pd"
1821 [(set (match_operand:V2DF 0 "register_operand" "=x")
1824 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1825 (parallel [(const_int 0) (const_int 1)]))))]
1827 "cvtps2pd\t{%1, %0|%0, %1}"
1828 [(set_attr "type" "ssecvt")
1829 (set_attr "mode" "V2DF")
1830 (set_attr "amdfam10_decode" "direct")])
;; Widen the high half of a V4SF vector to V2DF.  The pattern is a two-set
;; sequence: a shuffle of operand 1 into a fresh V4SF temporary
;; (operands[2], allocated in the preparation code), followed by a
;; conversion of elements 0 and 1 into the V2DF destination.
1832 (define_expand "vec_unpacks_hi_v4sf"
1837 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1838 (parallel [(const_int 6)
1842 (set (match_operand:V2DF 0 "register_operand" "")
1846 (parallel [(const_int 0) (const_int 1)]))))]
1849 operands[2] = gen_reg_rtx (V4SFmode);
;; Widen the low half of a V4SF vector to V2DF: elements 0 and 1 of
;; operand 1 are selected for conversion into the V2DF destination.
1852 (define_expand "vec_unpacks_lo_v4sf"
1853 [(set (match_operand:V2DF 0 "register_operand" "")
1856 (match_operand:V4SF 1 "nonimmediate_operand" "")
1857 (parallel [(const_int 0) (const_int 1)]))))]
;; Convert the high half of a V8HI vector to V4SF: unpack it into a V4SI
;; temporary with vec_unpacks_hi_v8hi, then convert that temporary with
;; cvtdq2ps.
1860 (define_expand "vec_unpacks_float_hi_v8hi"
1861 [(match_operand:V4SF 0 "register_operand" "")
1862 (match_operand:V8HI 1 "register_operand" "")]
1865 rtx tmp = gen_reg_rtx (V4SImode);
1867 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1868 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Convert the low half of a V8HI vector to V4SF: unpack it into a V4SI
;; temporary with vec_unpacks_lo_v8hi, then convert that temporary with
;; cvtdq2ps.
1872 (define_expand "vec_unpacks_float_lo_v8hi"
1873 [(match_operand:V4SF 0 "register_operand" "")
1874 (match_operand:V8HI 1 "register_operand" "")]
1877 rtx tmp = gen_reg_rtx (V4SImode);
1879 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1880 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned counterpart of vec_unpacks_float_hi_v8hi: unpack the high half
;; into a V4SI temporary with vec_unpacku_hi_v8hi, then convert that
;; temporary with cvtdq2ps.
1884 (define_expand "vec_unpacku_float_hi_v8hi"
1885 [(match_operand:V4SF 0 "register_operand" "")
1886 (match_operand:V8HI 1 "register_operand" "")]
1889 rtx tmp = gen_reg_rtx (V4SImode);
1891 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1892 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unsigned counterpart of vec_unpacks_float_lo_v8hi: unpack the low half
;; into a V4SI temporary with vec_unpacku_lo_v8hi, then convert that
;; temporary with cvtdq2ps.
1896 (define_expand "vec_unpacku_float_lo_v8hi"
1897 [(match_operand:V4SF 0 "register_operand" "")
1898 (match_operand:V8HI 1 "register_operand" "")]
1901 rtx tmp = gen_reg_rtx (V4SImode);
1903 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1904 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Convert the high half of a V4SI vector to V2DF.  The pattern is a
;; two-set sequence: a shuffle of operand 1 into a fresh V4SI temporary
;; (operands[2], allocated in the preparation code), followed by a
;; conversion of elements 0 and 1 into the V2DF destination.
1908 (define_expand "vec_unpacks_float_hi_v4si"
1911 (match_operand:V4SI 1 "nonimmediate_operand" "")
1912 (parallel [(const_int 2)
1916 (set (match_operand:V2DF 0 "register_operand" "")
1920 (parallel [(const_int 0) (const_int 1)]))))]
1923 operands[2] = gen_reg_rtx (V4SImode);
;; Convert the low half of a V4SI vector to V2DF: elements 0 and 1 of
;; operand 1 are selected for conversion into the V2DF destination.
1926 (define_expand "vec_unpacks_float_lo_v4si"
1927 [(set (match_operand:V2DF 0 "register_operand" "")
1930 (match_operand:V4SI 1 "nonimmediate_operand" "")
1931 (parallel [(const_int 0) (const_int 1)]))))]
;; Pack two V2DF vectors into one V4SF: each input is converted with
;; cvtpd2ps into its own V4SF temporary (r1, r2), and the two temporaries
;; are then combined into operand 0 with movlhps.
1934 (define_expand "vec_pack_trunc_v2df"
1935 [(match_operand:V4SF 0 "register_operand" "")
1936 (match_operand:V2DF 1 "nonimmediate_operand" "")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1942 r1 = gen_reg_rtx (V4SFmode);
1943 r2 = gen_reg_rtx (V4SFmode);
1945 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1946 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1947 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF vectors into one V4SI using the truncating conversion:
;; each input is converted with cvttpd2dq into its own V4SI temporary
;; (r1, r2), and the temporaries are combined with punpcklqdq, applied to
;; the V2DImode lowparts of the destination and both temporaries.
1951 (define_expand "vec_pack_sfix_trunc_v2df"
1952 [(match_operand:V4SI 0 "register_operand" "")
1953 (match_operand:V2DF 1 "nonimmediate_operand" "")
1954 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1959 r1 = gen_reg_rtx (V4SImode);
1960 r2 = gen_reg_rtx (V4SImode);
1962 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1963 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1964 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1965 gen_lowpart (V2DImode, r1),
1966 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF vectors into one V4SI using the non-truncating conversion:
;; each input is converted with cvtpd2dq into its own V4SI temporary
;; (r1, r2), and the temporaries are combined with punpcklqdq, applied to
;; the V2DImode lowparts of the destination and both temporaries.
1970 (define_expand "vec_pack_sfix_v2df"
1971 [(match_operand:V4SI 0 "register_operand" "")
1972 (match_operand:V2DF 1 "nonimmediate_operand" "")
1973 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1978 r1 = gen_reg_rtx (V4SImode);
1979 r2 = gen_reg_rtx (V4SImode);
1981 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1982 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1983 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1984 gen_lowpart (V2DImode, r1),
1985 gen_lowpart (V2DImode, r2)));
1989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1991 ;; Parallel single-precision floating point element swizzling
1993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for movhlps: accepts nonimmediate operands and runs
;; ix86_fixup_binary_operands over them in its preparation code.
1995 (define_expand "sse_movhlps_exp"
1996 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
1999 (match_operand:V4SF 1 "nonimmediate_operand" "")
2000 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2001 (parallel [(const_int 6)
2006 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; movhlps and its memory forms.  Operand 1 is always tied to the
;; destination.  Three alternatives:
;;   0: register source      -> movhlps
;;   1: offsettable memory   -> movlps from %H2 (presumably the upper half
;;      of the memory operand -- confirm against print_operand)
;;   2: memory destination   -> movhps store
;; The condition rejects the case where operands 1 and 2 are both memory.
2008 (define_insn "sse_movhlps"
2009 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2012 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2013 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2014 (parallel [(const_int 6)
2018 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2020 movhlps\t{%2, %0|%0, %2}
2021 movlps\t{%H2, %0|%0, %H2}
2022 movhps\t{%2, %0|%0, %2}"
2023 [(set_attr "type" "ssemov")
2024 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for movlhps: accepts nonimmediate operands and runs
;; ix86_fixup_binary_operands over them in its preparation code.
2026 (define_expand "sse_movlhps_exp"
2027 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2030 (match_operand:V4SF 1 "nonimmediate_operand" "")
2031 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2032 (parallel [(const_int 0)
2037 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; movlhps and its memory forms.  Operand 1 is always tied to the
;; destination.  Three alternatives:
;;   0: register source                -> movlhps
;;   1: memory source                  -> movhps load
;;   2: offsettable memory destination -> movlps store to %H0 (presumably
;;      the upper half of the destination -- confirm against print_operand)
;; Operand validity is checked with ix86_binary_operator_ok.
2039 (define_insn "sse_movlhps"
2040 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2043 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2044 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2045 (parallel [(const_int 0)
2049 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2051 movlhps\t{%2, %0|%0, %2}
2052 movhps\t{%2, %0|%0, %2}
2053 movlps\t{%2, %H0|%H0, %2}"
2054 [(set_attr "type" "ssemov")
2055 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the selector picks elements 2, 6, 3, 7 in that order.  Operand 1
;; is tied to the destination register; operand 2 may be a register or memory.
2057 (define_insn "sse_unpckhps"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x")
2061 (match_operand:V4SF 1 "register_operand" "0")
2062 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2063 (parallel [(const_int 2) (const_int 6)
2064 (const_int 3) (const_int 7)])))]
2066 "unpckhps\t{%2, %0|%0, %2}"
2067 [(set_attr "type" "sselog")
2068 (set_attr "mode" "V4SF")])
;; unpcklps: the selector picks elements 0, 4, 1, 5 in that order.  Operand 1
;; is tied to the destination register; operand 2 may be a register or memory.
2070 (define_insn "sse_unpcklps"
2071 [(set (match_operand:V4SF 0 "register_operand" "=x")
2074 (match_operand:V4SF 1 "register_operand" "0")
2075 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2076 (parallel [(const_int 0) (const_int 4)
2077 (const_int 1) (const_int 5)])))]
2079 "unpcklps\t{%2, %0|%0, %2}"
2080 [(set_attr "type" "sselog")
2081 (set_attr "mode" "V4SF")])
2083 ;; These are modeled with the same vec_concat as the others so that we
2084 ;; capture users of shufps that can use the new instructions
;; movshdup takes a single source (register or memory) and writes a register;
;; the selector list starts at element 1.  The insn carries prefix_rep.
2085 (define_insn "sse3_movshdup"
2086 [(set (match_operand:V4SF 0 "register_operand" "=x")
2089 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2091 (parallel [(const_int 1)
2096 "movshdup\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sse")
2098 (set_attr "prefix_rep" "1")
2099 (set_attr "mode" "V4SF")])
;; movsldup takes a single source (register or memory) and writes a register;
;; the selector list starts at element 0.  The insn carries prefix_rep.
2101 (define_insn "sse3_movsldup"
2102 [(set (match_operand:V4SF 0 "register_operand" "=x")
2105 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2107 (parallel [(const_int 0)
2112 "movsldup\t{%1, %0|%0, %1}"
2113 [(set_attr "type" "sse")
2114 (set_attr "prefix_rep" "1")
2115 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as one integer (operand 3).  The mask
;; is split into four 2-bit fields that are handed to sse_shufps_1 as separate
;; selector operands; the two upper fields are biased by 4 so that they fall
;; in the 4..7 range required by that pattern's last two selectors.
2117 (define_expand "sse_shufps"
2118 [(match_operand:V4SF 0 "register_operand" "")
2119 (match_operand:V4SF 1 "register_operand" "")
2120 (match_operand:V4SF 2 "nonimmediate_operand" "")
2121 (match_operand:SI 3 "const_int_operand" "")]
2124 int mask = INTVAL (operands[3]);
2125 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2126 GEN_INT ((mask >> 0) & 3),
2127 GEN_INT ((mask >> 2) & 3),
2128 GEN_INT (((mask >> 4) & 3) + 4),
2129 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four selector operands: operands
;; 3 and 4 are in 0..3, operands 5 and 6 are in 4..7.  The output code folds
;; them back into one immediate (operands 3 and 4 at bits 0 and 2; operands 5
;; and 6, less 4, at bits 4 and 6) and overwrites operands[3] with it so the
;; template can print it as %3.
2133 (define_insn "sse_shufps_1"
2134 [(set (match_operand:V4SF 0 "register_operand" "=x")
2137 (match_operand:V4SF 1 "register_operand" "0")
2138 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2139 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2140 (match_operand 4 "const_0_to_3_operand" "")
2141 (match_operand 5 "const_4_to_7_operand" "")
2142 (match_operand 6 "const_4_to_7_operand" "")])))]
2146 mask |= INTVAL (operands[3]) << 0;
2147 mask |= INTVAL (operands[4]) << 2;
2148 mask |= (INTVAL (operands[5]) - 4) << 4;
2149 mask |= (INTVAL (operands[6]) - 4) << 6;
2150 operands[3] = GEN_INT (mask);
2152 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2154 [(set_attr "type" "sselog")
2155 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF.  Alternatives:
;;   m <- x : movhps store
;;   x <- x : movhlps, register to register
;;   x <- o : movlps load from the high half (%H1) of offsettable memory
2157 (define_insn "sse_storehps"
2158 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2160 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2161 (parallel [(const_int 2) (const_int 3)])))]
2164 movhps\t{%1, %0|%0, %1}
2165 movhlps\t{%1, %0|%0, %1}
2166 movlps\t{%H1, %0|%0, %H1}"
2167 [(set_attr "type" "ssemov")
2168 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander counterpart of sse_loadhps: combines elements 0 and 1 of V4SF
;; operand 1 with V2SF operand 2.  Its preparation statement passes the
;; operand array through ix86_fixup_binary_operands.
2170 (define_expand "sse_loadhps_exp"
2171 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2174 (match_operand:V4SF 1 "nonimmediate_operand" "")
2175 (parallel [(const_int 0) (const_int 1)]))
2176 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2178 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Keep elements 0 and 1 of operand 1 (tied to the destination) and take the
;; other half from V2SF operand 2.  Alternatives:
;;   x <- m : movhps load
;;   x <- x : movlhps, register to register
;;   o <- x : movlps store into the high half (%H0) of the memory destination
2180 (define_insn "sse_loadhps"
2181 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2184 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2185 (parallel [(const_int 0) (const_int 1)]))
2186 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2189 movhps\t{%2, %0|%0, %2}
2190 movlhps\t{%2, %0|%0, %2}
2191 movlps\t{%2, %H0|%H0, %2}"
2192 [(set_attr "type" "ssemov")
2193 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF into a V2SF.  Alternatives:
;;   m <- x : movlps store
;;   x <- x : movaps, whole-register copy
;;   x <- m : movlps load
2195 (define_insn "sse_storelps"
2196 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2198 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2199 (parallel [(const_int 0) (const_int 1)])))]
2202 movlps\t{%1, %0|%0, %1}
2203 movaps\t{%1, %0|%0, %1}
2204 movlps\t{%1, %0|%0, %1}"
2205 [(set_attr "type" "ssemov")
2206 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander counterpart of sse_loadlps: combines V2SF operand 2 with elements
;; 2 and 3 of V4SF operand 1.  Its preparation statement passes the operand
;; array through ix86_fixup_binary_operands.
2208 (define_expand "sse_loadlps_exp"
2209 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2211 (match_operand:V2SF 2 "nonimmediate_operand" "")
2213 (match_operand:V4SF 1 "nonimmediate_operand" "")
2214 (parallel [(const_int 2) (const_int 3)]))))]
2216 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Take V2SF operand 2 together with elements 2 and 3 of operand 1.
;; Alternatives:
;;   operand 2 tied to the destination, operand 1 in a register:
;;       shufps $0xe4 with operand 1 as the source
;;   operand 1 tied, operand 2 in memory : movlps load
;;   memory destination (operand 1 tied), operand 2 in a register : movlps store
2218 (define_insn "sse_loadlps"
2219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2224 (parallel [(const_int 2) (const_int 3)]))))]
2227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2228 movlps\t{%2, %0|%0, %2}
2229 movlps\t{%2, %0|%0, %2}"
2230 [(set_attr "type" "sselog,ssemov,ssemov")
2231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the destination and operand 2 is
;; the register named as the movss source.
2233 (define_insn "sse_movss"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x")
2236 (match_operand:V4SF 2 "register_operand" "x")
2237 (match_operand:V4SF 1 "register_operand" "0")
2240 "movss\t{%2, %0|%0, %2}"
2241 [(set_attr "type" "ssemov")
2242 (set_attr "mode" "SF")])
;; V4SF result built from an SF register that is tied to the destination;
;; emitted as shufps with immediate 0 of the register with itself.
2244 (define_insn "*vec_dupv4sf"
2245 [(set (match_operand:V4SF 0 "register_operand" "=x")
2247 (match_operand:SF 1 "register_operand" "0")))]
2249 "shufps\t{$0, %0, %0|%0, %0, 0}"
2250 [(set_attr "type" "sselog1")
2251 (set_attr "mode" "V4SF")])
2253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands.  Alternatives:
;;   x  <- 0, x : unpcklps
;;   x  <- m, C : movss load of operand 1
;;   *y <- 0, *y: punpckldq (MMX register)
;;   *y <- m, C : movd load of operand 1 (MMX register)
2256 (define_insn "*sse_concatv2sf"
2257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2263 unpcklps\t{%2, %0|%0, %2}
2264 movss\t{%1, %0|%0, %1}
2265 punpckldq\t{%2, %0|%0, %2}
2266 movd\t{%1, %0|%0, %1}"
2267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands; operand 1 is tied to the destination.
;; A register operand 2 uses movlhps, a memory operand 2 uses movhps.
2270 (define_insn "*sse_concatv4sf"
2271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2273 (match_operand:V2SF 1 "register_operand" " 0,0")
2274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2277 movlhps\t{%2, %0|%0, %2}
2278 movhps\t{%2, %0|%0, %2}"
2279 [(set_attr "type" "ssemov")
2280 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialization; delegates to ix86_expand_vector_init with
;; false as its first argument, the destination, and operand 1.
2282 (define_expand "vec_initv4sf"
2283 [(match_operand:V4SF 0 "register_operand" "")
2284 (match_operand 1 "" "")]
2287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert SF operand 2 into a V4SF whose remaining contents come from operand
;; 1 (either tied to the destination or the constant C).  The first three
;; alternatives print movss (register source), movss (memory source) and movd
;; (general register source, Y2 destination).
2291 (define_insn "vec_setv4sf_0"
2292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2300 movss\t{%2, %0|%0, %2}
2301 movss\t{%2, %0|%0, %2}
2302 movd\t{%2, %0|%0, %2}
2304 [(set_attr "type" "ssemov")
2305 (set_attr "mode" "SF")])
2307 ;; A subset is vec_setv4sf.
;; Operand 3 is a power of two in 1..8.  The output code replaces it with its
;; base-2 logarithm shifted left by 4 before printing the insertps immediate.
2308 (define_insn "*vec_setv4sf_sse4_1"
2309 [(set (match_operand:V4SF 0 "register_operand" "=x")
2312 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2313 (match_operand:V4SF 1 "register_operand" "0")
2314 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2317 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2318 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2320 [(set_attr "type" "sselog")
2321 (set_attr "prefix_extra" "1")
2322 (set_attr "mode" "V4SF")])
;; insertps modeled as an unspec over V4SF operand 2, operand 1 (tied to the
;; destination) and an immediate operand 3 in 0..255, which is printed
;; unchanged.
;; The output template is a plain string in the machine description; it takes
;; no trailing ';' (that character only starts a comment here).
2324 (define_insn "sse4_1_insertps"
2325 [(set (match_operand:V4SF 0 "register_operand" "=x")
2326 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2327 (match_operand:V4SF 1 "register_operand" "0")
2328 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2331 "insertps\t{%3, %2, %0|%0, %2, %3}"
2332 [(set_attr "type" "sselog")
2333 (set_attr "prefix_extra" "1")
2334 (set_attr "mode" "V4SF")])
;; After reload, setting an SF (register or constant) into a V4SF memory
;; destination is emitted as a plain SFmode move to offset 0 of that memory.
2337 [(set (match_operand:V4SF 0 "memory_operand" "")
2340 (match_operand:SF 1 "nonmemory_operand" ""))
2343 "TARGET_SSE && reload_completed"
2346 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Set one element of a V4SF register.  Delegates to ix86_expand_vector_set
;; with the vector, the SF value and the integer value of operand 2.
2350 (define_expand "vec_setv4sf"
2351 [(match_operand:V4SF 0 "register_operand" "")
2352 (match_operand:SF 1 "register_operand" "")
2353 (match_operand 2 "const_int_operand" "")]
2356 ix86_expand_vector_set (false, operands[0], operands[1],
2357 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as an SF; source and destination may not both
;; be memory.  After reload the pattern is split into an ordinary SFmode move
;; whose source is operand 1 re-expressed in SFmode (through gen_rtx_REG on
;; its register number or through gen_lowpart).
2361 (define_insn_and_split "*vec_extractv4sf_0"
2362 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2364 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2365 (parallel [(const_int 0)])))]
2366 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2368 "&& reload_completed"
2371 rtx op1 = operands[1];
2373 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2375 op1 = gen_lowpart (SFmode, op1);
2376 emit_move_insn (operands[0], op1);
;; extractps: element index operand 2 (0..3) of V4SF register operand 1 goes
;; to a general register or memory destination.
2380 (define_insn "*sse4_1_extractps"
2381 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2383 (match_operand:V4SF 1 "register_operand" "x")
2384 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2386 "extractps\t{%2, %1, %0|%0, %1, %2}"
2387 [(set_attr "type" "sselog")
2388 (set_attr "prefix_extra" "1")
2389 (set_attr "mode" "V4SF")])
;; Extract element i (operand 2, in 0..3) from a V4SF held in offsettable
;; memory.  The split emits an SFmode load from byte offset i*4 of operand 1.
2391 (define_insn_and_split "*vec_extract_v4sf_mem"
2392 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2394 (match_operand:V4SF 1 "memory_operand" "o")
2395 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2401 int i = INTVAL (operands[2]);
2403 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Extract one element of a V4SF register into an SF register.  Delegates to
;; ix86_expand_vector_extract with the integer value of operand 2.
2407 (define_expand "vec_extractv4sf"
2408 [(match_operand:SF 0 "register_operand" "")
2409 (match_operand:V4SF 1 "register_operand" "")
2410 (match_operand 2 "const_int_operand" "")]
2413 ix86_expand_vector_extract (false, operands[0], operands[1],
2414 INTVAL (operands[2]));
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;; Parallel double-precision floating point element swizzling
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander counterpart of sse2_unpckhpd: same V2DF operands, all allowed to
;; be registers or memory.  Its preparation statement passes the operand
;; array through ix86_fixup_binary_operands.
2424 (define_expand "sse2_unpckhpd_exp"
2425 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2428 (match_operand:V2DF 1 "nonimmediate_operand" "")
2429 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2430 (parallel [(const_int 1)
2433 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Alternatives:
;;   operand 1 tied, operand 2 in a register : unpckhpd
;;   operand 2 tied, operand 1 in offsettable memory :
;;       movlpd load from the high half (%H1) of operand 1
;;   memory destination (operand 2 tied), operand 1 in a register :
;;       movhpd store of operand 1
;; The insn condition rejects operands 1 and 2 both being memory.
2435 (define_insn "sse2_unpckhpd"
2436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2440 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2441 (parallel [(const_int 1)
2443 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2445 unpckhpd\t{%2, %0|%0, %2}
2446 movlpd\t{%H1, %0|%0, %H1}
2447 movhpd\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sselog,ssemov,ssemov")
2449 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup pattern on V2DF.  The register-destination alternative prints
;; movddup with a register or memory source; source and destination may not
;; both be memory.
2451 (define_insn "*sse3_movddup"
2452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2457 (parallel [(const_int 0)
2459 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2461 movddup\t{%1, %0|%0, %1}
2463 [(set_attr "type" "sselog1,ssemov")
2464 (set_attr "mode" "V2DF")])
;; After reload, with a memory destination and a register source: take the
;; DFmode view of the source register and store it twice, at byte offsets 0
;; and 8 of the destination.
2467 [(set (match_operand:V2DF 0 "memory_operand" "")
2470 (match_operand:V2DF 1 "register_operand" "")
2472 (parallel [(const_int 0)
2474 "TARGET_SSE3 && reload_completed"
2477 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2478 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2479 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander counterpart of sse2_unpcklpd: same V2DF operands, all allowed to
;; be registers or memory.  Its preparation statement passes the operand
;; array through ix86_fixup_binary_operands.
2483 (define_expand "sse2_unpcklpd_exp"
2484 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2487 (match_operand:V2DF 1 "nonimmediate_operand" "")
2488 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2489 (parallel [(const_int 0)
2492 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Operand 1 is tied to the destination in every alternative.  Alternatives:
;;   x <- x : unpcklpd
;;   x <- m : movhpd load from memory operand 2
;;   o <- x : movlpd store of operand 2 into the high half (%H0) of the
;;            offsettable memory destination
;; The insn condition rejects operands 1 and 2 both being memory.
2494 (define_insn "sse2_unpcklpd"
2495 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2498 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2499 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2500 (parallel [(const_int 0)
2502 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2504 unpcklpd\t{%2, %0|%0, %2}
2505 movhpd\t{%2, %0|%0, %2}
2506 movlpd\t{%2, %H0|%H0, %2}"
2507 [(set_attr "type" "sselog,ssemov,ssemov")
2508 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as one integer (operand 3) and
;; forwarding to sse2_shufpd_1 with separate selector operands.  Bit 1 of the
;; mask chooses 3 or 2 for the final selector, matching the 2..3 range that
;; sse2_shufpd_1 requires there.
2510 (define_expand "sse2_shufpd"
2511 [(match_operand:V2DF 0 "register_operand" "")
2512 (match_operand:V2DF 1 "register_operand" "")
2513 (match_operand:V2DF 2 "nonimmediate_operand" "")
2514 (match_operand:SI 3 "const_int_operand" "")]
2517 int mask = INTVAL (operands[3]);
2518 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2520 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two selector operands: operand 3
;; in 0..1 and operand 4 in 2..3.  The output code rebuilds the immediate as
;; operand 3 in bit 0 and (operand 4 - 2) in bit 1, then overwrites
;; operands[3] with it so the template can print it as %3.
2524 (define_insn "sse2_shufpd_1"
2525 [(set (match_operand:V2DF 0 "register_operand" "=x")
2528 (match_operand:V2DF 1 "register_operand" "0")
2529 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2530 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2531 (match_operand 4 "const_2_to_3_operand" "")])))]
2535 mask = INTVAL (operands[3]);
2536 mask |= (INTVAL (operands[4]) - 2) << 1;
2537 operands[3] = GEN_INT (mask);
2539 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2541 [(set_attr "type" "sselog")
2542 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF as a DF; source and destination may not both
;; be memory.  The memory-destination alternative prints a movhpd store.
2544 (define_insn "sse2_storehpd"
2545 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2547 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2548 (parallel [(const_int 1)])))]
2549 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2551 movhpd\t{%1, %0|%0, %1}
2554 [(set_attr "type" "ssemov,sselog1,ssemov")
2555 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 of a V2DF in memory into a register
;; becomes a plain DFmode move from byte offset 8 of that memory.
2558 [(set (match_operand:DF 0 "register_operand" "")
2560 (match_operand:V2DF 1 "memory_operand" "")
2561 (parallel [(const_int 1)])))]
2562 "TARGET_SSE2 && reload_completed"
2563 [(set (match_dup 0) (match_dup 1))]
2565 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF as a DF; source and destination may not both
;; be memory.  The memory-destination alternative prints a movlpd store.
2568 (define_insn "sse2_storelpd"
2569 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2571 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2572 (parallel [(const_int 0)])))]
2573 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2575 movlpd\t{%1, %0|%0, %1}
2578 [(set_attr "type" "ssemov")
2579 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF into a DF register becomes an
;; ordinary DFmode move whose source is operand 1 re-expressed in DFmode
;; (through gen_rtx_REG on its register number or through gen_lowpart).
2582 [(set (match_operand:DF 0 "register_operand" "")
2584 (match_operand:V2DF 1 "nonimmediate_operand" "")
2585 (parallel [(const_int 0)])))]
2586 "TARGET_SSE2 && reload_completed"
2589 rtx op1 = operands[1];
2591 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2593 op1 = gen_lowpart (DFmode, op1);
2594 emit_move_insn (operands[0], op1);
;; Expander counterpart of sse2_loadhpd: combines element 0 of V2DF operand 1
;; with DF operand 2.  Its preparation statement passes the operand array
;; through ix86_fixup_binary_operands.
2598 (define_expand "sse2_loadhpd_exp"
2599 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2602 (match_operand:V2DF 1 "nonimmediate_operand" "")
2603 (parallel [(const_int 0)]))
2604 (match_operand:DF 2 "nonimmediate_operand" "")))]
2606 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Keep element 0 of operand 1 and take the other element from DF operand 2.
;; The first three alternatives print:
;;   operand 1 tied, operand 2 in memory     : movhpd load
;;   operand 1 tied, operand 2 in a register : unpcklpd
;;   operand 2 tied, operand 1 in a register : shufpd $1 with operand 1 as source
;; NOTE(review): in the third alternative the DF value sits in the low
;; element of the destination, while shufpd with immediate 1 takes its low
;; result from the destination's high element -- confirm this alternative
;; really yields the value the pattern describes.
2608 (define_insn "sse2_loadhpd"
2609 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2612 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2613 (parallel [(const_int 0)]))
2614 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2615 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2617 movhpd\t{%2, %0|%0, %2}
2618 unpcklpd\t{%2, %0|%0, %2}
2619 shufpd\t{$1, %1, %0|%0, %1, 1}
2621 [(set_attr "type" "ssemov,sselog,sselog,other")
2622 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; After reload, when the destination is memory that keeps its own element 0
;; and receives DF register operand 1 as the other element, only that element
;; is written: a DFmode store at byte offset 8.
2625 [(set (match_operand:V2DF 0 "memory_operand" "")
2627 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2628 (match_operand:DF 1 "register_operand" "")))]
2629 "TARGET_SSE2 && reload_completed"
2630 [(set (match_dup 0) (match_dup 1))]
2632 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander counterpart of sse2_loadlpd: combines DF operand 2 with element 1
;; of V2DF operand 1.  Its preparation statement passes the operand array
;; through ix86_fixup_binary_operands.
2635 (define_expand "sse2_loadlpd_exp"
2636 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2638 (match_operand:DF 2 "nonimmediate_operand" "")
2640 (match_operand:V2DF 1 "nonimmediate_operand" "")
2641 (parallel [(const_int 1)]))))]
2643 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Take DF operand 2 as the new low element and keep element 1 of operand 1.
;; The first five alternatives print:
;;   operand 1 is C, operand 2 in memory      : movsd load
;;   operand 1 tied, operand 2 in memory      : movlpd load
;;   operand 1 tied, operand 2 in a register  : movsd
;;   operand 2 tied, operand 1 in a register  : shufpd $2
;;   operand 2 tied, operand 1 in memory      : movhpd load of the high half (%H1)
;; In the shufpd alternative the destination already holds operand 2, so the
;; shuffle source must be operand 1 (%1): immediate 2 keeps the destination's
;; low element and takes the high element from the source.  Naming %2 there
;; would shuffle the destination with itself and never read operand 1.
2645 (define_insn "sse2_loadlpd"
2646 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2648 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2650 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2651 (parallel [(const_int 1)]))))]
2652 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2654 movsd\t{%2, %0|%0, %2}
2655 movlpd\t{%2, %0|%0, %2}
2656 movsd\t{%2, %0|%0, %2}
2657 shufpd\t{$2, %1, %0|%0, %1, 2}
2658 movhpd\t{%H1, %0|%0, %H1}
2660 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2661 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, when the destination is memory that receives DF register
;; operand 1 as its first element and keeps its own element 1, only the first
;; element is written: a DFmode store at byte offset 0.  (Offset 8 is the
;; element this pattern preserves; it is the right offset only for the
;; sibling split that keeps element 0 instead.)
2664 [(set (match_operand:V2DF 0 "memory_operand" "")
2666 (match_operand:DF 1 "register_operand" "")
2667 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2668 "TARGET_SSE2 && reload_completed"
2669 [(set (match_dup 0) (match_dup 1))]
2671 operands[0] = adjust_address (operands[0], DFmode, 0);
2674 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extract element 1 of a V2DF when SSE is available but SSE2 is not, using
;; single-precision moves.  Alternatives:
;;   m <- x : movhps store
;;   x <- x : movhlps, register to register
;;   x <- o : movlps load from the high half (%H1) of offsettable memory
2676 (define_insn "*vec_extractv2df_1_sse"
2677 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2679 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2680 (parallel [(const_int 1)])))]
2681 "!TARGET_SSE2 && TARGET_SSE
2682 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2684 movhps\t{%1, %0|%0, %1}
2685 movhlps\t{%1, %0|%0, %1}
2686 movlps\t{%H1, %0|%0, %H1}"
2687 [(set_attr "type" "ssemov")
2688 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF when SSE is available but SSE2 is not, using
;; single-precision moves.  Alternatives:
;;   m <- x : movlps store
;;   x <- x : movaps, whole-register copy
;;   x <- m : movlps load
2690 (define_insn "*vec_extractv2df_0_sse"
2691 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2693 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2694 (parallel [(const_int 0)])))]
2695 "!TARGET_SSE2 && TARGET_SSE
2696 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2698 movlps\t{%1, %0|%0, %1}
2699 movaps\t{%1, %0|%0, %1}
2700 movlps\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssemov")
2702 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merge the low element of operand 2 with the high element of operand 1.
;; Alternatives:
;;   operand 1 tied, operand 2 in a register : movsd
;;   operand 1 tied, operand 2 in memory     : movlpd load
;;   memory destination (operand 1 tied), operand 2 in a register : movlpd store
;;   operand 2 tied, operand 1 in a register : shufpd $2
;;   operand 2 tied, operand 1 in memory     : movhps load of the high half (%H1)
;;   memory destination (operand 2 tied), operand 1 in a register :
;;       movhps store into the high half (%H0)
;; In the shufpd alternative the destination already holds operand 2, so the
;; shuffle source must be operand 1 (%1): immediate 2 keeps the destination's
;; low element and takes the high element from the source.  Naming %2 there
;; would shuffle the destination with itself and never read operand 1.
2704 (define_insn "sse2_movsd"
2705 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2707 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2708 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2712 movsd\t{%2, %0|%0, %2}
2713 movlpd\t{%2, %0|%0, %2}
2714 movlpd\t{%2, %0|%0, %2}
2715 shufpd\t{$2, %1, %0|%0, %1, 2}
2716 movhps\t{%H1, %0|%0, %H1}
2717 movhps\t{%1, %H0|%H0, %1}"
2718 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2719 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result built from one DF operand (register or memory), printed as
;; movddup.
2721 (define_insn "*vec_dupv2df_sse3"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x")
2724 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2726 "movddup\t{%1, %0|%0, %1}"
2727 [(set_attr "type" "sselog1")
2728 (set_attr "mode" "DF")])
;; V2DF result built from one DF register that is tied to the destination.
2730 (define_insn "vec_dupv2df"
2731 [(set (match_operand:V2DF 0 "register_operand" "=x")
2733 (match_operand:DF 1 "register_operand" "0")))]
2736 [(set_attr "type" "sselog1")
2737 (set_attr "mode" "V2DF")])
;; V2DF concatenation form that prints movddup with a register or memory
;; DF operand 1 as the only source.
2739 (define_insn "*vec_concatv2df_sse3"
2740 [(set (match_operand:V2DF 0 "register_operand" "=x")
2742 (match_operand:DF 1 "nonimmediate_operand" "xm")
2745 "movddup\t{%1, %0|%0, %1}"
2746 [(set_attr "type" "sselog1")
2747 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives:
;;   Y2 <- 0, Y2 : unpcklpd
;;   Y2 <- 0, m  : movhpd load of operand 2
;;   Y2 <- m, C  : movsd load of operand 1
;;   x  <- 0, x  : movlhps
;;   x  <- 0, m  : movhps load of operand 2
2749 (define_insn "*vec_concatv2df"
2750 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2752 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2753 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2756 unpcklpd\t{%2, %0|%0, %2}
2757 movhpd\t{%2, %0|%0, %2}
2758 movsd\t{%1, %0|%0, %1}
2759 movlhps\t{%2, %0|%0, %2}
2760 movhps\t{%2, %0|%0, %2}"
2761 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2762 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Set one element of a V2DF register.  Delegates to ix86_expand_vector_set
;; with the vector, the DF value and the integer value of operand 2.
2764 (define_expand "vec_setv2df"
2765 [(match_operand:V2DF 0 "register_operand" "")
2766 (match_operand:DF 1 "register_operand" "")
2767 (match_operand 2 "const_int_operand" "")]
2770 ix86_expand_vector_set (false, operands[0], operands[1],
2771 INTVAL (operands[2]));
;; Extract one element of a V2DF register into a DF register.  Delegates to
;; ix86_expand_vector_extract with the integer value of operand 2.
2775 (define_expand "vec_extractv2df"
2776 [(match_operand:DF 0 "register_operand" "")
2777 (match_operand:V2DF 1 "register_operand" "")
2778 (match_operand 2 "const_int_operand" "")]
2781 ix86_expand_vector_extract (false, operands[0], operands[1],
2782 INTVAL (operands[2]));
;; V2DF vector initialization; delegates to ix86_expand_vector_init with
;; false as its first argument, the destination, and operand 1.
2786 (define_expand "vec_initv2df"
2787 [(match_operand:V2DF 0 "register_operand" "")
2788 (match_operand 1 "" "")]
2791 ix86_expand_vector_init (false, operands[0], operands[1]);
2795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2797 ;; Parallel integral arithmetic
2799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI mode.  The preparation statement puts
;; a zero vector of the same mode into a fresh register as operands[2].
2801 (define_expand "neg<mode>2"
2802 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2805 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2807 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for every SSEMODEI mode.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy with the iterated <CODE>.
2809 (define_expand "<plusminus_insn><mode>3"
2810 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2812 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2813 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2815 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the add/subtract expander.  Operand 1 is tied to the
;; destination (constraint "<comm>0"); operand 2 may be a register or memory.
;; The mnemonic is assembled from "p", <plusminus_mnemonic> and the element
;; size suffix <ssevecsize>.
2817 (define_insn "*<plusminus_insn><mode>3"
2818 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2820 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2821 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2822 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2823 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2824 [(set_attr "type" "sseiadd")
2825 (set_attr "prefix_data16" "1")
2826 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes on the SSEMODE12 modes (V16QI and
;; V8HI).  The preparation statement runs ix86_fixup_binary_operands_no_copy.
2828 (define_expand "sse2_<plusminus_insn><mode>3"
2829 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2830 (sat_plusminus:SSEMODE12
2831 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2832 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2834 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus expander on V16QI and V8HI.  Operand 1
;; is tied to the destination (constraint "<comm>0"); operand 2 may be a
;; register or memory.  The mnemonic is assembled from "p",
;; <plusminus_mnemonic> and <ssevecsize>.
2836 (define_insn "*sse2_<plusminus_insn><mode>3"
2837 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2838 (sat_plusminus:SSEMODE12
2839 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2840 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2841 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2842 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "sseiadd")
2844 (set_attr "prefix_data16" "1")
2845 (set_attr "mode" "TI")])
;; V16QI multiply on register operands, only valid while reload has neither
;; started nor completed; it is expanded by the split code below, which shows
;; two strategies:
;;  - SSE5: unpack both inputs to V8HI with ix86_expand_sse5_unpack (high and
;;    low bytes separately), multiply the halves with mulv8hi3, and pack the
;;    low byte of each word into operand 0 with ix86_expand_sse5_pack.
;;  - Otherwise: widen with punpckhbw/punpcklbw of each input with itself,
;;    multiply as V8HI words, then gather the low byte of every product with
;;    a ladder of punpckhbw/punpcklbw steps (see the per-line byte diagrams).
2847 (define_insn_and_split "mulv16qi3"
2848 [(set (match_operand:V16QI 0 "register_operand" "")
2849 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2850 (match_operand:V16QI 2 "register_operand" "")))]
2852 && !(reload_completed || reload_in_progress)"
2857 rtx t[12], op0, op[3];
2862 /* On SSE5, we can take advantage of the pperm instruction to pack and
2863 unpack the bytes. Unpack data such that we've got a source byte in
2864 each low byte of each word. We don't care what goes into the high
2865 byte, so put 0 there. */
2866 for (i = 0; i < 6; ++i)
2867 t[i] = gen_reg_rtx (V8HImode);
2869 for (i = 0; i < 2; i++)
2872 op[1] = operands[i+1];
2873 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2876 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2879 /* Multiply words. */
2880 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2881 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2883 /* Pack the low byte of each word back into a single xmm */
2884 op[0] = operands[0];
2887 ix86_expand_sse5_pack (op);
2891 for (i = 0; i < 12; ++i)
2892 t[i] = gen_reg_rtx (V16QImode);
2894 /* Unpack data such that we've got a source byte in each low byte of
2895 each word. We don't care what goes into the high byte of each word.
2896 Rather than trying to get zero in there, most convenient is to let
2897 it be a copy of the low byte. */
2898 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2899 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2900 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2901 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2903 /* Multiply words. The end-of-line annotations here give a picture of what
2904 the output of that instruction looks like. Dot means don't care; the
2905 letters are the bytes of the result with A being the most significant. */
2906 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2907 gen_lowpart (V8HImode, t[0]),
2908 gen_lowpart (V8HImode, t[1])));
2909 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2910 gen_lowpart (V8HImode, t[2]),
2911 gen_lowpart (V8HImode, t[3])));
2913 /* Extract the relevant bytes and merge them back together. */
2914 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2915 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2916 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2917 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2918 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2919 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2922 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MULT.
2926 (define_expand "mulv8hi3"
2927 [(set (match_operand:V8HI 0 "register_operand" "")
2928 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2929 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2931 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply printed as pmullw.  Operand 1 is commutative with operand 2
;; ("%") and tied to the destination; operand 2 may be a register or memory.
2933 (define_insn "*mulv8hi3"
2934 [(set (match_operand:V8HI 0 "register_operand" "=x")
2935 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2936 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2937 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2938 "pmullw\t{%2, %0|%0, %2}"
2939 [(set_attr "type" "sseimul")
2940 (set_attr "prefix_data16" "1")
2941 (set_attr "mode" "TI")])
;; Expander counterpart of *smulv8hi3_highpart on V8HI operands; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy for MULT.
2943 (define_expand "smulv8hi3_highpart"
2944 [(set (match_operand:V8HI 0 "register_operand" "")
2949 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2951 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2954 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Printed as pmulhw.  Operand 1 is commutative with operand 2 ("%") and tied
;; to the destination; operand 2 may be a register or memory.
2956 (define_insn "*smulv8hi3_highpart"
2957 [(set (match_operand:V8HI 0 "register_operand" "=x")
2962 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2964 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2966 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2967 "pmulhw\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseimul")
2969 (set_attr "prefix_data16" "1")
2970 (set_attr "mode" "TI")])
;; Expander counterpart of *umulv8hi3_highpart on V8HI operands; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy for MULT.
2972 (define_expand "umulv8hi3_highpart"
2973 [(set (match_operand:V8HI 0 "register_operand" "")
2978 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2980 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2983 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Printed as pmulhuw.  Operand 1 is commutative with operand 2 ("%") and
;; tied to the destination; operand 2 may be a register or memory.
2985 (define_insn "*umulv8hi3_highpart"
2986 [(set (match_operand:V8HI 0 "register_operand" "=x")
2991 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2993 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2995 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2996 "pmulhuw\t{%2, %0|%0, %2}"
2997 [(set_attr "type" "sseimul")
2998 (set_attr "prefix_data16" "1")
2999 (set_attr "mode" "TI")])
;; Expander for a V2DI product formed from elements 0 and 2 of each V4SI
;; input; the preparation statement runs ix86_fixup_binary_operands_no_copy
;; for MULT in V4SImode.
3001 (define_expand "sse2_umulv2siv2di3"
3002 [(set (match_operand:V2DI 0 "register_operand" "")
3006 (match_operand:V4SI 1 "nonimmediate_operand" "")
3007 (parallel [(const_int 0) (const_int 2)])))
3010 (match_operand:V4SI 2 "nonimmediate_operand" "")
3011 (parallel [(const_int 0) (const_int 2)])))))]
3013 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Printed as pmuludq: elements 0 and 2 of each V4SI input produce a V2DI
;; result.  Operand 1 is commutative ("%") and tied to the destination;
;; operand 2 may be a register or memory.
3015 (define_insn "*sse2_umulv2siv2di3"
3016 [(set (match_operand:V2DI 0 "register_operand" "=x")
3020 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3021 (parallel [(const_int 0) (const_int 2)])))
3024 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3025 (parallel [(const_int 0) (const_int 2)])))))]
3026 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3027 "pmuludq\t{%2, %0|%0, %2}"
3028 [(set_attr "type" "sseimul")
3029 (set_attr "prefix_data16" "1")
3030 (set_attr "mode" "TI")])
;; Expander for a V2DI product formed from elements 0 and 2 of each V4SI
;; input (SSE4.1 variant); the preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
3032 (define_expand "sse4_1_mulv2siv2di3"
3033 [(set (match_operand:V2DI 0 "register_operand" "")
3037 (match_operand:V4SI 1 "nonimmediate_operand" "")
3038 (parallel [(const_int 0) (const_int 2)])))
3041 (match_operand:V4SI 2 "nonimmediate_operand" "")
3042 (parallel [(const_int 0) (const_int 2)])))))]
3044 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Printed as pmuldq (requires TARGET_SSE4_1): elements 0 and 2 of each V4SI
;; input produce a V2DI result.  Operand 1 is commutative ("%") and tied to
;; the destination; operand 2 may be a register or memory.
3046 (define_insn "*sse4_1_mulv2siv2di3"
3047 [(set (match_operand:V2DI 0 "register_operand" "=x")
3051 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3052 (parallel [(const_int 0) (const_int 2)])))
3055 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3056 (parallel [(const_int 0) (const_int 2)])))))]
3057 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3058 "pmuldq\t{%2, %0|%0, %2}"
3059 [(set_attr "type" "sseimul")
3060 (set_attr "prefix_extra" "1")
3061 (set_attr "mode" "TI")])
;; Expander counterpart of *sse2_pmaddwd: V8HI inputs, V4SI result, with each
;; input referenced twice through V4HI selections.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
3063 (define_expand "sse2_pmaddwd"
3064 [(set (match_operand:V4SI 0 "register_operand" "")
3069 (match_operand:V8HI 1 "nonimmediate_operand" "")
3070 (parallel [(const_int 0)
3076 (match_operand:V8HI 2 "nonimmediate_operand" "")
3077 (parallel [(const_int 0)
3083 (vec_select:V4HI (match_dup 1)
3084 (parallel [(const_int 1)
3089 (vec_select:V4HI (match_dup 2)
3090 (parallel [(const_int 1)
3093 (const_int 7)]))))))]
3095 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Printed as pmaddwd: V8HI inputs, V4SI result, with each input referenced
;; twice through V4HI selections (one list starting at element 0, the other
;; at element 1).  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.
3097 (define_insn "*sse2_pmaddwd"
3098 [(set (match_operand:V4SI 0 "register_operand" "=x")
3103 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3104 (parallel [(const_int 0)
3110 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3111 (parallel [(const_int 0)
3117 (vec_select:V4HI (match_dup 1)
3118 (parallel [(const_int 1)
3123 (vec_select:V4HI (match_dup 2)
3124 (parallel [(const_int 1)
3127 (const_int 7)]))))))]
3128 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3129 "pmaddwd\t{%2, %0|%0, %2}"
3130 [(set_attr "type" "sseiadd")
3131 (set_attr "prefix_data16" "1")
3132 (set_attr "mode" "TI")])
;; V4SI multiply expander on register operands.  The operand fix-up through
;; ix86_fixup_binary_operands_no_copy is applied only when TARGET_SSE4_1 or
;; TARGET_SSE5 is set.
3134 (define_expand "mulv4si3"
3135 [(set (match_operand:V4SI 0 "register_operand" "")
3136 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3137 (match_operand:V4SI 2 "register_operand" "")))]
3140 if (TARGET_SSE4_1 || TARGET_SSE5)
3141 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; V4SI multiply printed as pmulld (requires TARGET_SSE4_1).  Operand 1 is
;; commutative ("%") and tied to the destination; operand 2 may be a register
;; or memory.
3144 (define_insn "*sse4_1_mulv4si3"
3145 [(set (match_operand:V4SI 0 "register_operand" "=x")
3146 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3147 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3148 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3149 "pmulld\t{%2, %0|%0, %2}"
3150 [(set_attr "type" "sseimul")
3151 (set_attr "prefix_extra" "1")
3152 (set_attr "mode" "TI")])
3154 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3155 ;; multiply/add. In general, we expect the define_split to occur before
3156 ;; register allocation, so we have to handle the corner case where the target
3157 ;; is used as the base or index register in operands 1/2.
;; The destination is earlyclobbered ("=&x").  The split is allowed once
;; reload has completed, or earlier if operand 0 is mentioned in neither
;; operand 1 nor operand 2; it rewrites the multiply as a multiply-plus whose
;; extra operand (operands[3]) is the V4SI zero vector.
3158 (define_insn_and_split "*sse5_mulv4si3"
3159 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3160 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3161 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3164 "&& (reload_completed
3165 || (!reg_mentioned_p (operands[0], operands[1])
3166 && !reg_mentioned_p (operands[0], operands[2])))"
3170 (plus:V4SI (mult:V4SI (match_dup 1)
3174 operands[3] = CONST0_RTX (V4SImode);
3176 [(set_attr "type" "ssemuladd")
3177 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor SSE5.
;; Only valid before reload because it allocates fresh pseudos.  The split
;; multiplies elements 0/2 and (after shifting the inputs down one element)
;; elements 1/3 with gen_sse2_umulv2siv2di3, moves the wanted 32-bit results
;; into the low lanes with pshufd, and merges them with punpckldq.
3179 (define_insn_and_split "*sse2_mulv4si3"
3180 [(set (match_operand:V4SI 0 "register_operand" "")
3181 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3182 (match_operand:V4SI 2 "register_operand" "")))]
3183 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3184 && !(reload_completed || reload_in_progress)"
3189 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3195 t1 = gen_reg_rtx (V4SImode);
3196 t2 = gen_reg_rtx (V4SImode);
3197 t3 = gen_reg_rtx (V4SImode);
3198 t4 = gen_reg_rtx (V4SImode);
3199 t5 = gen_reg_rtx (V4SImode);
3200 t6 = gen_reg_rtx (V4SImode);
3201 thirtytwo = GEN_INT (32);
3203 /* Multiply elements 2 and 0. */
3204 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3207 /* Shift both input vectors down one element, so that elements 3
3208 and 1 are now in the slots for elements 2 and 0. For K8, at
3209 least, this is faster than using a shuffle. */
3210 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3211 gen_lowpart (TImode, op1),
3213 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3214 gen_lowpart (TImode, op2),
3216 /* Multiply elements 3 and 1. */
3217 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3220 /* Move the results in element 2 down to element 1; we don't care
3221 what goes in elements 2 and 3. */
3222 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3223 const0_rtx, const0_rtx));
3224 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3225 const0_rtx, const0_rtx));
3227 /* Merge the parts back together. */
3228 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from 32x32->64 unsigned multiplies
;; (gen_sse2_umulv2siv2di3).  Per 64-bit lane:
;;   result = lo(op1)*lo(op2) + ((lo(op1)*hi(op2)) << 32)
;;                            + ((lo(op2)*hi(op1)) << 32)
;; Only valid before reload because it allocates fresh pseudos.
3232 (define_insn_and_split "mulv2di3"
3233 [(set (match_operand:V2DI 0 "register_operand" "")
3234 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3235 (match_operand:V2DI 2 "register_operand" "")))]
3237 && !(reload_completed || reload_in_progress)"
3242 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3248 t1 = gen_reg_rtx (V2DImode);
3249 t2 = gen_reg_rtx (V2DImode);
3250 t3 = gen_reg_rtx (V2DImode);
3251 t4 = gen_reg_rtx (V2DImode);
3252 t5 = gen_reg_rtx (V2DImode);
3253 t6 = gen_reg_rtx (V2DImode);
3254 thirtytwo = GEN_INT (32);
3256 /* Multiply low parts. */
3257 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3258 gen_lowpart (V4SImode, op2)));
3260 /* Shift input vectors right 32 bits so we can multiply high parts. */
3261 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3262 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3264 /* Multiply high parts by low parts. */
3265 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3266 gen_lowpart (V4SImode, t3)));
3267 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3268 gen_lowpart (V4SImode, t2)));
3270 /* Shift them back. */
3271 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3272 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3274 /* Add the three parts together. */
3275 emit_insn (gen_addv2di3 (t6, t1, t4));
3276 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the low 16 bits of each product (gen_mulv8hi3), t2 the
;; signed high 16 bits (gen_smulv8hi3_highpart); the high-half interleave
;; of t1/t2 is written to operand 0 viewed as V8HI.
3280 (define_expand "vec_widen_smult_hi_v8hi"
3281 [(match_operand:V4SI 0 "register_operand" "")
3282 (match_operand:V8HI 1 "register_operand" "")
3283 (match_operand:V8HI 2 "register_operand" "")]
3286 rtx op1, op2, t1, t2, dest;
3290 t1 = gen_reg_rtx (V8HImode);
3291 t2 = gen_reg_rtx (V8HImode);
3292 dest = gen_lowpart (V8HImode, operands[0]);
3294 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3295 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3296 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the "hi" variant (low product bits from gen_mulv8hi3,
;; signed high bits from gen_smulv8hi3_highpart) but the low-half
;; interleave of t1/t2 forms the result.
3300 (define_expand "vec_widen_smult_lo_v8hi"
3301 [(match_operand:V4SI 0 "register_operand" "")
3302 (match_operand:V8HI 1 "register_operand" "")
3303 (match_operand:V8HI 2 "register_operand" "")]
3306 rtx op1, op2, t1, t2, dest;
3310 t1 = gen_reg_rtx (V8HImode);
3311 t2 = gen_reg_rtx (V8HImode);
3312 dest = gen_lowpart (V8HImode, operands[0]);
3314 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3315 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3316 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the low 16 bits of each product (gen_mulv8hi3), t2 the
;; unsigned high 16 bits (gen_umulv8hi3_highpart); the high-half
;; interleave of t1/t2 is written to operand 0 viewed as V8HI.
3320 (define_expand "vec_widen_umult_hi_v8hi"
3321 [(match_operand:V4SI 0 "register_operand" "")
3322 (match_operand:V8HI 1 "register_operand" "")
3323 (match_operand:V8HI 2 "register_operand" "")]
3326 rtx op1, op2, t1, t2, dest;
3330 t1 = gen_reg_rtx (V8HImode);
3331 t2 = gen_reg_rtx (V8HImode);
3332 dest = gen_lowpart (V8HImode, operands[0]);
3334 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3335 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3336 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.
;; Same scheme as the "hi" variant (low product bits from gen_mulv8hi3,
;; unsigned high bits from gen_umulv8hi3_highpart) but the low-half
;; interleave of t1/t2 forms the result.
3340 (define_expand "vec_widen_umult_lo_v8hi"
3341 [(match_operand:V4SI 0 "register_operand" "")
3342 (match_operand:V8HI 1 "register_operand" "")
3343 (match_operand:V8HI 2 "register_operand" "")]
3346 rtx op1, op2, t1, t2, dest;
3350 t1 = gen_reg_rtx (V8HImode);
3351 t2 = gen_reg_rtx (V8HImode);
3352 dest = gen_lowpart (V8HImode, operands[0]);
3354 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3355 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3356 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is high-interleaved with itself and the two results are
;; multiplied with gen_sse2_umulv2siv2di3.
;; NOTE(review): this signed expander emits the same unsigned multiply
;; (gen_sse2_umulv2siv2di3) as vec_widen_umult_hi_v4si -- confirm the
;; result is correct when an input element is negative.
3360 (define_expand "vec_widen_smult_hi_v4si"
3361 [(match_operand:V2DI 0 "register_operand" "")
3362 (match_operand:V4SI 1 "register_operand" "")
3363 (match_operand:V4SI 2 "register_operand" "")]
3366 rtx op1, op2, t1, t2;
3370 t1 = gen_reg_rtx (V4SImode);
3371 t2 = gen_reg_rtx (V4SImode);
3373 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3374 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3375 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is low-interleaved with itself and the two results are
;; multiplied with gen_sse2_umulv2siv2di3.
;; NOTE(review): this signed expander emits the same unsigned multiply
;; (gen_sse2_umulv2siv2di3) as vec_widen_umult_lo_v4si -- confirm the
;; result is correct when an input element is negative.
3379 (define_expand "vec_widen_smult_lo_v4si"
3380 [(match_operand:V2DI 0 "register_operand" "")
3381 (match_operand:V4SI 1 "register_operand" "")
3382 (match_operand:V4SI 2 "register_operand" "")]
3385 rtx op1, op2, t1, t2;
3389 t1 = gen_reg_rtx (V4SImode);
3390 t2 = gen_reg_rtx (V4SImode);
3392 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3393 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3394 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is high-interleaved with itself so its upper elements are
;; replicated, then gen_sse2_umulv2siv2di3 forms the 64-bit products
;; directly in operand 0.
3398 (define_expand "vec_widen_umult_hi_v4si"
3399 [(match_operand:V2DI 0 "register_operand" "")
3400 (match_operand:V4SI 1 "register_operand" "")
3401 (match_operand:V4SI 2 "register_operand" "")]
3404 rtx op1, op2, t1, t2;
3408 t1 = gen_reg_rtx (V4SImode);
3409 t2 = gen_reg_rtx (V4SImode);
3411 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3412 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3413 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is low-interleaved with itself so its lower elements are
;; replicated, then gen_sse2_umulv2siv2di3 forms the 64-bit products
;; directly in operand 0.
3417 (define_expand "vec_widen_umult_lo_v4si"
3418 [(match_operand:V2DI 0 "register_operand" "")
3419 (match_operand:V4SI 1 "register_operand" "")
3420 (match_operand:V4SI 2 "register_operand" "")]
3423 rtx op1, op2, t1, t2;
3427 t1 = gen_reg_rtx (V4SImode);
3428 t2 = gen_reg_rtx (V4SImode);
3430 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3431 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3432 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (op1, op2).
;; The pmaddwd result goes to a fresh V4SI temporary, which is then
;; added to the V4SI accumulator passed as operand 3.
3436 (define_expand "sdot_prodv8hi"
3437 [(match_operand:V4SI 0 "register_operand" "")
3438 (match_operand:V8HI 1 "register_operand" "")
3439 (match_operand:V8HI 2 "register_operand" "")
3440 (match_operand:V4SI 3 "register_operand" "")]
3443 rtx t = gen_reg_rtx (V4SImode);
3444 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3445 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step: V4SI x V4SI accumulated into V2DI.
;; t1 = umul (op1, op2) + operand 3 (the accumulator).  Copies of both
;; inputs are then shifted as whole TImode values (gen_sse2_lshrti3) into
;; t2/t3 and multiplied again into t4, covering the remaining elements.
;; The result is t1 + t4.
3449 (define_expand "udot_prodv4si"
3450 [(match_operand:V2DI 0 "register_operand" "")
3451 (match_operand:V4SI 1 "register_operand" "")
3452 (match_operand:V4SI 2 "register_operand" "")
3453 (match_operand:V2DI 3 "register_operand" "")]
3458 t1 = gen_reg_rtx (V2DImode);
3459 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3460 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3462 t2 = gen_reg_rtx (V4SImode);
3463 t3 = gen_reg_rtx (V4SImode);
3464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3465 gen_lowpart (TImode, operands[1]),
3467 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3468 gen_lowpart (TImode, operands[2]),
3471 t4 = gen_reg_rtx (V2DImode);
3472 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3474 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI),
;; printed as psra<ssevecsize>.  The source vector is tied to the
;; destination; the SI shift count uses the "xN" constraint alternatives.
3478 (define_insn "ashr<mode>3"
3479 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3481 (match_operand:SSEMODE24 1 "register_operand" "0")
3482 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3484 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3485 [(set_attr "type" "sseishft")
3486 (set_attr "prefix_data16" "1")
3487 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI), printed as psrl<ssevecsize>.  The source vector is
;; tied to the destination; the SI shift count uses the "xN" constraint
;; alternatives.
3489 (define_insn "lshr<mode>3"
3490 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3491 (lshiftrt:SSEMODE248
3492 (match_operand:SSEMODE248 1 "register_operand" "0")
3493 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3495 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3496 [(set_attr "type" "sseishft")
3497 (set_attr "prefix_data16" "1")
3498 (set_attr "mode" "TI")])
;; Per-element left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI),
;; printed as psll<ssevecsize>.  The source vector is tied to the
;; destination; the SI shift count uses the "xN" constraint alternatives.
3500 (define_insn "ashl<mode>3"
3501 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3503 (match_operand:SSEMODE248 1 "register_operand" "0")
3504 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3506 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3507 [(set_attr "type" "sseishft")
3508 (set_attr "prefix_data16" "1")
3509 (set_attr "mode" "TI")])
;; Whole-vector left shift for any SSEMODEI mode.  The shift is expressed
;; as a TImode ashift, so operands 0 and 1 are re-viewed as TImode with
;; gen_lowpart before the pattern is emitted.  The shift count must
;; satisfy const_0_to_255_mul_8_operand.
3511 (define_expand "vec_shl_<mode>"
3512 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3513 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3514 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3517 operands[0] = gen_lowpart (TImode, operands[0]);
3518 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for any SSEMODEI mode.  The shift is
;; expressed as a TImode lshiftrt, so operands 0 and 1 are re-viewed as
;; TImode with gen_lowpart before the pattern is emitted.  The shift count
;; must satisfy const_0_to_255_mul_8_operand.
3521 (define_expand "vec_shr_<mode>"
3522 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3523 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3524 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3527 operands[0] = gen_lowpart (TImode, operands[0]);
3528 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for the V16QI binary operations generated from the <code>
;; iterator.  Its only preparation step is canonicalizing the operands
;; with ix86_fixup_binary_operands_no_copy.
3531 (define_expand "<code>v16qi3"
3532 [(set (match_operand:V16QI 0 "register_operand" "")
3534 (match_operand:V16QI 1 "nonimmediate_operand" "")
3535 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3537 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for the V16QI <code> operations, printed as
;; p<maxminiprefix>b.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be register or memory.
3539 (define_insn "*<code>v16qi3"
3540 [(set (match_operand:V16QI 0 "register_operand" "=x")
3542 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3543 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3544 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3545 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3546 [(set_attr "type" "sseiadd")
3547 (set_attr "prefix_data16" "1")
3548 (set_attr "mode" "TI")])
;; Expander for the V8HI binary operations generated from the <code>
;; iterator.  Its only preparation step is canonicalizing the operands
;; with ix86_fixup_binary_operands_no_copy.
3550 (define_expand "<code>v8hi3"
3551 [(set (match_operand:V8HI 0 "register_operand" "")
3553 (match_operand:V8HI 1 "nonimmediate_operand" "")
3554 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3556 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for the V8HI <code> operations, printed as
;; p<maxminiprefix>w.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be register or memory.
3558 (define_insn "*<code>v8hi3"
3559 [(set (match_operand:V8HI 0 "register_operand" "=x")
3561 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3562 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3563 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3564 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3565 [(set_attr "type" "sseiadd")
3566 (set_attr "prefix_data16" "1")
3567 (set_attr "mode" "TI")])
;; Unsigned maximum of V8HI vectors.  Two strategies appear in the body:
;; canonicalize the operands for a directly matching insn, or synthesize
;; the result as  umax (a, b) = (a -us b) + b  using an unsigned
;; saturating subtract followed by an add.  In the synthesized form a
;; fresh temporary is used when the destination is the same rtx as
;; operand 2, so that operand 2 is still intact for the final add.
;; NOTE(review): presumably the first strategy is selected when a native
;; instruction is available -- confirm against the guarding condition.
3569 (define_expand "umaxv8hi3"
3570 [(set (match_operand:V8HI 0 "register_operand" "")
3571 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3572 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3576 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3579 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3580 if (rtx_equal_p (op3, op2))
3581 op3 = gen_reg_rtx (V8HImode);
3582 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3583 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  Either the
;; operands are canonicalized for a directly matching insn, or the result
;; is built as a vector conditional through ix86_expand_int_vcond:
;;   dest = (op1 > op2) ? op1 : op2      (signed GT)
;; xops follows the vcond operand layout: 0 = dest, 1 = value if true,
;; 2 = value if false, 3 = comparison, 4/5 = comparison operands.
3588 (define_expand "smax<mode>3"
3589 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3590 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3591 (match_operand:SSEMODE14 2 "register_operand" "")))]
3595 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3601 xops[0] = operands[0];
3602 xops[1] = operands[1];
3603 xops[2] = operands[2];
3604 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3605 xops[4] = operands[1];
3606 xops[5] = operands[2];
3607 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operations on the SSEMODE14 modes
;; (V16QI, V4SI), printed as p<maxminiprefix><ssevecsize>.  Operand 1 is
;; commutative and tied to the destination.
3613 (define_insn "*sse4_1_<code><mode>3"
3614 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3616 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3617 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3618 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3619 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3620 [(set_attr "type" "sseiadd")
3621 (set_attr "prefix_extra" "1")
3622 (set_attr "mode" "TI")])
;; Unsigned maximum of V4SI vectors.  Either the operands are
;; canonicalized for a directly matching insn, or the result is built as
;; a vector conditional through ix86_expand_int_vcond:
;;   dest = (op1 >u op2) ? op1 : op2     (unsigned GTU)
;; xops follows the vcond operand layout: 0 = dest, 1 = value if true,
;; 2 = value if false, 3 = comparison, 4/5 = comparison operands.
3624 (define_expand "umaxv4si3"
3625 [(set (match_operand:V4SI 0 "register_operand" "")
3626 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3627 (match_operand:V4SI 2 "register_operand" "")))]
3631 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3637 xops[0] = operands[0];
3638 xops[1] = operands[1];
3639 xops[2] = operands[2];
3640 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3641 xops[4] = operands[1];
3642 xops[5] = operands[2];
3643 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operations on the SSEMODE24 modes
;; (V8HI, V4SI), printed as p<maxminiprefix><ssevecsize>.  Operand 1 is
;; commutative and tied to the destination.
3649 (define_insn "*sse4_1_<code><mode>3"
3650 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3652 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3653 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3654 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3655 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3656 [(set_attr "type" "sseiadd")
3657 (set_attr "prefix_extra" "1")
3658 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  Either the
;; operands are canonicalized for a directly matching insn, or the result
;; is built as a vector conditional through ix86_expand_int_vcond with the
;; selected values swapped relative to smax:
;;   dest = (op1 > op2) ? op2 : op1      (signed GT)
3660 (define_expand "smin<mode>3"
3661 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3662 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3663 (match_operand:SSEMODE14 2 "register_operand" "")))]
3667 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3673 xops[0] = operands[0];
3674 xops[1] = operands[2];
3675 xops[2] = operands[1];
3676 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3677 xops[4] = operands[1];
3678 xops[5] = operands[2];
3679 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  Either the
;; operands are canonicalized for a directly matching insn, or the result
;; is built as a vector conditional through ix86_expand_int_vcond with the
;; selected values swapped relative to umax:
;;   dest = (op1 >u op2) ? op2 : op1     (unsigned GTU)
3685 (define_expand "umin<mode>3"
3686 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3687 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3688 (match_operand:SSEMODE24 2 "register_operand" "")))]
3692 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3698 xops[0] = operands[0];
3699 xops[1] = operands[2];
3700 xops[2] = operands[1];
3701 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3702 xops[4] = operands[1];
3703 xops[5] = operands[2];
3704 ok = ix86_expand_int_vcond (xops);
3710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3712 ;; Parallel integral comparisons
3714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Enabled for SSE2 when SSE5 is not in use; the
;; operands are canonicalized with ix86_fixup_binary_operands_no_copy.
3716 (define_expand "sse2_eq<mode>3"
3717 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3719 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
3720 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
3721 "TARGET_SSE2 && !TARGET_SSE5"
3722 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; SSE2 element-wise equality compare, printed as pcmpeq<ssevecsize>.
;; Operand 1 is commutative and tied to the destination.  Disabled when
;; SSE5 is in use.
3724 (define_insn "*sse2_eq<mode>3"
3725 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3727 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3728 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3729 "TARGET_SSE2 && !TARGET_SSE5
3730 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3731 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3732 [(set_attr "type" "ssecmp")
3733 (set_attr "prefix_data16" "1")
3734 (set_attr "mode" "TI")])
;; Expander for element-wise equality on V2DI.  Its only preparation
;; step is canonicalizing the operands with
;; ix86_fixup_binary_operands_no_copy.
3736 (define_expand "sse4_1_eqv2di3"
3737 [(set (match_operand:V2DI 0 "register_operand" "")
3739 (match_operand:V2DI 1 "nonimmediate_operand" "")
3740 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
3742 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 64-bit element equality compare, printed as pcmpeqq.
;; Operand 1 is commutative and tied to the destination.
3744 (define_insn "*sse4_1_eqv2di3"
3745 [(set (match_operand:V2DI 0 "register_operand" "=x")
3747 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3748 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3749 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3750 "pcmpeqq\t{%2, %0|%0, %2}"
3751 [(set_attr "type" "ssecmp")
3752 (set_attr "prefix_extra" "1")
3753 (set_attr "mode" "TI")])
;; SSE2 element-wise greater-than compare on the SSEMODE124 modes,
;; printed as pcmpgt<ssevecsize>.  Unlike the equality patterns, operand 1
;; is not marked commutative: it must be a register tied to the
;; destination.  Disabled when SSE5 is in use.
3755 (define_insn "sse2_gt<mode>3"
3756 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3758 (match_operand:SSEMODE124 1 "register_operand" "0")
3759 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3760 "TARGET_SSE2 && !TARGET_SSE5"
3761 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3762 [(set_attr "type" "ssecmp")
3763 (set_attr "prefix_data16" "1")
3764 (set_attr "mode" "TI")])
;; 64-bit element greater-than compare, printed as pcmpgtq.  Operand 1
;; must be a register tied to the destination; operand 2 may be register
;; or memory.
3766 (define_insn "sse4_2_gtv2di3"
3767 [(set (match_operand:V2DI 0 "register_operand" "=x")
3769 (match_operand:V2DI 1 "register_operand" "0")
3770 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3772 "pcmpgtq\t{%2, %0|%0, %2}"
3773 [(set_attr "type" "ssecmp")
3774 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODEI modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Expansion is delegated to ix86_expand_int_vcond, whose return value is
;; tested by the body.
3776 (define_expand "vcond<mode>"
3777 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3778 (if_then_else:SSEMODEI
3779 (match_operator 3 ""
3780 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3781 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3782 (match_operand:SSEMODEI 1 "general_operand" "")
3783 (match_operand:SSEMODEI 2 "general_operand" "")))]
3786 if (ix86_expand_int_vcond (operands))
;; Vector conditional select, "vcondu" flavour, for the SSEMODEI modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Expansion is delegated to ix86_expand_int_vcond, exactly as in
;; vcond<mode>; its return value is tested by the body.
3792 (define_expand "vcondu<mode>"
3793 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3794 (if_then_else:SSEMODEI
3795 (match_operator 3 ""
3796 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3797 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3798 (match_operand:SSEMODEI 1 "general_operand" "")
3799 (match_operand:SSEMODEI 2 "general_operand" "")))]
3802 if (ix86_expand_int_vcond (operands))
3808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3810 ;; Parallel bitwise logical operations
3812 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, implemented as XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx and forces it into a
;; register as operands[2].
3814 (define_expand "one_cmpl<mode>2"
3815 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3816 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3820 int i, n = GET_MODE_NUNITS (<MODE>mode);
3821 rtvec v = rtvec_alloc (n);
3823 for (i = 0; i < n; ++i)
3824 RTVEC_ELT (v, i) = constm1_rtx;
3826 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for integer vector modes on SSE targets without SSE2, printed
;; with the float-domain mnemonic andnps (insn mode V4SF).  Operand 1 is
;; the complemented input and is tied to the destination.
3829 (define_insn "*sse_nand<mode>3"
3830 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3832 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3833 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3834 "(TARGET_SSE && !TARGET_SSE2)"
3835 "andnps\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "sselog")
3837 (set_attr "mode" "V4SF")])
;; And-not for the SSEMODEI integer vector modes, printed as pandn.
;; Operand 1 is the complemented input and is tied to the destination;
;; operand 2 may be register or memory.
3839 (define_insn "sse2_nand<mode>3"
3840 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3842 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3843 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3845 "pandn\t{%2, %0|%0, %2}"
3846 [(set_attr "type" "sselog")
3847 (set_attr "prefix_data16" "1")
3848 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, printed as pandn.
;; Operand 1 is the complemented input and is tied to the destination.
3850 (define_insn "*nandtf3"
3851 [(set (match_operand:TF 0 "register_operand" "=x")
3853 (not:TF (match_operand:TF 1 "register_operand" "0"))
3854 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3856 "pandn\t{%2, %0|%0, %2}"
3857 [(set_attr "type" "sselog")
3858 (set_attr "prefix_data16" "1")
3859 (set_attr "mode" "TI")])
;; Expander for the <code> binary operations on the SSEMODEI modes.
;; Its only preparation step is canonicalizing the operands with
;; ix86_fixup_binary_operands_no_copy.
3861 (define_expand "<code><mode>3"
3862 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3864 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3865 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3867 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; <code> operations on integer vector modes for SSE targets without
;; SSE2, printed with the float-domain mnemonic <plogicprefix>ps (insn
;; mode V4SF).  Operand 1 is commutative and tied to the destination.
3869 (define_insn "*sse_<code><mode>3"
3870 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3872 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3873 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3874 "(TARGET_SSE && !TARGET_SSE2)
3875 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3876 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3877 [(set_attr "type" "sselog")
3878 (set_attr "mode" "V4SF")])
;; SSE2 <code> operations on the SSEMODEI modes, printed as
;; p<plogicprefix>.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be register or memory.
3880 (define_insn "*sse2_<code><mode>3"
3881 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3883 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3884 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3885 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3886 "p<plogicprefix>\t{%2, %0|%0, %2}"
3887 [(set_attr "type" "sselog")
3888 (set_attr "prefix_data16" "1")
3889 (set_attr "mode" "TI")])
;; Expander for the <code> binary operations on TFmode values.  Its only
;; preparation step is canonicalizing the operands with
;; ix86_fixup_binary_operands_no_copy.
3891 (define_expand "<code>tf3"
3892 [(set (match_operand:TF 0 "register_operand" "")
3894 (match_operand:TF 1 "nonimmediate_operand" "")
3895 (match_operand:TF 2 "nonimmediate_operand" "")))]
3897 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; <code> operations on TFmode values in SSE registers, printed as
;; p<plogicprefix>.  Only enabled for 64-bit targets.  Operand 1 is
;; commutative and tied to the destination.
3899 (define_insn "*<code>tf3"
3900 [(set (match_operand:TF 0 "register_operand" "=x")
3902 (match_operand:TF 1 "nonimmediate_operand" "%0")
3903 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3904 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3905 "p<plogicprefix>\t{%2, %0|%0, %2}"
3906 [(set_attr "type" "sselog")
3907 (set_attr "prefix_data16" "1")
3908 (set_attr "mode" "TI")])
3910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3912 ;; Parallel integral element swizzling
3914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3917 ;; op1 = abcdefghijklmnop
3918 ;; op2 = qrstuvwxyz012345
3919 ;; h1 = aqbrcsdteufvgwhx
3920 ;; l1 = iyjzk0l1m2n3o4p5
3921 ;; h2 = aiqybjrzcks0dlt1
3922 ;; l2 = emu2fnv3gow4hpx5
3923 ;; h3 = aeimquy2bfjnrvz3
3924 ;; l3 = cgkosw04dhlptx15
3925 ;; result = bdfhjlnprtvxz135
;;
;; Truncating pack of two V8HI vectors into one V16QI vector.  Both inputs
;; are viewed as V16QI and passed through three rounds of high/low byte
;; interleaves (h1/l1, h2/l2, h3/l3 in the trace above); a final low
;; interleave of l3 and h3 produces the result.
3926 (define_expand "vec_pack_trunc_v8hi"
3927 [(match_operand:V16QI 0 "register_operand" "")
3928 (match_operand:V8HI 1 "register_operand" "")
3929 (match_operand:V8HI 2 "register_operand" "")]
3932 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3934 op1 = gen_lowpart (V16QImode, operands[1]);
3935 op2 = gen_lowpart (V16QImode, operands[2]);
3936 h1 = gen_reg_rtx (V16QImode);
3937 l1 = gen_reg_rtx (V16QImode);
3938 h2 = gen_reg_rtx (V16QImode);
3939 l2 = gen_reg_rtx (V16QImode);
3940 h3 = gen_reg_rtx (V16QImode);
3941 l3 = gen_reg_rtx (V16QImode);
3943 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3944 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3945 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3946 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3947 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3948 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3949 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3960 ;; result = bdfhjlnp
;;
;; Truncating pack of two V4SI vectors into one V8HI vector.  Both inputs
;; are viewed as V8HI and passed through two rounds of high/low halfword
;; interleaves (h1/l1, h2/l2); a final low interleave of l2 and h2
;; produces the result.
3961 (define_expand "vec_pack_trunc_v4si"
3962 [(match_operand:V8HI 0 "register_operand" "")
3963 (match_operand:V4SI 1 "register_operand" "")
3964 (match_operand:V4SI 2 "register_operand" "")]
3967 rtx op1, op2, h1, l1, h2, l2;
3969 op1 = gen_lowpart (V8HImode, operands[1]);
3970 op2 = gen_lowpart (V8HImode, operands[2]);
3971 h1 = gen_reg_rtx (V8HImode);
3972 l1 = gen_reg_rtx (V8HImode);
3973 h2 = gen_reg_rtx (V8HImode);
3974 l2 = gen_reg_rtx (V8HImode);
3976 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3977 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3978 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3979 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3980 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI vector.  Both inputs
;; are viewed as V4SI, interleaved once into high (h1) and low (l1)
;; halves, and a final low interleave of l1 and h1 produces the result.
3990 (define_expand "vec_pack_trunc_v2di"
3991 [(match_operand:V4SI 0 "register_operand" "")
3992 (match_operand:V2DI 1 "register_operand" "")
3993 (match_operand:V2DI 2 "register_operand" "")]
3996 rtx op1, op2, h1, l1;
3998 op1 = gen_lowpart (V4SImode, operands[1]);
3999 op2 = gen_lowpart (V4SImode, operands[2]);
4000 h1 = gen_reg_rtx (V4SImode);
4001 l1 = gen_reg_rtx (V4SImode);
4003 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4004 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4005 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the high-half byte interleave of two V16QI vectors.
;; The index list pairs element i with element i+16 for i = 8..15; the
;; body emits the sse2_punpckhbw insn.
4009 (define_expand "vec_interleave_highv16qi"
4010 [(set (match_operand:V16QI 0 "register_operand" "")
4013 (match_operand:V16QI 1 "register_operand" "")
4014 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4015 (parallel [(const_int 8) (const_int 24)
4016 (const_int 9) (const_int 25)
4017 (const_int 10) (const_int 26)
4018 (const_int 11) (const_int 27)
4019 (const_int 12) (const_int 28)
4020 (const_int 13) (const_int 29)
4021 (const_int 14) (const_int 30)
4022 (const_int 15) (const_int 31)])))]
4025 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the low-half byte interleave of two V16QI vectors.
;; The index list pairs element i with element i+16 for i = 0..7; the
;; body emits the sse2_punpcklbw insn.
4029 (define_expand "vec_interleave_lowv16qi"
4030 [(set (match_operand:V16QI 0 "register_operand" "")
4033 (match_operand:V16QI 1 "register_operand" "")
4034 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4035 (parallel [(const_int 0) (const_int 16)
4036 (const_int 1) (const_int 17)
4037 (const_int 2) (const_int 18)
4038 (const_int 3) (const_int 19)
4039 (const_int 4) (const_int 20)
4040 (const_int 5) (const_int 21)
4041 (const_int 6) (const_int 22)
4042 (const_int 7) (const_int 23)])))]
4045 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the high-half halfword interleave of two V8HI
;; vectors.  The index list pairs element i with element i+8 for
;; i = 4..7; the body emits the sse2_punpckhwd insn.
;; NOTE(review): operand 0 carries the constraint string "=" whereas the
;; sibling vec_interleave_* expanders use an empty string -- looks like a
;; leftover; confirm it is harmless before normalizing.
4049 (define_expand "vec_interleave_highv8hi"
4050 [(set (match_operand:V8HI 0 "register_operand" "=")
4053 (match_operand:V8HI 1 "register_operand" "")
4054 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4055 (parallel [(const_int 4) (const_int 12)
4056 (const_int 5) (const_int 13)
4057 (const_int 6) (const_int 14)
4058 (const_int 7) (const_int 15)])))]
4061 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the low-half halfword interleave of two V8HI
;; vectors.  The index list pairs element i with element i+8 for
;; i = 0..3; the body emits the sse2_punpcklwd insn.
4065 (define_expand "vec_interleave_lowv8hi"
4066 [(set (match_operand:V8HI 0 "register_operand" "")
4069 (match_operand:V8HI 1 "register_operand" "")
4070 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4071 (parallel [(const_int 0) (const_int 8)
4072 (const_int 1) (const_int 9)
4073 (const_int 2) (const_int 10)
4074 (const_int 3) (const_int 11)])))]
4077 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the high-half doubleword interleave of two V4SI
;; vectors (index pairs 2/6 and 3/7); the body emits the sse2_punpckhdq
;; insn.
4081 (define_expand "vec_interleave_highv4si"
4082 [(set (match_operand:V4SI 0 "register_operand" "")
4085 (match_operand:V4SI 1 "register_operand" "")
4086 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4087 (parallel [(const_int 2) (const_int 6)
4088 (const_int 3) (const_int 7)])))]
4091 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half doubleword interleave of two V4SI
;; vectors (index pairs 0/4 and 1/5); the body emits the sse2_punpckldq
;; insn.
4095 (define_expand "vec_interleave_lowv4si"
4096 [(set (match_operand:V4SI 0 "register_operand" "")
4099 (match_operand:V4SI 1 "register_operand" "")
4100 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4101 (parallel [(const_int 0) (const_int 4)
4102 (const_int 1) (const_int 5)])))]
4105 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high-half quadword interleave of two V2DI
;; vectors (selection starts at index 1); the body emits the
;; sse2_punpckhqdq insn.
4109 (define_expand "vec_interleave_highv2di"
4110 [(set (match_operand:V2DI 0 "register_operand" "")
4113 (match_operand:V2DI 1 "register_operand" "")
4114 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4115 (parallel [(const_int 1)
4119 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half quadword interleave of two V2DI
;; vectors (selection starts at index 0); the body emits the
;; sse2_punpcklqdq insn.
4123 (define_expand "vec_interleave_lowv2di"
4124 [(set (match_operand:V2DI 0 "register_operand" "")
4127 (match_operand:V2DI 1 "register_operand" "")
4128 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4129 (parallel [(const_int 0)
4133 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Pack two V8HI vectors into one V16QI vector, printed as packsswb.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4137 (define_insn "sse2_packsswb"
4138 [(set (match_operand:V16QI 0 "register_operand" "=x")
4141 (match_operand:V8HI 1 "register_operand" "0"))
4143 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4145 "packsswb\t{%2, %0|%0, %2}"
4146 [(set_attr "type" "sselog")
4147 (set_attr "prefix_data16" "1")
4148 (set_attr "mode" "TI")])
;; Pack two V4SI vectors into one V8HI vector, printed as packssdw.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4150 (define_insn "sse2_packssdw"
4151 [(set (match_operand:V8HI 0 "register_operand" "=x")
4154 (match_operand:V4SI 1 "register_operand" "0"))
4156 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4158 "packssdw\t{%2, %0|%0, %2}"
4159 [(set_attr "type" "sselog")
4160 (set_attr "prefix_data16" "1")
4161 (set_attr "mode" "TI")])
;; Pack two V8HI vectors into one V16QI vector, printed as packuswb.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
4163 (define_insn "sse2_packuswb"
4164 [(set (match_operand:V16QI 0 "register_operand" "=x")
4167 (match_operand:V8HI 1 "register_operand" "0"))
4169 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4171 "packuswb\t{%2, %0|%0, %2}"
4172 [(set_attr "type" "sselog")
4173 (set_attr "prefix_data16" "1")
4174 (set_attr "mode" "TI")])
;; High-half byte interleave of two V16QI vectors, printed as punpckhbw.
;; The index list pairs element i with element i+16 for i = 8..15.
;; Operand 1 is tied to the destination.
4176 (define_insn "sse2_punpckhbw"
4177 [(set (match_operand:V16QI 0 "register_operand" "=x")
4180 (match_operand:V16QI 1 "register_operand" "0")
4181 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4182 (parallel [(const_int 8) (const_int 24)
4183 (const_int 9) (const_int 25)
4184 (const_int 10) (const_int 26)
4185 (const_int 11) (const_int 27)
4186 (const_int 12) (const_int 28)
4187 (const_int 13) (const_int 29)
4188 (const_int 14) (const_int 30)
4189 (const_int 15) (const_int 31)])))]
4191 "punpckhbw\t{%2, %0|%0, %2}"
4192 [(set_attr "type" "sselog")
4193 (set_attr "prefix_data16" "1")
4194 (set_attr "mode" "TI")])
;; Low-half byte interleave of two V16QI vectors, printed as punpcklbw.
;; The index list pairs element i with element i+16 for i = 0..7.
;; Operand 1 is tied to the destination.
4196 (define_insn "sse2_punpcklbw"
4197 [(set (match_operand:V16QI 0 "register_operand" "=x")
4200 (match_operand:V16QI 1 "register_operand" "0")
4201 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4202 (parallel [(const_int 0) (const_int 16)
4203 (const_int 1) (const_int 17)
4204 (const_int 2) (const_int 18)
4205 (const_int 3) (const_int 19)
4206 (const_int 4) (const_int 20)
4207 (const_int 5) (const_int 21)
4208 (const_int 6) (const_int 22)
4209 (const_int 7) (const_int 23)])))]
4211 "punpcklbw\t{%2, %0|%0, %2}"
4212 [(set_attr "type" "sselog")
4213 (set_attr "prefix_data16" "1")
4214 (set_attr "mode" "TI")])
;; High-half halfword interleave of two V8HI vectors, printed as
;; punpckhwd.  The index list pairs element i with element i+8 for
;; i = 4..7.  Operand 1 is tied to the destination.
4216 (define_insn "sse2_punpckhwd"
4217 [(set (match_operand:V8HI 0 "register_operand" "=x")
4220 (match_operand:V8HI 1 "register_operand" "0")
4221 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4222 (parallel [(const_int 4) (const_int 12)
4223 (const_int 5) (const_int 13)
4224 (const_int 6) (const_int 14)
4225 (const_int 7) (const_int 15)])))]
4227 "punpckhwd\t{%2, %0|%0, %2}"
4228 [(set_attr "type" "sselog")
4229 (set_attr "prefix_data16" "1")
4230 (set_attr "mode" "TI")])
;; Low-half halfword interleave of two V8HI vectors, printed as
;; punpcklwd.  The index list pairs element i with element i+8 for
;; i = 0..3.  Operand 1 is tied to the destination.
4232 (define_insn "sse2_punpcklwd"
4233 [(set (match_operand:V8HI 0 "register_operand" "=x")
4236 (match_operand:V8HI 1 "register_operand" "0")
4237 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4238 (parallel [(const_int 0) (const_int 8)
4239 (const_int 1) (const_int 9)
4240 (const_int 2) (const_int 10)
4241 (const_int 3) (const_int 11)])))]
4243 "punpcklwd\t{%2, %0|%0, %2}"
4244 [(set_attr "type" "sselog")
4245 (set_attr "prefix_data16" "1")
4246 (set_attr "mode" "TI")])
;; High-half doubleword interleave of two V4SI vectors, printed as
;; punpckhdq (index pairs 2/6 and 3/7).  Operand 1 is tied to the
;; destination.
4248 (define_insn "sse2_punpckhdq"
4249 [(set (match_operand:V4SI 0 "register_operand" "=x")
4252 (match_operand:V4SI 1 "register_operand" "0")
4253 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4254 (parallel [(const_int 2) (const_int 6)
4255 (const_int 3) (const_int 7)])))]
4257 "punpckhdq\t{%2, %0|%0, %2}"
4258 [(set_attr "type" "sselog")
4259 (set_attr "prefix_data16" "1")
4260 (set_attr "mode" "TI")])
;; Low-half doubleword interleave of two V4SI vectors, printed as
;; punpckldq (index pairs 0/4 and 1/5).  Operand 1 is tied to the
;; destination.
4262 (define_insn "sse2_punpckldq"
4263 [(set (match_operand:V4SI 0 "register_operand" "=x")
4266 (match_operand:V4SI 1 "register_operand" "0")
4267 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4268 (parallel [(const_int 0) (const_int 4)
4269 (const_int 1) (const_int 5)])))]
4271 "punpckldq\t{%2, %0|%0, %2}"
4272 [(set_attr "type" "sselog")
4273 (set_attr "prefix_data16" "1")
4274 (set_attr "mode" "TI")])
;; punpckhqdq on V2DI.  Operand 1 is tied to the destination; operand 2
;; is register-or-memory.  The selector starts with index 1; the rest of
;; the selector and the insn condition are on lines not visible in this
;; excerpt.
4276 (define_insn "sse2_punpckhqdq"
4277 [(set (match_operand:V2DI 0 "register_operand" "=x")
4280 (match_operand:V2DI 1 "register_operand" "0")
4281 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4282 (parallel [(const_int 1)
4285 "punpckhqdq\t{%2, %0|%0, %2}"
4286 [(set_attr "type" "sselog")
4287 (set_attr "prefix_data16" "1")
4288 (set_attr "mode" "TI")])
;; punpcklqdq on V2DI.  Operand 1 is tied to the destination; operand 2
;; is register-or-memory.  The selector starts with index 0; the rest of
;; the selector and the insn condition are on lines not visible in this
;; excerpt.
4290 (define_insn "sse2_punpcklqdq"
4291 [(set (match_operand:V2DI 0 "register_operand" "=x")
4294 (match_operand:V2DI 1 "register_operand" "0")
4295 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4296 (parallel [(const_int 0)
4299 "punpcklqdq\t{%2, %0|%0, %2}"
4300 [(set_attr "type" "sselog")
4301 (set_attr "prefix_data16" "1")
4302 (set_attr "mode" "TI")])
;; pinsrb: insert the QI value in operand 2 (general register or memory,
;; duplicated across V16QI in the RTL) into the vector in operand 1, which
;; is tied to the destination.  Operand 3 arrives as a power-of-two
;; constant (predicate const_pow2_1_to_32768_operand); the output code
;; replaces it with its exact_log2 before printing, so the instruction
;; receives an element index.  Operand 2 is printed with the %k modifier.
4304 (define_insn "*sse4_1_pinsrb"
4305 [(set (match_operand:V16QI 0 "register_operand" "=x")
4307 (vec_duplicate:V16QI
4308 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4309 (match_operand:V16QI 1 "register_operand" "0")
4310 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4313 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4314 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4316 [(set_attr "type" "sselog")
4317 (set_attr "prefix_extra" "1")
4318 (set_attr "mode" "TI")])
;; pinsrw: insert the HI value in operand 2 (general register or memory)
;; into the V8HI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power-of-two constant (predicate
;; const_pow2_1_to_128_operand); the output code replaces it with its
;; exact_log2 before printing.  Operand 2 is printed with the %k modifier.
4320 (define_insn "*sse2_pinsrw"
4321 [(set (match_operand:V8HI 0 "register_operand" "=x")
4324 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4325 (match_operand:V8HI 1 "register_operand" "0")
4326 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4329 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4330 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4332 [(set_attr "type" "sselog")
4333 (set_attr "prefix_data16" "1")
4334 (set_attr "mode" "TI")])
4336 ;; It must come before sse2_loadld since it is preferred.
;; pinsrd: insert the SI value in operand 2 (general register or memory)
;; into the V4SI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power-of-two constant (predicate
;; const_pow2_1_to_8_operand); the output code replaces it with its
;; exact_log2 before printing.
4337 (define_insn "*sse4_1_pinsrd"
4338 [(set (match_operand:V4SI 0 "register_operand" "=x")
4341 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4342 (match_operand:V4SI 1 "register_operand" "0")
4343 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4346 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4347 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4349 [(set_attr "type" "sselog")
4350 (set_attr "prefix_extra" "1")
4351 (set_attr "mode" "TI")])
;; pinsrq: insert the DI value in operand 2 (general register or memory)
;; into the V2DI vector in operand 1, which is tied to the destination.
;; Operand 3 arrives as a power-of-two constant (predicate
;; const_pow2_1_to_2_operand); the output code replaces it with its
;; exact_log2 before printing.  The insn condition is on a line not
;; visible in this excerpt.
4353 (define_insn "*sse4_1_pinsrq"
4354 [(set (match_operand:V2DI 0 "register_operand" "=x")
4357 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4358 (match_operand:V2DI 1 "register_operand" "0")
4359 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4362 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4363 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4365 [(set_attr "type" "sselog")
4366 (set_attr "prefix_extra" "1")
4367 (set_attr "mode" "TI")])
;; pextrb into a general register: selects one element (constant index
;; 0..15, operand 2) of the V16QI register in operand 1 and writes an SI
;; register.  The extra closing parenthesis after the selector suggests a
;; wrapper around the selection (presumably a widening to SImode) on a
;; line not visible in this excerpt.
4369 (define_insn "*sse4_1_pextrb"
4370 [(set (match_operand:SI 0 "register_operand" "=r")
4373 (match_operand:V16QI 1 "register_operand" "x")
4374 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4376 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4377 [(set_attr "type" "sselog")
4378 (set_attr "prefix_extra" "1")
4379 (set_attr "mode" "TI")])
;; pextrb with a memory destination: stores one element (constant index
;; 0..15, operand 2) of the V16QI register in operand 1 to a QI memory
;; location.
4381 (define_insn "*sse4_1_pextrb_memory"
4382 [(set (match_operand:QI 0 "memory_operand" "=m")
4384 (match_operand:V16QI 1 "register_operand" "x")
4385 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4387 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4388 [(set_attr "type" "sselog")
4389 (set_attr "prefix_extra" "1")
4390 (set_attr "mode" "TI")])
;; pextrw into a general register: selects one element (constant index
;; 0..7, operand 2) of the V8HI register in operand 1 and writes an SI
;; register.  The extra closing parenthesis after the selector suggests a
;; wrapper around the selection (presumably a widening to SImode) on a
;; line not visible in this excerpt.
4392 (define_insn "*sse2_pextrw"
4393 [(set (match_operand:SI 0 "register_operand" "=r")
4396 (match_operand:V8HI 1 "register_operand" "x")
4397 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4399 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4400 [(set_attr "type" "sselog")
4401 (set_attr "prefix_data16" "1")
4402 (set_attr "mode" "TI")])
;; pextrw with a memory destination: stores one element (constant index
;; 0..7, operand 2) of the V8HI register in operand 1 to an HI memory
;; location.  Unlike the register form it carries prefix_extra.
4404 (define_insn "*sse4_1_pextrw_memory"
4405 [(set (match_operand:HI 0 "memory_operand" "=m")
4407 (match_operand:V8HI 1 "register_operand" "x")
4408 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4410 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4411 [(set_attr "type" "sselog")
4412 (set_attr "prefix_extra" "1")
4413 (set_attr "mode" "TI")])
;; pextrd: extracts one element (constant index 0..3, operand 2) of the
;; V4SI register in operand 1 into an SI general register or memory
;; location ("=rm").
4415 (define_insn "*sse4_1_pextrd"
4416 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4418 (match_operand:V4SI 1 "register_operand" "x")
4419 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4421 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4422 [(set_attr "type" "sselog")
4423 (set_attr "prefix_extra" "1")
4424 (set_attr "mode" "TI")])
4426 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq: extracts one element (constant index 0..1, operand 2) of the
;; V2DI register in operand 1 into a DI general register or memory
;; location.  Enabled only when both TARGET_SSE4_1 and TARGET_64BIT hold.
4427 (define_insn "*sse4_1_pextrq"
4428 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4430 (match_operand:V2DI 1 "register_operand" "x")
4431 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4432 "TARGET_SSE4_1 && TARGET_64BIT"
4433 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4434 [(set_attr "type" "sselog")
4435 (set_attr "prefix_extra" "1")
4436 (set_attr "mode" "TI")])
;; Expander taking the pshufd control byte as a single constant
;; (operand 2).  It splits the mask into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) and passes each as a separate constant to
;; gen_sse2_pshufd_1 along with the destination and source.
4438 (define_expand "sse2_pshufd"
4439 [(match_operand:V4SI 0 "register_operand" "")
4440 (match_operand:V4SI 1 "nonimmediate_operand" "")
4441 (match_operand:SI 2 "const_int_operand" "")]
4444 int mask = INTVAL (operands[2]);
4445 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4446 GEN_INT ((mask >> 0) & 3),
4447 GEN_INT ((mask >> 2) & 3),
4448 GEN_INT ((mask >> 4) & 3),
4449 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each constrained to 0..3).  The output code packs them
;; back into one control byte -- operand 2 in bits 0-1, operand 3 in bits
;; 2-3, operand 4 in bits 4-5, operand 5 in bits 6-7 -- and overwrites
;; operands[2] with it so the template can print it as the immediate.
4453 (define_insn "sse2_pshufd_1"
4454 [(set (match_operand:V4SI 0 "register_operand" "=x")
4456 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4457 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4458 (match_operand 3 "const_0_to_3_operand" "")
4459 (match_operand 4 "const_0_to_3_operand" "")
4460 (match_operand 5 "const_0_to_3_operand" "")])))]
4464 mask |= INTVAL (operands[2]) << 0;
4465 mask |= INTVAL (operands[3]) << 2;
4466 mask |= INTVAL (operands[4]) << 4;
4467 mask |= INTVAL (operands[5]) << 6;
4468 operands[2] = GEN_INT (mask);
4470 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4472 [(set_attr "type" "sselog1")
4473 (set_attr "prefix_data16" "1")
4474 (set_attr "mode" "TI")])
;; Expander taking the pshuflw control byte as a single constant
;; (operand 2).  It splits the mask into four 2-bit fields and passes
;; each as a separate constant (range 0..3) to gen_sse2_pshuflw_1.
4476 (define_expand "sse2_pshuflw"
4477 [(match_operand:V8HI 0 "register_operand" "")
4478 (match_operand:V8HI 1 "nonimmediate_operand" "")
4479 (match_operand:SI 2 "const_int_operand" "")]
4482 int mask = INTVAL (operands[2]);
4483 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4484 GEN_INT ((mask >> 0) & 3),
4485 GEN_INT ((mask >> 2) & 3),
4486 GEN_INT ((mask >> 4) & 3),
4487 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit element selectors (operands 2..5, each
;; constrained to 0..3).  The selector list continues on lines not
;; visible in this excerpt.  The output code packs the four selectors
;; into one control byte (2 bits each, operand 2 lowest) and overwrites
;; operands[2] with it for printing as the immediate.
4491 (define_insn "sse2_pshuflw_1"
4492 [(set (match_operand:V8HI 0 "register_operand" "=x")
4494 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4495 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4496 (match_operand 3 "const_0_to_3_operand" "")
4497 (match_operand 4 "const_0_to_3_operand" "")
4498 (match_operand 5 "const_0_to_3_operand" "")
4506 mask |= INTVAL (operands[2]) << 0;
4507 mask |= INTVAL (operands[3]) << 2;
4508 mask |= INTVAL (operands[4]) << 4;
4509 mask |= INTVAL (operands[5]) << 6;
4510 operands[2] = GEN_INT (mask);
4512 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4514 [(set_attr "type" "sselog")
4515 (set_attr "prefix_rep" "1")
4516 (set_attr "mode" "TI")])
;; Expander taking the pshufhw control byte as a single constant
;; (operand 2).  It splits the mask into four 2-bit fields and adds 4 to
;; each, so gen_sse2_pshufhw_1 receives selectors in the range 4..7.
4518 (define_expand "sse2_pshufhw"
4519 [(match_operand:V8HI 0 "register_operand" "")
4520 (match_operand:V8HI 1 "nonimmediate_operand" "")
4521 (match_operand:SI 2 "const_int_operand" "")]
4524 int mask = INTVAL (operands[2]);
4525 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4526 GEN_INT (((mask >> 0) & 3) + 4),
4527 GEN_INT (((mask >> 2) & 3) + 4),
4528 GEN_INT (((mask >> 4) & 3) + 4),
4529 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit element selectors (operands 2..5, each
;; constrained to 4..7).  The selector list begins with a fixed index 0;
;; the entries between it and operand 2 are on lines not visible in this
;; excerpt.  The output code subtracts 4 from each selector, packs the
;; results into one control byte (2 bits each, operand 2 lowest) and
;; overwrites operands[2] with it for printing as the immediate.
4533 (define_insn "sse2_pshufhw_1"
4534 [(set (match_operand:V8HI 0 "register_operand" "=x")
4536 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4537 (parallel [(const_int 0)
4541 (match_operand 2 "const_4_to_7_operand" "")
4542 (match_operand 3 "const_4_to_7_operand" "")
4543 (match_operand 4 "const_4_to_7_operand" "")
4544 (match_operand 5 "const_4_to_7_operand" "")])))]
4548 mask |= (INTVAL (operands[2]) - 4) << 0;
4549 mask |= (INTVAL (operands[3]) - 4) << 2;
4550 mask |= (INTVAL (operands[4]) - 4) << 4;
4551 mask |= (INTVAL (operands[5]) - 4) << 6;
4552 operands[2] = GEN_INT (mask);
4554 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4556 [(set_attr "type" "sselog")
4557 (set_attr "prefix_rep" "1")
4558 (set_attr "mode" "TI")])
;; Expander building a V4SI value from the SI value in operand 1.  The
;; preparation statement sets operands[2] to the V4SI zero constant
;; (CONST0_RTX); how operand 2 is used in the pattern is on lines not
;; visible in this excerpt.
4560 (define_expand "sse2_loadd"
4561 [(set (match_operand:V4SI 0 "register_operand" "")
4564 (match_operand:SI 1 "nonimmediate_operand" ""))
4568 "operands[2] = CONST0_RTX (V4SImode);")
;; Load an SI value (operand 2) into a V4SI register.  Four alternatives:
;;   1. movd from memory          (destination class Y2)
;;   2. movd from a general reg   (destination class Yi)
;;   3. movss from memory
;;   4. movss from an SSE register, with operand 1 tied to the destination
;; In alternatives 1-3 operand 1 must satisfy constraint "C" (presumably
;; the zero constant, consistent with the reg_or_0_operand predicate).
4570 (define_insn "sse2_loadld"
4571 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4574 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4575 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4579 movd\t{%2, %0|%0, %2}
4580 movd\t{%2, %0|%0, %2}
4581 movss\t{%2, %0|%0, %2}
4582 movss\t{%2, %0|%0, %2}"
4583 [(set_attr "type" "ssemov")
4584 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register to an SI destination (memory or SSE
;; register in alternative 1, general register in alternative 2).
;; The split condition requires reload_completed and one of: inter-unit
;; moves are enabled, the destination is memory, or the destination is
;; not a general register.  The replacement is a plain SImode move whose
;; source is operand 1's hard register re-expressed in SImode.
4586 (define_insn_and_split "sse2_stored"
4587 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4589 (match_operand:V4SI 1 "register_operand" "x,Yi")
4590 (parallel [(const_int 0)])))]
4593 "&& reload_completed
4594 && (TARGET_INTER_UNIT_MOVES
4595 || MEM_P (operands [0])
4596 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4597 [(set (match_dup 0) (match_dup 1))]
4599 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element i (constant 0..3, operand 2) of a V4SI value that
;; lives in offsettable memory ("o") into an SI general register.  The
;; split code emits a plain move from the SImode memory reference at byte
;; offset i*4 within operand 1 (via adjust_address).
4602 (define_insn_and_split "*vec_ext_v4si_mem"
4603 [(set (match_operand:SI 0 "register_operand" "=r")
4605 (match_operand:V4SI 1 "memory_operand" "o")
4606 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4612 int i = INTVAL (operands[2]);
4614 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: store element 0 of the V2DI register in operand 1 to the DI
;; destination in operand 0 (register or memory).  Its condition and
;; preparation statements are on lines not visible in this excerpt.
4618 (define_expand "sse_storeq"
4619 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4621 (match_operand:V2DI 1 "register_operand" "")
4622 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 store from V2DI to DI.  Requires
;; TARGET_64BIT and rejects the memory-to-memory case.  Three
;; alternatives: SSE register to memory/SSE register, SSE register (class
;; Yi) to general register, and offsettable memory to general register.
;; Only the third template (mov{q}) is visible in this excerpt; the first
;; two have type and mode "*".
4626 (define_insn "*sse2_storeq_rex64"
4627 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4629 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4630 (parallel [(const_int 0)])))]
4631 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4635 mov{q}\t{%1, %0|%0, %1}"
4636 [(set_attr "type" "*,*,imov")
4637 (set_attr "mode" "*,*,DI")])
;; Element-0 store from a V2DI SSE register to a DI destination (memory
;; or SSE register).  The insn's condition and template are on lines not
;; visible in this excerpt.
;; NOTE(review): the second operand list below (empty constraints,
;; followed by a (match_dup) replacement pattern) looks like a separate
;; splitter whose opening line is missing from this excerpt.  Its visible
;; condition allows the split when inter-unit moves are enabled, the
;; destination is memory, or the destination is not a general register;
;; the replacement is a plain DImode move from operand 1's hard register
;; re-expressed in DImode.
4639 (define_insn "*sse2_storeq"
4640 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4642 (match_operand:V2DI 1 "register_operand" "x")
4643 (parallel [(const_int 0)])))]
4648 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4650 (match_operand:V2DI 1 "register_operand" "")
4651 (parallel [(const_int 0)])))]
4654 && (TARGET_INTER_UNIT_MOVES
4655 || MEM_P (operands [0])
4656 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4657 [(set (match_dup 0) (match_dup 1))]
4659 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI value into a DI destination, 64-bit
;; targets only; memory-to-memory is rejected.  Alternatives:
;;   1. movhps  SSE register -> memory
;;   2. psrldq  by 8 bytes in place (operand 1 tied to the destination;
;;              "memory" attribute forced to none)
;;   3. movq    offsettable memory (%H1 operand form) -> SSE register
;;   4. mov{q}  offsettable memory (%H1 operand form) -> general register
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
4662 (define_insn "*vec_extractv2di_1_rex64"
4663 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4665 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4666 (parallel [(const_int 1)])))]
4667 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4669 movhps\t{%1, %0|%0, %1}
4670 psrldq\t{$8, %0|%0, 8}
4671 movq\t{%H1, %0|%0, %H1}
4672 mov{q}\t{%H1, %0|%0, %H1}"
4673 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4674 (set_attr "memory" "*,none,*,*")
4675 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI value into a DI destination when
;; TARGET_SSE2 holds; memory-to-memory is rejected.  The first clause of
;; the condition is on a line not visible in this excerpt.  Alternatives:
;;   1. movhps  SSE register -> memory
;;   2. psrldq  by 8 bytes in place (operand 1 tied to the destination)
;;   3. movq    offsettable memory (%H1 operand form) -> SSE register
4677 (define_insn "*vec_extractv2di_1_sse2"
4678 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4680 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4681 (parallel [(const_int 1)])))]
4683 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4685 movhps\t{%1, %0|%0, %1}
4686 psrldq\t{$8, %0|%0, 8}
4687 movq\t{%H1, %0|%0, %H1}"
4688 [(set_attr "type" "ssemov,sseishft,ssemov")
4689 (set_attr "memory" "*,none,*")
4690 (set_attr "mode" "V2SF,TI,TI")])
4692 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only variant (TARGET_SSE without TARGET_SSE2) of the element-1
;; extraction from V2DI; memory-to-memory is rejected.  Alternatives:
;;   1. movhps   SSE register -> memory
;;   2. movhlps  SSE register -> SSE register
;;   3. movlps   offsettable memory (%H1 operand form) -> SSE register
4693 (define_insn "*vec_extractv2di_1_sse"
4694 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4696 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4697 (parallel [(const_int 1)])))]
4698 "!TARGET_SSE2 && TARGET_SSE
4699 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4701 movhps\t{%1, %0|%0, %1}
4702 movhlps\t{%1, %0|%0, %1}
4703 movlps\t{%H1, %0|%0, %H1}"
4704 [(set_attr "type" "ssemov")
4705 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produce a V4SI register from the SI register in operand 1 using a
;; shuffle with control 0.  Alternative 1 (class Y2) uses pshufd from
;; operand 1; alternative 2 ties operand 1 to the destination and uses
;; shufps of the destination with itself.
4707 (define_insn "*vec_dupv4si"
4708 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4710 (match_operand:SI 1 "register_operand" " Y2,0")))]
4713 pshufd\t{$0, %1, %0|%0, %1, 0}
4714 shufps\t{$0, %0, %0|%0, %0, 0}"
4715 [(set_attr "type" "sselog1")
4716 (set_attr "mode" "TI,V4SF")])
;; Produce a V2DI register from the DI register in operand 1.  Both
;; alternatives tie operand 1 to the destination; the first is typed
;; sselog1 in TI mode, the second ssemov in V4SF mode.  The condition and
;; output templates are on lines not visible in this excerpt.
4718 (define_insn "*vec_dupv2di"
4719 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4721 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4726 [(set_attr "type" "sselog1,ssemov")
4727 (set_attr "mode" "TI,V4SF")])
4729 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4730 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4731 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SI operands.  Alternatives:
;;   1. punpckldq  SSE (Y2): operand 1 tied to destination, operand 2 in Y2
;;   2. movd       SSE (Y2): operand 1 from register/memory, operand 2 "C"
;;   3. punpckldq  MMX (*y): operand 1 tied to destination, operand 2 in y
;;   4. movd       MMX (*y): operand 1 from register/memory, operand 2 "C"
;; "C" is presumably the zero constant, consistent with the
;; reg_or_0_operand predicate on operand 2.
4732 (define_insn "*sse2_concatv2si"
4733 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4735 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4736 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4739 punpckldq\t{%2, %0|%0, %2}
4740 movd\t{%1, %0|%0, %1}
4741 punpckldq\t{%2, %0|%0, %2}
4742 movd\t{%1, %0|%0, %1}"
4743 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4744 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from two SI operands, with unpcklps/movss in the
;; SSE alternatives.  Alternatives:
;;   1. unpcklps   SSE: operand 1 tied to destination, operand 2 in x
;;   2. movss      SSE: operand 1 from memory, operand 2 "C"
;;   3. punpckldq  MMX: operand 1 tied to destination, operand 2 in y
;;   4. movd       MMX: operand 1 from register/memory, operand 2 "C"
;; The insn condition is on a line not visible in this excerpt.
4746 (define_insn "*sse1_concatv2si"
4747 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4749 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4750 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4753 unpcklps\t{%2, %0|%0, %2}
4754 movss\t{%1, %0|%0, %1}
4755 punpckldq\t{%2, %0|%0, %2}
4756 movd\t{%1, %0|%0, %1}"
4757 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4758 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI operands; operand 1 is always tied to
;; the destination.  Alternatives by where operand 2 lives:
;;   1. punpcklqdq  operand 2 in a Y2 register
;;   2. movlhps     operand 2 in an SSE register
;;   3. movhps      operand 2 in memory
4760 (define_insn "*vec_concatv4si_1"
4761 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4763 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4764 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4767 punpcklqdq\t{%2, %0|%0, %2}
4768 movlhps\t{%2, %0|%0, %2}
4769 movhps\t{%2, %0|%0, %2}"
4770 [(set_attr "type" "sselog,ssemov,ssemov")
4771 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI value from two DI operands on 32-bit SSE targets
;; (!TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   1. movq        operand 1 from memory, operand 2 "C"
;;   2. movq2dq     operand 1 from an MMX register, operand 2 "C"
;;   3. punpcklqdq  operand 1 tied to destination, operand 2 in Y2
;;   4. movlhps     operand 1 tied to destination, operand 2 in x
;;   5. movhps      operand 1 tied to destination, operand 2 in memory
;;   6. movlps      operand 2 tied to destination, operand 1 from memory
4773 (define_insn "vec_concatv2di"
4774 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4776 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4777 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4778 "!TARGET_64BIT && TARGET_SSE"
4780 movq\t{%1, %0|%0, %1}
4781 movq2dq\t{%1, %0|%0, %1}
4782 punpcklqdq\t{%2, %0|%0, %2}
4783 movlhps\t{%2, %0|%0, %2}
4784 movhps\t{%2, %0|%0, %2}
4785 movlps\t{%1, %0|%0, %1}"
4786 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4787 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Build a V2DI value from two DI operands; compared with the six
;; alternatives of vec_concatv2di this adds a movq from a general
;; register (destination class Yi).  The insn condition is on a line not
;; visible in this excerpt.  Alternatives:
;;   1. movq        operand 1 from memory, operand 2 "C"
;;   2. movq        operand 1 from a general register, operand 2 "C"
;;   3. movq2dq     operand 1 from an MMX register, operand 2 "C"
;;   4. punpcklqdq  operand 1 tied to destination, operand 2 in Y2
;;   5. movlhps     operand 1 tied to destination, operand 2 in x
;;   6. movhps      operand 1 tied to destination, operand 2 in memory
;;   7. movlps      operand 2 tied to destination, operand 1 from memory
4789 (define_insn "*vec_concatv2di_rex"
4790 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4792 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4793 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4796 movq\t{%1, %0|%0, %1}
4797 movq\t{%1, %0|%0, %1}
4798 movq2dq\t{%1, %0|%0, %1}
4799 punpcklqdq\t{%2, %0|%0, %2}
4800 movlhps\t{%2, %0|%0, %2}
4801 movhps\t{%2, %0|%0, %2}
4802 movlps\t{%1, %0|%0, %1}"
4803 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4804 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_setv2di: operand 0 is the V2DI register, operand 1
;; the DI value, operand 2 a constant.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument and
;; INTVAL (operands[2]) as its last.
4806 (define_expand "vec_setv2di"
4807 [(match_operand:V2DI 0 "register_operand" "")
4808 (match_operand:DI 1 "register_operand" "")
4809 (match_operand 2 "const_int_operand" "")]
4812 ix86_expand_vector_set (false, operands[0], operands[1],
4813 INTVAL (operands[2]));
;; Expander for vec_extractv2di: operand 0 is the DI result register,
;; operand 1 the V2DI source, operand 2 a constant.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL (operands[2]) as its last.
4817 (define_expand "vec_extractv2di"
4818 [(match_operand:DI 0 "register_operand" "")
4819 (match_operand:V2DI 1 "register_operand" "")
4820 (match_operand 2 "const_int_operand" "")]
4823 ix86_expand_vector_extract (false, operands[0], operands[1],
4824 INTVAL (operands[2]));
;; Expander for vec_initv2di: operand 0 is the V2DI register; operand 1
;; has no predicate or mode restriction.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4828 (define_expand "vec_initv2di"
4829 [(match_operand:V2DI 0 "register_operand" "")
4830 (match_operand 1 "" "")]
4833 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for vec_setv4si: operand 0 is the V4SI register, operand 1
;; the SI value, operand 2 a constant.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument and
;; INTVAL (operands[2]) as its last.
4837 (define_expand "vec_setv4si"
4838 [(match_operand:V4SI 0 "register_operand" "")
4839 (match_operand:SI 1 "register_operand" "")
4840 (match_operand 2 "const_int_operand" "")]
4843 ix86_expand_vector_set (false, operands[0], operands[1],
4844 INTVAL (operands[2]));
;; Expander for vec_extractv4si: operand 0 is the SI result register,
;; operand 1 the V4SI source, operand 2 a constant.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL (operands[2]) as its last.
4848 (define_expand "vec_extractv4si"
4849 [(match_operand:SI 0 "register_operand" "")
4850 (match_operand:V4SI 1 "register_operand" "")
4851 (match_operand 2 "const_int_operand" "")]
4854 ix86_expand_vector_extract (false, operands[0], operands[1],
4855 INTVAL (operands[2]));
;; Expander for vec_initv4si: operand 0 is the V4SI register; operand 1
;; has no predicate or mode restriction.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4859 (define_expand "vec_initv4si"
4860 [(match_operand:V4SI 0 "register_operand" "")
4861 (match_operand 1 "" "")]
4864 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for vec_setv8hi: operand 0 is the V8HI register, operand 1
;; the HI value, operand 2 a constant.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument and
;; INTVAL (operands[2]) as its last.
4868 (define_expand "vec_setv8hi"
4869 [(match_operand:V8HI 0 "register_operand" "")
4870 (match_operand:HI 1 "register_operand" "")
4871 (match_operand 2 "const_int_operand" "")]
4874 ix86_expand_vector_set (false, operands[0], operands[1],
4875 INTVAL (operands[2]));
;; Expander for vec_extractv8hi: operand 0 is the HI result register,
;; operand 1 the V8HI source, operand 2 a constant.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL (operands[2]) as its last.
4879 (define_expand "vec_extractv8hi"
4880 [(match_operand:HI 0 "register_operand" "")
4881 (match_operand:V8HI 1 "register_operand" "")
4882 (match_operand 2 "const_int_operand" "")]
4885 ix86_expand_vector_extract (false, operands[0], operands[1],
4886 INTVAL (operands[2]));
;; Expander for vec_initv8hi: operand 0 is the V8HI register; operand 1
;; has no predicate or mode restriction.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4890 (define_expand "vec_initv8hi"
4891 [(match_operand:V8HI 0 "register_operand" "")
4892 (match_operand 1 "" "")]
4895 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for vec_setv16qi: operand 0 is the V16QI register, operand 1
;; the QI value, operand 2 a constant.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument and
;; INTVAL (operands[2]) as its last.
4899 (define_expand "vec_setv16qi"
4900 [(match_operand:V16QI 0 "register_operand" "")
4901 (match_operand:QI 1 "register_operand" "")
4902 (match_operand 2 "const_int_operand" "")]
4905 ix86_expand_vector_set (false, operands[0], operands[1],
4906 INTVAL (operands[2]));
;; Expander for vec_extractv16qi: operand 0 is the QI result register,
;; operand 1 the V16QI source, operand 2 a constant.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL (operands[2]) as its last.
4910 (define_expand "vec_extractv16qi"
4911 [(match_operand:QI 0 "register_operand" "")
4912 (match_operand:V16QI 1 "register_operand" "")
4913 (match_operand 2 "const_int_operand" "")]
4916 ix86_expand_vector_extract (false, operands[0], operands[1],
4917 INTVAL (operands[2]));
;; Expander for vec_initv16qi: operand 0 is the V16QI register; operand 1
;; has no predicate or mode restriction.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4921 (define_expand "vec_initv16qi"
4922 [(match_operand:V16QI 0 "register_operand" "")
4923 (match_operand 1 "" "")]
4926 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander vec_unpacku_hi_v16qi: V16QI input (operand 1), V8HI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, true, true): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
4930 (define_expand "vec_unpacku_hi_v16qi"
4931 [(match_operand:V8HI 0 "register_operand" "")
4932 (match_operand:V16QI 1 "register_operand" "")]
4936 ix86_expand_sse4_unpack (operands, true, true);
4937 else if (TARGET_SSE5)
4938 ix86_expand_sse5_unpack (operands, true, true);
4940 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI input (operand 1), V8HI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, false, true): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
4944 (define_expand "vec_unpacks_hi_v16qi"
4945 [(match_operand:V8HI 0 "register_operand" "")
4946 (match_operand:V16QI 1 "register_operand" "")]
4950 ix86_expand_sse4_unpack (operands, false, true);
4951 else if (TARGET_SSE5)
4952 ix86_expand_sse5_unpack (operands, false, true);
4954 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v16qi: V16QI input (operand 1), V8HI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, true, false): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
4958 (define_expand "vec_unpacku_lo_v16qi"
4959 [(match_operand:V8HI 0 "register_operand" "")
4960 (match_operand:V16QI 1 "register_operand" "")]
4964 ix86_expand_sse4_unpack (operands, true, false);
4965 else if (TARGET_SSE5)
4966 ix86_expand_sse5_unpack (operands, true, false);
4968 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: V16QI input (operand 1), V8HI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, false, false): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
4972 (define_expand "vec_unpacks_lo_v16qi"
4973 [(match_operand:V8HI 0 "register_operand" "")
4974 (match_operand:V16QI 1 "register_operand" "")]
4978 ix86_expand_sse4_unpack (operands, false, false);
4979 else if (TARGET_SSE5)
4980 ix86_expand_sse5_unpack (operands, false, false);
4982 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: V8HI input (operand 1), V4SI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, true, true): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
4986 (define_expand "vec_unpacku_hi_v8hi"
4987 [(match_operand:V4SI 0 "register_operand" "")
4988 (match_operand:V8HI 1 "register_operand" "")]
4992 ix86_expand_sse4_unpack (operands, true, true);
4993 else if (TARGET_SSE5)
4994 ix86_expand_sse5_unpack (operands, true, true);
4996 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: V8HI input (operand 1), V4SI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, false, true): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5000 (define_expand "vec_unpacks_hi_v8hi"
5001 [(match_operand:V4SI 0 "register_operand" "")
5002 (match_operand:V8HI 1 "register_operand" "")]
5006 ix86_expand_sse4_unpack (operands, false, true);
5007 else if (TARGET_SSE5)
5008 ix86_expand_sse5_unpack (operands, false, true);
5010 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: V8HI input (operand 1), V4SI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, true, false): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5014 (define_expand "vec_unpacku_lo_v8hi"
5015 [(match_operand:V4SI 0 "register_operand" "")
5016 (match_operand:V8HI 1 "register_operand" "")]
5020 ix86_expand_sse4_unpack (operands, true, false);
5021 else if (TARGET_SSE5)
5022 ix86_expand_sse5_unpack (operands, true, false);
5024 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: V8HI input (operand 1), V4SI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, false, false): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5028 (define_expand "vec_unpacks_lo_v8hi"
5029 [(match_operand:V4SI 0 "register_operand" "")
5030 (match_operand:V8HI 1 "register_operand" "")]
5034 ix86_expand_sse4_unpack (operands, false, false);
5035 else if (TARGET_SSE5)
5036 ix86_expand_sse5_unpack (operands, false, false);
5038 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: V4SI input (operand 1), V2DI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, true, true): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5042 (define_expand "vec_unpacku_hi_v4si"
5043 [(match_operand:V2DI 0 "register_operand" "")
5044 (match_operand:V4SI 1 "register_operand" "")]
5048 ix86_expand_sse4_unpack (operands, true, true);
5049 else if (TARGET_SSE5)
5050 ix86_expand_sse5_unpack (operands, true, true);
5052 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: V4SI input (operand 1), V2DI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, false, true): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5056 (define_expand "vec_unpacks_hi_v4si"
5057 [(match_operand:V2DI 0 "register_operand" "")
5058 (match_operand:V4SI 1 "register_operand" "")]
5062 ix86_expand_sse4_unpack (operands, false, true);
5063 else if (TARGET_SSE5)
5064 ix86_expand_sse5_unpack (operands, false, true);
5066 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: V4SI input (operand 1), V2DI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, true, false): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5070 (define_expand "vec_unpacku_lo_v4si"
5071 [(match_operand:V2DI 0 "register_operand" "")
5072 (match_operand:V4SI 1 "register_operand" "")]
5076 ix86_expand_sse4_unpack (operands, true, false);
5077 else if (TARGET_SSE5)
5078 ix86_expand_sse5_unpack (operands, true, false);
5080 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: V4SI input (operand 1), V2DI result
;; (operand 0).  Dispatches to one of three C helpers, each called with
;; (operands, false, false): the sse5 helper under TARGET_SSE5, the sse4
;; helper before it and the plain sse helper after it (their guarding
;; lines are not visible in this excerpt).
;; NOTE(review): the two booleans presumably mean unsigned and high-half
;; -- confirm against the helper definitions.
5084 (define_expand "vec_unpacks_lo_v4si"
5085 [(match_operand:V2DI 0 "register_operand" "")
5086 (match_operand:V4SI 1 "register_operand" "")]
5090 ix86_expand_sse4_unpack (operands, false, false);
5091 else if (TARGET_SSE5)
5092 ix86_expand_sse5_unpack (operands, false, false);
5094 ix86_expand_sse_unpack (operands, false, false);
5098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5102 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern.  The visible pieces are the
;; two V16QI inputs (register or memory) and a constant vector of sixteen
;; 1s; the arithmetic wrapping them is on lines not visible in this
;; excerpt.  The preparation statement normalises the operands with
;; ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
5104 (define_expand "sse2_uavgv16qi3"
5105 [(set (match_operand:V16QI 0 "register_operand" "")
5111 (match_operand:V16QI 1 "nonimmediate_operand" ""))
5113 (match_operand:V16QI 2 "nonimmediate_operand" "")))
5114 (const_vector:V16QI [(const_int 1) (const_int 1)
5115 (const_int 1) (const_int 1)
5116 (const_int 1) (const_int 1)
5117 (const_int 1) (const_int 1)
5118 (const_int 1) (const_int 1)
5119 (const_int 1) (const_int 1)
5120 (const_int 1) (const_int 1)
5121 (const_int 1) (const_int 1)]))
5124 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; pavgb on V16QI.  Operand 1 is tied to the destination and marked
;; commutative with operand 2 ("%0"); operand 2 is register-or-memory.
;; The pattern includes a constant vector of sixteen 1s; the arithmetic
;; wrapping the operands is on lines not visible in this excerpt.
;; Enabled under TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for PLUS in V16QImode.
5126 (define_insn "*sse2_uavgv16qi3"
5127 [(set (match_operand:V16QI 0 "register_operand" "=x")
5133 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5135 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5136 (const_vector:V16QI [(const_int 1) (const_int 1)
5137 (const_int 1) (const_int 1)
5138 (const_int 1) (const_int 1)
5139 (const_int 1) (const_int 1)
5140 (const_int 1) (const_int 1)
5141 (const_int 1) (const_int 1)
5142 (const_int 1) (const_int 1)
5143 (const_int 1) (const_int 1)]))
5145 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5146 "pavgb\t{%2, %0|%0, %2}"
5147 [(set_attr "type" "sseiadd")
5148 (set_attr "prefix_data16" "1")
5149 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern.  The visible pieces are the two
;; V8HI inputs (register or memory) and a constant vector of eight 1s;
;; the arithmetic wrapping them is on lines not visible in this excerpt.
;; The preparation statement normalises the operands with
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
5151 (define_expand "sse2_uavgv8hi3"
5152 [(set (match_operand:V8HI 0 "register_operand" "")
5158 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5160 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5161 (const_vector:V8HI [(const_int 1) (const_int 1)
5162 (const_int 1) (const_int 1)
5163 (const_int 1) (const_int 1)
5164 (const_int 1) (const_int 1)]))
5167 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; pavgw on V8HI.  Operand 1 is tied to the destination and marked
;; commutative with operand 2 ("%0"); operand 2 is register-or-memory.
;; The pattern includes a constant vector of eight 1s; the arithmetic
;; wrapping the operands is on lines not visible in this excerpt.
;; Enabled under TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for PLUS in V8HImode.
5169 (define_insn "*sse2_uavgv8hi3"
5170 [(set (match_operand:V8HI 0 "register_operand" "=x")
5176 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5178 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5179 (const_vector:V8HI [(const_int 1) (const_int 1)
5180 (const_int 1) (const_int 1)
5181 (const_int 1) (const_int 1)
5182 (const_int 1) (const_int 1)]))
5184 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5185 "pavgw\t{%2, %0|%0, %2}"
5186 [(set_attr "type" "sseiadd")
5187 (set_attr "prefix_data16" "1")
5188 (set_attr "mode" "TI")])
5190 ;; The correct representation for this is absolutely enormous, and
5191 ;; surely not generally useful.
;; psadbw, modelled as an opaque unspec over two V16QI inputs producing a
;; V2DI result.  Operand 1 is tied to the destination; operand 2 is
;; register-or-memory.  The unspec code and the insn condition are on
;; lines not visible in this excerpt.
5192 (define_insn "sse2_psadbw"
5193 [(set (match_operand:V2DI 0 "register_operand" "=x")
5194 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5195 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5198 "psadbw\t{%2, %0|%0, %2}"
5199 [(set_attr "type" "sseiadd")
5200 (set_attr "prefix_data16" "1")
5201 (set_attr "mode" "TI")])
;; movmskps / movmskpd, generated once per mode of the SSEMODEF2P
;; iterator.  The <sse> attribute supplies the name prefix and
;; <ssemodesuffixf2c> the one-letter mnemonic suffix.  Source is an SSE
;; register, destination an SI general register.  Enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the mode.
5203 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5204 [(set (match_operand:SI 0 "register_operand" "=r")
5206 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5208 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5209 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5210 [(set_attr "type" "ssecvt")
5211 (set_attr "mode" "<MODE>")])
;; pmovmskb, modelled as an unspec: V16QI SSE register in, SI general
;; register out.  The unspec code and the insn condition are on lines not
;; visible in this excerpt.
5213 (define_insn "sse2_pmovmskb"
5214 [(set (match_operand:SI 0 "register_operand" "=r")
5215 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5218 "pmovmskb\t{%1, %0|%0, %1}"
5219 [(set_attr "type" "ssecvt")
5220 (set_attr "prefix_data16" "1")
5221 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination written from an
;; unspec over two V16QI registers (operands 1 and 2).  The rest of the
;; unspec vector and the condition are on lines not visible in this
;; excerpt.
5223 (define_expand "sse2_maskmovdqu"
5224 [(set (match_operand:V16QI 0 "memory_operand" "")
5225 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5226 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The destination is
;; memory addressed by an SI register with constraint "D"; only operands
;; 1 and 2 are printed.  The previous contents of that memory also appear
;; as an unspec input (mem of match_dup 0), so the pattern reads the
;; destination as well as writing it.
5232 (define_insn "*sse2_maskmovdqu"
5233 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5234 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5235 (match_operand:V16QI 2 "register_operand" "x")
5236 (mem:V16QI (match_dup 0))]
5238 "TARGET_SSE2 && !TARGET_64BIT"
5239 ;; @@@ check ordering of operands in intel/nonintel syntax
5240 "maskmovdqu\t{%2, %1|%1, %2}"
5241 [(set_attr "type" "ssecvt")
5242 (set_attr "prefix_data16" "1")
5243 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the
;; 32-bit form except that the address register (constraint "D") is
;; DImode.  The previous contents of the destination memory appear as an
;; unspec input (mem of match_dup 0).
5245 (define_insn "*sse2_maskmovdqu_rex64"
5246 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5247 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5248 (match_operand:V16QI 2 "register_operand" "x")
5249 (mem:V16QI (match_dup 0))]
5251 "TARGET_SSE2 && TARGET_64BIT"
5252 ;; @@@ check ordering of operands in intel/nonintel syntax
5253 "maskmovdqu\t{%2, %1|%1, %2}"
5254 [(set_attr "type" "ssecvt")
5255 (set_attr "prefix_data16" "1")
5256 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile taking one SI memory operand; the
;; "memory" attribute is "load".  The unspec code, condition and output
;; template are on lines not visible in this excerpt.
5258 (define_insn "sse_ldmxcsr"
5259 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5263 [(set_attr "type" "sse")
5264 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SI memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR) with a dummy (const_int 0) input; the "memory"
;; attribute is "store".  The condition and output template are on lines
;; not visible in this excerpt.
5266 (define_insn "sse_stmxcsr"
5267 [(set (match_operand:SI 0 "memory_operand" "=m")
5268 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5271 [(set_attr "type" "sse")
5272 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code makes operand 0 a BLKmode MEM over a scratch
;; address and marks it volatile; the pattern applies UNSPEC_SFENCE to
;; that MEM.  The first line of the set is not visible in this excerpt.
5274 (define_expand "sse_sfence"
5276 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5277 "TARGET_SSE || TARGET_3DNOW_A"
5279 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5280 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the sfence pattern: a BLKmode operand (no predicate
;; or constraint) set from UNSPEC_SFENCE of itself.  Available with
;; TARGET_SSE or TARGET_3DNOW_A; "memory" attribute is "unknown".  The
;; output template is on a line not visible in this excerpt.
5283 (define_insn "*sse_sfence"
5284 [(set (match_operand:BLK 0 "" "")
5285 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5286 "TARGET_SSE || TARGET_3DNOW_A"
5288 [(set_attr "type" "sse")
5289 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile taking one address operand
;; (address_operand, constraint "p"); "memory" attribute is "unknown".
;; The unspec code, condition and output template are on lines not
;; visible in this excerpt.
5291 (define_insn "sse2_clflush"
5292 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5296 [(set_attr "type" "sse")
5297 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code makes operand 0 a BLKmode
;; MEM over a scratch address and marks it volatile; the pattern applies
;; UNSPEC_MFENCE to that MEM.  The first line of the set and the
;; condition are not visible in this excerpt.
5299 (define_expand "sse2_mfence"
5301 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5304 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5305 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the mfence pattern: a BLKmode operand (no predicate
;; or constraint) set from UNSPEC_MFENCE of itself; "memory" attribute is
;; "unknown".  The condition and output template are on lines not visible
;; in this excerpt.
5308 (define_insn "*sse2_mfence"
5309 [(set (match_operand:BLK 0 "" "")
5310 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5313 [(set_attr "type" "sse")
5314 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code makes operand 0 a BLKmode
;; MEM over a scratch address and marks it volatile; the pattern applies
;; UNSPEC_LFENCE to that MEM.  The first line of the set and the
;; condition are not visible in this excerpt.
5316 (define_expand "sse2_lfence"
5318 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5321 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5322 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the lfence pattern: a BLKmode operand (no predicate
;; or constraint) set from UNSPEC_LFENCE of itself; "memory" attribute is
;; "unknown".  The condition and output template are on lines not visible
;; in this excerpt.
5325 (define_insn "*sse2_lfence"
5326 [(set (match_operand:BLK 0 "" "")
5327 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5330 [(set_attr "type" "sse")
5331 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile over two SI register operands pinned by
;; constraints "a" and "c".  A single SImode pattern is used for both
;; 32- and 64-bit targets, for the reason given in the inline comment
;; below.  Length is fixed at 3 bytes.  The unspec code, condition and
;; output template are on lines not visible in this excerpt.
5333 (define_insn "sse3_mwait"
5334 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5335 (match_operand:SI 1 "register_operand" "c")]
5338 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5339 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5340 ;; we only need to set up 32bit registers.
5342 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile over
;; three SI register operands pinned by constraints "a", "c" and "d".
;; All three operands are printed explicitly.  Length is fixed at 3
;; bytes.  The unspec code is on a line not visible in this excerpt.
5344 (define_insn "sse3_monitor"
5345 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5346 (match_operand:SI 1 "register_operand" "c")
5347 (match_operand:SI 2 "register_operand" "d")]
5349 "TARGET_SSE3 && !TARGET_64BIT"
5350 "monitor\t%0, %1, %2"
5351 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0 (constraint
;; "a") is DImode; operands 1 and 2 ("c" and "d") stay SImode for the
;; reason given in the inline comment below.  Length is fixed at 3 bytes.
;; The unspec code and output template are on lines not visible in this
;; excerpt.
5353 (define_insn "sse3_monitor64"
5354 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5355 (match_operand:SI 1 "register_operand" "c")
5356 (match_operand:SI 2 "register_operand" "d")]
5358 "TARGET_SSE3 && TARGET_64BIT"
5359 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5360 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5361 ;; zero extended to 64bit, we only need to set up 32bit registers.
5363 [(set_attr "length" "3")])
5365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5367 ;; SSSE3 instructions
5369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw on V8HI (SSE register form).  Operand 1 is tied to the
;; destination; operand 2 is register-or-memory.  The pattern selects
;; adjacent element pairs (0,1), (2,3), (4,5), (6,7) first from operand 1
;; and then from operand 2.  The operators combining each pair and
;; assembling the result are on lines not visible in this excerpt
;; (presumably plus and vec_concat -- confirm against the full file).
5371 (define_insn "ssse3_phaddwv8hi3"
5372 [(set (match_operand:V8HI 0 "register_operand" "=x")
5378 (match_operand:V8HI 1 "register_operand" "0")
5379 (parallel [(const_int 0)]))
5380 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5382 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5383 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5386 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5387 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5389 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5390 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5395 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5396 (parallel [(const_int 0)]))
5397 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5399 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5400 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5403 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5404 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5406 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5407 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5409 "phaddw\t{%2, %0|%0, %2}"
5410 [(set_attr "type" "sseiadd")
5411 (set_attr "prefix_data16" "1")
5412 (set_attr "prefix_extra" "1")
5413 (set_attr "mode" "TI")])
;; phaddw on V4HI (MMX register form, constraint "y").  Operand 1 is tied
;; to the destination; operand 2 is register-or-memory.  The pattern
;; selects adjacent element pairs (0,1), (2,3) first from operand 1 and
;; then from operand 2.  The operators combining each pair are on lines
;; not visible in this excerpt.  Unlike the V8HI form it has no
;; prefix_data16 attribute and its mode is DI.
5415 (define_insn "ssse3_phaddwv4hi3"
5416 [(set (match_operand:V4HI 0 "register_operand" "=y")
5421 (match_operand:V4HI 1 "register_operand" "0")
5422 (parallel [(const_int 0)]))
5423 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5425 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5426 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5430 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5431 (parallel [(const_int 0)]))
5432 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5434 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5435 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5437 "phaddw\t{%2, %0|%0, %2}"
5438 [(set_attr "type" "sseiadd")
5439 (set_attr "prefix_extra" "1")
5440 (set_attr "mode" "DI")])
;; SSSE3 phaddd, 128-bit form: V4SI operands in SSE ("x") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template walks pairs (0,1), (2,3) of operand 1 and then of operand 2.
5442 (define_insn "ssse3_phadddv4si3"
5443 [(set (match_operand:V4SI 0 "register_operand" "=x")
5448 (match_operand:V4SI 1 "register_operand" "0")
5449 (parallel [(const_int 0)]))
5450 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5452 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5453 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5457 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5458 (parallel [(const_int 0)]))
5459 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5461 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5462 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5464 "phaddd\t{%2, %0|%0, %2}"
5465 [(set_attr "type" "sseiadd")
5466 (set_attr "prefix_data16" "1")
5467 (set_attr "prefix_extra" "1")
5468 (set_attr "mode" "TI")])
;; SSSE3 phaddd, 64-bit form: V2SI operands in MMX ("y") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template uses the single pair (0,1) of each input.
5470 (define_insn "ssse3_phadddv2si3"
5471 [(set (match_operand:V2SI 0 "register_operand" "=y")
5475 (match_operand:V2SI 1 "register_operand" "0")
5476 (parallel [(const_int 0)]))
5477 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5480 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5481 (parallel [(const_int 0)]))
5482 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5484 "phaddd\t{%2, %0|%0, %2}"
5485 [(set_attr "type" "sseiadd")
5486 (set_attr "prefix_extra" "1")
5487 (set_attr "mode" "DI")])
;; SSSE3 phaddsw, 128-bit form: V8HI operands in SSE ("x") registers.
;; Same operand layout as ssse3_phaddwv8hi3 (operand 1 tied to the
;; destination, pairs (0,1) .. (6,7) of each input) but emits phaddsw.
5489 (define_insn "ssse3_phaddswv8hi3"
5490 [(set (match_operand:V8HI 0 "register_operand" "=x")
5496 (match_operand:V8HI 1 "register_operand" "0")
5497 (parallel [(const_int 0)]))
5498 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5500 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5501 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5504 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5505 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5507 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5508 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5513 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5514 (parallel [(const_int 0)]))
5515 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5517 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5518 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5521 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5522 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5524 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5525 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5527 "phaddsw\t{%2, %0|%0, %2}"
5528 [(set_attr "type" "sseiadd")
5529 (set_attr "prefix_data16" "1")
5530 (set_attr "prefix_extra" "1")
5531 (set_attr "mode" "TI")])
;; SSSE3 phaddsw, 64-bit form: V4HI operands in MMX ("y") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template walks pairs (0,1), (2,3) of operand 1 and then of operand 2.
5533 (define_insn "ssse3_phaddswv4hi3"
5534 [(set (match_operand:V4HI 0 "register_operand" "=y")
5539 (match_operand:V4HI 1 "register_operand" "0")
5540 (parallel [(const_int 0)]))
5541 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5543 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5544 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5548 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5549 (parallel [(const_int 0)]))
5550 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5552 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5553 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5555 "phaddsw\t{%2, %0|%0, %2}"
5556 [(set_attr "type" "sseiadd")
5557 (set_attr "prefix_extra" "1")
5558 (set_attr "mode" "DI")])
;; SSSE3 phsubw, 128-bit form: V8HI operands in SSE ("x") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template walks pairs (0,1) .. (6,7) of operand 1 and then of operand 2.
5560 (define_insn "ssse3_phsubwv8hi3"
5561 [(set (match_operand:V8HI 0 "register_operand" "=x")
5567 (match_operand:V8HI 1 "register_operand" "0")
5568 (parallel [(const_int 0)]))
5569 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5571 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5572 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5575 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5576 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5578 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5579 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5584 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5585 (parallel [(const_int 0)]))
5586 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5588 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5589 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5592 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5593 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5595 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5596 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5598 "phsubw\t{%2, %0|%0, %2}"
5599 [(set_attr "type" "sseiadd")
5600 (set_attr "prefix_data16" "1")
5601 (set_attr "prefix_extra" "1")
5602 (set_attr "mode" "TI")])
;; SSSE3 phsubw, 64-bit form: V4HI operands in MMX ("y") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template walks pairs (0,1), (2,3) of operand 1 and then of operand 2.
5604 (define_insn "ssse3_phsubwv4hi3"
5605 [(set (match_operand:V4HI 0 "register_operand" "=y")
5610 (match_operand:V4HI 1 "register_operand" "0")
5611 (parallel [(const_int 0)]))
5612 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5614 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5615 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5619 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5620 (parallel [(const_int 0)]))
5621 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5623 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5624 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5626 "phsubw\t{%2, %0|%0, %2}"
5627 [(set_attr "type" "sseiadd")
5628 (set_attr "prefix_extra" "1")
5629 (set_attr "mode" "DI")])
;; SSSE3 phsubd, 128-bit form: V4SI operands in SSE ("x") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template walks pairs (0,1), (2,3) of operand 1 and then of operand 2.
5631 (define_insn "ssse3_phsubdv4si3"
5632 [(set (match_operand:V4SI 0 "register_operand" "=x")
5637 (match_operand:V4SI 1 "register_operand" "0")
5638 (parallel [(const_int 0)]))
5639 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5641 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5642 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5646 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5647 (parallel [(const_int 0)]))
5648 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5650 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5651 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5653 "phsubd\t{%2, %0|%0, %2}"
5654 [(set_attr "type" "sseiadd")
5655 (set_attr "prefix_data16" "1")
5656 (set_attr "prefix_extra" "1")
5657 (set_attr "mode" "TI")])
;; SSSE3 phsubd, 64-bit form: V2SI operands in MMX ("y") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template uses the single pair (0,1) of each input.
5659 (define_insn "ssse3_phsubdv2si3"
5660 [(set (match_operand:V2SI 0 "register_operand" "=y")
5664 (match_operand:V2SI 1 "register_operand" "0")
5665 (parallel [(const_int 0)]))
5666 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5669 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5670 (parallel [(const_int 0)]))
5671 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5673 "phsubd\t{%2, %0|%0, %2}"
5674 [(set_attr "type" "sseiadd")
5675 (set_attr "prefix_extra" "1")
5676 (set_attr "mode" "DI")])
;; SSSE3 phsubsw, 128-bit form: V8HI operands in SSE ("x") registers.
;; Same operand layout as ssse3_phsubwv8hi3 (operand 1 tied to the
;; destination, pairs (0,1) .. (6,7) of each input) but emits phsubsw.
5678 (define_insn "ssse3_phsubswv8hi3"
5679 [(set (match_operand:V8HI 0 "register_operand" "=x")
5685 (match_operand:V8HI 1 "register_operand" "0")
5686 (parallel [(const_int 0)]))
5687 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5689 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5690 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5693 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5694 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5696 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5697 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5702 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5703 (parallel [(const_int 0)]))
5704 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5706 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5707 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5710 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5711 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5713 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5714 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5716 "phsubsw\t{%2, %0|%0, %2}"
5717 [(set_attr "type" "sseiadd")
5718 (set_attr "prefix_data16" "1")
5719 (set_attr "prefix_extra" "1")
5720 (set_attr "mode" "TI")])
;; SSSE3 phsubsw, 64-bit form: V4HI operands in MMX ("y") registers.
;; Operand 1 is tied to the destination; operand 2 may be memory.  The
;; template walks pairs (0,1), (2,3) of operand 1 and then of operand 2.
5722 (define_insn "ssse3_phsubswv4hi3"
5723 [(set (match_operand:V4HI 0 "register_operand" "=y")
5728 (match_operand:V4HI 1 "register_operand" "0")
5729 (parallel [(const_int 0)]))
5730 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5732 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5733 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5737 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5738 (parallel [(const_int 0)]))
5739 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5741 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5742 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5744 "phsubsw\t{%2, %0|%0, %2}"
5745 [(set_attr "type" "sseiadd")
5746 (set_attr "prefix_extra" "1")
5747 (set_attr "mode" "DI")])
;; SSSE3 pmaddubsw, 128-bit form: two V16QI inputs produce a V8HI result
;; in an SSE ("x") register.  Operand 1 is tied to the destination and
;; operand 2 may be memory.  Each input is selected twice: once with an
;; index list starting at element 0 and once starting at element 1.
5749 (define_insn "ssse3_pmaddubsw128"
5750 [(set (match_operand:V8HI 0 "register_operand" "=x")
5755 (match_operand:V16QI 1 "register_operand" "0")
5756 (parallel [(const_int 0)
5766 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5767 (parallel [(const_int 0)
5777 (vec_select:V16QI (match_dup 1)
5778 (parallel [(const_int 1)
5787 (vec_select:V16QI (match_dup 2)
5788 (parallel [(const_int 1)
5795 (const_int 15)]))))))]
5797 "pmaddubsw\t{%2, %0|%0, %2}"
5798 [(set_attr "type" "sseiadd")
5799 (set_attr "prefix_data16" "1")
5800 (set_attr "prefix_extra" "1")
5801 (set_attr "mode" "TI")])
;; SSSE3 pmaddubsw, 64-bit form: two V8QI inputs produce a V4HI result in
;; an MMX ("y") register.  Operand 1 is tied to the destination and
;; operand 2 may be memory.  Each input is selected twice: once with an
;; index list starting at element 0 and once starting at element 1.
5803 (define_insn "ssse3_pmaddubsw"
5804 [(set (match_operand:V4HI 0 "register_operand" "=y")
5809 (match_operand:V8QI 1 "register_operand" "0")
5810 (parallel [(const_int 0)
5816 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5817 (parallel [(const_int 0)
5823 (vec_select:V8QI (match_dup 1)
5824 (parallel [(const_int 1)
5829 (vec_select:V8QI (match_dup 2)
5830 (parallel [(const_int 1)
5833 (const_int 7)]))))))]
5835 "pmaddubsw\t{%2, %0|%0, %2}"
5836 [(set_attr "type" "sseiadd")
5837 (set_attr "prefix_extra" "1")
5838 (set_attr "mode" "DI")])
;; Expander for the 128-bit pmulhrsw pattern.  Both V8HI inputs are
;; accepted as nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...) on them before the RTL
;; is generated.  The template contains an all-ones V8HI const_vector.
5840 (define_expand "ssse3_pmulhrswv8hi3"
5841 [(set (match_operand:V8HI 0 "register_operand" "")
5848 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5850 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5852 (const_vector:V8HI [(const_int 1) (const_int 1)
5853 (const_int 1) (const_int 1)
5854 (const_int 1) (const_int 1)
5855 (const_int 1) (const_int 1)]))
5858 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the 128-bit pmulhrsw pattern (V8HI, SSE "x" registers).
;; Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0").  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok (MULT, ...) accepts the operands.
5860 (define_insn "*ssse3_pmulhrswv8hi3"
5861 [(set (match_operand:V8HI 0 "register_operand" "=x")
5868 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5870 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5872 (const_vector:V8HI [(const_int 1) (const_int 1)
5873 (const_int 1) (const_int 1)
5874 (const_int 1) (const_int 1)
5875 (const_int 1) (const_int 1)]))
5877 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5878 "pmulhrsw\t{%2, %0|%0, %2}"
5879 [(set_attr "type" "sseimul")
5880 (set_attr "prefix_data16" "1")
5881 (set_attr "prefix_extra" "1")
5882 (set_attr "mode" "TI")])
;; Expander for the 64-bit pmulhrsw pattern.  Both V4HI inputs are
;; accepted as nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...) on them before the RTL
;; is generated.  The template contains an all-ones V4HI const_vector.
5884 (define_expand "ssse3_pmulhrswv4hi3"
5885 [(set (match_operand:V4HI 0 "register_operand" "")
5892 (match_operand:V4HI 1 "nonimmediate_operand" ""))
5894 (match_operand:V4HI 2 "nonimmediate_operand" "")))
5896 (const_vector:V4HI [(const_int 1) (const_int 1)
5897 (const_int 1) (const_int 1)]))
5900 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher for the 64-bit pmulhrsw pattern (V4HI, MMX "y" registers).
;; Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0").  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok (MULT, ...) accepts the operands.
5902 (define_insn "*ssse3_pmulhrswv4hi3"
5903 [(set (match_operand:V4HI 0 "register_operand" "=y")
5910 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5912 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5914 (const_vector:V4HI [(const_int 1) (const_int 1)
5915 (const_int 1) (const_int 1)]))
5917 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5918 "pmulhrsw\t{%2, %0|%0, %2}"
5919 [(set_attr "type" "sseimul")
5920 (set_attr "prefix_extra" "1")
5921 (set_attr "mode" "DI")])
;; SSSE3 pshufb, 128-bit form, modelled as an unspec on two V16QI inputs.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; The ';' after the output template merely opens an empty md comment.
5923 (define_insn "ssse3_pshufbv16qi3"
5924 [(set (match_operand:V16QI 0 "register_operand" "=x")
5925 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5926 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5929 "pshufb\t{%2, %0|%0, %2}";
5930 [(set_attr "type" "sselog1")
5931 (set_attr "prefix_data16" "1")
5932 (set_attr "prefix_extra" "1")
5933 (set_attr "mode" "TI")])
;; SSSE3 pshufb, 64-bit form, modelled as an unspec on two V8QI inputs in
;; MMX ("y") registers.  Operand 1 is tied to the destination; operand 2
;; may be memory.  The ';' after the template opens an empty md comment.
5935 (define_insn "ssse3_pshufbv8qi3"
5936 [(set (match_operand:V8QI 0 "register_operand" "=y")
5937 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5938 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5941 "pshufb\t{%2, %0|%0, %2}";
5942 [(set_attr "type" "sselog1")
5943 (set_attr "prefix_extra" "1")
5944 (set_attr "mode" "DI")])
;; SSSE3 psignb/psignw/psignd, 128-bit forms.  Instantiated over
;; SSEMODE124 (V16QI, V8HI, V4SI); <ssevecsize> supplies the b/w/d suffix.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; The ';' after the output template merely opens an empty md comment.
5946 (define_insn "ssse3_psign<mode>3"
5947 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5949 [(match_operand:SSEMODE124 1 "register_operand" "0")
5950 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5953 "psign<ssevecsize>\t{%2, %0|%0, %2}";
5954 [(set_attr "type" "sselog1")
5955 (set_attr "prefix_data16" "1")
5956 (set_attr "prefix_extra" "1")
5957 (set_attr "mode" "TI")])
;; SSSE3 psign, MMX ("y" register) forms.  Instantiated over the MMXMODEI
;; iterator, with <mmxvecsize> supplying the mnemonic suffix (both are
;; defined outside this section).  Operand 1 is tied to the destination;
;; operand 2 may be memory.
5959 (define_insn "ssse3_psign<mode>3"
5960 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5962 [(match_operand:MMXMODEI 1 "register_operand" "0")
5963 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5966 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
5967 [(set_attr "type" "sselog1")
5968 (set_attr "prefix_extra" "1")
5969 (set_attr "mode" "DI")])
;; SSSE3 palignr, 128-bit form on TImode values in SSE ("x") registers.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
;; Operand 1 is tied to the destination; operand 2 may be memory.
5971 (define_insn "ssse3_palignrti"
5972 [(set (match_operand:TI 0 "register_operand" "=x")
5973 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5974 (match_operand:TI 2 "nonimmediate_operand" "xm")
5975 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5979 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5980 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5982 [(set_attr "type" "sseishft")
5983 (set_attr "prefix_data16" "1")
5984 (set_attr "prefix_extra" "1")
5985 (set_attr "mode" "TI")])
;; SSSE3 palignr, 64-bit form on DImode values in MMX ("y") registers.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
;; Operand 1 is tied to the destination; operand 2 may be memory.
5987 (define_insn "ssse3_palignrdi"
5988 [(set (match_operand:DI 0 "register_operand" "=y")
5989 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5990 (match_operand:DI 2 "nonimmediate_operand" "ym")
5991 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5995 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5996 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5998 [(set_attr "type" "sseishft")
5999 (set_attr "prefix_extra" "1")
6000 (set_attr "mode" "DI")])
;; Vector absolute value via pabsb/pabsw/pabsd, 128-bit forms.
;; Instantiated over SSEMODE124 (V16QI, V8HI, V4SI); <ssevecsize> supplies
;; the b/w/d suffix.  The source may be memory and is not tied to the
;; destination.  The ';' after the template opens an empty md comment.
6002 (define_insn "abs<mode>2"
6003 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6004 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6006 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
6007 [(set_attr "type" "sselog1")
6008 (set_attr "prefix_data16" "1")
6009 (set_attr "prefix_extra" "1")
6010 (set_attr "mode" "TI")])
;; Vector absolute value via pabs, MMX ("y" register) forms.
;; Instantiated over the MMXMODEI iterator, with <mmxvecsize> supplying
;; the mnemonic suffix (both are defined outside this section).  The
;; source may be memory and is not tied to the destination.
6012 (define_insn "abs<mode>2"
6013 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6014 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6016 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
6017 [(set_attr "type" "sselog1")
6018 (set_attr "prefix_extra" "1")
6019 (set_attr "mode" "DI")])
6021 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6023 ;; AMD SSE4A instructions
6025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntss/movntsd on scalar MODEF values: stores SSE register
;; operand 1 to memory operand 0.  <ssemodefsuffix> supplies the mnemonic
;; suffix (MODEF and that attribute are defined outside this section).
6027 (define_insn "sse4a_movnt<mode>"
6028 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6030 [(match_operand:MODEF 1 "register_operand" "x")]
6033 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6034 [(set_attr "type" "ssemov")
6035 (set_attr "mode" "<MODE>")])
;; SSE4A movntss/movntsd taking a vector register: element 0 of the
;; SSEMODEF2P (V4SF or V2DF) operand 1 is selected and stored to the
;; scalar-mode memory operand 0.  <ssemodesuffixf2c> gives the s/d suffix.
6037 (define_insn "sse4a_vmmovnt<mode>"
6038 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6039 (unspec:<ssescalarmode>
6040 [(vec_select:<ssescalarmode>
6041 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6042 (parallel [(const_int 0)]))]
6045 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6046 [(set_attr "type" "ssemov")
6047 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: V2DI operand 1 is tied to the destination
;; and operands 2 and 3 are integer constants, printed as the two
;; immediates of the instruction.
6049 (define_insn "sse4a_extrqi"
6050 [(set (match_operand:V2DI 0 "register_operand" "=x")
6051 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6052 (match_operand 2 "const_int_operand" "")
6053 (match_operand 3 "const_int_operand" "")]
6056 "extrq\t{%3, %2, %0|%0, %2, %3}"
6057 [(set_attr "type" "sse")
6058 (set_attr "prefix_data16" "1")
6059 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: V2DI operand 1 is tied to the destination
;; and operand 2 is a V16QI value in an SSE register (no memory form).
6061 (define_insn "sse4a_extrq"
6062 [(set (match_operand:V2DI 0 "register_operand" "=x")
6063 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6064 (match_operand:V16QI 2 "register_operand" "x")]
6067 "extrq\t{%2, %0|%0, %2}"
6068 [(set_attr "type" "sse")
6069 (set_attr "prefix_data16" "1")
6070 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form: V2DI operand 1 is tied to the
;; destination, operand 2 is a V2DI SSE register, and operands 3 and 4
;; are integer constants printed as the instruction's two immediates.
6072 (define_insn "sse4a_insertqi"
6073 [(set (match_operand:V2DI 0 "register_operand" "=x")
6074 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6075 (match_operand:V2DI 2 "register_operand" "x")
6076 (match_operand 3 "const_int_operand" "")
6077 (match_operand 4 "const_int_operand" "")]
6080 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6081 [(set_attr "type" "sseins")
6082 (set_attr "prefix_rep" "1")
6083 (set_attr "mode" "TI")])
;; SSE4A insertq, register form: V2DI operand 1 is tied to the
;; destination and operand 2 is a V2DI SSE register (no memory form).
6085 (define_insn "sse4a_insertq"
6086 [(set (match_operand:V2DI 0 "register_operand" "=x")
6087 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6088 (match_operand:V2DI 2 "register_operand" "x")]
6091 "insertq\t{%2, %0|%0, %2}"
6092 [(set_attr "type" "sseins")
6093 (set_attr "prefix_rep" "1")
6094 (set_attr "mode" "TI")])
6096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6098 ;; Intel SSE4.1 instructions
6100 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.1 blendps/blendpd, expressed as a vec_merge of operand 2 and
;; operand 1 under the constant mask in operand 3.  Operand 1 is tied to
;; the destination; operand 2 may be memory.  The mask predicate's upper
;; bound comes from the <blendbits> attribute (defined outside this
;; section).
6102 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6103 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6104 (vec_merge:SSEMODEF2P
6105 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6106 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6107 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6109 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6110 [(set_attr "type" "ssemov")
6111 (set_attr "prefix_extra" "1")
6112 (set_attr "mode" "<MODE>")])
;; SSE4.1 blendvps/blendvpd.  Operand 3 carries the "Yz" constraint (the
;; xmm0-only class in the i386 constraint set), while operands 0, 1 and 2
;; use the *_not_xmm0 predicates.  Operand 1 is tied to the destination;
;; operand 2 may be memory.
6114 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6115 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6117 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6118 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6119 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6122 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6123 [(set_attr "type" "ssemov")
6124 (set_attr "prefix_extra" "1")
6125 (set_attr "mode" "<MODE>")])
;; SSE4.1 dpps/dppd with an 8-bit immediate in operand 3.  Operand 1 is
;; tied to the destination and marked commutative with operand 2 ("%0");
;; operand 2 may be memory.
6127 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6128 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6130 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6131 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6132 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6135 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6136 [(set_attr "type" "ssemul")
6137 (set_attr "prefix_extra" "1")
6138 (set_attr "mode" "<MODE>")])
;; SSE4.1 movntdqa: loads a V2DI value from memory operand 1 into SSE
;; register operand 0.  Modelled as an unspec; the source must be memory.
6140 (define_insn "sse4_1_movntdqa"
6141 [(set (match_operand:V2DI 0 "register_operand" "=x")
6142 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6145 "movntdqa\t{%1, %0|%0, %1}"
6146 [(set_attr "type" "ssecvt")
6147 (set_attr "prefix_extra" "1")
6148 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw on V16QI values with an 8-bit immediate in operand 3.
;; Modelled as an unspec.  Operand 1 is tied to the destination; operand
;; 2 may be memory.
6150 (define_insn "sse4_1_mpsadbw"
6151 [(set (match_operand:V16QI 0 "register_operand" "=x")
6152 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6153 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6154 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6157 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6158 [(set_attr "type" "sselog1")
6159 (set_attr "prefix_extra" "1")
6160 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI inputs produce one V8HI result.  Operand 1
;; is tied to the destination; operand 2 may be memory.
6162 (define_insn "sse4_1_packusdw"
6163 [(set (match_operand:V8HI 0 "register_operand" "=x")
6166 (match_operand:V4SI 1 "register_operand" "0"))
6168 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6170 "packusdw\t{%2, %0|%0, %2}"
6171 [(set_attr "type" "sselog")
6172 (set_attr "prefix_extra" "1")
6173 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb on V16QI values.  Operand 3 carries the "Yz"
;; constraint (the xmm0-only class in the i386 constraint set), while
;; operands 0, 1 and 2 use the *_not_xmm0 predicates.  Operand 1 is tied
;; to the destination; operand 2 may be memory.
6175 (define_insn "sse4_1_pblendvb"
6176 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6177 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6178 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6179 (match_operand:V16QI 3 "register_operand" "Yz")]
6182 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6183 [(set_attr "type" "ssemov")
6184 (set_attr "prefix_extra" "1")
6185 (set_attr "mode" "TI")])
;; SSE4.1 pblendw on V8HI values with an 8-bit immediate mask in operand
;; 3.  Operand 2 is listed before operand 1 in the template; operand 1 is
;; tied to the destination and operand 2 may be memory.
6187 (define_insn "sse4_1_pblendw"
6188 [(set (match_operand:V8HI 0 "register_operand" "=x")
6190 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6191 (match_operand:V8HI 1 "register_operand" "0")
6192 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6194 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6195 [(set_attr "type" "ssemov")
6196 (set_attr "prefix_extra" "1")
6197 (set_attr "mode" "TI")])
;; SSE4.1 phminposuw: single-input V8HI operation modelled as
;; UNSPEC_PHMINPOSUW.  The source may be memory and is not tied to the
;; destination.
6199 (define_insn "sse4_1_phminposuw"
6200 [(set (match_operand:V8HI 0 "register_operand" "=x")
6201 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6202 UNSPEC_PHMINPOSUW))]
6204 "phminposuw\t{%1, %0|%0, %1}"
6205 [(set_attr "type" "sselog1")
6206 (set_attr "prefix_extra" "1")
6207 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, register form: operand 1 is a full V16QI SSE
;; register, the element selection starts at index 0, and the result is
;; V8HI.
6209 (define_insn "sse4_1_extendv8qiv8hi2"
6210 [(set (match_operand:V8HI 0 "register_operand" "=x")
6213 (match_operand:V16QI 1 "register_operand" "x")
6214 (parallel [(const_int 0)
6223 "pmovsxbw\t{%1, %0|%0, %1}"
6224 [(set_attr "type" "ssemov")
6225 (set_attr "prefix_extra" "1")
6226 (set_attr "mode" "TI")])
;; Unnamed pmovsxbw variant that also accepts memory: operand 1 is a V8QI
;; register-or-memory value wrapped in vec_duplicate:V16QI; the result is
;; V8HI.
6228 (define_insn "*sse4_1_extendv8qiv8hi2"
6229 [(set (match_operand:V8HI 0 "register_operand" "=x")
6232 (vec_duplicate:V16QI
6233 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6234 (parallel [(const_int 0)
6243 "pmovsxbw\t{%1, %0|%0, %1}"
6244 [(set_attr "type" "ssemov")
6245 (set_attr "prefix_extra" "1")
6246 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, register form: operand 1 is a full V16QI SSE
;; register, the element selection starts at index 0, and the result is
;; V4SI.
6248 (define_insn "sse4_1_extendv4qiv4si2"
6249 [(set (match_operand:V4SI 0 "register_operand" "=x")
6252 (match_operand:V16QI 1 "register_operand" "x")
6253 (parallel [(const_int 0)
6258 "pmovsxbd\t{%1, %0|%0, %1}"
6259 [(set_attr "type" "ssemov")
6260 (set_attr "prefix_extra" "1")
6261 (set_attr "mode" "TI")])
;; Unnamed pmovsxbd variant that also accepts memory: operand 1 is a V4QI
;; register-or-memory value wrapped in vec_duplicate:V16QI; the result is
;; V4SI.
6263 (define_insn "*sse4_1_extendv4qiv4si2"
6264 [(set (match_operand:V4SI 0 "register_operand" "=x")
6267 (vec_duplicate:V16QI
6268 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6269 (parallel [(const_int 0)
6274 "pmovsxbd\t{%1, %0|%0, %1}"
6275 [(set_attr "type" "ssemov")
6276 (set_attr "prefix_extra" "1")
6277 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, register form: operand 1 is a full V16QI SSE
;; register, the element selection starts at index 0, and the result is
;; V2DI.
6279 (define_insn "sse4_1_extendv2qiv2di2"
6280 [(set (match_operand:V2DI 0 "register_operand" "=x")
6283 (match_operand:V16QI 1 "register_operand" "x")
6284 (parallel [(const_int 0)
6287 "pmovsxbq\t{%1, %0|%0, %1}"
6288 [(set_attr "type" "ssemov")
6289 (set_attr "prefix_extra" "1")
6290 (set_attr "mode" "TI")])
;; Unnamed pmovsxbq variant that also accepts memory: operand 1 is a V2QI
;; register-or-memory value wrapped in vec_duplicate:V16QI; the result is
;; V2DI.
6292 (define_insn "*sse4_1_extendv2qiv2di2"
6293 [(set (match_operand:V2DI 0 "register_operand" "=x")
6296 (vec_duplicate:V16QI
6297 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6298 (parallel [(const_int 0)
6301 "pmovsxbq\t{%1, %0|%0, %1}"
6302 [(set_attr "type" "ssemov")
6303 (set_attr "prefix_extra" "1")
6304 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, register form: operand 1 is a full V8HI SSE register,
;; the element selection starts at index 0, and the result is V4SI.
6306 (define_insn "sse4_1_extendv4hiv4si2"
6307 [(set (match_operand:V4SI 0 "register_operand" "=x")
6310 (match_operand:V8HI 1 "register_operand" "x")
6311 (parallel [(const_int 0)
6316 "pmovsxwd\t{%1, %0|%0, %1}"
6317 [(set_attr "type" "ssemov")
6318 (set_attr "prefix_extra" "1")
6319 (set_attr "mode" "TI")])
;; Unnamed pmovsxwd variant that also accepts memory; the result is V4SI.
;; Operand 1 is V4HI: pmovsxwd consumes four 16-bit elements, which is
;; what the pattern name says and what the zero-extending twin
;; (*sse4_1_zero_extendv4hiv4si2) already uses.  A V2HI operand would
;; describe only half of the data the instruction reads from memory.
6321 (define_insn "*sse4_1_extendv4hiv4si2"
6322 [(set (match_operand:V4SI 0 "register_operand" "=x")
6326 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6327 (parallel [(const_int 0)
6332 "pmovsxwd\t{%1, %0|%0, %1}"
6333 [(set_attr "type" "ssemov")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, register form: operand 1 is a full V8HI SSE register,
;; the element selection starts at index 0, and the result is V2DI.
6337 (define_insn "sse4_1_extendv2hiv2di2"
6338 [(set (match_operand:V2DI 0 "register_operand" "=x")
6341 (match_operand:V8HI 1 "register_operand" "x")
6342 (parallel [(const_int 0)
6345 "pmovsxwq\t{%1, %0|%0, %1}"
6346 [(set_attr "type" "ssemov")
6347 (set_attr "prefix_extra" "1")
6348 (set_attr "mode" "TI")])
;; Unnamed pmovsxwq variant that also accepts memory; the result is V2DI.
;; Operand 1 is V2HI: pmovsxwq consumes two 16-bit elements, which is
;; what the pattern name says and what the zero-extending twin
;; (*sse4_1_zero_extendv2hiv2di2) already uses.  A V8HI operand would
;; claim a full 128-bit memory access that the instruction never makes.
6350 (define_insn "*sse4_1_extendv2hiv2di2"
6351 [(set (match_operand:V2DI 0 "register_operand" "=x")
6355 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6356 (parallel [(const_int 0)
6359 "pmovsxwq\t{%1, %0|%0, %1}"
6360 [(set_attr "type" "ssemov")
6361 (set_attr "prefix_extra" "1")
6362 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, register form: operand 1 is a full V4SI SSE register,
;; the element selection starts at index 0, and the result is V2DI.
6364 (define_insn "sse4_1_extendv2siv2di2"
6365 [(set (match_operand:V2DI 0 "register_operand" "=x")
6368 (match_operand:V4SI 1 "register_operand" "x")
6369 (parallel [(const_int 0)
6372 "pmovsxdq\t{%1, %0|%0, %1}"
6373 [(set_attr "type" "ssemov")
6374 (set_attr "prefix_extra" "1")
6375 (set_attr "mode" "TI")])
;; Unnamed pmovsxdq variant that also accepts memory: operand 1 is a V2SI
;; register-or-memory value; the result is V2DI.
6377 (define_insn "*sse4_1_extendv2siv2di2"
6378 [(set (match_operand:V2DI 0 "register_operand" "=x")
6382 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6383 (parallel [(const_int 0)
6386 "pmovsxdq\t{%1, %0|%0, %1}"
6387 [(set_attr "type" "ssemov")
6388 (set_attr "prefix_extra" "1")
6389 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, register form: operand 1 is a full V16QI SSE
;; register, the element selection starts at index 0, and the result is
;; V8HI.
6391 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6392 [(set (match_operand:V8HI 0 "register_operand" "=x")
6395 (match_operand:V16QI 1 "register_operand" "x")
6396 (parallel [(const_int 0)
6405 "pmovzxbw\t{%1, %0|%0, %1}"
6406 [(set_attr "type" "ssemov")
6407 (set_attr "prefix_extra" "1")
6408 (set_attr "mode" "TI")])
;; Unnamed pmovzxbw variant that also accepts memory: operand 1 is a V8QI
;; register-or-memory value wrapped in vec_duplicate:V16QI; the result is
;; V8HI.
6410 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6411 [(set (match_operand:V8HI 0 "register_operand" "=x")
6414 (vec_duplicate:V16QI
6415 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6416 (parallel [(const_int 0)
6425 "pmovzxbw\t{%1, %0|%0, %1}"
6426 [(set_attr "type" "ssemov")
6427 (set_attr "prefix_extra" "1")
6428 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, register form: operand 1 is a full V16QI SSE
;; register, the element selection starts at index 0, and the result is
;; V4SI.
6430 (define_insn "sse4_1_zero_extendv4qiv4si2"
6431 [(set (match_operand:V4SI 0 "register_operand" "=x")
6434 (match_operand:V16QI 1 "register_operand" "x")
6435 (parallel [(const_int 0)
6440 "pmovzxbd\t{%1, %0|%0, %1}"
6441 [(set_attr "type" "ssemov")
6442 (set_attr "prefix_extra" "1")
6443 (set_attr "mode" "TI")])
;; Unnamed pmovzxbd variant that also accepts memory: operand 1 is a V4QI
;; register-or-memory value wrapped in vec_duplicate:V16QI; the result is
;; V4SI.
6445 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6446 [(set (match_operand:V4SI 0 "register_operand" "=x")
6449 (vec_duplicate:V16QI
6450 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6451 (parallel [(const_int 0)
6456 "pmovzxbd\t{%1, %0|%0, %1}"
6457 [(set_attr "type" "ssemov")
6458 (set_attr "prefix_extra" "1")
6459 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, register form: operand 1 is a full V16QI SSE
;; register, the element selection starts at index 0, and the result is
;; V2DI.
6461 (define_insn "sse4_1_zero_extendv2qiv2di2"
6462 [(set (match_operand:V2DI 0 "register_operand" "=x")
6465 (match_operand:V16QI 1 "register_operand" "x")
6466 (parallel [(const_int 0)
6469 "pmovzxbq\t{%1, %0|%0, %1}"
6470 [(set_attr "type" "ssemov")
6471 (set_attr "prefix_extra" "1")
6472 (set_attr "mode" "TI")])
;; Unnamed pmovzxbq variant that also accepts memory: operand 1 is a V2QI
;; register-or-memory value wrapped in vec_duplicate:V16QI; the result is
;; V2DI.
6474 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6475 [(set (match_operand:V2DI 0 "register_operand" "=x")
6478 (vec_duplicate:V16QI
6479 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6480 (parallel [(const_int 0)
6483 "pmovzxbq\t{%1, %0|%0, %1}"
6484 [(set_attr "type" "ssemov")
6485 (set_attr "prefix_extra" "1")
6486 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, register form: operand 1 is a full V8HI SSE register,
;; the element selection starts at index 0, and the result is V4SI.
6488 (define_insn "sse4_1_zero_extendv4hiv4si2"
6489 [(set (match_operand:V4SI 0 "register_operand" "=x")
6492 (match_operand:V8HI 1 "register_operand" "x")
6493 (parallel [(const_int 0)
6498 "pmovzxwd\t{%1, %0|%0, %1}"
6499 [(set_attr "type" "ssemov")
6500 (set_attr "prefix_extra" "1")
6501 (set_attr "mode" "TI")])
;; Unnamed pmovzxwd variant that also accepts memory: operand 1 is a V4HI
;; register-or-memory value; the result is V4SI.
6503 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6504 [(set (match_operand:V4SI 0 "register_operand" "=x")
6508 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6509 (parallel [(const_int 0)
6514 "pmovzxwd\t{%1, %0|%0, %1}"
6515 [(set_attr "type" "ssemov")
6516 (set_attr "prefix_extra" "1")
6517 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, register form: operand 1 is a full V8HI SSE register,
;; the element selection starts at index 0, and the result is V2DI.
6519 (define_insn "sse4_1_zero_extendv2hiv2di2"
6520 [(set (match_operand:V2DI 0 "register_operand" "=x")
6523 (match_operand:V8HI 1 "register_operand" "x")
6524 (parallel [(const_int 0)
6527 "pmovzxwq\t{%1, %0|%0, %1}"
6528 [(set_attr "type" "ssemov")
6529 (set_attr "prefix_extra" "1")
6530 (set_attr "mode" "TI")])
;; Unnamed pmovzxwq variant that also accepts memory: operand 1 is a V2HI
;; register-or-memory value; the result is V2DI.
6532 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6533 [(set (match_operand:V2DI 0 "register_operand" "=x")
6537 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6538 (parallel [(const_int 0)
6541 "pmovzxwq\t{%1, %0|%0, %1}"
6542 [(set_attr "type" "ssemov")
6543 (set_attr "prefix_extra" "1")
6544 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, register form: operand 1 is a full V4SI SSE register,
;; the element selection starts at index 0, and the result is V2DI.
6546 (define_insn "sse4_1_zero_extendv2siv2di2"
6547 [(set (match_operand:V2DI 0 "register_operand" "=x")
6550 (match_operand:V4SI 1 "register_operand" "x")
6551 (parallel [(const_int 0)
6554 "pmovzxdq\t{%1, %0|%0, %1}"
6555 [(set_attr "type" "ssemov")
6556 (set_attr "prefix_extra" "1")
6557 (set_attr "mode" "TI")])
;; Unnamed pmovzxdq variant that also accepts memory: operand 1 is a V2SI
;; register-or-memory value; the result is V2DI.
6559 (define_insn "*sse4_1_zero_extendv2siv2di2"
6560 [(set (match_operand:V2DI 0 "register_operand" "=x")
6564 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6565 (parallel [(const_int 0)
6568 "pmovzxdq\t{%1, %0|%0, %1}"
6569 [(set_attr "type" "ssemov")
6570 (set_attr "prefix_extra" "1")
6571 (set_attr "mode" "TI")])
6573 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6574 ;; But it is not really a compare instruction.
;; SSE4.1 ptest: sets FLAGS_REG (CCmode) from an unspec of two V2DI
;; inputs and writes no vector result.  Operand 0 must be an SSE
;; register; operand 1 may be memory.
6575 (define_insn "sse4_1_ptest"
6576 [(set (reg:CC FLAGS_REG)
6577 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6578 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6581 "ptest\t{%1, %0|%0, %1}"
6582 [(set_attr "type" "ssecomi")
6583 (set_attr "prefix_extra" "1")
6584 (set_attr "mode" "TI")])
;; SSE4.1 roundps/roundpd: packed form over SSEMODEF2P (V4SF, V2DF).
;; Operand 1 may be memory and is not tied to the destination; operand 2
;; is an immediate accepted by const_0_to_15_operand.
6586 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6587 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6589 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6590 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6593 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6594 [(set_attr "type" "ssecvt")
6595 (set_attr "prefix_extra" "1")
6596 (set_attr "mode" "<MODE>")])
;; SSE4.1 roundss/roundsd: scalar form expressed as a vec_merge of the
;; rounded operand 2 with operand 1, which is tied to the destination.
;; Operand 2 must be a register here; operand 3 is an immediate accepted
;; by const_0_to_15_operand.
6598 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6600 (vec_merge:SSEMODEF2P
6602 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6603 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6605 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6608 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6609 [(set_attr "type" "ssecvt")
6610 (set_attr "prefix_extra" "1")
6611 (set_attr "mode" "<MODE>")])
6613 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6615 ;; Intel SSE4.2 string/text processing instructions
6617 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined SSE4.2 pcmpestr pattern with three results: an SI value
;; (operand 0, constraint "c"), a V16QI value (operand 1, constraint
;; "Yz") and FLAGS_REG.  Its condition requires that reload has neither
;; started nor completed.  The split code sets ecx/xmm0/flags to nonzero
;; when curr_insn carries no REG_UNUSED note for the respective result,
;; and uses them to choose among gen_sse4_2_pcmpestri,
;; gen_sse4_2_pcmpestrm and gen_sse4_2_pcmpestr_cconly; the cconly form
;; is emitted only when flags is set and both ecx and xmm0 are clear.
;; The two alternatives differ only in operand 4: register or memory.
6619 (define_insn_and_split "sse4_2_pcmpestr"
6620 [(set (match_operand:SI 0 "register_operand" "=c,c")
6622 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6623 (match_operand:SI 3 "register_operand" "a,a")
6624 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6625 (match_operand:SI 5 "register_operand" "d,d")
6626 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6628 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6636 (set (reg:CC FLAGS_REG)
6645 && !(reload_completed || reload_in_progress)"
6650 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6651 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6652 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6655 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6656 operands[3], operands[4],
6657 operands[5], operands[6]));
6659 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6660 operands[3], operands[4],
6661 operands[5], operands[6]));
6662 if (flags && !(ecx || xmm0))
6663 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6664 operands[2], operands[3],
6665 operands[4], operands[5],
6669 [(set_attr "type" "sselog")
6670 (set_attr "prefix_data16" "1")
6671 (set_attr "prefix_extra" "1")
6672 (set_attr "memory" "none,load")
6673 (set_attr "mode" "TI")])
6675 (define_insn "sse4_2_pcmpestri"
6676 [(set (match_operand:SI 0 "register_operand" "=c,c")
6678 [(match_operand:V16QI 1 "register_operand" "x,x")
6679 (match_operand:SI 2 "register_operand" "a,a")
6680 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6681 (match_operand:SI 4 "register_operand" "d,d")
6682 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6684 (set (reg:CC FLAGS_REG)
6693 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6694 [(set_attr "type" "sselog")
6695 (set_attr "prefix_data16" "1")
6696 (set_attr "prefix_extra" "1")
6697 (set_attr "memory" "none,load")
6698 (set_attr "mode" "TI")])
6700 (define_insn "sse4_2_pcmpestrm"
6701 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6703 [(match_operand:V16QI 1 "register_operand" "x,x")
6704 (match_operand:SI 2 "register_operand" "a,a")
6705 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6706 (match_operand:SI 4 "register_operand" "d,d")
6707 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6709 (set (reg:CC FLAGS_REG)
6718 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6719 [(set_attr "type" "sselog")
6720 (set_attr "prefix_data16" "1")
6721 (set_attr "prefix_extra" "1")
6722 (set_attr "memory" "none,load")
6723 (set_attr "mode" "TI")])
;; Flags-only form of the explicit-length compare: the only value set is
;; FLAGS_REG, while operands 0 and 1 are scratch clobbers.  There are four
;; alternatives:
;;   0,1: the V16QI scratch is constrained to "Yz", the SI scratch is "X",
;;        and pcmpestrm is output;
;;   2,3: the SI scratch is constrained to "c", the V16QI scratch is "X",
;;        and pcmpestri is output.
;; Alternatives 1 and 3 take operand 4 from memory ("m"), which is why the
;; "memory" attribute reads none,load,none,load.
6725 (define_insn "sse4_2_pcmpestr_cconly"
6726 [(set (reg:CC FLAGS_REG)
6728 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6729 (match_operand:SI 3 "register_operand" "a,a,a,a")
6730 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6731 (match_operand:SI 5 "register_operand" "d,d,d,d")
6732 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6734 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6735 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6738 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6739 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6740 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6741 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6742 [(set_attr "type" "sselog")
6743 (set_attr "prefix_data16" "1")
6744 (set_attr "prefix_extra" "1")
6745 (set_attr "memory" "none,load,none,load")
6746 (set_attr "mode" "TI")])
;; Combined implicit-length string compare that produces three results at
;; once: operand 0 (constraint "c"), operand 1 (constraint "Yz") and
;; FLAGS_REG.  The condition only holds before reload.  The splitter looks
;; at the REG_UNUSED notes on the insn to learn which results are live
;; (the locals "ecx", "xmm0" and "flags") and emits sse4_2_pcmpistri for
;; operand 0, sse4_2_pcmpistrm for operand 1, and sse4_2_pcmpistr_cconly
;; when the flags are live but neither register result is.
;; NOTE(review): the guards around the first two emit_insn calls are not
;; visible in this view -- presumably `if (ecx)' / `if (xmm0)'; confirm.
6748 (define_insn_and_split "sse4_2_pcmpistr"
6749 [(set (match_operand:SI 0 "register_operand" "=c,c")
6751 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6752 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6753 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6755 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6761 (set (reg:CC FLAGS_REG)
6768 && !(reload_completed || reload_in_progress)"
6773 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6774 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6775 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6778 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6779 operands[3], operands[4]));
6781 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6782 operands[3], operands[4]));
6783 if (flags && !(ecx || xmm0))
6784 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6785 operands[2], operands[3],
6789 [(set_attr "type" "sselog")
6790 (set_attr "prefix_data16" "1")
6791 (set_attr "prefix_extra" "1")
6792 (set_attr "memory" "none,load")
6793 (set_attr "mode" "TI")])
6795 (define_insn "sse4_2_pcmpistri"
6796 [(set (match_operand:SI 0 "register_operand" "=c,c")
6798 [(match_operand:V16QI 1 "register_operand" "x,x")
6799 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6800 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6802 (set (reg:CC FLAGS_REG)
6809 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6810 [(set_attr "type" "sselog")
6811 (set_attr "prefix_data16" "1")
6812 (set_attr "prefix_extra" "1")
6813 (set_attr "memory" "none,load")
6814 (set_attr "mode" "TI")])
6816 (define_insn "sse4_2_pcmpistrm"
6817 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6819 [(match_operand:V16QI 1 "register_operand" "x,x")
6820 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6821 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6823 (set (reg:CC FLAGS_REG)
6830 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6831 [(set_attr "type" "sselog")
6832 (set_attr "prefix_data16" "1")
6833 (set_attr "prefix_extra" "1")
6834 (set_attr "memory" "none,load")
6835 (set_attr "mode" "TI")])
;; Flags-only form of the implicit-length compare: the only value set is
;; FLAGS_REG, while operands 0 and 1 are scratch clobbers.  There are four
;; alternatives:
;;   0,1: the V16QI scratch is constrained to "Yz", the SI scratch is "X",
;;        and pcmpistrm is output;
;;   2,3: the SI scratch is constrained to "c", the V16QI scratch is "X",
;;        and pcmpistri is output.
;; Alternatives 1 and 3 take operand 3 from memory ("m"), which is why the
;; "memory" attribute reads none,load,none,load.
6837 (define_insn "sse4_2_pcmpistr_cconly"
6838 [(set (reg:CC FLAGS_REG)
6840 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6841 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6842 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6844 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6845 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6848 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6849 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6850 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6851 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6852 [(set_attr "type" "sselog")
6853 (set_attr "prefix_data16" "1")
6854 (set_attr "prefix_extra" "1")
6855 (set_attr "memory" "none,load,none,load")
6856 (set_attr "mode" "TI")])
6858 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6860 ;; SSE5 instructions
6862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6864 ;; SSE5 parallel integer multiply/add instructions.
6865 ;; Note the instruction does not allow the value being added to be a memory
6866 ;; operation. However by pretending via the nonimmediate_operand predicate
6867 ;; that it does and splitting it later allows the following to be recognized:
6868 ;; a[i] = b[i] * c[i] + d[i];
6869 (define_insn "sse5_pmacsww"
6870 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6873 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6874 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6875 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6876 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6878 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6879 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6880 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6881 [(set_attr "type" "ssemuladd")
6882 (set_attr "mode" "TI")])
6884 ;; Split pmacsww with two memory operands into a load and the pmacsww.
6886 [(set (match_operand:V8HI 0 "register_operand" "")
6888 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6889 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6890 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6892 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6893 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6894 && !reg_mentioned_p (operands[0], operands[1])
6895 && !reg_mentioned_p (operands[0], operands[2])
6896 && !reg_mentioned_p (operands[0], operands[3])"
6899 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6900 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
6905 (define_insn "sse5_pmacssww"
6906 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6908 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6909 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6910 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6911 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6913 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6914 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6915 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6916 [(set_attr "type" "ssemuladd")
6917 (set_attr "mode" "TI")])
6919 ;; Note the instruction does not allow the value being added to be a memory
6920 ;; operation. However by pretending via the nonimmediate_operand predicate
6921 ;; that it does and splitting it later allows the following to be recognized:
6922 ;; a[i] = b[i] * c[i] + d[i];
6923 (define_insn "sse5_pmacsdd"
6924 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6927 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6928 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6929 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6930 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6932 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6933 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6934 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6935 [(set_attr "type" "ssemuladd")
6936 (set_attr "mode" "TI")])
6938 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
6940 [(set (match_operand:V4SI 0 "register_operand" "")
6942 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
6943 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6944 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
6946 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6947 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6948 && !reg_mentioned_p (operands[0], operands[1])
6949 && !reg_mentioned_p (operands[0], operands[2])
6950 && !reg_mentioned_p (operands[0], operands[3])"
6953 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
6954 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
6959 (define_insn "sse5_pmacssdd"
6960 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6962 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6963 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6964 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6965 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6967 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6968 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6969 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6970 [(set_attr "type" "ssemuladd")
6971 (set_attr "mode" "TI")])
6973 (define_insn "sse5_pmacssdql"
6974 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6979 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6980 (parallel [(const_int 1)
6983 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6984 (parallel [(const_int 1)
6986 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6987 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6989 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6990 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6991 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6992 [(set_attr "type" "ssemuladd")
6993 (set_attr "mode" "TI")])
6995 (define_insn "sse5_pmacssdqh"
6996 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7001 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7002 (parallel [(const_int 0)
7006 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7007 (parallel [(const_int 0)
7009 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7010 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7012 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7013 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7014 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7015 [(set_attr "type" "ssemuladd")
7016 (set_attr "mode" "TI")])
7018 (define_insn "sse5_pmacsdql"
7019 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7024 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7025 (parallel [(const_int 1)
7029 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7030 (parallel [(const_int 1)
7032 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7033 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7035 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7036 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7037 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7038 [(set_attr "type" "ssemuladd")
7039 (set_attr "mode" "TI")])
7041 (define_insn "sse5_pmacsdqh"
7042 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7047 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7048 (parallel [(const_int 0)
7052 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7053 (parallel [(const_int 0)
7055 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7056 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7058 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7059 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7060 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7061 [(set_attr "type" "ssemuladd")
7062 (set_attr "mode" "TI")])
7064 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
7065 (define_insn "sse5_pmacsswd"
7066 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7071 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7072 (parallel [(const_int 1)
7078 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7079 (parallel [(const_int 1)
7083 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7084 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7086 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7087 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7088 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7089 [(set_attr "type" "ssemuladd")
7090 (set_attr "mode" "TI")])
7092 (define_insn "sse5_pmacswd"
7093 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7098 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7099 (parallel [(const_int 1)
7105 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7106 (parallel [(const_int 1)
7110 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7111 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7113 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7114 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7115 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7116 [(set_attr "type" "ssemuladd")
7117 (set_attr "mode" "TI")])
7119 (define_insn "sse5_pmadcsswd"
7120 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7126 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7127 (parallel [(const_int 0)
7133 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7134 (parallel [(const_int 0)
7142 (parallel [(const_int 1)
7149 (parallel [(const_int 1)
7153 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7154 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7156 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7157 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7158 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7159 [(set_attr "type" "ssemuladd")
7160 (set_attr "mode" "TI")])
7162 (define_insn "sse5_pmadcswd"
7163 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7169 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7170 (parallel [(const_int 0)
7176 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7177 (parallel [(const_int 0)
7185 (parallel [(const_int 1)
7192 (parallel [(const_int 1)
7196 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7197 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7199 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7200 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7201 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7202 [(set_attr "type" "ssemuladd")
7203 (set_attr "mode" "TI")])
7205 ;; SSE5 parallel XMM conditional moves
7206 (define_insn "sse5_pcmov_<mode>"
7207 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7208 (if_then_else:SSEMODE
7209 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7210 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7211 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7212 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7214 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7215 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7216 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7217 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7218 andps\t{%2, %0|%0, %2}
7219 andnps\t{%1, %0|%0, %1}"
7220 [(set_attr "type" "sse4arg")])
7222 ;; SSE5 horizontal add/subtract instructions
7223 (define_insn "sse5_phaddbw"
7224 [(set (match_operand:V8HI 0 "register_operand" "=x")
7228 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7229 (parallel [(const_int 0)
7240 (parallel [(const_int 1)
7247 (const_int 15)])))))]
7249 "phaddbw\t{%1, %0|%0, %1}"
7250 [(set_attr "type" "sseiadd1")])
7252 (define_insn "sse5_phaddbd"
7253 [(set (match_operand:V4SI 0 "register_operand" "=x")
7258 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7259 (parallel [(const_int 0)
7266 (parallel [(const_int 1)
7274 (parallel [(const_int 2)
7281 (parallel [(const_int 3)
7284 (const_int 15)]))))))]
7286 "phaddbd\t{%1, %0|%0, %1}"
7287 [(set_attr "type" "sseiadd1")])
7289 (define_insn "sse5_phaddbq"
7290 [(set (match_operand:V2DI 0 "register_operand" "=x")
7296 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7297 (parallel [(const_int 0)
7302 (parallel [(const_int 1)
7308 (parallel [(const_int 2)
7313 (parallel [(const_int 3)
7320 (parallel [(const_int 8)
7325 (parallel [(const_int 9)
7331 (parallel [(const_int 10)
7336 (parallel [(const_int 11)
7337 (const_int 15)])))))))]
7339 "phaddbq\t{%1, %0|%0, %1}"
7340 [(set_attr "type" "sseiadd1")])
7342 (define_insn "sse5_phaddwd"
7343 [(set (match_operand:V4SI 0 "register_operand" "=x")
7347 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7348 (parallel [(const_int 0)
7355 (parallel [(const_int 1)
7358 (const_int 7)])))))]
7360 "phaddwd\t{%1, %0|%0, %1}"
7361 [(set_attr "type" "sseiadd1")])
7363 (define_insn "sse5_phaddwq"
7364 [(set (match_operand:V2DI 0 "register_operand" "=x")
7369 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7370 (parallel [(const_int 0)
7375 (parallel [(const_int 1)
7381 (parallel [(const_int 2)
7386 (parallel [(const_int 3)
7387 (const_int 7)]))))))]
7389 "phaddwq\t{%1, %0|%0, %1}"
7390 [(set_attr "type" "sseiadd1")])
7392 (define_insn "sse5_phadddq"
7393 [(set (match_operand:V2DI 0 "register_operand" "=x")
7397 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7398 (parallel [(const_int 0)
7403 (parallel [(const_int 1)
7404 (const_int 3)])))))]
7406 "phadddq\t{%1, %0|%0, %1}"
7407 [(set_attr "type" "sseiadd1")])
7409 (define_insn "sse5_phaddubw"
7410 [(set (match_operand:V8HI 0 "register_operand" "=x")
7414 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7415 (parallel [(const_int 0)
7426 (parallel [(const_int 1)
7433 (const_int 15)])))))]
7435 "phaddubw\t{%1, %0|%0, %1}"
7436 [(set_attr "type" "sseiadd1")])
7438 (define_insn "sse5_phaddubd"
7439 [(set (match_operand:V4SI 0 "register_operand" "=x")
7444 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7445 (parallel [(const_int 0)
7452 (parallel [(const_int 1)
7460 (parallel [(const_int 2)
7467 (parallel [(const_int 3)
7470 (const_int 15)]))))))]
7472 "phaddubd\t{%1, %0|%0, %1}"
7473 [(set_attr "type" "sseiadd1")])
7475 (define_insn "sse5_phaddubq"
7476 [(set (match_operand:V2DI 0 "register_operand" "=x")
7482 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7483 (parallel [(const_int 0)
7488 (parallel [(const_int 1)
7494 (parallel [(const_int 2)
7499 (parallel [(const_int 3)
7506 (parallel [(const_int 8)
7511 (parallel [(const_int 9)
7517 (parallel [(const_int 10)
7522 (parallel [(const_int 11)
7523 (const_int 15)])))))))]
7525 "phaddubq\t{%1, %0|%0, %1}"
7526 [(set_attr "type" "sseiadd1")])
7528 (define_insn "sse5_phadduwd"
7529 [(set (match_operand:V4SI 0 "register_operand" "=x")
7533 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7534 (parallel [(const_int 0)
7541 (parallel [(const_int 1)
7544 (const_int 7)])))))]
7546 "phadduwd\t{%1, %0|%0, %1}"
7547 [(set_attr "type" "sseiadd1")])
7549 (define_insn "sse5_phadduwq"
7550 [(set (match_operand:V2DI 0 "register_operand" "=x")
7555 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7556 (parallel [(const_int 0)
7561 (parallel [(const_int 1)
7567 (parallel [(const_int 2)
7572 (parallel [(const_int 3)
7573 (const_int 7)]))))))]
7575 "phadduwq\t{%1, %0|%0, %1}"
7576 [(set_attr "type" "sseiadd1")])
7578 (define_insn "sse5_phaddudq"
7579 [(set (match_operand:V2DI 0 "register_operand" "=x")
7583 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7584 (parallel [(const_int 0)
7589 (parallel [(const_int 1)
7590 (const_int 3)])))))]
7592 "phaddudq\t{%1, %0|%0, %1}"
7593 [(set_attr "type" "sseiadd1")])
7595 (define_insn "sse5_phsubbw"
7596 [(set (match_operand:V8HI 0 "register_operand" "=x")
7600 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7601 (parallel [(const_int 0)
7612 (parallel [(const_int 1)
7619 (const_int 15)])))))]
7621 "phsubbw\t{%1, %0|%0, %1}"
7622 [(set_attr "type" "sseiadd1")])
7624 (define_insn "sse5_phsubwd"
7625 [(set (match_operand:V4SI 0 "register_operand" "=x")
7629 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7630 (parallel [(const_int 0)
7637 (parallel [(const_int 1)
7640 (const_int 7)])))))]
7642 "phsubwd\t{%1, %0|%0, %1}"
7643 [(set_attr "type" "sseiadd1")])
7645 (define_insn "sse5_phsubdq"
7646 [(set (match_operand:V2DI 0 "register_operand" "=x")
7650 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7651 (parallel [(const_int 0)
7656 (parallel [(const_int 1)
7657 (const_int 3)])))))]
7659 "phsubdq\t{%1, %0|%0, %1}"
7660 [(set_attr "type" "sseiadd1")])
7662 ;; SSE5 permute instructions
7663 (define_insn "sse5_pperm"
7664 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7666 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7667 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7668 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7669 UNSPEC_SSE5_PERMUTE))]
7670 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7671 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7672 [(set_attr "type" "sse4arg")
7673 (set_attr "mode" "TI")])
7675 ;; The following are for the various unpack insns which don't need the first
7676 ;; source operand, so we can just use the output operand for the first operand.
7677 ;; This allows either of the other two operands to be a memory operand. We
7678 ;; can't just use the first operand as an argument to the normal pperm because
7679 ;; then an output only argument, suddenly becomes an input operand.
;; sse5_pperm_zero_v16qi_v8hi: V16QI operand 1 -> V8HI operand 0 via pperm.
;; Operand 2 is the modeless parallel of const_int's; operand 3 is a V16QI
;; value that is only referenced through (use ...).  The two constraint
;; alternatives pair operand 1 "xm" with operand 3 "x" and operand 1 "x"
;; with operand 3 "xm", so the insn condition must require that operand 1
;; or operand 3 is a register.  It must not test operand 2: a parallel of
;; const_int's can never satisfy register_operand, which would silently
;; reject every form with operand 1 in memory.
7680 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7681 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7684 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7685 (match_operand 2 "" "")))) ;; parallel with const_int's
7686 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7688 && (register_operand (operands[1], V16QImode)
7689 || register_operand (operands[3], V16QImode))"
7690 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7691 [(set_attr "type" "sseadd")
7692 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v16qi_v8hi: V16QI operand 1 -> V8HI operand 0 via pperm.
;; Operand 2 is the modeless parallel of const_int's; operand 3 is a V16QI
;; value that is only referenced through (use ...).  The two constraint
;; alternatives pair operand 1 "xm" with operand 3 "x" and operand 1 "x"
;; with operand 3 "xm", so the insn condition must require that operand 1
;; or operand 3 is a register.  It must not test operand 2: a parallel of
;; const_int's can never satisfy register_operand, which would silently
;; reject every form with operand 1 in memory.
7694 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7695 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7698 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7699 (match_operand 2 "" "")))) ;; parallel with const_int's
7700 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7702 && (register_operand (operands[1], V16QImode)
7703 || register_operand (operands[3], V16QImode))"
7704 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7705 [(set_attr "type" "sseadd")
7706 (set_attr "mode" "TI")])
;; sse5_pperm_zero_v8hi_v4si: V8HI operand 1 -> V4SI operand 0 via pperm.
;; Operand 2 is the modeless parallel of const_int's; operand 3 is a V16QI
;; value that is only referenced through (use ...).  The two constraint
;; alternatives pair operand 1 "xm" with operand 3 "x" and operand 1 "x"
;; with operand 3 "xm", so the insn condition must require that operand 1
;; or operand 3 is a register.  It must not test operand 2: a parallel of
;; const_int's can never satisfy register_operand, which would silently
;; reject every form with operand 1 in memory.
7708 (define_insn "sse5_pperm_zero_v8hi_v4si"
7709 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7712 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7713 (match_operand 2 "" "")))) ;; parallel with const_int's
7714 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7716 && (register_operand (operands[1], V8HImode)
7717 || register_operand (operands[3], V16QImode))"
7718 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7719 [(set_attr "type" "sseadd")
7720 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v8hi_v4si: V8HI operand 1 -> V4SI operand 0 via pperm.
;; Operand 2 is the modeless parallel of const_int's; operand 3 is a V16QI
;; value that is only referenced through (use ...).  The two constraint
;; alternatives pair operand 1 "xm" with operand 3 "x" and operand 1 "x"
;; with operand 3 "xm", so the insn condition must require that operand 1
;; or operand 3 is a register.  It must not test operand 2: a parallel of
;; const_int's can never satisfy register_operand, which would silently
;; reject every form with operand 1 in memory.
7722 (define_insn "sse5_pperm_sign_v8hi_v4si"
7723 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7726 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7727 (match_operand 2 "" "")))) ;; parallel with const_int's
7728 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7730 && (register_operand (operands[1], V8HImode)
7731 || register_operand (operands[3], V16QImode))"
7732 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7733 [(set_attr "type" "sseadd")
7734 (set_attr "mode" "TI")])
;; sse5_pperm_zero_v4si_v2di: V4SI operand 1 -> V2DI operand 0 via pperm.
;; Operand 2 is the modeless parallel of const_int's; operand 3 is a V16QI
;; value that is only referenced through (use ...).  The two constraint
;; alternatives pair operand 1 "xm" with operand 3 "x" and operand 1 "x"
;; with operand 3 "xm", so the insn condition must require that operand 1
;; or operand 3 is a register.  It must not test operand 2: a parallel of
;; const_int's can never satisfy register_operand, which would silently
;; reject every form with operand 1 in memory.
7736 (define_insn "sse5_pperm_zero_v4si_v2di"
7737 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7740 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7741 (match_operand 2 "" "")))) ;; parallel with const_int's
7742 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7744 && (register_operand (operands[1], V4SImode)
7745 || register_operand (operands[3], V16QImode))"
7746 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7747 [(set_attr "type" "sseadd")
7748 (set_attr "mode" "TI")])
;; sse5_pperm_sign_v4si_v2di: V4SI operand 1 -> V2DI operand 0 via pperm.
;; Operand 2 is the modeless parallel of const_int's; operand 3 is a V16QI
;; value that is only referenced through (use ...).  The two constraint
;; alternatives pair operand 1 "xm" with operand 3 "x" and operand 1 "x"
;; with operand 3 "xm", so the insn condition must require that operand 1
;; or operand 3 is a register.  It must not test operand 2: a parallel of
;; const_int's can never satisfy register_operand, which would silently
;; reject every form with operand 1 in memory.
7750 (define_insn "sse5_pperm_sign_v4si_v2di"
7751 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7754 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7755 (match_operand 2 "" "")))) ;; parallel with const_int's
7756 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7758 && (register_operand (operands[1], V4SImode)
7759 || register_operand (operands[3], V16QImode))"
7760 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7761 [(set_attr "type" "sseadd")
7762 (set_attr "mode" "TI")])
7764 ;; SSE5 pack instructions that combine two vectors into a smaller vector
7765 (define_insn "sse5_pperm_pack_v2di_v4si"
7766 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7769 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7771 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7772 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7773 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7774 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7775 [(set_attr "type" "sse4arg")
7776 (set_attr "mode" "TI")])
7778 (define_insn "sse5_pperm_pack_v4si_v8hi"
7779 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7782 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7784 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7785 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7786 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7787 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7788 [(set_attr "type" "sse4arg")
7789 (set_attr "mode" "TI")])
7791 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7792 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7795 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7797 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7798 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7799 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7800 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7801 [(set_attr "type" "sse4arg")
7802 (set_attr "mode" "TI")])
7804 ;; Floating point permutation (permps, permpd)
7805 (define_insn "sse5_perm<mode>"
7806 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7808 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7809 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7810 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7811 UNSPEC_SSE5_PERMUTE))]
7812 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7813 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7814 [(set_attr "type" "sse4arg")
7815 (set_attr "mode" "<MODE>")])
7817 ;; SSE5 packed rotate instructions
7818 (define_insn "rotl<mode>3"
7819 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7821 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
7822 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
7824 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7825 [(set_attr "type" "sseishft")
7826 (set_attr "mode" "TI")])
7828 (define_insn "sse5_rotl<mode>3"
7829 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7831 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7832 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
7833 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7834 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7835 [(set_attr "type" "sseishft")
7836 (set_attr "mode" "TI")])
7838 ;; SSE5 packed shift instructions. Note negative values for the shift amount
7839 ;; convert this into a right shift instead of left shift. For now, model this
7840 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
7841 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT
;; rtx code, so the logical shift is modeled with an UNSPEC as well.
7842 (define_insn "sse5_ashl<mode>3"
7843 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7845 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7846 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7847 UNSPEC_SSE5_ASHIFT))]
7848 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7849 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7850 [(set_attr "type" "sseishft")
7851 (set_attr "mode" "TI")])
7853 (define_insn "sse5_lshl<mode>3"
7854 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7856 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7857 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7858 UNSPEC_SSE5_LSHIFT))]
7859 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7860 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7861 [(set_attr "type" "sseishft")
7862 (set_attr "mode" "TI")])
7864 ;; SSE5 FRCZ support
7866 (define_insn "sse5_frcz<mode>2"
7867 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7869 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
7872 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
7873 [(set_attr "type" "ssecvt1")
7874 (set_attr "prefix_extra" "1")
7875 (set_attr "mode" "<MODE>")])
7878 (define_insn "sse5_vmfrcz<mode>2"
7879 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7880 (vec_merge:SSEMODEF2P
7882 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
7884 (match_operand:SSEMODEF2P 1 "register_operand" "0")
7887 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
7888 [(set_attr "type" "ssecvt1")
7889 (set_attr "prefix_extra" "1")
7890 (set_attr "mode" "<MODE>")])
7892 (define_insn "sse5_cvtph2ps"
7893 [(set (match_operand:V4SF 0 "register_operand" "=x")
7894 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
7897 "cvtph2ps\t{%1, %0|%0, %1}"
7898 [(set_attr "type" "ssecvt")
7899 (set_attr "mode" "V4SF")])
7901 (define_insn "sse5_cvtps2ph"
7902 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
7903 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
7906 "cvtps2ph\t{%1, %0|%0, %1}"
7907 [(set_attr "type" "ssecvt")
7908 (set_attr "mode" "V4SF")])
7910 ;; Scalar versions of the com instructions that use vector types that are
7911 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
7912 ;; com instructions fill in 0's in the upper bits instead of leaving them
7913 ;; unmodified, so we use const_vector of 0 instead of match_dup.
7914 (define_expand "sse5_vmmaskcmp<mode>3"
7915 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
7916 (vec_merge:SSEMODEF2P
7917 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7918 [(match_operand:SSEMODEF2P 2 "register_operand" "")
7919 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
7924 operands[4] = CONST0_RTX (<MODE>mode);
7927 (define_insn "*sse5_vmmaskcmp<mode>3"
7928 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7929 (vec_merge:SSEMODEF2P
7930 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7931 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7932 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
7933 (match_operand:SSEMODEF2P 4 "")
7936 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
7937 [(set_attr "type" "sse4arg")
7938 (set_attr "mode" "<ssescalarmode>")])
7940 ;; We don't have a comparison operator that always returns true/false, so
7941 ;; handle comfalse and comtrue specially.
;; Operand 3 is a compile-time integer that selects which of the four
;; always-false / always-true compares to output; the switch on
;; INTVAL (operands[3]) picks the mnemonic.  The first two arms output
;; comfalses* / comfalsep*, the last two output comtrues* / comtruep*.
;; The last two arms used to repeat the comfalse mnemonics, so a request
;; for an always-true compare produced an always-false instruction.
;; NOTE(review): the case labels are not visible in this view; the third
;; and fourth arms are presumed to be the always-true selectors -- confirm.
7942 (define_insn "sse5_com_tf<mode>3"
7943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7945 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
7946 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
7947 (match_operand:SI 3 "const_int_operand" "n")]
7948 UNSPEC_SSE5_TRUEFALSE))]
7951 const char *ret = NULL;
7953 switch (INTVAL (operands[3]))
7956 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7960 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7964 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7968 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7977 [(set_attr "type" "ssecmp")
7978 (set_attr "mode" "<MODE>")])
;; Full-vector float compare over SSEMODEF2P (V4SF, V2DF): operand 0 is set
;; to the comparison (operator in operand 1) of operand 2 (register) with
;; operand 3 (register or memory).
;; Output is "com" + %Y1 + <ssemodesuffixf4> ("ps" for V4SF, "pd" for V2DF).
;; NOTE(review): listing line 7985 is not visible in this excerpt, so the
;; enabling condition cannot be confirmed from here.
7980 (define_insn "sse5_maskcmp<mode>3"
7981 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7982 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7983 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7984 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
7986 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
7987 [(set_attr "type" "ssecmp")
7988 (set_attr "mode" "<MODE>")])
;; Integer-vector compare over SSEMODE1248 (V16QI, V8HI, V4SI, V2DI), using
;; the "ix86_comparison_int_operator" predicate for operand 1.  Operand 2 is
;; a register, operand 3 a register or memory.
;; Output is "pcom" + %Y1 + <ssevecsize> ("b", "w", "d" or "q").
;; NOTE(review): the "type" attribute here is "sse4arg", whereas the float and
;; unsigned compare patterns next to it use "ssecmp" -- confirm this is
;; intended.
;; NOTE(review): listing line 7995 is not visible in this excerpt, so the
;; enabling condition cannot be confirmed from here.
7990 (define_insn "sse5_maskcmp<mode>3"
7991 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7992 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
7993 [(match_operand:SSEMODE1248 2 "register_operand" "x")
7994 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
7996 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
7997 [(set_attr "type" "sse4arg")
7998 (set_attr "mode" "TI")])
;; Integer-vector compare over SSEMODE1248 whose operator (operand 1) must
;; satisfy "ix86_comparison_uns_operator".  Operand 2 is a register,
;; operand 3 a register or memory.
;; Output is "pcom" + %Y1 + "u" + <ssevecsize> ("b", "w", "d" or "q").
;; NOTE(review): listing line 8005 is not visible in this excerpt, so the
;; enabling condition cannot be confirmed from here.
8000 (define_insn "sse5_maskcmp_uns<mode>3"
8001 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8002 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8003 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8004 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8006 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8007 [(set_attr "type" "ssecmp")
8008 (set_attr "mode" "TI")])
8010 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8011 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8012 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as a plain unsigned compare, but the
;; comparison operator (operand 1) is wrapped in an unspec tagged
;; UNSPEC_SSE5_UNSIGNED_CMP rather than appearing directly as the set source.
;; Output is "pcom" + %Y1 + "u" + <ssevecsize>.
;; NOTE(review): listing lines 8015 and 8020 are not visible in this excerpt,
;; so the unspec header and the enabling condition cannot be confirmed.
8013 (define_insn "sse5_maskcmp_uns2<mode>3"
8014 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8016 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8017 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8018 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8019 UNSPEC_SSE5_UNSIGNED_CMP))]
8021 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8022 [(set_attr "type" "ssecmp")
8023 (set_attr "mode" "TI")])
8025 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8026 ;; being added here to be complete.
;; sse5_pcom_tf<mode>3: integer-vector always-true / always-false compare
;; over SSEMODE1248, modelled as an unspec (UNSPEC_SSE5_TRUEFALSE) of
;; operand 1 (register), operand 2 (register or memory) and operand 3, a
;; compile-time integer.
;; The output code returns the "pcomtrue" template when operand 3 is nonzero
;; and the "pcomfalse" template otherwise, each followed by <ssevecsize>.
;; NOTE(review): listing lines 8029 and 8034-8035 are not visible in this
;; excerpt, so the unspec header and the enabling condition cannot be
;; confirmed from here.
8027 (define_insn "sse5_pcom_tf<mode>3"
8028 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8030 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8031 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8032 (match_operand:SI 3 "const_int_operand" "n")]
8033 UNSPEC_SSE5_TRUEFALSE))]
8036 return ((INTVAL (operands[3]) != 0)
8037 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8038 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8040 [(set_attr "type" "ssecmp")
8041 (set_attr "mode" "TI")])
;; aesenc: V2DI unspec of operand 1 and operand 2.  Operand 1 is tied to the
;; destination (constraint "0"), so the emitted instruction has two operands:
;; operand 2 (register or memory) as source and operand 0 as destination.
;; NOTE(review): listing lines 8047-8048 are not visible in this excerpt, so
;; the unspec code and the enabling condition cannot be confirmed from here.
8043 (define_insn "aesenc"
8044 [(set (match_operand:V2DI 0 "register_operand" "=x")
8045 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8046 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8049 "aesenc\t{%2, %0|%0, %2}"
8050 [(set_attr "type" "sselog1")
8051 (set_attr "prefix_extra" "1")
8052 (set_attr "mode" "TI")])
;; aesenclast: V2DI unspec (UNSPEC_AESENCLAST) of operand 1 and operand 2.
;; Operand 1 is tied to the destination (constraint "0"); the emitted
;; instruction takes operand 2 (register or memory) as source and operand 0
;; as destination.
;; NOTE(review): listing line 8059 is not visible in this excerpt, so the
;; enabling condition cannot be confirmed from here.
8054 (define_insn "aesenclast"
8055 [(set (match_operand:V2DI 0 "register_operand" "=x")
8056 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8057 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8058 UNSPEC_AESENCLAST))]
8060 "aesenclast\t{%2, %0|%0, %2}"
8061 [(set_attr "type" "sselog1")
8062 (set_attr "prefix_extra" "1")
8063 (set_attr "mode" "TI")])
;; aesdec: V2DI unspec of operand 1 and operand 2.  Operand 1 is tied to the
;; destination (constraint "0"), so the emitted instruction has two operands:
;; operand 2 (register or memory) as source and operand 0 as destination.
;; NOTE(review): listing lines 8069-8070 are not visible in this excerpt, so
;; the unspec code and the enabling condition cannot be confirmed from here.
8065 (define_insn "aesdec"
8066 [(set (match_operand:V2DI 0 "register_operand" "=x")
8067 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8068 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8071 "aesdec\t{%2, %0|%0, %2}"
8072 [(set_attr "type" "sselog1")
8073 (set_attr "prefix_extra" "1")
8074 (set_attr "mode" "TI")])
;; aesdeclast: V2DI unspec (UNSPEC_AESDECLAST) of operand 1 and operand 2.
;; Operand 1 is tied to the destination (constraint "0"); the emitted
;; instruction takes operand 2 (register or memory) as source and operand 0
;; as destination.
;; NOTE(review): listing line 8081 is not visible in this excerpt, so the
;; enabling condition cannot be confirmed from here.
8076 (define_insn "aesdeclast"
8077 [(set (match_operand:V2DI 0 "register_operand" "=x")
8078 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8079 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8080 UNSPEC_AESDECLAST))]
8082 "aesdeclast\t{%2, %0|%0, %2}"
8083 [(set_attr "type" "sselog1")
8084 (set_attr "prefix_extra" "1")
8085 (set_attr "mode" "TI")])
;; aesimc: V2DI unspec of the single operand 1 (register or memory).  Unlike
;; the two-input AES patterns, the input is not tied to the destination.
;; Emits "aesimc" with operand 1 as source and operand 0 as destination.
;; NOTE(review): listing lines 8090-8091 are not visible in this excerpt, so
;; the unspec code and the enabling condition cannot be confirmed from here.
8087 (define_insn "aesimc"
8088 [(set (match_operand:V2DI 0 "register_operand" "=x")
8089 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8092 "aesimc\t{%1, %0|%0, %1}"
8093 [(set_attr "type" "sselog1")
8094 (set_attr "prefix_extra" "1")
8095 (set_attr "mode" "TI")])
;; aeskeygenassist: V2DI unspec (UNSPEC_AESKEYGENASSIST) of operand 1
;; (register or memory) and operand 2, an SImode immediate accepted by the
;; "const_0_to_255_operand" predicate.
;; Emits a three-operand instruction: immediate, source, destination.
;; NOTE(review): listing line 8102 is not visible in this excerpt, so the
;; enabling condition cannot be confirmed from here.
8097 (define_insn "aeskeygenassist"
8098 [(set (match_operand:V2DI 0 "register_operand" "=x")
8099 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
8100 (match_operand:SI 2 "const_0_to_255_operand" "n")]
8101 UNSPEC_AESKEYGENASSIST))]
8103 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
8104 [(set_attr "type" "sselog1")
8105 (set_attr "prefix_extra" "1")
8106 (set_attr "mode" "TI")])
;; pclmulqdq: V2DI unspec of operand 1 (tied to the destination via
;; constraint "0"), operand 2 (register or memory) and operand 3, an SImode
;; immediate accepted by the "const_0_to_255_operand" predicate.
;; Emits a three-operand instruction: immediate, operand 2, destination.
;; NOTE(review): listing lines 8113-8114 are not visible in this excerpt, so
;; the unspec code and the enabling condition cannot be confirmed from here.
8108 (define_insn "pclmulqdq"
8109 [(set (match_operand:V2DI 0 "register_operand" "=x")
8110 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8111 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
8112 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8115 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
8116 [(set_attr "type" "sselog1")
8117 (set_attr "prefix_extra" "1")
8118 (set_attr "mode" "TI")])