1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Mode iterators.  A pattern written with one of these iterators is
;; instantiated once for every mode in the iterator's list.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name match the
;; element sizes, in bytes, of the modes listed (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point groupings: SSEMODEF4 lists the two scalar and the two
;; packed float modes; SSEMODEF2P lists only the two packed float modes.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Mode attributes.  Each maps a mode to a string that is substituted for
;; <attribute-name> in pattern names, output templates and attribute values
;; of the patterns below.
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4 gives the full two-letter suffix (scalar "ss"/"sd",
;; packed "ps"/"pd").
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s gives the scalar suffix even for the packed modes; the
;; sse5_vm* templates below use it.
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c gives only the final letter; templates supply the
;; preceding "p" or "s" themselves (e.g. "movup<ssemodesuffixf2c>").
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
58 ;; Mapping of immediate bits for blend instructions
59 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
61 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
63 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69 ;; All of these patterns are enabled for SSE1 as well as SSE2.
70 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves, instantiated for every mode in SSEMODE.
;; Both operands are nonimmediate_operand; the expansion itself is done by
;; ix86_expand_vector_move.
72 (define_expand "mov<mode>"
73 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
74 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
77 ix86_expand_vector_move (<MODE>mode, operands);
;; Insn that matches a vector move in any SSEMODE mode.
;; Alternatives (destination <- source):
;;   0: "x" <- "C"   constant, emitted through standard_sse_constant_opcode
;;   1: "x" <- "xm"  register or memory into a register
;;   2: "m" <- "x"   register into memory
;; The condition requires at least one operand to be a register.  For the
;; non-constant alternatives the template returns movaps, movapd or movdqa
;; depending on the insn's "mode" attribute.
81 (define_insn "*mov<mode>_internal"
82 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
83 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
85 && (register_operand (operands[0], <MODE>mode)
86 || register_operand (operands[1], <MODE>mode))"
88 switch (which_alternative)
91 return standard_sse_constant_opcode (insn, operands[1]);
94 switch (get_attr_mode (insn))
97 return "movaps\t{%1, %0|%0, %1}";
99 return "movapd\t{%1, %0|%0, %1}";
101 return "movdqa\t{%1, %0|%0, %1}";
107 [(set_attr "type" "sselog1,ssemov,ssemov")
;; The "mode" attribute is V4SF when optimizing for size, when TARGET_SSE2
;; is off, or for the store alternative (2) under the
;; TARGET_SSE_TYPELESS_STORES test.  Otherwise it follows the pattern's
;; mode (V4SF, V2DF), with TI as the fallback.
109 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
110 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
111 (and (eq_attr "alternative" "2")
112 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
114 (const_string "V4SF")
115 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
116 (const_string "V4SF")
117 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
118 (const_string "V2DF")
120 (const_string "TI")))])
122 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
123 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
124 ;; from memory, we'd prefer to load the memory directly into the %xmm
125 ;; register. To facilitate this happy circumstance, this pattern won't
126 ;; split until after register allocation. If the 64-bit value didn't
127 ;; come from memory, this is the best we can do. This is much better
128 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
;; Insn-and-split that places a DImode value, viewed as a V4SI subreg, into
;; an SSE register.  Enabled only for !TARGET_64BIT with TARGET_SSE2 and
;; TARGET_INTER_UNIT_MOVES.  The source is either general registers ("r")
;; or memory ("m"); the register alternative also needs an early-clobber
;; SSE scratch (operand 2).  The split condition adds reload_completed.
131 (define_insn_and_split "movdi_to_sse"
133 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
134 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
135 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
136 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
138 "&& reload_completed"
;; Register source: load the SImode subreg at byte 0 into operand 0 and the
;; one at byte 4 into the scratch, then combine them with punpckldq.
141 if (register_operand (operands[1], DImode))
143 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
144 Assemble the 64-bit DImode value in an xmm register. */
145 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
146 gen_rtx_SUBREG (SImode, operands[1], 0)));
147 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
148 gen_rtx_SUBREG (SImode, operands[1], 4)));
149 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
;; Memory source: one vec_concatv2di of the DImode memory operand with
;; const0_rtx, writing the V2DI view of operand 0.
151 else if (memory_operand (operands[1], DImode))
152 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload split (TARGET_SSE) of a V4SF register load whose source
;; satisfies zero_extended_scalar_load_operand.  The preparation code
;; narrows operand 1 to its SFmode subreg at byte 0 and sets operand 2 to
;; the V4SF zero vector for the replacement pattern, which duplicates
;; operand 1 with vec_duplicate.
158 [(set (match_operand:V4SF 0 "register_operand" "")
159 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
160 "TARGET_SSE && reload_completed"
163 (vec_duplicate:V4SF (match_dup 1))
167 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
168 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload split (TARGET_SSE2) of a V2DF register load whose source
;; satisfies zero_extended_scalar_load_operand.  The load is rewritten as a
;; vec_concat of the DFmode subreg of operand 1 (byte 0) with a DFmode
;; zero (operand 2).
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Expander for pushing a vector register of any SSEMODE mode.  The work is
;; delegated to ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Expander for misaligned vector moves in any SSEMODE mode.  The work is
;; delegated to ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Emits movups (V4SF) or movupd (V2DF).  Two alternatives: register or
;; memory into a register, and register into memory.  The condition
;; rejects memory-to-memory moves.
198 (define_insn "<sse>_movup<ssemodesuffixf2c>"
199 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
201 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
203 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
204 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
205 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
206 [(set_attr "type" "ssemov")
207 (set_attr "mode" "<MODE>")])
;; Emits movdqu for a V16QI move expressed as an unspec.  Requires
;; TARGET_SSE2 and rejects memory-to-memory moves.  Attributes mark a
;; data16 prefix and TI mode.
209 (define_insn "sse2_movdqu"
210 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
211 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
213 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
214 "movdqu\t{%1, %0|%0, %1}"
215 [(set_attr "type" "ssemov")
216 (set_attr "prefix_data16" "1")
217 (set_attr "mode" "TI")])
;; Emits movntps (V4SF) or movntpd (V2DF): a store of an SSE register to a
;; memory operand.
219 (define_insn "<sse>_movnt<mode>"
220 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
222 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
224 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
225 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
226 [(set_attr "type" "ssemov")
227 (set_attr "mode" "<MODE>")])
;; Emits movntdq: a store of a V2DI SSE register to memory, expressed as an
;; unspec.  Attributes mark a data16 prefix and TI mode.
229 (define_insn "sse2_movntv2di"
230 [(set (match_operand:V2DI 0 "memory_operand" "=m")
231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
234 "movntdq\t{%1, %0|%0, %1}"
235 [(set_attr "type" "ssecvt")
236 (set_attr "prefix_data16" "1")
237 (set_attr "mode" "TI")])
;; Emits movnti: a store of an SImode general register ("r") to memory,
;; expressed as an unspec.
;; NOTE(review): the "mode" attribute below is V2DF although both operands
;; are SImode and the source is a general register -- confirm this is
;; intended, since attributes derived from "mode" may be affected.
239 (define_insn "sse2_movntsi"
240 [(set (match_operand:SI 0 "memory_operand" "=m")
241 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
244 "movnti\t{%1, %0|%0, %1}"
245 [(set_attr "type" "ssecvt")
246 (set_attr "mode" "V2DF")])
;; Emits lddqu: a V16QI load from a memory-only source into an SSE
;; register, expressed as an unspec.  Attributes mark a rep prefix and TI
;; mode.
248 (define_insn "sse3_lddqu"
249 [(set (match_operand:V16QI 0 "register_operand" "=x")
250 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
253 "lddqu\t{%1, %0|%0, %1}"
254 [(set_attr "type" "ssecvt")
255 (set_attr "prefix_rep" "1")
256 (set_attr "mode" "TI")])
258 ; Expand patterns for non-temporal stores. At the moment, only those
259 ; that directly map to insns are defined; it would be possible to
260 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for the packed float modes (V4SF, V2DF):
;; register source, memory destination.
262 (define_expand "storent<mode>"
263 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
265 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
267 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander with the same name template, instantiated
;; over the MODEF iterator (defined outside this excerpt): register source,
;; memory destination.
270 (define_expand "storent<mode>"
271 [(set (match_operand:MODEF 0 "memory_operand" "")
273 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI: register source, memory
;; destination, expressed as an unspec.
278 (define_expand "storentv2di"
279 [(set (match_operand:V2DI 0 "memory_operand" "")
280 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode: register source, memory
;; destination, expressed as an unspec.
285 (define_expand "storentsi"
286 [(set (match_operand:SI 0 "memory_operand" "")
287 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294 ;; Parallel floating point arithmetic
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for unary operations on packed float vectors.  All work is done
;; by ix86_expand_fp_absneg_operator, which receives the rtx code <CODE>;
;; the expander then finishes with DONE.  The line naming the code iterator
;; is not visible in this excerpt.
298 (define_expand "<code><mode>2"
299 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
301 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
302 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
303 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for packed float add/subtract (the plusminus code iterator).
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
305 (define_expand "<plusminus_insn><mode>3"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
307 (plusminus:SSEMODEF2P
308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
310 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
311 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract insn.  Operand 1 is tied to the destination
;; ("0", preceded by the <comm> code attribute); operand 2 may be a
;; register or memory.  The condition also requires
;; ix86_binary_operator_ok.  The mnemonic is <plusminus_mnemonic> followed
;; by "ps" or "pd".
313 (define_insn "*<plusminus_insn><mode>3"
314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
315 (plusminus:SSEMODEF2P
316 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
317 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
318 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
319 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
320 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
321 [(set_attr "type" "sseadd")
322 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector add/subtract: the result of the operation is combined
;; through a vec_merge.  Operand 1 is a register tied to the destination.
;; The mnemonic is <plusminus_mnemonic> followed by "ss" or "sd", and the
;; "mode" attribute is the scalar mode (SF or DF).
324 (define_insn "<sse>_vm<plusminus_insn><mode>3"
325 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
326 (vec_merge:SSEMODEF2P
327 (plusminus:SSEMODEF2P
328 (match_operand:SSEMODEF2P 1 "register_operand" "0")
329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
332 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
333 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
334 [(set_attr "type" "sseadd")
335 (set_attr "mode" "<ssescalarmode>")])
;; Expander for packed float multiply.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with code MULT.
337 (define_expand "mul<mode>3"
338 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
340 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
341 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
342 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
343 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Emits mulps or mulpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  The
;; condition also requires ix86_binary_operator_ok for MULT.
345 (define_insn "*mul<mode>3"
346 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
348 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
349 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
350 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
351 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
352 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
353 [(set_attr "type" "ssemul")
354 (set_attr "mode" "<MODE>")])
;; Emits mulss or mulsd; the result is combined through a vec_merge.
;; Operand 1 is a register tied to the destination.  The "mode" attribute
;; is the scalar mode.
356 (define_insn "<sse>_vmmul<mode>3"
357 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
358 (vec_merge:SSEMODEF2P
360 (match_operand:SSEMODEF2P 1 "register_operand" "0")
361 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
364 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
365 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
366 [(set_attr "type" "ssemul")
367 (set_attr "mode" "<ssescalarmode>")])
;; Expander for V4SF division.  ix86_emit_swdivsf is called instead when
;; all of the following hold: TARGET_SSE_MATH, TARGET_RECIP, not
;; optimizing for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
369 (define_expand "divv4sf3"
370 [(set (match_operand:V4SF 0 "register_operand" "")
371 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
372 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
376 && flag_finite_math_only && !flag_trapping_math
377 && flag_unsafe_math_optimizations)
379 ix86_emit_swdivsf (operands[0], operands[1],
380 operands[2], V4SFmode);
;; Expander for V2DF division: register dividend, register-or-memory
;; divisor.  No preparation code is visible in this excerpt.
385 (define_expand "divv2df3"
386 [(set (match_operand:V2DF 0 "register_operand" "")
387 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
388 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Emits divps or divpd.  Operand 1 is a register tied to the destination;
;; operand 2 may be a register or memory.
392 (define_insn "<sse>_div<mode>3"
393 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
395 (match_operand:SSEMODEF2P 1 "register_operand" "0")
396 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
397 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
398 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
399 [(set_attr "type" "ssediv")
400 (set_attr "mode" "<MODE>")])
;; Emits divss or divsd; the result is combined through a vec_merge.
;; Operand 1 is a register tied to the destination.  The "mode" attribute
;; is the scalar mode.
402 (define_insn "<sse>_vmdiv<mode>3"
403 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
404 (vec_merge:SSEMODEF2P
406 (match_operand:SSEMODEF2P 1 "register_operand" "0")
407 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
410 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
411 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
412 [(set_attr "type" "ssediv")
413 (set_attr "mode" "<ssescalarmode>")])
;; Emits rcpps on a V4SF register-or-memory source, represented as
;; UNSPEC_RCP.
415 (define_insn "sse_rcpv4sf2"
416 [(set (match_operand:V4SF 0 "register_operand" "=x")
418 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
420 "rcpps\t{%1, %0|%0, %1}"
421 [(set_attr "type" "sse")
422 (set_attr "mode" "V4SF")])
;; Emits rcpss.  Operand 1 is the register-or-memory source of the unspec;
;; operand 2 is a register tied to the destination ("0").  The "mode"
;; attribute is SF.
424 (define_insn "sse_vmrcpv4sf2"
425 [(set (match_operand:V4SF 0 "register_operand" "=x")
427 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
429 (match_operand:V4SF 2 "register_operand" "0")
432 "rcpss\t{%1, %0|%0, %1}"
433 [(set_attr "type" "sse")
434 (set_attr "mode" "SF")])
;; Expander for V4SF square root.  ix86_emit_swsqrtsf is called with a
;; final argument of 0 when all of the following hold: TARGET_SSE_MATH,
;; TARGET_RECIP, not optimizing for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
436 (define_expand "sqrtv4sf2"
437 [(set (match_operand:V4SF 0 "register_operand" "")
438 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
441 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
442 && flag_finite_math_only && !flag_trapping_math
443 && flag_unsafe_math_optimizations)
445 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits sqrtps for a V4SF register-or-memory source.
450 (define_insn "sse_sqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
454 "sqrtps\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "V4SF")])
;; Emits sqrtpd for a V2DF register-or-memory source.
458 (define_insn "sqrtv2df2"
459 [(set (match_operand:V2DF 0 "register_operand" "=x")
460 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
462 "sqrtpd\t{%1, %0|%0, %1}"
463 [(set_attr "type" "sse")
464 (set_attr "mode" "V2DF")])
;; Emits sqrtss or sqrtsd.  Operand 1 is the register-or-memory source;
;; operand 2 is a register tied to the destination and is the other input
;; of the vec_merge.  The "mode" attribute is the scalar mode.
466 (define_insn "<sse>_vmsqrt<mode>2"
467 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
468 (vec_merge:SSEMODEF2P
470 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
471 (match_operand:SSEMODEF2P 2 "register_operand" "0")
473 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
474 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
475 [(set_attr "type" "sse")
476 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the V4SF UNSPEC_RSQRT operation.  It calls
;; ix86_emit_swsqrtsf with a final argument of 1 (the sqrtv4sf2 expander
;; passes 0).
478 (define_expand "rsqrtv4sf2"
479 [(set (match_operand:V4SF 0 "register_operand" "")
481 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
484 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Emits rsqrtps on a V4SF register-or-memory source, represented as
;; UNSPEC_RSQRT.
488 (define_insn "sse_rsqrtv4sf2"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
493 "rsqrtps\t{%1, %0|%0, %1}"
494 [(set_attr "type" "sse")
495 (set_attr "mode" "V4SF")])
;; Emits rsqrtss.  Operand 1 is the register-or-memory source of the
;; unspec; operand 2 is a register tied to the destination ("0").  The
;; "mode" attribute is SF.
497 (define_insn "sse_vmrsqrtv4sf2"
498 [(set (match_operand:V4SF 0 "register_operand" "=x")
500 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
502 (match_operand:V4SF 2 "register_operand" "0")
505 "rsqrtss\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "SF")])
509 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
510 ;; isn't really correct, as those rtl operators aren't defined when
511 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for packed float min/max.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy adjusts the operands.
513 (define_expand "<code><mode>3"
514 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
516 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
517 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
518 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
520 if (!flag_finite_math_only)
521 operands[1] = force_reg (<MODE>mode, operands[1]);
522 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed float min/max insn used only under flag_finite_math_only.
;; Operand 1 is marked commutative ("%0") and may be a register or memory;
;; the condition also requires ix86_binary_operator_ok.
525 (define_insn "*<code><mode>3_finite"
526 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
528 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
529 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
530 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
531 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
532 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sseadd")
534 (set_attr "mode" "<MODE>")])
;; Packed float min/max insn without the finite-math requirement.  Operand
;; 1 must be a register tied to the destination and carries no commutative
;; marker, so the operand order is fixed.
536 (define_insn "*<code><mode>3"
537 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
539 (match_operand:SSEMODEF2P 1 "register_operand" "0")
540 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
541 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
542 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
543 [(set_attr "type" "sseadd")
544 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector min/max; the result is combined through a vec_merge.
;; The mnemonic is <maxminfprefix> followed by "ss" or "sd", and the "mode"
;; attribute is the scalar mode.
546 (define_insn "<sse>_vm<code><mode>3"
547 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
548 (vec_merge:SSEMODEF2P
550 (match_operand:SSEMODEF2P 1 "register_operand" "0")
551 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
554 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
555 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
556 [(set_attr "type" "sse")
557 (set_attr "mode" "<ssescalarmode>")])
559 ;; These versions of the min/max patterns implement exactly the operations
560 ;; min = (op1 < op2 ? op1 : op2)
561 ;; max = (!(op1 < op2) ? op1 : op2)
562 ;; Their operands are not commutative, and thus they may be used in the
563 ;; presence of -0.0 and NaN.
;; Emits minps or minpd with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 a register or memory.
565 (define_insn "*ieee_smin<mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
568 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
569 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
571 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
573 [(set_attr "type" "sseadd")
574 (set_attr "mode" "<MODE>")])
;; Emits maxps or maxpd with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 a register or memory.
576 (define_insn "*ieee_smax<mode>3"
577 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
579 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
580 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
582 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
583 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
584 [(set_attr "type" "sseadd")
585 (set_attr "mode" "<MODE>")])
;; Emits addsubps.  The visible RTL includes a (minus op1 op2) arm that
;; reuses operands 1 and 2 through match_dup.  Operand 1 is tied to the
;; destination.  Attributes mark a rep prefix.
587 (define_insn "sse3_addsubv4sf3"
588 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
593 (minus:V4SF (match_dup 1) (match_dup 2))
596 "addsubps\t{%2, %0|%0, %2}"
597 [(set_attr "type" "sseadd")
598 (set_attr "prefix_rep" "1")
599 (set_attr "mode" "V4SF")])
;; Emits addsubpd.  The visible RTL includes a (minus op1 op2) arm that
;; reuses operands 1 and 2 through match_dup.  Operand 1 is tied to the
;; destination.
601 (define_insn "sse3_addsubv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
605 (match_operand:V2DF 1 "register_operand" "0")
606 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
607 (minus:V2DF (match_dup 1) (match_dup 2))
610 "addsubpd\t{%2, %0|%0, %2}"
611 [(set_attr "type" "sseadd")
612 (set_attr "mode" "V2DF")])
;; Emits h<plusminus_mnemonic>ps (horizontal add/subtract, V4SF).  The RTL
;; selects elements 0..3 of operand 1 and then elements 0..3 of operand 2
;; with vec_select.  Operand 1 is tied to the destination.  Attributes
;; mark a rep prefix.
614 (define_insn "sse3_h<plusminus_insn>v4sf3"
615 [(set (match_operand:V4SF 0 "register_operand" "=x")
620 (match_operand:V4SF 1 "register_operand" "0")
621 (parallel [(const_int 0)]))
622 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
624 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
625 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
629 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
630 (parallel [(const_int 0)]))
631 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
633 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
634 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
636 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
637 [(set_attr "type" "sseadd")
638 (set_attr "prefix_rep" "1")
639 (set_attr "mode" "V4SF")])
;; Emits h<plusminus_mnemonic>pd (horizontal add/subtract, V2DF).  The RTL
;; selects elements 0 and 1 of operand 1 and then of operand 2 with
;; vec_select.  Operand 1 is tied to the destination.
641 (define_insn "sse3_h<plusminus_insn>v2df3"
642 [(set (match_operand:V2DF 0 "register_operand" "=x")
646 (match_operand:V2DF 1 "register_operand" "0")
647 (parallel [(const_int 0)]))
648 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
651 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
652 (parallel [(const_int 0)]))
653 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
655 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
656 [(set_attr "type" "sseadd")
657 (set_attr "mode" "V2DF")])
;; Expander for a V4SF sum reduction.  Two code paths are visible: two
;; successive sse3_haddv4sf3 insns through a temporary register, or a call
;; to ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that chooses
;; between them is not visible in this excerpt.
659 (define_expand "reduc_splus_v4sf"
660 [(match_operand:V4SF 0 "register_operand" "")
661 (match_operand:V4SF 1 "register_operand" "")]
666 rtx tmp = gen_reg_rtx (V4SFmode);
667 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
668 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
671 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for a V2DF sum reduction: a single sse3_haddv2df3 with operand
;; 1 supplied as both inputs.
675 (define_expand "reduc_splus_v2df"
676 [(match_operand:V2DF 0 "register_operand" "")
677 (match_operand:V2DF 1 "register_operand" "")]
680 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for a V4SF max reduction.  Delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
684 (define_expand "reduc_smax_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
689 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for a V4SF min reduction.  Delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
693 (define_expand "reduc_smin_v4sf"
694 [(match_operand:V4SF 0 "register_operand" "")
695 (match_operand:V4SF 1 "register_operand" "")]
698 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
704 ;; Parallel floating point comparisons
706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Float comparison insn for both scalar and packed modes (SSEMODEF4).
;; The comparison is matched by sse_comparison_operator (operand 3) and
;; printed into the mnemonic through %D3, giving
;; cmp<cond>{ss,sd,ps,pd}.  Operand 1 is tied to the destination.
708 (define_insn "<sse>_maskcmp<mode>3"
709 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
710 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
711 [(match_operand:SSEMODEF4 1 "register_operand" "0")
712 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
713 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
715 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
716 [(set_attr "type" "ssecmp")
717 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector float comparison; the result is combined through a
;; vec_merge.  Disabled when TARGET_SSE5 is set.  The mnemonic is
;; cmp<cond>ss or cmp<cond>sd, and the "mode" attribute is the scalar mode.
719 (define_insn "<sse>_vmmaskcmp<mode>3"
720 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
721 (vec_merge:SSEMODEF2P
722 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
723 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
724 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
727 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
728 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
729 [(set_attr "type" "ssecmp")
730 (set_attr "mode" "<ssescalarmode>")])
;; Emits comis<suffix>.  Sets FLAGS_REG in CCFP mode from element 0 of each
;; vector operand.  Operand 0 must be an SSE register; operand 1 may be a
;; register or memory.
732 (define_insn "<sse>_comi"
733 [(set (reg:CCFP FLAGS_REG)
736 (match_operand:<ssevecmode> 0 "register_operand" "x")
737 (parallel [(const_int 0)]))
739 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
740 (parallel [(const_int 0)]))))]
741 "SSE_FLOAT_MODE_P (<MODE>mode)"
742 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
743 [(set_attr "type" "ssecomi")
744 (set_attr "mode" "<MODE>")])
;; Emits ucomis<suffix>.  Same operand shape as the comi pattern, but
;; FLAGS_REG is set in CCFPU mode.
746 (define_insn "<sse>_ucomi"
747 [(set (reg:CCFPU FLAGS_REG)
750 (match_operand:<ssevecmode> 0 "register_operand" "x")
751 (parallel [(const_int 0)]))
753 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))))]
755 "SSE_FLOAT_MODE_P (<MODE>mode)"
756 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
757 [(set_attr "type" "ssecomi")
758 (set_attr "mode" "<MODE>")])
;; Expander for a packed float conditional select (if_then_else).
;; Operands 4 and 5 are the values compared; operands 1 and 2 are the two
;; values selected between.  The expansion is attempted by
;; ix86_expand_fp_vcond, whose return value is tested.
760 (define_expand "vcond<mode>"
761 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
762 (if_then_else:SSEMODEF2P
764 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
765 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
766 (match_operand:SSEMODEF2P 1 "general_operand" "")
767 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
768 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
770 if (ix86_expand_fp_vcond (operands))
776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
778 ;; Parallel floating point logical operations
780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits andnps or andnpd.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
782 (define_insn "<sse>_nand<mode>3"
783 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
786 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
787 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
788 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
789 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
790 [(set_attr "type" "sselog")
791 (set_attr "mode" "<MODE>")])
;; Expander for packed float bitwise logic operations.  Operands are
;; adjusted by ix86_fixup_binary_operands_no_copy.
793 (define_expand "<code><mode>3"
794 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
796 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
797 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
798 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
799 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float bitwise logic insn.  The mnemonic is <plogicprefix>
;; followed by "ps" or "pd".  Operand 1 is marked commutative ("%0"); the
;; condition also requires ix86_binary_operator_ok.
801 (define_insn "*<code><mode>3"
802 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
804 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
805 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
806 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
807 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
808 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
809 [(set_attr "type" "sselog")
810 (set_attr "mode" "<MODE>")])
812 ;; Also define scalar versions. These are used for abs, neg, and
813 ;; conditional move. Using subregs into vector modes causes register
814 ;; allocation lossage. These patterns do not allow memory operands
815 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) version of and-not.  Every operand must be a register.
;; It emits the packed andnp<suffix> instruction, and the "mode" attribute
;; is the corresponding vector mode.
817 (define_insn "*nand<mode>3"
818 [(set (match_operand:MODEF 0 "register_operand" "=x")
821 (match_operand:MODEF 1 "register_operand" "0"))
822 (match_operand:MODEF 2 "register_operand" "x")))]
823 "SSE_FLOAT_MODE_P (<MODE>mode)"
824 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
825 [(set_attr "type" "sselog")
826 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) versions of the bitwise logic operations.  Every operand
;; must be a register.  It emits the packed <plogicprefix>p<suffix>
;; instruction, and the "mode" attribute is the corresponding vector mode.
828 (define_insn "*<code><mode>3"
829 [(set (match_operand:MODEF 0 "register_operand" "=x")
831 (match_operand:MODEF 1 "register_operand" "0")
832 (match_operand:MODEF 2 "register_operand" "x")))]
833 "SSE_FLOAT_MODE_P (<MODE>mode)"
834 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "<ssevecmode>")])
838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
840 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
841 ;; scalar versions of the instructions as well as the vector versions.
843 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
845 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
846 ;; combine to generate a multiply/add with two memory references. We then
847 ;; split this insn, into loading up the destination register with one of the
848 ;; memory operations. If we don't manage to split the insn, reload will
849 ;; generate the appropriate moves. The reason this is needed, is that combine
850 ;; has already folded one of the memory references into both the multiply and
851 ;; add insns, and it can't generate a new pseudo. I.e.:
852 ;; (set (reg1) (mem (addr1)))
853 ;; (set (reg2) (mult (reg1) (mem (addr2))))
854 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 multiply-add, emitted as fmadd{ss,sd,ps,pd} with four printed
;; operands.  Operands 1 and 2 are the multiply inputs and operand 3 is the
;; addend.  Each of the four alternatives ties a different input to the
;; destination ("0") and allows memory ("m") in different positions.
;; Requires TARGET_SSE5 and TARGET_FUSED_MADD, and
;; ix86_sse5_valid_op_p (operands, insn, 4, true, 2) must accept the
;; operands.
856 (define_insn "sse5_fmadd<mode>4"
857 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
860 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
861 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
862 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
863 "TARGET_SSE5 && TARGET_FUSED_MADD
864 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
865 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
866 [(set_attr "type" "ssemuladd")
867 (set_attr "mode" "<MODE>")])
869 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any of operands 1-3.  It calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmadd<mode>4.
871 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
874 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
875 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
876 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
878 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
879 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
880 && !reg_mentioned_p (operands[0], operands[1])
881 && !reg_mentioned_p (operands[0], operands[2])
882 && !reg_mentioned_p (operands[0], operands[3])"
885 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
887 operands[2], operands[3]));
891 ;; For the scalar operations, use operand1 for the upper words that aren't
892 ;; modified, so restrict the forms that are generated.
893 ;; Scalar version of fmadd
;; Operand 1 is tied to the destination in both alternatives; memory is
;; allowed in operand 3 (first alternative) or operand 2 (second).  The
;; result is combined through a vec_merge.  The validity check passes 1 as
;; its final argument, where the non-scalar fmadd pattern passes 2.  The
;; mnemonic is fmaddss or fmaddsd.
894 (define_insn "sse5_vmfmadd<mode>4"
895 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
896 (vec_merge:SSEMODEF2P
899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
900 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
901 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
904 "TARGET_SSE5 && TARGET_FUSED_MADD
905 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
906 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
907 [(set_attr "type" "ssemuladd")
908 (set_attr "mode" "<MODE>")])
910 ;; Floating multiply and subtract
911 ;; Allow two memory operands the same as fmadd
;; SSE5 multiply-subtract, emitted as fmsub{ss,sd,ps,pd}.  Operands 1 and
;; 2 are the multiply inputs and operand 3 is the third input.  The
;; constraints and the condition are the same as for sse5_fmadd<mode>4.
912 (define_insn "sse5_fmsub<mode>4"
913 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
916 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
917 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
918 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
919 "TARGET_SSE5 && TARGET_FUSED_MADD
920 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
921 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
922 [(set_attr "type" "ssemuladd")
923 (set_attr "mode" "<MODE>")])
925 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any of operands 1-3.  It calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmsub<mode>4.
927 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
930 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
931 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
932 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
934 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
935 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
936 && !reg_mentioned_p (operands[0], operands[1])
937 && !reg_mentioned_p (operands[0], operands[2])
938 && !reg_mentioned_p (operands[0], operands[3])"
941 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
942 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
943 operands[2], operands[3]));
947 ;; For the scalar operations, use operand1 for the upper words that aren't
948 ;; modified, so restrict the forms that are generated.
949 ;; Scalar version of fmsub
;; Operand 1 is tied to the destination in both alternatives; memory is
;; allowed in operand 3 (first alternative) or operand 2 (second).  The
;; result is combined through a vec_merge.  The validity check passes 1 as
;; its final argument.  The mnemonic is fmsubss or fmsubsd.
950 (define_insn "sse5_vmfmsub<mode>4"
951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
952 (vec_merge:SSEMODEF2P
955 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
956 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
957 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
960 "TARGET_SSE5 && TARGET_FUSED_MADD
961 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
962 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
963 [(set_attr "type" "ssemuladd")
964 (set_attr "mode" "<MODE>")])
966 ;; Floating point negative multiply and add
967 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
968 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
969 ;; Allow two memory operands to help in optimizing.
;; Emitted as fnmadd{ss,sd,ps,pd}.  In the RTL, operand 3 appears first,
;; followed by operands 1 and 2 (the multiply inputs).  The per-operand
;; constraints match those of sse5_fmadd<mode>4, and the same
;; ix86_sse5_valid_op_p call is used.
970 (define_insn "sse5_fnmadd<mode>4"
971 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
973 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
975 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
976 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
977 "TARGET_SSE5 && TARGET_FUSED_MADD
978 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
979 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
980 [(set_attr "type" "ssemuladd")
981 (set_attr "mode" "<MODE>")])
983 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The condition below requires the operands to fail ix86_sse5_valid_op_p
;; with a final argument of 1 but pass it with 2, and operand 0 not to be
;; mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmadd<mode>4.
985 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
987 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
989 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
990 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
992 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
993 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
994 && !reg_mentioned_p (operands[0], operands[1])
995 && !reg_mentioned_p (operands[0], operands[2])
996 && !reg_mentioned_p (operands[0], operands[3])"
999 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1000 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1001 operands[2], operands[3]));
1005 ;; For the scalar operations, use operand1 for the upper words that aren't
1006 ;; modified, so restrict the forms that are generated.
1007 ;; Scalar version of fnmadd
;; Operand 1 is tied to the destination (constraint "0" in both alternatives);
;; in each alternative only one of operands 2 and 3 has the "xm" constraint.
;; As in the packed fnmadd pattern, operand 3 is listed before operands 1 and 2.
1008 (define_insn "sse5_vmfnmadd<mode>4"
1009 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1010 (vec_merge:SSEMODEF2P
1012 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1014 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1015 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1018 "TARGET_SSE5 && TARGET_FUSED_MADD
1019 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1020 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1021 [(set_attr "type" "ssemuladd")
1022 (set_attr "mode" "<MODE>")])
1024 ;; Floating point negative multiply and subtract
1025 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1026 ;; Allow 2 memory operands to help with optimization
;; Only two constraint alternatives are provided, both tying operand 1 to the
;; destination ("0"); the insn condition calls ix86_sse5_valid_op_p with a
;; final argument of 2.  Requires TARGET_SSE5 and TARGET_FUSED_MADD.
1027 (define_insn "sse5_fnmsub<mode>4"
1028 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1032 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1033 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1034 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1035 "TARGET_SSE5 && TARGET_FUSED_MADD
1036 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1037 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1038 [(set_attr "type" "ssemuladd")
1039 (set_attr "mode" "<MODE>")])
1041 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The condition below requires the operands to fail ix86_sse5_valid_op_p
;; with a final argument of 1 but pass it with 2, and operand 0 not to be
;; mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fnmsub<mode>4.
1043 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1047 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1048 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1049 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1051 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1052 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1053 && !reg_mentioned_p (operands[0], operands[1])
1054 && !reg_mentioned_p (operands[0], operands[2])
1055 && !reg_mentioned_p (operands[0], operands[3])"
1058 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1059 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1060 operands[2], operands[3]));
1064 ;; For the scalar operations, use operand1 for the upper words that aren't
1065 ;; modified, so restrict the forms that are generated.
1066 ;; Scalar version of fnmsub
;; Operand 1 is tied to the destination (constraint "0" in both alternatives);
;; in each alternative only one of operands 2 and 3 has the "xm" constraint.
;; The condition passes 1 as the last argument of ix86_sse5_valid_op_p, the
;; same value used by sse5_vmfmsub<mode>4, sse5_vmfnmadd<mode>4 and
;; *sse5i_vmfnmsub<mode>4: no alternative here has more than one "m", and no
;; define_split exists for the scalar forms to break up a two-memory insn.
1067 (define_insn "sse5_vmfnmsub<mode>4"
1068 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1069 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1075 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1078 "TARGET_SSE5 && TARGET_FUSED_MADD
1079 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1080 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1081 [(set_attr "type" "ssemuladd")
1082 (set_attr "mode" "<MODE>")])
1084 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1085 ;; even if the user used -mno-fused-madd
1086 ;; Parallel instructions. During instruction generation, just default
1087 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the fmadd intrinsic: all operands are register_operand.  When
;; TARGET_FUSED_MADD is set, the C body emits gen_sse5_fmadd<mode>4 in place
;; of the UNSPEC_SSE5_INTRINSIC form.
1088 (define_expand "sse5i_fmadd<mode>4"
1089 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1093 (match_operand:SSEMODEF2P 1 "register_operand" "")
1094 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1095 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1096 UNSPEC_SSE5_INTRINSIC))]
1099 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1100 if (TARGET_FUSED_MADD)
1102 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1103 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fmadd.  Its
;; condition tests only TARGET_SSE5 (not TARGET_FUSED_MADD) and calls
;; ix86_sse5_valid_op_p with a final argument of 1.
1108 (define_insn "*sse5i_fmadd<mode>4"
1109 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1113 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1114 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1115 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1116 UNSPEC_SSE5_INTRINSIC))]
1117 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1118 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1119 [(set_attr "type" "ssemuladd")
1120 (set_attr "mode" "<MODE>")])
;; Expander for the fmsub intrinsic: all operands are register_operand.  When
;; TARGET_FUSED_MADD is set, the C body emits gen_sse5_fmsub<mode>4 in place
;; of the UNSPEC_SSE5_INTRINSIC form.
1122 (define_expand "sse5i_fmsub<mode>4"
1123 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1127 (match_operand:SSEMODEF2P 1 "register_operand" "")
1128 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1129 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1130 UNSPEC_SSE5_INTRINSIC))]
1133 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1134 if (TARGET_FUSED_MADD)
1136 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1137 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fmsub.  Its
;; condition tests only TARGET_SSE5 (not TARGET_FUSED_MADD) and calls
;; ix86_sse5_valid_op_p with a final argument of 1.
;; Operand 1 uses nonimmediate_operand so that the "m" in its fourth
;; constraint alternative ("xm") can actually be matched, in line with the
;; *sse5i_fmadd, *sse5i_fnmadd and *sse5i_fnmsub patterns.
1142 (define_insn "*sse5i_fmsub<mode>4"
1143 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1147 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1148 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1149 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1150 UNSPEC_SSE5_INTRINSIC))]
1151 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1152 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1153 [(set_attr "type" "ssemuladd")
1154 (set_attr "mode" "<MODE>")])
1156 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1157 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the fnmadd intrinsic: all operands are register_operand.  When
;; TARGET_FUSED_MADD is set, the C body emits gen_sse5_fnmadd<mode>4 in place
;; of the UNSPEC_SSE5_INTRINSIC form.
1158 (define_expand "sse5i_fnmadd<mode>4"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1162 (match_operand:SSEMODEF2P 3 "register_operand" "")
1164 (match_operand:SSEMODEF2P 1 "register_operand" "")
1165 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1166 UNSPEC_SSE5_INTRINSIC))]
1169 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1170 if (TARGET_FUSED_MADD)
1172 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1173 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fnmadd.  Operand 3
;; is listed before operands 1 and 2 in the pattern.  The condition tests only
;; TARGET_SSE5 and calls ix86_sse5_valid_op_p with a final argument of 1.
1178 (define_insn "*sse5i_fnmadd<mode>4"
1179 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1182 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1184 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1185 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1186 UNSPEC_SSE5_INTRINSIC))]
1187 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1188 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1189 [(set_attr "type" "ssemuladd")
1190 (set_attr "mode" "<MODE>")])
1192 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the fnmsub intrinsic: all operands are register_operand.  When
;; TARGET_FUSED_MADD is set, the C body emits gen_sse5_fnmsub<mode>4 in place
;; of the UNSPEC_SSE5_INTRINSIC form.
1193 (define_expand "sse5i_fnmsub<mode>4"
1194 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1199 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1200 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1201 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1202 UNSPEC_SSE5_INTRINSIC))]
1205 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1206 if (TARGET_FUSED_MADD)
1208 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1209 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fnmsub.  The
;; condition tests only TARGET_SSE5 and calls ix86_sse5_valid_op_p with a
;; final argument of 1.
1214 (define_insn "*sse5i_fnmsub<mode>4"
1215 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1220 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1221 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1222 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1223 UNSPEC_SSE5_INTRINSIC))]
1224 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1225 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1226 [(set_attr "type" "ssemuladd")
1227 (set_attr "mode" "<MODE>")])
1229 ;; Scalar instructions
;; Expander for the scalar fmadd intrinsic (vec_merge inside the UNSPEC).
;; When TARGET_FUSED_MADD is set, the C body emits gen_sse5_vmfmadd<mode>4.
1230 (define_expand "sse5i_vmfmadd<mode>4"
1231 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1233 [(vec_merge:SSEMODEF2P
1236 (match_operand:SSEMODEF2P 1 "register_operand" "")
1237 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1238 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1241 UNSPEC_SSE5_INTRINSIC))]
1244 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1245 if (TARGET_FUSED_MADD)
1247 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1248 operands[2], operands[3]));
1253 ;; For the scalar operations, use operand1 for the upper words that aren't
1254 ;; modified, so restrict the forms that are accepted.
;; Operand 1 is a register_operand tied to the destination ("0"); only one of
;; operands 2 and 3 has the "xm" constraint in each alternative.  The "mode"
;; attribute is <ssescalarmode> rather than <MODE>.
1255 (define_insn "*sse5i_vmfmadd<mode>4"
1256 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1258 [(vec_merge:SSEMODEF2P
1261 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1262 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1263 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1266 UNSPEC_SSE5_INTRINSIC))]
1267 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1268 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1269 [(set_attr "type" "ssemuladd")
1270 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fmsub intrinsic (vec_merge inside the UNSPEC).
;; When TARGET_FUSED_MADD is set, the C body emits gen_sse5_vmfmsub<mode>4.
1272 (define_expand "sse5i_vmfmsub<mode>4"
1273 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1275 [(vec_merge:SSEMODEF2P
1278 (match_operand:SSEMODEF2P 1 "register_operand" "")
1279 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1280 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1283 UNSPEC_SSE5_INTRINSIC))]
1286 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1287 if (TARGET_FUSED_MADD)
1289 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1290 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fmsub.  Operand 1 is
;; tied to the destination ("0"); only one of operands 2 and 3 has the "xm"
;; constraint in each alternative.  The condition tests only TARGET_SSE5.
1295 (define_insn "*sse5i_vmfmsub<mode>4"
1296 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1298 [(vec_merge:SSEMODEF2P
1301 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1302 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1303 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1306 UNSPEC_SSE5_INTRINSIC))]
1307 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1308 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1309 [(set_attr "type" "ssemuladd")
1310 (set_attr "mode" "<ssescalarmode>")])
1312 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar fnmadd intrinsic (vec_merge inside the UNSPEC).
;; When TARGET_FUSED_MADD is set, the C body emits gen_sse5_vmfnmadd<mode>4.
1313 (define_expand "sse5i_vmfnmadd<mode>4"
1314 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1316 [(vec_merge:SSEMODEF2P
1318 (match_operand:SSEMODEF2P 3 "register_operand" "")
1320 (match_operand:SSEMODEF2P 1 "register_operand" "")
1321 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1324 UNSPEC_SSE5_INTRINSIC))]
1327 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1328 if (TARGET_FUSED_MADD)
1330 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1331 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fnmadd.  Operand 3
;; is listed before operands 1 and 2; operand 1 is tied to the destination
;; ("0").  The condition tests only TARGET_SSE5.
1336 (define_insn "*sse5i_vmfnmadd<mode>4"
1337 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1339 [(vec_merge:SSEMODEF2P
1341 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1343 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1344 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1347 UNSPEC_SSE5_INTRINSIC))]
1348 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1349 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1350 [(set_attr "type" "ssemuladd")
1351 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fnmsub intrinsic (vec_merge inside the UNSPEC).
;; When TARGET_FUSED_MADD is set, the C body emits gen_sse5_vmfnmsub<mode>4.
1353 (define_expand "sse5i_vmfnmsub<mode>4"
1354 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1356 [(vec_merge:SSEMODEF2P
1360 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1361 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1362 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1365 UNSPEC_SSE5_INTRINSIC))]
1368 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1369 if (TARGET_FUSED_MADD)
1371 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1372 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of scalar fnmsub.  Operand 1
;; is tied to the destination ("0"); only one of operands 2 and 3 has the "xm"
;; constraint in each alternative.  The condition tests only TARGET_SSE5.
1377 (define_insn "*sse5i_vmfnmsub<mode>4"
1378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1380 [(vec_merge:SSEMODEF2P
1384 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1385 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1386 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1389 UNSPEC_SSE5_INTRINSIC))]
1390 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1391 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1392 [(set_attr "type" "ssemuladd")
1393 (set_attr "mode" "<ssescalarmode>")])
1395 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1397 ;; Parallel single-precision floating point conversion operations
1399 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (constraint "ym") to V2SF with `float'
;; and writes V4SF operand 0, which is tied to operand 1 ("0").
1401 (define_insn "sse_cvtpi2ps"
1402 [(set (match_operand:V4SF 0 "register_operand" "=x")
1405 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1406 (match_operand:V4SF 1 "register_operand" "0")
1409 "cvtpi2ps\t{%2, %0|%0, %2}"
1410 [(set_attr "type" "ssecvt")
1411 (set_attr "mode" "V4SF")])
;; cvtps2pi: conversion of V4SF operand 1 expressed as an unspec:V4SI, of
;; which elements 0 and 1 are selected into V2SI operand 0 (constraint "y").
1413 (define_insn "sse_cvtps2pi"
1414 [(set (match_operand:V2SI 0 "register_operand" "=y")
1416 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1418 (parallel [(const_int 0) (const_int 1)])))]
1420 "cvtps2pi\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "ssecvt")
1422 (set_attr "unit" "mmx")
1423 (set_attr "mode" "DI")])
;; cvttps2pi: conversion of V4SF operand 1 expressed with `fix:V4SI', of
;; which elements 0 and 1 are selected into V2SI operand 0 (constraint "y").
1425 (define_insn "sse_cvttps2pi"
1426 [(set (match_operand:V2SI 0 "register_operand" "=y")
1428 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1429 (parallel [(const_int 0) (const_int 1)])))]
1431 "cvttps2pi\t{%1, %0|%0, %1}"
1432 [(set_attr "type" "ssecvt")
1433 (set_attr "unit" "mmx")
1434 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SImode operand 2 (register or memory alternative) to SF
;; and writes V4SF operand 0, which is tied to operand 1 ("0").
1436 (define_insn "sse_cvtsi2ss"
1437 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1440 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1441 (match_operand:V4SF 1 "register_operand" "0,0")
1444 "cvtsi2ss\t{%2, %0|%0, %2}"
1445 [(set_attr "type" "sseicvt")
1446 (set_attr "athlon_decode" "vector,double")
1447 (set_attr "amdfam10_decode" "vector,double")
1448 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source variant of cvtsi2ss; enabled only when both
;; TARGET_SSE and TARGET_64BIT hold.  Operand 0 is tied to operand 1 ("0").
1450 (define_insn "sse_cvtsi2ssq"
1451 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1454 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1455 (match_operand:V4SF 1 "register_operand" "0,0")
1457 "TARGET_SSE && TARGET_64BIT"
1458 "cvtsi2ssq\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "sseicvt")
1460 (set_attr "athlon_decode" "vector,double")
1461 (set_attr "amdfam10_decode" "vector,double")
1462 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of V4SF operand 1 is converted to SImode operand 0
;; through an UNSPEC_FIX_NOTRUNC unspec.
1464 (define_insn "sse_cvtss2si"
1465 [(set (match_operand:SI 0 "register_operand" "=r,r")
1468 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1469 (parallel [(const_int 0)]))]
1470 UNSPEC_FIX_NOTRUNC))]
1472 "cvtss2si\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "sseicvt")
1474 (set_attr "athlon_decode" "double,vector")
1475 (set_attr "prefix_rep" "1")
1476 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a scalar SF operand rather than a
;; selected vector element; also uses UNSPEC_FIX_NOTRUNC.
1478 (define_insn "sse_cvtss2si_2"
1479 [(set (match_operand:SI 0 "register_operand" "=r,r")
1480 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1481 UNSPEC_FIX_NOTRUNC))]
1483 "cvtss2si\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "sseicvt")
1485 (set_attr "athlon_decode" "double,vector")
1486 (set_attr "amdfam10_decode" "double,double")
1487 (set_attr "prefix_rep" "1")
1488 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result variant of cvtss2si; enabled only when both
;; TARGET_SSE and TARGET_64BIT hold.
1490 (define_insn "sse_cvtss2siq"
1491 [(set (match_operand:DI 0 "register_operand" "=r,r")
1494 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1495 (parallel [(const_int 0)]))]
1496 UNSPEC_FIX_NOTRUNC))]
1497 "TARGET_SSE && TARGET_64BIT"
1498 "cvtss2siq\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "sseicvt")
1500 (set_attr "athlon_decode" "double,vector")
1501 (set_attr "prefix_rep" "1")
1502 (set_attr "mode" "DI")])
;; Variant of cvtss2siq whose source is a scalar SF operand; enabled only
;; when both TARGET_SSE and TARGET_64BIT hold.
1504 (define_insn "sse_cvtss2siq_2"
1505 [(set (match_operand:DI 0 "register_operand" "=r,r")
1506 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1507 UNSPEC_FIX_NOTRUNC))]
1508 "TARGET_SSE && TARGET_64BIT"
1509 "cvtss2siq\t{%1, %0|%0, %1}"
1510 [(set_attr "type" "sseicvt")
1511 (set_attr "athlon_decode" "double,vector")
1512 (set_attr "amdfam10_decode" "double,double")
1513 (set_attr "prefix_rep" "1")
1514 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of V4SF operand 1 converted to SImode operand 0.
;; Unlike sse_cvtss2si, no UNSPEC_FIX_NOTRUNC appears in this pattern.
1516 (define_insn "sse_cvttss2si"
1517 [(set (match_operand:SI 0 "register_operand" "=r,r")
1520 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1521 (parallel [(const_int 0)]))))]
1523 "cvttss2si\t{%1, %0|%0, %1}"
1524 [(set_attr "type" "sseicvt")
1525 (set_attr "athlon_decode" "double,vector")
1526 (set_attr "amdfam10_decode" "double,double")
1527 (set_attr "prefix_rep" "1")
1528 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result variant of cvttss2si; enabled only when both
;; TARGET_SSE and TARGET_64BIT hold.
1530 (define_insn "sse_cvttss2siq"
1531 [(set (match_operand:DI 0 "register_operand" "=r,r")
1534 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1535 (parallel [(const_int 0)]))))]
1536 "TARGET_SSE && TARGET_64BIT"
1537 "cvttss2siq\t{%1, %0|%0, %1}"
1538 [(set_attr "type" "sseicvt")
1539 (set_attr "athlon_decode" "double,vector")
1540 (set_attr "amdfam10_decode" "double,double")
1541 (set_attr "prefix_rep" "1")
1542 (set_attr "mode" "DI")])
;; cvtdq2ps: `float' conversion of V4SI operand 1 into V4SF operand 0.
1544 (define_insn "sse2_cvtdq2ps"
1545 [(set (match_operand:V4SF 0 "register_operand" "=x")
1546 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1548 "cvtdq2ps\t{%1, %0|%0, %1}"
1549 [(set_attr "type" "ssecvt")
1550 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand 1 converted to V4SI operand 0 through an
;; UNSPEC_FIX_NOTRUNC unspec.
1552 (define_insn "sse2_cvtps2dq"
1553 [(set (match_operand:V4SI 0 "register_operand" "=x")
1554 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1555 UNSPEC_FIX_NOTRUNC))]
1557 "cvtps2dq\t{%1, %0|%0, %1}"
1558 [(set_attr "type" "ssecvt")
1559 (set_attr "prefix_data16" "1")
1560 (set_attr "mode" "TI")])
;; cvttps2dq: `fix' conversion of V4SF operand 1 into V4SI operand 0.
1562 (define_insn "sse2_cvttps2dq"
1563 [(set (match_operand:V4SI 0 "register_operand" "=x")
1564 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1566 "cvttps2dq\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "ssecvt")
1568 (set_attr "prefix_rep" "1")
1569 (set_attr "mode" "TI")])
1571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1573 ;; Parallel double-precision floating point conversion operations
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: `float' conversion of V2SI operand 1 into V2DF operand 0.  The
;; "unit" attribute is "mmx" only for the first ("y") alternative.
1577 (define_insn "sse2_cvtpi2pd"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1579 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1581 "cvtpi2pd\t{%1, %0|%0, %1}"
1582 [(set_attr "type" "ssecvt")
1583 (set_attr "unit" "mmx,*")
1584 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 converted to V2SI operand 0 (constraint "y")
;; through an UNSPEC_FIX_NOTRUNC unspec.
1586 (define_insn "sse2_cvtpd2pi"
1587 [(set (match_operand:V2SI 0 "register_operand" "=y")
1588 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1589 UNSPEC_FIX_NOTRUNC))]
1591 "cvtpd2pi\t{%1, %0|%0, %1}"
1592 [(set_attr "type" "ssecvt")
1593 (set_attr "unit" "mmx")
1594 (set_attr "prefix_data16" "1")
1595 (set_attr "mode" "DI")])
;; cvttpd2pi: `fix' conversion of V2DF operand 1 into V2SI operand 0
;; (constraint "y").
1597 (define_insn "sse2_cvttpd2pi"
1598 [(set (match_operand:V2SI 0 "register_operand" "=y")
1599 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1601 "cvttpd2pi\t{%1, %0|%0, %1}"
1602 [(set_attr "type" "ssecvt")
1603 (set_attr "unit" "mmx")
1604 (set_attr "prefix_data16" "1")
1605 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SImode operand 2 (register or memory alternative) to DF
;; and writes V2DF operand 0, which is tied to operand 1 ("0").
1607 (define_insn "sse2_cvtsi2sd"
1608 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1611 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1612 (match_operand:V2DF 1 "register_operand" "0,0")
1615 "cvtsi2sd\t{%2, %0|%0, %2}"
1616 [(set_attr "type" "sseicvt")
1617 (set_attr "mode" "DF")
1618 (set_attr "athlon_decode" "double,direct")
1619 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source variant of cvtsi2sd; enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.  Operand 0 is tied to operand 1 ("0").
1621 (define_insn "sse2_cvtsi2sdq"
1622 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1625 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1626 (match_operand:V2DF 1 "register_operand" "0,0")
1628 "TARGET_SSE2 && TARGET_64BIT"
1629 "cvtsi2sdq\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "sseicvt")
1631 (set_attr "mode" "DF")
1632 (set_attr "athlon_decode" "double,direct")
1633 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of V2DF operand 1 is converted to SImode operand 0
;; through an UNSPEC_FIX_NOTRUNC unspec.
1635 (define_insn "sse2_cvtsd2si"
1636 [(set (match_operand:SI 0 "register_operand" "=r,r")
1639 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1640 (parallel [(const_int 0)]))]
1641 UNSPEC_FIX_NOTRUNC))]
1643 "cvtsd2si\t{%1, %0|%0, %1}"
1644 [(set_attr "type" "sseicvt")
1645 (set_attr "athlon_decode" "double,vector")
1646 (set_attr "prefix_rep" "1")
1647 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a scalar DF operand rather than a
;; selected vector element; also uses UNSPEC_FIX_NOTRUNC.
1649 (define_insn "sse2_cvtsd2si_2"
1650 [(set (match_operand:SI 0 "register_operand" "=r,r")
1651 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1652 UNSPEC_FIX_NOTRUNC))]
1654 "cvtsd2si\t{%1, %0|%0, %1}"
1655 [(set_attr "type" "sseicvt")
1656 (set_attr "athlon_decode" "double,vector")
1657 (set_attr "amdfam10_decode" "double,double")
1658 (set_attr "prefix_rep" "1")
1659 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result variant of cvtsd2si; enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.
1661 (define_insn "sse2_cvtsd2siq"
1662 [(set (match_operand:DI 0 "register_operand" "=r,r")
1665 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1666 (parallel [(const_int 0)]))]
1667 UNSPEC_FIX_NOTRUNC))]
1668 "TARGET_SSE2 && TARGET_64BIT"
1669 "cvtsd2siq\t{%1, %0|%0, %1}"
1670 [(set_attr "type" "sseicvt")
1671 (set_attr "athlon_decode" "double,vector")
1672 (set_attr "prefix_rep" "1")
1673 (set_attr "mode" "DI")])
;; Variant of cvtsd2siq whose source is a scalar DF operand; enabled only
;; when both TARGET_SSE2 and TARGET_64BIT hold.
1675 (define_insn "sse2_cvtsd2siq_2"
1676 [(set (match_operand:DI 0 "register_operand" "=r,r")
1677 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1678 UNSPEC_FIX_NOTRUNC))]
1679 "TARGET_SSE2 && TARGET_64BIT"
1680 "cvtsd2siq\t{%1, %0|%0, %1}"
1681 [(set_attr "type" "sseicvt")
1682 (set_attr "athlon_decode" "double,vector")
1683 (set_attr "amdfam10_decode" "double,double")
1684 (set_attr "prefix_rep" "1")
1685 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of V2DF operand 1 converted to SImode operand 0.
;; Unlike sse2_cvtsd2si, no UNSPEC_FIX_NOTRUNC appears in this pattern.
1687 (define_insn "sse2_cvttsd2si"
1688 [(set (match_operand:SI 0 "register_operand" "=r,r")
1691 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1692 (parallel [(const_int 0)]))))]
1694 "cvttsd2si\t{%1, %0|%0, %1}"
1695 [(set_attr "type" "sseicvt")
1696 (set_attr "prefix_rep" "1")
1697 (set_attr "mode" "SI")
1698 (set_attr "athlon_decode" "double,vector")
1699 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of cvttsd2si; enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.
1701 (define_insn "sse2_cvttsd2siq"
1702 [(set (match_operand:DI 0 "register_operand" "=r,r")
1705 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1706 (parallel [(const_int 0)]))))]
1707 "TARGET_SSE2 && TARGET_64BIT"
1708 "cvttsd2siq\t{%1, %0|%0, %1}"
1709 [(set_attr "type" "sseicvt")
1710 (set_attr "prefix_rep" "1")
1711 (set_attr "mode" "DI")
1712 (set_attr "athlon_decode" "double,vector")
1713 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: elements 0 and 1 of V4SI operand 1 are selected and converted
;; into V2DF operand 0.
1715 (define_insn "sse2_cvtdq2pd"
1716 [(set (match_operand:V2DF 0 "register_operand" "=x")
1719 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0) (const_int 1)]))))]
1722 "cvtdq2pd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecvt")
1724 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to
;; CONST0_RTX (V2SImode); the *sse2_cvtpd2dq insn matches that operand with
;; the const0_operand predicate.
1726 (define_expand "sse2_cvtpd2dq"
1727 [(set (match_operand:V4SI 0 "register_operand" "")
1729 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1733 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: a V2SI unspec of V2DF operand 1 paired with V2SI
;; operand 2, which must satisfy const0_operand, forming V4SI operand 0.
1735 (define_insn "*sse2_cvtpd2dq"
1736 [(set (match_operand:V4SI 0 "register_operand" "=x")
1738 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1740 (match_operand:V2SI 2 "const0_operand" "")))]
1742 "cvtpd2dq\t{%1, %0|%0, %1}"
1743 [(set_attr "type" "ssecvt")
1744 (set_attr "prefix_rep" "1")
1745 (set_attr "mode" "TI")
1746 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation statement sets operands[2] to
;; CONST0_RTX (V2SImode); the *sse2_cvttpd2dq insn matches that operand with
;; the const0_operand predicate.
1748 (define_expand "sse2_cvttpd2dq"
1749 [(set (match_operand:V4SI 0 "register_operand" "")
1751 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1754 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: `fix:V2SI' of V2DF operand 1 paired with V2SI
;; operand 2, which must satisfy const0_operand, forming V4SI operand 0.
1756 (define_insn "*sse2_cvttpd2dq"
1757 [(set (match_operand:V4SI 0 "register_operand" "=x")
1759 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1760 (match_operand:V2SI 2 "const0_operand" "")))]
1762 "cvttpd2dq\t{%1, %0|%0, %1}"
1763 [(set_attr "type" "ssecvt")
1764 (set_attr "prefix_rep" "1")
1765 (set_attr "mode" "TI")
1766 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: `float_truncate:V2SF' of V2DF operand 2, written into V4SF
;; operand 0, which is tied to operand 1 ("0").
1768 (define_insn "sse2_cvtsd2ss"
1769 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1772 (float_truncate:V2SF
1773 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1774 (match_operand:V4SF 1 "register_operand" "0,0")
1777 "cvtsd2ss\t{%2, %0|%0, %2}"
1778 [(set_attr "type" "ssecvt")
1779 (set_attr "athlon_decode" "vector,double")
1780 (set_attr "amdfam10_decode" "vector,double")
1781 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of V4SF operand 2 are selected for conversion;
;; the result goes to V2DF operand 0, which is tied to operand 1 ("0").
1783 (define_insn "sse2_cvtss2sd"
1784 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1788 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1789 (parallel [(const_int 0) (const_int 1)])))
1790 (match_operand:V2DF 1 "register_operand" "0,0")
1793 "cvtss2sd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "ssecvt")
1795 (set_attr "amdfam10_decode" "vector,double")
1796 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation statement sets operands[2] to
;; CONST0_RTX (V2SFmode); the *sse2_cvtpd2ps insn matches that operand with
;; the const0_operand predicate.
1798 (define_expand "sse2_cvtpd2ps"
1799 [(set (match_operand:V4SF 0 "register_operand" "")
1801 (float_truncate:V2SF
1802 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1805 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: `float_truncate:V2SF' of V2DF operand 1 paired with
;; V2SF operand 2, which must satisfy const0_operand, forming V4SF operand 0.
1807 (define_insn "*sse2_cvtpd2ps"
1808 [(set (match_operand:V4SF 0 "register_operand" "=x")
1810 (float_truncate:V2SF
1811 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1812 (match_operand:V2SF 2 "const0_operand" "")))]
1814 "cvtpd2ps\t{%1, %0|%0, %1}"
1815 [(set_attr "type" "ssecvt")
1816 (set_attr "prefix_data16" "1")
1817 (set_attr "mode" "V4SF")
1818 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: elements 0 and 1 of V4SF operand 1 are selected and converted
;; into V2DF operand 0.
1820 (define_insn "sse2_cvtps2pd"
1821 [(set (match_operand:V2DF 0 "register_operand" "=x")
1824 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1825 (parallel [(const_int 0) (const_int 1)]))))]
1827 "cvtps2pd\t{%1, %0|%0, %1}"
1828 [(set_attr "type" "ssecvt")
1829 (set_attr "mode" "V2DF")
1830 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander producing V2DF operand 0 from V4SF operand 1.  The
;; preparation code allocates a fresh V4SFmode register as operands[2]; the
;; final set selects elements 0 and 1 of its source.
;; NOTE(review): operands[2] presumably holds the intermediate shuffle result;
;; the lines that use it are not visible in this excerpt -- confirm.
1832 (define_expand "vec_unpacks_hi_v4sf"
1837 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1838 (parallel [(const_int 6)
1842 (set (match_operand:V2DF 0 "register_operand" "")
1846 (parallel [(const_int 0) (const_int 1)]))))]
1849 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
1852 (define_expand "vec_unpacks_lo_v4sf"
1853 [(set (match_operand:V2DF 0 "register_operand" "")
1856 (match_operand:V4SF 1 "nonimmediate_operand" "")
1857 (parallel [(const_int 0) (const_int 1)]))))]
;; Expands by unpacking V8HI operand 1 into a fresh V4SImode temporary with
;; gen_vec_unpacks_hi_v8hi, then converting that temporary into V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
1860 (define_expand "vec_unpacks_float_hi_v8hi"
1861 [(match_operand:V4SF 0 "register_operand" "")
1862 (match_operand:V8HI 1 "register_operand" "")]
1865 rtx tmp = gen_reg_rtx (V4SImode);
1867 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1868 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expands by unpacking V8HI operand 1 into a fresh V4SImode temporary with
;; gen_vec_unpacks_lo_v8hi, then converting that temporary into V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
1872 (define_expand "vec_unpacks_float_lo_v8hi"
1873 [(match_operand:V4SF 0 "register_operand" "")
1874 (match_operand:V8HI 1 "register_operand" "")]
1877 rtx tmp = gen_reg_rtx (V4SImode);
1879 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1880 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expands by unpacking V8HI operand 1 into a fresh V4SImode temporary with
;; gen_vec_unpacku_hi_v8hi, then converting that temporary into V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
1884 (define_expand "vec_unpacku_float_hi_v8hi"
1885 [(match_operand:V4SF 0 "register_operand" "")
1886 (match_operand:V8HI 1 "register_operand" "")]
1889 rtx tmp = gen_reg_rtx (V4SImode);
1891 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1892 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expands by unpacking V8HI operand 1 into a fresh V4SImode temporary with
;; gen_vec_unpacku_lo_v8hi, then converting that temporary into V4SF
;; operand 0 with gen_sse2_cvtdq2ps.
1896 (define_expand "vec_unpacku_float_lo_v8hi"
1897 [(match_operand:V4SF 0 "register_operand" "")
1898 (match_operand:V8HI 1 "register_operand" "")]
1901 rtx tmp = gen_reg_rtx (V4SImode);
1903 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1904 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander producing V2DF operand 0 from V4SI operand 1.  The
;; preparation code allocates a fresh V4SImode register as operands[2]; the
;; final set selects elements 0 and 1 of its source.
;; NOTE(review): operands[2] presumably holds the intermediate shuffle result;
;; the lines that use it are not visible in this excerpt -- confirm.
1908 (define_expand "vec_unpacks_float_hi_v4si"
1911 (match_operand:V4SI 1 "nonimmediate_operand" "")
1912 (parallel [(const_int 2)
1916 (set (match_operand:V2DF 0 "register_operand" "")
1920 (parallel [(const_int 0) (const_int 1)]))))]
1923 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
1926 (define_expand "vec_unpacks_float_lo_v4si"
1927 [(set (match_operand:V2DF 0 "register_operand" "")
1930 (match_operand:V4SI 1 "nonimmediate_operand" "")
1931 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into V4SF operand 0: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SFmode temporary, and the two temporaries
;; are then combined with gen_sse_movlhps.
1934 (define_expand "vec_pack_trunc_v2df"
1935 [(match_operand:V4SF 0 "register_operand" "")
1936 (match_operand:V2DF 1 "nonimmediate_operand" "")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1942 r1 = gen_reg_rtx (V4SFmode);
1943 r2 = gen_reg_rtx (V4SFmode);
1945 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1946 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1947 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into V4SI operand 0: each input is converted with
;; gen_sse2_cvttpd2dq into its own V4SImode temporary, and the temporaries
;; are combined with gen_sse2_punpcklqdq on V2DImode lowparts.
1951 (define_expand "vec_pack_sfix_trunc_v2df"
1952 [(match_operand:V4SI 0 "register_operand" "")
1953 (match_operand:V2DF 1 "nonimmediate_operand" "")
1954 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1959 r1 = gen_reg_rtx (V4SImode);
1960 r2 = gen_reg_rtx (V4SImode);
1962 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1963 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1964 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1965 gen_lowpart (V2DImode, r1),
1966 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into V4SI operand 0: each input is converted with
;; gen_sse2_cvtpd2dq into its own V4SImode temporary, and the temporaries
;; are combined with gen_sse2_punpcklqdq on V2DImode lowparts.
1970 (define_expand "vec_pack_sfix_v2df"
1971 [(match_operand:V4SI 0 "register_operand" "")
1972 (match_operand:V2DF 1 "nonimmediate_operand" "")
1973 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1978 r1 = gen_reg_rtx (V4SImode);
1979 r2 = gen_reg_rtx (V4SImode);
1981 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1982 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1983 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1984 gen_lowpart (V2DImode, r1),
1985 gen_lowpart (V2DImode, r2)));
1989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1991 ;; Parallel single-precision floating point element swizzling
1993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the movhlps pattern; its preparation statement calls
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands) before the
;; pattern is emitted.
1995 (define_expand "sse_movhlps_exp"
1996 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
1999 (match_operand:V4SF 1 "nonimmediate_operand" "")
2000 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2001 (parallel [(const_int 6)
2006 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Three alternatives: register source (movhlps), offsettable-memory source
;; (movlps using %H2), and memory destination (movhps).  The condition rejects
;; the case where operands 1 and 2 are both MEM.
2008 (define_insn "sse_movhlps"
2009 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2012 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2013 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2014 (parallel [(const_int 6)
2018 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2020 movhlps\t{%2, %0|%0, %2}
2021 movlps\t{%H2, %0|%0, %H2}
2022 movhps\t{%2, %0|%0, %2}"
2023 [(set_attr "type" "ssemov")
2024 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps pattern; its preparation statement calls
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands) before the
;; pattern is emitted.
2026 (define_expand "sse_movlhps_exp"
2027 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2030 (match_operand:V4SF 1 "nonimmediate_operand" "")
2031 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2032 (parallel [(const_int 0)
2037 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Three alternatives: register source (movlhps), memory source (movhps), and
;; offsettable-memory destination (movlps writing %H0).  The condition
;; requires ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands).
2039 (define_insn "sse_movlhps"
2040 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2043 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2044 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2045 (parallel [(const_int 0)
2049 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2051 movlhps\t{%2, %0|%0, %2}
2052 movhps\t{%2, %0|%0, %2}
2053 movlps\t{%2, %H0|%H0, %2}"
2054 [(set_attr "type" "ssemov")
2055 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps insn: the selection indices are 2, 6, 3, 7 over operands 1 and 2.
;; Operand 1 is tied to the destination register; operand 2 may be a
;; register or memory.
2057 (define_insn "sse_unpckhps"
2058 [(set (match_operand:V4SF 0 "register_operand" "=x")
2061 (match_operand:V4SF 1 "register_operand" "0")
2062 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2063 (parallel [(const_int 2) (const_int 6)
2064 (const_int 3) (const_int 7)])))]
2066 "unpckhps\t{%2, %0|%0, %2}"
2067 [(set_attr "type" "sselog")
2068 (set_attr "mode" "V4SF")])
;; unpcklps insn: the selection indices are 0, 4, 1, 5 over operands 1 and 2.
;; Operand 1 is tied to the destination register; operand 2 may be a
;; register or memory.
2070 (define_insn "sse_unpcklps"
2071 [(set (match_operand:V4SF 0 "register_operand" "=x")
2074 (match_operand:V4SF 1 "register_operand" "0")
2075 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2076 (parallel [(const_int 0) (const_int 4)
2077 (const_int 1) (const_int 5)])))]
2079 "unpcklps\t{%2, %0|%0, %2}"
2080 [(set_attr "type" "sselog")
2081 (set_attr "mode" "V4SF")])
2083 ;; These are modeled with the same vec_concat as the others so that we
2084 ;; capture users of shufps that can use the new instructions
;; movshdup insn: single source operand 1 (register or memory) into register
;; operand 0; the visible selection list starts at element 1.  Marked with
;; the prefix_rep attribute.
2085 (define_insn "sse3_movshdup"
2086 [(set (match_operand:V4SF 0 "register_operand" "=x")
2089 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2091 (parallel [(const_int 1)
2096 "movshdup\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sse")
2098 (set_attr "prefix_rep" "1")
2099 (set_attr "mode" "V4SF")])
;; movsldup insn: single source operand 1 (register or memory) into register
;; operand 0; the visible selection list starts at element 0.  Marked with
;; the prefix_rep attribute.
2101 (define_insn "sse3_movsldup"
2102 [(set (match_operand:V4SF 0 "register_operand" "=x")
2105 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2107 (parallel [(const_int 0)
2112 "movsldup\t{%1, %0|%0, %1}"
2113 [(set_attr "type" "sse")
2114 (set_attr "prefix_rep" "1")
2115 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate mask in operand 3.  The mask is
;; split into four 2-bit selectors that are passed to sse_shufps_1; the two
;; upper selectors are biased by 4 before being passed on.
2117 (define_expand "sse_shufps"
2118 [(match_operand:V4SF 0 "register_operand" "")
2119 (match_operand:V4SF 1 "register_operand" "")
2120 (match_operand:V4SF 2 "nonimmediate_operand" "")
2121 (match_operand:SI 3 "const_int_operand" "")]
2124 int mask = INTVAL (operands[3]);
2125 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2126 GEN_INT ((mask >> 0) & 3),
2127 GEN_INT ((mask >> 2) & 3),
2128 GEN_INT (((mask >> 4) & 3) + 4),
2129 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps insn with the selectors held as four separate operands: operands 3
;; and 4 are in 0..3, operands 5 and 6 in 4..7.  The output code rebuilds the
;; 8-bit immediate (subtracting the bias of 4 from operands 5 and 6), stores
;; it back into operands[3] and emits shufps with that immediate.
2133 (define_insn "sse_shufps_1"
2134 [(set (match_operand:V4SF 0 "register_operand" "=x")
2137 (match_operand:V4SF 1 "register_operand" "0")
2138 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2139 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2140 (match_operand 4 "const_0_to_3_operand" "")
2141 (match_operand 5 "const_4_to_7_operand" "")
2142 (match_operand 6 "const_4_to_7_operand" "")])))]
2146 mask |= INTVAL (operands[3]) << 0;
2147 mask |= INTVAL (operands[4]) << 2;
2148 mask |= (INTVAL (operands[5]) - 4) << 4;
2149 mask |= (INTVAL (operands[6]) - 4) << 6;
2150 operands[3] = GEN_INT (mask);
2152 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2154 [(set_attr "type" "sselog")
2155 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination, register source        -> movhps store
;;   1: register destination, register source      -> movhlps
;;   2: register destination, offsettable memory   -> movlps using %H1
2157 (define_insn "sse_storehps"
2158 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2160 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2161 (parallel [(const_int 2) (const_int 3)])))]
2164 movhps\t{%1, %0|%0, %1}
2165 movhlps\t{%1, %0|%0, %1}
2166 movlps\t{%H1, %0|%0, %H1}"
2167 [(set_attr "type" "ssemov")
2168 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander form of sse_loadhps: elements 0 and 1 of V4SF operand 1 together
;; with V2SF operand 2.  The preparation statement passes the operands
;; through ix86_fixup_binary_operands.
2170 (define_expand "sse_loadhps_exp"
2171 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2174 (match_operand:V4SF 1 "nonimmediate_operand" "")
2175 (parallel [(const_int 0) (const_int 1)]))
2176 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2178 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; V2SF operand 2.  Alternatives:
;;   0: memory operand 2            -> movhps load
;;   1: register operand 2          -> movlhps
;;   2: offsettable memory dest     -> movlps store through the %H0 modifier
2180 (define_insn "sse_loadhps"
2181 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2184 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2185 (parallel [(const_int 0) (const_int 1)]))
2186 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2189 movhps\t{%2, %0|%0, %2}
2190 movlhps\t{%2, %0|%0, %2}
2191 movlps\t{%2, %H0|%H0, %2}"
2192 [(set_attr "type" "ssemov")
2193 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination, register source   -> movlps store
;;   1: register destination, register source -> movaps (whole-register copy)
;;   2: register destination, memory source   -> movlps load
2195 (define_insn "sse_storelps"
2196 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2198 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2199 (parallel [(const_int 0) (const_int 1)])))]
2202 movlps\t{%1, %0|%0, %1}
2203 movaps\t{%1, %0|%0, %1}
2204 movlps\t{%1, %0|%0, %1}"
2205 [(set_attr "type" "ssemov")
2206 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander form of sse_loadlps: V2SF operand 2 together with elements 2 and
;; 3 of V4SF operand 1.  The preparation statement passes the operands
;; through ix86_fixup_binary_operands.
2208 (define_expand "sse_loadlps_exp"
2209 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2211 (match_operand:V2SF 2 "nonimmediate_operand" "")
2213 (match_operand:V4SF 1 "nonimmediate_operand" "")
2214 (parallel [(const_int 2) (const_int 3)]))))]
2216 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives:
;;   0: operand 2 tied to the destination, register operand 1
;;        -> shufps $0xe4 with operand 1 as the source
;;   1: memory operand 2, operand 1 tied to the destination -> movlps load
;;   2: memory destination (operand 1 tied), register operand 2
;;        -> movlps store
2218 (define_insn "sse_loadlps"
2219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2224 (parallel [(const_int 2) (const_int 3)]))))]
2227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2228 movlps\t{%2, %0|%0, %2}
2229 movlps\t{%2, %0|%0, %2}"
2230 [(set_attr "type" "sselog,ssemov,ssemov")
2231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss insn on registers only: operand 1 is tied to the destination and
;; register operand 2 is the movss source.
2233 (define_insn "sse_movss"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x")
2236 (match_operand:V4SF 2 "register_operand" "x")
2237 (match_operand:V4SF 1 "register_operand" "0")
2240 "movss\t{%2, %0|%0, %2}"
2241 [(set_attr "type" "ssemov")
2242 (set_attr "mode" "SF")])
;; V4SF result from SF operand 1, which is tied to the destination register;
;; emitted as shufps $0 of the destination with itself.
2244 (define_insn "*vec_dupv4sf"
2245 [(set (match_operand:V4SF 0 "register_operand" "=x")
2247 (match_operand:SF 1 "register_operand" "0")))]
2249 "shufps\t{$0, %0, %0|%0, %0, 0}"
2250 [(set_attr "type" "sselog1")
2251 (set_attr "mode" "V4SF")])
2253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from SF operands 1 and 2.  Alternatives 0 and 1 use SSE
;; registers (unpcklps with a register operand 2, movss load when operand 2
;; matches constraint C); alternatives 2 and 3 are the corresponding MMX
;; register forms (punpckldq, movd).
2256 (define_insn "*vec_concatv2sf_sse"
2257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2263 unpcklps\t{%2, %0|%0, %2}
2264 movss\t{%1, %0|%0, %1}
2265 punpckldq\t{%2, %0|%0, %2}
2266 movd\t{%1, %0|%0, %1}"
2267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from V2SF operand 1 (tied to the destination) and V2SF
;; operand 2: movlhps when operand 2 is a register, movhps when it is memory.
2270 (define_insn "*vec_concatv4sf_sse"
2271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2273 (match_operand:V2SF 1 "register_operand" " 0,0")
2274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2277 movlhps\t{%2, %0|%0, %2}
2278 movhps\t{%2, %0|%0, %2}"
2279 [(set_attr "type" "ssemov")
2280 (set_attr "mode" "V4SF,V2SF")])
;; Standard-name expander for V4SF vector initialisation; all work is done by
;; ix86_expand_vector_init, called with `false' as its first argument.
2282 (define_expand "vec_initv4sf"
2283 [(match_operand:V4SF 0 "register_operand" "")
2284 (match_operand 1 "" "")]
2287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Sets a V4SF from SF operand 2 and vector operand 1.  There are four
;; constraint alternatives; the templates visible here cover the first three
;; (movss from a register, movss from memory, movd from a general register).
;; The fourth alternative has a memory destination and no template is
;; visible for it in this extract.
2291 (define_insn "vec_setv4sf_0"
2292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2300 movss\t{%2, %0|%0, %2}
2301 movss\t{%2, %0|%0, %2}
2302 movd\t{%2, %0|%0, %2}
2304 [(set_attr "type" "ssemov")
2305 (set_attr "mode" "SF")])
2307 ;; A subset is vec_setv4sf.
;; insertps-based element set.  Operand 3 arrives as a power of two in 1..8
;; (const_pow2_1_to_8_operand); the output code converts it with exact_log2
;; and shifts the result left by 4 to form the insertps immediate.
2308 (define_insn "*vec_setv4sf_sse4_1"
2309 [(set (match_operand:V4SF 0 "register_operand" "=x")
2312 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2313 (match_operand:V4SF 1 "register_operand" "0")
2314 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2317 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2318 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2320 [(set_attr "type" "sselog")
2321 (set_attr "prefix_extra" "1")
2322 (set_attr "mode" "V4SF")])
;; insertps with a raw 8-bit immediate (operand 3, 0..255), modelled as an
;; unspec.  Operand 1 is tied to the destination; operand 2 is the register
;; source.  The output template is a plain string: the C-style `;' that used
;; to follow it was only being swallowed as the start of an md comment and
;; has been dropped.
2324 (define_insn "sse4_1_insertps"
2325 [(set (match_operand:V4SF 0 "register_operand" "=x")
2326 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2327 (match_operand:V4SF 1 "register_operand" "0")
2328 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2331 "insertps\t{%3, %2, %0|%0, %2, %3}"
2332 [(set_attr "type" "sselog")
2333 (set_attr "prefix_extra" "1")
2334 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a V4SF memory
;; destination: SF operand 1 is stored at byte offset 0 of operand 0 via
;; adjust_address.
2337 [(set (match_operand:V4SF 0 "memory_operand" "")
2340 (match_operand:SF 1 "nonmemory_operand" ""))
2343 "TARGET_SSE && reload_completed"
2346 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Standard-name expander for setting one V4SF element: operand 0 is the
;; vector, operand 1 the SF value, operand 2 the constant element index.
;; All work is done by ix86_expand_vector_set.
2350 (define_expand "vec_setv4sf"
2351 [(match_operand:V4SF 0 "register_operand" "")
2352 (match_operand:SF 1 "register_operand" "")
2353 (match_operand 2 "const_int_operand" "")]
2356 ix86_expand_vector_set (false, operands[0], operands[1],
2357 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  Matches under TARGET_SSE when source
;; and destination are not both memory, and is split after reload into an
;; ordinary SFmode move.  The split forms an SFmode view of operand 1 either
;; with gen_rtx_REG on its register number or with gen_lowpart; the test that
;; chooses between the two is not visible in this extract.
2361 (define_insn_and_split "*vec_extractv4sf_0"
2362 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2364 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2365 (parallel [(const_int 0)])))]
2366 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2368 "&& reload_completed"
2371 rtx op1 = operands[1];
2373 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2375 op1 = gen_lowpart (SFmode, op1);
2376 emit_move_insn (operands[0], op1);
;; extractps insn: extracts the element of register operand 1 selected by
;; constant operand 2 (0..3) into a general register or memory (operand 0).
2380 (define_insn "*sse4_1_extractps"
2381 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2383 (match_operand:V4SF 1 "register_operand" "x")
2384 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2386 "extractps\t{%2, %1, %0|%0, %1, %2}"
2387 [(set_attr "type" "sselog")
2388 (set_attr "prefix_extra" "1")
2389 (set_attr "mode" "V4SF")])
;; Element extraction from a V4SF in offsettable memory.  The split replaces
;; it with a scalar SFmode load from byte offset 4 * index (operand 2, 0..3).
2391 (define_insn_and_split "*vec_extract_v4sf_mem"
2392 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2394 (match_operand:V4SF 1 "memory_operand" "o")
2395 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2401 int i = INTVAL (operands[2]);
2403 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Standard-name expander for extracting one V4SF element: operand 0 is the
;; SF result, operand 1 the vector, operand 2 the constant element index.
;; All work is done by ix86_expand_vector_extract.
2407 (define_expand "vec_extractv4sf"
2408 [(match_operand:SF 0 "register_operand" "")
2409 (match_operand:V4SF 1 "register_operand" "")
2410 (match_operand 2 "const_int_operand" "")]
2413 ix86_expand_vector_extract (false, operands[0], operands[1],
2414 INTVAL (operands[2]));
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;; Parallel double-precision floating point element swizzling
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander form of the unpckhpd pattern.  All three V2DF operands accept
;; registers or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands.
2424 (define_expand "sse2_unpckhpd_exp"
2425 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2428 (match_operand:V2DF 1 "nonimmediate_operand" "")
2429 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2430 (parallel [(const_int 1)
2433 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; unpckhpd insn.  Alternatives:
;;   0: operand 1 tied to the destination, register operand 2 -> unpckhpd
;;   1: operand 2 tied to the destination, offsettable memory operand 1
;;        -> movlpd using the %H1 modifier
;;   2: memory destination (operand 2 tied), register operand 1
;;        -> movhpd store
;; Needs TARGET_SSE2 and rejects operands 1 and 2 both being memory.
2435 (define_insn "sse2_unpckhpd"
2436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2440 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2441 (parallel [(const_int 1)
2443 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2445 unpckhpd\t{%2, %0|%0, %2}
2446 movlpd\t{%H1, %0|%0, %H1}
2447 movhpd\t{%1, %0|%0, %1}"
2448 [(set_attr "type" "sselog,ssemov,ssemov")
2449 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup pattern with two constraint alternatives.  The first (register
;; destination, register or memory source) emits movddup; the second has an
;; offsettable memory destination and no template is visible for it in this
;; extract.  Needs TARGET_SSE3 and rejects memory-to-memory.
2451 (define_insn "*sse3_movddup"
2452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2455 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2457 (parallel [(const_int 0)
2459 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2461 movddup\t{%1, %0|%0, %1}
2463 [(set_attr "type" "sselog1,ssemov")
2464 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination with a register source: the register is viewed in DFmode and
;; stored twice, at byte offsets 0 and 8 of the destination.
2467 [(set (match_operand:V2DF 0 "memory_operand" "")
2470 (match_operand:V2DF 1 "register_operand" "")
2472 (parallel [(const_int 0)
2474 "TARGET_SSE3 && reload_completed"
2477 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2478 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2479 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander form of the unpcklpd pattern.  All three V2DF operands accept
;; registers or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands.
2483 (define_expand "sse2_unpcklpd_exp"
2484 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2487 (match_operand:V2DF 1 "nonimmediate_operand" "")
2488 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2489 (parallel [(const_int 0)
2492 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; unpcklpd insn.  Operand 1 is always tied to the destination.  Alternatives:
;;   0: register operand 2        -> unpcklpd
;;   1: memory operand 2          -> movhpd load
;;   2: offsettable memory dest   -> movlpd store through the %H0 modifier
;; Needs TARGET_SSE2 and rejects operands 1 and 2 both being memory.
2494 (define_insn "sse2_unpcklpd"
2495 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2498 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2499 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2500 (parallel [(const_int 0)
2502 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2504 unpcklpd\t{%2, %0|%0, %2}
2505 movhpd\t{%2, %0|%0, %2}
2506 movlpd\t{%2, %H0|%H0, %2}"
2507 [(set_attr "type" "sselog,ssemov,ssemov")
2508 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd with an immediate mask in operand 3.  The mask is
;; decoded into separate selector operands for sse2_shufpd_1; the selector
;; visible here is 3 when mask bit 1 is set and 2 otherwise.
2510 (define_expand "sse2_shufpd"
2511 [(match_operand:V2DF 0 "register_operand" "")
2512 (match_operand:V2DF 1 "register_operand" "")
2513 (match_operand:V2DF 2 "nonimmediate_operand" "")
2514 (match_operand:SI 3 "const_int_operand" "")]
2517 int mask = INTVAL (operands[3]);
2518 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2520 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd insn with the selectors held as separate operands: operand 3 is in
;; 0..1 and operand 4 in 2..3.  The output code rebuilds the immediate
;; (operand 3 in bit 0, operand 4 minus 2 in bit 1), stores it back into
;; operands[3] and emits shufpd with that immediate.
2524 (define_insn "sse2_shufpd_1"
2525 [(set (match_operand:V2DF 0 "register_operand" "=x")
2528 (match_operand:V2DF 1 "register_operand" "0")
2529 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2530 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2531 (match_operand 4 "const_2_to_3_operand" "")])))]
2535 mask = INTVAL (operands[3]);
2536 mask |= (INTVAL (operands[4]) - 2) << 1;
2537 operands[3] = GEN_INT (mask);
2539 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2541 [(set_attr "type" "sselog")
2542 (set_attr "mode" "V2DF")])
;; Extracts element 1 of V2DF operand 1 into DF operand 0.  Three constraint
;; alternatives exist; only the template of the first (movhpd store to
;; memory) is visible in this extract.  Needs TARGET_SSE2 and rejects
;; memory-to-memory.
2544 (define_insn "sse2_storehpd"
2545 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2547 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2548 (parallel [(const_int 1)])))]
2549 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2551 movhpd\t{%1, %0|%0, %1}
2554 [(set_attr "type" "ssemov,sselog1,ssemov")
2555 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed): extracting element 1
;; of a V2DF in memory into a register becomes a plain DFmode load from byte
;; offset 8 of that memory.
2558 [(set (match_operand:DF 0 "register_operand" "")
2560 (match_operand:V2DF 1 "memory_operand" "")
2561 (parallel [(const_int 1)])))]
2562 "TARGET_SSE2 && reload_completed"
2563 [(set (match_dup 0) (match_dup 1))]
2565 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of V2DF operand 1 into DF operand 0.  Three constraint
;; alternatives exist; only the template of the first (movlpd store to
;; memory) is visible in this extract.  Needs TARGET_SSE2 and rejects
;; memory-to-memory.
2568 (define_insn "sse2_storelpd"
2569 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2571 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2572 (parallel [(const_int 0)])))]
2573 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2575 movlpd\t{%1, %0|%0, %1}
2578 [(set_attr "type" "ssemov")
2579 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed): extracting element 0
;; of a V2DF into a register becomes an ordinary DFmode move.  A DFmode view
;; of operand 1 is formed either with gen_rtx_REG on its register number or
;; with gen_lowpart; the test choosing between them is not visible in this
;; extract.
2582 [(set (match_operand:DF 0 "register_operand" "")
2584 (match_operand:V2DF 1 "nonimmediate_operand" "")
2585 (parallel [(const_int 0)])))]
2586 "TARGET_SSE2 && reload_completed"
2589 rtx op1 = operands[1];
2591 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2593 op1 = gen_lowpart (DFmode, op1);
2594 emit_move_insn (operands[0], op1);
;; Expander form of sse2_loadhpd: element 0 of V2DF operand 1 together with
;; DF operand 2.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands.
2598 (define_expand "sse2_loadhpd_exp"
2599 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2602 (match_operand:V2DF 1 "nonimmediate_operand" "")
2603 (parallel [(const_int 0)]))
2604 (match_operand:DF 2 "nonimmediate_operand" "")))]
2606 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Combines element 0 of V2DF operand 1 with DF operand 2.  Four constraint
;; alternatives; the three visible templates are:
;;   0: memory operand 2, operand 1 tied to the destination   -> movhpd load
;;   1: register operand 2, operand 1 tied to the destination -> unpcklpd
;;   2: operand 2 tied to the destination, register operand 1 -> shufpd $1
;; The fourth alternative (offsettable memory destination) has no template
;; visible in this extract.  Needs TARGET_SSE2 and rejects operands 1 and 2
;; both being memory.
;; NOTE(review): in alternative 2 the DF value lives in the destination
;; register and shufpd $1 takes operand 1 as its source; confirm that this
;; immediate really produces {op1[0], op2} for that register assignment.
2608 (define_insn "sse2_loadhpd"
2609 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2612 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2613 (parallel [(const_int 0)]))
2614 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2615 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2617 movhpd\t{%2, %0|%0, %2}
2618 unpcklpd\t{%2, %0|%0, %2}
2619 shufpd\t{$1, %1, %0|%0, %1, 1}
2621 [(set_attr "type" "ssemov,sselog,sselog,other")
2622 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed) for a V2DF memory
;; destination whose element 0 is kept (match_dup 0) and whose other element
;; comes from DF register operand 1: becomes a plain DFmode store at byte
;; offset 8 of the destination.
2625 [(set (match_operand:V2DF 0 "memory_operand" "")
2627 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2628 (match_operand:DF 1 "register_operand" "")))]
2629 "TARGET_SSE2 && reload_completed"
2630 [(set (match_dup 0) (match_dup 1))]
2632 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander form of sse2_loadlpd: DF operand 2 together with element 1 of
;; V2DF operand 1.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands.
2635 (define_expand "sse2_loadlpd_exp"
2636 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2638 (match_operand:DF 2 "nonimmediate_operand" "")
2640 (match_operand:V2DF 1 "nonimmediate_operand" "")
2641 (parallel [(const_int 1)]))))]
2643 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Combines DF operand 2 (new element 0) with element 1 of V2DF operand 1.
;; Six constraint alternatives; the five visible templates are:
;;   0: memory operand 2, operand 1 matches constraint C      -> movsd load
;;   1: memory operand 2, operand 1 tied to the destination   -> movlpd load
;;   2: register operand 2, operand 1 tied to the destination -> movsd
;;   3: operand 2 tied to the destination, register operand 1 -> shufpd $2
;;   4: operand 2 tied to the destination, offsettable memory operand 1
;;        -> movhpd using the %H1 modifier
;; The sixth alternative (memory destination) has no template visible in
;; this extract.  Needs TARGET_SSE2 and rejects operands 1 and 2 both being
;; memory.
;;
;; In alternative 3 the destination already holds operand 2, so the shufpd
;; source must be operand 1 (%1) for element 1 to come from operand 1.
;; Using %2 there names the destination itself and turns the instruction
;; into a no-op.  sse_loadlps uses the same scheme with shufps $0xe4, %1, %0.
2645 (define_insn "sse2_loadlpd"
2646 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2648 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2650 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2651 (parallel [(const_int 1)]))))]
2652 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2654 movsd\t{%2, %0|%0, %2}
2655 movlpd\t{%2, %0|%0, %2}
2656 movsd\t{%2, %0|%0, %2}
2657 shufpd\t{$2, %1, %0|%0, %1, 2}
2658 movhpd\t{%H1, %0|%0, %H1}
2660 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2661 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed) for a V2DF memory
;; destination whose element 1 is kept (match_dup 0) and whose element 0
;; comes from DF register operand 1.  Only element 0 changes, so this becomes
;; a plain DFmode store at byte offset 0 of the destination.  (Offset 8 would
;; overwrite element 1, the element the pattern says is preserved.)
2664 [(set (match_operand:V2DF 0 "memory_operand" "")
2666 (match_operand:DF 1 "register_operand" "")
2667 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2668 "TARGET_SSE2 && reload_completed"
2669 [(set (match_dup 0) (match_dup 1))]
2671 operands[0] = adjust_address (operands[0], DFmode, 0);
2674 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extraction of element 1 of a V2DF when SSE is available but SSE2 is not,
;; done with single-precision move instructions.  Alternatives:
;;   0: memory destination, register source      -> movhps store
;;   1: register destination, register source    -> movhlps
;;   2: register destination, offsettable memory -> movlps using %H1
;; Rejects memory-to-memory.
2676 (define_insn "*vec_extractv2df_1_sse"
2677 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2679 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2680 (parallel [(const_int 1)])))]
2681 "!TARGET_SSE2 && TARGET_SSE
2682 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2684 movhps\t{%1, %0|%0, %1}
2685 movhlps\t{%1, %0|%0, %1}
2686 movlps\t{%H1, %0|%0, %H1}"
2687 [(set_attr "type" "ssemov")
2688 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF when SSE is available but SSE2 is not,
;; done with single-precision move instructions.  Alternatives:
;;   0: memory destination, register source   -> movlps store
;;   1: register destination, register source -> movaps
;;   2: register destination, memory source   -> movlps load
;; Rejects memory-to-memory.
2690 (define_insn "*vec_extractv2df_0_sse"
2691 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2693 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2694 (parallel [(const_int 0)])))]
2695 "!TARGET_SSE2 && TARGET_SSE
2696 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2698 movlps\t{%1, %0|%0, %1}
2699 movaps\t{%1, %0|%0, %1}
2700 movlps\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssemov")
2702 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of V2DF operands 2 and 1.  Alternatives:
;;   0: operand 1 tied to the destination, register operand 2 -> movsd
;;   1: operand 1 tied to the destination, memory operand 2   -> movlpd load
;;   2: memory destination (operand 1 tied), register operand 2
;;        -> movlpd store
;;   3: operand 2 tied to the destination, register operand 1 -> shufpd $2
;;   4: operand 2 tied to the destination, offsettable memory operand 1
;;        -> movhps using the %H1 modifier
;;   5: offsettable memory destination (operand 2 tied), register operand 1
;;        -> movhps store through the %H0 modifier
;;
;; In alternative 3 the destination already holds operand 2, so the shufpd
;; source must be operand 1 (%1), as in alternatives 4 and 5, which also take
;; the upper half from operand 1.  Using %2 there names the destination
;; itself and turns the instruction into a no-op.
2704 (define_insn "sse2_movsd"
2705 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2707 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2708 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2712 movsd\t{%2, %0|%0, %2}
2713 movlpd\t{%2, %0|%0, %2}
2714 movlpd\t{%2, %0|%0, %2}
2715 shufpd\t{$2, %1, %0|%0, %1, 2}
2716 movhps\t{%H1, %0|%0, %H1}
2717 movhps\t{%1, %H0|%H0, %1}"
2718 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2719 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result from DF operand 1 (register or memory), emitted as a single
;; movddup.
2721 (define_insn "*vec_dupv2df_sse3"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x")
2724 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2726 "movddup\t{%1, %0|%0, %1}"
2727 [(set_attr "type" "sselog1")
2728 (set_attr "mode" "DF")])
;; V2DF result from DF register operand 1, which is tied to the destination.
;; The insn condition and output template are not visible in this extract.
2730 (define_insn "vec_dupv2df"
2731 [(set (match_operand:V2DF 0 "register_operand" "=x")
2733 (match_operand:DF 1 "register_operand" "0")))]
2736 [(set_attr "type" "sselog1")
2737 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 (register or memory) and emitted as a single
;; movddup.  The second input of the pattern is not visible in this extract.
2739 (define_insn "*vec_concatv2df_sse3"
2740 [(set (match_operand:V2DF 0 "register_operand" "=x")
2742 (match_operand:DF 1 "nonimmediate_operand" "xm")
2745 "movddup\t{%1, %0|%0, %1}"
2746 [(set_attr "type" "sselog1")
2747 (set_attr "mode" "DF")])
;; Builds a V2DF from DF operands 1 and 2.  Alternatives:
;;   0: Y2 registers, register operand 2              -> unpcklpd
;;   1: Y2 destination, memory operand 2              -> movhpd load
;;   2: Y2 destination, memory operand 1, operand 2 C -> movsd load
;;   3: x registers, register operand 2               -> movlhps
;;   4: x destination, memory operand 2               -> movhps load
2749 (define_insn "*vec_concatv2df"
2750 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2752 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2753 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2756 unpcklpd\t{%2, %0|%0, %2}
2757 movhpd\t{%2, %0|%0, %2}
2758 movsd\t{%1, %0|%0, %1}
2759 movlhps\t{%2, %0|%0, %2}
2760 movhps\t{%2, %0|%0, %2}"
2761 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2762 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Standard-name expander for setting one V2DF element: operand 0 is the
;; vector, operand 1 the DF value, operand 2 the constant element index.
;; All work is done by ix86_expand_vector_set.
2764 (define_expand "vec_setv2df"
2765 [(match_operand:V2DF 0 "register_operand" "")
2766 (match_operand:DF 1 "register_operand" "")
2767 (match_operand 2 "const_int_operand" "")]
2770 ix86_expand_vector_set (false, operands[0], operands[1],
2771 INTVAL (operands[2]));
;; Standard-name expander for extracting one V2DF element: operand 0 is the
;; DF result, operand 1 the vector, operand 2 the constant element index.
;; All work is done by ix86_expand_vector_extract.
2775 (define_expand "vec_extractv2df"
2776 [(match_operand:DF 0 "register_operand" "")
2777 (match_operand:V2DF 1 "register_operand" "")
2778 (match_operand 2 "const_int_operand" "")]
2781 ix86_expand_vector_extract (false, operands[0], operands[1],
2782 INTVAL (operands[2]));
;; Standard-name expander for V2DF vector initialisation; all work is done by
;; ix86_expand_vector_init, called with `false' as its first argument.
2786 (define_expand "vec_initv2df"
2787 [(match_operand:V2DF 0 "register_operand" "")
2788 (match_operand 1 "" "")]
2791 ix86_expand_vector_init (false, operands[0], operands[1]);
2795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2797 ;; Parallel integral arithmetic
2799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every mode in SSEMODEI.  The preparation statement
;; creates operand 2 as the all-zero vector of the mode, forced into a
;; register.
2801 (define_expand "neg<mode>2"
2802 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2805 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2807 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for every mode in SSEMODEI.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy with the iterator's rtx code.
2809 (define_expand "<plusminus_insn><mode>3"
2810 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2812 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2813 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2815 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Add/subtract insn for every mode in SSEMODEI.  The mnemonic is assembled
;; from "p", the <plusminus_mnemonic> attribute and the element-size suffix
;; <ssevecsize>.  Operand 1 is tied to the destination, with the <comm>
;; attribute supplying the constraint prefix.  Needs TARGET_SSE2 and
;; ix86_binary_operator_ok.
2817 (define_insn "*<plusminus_insn><mode>3"
2818 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2820 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2821 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2822 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2823 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2824 [(set_attr "type" "sseiadd")
2825 (set_attr "prefix_data16" "1")
2826 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes on the SSEMODE12 modes (V16QI and
;; V8HI).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with the iterator's rtx code.
2828 (define_expand "sse2_<plusminus_insn><mode>3"
2829 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2830 (sat_plusminus:SSEMODE12
2831 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2832 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2834 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus codes on the SSEMODE12 modes (V16QI and V8HI).
;; The mnemonic is assembled from "p", <plusminus_mnemonic> and the
;; element-size suffix <ssevecsize>.  Operand 1 is tied to the destination,
;; with <comm> supplying the constraint prefix.  Needs TARGET_SSE2 and
;; ix86_binary_operator_ok.
2836 (define_insn "*sse2_<plusminus_insn><mode>3"
2837 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2838 (sat_plusminus:SSEMODE12
2839 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2840 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2841 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2842 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "sseiadd")
2844 (set_attr "prefix_data16" "1")
2845 (set_attr "mode" "TI")])
;; V16QI multiply, which is split before reload (the condition excludes
;; reload_completed and reload_in_progress) into word multiplies.  Two
;; strategies are visible in the split code:
;;   - an SSE5 one: ix86_expand_sse5_unpack widens the high and low bytes of
;;     both inputs into V8HI temporaries, two mulv8hi3 multiply them, and
;;     ix86_expand_sse5_pack packs the result into operand 0;
;;   - a punpck{h,l}bw one: each input is interleaved with itself, two
;;     mulv8hi3 multiply the words, and a cascade of punpck{h,l}bw gathers
;;     the low byte of every product (the end-of-line annotations trace the
;;     byte layout).
;; The test that selects between the two strategies is not visible in this
;; extract.
2847 (define_insn_and_split "mulv16qi3"
2848 [(set (match_operand:V16QI 0 "register_operand" "")
2849 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2850 (match_operand:V16QI 2 "register_operand" "")))]
2852 && !(reload_completed || reload_in_progress)"
2857 rtx t[12], op0, op[3];
2862 /* On SSE5, we can take advantage of the pperm instruction to pack and
2863 unpack the bytes. Unpack data such that we've got a source byte in
2864 each low byte of each word. We don't care what goes into the high
2865 byte, so put 0 there. */
2866 for (i = 0; i < 6; ++i)
2867 t[i] = gen_reg_rtx (V8HImode);
2869 for (i = 0; i < 2; i++)
2872 op[1] = operands[i+1];
2873 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2876 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2879 /* Multiply words. */
2880 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2881 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2883 /* Pack the low byte of each word back into a single xmm */
2884 op[0] = operands[0];
2887 ix86_expand_sse5_pack (op);
2891 for (i = 0; i < 12; ++i)
2892 t[i] = gen_reg_rtx (V16QImode);
2894 /* Unpack data such that we've got a source byte in each low byte of
2895 each word. We don't care what goes into the high byte of each word.
2896 Rather than trying to get zero in there, most convenient is to let
2897 it be a copy of the low byte. */
2898 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2899 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2900 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2901 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2903 /* Multiply words. The end-of-line annotations here give a picture of what
2904 the output of that instruction looks like. Dot means don't care; the
2905 letters are the bytes of the result with A being the most significant. */
2906 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2907 gen_lowpart (V8HImode, t[0]),
2908 gen_lowpart (V8HImode, t[1])));
2909 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2910 gen_lowpart (V8HImode, t[2]),
2911 gen_lowpart (V8HImode, t[3])));
2913 /* Extract the relevant bytes and merge them back together. */
2914 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2915 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2916 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2917 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2918 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2919 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2922 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; the operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
2926 (define_expand "mulv8hi3"
2927 [(set (match_operand:V8HI 0 "register_operand" "")
2928 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2929 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2931 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply insn, emitted as pmullw.  Operand 1 is commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.  Needs
;; TARGET_SSE2 and ix86_binary_operator_ok.
2933 (define_insn "*mulv8hi3"
2934 [(set (match_operand:V8HI 0 "register_operand" "=x")
2935 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2936 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2937 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2938 "pmullw\t{%2, %0|%0, %2}"
2939 [(set_attr "type" "sseimul")
2940 (set_attr "prefix_data16" "1")
2941 (set_attr "mode" "TI")])
;; Expander paired with the *smulv8hi3_highpart insn (pmulhw); the RTL
;; wrappers around the two V8HI operands are not visible in this extract.
;; The operands are passed through ix86_fixup_binary_operands_no_copy with
;; code MULT.
2943 (define_expand "smulv8hi3_highpart"
2944 [(set (match_operand:V8HI 0 "register_operand" "")
2949 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2951 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2954 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn emitted as pmulhw.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Needs TARGET_SSE2
;; and ix86_binary_operator_ok.
2956 (define_insn "*smulv8hi3_highpart"
2957 [(set (match_operand:V8HI 0 "register_operand" "=x")
2962 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2964 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2966 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2967 "pmulhw\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseimul")
2969 (set_attr "prefix_data16" "1")
2970 (set_attr "mode" "TI")])
;; Expander paired with the *umulv8hi3_highpart insn (pmulhuw); the RTL
;; wrappers around the two V8HI operands are not visible in this extract.
;; The operands are passed through ix86_fixup_binary_operands_no_copy with
;; code MULT.
2972 (define_expand "umulv8hi3_highpart"
2973 [(set (match_operand:V8HI 0 "register_operand" "")
2978 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2980 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2983 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn emitted as pmulhuw.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Needs TARGET_SSE2
;; and ix86_binary_operator_ok.
2985 (define_insn "*umulv8hi3_highpart"
2986 [(set (match_operand:V8HI 0 "register_operand" "=x")
2991 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2993 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2995 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2996 "pmulhuw\t{%2, %0|%0, %2}"
2997 [(set_attr "type" "sseimul")
2998 (set_attr "prefix_data16" "1")
2999 (set_attr "mode" "TI")])
;; Expander for the V2DI product of elements 0 and 2 of two V4SI operands
;; (insn form: pmuludq).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
3001 (define_expand "sse2_umulv2siv2di3"
3002 [(set (match_operand:V2DI 0 "register_operand" "")
3006 (match_operand:V4SI 1 "nonimmediate_operand" "")
3007 (parallel [(const_int 0) (const_int 2)])))
3010 (match_operand:V4SI 2 "nonimmediate_operand" "")
3011 (parallel [(const_int 0) (const_int 2)])))))]
3013 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq insn: V2DI result from elements 0 and 2 of V4SI operands 1 and 2.
;; Operand 1 is commutative ("%") and tied to the destination.  Needs
;; TARGET_SSE2 and ix86_binary_operator_ok.
3015 (define_insn "*sse2_umulv2siv2di3"
3016 [(set (match_operand:V2DI 0 "register_operand" "=x")
3020 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3021 (parallel [(const_int 0) (const_int 2)])))
3024 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3025 (parallel [(const_int 0) (const_int 2)])))))]
3026 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3027 "pmuludq\t{%2, %0|%0, %2}"
3028 [(set_attr "type" "sseimul")
3029 (set_attr "prefix_data16" "1")
3030 (set_attr "mode" "TI")])
;; Expander for the V2DI product of elements 0 and 2 of two V4SI operands
;; (insn form: pmuldq).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
3032 (define_expand "sse4_1_mulv2siv2di3"
3033 [(set (match_operand:V2DI 0 "register_operand" "")
3037 (match_operand:V4SI 1 "nonimmediate_operand" "")
3038 (parallel [(const_int 0) (const_int 2)])))
3041 (match_operand:V4SI 2 "nonimmediate_operand" "")
3042 (parallel [(const_int 0) (const_int 2)])))))]
3044 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq insn: V2DI result from elements 0 and 2 of V4SI operands 1 and 2.
;; Operand 1 is commutative ("%") and tied to the destination.  Needs
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
3046 (define_insn "*sse4_1_mulv2siv2di3"
3047 [(set (match_operand:V2DI 0 "register_operand" "=x")
3051 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3052 (parallel [(const_int 0) (const_int 2)])))
3055 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3056 (parallel [(const_int 0) (const_int 2)])))))]
3057 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3058 "pmuldq\t{%2, %0|%0, %2}"
3059 [(set_attr "type" "sseimul")
3060 (set_attr "prefix_extra" "1")
3061 (set_attr "mode" "TI")])
;; Expander for the pmaddwd pattern: V4SI result built from V4HI selections
;; of V8HI operands 1 and 2 (one pair of selections starting at element 0,
;; the other at element 1).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
3063 (define_expand "sse2_pmaddwd"
3064 [(set (match_operand:V4SI 0 "register_operand" "")
3069 (match_operand:V8HI 1 "nonimmediate_operand" "")
3070 (parallel [(const_int 0)
3076 (match_operand:V8HI 2 "nonimmediate_operand" "")
3077 (parallel [(const_int 0)
3083 (vec_select:V4HI (match_dup 1)
3084 (parallel [(const_int 1)
3089 (vec_select:V4HI (match_dup 2)
3090 (parallel [(const_int 1)
3093 (const_int 7)]))))))]
3095 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd insn: V4SI result built from V4HI selections of V8HI operands 1
;; and 2 (one pair of selections starting at element 0, the other at
;; element 1).  Operand 1 is commutative ("%") and tied to the destination.
;; Needs TARGET_SSE2 and ix86_binary_operator_ok.
3097 (define_insn "*sse2_pmaddwd"
3098 [(set (match_operand:V4SI 0 "register_operand" "=x")
3103 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3104 (parallel [(const_int 0)
3110 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3111 (parallel [(const_int 0)
3117 (vec_select:V4HI (match_dup 1)
3118 (parallel [(const_int 1)
3123 (vec_select:V4HI (match_dup 2)
3124 (parallel [(const_int 1)
3127 (const_int 7)]))))))]
3128 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3129 "pmaddwd\t{%2, %0|%0, %2}"
3130 [(set_attr "type" "sseiadd")
3131 (set_attr "prefix_data16" "1")
3132 (set_attr "mode" "TI")])
;; V4SI multiply expander.  Only when TARGET_SSE4_1 or TARGET_SSE5 is set are
;; the operands passed through ix86_fixup_binary_operands_no_copy.
3134 (define_expand "mulv4si3"
3135 [(set (match_operand:V4SI 0 "register_operand" "")
3136 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3137 (match_operand:V4SI 2 "register_operand" "")))]
3140 if (TARGET_SSE4_1 || TARGET_SSE5)
3141 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; V4SI multiply insn, emitted as pmulld.  Operand 1 is commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.  Needs
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
3144 (define_insn "*sse4_1_mulv4si3"
3145 [(set (match_operand:V4SI 0 "register_operand" "=x")
3146 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3147 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3148 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3149 "pmulld\t{%2, %0|%0, %2}"
3150 [(set_attr "type" "sseimul")
3151 (set_attr "prefix_extra" "1")
3152 (set_attr "mode" "TI")])
3154 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3155 ;; multiply/add. In general, we expect the define_split to occur before
3156 ;; register allocation, so we have to handle the corner case where the target
3157 ;; is used as the base or index register in operands 1/2.
;; V4SI multiply that is split into a multiply/add form.  The destination is
;; early-clobbered ("=&x").  The split fires once reload has completed, or
;; earlier when operand 0 is mentioned in neither operand 1 nor operand 2.
;; The preparation code sets operands[3] to the V4SI zero constant; the
;; replacement pattern is only partly visible in this extract (a plus of the
;; product of operand 1 and a further operand).
3158 (define_insn_and_split "*sse5_mulv4si3"
3159 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3160 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3161 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3164 "&& (reload_completed
3165 || (!reg_mentioned_p (operands[0], operands[1])
3166 && !reg_mentioned_p (operands[0], operands[2])))"
3170 (plus:V4SI (mult:V4SI (match_dup 1)
3174 operands[3] = CONST0_RTX (V4SImode);
3176 [(set_attr "type" "ssemuladd")
3177 (set_attr "mode" "TI")])
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor SSE5), valid only before
;; reload because the expansion allocates new pseudo registers.  It performs
;; two gen_sse2_umulv2siv2di3 multiplies -- one on elements 2 and 0, one on
;; elements 3 and 1 after shifting both inputs down by one element -- then
;; shuffles each result with pshufd and merges them with punpckldq.
3179 (define_insn_and_split "*sse2_mulv4si3"
3180 [(set (match_operand:V4SI 0 "register_operand" "")
3181 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3182 (match_operand:V4SI 2 "register_operand" "")))]
3183 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3184 && !(reload_completed || reload_in_progress)"
3189 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3195 t1 = gen_reg_rtx (V4SImode);
3196 t2 = gen_reg_rtx (V4SImode);
3197 t3 = gen_reg_rtx (V4SImode);
3198 t4 = gen_reg_rtx (V4SImode);
3199 t5 = gen_reg_rtx (V4SImode);
3200 t6 = gen_reg_rtx (V4SImode);
3201 thirtytwo = GEN_INT (32);
3203 /* Multiply elements 2 and 0. */
3204 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3207 /* Shift both input vectors down one element, so that elements 3
3208 and 1 are now in the slots for elements 2 and 0. For K8, at
3209 least, this is faster than using a shuffle. */
3210 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3211 gen_lowpart (TImode, op1),
3213 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3214 gen_lowpart (TImode, op2),
3216 /* Multiply elements 3 and 1. */
3217 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3220 /* Move the results in element 2 down to element 1; we don't care
3221 what goes in elements 2 and 3. */
3222 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3223 const0_rtx, const0_rtx));
3224 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3225 const0_rtx, const0_rtx));
3227 /* Merge the parts back together. */
3228 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized before reload from three
;; gen_sse2_umulv2siv2di3 products: low(op1)*low(op2), plus the two cross
;; products low(op1)*high(op2) and low(op2)*high(op1), each shifted left by
;; 32 bits before the final additions.
3232 (define_insn_and_split "mulv2di3"
3233 [(set (match_operand:V2DI 0 "register_operand" "")
3234 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3235 (match_operand:V2DI 2 "register_operand" "")))]
3237 && !(reload_completed || reload_in_progress)"
3242 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3248 t1 = gen_reg_rtx (V2DImode);
3249 t2 = gen_reg_rtx (V2DImode);
3250 t3 = gen_reg_rtx (V2DImode);
3251 t4 = gen_reg_rtx (V2DImode);
3252 t5 = gen_reg_rtx (V2DImode);
3253 t6 = gen_reg_rtx (V2DImode);
3254 thirtytwo = GEN_INT (32);
3256 /* Multiply low parts. */
3257 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3258 gen_lowpart (V4SImode, op2)));
3260 /* Shift input vectors right 32 bits so we can multiply high parts. */
3261 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3262 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3264 /* Multiply high parts by low parts. */
3265 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3266 gen_lowpart (V4SImode, t3)));
3267 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3268 gen_lowpart (V4SImode, t2)));
3270 /* Shift them back. */
3271 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3272 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3274 /* Add the three parts together. */
3275 emit_insn (gen_addv2di3 (t6, t1, t4));
3276 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  Computes the
;; low-part products (gen_mulv8hi3) and the signed high-part products
;; (gen_smulv8hi3_highpart), then interleaves their high halves into the
;; destination viewed as V8HI.
3280 (define_expand "vec_widen_smult_hi_v8hi"
3281 [(match_operand:V4SI 0 "register_operand" "")
3282 (match_operand:V8HI 1 "register_operand" "")
3283 (match_operand:V8HI 2 "register_operand" "")]
3286 rtx op1, op2, t1, t2, dest;
3290 t1 = gen_reg_rtx (V8HImode);
3291 t2 = gen_reg_rtx (V8HImode);
3292 dest = gen_lowpart (V8HImode, operands[0]);
3294 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3295 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3296 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.  Same product
;; pair as the high-half variant (gen_mulv8hi3 and gen_smulv8hi3_highpart),
;; combined with the low-half interleave.
3300 (define_expand "vec_widen_smult_lo_v8hi"
3301 [(match_operand:V4SI 0 "register_operand" "")
3302 (match_operand:V8HI 1 "register_operand" "")
3303 (match_operand:V8HI 2 "register_operand" "")]
3306 rtx op1, op2, t1, t2, dest;
3310 t1 = gen_reg_rtx (V8HImode);
3311 t2 = gen_reg_rtx (V8HImode);
3312 dest = gen_lowpart (V8HImode, operands[0]);
3314 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3315 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3316 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  Computes the
;; low-part products (gen_mulv8hi3) and the unsigned high-part products
;; (gen_umulv8hi3_highpart), then interleaves their high halves.
3320 (define_expand "vec_widen_umult_hi_v8hi"
3321 [(match_operand:V4SI 0 "register_operand" "")
3322 (match_operand:V8HI 1 "register_operand" "")
3323 (match_operand:V8HI 2 "register_operand" "")]
3326 rtx op1, op2, t1, t2, dest;
3330 t1 = gen_reg_rtx (V8HImode);
3331 t2 = gen_reg_rtx (V8HImode);
3332 dest = gen_lowpart (V8HImode, operands[0]);
3334 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3335 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3336 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.  Same product
;; pair as the high-half variant (gen_mulv8hi3 and gen_umulv8hi3_highpart),
;; combined with the low-half interleave.
3340 (define_expand "vec_widen_umult_lo_v8hi"
3341 [(match_operand:V4SI 0 "register_operand" "")
3342 (match_operand:V8HI 1 "register_operand" "")
3343 (match_operand:V8HI 2 "register_operand" "")]
3346 rtx op1, op2, t1, t2, dest;
3350 t1 = gen_reg_rtx (V8HImode);
3351 t2 = gen_reg_rtx (V8HImode);
3352 dest = gen_lowpart (V8HImode, operands[0]);
3354 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3355 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3356 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (high half) so the wanted elements are duplicated,
;; then the pair is multiplied with gen_sse2_umulv2siv2di3.
;; NOTE(review): this signed expander emits the same unsigned-named multiply
;; as vec_widen_umult_hi_v4si; that looks wrong for negative elements --
;; confirm against the enabling condition, which is not visible here.
3360 (define_expand "vec_widen_smult_hi_v4si"
3361 [(match_operand:V2DI 0 "register_operand" "")
3362 (match_operand:V4SI 1 "register_operand" "")
3363 (match_operand:V4SI 2 "register_operand" "")]
3366 rtx op1, op2, t1, t2;
3370 t1 = gen_reg_rtx (V4SImode);
3371 t2 = gen_reg_rtx (V4SImode);
3373 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3374 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3375 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (low half), then the pair is multiplied with
;; gen_sse2_umulv2siv2di3.
;; NOTE(review): this signed expander emits the same unsigned-named multiply
;; as vec_widen_umult_lo_v4si; that looks wrong for negative elements --
;; confirm against the enabling condition, which is not visible here.
3379 (define_expand "vec_widen_smult_lo_v4si"
3380 [(match_operand:V2DI 0 "register_operand" "")
3381 (match_operand:V4SI 1 "register_operand" "")
3382 (match_operand:V4SI 2 "register_operand" "")]
3385 rtx op1, op2, t1, t2;
3389 t1 = gen_reg_rtx (V4SImode);
3390 t2 = gen_reg_rtx (V4SImode);
3392 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3393 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3394 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (high half) so the wanted elements are duplicated,
;; then the pair is multiplied with gen_sse2_umulv2siv2di3.
3398 (define_expand "vec_widen_umult_hi_v4si"
3399 [(match_operand:V2DI 0 "register_operand" "")
3400 (match_operand:V4SI 1 "register_operand" "")
3401 (match_operand:V4SI 2 "register_operand" "")]
3404 rtx op1, op2, t1, t2;
3408 t1 = gen_reg_rtx (V4SImode);
3409 t2 = gen_reg_rtx (V4SImode);
3411 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3412 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3413 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (low half), then the pair is multiplied with
;; gen_sse2_umulv2siv2di3.
3417 (define_expand "vec_widen_umult_lo_v4si"
3418 [(match_operand:V2DI 0 "register_operand" "")
3419 (match_operand:V4SI 1 "register_operand" "")
3420 (match_operand:V4SI 2 "register_operand" "")]
3423 rtx op1, op2, t1, t2;
3427 t1 = gen_reg_rtx (V4SImode);
3428 t2 = gen_reg_rtx (V4SImode);
3430 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3431 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3432 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step on V8HI inputs with a V4SI accumulator:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2), using a fresh
;; V4SI temporary for the pmaddwd result.
3436 (define_expand "sdot_prodv8hi"
3437 [(match_operand:V4SI 0 "register_operand" "")
3438 (match_operand:V8HI 1 "register_operand" "")
3439 (match_operand:V8HI 2 "register_operand" "")
3440 (match_operand:V4SI 3 "register_operand" "")]
3443 rtx t = gen_reg_rtx (V4SImode);
3444 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3445 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step on V4SI inputs with a V2DI accumulator.
;; t1 = umul (op1, op2) + op3; t2/t3 are op1/op2 shifted as TImode values
;; with gen_sse2_lshrti3 (the shift count is on lines not shown here);
;; t4 = umul (t2, t3); operand 0 = t1 + t4.
3449 (define_expand "udot_prodv4si"
3450 [(match_operand:V2DI 0 "register_operand" "")
3451 (match_operand:V4SI 1 "register_operand" "")
3452 (match_operand:V4SI 2 "register_operand" "")
3453 (match_operand:V2DI 3 "register_operand" "")]
3458 t1 = gen_reg_rtx (V2DImode);
3459 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3460 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3462 t2 = gen_reg_rtx (V4SImode);
3463 t3 = gen_reg_rtx (V4SImode);
3464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3465 gen_lowpart (TImode, operands[1]),
3467 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3468 gen_lowpart (TImode, operands[2]),
3471 t4 = gen_reg_rtx (V2DImode);
3472 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3474 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI); prints
;; psraw / psrad.  Operand 1 is tied to the destination; operand 2 is a
;; non-memory SI shift count.
3478 (define_insn "ashr<mode>3"
3479 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3481 (match_operand:SSEMODE24 1 "register_operand" "0")
3482 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3484 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3485 [(set_attr "type" "sseishft")
3486 (set_attr "prefix_data16" "1")
3487 (set_attr "mode" "TI")])
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI); prints
;; psrlw / psrld / psrlq.  Operand 1 is tied to the destination; operand 2
;; is a non-memory SI shift count.
3489 (define_insn "lshr<mode>3"
3490 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3491 (lshiftrt:SSEMODE248
3492 (match_operand:SSEMODE248 1 "register_operand" "0")
3493 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3495 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3496 [(set_attr "type" "sseishft")
3497 (set_attr "prefix_data16" "1")
3498 (set_attr "mode" "TI")])
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI); prints
;; psllw / pslld / psllq.  Operand 1 is tied to the destination; operand 2
;; is a non-memory SI shift count.
3500 (define_insn "ashl<mode>3"
3501 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3503 (match_operand:SSEMODE248 1 "register_operand" "0")
3504 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3506 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3507 [(set_attr "type" "sseishft")
3508 (set_attr "prefix_data16" "1")
3509 (set_attr "mode" "TI")])
;; Whole-vector left shift for the 16-byte integer modes.  Operands 0 and 1
;; are re-viewed as TImode with gen_lowpart so the shift is a single TImode
;; ashift; the count must satisfy const_0_to_255_mul_8_operand.
3511 (define_expand "vec_shl_<mode>"
3512 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3513 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3514 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3517 operands[0] = gen_lowpart (TImode, operands[0]);
3518 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the 16-byte integer modes.  Operands
;; 0 and 1 are re-viewed as TImode with gen_lowpart so the shift is a single
;; TImode lshiftrt; the count must satisfy const_0_to_255_mul_8_operand.
3521 (define_expand "vec_shr_<mode>"
3522 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3523 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3524 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3527 operands[0] = gen_lowpart (TImode, operands[0]);
3528 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for the V16QI binary operations named by the <code> iterator
;; (the iterator line itself is not visible in this excerpt).  Its only
;; preparation is ix86_fixup_binary_operands_no_copy.
3531 (define_expand "<code>v16qi3"
3532 [(set (match_operand:V16QI 0 "register_operand" "")
3534 (match_operand:V16QI 1 "nonimmediate_operand" "")
3535 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3537 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for the V16QI <code> operations; prints p<maxminiprefix>b.
;; Operand 1 is tied to the destination, operand 2 may be register or memory.
3539 (define_insn "*<code>v16qi3"
3540 [(set (match_operand:V16QI 0 "register_operand" "=x")
3542 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3543 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3544 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3545 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3546 [(set_attr "type" "sseiadd")
3547 (set_attr "prefix_data16" "1")
3548 (set_attr "mode" "TI")])
;; Expander for the V8HI binary operations named by the <code> iterator
;; (the iterator line itself is not visible in this excerpt).  Its only
;; preparation is ix86_fixup_binary_operands_no_copy.
3550 (define_expand "<code>v8hi3"
3551 [(set (match_operand:V8HI 0 "register_operand" "")
3553 (match_operand:V8HI 1 "nonimmediate_operand" "")
3554 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3556 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for the V8HI <code> operations; prints p<maxminiprefix>w.
;; Operand 1 is tied to the destination, operand 2 may be register or memory.
3558 (define_insn "*<code>v8hi3"
3559 [(set (match_operand:V8HI 0 "register_operand" "=x")
3561 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3562 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3563 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3564 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3565 [(set_attr "type" "sseiadd")
3566 (set_attr "prefix_data16" "1")
3567 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two paths are visible (the condition choosing
;; between them is on lines not shown): one only fixes up the operands for
;; UMAX; the other computes gen_sse2_ussubv8hi3 (op1, op2) and then adds op2
;; back.  If the destination is the same rtx as operand 2, the intermediate
;; goes into a fresh register so operand 2 is still intact for the addition.
3569 (define_expand "umaxv8hi3"
3570 [(set (match_operand:V8HI 0 "register_operand" "")
3571 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3572 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3576 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3579 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3580 if (rtx_equal_p (op3, op2))
3581 op3 = gen_reg_rtx (V8HImode);
3582 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3583 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible (the selecting condition is on lines not shown): one only fixes up
;; the operands for SMAX; the other builds a conditional select
;; (op1 > op2) ? op1 : op2 and hands it to ix86_expand_int_vcond.
;; xops layout: 0 = dest, 1 = value if true, 2 = value if false,
;; 3 = comparison, 4/5 = comparison operands.
3588 (define_expand "smax<mode>3"
3589 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3590 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3591 (match_operand:SSEMODE14 2 "register_operand" "")))]
3595 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3601 xops[0] = operands[0];
3602 xops[1] = operands[1];
3603 xops[2] = operands[2];
3604 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3605 xops[4] = operands[1];
3606 xops[5] = operands[2];
3607 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operations on the SSEMODE14 modes (V16QI,
;; V4SI); prints p<maxminiprefix>b / p<maxminiprefix>d.
3613 (define_insn "*sse4_1_<code><mode>3"
3614 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3616 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3617 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3618 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3619 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3620 [(set_attr "type" "sseiadd")
3621 (set_attr "prefix_extra" "1")
3622 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  Two paths are visible (the selecting condition is
;; on lines not shown): one only fixes up the operands for UMAX; the other
;; builds the conditional select (op1 >u op2) ? op1 : op2 and hands it to
;; ix86_expand_int_vcond.
3624 (define_expand "umaxv4si3"
3625 [(set (match_operand:V4SI 0 "register_operand" "")
3626 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3627 (match_operand:V4SI 2 "register_operand" "")))]
3631 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3637 xops[0] = operands[0];
3638 xops[1] = operands[1];
3639 xops[2] = operands[2];
3640 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3641 xops[4] = operands[1];
3642 xops[5] = operands[2];
3643 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operations on the SSEMODE24 modes (V8HI,
;; V4SI); prints p<maxminiprefix>w / p<maxminiprefix>d.
3649 (define_insn "*sse4_1_<code><mode>3"
3650 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3652 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3653 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3654 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3655 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3656 [(set_attr "type" "sseiadd")
3657 (set_attr "prefix_extra" "1")
3658 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  Two paths are
;; visible (the selecting condition is on lines not shown): one only fixes up
;; the operands for SMIN; the other builds the conditional select
;; (op1 > op2) ? op2 : op1 -- note the swapped value operands in xops[1] and
;; xops[2] -- and hands it to ix86_expand_int_vcond.
3660 (define_expand "smin<mode>3"
3661 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3662 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3663 (match_operand:SSEMODE14 2 "register_operand" "")))]
3667 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3673 xops[0] = operands[0];
3674 xops[1] = operands[2];
3675 xops[2] = operands[1];
3676 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3677 xops[4] = operands[1];
3678 xops[5] = operands[2];
3679 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  Two paths are
;; visible (the selecting condition is on lines not shown): one only fixes up
;; the operands for UMIN; the other builds the conditional select
;; (op1 >u op2) ? op2 : op1 -- note the swapped value operands in xops[1]
;; and xops[2] -- and hands it to ix86_expand_int_vcond.
3685 (define_expand "umin<mode>3"
3686 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3687 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3688 (match_operand:SSEMODE24 2 "register_operand" "")))]
3692 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3698 xops[0] = operands[0];
3699 xops[1] = operands[2];
3700 xops[2] = operands[1];
3701 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3702 xops[4] = operands[1];
3703 xops[5] = operands[2];
3704 ok = ix86_expand_int_vcond (xops);
3710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3712 ;; Parallel integral comparisons
3714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Available with SSE2 but not when SSE5 is enabled; its only
;; preparation is ix86_fixup_binary_operands_no_copy.
3716 (define_expand "sse2_eq<mode>3"
3717 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3719 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
3720 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
3721 "TARGET_SSE2 && !TARGET_SSE5"
3722 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; SSE2 (non-SSE5) insn for element-wise equality; prints pcmpeqb / pcmpeqw /
;; pcmpeqd.  Operand 1 is tied to the destination, operand 2 may be register
;; or memory.
3724 (define_insn "*sse2_eq<mode>3"
3725 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3727 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3728 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3729 "TARGET_SSE2 && !TARGET_SSE5
3730 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3731 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3732 [(set_attr "type" "ssecmp")
3733 (set_attr "prefix_data16" "1")
3734 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality; its only preparation is
;; ix86_fixup_binary_operands_no_copy.
3736 (define_expand "sse4_1_eqv2di3"
3737 [(set (match_operand:V2DI 0 "register_operand" "")
3739 (match_operand:V2DI 1 "nonimmediate_operand" "")
3740 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
3742 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 insn for V2DI element-wise equality; prints pcmpeqq.
3744 (define_insn "*sse4_1_eqv2di3"
3745 [(set (match_operand:V2DI 0 "register_operand" "=x")
3747 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3748 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3749 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3750 "pcmpeqq\t{%2, %0|%0, %2}"
3751 [(set_attr "type" "ssecmp")
3752 (set_attr "prefix_extra" "1")
3753 (set_attr "mode" "TI")])
;; SSE2 (non-SSE5) insn for the element-wise greater-than compare on V16QI,
;; V8HI and V4SI; prints pcmpgtb / pcmpgtw / pcmpgtd.  Unlike the equality
;; patterns, operand 1 uses plain "0" (no "%"), so the operands are not
;; marked as interchangeable.
3755 (define_insn "sse2_gt<mode>3"
3756 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3758 (match_operand:SSEMODE124 1 "register_operand" "0")
3759 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3760 "TARGET_SSE2 && !TARGET_SSE5"
3761 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3762 [(set_attr "type" "ssecmp")
3763 (set_attr "prefix_data16" "1")
3764 (set_attr "mode" "TI")])
;; Insn for the V2DI element-wise greater-than compare; prints pcmpgtq.
;; Operand 1 is tied to the destination, operand 2 may be register or memory.
3766 (define_insn "sse4_2_gtv2di3"
3767 [(set (match_operand:V2DI 0 "register_operand" "=x")
3769 (match_operand:V2DI 1 "register_operand" "0")
3770 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3772 "pcmpgtq\t{%2, %0|%0, %2}"
3773 [(set_attr "type" "ssecmp")
3774 (set_attr "mode" "TI")])
;; Vector conditional select for the 16-byte integer modes:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; Expansion is delegated to ix86_expand_int_vcond; what happens on its
;; success or failure is on lines not shown in this excerpt.
3776 (define_expand "vcond<mode>"
3777 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3778 (if_then_else:SSEMODEI
3779 (match_operator 3 ""
3780 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3781 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3782 (match_operand:SSEMODEI 1 "general_operand" "")
3783 (match_operand:SSEMODEI 2 "general_operand" "")))]
3786 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> under the vcondu name, with the same operand
;; layout: operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 :
;; operand 2.  Expansion is delegated to the same ix86_expand_int_vcond.
3792 (define_expand "vcondu<mode>"
3793 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3794 (if_then_else:SSEMODEI
3795 (match_operator 3 ""
3796 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3797 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3798 (match_operand:SSEMODEI 1 "general_operand" "")
3799 (match_operand:SSEMODEI 2 "general_operand" "")))]
3802 if (ix86_expand_int_vcond (operands))
3808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3810 ;; Parallel bitwise logical operations
3812 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expressed as XOR with an all-ones vector.  The
;; preparation code builds a CONST_VECTOR whose every element is constm1_rtx
;; (one per unit of the mode) and forces it into a register as operand 2.
3814 (define_expand "one_cmpl<mode>2"
3815 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3816 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3820 int i, n = GET_MODE_NUNITS (<MODE>mode);
3821 rtvec v = rtvec_alloc (n);
3823 for (i = 0; i < n; ++i)
3824 RTVEC_ELT (v, i) = constm1_rtx;
3826 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not of integer vectors for SSE without SSE2: operand 1 is the
;; complemented input (tied to the destination).  Prints the float-domain
;; andnps, hence mode attribute V4SF.
3829 (define_insn "*sse_nand<mode>3"
3830 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3832 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3833 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3834 "(TARGET_SSE && !TARGET_SSE2)"
3835 "andnps\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "sselog")
3837 (set_attr "mode" "V4SF")])
;; And-not of integer vectors; prints pandn.  Operand 1 is the complemented
;; input and is tied to the destination.
3839 (define_insn "sse2_nand<mode>3"
3840 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3842 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3843 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3845 "pandn\t{%2, %0|%0, %2}"
3846 [(set_attr "type" "sselog")
3847 (set_attr "prefix_data16" "1")
3848 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers; prints pandn.  Operand 1
;; is the complemented input and is tied to the destination.
3850 (define_insn "*nandtf3"
3851 [(set (match_operand:TF 0 "register_operand" "=x")
3853 (not:TF (match_operand:TF 1 "register_operand" "0"))
3854 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3856 "pandn\t{%2, %0|%0, %2}"
3857 [(set_attr "type" "sselog")
3858 (set_attr "prefix_data16" "1")
3859 (set_attr "mode" "TI")])
;; Expander for the bitwise operations named by the <code> iterator on the
;; 16-byte integer modes (the iterator line is not visible in this excerpt).
;; Its only preparation is ix86_fixup_binary_operands_no_copy.
3861 (define_expand "<code><mode>3"
3862 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3864 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3865 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3867 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise <code> on integer vectors for SSE without SSE2; prints the
;; float-domain <plogicprefix>ps form, hence mode attribute V4SF.
3869 (define_insn "*sse_<code><mode>3"
3870 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3872 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3873 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3874 "(TARGET_SSE && !TARGET_SSE2)
3875 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3876 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3877 [(set_attr "type" "sselog")
3878 (set_attr "mode" "V4SF")])
;; SSE2 insn for bitwise <code> on integer vectors; prints p<plogicprefix>.
;; Operand 1 is tied to the destination, operand 2 may be register or memory.
3880 (define_insn "*sse2_<code><mode>3"
3881 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3883 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3884 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3885 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3886 "p<plogicprefix>\t{%2, %0|%0, %2}"
3887 [(set_attr "type" "sselog")
3888 (set_attr "prefix_data16" "1")
3889 (set_attr "mode" "TI")])
;; Expander for the bitwise <code> operations on TFmode values; its only
;; preparation is ix86_fixup_binary_operands_no_copy.
3891 (define_expand "<code>tf3"
3892 [(set (match_operand:TF 0 "register_operand" "")
3894 (match_operand:TF 1 "nonimmediate_operand" "")
3895 (match_operand:TF 2 "nonimmediate_operand" "")))]
3897 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; 64-bit-only insn for bitwise <code> on TFmode values in SSE registers;
;; prints p<plogicprefix>.
3899 (define_insn "*<code>tf3"
3900 [(set (match_operand:TF 0 "register_operand" "=x")
3902 (match_operand:TF 1 "nonimmediate_operand" "%0")
3903 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3904 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3905 "p<plogicprefix>\t{%2, %0|%0, %2}"
3906 [(set_attr "type" "sselog")
3907 (set_attr "prefix_data16" "1")
3908 (set_attr "mode" "TI")])
3910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3912 ;; Parallel integral element swizzling
3914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI vectors into one V16QI vector, built purely
;; from byte interleaves: the inputs are viewed as V16QI and run through
;; seven interleave steps.  The table below traces the byte order after
;; each step.
3917 ;; op1 = abcdefghijklmnop
3918 ;; op2 = qrstuvwxyz012345
3919 ;; h1 = aqbrcsdteufvgwhx
3920 ;; l1 = iyjzk0l1m2n3o4p5
3921 ;; h2 = aiqybjrzcks0dlt1
3922 ;; l2 = emu2fnv3gow4hpx5
3923 ;; h3 = aeimquy2bfjnrvz3
3924 ;; l3 = cgkosw04dhlptx15
3925 ;; result = bdfhjlnprtvxz135
3926 (define_expand "vec_pack_trunc_v8hi"
3927 [(match_operand:V16QI 0 "register_operand" "")
3928 (match_operand:V8HI 1 "register_operand" "")
3929 (match_operand:V8HI 2 "register_operand" "")]
3932 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3934 op1 = gen_lowpart (V16QImode, operands[1]);
3935 op2 = gen_lowpart (V16QImode, operands[2]);
3936 h1 = gen_reg_rtx (V16QImode);
3937 l1 = gen_reg_rtx (V16QImode);
3938 h2 = gen_reg_rtx (V16QImode);
3939 l2 = gen_reg_rtx (V16QImode);
3940 h3 = gen_reg_rtx (V16QImode);
3941 l3 = gen_reg_rtx (V16QImode);
3943 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3944 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3945 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3946 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3947 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3948 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3949 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Truncating pack of two V4SI vectors into one V8HI vector: the inputs are
;; viewed as V8HI and run through five halfword interleave steps.
3960 ;; result = bdfhjlnp
3961 (define_expand "vec_pack_trunc_v4si"
3962 [(match_operand:V8HI 0 "register_operand" "")
3963 (match_operand:V4SI 1 "register_operand" "")
3964 (match_operand:V4SI 2 "register_operand" "")]
3967 rtx op1, op2, h1, l1, h2, l2;
3969 op1 = gen_lowpart (V8HImode, operands[1]);
3970 op2 = gen_lowpart (V8HImode, operands[2]);
3971 h1 = gen_reg_rtx (V8HImode);
3972 l1 = gen_reg_rtx (V8HImode);
3973 h2 = gen_reg_rtx (V8HImode);
3974 l2 = gen_reg_rtx (V8HImode);
3976 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3977 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3978 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3979 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3980 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI vector: the inputs are
;; viewed as V4SI and run through three doubleword interleave steps.
3990 (define_expand "vec_pack_trunc_v2di"
3991 [(match_operand:V4SI 0 "register_operand" "")
3992 (match_operand:V2DI 1 "register_operand" "")
3993 (match_operand:V2DI 2 "register_operand" "")]
3996 rtx op1, op2, h1, l1;
3998 op1 = gen_lowpart (V4SImode, operands[1]);
3999 op2 = gen_lowpart (V4SImode, operands[2]);
4000 h1 = gen_reg_rtx (V4SImode);
4001 l1 = gen_reg_rtx (V4SImode);
4003 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4004 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4005 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for interleaving the high halves of two V16QI vectors
;; (selected elements 8..15 of operand 1 alternating with 24..31, i.e. 8..15
;; of operand 2).  It simply emits the sse2_punpckhbw insn.
4009 (define_expand "vec_interleave_highv16qi"
4010 [(set (match_operand:V16QI 0 "register_operand" "")
4013 (match_operand:V16QI 1 "register_operand" "")
4014 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4015 (parallel [(const_int 8) (const_int 24)
4016 (const_int 9) (const_int 25)
4017 (const_int 10) (const_int 26)
4018 (const_int 11) (const_int 27)
4019 (const_int 12) (const_int 28)
4020 (const_int 13) (const_int 29)
4021 (const_int 14) (const_int 30)
4022 (const_int 15) (const_int 31)])))]
4025 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for interleaving the low halves of two V16QI vectors
;; (selected elements 0..7 of operand 1 alternating with 16..23, i.e. 0..7
;; of operand 2).  It simply emits the sse2_punpcklbw insn.
4029 (define_expand "vec_interleave_lowv16qi"
4030 [(set (match_operand:V16QI 0 "register_operand" "")
4033 (match_operand:V16QI 1 "register_operand" "")
4034 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4035 (parallel [(const_int 0) (const_int 16)
4036 (const_int 1) (const_int 17)
4037 (const_int 2) (const_int 18)
4038 (const_int 3) (const_int 19)
4039 (const_int 4) (const_int 20)
4040 (const_int 5) (const_int 21)
4041 (const_int 6) (const_int 22)
4042 (const_int 7) (const_int 23)])))]
4045 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for interleaving the high halves of two V8HI vectors
;; (selected elements 4..7 of operand 1 alternating with 12..15, i.e. 4..7
;; of operand 2).  It simply emits the sse2_punpckhwd insn.  Operand 0
;; carries an empty constraint string like every sibling expander.
4049 (define_expand "vec_interleave_highv8hi"
4050 [(set (match_operand:V8HI 0 "register_operand" "")
4053 (match_operand:V8HI 1 "register_operand" "")
4054 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4055 (parallel [(const_int 4) (const_int 12)
4056 (const_int 5) (const_int 13)
4057 (const_int 6) (const_int 14)
4058 (const_int 7) (const_int 15)])))]
4061 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for interleaving the low halves of two V8HI vectors
;; (selected elements 0..3 of operand 1 alternating with 8..11, i.e. 0..3
;; of operand 2).  It simply emits the sse2_punpcklwd insn.
4065 (define_expand "vec_interleave_lowv8hi"
4066 [(set (match_operand:V8HI 0 "register_operand" "")
4069 (match_operand:V8HI 1 "register_operand" "")
4070 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4071 (parallel [(const_int 0) (const_int 8)
4072 (const_int 1) (const_int 9)
4073 (const_int 2) (const_int 10)
4074 (const_int 3) (const_int 11)])))]
4077 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for interleaving the high halves of two V4SI vectors
;; (selected elements 2, 6, 3, 7 of the concatenated pair).  It simply emits
;; the sse2_punpckhdq insn.
4081 (define_expand "vec_interleave_highv4si"
4082 [(set (match_operand:V4SI 0 "register_operand" "")
4085 (match_operand:V4SI 1 "register_operand" "")
4086 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4087 (parallel [(const_int 2) (const_int 6)
4088 (const_int 3) (const_int 7)])))]
4091 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for interleaving the low halves of two V4SI vectors
;; (selected elements 0, 4, 1, 5 of the concatenated pair).  It simply emits
;; the sse2_punpckldq insn.
4095 (define_expand "vec_interleave_lowv4si"
4096 [(set (match_operand:V4SI 0 "register_operand" "")
4099 (match_operand:V4SI 1 "register_operand" "")
4100 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4101 (parallel [(const_int 0) (const_int 4)
4102 (const_int 1) (const_int 5)])))]
4105 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high-half interleave of two V2DI vectors; it
;; simply emits the sse2_punpckhqdq insn.
4109 (define_expand "vec_interleave_highv2di"
4110 [(set (match_operand:V2DI 0 "register_operand" "")
4113 (match_operand:V2DI 1 "register_operand" "")
4114 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4115 (parallel [(const_int 1)
4119 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half interleave of two V2DI vectors; it
;; simply emits the sse2_punpcklqdq insn.
4123 (define_expand "vec_interleave_lowv2di"
4124 [(set (match_operand:V2DI 0 "register_operand" "")
4127 (match_operand:V2DI 1 "register_operand" "")
4128 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4129 (parallel [(const_int 0)
4133 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Pack two V8HI vectors into one V16QI vector; prints packsswb.  Operand 1
;; is tied to the destination, operand 2 may be register or memory.
4137 (define_insn "sse2_packsswb"
4138 [(set (match_operand:V16QI 0 "register_operand" "=x")
4141 (match_operand:V8HI 1 "register_operand" "0"))
4143 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4145 "packsswb\t{%2, %0|%0, %2}"
4146 [(set_attr "type" "sselog")
4147 (set_attr "prefix_data16" "1")
4148 (set_attr "mode" "TI")])
;; Pack two V4SI vectors into one V8HI vector; prints packssdw.  Operand 1
;; is tied to the destination, operand 2 may be register or memory.
4150 (define_insn "sse2_packssdw"
4151 [(set (match_operand:V8HI 0 "register_operand" "=x")
4154 (match_operand:V4SI 1 "register_operand" "0"))
4156 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4158 "packssdw\t{%2, %0|%0, %2}"
4159 [(set_attr "type" "sselog")
4160 (set_attr "prefix_data16" "1")
4161 (set_attr "mode" "TI")])
;; Pack two V8HI vectors into one V16QI vector; prints packuswb.  Operand 1
;; is tied to the destination, operand 2 may be register or memory.
4163 (define_insn "sse2_packuswb"
4164 [(set (match_operand:V16QI 0 "register_operand" "=x")
4167 (match_operand:V8HI 1 "register_operand" "0"))
4169 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4171 "packuswb\t{%2, %0|%0, %2}"
4172 [(set_attr "type" "sselog")
4173 (set_attr "prefix_data16" "1")
4174 (set_attr "mode" "TI")])
;; High-half byte interleave of two V16QI vectors; prints punpckhbw.  The
;; selection alternates elements 8..15 of operand 1 with elements 24..31
;; (8..15 of operand 2).  Operand 1 is tied to the destination.
4176 (define_insn "sse2_punpckhbw"
4177 [(set (match_operand:V16QI 0 "register_operand" "=x")
4180 (match_operand:V16QI 1 "register_operand" "0")
4181 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4182 (parallel [(const_int 8) (const_int 24)
4183 (const_int 9) (const_int 25)
4184 (const_int 10) (const_int 26)
4185 (const_int 11) (const_int 27)
4186 (const_int 12) (const_int 28)
4187 (const_int 13) (const_int 29)
4188 (const_int 14) (const_int 30)
4189 (const_int 15) (const_int 31)])))]
4191 "punpckhbw\t{%2, %0|%0, %2}"
4192 [(set_attr "type" "sselog")
4193 (set_attr "prefix_data16" "1")
4194 (set_attr "mode" "TI")])
;; Low-half byte interleave of two V16QI vectors; prints punpcklbw.  The
;; selection alternates elements 0..7 of operand 1 with elements 16..23
;; (0..7 of operand 2).  Operand 1 is tied to the destination.
4196 (define_insn "sse2_punpcklbw"
4197 [(set (match_operand:V16QI 0 "register_operand" "=x")
4200 (match_operand:V16QI 1 "register_operand" "0")
4201 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4202 (parallel [(const_int 0) (const_int 16)
4203 (const_int 1) (const_int 17)
4204 (const_int 2) (const_int 18)
4205 (const_int 3) (const_int 19)
4206 (const_int 4) (const_int 20)
4207 (const_int 5) (const_int 21)
4208 (const_int 6) (const_int 22)
4209 (const_int 7) (const_int 23)])))]
4211 "punpcklbw\t{%2, %0|%0, %2}"
4212 [(set_attr "type" "sselog")
4213 (set_attr "prefix_data16" "1")
4214 (set_attr "mode" "TI")])
;; High-half halfword interleave of two V8HI vectors; prints punpckhwd.  The
;; selection alternates elements 4..7 of operand 1 with elements 12..15
;; (4..7 of operand 2).  Operand 1 is tied to the destination.
4216 (define_insn "sse2_punpckhwd"
4217 [(set (match_operand:V8HI 0 "register_operand" "=x")
4220 (match_operand:V8HI 1 "register_operand" "0")
4221 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4222 (parallel [(const_int 4) (const_int 12)
4223 (const_int 5) (const_int 13)
4224 (const_int 6) (const_int 14)
4225 (const_int 7) (const_int 15)])))]
4227 "punpckhwd\t{%2, %0|%0, %2}"
4228 [(set_attr "type" "sselog")
4229 (set_attr "prefix_data16" "1")
4230 (set_attr "mode" "TI")])
;; Low-half halfword interleave of two V8HI vectors; prints punpcklwd.  The
;; selection alternates elements 0..3 of operand 1 with elements 8..11
;; (0..3 of operand 2).  Operand 1 is tied to the destination.
4232 (define_insn "sse2_punpcklwd"
4233 [(set (match_operand:V8HI 0 "register_operand" "=x")
4236 (match_operand:V8HI 1 "register_operand" "0")
4237 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4238 (parallel [(const_int 0) (const_int 8)
4239 (const_int 1) (const_int 9)
4240 (const_int 2) (const_int 10)
4241 (const_int 3) (const_int 11)])))]
4243 "punpcklwd\t{%2, %0|%0, %2}"
4244 [(set_attr "type" "sselog")
4245 (set_attr "prefix_data16" "1")
4246 (set_attr "mode" "TI")])
;; High-half doubleword interleave of two V4SI vectors; prints punpckhdq.
;; Selects elements 2, 6, 3, 7 of the concatenated pair.  Operand 1 is tied
;; to the destination.
4248 (define_insn "sse2_punpckhdq"
4249 [(set (match_operand:V4SI 0 "register_operand" "=x")
4252 (match_operand:V4SI 1 "register_operand" "0")
4253 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4254 (parallel [(const_int 2) (const_int 6)
4255 (const_int 3) (const_int 7)])))]
4257 "punpckhdq\t{%2, %0|%0, %2}"
4258 [(set_attr "type" "sselog")
4259 (set_attr "prefix_data16" "1")
4260 (set_attr "mode" "TI")])
;; Low-half doubleword interleave of two V4SI vectors; prints punpckldq.
;; Selects elements 0, 4, 1, 5 of the concatenated pair.  Operand 1 is tied
;; to the destination.
4262 (define_insn "sse2_punpckldq"
4263 [(set (match_operand:V4SI 0 "register_operand" "=x")
4266 (match_operand:V4SI 1 "register_operand" "0")
4267 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4268 (parallel [(const_int 0) (const_int 4)
4269 (const_int 1) (const_int 5)])))]
4271 "punpckldq\t{%2, %0|%0, %2}"
4272 [(set_attr "type" "sselog")
4273 (set_attr "prefix_data16" "1")
4274 (set_attr "mode" "TI")])
;; punpckhqdq on V2DI.  Operand 1 is tied to the destination; operand 2 is
;; register or memory.
;; NOTE(review): only the first selector index (1) is visible here; the
;; rest of the selector list, the enclosing operators and the insn
;; condition are missing from this excerpt.
4276 (define_insn "sse2_punpckhqdq"
4277 [(set (match_operand:V2DI 0 "register_operand" "=x")
4280 (match_operand:V2DI 1 "register_operand" "0")
4281 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4282 (parallel [(const_int 1)
4285 "punpckhqdq\t{%2, %0|%0, %2}"
4286 [(set_attr "type" "sselog")
4287 (set_attr "prefix_data16" "1")
4288 (set_attr "mode" "TI")])
;; punpcklqdq on V2DI.  Operand 1 is tied to the destination; operand 2 is
;; register or memory.
;; NOTE(review): only the first selector index (0) is visible here; the
;; rest of the selector list, the enclosing operators and the insn
;; condition are missing from this excerpt.
4290 (define_insn "sse2_punpcklqdq"
4291 [(set (match_operand:V2DI 0 "register_operand" "=x")
4294 (match_operand:V2DI 1 "register_operand" "0")
4295 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4296 (parallel [(const_int 0)
4299 "punpcklqdq\t{%2, %0|%0, %2}"
4300 [(set_attr "type" "sselog")
4301 (set_attr "prefix_data16" "1")
4302 (set_attr "mode" "TI")])
;; pinsrb: insert a QI value (operand 2, general register or memory,
;; broadcast via vec_duplicate) into V16QI operand 1, which is tied to
;; the destination.  Operand 3 arrives as a power of two accepted by
;; const_pow2_1_to_32768_operand; the output code rewrites it to its
;; exact_log2 before it is printed as the instruction immediate.
;; NOTE(review): the outer merge operator and the insn condition are not
;; visible in this excerpt.
4304 (define_insn "*sse4_1_pinsrb"
4305 [(set (match_operand:V16QI 0 "register_operand" "=x")
4307 (vec_duplicate:V16QI
4308 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4309 (match_operand:V16QI 1 "register_operand" "0")
4310 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4313 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4314 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4316 [(set_attr "type" "sselog")
4317 (set_attr "prefix_extra" "1")
4318 (set_attr "mode" "TI")])
;; pinsrw: insert an HI value (operand 2, general register or memory)
;; into V8HI operand 1, which is tied to the destination.  Operand 3 is a
;; power of two accepted by const_pow2_1_to_128_operand; the output code
;; converts it to its exact_log2 to form the instruction immediate.
;; NOTE(review): the wrapping operators around operand 2 and the insn
;; condition are not visible in this excerpt.
4320 (define_insn "*sse2_pinsrw"
4321 [(set (match_operand:V8HI 0 "register_operand" "=x")
4324 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4325 (match_operand:V8HI 1 "register_operand" "0")
4326 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4329 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4330 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4332 [(set_attr "type" "sselog")
4333 (set_attr "prefix_data16" "1")
4334 (set_attr "mode" "TI")])
4336 ;; It must come before sse2_loadld since it is preferred.
;; pinsrd: insert an SI value (operand 2, general register or memory)
;; into V4SI operand 1, which is tied to the destination.  Operand 3 is a
;; power of two accepted by const_pow2_1_to_8_operand; the output code
;; converts it to its exact_log2 to form the instruction immediate.
;; NOTE(review): the wrapping operators around operand 2 and the insn
;; condition are not visible in this excerpt.
4337 (define_insn "*sse4_1_pinsrd"
4338 [(set (match_operand:V4SI 0 "register_operand" "=x")
4341 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4342 (match_operand:V4SI 1 "register_operand" "0")
4343 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4346 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4347 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4349 [(set_attr "type" "sselog")
4350 (set_attr "prefix_extra" "1")
4351 (set_attr "mode" "TI")])
;; pinsrq: insert a DI value (operand 2, general register or memory)
;; into V2DI operand 1, which is tied to the destination.  Operand 3 is a
;; power of two accepted by const_pow2_1_to_2_operand; the output code
;; converts it to its exact_log2 to form the instruction immediate.
;; NOTE(review): the wrapping operators around operand 2 and the insn
;; condition are not visible in this excerpt.
4353 (define_insn "*sse4_1_pinsrq"
4354 [(set (match_operand:V2DI 0 "register_operand" "=x")
4357 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4358 (match_operand:V2DI 1 "register_operand" "0")
4359 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4362 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4363 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4365 [(set_attr "type" "sselog")
4366 (set_attr "prefix_extra" "1")
4367 (set_attr "mode" "TI")])
;; pextrb into a general register: element index operand 2 (0..15) of
;; V16QI register operand 1 is delivered to SI register operand 0.
;; NOTE(review): the operators wrapping the selection (the pattern closes
;; one more parenthesis than the _memory variant) and the insn condition
;; are not visible in this excerpt.
4369 (define_insn "*sse4_1_pextrb"
4370 [(set (match_operand:SI 0 "register_operand" "=r")
4373 (match_operand:V16QI 1 "register_operand" "x")
4374 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4376 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4377 [(set_attr "type" "sselog")
4378 (set_attr "prefix_extra" "1")
4379 (set_attr "mode" "TI")])
;; pextrb with a memory destination: element index operand 2 (0..15) of
;; V16QI register operand 1 is stored to QI memory operand 0.
;; NOTE(review): the selecting operator line and the insn condition are
;; not visible in this excerpt.
4381 (define_insn "*sse4_1_pextrb_memory"
4382 [(set (match_operand:QI 0 "memory_operand" "=m")
4384 (match_operand:V16QI 1 "register_operand" "x")
4385 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4387 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4388 [(set_attr "type" "sselog")
4389 (set_attr "prefix_extra" "1")
4390 (set_attr "mode" "TI")])
;; pextrw into a general register: element index operand 2 (0..7) of
;; V8HI register operand 1 is delivered to SI register operand 0.
;; NOTE(review): the operators wrapping the selection and the insn
;; condition are not visible in this excerpt.
4392 (define_insn "*sse2_pextrw"
4393 [(set (match_operand:SI 0 "register_operand" "=r")
4396 (match_operand:V8HI 1 "register_operand" "x")
4397 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4399 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4400 [(set_attr "type" "sselog")
4401 (set_attr "prefix_data16" "1")
4402 (set_attr "mode" "TI")])
;; pextrw with a memory destination: element index operand 2 (0..7) of
;; V8HI register operand 1 is stored to HI memory operand 0.
;; NOTE(review): the selecting operator line and the insn condition are
;; not visible in this excerpt.
4404 (define_insn "*sse4_1_pextrw_memory"
4405 [(set (match_operand:HI 0 "memory_operand" "=m")
4407 (match_operand:V8HI 1 "register_operand" "x")
4408 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4410 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4411 [(set_attr "type" "sselog")
4412 (set_attr "prefix_extra" "1")
4413 (set_attr "mode" "TI")])
;; pextrd: element index operand 2 (0..3) of V4SI register operand 1 is
;; written to SI operand 0, which may be a general register or memory.
;; NOTE(review): the selecting operator line and the insn condition are
;; not visible in this excerpt.
4415 (define_insn "*sse4_1_pextrd"
4416 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4418 (match_operand:V4SI 1 "register_operand" "x")
4419 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4421 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4422 [(set_attr "type" "sselog")
4423 (set_attr "prefix_extra" "1")
4424 (set_attr "mode" "TI")])
4426 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq: element index operand 2 (0..1) of V2DI register operand 1 is
;; written to DI operand 0 (general register or memory).  Enabled only
;; when both TARGET_SSE4_1 and TARGET_64BIT hold.
;; NOTE(review): the selecting operator line is not visible in this excerpt.
4427 (define_insn "*sse4_1_pextrq"
4428 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4430 (match_operand:V2DI 1 "register_operand" "x")
4431 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4432 "TARGET_SSE4_1 && TARGET_64BIT"
4433 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4434 [(set_attr "type" "sselog")
4435 (set_attr "prefix_extra" "1")
4436 (set_attr "mode" "TI")])
;; Expander for pshufd taking a single integer mask (operand 2).  The
;; mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each
;; passed as a separate constant to gen_sse2_pshufd_1.
;; NOTE(review): the expander condition and the closing lines of the
;; preparation code are not visible in this excerpt.
4438 (define_expand "sse2_pshufd"
4439 [(match_operand:V4SI 0 "register_operand" "")
4440 (match_operand:V4SI 1 "nonimmediate_operand" "")
4441 (match_operand:SI 2 "const_int_operand" "")]
4444 int mask = INTVAL (operands[2]);
4445 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4446 GEN_INT ((mask >> 0) & 3),
4447 GEN_INT ((mask >> 2) & 3),
4448 GEN_INT ((mask >> 4) & 3),
4449 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code packs them back into one
;; immediate, two bits per selector starting at bit 0, and stores the
;; result in operands[2] for printing.
;; NOTE(review): the selecting operator line, the insn condition and the
;; declaration/initialisation of `mask' are not visible in this excerpt.
4453 (define_insn "sse2_pshufd_1"
4454 [(set (match_operand:V4SI 0 "register_operand" "=x")
4456 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4457 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4458 (match_operand 3 "const_0_to_3_operand" "")
4459 (match_operand 4 "const_0_to_3_operand" "")
4460 (match_operand 5 "const_0_to_3_operand" "")])))]
4464 mask |= INTVAL (operands[2]) << 0;
4465 mask |= INTVAL (operands[3]) << 2;
4466 mask |= INTVAL (operands[4]) << 4;
4467 mask |= INTVAL (operands[5]) << 6;
4468 operands[2] = GEN_INT (mask);
4470 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4472 [(set_attr "type" "sselog1")
4473 (set_attr "prefix_data16" "1")
4474 (set_attr "mode" "TI")])
;; Expander for pshuflw taking a single integer mask (operand 2).  The
;; mask is split into four 2-bit fields, each passed as a separate
;; constant in the range 0..3 to gen_sse2_pshuflw_1.
;; NOTE(review): the expander condition and the closing lines of the
;; preparation code are not visible in this excerpt.
4476 (define_expand "sse2_pshuflw"
4477 [(match_operand:V8HI 0 "register_operand" "")
4478 (match_operand:V8HI 1 "nonimmediate_operand" "")
4479 (match_operand:SI 2 "const_int_operand" "")]
4482 int mask = INTVAL (operands[2]);
4483 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4484 GEN_INT ((mask >> 0) & 3),
4485 GEN_INT ((mask >> 2) & 3),
4486 GEN_INT ((mask >> 4) & 3),
4487 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit selectors (operands 2..5, each 0..3).  The
;; output code packs them into one immediate, two bits per selector
;; starting at bit 0, and stores it in operands[2] for printing.
;; NOTE(review): the remainder of the selector list after operand 5, the
;; selecting operator line, the insn condition and the declaration of
;; `mask' are not visible in this excerpt.
4491 (define_insn "sse2_pshuflw_1"
4492 [(set (match_operand:V8HI 0 "register_operand" "=x")
4494 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4495 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4496 (match_operand 3 "const_0_to_3_operand" "")
4497 (match_operand 4 "const_0_to_3_operand" "")
4498 (match_operand 5 "const_0_to_3_operand" "")
4506 mask |= INTVAL (operands[2]) << 0;
4507 mask |= INTVAL (operands[3]) << 2;
4508 mask |= INTVAL (operands[4]) << 4;
4509 mask |= INTVAL (operands[5]) << 6;
4510 operands[2] = GEN_INT (mask);
4512 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4514 [(set_attr "type" "sselog")
4515 (set_attr "prefix_rep" "1")
4516 (set_attr "mode" "TI")])
;; Expander for pshufhw taking a single integer mask (operand 2).  Each
;; 2-bit field of the mask is biased by +4, so the constants handed to
;; gen_sse2_pshufhw_1 lie in 4..7, matching that pattern's
;; const_4_to_7_operand predicates.
;; NOTE(review): the expander condition and the closing lines of the
;; preparation code are not visible in this excerpt.
4518 (define_expand "sse2_pshufhw"
4519 [(match_operand:V8HI 0 "register_operand" "")
4520 (match_operand:V8HI 1 "nonimmediate_operand" "")
4521 (match_operand:SI 2 "const_int_operand" "")]
4524 int mask = INTVAL (operands[2]);
4525 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4526 GEN_INT (((mask >> 0) & 3) + 4),
4527 GEN_INT (((mask >> 2) & 3) + 4),
4528 GEN_INT (((mask >> 4) & 3) + 4),
4529 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit selectors (operands 2..5, each 4..7).  The
;; output code removes the bias of 4 from every selector, packs them two
;; bits apiece starting at bit 0, and stores the immediate in operands[2].
;; NOTE(review): the selector list starts with a fixed (const_int 0); the
;; entries between it and operand 2, the selecting operator line, the
;; insn condition and the declaration of `mask' are not visible here.
4533 (define_insn "sse2_pshufhw_1"
4534 [(set (match_operand:V8HI 0 "register_operand" "=x")
4536 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4537 (parallel [(const_int 0)
4541 (match_operand 2 "const_4_to_7_operand" "")
4542 (match_operand 3 "const_4_to_7_operand" "")
4543 (match_operand 4 "const_4_to_7_operand" "")
4544 (match_operand 5 "const_4_to_7_operand" "")])))]
4548 mask |= (INTVAL (operands[2]) - 4) << 0;
4549 mask |= (INTVAL (operands[3]) - 4) << 2;
4550 mask |= (INTVAL (operands[4]) - 4) << 4;
4551 mask |= (INTVAL (operands[5]) - 4) << 6;
4552 operands[2] = GEN_INT (mask);
4554 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4556 [(set_attr "type" "sselog")
4557 (set_attr "prefix_rep" "1")
4558 (set_attr "mode" "TI")])
;; Expander that places an SI value (operand 1, register or memory) into
;; a V4SI register.  The preparation statement sets operands[2] to the
;; V4SImode zero constant.
;; NOTE(review): most of the RTL template and the condition are not
;; visible in this excerpt, so how operand 2 is used cannot be confirmed
;; from here.
4560 (define_expand "sse2_loadd"
4561 [(set (match_operand:V4SI 0 "register_operand" "")
4564 (match_operand:SI 1 "nonimmediate_operand" ""))
4568 "operands[2] = CONST0_RTX (V4SImode);")
;; Load an SI value (operand 2) into a V4SI register, with four
;; constraint alternatives:
;;   0: memory source,           operand 1 "C", movd,  mode TI
;;   1: general-register source, operand 1 "C", movd,  mode TI
;;   2: memory source,           operand 1 "C", movss, mode V4SF
;;   3: SSE-register source, operand 1 tied to the destination, movss,
;;      mode SF
;; NOTE(review): the wrapping RTL operators, the trailing selector and
;; the insn condition are not visible in this excerpt.
4570 (define_insn "sse2_loadld"
4571 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4574 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4575 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4579 movd\t{%2, %0|%0, %2}
4580 movd\t{%2, %0|%0, %2}
4581 movss\t{%2, %0|%0, %2}
4582 movss\t{%2, %0|%0, %2}"
4583 [(set_attr "type" "ssemov")
4584 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register into an SI destination (memory or
;; SSE register in alternative 0, general register in alternative 1).
;; After reload, provided inter-unit moves are enabled or the
;; destination is memory or not a general register, the insn is split
;; into a plain move whose source is operand 1 re-expressed as an SImode
;; REG with the same register number.
;; NOTE(review): the selecting operator line, the insn condition and the
;; output template are not visible in this excerpt.
4586 (define_insn_and_split "sse2_stored"
4587 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4589 (match_operand:V4SI 1 "register_operand" "x,Yi")
4590 (parallel [(const_int 0)])))]
4593 "&& reload_completed
4594 && (TARGET_INTER_UNIT_MOVES
4595 || MEM_P (operands [0])
4596 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4597 [(set (match_dup 0) (match_dup 1))]
4599 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (0..3) from a V4SI value held in
;; offsettable memory ("o") into a general register.  The split code
;; emits a plain SImode move from the source address adjusted by i*4
;; bytes, where i is the element index.
;; NOTE(review): the insn/split conditions and the remaining lines of
;; the split body are not visible in this excerpt.
4602 (define_insn_and_split "*vec_ext_v4si_mem"
4603 [(set (match_operand:SI 0 "register_operand" "=r")
4605 (match_operand:V4SI 1 "memory_operand" "o")
4606 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4612 int i = INTVAL (operands[2]);
4614 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of a V2DI register (operand 1) into a
;; DI destination (operand 0, register or memory).
;; NOTE(review): the selecting operator line, the condition and the
;; preparation string are not visible in this excerpt.
4618 (define_expand "sse_storeq"
4619 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4621 (match_operand:V2DI 1 "register_operand" "")
4622 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 store from V2DI to DI.  Three
;; alternatives: SSE register to memory/SSE register, "Yi" register to
;; general register, and offsettable memory to general register.  The
;; condition rejects the memory-to-memory combination.  The third
;; alternative is an integer mov (type imov, mode DI).
;; NOTE(review): the output templates for the first two alternatives are
;; not visible in this excerpt; their "*" type/mode entries suggest they
;; are handled elsewhere (e.g. by a splitter) -- confirm.
4626 (define_insn "*sse2_storeq_rex64"
4627 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4629 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4630 (parallel [(const_int 0)])))]
4631 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4635 mov{q}\t{%1, %0|%0, %1}"
4636 [(set_attr "type" "*,*,imov")
4637 (set_attr "mode" "*,*,DI")])
;; Element-0 store from a V2DI SSE register to a DI destination in
;; memory or an SSE register.
;; NOTE(review): this excerpt is missing lines here.  The insn's
;; condition/output and the header of what appears to be a separate
;; splitter are not visible; the second operand pattern below, the
;; TARGET_INTER_UNIT_MOVES test and the replacement move presumably
;; belong to that splitter.  The visible preparation statement rewrites
;; operand 1 as a DImode REG with the same register number.
4639 (define_insn "*sse2_storeq"
4640 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4642 (match_operand:V2DI 1 "register_operand" "x")
4643 (parallel [(const_int 0)])))]
4648 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4650 (match_operand:V2DI 1 "register_operand" "")
4651 (parallel [(const_int 0)])))]
4654 && (TARGET_INTER_UNIT_MOVES
4655 || MEM_P (operands [0])
4656 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4657 [(set (match_dup 0) (match_dup 1))]
4659 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI value into a DI destination on 64-bit
;; targets; memory-to-memory is rejected by the condition.  Alternatives:
;;   0: SSE register -> memory            movhps
;;   1: in-place in an SSE register       psrldq by 8 (memory attr "none")
;;   2: offsettable memory -> SSE reg     movq from %H1
;;   3: offsettable memory -> general reg mov{q} from %H1
;; NOTE(review): the selecting operator line is not visible in this excerpt.
4662 (define_insn "*vec_extractv2di_1_rex64"
4663 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4665 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4666 (parallel [(const_int 1)])))]
4667 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4669 movhps\t{%1, %0|%0, %1}
4670 psrldq\t{$8, %0|%0, 8}
4671 movq\t{%H1, %0|%0, %H1}
4672 mov{q}\t{%H1, %0|%0, %H1}"
4673 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4674 (set_attr "memory" "*,none,*,*")
4675 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI value into a DI destination when
;; TARGET_SSE2 holds; memory-to-memory is rejected.  Alternatives:
;;   0: SSE register -> memory        movhps
;;   1: in-place in an SSE register   psrldq by 8 (memory attr "none")
;;   2: offsettable memory -> SSE reg movq from %H1
;; NOTE(review): the first line of the condition and the selecting
;; operator line are not visible in this excerpt.
4677 (define_insn "*vec_extractv2di_1_sse2"
4678 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4680 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4681 (parallel [(const_int 1)])))]
4683 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4685 movhps\t{%1, %0|%0, %1}
4686 psrldq\t{$8, %0|%0, 8}
4687 movq\t{%H1, %0|%0, %H1}"
4688 [(set_attr "type" "ssemov,sseishft,ssemov")
4689 (set_attr "memory" "*,none,*")
4690 (set_attr "mode" "V2SF,TI,TI")])
4692 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-only (no SSE2) extraction of element 1 of a V2DI value into a DI
;; destination; memory-to-memory is rejected.  Alternatives:
;;   0: SSE register -> memory        movhps
;;   1: SSE register -> SSE register  movhlps
;;   2: offsettable memory -> SSE reg movlps from %H1
;; NOTE(review): the selecting operator line is not visible in this excerpt.
4693 (define_insn "*vec_extractv2di_1_sse"
4694 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4696 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4697 (parallel [(const_int 1)])))]
4698 "!TARGET_SSE2 && TARGET_SSE
4699 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4701 movhps\t{%1, %0|%0, %1}
4702 movhlps\t{%1, %0|%0, %1}
4703 movlps\t{%H1, %0|%0, %H1}"
4704 [(set_attr "type" "ssemov")
4705 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SI register from an SI register operand.  Alternative 0
;; ("Y2" registers) uses pshufd with immediate 0; alternative 1 requires
;; the source to be tied to the destination and uses shufps with
;; immediate 0 on the destination itself.
;; NOTE(review): the duplicating operator line and the insn condition
;; are not visible in this excerpt.
4707 (define_insn "*vec_dupv4si"
4708 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4710 (match_operand:SI 1 "register_operand" " Y2,0")))]
4713 pshufd\t{$0, %1, %0|%0, %1, 0}
4714 shufps\t{$0, %0, %0|%0, %0, 0}"
4715 [(set_attr "type" "sselog1")
4716 (set_attr "mode" "TI,V4SF")])
;; Build a V2DI register from a DI register operand; in both
;; alternatives the source is tied to the destination.  The attributes
;; give types sselog1/ssemov and modes TI/V4SF for the two alternatives.
;; NOTE(review): the duplicating operator line, the insn condition and
;; both output templates are not visible in this excerpt.
4718 (define_insn "*vec_dupv2di"
4719 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4721 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4726 [(set_attr "type" "sselog1,ssemov")
4727 (set_attr "mode" "TI,V4SF")])
;; Form a V2SI register from two SI operands using pinsrd.  In
;; alternative 0 operand 1 is already in the destination and operand 2
;; is inserted with immediate 0x1; in alternative 1 operand 2 is tied to
;; the destination and the template inserts %2 with immediate 0x0.
;; NOTE(review): alternative 1 prints %2 although operand 2 is the tied
;; one and operand 1 is the "rm" value -- confirm this is intended.
;; The combining operator line and the insn condition are not visible.
4729 (define_insn "*vec_concatv2si_sse4_1"
4730 [(set (match_operand:V2SI 0 "register_operand" "=x,x")
4732 (match_operand:SI 1 "nonimmediate_operand" "0,rm")
4733 (match_operand:SI 2 "nonimmediate_operand" "rm,0")))]
4736 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
4737 pinsrd\t{$0x0, %2, %0|%0, %2, 0x0}"
4738 [(set_attr "type" "sselog")
4739 (set_attr "mode" "TI")])
4741 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4742 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4743 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SI register from two SI operands.  Alternatives:
;;   0: "Y2" regs, operand 1 tied, operand 2 register   punpckldq
;;   1: "Y2" reg,  operand 1 reg/mem, operand 2 "C"     movd
;;   2: MMX regs,  operand 1 tied, operand 2 register   punpckldq
;;   3: MMX reg,   operand 1 reg/mem, operand 2 "C"     movd
;; NOTE(review): the combining operator line and the insn condition are
;; not visible in this excerpt.
4744 (define_insn "*vec_concatv2si_sse2"
4745 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4747 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4748 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4751 punpckldq\t{%2, %0|%0, %2}
4752 movd\t{%1, %0|%0, %1}
4753 punpckldq\t{%2, %0|%0, %2}
4754 movd\t{%1, %0|%0, %1}"
4755 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4756 (set_attr "mode" "TI,TI,DI,DI")])
;; Form a V2SI register from two SI operands.  Alternatives:
;;   0: SSE regs, operand 1 tied, operand 2 register  unpcklps
;;   1: SSE reg,  operand 1 memory, operand 2 "C"     movss
;;   2: MMX regs, operand 1 tied, operand 2 register  punpckldq
;;   3: MMX reg,  operand 1 reg/mem, operand 2 "C"    movd
;; NOTE(review): the combining operator line and the insn condition are
;; not visible in this excerpt.
4758 (define_insn "*vec_concatv2si_sse"
4759 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4761 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4762 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4765 unpcklps\t{%2, %0|%0, %2}
4766 movss\t{%1, %0|%0, %1}
4767 punpckldq\t{%2, %0|%0, %2}
4768 movd\t{%1, %0|%0, %1}"
4769 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4770 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Form a V4SI register from two V2SI operands; operand 1 is always tied
;; to the destination.  Alternatives by operand 2:
;;   0: "Y2" register  punpcklqdq
;;   1: SSE register   movlhps
;;   2: memory         movhps
;; NOTE(review): the combining operator line and the insn condition are
;; not visible in this excerpt.
4772 (define_insn "*vec_concatv4si_1"
4773 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4775 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4776 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4779 punpcklqdq\t{%2, %0|%0, %2}
4780 movlhps\t{%2, %0|%0, %2}
4781 movhps\t{%2, %0|%0, %2}"
4782 [(set_attr "type" "sselog,ssemov,ssemov")
4783 (set_attr "mode" "TI,V4SF,V2SF")])
;; Form a V2DI register from two DI operands using pinsrq; requires
;; TARGET_64BIT and TARGET_SSE4_1.  Alternative 0 keeps operand 1 in the
;; destination and inserts operand 2 with immediate 0x1; alternative 1
;; ties operand 2 to the destination and the template inserts %2 with
;; immediate 0x0.
;; NOTE(review): alternative 1 prints %2 although operand 2 is the tied
;; one and operand 1 is the "rm" value -- confirm this is intended.
;; The combining operator line is not visible in this excerpt.
4785 (define_insn "*vec_concatv2di_rex64_sse4_1"
4786 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4788 (match_operand:DI 1 "nonimmediate_operand" "0,rm")
4789 (match_operand:DI 2 "nonimmediate_operand" "rm,0")))]
4790 "TARGET_64BIT && TARGET_SSE4_1"
4792 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
4793 pinsrq\t{$0x0, %2, %0|%0, %2, 0x0}"
4794 [(set_attr "type" "sselog")
4795 (set_attr "mode" "TI")])
;; Form a V2DI register from two DI operands on 32-bit SSE targets
;; (!TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   0: operand 1 memory,   operand 2 "C"        movq
;;   1: operand 1 MMX reg,  operand 2 "C"        movq2dq
;;   2: operand 1 tied,     operand 2 "Y2" reg   punpcklqdq
;;   3: operand 1 tied,     operand 2 SSE reg    movlhps
;;   4: operand 1 tied,     operand 2 memory     movhps
;;   5: operand 1 memory,   operand 2 tied       movlps
;; NOTE(review): the combining operator line is not visible in this excerpt.
4797 (define_insn "vec_concatv2di"
4798 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4800 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4801 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4802 "!TARGET_64BIT && TARGET_SSE"
4804 movq\t{%1, %0|%0, %1}
4805 movq2dq\t{%1, %0|%0, %1}
4806 punpcklqdq\t{%2, %0|%0, %2}
4807 movlhps\t{%2, %0|%0, %2}
4808 movhps\t{%2, %0|%0, %2}
4809 movlps\t{%1, %0|%0, %1}"
4810 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4811 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Form a V2DI register from two DI operands, with an additional
;; general-register source alternative.  Alternatives:
;;   0: operand 1 memory,      operand 2 "C"       movq
;;   1: operand 1 general reg, operand 2 "C"       movq
;;   2: operand 1 MMX reg,     operand 2 "C"       movq2dq
;;   3: operand 1 tied,        operand 2 "Y2" reg  punpcklqdq
;;   4: operand 1 tied,        operand 2 SSE reg   movlhps
;;   5: operand 1 tied,        operand 2 memory    movhps
;;   6: operand 1 memory,      operand 2 tied      movlps
;; NOTE(review): the combining operator line and the insn condition are
;; not visible in this excerpt.
4813 (define_insn "*vec_concatv2di_rex64"
4814 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4816 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4817 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4820 movq\t{%1, %0|%0, %1}
4821 movq\t{%1, %0|%0, %1}
4822 movq2dq\t{%1, %0|%0, %1}
4823 punpcklqdq\t{%2, %0|%0, %2}
4824 movlhps\t{%2, %0|%0, %2}
4825 movhps\t{%2, %0|%0, %2}
4826 movlps\t{%1, %0|%0, %1}"
4827 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4828 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: operand 0 is the V2DI vector register, operand 1 the DI
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4830 (define_expand "vec_setv2di"
4831 [(match_operand:V2DI 0 "register_operand" "")
4832 (match_operand:DI 1 "register_operand" "")
4833 (match_operand 2 "const_int_operand" "")]
4836 ix86_expand_vector_set (false, operands[0], operands[1],
4837 INTVAL (operands[2]));
;; Expander: operand 0 is the DI result register, operand 1 the V2DI
;; vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4841 (define_expand "vec_extractv2di"
4842 [(match_operand:DI 0 "register_operand" "")
4843 (match_operand:V2DI 1 "register_operand" "")
4844 (match_operand 2 "const_int_operand" "")]
4847 ix86_expand_vector_extract (false, operands[0], operands[1],
4848 INTVAL (operands[2]));
;; Expander: operand 0 is the V2DI destination register; operand 1 has
;; no mode or predicate.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4852 (define_expand "vec_initv2di"
4853 [(match_operand:V2DI 0 "register_operand" "")
4854 (match_operand 1 "" "")]
4857 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V4SI vector register, operand 1 the SI
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4861 (define_expand "vec_setv4si"
4862 [(match_operand:V4SI 0 "register_operand" "")
4863 (match_operand:SI 1 "register_operand" "")
4864 (match_operand 2 "const_int_operand" "")]
4867 ix86_expand_vector_set (false, operands[0], operands[1],
4868 INTVAL (operands[2]));
;; Expander: operand 0 is the SI result register, operand 1 the V4SI
;; vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4872 (define_expand "vec_extractv4si"
4873 [(match_operand:SI 0 "register_operand" "")
4874 (match_operand:V4SI 1 "register_operand" "")
4875 (match_operand 2 "const_int_operand" "")]
4878 ix86_expand_vector_extract (false, operands[0], operands[1],
4879 INTVAL (operands[2]));
;; Expander: operand 0 is the V4SI destination register; operand 1 has
;; no mode or predicate.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4883 (define_expand "vec_initv4si"
4884 [(match_operand:V4SI 0 "register_operand" "")
4885 (match_operand 1 "" "")]
4888 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V8HI vector register, operand 1 the HI
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4892 (define_expand "vec_setv8hi"
4893 [(match_operand:V8HI 0 "register_operand" "")
4894 (match_operand:HI 1 "register_operand" "")
4895 (match_operand 2 "const_int_operand" "")]
4898 ix86_expand_vector_set (false, operands[0], operands[1],
4899 INTVAL (operands[2]));
;; Expander: operand 0 is the HI result register, operand 1 the V8HI
;; vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4903 (define_expand "vec_extractv8hi"
4904 [(match_operand:HI 0 "register_operand" "")
4905 (match_operand:V8HI 1 "register_operand" "")
4906 (match_operand 2 "const_int_operand" "")]
4909 ix86_expand_vector_extract (false, operands[0], operands[1],
4910 INTVAL (operands[2]));
;; Expander: operand 0 is the V8HI destination register; operand 1 has
;; no mode or predicate.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4914 (define_expand "vec_initv8hi"
4915 [(match_operand:V8HI 0 "register_operand" "")
4916 (match_operand 1 "" "")]
4919 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V16QI vector register, operand 1 the QI
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4923 (define_expand "vec_setv16qi"
4924 [(match_operand:V16QI 0 "register_operand" "")
4925 (match_operand:QI 1 "register_operand" "")
4926 (match_operand 2 "const_int_operand" "")]
4929 ix86_expand_vector_set (false, operands[0], operands[1],
4930 INTVAL (operands[2]));
;; Expander: operand 0 is the QI result register, operand 1 the V16QI
;; vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4934 (define_expand "vec_extractv16qi"
4935 [(match_operand:QI 0 "register_operand" "")
4936 (match_operand:V16QI 1 "register_operand" "")
4937 (match_operand 2 "const_int_operand" "")]
4940 ix86_expand_vector_extract (false, operands[0], operands[1],
4941 INTVAL (operands[2]));
;; Expander: operand 0 is the V16QI destination register; operand 1 has
;; no mode or predicate.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
;; NOTE(review): the expander condition and the DONE/brace lines are not
;; visible in this excerpt.
4945 (define_expand "vec_initv16qi"
4946 [(match_operand:V16QI 0 "register_operand" "")
4947 (match_operand 1 "" "")]
4950 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander producing a V8HI register from a V16QI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (true, true).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The two flags presumably
;; mean "unsigned" and "high half", matching the name -- confirm against
;; the helper definitions.
4954 (define_expand "vec_unpacku_hi_v16qi"
4955 [(match_operand:V8HI 0 "register_operand" "")
4956 (match_operand:V16QI 1 "register_operand" "")]
4960 ix86_expand_sse4_unpack (operands, true, true);
4961 else if (TARGET_SSE5)
4962 ix86_expand_sse5_unpack (operands, true, true);
4964 ix86_expand_sse_unpack (operands, true, true);
;; Expander producing a V8HI register from a V16QI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (false, true).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "signed" and "high half", matching the name -- confirm against the
;; helper definitions.
4968 (define_expand "vec_unpacks_hi_v16qi"
4969 [(match_operand:V8HI 0 "register_operand" "")
4970 (match_operand:V16QI 1 "register_operand" "")]
4974 ix86_expand_sse4_unpack (operands, false, true);
4975 else if (TARGET_SSE5)
4976 ix86_expand_sse5_unpack (operands, false, true);
4978 ix86_expand_sse_unpack (operands, false, true);
;; Expander producing a V8HI register from a V16QI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (true, false).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "unsigned" and "low half", matching the name -- confirm against the
;; helper definitions.
4982 (define_expand "vec_unpacku_lo_v16qi"
4983 [(match_operand:V8HI 0 "register_operand" "")
4984 (match_operand:V16QI 1 "register_operand" "")]
4988 ix86_expand_sse4_unpack (operands, true, false);
4989 else if (TARGET_SSE5)
4990 ix86_expand_sse5_unpack (operands, true, false);
4992 ix86_expand_sse_unpack (operands, true, false);
;; Expander producing a V8HI register from a V16QI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (false, false).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "signed" and "low half", matching the name -- confirm against the
;; helper definitions.
4996 (define_expand "vec_unpacks_lo_v16qi"
4997 [(match_operand:V8HI 0 "register_operand" "")
4998 (match_operand:V16QI 1 "register_operand" "")]
5002 ix86_expand_sse4_unpack (operands, false, false);
5003 else if (TARGET_SSE5)
5004 ix86_expand_sse5_unpack (operands, false, false);
5006 ix86_expand_sse_unpack (operands, false, false);
;; Expander producing a V4SI register from a V8HI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (true, true).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "unsigned" and "high half", matching the name -- confirm against the
;; helper definitions.
5010 (define_expand "vec_unpacku_hi_v8hi"
5011 [(match_operand:V4SI 0 "register_operand" "")
5012 (match_operand:V8HI 1 "register_operand" "")]
5016 ix86_expand_sse4_unpack (operands, true, true);
5017 else if (TARGET_SSE5)
5018 ix86_expand_sse5_unpack (operands, true, true);
5020 ix86_expand_sse_unpack (operands, true, true);
;; Expander producing a V4SI register from a V8HI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (false, true).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "signed" and "high half", matching the name -- confirm against the
;; helper definitions.
5024 (define_expand "vec_unpacks_hi_v8hi"
5025 [(match_operand:V4SI 0 "register_operand" "")
5026 (match_operand:V8HI 1 "register_operand" "")]
5030 ix86_expand_sse4_unpack (operands, false, true);
5031 else if (TARGET_SSE5)
5032 ix86_expand_sse5_unpack (operands, false, true);
5034 ix86_expand_sse_unpack (operands, false, true);
;; Expander producing a V4SI register from a V8HI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (true, false).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "unsigned" and "low half", matching the name -- confirm against the
;; helper definitions.
5038 (define_expand "vec_unpacku_lo_v8hi"
5039 [(match_operand:V4SI 0 "register_operand" "")
5040 (match_operand:V8HI 1 "register_operand" "")]
5044 ix86_expand_sse4_unpack (operands, true, false);
5045 else if (TARGET_SSE5)
5046 ix86_expand_sse5_unpack (operands, true, false);
5048 ix86_expand_sse_unpack (operands, true, false);
;; Expander producing a V4SI register from a V8HI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (false, false).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "signed" and "low half", matching the name -- confirm against the
;; helper definitions.
5052 (define_expand "vec_unpacks_lo_v8hi"
5053 [(match_operand:V4SI 0 "register_operand" "")
5054 (match_operand:V8HI 1 "register_operand" "")]
5058 ix86_expand_sse4_unpack (operands, false, false);
5059 else if (TARGET_SSE5)
5060 ix86_expand_sse5_unpack (operands, false, false);
5062 ix86_expand_sse_unpack (operands, false, false);
;; Expander producing a V2DI register from a V4SI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (true, true).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "unsigned" and "high half", matching the name -- confirm against the
;; helper definitions.
5066 (define_expand "vec_unpacku_hi_v4si"
5067 [(match_operand:V2DI 0 "register_operand" "")
5068 (match_operand:V4SI 1 "register_operand" "")]
5072 ix86_expand_sse4_unpack (operands, true, true);
5073 else if (TARGET_SSE5)
5074 ix86_expand_sse5_unpack (operands, true, true);
5076 ix86_expand_sse_unpack (operands, true, true);
;; Expander producing a V2DI register from a V4SI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (false, true).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "signed" and "high half", matching the name -- confirm against the
;; helper definitions.
5080 (define_expand "vec_unpacks_hi_v4si"
5081 [(match_operand:V2DI 0 "register_operand" "")
5082 (match_operand:V4SI 1 "register_operand" "")]
5086 ix86_expand_sse4_unpack (operands, false, true);
5087 else if (TARGET_SSE5)
5088 ix86_expand_sse5_unpack (operands, false, true);
5090 ix86_expand_sse_unpack (operands, false, true);
;; Expander producing a V2DI register from a V4SI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (true, false).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "unsigned" and "low half", matching the name -- confirm against the
;; helper definitions.
5094 (define_expand "vec_unpacku_lo_v4si"
5095 [(match_operand:V2DI 0 "register_operand" "")
5096 (match_operand:V4SI 1 "register_operand" "")]
5100 ix86_expand_sse4_unpack (operands, true, false);
5101 else if (TARGET_SSE5)
5102 ix86_expand_sse5_unpack (operands, true, false);
5104 ix86_expand_sse_unpack (operands, true, false);
;; Expander producing a V2DI register from a V4SI register.  It calls
;; one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under
;; TARGET_SSE5) or ix86_expand_sse_unpack with the flags (false, false).
;; NOTE(review): the guards around the first and last call and the
;; expander condition are not visible here.  The flags presumably mean
;; "signed" and "low half", matching the name -- confirm against the
;; helper definitions.
5108 (define_expand "vec_unpacks_lo_v4si"
5109 [(match_operand:V2DI 0 "register_operand" "")
5110 (match_operand:V4SI 1 "register_operand" "")]
5114 ix86_expand_sse4_unpack (operands, false, false);
5115 else if (TARGET_SSE5)
5116 ix86_expand_sse5_unpack (operands, false, false);
5118 ix86_expand_sse_unpack (operands, false, false);
5122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI unsigned-average pattern.  The visible template
;; combines operands 1 and 2 (each register or memory) with a constant
;; vector of sixteen 1s; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS and V16QImode.
;; NOTE(review): the arithmetic operators wrapping the operands and the
;; expander condition are not visible in this excerpt.
5128 (define_expand "sse2_uavgv16qi3"
5129 [(set (match_operand:V16QI 0 "register_operand" "")
5135 (match_operand:V16QI 1 "nonimmediate_operand" ""))
5137 (match_operand:V16QI 2 "nonimmediate_operand" "")))
5138 (const_vector:V16QI [(const_int 1) (const_int 1)
5139 (const_int 1) (const_int 1)
5140 (const_int 1) (const_int 1)
5141 (const_int 1) (const_int 1)
5142 (const_int 1) (const_int 1)
5143 (const_int 1) (const_int 1)
5144 (const_int 1) (const_int 1)
5145 (const_int 1) (const_int 1)]))
5148 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; pavgb on V16QI.  Operand 1 is commutative with operand 2 ("%") and
;; tied to the destination; operand 2 may be register or memory.  The
;; template includes a constant vector of sixteen 1s.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for PLUS in V16QImode.
;; NOTE(review): the arithmetic operators wrapping the operands are not
;; visible in this excerpt.
5150 (define_insn "*sse2_uavgv16qi3"
5151 [(set (match_operand:V16QI 0 "register_operand" "=x")
5157 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5159 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5160 (const_vector:V16QI [(const_int 1) (const_int 1)
5161 (const_int 1) (const_int 1)
5162 (const_int 1) (const_int 1)
5163 (const_int 1) (const_int 1)
5164 (const_int 1) (const_int 1)
5165 (const_int 1) (const_int 1)
5166 (const_int 1) (const_int 1)
5167 (const_int 1) (const_int 1)]))
5169 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5170 "pavgb\t{%2, %0|%0, %2}"
5171 [(set_attr "type" "sseiadd")
5172 (set_attr "prefix_data16" "1")
5173 (set_attr "mode" "TI")])
;; Expander for the V8HI unsigned-average pattern.  The visible template
;; combines operands 1 and 2 (each register or memory) with a constant
;; vector of eight 1s; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS and V8HImode.
;; NOTE(review): the arithmetic operators wrapping the operands and the
;; expander condition are not visible in this excerpt.
5175 (define_expand "sse2_uavgv8hi3"
5176 [(set (match_operand:V8HI 0 "register_operand" "")
5182 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5184 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5185 (const_vector:V8HI [(const_int 1) (const_int 1)
5186 (const_int 1) (const_int 1)
5187 (const_int 1) (const_int 1)
5188 (const_int 1) (const_int 1)]))
5191 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; pavgw on V8HI.  Operand 1 is commutative with operand 2 ("%") and
;; tied to the destination; operand 2 may be register or memory.  The
;; template includes a constant vector of eight 1s.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok for PLUS in V8HImode.
;; NOTE(review): the arithmetic operators wrapping the operands are not
;; visible in this excerpt.
5193 (define_insn "*sse2_uavgv8hi3"
5194 [(set (match_operand:V8HI 0 "register_operand" "=x")
5200 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5202 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5203 (const_vector:V8HI [(const_int 1) (const_int 1)
5204 (const_int 1) (const_int 1)
5205 (const_int 1) (const_int 1)
5206 (const_int 1) (const_int 1)]))
5208 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5209 "pavgw\t{%2, %0|%0, %2}"
5210 [(set_attr "type" "sseiadd")
5211 (set_attr "prefix_data16" "1")
5212 (set_attr "mode" "TI")])
5214 ;; The correct representation for this is absolutely enormous, and
5215 ;; surely not generally useful.
;; psadbw modelled as an opaque unspec: two V16QI inputs (operand 1 tied
;; to the destination, operand 2 register or memory) yield a V2DI result.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
5216 (define_insn "sse2_psadbw"
5217 [(set (match_operand:V2DI 0 "register_operand" "=x")
5218 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5219 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5222 "psadbw\t{%2, %0|%0, %2}"
5223 [(set_attr "type" "sseiadd")
5224 (set_attr "prefix_data16" "1")
5225 (set_attr "mode" "TI")])
;; movmskps / movmskpd, generated for each mode of the SSEMODEF2P
;; iterator (V4SF and V2DF).  The SSE vector register operand 1 produces
;; an SI result in a general register.  The pattern name and mnemonic
;; suffix come from the <sse> and <ssemodesuffixf2c> mode attributes;
;; the insn is enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
;; NOTE(review): the operator wrapping operand 1 is not visible in this
;; excerpt.
5227 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5228 [(set (match_operand:SI 0 "register_operand" "=r")
5230 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5232 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5233 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5234 [(set_attr "type" "ssecvt")
5235 (set_attr "mode" "<MODE>")])
;; pmovmskb modelled as an unspec: a V16QI SSE register (operand 1)
;; produces an SI result in a general register.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
5237 (define_insn "sse2_pmovmskb"
5238 [(set (match_operand:SI 0 "register_operand" "=r")
5239 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5242 "pmovmskb\t{%1, %0|%0, %1}"
5243 [(set_attr "type" "ssecvt")
5244 (set_attr "prefix_data16" "1")
5245 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination is set from an
;; unspec whose visible inputs are two V16QI registers (operands 1, 2).
;; NOTE(review): the remaining unspec input(s), the unspec code and the
;; expander condition are not visible in this excerpt.
5247 (define_expand "sse2_maskmovdqu"
5248 [(set (match_operand:V16QI 0 "memory_operand" "")
5249 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5250 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The destination is
;; the V16QI memory addressed by SI register operand 0, constrained to
;; class "D".  The unspec inputs are two SSE registers (operands 1, 2)
;; and the previous contents of that same memory, so the store is
;; modelled as depending on the old value.  Only operands 1 and 2 appear
;; in the assembler template.
;; NOTE(review): the unspec code line is not visible in this excerpt.
5256 (define_insn "*sse2_maskmovdqu"
5257 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5258 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5259 (match_operand:V16QI 2 "register_operand" "x")
5260 (mem:V16QI (match_dup 0))]
5262 "TARGET_SSE2 && !TARGET_64BIT"
5263 ;; @@@ check ordering of operands in intel/nonintel syntax
5264 "maskmovdqu\t{%2, %1|%1, %2}"
5265 [(set_attr "type" "ssecvt")
5266 (set_attr "prefix_data16" "1")
5267 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the
;; 32-bit pattern except that the address register operand 0 is DImode:
;; the destination is the V16QI memory it addresses (class "D"), and the
;; unspec inputs are two SSE registers plus the old memory contents.
;; NOTE(review): the unspec code line is not visible in this excerpt.
5269 (define_insn "*sse2_maskmovdqu_rex64"
5270 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5271 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5272 (match_operand:V16QI 2 "register_operand" "x")
5273 (mem:V16QI (match_dup 0))]
5275 "TARGET_SSE2 && TARGET_64BIT"
5276 ;; @@@ check ordering of operands in intel/nonintel syntax
5277 "maskmovdqu\t{%2, %1|%1, %2}"
5278 [(set_attr "type" "ssecvt")
5279 (set_attr "prefix_data16" "1")
5280 (set_attr "mode" "TI")])
;; ldmxcsr: a volatile unspec taking one SI memory operand; the "memory"
;; attribute marks it as a load.
;; NOTE(review): the unspec code, the insn condition and the output
;; template are not visible in this excerpt.
5282 (define_insn "sse_ldmxcsr"
5283 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5287 [(set_attr "type" "sse")
5288 (set_attr "memory" "load")])
;; stmxcsr: sets an SI memory operand from the volatile unspec
;; UNSPECV_STMXCSR (with a dummy const_int 0 input); the "memory"
;; attribute marks it as a store.
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
5290 (define_insn "sse_stmxcsr"
5291 [(set (match_operand:SI 0 "memory_operand" "=m")
5292 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5295 [(set_attr "type" "sse")
5296 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code creates operand 0 as a BLKmode MEM whose address
;; is a Pmode SCRATCH and marks it volatile; the template is an
;; UNSPEC_SFENCE over that same memory.
;; NOTE(review): the line holding the set destination and the brace
;; lines are not visible in this excerpt.
5298 (define_expand "sse_sfence"
5300 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5301 "TARGET_SSE || TARGET_3DNOW_A"
5303 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5304 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for sfence: a BLKmode operand (no predicate or
;; constraint) is set from UNSPEC_SFENCE applied to itself.  Enabled with
;; TARGET_SSE or TARGET_3DNOW_A; the memory attribute is "unknown".
;; NOTE(review): the output template line is not visible in this excerpt.
5307 (define_insn "*sse_sfence"
5308 [(set (match_operand:BLK 0 "" "")
5309 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5310 "TARGET_SSE || TARGET_3DNOW_A"
5312 [(set_attr "type" "sse")
5313 (set_attr "memory" "unknown")])
;; clflush: a volatile unspec taking one address operand (predicate
;; address_operand, constraint "p"); the memory attribute is "unknown".
;; NOTE(review): the unspec code, the insn condition and the output
;; template are not visible in this excerpt.
5315 (define_insn "sse2_clflush"
5316 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5320 [(set_attr "type" "sse")
5321 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile;
;; the template is an UNSPEC_MFENCE over that same memory.
;; NOTE(review): the set destination line, the expander condition and
;; the brace lines are not visible in this excerpt.
5323 (define_expand "sse2_mfence"
5325 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5328 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5329 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for mfence: a BLKmode operand (no predicate or
;; constraint) is set from UNSPEC_MFENCE applied to itself; the memory
;; attribute is "unknown".
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
5332 (define_insn "*sse2_mfence"
5333 [(set (match_operand:BLK 0 "" "")
5334 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5337 [(set_attr "type" "sse")
5338 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile;
;; the template is an UNSPEC_LFENCE over that same memory.
;; NOTE(review): the set destination line, the expander condition and
;; the brace lines are not visible in this excerpt.
5340 (define_expand "sse2_lfence"
5342 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5345 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5346 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for lfence: a BLKmode operand (no predicate or
;; constraint) is set from UNSPEC_LFENCE applied to itself; the memory
;; attribute is "unknown".
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
5349 (define_insn "*sse2_lfence"
5350 [(set (match_operand:BLK 0 "" "")
5351 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5354 [(set_attr "type" "sse")
5355 (set_attr "memory" "unknown")])
;; mwait: a volatile unspec over two SI registers pinned by constraint to
;; "a" (operand 0) and "c" (operand 1).  The length attribute is fixed
;; at 3.
;; NOTE(review): the unspec code, the insn condition and the output
;; template are not visible in this excerpt.
5357 (define_insn "sse3_mwait"
5358 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5359 (match_operand:SI 1 "register_operand" "c")]
5362 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5363 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5364 ;; we only need to set up 32bit registers.
5366 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): a volatile unspec over
;; three SI registers pinned by constraint to "a", "c" and "d".  The
;; operands are printed explicitly in the template; the length attribute
;; is fixed at 3.
;; NOTE(review): the unspec code line is not visible in this excerpt.
5368 (define_insn "sse3_monitor"
5369 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5370 (match_operand:SI 1 "register_operand" "c")
5371 (match_operand:SI 2 "register_operand" "d")]
5373 "TARGET_SSE3 && !TARGET_64BIT"
5374 "monitor\t%0, %1, %2"
5375 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): operand 0 is a DI
;; register in class "a"; operands 1 and 2 remain SI registers in
;; classes "c" and "d".  The length attribute is fixed at 3.
;; NOTE(review): the unspec code and the output template are not visible
;; in this excerpt.
5377 (define_insn "sse3_monitor64"
5378 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5379 (match_operand:SI 1 "register_operand" "c")
5380 (match_operand:SI 2 "register_operand" "d")]
5382 "TARGET_SSE3 && TARGET_64BIT"
5383 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5384 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5385 ;; zero extended to 64bit, we only need to set up 32bit registers.
5387 [(set_attr "length" "3")])
5389 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5391 ;; SSSE3 instructions
5393 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw on V8HI.  The template refers to HI elements 0..7 of operand 1
;; (tied to the destination) followed by HI elements 0..7 of operand 2
;; (register or memory), taken in adjacent pairs (0,1), (2,3), (4,5),
;; (6,7).
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the insn condition, are not visible in this excerpt;
;; presumably each pair is added, as the mnemonic suggests -- confirm.
5395 (define_insn "ssse3_phaddwv8hi3"
5396 [(set (match_operand:V8HI 0 "register_operand" "=x")
5402 (match_operand:V8HI 1 "register_operand" "0")
5403 (parallel [(const_int 0)]))
5404 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5406 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5407 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5410 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5411 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5413 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5414 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5419 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5420 (parallel [(const_int 0)]))
5421 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5423 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5424 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5427 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5428 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5430 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5431 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5433 "phaddw\t{%2, %0|%0, %2}"
5434 [(set_attr "type" "sseiadd")
5435 (set_attr "prefix_data16" "1")
5436 (set_attr "prefix_extra" "1")
5437 (set_attr "mode" "TI")])
;; phaddw on V4HI using "y"-constraint registers (operand 2: "ym").
;; Operand 1 is tied to the destination; elements are taken in adjacent
;; pairs (0,1), (2,3) from each source.  No prefix_data16; mode attr is DI.
5439 (define_insn "ssse3_phaddwv4hi3"
5440 [(set (match_operand:V4HI 0 "register_operand" "=y")
5445 (match_operand:V4HI 1 "register_operand" "0")
5446 (parallel [(const_int 0)]))
5447 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5449 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5450 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5454 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5455 (parallel [(const_int 0)]))
5456 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5458 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5459 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5461 "phaddw\t{%2, %0|%0, %2}"
5462 [(set_attr "type" "sseiadd")
5463 (set_attr "prefix_extra" "1")
5464 (set_attr "mode" "DI")])
;; phaddd on V4SI.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory ("xm").  SImode elements are taken in adjacent pairs
;; (0,1), (2,3) from each source.
5466 (define_insn "ssse3_phadddv4si3"
5467 [(set (match_operand:V4SI 0 "register_operand" "=x")
5472 (match_operand:V4SI 1 "register_operand" "0")
5473 (parallel [(const_int 0)]))
5474 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5476 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5477 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5481 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5482 (parallel [(const_int 0)]))
5483 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5485 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5486 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5488 "phaddd\t{%2, %0|%0, %2}"
5489 [(set_attr "type" "sseiadd")
5490 (set_attr "prefix_data16" "1")
5491 (set_attr "prefix_extra" "1")
5492 (set_attr "mode" "TI")])
;; phaddd on V2SI using "y"-constraint registers (operand 2: "ym").
;; Operand 1 is tied to the destination; each source contributes its
;; element pair (0,1).
5494 (define_insn "ssse3_phadddv2si3"
5495 [(set (match_operand:V2SI 0 "register_operand" "=y")
5499 (match_operand:V2SI 1 "register_operand" "0")
5500 (parallel [(const_int 0)]))
5501 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5504 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5505 (parallel [(const_int 0)]))
5506 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5508 "phaddd\t{%2, %0|%0, %2}"
5509 [(set_attr "type" "sseiadd")
5510 (set_attr "prefix_extra" "1")
5511 (set_attr "mode" "DI")])
;; phaddsw on V8HI.  Same operand layout as ssse3_phaddwv8hi3: operand 1 is
;; tied to the destination, operand 2 is "xm", and HImode elements are taken
;; in adjacent pairs (0,1) .. (6,7) from each source.
5513 (define_insn "ssse3_phaddswv8hi3"
5514 [(set (match_operand:V8HI 0 "register_operand" "=x")
5520 (match_operand:V8HI 1 "register_operand" "0")
5521 (parallel [(const_int 0)]))
5522 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5524 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5525 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5528 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5529 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5531 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5532 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5537 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5538 (parallel [(const_int 0)]))
5539 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5541 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5542 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5545 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5546 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5548 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5549 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5551 "phaddsw\t{%2, %0|%0, %2}"
5552 [(set_attr "type" "sseiadd")
5553 (set_attr "prefix_data16" "1")
5554 (set_attr "prefix_extra" "1")
5555 (set_attr "mode" "TI")])
;; phaddsw on V4HI using "y"-constraint registers (operand 2: "ym").
;; Operand 1 is tied to the destination; elements are taken in adjacent
;; pairs (0,1), (2,3) from each source.
5557 (define_insn "ssse3_phaddswv4hi3"
5558 [(set (match_operand:V4HI 0 "register_operand" "=y")
5563 (match_operand:V4HI 1 "register_operand" "0")
5564 (parallel [(const_int 0)]))
5565 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5567 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5568 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5572 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5573 (parallel [(const_int 0)]))
5574 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5576 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5577 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5579 "phaddsw\t{%2, %0|%0, %2}"
5580 [(set_attr "type" "sseiadd")
5581 (set_attr "prefix_extra" "1")
5582 (set_attr "mode" "DI")])
;; phsubw on V8HI.  Operand 1 is tied to the destination (constraint "0");
;; operand 2 may be a register or memory ("xm").  HImode elements are taken
;; in adjacent pairs (0,1), (2,3), (4,5), (6,7) from each source.
5584 (define_insn "ssse3_phsubwv8hi3"
5585 [(set (match_operand:V8HI 0 "register_operand" "=x")
5591 (match_operand:V8HI 1 "register_operand" "0")
5592 (parallel [(const_int 0)]))
5593 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5595 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5596 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5599 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5600 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5602 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5603 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5608 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5609 (parallel [(const_int 0)]))
5610 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5612 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5613 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5616 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5617 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5619 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5620 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5622 "phsubw\t{%2, %0|%0, %2}"
5623 [(set_attr "type" "sseiadd")
5624 (set_attr "prefix_data16" "1")
5625 (set_attr "prefix_extra" "1")
5626 (set_attr "mode" "TI")])
;; phsubw on V4HI using "y"-constraint registers (operand 2: "ym").
;; Operand 1 is tied to the destination; elements are taken in adjacent
;; pairs (0,1), (2,3) from each source.
5628 (define_insn "ssse3_phsubwv4hi3"
5629 [(set (match_operand:V4HI 0 "register_operand" "=y")
5634 (match_operand:V4HI 1 "register_operand" "0")
5635 (parallel [(const_int 0)]))
5636 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5638 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5639 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5643 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5644 (parallel [(const_int 0)]))
5645 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5647 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5648 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5650 "phsubw\t{%2, %0|%0, %2}"
5651 [(set_attr "type" "sseiadd")
5652 (set_attr "prefix_extra" "1")
5653 (set_attr "mode" "DI")])
;; phsubd on V4SI.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory ("xm").  SImode elements are taken in adjacent pairs
;; (0,1), (2,3) from each source.
5655 (define_insn "ssse3_phsubdv4si3"
5656 [(set (match_operand:V4SI 0 "register_operand" "=x")
5661 (match_operand:V4SI 1 "register_operand" "0")
5662 (parallel [(const_int 0)]))
5663 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5665 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5666 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5670 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5671 (parallel [(const_int 0)]))
5672 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5674 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5675 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5677 "phsubd\t{%2, %0|%0, %2}"
5678 [(set_attr "type" "sseiadd")
5679 (set_attr "prefix_data16" "1")
5680 (set_attr "prefix_extra" "1")
5681 (set_attr "mode" "TI")])
;; phsubd on V2SI using "y"-constraint registers (operand 2: "ym").
;; Operand 1 is tied to the destination; each source contributes its
;; element pair (0,1).
5683 (define_insn "ssse3_phsubdv2si3"
5684 [(set (match_operand:V2SI 0 "register_operand" "=y")
5688 (match_operand:V2SI 1 "register_operand" "0")
5689 (parallel [(const_int 0)]))
5690 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5693 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5694 (parallel [(const_int 0)]))
5695 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5697 "phsubd\t{%2, %0|%0, %2}"
5698 [(set_attr "type" "sseiadd")
5699 (set_attr "prefix_extra" "1")
5700 (set_attr "mode" "DI")])
;; phsubsw on V8HI.  Same operand layout as ssse3_phsubwv8hi3: operand 1 is
;; tied to the destination, operand 2 is "xm", and HImode elements are taken
;; in adjacent pairs (0,1) .. (6,7) from each source.
5702 (define_insn "ssse3_phsubswv8hi3"
5703 [(set (match_operand:V8HI 0 "register_operand" "=x")
5709 (match_operand:V8HI 1 "register_operand" "0")
5710 (parallel [(const_int 0)]))
5711 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5713 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5714 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5717 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5718 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5720 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5721 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5726 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5727 (parallel [(const_int 0)]))
5728 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5730 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5731 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5734 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5735 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5737 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5738 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5740 "phsubsw\t{%2, %0|%0, %2}"
5741 [(set_attr "type" "sseiadd")
5742 (set_attr "prefix_data16" "1")
5743 (set_attr "prefix_extra" "1")
5744 (set_attr "mode" "TI")])
;; phsubsw on V4HI using "y"-constraint registers (operand 2: "ym").
;; Operand 1 is tied to the destination; elements are taken in adjacent
;; pairs (0,1), (2,3) from each source.
5746 (define_insn "ssse3_phsubswv4hi3"
5747 [(set (match_operand:V4HI 0 "register_operand" "=y")
5752 (match_operand:V4HI 1 "register_operand" "0")
5753 (parallel [(const_int 0)]))
5754 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5756 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5757 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5761 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5762 (parallel [(const_int 0)]))
5763 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5765 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5766 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5768 "phsubsw\t{%2, %0|%0, %2}"
5769 [(set_attr "type" "sseiadd")
5770 (set_attr "prefix_extra" "1")
5771 (set_attr "mode" "DI")])
;; pmaddubsw producing V8HI from two V16QI sources.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory ("xm").  Each source is
;; sampled twice, with index lists beginning at 0 and at 1 respectively.
5773 (define_insn "ssse3_pmaddubsw128"
5774 [(set (match_operand:V8HI 0 "register_operand" "=x")
5779 (match_operand:V16QI 1 "register_operand" "0")
5780 (parallel [(const_int 0)
5790 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5791 (parallel [(const_int 0)
5801 (vec_select:V16QI (match_dup 1)
5802 (parallel [(const_int 1)
5811 (vec_select:V16QI (match_dup 2)
5812 (parallel [(const_int 1)
5819 (const_int 15)]))))))]
5821 "pmaddubsw\t{%2, %0|%0, %2}"
5822 [(set_attr "type" "sseiadd")
5823 (set_attr "prefix_data16" "1")
5824 (set_attr "prefix_extra" "1")
5825 (set_attr "mode" "TI")])
;; pmaddubsw producing V4HI from two V8QI sources in "y"-constraint registers
;; (operand 2: "ym").  Operand 1 is tied to the destination.  Each source is
;; sampled twice, with index lists beginning at 0 and at 1 respectively.
5827 (define_insn "ssse3_pmaddubsw"
5828 [(set (match_operand:V4HI 0 "register_operand" "=y")
5833 (match_operand:V8QI 1 "register_operand" "0")
5834 (parallel [(const_int 0)
5840 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5841 (parallel [(const_int 0)
5847 (vec_select:V8QI (match_dup 1)
5848 (parallel [(const_int 1)
5853 (vec_select:V8QI (match_dup 2)
5854 (parallel [(const_int 1)
5857 (const_int 7)]))))))]
5859 "pmaddubsw\t{%2, %0|%0, %2}"
5860 [(set_attr "type" "sseiadd")
5861 (set_attr "prefix_extra" "1")
5862 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.  The RTL
;; includes an all-ones V8HI constant vector.
5864 (define_expand "ssse3_pmulhrswv8hi3"
5865 [(set (match_operand:V8HI 0 "register_operand" "")
5872 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5874 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5876 (const_vector:V8HI [(const_int 1) (const_int 1)
5877 (const_int 1) (const_int 1)
5878 (const_int 1) (const_int 1)
5879 (const_int 1) (const_int 1)]))
5882 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for pmulhrsw on V8HI.  Operands 1 and 2 are commutative
;; ("%0" / "xm"); the condition additionally requires
;; ix86_binary_operator_ok (MULT, V8HImode, operands).
5884 (define_insn "*ssse3_pmulhrswv8hi3"
5885 [(set (match_operand:V8HI 0 "register_operand" "=x")
5892 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5894 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5896 (const_vector:V8HI [(const_int 1) (const_int 1)
5897 (const_int 1) (const_int 1)
5898 (const_int 1) (const_int 1)
5899 (const_int 1) (const_int 1)]))
5901 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5902 "pmulhrsw\t{%2, %0|%0, %2}"
5903 [(set_attr "type" "sseimul")
5904 (set_attr "prefix_data16" "1")
5905 (set_attr "prefix_extra" "1")
5906 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT in V4HImode.
5908 (define_expand "ssse3_pmulhrswv4hi3"
5909 [(set (match_operand:V4HI 0 "register_operand" "")
5916 (match_operand:V4HI 1 "nonimmediate_operand" ""))
5918 (match_operand:V4HI 2 "nonimmediate_operand" "")))
5920 (const_vector:V4HI [(const_int 1) (const_int 1)
5921 (const_int 1) (const_int 1)]))
5924 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matching insn for pmulhrsw on V4HI in "y"-constraint registers.
;; Operands 1 and 2 are commutative ("%0" / "ym"); the condition
;; additionally requires ix86_binary_operator_ok (MULT, V4HImode, operands).
5926 (define_insn "*ssse3_pmulhrswv4hi3"
5927 [(set (match_operand:V4HI 0 "register_operand" "=y")
5934 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5936 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5938 (const_vector:V4HI [(const_int 1) (const_int 1)
5939 (const_int 1) (const_int 1)]))
5941 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5942 "pmulhrsw\t{%2, %0|%0, %2}"
5943 [(set_attr "type" "sseimul")
5944 (set_attr "prefix_extra" "1")
5945 (set_attr "mode" "DI")])
;; pshufb on V16QI, modelled as an unspec.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a register or memory ("xm").
5947 (define_insn "ssse3_pshufbv16qi3"
5948 [(set (match_operand:V16QI 0 "register_operand" "=x")
5949 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5950 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5953 "pshufb\t{%2, %0|%0, %2}"
5954 [(set_attr "type" "sselog1")
5955 (set_attr "prefix_data16" "1")
5956 (set_attr "prefix_extra" "1")
5957 (set_attr "mode" "TI")])
;; pshufb on V8QI in "y"-constraint registers, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory ("ym").
5959 (define_insn "ssse3_pshufbv8qi3"
5960 [(set (match_operand:V8QI 0 "register_operand" "=y")
5961 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5962 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5965 "pshufb\t{%2, %0|%0, %2}"
5966 [(set_attr "type" "sselog1")
5967 (set_attr "prefix_extra" "1")
5968 (set_attr "mode" "DI")])
;; psign{b,w,d} for each mode in SSEMODE124; the mnemonic suffix comes from
;; the ssevecsize mode attribute.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory ("xm").
5970 (define_insn "ssse3_psign<mode>3"
5971 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5973 [(match_operand:SSEMODE124 1 "register_operand" "0")
5974 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5977 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5978 [(set_attr "type" "sselog1")
5979 (set_attr "prefix_data16" "1")
5980 (set_attr "prefix_extra" "1")
5981 (set_attr "mode" "TI")])
;; psign for each mode in MMXMODEI, using "y"-constraint registers; the
;; mnemonic suffix comes from the mmxvecsize mode attribute.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory ("ym").
5983 (define_insn "ssse3_psign<mode>3"
5984 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5986 [(match_operand:MMXMODEI 1 "register_operand" "0")
5987 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5990 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5991 [(set_attr "type" "sselog1")
5992 (set_attr "prefix_extra" "1")
5993 (set_attr "mode" "DI")])
;; palignr on TImode, modelled as an unspec.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably converting a bit count to the byte count the
;; instruction encodes -- confirm against the predicate definition).
5995 (define_insn "ssse3_palignrti"
5996 [(set (match_operand:TI 0 "register_operand" "=x")
5997 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5998 (match_operand:TI 2 "nonimmediate_operand" "xm")
5999 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6003 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6004 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6006 [(set_attr "type" "sseishft")
6007 (set_attr "prefix_data16" "1")
6008 (set_attr "prefix_extra" "1")
6009 (set_attr "mode" "TI")])
;; palignr on DImode in "y"-constraint registers, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing.
6011 (define_insn "ssse3_palignrdi"
6012 [(set (match_operand:DI 0 "register_operand" "=y")
6013 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6014 (match_operand:DI 2 "nonimmediate_operand" "ym")
6015 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6019 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6020 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6022 [(set_attr "type" "sseishft")
6023 (set_attr "prefix_extra" "1")
6024 (set_attr "mode" "DI")])
;; Vector absolute value (abs RTL) for each mode in SSEMODE124, emitted as
;; pabs{b,w,d} via the ssevecsize mode attribute.  The source may be a
;; register or memory ("xm"); the destination is not tied to it.
6026 (define_insn "abs<mode>2"
6027 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6028 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6030 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
6031 [(set_attr "type" "sselog1")
6032 (set_attr "prefix_data16" "1")
6033 (set_attr "prefix_extra" "1")
6034 (set_attr "mode" "TI")])
;; Vector absolute value (abs RTL) for each mode in MMXMODEI, emitted as
;; pabs with the mmxvecsize suffix, using "y"-constraint registers.  The
;; source may be a register or memory ("ym").
6036 (define_insn "abs<mode>2"
6037 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6038 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6040 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
6041 [(set_attr "type" "sselog1")
6042 (set_attr "prefix_extra" "1")
6043 (set_attr "mode" "DI")])
6045 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6047 ;; AMD SSE4A instructions
6049 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnts{s,d} store for each MODEF mode: operand 0 is a memory destination
;; ("=m"), operand 1 a register source ("x").  The suffix comes from the
;; ssemodefsuffix mode attribute.
6051 (define_insn "sse4a_movnt<mode>"
6052 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6054 [(match_operand:MODEF 1 "register_operand" "x")]
6057 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6058 [(set_attr "type" "ssemov")
6059 (set_attr "mode" "<MODE>")])
;; movnts{s,d} store of element 0 of a vector register: the source is an
;; SSEMODEF2P register from which vec_select picks index 0, and the memory
;; destination has the corresponding scalar mode (<ssescalarmode>).
6061 (define_insn "sse4a_vmmovnt<mode>"
6062 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6063 (unspec:<ssescalarmode>
6064 [(vec_select:<ssescalarmode>
6065 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6066 (parallel [(const_int 0)]))]
6069 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6070 [(set_attr "type" "ssemov")
6071 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operands 2 and 3 are modeless const_int operands printed as the two
;; immediates.
6073 (define_insn "sse4a_extrqi"
6074 [(set (match_operand:V2DI 0 "register_operand" "=x")
6075 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6076 (match_operand 2 "const_int_operand" "")
6077 (match_operand 3 "const_int_operand" "")]
6080 "extrq\t{%3, %2, %0|%0, %2, %3}"
6081 [(set_attr "type" "sse")
6082 (set_attr "prefix_data16" "1")
6083 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 is a V16QI register ("x").
6085 (define_insn "sse4a_extrq"
6086 [(set (match_operand:V2DI 0 "register_operand" "=x")
6087 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6088 (match_operand:V16QI 2 "register_operand" "x")]
6091 "extrq\t{%2, %0|%0, %2}"
6092 [(set_attr "type" "sse")
6093 (set_attr "prefix_data16" "1")
6094 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are modeless const_int
;; operands printed as the two immediates.
6096 (define_insn "sse4a_insertqi"
6097 [(set (match_operand:V2DI 0 "register_operand" "=x")
6098 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6099 (match_operand:V2DI 2 "register_operand" "x")
6100 (match_operand 3 "const_int_operand" "")
6101 (match_operand 4 "const_int_operand" "")]
6104 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6105 [(set_attr "type" "sseins")
6106 (set_attr "prefix_rep" "1")
6107 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 is a V2DI register ("x").
6109 (define_insn "sse4a_insertq"
6110 [(set (match_operand:V2DI 0 "register_operand" "=x")
6111 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6112 (match_operand:V2DI 2 "register_operand" "x")]
6115 "insertq\t{%2, %0|%0, %2}"
6116 [(set_attr "type" "sseins")
6117 (set_attr "prefix_rep" "1")
6118 (set_attr "mode" "TI")])
6120 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6122 ;; Intel SSE4.1 instructions
6124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendp{s,d} expressed as vec_merge.  Note the operand order inside the
;; vec_merge: operand 2 ("xm") comes first, then operand 1 (tied to the
;; destination), then the immediate mask, whose range is limited by the
;; const_0_to_<blendbits>_operand predicate.
6126 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6127 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6128 (vec_merge:SSEMODEF2P
6129 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6130 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6131 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6133 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6134 [(set_attr "type" "ssemov")
6135 (set_attr "prefix_extra" "1")
6136 (set_attr "mode" "<MODE>")])
;; blendvp{s,d}.  Operand 3 (the selector) carries the "Yz" constraint, while
;; operands 0, 1 and 2 use the *_not_xmm0_operand predicates so they cannot
;; overlap it.  Operand 1 is tied to the destination.
6138 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6139 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6141 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6142 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6143 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6146 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6147 [(set_attr "type" "ssemov")
6148 (set_attr "prefix_extra" "1")
6149 (set_attr "mode" "<MODE>")])
;; dpp{s,d} with an 8-bit immediate (const_0_to_255_operand).  Operands 1
;; and 2 are marked commutative ("%0" / "xm").
6151 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6152 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6154 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6155 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6156 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6159 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6160 [(set_attr "type" "ssemul")
6161 (set_attr "prefix_extra" "1")
6162 (set_attr "mode" "<MODE>")])
;; movntdqa load: the source must be a V2DI memory operand ("m") and the
;; destination a register ("=x").  Modelled as an unspec.
6164 (define_insn "sse4_1_movntdqa"
6165 [(set (match_operand:V2DI 0 "register_operand" "=x")
6166 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6169 "movntdqa\t{%1, %0|%0, %1}"
6170 [(set_attr "type" "ssecvt")
6171 (set_attr "prefix_extra" "1")
6172 (set_attr "mode" "TI")])
;; mpsadbw on V16QI with an 8-bit immediate, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
6174 (define_insn "sse4_1_mpsadbw"
6175 [(set (match_operand:V16QI 0 "register_operand" "=x")
6176 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6177 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6178 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6181 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6182 [(set_attr "type" "sselog1")
6183 (set_attr "prefix_extra" "1")
6184 (set_attr "mode" "TI")])
;; packusdw: two V4SI sources produce one V8HI result.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory ("xm").
6186 (define_insn "sse4_1_packusdw"
6187 [(set (match_operand:V8HI 0 "register_operand" "=x")
6190 (match_operand:V4SI 1 "register_operand" "0"))
6192 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6194 "packusdw\t{%2, %0|%0, %2}"
6195 [(set_attr "type" "sselog")
6196 (set_attr "prefix_extra" "1")
6197 (set_attr "mode" "TI")])
;; pblendvb on V16QI.  Operand 3 (the selector) carries the "Yz" constraint,
;; while operands 0, 1 and 2 use the *_not_xmm0_operand predicates so they
;; cannot overlap it.  Operand 1 is tied to the destination.
6199 (define_insn "sse4_1_pblendvb"
6200 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6201 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6202 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6203 (match_operand:V16QI 3 "register_operand" "Yz")]
6206 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6207 [(set_attr "type" "ssemov")
6208 (set_attr "prefix_extra" "1")
6209 (set_attr "mode" "TI")])
;; pblendw on V8HI with an 8-bit immediate mask.  As in sse4_1_blendp, the
;; RTL lists operand 2 ("xm") before operand 1 (tied to the destination).
6211 (define_insn "sse4_1_pblendw"
6212 [(set (match_operand:V8HI 0 "register_operand" "=x")
6214 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6215 (match_operand:V8HI 1 "register_operand" "0")
6216 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6218 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6219 [(set_attr "type" "ssemov")
6220 (set_attr "prefix_extra" "1")
6221 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW.  Single source operand
;; that may be a register or memory ("xm").
6223 (define_insn "sse4_1_phminposuw"
6224 [(set (match_operand:V8HI 0 "register_operand" "=x")
6225 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6226 UNSPEC_PHMINPOSUW))]
6228 "phminposuw\t{%1, %0|%0, %1}"
6229 [(set_attr "type" "sselog1")
6230 (set_attr "prefix_extra" "1")
6231 (set_attr "mode" "TI")])
;; pmovsxbw, named pattern: the source is a full V16QI register ("x") from
;; which elements starting at index 0 are selected; the result is V8HI.
6233 (define_insn "sse4_1_extendv8qiv8hi2"
6234 [(set (match_operand:V8HI 0 "register_operand" "=x")
6237 (match_operand:V16QI 1 "register_operand" "x")
6238 (parallel [(const_int 0)
6247 "pmovsxbw\t{%1, %0|%0, %1}"
6248 [(set_attr "type" "ssemov")
6249 (set_attr "prefix_extra" "1")
6250 (set_attr "mode" "TI")])
;; pmovsxbw, unnamed variant: the source is a V8QI register-or-memory
;; operand ("xm") wrapped in vec_duplicate:V16QI.
6252 (define_insn "*sse4_1_extendv8qiv8hi2"
6253 [(set (match_operand:V8HI 0 "register_operand" "=x")
6256 (vec_duplicate:V16QI
6257 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6258 (parallel [(const_int 0)
6267 "pmovsxbw\t{%1, %0|%0, %1}"
6268 [(set_attr "type" "ssemov")
6269 (set_attr "prefix_extra" "1")
6270 (set_attr "mode" "TI")])
;; pmovsxbd, named pattern: the source is a full V16QI register ("x") from
;; which elements starting at index 0 are selected; the result is V4SI.
6272 (define_insn "sse4_1_extendv4qiv4si2"
6273 [(set (match_operand:V4SI 0 "register_operand" "=x")
6276 (match_operand:V16QI 1 "register_operand" "x")
6277 (parallel [(const_int 0)
6282 "pmovsxbd\t{%1, %0|%0, %1}"
6283 [(set_attr "type" "ssemov")
6284 (set_attr "prefix_extra" "1")
6285 (set_attr "mode" "TI")])
;; pmovsxbd, unnamed variant: the source is a V4QI register-or-memory
;; operand ("xm") wrapped in vec_duplicate:V16QI.
6287 (define_insn "*sse4_1_extendv4qiv4si2"
6288 [(set (match_operand:V4SI 0 "register_operand" "=x")
6291 (vec_duplicate:V16QI
6292 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6293 (parallel [(const_int 0)
6298 "pmovsxbd\t{%1, %0|%0, %1}"
6299 [(set_attr "type" "ssemov")
6300 (set_attr "prefix_extra" "1")
6301 (set_attr "mode" "TI")])
;; pmovsxbq, named pattern: the source is a full V16QI register ("x") from
;; which elements starting at index 0 are selected; the result is V2DI.
6303 (define_insn "sse4_1_extendv2qiv2di2"
6304 [(set (match_operand:V2DI 0 "register_operand" "=x")
6307 (match_operand:V16QI 1 "register_operand" "x")
6308 (parallel [(const_int 0)
6311 "pmovsxbq\t{%1, %0|%0, %1}"
6312 [(set_attr "type" "ssemov")
6313 (set_attr "prefix_extra" "1")
6314 (set_attr "mode" "TI")])
;; pmovsxbq, unnamed variant: the source is a V2QI register-or-memory
;; operand ("xm") wrapped in vec_duplicate:V16QI.
6316 (define_insn "*sse4_1_extendv2qiv2di2"
6317 [(set (match_operand:V2DI 0 "register_operand" "=x")
6320 (vec_duplicate:V16QI
6321 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6322 (parallel [(const_int 0)
6325 "pmovsxbq\t{%1, %0|%0, %1}"
6326 [(set_attr "type" "ssemov")
6327 (set_attr "prefix_extra" "1")
6328 (set_attr "mode" "TI")])
;; pmovsxwd, named pattern: the source is a full V8HI register ("x") from
;; which elements starting at index 0 are selected; the result is V4SI.
6330 (define_insn "sse4_1_extendv4hiv4si2"
6331 [(set (match_operand:V4SI 0 "register_operand" "=x")
6334 (match_operand:V8HI 1 "register_operand" "x")
6335 (parallel [(const_int 0)
6340 "pmovsxwd\t{%1, %0|%0, %1}"
6341 [(set_attr "type" "ssemov")
6342 (set_attr "prefix_extra" "1")
6343 (set_attr "mode" "TI")])
;; pmovsxwd, unnamed variant taking a register-or-memory source ("xm").
;; The source mode is V4HI: the pattern extends four HImode elements, and
;; this matches the zero-extending twin *sse4_1_zero_extendv4hiv4si2.
;; (It was previously V2HI, which declared only half of the data consumed.)
6345 (define_insn "*sse4_1_extendv4hiv4si2"
6346 [(set (match_operand:V4SI 0 "register_operand" "=x")
6350 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6351 (parallel [(const_int 0)
6356 "pmovsxwd\t{%1, %0|%0, %1}"
6357 [(set_attr "type" "ssemov")
6358 (set_attr "prefix_extra" "1")
6359 (set_attr "mode" "TI")])
;; pmovsxwq, named pattern: the source is a full V8HI register ("x") from
;; which elements starting at index 0 are selected; the result is V2DI.
6361 (define_insn "sse4_1_extendv2hiv2di2"
6362 [(set (match_operand:V2DI 0 "register_operand" "=x")
6365 (match_operand:V8HI 1 "register_operand" "x")
6366 (parallel [(const_int 0)
6369 "pmovsxwq\t{%1, %0|%0, %1}"
6370 [(set_attr "type" "ssemov")
6371 (set_attr "prefix_extra" "1")
6372 (set_attr "mode" "TI")])
;; pmovsxwq, unnamed variant taking a register-or-memory source ("xm").
;; The source mode is V2HI: the pattern extends two HImode elements, and
;; this matches the zero-extending twin *sse4_1_zero_extendv2hiv2di2.
;; (It was previously V8HI, i.e. a full 16-byte operand.)
6374 (define_insn "*sse4_1_extendv2hiv2di2"
6375 [(set (match_operand:V2DI 0 "register_operand" "=x")
6379 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6380 (parallel [(const_int 0)
6383 "pmovsxwq\t{%1, %0|%0, %1}"
6384 [(set_attr "type" "ssemov")
6385 (set_attr "prefix_extra" "1")
6386 (set_attr "mode" "TI")])
;; pmovsxdq, named pattern: the source is a full V4SI register ("x") from
;; which elements starting at index 0 are selected; the result is V2DI.
6388 (define_insn "sse4_1_extendv2siv2di2"
6389 [(set (match_operand:V2DI 0 "register_operand" "=x")
6392 (match_operand:V4SI 1 "register_operand" "x")
6393 (parallel [(const_int 0)
6396 "pmovsxdq\t{%1, %0|%0, %1}"
6397 [(set_attr "type" "ssemov")
6398 (set_attr "prefix_extra" "1")
6399 (set_attr "mode" "TI")])
;; pmovsxdq, unnamed variant: the source is a V2SI register-or-memory
;; operand ("xm").
6401 (define_insn "*sse4_1_extendv2siv2di2"
6402 [(set (match_operand:V2DI 0 "register_operand" "=x")
6406 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6407 (parallel [(const_int 0)
6410 "pmovsxdq\t{%1, %0|%0, %1}"
6411 [(set_attr "type" "ssemov")
6412 (set_attr "prefix_extra" "1")
6413 (set_attr "mode" "TI")])
;; pmovzxbw, named pattern: the source is a full V16QI register ("x") from
;; which elements starting at index 0 are selected; the result is V8HI.
6415 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6416 [(set (match_operand:V8HI 0 "register_operand" "=x")
6419 (match_operand:V16QI 1 "register_operand" "x")
6420 (parallel [(const_int 0)
6429 "pmovzxbw\t{%1, %0|%0, %1}"
6430 [(set_attr "type" "ssemov")
6431 (set_attr "prefix_extra" "1")
6432 (set_attr "mode" "TI")])
;; pmovzxbw, unnamed variant: the source is a V8QI register-or-memory
;; operand ("xm") wrapped in vec_duplicate:V16QI.
6434 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6435 [(set (match_operand:V8HI 0 "register_operand" "=x")
6438 (vec_duplicate:V16QI
6439 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6440 (parallel [(const_int 0)
6449 "pmovzxbw\t{%1, %0|%0, %1}"
6450 [(set_attr "type" "ssemov")
6451 (set_attr "prefix_extra" "1")
6452 (set_attr "mode" "TI")])
;; pmovzxbd, named pattern: the source is a full V16QI register ("x") from
;; which elements starting at index 0 are selected; the result is V4SI.
6454 (define_insn "sse4_1_zero_extendv4qiv4si2"
6455 [(set (match_operand:V4SI 0 "register_operand" "=x")
6458 (match_operand:V16QI 1 "register_operand" "x")
6459 (parallel [(const_int 0)
6464 "pmovzxbd\t{%1, %0|%0, %1}"
6465 [(set_attr "type" "ssemov")
6466 (set_attr "prefix_extra" "1")
6467 (set_attr "mode" "TI")])
;; pmovzxbd, unnamed variant: the source is a V4QI register-or-memory
;; operand ("xm") wrapped in vec_duplicate:V16QI.
6469 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6470 [(set (match_operand:V4SI 0 "register_operand" "=x")
6473 (vec_duplicate:V16QI
6474 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6475 (parallel [(const_int 0)
6480 "pmovzxbd\t{%1, %0|%0, %1}"
6481 [(set_attr "type" "ssemov")
6482 (set_attr "prefix_extra" "1")
6483 (set_attr "mode" "TI")])
;; pmovzxbq, named pattern: the source is a full V16QI register ("x") from
;; which elements starting at index 0 are selected; the result is V2DI.
6485 (define_insn "sse4_1_zero_extendv2qiv2di2"
6486 [(set (match_operand:V2DI 0 "register_operand" "=x")
6489 (match_operand:V16QI 1 "register_operand" "x")
6490 (parallel [(const_int 0)
6493 "pmovzxbq\t{%1, %0|%0, %1}"
6494 [(set_attr "type" "ssemov")
6495 (set_attr "prefix_extra" "1")
6496 (set_attr "mode" "TI")])
;; pmovzxbq, unnamed variant: the source is a V2QI register-or-memory
;; operand ("xm") wrapped in vec_duplicate:V16QI.
6498 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6499 [(set (match_operand:V2DI 0 "register_operand" "=x")
6502 (vec_duplicate:V16QI
6503 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6504 (parallel [(const_int 0)
6507 "pmovzxbq\t{%1, %0|%0, %1}"
6508 [(set_attr "type" "ssemov")
6509 (set_attr "prefix_extra" "1")
6510 (set_attr "mode" "TI")])
;; pmovzxwd, named pattern: the source is a full V8HI register ("x") from
;; which elements starting at index 0 are selected; the result is V4SI.
6512 (define_insn "sse4_1_zero_extendv4hiv4si2"
6513 [(set (match_operand:V4SI 0 "register_operand" "=x")
6516 (match_operand:V8HI 1 "register_operand" "x")
6517 (parallel [(const_int 0)
6522 "pmovzxwd\t{%1, %0|%0, %1}"
6523 [(set_attr "type" "ssemov")
6524 (set_attr "prefix_extra" "1")
6525 (set_attr "mode" "TI")])
;; pmovzxwd, unnamed variant: the source is a V4HI register-or-memory
;; operand ("xm").
6527 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6528 [(set (match_operand:V4SI 0 "register_operand" "=x")
6532 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6533 (parallel [(const_int 0)
6538 "pmovzxwd\t{%1, %0|%0, %1}"
6539 [(set_attr "type" "ssemov")
6540 (set_attr "prefix_extra" "1")
6541 (set_attr "mode" "TI")])
;; pmovzxwq, named pattern: the source is a full V8HI register ("x") from
;; which elements starting at index 0 are selected; the result is V2DI.
6543 (define_insn "sse4_1_zero_extendv2hiv2di2"
6544 [(set (match_operand:V2DI 0 "register_operand" "=x")
6547 (match_operand:V8HI 1 "register_operand" "x")
6548 (parallel [(const_int 0)
6551 "pmovzxwq\t{%1, %0|%0, %1}"
6552 [(set_attr "type" "ssemov")
6553 (set_attr "prefix_extra" "1")
6554 (set_attr "mode" "TI")])
;; pmovzxwq, unnamed variant: the source is a V2HI register-or-memory
;; operand ("xm").
6556 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6557 [(set (match_operand:V2DI 0 "register_operand" "=x")
6561 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6562 (parallel [(const_int 0)
6565 "pmovzxwq\t{%1, %0|%0, %1}"
6566 [(set_attr "type" "ssemov")
6567 (set_attr "prefix_extra" "1")
6568 (set_attr "mode" "TI")])
;; pmovzxdq, named pattern: the source is a full V4SI register ("x") from
;; which elements starting at index 0 are selected; the result is V2DI.
6570 (define_insn "sse4_1_zero_extendv2siv2di2"
6571 [(set (match_operand:V2DI 0 "register_operand" "=x")
6574 (match_operand:V4SI 1 "register_operand" "x")
6575 (parallel [(const_int 0)
6578 "pmovzxdq\t{%1, %0|%0, %1}"
6579 [(set_attr "type" "ssemov")
6580 (set_attr "prefix_extra" "1")
6581 (set_attr "mode" "TI")])
;; pmovzxdq, unnamed variant: the source is a V2SI register-or-memory
;; operand ("xm").
6583 (define_insn "*sse4_1_zero_extendv2siv2di2"
6584 [(set (match_operand:V2DI 0 "register_operand" "=x")
6588 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6589 (parallel [(const_int 0)
6592 "pmovzxdq\t{%1, %0|%0, %1}"
6593 [(set_attr "type" "ssemov")
6594 (set_attr "prefix_extra" "1")
6595 (set_attr "mode" "TI")])
6597 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6598 ;; But it is not really a compare instruction.
;; The only result is (reg:CC FLAGS_REG); both inputs are V2DI, operand 0 a
;; register and operand 1 a register or memory ("xm").
6599 (define_insn "sse4_1_ptest"
6600 [(set (reg:CC FLAGS_REG)
6601 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6602 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6605 "ptest\t{%1, %0|%0, %1}"
6606 [(set_attr "type" "ssecomi")
6607 (set_attr "prefix_extra" "1")
6608 (set_attr "mode" "TI")])
;; roundp{s,d}: packed rounding of operand 1 ("xm") into a separate
;; destination register.  Operand 2 is an immediate restricted to 0..15
;; by const_0_to_15_operand.
6610 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6611 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6613 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6614 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6617 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6618 [(set_attr "type" "ssecvt")
6619 (set_attr "prefix_extra" "1")
6620 (set_attr "mode" "<MODE>")])
;; rounds{s,d} expressed as a vec_merge: the rounded value derived from
;; operand 2 (register, "x") and immediate operand 3 (0..15) is merged with
;; operand 1, which is tied to the destination.
6622 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6623 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6624 (vec_merge:SSEMODEF2P
6626 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6627 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6629 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6632 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6633 [(set_attr "type" "ssecvt")
6634 (set_attr "prefix_extra" "1")
6635 (set_attr "mode" "<MODE>")])
6637 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6639 ;; Intel SSE4.2 string/text processing instructions
6641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern that sets three results at once: an SImode
;; index (operand 0, "c"), a V16QI mask (operand 1, "Yz") and FLAGS_REG.
;; It is only valid before reload (see the condition).  The split code
;; derives three liveness flags from REG_UNUSED notes on curr_insn -- ecx
;; for operand 0, xmm0 for operand 1, flags for FLAGS_REG -- and uses them
;; to choose among gen_sse4_2_pcmpestri, gen_sse4_2_pcmpestrm and
;; gen_sse4_2_pcmpestr_cconly; the cconly form is emitted when only the
;; flags result is live.
6643 (define_insn_and_split "sse4_2_pcmpestr"
6644 [(set (match_operand:SI 0 "register_operand" "=c,c")
6646 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6647 (match_operand:SI 3 "register_operand" "a,a")
6648 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6649 (match_operand:SI 5 "register_operand" "d,d")
6650 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6652 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6660 (set (reg:CC FLAGS_REG)
6669 && !(reload_completed || reload_in_progress)"
6674 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6675 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6676 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6679 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6680 operands[3], operands[4],
6681 operands[5], operands[6]));
6683 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6684 operands[3], operands[4],
6685 operands[5], operands[6]));
6686 if (flags && !(ecx || xmm0))
6687 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6688 operands[2], operands[3],
6689 operands[4], operands[5],
6693 [(set_attr "type" "sselog")
6694 (set_attr "prefix_data16" "1")
6695 (set_attr "prefix_extra" "1")
6696 (set_attr "memory" "none,load")
6697 (set_attr "mode" "TI")])
6699 (define_insn "sse4_2_pcmpestri"
6700 [(set (match_operand:SI 0 "register_operand" "=c,c")
6702 [(match_operand:V16QI 1 "register_operand" "x,x")
6703 (match_operand:SI 2 "register_operand" "a,a")
6704 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6705 (match_operand:SI 4 "register_operand" "d,d")
6706 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6708 (set (reg:CC FLAGS_REG)
6717 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6718 [(set_attr "type" "sselog")
6719 (set_attr "prefix_data16" "1")
6720 (set_attr "prefix_extra" "1")
6721 (set_attr "memory" "none,load")
6722 (set_attr "mode" "TI")])
6724 (define_insn "sse4_2_pcmpestrm"
6725 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6727 [(match_operand:V16QI 1 "register_operand" "x,x")
6728 (match_operand:SI 2 "register_operand" "a,a")
6729 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6730 (match_operand:SI 4 "register_operand" "d,d")
6731 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6733 (set (reg:CC FLAGS_REG)
6742 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6743 [(set_attr "type" "sselog")
6744 (set_attr "prefix_data16" "1")
6745 (set_attr "prefix_extra" "1")
6746 (set_attr "memory" "none,load")
6747 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare: FLAGS_REG is the only value
;; set; operands 0 and 1 are match_scratch clobbers.
;; Four alternatives.  The first two clobber the V16QI scratch ("Yz") and
;; emit pcmpestrm; the last two clobber the SI scratch ("c") and emit
;; pcmpestri.  Within each pair operand 4 is a register, then memory, which
;; is what the "memory" attribute (none,load,none,load) records.
6749 (define_insn "sse4_2_pcmpestr_cconly"
6750 [(set (reg:CC FLAGS_REG)
6752 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6753 (match_operand:SI 3 "register_operand" "a,a,a,a")
6754 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6755 (match_operand:SI 5 "register_operand" "d,d,d,d")
6756 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6758 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6759 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6762 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6763 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6764 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6765 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6766 [(set_attr "type" "sselog")
6767 (set_attr "prefix_data16" "1")
6768 (set_attr "prefix_extra" "1")
6769 (set_attr "memory" "none,load,none,load")
6770 (set_attr "mode" "TI")])
;; Combined implicit-length string compare.  The pattern sets operand 0
;; (SI, constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; Inputs are operands 2-4: two V16QI vectors (the second may be memory) and
;; an immediate in 0..255.
;; The condition requires !(reload_completed || reload_in_progress), so the
;; split only happens before reload.
;; The split body looks for REG_UNUSED notes on curr_insn: `ecx', `xmm0' and
;; `flags' are nonzero when operand 0, operand 1 and FLAGS_REG respectively
;; are NOT marked unused.  It can emit sse4_2_pcmpistri (result in operand
;; 0), sse4_2_pcmpistrm (result in operand 1), and the flags-only
;; sse4_2_pcmpistr_cconly when the flags are live but neither register
;; result is.
;; NOTE(review): the guards on the first two emit_insn calls are not visible
;; in the lines reviewed; presumably `ecx' and `xmm0' -- confirm.
6772 (define_insn_and_split "sse4_2_pcmpistr"
6773 [(set (match_operand:SI 0 "register_operand" "=c,c")
6775 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6776 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6777 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6779 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6785 (set (reg:CC FLAGS_REG)
6792 && !(reload_completed || reload_in_progress)"
6797 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6798 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6799 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6802 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6803 operands[3], operands[4]));
6805 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6806 operands[3], operands[4]));
6807 if (flags && !(ecx || xmm0))
6808 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6809 operands[2], operands[3],
6813 [(set_attr "type" "sselog")
6814 (set_attr "prefix_data16" "1")
6815 (set_attr "prefix_extra" "1")
6816 (set_attr "memory" "none,load")
6817 (set_attr "mode" "TI")])
6819 (define_insn "sse4_2_pcmpistri"
6820 [(set (match_operand:SI 0 "register_operand" "=c,c")
6822 [(match_operand:V16QI 1 "register_operand" "x,x")
6823 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6824 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6826 (set (reg:CC FLAGS_REG)
6833 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6834 [(set_attr "type" "sselog")
6835 (set_attr "prefix_data16" "1")
6836 (set_attr "prefix_extra" "1")
6837 (set_attr "memory" "none,load")
6838 (set_attr "mode" "TI")])
6840 (define_insn "sse4_2_pcmpistrm"
6841 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6843 [(match_operand:V16QI 1 "register_operand" "x,x")
6844 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6845 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6847 (set (reg:CC FLAGS_REG)
6854 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6855 [(set_attr "type" "sselog")
6856 (set_attr "prefix_data16" "1")
6857 (set_attr "prefix_extra" "1")
6858 (set_attr "memory" "none,load")
6859 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare: FLAGS_REG is the only value
;; set; operands 0 and 1 are match_scratch clobbers.
;; Four alternatives.  The first two clobber the V16QI scratch ("Yz") and
;; emit pcmpistrm; the last two clobber the SI scratch ("c") and emit
;; pcmpistri.  Within each pair operand 3 is a register, then memory, which
;; is what the "memory" attribute (none,load,none,load) records.
6861 (define_insn "sse4_2_pcmpistr_cconly"
6862 [(set (reg:CC FLAGS_REG)
6864 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6865 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6866 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6868 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6869 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6872 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6873 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6874 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6875 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6876 [(set_attr "type" "sselog")
6877 (set_attr "prefix_data16" "1")
6878 (set_attr "prefix_extra" "1")
6879 (set_attr "memory" "none,load,none,load")
6880 (set_attr "mode" "TI")])
6882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6884 ;; SSE5 instructions
6886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6888 ;; SSE5 parallel integer multiply/add instructions.
6889 ;; Note the instruction does not allow the value being added to be a memory
6890 ;; operation. However by pretending via the nonimmediate_operand predicate
6891 ;; that it does and splitting it later allows the following to be recognized:
6892 ;; a[i] = b[i] * c[i] + d[i];
6893 (define_insn "sse5_pmacsww"
6894 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6897 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6898 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6899 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6900 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6902 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6903 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6904 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6905 [(set_attr "type" "ssemuladd")
6906 (set_attr "mode" "TI")])
6908 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The condition holds only when the operands fail ix86_sse5_valid_op_p with
;; a final argument of 1 but pass it with a final argument of 2 (presumably
;; the number of memory operands tolerated -- confirm against the helper),
;; and when operand 0 is not mentioned in any of operands 1-3.
;; The split body calls ix86_expand_sse5_multiple_memory on the four V8HI
;; operands and then emits sse5_pmacsww again.
6910 [(set (match_operand:V8HI 0 "register_operand" "")
6912 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6913 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6914 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6916 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6917 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6918 && !reg_mentioned_p (operands[0], operands[1])
6919 && !reg_mentioned_p (operands[0], operands[2])
6920 && !reg_mentioned_p (operands[0], operands[3])"
6923 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6924 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
6929 (define_insn "sse5_pmacssww"
6930 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6932 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6933 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6934 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6937 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6938 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6939 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6940 [(set_attr "type" "ssemuladd")
6941 (set_attr "mode" "TI")])
6943 ;; Note the instruction does not allow the value being added to be a memory
6944 ;; operation. However by pretending via the nonimmediate_operand predicate
6945 ;; that it does and splitting it later allows the following to be recognized:
6946 ;; a[i] = b[i] * c[i] + d[i];
6947 (define_insn "sse5_pmacsdd"
6948 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6951 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6952 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6953 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6954 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6956 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6957 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6958 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6959 [(set_attr "type" "ssemuladd")
6960 (set_attr "mode" "TI")])
6962 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The condition holds only when the operands fail ix86_sse5_valid_op_p with
;; a final argument of 1 but pass it with a final argument of 2 (presumably
;; the number of memory operands tolerated -- confirm against the helper),
;; and when operand 0 is not mentioned in any of operands 1-3.
;; The split body calls ix86_expand_sse5_multiple_memory on the four V4SI
;; operands and then emits sse5_pmacsdd again.
6964 [(set (match_operand:V4SI 0 "register_operand" "")
6966 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
6967 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6968 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
6970 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6971 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6972 && !reg_mentioned_p (operands[0], operands[1])
6973 && !reg_mentioned_p (operands[0], operands[2])
6974 && !reg_mentioned_p (operands[0], operands[3])"
6977 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
6978 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
6983 (define_insn "sse5_pmacssdd"
6984 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6986 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6987 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6988 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6989 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6991 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6992 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6993 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6994 [(set_attr "type" "ssemuladd")
6995 (set_attr "mode" "TI")])
6997 (define_insn "sse5_pmacssdql"
6998 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7003 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7004 (parallel [(const_int 1)
7007 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7008 (parallel [(const_int 1)
7010 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7011 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7013 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7014 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7015 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7016 [(set_attr "type" "ssemuladd")
7017 (set_attr "mode" "TI")])
7019 (define_insn "sse5_pmacssdqh"
7020 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7025 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7026 (parallel [(const_int 0)
7030 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7031 (parallel [(const_int 0)
7033 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7034 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7036 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7037 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7038 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7039 [(set_attr "type" "ssemuladd")
7040 (set_attr "mode" "TI")])
7042 (define_insn "sse5_pmacsdql"
7043 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7048 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7049 (parallel [(const_int 1)
7053 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7054 (parallel [(const_int 1)
7056 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7057 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7059 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7060 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7061 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7062 [(set_attr "type" "ssemuladd")
7063 (set_attr "mode" "TI")])
7065 (define_insn "sse5_pmacsdqh"
7066 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7071 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7072 (parallel [(const_int 0)
7076 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7077 (parallel [(const_int 0)
7079 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7080 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7082 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7083 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7084 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7085 [(set_attr "type" "ssemuladd")
7086 (set_attr "mode" "TI")])
7088 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
7089 (define_insn "sse5_pmacsswd"
7090 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7095 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7096 (parallel [(const_int 1)
7102 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7103 (parallel [(const_int 1)
7107 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7108 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7110 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7111 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7112 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7113 [(set_attr "type" "ssemuladd")
7114 (set_attr "mode" "TI")])
7116 (define_insn "sse5_pmacswd"
7117 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7122 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7123 (parallel [(const_int 1)
7129 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7130 (parallel [(const_int 1)
7134 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7135 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7137 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7138 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7139 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7140 [(set_attr "type" "ssemuladd")
7141 (set_attr "mode" "TI")])
7143 (define_insn "sse5_pmadcsswd"
7144 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7150 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7151 (parallel [(const_int 0)
7157 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7158 (parallel [(const_int 0)
7166 (parallel [(const_int 1)
7173 (parallel [(const_int 1)
7177 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7178 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7180 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7181 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7182 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7183 [(set_attr "type" "ssemuladd")
7184 (set_attr "mode" "TI")])
7186 (define_insn "sse5_pmadcswd"
7187 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7193 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7194 (parallel [(const_int 0)
7200 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7201 (parallel [(const_int 0)
7209 (parallel [(const_int 1)
7216 (parallel [(const_int 1)
7220 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7221 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7223 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7224 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7225 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7226 [(set_attr "type" "ssemuladd")
7227 (set_attr "mode" "TI")])
7229 ;; SSE5 parallel XMM conditional moves
;; Bitwise select: operand 3 is the selector, operand 1 is taken where the
;; selector is set and operand 2 where it is clear (if_then_else).
;; The first four alternatives use pcmov with either operand 1 or operand 3
;; tied to the destination.  The last two handle a zero ("C") arm without
;; pcmov, with the selector (operand 3) tied to the destination:
;;   operand 1 == 0:  result = ~selector & operand 2  ->  andnps %2, %0
;;   operand 2 == 0:  result =  selector & operand 1  ->  andps  %1, %0
;; (andnps complements its destination operand before the AND.)
7230 (define_insn "sse5_pcmov_<mode>"
7231 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7232 (if_then_else:SSEMODE
7233 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7234 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7235 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7236 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7238 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7239 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7240 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7241 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7242 andnps\t{%2, %0|%0, %2}
7243 andps\t{%1, %0|%0, %1}"
7244 [(set_attr "type" "sse4arg")])
7246 ;; SSE5 horizontal add/subtract instructions
7247 (define_insn "sse5_phaddbw"
7248 [(set (match_operand:V8HI 0 "register_operand" "=x")
7252 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7253 (parallel [(const_int 0)
7264 (parallel [(const_int 1)
7271 (const_int 15)])))))]
7273 "phaddbw\t{%1, %0|%0, %1}"
7274 [(set_attr "type" "sseiadd1")])
7276 (define_insn "sse5_phaddbd"
7277 [(set (match_operand:V4SI 0 "register_operand" "=x")
7282 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7283 (parallel [(const_int 0)
7290 (parallel [(const_int 1)
7298 (parallel [(const_int 2)
7305 (parallel [(const_int 3)
7308 (const_int 15)]))))))]
7310 "phaddbd\t{%1, %0|%0, %1}"
7311 [(set_attr "type" "sseiadd1")])
7313 (define_insn "sse5_phaddbq"
7314 [(set (match_operand:V2DI 0 "register_operand" "=x")
7320 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7321 (parallel [(const_int 0)
7326 (parallel [(const_int 1)
7332 (parallel [(const_int 2)
7337 (parallel [(const_int 3)
7344 (parallel [(const_int 8)
7349 (parallel [(const_int 9)
7355 (parallel [(const_int 10)
7360 (parallel [(const_int 11)
7361 (const_int 15)])))))))]
7363 "phaddbq\t{%1, %0|%0, %1}"
7364 [(set_attr "type" "sseiadd1")])
7366 (define_insn "sse5_phaddwd"
7367 [(set (match_operand:V4SI 0 "register_operand" "=x")
7371 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7372 (parallel [(const_int 0)
7379 (parallel [(const_int 1)
7382 (const_int 7)])))))]
7384 "phaddwd\t{%1, %0|%0, %1}"
7385 [(set_attr "type" "sseiadd1")])
7387 (define_insn "sse5_phaddwq"
7388 [(set (match_operand:V2DI 0 "register_operand" "=x")
7393 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7394 (parallel [(const_int 0)
7399 (parallel [(const_int 1)
7405 (parallel [(const_int 2)
7410 (parallel [(const_int 3)
7411 (const_int 7)]))))))]
7413 "phaddwq\t{%1, %0|%0, %1}"
7414 [(set_attr "type" "sseiadd1")])
7416 (define_insn "sse5_phadddq"
7417 [(set (match_operand:V2DI 0 "register_operand" "=x")
7421 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7422 (parallel [(const_int 0)
7427 (parallel [(const_int 1)
7428 (const_int 3)])))))]
7430 "phadddq\t{%1, %0|%0, %1}"
7431 [(set_attr "type" "sseiadd1")])
7433 (define_insn "sse5_phaddubw"
7434 [(set (match_operand:V8HI 0 "register_operand" "=x")
7438 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7439 (parallel [(const_int 0)
7450 (parallel [(const_int 1)
7457 (const_int 15)])))))]
7459 "phaddubw\t{%1, %0|%0, %1}"
7460 [(set_attr "type" "sseiadd1")])
7462 (define_insn "sse5_phaddubd"
7463 [(set (match_operand:V4SI 0 "register_operand" "=x")
7468 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7469 (parallel [(const_int 0)
7476 (parallel [(const_int 1)
7484 (parallel [(const_int 2)
7491 (parallel [(const_int 3)
7494 (const_int 15)]))))))]
7496 "phaddubd\t{%1, %0|%0, %1}"
7497 [(set_attr "type" "sseiadd1")])
7499 (define_insn "sse5_phaddubq"
7500 [(set (match_operand:V2DI 0 "register_operand" "=x")
7506 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7507 (parallel [(const_int 0)
7512 (parallel [(const_int 1)
7518 (parallel [(const_int 2)
7523 (parallel [(const_int 3)
7530 (parallel [(const_int 8)
7535 (parallel [(const_int 9)
7541 (parallel [(const_int 10)
7546 (parallel [(const_int 11)
7547 (const_int 15)])))))))]
7549 "phaddubq\t{%1, %0|%0, %1}"
7550 [(set_attr "type" "sseiadd1")])
7552 (define_insn "sse5_phadduwd"
7553 [(set (match_operand:V4SI 0 "register_operand" "=x")
7557 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7558 (parallel [(const_int 0)
7565 (parallel [(const_int 1)
7568 (const_int 7)])))))]
7570 "phadduwd\t{%1, %0|%0, %1}"
7571 [(set_attr "type" "sseiadd1")])
7573 (define_insn "sse5_phadduwq"
7574 [(set (match_operand:V2DI 0 "register_operand" "=x")
7579 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7580 (parallel [(const_int 0)
7585 (parallel [(const_int 1)
7591 (parallel [(const_int 2)
7596 (parallel [(const_int 3)
7597 (const_int 7)]))))))]
7599 "phadduwq\t{%1, %0|%0, %1}"
7600 [(set_attr "type" "sseiadd1")])
7602 (define_insn "sse5_phaddudq"
7603 [(set (match_operand:V2DI 0 "register_operand" "=x")
7607 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7608 (parallel [(const_int 0)
7613 (parallel [(const_int 1)
7614 (const_int 3)])))))]
7616 "phaddudq\t{%1, %0|%0, %1}"
7617 [(set_attr "type" "sseiadd1")])
7619 (define_insn "sse5_phsubbw"
7620 [(set (match_operand:V8HI 0 "register_operand" "=x")
7624 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7625 (parallel [(const_int 0)
7636 (parallel [(const_int 1)
7643 (const_int 15)])))))]
7645 "phsubbw\t{%1, %0|%0, %1}"
7646 [(set_attr "type" "sseiadd1")])
7648 (define_insn "sse5_phsubwd"
7649 [(set (match_operand:V4SI 0 "register_operand" "=x")
7653 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7654 (parallel [(const_int 0)
7661 (parallel [(const_int 1)
7664 (const_int 7)])))))]
7666 "phsubwd\t{%1, %0|%0, %1}"
7667 [(set_attr "type" "sseiadd1")])
7669 (define_insn "sse5_phsubdq"
7670 [(set (match_operand:V2DI 0 "register_operand" "=x")
7674 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7675 (parallel [(const_int 0)
7680 (parallel [(const_int 1)
7681 (const_int 3)])))))]
7683 "phsubdq\t{%1, %0|%0, %1}"
7684 [(set_attr "type" "sseiadd1")])
7686 ;; SSE5 permute instructions
;; Byte permute modelled as UNSPEC_SSE5_PERMUTE over three V16QI inputs.
;; Each of the four alternatives ties either operand 1 or operand 3 to the
;; destination (constraint "0"); the insn condition additionally requires
;; TARGET_SSE5 and ix86_sse5_valid_op_p to accept the operand combination.
7687 (define_insn "sse5_pperm"
7688 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7690 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7691 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7692 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7693 UNSPEC_SSE5_PERMUTE))]
7694 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7695 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7696 [(set_attr "type" "sse4arg")
7697 (set_attr "mode" "TI")])
7699 ;; The following are for the various unpack insns which don't need the first
7700 ;; source operand, so we can just use the output operand for the first operand.
7701 ;; This allows either of the other two operands to be a memory operand. We
7702 ;; can't just use the first operand as an argument to the normal pperm because
7703 ;; then an output-only argument suddenly becomes an input operand.
;; V16QI -> V8HI unpack through pperm (zero-extending variant, per the
;; pattern name).  Operand 1 is the source vector, operand 2 the parallel of
;; const_int lane selectors, operand 3 the V16QI permute control vector.
;; The output register is printed twice in the template, standing in for the
;; unused first pperm source.
;; The two alternatives allow a memory operand 1 or a memory operand 3, never
;; both, so the insn condition requires one of operands 1 and 3 to be a
;; register.  Operand 2 is never a register and must not be the one tested.
7704 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7705 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7708 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7709 (match_operand 2 "" "")))) ;; parallel with const_int's
7710 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7712 && (register_operand (operands[1], V16QImode)
7713 || register_operand (operands[3], V16QImode))"
7714 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7715 [(set_attr "type" "sseadd")
7716 (set_attr "mode" "TI")])
;; V16QI -> V8HI unpack through pperm (sign-extending variant, per the
;; pattern name).  Operand 1 is the source vector, operand 2 the parallel of
;; const_int lane selectors, operand 3 the V16QI permute control vector.
;; The output register is printed twice in the template, standing in for the
;; unused first pperm source.
;; The two alternatives allow a memory operand 1 or a memory operand 3, never
;; both, so the insn condition requires one of operands 1 and 3 to be a
;; register.  Operand 2 is never a register and must not be the one tested.
7718 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7719 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7722 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7723 (match_operand 2 "" "")))) ;; parallel with const_int's
7724 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7726 && (register_operand (operands[1], V16QImode)
7727 || register_operand (operands[3], V16QImode))"
7728 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7729 [(set_attr "type" "sseadd")
7730 (set_attr "mode" "TI")])
;; V8HI -> V4SI unpack through pperm (zero-extending variant, per the
;; pattern name).  Operand 1 is the source vector, operand 2 the parallel of
;; const_int lane selectors, operand 3 the V16QI permute control vector.
;; The output register is printed twice in the template, standing in for the
;; unused first pperm source.
;; The two alternatives allow a memory operand 1 or a memory operand 3, never
;; both, so the insn condition requires one of operands 1 and 3 to be a
;; register.  Operand 2 is never a register and must not be the one tested.
7732 (define_insn "sse5_pperm_zero_v8hi_v4si"
7733 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7736 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7737 (match_operand 2 "" "")))) ;; parallel with const_int's
7738 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7740 && (register_operand (operands[1], V8HImode)
7741 || register_operand (operands[3], V16QImode))"
7742 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7743 [(set_attr "type" "sseadd")
7744 (set_attr "mode" "TI")])
;; V8HI -> V4SI unpack through pperm (sign-extending variant, per the
;; pattern name).  Operand 1 is the source vector, operand 2 the parallel of
;; const_int lane selectors, operand 3 the V16QI permute control vector.
;; The output register is printed twice in the template, standing in for the
;; unused first pperm source.
;; The two alternatives allow a memory operand 1 or a memory operand 3, never
;; both, so the insn condition requires one of operands 1 and 3 to be a
;; register.  Operand 2 is never a register and must not be the one tested.
7746 (define_insn "sse5_pperm_sign_v8hi_v4si"
7747 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7750 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7751 (match_operand 2 "" "")))) ;; parallel with const_int's
7752 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7754 && (register_operand (operands[1], V8HImode)
7755 || register_operand (operands[3], V16QImode))"
7756 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7757 [(set_attr "type" "sseadd")
7758 (set_attr "mode" "TI")])
;; V4SI -> V2DI unpack through pperm (zero-extending variant, per the
;; pattern name).  Operand 1 is the source vector, operand 2 the parallel of
;; const_int lane selectors, operand 3 the V16QI permute control vector.
;; The output register is printed twice in the template, standing in for the
;; unused first pperm source.
;; The two alternatives allow a memory operand 1 or a memory operand 3, never
;; both, so the insn condition requires one of operands 1 and 3 to be a
;; register.  Operand 2 is never a register and must not be the one tested.
7760 (define_insn "sse5_pperm_zero_v4si_v2di"
7761 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7764 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7765 (match_operand 2 "" "")))) ;; parallel with const_int's
7766 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7768 && (register_operand (operands[1], V4SImode)
7769 || register_operand (operands[3], V16QImode))"
7770 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7771 [(set_attr "type" "sseadd")
7772 (set_attr "mode" "TI")])
;; V4SI -> V2DI unpack through pperm (sign-extending variant, per the
;; pattern name).  Operand 1 is the source vector, operand 2 the parallel of
;; const_int lane selectors, operand 3 the V16QI permute control vector.
;; The output register is printed twice in the template, standing in for the
;; unused first pperm source.
;; The two alternatives allow a memory operand 1 or a memory operand 3, never
;; both, so the insn condition requires one of operands 1 and 3 to be a
;; register.  Operand 2 is never a register and must not be the one tested.
7774 (define_insn "sse5_pperm_sign_v4si_v2di"
7775 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7778 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7779 (match_operand 2 "" "")))) ;; parallel with const_int's
7780 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7782 && (register_operand (operands[1], V4SImode)
7783 || register_operand (operands[3], V16QImode))"
7784 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7785 [(set_attr "type" "sseadd")
7786 (set_attr "mode" "TI")])
7788 ;; SSE5 pack instructions that combine two vectors into a smaller vector
7789 (define_insn "sse5_pperm_pack_v2di_v4si"
7790 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7793 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7795 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7796 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7797 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7798 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7799 [(set_attr "type" "sse4arg")
7800 (set_attr "mode" "TI")])
7802 (define_insn "sse5_pperm_pack_v4si_v8hi"
7803 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7806 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7808 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7809 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7810 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7811 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7812 [(set_attr "type" "sse4arg")
7813 (set_attr "mode" "TI")])
7815 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7816 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7819 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7821 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7822 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7823 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7824 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7825 [(set_attr "type" "sse4arg")
7826 (set_attr "mode" "TI")])
7828 ;; Floating point permutation (permps, permpd)
7829 (define_insn "sse5_perm<mode>"
7830 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7832 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7833 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7834 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7835 UNSPEC_SSE5_PERMUTE))]
7836 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7837 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7838 [(set_attr "type" "sse4arg")
7839 (set_attr "mode" "<MODE>")])
7841 ;; SSE5 packed rotate instructions
7842 (define_insn "rotl<mode>3"
7843 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
7845 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
7846 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
7848 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7849 [(set_attr "type" "sseishft")
7850 (set_attr "mode" "TI")])
7852 (define_insn "sse5_rotl<mode>3"
7853 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7855 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7856 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
7857 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7858 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7859 [(set_attr "type" "sseishft")
7860 (set_attr "mode" "TI")])
7862 ;; SSE5 packed shift instructions. Note negative values for the shift amount
7863 ;; convert this into a right shift instead of left shift. For now, model this
7864 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
7865 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT rtx code.
;; Emits psha<ssevecsize> for each of the four integer vector modes in
;; SSEMODE1248.  The operation is an UNSPEC_SSE5_ASHIFT of operands 1 and 2.
;; Two alternatives: operand 2 may be memory when operand 1 is a register, or
;; operand 1 may be memory when operand 2 is a register.
7866 (define_insn "sse5_ashl<mode>3"
7867 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7869 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7870 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7871 UNSPEC_SSE5_ASHIFT))]
7872 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7873 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7874 [(set_attr "type" "sseishft")
7875 (set_attr "mode" "TI")])
7877 (define_insn "sse5_lshl<mode>3"
7878 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
7880 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
7881 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
7882 UNSPEC_SSE5_LSHIFT))]
7883 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
7884 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
7885 [(set_attr "type" "sseishft")
7886 (set_attr "mode" "TI")])
7888 ;; SSE5 FRCZ support
7890 (define_insn "sse5_frcz<mode>2"
7891 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7893 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
7896 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
7897 [(set_attr "type" "ssecvt1")
7898 (set_attr "prefix_extra" "1")
7899 (set_attr "mode" "<MODE>")])
7902 (define_insn "sse5_vmfrcz<mode>2"
7903 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7904 (vec_merge:SSEMODEF2P
7906 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
7908 (match_operand:SSEMODEF2P 1 "register_operand" "0")
7911 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
7912 [(set_attr "type" "ssecvt1")
7913 (set_attr "prefix_extra" "1")
7914 (set_attr "mode" "<MODE>")])
7916 (define_insn "sse5_cvtph2ps"
7917 [(set (match_operand:V4SF 0 "register_operand" "=x")
7918 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
7921 "cvtph2ps\t{%1, %0|%0, %1}"
7922 [(set_attr "type" "ssecvt")
7923 (set_attr "mode" "V4SF")])
7925 (define_insn "sse5_cvtps2ph"
7926 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
7927 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
7930 "cvtps2ph\t{%1, %0|%0, %1}"
7931 [(set_attr "type" "ssecvt")
7932 (set_attr "mode" "V4SF")])
7934 ;; Scalar versions of the com instructions that use vector types that are
7935 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
7936 ;; com instructions fill in 0's in the upper bits instead of leaving them
7937 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com compare on vector operands.  Operand 1 is the
;; comparison operator (sse5_comparison_float_operator) applied to register
;; operand 2 and register-or-memory operand 3; the result is vec_merge'd into
;; register operand 0.  The preparation code sets operand 4 to the all-zero
;; constant of the vector mode (CONST0_RTX).
7938 (define_expand "sse5_vmmaskcmp<mode>3"
7939 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
7940 (vec_merge:SSEMODEF2P
7941 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7942 [(match_operand:SSEMODEF2P 2 "register_operand" "")
7943 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
7948 operands[4] = CONST0_RTX (<MODE>mode);
;; Anonymous matcher for the vec_merge comparison form.  Operand 2 must be
;; a register, operand 3 may be a register or memory, and operand 4 is the
;; other vec_merge arm (no predicate, no constraint).  The template prints
;; operator 1 through the %Y modifier -- presumably the condition name;
;; confirm against the target's operand printer -- followed by the
;; <ssemodesuffixf2s> suffix ("ss" for V4SF, "sd" for V2DF).
7951 (define_insn "*sse5_vmmaskcmp<mode>3"
7952 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7953 (vec_merge:SSEMODEF2P
7954 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
7955 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
7956 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
7957 (match_operand:SSEMODEF2P 4 "")
7960 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
7961 [(set_attr "type" "sse4arg")
7962 (set_attr "mode" "<ssescalarmode>")])
7964 ;; We don't have a comparison operator that always returns true/false, so
7965 ;; handle comfalse and comtrue specially.
;; comfalse/comtrue for the SSEMODEF2P modes (V4SF, V2DF).  Operands 1 and
;; 2 are the inputs (operand 2 may be memory); operand 3 is a compile-time
;; integer constant that selects the mnemonic in the switch below.  The
;; four arms are, in order: comfalses, comfalsep, comtrues, comtruep.  Each
;; is followed by <ssemodesuffixf2c>, which maps V4SF to "s" and V2DF to
;; "d".  The selected template string is returned through `ret'.
7966 (define_insn "sse5_com_tf<mode>3"
7967 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
7969 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
7970 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
7971 (match_operand:SI 3 "const_int_operand" "n")]
7972 UNSPEC_SSE5_TRUEFALSE))]
7975 const char *ret = NULL;
7977 switch (INTVAL (operands[3]))
7980 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7984 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7988 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
7992 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8001 [(set_attr "type" "ssecmp")
8002 (set_attr "mode" "<MODE>")])
;; Floating-point comparison for the SSEMODEF2P modes: the destination is
;; set directly to operator 1 applied to operands 2 and 3, with no
;; vec_merge.  The mnemonic is "com", the %Y1 rendering of the operator,
;; then <ssemodesuffixf4> ("ps" for V4SF, "pd" for V2DF).
8004 (define_insn "sse5_maskcmp<mode>3"
8005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8006 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8007 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8008 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8010 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8011 [(set_attr "type" "ssecmp")
8012 (set_attr "mode" "<MODE>")])
;; Integer counterpart of the floating-point sse5_maskcmp pattern,
;; iterated over SSEMODE1248 (V16QI, V8HI, V4SI, V2DI).  Operator 1 must
;; satisfy ix86_comparison_int_operator.  The mnemonic is "pcom", the %Y1
;; rendering of the operator, then the element-size letter from
;; <ssevecsize> (b, w, d or q).
8014 (define_insn "sse5_maskcmp<mode>3"
8015 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8016 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8017 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8018 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8020 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8021 [(set_attr "type" "sse4arg")
8022 (set_attr "mode" "TI")])
;; Unsigned integer comparison over SSEMODE1248.  Operator 1 must satisfy
;; ix86_comparison_uns_operator.  The template inserts a literal "u"
;; between the %Y1 rendering of the operator and the <ssevecsize> letter,
;; giving pcom<cond>u{b,w,d,q}.
8024 (define_insn "sse5_maskcmp_uns<mode>3"
8025 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8026 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8027 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8028 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8030 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8031 [(set_attr "type" "ssecmp")
8032 (set_attr "mode" "TI")])
8034 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8035 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8036 ;; the exact instruction generated for the intrinsic.
;; Same operands, predicate and output template as sse5_maskcmp_uns<mode>3,
;; but the comparison is an element of an UNSPEC_SSE5_UNSIGNED_CMP vector
;; instead of being the set source directly.
8037 (define_insn "sse5_maskcmp_uns2<mode>3"
8038 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8040 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8041 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8042 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8043 UNSPEC_SSE5_UNSIGNED_CMP))]
8045 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8046 [(set_attr "type" "ssecmp")
8047 (set_attr "mode" "TI")])
8049 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8050 ;; being added here to be complete.
;; Operands 1 and 2 are the vector inputs (operand 2 may be memory);
;; operand 3 is a compile-time integer constant.  A nonzero operand 3
;; selects the pcomtrue mnemonic, zero selects pcomfalse; either is
;; followed by the <ssevecsize> element-size letter (b, w, d or q).
8051 (define_insn "sse5_pcom_tf<mode>3"
8052 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8054 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8055 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8056 (match_operand:SI 3 "const_int_operand" "n")]
8057 UNSPEC_SSE5_TRUEFALSE))]
8060 return ((INTVAL (operands[3]) != 0)
8061 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8062 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8064 [(set_attr "type" "ssecmp")
8065 (set_attr "mode" "TI")])
;; aesenc on V2DI values, modelled as an unspec.  Operand 1 is tied to the
;; destination by the "0" constraint, so the emitted instruction names
;; only two operands; operand 2 may be a register or memory.
8067 (define_insn "aesenc"
8068 [(set (match_operand:V2DI 0 "register_operand" "=x")
8069 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8070 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8073 "aesenc\t{%2, %0|%0, %2}"
8074 [(set_attr "type" "sselog1")
8075 (set_attr "prefix_extra" "1")
8076 (set_attr "mode" "TI")])
;; aesenclast on V2DI values, modelled as UNSPEC_AESENCLAST.  Operand 1 is
;; tied to the destination ("0"); operand 2 may be a register or memory.
8078 (define_insn "aesenclast"
8079 [(set (match_operand:V2DI 0 "register_operand" "=x")
8080 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8081 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8082 UNSPEC_AESENCLAST))]
8084 "aesenclast\t{%2, %0|%0, %2}"
8085 [(set_attr "type" "sselog1")
8086 (set_attr "prefix_extra" "1")
8087 (set_attr "mode" "TI")])
;; aesdec on V2DI values, modelled as an unspec.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
8089 (define_insn "aesdec"
8090 [(set (match_operand:V2DI 0 "register_operand" "=x")
8091 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8092 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8095 "aesdec\t{%2, %0|%0, %2}"
8096 [(set_attr "type" "sselog1")
8097 (set_attr "prefix_extra" "1")
8098 (set_attr "mode" "TI")])
;; aesdeclast on V2DI values, modelled as UNSPEC_AESDECLAST.  Operand 1 is
;; tied to the destination ("0"); operand 2 may be a register or memory.
8100 (define_insn "aesdeclast"
8101 [(set (match_operand:V2DI 0 "register_operand" "=x")
8102 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8103 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8104 UNSPEC_AESDECLAST))]
8106 "aesdeclast\t{%2, %0|%0, %2}"
8107 [(set_attr "type" "sselog1")
8108 (set_attr "prefix_extra" "1")
8109 (set_attr "mode" "TI")])
;; aesimc: single-input form.  Operand 1 (register or memory) is the only
;; source and is not tied to the destination register.
8111 (define_insn "aesimc"
8112 [(set (match_operand:V2DI 0 "register_operand" "=x")
8113 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8116 "aesimc\t{%1, %0|%0, %1}"
8117 [(set_attr "type" "sselog1")
8118 (set_attr "prefix_extra" "1")
8119 (set_attr "mode" "TI")])
;; aeskeygenassist, modelled as UNSPEC_AESKEYGENASSIST.  Operand 1 is the
;; V2DI source (register or memory); operand 2 is an immediate that must
;; satisfy const_0_to_255_operand.
8121 (define_insn "aeskeygenassist"
8122 [(set (match_operand:V2DI 0 "register_operand" "=x")
8123 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
8124 (match_operand:SI 2 "const_0_to_255_operand" "n")]
8125 UNSPEC_AESKEYGENASSIST))]
8127 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
8128 [(set_attr "type" "sselog1")
8129 (set_attr "prefix_extra" "1")
8130 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI values, modelled as an unspec.  Operand 1 is tied to
;; the destination ("0"), operand 2 may be a register or memory, and
;; operand 3 is an immediate that must satisfy const_0_to_255_operand.
8132 (define_insn "pclmulqdq"
8133 [(set (match_operand:V2DI 0 "register_operand" "=x")
8134 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8135 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
8136 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8139 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
8140 [(set_attr "type" "sselog1")
8141 (set_attr "prefix_extra" "1")
8142 (set_attr "mode" "TI")])