1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; line up with the element widths, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF4 lists the scalar and the packed float
;; modes, SSEMODEF2P only the two packed ones.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix: scalar modes get ss/sd, packed modes ps/pd
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; Same mapping, except that the packed modes also get the scalar suffix;
;; used by the scalar (vm) SSE5 patterns.
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; One-letter suffix for the packed float modes; the patterns append it
;; after a "p" or "s" in the mnemonic (e.g. movup<...>, muls<...>).
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
57 (V16QI "QI") (V8HI "HI")
58 (V4SI "SI") (V2DI "DI")])
60 ;; Number of scalar elements in each vector type
61 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
62 (V16QI "16") (V8HI "8")
63 (V4SI "4") (V2DI "2")])
65 ;; Mapping of immediate bits for blend instructions
66 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
68 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
70 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
74 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
76 ;; All of these patterns are enabled for SSE1 as well as SSE2.
77 ;; This is essential for maintaining stable calling conventions.
;;
;; Move expander for every mode in SSEMODE.  Both operands are accepted
;; as nonimmediate_operand; the expansion itself is delegated to
;; ix86_expand_vector_move.
79 (define_expand "mov<mode>"
80 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
81 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
84 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in SSEMODE, with three alternatives:
;;   0: SSE constant (constraint C) -> xmm register
;;   1: xmm register or memory      -> xmm register
;;   2: xmm register                -> memory
;; The condition requires at least one of the two operands to be a
;; register.  The output code dispatches on which_alternative: one path
;; returns standard_sse_constant_opcode, the other selects movaps, movapd
;; or movdqa according to get_attr_mode.
;; The trailing cond expression yields V4SF when optimizing for size,
;; when TARGET_SSE2 is off, or for alternative 2 when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise V4SF for V4SFmode, V2DF
;; for V2DFmode, and TI for every other mode.
88 (define_insn "*mov<mode>_internal"
89 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
90 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
92 && (register_operand (operands[0], <MODE>mode)
93 || register_operand (operands[1], <MODE>mode))"
95 switch (which_alternative)
98 return standard_sse_constant_opcode (insn, operands[1]);
101 switch (get_attr_mode (insn))
104 return "movaps\t{%1, %0|%0, %1}";
106 return "movapd\t{%1, %0|%0, %1}";
108 return "movdqa\t{%1, %0|%0, %1}";
114 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
117 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
118 (and (eq_attr "alternative" "2")
119 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
121 (const_string "V4SF")
122 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
123 (const_string "V4SF")
124 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
125 (const_string "V2DF")
127 (const_string "TI")))])
129 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
130 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
131 ;; from memory, we'd prefer to load the memory directly into the %xmm
132 ;; register. To facilitate this happy circumstance, this pattern won't
133 ;; split until after register allocation. If the 64-bit value didn't
134 ;; come from memory, this is the best we can do. This is much better
135 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from it.
;;
;; Alternative 0 takes the DImode source from general registers and
;; needs an early-clobbered xmm scratch (operand 2); alternative 1 takes
;; it from memory and needs no scratch.  Only enabled for 32-bit targets
;; with SSE2 and TARGET_INTER_UNIT_MOVES.
;; Split code: for a register source the two SImode halves (subreg byte
;; offsets 0 and 4) are loaded into operand 0 and the scratch with
;; sse2_loadld and then interleaved with sse2_punpckldq; for a memory
;; source a single vec_concatv2di with const0_rtx is emitted.
138 (define_insn_and_split "movdi_to_sse"
140 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
141 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
142 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
143 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
145 "&& reload_completed"
148 if (register_operand (operands[1], DImode))
150 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
151 Assemble the 64-bit DImode value in an xmm register. */
152 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
153 gen_rtx_SUBREG (SImode, operands[1], 0)));
154 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
155 gen_rtx_SUBREG (SImode, operands[1], 4)));
156 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
158 else if (memory_operand (operands[1], DImode))
159 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its
;; SFmode part at byte offset 0 and used under vec_duplicate; operand 2
;; is set to the V4SF zero vector.
165 [(set (match_operand:V4SF 0 "register_operand" "")
166 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
167 "TARGET_SSE && reload_completed"
170 (vec_duplicate:V4SF (match_dup 1))
174 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
175 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The replacement is a vec_concat
;; of operand 1, narrowed to its DFmode part at byte offset 0, with
;; operand 2, which is set to the DFmode zero constant.
179 [(set (match_operand:V2DF 0 "register_operand" "")
180 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
181 "TARGET_SSE2 && reload_completed"
182 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
184 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
185 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register operand in any SSEMODE mode; the work is
;; done by ix86_expand_push.
188 (define_expand "push<mode>1"
189 [(match_operand:SSEMODE 0 "register_operand" "")]
192 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE mode; the work is done by
;; ix86_expand_vector_move_misalign.
196 (define_expand "movmisalign<mode>"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
198 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
201 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Packed-float move emitted as movups (V4SF) or movupd (V2DF).
;; Alternative 0 loads an xmm register from a register or memory;
;; alternative 1 stores an xmm register to memory.  The condition
;; rejects memory-to-memory moves.
205 (define_insn "<sse>_movup<ssemodesuffixf2c>"
206 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
208 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
210 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
211 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "<MODE>")])
;; V16QI move emitted as movdqu, with the source wrapped in an unspec.
;; Alternative 0 loads an xmm register from a register or memory;
;; alternative 1 stores an xmm register to memory.  Requires SSE2 and
;; rejects memory-to-memory moves.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "prefix_data16" "1")
224 (set_attr "mode" "TI")])
;; Store of a packed-float xmm register to memory, emitted as movntps
;; (V4SF) or movntpd (V2DF).  The destination must be memory and the
;; source a register.
226 (define_insn "<sse>_movnt<mode>"
227 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
229 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
231 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
232 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
233 [(set_attr "type" "ssemov")
234 (set_attr "mode" "<MODE>")])
;; Store of a V2DI xmm register to memory, emitted as movntdq.  The
;; source is wrapped in an unspec.
236 (define_insn "sse2_movntv2di"
237 [(set (match_operand:V2DI 0 "memory_operand" "=m")
238 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
241 "movntdq\t{%1, %0|%0, %1}"
242 [(set_attr "type" "ssecvt")
243 (set_attr "prefix_data16" "1")
244 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint "r") to memory,
;; emitted as movnti.  The source is wrapped in an unspec.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intentional.
246 (define_insn "sse2_movntsi"
247 [(set (match_operand:SI 0 "memory_operand" "=m")
248 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
251 "movnti\t{%1, %0|%0, %1}"
252 [(set_attr "type" "ssecvt")
253 (set_attr "mode" "V2DF")])
;; Load of a V16QI value from memory into an xmm register, emitted as
;; lddqu.  The source must be a memory operand and is wrapped in an
;; unspec.
255 (define_insn "sse3_lddqu"
256 [(set (match_operand:V16QI 0 "register_operand" "=x")
257 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
260 "lddqu\t{%1, %0|%0, %1}"
261 [(set_attr "type" "ssecvt")
262 (set_attr "prefix_rep" "1")
263 (set_attr "mode" "TI")])
265 ; Expand patterns for non-temporal stores. At the moment, only those
266 ; that directly map to insns are defined; it would be possible to
267 ; define patterns for other modes that would expand to several insns.
;
; Packed-float variant: memory destination, register source, enabled
; when SSE_VEC_FLOAT_MODE_P holds for the mode.
269 (define_expand "storent<mode>"
270 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
272 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
274 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
; storent variant for the MODEF modes (iterator defined elsewhere):
; memory destination, register source.
277 (define_expand "storent<mode>"
278 [(set (match_operand:MODEF 0 "memory_operand" "")
280 [(match_operand:MODEF 1 "register_operand" "")]
; storent variant for V2DI: memory destination, register source wrapped
; in an unspec.
285 (define_expand "storentv2di"
286 [(set (match_operand:V2DI 0 "memory_operand" "")
287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
; storent variant for SImode: memory destination, register source
; wrapped in an unspec.
292 (define_expand "storentsi"
293 [(set (match_operand:SI 0 "memory_operand" "")
294 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
301 ;; Parallel floating point arithmetic
303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for unary operations on packed float vectors, one per value
;; of <code> (the code iterator is defined elsewhere).  All of the work
;; is done by ix86_expand_fp_absneg_operator, after which the expansion
;; is DONE.
305 (define_expand "<code><mode>2"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
308 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
309 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
310 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for the plusminus operations on packed float vectors.  Both
;; source operands may be registers or memory;
;; ix86_fixup_binary_operands_no_copy adjusts them before the pattern is
;; emitted.
312 (define_expand "<plusminus_insn><mode>3"
313 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
314 (plusminus:SSEMODEF2P
315 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
316 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
317 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
318 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float plusminus insn in two-operand form: operand 1 is tied to
;; the destination, operand 2 may be an xmm register or memory.  The
;; <comm> prefix on the operand 1 constraint comes from a code attribute
;; defined elsewhere (presumably marking the commutative case).  The
;; condition also requires ix86_binary_operator_ok.
320 (define_insn "*<plusminus_insn><mode>3"
321 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
322 (plusminus:SSEMODEF2P
323 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
324 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
325 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
326 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
327 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
328 [(set_attr "type" "sseadd")
329 (set_attr "mode" "<MODE>")])
;; Scalar form of the plusminus insn: the operation sits inside a
;; vec_merge, the mnemonic uses the "s" (scalar) infix, and the "mode"
;; attribute is the scalar element mode.  Operand 1 must be a register
;; tied to the destination.
331 (define_insn "<sse>_vm<plusminus_insn><mode>3"
332 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
333 (vec_merge:SSEMODEF2P
334 (plusminus:SSEMODEF2P
335 (match_operand:SSEMODEF2P 1 "register_operand" "0")
336 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
339 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
340 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for packed float vectors.  Both source operands may
;; be registers or memory; ix86_fixup_binary_operands_no_copy is called
;; with MULT to adjust them.
344 (define_expand "mul<mode>3"
345 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
347 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
348 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
349 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
350 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed-float multiply insn (mulps/mulpd).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.  The condition also requires
;; ix86_binary_operator_ok for MULT.
352 (define_insn "*mul<mode>3"
353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
355 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
356 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
357 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
358 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
359 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
360 [(set_attr "type" "ssemul")
361 (set_attr "mode" "<MODE>")])
;; Scalar form of the multiply (mulss/mulsd) expressed inside a
;; vec_merge.  Operand 1 must be a register tied to the destination; the
;; "mode" attribute is the scalar element mode.
363 (define_insn "<sse>_vmmul<mode>3"
364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
365 (vec_merge:SSEMODEF2P
367 (match_operand:SSEMODEF2P 1 "register_operand" "0")
368 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
372 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
373 [(set_attr "type" "ssemul")
374 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the code is not optimized for size, and finite-math-only,
;; non-trapping math and unsafe math optimizations are all in effect,
;; the division is emitted through ix86_emit_swdivsf instead.
376 (define_expand "divv4sf3"
377 [(set (match_operand:V4SF 0 "register_operand" "")
378 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
379 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
383 && flag_finite_math_only && !flag_trapping_math
384 && flag_unsafe_math_optimizations)
386 ix86_emit_swdivsf (operands[0], operands[1],
387 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory
;; divisor.
392 (define_expand "divv2df3"
393 [(set (match_operand:V2DF 0 "register_operand" "")
394 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
395 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Packed-float divide insn (divps/divpd).  Operand 1 must be a register
;; tied to the destination; operand 2 may be an xmm register or memory.
399 (define_insn "<sse>_div<mode>3"
400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
402 (match_operand:SSEMODEF2P 1 "register_operand" "0")
403 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
404 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
405 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
406 [(set_attr "type" "ssediv")
407 (set_attr "mode" "<MODE>")])
;; Scalar form of the divide (divss/divsd) expressed inside a vec_merge.
;; Operand 1 must be a register tied to the destination; the "mode"
;; attribute is the scalar element mode.
409 (define_insn "<sse>_vmdiv<mode>3"
410 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
411 (vec_merge:SSEMODEF2P
413 (match_operand:SSEMODEF2P 1 "register_operand" "0")
414 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
417 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
418 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
419 [(set_attr "type" "ssediv")
420 (set_attr "mode" "<ssescalarmode>")])
;; rcpps on a V4SF operand, modelled as UNSPEC_RCP.  The source may be an
;; xmm register or memory; the destination is not tied to it.
422 (define_insn "sse_rcpv4sf2"
423 [(set (match_operand:V4SF 0 "register_operand" "=x")
425 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
427 "rcpps\t{%1, %0|%0, %1}"
428 [(set_attr "type" "sse")
429 (set_attr "mode" "V4SF")])
;; Scalar rcpss.  Operand 1 (register or memory) is the unspec input;
;; operand 2 is a register tied to the destination.  The "mode"
;; attribute is SF.
431 (define_insn "sse_vmrcpv4sf2"
432 [(set (match_operand:V4SF 0 "register_operand" "=x")
434 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
436 (match_operand:V4SF 2 "register_operand" "0")
439 "rcpss\t{%1, %0|%0, %1}"
440 [(set_attr "type" "sse")
441 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the code is not optimized for size, and finite-math-only,
;; non-trapping math and unsafe math optimizations are all in effect,
;; the operation is emitted through ix86_emit_swsqrtsf (final argument 0).
443 (define_expand "sqrtv4sf2"
444 [(set (match_operand:V4SF 0 "register_operand" "")
445 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
448 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
449 && flag_finite_math_only && !flag_trapping_math
450 && flag_unsafe_math_optimizations)
452 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square root emitted as sqrtps; the source may be an xmm register
;; or memory.
457 (define_insn "sse_sqrtv4sf2"
458 [(set (match_operand:V4SF 0 "register_operand" "=x")
459 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
461 "sqrtps\t{%1, %0|%0, %1}"
462 [(set_attr "type" "sse")
463 (set_attr "mode" "V4SF")])
;; V2DF square root emitted as sqrtpd; the source may be an xmm register
;; or memory.
465 (define_insn "sqrtv2df2"
466 [(set (match_operand:V2DF 0 "register_operand" "=x")
467 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
469 "sqrtpd\t{%1, %0|%0, %1}"
470 [(set_attr "type" "sse")
471 (set_attr "mode" "V2DF")])
;; Scalar square root (sqrtss/sqrtsd) expressed inside a vec_merge.
;; Operand 1 (register or memory) is the input; operand 2 is a register
;; tied to the destination.  The "mode" attribute is the scalar element
;; mode.
473 (define_insn "<sse>_vmsqrt<mode>2"
474 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
475 (vec_merge:SSEMODEF2P
477 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
478 (match_operand:SSEMODEF2P 2 "register_operand" "0")
480 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
481 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
482 [(set_attr "type" "sse")
483 (set_attr "mode" "<ssescalarmode>")])
;; V4SF expander modelled as UNSPEC_RSQRT; the expansion calls
;; ix86_emit_swsqrtsf with final argument 1.
485 (define_expand "rsqrtv4sf2"
486 [(set (match_operand:V4SF 0 "register_operand" "")
488 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
491 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on a V4SF operand, modelled as UNSPEC_RSQRT.  The source may
;; be an xmm register or memory.
495 (define_insn "sse_rsqrtv4sf2"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
500 "rsqrtps\t{%1, %0|%0, %1}"
501 [(set_attr "type" "sse")
502 (set_attr "mode" "V4SF")])
;; Scalar rsqrtss.  Operand 1 (register or memory) is the unspec input;
;; operand 2 is a register tied to the destination.  The "mode"
;; attribute is SF.
504 (define_insn "sse_vmrsqrtv4sf2"
505 [(set (match_operand:V4SF 0 "register_operand" "=x")
507 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
509 (match_operand:V4SF 2 "register_operand" "0")
512 "rsqrtss\t{%1, %0|%0, %1}"
513 [(set_attr "type" "sse")
514 (set_attr "mode" "SF")])
516 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
517 ;; isn't really correct, as those rtl operators aren't defined when
518 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;;
;; Expander for packed-float min/max.  Without flag_finite_math_only,
;; operand 1 is first forced into a register; in either case
;; ix86_fixup_binary_operands_no_copy then adjusts the operands.
520 (define_expand "<code><mode>3"
521 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
523 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
524 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
525 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
527 if (!flag_finite_math_only)
528 operands[1] = force_reg (<MODE>mode, operands[1]);
529 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Variant used only with flag_finite_math_only: operand 1 is tied to the
;; destination and marked commutative ("%0").  The condition also
;; requires ix86_binary_operator_ok.
532 (define_insn "*<code><mode>3_finite"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
538 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
539 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
540 [(set_attr "type" "sseadd")
541 (set_attr "mode" "<MODE>")])
;; General variant with no finite-math requirement: operand 1 must be a
;; register tied to the destination and is not marked commutative.
543 (define_insn "*<code><mode>3"
544 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
546 (match_operand:SSEMODEF2P 1 "register_operand" "0")
547 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
548 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
549 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
550 [(set_attr "type" "sseadd")
551 (set_attr "mode" "<MODE>")])
;; Scalar form of the min/max insn expressed inside a vec_merge; uses the
;; "s" infix in the mnemonic.  Operand 1 must be a register tied to the
;; destination; the "mode" attribute is the scalar element mode.
553 (define_insn "<sse>_vm<code><mode>3"
554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
555 (vec_merge:SSEMODEF2P
557 (match_operand:SSEMODEF2P 1 "register_operand" "0")
558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
561 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
562 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "<ssescalarmode>")])
566 ;; These versions of the min/max patterns implement exactly the operations
567 ;; min = (op1 < op2 ? op1 : op2)
568 ;; max = (!(op1 < op2) ? op1 : op2)
569 ;; Their operands are not commutative, and thus they may be used in the
570 ;; presence of -0.0 and NaN.
;;
;; Minimum: emits minps/minpd with operand 1 (a register) tied to the
;; destination and operand 2 in an xmm register or memory.
572 (define_insn "*ieee_smin<mode>3"
573 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
576 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
579 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
580 [(set_attr "type" "sseadd")
581 (set_attr "mode" "<MODE>")])
;; Non-commutative maximum: emits maxps/maxpd with operand 1 (a register)
;; tied to the destination and operand 2 in an xmm register or memory.
583 (define_insn "*ieee_smax<mode>3"
584 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
586 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
587 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
589 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
590 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
591 [(set_attr "type" "sseadd")
592 (set_attr "mode" "<MODE>")])
;; addsubps on V4SF.  The pattern combines an operation on operands 1 and
;; 2 with (minus (op1) (op2)) on the same operands; operand 1 is a
;; register tied to the destination.
594 (define_insn "sse3_addsubv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "=x")
598 (match_operand:V4SF 1 "register_operand" "0")
599 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
600 (minus:V4SF (match_dup 1) (match_dup 2))
603 "addsubps\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "prefix_rep" "1")
606 (set_attr "mode" "V4SF")])
;; addsubpd on V2DF.  The pattern combines an operation on operands 1 and
;; 2 with (minus (op1) (op2)) on the same operands; operand 1 is a
;; register tied to the destination.
608 (define_insn "sse3_addsubv2df3"
609 [(set (match_operand:V2DF 0 "register_operand" "=x")
612 (match_operand:V2DF 1 "register_operand" "0")
613 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
614 (minus:V2DF (match_dup 1) (match_dup 2))
617 "addsubpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
;; Horizontal plusminus on V4SF, emitted as h<mnemonic>ps.  The pattern
;; selects elements 0 and 1, then 2 and 3, of operand 1, followed by the
;; same element pairs of operand 2.  Operand 1 is a register tied to the
;; destination; operand 2 may be an xmm register or memory.
621 (define_insn "sse3_h<plusminus_insn>v4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (match_operand:V4SF 1 "register_operand" "0")
628 (parallel [(const_int 0)]))
629 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
631 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
632 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
636 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
637 (parallel [(const_int 0)]))
638 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
640 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
641 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
643 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
644 [(set_attr "type" "sseadd")
645 (set_attr "prefix_rep" "1")
646 (set_attr "mode" "V4SF")])
;; Horizontal plusminus on V2DF, emitted as h<mnemonic>pd.  The pattern
;; selects elements 0 and 1 of operand 1, followed by elements 0 and 1 of
;; operand 2.  Operand 1 is a register tied to the destination; operand 2
;; may be an xmm register or memory.
648 (define_insn "sse3_h<plusminus_insn>v2df3"
649 [(set (match_operand:V2DF 0 "register_operand" "=x")
653 (match_operand:V2DF 1 "register_operand" "0")
654 (parallel [(const_int 0)]))
655 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
658 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
659 (parallel [(const_int 0)]))
660 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
662 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sseadd")
664 (set_attr "mode" "V2DF")])
;; V4SF sum-reduction expander with two code paths: either two
;; sse3_haddv4sf3 insns through a fresh temporary register (operand 1
;; added to itself, then the temporary added to itself), or
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
666 (define_expand "reduc_splus_v4sf"
667 [(match_operand:V4SF 0 "register_operand" "")
668 (match_operand:V4SF 1 "register_operand" "")]
673 rtx tmp = gen_reg_rtx (V4SFmode);
674 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
675 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
678 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum-reduction expander: a single sse3_haddv2df3 with operand 1
;; used as both sources.
682 (define_expand "reduc_splus_v2df"
683 [(match_operand:V2DF 0 "register_operand" "")
684 (match_operand:V2DF 1 "register_operand" "")]
687 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF max-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_smaxv4sf3.
691 (define_expand "reduc_smax_v4sf"
692 [(match_operand:V4SF 0 "register_operand" "")
693 (match_operand:V4SF 1 "register_operand" "")]
696 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF min-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_sminv4sf3.
700 (define_expand "reduc_smin_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
711 ;; Parallel floating point comparisons
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mask-generating compare for scalar and packed float modes.  Operand 3
;; is the comparison operator (sse_comparison_operator); it is printed
;; with %D3 between "cmp" and the mode suffix.  Operand 1 is a register
;; tied to the destination.
715 (define_insn "<sse>_maskcmp<mode>3"
716 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
717 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
718 [(match_operand:SSEMODEF4 1 "register_operand" "0")
719 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
720 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
722 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
723 [(set_attr "type" "ssecmp")
724 (set_attr "mode" "<MODE>")])
;; Scalar form of the mask-generating compare, expressed inside a
;; vec_merge and emitted with the "s" infix (cmp<cond>ss/sd).  Disabled
;; when TARGET_SSE5 is set.  The "mode" attribute is the scalar element
;; mode.
726 (define_insn "<sse>_vmmaskcmp<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (vec_merge:SSEMODEF2P
729 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
730 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
731 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
734 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
735 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
736 [(set_attr "type" "ssecmp")
737 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from two vector operands, each selected at
;; element index 0; emitted as comis<suffix>.  Operand 0 must be an xmm
;; register, operand 1 may be an xmm register or memory.
739 (define_insn "<sse>_comi"
740 [(set (reg:CCFP FLAGS_REG)
743 (match_operand:<ssevecmode> 0 "register_operand" "x")
744 (parallel [(const_int 0)]))
746 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
747 (parallel [(const_int 0)]))))]
748 "SSE_FLOAT_MODE_P (<MODE>mode)"
749 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
750 [(set_attr "type" "ssecomi")
751 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but sets FLAGS_REG in CCFPU mode and
;; is emitted as ucomis<suffix>.
753 (define_insn "<sse>_ucomi"
754 [(set (reg:CCFPU FLAGS_REG)
757 (match_operand:<ssevecmode> 0 "register_operand" "x")
758 (parallel [(const_int 0)]))
760 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
761 (parallel [(const_int 0)]))))]
762 "SSE_FLOAT_MODE_P (<MODE>mode)"
763 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
764 [(set_attr "type" "ssecomi")
765 (set_attr "mode" "<MODE>")])
;; Vector conditional expander for packed float modes: operands 4 and 5
;; are the values compared, operands 1 and 2 the two if_then_else arms.
;; The expansion is attempted through ix86_expand_fp_vcond.
767 (define_expand "vcond<mode>"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
769 (if_then_else:SSEMODEF2P
771 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
772 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
773 (match_operand:SSEMODEF2P 1 "general_operand" "")
774 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
777 if (ix86_expand_fp_vcond (operands))
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; Parallel floating point logical operations
787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-float and-not, emitted as andnps/andnpd.  Operand 1 is a
;; register tied to the destination; operand 2 may be an xmm register or
;; memory.
789 (define_insn "<sse>_nand<mode>3"
790 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
793 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
794 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
795 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
796 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
797 [(set_attr "type" "sselog")
798 (set_attr "mode" "<MODE>")])
;; Expander for the packed-float logical operations named by <code>.
;; Both source operands may be registers or memory;
;; ix86_fixup_binary_operands_no_copy adjusts them.
800 (define_expand "<code><mode>3"
801 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
803 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
804 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
805 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
806 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float logical insn, emitted as <plogicprefix>ps/pd.  Operand 1
;; is tied to the destination and marked commutative ("%0"); operand 2
;; may be an xmm register or memory.  The condition also requires
;; ix86_binary_operator_ok.
808 (define_insn "*<code><mode>3"
809 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
811 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
812 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
813 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
814 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
815 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
816 [(set_attr "type" "sselog")
817 (set_attr "mode" "<MODE>")])
819 ;; Also define scalar versions. These are used for abs, neg, and
820 ;; conditional move. Using subregs into vector modes causes register
821 ;; allocation lossage. These patterns do not allow memory operands
822 ;; because the native instructions read the full 128-bits.
;;
;; Scalar and-not on MODEF operands: all three operands are xmm
;; registers, operand 1 is tied to the destination, and the "mode"
;; attribute is the corresponding vector mode.
824 (define_insn "*nand<mode>3"
825 [(set (match_operand:MODEF 0 "register_operand" "=x")
828 (match_operand:MODEF 1 "register_operand" "0"))
829 (match_operand:MODEF 2 "register_operand" "x")))]
830 "SSE_FLOAT_MODE_P (<MODE>mode)"
831 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
832 [(set_attr "type" "sselog")
833 (set_attr "mode" "<ssevecmode>")])
;; Scalar logical operations on MODEF operands, emitted with the packed
;; mnemonic (<plogicprefix>p<suffix>).  All operands are xmm registers
;; (no memory alternative), operand 1 is tied to the destination, and the
;; "mode" attribute is the corresponding vector mode.
835 (define_insn "*<code><mode>3"
836 [(set (match_operand:MODEF 0 "register_operand" "=x")
838 (match_operand:MODEF 1 "register_operand" "0")
839 (match_operand:MODEF 2 "register_operand" "x")))]
840 "SSE_FLOAT_MODE_P (<MODE>mode)"
841 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
842 [(set_attr "type" "sselog")
843 (set_attr "mode" "<ssevecmode>")])
845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
847 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
848 ;; scalar version of the instructions as well as the vector version.
850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
852 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
853 ;; combine to generate a multiply/add with two memory references. We then
854 ;; split this insn, into loading up the destination register with one of the
855 ;; memory operations. If we don't manage to split the insn, reload will
856 ;; generate the appropriate moves. This is needed because combine
857 ;; has already folded one of the memory references into both the multiply and
858 ;; add insns, and it can't generate a new pseudo. I.e.:
859 ;; (set (reg1) (mem (addr1)))
860 ;; (set (reg2) (mult (reg1) (mem (addr2))))
861 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;;
;; Four-operand fmadd for scalar and packed float modes.  In each of the
;; four alternatives one source operand is tied to the destination
;; (operand 1 in the first two, operand 3 in the last two).  The insn
;; condition calls ix86_sse5_valid_op_p with final argument 2.
863 (define_insn "sse5_fmadd<mode>4"
864 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
867 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
868 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
869 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
870 "TARGET_SSE5 && TARGET_FUSED_MADD
871 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
872 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
873 [(set_attr "type" "ssemuladd")
874 (set_attr "mode" "<MODE>")])
876 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The visible condition holds when ix86_sse5_valid_op_p rejects the
;; operands with final argument 1 but accepts them with final argument 2
;; (presumably the number of memory operands permitted), and the
;; destination register is not mentioned in any source operand.  The
;; replacement calls ix86_expand_sse5_multiple_memory on the operands and
;; then re-emits sse5_fmadd<mode>4.
878 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
881 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
882 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
883 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
885 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
886 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
887 && !reg_mentioned_p (operands[0], operands[1])
888 && !reg_mentioned_p (operands[0], operands[2])
889 && !reg_mentioned_p (operands[0], operands[3])"
892 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
893 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
894 operands[2], operands[3]));
898 ;; For the scalar operations, use operand1 for the upper words that aren't
899 ;; modified, so restrict the forms that are generated.
900 ;; Scalar version of fmadd
;; Both alternatives tie operand 1 to the destination; at most one of
;; operands 2 and 3 may be in memory per alternative.  The condition
;; calls ix86_sse5_valid_op_p with final argument 1, and the mnemonic
;; uses the scalar suffix (ssemodesuffixf2s).
901 (define_insn "sse5_vmfmadd<mode>4"
902 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
903 (vec_merge:SSEMODEF2P
906 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
907 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
908 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
911 "TARGET_SSE5 && TARGET_FUSED_MADD
912 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
913 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
914 [(set_attr "type" "ssemuladd")
915 (set_attr "mode" "<MODE>")])
917 ;; Floating multiply and subtract
918 ;; Allow two memory operands the same as fmadd
;; The operand constraints are identical to those of sse5_fmadd<mode>4:
;; operand 1 is tied to the destination in the first two alternatives,
;; operand 3 in the last two.  The insn condition calls
;; ix86_sse5_valid_op_p with final argument 2.
919 (define_insn "sse5_fmsub<mode>4"
920 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
923 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
924 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
925 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
926 "TARGET_SSE5 && TARGET_FUSED_MADD
927 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
928 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
929 [(set_attr "type" "ssemuladd")
930 (set_attr "mode" "<MODE>")])
932 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The visible condition holds when ix86_sse5_valid_op_p rejects the
;; operands with final argument 1 but accepts them with final argument 2
;; (presumably the number of memory operands permitted), and the
;; destination register is not mentioned in any source operand.  The
;; replacement calls ix86_expand_sse5_multiple_memory on the operands and
;; then re-emits sse5_fmsub<mode>4.
934 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
937 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
938 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
939 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
941 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
942 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
943 && !reg_mentioned_p (operands[0], operands[1])
944 && !reg_mentioned_p (operands[0], operands[2])
945 && !reg_mentioned_p (operands[0], operands[3])"
948 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
949 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
950 operands[2], operands[3]));
954 ;; For the scalar operations, use operand1 for the upper words that aren't
955 ;; modified, so restrict the forms that are generated.
956 ;; Scalar version of fmsub
;; Both alternatives tie operand 1 to the destination; at most one of
;; operands 2 and 3 may be in memory per alternative.  The condition
;; calls ix86_sse5_valid_op_p with final argument 1, and the mnemonic
;; uses the scalar suffix (ssemodesuffixf2s).
957 (define_insn "sse5_vmfmsub<mode>4"
958 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
959 (vec_merge:SSEMODEF2P
962 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
963 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
964 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
967 "TARGET_SSE5 && TARGET_FUSED_MADD
968 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
969 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
970 [(set_attr "type" "ssemuladd")
971 (set_attr "mode" "<MODE>")])
973 ;; Floating point negative multiply and add
974 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
975 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
976 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the "c" term) is written first in the pattern, followed by
;; operands 1 and 2; the per-operand constraints are the same as for
;; sse5_fmadd<mode>4, and the assembler template still prints the
;; operands in numeric order.
977 (define_insn "sse5_fnmadd<mode>4"
978 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
980 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
982 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
983 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
984 "TARGET_SSE5 && TARGET_FUSED_MADD
985 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
986 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
987 [(set_attr "type" "ssemuladd")
988 (set_attr "mode" "<MODE>")])
990 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Same guard as the other SSE5 splitters: the operands must fail
;; ix86_sse5_valid_op_p with a final argument of 1 yet pass with 2, and
;; operand 0 must not be mentioned in operands 1-3.  The replacement code
;; calls ix86_expand_sse5_multiple_memory, then emits sse5_fnmadd<mode>4.
992 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
994 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
996 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
997 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
999 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1000 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1001 && !reg_mentioned_p (operands[0], operands[1])
1002 && !reg_mentioned_p (operands[0], operands[2])
1003 && !reg_mentioned_p (operands[0], operands[3])"
1006 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1007 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1008 operands[2], operands[3]));
1012 ;; For the scalar operations, use operand1 for the upper words that aren't
1013 ;; modified, so restrict the forms that are generated.
1014 ;; Scalar version of fnmadd
;; Operand 3 is written first in the RTL.  Two alternatives only: operand 1
;; is always tied to operand 0, and memory is allowed in operand 3 or in
;; operand 2.  Suffix from <ssemodesuffixf2s>: ss for V4SF, sd for V2DF.
1015 (define_insn "sse5_vmfnmadd<mode>4"
1016 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1017 (vec_merge:SSEMODEF2P
1019 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1021 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1025 "TARGET_SSE5 && TARGET_FUSED_MADD
1026 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1027 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1028 [(set_attr "type" "ssemuladd")
1029 (set_attr "mode" "<MODE>")])
1031 ;; Floating point negative multiply and subtract
1032 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1033 ;; Allow 2 memory operands to help with optimization
;; Unlike fmsub/fnmadd, only two alternatives are listed here and operand 1
;; is always tied to operand 0.  The insn condition passes 2 as the final
;; argument of ix86_sse5_valid_op_p, in line with the comment above.
1034 (define_insn "sse5_fnmsub<mode>4"
1035 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1039 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1040 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1041 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1042 "TARGET_SSE5 && TARGET_FUSED_MADD
1043 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1044 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1045 [(set_attr "type" "ssemuladd")
1046 (set_attr "mode" "<MODE>")])
1048 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Same guard as the other SSE5 splitters: the operands must fail
;; ix86_sse5_valid_op_p with a final argument of 1 yet pass with 2, and
;; operand 0 must not be mentioned in operands 1-3.  The replacement code
;; calls ix86_expand_sse5_multiple_memory, then emits sse5_fnmsub<mode>4.
1050 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1054 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1055 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1056 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1058 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1059 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1060 && !reg_mentioned_p (operands[0], operands[1])
1061 && !reg_mentioned_p (operands[0], operands[2])
1062 && !reg_mentioned_p (operands[0], operands[3])"
1065 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1066 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1067 operands[2], operands[3]));
1071 ;; For the scalar operations, use operand1 for the upper words that aren't
1072 ;; modified, so restrict the forms that are generated.
1073 ;; Scalar version of fnmsub
;; Two alternatives only: operand 1 is always tied to operand 0, and memory
;; is allowed in operand 3 or in operand 2, never both.  No splitter for this
;; scalar form is visible nearby, so the condition passes 1 as the final
;; argument of ix86_sse5_valid_op_p, like sse5_vmfmsub and sse5_vmfnmadd.
;; Suffix from <ssemodesuffixf2s>: ss for V4SF, sd for V2DF.
1074 (define_insn "sse5_vmfnmsub<mode>4"
1075 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1076 (vec_merge:SSEMODEF2P
1080 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1081 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1082 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1085 "TARGET_SSE5 && TARGET_FUSED_MADD
1086 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1087 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1088 [(set_attr "type" "ssemuladd")
1089 (set_attr "mode" "<MODE>")])
1091 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1092 ;; even if the user used -mno-fused-madd
1093 ;; Parallel instructions. During instruction generation, just default
1094 ;; to registers, and let combine later build the appropriate instruction.
;; All four operands use register_operand here.  When TARGET_FUSED_MADD is
;; set, the preparation code emits sse5_fmadd<mode>4 in place of the
;; UNSPEC_SSE5_INTRINSIC form.
1095 (define_expand "sse5i_fmadd<mode>4"
1096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1100 (match_operand:SSEMODEF2P 1 "register_operand" "")
1101 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1102 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1103 UNSPEC_SSE5_INTRINSIC))]
1106 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1107 if (TARGET_FUSED_MADD)
1109 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1110 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmadd for the vector modes in SSEMODEF2P.
;; The condition needs only TARGET_SSE5 (no TARGET_FUSED_MADD test) and
;; passes 1 as the final argument of ix86_sse5_valid_op_p.
1115 (define_insn "*sse5i_fmadd<mode>4"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1120 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1121 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1122 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1123 UNSPEC_SSE5_INTRINSIC))]
1124 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1125 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1126 [(set_attr "type" "ssemuladd")
1127 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub wrapped in UNSPEC_SSE5_INTRINSIC; all
;; operands are register_operand.  When TARGET_FUSED_MADD is set, the
;; preparation code emits sse5_fmsub<mode>4 instead.
1129 (define_expand "sse5i_fmsub<mode>4"
1130 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1134 (match_operand:SSEMODEF2P 1 "register_operand" "")
1135 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1136 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1137 UNSPEC_SSE5_INTRINSIC))]
1140 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1141 if (TARGET_FUSED_MADD)
1143 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1144 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmsub; needs only TARGET_SSE5.
;; NOTE(review): operand 1 uses register_operand although its fourth
;; alternative is "xm"; the matching *sse5i_fmadd pattern uses
;; nonimmediate_operand for operand 1 -- confirm whether this is intended.
1149 (define_insn "*sse5i_fmsub<mode>4"
1150 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1154 (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
1155 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1156 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1157 UNSPEC_SSE5_INTRINSIC))]
1158 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1159 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1160 [(set_attr "type" "ssemuladd")
1161 (set_attr "mode" "<MODE>")])
1163 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1164 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander: operand 3 is written first in the RTL.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fnmadd<mode>4
;; in place of the UNSPEC_SSE5_INTRINSIC form.
1165 (define_expand "sse5i_fnmadd<mode>4"
1166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1169 (match_operand:SSEMODEF2P 3 "register_operand" "")
1171 (match_operand:SSEMODEF2P 1 "register_operand" "")
1172 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1173 UNSPEC_SSE5_INTRINSIC))]
1176 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1177 if (TARGET_FUSED_MADD)
1179 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1180 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fnmadd; needs only TARGET_SSE5.  Operand 3
;; is written first in the RTL; four alternatives.
1185 (define_insn "*sse5i_fnmadd<mode>4"
1186 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1189 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1191 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1192 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1193 UNSPEC_SSE5_INTRINSIC))]
1194 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1195 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1196 [(set_attr "type" "ssemuladd")
1197 (set_attr "mode" "<MODE>")])
1199 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander with register_operand everywhere.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fnmsub<mode>4
;; in place of the UNSPEC_SSE5_INTRINSIC form.
1200 (define_expand "sse5i_fnmsub<mode>4"
1201 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1206 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1207 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1208 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1209 UNSPEC_SSE5_INTRINSIC))]
1212 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1213 if (TARGET_FUSED_MADD)
1215 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1216 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fnmsub; needs only TARGET_SSE5.  Unlike
;; sse5_fnmsub<mode>4, this pattern lists four alternatives and marks
;; operand 1 with "%".
1221 (define_insn "*sse5i_fnmsub<mode>4"
1222 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1227 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1228 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1229 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1230 UNSPEC_SSE5_INTRINSIC))]
1231 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1232 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1233 [(set_attr "type" "ssemuladd")
1234 (set_attr "mode" "<MODE>")])
1236 ;; Scalar instructions
;; Intrinsic expander for the scalar (vec_merge) fmadd; all operands are
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfmadd<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1237 (define_expand "sse5i_vmfmadd<mode>4"
1238 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1240 [(vec_merge:SSEMODEF2P
1243 (match_operand:SSEMODEF2P 1 "register_operand" "")
1244 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1245 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1248 UNSPEC_SSE5_INTRINSIC))]
1251 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1252 if (TARGET_FUSED_MADD)
1254 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1255 operands[2], operands[3]));
1260 ;; For the scalar operations, use operand1 for the upper words that aren't
1261 ;; modified, so restrict the forms that are accepted.
;; Two alternatives; operand 1 is a register tied to operand 0.  The "mode"
;; attribute is <ssescalarmode> rather than <MODE>.
1262 (define_insn "*sse5i_vmfmadd<mode>4"
1263 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1265 [(vec_merge:SSEMODEF2P
1268 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1269 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1270 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1273 UNSPEC_SSE5_INTRINSIC))]
1274 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1275 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1276 [(set_attr "type" "ssemuladd")
1277 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the scalar (vec_merge) fmsub; all operands are
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1279 (define_expand "sse5i_vmfmsub<mode>4"
1280 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1282 [(vec_merge:SSEMODEF2P
1285 (match_operand:SSEMODEF2P 1 "register_operand" "")
1286 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1287 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1290 UNSPEC_SSE5_INTRINSIC))]
1293 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1294 if (TARGET_FUSED_MADD)
1296 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1297 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fmsub; needs only TARGET_SSE5.  Two
;; alternatives with operand 1 tied to operand 0; "mode" is <ssescalarmode>.
1302 (define_insn "*sse5i_vmfmsub<mode>4"
1303 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1305 [(vec_merge:SSEMODEF2P
1308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1310 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1313 UNSPEC_SSE5_INTRINSIC))]
1314 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1315 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1316 [(set_attr "type" "ssemuladd")
1317 (set_attr "mode" "<ssescalarmode>")])
1319 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for the scalar (vec_merge) fnmadd; operand 3 is written
;; first in the RTL.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfnmadd<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1320 (define_expand "sse5i_vmfnmadd<mode>4"
1321 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1323 [(vec_merge:SSEMODEF2P
1325 (match_operand:SSEMODEF2P 3 "register_operand" "")
1327 (match_operand:SSEMODEF2P 1 "register_operand" "")
1328 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1331 UNSPEC_SSE5_INTRINSIC))]
1334 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1335 if (TARGET_FUSED_MADD)
1337 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1338 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fnmadd; needs only TARGET_SSE5.
;; Operand 3 is written first; two alternatives with operand 1 tied to
;; operand 0; "mode" is <ssescalarmode>.
1343 (define_insn "*sse5i_vmfnmadd<mode>4"
1344 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1346 [(vec_merge:SSEMODEF2P
1348 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1350 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1351 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1354 UNSPEC_SSE5_INTRINSIC))]
1355 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1356 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1357 [(set_attr "type" "ssemuladd")
1358 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the scalar (vec_merge) fnmsub; all operands are
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfnmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1360 (define_expand "sse5i_vmfnmsub<mode>4"
1361 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1363 [(vec_merge:SSEMODEF2P
1367 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1368 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1369 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1372 UNSPEC_SSE5_INTRINSIC))]
1375 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1376 if (TARGET_FUSED_MADD)
1378 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1379 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of scalar fnmsub; needs only TARGET_SSE5.  Two
;; alternatives with operand 1 tied to operand 0; "mode" is <ssescalarmode>.
1384 (define_insn "*sse5i_vmfnmsub<mode>4"
1385 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1387 [(vec_merge:SSEMODEF2P
1391 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1392 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1393 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1396 UNSPEC_SSE5_INTRINSIC))]
1397 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1398 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1399 [(set_attr "type" "ssemuladd")
1400 (set_attr "mode" "<ssescalarmode>")])
1402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1404 ;; Parallel single-precision floating point conversion operations
1406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: float:V2SF of the V2SI operand 2 (constraint "ym"); operand 1 is
;; a V4SF register tied to the destination ("0").
1408 (define_insn "sse_cvtpi2ps"
1409 [(set (match_operand:V4SF 0 "register_operand" "=x")
1412 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1413 (match_operand:V4SF 1 "register_operand" "0")
1416 "cvtpi2ps\t{%2, %0|%0, %2}"
1417 [(set_attr "type" "ssecvt")
1418 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source ("xm") to a V2SI destination ("=y").  The
;; conversion is modelled as a V4SI unspec of which elements 0 and 1 are kept.
1420 (define_insn "sse_cvtps2pi"
1421 [(set (match_operand:V2SI 0 "register_operand" "=y")
1423 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1425 (parallel [(const_int 0) (const_int 1)])))]
1427 "cvtps2pi\t{%1, %0|%0, %1}"
1428 [(set_attr "type" "ssecvt")
1429 (set_attr "unit" "mmx")
1430 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but modelled with fix:V4SI instead of an
;; unspec; elements 0 and 1 of the result are kept.
;; NOTE(review): "mode" is "SF" here but "DI" on sse_cvtps2pi -- confirm
;; which value is intended.
1432 (define_insn "sse_cvttps2pi"
1433 [(set (match_operand:V2SI 0 "register_operand" "=y")
1435 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1436 (parallel [(const_int 0) (const_int 1)])))]
1438 "cvttps2pi\t{%1, %0|%0, %1}"
1439 [(set_attr "type" "ssecvt")
1440 (set_attr "unit" "mmx")
1441 (set_attr "mode" "SF")])
;; cvtsi2ss: float:SF of the SI operand 2, which is a general register in
;; alternative 0 and memory in alternative 1; operand 1 is tied to the
;; destination.  The decode attributes are given per alternative.
1443 (define_insn "sse_cvtsi2ss"
1444 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1447 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1448 (match_operand:V4SF 1 "register_operand" "0,0")
1451 "cvtsi2ss\t{%2, %0|%0, %2}"
1452 [(set_attr "type" "sseicvt")
1453 (set_attr "athlon_decode" "vector,double")
1454 (set_attr "amdfam10_decode" "vector,double")
1455 (set_attr "mode" "SF")])
;; cvtsi2ssq: DI-source form of cvtsi2ss; enabled only for
;; TARGET_SSE && TARGET_64BIT.  Operand 1 is tied to the destination.
1457 (define_insn "sse_cvtsi2ssq"
1458 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1461 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1462 (match_operand:V4SF 1 "register_operand" "0,0")
1464 "TARGET_SSE && TARGET_64BIT"
1465 "cvtsi2ssq\t{%2, %0|%0, %2}"
1466 [(set_attr "type" "sseicvt")
1467 (set_attr "athlon_decode" "vector,double")
1468 (set_attr "amdfam10_decode" "vector,double")
1469 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of the V4SF operand 1 (register or memory) converted
;; to an SI general register through UNSPEC_FIX_NOTRUNC.
1471 (define_insn "sse_cvtss2si"
1472 [(set (match_operand:SI 0 "register_operand" "=r,r")
1475 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1476 (parallel [(const_int 0)]))]
1477 UNSPEC_FIX_NOTRUNC))]
1479 "cvtss2si\t{%1, %0|%0, %1}"
1480 [(set_attr "type" "sseicvt")
1481 (set_attr "athlon_decode" "double,vector")
1482 (set_attr "prefix_rep" "1")
1483 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose input is a plain SF operand instead of an
;; element selected from a V4SF; also carries an amdfam10_decode attribute.
1485 (define_insn "sse_cvtss2si_2"
1486 [(set (match_operand:SI 0 "register_operand" "=r,r")
1487 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1488 UNSPEC_FIX_NOTRUNC))]
1490 "cvtss2si\t{%1, %0|%0, %1}"
1491 [(set_attr "type" "sseicvt")
1492 (set_attr "athlon_decode" "double,vector")
1493 (set_attr "amdfam10_decode" "double,double")
1494 (set_attr "prefix_rep" "1")
1495 (set_attr "mode" "SI")])
;; cvtss2siq: DI-destination form of sse_cvtss2si; enabled only for
;; TARGET_SSE && TARGET_64BIT.
1497 (define_insn "sse_cvtss2siq"
1498 [(set (match_operand:DI 0 "register_operand" "=r,r")
1501 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1502 (parallel [(const_int 0)]))]
1503 UNSPEC_FIX_NOTRUNC))]
1504 "TARGET_SSE && TARGET_64BIT"
1505 "cvtss2siq\t{%1, %0|%0, %1}"
1506 [(set_attr "type" "sseicvt")
1507 (set_attr "athlon_decode" "double,vector")
1508 (set_attr "prefix_rep" "1")
1509 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose input is a plain SF operand; enabled only
;; for TARGET_SSE && TARGET_64BIT.
1511 (define_insn "sse_cvtss2siq_2"
1512 [(set (match_operand:DI 0 "register_operand" "=r,r")
1513 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1514 UNSPEC_FIX_NOTRUNC))]
1515 "TARGET_SSE && TARGET_64BIT"
1516 "cvtss2siq\t{%1, %0|%0, %1}"
1517 [(set_attr "type" "sseicvt")
1518 (set_attr "athlon_decode" "double,vector")
1519 (set_attr "amdfam10_decode" "double,double")
1520 (set_attr "prefix_rep" "1")
1521 (set_attr "mode" "SI")])
;; cvttss2si: conversion of element 0 of the V4SF operand 1 (register or
;; memory) into an SI general register.
1523 (define_insn "sse_cvttss2si"
1524 [(set (match_operand:SI 0 "register_operand" "=r,r")
1527 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1528 (parallel [(const_int 0)]))))]
1530 "cvttss2si\t{%1, %0|%0, %1}"
1531 [(set_attr "type" "sseicvt")
1532 (set_attr "athlon_decode" "double,vector")
1533 (set_attr "amdfam10_decode" "double,double")
1534 (set_attr "prefix_rep" "1")
1535 (set_attr "mode" "SI")])
;; cvttss2siq: DI-destination form of sse_cvttss2si; enabled only for
;; TARGET_SSE && TARGET_64BIT.
1537 (define_insn "sse_cvttss2siq"
1538 [(set (match_operand:DI 0 "register_operand" "=r,r")
1541 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1542 (parallel [(const_int 0)]))))]
1543 "TARGET_SSE && TARGET_64BIT"
1544 "cvttss2siq\t{%1, %0|%0, %1}"
1545 [(set_attr "type" "sseicvt")
1546 (set_attr "athlon_decode" "double,vector")
1547 (set_attr "amdfam10_decode" "double,double")
1548 (set_attr "prefix_rep" "1")
1549 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of a V4SI operand (register or memory).
1551 (define_insn "sse2_cvtdq2ps"
1552 [(set (match_operand:V4SF 0 "register_operand" "=x")
1553 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1555 "cvtdq2ps\t{%1, %0|%0, %1}"
1556 [(set_attr "type" "ssecvt")
1557 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand (register or memory) to V4SI, modelled with
;; UNSPEC_FIX_NOTRUNC.
1559 (define_insn "sse2_cvtps2dq"
1560 [(set (match_operand:V4SI 0 "register_operand" "=x")
1561 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1562 UNSPEC_FIX_NOTRUNC))]
1564 "cvtps2dq\t{%1, %0|%0, %1}"
1565 [(set_attr "type" "ssecvt")
1566 (set_attr "prefix_data16" "1")
1567 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF operand (register or memory) to V4SI, modelled with
;; fix:V4SI rather than an unspec.
1569 (define_insn "sse2_cvttps2dq"
1570 [(set (match_operand:V4SI 0 "register_operand" "=x")
1571 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1573 "cvttps2dq\t{%1, %0|%0, %1}"
1574 [(set_attr "type" "ssecvt")
1575 (set_attr "prefix_rep" "1")
1576 (set_attr "mode" "TI")])
1578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1580 ;; Parallel double-precision floating point conversion operations
1582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of a V2SI operand.  Alternative 0 takes a "y"
;; register and is tagged unit "mmx"; alternative 1 takes memory.
1584 (define_insn "sse2_cvtpi2pd"
1585 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1586 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1588 "cvtpi2pd\t{%1, %0|%0, %1}"
1589 [(set_attr "type" "ssecvt")
1590 (set_attr "unit" "mmx,*")
1591 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand (register or memory) to a V2SI destination ("=y"),
;; modelled with UNSPEC_FIX_NOTRUNC.
1593 (define_insn "sse2_cvtpd2pi"
1594 [(set (match_operand:V2SI 0 "register_operand" "=y")
1595 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1596 UNSPEC_FIX_NOTRUNC))]
1598 "cvtpd2pi\t{%1, %0|%0, %1}"
1599 [(set_attr "type" "ssecvt")
1600 (set_attr "unit" "mmx")
1601 (set_attr "prefix_data16" "1")
1602 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF operand (register or memory) to a V2SI destination ("=y"),
;; modelled with fix:V2SI.
;; NOTE(review): "mode" is "TI" here but "DI" on sse2_cvtpd2pi -- confirm
;; which value is intended.
1604 (define_insn "sse2_cvttpd2pi"
1605 [(set (match_operand:V2SI 0 "register_operand" "=y")
1606 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1608 "cvttpd2pi\t{%1, %0|%0, %1}"
1609 [(set_attr "type" "ssecvt")
1610 (set_attr "unit" "mmx")
1611 (set_attr "prefix_data16" "1")
1612 (set_attr "mode" "TI")])
;; cvtsi2sd: float:DF of the SI operand 2 (general register or memory);
;; operand 1 is a V2DF register tied to the destination.
1614 (define_insn "sse2_cvtsi2sd"
1615 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1618 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1619 (match_operand:V2DF 1 "register_operand" "0,0")
1622 "cvtsi2sd\t{%2, %0|%0, %2}"
1623 [(set_attr "type" "sseicvt")
1624 (set_attr "mode" "DF")
1625 (set_attr "athlon_decode" "double,direct")
1626 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DI-source form of sse2_cvtsi2sd; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
1628 (define_insn "sse2_cvtsi2sdq"
1629 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1632 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1633 (match_operand:V2DF 1 "register_operand" "0,0")
1635 "TARGET_SSE2 && TARGET_64BIT"
1636 "cvtsi2sdq\t{%2, %0|%0, %2}"
1637 [(set_attr "type" "sseicvt")
1638 (set_attr "mode" "DF")
1639 (set_attr "athlon_decode" "double,direct")
1640 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of the V2DF operand 1 (register or memory) converted
;; to an SI general register through UNSPEC_FIX_NOTRUNC.
1642 (define_insn "sse2_cvtsd2si"
1643 [(set (match_operand:SI 0 "register_operand" "=r,r")
1646 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1647 (parallel [(const_int 0)]))]
1648 UNSPEC_FIX_NOTRUNC))]
1650 "cvtsd2si\t{%1, %0|%0, %1}"
1651 [(set_attr "type" "sseicvt")
1652 (set_attr "athlon_decode" "double,vector")
1653 (set_attr "prefix_rep" "1")
1654 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose input is a plain DF operand instead of an
;; element selected from a V2DF; also carries an amdfam10_decode attribute.
1656 (define_insn "sse2_cvtsd2si_2"
1657 [(set (match_operand:SI 0 "register_operand" "=r,r")
1658 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1659 UNSPEC_FIX_NOTRUNC))]
1661 "cvtsd2si\t{%1, %0|%0, %1}"
1662 [(set_attr "type" "sseicvt")
1663 (set_attr "athlon_decode" "double,vector")
1664 (set_attr "amdfam10_decode" "double,double")
1665 (set_attr "prefix_rep" "1")
1666 (set_attr "mode" "SI")])
;; cvtsd2siq: DI-destination form of sse2_cvtsd2si; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
1668 (define_insn "sse2_cvtsd2siq"
1669 [(set (match_operand:DI 0 "register_operand" "=r,r")
1672 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1673 (parallel [(const_int 0)]))]
1674 UNSPEC_FIX_NOTRUNC))]
1675 "TARGET_SSE2 && TARGET_64BIT"
1676 "cvtsd2siq\t{%1, %0|%0, %1}"
1677 [(set_attr "type" "sseicvt")
1678 (set_attr "athlon_decode" "double,vector")
1679 (set_attr "prefix_rep" "1")
1680 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose input is a plain DF operand; enabled only
;; for TARGET_SSE2 && TARGET_64BIT.
1682 (define_insn "sse2_cvtsd2siq_2"
1683 [(set (match_operand:DI 0 "register_operand" "=r,r")
1684 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1685 UNSPEC_FIX_NOTRUNC))]
1686 "TARGET_SSE2 && TARGET_64BIT"
1687 "cvtsd2siq\t{%1, %0|%0, %1}"
1688 [(set_attr "type" "sseicvt")
1689 (set_attr "athlon_decode" "double,vector")
1690 (set_attr "amdfam10_decode" "double,double")
1691 (set_attr "prefix_rep" "1")
1692 (set_attr "mode" "DI")])
;; cvttsd2si: conversion of element 0 of the V2DF operand 1 (register or
;; memory) into an SI general register.
1694 (define_insn "sse2_cvttsd2si"
1695 [(set (match_operand:SI 0 "register_operand" "=r,r")
1698 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1699 (parallel [(const_int 0)]))))]
1701 "cvttsd2si\t{%1, %0|%0, %1}"
1702 [(set_attr "type" "sseicvt")
1703 (set_attr "prefix_rep" "1")
1704 (set_attr "mode" "SI")
1705 (set_attr "athlon_decode" "double,vector")
1706 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DI-destination form of sse2_cvttsd2si; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
1708 (define_insn "sse2_cvttsd2siq"
1709 [(set (match_operand:DI 0 "register_operand" "=r,r")
1712 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1713 (parallel [(const_int 0)]))))]
1714 "TARGET_SSE2 && TARGET_64BIT"
1715 "cvttsd2siq\t{%1, %0|%0, %1}"
1716 [(set_attr "type" "sseicvt")
1717 (set_attr "prefix_rep" "1")
1718 (set_attr "mode" "DI")
1719 (set_attr "athlon_decode" "double,vector")
1720 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces a V2DF from elements 0 and 1 of the V4SI operand 1
;; (register or memory).
1722 (define_insn "sse2_cvtdq2pd"
1723 [(set (match_operand:V2DF 0 "register_operand" "=x")
1726 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1727 (parallel [(const_int 0) (const_int 1)]))))]
1729 "cvtdq2pd\t{%1, %0|%0, %1}"
1730 [(set_attr "type" "ssecvt")
1731 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
1733 (define_expand "sse2_cvtpd2dq"
1734 [(set (match_operand:V4SI 0 "register_operand" "")
1736 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1740 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF operand (register or memory) converted through a V2SI
;; unspec into a V4SI destination; operand 2 must satisfy const0_operand.
1742 (define_insn "*sse2_cvtpd2dq"
1743 [(set (match_operand:V4SI 0 "register_operand" "=x")
1745 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1747 (match_operand:V2SI 2 "const0_operand" "")))]
1749 "cvtpd2dq\t{%1, %0|%0, %1}"
1750 [(set_attr "type" "ssecvt")
1751 (set_attr "prefix_rep" "1")
1752 (set_attr "mode" "TI")
1753 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation statement sets operands[2] to the
;; V2SImode zero constant.
1755 (define_expand "sse2_cvttpd2dq"
1756 [(set (match_operand:V4SI 0 "register_operand" "")
1758 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1761 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: fix:V2SI of the V2DF operand 1 (register or memory) into a V4SI
;; destination; operand 2 must satisfy const0_operand.
1763 (define_insn "*sse2_cvttpd2dq"
1764 [(set (match_operand:V4SI 0 "register_operand" "=x")
1766 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1767 (match_operand:V2SI 2 "const0_operand" "")))]
1769 "cvttpd2dq\t{%1, %0|%0, %1}"
1770 [(set_attr "type" "ssecvt")
1771 (set_attr "prefix_rep" "1")
1772 (set_attr "mode" "TI")
1773 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate:V2SF of the V2DF operand 2 (register or memory);
;; operand 1 is a V4SF register tied to the destination.
1775 (define_insn "sse2_cvtsd2ss"
1776 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1779 (float_truncate:V2SF
1780 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1781 (match_operand:V4SF 1 "register_operand" "0,0")
1784 "cvtsd2ss\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "ssecvt")
1786 (set_attr "athlon_decode" "vector,double")
1787 (set_attr "amdfam10_decode" "vector,double")
1788 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2 (register or
;; memory); operand 1 is a V2DF register tied to the destination.
1790 (define_insn "sse2_cvtss2sd"
1791 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1795 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1796 (parallel [(const_int 0) (const_int 1)])))
1797 (match_operand:V2DF 1 "register_operand" "0,0")
1800 "cvtss2sd\t{%2, %0|%0, %2}"
1801 [(set_attr "type" "ssecvt")
1802 (set_attr "amdfam10_decode" "vector,double")
1803 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation statement sets operands[2] to the
;; V2SFmode zero constant.
1805 (define_expand "sse2_cvtpd2ps"
1806 [(set (match_operand:V4SF 0 "register_operand" "")
1808 (float_truncate:V2SF
1809 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1812 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate:V2SF of the V2DF operand 1 (register or memory)
;; into a V4SF destination; operand 2 must satisfy const0_operand.
1814 (define_insn "*sse2_cvtpd2ps"
1815 [(set (match_operand:V4SF 0 "register_operand" "=x")
1817 (float_truncate:V2SF
1818 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1819 (match_operand:V2SF 2 "const0_operand" "")))]
1821 "cvtpd2ps\t{%1, %0|%0, %1}"
1822 [(set_attr "type" "ssecvt")
1823 (set_attr "prefix_data16" "1")
1824 (set_attr "mode" "V4SF")
1825 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produces a V2DF from elements 0 and 1 of the V4SF operand 1
;; (register or memory).
1827 (define_insn "sse2_cvtps2pd"
1828 [(set (match_operand:V2DF 0 "register_operand" "=x")
1831 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1832 (parallel [(const_int 0) (const_int 1)]))))]
1834 "cvtps2pd\t{%1, %0|%0, %1}"
1835 [(set_attr "type" "ssecvt")
1836 (set_attr "mode" "V2DF")
1837 (set_attr "amdfam10_decode" "direct")])
;; Expander producing the V2DF operand 0 from the V4SF operand 1.  The
;; preparation statement allocates a fresh V4SFmode pseudo as operands[2].
1839 (define_expand "vec_unpacks_hi_v4sf"
1844 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1845 (parallel [(const_int 6)
1849 (set (match_operand:V2DF 0 "register_operand" "")
1853 (parallel [(const_int 0) (const_int 1)]))))]
1856 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing the V2DF operand 0 from elements 0 and 1 of the V4SF
;; operand 1.
1859 (define_expand "vec_unpacks_lo_v4sf"
1860 [(set (match_operand:V2DF 0 "register_operand" "")
1863 (match_operand:V4SF 1 "nonimmediate_operand" "")
1864 (parallel [(const_int 0) (const_int 1)]))))]
;; Expands through a V4SImode temporary: gen_vec_unpacks_hi_v8hi fills the
;; temporary from operand 1, then gen_sse2_cvtdq2ps writes operand 0.
1867 (define_expand "vec_unpacks_float_hi_v8hi"
1868 [(match_operand:V4SF 0 "register_operand" "")
1869 (match_operand:V8HI 1 "register_operand" "")]
1872 rtx tmp = gen_reg_rtx (V4SImode);
1874 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1875 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expands through a V4SImode temporary: gen_vec_unpacks_lo_v8hi fills the
;; temporary from operand 1, then gen_sse2_cvtdq2ps writes operand 0.
1879 (define_expand "vec_unpacks_float_lo_v8hi"
1880 [(match_operand:V4SF 0 "register_operand" "")
1881 (match_operand:V8HI 1 "register_operand" "")]
1884 rtx tmp = gen_reg_rtx (V4SImode);
1886 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1887 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expands through a V4SImode temporary: gen_vec_unpacku_hi_v8hi fills the
;; temporary from operand 1, then gen_sse2_cvtdq2ps writes operand 0.
1891 (define_expand "vec_unpacku_float_hi_v8hi"
1892 [(match_operand:V4SF 0 "register_operand" "")
1893 (match_operand:V8HI 1 "register_operand" "")]
1896 rtx tmp = gen_reg_rtx (V4SImode);
1898 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1899 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expands through a V4SImode temporary: gen_vec_unpacku_lo_v8hi fills the
;; temporary from operand 1, then gen_sse2_cvtdq2ps writes operand 0.
1903 (define_expand "vec_unpacku_float_lo_v8hi"
1904 [(match_operand:V4SF 0 "register_operand" "")
1905 (match_operand:V8HI 1 "register_operand" "")]
1908 rtx tmp = gen_reg_rtx (V4SImode);
1910 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1911 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing the V2DF operand 0 from the V4SI operand 1.  The
;; preparation statement allocates a fresh V4SImode pseudo as operands[2].
1915 (define_expand "vec_unpacks_float_hi_v4si"
1918 (match_operand:V4SI 1 "nonimmediate_operand" "")
1919 (parallel [(const_int 2)
1923 (set (match_operand:V2DF 0 "register_operand" "")
1927 (parallel [(const_int 0) (const_int 1)]))))]
1930 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing the V2DF operand 0 from elements 0 and 1 of the V4SI
;; operand 1.
1933 (define_expand "vec_unpacks_float_lo_v4si"
1934 [(set (match_operand:V2DF 0 "register_operand" "")
1937 (match_operand:V4SI 1 "nonimmediate_operand" "")
1938 (parallel [(const_int 0) (const_int 1)]))))]
;; Converts each V2DF input with gen_sse2_cvtpd2ps into its own V4SFmode
;; temporary, then combines the two temporaries into operand 0 with
;; gen_sse_movlhps.
1941 (define_expand "vec_pack_trunc_v2df"
1942 [(match_operand:V4SF 0 "register_operand" "")
1943 (match_operand:V2DF 1 "nonimmediate_operand" "")
1944 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1949 r1 = gen_reg_rtx (V4SFmode);
1950 r2 = gen_reg_rtx (V4SFmode);
1952 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1953 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1954 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Converts each V2DF input with gen_sse2_cvttpd2dq into its own V4SImode
;; temporary, then combines the two with gen_sse2_punpcklqdq, viewing the
;; destination and both temporaries as V2DImode via gen_lowpart.
1958 (define_expand "vec_pack_sfix_trunc_v2df"
1959 [(match_operand:V4SI 0 "register_operand" "")
1960 (match_operand:V2DF 1 "nonimmediate_operand" "")
1961 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1966 r1 = gen_reg_rtx (V4SImode);
1967 r2 = gen_reg_rtx (V4SImode);
1969 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1970 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1971 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1972 gen_lowpart (V2DImode, r1),
1973 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df but uses gen_sse2_cvtpd2dq for the
;; two conversions before combining them with gen_sse2_punpcklqdq.
1977 (define_expand "vec_pack_sfix_v2df"
1978 [(match_operand:V4SI 0 "register_operand" "")
1979 (match_operand:V2DF 1 "nonimmediate_operand" "")
1980 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1985 r1 = gen_reg_rtx (V4SImode);
1986 r2 = gen_reg_rtx (V4SImode);
1988 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1989 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1990 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1991 gen_lowpart (V2DImode, r1),
1992 gen_lowpart (V2DImode, r2)));
1996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1998 ;; Parallel single-precision floating point element swizzling
2000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander form of movhlps that accepts nonimmediate operands throughout.
;; Its preparation statement calls ix86_fixup_binary_operands with UNKNOWN
;; and V4SFmode.
2002 (define_expand "sse_movhlps_exp"
2003 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2006 (match_operand:V4SF 1 "nonimmediate_operand" "")
2007 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2008 (parallel [(const_int 6)
2013 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Three alternatives, emitting movhlps, movlps (addressing %H2) or movhps
;; respectively.  Operand 1 is always tied to operand 0; the condition
;; rejects the case where operands 1 and 2 are both memory.
2015 (define_insn "sse_movhlps"
2016 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2019 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2020 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2021 (parallel [(const_int 6)
2025 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2027 movhlps\t{%2, %0|%0, %2}
2028 movlps\t{%H2, %0|%0, %H2}
2029 movhps\t{%2, %0|%0, %2}"
2030 [(set_attr "type" "ssemov")
2031 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander form of movlhps that accepts nonimmediate operands throughout.
;; Its preparation statement calls ix86_fixup_binary_operands with UNKNOWN
;; and V4SFmode.
2033 (define_expand "sse_movlhps_exp"
2034 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2037 (match_operand:V4SF 1 "nonimmediate_operand" "")
2038 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2039 (parallel [(const_int 0)
2044 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; movlhps insn.  Operand 1 is tied to the destination in every alternative.
;; Alternatives, in order:
;;   0: register destination, register operand 2 -> movlhps
;;   1: register destination, memory operand 2   -> movhps (load)
;;   2: offsettable memory destination, register -> movlps storing to %H0
;; The insn is only valid when ix86_binary_operator_ok accepts the operands.
2046 (define_insn "sse_movlhps"
2047 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2050 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2051 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2052 (parallel [(const_int 0)
2056 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2058 movlhps\t{%2, %0|%0, %2}
2059 movhps\t{%2, %0|%0, %2}
2060 movlps\t{%2, %H0|%H0, %2}"
2061 [(set_attr "type" "ssemov")
2062 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: operand 1 must be the destination register, operand 2 may be a
;; register or memory.  The selector visible in the pattern picks indices
;; 2, 6, 3 and 7 (presumably of operands 1 and 2 taken together -- the
;; enclosing RTL lines are not visible in this excerpt).
2064 (define_insn "sse_unpckhps"
2065 [(set (match_operand:V4SF 0 "register_operand" "=x")
2068 (match_operand:V4SF 1 "register_operand" "0")
2069 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2070 (parallel [(const_int 2) (const_int 6)
2071 (const_int 3) (const_int 7)])))]
2073 "unpckhps\t{%2, %0|%0, %2}"
2074 [(set_attr "type" "sselog")
2075 (set_attr "mode" "V4SF")])
;; unpcklps: operand 1 must be the destination register, operand 2 may be a
;; register or memory.  The selector visible in the pattern picks indices
;; 0, 4, 1 and 5 (presumably of operands 1 and 2 taken together -- the
;; enclosing RTL lines are not visible in this excerpt).
2077 (define_insn "sse_unpcklps"
2078 [(set (match_operand:V4SF 0 "register_operand" "=x")
2081 (match_operand:V4SF 1 "register_operand" "0")
2082 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2083 (parallel [(const_int 0) (const_int 4)
2084 (const_int 1) (const_int 5)])))]
2086 "unpcklps\t{%2, %0|%0, %2}"
2087 [(set_attr "type" "sselog")
2088 (set_attr "mode" "V4SF")])
2090 ;; These are modeled with the same vec_concat as the others so that we
2091 ;; capture users of shufps that can use the new instructions
;; movshdup: one V4SF source (register or memory) into a register
;; destination.  The insn is typed "sse" and flagged as carrying a rep
;; prefix.
2092 (define_insn "sse3_movshdup"
2093 [(set (match_operand:V4SF 0 "register_operand" "=x")
2096 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2098 (parallel [(const_int 1)
2103 "movshdup\t{%1, %0|%0, %1}"
2104 [(set_attr "type" "sse")
2105 (set_attr "prefix_rep" "1")
2106 (set_attr "mode" "V4SF")])
;; movsldup: one V4SF source (register or memory) into a register
;; destination.  The insn is typed "sse" and flagged as carrying a rep
;; prefix.
2108 (define_insn "sse3_movsldup"
2109 [(set (match_operand:V4SF 0 "register_operand" "=x")
2112 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2114 (parallel [(const_int 0)
2119 "movsldup\t{%1, %0|%0, %1}"
2120 [(set_attr "type" "sse")
2121 (set_attr "prefix_rep" "1")
2122 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as a single integer (operand 3).
;; The mask is cut into four 2-bit fields.  The two low fields are passed on
;; unchanged (values 0..3); the two high fields are biased by 4 (values
;; 4..7) before everything is handed to gen_sse_shufps_1.
2124 (define_expand "sse_shufps"
2125 [(match_operand:V4SF 0 "register_operand" "")
2126 (match_operand:V4SF 1 "register_operand" "")
2127 (match_operand:V4SF 2 "nonimmediate_operand" "")
2128 (match_operand:SI 3 "const_int_operand" "")]
2131 int mask = INTVAL (operands[3]);
2132 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2133 GEN_INT ((mask >> 0) & 3),
2134 GEN_INT ((mask >> 2) & 3),
2135 GEN_INT (((mask >> 4) & 3) + 4),
2136 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selector spelled out as four separate constants:
;; operands 3 and 4 must be in 0..3, operands 5 and 6 in 4..7.  The output
;; code folds them back into one immediate (operands 5 and 6 have the bias
;; of 4 removed and land in bits 4-5 and 6-7), stores it in operands[3] and
;; prints the shufps instruction.
2140 (define_insn "sse_shufps_1"
2141 [(set (match_operand:V4SF 0 "register_operand" "=x")
2144 (match_operand:V4SF 1 "register_operand" "0")
2145 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2146 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2147 (match_operand 4 "const_0_to_3_operand" "")
2148 (match_operand 5 "const_4_to_7_operand" "")
2149 (match_operand 6 "const_4_to_7_operand" "")])))]
2153 mask |= INTVAL (operands[3]) << 0;
2154 mask |= INTVAL (operands[4]) << 2;
2155 mask |= (INTVAL (operands[5]) - 4) << 4;
2156 mask |= (INTVAL (operands[6]) - 4) << 6;
2157 operands[3] = GEN_INT (mask);
2159 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2161 [(set_attr "type" "sselog")
2162 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives, in order:
;;   0: memory destination, register source        -> movhps (store)
;;   1: register destination, register source      -> movhlps
;;   2: register destination, offsettable memory   -> movlps from %H1
2164 (define_insn "sse_storehps"
2165 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2167 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2168 (parallel [(const_int 2) (const_int 3)])))]
2171 movhps\t{%1, %0|%0, %1}
2172 movhlps\t{%1, %0|%0, %1}
2173 movlps\t{%H1, %0|%0, %H1}"
2174 [(set_attr "type" "ssemov")
2175 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps: the pattern combines elements 0 and 1
;; of V4SF operand 1 with V2SF operand 2.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
2177 (define_expand "sse_loadhps_exp"
2178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2181 (match_operand:V4SF 1 "nonimmediate_operand" "")
2182 (parallel [(const_int 0) (const_int 1)]))
2183 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2185 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Combine elements 0 and 1 of operand 1 (always tied to the destination)
;; with V2SF operand 2.  Alternatives, in order:
;;   0: register destination, memory operand 2   -> movhps (load)
;;   1: register destination, register operand 2 -> movlhps
;;   2: offsettable memory destination, register -> movlps storing to %H0
2187 (define_insn "sse_loadhps"
2188 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2191 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2192 (parallel [(const_int 0) (const_int 1)]))
2193 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2196 movhps\t{%2, %0|%0, %2}
2197 movlhps\t{%2, %0|%0, %2}
2198 movlps\t{%2, %H0|%H0, %2}"
2199 [(set_attr "type" "ssemov")
2200 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives, in order:
;;   0: memory destination, register source   -> movlps (store)
;;   1: register destination, register source -> movaps (whole-register copy)
;;   2: register destination, memory source   -> movlps (load)
2202 (define_insn "sse_storelps"
2203 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2205 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2206 (parallel [(const_int 0) (const_int 1)])))]
2209 movlps\t{%1, %0|%0, %1}
2210 movaps\t{%1, %0|%0, %1}
2211 movlps\t{%1, %0|%0, %1}"
2212 [(set_attr "type" "ssemov")
2213 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps: the pattern combines V2SF operand 2
;; with elements 2 and 3 of V4SF operand 1.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
2215 (define_expand "sse_loadlps_exp"
2216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2218 (match_operand:V2SF 2 "nonimmediate_operand" "")
2220 (match_operand:V4SF 1 "nonimmediate_operand" "")
2221 (parallel [(const_int 2) (const_int 3)]))))]
2223 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives, in order:
;;   0: operand 2 already in the destination register, operand 1 in another
;;      register -> shufps with immediate 0xe4 taking operand 1 as source
;;   1: operand 1 tied to the register destination, operand 2 in memory
;;      -> movlps (load)
;;   2: operand 1 tied to the memory destination, operand 2 in a register
;;      -> movlps (store)
2225 (define_insn "sse_loadlps"
2226 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2228 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2230 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2231 (parallel [(const_int 2) (const_int 3)]))))]
2234 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2235 movlps\t{%2, %0|%0, %2}
2236 movlps\t{%2, %0|%0, %2}"
2237 [(set_attr "type" "sselog,ssemov,ssemov")
2238 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 1 is tied to the destination and
;; operand 2 is the register that the movss instruction reads.
2240 (define_insn "sse_movss"
2241 [(set (match_operand:V4SF 0 "register_operand" "=x")
2243 (match_operand:V4SF 2 "register_operand" "x")
2244 (match_operand:V4SF 1 "register_operand" "0")
2247 "movss\t{%2, %0|%0, %2}"
2248 [(set_attr "type" "ssemov")
2249 (set_attr "mode" "SF")])
;; Build a V4SF from scalar SF operand 1, which must already live in the
;; destination register; emitted as shufps with immediate 0 of the
;; destination with itself.
2251 (define_insn "*vec_dupv4sf"
2252 [(set (match_operand:V4SF 0 "register_operand" "=x")
2254 (match_operand:SF 1 "register_operand" "0")))]
2256 "shufps\t{$0, %0, %0|%0, %0, 0}"
2257 [(set_attr "type" "sselog1")
2258 (set_attr "mode" "V4SF")])
2260 ;; Although insertps takes register source, we prefer
2261 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from SF operands 1 and 2.  Alternatives, in order:
;;   0: operand 1 in destination, operand 2 in a register -> unpcklps
;;   1: operand 1 in destination, operand 2 in memory     -> insertps $0x10
;;   2: operand 1 in memory, operand 2 constraint "C"     -> movss load
;;   3: "y"-class destination, operand 2 "y" or memory    -> punpckldq
;;   4: "y"-class destination, operand 1 in memory, "C"   -> movd load
;; Only the insertps alternative sets prefix_extra.
2262 (define_insn "*vec_concatv2sf_sse4_1"
2263 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
2265 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
2266 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
2269 unpcklps\t{%2, %0|%0, %2}
2270 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
2271 movss\t{%1, %0|%0, %1}
2272 punpckldq\t{%2, %0|%0, %2}
2273 movd\t{%1, %0|%0, %1}"
2274 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
2275 (set_attr "prefix_extra" "*,1,*,*,*")
2276 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
2278 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2279 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2280 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from SF operands 1 and 2; operand 2 is restricted to
;; reg_or_0_operand.  Alternatives, in order:
;;   0: operand 1 in destination, operand 2 in a register -> unpcklps
;;   1: operand 1 in memory, operand 2 constraint "C"     -> movss load
;;   2: "y"-class destination and operand 2               -> punpckldq
;;   3: "y"-class destination, operand 1 in memory, "C"   -> movd load
2281 (define_insn "*vec_concatv2sf_sse"
2282 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2284 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2285 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2288 unpcklps\t{%2, %0|%0, %2}
2289 movss\t{%1, %0|%0, %1}
2290 punpckldq\t{%2, %0|%0, %2}
2291 movd\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2293 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves.  Operand 1 is tied to the destination
;; register; operand 2 comes from a register (movlhps) or from memory
;; (movhps load).
2295 (define_insn "*vec_concatv4sf_sse"
2296 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2298 (match_operand:V2SF 1 "register_operand" " 0,0")
2299 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2302 movlhps\t{%2, %0|%0, %2}
2303 movhps\t{%2, %0|%0, %2}"
2304 [(set_attr "type" "ssemov")
2305 (set_attr "mode" "V4SF,V2SF")])
;; Standard-name expander for V4SF vector initialisation: all the work is
;; delegated to ix86_expand_vector_init (false, operands[0], operands[1]).
2307 (define_expand "vec_initv4sf"
2308 [(match_operand:V4SF 0 "register_operand" "")
2309 (match_operand 1 "" "")]
2312 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert scalar SF operand 2 into V4SF operand 1, giving operand 0.  Four
;; alternatives are declared by the constraints:
;;   0: operand 1 tied to the destination, scalar in a register -> movss
;;   1: operand 1 constraint "C", scalar in memory              -> movss
;;   2: operand 1 constraint "C", scalar in a general register,
;;      "Y2"-class destination                                  -> movd
;;   3: memory destination (operand 1 tied to it); the template line for
;;      this alternative is not visible in this excerpt.
2316 (define_insn "vec_setv4sf_0"
2317 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2320 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2321 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2325 movss\t{%2, %0|%0, %2}
2326 movss\t{%2, %0|%0, %2}
2327 movd\t{%2, %0|%0, %2}
2329 [(set_attr "type" "ssemov")
2330 (set_attr "mode" "SF")])
2332 ;; A subset is vec_setv4sf.
;; insertps form of a V4SF element set.  Operand 3 is a power of two in
;; 1..8; the output code turns it into an element index with exact_log2 and
;; shifts that index left by 4 to form the insertps immediate.  Operand 1 is
;; tied to the destination; operand 2 (SF) may be a register or memory.
2333 (define_insn "*vec_setv4sf_sse4_1"
2334 [(set (match_operand:V4SF 0 "register_operand" "=x")
2337 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2338 (match_operand:V4SF 1 "register_operand" "0")
2339 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2342 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2343 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2345 [(set_attr "type" "sselog")
2346 (set_attr "prefix_extra" "1")
2347 (set_attr "mode" "V4SF")])
;; insertps with its 8-bit immediate supplied directly as operand 3
;; (0..255).  Operand 1 is tied to the destination register and operand 2
;; is the second register input; the operation is modelled as an unspec
;; over these three operands.
;; The output template must be a bare string: a ';' after it is not a
;; statement terminator in a machine description, it merely opens a
;; comment, so none is written here.
2349 (define_insn "sse4_1_insertps"
2350 [(set (match_operand:V4SF 0 "register_operand" "=x")
2351 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2352 (match_operand:V4SF 1 "register_operand" "0")
2353 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2356 "insertps\t{%3, %2, %0|%0, %2, %3}"
2357 [(set_attr "type" "sselog")
2358 (set_attr "prefix_extra" "1")
2359 (set_attr "mode" "V4SF")])
;; NOTE(review): the line introducing this definition is not visible in
;; this excerpt; it is presumably a define_split.
;; Matches, after reload, a V4SF memory destination combined with an SF
;; non-memory operand 1, and emits a plain SFmode move of operand 1 to
;; offset 0 of that memory.
2362 [(set (match_operand:V4SF 0 "memory_operand" "")
2365 (match_operand:SF 1 "nonmemory_operand" ""))
2368 "TARGET_SSE && reload_completed"
2371 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Standard-name expander for setting one V4SF element: operand 0 is the
;; vector, operand 1 the SF value and operand 2 a constant element index.
;; The work is delegated to ix86_expand_vector_set.
2375 (define_expand "vec_setv4sf"
2376 [(match_operand:V4SF 0 "register_operand" "")
2377 (match_operand:SF 1 "register_operand" "")
2378 (match_operand 2 "const_int_operand" "")]
2381 ix86_expand_vector_set (false, operands[0], operands[1],
2382 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0.  The insn is
;; rejected when both operands are memory.  After reload it is split into
;; an ordinary scalar move: operand 1 is re-expressed in SFmode, either as
;; the same hard register number via gen_rtx_REG or via gen_lowpart (the
;; test choosing between the two is not visible in this excerpt), and then
;; moved with emit_move_insn.
2386 (define_insn_and_split "*vec_extractv4sf_0"
2387 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2389 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2390 (parallel [(const_int 0)])))]
2391 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2393 "&& reload_completed"
2396 rtx op1 = operands[1];
2398 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2400 op1 = gen_lowpart (SFmode, op1);
2401 emit_move_insn (operands[0], op1);
;; extractps: copy the element of V4SF register operand 1 selected by the
;; constant operand 2 (0..3) into SF operand 0, which may be a general
;; register or memory.
2405 (define_insn "*sse4_1_extractps"
2406 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2408 (match_operand:V4SF 1 "register_operand" "x")
2409 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2411 "extractps\t{%2, %1, %0|%0, %1, %2}"
2412 [(set_attr "type" "sselog")
2413 (set_attr "prefix_extra" "1")
2414 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (constant, 0..3) of a V4SF held in offsettable
;; memory into an SF register.  The split code reads the index i and emits
;; a scalar SFmode load from the memory address advanced by i*4 bytes.
2416 (define_insn_and_split "*vec_extract_v4sf_mem"
2417 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2419 (match_operand:V4SF 1 "memory_operand" "o")
2420 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2426 int i = INTVAL (operands[2]);
2428 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Standard-name expander for extracting one V4SF element: operand 0 is the
;; SF result, operand 1 the vector and operand 2 a constant element index.
;; The work is delegated to ix86_expand_vector_extract.
2432 (define_expand "vec_extractv4sf"
2433 [(match_operand:SF 0 "register_operand" "")
2434 (match_operand:V4SF 1 "register_operand" "")
2435 (match_operand 2 "const_int_operand" "")]
2438 ix86_expand_vector_extract (false, operands[0], operands[1],
2439 INTVAL (operands[2]));
2443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2445 ;; Parallel double-precision floating point element swizzling
2447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the V2DF unpckhpd pattern.  All three operands
;; are accepted as nonimmediate_operand; the preparation statement hands
;; them to ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
2449 (define_expand "sse2_unpckhpd_exp"
2450 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2453 (match_operand:V2DF 1 "nonimmediate_operand" "")
2454 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2455 (parallel [(const_int 1)
2458 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; unpckhpd insn.  Alternatives, in order:
;;   0: operand 1 tied to the register destination, operand 2 in a register
;;      -> unpckhpd
;;   1: operand 2 tied to the register destination, operand 1 in
;;      offsettable memory -> movlpd loading from %H1
;;   2: operand 2 tied to the memory destination, operand 1 in a register
;;      -> movhpd (store)
;; The insn condition rejects operands 1 and 2 both being in memory.
2460 (define_insn "sse2_unpckhpd"
2461 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2464 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2465 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2466 (parallel [(const_int 1)
2468 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2470 unpckhpd\t{%2, %0|%0, %2}
2471 movlpd\t{%H1, %0|%0, %H1}
2472 movhpd\t{%1, %0|%0, %1}"
2473 [(set_attr "type" "sselog,ssemov,ssemov")
2474 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup on a V2DF operand.  Two alternatives are declared: a register
;; destination with a register-or-memory source (movddup), and an
;; offsettable memory destination with a register source, whose template
;; line is not visible in this excerpt.  Memory-to-memory is rejected by
;; the insn condition.
2476 (define_insn "*sse3_movddup"
2477 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2480 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2482 (parallel [(const_int 0)
2484 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2486 movddup\t{%1, %0|%0, %1}
2488 [(set_attr "type" "sselog1,ssemov")
2489 (set_attr "mode" "V2DF")])
;; NOTE(review): the line introducing this definition is not visible in
;; this excerpt; it is presumably a define_split.
;; After reload, for a V2DF memory destination and a register source: the
;; source register is re-expressed as a DFmode register with the same
;; number and stored twice, at byte offsets 0 and 8 of the destination.
2492 [(set (match_operand:V2DF 0 "memory_operand" "")
2495 (match_operand:V2DF 1 "register_operand" "")
2497 (parallel [(const_int 0)
2499 "TARGET_SSE3 && reload_completed"
2502 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2503 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2504 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander front end for the V2DF unpcklpd pattern.  All three operands
;; are accepted as nonimmediate_operand; the preparation statement hands
;; them to ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
2508 (define_expand "sse2_unpcklpd_exp"
2509 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2512 (match_operand:V2DF 1 "nonimmediate_operand" "")
2513 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2514 (parallel [(const_int 0)
2517 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; unpcklpd insn.  Operand 1 is tied to the destination in every
;; alternative.  Alternatives, in order:
;;   0: register destination, register operand 2 -> unpcklpd
;;   1: register destination, memory operand 2   -> movhpd (load)
;;   2: offsettable memory destination, register -> movlpd storing to %H0
;; The insn condition rejects operands 1 and 2 both being in memory.
2519 (define_insn "sse2_unpcklpd"
2520 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2523 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2524 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2525 (parallel [(const_int 0)
2527 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2529 unpcklpd\t{%2, %0|%0, %2}
2530 movhpd\t{%2, %0|%0, %2}
2531 movlpd\t{%2, %H0|%H0, %2}"
2532 [(set_attr "type" "sselog,ssemov,ssemov")
2533 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as a single integer (operand 3) and
;; forwarding to gen_sse2_shufpd_1.  Bit 1 of the mask chooses between the
;; constants 3 and 2 for the last argument; the argument derived from the
;; other mask bit is on a line not visible in this excerpt.
2535 (define_expand "sse2_shufpd"
2536 [(match_operand:V2DF 0 "register_operand" "")
2537 (match_operand:V2DF 1 "register_operand" "")
2538 (match_operand:V2DF 2 "nonimmediate_operand" "")
2539 (match_operand:SI 3 "const_int_operand" "")]
2542 int mask = INTVAL (operands[3]);
2543 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2545 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selector spelled out as two constants: operand 3 in 0..1
;; and operand 4 in 2..3.  The output code folds them back into the 2-bit
;; immediate (operand 3 in bit 0, operand 4 minus 2 in bit 1), stores it in
;; operands[3] and prints the shufpd instruction.
2549 (define_insn "sse2_shufpd_1"
2550 [(set (match_operand:V2DF 0 "register_operand" "=x")
2553 (match_operand:V2DF 1 "register_operand" "0")
2554 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2555 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2556 (match_operand 4 "const_2_to_3_operand" "")])))]
2560 mask = INTVAL (operands[3]);
2561 mask |= (INTVAL (operands[4]) - 2) << 1;
2562 operands[3] = GEN_INT (mask);
2564 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2566 [(set_attr "type" "sselog")
2567 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0; memory-to-memory
;; is rejected.  Three alternatives are declared: a store to memory
;; (movhpd), a register form with the source tied to the destination, and
;; a load from offsettable memory.  The templates of the last two are not
;; visible in this excerpt.
2569 (define_insn "sse2_storehpd"
2570 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2572 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2573 (parallel [(const_int 1)])))]
2574 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2576 movhpd\t{%1, %0|%0, %1}
2579 [(set_attr "type" "ssemov,sselog1,ssemov")
2580 (set_attr "mode" "V1DF,V2DF,DF")])
;; NOTE(review): the line introducing this definition is not visible in
;; this excerpt; it is presumably a define_split.
;; After reload, extracting element 1 of a V2DF in memory into a DF
;; register becomes a plain DF load: operand 1 is rewritten to address the
;; DFmode value at byte offset 8 of the original memory.
2583 [(set (match_operand:DF 0 "register_operand" "")
2585 (match_operand:V2DF 1 "memory_operand" "")
2586 (parallel [(const_int 1)])))]
2587 "TARGET_SSE2 && reload_completed"
2588 [(set (match_dup 0) (match_dup 1))]
2590 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0; memory-to-memory
;; is rejected.  Three alternatives are declared: a store to memory
;; (movlpd), a register-to-register form and a load from memory.  The
;; templates of the last two are not visible in this excerpt.
2593 (define_insn "sse2_storelpd"
2594 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2596 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2597 (parallel [(const_int 0)])))]
2598 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2600 movlpd\t{%1, %0|%0, %1}
2603 [(set_attr "type" "ssemov")
2604 (set_attr "mode" "V1DF,DF,DF")])
;; NOTE(review): the line introducing this definition is not visible in
;; this excerpt; it is presumably a define_split.
;; After reload, extracting element 0 of V2DF operand 1 into a DF register
;; becomes an ordinary scalar move: operand 1 is re-expressed in DFmode,
;; either as the same hard register number via gen_rtx_REG or via
;; gen_lowpart (the test choosing between the two is not visible here),
;; and then moved with emit_move_insn.
2607 [(set (match_operand:DF 0 "register_operand" "")
2609 (match_operand:V2DF 1 "nonimmediate_operand" "")
2610 (parallel [(const_int 0)])))]
2611 "TARGET_SSE2 && reload_completed"
2614 rtx op1 = operands[1];
2616 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2618 op1 = gen_lowpart (DFmode, op1);
2619 emit_move_insn (operands[0], op1);
;; Expander front end for sse2_loadhpd: the pattern combines element 0 of
;; V2DF operand 1 with DF operand 2.  The preparation statement hands the
;; operands to ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
2623 (define_expand "sse2_loadhpd_exp"
2624 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2627 (match_operand:V2DF 1 "nonimmediate_operand" "")
2628 (parallel [(const_int 0)]))
2629 (match_operand:DF 2 "nonimmediate_operand" "")))]
2631 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Combine element 0 of V2DF operand 1 (low half of the result) with DF
;; operand 2 (high half).  Operand 1 is tied to the destination in every
;; alternative.  Alternatives, in order:
;;   0: register destination, operand 2 in memory   -> movhpd (load)
;;   1: register destination, operand 2 in register -> unpcklpd
;;   2: offsettable memory destination; its template is the last template
;;      line, which is not visible in this excerpt.
;; There is deliberately no alternative with operand 2 tied to the
;; destination and operand 1 in another register.  shufpd always takes the
;; low result element from the destination register itself, so
;; "shufpd $1, %1, %0" would produce {old-high-of-%0, low-of-%1} instead of
;; {low-of-%1, %2}; no single shufpd immediate can build the required value.
2633 (define_insn "sse2_loadhpd"
2634 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2637 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2638 (parallel [(const_int 0)]))
2639 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2640 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2642 movhpd\t{%2, %0|%0, %2}
2643 unpcklpd\t{%2, %0|%0, %2}
2646 [(set_attr "type" "ssemov,sselog,other")
2647 (set_attr "mode" "V1DF,V2DF,DF")])
;; NOTE(review): the line introducing this definition is not visible in
;; this excerpt; it is presumably a define_split.
;; After reload, a V2DF memory destination that keeps its own element 0 and
;; receives DF register operand 1 as the other element becomes a plain DF
;; store: operand 0 is rewritten to address the DFmode value at byte offset
;; 8 of the original memory.
2650 [(set (match_operand:V2DF 0 "memory_operand" "")
2652 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2653 (match_operand:DF 1 "register_operand" "")))]
2654 "TARGET_SSE2 && reload_completed"
2655 [(set (match_dup 0) (match_dup 1))]
2657 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander front end for sse2_loadlpd: the pattern combines DF operand 2
;; with element 1 of V2DF operand 1.  The preparation statement hands the
;; operands to ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
2660 (define_expand "sse2_loadlpd_exp"
2661 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2663 (match_operand:DF 2 "nonimmediate_operand" "")
2665 (match_operand:V2DF 1 "nonimmediate_operand" "")
2666 (parallel [(const_int 1)]))))]
2668 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Combine DF operand 2 (low half of the result) with element 1 of V2DF
;; operand 1 (high half).  Alternatives, in order:
;;   0: operand 1 constraint "C", operand 2 in memory          -> movsd load
;;   1: operand 1 tied to destination, operand 2 in memory     -> movlpd load
;;   2: operand 1 tied to destination, operand 2 in a register -> movsd
;;   3: operand 2 tied to destination, operand 1 in a register -> shufpd $2
;;   4: operand 2 tied to destination, operand 1 in offsettable memory
;;      -> movhpd loading from %H1
;;   5: memory destination; its template is the last template line, which
;;      is not visible in this excerpt.
;; In alternative 3 the shufpd source must be operand 1: with immediate 2
;; the low element stays the destination's own (operand 2) and the high
;; element comes from the high half of the source.  Naming %2 there would
;; shuffle the destination with itself and leave the high half untouched.
2670 (define_insn "sse2_loadlpd"
2671 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2673 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2675 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2676 (parallel [(const_int 1)]))))]
2677 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2679 movsd\t{%2, %0|%0, %2}
2680 movlpd\t{%2, %0|%0, %2}
2681 movsd\t{%2, %0|%0, %2}
2682 shufpd\t{$2, %1, %0|%0, %1, 2}
2683 movhpd\t{%H1, %0|%0, %H1}
2685 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2686 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; NOTE(review): the line introducing this definition is not visible in
;; this excerpt; it is presumably a define_split.
;; After reload, a V2DF memory destination that receives DF register
;; operand 1 as its first element and keeps its own element 1 becomes a
;; plain DF store.  The element being replaced is element 0, so the store
;; goes to byte offset 0; offset 8 holds element 1, which the pattern
;; preserves and must not be overwritten.
2689 [(set (match_operand:V2DF 0 "memory_operand" "")
2691 (match_operand:DF 1 "register_operand" "")
2692 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2693 "TARGET_SSE2 && reload_completed"
2694 [(set (match_dup 0) (match_dup 1))]
2696 operands[0] = adjust_address (operands[0], DFmode, 0);
2699 ;; Not sure these two are ever used, but it doesn't hurt to have
;; Extract element 1 of V2DF operand 1 into DF operand 0 when SSE is
;; available but SSE2 is not; memory-to-memory is rejected.  Alternatives,
;; in order: store to memory (movhps), register to register (movhlps),
;; load from offsettable memory (movlps from %H1).
2701 (define_insn "*vec_extractv2df_1_sse"
2702 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2704 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2705 (parallel [(const_int 1)])))]
2706 "!TARGET_SSE2 && TARGET_SSE
2707 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2709 movhps\t{%1, %0|%0, %1}
2710 movhlps\t{%1, %0|%0, %1}
2711 movlps\t{%H1, %0|%0, %H1}"
2712 [(set_attr "type" "ssemov")
2713 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of V2DF operand 1 into DF operand 0 when SSE is
;; available but SSE2 is not; memory-to-memory is rejected.  Alternatives,
;; in order: store to memory (movlps), register to register (movaps),
;; load from memory (movlps).
2715 (define_insn "*vec_extractv2df_0_sse"
2716 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2718 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2719 (parallel [(const_int 0)])))]
2720 "!TARGET_SSE2 && TARGET_SSE
2721 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2723 movlps\t{%1, %0|%0, %1}
2724 movaps\t{%1, %0|%0, %1}
2725 movlps\t{%1, %0|%0, %1}"
2726 [(set_attr "type" "ssemov")
2727 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of two V2DF operands: judging by the templates, the
;; low element of the result comes from operand 2 and the high element from
;; operand 1 (the line carrying the merge mask is not visible in this
;; excerpt).  Alternatives, in order:
;;   0: operand 1 tied to destination, operand 2 in a register -> movsd
;;   1: operand 1 tied to destination, operand 2 in memory     -> movlpd load
;;   2: memory destination (operand 1 tied), operand 2 in reg  -> movlpd store
;;   3: operand 2 tied to destination, operand 1 in a register -> shufpd $2
;;   4: operand 2 tied to destination, operand 1 in offsettable memory
;;      -> movhps loading from %H1
;;   5: offsettable memory destination (operand 2 tied), operand 1 in a
;;      register -> movhps storing to %H0
;; In alternative 3 the shufpd source must be operand 1: with immediate 2
;; the low element stays the destination's own (operand 2) and the high
;; element is taken from the high half of the source.  Naming %2 there
;; would shuffle the destination with itself, i.e. do nothing.
2729 (define_insn "sse2_movsd"
2730 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2732 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2733 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2737 movsd\t{%2, %0|%0, %2}
2738 movlpd\t{%2, %0|%0, %2}
2739 movlpd\t{%2, %0|%0, %2}
2740 shufpd\t{$2, %1, %0|%0, %1, 2}
2741 movhps\t{%H1, %0|%0, %H1}
2742 movhps\t{%1, %H0|%H0, %1}"
2743 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2744 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from DF operand 1 (register or memory) with a single
;; movddup instruction.
2746 (define_insn "*vec_dupv2df_sse3"
2747 [(set (match_operand:V2DF 0 "register_operand" "=x")
2749 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2751 "movddup\t{%1, %0|%0, %1}"
2752 [(set_attr "type" "sselog1")
2753 (set_attr "mode" "DF")])
;; Build a V2DF from DF operand 1, which must already live in the
;; destination register.  The condition and output template lines are not
;; visible in this excerpt.
2755 (define_insn "vec_dupv2df"
2756 [(set (match_operand:V2DF 0 "register_operand" "=x")
2758 (match_operand:DF 1 "register_operand" "0")))]
2761 [(set_attr "type" "sselog1")
2762 (set_attr "mode" "V2DF")])
;; V2DF construction from DF operand 1 (register or memory) that is
;; emitted as a single movddup.  The second half of the pattern is on lines
;; not visible in this excerpt.
2764 (define_insn "*vec_concatv2df_sse3"
2765 [(set (match_operand:V2DF 0 "register_operand" "=x")
2767 (match_operand:DF 1 "nonimmediate_operand" "xm")
2770 "movddup\t{%1, %0|%0, %1}"
2771 [(set_attr "type" "sselog1")
2772 (set_attr "mode" "DF")])
;; Build a V2DF from DF operands 1 and 2.  Alternatives, in order:
;;   0: "Y2" destination, operand 1 tied, operand 2 in "Y2"  -> unpcklpd
;;   1: "Y2" destination, operand 1 tied, operand 2 in memory -> movhpd
;;   2: "Y2" destination, operand 1 in memory, operand 2 "C"  -> movsd load
;;   3: "x" destination, operand 1 tied, operand 2 in "x"     -> movlhps
;;   4: "x" destination, operand 1 tied, operand 2 in memory  -> movhps
2774 (define_insn "*vec_concatv2df"
2775 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2777 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2778 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2781 unpcklpd\t{%2, %0|%0, %2}
2782 movhpd\t{%2, %0|%0, %2}
2783 movsd\t{%1, %0|%0, %1}
2784 movlhps\t{%2, %0|%0, %2}
2785 movhps\t{%2, %0|%0, %2}"
2786 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2787 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Standard-name expander for setting one V2DF element: operand 0 is the
;; vector, operand 1 the DF value and operand 2 a constant element index.
;; The work is delegated to ix86_expand_vector_set.
2789 (define_expand "vec_setv2df"
2790 [(match_operand:V2DF 0 "register_operand" "")
2791 (match_operand:DF 1 "register_operand" "")
2792 (match_operand 2 "const_int_operand" "")]
2795 ix86_expand_vector_set (false, operands[0], operands[1],
2796 INTVAL (operands[2]));
;; Standard-name expander for extracting one V2DF element: operand 0 is the
;; DF result, operand 1 the vector and operand 2 a constant element index.
;; The work is delegated to ix86_expand_vector_extract.
2800 (define_expand "vec_extractv2df"
2801 [(match_operand:DF 0 "register_operand" "")
2802 (match_operand:V2DF 1 "register_operand" "")
2803 (match_operand 2 "const_int_operand" "")]
2806 ix86_expand_vector_extract (false, operands[0], operands[1],
2807 INTVAL (operands[2]));
;; Standard-name expander for V2DF vector initialisation: all the work is
;; delegated to ix86_expand_vector_init (false, operands[0], operands[1]).
2811 (define_expand "vec_initv2df"
2812 [(match_operand:V2DF 0 "register_operand" "")
2813 (match_operand 1 "" "")]
2816 ix86_expand_vector_init (false, operands[0], operands[1]);
2820 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2822 ;; Parallel integral arithmetic
2824 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every mode in SSEMODEI.  The preparation statement
;; creates operand 2 as a register holding the all-zero constant of the
;; mode; the pattern lines that use it are not visible in this excerpt
;; (presumably a subtraction of operand 1 from that zero -- confirm).
2826 (define_expand "neg<mode>2"
2827 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2830 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2832 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the add/subtract family (name built from <plusminus_insn>)
;; over every mode in SSEMODEI.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy with the iterator's
;; rtx code and mode.
2834 (define_expand "<plusminus_insn><mode>3"
2835 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2837 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2838 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2840 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the add/subtract family over SSEMODEI.  Operand 1 is
;; tied to the destination (with the <comm> attribute prepended to its
;; constraint); operand 2 may be a register or memory.  The mnemonic is
;; assembled as "p" + <plusminus_mnemonic> + the element-size suffix from
;; <ssevecsize>.  Requires SSE2 and ix86_binary_operator_ok.
2842 (define_insn "*<plusminus_insn><mode>3"
2843 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2845 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2846 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2847 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2848 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2849 [(set_attr "type" "sseiadd")
2850 (set_attr "prefix_data16" "1")
2851 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus family of operations over the byte and
;; word vector modes (SSEMODE12).  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy.
2853 (define_expand "sse2_<plusminus_insn><mode>3"
2854 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2855 (sat_plusminus:SSEMODE12
2856 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2857 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2859 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus family over SSEMODE12.  Operand 1 is
;; tied to the destination (with the <comm> attribute prepended to its
;; constraint); operand 2 may be a register or memory.  The mnemonic is
;; assembled as "p" + <plusminus_mnemonic> + the element-size suffix from
;; <ssevecsize>.  Requires SSE2 and ix86_binary_operator_ok.
2861 (define_insn "*sse2_<plusminus_insn><mode>3"
2862 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2863 (sat_plusminus:SSEMODE12
2864 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2865 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2866 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2867 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2868 [(set_attr "type" "sseiadd")
2869 (set_attr "prefix_data16" "1")
2870 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as an insn that is only valid before reload
;; (its condition excludes reload_completed / reload_in_progress) and is
;; split into a word-multiply sequence.  Two strategies are visible in the
;; C body; the test that chooses between them is not visible in this
;; excerpt.
;;  - SSE5 path: six V8HI temporaries.  Each input is unpacked into words
;;    with ix86_expand_sse5_unpack (high bytes and low bytes separately),
;;    the word vectors are multiplied with mulv8hi3, and the low byte of
;;    each product word is packed back with ix86_expand_sse5_pack.
;;  - Generic path: twelve V16QI temporaries.  punpckhbw/punpcklbw of each
;;    input with itself put a source byte in the low byte of every word,
;;    mulv8hi3 multiplies the words, and a cascade of punpckhbw/punpcklbw
;;    gathers the low product bytes back into one vector (see the
;;    end-of-line pictures in the body).
2872 (define_insn_and_split "mulv16qi3"
2873 [(set (match_operand:V16QI 0 "register_operand" "")
2874 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2875 (match_operand:V16QI 2 "register_operand" "")))]
2877 && !(reload_completed || reload_in_progress)"
2882 rtx t[12], op0, op[3];
2887 /* On SSE5, we can take advantage of the pperm instruction to pack and
2888 unpack the bytes. Unpack data such that we've got a source byte in
2889 each low byte of each word. We don't care what goes into the high
2890 byte, so put 0 there. */
2891 for (i = 0; i < 6; ++i)
2892 t[i] = gen_reg_rtx (V8HImode);
2894 for (i = 0; i < 2; i++)
2897 op[1] = operands[i+1];
2898 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2901 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2904 /* Multiply words. */
2905 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2906 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2908 /* Pack the low byte of each word back into a single xmm */
2909 op[0] = operands[0];
2912 ix86_expand_sse5_pack (op);
2916 for (i = 0; i < 12; ++i)
2917 t[i] = gen_reg_rtx (V16QImode);
2919 /* Unpack data such that we've got a source byte in each low byte of
2920 each word. We don't care what goes into the high byte of each word.
2921 Rather than trying to get zero in there, most convenient is to let
2922 it be a copy of the low byte. */
2923 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2924 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2925 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2926 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2928 /* Multiply words. The end-of-line annotations here give a picture of what
2929 the output of that instruction looks like. Dot means don't care; the
2930 letters are the bytes of the result with A being the most significant. */
2931 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2932 gen_lowpart (V8HImode, t[0]),
2933 gen_lowpart (V8HImode, t[1])));
2934 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2935 gen_lowpart (V8HImode, t[2]),
2936 gen_lowpart (V8HImode, t[3])));
2938 /* Extract the relevant bytes and merge them back together. */
2939 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2940 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2941 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2942 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2943 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2944 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2947 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  The preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
2951 (define_expand "mulv8hi3"
2952 [(set (match_operand:V8HI 0 "register_operand" "")
2953 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2954 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2956 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply emitted as pmullw.  Operand 1 is tied to the destination
;; and marked commutative ("%"); operand 2 may be a register or memory.
;; Requires SSE2 and ix86_binary_operator_ok.
2958 (define_insn "*mulv8hi3"
2959 [(set (match_operand:V8HI 0 "register_operand" "=x")
2960 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2961 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2962 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2963 "pmullw\t{%2, %0|%0, %2}"
2964 [(set_attr "type" "sseimul")
2965 (set_attr "prefix_data16" "1")
2966 (set_attr "mode" "TI")])
;; Expander named for the signed V8HI high-part multiply.  Most of the RTL
;; pattern is on lines not visible in this excerpt; the preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
2968 (define_expand "smulv8hi3_highpart"
2969 [(set (match_operand:V8HI 0 "register_operand" "")
2974 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2976 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2979 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn emitted as pmulhw.  Operand 1 is tied to the destination and marked
;; commutative ("%"); operand 2 may be a register or memory.  Requires SSE2
;; and ix86_binary_operator_ok.  Most of the RTL pattern is on lines not
;; visible in this excerpt.
2981 (define_insn "*smulv8hi3_highpart"
2982 [(set (match_operand:V8HI 0 "register_operand" "=x")
2987 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2989 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2991 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2992 "pmulhw\t{%2, %0|%0, %2}"
2993 [(set_attr "type" "sseimul")
2994 (set_attr "prefix_data16" "1")
2995 (set_attr "mode" "TI")])
;; Expander named for the unsigned V8HI high-part multiply.  Most of the
;; RTL pattern is on lines not visible in this excerpt; the preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
2997 (define_expand "umulv8hi3_highpart"
2998 [(set (match_operand:V8HI 0 "register_operand" "")
3003 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3005 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3008 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn emitted as pmulhuw.  Operand 1 is tied to the destination and
;; marked commutative ("%"); operand 2 may be a register or memory.
;; Requires SSE2 and ix86_binary_operator_ok.  Most of the RTL pattern is
;; on lines not visible in this excerpt.
3010 (define_insn "*umulv8hi3_highpart"
3011 [(set (match_operand:V8HI 0 "register_operand" "=x")
3016 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3018 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3020 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3021 "pmulhuw\t{%2, %0|%0, %2}"
3022 [(set_attr "type" "sseimul")
3023 (set_attr "prefix_data16" "1")
3024 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each of the
;; two V4SI inputs.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands).
3026 (define_expand "sse2_umulv2siv2di3"
3027 [(set (match_operand:V2DI 0 "register_operand" "")
3031 (match_operand:V4SI 1 "nonimmediate_operand" "")
3032 (parallel [(const_int 0) (const_int 2)])))
3035 (match_operand:V4SI 2 "nonimmediate_operand" "")
3036 (parallel [(const_int 0) (const_int 2)])))))]
3038 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq: V2DI result computed from elements 0 and 2 of each V4SI input.
;; Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be a register or memory.  Requires SSE2 and
;; ix86_binary_operator_ok.
3040 (define_insn "*sse2_umulv2siv2di3"
3041 [(set (match_operand:V2DI 0 "register_operand" "=x")
3045 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3046 (parallel [(const_int 0) (const_int 2)])))
3049 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3050 (parallel [(const_int 0) (const_int 2)])))))]
3051 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3052 "pmuludq\t{%2, %0|%0, %2}"
3053 [(set_attr "type" "sseimul")
3054 (set_attr "prefix_data16" "1")
3055 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each of the
;; two V4SI inputs (SSE4.1 variant by name).  The preparation statement
;; passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands).
3057 (define_expand "sse4_1_mulv2siv2di3"
3058 [(set (match_operand:V2DI 0 "register_operand" "")
3062 (match_operand:V4SI 1 "nonimmediate_operand" "")
3063 (parallel [(const_int 0) (const_int 2)])))
3066 (match_operand:V4SI 2 "nonimmediate_operand" "")
3067 (parallel [(const_int 0) (const_int 2)])))))]
3069 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq: V2DI result computed from elements 0 and 2 of each V4SI input.
;; Operand 1 is tied to the destination and marked commutative ("%");
;; operand 2 may be a register or memory.  Requires SSE4.1 and
;; ix86_binary_operator_ok.
3071 (define_insn "*sse4_1_mulv2siv2di3"
3072 [(set (match_operand:V2DI 0 "register_operand" "=x")
3076 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3077 (parallel [(const_int 0) (const_int 2)])))
3080 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3081 (parallel [(const_int 0) (const_int 2)])))))]
3082 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3083 "pmuldq\t{%2, %0|%0, %2}"
3084 [(set_attr "type" "sseimul")
3085 (set_attr "prefix_extra" "1")
3086 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V4SI result from two V8HI inputs.  The pattern
;; selects V4HI sub-vectors of both operands twice, once with a selector
;; starting at index 0 and once with a selector starting at index 1 and
;; ending at index 7; the remaining selector entries and the combining
;; operations are on lines not visible in this excerpt.  The preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
3088 (define_expand "sse2_pmaddwd"
3089 [(set (match_operand:V4SI 0 "register_operand" "")
3094 (match_operand:V8HI 1 "nonimmediate_operand" "")
3095 (parallel [(const_int 0)
3101 (match_operand:V8HI 2 "nonimmediate_operand" "")
3102 (parallel [(const_int 0)
3108 (vec_select:V4HI (match_dup 1)
3109 (parallel [(const_int 1)
3114 (vec_select:V4HI (match_dup 2)
3115 (parallel [(const_int 1)
3118 (const_int 7)]))))))]
3120 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd insn: V4SI result from two V8HI inputs, using the same pattern
;; shape as the sse2_pmaddwd expander (V4HI selections of both operands
;; with selectors starting at index 0 and at index 1).  Operand 1 is tied
;; to the destination and marked commutative ("%"); operand 2 may be a
;; register or memory.  Requires SSE2 and ix86_binary_operator_ok.
3122 (define_insn "*sse2_pmaddwd"
3123 [(set (match_operand:V4SI 0 "register_operand" "=x")
3128 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3129 (parallel [(const_int 0)
3135 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3136 (parallel [(const_int 0)
3142 (vec_select:V4HI (match_dup 1)
3143 (parallel [(const_int 1)
3148 (vec_select:V4HI (match_dup 2)
3149 (parallel [(const_int 1)
3152 (const_int 7)]))))))]
3153 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3154 "pmaddwd\t{%2, %0|%0, %2}"
3155 [(set_attr "type" "sseiadd")
3156 (set_attr "prefix_data16" "1")
3157 (set_attr "mode" "TI")])
;; V4SI multiply expander; both inputs must be registers.  Only when
;; SSE4.1 or SSE5 is enabled does the preparation code pass the operands
;; through ix86_fixup_binary_operands_no_copy.
3159 (define_expand "mulv4si3"
3160 [(set (match_operand:V4SI 0 "register_operand" "")
3161 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3162 (match_operand:V4SI 2 "register_operand" "")))]
3165 if (TARGET_SSE4_1 || TARGET_SSE5)
3166 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; SSE4.1 V4SI multiply, emitted as pmulld.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be a register or memory.
3169 (define_insn "*sse4_1_mulv4si3"
3170 [(set (match_operand:V4SI 0 "register_operand" "=x")
3171 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3172 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3173 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3174 "pmulld\t{%2, %0|%0, %2}"
3175 [(set_attr "type" "sseimul")
3176 (set_attr "prefix_extra" "1")
3177 (set_attr "mode" "TI")])
3179 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3180 ;; multiply/add. In general, we expect the define_split to occur before
3181 ;; register allocation, so we have to handle the corner case where the target
3182 ;; is the same as one of the inputs.
;; The destination is earlyclobber ("=&x").  The split condition allows
;; the split once reload has completed, or earlier when operand 0 is not
;; mentioned in either input.  Operand 3 is set to the V4SI zero vector
;; for use by the replacement plus/mult pattern.
3183 (define_insn_and_split "*sse5_mulv4si3"
3184 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3185 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3186 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3189 "&& (reload_completed
3190 || (!reg_mentioned_p (operands[0], operands[1])
3191 && !reg_mentioned_p (operands[0], operands[2])))"
3195 (plus:V4SI (mult:V4SI (match_dup 1)
3199 operands[3] = CONST0_RTX (V4SImode);
3201 [(set_attr "type" "ssemuladd")
3202 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor SSE5.
;; The condition rejects reload_completed and reload_in_progress, so the
;; pattern is only available before reload.  The split code uses two
;; gen_sse2_umulv2siv2di3 multiplies (elements 0/2, then elements 1/3
;; after a TImode right shift of both inputs), moves the wanted results
;; down with two pshufd shuffles, and merges them with punpckldq.
3204 (define_insn_and_split "*sse2_mulv4si3"
3205 [(set (match_operand:V4SI 0 "register_operand" "")
3206 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3207 (match_operand:V4SI 2 "register_operand" "")))]
3208 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3209 && !(reload_completed || reload_in_progress)"
3214 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3220 t1 = gen_reg_rtx (V4SImode);
3221 t2 = gen_reg_rtx (V4SImode);
3222 t3 = gen_reg_rtx (V4SImode);
3223 t4 = gen_reg_rtx (V4SImode);
3224 t5 = gen_reg_rtx (V4SImode);
3225 t6 = gen_reg_rtx (V4SImode);
3226 thirtytwo = GEN_INT (32);
3228 /* Multiply elements 2 and 0. */
3229 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3232 /* Shift both input vectors down one element, so that elements 3
3233 and 1 are now in the slots for elements 2 and 0. For K8, at
3234 least, this is faster than using a shuffle. */
3235 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3236 gen_lowpart (TImode, op1),
3238 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3239 gen_lowpart (TImode, op2),
3241 /* Multiply elements 3 and 1. */
3242 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3245 /* Move the results in element 2 down to element 1; we don't care
3246 what goes in elements 2 and 3. */
3247 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3248 const0_rtx, const0_rtx));
3249 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3250 const0_rtx, const0_rtx));
3252 /* Merge the parts back together. */
3253 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
3257 (define_insn_and_split "mulv2di3"
3258 [(set (match_operand:V2DI 0 "register_operand" "")
3259 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3260 (match_operand:V2DI 2 "register_operand" "")))]
3262 && !(reload_completed || reload_in_progress)"
3267 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3272 /* op1: A,B,C,D, op2: E,F,G,H */
3274 op1 = gen_lowpart (V4SImode, operands[1]);
3275 op2 = gen_lowpart (V4SImode, operands[2]);
3276 t1 = gen_reg_rtx (V4SImode);
3277 t2 = gen_reg_rtx (V4SImode);
3278 t3 = gen_reg_rtx (V4SImode);
3279 t4 = gen_reg_rtx (V2DImode);
3280 t5 = gen_reg_rtx (V2DImode);
3283 emit_insn (gen_sse2_pshufd_1 (t1, op1,
3290 emit_move_insn (t2, CONST0_RTX (V4SImode));
3292 /* t3: (B*E),(A*F),(D*G),(C*H) */
3293 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
3295 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
3296 emit_insn (gen_sse5_phadddq (t4, t3));
3298 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
3299 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
3301 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
3302 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
3309 t1 = gen_reg_rtx (V2DImode);
3310 t2 = gen_reg_rtx (V2DImode);
3311 t3 = gen_reg_rtx (V2DImode);
3312 t4 = gen_reg_rtx (V2DImode);
3313 t5 = gen_reg_rtx (V2DImode);
3314 t6 = gen_reg_rtx (V2DImode);
3315 thirtytwo = GEN_INT (32);
3317 /* Multiply low parts. */
3318 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3319 gen_lowpart (V4SImode, op2)));
3321 /* Shift input vectors right 32 bits so we can multiply high parts. */
3322 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3323 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3325 /* Multiply high parts by low parts. */
3326 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3327 gen_lowpart (V4SImode, t3)));
3328 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3329 gen_lowpart (V4SImode, t2)));
3331 /* Shift them back. */
3332 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3333 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3335 /* Add the three parts together. */
3336 emit_insn (gen_addv2di3 (t6, t1, t4));
3337 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed V8HI multiply, high half.  Computes the V8HI products
;; (gen_mulv8hi3) and the signed high parts (gen_smulv8hi3_highpart),
;; then interleaves their high halves into operand 0 viewed as V8HI.
3341 (define_expand "vec_widen_smult_hi_v8hi"
3342 [(match_operand:V4SI 0 "register_operand" "")
3343 (match_operand:V8HI 1 "register_operand" "")
3344 (match_operand:V8HI 2 "register_operand" "")]
3347 rtx op1, op2, t1, t2, dest;
3351 t1 = gen_reg_rtx (V8HImode);
3352 t2 = gen_reg_rtx (V8HImode);
3353 dest = gen_lowpart (V8HImode, operands[0]);
3355 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3356 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3357 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed V8HI multiply, low half.  Computes the V8HI products
;; (gen_mulv8hi3) and the signed high parts (gen_smulv8hi3_highpart),
;; then interleaves their low halves into operand 0 viewed as V8HI.
3361 (define_expand "vec_widen_smult_lo_v8hi"
3362 [(match_operand:V4SI 0 "register_operand" "")
3363 (match_operand:V8HI 1 "register_operand" "")
3364 (match_operand:V8HI 2 "register_operand" "")]
3367 rtx op1, op2, t1, t2, dest;
3371 t1 = gen_reg_rtx (V8HImode);
3372 t2 = gen_reg_rtx (V8HImode);
3373 dest = gen_lowpart (V8HImode, operands[0]);
3375 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3376 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3377 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, high half.  Computes the V8HI
;; products (gen_mulv8hi3) and the unsigned high parts
;; (gen_umulv8hi3_highpart), then interleaves their high halves into
;; operand 0 viewed as V8HI.
3381 (define_expand "vec_widen_umult_hi_v8hi"
3382 [(match_operand:V4SI 0 "register_operand" "")
3383 (match_operand:V8HI 1 "register_operand" "")
3384 (match_operand:V8HI 2 "register_operand" "")]
3387 rtx op1, op2, t1, t2, dest;
3391 t1 = gen_reg_rtx (V8HImode);
3392 t2 = gen_reg_rtx (V8HImode);
3393 dest = gen_lowpart (V8HImode, operands[0]);
3395 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3396 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3397 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, low half.  Computes the V8HI
;; products (gen_mulv8hi3) and the unsigned high parts
;; (gen_umulv8hi3_highpart), then interleaves their low halves into
;; operand 0 viewed as V8HI.
3401 (define_expand "vec_widen_umult_lo_v8hi"
3402 [(match_operand:V4SI 0 "register_operand" "")
3403 (match_operand:V8HI 1 "register_operand" "")
3404 (match_operand:V8HI 2 "register_operand" "")]
3407 rtx op1, op2, t1, t2, dest;
3411 t1 = gen_reg_rtx (V8HImode);
3412 t2 = gen_reg_rtx (V8HImode);
3413 dest = gen_lowpart (V8HImode, operands[0]);
3415 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3416 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3417 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed V4SI multiply, high half.  Each input is shuffled
;; with pshufd into a fresh V4SI temporary, and the two temporaries are
;; passed to gen_sse5_mulv2div2di3_high to produce the V2DI result.
3421 (define_expand "vec_widen_smult_hi_v4si"
3422 [(match_operand:V2DI 0 "register_operand" "")
3423 (match_operand:V4SI 1 "register_operand" "")
3424 (match_operand:V4SI 2 "register_operand" "")]
3429 t1 = gen_reg_rtx (V4SImode);
3430 t2 = gen_reg_rtx (V4SImode);
3432 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3437 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3442 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed V4SI multiply, low half.  Each input is shuffled with
;; pshufd into a fresh V4SI temporary, and the two temporaries are
;; passed to gen_sse5_mulv2div2di3_low to produce the V2DI result.
3446 (define_expand "vec_widen_smult_lo_v4si"
3447 [(match_operand:V2DI 0 "register_operand" "")
3448 (match_operand:V4SI 1 "register_operand" "")
3449 (match_operand:V4SI 2 "register_operand" "")]
3454 t1 = gen_reg_rtx (V4SImode);
3455 t2 = gen_reg_rtx (V4SImode);
3457 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3462 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3467 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, high half.  Each input is
;; high-interleaved with itself, and the two results are multiplied with
;; gen_sse2_umulv2siv2di3 into the V2DI destination.
3472 (define_expand "vec_widen_umult_hi_v4si"
3473 [(match_operand:V2DI 0 "register_operand" "")
3474 (match_operand:V4SI 1 "register_operand" "")
3475 (match_operand:V4SI 2 "register_operand" "")]
3478 rtx op1, op2, t1, t2;
3482 t1 = gen_reg_rtx (V4SImode);
3483 t2 = gen_reg_rtx (V4SImode);
3485 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3486 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3487 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, low half.  Each input is
;; low-interleaved with itself, and the two results are multiplied with
;; gen_sse2_umulv2siv2di3 into the V2DI destination.
3491 (define_expand "vec_widen_umult_lo_v4si"
3492 [(match_operand:V2DI 0 "register_operand" "")
3493 (match_operand:V4SI 1 "register_operand" "")
3494 (match_operand:V4SI 2 "register_operand" "")]
3497 rtx op1, op2, t1, t2;
3501 t1 = gen_reg_rtx (V4SImode);
3502 t2 = gen_reg_rtx (V4SImode);
3504 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3505 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3506 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product of two V8HI vectors accumulated into V4SI:
;; t = pmaddwd (operand 1, operand 2), then operand 0 = operand 3 + t.
3510 (define_expand "sdot_prodv8hi"
3511 [(match_operand:V4SI 0 "register_operand" "")
3512 (match_operand:V8HI 1 "register_operand" "")
3513 (match_operand:V8HI 2 "register_operand" "")
3514 (match_operand:V4SI 3 "register_operand" "")]
3517 rtx t = gen_reg_rtx (V4SImode);
3518 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3519 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI vectors accumulated into V2DI.
;; t1 = umulv2siv2di3 (operand 1, operand 2) + operand 3.  Both inputs
;; are then shifted right as TImode values into t2 and t3, and
;; t4 = umulv2siv2di3 (t2, t3).  The result is operand 0 = t1 + t4.
3523 (define_expand "udot_prodv4si"
3524 [(match_operand:V2DI 0 "register_operand" "")
3525 (match_operand:V4SI 1 "register_operand" "")
3526 (match_operand:V4SI 2 "register_operand" "")
3527 (match_operand:V2DI 3 "register_operand" "")]
3532 t1 = gen_reg_rtx (V2DImode);
3533 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3534 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3536 t2 = gen_reg_rtx (V4SImode);
3537 t3 = gen_reg_rtx (V4SImode);
3538 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3539 gen_lowpart (TImode, operands[1]),
3541 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3542 gen_lowpart (TImode, operands[2]),
3545 t4 = gen_reg_rtx (V2DImode);
3546 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3548 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; psra<ssevecsize> shift for the SSEMODE24 modes (V8HI, V4SI).  The
;; source is tied to the destination; the SI count operand uses the
;; constraint string "xN".
3552 (define_insn "ashr<mode>3"
3553 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3555 (match_operand:SSEMODE24 1 "register_operand" "0")
3556 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3558 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3559 [(set_attr "type" "sseishft")
3560 (set_attr "prefix_data16" "1")
3561 (set_attr "mode" "TI")])
;; Logical right shift (psrl<ssevecsize>) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI).  The source is tied to the destination; the SI
;; count operand uses the constraint string "xN".
3563 (define_insn "lshr<mode>3"
3564 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3565 (lshiftrt:SSEMODE248
3566 (match_operand:SSEMODE248 1 "register_operand" "0")
3567 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3569 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3570 [(set_attr "type" "sseishft")
3571 (set_attr "prefix_data16" "1")
3572 (set_attr "mode" "TI")])
;; psll<ssevecsize> shift for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; The source is tied to the destination; the SI count operand uses the
;; constraint string "xN".
3574 (define_insn "ashl<mode>3"
3575 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3577 (match_operand:SSEMODE248 1 "register_operand" "0")
3578 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3580 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3581 [(set_attr "type" "sseishft")
3582 (set_attr "prefix_data16" "1")
3583 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes, expressed as a TImode
;; ashift.  The count must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code replaces operands 0 and 1 with their TImode lowparts.
3585 (define_expand "vec_shl_<mode>"
3586 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3587 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3588 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3591 operands[0] = gen_lowpart (TImode, operands[0]);
3592 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the SSEMODEI modes, expressed as
;; a TImode lshiftrt.  The count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code replaces operands
;; 0 and 1 with their TImode lowparts.
3595 (define_expand "vec_shr_<mode>"
3596 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3597 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3598 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3601 operands[0] = gen_lowpart (TImode, operands[0]);
3602 operands[1] = gen_lowpart (TImode, operands[1]);
;; V16QI expander for the iterated <code> operation.  The preparation
;; statement fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.
3605 (define_expand "<code>v16qi3"
3606 [(set (match_operand:V16QI 0 "register_operand" "")
3608 (match_operand:V16QI 1 "nonimmediate_operand" "")
3609 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3611 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for the iterated V16QI <code> operation, emitted as
;; p<maxminiprefix>b.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3613 (define_insn "*<code>v16qi3"
3614 [(set (match_operand:V16QI 0 "register_operand" "=x")
3616 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3617 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3618 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3619 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3620 [(set_attr "type" "sseiadd")
3621 (set_attr "prefix_data16" "1")
3622 (set_attr "mode" "TI")])
;; V8HI expander for the iterated <code> operation.  The preparation
;; statement fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.
3624 (define_expand "<code>v8hi3"
3625 [(set (match_operand:V8HI 0 "register_operand" "")
3627 (match_operand:V8HI 1 "nonimmediate_operand" "")
3628 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3630 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for the iterated V8HI <code> operation, emitted as
;; p<maxminiprefix>w.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3632 (define_insn "*<code>v8hi3"
3633 [(set (match_operand:V8HI 0 "register_operand" "=x")
3635 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3636 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3637 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3638 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3639 [(set_attr "type" "sseiadd")
3640 (set_attr "prefix_data16" "1")
3641 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two code paths are visible.  One only fixes
;; up the operands for a UMAX insn.  The other emits
;; gen_sse2_ussubv8hi3 (op3, operand 1, operand 2) followed by
;; gen_addv8hi3 (operand 0, op3, operand 2), where op3 is operand 0
;; unless that is rtx_equal_p to operand 2, in which case a fresh
;; register is used so operand 2 is not overwritten before the add.
3643 (define_expand "umaxv8hi3"
3644 [(set (match_operand:V8HI 0 "register_operand" "")
3645 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3646 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3650 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3653 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3654 if (rtx_equal_p (op3, op2))
3655 op3 = gen_reg_rtx (V8HImode);
3656 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3657 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  One visible
;; path fixes up the operands for an SMAX insn.  The other builds a
;; vcond-style operand array, operand 0 = (op1 > op2) ? op1 : op2 with a
;; GT comparison, and passes it to ix86_expand_int_vcond.
3662 (define_expand "smax<mode>3"
3663 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3664 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3665 (match_operand:SSEMODE14 2 "register_operand" "")))]
3669 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3675 xops[0] = operands[0];
3676 xops[1] = operands[1];
3677 xops[2] = operands[2];
3678 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3679 xops[4] = operands[1];
3680 xops[5] = operands[2];
3681 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the iterated <code> operation on the SSEMODE14 modes
;; (V16QI, V4SI), emitted as p<maxminiprefix><ssevecsize>.  Operand 1 is
;; commutative and tied to the destination.
3687 (define_insn "*sse4_1_<code><mode>3"
3688 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3690 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3691 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3692 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3693 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3694 [(set_attr "type" "sseiadd")
3695 (set_attr "prefix_extra" "1")
3696 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  One visible path fixes up the operands for a
;; UMAX insn.  The other builds a vcond-style operand array,
;; operand 0 = (op1 >u op2) ? op1 : op2 with a GTU comparison, and
;; passes it to ix86_expand_int_vcond.
3698 (define_expand "umaxv4si3"
3699 [(set (match_operand:V4SI 0 "register_operand" "")
3700 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3701 (match_operand:V4SI 2 "register_operand" "")))]
3705 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3711 xops[0] = operands[0];
3712 xops[1] = operands[1];
3713 xops[2] = operands[2];
3714 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3715 xops[4] = operands[1];
3716 xops[5] = operands[2];
3717 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the iterated <code> operation on the SSEMODE24 modes
;; (V8HI, V4SI), emitted as p<maxminiprefix><ssevecsize>.  Operand 1 is
;; commutative and tied to the destination.
3723 (define_insn "*sse4_1_<code><mode>3"
3724 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3726 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3727 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3728 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3729 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3730 [(set_attr "type" "sseiadd")
3731 (set_attr "prefix_extra" "1")
3732 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  One visible
;; path fixes up the operands for an SMIN insn.  The other reuses the GT
;; comparison with the selected values swapped,
;; operand 0 = (op1 > op2) ? op2 : op1, and passes the array to
;; ix86_expand_int_vcond.
3734 (define_expand "smin<mode>3"
3735 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3736 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3737 (match_operand:SSEMODE14 2 "register_operand" "")))]
3741 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3747 xops[0] = operands[0];
3748 xops[1] = operands[2];
3749 xops[2] = operands[1];
3750 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3751 xops[4] = operands[1];
3752 xops[5] = operands[2];
3753 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  One visible
;; path fixes up the operands for a UMIN insn.  The other uses a GTU
;; comparison with the selected values swapped,
;; operand 0 = (op1 >u op2) ? op2 : op1, and passes the array to
;; ix86_expand_int_vcond.
3759 (define_expand "umin<mode>3"
3760 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3761 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3762 (match_operand:SSEMODE24 2 "register_operand" "")))]
3766 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3772 xops[0] = operands[0];
3773 xops[1] = operands[2];
3774 xops[2] = operands[1];
3775 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3776 xops[4] = operands[1];
3777 xops[5] = operands[2];
3778 ok = ix86_expand_int_vcond (xops);
3784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3786 ;; Parallel integral comparisons
3788 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality-compare expander for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), enabled for SSE2 without SSE5.  The preparation statement
;; fixes up the operands for EQ.
3790 (define_expand "sse2_eq<mode>3"
3791 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3793 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
3794 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
3795 "TARGET_SSE2 && !TARGET_SSE5"
3796 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; pcmpeq<ssevecsize> insn for the SSEMODE124 modes, enabled for SSE2
;; without SSE5.  Operand 1 is commutative and tied to the destination.
3798 (define_insn "*sse2_eq<mode>3"
3799 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3801 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3802 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3803 "TARGET_SSE2 && !TARGET_SSE5
3804 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3805 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3806 [(set_attr "type" "ssecmp")
3807 (set_attr "prefix_data16" "1")
3808 (set_attr "mode" "TI")])
;; V2DI equality-compare expander.  The preparation statement fixes up
;; the operands for EQ in V2DImode.
3810 (define_expand "sse4_1_eqv2di3"
3811 [(set (match_operand:V2DI 0 "register_operand" "")
3813 (match_operand:V2DI 1 "nonimmediate_operand" "")
3814 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
3816 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 pcmpeqq insn for V2DI.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be a register or memory.
3818 (define_insn "*sse4_1_eqv2di3"
3819 [(set (match_operand:V2DI 0 "register_operand" "=x")
3821 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3822 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3823 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3824 "pcmpeqq\t{%2, %0|%0, %2}"
3825 [(set_attr "type" "ssecmp")
3826 (set_attr "prefix_extra" "1")
3827 (set_attr "mode" "TI")])
;; pcmpgt<ssevecsize> insn for the SSEMODE124 modes, enabled for SSE2
;; without SSE5.  Operand 1 is not marked commutative: it must be a
;; register tied to the destination.
3829 (define_insn "sse2_gt<mode>3"
3830 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3832 (match_operand:SSEMODE124 1 "register_operand" "0")
3833 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3834 "TARGET_SSE2 && !TARGET_SSE5"
3835 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "ssecmp")
3837 (set_attr "prefix_data16" "1")
3838 (set_attr "mode" "TI")])
;; pcmpgtq insn for V2DI.  Operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
;; pcmpgtq is encoded with a three-byte (0F 38) opcode, so prefix_extra
;; is set here just as it is for pcmpeqq in *sse4_1_eqv2di3; without it
;; the computed instruction length is one byte short.
3840 (define_insn "sse4_2_gtv2di3"
3841 [(set (match_operand:V2DI 0 "register_operand" "=x")
3843 (match_operand:V2DI 1 "register_operand" "0")
3844 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3846 "pcmpgtq\t{%2, %0|%0, %2}"
3847 [(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
3848 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODEI modes: operand 0 =
;; (operand 4 <op3> operand 5) ? operand 1 : operand 2.  The expansion
;; is delegated to ix86_expand_int_vcond, whose return value is tested.
3850 (define_expand "vcond<mode>"
3851 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3852 (if_then_else:SSEMODEI
3853 (match_operator 3 ""
3854 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3855 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3856 (match_operand:SSEMODEI 1 "general_operand" "")
3857 (match_operand:SSEMODEI 2 "general_operand" "")))]
3860 if (ix86_expand_int_vcond (operands))
;; vcondu counterpart of vcond<mode> for the SSEMODEI modes.  The
;; visible pattern has the same shape and is likewise delegated to
;; ix86_expand_int_vcond.
3866 (define_expand "vcondu<mode>"
3867 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3868 (if_then_else:SSEMODEI
3869 (match_operator 3 ""
3870 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3871 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3872 (match_operand:SSEMODEI 1 "general_operand" "")
3873 (match_operand:SSEMODEI 2 "general_operand" "")))]
3876 if (ix86_expand_int_vcond (operands))
3882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3884 ;; Parallel bitwise logical operations
3886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an xor.  The
;; preparation code builds a constant vector whose elements are all
;; constm1_rtx, forces it into a register, and stores it as operand 2.
3888 (define_expand "one_cmpl<mode>2"
3889 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3890 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3894 int i, n = GET_MODE_NUNITS (<MODE>mode);
3895 rtvec v = rtvec_alloc (n);
3897 for (i = 0; i < n; ++i)
3898 RTVEC_ELT (v, i) = constm1_rtx;
3900 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; SSE-only (no SSE2) pattern for the SSEMODEI modes, emitted as andnps.
;; The complemented operand 1 is tied to the destination; the insn is
;; given mode V4SF.
3903 (define_insn "*sse_nand<mode>3"
3904 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3906 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3907 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3908 "(TARGET_SSE && !TARGET_SSE2)"
3909 "andnps\t{%2, %0|%0, %2}"
3910 [(set_attr "type" "sselog")
3911 (set_attr "mode" "V4SF")])
;; pandn pattern for the SSEMODEI modes.  The complemented operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
3913 (define_insn "sse2_nand<mode>3"
3914 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3916 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3917 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3919 "pandn\t{%2, %0|%0, %2}"
3920 [(set_attr "type" "sselog")
3921 (set_attr "prefix_data16" "1")
3922 (set_attr "mode" "TI")])
;; pandn pattern for TFmode values.  The complemented operand 1 is tied
;; to the destination; operand 2 may be a register or memory.
3924 (define_insn "*nandtf3"
3925 [(set (match_operand:TF 0 "register_operand" "=x")
3927 (not:TF (match_operand:TF 1 "register_operand" "0"))
3928 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3930 "pandn\t{%2, %0|%0, %2}"
3931 [(set_attr "type" "sselog")
3932 (set_attr "prefix_data16" "1")
3933 (set_attr "mode" "TI")])
;; Expander for the iterated <code> operation on the SSEMODEI modes.
;; The preparation statement fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.
3935 (define_expand "<code><mode>3"
3936 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3938 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3939 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3941 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE-only (no SSE2) insn for the iterated <code> operation on the
;; SSEMODEI modes, emitted as <plogicprefix>ps with insn mode V4SF.
;; Operand 1 is commutative and tied to the destination.
3943 (define_insn "*sse_<code><mode>3"
3944 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3946 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3947 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3948 "(TARGET_SSE && !TARGET_SSE2)
3949 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3950 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3951 [(set_attr "type" "sselog")
3952 (set_attr "mode" "V4SF")])
;; SSE2 insn for the iterated <code> operation on the SSEMODEI modes,
;; emitted as p<plogicprefix>.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3954 (define_insn "*sse2_<code><mode>3"
3955 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3957 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3958 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3959 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3960 "p<plogicprefix>\t{%2, %0|%0, %2}"
3961 [(set_attr "type" "sselog")
3962 (set_attr "prefix_data16" "1")
3963 (set_attr "mode" "TI")])
;; TFmode expander for the iterated <code> operation.  The preparation
;; statement fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.
3965 (define_expand "<code>tf3"
3966 [(set (match_operand:TF 0 "register_operand" "")
3968 (match_operand:TF 1 "nonimmediate_operand" "")
3969 (match_operand:TF 2 "nonimmediate_operand" "")))]
3971 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode insn for the iterated <code> operation, enabled for
;; TARGET_64BIT and emitted as p<plogicprefix>.  Operand 1 is
;; commutative and tied to the destination.
3973 (define_insn "*<code>tf3"
3974 [(set (match_operand:TF 0 "register_operand" "=x")
3976 (match_operand:TF 1 "nonimmediate_operand" "%0")
3977 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3978 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3979 "p<plogicprefix>\t{%2, %0|%0, %2}"
3980 [(set_attr "type" "sselog")
3981 (set_attr "prefix_data16" "1")
3982 (set_attr "mode" "TI")])
3984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3986 ;; Parallel integral element swizzling
3988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI vectors into one V16QI.  One visible path calls
;; ix86_expand_sse5_pack.  The other works on the V16QI views of the
;; inputs and applies seven byte interleaves; the letter diagram below
;; traces each intermediate value, one letter per byte.
3991 ;; op1 = abcdefghijklmnop
3992 ;; op2 = qrstuvwxyz012345
3993 ;; h1 = aqbrcsdteufvgwhx
3994 ;; l1 = iyjzk0l1m2n3o4p5
3995 ;; h2 = aiqybjrzcks0dlt1
3996 ;; l2 = emu2fnv3gow4hpx5
3997 ;; h3 = aeimquy2bfjnrvz3
3998 ;; l3 = cgkosw04dhlptx15
3999 ;; result = bdfhjlnprtvxz135
4000 (define_expand "vec_pack_trunc_v8hi"
4001 [(match_operand:V16QI 0 "register_operand" "")
4002 (match_operand:V8HI 1 "register_operand" "")
4003 (match_operand:V8HI 2 "register_operand" "")]
4006 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4010 ix86_expand_sse5_pack (operands);
4014 op1 = gen_lowpart (V16QImode, operands[1]);
4015 op2 = gen_lowpart (V16QImode, operands[2]);
4016 h1 = gen_reg_rtx (V16QImode);
4017 l1 = gen_reg_rtx (V16QImode);
4018 h2 = gen_reg_rtx (V16QImode);
4019 l2 = gen_reg_rtx (V16QImode);
4020 h3 = gen_reg_rtx (V16QImode);
4021 l3 = gen_reg_rtx (V16QImode);
4023 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4024 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4025 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4026 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4027 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4028 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4029 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Pack two V4SI vectors into one V8HI.  One visible path calls
;; ix86_expand_sse5_pack.  The other works on the V8HI views of the
;; inputs and applies five halfword interleaves.
4040 ;; result = bdfhjlnp
4041 (define_expand "vec_pack_trunc_v4si"
4042 [(match_operand:V8HI 0 "register_operand" "")
4043 (match_operand:V4SI 1 "register_operand" "")
4044 (match_operand:V4SI 2 "register_operand" "")]
4047 rtx op1, op2, h1, l1, h2, l2;
4051 ix86_expand_sse5_pack (operands);
4055 op1 = gen_lowpart (V8HImode, operands[1]);
4056 op2 = gen_lowpart (V8HImode, operands[2]);
4057 h1 = gen_reg_rtx (V8HImode);
4058 l1 = gen_reg_rtx (V8HImode);
4059 h2 = gen_reg_rtx (V8HImode);
4060 l2 = gen_reg_rtx (V8HImode);
4062 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4063 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4064 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4065 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4066 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI vectors into one V4SI.  One visible path calls
;; ix86_expand_sse5_pack.  The other works on the V4SI views of the
;; inputs and applies three doubleword interleaves.
4076 (define_expand "vec_pack_trunc_v2di"
4077 [(match_operand:V4SI 0 "register_operand" "")
4078 (match_operand:V2DI 1 "register_operand" "")
4079 (match_operand:V2DI 2 "register_operand" "")]
4082 rtx op1, op2, h1, l1;
4086 ix86_expand_sse5_pack (operands);
4090 op1 = gen_lowpart (V4SImode, operands[1]);
4091 op2 = gen_lowpart (V4SImode, operands[2]);
4092 h1 = gen_reg_rtx (V4SImode);
4093 l1 = gen_reg_rtx (V4SImode);
4095 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4096 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4097 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; High-half byte interleave of two V16QI vectors (selection indices
;; 8,24,9,25,...,15,31).  The preparation code emits the sse2_punpckhbw
;; insn directly.
4101 (define_expand "vec_interleave_highv16qi"
4102 [(set (match_operand:V16QI 0 "register_operand" "")
4105 (match_operand:V16QI 1 "register_operand" "")
4106 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4107 (parallel [(const_int 8) (const_int 24)
4108 (const_int 9) (const_int 25)
4109 (const_int 10) (const_int 26)
4110 (const_int 11) (const_int 27)
4111 (const_int 12) (const_int 28)
4112 (const_int 13) (const_int 29)
4113 (const_int 14) (const_int 30)
4114 (const_int 15) (const_int 31)])))]
4117 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Low-half byte interleave of two V16QI vectors (selection indices
;; 0,16,1,17,...,7,23).  The preparation code emits the sse2_punpcklbw
;; insn directly.
4121 (define_expand "vec_interleave_lowv16qi"
4122 [(set (match_operand:V16QI 0 "register_operand" "")
4125 (match_operand:V16QI 1 "register_operand" "")
4126 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4127 (parallel [(const_int 0) (const_int 16)
4128 (const_int 1) (const_int 17)
4129 (const_int 2) (const_int 18)
4130 (const_int 3) (const_int 19)
4131 (const_int 4) (const_int 20)
4132 (const_int 5) (const_int 21)
4133 (const_int 6) (const_int 22)
4134 (const_int 7) (const_int 23)])))]
4137 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; High-half halfword interleave of two V8HI vectors (selection indices
;; 4,12,5,13,6,14,7,15).  The preparation code emits the sse2_punpckhwd
;; insn directly.  Operand 0 carries an empty constraint string, as in
;; the other vec_interleave_* expanders in this file.
4141 (define_expand "vec_interleave_highv8hi"
4142 [(set (match_operand:V8HI 0 "register_operand" "")
4145 (match_operand:V8HI 1 "register_operand" "")
4146 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4147 (parallel [(const_int 4) (const_int 12)
4148 (const_int 5) (const_int 13)
4149 (const_int 6) (const_int 14)
4150 (const_int 7) (const_int 15)])))]
4153 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Low-half halfword interleave of two V8HI vectors (selection indices
;; 0,8,1,9,2,10,3,11).  The preparation code emits the sse2_punpcklwd
;; insn directly.
4157 (define_expand "vec_interleave_lowv8hi"
4158 [(set (match_operand:V8HI 0 "register_operand" "")
4161 (match_operand:V8HI 1 "register_operand" "")
4162 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4163 (parallel [(const_int 0) (const_int 8)
4164 (const_int 1) (const_int 9)
4165 (const_int 2) (const_int 10)
4166 (const_int 3) (const_int 11)])))]
4169 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; High-half doubleword interleave of two V4SI vectors (selection
;; indices 2,6,3,7).  The preparation code emits the sse2_punpckhdq insn
;; directly.
4173 (define_expand "vec_interleave_highv4si"
4174 [(set (match_operand:V4SI 0 "register_operand" "")
4177 (match_operand:V4SI 1 "register_operand" "")
4178 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4179 (parallel [(const_int 2) (const_int 6)
4180 (const_int 3) (const_int 7)])))]
4183 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Low-half doubleword interleave of two V4SI vectors (selection indices
;; 0,4,1,5).  The preparation code emits the sse2_punpckldq insn
;; directly.
4187 (define_expand "vec_interleave_lowv4si"
4188 [(set (match_operand:V4SI 0 "register_operand" "")
4191 (match_operand:V4SI 1 "register_operand" "")
4192 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4193 (parallel [(const_int 0) (const_int 4)
4194 (const_int 1) (const_int 5)])))]
4197 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; High-half quadword interleave of two V2DI vectors; the visible
;; selection starts at index 1.  The preparation code emits the
;; sse2_punpckhqdq insn directly.
4201 (define_expand "vec_interleave_highv2di"
4202 [(set (match_operand:V2DI 0 "register_operand" "")
4205 (match_operand:V2DI 1 "register_operand" "")
4206 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4207 (parallel [(const_int 1)
4211 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Low-half quadword interleave of two V2DI vectors; the visible
;; selection starts at index 0.  The preparation code emits the
;; sse2_punpcklqdq insn directly.
4215 (define_expand "vec_interleave_lowv2di"
4216 [(set (match_operand:V2DI 0 "register_operand" "")
4219 (match_operand:V2DI 1 "register_operand" "")
4220 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4221 (parallel [(const_int 0)
4225 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; packsswb: two V8HI inputs packed into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
4229 (define_insn "sse2_packsswb"
4230 [(set (match_operand:V16QI 0 "register_operand" "=x")
4233 (match_operand:V8HI 1 "register_operand" "0"))
4235 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4237 "packsswb\t{%2, %0|%0, %2}"
4238 [(set_attr "type" "sselog")
4239 (set_attr "prefix_data16" "1")
4240 (set_attr "mode" "TI")])
;; packssdw: two V4SI inputs packed into one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
4242 (define_insn "sse2_packssdw"
4243 [(set (match_operand:V8HI 0 "register_operand" "=x")
4246 (match_operand:V4SI 1 "register_operand" "0"))
4248 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4250 "packssdw\t{%2, %0|%0, %2}"
4251 [(set_attr "type" "sselog")
4252 (set_attr "prefix_data16" "1")
4253 (set_attr "mode" "TI")])
;; packuswb: two V8HI inputs packed into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
4255 (define_insn "sse2_packuswb"
4256 [(set (match_operand:V16QI 0 "register_operand" "=x")
4259 (match_operand:V8HI 1 "register_operand" "0"))
4261 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4263 "packuswb\t{%2, %0|%0, %2}"
4264 [(set_attr "type" "sselog")
4265 (set_attr "prefix_data16" "1")
4266 (set_attr "mode" "TI")])
;; punpckhbw: high-half byte interleave of two V16QI vectors (selection
;; indices 8,24,9,25,...,15,31).  Operand 1 is tied to the destination.
4268 (define_insn "sse2_punpckhbw"
4269 [(set (match_operand:V16QI 0 "register_operand" "=x")
4272 (match_operand:V16QI 1 "register_operand" "0")
4273 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4274 (parallel [(const_int 8) (const_int 24)
4275 (const_int 9) (const_int 25)
4276 (const_int 10) (const_int 26)
4277 (const_int 11) (const_int 27)
4278 (const_int 12) (const_int 28)
4279 (const_int 13) (const_int 29)
4280 (const_int 14) (const_int 30)
4281 (const_int 15) (const_int 31)])))]
4283 "punpckhbw\t{%2, %0|%0, %2}"
4284 [(set_attr "type" "sselog")
4285 (set_attr "prefix_data16" "1")
4286 (set_attr "mode" "TI")])
;; punpcklbw: low-half byte interleave of two V16QI vectors (selection
;; indices 0,16,1,17,...,7,23).  Operand 1 is tied to the destination.
4288 (define_insn "sse2_punpcklbw"
4289 [(set (match_operand:V16QI 0 "register_operand" "=x")
4292 (match_operand:V16QI 1 "register_operand" "0")
4293 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4294 (parallel [(const_int 0) (const_int 16)
4295 (const_int 1) (const_int 17)
4296 (const_int 2) (const_int 18)
4297 (const_int 3) (const_int 19)
4298 (const_int 4) (const_int 20)
4299 (const_int 5) (const_int 21)
4300 (const_int 6) (const_int 22)
4301 (const_int 7) (const_int 23)])))]
4303 "punpcklbw\t{%2, %0|%0, %2}"
4304 [(set_attr "type" "sselog")
4305 (set_attr "prefix_data16" "1")
4306 (set_attr "mode" "TI")])
;; punpckhwd: 16-bit interleave of the high halves.  The selection list pairs
;; index N with index N+8 for N = 4..7; indices 12..15 presumably address
;; operand 2.  Operand 1 is tied to the destination.
4308 (define_insn "sse2_punpckhwd"
4309 [(set (match_operand:V8HI 0 "register_operand" "=x")
4312 (match_operand:V8HI 1 "register_operand" "0")
4313 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4314 (parallel [(const_int 4) (const_int 12)
4315 (const_int 5) (const_int 13)
4316 (const_int 6) (const_int 14)
4317 (const_int 7) (const_int 15)])))]
4319 "punpckhwd\t{%2, %0|%0, %2}"
4320 [(set_attr "type" "sselog")
4321 (set_attr "prefix_data16" "1")
4322 (set_attr "mode" "TI")])
;; punpcklwd: 16-bit interleave of the low halves.  The selection list pairs
;; index N with index N+8 for N = 0..3; indices 8..11 presumably address
;; operand 2.  Operand 1 is tied to the destination.
4324 (define_insn "sse2_punpcklwd"
4325 [(set (match_operand:V8HI 0 "register_operand" "=x")
4328 (match_operand:V8HI 1 "register_operand" "0")
4329 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4330 (parallel [(const_int 0) (const_int 8)
4331 (const_int 1) (const_int 9)
4332 (const_int 2) (const_int 10)
4333 (const_int 3) (const_int 11)])))]
4335 "punpcklwd\t{%2, %0|%0, %2}"
4336 [(set_attr "type" "sselog")
4337 (set_attr "prefix_data16" "1")
4338 (set_attr "mode" "TI")])
;; punpckhdq: 32-bit interleave of the high halves (selection indices
;; 2,6,3,7; indices 6 and 7 presumably address operand 2).  Operand 1 is
;; tied to the destination.
4340 (define_insn "sse2_punpckhdq"
4341 [(set (match_operand:V4SI 0 "register_operand" "=x")
4344 (match_operand:V4SI 1 "register_operand" "0")
4345 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4346 (parallel [(const_int 2) (const_int 6)
4347 (const_int 3) (const_int 7)])))]
4349 "punpckhdq\t{%2, %0|%0, %2}"
4350 [(set_attr "type" "sselog")
4351 (set_attr "prefix_data16" "1")
4352 (set_attr "mode" "TI")])
;; punpckldq: 32-bit interleave of the low halves (selection indices
;; 0,4,1,5; indices 4 and 5 presumably address operand 2).  Operand 1 is
;; tied to the destination.
4354 (define_insn "sse2_punpckldq"
4355 [(set (match_operand:V4SI 0 "register_operand" "=x")
4358 (match_operand:V4SI 1 "register_operand" "0")
4359 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4360 (parallel [(const_int 0) (const_int 4)
4361 (const_int 1) (const_int 5)])))]
4363 "punpckldq\t{%2, %0|%0, %2}"
4364 [(set_attr "type" "sselog")
4365 (set_attr "prefix_data16" "1")
4366 (set_attr "mode" "TI")])
;; punpckhqdq: 64-bit unpack on V2DI operands; the selection list starts at
;; element 1.  Operand 1 is tied to the destination, operand 2 may be an SSE
;; register or memory.
4368 (define_insn "sse2_punpckhqdq"
4369 [(set (match_operand:V2DI 0 "register_operand" "=x")
4372 (match_operand:V2DI 1 "register_operand" "0")
4373 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4374 (parallel [(const_int 1)
4377 "punpckhqdq\t{%2, %0|%0, %2}"
4378 [(set_attr "type" "sselog")
4379 (set_attr "prefix_data16" "1")
4380 (set_attr "mode" "TI")])
;; punpcklqdq: 64-bit unpack on V2DI operands; the selection list starts at
;; element 0.  Operand 1 is tied to the destination, operand 2 may be an SSE
;; register or memory.
4382 (define_insn "sse2_punpcklqdq"
4383 [(set (match_operand:V2DI 0 "register_operand" "=x")
4386 (match_operand:V2DI 1 "register_operand" "0")
4387 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4388 (parallel [(const_int 0)
4391 "punpcklqdq\t{%2, %0|%0, %2}"
4392 [(set_attr "type" "sselog")
4393 (set_attr "prefix_data16" "1")
4394 (set_attr "mode" "TI")])
;; pinsrb: insert the QImode value in operand 2 (register or memory) into
;; the V16QI in operand 1, which is tied to the destination.  Operand 3 is a
;; power-of-two constant in 1..32768; the output code converts it to the
;; immediate with exact_log2 before printing the instruction.
4396 (define_insn "*sse4_1_pinsrb"
4397 [(set (match_operand:V16QI 0 "register_operand" "=x")
4399 (vec_duplicate:V16QI
4400 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4401 (match_operand:V16QI 1 "register_operand" "0")
4402 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4405 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4406 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4408 [(set_attr "type" "sselog")
4409 (set_attr "prefix_extra" "1")
4410 (set_attr "mode" "TI")])
;; pinsrw: insert the HImode value in operand 2 (register or memory) into
;; the V8HI in operand 1, which is tied to the destination.  Operand 3 is a
;; power-of-two constant in 1..128; the output code converts it to the
;; immediate with exact_log2 before printing the instruction.
4412 (define_insn "*sse2_pinsrw"
4413 [(set (match_operand:V8HI 0 "register_operand" "=x")
4416 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4417 (match_operand:V8HI 1 "register_operand" "0")
4418 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4421 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4422 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4424 [(set_attr "type" "sselog")
4425 (set_attr "prefix_data16" "1")
4426 (set_attr "mode" "TI")])
;; pinsrd: insert the SImode value in operand 2 (register or memory) into
;; the V4SI in operand 1, which is tied to the destination.  Operand 3 is a
;; power-of-two constant in 1..8; the output code converts it to the
;; immediate with exact_log2.
4428 ;; It must come before sse2_loadld since it is preferred.
4429 (define_insn "*sse4_1_pinsrd"
4430 [(set (match_operand:V4SI 0 "register_operand" "=x")
4433 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4434 (match_operand:V4SI 1 "register_operand" "0")
4435 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4438 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4439 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4441 [(set_attr "type" "sselog")
4442 (set_attr "prefix_extra" "1")
4443 (set_attr "mode" "TI")])
;; pinsrq: insert the DImode value in operand 2 (register or memory) into
;; the V2DI in operand 1, which is tied to the destination.  Only enabled
;; for TARGET_SSE4_1 && TARGET_64BIT.  Operand 3 is the power-of-two constant
;; 1 or 2; the output code converts it to the immediate with exact_log2.
4445 (define_insn "*sse4_1_pinsrq"
4446 [(set (match_operand:V2DI 0 "register_operand" "=x")
4449 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4450 (match_operand:V2DI 1 "register_operand" "0")
4451 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4452 "TARGET_SSE4_1 && TARGET_64BIT"
4454 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4455 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4457 [(set_attr "type" "sselog")
4458 (set_attr "prefix_extra" "1")
4459 (set_attr "mode" "TI")])
;; pextrb, register destination: extract the byte selected by the constant
;; index in operand 2 (0..15) from the V16QI register in operand 1 into an
;; SImode general register.
4461 (define_insn "*sse4_1_pextrb"
4462 [(set (match_operand:SI 0 "register_operand" "=r")
4465 (match_operand:V16QI 1 "register_operand" "x")
4466 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4468 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4469 [(set_attr "type" "sselog")
4470 (set_attr "prefix_extra" "1")
4471 (set_attr "mode" "TI")])
;; pextrb, memory destination: store the byte selected by the constant index
;; in operand 2 (0..15) from the V16QI register in operand 1 directly to a
;; QImode memory location.
4473 (define_insn "*sse4_1_pextrb_memory"
4474 [(set (match_operand:QI 0 "memory_operand" "=m")
4476 (match_operand:V16QI 1 "register_operand" "x")
4477 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4479 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4480 [(set_attr "type" "sselog")
4481 (set_attr "prefix_extra" "1")
4482 (set_attr "mode" "TI")])
;; pextrw, register destination: extract the 16-bit element selected by the
;; constant index in operand 2 (0..7) from the V8HI register in operand 1
;; into an SImode general register.
4484 (define_insn "*sse2_pextrw"
4485 [(set (match_operand:SI 0 "register_operand" "=r")
4488 (match_operand:V8HI 1 "register_operand" "x")
4489 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4491 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4492 [(set_attr "type" "sselog")
4493 (set_attr "prefix_data16" "1")
4494 (set_attr "mode" "TI")])
;; pextrw, memory destination: store the 16-bit element selected by the
;; constant index in operand 2 (0..7) from the V8HI register in operand 1
;; directly to an HImode memory location.
4496 (define_insn "*sse4_1_pextrw_memory"
4497 [(set (match_operand:HI 0 "memory_operand" "=m")
4499 (match_operand:V8HI 1 "register_operand" "x")
4500 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4502 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4503 [(set_attr "type" "sselog")
4504 (set_attr "prefix_extra" "1")
4505 (set_attr "mode" "TI")])
;; pextrd: extract the 32-bit element selected by the constant index in
;; operand 2 (0..3) from the V4SI register in operand 1.  The SImode
;; destination may be a general register or memory ("=rm").
4507 (define_insn "*sse4_1_pextrd"
4508 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4510 (match_operand:V4SI 1 "register_operand" "x")
4511 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4513 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4514 [(set_attr "type" "sselog")
4515 (set_attr "prefix_extra" "1")
4516 (set_attr "mode" "TI")])
;; pextrq: extract the 64-bit element selected by the constant index in
;; operand 2 (0 or 1) from the V2DI register in operand 1 into a DImode
;; general register or memory.  Only enabled for TARGET_SSE4_1 &&
;; TARGET_64BIT.
4518 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4519 (define_insn "*sse4_1_pextrq"
4520 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4522 (match_operand:V2DI 1 "register_operand" "x")
4523 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4524 "TARGET_SSE4_1 && TARGET_64BIT"
4525 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4526 [(set_attr "type" "sselog")
4527 (set_attr "prefix_extra" "1")
4528 (set_attr "mode" "TI")])
;; Expander for pshufd with a combined immediate.  Operand 2 is a constant
;; whose four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) are split out and passed
;; as four separate selector operands to sse2_pshufd_1.
4530 (define_expand "sse2_pshufd"
4531 [(match_operand:V4SI 0 "register_operand" "")
4532 (match_operand:V4SI 1 "nonimmediate_operand" "")
4533 (match_operand:SI 2 "const_int_operand" "")]
4536 int mask = INTVAL (operands[2]);
4537 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4538 GEN_INT ((mask >> 0) & 3),
4539 GEN_INT ((mask >> 2) & 3),
4540 GEN_INT ((mask >> 4) & 3),
4541 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code packs them back into a single
;; immediate (2 bits per selector, operand 2 in the lowest bits), stores it in
;; operands[2] and prints the instruction.
4545 (define_insn "sse2_pshufd_1"
4546 [(set (match_operand:V4SI 0 "register_operand" "=x")
4548 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4549 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4550 (match_operand 3 "const_0_to_3_operand" "")
4551 (match_operand 4 "const_0_to_3_operand" "")
4552 (match_operand 5 "const_0_to_3_operand" "")])))]
4556 mask |= INTVAL (operands[2]) << 0;
4557 mask |= INTVAL (operands[3]) << 2;
4558 mask |= INTVAL (operands[4]) << 4;
4559 mask |= INTVAL (operands[5]) << 6;
4560 operands[2] = GEN_INT (mask);
4562 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4564 [(set_attr "type" "sselog1")
4565 (set_attr "prefix_data16" "1")
4566 (set_attr "mode" "TI")])
;; Expander for pshuflw with a combined immediate.  Operand 2 is a constant
;; whose four 2-bit fields are split out and passed as four separate selector
;; operands (each 0..3) to sse2_pshuflw_1.
4568 (define_expand "sse2_pshuflw"
4569 [(match_operand:V8HI 0 "register_operand" "")
4570 (match_operand:V8HI 1 "nonimmediate_operand" "")
4571 (match_operand:SI 2 "const_int_operand" "")]
4574 int mask = INTVAL (operands[2]);
4575 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4576 GEN_INT ((mask >> 0) & 3),
4577 GEN_INT ((mask >> 2) & 3),
4578 GEN_INT ((mask >> 4) & 3),
4579 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code packs them back into a single
;; immediate (2 bits per selector, operand 2 in the lowest bits), stores it in
;; operands[2] and prints the instruction.
4583 (define_insn "sse2_pshuflw_1"
4584 [(set (match_operand:V8HI 0 "register_operand" "=x")
4586 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4587 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4588 (match_operand 3 "const_0_to_3_operand" "")
4589 (match_operand 4 "const_0_to_3_operand" "")
4590 (match_operand 5 "const_0_to_3_operand" "")
4598 mask |= INTVAL (operands[2]) << 0;
4599 mask |= INTVAL (operands[3]) << 2;
4600 mask |= INTVAL (operands[4]) << 4;
4601 mask |= INTVAL (operands[5]) << 6;
4602 operands[2] = GEN_INT (mask);
4604 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4606 [(set_attr "type" "sselog")
4607 (set_attr "prefix_rep" "1")
4608 (set_attr "mode" "TI")])
;; Expander for pshufhw with a combined immediate.  Each 2-bit field of
;; operand 2 is biased by 4 before being passed on, so the selectors handed
;; to sse2_pshufhw_1 are element indices in the range 4..7.
4610 (define_expand "sse2_pshufhw"
4611 [(match_operand:V8HI 0 "register_operand" "")
4612 (match_operand:V8HI 1 "nonimmediate_operand" "")
4613 (match_operand:SI 2 "const_int_operand" "")]
4616 int mask = INTVAL (operands[2]);
4617 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4618 GEN_INT (((mask >> 0) & 3) + 4),
4619 GEN_INT (((mask >> 2) & 3) + 4),
4620 GEN_INT (((mask >> 4) & 3) + 4),
4621 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 4..7).  The output code removes the bias of 4 from
;; each selector, packs the results into a single immediate (2 bits each,
;; operand 2 in the lowest bits), stores it in operands[2] and prints the
;; instruction.
4625 (define_insn "sse2_pshufhw_1"
4626 [(set (match_operand:V8HI 0 "register_operand" "=x")
4628 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4629 (parallel [(const_int 0)
4633 (match_operand 2 "const_4_to_7_operand" "")
4634 (match_operand 3 "const_4_to_7_operand" "")
4635 (match_operand 4 "const_4_to_7_operand" "")
4636 (match_operand 5 "const_4_to_7_operand" "")])))]
4640 mask |= (INTVAL (operands[2]) - 4) << 0;
4641 mask |= (INTVAL (operands[3]) - 4) << 2;
4642 mask |= (INTVAL (operands[4]) - 4) << 4;
4643 mask |= (INTVAL (operands[5]) - 4) << 6;
4644 operands[2] = GEN_INT (mask);
4646 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4648 [(set_attr "type" "sselog")
4649 (set_attr "prefix_rep" "1")
4650 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (register or memory) into a V4SI
;; register.  The preparation statement supplies the V4SI zero vector as
;; operand 2.
4652 (define_expand "sse2_loadd"
4653 [(set (match_operand:V4SI 0 "register_operand" "")
4656 (match_operand:SI 1 "nonimmediate_operand" ""))
4660 "operands[2] = CONST0_RTX (V4SImode);")
;; Places the SImode value in operand 2 into a V4SI register.  Alternatives:
;;   0: movd from memory            (operand 1 matches "C")
;;   1: movd from a general register (operand 1 matches "C")
;;   2: movss from memory           (operand 1 matches "C")
;;   3: movss from an SSE register, with operand 1 tied to the destination
;; Operand 1 uses reg_or_0_operand.
4662 (define_insn "sse2_loadld"
4663 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4666 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4667 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4671 movd\t{%2, %0|%0, %2}
4672 movd\t{%2, %0|%0, %2}
4673 movss\t{%2, %0|%0, %2}
4674 movss\t{%2, %0|%0, %2}"
4675 [(set_attr "type" "ssemov")
4676 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of a V4SI register into an SImode destination.  After
;; reload this is split into a plain SImode move when inter-unit moves are
;; allowed, or the destination is memory, or the destination is not a general
;; register.  The split re-expresses operand 1 as an SImode register with the
;; same register number.
4678 (define_insn_and_split "sse2_stored"
4679 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4681 (match_operand:V4SI 1 "register_operand" "x,Yi")
4682 (parallel [(const_int 0)])))]
4685 "&& reload_completed
4686 && (TARGET_INTER_UNIT_MOVES
4687 || MEM_P (operands [0])
4688 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4689 [(set (match_dup 0) (match_dup 1))]
4691 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element i (0..3) of a V4SI held in offsettable memory ("o") into
;; an SImode general register.  The split emits an ordinary SImode move from
;; the source address adjusted by i*4 bytes.
4694 (define_insn_and_split "*vec_ext_v4si_mem"
4695 [(set (match_operand:SI 0 "register_operand" "=r")
4697 (match_operand:V4SI 1 "memory_operand" "o")
4698 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4704 int i = INTVAL (operands[2]);
4706 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: store element 0 of a V2DI register into a DImode register or
;; memory destination.
4710 (define_expand "sse_storeq"
4711 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4713 (match_operand:V2DI 1 "register_operand" "")
4714 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DImode extract from V2DI.  Three
;; alternatives; the third reads the source from offsettable memory into a
;; general register with mov{q} (type imov, mode DI).  Memory-to-memory
;; combinations are rejected by the insn condition.
4718 (define_insn "*sse2_storeq_rex64"
4719 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4721 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4722 (parallel [(const_int 0)])))]
4723 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4727 mov{q}\t{%1, %0|%0, %1}"
4728 [(set_attr "type" "*,*,imov")
4729 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode extract from a V2DI register; the destination may be
;; memory or an SSE register ("=mx").
4731 (define_insn "*sse2_storeq"
4732 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4734 (match_operand:V2DI 1 "register_operand" "x")
4735 (parallel [(const_int 0)])))]
;; Splitter for the element-0 extract: when inter-unit moves are allowed, or
;; the destination is memory, or the destination is not a general register,
;; replace it with a plain DImode move, re-expressing operand 1 as a DImode
;; register with the same register number.
4740 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4742 (match_operand:V2DI 1 "register_operand" "")
4743 (parallel [(const_int 0)])))]
4746 && (TARGET_INTER_UNIT_MOVES
4747 || MEM_P (operands [0])
4748 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4749 [(set (match_dup 0) (match_dup 1))]
4751 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI on 64-bit targets.  Alternatives:
;;   0: movhps, SSE register to memory
;;   1: psrldq by 8 in place (source tied to destination)
;;   2: movq from memory into an SSE register, using %H1
;;   3: mov{q} from memory into a general register, using %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand printer.
4754 (define_insn "*vec_extractv2di_1_rex64"
4755 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4757 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4758 (parallel [(const_int 1)])))]
4759 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4761 movhps\t{%1, %0|%0, %1}
4762 psrldq\t{$8, %0|%0, 8}
4763 movq\t{%H1, %0|%0, %H1}
4764 mov{q}\t{%H1, %0|%0, %H1}"
4765 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4766 (set_attr "memory" "*,none,*,*")
4767 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI with SSE2.  Alternatives:
;;   0: movhps, SSE register to memory
;;   1: psrldq by 8 in place (source tied to destination)
;;   2: movq from memory into an SSE register, using %H1
;; Memory-to-memory combinations are rejected by the insn condition.
4769 (define_insn "*vec_extractv2di_1_sse2"
4770 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4772 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4773 (parallel [(const_int 1)])))]
4775 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4777 movhps\t{%1, %0|%0, %1}
4778 psrldq\t{$8, %0|%0, 8}
4779 movq\t{%H1, %0|%0, %H1}"
4780 [(set_attr "type" "ssemov,sseishft,ssemov")
4781 (set_attr "memory" "*,none,*")
4782 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI when only SSE (not SSE2) is available.
;; Alternatives: movhps to memory, movhlps register to register, and movlps
;; from memory using %H1.
4784 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4785 (define_insn "*vec_extractv2di_1_sse"
4786 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4788 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4789 (parallel [(const_int 1)])))]
4790 "!TARGET_SSE2 && TARGET_SSE
4791 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4793 movhps\t{%1, %0|%0, %1}
4794 movhlps\t{%1, %0|%0, %1}
4795 movlps\t{%H1, %0|%0, %H1}"
4796 [(set_attr "type" "ssemov")
4797 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SI from an SImode value held in an SSE register.  Alternatives:
;; pshufd with immediate 0 from operand 1, or shufps with immediate 0 in
;; place (operand 1 tied to the destination).
4799 (define_insn "*vec_dupv4si"
4800 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4802 (match_operand:SI 1 "register_operand" " Y2,0")))]
4805 pshufd\t{$0, %1, %0|%0, %1, 0}
4806 shufps\t{$0, %0, %0|%0, %0, 0}"
4807 [(set_attr "type" "sselog1")
4808 (set_attr "mode" "TI,V4SF")])
;; Build a V2DI from a DImode value.  Both alternatives tie operand 1 to the
;; destination; the first is typed sselog1 (mode TI), the second ssemov
;; (mode V4SF).
4810 (define_insn "*vec_dupv2di"
4811 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4813 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4818 [(set_attr "type" "sselog1,ssemov")
4819 (set_attr "mode" "TI,V4SF")])
;; Form a V2SI from two SImode values.  Alternatives:
;;   0: pinsrd $1 of a register/memory operand 2 into the destination
;;   1: punpckldq with an SSE register operand 2
;;   2: movd of operand 1 when operand 2 matches "C"
;;   3: punpckldq on MMX registers (operand 2 MMX register or memory)
;;   4: movd into an MMX register when operand 2 matches "C"
;; Only alternative 0 sets prefix_extra.
4821 (define_insn "*vec_concatv2si_sse4_1"
4822 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
4824 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
4825 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
4828 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
4829 punpckldq\t{%2, %0|%0, %2}
4830 movd\t{%1, %0|%0, %1}
4831 punpckldq\t{%2, %0|%0, %2}
4832 movd\t{%1, %0|%0, %1}"
4833 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4834 (set_attr "prefix_extra" "1,*,*,*,*")
4835 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Form a V2SI from two SImode values.  Alternatives: punpckldq on SSE
;; registers, movd into an SSE register when operand 2 matches "C",
;; punpckldq on MMX registers, and movd into an MMX register when operand 2
;; matches "C".
4837 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4838 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4839 ;; alternatives pretty much forces the MMX alternative to be chosen.
4840 (define_insn "*vec_concatv2si_sse2"
4841 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
4843 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
4844 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
4847 punpckldq\t{%2, %0|%0, %2}
4848 movd\t{%1, %0|%0, %1}
4849 punpckldq\t{%2, %0|%0, %2}
4850 movd\t{%1, %0|%0, %1}"
4851 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4852 (set_attr "mode" "TI,TI,DI,DI")])
;; Form a V2SI from two SImode values using single-precision float moves for
;; the SSE alternatives: unpcklps on SSE registers, movss from memory when
;; operand 2 matches "C", punpckldq on MMX registers, and movd into an MMX
;; register when operand 2 matches "C".
4854 (define_insn "*vec_concatv2si_sse"
4855 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4857 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4858 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4861 unpcklps\t{%2, %0|%0, %2}
4862 movss\t{%1, %0|%0, %1}
4863 punpckldq\t{%2, %0|%0, %2}
4864 movd\t{%1, %0|%0, %1}"
4865 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4866 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Form a V4SI from two V2SI halves; operand 1 is always tied to the
;; destination.  Alternatives: punpcklqdq with a register, movlhps with a
;; register, and movhps from memory.
4868 (define_insn "*vec_concatv4si_1"
4869 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4871 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4872 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4875 punpcklqdq\t{%2, %0|%0, %2}
4876 movlhps\t{%2, %0|%0, %2}
4877 movhps\t{%2, %0|%0, %2}"
4878 [(set_attr "type" "sselog,ssemov,ssemov")
4879 (set_attr "mode" "TI,V4SF,V2SF")])
;; Form a V2DI from two DImode values on 32-bit targets with SSE.
;; Alternatives:
;;   0: movq from memory          (operand 2 matches "C")
;;   1: movq2dq from an MMX register (operand 2 matches "C")
;;   2: punpcklqdq, operand 1 tied to the destination
;;   3: movlhps, operand 1 tied to the destination
;;   4: movhps from memory operand 2, operand 1 tied to the destination
;;   5: movlps from memory operand 1, operand 2 tied to the destination
4881 (define_insn "vec_concatv2di"
4882 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4884 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4885 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4886 "!TARGET_64BIT && TARGET_SSE"
4888 movq\t{%1, %0|%0, %1}
4889 movq2dq\t{%1, %0|%0, %1}
4890 punpcklqdq\t{%2, %0|%0, %2}
4891 movlhps\t{%2, %0|%0, %2}
4892 movhps\t{%2, %0|%0, %2}
4893 movlps\t{%1, %0|%0, %1}"
4894 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4895 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Form a V2DI from two DImode values on 64-bit targets with SSE4.1.
;; Alternatives:
;;   0: pinsrq $1 of a register/memory operand 2 (the only one with
;;      prefix_extra set)
;;   1: movq from memory            (operand 2 matches "C")
;;   2: movq from a general register (operand 2 matches "C")
;;   3: movq2dq from an MMX register (operand 2 matches "C")
;;   4: punpcklqdq, operand 1 tied to the destination
;;   5: movlhps, operand 1 tied to the destination
;;   6: movhps from memory operand 2, operand 1 tied to the destination
;;   7: movlps from memory operand 1, operand 2 tied to the destination
4897 (define_insn "*vec_concatv2di_rex64_sse4_1"
4898 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x")
4900 (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m")
4901 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))]
4902 "TARGET_64BIT && TARGET_SSE4_1"
4904 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
4905 movq\t{%1, %0|%0, %1}
4906 movq\t{%1, %0|%0, %1}
4907 movq2dq\t{%1, %0|%0, %1}
4908 punpcklqdq\t{%2, %0|%0, %2}
4909 movlhps\t{%2, %0|%0, %2}
4910 movhps\t{%2, %0|%0, %2}
4911 movlps\t{%1, %0|%0, %1}"
4912 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4913 (set_attr "prefix_extra" "1,*,*,*,*,*,*,*")
4914 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Form a V2DI from two DImode values on 64-bit targets with SSE.
;; Alternatives:
;;   0: movq from memory            (operand 2 matches "C")
;;   1: movq from a general register (operand 2 matches "C")
;;   2: movq2dq from an MMX register (operand 2 matches "C")
;;   3: punpcklqdq, operand 1 tied to the destination
;;   4: movlhps, operand 1 tied to the destination
;;   5: movhps from memory operand 2, operand 1 tied to the destination
;;   6: movlps from memory operand 1, operand 2 tied to the destination
4916 (define_insn "*vec_concatv2di_rex64_sse"
4917 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4919 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4920 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4921 "TARGET_64BIT && TARGET_SSE"
4923 movq\t{%1, %0|%0, %1}
4924 movq\t{%1, %0|%0, %1}
4925 movq2dq\t{%1, %0|%0, %1}
4926 punpcklqdq\t{%2, %0|%0, %2}
4927 movlhps\t{%2, %0|%0, %2}
4928 movhps\t{%2, %0|%0, %2}
4929 movlps\t{%1, %0|%0, %1}"
4930 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4931 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: set one element of a V2DI register.  Operand 1 is the DImode
;; value and operand 2 the constant element index; the work is delegated to
;; ix86_expand_vector_set with its first argument false.
4933 (define_expand "vec_setv2di"
4934 [(match_operand:V2DI 0 "register_operand" "")
4935 (match_operand:DI 1 "register_operand" "")
4936 (match_operand 2 "const_int_operand" "")]
4939 ix86_expand_vector_set (false, operands[0], operands[1],
4940 INTVAL (operands[2]));
;; Expander: extract one element of a V2DI register into a DImode register.
;; Operand 2 is the constant element index; the work is delegated to
;; ix86_expand_vector_extract with its first argument false.
4944 (define_expand "vec_extractv2di"
4945 [(match_operand:DI 0 "register_operand" "")
4946 (match_operand:V2DI 1 "register_operand" "")
4947 (match_operand 2 "const_int_operand" "")]
4950 ix86_expand_vector_extract (false, operands[0], operands[1],
4951 INTVAL (operands[2]));
;; Expander: initialise a V2DI register from operand 1 (no predicate or mode
;; constraint) by calling ix86_expand_vector_init with its first argument
;; false.
4955 (define_expand "vec_initv2di"
4956 [(match_operand:V2DI 0 "register_operand" "")
4957 (match_operand 1 "" "")]
4960 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: set one element of a V4SI register.  Operand 1 is the SImode
;; value and operand 2 the constant element index; the work is delegated to
;; ix86_expand_vector_set with its first argument false.
4964 (define_expand "vec_setv4si"
4965 [(match_operand:V4SI 0 "register_operand" "")
4966 (match_operand:SI 1 "register_operand" "")
4967 (match_operand 2 "const_int_operand" "")]
4970 ix86_expand_vector_set (false, operands[0], operands[1],
4971 INTVAL (operands[2]));
;; Expander: extract one element of a V4SI register into an SImode register.
;; Operand 2 is the constant element index; the work is delegated to
;; ix86_expand_vector_extract with its first argument false.
4975 (define_expand "vec_extractv4si"
4976 [(match_operand:SI 0 "register_operand" "")
4977 (match_operand:V4SI 1 "register_operand" "")
4978 (match_operand 2 "const_int_operand" "")]
4981 ix86_expand_vector_extract (false, operands[0], operands[1],
4982 INTVAL (operands[2]));
;; Expander: initialise a V4SI register from operand 1 (no predicate or mode
;; constraint) by calling ix86_expand_vector_init with its first argument
;; false.
4986 (define_expand "vec_initv4si"
4987 [(match_operand:V4SI 0 "register_operand" "")
4988 (match_operand 1 "" "")]
4991 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: set one element of a V8HI register.  Operand 1 is the HImode
;; value and operand 2 the constant element index; the work is delegated to
;; ix86_expand_vector_set with its first argument false.
4995 (define_expand "vec_setv8hi"
4996 [(match_operand:V8HI 0 "register_operand" "")
4997 (match_operand:HI 1 "register_operand" "")
4998 (match_operand 2 "const_int_operand" "")]
5001 ix86_expand_vector_set (false, operands[0], operands[1],
5002 INTVAL (operands[2]));
;; Expander: extract one element of a V8HI register into an HImode register.
;; Operand 2 is the constant element index; the work is delegated to
;; ix86_expand_vector_extract with its first argument false.
5006 (define_expand "vec_extractv8hi"
5007 [(match_operand:HI 0 "register_operand" "")
5008 (match_operand:V8HI 1 "register_operand" "")
5009 (match_operand 2 "const_int_operand" "")]
5012 ix86_expand_vector_extract (false, operands[0], operands[1],
5013 INTVAL (operands[2]));
;; Expander: initialise a V8HI register from operand 1 (no predicate or mode
;; constraint) by calling ix86_expand_vector_init with its first argument
;; false.
5017 (define_expand "vec_initv8hi"
5018 [(match_operand:V8HI 0 "register_operand" "")
5019 (match_operand 1 "" "")]
5022 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: set one element of a V16QI register.  Operand 1 is the QImode
;; value and operand 2 the constant element index; the work is delegated to
;; ix86_expand_vector_set with its first argument false.
5026 (define_expand "vec_setv16qi"
5027 [(match_operand:V16QI 0 "register_operand" "")
5028 (match_operand:QI 1 "register_operand" "")
5029 (match_operand 2 "const_int_operand" "")]
5032 ix86_expand_vector_set (false, operands[0], operands[1],
5033 INTVAL (operands[2]));
;; Expander: extract one element of a V16QI register into a QImode register.
;; Operand 2 is the constant element index; the work is delegated to
;; ix86_expand_vector_extract with its first argument false.
5037 (define_expand "vec_extractv16qi"
5038 [(match_operand:QI 0 "register_operand" "")
5039 (match_operand:V16QI 1 "register_operand" "")
5040 (match_operand 2 "const_int_operand" "")]
5043 ix86_expand_vector_extract (false, operands[0], operands[1],
5044 INTVAL (operands[2]));
;; Expander: initialise a V16QI register from operand 1 (no predicate or mode
;; constraint) by calling ix86_expand_vector_init with its first argument
;; false.
5048 (define_expand "vec_initv16qi"
5049 [(match_operand:V16QI 0 "register_operand" "")
5050 (match_operand 1 "" "")]
5053 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: widen part of a V16QI into a V8HI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, true, true).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5057 (define_expand "vec_unpacku_hi_v16qi"
5058 [(match_operand:V8HI 0 "register_operand" "")
5059 (match_operand:V16QI 1 "register_operand" "")]
5063 ix86_expand_sse4_unpack (operands, true, true);
5064 else if (TARGET_SSE5)
5065 ix86_expand_sse5_unpack (operands, true, true);
5067 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen part of a V16QI into a V8HI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, false, true).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5071 (define_expand "vec_unpacks_hi_v16qi"
5072 [(match_operand:V8HI 0 "register_operand" "")
5073 (match_operand:V16QI 1 "register_operand" "")]
5077 ix86_expand_sse4_unpack (operands, false, true);
5078 else if (TARGET_SSE5)
5079 ix86_expand_sse5_unpack (operands, false, true);
5081 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen part of a V16QI into a V8HI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, true, false).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5085 (define_expand "vec_unpacku_lo_v16qi"
5086 [(match_operand:V8HI 0 "register_operand" "")
5087 (match_operand:V16QI 1 "register_operand" "")]
5091 ix86_expand_sse4_unpack (operands, true, false);
5092 else if (TARGET_SSE5)
5093 ix86_expand_sse5_unpack (operands, true, false);
5095 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen part of a V16QI into a V8HI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, false, false).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5099 (define_expand "vec_unpacks_lo_v16qi"
5100 [(match_operand:V8HI 0 "register_operand" "")
5101 (match_operand:V16QI 1 "register_operand" "")]
5105 ix86_expand_sse4_unpack (operands, false, false);
5106 else if (TARGET_SSE5)
5107 ix86_expand_sse5_unpack (operands, false, false);
5109 ix86_expand_sse_unpack (operands, false, false);
;; Expander: widen part of a V8HI into a V4SI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, true, true).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5113 (define_expand "vec_unpacku_hi_v8hi"
5114 [(match_operand:V4SI 0 "register_operand" "")
5115 (match_operand:V8HI 1 "register_operand" "")]
5119 ix86_expand_sse4_unpack (operands, true, true);
5120 else if (TARGET_SSE5)
5121 ix86_expand_sse5_unpack (operands, true, true);
5123 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen part of a V8HI into a V4SI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, false, true).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5127 (define_expand "vec_unpacks_hi_v8hi"
5128 [(match_operand:V4SI 0 "register_operand" "")
5129 (match_operand:V8HI 1 "register_operand" "")]
5133 ix86_expand_sse4_unpack (operands, false, true);
5134 else if (TARGET_SSE5)
5135 ix86_expand_sse5_unpack (operands, false, true);
5137 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen part of a V8HI into a V4SI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, true, false).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5141 (define_expand "vec_unpacku_lo_v8hi"
5142 [(match_operand:V4SI 0 "register_operand" "")
5143 (match_operand:V8HI 1 "register_operand" "")]
5147 ix86_expand_sse4_unpack (operands, true, false);
5148 else if (TARGET_SSE5)
5149 ix86_expand_sse5_unpack (operands, true, false);
5151 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen part of a V8HI into a V4SI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, false, false).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5155 (define_expand "vec_unpacks_lo_v8hi"
5156 [(match_operand:V4SI 0 "register_operand" "")
5157 (match_operand:V8HI 1 "register_operand" "")]
5161 ix86_expand_sse4_unpack (operands, false, false);
5162 else if (TARGET_SSE5)
5163 ix86_expand_sse5_unpack (operands, false, false);
5165 ix86_expand_sse_unpack (operands, false, false);
;; Expander: widen part of a V4SI into a V2DI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, true, true).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5169 (define_expand "vec_unpacku_hi_v4si"
5170 [(match_operand:V2DI 0 "register_operand" "")
5171 (match_operand:V4SI 1 "register_operand" "")]
5175 ix86_expand_sse4_unpack (operands, true, true);
5176 else if (TARGET_SSE5)
5177 ix86_expand_sse5_unpack (operands, true, true);
5179 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen part of a V4SI into a V2DI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, false, true).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5183 (define_expand "vec_unpacks_hi_v4si"
5184 [(match_operand:V2DI 0 "register_operand" "")
5185 (match_operand:V4SI 1 "register_operand" "")]
5189 ix86_expand_sse4_unpack (operands, false, true);
5190 else if (TARGET_SSE5)
5191 ix86_expand_sse5_unpack (operands, false, true);
5193 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen part of a V4SI into a V2DI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, true, false).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5197 (define_expand "vec_unpacku_lo_v4si"
5198 [(match_operand:V2DI 0 "register_operand" "")
5199 (match_operand:V4SI 1 "register_operand" "")]
5203 ix86_expand_sse4_unpack (operands, true, false);
5204 else if (TARGET_SSE5)
5205 ix86_expand_sse5_unpack (operands, true, false);
5207 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen part of a V4SI into a V2DI.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when TARGET_SSE5) or
;; ix86_expand_sse_unpack, always passing (operands, false, false).
;; NOTE(review): the two flags presumably mean "unsigned" and "high half",
;; matching the pattern name -- confirm against the helpers.
5211 (define_expand "vec_unpacks_lo_v4si"
5212 [(match_operand:V2DI 0 "register_operand" "")
5213 (match_operand:V4SI 1 "register_operand" "")]
5217 ix86_expand_sse4_unpack (operands, false, false);
5218 else if (TARGET_SSE5)
5219 ix86_expand_sse5_unpack (operands, false, false);
5221 ix86_expand_sse_unpack (operands, false, false);
5225 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern.  The pattern combines operands 1
;; and 2 with a constant vector of sixteen ones (presumably the rounding
;; term); the preparation statement canonicalises the operands with
;; ix86_fixup_binary_operands_no_copy for a PLUS in V16QImode.
5231 (define_expand "sse2_uavgv16qi3"
5232 [(set (match_operand:V16QI 0 "register_operand" "")
5238 (match_operand:V16QI 1 "nonimmediate_operand" ""))
5240 (match_operand:V16QI 2 "nonimmediate_operand" "")))
5241 (const_vector:V16QI [(const_int 1) (const_int 1)
5242 (const_int 1) (const_int 1)
5243 (const_int 1) (const_int 1)
5244 (const_int 1) (const_int 1)
5245 (const_int 1) (const_int 1)
5246 (const_int 1) (const_int 1)
5247 (const_int 1) (const_int 1)
5248 (const_int 1) (const_int 1)]))
5251 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; pavgb: matching insn for the V16QI average pattern.  Operand 1 is
;; commutative with operand 2 ("%0") and tied to the destination; operand 2
;; may be an SSE register or memory.  Requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok for PLUS.
5253 (define_insn "*sse2_uavgv16qi3"
5254 [(set (match_operand:V16QI 0 "register_operand" "=x")
5260 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5262 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5263 (const_vector:V16QI [(const_int 1) (const_int 1)
5264 (const_int 1) (const_int 1)
5265 (const_int 1) (const_int 1)
5266 (const_int 1) (const_int 1)
5267 (const_int 1) (const_int 1)
5268 (const_int 1) (const_int 1)
5269 (const_int 1) (const_int 1)
5270 (const_int 1) (const_int 1)]))
5272 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5273 "pavgb\t{%2, %0|%0, %2}"
5274 [(set_attr "type" "sseiadd")
5275 (set_attr "prefix_data16" "1")
5276 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern.  The pattern combines operands 1
;; and 2 with a constant vector of eight ones (presumably the rounding term);
;; the preparation statement canonicalises the operands with
;; ix86_fixup_binary_operands_no_copy for a PLUS in V8HImode.
5278 (define_expand "sse2_uavgv8hi3"
5279 [(set (match_operand:V8HI 0 "register_operand" "")
5285 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5287 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5288 (const_vector:V8HI [(const_int 1) (const_int 1)
5289 (const_int 1) (const_int 1)
5290 (const_int 1) (const_int 1)
5291 (const_int 1) (const_int 1)]))
5294 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; pavgw: matching insn for the V8HI average pattern.  Operand 1 is
;; commutative with operand 2 ("%0") and tied to the destination; operand 2
;; may be an SSE register or memory.  Requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok for PLUS.
5296 (define_insn "*sse2_uavgv8hi3"
5297 [(set (match_operand:V8HI 0 "register_operand" "=x")
5303 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5305 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5306 (const_vector:V8HI [(const_int 1) (const_int 1)
5307 (const_int 1) (const_int 1)
5308 (const_int 1) (const_int 1)
5309 (const_int 1) (const_int 1)]))
5311 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5312 "pavgw\t{%2, %0|%0, %2}"
5313 [(set_attr "type" "sseiadd")
5314 (set_attr "prefix_data16" "1")
5315 (set_attr "mode" "TI")])
;; psadbw: modelled as an unspec taking two V16QI inputs and producing a
;; V2DI result.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
5317 ;; The correct representation for this is absolutely enormous, and
5318 ;; surely not generally useful.
5319 (define_insn "sse2_psadbw"
5320 [(set (match_operand:V2DI 0 "register_operand" "=x")
5321 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5322 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5325 "psadbw\t{%2, %0|%0, %2}"
5326 [(set_attr "type" "sseiadd")
5327 (set_attr "prefix_data16" "1")
5328 (set_attr "mode" "TI")])
;; movmskps / movmskpd: iterated over SSEMODEF2P (V4SF and V2DF).  The
;; pattern name prefix comes from the <sse> attribute and the mnemonic suffix
;; ("s" or "d") from <ssemodesuffixf2c>.  Moves from an SSE register into an
;; SImode general register.
5330 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5331 [(set (match_operand:SI 0 "register_operand" "=r")
5333 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5335 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5336 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5337 [(set_attr "type" "ssecvt")
5338 (set_attr "mode" "<MODE>")])
;; pmovmskb: modelled as an unspec taking a V16QI SSE register and producing
;; an SImode value in a general register.
5340 (define_insn "sse2_pmovmskb"
5341 [(set (match_operand:SI 0 "register_operand" "=r")
5342 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5345 "pmovmskb\t{%1, %0|%0, %1}"
5346 [(set_attr "type" "ssecvt")
5347 (set_attr "prefix_data16" "1")
5348 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination set from an unspec of
;; two V16QI register operands.
5350 (define_expand "sse2_maskmovdqu"
5351 [(set (match_operand:V16QI 0 "memory_operand" "")
5352 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5353 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu for 32-bit targets.  The destination address is an SImode
;; register restricted by the "D" constraint; it is not printed in the
;; template.  The previous contents of the destination memory are also listed
;; as an input of the unspec (mem of match_dup 0).
5359 (define_insn "*sse2_maskmovdqu"
5360 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5361 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5362 (match_operand:V16QI 2 "register_operand" "x")
5363 (mem:V16QI (match_dup 0))]
5365 "TARGET_SSE2 && !TARGET_64BIT"
5366 ;; @@@ check ordering of operands in intel/nonintel syntax
5367 "maskmovdqu\t{%2, %1|%1, %2}"
5368 [(set_attr "type" "ssecvt")
5369 (set_attr "prefix_data16" "1")
5370 (set_attr "mode" "TI")])
;; maskmovdqu for 64-bit targets.  Same shape as the 32-bit pattern except
;; that the destination address register (constraint "D") is DImode.  The
;; previous contents of the destination memory are also listed as an input of
;; the unspec (mem of match_dup 0).
5372 (define_insn "*sse2_maskmovdqu_rex64"
5373 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5374 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5375 (match_operand:V16QI 2 "register_operand" "x")
5376 (mem:V16QI (match_dup 0))]
5378 "TARGET_SSE2 && TARGET_64BIT"
5379 ;; @@@ check ordering of operands in intel/nonintel syntax
5380 "maskmovdqu\t{%2, %1|%1, %2}"
5381 [(set_attr "type" "ssecvt")
5382 (set_attr "prefix_data16" "1")
5383 (set_attr "mode" "TI")])
;; ldmxcsr: volatile unspec whose only input is an SImode memory operand; the
;; insn is marked as a memory load.
5385 (define_insn "sse_ldmxcsr"
5386 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5390 [(set_attr "type" "sse")
5391 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand from a volatile unspec
;; (UNSPECV_STMXCSR) with a dummy const_int 0 input; the insn is marked as a
;; memory store.
5393 (define_insn "sse_stmxcsr"
5394 [(set (match_operand:SI 0 "memory_operand" "=m")
5395 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5398 [(set_attr "type" "sse")
5399 (set_attr "memory" "store")])
;; Expander for sfence, available with SSE or 3DNow!-A.  The fence is
;; expressed as a BLKmode memory operand set from an UNSPEC_SFENCE of itself;
;; the preparation code creates that operand as a volatile BLKmode MEM with a
;; scratch address.
5401 (define_expand "sse_sfence"
5403 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5404 "TARGET_SSE || TARGET_3DNOW_A"
5406 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5407 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the sfence expander: a BLKmode operand set from an
;; UNSPEC_SFENCE of itself.  Its memory attribute is "unknown".
5410 (define_insn "*sse_sfence"
5411 [(set (match_operand:BLK 0 "" "")
5412 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5413 "TARGET_SSE || TARGET_3DNOW_A"
5415 [(set_attr "type" "sse")
5416 (set_attr "memory" "unknown")])
;; clflush: volatile unspec whose only input is an address operand
;; (constraint "p").  Its memory attribute is "unknown".
5418 (define_insn "sse2_clflush"
5419 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5423 [(set_attr "type" "sse")
5424 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is expressed through an UNSPEC_MFENCE of a
;; BLKmode memory operand; the preparation code creates that operand as a
;; volatile BLKmode MEM with a scratch address.
5426 (define_expand "sse2_mfence"
5428 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5431 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5432 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the mfence expander: a BLKmode operand set from an
;; UNSPEC_MFENCE of itself.  Its memory attribute is "unknown".
5435 (define_insn "*sse2_mfence"
5436 [(set (match_operand:BLK 0 "" "")
5437 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5440 [(set_attr "type" "sse")
5441 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is expressed through an UNSPEC_LFENCE of a
;; BLKmode memory operand; the preparation code creates that operand as a
;; volatile BLKmode MEM with a scratch address.
5443 (define_expand "sse2_lfence"
5445 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5448 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5449 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the lfence expander: a BLKmode operand set from an
;; UNSPEC_LFENCE of itself.  Its memory attribute is "unknown".
5452 (define_insn "*sse2_lfence"
5453 [(set (match_operand:BLK 0 "" "")
5454 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5457 [(set_attr "type" "sse")
5458 (set_attr "memory" "unknown")])
;; mwait: two SImode register inputs inside an unspec_volatile, pinned by
;; their constraints to the "a" and "c" registers.  The insn length is
;; fixed at 3 bytes.
5460 (define_insn "sse3_mwait"
5461 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5462 (match_operand:SI 1 "register_operand" "c")]
5465 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5466 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5467 ;; we only need to set up 32bit registers.
5469 [(set_attr "length" "3")])
;; monitor, 32-bit targets only (TARGET_SSE3 && !TARGET_64BIT).  The three
;; SImode inputs are pinned by their constraints to the "a", "c" and "d"
;; registers and are printed explicitly in the template.
5471 (define_insn "sse3_monitor"
5472 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5473 (match_operand:SI 1 "register_operand" "c")
5474 (match_operand:SI 2 "register_operand" "d")]
5476 "TARGET_SSE3 && !TARGET_64BIT"
5477 "monitor\t%0, %1, %2"
5478 [(set_attr "length" "3")])
;; monitor, 64-bit targets only (TARGET_SSE3 && TARGET_64BIT).  Operand 0
;; (the "a" register) is DImode; operands 1 and 2 (the "c" and "d"
;; registers) stay SImode for the reason given in the comment below.
5480 (define_insn "sse3_monitor64"
5481 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5482 (match_operand:SI 1 "register_operand" "c")
5483 (match_operand:SI 2 "register_operand" "d")]
5485 "TARGET_SSE3 && TARGET_64BIT"
5486 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5487 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5488 ;; zero extended to 64bit, we only need to set up 32bit registers.
5490 [(set_attr "length" "3")])
5492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5494 ;; SSSE3 instructions
5496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw, 128-bit (XMM, constraint "x") form.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a register or memory.
;; The RTL pairs adjacent HImode elements (0,1), (2,3), (4,5), (6,7) of
;; operand 1 and then of operand 2.
5498 (define_insn "ssse3_phaddwv8hi3"
5499 [(set (match_operand:V8HI 0 "register_operand" "=x")
5505 (match_operand:V8HI 1 "register_operand" "0")
5506 (parallel [(const_int 0)]))
5507 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5509 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5510 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5513 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5514 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5516 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5517 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5522 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5523 (parallel [(const_int 0)]))
5524 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5526 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5527 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5530 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5531 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5533 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5534 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5536 "phaddw\t{%2, %0|%0, %2}"
5537 [(set_attr "type" "sseiadd")
5538 (set_attr "prefix_data16" "1")
5539 (set_attr "prefix_extra" "1")
5540 (set_attr "mode" "TI")])
;; phaddw, 64-bit (MMX, constraint "y") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1), (2,3) of operand 1 and then operand 2.
5542 (define_insn "ssse3_phaddwv4hi3"
5543 [(set (match_operand:V4HI 0 "register_operand" "=y")
5548 (match_operand:V4HI 1 "register_operand" "0")
5549 (parallel [(const_int 0)]))
5550 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5552 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5553 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5557 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5558 (parallel [(const_int 0)]))
5559 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5561 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5562 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5564 "phaddw\t{%2, %0|%0, %2}"
5565 [(set_attr "type" "sseiadd")
5566 (set_attr "prefix_extra" "1")
5567 (set_attr "mode" "DI")])
;; phaddd, 128-bit (XMM, constraint "x") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent SImode elements (0,1), (2,3) of operand 1 and then operand 2.
5569 (define_insn "ssse3_phadddv4si3"
5570 [(set (match_operand:V4SI 0 "register_operand" "=x")
5575 (match_operand:V4SI 1 "register_operand" "0")
5576 (parallel [(const_int 0)]))
5577 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5579 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5580 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5584 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5585 (parallel [(const_int 0)]))
5586 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5588 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5589 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5591 "phaddd\t{%2, %0|%0, %2}"
5592 [(set_attr "type" "sseiadd")
5593 (set_attr "prefix_data16" "1")
5594 (set_attr "prefix_extra" "1")
5595 (set_attr "mode" "TI")])
;; phaddd, 64-bit (MMX, constraint "y") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; SImode elements (0,1) of operand 1 and then of operand 2.
5597 (define_insn "ssse3_phadddv2si3"
5598 [(set (match_operand:V2SI 0 "register_operand" "=y")
5602 (match_operand:V2SI 1 "register_operand" "0")
5603 (parallel [(const_int 0)]))
5604 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5607 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5608 (parallel [(const_int 0)]))
5609 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5611 "phaddd\t{%2, %0|%0, %2}"
5612 [(set_attr "type" "sseiadd")
5613 (set_attr "prefix_extra" "1")
5614 (set_attr "mode" "DI")])
;; phaddsw, 128-bit (XMM, constraint "x") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1) ... (6,7) of operand 1 and then of
;; operand 2.
5616 (define_insn "ssse3_phaddswv8hi3"
5617 [(set (match_operand:V8HI 0 "register_operand" "=x")
5623 (match_operand:V8HI 1 "register_operand" "0")
5624 (parallel [(const_int 0)]))
5625 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5627 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5628 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5631 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5632 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5634 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5635 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5640 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5641 (parallel [(const_int 0)]))
5642 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5644 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5645 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5648 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5649 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5651 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5652 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5654 "phaddsw\t{%2, %0|%0, %2}"
5655 [(set_attr "type" "sseiadd")
5656 (set_attr "prefix_data16" "1")
5657 (set_attr "prefix_extra" "1")
5658 (set_attr "mode" "TI")])
;; phaddsw, 64-bit (MMX, constraint "y") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1), (2,3) of operand 1 and then operand 2.
5660 (define_insn "ssse3_phaddswv4hi3"
5661 [(set (match_operand:V4HI 0 "register_operand" "=y")
5666 (match_operand:V4HI 1 "register_operand" "0")
5667 (parallel [(const_int 0)]))
5668 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5670 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5671 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5675 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5676 (parallel [(const_int 0)]))
5677 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5679 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5680 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5682 "phaddsw\t{%2, %0|%0, %2}"
5683 [(set_attr "type" "sseiadd")
5684 (set_attr "prefix_extra" "1")
5685 (set_attr "mode" "DI")])
;; phsubw, 128-bit (XMM, constraint "x") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1) ... (6,7) of operand 1 and then of
;; operand 2.
5687 (define_insn "ssse3_phsubwv8hi3"
5688 [(set (match_operand:V8HI 0 "register_operand" "=x")
5694 (match_operand:V8HI 1 "register_operand" "0")
5695 (parallel [(const_int 0)]))
5696 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5698 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5699 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5702 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5703 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5705 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5706 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5711 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5712 (parallel [(const_int 0)]))
5713 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5715 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5716 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5719 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5720 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5722 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5723 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5725 "phsubw\t{%2, %0|%0, %2}"
5726 [(set_attr "type" "sseiadd")
5727 (set_attr "prefix_data16" "1")
5728 (set_attr "prefix_extra" "1")
5729 (set_attr "mode" "TI")])
;; phsubw, 64-bit (MMX, constraint "y") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1), (2,3) of operand 1 and then operand 2.
5731 (define_insn "ssse3_phsubwv4hi3"
5732 [(set (match_operand:V4HI 0 "register_operand" "=y")
5737 (match_operand:V4HI 1 "register_operand" "0")
5738 (parallel [(const_int 0)]))
5739 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5741 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5742 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5746 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5747 (parallel [(const_int 0)]))
5748 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5750 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5751 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5753 "phsubw\t{%2, %0|%0, %2}"
5754 [(set_attr "type" "sseiadd")
5755 (set_attr "prefix_extra" "1")
5756 (set_attr "mode" "DI")])
;; phsubd, 128-bit (XMM, constraint "x") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent SImode elements (0,1), (2,3) of operand 1 and then operand 2.
5758 (define_insn "ssse3_phsubdv4si3"
5759 [(set (match_operand:V4SI 0 "register_operand" "=x")
5764 (match_operand:V4SI 1 "register_operand" "0")
5765 (parallel [(const_int 0)]))
5766 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5768 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5769 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5773 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5774 (parallel [(const_int 0)]))
5775 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5777 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5778 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5780 "phsubd\t{%2, %0|%0, %2}"
5781 [(set_attr "type" "sseiadd")
5782 (set_attr "prefix_data16" "1")
5783 (set_attr "prefix_extra" "1")
5784 (set_attr "mode" "TI")])
;; phsubd, 64-bit (MMX, constraint "y") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; SImode elements (0,1) of operand 1 and then of operand 2.
5786 (define_insn "ssse3_phsubdv2si3"
5787 [(set (match_operand:V2SI 0 "register_operand" "=y")
5791 (match_operand:V2SI 1 "register_operand" "0")
5792 (parallel [(const_int 0)]))
5793 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5796 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5797 (parallel [(const_int 0)]))
5798 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5800 "phsubd\t{%2, %0|%0, %2}"
5801 [(set_attr "type" "sseiadd")
5802 (set_attr "prefix_extra" "1")
5803 (set_attr "mode" "DI")])
;; phsubsw, 128-bit (XMM, constraint "x") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1) ... (6,7) of operand 1 and then of
;; operand 2.
5805 (define_insn "ssse3_phsubswv8hi3"
5806 [(set (match_operand:V8HI 0 "register_operand" "=x")
5812 (match_operand:V8HI 1 "register_operand" "0")
5813 (parallel [(const_int 0)]))
5814 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5816 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5817 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5820 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5821 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5823 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5824 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5830 (parallel [(const_int 0)]))
5831 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5833 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5834 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5837 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5838 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5840 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5841 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5843 "phsubsw\t{%2, %0|%0, %2}"
5844 [(set_attr "type" "sseiadd")
5845 (set_attr "prefix_data16" "1")
5846 (set_attr "prefix_extra" "1")
5847 (set_attr "mode" "TI")])
;; phsubsw, 64-bit (MMX, constraint "y") form.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  The RTL pairs
;; adjacent HImode elements (0,1), (2,3) of operand 1 and then operand 2.
5849 (define_insn "ssse3_phsubswv4hi3"
5850 [(set (match_operand:V4HI 0 "register_operand" "=y")
5855 (match_operand:V4HI 1 "register_operand" "0")
5856 (parallel [(const_int 0)]))
5857 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5859 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5860 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5864 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5865 (parallel [(const_int 0)]))
5866 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5868 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5869 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5871 "phsubsw\t{%2, %0|%0, %2}"
5872 [(set_attr "type" "sseiadd")
5873 (set_attr "prefix_extra" "1")
5874 (set_attr "mode" "DI")])
;; pmaddubsw, 128-bit (XMM) form: V8HI result from two V16QI inputs.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Each input is selected twice, once from a parallel starting
;; at element 0 and once from a parallel starting at element 1.
5876 (define_insn "ssse3_pmaddubsw128"
5877 [(set (match_operand:V8HI 0 "register_operand" "=x")
5882 (match_operand:V16QI 1 "register_operand" "0")
5883 (parallel [(const_int 0)
5893 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5894 (parallel [(const_int 0)
5904 (vec_select:V16QI (match_dup 1)
5905 (parallel [(const_int 1)
5914 (vec_select:V16QI (match_dup 2)
5915 (parallel [(const_int 1)
5922 (const_int 15)]))))))]
5924 "pmaddubsw\t{%2, %0|%0, %2}"
5925 [(set_attr "type" "sseiadd")
5926 (set_attr "prefix_data16" "1")
5927 (set_attr "prefix_extra" "1")
5928 (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit (MMX, constraint "y") form: V4HI result from two V8QI
;; inputs.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.  Each input is selected twice, once from a parallel
;; starting at element 0 and once from a parallel starting at element 1.
5930 (define_insn "ssse3_pmaddubsw"
5931 [(set (match_operand:V4HI 0 "register_operand" "=y")
5936 (match_operand:V8QI 1 "register_operand" "0")
5937 (parallel [(const_int 0)
5943 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5944 (parallel [(const_int 0)
5950 (vec_select:V8QI (match_dup 1)
5951 (parallel [(const_int 1)
5956 (vec_select:V8QI (match_dup 2)
5957 (parallel [(const_int 1)
5960 (const_int 7)]))))))]
5962 "pmaddubsw\t{%2, %0|%0, %2}"
5963 [(set_attr "type" "sseiadd")
5964 (set_attr "prefix_extra" "1")
5965 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement passes them to
;; ix86_fixup_binary_operands_no_copy with code MULT.  The RTL includes an
;; all-ones V8HI const_vector.
5967 (define_expand "ssse3_pmulhrswv8hi3"
5968 [(set (match_operand:V8HI 0 "register_operand" "")
5975 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5977 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5979 (const_vector:V8HI [(const_int 1) (const_int 1)
5980 (const_int 1) (const_int 1)
5981 (const_int 1) (const_int 1)
5982 (const_int 1) (const_int 1)]))
5985 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhrsw, 128-bit (XMM) insn.  Operands 1 and 2 are marked commutative
;; ("%0"), with operand 1 tied to the destination.  The insn condition
;; additionally requires ix86_binary_operator_ok for MULT.
5987 (define_insn "*ssse3_pmulhrswv8hi3"
5988 [(set (match_operand:V8HI 0 "register_operand" "=x")
5995 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5997 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5999 (const_vector:V8HI [(const_int 1) (const_int 1)
6000 (const_int 1) (const_int 1)
6001 (const_int 1) (const_int 1)
6002 (const_int 1) (const_int 1)]))
6004 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
6005 "pmulhrsw\t{%2, %0|%0, %2}"
6006 [(set_attr "type" "sseimul")
6007 (set_attr "prefix_data16" "1")
6008 (set_attr "prefix_extra" "1")
6009 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement passes them to
;; ix86_fixup_binary_operands_no_copy with code MULT.  The RTL includes an
;; all-ones V4HI const_vector.
6011 (define_expand "ssse3_pmulhrswv4hi3"
6012 [(set (match_operand:V4HI 0 "register_operand" "")
6019 (match_operand:V4HI 1 "nonimmediate_operand" ""))
6021 (match_operand:V4HI 2 "nonimmediate_operand" "")))
6023 (const_vector:V4HI [(const_int 1) (const_int 1)
6024 (const_int 1) (const_int 1)]))
6027 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw, 64-bit (MMX, constraint "y") insn.  Operands 1 and 2 are
;; marked commutative ("%0"), with operand 1 tied to the destination.  The
;; insn condition additionally requires ix86_binary_operator_ok for MULT.
6029 (define_insn "*ssse3_pmulhrswv4hi3"
6030 [(set (match_operand:V4HI 0 "register_operand" "=y")
6037 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
6039 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
6041 (const_vector:V4HI [(const_int 1) (const_int 1)
6042 (const_int 1) (const_int 1)]))
6044 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
6045 "pmulhrsw\t{%2, %0|%0, %2}"
6046 [(set_attr "type" "sseimul")
6047 (set_attr "prefix_extra" "1")
6048 (set_attr "mode" "DI")])
;; pshufb, 128-bit (XMM) form: an unspec of operand 1 (tied to the
;; destination) and operand 2 (register or memory).
6050 (define_insn "ssse3_pshufbv16qi3"
6051 [(set (match_operand:V16QI 0 "register_operand" "=x")
6052 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6053 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6056 "pshufb\t{%2, %0|%0, %2}"
6057 [(set_attr "type" "sselog1")
6058 (set_attr "prefix_data16" "1")
6059 (set_attr "prefix_extra" "1")
6060 (set_attr "mode" "TI")])
;; pshufb, 64-bit (MMX, constraint "y") form: an unspec of operand 1 (tied
;; to the destination) and operand 2 (register or memory).
6062 (define_insn "ssse3_pshufbv8qi3"
6063 [(set (match_operand:V8QI 0 "register_operand" "=y")
6064 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6065 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6068 "pshufb\t{%2, %0|%0, %2}"
6069 [(set_attr "type" "sselog1")
6070 (set_attr "prefix_extra" "1")
6071 (set_attr "mode" "DI")])
;; psignb/psignw/psignd, 128-bit (XMM) forms, one per mode of the
;; SSEMODE124 iterator; the mnemonic suffix comes from <ssevecsize>.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
6073 (define_insn "ssse3_psign<mode>3"
6074 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6076 [(match_operand:SSEMODE124 1 "register_operand" "0")
6077 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6080 "psign<ssevecsize>\t{%2, %0|%0, %2}"
6081 [(set_attr "type" "sselog1")
6082 (set_attr "prefix_data16" "1")
6083 (set_attr "prefix_extra" "1")
6084 (set_attr "mode" "TI")])
;; psign, 64-bit (MMX, constraint "y") forms, one per mode of the MMXMODEI
;; iterator; the mnemonic suffix comes from <mmxvecsize>.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
6086 (define_insn "ssse3_psign<mode>3"
6087 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6089 [(match_operand:MMXMODEI 1 "register_operand" "0")
6090 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6093 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
6094 [(set_attr "type" "sselog1")
6095 (set_attr "prefix_extra" "1")
6096 (set_attr "mode" "DI")])
;; palignr, 128-bit (XMM) form on TImode values.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  Operand 3 is an
;; immediate accepted by const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
6098 (define_insn "ssse3_palignrti"
6099 [(set (match_operand:TI 0 "register_operand" "=x")
6100 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6101 (match_operand:TI 2 "nonimmediate_operand" "xm")
6102 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6106 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6107 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6109 [(set_attr "type" "sseishft")
6110 (set_attr "prefix_data16" "1")
6111 (set_attr "prefix_extra" "1")
6112 (set_attr "mode" "TI")])
;; palignr, 64-bit (MMX, constraint "y") form on DImode values.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
;; Operand 3 is an immediate accepted by const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the instruction
;; immediate.
6114 (define_insn "ssse3_palignrdi"
6115 [(set (match_operand:DI 0 "register_operand" "=y")
6116 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6117 (match_operand:DI 2 "nonimmediate_operand" "ym")
6118 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6122 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6123 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6125 [(set_attr "type" "sseishft")
6126 (set_attr "prefix_extra" "1")
6127 (set_attr "mode" "DI")])
;; Vector absolute value for the SSEMODE124 modes, emitted as
;; pabsb/pabsw/pabsd (suffix from <ssevecsize>).  The source (operand 1)
;; may be a register or memory and is not tied to the destination.
6129 (define_insn "abs<mode>2"
6130 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6131 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6133 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
6134 [(set_attr "type" "sselog1")
6135 (set_attr "prefix_data16" "1")
6136 (set_attr "prefix_extra" "1")
6137 (set_attr "mode" "TI")])
;; Vector absolute value for the MMXMODEI modes (MMX, constraint "y"),
;; emitted as pabs with the suffix from <mmxvecsize>.  The source
;; (operand 1) may be a register or memory.
6139 (define_insn "abs<mode>2"
6140 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6141 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6143 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
6144 [(set_attr "type" "sselog1")
6145 (set_attr "prefix_extra" "1")
6146 (set_attr "mode" "DI")])
6148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6150 ;; AMD SSE4A instructions
6152 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntss/movntsd (suffix from <ssemodefsuffix>) for the MODEF scalar
;; modes: stores register operand 1 to memory operand 0.
6154 (define_insn "sse4a_movnt<mode>"
6155 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6157 [(match_operand:MODEF 1 "register_operand" "x")]
6160 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6161 [(set_attr "type" "ssemov")
6162 (set_attr "mode" "<MODE>")])
;; movnts{s,d} taking a vector register source: element 0 of the
;; SSEMODEF2P operand 1 is selected and stored to the scalar-mode memory
;; operand 0.
6164 (define_insn "sse4a_vmmovnt<mode>"
6165 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6166 (unspec:<ssescalarmode>
6167 [(vec_select:<ssescalarmode>
6168 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6169 (parallel [(const_int 0)]))]
6172 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6173 [(set_attr "type" "ssemov")
6174 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form.  Operand 1 is tied to the destination; operands
;; 2 and 3 are modeless integer constants printed as the two immediates.
6176 (define_insn "sse4a_extrqi"
6177 [(set (match_operand:V2DI 0 "register_operand" "=x")
6178 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6179 (match_operand 2 "const_int_operand" "")
6180 (match_operand 3 "const_int_operand" "")]
6183 "extrq\t{%3, %2, %0|%0, %2, %3}"
6184 [(set_attr "type" "sse")
6185 (set_attr "prefix_data16" "1")
6186 (set_attr "mode" "TI")])
;; extrq, register form.  Operand 1 is tied to the destination; operand 2
;; is a V16QI register.
6188 (define_insn "sse4a_extrq"
6189 [(set (match_operand:V2DI 0 "register_operand" "=x")
6190 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6191 (match_operand:V16QI 2 "register_operand" "x")]
6194 "extrq\t{%2, %0|%0, %2}"
6195 [(set_attr "type" "sse")
6196 (set_attr "prefix_data16" "1")
6197 (set_attr "mode" "TI")])
;; insertq, immediate form.  Operand 1 is tied to the destination; operand
;; 2 is a V2DI register; operands 3 and 4 are modeless integer constants
;; printed as the two immediates.
6199 (define_insn "sse4a_insertqi"
6200 [(set (match_operand:V2DI 0 "register_operand" "=x")
6201 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6202 (match_operand:V2DI 2 "register_operand" "x")
6203 (match_operand 3 "const_int_operand" "")
6204 (match_operand 4 "const_int_operand" "")]
6207 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6208 [(set_attr "type" "sseins")
6209 (set_attr "prefix_rep" "1")
6210 (set_attr "mode" "TI")])
;; insertq, register form.  Operand 1 is tied to the destination; operand
;; 2 is a V2DI register.
6212 (define_insn "sse4a_insertq"
6213 [(set (match_operand:V2DI 0 "register_operand" "=x")
6214 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6215 (match_operand:V2DI 2 "register_operand" "x")]
6218 "insertq\t{%2, %0|%0, %2}"
6219 [(set_attr "type" "sseins")
6220 (set_attr "prefix_rep" "1")
6221 (set_attr "mode" "TI")])
6223 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6225 ;; Intel SSE4.1 instructions
6227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendps/blendpd (suffix from <ssemodesuffixf2c>).  Expressed as a
;; vec_merge whose first input is operand 2 (register or memory), whose
;; second input is operand 1 (tied to the destination) and whose mask is
;; the immediate operand 3, range-checked by a per-mode <blendbits>
;; predicate.
6229 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6230 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6231 (vec_merge:SSEMODEF2P
6232 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6233 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6234 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6236 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6237 [(set_attr "type" "ssemov")
6238 (set_attr "prefix_extra" "1")
6239 (set_attr "mode" "<MODE>")])
;; blendvps/blendvpd (suffix from <ssemodesuffixf2c>).  Operand 3, the
;; selector, is a register with the "Yz" constraint; operands 0, 1 and 2
;; use the *_not_xmm0 predicates.  Operand 1 is tied to the destination.
6241 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6242 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6244 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6245 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6246 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6249 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6250 [(set_attr "type" "ssemov")
6251 (set_attr "prefix_extra" "1")
6252 (set_attr "mode" "<MODE>")])
;; dpps/dppd (suffix from <ssemodesuffixf2c>).  Operands 1 and 2 are
;; marked commutative ("%0"), with operand 1 tied to the destination;
;; operand 3 is an immediate in the range 0..255.
6254 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6255 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6257 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6258 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6259 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6262 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6263 [(set_attr "type" "ssemul")
6264 (set_attr "prefix_extra" "1")
6265 (set_attr "mode" "<MODE>")])
;; movntdqa: loads V2DI memory operand 1 into XMM register operand 0,
;; expressed as an unspec.
6267 (define_insn "sse4_1_movntdqa"
6268 [(set (match_operand:V2DI 0 "register_operand" "=x")
6269 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6272 "movntdqa\t{%1, %0|%0, %1}"
6273 [(set_attr "type" "ssecvt")
6274 (set_attr "prefix_extra" "1")
6275 (set_attr "mode" "TI")])
;; mpsadbw: an unspec of operand 1 (tied to the destination), operand 2
;; (register or memory) and an immediate operand 3 in the range 0..255.
6277 (define_insn "sse4_1_mpsadbw"
6278 [(set (match_operand:V16QI 0 "register_operand" "=x")
6279 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6280 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6281 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6284 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6285 [(set_attr "type" "sselog1")
6286 (set_attr "prefix_extra" "1")
6287 (set_attr "mode" "TI")])
;; packusdw: V8HI result built from two V4SI inputs.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
6289 (define_insn "sse4_1_packusdw"
6290 [(set (match_operand:V8HI 0 "register_operand" "=x")
6293 (match_operand:V4SI 1 "register_operand" "0"))
6295 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6297 "packusdw\t{%2, %0|%0, %2}"
6298 [(set_attr "type" "sselog")
6299 (set_attr "prefix_extra" "1")
6300 (set_attr "mode" "TI")])
;; pblendvb.  Operand 3, the selector, is a register with the "Yz"
;; constraint; operands 0, 1 and 2 use the *_not_xmm0 predicates.
;; Operand 1 is tied to the destination.
6302 (define_insn "sse4_1_pblendvb"
6303 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6304 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6305 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6306 (match_operand:V16QI 3 "register_operand" "Yz")]
6309 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6310 [(set_attr "type" "ssemov")
6311 (set_attr "prefix_extra" "1")
6312 (set_attr "mode" "TI")])
;; pblendw.  Operand 2 (register or memory) is listed before operand 1
;; (tied to the destination) in the RTL; operand 3 is an immediate in the
;; range 0..255.
6314 (define_insn "sse4_1_pblendw"
6315 [(set (match_operand:V8HI 0 "register_operand" "=x")
6317 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6318 (match_operand:V8HI 1 "register_operand" "0")
6319 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6321 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6322 [(set_attr "type" "ssemov")
6323 (set_attr "prefix_extra" "1")
6324 (set_attr "mode" "TI")])
;; phminposuw: UNSPEC_PHMINPOSUW of the single V8HI source operand 1
;; (register or memory), with the result in XMM register operand 0.
6326 (define_insn "sse4_1_phminposuw"
6327 [(set (match_operand:V8HI 0 "register_operand" "=x")
6328 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6329 UNSPEC_PHMINPOSUW))]
6331 "phminposuw\t{%1, %0|%0, %1}"
6332 [(set_attr "type" "sselog1")
6333 (set_attr "prefix_extra" "1")
6334 (set_attr "mode" "TI")])
;; pmovsxbw, register source: operand 1 is a full V16QI XMM register; the
;; element selection starts at element 0.
6336 (define_insn "sse4_1_extendv8qiv8hi2"
6337 [(set (match_operand:V8HI 0 "register_operand" "=x")
6340 (match_operand:V16QI 1 "register_operand" "x")
6341 (parallel [(const_int 0)
6350 "pmovsxbw\t{%1, %0|%0, %1}"
6351 [(set_attr "type" "ssemov")
6352 (set_attr "prefix_extra" "1")
6353 (set_attr "mode" "TI")])
;; pmovsxbw, narrow source: operand 1 is a V8QI register or memory operand
;; that the RTL wraps in vec_duplicate:V16QI before the element selection.
6355 (define_insn "*sse4_1_extendv8qiv8hi2"
6356 [(set (match_operand:V8HI 0 "register_operand" "=x")
6359 (vec_duplicate:V16QI
6360 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6361 (parallel [(const_int 0)
6370 "pmovsxbw\t{%1, %0|%0, %1}"
6371 [(set_attr "type" "ssemov")
6372 (set_attr "prefix_extra" "1")
6373 (set_attr "mode" "TI")])
;; pmovsxbd, register source: operand 1 is a full V16QI XMM register; the
;; element selection starts at element 0.
6375 (define_insn "sse4_1_extendv4qiv4si2"
6376 [(set (match_operand:V4SI 0 "register_operand" "=x")
6379 (match_operand:V16QI 1 "register_operand" "x")
6380 (parallel [(const_int 0)
6385 "pmovsxbd\t{%1, %0|%0, %1}"
6386 [(set_attr "type" "ssemov")
6387 (set_attr "prefix_extra" "1")
6388 (set_attr "mode" "TI")])
;; pmovsxbd, narrow source: operand 1 is a V4QI register or memory operand
;; that the RTL wraps in vec_duplicate:V16QI before the element selection.
6390 (define_insn "*sse4_1_extendv4qiv4si2"
6391 [(set (match_operand:V4SI 0 "register_operand" "=x")
6394 (vec_duplicate:V16QI
6395 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6396 (parallel [(const_int 0)
6401 "pmovsxbd\t{%1, %0|%0, %1}"
6402 [(set_attr "type" "ssemov")
6403 (set_attr "prefix_extra" "1")
6404 (set_attr "mode" "TI")])
;; pmovsxbq, register source: operand 1 is a full V16QI XMM register; the
;; element selection starts at element 0.
6406 (define_insn "sse4_1_extendv2qiv2di2"
6407 [(set (match_operand:V2DI 0 "register_operand" "=x")
6410 (match_operand:V16QI 1 "register_operand" "x")
6411 (parallel [(const_int 0)
6414 "pmovsxbq\t{%1, %0|%0, %1}"
6415 [(set_attr "type" "ssemov")
6416 (set_attr "prefix_extra" "1")
6417 (set_attr "mode" "TI")])
;; pmovsxbq, narrow source: operand 1 is a V2QI register or memory operand
;; that the RTL wraps in vec_duplicate:V16QI before the element selection.
6419 (define_insn "*sse4_1_extendv2qiv2di2"
6420 [(set (match_operand:V2DI 0 "register_operand" "=x")
6423 (vec_duplicate:V16QI
6424 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6425 (parallel [(const_int 0)
6428 "pmovsxbq\t{%1, %0|%0, %1}"
6429 [(set_attr "type" "ssemov")
6430 (set_attr "prefix_extra" "1")
6431 (set_attr "mode" "TI")])
;; pmovsxwd, register source: operand 1 is a full V8HI XMM register; the
;; element selection starts at element 0.
6433 (define_insn "sse4_1_extendv4hiv4si2"
6434 [(set (match_operand:V4SI 0 "register_operand" "=x")
6437 (match_operand:V8HI 1 "register_operand" "x")
6438 (parallel [(const_int 0)
6443 "pmovsxwd\t{%1, %0|%0, %1}"
6444 [(set_attr "type" "ssemov")
6445 (set_attr "prefix_extra" "1")
6446 (set_attr "mode" "TI")])
;; pmovsxwd, narrow source: operand 1 is a V4HI register or memory
;; operand, i.e. the four HImode elements that produce the four SImode
;; results.  This is the same source mode that the zero-extending
;; counterpart *sse4_1_zero_extendv4hiv4si2 uses.
6448 (define_insn "*sse4_1_extendv4hiv4si2"
6449 [(set (match_operand:V4SI 0 "register_operand" "=x")
6453 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6454 (parallel [(const_int 0)
6459 "pmovsxwd\t{%1, %0|%0, %1}"
6460 [(set_attr "type" "ssemov")
6461 (set_attr "prefix_extra" "1")
6462 (set_attr "mode" "TI")])
;; pmovsxwq, register source: operand 1 is a full V8HI XMM register; the
;; element selection starts at element 0.
6464 (define_insn "sse4_1_extendv2hiv2di2"
6465 [(set (match_operand:V2DI 0 "register_operand" "=x")
6468 (match_operand:V8HI 1 "register_operand" "x")
6469 (parallel [(const_int 0)
6472 "pmovsxwq\t{%1, %0|%0, %1}"
6473 [(set_attr "type" "ssemov")
6474 (set_attr "prefix_extra" "1")
6475 (set_attr "mode" "TI")])
;; pmovsxwq, narrow source: operand 1 is a V2HI register or memory
;; operand, i.e. the two HImode elements that produce the two DImode
;; results.  This is the same source mode that the zero-extending
;; counterpart *sse4_1_zero_extendv2hiv2di2 uses.
6477 (define_insn "*sse4_1_extendv2hiv2di2"
6478 [(set (match_operand:V2DI 0 "register_operand" "=x")
6482 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6483 (parallel [(const_int 0)
6486 "pmovsxwq\t{%1, %0|%0, %1}"
6487 [(set_attr "type" "ssemov")
6488 (set_attr "prefix_extra" "1")
6489 (set_attr "mode" "TI")])
;; pmovsxdq, register source: operand 1 is a full V4SI XMM register; the
;; element selection starts at element 0.
6491 (define_insn "sse4_1_extendv2siv2di2"
6492 [(set (match_operand:V2DI 0 "register_operand" "=x")
6495 (match_operand:V4SI 1 "register_operand" "x")
6496 (parallel [(const_int 0)
6499 "pmovsxdq\t{%1, %0|%0, %1}"
6500 [(set_attr "type" "ssemov")
6501 (set_attr "prefix_extra" "1")
6502 (set_attr "mode" "TI")])
;; pmovsxdq, narrow source: operand 1 is a V2SI register or memory
;; operand.
6504 (define_insn "*sse4_1_extendv2siv2di2"
6505 [(set (match_operand:V2DI 0 "register_operand" "=x")
6509 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6510 (parallel [(const_int 0)
6513 "pmovsxdq\t{%1, %0|%0, %1}"
6514 [(set_attr "type" "ssemov")
6515 (set_attr "prefix_extra" "1")
6516 (set_attr "mode" "TI")])
;; pmovzxbw, register source: operand 1 is a full V16QI XMM register; the
;; element selection starts at element 0.
6518 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6519 [(set (match_operand:V8HI 0 "register_operand" "=x")
6522 (match_operand:V16QI 1 "register_operand" "x")
6523 (parallel [(const_int 0)
6532 "pmovzxbw\t{%1, %0|%0, %1}"
6533 [(set_attr "type" "ssemov")
6534 (set_attr "prefix_extra" "1")
6535 (set_attr "mode" "TI")])
;; pmovzxbw, narrow source: operand 1 is a V8QI register or memory operand
;; that the RTL wraps in vec_duplicate:V16QI before the element selection.
6537 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6538 [(set (match_operand:V8HI 0 "register_operand" "=x")
6541 (vec_duplicate:V16QI
6542 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6543 (parallel [(const_int 0)
6552 "pmovzxbw\t{%1, %0|%0, %1}"
6553 [(set_attr "type" "ssemov")
6554 (set_attr "prefix_extra" "1")
6555 (set_attr "mode" "TI")])
;; pmovzxbd, register source: operand 1 is a full V16QI XMM register; the
;; element selection starts at element 0.
6557 (define_insn "sse4_1_zero_extendv4qiv4si2"
6558 [(set (match_operand:V4SI 0 "register_operand" "=x")
6561 (match_operand:V16QI 1 "register_operand" "x")
6562 (parallel [(const_int 0)
6567 "pmovzxbd\t{%1, %0|%0, %1}"
6568 [(set_attr "type" "ssemov")
6569 (set_attr "prefix_extra" "1")
6570 (set_attr "mode" "TI")])
;; pmovzxbd, narrow source: operand 1 is a V4QI register or memory operand
;; that the RTL wraps in vec_duplicate:V16QI before the element selection.
6572 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6573 [(set (match_operand:V4SI 0 "register_operand" "=x")
6576 (vec_duplicate:V16QI
6577 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6578 (parallel [(const_int 0)
6583 "pmovzxbd\t{%1, %0|%0, %1}"
6584 [(set_attr "type" "ssemov")
6585 (set_attr "prefix_extra" "1")
6586 (set_attr "mode" "TI")])
;; pmovzxbq, register source: operand 1 is a full V16QI XMM register; the
;; element selection starts at element 0.
6588 (define_insn "sse4_1_zero_extendv2qiv2di2"
6589 [(set (match_operand:V2DI 0 "register_operand" "=x")
6592 (match_operand:V16QI 1 "register_operand" "x")
6593 (parallel [(const_int 0)
6596 "pmovzxbq\t{%1, %0|%0, %1}"
6597 [(set_attr "type" "ssemov")
6598 (set_attr "prefix_extra" "1")
6599 (set_attr "mode" "TI")])
;; pmovzxbq, narrow source: operand 1 is a V2QI register or memory operand
;; that the RTL wraps in vec_duplicate:V16QI before the element selection.
6601 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6602 [(set (match_operand:V2DI 0 "register_operand" "=x")
6605 (vec_duplicate:V16QI
6606 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6607 (parallel [(const_int 0)
6610 "pmovzxbq\t{%1, %0|%0, %1}"
6611 [(set_attr "type" "ssemov")
6612 (set_attr "prefix_extra" "1")
6613 (set_attr "mode" "TI")])
;; pmovzxwd, register source: operand 1 is a full V8HI XMM register; the
;; element selection starts at element 0.
6615 (define_insn "sse4_1_zero_extendv4hiv4si2"
6616 [(set (match_operand:V4SI 0 "register_operand" "=x")
6619 (match_operand:V8HI 1 "register_operand" "x")
6620 (parallel [(const_int 0)
6625 "pmovzxwd\t{%1, %0|%0, %1}"
6626 [(set_attr "type" "ssemov")
6627 (set_attr "prefix_extra" "1")
6628 (set_attr "mode" "TI")])
;; pmovzxwd, narrow source: operand 1 is a V4HI register or memory
;; operand.
6630 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6631 [(set (match_operand:V4SI 0 "register_operand" "=x")
6635 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6636 (parallel [(const_int 0)
6641 "pmovzxwd\t{%1, %0|%0, %1}"
6642 [(set_attr "type" "ssemov")
6643 (set_attr "prefix_extra" "1")
6644 (set_attr "mode" "TI")])
;; pmovzxwq, register source: operand 1 is a full V8HI XMM register; the
;; element selection starts at element 0.
6646 (define_insn "sse4_1_zero_extendv2hiv2di2"
6647 [(set (match_operand:V2DI 0 "register_operand" "=x")
6650 (match_operand:V8HI 1 "register_operand" "x")
6651 (parallel [(const_int 0)
6654 "pmovzxwq\t{%1, %0|%0, %1}"
6655 [(set_attr "type" "ssemov")
6656 (set_attr "prefix_extra" "1")
6657 (set_attr "mode" "TI")])
;; pmovzxwq, narrow source: operand 1 is a V2HI register or memory
;; operand.
6659 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6660 [(set (match_operand:V2DI 0 "register_operand" "=x")
6664 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6665 (parallel [(const_int 0)
6668 "pmovzxwq\t{%1, %0|%0, %1}"
6669 [(set_attr "type" "ssemov")
6670 (set_attr "prefix_extra" "1")
6671 (set_attr "mode" "TI")])
;; Named pattern for the register-source form of pmovzxdq: operand 1 is a
;; V4SI register and operand 0 is the V2DI destination register.  Going by
;; the pattern name, this is the V2SI -> V2DI zero extension.
6673 (define_insn "sse4_1_zero_extendv2siv2di2"
6674 [(set (match_operand:V2DI 0 "register_operand" "=x")
6677 (match_operand:V4SI 1 "register_operand" "x")
6678 (parallel [(const_int 0)
6681 "pmovzxdq\t{%1, %0|%0, %1}"
6682 [(set_attr "type" "ssemov")
6683 (set_attr "prefix_extra" "1")
6684 (set_attr "mode" "TI")])
;; Anonymous ('*') pmovzxdq pattern whose source (operand 1) is a V2SI
;; register or memory operand ("xm").
6686 (define_insn "*sse4_1_zero_extendv2siv2di2"
6687 [(set (match_operand:V2DI 0 "register_operand" "=x")
6691 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6692 (parallel [(const_int 0)
6695 "pmovzxdq\t{%1, %0|%0, %1}"
6696 [(set_attr "type" "ssemov")
6697 (set_attr "prefix_extra" "1")
6698 (set_attr "mode" "TI")])
6700 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6701 ;; But it is not really a compare instruction.
;; The flags result is therefore modelled as an unspec:CC over the two V2DI
;; inputs.  Operand 0 must be a register; operand 1 may be a register or
;; memory.
6702 (define_insn "sse4_1_ptest"
6703 [(set (reg:CC FLAGS_REG)
6704 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6705 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6708 "ptest\t{%1, %0|%0, %1}"
6709 [(set_attr "type" "ssecomi")
6710 (set_attr "prefix_extra" "1")
6711 (set_attr "mode" "TI")])
;; Packed round.  The pattern name and the mnemonic are both built from
;; <ssemodesuffixf2c> ("s" for V4SF, "d" for V2DF), giving roundps/roundpd.
;; Operand 1 is the register-or-memory source; operand 2 is an immediate
;; accepted by const_0_to_15_operand.
6713 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6714 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6716 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6717 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6720 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6721 [(set_attr "type" "ssecvt")
6722 (set_attr "prefix_extra" "1")
6723 (set_attr "mode" "<MODE>")])
;; Scalar round (roundss/roundsd via <ssemodesuffixf2c>), expressed as a
;; vec_merge.  Operand 2 is the register source being rounded and operand 3
;; the immediate accepted by const_0_to_15_operand.  Operand 1 is tied to
;; the output (constraint "0"), so only %0, %2 and %3 appear in the
;; template.
6725 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6727 (vec_merge:SSEMODEF2P
6729 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6730 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6732 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6735 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6736 [(set_attr "type" "ssecvt")
6737 (set_attr "prefix_extra" "1")
6738 (set_attr "mode" "<MODE>")])
6740 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6742 ;; Intel SSE4.2 string/text processing instructions
6744 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are V16QI operands 2 and 4 (operand 4 may be memory),
;; SI operands 3 ("a") and 5 ("d"), and an immediate operand 6 accepted by
;; const_0_to_255_operand.
;; The pattern is only valid before reload (see the
;; !(reload_completed || reload_in_progress) test).  The split code looks at
;; REG_UNUSED notes on curr_insn to find out which of the three results are
;; live, and emits sse4_2_pcmpestri, sse4_2_pcmpestrm and/or
;; sse4_2_pcmpestr_cconly accordingly; the cconly form is used when the
;; flags are live but neither register result is.
6746 (define_insn_and_split "sse4_2_pcmpestr"
6747 [(set (match_operand:SI 0 "register_operand" "=c,c")
6749 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6750 (match_operand:SI 3 "register_operand" "a,a")
6751 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6752 (match_operand:SI 5 "register_operand" "d,d")
6753 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6755 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6763 (set (reg:CC FLAGS_REG)
6772 && !(reload_completed || reload_in_progress)"
6777 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6778 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6779 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6782 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6783 operands[3], operands[4],
6784 operands[5], operands[6]));
6786 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6787 operands[3], operands[4],
6788 operands[5], operands[6]));
6789 if (flags && !(ecx || xmm0))
6790 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6791 operands[2], operands[3],
6792 operands[4], operands[5],
6796 [(set_attr "type" "sselog")
6797 (set_attr "prefix_data16" "1")
6798 (set_attr "prefix_extra" "1")
6799 (set_attr "memory" "none,load")
6800 (set_attr "mode" "TI")])
;; pcmpestri: sets SI operand 0 (constraint "c") and FLAGS_REG.
;; Inputs: V16QI operand 1 (register), SI operand 2 ("a"), V16QI operand 3
;; (register or memory), SI operand 4 ("d") and the immediate operand 5.
;; The two alternatives differ only in whether operand 3 is in memory,
;; which the "memory" attribute mirrors (none,load).
6802 (define_insn "sse4_2_pcmpestri"
6803 [(set (match_operand:SI 0 "register_operand" "=c,c")
6805 [(match_operand:V16QI 1 "register_operand" "x,x")
6806 (match_operand:SI 2 "register_operand" "a,a")
6807 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6808 (match_operand:SI 4 "register_operand" "d,d")
6809 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6811 (set (reg:CC FLAGS_REG)
6820 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6821 [(set_attr "type" "sselog")
6822 (set_attr "prefix_data16" "1")
6823 (set_attr "prefix_extra" "1")
6824 (set_attr "memory" "none,load")
6825 (set_attr "mode" "TI")])
;; pcmpestrm: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG.
;; Inputs: V16QI operand 1 (register), SI operand 2 ("a"), V16QI operand 3
;; (register or memory), SI operand 4 ("d") and the immediate operand 5.
;; The two alternatives differ only in whether operand 3 is in memory.
6827 (define_insn "sse4_2_pcmpestrm"
6828 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6830 [(match_operand:V16QI 1 "register_operand" "x,x")
6831 (match_operand:SI 2 "register_operand" "a,a")
6832 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6833 (match_operand:SI 4 "register_operand" "d,d")
6834 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6836 (set (reg:CC FLAGS_REG)
6845 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6846 [(set_attr "type" "sselog")
6847 (set_attr "prefix_data16" "1")
6848 (set_attr "prefix_extra" "1")
6849 (set_attr "memory" "none,load")
6850 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: only FLAGS_REG is set, and the register
;; results are modelled as clobbered scratches (operands 0 and 1).
;; Alternatives 0-1 clobber the V16QI scratch ("Yz") and emit pcmpestrm;
;; alternatives 2-3 clobber the SI scratch ("c") and emit pcmpestri.  In
;; each alternative the other scratch has constraint "X".  Within each pair
;; the alternatives differ in whether operand 4 is a register or memory.
6852 (define_insn "sse4_2_pcmpestr_cconly"
6853 [(set (reg:CC FLAGS_REG)
6855 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6856 (match_operand:SI 3 "register_operand" "a,a,a,a")
6857 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6858 (match_operand:SI 5 "register_operand" "d,d,d,d")
6859 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6861 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6862 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6865 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6866 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6867 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6868 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6869 [(set_attr "type" "sselog")
6870 (set_attr "prefix_data16" "1")
6871 (set_attr "prefix_extra" "1")
6872 (set_attr "memory" "none,load,none,load")
6873 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are V16QI operands 2 and 3 (operand 3 may be memory)
;; and an immediate operand 4 accepted by const_0_to_255_operand.
;; The pattern is only valid before reload.  The split code uses REG_UNUSED
;; notes on curr_insn to find out which results are live, and emits
;; sse4_2_pcmpistri, sse4_2_pcmpistrm and/or sse4_2_pcmpistr_cconly
;; accordingly; the cconly form is used when the flags are live but neither
;; register result is.
6875 (define_insn_and_split "sse4_2_pcmpistr"
6876 [(set (match_operand:SI 0 "register_operand" "=c,c")
6878 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6879 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6880 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6882 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6888 (set (reg:CC FLAGS_REG)
6895 && !(reload_completed || reload_in_progress)"
6900 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6901 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6902 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6905 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6906 operands[3], operands[4]));
6908 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6909 operands[3], operands[4]));
6910 if (flags && !(ecx || xmm0))
6911 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6912 operands[2], operands[3],
6916 [(set_attr "type" "sselog")
6917 (set_attr "prefix_data16" "1")
6918 (set_attr "prefix_extra" "1")
6919 (set_attr "memory" "none,load")
6920 (set_attr "mode" "TI")])
;; pcmpistri: sets SI operand 0 (constraint "c") and FLAGS_REG from V16QI
;; operand 1 (register), V16QI operand 2 (register or memory) and the
;; immediate operand 3.
6922 (define_insn "sse4_2_pcmpistri"
6923 [(set (match_operand:SI 0 "register_operand" "=c,c")
6925 [(match_operand:V16QI 1 "register_operand" "x,x")
6926 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6927 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6929 (set (reg:CC FLAGS_REG)
6936 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6937 [(set_attr "type" "sselog")
6938 (set_attr "prefix_data16" "1")
6939 (set_attr "prefix_extra" "1")
6940 (set_attr "memory" "none,load")
6941 (set_attr "mode" "TI")])
;; pcmpistrm: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG from
;; V16QI operand 1 (register), V16QI operand 2 (register or memory) and the
;; immediate operand 3.
6943 (define_insn "sse4_2_pcmpistrm"
6944 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6946 [(match_operand:V16QI 1 "register_operand" "x,x")
6947 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6948 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6950 (set (reg:CC FLAGS_REG)
6957 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6958 [(set_attr "type" "sselog")
6959 (set_attr "prefix_data16" "1")
6960 (set_attr "prefix_extra" "1")
6961 (set_attr "memory" "none,load")
6962 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: only FLAGS_REG is set, and the register
;; results are modelled as clobbered scratches (operands 0 and 1).
;; Alternatives 0-1 clobber the V16QI scratch ("Yz") and emit pcmpistrm;
;; alternatives 2-3 clobber the SI scratch ("c") and emit pcmpistri.  Within
;; each pair the alternatives differ in whether operand 3 is a register or
;; memory.
6964 (define_insn "sse4_2_pcmpistr_cconly"
6965 [(set (reg:CC FLAGS_REG)
6967 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6968 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6969 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6971 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6972 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6975 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6976 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6977 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6978 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6979 [(set_attr "type" "sselog")
6980 (set_attr "prefix_data16" "1")
6981 (set_attr "prefix_extra" "1")
6982 (set_attr "memory" "none,load,none,load")
6983 (set_attr "mode" "TI")])
6985 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6987 ;; SSE5 instructions
6989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6991 ;; SSE5 parallel integer multiply/add instructions.
6992 ;; Note the instruction does not allow the value being added to be a memory
6993 ;; operand.  However, pretending via the nonimmediate_operand predicate
6994 ;; that it does, and splitting it later, allows the following to be recognized:
6995 ;; a[i] = b[i] * c[i] + d[i];
;; Operands 1 and 2 are the V8HI multiplicands and operand 3 is the addend,
;; which the constraints tie to the output ("0").  The three alternatives
;; are: both multiplicands in registers, operand 2 in memory, and operand 1
;; in memory; the third template swaps %1 and %2.
6996 (define_insn "sse5_pmacsww"
6997 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7000 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7001 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7002 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7003 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7005 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7006 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7007 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7008 [(set_attr "type" "ssemuladd")
7009 (set_attr "mode" "TI")])
7011 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The visible conditions require that the operands fail
;; ix86_sse5_valid_op_p when its last argument is 1 but pass when it is 2,
;; and that operand 0 is not mentioned in operands 1, 2 or 3.  The split
;; code calls ix86_expand_sse5_multiple_memory on the operands and then
;; re-emits sse5_pmacsww.
7013 [(set (match_operand:V8HI 0 "register_operand" "")
7015 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
7016 (match_operand:V8HI 2 "nonimmediate_operand" ""))
7017 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
7019 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7020 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7021 && !reg_mentioned_p (operands[0], operands[1])
7022 && !reg_mentioned_p (operands[0], operands[2])
7023 && !reg_mentioned_p (operands[0], operands[3])"
7026 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
7027 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww: V8HI multiply of operands 1 and 2 combined with operand 3,
;; which is tied to the output ("0").  The insn condition passes 1 as the
;; last argument of ix86_sse5_valid_op_p.  The third alternative (operand 1
;; in memory) swaps %1 and %2 in the template.
7032 (define_insn "sse5_pmacssww"
7033 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7035 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7036 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7037 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7038 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7040 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7041 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7042 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7043 [(set_attr "type" "ssemuladd")
7044 (set_attr "mode" "TI")])
7046 ;; Note the instruction does not allow the value being added to be a memory
7047 ;; operand.  However, pretending via the nonimmediate_operand predicate
7048 ;; that it does, and splitting it later, allows the following to be recognized:
7049 ;; a[i] = b[i] * c[i] + d[i];
;; Operands 1 and 2 are the V4SI multiplicands and operand 3 is the addend,
;; which the constraints tie to the output ("0").  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7050 (define_insn "sse5_pmacsdd"
7051 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7054 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7055 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7056 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7057 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7059 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7060 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7061 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7062 [(set_attr "type" "ssemuladd")
7063 (set_attr "mode" "TI")])
7065 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The visible conditions require that the operands fail
;; ix86_sse5_valid_op_p when its last argument is 1 but pass when it is 2,
;; and that operand 0 is not mentioned in operands 1, 2 or 3.  The split
;; code calls ix86_expand_sse5_multiple_memory on the operands and then
;; re-emits sse5_pmacsdd.
7067 [(set (match_operand:V4SI 0 "register_operand" "")
7069 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7070 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7071 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7073 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7074 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7075 && !reg_mentioned_p (operands[0], operands[1])
7076 && !reg_mentioned_p (operands[0], operands[2])
7077 && !reg_mentioned_p (operands[0], operands[3])"
7080 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7081 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd: V4SI multiply of operands 1 and 2 combined with operand 3,
;; which is tied to the output ("0").  The insn condition passes 1 as the
;; last argument of ix86_sse5_valid_op_p.  The third alternative (operand 1
;; in memory) swaps %1 and %2 in the template.
7086 (define_insn "sse5_pmacssdd"
7087 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7089 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7090 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7091 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7092 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7094 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7095 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7096 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7097 [(set_attr "type" "ssemuladd")
7098 (set_attr "mode" "TI")])
;; pmacssdql: V2DI result from V4SI multiplicands (operands 1 and 2) and a
;; V2DI register addend (operand 3) tied to the output.  The element
;; selectors visible here begin at (const_int 1).  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7100 (define_insn "sse5_pmacssdql"
7101 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7106 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7107 (parallel [(const_int 1)
7110 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7111 (parallel [(const_int 1)
7113 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7114 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7116 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7117 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7118 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7119 [(set_attr "type" "ssemuladd")
7120 (set_attr "mode" "TI")])
;; pmacssdqh: V2DI result from V4SI multiplicands (operands 1 and 2) and a
;; V2DI register addend (operand 3) tied to the output.  The element
;; selectors visible here begin at (const_int 0).  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7122 (define_insn "sse5_pmacssdqh"
7123 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7128 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7129 (parallel [(const_int 0)
7133 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7134 (parallel [(const_int 0)
7136 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7137 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7139 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7140 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7141 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7142 [(set_attr "type" "ssemuladd")
7143 (set_attr "mode" "TI")])
;; pmacsdql: V2DI result from V4SI multiplicands (operands 1 and 2) and a
;; V2DI register addend (operand 3) tied to the output.  The element
;; selectors visible here begin at (const_int 1).  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7145 (define_insn "sse5_pmacsdql"
7146 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7151 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7152 (parallel [(const_int 1)
7156 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7157 (parallel [(const_int 1)
7159 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7160 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7162 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7163 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7164 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7165 [(set_attr "type" "ssemuladd")
7166 (set_attr "mode" "TI")])
;; Anonymous variant of the pmacsdql pattern whose addend (operand 3) is a
;; memory operand.  The output is earlyclobber ("=&x").  The insn condition
;; passes -1 as the last argument of ix86_sse5_valid_op_p.  The split fires
;; once reload has completed, or earlier when operand 0 is not mentioned in
;; operand 1 or operand 2.
7168 (define_insn_and_split "*sse5_pmacsdql_mem"
7169 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7174 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7175 (parallel [(const_int 1)
7179 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7180 (parallel [(const_int 1)
7182 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7183 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
7185 "&& (reload_completed
7186 || (!reg_mentioned_p (operands[0], operands[1])
7187 && !reg_mentioned_p (operands[0], operands[2])))"
7196 (parallel [(const_int 1)
7201 (parallel [(const_int 1)
7205 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7206 ;; fake it with a multiply/add. In general, we expect the define_split to
7207 ;; occur before register allocation, so we have to handle the corner case where
7208 ;; the target is the same as operands 1/2
;; The output is earlyclobber ("=&x").  The split fires once reload has
;; completed, or earlier when operand 0 is not mentioned in operand 1 or
;; operand 2.  The split preparation sets operands[3] to the V2DI zero
;; constant, i.e. the multiply/add is given a zero addend.
7209 (define_insn_and_split "sse5_mulv2div2di3_low"
7210 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7214 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7215 (parallel [(const_int 1)
7219 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7220 (parallel [(const_int 1)
7221 (const_int 3)])))))]
7224 "&& (reload_completed
7225 || (!reg_mentioned_p (operands[0], operands[1])
7226 && !reg_mentioned_p (operands[0], operands[2])))"
7235 (parallel [(const_int 1)
7240 (parallel [(const_int 1)
7244 operands[3] = CONST0_RTX (V2DImode);
7246 [(set_attr "type" "ssemuladd")
7247 (set_attr "mode" "TI")])
;; pmacsdqh: V2DI result from V4SI multiplicands (operands 1 and 2) and a
;; V2DI register addend (operand 3) tied to the output.  The element
;; selectors visible here begin at (const_int 0).  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7249 (define_insn "sse5_pmacsdqh"
7250 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7255 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7256 (parallel [(const_int 0)
7260 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7261 (parallel [(const_int 0)
7263 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7264 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7266 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7267 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7268 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7269 [(set_attr "type" "ssemuladd")
7270 (set_attr "mode" "TI")])
;; Anonymous variant of the pmacsdqh pattern whose addend (operand 3) is a
;; memory operand.  The output is earlyclobber ("=&x").  The insn condition
;; passes -1 as the last argument of ix86_sse5_valid_op_p.  The split fires
;; once reload has completed, or earlier when operand 0 is not mentioned in
;; operand 1 or operand 2.
7272 (define_insn_and_split "*sse5_pmacsdqh_mem"
7273 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7278 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7279 (parallel [(const_int 0)
7283 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7284 (parallel [(const_int 0)
7286 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7287 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
7289 "&& (reload_completed
7290 || (!reg_mentioned_p (operands[0], operands[1])
7291 && !reg_mentioned_p (operands[0], operands[2])))"
7300 (parallel [(const_int 0)
7305 (parallel [(const_int 0)
7309 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7310 ;; fake it with a multiply/add. In general, we expect the define_split to
7311 ;; occur before register allocation, so we have to handle the corner case where
7312 ;; the target is the same as either operands[1] or operands[2]
;; The output is earlyclobber ("=&x").  The split fires once reload has
;; completed, or earlier when operand 0 is not mentioned in operand 1 or
;; operand 2.  The split preparation sets operands[3] to the V2DI zero
;; constant, i.e. the multiply/add is given a zero addend.
7313 (define_insn_and_split "sse5_mulv2div2di3_high"
7314 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7318 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7319 (parallel [(const_int 0)
7323 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7324 (parallel [(const_int 0)
7325 (const_int 2)])))))]
7328 "&& (reload_completed
7329 || (!reg_mentioned_p (operands[0], operands[1])
7330 && !reg_mentioned_p (operands[0], operands[2])))"
7339 (parallel [(const_int 0)
7344 (parallel [(const_int 0)
7348 operands[3] = CONST0_RTX (V2DImode);
7350 [(set_attr "type" "ssemuladd")
7351 (set_attr "mode" "TI")])
7353 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V4SI result from V8HI multiplicands (operands 1 and 2) and a
;; V4SI register addend (operand 3) tied to the output.  The element
;; selectors visible here begin at (const_int 1).  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7354 (define_insn "sse5_pmacsswd"
7355 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7360 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7361 (parallel [(const_int 1)
7367 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7368 (parallel [(const_int 1)
7372 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7373 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7375 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7376 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7377 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7378 [(set_attr "type" "ssemuladd")
7379 (set_attr "mode" "TI")])
;; pmacswd: V4SI result from V8HI multiplicands (operands 1 and 2) and a
;; V4SI register addend (operand 3) tied to the output.  The element
;; selectors visible here begin at (const_int 1).  The third alternative
;; (operand 1 in memory) swaps %1 and %2 in the template.
7381 (define_insn "sse5_pmacswd"
7382 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7387 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7388 (parallel [(const_int 1)
7394 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7395 (parallel [(const_int 1)
7399 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7400 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7402 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7403 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7404 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7405 [(set_attr "type" "ssemuladd")
7406 (set_attr "mode" "TI")])
;; pmadcsswd: V4SI result from V8HI operands 1 and 2 and a V4SI register
;; addend (operand 3) tied to the output.  Unlike the pmacs*wd patterns,
;; the RTL visible here has element selectors starting at both
;; (const_int 0) and (const_int 1).  The third alternative (operand 1 in
;; memory) swaps %1 and %2 in the template.
7408 (define_insn "sse5_pmadcsswd"
7409 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7415 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7416 (parallel [(const_int 0)
7422 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7423 (parallel [(const_int 0)
7431 (parallel [(const_int 1)
7438 (parallel [(const_int 1)
7442 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7443 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7445 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7446 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7447 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7448 [(set_attr "type" "ssemuladd")
7449 (set_attr "mode" "TI")])
;; pmadcswd: V4SI result from V8HI operands 1 and 2 and a V4SI register
;; addend (operand 3) tied to the output.  The RTL visible here has element
;; selectors starting at both (const_int 0) and (const_int 1).  The third
;; alternative (operand 1 in memory) swaps %1 and %2 in the template.
7451 (define_insn "sse5_pmadcswd"
7452 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7458 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7459 (parallel [(const_int 0)
7465 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7466 (parallel [(const_int 0)
7474 (parallel [(const_int 1)
7481 (parallel [(const_int 1)
7485 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7486 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7488 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7489 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7490 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7491 [(set_attr "type" "ssemuladd")
7492 (set_attr "mode" "TI")])
7494 ;; SSE5 parallel XMM conditional moves
;; Instantiated for every mode in SSEMODE.  The RTL is an if_then_else with
;; operand 3 as the condition, operand 1 as the "then" value and operand 2
;; as the "else" value.  All four alternatives print the same template;
;; they differ only in which input is tied to the output ("0") and which
;; input may be in memory.
7495 (define_insn "sse5_pcmov_<mode>"
7496 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
7497 (if_then_else:SSEMODE
7498 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
7499 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
7500 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
7501 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7503 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7504 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7505 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7506 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7507 [(set_attr "type" "sse4arg")])
7509 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: one V16QI register-or-memory source (operand 1) producing a
;; V8HI register result (operand 0).  The visible element selectors start
;; at (const_int 0) and (const_int 1).
7510 (define_insn "sse5_phaddbw"
7511 [(set (match_operand:V8HI 0 "register_operand" "=x")
7515 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7516 (parallel [(const_int 0)
7527 (parallel [(const_int 1)
7534 (const_int 15)])))))]
7536 "phaddbw\t{%1, %0|%0, %1}"
7537 [(set_attr "type" "sseiadd1")])
;; phaddbd: one V16QI register-or-memory source (operand 1) producing a
;; V4SI register result (operand 0).  The visible element selectors start
;; at (const_int 0) through (const_int 3).
7539 (define_insn "sse5_phaddbd"
7540 [(set (match_operand:V4SI 0 "register_operand" "=x")
7545 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7546 (parallel [(const_int 0)
7553 (parallel [(const_int 1)
7561 (parallel [(const_int 2)
7568 (parallel [(const_int 3)
7571 (const_int 15)]))))))]
7573 "phaddbd\t{%1, %0|%0, %1}"
7574 [(set_attr "type" "sseiadd1")])
;; phaddbq: one V16QI register-or-memory source (operand 1) producing a
;; V2DI register result (operand 0).  The visible element selectors start
;; at (const_int 0)..(const_int 3) and (const_int 8)..(const_int 11).
7576 (define_insn "sse5_phaddbq"
7577 [(set (match_operand:V2DI 0 "register_operand" "=x")
7583 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7584 (parallel [(const_int 0)
7589 (parallel [(const_int 1)
7595 (parallel [(const_int 2)
7600 (parallel [(const_int 3)
7607 (parallel [(const_int 8)
7612 (parallel [(const_int 9)
7618 (parallel [(const_int 10)
7623 (parallel [(const_int 11)
7624 (const_int 15)])))))))]
7626 "phaddbq\t{%1, %0|%0, %1}"
7627 [(set_attr "type" "sseiadd1")])
;; phaddwd: one V8HI register-or-memory source (operand 1) producing a V4SI
;; register result (operand 0).  The visible element selectors start at
;; (const_int 0) and (const_int 1).
7629 (define_insn "sse5_phaddwd"
7630 [(set (match_operand:V4SI 0 "register_operand" "=x")
7634 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7635 (parallel [(const_int 0)
7642 (parallel [(const_int 1)
7645 (const_int 7)])))))]
7647 "phaddwd\t{%1, %0|%0, %1}"
7648 [(set_attr "type" "sseiadd1")])
;; phaddwq: one V8HI register-or-memory source (operand 1) producing a V2DI
;; register result (operand 0).  The visible element selectors start at
;; (const_int 0) through (const_int 3).
7650 (define_insn "sse5_phaddwq"
7651 [(set (match_operand:V2DI 0 "register_operand" "=x")
7656 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7657 (parallel [(const_int 0)
7662 (parallel [(const_int 1)
7668 (parallel [(const_int 2)
7673 (parallel [(const_int 3)
7674 (const_int 7)]))))))]
7676 "phaddwq\t{%1, %0|%0, %1}"
7677 [(set_attr "type" "sseiadd1")])
;; phadddq: one V4SI register-or-memory source (operand 1) producing a V2DI
;; register result (operand 0).  The visible element selectors start at
;; (const_int 0) and (const_int 1).
7679 (define_insn "sse5_phadddq"
7680 [(set (match_operand:V2DI 0 "register_operand" "=x")
7684 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7685 (parallel [(const_int 0)
7690 (parallel [(const_int 1)
7691 (const_int 3)])))))]
7693 "phadddq\t{%1, %0|%0, %1}"
7694 [(set_attr "type" "sseiadd1")])
;; phaddubw: one V16QI register-or-memory source (operand 1) producing a
;; V8HI register result (operand 0); same operand shape as sse5_phaddbw but
;; emitting the phaddubw mnemonic.
7696 (define_insn "sse5_phaddubw"
7697 [(set (match_operand:V8HI 0 "register_operand" "=x")
7701 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7702 (parallel [(const_int 0)
7713 (parallel [(const_int 1)
7720 (const_int 15)])))))]
7722 "phaddubw\t{%1, %0|%0, %1}"
7723 [(set_attr "type" "sseiadd1")])
;; phaddubd: one V16QI register-or-memory source (operand 1) producing a
;; V4SI register result (operand 0); same operand shape as sse5_phaddbd but
;; emitting the phaddubd mnemonic.
7725 (define_insn "sse5_phaddubd"
7726 [(set (match_operand:V4SI 0 "register_operand" "=x")
7731 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7732 (parallel [(const_int 0)
7739 (parallel [(const_int 1)
7747 (parallel [(const_int 2)
7754 (parallel [(const_int 3)
7757 (const_int 15)]))))))]
7759 "phaddubd\t{%1, %0|%0, %1}"
7760 [(set_attr "type" "sseiadd1")])
;; phaddubq: one V16QI register-or-memory source (operand 1) producing a
;; V2DI register result (operand 0); same operand shape as sse5_phaddbq but
;; emitting the phaddubq mnemonic.
7762 (define_insn "sse5_phaddubq"
7763 [(set (match_operand:V2DI 0 "register_operand" "=x")
7769 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7770 (parallel [(const_int 0)
7775 (parallel [(const_int 1)
7781 (parallel [(const_int 2)
7786 (parallel [(const_int 3)
7793 (parallel [(const_int 8)
7798 (parallel [(const_int 9)
7804 (parallel [(const_int 10)
7809 (parallel [(const_int 11)
7810 (const_int 15)])))))))]
7812 "phaddubq\t{%1, %0|%0, %1}"
7813 [(set_attr "type" "sseiadd1")])
;; phadduwd: one V8HI register-or-memory source (operand 1) producing a
;; V4SI register result (operand 0); same operand shape as sse5_phaddwd but
;; emitting the phadduwd mnemonic.
7815 (define_insn "sse5_phadduwd"
7816 [(set (match_operand:V4SI 0 "register_operand" "=x")
7820 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7821 (parallel [(const_int 0)
7828 (parallel [(const_int 1)
7831 (const_int 7)])))))]
7833 "phadduwd\t{%1, %0|%0, %1}"
7834 [(set_attr "type" "sseiadd1")])
;; phadduwq: one V8HI register-or-memory source (operand 1) producing a
;; V2DI register result (operand 0); same operand shape as sse5_phaddwq but
;; emitting the phadduwq mnemonic.
7836 (define_insn "sse5_phadduwq"
7837 [(set (match_operand:V2DI 0 "register_operand" "=x")
7842 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7843 (parallel [(const_int 0)
7848 (parallel [(const_int 1)
7854 (parallel [(const_int 2)
7859 (parallel [(const_int 3)
7860 (const_int 7)]))))))]
7862 "phadduwq\t{%1, %0|%0, %1}"
7863 [(set_attr "type" "sseiadd1")])
;; phaddudq: one V4SI register-or-memory source (operand 1) producing a
;; V2DI register result (operand 0); same operand shape as sse5_phadddq but
;; emitting the phaddudq mnemonic.
7865 (define_insn "sse5_phaddudq"
7866 [(set (match_operand:V2DI 0 "register_operand" "=x")
7870 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7871 (parallel [(const_int 0)
7876 (parallel [(const_int 1)
7877 (const_int 3)])))))]
7879 "phaddudq\t{%1, %0|%0, %1}"
7880 [(set_attr "type" "sseiadd1")])
;; phsubbw: horizontal-subtract counterpart with a V16QI register-or-memory
;; source (operand 1) and a V8HI register result (operand 0).
7882 (define_insn "sse5_phsubbw"
7883 [(set (match_operand:V8HI 0 "register_operand" "=x")
7887 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7888 (parallel [(const_int 0)
7899 (parallel [(const_int 1)
7906 (const_int 15)])))))]
7908 "phsubbw\t{%1, %0|%0, %1}"
7909 [(set_attr "type" "sseiadd1")])
;; phsubwd: horizontal-subtract counterpart with a V8HI register-or-memory
;; source (operand 1) and a V4SI register result (operand 0).
7911 (define_insn "sse5_phsubwd"
7912 [(set (match_operand:V4SI 0 "register_operand" "=x")
7916 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7917 (parallel [(const_int 0)
7924 (parallel [(const_int 1)
7927 (const_int 7)])))))]
7929 "phsubwd\t{%1, %0|%0, %1}"
7930 [(set_attr "type" "sseiadd1")])
;; phsubdq: horizontal-subtract counterpart with a V4SI register-or-memory
;; source (operand 1) and a V2DI register result (operand 0).
7932 (define_insn "sse5_phsubdq"
7933 [(set (match_operand:V2DI 0 "register_operand" "=x")
7937 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7938 (parallel [(const_int 0)
7943 (parallel [(const_int 1)
7944 (const_int 3)])))))]
7946 "phsubdq\t{%1, %0|%0, %1}"
7947 [(set_attr "type" "sseiadd1")])
7949 ;; SSE5 permute instructions
;; Generic pperm, modelled as UNSPEC_SSE5_PERMUTE over three V16QI inputs.
;; In every alternative one input is tied to the output ("0"): operand 1 in
;; alternatives 0-1, operand 3 in alternatives 2-3.  At most one of the
;; remaining inputs is allowed to be in memory.
7950 (define_insn "sse5_pperm"
7951 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7953 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7954 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7955 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7956 UNSPEC_SSE5_PERMUTE))]
7957 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7958 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7959 [(set_attr "type" "sse4arg")
7960 (set_attr "mode" "TI")])
7962 ;; The following are for the various unpack insns, which don't need the first
7963 ;; source operand, so we can just use the output operand for the first operand.
7964 ;; This allows either of the other two operands to be a memory operand. We
7965 ;; can't just use the first operand as an argument to the normal pperm because
7966 ;; then an output only argument, suddenly becomes an input operand.
;; Operand 1 is the V16QI source, operand 2 the mode-less parallel of
;; const_ints used as the selector, and operand 3 the V16QI vector passed
;; via (use ...).  The insn condition requires that at least one of the two
;; V16QI sources (operands 1 and 3) is a register, matching the "xm,x" /
;; "x,xm" alternatives.  Operand 2 is never a register, so it must not be
;; the one tested here.
7967 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7968 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7971 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7972 (match_operand 2 "" "")))) ;; parallel with const_int's
7973 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7975 && (register_operand (operands[1], V16QImode)
7976 || register_operand (operands[3], V16QImode))"
7977 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7978 [(set_attr "type" "sseadd")
7979 (set_attr "mode" "TI")])
;; Sign-extending unpack through pperm (per the pattern name): operand 1 is
;; the V16QI source, operand 2 the mode-less parallel of const_ints used as
;; the selector, and operand 3 the V16QI vector passed via (use ...).  The
;; insn condition requires that at least one of the two V16QI sources
;; (operands 1 and 3) is a register, matching the "xm,x" / "x,xm"
;; alternatives.  Operand 2 is never a register, so it must not be the one
;; tested here.
7981 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7982 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7985 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7986 (match_operand 2 "" "")))) ;; parallel with const_int's
7987 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7989 && (register_operand (operands[1], V16QImode)
7990 || register_operand (operands[3], V16QImode))"
7991 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7992 [(set_attr "type" "sseadd")
7993 (set_attr "mode" "TI")])
;; Zero-extending unpack through pperm (per the pattern name): operand 1 is
;; the V8HI source, operand 2 the mode-less parallel of const_ints used as
;; the selector, and operand 3 the V16QI vector passed via (use ...).  The
;; insn condition requires that at least one of operands 1 and 3 is a
;; register, matching the "xm,x" / "x,xm" alternatives.  Operand 2 is never
;; a register, so it must not be the one tested here.
7995 (define_insn "sse5_pperm_zero_v8hi_v4si"
7996 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7999 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
8000 (match_operand 2 "" "")))) ;; parallel with const_int's
8001 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8003 && (register_operand (operands[1], V8HImode)
8004 || register_operand (operands[3], V16QImode))"
8005 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8006 [(set_attr "type" "sseadd")
8007 (set_attr "mode" "TI")])
;; Sign-extending unpack through pperm (per the pattern name): operand 1 is
;; the V8HI source, operand 2 the mode-less parallel of const_ints used as
;; the selector, and operand 3 the V16QI vector passed via (use ...).  The
;; insn condition requires that at least one of operands 1 and 3 is a
;; register, matching the "xm,x" / "x,xm" alternatives.  Operand 2 is never
;; a register, so it must not be the one tested here.
8009 (define_insn "sse5_pperm_sign_v8hi_v4si"
8010 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8013 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
8014 (match_operand 2 "" "")))) ;; parallel with const_int's
8015 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8017 && (register_operand (operands[1], V8HImode)
8018 || register_operand (operands[3], V16QImode))"
8019 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8020 [(set_attr "type" "sseadd")
8021 (set_attr "mode" "TI")])
;; Zero-extending unpack through pperm (per the pattern name): operand 1 is
;; the V4SI source, operand 2 the mode-less parallel of const_ints used as
;; the selector, and operand 3 the V16QI vector passed via (use ...).  The
;; insn condition requires that at least one of operands 1 and 3 is a
;; register, matching the "xm,x" / "x,xm" alternatives.  Operand 2 is never
;; a register, so it must not be the one tested here.
8023 (define_insn "sse5_pperm_zero_v4si_v2di"
8024 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8027 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8028 (match_operand 2 "" "")))) ;; parallel with const_int's
8029 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8031 && (register_operand (operands[1], V4SImode)
8032 || register_operand (operands[3], V16QImode))"
8033 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8034 [(set_attr "type" "sseadd")
8035 (set_attr "mode" "TI")])
;; Sign-extending unpack through pperm (per the pattern name): operand 1 is
;; the V4SI source, operand 2 the mode-less parallel of const_ints used as
;; the selector, and operand 3 the V16QI vector passed via (use ...).  The
;; insn condition requires that at least one of operands 1 and 3 is a
;; register, matching the "xm,x" / "x,xm" alternatives.  Operand 2 is never
;; a register, so it must not be the one tested here.
8037 (define_insn "sse5_pperm_sign_v4si_v2di"
8038 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8041 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8042 (match_operand 2 "" "")))) ;; parallel with const_int's
8043 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8045 && (register_operand (operands[1], V4SImode)
8046 || register_operand (operands[3], V16QImode))"
8047 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8048 [(set_attr "type" "sseadd")
8049 (set_attr "mode" "TI")])
8051 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; V2DI operands 1 and 2 are combined into the V4SI result; operand 3 is
;; the V16QI vector passed via (use ...).  One input is tied to the output
;; in every alternative: operand 1 in alternatives 0-1, operand 3 in
;; alternatives 2-3.
8052 (define_insn "sse5_pperm_pack_v2di_v4si"
8053 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
8056 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
8058 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8059 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8060 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8061 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8062 [(set_attr "type" "sse4arg")
8063 (set_attr "mode" "TI")])
;; Pack through pperm: V4SI operands 1 and 2 are combined into the V8HI
;; result; operand 3 is the V16QI vector passed via (use ...).  One input
;; is tied to the output in every alternative: operand 1 in alternatives
;; 0-1, operand 3 in alternatives 2-3.
8065 (define_insn "sse5_pperm_pack_v4si_v8hi"
8066 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
8069 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
8071 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8072 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8073 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8074 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8075 [(set_attr "type" "sse4arg")
8076 (set_attr "mode" "TI")])
;; Pack through pperm: V8HI operands 1 and 2 are combined into the V16QI
;; result; operand 3 is the V16QI vector passed via (use ...).  One input
;; is tied to the output in every alternative: operand 1 in alternatives
;; 0-1, operand 3 in alternatives 2-3.
8078 (define_insn "sse5_pperm_pack_v8hi_v16qi"
8079 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8082 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
8084 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8085 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8086 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8087 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8088 [(set_attr "type" "sse4arg")
8089 (set_attr "mode" "TI")])
8091 ;; Floating point permutation (permps, permpd)
;; Floating point permute for the SSEMODEF2P modes (V4SF, V2DF), modelled
;; as UNSPEC_SSE5_PERMUTE over two vector inputs (operands 1 and 2) and a
;; V16QI selector (operand 3).  <ssemodesuffixf4> turns the mnemonic into
;; permps or permpd.  The destination is tied ("0") to operand 1 or to
;; operand 3 depending on the alternative; ix86_sse5_valid_op_p performs
;; the final operand check.
8092 (define_insn "sse5_perm<mode>"
8093 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
8095 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
8096 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
8097 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
8098 UNSPEC_SSE5_PERMUTE))]
8099 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8100 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8101 [(set_attr "type" "sse4arg")
8102 (set_attr "mode" "<MODE>")])
8104 ;; SSE5 packed rotate instructions
;; Expander for a vector rotate-left by an SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand it is
;; converted to <ssescalarmode> if it has a different mode, replicated into
;; all <ssescalarnum> elements of a fresh vector with vec_init<mode>, and
;; the rotate is emitted through sse5_vrotl<mode>3.
;; NOTE(review): the lines closing that branch are not visible here;
;; presumably an in-range constant count falls through to the pattern and
;; is matched by sse5_rotl<mode>3 -- confirm.
8105 (define_expand "rotl<mode>3"
8106 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
8108 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
8109 (match_operand:SI 2 "general_operand")))]
8112 /* If we were given a scalar, convert it to parallel */
8113 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
8115 rtvec vs = rtvec_alloc (<ssescalarnum>);
8116 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
8117 rtx reg = gen_reg_rtx (<MODE>mode);
8118 rtx op2 = operands[2];
8121 if (GET_MODE (op2) != <ssescalarmode>mode)
8123 op2 = gen_reg_rtx (<ssescalarmode>mode);
8124 convert_move (op2, operands[2], false);
8127 for (i = 0; i < <ssescalarnum>; i++)
8128 RTVEC_ELT (vs, i) = op2;
8130 emit_insn (gen_vec_init<mode> (reg, par));
8131 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right by an SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand it is
;; converted to <ssescalarmode> if needed and replicated into all
;; <ssescalarnum> elements with vec_init<mode>.  That count vector is then
;; negated with neg<mode>2 and handed to sse5_vrotl<mode>3, i.e. the right
;; rotate is emitted as a variable left rotate by the negated counts.
;; NOTE(review): the lines closing that branch are not visible here;
;; presumably an in-range constant count falls through to the rotatert
;; pattern and is matched by sse5_rotr<mode>3 -- confirm.
8136 (define_expand "rotr<mode>3"
8137 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
8138 (rotatert:SSEMODE1248
8139 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
8140 (match_operand:SI 2 "general_operand")))]
8143 /* If we were given a scalar, convert it to parallel */
8144 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
8146 rtvec vs = rtvec_alloc (<ssescalarnum>);
8147 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
8148 rtx neg = gen_reg_rtx (<MODE>mode);
8149 rtx reg = gen_reg_rtx (<MODE>mode);
8150 rtx op2 = operands[2];
8153 if (GET_MODE (op2) != <ssescalarmode>mode)
8155 op2 = gen_reg_rtx (<ssescalarmode>mode);
8156 convert_move (op2, operands[2], false);
8159 for (i = 0; i < <ssescalarnum>; i++)
8160 RTVEC_ELT (vs, i) = op2;
8162 emit_insn (gen_vec_init<mode> (reg, par));
8163 emit_insn (gen_neg<mode>2 (neg, reg));
8164 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate count: emits prot<ssevecsize> (protb, protw,
;; protd or protq) with the constant in operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  The source (operand 1) may be a
;; register or memory.
8169 (define_insn "sse5_rotl<mode>3"
8170 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8172 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8173 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8175 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8176 [(set_attr "type" "sseishft")
8177 (set_attr "mode" "TI")])
;; Rotate right by an immediate count (operand 2).  The insn is emitted as
;; prot<ssevecsize> with the complementary count, i.e. the width of one
;; element in bits minus operand 2, which is stored in operands[3] for the
;; output template.
;; The width must be taken from the element mode (<ssescalarmode>), not
;; computed as <ssescalarnum> * 8: <ssescalarnum> is the number of elements
;; (it sizes the rtvec in the rotl/rotr expanders), so that product equals
;; the element width only for V4SI and even goes negative for V2DI counts
;; above 16.
8179 (define_insn "sse5_rotr<mode>3"
8180 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8181 (rotatert:SSEMODE1248
8182 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8183 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8186 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
8187 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
8189 [(set_attr "type" "sseishft")
8190 (set_attr "mode" "TI")])
;; Per-element variable rotate right.  The count vector (operand 2) is
;; negated into a fresh register with neg<mode>2 and the rotate is emitted
;; through sse5_vrotl<mode>3 using the negated counts.
8192 (define_expand "vrotr<mode>3"
8193 [(match_operand:SSEMODE1248 0 "register_operand" "")
8194 (match_operand:SSEMODE1248 1 "register_operand" "")
8195 (match_operand:SSEMODE1248 2 "register_operand" "")]
8198 rtx reg = gen_reg_rtx (<MODE>mode);
8199 emit_insn (gen_neg<mode>2 (reg, operands[2]));
8200 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left.  Forwards its three register operands
;; unchanged to sse5_vrotl<mode>3.
8204 (define_expand "vrotl<mode>3"
8205 [(match_operand:SSEMODE1248 0 "register_operand" "")
8206 (match_operand:SSEMODE1248 1 "register_operand" "")
8207 (match_operand:SSEMODE1248 2 "register_operand" "")]
8210 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; prot<ssevecsize> with a per-element count vector in operand 2 and the
;; data vector in operand 1.  The two alternatives allow either the count
;; (operand 2) or the data (operand 1) to be in memory, but not both.
;; The RTL is an if_then_else whose visible arm is a rotatert by the
;; negation of operand 2.
;; NOTE(review): the if_then_else condition and its other arm are not
;; visible here; presumably it selects a left rotate for non-negative
;; counts -- confirm.
8214 (define_insn "sse5_vrotl<mode>3"
8215 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8216 (if_then_else:SSEMODE1248
8218 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8221 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8223 (rotatert:SSEMODE1248
8225 (neg:SSEMODE1248 (match_dup 2)))))]
8226 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8227 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8228 [(set_attr "type" "sseishft")
8229 (set_attr "mode" "TI")])
8231 ;; SSE5 packed shift instructions.
8232 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  The count vector (operand 2) is negated with
;; neg<mode>2 and the shift is emitted through sse5_lshl<mode>3 using the
;; negated counts.
8233 (define_expand "vlshr<mode>3"
8234 [(match_operand:SSEMODE124 0 "register_operand" "")
8235 (match_operand:SSEMODE124 1 "register_operand" "")
8236 (match_operand:SSEMODE124 2 "register_operand" "")]
8239 rtx neg = gen_reg_rtx (<MODE>mode);
8240 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8241 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  The count vector (operand 2) is negated with
;; neg<mode>2 and the shift is emitted through sse5_ashl<mode>3 using the
;; negated counts.
8245 (define_expand "vashr<mode>3"
8246 [(match_operand:SSEMODE124 0 "register_operand" "")
8247 (match_operand:SSEMODE124 1 "register_operand" "")
8248 (match_operand:SSEMODE124 2 "register_operand" "")]
8251 rtx neg = gen_reg_rtx (<MODE>mode);
8252 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8253 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable shift left for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Forwards its three register operands unchanged to
;; sse5_ashl<mode>3.
8257 (define_expand "vashl<mode>3"
8258 [(match_operand:SSEMODE124 0 "register_operand" "")
8259 (match_operand:SSEMODE124 1 "register_operand" "")
8260 (match_operand:SSEMODE124 2 "register_operand" "")]
8263 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; psha<ssevecsize> with a per-element count vector in operand 2 and the
;; data vector in operand 1.  The two alternatives allow either the count
;; or the data to be in memory, but not both.
;; The RTL is an if_then_else whose visible arm is an ashiftrt by the
;; negation of operand 2.
;; NOTE(review): the if_then_else condition and its other arm are not
;; visible here; presumably it selects a left shift for non-negative
;; counts -- confirm.
8267 (define_insn "sse5_ashl<mode>3"
8268 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8269 (if_then_else:SSEMODE1248
8271 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8274 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8276 (ashiftrt:SSEMODE1248
8278 (neg:SSEMODE1248 (match_dup 2)))))]
8279 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8280 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8281 [(set_attr "type" "sseishft")
8282 (set_attr "mode" "TI")])
;; pshl<ssevecsize> with a per-element count vector in operand 2 and the
;; data vector in operand 1.  The two alternatives allow either the count
;; or the data to be in memory, but not both.
;; The RTL is an if_then_else whose visible arm is an lshiftrt by the
;; negation of operand 2.
;; NOTE(review): the if_then_else condition and its other arm are not
;; visible here; presumably it selects a left shift for non-negative
;; counts -- confirm.
8284 (define_insn "sse5_lshl<mode>3"
8285 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8286 (if_then_else:SSEMODE1248
8288 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8291 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8293 (lshiftrt:SSEMODE1248
8295 (neg:SSEMODE1248 (match_dup 2)))))]
8296 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8297 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8298 [(set_attr "type" "sseishft")
8299 (set_attr "mode" "TI")])
8301 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar SImode count (operand 2).  The count is
;; placed in all 16 elements of a PARALLEL, materialised into a V16QI
;; register with vec_initv16qi, and the shift is emitted through
;; sse5_ashlv16qi3.
;; NOTE(review): operand 2 is stored into the element vector as-is, without
;; a visible conversion from SImode to the QImode element mode -- confirm
;; that vec_initv16qi accepts this.
8302 (define_expand "ashlv16qi3"
8303 [(match_operand:V16QI 0 "register_operand" "")
8304 (match_operand:V16QI 1 "register_operand" "")
8305 (match_operand:SI 2 "nonmemory_operand" "")]
8308 rtvec vs = rtvec_alloc (16);
8309 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8310 rtx reg = gen_reg_rtx (V16QImode);
8312 for (i = 0; i < 16; i++)
8313 RTVEC_ELT (vs, i) = operands[2];
8315 emit_insn (gen_vec_initv16qi (reg, par));
8316 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar SImode count (operand 2), emitted through the
;; logical-shift insn sse5_lshlv16qi3.  The count is placed in all 16
;; elements of a PARALLEL and materialised into a V16QI register with
;; vec_initv16qi first.
;; NOTE(review): operand 2 is stored into the element vector as-is, without
;; a visible conversion from SImode to the QImode element mode -- confirm
;; that vec_initv16qi accepts this.
8320 (define_expand "lshlv16qi3"
8321 [(match_operand:V16QI 0 "register_operand" "")
8322 (match_operand:V16QI 1 "register_operand" "")
8323 (match_operand:SI 2 "nonmemory_operand" "")]
8326 rtvec vs = rtvec_alloc (16);
8327 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8328 rtx reg = gen_reg_rtx (V16QImode);
8330 for (i = 0; i < 16; i++)
8331 RTVEC_ELT (vs, i) = operands[2];
8333 emit_insn (gen_vec_initv16qi (reg, par));
8334 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count (operand 2),
;; implemented as sse5_ashlv16qi3 with negated counts.
;; For a CONST_INT count the negated constant is used as the element value
;; and the resulting vector is passed to the shift directly.  For any other
;; count the vector is built first and then negated as a whole with
;; negv16qi2 before being passed to the shift.
;; NOTE(review): the non-constant arm of the `ele' conditional is not
;; visible here; presumably it is operands[2] itself -- confirm.
8338 (define_expand "ashrv16qi3"
8339 [(match_operand:V16QI 0 "register_operand" "")
8340 (match_operand:V16QI 1 "register_operand" "")
8341 (match_operand:SI 2 "nonmemory_operand" "")]
8344 rtvec vs = rtvec_alloc (16);
8345 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8346 rtx reg = gen_reg_rtx (V16QImode);
8348 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
8349 ? GEN_INT (- INTVAL (operands[2]))
8352 for (i = 0; i < 16; i++)
8353 RTVEC_ELT (vs, i) = ele;
8355 emit_insn (gen_vec_initv16qi (reg, par));
8357 if (GET_CODE (operands[2]) != CONST_INT)
8359 rtx neg = gen_reg_rtx (V16QImode);
8360 emit_insn (gen_negv16qi2 (neg, reg));
8361 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
8364 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count (operand 2),
;; implemented as sse5_ashlv2di3 with a negated count.
;; The negated count `ele' is obtained in one of three ways:
;;   - CONST_INT count: folded at expand time with GEN_INT;
;;   - count whose mode is not DImode: converted into a DImode register
;;     with convert_move, then negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; `ele' is then placed in both elements of the count vector, which is
;; materialised with vec_initv2di.
8369 (define_expand "ashrv2di3"
8370 [(match_operand:V2DI 0 "register_operand" "")
8371 (match_operand:V2DI 1 "register_operand" "")
8372 (match_operand:DI 2 "nonmemory_operand" "")]
8375 rtvec vs = rtvec_alloc (2);
8376 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
8377 rtx reg = gen_reg_rtx (V2DImode);
8380 if (GET_CODE (operands[2]) == CONST_INT)
8381 ele = GEN_INT (- INTVAL (operands[2]));
8382 else if (GET_MODE (operands[2]) != DImode)
8384 rtx move = gen_reg_rtx (DImode);
8385 ele = gen_reg_rtx (DImode);
8386 convert_move (move, operands[2], false);
8387 emit_insn (gen_negdi2 (ele, move));
8391 ele = gen_reg_rtx (DImode);
8392 emit_insn (gen_negdi2 (ele, operands[2]));
8395 RTVEC_ELT (vs, 0) = ele;
8396 RTVEC_ELT (vs, 1) = ele;
8397 emit_insn (gen_vec_initv2di (reg, par));
8398 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
8402 ;; SSE5 FRCZ support
;; Packed frcz for the SSEMODEF2P modes (V4SF, V2DF): one source operand in
;; a register or memory, result in a register.  <ssemodesuffixf4> turns the
;; mnemonic into frczps or frczpd.
8404 (define_insn "sse5_frcz<mode>2"
8405 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8407 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8410 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8411 [(set_attr "type" "ssecvt1")
8412 (set_attr "prefix_extra" "1")
8413 (set_attr "mode" "<MODE>")])
;; Scalar frcz on vector modes, modelled as a vec_merge of the frcz of
;; operand 2 with operand 1.  Operand 1 is tied to the destination ("0"),
;; so only operand 2 appears in the output template.  <ssemodesuffixf2s>
;; turns the mnemonic into frczss (V4SF) or frczsd (V2DF).
8416 (define_insn "sse5_vmfrcz<mode>2"
8417 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8418 (vec_merge:SSEMODEF2P
8420 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8422 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8425 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8426 [(set_attr "type" "ssecvt1")
8427 (set_attr "prefix_extra" "1")
8428 (set_attr "mode" "<MODE>")])
;; cvtph2ps: produces a V4SF register result from a V4HI source held in a
;; register or memory.  The conversion is modelled as an unspec.
8430 (define_insn "sse5_cvtph2ps"
8431 [(set (match_operand:V4SF 0 "register_operand" "=x")
8432 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8435 "cvtph2ps\t{%1, %0|%0, %1}"
8436 [(set_attr "type" "ssecvt")
8437 (set_attr "mode" "V4SF")])
;; cvtps2ph: produces a V4HI result, in a register or memory, from a V4SF
;; source register.  The conversion is modelled as an unspec.
8439 (define_insn "sse5_cvtps2ph"
8440 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8441 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8444 "cvtps2ph\t{%1, %0|%0, %1}"
8445 [(set_attr "type" "ssecvt")
8446 (set_attr "mode" "V4SF")])
8448 ;; Scalar versions of the com instructions that use vector types that are
8449 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8450 ;; com instructions fill in 0's in the upper bits instead of leaving them
8451 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com compare on vector modes.  It builds a
;; vec_merge whose first arm is the comparison (operator 1 applied to
;; operands 2 and 3) and sets operands[4] to the all-zero constant vector
;; of <MODE>mode before the pattern is generated.
8452 (define_expand "sse5_vmmaskcmp<mode>3"
8453 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8454 (vec_merge:SSEMODEF2P
8455 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8456 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8457 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8462 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn matched by the sse5_vmmaskcmp<mode>3 expander: a vec_merge of the
;; comparison (operator 1 on operands 2 and 3) with operand 4.  Emits
;; com%Y1<ssemodesuffixf2s>; the %Y1 modifier is expanded from the
;; comparison operator, and the suffix is ss (V4SF) or sd (V2DF).
;; NOTE(review): operand 4 has an empty predicate here, so any required
;; check that it is the zero vector would have to be in the insn condition,
;; which is not visible -- confirm.
8465 (define_insn "*sse5_vmmaskcmp<mode>3"
8466 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8467 (vec_merge:SSEMODEF2P
8468 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8469 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8470 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8471 (match_operand:SSEMODEF2P 4 "")
8474 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8475 [(set_attr "type" "sse4arg")
8476 (set_attr "mode" "<ssescalarmode>")])
8478 ;; We don't have a comparison operator that always returns true/false, so
8479 ;; handle comfalse and comtrue specially.
;; comfalse / comtrue for the SSEMODEF2P modes (V4SF, V2DF), modelled as
;; UNSPEC_SSE5_TRUEFALSE.  The constant in operand 3 selects which of four
;; output templates is returned: comfalse scalar, comfalse packed, comtrue
;; scalar and comtrue packed.  <ssemodesuffixf2c> appends the final s or d,
;; giving e.g. comfalsess / comfalseps and comtruess / comtrueps.
;; The third and fourth templates must be the comtrue forms; if they
;; repeat comfalses / comfalsep, no comtrue instruction is ever emitted.
;; NOTE(review): the case labels that select each template are not visible
;; here; the order (false scalar, false packed, true scalar, true packed)
;; is inferred from the s/p pairing of the templates -- confirm.
8480 (define_insn "sse5_com_tf<mode>3"
8481 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8483 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8484 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8485 (match_operand:SI 3 "const_int_operand" "n")]
8486 UNSPEC_SSE5_TRUEFALSE))]
8489 const char *ret = NULL;
8491 switch (INTVAL (operands[3]))
8494 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8498 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8502 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8506 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8515 [(set_attr "type" "ssecmp")
8516 (set_attr "mode" "<MODE>")])
;; Packed floating point com compare for the SSEMODEF2P modes (V4SF, V2DF).
;; The comparison code comes from operator 1, which must satisfy
;; sse5_comparison_float_operator, and is printed through the %Y1 modifier;
;; <ssemodesuffixf4> appends ps or pd.  Operand 2 must be a register and
;; operand 3 may be a register or memory.
8518 (define_insn "sse5_maskcmp<mode>3"
8519 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8520 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8521 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8522 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8524 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8525 [(set_attr "type" "ssecmp")
8526 (set_attr "mode" "<MODE>")])
;; Packed integer pcom compare for the SSEMODE1248 modes.  The comparison
;; code comes from operator 1, which must satisfy
;; ix86_comparison_int_operator, and is printed through the %Y1 modifier;
;; <ssevecsize> appends the element-size letter (b, w, d or q).
;; This shares its name template with the floating point pattern; the
;; <mode> substitution keeps the generated names distinct.
8528 (define_insn "sse5_maskcmp<mode>3"
8529 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8530 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8531 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8532 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8534 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8535 [(set_attr "type" "sse4arg")
8536 (set_attr "mode" "TI")])
;; Packed unsigned integer pcom compare for the SSEMODE1248 modes.  The
;; comparison code comes from operator 1, which must satisfy
;; ix86_comparison_uns_operator, and is printed through the %Y1 modifier.
;; The template appends "u" and then the <ssevecsize> element-size letter.
8538 (define_insn "sse5_maskcmp_uns<mode>3"
8539 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8540 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8541 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8542 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8544 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8545 [(set_attr "type" "ssecmp")
8546 (set_attr "mode" "TI")])
8548 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8549 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8550 ;; the exact instruction generated for the intrinsic.
;; Same unsigned pcom compare and output template as
;; sse5_maskcmp_uns<mode>3, but with the comparison wrapped in
;; UNSPEC_SSE5_UNSIGNED_CMP so it is not a bare comparison rtx.
8551 (define_insn "sse5_maskcmp_uns2<mode>3"
8552 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8554 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8555 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8556 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8557 UNSPEC_SSE5_UNSIGNED_CMP))]
8559 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8560 [(set_attr "type" "ssecmp")
8561 (set_attr "mode" "TI")])
8563 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8564 ;; being added here to be complete.
;; pcomtrue / pcomfalse for the SSEMODE1248 modes, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  A nonzero constant in operand 3 selects
;; pcomtrue<ssevecsize>; zero selects pcomfalse<ssevecsize>.
8565 (define_insn "sse5_pcom_tf<mode>3"
8566 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8568 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8569 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8570 (match_operand:SI 3 "const_int_operand" "n")]
8571 UNSPEC_SSE5_TRUEFALSE))]
8574 return ((INTVAL (operands[3]) != 0)
8575 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8576 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8578 [(set_attr "type" "ssecmp")
8579 (set_attr "mode" "TI")])
;; aesenc on V2DI.  Operand 1 is tied to the destination ("0"), so the
;; template names only the destination and operand 2, which may be a
;; register or memory.  The operation is modelled as an unspec.
8581 (define_insn "aesenc"
8582 [(set (match_operand:V2DI 0 "register_operand" "=x")
8583 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8584 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8587 "aesenc\t{%2, %0|%0, %2}"
8588 [(set_attr "type" "sselog1")
8589 (set_attr "prefix_extra" "1")
8590 (set_attr "mode" "TI")])
;; aesenclast on V2DI, modelled as UNSPEC_AESENCLAST.  Operand 1 is tied to
;; the destination ("0"), so the template names only the destination and
;; operand 2, which may be a register or memory.
8592 (define_insn "aesenclast"
8593 [(set (match_operand:V2DI 0 "register_operand" "=x")
8594 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8595 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8596 UNSPEC_AESENCLAST))]
8598 "aesenclast\t{%2, %0|%0, %2}"
8599 [(set_attr "type" "sselog1")
8600 (set_attr "prefix_extra" "1")
8601 (set_attr "mode" "TI")])
;; aesdec on V2DI.  Operand 1 is tied to the destination ("0"), so the
;; template names only the destination and operand 2, which may be a
;; register or memory.  The operation is modelled as an unspec.
8603 (define_insn "aesdec"
8604 [(set (match_operand:V2DI 0 "register_operand" "=x")
8605 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8606 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8609 "aesdec\t{%2, %0|%0, %2}"
8610 [(set_attr "type" "sselog1")
8611 (set_attr "prefix_extra" "1")
8612 (set_attr "mode" "TI")])
;; aesdeclast on V2DI, modelled as UNSPEC_AESDECLAST.  Operand 1 is tied to
;; the destination ("0"), so the template names only the destination and
;; operand 2, which may be a register or memory.
8614 (define_insn "aesdeclast"
8615 [(set (match_operand:V2DI 0 "register_operand" "=x")
8616 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8617 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8618 UNSPEC_AESDECLAST))]
8620 "aesdeclast\t{%2, %0|%0, %2}"
8621 [(set_attr "type" "sselog1")
8622 (set_attr "prefix_extra" "1")
8623 (set_attr "mode" "TI")])
;; aesimc on V2DI: a single source operand in a register or memory, result
;; in a register that is not tied to the source.  Modelled as an unspec.
8625 (define_insn "aesimc"
8626 [(set (match_operand:V2DI 0 "register_operand" "=x")
8627 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8630 "aesimc\t{%1, %0|%0, %1}"
8631 [(set_attr "type" "sselog1")
8632 (set_attr "prefix_extra" "1")
8633 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.  Operand 1
;; is the source (register or memory) and operand 2 is an immediate that
;; must satisfy const_0_to_255_operand.
8635 (define_insn "aeskeygenassist"
8636 [(set (match_operand:V2DI 0 "register_operand" "=x")
8637 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
8638 (match_operand:SI 2 "const_0_to_255_operand" "n")]
8639 UNSPEC_AESKEYGENASSIST))]
8641 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
8642 [(set_attr "type" "sselog1")
8643 (set_attr "prefix_extra" "1")
8644 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI, modelled as an unspec.  Operand 1 is tied to the
;; destination ("0"), operand 2 may be a register or memory, and operand 3
;; is an immediate that must satisfy const_0_to_255_operand.
8646 (define_insn "pclmulqdq"
8647 [(set (match_operand:V2DI 0 "register_operand" "=x")
8648 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8649 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
8650 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8653 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
8654 [(set_attr "type" "sselog1")
8655 (set_attr "prefix_extra" "1")
8656 (set_attr "mode" "TI")])