1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name list the
;; element sizes, in bytes, of the modes the iterator covers
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point iterators: SSEMODEF4 covers the scalar and the packed
;; float modes, SSEMODEF2P only the two packed float modes.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4: scalar modes give "ss"/"sd", packed modes give "ps"/"pd".
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s: packed modes map to the scalar suffix of their element
;; type ("ss"/"sd"), the same value the scalar modes get.
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c: single-character precision suffix for the packed float
;; modes; the patterns in this file prepend "p" or "s" to it themselves.
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
57 (V16QI "QI") (V8HI "HI")
58 (V4SI "SI") (V2DI "DI")])
60 ;; Number of scalar elements in each vector type
61 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
62 (V16QI "16") (V8HI "8")
63 (V4SI "4") (V2DI "2")])
65 ;; Mapping of immediate bits for blend instructions
66 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
68 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
70 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
74 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
76 ;; All of these patterns are enabled for SSE1 as well as SSE2.
77 ;; This is essential for maintaining stable calling conventions.
;; Expander for whole-vector moves in every SSEMODE mode.  The visible
;; preparation code passes the operand array to ix86_expand_vector_move.
79 (define_expand "mov<mode>"
80 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
81 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
84 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn with three constraint alternatives:
;;   0: SSE constant ("C") into an xmm register,
;;   1: xmm register or memory into an xmm register,
;;   2: xmm register into memory.
;; The condition requires at least one operand to be a register.  The output
;; code switches on which_alternative; one path returns
;; standard_sse_constant_opcode, the others pick movaps, movapd or movdqa
;; according to get_attr_mode.
;; The mode attribute is V4SF when optimize_size is nonzero or TARGET_SSE2 is
;; zero, and also for alternative 2 subject to a TARGET_SSE_TYPELESS_STORES
;; test; otherwise V4SF or V2DF for those vector modes, and TI for the rest.
88 (define_insn "*mov<mode>_internal"
89 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
90 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
92 && (register_operand (operands[0], <MODE>mode)
93 || register_operand (operands[1], <MODE>mode))"
95 switch (which_alternative)
98 return standard_sse_constant_opcode (insn, operands[1]);
101 switch (get_attr_mode (insn))
104 return "movaps\t{%1, %0|%0, %1}";
106 return "movapd\t{%1, %0|%0, %1}";
108 return "movdqa\t{%1, %0|%0, %1}";
114 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
117 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
118 (and (eq_attr "alternative" "2")
119 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
121 (const_string "V4SF")
122 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
123 (const_string "V4SF")
124 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
125 (const_string "V2DF")
127 (const_string "TI")))])
129 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
130 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
131 ;; from memory, we'd prefer to load the memory directly into the %xmm
132 ;; register. To facilitate this happy circumstance, this pattern won't
133 ;; split until after register allocation. If the 64-bit value didn't
134 ;; come from memory, this is the best we can do. This is much better
135 ;; than storing %edx:%eax to a stack temporary and reloading it into %xmm.
;; Insn-and-split that views a DImode value as V4SI in an xmm register.
;; Enabled for 32-bit targets with SSE2 and inter-unit moves; the split
;; condition adds reload_completed.
;;   Alternative 0: source in general registers, with an early-clobber xmm
;;     scratch (operand 2).  The split loads the SImode subregs at byte
;;     offsets 0 and 4 with sse2_loadld, into operand 0 and the scratch, then
;;     combines the two with sse2_punpckldq.
;;   Alternative 1: source in memory, no scratch needed ("X").  The split
;;     emits one vec_concatv2di of the memory operand with const0_rtx into
;;     the V2DI low part of operand 0.
138 (define_insn_and_split "movdi_to_sse"
140 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
141 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
142 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
143 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
145 "&& reload_completed"
148 if (register_operand (operands[1], DImode))
150 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
151 Assemble the 64-bit DImode value in an xmm register. */
152 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
153 gen_rtx_SUBREG (SImode, operands[1], 0)));
154 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
155 gen_rtx_SUBREG (SImode, operands[1], 4)));
156 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
158 else if (memory_operand (operands[1], DImode))
159 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload rewrite (the form's header is not visible in this excerpt) of
;; a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector; the
;; replacement pattern uses a vec_duplicate of operand 1.
165 [(set (match_operand:V4SF 0 "register_operand" "")
166 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
167 "TARGET_SSE && reload_completed"
170 (vec_duplicate:V4SF (match_dup 1))
174 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
175 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF counterpart of the previous rewrite (header likewise not visible).
;; The load becomes a vec_concat of the DFmode subreg of operand 1 with a
;; DFmode zero (operand 2).  Requires TARGET_SSE2 and reload_completed.
179 [(set (match_operand:V2DF 0 "register_operand" "")
180 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
181 "TARGET_SSE2 && reload_completed"
182 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
184 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
185 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE mode; takes a single register operand and
;; hands it to ix86_expand_push.
188 (define_expand "push<mode>1"
189 [(match_operand:SSEMODE 0 "register_operand" "")]
192 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE mode; the operand array is
;; passed to ix86_expand_vector_move_misalign.
196 (define_expand "movmisalign<mode>"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
198 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
201 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Packed-float move emitted as movups or movupd (suffix taken from
;; ssemodesuffixf2c).  Alternatives: register/memory to register, and
;; register to memory.  The condition rejects memory-to-memory.
205 (define_insn "<sse>_movup<ssemodesuffixf2c>"
206 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
208 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
210 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
211 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "<MODE>")])
;; V16QI move emitted as movdqu.  Same two alternatives as the packed-float
;; version; requires TARGET_SSE2 and rejects memory-to-memory.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "prefix_data16" "1")
224 (set_attr "mode" "TI")])
;; Store of a packed-float xmm register to memory, emitted as movntps or
;; movntpd.  The destination must be memory and the source a register.
226 (define_insn "<sse>_movnt<mode>"
227 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
229 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
231 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
232 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
233 [(set_attr "type" "ssemov")
234 (set_attr "mode" "<MODE>")])
;; Store of a V2DI xmm register to memory, emitted as movntdq.
236 (define_insn "sse2_movntv2di"
237 [(set (match_operand:V2DI 0 "memory_operand" "=m")
238 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
241 "movntdq\t{%1, %0|%0, %1}"
242 [(set_attr "type" "ssecvt")
243 (set_attr "prefix_data16" "1")
244 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint "r") to memory, emitted
;; as movnti.
;; NOTE(review): the mode attribute is V2DF although both operands are
;; SImode -- confirm this is intentional.
246 (define_insn "sse2_movntsi"
247 [(set (match_operand:SI 0 "memory_operand" "=m")
248 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
251 "movnti\t{%1, %0|%0, %1}"
252 [(set_attr "type" "ssecvt")
253 (set_attr "mode" "V2DF")])
;; V16QI load from memory into an xmm register, emitted as lddqu.  The
;; source must be a memory operand.
255 (define_insn "sse3_lddqu"
256 [(set (match_operand:V16QI 0 "register_operand" "=x")
257 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
260 "lddqu\t{%1, %0|%0, %1}"
261 [(set_attr "type" "ssecvt")
262 (set_attr "prefix_rep" "1")
263 (set_attr "mode" "TI")])
265 ; Expand patterns for non-temporal stores. At the moment, only those
266 ; that directly map to insns are defined; it would be possible to
267 ; define patterns for other modes that would expand to several insns.
;; storent<mode> for the packed float modes: memory destination, register
;; source.  No preparation code is visible in this excerpt.
269 (define_expand "storent<mode>"
270 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
272 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
274 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent<mode> for the modes of the MODEF iterator (defined outside this
;; excerpt): memory destination, register source.  The enabling condition is
;; not visible here.
277 (define_expand "storent<mode>"
278 [(set (match_operand:MODEF 0 "memory_operand" "")
280 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: memory destination, register source wrapped in
;; an unspec.  The enabling condition is not visible here.
285 (define_expand "storentv2di"
286 [(set (match_operand:V2DI 0 "memory_operand" "")
287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SImode: memory destination, register source wrapped
;; in an unspec.  The enabling condition is not visible here.
292 (define_expand "storentsi"
293 [(set (match_operand:SI 0 "memory_operand" "")
294 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
301 ;; Parallel floating point arithmetic
303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for the packed float modes.  All work is delegated to
;; ix86_expand_fp_absneg_operator, and DONE ends the expansion so the
;; pattern itself is not emitted.
305 (define_expand "<code><mode>2"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
308 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
309 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
310 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed-float add/subtract expander.  Both sources may be in memory at
;; this point; ix86_fixup_binary_operands_no_copy is called on the operands
;; before the pattern is emitted.
312 (define_expand "<plusminus_insn><mode>3"
313 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
314 (plusminus:SSEMODEF2P
315 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
316 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
317 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
318 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn matching the packed-float add/subtract.  Operand 1 is tied to the
;; destination; its constraint is prefixed with <comm> (presumably the
;; commutativity marker for the plus case -- defined outside this excerpt).
;; Operand 2 may be an xmm register or memory.  The condition also requires
;; ix86_binary_operator_ok.
320 (define_insn "*<plusminus_insn><mode>3"
321 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
322 (plusminus:SSEMODEF2P
323 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
324 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
325 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
326 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
327 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
328 [(set_attr "type" "sseadd")
329 (set_attr "mode" "<MODE>")])
;; vec_merge form of add/subtract, emitted with the scalar "s" mnemonic
;; (e.g. addss/subsd).  Operand 1 must be a register tied to the
;; destination; the mode attribute is the scalar element mode.
331 (define_insn "<sse>_vm<plusminus_insn><mode>3"
332 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
333 (vec_merge:SSEMODEF2P
334 (plusminus:SSEMODEF2P
335 (match_operand:SSEMODEF2P 1 "register_operand" "0")
336 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
339 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
340 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "<ssescalarmode>")])
;; Packed-float multiply expander; calls ix86_fixup_binary_operands_no_copy
;; with MULT on the operands before the pattern is emitted.
344 (define_expand "mul<mode>3"
345 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
347 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
348 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
349 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
350 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for packed-float multiply (mulps/mulpd).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register or
;; memory.  The condition also requires ix86_binary_operator_ok.
352 (define_insn "*mul<mode>3"
353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
355 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
356 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
357 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
358 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
359 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
360 [(set_attr "type" "ssemul")
361 (set_attr "mode" "<MODE>")])
;; vec_merge form of multiply, emitted as mulss/mulsd.  Operand 1 must be a
;; register tied to the destination; the mode attribute is the scalar
;; element mode.
363 (define_insn "<sse>_vmmul<mode>3"
364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
365 (vec_merge:SSEMODEF2P
367 (match_operand:SSEMODEF2P 1 "register_operand" "0")
368 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
372 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
373 [(set_attr "type" "ssemul")
374 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; the code is not optimized for size, and the finite-math-only,
;; no-trapping-math and unsafe-math-optimizations flags all hold, the
;; division is produced by ix86_emit_swdivsf instead.
376 (define_expand "divv4sf3"
377 [(set (match_operand:V4SF 0 "register_operand" "")
378 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
379 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
383 && flag_finite_math_only && !flag_trapping_math
384 && flag_unsafe_math_optimizations)
386 ix86_emit_swdivsf (operands[0], operands[1],
387 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
;; The enabling condition is not visible in this excerpt.
392 (define_expand "divv2df3"
393 [(set (match_operand:V2DF 0 "register_operand" "")
394 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
395 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Insn for packed-float divide (divps/divpd).  Operand 1 must be a register
;; tied to the destination, with no commutativity marker.
399 (define_insn "<sse>_div<mode>3"
400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
402 (match_operand:SSEMODEF2P 1 "register_operand" "0")
403 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
404 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
405 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
406 [(set_attr "type" "ssediv")
407 (set_attr "mode" "<MODE>")])
;; vec_merge form of divide, emitted as divss/divsd; the mode attribute is
;; the scalar element mode.
409 (define_insn "<sse>_vmdiv<mode>3"
410 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
411 (vec_merge:SSEMODEF2P
413 (match_operand:SSEMODEF2P 1 "register_operand" "0")
414 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
417 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
418 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
419 [(set_attr "type" "ssediv")
420 (set_attr "mode" "<ssescalarmode>")])
;; V4SF UNSPEC_RCP of a register or memory operand, emitted as rcpps.
422 (define_insn "sse_rcpv4sf2"
423 [(set (match_operand:V4SF 0 "register_operand" "=x")
425 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
427 "rcpps\t{%1, %0|%0, %1}"
428 [(set_attr "type" "sse")
429 (set_attr "mode" "V4SF")])
;; Scalar form emitted as rcpss.  Operand 1 is the source (register or
;; memory); operand 2 is a register tied to the destination.
431 (define_insn "sse_vmrcpv4sf2"
432 [(set (match_operand:V4SF 0 "register_operand" "=x")
434 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
436 (match_operand:V4SF 2 "register_operand" "0")
439 "rcpss\t{%1, %0|%0, %1}"
440 [(set_attr "type" "sse")
441 (set_attr "mode" "SF")])
;; V4SF square-root expander.  Under the same flag combination that
;; divv4sf3 tests (SSE math, RECIP, not optimizing for size, finite-only,
;; non-trapping, unsafe math) it calls ix86_emit_swsqrtsf with a final
;; argument of 0.
443 (define_expand "sqrtv4sf2"
444 [(set (match_operand:V4SF 0 "register_operand" "")
445 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
448 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
449 && flag_finite_math_only && !flag_trapping_math
450 && flag_unsafe_math_optimizations)
452 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square root of a register or memory operand, emitted as sqrtps.
457 (define_insn "sse_sqrtv4sf2"
458 [(set (match_operand:V4SF 0 "register_operand" "=x")
459 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
461 "sqrtps\t{%1, %0|%0, %1}"
462 [(set_attr "type" "sse")
463 (set_attr "mode" "V4SF")])
;; V2DF square root of a register or memory operand, emitted as sqrtpd.
465 (define_insn "sqrtv2df2"
466 [(set (match_operand:V2DF 0 "register_operand" "=x")
467 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
469 "sqrtpd\t{%1, %0|%0, %1}"
470 [(set_attr "type" "sse")
471 (set_attr "mode" "V2DF")])
;; vec_merge form of square root, emitted as sqrtss/sqrtsd.  Operand 1 is
;; the source (register or memory); operand 2 is a register tied to the
;; destination.
473 (define_insn "<sse>_vmsqrt<mode>2"
474 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
475 (vec_merge:SSEMODEF2P
477 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
478 (match_operand:SSEMODEF2P 2 "register_operand" "0")
480 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
481 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
482 [(set_attr "type" "sse")
483 (set_attr "mode" "<ssescalarmode>")])
;; V4SF UNSPEC_RSQRT expander.  The visible body calls ix86_emit_swsqrtsf
;; with a final argument of 1 (the sqrtv4sf2 expander passes 0).
485 (define_expand "rsqrtv4sf2"
486 [(set (match_operand:V4SF 0 "register_operand" "")
488 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
491 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF UNSPEC_RSQRT of a register or memory operand, emitted as rsqrtps.
495 (define_insn "sse_rsqrtv4sf2"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
500 "rsqrtps\t{%1, %0|%0, %1}"
501 [(set_attr "type" "sse")
502 (set_attr "mode" "V4SF")])
;; Scalar form emitted as rsqrtss.  Operand 1 is the source (register or
;; memory); operand 2 is a register tied to the destination.
504 (define_insn "sse_vmrsqrtv4sf2"
505 [(set (match_operand:V4SF 0 "register_operand" "=x")
507 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
509 (match_operand:V4SF 2 "register_operand" "0")
512 "rsqrtss\t{%1, %0|%0, %1}"
513 [(set_attr "type" "sse")
514 (set_attr "mode" "SF")])
516 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
517 ;; isn't really correct, as those rtl operators aren't defined when
518 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Binary expander for the packed float modes.  Unless flag_finite_math_only
;; is set, operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is called on the operands.
520 (define_expand "<code><mode>3"
521 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
523 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
524 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
525 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
527 if (!flag_finite_math_only)
528 operands[1] = force_reg (<MODE>mode, operands[1]);
529 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Variant enabled only under flag_finite_math_only.  Operand 1 is marked
;; commutative ("%0") and may be in memory before operand fixup; the
;; condition also requires ix86_binary_operator_ok.
532 (define_insn "*<code><mode>3_finite"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
538 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
539 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
540 [(set_attr "type" "sseadd")
541 (set_attr "mode" "<MODE>")])
;; General variant with no finite-math requirement.  Operand 1 must be a
;; register tied to the destination and carries no commutativity marker, so
;; the operand order is kept as written.
543 (define_insn "*<code><mode>3"
544 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
546 (match_operand:SSEMODEF2P 1 "register_operand" "0")
547 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
548 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
549 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
550 [(set_attr "type" "sseadd")
551 (set_attr "mode" "<MODE>")])
;; vec_merge form emitted with the scalar "s" mnemonic
;; (<maxminfprefix>ss/sd); the mode attribute is the scalar element mode.
553 (define_insn "<sse>_vm<code><mode>3"
554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
555 (vec_merge:SSEMODEF2P
557 (match_operand:SSEMODEF2P 1 "register_operand" "0")
558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
561 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
562 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "<ssescalarmode>")])
566 ;; These versions of the min/max patterns implement exactly the operations
567 ;; min = (op1 < op2 ? op1 : op2)
568 ;; max = (!(op1 < op2) ? op1 : op2)
569 ;; Their operands are not commutative, and thus they may be used in the
570 ;; presence of -0.0 and NaN.
;; Packed minimum with fixed operand order, emitted as minps/minpd.
;; Operand 1 must be a register tied to the destination.
572 (define_insn "*ieee_smin<mode>3"
573 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
576 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
579 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
580 [(set_attr "type" "sseadd")
581 (set_attr "mode" "<MODE>")])
;; Packed maximum with fixed operand order, emitted as maxps/maxpd.
;; Operand 1 must be a register tied to the destination.
583 (define_insn "*ieee_smax<mode>3"
584 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
586 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
587 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
589 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
590 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
591 [(set_attr "type" "sseadd")
592 (set_attr "mode" "<MODE>")])
;; V4SF insn emitted as addsubps.  The visible part of the pattern pairs
;; operands 1 and 2 and includes a (minus op1 op2) arm; operand 1 is tied to
;; the destination.
594 (define_insn "sse3_addsubv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "=x")
598 (match_operand:V4SF 1 "register_operand" "0")
599 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
600 (minus:V4SF (match_dup 1) (match_dup 2))
603 "addsubps\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "prefix_rep" "1")
606 (set_attr "mode" "V4SF")])
;; V2DF counterpart emitted as addsubpd; same operand layout as the V4SF
;; version.
608 (define_insn "sse3_addsubv2df3"
609 [(set (match_operand:V2DF 0 "register_operand" "=x")
612 (match_operand:V2DF 1 "register_operand" "0")
613 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
614 (minus:V2DF (match_dup 1) (match_dup 2))
617 "addsubpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
;; Horizontal add/subtract on V4SF, emitted as h<plusminus_mnemonic>ps.
;; The pattern selects elements 0, 1, 2 and 3 of operand 1 and then the
;; same elements of operand 2.  Operand 1 is tied to the destination.
621 (define_insn "sse3_h<plusminus_insn>v4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (match_operand:V4SF 1 "register_operand" "0")
628 (parallel [(const_int 0)]))
629 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
631 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
632 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
636 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
637 (parallel [(const_int 0)]))
638 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
640 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
641 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
643 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
644 [(set_attr "type" "sseadd")
645 (set_attr "prefix_rep" "1")
646 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V2DF, emitted as h<plusminus_mnemonic>pd.
;; The pattern selects elements 0 and 1 of operand 1 and then of operand 2.
648 (define_insn "sse3_h<plusminus_insn>v2df3"
649 [(set (match_operand:V2DF 0 "register_operand" "=x")
653 (match_operand:V2DF 1 "register_operand" "0")
654 (parallel [(const_int 0)]))
655 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
658 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
659 (parallel [(const_int 0)]))
660 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
662 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sseadd")
664 (set_attr "mode" "V2DF")])
;; V4SF sum reduction.  Two code paths are visible: two chained
;; sse3_haddv4sf3 insns through a fresh temporary, or a call to
;; ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that selects between
;; them is not visible in this excerpt.
666 (define_expand "reduc_splus_v4sf"
667 [(match_operand:V4SF 0 "register_operand" "")
668 (match_operand:V4SF 1 "register_operand" "")]
673 rtx tmp = gen_reg_rtx (V4SFmode);
674 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
675 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
678 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum reduction: a single sse3_haddv2df3 with operand 1 supplied as
;; both inputs.
682 (define_expand "reduc_splus_v2df"
683 [(match_operand:V2DF 0 "register_operand" "")
684 (match_operand:V2DF 1 "register_operand" "")]
687 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF maximum reduction via ix86_expand_reduc_v4sf with gen_smaxv4sf3.
691 (define_expand "reduc_smax_v4sf"
692 [(match_operand:V4SF 0 "register_operand" "")
693 (match_operand:V4SF 1 "register_operand" "")]
696 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction via ix86_expand_reduc_v4sf with gen_sminv4sf3.
700 (define_expand "reduc_smin_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
711 ;; Parallel floating point comparisons
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Comparison for scalar and packed float modes (SSEMODEF4).  Operator 3
;; must satisfy sse_comparison_operator and is printed through %D3 in the
;; cmp mnemonic; the suffix comes from ssemodesuffixf4.
715 (define_insn "<sse>_maskcmp<mode>3"
716 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
717 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
718 [(match_operand:SSEMODEF4 1 "register_operand" "0")
719 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
720 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
722 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
723 [(set_attr "type" "ssecmp")
724 (set_attr "mode" "<MODE>")])
;; vec_merge form of the comparison, emitted with the scalar "s" suffix.
;; Disabled when TARGET_SSE5 is set.
726 (define_insn "<sse>_vmmaskcmp<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (vec_merge:SSEMODEF2P
729 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
730 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
731 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
734 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
735 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
736 [(set_attr "type" "ssecmp")
737 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of two vector operands,
;; emitted as comiss/comisd.  Operand 0 must be a register; operand 1 may be
;; a register or memory.
739 (define_insn "<sse>_comi"
740 [(set (reg:CCFP FLAGS_REG)
743 (match_operand:<ssevecmode> 0 "register_operand" "x")
744 (parallel [(const_int 0)]))
746 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
747 (parallel [(const_int 0)]))))]
748 "SSE_FLOAT_MODE_P (<MODE>mode)"
749 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
750 [(set_attr "type" "ssecomi")
751 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern but sets FLAGS_REG in CCFPU mode and is
;; emitted as ucomiss/ucomisd.
753 (define_insn "<sse>_ucomi"
754 [(set (reg:CCFPU FLAGS_REG)
757 (match_operand:<ssevecmode> 0 "register_operand" "x")
758 (parallel [(const_int 0)]))
760 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
761 (parallel [(const_int 0)]))))]
762 "SSE_FLOAT_MODE_P (<MODE>mode)"
763 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
764 [(set_attr "type" "ssecomi")
765 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for the packed float modes.
;; Operands 4 and 5 are the values being compared; operands 1 and 2 are the
;; two arms of the if_then_else.  The visible body branches on the result
;; of ix86_expand_fp_vcond (operands).
767 (define_expand "vcond<mode>"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
769 (if_then_else:SSEMODEF2P
771 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
772 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
773 (match_operand:SSEMODEF2P 1 "general_operand" "")
774 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
777 if (ix86_expand_fp_vcond (operands))
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; Parallel floating point logical operations
787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-float logical insn emitted as andnps/andnpd.  Operand 1 must be a
;; register tied to the destination; operand 2 may be a register or memory.
789 (define_insn "<sse>_nand<mode>3"
790 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
793 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
794 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
795 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
796 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
797 [(set_attr "type" "sselog")
798 (set_attr "mode" "<MODE>")])
;; Binary logical expander for the packed float modes; calls
;; ix86_fixup_binary_operands_no_copy on the operands before the pattern is
;; emitted.
800 (define_expand "<code><mode>3"
801 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
803 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
804 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
805 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
806 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed-float logical operations, emitted as
;; <plogicprefix>ps/pd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); the condition also requires ix86_binary_operator_ok.
808 (define_insn "*<code><mode>3"
809 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
811 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
812 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
813 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
814 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
815 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
816 [(set_attr "type" "sselog")
817 (set_attr "mode" "<MODE>")])
819 ;; Also define scalar versions. These are used for abs, neg, and
820 ;; conditional move. Using subregs into vector modes causes register
821 ;; allocation lossage. These patterns do not allow memory operands
822 ;; because the native instructions read the full 128 bits.
;; Scalar-mode (MODEF) version emitted as the packed andnp instruction.
;; Every operand must be a register, and the mode attribute is the
;; corresponding vector mode (<ssevecmode>).
824 (define_insn "*nand<mode>3"
825 [(set (match_operand:MODEF 0 "register_operand" "=x")
828 (match_operand:MODEF 1 "register_operand" "0"))
829 (match_operand:MODEF 2 "register_operand" "x")))]
830 "SSE_FLOAT_MODE_P (<MODE>mode)"
831 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
832 [(set_attr "type" "sselog")
833 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) version of the logical operations, emitted with the
;; packed <plogicprefix>p mnemonic.  Register operands only; the mode
;; attribute is the corresponding vector mode.
835 (define_insn "*<code><mode>3"
836 [(set (match_operand:MODEF 0 "register_operand" "=x")
838 (match_operand:MODEF 1 "register_operand" "0")
839 (match_operand:MODEF 2 "register_operand" "x")))]
840 "SSE_FLOAT_MODE_P (<MODE>mode)"
841 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
842 [(set_attr "type" "sselog")
843 (set_attr "mode" "<ssevecmode>")])
845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
847 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
848 ;; scalar versions of the instructions as well as the vector versions.
850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
852 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
853 ;; combine to generate a multiply/add with two memory references. We then
854 ;; split this insn, into loading up the destination register with one of the
855 ;; memory operations. If we don't manage to split the insn, reload will
856 ;; generate the appropriate moves. The reason this is needed, is that combine
857 ;; has already folded one of the memory references into both the multiply and
858 ;; add insns, and it can't generate a new pseudo. I.e.:
859 ;; (set (reg1) (mem (addr1)))
860 ;; (set (reg2) (mult (reg1) (mem (addr2))))
861 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; Four-operand multiply/add for scalar and packed float modes, emitted as
;; fmadd<suffix>.  Operands 1 and 2 are grouped together, operand 3 stands
;; apart.  Each of the four alternatives ties the destination to either
;; operand 1 or operand 3.  Enabled under TARGET_SSE5 && TARGET_FUSED_MADD
;; when ix86_sse5_valid_op_p accepts the operands with a final argument of 2.
863 (define_insn "sse5_fmadd<mode>4"
864 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
867 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
868 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
869 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
870 "TARGET_SSE5 && TARGET_FUSED_MADD
871 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
872 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
873 [(set_attr "type" "ssemuladd")
874 (set_attr "mode" "<MODE>")])
876 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The visible condition applies when ix86_sse5_valid_op_p rejects the
;; operands with a final argument of 1 but accepts them with 2, and the
;; destination register is not mentioned in any of the three sources.  The
;; body calls ix86_expand_sse5_multiple_memory and then emits
;; sse5_fmadd<mode>4 again on the same operand array.
878 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
881 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
882 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
883 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
885 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
886 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
887 && !reg_mentioned_p (operands[0], operands[1])
888 && !reg_mentioned_p (operands[0], operands[2])
889 && !reg_mentioned_p (operands[0], operands[3])"
892 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
893 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
894 operands[2], operands[3]));
898 ;; For the scalar operations, use operand1 for the upper words that aren't
899 ;; modified, so restrict the forms that are generated.
900 ;; Scalar version of fmadd
;; vec_merge form over the packed float modes, emitted with the scalar
;; suffix from ssemodesuffixf2s.  Operand 1 is tied to the destination in
;; both alternatives.  ix86_sse5_valid_op_p is called with a final argument
;; of 1 here (the non-vm pattern passes 2).
901 (define_insn "sse5_vmfmadd<mode>4"
902 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
903 (vec_merge:SSEMODEF2P
906 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
907 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
908 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
911 "TARGET_SSE5 && TARGET_FUSED_MADD
912 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
913 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
914 [(set_attr "type" "ssemuladd")
915 (set_attr "mode" "<MODE>")])
917 ;; Floating multiply and subtract
918 ;; Allow two memory operands the same as fmadd
;; Four-operand multiply/subtract, emitted as fmsub<suffix>.  Operand
;; layout, constraint alternatives and enabling condition are identical to
;; those of sse5_fmadd<mode>4.
919 (define_insn "sse5_fmsub<mode>4"
920 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
923 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
924 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
925 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
926 "TARGET_SSE5 && TARGET_FUSED_MADD
927 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
928 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
929 [(set_attr "type" "ssemuladd")
930 (set_attr "mode" "<MODE>")])
932 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Same visible condition as the fmadd split: the operands are rejected by
;; ix86_sse5_valid_op_p with a final argument of 1 but accepted with 2, and
;; the destination is not mentioned in any source.  The body calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_fmsub<mode>4.
934 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
937 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
938 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
939 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
941 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
942 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
943 && !reg_mentioned_p (operands[0], operands[1])
944 && !reg_mentioned_p (operands[0], operands[2])
945 && !reg_mentioned_p (operands[0], operands[3])"
948 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
949 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
950 operands[2], operands[3]));
954 ;; For the scalar operations, use operand1 for the upper words that aren't
955 ;; modified, so restrict the forms that are generated.
956 ;; Scalar version of fmsub
;; vec_merge form over the packed float modes, emitted with the scalar
;; suffix from ssemodesuffixf2s.  Operand 1 is tied to the destination in
;; both alternatives; ix86_sse5_valid_op_p is called with a final argument
;; of 1.
957 (define_insn "sse5_vmfmsub<mode>4"
958 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
959 (vec_merge:SSEMODEF2P
962 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
963 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
964 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
967 "TARGET_SSE5 && TARGET_FUSED_MADD
968 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
969 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
970 [(set_attr "type" "ssemuladd")
971 (set_attr "mode" "<MODE>")])
973 ;; Floating point negative multiply and add
974 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
975 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
976 ;; Allow two memory operands to help in optimizing.
;; Four-alternative fnmadd pattern for SF, DF, V4SF and V2DF (SSEMODEF4).
;; Operand 3 is matched first in the RTL but is still printed last in the
;; assembler template.  Requires TARGET_SSE5 and TARGET_FUSED_MADD, and
;; ix86_sse5_valid_op_p is called with a final argument of 2.
;; Emits fnmaddss / fnmaddsd / fnmaddps / fnmaddpd.
977 (define_insn "sse5_fnmadd<mode>4"
978 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
980 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
982 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
983 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
984 "TARGET_SSE5 && TARGET_FUSED_MADD
985 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
986 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
987 [(set_attr "type" "ssemuladd")
988 (set_attr "mode" "<MODE>")])
990 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Operand 0 must be a register; operands 1-3 may be registers or memory, with
;; operand 3 matched first in the RTL.  The condition selects operand sets that
;; ix86_sse5_valid_op_p rejects with a final argument of 1 but accepts with 2,
;; and in which operand 0 is not mentioned by operands 1-3.  The replacement
;; code calls ix86_expand_sse5_multiple_memory, then emits sse5_fnmadd<mode>4.
992 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
994 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
996 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
997 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
999 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1000 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1001 && !reg_mentioned_p (operands[0], operands[1])
1002 && !reg_mentioned_p (operands[0], operands[2])
1003 && !reg_mentioned_p (operands[0], operands[3])"
1006 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1007 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1008 operands[2], operands[3]));
1012 ;; For the scalar operations, use operand1 for the upper words that aren't
1013 ;; modified, so restrict the forms that are generated.
1014 ;; Scalar version of fnmadd
;; Scalar fnmadd over V4SF and V2DF, expressed as a vec_merge.  Operand 1 is
;; tied to the destination in both alternatives; operand 3 is matched first in
;; the RTL but printed last.  Emits fnmaddss (V4SF) or fnmaddsd (V2DF).
1015 (define_insn "sse5_vmfnmadd<mode>4"
1016 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1017 (vec_merge:SSEMODEF2P
1019 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1021 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1025 "TARGET_SSE5 && TARGET_FUSED_MADD
1026 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1027 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1028 [(set_attr "type" "ssemuladd")
1029 (set_attr "mode" "<MODE>")])
1031 ;; Floating point negative multiply and subtract
1032 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1033 ;; Allow 2 memory operands to help with optimization
;; fnmsub pattern for SF, DF, V4SF and V2DF (SSEMODEF4).  Only two
;; alternatives are offered, and both tie operand 1 to the destination.
;; Requires TARGET_SSE5 and TARGET_FUSED_MADD; ix86_sse5_valid_op_p is called
;; with a final argument of 2.  Emits fnmsubss / fnmsubsd / fnmsubps / fnmsubpd.
1034 (define_insn "sse5_fnmsub<mode>4"
1035 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1039 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1040 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1041 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1042 "TARGET_SSE5 && TARGET_FUSED_MADD
1043 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1044 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1045 [(set_attr "type" "ssemuladd")
1046 (set_attr "mode" "<MODE>")])
1048 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Operand 0 must be a register; operands 1-3 may be registers or memory.
;; The condition selects operand sets that ix86_sse5_valid_op_p rejects with a
;; final argument of 1 but accepts with 2, and in which operand 0 is not
;; mentioned by operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_fnmsub<mode>4.
1050 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1054 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1055 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1056 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1058 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1059 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1060 && !reg_mentioned_p (operands[0], operands[1])
1061 && !reg_mentioned_p (operands[0], operands[2])
1062 && !reg_mentioned_p (operands[0], operands[3])"
1065 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1066 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1067 operands[2], operands[3]));
1071 ;; For the scalar operations, use operand1 for the upper words that aren't
1072 ;; modified, so restrict the forms that are generated.
1073 ;; Scalar version of fnmsub
;; Scalar fnmsub over V4SF and V2DF, expressed as a vec_merge.  Operand 1 is
;; tied to the destination in both alternatives.  Emits fnmsubss (V4SF) or
;; fnmsubsd (V2DF).
;; NOTE(review): ix86_sse5_valid_op_p is called with a final argument of 2
;; here, while sse5_vmfmsub<mode>4 and sse5_vmfnmadd<mode>4 pass 1 -- confirm
;; whether 2 is intended for this scalar form.
1074 (define_insn "sse5_vmfnmsub<mode>4"
1075 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1076 (vec_merge:SSEMODEF2P
1080 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1081 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1082 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1085 "TARGET_SSE5 && TARGET_FUSED_MADD
1086 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1087 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1088 [(set_attr "type" "ssemuladd")
1089 (set_attr "mode" "<MODE>")])
1091 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1092 ;; even if the user used -mno-fused-madd
1093 ;; Parallel instructions. During instruction generation, just default
1094 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the fmadd intrinsic over V4SF and V2DF.  All four operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fmadd<mode>4 with
;; the same operands instead.
1095 (define_expand "sse5i_fmadd<mode>4"
1096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1100 (match_operand:SSEMODEF2P 1 "register_operand" "")
1101 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1102 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1103 UNSPEC_SSE5_INTRINSIC))]
1106 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1107 if (TARGET_FUSED_MADD)
1109 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1110 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fmadd over V4SF and V2DF.  The
;; condition needs only TARGET_SSE5 (TARGET_FUSED_MADD is not tested) plus
;; ix86_sse5_valid_op_p with a final argument of 1.  Emits fmaddps / fmaddpd.
1115 (define_insn "*sse5i_fmadd<mode>4"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1120 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1121 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1122 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1123 UNSPEC_SSE5_INTRINSIC))]
1124 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1125 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1126 [(set_attr "type" "ssemuladd")
1127 (set_attr "mode" "<MODE>")])
;; Expander for the fmsub intrinsic over V4SF and V2DF.  All four operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fmsub<mode>4 with
;; the same operands instead.
1129 (define_expand "sse5i_fmsub<mode>4"
1130 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1134 (match_operand:SSEMODEF2P 1 "register_operand" "")
1135 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1136 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1137 UNSPEC_SSE5_INTRINSIC))]
1140 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1141 if (TARGET_FUSED_MADD)
1143 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1144 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fmsub over V4SF and V2DF.  The
;; condition needs only TARGET_SSE5 plus ix86_sse5_valid_op_p with a final
;; argument of 1.  Emits fmsubps / fmsubpd.
;; NOTE(review): operand 1 uses predicate register_operand although its
;; constraint string includes "xm"; *sse5i_fmadd<mode>4 uses
;; nonimmediate_operand for the same operand -- confirm which is intended.
1149 (define_insn "*sse5i_fmsub<mode>4"
1150 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1154 (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
1155 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1156 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1157 UNSPEC_SSE5_INTRINSIC))]
1158 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1159 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1160 [(set_attr "type" "ssemuladd")
1161 (set_attr "mode" "<MODE>")])
1163 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1164 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the fnmadd intrinsic over V4SF and V2DF.  Operand 3 is matched
;; first in the RTL; all operands are registers and the pattern is tagged with
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_fnmadd<mode>4 with operands 0-3 in numeric order instead.
1165 (define_expand "sse5i_fnmadd<mode>4"
1166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1169 (match_operand:SSEMODEF2P 3 "register_operand" "")
1171 (match_operand:SSEMODEF2P 1 "register_operand" "")
1172 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1173 UNSPEC_SSE5_INTRINSIC))]
1176 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1177 if (TARGET_FUSED_MADD)
1179 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1180 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fnmadd over V4SF and V2DF.
;; Operand 3 is matched first in the RTL but printed last.  The condition
;; needs only TARGET_SSE5 plus ix86_sse5_valid_op_p with a final argument of 1.
;; Emits fnmaddps / fnmaddpd.
1185 (define_insn "*sse5i_fnmadd<mode>4"
1186 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1189 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1191 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1192 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1193 UNSPEC_SSE5_INTRINSIC))]
1194 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1195 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1196 [(set_attr "type" "ssemuladd")
1197 (set_attr "mode" "<MODE>")])
1199 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the fnmsub intrinsic over V4SF and V2DF.  All four operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fnmsub<mode>4 with
;; the same operands instead.
1200 (define_expand "sse5i_fnmsub<mode>4"
1201 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1206 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1207 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1208 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1209 UNSPEC_SSE5_INTRINSIC))]
1212 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1213 if (TARGET_FUSED_MADD)
1215 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1216 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fnmsub over V4SF and V2DF, with
;; four register/memory alternatives.  The condition needs only TARGET_SSE5
;; plus ix86_sse5_valid_op_p with a final argument of 1.
;; Emits fnmsubps / fnmsubpd.
1221 (define_insn "*sse5i_fnmsub<mode>4"
1222 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1227 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1228 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1229 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1230 UNSPEC_SSE5_INTRINSIC))]
1231 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1232 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1233 [(set_attr "type" "ssemuladd")
1234 (set_attr "mode" "<MODE>")])
1236 ;; Scalar instructions
;; Expander for the scalar fmadd intrinsic: a vec_merge over V4SF / V2DF
;; register operands, tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_vmfmadd<mode>4
;; with the same operands instead.
1237 (define_expand "sse5i_vmfmadd<mode>4"
1238 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1240 [(vec_merge:SSEMODEF2P
1243 (match_operand:SSEMODEF2P 1 "register_operand" "")
1244 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1245 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1248 UNSPEC_SSE5_INTRINSIC))]
1251 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1252 if (TARGET_FUSED_MADD)
1254 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1255 operands[2], operands[3]));
1260 ;; For the scalar operations, use operand1 for the upper words that aren't
1261 ;; modified, so restrict the forms that are accepted.
;; Insn for the UNSPEC_SSE5_INTRINSIC form of scalar fmadd.  Operand 1 is a
;; register tied to the destination in both alternatives.  The condition needs
;; only TARGET_SSE5 plus ix86_sse5_valid_op_p with a final argument of 1.
;; Emits fmaddss (V4SF) or fmaddsd (V2DF); the mode attribute is
;; <ssescalarmode>.
1262 (define_insn "*sse5i_vmfmadd<mode>4"
1263 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1265 [(vec_merge:SSEMODEF2P
1268 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1269 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1270 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1273 UNSPEC_SSE5_INTRINSIC))]
1274 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1275 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1276 [(set_attr "type" "ssemuladd")
1277 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fmsub intrinsic: a vec_merge over V4SF / V2DF
;; register operands, tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_vmfmsub<mode>4
;; with the same operands instead.
1279 (define_expand "sse5i_vmfmsub<mode>4"
1280 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1282 [(vec_merge:SSEMODEF2P
1285 (match_operand:SSEMODEF2P 1 "register_operand" "")
1286 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1287 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1290 UNSPEC_SSE5_INTRINSIC))]
1293 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1294 if (TARGET_FUSED_MADD)
1296 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1297 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of scalar fmsub.  Operand 1 is tied
;; to the destination in both alternatives.  The condition needs only
;; TARGET_SSE5 plus ix86_sse5_valid_op_p with a final argument of 1.
;; Emits fmsubss (V4SF) or fmsubsd (V2DF); the mode attribute is
;; <ssescalarmode>.
1302 (define_insn "*sse5i_vmfmsub<mode>4"
1303 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1305 [(vec_merge:SSEMODEF2P
1308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1310 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1313 UNSPEC_SSE5_INTRINSIC))]
1314 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1315 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1316 [(set_attr "type" "ssemuladd")
1317 (set_attr "mode" "<ssescalarmode>")])
1319 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar fnmadd intrinsic: a vec_merge over V4SF / V2DF
;; register operands (operand 3 matched first), tagged with
;; UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfnmadd<mode>4 with operands 0-3 in numeric order instead.
1320 (define_expand "sse5i_vmfnmadd<mode>4"
1321 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1323 [(vec_merge:SSEMODEF2P
1325 (match_operand:SSEMODEF2P 3 "register_operand" "")
1327 (match_operand:SSEMODEF2P 1 "register_operand" "")
1328 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1331 UNSPEC_SSE5_INTRINSIC))]
1334 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1335 if (TARGET_FUSED_MADD)
1337 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1338 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of scalar fnmadd.  Operand 1 is
;; tied to the destination in both alternatives; operand 3 is matched first
;; but printed last.  The condition needs only TARGET_SSE5 plus
;; ix86_sse5_valid_op_p with a final argument of 1.
;; Emits fnmaddss (V4SF) or fnmaddsd (V2DF).
1343 (define_insn "*sse5i_vmfnmadd<mode>4"
1344 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1346 [(vec_merge:SSEMODEF2P
1348 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1350 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1351 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1354 UNSPEC_SSE5_INTRINSIC))]
1355 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1356 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1357 [(set_attr "type" "ssemuladd")
1358 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar fnmsub intrinsic: a vec_merge over V4SF / V2DF
;; register operands, tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_vmfnmsub<mode>4
;; with the same operands instead.
1360 (define_expand "sse5i_vmfnmsub<mode>4"
1361 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1363 [(vec_merge:SSEMODEF2P
1367 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1368 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1369 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1372 UNSPEC_SSE5_INTRINSIC))]
1375 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1376 if (TARGET_FUSED_MADD)
1378 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1379 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of scalar fnmsub.  Operand 1 is
;; tied to the destination in both alternatives.  The condition needs only
;; TARGET_SSE5 plus ix86_sse5_valid_op_p with a final argument of 1.
;; Emits fnmsubss (V4SF) or fnmsubsd (V2DF).
1384 (define_insn "*sse5i_vmfnmsub<mode>4"
1385 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1387 [(vec_merge:SSEMODEF2P
1391 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1392 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1393 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1396 UNSPEC_SSE5_INTRINSIC))]
1397 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1398 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1399 [(set_attr "type" "ssemuladd")
1400 (set_attr "mode" "<ssescalarmode>")])
1402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1404 ;; Parallel single-precision floating point conversion operations
1406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: applies float:V2SF to the V2SI operand 2 (constraint "ym").
;; Operand 1 is a V4SF register tied to the V4SF destination.
1408 (define_insn "sse_cvtpi2ps"
1409 [(set (match_operand:V4SF 0 "register_operand" "=x")
1412 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1413 (match_operand:V4SF 1 "register_operand" "0")
1416 "cvtpi2ps\t{%2, %0|%0, %2}"
1417 [(set_attr "type" "ssecvt")
1418 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (register or memory) converted through a V4SI unspec;
;; the selection vector names elements 0 and 1, giving a V2SI destination with
;; constraint "=y".  The insn is assigned to unit "mmx".
1420 (define_insn "sse_cvtps2pi"
1421 [(set (match_operand:V2SI 0 "register_operand" "=y")
1423 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1425 (parallel [(const_int 0) (const_int 1)])))]
1427 "cvtps2pi\t{%1, %0|%0, %1}"
1428 [(set_attr "type" "ssecvt")
1429 (set_attr "unit" "mmx")
1430 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but expressed with fix:V4SI instead of an
;; unspec; elements 0 and 1 of the result form the V2SI destination.
;; NOTE(review): the mode attribute is "SF" here but "DI" on sse_cvtps2pi --
;; confirm which is intended.
1432 (define_insn "sse_cvttps2pi"
1433 [(set (match_operand:V2SI 0 "register_operand" "=y")
1435 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1436 (parallel [(const_int 0) (const_int 1)])))]
1438 "cvttps2pi\t{%1, %0|%0, %1}"
1439 [(set_attr "type" "ssecvt")
1440 (set_attr "unit" "mmx")
1441 (set_attr "mode" "SF")])
;; cvtsi2ss: applies float:SF to the SI operand 2, which is "r" in the first
;; alternative and "m" in the second.  Operand 1 is a V4SF register tied to
;; the destination.  Decode attributes are given per alternative.
1443 (define_insn "sse_cvtsi2ss"
1444 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1447 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1448 (match_operand:V4SF 1 "register_operand" "0,0")
1451 "cvtsi2ss\t{%2, %0|%0, %2}"
1452 [(set_attr "type" "sseicvt")
1453 (set_attr "athlon_decode" "vector,double")
1454 (set_attr "amdfam10_decode" "vector,double")
1455 (set_attr "mode" "SF")])
;; cvtsi2ssq: DI-source variant of cvtsi2ss, enabled only when both TARGET_SSE
;; and TARGET_64BIT hold.  Operand 1 is a V4SF register tied to the
;; destination.
1457 (define_insn "sse_cvtsi2ssq"
1458 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1461 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1462 (match_operand:V4SF 1 "register_operand" "0,0")
1464 "TARGET_SSE && TARGET_64BIT"
1465 "cvtsi2ssq\t{%2, %0|%0, %2}"
1466 [(set_attr "type" "sseicvt")
1467 (set_attr "athlon_decode" "vector,double")
1468 (set_attr "amdfam10_decode" "vector,double")
1469 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of the V4SF operand 1 (register or memory) is wrapped
;; in UNSPEC_FIX_NOTRUNC and stored to an SI general register.
1471 (define_insn "sse_cvtss2si"
1472 [(set (match_operand:SI 0 "register_operand" "=r,r")
1475 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1476 (parallel [(const_int 0)]))]
1477 UNSPEC_FIX_NOTRUNC))]
1479 "cvtss2si\t{%1, %0|%0, %1}"
1480 [(set_attr "type" "sseicvt")
1481 (set_attr "athlon_decode" "double,vector")
1482 (set_attr "prefix_rep" "1")
1483 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a scalar SF operand (register or
;; memory) rather than an element of a V4SF vector; the result is an SI
;; general register wrapped in UNSPEC_FIX_NOTRUNC.
1485 (define_insn "sse_cvtss2si_2"
1486 [(set (match_operand:SI 0 "register_operand" "=r,r")
1487 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1488 UNSPEC_FIX_NOTRUNC))]
1490 "cvtss2si\t{%1, %0|%0, %1}"
1491 [(set_attr "type" "sseicvt")
1492 (set_attr "athlon_decode" "double,vector")
1493 (set_attr "amdfam10_decode" "double,double")
1494 (set_attr "prefix_rep" "1")
1495 (set_attr "mode" "SI")])
;; cvtss2siq: DI-result form of sse_cvtss2si, taking element 0 of the V4SF
;; operand 1.  Enabled only when both TARGET_SSE and TARGET_64BIT hold.
1497 (define_insn "sse_cvtss2siq"
1498 [(set (match_operand:DI 0 "register_operand" "=r,r")
1501 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1502 (parallel [(const_int 0)]))]
1503 UNSPEC_FIX_NOTRUNC))]
1504 "TARGET_SSE && TARGET_64BIT"
1505 "cvtss2siq\t{%1, %0|%0, %1}"
1506 [(set_attr "type" "sseicvt")
1507 (set_attr "athlon_decode" "double,vector")
1508 (set_attr "prefix_rep" "1")
1509 (set_attr "mode" "DI")])
;; Variant of cvtss2siq whose source is a scalar SF operand; the DI result is
;; wrapped in UNSPEC_FIX_NOTRUNC.  Enabled only when both TARGET_SSE and
;; TARGET_64BIT hold.
1511 (define_insn "sse_cvtss2siq_2"
1512 [(set (match_operand:DI 0 "register_operand" "=r,r")
1513 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1514 UNSPEC_FIX_NOTRUNC))]
1515 "TARGET_SSE && TARGET_64BIT"
1516 "cvtss2siq\t{%1, %0|%0, %1}"
1517 [(set_attr "type" "sseicvt")
1518 (set_attr "athlon_decode" "double,vector")
1519 (set_attr "amdfam10_decode" "double,double")
1520 (set_attr "prefix_rep" "1")
1521 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of the V4SF operand 1 (register or memory)
;; to an SI general register.
1523 (define_insn "sse_cvttss2si"
1524 [(set (match_operand:SI 0 "register_operand" "=r,r")
1527 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1528 (parallel [(const_int 0)]))))]
1530 "cvttss2si\t{%1, %0|%0, %1}"
1531 [(set_attr "type" "sseicvt")
1532 (set_attr "athlon_decode" "double,vector")
1533 (set_attr "amdfam10_decode" "double,double")
1534 (set_attr "prefix_rep" "1")
1535 (set_attr "mode" "SI")])
;; cvttss2siq: DI-result form of sse_cvttss2si, taking element 0 of the V4SF
;; operand 1.  Enabled only when both TARGET_SSE and TARGET_64BIT hold.
1537 (define_insn "sse_cvttss2siq"
1538 [(set (match_operand:DI 0 "register_operand" "=r,r")
1541 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1542 (parallel [(const_int 0)]))))]
1543 "TARGET_SSE && TARGET_64BIT"
1544 "cvttss2siq\t{%1, %0|%0, %1}"
1545 [(set_attr "type" "sseicvt")
1546 (set_attr "athlon_decode" "double,vector")
1547 (set_attr "amdfam10_decode" "double,double")
1548 (set_attr "prefix_rep" "1")
1549 (set_attr "mode" "DI")])
;; cvtdq2ps: applies float:V4SF to the V4SI operand 1 (register or memory),
;; producing a V4SF register.
1551 (define_insn "sse2_cvtdq2ps"
1552 [(set (match_operand:V4SF 0 "register_operand" "=x")
1553 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1555 "cvtdq2ps\t{%1, %0|%0, %1}"
1556 [(set_attr "type" "ssecvt")
1557 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand 1 (register or memory) to a V4SI register, modelled
;; as UNSPEC_FIX_NOTRUNC.  Sets the prefix_data16 attribute.
1559 (define_insn "sse2_cvtps2dq"
1560 [(set (match_operand:V4SI 0 "register_operand" "=x")
1561 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1562 UNSPEC_FIX_NOTRUNC))]
1564 "cvtps2dq\t{%1, %0|%0, %1}"
1565 [(set_attr "type" "ssecvt")
1566 (set_attr "prefix_data16" "1")
1567 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF operand 1 (register or memory) to a V4SI register, modelled
;; with fix:V4SI.  Sets the prefix_rep attribute.
1569 (define_insn "sse2_cvttps2dq"
1570 [(set (match_operand:V4SI 0 "register_operand" "=x")
1571 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1573 "cvttps2dq\t{%1, %0|%0, %1}"
1574 [(set_attr "type" "ssecvt")
1575 (set_attr "prefix_rep" "1")
1576 (set_attr "mode" "TI")])
1578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1580 ;; Parallel double-precision floating point conversion operations
1582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: applies float:V2DF to the V2SI operand 1, which is "y" in the
;; first alternative and "m" in the second.  Only the first alternative is
;; assigned to unit "mmx".
1584 (define_insn "sse2_cvtpi2pd"
1585 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1586 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1588 "cvtpi2pd\t{%1, %0|%0, %1}"
1589 [(set_attr "type" "ssecvt")
1590 (set_attr "unit" "mmx,*")
1591 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 (register or memory) to a V2SI destination with
;; constraint "=y", modelled as UNSPEC_FIX_NOTRUNC.  Assigned to unit "mmx".
1593 (define_insn "sse2_cvtpd2pi"
1594 [(set (match_operand:V2SI 0 "register_operand" "=y")
1595 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1596 UNSPEC_FIX_NOTRUNC))]
1598 "cvtpd2pi\t{%1, %0|%0, %1}"
1599 [(set_attr "type" "ssecvt")
1600 (set_attr "unit" "mmx")
1601 (set_attr "prefix_data16" "1")
1602 (set_attr "mode" "DI")])
;; cvttpd2pi: like sse2_cvtpd2pi but modelled with fix:V2SI instead of an
;; unspec.
;; NOTE(review): the mode attribute is "TI" here but "DI" on sse2_cvtpd2pi --
;; confirm which is intended.
1604 (define_insn "sse2_cvttpd2pi"
1605 [(set (match_operand:V2SI 0 "register_operand" "=y")
1606 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1608 "cvttpd2pi\t{%1, %0|%0, %1}"
1609 [(set_attr "type" "ssecvt")
1610 (set_attr "unit" "mmx")
1611 (set_attr "prefix_data16" "1")
1612 (set_attr "mode" "TI")])
;; cvtsi2sd: applies float:DF to the SI operand 2 ("r" or "m").  Operand 1 is
;; a V2DF register tied to the destination.
1614 (define_insn "sse2_cvtsi2sd"
1615 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1618 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1619 (match_operand:V2DF 1 "register_operand" "0,0")
1622 "cvtsi2sd\t{%2, %0|%0, %2}"
1623 [(set_attr "type" "sseicvt")
1624 (set_attr "mode" "DF")
1625 (set_attr "athlon_decode" "double,direct")
1626 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DI-source variant of cvtsi2sd, enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.  Operand 1 is a V2DF register tied to
;; the destination.
1628 (define_insn "sse2_cvtsi2sdq"
1629 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1632 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1633 (match_operand:V2DF 1 "register_operand" "0,0")
1635 "TARGET_SSE2 && TARGET_64BIT"
1636 "cvtsi2sdq\t{%2, %0|%0, %2}"
1637 [(set_attr "type" "sseicvt")
1638 (set_attr "mode" "DF")
1639 (set_attr "athlon_decode" "double,direct")
1640 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: element 0 of the V2DF operand 1 (register or memory) is wrapped
;; in UNSPEC_FIX_NOTRUNC and stored to an SI general register.
1642 (define_insn "sse2_cvtsd2si"
1643 [(set (match_operand:SI 0 "register_operand" "=r,r")
1646 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1647 (parallel [(const_int 0)]))]
1648 UNSPEC_FIX_NOTRUNC))]
1650 "cvtsd2si\t{%1, %0|%0, %1}"
1651 [(set_attr "type" "sseicvt")
1652 (set_attr "athlon_decode" "double,vector")
1653 (set_attr "prefix_rep" "1")
1654 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a scalar DF operand (register or
;; memory) rather than an element of a V2DF vector; the SI result is wrapped
;; in UNSPEC_FIX_NOTRUNC.
1656 (define_insn "sse2_cvtsd2si_2"
1657 [(set (match_operand:SI 0 "register_operand" "=r,r")
1658 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1659 UNSPEC_FIX_NOTRUNC))]
1661 "cvtsd2si\t{%1, %0|%0, %1}"
1662 [(set_attr "type" "sseicvt")
1663 (set_attr "athlon_decode" "double,vector")
1664 (set_attr "amdfam10_decode" "double,double")
1665 (set_attr "prefix_rep" "1")
1666 (set_attr "mode" "SI")])
;; cvtsd2siq: DI-result form of sse2_cvtsd2si, taking element 0 of the V2DF
;; operand 1.  Enabled only when both TARGET_SSE2 and TARGET_64BIT hold.
1668 (define_insn "sse2_cvtsd2siq"
1669 [(set (match_operand:DI 0 "register_operand" "=r,r")
1672 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1673 (parallel [(const_int 0)]))]
1674 UNSPEC_FIX_NOTRUNC))]
1675 "TARGET_SSE2 && TARGET_64BIT"
1676 "cvtsd2siq\t{%1, %0|%0, %1}"
1677 [(set_attr "type" "sseicvt")
1678 (set_attr "athlon_decode" "double,vector")
1679 (set_attr "prefix_rep" "1")
1680 (set_attr "mode" "DI")])
;; Variant of cvtsd2siq whose source is a scalar DF operand; the DI result is
;; wrapped in UNSPEC_FIX_NOTRUNC.  Enabled only when both TARGET_SSE2 and
;; TARGET_64BIT hold.
1682 (define_insn "sse2_cvtsd2siq_2"
1683 [(set (match_operand:DI 0 "register_operand" "=r,r")
1684 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1685 UNSPEC_FIX_NOTRUNC))]
1686 "TARGET_SSE2 && TARGET_64BIT"
1687 "cvtsd2siq\t{%1, %0|%0, %1}"
1688 [(set_attr "type" "sseicvt")
1689 (set_attr "athlon_decode" "double,vector")
1690 (set_attr "amdfam10_decode" "double,double")
1691 (set_attr "prefix_rep" "1")
1692 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 (register or memory)
;; to an SI general register.
1694 (define_insn "sse2_cvttsd2si"
1695 [(set (match_operand:SI 0 "register_operand" "=r,r")
1698 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1699 (parallel [(const_int 0)]))))]
1701 "cvttsd2si\t{%1, %0|%0, %1}"
1702 [(set_attr "type" "sseicvt")
1703 (set_attr "prefix_rep" "1")
1704 (set_attr "mode" "SI")
1705 (set_attr "athlon_decode" "double,vector")
1706 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DI-result form of sse2_cvttsd2si, taking element 0 of the V2DF
;; operand 1.  Enabled only when both TARGET_SSE2 and TARGET_64BIT hold.
1708 (define_insn "sse2_cvttsd2siq"
1709 [(set (match_operand:DI 0 "register_operand" "=r,r")
1712 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1713 (parallel [(const_int 0)]))))]
1714 "TARGET_SSE2 && TARGET_64BIT"
1715 "cvttsd2siq\t{%1, %0|%0, %1}"
1716 [(set_attr "type" "sseicvt")
1717 (set_attr "prefix_rep" "1")
1718 (set_attr "mode" "DI")
1719 (set_attr "athlon_decode" "double,vector")
1720 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: V4SI operand 1 (register or memory) to a V2DF register; the
;; selection vector names elements 0 and 1 of the source.
1722 (define_insn "sse2_cvtdq2pd"
1723 [(set (match_operand:V2DF 0 "register_operand" "=x")
1726 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1727 (parallel [(const_int 0) (const_int 1)]))))]
1729 "cvtdq2pd\t{%1, %0|%0, %1}"
1730 [(set_attr "type" "ssecvt")
1731 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted through a V2SI unspec
;; into a V4SI register.  The preparation statement sets operands[2] to the
;; V2SI zero constant.
1733 (define_expand "sse2_cvtpd2dq"
1734 [(set (match_operand:V4SI 0 "register_operand" "")
1736 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1740 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: the V2SI unspec of V2DF operand 1 is paired with operand
;; 2, which must satisfy const0_operand, to form the V4SI destination.
1742 (define_insn "*sse2_cvtpd2dq"
1743 [(set (match_operand:V4SI 0 "register_operand" "=x")
1745 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1747 (match_operand:V2SI 2 "const0_operand" "")))]
1749 "cvtpd2dq\t{%1, %0|%0, %1}"
1750 [(set_attr "type" "ssecvt")
1751 (set_attr "prefix_rep" "1")
1752 (set_attr "mode" "TI")
1753 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: V2DF operand 1 is converted with fix:V2SI into a
;; V4SI register.  The preparation statement sets operands[2] to the V2SI
;; zero constant.
1755 (define_expand "sse2_cvttpd2dq"
1756 [(set (match_operand:V4SI 0 "register_operand" "")
1758 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1761 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: fix:V2SI of V2DF operand 1 is paired with operand 2,
;; which must satisfy const0_operand, to form the V4SI destination.
1763 (define_insn "*sse2_cvttpd2dq"
1764 [(set (match_operand:V4SI 0 "register_operand" "=x")
1766 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1767 (match_operand:V2SI 2 "const0_operand" "")))]
1769 "cvttpd2dq\t{%1, %0|%0, %1}"
1770 [(set_attr "type" "ssecvt")
1771 (set_attr "prefix_rep" "1")
1772 (set_attr "mode" "TI")
1773 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: applies float_truncate:V2SF to the V2DF operand 2 (register or
;; memory).  Operand 1 is a V4SF register tied to the destination.
1775 (define_insn "sse2_cvtsd2ss"
1776 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1779 (float_truncate:V2SF
1780 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1781 (match_operand:V4SF 1 "register_operand" "0,0")
1784 "cvtsd2ss\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "ssecvt")
1786 (set_attr "athlon_decode" "vector,double")
1787 (set_attr "amdfam10_decode" "vector,double")
1788 (set_attr "mode" "SF")])
;; cvtss2sd: the source is the V4SF operand 2 (register or memory), with a
;; selection vector naming elements 0 and 1.  Operand 1 is a V2DF register
;; tied to the destination.
1790 (define_insn "sse2_cvtss2sd"
1791 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1795 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1796 (parallel [(const_int 0) (const_int 1)])))
1797 (match_operand:V2DF 1 "register_operand" "0,0")
1800 "cvtss2sd\t{%2, %0|%0, %2}"
1801 [(set_attr "type" "ssecvt")
1802 (set_attr "amdfam10_decode" "vector,double")
1803 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: V2DF operand 1 is converted with
;; float_truncate:V2SF into a V4SF register.  The preparation statement sets
;; operands[2] to the V2SF zero constant.
1805 (define_expand "sse2_cvtpd2ps"
1806 [(set (match_operand:V4SF 0 "register_operand" "")
1808 (float_truncate:V2SF
1809 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1812 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: float_truncate:V2SF of V2DF operand 1 is paired with
;; operand 2, which must satisfy const0_operand, to form the V4SF destination.
1814 (define_insn "*sse2_cvtpd2ps"
1815 [(set (match_operand:V4SF 0 "register_operand" "=x")
1817 (float_truncate:V2SF
1818 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1819 (match_operand:V2SF 2 "const0_operand" "")))]
1821 "cvtpd2ps\t{%1, %0|%0, %1}"
1822 [(set_attr "type" "ssecvt")
1823 (set_attr "prefix_data16" "1")
1824 (set_attr "mode" "V4SF")
1825 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: V4SF operand 1 (register or memory) to a V2DF register; the
;; selection vector names elements 0 and 1 of the source.
1827 (define_insn "sse2_cvtps2pd"
1828 [(set (match_operand:V2DF 0 "register_operand" "=x")
1831 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1832 (parallel [(const_int 0) (const_int 1)]))))]
1834 "cvtps2pd\t{%1, %0|%0, %1}"
1835 [(set_attr "type" "ssecvt")
1836 (set_attr "mode" "V2DF")
1837 (set_attr "amdfam10_decode" "direct")])
;; Two-set expander from V4SF operand 1 to V2DF operand 0.  The first set
;; uses a selection vector beginning with element 6; the second uses elements
;; 0 and 1.  The preparation code allocates operands[2] as a fresh V4SF
;; pseudo, presumably to hold the intermediate result -- TODO confirm.
1839 (define_expand "vec_unpacks_hi_v4sf"
1844 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1845 (parallel [(const_int 6)
1849 (set (match_operand:V2DF 0 "register_operand" "")
1853 (parallel [(const_int 0) (const_int 1)]))))]
1856 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander from V4SF operand 1 (register or memory) to V2DF operand 0, using
;; a selection vector that names elements 0 and 1 of the source.
1859 (define_expand "vec_unpacks_lo_v4sf"
1860 [(set (match_operand:V2DF 0 "register_operand" "")
1863 (match_operand:V4SF 1 "nonimmediate_operand" "")
1864 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander from V8HI operand 1 to V4SF operand 0 in two steps: emit
;; vec_unpacks_hi_v8hi into a fresh V4SI temporary, then emit sse2_cvtdq2ps
;; from that temporary into operand 0.
1867 (define_expand "vec_unpacks_float_hi_v8hi"
1868 [(match_operand:V4SF 0 "register_operand" "")
1869 (match_operand:V8HI 1 "register_operand" "")]
1872 rtx tmp = gen_reg_rtx (V4SImode);
1874 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1875 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0 in two steps: emit
;; vec_unpacks_lo_v8hi into a fresh V4SI temporary, then emit sse2_cvtdq2ps
;; from that temporary into operand 0.
1879 (define_expand "vec_unpacks_float_lo_v8hi"
1880 [(match_operand:V4SF 0 "register_operand" "")
1881 (match_operand:V8HI 1 "register_operand" "")]
1884 rtx tmp = gen_reg_rtx (V4SImode);
1886 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1887 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0 in two steps: emit
;; vec_unpacku_hi_v8hi into a fresh V4SI temporary, then emit sse2_cvtdq2ps
;; from that temporary into operand 0.
1891 (define_expand "vec_unpacku_float_hi_v8hi"
1892 [(match_operand:V4SF 0 "register_operand" "")
1893 (match_operand:V8HI 1 "register_operand" "")]
1896 rtx tmp = gen_reg_rtx (V4SImode);
1898 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1899 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0 in two steps: emit
;; vec_unpacku_lo_v8hi into a fresh V4SI temporary, then emit sse2_cvtdq2ps
;; from that temporary into operand 0.
1903 (define_expand "vec_unpacku_float_lo_v8hi"
1904 [(match_operand:V4SF 0 "register_operand" "")
1905 (match_operand:V8HI 1 "register_operand" "")]
1908 rtx tmp = gen_reg_rtx (V4SImode);
1910 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1911 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-set expander from V4SI operand 1 to V2DF operand 0.  The first set
;; uses a selection vector beginning with element 2; the second uses elements
;; 0 and 1.  The preparation code allocates operands[2] as a fresh V4SI
;; pseudo, presumably to hold the intermediate result -- TODO confirm.
1915 (define_expand "vec_unpacks_float_hi_v4si"
1918 (match_operand:V4SI 1 "nonimmediate_operand" "")
1919 (parallel [(const_int 2)
1923 (set (match_operand:V2DF 0 "register_operand" "")
1927 (parallel [(const_int 0) (const_int 1)]))))]
1930 operands[2] = gen_reg_rtx (V4SImode);
;; Expander from V4SI operand 1 (register or memory) to V2DF operand 0, using
;; a selection vector that names elements 0 and 1 of the source.
1933 (define_expand "vec_unpacks_float_lo_v4si"
1934 [(set (match_operand:V2DF 0 "register_operand" "")
1937 (match_operand:V4SI 1 "nonimmediate_operand" "")
1938 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander packing two V2DF operands into V4SF operand 0.  Each input is
;; converted with sse2_cvtpd2ps into its own fresh V4SF register (r1, r2), and
;; the two results are combined into operand 0 with sse_movlhps.
1941 (define_expand "vec_pack_trunc_v2df"
1942 [(match_operand:V4SF 0 "register_operand" "")
1943 (match_operand:V2DF 1 "nonimmediate_operand" "")
1944 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1949 r1 = gen_reg_rtx (V4SFmode);
1950 r2 = gen_reg_rtx (V4SFmode);
1952 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1953 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1954 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander packing two V2DF operands into V4SI operand 0.  Each input is
;; converted with sse2_cvttpd2dq into its own fresh V4SI register (r1, r2);
;; the results are then combined with sse2_punpcklqdq, operating on V2DI
;; lowparts of operand 0, r1 and r2.
1958 (define_expand "vec_pack_sfix_trunc_v2df"
1959 [(match_operand:V4SI 0 "register_operand" "")
1960 (match_operand:V2DF 1 "nonimmediate_operand" "")
1961 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1966 r1 = gen_reg_rtx (V4SImode);
1967 r2 = gen_reg_rtx (V4SImode);
1969 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1970 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1971 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1972 gen_lowpart (V2DImode, r1),
1973 gen_lowpart (V2DImode, r2)));
;; Expander packing two V2DF operands into V4SI operand 0.  Each input is
;; converted with sse2_cvtpd2dq into its own fresh V4SI register (r1, r2);
;; the results are then combined with sse2_punpcklqdq, operating on V2DI
;; lowparts of operand 0, r1 and r2.
1977 (define_expand "vec_pack_sfix_v2df"
1978 [(match_operand:V4SI 0 "register_operand" "")
1979 (match_operand:V2DF 1 "nonimmediate_operand" "")
1980 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1985 r1 = gen_reg_rtx (V4SImode);
1986 r2 = gen_reg_rtx (V4SImode);
1988 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1989 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1990 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1991 gen_lowpart (V2DImode, r1),
1992 gen_lowpart (V2DImode, r2)));
1996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1998 ;; Parallel single-precision floating point element swizzling
2000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander form of movhlps: all three V4SF operands accept registers or
;; memory, and the preparation statement passes them through
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
2002 (define_expand "sse_movhlps_exp"
2003 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2006 (match_operand:V4SF 1 "nonimmediate_operand" "")
2007 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2008 (parallel [(const_int 6)
2013 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Three-alternative insn; operand 1 is tied to the destination in each.
;;   alt 0: register destination, register operand 2 -> movhlps
;;   alt 1: register destination, operand 2 "o"      -> movlps using %H2
;;   alt 2: memory destination, register operand 2   -> movhps
;; Requires TARGET_SSE and that operands 1 and 2 are not both memory.
2015 (define_insn "sse_movhlps"
2016 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2019 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2020 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2021 (parallel [(const_int 6)
2025 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2027 movhlps\t{%2, %0|%0, %2}
2028 movlps\t{%H2, %0|%0, %H2}
2029 movhps\t{%2, %0|%0, %2}"
2030 [(set_attr "type" "ssemov")
2031 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander form of movlhps: all three V4SF operands accept registers or
;; memory, and the preparation statement passes them through
;; ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands).
2033 (define_expand "sse_movlhps_exp"
2034 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2037 (match_operand:V4SF 1 "nonimmediate_operand" "")
2038 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2039 (parallel [(const_int 0)
2044 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Operand 1 is tied to the destination in all three alternatives:
;;   x <- x : movlhps
;;   x <- m : movhps load of memory operand 2
;;   o <- x : movlps store of operand 2 to the %H form of the destination
;; Operand validity is checked with ix86_binary_operator_ok.
2046 (define_insn "sse_movlhps"
2047 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2050 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2051 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2052 (parallel [(const_int 0)
2056 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2058 movlhps\t{%2, %0|%0, %2}
2059 movhps\t{%2, %0|%0, %2}
2060 movlps\t{%2, %H0|%H0, %2}"
2061 [(set_attr "type" "ssemov")
2062 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the selector picks indices 2, 6, 3, 7 from operands 1 and 2
;; taken together.  Operand 1 is tied to the destination register;
;; operand 2 may be a register or memory.
2064 (define_insn "sse_unpckhps"
2065 [(set (match_operand:V4SF 0 "register_operand" "=x")
2068 (match_operand:V4SF 1 "register_operand" "0")
2069 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2070 (parallel [(const_int 2) (const_int 6)
2071 (const_int 3) (const_int 7)])))]
2073 "unpckhps\t{%2, %0|%0, %2}"
2074 [(set_attr "type" "sselog")
2075 (set_attr "mode" "V4SF")])
;; unpcklps: the selector picks indices 0, 4, 1, 5 from operands 1 and 2
;; taken together.  Operand 1 is tied to the destination register;
;; operand 2 may be a register or memory.
2077 (define_insn "sse_unpcklps"
2078 [(set (match_operand:V4SF 0 "register_operand" "=x")
2081 (match_operand:V4SF 1 "register_operand" "0")
2082 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2083 (parallel [(const_int 0) (const_int 4)
2084 (const_int 1) (const_int 5)])))]
2086 "unpcklps\t{%2, %0|%0, %2}"
2087 [(set_attr "type" "sselog")
2088 (set_attr "mode" "V4SF")])
2090 ;; These are modeled with the same vec_concat as the others so that we
2091 ;; capture users of shufps that can use the new instructions
;; movshdup: single V4SF source (register or memory), register destination,
;; no tie between them.  The selector list starts at index 1.
2092 (define_insn "sse3_movshdup"
2093 [(set (match_operand:V4SF 0 "register_operand" "=x")
2096 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2098 (parallel [(const_int 1)
2103 "movshdup\t{%1, %0|%0, %1}"
2104 [(set_attr "type" "sse")
2105 (set_attr "prefix_rep" "1")
2106 (set_attr "mode" "V4SF")])
;; movsldup: single V4SF source (register or memory), register destination,
;; no tie between them.  The selector list starts at index 0.
2108 (define_insn "sse3_movsldup"
2109 [(set (match_operand:V4SF 0 "register_operand" "=x")
2112 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2114 (parallel [(const_int 0)
2119 "movsldup\t{%1, %0|%0, %1}"
2120 [(set_attr "type" "sse")
2121 (set_attr "prefix_rep" "1")
2122 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit selectors that are handed to sse_shufps_1; the two
;; upper selectors get 4 added so that they fall in the range 4..7.
2124 (define_expand "sse_shufps"
2125 [(match_operand:V4SF 0 "register_operand" "")
2126 (match_operand:V4SF 1 "register_operand" "")
2127 (match_operand:V4SF 2 "nonimmediate_operand" "")
2128 (match_operand:SI 3 "const_int_operand" "")]
2131 int mask = INTVAL (operands[3]);
2132 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2133 GEN_INT ((mask >> 0) & 3),
2134 GEN_INT ((mask >> 2) & 3),
2135 GEN_INT (((mask >> 4) & 3) + 4),
2136 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selectors as separate operands: operands 3 and 4 must be
;; in 0..3, operands 5 and 6 in 4..7.  The output code rebuilds the 8-bit
;; immediate (subtracting the bias of 4 from operands 5 and 6) and stores it
;; back into operands[3] so that %3 prints it.
2140 (define_insn "sse_shufps_1"
2141 [(set (match_operand:V4SF 0 "register_operand" "=x")
2144 (match_operand:V4SF 1 "register_operand" "0")
2145 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2146 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2147 (match_operand 4 "const_0_to_3_operand" "")
2148 (match_operand 5 "const_4_to_7_operand" "")
2149 (match_operand 6 "const_4_to_7_operand" "")])))]
2153 mask |= INTVAL (operands[3]) << 0;
2154 mask |= INTVAL (operands[4]) << 2;
2155 mask |= (INTVAL (operands[5]) - 4) << 4;
2156 mask |= (INTVAL (operands[6]) - 4) << 6;
2157 operands[3] = GEN_INT (mask);
2159 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2161 [(set_attr "type" "sselog")
2162 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;;   m <- x : movhps store
;;   x <- x : movhlps
;;   x <- o : movlps load using the %H form of the memory source
2164 (define_insn "sse_storehps"
2165 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2167 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2168 (parallel [(const_int 2) (const_int 3)])))]
2171 movhps\t{%1, %0|%0, %1}
2172 movhlps\t{%1, %0|%0, %1}
2173 movlps\t{%H1, %0|%0, %H1}"
2174 [(set_attr "type" "ssemov")
2175 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander counterpart of sse_loadhps: elements 0 and 1 of V4SF operand 1
;; together with V2SF operand 2.  Operands are passed through
;; ix86_fixup_binary_operands (code UNKNOWN) first.
2177 (define_expand "sse_loadhps_exp"
2178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2181 (match_operand:V4SF 1 "nonimmediate_operand" "")
2182 (parallel [(const_int 0) (const_int 1)]))
2183 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2185 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Elements 0 and 1 of operand 1 (tied to the destination) together with
;; V2SF operand 2.
;;   x <- m : movhps load
;;   x <- x : movlhps
;;   o <- x : movlps store of operand 2 to the %H form of the destination
2187 (define_insn "sse_loadhps"
2188 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2191 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2192 (parallel [(const_int 0) (const_int 1)]))
2193 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2196 movhps\t{%2, %0|%0, %2}
2197 movlhps\t{%2, %0|%0, %2}
2198 movlps\t{%2, %H0|%H0, %2}"
2199 [(set_attr "type" "ssemov")
2200 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;;   m <- x : movlps store
;;   x <- x : movaps (whole-register copy)
;;   x <- m : movlps load
2202 (define_insn "sse_storelps"
2203 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2205 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2206 (parallel [(const_int 0) (const_int 1)])))]
2209 movlps\t{%1, %0|%0, %1}
2210 movaps\t{%1, %0|%0, %1}
2211 movlps\t{%1, %0|%0, %1}"
2212 [(set_attr "type" "ssemov")
2213 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander counterpart of sse_loadlps: V2SF operand 2 together with
;; elements 2 and 3 of V4SF operand 1.  Operands are passed through
;; ix86_fixup_binary_operands (code UNKNOWN) first.
2215 (define_expand "sse_loadlps_exp"
2216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2218 (match_operand:V2SF 2 "nonimmediate_operand" "")
2220 (match_operand:V4SF 1 "nonimmediate_operand" "")
2221 (parallel [(const_int 2) (const_int 3)]))))]
2223 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; V2SF operand 2 together with elements 2 and 3 of V4SF operand 1.
;;   x <- (2 tied, 1 in x) : shufps $0xe4 taking the upper pair from %1
;;   x <- (2 in m, 1 tied) : movlps load
;;   m <- (2 in x, 1 tied) : movlps store
2225 (define_insn "sse_loadlps"
2226 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2228 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2230 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2231 (parallel [(const_int 2) (const_int 3)]))))]
2234 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2235 movlps\t{%2, %0|%0, %2}
2236 movlps\t{%2, %0|%0, %2}"
2237 [(set_attr "type" "sselog,ssemov,ssemov")
2238 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 1 is tied to the destination and
;; operand 2 is the register printed as the movss source.
2240 (define_insn "sse_movss"
2241 [(set (match_operand:V4SF 0 "register_operand" "=x")
2243 (match_operand:V4SF 2 "register_operand" "x")
2244 (match_operand:V4SF 1 "register_operand" "0")
2247 "movss\t{%2, %0|%0, %2}"
2248 [(set_attr "type" "ssemov")
2249 (set_attr "mode" "SF")])
;; The SF input is tied to the V4SF destination register; the insn is a
;; shufps of that register with itself using immediate 0.
2251 (define_insn "*vec_dupv4sf"
2252 [(set (match_operand:V4SF 0 "register_operand" "=x")
2254 (match_operand:SF 1 "register_operand" "0")))]
2256 "shufps\t{$0, %0, %0|%0, %0, 0}"
2257 [(set_attr "type" "sselog1")
2258 (set_attr "mode" "V4SF")])
2260 ;; Although insertps takes register source, we prefer
2261 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF operands.  Alternatives, in order:
;;   SSE: unpcklps (operand 2 in register), insertps $0x10 (operand 2 in
;;        memory), movss of operand 1 (operand 2 matches constraint C);
;;   MMX (*y): punpckldq, and movd of operand 1 (operand 2 matches C).
;; Only the insertps alternative sets prefix_extra.
2262 (define_insn "*vec_concatv2sf_sse4_1"
2263 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
2265 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
2266 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
2269 unpcklps\t{%2, %0|%0, %2}
2270 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
2271 movss\t{%1, %0|%0, %1}
2272 punpckldq\t{%2, %0|%0, %2}
2273 movd\t{%1, %0|%0, %1}"
2274 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
2275 (set_attr "prefix_extra" "*,1,*,*,*")
2276 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
2278 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2279 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2280 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands; operand 2 is restricted to a
;; register or zero (reg_or_0_operand).  Alternatives, in order: unpcklps,
;; movss of operand 1, then the MMX (*y) forms punpckldq and movd.
2281 (define_insn "*vec_concatv2sf_sse"
2282 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2284 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2285 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2288 unpcklps\t{%2, %0|%0, %2}
2289 movss\t{%1, %0|%0, %1}
2290 punpckldq\t{%2, %0|%0, %2}
2291 movd\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2293 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves.  Operand 1 is tied to the
;; destination; operand 2 is supplied by movlhps (register) or movhps
;; (memory).
2295 (define_insn "*vec_concatv4sf_sse"
2296 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2298 (match_operand:V2SF 1 "register_operand" " 0,0")
2299 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2302 movlhps\t{%2, %0|%0, %2}
2303 movhps\t{%2, %0|%0, %2}"
2304 [(set_attr "type" "ssemov")
2305 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialization for every SSEMODE mode; all work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
2307 (define_expand "vec_init<mode>"
2308 [(match_operand:SSEMODE 0 "register_operand" "")
2309 (match_operand 1 "" "")]
2312 ix86_expand_vector_init (false, operands[0], operands[1]);
;; SF operand 2 combined with V4SF operand 1, which is either tied to the
;; destination or matches constraint C.  The templates cover movss from an
;; SSE register, movss from memory, and movd from a general register into a
;; Y2-class destination.
2316 (define_insn "vec_setv4sf_0"
2317 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2320 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2321 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2325 movss\t{%2, %0|%0, %2}
2326 movss\t{%2, %0|%0, %2}
2327 movd\t{%2, %0|%0, %2}
2329 [(set_attr "type" "ssemov")
2330 (set_attr "mode" "SF")])
2332 ;; A subset is vec_setv4sf.
;; insertps form: operand 3 must satisfy const_pow2_1_to_8_operand.  The
;; output code replaces it with exact_log2 of its value shifted left by 4,
;; and that rewritten operand is printed as the insertps immediate.
2333 (define_insn "*vec_setv4sf_sse4_1"
2334 [(set (match_operand:V4SF 0 "register_operand" "=x")
2337 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2338 (match_operand:V4SF 1 "register_operand" "0")
2339 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2342 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2343 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2345 [(set_attr "type" "sselog")
2346 (set_attr "prefix_extra" "1")
2347 (set_attr "mode" "V4SF")])
;; insertps with an arbitrary 8-bit immediate (operand 3, 0..255), modeled
;; as an unspec over operands 2, 1 and 3.  Operand 1 is tied to the
;; destination; operand 2 must be a register.  The ';' after the template
;; string begins an (empty) comment.
2349 (define_insn "sse4_1_insertps"
2350 [(set (match_operand:V4SF 0 "register_operand" "=x")
2351 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2352 (match_operand:V4SF 1 "register_operand" "0")
2353 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2356 "insertps\t{%3, %2, %0|%0, %2, %3}";
2357 [(set_attr "type" "sselog")
2358 (set_attr "prefix_extra" "1")
2359 (set_attr "mode" "V4SF")])
;; After reload, a V4SF memory destination receiving SF operand 1 is turned
;; into a plain SFmode move to byte offset 0 of that memory.
2362 [(set (match_operand:V4SF 0 "memory_operand" "")
2365 (match_operand:SF 1 "nonmemory_operand" ""))
2368 "TARGET_SSE && reload_completed"
2371 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element insertion for every SSEMODE mode: operand 1 is the scalar,
;; operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set, called with false as its first argument.
2375 (define_expand "vec_set<mode>"
2376 [(match_operand:SSEMODE 0 "register_operand" "")
2377 (match_operand:<ssescalarmode> 1 "register_operand" "")
2378 (match_operand 2 "const_int_operand" "")]
2381 ix86_expand_vector_set (false, operands[0], operands[1],
2382 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  After reload this is split into an
;; ordinary SF move: operand 1 is re-expressed in SFmode (as a REG with the
;; same register number, or through gen_lowpart) and moved to operand 0.
;; Memory-to-memory is rejected by the insn condition.
2386 (define_insn_and_split "*vec_extractv4sf_0"
2387 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2389 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2390 (parallel [(const_int 0)])))]
2391 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2393 "&& reload_completed"
2396 rtx op1 = operands[1];
2398 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2400 op1 = gen_lowpart (SFmode, op1);
2401 emit_move_insn (operands[0], op1);
;; extractps: element index operand 2 (0..3) of V4SF register operand 1 is
;; written to a general register or memory (constraint "rm").
2405 (define_insn "*sse4_1_extractps"
2406 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2408 (match_operand:V4SF 1 "register_operand" "x")
2409 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2411 "extractps\t{%2, %1, %0|%0, %1, %2}"
2412 [(set_attr "type" "sselog")
2413 (set_attr "prefix_extra" "1")
2414 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF in offsettable
;; memory.  The split replaces it with an SFmode load from byte offset
;; 4 * index of that memory.
2416 (define_insn_and_split "*vec_extract_v4sf_mem"
2417 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2419 (match_operand:V4SF 1 "memory_operand" "o")
2420 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2426 int i = INTVAL (operands[2]);
2428 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element extraction from a V4SF register; operand 2 is the constant
;; index.  Delegates to ix86_expand_vector_extract, called with false as
;; its first argument.
2432 (define_expand "vec_extractv4sf"
2433 [(match_operand:SF 0 "register_operand" "")
2434 (match_operand:V4SF 1 "register_operand" "")
2435 (match_operand 2 "const_int_operand" "")]
2438 ix86_expand_vector_extract (false, operands[0], operands[1],
2439 INTVAL (operands[2]));
2443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2445 ;; Parallel double-precision floating point element swizzling
2447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander counterpart of sse2_unpckhpd.  The V2DF operands may be
;; registers or memory; they are passed through ix86_fixup_binary_operands
;; (code UNKNOWN) before the pattern is emitted.
2449 (define_expand "sse2_unpckhpd_exp"
2450 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2453 (match_operand:V2DF 1 "nonimmediate_operand" "")
2454 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2455 (parallel [(const_int 1)
2458 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Alternatives:
;;   x <- (1 tied, 2 in x) : unpckhpd
;;   x <- (1 in o, 2 tied) : movlpd load using the %H form of operand 1
;;   m <- (1 in x, 2 tied) : movhpd store of operand 1
;; The condition rejects the case where both inputs are memory.
2460 (define_insn "sse2_unpckhpd"
2461 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2464 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2465 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2466 (parallel [(const_int 1)
2468 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2470 unpckhpd\t{%2, %0|%0, %2}
2471 movlpd\t{%H1, %0|%0, %H1}
2472 movhpd\t{%1, %0|%0, %1}"
2473 [(set_attr "type" "sselog,ssemov,ssemov")
2474 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup for a register destination (source in register or memory).  A
;; second alternative allows an offsettable memory destination with a
;; register source; memory-to-memory is rejected by the condition.
2476 (define_insn "*sse3_movddup"
2477 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2480 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2482 (parallel [(const_int 0)
2484 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2486 movddup\t{%1, %0|%0, %1}
2488 [(set_attr "type" "sselog1,ssemov")
2489 (set_attr "mode" "V2DF")])
;; After reload, with a memory destination and a register source: take the
;; DFmode view of the source register and store it twice, at byte offsets
;; 0 and 8 of the destination.
2492 [(set (match_operand:V2DF 0 "memory_operand" "")
2495 (match_operand:V2DF 1 "register_operand" "")
2497 (parallel [(const_int 0)
2499 "TARGET_SSE3 && reload_completed"
2502 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2503 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2504 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander counterpart of sse2_unpcklpd.  The V2DF operands may be
;; registers or memory; they are passed through ix86_fixup_binary_operands
;; (code UNKNOWN) before the pattern is emitted.
2508 (define_expand "sse2_unpcklpd_exp"
2509 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2512 (match_operand:V2DF 1 "nonimmediate_operand" "")
2513 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2514 (parallel [(const_int 0)
2517 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Operand 1 is tied to the destination in all three alternatives:
;;   x <- x : unpcklpd
;;   x <- m : movhpd load of operand 2
;;   o <- x : movlpd store of operand 2 to the %H form of the destination
;; The condition rejects the case where both inputs are memory.
2519 (define_insn "sse2_unpcklpd"
2520 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2523 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2524 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2525 (parallel [(const_int 0)
2527 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2529 unpcklpd\t{%2, %0|%0, %2}
2530 movhpd\t{%2, %0|%0, %2}
2531 movlpd\t{%2, %H0|%H0, %2}"
2532 [(set_attr "type" "sselog,ssemov,ssemov")
2533 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 and forwarding to
;; sse2_shufpd_1 with the mask decoded into selector operands; bit 1 of the
;; mask chooses 3 or 2 for the last selector.
2535 (define_expand "sse2_shufpd"
2536 [(match_operand:V2DF 0 "register_operand" "")
2537 (match_operand:V2DF 1 "register_operand" "")
2538 (match_operand:V2DF 2 "nonimmediate_operand" "")
2539 (match_operand:SI 3 "const_int_operand" "")]
2542 int mask = INTVAL (operands[3]);
2543 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2545 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selectors as separate operands: operand 3 is 0 or 1,
;; operand 4 is 2 or 3.  The output code rebuilds the immediate as
;; operand 3 | ((operand 4 - 2) << 1) and stores it in operands[3] for
;; printing.
2549 (define_insn "sse2_shufpd_1"
2550 [(set (match_operand:V2DF 0 "register_operand" "=x")
2553 (match_operand:V2DF 1 "register_operand" "0")
2554 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2555 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2556 (match_operand 4 "const_2_to_3_operand" "")])))]
2560 mask = INTVAL (operands[3]);
2561 mask |= (INTVAL (operands[4]) - 2) << 1;
2562 operands[3] = GEN_INT (mask);
2564 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2566 [(set_attr "type" "sselog")
2567 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0.  The first
;; alternative (register source, memory destination) is a movhpd store; the
;; others cover a source tied to the destination register and a source in
;; offsettable memory.  Memory-to-memory is rejected by the condition.
2569 (define_insn "sse2_storehpd"
2570 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2572 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2573 (parallel [(const_int 1)])))]
2574 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2576 movhpd\t{%1, %0|%0, %1}
2579 [(set_attr "type" "ssemov,sselog1,ssemov")
2580 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 from a V2DF in memory into a register
;; becomes a plain DF load from byte offset 8 of that memory.
2583 [(set (match_operand:DF 0 "register_operand" "")
2585 (match_operand:V2DF 1 "memory_operand" "")
2586 (parallel [(const_int 1)])))]
2587 "TARGET_SSE2 && reload_completed"
2588 [(set (match_dup 0) (match_dup 1))]
2590 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0.  The first
;; alternative (register source, memory destination) is a movlpd store; the
;; others cover register-to-register and memory-to-register.
;; Memory-to-memory is rejected by the condition.
2593 (define_insn "sse2_storelpd"
2594 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2596 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2597 (parallel [(const_int 0)])))]
2598 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2600 movlpd\t{%1, %0|%0, %1}
2603 [(set_attr "type" "ssemov")
2604 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF into a register becomes an
;; ordinary DF move: operand 1 is re-expressed in DFmode (as a REG with the
;; same register number, or through gen_lowpart) and moved to operand 0.
2607 [(set (match_operand:DF 0 "register_operand" "")
2609 (match_operand:V2DF 1 "nonimmediate_operand" "")
2610 (parallel [(const_int 0)])))]
2611 "TARGET_SSE2 && reload_completed"
2614 rtx op1 = operands[1];
2616 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2618 op1 = gen_lowpart (DFmode, op1);
2619 emit_move_insn (operands[0], op1);
;; Expander counterpart of sse2_loadhpd: element 0 of V2DF operand 1
;; together with DF operand 2.  Operands are passed through
;; ix86_fixup_binary_operands (code UNKNOWN) first.
2623 (define_expand "sse2_loadhpd_exp"
2624 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2627 (match_operand:V2DF 1 "nonimmediate_operand" "")
2628 (parallel [(const_int 0)]))
2629 (match_operand:DF 2 "nonimmediate_operand" "")))]
2631 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Load the high DF element: the result keeps element 0 of operand 1 and
;; takes operand 2 as element 1.  Operand 1 is tied to the destination in
;; every alternative: movhpd loads operand 2 from memory, unpcklpd takes it
;; from a register, and the memory-destination form is rewritten by the
;; define_split that follows into a DF store at byte offset 8.
;; The former alternative with operand 2 tied to the destination and
;; operand 1 in another register (shufpd $1) has been removed: shufpd always
;; takes its low result element from the destination register, so it could
;; not place element 0 of operand 1 there and produced a wrong result.
2633 (define_insn "sse2_loadhpd"
2634 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2637 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2638 (parallel [(const_int 0)]))
2639 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2640 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2642 movhpd\t{%2, %0|%0, %2}
2643 unpcklpd\t{%2, %0|%0, %2}
2646 [(set_attr "type" "ssemov,sselog,other")
2647 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, with a V2DF memory destination whose element 0 is kept
;; (match_dup 0) and whose other element comes from register operand 1,
;; the operation becomes a plain DF store at byte offset 8.
2650 [(set (match_operand:V2DF 0 "memory_operand" "")
2652 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2653 (match_operand:DF 1 "register_operand" "")))]
2654 "TARGET_SSE2 && reload_completed"
2655 [(set (match_dup 0) (match_dup 1))]
2657 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander counterpart of sse2_loadlpd: DF operand 2 together with
;; element 1 of V2DF operand 1.  Operands are passed through
;; ix86_fixup_binary_operands (code UNKNOWN) first.
2660 (define_expand "sse2_loadlpd_exp"
2661 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2663 (match_operand:DF 2 "nonimmediate_operand" "")
2665 (match_operand:V2DF 1 "nonimmediate_operand" "")
2666 (parallel [(const_int 1)]))))]
2668 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Load the low DF element: the result takes operand 2 as element 0 and
;; keeps element 1 of operand 1.  Alternatives, in order:
;;   x <- (2 in m, 1 = C)      : movsd load
;;   x <- (2 in m, 1 tied)     : movlpd load
;;   x <- (2 in x, 1 tied)     : movsd register move
;;   x <- (2 tied, 1 in x)     : shufpd $2 taking the high element from %1
;;   x <- (2 tied, 1 in o)     : movhpd load using the %H form of operand 1
;;   m <- (2 in x*fr, 1 tied)  : memory-destination form
;; In the shufpd alternative operand 2 is tied to %0, so the shufpd source
;; must be operand 1; naming %2 there would shuffle the destination with
;; itself and never bring in element 1 of operand 1.
2670 (define_insn "sse2_loadlpd"
2671 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2673 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2675 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2676 (parallel [(const_int 1)]))))]
2677 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2679 movsd\t{%2, %0|%0, %2}
2680 movlpd\t{%2, %0|%0, %2}
2681 movsd\t{%2, %0|%0, %2}
2682 shufpd\t{$2, %1, %0|%0, %1, 2}
2683 movhpd\t{%H1, %0|%0, %H1}
2685 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2686 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, with a V2DF memory destination whose element 1 is kept
;; (match_dup 0) and whose low element is replaced by register operand 1,
;; the operation becomes a plain DF store to the low half of the memory,
;; i.e. byte offset 0.  (Offset 8 belongs to the loadhpd splitter, which
;; replaces element 1 instead.)
2689 [(set (match_operand:V2DF 0 "memory_operand" "")
2691 (match_operand:DF 1 "register_operand" "")
2692 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2693 "TARGET_SSE2 && reload_completed"
2694 [(set (match_dup 0) (match_dup 1))]
2696 operands[0] = adjust_address (operands[0], DFmode, 0);
2699 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (enabled when TARGET_SSE but not TARGET_SSE2) extraction of
;; element 1 of a V2DF, done with single-precision moves:
;;   m <- x : movhps store
;;   x <- x : movhlps
;;   x <- o : movlps load using the %H form of the memory source
2701 (define_insn "*vec_extractv2df_1_sse"
2702 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2704 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2705 (parallel [(const_int 1)])))]
2706 "!TARGET_SSE2 && TARGET_SSE
2707 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2709 movhps\t{%1, %0|%0, %1}
2710 movhlps\t{%1, %0|%0, %1}
2711 movlps\t{%H1, %0|%0, %H1}"
2712 [(set_attr "type" "ssemov")
2713 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (enabled when TARGET_SSE but not TARGET_SSE2) extraction of
;; element 0 of a V2DF, done with single-precision moves:
;;   m <- x : movlps store
;;   x <- x : movaps (whole-register copy)
;;   x <- m : movlps load
2715 (define_insn "*vec_extractv2df_0_sse"
2716 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2718 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2719 (parallel [(const_int 0)])))]
2720 "!TARGET_SSE2 && TARGET_SSE
2721 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2723 movlps\t{%1, %0|%0, %1}
2724 movaps\t{%1, %0|%0, %1}
2725 movlps\t{%1, %0|%0, %1}"
2726 [(set_attr "type" "ssemov")
2727 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of two V2DF operands: the low element comes from
;; operand 2, the high element from operand 1 (as the alternatives below
;; show).  Alternatives, in order:
;;   x <- (2 in x, 1 tied) : movsd
;;   x <- (2 in m, 1 tied) : movlpd load
;;   m <- (2 in x, 1 tied) : movlpd store
;;   x <- (2 tied, 1 in x) : shufpd $2 taking the high element from %1
;;   x <- (2 tied, 1 in o) : movhps load using the %H form of operand 1
;;   o <- (2 tied, 1 in x) : movhps store of operand 1 to the %H form of
;;                           the destination
;; In the shufpd alternative operand 2 is tied to %0, so the shufpd source
;; must be operand 1; naming %2 there would shuffle the destination with
;; itself and never bring in the high element of operand 1.
2729 (define_insn "sse2_movsd"
2730 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2732 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2733 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2737 movsd\t{%2, %0|%0, %2}
2738 movlpd\t{%2, %0|%0, %2}
2739 movlpd\t{%2, %0|%0, %2}
2740 shufpd\t{$2, %1, %0|%0, %1, 2}
2741 movhps\t{%H1, %0|%0, %H1}
2742 movhps\t{%1, %H0|%H0, %1}"
2743 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2744 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; movddup from a DF in a register or memory into a V2DF register.
2746 (define_insn "*vec_dupv2df_sse3"
2747 [(set (match_operand:V2DF 0 "register_operand" "=x")
2749 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2751 "movddup\t{%1, %0|%0, %1}"
2752 [(set_attr "type" "sselog1")
2753 (set_attr "mode" "DF")])
;; Named vec_dup pattern for V2DF.  The DF input must be a register and is
;; tied to the destination.
2755 (define_insn "vec_dupv2df"
2756 [(set (match_operand:V2DF 0 "register_operand" "=x")
2758 (match_operand:DF 1 "register_operand" "0")))]
2761 [(set_attr "type" "sselog1")
2762 (set_attr "mode" "V2DF")])
;; movddup from DF operand 1 (register or memory) into a V2DF register;
;; operand 1 is the only operand printed by the template.
2764 (define_insn "*vec_concatv2df_sse3"
2765 [(set (match_operand:V2DF 0 "register_operand" "=x")
2767 (match_operand:DF 1 "nonimmediate_operand" "xm")
2770 "movddup\t{%1, %0|%0, %1}"
2771 [(set_attr "type" "sselog1")
2772 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives, in order:
;;   Y2 class: unpcklpd (operand 2 in register), movhpd (operand 2 in
;;             memory), movsd load of operand 1 (operand 2 matches C);
;;   x class:  movlhps (operand 2 in register), movhps (operand 2 in
;;             memory), i.e. single-precision move forms.
2774 (define_insn "*vec_concatv2df"
2775 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2777 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2778 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2781 unpcklpd\t{%2, %0|%0, %2}
2782 movhpd\t{%2, %0|%0, %2}
2783 movsd\t{%1, %0|%0, %1}
2784 movlhps\t{%2, %0|%0, %2}
2785 movhps\t{%2, %0|%0, %2}"
2786 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2787 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element extraction from a V2DF register; operand 2 is the constant
;; index.  Delegates to ix86_expand_vector_extract, called with false as
;; its first argument.
2789 (define_expand "vec_extractv2df"
2790 [(match_operand:DF 0 "register_operand" "")
2791 (match_operand:V2DF 1 "register_operand" "")
2792 (match_operand 2 "const_int_operand" "")]
2795 ix86_expand_vector_extract (false, operands[0], operands[1],
2796 INTVAL (operands[2]));
2800 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2802 ;; Parallel integral arithmetic
2804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation for the integer vector modes in SSEMODEI.  The preparation
;; statement sets operands[2] to a register holding the zero vector of the
;; mode (CONST0_RTX forced into a register).
2806 (define_expand "neg<mode>2"
2807 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2810 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2812 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the SSEMODEI integer vector modes.  Both
;; inputs may be registers or memory; they are adjusted by
;; ix86_fixup_binary_operands_no_copy for the iterated rtx code.
2814 (define_expand "<plusminus_insn><mode>3"
2815 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2817 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2818 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2820 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the add/subtract expander: emits
;; p<mnemonic><size suffix> with operand 1 tied to the destination (the
;; <comm> attribute supplies the commutativity marker) and operand 2 in a
;; register or memory.  Requires SSE2 and ix86_binary_operator_ok.
2822 (define_insn "*<plusminus_insn><mode>3"
2823 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2825 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2826 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2827 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2828 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2829 [(set_attr "type" "sseiadd")
2830 (set_attr "prefix_data16" "1")
2831 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes on the byte and word vector modes
;; (SSEMODE12).  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for the iterated rtx code.
2833 (define_expand "sse2_<plusminus_insn><mode>3"
2834 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2835 (sat_plusminus:SSEMODE12
2836 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2837 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2839 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus expander on SSEMODE12: emits
;; p<mnemonic><size suffix> with operand 1 tied to the destination and
;; operand 2 in a register or memory.  Requires SSE2 and
;; ix86_binary_operator_ok.
2841 (define_insn "*sse2_<plusminus_insn><mode>3"
2842 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2843 (sat_plusminus:SSEMODE12
2844 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2845 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2846 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2847 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2848 [(set_attr "type" "sseiadd")
2849 (set_attr "prefix_data16" "1")
2850 (set_attr "mode" "TI")])
;; V16QI multiply.  The insn is only valid before reload (the condition
;; excludes reload_in_progress and reload_completed) and is expanded into a
;; sequence built on the V8HI multiply.  Two sequences are visible below:
;; one uses the ix86_expand_sse5_unpack / ix86_expand_sse5_pack helpers on
;; V8HI temporaries, the other widens with punpckhbw/punpcklbw, multiplies
;; the words, and merges the low bytes of the products back with a chain of
;; punpck{h,l}bw as sketched in the end-of-line diagrams.
2852 (define_insn_and_split "mulv16qi3"
2853 [(set (match_operand:V16QI 0 "register_operand" "")
2854 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2855 (match_operand:V16QI 2 "register_operand" "")))]
2857 && !(reload_completed || reload_in_progress)"
2862 rtx t[12], op0, op[3];
2867 /* On SSE5, we can take advantage of the pperm instruction to pack and
2868 unpack the bytes. Unpack data such that we've got a source byte in
2869 each low byte of each word. We don't care what goes into the high
2870 byte, so put 0 there. */
2871 for (i = 0; i < 6; ++i)
2872 t[i] = gen_reg_rtx (V8HImode);
2874 for (i = 0; i < 2; i++)
2877 op[1] = operands[i+1];
2878 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2881 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2884 /* Multiply words. */
2885 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2886 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2888 /* Pack the low byte of each word back into a single xmm */
2889 op[0] = operands[0];
2892 ix86_expand_sse5_pack (op);
2896 for (i = 0; i < 12; ++i)
2897 t[i] = gen_reg_rtx (V16QImode);
2899 /* Unpack data such that we've got a source byte in each low byte of
2900 each word. We don't care what goes into the high byte of each word.
2901 Rather than trying to get zero in there, most convenient is to let
2902 it be a copy of the low byte. */
2903 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2904 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2905 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2906 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2908 /* Multiply words. The end-of-line annotations here give a picture of what
2909 the output of that instruction looks like. Dot means don't care; the
2910 letters are the bytes of the result with A being the most significant. */
2911 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2912 gen_lowpart (V8HImode, t[0]),
2913 gen_lowpart (V8HImode, t[1])));
2914 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2915 gen_lowpart (V8HImode, t[2]),
2916 gen_lowpart (V8HImode, t[3])));
2918 /* Extract the relevant bytes and merge them back together. */
2919 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2920 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2921 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2922 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2923 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2924 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2927 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Both inputs may be registers or memory; they
;; are adjusted by ix86_fixup_binary_operands_no_copy for MULT.
2931 (define_expand "mulv8hi3"
2932 [(set (match_operand:V8HI 0 "register_operand" "")
2933 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2934 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2936 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.
2938 (define_insn "*mulv8hi3"
2939 [(set (match_operand:V8HI 0 "register_operand" "=x")
2940 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2941 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2942 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2943 "pmullw\t{%2, %0|%0, %2}"
2944 [(set_attr "type" "sseimul")
2945 (set_attr "prefix_data16" "1")
2946 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (see the pmulhw insn
;; below).  Operands are adjusted by ix86_fixup_binary_operands_no_copy for
;; MULT.
2948 (define_expand "smulv8hi3_highpart"
2949 [(set (match_operand:V8HI 0 "register_operand" "")
2954 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2956 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2959 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
2961 (define_insn "*smulv8hi3_highpart"
2962 [(set (match_operand:V8HI 0 "register_operand" "=x")
2967 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2969 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2971 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2972 "pmulhw\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "sseimul")
2974 (set_attr "prefix_data16" "1")
2975 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (see the pmulhuw insn
;; below).  Operands are adjusted by ix86_fixup_binary_operands_no_copy for
;; MULT.
2977 (define_expand "umulv8hi3_highpart"
2978 [(set (match_operand:V8HI 0 "register_operand" "")
2983 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2985 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2988 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
2990 (define_insn "*umulv8hi3_highpart"
2991 [(set (match_operand:V8HI 0 "register_operand" "=x")
2996 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2998 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3000 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3001 "pmulhuw\t{%2, %0|%0, %2}"
3002 [(set_attr "type" "sseimul")
3003 (set_attr "prefix_data16" "1")
3004 (set_attr "mode" "TI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result (see the pmuludq insn below).  Operands are
;; adjusted by ix86_fixup_binary_operands_no_copy for MULT.
3006 (define_expand "sse2_umulv2siv2di3"
3007 [(set (match_operand:V2DI 0 "register_operand" "")
3011 (match_operand:V4SI 1 "nonimmediate_operand" "")
3012 (parallel [(const_int 0) (const_int 2)])))
3015 (match_operand:V4SI 2 "nonimmediate_operand" "")
3016 (parallel [(const_int 0) (const_int 2)])))))]
3018 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq: elements 0 and 2 of each V4SI input are selected and the result
;; is V2DI.  Operand 1 is tied to the destination and marked commutative;
;; operand 2 may be a register or memory.
3020 (define_insn "*sse2_umulv2siv2di3"
3021 [(set (match_operand:V2DI 0 "register_operand" "=x")
3025 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3026 (parallel [(const_int 0) (const_int 2)])))
3029 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3030 (parallel [(const_int 0) (const_int 2)])))))]
3031 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3032 "pmuludq\t{%2, %0|%0, %2}"
3033 [(set_attr "type" "sseimul")
3034 (set_attr "prefix_data16" "1")
3035 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 widening multiply of elements 0 and 2 of two
;; V4SI operands into a V2DI result (see the pmuldq insn below).  Operands
;; are adjusted by ix86_fixup_binary_operands_no_copy for MULT.
3037 (define_expand "sse4_1_mulv2siv2di3"
3038 [(set (match_operand:V2DI 0 "register_operand" "")
3042 (match_operand:V4SI 1 "nonimmediate_operand" "")
3043 (parallel [(const_int 0) (const_int 2)])))
3046 (match_operand:V4SI 2 "nonimmediate_operand" "")
3047 (parallel [(const_int 0) (const_int 2)])))))]
3049 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq (SSE4.1): elements 0 and 2 of each V4SI input are selected and
;; the result is V2DI.  Operand 1 is tied to the destination and marked
;; commutative; operand 2 may be a register or memory.
3051 (define_insn "*sse4_1_mulv2siv2di3"
3052 [(set (match_operand:V2DI 0 "register_operand" "=x")
3056 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3057 (parallel [(const_int 0) (const_int 2)])))
3060 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3061 (parallel [(const_int 0) (const_int 2)])))))]
3062 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3063 "pmuldq\t{%2, %0|%0, %2}"
3064 [(set_attr "type" "sseimul")
3065 (set_attr "prefix_extra" "1")
3066 (set_attr "mode" "TI")])
;; Expander for pmaddwd.  The pattern uses two V4HI selections from each
;; V8HI input, one starting at index 0 and one starting at index 1 and
;; ending at index 7, and produces a V4SI result.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for MULT.
3068 (define_expand "sse2_pmaddwd"
3069 [(set (match_operand:V4SI 0 "register_operand" "")
3074 (match_operand:V8HI 1 "nonimmediate_operand" "")
3075 (parallel [(const_int 0)
3081 (match_operand:V8HI 2 "nonimmediate_operand" "")
3082 (parallel [(const_int 0)
3088 (vec_select:V4HI (match_dup 1)
3089 (parallel [(const_int 1)
3094 (vec_select:V4HI (match_dup 2)
3095 (parallel [(const_int 1)
3098 (const_int 7)]))))))]
3100 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd insn matching the expander above: V8HI inputs, V4SI result.
;; Operand 1 is tied to the destination and marked commutative; operand 2
;; may be a register or memory.
3102 (define_insn "*sse2_pmaddwd"
3103 [(set (match_operand:V4SI 0 "register_operand" "=x")
3108 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3109 (parallel [(const_int 0)
3115 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3116 (parallel [(const_int 0)
3122 (vec_select:V4HI (match_dup 1)
3123 (parallel [(const_int 1)
3128 (vec_select:V4HI (match_dup 2)
3129 (parallel [(const_int 1)
3132 (const_int 7)]))))))]
3133 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3134 "pmaddwd\t{%2, %0|%0, %2}"
3135 [(set_attr "type" "sseiadd")
3136 (set_attr "prefix_data16" "1")
3137 (set_attr "mode" "TI")])
;; V4SI multiply expander with register inputs.  When SSE4.1 or SSE5 is
;; available the operands are additionally adjusted by
;; ix86_fixup_binary_operands_no_copy for MULT.
3139 (define_expand "mulv4si3"
3140 [(set (match_operand:V4SI 0 "register_operand" "")
3141 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3142 (match_operand:V4SI 2 "register_operand" "")))]
3145 if (TARGET_SSE4_1 || TARGET_SSE5)
3146 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; pmulld (SSE4.1): V4SI multiply.  Operand 1 is tied to the destination
;; and marked commutative; operand 2 may be a register or memory.
3149 (define_insn "*sse4_1_mulv4si3"
3150 [(set (match_operand:V4SI 0 "register_operand" "=x")
3151 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3152 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3153 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3154 "pmulld\t{%2, %0|%0, %2}"
3155 [(set_attr "type" "sseimul")
3156 (set_attr "prefix_extra" "1")
3157 (set_attr "mode" "TI")])
3159 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3160 ;; multiply/add. In general, we expect the define_split to occur before
3161 ;; register allocation, so we have to handle the corner case where the target
3162 ;; is the same as one of the inputs.
3163 (define_insn_and_split "*sse5_mulv4si3"
3164 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3165 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3166 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3169 "&& (reload_completed
3170 || (!reg_mentioned_p (operands[0], operands[1])
3171 && !reg_mentioned_p (operands[0], operands[2])))"
3175 (plus:V4SI (mult:V4SI (match_dup 1)
3179 operands[3] = CONST0_RTX (V4SImode);
3181 [(set_attr "type" "ssemuladd")
3182 (set_attr "mode" "TI")])
3184 (define_insn_and_split "*sse2_mulv4si3"
3185 [(set (match_operand:V4SI 0 "register_operand" "")
3186 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3187 (match_operand:V4SI 2 "register_operand" "")))]
3188 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3189 && !(reload_completed || reload_in_progress)"
3194 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3200 t1 = gen_reg_rtx (V4SImode);
3201 t2 = gen_reg_rtx (V4SImode);
3202 t3 = gen_reg_rtx (V4SImode);
3203 t4 = gen_reg_rtx (V4SImode);
3204 t5 = gen_reg_rtx (V4SImode);
3205 t6 = gen_reg_rtx (V4SImode);
3206 thirtytwo = GEN_INT (32);
3208 /* Multiply elements 2 and 0. */
3209 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3212 /* Shift both input vectors down one element, so that elements 3
3213 and 1 are now in the slots for elements 2 and 0. For K8, at
3214 least, this is faster than using a shuffle. */
3215 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3216 gen_lowpart (TImode, op1),
3218 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3219 gen_lowpart (TImode, op2),
3221 /* Multiply elements 3 and 1. */
3222 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3225 /* Move the results in element 2 down to element 1; we don't care
3226 what goes in elements 2 and 3. */
3227 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3228 const0_rtx, const0_rtx));
3229 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3230 const0_rtx, const0_rtx));
3232 /* Merge the parts back together. */
3233 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
3237 (define_insn_and_split "mulv2di3"
3238 [(set (match_operand:V2DI 0 "register_operand" "")
3239 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3240 (match_operand:V2DI 2 "register_operand" "")))]
3242 && !(reload_completed || reload_in_progress)"
3247 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3252 /* op1: A,B,C,D, op2: E,F,G,H */
3254 op1 = gen_lowpart (V4SImode, operands[1]);
3255 op2 = gen_lowpart (V4SImode, operands[2]);
3256 t1 = gen_reg_rtx (V4SImode);
3257 t2 = gen_reg_rtx (V4SImode);
3258 t3 = gen_reg_rtx (V4SImode);
3259 t4 = gen_reg_rtx (V2DImode);
3260 t5 = gen_reg_rtx (V2DImode);
3263 emit_insn (gen_sse2_pshufd_1 (t1, op1,
3270 emit_move_insn (t2, CONST0_RTX (V4SImode));
3272 /* t3: (B*E),(A*F),(D*G),(C*H) */
3273 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
3275 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
3276 emit_insn (gen_sse5_phadddq (t4, t3));
3278 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
3279 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
3281 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
3282 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
3289 t1 = gen_reg_rtx (V2DImode);
3290 t2 = gen_reg_rtx (V2DImode);
3291 t3 = gen_reg_rtx (V2DImode);
3292 t4 = gen_reg_rtx (V2DImode);
3293 t5 = gen_reg_rtx (V2DImode);
3294 t6 = gen_reg_rtx (V2DImode);
3295 thirtytwo = GEN_INT (32);
3297 /* Multiply low parts. */
3298 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3299 gen_lowpart (V4SImode, op2)));
3301 /* Shift input vectors right 32 bits so we can multiply high parts. */
3302 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3303 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3305 /* Multiply high parts by low parts. */
3306 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3307 gen_lowpart (V4SImode, t3)));
3308 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3309 gen_lowpart (V4SImode, t2)));
3311 /* Shift them back. */
3312 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3313 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3315 /* Add the three parts together. */
3316 emit_insn (gen_addv2di3 (t6, t1, t4));
3317 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, V8HI x V8HI -> V4SI ("hi" variant).
;; t1 receives the mulv8hi3 result and t2 the smulv8hi3_highpart result;
;; vec_interleave_highv8hi then combines t1 and t2 into operand 0, which is
;; written through a V8HImode lowpart view (dest).
;; NOTE(review): the assignments of op1/op2 and the enabling condition are
;; not visible in this excerpt -- confirm against the full file.
3321 (define_expand "vec_widen_smult_hi_v8hi"
3322 [(match_operand:V4SI 0 "register_operand" "")
3323 (match_operand:V8HI 1 "register_operand" "")
3324 (match_operand:V8HI 2 "register_operand" "")]
3327 rtx op1, op2, t1, t2, dest;
3331 t1 = gen_reg_rtx (V8HImode);
3332 t2 = gen_reg_rtx (V8HImode);
3333 dest = gen_lowpart (V8HImode, operands[0]);
3335 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3336 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3337 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, V8HI x V8HI -> V4SI ("lo" variant).
;; t1 receives the mulv8hi3 result and t2 the smulv8hi3_highpart result;
;; vec_interleave_lowv8hi then combines t1 and t2 into operand 0, which is
;; written through a V8HImode lowpart view (dest).
;; NOTE(review): the assignments of op1/op2 and the enabling condition are
;; not visible in this excerpt -- confirm against the full file.
3341 (define_expand "vec_widen_smult_lo_v8hi"
3342 [(match_operand:V4SI 0 "register_operand" "")
3343 (match_operand:V8HI 1 "register_operand" "")
3344 (match_operand:V8HI 2 "register_operand" "")]
3347 rtx op1, op2, t1, t2, dest;
3351 t1 = gen_reg_rtx (V8HImode);
3352 t2 = gen_reg_rtx (V8HImode);
3353 dest = gen_lowpart (V8HImode, operands[0]);
3355 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3356 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3357 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI x V8HI -> V4SI ("hi" variant).
;; t1 receives the mulv8hi3 result and t2 the umulv8hi3_highpart result;
;; vec_interleave_highv8hi then combines t1 and t2 into operand 0, which is
;; written through a V8HImode lowpart view (dest).
;; NOTE(review): the assignments of op1/op2 and the enabling condition are
;; not visible in this excerpt -- confirm against the full file.
3361 (define_expand "vec_widen_umult_hi_v8hi"
3362 [(match_operand:V4SI 0 "register_operand" "")
3363 (match_operand:V8HI 1 "register_operand" "")
3364 (match_operand:V8HI 2 "register_operand" "")]
3367 rtx op1, op2, t1, t2, dest;
3371 t1 = gen_reg_rtx (V8HImode);
3372 t2 = gen_reg_rtx (V8HImode);
3373 dest = gen_lowpart (V8HImode, operands[0]);
3375 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3376 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3377 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI x V8HI -> V4SI ("lo" variant).
;; t1 receives the mulv8hi3 result and t2 the umulv8hi3_highpart result;
;; vec_interleave_lowv8hi then combines t1 and t2 into operand 0, which is
;; written through a V8HImode lowpart view (dest).
;; NOTE(review): the assignments of op1/op2 and the enabling condition are
;; not visible in this excerpt -- confirm against the full file.
3381 (define_expand "vec_widen_umult_lo_v8hi"
3382 [(match_operand:V4SI 0 "register_operand" "")
3383 (match_operand:V8HI 1 "register_operand" "")
3384 (match_operand:V8HI 2 "register_operand" "")]
3387 rtx op1, op2, t1, t2, dest;
3391 t1 = gen_reg_rtx (V8HImode);
3392 t2 = gen_reg_rtx (V8HImode);
3393 dest = gen_lowpart (V8HImode, operands[0]);
3395 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3396 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3397 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
3401 (define_expand "vec_widen_smult_hi_v4si"
3402 [(match_operand:V2DI 0 "register_operand" "")
3403 (match_operand:V4SI 1 "register_operand" "")
3404 (match_operand:V4SI 2 "register_operand" "")]
3409 t1 = gen_reg_rtx (V4SImode);
3410 t2 = gen_reg_rtx (V4SImode);
3412 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3417 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3422 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
3426 (define_expand "vec_widen_smult_lo_v4si"
3427 [(match_operand:V2DI 0 "register_operand" "")
3428 (match_operand:V4SI 1 "register_operand" "")
3429 (match_operand:V4SI 2 "register_operand" "")]
3434 t1 = gen_reg_rtx (V4SImode);
3435 t2 = gen_reg_rtx (V4SImode);
3437 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3442 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3447 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI x V4SI -> V2DI ("hi" variant).
;; Each input is interleaved with itself by vec_interleave_highv4si (t1 from
;; op1, t2 from op2); the two temporaries are then multiplied with
;; sse2_umulv2siv2di3, which writes operand 0.
;; NOTE(review): the assignments of op1/op2 and the enabling condition are
;; not visible in this excerpt -- confirm against the full file.
3452 (define_expand "vec_widen_umult_hi_v4si"
3453 [(match_operand:V2DI 0 "register_operand" "")
3454 (match_operand:V4SI 1 "register_operand" "")
3455 (match_operand:V4SI 2 "register_operand" "")]
3458 rtx op1, op2, t1, t2;
3462 t1 = gen_reg_rtx (V4SImode);
3463 t2 = gen_reg_rtx (V4SImode);
3465 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3466 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3467 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI x V4SI -> V2DI ("lo" variant).
;; Each input is interleaved with itself by vec_interleave_lowv4si (t1 from
;; op1, t2 from op2); the two temporaries are then multiplied with
;; sse2_umulv2siv2di3, which writes operand 0.
;; NOTE(review): the assignments of op1/op2 and the enabling condition are
;; not visible in this excerpt -- confirm against the full file.
3471 (define_expand "vec_widen_umult_lo_v4si"
3472 [(match_operand:V2DI 0 "register_operand" "")
3473 (match_operand:V4SI 1 "register_operand" "")
3474 (match_operand:V4SI 2 "register_operand" "")]
3477 rtx op1, op2, t1, t2;
3481 t1 = gen_reg_rtx (V4SImode);
3482 t2 = gen_reg_rtx (V4SImode);
3484 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3485 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3486 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product: operand 0 = operand 3 + sse2_pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes to a fresh V4SI temporary, which is
;; then added to the V4SI accumulator (operand 3) with addv4si3.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3490 (define_expand "sdot_prodv8hi"
3491 [(match_operand:V4SI 0 "register_operand" "")
3492 (match_operand:V8HI 1 "register_operand" "")
3493 (match_operand:V8HI 2 "register_operand" "")
3494 (match_operand:V4SI 3 "register_operand" "")]
3497 rtx t = gen_reg_rtx (V4SImode);
3498 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3499 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI inputs accumulated into a V2DI.
;; Steps visible below:
;;   t1 = sse2_umulv2siv2di3 (op1, op2), then t1 = t1 + op3 (accumulator);
;;   t2, t3 = op1, op2 passed through sse2_lshrti3 on their TImode views
;;            (the shift count is not visible in this excerpt -- presumably
;;            one 32-bit element; confirm);
;;   t4 = sse2_umulv2siv2di3 (t2, t3);
;;   operand 0 = t1 + t4.
;; NOTE(review): the enabling condition is not visible in this excerpt.
3503 (define_expand "udot_prodv4si"
3504 [(match_operand:V2DI 0 "register_operand" "")
3505 (match_operand:V4SI 1 "register_operand" "")
3506 (match_operand:V4SI 2 "register_operand" "")
3507 (match_operand:V2DI 3 "register_operand" "")]
3512 t1 = gen_reg_rtx (V2DImode);
3513 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3514 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3516 t2 = gen_reg_rtx (V4SImode);
3517 t3 = gen_reg_rtx (V4SImode);
3518 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3519 gen_lowpart (TImode, operands[1]),
3521 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3522 gen_lowpart (TImode, operands[2]),
3525 t4 = gen_reg_rtx (V2DImode);
3526 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3528 emit_insn (gen_addv2di3 (operands[0], t1, t4));
3532 (define_insn "ashr<mode>3"
3533 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3535 (match_operand:SSEMODE24 1 "register_operand" "0")
3536 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3538 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3539 [(set_attr "type" "sseishft")
3540 (set_attr "prefix_data16" "1")
3541 (set_attr "mode" "TI")])
3543 (define_insn "lshr<mode>3"
3544 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3545 (lshiftrt:SSEMODE248
3546 (match_operand:SSEMODE248 1 "register_operand" "0")
3547 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3549 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3550 [(set_attr "type" "sseishft")
3551 (set_attr "prefix_data16" "1")
3552 (set_attr "mode" "TI")])
3554 (define_insn "ashl<mode>3"
3555 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3557 (match_operand:SSEMODE248 1 "register_operand" "0")
3558 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3560 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3561 [(set_attr "type" "sseishft")
3562 (set_attr "prefix_data16" "1")
3563 (set_attr "mode" "TI")])
3565 (define_expand "vec_shl_<mode>"
3566 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3567 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3568 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3571 operands[0] = gen_lowpart (TImode, operands[0]);
3572 operands[1] = gen_lowpart (TImode, operands[1]);
3575 (define_expand "vec_shr_<mode>"
3576 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3577 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3578 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3581 operands[0] = gen_lowpart (TImode, operands[0]);
3582 operands[1] = gen_lowpart (TImode, operands[1]);
3585 (define_expand "<code>v16qi3"
3586 [(set (match_operand:V16QI 0 "register_operand" "")
3588 (match_operand:V16QI 1 "nonimmediate_operand" "")
3589 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3591 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
3593 (define_insn "*<code>v16qi3"
3594 [(set (match_operand:V16QI 0 "register_operand" "=x")
3596 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3597 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3598 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3599 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3600 [(set_attr "type" "sseiadd")
3601 (set_attr "prefix_data16" "1")
3602 (set_attr "mode" "TI")])
3604 (define_expand "<code>v8hi3"
3605 [(set (match_operand:V8HI 0 "register_operand" "")
3607 (match_operand:V8HI 1 "nonimmediate_operand" "")
3608 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3610 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
3612 (define_insn "*<code>v8hi3"
3613 [(set (match_operand:V8HI 0 "register_operand" "=x")
3615 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3616 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3617 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3618 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3619 [(set_attr "type" "sseiadd")
3620 (set_attr "prefix_data16" "1")
3621 (set_attr "mode" "TI")])
;; Unsigned maximum of two V8HI vectors.
;; Two code paths are visible.  One hands the operands to
;; ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, ...).  The other
;; builds the result as sse2_ussubv8hi3 (op1, op2) followed by addv8hi3 with
;; op2.  In that second path the intermediate goes to a fresh register when
;; operand 0 is rtx_equal_p to operand 2, so op2 is still intact for the add.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- presumably a target-feature check; confirm.
3623 (define_expand "umaxv8hi3"
3624 [(set (match_operand:V8HI 0 "register_operand" "")
3625 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3626 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3630 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3633 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3634 if (rtx_equal_p (op3, op2))
3635 op3 = gen_reg_rtx (V8HImode);
3636 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3637 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).
;; One visible path calls ix86_fixup_binary_operands_no_copy (SMAX, ...).
;; The other fills a six-entry xops array for ix86_expand_int_vcond:
;; destination, the two select arms (operand 1, then operand 2), the
;; comparison (GT operand1 operand2) and the comparison's two operands --
;; i.e. it requests op1 > op2 ? op1 : op2.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- confirm against the full file.
3642 (define_expand "smax<mode>3"
3643 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3644 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3645 (match_operand:SSEMODE14 2 "register_operand" "")))]
3649 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3655 xops[0] = operands[0];
3656 xops[1] = operands[1];
3657 xops[2] = operands[2];
3658 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3659 xops[4] = operands[1];
3660 xops[5] = operands[2];
3661 ok = ix86_expand_int_vcond (xops);
3667 (define_insn "*sse4_1_<code><mode>3"
3668 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3670 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3671 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3672 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3673 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3674 [(set_attr "type" "sseiadd")
3675 (set_attr "prefix_extra" "1")
3676 (set_attr "mode" "TI")])
;; Unsigned maximum of two V4SI vectors.
;; One visible path calls ix86_fixup_binary_operands_no_copy (UMAX, ...).
;; The other fills a six-entry xops array for ix86_expand_int_vcond:
;; destination, the two select arms (operand 1, then operand 2), the
;; comparison (GTU operand1 operand2) and the comparison's two operands --
;; i.e. it requests op1 >u op2 ? op1 : op2.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- confirm against the full file.
3678 (define_expand "umaxv4si3"
3679 [(set (match_operand:V4SI 0 "register_operand" "")
3680 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3681 (match_operand:V4SI 2 "register_operand" "")))]
3685 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3691 xops[0] = operands[0];
3692 xops[1] = operands[1];
3693 xops[2] = operands[2];
3694 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3695 xops[4] = operands[1];
3696 xops[5] = operands[2];
3697 ok = ix86_expand_int_vcond (xops);
3703 (define_insn "*sse4_1_<code><mode>3"
3704 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3706 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3707 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3708 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3709 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3710 [(set_attr "type" "sseiadd")
3711 (set_attr "prefix_extra" "1")
3712 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).
;; One visible path calls ix86_fixup_binary_operands_no_copy (SMIN, ...).
;; The other fills a six-entry xops array for ix86_expand_int_vcond:
;; destination, the two select arms (operand 2 first, then operand 1 -- the
;; reverse of the max expanders), the comparison (GT operand1 operand2) and
;; the comparison's two operands -- i.e. it requests op1 > op2 ? op2 : op1.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- confirm against the full file.
3714 (define_expand "smin<mode>3"
3715 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3716 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3717 (match_operand:SSEMODE14 2 "register_operand" "")))]
3721 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3727 xops[0] = operands[0];
3728 xops[1] = operands[2];
3729 xops[2] = operands[1];
3730 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3731 xops[4] = operands[1];
3732 xops[5] = operands[2];
3733 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).
;; One visible path calls ix86_fixup_binary_operands_no_copy (UMIN, ...).
;; The other fills a six-entry xops array for ix86_expand_int_vcond:
;; destination, the two select arms (operand 2 first, then operand 1 -- the
;; reverse of the max expanders), the comparison (GTU operand1 operand2) and
;; the comparison's two operands -- i.e. it requests op1 >u op2 ? op2 : op1.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- confirm against the full file.
3739 (define_expand "umin<mode>3"
3740 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3741 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3742 (match_operand:SSEMODE24 2 "register_operand" "")))]
3746 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3752 xops[0] = operands[0];
3753 xops[1] = operands[2];
3754 xops[2] = operands[1];
3755 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3756 xops[4] = operands[1];
3757 xops[5] = operands[2];
3758 ok = ix86_expand_int_vcond (xops);
3764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3766 ;; Parallel integral comparisons
3768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3770 (define_expand "sse2_eq<mode>3"
3771 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3773 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
3774 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
3775 "TARGET_SSE2 && !TARGET_SSE5"
3776 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
3778 (define_insn "*sse2_eq<mode>3"
3779 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3781 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3782 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3783 "TARGET_SSE2 && !TARGET_SSE5
3784 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3785 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3786 [(set_attr "type" "ssecmp")
3787 (set_attr "prefix_data16" "1")
3788 (set_attr "mode" "TI")])
3790 (define_expand "sse4_1_eqv2di3"
3791 [(set (match_operand:V2DI 0 "register_operand" "")
3793 (match_operand:V2DI 1 "nonimmediate_operand" "")
3794 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
3796 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
3798 (define_insn "*sse4_1_eqv2di3"
3799 [(set (match_operand:V2DI 0 "register_operand" "=x")
3801 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3802 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3803 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3804 "pcmpeqq\t{%2, %0|%0, %2}"
3805 [(set_attr "type" "ssecmp")
3806 (set_attr "prefix_extra" "1")
3807 (set_attr "mode" "TI")])
3809 (define_insn "sse2_gt<mode>3"
3810 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3812 (match_operand:SSEMODE124 1 "register_operand" "0")
3813 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3814 "TARGET_SSE2 && !TARGET_SSE5"
3815 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3816 [(set_attr "type" "ssecmp")
3817 (set_attr "prefix_data16" "1")
3818 (set_attr "mode" "TI")])
3820 (define_insn "sse4_2_gtv2di3"
3821 [(set (match_operand:V2DI 0 "register_operand" "=x")
3823 (match_operand:V2DI 1 "register_operand" "0")
3824 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3826 "pcmpgtq\t{%2, %0|%0, %2}"
3827 [(set_attr "type" "ssecmp")
3828 (set_attr "mode" "TI")])
3830 (define_expand "vcond<mode>"
3831 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3832 (if_then_else:SSEMODEI
3833 (match_operator 3 ""
3834 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3835 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3836 (match_operand:SSEMODEI 1 "general_operand" "")
3837 (match_operand:SSEMODEI 2 "general_operand" "")))]
3840 if (ix86_expand_int_vcond (operands))
3846 (define_expand "vcondu<mode>"
3847 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3848 (if_then_else:SSEMODEI
3849 (match_operator 3 ""
3850 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3851 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3852 (match_operand:SSEMODEI 1 "general_operand" "")
3853 (match_operand:SSEMODEI 2 "general_operand" "")))]
3856 if (ix86_expand_int_vcond (operands))
3862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3864 ;; Parallel bitwise logical operations
3866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI integer vector modes, expressed as an
;; xor involving operand 1.  The preparation code fills an rtvec with
;; constm1_rtx for every element of the mode (GET_MODE_NUNITS), wraps it in
;; a CONST_VECTOR, forces it into a register and stores it as operands[2].
;; NOTE(review): the second xor input is not visible in this excerpt --
;; presumably it refers to operands[2]; confirm against the full file.
3868 (define_expand "one_cmpl<mode>2"
3869 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3870 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3874 int i, n = GET_MODE_NUNITS (<MODE>mode);
3875 rtvec v = rtvec_alloc (n);
3877 for (i = 0; i < n; ++i)
3878 RTVEC_ELT (v, i) = constm1_rtx;
3880 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
3883 (define_insn "*sse_nand<mode>3"
3884 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3886 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3887 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3888 "(TARGET_SSE && !TARGET_SSE2)"
3889 "andnps\t{%2, %0|%0, %2}"
3890 [(set_attr "type" "sselog")
3891 (set_attr "mode" "V4SF")])
3893 (define_insn "sse2_nand<mode>3"
3894 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3896 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3897 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3899 "pandn\t{%2, %0|%0, %2}"
3900 [(set_attr "type" "sselog")
3901 (set_attr "prefix_data16" "1")
3902 (set_attr "mode" "TI")])
3904 (define_insn "*nandtf3"
3905 [(set (match_operand:TF 0 "register_operand" "=x")
3907 (not:TF (match_operand:TF 1 "register_operand" "0"))
3908 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3910 "pandn\t{%2, %0|%0, %2}"
3911 [(set_attr "type" "sselog")
3912 (set_attr "prefix_data16" "1")
3913 (set_attr "mode" "TI")])
3915 (define_expand "<code><mode>3"
3916 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3918 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3919 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3921 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3923 (define_insn "*sse_<code><mode>3"
3924 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3926 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3927 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3928 "(TARGET_SSE && !TARGET_SSE2)
3929 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3930 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3931 [(set_attr "type" "sselog")
3932 (set_attr "mode" "V4SF")])
3934 (define_insn "*sse2_<code><mode>3"
3935 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3937 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3938 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3939 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3940 "p<plogicprefix>\t{%2, %0|%0, %2}"
3941 [(set_attr "type" "sselog")
3942 (set_attr "prefix_data16" "1")
3943 (set_attr "mode" "TI")])
3945 (define_expand "<code>tf3"
3946 [(set (match_operand:TF 0 "register_operand" "")
3948 (match_operand:TF 1 "nonimmediate_operand" "")
3949 (match_operand:TF 2 "nonimmediate_operand" "")))]
3951 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3953 (define_insn "*<code>tf3"
3954 [(set (match_operand:TF 0 "register_operand" "=x")
3956 (match_operand:TF 1 "nonimmediate_operand" "%0")
3957 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3958 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3959 "p<plogicprefix>\t{%2, %0|%0, %2}"
3960 [(set_attr "type" "sselog")
3961 (set_attr "prefix_data16" "1")
3962 (set_attr "mode" "TI")])
3964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3966 ;; Parallel integral element swizzling
3968 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3971 ;; op1 = abcdefghijklmnop
3972 ;; op2 = qrstuvwxyz012345
3973 ;; h1 = aqbrcsdteufvgwhx
3974 ;; l1 = iyjzk0l1m2n3o4p5
3975 ;; h2 = aiqybjrzcks0dlt1
3976 ;; l2 = emu2fnv3gow4hpx5
3977 ;; h3 = aeimquy2bfjnrvz3
3978 ;; l3 = cgkosw04dhlptx15
3979 ;; result = bdfhjlnprtvxz135
;; Truncating pack of two V8HI vectors into one V16QI.
;; One visible path delegates to ix86_expand_sse5_pack.  The other views both
;; inputs as V16QI and applies three rounds of high/low byte interleaves
;; (h1/l1, h2/l2, h3/l3), then a final low interleave of l3 and h3 into
;; operand 0.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt -- confirm against the full file.
3980 (define_expand "vec_pack_trunc_v8hi"
3981 [(match_operand:V16QI 0 "register_operand" "")
3982 (match_operand:V8HI 1 "register_operand" "")
3983 (match_operand:V8HI 2 "register_operand" "")]
3986 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3990 ix86_expand_sse5_pack (operands);
3994 op1 = gen_lowpart (V16QImode, operands[1]);
3995 op2 = gen_lowpart (V16QImode, operands[2]);
3996 h1 = gen_reg_rtx (V16QImode);
3997 l1 = gen_reg_rtx (V16QImode);
3998 h2 = gen_reg_rtx (V16QImode);
3999 l2 = gen_reg_rtx (V16QImode);
4000 h3 = gen_reg_rtx (V16QImode);
4001 l3 = gen_reg_rtx (V16QImode);
4003 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4004 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4005 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4006 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4007 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4008 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4009 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4020 ;; result = bdfhjlnp
4021 (define_expand "vec_pack_trunc_v4si"
4022 [(match_operand:V8HI 0 "register_operand" "")
4023 (match_operand:V4SI 1 "register_operand" "")
4024 (match_operand:V4SI 2 "register_operand" "")]
4027 rtx op1, op2, h1, l1, h2, l2;
4031 ix86_expand_sse5_pack (operands);
4035 op1 = gen_lowpart (V8HImode, operands[1]);
4036 op2 = gen_lowpart (V8HImode, operands[2]);
4037 h1 = gen_reg_rtx (V8HImode);
4038 l1 = gen_reg_rtx (V8HImode);
4039 h2 = gen_reg_rtx (V8HImode);
4040 l2 = gen_reg_rtx (V8HImode);
4042 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4043 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4044 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4045 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4046 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
4056 (define_expand "vec_pack_trunc_v2di"
4057 [(match_operand:V4SI 0 "register_operand" "")
4058 (match_operand:V2DI 1 "register_operand" "")
4059 (match_operand:V2DI 2 "register_operand" "")]
4062 rtx op1, op2, h1, l1;
4066 ix86_expand_sse5_pack (operands);
4070 op1 = gen_lowpart (V4SImode, operands[1]);
4071 op2 = gen_lowpart (V4SImode, operands[2]);
4072 h1 = gen_reg_rtx (V4SImode);
4073 l1 = gen_reg_rtx (V4SImode);
4075 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4076 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4077 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
4081 (define_expand "vec_interleave_highv16qi"
4082 [(set (match_operand:V16QI 0 "register_operand" "")
4085 (match_operand:V16QI 1 "register_operand" "")
4086 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4087 (parallel [(const_int 8) (const_int 24)
4088 (const_int 9) (const_int 25)
4089 (const_int 10) (const_int 26)
4090 (const_int 11) (const_int 27)
4091 (const_int 12) (const_int 28)
4092 (const_int 13) (const_int 29)
4093 (const_int 14) (const_int 30)
4094 (const_int 15) (const_int 31)])))]
4097 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
4101 (define_expand "vec_interleave_lowv16qi"
4102 [(set (match_operand:V16QI 0 "register_operand" "")
4105 (match_operand:V16QI 1 "register_operand" "")
4106 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4107 (parallel [(const_int 0) (const_int 16)
4108 (const_int 1) (const_int 17)
4109 (const_int 2) (const_int 18)
4110 (const_int 3) (const_int 19)
4111 (const_int 4) (const_int 20)
4112 (const_int 5) (const_int 21)
4113 (const_int 6) (const_int 22)
4114 (const_int 7) (const_int 23)])))]
4117 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander for the high-half interleave of V8HI operands 1 and 2.  The
;; selection vector lists indices 4,12,5,13,6,14,7,15, and the preparation
;; code emits sse2_punpckhwd on the three operands.
;; NOTE(review): operand 0 below carries a lone "=" constraint string, while
;; the sibling vec_interleave_* expanders in this file use "".  This looks
;; like a stray character; consider normalizing it to "" after confirming
;; nothing relies on it.
4121 (define_expand "vec_interleave_highv8hi"
4122 [(set (match_operand:V8HI 0 "register_operand" "=")
4125 (match_operand:V8HI 1 "register_operand" "")
4126 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4127 (parallel [(const_int 4) (const_int 12)
4128 (const_int 5) (const_int 13)
4129 (const_int 6) (const_int 14)
4130 (const_int 7) (const_int 15)])))]
4133 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
4137 (define_expand "vec_interleave_lowv8hi"
4138 [(set (match_operand:V8HI 0 "register_operand" "")
4141 (match_operand:V8HI 1 "register_operand" "")
4142 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4143 (parallel [(const_int 0) (const_int 8)
4144 (const_int 1) (const_int 9)
4145 (const_int 2) (const_int 10)
4146 (const_int 3) (const_int 11)])))]
4149 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
4153 (define_expand "vec_interleave_highv4si"
4154 [(set (match_operand:V4SI 0 "register_operand" "")
4157 (match_operand:V4SI 1 "register_operand" "")
4158 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4159 (parallel [(const_int 2) (const_int 6)
4160 (const_int 3) (const_int 7)])))]
4163 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
4167 (define_expand "vec_interleave_lowv4si"
4168 [(set (match_operand:V4SI 0 "register_operand" "")
4171 (match_operand:V4SI 1 "register_operand" "")
4172 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4173 (parallel [(const_int 0) (const_int 4)
4174 (const_int 1) (const_int 5)])))]
4177 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
4181 (define_expand "vec_interleave_highv2di"
4182 [(set (match_operand:V2DI 0 "register_operand" "")
4185 (match_operand:V2DI 1 "register_operand" "")
4186 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4187 (parallel [(const_int 1)
4191 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
4195 (define_expand "vec_interleave_lowv2di"
4196 [(set (match_operand:V2DI 0 "register_operand" "")
4199 (match_operand:V2DI 1 "register_operand" "")
4200 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4201 (parallel [(const_int 0)
4205 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
4209 (define_insn "sse2_packsswb"
4210 [(set (match_operand:V16QI 0 "register_operand" "=x")
4213 (match_operand:V8HI 1 "register_operand" "0"))
4215 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4217 "packsswb\t{%2, %0|%0, %2}"
4218 [(set_attr "type" "sselog")
4219 (set_attr "prefix_data16" "1")
4220 (set_attr "mode" "TI")])
4222 (define_insn "sse2_packssdw"
4223 [(set (match_operand:V8HI 0 "register_operand" "=x")
4226 (match_operand:V4SI 1 "register_operand" "0"))
4228 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4230 "packssdw\t{%2, %0|%0, %2}"
4231 [(set_attr "type" "sselog")
4232 (set_attr "prefix_data16" "1")
4233 (set_attr "mode" "TI")])
4235 (define_insn "sse2_packuswb"
4236 [(set (match_operand:V16QI 0 "register_operand" "=x")
4239 (match_operand:V8HI 1 "register_operand" "0"))
4241 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4243 "packuswb\t{%2, %0|%0, %2}"
4244 [(set_attr "type" "sselog")
4245 (set_attr "prefix_data16" "1")
4246 (set_attr "mode" "TI")])
4248 (define_insn "sse2_punpckhbw"
4249 [(set (match_operand:V16QI 0 "register_operand" "=x")
4252 (match_operand:V16QI 1 "register_operand" "0")
4253 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4254 (parallel [(const_int 8) (const_int 24)
4255 (const_int 9) (const_int 25)
4256 (const_int 10) (const_int 26)
4257 (const_int 11) (const_int 27)
4258 (const_int 12) (const_int 28)
4259 (const_int 13) (const_int 29)
4260 (const_int 14) (const_int 30)
4261 (const_int 15) (const_int 31)])))]
4263 "punpckhbw\t{%2, %0|%0, %2}"
4264 [(set_attr "type" "sselog")
4265 (set_attr "prefix_data16" "1")
4266 (set_attr "mode" "TI")])
4268 (define_insn "sse2_punpcklbw"
4269 [(set (match_operand:V16QI 0 "register_operand" "=x")
4272 (match_operand:V16QI 1 "register_operand" "0")
4273 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4274 (parallel [(const_int 0) (const_int 16)
4275 (const_int 1) (const_int 17)
4276 (const_int 2) (const_int 18)
4277 (const_int 3) (const_int 19)
4278 (const_int 4) (const_int 20)
4279 (const_int 5) (const_int 21)
4280 (const_int 6) (const_int 22)
4281 (const_int 7) (const_int 23)])))]
4283 "punpcklbw\t{%2, %0|%0, %2}"
4284 [(set_attr "type" "sselog")
4285 (set_attr "prefix_data16" "1")
4286 (set_attr "mode" "TI")])
;; Emits punpckhwd on V8HI operands.  The selection list alternates
;; indices 4..7 with 12..15 (upper four elements of each of two
;; 8-element inputs, presumably operand 1 then operand 2 -- confirm).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4288 (define_insn "sse2_punpckhwd"
4289 [(set (match_operand:V8HI 0 "register_operand" "=x")
4292 (match_operand:V8HI 1 "register_operand" "0")
4293 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4294 (parallel [(const_int 4) (const_int 12)
4295 (const_int 5) (const_int 13)
4296 (const_int 6) (const_int 14)
4297 (const_int 7) (const_int 15)])))]
4299 "punpckhwd\t{%2, %0|%0, %2}"
4300 [(set_attr "type" "sselog")
4301 (set_attr "prefix_data16" "1")
4302 (set_attr "mode" "TI")])
;; Emits punpcklwd on V8HI operands.  The selection list alternates
;; indices 0..3 with 8..11 (lower four elements of each of two
;; 8-element inputs, presumably operand 1 then operand 2 -- confirm).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4304 (define_insn "sse2_punpcklwd"
4305 [(set (match_operand:V8HI 0 "register_operand" "=x")
4308 (match_operand:V8HI 1 "register_operand" "0")
4309 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4310 (parallel [(const_int 0) (const_int 8)
4311 (const_int 1) (const_int 9)
4312 (const_int 2) (const_int 10)
4313 (const_int 3) (const_int 11)])))]
4315 "punpcklwd\t{%2, %0|%0, %2}"
4316 [(set_attr "type" "sselog")
4317 (set_attr "prefix_data16" "1")
4318 (set_attr "mode" "TI")])
;; Emits punpckhdq on V4SI operands.  Selected indices are 2, 6, 3, 7
;; (upper two elements of each of two 4-element inputs, presumably
;; operand 1 then operand 2 -- confirm).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4320 (define_insn "sse2_punpckhdq"
4321 [(set (match_operand:V4SI 0 "register_operand" "=x")
4324 (match_operand:V4SI 1 "register_operand" "0")
4325 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4326 (parallel [(const_int 2) (const_int 6)
4327 (const_int 3) (const_int 7)])))]
4329 "punpckhdq\t{%2, %0|%0, %2}"
4330 [(set_attr "type" "sselog")
4331 (set_attr "prefix_data16" "1")
4332 (set_attr "mode" "TI")])
;; Emits punpckldq on V4SI operands.  Selected indices are 0, 4, 1, 5
;; (lower two elements of each of two 4-element inputs, presumably
;; operand 1 then operand 2 -- confirm).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4334 (define_insn "sse2_punpckldq"
4335 [(set (match_operand:V4SI 0 "register_operand" "=x")
4338 (match_operand:V4SI 1 "register_operand" "0")
4339 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4340 (parallel [(const_int 0) (const_int 4)
4341 (const_int 1) (const_int 5)])))]
4343 "punpckldq\t{%2, %0|%0, %2}"
4344 [(set_attr "type" "sselog")
4345 (set_attr "prefix_data16" "1")
4346 (set_attr "mode" "TI")])
;; Emits punpckhqdq on V2DI operands.  The selection list starts with
;; index 1; its remaining entries are not shown in this view.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4348 (define_insn "sse2_punpckhqdq"
4349 [(set (match_operand:V2DI 0 "register_operand" "=x")
4352 (match_operand:V2DI 1 "register_operand" "0")
4353 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4354 (parallel [(const_int 1)
4357 "punpckhqdq\t{%2, %0|%0, %2}"
4358 [(set_attr "type" "sselog")
4359 (set_attr "prefix_data16" "1")
4360 (set_attr "mode" "TI")])
;; Emits punpcklqdq on V2DI operands.  The selection list starts with
;; index 0; its remaining entries are not shown in this view.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4362 (define_insn "sse2_punpcklqdq"
4363 [(set (match_operand:V2DI 0 "register_operand" "=x")
4366 (match_operand:V2DI 1 "register_operand" "0")
4367 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4368 (parallel [(const_int 0)
4371 "punpcklqdq\t{%2, %0|%0, %2}"
4372 [(set_attr "type" "sselog")
4373 (set_attr "prefix_data16" "1")
4374 (set_attr "mode" "TI")])
;; Emits pinsrb: a QI value (operand 2, general register or memory) is
;; duplicated across V16QI and combined with operand 1, which is tied to
;; the destination.  Operand 3 is matched as a power of two
;; (const_pow2_1_to_32768_operand); the output code converts it to its
;; log2 before printing, so the instruction receives an element index.
4376 (define_insn "*sse4_1_pinsrb"
4377 [(set (match_operand:V16QI 0 "register_operand" "=x")
4379 (vec_duplicate:V16QI
4380 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4381 (match_operand:V16QI 1 "register_operand" "0")
4382 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4385 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4386 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4388 [(set_attr "type" "sselog")
4389 (set_attr "prefix_extra" "1")
4390 (set_attr "mode" "TI")])
;; Emits pinsrw: inserts an HI value (operand 2, general register or
;; memory) into the V8HI in operand 1, which is tied to the destination.
;; Operand 3 is matched as a power of two (const_pow2_1_to_128_operand)
;; and rewritten to its log2 before the template is printed.
4392 (define_insn "*sse2_pinsrw"
4393 [(set (match_operand:V8HI 0 "register_operand" "=x")
4396 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4397 (match_operand:V8HI 1 "register_operand" "0")
4398 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4401 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4402 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4404 [(set_attr "type" "sselog")
4405 (set_attr "prefix_data16" "1")
4406 (set_attr "mode" "TI")])
4408 ;; It must come before sse2_loadld since it is preferred.
;; Emits pinsrd: inserts an SI value (operand 2, general register or
;; memory) into the V4SI in operand 1, which is tied to the destination.
;; Operand 3 is matched as a power of two (const_pow2_1_to_8_operand)
;; and rewritten to its log2 before the template is printed.
4409 (define_insn "*sse4_1_pinsrd"
4410 [(set (match_operand:V4SI 0 "register_operand" "=x")
4413 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4414 (match_operand:V4SI 1 "register_operand" "0")
4415 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4418 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4419 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4421 [(set_attr "type" "sselog")
4422 (set_attr "prefix_extra" "1")
4423 (set_attr "mode" "TI")])
;; Emits pinsrq: inserts a DI value (operand 2, general register or
;; memory) into the V2DI in operand 1, which is tied to the destination.
;; Enabled only for TARGET_SSE4_1 && TARGET_64BIT.
;; Operand 3 is matched as a power of two (const_pow2_1_to_2_operand)
;; and rewritten to its log2 before the template is printed.
4425 (define_insn "*sse4_1_pinsrq"
4426 [(set (match_operand:V2DI 0 "register_operand" "=x")
4429 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4430 (match_operand:V2DI 1 "register_operand" "0")
4431 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4432 "TARGET_SSE4_1 && TARGET_64BIT"
4434 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4435 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4437 [(set_attr "type" "sselog")
4438 (set_attr "prefix_extra" "1")
4439 (set_attr "mode" "TI")])
;; Emits pextrb into an SI general register: picks the V16QI element of
;; operand 1 whose index (0..15) is the immediate operand 2.
4441 (define_insn "*sse4_1_pextrb"
4442 [(set (match_operand:SI 0 "register_operand" "=r")
4445 (match_operand:V16QI 1 "register_operand" "x")
4446 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4448 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4449 [(set_attr "type" "sselog")
4450 (set_attr "prefix_extra" "1")
4451 (set_attr "mode" "TI")])
;; Memory-destination form of pextrb: stores the V16QI element of
;; operand 1 selected by immediate operand 2 (0..15) to a QI memory
;; operand.
4453 (define_insn "*sse4_1_pextrb_memory"
4454 [(set (match_operand:QI 0 "memory_operand" "=m")
4456 (match_operand:V16QI 1 "register_operand" "x")
4457 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4459 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4460 [(set_attr "type" "sselog")
4461 (set_attr "prefix_extra" "1")
4462 (set_attr "mode" "TI")])
;; Emits pextrw into an SI general register: picks the V8HI element of
;; operand 1 whose index (0..7) is the immediate operand 2.
4464 (define_insn "*sse2_pextrw"
4465 [(set (match_operand:SI 0 "register_operand" "=r")
4468 (match_operand:V8HI 1 "register_operand" "x")
4469 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4471 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4472 [(set_attr "type" "sselog")
4473 (set_attr "prefix_data16" "1")
4474 (set_attr "mode" "TI")])
;; Memory-destination form of pextrw: stores the V8HI element of
;; operand 1 selected by immediate operand 2 (0..7) to an HI memory
;; operand.  Unlike *sse2_pextrw this form carries prefix_extra.
4476 (define_insn "*sse4_1_pextrw_memory"
4477 [(set (match_operand:HI 0 "memory_operand" "=m")
4479 (match_operand:V8HI 1 "register_operand" "x")
4480 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4482 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4483 [(set_attr "type" "sselog")
4484 (set_attr "prefix_extra" "1")
4485 (set_attr "mode" "TI")])
;; Emits pextrd: moves the V4SI element of operand 1 selected by
;; immediate operand 2 (0..3) to an SI general register or memory.
4487 (define_insn "*sse4_1_pextrd"
4488 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4490 (match_operand:V4SI 1 "register_operand" "x")
4491 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4493 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4494 [(set_attr "type" "sselog")
4495 (set_attr "prefix_extra" "1")
4496 (set_attr "mode" "TI")])
4498 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Emits pextrq: moves the V2DI element of operand 1 selected by
;; immediate operand 2 (0..1) to a DI general register or memory.
;; Enabled only for TARGET_SSE4_1 && TARGET_64BIT.
4499 (define_insn "*sse4_1_pextrq"
4500 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4502 (match_operand:V2DI 1 "register_operand" "x")
4503 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4504 "TARGET_SSE4_1 && TARGET_64BIT"
4505 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4506 [(set_attr "type" "sselog")
4507 (set_attr "prefix_extra" "1")
4508 (set_attr "mode" "TI")])
;; Expander for pshufd with a single immediate (operand 2).  The
;; immediate is split into four 2-bit selectors, lowest bits first,
;; which are handed to sse2_pshufd_1 as separate constant operands.
4510 (define_expand "sse2_pshufd"
4511 [(match_operand:V4SI 0 "register_operand" "")
4512 (match_operand:V4SI 1 "nonimmediate_operand" "")
4513 (match_operand:SI 2 "const_int_operand" "")]
4516 int mask = INTVAL (operands[2]);
4517 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4518 GEN_INT ((mask >> 0) & 3),
4519 GEN_INT ((mask >> 2) & 3),
4520 GEN_INT ((mask >> 4) & 3),
4521 GEN_INT ((mask >> 6) & 3)));
;; Emits pshufd.  Operands 2..5 are four element selectors, each in the
;; range 0..3.  The output code packs them back into one immediate
;; (operand 2 in bits 0-1, operand 3 in bits 2-3, operand 4 in bits 4-5,
;; operand 5 in bits 6-7) and stores it in operands[2] for printing.
4525 (define_insn "sse2_pshufd_1"
4526 [(set (match_operand:V4SI 0 "register_operand" "=x")
4528 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4529 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4530 (match_operand 3 "const_0_to_3_operand" "")
4531 (match_operand 4 "const_0_to_3_operand" "")
4532 (match_operand 5 "const_0_to_3_operand" "")])))]
4536 mask |= INTVAL (operands[2]) << 0;
4537 mask |= INTVAL (operands[3]) << 2;
4538 mask |= INTVAL (operands[4]) << 4;
4539 mask |= INTVAL (operands[5]) << 6;
4540 operands[2] = GEN_INT (mask);
4542 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4544 [(set_attr "type" "sselog1")
4545 (set_attr "prefix_data16" "1")
4546 (set_attr "mode" "TI")])
;; Expander for pshuflw with a single immediate (operand 2).  The
;; immediate is split into four 2-bit selectors, lowest bits first,
;; which are handed to sse2_pshuflw_1 as separate constant operands.
4548 (define_expand "sse2_pshuflw"
4549 [(match_operand:V8HI 0 "register_operand" "")
4550 (match_operand:V8HI 1 "nonimmediate_operand" "")
4551 (match_operand:SI 2 "const_int_operand" "")]
4554 int mask = INTVAL (operands[2]);
4555 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4556 GEN_INT ((mask >> 0) & 3),
4557 GEN_INT ((mask >> 2) & 3),
4558 GEN_INT ((mask >> 4) & 3),
4559 GEN_INT ((mask >> 6) & 3)));
;; Emits pshuflw.  Operands 2..5 are element selectors in the range
;; 0..3 and form the first four entries of the selection list.  The
;; output code packs them into one immediate, two bits per selector with
;; operand 2 lowest, and stores it in operands[2] for printing.
4563 (define_insn "sse2_pshuflw_1"
4564 [(set (match_operand:V8HI 0 "register_operand" "=x")
4566 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4567 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4568 (match_operand 3 "const_0_to_3_operand" "")
4569 (match_operand 4 "const_0_to_3_operand" "")
4570 (match_operand 5 "const_0_to_3_operand" "")
4578 mask |= INTVAL (operands[2]) << 0;
4579 mask |= INTVAL (operands[3]) << 2;
4580 mask |= INTVAL (operands[4]) << 4;
4581 mask |= INTVAL (operands[5]) << 6;
4582 operands[2] = GEN_INT (mask);
4584 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4586 [(set_attr "type" "sselog")
4587 (set_attr "prefix_rep" "1")
4588 (set_attr "mode" "TI")])
;; Expander for pshufhw with a single immediate (operand 2).  Each
;; 2-bit field of the immediate is extracted and biased by 4, so the
;; selectors passed to sse2_pshufhw_1 lie in the range 4..7.
4590 (define_expand "sse2_pshufhw"
4591 [(match_operand:V8HI 0 "register_operand" "")
4592 (match_operand:V8HI 1 "nonimmediate_operand" "")
4593 (match_operand:SI 2 "const_int_operand" "")]
4596 int mask = INTVAL (operands[2]);
4597 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4598 GEN_INT (((mask >> 0) & 3) + 4),
4599 GEN_INT (((mask >> 2) & 3) + 4),
4600 GEN_INT (((mask >> 4) & 3) + 4),
4601 GEN_INT (((mask >> 6) & 3) + 4)));
;; Emits pshufhw.  The selection list begins with constant index 0 and
;; ends with operands 2..5, each a selector in the range 4..7.  The
;; output code removes the bias of 4 from every selector, packs the
;; results into one immediate (two bits each, operand 2 lowest) and
;; stores it in operands[2] for printing.
4605 (define_insn "sse2_pshufhw_1"
4606 [(set (match_operand:V8HI 0 "register_operand" "=x")
4608 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4609 (parallel [(const_int 0)
4613 (match_operand 2 "const_4_to_7_operand" "")
4614 (match_operand 3 "const_4_to_7_operand" "")
4615 (match_operand 4 "const_4_to_7_operand" "")
4616 (match_operand 5 "const_4_to_7_operand" "")])))]
4620 mask |= (INTVAL (operands[2]) - 4) << 0;
4621 mask |= (INTVAL (operands[3]) - 4) << 2;
4622 mask |= (INTVAL (operands[4]) - 4) << 4;
4623 mask |= (INTVAL (operands[5]) - 4) << 6;
4624 operands[2] = GEN_INT (mask);
4626 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4628 [(set_attr "type" "sselog")
4629 (set_attr "prefix_rep" "1")
4630 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SI operand (operand 1).
;; The preparation statement sets operands[2] to the V4SI zero vector.
4632 (define_expand "sse2_loadd"
4633 [(set (match_operand:V4SI 0 "register_operand" "")
4636 (match_operand:SI 1 "nonimmediate_operand" ""))
4640 "operands[2] = CONST0_RTX (V4SImode);")
;; Loads an SI value (operand 2) into a V4SI register.  Four
;; alternatives: the first two emit movd (source in memory or a general
;; register), the last two emit movss (source in memory or an SSE
;; register).  Operand 1 uses the "C" constraint in the first three
;; alternatives and is tied to the destination in the last one.
4642 (define_insn "sse2_loadld"
4643 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4646 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4647 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4651 movd\t{%2, %0|%0, %2}
4652 movd\t{%2, %0|%0, %2}
4653 movss\t{%2, %0|%0, %2}
4654 movss\t{%2, %0|%0, %2}"
4655 [(set_attr "type" "ssemov")
4656 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Stores element 0 of a V4SI register to an SI destination (memory,
;; SSE register, or general register).  After reload the pattern is
;; split into a plain SI move when inter-unit moves are allowed, the
;; destination is memory, or the destination is not a general register.
;; For the split, operand 1 is re-created as an SImode REG with the same
;; register number.
4658 (define_insn_and_split "sse2_stored"
4659 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4661 (match_operand:V4SI 1 "register_operand" "x,Yi")
4662 (parallel [(const_int 0)])))]
4665 "&& reload_completed
4666 && (TARGET_INTER_UNIT_MOVES
4667 || MEM_P (operands [0])
4668 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4669 [(set (match_dup 0) (match_dup 1))]
4671 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extracts element operand 2 (0..3) of a V4SI held in offsettable
;; memory ("o") into an SI general register.  The split code replaces
;; the pattern with an SImode move from the same memory operand at byte
;; offset 4 * index.
4674 (define_insn_and_split "*vec_ext_v4si_mem"
4675 [(set (match_operand:SI 0 "register_operand" "=r")
4677 (match_operand:V4SI 1 "memory_operand" "o")
4678 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4684 int i = INTVAL (operands[2]);
4686 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of a V2DI register (operand 1) to a
;; DI register or memory destination (operand 0).
4690 (define_expand "sse_storeq"
4691 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4693 (match_operand:V2DI 1 "register_operand" "")
4694 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 DI store from V2DI.  Requires
;; TARGET_64BIT and rejects the case where both operands are memory.
;; Three alternatives; only the third has the "imov" type and DI mode,
;; and it is printed as mov{q}.  The first two leave type and mode
;; unspecified ("*").
4698 (define_insn "*sse2_storeq_rex64"
4699 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4701 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4702 (parallel [(const_int 0)])))]
4703 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4707 mov{q}\t{%1, %0|%0, %1}"
4708 [(set_attr "type" "*,*,imov")
4709 (set_attr "mode" "*,*,DI")])
;; Element-0 DI store from a V2DI SSE register to memory or an SSE
;; register ("=mx").
;; NOTE(review): the second pattern below (starting at the repeated
;; "(set (match_operand:DI 0 ..." line) appears to be a separate
;; definition whose opening line is not shown here -- presumably a split
;; that rewrites the store as a plain DI move; confirm.  What is visible:
;; it applies when inter-unit moves are allowed, the destination is
;; memory, or the destination is not a general register, and it
;; re-creates operand 1 as a DImode REG with the same register number.
4711 (define_insn "*sse2_storeq"
4712 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4714 (match_operand:V2DI 1 "register_operand" "x")
4715 (parallel [(const_int 0)])))]
4720 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4722 (match_operand:V2DI 1 "register_operand" "")
4723 (parallel [(const_int 0)])))]
4726 && (TARGET_INTER_UNIT_MOVES
4727 || MEM_P (operands [0])
4728 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4729 [(set (match_dup 0) (match_dup 1))]
4731 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 of a V2DI into a DI destination on 64-bit targets;
;; memory-to-memory is rejected.  Alternatives, in order:
;;   movhps  - SSE register source to memory;
;;   psrldq  - in place by 8 bytes (source tied to destination);
;;   movq    - from memory via %H1 into an SSE register;
;;   mov{q}  - from memory via %H1 into a general register.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; memory operand -- confirm against the operand-printing code.
4734 (define_insn "*vec_extractv2di_1_rex64"
4735 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4737 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4738 (parallel [(const_int 1)])))]
4739 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4741 movhps\t{%1, %0|%0, %1}
4742 psrldq\t{$8, %0|%0, 8}
4743 movq\t{%H1, %0|%0, %H1}
4744 mov{q}\t{%H1, %0|%0, %H1}"
4745 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4746 (set_attr "memory" "*,none,*,*")
4747 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extracts element 1 of a V2DI into a DI destination when TARGET_SSE2
;; holds (the first line of the condition is not shown here);
;; memory-to-memory is rejected.  Alternatives, in order:
;;   movhps  - SSE register source to memory;
;;   psrldq  - in place by 8 bytes (source tied to destination);
;;   movq    - from memory via %H1 into an SSE register.
4749 (define_insn "*vec_extractv2di_1_sse2"
4750 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4752 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4753 (parallel [(const_int 1)])))]
4755 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4757 movhps\t{%1, %0|%0, %1}
4758 psrldq\t{$8, %0|%0, 8}
4759 movq\t{%H1, %0|%0, %H1}"
4760 [(set_attr "type" "ssemov,sseishft,ssemov")
4761 (set_attr "memory" "*,none,*")
4762 (set_attr "mode" "V2SF,TI,TI")])
4764 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 variant of the element-1 extraction from V2DI;
;; memory-to-memory is rejected.  Alternatives, in order:
;;   movhps  - SSE register source to memory;
;;   movhlps - SSE register to SSE register;
;;   movlps  - from memory via %H1 into an SSE register.
4765 (define_insn "*vec_extractv2di_1_sse"
4766 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4768 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4769 (parallel [(const_int 1)])))]
4770 "!TARGET_SSE2 && TARGET_SSE
4771 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4773 movhps\t{%1, %0|%0, %1}
4774 movhlps\t{%1, %0|%0, %1}
4775 movlps\t{%H1, %0|%0, %H1}"
4776 [(set_attr "type" "ssemov")
4777 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produces a V4SI from an SI register operand using a shuffle with
;; immediate 0.  First alternative: pshufd from operand 1 into operand 0.
;; Second alternative: shufps of operand 0 with itself (operand 1 is
;; tied to the destination).
4779 (define_insn "*vec_dupv4si"
4780 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4782 (match_operand:SI 1 "register_operand" " Y2,0")))]
4785 pshufd\t{$0, %1, %0|%0, %1, 0}
4786 shufps\t{$0, %0, %0|%0, %0, 0}"
4787 [(set_attr "type" "sselog1")
4788 (set_attr "mode" "TI,V4SF")])
;; Produces a V2DI from a DI register operand that is tied to the
;; destination in both alternatives.  The output templates are not
;; shown in this view; the attributes classify the alternatives as
;; sselog1 (TI mode) and ssemov (V4SF mode).
4790 (define_insn "*vec_dupv2di"
4791 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4793 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4798 [(set_attr "type" "sselog1,ssemov")
4799 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI from two SI operands.  Alternatives, in order:
;;   pinsrd $1  - operand 2 (general register or memory) inserted into
;;                the destination, to which operand 1 is tied;
;;   punpckldq  - operand 2 in an SSE register;
;;   movd       - operand 1 alone, operand 2 matching "C";
;;   punpckldq  - MMX registers ("*y"), operand 2 register or memory;
;;   movd       - MMX destination, operand 2 matching "C".
;; Only the pinsrd alternative sets prefix_extra.
4801 (define_insn "*vec_concatv2si_sse4_1"
4802 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
4804 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
4805 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
4808 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
4809 punpckldq\t{%2, %0|%0, %2}
4810 movd\t{%1, %0|%0, %1}
4811 punpckldq\t{%2, %0|%0, %2}
4812 movd\t{%1, %0|%0, %1}"
4813 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4814 (set_attr "prefix_extra" "1,*,*,*,*")
4815 (set_attr "mode" "TI,TI,TI,DI,DI")])
4817 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4818 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4819 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI from two SI operands; operand 2 must satisfy
;; reg_or_0_operand.  Alternatives, in order: SSE punpckldq, SSE movd
;; (operand 2 matching "C"), MMX punpckldq, MMX movd (operand 2
;; matching "C").
4820 (define_insn "*vec_concatv2si_sse2"
4821 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
4823 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
4824 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
4827 punpckldq\t{%2, %0|%0, %2}
4828 movd\t{%1, %0|%0, %1}
4829 punpckldq\t{%2, %0|%0, %2}
4830 movd\t{%1, %0|%0, %1}"
4831 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4832 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI from two SI operands using single-precision SSE moves
;; for the SSE alternatives.  Alternatives, in order: unpcklps, movss
;; from memory (operand 2 matching "C"), MMX punpckldq, MMX movd
;; (operand 2 matching "C").
4834 (define_insn "*vec_concatv2si_sse"
4835 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4837 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4838 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4841 unpcklps\t{%2, %0|%0, %2}
4842 movss\t{%1, %0|%0, %1}
4843 punpckldq\t{%2, %0|%0, %2}
4844 movd\t{%1, %0|%0, %1}"
4845 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4846 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI from two V2SI operands; operand 1 is tied to the
;; destination in every alternative.  Alternatives, in order:
;; punpcklqdq (operand 2 in a "Y2" register), movlhps (operand 2 in an
;; SSE register), movhps (operand 2 in memory).
4848 (define_insn "*vec_concatv4si_1"
4849 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4851 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4852 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4855 punpcklqdq\t{%2, %0|%0, %2}
4856 movlhps\t{%2, %0|%0, %2}
4857 movhps\t{%2, %0|%0, %2}"
4858 [(set_attr "type" "sselog,ssemov,ssemov")
4859 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI from two DI operands on 32-bit targets with SSE
;; (!TARGET_64BIT && TARGET_SSE).  Alternatives, in order:
;;   movq       - operand 1 from memory, operand 2 matching "C";
;;   movq2dq    - operand 1 from an MMX register, operand 2 matching "C";
;;   punpcklqdq - operand 1 tied to destination, operand 2 in a register;
;;   movlhps    - operand 1 tied to destination, operand 2 in a register;
;;   movhps     - operand 1 tied to destination, operand 2 in memory;
;;   movlps     - operand 2 tied to destination, operand 1 in memory.
4861 (define_insn "vec_concatv2di"
4862 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4864 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4865 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4866 "!TARGET_64BIT && TARGET_SSE"
4868 movq\t{%1, %0|%0, %1}
4869 movq2dq\t{%1, %0|%0, %1}
4870 punpcklqdq\t{%2, %0|%0, %2}
4871 movlhps\t{%2, %0|%0, %2}
4872 movhps\t{%2, %0|%0, %2}
4873 movlps\t{%1, %0|%0, %1}"
4874 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4875 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Builds a V2DI from two DI operands on 64-bit SSE4.1 targets.
;; Alternatives, in order:
;;   pinsrq $1  - operand 2 (general register or memory) inserted into
;;                the destination, to which operand 1 is tied;
;;   movq       - operand 1 from memory, operand 2 matching "C";
;;   movq       - operand 1 from a general register, operand 2 "C";
;;   movq2dq    - operand 1 from an MMX register, operand 2 "C";
;;   punpcklqdq - operand 1 tied to destination, operand 2 in a register;
;;   movlhps    - operand 1 tied to destination, operand 2 in a register;
;;   movhps     - operand 1 tied to destination, operand 2 in memory;
;;   movlps     - operand 2 tied to destination, operand 1 in memory.
;; Only the pinsrq alternative sets prefix_extra.
4877 (define_insn "*vec_concatv2di_rex64_sse4_1"
4878 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x")
4880 (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m")
4881 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))]
4882 "TARGET_64BIT && TARGET_SSE4_1"
4884 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
4885 movq\t{%1, %0|%0, %1}
4886 movq\t{%1, %0|%0, %1}
4887 movq2dq\t{%1, %0|%0, %1}
4888 punpcklqdq\t{%2, %0|%0, %2}
4889 movlhps\t{%2, %0|%0, %2}
4890 movhps\t{%2, %0|%0, %2}
4891 movlps\t{%1, %0|%0, %1}"
4892 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4893 (set_attr "prefix_extra" "1,*,*,*,*,*,*,*")
4894 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Builds a V2DI from two DI operands on 64-bit SSE targets.
;; Alternatives, in order:
;;   movq       - operand 1 from memory, operand 2 matching "C";
;;   movq       - operand 1 from a general register, operand 2 "C";
;;   movq2dq    - operand 1 from an MMX register, operand 2 "C";
;;   punpcklqdq - operand 1 tied to destination, operand 2 in a register;
;;   movlhps    - operand 1 tied to destination, operand 2 in a register;
;;   movhps     - operand 1 tied to destination, operand 2 in memory;
;;   movlps     - operand 2 tied to destination, operand 1 in memory.
4896 (define_insn "*vec_concatv2di_rex64_sse"
4897 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4899 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4900 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4901 "TARGET_64BIT && TARGET_SSE"
4903 movq\t{%1, %0|%0, %1}
4904 movq\t{%1, %0|%0, %1}
4905 movq2dq\t{%1, %0|%0, %1}
4906 punpcklqdq\t{%2, %0|%0, %2}
4907 movlhps\t{%2, %0|%0, %2}
4908 movhps\t{%2, %0|%0, %2}
4909 movlps\t{%1, %0|%0, %1}"
4910 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4911 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: extracts the element of V2DI operand 1 at constant index
;; operand 2 into DI register operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as first argument.
4913 (define_expand "vec_extractv2di"
4914 [(match_operand:DI 0 "register_operand" "")
4915 (match_operand:V2DI 1 "register_operand" "")
4916 (match_operand 2 "const_int_operand" "")]
4919 ix86_expand_vector_extract (false, operands[0], operands[1],
4920 INTVAL (operands[2]));
;; Expander: extracts the element of V4SI operand 1 at constant index
;; operand 2 into SI register operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as first argument.
4924 (define_expand "vec_extractv4si"
4925 [(match_operand:SI 0 "register_operand" "")
4926 (match_operand:V4SI 1 "register_operand" "")
4927 (match_operand 2 "const_int_operand" "")]
4930 ix86_expand_vector_extract (false, operands[0], operands[1],
4931 INTVAL (operands[2]));
;; Expander: extracts the element of V8HI operand 1 at constant index
;; operand 2 into HI register operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as first argument.
4935 (define_expand "vec_extractv8hi"
4936 [(match_operand:HI 0 "register_operand" "")
4937 (match_operand:V8HI 1 "register_operand" "")
4938 (match_operand 2 "const_int_operand" "")]
4941 ix86_expand_vector_extract (false, operands[0], operands[1],
4942 INTVAL (operands[2]));
;; Expander: extracts the element of V16QI operand 1 at constant index
;; operand 2 into QI register operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as first argument.
4946 (define_expand "vec_extractv16qi"
4947 [(match_operand:QI 0 "register_operand" "")
4948 (match_operand:V16QI 1 "register_operand" "")
4949 (match_operand 2 "const_int_operand" "")]
4952 ix86_expand_vector_extract (false, operands[0], operands[1],
4953 INTVAL (operands[2]));
;; Expander: V16QI operand 1 -> V8HI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, true) --
;; presumably "unsigned" and "high half", matching the name; confirm.
4957 (define_expand "vec_unpacku_hi_v16qi"
4958 [(match_operand:V8HI 0 "register_operand" "")
4959 (match_operand:V16QI 1 "register_operand" "")]
4963 ix86_expand_sse4_unpack (operands, true, true);
4964 else if (TARGET_SSE5)
4965 ix86_expand_sse5_unpack (operands, true, true);
4967 ix86_expand_sse_unpack (operands, true, true);
;; Expander: V16QI operand 1 -> V8HI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, true) --
;; presumably "signed" and "high half", matching the name; confirm.
4971 (define_expand "vec_unpacks_hi_v16qi"
4972 [(match_operand:V8HI 0 "register_operand" "")
4973 (match_operand:V16QI 1 "register_operand" "")]
4977 ix86_expand_sse4_unpack (operands, false, true);
4978 else if (TARGET_SSE5)
4979 ix86_expand_sse5_unpack (operands, false, true);
4981 ix86_expand_sse_unpack (operands, false, true);
;; Expander: V16QI operand 1 -> V8HI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, false) --
;; presumably "unsigned" and "low half", matching the name; confirm.
4985 (define_expand "vec_unpacku_lo_v16qi"
4986 [(match_operand:V8HI 0 "register_operand" "")
4987 (match_operand:V16QI 1 "register_operand" "")]
4991 ix86_expand_sse4_unpack (operands, true, false);
4992 else if (TARGET_SSE5)
4993 ix86_expand_sse5_unpack (operands, true, false);
4995 ix86_expand_sse_unpack (operands, true, false);
;; Expander: V16QI operand 1 -> V8HI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, false) --
;; presumably "signed" and "low half", matching the name; confirm.
4999 (define_expand "vec_unpacks_lo_v16qi"
5000 [(match_operand:V8HI 0 "register_operand" "")
5001 (match_operand:V16QI 1 "register_operand" "")]
5005 ix86_expand_sse4_unpack (operands, false, false);
5006 else if (TARGET_SSE5)
5007 ix86_expand_sse5_unpack (operands, false, false);
5009 ix86_expand_sse_unpack (operands, false, false);
;; Expander: V8HI operand 1 -> V4SI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, true) --
;; presumably "unsigned" and "high half", matching the name; confirm.
5013 (define_expand "vec_unpacku_hi_v8hi"
5014 [(match_operand:V4SI 0 "register_operand" "")
5015 (match_operand:V8HI 1 "register_operand" "")]
5019 ix86_expand_sse4_unpack (operands, true, true);
5020 else if (TARGET_SSE5)
5021 ix86_expand_sse5_unpack (operands, true, true);
5023 ix86_expand_sse_unpack (operands, true, true);
;; Expander: V8HI operand 1 -> V4SI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, true) --
;; presumably "signed" and "high half", matching the name; confirm.
5027 (define_expand "vec_unpacks_hi_v8hi"
5028 [(match_operand:V4SI 0 "register_operand" "")
5029 (match_operand:V8HI 1 "register_operand" "")]
5033 ix86_expand_sse4_unpack (operands, false, true);
5034 else if (TARGET_SSE5)
5035 ix86_expand_sse5_unpack (operands, false, true);
5037 ix86_expand_sse_unpack (operands, false, true);
;; Expander: V8HI operand 1 -> V4SI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, false) --
;; presumably "unsigned" and "low half", matching the name; confirm.
5041 (define_expand "vec_unpacku_lo_v8hi"
5042 [(match_operand:V4SI 0 "register_operand" "")
5043 (match_operand:V8HI 1 "register_operand" "")]
5047 ix86_expand_sse4_unpack (operands, true, false);
5048 else if (TARGET_SSE5)
5049 ix86_expand_sse5_unpack (operands, true, false);
5051 ix86_expand_sse_unpack (operands, true, false);
;; Expander: V8HI operand 1 -> V4SI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, false) --
;; presumably "signed" and "low half", matching the name; confirm.
5055 (define_expand "vec_unpacks_lo_v8hi"
5056 [(match_operand:V4SI 0 "register_operand" "")
5057 (match_operand:V8HI 1 "register_operand" "")]
5061 ix86_expand_sse4_unpack (operands, false, false);
5062 else if (TARGET_SSE5)
5063 ix86_expand_sse5_unpack (operands, false, false);
5065 ix86_expand_sse_unpack (operands, false, false);
;; Expander: V4SI operand 1 -> V2DI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, true) --
;; presumably "unsigned" and "high half", matching the name; confirm.
5069 (define_expand "vec_unpacku_hi_v4si"
5070 [(match_operand:V2DI 0 "register_operand" "")
5071 (match_operand:V4SI 1 "register_operand" "")]
5075 ix86_expand_sse4_unpack (operands, true, true);
5076 else if (TARGET_SSE5)
5077 ix86_expand_sse5_unpack (operands, true, true);
5079 ix86_expand_sse_unpack (operands, true, true);
;; Expander: V4SI operand 1 -> V2DI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, true) --
;; presumably "signed" and "high half", matching the name; confirm.
5083 (define_expand "vec_unpacks_hi_v4si"
5084 [(match_operand:V2DI 0 "register_operand" "")
5085 (match_operand:V4SI 1 "register_operand" "")]
5089 ix86_expand_sse4_unpack (operands, false, true);
5090 else if (TARGET_SSE5)
5091 ix86_expand_sse5_unpack (operands, false, true);
5093 ix86_expand_sse_unpack (operands, false, true);
;; Expander: V4SI operand 1 -> V2DI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, false) --
;; presumably "unsigned" and "low half", matching the name; confirm.
5097 (define_expand "vec_unpacku_lo_v4si"
5098 [(match_operand:V2DI 0 "register_operand" "")
5099 (match_operand:V4SI 1 "register_operand" "")]
5103 ix86_expand_sse4_unpack (operands, true, false);
5104 else if (TARGET_SSE5)
5105 ix86_expand_sse5_unpack (operands, true, false);
5107 ix86_expand_sse_unpack (operands, true, false);
;; Expander: V4SI operand 1 -> V2DI operand 0.  Delegates to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, false) --
;; presumably "signed" and "low half", matching the name; confirm.
5111 (define_expand "vec_unpacks_lo_v4si"
5112 [(match_operand:V2DI 0 "register_operand" "")
5113 (match_operand:V4SI 1 "register_operand" "")]
5117 ix86_expand_sse4_unpack (operands, false, false);
5118 else if (TARGET_SSE5)
5119 ix86_expand_sse5_unpack (operands, false, false);
5121 ix86_expand_sse_unpack (operands, false, false);
5125 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5129 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern.  The visible part of the
;; RTL combines operands 1 and 2 with an all-ones V16QI constant
;; vector.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS to legitimize the two
;; inputs.
5131 (define_expand "sse2_uavgv16qi3"
5132 [(set (match_operand:V16QI 0 "register_operand" "")
5138 (match_operand:V16QI 1 "nonimmediate_operand" ""))
5140 (match_operand:V16QI 2 "nonimmediate_operand" "")))
5141 (const_vector:V16QI [(const_int 1) (const_int 1)
5142 (const_int 1) (const_int 1)
5143 (const_int 1) (const_int 1)
5144 (const_int 1) (const_int 1)
5145 (const_int 1) (const_int 1)
5146 (const_int 1) (const_int 1)
5147 (const_int 1) (const_int 1)
5148 (const_int 1) (const_int 1)]))
5151 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Emits pavgb for the V16QI average pattern (inputs combined with an
;; all-ones constant vector).  Operand 1 is commutative with operand 2
;; ("%0") and tied to the destination.  Enabled when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
5153 (define_insn "*sse2_uavgv16qi3"
5154 [(set (match_operand:V16QI 0 "register_operand" "=x")
5160 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5162 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5163 (const_vector:V16QI [(const_int 1) (const_int 1)
5164 (const_int 1) (const_int 1)
5165 (const_int 1) (const_int 1)
5166 (const_int 1) (const_int 1)
5167 (const_int 1) (const_int 1)
5168 (const_int 1) (const_int 1)
5169 (const_int 1) (const_int 1)
5170 (const_int 1) (const_int 1)]))
5172 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5173 "pavgb\t{%2, %0|%0, %2}"
5174 [(set_attr "type" "sseiadd")
5175 (set_attr "prefix_data16" "1")
5176 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern.  The visible part of the RTL
;; combines operands 1 and 2 with an all-ones V8HI constant vector.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS to legitimize the two inputs.
5178 (define_expand "sse2_uavgv8hi3"
5179 [(set (match_operand:V8HI 0 "register_operand" "")
5185 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5187 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5188 (const_vector:V8HI [(const_int 1) (const_int 1)
5189 (const_int 1) (const_int 1)
5190 (const_int 1) (const_int 1)
5191 (const_int 1) (const_int 1)]))
5194 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Emits pavgw for the V8HI average pattern (inputs combined with an
;; all-ones constant vector).  Operand 1 is commutative with operand 2
;; ("%0") and tied to the destination.  Enabled when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
5196 (define_insn "*sse2_uavgv8hi3"
5197 [(set (match_operand:V8HI 0 "register_operand" "=x")
5203 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5205 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5206 (const_vector:V8HI [(const_int 1) (const_int 1)
5207 (const_int 1) (const_int 1)
5208 (const_int 1) (const_int 1)
5209 (const_int 1) (const_int 1)]))
5211 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5212 "pavgw\t{%2, %0|%0, %2}"
5213 [(set_attr "type" "sseiadd")
5214 (set_attr "prefix_data16" "1")
5215 (set_attr "mode" "TI")])
5217 ;; The correct representation for this is absolutely enormous, and
5218 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec: two V16QI
;; inputs (operand 1 tied to the destination, operand 2 register or
;; memory) produce a V2DI result via psadbw.
5219 (define_insn "sse2_psadbw"
5220 [(set (match_operand:V2DI 0 "register_operand" "=x")
5221 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5222 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5225 "psadbw\t{%2, %0|%0, %2}"
5226 [(set_attr "type" "sseiadd")
5227 (set_attr "prefix_data16" "1")
5228 (set_attr "mode" "TI")])
;; Iterated over SSEMODEF2P (V4SF and V2DF).  With the <sse> and
;; <ssemodesuffixf2c> attribute mappings this yields the pattern names
;; sse_movmskps and sse2_movmskpd and the mnemonics movmskps / movmskpd.
;; The SSE register source produces an SI result in a general register.
5230 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5231 [(set (match_operand:SI 0 "register_operand" "=r")
5233 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5235 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5236 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5237 [(set_attr "type" "ssecvt")
5238 (set_attr "mode" "<MODE>")])
;; Emits pmovmskb: a V16QI SSE register produces an SI result in a
;; general register.  Modelled as an unspec.
5240 (define_insn "sse2_pmovmskb"
5241 [(set (match_operand:SI 0 "register_operand" "=r")
5242 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5245 "pmovmskb\t{%1, %0|%0, %1}"
5246 [(set_attr "type" "ssecvt")
5247 (set_attr "prefix_data16" "1")
5248 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: the destination is a V16QI memory operand
;; and the unspec takes two V16QI register inputs (operands 1 and 2).
;; The rest of the unspec and the enable condition are not shown here.
5250 (define_expand "sse2_maskmovdqu"
5251 [(set (match_operand:V16QI 0 "memory_operand" "")
5252 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5253 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The store address is operand 0, an SI register
;; constrained to "D"; it is not printed in the template.  The previous
;; memory contents appear as an input of the unspec through
;; (mem (match_dup 0)).  Operands 1 and 2 are V16QI SSE registers.
5259 (define_insn "*sse2_maskmovdqu"
5260 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5261 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5262 (match_operand:V16QI 2 "register_operand" "x")
5263 (mem:V16QI (match_dup 0))]
5265 "TARGET_SSE2 && !TARGET_64BIT"
5266 ;; @@@ check ordering of operands in intel/nonintel syntax
5267 "maskmovdqu\t{%2, %1|%1, %2}"
5268 [(set_attr "type" "ssecvt")
5269 (set_attr "prefix_data16" "1")
5270 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern except that the
;; address register (operand 0, constraint "D") has DI mode.  The
;; address is not printed in the template; the previous memory contents
;; appear as an unspec input through (mem (match_dup 0)).
5272 (define_insn "*sse2_maskmovdqu_rex64"
5273 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5274 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5275 (match_operand:V16QI 2 "register_operand" "x")
5276 (mem:V16QI (match_dup 0))]
5278 "TARGET_SSE2 && TARGET_64BIT"
5279 ;; @@@ check ordering of operands in intel/nonintel syntax
5280 "maskmovdqu\t{%2, %1|%1, %2}"
5281 [(set_attr "type" "ssecvt")
5282 (set_attr "prefix_data16" "1")
5283 (set_attr "mode" "TI")])
;; Volatile unspec taking an SI memory operand; the "memory" attribute
;; marks the insn as a load.  The output template is not shown here
;; (presumably ldmxcsr, per the pattern name).
5285 (define_insn "sse_ldmxcsr"
5286 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5290 [(set_attr "type" "sse")
5291 (set_attr "memory" "load")])
;; Sets an SI memory operand from the volatile unspec UNSPECV_STMXCSR;
;; the "memory" attribute marks the insn as a store.  The output
;; template is not shown here (presumably stmxcsr, per the name).
5293 (define_insn "sse_stmxcsr"
5294 [(set (match_operand:SI 0 "memory_operand" "=m")
5295 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5298 [(set_attr "type" "sse")
5299 (set_attr "memory" "store")])
;; Expander for the UNSPEC_SFENCE pattern, available with TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code makes operand 0 a BLKmode MEM
;; with a scratch address and marks it volatile.
5301 (define_expand "sse_sfence"
5303 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5304 "TARGET_SSE || TARGET_3DNOW_A"
5306 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5307 MEM_VOLATILE_P (operands[0]) = 1;
;; Matches a BLK operand set to UNSPEC_SFENCE of itself, as generated by
;; the sse_sfence expander.  Memory effect is declared "unknown".  The
;; output template is not shown here.
5310 (define_insn "*sse_sfence"
5311 [(set (match_operand:BLK 0 "" "")
5312 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5313 "TARGET_SSE || TARGET_3DNOW_A"
5315 [(set_attr "type" "sse")
5316 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand ("p").  Memory effect is
;; declared "unknown".  Condition and output template are not shown
;; here.
5318 (define_insn "sse2_clflush"
5319 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5323 [(set_attr "type" "sse")
5324 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_MFENCE pattern.  The preparation code makes
;; operand 0 a BLKmode MEM with a scratch address and marks it volatile.
5326 (define_expand "sse2_mfence"
5328 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5331 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5332 MEM_VOLATILE_P (operands[0]) = 1;
;; Matches a BLK operand set to UNSPEC_MFENCE of itself, as generated by
;; the sse2_mfence expander.  Memory effect is declared "unknown".
5335 (define_insn "*sse2_mfence"
5336 [(set (match_operand:BLK 0 "" "")
5337 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5340 [(set_attr "type" "sse")
5341 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_LFENCE pattern.  The preparation code makes
;; operand 0 a BLKmode MEM with a scratch address and marks it volatile.
5343 (define_expand "sse2_lfence"
5345 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5348 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5349 MEM_VOLATILE_P (operands[0]) = 1;
;; Matches a BLK operand set to UNSPEC_LFENCE of itself, as generated by
;; the sse2_lfence expander.  Memory effect is declared "unknown".
5352 (define_insn "*sse2_lfence"
5353 [(set (match_operand:BLK 0 "" "")
5354 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5357 [(set_attr "type" "sse")
5358 (set_attr "memory" "unknown")])
;; Volatile unspec with two SI register inputs pinned by the "a" and
;; "c" constraints.  The same SI-mode pattern serves 64-bit targets,
;; for the reason given in the comment below.  Length is fixed at 3.
5360 (define_insn "sse3_mwait"
5361 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5362 (match_operand:SI 1 "register_operand" "c")]
5365 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5366 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5367 ;; we only need to set up 32bit registers.
5369 [(set_attr "length" "3")])
;; 32-bit monitor: volatile unspec with three SI register inputs pinned
;; by the "a", "c" and "d" constraints, printed explicitly in the
;; template.  Enabled for TARGET_SSE3 && !TARGET_64BIT; length is 3.
5371 (define_insn "sse3_monitor"
5372 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5373 (match_operand:SI 1 "register_operand" "c")
5374 (match_operand:SI 2 "register_operand" "d")]
5376 "TARGET_SSE3 && !TARGET_64BIT"
5377 "monitor\t%0, %1, %2"
5378 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 (constraint "a") has DI mode, while
;; operands 1 and 2 (constraints "c" and "d") stay SI for the reason
;; given in the comment below.  Enabled for TARGET_SSE3 && TARGET_64BIT;
;; length is 3.  The output template is not shown here.
5380 (define_insn "sse3_monitor64"
5381 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5382 (match_operand:SI 1 "register_operand" "c")
5383 (match_operand:SI 2 "register_operand" "d")]
5385 "TARGET_SSE3 && TARGET_64BIT"
5386 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5387 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5388 ;; zero extended to 64bit, we only need to set up 32bit registers.
5390 [(set_attr "length" "3")])
5392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5394 ;; SSSE3 instructions
5396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits phaddw on V8HI SSE operands.  The pattern selects adjacent HI
;; element pairs (0/1, 2/3, 4/5, 6/7) first from operand 1, which is
;; tied to the destination, and then from operand 2 (register or
;; memory).  The rtx codes that combine each pair are not shown here.
5398 (define_insn "ssse3_phaddwv8hi3"
5399 [(set (match_operand:V8HI 0 "register_operand" "=x")
5405 (match_operand:V8HI 1 "register_operand" "0")
5406 (parallel [(const_int 0)]))
5407 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5409 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5410 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5413 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5414 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5416 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5417 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5422 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5423 (parallel [(const_int 0)]))
5424 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5426 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5427 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5430 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5431 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5433 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5434 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5436 "phaddw\t{%2, %0|%0, %2}"
5437 [(set_attr "type" "sseiadd")
5438 (set_attr "prefix_data16" "1")
5439 (set_attr "prefix_extra" "1")
5440 (set_attr "mode" "TI")])
;; phaddw, MMX form (V4HI, "y" register constraints).  HImode elements are
;; selected in adjacent pairs (0,1) (2,3), first from operand 1 (tied to the
;; destination) and then from operand 2 (register or memory).  Unlike the
;; V8HI pattern there is no prefix_data16 attribute and the mode is DI.
5442 (define_insn "ssse3_phaddwv4hi3"
5443 [(set (match_operand:V4HI 0 "register_operand" "=y")
5448 (match_operand:V4HI 1 "register_operand" "0")
5449 (parallel [(const_int 0)]))
5450 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5452 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5453 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5457 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5458 (parallel [(const_int 0)]))
5459 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5461 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5462 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5464 "phaddw\t{%2, %0|%0, %2}"
5465 [(set_attr "type" "sseiadd")
5466 (set_attr "prefix_extra" "1")
5467 (set_attr "mode" "DI")])
;; phaddd, xmm form (V4SI).  SImode elements are selected in adjacent pairs
;; (0,1) (2,3), first from operand 1 (tied to the destination by the "0"
;; constraint) and then from operand 2 (register or memory).
5469 (define_insn "ssse3_phadddv4si3"
5470 [(set (match_operand:V4SI 0 "register_operand" "=x")
5475 (match_operand:V4SI 1 "register_operand" "0")
5476 (parallel [(const_int 0)]))
5477 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5479 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5480 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5484 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5485 (parallel [(const_int 0)]))
5486 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5488 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5489 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5491 "phaddd\t{%2, %0|%0, %2}"
5492 [(set_attr "type" "sseiadd")
5493 (set_attr "prefix_data16" "1")
5494 (set_attr "prefix_extra" "1")
5495 (set_attr "mode" "TI")])
;; phaddd, MMX form (V2SI, "y" register constraints).  The element pair (0,1)
;; is selected from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No prefix_data16; mode is DI.
5497 (define_insn "ssse3_phadddv2si3"
5498 [(set (match_operand:V2SI 0 "register_operand" "=y")
5502 (match_operand:V2SI 1 "register_operand" "0")
5503 (parallel [(const_int 0)]))
5504 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5507 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5508 (parallel [(const_int 0)]))
5509 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5511 "phaddd\t{%2, %0|%0, %2}"
5512 [(set_attr "type" "sseiadd")
5513 (set_attr "prefix_extra" "1")
5514 (set_attr "mode" "DI")])
;; phaddsw, xmm form (V8HI).  Same operand and element-pair layout as
;; ssse3_phaddwv8hi3: pairs (0,1) (2,3) (4,5) (6,7) from operand 1 (tied to
;; the destination), then the same pairs from operand 2 (register or memory).
;; Only the emitted mnemonic is visible as differing here.
5516 (define_insn "ssse3_phaddswv8hi3"
5517 [(set (match_operand:V8HI 0 "register_operand" "=x")
5523 (match_operand:V8HI 1 "register_operand" "0")
5524 (parallel [(const_int 0)]))
5525 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5527 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5528 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5531 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5532 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5534 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5535 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5540 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5541 (parallel [(const_int 0)]))
5542 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5544 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5545 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5548 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5549 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5551 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5552 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5554 "phaddsw\t{%2, %0|%0, %2}"
5555 [(set_attr "type" "sseiadd")
5556 (set_attr "prefix_data16" "1")
5557 (set_attr "prefix_extra" "1")
5558 (set_attr "mode" "TI")])
;; phaddsw, MMX form (V4HI, "y" register constraints).  Element pairs (0,1)
;; (2,3) are selected from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No prefix_data16; mode is DI.
5560 (define_insn "ssse3_phaddswv4hi3"
5561 [(set (match_operand:V4HI 0 "register_operand" "=y")
5566 (match_operand:V4HI 1 "register_operand" "0")
5567 (parallel [(const_int 0)]))
5568 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5570 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5571 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5575 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5576 (parallel [(const_int 0)]))
5577 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5579 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5580 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5582 "phaddsw\t{%2, %0|%0, %2}"
5583 [(set_attr "type" "sseiadd")
5584 (set_attr "prefix_extra" "1")
5585 (set_attr "mode" "DI")])
;; phsubw, xmm form (V8HI).  HImode elements are selected in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), first from operand 1 (tied to the destination by
;; the "0" constraint) and then from operand 2 (register or memory).
5587 (define_insn "ssse3_phsubwv8hi3"
5588 [(set (match_operand:V8HI 0 "register_operand" "=x")
5594 (match_operand:V8HI 1 "register_operand" "0")
5595 (parallel [(const_int 0)]))
5596 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5598 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5599 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5602 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5603 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5605 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5606 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5611 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5612 (parallel [(const_int 0)]))
5613 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5615 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5616 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5619 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5620 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5622 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5623 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5625 "phsubw\t{%2, %0|%0, %2}"
5626 [(set_attr "type" "sseiadd")
5627 (set_attr "prefix_data16" "1")
5628 (set_attr "prefix_extra" "1")
5629 (set_attr "mode" "TI")])
;; phsubw, MMX form (V4HI, "y" register constraints).  Element pairs (0,1)
;; (2,3) are selected from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No prefix_data16; mode is DI.
5631 (define_insn "ssse3_phsubwv4hi3"
5632 [(set (match_operand:V4HI 0 "register_operand" "=y")
5637 (match_operand:V4HI 1 "register_operand" "0")
5638 (parallel [(const_int 0)]))
5639 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5641 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5642 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5646 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5647 (parallel [(const_int 0)]))
5648 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5650 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5651 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5653 "phsubw\t{%2, %0|%0, %2}"
5654 [(set_attr "type" "sseiadd")
5655 (set_attr "prefix_extra" "1")
5656 (set_attr "mode" "DI")])
;; phsubd, xmm form (V4SI).  SImode elements are selected in adjacent pairs
;; (0,1) (2,3), first from operand 1 (tied to the destination by the "0"
;; constraint) and then from operand 2 (register or memory).
5658 (define_insn "ssse3_phsubdv4si3"
5659 [(set (match_operand:V4SI 0 "register_operand" "=x")
5664 (match_operand:V4SI 1 "register_operand" "0")
5665 (parallel [(const_int 0)]))
5666 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5668 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5669 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5673 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5674 (parallel [(const_int 0)]))
5675 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5677 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5678 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5680 "phsubd\t{%2, %0|%0, %2}"
5681 [(set_attr "type" "sseiadd")
5682 (set_attr "prefix_data16" "1")
5683 (set_attr "prefix_extra" "1")
5684 (set_attr "mode" "TI")])
;; phsubd, MMX form (V2SI, "y" register constraints).  The element pair (0,1)
;; is selected from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No prefix_data16; mode is DI.
5686 (define_insn "ssse3_phsubdv2si3"
5687 [(set (match_operand:V2SI 0 "register_operand" "=y")
5691 (match_operand:V2SI 1 "register_operand" "0")
5692 (parallel [(const_int 0)]))
5693 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5696 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5697 (parallel [(const_int 0)]))
5698 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5700 "phsubd\t{%2, %0|%0, %2}"
5701 [(set_attr "type" "sseiadd")
5702 (set_attr "prefix_extra" "1")
5703 (set_attr "mode" "DI")])
;; phsubsw, xmm form (V8HI).  Same operand and element-pair layout as
;; ssse3_phsubwv8hi3: pairs (0,1) (2,3) (4,5) (6,7) from operand 1 (tied to
;; the destination), then the same pairs from operand 2 (register or memory).
5705 (define_insn "ssse3_phsubswv8hi3"
5706 [(set (match_operand:V8HI 0 "register_operand" "=x")
5712 (match_operand:V8HI 1 "register_operand" "0")
5713 (parallel [(const_int 0)]))
5714 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5716 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5717 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5720 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5721 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5723 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5724 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5729 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5730 (parallel [(const_int 0)]))
5731 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5733 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5734 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5737 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5738 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5740 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5741 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5743 "phsubsw\t{%2, %0|%0, %2}"
5744 [(set_attr "type" "sseiadd")
5745 (set_attr "prefix_data16" "1")
5746 (set_attr "prefix_extra" "1")
5747 (set_attr "mode" "TI")])
;; phsubsw, MMX form (V4HI, "y" register constraints).  Element pairs (0,1)
;; (2,3) are selected from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No prefix_data16; mode is DI.
5749 (define_insn "ssse3_phsubswv4hi3"
5750 [(set (match_operand:V4HI 0 "register_operand" "=y")
5755 (match_operand:V4HI 1 "register_operand" "0")
5756 (parallel [(const_int 0)]))
5757 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5759 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5760 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5764 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5765 (parallel [(const_int 0)]))
5766 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5768 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5769 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5771 "phsubsw\t{%2, %0|%0, %2}"
5772 [(set_attr "type" "sseiadd")
5773 (set_attr "prefix_extra" "1")
5774 (set_attr "mode" "DI")])
;; pmaddubsw, xmm form: V16QI inputs, V8HI result.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  Each input is
;; element-selected twice: once with an index list starting at 0 and once
;; (via match_dup) with an index list starting at 1 and ending at 15.
;; NOTE(review): the match_dup selections are written vec_select:V16QI, which
;; looks too wide for a subset of a V16QI feeding a V8HI result (V8QI would
;; be expected) -- confirm against the complete pattern.
5776 (define_insn "ssse3_pmaddubsw128"
5777 [(set (match_operand:V8HI 0 "register_operand" "=x")
5782 (match_operand:V16QI 1 "register_operand" "0")
5783 (parallel [(const_int 0)
5793 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5794 (parallel [(const_int 0)
5804 (vec_select:V16QI (match_dup 1)
5805 (parallel [(const_int 1)
5814 (vec_select:V16QI (match_dup 2)
5815 (parallel [(const_int 1)
5822 (const_int 15)]))))))]
5824 "pmaddubsw\t{%2, %0|%0, %2}"
5825 [(set_attr "type" "sseiadd")
5826 (set_attr "prefix_data16" "1")
5827 (set_attr "prefix_extra" "1")
5828 (set_attr "mode" "TI")])
;; pmaddubsw, MMX form: V8QI inputs, V4HI result, "y" register constraints.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Each input is element-selected twice: once with an index list
;; starting at 0 and once (via match_dup) with a list starting at 1 and
;; ending at 7.
;; NOTE(review): the match_dup selections are written vec_select:V8QI, which
;; looks too wide for a subset of a V8QI feeding a V4HI result (V4QI would be
;; expected) -- confirm against the complete pattern.
5830 (define_insn "ssse3_pmaddubsw"
5831 [(set (match_operand:V4HI 0 "register_operand" "=y")
5836 (match_operand:V8QI 1 "register_operand" "0")
5837 (parallel [(const_int 0)
5843 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5844 (parallel [(const_int 0)
5850 (vec_select:V8QI (match_dup 1)
5851 (parallel [(const_int 1)
5856 (vec_select:V8QI (match_dup 2)
5857 (parallel [(const_int 1)
5860 (const_int 7)]))))))]
5862 "pmaddubsw\t{%2, %0|%0, %2}"
5863 [(set_attr "type" "sseiadd")
5864 (set_attr "prefix_extra" "1")
5865 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands with no constraints; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) before the
;; RTL is generated (presumably so the operands satisfy the condition of the
;; matching insn, which checks ix86_binary_operator_ok -- verify).
;; The template includes an all-ones V8HI const_vector.
5867 (define_expand "ssse3_pmulhrswv8hi3"
5868 [(set (match_operand:V8HI 0 "register_operand" "")
5875 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5877 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5879 (const_vector:V8HI [(const_int 1) (const_int 1)
5880 (const_int 1) (const_int 1)
5881 (const_int 1) (const_int 1)
5882 (const_int 1) (const_int 1)]))
5885 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for pmulhrsw on V8HI (xmm).  Operand 1 uses "%0": it is tied
;; to the destination and marked commutative with operand 2 (register or
;; memory).  Enabled under TARGET_SSSE3 when ix86_binary_operator_ok accepts
;; the operands for MULT.
5887 (define_insn "*ssse3_pmulhrswv8hi3"
5888 [(set (match_operand:V8HI 0 "register_operand" "=x")
5895 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5897 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5899 (const_vector:V8HI [(const_int 1) (const_int 1)
5900 (const_int 1) (const_int 1)
5901 (const_int 1) (const_int 1)
5902 (const_int 1) (const_int 1)]))
5904 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5905 "pmulhrsw\t{%2, %0|%0, %2}"
5906 [(set_attr "type" "sseimul")
5907 (set_attr "prefix_data16" "1")
5908 (set_attr "prefix_extra" "1")
5909 (set_attr "mode" "TI")])
;; Expander for the V4HI (MMX-width) pmulhrsw pattern.  Both inputs are
;; accepted as unconstrained nonimmediate operands; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands) before
;; the RTL is generated.  The template includes an all-ones V4HI const_vector.
5911 (define_expand "ssse3_pmulhrswv4hi3"
5912 [(set (match_operand:V4HI 0 "register_operand" "")
5919 (match_operand:V4HI 1 "nonimmediate_operand" ""))
5921 (match_operand:V4HI 2 "nonimmediate_operand" "")))
5923 (const_vector:V4HI [(const_int 1) (const_int 1)
5924 (const_int 1) (const_int 1)]))
5927 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matching insn for pmulhrsw on V4HI (MMX, "y" constraints).  Operand 1 uses
;; "%0": tied to the destination and commutative with operand 2 (register or
;; memory).  Enabled under TARGET_SSSE3 when ix86_binary_operator_ok accepts
;; the operands for MULT.  No prefix_data16; mode is DI.
5929 (define_insn "*ssse3_pmulhrswv4hi3"
5930 [(set (match_operand:V4HI 0 "register_operand" "=y")
5937 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5939 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5941 (const_vector:V4HI [(const_int 1) (const_int 1)
5942 (const_int 1) (const_int 1)]))
5944 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5945 "pmulhrsw\t{%2, %0|%0, %2}"
5946 [(set_attr "type" "sseimul")
5947 (set_attr "prefix_extra" "1")
5948 (set_attr "mode" "DI")])
;; pshufb, xmm form (V16QI), expressed as an unspec of operand 1 (tied to the
;; destination by the "0" constraint) and operand 2 (register or memory).
;; The output template carries no trailing ';': in a machine description
;; that character only starts an empty comment, so it was dropped to match
;; the other templates in this file.
5950 (define_insn "ssse3_pshufbv16qi3"
5951 [(set (match_operand:V16QI 0 "register_operand" "=x")
5952 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5953 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5956 "pshufb\t{%2, %0|%0, %2}"
5957 [(set_attr "type" "sselog1")
5958 (set_attr "prefix_data16" "1")
5959 (set_attr "prefix_extra" "1")
5960 (set_attr "mode" "TI")])
;; pshufb, MMX form (V8QI, "y" constraints), expressed as an unspec of
;; operand 1 (tied to the destination) and operand 2 (register or memory).
;; No prefix_data16; mode is DI.  The stray ';' that followed the output
;; template (an empty comment in md syntax) has been removed.
5962 (define_insn "ssse3_pshufbv8qi3"
5963 [(set (match_operand:V8QI 0 "register_operand" "=y")
5964 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5965 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5968 "pshufb\t{%2, %0|%0, %2}"
5969 [(set_attr "type" "sselog1")
5970 (set_attr "prefix_extra" "1")
5971 (set_attr "mode" "DI")])
;; psignb/psignw/psignd, xmm forms.  Iterates over SSEMODE124 (V16QI, V8HI,
;; V4SI); <ssevecsize> supplies the b/w/d mnemonic suffix.  Operand 1 is tied
;; to the destination; operand 2 may be a register or memory.
;; The stray ';' that followed the output template (an empty comment in md
;; syntax) has been removed.
5973 (define_insn "ssse3_psign<mode>3"
5974 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5976 [(match_operand:SSEMODE124 1 "register_operand" "0")
5977 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5980 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5981 [(set_attr "type" "sselog1")
5982 (set_attr "prefix_data16" "1")
5983 (set_attr "prefix_extra" "1")
5984 (set_attr "mode" "TI")])
;; psign, MMX forms.  Iterates over MMXMODEI with "y" register constraints;
;; <mmxvecsize> supplies the mnemonic suffix.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.  No prefix_data16;
;; mode is DI.  The stray ';' that followed the output template (an empty
;; comment in md syntax) has been removed.
5986 (define_insn "ssse3_psign<mode>3"
5987 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5989 [(match_operand:MMXMODEI 1 "register_operand" "0")
5990 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5993 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5994 [(set_attr "type" "sselog1")
5995 (set_attr "prefix_extra" "1")
5996 (set_attr "mode" "DI")])
;; palignr, xmm form, modelled as an unspec on TImode.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code replaces it with
;; its value divided by 8 before printing the instruction (presumably
;; converting a bit count into the byte count the instruction takes).
5998 (define_insn "ssse3_palignrti"
5999 [(set (match_operand:TI 0 "register_operand" "=x")
6000 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6001 (match_operand:TI 2 "nonimmediate_operand" "xm")
6002 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6006 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6007 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6009 [(set_attr "type" "sseishft")
6010 (set_attr "prefix_data16" "1")
6011 (set_attr "prefix_extra" "1")
6012 (set_attr "mode" "TI")])
;; palignr, MMX form, modelled as an unspec on DImode with "y" constraints.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  As in the TImode pattern, operand 3 (const_0_to_255_mul_8_operand)
;; is divided by 8 in the output code before the instruction is printed.
6014 (define_insn "ssse3_palignrdi"
6015 [(set (match_operand:DI 0 "register_operand" "=y")
6016 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6017 (match_operand:DI 2 "nonimmediate_operand" "ym")
6018 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6022 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6023 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6025 [(set_attr "type" "sseishft")
6026 (set_attr "prefix_extra" "1")
6027 (set_attr "mode" "DI")])
;; Vector absolute value via pabsb/pabsw/pabsd, xmm forms.  Iterates over
;; SSEMODE124 (V16QI, V8HI, V4SI); <ssevecsize> supplies the b/w/d suffix.
;; The single input may be a register or memory and is not tied to the
;; destination.  The stray ';' that followed the output template (an empty
;; comment in md syntax) has been removed.
6029 (define_insn "abs<mode>2"
6030 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6031 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6033 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
6034 [(set_attr "type" "sselog1")
6035 (set_attr "prefix_data16" "1")
6036 (set_attr "prefix_extra" "1")
6037 (set_attr "mode" "TI")])
;; Vector absolute value via pabs, MMX forms.  Iterates over MMXMODEI with
;; "y" register constraints; <mmxvecsize> supplies the mnemonic suffix.  The
;; single input may be a register or memory.  No prefix_data16; mode is DI.
;; The stray ';' that followed the output template (an empty comment in md
;; syntax) has been removed.
6039 (define_insn "abs<mode>2"
6040 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6041 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6043 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
6044 [(set_attr "type" "sselog1")
6045 (set_attr "prefix_extra" "1")
6046 (set_attr "mode" "DI")])
6048 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6050 ;; AMD SSE4A instructions
6052 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntss/movntsd for the scalar modes of the MODEF iterator: stores SSE
;; register operand 1 to memory operand 0.  <ssemodefsuffix> completes the
;; mnemonic and the "mode" attribute follows the iterated mode.
6054 (define_insn "sse4a_movnt<mode>"
6055 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6057 [(match_operand:MODEF 1 "register_operand" "x")]
6060 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6061 [(set_attr "type" "ssemov")
6062 (set_attr "mode" "<MODE>")])
;; movntss/movntsd taking a vector register: element 0 of operand 1 (V4SF or
;; V2DF, per SSEMODEF2P) is selected and stored to the scalar-mode memory
;; operand 0.  <ssemodesuffixf2c> supplies the s/d suffix.
6064 (define_insn "sse4a_vmmovnt<mode>"
6065 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6066 (unspec:<ssescalarmode>
6067 [(vec_select:<ssescalarmode>
6068 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6069 (parallel [(const_int 0)]))]
6072 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6073 [(set_attr "type" "ssemov")
6074 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form.  Operand 1 (V2DI) is tied to the destination;
;; operands 2 and 3 are modeless integer constants printed as the two
;; immediates of the instruction.
6076 (define_insn "sse4a_extrqi"
6077 [(set (match_operand:V2DI 0 "register_operand" "=x")
6078 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6079 (match_operand 2 "const_int_operand" "")
6080 (match_operand 3 "const_int_operand" "")]
6083 "extrq\t{%3, %2, %0|%0, %2, %3}"
6084 [(set_attr "type" "sse")
6085 (set_attr "prefix_data16" "1")
6086 (set_attr "mode" "TI")])
;; extrq, register form.  Operand 1 (V2DI) is tied to the destination and
;; operand 2 is a V16QI SSE register; memory is not accepted for operand 2.
6088 (define_insn "sse4a_extrq"
6089 [(set (match_operand:V2DI 0 "register_operand" "=x")
6090 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6091 (match_operand:V16QI 2 "register_operand" "x")]
6094 "extrq\t{%2, %0|%0, %2}"
6095 [(set_attr "type" "sse")
6096 (set_attr "prefix_data16" "1")
6097 (set_attr "mode" "TI")])
;; insertq, immediate form.  Operand 1 (V2DI) is tied to the destination,
;; operand 2 is a V2DI SSE register, and operands 3 and 4 are modeless
;; integer constants printed as the two immediates.  Uses the prefix_rep
;; attribute rather than prefix_data16.
6099 (define_insn "sse4a_insertqi"
6100 [(set (match_operand:V2DI 0 "register_operand" "=x")
6101 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6102 (match_operand:V2DI 2 "register_operand" "x")
6103 (match_operand 3 "const_int_operand" "")
6104 (match_operand 4 "const_int_operand" "")]
6107 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6108 [(set_attr "type" "sseins")
6109 (set_attr "prefix_rep" "1")
6110 (set_attr "mode" "TI")])
;; insertq, register form.  Operand 1 (V2DI) is tied to the destination and
;; operand 2 is a V2DI SSE register; memory is not accepted for operand 2.
6112 (define_insn "sse4a_insertq"
6113 [(set (match_operand:V2DI 0 "register_operand" "=x")
6114 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6115 (match_operand:V2DI 2 "register_operand" "x")]
6118 "insertq\t{%2, %0|%0, %2}"
6119 [(set_attr "type" "sseins")
6120 (set_attr "prefix_rep" "1")
6121 (set_attr "mode" "TI")])
6123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6125 ;; Intel SSE4.1 instructions
6127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendps/blendpd (SSEMODEF2P: V4SF, V2DF), written as a vec_merge whose
;; first arm is operand 2 (register or memory) and whose second arm is
;; operand 1 (tied to the destination).  The merge mask is immediate
;; operand 3, range-checked by a const_0_to_<blendbits>_operand predicate.
6129 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6130 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6131 (vec_merge:SSEMODEF2P
6132 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6133 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6134 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6136 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6137 [(set_attr "type" "ssemov")
6138 (set_attr "prefix_extra" "1")
6139 (set_attr "mode" "<MODE>")])
;; blendvps/blendvpd (SSEMODEF2P).  Operands 0, 1 and 2 use the *_not_xmm0
;; predicates, while operand 3 is a register with the "Yz" constraint
;; (presumably xmm0, given the predicate names on the other operands --
;; confirm against the constraint definitions).
6141 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6142 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6144 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6145 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6146 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6149 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6150 [(set_attr "type" "ssemov")
6151 (set_attr "prefix_extra" "1")
6152 (set_attr "mode" "<MODE>")])
;; dpps/dppd (SSEMODEF2P).  Operand 1 uses "%0": tied to the destination and
;; marked commutative with operand 2 (register or memory).  Operand 3 is an
;; immediate in the range accepted by const_0_to_255_operand.
6154 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6155 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6157 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6158 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6159 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6162 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6163 [(set_attr "type" "ssemul")
6164 (set_attr "prefix_extra" "1")
6165 (set_attr "mode" "<MODE>")])
;; movntdqa: loads V2DI memory operand 1 into SSE register operand 0,
;; expressed as an unspec.  Only a memory source is accepted.
6167 (define_insn "sse4_1_movntdqa"
6168 [(set (match_operand:V2DI 0 "register_operand" "=x")
6169 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6172 "movntdqa\t{%1, %0|%0, %1}"
6173 [(set_attr "type" "ssecvt")
6174 (set_attr "prefix_extra" "1")
6175 (set_attr "mode" "TI")])
;; mpsadbw on V16QI, expressed as an unspec.  Operand 1 is tied to the
;; destination, operand 2 may be a register or memory, and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
6177 (define_insn "sse4_1_mpsadbw"
6178 [(set (match_operand:V16QI 0 "register_operand" "=x")
6179 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6180 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6181 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6184 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6185 [(set_attr "type" "sselog1")
6186 (set_attr "prefix_extra" "1")
6187 (set_attr "mode" "TI")])
;; packusdw: two V4SI inputs produce one V8HI result.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
6189 (define_insn "sse4_1_packusdw"
6190 [(set (match_operand:V8HI 0 "register_operand" "=x")
6193 (match_operand:V4SI 1 "register_operand" "0"))
6195 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6197 "packusdw\t{%2, %0|%0, %2}"
6198 [(set_attr "type" "sselog")
6199 (set_attr "prefix_extra" "1")
6200 (set_attr "mode" "TI")])
;; pblendvb on V16QI, expressed as an unspec.  Operands 0, 1 and 2 use the
;; *_not_xmm0 predicates, while operand 3 is a register with the "Yz"
;; constraint (presumably xmm0, given the predicate names on the other
;; operands -- confirm against the constraint definitions).
6202 (define_insn "sse4_1_pblendvb"
6203 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6204 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6205 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6206 (match_operand:V16QI 3 "register_operand" "Yz")]
6209 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6210 [(set_attr "type" "ssemov")
6211 (set_attr "prefix_extra" "1")
6212 (set_attr "mode" "TI")])
;; pblendw on V8HI.  Operand 2 (register or memory) is listed before
;; operand 1 (tied to the destination) in the RTL; operand 3 is the
;; immediate mask, accepted by const_0_to_255_operand.
6214 (define_insn "sse4_1_pblendw"
6215 [(set (match_operand:V8HI 0 "register_operand" "=x")
6217 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6218 (match_operand:V8HI 1 "register_operand" "0")
6219 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6221 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6222 [(set_attr "type" "ssemov")
6223 (set_attr "prefix_extra" "1")
6224 (set_attr "mode" "TI")])
;; phminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW of a single input that
;; may be a register or memory and is not tied to the destination.
6226 (define_insn "sse4_1_phminposuw"
6227 [(set (match_operand:V8HI 0 "register_operand" "=x")
6228 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6229 UNSPEC_PHMINPOSUW))]
6231 "phminposuw\t{%1, %0|%0, %1}"
6232 [(set_attr "type" "sselog1")
6233 (set_attr "prefix_extra" "1")
6234 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: produces V8HI from elements of V16QI
;; register operand 1, with the element selection starting at index 0.
6236 (define_insn "sse4_1_extendv8qiv8hi2"
6237 [(set (match_operand:V8HI 0 "register_operand" "=x")
6240 (match_operand:V16QI 1 "register_operand" "x")
6241 (parallel [(const_int 0)
6250 "pmovsxbw\t{%1, %0|%0, %1}"
6251 [(set_attr "type" "ssemov")
6252 (set_attr "prefix_extra" "1")
6253 (set_attr "mode" "TI")])
;; Unnamed pmovsxbw variant whose source is a V8QI register or memory
;; operand, wrapped in vec_duplicate:V16QI before the element selection.
6255 (define_insn "*sse4_1_extendv8qiv8hi2"
6256 [(set (match_operand:V8HI 0 "register_operand" "=x")
6259 (vec_duplicate:V16QI
6260 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6261 (parallel [(const_int 0)
6270 "pmovsxbw\t{%1, %0|%0, %1}"
6271 [(set_attr "type" "ssemov")
6272 (set_attr "prefix_extra" "1")
6273 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: produces V4SI from elements of V16QI
;; register operand 1, with the element selection starting at index 0.
6275 (define_insn "sse4_1_extendv4qiv4si2"
6276 [(set (match_operand:V4SI 0 "register_operand" "=x")
6279 (match_operand:V16QI 1 "register_operand" "x")
6280 (parallel [(const_int 0)
6285 "pmovsxbd\t{%1, %0|%0, %1}"
6286 [(set_attr "type" "ssemov")
6287 (set_attr "prefix_extra" "1")
6288 (set_attr "mode" "TI")])
;; Unnamed pmovsxbd variant whose source is a V4QI register or memory
;; operand, wrapped in vec_duplicate:V16QI before the element selection.
6290 (define_insn "*sse4_1_extendv4qiv4si2"
6291 [(set (match_operand:V4SI 0 "register_operand" "=x")
6294 (vec_duplicate:V16QI
6295 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6296 (parallel [(const_int 0)
6301 "pmovsxbd\t{%1, %0|%0, %1}"
6302 [(set_attr "type" "ssemov")
6303 (set_attr "prefix_extra" "1")
6304 (set_attr "mode" "TI")])
;; pmovsxbq, register-source form: produces V2DI from elements of V16QI
;; register operand 1, with the element selection starting at index 0.
6306 (define_insn "sse4_1_extendv2qiv2di2"
6307 [(set (match_operand:V2DI 0 "register_operand" "=x")
6310 (match_operand:V16QI 1 "register_operand" "x")
6311 (parallel [(const_int 0)
6314 "pmovsxbq\t{%1, %0|%0, %1}"
6315 [(set_attr "type" "ssemov")
6316 (set_attr "prefix_extra" "1")
6317 (set_attr "mode" "TI")])
;; Unnamed pmovsxbq variant whose source is a V2QI register or memory
;; operand, wrapped in vec_duplicate:V16QI before the element selection.
6319 (define_insn "*sse4_1_extendv2qiv2di2"
6320 [(set (match_operand:V2DI 0 "register_operand" "=x")
6323 (vec_duplicate:V16QI
6324 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6325 (parallel [(const_int 0)
6328 "pmovsxbq\t{%1, %0|%0, %1}"
6329 [(set_attr "type" "ssemov")
6330 (set_attr "prefix_extra" "1")
6331 (set_attr "mode" "TI")])
;; pmovsxwd, register-source form: produces V4SI from elements of V8HI
;; register operand 1, with the element selection starting at index 0.
6333 (define_insn "sse4_1_extendv4hiv4si2"
6334 [(set (match_operand:V4SI 0 "register_operand" "=x")
6337 (match_operand:V8HI 1 "register_operand" "x")
6338 (parallel [(const_int 0)
6343 "pmovsxwd\t{%1, %0|%0, %1}"
6344 [(set_attr "type" "ssemov")
6345 (set_attr "prefix_extra" "1")
6346 (set_attr "mode" "TI")])
;; Unnamed pmovsxwd variant whose source is a register or memory operand.
;; The source mode is V4HI: the pattern extends four HImode elements to
;; V4SI, as its name says and as the *sse4_1_zero_extendv4hiv4si2 pattern
;; already declares.  It was previously written V2HI, which supplies only
;; two elements.
6348 (define_insn "*sse4_1_extendv4hiv4si2"
6349 [(set (match_operand:V4SI 0 "register_operand" "=x")
6353 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6354 (parallel [(const_int 0)
6359 "pmovsxwd\t{%1, %0|%0, %1}"
6360 [(set_attr "type" "ssemov")
6361 (set_attr "prefix_extra" "1")
6362 (set_attr "mode" "TI")])
;; pmovsxwq, register-source form: produces V2DI from elements of V8HI
;; register operand 1, with the element selection starting at index 0.
6364 (define_insn "sse4_1_extendv2hiv2di2"
6365 [(set (match_operand:V2DI 0 "register_operand" "=x")
6368 (match_operand:V8HI 1 "register_operand" "x")
6369 (parallel [(const_int 0)
6372 "pmovsxwq\t{%1, %0|%0, %1}"
6373 [(set_attr "type" "ssemov")
6374 (set_attr "prefix_extra" "1")
6375 (set_attr "mode" "TI")])
;; Unnamed pmovsxwq variant whose source is a register or memory operand.
;; The source mode is V2HI: the pattern extends two HImode elements to V2DI,
;; as its name says and as the *sse4_1_zero_extendv2hiv2di2 pattern already
;; declares.  It was previously written V8HI, which made this variant
;; duplicate the full-register operand mode of the named pattern.
6377 (define_insn "*sse4_1_extendv2hiv2di2"
6378 [(set (match_operand:V2DI 0 "register_operand" "=x")
6382 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6383 (parallel [(const_int 0)
6386 "pmovsxwq\t{%1, %0|%0, %1}"
6387 [(set_attr "type" "ssemov")
6388 (set_attr "prefix_extra" "1")
6389 (set_attr "mode" "TI")])
;; pmovsxdq, register-source form: produces V2DI from elements of V4SI
;; register operand 1, with the element selection starting at index 0.
6391 (define_insn "sse4_1_extendv2siv2di2"
6392 [(set (match_operand:V2DI 0 "register_operand" "=x")
6395 (match_operand:V4SI 1 "register_operand" "x")
6396 (parallel [(const_int 0)
6399 "pmovsxdq\t{%1, %0|%0, %1}"
6400 [(set_attr "type" "ssemov")
6401 (set_attr "prefix_extra" "1")
6402 (set_attr "mode" "TI")])
;; Unnamed pmovsxdq variant whose source is a V2SI register or memory
;; operand.
6404 (define_insn "*sse4_1_extendv2siv2di2"
6405 [(set (match_operand:V2DI 0 "register_operand" "=x")
6409 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6410 (parallel [(const_int 0)
6413 "pmovsxdq\t{%1, %0|%0, %1}"
6414 [(set_attr "type" "ssemov")
6415 (set_attr "prefix_extra" "1")
6416 (set_attr "mode" "TI")])
;; pmovzxbw, register-source form: produces V8HI from elements of V16QI
;; register operand 1, with the element selection starting at index 0.
6418 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6419 [(set (match_operand:V8HI 0 "register_operand" "=x")
6422 (match_operand:V16QI 1 "register_operand" "x")
6423 (parallel [(const_int 0)
6432 "pmovzxbw\t{%1, %0|%0, %1}"
6433 [(set_attr "type" "ssemov")
6434 (set_attr "prefix_extra" "1")
6435 (set_attr "mode" "TI")])
;; Unnamed pmovzxbw variant whose source is a V8QI register or memory
;; operand, wrapped in vec_duplicate:V16QI before the element selection.
6437 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6438 [(set (match_operand:V8HI 0 "register_operand" "=x")
6441 (vec_duplicate:V16QI
6442 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6443 (parallel [(const_int 0)
6452 "pmovzxbw\t{%1, %0|%0, %1}"
6453 [(set_attr "type" "ssemov")
6454 (set_attr "prefix_extra" "1")
6455 (set_attr "mode" "TI")])
;; pmovzxbd, register-source form: produces V4SI from elements of V16QI
;; register operand 1, with the element selection starting at index 0.
6457 (define_insn "sse4_1_zero_extendv4qiv4si2"
6458 [(set (match_operand:V4SI 0 "register_operand" "=x")
6461 (match_operand:V16QI 1 "register_operand" "x")
6462 (parallel [(const_int 0)
6467 "pmovzxbd\t{%1, %0|%0, %1}"
6468 [(set_attr "type" "ssemov")
6469 (set_attr "prefix_extra" "1")
6470 (set_attr "mode" "TI")])
;; Unnamed pmovzxbd variant whose source is a V4QI register or memory
;; operand, wrapped in vec_duplicate:V16QI before the element selection.
6472 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6473 [(set (match_operand:V4SI 0 "register_operand" "=x")
6476 (vec_duplicate:V16QI
6477 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6478 (parallel [(const_int 0)
6483 "pmovzxbd\t{%1, %0|%0, %1}"
6484 [(set_attr "type" "ssemov")
6485 (set_attr "prefix_extra" "1")
6486 (set_attr "mode" "TI")])
;; pmovzxbq, register-source form: produces V2DI from elements of V16QI
;; register operand 1, with the element selection starting at index 0.
6488 (define_insn "sse4_1_zero_extendv2qiv2di2"
6489 [(set (match_operand:V2DI 0 "register_operand" "=x")
6492 (match_operand:V16QI 1 "register_operand" "x")
6493 (parallel [(const_int 0)
6496 "pmovzxbq\t{%1, %0|%0, %1}"
6497 [(set_attr "type" "ssemov")
6498 (set_attr "prefix_extra" "1")
6499 (set_attr "mode" "TI")])
;; Unnamed pmovzxbq variant whose source is a V2QI register or memory
;; operand, wrapped in vec_duplicate:V16QI before the element selection.
6501 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6502 [(set (match_operand:V2DI 0 "register_operand" "=x")
6505 (vec_duplicate:V16QI
6506 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6507 (parallel [(const_int 0)
6510 "pmovzxbq\t{%1, %0|%0, %1}"
6511 [(set_attr "type" "ssemov")
6512 (set_attr "prefix_extra" "1")
6513 (set_attr "mode" "TI")])
;; pmovzxwd, register-source form: produces V4SI from elements of V8HI
;; register operand 1, with the element selection starting at index 0.
6515 (define_insn "sse4_1_zero_extendv4hiv4si2"
6516 [(set (match_operand:V4SI 0 "register_operand" "=x")
6519 (match_operand:V8HI 1 "register_operand" "x")
6520 (parallel [(const_int 0)
6525 "pmovzxwd\t{%1, %0|%0, %1}"
6526 [(set_attr "type" "ssemov")
6527 (set_attr "prefix_extra" "1")
6528 (set_attr "mode" "TI")])
;; Unnamed pmovzxwd variant whose source is a V4HI register or memory
;; operand.
6530 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6531 [(set (match_operand:V4SI 0 "register_operand" "=x")
6535 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6536 (parallel [(const_int 0)
6541 "pmovzxwd\t{%1, %0|%0, %1}"
6542 [(set_attr "type" "ssemov")
6543 (set_attr "prefix_extra" "1")
6544 (set_attr "mode" "TI")])
;; pmovzxwq, register-source form: produces V2DI from elements of V8HI
;; register operand 1, with the element selection starting at index 0.
6546 (define_insn "sse4_1_zero_extendv2hiv2di2"
6547 [(set (match_operand:V2DI 0 "register_operand" "=x")
6550 (match_operand:V8HI 1 "register_operand" "x")
6551 (parallel [(const_int 0)
6554 "pmovzxwq\t{%1, %0|%0, %1}"
6555 [(set_attr "type" "ssemov")
6556 (set_attr "prefix_extra" "1")
6557 (set_attr "mode" "TI")])
;; Unnamed pmovzxwq variant whose source is a V2HI register or memory
;; operand.
6559 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6560 [(set (match_operand:V2DI 0 "register_operand" "=x")
6564 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6565 (parallel [(const_int 0)
6568 "pmovzxwq\t{%1, %0|%0, %1}"
6569 [(set_attr "type" "ssemov")
6570 (set_attr "prefix_extra" "1")
6571 (set_attr "mode" "TI")])
;; pmovzxdq, register-source form: produces V2DI from elements of V4SI
;; register operand 1, with the element selection starting at index 0.
6573 (define_insn "sse4_1_zero_extendv2siv2di2"
6574 [(set (match_operand:V2DI 0 "register_operand" "=x")
6577 (match_operand:V4SI 1 "register_operand" "x")
6578 (parallel [(const_int 0)
6581 "pmovzxdq\t{%1, %0|%0, %1}"
6582 [(set_attr "type" "ssemov")
6583 (set_attr "prefix_extra" "1")
6584 (set_attr "mode" "TI")])
;; Unnamed pmovzxdq variant whose source is a V2SI register or memory
;; operand.
6586 (define_insn "*sse4_1_zero_extendv2siv2di2"
6587 [(set (match_operand:V2DI 0 "register_operand" "=x")
6591 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6592 (parallel [(const_int 0)
6595 "pmovzxdq\t{%1, %0|%0, %1}"
6596 [(set_attr "type" "ssemov")
6597 (set_attr "prefix_extra" "1")
6598 (set_attr "mode" "TI")])
6600 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6601 ;; But it is not really a compare instruction.
;; ptest: sets FLAGS_REG (CC mode) from an unspec of two V2DI inputs.
;; Operand 0 must be an SSE register; operand 1 may be a register or memory.
;; There is no register output, only the flags result.
6602 (define_insn "sse4_1_ptest"
6603 [(set (reg:CC FLAGS_REG)
6604 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6605 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6608 "ptest\t{%1, %0|%0, %1}"
6609 [(set_attr "type" "ssecomi")
6610 (set_attr "prefix_extra" "1")
6611 (set_attr "mode" "TI")])
;; roundps/roundpd (SSEMODEF2P).  Operand 1 may be a register or memory and
;; is not tied to the destination; operand 2 is the immediate control value,
;; accepted by const_0_to_15_operand.
6613 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6614 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6616 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6617 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6620 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6621 [(set_attr "type" "ssecvt")
6622 (set_attr "prefix_extra" "1")
6623 (set_attr "mode" "<MODE>")])
;; roundss/roundsd on the vector modes of SSEMODEF2P, written as a vec_merge
;; of the rounding of register operand 2 (control immediate operand 3,
;; const_0_to_15_operand) with operand 1, which is tied to the destination.
;; Operand 2 must be a register here; memory is not accepted.
6625 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6626 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6627 (vec_merge:SSEMODEF2P
6629 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6630 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6632 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6635 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6636 [(set_attr "type" "ssecvt")
6637 (set_attr "prefix_extra" "1")
6638 (set_attr "mode" "<MODE>")])
6640 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6642 ;; Intel SSE4.2 string/text processing instructions
6644 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern, split before reload (the condition requires
;; that reload has neither completed nor is in progress).  It sets three
;; results at once: SImode operand 0 (constraint "c"), V16QI operand 1
;; (constraint "Yz") and FLAGS_REG.  Inputs are operands 2-6; operands 3 and
;; 5 are pinned by "a" and "d", and operand 6 is the immediate.
;; The split code checks REG_UNUSED notes on curr_insn to compute which of
;; the three results are live (ecx, xmm0, flags) and emits
;; gen_sse4_2_pcmpestri and/or gen_sse4_2_pcmpestrm accordingly; when only
;; the flags are live it emits gen_sse4_2_pcmpestr_cconly instead.
;; The two alternatives differ in whether operand 4 is a register or memory,
;; mirrored by the "memory" attribute ("none,load").
6646 (define_insn_and_split "sse4_2_pcmpestr"
6647 [(set (match_operand:SI 0 "register_operand" "=c,c")
6649 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6650 (match_operand:SI 3 "register_operand" "a,a")
6651 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6652 (match_operand:SI 5 "register_operand" "d,d")
6653 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6655 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6663 (set (reg:CC FLAGS_REG)
6672 && !(reload_completed || reload_in_progress)"
6677 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6678 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6679 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6682 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6683 operands[3], operands[4],
6684 operands[5], operands[6]));
6686 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6687 operands[3], operands[4],
6688 operands[5], operands[6]));
6689 if (flags && !(ecx || xmm0))
6690 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6691 operands[2], operands[3],
6692 operands[4], operands[5],
6696 [(set_attr "type" "sselog")
6697 (set_attr "prefix_data16" "1")
6698 (set_attr "prefix_extra" "1")
6699 (set_attr "memory" "none,load")
6700 (set_attr "mode" "TI")])
;; pcmpestri: produces an SI result in operand 0 (constraint "c") and sets
;; FLAGS_REG.  Inputs are V16QI operands 1 and 3 (operand 3 may be memory),
;; SI operands 2 (constraint "a") and 4 (constraint "d"), and the immediate
;; in operand 5.
6702 (define_insn "sse4_2_pcmpestri"
6703 [(set (match_operand:SI 0 "register_operand" "=c,c")
6705 [(match_operand:V16QI 1 "register_operand" "x,x")
6706 (match_operand:SI 2 "register_operand" "a,a")
6707 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6708 (match_operand:SI 4 "register_operand" "d,d")
6709 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6711 (set (reg:CC FLAGS_REG)
6720 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6721 [(set_attr "type" "sselog")
6722 (set_attr "prefix_data16" "1")
6723 (set_attr "prefix_extra" "1")
6724 (set_attr "memory" "none,load")
6725 (set_attr "mode" "TI")])
;; pcmpestrm: produces a V16QI result in operand 0 (constraint "Yz") and sets
;; FLAGS_REG.  Inputs are V16QI operands 1 and 3 (operand 3 may be memory),
;; SI operands 2 (constraint "a") and 4 (constraint "d"), and the immediate
;; in operand 5.
6727 (define_insn "sse4_2_pcmpestrm"
6728 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6730 [(match_operand:V16QI 1 "register_operand" "x,x")
6731 (match_operand:SI 2 "register_operand" "a,a")
6732 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6733 (match_operand:SI 4 "register_operand" "d,d")
6734 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6736 (set (reg:CC FLAGS_REG)
6745 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6746 [(set_attr "type" "sselog")
6747 (set_attr "prefix_data16" "1")
6748 (set_attr "prefix_extra" "1")
6749 (set_attr "memory" "none,load")
6750 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the only value set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber a V16QI scratch ("Yz") and
;; emit pcmpestrm; alternatives 2-3 clobber an SI scratch ("c") and emit
;; pcmpestri.  Within each pair, operand 4 is a register or memory.
6752 (define_insn "sse4_2_pcmpestr_cconly"
6753 [(set (reg:CC FLAGS_REG)
6755 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6756 (match_operand:SI 3 "register_operand" "a,a,a,a")
6757 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6758 (match_operand:SI 5 "register_operand" "d,d,d,d")
6759 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6761 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6762 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6765 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6766 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6767 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6768 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6769 [(set_attr "type" "sselog")
6770 (set_attr "prefix_data16" "1")
6771 (set_attr "prefix_extra" "1")
6772 (set_attr "memory" "none,load,none,load")
6773 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is only split before reload.  The split code looks up
;; REG_UNUSED notes on curr_insn for each result (its locals call them ecx,
;; xmm0 and flags) and emits the pcmpistri / pcmpistrm forms; the flags-only
;; form is emitted when the flags are used but neither other result is.
6775 (define_insn_and_split "sse4_2_pcmpistr"
6776 [(set (match_operand:SI 0 "register_operand" "=c,c")
6778 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6779 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6780 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6782 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6788 (set (reg:CC FLAGS_REG)
6795 && !(reload_completed || reload_in_progress)"
6800 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6801 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6802 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6805 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6806 operands[3], operands[4]));
6808 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6809 operands[3], operands[4]));
6810 if (flags && !(ecx || xmm0))
6811 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6812 operands[2], operands[3],
6816 [(set_attr "type" "sselog")
6817 (set_attr "prefix_data16" "1")
6818 (set_attr "prefix_extra" "1")
6819 (set_attr "memory" "none,load")
6820 (set_attr "mode" "TI")])
;; pcmpistri: produces an SI result in operand 0 (constraint "c") and sets
;; FLAGS_REG.  Inputs are V16QI operands 1 and 2 (operand 2 may be memory)
;; and the immediate in operand 3.
6822 (define_insn "sse4_2_pcmpistri"
6823 [(set (match_operand:SI 0 "register_operand" "=c,c")
6825 [(match_operand:V16QI 1 "register_operand" "x,x")
6826 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6827 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6829 (set (reg:CC FLAGS_REG)
6836 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6837 [(set_attr "type" "sselog")
6838 (set_attr "prefix_data16" "1")
6839 (set_attr "prefix_extra" "1")
6840 (set_attr "memory" "none,load")
6841 (set_attr "mode" "TI")])
;; pcmpistrm: produces a V16QI result in operand 0 (constraint "Yz") and sets
;; FLAGS_REG.  Inputs are V16QI operands 1 and 2 (operand 2 may be memory)
;; and the immediate in operand 3.
6843 (define_insn "sse4_2_pcmpistrm"
6844 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6846 [(match_operand:V16QI 1 "register_operand" "x,x")
6847 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6848 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6850 (set (reg:CC FLAGS_REG)
6857 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6858 [(set_attr "type" "sselog")
6859 (set_attr "prefix_data16" "1")
6860 (set_attr "prefix_extra" "1")
6861 (set_attr "memory" "none,load")
6862 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: FLAGS_REG is the only value set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber a V16QI scratch ("Yz") and
;; emit pcmpistrm; alternatives 2-3 clobber an SI scratch ("c") and emit
;; pcmpistri.  Within each pair, operand 3 is a register or memory.
6864 (define_insn "sse4_2_pcmpistr_cconly"
6865 [(set (reg:CC FLAGS_REG)
6867 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6868 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6869 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6871 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6872 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6875 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6876 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6877 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6878 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6879 [(set_attr "type" "sselog")
6880 (set_attr "prefix_data16" "1")
6881 (set_attr "prefix_extra" "1")
6882 (set_attr "memory" "none,load,none,load")
6883 (set_attr "mode" "TI")])
6885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6887 ;; SSE5 instructions
6889 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6891 ;; SSE5 parallel integer multiply/add instructions.
6892 ;; Note the instruction does not allow the value being added to be a memory
6893 ;; operand. However, pretending via the nonimmediate_operand predicate that
6894 ;; it does, and splitting it later, allows the following to be recognized:
6895 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI operands.  Operand 3 is tied to the destination in every
;; alternative.  Operands 1 and 2 may each be memory in one alternative; the
;; third template swaps %1 and %2 for the alternative where operand 1 is the
;; memory operand.  Operands are validated with
;; ix86_sse5_valid_op_p (..., 2).
6896 (define_insn "sse5_pmacsww"
6897 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6900 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6901 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6902 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6903 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6905 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6906 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6907 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6908 [(set_attr "type" "ssemuladd")
6909 (set_attr "mode" "TI")])
6911 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a last
;; argument of 1 but pass it with 2, and the destination register is not
;; mentioned in any of operands 1-3.  It calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_pmacsww.
6913 [(set (match_operand:V8HI 0 "register_operand" "")
6915 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6916 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6917 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6919 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6920 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6921 && !reg_mentioned_p (operands[0], operands[1])
6922 && !reg_mentioned_p (operands[0], operands[2])
6923 && !reg_mentioned_p (operands[0], operands[3])"
6926 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6927 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww: V8HI product of operands 1 and 2 combined with operand 3, which
;; is tied to the destination in every alternative.  The third template swaps
;; %1 and %2 for the alternative where operand 1 is the memory operand.
;; Operands are validated with ix86_sse5_valid_op_p (..., 1).
6932 (define_insn "sse5_pmacssww"
6933 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6935 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6936 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6937 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6938 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6940 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6941 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6942 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6943 [(set_attr "type" "ssemuladd")
6944 (set_attr "mode" "TI")])
6946 ;; Note the instruction does not allow the value being added to be a memory
6947 ;; operand. However, pretending via the nonimmediate_operand predicate that
6948 ;; it does, and splitting it later, allows the following to be recognized:
6949 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI operands.  Operand 3 is tied to the destination in every
;; alternative.  Operands 1 and 2 may each be memory in one alternative; the
;; third template swaps %1 and %2 for the alternative where operand 1 is the
;; memory operand.  Operands are validated with
;; ix86_sse5_valid_op_p (..., 2).
6950 (define_insn "sse5_pmacsdd"
6951 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6954 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6955 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6956 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6957 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6959 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6960 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6961 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6962 [(set_attr "type" "ssemuladd")
6963 (set_attr "mode" "TI")])
6965 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a last
;; argument of 1 but pass it with 2, and the destination register is not
;; mentioned in any of operands 1-3.  It calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_pmacsdd.
6967 [(set (match_operand:V4SI 0 "register_operand" "")
6969 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
6970 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6971 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
6973 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6974 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6975 && !reg_mentioned_p (operands[0], operands[1])
6976 && !reg_mentioned_p (operands[0], operands[2])
6977 && !reg_mentioned_p (operands[0], operands[3])"
6980 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
6981 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd: V4SI product of operands 1 and 2 combined with operand 3, which
;; is tied to the destination in every alternative.  The third template swaps
;; %1 and %2 for the alternative where operand 1 is the memory operand.
;; Operands are validated with ix86_sse5_valid_op_p (..., 1).
6986 (define_insn "sse5_pmacssdd"
6987 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6989 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6990 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6991 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6992 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6994 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6995 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6996 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6997 [(set_attr "type" "ssemuladd")
6998 (set_attr "mode" "TI")])
;; pmacssdql: V2DI result from V4SI operands 1 and 2, whose visible element
;; selections start at element 1.  Operand 3 (V2DI register) is tied to the
;; destination.  The third template swaps %1 and %2 for the alternative where
;; operand 1 is the memory operand.
7000 (define_insn "sse5_pmacssdql"
7001 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7006 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7007 (parallel [(const_int 1)
7010 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7011 (parallel [(const_int 1)
7013 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7014 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7016 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7017 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7018 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7019 [(set_attr "type" "ssemuladd")
7020 (set_attr "mode" "TI")])
;; pmacssdqh: V2DI result from V4SI operands 1 and 2, whose visible element
;; selections start at element 0.  Operand 3 (V2DI register) is tied to the
;; destination.  The third template swaps %1 and %2 for the alternative where
;; operand 1 is the memory operand.
7022 (define_insn "sse5_pmacssdqh"
7023 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7028 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7029 (parallel [(const_int 0)
7033 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7034 (parallel [(const_int 0)
7036 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7037 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7039 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7040 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7041 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7042 [(set_attr "type" "ssemuladd")
7043 (set_attr "mode" "TI")])
;; pmacsdql: V2DI result from V4SI operands 1 and 2, whose visible element
;; selections start at element 1.  Operand 3 (V2DI register) is tied to the
;; destination.  The third template swaps %1 and %2 for the alternative where
;; operand 1 is the memory operand.
7045 (define_insn "sse5_pmacsdql"
7046 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7051 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7052 (parallel [(const_int 1)
7056 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7057 (parallel [(const_int 1)
7059 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7060 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7062 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7063 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7064 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7065 [(set_attr "type" "ssemuladd")
7066 (set_attr "mode" "TI")])
;; Variant of the pmacsdql pattern in which operand 3 is a memory operand.
;; The destination is early-clobbered ("=&x").  The pattern is split once
;; reload has completed, or earlier when the destination register is not
;; mentioned in operand 1 or operand 2.
7068 (define_insn_and_split "*sse5_pmacsdql_mem"
7069 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7074 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7075 (parallel [(const_int 1)
7079 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7080 (parallel [(const_int 1)
7082 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7083 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
7085 "&& (reload_completed
7086 || (!reg_mentioned_p (operands[0], operands[1])
7087 && !reg_mentioned_p (operands[0], operands[2])))"
7096 (parallel [(const_int 1)
7101 (parallel [(const_int 1)
7105 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7106 ;; fake it with a multiply/add. In general, we expect the define_split to
7107 ;; occur before register allocation, so we have to handle the corner case where
7108 ;; the target is the same as operands 1/2
;; V2DI result from V4SI operands 1 and 2 using elements 1 and 3 of operand
;; 2 (the selection on operand 1 visibly starts at element 1).  The
;; destination is early-clobbered.  The pattern is split once reload has
;; completed, or earlier when the destination is not mentioned in operand 1
;; or 2; the split code sets operands[3] to the V2DI zero constant.
7109 (define_insn_and_split "sse5_mulv2div2di3_low"
7110 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7114 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7115 (parallel [(const_int 1)
7119 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7120 (parallel [(const_int 1)
7121 (const_int 3)])))))]
7124 "&& (reload_completed
7125 || (!reg_mentioned_p (operands[0], operands[1])
7126 && !reg_mentioned_p (operands[0], operands[2])))"
7135 (parallel [(const_int 1)
7140 (parallel [(const_int 1)
7144 operands[3] = CONST0_RTX (V2DImode);
7146 [(set_attr "type" "ssemuladd")
7147 (set_attr "mode" "TI")])
;; pmacsdqh: V2DI result from V4SI operands 1 and 2, whose visible element
;; selections start at element 0.  Operand 3 (V2DI register) is tied to the
;; destination.  The third template swaps %1 and %2 for the alternative where
;; operand 1 is the memory operand.
7149 (define_insn "sse5_pmacsdqh"
7150 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7155 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7156 (parallel [(const_int 0)
7160 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7161 (parallel [(const_int 0)
7163 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7164 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7166 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7167 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7168 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7169 [(set_attr "type" "ssemuladd")
7170 (set_attr "mode" "TI")])
;; Variant of the pmacsdqh pattern in which operand 3 is a memory operand.
;; The destination is early-clobbered ("=&x").  The pattern is split once
;; reload has completed, or earlier when the destination register is not
;; mentioned in operand 1 or operand 2.
7172 (define_insn_and_split "*sse5_pmacsdqh_mem"
7173 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7178 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7179 (parallel [(const_int 0)
7183 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7184 (parallel [(const_int 0)
7186 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7187 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
7189 "&& (reload_completed
7190 || (!reg_mentioned_p (operands[0], operands[1])
7191 && !reg_mentioned_p (operands[0], operands[2])))"
7200 (parallel [(const_int 0)
7205 (parallel [(const_int 0)
7209 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7210 ;; fake it with a multiply/add. In general, we expect the define_split to
7211 ;; occur before register allocation, so we have to handle the corner case where
7212 ;; the target is the same as either operands[1] or operands[2]
;; V2DI result from V4SI operands 1 and 2 using elements 0 and 2 of operand
;; 2 (the selection on operand 1 visibly starts at element 0).  The
;; destination is early-clobbered.  The pattern is split once reload has
;; completed, or earlier when the destination is not mentioned in operand 1
;; or 2; the split code sets operands[3] to the V2DI zero constant.
7213 (define_insn_and_split "sse5_mulv2div2di3_high"
7214 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7218 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7219 (parallel [(const_int 0)
7223 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7224 (parallel [(const_int 0)
7225 (const_int 2)])))))]
7228 "&& (reload_completed
7229 || (!reg_mentioned_p (operands[0], operands[1])
7230 && !reg_mentioned_p (operands[0], operands[2])))"
7239 (parallel [(const_int 0)
7244 (parallel [(const_int 0)
7248 operands[3] = CONST0_RTX (V2DImode);
7250 [(set_attr "type" "ssemuladd")
7251 (set_attr "mode" "TI")])
7253 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V4SI result from V8HI operands 1 and 2, whose visible element
;; selections start at element 1.  Operand 3 (V4SI register) is tied to the
;; destination.  The third template swaps %1 and %2 for the alternative where
;; operand 1 is the memory operand.
7254 (define_insn "sse5_pmacsswd"
7255 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7260 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7261 (parallel [(const_int 1)
7267 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7268 (parallel [(const_int 1)
7272 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7273 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7275 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7276 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7277 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7278 [(set_attr "type" "ssemuladd")
7279 (set_attr "mode" "TI")])
;; pmacswd: V4SI result from V8HI operands 1 and 2, whose visible element
;; selections start at element 1.  Operand 3 (V4SI register) is tied to the
;; destination.  The third template swaps %1 and %2 for the alternative where
;; operand 1 is the memory operand.
7281 (define_insn "sse5_pmacswd"
7282 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7287 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7288 (parallel [(const_int 1)
7294 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7295 (parallel [(const_int 1)
7299 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7300 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7302 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7303 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7304 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7305 [(set_attr "type" "ssemuladd")
7306 (set_attr "mode" "TI")])
;; pmadcsswd: V4SI result from V8HI operands 1 and 2.  The pattern contains
;; element selections starting at element 0 and at element 1.  Operand 3
;; (V4SI register) is tied to the destination.  The third template swaps %1
;; and %2 for the alternative where operand 1 is the memory operand.
7308 (define_insn "sse5_pmadcsswd"
7309 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7315 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7316 (parallel [(const_int 0)
7322 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7323 (parallel [(const_int 0)
7331 (parallel [(const_int 1)
7338 (parallel [(const_int 1)
7342 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7343 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7345 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7346 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7347 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7348 [(set_attr "type" "ssemuladd")
7349 (set_attr "mode" "TI")])
;; pmadcswd: V4SI result from V8HI operands 1 and 2.  The pattern contains
;; element selections starting at element 0 and at element 1.  Operand 3
;; (V4SI register) is tied to the destination.  The third template swaps %1
;; and %2 for the alternative where operand 1 is the memory operand.
7351 (define_insn "sse5_pmadcswd"
7352 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7358 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7359 (parallel [(const_int 0)
7365 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7366 (parallel [(const_int 0)
7374 (parallel [(const_int 1)
7381 (parallel [(const_int 1)
7385 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7386 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7388 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7389 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7390 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7391 [(set_attr "type" "ssemuladd")
7392 (set_attr "mode" "TI")])
7394 ;; SSE5 parallel XMM conditional moves
;; pcmov for every mode in SSEMODE: an if_then_else that selects between
;; operand 1 and operand 2 under control of operand 3.  Alternatives 0-1 tie
;; operand 3 to the destination, alternatives 2-3 tie operand 1 to it.  All
;; four alternatives emit the same template.
7395 (define_insn "sse5_pcmov_<mode>"
7396 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
7397 (if_then_else:SSEMODE
7398 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
7399 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
7400 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
7401 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7403 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7404 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7405 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7406 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7407 [(set_attr "type" "sse4arg")])
7409 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: V16QI source (register or memory) to a V8HI register.  The
;; visible element selections start at elements 0 and 1.
7410 (define_insn "sse5_phaddbw"
7411 [(set (match_operand:V8HI 0 "register_operand" "=x")
7415 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7416 (parallel [(const_int 0)
7427 (parallel [(const_int 1)
7434 (const_int 15)])))))]
7436 "phaddbw\t{%1, %0|%0, %1}"
7437 [(set_attr "type" "sseiadd1")])
;; phaddbd: V16QI source (register or memory) to a V4SI register.  The
;; visible element selections start at elements 0, 1, 2 and 3.
7439 (define_insn "sse5_phaddbd"
7440 [(set (match_operand:V4SI 0 "register_operand" "=x")
7445 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7446 (parallel [(const_int 0)
7453 (parallel [(const_int 1)
7461 (parallel [(const_int 2)
7468 (parallel [(const_int 3)
7471 (const_int 15)]))))))]
7473 "phaddbd\t{%1, %0|%0, %1}"
7474 [(set_attr "type" "sseiadd1")])
;; phaddbq: V16QI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0-3 and 8-11.
7476 (define_insn "sse5_phaddbq"
7477 [(set (match_operand:V2DI 0 "register_operand" "=x")
7483 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7484 (parallel [(const_int 0)
7489 (parallel [(const_int 1)
7495 (parallel [(const_int 2)
7500 (parallel [(const_int 3)
7507 (parallel [(const_int 8)
7512 (parallel [(const_int 9)
7518 (parallel [(const_int 10)
7523 (parallel [(const_int 11)
7524 (const_int 15)])))))))]
7526 "phaddbq\t{%1, %0|%0, %1}"
7527 [(set_attr "type" "sseiadd1")])
;; phaddwd: V8HI source (register or memory) to a V4SI register.  The
;; visible element selections start at elements 0 and 1.
7529 (define_insn "sse5_phaddwd"
7530 [(set (match_operand:V4SI 0 "register_operand" "=x")
7534 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7535 (parallel [(const_int 0)
7542 (parallel [(const_int 1)
7545 (const_int 7)])))))]
7547 "phaddwd\t{%1, %0|%0, %1}"
7548 [(set_attr "type" "sseiadd1")])
;; phaddwq: V8HI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0, 1, 2 and 3.
7550 (define_insn "sse5_phaddwq"
7551 [(set (match_operand:V2DI 0 "register_operand" "=x")
7556 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7557 (parallel [(const_int 0)
7562 (parallel [(const_int 1)
7568 (parallel [(const_int 2)
7573 (parallel [(const_int 3)
7574 (const_int 7)]))))))]
7576 "phaddwq\t{%1, %0|%0, %1}"
7577 [(set_attr "type" "sseiadd1")])
;; phadddq: V4SI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0 and 1.
7579 (define_insn "sse5_phadddq"
7580 [(set (match_operand:V2DI 0 "register_operand" "=x")
7584 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7585 (parallel [(const_int 0)
7590 (parallel [(const_int 1)
7591 (const_int 3)])))))]
7593 "phadddq\t{%1, %0|%0, %1}"
7594 [(set_attr "type" "sseiadd1")])
;; phaddubw: V16QI source (register or memory) to a V8HI register.  The
;; visible element selections start at elements 0 and 1.
7596 (define_insn "sse5_phaddubw"
7597 [(set (match_operand:V8HI 0 "register_operand" "=x")
7601 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7602 (parallel [(const_int 0)
7613 (parallel [(const_int 1)
7620 (const_int 15)])))))]
7622 "phaddubw\t{%1, %0|%0, %1}"
7623 [(set_attr "type" "sseiadd1")])
;; phaddubd: V16QI source (register or memory) to a V4SI register.  The
;; visible element selections start at elements 0, 1, 2 and 3.
7625 (define_insn "sse5_phaddubd"
7626 [(set (match_operand:V4SI 0 "register_operand" "=x")
7631 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7632 (parallel [(const_int 0)
7639 (parallel [(const_int 1)
7647 (parallel [(const_int 2)
7654 (parallel [(const_int 3)
7657 (const_int 15)]))))))]
7659 "phaddubd\t{%1, %0|%0, %1}"
7660 [(set_attr "type" "sseiadd1")])
;; phaddubq: V16QI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0-3 and 8-11.
7662 (define_insn "sse5_phaddubq"
7663 [(set (match_operand:V2DI 0 "register_operand" "=x")
7669 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7670 (parallel [(const_int 0)
7675 (parallel [(const_int 1)
7681 (parallel [(const_int 2)
7686 (parallel [(const_int 3)
7693 (parallel [(const_int 8)
7698 (parallel [(const_int 9)
7704 (parallel [(const_int 10)
7709 (parallel [(const_int 11)
7710 (const_int 15)])))))))]
7712 "phaddubq\t{%1, %0|%0, %1}"
7713 [(set_attr "type" "sseiadd1")])
;; phadduwd: V8HI source (register or memory) to a V4SI register.  The
;; visible element selections start at elements 0 and 1.
7715 (define_insn "sse5_phadduwd"
7716 [(set (match_operand:V4SI 0 "register_operand" "=x")
7720 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7721 (parallel [(const_int 0)
7728 (parallel [(const_int 1)
7731 (const_int 7)])))))]
7733 "phadduwd\t{%1, %0|%0, %1}"
7734 [(set_attr "type" "sseiadd1")])
;; phadduwq: V8HI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0, 1, 2 and 3.
7736 (define_insn "sse5_phadduwq"
7737 [(set (match_operand:V2DI 0 "register_operand" "=x")
7742 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7743 (parallel [(const_int 0)
7748 (parallel [(const_int 1)
7754 (parallel [(const_int 2)
7759 (parallel [(const_int 3)
7760 (const_int 7)]))))))]
7762 "phadduwq\t{%1, %0|%0, %1}"
7763 [(set_attr "type" "sseiadd1")])
;; phaddudq: V4SI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0 and 1.
7765 (define_insn "sse5_phaddudq"
7766 [(set (match_operand:V2DI 0 "register_operand" "=x")
7770 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7771 (parallel [(const_int 0)
7776 (parallel [(const_int 1)
7777 (const_int 3)])))))]
7779 "phaddudq\t{%1, %0|%0, %1}"
7780 [(set_attr "type" "sseiadd1")])
;; phsubbw: V16QI source (register or memory) to a V8HI register.  The
;; visible element selections start at elements 0 and 1.
7782 (define_insn "sse5_phsubbw"
7783 [(set (match_operand:V8HI 0 "register_operand" "=x")
7787 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7788 (parallel [(const_int 0)
7799 (parallel [(const_int 1)
7806 (const_int 15)])))))]
7808 "phsubbw\t{%1, %0|%0, %1}"
7809 [(set_attr "type" "sseiadd1")])
;; phsubwd: V8HI source (register or memory) to a V4SI register.  The
;; visible element selections start at elements 0 and 1.
7811 (define_insn "sse5_phsubwd"
7812 [(set (match_operand:V4SI 0 "register_operand" "=x")
7816 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7817 (parallel [(const_int 0)
7824 (parallel [(const_int 1)
7827 (const_int 7)])))))]
7829 "phsubwd\t{%1, %0|%0, %1}"
7830 [(set_attr "type" "sseiadd1")])
;; phsubdq: V4SI source (register or memory) to a V2DI register.  The
;; visible element selections start at elements 0 and 1.
7832 (define_insn "sse5_phsubdq"
7833 [(set (match_operand:V2DI 0 "register_operand" "=x")
7837 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7838 (parallel [(const_int 0)
7843 (parallel [(const_int 1)
7844 (const_int 3)])))))]
7846 "phsubdq\t{%1, %0|%0, %1}"
7847 [(set_attr "type" "sseiadd1")])
7849 ;; SSE5 permute instructions
;; pperm expressed as UNSPEC_SSE5_PERMUTE over three V16QI inputs.
;; Alternatives 0-1 tie operand 1 to the destination; alternatives 2-3 tie
;; operand 3 to it.
7850 (define_insn "sse5_pperm"
7851 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7853 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7854 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7855 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7856 UNSPEC_SSE5_PERMUTE))]
7857 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7858 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7859 [(set_attr "type" "sse4arg")
7860 (set_attr "mode" "TI")])
7862 ;; The following are for the various unpack insns, which don't need the first
7863 ;; source operand, so we can just use the output operand for the first operand.
7864 ;; This allows either of the other two operands to be a memory operand. We
7865 ;; can't just use the first operand as an argument to the normal pperm because
7866 ;; then an output-only argument suddenly becomes an input operand.
;; pperm used to widen V16QI elements into a V8HI register.  Operand 1 is the
;; V16QI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is a V16QI vector that is only "used".  The destination
;; register is passed twice in the template.  The two alternatives allow
;; memory in operand 1 or in operand 3, so the condition requires that at
;; least one of those two vector inputs is a register; operand 2 is never a
;; register and is therefore not what the register test applies to.
7867 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7868 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7871 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7872 (match_operand 2 "" "")))) ;; parallel with const_int's
7873 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7875 && (register_operand (operands[1], V16QImode)
7876 || register_operand (operands[3], V16QImode))"
7877 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7878 [(set_attr "type" "sseadd")
7879 (set_attr "mode" "TI")])
;; pperm used to widen V16QI elements into a V8HI register (the "sign"
;; counterpart of the zero variant).  Operand 1 is the V16QI source, operand
;; 2 is the parallel of const_ints selecting elements, and operand 3 is a
;; V16QI vector that is only "used".  The two alternatives allow memory in
;; operand 1 or in operand 3, so the condition requires that at least one of
;; those two vector inputs is a register; operand 2 is never a register and
;; is therefore not what the register test applies to.
7881 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7882 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7885 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7886 (match_operand 2 "" "")))) ;; parallel with const_int's
7887 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7889 && (register_operand (operands[1], V16QImode)
7890 || register_operand (operands[3], V16QImode))"
7891 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7892 [(set_attr "type" "sseadd")
7893 (set_attr "mode" "TI")])
;; pperm used to widen V8HI elements into a V4SI register.  Operand 1 is the
;; V8HI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is a V16QI vector that is only "used".  The two alternatives
;; allow memory in operand 1 or in operand 3, so the condition requires that
;; at least one of those two vector inputs is a register; operand 2 is never
;; a register and is therefore not what the register test applies to.
7895 (define_insn "sse5_pperm_zero_v8hi_v4si"
7896 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7899 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7900 (match_operand 2 "" "")))) ;; parallel with const_int's
7901 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7903 && (register_operand (operands[1], V8HImode)
7904 || register_operand (operands[3], V16QImode))"
7905 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7906 [(set_attr "type" "sseadd")
7907 (set_attr "mode" "TI")])
;; pperm used to widen V8HI elements into a V4SI register (the "sign"
;; counterpart of the zero variant).  Operand 1 is the V8HI source, operand 2
;; is the parallel of const_ints selecting elements, and operand 3 is a V16QI
;; vector that is only "used".  The two alternatives allow memory in operand
;; 1 or in operand 3, so the condition requires that at least one of those
;; two vector inputs is a register; operand 2 is never a register and is
;; therefore not what the register test applies to.
7909 (define_insn "sse5_pperm_sign_v8hi_v4si"
7910 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7913 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7914 (match_operand 2 "" "")))) ;; parallel with const_int's
7915 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7917 && (register_operand (operands[1], V8HImode)
7918 || register_operand (operands[3], V16QImode))"
7919 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7920 [(set_attr "type" "sseadd")
7921 (set_attr "mode" "TI")])
;; pperm used to widen V4SI elements into a V2DI register.  Operand 1 is the
;; V4SI source, operand 2 is the parallel of const_ints selecting elements,
;; and operand 3 is a V16QI vector that is only "used".  The two alternatives
;; allow memory in operand 1 or in operand 3, so the condition requires that
;; at least one of those two vector inputs is a register; operand 2 is never
;; a register and is therefore not what the register test applies to.
7923 (define_insn "sse5_pperm_zero_v4si_v2di"
7924 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7927 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7928 (match_operand 2 "" "")))) ;; parallel with const_int's
7929 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7931 && (register_operand (operands[1], V4SImode)
7932 || register_operand (operands[3], V16QImode))"
7933 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7934 [(set_attr "type" "sseadd")
7935 (set_attr "mode" "TI")])
;; pperm used to widen V4SI elements into a V2DI register (the "sign"
;; counterpart of the zero variant).  Operand 1 is the V4SI source, operand 2
;; is the parallel of const_ints selecting elements, and operand 3 is a V16QI
;; vector that is only "used".  The two alternatives allow memory in operand
;; 1 or in operand 3, so the condition requires that at least one of those
;; two vector inputs is a register; operand 2 is never a register and is
;; therefore not what the register test applies to.
7937 (define_insn "sse5_pperm_sign_v4si_v2di"
7938 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7941 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7942 (match_operand 2 "" "")))) ;; parallel with const_int's
7943 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7945 && (register_operand (operands[1], V4SImode)
7946 || register_operand (operands[3], V16QImode))"
7947 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7948 [(set_attr "type" "sseadd")
7949 (set_attr "mode" "TI")])
7951 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; pperm used to pack two V2DI inputs (operands 1 and 2) into a V4SI
;; register; operand 3 is a V16QI vector that is only "used".  Alternatives
;; 0-1 tie operand 1 to the destination; alternatives 2-3 tie operand 3 to it.
7952 (define_insn "sse5_pperm_pack_v2di_v4si"
7953 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7956 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7958 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7959 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7960 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7961 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7962 [(set_attr "type" "sse4arg")
7963 (set_attr "mode" "TI")])
;; pperm used to pack two V4SI inputs (operands 1 and 2) into a V8HI
;; register; operand 3 is a V16QI vector that is only "used".  Alternatives
;; 0-1 tie operand 1 to the destination; alternatives 2-3 tie operand 3 to it.
7965 (define_insn "sse5_pperm_pack_v4si_v8hi"
7966 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7969 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7971 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7972 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7973 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7974 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7975 [(set_attr "type" "sse4arg")
7976 (set_attr "mode" "TI")])
;; pperm used to pack two V8HI inputs (operands 1 and 2) into a V16QI
;; register; operand 3 is a V16QI vector that is only "used".  Alternatives
;; 0-1 tie operand 1 to the destination; alternatives 2-3 tie operand 3 to it.
7978 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7979 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7982 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7984 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7985 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7986 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7987 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7988 [(set_attr "type" "sse4arg")
7989 (set_attr "mode" "TI")])
7991 ;; Floating point permutation (permps, permpd)
;; permps (V4SF) / permpd (V2DF), expressed as UNSPEC_SSE5_PERMUTE over two
;; float-vector inputs (operands 1 and 2) and a V16QI operand 3.
;; Alternatives 0-1 tie operand 1 to the destination; alternatives 2-3 tie
;; operand 3 to it.
7992 (define_insn "sse5_perm<mode>"
7993 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7995 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7996 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7997 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7998 UNSPEC_SSE5_PERMUTE))]
7999 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8000 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8001 [(set_attr "type" "sse4arg")
8002 (set_attr "mode" "<MODE>")])
8004 ;; SSE5 packed rotate instructions
;; Expander for vector rotate-left over the SSEMODE1248 modes.  When operand
;; 2 is not accepted by const_0_to_<sserotatemax>_operand, the count is
;; converted to the element mode if its mode differs, placed in every slot of
;; a PARALLEL, loaded into a fresh vector register with vec_init, and the
;; vector-count pattern sse5_vrotl<mode>3 is emitted with that register.
8005 (define_expand "rotl<mode>3"
8006 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
8008 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
8009 (match_operand:SI 2 "general_operand")))]
8012 /* If we were given a scalar, convert it to parallel */
8013 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
8015 rtvec vs = rtvec_alloc (<ssescalarnum>);
8016 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
8017 rtx reg = gen_reg_rtx (<MODE>mode);
8018 rtx op2 = operands[2];
8021 if (GET_MODE (op2) != <ssescalarmode>mode)
8023 op2 = gen_reg_rtx (<ssescalarmode>mode);
8024 convert_move (op2, operands[2], false);
8027 for (i = 0; i < <ssescalarnum>; i++)
8028 RTVEC_ELT (vs, i) = op2;
8030 emit_insn (gen_vec_init<mode> (reg, par));
8031 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for vector rotate-right over the SSEMODE1248 modes.  When operand
;; 2 is not accepted by const_0_to_<sserotatemax>_operand, the count is
;; converted to the element mode if its mode differs, placed in every slot of
;; a PARALLEL and loaded into a vector register with vec_init.  That vector
;; is then negated, and the negated counts are passed to sse5_vrotl<mode>3.
8036 (define_expand "rotr<mode>3"
8037 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
8038 (rotatert:SSEMODE1248
8039 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
8040 (match_operand:SI 2 "general_operand")))]
8043 /* If we were given a scalar, convert it to parallel */
8044 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
8046 rtvec vs = rtvec_alloc (<ssescalarnum>);
8047 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
8048 rtx neg = gen_reg_rtx (<MODE>mode);
8049 rtx reg = gen_reg_rtx (<MODE>mode);
8050 rtx op2 = operands[2];
8053 if (GET_MODE (op2) != <ssescalarmode>mode)
8055 op2 = gen_reg_rtx (<ssescalarmode>mode);
8056 convert_move (op2, operands[2], false);
8059 for (i = 0; i < <ssescalarnum>; i++)
8060 RTVEC_ELT (vs, i) = op2;
8062 emit_insn (gen_vec_init<mode> (reg, par));
8063 emit_insn (gen_neg<mode>2 (neg, reg));
8064 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate count: emits prot<b|w|d|q> with operand 2
;; restricted to a constant accepted by const_0_to_<sserotatemax>_operand.
;; Operand 1 may be a register or memory; the result goes to a register.
8069 (define_insn "sse5_rotl<mode>3"
8070 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8072 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8073 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8075 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8076 [(set_attr "type" "sseishft")
8077 (set_attr "mode" "TI")])
;; Rotate right by an immediate count.  The instruction emitted is the
;; rotate-left form prot<b|w|d|q>, so the output code rewrites the count as
;; (element width in bits) - operand 2 and prints it as operand 3.
;; The element width is taken from GET_MODE_BITSIZE (<ssescalarmode>mode);
;; the previous (<ssescalarnum> * 8) expression multiplied the number of
;; elements by 8, which is not the element width (e.g. 16 instead of 64
;; for V2DI).
8079 (define_insn "sse5_rotr<mode>3"
8080 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8081 (rotatert:SSEMODE1248
8082 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8083 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8086 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
8087 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
8089 [(set_attr "type" "sseishft")
8090 (set_attr "mode" "TI")])
;; Expander for a rotate-right with a per-element vector count (operand 2):
;; negates the count vector into a fresh register with neg<mode>2 and emits
;; sse5_vrotl<mode>3 on the negated counts.
8092 (define_expand "vrotr<mode>3"
8093 [(match_operand:SSEMODE1248 0 "register_operand" "")
8094 (match_operand:SSEMODE1248 1 "register_operand" "")
8095 (match_operand:SSEMODE1248 2 "register_operand" "")]
8098 rtx reg = gen_reg_rtx (<MODE>mode);
8099 emit_insn (gen_neg<mode>2 (reg, operands[2]));
8100 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate-left with a per-element vector count (operand 2):
;; forwards all three operands unchanged to sse5_vrotl<mode>3.
8104 (define_expand "vrotl<mode>3"
8105 [(match_operand:SSEMODE1248 0 "register_operand" "")
8106 (match_operand:SSEMODE1248 1 "register_operand" "")
8107 (match_operand:SSEMODE1248 2 "register_operand" "")]
8110 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count rotate, emitted as prot<b|w|d|q>.  Operand 1 is the data
;; and operand 2 the count vector.  The RTL is an if_then_else on operand 2
;; whose visible arm is a rotatert by the negation of operand 2.  The two
;; constraint alternatives allow either operand 1 or operand 2 (not both)
;; to be in memory.
8114 (define_insn "sse5_vrotl<mode>3"
8115 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8116 (if_then_else:SSEMODE1248
8118 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8121 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8123 (rotatert:SSEMODE1248
8125 (neg:SSEMODE1248 (match_dup 2)))))]
8126 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8127 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8128 [(set_attr "type" "sseishft")
8129 (set_attr "mode" "TI")])
8131 ;; SSE5 packed shift instructions.
8132 ;; FIXME: add V2DI back in
;; Expander for a logical shift right with a per-element vector count
;; (SSEMODE124 only, see the FIXME): negates operand 2 with neg<mode>2 and
;; emits sse5_lshl<mode>3 on the negated counts.
8133 (define_expand "vlshr<mode>3"
8134 [(match_operand:SSEMODE124 0 "register_operand" "")
8135 (match_operand:SSEMODE124 1 "register_operand" "")
8136 (match_operand:SSEMODE124 2 "register_operand" "")]
8139 rtx neg = gen_reg_rtx (<MODE>mode);
8140 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8141 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right with a per-element vector count:
;; negates operand 2 with neg<mode>2 and emits sse5_ashl<mode>3 on the
;; negated counts.
8145 (define_expand "vashr<mode>3"
8146 [(match_operand:SSEMODE124 0 "register_operand" "")
8147 (match_operand:SSEMODE124 1 "register_operand" "")
8148 (match_operand:SSEMODE124 2 "register_operand" "")]
8151 rtx neg = gen_reg_rtx (<MODE>mode);
8152 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8153 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left with a per-element vector count: forwards all
;; three operands unchanged to sse5_ashl<mode>3.
8157 (define_expand "vashl<mode>3"
8158 [(match_operand:SSEMODE124 0 "register_operand" "")
8159 (match_operand:SSEMODE124 1 "register_operand" "")
8160 (match_operand:SSEMODE124 2 "register_operand" "")]
8163 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count arithmetic shift, emitted as psha<b|w|d|q>.  Operand 1 is
;; the data and operand 2 the count vector.  The RTL is an if_then_else on
;; operand 2 whose visible arm is an ashiftrt by the negation of operand 2.
;; Either operand 1 or operand 2 (not both) may be in memory.
8167 (define_insn "sse5_ashl<mode>3"
8168 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8169 (if_then_else:SSEMODE1248
8171 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8174 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8176 (ashiftrt:SSEMODE1248
8178 (neg:SSEMODE1248 (match_dup 2)))))]
8179 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8180 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8181 [(set_attr "type" "sseishft")
8182 (set_attr "mode" "TI")])
;; Variable-count logical shift, emitted as pshl<b|w|d|q>.  Operand 1 is
;; the data and operand 2 the count vector.  The RTL is an if_then_else on
;; operand 2 whose visible arm is an lshiftrt by the negation of operand 2.
;; Either operand 1 or operand 2 (not both) may be in memory.
8184 (define_insn "sse5_lshl<mode>3"
8185 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8186 (if_then_else:SSEMODE1248
8188 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8191 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8193 (lshiftrt:SSEMODE1248
8195 (neg:SSEMODE1248 (match_dup 2)))))]
8196 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8197 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8198 [(set_attr "type" "sseishft")
8199 (set_attr "mode" "TI")])
8201 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar SImode count: operand 2 is placed in all 16
;; slots of a PARALLEL, turned into a V16QI register with vec_initv16qi, and
;; the shift is emitted through sse5_ashlv16qi3.
;; NOTE(review): unlike rotl<mode>3, the visible lines do not convert the
;; SImode count to QImode before vec_init -- confirm this is intended.
8202 (define_expand "ashlv16qi3"
8203 [(match_operand:V16QI 0 "register_operand" "")
8204 (match_operand:V16QI 1 "register_operand" "")
8205 (match_operand:SI 2 "nonmemory_operand" "")]
8208 rtvec vs = rtvec_alloc (16);
8209 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8210 rtx reg = gen_reg_rtx (V16QImode);
8212 for (i = 0; i < 16; i++)
8213 RTVEC_ELT (vs, i) = operands[2];
8215 emit_insn (gen_vec_initv16qi (reg, par));
8216 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift left by a scalar SImode count: operand 2 is placed in
;; all 16 slots of a PARALLEL, turned into a V16QI register with
;; vec_initv16qi, and the shift is emitted through sse5_lshlv16qi3.
8220 (define_expand "lshlv16qi3"
8221 [(match_operand:V16QI 0 "register_operand" "")
8222 (match_operand:V16QI 1 "register_operand" "")
8223 (match_operand:SI 2 "nonmemory_operand" "")]
8226 rtvec vs = rtvec_alloc (16);
8227 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8228 rtx reg = gen_reg_rtx (V16QImode);
8230 for (i = 0; i < 16; i++)
8231 RTVEC_ELT (vs, i) = operands[2];
8233 emit_insn (gen_vec_initv16qi (reg, par));
8234 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, implemented with
;; sse5_ashlv16qi3 and a negated count.  A CONST_INT count is negated at
;; expand time (GEN_INT (- INTVAL ...)) before being splatted; for any other
;; count the splatted vector is negated afterwards with negv16qi2.
8238 (define_expand "ashrv16qi3"
8239 [(match_operand:V16QI 0 "register_operand" "")
8240 (match_operand:V16QI 1 "register_operand" "")
8241 (match_operand:SI 2 "nonmemory_operand" "")]
8244 rtvec vs = rtvec_alloc (16);
8245 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8246 rtx reg = gen_reg_rtx (V16QImode);
8248 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
8249 ? GEN_INT (- INTVAL (operands[2]))
8252 for (i = 0; i < 16; i++)
8253 RTVEC_ELT (vs, i) = ele;
8255 emit_insn (gen_vec_initv16qi (reg, par));
8257 if (GET_CODE (operands[2]) != CONST_INT)
8259 rtx neg = gen_reg_rtx (V16QImode);
8260 emit_insn (gen_negv16qi2 (neg, reg));
8261 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
8264 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, implemented with
;; sse5_ashlv2di3 and a negated count.  A CONST_INT count is negated at
;; expand time; a count whose mode is not DImode is first widened with
;; convert_move and then negated with negdi2; a DImode count is negated
;; directly with negdi2.  The negated scalar fills both vector elements via
;; vec_initv2di.
8269 (define_expand "ashrv2di3"
8270 [(match_operand:V2DI 0 "register_operand" "")
8271 (match_operand:V2DI 1 "register_operand" "")
8272 (match_operand:DI 2 "nonmemory_operand" "")]
8275 rtvec vs = rtvec_alloc (2);
8276 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
8277 rtx reg = gen_reg_rtx (V2DImode);
8280 if (GET_CODE (operands[2]) == CONST_INT)
8281 ele = GEN_INT (- INTVAL (operands[2]));
8282 else if (GET_MODE (operands[2]) != DImode)
8284 rtx move = gen_reg_rtx (DImode);
8285 ele = gen_reg_rtx (DImode);
8286 convert_move (move, operands[2], false);
8287 emit_insn (gen_negdi2 (ele, move));
8291 ele = gen_reg_rtx (DImode);
8292 emit_insn (gen_negdi2 (ele, operands[2]));
8295 RTVEC_ELT (vs, 0) = ele;
8296 RTVEC_ELT (vs, 1) = ele;
8297 emit_insn (gen_vec_initv2di (reg, par));
8298 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
8302 ;; SSE5 FRCZ support
;; Packed form: emits frczps / frczpd on operand 1 (register or memory),
;; writing a register destination.
8304 (define_insn "sse5_frcz<mode>2"
8305 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8307 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8310 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8311 [(set_attr "type" "ssecvt1")
8312 (set_attr "prefix_extra" "1")
8313 (set_attr "mode" "<MODE>")])
;; Scalar form on vector modes: emits frczss / frczsd on operand 2 and
;; vec_merges the result with operand 1, which is tied to the destination
;; ("0" constraint).
8316 (define_insn "sse5_vmfrcz<mode>2"
8317 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8318 (vec_merge:SSEMODEF2P
8320 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8322 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8325 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8326 [(set_attr "type" "ssecvt1")
8327 (set_attr "prefix_extra" "1")
8328 (set_attr "mode" "<MODE>")])
;; cvtph2ps: V4HI source (register or memory) to a V4SF register result,
;; modelled as an unspec.
8330 (define_insn "sse5_cvtph2ps"
8331 [(set (match_operand:V4SF 0 "register_operand" "=x")
8332 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8335 "cvtph2ps\t{%1, %0|%0, %1}"
8336 [(set_attr "type" "ssecvt")
8337 (set_attr "mode" "V4SF")])
;; cvtps2ph: V4SF register source to a V4HI destination that may be a
;; register or memory, modelled as an unspec.
8339 (define_insn "sse5_cvtps2ph"
8340 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8341 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8344 "cvtps2ph\t{%1, %0|%0, %1}"
8345 [(set_attr "type" "ssecvt")
8346 (set_attr "mode" "V4SF")])
8348 ;; Scalar versions of the com instructions that use vector types that are
8349 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8350 ;; com instructions fill in 0's in the upper bits instead of leaving them
8351 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; The expander's preparation code sets operands[4] to the all-zero vector
;; CONST0_RTX (<MODE>mode) for the vec_merge.
8352 (define_expand "sse5_vmmaskcmp<mode>3"
8353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8354 (vec_merge:SSEMODEF2P
8355 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8356 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8357 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8362 operands[4] = CONST0_RTX (<MODE>mode);
;; Matching insn for the scalar com expander: emits com<cond>ss / com<cond>sd
;; (condition printed with %Y1 from the comparison operator).  Operand 4 has
;; an empty predicate and is the second vec_merge input.
8365 (define_insn "*sse5_vmmaskcmp<mode>3"
8366 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8367 (vec_merge:SSEMODEF2P
8368 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8369 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8370 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8371 (match_operand:SSEMODEF2P 4 "")
8374 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8375 [(set_attr "type" "sse4arg")
8376 (set_attr "mode" "<ssescalarmode>")])
8378 ;; We don't have a comparison operator that always returns true/false, so
8379 ;; handle comfalse and comtrue specially.
;; Operand 3 is a const_int selector examined by the switch in the output
;; code; operands 1 and 2 are the (ignored-by-result) comparison inputs.
;; The first two switch arms emit the scalar and packed comfalse mnemonics
;; and the last two emit the scalar and packed comtrue mnemonics.  The last
;; two arms previously repeated the comfalse strings, so a "true" request
;; produced an always-false compare.
8380 (define_insn "sse5_com_tf<mode>3"
8381 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8383 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8384 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8385 (match_operand:SI 3 "const_int_operand" "n")]
8386 UNSPEC_SSE5_TRUEFALSE))]
8389 const char *ret = NULL;
8391 switch (INTVAL (operands[3]))
8394 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8398 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8402 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8406 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8415 [(set_attr "type" "ssecmp")
8416 (set_attr "mode" "<MODE>")])
;; Packed floating-point mask compare: emits com<cond>ps / com<cond>pd, with
;; the condition printed by %Y1 from an operator accepted by
;; sse5_comparison_float_operator.  Operand 3 may be in memory.
8418 (define_insn "sse5_maskcmp<mode>3"
8419 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8420 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8421 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8422 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8424 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8425 [(set_attr "type" "ssecmp")
8426 (set_attr "mode" "<MODE>")])
;; Packed integer mask compare: emits pcom<cond><b|w|d|q>, with the condition
;; printed by %Y1 from an operator accepted by ix86_comparison_int_operator.
;; NOTE(review): this pattern uses type "sse4arg" while the unsigned variants
;; below use "ssecmp" -- confirm the difference is intentional.
8428 (define_insn "sse5_maskcmp<mode>3"
8429 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8430 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8431 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8432 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8434 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8435 [(set_attr "type" "sse4arg")
8436 (set_attr "mode" "TI")])
;; Packed unsigned integer mask compare: emits pcom<cond>u<b|w|d|q>, with the
;; condition printed by %Y1 from an operator accepted by
;; ix86_comparison_uns_operator.
8438 (define_insn "sse5_maskcmp_uns<mode>3"
8439 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8440 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8441 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8442 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8444 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8445 [(set_attr "type" "ssecmp")
8446 (set_attr "mode" "TI")])
8448 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8449 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8450 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_SSE5_UNSIGNED_CMP; the emitted
;; mnemonic is the same pcom<cond>u<b|w|d|q> template as
;; sse5_maskcmp_uns<mode>3.
8451 (define_insn "sse5_maskcmp_uns2<mode>3"
8452 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8454 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8455 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8456 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8457 UNSPEC_SSE5_UNSIGNED_CMP))]
8459 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8460 [(set_attr "type" "ssecmp")
8461 (set_attr "mode" "TI")])
8463 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8464 ;; being added here to be complete.
;; Operand 3 is a const_int selector: a non-zero value emits
;; pcomtrue<b|w|d|q>, zero emits pcomfalse<b|w|d|q>.
8465 (define_insn "sse5_pcom_tf<mode>3"
8466 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8468 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8469 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8470 (match_operand:SI 3 "const_int_operand" "n")]
8471 UNSPEC_SSE5_TRUEFALSE))]
8474 return ((INTVAL (operands[3]) != 0)
8475 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8476 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8478 [(set_attr "type" "ssecmp")
8479 (set_attr "mode" "TI")])
;; aesenc: two-operand instruction modelled as an unspec.  Operand 1 is tied
;; to the destination register ("0"); operand 2 may be a register or memory.
8481 (define_insn "aesenc"
8482 [(set (match_operand:V2DI 0 "register_operand" "=x")
8483 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8484 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8487 "aesenc\t{%2, %0|%0, %2}"
8488 [(set_attr "type" "sselog1")
8489 (set_attr "prefix_extra" "1")
8490 (set_attr "mode" "TI")])
;; aesenclast: modelled as UNSPEC_AESENCLAST.  Operand 1 is tied to the
;; destination register ("0"); operand 2 may be a register or memory.
8492 (define_insn "aesenclast"
8493 [(set (match_operand:V2DI 0 "register_operand" "=x")
8494 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8495 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8496 UNSPEC_AESENCLAST))]
8498 "aesenclast\t{%2, %0|%0, %2}"
8499 [(set_attr "type" "sselog1")
8500 (set_attr "prefix_extra" "1")
8501 (set_attr "mode" "TI")])
;; aesdec: two-operand instruction modelled as an unspec.  Operand 1 is tied
;; to the destination register ("0"); operand 2 may be a register or memory.
8503 (define_insn "aesdec"
8504 [(set (match_operand:V2DI 0 "register_operand" "=x")
8505 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8506 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8509 "aesdec\t{%2, %0|%0, %2}"
8510 [(set_attr "type" "sselog1")
8511 (set_attr "prefix_extra" "1")
8512 (set_attr "mode" "TI")])
;; aesdeclast: modelled as UNSPEC_AESDECLAST.  Operand 1 is tied to the
;; destination register ("0"); operand 2 may be a register or memory.
8514 (define_insn "aesdeclast"
8515 [(set (match_operand:V2DI 0 "register_operand" "=x")
8516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8517 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8518 UNSPEC_AESDECLAST))]
8520 "aesdeclast\t{%2, %0|%0, %2}"
8521 [(set_attr "type" "sselog1")
8522 (set_attr "prefix_extra" "1")
8523 (set_attr "mode" "TI")])
;; aesimc: single-input instruction modelled as an unspec.  Operand 1 may be
;; a register or memory and is not tied to the destination.
8525 (define_insn "aesimc"
8526 [(set (match_operand:V2DI 0 "register_operand" "=x")
8527 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8530 "aesimc\t{%1, %0|%0, %1}"
8531 [(set_attr "type" "sselog1")
8532 (set_attr "prefix_extra" "1")
8533 (set_attr "mode" "TI")])
;; aeskeygenassist: modelled as UNSPEC_AESKEYGENASSIST.  Operand 1 may be a
;; register or memory; operand 2 is an immediate accepted by
;; const_0_to_255_operand.
8535 (define_insn "aeskeygenassist"
8536 [(set (match_operand:V2DI 0 "register_operand" "=x")
8537 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
8538 (match_operand:SI 2 "const_0_to_255_operand" "n")]
8539 UNSPEC_AESKEYGENASSIST))]
8541 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
8542 [(set_attr "type" "sselog1")
8543 (set_attr "prefix_extra" "1")
8544 (set_attr "mode" "TI")])
;; pclmulqdq: modelled as an unspec.  Operand 1 is tied to the destination
;; register ("0"), operand 2 may be a register or memory, and operand 3 is
;; an immediate accepted by const_0_to_255_operand.
8546 (define_insn "pclmulqdq"
8547 [(set (match_operand:V2DI 0 "register_operand" "=x")
8548 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8549 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
8550 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8553 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
8554 [(set_attr "type" "sselog1")
8555 (set_attr "prefix_extra" "1")
8556 (set_attr "mode" "TI")])