1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Integer-vector subsets.  The digits in each iterator name list the element
;; sizes in bytes that it covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Float modes: SSEMODEF4 covers scalar and packed modes, SSEMODEF2P covers
;; the packed modes only.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Int-float size matches: each iterator pairs the float and integer vector
;; modes that share the same element count.
40 (define_mode_iterator SSEMODE4S [V4SF V4SI])
41 (define_mode_iterator SSEMODE2D [V2DF V2DI])
43 ;; Mapping from float mode to required SSE level
44 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
46 ;; Mapping from integer vector mode to mnemonic suffix
47 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
49 ;; Mapping of the sse5 suffix
50 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
51 (V4SF "ps") (V2DF "pd")])
;; Scalar-form suffix: each packed mode maps to the same suffix as its
;; element mode (V4SF -> "ss", V2DF -> "sd").
52 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
53 (V4SF "ss") (V2DF "sd")])
;; Single-letter element suffix for the packed float modes; patterns in this
;; file append it after a literal "p" or "s" in the mnemonic.
54 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
56 ;; Mapping of the max integer size for sse5 rotate immediate constraint
57 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
59 ;; Mapping of vector modes back to the scalar modes
60 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
61 (V16QI "QI") (V8HI "HI")
62 (V4SI "SI") (V2DI "DI")])
64 ;; Mapping of vector modes to a vector mode of double size
65 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
66 (V4SF "V8SF") (V4SI "V8SI")])
68 ;; Number of scalar elements in each vector type
69 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
70 (V16QI "16") (V8HI "8")
71 (V4SI "4") (V2DI "2")])
73 ;; Mapping of immediate bits for blend instructions: one mask bit per
;; element, all set (4 elements -> 15, 2 elements -> 3).
74 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
76 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
78 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
82 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
84 ;; All of these patterns are enabled for SSE1 as well as SSE2.
85 ;; This is essential for maintaining stable calling conventions.
;; Vector move expander, instantiated for every mode in SSEMODE.  Both
;; operands are nonimmediate_operand; the work is handed to
;; ix86_expand_vector_move.
87 (define_expand "mov<mode>"
88 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
89 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
92 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for SSEMODE vectors.  Three alternatives: an SSE constant ("C")
;; into an xmm register, register-or-memory into an xmm register, and an xmm
;; register into memory.  At least one operand must be a register.
;; Alternative selection and the "mode" attribute pick the output: the
;; constant case uses standard_sse_constant_opcode, the others return one of
;; the movaps / movapd / movdqa templates.
;; The "mode" attribute is V4SF when optimizing for size, when TARGET_SSE2 is
;; off, or for the store alternative (2) when TARGET_SSE_TYPELESS_STORES
;; applies; otherwise V4SF / V2DF for those operand modes and TI for the rest.
96 (define_insn "*mov<mode>_internal"
97 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
98 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
100 && (register_operand (operands[0], <MODE>mode)
101 || register_operand (operands[1], <MODE>mode))"
103 switch (which_alternative)
106 return standard_sse_constant_opcode (insn, operands[1]);
109 switch (get_attr_mode (insn))
112 return "movaps\t{%1, %0|%0, %1}";
114 return "movapd\t{%1, %0|%0, %1}";
116 return "movdqa\t{%1, %0|%0, %1}";
122 [(set_attr "type" "sselog1,ssemov,ssemov")
124 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
125 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
126 (and (eq_attr "alternative" "2")
127 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
129 (const_string "V4SF")
130 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
131 (const_string "V4SF")
132 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
133 (const_string "V2DF")
135 (const_string "TI")))])
137 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
138 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
139 ;; from memory, we'd prefer to load the memory directly into the %xmm
140 ;; register. To facilitate this happy circumstance, this pattern won't
141 ;; split until after register allocation. If the 64-bit value didn't
142 ;; come from memory, this is the best we can do. This is much better
143 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves.
;; Operand 1 is a DImode value in general registers ("r") or in memory ("m");
;; operand 2 is an xmm scratch that only the register alternative uses.
;; The split runs after reload:
;;   - register source: each 32-bit half (subreg offsets 0 and 4) is loaded
;;     with sse2_loadld, then the two are combined with sse2_punpckldq;
;;   - memory source: a single vec_concatv2di of the DImode value and
;;     const0_rtx, written through the V2DImode lowpart of operand 0.
146 (define_insn_and_split "movdi_to_sse"
148 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
149 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
150 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
151 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
153 "&& reload_completed"
156 if (register_operand (operands[1], DImode))
158 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
159 Assemble the 64-bit DImode value in an xmm register. */
160 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
161 gen_rtx_SUBREG (SImode, operands[1], 0)));
162 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
163 gen_rtx_SUBREG (SImode, operands[1], 4)));
164 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
166 else if (memory_operand (operands[1], DImode))
167 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload split for a V4SF register set from a
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 becomes the V4SF zero vector; the
;; replacement pattern uses a vec_duplicate of the narrowed operand.
;; NOTE(review): the pattern head and part of the replacement are not visible
;; in this listing.
173 [(set (match_operand:V4SF 0 "register_operand" "")
174 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
175 "TARGET_SSE && reload_completed"
178 (vec_duplicate:V4SF (match_dup 1))
182 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
183 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload split for a V2DF register set from a
;; zero_extended_scalar_load_operand.  The replacement is a vec_concat of
;; operand 1, narrowed to its DFmode subreg at offset 0, with a DFmode zero
;; (operand 2).
187 [(set (match_operand:V2DF 0 "register_operand" "")
188 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
189 "TARGET_SSE2 && reload_completed"
190 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
192 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
193 operands[2] = CONST0_RTX (DFmode);
;; Push expander for an SSEMODE register operand; delegates to
;; ix86_expand_push.
196 (define_expand "push<mode>1"
197 [(match_operand:SSEMODE 0 "register_operand" "")]
200 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned vector move expander for SSEMODE; delegates to
;; ix86_expand_vector_move_misalign.
204 (define_expand "movmisalign<mode>"
205 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
206 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
209 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Emits movq from an xmm register or memory (operand 1) into an xmm
;; register.  The visible RTL selects element 0 of operand 1.
;; NOTE(review): the enclosing RTL operators and the insn condition are not
;; visible in this listing.
213 (define_insn "sse2_movq128"
214 [(set (match_operand:V2DI 0 "register_operand" "=x")
217 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
218 (parallel [(const_int 0)]))
221 "movq\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
;; Packed-float move emitted as movups (V4SF) or movupd (V2DF).  Alternative 0
;; loads register-or-memory into xmm, alternative 1 stores xmm to memory; the
;; condition rejects memory-to-memory.
225 (define_insn "<sse>_movup<ssemodesuffixf2c>"
226 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
228 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
230 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
231 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
232 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
233 [(set_attr "type" "ssemov")
234 (set_attr "mode" "<MODE>")])
;; V16QI move emitted as movdqu under TARGET_SSE2.  The source is wrapped in
;; an unspec; alternative 0 loads into xmm, alternative 1 stores to memory,
;; and memory-to-memory is rejected.
236 (define_insn "sse2_movdqu"
237 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
238 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
240 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
241 "movdqu\t{%1, %0|%0, %1}"
242 [(set_attr "type" "ssemov")
243 (set_attr "prefix_data16" "1")
244 (set_attr "mode" "TI")])
;; Store of a packed-float xmm register to memory, emitted as movntps (V4SF)
;; or movntpd (V2DF).  The destination must be memory and the source a
;; register.
246 (define_insn "<sse>_movnt<mode>"
247 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
249 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
251 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
252 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
253 [(set_attr "type" "ssemov")
254 (set_attr "mode" "<MODE>")])
;; Store of a V2DI xmm register to memory, emitted as movntdq.  The source is
;; wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here while the float movnt pattern uses
;; ssemov -- confirm this is intended.
256 (define_insn "sse2_movntv2di"
257 [(set (match_operand:V2DI 0 "memory_operand" "=m")
258 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
261 "movntdq\t{%1, %0|%0, %1}"
262 [(set_attr "type" "ssecvt")
263 (set_attr "prefix_data16" "1")
264 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, emitted as movnti.
;; NOTE(review): the "mode" attribute is V2DF although the operands are SI --
;; confirm this is intended.
266 (define_insn "sse2_movntsi"
267 [(set (match_operand:SI 0 "memory_operand" "=m")
268 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
271 "movnti\t{%1, %0|%0, %1}"
272 [(set_attr "type" "ssecvt")
273 (set_attr "mode" "V2DF")])
;; V16QI load emitted as lddqu.  The source must be memory ("m") and the
;; destination an xmm register; the source is wrapped in an unspec.
275 (define_insn "sse3_lddqu"
276 [(set (match_operand:V16QI 0 "register_operand" "=x")
277 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
280 "lddqu\t{%1, %0|%0, %1}"
281 [(set_attr "type" "ssecvt")
282 (set_attr "prefix_rep" "1")
283 (set_attr "mode" "TI")])
285 ; Expand patterns for non-temporal stores. At the moment, only those
286 ; that directly map to insns are defined; it would be possible to
287 ; define patterns for other modes that would expand to several insns.
;; storent expander for the packed float modes: memory destination, register
;; source, enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
289 (define_expand "storent<mode>"
290 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
292 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
294 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander for the modes in the MODEF iterator (defined outside this
;; listing): memory destination, register source.
;; NOTE(review): the enabling condition is not visible in this listing.
297 (define_expand "storent<mode>"
298 [(set (match_operand:MODEF 0 "memory_operand" "")
300 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: memory destination, register source wrapped in
;; an unspec, the same operand shape as sse2_movntv2di.
305 (define_expand "storentv2di"
306 [(set (match_operand:V2DI 0 "memory_operand" "")
307 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SI: memory destination, register source wrapped in an
;; unspec, the same operand shape as sse2_movntsi.
312 (define_expand "storentsi"
313 [(set (match_operand:SI 0 "memory_operand" "")
314 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
321 ;; Parallel floating point arithmetic
323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary-operation expander for packed float registers.  All work is done by
;; ix86_expand_fp_absneg_operator, called with the iterated <CODE>, after
;; which the expander finishes with DONE.
325 (define_expand "<code><mode>2"
326 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
328 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
329 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
330 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Packed-float expander over the plusminus code iterator (defined outside
;; this listing).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
332 (define_expand "<plusminus_insn><mode>3"
333 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
334 (plusminus:SSEMODEF2P
335 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
336 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
337 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
338 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float insn over plusminus, emitted as <plusminus_mnemonic>ps/pd.
;; Operand 1 is tied to the destination (the "<comm>" prefix supplies the
;; per-code constraint modifier); operand 2 may be a register or memory.
;; It matches only when ix86_binary_operator_ok accepts the operands.
340 (define_insn "*<plusminus_insn><mode>3"
341 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
342 (plusminus:SSEMODEF2P
343 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
344 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
345 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
346 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
347 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
348 [(set_attr "type" "sseadd")
349 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector form over plusminus: the operation sits inside a
;; vec_merge and is emitted with the scalar mnemonic
;; (<plusminus_mnemonic>ss/sd).  Operand 1 must be a register tied to the
;; destination; the "mode" attribute is the scalar element mode.
;; NOTE(review): the remaining vec_merge operands are not visible in this
;; listing.
351 (define_insn "<sse>_vm<plusminus_insn><mode>3"
352 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
353 (vec_merge:SSEMODEF2P
354 (plusminus:SSEMODEF2P
355 (match_operand:SSEMODEF2P 1 "register_operand" "0")
356 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
359 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
360 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
361 [(set_attr "type" "sseadd")
362 (set_attr "mode" "<ssescalarmode>")])
;; Packed-float multiply expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
364 (define_expand "mul<mode>3"
365 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
367 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
368 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
369 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
370 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed-float multiply insn, emitted as mulps/mulpd.  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  It requires ix86_binary_operator_ok for MULT.
372 (define_insn "*mul<mode>3"
373 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
375 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
376 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
377 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
378 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
379 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
380 [(set_attr "type" "ssemul")
381 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector multiply inside a vec_merge, emitted as mulss/mulsd.
;; Operand 1 is a register tied to the destination; the "mode" attribute is
;; the scalar element mode.
383 (define_insn "<sse>_vmmul<mode>3"
384 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
385 (vec_merge:SSEMODEF2P
387 (match_operand:SSEMODEF2P 1 "register_operand" "0")
388 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
391 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
392 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
393 [(set_attr "type" "ssemul")
394 (set_attr "mode" "<ssescalarmode>")])
;; V4SF division expander.  ix86_emit_swdivsf is called on operands 0, 1 and 2
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing for
;; size, finite-math-only, non-trapping math, and unsafe math optimizations.
;; NOTE(review): the enabling condition and the fall-through path are not
;; visible in this listing.
396 (define_expand "divv4sf3"
397 [(set (match_operand:V4SF 0 "register_operand" "")
398 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
399 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
402 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
403 && flag_finite_math_only && !flag_trapping_math
404 && flag_unsafe_math_optimizations)
406 ix86_emit_swdivsf (operands[0], operands[1],
407 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.  No
;; preparation code is visible for it in this listing.
412 (define_expand "divv2df3"
413 [(set (match_operand:V2DF 0 "register_operand" "")
414 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
415 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Packed-float divide insn, emitted as divps/divpd.  Operand 1 must be a
;; register tied to the destination, with no commutative marker; operand 2
;; may be a register or memory.
419 (define_insn "<sse>_div<mode>3"
420 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
422 (match_operand:SSEMODEF2P 1 "register_operand" "0")
423 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
424 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
425 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
426 [(set_attr "type" "ssediv")
427 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector divide inside a vec_merge, emitted as divss/divsd.
;; Operand 1 is a register tied to the destination; the "mode" attribute is
;; the scalar element mode.
429 (define_insn "<sse>_vmdiv<mode>3"
430 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
431 (vec_merge:SSEMODEF2P
433 (match_operand:SSEMODEF2P 1 "register_operand" "0")
434 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
437 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
438 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
439 [(set_attr "type" "ssediv")
440 (set_attr "mode" "<ssescalarmode>")])
;; V4SF insn emitted as rcpps, modelled as UNSPEC_RCP of operand 1 (register
;; or memory).  The destination is not tied to the source.
442 (define_insn "sse_rcpv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
445 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
447 "rcpps\t{%1, %0|%0, %1}"
448 [(set_attr "type" "sse")
449 (set_attr "mode" "V4SF")])
;; Scalar-in-vector form emitted as rcpss.  Operand 1 (register or memory)
;; feeds the unspec; operand 2 is a register tied to the destination.  The
;; "mode" attribute is SF.
;; NOTE(review): the enclosing operator and the unspec code are not visible in
;; this listing.
451 (define_insn "sse_vmrcpv4sf2"
452 [(set (match_operand:V4SF 0 "register_operand" "=x")
454 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
456 (match_operand:V4SF 2 "register_operand" "0")
459 "rcpss\t{%1, %0|%0, %1}"
460 [(set_attr "type" "sse")
461 (set_attr "mode" "SF")])
;; V4SF square-root expander.  ix86_emit_swsqrtsf is called with a final
;; argument of 0 when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not
;; optimizing for size, finite-math-only, non-trapping math, and unsafe math
;; optimizations.
;; NOTE(review): the fall-through path is not visible in this listing.
463 (define_expand "sqrtv4sf2"
464 [(set (match_operand:V4SF 0 "register_operand" "")
465 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
468 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
469 && flag_finite_math_only && !flag_trapping_math
470 && flag_unsafe_math_optimizations)
472 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square-root insn, emitted as sqrtps; the source may be a register or
;; memory.
477 (define_insn "sse_sqrtv4sf2"
478 [(set (match_operand:V4SF 0 "register_operand" "=x")
479 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
481 "sqrtps\t{%1, %0|%0, %1}"
482 [(set_attr "type" "sse")
483 (set_attr "mode" "V4SF")])
;; V2DF square-root insn, emitted as sqrtpd; the source may be a register or
;; memory.
485 (define_insn "sqrtv2df2"
486 [(set (match_operand:V2DF 0 "register_operand" "=x")
487 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
489 "sqrtpd\t{%1, %0|%0, %1}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "V2DF")])
;; Scalar-in-vector square root inside a vec_merge, emitted as sqrtss/sqrtsd.
;; Operand 1 (register or memory) is the source; operand 2 is a register tied
;; to the destination.  The "mode" attribute is the scalar element mode.
493 (define_insn "<sse>_vmsqrt<mode>2"
494 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
495 (vec_merge:SSEMODEF2P
497 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
498 (match_operand:SSEMODEF2P 2 "register_operand" "0")
500 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
501 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
502 [(set_attr "type" "sse")
503 (set_attr "mode" "<ssescalarmode>")])
;; V4SF expander modelled as UNSPEC_RSQRT of operand 1.  It calls
;; ix86_emit_swsqrtsf with a final argument of 1, where sqrtv4sf2 passes 0.
;; NOTE(review): the enabling condition is not visible in this listing.
505 (define_expand "rsqrtv4sf2"
506 [(set (match_operand:V4SF 0 "register_operand" "")
508 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
511 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF insn emitted as rsqrtps, modelled as UNSPEC_RSQRT of operand 1
;; (register or memory).
515 (define_insn "sse_rsqrtv4sf2"
516 [(set (match_operand:V4SF 0 "register_operand" "=x")
518 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
520 "rsqrtps\t{%1, %0|%0, %1}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "V4SF")])
;; Scalar-in-vector form emitted as rsqrtss.  Operand 1 (register or memory)
;; feeds the unspec; operand 2 is a register tied to the destination.  The
;; "mode" attribute is SF.
524 (define_insn "sse_vmrsqrtv4sf2"
525 [(set (match_operand:V4SF 0 "register_operand" "=x")
527 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
529 (match_operand:V4SF 2 "register_operand" "0")
532 "rsqrtss\t{%1, %0|%0, %1}"
533 [(set_attr "type" "sse")
534 (set_attr "mode" "SF")])
536 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
537 ;; isn't really correct, as those rtl operators aren't defined when
538 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed-float binary expander iterated over <CODE>.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a register;
;; the operands then go through ix86_fixup_binary_operands_no_copy.
540 (define_expand "<code><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
543 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
547 if (!flag_finite_math_only)
548 operands[1] = force_reg (<MODE>mode, operands[1]);
549 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Finite-math variant, matched only when flag_finite_math_only is set.
;; Operand 1 is marked commutative ("%0") and may be memory before
;; ix86_binary_operator_ok filtering.  Emitted as <maxminfprefix>ps/pd.
552 (define_insn "*<code><mode>3_finite"
553 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
555 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
557 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
558 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
559 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sseadd")
561 (set_attr "mode" "<MODE>")])
;; General variant with no finite-math requirement.  Operand 1 must be a
;; register tied to the destination and carries no commutative marker, so the
;; operand order is kept.  Emitted as <maxminfprefix>ps/pd.
563 (define_insn "*<code><mode>3"
564 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
566 (match_operand:SSEMODEF2P 1 "register_operand" "0")
567 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
568 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
569 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
570 [(set_attr "type" "sseadd")
571 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector form inside a vec_merge, emitted as <maxminfprefix>ss/sd.
;; Operand 1 is a register tied to the destination; the "mode" attribute is
;; the scalar element mode.
;; NOTE(review): "type" is sse here while the packed forms use sseadd --
;; confirm this is intended.
573 (define_insn "<sse>_vm<code><mode>3"
574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 (vec_merge:SSEMODEF2P
577 (match_operand:SSEMODEF2P 1 "register_operand" "0")
578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
581 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
582 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
583 [(set_attr "type" "sse")
584 (set_attr "mode" "<ssescalarmode>")])
586 ;; These versions of the min/max patterns implement exactly the operations
587 ;; min = (op1 < op2 ? op1 : op2)
588 ;; max = (!(op1 < op2) ? op1 : op2)
589 ;; Their operands are not commutative, and thus they may be used in the
590 ;; presence of -0.0 and NaN.
;; Order-preserving packed minimum, emitted as minps/minpd.  The two operands
;; appear as an operand vector (unspec-style) and operand 1 is a register
;; tied to the destination, so they are never swapped.
592 (define_insn "*ieee_smin<mode>3"
593 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
595 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
596 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
598 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
599 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
600 [(set_attr "type" "sseadd")
601 (set_attr "mode" "<MODE>")])
;; Order-preserving packed maximum, emitted as maxps/maxpd.  The two operands
;; appear as an operand vector (unspec-style) and operand 1 is a register
;; tied to the destination, so they are never swapped.
603 (define_insn "*ieee_smax<mode>3"
604 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
606 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
607 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
610 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
611 [(set_attr "type" "sseadd")
612 (set_attr "mode" "V4SF")])
;; V4SF insn emitted as addsubps.  The visible RTL combines an operation on
;; operands 1 and 2 with (minus op1 op2) on the same operands; operand 1 is
;; a register tied to the destination.
;; NOTE(review): the enclosing operators, the selector and the insn condition
;; are not visible in this listing.
614 (define_insn "sse3_addsubv4sf3"
615 [(set (match_operand:V4SF 0 "register_operand" "=x")
618 (match_operand:V4SF 1 "register_operand" "0")
619 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
620 (minus:V4SF (match_dup 1) (match_dup 2))
623 "addsubps\t{%2, %0|%0, %2}"
624 [(set_attr "type" "sseadd")
625 (set_attr "prefix_rep" "1")
626 (set_attr "mode" "V4SF")])
;; V2DF counterpart of the addsub pattern, emitted as addsubpd.  The visible
;; RTL combines an operation on operands 1 and 2 with (minus op1 op2);
;; operand 1 is a register tied to the destination.
628 (define_insn "sse3_addsubv2df3"
629 [(set (match_operand:V2DF 0 "register_operand" "=x")
632 (match_operand:V2DF 1 "register_operand" "0")
633 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
634 (minus:V2DF (match_dup 1) (match_dup 2))
637 "addsubpd\t{%2, %0|%0, %2}"
638 [(set_attr "type" "sseadd")
639 (set_attr "mode" "V2DF")])
;; V4SF horizontal add/sub, emitted as h<plusminus_mnemonic>ps.  The visible
;; RTL selects SF elements 0..3 of operand 1 and then elements 0..3 of
;; operand 2, grouped in adjacent pairs (0,1) and (2,3).  Operand 1 is a
;; register tied to the destination.
;; NOTE(review): the operators that combine each pair are not visible in this
;; listing.
641 (define_insn "sse3_h<plusminus_insn>v4sf3"
642 [(set (match_operand:V4SF 0 "register_operand" "=x")
647 (match_operand:V4SF 1 "register_operand" "0")
648 (parallel [(const_int 0)]))
649 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
651 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
652 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
656 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
657 (parallel [(const_int 0)]))
658 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
660 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
661 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
663 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
664 [(set_attr "type" "sseadd")
665 (set_attr "prefix_rep" "1")
666 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/sub, emitted as h<plusminus_mnemonic>pd.  The visible
;; RTL selects DF elements 0 and 1 of operand 1, then elements 0 and 1 of
;; operand 2.  Operand 1 is a register tied to the destination.
668 (define_insn "sse3_h<plusminus_insn>v2df3"
669 [(set (match_operand:V2DF 0 "register_operand" "=x")
673 (match_operand:V2DF 1 "register_operand" "0")
674 (parallel [(const_int 0)]))
675 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
678 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
679 (parallel [(const_int 0)]))
680 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
682 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
683 [(set_attr "type" "sseadd")
684 (set_attr "mode" "V2DF")])
;; V4SF sum-reduction expander.  Two strategies are visible:
;;   - two chained sse3_haddv4sf3 insns through a fresh temporary;
;;   - ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the test that chooses between them is not visible in this
;; listing.
686 (define_expand "reduc_splus_v4sf"
687 [(match_operand:V4SF 0 "register_operand" "")
688 (match_operand:V4SF 1 "register_operand" "")]
693 rtx tmp = gen_reg_rtx (V4SFmode);
694 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
695 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
698 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum-reduction expander: a single sse3_haddv2df3 with operand 1 used
;; as both inputs.
702 (define_expand "reduc_splus_v2df"
703 [(match_operand:V2DF 0 "register_operand" "")
704 (match_operand:V2DF 1 "register_operand" "")]
707 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF max-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_smaxv4sf3.
711 (define_expand "reduc_smax_v4sf"
712 [(match_operand:V4SF 0 "register_operand" "")
713 (match_operand:V4SF 1 "register_operand" "")]
716 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF min-reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_sminv4sf3.
720 (define_expand "reduc_smin_v4sf"
721 [(match_operand:V4SF 0 "register_operand" "")
722 (match_operand:V4SF 1 "register_operand" "")]
725 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
731 ;; Parallel floating point comparisons
733 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Comparison insn for scalar and packed float modes (SSEMODEF4).  The
;; comparison code is matched by sse_comparison_operator as operand 3 and
;; printed with the %D3 modifier, giving cmp<cond>ss/sd/ps/pd.  Operand 1 is a
;; register tied to the destination.
;; NOTE(review): the tail of the condition string is not visible in this
;; listing.
735 (define_insn "<sse>_maskcmp<mode>3"
736 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
737 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
738 [(match_operand:SSEMODEF4 1 "register_operand" "0")
739 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
740 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
742 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
743 [(set_attr "type" "ssecmp")
744 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector comparison inside a vec_merge, emitted as cmp<cond>ss/sd.
;; It is disabled when TARGET_SSE5 is set.  The "mode" attribute is the scalar
;; element mode.
746 (define_insn "<sse>_vmmaskcmp<mode>3"
747 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
748 (vec_merge:SSEMODEF2P
749 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
750 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
751 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
754 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
755 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
756 [(set_attr "type" "ssecmp")
757 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 (register) and
;; element 0 of operand 1 (register or memory); emitted as
;; comis<ssemodefsuffix>.
;; <ssevecmode> and <ssemodefsuffix> are defined outside this listing.
759 (define_insn "<sse>_comi"
760 [(set (reg:CCFP FLAGS_REG)
763 (match_operand:<ssevecmode> 0 "register_operand" "x")
764 (parallel [(const_int 0)]))
766 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
767 (parallel [(const_int 0)]))))]
768 "SSE_FLOAT_MODE_P (<MODE>mode)"
769 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
770 [(set_attr "type" "ssecomi")
771 (set_attr "mode" "<MODE>")])
;; Same operand shape as the comi pattern, but FLAGS_REG is set in CCFPU mode
;; and the insn is emitted as ucomis<ssemodefsuffix>.
773 (define_insn "<sse>_ucomi"
774 [(set (reg:CCFPU FLAGS_REG)
777 (match_operand:<ssevecmode> 0 "register_operand" "x")
778 (parallel [(const_int 0)]))
780 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))))]
782 "SSE_FLOAT_MODE_P (<MODE>mode)"
783 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
784 [(set_attr "type" "ssecomi")
785 (set_attr "mode" "<MODE>")])
;; Packed-float conditional-select expander.  Operands 4 and 5 are the
;; compared values; operands 1 and 2 are the two arms of the if_then_else.
;; Expansion is attempted through ix86_expand_fp_vcond.
;; NOTE(review): the comparison operator match and the handling of the
;; helper's result are not visible in this listing.
787 (define_expand "vcond<mode>"
788 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
789 (if_then_else:SSEMODEF2P
791 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
792 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
793 (match_operand:SSEMODEF2P 1 "general_operand" "")
794 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
795 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
797 if (ix86_expand_fp_vcond (operands))
803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
805 ;; Parallel floating point logical operations
807 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-float insn emitted as andnps/andnpd.  Operand 1 is a register tied
;; to the destination; operand 2 may be a register or memory.
809 (define_insn "<sse>_nand<mode>3"
810 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
813 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
814 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
815 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
816 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
817 [(set_attr "type" "sselog")
818 (set_attr "mode" "<MODE>")])
;; Packed-float binary expander iterated over <CODE> for the logical
;; operations of this section.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
820 (define_expand "<code><mode>3"
821 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
823 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
824 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
825 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
826 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float logical insn, emitted as <plogicprefix>ps/pd.  Operand 1 is
;; tied to the destination and marked commutative ("%0").  It requires
;; ix86_binary_operator_ok.
828 (define_insn "*<code><mode>3"
829 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
831 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
832 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
833 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
834 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
835 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
836 [(set_attr "type" "sselog")
837 (set_attr "mode" "<MODE>")])
839 ;; Also define scalar versions. These are used for abs, neg, and
840 ;; conditional move. Using subregs into vector modes causes register
841 ;; allocation lossage. These patterns do not allow memory operands
842 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) variant emitted as andnp<ssemodefsuffix>.  All operands are
;; registers, with no memory alternative, and the "mode" attribute is the
;; corresponding vector mode (<ssevecmode>).
844 (define_insn "*nand<mode>3"
845 [(set (match_operand:MODEF 0 "register_operand" "=x")
848 (match_operand:MODEF 1 "register_operand" "0"))
849 (match_operand:MODEF 2 "register_operand" "x")))]
850 "SSE_FLOAT_MODE_P (<MODE>mode)"
851 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
852 [(set_attr "type" "sselog")
853 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical insn emitted with the packed mnemonic
;; <plogicprefix>p<ssemodefsuffix>.  All operands are registers, with no
;; memory alternative, and the "mode" attribute is the corresponding vector
;; mode.
855 (define_insn "*<code><mode>3"
856 [(set (match_operand:MODEF 0 "register_operand" "=x")
858 (match_operand:MODEF 1 "register_operand" "0")
859 (match_operand:MODEF 2 "register_operand" "x")))]
860 "SSE_FLOAT_MODE_P (<MODE>mode)"
861 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
862 [(set_attr "type" "sselog")
863 (set_attr "mode" "<ssevecmode>")])
865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
867 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
868 ;; scalar versions of the instructions as well as the vector versions.
870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
872 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
873 ;; combine to generate a multiply/add with two memory references. We then
874 ;; split this insn, into loading up the destination register with one of the
875 ;; memory operations. If we don't manage to split the insn, reload will
876 ;; generate the appropriate moves. The reason this is needed is that combine
877 ;; has already folded one of the memory references into both the multiply and
878 ;; add insns, and it can't generate a new pseudo. I.e.:
879 ;; (set (reg1) (mem (addr1)))
880 ;; (set (reg2) (mult (reg1) (mem (addr2))))
881 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 multiply-add for scalar and packed float modes, emitted as
;; fmadd<ssemodesuffixf4> with four operands.  Operands 1 and 2 are the
;; multiplicands and operand 3 the addend.  The four alternatives tie the
;; destination to operand 1 (alternatives 0-1) or to operand 3 (alternatives
;; 2-3).  Requires TARGET_SSE5, TARGET_FUSED_MADD and ix86_sse5_valid_op_p.
;; NOTE(review): the fifth argument (2) presumably is the number of memory
;; operands tolerated before splitting -- confirm against the helper.
883 (define_insn "sse5_fmadd<mode>4"
884 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
887 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
888 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
889 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
890 "TARGET_SSE5 && TARGET_FUSED_MADD
891 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
892 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
893 [(set_attr "type" "ssemuladd")
894 (set_attr "mode" "<MODE>")])
896 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with a
;; fifth argument of 1 but accepts them with 2, and the destination register
;; is not mentioned in any source operand.  It calls
;; ix86_expand_sse5_multiple_memory on the operands and then re-emits the
;; operation via gen_sse5_fmadd<mode>4.
;; NOTE(review): the pattern head and the start of the condition are not
;; visible in this listing.
898 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
901 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
902 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
903 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
905 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
906 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
907 && !reg_mentioned_p (operands[0], operands[1])
908 && !reg_mentioned_p (operands[0], operands[2])
909 && !reg_mentioned_p (operands[0], operands[3])"
912 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
913 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
914 operands[2], operands[3]));
918 ;; For the scalar operations, use operand1 for the upper words that aren't
919 ;; modified, so restrict the forms that are generated.
920 ;; Scalar version of fmadd
;; Emitted as fmadd<ssemodesuffixf2s> (fmaddss / fmaddsd) inside a vec_merge.
;; Operand 1 is tied to the destination in both alternatives.
;; ix86_sse5_valid_op_p is called with a fifth argument of 1, where the packed
;; pattern passes 2.
;; NOTE(review): the "mode" attribute is <MODE>, whereas the other vm*
;; patterns in this file use <ssescalarmode> -- confirm this is intended.
921 (define_insn "sse5_vmfmadd<mode>4"
922 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
923 (vec_merge:SSEMODEF2P
926 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
927 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
928 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
931 "TARGET_SSE5 && TARGET_FUSED_MADD
932 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
933 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
934 [(set_attr "type" "ssemuladd")
935 (set_attr "mode" "<MODE>")])
937 ;; Floating multiply and subtract
938 ;; Allow two memory operands the same as fmadd
;; Emitted as fmsub<ssemodesuffixf4> with four operands.  Constraints and
;; condition mirror sse5_fmadd<mode>4: the destination is tied to operand 1
;; (alternatives 0-1) or operand 3 (alternatives 2-3).
939 (define_insn "sse5_fmsub<mode>4"
940 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
943 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
944 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
945 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
946 "TARGET_SSE5 && TARGET_FUSED_MADD
947 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
948 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
949 [(set_attr "type" "ssemuladd")
950 (set_attr "mode" "<MODE>")])
952 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with a
;; fifth argument of 1 but accepts them with 2, and the destination register
;; is not mentioned in any source operand.  It calls
;; ix86_expand_sse5_multiple_memory and then re-emits the operation via
;; gen_sse5_fmsub<mode>4.
954 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
957 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
958 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
959 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
961 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
962 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
963 && !reg_mentioned_p (operands[0], operands[1])
964 && !reg_mentioned_p (operands[0], operands[2])
965 && !reg_mentioned_p (operands[0], operands[3])"
968 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
969 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
970 operands[2], operands[3]));
974 ;; For the scalar operations, use operand1 for the upper words that aren't
975 ;; modified, so restrict the forms that are generated.
976 ;; Scalar version of fmsub
;; Iterates over SSEMODEF2P (V4SF, V2DF) and emits fmsub<ssemodesuffixf2s>.
;; Both constraint alternatives tie operand 1 to the destination; either
;; operand 2 or operand 3 (never both) may be memory.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD and a successful ix86_sse5_valid_op_p
;; check, called here with 1 and a final argument of false.
977 (define_insn "sse5_vmfmsub<mode>4"
978 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
979 (vec_merge:SSEMODEF2P
982 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
983 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
984 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
987 "TARGET_SSE5 && TARGET_FUSED_MADD
988 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
989 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
990 [(set_attr "type" "ssemuladd")
991 (set_attr "mode" "<MODE>")])
993 ;; Floating point negative multiply and add
994 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
995 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
996 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the pattern, followed by the
;; multiplicands, operands 1 and 2.  Emits fnmadd<ssemodesuffixf4> for
;; every mode in SSEMODEF4 (SF, DF, V4SF, V2DF).  Each of the four
;; constraint alternatives ties one source to the destination and allows
;; one other source to be memory.
997 (define_insn "sse5_fnmadd<mode>4"
998 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1000 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1002 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1003 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1004 "TARGET_SSE5 && TARGET_FUSED_MADD
1005 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1006 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1007 [(set_attr "type" "ssemuladd")
1008 (set_attr "mode" "<MODE>")])
1010 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The visible part of the condition selects operand sets that
;; ix86_sse5_valid_op_p rejects when called with 1 but accepts when called
;; with 2, and requires that operand 0 is not mentioned in operands 1-3.
;; The replacement code calls ix86_expand_sse5_multiple_memory on the
;; operands and then emits sse5_fnmadd<mode>4.
1012 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1014 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1016 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1017 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1019 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1020 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1021 && !reg_mentioned_p (operands[0], operands[1])
1022 && !reg_mentioned_p (operands[0], operands[2])
1023 && !reg_mentioned_p (operands[0], operands[3])"
1026 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1027 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1028 operands[2], operands[3]));
1032 ;; For the scalar operations, use operand1 for the upper words that aren't
1033 ;; modified, so restrict the forms that are generated.
1034 ;; Scalar version of fnmadd
;; Iterates over SSEMODEF2P (V4SF, V2DF) and emits fnmadd<ssemodesuffixf2s>.
;; Operand 3 is listed first in the pattern, then operands 1 and 2.  Both
;; constraint alternatives tie operand 1 to the destination; either
;; operand 2 or operand 3 may be memory.  Requires
;; TARGET_SSE5 && TARGET_FUSED_MADD plus the ix86_sse5_valid_op_p check.
1035 (define_insn "sse5_vmfnmadd<mode>4"
1036 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1037 (vec_merge:SSEMODEF2P
1039 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1041 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1042 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1045 "TARGET_SSE5 && TARGET_FUSED_MADD
1046 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1047 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1048 [(set_attr "type" "ssemuladd")
1049 (set_attr "mode" "<MODE>")])
1051 ;; Floating point negative multiply and subtract
1052 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1053 ;; Allow 2 memory operands to help with optimization
;; Emits fnmsub<ssemodesuffixf4> for every mode in SSEMODEF4 (SF, DF, V4SF,
;; V2DF).  Only two constraint alternatives are offered: operand 1 is
;; always tied to the destination and carries no % commutativity marker;
;; either operand 2 or operand 3 may be memory.  The ix86_sse5_valid_op_p
;; call passes 2 and a final argument of false.
1054 (define_insn "sse5_fnmsub<mode>4"
1055 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1059 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1060 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1061 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1062 "TARGET_SSE5 && TARGET_FUSED_MADD
1063 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1064 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1065 [(set_attr "type" "ssemuladd")
1066 (set_attr "mode" "<MODE>")])
1068 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The visible part of the condition selects operand sets that
;; ix86_sse5_valid_op_p rejects when called with 1 but accepts when called
;; with 2 (final argument false in both calls), and requires that operand 0
;; is not mentioned in operands 1-3.  The replacement code calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_fnmsub<mode>4.
1070 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1074 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1075 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1076 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1078 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1079 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1080 && !reg_mentioned_p (operands[0], operands[1])
1081 && !reg_mentioned_p (operands[0], operands[2])
1082 && !reg_mentioned_p (operands[0], operands[3])"
1085 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1086 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1087 operands[2], operands[3]));
1091 ;; For the scalar operations, use operand1 for the upper words that aren't
1092 ;; modified, so restrict the forms that are generated.
1093 ;; Scalar version of fnmsub
;; Iterates over SSEMODEF2P (V4SF, V2DF) and emits fnmsub<ssemodesuffixf2s>.
;; Both constraint alternatives tie operand 1 to the destination; either
;; operand 2 or operand 3 may be memory.
;; NOTE(review): this scalar pattern passes 2 to ix86_sse5_valid_op_p while
;; the other sse5_vm* patterns visible nearby pass 1 -- confirm intended.
1094 (define_insn "sse5_vmfnmsub<mode>4"
1095 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1096 (vec_merge:SSEMODEF2P
1100 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1101 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1102 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1105 "TARGET_SSE5 && TARGET_FUSED_MADD
1106 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1107 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1108 [(set_attr "type" "ssemuladd")
1109 (set_attr "mode" "<MODE>")])
1111 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1112 ;; even if the user used -mno-fused-madd
1113 ;; Parallel instructions. During instruction generation, just default
1114 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the fmadd intrinsic over SSEMODEF2P (V4SF, V2DF).  All four
;; operands must be registers.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC, but when TARGET_FUSED_MADD is set the preparation
;; code emits the plain sse5_fmadd<mode>4 pattern instead.
1115 (define_expand "sse5i_fmadd<mode>4"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1120 (match_operand:SSEMODEF2P 1 "register_operand" "")
1121 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1122 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1123 UNSPEC_SSE5_INTRINSIC))]
1126 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1127 if (TARGET_FUSED_MADD)
1129 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1130 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmadd over SSEMODEF2P (V4SF, V2DF).  Unlike
;; the sse5_* patterns, the condition does not test TARGET_FUSED_MADD; it
;; needs only TARGET_SSE5 and the ix86_sse5_valid_op_p check.  Four
;; constraint alternatives: one source tied to the destination and one
;; other source optionally in memory; % makes operands 1 and 2 commutative.
1135 (define_insn "*sse5i_fmadd<mode>4"
1136 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1140 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1141 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1142 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1143 UNSPEC_SSE5_INTRINSIC))]
1144 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1145 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1146 [(set_attr "type" "ssemuladd")
1147 (set_attr "mode" "<MODE>")])
;; Expander for the fmsub intrinsic over SSEMODEF2P (V4SF, V2DF).  All four
;; operands must be registers.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC, but when TARGET_FUSED_MADD is set the preparation
;; code emits the plain sse5_fmsub<mode>4 pattern instead.
1149 (define_expand "sse5i_fmsub<mode>4"
1150 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1154 (match_operand:SSEMODEF2P 1 "register_operand" "")
1155 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1156 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1157 UNSPEC_SSE5_INTRINSIC))]
1160 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1161 if (TARGET_FUSED_MADD)
1163 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1164 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmsub over SSEMODEF2P (V4SF, V2DF).  The
;; condition needs only TARGET_SSE5 and the ix86_sse5_valid_op_p check; it
;; does not test TARGET_FUSED_MADD.  Four constraint alternatives: one
;; source tied to the destination, one other source optionally in memory.
1169 (define_insn "*sse5i_fmsub<mode>4"
1170 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1174 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1175 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1176 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1177 UNSPEC_SSE5_INTRINSIC))]
1178 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1179 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1180 [(set_attr "type" "ssemuladd")
1181 (set_attr "mode" "<MODE>")])
1183 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1184 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the fnmadd intrinsic over SSEMODEF2P (V4SF, V2DF).  All
;; operands must be registers; operand 3 is listed before operands 1 and 2.
;; When TARGET_FUSED_MADD is set the preparation code emits the plain
;; sse5_fnmadd<mode>4 pattern instead of the UNSPEC form.
1185 (define_expand "sse5i_fnmadd<mode>4"
1186 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1189 (match_operand:SSEMODEF2P 3 "register_operand" "")
1191 (match_operand:SSEMODEF2P 1 "register_operand" "")
1192 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1193 UNSPEC_SSE5_INTRINSIC))]
1196 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1197 if (TARGET_FUSED_MADD)
1199 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1200 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fnmadd over SSEMODEF2P (V4SF, V2DF).
;; Operand 3 is listed before operands 1 and 2 in the pattern.  The
;; condition needs only TARGET_SSE5 and the ix86_sse5_valid_op_p check.
;; Four constraint alternatives: one source tied to the destination, one
;; other source optionally in memory.
1205 (define_insn "*sse5i_fnmadd<mode>4"
1206 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1209 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1211 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1212 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1213 UNSPEC_SSE5_INTRINSIC))]
1214 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1215 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1216 [(set_attr "type" "ssemuladd")
1217 (set_attr "mode" "<MODE>")])
1219 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the fnmsub intrinsic over SSEMODEF2P (V4SF, V2DF).  All
;; four operands must be registers.  When TARGET_FUSED_MADD is set the
;; preparation code emits the plain sse5_fnmsub<mode>4 pattern instead of
;; the UNSPEC_SSE5_INTRINSIC form.
1220 (define_expand "sse5i_fnmsub<mode>4"
1221 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1226 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1227 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1228 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1229 UNSPEC_SSE5_INTRINSIC))]
1232 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1233 if (TARGET_FUSED_MADD)
1235 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1236 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fnmsub over SSEMODEF2P (V4SF, V2DF).  The
;; condition needs only TARGET_SSE5 and the ix86_sse5_valid_op_p check,
;; whose final argument is false here.  Operand 1 has no % commutativity
;; marker, unlike the fmadd/fmsub/fnmadd intrinsic patterns.
1241 (define_insn "*sse5i_fnmsub<mode>4"
1242 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1247 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
1248 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1249 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1250 UNSPEC_SSE5_INTRINSIC))]
1251 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1252 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1253 [(set_attr "type" "ssemuladd")
1254 (set_attr "mode" "<MODE>")])
1256 ;; Scalar instructions
;; Expander for the scalar (vec_merge) fmadd intrinsic over SSEMODEF2P
;; (V4SF, V2DF).  All operands must be registers.  When TARGET_FUSED_MADD
;; is set the preparation code emits the plain sse5_vmfmadd<mode>4 pattern
;; instead of the UNSPEC_SSE5_INTRINSIC form.
1257 (define_expand "sse5i_vmfmadd<mode>4"
1258 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1260 [(vec_merge:SSEMODEF2P
1263 (match_operand:SSEMODEF2P 1 "register_operand" "")
1264 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1265 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1268 UNSPEC_SSE5_INTRINSIC))]
1271 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1272 if (TARGET_FUSED_MADD)
1274 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1275 operands[2], operands[3]));
1280 ;; For the scalar operations, use operand1 for the upper words that aren't
1281 ;; modified, so restrict the forms that are accepted.
;; UNSPEC_SSE5_INTRINSIC scalar fmadd over SSEMODEF2P (V4SF, V2DF).
;; Operand 1 must be a register and is tied to the destination in both
;; alternatives; either operand 2 or operand 3 may be memory.  The mode
;; attribute is the scalar mode (<ssescalarmode>), not the vector mode.
1282 (define_insn "*sse5i_vmfmadd<mode>4"
1283 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1285 [(vec_merge:SSEMODEF2P
1288 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1289 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1290 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1293 UNSPEC_SSE5_INTRINSIC))]
1294 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1295 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1296 [(set_attr "type" "ssemuladd")
1297 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) fmsub intrinsic over SSEMODEF2P
;; (V4SF, V2DF).  All operands must be registers.  When TARGET_FUSED_MADD
;; is set the preparation code emits the plain sse5_vmfmsub<mode>4 pattern
;; instead of the UNSPEC_SSE5_INTRINSIC form.
1299 (define_expand "sse5i_vmfmsub<mode>4"
1300 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1302 [(vec_merge:SSEMODEF2P
1305 (match_operand:SSEMODEF2P 1 "register_operand" "")
1306 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1307 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1310 UNSPEC_SSE5_INTRINSIC))]
1313 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1314 if (TARGET_FUSED_MADD)
1316 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1317 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC scalar fmsub over SSEMODEF2P (V4SF, V2DF).
;; Operand 1 must be a register and is tied to the destination in both
;; alternatives; either operand 2 or operand 3 may be memory.  The mode
;; attribute is the scalar mode (<ssescalarmode>).
1322 (define_insn "*sse5i_vmfmsub<mode>4"
1323 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1325 [(vec_merge:SSEMODEF2P
1328 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1330 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1333 UNSPEC_SSE5_INTRINSIC))]
1334 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1335 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1336 [(set_attr "type" "ssemuladd")
1337 (set_attr "mode" "<ssescalarmode>")])
1339 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar (vec_merge) fnmadd intrinsic over SSEMODEF2P
;; (V4SF, V2DF).  All operands must be registers; operand 3 is listed
;; before operands 1 and 2.  When TARGET_FUSED_MADD is set the preparation
;; code emits the plain sse5_vmfnmadd<mode>4 pattern instead of the UNSPEC.
1340 (define_expand "sse5i_vmfnmadd<mode>4"
1341 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1343 [(vec_merge:SSEMODEF2P
1345 (match_operand:SSEMODEF2P 3 "register_operand" "")
1347 (match_operand:SSEMODEF2P 1 "register_operand" "")
1348 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1351 UNSPEC_SSE5_INTRINSIC))]
1354 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1355 if (TARGET_FUSED_MADD)
1357 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1358 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC scalar fnmadd over SSEMODEF2P (V4SF, V2DF).
;; Operand 3 is listed before operands 1 and 2.  Operand 1 is tied to the
;; destination in both alternatives; either operand 2 or 3 may be memory.
;; NOTE(review): operand 1 here is nonimmediate_operand with a %
;; commutativity marker, whereas the other *sse5i_vm* patterns use
;; register_operand without % -- confirm the difference is intended.
1363 (define_insn "*sse5i_vmfnmadd<mode>4"
1364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1366 [(vec_merge:SSEMODEF2P
1368 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1370 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
1371 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1374 UNSPEC_SSE5_INTRINSIC))]
1375 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1376 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1377 [(set_attr "type" "ssemuladd")
1378 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) fnmsub intrinsic over SSEMODEF2P
;; (V4SF, V2DF).  All operands must be registers.  When TARGET_FUSED_MADD
;; is set the preparation code emits the plain sse5_vmfnmsub<mode>4 pattern
;; instead of the UNSPEC_SSE5_INTRINSIC form.
1380 (define_expand "sse5i_vmfnmsub<mode>4"
1381 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1383 [(vec_merge:SSEMODEF2P
1387 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1388 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1389 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1392 UNSPEC_SSE5_INTRINSIC))]
1395 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1396 if (TARGET_FUSED_MADD)
1398 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1399 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC scalar fnmsub over SSEMODEF2P (V4SF, V2DF).
;; Operand 1 must be a register and is tied to the destination in both
;; alternatives; either operand 2 or operand 3 may be memory.  The mode
;; attribute is the scalar mode (<ssescalarmode>).
1404 (define_insn "*sse5i_vmfnmsub<mode>4"
1405 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1407 [(vec_merge:SSEMODEF2P
1411 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
1412 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1413 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1416 UNSPEC_SSE5_INTRINSIC))]
1417 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1418 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1419 [(set_attr "type" "ssemuladd")
1420 (set_attr "mode" "<ssescalarmode>")])
1422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1424 ;; Parallel single-precision floating point conversion operations
1426 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory) to
;; floating point (float:V2SF) into the V4SF destination.  Operand 1 is a
;; V4SF register tied to the destination (constraint 0).
1428 (define_insn "sse_cvtpi2ps"
1429 [(set (match_operand:V4SF 0 "register_operand" "=x")
1432 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1433 (match_operand:V4SF 1 "register_operand" "0")
1436 "cvtpi2ps\t{%2, %0|%0, %2}"
1437 [(set_attr "type" "ssecvt")
1438 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF operand 1 (SSE register or memory) is converted via
;; an unspec:V4SI, and elements 0 and 1 of that result form the V2SI
;; destination, which lives in an MMX register (constraint y).
1440 (define_insn "sse_cvtps2pi"
1441 [(set (match_operand:V2SI 0 "register_operand" "=y")
1443 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1445 (parallel [(const_int 0) (const_int 1)])))]
1447 "cvtps2pi\t{%1, %0|%0, %1}"
1448 [(set_attr "type" "ssecvt")
1449 (set_attr "unit" "mmx")
1450 (set_attr "mode" "DI")])
;; cvttps2pi: the V4SF operand 1 (SSE register or memory) is converted with
;; fix:V4SI, and elements 0 and 1 of that result form the V2SI destination
;; in an MMX register (constraint y).
;; NOTE(review): the mode attribute is SF here while sse_cvtps2pi declares
;; DI for the same operand shapes -- confirm SF is intended.
1452 (define_insn "sse_cvttps2pi"
1453 [(set (match_operand:V2SI 0 "register_operand" "=y")
1455 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1456 (parallel [(const_int 0) (const_int 1)])))]
1458 "cvttps2pi\t{%1, %0|%0, %1}"
1459 [(set_attr "type" "ssecvt")
1460 (set_attr "unit" "mmx")
1461 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SI operand 2 to SF (float:SF).  Alternative 1
;; takes operand 2 from a general register, alternative 2 from memory.
;; Operand 1 is a V4SF register tied to the destination.  The decode
;; attributes list one value per alternative.
1463 (define_insn "sse_cvtsi2ss"
1464 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1467 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1468 (match_operand:V4SF 1 "register_operand" "0,0")
1471 "cvtsi2ss\t{%2, %0|%0, %2}"
1472 [(set_attr "type" "sseicvt")
1473 (set_attr "athlon_decode" "vector,double")
1474 (set_attr "amdfam10_decode" "vector,double")
1475 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant of cvtsi2ss; converts the DI operand 2 to SF.
;; Only available when TARGET_SSE && TARGET_64BIT.  Operand 1 is a V4SF
;; register tied to the destination.
;; NOTE(review): the second alternative of operand 2 is rm here, while
;; sse_cvtsi2ss uses plain m -- confirm whether the overlap is intended.
1477 (define_insn "sse_cvtsi2ssq"
1478 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1481 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1482 (match_operand:V4SF 1 "register_operand" "0,0")
1484 "TARGET_SSE && TARGET_64BIT"
1485 "cvtsi2ssq\t{%2, %0|%0, %2}"
1486 [(set_attr "type" "sseicvt")
1487 (set_attr "athlon_decode" "vector,double")
1488 (set_attr "amdfam10_decode" "vector,double")
1489 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of the V4SF operand 1 (SSE register or
;; memory) to an SI general register.  The conversion is expressed with
;; UNSPEC_FIX_NOTRUNC rather than the fix operator.
1491 (define_insn "sse_cvtss2si"
1492 [(set (match_operand:SI 0 "register_operand" "=r,r")
1495 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1496 (parallel [(const_int 0)]))]
1497 UNSPEC_FIX_NOTRUNC))]
1499 "cvtss2si\t{%1, %0|%0, %1}"
1500 [(set_attr "type" "sseicvt")
1501 (set_attr "athlon_decode" "double,vector")
1502 (set_attr "prefix_rep" "1")
1503 (set_attr "mode" "SI")])
;; cvtss2si on a scalar: same instruction as sse_cvtss2si, but operand 1 is
;; an SF value (SSE register or memory) instead of element 0 of a V4SF
;; vector.  Expressed with UNSPEC_FIX_NOTRUNC; result is an SI register.
1505 (define_insn "sse_cvtss2si_2"
1506 [(set (match_operand:SI 0 "register_operand" "=r,r")
1507 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1508 UNSPEC_FIX_NOTRUNC))]
1510 "cvtss2si\t{%1, %0|%0, %1}"
1511 [(set_attr "type" "sseicvt")
1512 (set_attr "athlon_decode" "double,vector")
1513 (set_attr "amdfam10_decode" "double,double")
1514 (set_attr "prefix_rep" "1")
1515 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit variant of cvtss2si; converts element 0 of the V4SF
;; operand 1 to a DI general register via UNSPEC_FIX_NOTRUNC.  Only
;; available when TARGET_SSE && TARGET_64BIT.
1517 (define_insn "sse_cvtss2siq"
1518 [(set (match_operand:DI 0 "register_operand" "=r,r")
1521 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1522 (parallel [(const_int 0)]))]
1523 UNSPEC_FIX_NOTRUNC))]
1524 "TARGET_SSE && TARGET_64BIT"
1525 "cvtss2siq\t{%1, %0|%0, %1}"
1526 [(set_attr "type" "sseicvt")
1527 (set_attr "athlon_decode" "double,vector")
1528 (set_attr "prefix_rep" "1")
1529 (set_attr "mode" "DI")])
;; cvtss2siq on a scalar: operand 1 is an SF value (SSE register or
;; memory) and the result is a DI general register, expressed with
;; UNSPEC_FIX_NOTRUNC.  Only available when TARGET_SSE && TARGET_64BIT.
1531 (define_insn "sse_cvtss2siq_2"
1532 [(set (match_operand:DI 0 "register_operand" "=r,r")
1533 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1534 UNSPEC_FIX_NOTRUNC))]
1535 "TARGET_SSE && TARGET_64BIT"
1536 "cvtss2siq\t{%1, %0|%0, %1}"
1537 [(set_attr "type" "sseicvt")
1538 (set_attr "athlon_decode" "double,vector")
1539 (set_attr "amdfam10_decode" "double,double")
1540 (set_attr "prefix_rep" "1")
1541 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of the V4SF operand 1 (SSE register or
;; memory) to an SI general register.
1543 (define_insn "sse_cvttss2si"
1544 [(set (match_operand:SI 0 "register_operand" "=r,r")
1547 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1548 (parallel [(const_int 0)]))))]
1550 "cvttss2si\t{%1, %0|%0, %1}"
1551 [(set_attr "type" "sseicvt")
1552 (set_attr "athlon_decode" "double,vector")
1553 (set_attr "amdfam10_decode" "double,double")
1554 (set_attr "prefix_rep" "1")
1555 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit variant of cvttss2si; converts element 0 of the V4SF
;; operand 1 to a DI general register.  Only available when
;; TARGET_SSE && TARGET_64BIT.
1557 (define_insn "sse_cvttss2siq"
1558 [(set (match_operand:DI 0 "register_operand" "=r,r")
1561 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1562 (parallel [(const_int 0)]))))]
1563 "TARGET_SSE && TARGET_64BIT"
1564 "cvttss2siq\t{%1, %0|%0, %1}"
1565 [(set_attr "type" "sseicvt")
1566 (set_attr "athlon_decode" "double,vector")
1567 (set_attr "amdfam10_decode" "double,double")
1568 (set_attr "prefix_rep" "1")
1569 (set_attr "mode" "DI")])
;; cvtdq2ps: converts all four elements of the V4SI operand 1 (SSE register
;; or memory) to floating point, producing the V4SF destination.
1571 (define_insn "sse2_cvtdq2ps"
1572 [(set (match_operand:V4SF 0 "register_operand" "=x")
1573 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1575 "cvtdq2ps\t{%1, %0|%0, %1}"
1576 [(set_attr "type" "ssecvt")
1577 (set_attr "mode" "V4SF")])
;; cvtps2dq: converts the V4SF operand 1 (SSE register or memory) to the
;; V4SI destination.  Expressed with UNSPEC_FIX_NOTRUNC rather than the
;; fix operator used by sse2_cvttps2dq.
1579 (define_insn "sse2_cvtps2dq"
1580 [(set (match_operand:V4SI 0 "register_operand" "=x")
1581 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1582 UNSPEC_FIX_NOTRUNC))]
1584 "cvtps2dq\t{%1, %0|%0, %1}"
1585 [(set_attr "type" "ssecvt")
1586 (set_attr "prefix_data16" "1")
1587 (set_attr "mode" "TI")])
;; cvttps2dq: converts the V4SF operand 1 (SSE register or memory) to the
;; V4SI destination using the fix operator.
1589 (define_insn "sse2_cvttps2dq"
1590 [(set (match_operand:V4SI 0 "register_operand" "=x")
1591 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1593 "cvttps2dq\t{%1, %0|%0, %1}"
1594 [(set_attr "type" "ssecvt")
1595 (set_attr "prefix_rep" "1")
1596 (set_attr "mode" "TI")])
1598 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1600 ;; Parallel double-precision floating point conversion operations
1602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: converts the V2SI operand 1 to the V2DF destination.
;; Alternative 1 takes the source from an MMX register (unit mmx),
;; alternative 2 from memory (unit left at its default, *).
1604 (define_insn "sse2_cvtpi2pd"
1605 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1606 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1608 "cvtpi2pd\t{%1, %0|%0, %1}"
1609 [(set_attr "type" "ssecvt")
1610 (set_attr "unit" "mmx,*")
1611 (set_attr "mode" "V2DF")])
;; cvtpd2pi: converts the V2DF operand 1 (SSE register or memory) to a
;; V2SI result in an MMX register.  Expressed with UNSPEC_FIX_NOTRUNC.
1613 (define_insn "sse2_cvtpd2pi"
1614 [(set (match_operand:V2SI 0 "register_operand" "=y")
1615 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1616 UNSPEC_FIX_NOTRUNC))]
1618 "cvtpd2pi\t{%1, %0|%0, %1}"
1619 [(set_attr "type" "ssecvt")
1620 (set_attr "unit" "mmx")
1621 (set_attr "prefix_data16" "1")
1622 (set_attr "mode" "DI")])
;; cvttpd2pi: converts the V2DF operand 1 (SSE register or memory) to a
;; V2SI result in an MMX register using the fix operator.
;; NOTE(review): the mode attribute is TI here while sse2_cvtpd2pi declares
;; DI for the same operand shapes -- confirm TI is intended.
1624 (define_insn "sse2_cvttpd2pi"
1625 [(set (match_operand:V2SI 0 "register_operand" "=y")
1626 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1628 "cvttpd2pi\t{%1, %0|%0, %1}"
1629 [(set_attr "type" "ssecvt")
1630 (set_attr "unit" "mmx")
1631 (set_attr "prefix_data16" "1")
1632 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SI operand 2 (general register or memory) to DF
;; (float:DF).  Operand 1 is a V2DF register tied to the destination.
;; The decode attributes list one value per alternative.
1634 (define_insn "sse2_cvtsi2sd"
1635 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1638 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1639 (match_operand:V2DF 1 "register_operand" "0,0")
1642 "cvtsi2sd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "sseicvt")
1644 (set_attr "mode" "DF")
1645 (set_attr "athlon_decode" "double,direct")
1646 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit variant of cvtsi2sd; converts the DI operand 2
;; (general register or memory) to DF.  Operand 1 is a V2DF register tied
;; to the destination.  Only available when TARGET_SSE2 && TARGET_64BIT.
1648 (define_insn "sse2_cvtsi2sdq"
1649 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1652 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1653 (match_operand:V2DF 1 "register_operand" "0,0")
1655 "TARGET_SSE2 && TARGET_64BIT"
1656 "cvtsi2sdq\t{%2, %0|%0, %2}"
1657 [(set_attr "type" "sseicvt")
1658 (set_attr "mode" "DF")
1659 (set_attr "athlon_decode" "double,direct")
1660 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: converts element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SI general register.  Expressed with UNSPEC_FIX_NOTRUNC
;; rather than the fix operator.
1662 (define_insn "sse2_cvtsd2si"
1663 [(set (match_operand:SI 0 "register_operand" "=r,r")
1666 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1667 (parallel [(const_int 0)]))]
1668 UNSPEC_FIX_NOTRUNC))]
1670 "cvtsd2si\t{%1, %0|%0, %1}"
1671 [(set_attr "type" "sseicvt")
1672 (set_attr "athlon_decode" "double,vector")
1673 (set_attr "prefix_rep" "1")
1674 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar: operand 1 is a DF value (SSE register or memory)
;; instead of element 0 of a V2DF vector.  Expressed with
;; UNSPEC_FIX_NOTRUNC; the result is an SI general register.
1676 (define_insn "sse2_cvtsd2si_2"
1677 [(set (match_operand:SI 0 "register_operand" "=r,r")
1678 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1679 UNSPEC_FIX_NOTRUNC))]
1681 "cvtsd2si\t{%1, %0|%0, %1}"
1682 [(set_attr "type" "sseicvt")
1683 (set_attr "athlon_decode" "double,vector")
1684 (set_attr "amdfam10_decode" "double,double")
1685 (set_attr "prefix_rep" "1")
1686 (set_attr "mode" "SI")])
;; cvtsd2siq: 64-bit variant of cvtsd2si; converts element 0 of the V2DF
;; operand 1 to a DI general register via UNSPEC_FIX_NOTRUNC.  Only
;; available when TARGET_SSE2 && TARGET_64BIT.
1688 (define_insn "sse2_cvtsd2siq"
1689 [(set (match_operand:DI 0 "register_operand" "=r,r")
1692 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1693 (parallel [(const_int 0)]))]
1694 UNSPEC_FIX_NOTRUNC))]
1695 "TARGET_SSE2 && TARGET_64BIT"
1696 "cvtsd2siq\t{%1, %0|%0, %1}"
1697 [(set_attr "type" "sseicvt")
1698 (set_attr "athlon_decode" "double,vector")
1699 (set_attr "prefix_rep" "1")
1700 (set_attr "mode" "DI")])
;; cvtsd2siq on a scalar: operand 1 is a DF value (SSE register or memory)
;; and the result is a DI general register, expressed with
;; UNSPEC_FIX_NOTRUNC.  Only available when TARGET_SSE2 && TARGET_64BIT.
1702 (define_insn "sse2_cvtsd2siq_2"
1703 [(set (match_operand:DI 0 "register_operand" "=r,r")
1704 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1705 UNSPEC_FIX_NOTRUNC))]
1706 "TARGET_SSE2 && TARGET_64BIT"
1707 "cvtsd2siq\t{%1, %0|%0, %1}"
1708 [(set_attr "type" "sseicvt")
1709 (set_attr "athlon_decode" "double,vector")
1710 (set_attr "amdfam10_decode" "double,double")
1711 (set_attr "prefix_rep" "1")
1712 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SI general register.
1714 (define_insn "sse2_cvttsd2si"
1715 [(set (match_operand:SI 0 "register_operand" "=r,r")
1718 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1719 (parallel [(const_int 0)]))))]
1721 "cvttsd2si\t{%1, %0|%0, %1}"
1722 [(set_attr "type" "sseicvt")
1723 (set_attr "prefix_rep" "1")
1724 (set_attr "mode" "SI")
1725 (set_attr "athlon_decode" "double,vector")
1726 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: 64-bit variant of cvttsd2si; converts element 0 of the V2DF
;; operand 1 to a DI general register.  Only available when
;; TARGET_SSE2 && TARGET_64BIT.
1728 (define_insn "sse2_cvttsd2siq"
1729 [(set (match_operand:DI 0 "register_operand" "=r,r")
1732 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1733 (parallel [(const_int 0)]))))]
1734 "TARGET_SSE2 && TARGET_64BIT"
1735 "cvttsd2siq\t{%1, %0|%0, %1}"
1736 [(set_attr "type" "sseicvt")
1737 (set_attr "prefix_rep" "1")
1738 (set_attr "mode" "DI")
1739 (set_attr "athlon_decode" "double,vector")
1740 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces the V2DF destination from elements 0 and 1 of the
;; V4SI operand 1 (SSE register or memory).
1742 (define_insn "sse2_cvtdq2pd"
1743 [(set (match_operand:V2DF 0 "register_operand" "=x")
1746 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1747 (parallel [(const_int 0) (const_int 1)]))))]
1749 "cvtdq2pd\t{%1, %0|%0, %1}"
1750 [(set_attr "type" "ssecvt")
1751 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted through an
;; unspec:V2SI into the V4SI destination.  The preparation statement
;; supplies operand 2 as the V2SImode zero constant; presumably it fills
;; the other half of the result -- confirm against the full pattern.
1753 (define_expand "sse2_cvtpd2dq"
1754 [(set (match_operand:V4SI 0 "register_operand" "")
1756 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1760 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: matches the form built by the sse2_cvtpd2dq expander.
;; Operand 1 is V2DF (SSE register or memory); operand 2 must be a V2SI
;; zero constant (const0_operand) and does not appear in the assembly.
1762 (define_insn "*sse2_cvtpd2dq"
1763 [(set (match_operand:V4SI 0 "register_operand" "=x")
1765 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1767 (match_operand:V2SI 2 "const0_operand" "")))]
1769 "cvtpd2dq\t{%1, %0|%0, %1}"
1770 [(set_attr "type" "ssecvt")
1771 (set_attr "prefix_rep" "1")
1772 (set_attr "mode" "TI")
1773 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: V2DF operand 1 is converted with fix:V2SI into
;; the V4SI destination.  The preparation statement supplies operand 2 as
;; the V2SImode zero constant; presumably it fills the other half of the
;; result -- confirm against the full pattern.
1775 (define_expand "sse2_cvttpd2dq"
1776 [(set (match_operand:V4SI 0 "register_operand" "")
1778 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1781 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: matches the form built by the sse2_cvttpd2dq expander.
;; Operand 1 is V2DF (SSE register or memory); operand 2 must be a V2SI
;; zero constant (const0_operand) and does not appear in the assembly.
1783 (define_insn "*sse2_cvttpd2dq"
1784 [(set (match_operand:V4SI 0 "register_operand" "=x")
1786 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1787 (match_operand:V2SI 2 "const0_operand" "")))]
1789 "cvttpd2dq\t{%1, %0|%0, %1}"
1790 [(set_attr "type" "ssecvt")
1791 (set_attr "prefix_rep" "1")
1792 (set_attr "mode" "TI")
1793 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: the V2DF operand 2 (SSE register or memory) is narrowed with
;; float_truncate:V2SF.  Operand 1 is a V4SF register tied to the
;; destination.  The decode attributes list one value per alternative.
1795 (define_insn "sse2_cvtsd2ss"
1796 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1799 (float_truncate:V2SF
1800 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1801 (match_operand:V4SF 1 "register_operand" "0,0")
1804 "cvtsd2ss\t{%2, %0|%0, %2}"
1805 [(set_attr "type" "ssecvt")
1806 (set_attr "athlon_decode" "vector,double")
1807 (set_attr "amdfam10_decode" "vector,double")
1808 (set_attr "mode" "SF")])
;; cvtss2sd: the source is elements 0 and 1 of the V4SF operand 2 (SSE
;; register or memory).  Operand 1 is a V2DF register tied to the
;; destination.
1810 (define_insn "sse2_cvtss2sd"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1815 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1816 (parallel [(const_int 0) (const_int 1)])))
1817 (match_operand:V2DF 1 "register_operand" "0,0")
1820 "cvtss2sd\t{%2, %0|%0, %2}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "amdfam10_decode" "vector,double")
1823 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: V2DF operand 1 is narrowed with
;; float_truncate:V2SF into the V4SF destination.  The preparation
;; statement supplies operand 2 as the V2SFmode zero constant; presumably
;; it fills the other half of the result -- confirm against the pattern.
1825 (define_expand "sse2_cvtpd2ps"
1826 [(set (match_operand:V4SF 0 "register_operand" "")
1828 (float_truncate:V2SF
1829 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1832 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: matches the form built by the sse2_cvtpd2ps expander.
;; Operand 1 is V2DF (SSE register or memory); operand 2 must be a V2SF
;; zero constant (const0_operand) and does not appear in the assembly.
1834 (define_insn "*sse2_cvtpd2ps"
1835 [(set (match_operand:V4SF 0 "register_operand" "=x")
1837 (float_truncate:V2SF
1838 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1839 (match_operand:V2SF 2 "const0_operand" "")))]
1841 "cvtpd2ps\t{%1, %0|%0, %1}"
1842 [(set_attr "type" "ssecvt")
1843 (set_attr "prefix_data16" "1")
1844 (set_attr "mode" "V4SF")
1845 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produces the V2DF destination from elements 0 and 1 of the
;; V4SF operand 1 (SSE register or memory).
1847 (define_insn "sse2_cvtps2pd"
1848 [(set (match_operand:V2DF 0 "register_operand" "=x")
1851 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1852 (parallel [(const_int 0) (const_int 1)]))))]
1854 "cvtps2pd\t{%1, %0|%0, %1}"
1855 [(set_attr "type" "ssecvt")
1856 (set_attr "mode" "V2DF")
1857 (set_attr "amdfam10_decode" "direct")])
;; Unpack expander taking a V4SF operand 1 and producing a V2DF operand 0.
;; The pattern first makes a selection from operand 1 and then sets
;; operand 0 from elements 0 and 1 of an intermediate value.  The
;; preparation statement allocates a fresh V4SFmode pseudo as operand 2,
;; presumably that intermediate -- confirm against the full pattern.
1859 (define_expand "vec_unpacks_hi_v4sf"
1864 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1865 (parallel [(const_int 6)
1869 (set (match_operand:V2DF 0 "register_operand" "")
1873 (parallel [(const_int 0) (const_int 1)]))))]
1876 operands[2] = gen_reg_rtx (V4SFmode);
;; Unpack expander: sets the V2DF operand 0 from elements 0 and 1 of the
;; V4SF operand 1, which may be a register or memory.
1879 (define_expand "vec_unpacks_lo_v4sf"
1880 [(set (match_operand:V2DF 0 "register_operand" "")
1883 (match_operand:V4SF 1 "nonimmediate_operand" "")
1884 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expansion from V8HI operand 1 to V4SF operand 0: emits
;; vec_unpacks_hi_v8hi into a fresh V4SImode temporary, then converts the
;; temporary to V4SF with sse2_cvtdq2ps.
1887 (define_expand "vec_unpacks_float_hi_v8hi"
1888 [(match_operand:V4SF 0 "register_operand" "")
1889 (match_operand:V8HI 1 "register_operand" "")]
1892 rtx tmp = gen_reg_rtx (V4SImode);
1894 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1895 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion from V8HI operand 1 to V4SF operand 0: emits
;; vec_unpacks_lo_v8hi into a fresh V4SImode temporary, then converts the
;; temporary to V4SF with sse2_cvtdq2ps.
1899 (define_expand "vec_unpacks_float_lo_v8hi"
1900 [(match_operand:V4SF 0 "register_operand" "")
1901 (match_operand:V8HI 1 "register_operand" "")]
1904 rtx tmp = gen_reg_rtx (V4SImode);
1906 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1907 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion from V8HI operand 1 to V4SF operand 0: emits
;; vec_unpacku_hi_v8hi into a fresh V4SImode temporary, then converts the
;; temporary to V4SF with sse2_cvtdq2ps.
1911 (define_expand "vec_unpacku_float_hi_v8hi"
1912 [(match_operand:V4SF 0 "register_operand" "")
1913 (match_operand:V8HI 1 "register_operand" "")]
1916 rtx tmp = gen_reg_rtx (V4SImode);
1918 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1919 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion from V8HI operand 1 to V4SF operand 0: emits
;; vec_unpacku_lo_v8hi into a fresh V4SImode temporary, then converts the
;; temporary to V4SF with sse2_cvtdq2ps.
1923 (define_expand "vec_unpacku_float_lo_v8hi"
1924 [(match_operand:V4SF 0 "register_operand" "")
1925 (match_operand:V8HI 1 "register_operand" "")]
1928 rtx tmp = gen_reg_rtx (V4SImode);
1930 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1931 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Unpack-and-convert expander taking a V4SI operand 1 and producing a
;; V2DF operand 0.  The pattern first makes a selection from operand 1 and
;; then sets operand 0 from elements 0 and 1 of an intermediate value.
;; The preparation statement allocates a fresh V4SImode pseudo as
;; operand 2, presumably that intermediate -- confirm against the pattern.
1935 (define_expand "vec_unpacks_float_hi_v4si"
1938 (match_operand:V4SI 1 "nonimmediate_operand" "")
1939 (parallel [(const_int 2)
1943 (set (match_operand:V2DF 0 "register_operand" "")
1947 (parallel [(const_int 0) (const_int 1)]))))]
1950 operands[2] = gen_reg_rtx (V4SImode);
;; Unpack-and-convert expander: sets the V2DF operand 0 from elements 0
;; and 1 of the V4SI operand 1, which may be a register or memory.
1953 (define_expand "vec_unpacks_float_lo_v4si"
1954 [(set (match_operand:V2DF 0 "register_operand" "")
1957 (match_operand:V4SI 1 "nonimmediate_operand" "")
1958 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs (operands 1 and 2) into the V4SF operand 0.
;; Each input is converted with sse2_cvtpd2ps into its own fresh V4SFmode
;; temporary (r1, r2); sse_movlhps then combines r1 and r2 into operand 0.
1961 (define_expand "vec_pack_trunc_v2df"
1962 [(match_operand:V4SF 0 "register_operand" "")
1963 (match_operand:V2DF 1 "nonimmediate_operand" "")
1964 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1969 r1 = gen_reg_rtx (V4SFmode);
1970 r2 = gen_reg_rtx (V4SFmode);
1972 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1973 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1974 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs (operands 1 and 2) into the V4SI operand 0.
;; Each input is converted with sse2_cvttpd2dq into its own fresh V4SImode
;; temporary (r1, r2); sse2_punpcklqdq then combines them, with all three
;; registers viewed as V2DImode through gen_lowpart.
1978 (define_expand "vec_pack_sfix_trunc_v2df"
1979 [(match_operand:V4SI 0 "register_operand" "")
1980 (match_operand:V2DF 1 "nonimmediate_operand" "")
1981 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1986 r1 = gen_reg_rtx (V4SImode);
1987 r2 = gen_reg_rtx (V4SImode);
1989 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1990 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1991 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1992 gen_lowpart (V2DImode, r1),
1993 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs (operands 1 and 2) into the V4SI operand 0.
;; Same shape as vec_pack_sfix_trunc_v2df but converts with sse2_cvtpd2dq
;; instead of sse2_cvttpd2dq.  The two V4SImode temporaries are combined
;; with sse2_punpcklqdq on their V2DImode lowparts.
1997 (define_expand "vec_pack_sfix_v2df"
1998 [(match_operand:V4SI 0 "register_operand" "")
1999 (match_operand:V2DF 1 "nonimmediate_operand" "")
2000 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2005 r1 = gen_reg_rtx (V4SImode);
2006 r2 = gen_reg_rtx (V4SImode);
2008 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2009 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2010 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2011 gen_lowpart (V2DImode, r1),
2012 gen_lowpart (V2DImode, r2)));
2016 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2018 ;; Parallel single-precision floating point element swizzling
2020 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander form of movhlps.  All three V4SF operands may be registers or
;; memory; the preparation statement passes them through
;; ix86_fixup_binary_operands (with code UNKNOWN and V4SFmode) before the
;; pattern is emitted.
2022 (define_expand "sse_movhlps_exp"
2023 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2026 (match_operand:V4SF 1 "nonimmediate_operand" "")
2027 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2028 (parallel [(const_int 6)
2033 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
2035 (define_insn "sse_movhlps"
2036 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2039 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2040 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2041 (parallel [(const_int 6)
2045 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2047 movhlps\t{%2, %0|%0, %2}
2048 movlps\t{%H2, %0|%0, %H2}
2049 movhps\t{%2, %0|%0, %2}"
2050 [(set_attr "type" "ssemov")
2051 (set_attr "mode" "V4SF,V2SF,V2SF")])
2053 (define_expand "sse_movlhps_exp"
2054 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2057 (match_operand:V4SF 1 "nonimmediate_operand" "")
2058 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2059 (parallel [(const_int 0)
2064 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
2066 (define_insn "sse_movlhps"
2067 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2070 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2071 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2072 (parallel [(const_int 0)
2076 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2078 movlhps\t{%2, %0|%0, %2}
2079 movhps\t{%2, %0|%0, %2}
2080 movlps\t{%2, %H0|%H0, %2}"
2081 [(set_attr "type" "ssemov")
2082 (set_attr "mode" "V4SF,V2SF,V2SF")])
2084 (define_insn "sse_unpckhps"
2085 [(set (match_operand:V4SF 0 "register_operand" "=x")
2088 (match_operand:V4SF 1 "register_operand" "0")
2089 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2090 (parallel [(const_int 2) (const_int 6)
2091 (const_int 3) (const_int 7)])))]
2093 "unpckhps\t{%2, %0|%0, %2}"
2094 [(set_attr "type" "sselog")
2095 (set_attr "mode" "V4SF")])
2097 (define_insn "sse_unpcklps"
2098 [(set (match_operand:V4SF 0 "register_operand" "=x")
2101 (match_operand:V4SF 1 "register_operand" "0")
2102 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2103 (parallel [(const_int 0) (const_int 4)
2104 (const_int 1) (const_int 5)])))]
2106 "unpcklps\t{%2, %0|%0, %2}"
2107 [(set_attr "type" "sselog")
2108 (set_attr "mode" "V4SF")])
2110 ;; These are modeled with the same vec_concat as the others so that we
2111 ;; capture users of shufps that can use the new instructions
2112 (define_insn "sse3_movshdup"
2113 [(set (match_operand:V4SF 0 "register_operand" "=x")
2116 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2118 (parallel [(const_int 1)
2123 "movshdup\t{%1, %0|%0, %1}"
2124 [(set_attr "type" "sse")
2125 (set_attr "prefix_rep" "1")
2126 (set_attr "mode" "V4SF")])
2128 (define_insn "sse3_movsldup"
2129 [(set (match_operand:V4SF 0 "register_operand" "=x")
2132 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2134 (parallel [(const_int 0)
2139 "movsldup\t{%1, %0|%0, %1}"
2140 [(set_attr "type" "sse")
2141 (set_attr "prefix_rep" "1")
2142 (set_attr "mode" "V4SF")])
;; Expander for shufps driven by a single immediate (operand 3, a
;; const_int).  The mask is split into four 2-bit fields which are handed
;; to gen_sse_shufps_v4sf as separate element indices: the fields at bits
;; 0-1 and 2-3 are passed as-is, while the fields at bits 4-5 and 6-7 are
;; biased by 4.  sse_shufps_<mode> performs the inverse encoding when it
;; prints the instruction.
2144 (define_expand "sse_shufps"
2145 [(match_operand:V4SF 0 "register_operand" "")
2146 (match_operand:V4SF 1 "register_operand" "")
2147 (match_operand:V4SF 2 "nonimmediate_operand" "")
2148 (match_operand:SI 3 "const_int_operand" "")]
2151 int mask = INTVAL (operands[3]);
2152 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
2153 GEN_INT ((mask >> 0) & 3),
2154 GEN_INT ((mask >> 2) & 3),
2155 GEN_INT (((mask >> 4) & 3) + 4),
2156 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps for each mode in SSEMODE4S.  The result is a vec_select of four
;; elements out of the vec_concat of operand 1 (tied to the destination)
;; and operand 2 (register or memory).  Operands 3 and 4 must satisfy
;; const_0_to_3_operand and operands 5 and 6 const_4_to_7_operand.
;; The output code folds the four indices into one immediate, two bits
;; per index at bit positions 0, 2, 4 and 6, subtracting 4 from operands
;; 5 and 6 first, and stores the result back in operands[3] so that the
;; template can print it with %3.
;; The "mode" attribute is V4SF for every iteration of the pattern.
2160 (define_insn "sse_shufps_<mode>"
2161 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
2162 (vec_select:SSEMODE4S
2163 (vec_concat:<ssedoublesizemode>
2164 (match_operand:SSEMODE4S 1 "register_operand" "0")
2165 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
2166 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2167 (match_operand 4 "const_0_to_3_operand" "")
2168 (match_operand 5 "const_4_to_7_operand" "")
2169 (match_operand 6 "const_4_to_7_operand" "")])))]
2173 mask |= INTVAL (operands[3]) << 0;
2174 mask |= INTVAL (operands[4]) << 2;
2175 mask |= (INTVAL (operands[5]) - 4) << 4;
2176 mask |= (INTVAL (operands[6]) - 4) << 6;
2177 operands[3] = GEN_INT (mask);
2179 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2181 [(set_attr "type" "sselog")
2182 (set_attr "mode" "V4SF")])
2184 (define_insn "sse_storehps"
2185 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2187 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2188 (parallel [(const_int 2) (const_int 3)])))]
2191 movhps\t{%1, %0|%0, %1}
2192 movhlps\t{%1, %0|%0, %1}
2193 movlps\t{%H1, %0|%0, %H1}"
2194 [(set_attr "type" "ssemov")
2195 (set_attr "mode" "V2SF,V4SF,V2SF")])
2197 (define_expand "sse_loadhps_exp"
2198 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2201 (match_operand:V4SF 1 "nonimmediate_operand" "")
2202 (parallel [(const_int 0) (const_int 1)]))
2203 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2205 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
2207 (define_insn "sse_loadhps"
2208 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2211 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2212 (parallel [(const_int 0) (const_int 1)]))
2213 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2216 movhps\t{%2, %0|%0, %2}
2217 movlhps\t{%2, %0|%0, %2}
2218 movlps\t{%2, %H0|%H0, %2}"
2219 [(set_attr "type" "ssemov")
2220 (set_attr "mode" "V2SF,V4SF,V2SF")])
2222 (define_insn "sse_storelps"
2223 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2225 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2226 (parallel [(const_int 0) (const_int 1)])))]
2229 movlps\t{%1, %0|%0, %1}
2230 movaps\t{%1, %0|%0, %1}
2231 movlps\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssemov")
2233 (set_attr "mode" "V2SF,V4SF,V2SF")])
2235 (define_expand "sse_loadlps_exp"
2236 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2238 (match_operand:V2SF 2 "nonimmediate_operand" "")
2240 (match_operand:V4SF 1 "nonimmediate_operand" "")
2241 (parallel [(const_int 2) (const_int 3)]))))]
2243 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
2245 (define_insn "sse_loadlps"
2246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2248 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2250 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2251 (parallel [(const_int 2) (const_int 3)]))))]
2254 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2255 movlps\t{%2, %0|%0, %2}
2256 movlps\t{%2, %0|%0, %2}"
2257 [(set_attr "type" "sselog,ssemov,ssemov")
2258 (set_attr "mode" "V4SF,V2SF,V2SF")])
2260 (define_insn "sse_movss"
2261 [(set (match_operand:V4SF 0 "register_operand" "=x")
2263 (match_operand:V4SF 2 "register_operand" "x")
2264 (match_operand:V4SF 1 "register_operand" "0")
2267 "movss\t{%2, %0|%0, %2}"
2268 [(set_attr "type" "ssemov")
2269 (set_attr "mode" "SF")])
2271 (define_insn "*vec_dupv4sf"
2272 [(set (match_operand:V4SF 0 "register_operand" "=x")
2274 (match_operand:SF 1 "register_operand" "0")))]
2276 "shufps\t{$0, %0, %0|%0, %0, 0}"
2277 [(set_attr "type" "sselog1")
2278 (set_attr "mode" "V4SF")])
2280 ;; Although insertps takes register source, we prefer
2281 ;; unpcklps with register source since it is shorter.
2282 (define_insn "*vec_concatv2sf_sse4_1"
2283 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
2285 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
2286 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
2289 unpcklps\t{%2, %0|%0, %2}
2290 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
2291 movss\t{%1, %0|%0, %1}
2292 punpckldq\t{%2, %0|%0, %2}
2293 movd\t{%1, %0|%0, %1}"
2294 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
2295 (set_attr "prefix_extra" "*,1,*,*,*")
2296 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
2298 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2299 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2300 ;; alternatives pretty much forces the MMX alternative to be chosen.
2301 (define_insn "*vec_concatv2sf_sse"
2302 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2304 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2305 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2308 unpcklps\t{%2, %0|%0, %2}
2309 movss\t{%1, %0|%0, %1}
2310 punpckldq\t{%2, %0|%0, %2}
2311 movd\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2313 (set_attr "mode" "V4SF,SF,DI,DI")])
2315 (define_insn "*vec_concatv4sf_sse"
2316 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2318 (match_operand:V2SF 1 "register_operand" " 0,0")
2319 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2322 movlhps\t{%2, %0|%0, %2}
2323 movhps\t{%2, %0|%0, %2}"
2324 [(set_attr "type" "ssemov")
2325 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialization expander, instantiated for every mode in SSEMODE.
;; Operand 0 is the destination register; operand 1 carries no mode and no
;; predicate.  All of the work is delegated to ix86_expand_vector_init,
;; which is called with false as its first argument.
2327 (define_expand "vec_init<mode>"
2328 [(match_operand:SSEMODE 0 "register_operand" "")
2329 (match_operand 1 "" "")]
2332 ix86_expand_vector_init (false, operands[0], operands[1]);
2336 (define_insn "vec_setv4sf_0"
2337 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2340 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2341 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2345 movss\t{%2, %0|%0, %2}
2346 movss\t{%2, %0|%0, %2}
2347 movd\t{%2, %0|%0, %2}
2349 [(set_attr "type" "ssemov")
2350 (set_attr "mode" "SF")])
2352 ;; A subset is vec_setv4sf.
;; Emits insertps with SF operand 2 (register or memory) as the source and
;; V4SF operand 1 tied to the destination register.  Operand 3 must
;; satisfy const_pow2_1_to_8_operand; before printing, the output code
;; replaces it with exact_log2 (operand 3) shifted left by 4, and that
;; value becomes the insertps immediate.
2353 (define_insn "*vec_setv4sf_sse4_1"
2354 [(set (match_operand:V4SF 0 "register_operand" "=x")
2357 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2358 (match_operand:V4SF 1 "register_operand" "0")
2359 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2362 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2363 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2365 [(set_attr "type" "sselog")
2366 (set_attr "prefix_extra" "1")
2367 (set_attr "mode" "V4SF")])
;; insertps modeled as an unspec over operand 2 (V4SF source register),
;; operand 1 (V4SF, tied to the destination) and operand 3, an immediate
;; accepted by const_0_to_255_operand that is printed unchanged.
;; NOTE(review): the ";" that follows the output template below begins an
;; empty machine-description comment.  It has no effect, but it looks like
;; a leftover C-style terminator and could be dropped.
2369 (define_insn "sse4_1_insertps"
2370 [(set (match_operand:V4SF 0 "register_operand" "=x")
2371 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2372 (match_operand:V4SF 1 "register_operand" "0")
2373 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2376 "insertps\t{%3, %2, %0|%0, %2, %3}";
2377 [(set_attr "type" "sselog")
2378 (set_attr "prefix_extra" "1")
2379 (set_attr "mode" "V4SF")])
2382 [(set (match_operand:V4SF 0 "memory_operand" "")
2385 (match_operand:SF 1 "nonmemory_operand" ""))
2388 "TARGET_SSE && reload_completed"
2391 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element-set expander, instantiated for every mode in SSEMODE.
;; Operand 0 is the vector register, operand 1 a register of the mode's
;; <ssescalarmode>, and operand 2 a const_int whose value is passed on via
;; INTVAL.  All of the work is delegated to ix86_expand_vector_set, which
;; is called with false as its first argument.
2395 (define_expand "vec_set<mode>"
2396 [(match_operand:SSEMODE 0 "register_operand" "")
2397 (match_operand:<ssescalarmode> 1 "register_operand" "")
2398 (match_operand 2 "const_int_operand" "")]
2401 ix86_expand_vector_set (false, operands[0], operands[1],
2402 INTVAL (operands[2]));
;; Extraction of element 0 of V4SF operand 1 into SF operand 0.  The insn
;; is only valid with TARGET_SSE and when the two operands are not both
;; memory.  After reload it is split into a plain move: the source is
;; re-expressed in SFmode, either as the register with the same register
;; number (gen_rtx_REG) or through gen_lowpart, and moved into operand 0
;; with emit_move_insn.
2406 (define_insn_and_split "*vec_extractv4sf_0"
2407 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2409 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2410 (parallel [(const_int 0)])))]
2411 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2413 "&& reload_completed"
2416 rtx op1 = operands[1];
2418 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2420 op1 = gen_lowpart (SFmode, op1);
2421 emit_move_insn (operands[0], op1);
;; extractps: copy the element of V4SF register operand 1 chosen by
;; operand 2 (an immediate accepted by const_0_to_3_operand) into SF
;; operand 0, whose constraint "rm" allows a general register or memory.
2425 (define_insn "*sse4_1_extractps"
2426 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2428 (match_operand:V4SF 1 "register_operand" "x")
2429 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2431 "extractps\t{%2, %1, %0|%0, %1, %2}"
2432 [(set_attr "type" "sselog")
2433 (set_attr "prefix_extra" "1")
2434 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF held in memory
;; (operand 1, constraint "o") into SF register operand 0.  The split
;; replaces the insn with an ordinary SFmode load: the memory reference is
;; rewritten with adjust_address to SFmode at byte offset i*4, where i is
;; the value of operand 2.
2436 (define_insn_and_split "*vec_extract_v4sf_mem"
2437 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2439 (match_operand:V4SF 1 "memory_operand" "o")
2440 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2446 int i = INTVAL (operands[2]);
2448 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element-extract expander, instantiated for every mode in SSEMODE.
;; Operand 0 is a register of the mode's <ssescalarmode>, operand 1 the
;; vector register, and operand 2 a const_int whose value is passed on via
;; INTVAL.  All of the work is delegated to ix86_expand_vector_extract,
;; which is called with false as its first argument.
2452 (define_expand "vec_extract<mode>"
2453 [(match_operand:<ssescalarmode> 0 "register_operand" "")
2454 (match_operand:SSEMODE 1 "register_operand" "")
2455 (match_operand 2 "const_int_operand" "")]
2458 ix86_expand_vector_extract (false, operands[0], operands[1],
2459 INTVAL (operands[2]));
2463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2465 ;; Parallel double-precision floating point element swizzling
2467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2469 (define_expand "sse2_unpckhpd_exp"
2470 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2473 (match_operand:V2DF 1 "nonimmediate_operand" "")
2474 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2475 (parallel [(const_int 1)
2478 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
2480 (define_insn "sse2_unpckhpd"
2481 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2484 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2485 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2486 (parallel [(const_int 1)
2488 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2490 unpckhpd\t{%2, %0|%0, %2}
2491 movlpd\t{%H1, %0|%0, %H1}
2492 movhpd\t{%1, %0|%0, %1}"
2493 [(set_attr "type" "sselog,ssemov,ssemov")
2494 (set_attr "mode" "V2DF,V1DF,V1DF")])
2496 (define_insn "*sse3_movddup"
2497 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2500 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2502 (parallel [(const_int 0)
2504 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2506 movddup\t{%1, %0|%0, %1}
2508 [(set_attr "type" "sselog1,ssemov")
2509 (set_attr "mode" "V2DF")])
2512 [(set (match_operand:V2DF 0 "memory_operand" "")
2515 (match_operand:V2DF 1 "register_operand" "")
2517 (parallel [(const_int 0)
2519 "TARGET_SSE3 && reload_completed"
2522 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2523 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2524 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2528 (define_expand "sse2_unpcklpd_exp"
2529 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2532 (match_operand:V2DF 1 "nonimmediate_operand" "")
2533 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2534 (parallel [(const_int 0)
2537 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
2539 (define_insn "sse2_unpcklpd"
2540 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2543 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2544 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2545 (parallel [(const_int 0)
2547 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2549 unpcklpd\t{%2, %0|%0, %2}
2550 movhpd\t{%2, %0|%0, %2}
2551 movlpd\t{%2, %H0|%H0, %2}"
2552 [(set_attr "type" "sselog,ssemov,ssemov")
2553 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd driven by a single immediate (operand 3, a
;; const_int).  The mask bits are converted into explicit element indices
;; for gen_sse2_shufpd_v2df; bit 1 of the mask yields index 3 when set and
;; index 2 when clear.
2555 (define_expand "sse2_shufpd"
2556 [(match_operand:V2DF 0 "register_operand" "")
2557 (match_operand:V2DF 1 "register_operand" "")
2558 (match_operand:V2DF 2 "nonimmediate_operand" "")
2559 (match_operand:SI 3 "const_int_operand" "")]
2562 int mask = INTVAL (operands[3]);
2563 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
2565 GEN_INT (mask & 2 ? 3 : 2)));
2569 (define_expand "vec_extract_even<mode>"
2570 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
2571 (vec_select:SSEMODE4S
2572 (vec_concat:<ssedoublesizemode>
2573 (match_operand:SSEMODE4S 1 "register_operand" "")
2574 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
2575 (parallel [(const_int 0)
2581 (define_expand "vec_extract_odd<mode>"
2582 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
2583 (vec_select:SSEMODE4S
2584 (vec_concat:<ssedoublesizemode>
2585 (match_operand:SSEMODE4S 1 "register_operand" "")
2586 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
2587 (parallel [(const_int 1)
2593 (define_expand "vec_extract_even<mode>"
2594 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
2595 (vec_select:SSEMODE2D
2596 (vec_concat:<ssedoublesizemode>
2597 (match_operand:SSEMODE2D 1 "register_operand" "")
2598 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
2599 (parallel [(const_int 0)
2603 (define_expand "vec_extract_odd<mode>"
2604 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
2605 (vec_select:SSEMODE2D
2606 (vec_concat:<ssedoublesizemode>
2607 (match_operand:SSEMODE2D 1 "register_operand" "")
2608 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
2609 (parallel [(const_int 1)
2613 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; punpckhqdq on V2DI.  Operand 1 is tied to the destination register;
;; operand 2 may be an SSE register or memory.  The insn is marked as
;; carrying a data16 prefix and is accounted in TImode.
2614 (define_insn "sse2_punpckhqdq"
2615 [(set (match_operand:V2DI 0 "register_operand" "=x")
2618 (match_operand:V2DI 1 "register_operand" "0")
2619 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
2620 (parallel [(const_int 1)
2623 "punpckhqdq\t{%2, %0|%0, %2}"
2624 [(set_attr "type" "sselog")
2625 (set_attr "prefix_data16" "1")
2626 (set_attr "mode" "TI")])
;; punpcklqdq on V2DI.  Operand 1 is tied to the destination register;
;; operand 2 may be an SSE register or memory.  The insn is marked as
;; carrying a data16 prefix and is accounted in TImode.  The
;; vec_pack_sfix*_v2df expanders use this pattern to join their two
;; converted halves.
2628 (define_insn "sse2_punpcklqdq"
2629 [(set (match_operand:V2DI 0 "register_operand" "=x")
2632 (match_operand:V2DI 1 "register_operand" "0")
2633 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
2634 (parallel [(const_int 0)
2637 "punpcklqdq\t{%2, %0|%0, %2}"
2638 [(set_attr "type" "sselog")
2639 (set_attr "prefix_data16" "1")
2640 (set_attr "mode" "TI")])
;; shufpd for each mode in SSEMODE2D.  The result is a vec_select of two
;; elements out of the vec_concat of operand 1 (tied to the destination)
;; and operand 2 (register or memory).  Operand 3 must satisfy
;; const_0_to_1_operand and operand 4 const_2_to_3_operand.
;; The output code rebuilds the instruction immediate: bit 0 is operand 3
;; and bit 1 is operand 4 minus 2.  The result is stored back in
;; operands[3] so that the template can print it with %3.
;; The "mode" attribute is V2DF for every iteration of the pattern.
2642 (define_insn "sse2_shufpd_<mode>"
2643 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
2644 (vec_select:SSEMODE2D
2645 (vec_concat:<ssedoublesizemode>
2646 (match_operand:SSEMODE2D 1 "register_operand" "0")
2647 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
2648 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2649 (match_operand 4 "const_2_to_3_operand" "")])))]
2653 mask = INTVAL (operands[3]);
2654 mask |= (INTVAL (operands[4]) - 2) << 1;
2655 operands[3] = GEN_INT (mask);
2657 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2659 [(set_attr "type" "sselog")
2660 (set_attr "mode" "V2DF")])
2662 (define_insn "sse2_storehpd"
2663 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2665 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2666 (parallel [(const_int 1)])))]
2667 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2669 movhpd\t{%1, %0|%0, %1}
2672 [(set_attr "type" "ssemov,sselog1,ssemov")
2673 (set_attr "mode" "V1DF,V2DF,DF")])
2676 [(set (match_operand:DF 0 "register_operand" "")
2678 (match_operand:V2DF 1 "memory_operand" "")
2679 (parallel [(const_int 1)])))]
2680 "TARGET_SSE2 && reload_completed"
2681 [(set (match_dup 0) (match_dup 1))]
2683 operands[1] = adjust_address (operands[1], DFmode, 8);
2686 (define_insn "sse2_storelpd"
2687 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2689 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2690 (parallel [(const_int 0)])))]
2691 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2693 movlpd\t{%1, %0|%0, %1}
2696 [(set_attr "type" "ssemov")
2697 (set_attr "mode" "V1DF,DF,DF")])
2700 [(set (match_operand:DF 0 "register_operand" "")
2702 (match_operand:V2DF 1 "nonimmediate_operand" "")
2703 (parallel [(const_int 0)])))]
2704 "TARGET_SSE2 && reload_completed"
2707 rtx op1 = operands[1];
2709 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2711 op1 = gen_lowpart (DFmode, op1);
2712 emit_move_insn (operands[0], op1);
2716 (define_expand "sse2_loadhpd_exp"
2717 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2720 (match_operand:V2DF 1 "nonimmediate_operand" "")
2721 (parallel [(const_int 0)]))
2722 (match_operand:DF 2 "nonimmediate_operand" "")))]
2724 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
2726 (define_insn "sse2_loadhpd"
2727 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2730 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2731 (parallel [(const_int 0)]))
2732 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2733 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2735 movhpd\t{%2, %0|%0, %2}
2736 unpcklpd\t{%2, %0|%0, %2}
2737 shufpd\t{$1, %1, %0|%0, %1, 1}
2739 [(set_attr "type" "ssemov,sselog,sselog,other")
2740 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
2743 [(set (match_operand:V2DF 0 "memory_operand" "")
2745 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2746 (match_operand:DF 1 "register_operand" "")))]
2747 "TARGET_SSE2 && reload_completed"
2748 [(set (match_dup 0) (match_dup 1))]
2750 operands[0] = adjust_address (operands[0], DFmode, 8);
2753 (define_expand "sse2_loadlpd_exp"
2754 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2756 (match_operand:DF 2 "nonimmediate_operand" "")
2758 (match_operand:V2DF 1 "nonimmediate_operand" "")
2759 (parallel [(const_int 1)]))))]
2761 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
2763 (define_insn "sse2_loadlpd"
2764 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2766 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2768 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2769 (parallel [(const_int 1)]))))]
2770 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2772 movsd\t{%2, %0|%0, %2}
2773 movlpd\t{%2, %0|%0, %2}
2774 movsd\t{%2, %0|%0, %2}
2775 shufpd\t{$2, %2, %0|%0, %2, 2}
2776 movhpd\t{%H1, %0|%0, %H1}
2778 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2779 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2782 [(set (match_operand:V2DF 0 "memory_operand" "")
2784 (match_operand:DF 1 "register_operand" "")
2785 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2786 "TARGET_SSE2 && reload_completed"
2787 [(set (match_dup 0) (match_dup 1))]
2789 operands[0] = adjust_address (operands[0], DFmode, 8);
2792 ;; Not sure these two are ever used, but it doesn't hurt to have
2794 (define_insn "*vec_extractv2df_1_sse"
2795 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2797 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2798 (parallel [(const_int 1)])))]
2799 "!TARGET_SSE2 && TARGET_SSE
2800 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2802 movhps\t{%1, %0|%0, %1}
2803 movhlps\t{%1, %0|%0, %1}
2804 movlps\t{%H1, %0|%0, %H1}"
2805 [(set_attr "type" "ssemov")
2806 (set_attr "mode" "V2SF,V4SF,V2SF")])
2808 (define_insn "*vec_extractv2df_0_sse"
2809 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2811 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2812 (parallel [(const_int 0)])))]
2813 "!TARGET_SSE2 && TARGET_SSE
2814 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2816 movlps\t{%1, %0|%0, %1}
2817 movaps\t{%1, %0|%0, %1}
2818 movlps\t{%1, %0|%0, %1}"
2819 [(set_attr "type" "ssemov")
2820 (set_attr "mode" "V2SF,V4SF,V2SF")])
2822 (define_insn "sse2_movsd"
2823 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2825 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2826 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2830 movsd\t{%2, %0|%0, %2}
2831 movlpd\t{%2, %0|%0, %2}
2832 movlpd\t{%2, %0|%0, %2}
2833 shufpd\t{$2, %2, %0|%0, %2, 2}
2834 movhps\t{%H1, %0|%0, %H1}
2835 movhps\t{%1, %H0|%H0, %1}"
2836 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2837 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2839 (define_insn "*vec_dupv2df_sse3"
2840 [(set (match_operand:V2DF 0 "register_operand" "=x")
2842 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2844 "movddup\t{%1, %0|%0, %1}"
2845 [(set_attr "type" "sselog1")
2846 (set_attr "mode" "DF")])
2848 (define_insn "vec_dupv2df"
2849 [(set (match_operand:V2DF 0 "register_operand" "=x")
2851 (match_operand:DF 1 "register_operand" "0")))]
2854 [(set_attr "type" "sselog1")
2855 (set_attr "mode" "V2DF")])
2857 (define_insn "*vec_concatv2df_sse3"
2858 [(set (match_operand:V2DF 0 "register_operand" "=x")
2860 (match_operand:DF 1 "nonimmediate_operand" "xm")
2863 "movddup\t{%1, %0|%0, %1}"
2864 [(set_attr "type" "sselog1")
2865 (set_attr "mode" "DF")])
2867 (define_insn "*vec_concatv2df"
2868 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2870 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2871 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2874 unpcklpd\t{%2, %0|%0, %2}
2875 movhpd\t{%2, %0|%0, %2}
2876 movsd\t{%1, %0|%0, %1}
2877 movlhps\t{%2, %0|%0, %2}
2878 movhps\t{%2, %0|%0, %2}"
2879 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2880 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2884 ;; Parallel integral arithmetic
2886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the integer vector modes in SSEMODEI.  The
;; preparation statement creates operand 2 by forcing the all-zero
;; constant of the mode (CONST0_RTX) into a register.
;; NOTE(review): presumably operand 2 is the minuend of a "0 - operand 1"
;; subtraction in the expansion template -- confirm against the full
;; pattern.
2888 (define_expand "neg<mode>2"
2889 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2892 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2894 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for <plusminus_insn> on the integer vector modes in
;; SSEMODEI.  Both inputs may be registers or memory; the preparation
;; statement hands the operand array to
;; ix86_fixup_binary_operands_no_copy together with the rtx code and mode
;; of the current iteration.
2896 (define_expand "<plusminus_insn><mode>3"
2897 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2899 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2900 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2902 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for <plusminus_insn> on SSEMODEI.  Operand 1 is tied to
;; the destination (its constraint is prefixed with the <comm> attribute);
;; operand 2 may be an SSE register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok for the current code and mode.
;; The mnemonic is "p" + <plusminus_mnemonic> + <ssevecsize>, where
;; <ssevecsize> is b, w, d or q for V16QI, V8HI, V4SI or V2DI.
2904 (define_insn "*<plusminus_insn><mode>3"
2905 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2907 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2908 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2909 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2910 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2911 [(set_attr "type" "sseiadd")
2912 (set_attr "prefix_data16" "1")
2913 (set_attr "mode" "TI")])
;; Named expander for the sat_plusminus codes on SSEMODE12, i.e. the
;; V16QI and V8HI modes.  Both inputs may be registers or memory; the
;; preparation statement hands the operand array to
;; ix86_fixup_binary_operands_no_copy together with the rtx code and mode
;; of the current iteration.
2915 (define_expand "sse2_<plusminus_insn><mode>3"
2916 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2917 (sat_plusminus:SSEMODE12
2918 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2919 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2921 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus codes on SSEMODE12 (V16QI, V8HI).
;; Operand 1 is tied to the destination (its constraint is prefixed with
;; the <comm> attribute); operand 2 may be an SSE register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok for the current code
;; and mode.  The mnemonic is "p" + <plusminus_mnemonic> + <ssevecsize>,
;; where <ssevecsize> is b for V16QI and w for V8HI.
2923 (define_insn "*sse2_<plusminus_insn><mode>3"
2924 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2925 (sat_plusminus:SSEMODE12
2926 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2927 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2928 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2929 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2930 [(set_attr "type" "sseiadd")
2931 (set_attr "prefix_data16" "1")
2932 (set_attr "mode" "TI")])
;; V16QI multiply of two register operands, implemented as an insn that
;; is split while neither reload_completed nor reload_in_progress is set.
;; The split widens the bytes to words, multiplies with gen_mulv8hi3, and
;; repacks the low byte of each product.  Two code paths are present:
;;   - an SSE5 path using ix86_expand_sse5_unpack / ix86_expand_sse5_pack
;;     with six V8HI temporaries (high and low bytes of both inputs, plus
;;     the two products);
;;   - a generic path using twelve V16QI temporaries: punpckhbw/punpcklbw
;;     of each input with itself to spread the bytes, two word multiplies
;;     on V8HImode views of the temporaries, and a sequence of
;;     punpckhbw/punpcklbw steps that gathers the result bytes; the
;;     per-line annotations in the body show the byte layout after each
;;     step.
2934 (define_insn_and_split "mulv16qi3"
2935 [(set (match_operand:V16QI 0 "register_operand" "")
2936 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2937 (match_operand:V16QI 2 "register_operand" "")))]
2939 && !(reload_completed || reload_in_progress)"
2944 rtx t[12], op0, op[3];
2949 /* On SSE5, we can take advantage of the pperm instruction to pack and
2950 unpack the bytes. Unpack data such that we've got a source byte in
2951 each low byte of each word. We don't care what goes into the high
2952 byte, so put 0 there. */
2953 for (i = 0; i < 6; ++i)
2954 t[i] = gen_reg_rtx (V8HImode);
2956 for (i = 0; i < 2; i++)
2959 op[1] = operands[i+1];
2960 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2963 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2966 /* Multiply words. */
2967 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2968 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2970 /* Pack the low byte of each word back into a single xmm */
2971 op[0] = operands[0];
2974 ix86_expand_sse5_pack (op);
2978 for (i = 0; i < 12; ++i)
2979 t[i] = gen_reg_rtx (V16QImode);
2981 /* Unpack data such that we've got a source byte in each low byte of
2982 each word. We don't care what goes into the high byte of each word.
2983 Rather than trying to get zero in there, most convenient is to let
2984 it be a copy of the low byte. */
2985 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2986 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2987 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2988 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2990 /* Multiply words. The end-of-line annotations here give a picture of what
2991 the output of that instruction looks like. Dot means don't care; the
2992 letters are the bytes of the result with A being the most significant. */
2993 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2994 gen_lowpart (V8HImode, t[0]),
2995 gen_lowpart (V8HImode, t[1])));
2996 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2997 gen_lowpart (V8HImode, t[2]),
2998 gen_lowpart (V8HImode, t[3])));
3000 /* Extract the relevant bytes and merge them back together. */
3001 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
3002 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
3003 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
3004 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
3005 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
3006 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
3009 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Named expander for the V8HI multiply.  Both inputs may be registers or
;; memory; the preparation statement hands the operand array to
;; ix86_fixup_binary_operands_no_copy with code MULT and mode V8HImode.
;; mulv16qi3 calls gen_mulv8hi3 for its word multiplies.
3013 (define_expand "mulv8hi3"
3014 [(set (match_operand:V8HI 0 "register_operand" "")
3015 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3016 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3018 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is tied to the destination and is
;; marked commutative with operand 2 (the "%" constraint modifier);
;; operand 2 may be an SSE register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (MULT, V8HImode, operands).
3020 (define_insn "*mulv8hi3"
3021 [(set (match_operand:V8HI 0 "register_operand" "=x")
3022 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3023 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3024 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3025 "pmullw\t{%2, %0|%0, %2}"
3026 [(set_attr "type" "sseimul")
3027 (set_attr "prefix_data16" "1")
3028 (set_attr "mode" "TI")])
3030 (define_expand "smulv8hi3_highpart"
3031 [(set (match_operand:V8HI 0 "register_operand" "")
3036 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3038 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3041 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
3043 (define_insn "*smulv8hi3_highpart"
3044 [(set (match_operand:V8HI 0 "register_operand" "=x")
3049 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3051 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3053 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3054 "pmulhw\t{%2, %0|%0, %2}"
3055 [(set_attr "type" "sseimul")
3056 (set_attr "prefix_data16" "1")
3057 (set_attr "mode" "TI")])
3059 (define_expand "umulv8hi3_highpart"
3060 [(set (match_operand:V8HI 0 "register_operand" "")
3065 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3067 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3070 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
3072 (define_insn "*umulv8hi3_highpart"
3073 [(set (match_operand:V8HI 0 "register_operand" "=x")
3078 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3080 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3082 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3083 "pmulhuw\t{%2, %0|%0, %2}"
3084 [(set_attr "type" "sseimul")
3085 (set_attr "prefix_data16" "1")
3086 (set_attr "mode" "TI")])
3088 (define_expand "sse2_umulv2siv2di3"
3089 [(set (match_operand:V2DI 0 "register_operand" "")
3093 (match_operand:V4SI 1 "nonimmediate_operand" "")
3094 (parallel [(const_int 0) (const_int 2)])))
3097 (match_operand:V4SI 2 "nonimmediate_operand" "")
3098 (parallel [(const_int 0) (const_int 2)])))))]
3100 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq: produces a V2DI result from elements 0 and 2 of each V4SI
;; input.  Operand 1 is tied to the destination and marked commutative
;; with operand 2 (the "%" constraint modifier); operand 2 may be an SSE
;; register or memory.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (MULT, V4SImode, operands).
3102 (define_insn "*sse2_umulv2siv2di3"
3103 [(set (match_operand:V2DI 0 "register_operand" "=x")
3107 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3108 (parallel [(const_int 0) (const_int 2)])))
3111 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3112 (parallel [(const_int 0) (const_int 2)])))))]
3113 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3114 "pmuludq\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sseimul")
3116 (set_attr "prefix_data16" "1")
3117 (set_attr "mode" "TI")])
;; Expander with the same operand shape as sse2_umulv2siv2di3 (elements 0
;; and 2 of two V4SI inputs, V2DI result).  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the V4SI operands.
3119 (define_expand "sse4_1_mulv2siv2di3"
3120 [(set (match_operand:V2DI 0 "register_operand" "")
3124 (match_operand:V4SI 1 "nonimmediate_operand" "")
3125 (parallel [(const_int 0) (const_int 2)])))
3128 (match_operand:V4SI 2 "nonimmediate_operand" "")
3129 (parallel [(const_int 0) (const_int 2)])))))]
3131 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse4_1_mulv2siv2di3.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.  Emits pmuldq; the prefix_extra attribute is
;; set for this instruction.
3133 (define_insn "*sse4_1_mulv2siv2di3"
3134 [(set (match_operand:V2DI 0 "register_operand" "=x")
3138 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3139 (parallel [(const_int 0) (const_int 2)])))
3142 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3143 (parallel [(const_int 0) (const_int 2)])))))]
3144 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3145 "pmuldq\t{%2, %0|%0, %2}"
3146 [(set_attr "type" "sseimul")
3147 (set_attr "prefix_extra" "1")
3148 (set_attr "mode" "TI")])
;; Expander for sse2_pmaddwd: V4SI destination, two V8HI inputs.  The
;; pattern selects V4HI sub-vectors of each input (selectors starting at
;; element 0 and at element 1) and reuses operands 1 and 2 via match_dup.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy.
3150 (define_expand "sse2_pmaddwd"
3151 [(set (match_operand:V4SI 0 "register_operand" "")
3156 (match_operand:V8HI 1 "nonimmediate_operand" "")
3157 (parallel [(const_int 0)
3163 (match_operand:V8HI 2 "nonimmediate_operand" "")
3164 (parallel [(const_int 0)
3170 (vec_select:V4HI (match_dup 1)
3171 (parallel [(const_int 1)
3176 (vec_select:V4HI (match_dup 2)
3177 (parallel [(const_int 1)
3180 (const_int 7)]))))))]
3182 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for sse2_pmaddwd.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operand 1 is commutative and tied to the
;; destination.  Emits pmaddwd and is classified as type "sseiadd".
3184 (define_insn "*sse2_pmaddwd"
3185 [(set (match_operand:V4SI 0 "register_operand" "=x")
3190 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3191 (parallel [(const_int 0)
3197 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3198 (parallel [(const_int 0)
3204 (vec_select:V4HI (match_dup 1)
3205 (parallel [(const_int 1)
3210 (vec_select:V4HI (match_dup 2)
3211 (parallel [(const_int 1)
3214 (const_int 7)]))))))]
3215 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3216 "pmaddwd\t{%2, %0|%0, %2}"
3217 [(set_attr "type" "sseiadd")
3218 (set_attr "prefix_data16" "1")
3219 (set_attr "mode" "TI")])
;; Expander for the element-wise V4SI multiply; all three operands are
;; registers.  When TARGET_SSE4_1 or TARGET_SSE5 is set, the operands are
;; passed through ix86_fixup_binary_operands_no_copy.
3221 (define_expand "mulv4si3"
3222 [(set (match_operand:V4SI 0 "register_operand" "")
3223 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3224 (match_operand:V4SI 2 "register_operand" "")))]
3227 if (TARGET_SSE4_1 || TARGET_SSE5)
3228 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; V4SI multiply insn for TARGET_SSE4_1.  Operand 1 is commutative and tied
;; to the destination; operand 2 is register or memory.  Emits pmulld.
3231 (define_insn "*sse4_1_mulv4si3"
3232 [(set (match_operand:V4SI 0 "register_operand" "=x")
3233 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3234 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3235 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3236 "pmulld\t{%2, %0|%0, %2}"
3237 [(set_attr "type" "sseimul")
3238 (set_attr "prefix_extra" "1")
3239 (set_attr "mode" "TI")])
3241 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3242 ;; multiply/add. In general, we expect the define_split to occur before
3243 ;; register allocation, so we have to handle the corner case where the target
3244 ;; is the same as one of the inputs.
;; The destination is early-clobber ("=&x").  The split condition fires
;; once reload has completed, or earlier when operand 0 is not mentioned in
;; operand 1 or operand 2.  The replacement is a multiply wrapped in a
;; plus, with operands[3] set to the V4SI zero vector.
3245 (define_insn_and_split "*sse5_mulv4si3"
3246 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3247 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3248 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3251 "&& (reload_completed
3252 || (!reg_mentioned_p (operands[0], operands[1])
3253 && !reg_mentioned_p (operands[0], operands[2])))"
3257 (plus:V4SI (mult:V4SI (match_dup 1)
3261 operands[3] = CONST0_RTX (V4SImode);
3263 [(set_attr "type" "ssemuladd")
3264 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 without SSE4.1 or SSE5; only valid before reload
;; because the split allocates fresh pseudo registers.  The sequence is:
;; multiply elements 0 and 2, shift both inputs down so elements 1 and 3
;; take their place, multiply again, shuffle each product vector with
;; pshufd, and merge the two with punpckldq into operand 0.
3266 (define_insn_and_split "*sse2_mulv4si3"
3267 [(set (match_operand:V4SI 0 "register_operand" "")
3268 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3269 (match_operand:V4SI 2 "register_operand" "")))]
3270 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3271 && !(reload_completed || reload_in_progress)"
3276 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3282 t1 = gen_reg_rtx (V4SImode);
3283 t2 = gen_reg_rtx (V4SImode);
3284 t3 = gen_reg_rtx (V4SImode);
3285 t4 = gen_reg_rtx (V4SImode);
3286 t5 = gen_reg_rtx (V4SImode);
3287 t6 = gen_reg_rtx (V4SImode);
3288 thirtytwo = GEN_INT (32);
3290 /* Multiply elements 2 and 0. */
3291 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3294 /* Shift both input vectors down one element, so that elements 3
3295 and 1 are now in the slots for elements 2 and 0. For K8, at
3296 least, this is faster than using a shuffle. */
3297 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3298 gen_lowpart (TImode, op1),
3300 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3301 gen_lowpart (TImode, op2),
3303 /* Multiply elements 3 and 1. */
3304 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3307 /* Move the results in element 2 down to element 1; we don't care
3308 what goes in elements 2 and 3. */
3309 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3310 const0_rtx, const0_rtx));
3311 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3312 const0_rtx, const0_rtx));
3314 /* Merge the parts back together. */
3315 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, valid only before reload (fresh pseudos are created).
;; Two expansions are visible below.  The first uses the SSE5 helpers
;; gen_sse5_pmacsdd, gen_sse5_phadddq and gen_sse5_pmacsdql.  The second
;; uses three pmuludq-style multiplies (low*low plus the two high*low
;; cross products shifted up by 32) and adds the three parts together.
;; NOTE(review): the test that chooses between the two expansions is not
;; visible in this excerpt.
3319 (define_insn_and_split "mulv2di3"
3320 [(set (match_operand:V2DI 0 "register_operand" "")
3321 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3322 (match_operand:V2DI 2 "register_operand" "")))]
3324 && !(reload_completed || reload_in_progress)"
3329 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3334 /* op1: A,B,C,D, op2: E,F,G,H */
3336 op1 = gen_lowpart (V4SImode, operands[1]);
3337 op2 = gen_lowpart (V4SImode, operands[2]);
3338 t1 = gen_reg_rtx (V4SImode);
3339 t2 = gen_reg_rtx (V4SImode);
3340 t3 = gen_reg_rtx (V4SImode);
3341 t4 = gen_reg_rtx (V2DImode);
3342 t5 = gen_reg_rtx (V2DImode);
3345 emit_insn (gen_sse2_pshufd_1 (t1, op1,
3352 emit_move_insn (t2, CONST0_RTX (V4SImode));
3354 /* t3: (B*E),(A*F),(D*G),(C*H) */
3355 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
3357 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
3358 emit_insn (gen_sse5_phadddq (t4, t3));
3360 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
3361 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
3363 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
3364 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
3371 t1 = gen_reg_rtx (V2DImode);
3372 t2 = gen_reg_rtx (V2DImode);
3373 t3 = gen_reg_rtx (V2DImode);
3374 t4 = gen_reg_rtx (V2DImode);
3375 t5 = gen_reg_rtx (V2DImode);
3376 t6 = gen_reg_rtx (V2DImode);
3377 thirtytwo = GEN_INT (32);
3379 /* Multiply low parts. */
3380 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3381 gen_lowpart (V4SImode, op2)));
3383 /* Shift input vectors right 32 bits so we can multiply high parts. */
3384 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3385 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3387 /* Multiply high parts by low parts. */
3388 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3389 gen_lowpart (V4SImode, t3)));
3390 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3391 gen_lowpart (V4SImode, t2)));
3393 /* Shift them back. */
3394 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3395 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3397 /* Add the three parts together. */
3398 emit_insn (gen_addv2di3 (t6, t1, t4));
3399 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Expander with a V4SI destination and two V8HI register inputs.  It emits
;; gen_mulv8hi3 into t1 and gen_smulv8hi3_highpart into t2, then combines
;; them with gen_vec_interleave_highv8hi into the destination viewed as
;; V8HI.
3403 (define_expand "vec_widen_smult_hi_v8hi"
3404 [(match_operand:V4SI 0 "register_operand" "")
3405 (match_operand:V8HI 1 "register_operand" "")
3406 (match_operand:V8HI 2 "register_operand" "")]
3409 rtx op1, op2, t1, t2, dest;
3413 t1 = gen_reg_rtx (V8HImode);
3414 t2 = gen_reg_rtx (V8HImode);
3415 dest = gen_lowpart (V8HImode, operands[0]);
3417 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3418 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3419 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Same sequence as vec_widen_smult_hi_v8hi (gen_mulv8hi3 plus
;; gen_smulv8hi3_highpart into temporaries), but the two results are
;; combined with gen_vec_interleave_lowv8hi.
3423 (define_expand "vec_widen_smult_lo_v8hi"
3424 [(match_operand:V4SI 0 "register_operand" "")
3425 (match_operand:V8HI 1 "register_operand" "")
3426 (match_operand:V8HI 2 "register_operand" "")]
3429 rtx op1, op2, t1, t2, dest;
3433 t1 = gen_reg_rtx (V8HImode);
3434 t2 = gen_reg_rtx (V8HImode);
3435 dest = gen_lowpart (V8HImode, operands[0]);
3437 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3438 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3439 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Expander with a V4SI destination and two V8HI register inputs.  It emits
;; gen_mulv8hi3 into t1 and gen_umulv8hi3_highpart into t2, then combines
;; them with gen_vec_interleave_highv8hi into the destination viewed as
;; V8HI.
3443 (define_expand "vec_widen_umult_hi_v8hi"
3444 [(match_operand:V4SI 0 "register_operand" "")
3445 (match_operand:V8HI 1 "register_operand" "")
3446 (match_operand:V8HI 2 "register_operand" "")]
3449 rtx op1, op2, t1, t2, dest;
3453 t1 = gen_reg_rtx (V8HImode);
3454 t2 = gen_reg_rtx (V8HImode);
3455 dest = gen_lowpart (V8HImode, operands[0]);
3457 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3458 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3459 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Same sequence as vec_widen_umult_hi_v8hi (gen_mulv8hi3 plus
;; gen_umulv8hi3_highpart into temporaries), but the two results are
;; combined with gen_vec_interleave_lowv8hi.
3463 (define_expand "vec_widen_umult_lo_v8hi"
3464 [(match_operand:V4SI 0 "register_operand" "")
3465 (match_operand:V8HI 1 "register_operand" "")
3466 (match_operand:V8HI 2 "register_operand" "")]
3469 rtx op1, op2, t1, t2, dest;
3473 t1 = gen_reg_rtx (V8HImode);
3474 t2 = gen_reg_rtx (V8HImode);
3475 dest = gen_lowpart (V8HImode, operands[0]);
3477 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3478 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3479 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Expander with a V2DI destination and two V4SI register inputs.  Each
;; input is shuffled with gen_sse2_pshufd_1 into a temporary, and the
;; temporaries are passed to gen_sse5_mulv2div2di3_high.
;; NOTE(review): the pshufd selector arguments and the enabling condition
;; are not visible in this excerpt.
3483 (define_expand "vec_widen_smult_hi_v4si"
3484 [(match_operand:V2DI 0 "register_operand" "")
3485 (match_operand:V4SI 1 "register_operand" "")
3486 (match_operand:V4SI 2 "register_operand" "")]
3491 t1 = gen_reg_rtx (V4SImode);
3492 t2 = gen_reg_rtx (V4SImode);
3494 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3499 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3504 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Counterpart of vec_widen_smult_hi_v4si: both inputs are shuffled with
;; gen_sse2_pshufd_1 and the temporaries go to gen_sse5_mulv2div2di3_low.
;; NOTE(review): the pshufd selector arguments and the enabling condition
;; are not visible in this excerpt.
3508 (define_expand "vec_widen_smult_lo_v4si"
3509 [(match_operand:V2DI 0 "register_operand" "")
3510 (match_operand:V4SI 1 "register_operand" "")
3511 (match_operand:V4SI 2 "register_operand" "")]
3516 t1 = gen_reg_rtx (V4SImode);
3517 t2 = gen_reg_rtx (V4SImode);
3519 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3524 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3529 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Expander with a V2DI destination and two V4SI register inputs.  Each
;; input is interleaved with itself by gen_vec_interleave_highv4si, and
;; the two temporaries are multiplied with gen_sse2_umulv2siv2di3.
3534 (define_expand "vec_widen_umult_hi_v4si"
3535 [(match_operand:V2DI 0 "register_operand" "")
3536 (match_operand:V4SI 1 "register_operand" "")
3537 (match_operand:V4SI 2 "register_operand" "")]
3540 rtx op1, op2, t1, t2;
3544 t1 = gen_reg_rtx (V4SImode);
3545 t2 = gen_reg_rtx (V4SImode);
3547 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3548 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3549 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Counterpart of vec_widen_umult_hi_v4si: each input is interleaved with
;; itself by gen_vec_interleave_lowv4si, and the two temporaries are
;; multiplied with gen_sse2_umulv2siv2di3.
3553 (define_expand "vec_widen_umult_lo_v4si"
3554 [(match_operand:V2DI 0 "register_operand" "")
3555 (match_operand:V4SI 1 "register_operand" "")
3556 (match_operand:V4SI 2 "register_operand" "")]
3559 rtx op1, op2, t1, t2;
3563 t1 = gen_reg_rtx (V4SImode);
3564 t2 = gen_reg_rtx (V4SImode);
3566 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3567 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3568 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander taking two V8HI inputs (operands 1, 2) and a V4SI addend
;; (operand 3).  gen_sse2_pmaddwd writes a fresh V4SI temporary, and
;; gen_addv4si3 adds operand 3 to it to form operand 0.
3572 (define_expand "sdot_prodv8hi"
3573 [(match_operand:V4SI 0 "register_operand" "")
3574 (match_operand:V8HI 1 "register_operand" "")
3575 (match_operand:V8HI 2 "register_operand" "")
3576 (match_operand:V4SI 3 "register_operand" "")]
3579 rtx t = gen_reg_rtx (V4SImode);
3580 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3581 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Expander taking two V4SI inputs (operands 1, 2) and a V2DI addend
;; (operand 3).  The first gen_sse2_umulv2siv2di3 product goes to t1 and
;; operand 3 is added to it.  Both inputs are then shifted as TImode
;; values with gen_sse2_lshrti3 and multiplied again into t4.  Operand 0
;; is t1 + t4.
;; NOTE(review): the shift counts passed to gen_sse2_lshrti3 are not
;; visible in this excerpt.
3585 (define_expand "udot_prodv4si"
3586 [(match_operand:V2DI 0 "register_operand" "")
3587 (match_operand:V4SI 1 "register_operand" "")
3588 (match_operand:V4SI 2 "register_operand" "")
3589 (match_operand:V2DI 3 "register_operand" "")]
3594 t1 = gen_reg_rtx (V2DImode);
3595 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3596 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3598 t2 = gen_reg_rtx (V4SImode);
3599 t3 = gen_reg_rtx (V4SImode);
3600 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3601 gen_lowpart (TImode, operands[1]),
3603 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3604 gen_lowpart (TImode, operands[2]),
3607 t4 = gen_reg_rtx (V2DImode);
3608 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3610 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Shift insn named ashr<mode>3, iterated over SSEMODE24 (V8HI, V4SI).
;; The SI shift count uses constraint "xN".  Emits psraw or psrad,
;; chosen by the <ssevecsize> suffix.
3614 (define_insn "ashr<mode>3"
3615 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3617 (match_operand:SSEMODE24 1 "register_operand" "0")
3618 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3620 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3621 [(set_attr "type" "sseishft")
3622 (set_attr "prefix_data16" "1")
3623 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) over SSEMODE248 (V8HI, V4SI, V2DI).  The
;; SI shift count uses constraint "xN".  Emits psrlw, psrld or psrlq,
;; chosen by the <ssevecsize> suffix.
3625 (define_insn "lshr<mode>3"
3626 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3627 (lshiftrt:SSEMODE248
3628 (match_operand:SSEMODE248 1 "register_operand" "0")
3629 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3631 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3632 [(set_attr "type" "sseishft")
3633 (set_attr "prefix_data16" "1")
3634 (set_attr "mode" "TI")])
;; Shift insn named ashl<mode>3, iterated over SSEMODE248 (V8HI, V4SI,
;; V2DI).  The SI shift count uses constraint "xN".  Emits psllw, pslld or
;; psllq, chosen by the <ssevecsize> suffix.
3636 (define_insn "ashl<mode>3"
3637 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3639 (match_operand:SSEMODE248 1 "register_operand" "0")
3640 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3642 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3643 [(set_attr "type" "sseishft")
3644 (set_attr "prefix_data16" "1")
3645 (set_attr "mode" "TI")])
;; Whole-register left shift for the SSEMODEI modes.  The shift count must
;; satisfy const_0_to_255_mul_8_operand.  The preparation code re-views
;; operands 0 and 1 as TImode so the ashift:TI pattern applies.
3647 (define_expand "vec_shl_<mode>"
3648 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3649 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3650 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3653 operands[0] = gen_lowpart (TImode, operands[0]);
3654 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for the SSEMODEI modes.  The shift
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as TImode so the lshiftrt:TI pattern applies.
3657 (define_expand "vec_shr_<mode>"
3658 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3659 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3660 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3663 operands[0] = gen_lowpart (TImode, operands[0]);
3664 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for a <code>-iterated V16QI binary operation.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator that supplies <code> is not visible in
;; this excerpt.
3667 (define_expand "<code>v16qi3"
3668 [(set (match_operand:V16QI 0 "register_operand" "")
3670 (match_operand:V16QI 1 "nonimmediate_operand" "")
3671 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3673 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Matching insn for <code>v16qi3.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  The mnemonic is "p" + <maxminiprefix> + "b".
3675 (define_insn "*<code>v16qi3"
3676 [(set (match_operand:V16QI 0 "register_operand" "=x")
3678 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3679 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3680 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3681 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3682 [(set_attr "type" "sseiadd")
3683 (set_attr "prefix_data16" "1")
3684 (set_attr "mode" "TI")])
;; Expander for a <code>-iterated V8HI binary operation.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator that supplies <code> is not visible in
;; this excerpt.
3686 (define_expand "<code>v8hi3"
3687 [(set (match_operand:V8HI 0 "register_operand" "")
3689 (match_operand:V8HI 1 "nonimmediate_operand" "")
3690 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3692 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Matching insn for <code>v8hi3.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  The mnemonic is "p" + <maxminiprefix> + "w".
3694 (define_insn "*<code>v8hi3"
3695 [(set (match_operand:V8HI 0 "register_operand" "=x")
3697 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3698 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3699 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3700 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3701 [(set_attr "type" "sseiadd")
3702 (set_attr "prefix_data16" "1")
3703 (set_attr "mode" "TI")])
;; Expander for umax on V8HI.  Two strategies are visible.  One only runs
;; ix86_fixup_binary_operands_no_copy with code UMAX.  The other emits
;; gen_sse2_ussubv8hi3 (op1, op2) into op3 and then gen_addv8hi3 (op3,
;; op2) into the destination.  op3 reuses the destination unless that
;; equals op2, in which case a fresh pseudo is used so op2 is still intact
;; for the add.
;; NOTE(review): the condition selecting between the two strategies is not
;; visible in this excerpt.
3705 (define_expand "umaxv8hi3"
3706 [(set (match_operand:V8HI 0 "register_operand" "")
3707 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3708 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3712 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3715 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3716 if (rtx_equal_p (op3, op2))
3717 op3 = gen_reg_rtx (V8HImode);
3718 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3719 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for smax over SSEMODE14 (V16QI, V4SI).  Two strategies are
;; visible.  One only runs ix86_fixup_binary_operands_no_copy with code
;; SMAX.  The other builds a vcond-style operand array and calls
;; ix86_expand_int_vcond: destination op0, values op1 and op2, comparison
;; GT (op1, op2).
;; NOTE(review): the condition selecting between the two strategies is not
;; visible in this excerpt.
3724 (define_expand "smax<mode>3"
3725 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3726 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3727 (match_operand:SSEMODE14 2 "register_operand" "")))]
3731 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3737 xops[0] = operands[0];
3738 xops[1] = operands[1];
3739 xops[2] = operands[2];
3740 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3741 xops[4] = operands[1];
3742 xops[5] = operands[2];
3743 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn over SSEMODE14 (V16QI, V4SI) for a <code>-iterated binary
;; operation.  The mnemonic is "p" + <maxminiprefix> + <ssevecsize>.
;; NOTE(review): the code iterator that supplies <code> is not visible in
;; this excerpt.
3749 (define_insn "*sse4_1_<code><mode>3"
3750 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3752 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3753 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3754 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3755 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3756 [(set_attr "type" "sseiadd")
3757 (set_attr "prefix_extra" "1")
3758 (set_attr "mode" "TI")])
;; Expander for umax on V4SI.  Two strategies are visible.  One only runs
;; ix86_fixup_binary_operands_no_copy with code UMAX.  The other builds a
;; vcond-style operand array and calls ix86_expand_int_vcond: destination
;; op0, values op1 and op2, comparison GTU (op1, op2).
;; NOTE(review): the condition selecting between the two strategies is not
;; visible in this excerpt.
3760 (define_expand "umaxv4si3"
3761 [(set (match_operand:V4SI 0 "register_operand" "")
3762 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3763 (match_operand:V4SI 2 "register_operand" "")))]
3767 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3773 xops[0] = operands[0];
3774 xops[1] = operands[1];
3775 xops[2] = operands[2];
3776 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3777 xops[4] = operands[1];
3778 xops[5] = operands[2];
3779 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn over SSEMODE24 (V8HI, V4SI) for a <code>-iterated binary
;; operation.  The mnemonic is "p" + <maxminiprefix> + <ssevecsize>.
;; NOTE(review): the code iterator that supplies <code> is not visible in
;; this excerpt.
3785 (define_insn "*sse4_1_<code><mode>3"
3786 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3788 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3789 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3790 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3791 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3792 [(set_attr "type" "sseiadd")
3793 (set_attr "prefix_extra" "1")
3794 (set_attr "mode" "TI")])
;; Expander for smin over SSEMODE14 (V16QI, V4SI).  Two strategies are
;; visible.  One only runs ix86_fixup_binary_operands_no_copy with code
;; SMIN.  The other calls ix86_expand_int_vcond with comparison
;; GT (op1, op2) and the two value slots swapped relative to smax<mode>3
;; (xops[1] = op2, xops[2] = op1).
;; NOTE(review): the condition selecting between the two strategies is not
;; visible in this excerpt.
3796 (define_expand "smin<mode>3"
3797 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3798 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3799 (match_operand:SSEMODE14 2 "register_operand" "")))]
3803 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3809 xops[0] = operands[0];
3810 xops[1] = operands[2];
3811 xops[2] = operands[1];
3812 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3813 xops[4] = operands[1];
3814 xops[5] = operands[2];
3815 ok = ix86_expand_int_vcond (xops);
;; Expander for umin over SSEMODE24 (V8HI, V4SI).  Two strategies are
;; visible.  One only runs ix86_fixup_binary_operands_no_copy with code
;; UMIN.  The other calls ix86_expand_int_vcond with comparison
;; GTU (op1, op2) and the value slots swapped (xops[1] = op2,
;; xops[2] = op1).
;; NOTE(review): the condition selecting between the two strategies is not
;; visible in this excerpt.
3821 (define_expand "umin<mode>3"
3822 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3823 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3824 (match_operand:SSEMODE24 2 "register_operand" "")))]
3828 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3834 xops[0] = operands[0];
3835 xops[1] = operands[2];
3836 xops[2] = operands[1];
3837 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3838 xops[4] = operands[1];
3839 xops[5] = operands[2];
3840 ok = ix86_expand_int_vcond (xops);
3846 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3848 ;; Parallel integral comparisons
3850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the equality compare over SSEMODE124 (V16QI, V8HI, V4SI).
;; Enabled for TARGET_SSE2 without TARGET_SSE5.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy with code EQ.
3852 (define_expand "sse2_eq<mode>3"
3853 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3855 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
3856 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
3857 "TARGET_SSE2 && !TARGET_SSE5"
3858 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Matching insn for sse2_eq<mode>3.  Requires TARGET_SSE2 without
;; TARGET_SSE5 and ix86_binary_operator_ok.  Emits pcmpeqb, pcmpeqw or
;; pcmpeqd, chosen by the <ssevecsize> suffix.
3860 (define_insn "*sse2_eq<mode>3"
3861 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3863 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3864 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3865 "TARGET_SSE2 && !TARGET_SSE5
3866 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3867 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3868 [(set_attr "type" "ssecmp")
3869 (set_attr "prefix_data16" "1")
3870 (set_attr "mode" "TI")])
;; Expander for the V2DI equality compare.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy with code EQ.
3872 (define_expand "sse4_1_eqv2di3"
3873 [(set (match_operand:V2DI 0 "register_operand" "")
3875 (match_operand:V2DI 1 "nonimmediate_operand" "")
3876 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
3878 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Matching insn for sse4_1_eqv2di3.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok.  Emits pcmpeqq, with prefix_extra set.
3880 (define_insn "*sse4_1_eqv2di3"
3881 [(set (match_operand:V2DI 0 "register_operand" "=x")
3883 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3884 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3885 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3886 "pcmpeqq\t{%2, %0|%0, %2}"
3887 [(set_attr "type" "ssecmp")
3888 (set_attr "prefix_extra" "1")
3889 (set_attr "mode" "TI")])
;; Insn named sse2_gt<mode>3 over SSEMODE124 (V16QI, V8HI, V4SI).  Enabled
;; for TARGET_SSE2 without TARGET_SSE5.  Operand 1 must be a register tied
;; to the destination and carries no commutative marker.  Emits pcmpgtb,
;; pcmpgtw or pcmpgtd.
3891 (define_insn "sse2_gt<mode>3"
3892 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3894 (match_operand:SSEMODE124 1 "register_operand" "0")
3895 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3896 "TARGET_SSE2 && !TARGET_SSE5"
3897 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3898 [(set_attr "type" "ssecmp")
3899 (set_attr "prefix_data16" "1")
3900 (set_attr "mode" "TI")])
;; Insn named sse4_2_gtv2di3 on V2DI; emits pcmpgtq.  Operand 1 must be a
;; register tied to the destination; operand 2 may be a register or memory.
;; pcmpgtq is encoded in the three-byte 0F 38 opcode map, so prefix_extra
;; is set here, as the pcmpeqq pattern already does, to keep the computed
;; instruction length correct.
3902 (define_insn "sse4_2_gtv2di3"
3903 [(set (match_operand:V2DI 0 "register_operand" "=x")
3905 (match_operand:V2DI 1 "register_operand" "0")
3906 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3908 "pcmpgtq\t{%2, %0|%0, %2}"
3909 [(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
3910 (set_attr "mode" "TI")])
;; Vector conditional select over the SSEMODEI modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the two values of the if_then_else.  The expansion is delegated to
;; ix86_expand_int_vcond, whose return value is tested.
3912 (define_expand "vcond<mode>"
3913 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3914 (if_then_else:SSEMODEI
3915 (match_operator 3 ""
3916 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3917 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3918 (match_operand:SSEMODEI 1 "general_operand" "")
3919 (match_operand:SSEMODEI 2 "general_operand" "")))]
3922 if (ix86_expand_int_vcond (operands))
;; Expander named vcondu<mode> with the same operand layout as
;; vcond<mode>: operand 3 is the comparison operator on operands 4 and 5,
;; operands 1 and 2 are the selected values.  The expansion is delegated
;; to ix86_expand_int_vcond.
3928 (define_expand "vcondu<mode>"
3929 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3930 (if_then_else:SSEMODEI
3931 (match_operator 3 ""
3932 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3933 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3934 (match_operand:SSEMODEI 1 "general_operand" "")
3935 (match_operand:SSEMODEI 2 "general_operand" "")))]
3938 if (ix86_expand_int_vcond (operands))
3944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3946 ;; Parallel bitwise logical operations
3948 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expressed as an XOR.  The preparation code builds a
;; constant vector with constm1_rtx in every element (GET_MODE_NUNITS
;; elements) and forces it into a register as operand 2.
3950 (define_expand "one_cmpl<mode>2"
3951 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3952 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3956 int i, n = GET_MODE_NUNITS (<MODE>mode);
3957 rtvec v = rtvec_alloc (n);
3959 for (i = 0; i < n; ++i)
3960 RTVEC_ELT (v, i) = constm1_rtx;
3962 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Insn for SSE-only targets (TARGET_SSE without TARGET_SSE2) on the
;; integer vector modes.  Operand 1 appears under NOT and is tied to the
;; destination.  Emits andnps and reports mode V4SF.
3965 (define_insn "*sse_nand<mode>3"
3966 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3968 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3969 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3970 "(TARGET_SSE && !TARGET_SSE2)"
3971 "andnps\t{%2, %0|%0, %2}"
3972 [(set_attr "type" "sselog")
3973 (set_attr "mode" "V4SF")])
;; Insn named sse2_nand<mode>3 on the SSEMODEI modes.  Operand 1 appears
;; under NOT and is tied to the destination; operand 2 may be a register
;; or memory.  Emits pandn.
3975 (define_insn "sse2_nand<mode>3"
3976 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3978 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3979 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3981 "pandn\t{%2, %0|%0, %2}"
3982 [(set_attr "type" "sselog")
3983 (set_attr "prefix_data16" "1")
3984 (set_attr "mode" "TI")])
;; TFmode counterpart of the pandn pattern.  Operand 1 appears under NOT
;; and is tied to the destination.  Emits pandn and reports mode TI.
3986 (define_insn "*nandtf3"
3987 [(set (match_operand:TF 0 "register_operand" "=x")
3989 (not:TF (match_operand:TF 1 "register_operand" "0"))
3990 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3992 "pandn\t{%2, %0|%0, %2}"
3993 [(set_attr "type" "sselog")
3994 (set_attr "prefix_data16" "1")
3995 (set_attr "mode" "TI")])
;; Expander for a <code>-iterated binary operation on the SSEMODEI modes.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator that supplies <code> is not visible in
;; this excerpt.
3997 (define_expand "<code><mode>3"
3998 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4000 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4001 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4003 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for SSE-only targets (TARGET_SSE without TARGET_SSE2).  The
;; mnemonic is <plogicprefix> + "ps" and the reported mode is V4SF.
4005 (define_insn "*sse_<code><mode>3"
4006 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4008 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4009 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4010 "(TARGET_SSE && !TARGET_SSE2)
4011 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4012 "<plogicprefix>ps\t{%2, %0|%0, %2}"
4013 [(set_attr "type" "sselog")
4014 (set_attr "mode" "V4SF")])
;; Insn for TARGET_SSE2 on the SSEMODEI modes.  The mnemonic is "p" +
;; <plogicprefix>.  Operand 1 is commutative and tied to the destination.
4016 (define_insn "*sse2_<code><mode>3"
4017 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4019 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4020 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4021 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4022 "p<plogicprefix>\t{%2, %0|%0, %2}"
4023 [(set_attr "type" "sselog")
4024 (set_attr "prefix_data16" "1")
4025 (set_attr "mode" "TI")])
;; TFmode expander for a <code>-iterated binary operation.  The
;; preparation statement runs ix86_fixup_binary_operands_no_copy with
;; TFmode.
4027 (define_expand "<code>tf3"
4028 [(set (match_operand:TF 0 "register_operand" "")
4030 (match_operand:TF 1 "nonimmediate_operand" "")
4031 (match_operand:TF 2 "nonimmediate_operand" "")))]
4033 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Matching TFmode insn for <code>tf3.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  The mnemonic is "p" + <plogicprefix>.
4035 (define_insn "*<code>tf3"
4036 [(set (match_operand:TF 0 "register_operand" "=x")
4038 (match_operand:TF 1 "nonimmediate_operand" "%0")
4039 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4040 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
4041 "p<plogicprefix>\t{%2, %0|%0, %2}"
4042 [(set_attr "type" "sselog")
4043 (set_attr "prefix_data16" "1")
4044 (set_attr "mode" "TI")])
4046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4048 ;; Parallel integral element swizzling
4050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4053 ;; op1 = abcdefghijklmnop
4054 ;; op2 = qrstuvwxyz012345
4055 ;; h1 = aqbrcsdteufvgwhx
4056 ;; l1 = iyjzk0l1m2n3o4p5
4057 ;; h2 = aiqybjrzcks0dlt1
4058 ;; l2 = emu2fnv3gow4hpx5
4059 ;; h3 = aeimquy2bfjnrvz3
4060 ;; l3 = cgkosw04dhlptx15
4061 ;; result = bdfhjlnprtvxz135
;; Expander packing two V8HI inputs into one V16QI result.  One path hands
;; the operands to ix86_expand_sse5_pack.  The other views both inputs as
;; V16QI and applies three rounds of high/low byte interleaves followed by
;; a final low interleave into operand 0.
;; NOTE(review): the condition selecting the SSE5 path is not visible in
;; this excerpt.
4062 (define_expand "vec_pack_trunc_v8hi"
4063 [(match_operand:V16QI 0 "register_operand" "")
4064 (match_operand:V8HI 1 "register_operand" "")
4065 (match_operand:V8HI 2 "register_operand" "")]
4068 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4072 ix86_expand_sse5_pack (operands);
4076 op1 = gen_lowpart (V16QImode, operands[1]);
4077 op2 = gen_lowpart (V16QImode, operands[2]);
4078 h1 = gen_reg_rtx (V16QImode);
4079 l1 = gen_reg_rtx (V16QImode);
4080 h2 = gen_reg_rtx (V16QImode);
4081 l2 = gen_reg_rtx (V16QImode);
4082 h3 = gen_reg_rtx (V16QImode);
4083 l3 = gen_reg_rtx (V16QImode);
4085 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4086 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4087 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4088 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4089 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4090 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4091 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4102 ;; result = bdfhjlnp
;; Expander packing two V4SI inputs into one V8HI result.  One path hands
;; the operands to ix86_expand_sse5_pack.  The other views both inputs as
;; V8HI and applies two rounds of high/low word interleaves followed by a
;; final low interleave into operand 0.
;; NOTE(review): the condition selecting the SSE5 path is not visible in
;; this excerpt.
4103 (define_expand "vec_pack_trunc_v4si"
4104 [(match_operand:V8HI 0 "register_operand" "")
4105 (match_operand:V4SI 1 "register_operand" "")
4106 (match_operand:V4SI 2 "register_operand" "")]
4109 rtx op1, op2, h1, l1, h2, l2;
4113 ix86_expand_sse5_pack (operands);
4117 op1 = gen_lowpart (V8HImode, operands[1]);
4118 op2 = gen_lowpart (V8HImode, operands[2]);
4119 h1 = gen_reg_rtx (V8HImode);
4120 l1 = gen_reg_rtx (V8HImode);
4121 h2 = gen_reg_rtx (V8HImode);
4122 l2 = gen_reg_rtx (V8HImode);
4124 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4125 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4126 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4127 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4128 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander packing two V2DI inputs into one V4SI result.  One path hands
;; the operands to ix86_expand_sse5_pack.  The other views both inputs as
;; V4SI, interleaves them high and low, and low-interleaves the two
;; results into operand 0.
;; NOTE(review): the condition selecting the SSE5 path is not visible in
;; this excerpt.
4138 (define_expand "vec_pack_trunc_v2di"
4139 [(match_operand:V4SI 0 "register_operand" "")
4140 (match_operand:V2DI 1 "register_operand" "")
4141 (match_operand:V2DI 2 "register_operand" "")]
4144 rtx op1, op2, h1, l1;
4148 ix86_expand_sse5_pack (operands);
4152 op1 = gen_lowpart (V4SImode, operands[1]);
4153 op2 = gen_lowpart (V4SImode, operands[2]);
4154 h1 = gen_reg_rtx (V4SImode);
4155 l1 = gen_reg_rtx (V4SImode);
4157 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4158 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4159 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander interleaving the upper halves of two V16QI inputs: the
;; selector pairs indices 8..15 with 24..31.  The body emits
;; gen_sse2_punpckhbw.
4163 (define_expand "vec_interleave_highv16qi"
4164 [(set (match_operand:V16QI 0 "register_operand" "")
4167 (match_operand:V16QI 1 "register_operand" "")
4168 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4169 (parallel [(const_int 8) (const_int 24)
4170 (const_int 9) (const_int 25)
4171 (const_int 10) (const_int 26)
4172 (const_int 11) (const_int 27)
4173 (const_int 12) (const_int 28)
4174 (const_int 13) (const_int 29)
4175 (const_int 14) (const_int 30)
4176 (const_int 15) (const_int 31)])))]
4179 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander interleaving the lower halves of two V16QI inputs: the
;; selector pairs indices 0..7 with 16..23.  The body emits
;; gen_sse2_punpcklbw.
4183 (define_expand "vec_interleave_lowv16qi"
4184 [(set (match_operand:V16QI 0 "register_operand" "")
4187 (match_operand:V16QI 1 "register_operand" "")
4188 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4189 (parallel [(const_int 0) (const_int 16)
4190 (const_int 1) (const_int 17)
4191 (const_int 2) (const_int 18)
4192 (const_int 3) (const_int 19)
4193 (const_int 4) (const_int 20)
4194 (const_int 5) (const_int 21)
4195 (const_int 6) (const_int 22)
4196 (const_int 7) (const_int 23)])))]
4199 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander interleaving the upper halves of two V8HI inputs: the selector
;; pairs indices 4..7 with 12..15.  The body emits gen_sse2_punpckhwd.
;; Operand 0 carries an empty constraint string, like every other
;; vec_interleave expander in this file.
4203 (define_expand "vec_interleave_highv8hi"
4204 [(set (match_operand:V8HI 0 "register_operand" "")
4207 (match_operand:V8HI 1 "register_operand" "")
4208 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4209 (parallel [(const_int 4) (const_int 12)
4210 (const_int 5) (const_int 13)
4211 (const_int 6) (const_int 14)
4212 (const_int 7) (const_int 15)])))]
4215 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander interleaving the lower halves of two V8HI inputs: the selector
;; pairs indices 0..3 with 8..11.  The body emits gen_sse2_punpcklwd.
4219 (define_expand "vec_interleave_lowv8hi"
4220 [(set (match_operand:V8HI 0 "register_operand" "")
4223 (match_operand:V8HI 1 "register_operand" "")
4224 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4225 (parallel [(const_int 0) (const_int 8)
4226 (const_int 1) (const_int 9)
4227 (const_int 2) (const_int 10)
4228 (const_int 3) (const_int 11)])))]
4231 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander interleaving the upper halves of two V4SI inputs: selector
;; indices 2, 6, 3, 7.  The body emits gen_sse2_punpckhdq.
4235 (define_expand "vec_interleave_highv4si"
4236 [(set (match_operand:V4SI 0 "register_operand" "")
4239 (match_operand:V4SI 1 "register_operand" "")
4240 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4241 (parallel [(const_int 2) (const_int 6)
4242 (const_int 3) (const_int 7)])))]
4245 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander interleaving the lower halves of two V4SI inputs: selector
;; indices 0, 4, 1, 5.  The body emits gen_sse2_punpckldq.
4249 (define_expand "vec_interleave_lowv4si"
4250 [(set (match_operand:V4SI 0 "register_operand" "")
4253 (match_operand:V4SI 1 "register_operand" "")
4254 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4255 (parallel [(const_int 0) (const_int 4)
4256 (const_int 1) (const_int 5)])))]
4259 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander for the V2DI high interleave; the selector starts at element 1.
;; The body emits gen_sse2_punpckhqdq.
4263 (define_expand "vec_interleave_highv2di"
4264 [(set (match_operand:V2DI 0 "register_operand" "")
4267 (match_operand:V2DI 1 "register_operand" "")
4268 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4269 (parallel [(const_int 1)
4273 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander for the V2DI low interleave; the selector starts at element 0.
;; The body emits gen_sse2_punpcklqdq.
4277 (define_expand "vec_interleave_lowv2di"
4278 [(set (match_operand:V2DI 0 "register_operand" "")
4281 (match_operand:V2DI 1 "register_operand" "")
4282 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4283 (parallel [(const_int 0)
4287 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Expander vec_interleave_highv4sf (V4SF result, V4SF inputs 1 and 2).
;; Selection order 2,6,3,7: the last two elements of one input alternated
;; with the last two of the other (indices 4-7 presumably address operand 2
;; -- TODO confirm).
4291 (define_expand "vec_interleave_highv4sf"
4292 [(set (match_operand:V4SF 0 "register_operand" "")
4295 (match_operand:V4SF 1 "register_operand" "")
4296 (match_operand:V4SF 2 "nonimmediate_operand" ""))
4297 (parallel [(const_int 2) (const_int 6)
4298 (const_int 3) (const_int 7)])))]
;; Expander vec_interleave_lowv4sf (V4SF result, V4SF inputs 1 and 2).
;; Selection order 0,4,1,5: the first two elements of one input alternated
;; with the first two of the other (indices 4-7 presumably address operand 2
;; -- TODO confirm).
4301 (define_expand "vec_interleave_lowv4sf"
4302 [(set (match_operand:V4SF 0 "register_operand" "")
4305 (match_operand:V4SF 1 "register_operand" "")
4306 (match_operand:V4SF 2 "nonimmediate_operand" ""))
4307 (parallel [(const_int 0) (const_int 4)
4308 (const_int 1) (const_int 5)])))]
;; Expander vec_interleave_highv2df (V2DF result, V2DF inputs 1 and 2).
;; The selection list begins with element 1.
4311 (define_expand "vec_interleave_highv2df"
4312 [(set (match_operand:V2DF 0 "register_operand" "")
4315 (match_operand:V2DF 1 "register_operand" "")
4316 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4317 (parallel [(const_int 1)
;; Expander vec_interleave_lowv2df (V2DF result, V2DF inputs 1 and 2).
;; The selection list begins with element 0.
4321 (define_expand "vec_interleave_lowv2df"
4322 [(set (match_operand:V2DF 0 "register_operand" "")
4325 (match_operand:V2DF 1 "register_operand" "")
4326 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4327 (parallel [(const_int 0)
;; packsswb: combines two V8HI sources into one V16QI result.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be a
;; register or memory.  Presumably each source is narrowed with signed
;; saturation, as the mnemonic suggests -- confirm against the RTL codes.
4331 (define_insn "sse2_packsswb"
4332 [(set (match_operand:V16QI 0 "register_operand" "=x")
4335 (match_operand:V8HI 1 "register_operand" "0"))
4337 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4339 "packsswb\t{%2, %0|%0, %2}"
4340 [(set_attr "type" "sselog")
4341 (set_attr "prefix_data16" "1")
4342 (set_attr "mode" "TI")])
;; packssdw: combines two V4SI sources into one V8HI result.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be a
;; register or memory.  Presumably each source is narrowed with signed
;; saturation, as the mnemonic suggests -- confirm against the RTL codes.
4344 (define_insn "sse2_packssdw"
4345 [(set (match_operand:V8HI 0 "register_operand" "=x")
4348 (match_operand:V4SI 1 "register_operand" "0"))
4350 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4352 "packssdw\t{%2, %0|%0, %2}"
4353 [(set_attr "type" "sselog")
4354 (set_attr "prefix_data16" "1")
4355 (set_attr "mode" "TI")])
;; packuswb: combines two V8HI sources into one V16QI result.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be a
;; register or memory.  Presumably each source is narrowed with unsigned
;; saturation, as the mnemonic suggests -- confirm against the RTL codes.
4357 (define_insn "sse2_packuswb"
4358 [(set (match_operand:V16QI 0 "register_operand" "=x")
4361 (match_operand:V8HI 1 "register_operand" "0"))
4363 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4365 "packuswb\t{%2, %0|%0, %2}"
4366 [(set_attr "type" "sselog")
4367 (set_attr "prefix_data16" "1")
4368 (set_attr "mode" "TI")])
;; punpckhbw: V16QI interleave of the upper halves of operands 1 and 2.
;; The selection list pairs element k (8..15) with element k+16 (24..31);
;; indices 16-31 presumably address operand 2 in the combined pair.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4370 (define_insn "sse2_punpckhbw"
4371 [(set (match_operand:V16QI 0 "register_operand" "=x")
4374 (match_operand:V16QI 1 "register_operand" "0")
4375 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4376 (parallel [(const_int 8) (const_int 24)
4377 (const_int 9) (const_int 25)
4378 (const_int 10) (const_int 26)
4379 (const_int 11) (const_int 27)
4380 (const_int 12) (const_int 28)
4381 (const_int 13) (const_int 29)
4382 (const_int 14) (const_int 30)
4383 (const_int 15) (const_int 31)])))]
4385 "punpckhbw\t{%2, %0|%0, %2}"
4386 [(set_attr "type" "sselog")
4387 (set_attr "prefix_data16" "1")
4388 (set_attr "mode" "TI")])
;; punpcklbw: V16QI interleave of the lower halves of operands 1 and 2.
;; The selection list pairs element k (0..7) with element k+16 (16..23);
;; indices 16-31 presumably address operand 2 in the combined pair.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4390 (define_insn "sse2_punpcklbw"
4391 [(set (match_operand:V16QI 0 "register_operand" "=x")
4394 (match_operand:V16QI 1 "register_operand" "0")
4395 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4396 (parallel [(const_int 0) (const_int 16)
4397 (const_int 1) (const_int 17)
4398 (const_int 2) (const_int 18)
4399 (const_int 3) (const_int 19)
4400 (const_int 4) (const_int 20)
4401 (const_int 5) (const_int 21)
4402 (const_int 6) (const_int 22)
4403 (const_int 7) (const_int 23)])))]
4405 "punpcklbw\t{%2, %0|%0, %2}"
4406 [(set_attr "type" "sselog")
4407 (set_attr "prefix_data16" "1")
4408 (set_attr "mode" "TI")])
;; punpckhwd: V8HI interleave of the upper halves of operands 1 and 2.
;; The selection list pairs element k (4..7) with element k+8 (12..15);
;; indices 8-15 presumably address operand 2 in the combined pair.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4410 (define_insn "sse2_punpckhwd"
4411 [(set (match_operand:V8HI 0 "register_operand" "=x")
4414 (match_operand:V8HI 1 "register_operand" "0")
4415 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4416 (parallel [(const_int 4) (const_int 12)
4417 (const_int 5) (const_int 13)
4418 (const_int 6) (const_int 14)
4419 (const_int 7) (const_int 15)])))]
4421 "punpckhwd\t{%2, %0|%0, %2}"
4422 [(set_attr "type" "sselog")
4423 (set_attr "prefix_data16" "1")
4424 (set_attr "mode" "TI")])
;; punpcklwd: V8HI interleave of the lower halves of operands 1 and 2.
;; The selection list pairs element k (0..3) with element k+8 (8..11);
;; indices 8-15 presumably address operand 2 in the combined pair.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4426 (define_insn "sse2_punpcklwd"
4427 [(set (match_operand:V8HI 0 "register_operand" "=x")
4430 (match_operand:V8HI 1 "register_operand" "0")
4431 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4432 (parallel [(const_int 0) (const_int 8)
4433 (const_int 1) (const_int 9)
4434 (const_int 2) (const_int 10)
4435 (const_int 3) (const_int 11)])))]
4437 "punpcklwd\t{%2, %0|%0, %2}"
4438 [(set_attr "type" "sselog")
4439 (set_attr "prefix_data16" "1")
4440 (set_attr "mode" "TI")])
;; punpckhdq: V4SI interleave of the upper halves of operands 1 and 2
;; (selection order 2,6,3,7; indices 4-7 presumably address operand 2).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4442 (define_insn "sse2_punpckhdq"
4443 [(set (match_operand:V4SI 0 "register_operand" "=x")
4446 (match_operand:V4SI 1 "register_operand" "0")
4447 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4448 (parallel [(const_int 2) (const_int 6)
4449 (const_int 3) (const_int 7)])))]
4451 "punpckhdq\t{%2, %0|%0, %2}"
4452 [(set_attr "type" "sselog")
4453 (set_attr "prefix_data16" "1")
4454 (set_attr "mode" "TI")])
;; punpckldq: V4SI interleave of the lower halves of operands 1 and 2
;; (selection order 0,4,1,5; indices 4-7 presumably address operand 2).
;; Operand 1 is tied to the destination; operand 2 is register or memory.
4456 (define_insn "sse2_punpckldq"
4457 [(set (match_operand:V4SI 0 "register_operand" "=x")
4460 (match_operand:V4SI 1 "register_operand" "0")
4461 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4462 (parallel [(const_int 0) (const_int 4)
4463 (const_int 1) (const_int 5)])))]
4465 "punpckldq\t{%2, %0|%0, %2}"
4466 [(set_attr "type" "sselog")
4467 (set_attr "prefix_extra" "1")
4468 (set_attr "mode" "TI")])
;; pinsrb: inserts QI operand 2 (register or memory) into V16QI operand 1,
;; which is tied to the destination.
;; Operand 3 is matched as a power of two in the range 1..32768; the output
;; code replaces it with exact_log2 of that value before printing, so the
;; immediate in the assembly is the log2 index rather than the mask.
4470 (define_insn "*sse4_1_pinsrb"
4471 [(set (match_operand:V16QI 0 "register_operand" "=x")
4473 (vec_duplicate:V16QI
4474 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4475 (match_operand:V16QI 1 "register_operand" "0")
4476 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4479 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4480 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4482 [(set_attr "type" "sselog")
4483 (set_attr "prefix_extra" "1")
4484 (set_attr "mode" "TI")])
;; pinsrw: inserts HI operand 2 (register or memory) into V8HI operand 1,
;; which is tied to the destination.
;; Operand 3 is matched as a power of two in the range 1..128; the output
;; code replaces it with exact_log2 of that value before printing, so the
;; immediate in the assembly is the log2 index rather than the mask.
4486 (define_insn "*sse2_pinsrw"
4487 [(set (match_operand:V8HI 0 "register_operand" "=x")
4490 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4491 (match_operand:V8HI 1 "register_operand" "0")
4492 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4495 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4496 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4498 [(set_attr "type" "sselog")
4499 (set_attr "prefix_data16" "1")
4500 (set_attr "mode" "TI")])
4502 ;; It must come before sse2_loadld since it is preferred.
;; pinsrd: inserts SI operand 2 (register or memory) into V4SI operand 1,
;; which is tied to the destination.
;; Operand 3 is matched as a power of two in the range 1..8; the output
;; code replaces it with exact_log2 of that value before printing.
4503 (define_insn "*sse4_1_pinsrd"
4504 [(set (match_operand:V4SI 0 "register_operand" "=x")
4507 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4508 (match_operand:V4SI 1 "register_operand" "0")
4509 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4512 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4513 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4515 [(set_attr "type" "sselog")
4516 (set_attr "prefix_extra" "1")
4517 (set_attr "mode" "TI")])
;; pinsrq: inserts DI operand 2 (register or memory) into V2DI operand 1,
;; which is tied to the destination.  Enabled only when both TARGET_SSE4_1
;; and TARGET_64BIT hold.
;; Operand 3 is matched as a power of two in the range 1..2; the output
;; code replaces it with exact_log2 of that value before printing.
4519 (define_insn "*sse4_1_pinsrq"
4520 [(set (match_operand:V2DI 0 "register_operand" "=x")
4523 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4524 (match_operand:V2DI 1 "register_operand" "0")
4525 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4526 "TARGET_SSE4_1 && TARGET_64BIT"
4528 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4529 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4531 [(set_attr "type" "sselog")
4532 (set_attr "prefix_extra" "1")
4533 (set_attr "mode" "TI")])
;; pextrb to a general register: extracts the element of V16QI operand 1
;; selected by constant operand 2 (0..15) into SI register operand 0.
;; NOTE(review): the QI element is presumably widened to SI by an extension
;; wrapper around the selection -- confirm which extension is used.
4535 (define_insn "*sse4_1_pextrb"
4536 [(set (match_operand:SI 0 "register_operand" "=r")
4539 (match_operand:V16QI 1 "register_operand" "x")
4540 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4542 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4543 [(set_attr "type" "sselog")
4544 (set_attr "prefix_extra" "1")
4545 (set_attr "mode" "TI")])
;; pextrb to memory: stores the element of V16QI operand 1 selected by
;; constant operand 2 (0..15) into QI memory operand 0.
4547 (define_insn "*sse4_1_pextrb_memory"
4548 [(set (match_operand:QI 0 "memory_operand" "=m")
4550 (match_operand:V16QI 1 "register_operand" "x")
4551 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4553 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4554 [(set_attr "type" "sselog")
4555 (set_attr "prefix_extra" "1")
4556 (set_attr "mode" "TI")])
;; pextrw to a general register: extracts the element of V8HI operand 1
;; selected by constant operand 2 (0..7) into SI register operand 0.
;; NOTE(review): the HI element is presumably widened to SI by an extension
;; wrapper around the selection -- confirm which extension is used.
4558 (define_insn "*sse2_pextrw"
4559 [(set (match_operand:SI 0 "register_operand" "=r")
4562 (match_operand:V8HI 1 "register_operand" "x")
4563 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4565 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4566 [(set_attr "type" "sselog")
4567 (set_attr "prefix_data16" "1")
4568 (set_attr "mode" "TI")])
;; pextrw to memory: stores the element of V8HI operand 1 selected by
;; constant operand 2 (0..7) into HI memory operand 0.  Unlike the register
;; form (*sse2_pextrw) this pattern carries the prefix_extra attribute.
4570 (define_insn "*sse4_1_pextrw_memory"
4571 [(set (match_operand:HI 0 "memory_operand" "=m")
4573 (match_operand:V8HI 1 "register_operand" "x")
4574 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4576 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4577 [(set_attr "type" "sselog")
4578 (set_attr "prefix_extra" "1")
4579 (set_attr "mode" "TI")])
;; pextrd: extracts the element of V4SI operand 1 selected by constant
;; operand 2 (0..3) into SI operand 0, which may be a register or memory.
4581 (define_insn "*sse4_1_pextrd"
4582 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4584 (match_operand:V4SI 1 "register_operand" "x")
4585 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4587 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4588 [(set_attr "type" "sselog")
4589 (set_attr "prefix_extra" "1")
4590 (set_attr "mode" "TI")])
4592 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq: extracts the element of V2DI operand 1 selected by constant
;; operand 2 (0..1) into DI operand 0 (register or memory).  Enabled only
;; when both TARGET_SSE4_1 and TARGET_64BIT hold.
4593 (define_insn "*sse4_1_pextrq"
4594 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4596 (match_operand:V2DI 1 "register_operand" "x")
4597 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4598 "TARGET_SSE4_1 && TARGET_64BIT"
4599 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4600 [(set_attr "type" "sselog")
4601 (set_attr "prefix_extra" "1")
4602 (set_attr "mode" "TI")])
;; Expander sse2_pshufd: takes a V4SI destination, a V4SI source and an
;; integer shuffle mask (operand 2).
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each
;; passed to sse2_pshufd_1 as a separate element selector.
4604 (define_expand "sse2_pshufd"
4605 [(match_operand:V4SI 0 "register_operand" "")
4606 (match_operand:V4SI 1 "nonimmediate_operand" "")
4607 (match_operand:SI 2 "const_int_operand" "")]
4610 int mask = INTVAL (operands[2]);
4611 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4612 GEN_INT ((mask >> 0) & 3),
4613 GEN_INT ((mask >> 2) & 3),
4614 GEN_INT ((mask >> 4) & 3),
4615 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four separate element selectors
;; (operands 2-5, each constrained to 0..3) over V4SI operand 1.
;; The output code packs the selectors back into one immediate, two bits
;; per selector with operand 2 in the lowest bits, stores it in operands[2]
;; and prints it as the pshufd immediate.
4619 (define_insn "sse2_pshufd_1"
4620 [(set (match_operand:V4SI 0 "register_operand" "=x")
4622 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4623 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4624 (match_operand 3 "const_0_to_3_operand" "")
4625 (match_operand 4 "const_0_to_3_operand" "")
4626 (match_operand 5 "const_0_to_3_operand" "")])))]
4630 mask |= INTVAL (operands[2]) << 0;
4631 mask |= INTVAL (operands[3]) << 2;
4632 mask |= INTVAL (operands[4]) << 4;
4633 mask |= INTVAL (operands[5]) << 6;
4634 operands[2] = GEN_INT (mask);
4636 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4638 [(set_attr "type" "sselog1")
4639 (set_attr "prefix_data16" "1")
4640 (set_attr "mode" "TI")])
;; Expander sse2_pshuflw: takes a V8HI destination, a V8HI source and an
;; integer shuffle mask (operand 2).
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each
;; passed to sse2_pshuflw_1 as a separate element selector in 0..3.
4642 (define_expand "sse2_pshuflw"
4643 [(match_operand:V8HI 0 "register_operand" "")
4644 (match_operand:V8HI 1 "nonimmediate_operand" "")
4645 (match_operand:SI 2 "const_int_operand" "")]
4648 int mask = INTVAL (operands[2]);
4649 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4650 GEN_INT ((mask >> 0) & 3),
4651 GEN_INT ((mask >> 2) & 3),
4652 GEN_INT ((mask >> 4) & 3),
4653 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle expressed as four element selectors (operands
;; 2-5, each constrained to 0..3) at the start of the V8HI selection list.
;; The output code packs the selectors back into one immediate, two bits
;; per selector with operand 2 in the lowest bits, stores it in operands[2]
;; and prints it as the pshuflw immediate.
4657 (define_insn "sse2_pshuflw_1"
4658 [(set (match_operand:V8HI 0 "register_operand" "=x")
4660 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4661 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4662 (match_operand 3 "const_0_to_3_operand" "")
4663 (match_operand 4 "const_0_to_3_operand" "")
4664 (match_operand 5 "const_0_to_3_operand" "")
4672 mask |= INTVAL (operands[2]) << 0;
4673 mask |= INTVAL (operands[3]) << 2;
4674 mask |= INTVAL (operands[4]) << 4;
4675 mask |= INTVAL (operands[5]) << 6;
4676 operands[2] = GEN_INT (mask);
4678 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4680 [(set_attr "type" "sselog")
4681 (set_attr "prefix_rep" "1")
4682 (set_attr "mode" "TI")])
;; Expander sse2_pshufhw: takes a V8HI destination, a V8HI source and an
;; integer shuffle mask (operand 2).
;; The mask is split into four 2-bit fields and 4 is added to each, so the
;; selectors handed to sse2_pshufhw_1 lie in 4..7.
4684 (define_expand "sse2_pshufhw"
4685 [(match_operand:V8HI 0 "register_operand" "")
4686 (match_operand:V8HI 1 "nonimmediate_operand" "")
4687 (match_operand:SI 2 "const_int_operand" "")]
4690 int mask = INTVAL (operands[2]);
4691 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4692 GEN_INT (((mask >> 0) & 3) + 4),
4693 GEN_INT (((mask >> 2) & 3) + 4),
4694 GEN_INT (((mask >> 4) & 3) + 4),
4695 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with the shuffle expressed as four element selectors (operands
;; 2-5, each constrained to 4..7) at the end of the V8HI selection list,
;; which begins with the fixed index 0.
;; The output code subtracts 4 from every selector and packs the results
;; into one immediate, two bits per selector with operand 2 in the lowest
;; bits, then prints it as the pshufhw immediate.
4699 (define_insn "sse2_pshufhw_1"
4700 [(set (match_operand:V8HI 0 "register_operand" "=x")
4702 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4703 (parallel [(const_int 0)
4707 (match_operand 2 "const_4_to_7_operand" "")
4708 (match_operand 3 "const_4_to_7_operand" "")
4709 (match_operand 4 "const_4_to_7_operand" "")
4710 (match_operand 5 "const_4_to_7_operand" "")])))]
4714 mask |= (INTVAL (operands[2]) - 4) << 0;
4715 mask |= (INTVAL (operands[3]) - 4) << 2;
4716 mask |= (INTVAL (operands[4]) - 4) << 4;
4717 mask |= (INTVAL (operands[5]) - 4) << 6;
4718 operands[2] = GEN_INT (mask);
4720 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4722 [(set_attr "type" "sselog")
4723 (set_attr "prefix_rep" "1")
4724 (set_attr "mode" "TI")])
;; Expander sse2_loadd: builds a V4SI value from SI operand 1 (register or
;; memory).  The preparation statement sets operand 2 to the all-zero V4SI
;; constant; it is presumably the vector the scalar is merged into.
4726 (define_expand "sse2_loadd"
4727 [(set (match_operand:V4SI 0 "register_operand" "")
4730 (match_operand:SI 1 "nonimmediate_operand" ""))
4734 "operands[2] = CONST0_RTX (V4SImode);")
;; Loads SI operand 2 into a V4SI register, combined with operand 1.
;; Four alternatives, in constraint order:
;;   0: memory source,   operand 1 "C"              -> movd
;;   1: "r" source,      operand 1 "C"              -> movd
;;   2: memory source,   operand 1 "C"              -> movss
;;   3: "x" source,      operand 1 tied to the dest -> movss
;; The per-alternative "mode" attribute is TI, TI, V4SF, SF.
4736 (define_insn "sse2_loadld"
4737 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4740 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4741 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4745 movd\t{%2, %0|%0, %2}
4746 movd\t{%2, %0|%0, %2}
4747 movss\t{%2, %0|%0, %2}
4748 movss\t{%2, %0|%0, %2}"
4749 [(set_attr "type" "ssemov")
4750 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Stores element 0 of V4SI operand 1 into SI operand 0.
;; Split after reload, but only when TARGET_INTER_UNIT_MOVES holds, or the
;; destination is memory, or the destination is not a general register.
;; The split rewrites operand 1 as an SImode reference to the same hard
;; register, turning the extraction into a plain SI move.
4752 (define_insn_and_split "sse2_stored"
4753 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4755 (match_operand:V4SI 1 "register_operand" "x,Yi")
4756 (parallel [(const_int 0)])))]
4759 "&& reload_completed
4760 && (TARGET_INTER_UNIT_MOVES
4761 || MEM_P (operands [0])
4762 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4763 [(set (match_dup 0) (match_dup 1))]
4765 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extracts element operand 2 (0..3) of a V4SI value held in offsettable
;; memory (constraint "o") into an SI general register.
;; The split code emits a plain SImode move from the source address
;; adjusted by 4 bytes per element index.
4768 (define_insn_and_split "*vec_ext_v4si_mem"
4769 [(set (match_operand:SI 0 "register_operand" "=r")
4771 (match_operand:V4SI 1 "memory_operand" "o")
4772 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4778 int i = INTVAL (operands[2]);
4780 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander sse_storeq: stores element 0 of V2DI register operand 1 into
;; DI operand 0 (register or memory).
4784 (define_expand "sse_storeq"
4785 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4787 (match_operand:V2DI 1 "register_operand" "")
4788 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 store from V2DI operand 1 to DI operand 0.
;; Requires TARGET_64BIT and rejects the memory-to-memory combination.
;; Three alternatives; the last one (general register destination,
;; offsettable memory source) is emitted as an integer mov{q} and is the
;; only one given an explicit type (imov) and mode (DI).
4792 (define_insn "*sse2_storeq_rex64"
4793 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
4795 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4796 (parallel [(const_int 0)])))]
4797 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4801 mov{q}\t{%1, %0|%0, %1}"
4802 [(set_attr "type" "*,*,imov")
4803 (set_attr "mode" "*,*,DI")])
;; Element-0 store from V2DI register operand 1 to DI operand 0, where the
;; destination is matched by the single "mx" alternative.
4805 (define_insn "*sse2_storeq"
4806 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4808 (match_operand:V2DI 1 "register_operand" "x")
4809 (parallel [(const_int 0)])))]
;; NOTE(review): this looks like the split that accompanies the DI
;; element-0 store patterns above -- confirm.
;; It applies only when TARGET_INTER_UNIT_MOVES holds, or the destination is
;; memory, or the destination is not a general register.  The replacement
;; is a plain move with operand 1 rewritten as a DImode reference to the
;; same hard register.
4814 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4816 (match_operand:V2DI 1 "register_operand" "")
4817 (parallel [(const_int 0)])))]
4820 && (TARGET_INTER_UNIT_MOVES
4821 || MEM_P (operands [0])
4822 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4823 [(set (match_dup 0) (match_dup 1))]
4825 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 of V2DI operand 1 into DI operand 0 on 64-bit
;; targets; the memory-to-memory combination is rejected.
;; Four alternatives, in constraint order:
;;   0: memory dest, "x" source               -> movhps
;;   1: "x" dest, source tied to the dest     -> psrldq by 8 bytes
;;   2: "x" dest, offsettable memory source   -> movq from %H1
;;   3: "r" dest, offsettable memory source   -> mov{q} from %H1
4828 (define_insn "*vec_extractv2di_1_rex64"
4829 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4831 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4832 (parallel [(const_int 1)])))]
4833 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4835 movhps\t{%1, %0|%0, %1}
4836 psrldq\t{$8, %0|%0, 8}
4837 movq\t{%H1, %0|%0, %H1}
4838 mov{q}\t{%H1, %0|%0, %H1}"
4839 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4840 (set_attr "memory" "*,none,*,*")
4841 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extracts element 1 of V2DI operand 1 into DI operand 0 when TARGET_SSE2
;; holds; the memory-to-memory combination is rejected.
;; Three alternatives, in constraint order:
;;   0: memory dest, "x" source               -> movhps
;;   1: "x" dest, source tied to the dest     -> psrldq by 8 bytes
;;   2: "x" dest, offsettable memory source   -> movq from %H1
4843 (define_insn "*vec_extractv2di_1_sse2"
4844 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4846 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4847 (parallel [(const_int 1)])))]
4849 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4851 movhps\t{%1, %0|%0, %1}
4852 psrldq\t{$8, %0|%0, 8}
4853 movq\t{%H1, %0|%0, %H1}"
4854 [(set_attr "type" "ssemov,sseishft,ssemov")
4855 (set_attr "memory" "*,none,*")
4856 (set_attr "mode" "V2SF,TI,TI")])
4858 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extracts element 1 of V2DI operand 1 into DI operand 0 when TARGET_SSE
;; holds but TARGET_SSE2 does not; memory-to-memory is rejected.
;; Three alternatives, in constraint order:
;;   0: memory dest, "x" source               -> movhps
;;   1: "x" dest, "x" source                  -> movhlps
;;   2: "x" dest, offsettable memory source   -> movlps from %H1
4859 (define_insn "*vec_extractv2di_1_sse"
4860 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4862 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4863 (parallel [(const_int 1)])))]
4864 "!TARGET_SSE2 && TARGET_SSE
4865 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4867 movhps\t{%1, %0|%0, %1}
4868 movhlps\t{%1, %0|%0, %1}
4869 movlps\t{%H1, %0|%0, %H1}"
4870 [(set_attr "type" "ssemov")
4871 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI value from SI register operand 1.
;; Alternative 0 ("Y2" registers) uses pshufd with immediate 0; alternative 1
;; (source tied to the destination) uses shufps with immediate 0 on the
;; destination itself.  Presumably a broadcast of the scalar to every
;; element, as the pattern name suggests -- confirm against the RTL code.
4873 (define_insn "*vec_dupv4si"
4874 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4876 (match_operand:SI 1 "register_operand" " Y2,0")))]
4879 pshufd\t{$0, %1, %0|%0, %1, 0}
4880 shufps\t{$0, %0, %0|%0, %0, 0}"
4881 [(set_attr "type" "sselog1")
4882 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI value from DI register operand 1, which is tied to the
;; destination in both alternatives.  The alternatives are typed sselog1 and
;; ssemov, with modes TI and V4SF respectively.
4884 (define_insn "*vec_dupv2di"
4885 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4887 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4892 [(set_attr "type" "sselog1,ssemov")
4893 (set_attr "mode" "TI,V4SF")])
;; Forms a V2SI value from SI operands 1 and 2.
;; Five alternatives, in constraint order:
;;   0: op1 tied to dest, op2 "rm" -> pinsrd with immediate 1
;;   1: op1 tied to dest, op2 "x"  -> punpckldq
;;   2: op1 "rm", op2 "C"          -> movd of operand 1
;;   3: "*y" dest, op1 tied, op2 "*ym" -> punpckldq (type mmxcvt)
;;   4: "*y" dest, op1 "rm", op2 "C"   -> movd of operand 1 (type mmxmov)
;; Only the pinsrd alternative sets prefix_extra.
4895 (define_insn "*vec_concatv2si_sse4_1"
4896 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
4898 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
4899 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
4902 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
4903 punpckldq\t{%2, %0|%0, %2}
4904 movd\t{%1, %0|%0, %1}
4905 punpckldq\t{%2, %0|%0, %2}
4906 movd\t{%1, %0|%0, %1}"
4907 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4908 (set_attr "prefix_extra" "1,*,*,*,*")
4909 (set_attr "mode" "TI,TI,TI,DI,DI")])
4911 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4912 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4913 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI value from SI operands 1 and 2; operand 2 is restricted to
;; a register or zero (reg_or_0_operand).
;; Four alternatives, in constraint order:
;;   0: op1 tied to dest, op2 "x"      -> punpckldq
;;   1: op1 "rm", op2 "C"              -> movd of operand 1
;;   2: "*y" dest, op1 tied, op2 "*y"  -> punpckldq (type mmxcvt)
;;   3: "*y" dest, op1 "rm", op2 "C"   -> movd of operand 1 (type mmxmov)
4914 (define_insn "*vec_concatv2si_sse2"
4915 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
4917 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
4918 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
4921 punpckldq\t{%2, %0|%0, %2}
4922 movd\t{%1, %0|%0, %1}
4923 punpckldq\t{%2, %0|%0, %2}
4924 movd\t{%1, %0|%0, %1}"
4925 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4926 (set_attr "mode" "TI,TI,DI,DI")])
;; Forms a V2SI value from SI operands 1 and 2 using single-precision
;; float instructions for the "x" alternatives.
;; Four alternatives, in constraint order:
;;   0: op1 tied to dest, op2 "x"      -> unpcklps
;;   1: op1 memory, op2 "C"            -> movss of operand 1
;;   2: "*y" dest, op1 tied, op2 "*y"  -> punpckldq (type mmxcvt)
;;   3: "*y" dest, op1 "*rm", op2 "C"  -> movd of operand 1 (type mmxmov)
4928 (define_insn "*vec_concatv2si_sse"
4929 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4931 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4932 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4935 unpcklps\t{%2, %0|%0, %2}
4936 movss\t{%1, %0|%0, %1}
4937 punpckldq\t{%2, %0|%0, %2}
4938 movd\t{%1, %0|%0, %1}"
4939 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4940 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI value from two V2SI operands; operand 1 is tied to the
;; destination in every alternative.
;; Three alternatives, chosen by operand 2's constraint:
;;   0: "Y2" register -> punpcklqdq
;;   1: "x" register  -> movlhps
;;   2: memory        -> movhps
4942 (define_insn "*vec_concatv4si_1"
4943 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4945 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4946 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4949 punpcklqdq\t{%2, %0|%0, %2}
4950 movlhps\t{%2, %0|%0, %2}
4951 movhps\t{%2, %0|%0, %2}"
4952 [(set_attr "type" "sselog,ssemov,ssemov")
4953 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI value from DI operands 1 and 2 on non-64-bit SSE targets
;; (condition: !TARGET_64BIT && TARGET_SSE).
;; Five alternatives, in constraint order:
;;   0: op1 "mY2", op2 "C"            -> movq of operand 1
;;   1: op1 "*y",  op2 "C"            -> movq2dq of operand 1
;;   2: op1 tied to dest, op2 "Y2"    -> punpcklqdq
;;   3: op1 tied to dest, op2 "x"     -> movlhps
;;   4: op1 tied to dest, op2 memory  -> movhps
4955 (define_insn "vec_concatv2di"
4956 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
4958 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
4959 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
4960 "!TARGET_64BIT && TARGET_SSE"
4962 movq\t{%1, %0|%0, %1}
4963 movq2dq\t{%1, %0|%0, %1}
4964 punpcklqdq\t{%2, %0|%0, %2}
4965 movlhps\t{%2, %0|%0, %2}
4966 movhps\t{%2, %0|%0, %2}"
4967 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
4968 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; Forms a V2DI value from DI operands 1 and 2 on 64-bit SSE4.1 targets.
;; Seven alternatives, in constraint order:
;;   0: op1 tied to dest, op2 "rm"    -> pinsrq with immediate 1
;;   1: op1 "mx", op2 "C"             -> movq of operand 1
;;   2: "Yi" dest, op1 "r", op2 "C"   -> movq of operand 1
;;   3: op1 "*y", op2 "C"             -> movq2dq of operand 1
;;   4: op1 tied to dest, op2 "x"     -> punpcklqdq
;;   5: op1 tied to dest, op2 "x"     -> movlhps
;;   6: op1 tied to dest, op2 memory  -> movhps
;; Only the pinsrq alternative sets prefix_extra.
4970 (define_insn "*vec_concatv2di_rex64_sse4_1"
4971 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
4973 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
4974 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
4975 "TARGET_64BIT && TARGET_SSE4_1"
4977 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
4978 movq\t{%1, %0|%0, %1}
4979 movq\t{%1, %0|%0, %1}
4980 movq2dq\t{%1, %0|%0, %1}
4981 punpcklqdq\t{%2, %0|%0, %2}
4982 movlhps\t{%2, %0|%0, %2}
4983 movhps\t{%2, %0|%0, %2}"
4984 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4985 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
4986 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; Forms a V2DI value from DI operands 1 and 2 on 64-bit SSE targets.
;; Six alternatives, in constraint order:
;;   0: op1 "mY2", op2 "C"            -> movq of operand 1
;;   1: "Yi" dest, op1 "r", op2 "C"   -> movq of operand 1
;;   2: op1 "*y", op2 "C"             -> movq2dq of operand 1
;;   3: op1 tied to dest, op2 "Y2"    -> punpcklqdq
;;   4: op1 tied to dest, op2 "x"     -> movlhps
;;   5: op1 tied to dest, op2 memory  -> movhps
4988 (define_insn "*vec_concatv2di_rex64_sse"
4989 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
4991 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
4992 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
4993 "TARGET_64BIT && TARGET_SSE"
4995 movq\t{%1, %0|%0, %1}
4996 movq\t{%1, %0|%0, %1}
4997 movq2dq\t{%1, %0|%0, %1}
4998 punpcklqdq\t{%2, %0|%0, %2}
4999 movlhps\t{%2, %0|%0, %2}
5000 movhps\t{%2, %0|%0, %2}"
5001 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5002 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander vec_unpacku_hi_v16qi: V16QI operand 1 -> V8HI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (true, true).
;; The flags presumably mean unsigned extension and high half -- confirm
;; against the helpers' definitions.
5004 (define_expand "vec_unpacku_hi_v16qi"
5005 [(match_operand:V8HI 0 "register_operand" "")
5006 (match_operand:V16QI 1 "register_operand" "")]
5010 ix86_expand_sse4_unpack (operands, true, true);
5011 else if (TARGET_SSE5)
5012 ix86_expand_sse5_unpack (operands, true, true);
5014 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI operand 1 -> V8HI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (false, true).
;; The flags presumably mean signed extension and high half -- confirm
;; against the helpers' definitions.
5018 (define_expand "vec_unpacks_hi_v16qi"
5019 [(match_operand:V8HI 0 "register_operand" "")
5020 (match_operand:V16QI 1 "register_operand" "")]
5024 ix86_expand_sse4_unpack (operands, false, true);
5025 else if (TARGET_SSE5)
5026 ix86_expand_sse5_unpack (operands, false, true);
5028 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v16qi: V16QI operand 1 -> V8HI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (true, false).
;; The flags presumably mean unsigned extension and low half -- confirm
;; against the helpers' definitions.
5032 (define_expand "vec_unpacku_lo_v16qi"
5033 [(match_operand:V8HI 0 "register_operand" "")
5034 (match_operand:V16QI 1 "register_operand" "")]
5038 ix86_expand_sse4_unpack (operands, true, false);
5039 else if (TARGET_SSE5)
5040 ix86_expand_sse5_unpack (operands, true, false);
5042 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: V16QI operand 1 -> V8HI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (false, false).
;; The flags presumably mean signed extension and low half -- confirm
;; against the helpers' definitions.
5046 (define_expand "vec_unpacks_lo_v16qi"
5047 [(match_operand:V8HI 0 "register_operand" "")
5048 (match_operand:V16QI 1 "register_operand" "")]
5052 ix86_expand_sse4_unpack (operands, false, false);
5053 else if (TARGET_SSE5)
5054 ix86_expand_sse5_unpack (operands, false, false);
5056 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: V8HI operand 1 -> V4SI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (true, true).
;; The flags presumably mean unsigned extension and high half -- confirm
;; against the helpers' definitions.
5060 (define_expand "vec_unpacku_hi_v8hi"
5061 [(match_operand:V4SI 0 "register_operand" "")
5062 (match_operand:V8HI 1 "register_operand" "")]
5066 ix86_expand_sse4_unpack (operands, true, true);
5067 else if (TARGET_SSE5)
5068 ix86_expand_sse5_unpack (operands, true, true);
5070 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: V8HI operand 1 -> V4SI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (false, true).
;; The flags presumably mean signed extension and high half -- confirm
;; against the helpers' definitions.
5074 (define_expand "vec_unpacks_hi_v8hi"
5075 [(match_operand:V4SI 0 "register_operand" "")
5076 (match_operand:V8HI 1 "register_operand" "")]
5080 ix86_expand_sse4_unpack (operands, false, true);
5081 else if (TARGET_SSE5)
5082 ix86_expand_sse5_unpack (operands, false, true);
5084 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: V8HI operand 1 -> V4SI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (true, false).
;; The flags presumably mean unsigned extension and low half -- confirm
;; against the helpers' definitions.
5088 (define_expand "vec_unpacku_lo_v8hi"
5089 [(match_operand:V4SI 0 "register_operand" "")
5090 (match_operand:V8HI 1 "register_operand" "")]
5094 ix86_expand_sse4_unpack (operands, true, false);
5095 else if (TARGET_SSE5)
5096 ix86_expand_sse5_unpack (operands, true, false);
5098 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: V8HI operand 1 -> V4SI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (false, false).
;; The flags presumably mean signed extension and low half -- confirm
;; against the helpers' definitions.
5102 (define_expand "vec_unpacks_lo_v8hi"
5103 [(match_operand:V4SI 0 "register_operand" "")
5104 (match_operand:V8HI 1 "register_operand" "")]
5108 ix86_expand_sse4_unpack (operands, false, false);
5109 else if (TARGET_SSE5)
5110 ix86_expand_sse5_unpack (operands, false, false);
5112 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: V4SI operand 1 -> V2DI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (true, true).
;; The flags presumably mean unsigned extension and high half -- confirm
;; against the helpers' definitions.
5116 (define_expand "vec_unpacku_hi_v4si"
5117 [(match_operand:V2DI 0 "register_operand" "")
5118 (match_operand:V4SI 1 "register_operand" "")]
5122 ix86_expand_sse4_unpack (operands, true, true);
5123 else if (TARGET_SSE5)
5124 ix86_expand_sse5_unpack (operands, true, true);
5126 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: V4SI operand 1 -> V2DI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (false, true).
;; The flags presumably mean signed extension and high half -- confirm
;; against the helpers' definitions.
5130 (define_expand "vec_unpacks_hi_v4si"
5131 [(match_operand:V2DI 0 "register_operand" "")
5132 (match_operand:V4SI 1 "register_operand" "")]
5136 ix86_expand_sse4_unpack (operands, false, true);
5137 else if (TARGET_SSE5)
5138 ix86_expand_sse5_unpack (operands, false, true);
5140 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: V4SI operand 1 -> V2DI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (true, false).
;; The flags presumably mean unsigned extension and low half -- confirm
;; against the helpers' definitions.
5144 (define_expand "vec_unpacku_lo_v4si"
5145 [(match_operand:V2DI 0 "register_operand" "")
5146 (match_operand:V4SI 1 "register_operand" "")]
5150 ix86_expand_sse4_unpack (operands, true, false);
5151 else if (TARGET_SSE5)
5152 ix86_expand_sse5_unpack (operands, true, false);
5154 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: V4SI operand 1 -> V2DI operand 0.
;; Delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, always passing (false, false).
;; The flags presumably mean signed extension and low half -- confirm
;; against the helpers' definitions.
5158 (define_expand "vec_unpacks_lo_v4si"
5159 [(match_operand:V2DI 0 "register_operand" "")
5160 (match_operand:V4SI 1 "register_operand" "")]
5164 ix86_expand_sse4_unpack (operands, false, false);
5165 else if (TARGET_SSE5)
5166 ix86_expand_sse5_unpack (operands, false, false);
5168 ix86_expand_sse_unpack (operands, false, false);
5172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander sse2_uavgv16qi3: V16QI result computed from V16QI operands 1
;; and 2 together with a constant vector of sixteen ones.  Presumably the
;; rounding average (a + b + 1) >> 1, as the name suggests -- confirm
;; against the full RTL.
;; The preparation code calls ix86_fixup_binary_operands_no_copy with PLUS
;; to legitimize the operand combination.
5178 (define_expand "sse2_uavgv16qi3"
5179 [(set (match_operand:V16QI 0 "register_operand" "")
5185 (match_operand:V16QI 1 "nonimmediate_operand" ""))
5187 (match_operand:V16QI 2 "nonimmediate_operand" "")))
5188 (const_vector:V16QI [(const_int 1) (const_int 1)
5189 (const_int 1) (const_int 1)
5190 (const_int 1) (const_int 1)
5191 (const_int 1) (const_int 1)
5192 (const_int 1) (const_int 1)
5193 (const_int 1) (const_int 1)
5194 (const_int 1) (const_int 1)
5195 (const_int 1) (const_int 1)]))
5198 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; pavgb: matching insn for the V16QI average built from operands 1 and 2
;; plus a constant vector of sixteen ones.
;; Operand 1 is tied to the destination and carries the "%" modifier;
;; operand 2 may be a register or memory.  Enabled when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
5200 (define_insn "*sse2_uavgv16qi3"
5201 [(set (match_operand:V16QI 0 "register_operand" "=x")
5207 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5209 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5210 (const_vector:V16QI [(const_int 1) (const_int 1)
5211 (const_int 1) (const_int 1)
5212 (const_int 1) (const_int 1)
5213 (const_int 1) (const_int 1)
5214 (const_int 1) (const_int 1)
5215 (const_int 1) (const_int 1)
5216 (const_int 1) (const_int 1)
5217 (const_int 1) (const_int 1)]))
5219 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5220 "pavgb\t{%2, %0|%0, %2}"
5221 [(set_attr "type" "sseiadd")
5222 (set_attr "prefix_data16" "1")
5223 (set_attr "mode" "TI")])
;; Expander sse2_uavgv8hi3: V8HI result computed from V8HI operands 1 and 2
;; together with a constant vector of eight ones.  Presumably the rounding
;; average (a + b + 1) >> 1, as the name suggests -- confirm against the
;; full RTL.
;; The preparation code calls ix86_fixup_binary_operands_no_copy with PLUS
;; to legitimize the operand combination.
5225 (define_expand "sse2_uavgv8hi3"
5226 [(set (match_operand:V8HI 0 "register_operand" "")
5232 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5234 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5235 (const_vector:V8HI [(const_int 1) (const_int 1)
5236 (const_int 1) (const_int 1)
5237 (const_int 1) (const_int 1)
5238 (const_int 1) (const_int 1)]))
5241 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; pavgw: matching insn for the V8HI average built from operands 1 and 2
;; plus a constant vector of eight ones.
;; Operand 1 is tied to the destination and carries the "%" modifier;
;; operand 2 may be a register or memory.  Enabled when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
5243 (define_insn "*sse2_uavgv8hi3"
5244 [(set (match_operand:V8HI 0 "register_operand" "=x")
5250 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5252 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5253 (const_vector:V8HI [(const_int 1) (const_int 1)
5254 (const_int 1) (const_int 1)
5255 (const_int 1) (const_int 1)
5256 (const_int 1) (const_int 1)]))
5258 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5259 "pavgw\t{%2, %0|%0, %2}"
5260 [(set_attr "type" "sseiadd")
5261 (set_attr "prefix_data16" "1")
5262 (set_attr "mode" "TI")])
5264 ;; The correct representation for this is absolutely enormous, and
5265 ;; surely not generally useful.
;; psadbw is therefore modelled as an opaque unspec producing V2DI from two
;; V16QI inputs.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
5266 (define_insn "sse2_psadbw"
5267 [(set (match_operand:V2DI 0 "register_operand" "=x")
5268 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5269 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5272 "psadbw\t{%2, %0|%0, %2}"
5273 [(set_attr "type" "sseiadd")
5274 (set_attr "prefix_data16" "1")
5275 (set_attr "mode" "TI")])
;; movmskps / movmskpd: one pattern instantiated for each mode of the
;; SSEMODEF2P iterator (V4SF and V2DF).  Writes an SI general register
;; from the vector register operand 1.  Enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the instantiated mode.
5277 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5278 [(set (match_operand:SI 0 "register_operand" "=r")
5280 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5282 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5283 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5284 [(set_attr "type" "ssecvt")
5285 (set_attr "mode" "<MODE>")])
;; pmovmskb: modelled as an unspec that produces an SI general register
;; value from V16QI register operand 1.
5287 (define_insn "sse2_pmovmskb"
5288 [(set (match_operand:SI 0 "register_operand" "=r")
5289 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5292 "pmovmskb\t{%1, %0|%0, %1}"
5293 [(set_attr "type" "ssecvt")
5294 (set_attr "prefix_data16" "1")
5295 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu: V16QI memory destination written from an
;; unspec over V16QI register operands 1 and 2.
5297 (define_expand "sse2_maskmovdqu"
5298 [(set (match_operand:V16QI 0 "memory_operand" "")
5299 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5300 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu for non-64-bit targets: the destination is V16QI memory
;; addressed by SI register operand 0 (constraint "D").
;; The unspec also reads the old contents of that memory (match_dup 0), so
;; the store is modelled as depending on the previous memory value.
;; Only operands 1 and 2 appear in the assembly template; the address
;; register is implicit.
5306 (define_insn "*sse2_maskmovdqu"
5307 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5308 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5309 (match_operand:V16QI 2 "register_operand" "x")
5310 (mem:V16QI (match_dup 0))]
5312 "TARGET_SSE2 && !TARGET_64BIT"
5313 ;; @@@ check ordering of operands in intel/nonintel syntax
5314 "maskmovdqu\t{%2, %1|%1, %2}"
5315 [(set_attr "type" "ssecvt")
5316 (set_attr "prefix_data16" "1")
5317 (set_attr "mode" "TI")])
;; maskmovdqu for 64-bit targets: identical to the 32-bit pattern except
;; that the address register operand 0 is DImode.
;; The unspec also reads the old contents of the destination memory
;; (match_dup 0).  Only operands 1 and 2 appear in the assembly template;
;; the address register is implicit.
5319 (define_insn "*sse2_maskmovdqu_rex64"
5320 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5321 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5322 (match_operand:V16QI 2 "register_operand" "x")
5323 (mem:V16QI (match_dup 0))]
5325 "TARGET_SSE2 && TARGET_64BIT"
5326 ;; @@@ check ordering of operands in intel/nonintel syntax
5327 "maskmovdqu\t{%2, %1|%1, %2}"
5328 [(set_attr "type" "ssecvt")
5329 (set_attr "prefix_data16" "1")
5330 (set_attr "mode" "TI")])
;; ldmxcsr: a volatile unspec over SI memory operand 0; the insn is marked
;; as a memory load.
5332 (define_insn "sse_ldmxcsr"
5333 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5337 [(set_attr "type" "sse")
5338 (set_attr "memory" "load")])
;; stmxcsr: sets SI memory operand 0 from a volatile unspec
;; (UNSPECV_STMXCSR) that has no real inputs; the insn is marked as a
;; memory store.
5340 (define_insn "sse_stmxcsr"
5341 [(set (match_operand:SI 0 "memory_operand" "=m")
5342 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5345 [(set_attr "type" "sse")
5346 (set_attr "memory" "store")])
;; Expander sse_sfence, available when TARGET_SSE or TARGET_3DNOW_A holds.
;; The preparation code creates operand 0 as a volatile BLKmode MEM whose
;; address is a scratch, so the fence is expressed as an unspec that both
;; reads and sets that memory.
5348 (define_expand "sse_sfence"
5350 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5351 "TARGET_SSE || TARGET_3DNOW_A"
5353 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5354 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the sfence unspec: BLK operand 0 (no predicate) is set
;; from UNSPEC_SFENCE applied to itself.  Memory effect is "unknown".
5357 (define_insn "*sse_sfence"
5358 [(set (match_operand:BLK 0 "" "")
5359 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5360 "TARGET_SSE || TARGET_3DNOW_A"
5362 [(set_attr "type" "sse")
5363 (set_attr "memory" "unknown")])
;; clflush: a volatile unspec over an address operand (constraint "p").
;; Memory effect is "unknown".
5365 (define_insn "sse2_clflush"
5366 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5370 [(set_attr "type" "sse")
5371 (set_attr "memory" "unknown")])
;; Expander sse2_mfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a scratch, so the fence is
;; expressed as an unspec (UNSPEC_MFENCE) that both reads and sets it.
5373 (define_expand "sse2_mfence"
5375 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5378 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5379 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the mfence unspec: BLK operand 0 (no predicate) is set
;; from UNSPEC_MFENCE applied to itself.  Memory effect is "unknown".
5382 (define_insn "*sse2_mfence"
5383 [(set (match_operand:BLK 0 "" "")
5384 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5387 [(set_attr "type" "sse")
5388 (set_attr "memory" "unknown")])
;; Expander sse2_lfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a scratch, so the fence is
;; expressed as an unspec (UNSPEC_LFENCE) that both reads and sets it.
5390 (define_expand "sse2_lfence"
5392 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5395 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5396 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the lfence unspec: BLK operand 0 (no predicate) is set
;; from UNSPEC_LFENCE applied to itself.  Memory effect is "unknown".
5399 (define_insn "*sse2_lfence"
5400 [(set (match_operand:BLK 0 "" "")
5401 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5404 [(set_attr "type" "sse")
5405 (set_attr "memory" "unknown")])
;; mwait: a volatile unspec over two SI register operands with the fixed
;; register constraints "a" and "c".  The same SImode pattern serves both
;; 32-bit and 64-bit targets, for the reason given below.  Length is 3.
5407 (define_insn "sse3_mwait"
5408 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5409 (match_operand:SI 1 "register_operand" "c")]
5412 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5413 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5414 ;; we only need to set up 32bit registers.
5416 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit-only form (TARGET_SSE3 && !TARGET_64BIT).  It is a
;; volatile unspec over three SImode register operands, pinned by their
;; constraints to the "a", "c" and "d" registers and printed in that order.
;; The encoded length is fixed at 3 bytes.
5418 (define_insn "sse3_monitor"
5419 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5420 (match_operand:SI 1 "register_operand" "c")
5421 (match_operand:SI 2 "register_operand" "d")]
5423 "TARGET_SSE3 && !TARGET_64BIT"
5424 "monitor\t%0, %1, %2"
5425 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit-only form (TARGET_SSE3 && TARGET_64BIT).  It differs
;; from sse3_monitor in that operand 0 (constraint "a") is DImode; operands 1
;; and 2 ("c", "d") stay SImode for the reason given in the comment below.
5427 (define_insn "sse3_monitor64"
5428 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5429 (match_operand:SI 1 "register_operand" "c")
5430 (match_operand:SI 2 "register_operand" "d")]
5432 "TARGET_SSE3 && TARGET_64BIT"
5433 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5434 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5435 ;; zero extended to 64bit, we only need to set up 32bit registers.
5437 [(set_attr "length" "3")])
5439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5441 ;; SSSE3 instructions
5443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ssse3_phaddwv8hi3: 128-bit "phaddw".  The V8HI destination and operand 1
;; share one SSE register ("=x" / "0"); operand 2 is a register or memory
;; ("xm").  The RTL reads HImode elements 0-7 of operand 1, then elements 0-7
;; of operand 2, selected in consecutive pairs.
;; NOTE(review): the operators that combine each pair are not visible in this
;; listing; confirm against the complete pattern.
5445 (define_insn "ssse3_phaddwv8hi3"
5446 [(set (match_operand:V8HI 0 "register_operand" "=x")
5452 (match_operand:V8HI 1 "register_operand" "0")
5453 (parallel [(const_int 0)]))
5454 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5456 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5457 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5460 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5461 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5463 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5464 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5469 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5470 (parallel [(const_int 0)]))
5471 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5473 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5474 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5477 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5478 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5480 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5481 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5483 "phaddw\t{%2, %0|%0, %2}"
5484 [(set_attr "type" "sseiadd")
5485 (set_attr "prefix_data16" "1")
5486 (set_attr "prefix_extra" "1")
5487 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: 64-bit "phaddw" on MMX registers ("=y" / "0" / "ym").
;; The RTL reads HImode elements 0-3 of operand 1, then elements 0-3 of
;; operand 2, in consecutive pairs.  Unlike the V8HI form, no prefix_data16
;; attribute is set and the mode attribute is DI.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5489 (define_insn "ssse3_phaddwv4hi3"
5490 [(set (match_operand:V4HI 0 "register_operand" "=y")
5495 (match_operand:V4HI 1 "register_operand" "0")
5496 (parallel [(const_int 0)]))
5497 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5499 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5500 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5504 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5505 (parallel [(const_int 0)]))
5506 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5508 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5509 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5511 "phaddw\t{%2, %0|%0, %2}"
5512 [(set_attr "type" "sseiadd")
5513 (set_attr "prefix_extra" "1")
5514 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: 128-bit "phaddd".  The V4SI destination is tied to
;; operand 1 in an SSE register ("=x" / "0"); operand 2 is a register or memory
;; ("xm").  The RTL reads SImode elements 0-3 of operand 1, then elements 0-3
;; of operand 2, in consecutive pairs.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5516 (define_insn "ssse3_phadddv4si3"
5517 [(set (match_operand:V4SI 0 "register_operand" "=x")
5522 (match_operand:V4SI 1 "register_operand" "0")
5523 (parallel [(const_int 0)]))
5524 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5526 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5527 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5531 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5532 (parallel [(const_int 0)]))
5533 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5535 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5536 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5538 "phaddd\t{%2, %0|%0, %2}"
5539 [(set_attr "type" "sseiadd")
5540 (set_attr "prefix_data16" "1")
5541 (set_attr "prefix_extra" "1")
5542 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: 64-bit "phaddd" on MMX registers ("=y" / "0" / "ym").
;; The RTL reads SImode elements 0 and 1 of operand 1, then of operand 2.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5544 (define_insn "ssse3_phadddv2si3"
5545 [(set (match_operand:V2SI 0 "register_operand" "=y")
5549 (match_operand:V2SI 1 "register_operand" "0")
5550 (parallel [(const_int 0)]))
5551 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5554 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5555 (parallel [(const_int 0)]))
5556 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5558 "phaddd\t{%2, %0|%0, %2}"
5559 [(set_attr "type" "sseiadd")
5560 (set_attr "prefix_extra" "1")
5561 (set_attr "mode" "DI")])
;; ssse3_phaddswv8hi3: 128-bit "phaddsw".  Its operand layout matches
;; ssse3_phaddwv8hi3: V8HI destination tied to operand 1 ("=x" / "0"),
;; operand 2 register or memory ("xm"), HImode elements 0-7 of each source
;; read in consecutive pairs.
;; NOTE(review): the pair-combining operators (which would distinguish this
;; from phaddw) are not visible in this listing.
5563 (define_insn "ssse3_phaddswv8hi3"
5564 [(set (match_operand:V8HI 0 "register_operand" "=x")
5570 (match_operand:V8HI 1 "register_operand" "0")
5571 (parallel [(const_int 0)]))
5572 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5574 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5575 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5578 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5579 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5581 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5582 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5587 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5588 (parallel [(const_int 0)]))
5589 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5591 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5592 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5595 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5596 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5598 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5599 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5601 "phaddsw\t{%2, %0|%0, %2}"
5602 [(set_attr "type" "sseiadd")
5603 (set_attr "prefix_data16" "1")
5604 (set_attr "prefix_extra" "1")
5605 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: 64-bit "phaddsw" on MMX registers ("=y" / "0" / "ym").
;; The RTL reads HImode elements 0-3 of operand 1, then of operand 2, in
;; consecutive pairs.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5607 (define_insn "ssse3_phaddswv4hi3"
5608 [(set (match_operand:V4HI 0 "register_operand" "=y")
5613 (match_operand:V4HI 1 "register_operand" "0")
5614 (parallel [(const_int 0)]))
5615 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5617 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5618 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5622 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5623 (parallel [(const_int 0)]))
5624 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5626 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5627 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5629 "phaddsw\t{%2, %0|%0, %2}"
5630 [(set_attr "type" "sseiadd")
5631 (set_attr "prefix_extra" "1")
5632 (set_attr "mode" "DI")])
;; ssse3_phsubwv8hi3: 128-bit "phsubw".  The V8HI destination is tied to
;; operand 1 in an SSE register ("=x" / "0"); operand 2 is a register or memory
;; ("xm").  The RTL reads HImode elements 0-7 of operand 1, then of operand 2,
;; in consecutive pairs.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5634 (define_insn "ssse3_phsubwv8hi3"
5635 [(set (match_operand:V8HI 0 "register_operand" "=x")
5641 (match_operand:V8HI 1 "register_operand" "0")
5642 (parallel [(const_int 0)]))
5643 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5645 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5646 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5649 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5650 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5652 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5653 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5658 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5659 (parallel [(const_int 0)]))
5660 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5662 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5663 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5666 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5667 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5669 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5670 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5672 "phsubw\t{%2, %0|%0, %2}"
5673 [(set_attr "type" "sseiadd")
5674 (set_attr "prefix_data16" "1")
5675 (set_attr "prefix_extra" "1")
5676 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: 64-bit "phsubw" on MMX registers ("=y" / "0" / "ym").
;; The RTL reads HImode elements 0-3 of operand 1, then of operand 2, in
;; consecutive pairs.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5678 (define_insn "ssse3_phsubwv4hi3"
5679 [(set (match_operand:V4HI 0 "register_operand" "=y")
5684 (match_operand:V4HI 1 "register_operand" "0")
5685 (parallel [(const_int 0)]))
5686 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5688 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5689 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5693 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5694 (parallel [(const_int 0)]))
5695 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5697 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5698 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5700 "phsubw\t{%2, %0|%0, %2}"
5701 [(set_attr "type" "sseiadd")
5702 (set_attr "prefix_extra" "1")
5703 (set_attr "mode" "DI")])
;; ssse3_phsubdv4si3: 128-bit "phsubd".  The V4SI destination is tied to
;; operand 1 in an SSE register ("=x" / "0"); operand 2 is a register or memory
;; ("xm").  The RTL reads SImode elements 0-3 of operand 1, then of operand 2,
;; in consecutive pairs.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5705 (define_insn "ssse3_phsubdv4si3"
5706 [(set (match_operand:V4SI 0 "register_operand" "=x")
5711 (match_operand:V4SI 1 "register_operand" "0")
5712 (parallel [(const_int 0)]))
5713 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5715 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5716 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5720 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5721 (parallel [(const_int 0)]))
5722 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5724 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5725 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5727 "phsubd\t{%2, %0|%0, %2}"
5728 [(set_attr "type" "sseiadd")
5729 (set_attr "prefix_data16" "1")
5730 (set_attr "prefix_extra" "1")
5731 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: 64-bit "phsubd" on MMX registers ("=y" / "0" / "ym").
;; The RTL reads SImode elements 0 and 1 of operand 1, then of operand 2.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5733 (define_insn "ssse3_phsubdv2si3"
5734 [(set (match_operand:V2SI 0 "register_operand" "=y")
5738 (match_operand:V2SI 1 "register_operand" "0")
5739 (parallel [(const_int 0)]))
5740 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5743 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5744 (parallel [(const_int 0)]))
5745 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5747 "phsubd\t{%2, %0|%0, %2}"
5748 [(set_attr "type" "sseiadd")
5749 (set_attr "prefix_extra" "1")
5750 (set_attr "mode" "DI")])
;; ssse3_phsubswv8hi3: 128-bit "phsubsw".  Its operand layout matches
;; ssse3_phsubwv8hi3: V8HI destination tied to operand 1 ("=x" / "0"),
;; operand 2 register or memory ("xm"), HImode elements 0-7 of each source
;; read in consecutive pairs.
;; NOTE(review): the pair-combining operators (which would distinguish this
;; from phsubw) are not visible in this listing.
5752 (define_insn "ssse3_phsubswv8hi3"
5753 [(set (match_operand:V8HI 0 "register_operand" "=x")
5759 (match_operand:V8HI 1 "register_operand" "0")
5760 (parallel [(const_int 0)]))
5761 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5763 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5764 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5767 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5768 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5770 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5771 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5776 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5777 (parallel [(const_int 0)]))
5778 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5780 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5781 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5784 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5785 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5787 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5788 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5790 "phsubsw\t{%2, %0|%0, %2}"
5791 [(set_attr "type" "sseiadd")
5792 (set_attr "prefix_data16" "1")
5793 (set_attr "prefix_extra" "1")
5794 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: 64-bit "phsubsw" on MMX registers ("=y" / "0" / "ym").
;; The RTL reads HImode elements 0-3 of operand 1, then of operand 2, in
;; consecutive pairs.
;; NOTE(review): the pair-combining operators are not visible in this listing.
5796 (define_insn "ssse3_phsubswv4hi3"
5797 [(set (match_operand:V4HI 0 "register_operand" "=y")
5802 (match_operand:V4HI 1 "register_operand" "0")
5803 (parallel [(const_int 0)]))
5804 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5806 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5807 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5811 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5812 (parallel [(const_int 0)]))
5813 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5815 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5816 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5818 "phsubsw\t{%2, %0|%0, %2}"
5819 [(set_attr "type" "sseiadd")
5820 (set_attr "prefix_extra" "1")
5821 (set_attr "mode" "DI")])
;; ssse3_pmaddubsw128: 128-bit "pmaddubsw".  The destination is V8HI; both
;; sources are V16QI, with operand 1 tied to the destination ("0") and
;; operand 2 a register or memory ("xm").  Each source is selected from twice:
;; once with a parallel starting at index 0 and once with a parallel that runs
;; from index 1 to index 15.
;; NOTE(review): the second pair of selections is written vec_select:V16QI,
;; the same mode as the full source vector, although the visible indices
;; suggest a subset of the elements.  The mode of the first pair of selections
;; is not visible here; confirm the two agree with the element count.
5823 (define_insn "ssse3_pmaddubsw128"
5824 [(set (match_operand:V8HI 0 "register_operand" "=x")
5829 (match_operand:V16QI 1 "register_operand" "0")
5830 (parallel [(const_int 0)
5840 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5841 (parallel [(const_int 0)
5851 (vec_select:V16QI (match_dup 1)
5852 (parallel [(const_int 1)
5861 (vec_select:V16QI (match_dup 2)
5862 (parallel [(const_int 1)
5869 (const_int 15)]))))))]
5871 "pmaddubsw\t{%2, %0|%0, %2}"
5872 [(set_attr "type" "sseiadd")
5873 (set_attr "prefix_data16" "1")
5874 (set_attr "prefix_extra" "1")
5875 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw: 64-bit "pmaddubsw" on MMX registers.  The destination is
;; V4HI; both sources are V8QI, with operand 1 tied to the destination ("0")
;; and operand 2 a register or memory ("ym").  Each source is selected from
;; twice: once with a parallel starting at index 0 and once with a parallel
;; that runs from index 1 to index 7.
;; NOTE(review): the second pair of selections is written vec_select:V8QI,
;; the same mode as the full source vector, although the visible indices
;; suggest a subset of the elements.  The mode of the first pair of selections
;; is not visible here; confirm the two agree with the element count.
5877 (define_insn "ssse3_pmaddubsw"
5878 [(set (match_operand:V4HI 0 "register_operand" "=y")
5883 (match_operand:V8QI 1 "register_operand" "0")
5884 (parallel [(const_int 0)
5890 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5891 (parallel [(const_int 0)
5897 (vec_select:V8QI (match_dup 1)
5898 (parallel [(const_int 1)
5903 (vec_select:V8QI (match_dup 2)
5904 (parallel [(const_int 1)
5907 (const_int 7)]))))))]
5909 "pmaddubsw\t{%2, %0|%0, %2}"
5910 [(set_attr "type" "sseiadd")
5911 (set_attr "prefix_extra" "1")
5912 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3: expander for the V8HI form.  Both sources are accepted
;; as nonimmediate operands, and the RTL includes a V8HI constant vector of
;; eight 1s.  Before the pattern is emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy for a MULT in V8HImode.
5914 (define_expand "ssse3_pmulhrswv8hi3"
5915 [(set (match_operand:V8HI 0 "register_operand" "")
5922 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5924 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5926 (const_vector:V8HI [(const_int 1) (const_int 1)
5927 (const_int 1) (const_int 1)
5928 (const_int 1) (const_int 1)
5929 (const_int 1) (const_int 1)]))
5932 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *ssse3_pmulhrswv8hi3: 128-bit "pmulhrsw".  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0"); operand 2 is a
;; register or memory ("xm").  The pattern is valid only under TARGET_SSSE3
;; and when ix86_binary_operator_ok accepts the operands for a V8HImode MULT.
5934 (define_insn "*ssse3_pmulhrswv8hi3"
5935 [(set (match_operand:V8HI 0 "register_operand" "=x")
5942 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5944 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5946 (const_vector:V8HI [(const_int 1) (const_int 1)
5947 (const_int 1) (const_int 1)
5948 (const_int 1) (const_int 1)
5949 (const_int 1) (const_int 1)]))
5951 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5952 "pmulhrsw\t{%2, %0|%0, %2}"
5953 [(set_attr "type" "sseimul")
5954 (set_attr "prefix_data16" "1")
5955 (set_attr "prefix_extra" "1")
5956 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: expander for the V4HI form.  Both sources are accepted
;; as nonimmediate operands, and the RTL includes a V4HI constant vector of
;; four 1s.  Before the pattern is emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy for a MULT in V4HImode.
5958 (define_expand "ssse3_pmulhrswv4hi3"
5959 [(set (match_operand:V4HI 0 "register_operand" "")
5966 (match_operand:V4HI 1 "nonimmediate_operand" ""))
5968 (match_operand:V4HI 2 "nonimmediate_operand" "")))
5970 (const_vector:V4HI [(const_int 1) (const_int 1)
5971 (const_int 1) (const_int 1)]))
5974 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; *ssse3_pmulhrswv4hi3: 64-bit "pmulhrsw" on MMX registers.  Operand 1 is
;; tied to the destination and marked commutative with operand 2 ("%0");
;; operand 2 is a register or memory ("ym").  The pattern is valid only under
;; TARGET_SSSE3 and when ix86_binary_operator_ok accepts the operands for a
;; V4HImode MULT.
5976 (define_insn "*ssse3_pmulhrswv4hi3"
5977 [(set (match_operand:V4HI 0 "register_operand" "=y")
5984 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5986 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5988 (const_vector:V4HI [(const_int 1) (const_int 1)
5989 (const_int 1) (const_int 1)]))
5991 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5992 "pmulhrsw\t{%2, %0|%0, %2}"
5993 [(set_attr "type" "sseimul")
5994 (set_attr "prefix_extra" "1")
5995 (set_attr "mode" "DI")])
;; ssse3_pshufbv16qi3: 128-bit "pshufb", modelled as an opaque V16QI unspec of
;; operand 1 (tied to the destination, "0") and operand 2 (register or memory,
;; "xm").
5997 (define_insn "ssse3_pshufbv16qi3"
5998 [(set (match_operand:V16QI 0 "register_operand" "=x")
5999 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6000 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6003 "pshufb\t{%2, %0|%0, %2}";
6004 [(set_attr "type" "sselog1")
6005 (set_attr "prefix_data16" "1")
6006 (set_attr "prefix_extra" "1")
6007 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: 64-bit "pshufb" on MMX registers, modelled as an opaque
;; V8QI unspec of operand 1 (tied to the destination, "0") and operand 2
;; (register or memory, "ym").
6009 (define_insn "ssse3_pshufbv8qi3"
6010 [(set (match_operand:V8QI 0 "register_operand" "=y")
6011 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6012 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6015 "pshufb\t{%2, %0|%0, %2}";
6016 [(set_attr "type" "sselog1")
6017 (set_attr "prefix_extra" "1")
6018 (set_attr "mode" "DI")])
;; ssse3_psign<mode>3, SSE form: instantiated for every mode of the SSEMODE124
;; iterator (V16QI, V8HI, V4SI).  The mnemonic suffix comes from <ssevecsize>
;; (b / w / d).  Operand 1 is tied to the destination ("0"); operand 2 is a
;; register or memory ("xm").
6020 (define_insn "ssse3_psign<mode>3"
6021 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6023 [(match_operand:SSEMODE124 1 "register_operand" "0")
6024 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6027 "psign<ssevecsize>\t{%2, %0|%0, %2}";
6028 [(set_attr "type" "sselog1")
6029 (set_attr "prefix_data16" "1")
6030 (set_attr "prefix_extra" "1")
6031 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3, MMX form: instantiated over the MMXMODEI iterator, with
;; the mnemonic suffix taken from <mmxvecsize>.  Operand 1 is tied to the
;; destination ("0"); operand 2 is a register or memory ("ym").
;; NOTE(review): MMXMODEI and mmxvecsize are defined outside this listing.
6033 (define_insn "ssse3_psign<mode>3"
6034 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6036 [(match_operand:MMXMODEI 1 "register_operand" "0")
6037 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6040 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
6041 [(set_attr "type" "sselog1")
6042 (set_attr "prefix_extra" "1")
6043 (set_attr "mode" "DI")])
;; ssse3_palignrti: 128-bit "palignr" on TImode values.  Operand 1 is tied to
;; the destination ("0"), operand 2 is a register or memory ("xm"), and
;; operand 3 is an immediate.  The output code divides operand 3 by 8 before
;; printing it.
;; NOTE(review): the predicate name const_0_to_255_mul_8_operand suggests the
;; RTL carries a multiple of 8 (presumably a bit count turned into the byte
;; count the instruction expects) -- confirm against the predicate definition.
6045 (define_insn "ssse3_palignrti"
6046 [(set (match_operand:TI 0 "register_operand" "=x")
6047 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6048 (match_operand:TI 2 "nonimmediate_operand" "xm")
6049 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6053 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6054 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6056 [(set_attr "type" "sseishft")
6057 (set_attr "prefix_data16" "1")
6058 (set_attr "prefix_extra" "1")
6059 (set_attr "mode" "TI")])
;; ssse3_palignrdi: 64-bit "palignr" on DImode values in MMX registers
;; ("=y" / "0" / "ym").  Operand 3 is an immediate; the output code divides it
;; by 8 before printing it.
;; NOTE(review): the predicate name const_0_to_255_mul_8_operand suggests the
;; RTL carries a multiple of 8 (presumably a bit count turned into the byte
;; count the instruction expects) -- confirm against the predicate definition.
6061 (define_insn "ssse3_palignrdi"
6062 [(set (match_operand:DI 0 "register_operand" "=y")
6063 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6064 (match_operand:DI 2 "nonimmediate_operand" "ym")
6065 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6069 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6070 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6072 [(set_attr "type" "sseishft")
6073 (set_attr "prefix_extra" "1")
6074 (set_attr "mode" "DI")])
;; abs<mode>2, SSE form: element-wise abs for every mode of the SSEMODE124
;; iterator (V16QI, V8HI, V4SI), emitted as pabsb / pabsw / pabsd through
;; <ssevecsize>.  The source may be a register or memory ("xm") and is not
;; tied to the destination.
6076 (define_insn "abs<mode>2"
6077 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6078 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6080 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
6081 [(set_attr "type" "sselog1")
6082 (set_attr "prefix_data16" "1")
6083 (set_attr "prefix_extra" "1")
6084 (set_attr "mode" "TI")])
;; abs<mode>2, MMX form: element-wise abs over the MMXMODEI iterator, emitted
;; as pabs<mmxvecsize>.  The source may be a register or memory ("ym") and is
;; not tied to the destination.
;; NOTE(review): MMXMODEI and mmxvecsize are defined outside this listing.
6086 (define_insn "abs<mode>2"
6087 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6088 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6090 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
6091 [(set_attr "type" "sselog1")
6092 (set_attr "prefix_extra" "1")
6093 (set_attr "mode" "DI")])
6095 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6097 ;; AMD SSE4A instructions
6099 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: stores a MODEF value held in an SSE register ("x") to
;; memory operand 0 ("=m"), emitted as movnts<ssemodefsuffix>.
;; NOTE(review): the MODEF iterator and ssemodefsuffix attribute are defined
;; outside this listing.
6101 (define_insn "sse4a_movnt<mode>"
6102 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6104 [(match_operand:MODEF 1 "register_operand" "x")]
6107 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6108 [(set_attr "type" "ssemov")
6109 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: vector-source counterpart of sse4a_movnt<mode>.  It
;; selects element 0 of an SSEMODEF2P (V4SF / V2DF) register and stores it, as
;; a <ssescalarmode> value, to memory operand 0.
;; NOTE(review): the ssescalarmode and ssemodesuffixf2c attributes are defined
;; outside this listing.
6111 (define_insn "sse4a_vmmovnt<mode>"
6112 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6113 (unspec:<ssescalarmode>
6114 [(vec_select:<ssescalarmode>
6115 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6116 (parallel [(const_int 0)]))]
6119 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6120 [(set_attr "type" "ssemov")
6121 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: immediate form of "extrq".  Operand 1 is tied to the V2DI
;; destination ("0"); operands 2 and 3 are modeless integer constants with no
;; constraint string.  In the AT&T half of the template they are printed
;; %3 first, then %2.
6123 (define_insn "sse4a_extrqi"
6124 [(set (match_operand:V2DI 0 "register_operand" "=x")
6125 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6126 (match_operand 2 "const_int_operand" "")
6127 (match_operand 3 "const_int_operand" "")]
6130 "extrq\t{%3, %2, %0|%0, %2, %3}"
6131 [(set_attr "type" "sse")
6132 (set_attr "prefix_data16" "1")
6133 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of "extrq".  Operand 1 is tied to the V2DI
;; destination ("0"); operand 2 is a V16QI value that must be in an SSE
;; register ("x", no memory alternative).
6135 (define_insn "sse4a_extrq"
6136 [(set (match_operand:V2DI 0 "register_operand" "=x")
6137 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6138 (match_operand:V16QI 2 "register_operand" "x")]
6141 "extrq\t{%2, %0|%0, %2}"
6142 [(set_attr "type" "sse")
6143 (set_attr "prefix_data16" "1")
6144 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of "insertq".  Operand 1 is tied to the V2DI
;; destination ("0"), operand 2 is a V2DI register ("x"), and operands 3 and 4
;; are modeless integer constants with no constraint string.  The pattern
;; carries the prefix_rep attribute.
6146 (define_insn "sse4a_insertqi"
6147 [(set (match_operand:V2DI 0 "register_operand" "=x")
6148 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6149 (match_operand:V2DI 2 "register_operand" "x")
6150 (match_operand 3 "const_int_operand" "")
6151 (match_operand 4 "const_int_operand" "")]
6154 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6155 [(set_attr "type" "sseins")
6156 (set_attr "prefix_rep" "1")
6157 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of "insertq".  Operand 1 is tied to the V2DI
;; destination ("0"); operand 2 is a V2DI value that must be in an SSE
;; register ("x", no memory alternative).
6159 (define_insn "sse4a_insertq"
6160 [(set (match_operand:V2DI 0 "register_operand" "=x")
6161 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6162 (match_operand:V2DI 2 "register_operand" "x")]
6165 "insertq\t{%2, %0|%0, %2}"
6166 [(set_attr "type" "sseins")
6167 (set_attr "prefix_rep" "1")
6168 (set_attr "mode" "TI")])
6170 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6172 ;; Intel SSE4.1 instructions
6174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4_1_blendp<ssemodesuffixf2c>: immediate blend for the SSEMODEF2P modes
;; (V4SF / V2DF), expressed as a vec_merge.  Operand 2 (register or memory,
;; "xm") is written first in the vec_merge and operand 1 (tied to the
;; destination, "0") second; the selector is immediate operand 3.
;; NOTE(review): the <blendbits> attribute that bounds the immediate is
;; defined outside this listing.
6176 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6177 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6178 (vec_merge:SSEMODEF2P
6179 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6180 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6181 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6183 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6184 [(set_attr "type" "ssemov")
6185 (set_attr "prefix_extra" "1")
6186 (set_attr "mode" "<MODE>")])
;; sse4_1_blendvp<ssemodesuffixf2c>: variable blend for the SSEMODEF2P modes.
;; The destination and operand 1 use the reg_not_xmm0_operand predicate and
;; operand 2 uses nonimm_not_xmm0_operand; operand 3 is a register with
;; constraint "Yz".
;; NOTE(review): the predicate names suggest operands 0-2 are kept out of xmm0
;; while "Yz" presumably pins operand 3 to xmm0 -- confirm against the
;; constraint definitions.
6188 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6189 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6191 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6192 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6193 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6196 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6197 [(set_attr "type" "ssemov")
6198 (set_attr "prefix_extra" "1")
6199 (set_attr "mode" "<MODE>")])
;; sse4_1_dpp<ssemodesuffixf2c>: "dpps" / "dppd" for the SSEMODEF2P modes.
;; Operand 1 is tied to the destination and marked commutative with operand 2
;; ("%0"); operand 2 is a register or memory ("xm"); operand 3 is an immediate
;; accepted by const_0_to_255_operand.
6201 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6202 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6204 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6205 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6206 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6209 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6210 [(set_attr "type" "ssemul")
6211 (set_attr "prefix_extra" "1")
6212 (set_attr "mode" "<MODE>")])
;; sse4_1_movntdqa: loads a V2DI value from memory operand 1 ("m", memory
;; only) into an SSE register.  The load is wrapped in an unspec rather than
;; expressed as a plain move.
6214 (define_insn "sse4_1_movntdqa"
6215 [(set (match_operand:V2DI 0 "register_operand" "=x")
6216 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6219 "movntdqa\t{%1, %0|%0, %1}"
6220 [(set_attr "type" "ssecvt")
6221 (set_attr "prefix_extra" "1")
6222 (set_attr "mode" "TI")])
;; sse4_1_mpsadbw: "mpsadbw" modelled as an opaque V16QI unspec of operand 1
;; (tied to the destination, "0"), operand 2 (register or memory, "xm") and
;; an immediate operand 3 accepted by const_0_to_255_operand.
6224 (define_insn "sse4_1_mpsadbw"
6225 [(set (match_operand:V16QI 0 "register_operand" "=x")
6226 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6227 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6228 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6231 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6232 [(set_attr "type" "sselog1")
6233 (set_attr "prefix_extra" "1")
6234 (set_attr "mode" "TI")])
;; sse4_1_packusdw: "packusdw" producing a V8HI result from two V4SI sources.
;; Operand 1 is tied to the destination ("0"); operand 2 is a register or
;; memory ("xm").
;; NOTE(review): the RTL operators applied to each V4SI source are not visible
;; in this listing.
6236 (define_insn "sse4_1_packusdw"
6237 [(set (match_operand:V8HI 0 "register_operand" "=x")
6240 (match_operand:V4SI 1 "register_operand" "0"))
6242 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6244 "packusdw\t{%2, %0|%0, %2}"
6245 [(set_attr "type" "sselog")
6246 (set_attr "prefix_extra" "1")
6247 (set_attr "mode" "TI")])
;; sse4_1_pblendvb: byte-wise variable blend on V16QI, modelled as an unspec.
;; The destination and operand 1 use reg_not_xmm0_operand and operand 2 uses
;; nonimm_not_xmm0_operand; operand 3 is a register with constraint "Yz".
;; NOTE(review): "Yz" presumably pins operand 3 to xmm0 while the predicates
;; keep the other operands out of it -- confirm against the constraint
;; definitions.
6249 (define_insn "sse4_1_pblendvb"
6250 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6251 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6252 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6253 (match_operand:V16QI 3 "register_operand" "Yz")]
6256 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6257 [(set_attr "type" "ssemov")
6258 (set_attr "prefix_extra" "1")
6259 (set_attr "mode" "TI")])
;; sse4_1_pblendw: immediate word blend on V8HI.  As in
;; sse4_1_blendp<ssemodesuffixf2c>, operand 2 (register or memory, "xm") is
;; listed before operand 1 (tied to the destination, "0"); the selector is
;; immediate operand 3, accepted by const_0_to_255_operand.
;; NOTE(review): the enclosing RTL operator is not visible in this listing.
6261 (define_insn "sse4_1_pblendw"
6262 [(set (match_operand:V8HI 0 "register_operand" "=x")
6264 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6265 (match_operand:V8HI 1 "register_operand" "0")
6266 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6268 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6269 [(set_attr "type" "ssemov")
6270 (set_attr "prefix_extra" "1")
6271 (set_attr "mode" "TI")])
;; sse4_1_phminposuw: "phminposuw" modelled as the UNSPEC_PHMINPOSUW unspec of
;; a single V8HI source, which may be a register or memory ("xm") and is not
;; tied to the destination.
6273 (define_insn "sse4_1_phminposuw"
6274 [(set (match_operand:V8HI 0 "register_operand" "=x")
6275 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6276 UNSPEC_PHMINPOSUW))]
6278 "phminposuw\t{%1, %0|%0, %1}"
6279 [(set_attr "type" "sselog1")
6280 (set_attr "prefix_extra" "1")
6281 (set_attr "mode" "TI")])
;; sse4_1_extendv8qiv8hi2: "pmovsxbw" from a full V16QI register source ("x")
;; to a V8HI register.  The element selection starts at index 0; the rest of
;; the selection is not visible in this listing.
6283 (define_insn "sse4_1_extendv8qiv8hi2"
6284 [(set (match_operand:V8HI 0 "register_operand" "=x")
6287 (match_operand:V16QI 1 "register_operand" "x")
6288 (parallel [(const_int 0)
6297 "pmovsxbw\t{%1, %0|%0, %1}"
6298 [(set_attr "type" "ssemov")
6299 (set_attr "prefix_extra" "1")
6300 (set_attr "mode" "TI")])
;; *sse4_1_extendv8qiv8hi2: variant of sse4_1_extendv8qiv8hi2 whose source is
;; a V8QI register or memory operand ("xm"), wrapped in vec_duplicate:V16QI
;; before the element selection.  It emits the same "pmovsxbw".
6302 (define_insn "*sse4_1_extendv8qiv8hi2"
6303 [(set (match_operand:V8HI 0 "register_operand" "=x")
6306 (vec_duplicate:V16QI
6307 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6308 (parallel [(const_int 0)
6317 "pmovsxbw\t{%1, %0|%0, %1}"
6318 [(set_attr "type" "ssemov")
6319 (set_attr "prefix_extra" "1")
6320 (set_attr "mode" "TI")])
;; sse4_1_extendv4qiv4si2: "pmovsxbd" from a full V16QI register source ("x")
;; to a V4SI register.  The element selection starts at index 0; the rest of
;; the selection is not visible in this listing.
6322 (define_insn "sse4_1_extendv4qiv4si2"
6323 [(set (match_operand:V4SI 0 "register_operand" "=x")
6326 (match_operand:V16QI 1 "register_operand" "x")
6327 (parallel [(const_int 0)
6332 "pmovsxbd\t{%1, %0|%0, %1}"
6333 [(set_attr "type" "ssemov")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "mode" "TI")])
;; *sse4_1_extendv4qiv4si2: variant of sse4_1_extendv4qiv4si2 whose source is
;; a V4QI register or memory operand ("xm"), wrapped in vec_duplicate:V16QI
;; before the element selection.  It emits the same "pmovsxbd".
6337 (define_insn "*sse4_1_extendv4qiv4si2"
6338 [(set (match_operand:V4SI 0 "register_operand" "=x")
6341 (vec_duplicate:V16QI
6342 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6343 (parallel [(const_int 0)
6348 "pmovsxbd\t{%1, %0|%0, %1}"
6349 [(set_attr "type" "ssemov")
6350 (set_attr "prefix_extra" "1")
6351 (set_attr "mode" "TI")])
;; sse4_1_extendv2qiv2di2: "pmovsxbq" from a full V16QI register source ("x")
;; to a V2DI register.  The element selection starts at index 0; the rest of
;; the selection is not visible in this listing.
6353 (define_insn "sse4_1_extendv2qiv2di2"
6354 [(set (match_operand:V2DI 0 "register_operand" "=x")
6357 (match_operand:V16QI 1 "register_operand" "x")
6358 (parallel [(const_int 0)
6361 "pmovsxbq\t{%1, %0|%0, %1}"
6362 [(set_attr "type" "ssemov")
6363 (set_attr "prefix_extra" "1")
6364 (set_attr "mode" "TI")])
;; *sse4_1_extendv2qiv2di2: variant of sse4_1_extendv2qiv2di2 whose source is
;; a V2QI register or memory operand ("xm"), wrapped in vec_duplicate:V16QI
;; before the element selection.  It emits the same "pmovsxbq".
6366 (define_insn "*sse4_1_extendv2qiv2di2"
6367 [(set (match_operand:V2DI 0 "register_operand" "=x")
6370 (vec_duplicate:V16QI
6371 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6372 (parallel [(const_int 0)
6375 "pmovsxbq\t{%1, %0|%0, %1}"
6376 [(set_attr "type" "ssemov")
6377 (set_attr "prefix_extra" "1")
6378 (set_attr "mode" "TI")])
;; sse4_1_extendv4hiv4si2: "pmovsxwd" from a full V8HI register source ("x")
;; to a V4SI register.  The element selection starts at index 0; the rest of
;; the selection is not visible in this listing.
6380 (define_insn "sse4_1_extendv4hiv4si2"
6381 [(set (match_operand:V4SI 0 "register_operand" "=x")
6384 (match_operand:V8HI 1 "register_operand" "x")
6385 (parallel [(const_int 0)
6390 "pmovsxwd\t{%1, %0|%0, %1}"
6391 [(set_attr "type" "ssemov")
6392 (set_attr "prefix_extra" "1")
6393 (set_attr "mode" "TI")])
;; *sse4_1_extendv4hiv4si2: variant of sse4_1_extendv4hiv4si2 whose source may
;; be a register or memory ("xm").  It emits the same "pmovsxwd".
;; NOTE(review): operand 1 is declared V2HI here, whereas the zero-extending
;; counterpart *sse4_1_zero_extendv4hiv4si2 declares it V4HI.  A two-element
;; source looks inconsistent with a pattern that produces four SImode
;; elements -- confirm whether V4HI was intended.
6395 (define_insn "*sse4_1_extendv4hiv4si2"
6396 [(set (match_operand:V4SI 0 "register_operand" "=x")
6400 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6401 (parallel [(const_int 0)
6406 "pmovsxwd\t{%1, %0|%0, %1}"
6407 [(set_attr "type" "ssemov")
6408 (set_attr "prefix_extra" "1")
6409 (set_attr "mode" "TI")])
;; sse4_1_extendv2hiv2di2: "pmovsxwq" from a full V8HI register source ("x")
;; to a V2DI register.  The element selection starts at index 0; the rest of
;; the selection is not visible in this listing.
6411 (define_insn "sse4_1_extendv2hiv2di2"
6412 [(set (match_operand:V2DI 0 "register_operand" "=x")
6415 (match_operand:V8HI 1 "register_operand" "x")
6416 (parallel [(const_int 0)
6419 "pmovsxwq\t{%1, %0|%0, %1}"
6420 [(set_attr "type" "ssemov")
6421 (set_attr "prefix_extra" "1")
6422 (set_attr "mode" "TI")])
;; *sse4_1_extendv2hiv2di2: variant of sse4_1_extendv2hiv2di2 whose source may
;; be a register or memory ("xm").  It emits the same "pmovsxwq".
;; NOTE(review): operand 1 is declared V8HI here, whereas the zero-extending
;; counterpart *sse4_1_zero_extendv2hiv2di2 declares it V2HI.  The other
;; '*' variants in this family take the narrow source mode -- confirm whether
;; V2HI was intended.
6424 (define_insn "*sse4_1_extendv2hiv2di2"
6425 [(set (match_operand:V2DI 0 "register_operand" "=x")
6429 (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
6430 (parallel [(const_int 0)
6433 "pmovsxwq\t{%1, %0|%0, %1}"
6434 [(set_attr "type" "ssemov")
6435 (set_attr "prefix_extra" "1")
6436 (set_attr "mode" "TI")])
;; sse4_1_extendv2siv2di2: "pmovsxdq" from a full V4SI register source ("x")
;; to a V2DI register.  The element selection starts at index 0; the rest of
;; the selection is not visible in this listing.
6438 (define_insn "sse4_1_extendv2siv2di2"
6439 [(set (match_operand:V2DI 0 "register_operand" "=x")
6442 (match_operand:V4SI 1 "register_operand" "x")
6443 (parallel [(const_int 0)
6446 "pmovsxdq\t{%1, %0|%0, %1}"
6447 [(set_attr "type" "ssemov")
6448 (set_attr "prefix_extra" "1")
6449 (set_attr "mode" "TI")])
;; *sse4_1_extendv2siv2di2: variant of sse4_1_extendv2siv2di2 whose source is
;; a V2SI register or memory operand ("xm").  It emits the same "pmovsxdq".
6451 (define_insn "*sse4_1_extendv2siv2di2"
6452 [(set (match_operand:V2DI 0 "register_operand" "=x")
6456 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6457 (parallel [(const_int 0)
6460 "pmovsxdq\t{%1, %0|%0, %1}"
6461 [(set_attr "type" "ssemov")
6462 (set_attr "prefix_extra" "1")
6463 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv8qiv8hi2: "pmovzxbw" from a full V16QI register source
;; ("x") to a V8HI register.  The element selection starts at index 0; the
;; rest of the selection is not visible in this listing.
6465 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6466 [(set (match_operand:V8HI 0 "register_operand" "=x")
6469 (match_operand:V16QI 1 "register_operand" "x")
6470 (parallel [(const_int 0)
6479 "pmovzxbw\t{%1, %0|%0, %1}"
6480 [(set_attr "type" "ssemov")
6481 (set_attr "prefix_extra" "1")
6482 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv8qiv8hi2: variant of sse4_1_zero_extendv8qiv8hi2 whose
;; source is a V8QI register or memory operand ("xm"), wrapped in
;; vec_duplicate:V16QI before the element selection.  It emits the same
;; "pmovzxbw".
6484 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6485 [(set (match_operand:V8HI 0 "register_operand" "=x")
6488 (vec_duplicate:V16QI
6489 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6490 (parallel [(const_int 0)
6499 "pmovzxbw\t{%1, %0|%0, %1}"
6500 [(set_attr "type" "ssemov")
6501 (set_attr "prefix_extra" "1")
6502 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv4qiv4si2: "pmovzxbd" from a full V16QI register source
;; ("x") to a V4SI register.  The element selection starts at index 0; the
;; rest of the selection is not visible in this listing.
6504 (define_insn "sse4_1_zero_extendv4qiv4si2"
6505 [(set (match_operand:V4SI 0 "register_operand" "=x")
6508 (match_operand:V16QI 1 "register_operand" "x")
6509 (parallel [(const_int 0)
6514 "pmovzxbd\t{%1, %0|%0, %1}"
6515 [(set_attr "type" "ssemov")
6516 (set_attr "prefix_extra" "1")
6517 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv4qiv4si2: variant of sse4_1_zero_extendv4qiv4si2 whose
;; source is a V4QI register or memory operand ("xm"), wrapped in
;; vec_duplicate:V16QI before the element selection.  It emits the same
;; "pmovzxbd".
6519 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6520 [(set (match_operand:V4SI 0 "register_operand" "=x")
6523 (vec_duplicate:V16QI
6524 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6525 (parallel [(const_int 0)
6530 "pmovzxbd\t{%1, %0|%0, %1}"
6531 [(set_attr "type" "ssemov")
6532 (set_attr "prefix_extra" "1")
6533 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv2qiv2di2: "pmovzxbq" from a full V16QI register source
;; ("x") to a V2DI register.  The element selection starts at index 0; the
;; rest of the selection is not visible in this listing.
6535 (define_insn "sse4_1_zero_extendv2qiv2di2"
6536 [(set (match_operand:V2DI 0 "register_operand" "=x")
6539 (match_operand:V16QI 1 "register_operand" "x")
6540 (parallel [(const_int 0)
6543 "pmovzxbq\t{%1, %0|%0, %1}"
6544 [(set_attr "type" "ssemov")
6545 (set_attr "prefix_extra" "1")
6546 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv2qiv2di2: variant of sse4_1_zero_extendv2qiv2di2 whose
;; source is a V2QI register or memory operand ("xm"), wrapped in
;; vec_duplicate:V16QI before the element selection.  It emits the same
;; "pmovzxbq".
6548 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6549 [(set (match_operand:V2DI 0 "register_operand" "=x")
6552 (vec_duplicate:V16QI
6553 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6554 (parallel [(const_int 0)
6557 "pmovzxbq\t{%1, %0|%0, %1}"
6558 [(set_attr "type" "ssemov")
6559 (set_attr "prefix_extra" "1")
6560 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv4hiv4si2: "pmovzxwd" from a full V8HI register source
;; ("x") to a V4SI register.  The element selection starts at index 0; the
;; rest of the selection is not visible in this listing.
6562 (define_insn "sse4_1_zero_extendv4hiv4si2"
6563 [(set (match_operand:V4SI 0 "register_operand" "=x")
6566 (match_operand:V8HI 1 "register_operand" "x")
6567 (parallel [(const_int 0)
6572 "pmovzxwd\t{%1, %0|%0, %1}"
6573 [(set_attr "type" "ssemov")
6574 (set_attr "prefix_extra" "1")
6575 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv4hiv4si2: variant of sse4_1_zero_extendv4hiv4si2 whose
;; source is a V4HI register or memory operand ("xm").  It emits the same
;; "pmovzxwd".
6577 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6578 [(set (match_operand:V4SI 0 "register_operand" "=x")
6582 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6583 (parallel [(const_int 0)
6588 "pmovzxwd\t{%1, %0|%0, %1}"
6589 [(set_attr "type" "ssemov")
6590 (set_attr "prefix_extra" "1")
6591 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv2hiv2di2: "pmovzxwq" from a full V8HI register source
;; ("x") to a V2DI register.  The element selection starts at index 0; the
;; rest of the selection is not visible in this listing.
6593 (define_insn "sse4_1_zero_extendv2hiv2di2"
6594 [(set (match_operand:V2DI 0 "register_operand" "=x")
6597 (match_operand:V8HI 1 "register_operand" "x")
6598 (parallel [(const_int 0)
6601 "pmovzxwq\t{%1, %0|%0, %1}"
6602 [(set_attr "type" "ssemov")
6603 (set_attr "prefix_extra" "1")
6604 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv2hiv2di2: variant of sse4_1_zero_extendv2hiv2di2 whose
;; source is a V2HI register or memory operand ("xm").  It emits the same
;; "pmovzxwq".
6606 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6607 [(set (match_operand:V2DI 0 "register_operand" "=x")
6611 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6612 (parallel [(const_int 0)
6615 "pmovzxwq\t{%1, %0|%0, %1}"
6616 [(set_attr "type" "ssemov")
6617 (set_attr "prefix_extra" "1")
6618 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv2siv2di2: "pmovzxdq" from a full V4SI register source
;; ("x") to a V2DI register.  The element selection starts at index 0; the
;; rest of the selection is not visible in this listing.
6620 (define_insn "sse4_1_zero_extendv2siv2di2"
6621 [(set (match_operand:V2DI 0 "register_operand" "=x")
6624 (match_operand:V4SI 1 "register_operand" "x")
6625 (parallel [(const_int 0)
6628 "pmovzxdq\t{%1, %0|%0, %1}"
6629 [(set_attr "type" "ssemov")
6630 (set_attr "prefix_extra" "1")
6631 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv2siv2di2: variant of sse4_1_zero_extendv2siv2di2 whose
;; source is a V2SI register or memory operand ("xm").  It emits the same
;; "pmovzxdq".
6633 (define_insn "*sse4_1_zero_extendv2siv2di2"
6634 [(set (match_operand:V2DI 0 "register_operand" "=x")
6638 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6639 (parallel [(const_int 0)
6642 "pmovzxdq\t{%1, %0|%0, %1}"
6643 [(set_attr "type" "ssemov")
6644 (set_attr "prefix_extra" "1")
6645 (set_attr "mode" "TI")])
6647 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6648 ;; But it is not really a compare instruction.
;; Emits ptest.  The only result is FLAGS_REG (mode CC), modelled as an
;; unspec of the two V2DI inputs; operand 0 must be a register, operand 1
;; may be a register or memory.
6649 (define_insn "sse4_1_ptest"
6650 [(set (reg:CC FLAGS_REG)
6651 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6652 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6655 "ptest\t{%1, %0|%0, %1}"
6656 [(set_attr "type" "ssecomi")
6657 (set_attr "prefix_extra" "1")
6658 (set_attr "mode" "TI")])
;; Packed round, instantiated for each mode of the SSEMODEF2P iterator.
;; Operand 1 is the source (register or memory) and operand 2 is an
;; immediate accepted by const_0_to_15_operand.
6660 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6663 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6664 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6667 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6668 [(set_attr "type" "ssecvt")
6669 (set_attr "prefix_extra" "1")
6670 (set_attr "mode" "<MODE>")])
;; Scalar round, instantiated for each mode of the SSEMODEF2P iterator.
;; The result is a vec_merge of the rounded operand 2 (immediate in
;; operand 3) with operand 1, and operand 1 is tied to the output
;; register (constraint "0"), so only %2 and %3 appear in the template
;; besides %0.
6672 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6673 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6674 (vec_merge:SSEMODEF2P
6676 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6677 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6679 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6682 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6683 [(set_attr "type" "ssecvt")
6684 (set_attr "prefix_extra" "1")
6685 (set_attr "mode" "<MODE>")])
6687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6689 ;; Intel SSE4.2 string/text processing instructions
6691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value (operand 0,
;; constraint "c"), a V16QI value (operand 1, constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and 4, both
;; restricted by the *_not_xmm0 predicates), their SI companions in "a"
;; and "d" (operands 3 and 5) and an immediate in 0..255 (operand 6).
;; The split is only enabled before reload.  Its preparation code looks
;; for REG_UNUSED notes on curr_insn to learn which of the three results
;; are live, then emits the pcmpestri and/or pcmpestrm forms (their guards
;; are not visible in this excerpt -- presumably `ecx' and `xmm0'), or the
;; flags-only form when FLAGS_REG is the sole live result.
6693 (define_insn_and_split "sse4_2_pcmpestr"
6694 [(set (match_operand:SI 0 "register_operand" "=c,c")
6696 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6697 (match_operand:SI 3 "register_operand" "a,a")
6698 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6699 (match_operand:SI 5 "register_operand" "d,d")
6700 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6702 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6710 (set (reg:CC FLAGS_REG)
6719 && !(reload_completed || reload_in_progress)"
6724 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6725 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6726 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6729 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6730 operands[3], operands[4],
6731 operands[5], operands[6]));
6733 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6734 operands[3], operands[4],
6735 operands[5], operands[6]));
6736 if (flags && !(ecx || xmm0))
6737 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6738 operands[2], operands[3],
6739 operands[4], operands[5],
6743 [(set_attr "type" "sselog")
6744 (set_attr "prefix_data16" "1")
6745 (set_attr "prefix_extra" "1")
6746 (set_attr "memory" "none,load")
6747 (set_attr "mode" "TI")])
;; pcmpestri: produces the SI result in operand 0 (constraint "c") and
;; sets FLAGS_REG.  Operands 1 and 3 are the V16QI inputs (operand 3 may
;; be memory in the second alternative), operands 2 and 4 are SI registers
;; constrained to "a" and "d", and operand 5 is an immediate in 0..255.
;; Operands 0, 2 and 4 do not appear in the template.
6749 (define_insn "sse4_2_pcmpestri"
6750 [(set (match_operand:SI 0 "register_operand" "=c,c")
6752 [(match_operand:V16QI 1 "register_operand" "x,x")
6753 (match_operand:SI 2 "register_operand" "a,a")
6754 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6755 (match_operand:SI 4 "register_operand" "d,d")
6756 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6758 (set (reg:CC FLAGS_REG)
6767 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6768 [(set_attr "type" "sselog")
6769 (set_attr "prefix_data16" "1")
6770 (set_attr "prefix_extra" "1")
6771 (set_attr "memory" "none,load")
6772 (set_attr "mode" "TI")])
;; pcmpestrm: produces the V16QI result in operand 0 (constraint "Yz") and
;; sets FLAGS_REG.  The input operands mirror sse4_2_pcmpestri: V16QI
;; operands 1 and 3 (operand 3 may be memory), SI operands 2 and 4 in "a"
;; and "d", and the 0..255 immediate in operand 5.
6774 (define_insn "sse4_2_pcmpestrm"
6775 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6777 [(match_operand:V16QI 1 "register_operand" "x,x")
6778 (match_operand:SI 2 "register_operand" "a,a")
6779 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6780 (match_operand:SI 4 "register_operand" "d,d")
6781 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6783 (set (reg:CC FLAGS_REG)
6792 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6793 [(set_attr "type" "sselog")
6794 (set_attr "prefix_data16" "1")
6795 (set_attr "prefix_extra" "1")
6796 (set_attr "memory" "none,load")
6797 (set_attr "mode" "TI")])
;; Flags-only pcmpestr form: only FLAGS_REG is set; the two value results
;; are modelled as scratch clobbers (operands 0 and 1).  Four alternatives:
;; the first two clobber the "Yz" scratch and print pcmpestrm, the last two
;; clobber the "c" scratch and print pcmpestri; within each pair operand 4
;; is a register or memory respectively (see the "memory" attribute).
6799 (define_insn "sse4_2_pcmpestr_cconly"
6800 [(set (reg:CC FLAGS_REG)
6802 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6803 (match_operand:SI 3 "register_operand" "a,a,a,a")
6804 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6805 (match_operand:SI 5 "register_operand" "d,d,d,d")
6806 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6808 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6809 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6812 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6813 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6814 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6815 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6816 [(set_attr "type" "sselog")
6817 (set_attr "prefix_data16" "1")
6818 (set_attr "prefix_extra" "1")
6819 (set_attr "memory" "none,load,none,load")
6820 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value (operand 0,
;; constraint "c"), a V16QI value (operand 1, constraint "Yz") and
;; FLAGS_REG.  Inputs are two V16QI operands (2 and 3, restricted by the
;; *_not_xmm0 predicates) and an immediate in 0..255 (operand 4).
;; The split is only enabled before reload.  Its preparation code checks
;; REG_UNUSED notes on curr_insn to see which results are live, then emits
;; the pcmpistri and/or pcmpistrm forms (their guards are not visible in
;; this excerpt -- presumably `ecx' and `xmm0'), or the flags-only form
;; when FLAGS_REG is the sole live result.
6822 (define_insn_and_split "sse4_2_pcmpistr"
6823 [(set (match_operand:SI 0 "register_operand" "=c,c")
6825 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6826 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6827 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6829 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6835 (set (reg:CC FLAGS_REG)
6842 && !(reload_completed || reload_in_progress)"
6847 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6848 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6849 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6852 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6853 operands[3], operands[4]));
6855 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6856 operands[3], operands[4]));
6857 if (flags && !(ecx || xmm0))
6858 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6859 operands[2], operands[3],
6863 [(set_attr "type" "sselog")
6864 (set_attr "prefix_data16" "1")
6865 (set_attr "prefix_extra" "1")
6866 (set_attr "memory" "none,load")
6867 (set_attr "mode" "TI")])
;; pcmpistri: produces the SI result in operand 0 (constraint "c") and
;; sets FLAGS_REG.  Operands 1 and 2 are the V16QI inputs (operand 2 may be
;; memory in the second alternative); operand 3 is an immediate in 0..255.
6869 (define_insn "sse4_2_pcmpistri"
6870 [(set (match_operand:SI 0 "register_operand" "=c,c")
6872 [(match_operand:V16QI 1 "register_operand" "x,x")
6873 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6874 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6876 (set (reg:CC FLAGS_REG)
6883 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6884 [(set_attr "type" "sselog")
6885 (set_attr "prefix_data16" "1")
6886 (set_attr "prefix_extra" "1")
6887 (set_attr "memory" "none,load")
6888 (set_attr "mode" "TI")])
;; pcmpistrm: produces the V16QI result in operand 0 (constraint "Yz") and
;; sets FLAGS_REG.  Inputs mirror sse4_2_pcmpistri: V16QI operands 1 and 2
;; (operand 2 may be memory) and the 0..255 immediate in operand 3.
6890 (define_insn "sse4_2_pcmpistrm"
6891 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6893 [(match_operand:V16QI 1 "register_operand" "x,x")
6894 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6895 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6897 (set (reg:CC FLAGS_REG)
6904 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6905 [(set_attr "type" "sselog")
6906 (set_attr "prefix_data16" "1")
6907 (set_attr "prefix_extra" "1")
6908 (set_attr "memory" "none,load")
6909 (set_attr "mode" "TI")])
;; Flags-only pcmpistr form: only FLAGS_REG is set; the two value results
;; are modelled as scratch clobbers (operands 0 and 1).  Four alternatives:
;; the first two clobber the "Yz" scratch and print pcmpistrm, the last two
;; clobber the "c" scratch and print pcmpistri; within each pair operand 3
;; is a register or memory respectively (see the "memory" attribute).
6911 (define_insn "sse4_2_pcmpistr_cconly"
6912 [(set (reg:CC FLAGS_REG)
6914 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6915 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6916 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6918 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6919 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6922 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6923 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6924 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6925 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6926 [(set_attr "type" "sselog")
6927 (set_attr "prefix_data16" "1")
6928 (set_attr "prefix_extra" "1")
6929 (set_attr "memory" "none,load,none,load")
6930 (set_attr "mode" "TI")])
6932 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6934 ;; SSE5 instructions
6936 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6938 ;; SSE5 parallel integer multiply/add instructions.
6939 ;; Note the instruction does not allow the value being added to be a memory
6940 ;; operand. However, pretending via the nonimmediate_operand predicate
6941 ;; that it does, and splitting it later, allows the following to be recognized:
6942 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI: operands 1 and 2 are the (commutative, "%") multiply
;; inputs and operand 3 is tied to the output register ("0").  Three
;; alternatives; in the third, where operand 1 may be memory, the template
;; prints operands 1 and 2 in swapped positions.  The insn condition calls
;; ix86_sse5_valid_op_p with 2 as its fifth argument.
6943 (define_insn "sse5_pmacsww"
6944 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6947 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
6948 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
6949 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
6950 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
6952 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6953 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6954 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6955 [(set_attr "type" "ssemuladd")
6956 (set_attr "mode" "TI")])
6958 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; All three source operands are accepted as nonimmediate_operand here.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with 1
;; as its fifth argument but accepts them with 2, and operand 0 is not
;; mentioned in any source operand.  The preparation code calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; gen_sse5_pmacsww.
6960 [(set (match_operand:V8HI 0 "register_operand" "")
6962 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6963 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6964 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6966 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
6967 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
6968 && !reg_mentioned_p (operands[0], operands[1])
6969 && !reg_mentioned_p (operands[0], operands[2])
6970 && !reg_mentioned_p (operands[0], operands[3])"
6973 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6974 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI: operands 1 and 2 are the (commutative, "%") multiply
;; inputs, at most one of them in memory per alternative, and operand 3 is
;; tied to the output register ("0").  The third alternative (operand 1 in
;; memory) prints operands 1 and 2 swapped.
6979 (define_insn "sse5_pmacssww"
6980 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6982 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6983 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6984 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
6985 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
6987 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6988 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6989 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6990 [(set_attr "type" "ssemuladd")
6991 (set_attr "mode" "TI")])
6993 ;; Note the instruction does not allow the value being added to be a memory
6994 ;; operand. However, pretending via the nonimmediate_operand predicate
6995 ;; that it does, and splitting it later, allows the following to be recognized:
6996 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI: operands 1 and 2 are the (commutative, "%") multiply
;; inputs and operand 3 is tied to the output register ("0").  The third
;; alternative (operand 1 in memory) prints operands 1 and 2 swapped.  The
;; insn condition calls ix86_sse5_valid_op_p with 2 as its fifth argument.
6997 (define_insn "sse5_pmacsdd"
6998 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7001 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7002 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7003 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7004 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
7006 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7007 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7008 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7009 [(set_attr "type" "ssemuladd")
7010 (set_attr "mode" "TI")])
7012 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; All three source operands are accepted as nonimmediate_operand here.
;; The split applies when ix86_sse5_valid_op_p rejects the operands with 1
;; as its fifth argument but accepts them with 2, and operand 0 is not
;; mentioned in any source operand.  The preparation code calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; gen_sse5_pmacsdd.
7014 [(set (match_operand:V4SI 0 "register_operand" "")
7016 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7017 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7018 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7020 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
7021 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
7022 && !reg_mentioned_p (operands[0], operands[1])
7023 && !reg_mentioned_p (operands[0], operands[2])
7024 && !reg_mentioned_p (operands[0], operands[3])"
7027 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7028 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI: operands 1 and 2 are the (commutative, "%") multiply
;; inputs, at most one of them in memory per alternative, and operand 3 is
;; tied to the output register ("0").  The third alternative (operand 1 in
;; memory) prints operands 1 and 2 swapped.
7033 (define_insn "sse5_pmacssdd"
7034 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7036 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7037 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7038 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7039 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7041 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7042 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7043 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7044 [(set_attr "type" "ssemuladd")
7045 (set_attr "mode" "TI")])
;; pmacssdql: V4SI inputs (operands 1 and 2, commutative), V2DI accumulator
;; in operand 3 tied to the V2DI output ("0").  Both visible element
;; selectors start at index 1.  The third alternative (operand 1 in memory)
;; prints operands 1 and 2 swapped.
7047 (define_insn "sse5_pmacssdql"
7048 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7053 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7054 (parallel [(const_int 1)
7057 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7058 (parallel [(const_int 1)
7060 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7061 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7063 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7064 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7065 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7066 [(set_attr "type" "ssemuladd")
7067 (set_attr "mode" "TI")])
;; pmacssdqh: V4SI inputs (operands 1 and 2, commutative), V2DI accumulator
;; in operand 3 tied to the V2DI output ("0").  Both visible element
;; selectors start at index 0.  The third alternative (operand 1 in memory)
;; prints operands 1 and 2 swapped.
7069 (define_insn "sse5_pmacssdqh"
7070 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7075 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7076 (parallel [(const_int 0)
7080 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7081 (parallel [(const_int 0)
7083 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7084 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7086 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7087 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7088 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7089 [(set_attr "type" "ssemuladd")
7090 (set_attr "mode" "TI")])
;; pmacsdql: V4SI inputs (operands 1 and 2, commutative), V2DI accumulator
;; in operand 3 tied to the V2DI output ("0").  Both visible element
;; selectors start at index 1.  The third alternative (operand 1 in memory)
;; prints operands 1 and 2 swapped.
7092 (define_insn "sse5_pmacsdql"
7093 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7098 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7099 (parallel [(const_int 1)
7103 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7104 (parallel [(const_int 1)
7106 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7107 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7109 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7110 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7111 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7112 [(set_attr "type" "ssemuladd")
7113 (set_attr "mode" "TI")])
;; Form of the pmacsdql pattern whose accumulator (operand 3) is a memory
;; operand.  The output is earlyclobber ("=&x").  The insn condition passes
;; -1 as the fifth argument of ix86_sse5_valid_op_p.  The split fires once
;; reload has completed, or earlier if operand 0 is not mentioned in
;; operand 1 or operand 2.
7115 (define_insn_and_split "*sse5_pmacsdql_mem"
7116 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7121 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7122 (parallel [(const_int 1)
7126 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7127 (parallel [(const_int 1)
7129 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7130 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
7132 "&& (reload_completed
7133 || (!reg_mentioned_p (operands[0], operands[1])
7134 && !reg_mentioned_p (operands[0], operands[2])))"
7143 (parallel [(const_int 1)
7148 (parallel [(const_int 1)
7152 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7153 ;; fake it with a multiply/add. In general, we expect the define_split to
7154 ;; occur before register allocation, so we have to handle the corner case where
7155 ;; the target is the same as operands 1/2
;; Operands 1 and 2 are V4SI; the result is an earlyclobber V2DI register.
;; The input selector picks elements 1 and 3.  The split fires once reload
;; has completed, or earlier if operand 0 is not mentioned in operand 1 or
;; operand 2; its preparation code sets operands[3] to the V2DI zero
;; constant for use by the replacement pattern.
7156 (define_insn_and_split "sse5_mulv2div2di3_low"
7157 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7161 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7162 (parallel [(const_int 1)
7166 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7167 (parallel [(const_int 1)
7168 (const_int 3)])))))]
7171 "&& (reload_completed
7172 || (!reg_mentioned_p (operands[0], operands[1])
7173 && !reg_mentioned_p (operands[0], operands[2])))"
7182 (parallel [(const_int 1)
7187 (parallel [(const_int 1)
7191 operands[3] = CONST0_RTX (V2DImode);
7193 [(set_attr "type" "ssemuladd")
7194 (set_attr "mode" "TI")])
;; pmacsdqh: V4SI inputs (operands 1 and 2, commutative), V2DI accumulator
;; in operand 3 tied to the V2DI output ("0").  Both visible element
;; selectors start at index 0.  The third alternative (operand 1 in memory)
;; prints operands 1 and 2 swapped.
7196 (define_insn "sse5_pmacsdqh"
7197 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7202 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7203 (parallel [(const_int 0)
7207 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7208 (parallel [(const_int 0)
7210 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7211 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7213 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7214 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7215 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7216 [(set_attr "type" "ssemuladd")
7217 (set_attr "mode" "TI")])
;; Form of the pmacsdqh pattern whose accumulator (operand 3) is a memory
;; operand.  The output is earlyclobber ("=&x").  The insn condition passes
;; -1 as the fifth argument of ix86_sse5_valid_op_p.  The split fires once
;; reload has completed, or earlier if operand 0 is not mentioned in
;; operand 1 or operand 2.
7219 (define_insn_and_split "*sse5_pmacsdqh_mem"
7220 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7225 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7226 (parallel [(const_int 0)
7230 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7231 (parallel [(const_int 0)
7233 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7234 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
7236 "&& (reload_completed
7237 || (!reg_mentioned_p (operands[0], operands[1])
7238 && !reg_mentioned_p (operands[0], operands[2])))"
7247 (parallel [(const_int 0)
7252 (parallel [(const_int 0)
7256 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7257 ;; fake it with a multiply/add. In general, we expect the define_split to
7258 ;; occur before register allocation, so we have to handle the corner case where
7259 ;; the target is the same as either operands[1] or operands[2]
;; Operands 1 and 2 are V4SI; the result is an earlyclobber V2DI register.
;; The input selector picks elements 0 and 2.  The split fires once reload
;; has completed, or earlier if operand 0 is not mentioned in operand 1 or
;; operand 2; its preparation code sets operands[3] to the V2DI zero
;; constant for use by the replacement pattern.
7260 (define_insn_and_split "sse5_mulv2div2di3_high"
7261 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7265 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7266 (parallel [(const_int 0)
7270 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7271 (parallel [(const_int 0)
7272 (const_int 2)])))))]
7275 "&& (reload_completed
7276 || (!reg_mentioned_p (operands[0], operands[1])
7277 && !reg_mentioned_p (operands[0], operands[2])))"
7286 (parallel [(const_int 0)
7291 (parallel [(const_int 0)
7295 operands[3] = CONST0_RTX (V2DImode);
7297 [(set_attr "type" "ssemuladd")
7298 (set_attr "mode" "TI")])
7300 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V8HI inputs (operands 1 and 2, commutative), V4SI accumulator
;; in operand 3 tied to the V4SI output ("0").  Both visible element
;; selectors start at index 1.  The third alternative (operand 1 in memory)
;; prints operands 1 and 2 swapped.
7301 (define_insn "sse5_pmacsswd"
7302 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7307 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7308 (parallel [(const_int 1)
7314 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7315 (parallel [(const_int 1)
7319 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7320 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7322 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7323 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7324 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7325 [(set_attr "type" "ssemuladd")
7326 (set_attr "mode" "TI")])
;; pmacswd: V8HI inputs (operands 1 and 2, commutative), V4SI accumulator
;; in operand 3 tied to the V4SI output ("0").  Both visible element
;; selectors start at index 1.  The third alternative (operand 1 in memory)
;; prints operands 1 and 2 swapped.
7328 (define_insn "sse5_pmacswd"
7329 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7334 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7335 (parallel [(const_int 1)
7341 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7342 (parallel [(const_int 1)
7346 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7347 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7349 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7350 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7351 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7352 [(set_attr "type" "ssemuladd")
7353 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI inputs (operands 1 and 2, commutative), V4SI accumulator
;; in operand 3 tied to the V4SI output ("0").  The pattern uses two groups
;; of element selectors, one starting at index 0 and one at index 1.  The
;; third alternative (operand 1 in memory) prints operands 1 and 2 swapped.
7355 (define_insn "sse5_pmadcsswd"
7356 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7362 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7363 (parallel [(const_int 0)
7369 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7370 (parallel [(const_int 0)
7378 (parallel [(const_int 1)
7385 (parallel [(const_int 1)
7389 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7390 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7392 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7393 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7394 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7395 [(set_attr "type" "ssemuladd")
7396 (set_attr "mode" "TI")])
;; pmadcswd: V8HI inputs (operands 1 and 2, commutative), V4SI accumulator
;; in operand 3 tied to the V4SI output ("0").  The pattern uses two groups
;; of element selectors, one starting at index 0 and one at index 1.  The
;; third alternative (operand 1 in memory) prints operands 1 and 2 swapped.
7398 (define_insn "sse5_pmadcswd"
7399 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7405 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7406 (parallel [(const_int 0)
7412 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7413 (parallel [(const_int 0)
7421 (parallel [(const_int 1)
7428 (parallel [(const_int 1)
7432 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7433 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
7435 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7436 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7437 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7438 [(set_attr "type" "ssemuladd")
7439 (set_attr "mode" "TI")])
7441 ;; SSE5 parallel XMM conditional moves
;; pcmov for every mode of the SSEMODE iterator, modelled as
;; (if_then_else operand3 operand1 operand2).  In each of the four
;; alternatives one input is tied to the output register ("0"): operand 3
;; in the first two, operand 1 in the last two.  All four alternatives
;; print the same template.
7442 (define_insn "sse5_pcmov_<mode>"
7443 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
7444 (if_then_else:SSEMODE
7445 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
7446 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
7447 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
7448 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
7450 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7451 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7452 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7453 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7454 [(set_attr "type" "sse4arg")])
7456 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: V16QI source (register or memory) to a V8HI register.  The two
;; visible element selectors start at indices 0 and 1, and the second ends
;; at index 15.
7457 (define_insn "sse5_phaddbw"
7458 [(set (match_operand:V8HI 0 "register_operand" "=x")
7462 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7463 (parallel [(const_int 0)
7474 (parallel [(const_int 1)
7481 (const_int 15)])))))]
7483 "phaddbw\t{%1, %0|%0, %1}"
7484 [(set_attr "type" "sseiadd1")])
;; phaddbd: V16QI source (register or memory) to a V4SI register.  Four
;; element selectors are used, starting at indices 0, 1, 2 and 3; the last
;; one ends at index 15.
7486 (define_insn "sse5_phaddbd"
7487 [(set (match_operand:V4SI 0 "register_operand" "=x")
7492 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7493 (parallel [(const_int 0)
7500 (parallel [(const_int 1)
7508 (parallel [(const_int 2)
7515 (parallel [(const_int 3)
7518 (const_int 15)]))))))]
7520 "phaddbd\t{%1, %0|%0, %1}"
7521 [(set_attr "type" "sseiadd1")])
;; phaddbq: V16QI source (register or memory) to a V2DI register.  Eight
;; element selectors are used, starting at indices 0-3 and 8-11; the last
;; one ends at index 15.
7523 (define_insn "sse5_phaddbq"
7524 [(set (match_operand:V2DI 0 "register_operand" "=x")
7530 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7531 (parallel [(const_int 0)
7536 (parallel [(const_int 1)
7542 (parallel [(const_int 2)
7547 (parallel [(const_int 3)
7554 (parallel [(const_int 8)
7559 (parallel [(const_int 9)
7565 (parallel [(const_int 10)
7570 (parallel [(const_int 11)
7571 (const_int 15)])))))))]
7573 "phaddbq\t{%1, %0|%0, %1}"
7574 [(set_attr "type" "sseiadd1")])
;; phaddwd: V8HI source (register or memory) to a V4SI register.  The two
;; visible element selectors start at indices 0 and 1, and the second ends
;; at index 7.
7576 (define_insn "sse5_phaddwd"
7577 [(set (match_operand:V4SI 0 "register_operand" "=x")
7581 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7582 (parallel [(const_int 0)
7589 (parallel [(const_int 1)
7592 (const_int 7)])))))]
7594 "phaddwd\t{%1, %0|%0, %1}"
7595 [(set_attr "type" "sseiadd1")])
;; phaddwq: V8HI source (register or memory) to a V2DI register.  Four
;; element selectors are used, starting at indices 0, 1, 2 and 3; the last
;; one ends at index 7.
7597 (define_insn "sse5_phaddwq"
7598 [(set (match_operand:V2DI 0 "register_operand" "=x")
7603 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7604 (parallel [(const_int 0)
7609 (parallel [(const_int 1)
7615 (parallel [(const_int 2)
7620 (parallel [(const_int 3)
7621 (const_int 7)]))))))]
7623 "phaddwq\t{%1, %0|%0, %1}"
7624 [(set_attr "type" "sseiadd1")])
;; phadddq: V4SI source (register or memory) to a V2DI register.  The two
;; element selectors start at indices 0 and 1; the second is (1, 3).
7626 (define_insn "sse5_phadddq"
7627 [(set (match_operand:V2DI 0 "register_operand" "=x")
7631 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7632 (parallel [(const_int 0)
7637 (parallel [(const_int 1)
7638 (const_int 3)])))))]
7640 "phadddq\t{%1, %0|%0, %1}"
7641 [(set_attr "type" "sseiadd1")])
;; phaddubw: V16QI source (register or memory) to a V8HI register.  The
;; two visible element selectors start at indices 0 and 1, and the second
;; ends at index 15.
7643 (define_insn "sse5_phaddubw"
7644 [(set (match_operand:V8HI 0 "register_operand" "=x")
7648 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7649 (parallel [(const_int 0)
7660 (parallel [(const_int 1)
7667 (const_int 15)])))))]
7669 "phaddubw\t{%1, %0|%0, %1}"
7670 [(set_attr "type" "sseiadd1")])
;; phaddubd: V16QI source (register or memory) to a V4SI register.  Four
;; element selectors are used, starting at indices 0, 1, 2 and 3; the last
;; one ends at index 15.
7672 (define_insn "sse5_phaddubd"
7673 [(set (match_operand:V4SI 0 "register_operand" "=x")
7678 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7679 (parallel [(const_int 0)
7686 (parallel [(const_int 1)
7694 (parallel [(const_int 2)
7701 (parallel [(const_int 3)
7704 (const_int 15)]))))))]
7706 "phaddubd\t{%1, %0|%0, %1}"
7707 [(set_attr "type" "sseiadd1")])
;; phaddubq: V16QI source (register or memory) to a V2DI register.  Eight
;; element selectors are used, starting at indices 0-3 and 8-11; the last
;; one ends at index 15.
7709 (define_insn "sse5_phaddubq"
7710 [(set (match_operand:V2DI 0 "register_operand" "=x")
7716 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7717 (parallel [(const_int 0)
7722 (parallel [(const_int 1)
7728 (parallel [(const_int 2)
7733 (parallel [(const_int 3)
7740 (parallel [(const_int 8)
7745 (parallel [(const_int 9)
7751 (parallel [(const_int 10)
7756 (parallel [(const_int 11)
7757 (const_int 15)])))))))]
7759 "phaddubq\t{%1, %0|%0, %1}"
7760 [(set_attr "type" "sseiadd1")])
;; phadduwd: V8HI source (register or memory) to a V4SI register.  The two
;; visible element selectors start at indices 0 and 1, and the second ends
;; at index 7.
7762 (define_insn "sse5_phadduwd"
7763 [(set (match_operand:V4SI 0 "register_operand" "=x")
7767 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7768 (parallel [(const_int 0)
7775 (parallel [(const_int 1)
7778 (const_int 7)])))))]
7780 "phadduwd\t{%1, %0|%0, %1}"
7781 [(set_attr "type" "sseiadd1")])
;; phadduwq: V8HI source (register or memory) to a V2DI register.  Four
;; element selectors are used, starting at indices 0, 1, 2 and 3; the last
;; one ends at index 7.
7783 (define_insn "sse5_phadduwq"
7784 [(set (match_operand:V2DI 0 "register_operand" "=x")
7789 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7790 (parallel [(const_int 0)
7795 (parallel [(const_int 1)
7801 (parallel [(const_int 2)
7806 (parallel [(const_int 3)
7807 (const_int 7)]))))))]
7809 "phadduwq\t{%1, %0|%0, %1}"
7810 [(set_attr "type" "sseiadd1")])
;; phaddudq: V4SI source (register or memory) to a V2DI register.  The two
;; element selectors start at indices 0 and 1; the second is (1, 3).
7812 (define_insn "sse5_phaddudq"
7813 [(set (match_operand:V2DI 0 "register_operand" "=x")
7817 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7818 (parallel [(const_int 0)
7823 (parallel [(const_int 1)
7824 (const_int 3)])))))]
7826 "phaddudq\t{%1, %0|%0, %1}"
7827 [(set_attr "type" "sseiadd1")])
;; phsubbw: V16QI source (register or memory) to a V8HI register.  The two
;; visible element selectors start at indices 0 and 1, and the second ends
;; at index 15.
7829 (define_insn "sse5_phsubbw"
7830 [(set (match_operand:V8HI 0 "register_operand" "=x")
7834 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7835 (parallel [(const_int 0)
7846 (parallel [(const_int 1)
7853 (const_int 15)])))))]
7855 "phsubbw\t{%1, %0|%0, %1}"
7856 [(set_attr "type" "sseiadd1")])
;; phsubwd: V8HI source (register or memory) to a V4SI register.  The two
;; visible element selectors start at indices 0 and 1, and the second ends
;; at index 7.
7858 (define_insn "sse5_phsubwd"
7859 [(set (match_operand:V4SI 0 "register_operand" "=x")
7863 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7864 (parallel [(const_int 0)
7871 (parallel [(const_int 1)
7874 (const_int 7)])))))]
7876 "phsubwd\t{%1, %0|%0, %1}"
7877 [(set_attr "type" "sseiadd1")])
;; phsubdq: V4SI source (register or memory) to a V2DI register.  The two
;; element selectors start at indices 0 and 1; the second is (1, 3).
7879 (define_insn "sse5_phsubdq"
7880 [(set (match_operand:V2DI 0 "register_operand" "=x")
7884 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7885 (parallel [(const_int 0)
7890 (parallel [(const_int 1)
7891 (const_int 3)])))))]
7893 "phsubdq\t{%1, %0|%0, %1}"
7894 [(set_attr "type" "sseiadd1")])
7896 ;; SSE5 permute instructions
;; General pperm, modelled as UNSPEC_SSE5_PERMUTE over three V16QI
;; operands.  In each of the four alternatives one input is tied to the
;; output register ("0"): operand 1 in the first two, operand 3 in the last
;; two.  A single template serves all alternatives.
7897 (define_insn "sse5_pperm"
7898 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7900 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7901 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7902 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7903 UNSPEC_SSE5_PERMUTE))]
7904 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
7905 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7906 [(set_attr "type" "sse4arg")
7907 (set_attr "mode" "TI")])
7909 ;; The following are for the various unpack insns which don't need the first
7910 ;; source operand, so we can just use the output operand for the first operand.
7911 ;; This allows either of the other two operands to be a memory operand. We
7912 ;; can't just use the first operand as an argument to the normal pperm because
7913 ;; then an output-only argument suddenly becomes an input operand.
;; pperm-based V16QI -> V8HI unpack.  Operand 1 is the V16QI source,
;; operand 2 the element-selection parallel of const_int's, and operand 3
;; (in the `use') a second V16QI input; the template prints %0 twice, so
;; the output register fills the first pperm source slot.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; matching the two constraint alternatives ("xm"/"x" and "x"/"xm").  The
;; second test must look at operand 3: operand 2 is the parallel and can
;; never satisfy register_operand.
7914 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7915 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7918 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7919 (match_operand 2 "" "")))) ;; parallel with const_int's
7920 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7922 && (register_operand (operands[1], V16QImode)
7923 || register_operand (operands[3], V16QImode))"
7924 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7925 [(set_attr "type" "sseadd")
7926 (set_attr "mode" "TI")])
;; pperm-based V16QI -> V8HI unpack (the "sign" counterpart of the zero
;; pattern, per its name).  Operand 1 is the V16QI source, operand 2 the
;; element-selection parallel of const_int's, and operand 3 (in the `use')
;; a second V16QI input; the template prints %0 twice.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; matching the two constraint alternatives.  The second test must look at
;; operand 3: operand 2 is the parallel and can never satisfy
;; register_operand.
7928 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7929 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7932 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7933 (match_operand 2 "" "")))) ;; parallel with const_int's
7934 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7936 && (register_operand (operands[1], V16QImode)
7937 || register_operand (operands[3], V16QImode))"
7938 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7939 [(set_attr "type" "sseadd")
7940 (set_attr "mode" "TI")])
;; pperm-based V8HI -> V4SI unpack.  Operand 1 is the V8HI source,
;; operand 2 the element-selection parallel of const_int's, and operand 3
;; (in the `use') a V16QI input; the template prints %0 twice.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; matching the two constraint alternatives.  The second test must look at
;; operand 3: operand 2 is the parallel and can never satisfy
;; register_operand.
7942 (define_insn "sse5_pperm_zero_v8hi_v4si"
7943 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7946 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7947 (match_operand 2 "" "")))) ;; parallel with const_int's
7948 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7950 && (register_operand (operands[1], V8HImode)
7951 || register_operand (operands[3], V16QImode))"
7952 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7953 [(set_attr "type" "sseadd")
7954 (set_attr "mode" "TI")])
;; pperm-based V8HI -> V4SI unpack (the "sign" counterpart of the zero
;; pattern, per its name).  Operand 1 is the V8HI source, operand 2 the
;; element-selection parallel of const_int's, and operand 3 (in the `use')
;; a V16QI input; the template prints %0 twice.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; matching the two constraint alternatives.  The second test must look at
;; operand 3: operand 2 is the parallel and can never satisfy
;; register_operand.
7956 (define_insn "sse5_pperm_sign_v8hi_v4si"
7957 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7960 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7961 (match_operand 2 "" "")))) ;; parallel with const_int's
7962 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7964 && (register_operand (operands[1], V8HImode)
7965 || register_operand (operands[3], V16QImode))"
7966 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7967 [(set_attr "type" "sseadd")
7968 (set_attr "mode" "TI")])
;; pperm-based V4SI -> V2DI unpack.  Operand 1 is the V4SI source,
;; operand 2 the element-selection parallel of const_int's, and operand 3
;; (in the `use') a V16QI input; the template prints %0 twice.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; matching the two constraint alternatives.  The second test must look at
;; operand 3: operand 2 is the parallel and can never satisfy
;; register_operand.
7970 (define_insn "sse5_pperm_zero_v4si_v2di"
7971 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7974 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7975 (match_operand 2 "" "")))) ;; parallel with const_int's
7976 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7978 && (register_operand (operands[1], V4SImode)
7979 || register_operand (operands[3], V16QImode))"
7980 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7981 [(set_attr "type" "sseadd")
7982 (set_attr "mode" "TI")])
;; pperm-based V4SI -> V2DI unpack (the "sign" counterpart of the zero
;; pattern, per its name).  Operand 1 is the V4SI source, operand 2 the
;; element-selection parallel of const_int's, and operand 3 (in the `use')
;; a V16QI input; the template prints %0 twice.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; matching the two constraint alternatives.  The second test must look at
;; operand 3: operand 2 is the parallel and can never satisfy
;; register_operand.
7984 (define_insn "sse5_pperm_sign_v4si_v2di"
7985 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7988 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7989 (match_operand 2 "" "")))) ;; parallel with const_int's
7990 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7992 && (register_operand (operands[1], V4SImode)
7993 || register_operand (operands[3], V16QImode))"
7994 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7995 [(set_attr "type" "sseadd")
7996 (set_attr "mode" "TI")])
7998 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; pperm-based pack of two V2DI inputs (operands 1 and 2) into a V4SI
;; result; operand 3 (in the `use') is a V16QI input.  Four alternatives,
;; each tying either operand 1 or operand 3 to the output register ("0").
7999 (define_insn "sse5_pperm_pack_v2di_v4si"
8000 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
8003 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
8005 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8006 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8007 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
8008 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8009 [(set_attr "type" "sse4arg")
8010 (set_attr "mode" "TI")])
;; pperm-based pack of two V4SI inputs (operands 1 and 2) into a V8HI
;; result; operand 3 (in the `use') is a V16QI input.  Four alternatives,
;; each tying either operand 1 or operand 3 to the output register ("0").
8012 (define_insn "sse5_pperm_pack_v4si_v8hi"
8013 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
8016 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
8018 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8019 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8020 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
8021 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8022 [(set_attr "type" "sse4arg")
8023 (set_attr "mode" "TI")])
;; pperm-based pack of two V8HI inputs (operands 1 and 2) into a V16QI
;; result; operand 3 (in the `use') is a V16QI input.  Four alternatives,
;; each tying either operand 1 or operand 3 to the output register ("0").
8025 (define_insn "sse5_pperm_pack_v8hi_v16qi"
8026 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8029 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
8031 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8032 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8033 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
8034 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8035 [(set_attr "type" "sse4arg")
8036 (set_attr "mode" "TI")])
8038 ;; Floating point permutation (permps, permpd)
;; Four-operand floating point permute for V4SF and V2DF (SSEMODEF2P).
;; Operands 1 and 2 are the data vectors and operand 3 is a V16QI control
;; vector; all three are wrapped in UNSPEC_SSE5_PERMUTE.  The mnemonic is
;; "perm" plus the <ssemodesuffixf4> suffix (ps / pd).  The destination is
;; tied to operand 1 or operand 3 depending on the alternative.
8039 (define_insn "sse5_perm<mode>"
8040 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
8042 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
8043 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
8044 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
8045 UNSPEC_SSE5_PERMUTE))]
8046 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
8047 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8048 [(set_attr "type" "sse4arg")
8049 (set_attr "mode" "<MODE>")])
8051 ;; SSE5 packed rotate instructions
;; Expander for a vector rotate left by an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, the
;; scalar is replicated <ssescalarnum> times into a PARALLEL, loaded into a
;; fresh vector register with vec_init<mode>, and the per-element variable
;; rotate sse5_vrotl<mode>3 is emitted with that vector as the count.
8052 (define_expand "rotl<mode>3"
8053 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
8055 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
8056 (match_operand:SI 2 "general_operand")))]
8059 /* If we were given a scalar, convert it to parallel */
8060 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
8062 rtvec vs = rtvec_alloc (<ssescalarnum>);
8063 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
8064 rtx reg = gen_reg_rtx (<MODE>mode);
8065 rtx op2 = operands[2];
/* Bring the count into the vector element mode before replicating it.  */
8068 if (GET_MODE (op2) != <ssescalarmode>mode)
8070 op2 = gen_reg_rtx (<ssescalarmode>mode);
8071 convert_move (op2, operands[2], false);
/* Every element of the count vector gets the same scalar.  */
8074 for (i = 0; i < <ssescalarnum>; i++)
8075 RTVEC_ELT (vs, i) = op2;
8077 emit_insn (gen_vec_init<mode> (reg, par));
8078 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate right by an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, the
;; scalar is replicated into a count vector, that vector is negated with
;; neg<mode>2, and sse5_vrotl<mode>3 is emitted with the negated counts:
;; the right rotate is expressed as a left rotate by the negated amount.
8083 (define_expand "rotr<mode>3"
8084 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
8085 (rotatert:SSEMODE1248
8086 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
8087 (match_operand:SI 2 "general_operand")))]
8090 /* If we were given a scalar, convert it to parallel */
8091 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
8093 rtvec vs = rtvec_alloc (<ssescalarnum>);
8094 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
8095 rtx neg = gen_reg_rtx (<MODE>mode);
8096 rtx reg = gen_reg_rtx (<MODE>mode);
8097 rtx op2 = operands[2];
/* Bring the count into the vector element mode before replicating it.  */
8100 if (GET_MODE (op2) != <ssescalarmode>mode)
8102 op2 = gen_reg_rtx (<ssescalarmode>mode);
8103 convert_move (op2, operands[2], false);
/* Every element of the count vector gets the same scalar.  */
8106 for (i = 0; i < <ssescalarnum>; i++)
8107 RTVEC_ELT (vs, i) = op2;
8109 emit_insn (gen_vec_init<mode> (reg, par));
/* Negate the whole count vector, then rotate left by it.  */
8110 emit_insn (gen_neg<mode>2 (neg, reg));
8111 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left of every element by an immediate.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand; operand 1 may be a register or memory.
;; The mnemonic is "prot" plus the element-size suffix from <ssevecsize>
;; (b / w / d / q).
8116 (define_insn "sse5_rotl<mode>3"
8117 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8119 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8120 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8122 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8123 [(set_attr "type" "sseishft")
8124 (set_attr "mode" "TI")])
;; Rotate right of every element by an immediate (operand 2, accepted by
;; const_0_to_<sserotatemax>_operand).  The insn is emitted as a prot
;; (rotate left) whose count, stored in operands[3], is the element width in
;; bits minus the requested right-rotate amount.
;; The element width is taken from GET_MODE_BITSIZE (<ssescalarmode>mode).
;; <ssescalarnum> is the number of elements (it sizes the vec_init parallel
;; in the rotate expanders), so <ssescalarnum> * 8 matches the element width
;; only for V4SI and gave wrong counts for V16QI, V8HI and V2DI.
8126 (define_insn "sse5_rotr<mode>3"
8127 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8128 (rotatert:SSEMODE1248
8129 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8130 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8133 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
8134 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
8136 [(set_attr "type" "sseishft")
8137 (set_attr "mode" "TI")])
;; Rotate right with a per-element count vector (operand 2).  The counts
;; are negated into a fresh register with neg<mode>2 and the result is fed
;; to sse5_vrotl<mode>3, i.e. a right rotate is a left rotate by the
;; negated count.
8139 (define_expand "vrotr<mode>3"
8140 [(match_operand:SSEMODE1248 0 "register_operand" "")
8141 (match_operand:SSEMODE1248 1 "register_operand" "")
8142 (match_operand:SSEMODE1248 2 "register_operand" "")]
8145 rtx reg = gen_reg_rtx (<MODE>mode);
8146 emit_insn (gen_neg<mode>2 (reg, operands[2]));
8147 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left with a per-element count vector (operand 2).  The operands
;; are forwarded unchanged to sse5_vrotl<mode>3.
8151 (define_expand "vrotl<mode>3"
8152 [(match_operand:SSEMODE1248 0 "register_operand" "")
8153 (match_operand:SSEMODE1248 1 "register_operand" "")
8154 (match_operand:SSEMODE1248 2 "register_operand" "")]
8157 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate: operand 1 is the data, operand 2 the vector
;; of counts.  The RTL is an if_then_else keyed on operand 2; one arm is a
;; rotatert by the negated count, so negative counts are modelled as right
;; rotates.  Either operand 1 or operand 2 may be "xm", one per alternative.
;; The mnemonic is "prot" plus the <ssevecsize> suffix.
8161 (define_insn "sse5_vrotl<mode>3"
8162 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8163 (if_then_else:SSEMODE1248
8165 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8168 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8170 (rotatert:SSEMODE1248
8172 (neg:SSEMODE1248 (match_dup 2)))))]
8173 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
8174 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8175 [(set_attr "type" "sseishft")
8176 (set_attr "mode" "TI")])
8178 ;; SSE5 packed shift instructions.
8179 ;; FIXME: add V2DI back in
;; Logical shift right with a per-element count vector, for V16QI, V8HI and
;; V4SI (SSEMODE124).  The counts are negated with neg<mode>2 and passed to
;; sse5_lshl<mode>3, so the right shift is issued as a shift by a negative
;; amount.
8180 (define_expand "vlshr<mode>3"
8181 [(match_operand:SSEMODE124 0 "register_operand" "")
8182 (match_operand:SSEMODE124 1 "register_operand" "")
8183 (match_operand:SSEMODE124 2 "register_operand" "")]
8186 rtx neg = gen_reg_rtx (<MODE>mode);
8187 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8188 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right with a per-element count vector, for V16QI, V8HI
;; and V4SI (SSEMODE124).  The counts are negated with neg<mode>2 and
;; passed to sse5_ashl<mode>3, so the right shift is issued as a shift by a
;; negative amount.
8192 (define_expand "vashr<mode>3"
8193 [(match_operand:SSEMODE124 0 "register_operand" "")
8194 (match_operand:SSEMODE124 1 "register_operand" "")
8195 (match_operand:SSEMODE124 2 "register_operand" "")]
8198 rtx neg = gen_reg_rtx (<MODE>mode);
8199 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8200 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left with a per-element count vector, for V16QI, V8HI and V4SI
;; (SSEMODE124).  The operands are forwarded unchanged to
;; sse5_ashl<mode>3.
8204 (define_expand "vashl<mode>3"
8205 [(match_operand:SSEMODE124 0 "register_operand" "")
8206 (match_operand:SSEMODE124 1 "register_operand" "")
8207 (match_operand:SSEMODE124 2 "register_operand" "")]
8210 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element arithmetic shift: operand 1 is the data, operand 2
;; the vector of counts.  The RTL is an if_then_else keyed on operand 2; one
;; arm is an ashiftrt by the negated count, so negative counts are modelled
;; as arithmetic right shifts.  Either operand 1 or operand 2 may be "xm",
;; one per alternative.  The mnemonic is "psha" plus the <ssevecsize>
;; suffix.
8214 (define_insn "sse5_ashl<mode>3"
8215 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8216 (if_then_else:SSEMODE1248
8218 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8221 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8223 (ashiftrt:SSEMODE1248
8225 (neg:SSEMODE1248 (match_dup 2)))))]
8226 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
8227 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8228 [(set_attr "type" "sseishft")
8229 (set_attr "mode" "TI")])
;; Variable per-element logical shift: operand 1 is the data, operand 2 the
;; vector of counts.  The RTL is an if_then_else keyed on operand 2; one arm
;; is an lshiftrt by the negated count, so negative counts are modelled as
;; logical right shifts.  Either operand 1 or operand 2 may be "xm", one
;; per alternative.  The mnemonic is "pshl" plus the <ssevecsize> suffix.
8231 (define_insn "sse5_lshl<mode>3"
8232 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8233 (if_then_else:SSEMODE1248
8235 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8238 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8240 (lshiftrt:SSEMODE1248
8242 (neg:SSEMODE1248 (match_dup 2)))))]
8243 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
8244 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8245 [(set_attr "type" "sseishft")
8246 (set_attr "mode" "TI")])
8248 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar SImode count (operand 2, register or
;; constant).  The count is replicated into all 16 slots of a PARALLEL,
;; loaded into a V16QI register with vec_initv16qi, and the variable shift
;; sse5_ashlv16qi3 is emitted with that vector as the count.
;; NOTE(review): operand 2 is SImode and is placed in the V16QI parallel
;; without the convert_move that the rotl<mode>3 expander performs -- confirm
;; vec_initv16qi copes with a non-QImode register element.
8249 (define_expand "ashlv16qi3"
8250 [(match_operand:V16QI 0 "register_operand" "")
8251 (match_operand:V16QI 1 "register_operand" "")
8252 (match_operand:SI 2 "nonmemory_operand" "")]
8255 rtvec vs = rtvec_alloc (16);
8256 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8257 rtx reg = gen_reg_rtx (V16QImode);
8259 for (i = 0; i < 16; i++)
8260 RTVEC_ELT (vs, i) = operands[2];
8262 emit_insn (gen_vec_initv16qi (reg, par));
8263 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count (operand 2, register or
;; constant).  The count is replicated into all 16 slots of a PARALLEL,
;; loaded into a V16QI register with vec_initv16qi, and sse5_lshlv16qi3 is
;; emitted with that vector as the count.
;; NOTE(review): operand 2 is SImode and is placed in the V16QI parallel
;; without the convert_move that the rotl<mode>3 expander performs -- confirm
;; vec_initv16qi copes with a non-QImode register element.
8267 (define_expand "lshlv16qi3"
8268 [(match_operand:V16QI 0 "register_operand" "")
8269 (match_operand:V16QI 1 "register_operand" "")
8270 (match_operand:SI 2 "nonmemory_operand" "")]
8273 rtvec vs = rtvec_alloc (16);
8274 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8275 rtx reg = gen_reg_rtx (V16QImode);
8277 for (i = 0; i < 16; i++)
8278 RTVEC_ELT (vs, i) = operands[2];
8280 emit_insn (gen_vec_initv16qi (reg, par));
8281 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count (operand 2),
;; implemented with sse5_ashlv16qi3 and a negated count.
;; For a CONST_INT the negated constant is replicated and the shift is
;; emitted directly.  For a non-constant count the scalar is replicated
;; first and the resulting vector is negated with negv16qi2 before the
;; shift is emitted.
8285 (define_expand "ashrv16qi3"
8286 [(match_operand:V16QI 0 "register_operand" "")
8287 (match_operand:V16QI 1 "register_operand" "")
8288 (match_operand:SI 2 "nonmemory_operand" "")]
8291 rtvec vs = rtvec_alloc (16);
8292 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8293 rtx reg = gen_reg_rtx (V16QImode);
8295 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
8296 ? GEN_INT (- INTVAL (operands[2]))
8299 for (i = 0; i < 16; i++)
8300 RTVEC_ELT (vs, i) = ele;
8302 emit_insn (gen_vec_initv16qi (reg, par));
8304 if (GET_CODE (operands[2]) != CONST_INT)
8306 rtx neg = gen_reg_rtx (V16QImode);
8307 emit_insn (gen_negv16qi2 (neg, reg));
8308 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
8311 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count (operand 2),
;; implemented with sse5_ashlv2di3 and a negated count.
;; The negated scalar `ele' is produced in one of three ways: a CONST_INT
;; is negated at expand time; a value whose mode is not DImode is first
;; converted to DImode and then negated with negdi2; a DImode value is
;; negated with negdi2 directly.  `ele' is then placed in both lanes and
;; loaded with vec_initv2di.
8316 (define_expand "ashrv2di3"
8317 [(match_operand:V2DI 0 "register_operand" "")
8318 (match_operand:V2DI 1 "register_operand" "")
8319 (match_operand:DI 2 "nonmemory_operand" "")]
8322 rtvec vs = rtvec_alloc (2);
8323 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
8324 rtx reg = gen_reg_rtx (V2DImode);
8327 if (GET_CODE (operands[2]) == CONST_INT)
8328 ele = GEN_INT (- INTVAL (operands[2]));
8329 else if (GET_MODE (operands[2]) != DImode)
8331 rtx move = gen_reg_rtx (DImode);
8332 ele = gen_reg_rtx (DImode);
8333 convert_move (move, operands[2], false);
8334 emit_insn (gen_negdi2 (ele, move));
8338 ele = gen_reg_rtx (DImode);
8339 emit_insn (gen_negdi2 (ele, operands[2]));
8342 RTVEC_ELT (vs, 0) = ele;
8343 RTVEC_ELT (vs, 1) = ele;
8344 emit_insn (gen_vec_initv2di (reg, par));
8345 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
8349 ;; SSE5 FRCZ support
;; Packed FRCZ for V4SF and V2DF (SSEMODEF2P).  Operand 1 (register or
;; memory) is wrapped in an unspec; the mnemonic is "frcz" plus the
;; <ssemodesuffixf4> suffix (ps / pd).
8351 (define_insn "sse5_frcz<mode>2"
8352 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8354 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8357 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8358 [(set_attr "type" "ssecvt1")
8359 (set_attr "prefix_extra" "1")
8360 (set_attr "mode" "<MODE>")])
;; FRCZ variant expressed as a vec_merge: the unspec result computed from
;; operand 2 (register or memory) is merged with operand 1, which is tied
;; to the destination ("0").  The mnemonic is "frcz" plus the
;; <ssemodesuffixf2s> suffix; only operands 2 and 0 appear in the assembly.
8363 (define_insn "sse5_vmfrcz<mode>2"
8364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8365 (vec_merge:SSEMODEF2P
8367 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8369 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8372 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8373 [(set_attr "type" "ssecvt1")
8374 (set_attr "prefix_extra" "1")
8375 (set_attr "mode" "<MODE>")])
;; cvtph2ps: produces a V4SF register result from a V4HI source (register
;; or memory), modelled as an unspec.
8377 (define_insn "sse5_cvtph2ps"
8378 [(set (match_operand:V4SF 0 "register_operand" "=x")
8379 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8382 "cvtph2ps\t{%1, %0|%0, %1}"
8383 [(set_attr "type" "ssecvt")
8384 (set_attr "mode" "V4SF")])
;; cvtps2ph: produces a V4HI result from a V4SF register source, modelled
;; as an unspec.  The destination may be a register or memory ("=xm").
8386 (define_insn "sse5_cvtps2ph"
8387 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8388 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8391 "cvtps2ph\t{%1, %0|%0, %1}"
8392 [(set_attr "type" "ssecvt")
8393 (set_attr "mode" "V4SF")])
8395 ;; Scalar versions of the com instructions that use vector types that are
8396 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8397 ;; com instructions fill in 0's in the upper bits instead of leaving them
8398 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the com comparison expressed as a vec_merge, for V4SF and
;; V2DF.  Operator 1 must satisfy sse5_comparison_float_operator and
;; compares operand 2 (register) with operand 3 (register or memory).  The
;; preparation code sets operands[4] to the all-zero vector of the mode.
8399 (define_expand "sse5_vmmaskcmp<mode>3"
8400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8401 (vec_merge:SSEMODEF2P
8402 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8403 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8404 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8409 operands[4] = CONST0_RTX (<MODE>mode);
;; Matching insn for the vec_merge form of the com comparison.  Operand 4
;; is matched with an empty predicate.  The assembly is "com", the %Y1
;; rendering of comparison operator 1, and the <ssemodesuffixf2s> suffix,
;; with operands 3, 2 and 0.
8412 (define_insn "*sse5_vmmaskcmp<mode>3"
8413 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8414 (vec_merge:SSEMODEF2P
8415 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8416 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8417 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8418 (match_operand:SSEMODEF2P 4 "")
8421 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8422 [(set_attr "type" "sse4arg")
8423 (set_attr "mode" "<ssescalarmode>")])
8425 ;; We don't have a comparison operator that always returns true/false, so
8426 ;; handle comfalse and comtrue specially.
;; comfalse / comtrue for V4SF and V2DF, modelled as UNSPEC_SSE5_TRUEFALSE
;; over operands 1 and 2 plus a const_int selector (operand 3).  The output
;; code switches on the selector and picks one of four templates: the
;; scalar ("s") and packed ("p") false forms, then the scalar and packed
;; true forms, each completed by the <ssemodesuffixf2c> suffix.
;; The last two arms used to repeat the comfalse templates, so comtrue was
;; never emitted; they now produce comtrues / comtruep.
;; NOTE(review): this assumes the third and fourth case labels are the
;; "true" selectors -- confirm against the selector constants.
8427 (define_insn "sse5_com_tf<mode>3"
8428 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8430 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8431 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8432 (match_operand:SI 3 "const_int_operand" "n")]
8433 UNSPEC_SSE5_TRUEFALSE))]
8436 const char *ret = NULL;
8438 switch (INTVAL (operands[3]))
8441 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8445 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8449 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8453 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8462 [(set_attr "type" "ssecmp")
8463 (set_attr "mode" "<MODE>")])
;; Packed floating point com comparison for V4SF and V2DF.  Operator 1 must
;; satisfy sse5_comparison_float_operator and compares operand 2 (register)
;; with operand 3 (register or memory).  The assembly is "com", the %Y1
;; rendering of operator 1, and the <ssemodesuffixf4> suffix (ps / pd).
8465 (define_insn "sse5_maskcmp<mode>3"
8466 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8467 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8468 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8469 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8471 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8472 [(set_attr "type" "ssecmp")
8473 (set_attr "mode" "<MODE>")])
;; Integer pcom comparison for V16QI, V8HI, V4SI and V2DI.  Operator 1 must
;; satisfy ix86_comparison_int_operator and compares operand 2 (register)
;; with operand 3 (register or memory).  The assembly is "pcom", the %Y1
;; rendering of operator 1, and the <ssevecsize> suffix (b / w / d / q).
;; This shares its template name with the floating point pattern; the two
;; iterate over disjoint mode sets.
8475 (define_insn "sse5_maskcmp<mode>3"
8476 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8477 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8478 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8479 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8481 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8482 [(set_attr "type" "sse4arg")
8483 (set_attr "mode" "TI")])
;; Unsigned integer pcom comparison.  Operator 1 must satisfy
;; ix86_comparison_uns_operator and compares operand 2 (register) with
;; operand 3 (register or memory).  The assembly is "pcom", the %Y1
;; rendering of operator 1, a literal "u", and the <ssevecsize> suffix.
8485 (define_insn "sse5_maskcmp_uns<mode>3"
8486 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8487 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8488 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8489 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8491 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8492 [(set_attr "type" "ssecmp")
8493 (set_attr "mode" "TI")])
8495 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8496 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8497 ;; the exact instruction generated for the intrinsic.
;; Same assembly as the plain unsigned pcom pattern, but the comparison
;; operator is wrapped in UNSPEC_SSE5_UNSIGNED_CMP so the RTL is opaque and
;; the unsigned form is kept exactly as written.
8498 (define_insn "sse5_maskcmp_uns2<mode>3"
8499 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8501 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8502 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8503 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8504 UNSPEC_SSE5_UNSIGNED_CMP))]
8506 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8507 [(set_attr "type" "ssecmp")
8508 (set_attr "mode" "TI")])
8510 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8511 ;; being added here to be complete.
;; pcomtrue / pcomfalse for the integer vector modes, modelled as
;; UNSPEC_SSE5_TRUEFALSE over operands 1 and 2 plus a const_int selector
;; (operand 3).  A non-zero selector emits pcomtrue, zero emits pcomfalse,
;; each with the <ssevecsize> suffix.
8512 (define_insn "sse5_pcom_tf<mode>3"
8513 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8515 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8516 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8517 (match_operand:SI 3 "const_int_operand" "n")]
8518 UNSPEC_SSE5_TRUEFALSE))]
8521 return ((INTVAL (operands[3]) != 0)
8522 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8523 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8525 [(set_attr "type" "ssecmp")
8526 (set_attr "mode" "TI")])
;; aesenc on V2DI, modelled as an unspec.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Only operands
;; 2 and 0 appear in the assembly.
8528 (define_insn "aesenc"
8529 [(set (match_operand:V2DI 0 "register_operand" "=x")
8530 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8531 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8534 "aesenc\t{%2, %0|%0, %2}"
8535 [(set_attr "type" "sselog1")
8536 (set_attr "prefix_extra" "1")
8537 (set_attr "mode" "TI")])
;; aesenclast on V2DI, modelled as UNSPEC_AESENCLAST.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
8539 (define_insn "aesenclast"
8540 [(set (match_operand:V2DI 0 "register_operand" "=x")
8541 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8542 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8543 UNSPEC_AESENCLAST))]
8545 "aesenclast\t{%2, %0|%0, %2}"
8546 [(set_attr "type" "sselog1")
8547 (set_attr "prefix_extra" "1")
8548 (set_attr "mode" "TI")])
;; aesdec on V2DI, modelled as an unspec.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Only operands
;; 2 and 0 appear in the assembly.
8550 (define_insn "aesdec"
8551 [(set (match_operand:V2DI 0 "register_operand" "=x")
8552 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8553 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8556 "aesdec\t{%2, %0|%0, %2}"
8557 [(set_attr "type" "sselog1")
8558 (set_attr "prefix_extra" "1")
8559 (set_attr "mode" "TI")])
;; aesdeclast on V2DI, modelled as UNSPEC_AESDECLAST.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
8561 (define_insn "aesdeclast"
8562 [(set (match_operand:V2DI 0 "register_operand" "=x")
8563 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8564 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8565 UNSPEC_AESDECLAST))]
8567 "aesdeclast\t{%2, %0|%0, %2}"
8568 [(set_attr "type" "sselog1")
8569 (set_attr "prefix_extra" "1")
8570 (set_attr "mode" "TI")])
;; aesimc on V2DI, modelled as an unspec of a single source operand
;; (register or memory).  The destination is not tied to the source.
8572 (define_insn "aesimc"
8573 [(set (match_operand:V2DI 0 "register_operand" "=x")
8574 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8577 "aesimc\t{%1, %0|%0, %1}"
8578 [(set_attr "type" "sselog1")
8579 (set_attr "prefix_extra" "1")
8580 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.  Operand 1
;; is the source (register or memory) and operand 2 is an immediate that
;; must satisfy const_0_to_255_operand.
8582 (define_insn "aeskeygenassist"
8583 [(set (match_operand:V2DI 0 "register_operand" "=x")
8584 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
8585 (match_operand:SI 2 "const_0_to_255_operand" "n")]
8586 UNSPEC_AESKEYGENASSIST))]
8588 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
8589 [(set_attr "type" "sselog1")
8590 (set_attr "prefix_extra" "1")
8591 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI, modelled as an unspec.  Operand 1 is tied to the
;; destination ("0"), operand 2 may be a register or memory, and operand 3
;; is an immediate that must satisfy const_0_to_255_operand.
8593 (define_insn "pclmulqdq"
8594 [(set (match_operand:V2DI 0 "register_operand" "=x")
8595 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8596 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
8597 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8600 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
8601 [(set_attr "type" "sselog1")
8602 (set_attr "prefix_extra" "1")
8603 (set_attr "mode" "TI")])