1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Used below by the "mov<mode>" expander and "*mov<mode>_internal" insn.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
;; Integer and floating-point modes together; used below by the
;; "push<mode>1" and "movmisalign<mode>" expanders.
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name list the
;; element sizes in bytes that the macro covers (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI elements).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element width: "b" for V16QI, "w" for V8HI, "d" for V4SI
;; and "q" for V2DI.
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in SSEMODEI.  Both operands are
;; nonimmediate_operand; the work is delegated to ix86_expand_vector_move,
;; which receives the concrete mode and the operand array.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives:
;;   0: constant matching "C" -> xmm   (type sselog1)
;;   1: xmm or memory         -> xmm   (type ssemov)
;;   2: xmm                   -> memory (type ssemov)
;; The condition requires at least one operand to be a register.
;; The output code returns standard_sse_constant_opcode for the constant
;; case, and otherwise movaps when the insn's "mode" attribute is V4SF or
;; movdqa when it is not.  The "mode" attribute expression tests
;; optimize_size, TARGET_SSE2 == 0, and alternative 2 combined with
;; TARGET_SSE_TYPELESS_STORES; its fallback value is TI.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;; Operand 1 is a DImode value viewed as V4SI through a subreg at byte 0.
;; Alternative 0 takes it from general registers ("r") and uses an
;; early-clobbered xmm scratch (operand 2); alternative 1 takes it from
;; memory ("m") and needs no scratch ("X").  Only enabled for 32-bit SSE2
;; targets with TARGET_INTER_UNIT_MOVES, and split once reload has completed.
;; Split code, integer-register case: each 32-bit half (subreg bytes 0 and 4
;; of operand 1) is loaded with sse2_loadld, into operand 0 and the scratch
;; respectively, and the two are then combined with sse2_punpckldq.
;; The other visible case emits vec_concatv2di of operand 1 with const0_rtx.
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 switch (which_alternative)
114 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
115 Assemble the 64-bit DImode value in an xmm register. */
116 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 0)));
118 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
119 gen_rtx_SUBREG (SImode, operands[1], 4)));
120 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
124 emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
;; V4SF move expander; delegates to ix86_expand_vector_move with V4SFmode.
133 (define_expand "movv4sf"
134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
135 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
138 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Alternatives: constant ("C") -> xmm, xmm/memory -> xmm,
;; xmm -> memory.  The condition requires at least one operand to be a
;; register.  The constant case returns standard_sse_constant_opcode; the
;; other visible return emits movaps.  The "mode" attribute is always V4SF.
142 (define_insn "*movv4sf_internal"
143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
144 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
146 && (register_operand (operands[0], V4SFmode)
147 || register_operand (operands[1], V4SFmode))"
149 switch (which_alternative)
152 return standard_sse_constant_opcode (insn, operands[1]);
155 return "movaps\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set_attr "mode" "V4SF")])
;; Post-reload rewrite of a V4SF register set whose source matches
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode part at byte offset 0 and sets operand 2 to the
;; V4SF zero vector; the replacement duplicates the narrowed operand 1
;; (vec_duplicate:V4SF).
164 [(set (match_operand:V4SF 0 "register_operand" "")
165 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
166 "TARGET_SSE && reload_completed"
169 (vec_duplicate:V4SF (match_dup 1))
173 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
174 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; delegates to ix86_expand_vector_move with V2DFmode.
177 (define_expand "movv2df"
178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
179 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
182 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Alternatives: constant ("C") -> xmm, xmm/memory -> xmm,
;; xmm -> memory.  The condition requires at least one operand to be a
;; register.  The constant case returns standard_sse_constant_opcode;
;; otherwise movaps is emitted when the insn's "mode" attribute is V4SF and
;; movapd when it is not.  The "mode" attribute is V4SF when optimize_size
;; is set, when TARGET_SSE2 is 0, or for the store alternative combined with
;; TARGET_SSE_TYPELESS_STORES; otherwise it is V2DF.
186 (define_insn "*movv2df_internal"
187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
188 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
190 && (register_operand (operands[0], V2DFmode)
191 || register_operand (operands[1], V2DFmode))"
193 switch (which_alternative)
196 return standard_sse_constant_opcode (insn, operands[1]);
199 if (get_attr_mode (insn) == MODE_V4SF)
200 return "movaps\t{%1, %0|%0, %1}";
202 return "movapd\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
210 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
211 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
212 (and (eq_attr "alternative" "2")
213 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
215 (const_string "V4SF")
216 (const_string "V2DF")))])
;; Post-reload rewrite (SSE2 only) of a V2DF register set whose source
;; matches zero_extended_scalar_load_operand.  The result is a vec_concat of
;; operand 1, narrowed to its DFmode part at byte offset 0, with a DFmode
;; zero (operand 2).
219 [(set (match_operand:V2DF 0 "register_operand" "")
220 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
221 "TARGET_SSE2 && reload_completed"
222 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
224 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
225 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes a single register operand
;; and delegates to ix86_expand_push.
228 (define_expand "push<mode>1"
229 [(match_operand:SSEMODE 0 "register_operand" "")]
232 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; delegates to
;; ix86_expand_vector_move_misalign.
236 (define_expand "movmisalign<mode>"
237 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
238 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
241 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as movups, either into an xmm register from a register
;; or memory, or from an xmm register into memory.  The source is wrapped in
;; an unspec, and the insn condition rejects memory-to-memory moves.
;; The "mode" attribute is V4SF, matching the operand mode and the
;; single-precision mnemonic.  It was previously V2DF, the value used by the
;; sibling "sse2_movupd" pattern.
245 (define_insn "sse_movups"
246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
247 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
249 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
250 "movups\t{%1, %0|%0, %1}"
251 [(set_attr "type" "ssemov")
252 (set_attr "mode" "V4SF")])
;; V2DF move emitted as movupd (SSE2).  Alternatives: xmm/memory -> xmm and
;; xmm -> memory; the condition rejects memory-to-memory moves.  The source
;; is wrapped in an unspec.
254 (define_insn "sse2_movupd"
255 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
256 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
258 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
259 "movupd\t{%1, %0|%0, %1}"
260 [(set_attr "type" "ssemov")
261 (set_attr "mode" "V2DF")])
;; V16QI move emitted as movdqu (SSE2).  Alternatives: xmm/memory -> xmm and
;; xmm -> memory; the condition rejects memory-to-memory moves.  The source
;; is wrapped in an unspec.  Attributes: prefix_data16 = 1, mode = TI.
263 (define_insn "sse2_movdqu"
264 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
265 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
268 "movdqu\t{%1, %0|%0, %1}"
269 [(set_attr "type" "ssemov")
270 (set_attr "prefix_data16" "1")
271 (set_attr "mode" "TI")])
;; Stores a V4SF xmm register to memory with movntps.  The destination must
;; be memory and the source, wrapped in an unspec, must be a register.
273 (define_insn "sse_movntv4sf"
274 [(set (match_operand:V4SF 0 "memory_operand" "=m")
275 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
278 "movntps\t{%1, %0|%0, %1}"
279 [(set_attr "type" "ssemov")
280 (set_attr "mode" "V4SF")])
;; Stores a V2DF xmm register to memory with movntpd.  The destination must
;; be memory and the source, wrapped in an unspec, must be a register.
;; NOTE(review): "type" is ssecvt here while sse_movntv4sf uses ssemov --
;; confirm whether the difference is intentional.
282 (define_insn "sse2_movntv2df"
283 [(set (match_operand:V2DF 0 "memory_operand" "=m")
284 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
287 "movntpd\t{%1, %0|%0, %1}"
288 [(set_attr "type" "ssecvt")
289 (set_attr "mode" "V2DF")])
;; Stores a V2DI xmm register to memory with movntdq.  The destination must
;; be memory and the source, wrapped in an unspec, must be a register.
;; Attributes: prefix_data16 = 1, mode = TI.
291 (define_insn "sse2_movntv2di"
292 [(set (match_operand:V2DI 0 "memory_operand" "=m")
293 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
296 "movntdq\t{%1, %0|%0, %1}"
297 [(set_attr "type" "ssecvt")
298 (set_attr "prefix_data16" "1")
299 (set_attr "mode" "TI")])
;; Stores an SImode general register ("r") to memory with movnti.  The
;; source is wrapped in an unspec.
;; The "mode" attribute is SI, matching the integer operands.  It was
;; previously V2DF, which describes a packed-double SSE operation rather
;; than this general-register store.
301 (define_insn "sse2_movntsi"
302 [(set (match_operand:SI 0 "memory_operand" "=m")
303 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
306 "movnti\t{%1, %0|%0, %1}"
307 [(set_attr "type" "ssecvt")
308 (set_attr "mode" "SI")])
;; Loads a V16QI value from memory into an xmm register with lddqu.  The
;; source must be memory and is wrapped in an unspec.  Attributes:
;; prefix_rep = 1, mode = TI.
310 (define_insn "sse3_lddqu"
311 [(set (match_operand:V16QI 0 "register_operand" "=x")
312 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
315 "lddqu\t{%1, %0|%0, %1}"
316 [(set_attr "type" "ssecvt")
317 (set_attr "prefix_rep" "1")
318 (set_attr "mode" "TI")])
320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
322 ;; Parallel single-precision floating point arithmetic
324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all code generation is done by
;; ix86_expand_fp_absneg_operator (NEG, ...), after which the expander is DONE.
326 (define_expand "negv4sf2"
327 [(set (match_operand:V4SF 0 "register_operand" "")
328 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
330 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all code generation is done by
;; ix86_expand_fp_absneg_operator (ABS, ...), after which the expander is DONE.
332 (define_expand "absv4sf2"
333 [(set (match_operand:V4SF 0 "register_operand" "")
334 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
336 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be registers or memory; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; template is emitted.
338 (define_expand "addv4sf3"
339 [(set (match_operand:V4SF 0 "register_operand" "")
340 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
341 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
343 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF add, emitted as addps.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be an xmm register or memory.
;; Requires TARGET_SSE and ix86_binary_operator_ok.
345 (define_insn "*addv4sf3"
346 [(set (match_operand:V4SF 0 "register_operand" "=x")
347 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
348 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
349 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
350 "addps\t{%2, %0|%0, %2}"
351 [(set_attr "type" "sseadd")
352 (set_attr "mode" "V4SF")])
;; Add on V4SF operands emitted as the scalar instruction addss; the "mode"
;; attribute is SF.  Operand 1 must be a register tied to the destination;
;; operand 2 may be an xmm register or memory.
354 (define_insn "sse_vmaddv4sf3"
355 [(set (match_operand:V4SF 0 "register_operand" "=x")
357 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
358 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
361 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
362 "addss\t{%2, %0|%0, %2}"
363 [(set_attr "type" "sseadd")
364 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register, operand 2 may
;; be a register or memory; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
366 (define_expand "subv4sf3"
367 [(set (match_operand:V4SF 0 "register_operand" "")
368 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
369 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
371 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtract, emitted as subps.  Operand 1 is tied to the
;; destination (not commutative); operand 2 may be an xmm register or memory.
373 (define_insn "*subv4sf3"
374 [(set (match_operand:V4SF 0 "register_operand" "=x")
375 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
376 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
378 "subps\t{%2, %0|%0, %2}"
379 [(set_attr "type" "sseadd")
380 (set_attr "mode" "V4SF")])
;; Subtract on V4SF operands emitted as the scalar instruction subss; the
;; "mode" attribute is SF.  Operand 1 is tied to the destination; operand 2
;; may be an xmm register or memory.
382 (define_insn "sse_vmsubv4sf3"
383 [(set (match_operand:V4SF 0 "register_operand" "=x")
385 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
386 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
390 "subss\t{%2, %0|%0, %2}"
391 [(set_attr "type" "sseadd")
392 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both inputs may be registers or memory;
;; the operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; template is emitted.
394 (define_expand "mulv4sf3"
395 [(set (match_operand:V4SF 0 "register_operand" "")
396 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
397 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
399 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiply, emitted as mulps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.  Requires TARGET_SSE and ix86_binary_operator_ok.
401 (define_insn "*mulv4sf3"
402 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
404 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
405 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
406 "mulps\t{%2, %0|%0, %2}"
407 [(set_attr "type" "ssemul")
408 (set_attr "mode" "V4SF")])
;; Multiply on V4SF operands emitted as the scalar instruction mulss; the
;; "mode" attribute is SF.  Operand 1 must be a register tied to the
;; destination; operand 2 may be an xmm register or memory.
410 (define_insn "sse_vmmulv4sf3"
411 [(set (match_operand:V4SF 0 "register_operand" "=x")
413 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
414 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
417 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
418 "mulss\t{%2, %0|%0, %2}"
419 [(set_attr "type" "ssemul")
420 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register, operand 2 may be a
;; register or memory; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
422 (define_expand "divv4sf3"
423 [(set (match_operand:V4SF 0 "register_operand" "")
424 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
425 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
427 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed V4SF divide, emitted as divps.  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.
429 (define_insn "*divv4sf3"
430 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
432 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
434 "divps\t{%2, %0|%0, %2}"
435 [(set_attr "type" "ssediv")
436 (set_attr "mode" "V4SF")])
;; Divide on V4SF operands emitted as the scalar instruction divss; the
;; "mode" attribute is SF.  Operand 1 is tied to the destination; operand 2
;; may be an xmm register or memory.
438 (define_insn "sse_vmdivv4sf3"
439 [(set (match_operand:V4SF 0 "register_operand" "=x")
441 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
442 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
446 "divss\t{%2, %0|%0, %2}"
447 [(set_attr "type" "ssediv")
448 (set_attr "mode" "SF")])
;; Packed V4SF operation represented as UNSPEC_RCP and emitted as rcpps.
;; The source may be an xmm register or memory and is not tied to the
;; destination.
450 (define_insn "sse_rcpv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
455 "rcpps\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "V4SF")])
;; Emitted as the scalar instruction rcpss; the "mode" attribute is SF.
;; Operand 1 (xmm or memory) is the unspec input that appears in the
;; assembler template; operand 2 is a register tied to the destination.
459 (define_insn "sse_vmrcpv4sf2"
460 [(set (match_operand:V4SF 0 "register_operand" "=x")
462 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
464 (match_operand:V4SF 2 "register_operand" "0")
467 "rcpss\t{%1, %0|%0, %1}"
468 [(set_attr "type" "sse")
469 (set_attr "mode" "SF")])
;; Packed V4SF operation represented as UNSPEC_RSQRT and emitted as rsqrtps.
;; The source may be an xmm register or memory and is not tied to the
;; destination.
471 (define_insn "sse_rsqrtv4sf2"
472 [(set (match_operand:V4SF 0 "register_operand" "=x")
474 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
476 "rsqrtps\t{%1, %0|%0, %1}"
477 [(set_attr "type" "sse")
478 (set_attr "mode" "V4SF")])
;; Emitted as the scalar instruction rsqrtss; the "mode" attribute is SF.
;; Operand 1 (xmm or memory) is the unspec input that appears in the
;; assembler template; operand 2 is a register tied to the destination.
480 (define_insn "sse_vmrsqrtv4sf2"
481 [(set (match_operand:V4SF 0 "register_operand" "=x")
483 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
485 (match_operand:V4SF 2 "register_operand" "0")
488 "rsqrtss\t{%1, %0|%0, %1}"
489 [(set_attr "type" "sse")
490 (set_attr "mode" "SF")])
;; Packed V4SF square root (sqrt:V4SF), emitted as sqrtps.  The source may
;; be an xmm register or memory and is not tied to the destination.
492 (define_insn "sqrtv4sf2"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
494 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
496 "sqrtps\t{%1, %0|%0, %1}"
497 [(set_attr "type" "sse")
498 (set_attr "mode" "V4SF")])
;; Square root emitted as the scalar instruction sqrtss; the "mode"
;; attribute is SF.  Operand 1 (xmm or memory) is the sqrt input; operand 2
;; is a register tied to the destination.
500 (define_insn "sse_vmsqrtv4sf2"
501 [(set (match_operand:V4SF 0 "register_operand" "=x")
503 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
504 (match_operand:V4SF 2 "register_operand" "0")
507 "sqrtss\t{%1, %0|%0, %1}"
508 [(set_attr "type" "sse")
509 (set_attr "mode" "SF")])
511 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
512 ;; isn't really correct, as those rtl operators aren't defined when
513 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
515 (define_expand "smaxv4sf3"
516 [(set (match_operand:V4SF 0 "register_operand" "")
517 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
518 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
521 if (!flag_finite_math_only)
522 operands[1] = force_reg (V4SFmode, operands[1]);
523 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed V4SF maximum (maxps) used only under flag_finite_math_only.  In
;; this variant operand 1 is marked commutative ("%0") and may be memory
;; before operand fix-up.
526 (define_insn "*smaxv4sf3_finite"
527 [(set (match_operand:V4SF 0 "register_operand" "=x")
528 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
529 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
530 "TARGET_SSE && flag_finite_math_only
531 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
532 "maxps\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sse")
534 (set_attr "mode" "V4SF")])
;; Packed V4SF maximum (maxps) with fixed operand order: operand 1 is a
;; register tied to the destination and is not marked commutative.
536 (define_insn "*smaxv4sf3"
537 [(set (match_operand:V4SF 0 "register_operand" "=x")
538 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
539 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
541 "maxps\t{%2, %0|%0, %2}"
542 [(set_attr "type" "sse")
543 (set_attr "mode" "V4SF")])
;; Maximum on V4SF operands emitted as the scalar instruction maxss; the
;; "mode" attribute is SF.  Operand 1 is tied to the destination; operand 2
;; may be an xmm register or memory.
545 (define_insn "sse_vmsmaxv4sf3"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
549 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
553 "maxss\t{%2, %0|%0, %2}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
557 (define_expand "sminv4sf3"
558 [(set (match_operand:V4SF 0 "register_operand" "")
559 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
560 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
563 if (!flag_finite_math_only)
564 operands[1] = force_reg (V4SFmode, operands[1]);
565 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed V4SF minimum (minps) used only under flag_finite_math_only.  In
;; this variant operand 1 is marked commutative ("%0") and may be memory
;; before operand fix-up.
568 (define_insn "*sminv4sf3_finite"
569 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
572 "TARGET_SSE && flag_finite_math_only
573 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
574 "minps\t{%2, %0|%0, %2}"
575 [(set_attr "type" "sse")
576 (set_attr "mode" "V4SF")])
;; Packed V4SF minimum (minps) with fixed operand order: operand 1 is a
;; register tied to the destination and is not marked commutative.
578 (define_insn "*sminv4sf3"
579 [(set (match_operand:V4SF 0 "register_operand" "=x")
580 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
581 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
583 "minps\t{%2, %0|%0, %2}"
584 [(set_attr "type" "sse")
585 (set_attr "mode" "V4SF")])
;; Minimum on V4SF operands emitted as the scalar instruction minss; the
;; "mode" attribute is SF.  Operand 1 is tied to the destination; operand 2
;; may be an xmm register or memory.
587 (define_insn "sse_vmsminv4sf3"
588 [(set (match_operand:V4SF 0 "register_operand" "=x")
590 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
591 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
595 "minss\t{%2, %0|%0, %2}"
596 [(set_attr "type" "sse")
597 (set_attr "mode" "SF")])
599 ;; These versions of the min/max patterns implement exactly the operations
600 ;; min = (op1 < op2 ? op1 : op2)
601 ;; max = (!(op1 < op2) ? op1 : op2)
602 ;; Their operands are not commutative, and thus they may be used in the
603 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum expressed as an unspec of operands 1 and 2
;; and emitted as minps.  Operand 1 is tied to the destination.
;; NOTE(review): "type" is sseadd here but sse in "*sminv4sf3" -- confirm
;; which is intended.
605 (define_insn "*ieee_sminv4sf3"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
607 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
608 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
611 "minps\t{%2, %0|%0, %2}"
612 [(set_attr "type" "sseadd")
613 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum expressed as an unspec of operands 1 and 2
;; and emitted as maxps.  Operand 1 is tied to the destination.
615 (define_insn "*ieee_smaxv4sf3"
616 [(set (match_operand:V4SF 0 "register_operand" "=x")
617 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
618 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
621 "maxps\t{%2, %0|%0, %2}"
622 [(set_attr "type" "sseadd")
623 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum expressed as an unspec of operands 1 and 2
;; and emitted as minpd.  Operand 1 is tied to the destination.
625 (define_insn "*ieee_sminv2df3"
626 [(set (match_operand:V2DF 0 "register_operand" "=x")
627 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
628 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
631 "minpd\t{%2, %0|%0, %2}"
632 [(set_attr "type" "sseadd")
633 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum expressed as an unspec of operands 1 and 2
;; and emitted as maxpd.  Operand 1 is tied to the destination.
635 (define_insn "*ieee_smaxv2df3"
636 [(set (match_operand:V2DF 0 "register_operand" "=x")
637 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
638 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
641 "maxpd\t{%2, %0|%0, %2}"
642 [(set_attr "type" "sseadd")
643 (set_attr "mode" "V2DF")])
;; Emitted as addsubps.  The template combines an expression on operands 1
;; and 2 with (minus:V4SF op1 op2) on the same operands.  Operand 1 is tied
;; to the destination; operand 2 may be an xmm register or memory.
;; Attribute prefix_rep = 1.
645 (define_insn "sse3_addsubv4sf3"
646 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (match_operand:V4SF 1 "register_operand" "0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
651 (minus:V4SF (match_dup 1) (match_dup 2))
654 "addsubps\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "prefix_rep" "1")
657 (set_attr "mode" "V4SF")])
;; Emitted as haddps.  The template picks elements 0, 1, 2 and 3 of
;; operand 1, then elements 0, 1, 2 and 3 of operand 2, each with its own
;; vec_select:SF.  Operand 1 is tied to the destination; operand 2 may be an
;; xmm register or memory.  Attribute prefix_rep = 1.
659 (define_insn "sse3_haddv4sf3"
660 [(set (match_operand:V4SF 0 "register_operand" "=x")
665 (match_operand:V4SF 1 "register_operand" "0")
666 (parallel [(const_int 0)]))
667 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
669 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
670 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
674 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
675 (parallel [(const_int 0)]))
676 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
678 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
679 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
681 "haddps\t{%2, %0|%0, %2}"
682 [(set_attr "type" "sseadd")
683 (set_attr "prefix_rep" "1")
684 (set_attr "mode" "V4SF")])
;; Emitted as hsubps.  The template picks elements 0, 1, 2 and 3 of
;; operand 1, then elements 0, 1, 2 and 3 of operand 2, each with its own
;; vec_select:SF.  Operand 1 is tied to the destination; operand 2 may be an
;; xmm register or memory.  Attribute prefix_rep = 1.
686 (define_insn "sse3_hsubv4sf3"
687 [(set (match_operand:V4SF 0 "register_operand" "=x")
692 (match_operand:V4SF 1 "register_operand" "0")
693 (parallel [(const_int 0)]))
694 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
696 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
697 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
701 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
702 (parallel [(const_int 0)]))
703 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
705 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
706 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
708 "hsubps\t{%2, %0|%0, %2}"
709 [(set_attr "type" "sseadd")
710 (set_attr "prefix_rep" "1")
711 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4SF register (operand 1) into operand 0.  Two code
;; paths are visible: one applies sse3_haddv4sf3 twice through a fresh
;; temporary register, the other calls ix86_expand_reduc_v4sf with
;; gen_addv4sf3 as the combining operation.
713 (define_expand "reduc_splus_v4sf"
714 [(match_operand:V4SF 0 "register_operand" "")
715 (match_operand:V4SF 1 "register_operand" "")]
720 rtx tmp = gen_reg_rtx (V4SFmode);
721 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
722 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
725 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF register (operand 1) into operand 0, done by
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
729 (define_expand "reduc_smax_v4sf"
730 [(match_operand:V4SF 0 "register_operand" "")
731 (match_operand:V4SF 1 "register_operand" "")]
734 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register (operand 1) into operand 0, done by
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
738 (define_expand "reduc_smin_v4sf"
739 [(match_operand:V4SF 0 "register_operand" "")
740 (match_operand:V4SF 1 "register_operand" "")]
743 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
747 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
749 ;; Parallel single-precision floating point comparisons
751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison.  Operand 3 is any operator accepted by
;; sse_comparison_operator and selects the condition text through %D3 in
;; "cmp%D3ps".  Operand 1 is tied to the destination; operand 2 may be an
;; xmm register or memory.
753 (define_insn "sse_maskcmpv4sf3"
754 [(set (match_operand:V4SF 0 "register_operand" "=x")
755 (match_operator:V4SF 3 "sse_comparison_operator"
756 [(match_operand:V4SF 1 "register_operand" "0")
757 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
759 "cmp%D3ps\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssecmp")
761 (set_attr "mode" "V4SF")])
;; Scalar SF comparison.  Operand 3 is any operator accepted by
;; sse_comparison_operator and selects the condition text through %D3 in
;; "cmp%D3ss".  Operand 1 is tied to the destination; operand 2 may be an
;; xmm register or memory.
763 (define_insn "sse_maskcmpsf3"
764 [(set (match_operand:SF 0 "register_operand" "=x")
765 (match_operator:SF 3 "sse_comparison_operator"
766 [(match_operand:SF 1 "register_operand" "0")
767 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
769 "cmp%D3ss\t{%2, %0|%0, %2}"
770 [(set_attr "type" "ssecmp")
771 (set_attr "mode" "SF")])
;; Comparison on V4SF operands emitted as the scalar form "cmp%D3ss"; the
;; "mode" attribute is SF.  Operand 1 is tied to the destination.  Unlike
;; the other comparison patterns here, operand 2 must be an xmm register
;; (register_operand "x"), not memory.
773 (define_insn "sse_vmmaskcmpv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "=x")
776 (match_operator:V4SF 3 "sse_comparison_operator"
777 [(match_operand:V4SF 1 "register_operand" "0")
778 (match_operand:V4SF 2 "register_operand" "x")])
782 "cmp%D3ss\t{%2, %0|%0, %2}"
783 [(set_attr "type" "ssecmp")
784 (set_attr "mode" "SF")])
;; Sets the flags register in CCFP mode from element 0 of operand 0 (xmm
;; register) and element 0 of operand 1 (xmm register or memory); emitted as
;; comiss.
786 (define_insn "sse_comi"
787 [(set (reg:CCFP FLAGS_REG)
790 (match_operand:V4SF 0 "register_operand" "x")
791 (parallel [(const_int 0)]))
793 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
794 (parallel [(const_int 0)]))))]
796 "comiss\t{%1, %0|%0, %1}"
797 [(set_attr "type" "ssecomi")
798 (set_attr "mode" "SF")])
;; Sets the flags register in CCFPU mode from element 0 of operand 0 (xmm
;; register) and element 0 of operand 1 (xmm register or memory); emitted as
;; ucomiss.
800 (define_insn "sse_ucomi"
801 [(set (reg:CCFPU FLAGS_REG)
804 (match_operand:V4SF 0 "register_operand" "x")
805 (parallel [(const_int 0)]))
807 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
808 (parallel [(const_int 0)]))))]
810 "ucomiss\t{%1, %0|%0, %1}"
811 [(set_attr "type" "ssecomi")
812 (set_attr "mode" "SF")])
;; V4SF conditional expander.  Operands 1 and 2 are general_operand values;
;; operands 4 and 5 form a bracketed operand pair (presumably the two sides
;; of the comparison -- confirm).  The preparation code calls
;; ix86_expand_fp_vcond (operands) and branches on its result.
814 (define_expand "vcondv4sf"
815 [(set (match_operand:V4SF 0 "register_operand" "")
818 [(match_operand:V4SF 4 "nonimmediate_operand" "")
819 (match_operand:V4SF 5 "nonimmediate_operand" "")])
820 (match_operand:V4SF 1 "general_operand" "")
821 (match_operand:V4SF 2 "general_operand" "")))]
824 if (ix86_expand_fp_vcond (operands))
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
832 ;; Parallel single-precision floating point logical operations
834 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
836 (define_expand "andv4sf3"
837 [(set (match_operand:V4SF 0 "register_operand" "")
838 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
839 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
841 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed V4SF bitwise AND, emitted as andps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.
843 (define_insn "*andv4sf3"
844 [(set (match_operand:V4SF 0 "register_operand" "=x")
845 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
846 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
847 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
848 "andps\t{%2, %0|%0, %2}"
849 [(set_attr "type" "sselog")
850 (set_attr "mode" "V4SF")])
;; Computes (NOT operand 1) AND operand 2 on V4SF, emitted as andnps.  The
;; complemented operand 1 is tied to the destination; operand 2 may be an xmm
;; register or memory.
852 (define_insn "sse_nandv4sf3"
853 [(set (match_operand:V4SF 0 "register_operand" "=x")
854 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
855 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
857 "andnps\t{%2, %0|%0, %2}"
858 [(set_attr "type" "sselog")
859 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
861 (define_expand "iorv4sf3"
862 [(set (match_operand:V4SF 0 "register_operand" "")
863 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
864 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
866 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed V4SF bitwise OR, emitted as orps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.
868 (define_insn "*iorv4sf3"
869 [(set (match_operand:V4SF 0 "register_operand" "=x")
870 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
871 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
872 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
873 "orps\t{%2, %0|%0, %2}"
874 [(set_attr "type" "sselog")
875 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
877 (define_expand "xorv4sf3"
878 [(set (match_operand:V4SF 0 "register_operand" "")
879 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
880 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
882 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed V4SF bitwise XOR, emitted as xorps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.
884 (define_insn "*xorv4sf3"
885 [(set (match_operand:V4SF 0 "register_operand" "=x")
886 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
887 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
888 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
889 "xorps\t{%2, %0|%0, %2}"
890 [(set_attr "type" "sselog")
891 (set_attr "mode" "V4SF")])
893 ;; Also define scalar versions. These are used for abs, neg, and
894 ;; conditional move. Using subregs into vector modes causes register
895 ;; allocation lossage. These patterns do not allow memory operands
896 ;; because the native instructions read the full 128-bits.
;; Scalar SF bitwise AND on xmm registers only, emitted with the packed
;; instruction andps (hence "mode" V4SF).  Operand 1 is tied to the
;; destination.
898 (define_insn "*andsf3"
899 [(set (match_operand:SF 0 "register_operand" "=x")
900 (and:SF (match_operand:SF 1 "register_operand" "0")
901 (match_operand:SF 2 "register_operand" "x")))]
903 "andps\t{%2, %0|%0, %2}"
904 [(set_attr "type" "sselog")
905 (set_attr "mode" "V4SF")])
;; Scalar SF (NOT operand 1) AND operand 2 on xmm registers only, emitted
;; with the packed instruction andnps (hence "mode" V4SF).  Operand 1 is
;; tied to the destination.
907 (define_insn "*nandsf3"
908 [(set (match_operand:SF 0 "register_operand" "=x")
909 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
910 (match_operand:SF 2 "register_operand" "x")))]
912 "andnps\t{%2, %0|%0, %2}"
913 [(set_attr "type" "sselog")
914 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise OR on xmm registers only, emitted with the packed
;; instruction orps (hence "mode" V4SF).  Operand 1 is tied to the
;; destination.
916 (define_insn "*iorsf3"
917 [(set (match_operand:SF 0 "register_operand" "=x")
918 (ior:SF (match_operand:SF 1 "register_operand" "0")
919 (match_operand:SF 2 "register_operand" "x")))]
921 "orps\t{%2, %0|%0, %2}"
922 [(set_attr "type" "sselog")
923 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise XOR on xmm registers only, emitted with the packed
;; instruction xorps (hence "mode" V4SF).  Operand 1 is tied to the
;; destination.
925 (define_insn "*xorsf3"
926 [(set (match_operand:SF 0 "register_operand" "=x")
927 (xor:SF (match_operand:SF 1 "register_operand" "0")
928 (match_operand:SF 2 "register_operand" "x")))]
930 "xorps\t{%2, %0|%0, %2}"
931 [(set_attr "type" "sselog")
932 (set_attr "mode" "V4SF")])
934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
936 ;; Parallel single-precision floating point conversion operations
938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI operand (constraint "ym") to floating point with
;; float:V2SF; emitted as cvtpi2ps.  Operand 1 is a V4SF register tied to
;; the destination.
940 (define_insn "sse_cvtpi2ps"
941 [(set (match_operand:V4SF 0 "register_operand" "=x")
944 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
945 (match_operand:V4SF 1 "register_operand" "0")
948 "cvtpi2ps\t{%2, %0|%0, %2}"
949 [(set_attr "type" "ssecvt")
950 (set_attr "mode" "V4SF")])
;; Converts a V4SF operand (xmm or memory) through a V4SI unspec and keeps
;; elements 0 and 1 as a V2SI result in a "y" register; emitted as cvtps2pi.
;; Attributes: unit = mmx, mode = DI.
952 (define_insn "sse_cvtps2pi"
953 [(set (match_operand:V2SI 0 "register_operand" "=y")
955 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
957 (parallel [(const_int 0) (const_int 1)])))]
959 "cvtps2pi\t{%1, %0|%0, %1}"
960 [(set_attr "type" "ssecvt")
961 (set_attr "unit" "mmx")
962 (set_attr "mode" "DI")])
;; Converts a V4SF operand (xmm or memory) with fix:V4SI and keeps elements
;; 0 and 1 as a V2SI result in a "y" register; emitted as cvttps2pi.
;; NOTE(review): "mode" is SF here but DI in the otherwise parallel
;; "sse_cvtps2pi" pattern -- confirm which value is intended.
964 (define_insn "sse_cvttps2pi"
965 [(set (match_operand:V2SI 0 "register_operand" "=y")
967 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
968 (parallel [(const_int 0) (const_int 1)])))]
970 "cvttps2pi\t{%1, %0|%0, %1}"
971 [(set_attr "type" "ssecvt")
972 (set_attr "unit" "mmx")
973 (set_attr "mode" "SF")])
;; Converts an SImode integer (general register or memory, one alternative
;; each) to SF with float:SF; emitted as cvtsi2ss.  Operand 1 is a V4SF
;; register tied to the destination in both alternatives.
975 (define_insn "sse_cvtsi2ss"
976 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
979 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
980 (match_operand:V4SF 1 "register_operand" "0,0")
983 "cvtsi2ss\t{%2, %0|%0, %2}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "vector,double")
986 (set_attr "amdfam10_decode" "vector,double")
987 (set_attr "mode" "SF")])
;; 64-bit-only variant: converts a DImode integer to SF with float:SF;
;; emitted as cvtsi2ssq.  Operand 1 is a V4SF register tied to the
;; destination in both alternatives.
;; NOTE(review): the second alternative for operand 2 is "rm" here but "m"
;; in "sse_cvtsi2ss" -- confirm whether the overlap with "r" is intended.
989 (define_insn "sse_cvtsi2ssq"
990 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
993 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
994 (match_operand:V4SF 1 "register_operand" "0,0")
996 "TARGET_SSE && TARGET_64BIT"
997 "cvtsi2ssq\t{%2, %0|%0, %2}"
998 [(set_attr "type" "sseicvt")
999 (set_attr "athlon_decode" "vector,double")
1000 (set_attr "amdfam10_decode" "vector,double")
1001 (set_attr "mode" "SF")])
;; Converts element 0 of a V4SF operand (xmm or memory) to an SImode general
;; register via UNSPEC_FIX_NOTRUNC; emitted as cvtss2si.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; "sse_cvtss2si_2" -- confirm whether that is intentional.
1003 (define_insn "sse_cvtss2si"
1004 [(set (match_operand:SI 0 "register_operand" "=r,r")
1007 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1008 (parallel [(const_int 0)]))]
1009 UNSPEC_FIX_NOTRUNC))]
1011 "cvtss2si\t{%1, %0|%0, %1}"
1012 [(set_attr "type" "sseicvt")
1013 (set_attr "athlon_decode" "double,vector")
1014 (set_attr "prefix_rep" "1")
1015 (set_attr "mode" "SI")])
;; Same conversion as "sse_cvtss2si" but taking a scalar SF operand (xmm or
;; memory) directly: SF -> SI via UNSPEC_FIX_NOTRUNC, emitted as cvtss2si.
1017 (define_insn "sse_cvtss2si_2"
1018 [(set (match_operand:SI 0 "register_operand" "=r,r")
1019 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1020 UNSPEC_FIX_NOTRUNC))]
1022 "cvtss2si\t{%1, %0|%0, %1}"
1023 [(set_attr "type" "sseicvt")
1024 (set_attr "athlon_decode" "double,vector")
1025 (set_attr "amdfam10_decode" "double,double")
1026 (set_attr "prefix_rep" "1")
1027 (set_attr "mode" "SI")])
;; 64-bit-only variant: converts element 0 of a V4SF operand (xmm or memory)
;; to a DImode general register via UNSPEC_FIX_NOTRUNC; emitted as cvtss2siq.
;; NOTE(review): no amdfam10_decode attribute is set here, unlike
;; "sse_cvtss2siq_2" -- confirm whether that is intentional.
1029 (define_insn "sse_cvtss2siq"
1030 [(set (match_operand:DI 0 "register_operand" "=r,r")
1033 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1034 (parallel [(const_int 0)]))]
1035 UNSPEC_FIX_NOTRUNC))]
1036 "TARGET_SSE && TARGET_64BIT"
1037 "cvtss2siq\t{%1, %0|%0, %1}"
1038 [(set_attr "type" "sseicvt")
1039 (set_attr "athlon_decode" "double,vector")
1040 (set_attr "prefix_rep" "1")
1041 (set_attr "mode" "DI")])
;; 64-bit-only variant taking a scalar SF operand (xmm or memory) directly:
;; SF -> DI via UNSPEC_FIX_NOTRUNC, emitted as cvtss2siq.
1043 (define_insn "sse_cvtss2siq_2"
1044 [(set (match_operand:DI 0 "register_operand" "=r,r")
1045 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1046 UNSPEC_FIX_NOTRUNC))]
1047 "TARGET_SSE && TARGET_64BIT"
1048 "cvtss2siq\t{%1, %0|%0, %1}"
1049 [(set_attr "type" "sseicvt")
1050 (set_attr "athlon_decode" "double,vector")
1051 (set_attr "amdfam10_decode" "double,double")
1052 (set_attr "prefix_rep" "1")
1053 (set_attr "mode" "DI")])
;; sse_cvttss2si: emits cvttss2si into an SImode general register from
;; element 0 of V4SF operand 1 (SSE register or memory).
1055 (define_insn "sse_cvttss2si"
1056 [(set (match_operand:SI 0 "register_operand" "=r,r")
1059 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1060 (parallel [(const_int 0)]))))]
1062 "cvttss2si\t{%1, %0|%0, %1}"
1063 [(set_attr "type" "sseicvt")
1064 (set_attr "athlon_decode" "double,vector")
1065 (set_attr "amdfam10_decode" "double,double")
1066 (set_attr "prefix_rep" "1")
1067 (set_attr "mode" "SI")])
;; sse_cvttss2siq: 64-bit form; emits cvttss2siq into a DImode general
;; register.  Requires TARGET_SSE && TARGET_64BIT.
1069 (define_insn "sse_cvttss2siq"
1070 [(set (match_operand:DI 0 "register_operand" "=r,r")
1073 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1074 (parallel [(const_int 0)]))))]
1075 "TARGET_SSE && TARGET_64BIT"
1076 "cvttss2siq\t{%1, %0|%0, %1}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "double,vector")
1079 (set_attr "amdfam10_decode" "double,double")
1080 (set_attr "prefix_rep" "1")
1081 (set_attr "mode" "DI")])
;; sse2_cvtdq2ps: (float:V4SF ...) of a V4SI register-or-memory operand,
;; emitted as cvtdq2ps.
1083 (define_insn "sse2_cvtdq2ps"
1084 [(set (match_operand:V4SF 0 "register_operand" "=x")
1085 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1087 "cvtdq2ps\t{%1, %0|%0, %1}"
1088 [(set_attr "type" "ssecvt")
1089 (set_attr "mode" "V4SF")])
;; sse2_cvtps2dq: V4SF -> V4SI conversion modeled as UNSPEC_FIX_NOTRUNC,
;; emitted as cvtps2dq.
1091 (define_insn "sse2_cvtps2dq"
1092 [(set (match_operand:V4SI 0 "register_operand" "=x")
1093 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1094 UNSPEC_FIX_NOTRUNC))]
1096 "cvtps2dq\t{%1, %0|%0, %1}"
1097 [(set_attr "type" "ssecvt")
1098 (set_attr "prefix_data16" "1")
1099 (set_attr "mode" "TI")])
;; sse2_cvttps2dq: (fix:V4SI ...) of a V4SF register-or-memory operand,
;; emitted as cvttps2dq.
1101 (define_insn "sse2_cvttps2dq"
1102 [(set (match_operand:V4SI 0 "register_operand" "=x")
1103 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1105 "cvttps2dq\t{%1, %0|%0, %1}"
1106 [(set_attr "type" "ssecvt")
1107 (set_attr "prefix_rep" "1")
1108 (set_attr "mode" "TI")])
1110 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1112 ;; Parallel single-precision floating point element swizzling
1114 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_movhlps: three alternatives -- movhlps (register source), movlps
;; reading operand 2 through the %H modifier (offsettable memory source), and
;; movhps (memory destination).  The condition rejects the case where
;; operands 1 and 2 are both memory.
1116 (define_insn "sse_movhlps"
1117 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1120 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1121 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1122 (parallel [(const_int 6)
1126 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1128 movhlps\t{%2, %0|%0, %2}
1129 movlps\t{%H2, %0|%0, %H2}
1130 movhps\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "ssemov")
1132 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movlhps: three alternatives -- movlhps (register source), movhps
;; (memory source), and movlps storing through the %H modifier on operand 0
;; (offsettable memory destination).  Operand 1 is tied to the output.
1134 (define_insn "sse_movlhps"
1135 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1138 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1139 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1140 (parallel [(const_int 0)
1144 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1146 movlhps\t{%2, %0|%0, %2}
1147 movhps\t{%2, %0|%0, %2}
1148 movlps\t{%2, %H0|%H0, %2}"
1149 [(set_attr "type" "ssemov")
1150 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_unpckhps: selects indices 2, 6, 3, 7 from operands 1 and 2;
;; emitted as unpckhps.  Operand 1 is tied to the output register.
1152 (define_insn "sse_unpckhps"
1153 [(set (match_operand:V4SF 0 "register_operand" "=x")
1156 (match_operand:V4SF 1 "register_operand" "0")
1157 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1158 (parallel [(const_int 2) (const_int 6)
1159 (const_int 3) (const_int 7)])))]
1161 "unpckhps\t{%2, %0|%0, %2}"
1162 [(set_attr "type" "sselog")
1163 (set_attr "mode" "V4SF")])
;; sse_unpcklps: selects indices 0, 4, 1, 5 from operands 1 and 2;
;; emitted as unpcklps.  Operand 1 is tied to the output register.
1165 (define_insn "sse_unpcklps"
1166 [(set (match_operand:V4SF 0 "register_operand" "=x")
1169 (match_operand:V4SF 1 "register_operand" "0")
1170 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1171 (parallel [(const_int 0) (const_int 4)
1172 (const_int 1) (const_int 5)])))]
1174 "unpcklps\t{%2, %0|%0, %2}"
1175 [(set_attr "type" "sselog")
1176 (set_attr "mode" "V4SF")])
1178 ;; These are modeled with the same vec_concat as the others so that we
1179 ;; capture users of shufps that can use the new instructions
;; sse3_movshdup: single-source shuffle of V4SF operand 1 (register or
;; memory) whose selection list starts at index 1; emitted as movshdup.
1180 (define_insn "sse3_movshdup"
1181 [(set (match_operand:V4SF 0 "register_operand" "=x")
1184 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1186 (parallel [(const_int 1)
1191 "movshdup\t{%1, %0|%0, %1}"
1192 [(set_attr "type" "sse")
1193 (set_attr "prefix_rep" "1")
1194 (set_attr "mode" "V4SF")])
;; sse3_movsldup: single-source shuffle of V4SF operand 1 (register or
;; memory) whose selection list starts at index 0; emitted as movsldup.
1196 (define_insn "sse3_movsldup"
1197 [(set (match_operand:V4SF 0 "register_operand" "=x")
1200 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1202 (parallel [(const_int 0)
1207 "movsldup\t{%1, %0|%0, %1}"
1208 [(set_attr "type" "sse")
1209 (set_attr "prefix_rep" "1")
1210 (set_attr "mode" "V4SF")])
;; sse_shufps: expander taking the 8-bit shuffle immediate as operand 3.
;; It splits the mask into four 2-bit fields and hands them to
;; gen_sse_shufps_1 as separate selectors; the two upper fields are biased
;; by 4, which puts them in the 4..7 range that sse_shufps_1 requires.
1212 (define_expand "sse_shufps"
1213 [(match_operand:V4SF 0 "register_operand" "")
1214 (match_operand:V4SF 1 "register_operand" "")
1215 (match_operand:V4SF 2 "nonimmediate_operand" "")
1216 (match_operand:SI 3 "const_int_operand" "")]
1219 int mask = INTVAL (operands[3]);
1220 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1221 GEN_INT ((mask >> 0) & 3),
1222 GEN_INT ((mask >> 2) & 3),
1223 GEN_INT (((mask >> 4) & 3) + 4),
1224 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_1: shufps with the selection spelled out as four index
;; operands (3 and 4 in 0..3, 5 and 6 in 4..7).  The output code packs them
;; back into one immediate -- two bits per field, removing the bias of 4 from
;; operands 5 and 6 -- and stores it in operands[3] for the template.
1228 (define_insn "sse_shufps_1"
1229 [(set (match_operand:V4SF 0 "register_operand" "=x")
1232 (match_operand:V4SF 1 "register_operand" "0")
1233 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1234 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1235 (match_operand 4 "const_0_to_3_operand" "")
1236 (match_operand 5 "const_4_to_7_operand" "")
1237 (match_operand 6 "const_4_to_7_operand" "")])))]
1241 mask |= INTVAL (operands[3]) << 0;
1242 mask |= INTVAL (operands[4]) << 2;
1243 mask |= (INTVAL (operands[5]) - 4) << 4;
1244 mask |= (INTVAL (operands[6]) - 4) << 6;
1245 operands[3] = GEN_INT (mask);
1247 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1249 [(set_attr "type" "sselog")
1250 (set_attr "mode" "V4SF")])
;; sse_storehps: extracts elements 2 and 3 of V4SF operand 1 into a V2SF
;; destination.  Alternatives: movhps (store to memory), movhlps
;; (register to register), movlps via %H1 (offsettable memory source).
1252 (define_insn "sse_storehps"
1253 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1255 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1256 (parallel [(const_int 2) (const_int 3)])))]
1259 movhps\t{%1, %0|%0, %1}
1260 movhlps\t{%1, %0|%0, %1}
1261 movlps\t{%H1, %0|%0, %H1}"
1262 [(set_attr "type" "ssemov")
1263 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps: combines elements 0 and 1 of V4SF operand 1 (tied to the
;; output) with V2SF operand 2.  Alternatives: movhps (memory source),
;; movlhps (register source), movlps to %H0 (offsettable memory destination).
1265 (define_insn "sse_loadhps"
1266 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1269 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1270 (parallel [(const_int 0) (const_int 1)]))
1271 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1274 movhps\t{%2, %0|%0, %2}
1275 movlhps\t{%2, %0|%0, %2}
1276 movlps\t{%2, %H0|%H0, %2}"
1277 [(set_attr "type" "ssemov")
1278 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_storelps: extracts elements 0 and 1 of V4SF operand 1 into a V2SF
;; destination.  Alternatives: movlps (store), movaps (register copy),
;; movlps (load from memory).
1280 (define_insn "sse_storelps"
1281 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1283 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1284 (parallel [(const_int 0) (const_int 1)])))]
1287 movlps\t{%1, %0|%0, %1}
1288 movaps\t{%1, %0|%0, %1}
1289 movlps\t{%1, %0|%0, %1}"
1290 [(set_attr "type" "ssemov")
1291 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps: combines V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  Alternative 0 (operand 2 tied to the output) uses shufps with
;; immediate 0xe4; alternatives 1 and 2 (operand 1 tied) use movlps as a
;; load or a store respectively.
1293 (define_insn "sse_loadlps"
1294 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1296 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1298 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1299 (parallel [(const_int 2) (const_int 3)]))))]
1302 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1303 movlps\t{%2, %0|%0, %2}
1304 movlps\t{%2, %0|%0, %2}"
1305 [(set_attr "type" "sselog,ssemov,ssemov")
1306 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movss: register-only movss of operand 2 into the output register,
;; which is tied to operand 1.
1308 (define_insn "sse_movss"
1309 [(set (match_operand:V4SF 0 "register_operand" "=x")
1311 (match_operand:V4SF 2 "register_operand" "x")
1312 (match_operand:V4SF 1 "register_operand" "0")
1315 "movss\t{%2, %0|%0, %2}"
1316 [(set_attr "type" "ssemov")
1317 (set_attr "mode" "SF")])
;; *vec_dupv4sf: SF operand 1 is tied to the V4SF output register; the
;; pattern is emitted as shufps of that register with itself, immediate 0.
1319 (define_insn "*vec_dupv4sf"
1320 [(set (match_operand:V4SF 0 "register_operand" "=x")
1322 (match_operand:SF 1 "register_operand" "0")))]
1324 "shufps\t{$0, %0, %0|%0, %0, 0}"
1325 [(set_attr "type" "sselog1")
1326 (set_attr "mode" "V4SF")])
1328 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1329 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1330 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse_concatv2sf: builds a V2SF from two SF operands.  Two SSE alternatives
;; (unpcklps, movss) and two MMX alternatives (punpckldq, movd); in the
;; movss/movd alternatives operand 2 carries the "C" constraint and the
;; template only references operand 1.
1331 (define_insn "*sse_concatv2sf"
1332 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1334 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1335 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1338 unpcklps\t{%2, %0|%0, %2}
1339 movss\t{%1, %0|%0, %1}
1340 punpckldq\t{%2, %0|%0, %2}
1341 movd\t{%1, %0|%0, %1}"
1342 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1343 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *sse_concatv4sf: builds a V4SF from two V2SF operands; operand 1 is tied
;; to the output.  movlhps when operand 2 is a register, movhps when it is
;; in memory.
1345 (define_insn "*sse_concatv4sf"
1346 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1348 (match_operand:V2SF 1 "register_operand" " 0,0")
1349 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1352 movlhps\t{%2, %0|%0, %2}
1353 movhps\t{%2, %0|%0, %2}"
1354 [(set_attr "type" "ssemov")
1355 (set_attr "mode" "V4SF,V2SF")])
;; vec_initv4sf: expander that delegates V4SF initialization to
;; ix86_expand_vector_init, passing false as its first argument.
1357 (define_expand "vec_initv4sf"
1358 [(match_operand:V4SF 0 "register_operand" "")
1359 (match_operand 1 "" "")]
1362 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4sf_0: inserts SF operand 2 into V4SF operand 1, which is either
;; tied to the output ("0") or matched by the "C" constraint.  Four
;; constraint alternatives; the first two are emitted as movss and the
;; third as movd.
1366 (define_insn "vec_setv4sf_0"
1367 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
1370 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1371 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1375 movss\t{%2, %0|%0, %2}
1376 movss\t{%2, %0|%0, %2}
1377 movd\t{%2, %0|%0, %2}
1379 [(set_attr "type" "ssemov")
1380 (set_attr "mode" "SF")])
1382 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: inserts SF operand 2 into V4SF operand 1 (tied to
;; the output) using insertps.  Operand 3 is a power of two in 1..8; the
;; output code replaces it with exact_log2 (operand 3) << 4 to form the
;; insertps immediate.
1383 (define_insn "*vec_setv4sf_sse4_1"
1384 [(set (match_operand:V4SF 0 "register_operand" "=x")
1387 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1388 (match_operand:V4SF 1 "register_operand" "0")
1389 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1392 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1393 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1395 [(set_attr "type" "sselog")
1396 (set_attr "prefix_extra" "1")
1397 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: insertps with the raw 8-bit immediate (operand 3, range
;; 0..255) passed straight through to the instruction.  Operand 1 is tied to
;; the output register; operand 2 is the register source.  Modeled as an
;; unspec over the three operands.
1399 (define_insn "sse4_1_insertps"
1400 [(set (match_operand:V4SF 0 "register_operand" "=x")
1401 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1402 (match_operand:V4SF 1 "register_operand" "0")
1403 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1406 "insertps\t{%3, %2, %0|%0, %2, %3}"
1407 [(set_attr "type" "sselog")
1408 (set_attr "prefix_extra" "1")
1409 (set_attr "mode" "V4SF")])
;; Anonymous pattern: V4SF memory destination, SF non-memory source
;; (operand 1).  Once reload has completed (TARGET_SSE && reload_completed)
;; it is replaced by a plain SFmode move to offset 0 of the destination
;; address.
1412 [(set (match_operand:V4SF 0 "memory_operand" "")
1415 (match_operand:SF 1 "nonmemory_operand" ""))
1418 "TARGET_SSE && reload_completed"
1421 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_setv4sf: expander that delegates to ix86_expand_vector_set with the
;; constant element index taken from operand 2.
1425 (define_expand "vec_setv4sf"
1426 [(match_operand:V4SF 0 "register_operand" "")
1427 (match_operand:SF 1 "register_operand" "")
1428 (match_operand 2 "const_int_operand" "")]
1431 ix86_expand_vector_set (false, operands[0], operands[1],
1432 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extraction of element 0 of a V4SF operand into an SF
;; destination; at most one of the two operands may be memory.  After
;; reload it is split into an ordinary SFmode move, with operand 1
;; re-expressed in SFmode (gen_rtx_REG on its register number, or
;; gen_lowpart).
1436 (define_insn_and_split "*vec_extractv4sf_0"
1437 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1439 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1440 (parallel [(const_int 0)])))]
1441 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1443 "&& reload_completed"
1446 rtx op1 = operands[1];
1448 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1450 op1 = gen_lowpart (SFmode, op1);
1451 emit_move_insn (operands[0], op1);
;; *sse4_1_extractps: extracts the element of V4SF operand 1 selected by
;; operand 2 (constant 0..3) using extractps.  The destination constraint is
;; "=rm", so the predicate is nonimmediate_operand: that lets a memory
;; destination be matched directly instead of only being reachable through
;; reload.
1455 (define_insn "*sse4_1_extractps"
1456 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1458 (match_operand:V4SF 1 "register_operand" "x")
1459 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1461 "extractps\t{%2, %1, %0|%0, %1, %2}"
1462 [(set_attr "type" "sselog")
1463 (set_attr "prefix_extra" "1")
1464 (set_attr "mode" "V4SF")])
;; vec_extractv4sf: expander that delegates to ix86_expand_vector_extract
;; with the constant element index taken from operand 2.
1466 (define_expand "vec_extractv4sf"
1467 [(match_operand:SF 0 "register_operand" "")
1468 (match_operand:V4SF 1 "register_operand" "")
1469 (match_operand 2 "const_int_operand" "")]
1472 ix86_expand_vector_extract (false, operands[0], operands[1],
1473 INTVAL (operands[2]));
1477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1479 ;; Parallel double-precision floating point arithmetic
1481 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; negv2df2: V2DF negation; expansion is handed entirely to
;; ix86_expand_fp_absneg_operator (NEG, ...), after which the expander is DONE.
1483 (define_expand "negv2df2"
1484 [(set (match_operand:V2DF 0 "register_operand" "")
1485 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1487 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; absv2df2: V2DF absolute value; expansion is handed entirely to
;; ix86_expand_fp_absneg_operator (ABS, ...), after which the expander is DONE.
1489 (define_expand "absv2df2"
1490 [(set (match_operand:V2DF 0 "register_operand" "")
1491 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1493 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; addv2df3: V2DF addition expander; runs
;; ix86_fixup_binary_operands_no_copy on the operands before the
;; (plus:V2DF ...) template is emitted.
1495 (define_expand "addv2df3"
1496 [(set (match_operand:V2DF 0 "register_operand" "")
1497 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1498 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1500 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; *addv2df3: addpd.  Operand 1 is marked commutative ("%0") and tied to the
;; output; operands must also pass ix86_binary_operator_ok.
1502 (define_insn "*addv2df3"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1504 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1505 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1506 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1507 "addpd\t{%2, %0|%0, %2}"
1508 [(set_attr "type" "sseadd")
1509 (set_attr "mode" "V2DF")])
;; sse2_vmaddv2df3: emits addsd.  Operand 1 is tied to the output register;
;; operand 2 may be an SSE register or memory.  The operand check is made in
;; V2DFmode, the mode of this pattern's operands (as in sse2_vmmulv2df3).
1511 (define_insn "sse2_vmaddv2df3"
1512 [(set (match_operand:V2DF 0 "register_operand" "=x")
1514 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1515 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1518 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1519 "addsd\t{%2, %0|%0, %2}"
1520 [(set_attr "type" "sseadd")
1521 (set_attr "mode" "DF")])
;; subv2df3: V2DF subtraction expander; runs
;; ix86_fixup_binary_operands_no_copy on the operands before the
;; (minus:V2DF ...) template is emitted.
1523 (define_expand "subv2df3"
1524 [(set (match_operand:V2DF 0 "register_operand" "")
1525 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1526 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1528 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; *subv2df3: subpd.  Not commutative: operand 1 must be the register tied
;; to the output; operand 2 may be a register or memory.
1530 (define_insn "*subv2df3"
1531 [(set (match_operand:V2DF 0 "register_operand" "=x")
1532 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1533 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1535 "subpd\t{%2, %0|%0, %2}"
1536 [(set_attr "type" "sseadd")
1537 (set_attr "mode" "V2DF")])
;; sse2_vmsubv2df3: emits subsd.  Operand 1 is tied to the output register;
;; operand 2 may be an SSE register or memory.
1539 (define_insn "sse2_vmsubv2df3"
1540 [(set (match_operand:V2DF 0 "register_operand" "=x")
1542 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1543 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1547 "subsd\t{%2, %0|%0, %2}"
1548 [(set_attr "type" "sseadd")
1549 (set_attr "mode" "DF")])
;; mulv2df3: V2DF multiplication expander; runs
;; ix86_fixup_binary_operands_no_copy on the operands before the
;; (mult:V2DF ...) template is emitted.
1551 (define_expand "mulv2df3"
1552 [(set (match_operand:V2DF 0 "register_operand" "")
1553 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1554 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1556 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; *mulv2df3: mulpd.  Operand 1 is marked commutative ("%0") and tied to the
;; output; operands must also pass ix86_binary_operator_ok.
1558 (define_insn "*mulv2df3"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1560 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1561 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1562 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1563 "mulpd\t{%2, %0|%0, %2}"
1564 [(set_attr "type" "ssemul")
1565 (set_attr "mode" "V2DF")])
;; sse2_vmmulv2df3: emits mulsd.  Operand 1 is tied to the output register;
;; operand 2 may be an SSE register or memory.
1567 (define_insn "sse2_vmmulv2df3"
1568 [(set (match_operand:V2DF 0 "register_operand" "=x")
1570 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1571 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1574 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1575 "mulsd\t{%2, %0|%0, %2}"
1576 [(set_attr "type" "ssemul")
1577 (set_attr "mode" "DF")])
;; divv2df3: V2DF division expander.  Unlike the add/mul expanders, operand 1
;; must already be a register; ix86_fixup_binary_operands_no_copy is still
;; run on the operands.
1579 (define_expand "divv2df3"
1580 [(set (match_operand:V2DF 0 "register_operand" "")
1581 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1582 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1584 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; *divv2df3: divpd.  Operand 1 is the register tied to the output;
;; operand 2 may be a register or memory.
1586 (define_insn "*divv2df3"
1587 [(set (match_operand:V2DF 0 "register_operand" "=x")
1588 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1589 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1591 "divpd\t{%2, %0|%0, %2}"
1592 [(set_attr "type" "ssediv")
1593 (set_attr "mode" "V2DF")])
;; sse2_vmdivv2df3: emits divsd.  Operand 1 is tied to the output register;
;; operand 2 may be an SSE register or memory.
1595 (define_insn "sse2_vmdivv2df3"
1596 [(set (match_operand:V2DF 0 "register_operand" "=x")
1598 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1599 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1603 "divsd\t{%2, %0|%0, %2}"
1604 [(set_attr "type" "ssediv")
1605 (set_attr "mode" "DF")])
;; sqrtv2df2: (sqrt:V2DF ...) of a register-or-memory operand, emitted as
;; sqrtpd.
1607 (define_insn "sqrtv2df2"
1608 [(set (match_operand:V2DF 0 "register_operand" "=x")
1609 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1611 "sqrtpd\t{%1, %0|%0, %1}"
1612 [(set_attr "type" "sse")
1613 (set_attr "mode" "V2DF")])
;; sse2_vmsqrtv2df2: emits sqrtsd from operand 1 into the output register,
;; which is tied to operand 2.  Operand 1 uses nonimmediate_operand so that
;; the predicate agrees with its "xm" constraint, as the other scalar
;; V2DF patterns do for their register-or-memory operand.
1615 (define_insn "sse2_vmsqrtv2df2"
1616 [(set (match_operand:V2DF 0 "register_operand" "=x")
1618 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1619 (match_operand:V2DF 2 "register_operand" "0")
1622 "sqrtsd\t{%1, %0|%0, %1}"
1623 [(set_attr "type" "sse")
1624 (set_attr "mode" "DF")])
1626 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1627 ;; isn't really correct, as those rtl operators aren't defined when
1628 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smaxv2df3: V2DF maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is applied.
1630 (define_expand "smaxv2df3"
1631 [(set (match_operand:V2DF 0 "register_operand" "")
1632 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1633 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1636 if (!flag_finite_math_only)
1637 operands[1] = force_reg (V2DFmode, operands[1]);
1638 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; *smaxv2df3_finite: maxpd with operand 1 marked commutative ("%0").  Only
;; enabled when flag_finite_math_only is set.
1641 (define_insn "*smaxv2df3_finite"
1642 [(set (match_operand:V2DF 0 "register_operand" "=x")
1643 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1644 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1645 "TARGET_SSE2 && flag_finite_math_only
1646 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1647 "maxpd\t{%2, %0|%0, %2}"
1648 [(set_attr "type" "sseadd")
1649 (set_attr "mode" "V2DF")])
;; *smaxv2df3: maxpd in non-commutative form -- operand 1 must be the
;; register tied to the output (no "%" marker).
1651 (define_insn "*smaxv2df3"
1652 [(set (match_operand:V2DF 0 "register_operand" "=x")
1653 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1654 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1656 "maxpd\t{%2, %0|%0, %2}"
1657 [(set_attr "type" "sseadd")
1658 (set_attr "mode" "V2DF")])
;; sse2_vmsmaxv2df3: emits maxsd.  Operand 1 is tied to the output register;
;; operand 2 may be an SSE register or memory.
1660 (define_insn "sse2_vmsmaxv2df3"
1661 [(set (match_operand:V2DF 0 "register_operand" "=x")
1663 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1664 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1668 "maxsd\t{%2, %0|%0, %2}"
1669 [(set_attr "type" "sseadd")
1670 (set_attr "mode" "DF")])
;; sminv2df3: V2DF minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is applied.
1672 (define_expand "sminv2df3"
1673 [(set (match_operand:V2DF 0 "register_operand" "")
1674 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1675 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1678 if (!flag_finite_math_only)
1679 operands[1] = force_reg (V2DFmode, operands[1]);
1680 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; *sminv2df3_finite: minpd with operand 1 marked commutative ("%0").  Only
;; enabled when flag_finite_math_only is set.
1683 (define_insn "*sminv2df3_finite"
1684 [(set (match_operand:V2DF 0 "register_operand" "=x")
1685 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1686 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1687 "TARGET_SSE2 && flag_finite_math_only
1688 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1689 "minpd\t{%2, %0|%0, %2}"
1690 [(set_attr "type" "sseadd")
1691 (set_attr "mode" "V2DF")])
;; *sminv2df3: minpd in non-commutative form -- operand 1 must be the
;; register tied to the output (no "%" marker).
1693 (define_insn "*sminv2df3"
1694 [(set (match_operand:V2DF 0 "register_operand" "=x")
1695 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1696 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1698 "minpd\t{%2, %0|%0, %2}"
1699 [(set_attr "type" "sseadd")
1700 (set_attr "mode" "V2DF")])
;; sse2_vmsminv2df3: emits minsd.  Operand 1 is tied to the output register;
;; operand 2 may be an SSE register or memory.
1702 (define_insn "sse2_vmsminv2df3"
1703 [(set (match_operand:V2DF 0 "register_operand" "=x")
1705 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1706 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1710 "minsd\t{%2, %0|%0, %2}"
1711 [(set_attr "type" "sseadd")
1712 (set_attr "mode" "DF")])
;; sse3_addsubv2df3: emits addsubpd.  The RTL pairs an expression over
;; operands 1 and 2 with (minus:V2DF op1 op2) on the same operands
;; (match_dup).  Operand 1 is tied to the output.
1714 (define_insn "sse3_addsubv2df3"
1715 [(set (match_operand:V2DF 0 "register_operand" "=x")
1718 (match_operand:V2DF 1 "register_operand" "0")
1719 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1720 (minus:V2DF (match_dup 1) (match_dup 2))
1723 "addsubpd\t{%2, %0|%0, %2}"
1724 [(set_attr "type" "sseadd")
1725 (set_attr "mode" "V2DF")])
;; sse3_haddv2df3: emits haddpd.  The RTL pairs element 0 with element 1 of
;; operand 1, and element 0 with element 1 of operand 2.  Operand 1 is tied
;; to the output.
1727 (define_insn "sse3_haddv2df3"
1728 [(set (match_operand:V2DF 0 "register_operand" "=x")
1732 (match_operand:V2DF 1 "register_operand" "0")
1733 (parallel [(const_int 0)]))
1734 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1737 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1738 (parallel [(const_int 0)]))
1739 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1741 "haddpd\t{%2, %0|%0, %2}"
1742 [(set_attr "type" "sseadd")
1743 (set_attr "mode" "V2DF")])
;; sse3_hsubv2df3: emits hsubpd.  The RTL pairs element 0 with element 1 of
;; operand 1, and element 0 with element 1 of operand 2.  Operand 1 is tied
;; to the output.
1745 (define_insn "sse3_hsubv2df3"
1746 [(set (match_operand:V2DF 0 "register_operand" "=x")
1750 (match_operand:V2DF 1 "register_operand" "0")
1751 (parallel [(const_int 0)]))
1752 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1755 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1756 (parallel [(const_int 0)]))
1757 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1759 "hsubpd\t{%2, %0|%0, %2}"
1760 [(set_attr "type" "sseadd")
1761 (set_attr "mode" "V2DF")])
;; reduc_splus_v2df: implemented as a single sse3_haddv2df3 with operand 1
;; supplied as both sources.
1763 (define_expand "reduc_splus_v2df"
1764 [(match_operand:V2DF 0 "register_operand" "")
1765 (match_operand:V2DF 1 "register_operand" "")]
1768 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1772 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1774 ;; Parallel double-precision floating point comparisons
1776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_maskcmpv2df3: V2DF comparison through any sse_comparison_operator
;; (operand 3); the %D3 modifier in the template turns that operator into
;; the cmp<cond>pd mnemonic.
1778 (define_insn "sse2_maskcmpv2df3"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (match_operator:V2DF 3 "sse_comparison_operator"
1781 [(match_operand:V2DF 1 "register_operand" "0")
1782 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1784 "cmp%D3pd\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "ssecmp")
1786 (set_attr "mode" "V2DF")])
;; sse2_maskcmpdf3: scalar DF comparison through any sse_comparison_operator
;; (operand 3), emitted as cmp<cond>sd via the %D3 modifier.
1788 (define_insn "sse2_maskcmpdf3"
1789 [(set (match_operand:DF 0 "register_operand" "=x")
1790 (match_operator:DF 3 "sse_comparison_operator"
1791 [(match_operand:DF 1 "register_operand" "0")
1792 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1794 "cmp%D3sd\t{%2, %0|%0, %2}"
1795 [(set_attr "type" "ssecmp")
1796 (set_attr "mode" "DF")])
;; sse2_vmmaskcmpv2df3: comparison over V2DF operands emitted as the scalar
;; cmp<cond>sd instruction; operand 1 is tied to the output.
1798 (define_insn "sse2_vmmaskcmpv2df3"
1799 [(set (match_operand:V2DF 0 "register_operand" "=x")
1801 (match_operator:V2DF 3 "sse_comparison_operator"
1802 [(match_operand:V2DF 1 "register_operand" "0")
1803 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1807 "cmp%D3sd\t{%2, %0|%0, %2}"
1808 [(set_attr "type" "ssecmp")
1809 (set_attr "mode" "DF")])
;; sse2_comi: sets the flags register in CCFP mode from element 0 of
;; operand 0 and element 0 of operand 1; emitted as comisd.
1811 (define_insn "sse2_comi"
1812 [(set (reg:CCFP FLAGS_REG)
1815 (match_operand:V2DF 0 "register_operand" "x")
1816 (parallel [(const_int 0)]))
1818 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1819 (parallel [(const_int 0)]))))]
1821 "comisd\t{%1, %0|%0, %1}"
1822 [(set_attr "type" "ssecomi")
1823 (set_attr "mode" "DF")])
;; sse2_ucomi: sets the flags register in CCFPU mode from element 0 of
;; operand 0 and element 0 of operand 1; emitted as ucomisd.
1825 (define_insn "sse2_ucomi"
1826 [(set (reg:CCFPU FLAGS_REG)
1829 (match_operand:V2DF 0 "register_operand" "x")
1830 (parallel [(const_int 0)]))
1832 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1833 (parallel [(const_int 0)]))))]
1835 "ucomisd\t{%1, %0|%0, %1}"
1836 [(set_attr "type" "ssecomi")
1837 (set_attr "mode" "DF")])
;; vcondv2df: V2DF conditional-select expander.  Operand 3 is the comparison
;; applied to operands 4 and 5; operands 1 and 2 are the two values to
;; choose between.  The work is delegated to ix86_expand_fp_vcond, whose
;; return value decides the outcome of the expansion.
1839 (define_expand "vcondv2df"
1840 [(set (match_operand:V2DF 0 "register_operand" "")
1842 (match_operator 3 ""
1843 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1844 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1845 (match_operand:V2DF 1 "general_operand" "")
1846 (match_operand:V2DF 2 "general_operand" "")))]
1849 if (ix86_expand_fp_vcond (operands))
1855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1857 ;; Parallel double-precision floating point logical operations
1859 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; andv2df3: V2DF bitwise-AND expander; runs
;; ix86_fixup_binary_operands_no_copy on the operands.
1861 (define_expand "andv2df3"
1862 [(set (match_operand:V2DF 0 "register_operand" "")
1863 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1864 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1866 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; *andv2df3: andpd.  Operand 1 is marked commutative ("%0") and tied to the
;; output; operands must also pass ix86_binary_operator_ok.
1868 (define_insn "*andv2df3"
1869 [(set (match_operand:V2DF 0 "register_operand" "=x")
1870 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1871 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1872 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1873 "andpd\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "sselog")
1875 (set_attr "mode" "V2DF")])
;; sse2_nandv2df3: (and (not op1) op2), emitted as andnpd.  The
;; complemented operand 1 is the register tied to the output.
1877 (define_insn "sse2_nandv2df3"
1878 [(set (match_operand:V2DF 0 "register_operand" "=x")
1879 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1880 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1882 "andnpd\t{%2, %0|%0, %2}"
1883 [(set_attr "type" "sselog")
1884 (set_attr "mode" "V2DF")])
;; iorv2df3: V2DF bitwise-OR expander; runs
;; ix86_fixup_binary_operands_no_copy on the operands.
1886 (define_expand "iorv2df3"
1887 [(set (match_operand:V2DF 0 "register_operand" "")
1888 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1889 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1891 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; *iorv2df3: orpd.  Operand 1 is marked commutative ("%0") and tied to the
;; output; operands must also pass ix86_binary_operator_ok.
1893 (define_insn "*iorv2df3"
1894 [(set (match_operand:V2DF 0 "register_operand" "=x")
1895 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1896 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1897 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1898 "orpd\t{%2, %0|%0, %2}"
1899 [(set_attr "type" "sselog")
1900 (set_attr "mode" "V2DF")])
;; xorv2df3: V2DF bitwise-XOR expander; runs
;; ix86_fixup_binary_operands_no_copy on the operands.
1902 (define_expand "xorv2df3"
1903 [(set (match_operand:V2DF 0 "register_operand" "")
1904 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1905 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1907 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; *xorv2df3: xorpd.  Operand 1 is marked commutative ("%0") and tied to the
;; output; operands must also pass ix86_binary_operator_ok.
1909 (define_insn "*xorv2df3"
1910 [(set (match_operand:V2DF 0 "register_operand" "=x")
1911 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1912 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1913 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1914 "xorpd\t{%2, %0|%0, %2}"
1915 [(set_attr "type" "sselog")
1916 (set_attr "mode" "V2DF")])
1918 ;; Also define scalar versions. These are used for abs, neg, and
1919 ;; conditional move. Using subregs into vector modes causes register
1920 ;; allocation lossage. These patterns do not allow memory operands
1921 ;; because the native instructions read the full 128-bits.
;; *anddf3: scalar DF bitwise AND on registers only, emitted as the packed
;; andpd instruction (hence the V2DF "mode" attribute).
1923 (define_insn "*anddf3"
1924 [(set (match_operand:DF 0 "register_operand" "=x")
1925 (and:DF (match_operand:DF 1 "register_operand" "0")
1926 (match_operand:DF 2 "register_operand" "x")))]
1928 "andpd\t{%2, %0|%0, %2}"
1929 [(set_attr "type" "sselog")
1930 (set_attr "mode" "V2DF")])
;; *nanddf3: scalar DF (and (not op1) op2) on registers only, emitted as the
;; packed andnpd instruction (hence the V2DF "mode" attribute).
1932 (define_insn "*nanddf3"
1933 [(set (match_operand:DF 0 "register_operand" "=x")
1934 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1935 (match_operand:DF 2 "register_operand" "x")))]
1937 "andnpd\t{%2, %0|%0, %2}"
1938 [(set_attr "type" "sselog")
1939 (set_attr "mode" "V2DF")])
;; *iordf3: scalar DF bitwise OR on registers only, emitted as the packed
;; orpd instruction (hence the V2DF "mode" attribute).
1941 (define_insn "*iordf3"
1942 [(set (match_operand:DF 0 "register_operand" "=x")
1943 (ior:DF (match_operand:DF 1 "register_operand" "0")
1944 (match_operand:DF 2 "register_operand" "x")))]
1946 "orpd\t{%2, %0|%0, %2}"
1947 [(set_attr "type" "sselog")
1948 (set_attr "mode" "V2DF")])
;; *xordf3: scalar DF bitwise XOR on registers only, emitted as the packed
;; xorpd instruction (hence the V2DF "mode" attribute).
1950 (define_insn "*xordf3"
1951 [(set (match_operand:DF 0 "register_operand" "=x")
1952 (xor:DF (match_operand:DF 1 "register_operand" "0")
1953 (match_operand:DF 2 "register_operand" "x")))]
1955 "xorpd\t{%2, %0|%0, %2}"
1956 [(set_attr "type" "sselog")
1957 (set_attr "mode" "V2DF")])
1959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1961 ;; Parallel double-precision floating point conversion operations
1963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: (float:V2DF ...) of a V2SI operand taken from an MMX
;; register ("y") or memory; emitted as cvtpi2pd.  Only the register
;; alternative is tagged with the mmx unit.
1965 (define_insn "sse2_cvtpi2pd"
1966 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1967 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1969 "cvtpi2pd\t{%1, %0|%0, %1}"
1970 [(set_attr "type" "ssecvt")
1971 (set_attr "unit" "mmx,*")
1972 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2DF -> V2SI conversion into an MMX register, modeled as
;; UNSPEC_FIX_NOTRUNC; emitted as cvtpd2pi.
1974 (define_insn "sse2_cvtpd2pi"
1975 [(set (match_operand:V2SI 0 "register_operand" "=y")
1976 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1977 UNSPEC_FIX_NOTRUNC))]
1979 "cvtpd2pi\t{%1, %0|%0, %1}"
1980 [(set_attr "type" "ssecvt")
1981 (set_attr "unit" "mmx")
1982 (set_attr "prefix_data16" "1")
1983 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: (fix:V2SI ...) of a V2DF operand into an MMX register;
;; emitted as cvttpd2pi.
;; NOTE(review): the "mode" attribute is TI here but DI on sse2_cvtpd2pi --
;; confirm whether the difference is intended.
1985 (define_insn "sse2_cvttpd2pi"
1986 [(set (match_operand:V2SI 0 "register_operand" "=y")
1987 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1989 "cvttpd2pi\t{%1, %0|%0, %1}"
1990 [(set_attr "type" "ssecvt")
1991 (set_attr "unit" "mmx")
1992 (set_attr "prefix_data16" "1")
1993 (set_attr "mode" "TI")])
;; sse2_cvtsi2sd: (float:DF ...) of an SImode operand (general register or
;; memory) placed into V2DF operand 1, which is tied to the output; emitted
;; as cvtsi2sd.
1995 (define_insn "sse2_cvtsi2sd"
1996 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1999 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2000 (match_operand:V2DF 1 "register_operand" "0,0")
2003 "cvtsi2sd\t{%2, %0|%0, %2}"
2004 [(set_attr "type" "sseicvt")
2005 (set_attr "mode" "DF")
2006 (set_attr "athlon_decode" "double,direct")
2007 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsi2sdq: 64-bit form taking a DImode source; emitted as cvtsi2sdq.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2009 (define_insn "sse2_cvtsi2sdq"
2010 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2013 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2014 (match_operand:V2DF 1 "register_operand" "0,0")
2016 "TARGET_SSE2 && TARGET_64BIT"
2017 "cvtsi2sdq\t{%2, %0|%0, %2}"
2018 [(set_attr "type" "sseicvt")
2019 (set_attr "mode" "DF")
2020 (set_attr "athlon_decode" "double,direct")
2021 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsd2si: emits cvtsd2si into an SImode general register.  The source
;; is element 0 of V2DF operand 1 (SSE register or memory), wrapped in
;; UNSPEC_FIX_NOTRUNC.
2023 (define_insn "sse2_cvtsd2si"
2024 [(set (match_operand:SI 0 "register_operand" "=r,r")
2027 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2028 (parallel [(const_int 0)]))]
2029 UNSPEC_FIX_NOTRUNC))]
2031 "cvtsd2si\t{%1, %0|%0, %1}"
2032 [(set_attr "type" "sseicvt")
2033 (set_attr "athlon_decode" "double,vector")
2034 (set_attr "prefix_rep" "1")
2035 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: same cvtsd2si instruction, but the source is a scalar DF
;; operand (register or memory) rather than a vector element.
2037 (define_insn "sse2_cvtsd2si_2"
2038 [(set (match_operand:SI 0 "register_operand" "=r,r")
2039 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2040 UNSPEC_FIX_NOTRUNC))]
2042 "cvtsd2si\t{%1, %0|%0, %1}"
2043 [(set_attr "type" "sseicvt")
2044 (set_attr "athlon_decode" "double,vector")
2045 (set_attr "amdfam10_decode" "double,double")
2046 (set_attr "prefix_rep" "1")
2047 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: 64-bit form; emits cvtsd2siq into a DImode general
;; register from element 0 of V2DF operand 1.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2049 (define_insn "sse2_cvtsd2siq"
2050 [(set (match_operand:DI 0 "register_operand" "=r,r")
2053 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2054 (parallel [(const_int 0)]))]
2055 UNSPEC_FIX_NOTRUNC))]
2056 "TARGET_SSE2 && TARGET_64BIT"
2057 "cvtsd2siq\t{%1, %0|%0, %1}"
2058 [(set_attr "type" "sseicvt")
2059 (set_attr "athlon_decode" "double,vector")
2060 (set_attr "prefix_rep" "1")
2061 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: cvtsd2siq with a scalar DF source operand and a DImode
;; result.  Requires TARGET_SSE2 && TARGET_64BIT.
2063 (define_insn "sse2_cvtsd2siq_2"
2064 [(set (match_operand:DI 0 "register_operand" "=r,r")
2065 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2066 UNSPEC_FIX_NOTRUNC))]
2067 "TARGET_SSE2 && TARGET_64BIT"
2068 "cvtsd2siq\t{%1, %0|%0, %1}"
2069 [(set_attr "type" "sseicvt")
2070 (set_attr "athlon_decode" "double,vector")
2071 (set_attr "amdfam10_decode" "double,double")
2072 (set_attr "prefix_rep" "1")
2073 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: emits cvttsd2si into an SImode general register from
;; element 0 of V2DF operand 1 (SSE register or memory).
2075 (define_insn "sse2_cvttsd2si"
2076 [(set (match_operand:SI 0 "register_operand" "=r,r")
2079 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2080 (parallel [(const_int 0)]))))]
2082 "cvttsd2si\t{%1, %0|%0, %1}"
2083 [(set_attr "type" "sseicvt")
2084 (set_attr "prefix_rep" "1")
2085 (set_attr "mode" "SI")
2086 (set_attr "athlon_decode" "double,vector")
2087 (set_attr "amdfam10_decode" "double,double")])
;; DI-result counterpart of sse2_cvttsd2si: converts element 0 of a V2DF
;; operand with cvttsd2siq.  Requires TARGET_SSE2 and TARGET_64BIT.
2089 (define_insn "sse2_cvttsd2siq"
2090 [(set (match_operand:DI 0 "register_operand" "=r,r")
2093 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2094 (parallel [(const_int 0)]))))]
2095 "TARGET_SSE2 && TARGET_64BIT"
2096 "cvttsd2siq\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sseicvt")
2098 (set_attr "prefix_rep" "1")
2099 (set_attr "mode" "DI")
2100 (set_attr "athlon_decode" "double,vector")
2101 (set_attr "amdfam10_decode" "double,double")])
;; Converts elements 0 and 1 of a V4SI operand (SSE register or memory)
;; into a V2DF register; emits cvtdq2pd.
2103 (define_insn "sse2_cvtdq2pd"
2104 [(set (match_operand:V2DF 0 "register_operand" "=x")
2107 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2108 (parallel [(const_int 0) (const_int 1)]))))]
2110 "cvtdq2pd\t{%1, %0|%0, %1}"
2111 [(set_attr "type" "ssecvt")
2112 (set_attr "mode" "V2DF")])
;; Expander for the V2DF -> integer conversion with a V4SI destination.
;; The preparation statement supplies operand 2 as the V2SI zero vector,
;; which the matching insn (*sse2_cvtpd2dq) requires as a const0_operand.
2114 (define_expand "sse2_cvtpd2dq"
2115 [(set (match_operand:V4SI 0 "register_operand" "")
2117 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2121 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvtpd2dq expander: a V2SI unspec of the V2DF source
;; paired with a V2SI zero operand (operand 2) to form the V4SI result.
;; Emits cvtpd2dq.
2123 (define_insn "*sse2_cvtpd2dq"
2124 [(set (match_operand:V4SI 0 "register_operand" "=x")
2126 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2128 (match_operand:V2SI 2 "const0_operand" "")))]
2130 "cvtpd2dq\t{%1, %0|%0, %1}"
2131 [(set_attr "type" "ssecvt")
2132 (set_attr "prefix_rep" "1")
2133 (set_attr "mode" "TI")
2134 (set_attr "amdfam10_decode" "double")])
;; Expander for the V2DF -> V2SI `fix' conversion with a V4SI destination.
;; Operand 2 is set to the V2SI zero vector for the matching insn
;; (*sse2_cvttpd2dq).
2136 (define_expand "sse2_cvttpd2dq"
2137 [(set (match_operand:V4SI 0 "register_operand" "")
2139 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2142 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvttpd2dq expander: fix:V2SI of the V2DF source
;; paired with a V2SI zero operand (operand 2).  Emits cvttpd2dq.
2144 (define_insn "*sse2_cvttpd2dq"
2145 [(set (match_operand:V4SI 0 "register_operand" "=x")
2147 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2148 (match_operand:V2SI 2 "const0_operand" "")))]
2150 "cvttpd2dq\t{%1, %0|%0, %1}"
2151 [(set_attr "type" "ssecvt")
2152 (set_attr "prefix_rep" "1")
2153 (set_attr "mode" "TI")
2154 (set_attr "amdfam10_decode" "double")])
;; Narrows the V2DF operand 2 (float_truncate to V2SF) into a V4SF result
;; whose other input, operand 1, is tied to the destination register.
;; Emits cvtsd2ss with operand 2 as the source.
2156 (define_insn "sse2_cvtsd2ss"
2157 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2160 (float_truncate:V2SF
2161 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2162 (match_operand:V4SF 1 "register_operand" "0,0")
2165 "cvtsd2ss\t{%2, %0|%0, %2}"
2166 [(set_attr "type" "ssecvt")
2167 (set_attr "athlon_decode" "vector,double")
2168 (set_attr "amdfam10_decode" "vector,double")
2169 (set_attr "mode" "SF")])
;; Widening counterpart of sse2_cvtsd2ss: takes elements 0 and 1 of the
;; V4SF operand 2 and combines the converted value with operand 1, which
;; is tied to the V2DF destination.  Emits cvtss2sd.
2171 (define_insn "sse2_cvtss2sd"
2172 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2176 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2177 (parallel [(const_int 0) (const_int 1)])))
2178 (match_operand:V2DF 1 "register_operand" "0,0")
2181 "cvtss2sd\t{%2, %0|%0, %2}"
2182 [(set_attr "type" "ssecvt")
2183 (set_attr "amdfam10_decode" "vector,double")
2184 (set_attr "mode" "DF")])
;; Expander for V2DF -> V2SF float_truncate with a V4SF destination.
;; Operand 2 is set to the V2SF zero vector for the matching insn
;; (*sse2_cvtpd2ps).
2186 (define_expand "sse2_cvtpd2ps"
2187 [(set (match_operand:V4SF 0 "register_operand" "")
2189 (float_truncate:V2SF
2190 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2193 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn behind the sse2_cvtpd2ps expander: float_truncate:V2SF of the V2DF
;; source paired with a V2SF zero operand (operand 2).  Emits cvtpd2ps.
2195 (define_insn "*sse2_cvtpd2ps"
2196 [(set (match_operand:V4SF 0 "register_operand" "=x")
2198 (float_truncate:V2SF
2199 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2200 (match_operand:V2SF 2 "const0_operand" "")))]
2202 "cvtpd2ps\t{%1, %0|%0, %1}"
2203 [(set_attr "type" "ssecvt")
2204 (set_attr "prefix_data16" "1")
2205 (set_attr "mode" "V4SF")
2206 (set_attr "amdfam10_decode" "double")])
;; Converts elements 0 and 1 of a V4SF operand (SSE register or memory)
;; into a V2DF register; emits cvtps2pd.
2208 (define_insn "sse2_cvtps2pd"
2209 [(set (match_operand:V2DF 0 "register_operand" "=x")
2212 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2213 (parallel [(const_int 0) (const_int 1)]))))]
2215 "cvtps2pd\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "ssecvt")
2217 (set_attr "mode" "V2DF")
2218 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander producing a V2DF from a V4SF source: a first set
;; rearranges operand 1 (selection list starting at index 6) and a second
;; set writes V2DF operand 0 from elements 0 and 1.  Operand 2 is a fresh
;; V4SF pseudo allocated in the preparation code.
2220 (define_expand "vec_unpacks_hi_v4sf"
2225 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2226 (parallel [(const_int 6)
2230 (set (match_operand:V2DF 0 "register_operand" "")
2234 (parallel [(const_int 0) (const_int 1)]))))]
2237 operands[2] = gen_reg_rtx (V4SFmode);
;; Produces a V2DF from elements 0 and 1 of the V4SF operand 1 -- the same
;; element selection that sse2_cvtps2pd matches.
2240 (define_expand "vec_unpacks_lo_v4sf"
2241 [(set (match_operand:V2DF 0 "register_operand" "")
2244 (match_operand:V4SF 1 "nonimmediate_operand" "")
2245 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: unpacks into a fresh V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then converts that temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2248 (define_expand "vec_unpacks_float_hi_v8hi"
2249 [(match_operand:V4SF 0 "register_operand" "")
2250 (match_operand:V8HI 1 "register_operand" "")]
2253 rtx tmp = gen_reg_rtx (V4SImode);
2255 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2256 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpacks into a fresh V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then converts that temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2260 (define_expand "vec_unpacks_float_lo_v8hi"
2261 [(match_operand:V4SF 0 "register_operand" "")
2262 (match_operand:V8HI 1 "register_operand" "")]
2265 rtx tmp = gen_reg_rtx (V4SImode);
2267 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2268 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpacks into a fresh V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then converts that temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2272 (define_expand "vec_unpacku_float_hi_v8hi"
2273 [(match_operand:V4SF 0 "register_operand" "")
2274 (match_operand:V8HI 1 "register_operand" "")]
2277 rtx tmp = gen_reg_rtx (V4SImode);
2279 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2280 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpacks into a fresh V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then converts that temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2284 (define_expand "vec_unpacku_float_lo_v8hi"
2285 [(match_operand:V4SF 0 "register_operand" "")
2286 (match_operand:V8HI 1 "register_operand" "")]
2289 rtx tmp = gen_reg_rtx (V4SImode);
2291 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2292 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander producing a V2DF from a V4SI source: a first set
;; rearranges operand 1 (selection list starting at index 2) and a second
;; set writes V2DF operand 0 from elements 0 and 1.  Operand 2 is a fresh
;; V4SI pseudo allocated in the preparation code.
2296 (define_expand "vec_unpacks_float_hi_v4si"
2299 (match_operand:V4SI 1 "nonimmediate_operand" "")
2300 (parallel [(const_int 2)
2304 (set (match_operand:V2DF 0 "register_operand" "")
2308 (parallel [(const_int 0) (const_int 1)]))))]
2311 operands[2] = gen_reg_rtx (V4SImode);
;; Produces a V2DF from elements 0 and 1 of the V4SI operand 1 -- the same
;; element selection that sse2_cvtdq2pd matches.
2314 (define_expand "vec_unpacks_float_lo_v4si"
2315 [(set (match_operand:V2DF 0 "register_operand" "")
2318 (match_operand:V4SI 1 "nonimmediate_operand" "")
2319 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF: each input is narrowed into its own
;; V4SF temporary with gen_sse2_cvtpd2ps, and the two temporaries are then
;; combined into operand 0 with gen_sse_movlhps.
2322 (define_expand "vec_pack_trunc_v2df"
2323 [(match_operand:V4SF 0 "register_operand" "")
2324 (match_operand:V2DF 1 "nonimmediate_operand" "")
2325 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2330 r1 = gen_reg_rtx (V4SFmode);
2331 r2 = gen_reg_rtx (V4SFmode);
2333 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2334 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2335 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI: each input is converted into its
;; own V4SI temporary with gen_sse2_cvttpd2dq, and the two temporaries are
;; combined with gen_sse2_punpcklqdq, all three operands being viewed as
;; V2DI through gen_lowpart.
2339 (define_expand "vec_pack_sfix_trunc_v2df"
2340 [(match_operand:V4SI 0 "register_operand" "")
2341 (match_operand:V2DF 1 "nonimmediate_operand" "")
2342 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2347 r1 = gen_reg_rtx (V4SImode);
2348 r2 = gen_reg_rtx (V4SImode);
2350 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2351 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2352 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2353 gen_lowpart (V2DImode, r1),
2354 gen_lowpart (V2DImode, r2)));
2358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2360 ;; Parallel double-precision floating point element swizzling
2362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF high-element interleave of operands 1 and 2 (selection list starts
;; at index 1).  Three alternatives:
;;   0: both inputs usable from registers, operand 1 tied -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied   -> movlpd from
;;      the high half of operand 1 (%H1)
;;   2: memory destination, operand 2 tied                -> movhpd store
;; The condition rejects the case where both inputs are memory.
2364 (define_insn "sse2_unpckhpd"
2365 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2368 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2369 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2370 (parallel [(const_int 1)
2372 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2374 unpckhpd\t{%2, %0|%0, %2}
2375 movlpd\t{%H1, %0|%0, %H1}
2376 movhpd\t{%1, %0|%0, %1}"
2377 [(set_attr "type" "sselog,ssemov,ssemov")
2378 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 pattern on a V2DF operand whose selection list starts at index 0.
;; The register-destination alternative emits movddup; a second
;; alternative allows an offsettable memory destination with a register
;; source.  Destination and source may not both be memory.
2380 (define_insn "*sse3_movddup"
2381 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2384 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2386 (parallel [(const_int 0)
2388 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2390 movddup\t{%1, %0|%0, %1}
2392 [(set_attr "type" "sselog1,ssemov")
2393 (set_attr "mode" "V2DF")])
;; Post-reload rewrite (TARGET_SSE3) for a V2DF memory destination fed from
;; a register: takes the DFmode register with the same REGNO as operand 1
;; and stores it twice, at byte offsets 0 and 8 of the destination.
;; NOTE(review): presumably the split for the memory alternative of
;; *sse3_movddup -- confirm.
2396 [(set (match_operand:V2DF 0 "memory_operand" "")
2399 (match_operand:V2DF 1 "register_operand" "")
2401 (parallel [(const_int 0)
2403 "TARGET_SSE3 && reload_completed"
2406 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2407 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2408 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; V2DF low-element interleave of operands 1 and 2 (selection list starts
;; at index 0); operand 1 is tied to the destination in every alternative:
;;   0: operand 2 in a register      -> unpcklpd
;;   1: operand 2 in memory          -> movhpd load
;;   2: offsettable memory destination -> movlpd store of operand 2 into
;;      the high half of the destination (%H0)
;; The condition rejects the case where both inputs are memory.
2412 (define_insn "sse2_unpcklpd"
2413 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2416 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2417 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2418 (parallel [(const_int 0)
2420 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2422 unpcklpd\t{%2, %0|%0, %2}
2423 movhpd\t{%2, %0|%0, %2}
2424 movlpd\t{%2, %H0|%H0, %2}"
2425 [(set_attr "type" "sselog,ssemov,ssemov")
2426 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking two V2DF inputs and an SImode constant selector
;; (operand 3).  The selector is read with INTVAL and forwarded to
;; gen_sse2_shufpd_1 as element indices; the last argument is 3 when bit 1
;; of the mask is set and 2 otherwise.
2428 (define_expand "sse2_shufpd"
2429 [(match_operand:V2DF 0 "register_operand" "")
2430 (match_operand:V2DF 1 "register_operand" "")
2431 (match_operand:V2DF 2 "nonimmediate_operand" "")
2432 (match_operand:SI 3 "const_int_operand" "")]
2435 int mask = INTVAL (operands[3]);
2436 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2438 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selector expressed as two element indices: operand 3
;; must satisfy const_0_to_1_operand and operand 4 const_2_to_3_operand.
;; The output code folds them back into the instruction immediate
;; (bit 0 = operand 3, bit 1 = operand 4 - 2), stores that in operands[3]
;; and prints shufpd.  Operand 1 is tied to the destination.
2442 (define_insn "sse2_shufpd_1"
2443 [(set (match_operand:V2DF 0 "register_operand" "=x")
2446 (match_operand:V2DF 1 "register_operand" "0")
2447 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2448 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2449 (match_operand 4 "const_2_to_3_operand" "")])))]
2453 mask = INTVAL (operands[3]);
2454 mask |= (INTVAL (operands[4]) - 2) << 1;
2455 operands[3] = GEN_INT (mask);
2457 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2459 [(set_attr "type" "sselog")
2460 (set_attr "mode" "V2DF")])
;; Extracts element 1 of a V2DF operand into a DF destination.  The first
;; alternative (memory destination, register source) emits a movhpd store.
;; Destination and source may not both be memory.
2462 (define_insn "sse2_storehpd"
2463 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2465 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2466 (parallel [(const_int 1)])))]
2467 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2469 movhpd\t{%1, %0|%0, %1}
2472 [(set_attr "type" "ssemov,sselog1,ssemov")
2473 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload rewrite: extracting element 1 of a V2DF memory operand into
;; a DF register becomes a plain DF move from that memory re-addressed at
;; byte offset 8.
2476 [(set (match_operand:DF 0 "register_operand" "")
2478 (match_operand:V2DF 1 "memory_operand" "")
2479 (parallel [(const_int 1)])))]
2480 "TARGET_SSE2 && reload_completed"
2481 [(set (match_dup 0) (match_dup 1))]
2483 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of a V2DF operand into a DF destination.  The first
;; alternative (memory destination, register source) emits a movlpd store.
;; Destination and source may not both be memory.
2486 (define_insn "sse2_storelpd"
2487 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2489 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2490 (parallel [(const_int 0)])))]
2491 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2493 movlpd\t{%1, %0|%0, %1}
2496 [(set_attr "type" "ssemov")
2497 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload rewrite: extracting element 0 of a V2DF operand into a DF
;; register becomes a plain DF move.  The source is re-expressed in DFmode
;; either as the hard register with the same REGNO or through gen_lowpart,
;; and then moved into operand 0.
2500 [(set (match_operand:DF 0 "register_operand" "")
2502 (match_operand:V2DF 1 "nonimmediate_operand" "")
2503 (parallel [(const_int 0)])))]
2504 "TARGET_SSE2 && reload_completed"
2507 rtx op1 = operands[1];
2509 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2511 op1 = gen_lowpart (DFmode, op1);
2512 emit_move_insn (operands[0], op1);
;; Builds a V2DF from element 0 of operand 1 together with the DF
;; operand 2.  Visible alternatives:
;;   0: operand 1 tied, operand 2 in memory   -> movhpd load
;;   1: operand 1 tied, operand 2 in register -> unpcklpd
;;   2: operand 2 tied, operand 1 in register -> shufpd $1
;;   3: offsettable memory destination (type "other")
;; NOTE(review): in alternative 2 the destination register initially holds
;; the DF operand 2; `shufpd $1, %1, %0' appears to yield
;; { old high half of %0, low half of %1 } rather than
;; { low half of %1, operand 2 } -- verify against the shufpd definition.
2516 (define_insn "sse2_loadhpd"
2517 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2520 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2521 (parallel [(const_int 0)]))
2522 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2523 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2525 movhpd\t{%2, %0|%0, %2}
2526 unpcklpd\t{%2, %0|%0, %2}
2527 shufpd\t{$1, %1, %0|%0, %1, 1}
2529 [(set_attr "type" "ssemov,sselog,sselog,other")
2530 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload rewrite: when a V2DF memory operand keeps its own element 0
;; and receives a DF register as the other element, only the second half
;; changes, so this becomes a DF store to byte offset 8 of that memory.
2533 [(set (match_operand:V2DF 0 "memory_operand" "")
2535 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2536 (match_operand:DF 1 "register_operand" "")))]
2537 "TARGET_SSE2 && reload_completed"
2538 [(set (match_dup 0) (match_dup 1))]
2540 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Builds a V2DF whose element 0 is the DF operand 2 and whose element 1
;; is element 1 of the V2DF operand 1.  Alternatives:
;;   0: operand 1 is the zero constant, operand 2 in memory -> movsd load
;;   1: operand 1 tied, operand 2 in memory                 -> movlpd load
;;   2: operand 1 tied, operand 2 in register               -> movsd
;;   3: operand 2 tied, operand 1 in register               -> shufpd $2
;;   4: operand 2 tied, operand 1 in offsettable memory     -> movhpd from
;;      the high half of operand 1 (%H1)
;;   5: memory destination; left as "#" for the post-reload split
;; In alternative 3 the destination already holds operand 2 in its low
;; half, so the shufpd source must be operand 1: with immediate 2 the low
;; result element stays as the destination's low element and the high
;; result element is taken from the source's high element.  Using %2 there
;; would name the destination register itself and never read operand 1.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
	(vec_concat:V2DF
	  (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
	  (vec_select:DF
	    (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
	    (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload rewrite: when a V2DF memory operand receives a DF register
;; as its element 0 and keeps its own element 1, only the first half
;; changes, so this becomes a DF store to byte offset 0 of that memory.
;; (Offset 8 would overwrite element 1, which the pattern says must be
;; preserved, and leave element 0 unchanged.)
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
	(vec_concat:V2DF
	  (match_operand:DF 1 "register_operand" "")
	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
2572 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Element-1 extraction from V2DF for targets with SSE but without SSE2,
;; implemented with single-precision moves:
;;   0: memory destination           -> movhps store
;;   1: register to register         -> movhlps
;;   2: offsettable memory source    -> movlps from its high half (%H1)
2574 (define_insn "*vec_extractv2df_1_sse"
2575 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2577 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2578 (parallel [(const_int 1)])))]
2579 "!TARGET_SSE2 && TARGET_SSE
2580 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2582 movhps\t{%1, %0|%0, %1}
2583 movhlps\t{%1, %0|%0, %1}
2584 movlps\t{%H1, %0|%0, %H1}"
2585 [(set_attr "type" "ssemov")
2586 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Element-0 extraction from V2DF for targets with SSE but without SSE2,
;; implemented with single-precision moves:
;;   0: memory destination   -> movlps store
;;   1: register to register -> movaps
;;   2: memory source        -> movlps load
2588 (define_insn "*vec_extractv2df_0_sse"
2589 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2591 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2592 (parallel [(const_int 0)])))]
2593 "!TARGET_SSE2 && TARGET_SSE
2594 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2596 movlps\t{%1, %0|%0, %1}
2597 movaps\t{%1, %0|%0, %1}
2598 movlps\t{%1, %0|%0, %1}"
2599 [(set_attr "type" "ssemov")
2600 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merges two V2DF values: element 0 comes from operand 2 and element 1
;; from operand 1 (vec_merge with mask 1).  Alternatives:
;;   0: operand 1 tied, operand 2 in register            -> movsd
;;   1: operand 1 tied, operand 2 in memory              -> movlpd load
;;   2: memory destination, operand 1 tied               -> movlpd store
;;   3: operand 2 tied, operand 1 in register            -> shufpd $2
;;   4: operand 2 tied, operand 1 in offsettable memory  -> movhps from
;;      the high half of operand 1 (%H1)
;;   5: offsettable memory destination, operand 2 tied   -> movhps store
;;      into the high half of the destination (%H0)
;; In alternative 3 the destination already holds operand 2, so the shufpd
;; source must be operand 1: with immediate 2 the low result element stays
;; as the destination's low element and the high result element is taken
;; from the source's high element.  Using %2 there would name the
;; destination register itself and never read operand 1.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
	(vec_merge:V2DF
	  (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF register from a single DF operand (SSE register or
;; memory) by emitting movddup.
2619 (define_insn "*vec_dupv2df_sse3"
2620 [(set (match_operand:V2DF 0 "register_operand" "=x")
2622 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2624 "movddup\t{%1, %0|%0, %1}"
2625 [(set_attr "type" "sselog1")
2626 (set_attr "mode" "DF")])
;; Builds a V2DF register from a single DF register operand that is tied
;; to the destination (constraint "0"), so the operation happens in place.
2628 (define_insn "*vec_dupv2df"
2629 [(set (match_operand:V2DF 0 "register_operand" "=x")
2631 (match_operand:DF 1 "register_operand" "0")))]
2634 [(set_attr "type" "sselog1")
2635 (set_attr "mode" "V2DF")])
;; V2DF construction from the DF operand 1 (SSE register or memory) that
;; is implemented with a single movddup of that operand.
2637 (define_insn "*vec_concatv2df_sse3"
2638 [(set (match_operand:V2DF 0 "register_operand" "=x")
2640 (match_operand:DF 1 "nonimmediate_operand" "xm")
2643 "movddup\t{%1, %0|%0, %1}"
2644 [(set_attr "type" "sselog1")
2645 (set_attr "mode" "DF")])
;; Builds a V2DF register from two DF operands.  Alternatives:
;;   0: operand 1 tied, operand 2 in a Y2 register -> unpcklpd
;;   1: operand 1 tied, operand 2 in memory        -> movhpd load
;;   2: operand 1 in memory, operand 2 the constant
;;      accepted by "C"                            -> movsd load
;;   3: operand 1 tied, operand 2 in an x register -> movlhps
;;   4: operand 1 tied, operand 2 in memory        -> movhps load
;; The first three alternatives use the Y2 register constraint; the last
;; two use plain x and single-precision moves.
2647 (define_insn "*vec_concatv2df"
2648 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2650 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2651 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2654 unpcklpd\t{%2, %0|%0, %2}
2655 movhpd\t{%2, %0|%0, %2}
2656 movsd\t{%1, %0|%0, %1}
2657 movlhps\t{%2, %0|%0, %2}
2658 movhps\t{%2, %0|%0, %2}"
2659 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2660 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element insertion into a V2DF register: delegates to
;; ix86_expand_vector_set with the vector (operand 0), the DF value
;; (operand 1) and the constant element index read from operand 2.
2662 (define_expand "vec_setv2df"
2663 [(match_operand:V2DF 0 "register_operand" "")
2664 (match_operand:DF 1 "register_operand" "")
2665 (match_operand 2 "const_int_operand" "")]
2668 ix86_expand_vector_set (false, operands[0], operands[1],
2669 INTVAL (operands[2]));
;; Element extraction from a V2DF register: delegates to
;; ix86_expand_vector_extract with the DF destination (operand 0), the
;; vector (operand 1) and the constant element index read from operand 2.
2673 (define_expand "vec_extractv2df"
2674 [(match_operand:DF 0 "register_operand" "")
2675 (match_operand:V2DF 1 "register_operand" "")
2676 (match_operand 2 "const_int_operand" "")]
2679 ix86_expand_vector_extract (false, operands[0], operands[1],
2680 INTVAL (operands[2]));
;; V2DF initialisation: delegates to ix86_expand_vector_init with the
;; destination register (operand 0) and the unconstrained initialiser
;; (operand 1).
2684 (define_expand "vec_initv2df"
2685 [(match_operand:V2DF 0 "register_operand" "")
2686 (match_operand 1 "" "")]
2689 ix86_expand_vector_init (false, operands[0], operands[1]);
2693 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2695 ;; Parallel integral arithmetic
2697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation for every SSEMODEI integer vector mode.  The preparation
;; statement forces the zero vector of the mode into a register and
;; supplies it as operand 2 of the expansion.
2699 (define_expand "neg<mode>2"
2700 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2703 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2705 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Element-wise addition for every SSEMODEI integer vector mode.  The
;; operands are first legitimised by ix86_fixup_binary_operands_no_copy
;; for PLUS.
2707 (define_expand "add<mode>3"
2708 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2709 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2710 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2712 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Insn for integer vector addition: emits padd with the b/w/d/q suffix
;; chosen by the ssevecsize mode attribute.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2714 (define_insn "*add<mode>3"
2715 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2717 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2718 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2719 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2720 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2721 [(set_attr "type" "sseiadd")
2722 (set_attr "prefix_data16" "1")
2723 (set_attr "mode" "TI")])
;; Addition checked against SS_PLUS for the byte and word vector modes
;; (SSEMODE12); emits paddsb / paddsw.  Operand 1 is tied to the
;; destination and marked commutative.
2725 (define_insn "sse2_ssadd<mode>3"
2726 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2728 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2729 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2730 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2731 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2732 [(set_attr "type" "sseiadd")
2733 (set_attr "prefix_data16" "1")
2734 (set_attr "mode" "TI")])
;; Addition checked against US_PLUS for the byte and word vector modes
;; (SSEMODE12); emits paddusb / paddusw.  Operand 1 is tied to the
;; destination and marked commutative.
2736 (define_insn "sse2_usadd<mode>3"
2737 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2739 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2740 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2741 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2742 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2743 [(set_attr "type" "sseiadd")
2744 (set_attr "prefix_data16" "1")
2745 (set_attr "mode" "TI")])
;; Element-wise subtraction for every SSEMODEI integer vector mode.
;; Operand 1 must be a register; the operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for MINUS.
2747 (define_expand "sub<mode>3"
2748 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2749 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2750 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2752 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Insn for integer vector subtraction: emits psub with the b/w/d/q suffix
;; chosen by ssevecsize.  Operand 1 is tied to the destination and, unlike
;; the add patterns, is not marked commutative.
2754 (define_insn "*sub<mode>3"
2755 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2757 (match_operand:SSEMODEI 1 "register_operand" "0")
2758 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2760 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2761 [(set_attr "type" "sseiadd")
2762 (set_attr "prefix_data16" "1")
2763 (set_attr "mode" "TI")])
;; Subtraction for the byte and word vector modes (SSEMODE12) that emits
;; psubsb / psubsw.  Operand 1 is tied to the destination.
2765 (define_insn "sse2_sssub<mode>3"
2766 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2768 (match_operand:SSEMODE12 1 "register_operand" "0")
2769 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2771 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2772 [(set_attr "type" "sseiadd")
2773 (set_attr "prefix_data16" "1")
2774 (set_attr "mode" "TI")])
;; Subtraction for the byte and word vector modes (SSEMODE12) that emits
;; psubusb / psubusw.  Operand 1 is tied to the destination.
2776 (define_insn "sse2_ussub<mode>3"
2777 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2779 (match_operand:SSEMODE12 1 "register_operand" "0")
2780 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2782 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2783 [(set_attr "type" "sseiadd")
2784 (set_attr "prefix_data16" "1")
2785 (set_attr "mode" "TI")])
;; V16QI multiplication synthesised from word multiplies.  Twelve V16QI
;; temporaries are allocated; both inputs are spread to one byte per word
;; with punpck{h,l}bw, the two halves are multiplied with mulv8hi3 on
;; V8HI views of the temporaries, and the low product bytes are gathered
;; back into byte order by three rounds of punpckhbw/punpcklbw followed by
;; a final punpcklbw into op0.
2787 (define_expand "mulv16qi3"
2788 [(set (match_operand:V16QI 0 "register_operand" "")
2789 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2790 (match_operand:V16QI 2 "register_operand" "")))]
2796 for (i = 0; i < 12; ++i)
2797 t[i] = gen_reg_rtx (V16QImode);
2799 /* Unpack data such that we've got a source byte in each low byte of
2800 each word. We don't care what goes into the high byte of each word.
2801 Rather than trying to get zero in there, most convenient is to let
2802 it be a copy of the low byte. */
2803 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2804 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2805 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2806 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2808 /* Multiply words. The end-of-line annotations here give a picture of what
2809 the output of that instruction looks like. Dot means don't care; the
2810 letters are the bytes of the result with A being the most significant. */
2811 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2812 gen_lowpart (V8HImode, t[0]),
2813 gen_lowpart (V8HImode, t[1])));
2814 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2815 gen_lowpart (V8HImode, t[2]),
2816 gen_lowpart (V8HImode, t[3])));
2818 /* Extract the relevant bytes and merge them back together. */
2819 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2820 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2821 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2822 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2823 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2824 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2827 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI element-wise multiplication.  The operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for MULT before matching *mulv8hi3.
2831 (define_expand "mulv8hi3"
2832 [(set (match_operand:V8HI 0 "register_operand" "")
2833 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2834 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2836 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for V8HI multiplication; emits pmullw.  Operand 1 is tied to the
;; destination and marked commutative.
2838 (define_insn "*mulv8hi3"
2839 [(set (match_operand:V8HI 0 "register_operand" "=x")
2840 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2841 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2842 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2843 "pmullw\t{%2, %0|%0, %2}"
2844 [(set_attr "type" "sseimul")
2845 (set_attr "prefix_data16" "1")
2846 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply that is implemented by
;; *smulv8hi3_highpart.  The operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for MULT.
2848 (define_expand "smulv8hi3_highpart"
2849 [(set (match_operand:V8HI 0 "register_operand" "")
2854 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2856 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2859 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI high-part multiply; emits pmulhw.  Operand 1 is tied
;; to the destination and marked commutative.
2861 (define_insn "*smulv8hi3_highpart"
2862 [(set (match_operand:V8HI 0 "register_operand" "=x")
2867 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2869 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2871 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2872 "pmulhw\t{%2, %0|%0, %2}"
2873 [(set_attr "type" "sseimul")
2874 (set_attr "prefix_data16" "1")
2875 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply that is implemented by
;; *umulv8hi3_highpart.  The operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for MULT.
2877 (define_expand "umulv8hi3_highpart"
2878 [(set (match_operand:V8HI 0 "register_operand" "")
2883 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2885 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2888 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI high-part multiply; emits pmulhuw.  Operand 1 is tied
;; to the destination and marked commutative.
2890 (define_insn "*umulv8hi3_highpart"
2891 [(set (match_operand:V8HI 0 "register_operand" "=x")
2896 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2898 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2900 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2901 "pmulhuw\t{%2, %0|%0, %2}"
2902 [(set_attr "type" "sseimul")
2903 (set_attr "prefix_data16" "1")
2904 (set_attr "mode" "TI")])
;; Widening multiply of elements 0 and 2 of two V4SI operands into a V2DI
;; result; emits pmuludq.  Operand 1 is tied to the destination and marked
;; commutative.
2906 (define_insn "sse2_umulv2siv2di3"
2907 [(set (match_operand:V2DI 0 "register_operand" "=x")
2911 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2912 (parallel [(const_int 0) (const_int 2)])))
2915 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2916 (parallel [(const_int 0) (const_int 2)])))))]
2917 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2918 "pmuludq\t{%2, %0|%0, %2}"
2919 [(set_attr "type" "sseimul")
2920 (set_attr "prefix_data16" "1")
2921 (set_attr "mode" "TI")])
;; SSE4.1 widening multiply of elements 0 and 2 of two V4SI operands into
;; a V2DI result; emits pmuldq.  Same operand layout as
;; sse2_umulv2siv2di3 but gated on TARGET_SSE4_1.
2923 (define_insn "sse4_1_mulv2siv2di3"
2924 [(set (match_operand:V2DI 0 "register_operand" "=x")
2928 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2929 (parallel [(const_int 0) (const_int 2)])))
2932 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2933 (parallel [(const_int 0) (const_int 2)])))))]
2934 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2935 "pmuldq\t{%2, %0|%0, %2}"
2936 [(set_attr "type" "sseimul")
2937 (set_attr "prefix_extra" "1")
2938 (set_attr "mode" "TI")])
;; V8HI x V8HI -> V4SI combination emitted as pmaddwd.  The pattern selects
;; two V4HI groups from each input -- one list starting at index 0 and one
;; starting at index 1 and ending at index 7 -- reusing operands 1 and 2
;; through match_dup for the second group.
2940 (define_insn "sse2_pmaddwd"
2941 [(set (match_operand:V4SI 0 "register_operand" "=x")
2946 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2947 (parallel [(const_int 0)
2953 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2954 (parallel [(const_int 0)
2960 (vec_select:V4HI (match_dup 1)
2961 (parallel [(const_int 1)
2966 (vec_select:V4HI (match_dup 2)
2967 (parallel [(const_int 1)
2970 (const_int 7)]))))))]
2971 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2972 "pmaddwd\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "sseiadd")
2974 (set_attr "prefix_data16" "1")
2975 (set_attr "mode" "TI")])
;; V4SI element-wise multiplication.  One path only legitimises the
;; operands with ix86_fixup_binary_operands_no_copy so that the mult
;; pattern can be matched directly.  The other path synthesises the
;; product from two gen_sse2_umulv2siv2di3 multiplies: one on elements
;; 0 and 2, and one on elements 1 and 3 after both inputs are shifted down
;; with gen_sse2_lshrti3; the low result words are then compacted with
;; gen_sse2_pshufd_1 and merged with gen_sse2_punpckldq.
2977 (define_expand "mulv4si3"
2978 [(set (match_operand:V4SI 0 "register_operand" "")
2979 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2980 (match_operand:V4SI 2 "register_operand" "")))]
2984 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
2987 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2993 t1 = gen_reg_rtx (V4SImode);
2994 t2 = gen_reg_rtx (V4SImode);
2995 t3 = gen_reg_rtx (V4SImode);
2996 t4 = gen_reg_rtx (V4SImode);
2997 t5 = gen_reg_rtx (V4SImode);
2998 t6 = gen_reg_rtx (V4SImode);
2999 thirtytwo = GEN_INT (32);
3001 /* Multiply elements 2 and 0. */
3002 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3005 /* Shift both input vectors down one element, so that elements 3
3006 and 1 are now in the slots for elements 2 and 0. For K8, at
3007 least, this is faster than using a shuffle. */
3008 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3009 gen_lowpart (TImode, op1),
3011 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3012 gen_lowpart (TImode, op2),
3014 /* Multiply elements 3 and 1. */
3015 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3018 /* Move the results in element 2 down to element 1; we don't care
3019 what goes in elements 2 and 3. */
3020 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3021 const0_rtx, const0_rtx));
3022 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3023 const0_rtx, const0_rtx));
3025 /* Merge the parts back together. */
3026 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; SSE4.1 insn for V4SI multiplication; emits pmulld.  Operand 1 is tied to
;; the destination and marked commutative.
3031 (define_insn "*sse4_1_mulv4si3"
3032 [(set (match_operand:V4SI 0 "register_operand" "=x")
3033 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3034 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3035 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3036 "pmulld\t{%2, %0|%0, %2}"
3037 [(set_attr "type" "sseimul")
3038 (set_attr "prefix_extra" "1")
3039 (set_attr "mode" "TI")])
;; V2DI element-wise multiplication built from 32x32->64 multiplies
;; (gen_sse2_umulv2siv2di3): the low halves are multiplied directly, each
;; input's high half (brought down by a 32-bit logical right shift) is
;; multiplied by the other input's low half, both cross products are
;; shifted left by 32, and the three partial results are added.
3041 (define_expand "mulv2di3"
3042 [(set (match_operand:V2DI 0 "register_operand" "")
3043 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3044 (match_operand:V2DI 2 "register_operand" "")))]
3047 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3053 t1 = gen_reg_rtx (V2DImode);
3054 t2 = gen_reg_rtx (V2DImode);
3055 t3 = gen_reg_rtx (V2DImode);
3056 t4 = gen_reg_rtx (V2DImode);
3057 t5 = gen_reg_rtx (V2DImode);
3058 t6 = gen_reg_rtx (V2DImode);
3059 thirtytwo = GEN_INT (32);
3061 /* Multiply low parts. */
3062 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3063 gen_lowpart (V4SImode, op2)));
3065 /* Shift input vectors right 32 bits so we can multiply high parts. */
3066 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3067 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3069 /* Multiply high parts by low parts. */
3070 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3071 gen_lowpart (V4SImode, t3)));
3072 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3073 gen_lowpart (V4SImode, t2)));
3075 /* Shift them back. */
3076 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3077 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3079 /* Add the three parts together. */
3080 emit_insn (gen_addv2di3 (t6, t1, t4));
3081 emit_insn (gen_addv2di3 (op0, t6, t5));
;; V8HI x V8HI -> V4SI widening multiply: computes the low product words
;; with gen_mulv8hi3 and the high product words with
;; gen_smulv8hi3_highpart into two V8HI temporaries, then interleaves them
;; with gen_vec_interleave_highv8hi into a V8HI view of operand 0.
3085 (define_expand "vec_widen_smult_hi_v8hi"
3086 [(match_operand:V4SI 0 "register_operand" "")
3087 (match_operand:V8HI 1 "register_operand" "")
3088 (match_operand:V8HI 2 "register_operand" "")]
3091 rtx op1, op2, t1, t2, dest;
3095 t1 = gen_reg_rtx (V8HImode);
3096 t2 = gen_reg_rtx (V8HImode);
3097 dest = gen_lowpart (V8HImode, operands[0]);
3099 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3100 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3101 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; V8HI x V8HI -> V4SI widening multiply: computes the low product words
;; with gen_mulv8hi3 and the high product words with
;; gen_smulv8hi3_highpart into two V8HI temporaries, then interleaves them
;; with gen_vec_interleave_lowv8hi into a V8HI view of operand 0.
3105 (define_expand "vec_widen_smult_lo_v8hi"
3106 [(match_operand:V4SI 0 "register_operand" "")
3107 (match_operand:V8HI 1 "register_operand" "")
3108 (match_operand:V8HI 2 "register_operand" "")]
3111 rtx op1, op2, t1, t2, dest;
3115 t1 = gen_reg_rtx (V8HImode);
3116 t2 = gen_reg_rtx (V8HImode);
3117 dest = gen_lowpart (V8HImode, operands[0]);
3119 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3120 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3121 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; V8HI x V8HI -> V4SI widening multiply: computes the low product words
;; with gen_mulv8hi3 and the high product words with
;; gen_umulv8hi3_highpart into two V8HI temporaries, then interleaves them
;; with gen_vec_interleave_highv8hi into a V8HI view of operand 0.
3125 (define_expand "vec_widen_umult_hi_v8hi"
3126 [(match_operand:V4SI 0 "register_operand" "")
3127 (match_operand:V8HI 1 "register_operand" "")
3128 (match_operand:V8HI 2 "register_operand" "")]
3131 rtx op1, op2, t1, t2, dest;
3135 t1 = gen_reg_rtx (V8HImode);
3136 t2 = gen_reg_rtx (V8HImode);
3137 dest = gen_lowpart (V8HImode, operands[0]);
3139 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3140 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3141 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; V8HI x V8HI -> V4SI widening multiply: computes the low product words
;; with gen_mulv8hi3 and the high product words with
;; gen_umulv8hi3_highpart into two V8HI temporaries, then interleaves them
;; with gen_vec_interleave_lowv8hi into a V8HI view of operand 0.
3145 (define_expand "vec_widen_umult_lo_v8hi"
3146 [(match_operand:V4SI 0 "register_operand" "")
3147 (match_operand:V8HI 1 "register_operand" "")
3148 (match_operand:V8HI 2 "register_operand" "")]
3151 rtx op1, op2, t1, t2, dest;
3155 t1 = gen_reg_rtx (V8HImode);
3156 t2 = gen_reg_rtx (V8HImode);
3157 dest = gen_lowpart (V8HImode, operands[0]);
3159 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3160 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3161 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed V4SI x V4SI -> V2DI widening multiply of the high two elements.
;; Each input is interleaved with itself (high halves) so that the two
;; elements of interest land in positions 0 and 2, which is where the
;; widening multiply insn reads its inputs.
;;
;; The multiply must be the signed one: sse2_umulv2siv2di3 emits pmuludq,
;; which gives wrong products for negative elements, so this expander uses
;; sse4_1_mulv2siv2di3 (pmuldq) and is enabled only for TARGET_SSE4_1,
;; the condition that insn requires.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Duplicate each high element into an even/odd pair.  */
  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
  /* Signed 32x32->64 multiply of elements 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
;; Signed V4SI x V4SI -> V2DI widening multiply of the low two elements.
;; Each input is interleaved with itself (low halves) so that the two
;; elements of interest land in positions 0 and 2, which is where the
;; widening multiply insn reads its inputs.
;;
;; The multiply must be the signed one: sse2_umulv2siv2di3 emits pmuludq,
;; which gives wrong products for negative elements, so this expander uses
;; sse4_1_mulv2siv2di3 (pmuldq) and is enabled only for TARGET_SSE4_1,
;; the condition that insn requires.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Duplicate each low element into an even/odd pair.  */
  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
  /* Signed 32x32->64 multiply of elements 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
;; V4SI x V4SI -> V2DI widening multiply: each input is interleaved with
;; itself through gen_vec_interleave_highv4si into a V4SI temporary, and
;; the temporaries are multiplied with gen_sse2_umulv2siv2di3 (pmuludq on
;; elements 0 and 2) straight into operand 0.
3203 (define_expand "vec_widen_umult_hi_v4si"
3204 [(match_operand:V2DI 0 "register_operand" "")
3205 (match_operand:V4SI 1 "register_operand" "")
3206 (match_operand:V4SI 2 "register_operand" "")]
3209 rtx op1, op2, t1, t2;
3213 t1 = gen_reg_rtx (V4SImode);
3214 t2 = gen_reg_rtx (V4SImode);
3216 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3217 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3218 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander: widening unsigned multiply of V4SI inputs into V2DI ("lo"
;; half, per the pattern name).  Each input is low-interleaved with itself
;; so the wanted elements are duplicated, then the temporaries are
;; multiplied with gen_sse2_umulv2siv2di3 into operand 0.
3222 (define_expand "vec_widen_umult_lo_v4si"
3223 [(match_operand:V2DI 0 "register_operand" "")
3224 (match_operand:V4SI 1 "register_operand" "")
3225 (match_operand:V4SI 2 "register_operand" "")]
3228 rtx op1, op2, t1, t2;
3232 t1 = gen_reg_rtx (V4SImode);
3233 t2 = gen_reg_rtx (V4SImode);
3235 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3236 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3237 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander: dot product of V8HI operands 1 and 2 accumulated with V4SI
;; operand 3.  gen_sse2_pmaddwd forms the V4SI temporary t from operands 1
;; and 2; operand 0 is then set to operand 3 + t.
3241 (define_expand "sdot_prodv8hi"
3242 [(match_operand:V4SI 0 "register_operand" "")
3243 (match_operand:V8HI 1 "register_operand" "")
3244 (match_operand:V8HI 2 "register_operand" "")
3245 (match_operand:V4SI 3 "register_operand" "")]
3248 rtx t = gen_reg_rtx (V4SImode);
3249 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3250 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Expander: dot product of V4SI operands 1 and 2 accumulated with V2DI
;; operand 3, built from two widening multiplies:
;;   t1 = umul (op1, op2) + op3
;;   t2, t3 = op1, op2 logically shifted right as whole TImode values
;;   t4 = umul (t2, t3)
;;   op0 = t1 + t4
3254 (define_expand "udot_prodv4si"
3255 [(match_operand:V2DI 0 "register_operand" "")
3256 (match_operand:V4SI 1 "register_operand" "")
3257 (match_operand:V4SI 2 "register_operand" "")
3258 (match_operand:V2DI 3 "register_operand" "")]
3263 t1 = gen_reg_rtx (V2DImode);
3264 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3265 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3267 t2 = gen_reg_rtx (V4SImode);
3268 t3 = gen_reg_rtx (V4SImode);
3269 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3270 gen_lowpart (TImode, operands[1]),
3272 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3273 gen_lowpart (TImode, operands[2]),
3276 t4 = gen_reg_rtx (V2DImode);
3277 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3279 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element right shift emitted as psra<ssevecsize> (psraw / psrad) for
;; the SSEMODE24 modes (V8HI, V4SI).  Operand 1 is tied to the destination
;; (constraint "0"); the TImode count in operand 2 may be a register or a
;; constant ("xn").
3283 (define_insn "ashr<mode>3"
3284 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3286 (match_operand:SSEMODE24 1 "register_operand" "0")
3287 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3289 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3290 [(set_attr "type" "sseishft")
3291 (set_attr "prefix_data16" "1")
3292 (set_attr "mode" "TI")])
;; Per-element logical right shift (lshiftrt) emitted as psrl<ssevecsize>
;; (psrlw / psrld / psrlq) for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; Operand 1 is tied to the destination; the TImode count in operand 2 may
;; be a register or a constant ("xn").
3294 (define_insn "lshr<mode>3"
3295 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3296 (lshiftrt:SSEMODE248
3297 (match_operand:SSEMODE248 1 "register_operand" "0")
3298 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3300 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3301 [(set_attr "type" "sseishft")
3302 (set_attr "prefix_data16" "1")
3303 (set_attr "mode" "TI")])
;; Per-element left shift emitted as psll<ssevecsize> (psllw / pslld /
;; psllq) for the SSEMODE248 modes (V8HI, V4SI, V2DI).  Operand 1 is tied
;; to the destination; the TImode count in operand 2 may be a register or
;; a constant ("xn").
3305 (define_insn "ashl<mode>3"
3306 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3308 (match_operand:SSEMODE248 1 "register_operand" "0")
3309 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3311 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3312 [(set_attr "type" "sseishft")
3313 (set_attr "prefix_data16" "1")
3314 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift, emitted as pslldq.  The RTL count
;; must satisfy const_0_to_255_mul_8_operand; it is divided by 8 before
;; being printed as the pslldq immediate (presumably a bit count in RTL
;; and a byte count in the instruction).
3316 (define_insn "sse2_ashlti3"
3317 [(set (match_operand:TI 0 "register_operand" "=x")
3318 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3319 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3322 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3323 return "pslldq\t{%2, %0|%0, %2}";
3325 [(set_attr "type" "sseishft")
3326 (set_attr "prefix_data16" "1")
3327 (set_attr "mode" "TI")])
;; Expander: whole-vector left shift for the SSEMODEI integer vector modes.
;; Operand 2 is tested against const_0_to_255_mul_8_operand, and operands
;; 0 and 1 are rewritten as their TImode lowparts so that the
;; (ashift:TI ...) template above applies.
3329 (define_expand "vec_shl_<mode>"
3330 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3331 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3332 (match_operand:SI 2 "general_operand" "")))]
3335 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3337 operands[0] = gen_lowpart (TImode, operands[0]);
3338 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift, emitted as psrldq.  The
;; RTL count must satisfy const_0_to_255_mul_8_operand; it is divided by 8
;; before being printed as the psrldq immediate (presumably a bit count in
;; RTL and a byte count in the instruction).
3341 (define_insn "sse2_lshrti3"
3342 [(set (match_operand:TI 0 "register_operand" "=x")
3343 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3344 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3347 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3348 return "psrldq\t{%2, %0|%0, %2}";
3350 [(set_attr "type" "sseishft")
3351 (set_attr "prefix_data16" "1")
3352 (set_attr "mode" "TI")])
;; Expander: whole-vector logical right shift for the SSEMODEI integer
;; vector modes.  Operand 2 is tested against
;; const_0_to_255_mul_8_operand, and operands 0 and 1 are rewritten as
;; their TImode lowparts so that the (lshiftrt:TI ...) template applies.
3354 (define_expand "vec_shr_<mode>"
3355 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3356 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3357 (match_operand:SI 2 "general_operand" "")))]
3360 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3362 operands[0] = gen_lowpart (TImode, operands[0]);
3363 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for the unsigned V16QI maximum.  The only preparation is to
;; legitimize the operand combination for UMAX via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3366 (define_expand "umaxv16qi3"
3367 [(set (match_operand:V16QI 0 "register_operand" "")
3368 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3369 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3371 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned byte-wise maximum, emitted as pmaxub.  Enabled for TARGET_SSE2
;; when ix86_binary_operator_ok accepts the operands.  "%0" marks operands
;; 1 and 2 as commutative, with operand 1 tied to the destination.
3373 (define_insn "*umaxv16qi3"
3374 [(set (match_operand:V16QI 0 "register_operand" "=x")
3375 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3376 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3377 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3378 "pmaxub\t{%2, %0|%0, %2}"
3379 [(set_attr "type" "sseiadd")
3380 (set_attr "prefix_data16" "1")
3381 (set_attr "mode" "TI")])
;; Expander for the signed V8HI maximum.  The only preparation is to
;; legitimize the operand combination for SMAX via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3383 (define_expand "smaxv8hi3"
3384 [(set (match_operand:V8HI 0 "register_operand" "")
3385 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3386 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3388 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed word-wise maximum, emitted as pmaxsw.  Enabled for TARGET_SSE2
;; when ix86_binary_operator_ok accepts the operands.  "%0" marks operands
;; 1 and 2 as commutative, with operand 1 tied to the destination.
3390 (define_insn "*smaxv8hi3"
3391 [(set (match_operand:V8HI 0 "register_operand" "=x")
3392 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3393 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3394 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3395 "pmaxsw\t{%2, %0|%0, %2}"
3396 [(set_attr "type" "sseiadd")
3397 (set_attr "prefix_data16" "1")
3398 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI maximum.  Two code paths are visible:
;; one only fixes up the operands for UMAX; the other synthesizes the
;; result as gen_sse2_ussubv8hi3 (op1, op2) + op2 (presumably an unsigned
;; saturating subtract, so the sum equals max (op1, op2)).  A fresh
;; temporary holds the difference when the destination is the same rtx as
;; op2, so op2 is not overwritten before the add.
3400 (define_expand "umaxv8hi3"
3401 [(set (match_operand:V8HI 0 "register_operand" "")
3402 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3403 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3407 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3410 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3411 if (rtx_equal_p (op3, op2))
3412 op3 = gen_reg_rtx (V8HImode);
3413 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3414 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for the signed maximum on the SSEMODE14 modes (V16QI, V4SI).
;; Two code paths are visible: one only fixes up the operands for SMAX;
;; the other builds a vcond-style operand array
;;   { dest, op1, op2, (gt op1 op2), op1, op2 }
;; i.e. dest = op1 > op2 ? op1 : op2, and passes it to
;; ix86_expand_int_vcond.
3419 (define_expand "smax<mode>3"
3420 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3421 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3422 (match_operand:SSEMODE14 2 "register_operand" "")))]
3426 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3432 xops[0] = operands[0];
3433 xops[1] = operands[1];
3434 xops[2] = operands[2];
3435 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3436 xops[4] = operands[1];
3437 xops[5] = operands[2];
3438 ok = ix86_expand_int_vcond (xops);
;; Signed maximum for the SSEMODE14 modes, emitted as pmaxs<ssevecsize>
;; (pmaxsb / pmaxsd).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.  Operands 1 and 2 are
;; commutative ("%0"), with operand 1 tied to the destination.
3444 (define_insn "*sse4_1_smax<mode>3"
3445 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3447 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3448 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3449 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3450 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3451 [(set_attr "type" "sseiadd")
3452 (set_attr "prefix_extra" "1")
3453 (set_attr "mode" "TI")])
;; Expander for the unsigned V4SI maximum.  Two code paths are visible:
;; one only fixes up the operands for UMAX; the other builds a vcond-style
;; operand array
;;   { dest, op1, op2, (gtu op1 op2), op1, op2 }
;; i.e. dest = op1 >u op2 ? op1 : op2, and passes it to
;; ix86_expand_int_vcond.
3455 (define_expand "umaxv4si3"
3456 [(set (match_operand:V4SI 0 "register_operand" "")
3457 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3458 (match_operand:V4SI 2 "register_operand" "")))]
3462 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3468 xops[0] = operands[0];
3469 xops[1] = operands[1];
3470 xops[2] = operands[2];
3471 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3472 xops[4] = operands[1];
3473 xops[5] = operands[2];
3474 ok = ix86_expand_int_vcond (xops);
;; Unsigned maximum for the SSEMODE24 modes, emitted as pmaxu<ssevecsize>
;; (pmaxuw / pmaxud).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.  Operands 1 and 2 are
;; commutative ("%0"), with operand 1 tied to the destination.
3480 (define_insn "*sse4_1_umax<mode>3"
3481 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3483 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3484 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3485 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3486 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3487 [(set_attr "type" "sseiadd")
3488 (set_attr "prefix_extra" "1")
3489 (set_attr "mode" "TI")])
;; Expander for the unsigned V16QI minimum.  The only preparation is to
;; legitimize the operand combination for UMIN via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3491 (define_expand "uminv16qi3"
3492 [(set (match_operand:V16QI 0 "register_operand" "")
3493 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3494 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3496 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned byte-wise minimum, emitted as pminub.  Enabled for TARGET_SSE2
;; when ix86_binary_operator_ok accepts the operands.  "%0" marks operands
;; 1 and 2 as commutative, with operand 1 tied to the destination.
3498 (define_insn "*uminv16qi3"
3499 [(set (match_operand:V16QI 0 "register_operand" "=x")
3500 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3501 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3502 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3503 "pminub\t{%2, %0|%0, %2}"
3504 [(set_attr "type" "sseiadd")
3505 (set_attr "prefix_data16" "1")
3506 (set_attr "mode" "TI")])
;; Expander for the signed V8HI minimum.  The only preparation is to
;; legitimize the operand combination for SMIN via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3508 (define_expand "sminv8hi3"
3509 [(set (match_operand:V8HI 0 "register_operand" "")
3510 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3511 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3513 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed word-wise minimum, emitted as pminsw.  Enabled for TARGET_SSE2
;; when ix86_binary_operator_ok accepts the operands.  "%0" marks operands
;; 1 and 2 as commutative, with operand 1 tied to the destination.
3515 (define_insn "*sminv8hi3"
3516 [(set (match_operand:V8HI 0 "register_operand" "=x")
3517 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3518 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3519 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3520 "pminsw\t{%2, %0|%0, %2}"
3521 [(set_attr "type" "sseiadd")
3522 (set_attr "prefix_data16" "1")
3523 (set_attr "mode" "TI")])
;; Expander for the signed minimum on the SSEMODE14 modes (V16QI, V4SI).
;; Two code paths are visible: one only fixes up the operands for SMIN;
;; the other builds a vcond-style operand array with the selected values
;; swapped relative to smax
;;   { dest, op2, op1, (gt op1 op2), op1, op2 }
;; i.e. dest = op1 > op2 ? op2 : op1, and passes it to
;; ix86_expand_int_vcond.
3525 (define_expand "smin<mode>3"
3526 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3527 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3528 (match_operand:SSEMODE14 2 "register_operand" "")))]
3532 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3538 xops[0] = operands[0];
3539 xops[1] = operands[2];
3540 xops[2] = operands[1];
3541 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3542 xops[4] = operands[1];
3543 xops[5] = operands[2];
3544 ok = ix86_expand_int_vcond (xops);
;; Signed minimum for the SSEMODE14 modes, emitted as pmins<ssevecsize>
;; (pminsb / pminsd).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.  Operands 1 and 2 are
;; commutative ("%0"), with operand 1 tied to the destination.
3550 (define_insn "*sse4_1_smin<mode>3"
3551 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3553 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3554 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3555 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3556 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3557 [(set_attr "type" "sseiadd")
3558 (set_attr "prefix_extra" "1")
3559 (set_attr "mode" "TI")])
;; Expander for the unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).
;; Two code paths are visible: one only fixes up the operands for UMIN;
;; the other builds a vcond-style operand array
;;   { dest, op2, op1, (gtu op1 op2), op1, op2 }
;; i.e. dest = op1 >u op2 ? op2 : op1, and passes it to
;; ix86_expand_int_vcond.
3561 (define_expand "umin<mode>3"
3562 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3563 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3564 (match_operand:SSEMODE24 2 "register_operand" "")))]
3568 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3574 xops[0] = operands[0];
3575 xops[1] = operands[2];
3576 xops[2] = operands[1];
3577 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3578 xops[4] = operands[1];
3579 xops[5] = operands[2];
3580 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes, emitted as pminu<ssevecsize>
;; (pminuw / pminud).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.  Operands 1 and 2 are
;; commutative ("%0"), with operand 1 tied to the destination.
3586 (define_insn "*sse4_1_umin<mode>3"
3587 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3589 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3590 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3591 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3592 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3593 [(set_attr "type" "sseiadd")
3594 (set_attr "prefix_extra" "1")
3595 (set_attr "mode" "TI")])
3597 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3599 ;; Parallel integral comparisons
3601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality comparison for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), emitted as pcmpeq<ssevecsize> (pcmpeqb / pcmpeqw / pcmpeqd).
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands; operands 1 and 2 are commutative ("%0").
3603 (define_insn "sse2_eq<mode>3"
3604 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3606 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3607 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3608 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3609 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3610 [(set_attr "type" "ssecmp")
3611 (set_attr "prefix_data16" "1")
3612 (set_attr "mode" "TI")])
;; Element-wise equality comparison for V2DI, emitted as pcmpeqq.  Enabled
;; for TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands;
;; operands 1 and 2 are commutative ("%0").
3614 (define_insn "sse4_1_eqv2di3"
3615 [(set (match_operand:V2DI 0 "register_operand" "=x")
3617 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3618 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3619 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3620 "pcmpeqq\t{%2, %0|%0, %2}"
3621 [(set_attr "type" "ssecmp")
3622 (set_attr "prefix_extra" "1")
3623 (set_attr "mode" "TI")])
;; Element-wise greater-than comparison for the SSEMODE124 modes, emitted
;; as pcmpgt<ssevecsize> (pcmpgtb / pcmpgtw / pcmpgtd).  Unlike the
;; equality patterns, operand 1 has no "%" marker: it must be a register
;; tied to the destination, and operand 2 may be a register or memory.
3625 (define_insn "sse2_gt<mode>3"
3626 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3628 (match_operand:SSEMODE124 1 "register_operand" "0")
3629 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3631 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3632 [(set_attr "type" "ssecmp")
3633 (set_attr "prefix_data16" "1")
3634 (set_attr "mode" "TI")])
;; Element-wise greater-than comparison for V2DI, emitted as pcmpgtq.
;; Operand 1 is tied to the destination (constraint "0", not commutative);
;; operand 2 may be a register or memory.
3636 (define_insn "sse4_2_gtv2di3"
3637 [(set (match_operand:V2DI 0 "register_operand" "=x")
3639 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3640 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3642 "pcmpgtq\t{%2, %0|%0, %2}"
3643 [(set_attr "type" "ssecmp")
3644 (set_attr "mode" "TI")])
;; Expander: vector conditional select for the SSEMODEI modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The work is delegated to ix86_expand_int_vcond, whose return value is
;; tested here.
3646 (define_expand "vcond<mode>"
3647 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3648 (if_then_else:SSEMODEI
3649 (match_operator 3 ""
3650 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3651 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3652 (match_operand:SSEMODEI 1 "general_operand" "")
3653 (match_operand:SSEMODEI 2 "general_operand" "")))]
3656 if (ix86_expand_int_vcond (operands))
;; Expander: vector conditional select ("vcondu" naming) for the SSEMODEI
;; modes, with the same operand layout as vcond<mode>:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; It is likewise delegated to ix86_expand_int_vcond.
3662 (define_expand "vcondu<mode>"
3663 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3664 (if_then_else:SSEMODEI
3665 (match_operator 3 ""
3666 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3667 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3668 (match_operand:SSEMODEI 1 "general_operand" "")
3669 (match_operand:SSEMODEI 2 "general_operand" "")))]
3672 if (ix86_expand_int_vcond (operands))
3678 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3680 ;; Parallel integral logical operations
3682 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander: one's complement for the SSEMODEI modes, expressed as an XOR
;; of operand 1 with operand 2.  Operand 2 is built here as a constant
;; vector holding constm1_rtx in each of the GET_MODE_NUNITS elements and
;; is forced into a register.
3684 (define_expand "one_cmpl<mode>2"
3685 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3686 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3690 int i, n = GET_MODE_NUNITS (<MODE>mode);
3691 rtvec v = rtvec_alloc (n);
3693 for (i = 0; i < n; ++i)
3694 RTVEC_ELT (v, i) = constm1_rtx;
3696 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the SSEMODEI modes.  The only preparation
;; is to legitimize the operand combination via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3699 (define_expand "and<mode>3"
3700 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3701 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3702 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3704 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND for the SSEMODEI modes, emitted as pand (the instruction is
;; the same for every element width).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands; operands 1 and 2 are
;; commutative ("%0").
3706 (define_insn "*and<mode>3"
3707 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3709 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3710 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3711 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3712 "pand\t{%2, %0|%0, %2}"
3713 [(set_attr "type" "sselog")
3714 (set_attr "prefix_data16" "1")
3715 (set_attr "mode" "TI")])
;; AND-NOT for the SSEMODEI modes, emitted as pandn: the complement
;; (not ...) applies to operand 1, which is tied to the destination, and
;; the result is combined with operand 2.  Not commutative, so there is no
;; "%" marker on operand 1.
3717 (define_insn "sse2_nand<mode>3"
3718 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3720 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3721 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3723 "pandn\t{%2, %0|%0, %2}"
3724 [(set_attr "type" "sselog")
3725 (set_attr "prefix_data16" "1")
3726 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on the SSEMODEI modes.  The only
;; preparation is to legitimize the operand combination via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3728 (define_expand "ior<mode>3"
3729 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3730 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3731 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3733 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise inclusive OR for the SSEMODEI modes, emitted as por.  Enabled
;; for TARGET_SSE2 when ix86_binary_operator_ok accepts the operands;
;; operands 1 and 2 are commutative ("%0").
3735 (define_insn "*ior<mode>3"
3736 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3738 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3739 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3740 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3741 "por\t{%2, %0|%0, %2}"
3742 [(set_attr "type" "sselog")
3743 (set_attr "prefix_data16" "1")
3744 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on the SSEMODEI modes.  The only
;; preparation is to legitimize the operand combination via
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3746 (define_expand "xor<mode>3"
3747 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3748 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3749 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3751 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise exclusive OR for the SSEMODEI modes, emitted as pxor.  Enabled
;; for TARGET_SSE2 when ix86_binary_operator_ok accepts the operands;
;; operands 1 and 2 are commutative ("%0").
3753 (define_insn "*xor<mode>3"
3754 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3756 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3757 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3758 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3759 "pxor\t{%2, %0|%0, %2}"
3760 [(set_attr "type" "sselog")
3761 (set_attr "prefix_data16" "1")
3762 (set_attr "mode" "TI")])
3764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3766 ;; Parallel integral element swizzling
3768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3771 ;; op1 = abcdefghijklmnop
3772 ;; op2 = qrstuvwxyz012345
3773 ;; h1 = aqbrcsdteufvgwhx
3774 ;; l1 = iyjzk0l1m2n3o4p5
3775 ;; h2 = aiqybjrzcks0dlt1
3776 ;; l2 = emu2fnv3gow4hpx5
3777 ;; h3 = aeimquy2bfjnrvz3
3778 ;; l3 = cgkosw04dhlptx15
3779 ;; result = bdfhjlnprtvxz135
;; Expander: pack two V8HI inputs into one V16QI result by truncation (per
;; the pattern name).  Both inputs are viewed as V16QI and passed through
;; three rounds of high/low byte interleaves (h1/l1, h2/l2, h3/l3); a final
;; low interleave of l3 and h3 writes operand 0.  No pack instruction is
;; used.
3780 (define_expand "vec_pack_trunc_v8hi"
3781 [(match_operand:V16QI 0 "register_operand" "")
3782 (match_operand:V8HI 1 "register_operand" "")
3783 (match_operand:V8HI 2 "register_operand" "")]
3786 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3788 op1 = gen_lowpart (V16QImode, operands[1]);
3789 op2 = gen_lowpart (V16QImode, operands[2]);
3790 h1 = gen_reg_rtx (V16QImode);
3791 l1 = gen_reg_rtx (V16QImode);
3792 h2 = gen_reg_rtx (V16QImode);
3793 l2 = gen_reg_rtx (V16QImode);
3794 h3 = gen_reg_rtx (V16QImode);
3795 l3 = gen_reg_rtx (V16QImode);
3797 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3798 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3799 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3800 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3801 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3802 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3803 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3814 ;; result = bdfhjlnp
;; Expander: pack two V4SI inputs into one V8HI result by truncation (per
;; the pattern name).  Both inputs are viewed as V8HI and passed through
;; two rounds of high/low word interleaves (h1/l1, h2/l2); a final low
;; interleave of l2 and h2 writes operand 0.
3815 (define_expand "vec_pack_trunc_v4si"
3816 [(match_operand:V8HI 0 "register_operand" "")
3817 (match_operand:V4SI 1 "register_operand" "")
3818 (match_operand:V4SI 2 "register_operand" "")]
3821 rtx op1, op2, h1, l1, h2, l2;
3823 op1 = gen_lowpart (V8HImode, operands[1]);
3824 op2 = gen_lowpart (V8HImode, operands[2]);
3825 h1 = gen_reg_rtx (V8HImode);
3826 l1 = gen_reg_rtx (V8HImode);
3827 h2 = gen_reg_rtx (V8HImode);
3828 l2 = gen_reg_rtx (V8HImode);
3830 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3831 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3832 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3833 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3834 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander: pack two V2DI inputs into one V4SI result by truncation (per
;; the pattern name).  Both inputs are viewed as V4SI and passed through
;; one round of high/low doubleword interleaves (h1/l1); a final low
;; interleave of l1 and h1 writes operand 0.
3844 (define_expand "vec_pack_trunc_v2di"
3845 [(match_operand:V4SI 0 "register_operand" "")
3846 (match_operand:V2DI 1 "register_operand" "")
3847 (match_operand:V2DI 2 "register_operand" "")]
3850 rtx op1, op2, h1, l1;
3852 op1 = gen_lowpart (V4SImode, operands[1]);
3853 op2 = gen_lowpart (V4SImode, operands[2]);
3854 h1 = gen_reg_rtx (V4SImode);
3855 l1 = gen_reg_rtx (V4SImode);
3857 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3858 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3859 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander: high-half interleave of two V16QI operands.  The selector
;; alternates indices 8..15 with 24..31.  The preparation code simply
;; emits sse2_punpckhbw with the same three operands.
3863 (define_expand "vec_interleave_highv16qi"
3864 [(set (match_operand:V16QI 0 "register_operand" "=x")
3867 (match_operand:V16QI 1 "register_operand" "0")
3868 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3869 (parallel [(const_int 8) (const_int 24)
3870 (const_int 9) (const_int 25)
3871 (const_int 10) (const_int 26)
3872 (const_int 11) (const_int 27)
3873 (const_int 12) (const_int 28)
3874 (const_int 13) (const_int 29)
3875 (const_int 14) (const_int 30)
3876 (const_int 15) (const_int 31)])))]
3879 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander: low-half interleave of two V16QI operands.  The selector
;; alternates indices 0..7 with 16..23.  The preparation code simply emits
;; sse2_punpcklbw with the same three operands.
3883 (define_expand "vec_interleave_lowv16qi"
3884 [(set (match_operand:V16QI 0 "register_operand" "=x")
3887 (match_operand:V16QI 1 "register_operand" "0")
3888 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3889 (parallel [(const_int 0) (const_int 16)
3890 (const_int 1) (const_int 17)
3891 (const_int 2) (const_int 18)
3892 (const_int 3) (const_int 19)
3893 (const_int 4) (const_int 20)
3894 (const_int 5) (const_int 21)
3895 (const_int 6) (const_int 22)
3896 (const_int 7) (const_int 23)])))]
3899 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander: high-half interleave of two V8HI operands.  The selector
;; alternates indices 4..7 with 12..15.  The preparation code simply emits
;; sse2_punpckhwd with the same three operands.
3903 (define_expand "vec_interleave_highv8hi"
3904 [(set (match_operand:V8HI 0 "register_operand" "=x")
3907 (match_operand:V8HI 1 "register_operand" "0")
3908 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3909 (parallel [(const_int 4) (const_int 12)
3910 (const_int 5) (const_int 13)
3911 (const_int 6) (const_int 14)
3912 (const_int 7) (const_int 15)])))]
3915 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander: low-half interleave of two V8HI operands.  The selector
;; alternates indices 0..3 with 8..11.  The preparation code simply emits
;; sse2_punpcklwd with the same three operands.
3919 (define_expand "vec_interleave_lowv8hi"
3920 [(set (match_operand:V8HI 0 "register_operand" "=x")
3923 (match_operand:V8HI 1 "register_operand" "0")
3924 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3925 (parallel [(const_int 0) (const_int 8)
3926 (const_int 1) (const_int 9)
3927 (const_int 2) (const_int 10)
3928 (const_int 3) (const_int 11)])))]
3931 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander: high-half interleave of two V4SI operands.  The selector is
;; 2, 6, 3, 7.  The preparation code simply emits sse2_punpckhdq with the
;; same three operands.
3935 (define_expand "vec_interleave_highv4si"
3936 [(set (match_operand:V4SI 0 "register_operand" "=x")
3939 (match_operand:V4SI 1 "register_operand" "0")
3940 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3941 (parallel [(const_int 2) (const_int 6)
3942 (const_int 3) (const_int 7)])))]
3945 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander: low-half interleave of two V4SI operands.  The selector is
;; 0, 4, 1, 5.  The preparation code simply emits sse2_punpckldq with the
;; same three operands.
3949 (define_expand "vec_interleave_lowv4si"
3950 [(set (match_operand:V4SI 0 "register_operand" "=x")
3953 (match_operand:V4SI 1 "register_operand" "0")
3954 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3955 (parallel [(const_int 0) (const_int 4)
3956 (const_int 1) (const_int 5)])))]
3959 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander: high-half interleave of two V2DI operands (the selector
;; starts at index 1).  The preparation code simply emits sse2_punpckhqdq
;; with the same three operands.
3963 (define_expand "vec_interleave_highv2di"
3964 [(set (match_operand:V2DI 0 "register_operand" "=x")
3967 (match_operand:V2DI 1 "register_operand" "0")
3968 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3969 (parallel [(const_int 1)
3973 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander: low-half interleave of two V2DI operands (the selector starts
;; at index 0).  The preparation code simply emits sse2_punpcklqdq with
;; the same three operands.
3977 (define_expand "vec_interleave_lowv2di"
3978 [(set (match_operand:V2DI 0 "register_operand" "=x")
3981 (match_operand:V2DI 1 "register_operand" "0")
3982 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3983 (parallel [(const_int 0)
3987 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; packsswb: narrows V8HI operands 1 and 2 into one V16QI result.  Operand
;; 1 is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): signed saturation is implied by the mnemonic; confirm it
;; against the RTL wrapper around each operand.
3991 (define_insn "sse2_packsswb"
3992 [(set (match_operand:V16QI 0 "register_operand" "=x")
3995 (match_operand:V8HI 1 "register_operand" "0"))
3997 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3999 "packsswb\t{%2, %0|%0, %2}"
4000 [(set_attr "type" "sselog")
4001 (set_attr "prefix_data16" "1")
4002 (set_attr "mode" "TI")])
;; packssdw: narrows V4SI operands 1 and 2 into one V8HI result.  Operand
;; 1 is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): signed saturation is implied by the mnemonic; confirm it
;; against the RTL wrapper around each operand.
4004 (define_insn "sse2_packssdw"
4005 [(set (match_operand:V8HI 0 "register_operand" "=x")
4008 (match_operand:V4SI 1 "register_operand" "0"))
4010 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4012 "packssdw\t{%2, %0|%0, %2}"
4013 [(set_attr "type" "sselog")
4014 (set_attr "prefix_data16" "1")
4015 (set_attr "mode" "TI")])
;; packuswb: narrows V8HI operands 1 and 2 into one V16QI result.  Operand
;; 1 is tied to the destination; operand 2 may be a register or memory.
;; NOTE(review): unsigned saturation is implied by the mnemonic; confirm
;; it against the RTL wrapper around each operand.
4017 (define_insn "sse2_packuswb"
4018 [(set (match_operand:V16QI 0 "register_operand" "=x")
4021 (match_operand:V8HI 1 "register_operand" "0"))
4023 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4025 "packuswb\t{%2, %0|%0, %2}"
4026 [(set_attr "type" "sselog")
4027 (set_attr "prefix_data16" "1")
4028 (set_attr "mode" "TI")])
;; punpckhbw: high-half byte interleave of V16QI operands 1 and 2.  The
;; selector alternates indices 8..15 with 24..31.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
4030 (define_insn "sse2_punpckhbw"
4031 [(set (match_operand:V16QI 0 "register_operand" "=x")
4034 (match_operand:V16QI 1 "register_operand" "0")
4035 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4036 (parallel [(const_int 8) (const_int 24)
4037 (const_int 9) (const_int 25)
4038 (const_int 10) (const_int 26)
4039 (const_int 11) (const_int 27)
4040 (const_int 12) (const_int 28)
4041 (const_int 13) (const_int 29)
4042 (const_int 14) (const_int 30)
4043 (const_int 15) (const_int 31)])))]
4045 "punpckhbw\t{%2, %0|%0, %2}"
4046 [(set_attr "type" "sselog")
4047 (set_attr "prefix_data16" "1")
4048 (set_attr "mode" "TI")])
;; punpcklbw: low-half byte interleave of V16QI operands 1 and 2.  The
;; selector alternates indices 0..7 with 16..23.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4050 (define_insn "sse2_punpcklbw"
4051 [(set (match_operand:V16QI 0 "register_operand" "=x")
4054 (match_operand:V16QI 1 "register_operand" "0")
4055 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4056 (parallel [(const_int 0) (const_int 16)
4057 (const_int 1) (const_int 17)
4058 (const_int 2) (const_int 18)
4059 (const_int 3) (const_int 19)
4060 (const_int 4) (const_int 20)
4061 (const_int 5) (const_int 21)
4062 (const_int 6) (const_int 22)
4063 (const_int 7) (const_int 23)])))]
4065 "punpcklbw\t{%2, %0|%0, %2}"
4066 [(set_attr "type" "sselog")
4067 (set_attr "prefix_data16" "1")
4068 (set_attr "mode" "TI")])
;; punpckhwd: high-half word interleave of V8HI operands 1 and 2.  The
;; selector alternates indices 4..7 with 12..15.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4070 (define_insn "sse2_punpckhwd"
4071 [(set (match_operand:V8HI 0 "register_operand" "=x")
4074 (match_operand:V8HI 1 "register_operand" "0")
4075 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4076 (parallel [(const_int 4) (const_int 12)
4077 (const_int 5) (const_int 13)
4078 (const_int 6) (const_int 14)
4079 (const_int 7) (const_int 15)])))]
4081 "punpckhwd\t{%2, %0|%0, %2}"
4082 [(set_attr "type" "sselog")
4083 (set_attr "prefix_data16" "1")
4084 (set_attr "mode" "TI")])
;; punpcklwd: low-half word interleave of V8HI operands 1 and 2.  The
;; selector alternates indices 0..3 with 8..11.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4086 (define_insn "sse2_punpcklwd"
4087 [(set (match_operand:V8HI 0 "register_operand" "=x")
4090 (match_operand:V8HI 1 "register_operand" "0")
4091 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4092 (parallel [(const_int 0) (const_int 8)
4093 (const_int 1) (const_int 9)
4094 (const_int 2) (const_int 10)
4095 (const_int 3) (const_int 11)])))]
4097 "punpcklwd\t{%2, %0|%0, %2}"
4098 [(set_attr "type" "sselog")
4099 (set_attr "prefix_data16" "1")
4100 (set_attr "mode" "TI")])
;; punpckhdq: high-half doubleword interleave of V4SI operands 1 and 2
;; (selector 2, 6, 3, 7).  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
4102 (define_insn "sse2_punpckhdq"
4103 [(set (match_operand:V4SI 0 "register_operand" "=x")
4106 (match_operand:V4SI 1 "register_operand" "0")
4107 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4108 (parallel [(const_int 2) (const_int 6)
4109 (const_int 3) (const_int 7)])))]
4111 "punpckhdq\t{%2, %0|%0, %2}"
4112 [(set_attr "type" "sselog")
4113 (set_attr "prefix_data16" "1")
4114 (set_attr "mode" "TI")])
;; punpckldq: low-half doubleword interleave of V4SI operands 1 and 2
;; (selector 0, 4, 1, 5).  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
4116 (define_insn "sse2_punpckldq"
4117 [(set (match_operand:V4SI 0 "register_operand" "=x")
4120 (match_operand:V4SI 1 "register_operand" "0")
4121 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4122 (parallel [(const_int 0) (const_int 4)
4123 (const_int 1) (const_int 5)])))]
4125 "punpckldq\t{%2, %0|%0, %2}"
4126 [(set_attr "type" "sselog")
4127 (set_attr "prefix_data16" "1")
4128 (set_attr "mode" "TI")])
;; punpckhqdq: high-half quadword interleave of V2DI operands 1 and 2 (the
;; selector starts at index 1).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4130 (define_insn "sse2_punpckhqdq"
4131 [(set (match_operand:V2DI 0 "register_operand" "=x")
4134 (match_operand:V2DI 1 "register_operand" "0")
4135 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4136 (parallel [(const_int 1)
4139 "punpckhqdq\t{%2, %0|%0, %2}"
4140 [(set_attr "type" "sselog")
4141 (set_attr "prefix_data16" "1")
4142 (set_attr "mode" "TI")])
;; punpcklqdq: low-half quadword interleave of V2DI operands 1 and 2 (the
;; selector starts at index 0).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4144 (define_insn "sse2_punpcklqdq"
4145 [(set (match_operand:V2DI 0 "register_operand" "=x")
4148 (match_operand:V2DI 1 "register_operand" "0")
4149 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4150 (parallel [(const_int 0)
4153 "punpcklqdq\t{%2, %0|%0, %2}"
4154 [(set_attr "type" "sselog")
4155 (set_attr "prefix_data16" "1")
4156 (set_attr "mode" "TI")])
;; pinsrb: insert QI operand 2 (register or memory) into V16QI operand 1,
;; which is tied to the destination.  In the RTL, operand 3 is a
;; power-of-two mask (const_pow2_1_to_32768_operand); exact_log2 converts
;; it to the value printed as the instruction's immediate.
4158 (define_insn "*sse4_1_pinsrb"
4159 [(set (match_operand:V16QI 0 "register_operand" "=x")
4161 (vec_duplicate:V16QI
4162 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4163 (match_operand:V16QI 1 "register_operand" "0")
4164 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4167 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4168 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4170 [(set_attr "type" "sselog")
4171 (set_attr "prefix_extra" "1")
4172 (set_attr "mode" "TI")])
;; pinsrw: insert HI operand 2 (register or memory) into V8HI operand 1,
;; which is tied to the destination.  In the RTL, operand 3 is a
;; power-of-two mask (const_pow2_1_to_128_operand); exact_log2 converts it
;; to the value printed as the instruction's immediate.
4174 (define_insn "*sse2_pinsrw"
4175 [(set (match_operand:V8HI 0 "register_operand" "=x")
4178 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4179 (match_operand:V8HI 1 "register_operand" "0")
4180 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4183 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4184 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4186 [(set_attr "type" "sselog")
4187 (set_attr "prefix_data16" "1")
4188 (set_attr "mode" "TI")])
4190 ;; This pattern must come before sse2_loadld since it is preferred.
;; pinsrd: insert SI operand 2 (register or memory) into V4SI operand 1,
;; which is tied to the destination.  In the RTL, operand 3 is a
;; power-of-two mask (const_pow2_1_to_8_operand); exact_log2 converts it
;; to the value printed as the instruction's immediate.
4191 (define_insn "*sse4_1_pinsrd"
4192 [(set (match_operand:V4SI 0 "register_operand" "=x")
4195 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4196 (match_operand:V4SI 1 "register_operand" "0")
4197 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4200 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4201 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4203 [(set_attr "type" "sselog")
4204 (set_attr "prefix_extra" "1")
4205 (set_attr "mode" "TI")])
;; pinsrq: insert DI operand 2 (register or memory) into V2DI operand 1,
;; which is tied to the destination.  In the RTL, operand 3 is a
;; power-of-two mask (const_pow2_1_to_2_operand); exact_log2 converts it
;; to the value printed as the instruction's immediate.
4207 (define_insn "*sse4_1_pinsrq"
4208 [(set (match_operand:V2DI 0 "register_operand" "=x")
4211 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4212 (match_operand:V2DI 1 "register_operand" "0")
4213 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4216 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4217 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4219 [(set_attr "type" "sselog")
4220 (set_attr "prefix_extra" "1")
4221 (set_attr "mode" "TI")])
;; pextrb, register form: extract the byte of V16QI operand 1 selected by
;; operand 2 (0..15) into SI general register operand 0.
4223 (define_insn "*sse4_1_pextrb"
4224 [(set (match_operand:SI 0 "register_operand" "=r")
4227 (match_operand:V16QI 1 "register_operand" "x")
4228 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4230 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4231 [(set_attr "type" "sselog")
4232 (set_attr "prefix_extra" "1")
4233 (set_attr "mode" "TI")])
;; pextrb, memory form: store the byte of V16QI operand 1 selected by
;; operand 2 (0..15) directly into QI memory operand 0.
4235 (define_insn "*sse4_1_pextrb_memory"
4236 [(set (match_operand:QI 0 "memory_operand" "=m")
4238 (match_operand:V16QI 1 "register_operand" "x")
4239 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4241 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4242 [(set_attr "type" "sselog")
4243 (set_attr "prefix_extra" "1")
4244 (set_attr "mode" "TI")])
;; pextrw, register form: extract the word of V8HI operand 1 selected by
;; operand 2 (0..7) into SI general register operand 0.
4246 (define_insn "*sse2_pextrw"
4247 [(set (match_operand:SI 0 "register_operand" "=r")
4250 (match_operand:V8HI 1 "register_operand" "x")
4251 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4253 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4254 [(set_attr "type" "sselog")
4255 (set_attr "prefix_data16" "1")
4256 (set_attr "mode" "TI")])
;; pextrw, memory form: store the word of V8HI operand 1 selected by
;; operand 2 (0..7) directly into HI memory operand 0.
4258 (define_insn "*sse4_1_pextrw_memory"
4259 [(set (match_operand:HI 0 "memory_operand" "=m")
4261 (match_operand:V8HI 1 "register_operand" "x")
4262 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4264 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4265 [(set_attr "type" "sselog")
4266 (set_attr "prefix_extra" "1")
4267 (set_attr "mode" "TI")])
;; pextrd: extract the doubleword of V4SI operand 1 selected by operand 2
;; (0..3) into SI operand 0, which may be a general register or memory
;; ("=rm").
4269 (define_insn "*sse4_1_pextrd"
4270 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4272 (match_operand:V4SI 1 "register_operand" "x")
4273 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4275 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4276 [(set_attr "type" "sselog")
4277 (set_attr "prefix_extra" "1")
4278 (set_attr "mode" "TI")])
4280 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq: DI destination in a general register or memory ("=rm"), V2DI
;; source register, immediate element index 0..1.  Only enabled when both
;; TARGET_SSE4_1 and TARGET_64BIT hold.
4281 (define_insn "*sse4_1_pextrq"
4282 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4284 (match_operand:V2DI 1 "register_operand" "x")
4285 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4286 "TARGET_SSE4_1 && TARGET_64BIT"
4287 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4288 [(set_attr "type" "sselog")
4289 (set_attr "prefix_extra" "1")
4290 (set_attr "mode" "TI")])
;; Expander for pshufd.  Operand 2 is the packed shuffle immediate; it is
;; split into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) which are passed
;; as separate constants to sse2_pshufd_1.
4292 (define_expand "sse2_pshufd"
4293 [(match_operand:V4SI 0 "register_operand" "")
4294 (match_operand:V4SI 1 "nonimmediate_operand" "")
4295 (match_operand:SI 2 "const_int_operand" "")]
4298 int mask = INTVAL (operands[2]);
4299 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4300 GEN_INT ((mask >> 0) & 3),
4301 GEN_INT ((mask >> 2) & 3),
4302 GEN_INT ((mask >> 4) & 3),
4303 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the four element selectors kept as separate operands 2..5
;; (each 0..3).  The output code packs them back into one immediate
;; (operand 2 at bit 0, 3 at bit 2, 4 at bit 4, 5 at bit 6) and overwrites
;; operands[2] with it so the template can print it as %2.
4307 (define_insn "sse2_pshufd_1"
4308 [(set (match_operand:V4SI 0 "register_operand" "=x")
4310 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4311 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4312 (match_operand 3 "const_0_to_3_operand" "")
4313 (match_operand 4 "const_0_to_3_operand" "")
4314 (match_operand 5 "const_0_to_3_operand" "")])))]
4318 mask |= INTVAL (operands[2]) << 0;
4319 mask |= INTVAL (operands[3]) << 2;
4320 mask |= INTVAL (operands[4]) << 4;
4321 mask |= INTVAL (operands[5]) << 6;
4322 operands[2] = GEN_INT (mask);
4324 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4326 [(set_attr "type" "sselog1")
4327 (set_attr "prefix_data16" "1")
4328 (set_attr "mode" "TI")])
;; Expander for pshuflw.  The packed immediate in operand 2 is split into
;; four 2-bit selectors (values 0..3) that are handed to sse2_pshuflw_1.
4330 (define_expand "sse2_pshuflw"
4331 [(match_operand:V8HI 0 "register_operand" "")
4332 (match_operand:V8HI 1 "nonimmediate_operand" "")
4333 (match_operand:SI 2 "const_int_operand" "")]
4336 int mask = INTVAL (operands[2]);
4337 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4338 GEN_INT ((mask >> 0) & 3),
4339 GEN_INT ((mask >> 2) & 3),
4340 GEN_INT ((mask >> 4) & 3),
4341 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with selectors in operands 2..5 (each 0..3).  The output code
;; rebuilds the packed immediate from them (2 bits per selector) and stores
;; it in operands[2] for the template.
4345 (define_insn "sse2_pshuflw_1"
4346 [(set (match_operand:V8HI 0 "register_operand" "=x")
4348 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4349 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4350 (match_operand 3 "const_0_to_3_operand" "")
4351 (match_operand 4 "const_0_to_3_operand" "")
4352 (match_operand 5 "const_0_to_3_operand" "")
4360 mask |= INTVAL (operands[2]) << 0;
4361 mask |= INTVAL (operands[3]) << 2;
4362 mask |= INTVAL (operands[4]) << 4;
4363 mask |= INTVAL (operands[5]) << 6;
4364 operands[2] = GEN_INT (mask);
4366 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4368 [(set_attr "type" "sselog")
4369 (set_attr "prefix_rep" "1")
4370 (set_attr "mode" "TI")])
;; Expander for pshufhw.  The packed immediate in operand 2 is split into
;; four 2-bit selectors; each is biased by +4 so the values passed to
;; sse2_pshufhw_1 lie in 4..7, matching that pattern's operand predicates.
4372 (define_expand "sse2_pshufhw"
4373 [(match_operand:V8HI 0 "register_operand" "")
4374 (match_operand:V8HI 1 "nonimmediate_operand" "")
4375 (match_operand:SI 2 "const_int_operand" "")]
4378 int mask = INTVAL (operands[2]);
4379 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4380 GEN_INT (((mask >> 0) & 3) + 4),
4381 GEN_INT (((mask >> 2) & 3) + 4),
4382 GEN_INT (((mask >> 4) & 3) + 4),
4383 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with selectors in operands 2..5, each in the range 4..7.  The
;; output code subtracts the +4 bias again, packs the four 2-bit values into
;; one immediate and stores it in operands[2] for the template.
4387 (define_insn "sse2_pshufhw_1"
4388 [(set (match_operand:V8HI 0 "register_operand" "=x")
4390 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4391 (parallel [(const_int 0)
4395 (match_operand 2 "const_4_to_7_operand" "")
4396 (match_operand 3 "const_4_to_7_operand" "")
4397 (match_operand 4 "const_4_to_7_operand" "")
4398 (match_operand 5 "const_4_to_7_operand" "")])))]
4402 mask |= (INTVAL (operands[2]) - 4) << 0;
4403 mask |= (INTVAL (operands[3]) - 4) << 2;
4404 mask |= (INTVAL (operands[4]) - 4) << 4;
4405 mask |= (INTVAL (operands[5]) - 4) << 6;
4406 operands[2] = GEN_INT (mask);
4408 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4410 [(set_attr "type" "sselog")
4411 (set_attr "prefix_rep" "1")
4412 (set_attr "mode" "TI")])
;; Expander taking an SI value (operand 1) and a V4SI destination register.
;; The preparation statement supplies operand 2 as the V4SImode zero constant.
4414 (define_expand "sse2_loadd"
4415 [(set (match_operand:V4SI 0 "register_operand" "")
4418 (match_operand:SI 1 "nonimmediate_operand" ""))
4422 "operands[2] = CONST0_RTX (V4SImode);")
;; Four alternatives for placing SI operand 2 into V4SI operand 0:
;;   0: movd from memory       1: movd from a general register
;;   2: movss from memory      3: movss from an SSE register
;; Operand 1 must satisfy constraint "C" in alternatives 0-2 and is tied to
;; the destination in alternative 3.
4424 (define_insn "sse2_loadld"
4425 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4428 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4429 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4433 movd\t{%2, %0|%0, %2}
4434 movd\t{%2, %0|%0, %2}
4435 movss\t{%2, %0|%0, %2}
4436 movss\t{%2, %0|%0, %2}"
4437 [(set_attr "type" "ssemov")
4438 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store of element 0 of a V4SI register into an SI destination.  After
;; reload, if inter-unit moves are allowed or the destination is memory or
;; not a general register, the insn is split into a plain SI move whose
;; source is operand 1's hard register re-expressed in SImode.
4440 (define_insn_and_split "sse2_stored"
4441 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4443 (match_operand:V4SI 1 "register_operand" "x,Yi")
4444 (parallel [(const_int 0)])))]
4447 "&& reload_completed
4448 && (TARGET_INTER_UNIT_MOVES
4449 || MEM_P (operands [0])
4450 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4451 [(set (match_dup 0) (match_dup 1))]
4453 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of a V2DI register (operand 1) into a DI
;; register-or-memory destination (operand 0).
4456 (define_expand "sse_storeq"
4457 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4459 (match_operand:V2DI 1 "register_operand" "")
4460 (parallel [(const_int 0)])))]
;; Variant of the element-0 DI store enabled for TARGET_64BIT && TARGET_SSE.
;; It adds a second alternative with a general-register destination ("r"),
;; whose source uses the "Yi" constraint.
4464 (define_insn "*sse2_storeq_rex64"
4465 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4467 (match_operand:V2DI 1 "register_operand" "x,Yi")
4468 (parallel [(const_int 0)])))]
4469 "TARGET_64BIT && TARGET_SSE"
;; Element-0 DI store with a single alternative: the destination is memory
;; or an SSE register ("=mx"), the source an SSE register.
4472 (define_insn "*sse2_storeq"
4473 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4475 (match_operand:V2DI 1 "register_operand" "x")
4476 (parallel [(const_int 0)])))]
;; Matches the element-0 DI extraction from a V2DI register and, when
;; inter-unit moves are allowed or the destination is memory or not a general
;; register, rewrites it as a plain DI move from operand 1's hard register
;; viewed in DImode.
;; NOTE(review): presumably the split counterpart of the storeq insns above,
;; mirroring sse2_stored -- confirm.
4481 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4483 (match_operand:V2DI 1 "register_operand" "")
4484 (parallel [(const_int 0)])))]
4487 && (TARGET_INTER_UNIT_MOVES
4488 || MEM_P (operands [0])
4489 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4490 [(set (match_dup 0) (match_dup 1))]
4492 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extraction of element 1 of a V2DI value on SSE2 targets; rejected when
;; both operands are memory.  Alternatives:
;;   0: movhps to a memory destination
;;   1: psrldq by 8 in place (source tied to the destination register)
;;   2: movq from %H1 of an offsettable memory source
4495 (define_insn "*vec_extractv2di_1_sse2"
4496 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4498 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4499 (parallel [(const_int 1)])))]
4500 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4502 movhps\t{%1, %0|%0, %1}
4503 psrldq\t{$8, %0|%0, 8}
4504 movq\t{%H1, %0|%0, %H1}"
4505 [(set_attr "type" "ssemov,sseishft,ssemov")
4506 (set_attr "memory" "*,none,*")
4507 (set_attr "mode" "V2SF,TI,TI")])
4509 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extraction of element 1 of a V2DI value for targets with SSE but not
;; SSE2; rejected when both operands are memory.  Alternatives:
;;   0: movhps to a memory destination
;;   1: movhlps between SSE registers
;;   2: movlps from %H1 of an offsettable memory source
4510 (define_insn "*vec_extractv2di_1_sse"
4511 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4513 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4514 (parallel [(const_int 1)])))]
4515 "!TARGET_SSE2 && TARGET_SSE
4516 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4518 movhps\t{%1, %0|%0, %1}
4519 movhlps\t{%1, %0|%0, %1}
4520 movlps\t{%H1, %0|%0, %H1}"
4521 [(set_attr "type" "ssemov")
4522 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI from a single SI register operand.  Alternative 0 uses
;; pshufd with immediate 0; alternative 1 (source tied to the destination)
;; uses shufps with immediate 0 on the destination itself.
4524 (define_insn "*vec_dupv4si"
4525 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4527 (match_operand:SI 1 "register_operand" " Y2,0")))]
4530 pshufd\t{$0, %1, %0|%0, %1, 0}
4531 shufps\t{$0, %0, %0|%0, %0, 0}"
4532 [(set_attr "type" "sselog1")
4533 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI from a single DI register operand.  Both alternatives tie
;; the source to the destination register; they differ in destination class
;; ("Y2" vs "x"), insn type (sselog1 vs ssemov) and mode (TI vs V4SF).
4535 (define_insn "*vec_dupv2di"
4536 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4538 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4543 [(set_attr "type" "sselog1,ssemov")
4544 (set_attr "mode" "TI,V4SF")])
4546 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4547 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4548 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI from two SI operands.  Alternatives 0-1 target "Y2"
;; registers, alternatives 2-3 target "*y" registers.  In each pair the
;; first alternative merges operand 2 into the destination (which already
;; holds operand 1) with punpckldq; the second, used when operand 2 matches
;; constraint "C", is a plain movd of operand 1.
4549 (define_insn "*sse2_concatv2si"
4550 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4552 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4553 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4556 punpckldq\t{%2, %0|%0, %2}
4557 movd\t{%1, %0|%0, %1}
4558 punpckldq\t{%2, %0|%0, %2}
4559 movd\t{%1, %0|%0, %1}"
4560 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4561 (set_attr "mode" "TI,TI,DI,DI")])
;; Counterpart of the V2SI concatenation using unpcklps/movss for the "x"
;; register alternatives (0-1) and punpckldq/movd for the "*y" register
;; alternatives (2-3).  The movss alternative accepts operand 1 from memory
;; only.
4563 (define_insn "*sse1_concatv2si"
4564 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4566 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4567 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4570 unpcklps\t{%2, %0|%0, %2}
4571 movss\t{%1, %0|%0, %1}
4572 punpckldq\t{%2, %0|%0, %2}
4573 movd\t{%1, %0|%0, %1}"
4574 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4575 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI from two V2SI operands.  Operand 1 is always tied to the
;; destination; operand 2 is merged in with punpcklqdq ("Y2" register),
;; movlhps ("x" register) or movhps (memory).
4577 (define_insn "*vec_concatv4si_1"
4578 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4580 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4581 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4584 punpcklqdq\t{%2, %0|%0, %2}
4585 movlhps\t{%2, %0|%0, %2}
4586 movhps\t{%2, %0|%0, %2}"
4587 [(set_attr "type" "sselog,ssemov,ssemov")
4588 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI from two DI operands.  Six alternatives:
;;   0: movq of operand 1 from memory (operand 2 matches "C")
;;   1: movq2dq of operand 1 from a "y" register (operand 2 matches "C")
;;   2: punpcklqdq of operand 2 into the destination holding operand 1
;;   3: movlhps of register operand 2 into the destination
;;   4: movhps of memory operand 2 into the destination
;;   5: movlps of memory operand 1 into the destination holding operand 2
4590 (define_insn "vec_concatv2di"
4591 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4593 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4594 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4597 movq\t{%1, %0|%0, %1}
4598 movq2dq\t{%1, %0|%0, %1}
4599 punpcklqdq\t{%2, %0|%0, %2}
4600 movlhps\t{%2, %0|%0, %2}
4601 movhps\t{%2, %0|%0, %2}
4602 movlps\t{%1, %0|%0, %1}"
4603 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4604 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Element insert for V2DI: operand 0 is the vector register, operand 1 the
;; DI value and operand 2 the constant element index.  All work is delegated
;; to ix86_expand_vector_set, called with `false' as its first argument.
4606 (define_expand "vec_setv2di"
4607 [(match_operand:V2DI 0 "register_operand" "")
4608 (match_operand:DI 1 "register_operand" "")
4609 (match_operand 2 "const_int_operand" "")]
4612 ix86_expand_vector_set (false, operands[0], operands[1],
4613 INTVAL (operands[2]));
;; Element extract for V2DI: operand 0 is the DI destination, operand 1 the
;; vector register and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_extract, called with `false' as its first argument.
4617 (define_expand "vec_extractv2di"
4618 [(match_operand:DI 0 "register_operand" "")
4619 (match_operand:V2DI 1 "register_operand" "")
4620 (match_operand 2 "const_int_operand" "")]
4623 ix86_expand_vector_extract (false, operands[0], operands[1],
4624 INTVAL (operands[2]));
;; Vector initialisation for V2DI: operand 0 is the destination register;
;; operand 1 (no predicate) is passed straight to ix86_expand_vector_init.
4628 (define_expand "vec_initv2di"
4629 [(match_operand:V2DI 0 "register_operand" "")
4630 (match_operand 1 "" "")]
4633 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element insert for V4SI: operand 1 is the SI value, operand 2 the
;; constant element index.  Delegates to ix86_expand_vector_set.
4637 (define_expand "vec_setv4si"
4638 [(match_operand:V4SI 0 "register_operand" "")
4639 (match_operand:SI 1 "register_operand" "")
4640 (match_operand 2 "const_int_operand" "")]
4643 ix86_expand_vector_set (false, operands[0], operands[1],
4644 INTVAL (operands[2]));
;; Element extract for V4SI: operand 0 is the SI destination, operand 2 the
;; constant element index.  Delegates to ix86_expand_vector_extract.
4648 (define_expand "vec_extractv4si"
4649 [(match_operand:SI 0 "register_operand" "")
4650 (match_operand:V4SI 1 "register_operand" "")
4651 (match_operand 2 "const_int_operand" "")]
4654 ix86_expand_vector_extract (false, operands[0], operands[1],
4655 INTVAL (operands[2]));
;; Vector initialisation for V4SI; operand 1 (no predicate) is passed
;; straight to ix86_expand_vector_init.
4659 (define_expand "vec_initv4si"
4660 [(match_operand:V4SI 0 "register_operand" "")
4661 (match_operand 1 "" "")]
4664 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element insert for V8HI: operand 1 is the HI value, operand 2 the
;; constant element index.  Delegates to ix86_expand_vector_set.
4668 (define_expand "vec_setv8hi"
4669 [(match_operand:V8HI 0 "register_operand" "")
4670 (match_operand:HI 1 "register_operand" "")
4671 (match_operand 2 "const_int_operand" "")]
4674 ix86_expand_vector_set (false, operands[0], operands[1],
4675 INTVAL (operands[2]));
;; Element extract for V8HI: operand 0 is the HI destination, operand 2 the
;; constant element index.  Delegates to ix86_expand_vector_extract.
4679 (define_expand "vec_extractv8hi"
4680 [(match_operand:HI 0 "register_operand" "")
4681 (match_operand:V8HI 1 "register_operand" "")
4682 (match_operand 2 "const_int_operand" "")]
4685 ix86_expand_vector_extract (false, operands[0], operands[1],
4686 INTVAL (operands[2]));
;; Vector initialisation for V8HI; operand 1 (no predicate) is passed
;; straight to ix86_expand_vector_init.
4690 (define_expand "vec_initv8hi"
4691 [(match_operand:V8HI 0 "register_operand" "")
4692 (match_operand 1 "" "")]
4695 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element insert for V16QI: operand 1 is the QI value, operand 2 the
;; constant element index.  Delegates to ix86_expand_vector_set.
4699 (define_expand "vec_setv16qi"
4700 [(match_operand:V16QI 0 "register_operand" "")
4701 (match_operand:QI 1 "register_operand" "")
4702 (match_operand 2 "const_int_operand" "")]
4705 ix86_expand_vector_set (false, operands[0], operands[1],
4706 INTVAL (operands[2]));
;; Element extract for V16QI: operand 0 is the QI destination, operand 2 the
;; constant element index.  Delegates to ix86_expand_vector_extract.
4710 (define_expand "vec_extractv16qi"
4711 [(match_operand:QI 0 "register_operand" "")
4712 (match_operand:V16QI 1 "register_operand" "")
4713 (match_operand 2 "const_int_operand" "")]
4716 ix86_expand_vector_extract (false, operands[0], operands[1],
4717 INTVAL (operands[2]));
;; Vector initialisation for V16QI; operand 1 (no predicate) is passed
;; straight to ix86_expand_vector_init.
4721 (define_expand "vec_initv16qi"
4722 [(match_operand:V16QI 0 "register_operand" "")
4723 (match_operand 1 "" "")]
4726 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Widening unpack V16QI -> V8HI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (true, true).
;; NOTE(review): presumably the flags mean (unsigned, high half), matching
;; the pattern name -- confirm against the helpers.
4730 (define_expand "vec_unpacku_hi_v16qi"
4731 [(match_operand:V8HI 0 "register_operand" "")
4732 (match_operand:V16QI 1 "register_operand" "")]
4736 ix86_expand_sse4_unpack (operands, true, true);
4738 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V16QI -> V8HI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (false, true).
;; NOTE(review): presumably (signed, high half) per the name -- confirm.
4742 (define_expand "vec_unpacks_hi_v16qi"
4743 [(match_operand:V8HI 0 "register_operand" "")
4744 (match_operand:V16QI 1 "register_operand" "")]
4748 ix86_expand_sse4_unpack (operands, false, true);
4750 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V16QI -> V8HI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (true, false).
;; NOTE(review): presumably (unsigned, low half) per the name -- confirm.
4754 (define_expand "vec_unpacku_lo_v16qi"
4755 [(match_operand:V8HI 0 "register_operand" "")
4756 (match_operand:V16QI 1 "register_operand" "")]
4760 ix86_expand_sse4_unpack (operands, true, false);
4762 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V16QI -> V8HI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (false, false).
;; NOTE(review): presumably (signed, low half) per the name -- confirm.
4766 (define_expand "vec_unpacks_lo_v16qi"
4767 [(match_operand:V8HI 0 "register_operand" "")
4768 (match_operand:V16QI 1 "register_operand" "")]
4772 ix86_expand_sse4_unpack (operands, false, false);
4774 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack V8HI -> V4SI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (true, true).
;; NOTE(review): presumably (unsigned, high half) per the name -- confirm.
4778 (define_expand "vec_unpacku_hi_v8hi"
4779 [(match_operand:V4SI 0 "register_operand" "")
4780 (match_operand:V8HI 1 "register_operand" "")]
4784 ix86_expand_sse4_unpack (operands, true, true);
4786 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V8HI -> V4SI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (false, true).
;; NOTE(review): presumably (signed, high half) per the name -- confirm.
4790 (define_expand "vec_unpacks_hi_v8hi"
4791 [(match_operand:V4SI 0 "register_operand" "")
4792 (match_operand:V8HI 1 "register_operand" "")]
4796 ix86_expand_sse4_unpack (operands, false, true);
4798 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V8HI -> V4SI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (true, false).
;; NOTE(review): presumably (unsigned, low half) per the name -- confirm.
4802 (define_expand "vec_unpacku_lo_v8hi"
4803 [(match_operand:V4SI 0 "register_operand" "")
4804 (match_operand:V8HI 1 "register_operand" "")]
4808 ix86_expand_sse4_unpack (operands, true, false);
4810 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V8HI -> V4SI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (false, false).
;; NOTE(review): presumably (signed, low half) per the name -- confirm.
4814 (define_expand "vec_unpacks_lo_v8hi"
4815 [(match_operand:V4SI 0 "register_operand" "")
4816 (match_operand:V8HI 1 "register_operand" "")]
4820 ix86_expand_sse4_unpack (operands, false, false);
4822 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack V4SI -> V2DI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (true, true).
;; NOTE(review): presumably (unsigned, high half) per the name -- confirm.
4826 (define_expand "vec_unpacku_hi_v4si"
4827 [(match_operand:V2DI 0 "register_operand" "")
4828 (match_operand:V4SI 1 "register_operand" "")]
4832 ix86_expand_sse4_unpack (operands, true, true);
4834 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V4SI -> V2DI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (false, true).
;; NOTE(review): presumably (signed, high half) per the name -- confirm.
4838 (define_expand "vec_unpacks_hi_v4si"
4839 [(match_operand:V2DI 0 "register_operand" "")
4840 (match_operand:V4SI 1 "register_operand" "")]
4844 ix86_expand_sse4_unpack (operands, false, true);
4846 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V4SI -> V2DI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (true, false).
;; NOTE(review): presumably (unsigned, low half) per the name -- confirm.
4850 (define_expand "vec_unpacku_lo_v4si"
4851 [(match_operand:V2DI 0 "register_operand" "")
4852 (match_operand:V4SI 1 "register_operand" "")]
4856 ix86_expand_sse4_unpack (operands, true, false);
4858 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V4SI -> V2DI.  Expands through ix86_expand_sse4_unpack
;; or ix86_expand_sse_unpack, both called with flags (false, false).
;; NOTE(review): presumably (signed, low half) per the name -- confirm.
4862 (define_expand "vec_unpacks_lo_v4si"
4863 [(match_operand:V2DI 0 "register_operand" "")
4864 (match_operand:V4SI 1 "register_operand" "")]
4868 ix86_expand_sse4_unpack (operands, false, false);
4870 ix86_expand_sse_unpack (operands, false, false);
4874 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4878 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb on V16QI operands.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination; operand 2 may be a register or memory.  The
;; RTL expression includes a V16QI constant vector of sixteen ones.  Enabled
;; for TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
4880 (define_insn "sse2_uavgv16qi3"
4881 [(set (match_operand:V16QI 0 "register_operand" "=x")
4887 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4889 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4890 (const_vector:V16QI [(const_int 1) (const_int 1)
4891 (const_int 1) (const_int 1)
4892 (const_int 1) (const_int 1)
4893 (const_int 1) (const_int 1)
4894 (const_int 1) (const_int 1)
4895 (const_int 1) (const_int 1)
4896 (const_int 1) (const_int 1)
4897 (const_int 1) (const_int 1)]))
4899 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4900 "pavgb\t{%2, %0|%0, %2}"
4901 [(set_attr "type" "sseiadd")
4902 (set_attr "prefix_data16" "1")
4903 (set_attr "mode" "TI")])
;; pavgw on V8HI operands.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination; operand 2 may be a register or memory.  The
;; RTL expression includes a V8HI constant vector of eight ones.  Enabled
;; for TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
4905 (define_insn "sse2_uavgv8hi3"
4906 [(set (match_operand:V8HI 0 "register_operand" "=x")
4912 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4914 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4915 (const_vector:V8HI [(const_int 1) (const_int 1)
4916 (const_int 1) (const_int 1)
4917 (const_int 1) (const_int 1)
4918 (const_int 1) (const_int 1)]))
4920 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4921 "pavgw\t{%2, %0|%0, %2}"
4922 [(set_attr "type" "sseiadd")
4923 (set_attr "prefix_data16" "1")
4924 (set_attr "mode" "TI")])
4926 ;; The correct representation for this is absolutely enormous, and
4927 ;; surely not generally useful.
;; psadbw modelled as an opaque unspec: two V16QI inputs (operand 1 tied to
;; the destination, operand 2 register or memory) producing a V2DI result.
4928 (define_insn "sse2_psadbw"
4929 [(set (match_operand:V2DI 0 "register_operand" "=x")
4930 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4931 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4934 "psadbw\t{%2, %0|%0, %2}"
4935 [(set_attr "type" "sseiadd")
4936 (set_attr "prefix_data16" "1")
4937 (set_attr "mode" "TI")])
;; movmskps modelled as an unspec: V4SF SSE register in, SI general
;; register out.
4939 (define_insn "sse_movmskps"
4940 [(set (match_operand:SI 0 "register_operand" "=r")
4941 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4944 "movmskps\t{%1, %0|%0, %1}"
4945 [(set_attr "type" "ssecvt")
4946 (set_attr "mode" "V4SF")])
;; movmskpd modelled as an unspec: V2DF SSE register in, SI general
;; register out.
4948 (define_insn "sse2_movmskpd"
4949 [(set (match_operand:SI 0 "register_operand" "=r")
4950 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4953 "movmskpd\t{%1, %0|%0, %1}"
4954 [(set_attr "type" "ssecvt")
4955 (set_attr "mode" "V2DF")])
;; pmovmskb modelled as an unspec: V16QI SSE register in, SI general
;; register out.  The mode attribute here is SI.
4957 (define_insn "sse2_pmovmskb"
4958 [(set (match_operand:SI 0 "register_operand" "=r")
4959 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4962 "pmovmskb\t{%1, %0|%0, %1}"
4963 [(set_attr "type" "ssecvt")
4964 (set_attr "prefix_data16" "1")
4965 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) set from
;; an unspec over two V16QI register operands (1 and 2).
4967 (define_expand "sse2_maskmovdqu"
4968 [(set (match_operand:V16QI 0 "memory_operand" "")
4969 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4970 (match_operand:V16QI 2 "register_operand" "x")
;; maskmovdqu for !TARGET_64BIT.  The destination is the V16QI memory
;; addressed by SI register operand 0 (constraint "D"); that same memory also
;; appears as an input of the unspec.  Only operands 1 and 2 are printed in
;; the template; the address register is implicit.
4976 (define_insn "*sse2_maskmovdqu"
4977 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4978 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4979 (match_operand:V16QI 2 "register_operand" "x")
4980 (mem:V16QI (match_dup 0))]
4982 "TARGET_SSE2 && !TARGET_64BIT"
4983 ;; @@@ check ordering of operands in intel/nonintel syntax
4984 "maskmovdqu\t{%2, %1|%1, %2}"
4985 [(set_attr "type" "ssecvt")
4986 (set_attr "prefix_data16" "1")
4987 (set_attr "mode" "TI")])
;; maskmovdqu for TARGET_64BIT.  Identical in shape to the 32-bit pattern
;; except that the address register (operand 0, constraint "D") is DImode.
4989 (define_insn "*sse2_maskmovdqu_rex64"
4990 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4991 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4992 (match_operand:V16QI 2 "register_operand" "x")
4993 (mem:V16QI (match_dup 0))]
4995 "TARGET_SSE2 && TARGET_64BIT"
4996 ;; @@@ check ordering of operands in intel/nonintel syntax
4997 "maskmovdqu\t{%2, %1|%1, %2}"
4998 [(set_attr "type" "ssecvt")
4999 (set_attr "prefix_data16" "1")
5000 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile over an SI memory operand; the insn is marked
;; as a memory load.
5002 (define_insn "sse_ldmxcsr"
5003 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5007 [(set_attr "type" "sse")
5008 (set_attr "memory" "load")])
;; stmxcsr: stores the UNSPECV_STMXCSR unspec_volatile value into an SI
;; memory operand; the insn is marked as a memory store.
5010 (define_insn "sse_stmxcsr"
5011 [(set (match_operand:SI 0 "memory_operand" "=m")
5012 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5015 [(set_attr "type" "sse")
5016 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.  The
;; preparation code creates operand 0 as a volatile BLKmode MEM whose address
;; is a Pmode SCRATCH; the pattern sets that MEM from an UNSPEC_SFENCE of
;; itself.
5018 (define_expand "sse_sfence"
5020 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5021 "TARGET_SSE || TARGET_3DNOW_A"
5023 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5024 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set from an UNSPEC_SFENCE of itself.  Its
;; memory attribute is "unknown".
5027 (define_insn "*sse_sfence"
5028 [(set (match_operand:BLK 0 "" "")
5029 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5030 "TARGET_SSE || TARGET_3DNOW_A"
5032 [(set_attr "type" "sse")
5033 (set_attr "memory" "unknown")])
;; clflush: an unspec_volatile over an address operand (constraint "p"); the
;; memory attribute is "unknown".
5035 (define_insn "sse2_clflush"
5036 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5040 [(set_attr "type" "sse")
5041 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH; the pattern uses
;; an UNSPEC_MFENCE of that MEM.
5043 (define_expand "sse2_mfence"
5045 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5048 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5049 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set from an UNSPEC_MFENCE of itself.  Its
;; memory attribute is "unknown".
5052 (define_insn "*sse2_mfence"
5053 [(set (match_operand:BLK 0 "" "")
5054 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5057 [(set_attr "type" "sse")
5058 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH; the pattern uses
;; an UNSPEC_LFENCE of that MEM.
5060 (define_expand "sse2_lfence"
5062 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5065 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5066 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set from an UNSPEC_LFENCE of itself.  Its
;; memory attribute is "unknown".
5069 (define_insn "*sse2_lfence"
5070 [(set (match_operand:BLK 0 "" "")
5071 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5074 [(set_attr "type" "sse")
5075 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile over two SI register operands constrained to
;; "a" and "c".  One pattern serves 32- and 64-bit targets, as the comment
;; in the body explains.  The insn length is fixed at 3.
5077 (define_insn "sse3_mwait"
5078 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5079 (match_operand:SI 1 "register_operand" "c")]
5082 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5083 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5084 ;; we only need to set up 32bit registers.
5086 [(set_attr "length" "3")])
;; monitor for !TARGET_64BIT: an unspec_volatile over three SI register
;; operands constrained to "a", "c" and "d"; all three are printed
;; explicitly in the template.  The insn length is fixed at 3.
5088 (define_insn "sse3_monitor"
5089 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5090 (match_operand:SI 1 "register_operand" "c")
5091 (match_operand:SI 2 "register_operand" "d")]
5093 "TARGET_SSE3 && !TARGET_64BIT"
5094 "monitor\t%0, %1, %2"
5095 [(set_attr "length" "3")])
;; monitor for TARGET_64BIT: operand 0 (constraint "a") is DImode, while
;; operands 1 and 2 (constraints "c" and "d") stay SImode for the reason
;; given in the body comment.  The insn length is fixed at 3.
5097 (define_insn "sse3_monitor64"
5098 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5099 (match_operand:SI 1 "register_operand" "c")
5100 (match_operand:SI 2 "register_operand" "d")]
5102 "TARGET_SSE3 && TARGET_64BIT"
5103 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5104 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5105 ;; zero extended to 64bit, we only need to set up 32bit registers.
5107 [(set_attr "length" "3")])
;; phaddw on V8HI (SSE registers).  The RTL spells the operation out per
;; element: adjacent HI pairs (0,1) (2,3) (4,5) (6,7) are selected first
;; from operand 1 (tied to the destination) and then from operand 2
;; (register or memory).
5110 (define_insn "ssse3_phaddwv8hi3"
5111 [(set (match_operand:V8HI 0 "register_operand" "=x")
5117 (match_operand:V8HI 1 "register_operand" "0")
5118 (parallel [(const_int 0)]))
5119 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5121 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5122 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5125 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5126 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5128 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5129 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5134 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5135 (parallel [(const_int 0)]))
5136 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5138 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5139 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5142 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5143 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5145 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5146 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5148 "phaddw\t{%2, %0|%0, %2}"
5149 [(set_attr "type" "sseiadd")
5150 (set_attr "prefix_data16" "1")
5151 (set_attr "prefix_extra" "1")
5152 (set_attr "mode" "TI")])
;; phaddw on V4HI ("y" registers).  Adjacent HI pairs (0,1) (2,3) are
;; selected first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No data16 prefix attribute; mode is DI.
5154 (define_insn "ssse3_phaddwv4hi3"
5155 [(set (match_operand:V4HI 0 "register_operand" "=y")
5160 (match_operand:V4HI 1 "register_operand" "0")
5161 (parallel [(const_int 0)]))
5162 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5164 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5165 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5169 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5170 (parallel [(const_int 0)]))
5171 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5173 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5174 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5176 "phaddw\t{%2, %0|%0, %2}"
5177 [(set_attr "type" "sseiadd")
5178 (set_attr "prefix_extra" "1")
5179 (set_attr "mode" "DI")])
;; phaddd on V4SI (SSE registers).  Adjacent SI pairs (0,1) (2,3) are
;; selected first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).
5181 (define_insn "ssse3_phadddv4si3"
5182 [(set (match_operand:V4SI 0 "register_operand" "=x")
5187 (match_operand:V4SI 1 "register_operand" "0")
5188 (parallel [(const_int 0)]))
5189 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5191 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5192 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5196 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5197 (parallel [(const_int 0)]))
5198 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5200 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5201 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5203 "phaddd\t{%2, %0|%0, %2}"
5204 [(set_attr "type" "sseiadd")
5205 (set_attr "prefix_data16" "1")
5206 (set_attr "prefix_extra" "1")
5207 (set_attr "mode" "TI")])
;; phaddd on V2SI ("y" registers).  The SI pair (0,1) is selected from
;; operand 1 (tied to the destination) and then from operand 2 (register or
;; memory).  No data16 prefix attribute; mode is DI.
5209 (define_insn "ssse3_phadddv2si3"
5210 [(set (match_operand:V2SI 0 "register_operand" "=y")
5214 (match_operand:V2SI 1 "register_operand" "0")
5215 (parallel [(const_int 0)]))
5216 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5219 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5220 (parallel [(const_int 0)]))
5221 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5223 "phaddd\t{%2, %0|%0, %2}"
5224 [(set_attr "type" "sseiadd")
5225 (set_attr "prefix_extra" "1")
5226 (set_attr "mode" "DI")])
;; phaddsw on V8HI (SSE registers).  Adjacent HI pairs (0,1) (2,3) (4,5)
;; (6,7) are selected first from operand 1 (tied to the destination) and
;; then from operand 2 (register or memory).
5228 (define_insn "ssse3_phaddswv8hi3"
5229 [(set (match_operand:V8HI 0 "register_operand" "=x")
5235 (match_operand:V8HI 1 "register_operand" "0")
5236 (parallel [(const_int 0)]))
5237 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5239 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5240 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5243 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5244 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5246 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5247 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5252 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5253 (parallel [(const_int 0)]))
5254 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5256 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5257 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5260 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5261 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5263 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5264 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5266 "phaddsw\t{%2, %0|%0, %2}"
5267 [(set_attr "type" "sseiadd")
5268 (set_attr "prefix_data16" "1")
5269 (set_attr "prefix_extra" "1")
5270 (set_attr "mode" "TI")])
;; phaddsw on V4HI ("y" registers).  Adjacent HI pairs (0,1) (2,3) are
;; selected first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No data16 prefix attribute; mode is DI.
5272 (define_insn "ssse3_phaddswv4hi3"
5273 [(set (match_operand:V4HI 0 "register_operand" "=y")
5278 (match_operand:V4HI 1 "register_operand" "0")
5279 (parallel [(const_int 0)]))
5280 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5282 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5283 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5287 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5288 (parallel [(const_int 0)]))
5289 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5291 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5292 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5294 "phaddsw\t{%2, %0|%0, %2}"
5295 [(set_attr "type" "sseiadd")
5296 (set_attr "prefix_extra" "1")
5297 (set_attr "mode" "DI")])
;; phsubw on V8HI (SSE registers).  Adjacent HI pairs (0,1) (2,3) (4,5)
;; (6,7) are selected first from operand 1 (tied to the destination) and
;; then from operand 2 (register or memory).
5299 (define_insn "ssse3_phsubwv8hi3"
5300 [(set (match_operand:V8HI 0 "register_operand" "=x")
5306 (match_operand:V8HI 1 "register_operand" "0")
5307 (parallel [(const_int 0)]))
5308 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5310 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5311 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5314 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5315 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5317 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5318 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5323 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5324 (parallel [(const_int 0)]))
5325 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5327 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5328 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5331 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5332 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5334 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5335 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5337 "phsubw\t{%2, %0|%0, %2}"
5338 [(set_attr "type" "sseiadd")
5339 (set_attr "prefix_data16" "1")
5340 (set_attr "prefix_extra" "1")
5341 (set_attr "mode" "TI")])
;; phsubw on V4HI ("y" registers).  Adjacent HI pairs (0,1) (2,3) are
;; selected first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).  No data16 prefix attribute; mode is DI.
5343 (define_insn "ssse3_phsubwv4hi3"
5344 [(set (match_operand:V4HI 0 "register_operand" "=y")
5349 (match_operand:V4HI 1 "register_operand" "0")
5350 (parallel [(const_int 0)]))
5351 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5353 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5354 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5358 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5359 (parallel [(const_int 0)]))
5360 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5362 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5363 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5365 "phsubw\t{%2, %0|%0, %2}"
5366 [(set_attr "type" "sseiadd")
5367 (set_attr "prefix_extra" "1")
5368 (set_attr "mode" "DI")])
;; phsubd on V4SI (SSE registers).  Adjacent SI pairs (0,1) (2,3) are
;; selected first from operand 1 (tied to the destination) and then from
;; operand 2 (register or memory).
5370 (define_insn "ssse3_phsubdv4si3"
5371 [(set (match_operand:V4SI 0 "register_operand" "=x")
5376 (match_operand:V4SI 1 "register_operand" "0")
5377 (parallel [(const_int 0)]))
5378 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5380 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5381 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5385 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5386 (parallel [(const_int 0)]))
5387 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5389 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5390 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5392 "phsubd\t{%2, %0|%0, %2}"
5393 [(set_attr "type" "sseiadd")
5394 (set_attr "prefix_data16" "1")
5395 (set_attr "prefix_extra" "1")
5396 (set_attr "mode" "TI")])
;; phsubd on V2SI ("y" registers).  The SI pair (0,1) is selected from
;; operand 1 (tied to the destination) and then from operand 2 (register or
;; memory).  No data16 prefix attribute; mode is DI.
5398 (define_insn "ssse3_phsubdv2si3"
5399 [(set (match_operand:V2SI 0 "register_operand" "=y")
5403 (match_operand:V2SI 1 "register_operand" "0")
5404 (parallel [(const_int 0)]))
5405 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5408 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5409 (parallel [(const_int 0)]))
5410 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5412 "phsubd\t{%2, %0|%0, %2}"
5413 [(set_attr "type" "sseiadd")
5414 (set_attr "prefix_extra" "1")
5415 (set_attr "mode" "DI")])
;; phsubsw on V8HI (SSE registers).  Adjacent HI pairs (0,1) (2,3) (4,5)
;; (6,7) are selected first from operand 1 (tied to the destination) and
;; then from operand 2 (register or memory).
5417 (define_insn "ssse3_phsubswv8hi3"
5418 [(set (match_operand:V8HI 0 "register_operand" "=x")
5424 (match_operand:V8HI 1 "register_operand" "0")
5425 (parallel [(const_int 0)]))
5426 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5428 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5429 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5432 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5433 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5435 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5436 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5441 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5442 (parallel [(const_int 0)]))
5443 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5445 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5446 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5449 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5450 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5452 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5453 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5455 "phsubsw\t{%2, %0|%0, %2}"
5456 [(set_attr "type" "sseiadd")
5457 (set_attr "prefix_data16" "1")
5458 (set_attr "prefix_extra" "1")
5459 (set_attr "mode" "TI")])
;; V4HI form of phsubsw using the "y" register class.  Operand 1 is tied
;; to the destination; operand 2 may be a register or memory.  The selects
;; walk HImode elements 0 through 3 of each input.
5461 (define_insn "ssse3_phsubswv4hi3"
5462 [(set (match_operand:V4HI 0 "register_operand" "=y")
5467 (match_operand:V4HI 1 "register_operand" "0")
5468 (parallel [(const_int 0)]))
5469 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5471 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5472 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5476 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5477 (parallel [(const_int 0)]))
5478 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5480 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5481 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5483 "phsubsw\t{%2, %0|%0, %2}"
5484 [(set_attr "type" "sseiadd")
5485 (set_attr "prefix_extra" "1")
5486 (set_attr "mode" "DI")])
;; pmaddubsw producing V8HI from two V16QI inputs.  Operand 1 carries the
;; "%" commutative marker and is tied to the destination; operand 2 is
;; "xm".  Each input is selected twice: once by a parallel starting at
;; element 0 and once by a parallel starting at element 1.
5488 (define_insn "ssse3_pmaddubswv8hi3"
5489 [(set (match_operand:V8HI 0 "register_operand" "=x")
5494 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5495 (parallel [(const_int 0)
5505 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5506 (parallel [(const_int 0)
5516 (vec_select:V16QI (match_dup 1)
5517 (parallel [(const_int 1)
5526 (vec_select:V16QI (match_dup 2)
5527 (parallel [(const_int 1)
5534 (const_int 15)]))))))]
5536 "pmaddubsw\t{%2, %0|%0, %2}"
5537 [(set_attr "type" "sseiadd")
5538 (set_attr "prefix_data16" "1")
5539 (set_attr "prefix_extra" "1")
5540 (set_attr "mode" "TI")])
;; pmaddubsw producing V4HI from two V8QI inputs in the "y" register
;; class.  Operand 1 carries the "%" commutative marker and is tied to
;; the destination; operand 2 is "ym".  Each input is selected twice:
;; by a parallel starting at element 0 and by one starting at element 1.
5542 (define_insn "ssse3_pmaddubswv4hi3"
5543 [(set (match_operand:V4HI 0 "register_operand" "=y")
5548 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5549 (parallel [(const_int 0)
5555 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5556 (parallel [(const_int 0)
5562 (vec_select:V8QI (match_dup 1)
5563 (parallel [(const_int 1)
5568 (vec_select:V8QI (match_dup 2)
5569 (parallel [(const_int 1)
5572 (const_int 7)]))))))]
5574 "pmaddubsw\t{%2, %0|%0, %2}"
5575 [(set_attr "type" "sseiadd")
5576 (set_attr "prefix_extra" "1")
5577 (set_attr "mode" "DI")])
;; pmulhrsw on V8HI.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 is "xm".  The pattern includes a V8HI constant
;; vector of ones.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands as a MULT.
5579 (define_insn "ssse3_pmulhrswv8hi3"
5580 [(set (match_operand:V8HI 0 "register_operand" "=x")
5587 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5589 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5591 (const_vector:V8HI [(const_int 1) (const_int 1)
5592 (const_int 1) (const_int 1)
5593 (const_int 1) (const_int 1)
5594 (const_int 1) (const_int 1)]))
5596 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5597 "pmulhrsw\t{%2, %0|%0, %2}"
5598 [(set_attr "type" "sseimul")
5599 (set_attr "prefix_data16" "1")
5600 (set_attr "prefix_extra" "1")
5601 (set_attr "mode" "TI")])
;; pmulhrsw on V4HI in the "y" register class.  Operand 1 is commutative
;; ("%") and tied to the destination; operand 2 is "ym".  The pattern
;; includes a V4HI constant vector of ones.  Enabled for TARGET_SSSE3
;; when ix86_binary_operator_ok accepts the operands as a MULT.
5603 (define_insn "ssse3_pmulhrswv4hi3"
5604 [(set (match_operand:V4HI 0 "register_operand" "=y")
5611 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5613 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5615 (const_vector:V4HI [(const_int 1) (const_int 1)
5616 (const_int 1) (const_int 1)]))
5618 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5619 "pmulhrsw\t{%2, %0|%0, %2}"
5620 [(set_attr "type" "sseimul")
5621 (set_attr "prefix_extra" "1")
5622 (set_attr "mode" "DI")])
;; pshufb on V16QI, modelled as an unspec of its two inputs.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or
;; memory ("xm").
5624 (define_insn "ssse3_pshufbv16qi3"
5625 [(set (match_operand:V16QI 0 "register_operand" "=x")
5626 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5627 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5630 "pshufb\t{%2, %0|%0, %2}"
5631 [(set_attr "type" "sselog1")
5632 (set_attr "prefix_data16" "1")
5633 (set_attr "prefix_extra" "1")
5634 (set_attr "mode" "TI")])
;; pshufb on V8QI in the "y" register class, modelled as an unspec of
;; its two inputs.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory ("ym").
5636 (define_insn "ssse3_pshufbv8qi3"
5637 [(set (match_operand:V8QI 0 "register_operand" "=y")
5638 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5639 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5642 "pshufb\t{%2, %0|%0, %2}"
5643 [(set_attr "type" "sselog1")
5644 (set_attr "prefix_extra" "1")
5645 (set_attr "mode" "DI")])
;; psignb/psignw/psignd for the SSEMODE124 modes (V16QI, V8HI, V4SI);
;; the mnemonic suffix comes from the ssevecsize mode attribute.
;; Operand 1 is tied to the destination; operand 2 is "xm".
5647 (define_insn "ssse3_psign<mode>3"
5648 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5649 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5650 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5653 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5654 [(set_attr "type" "sselog1")
5655 (set_attr "prefix_data16" "1")
5656 (set_attr "prefix_extra" "1")
5657 (set_attr "mode" "TI")])
;; psign for the MMXMODEI modes in the "y" register class; the mnemonic
;; suffix comes from the mmxvecsize mode attribute.  Operand 1 is tied
;; to the destination; operand 2 is "ym".
5659 (define_insn "ssse3_psign<mode>3"
5660 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5661 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5662 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5665 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5666 [(set_attr "type" "sselog1")
5667 (set_attr "prefix_extra" "1")
5668 (set_attr "mode" "DI")])
;; palignr on TImode.  Operand 1 is tied to the destination; operand 2
;; is "xm".  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the immediate
;; (presumably converting a bit count to a byte count -- confirm
;; against the callers).
5670 (define_insn "ssse3_palignrti"
5671 [(set (match_operand:TI 0 "register_operand" "=x")
5672 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5673 (match_operand:TI 2 "nonimmediate_operand" "xm")
5674 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5678 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5679 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5681 [(set_attr "type" "sseishft")
5682 (set_attr "prefix_data16" "1")
5683 (set_attr "prefix_extra" "1")
5684 (set_attr "mode" "TI")])
;; palignr on DImode in the "y" register class.  Operand 1 is tied to
;; the destination; operand 2 is "ym".  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the immediate (presumably converting a bit count to a
;; byte count -- confirm against the callers).
5686 (define_insn "ssse3_palignrdi"
5687 [(set (match_operand:DI 0 "register_operand" "=y")
5688 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5689 (match_operand:DI 2 "nonimmediate_operand" "ym")
5690 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5694 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5695 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5697 [(set_attr "type" "sseishft")
5698 (set_attr "prefix_extra" "1")
5699 (set_attr "mode" "DI")])
;; abs for the SSEMODE124 modes (V16QI, V8HI, V4SI), emitted as
;; pabsb/pabsw/pabsd via the ssevecsize mode attribute.  The source may
;; be a register or memory and is not tied to the destination.
5701 (define_insn "abs<mode>2"
5702 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5703 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5705 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5706 [(set_attr "type" "sselog1")
5707 (set_attr "prefix_data16" "1")
5708 (set_attr "prefix_extra" "1")
5709 (set_attr "mode" "TI")])
;; abs for the MMXMODEI modes in the "y" register class, emitted as
;; pabs with the suffix taken from the mmxvecsize mode attribute.  The
;; source may be a register or memory and is not tied to the destination.
5711 (define_insn "abs<mode>2"
5712 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5713 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5715 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5716 [(set_attr "type" "sselog1")
5717 (set_attr "prefix_extra" "1")
5718 (set_attr "mode" "DI")])
5720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5722 ;; AMD SSE4A instructions
5724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntsd from a vector register: stores element 0 of the V2DF
;; operand 1 to the DFmode memory operand 0, wrapped in an unspec.
5726 (define_insn "sse4a_vmmovntv2df"
5727 [(set (match_operand:DF 0 "memory_operand" "=m")
5728 (unspec:DF [(vec_select:DF
5729 (match_operand:V2DF 1 "register_operand" "x")
5730 (parallel [(const_int 0)]))]
5733 "movntsd\t{%1, %0|%0, %1}"
5734 [(set_attr "type" "ssemov")
5735 (set_attr "mode" "DF")])
;; movntsd from a scalar: stores the DFmode register operand 1 ("x") to
;; the DFmode memory operand 0, wrapped in an unspec.
5737 (define_insn "sse4a_movntdf"
5738 [(set (match_operand:DF 0 "memory_operand" "=m")
5739 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5742 "movntsd\t{%1, %0|%0, %1}"
5743 [(set_attr "type" "ssemov")
5744 (set_attr "mode" "DF")])
;; movntss from a vector register: stores element 0 of the V4SF
;; operand 1 to the SFmode memory operand 0, wrapped in an unspec.
5746 (define_insn "sse4a_vmmovntv4sf"
5747 [(set (match_operand:SF 0 "memory_operand" "=m")
5748 (unspec:SF [(vec_select:SF
5749 (match_operand:V4SF 1 "register_operand" "x")
5750 (parallel [(const_int 0)]))]
5753 "movntss\t{%1, %0|%0, %1}"
5754 [(set_attr "type" "ssemov")
5755 (set_attr "mode" "SF")])
;; movntss from a scalar: stores the SFmode register operand 1 ("x") to
;; the SFmode memory operand 0, wrapped in an unspec.
5757 (define_insn "sse4a_movntsf"
5758 [(set (match_operand:SF 0 "memory_operand" "=m")
5759 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5762 "movntss\t{%1, %0|%0, %1}"
5763 [(set_attr "type" "ssemov")
5764 (set_attr "mode" "SF")])
;; Immediate form of extrq.  Operand 1 is tied to the destination;
;; operands 2 and 3 are modeless const_int operands with empty
;; constraints and are printed as the instruction's two immediates.
5766 (define_insn "sse4a_extrqi"
5767 [(set (match_operand:V2DI 0 "register_operand" "=x")
5768 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5769 (match_operand 2 "const_int_operand" "")
5770 (match_operand 3 "const_int_operand" "")]
5773 "extrq\t{%3, %2, %0|%0, %2, %3}"
5774 [(set_attr "type" "sse")
5775 (set_attr "prefix_data16" "1")
5776 (set_attr "mode" "TI")])
;; Register form of extrq.  Operand 1 (V2DI) is tied to the destination;
;; operand 2 is a V16QI register and must not be memory.
5778 (define_insn "sse4a_extrq"
5779 [(set (match_operand:V2DI 0 "register_operand" "=x")
5780 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5781 (match_operand:V16QI 2 "register_operand" "x")]
5784 "extrq\t{%2, %0|%0, %2}"
5785 [(set_attr "type" "sse")
5786 (set_attr "prefix_data16" "1")
5787 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are modeless
;; const_int operands printed as the instruction's two immediates.
5789 (define_insn "sse4a_insertqi"
5790 [(set (match_operand:V2DI 0 "register_operand" "=x")
5791 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5792 (match_operand:V2DI 2 "register_operand" "x")
5793 (match_operand 3 "const_int_operand" "")
5794 (match_operand 4 "const_int_operand" "")]
5797 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5798 [(set_attr "type" "sseins")
5799 (set_attr "prefix_rep" "1")
5800 (set_attr "mode" "TI")])
;; Register form of insertq.  Operand 1 is tied to the destination;
;; operand 2 is a V2DI register and must not be memory.
5802 (define_insn "sse4a_insertq"
5803 [(set (match_operand:V2DI 0 "register_operand" "=x")
5804 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5805 (match_operand:V2DI 2 "register_operand" "x")]
5808 "insertq\t{%2, %0|%0, %2}"
5809 [(set_attr "type" "sseins")
5810 (set_attr "prefix_rep" "1")
5811 (set_attr "mode" "TI")])
5813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5815 ;; Intel SSE4.1 instructions
5817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendpd on V2DF.  In the RTL, operand 2 ("xm") is listed before
;; operand 1, which is tied to the destination.  Operand 3 is an
;; immediate restricted by const_0_to_3_operand.
5819 (define_insn "sse4_1_blendpd"
5820 [(set (match_operand:V2DF 0 "register_operand" "=x")
5822 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5823 (match_operand:V2DF 1 "register_operand" "0")
5824 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
5826 "blendpd\t{%3, %2, %0|%0, %2, %3}"
5827 [(set_attr "type" "ssemov")
5828 (set_attr "prefix_extra" "1")
5829 (set_attr "mode" "V2DF")])
;; blendps on V4SF.  In the RTL, operand 2 ("xm") is listed before
;; operand 1, which is tied to the destination.  Operand 3 is an
;; immediate restricted by const_0_to_15_operand.
5831 (define_insn "sse4_1_blendps"
5832 [(set (match_operand:V4SF 0 "register_operand" "=x")
5834 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5835 (match_operand:V4SF 1 "register_operand" "0")
5836 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
5838 "blendps\t{%3, %2, %0|%0, %2, %3}"
5839 [(set_attr "type" "ssemov")
5840 (set_attr "prefix_extra" "1")
5841 (set_attr "mode" "V4SF")])
;; blendvpd on V2DF.  Operand 3 is constrained to "z", while operands
;; 0, 1 and 2 use the *_not_xmm0 predicates so they cannot be matched
;; to that register.  Operand 1 is tied to the destination.
5843 (define_insn "sse4_1_blendvpd"
5844 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
5845 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
5846 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
5847 (match_operand:V2DF 3 "register_operand" "z")]
5850 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
5851 [(set_attr "type" "ssemov")
5852 (set_attr "prefix_extra" "1")
5853 (set_attr "mode" "V2DF")])
;; blendvps on V4SF.  Operand 3 is constrained to "z", while operands
;; 0, 1 and 2 use the *_not_xmm0 predicates so they cannot be matched
;; to that register.  Operand 1 is tied to the destination.
5855 (define_insn "sse4_1_blendvps"
5856 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
5857 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
5858 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
5859 (match_operand:V4SF 3 "register_operand" "z")]
5862 "blendvps\t{%3, %2, %0|%0, %2, %3}"
5863 [(set_attr "type" "ssemov")
5864 (set_attr "prefix_extra" "1")
5865 (set_attr "mode" "V4SF")])
;; dppd on V2DF, modelled as an unspec.  Operand 1 is commutative ("%")
;; and tied to the destination; operand 2 is "xm"; operand 3 is an
;; immediate restricted by const_0_to_255_operand.
5867 (define_insn "sse4_1_dppd"
5868 [(set (match_operand:V2DF 0 "register_operand" "=x")
5869 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
5870 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5871 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5874 "dppd\t{%3, %2, %0|%0, %2, %3}"
5875 [(set_attr "type" "ssemul")
5876 (set_attr "prefix_extra" "1")
5877 (set_attr "mode" "V2DF")])
;; dpps on V4SF, modelled as an unspec.  Operand 1 is commutative ("%")
;; and tied to the destination; operand 2 is "xm"; operand 3 is an
;; immediate restricted by const_0_to_255_operand.
5879 (define_insn "sse4_1_dpps"
5880 [(set (match_operand:V4SF 0 "register_operand" "=x")
5881 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
5882 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5883 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5886 "dpps\t{%3, %2, %0|%0, %2, %3}"
5887 [(set_attr "type" "ssemul")
5888 (set_attr "prefix_extra" "1")
5889 (set_attr "mode" "V4SF")])
;; movntdqa: loads a V2DI value from the memory operand 1 into the
;; register operand 0, wrapped in an unspec.  The source must be memory.
5891 (define_insn "sse4_1_movntdqa"
5892 [(set (match_operand:V2DI 0 "register_operand" "=x")
5893 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
5896 "movntdqa\t{%1, %0|%0, %1}"
5897 [(set_attr "type" "ssecvt")
5898 (set_attr "prefix_extra" "1")
5899 (set_attr "mode" "TI")])
;; mpsadbw on V16QI, modelled as an unspec.  Operand 1 is tied to the
;; destination; operand 2 is "xm"; operand 3 is an immediate restricted
;; by const_0_to_255_operand.
5901 (define_insn "sse4_1_mpsadbw"
5902 [(set (match_operand:V16QI 0 "register_operand" "=x")
5903 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5904 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5905 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5908 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
5909 [(set_attr "type" "sselog1")
5910 (set_attr "prefix_extra" "1")
5911 (set_attr "mode" "TI")])
;; packusdw: produces a V8HI result from two V4SI inputs.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
5913 (define_insn "sse4_1_packusdw"
5914 [(set (match_operand:V8HI 0 "register_operand" "=x")
5917 (match_operand:V4SI 1 "register_operand" "0"))
5919 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
5921 "packusdw\t{%2, %0|%0, %2}"
5922 [(set_attr "type" "sselog")
5923 (set_attr "prefix_extra" "1")
5924 (set_attr "mode" "TI")])
;; pblendvb on V16QI.  Operand 3 is constrained to "z", while operands
;; 0, 1 and 2 use the *_not_xmm0 predicates so they cannot be matched
;; to that register.  Operand 1 is tied to the destination.
5926 (define_insn "sse4_1_pblendvb"
5927 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
5928 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
5929 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
5930 (match_operand:V16QI 3 "register_operand" "z")]
5933 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
5934 [(set_attr "type" "ssemov")
5935 (set_attr "prefix_extra" "1")
5936 (set_attr "mode" "TI")])
;; pblendw on V8HI.  In the RTL, operand 2 ("xm") is listed before
;; operand 1, which is tied to the destination.  Operand 3 is an
;; immediate restricted by const_0_to_255_operand.
5938 (define_insn "sse4_1_pblendw"
5939 [(set (match_operand:V8HI 0 "register_operand" "=x")
5941 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5942 (match_operand:V8HI 1 "register_operand" "0")
5943 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
5945 "pblendw\t{%3, %2, %0|%0, %2, %3}"
5946 [(set_attr "type" "ssemov")
5947 (set_attr "prefix_extra" "1")
5948 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of its single
;; input.  The source may be a register or memory and is not tied to
;; the destination.
5950 (define_insn "sse4_1_phminposuw"
5951 [(set (match_operand:V8HI 0 "register_operand" "=x")
5952 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
5953 UNSPEC_PHMINPOSUW))]
5955 "phminposuw\t{%1, %0|%0, %1}"
5956 [(set_attr "type" "sselog1")
5957 (set_attr "prefix_extra" "1")
5958 (set_attr "mode" "TI")])
;; Named pmovsxbw pattern: V8HI result from a full V16QI register
;; source (operand 1 is register-only).
5960 (define_insn "sse4_1_extendv8qiv8hi2"
5961 [(set (match_operand:V8HI 0 "register_operand" "=x")
5964 (match_operand:V16QI 1 "register_operand" "x")
5965 (parallel [(const_int 0)
5974 "pmovsxbw\t{%1, %0|%0, %1}"
5975 [(set_attr "type" "ssemov")
5976 (set_attr "prefix_extra" "1")
5977 (set_attr "mode" "TI")])
;; Anonymous pmovsxbw variant: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
5979 (define_insn "*sse4_1_extendv8qiv8hi2"
5980 [(set (match_operand:V8HI 0 "register_operand" "=x")
5983 (vec_duplicate:V16QI
5984 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
5985 (parallel [(const_int 0)
5994 "pmovsxbw\t{%1, %0|%0, %1}"
5995 [(set_attr "type" "ssemov")
5996 (set_attr "prefix_extra" "1")
5997 (set_attr "mode" "TI")])
;; Named pmovsxbd pattern: V4SI result from a full V16QI register
;; source (operand 1 is register-only).
5999 (define_insn "sse4_1_extendv4qiv4si2"
6000 [(set (match_operand:V4SI 0 "register_operand" "=x")
6003 (match_operand:V16QI 1 "register_operand" "x")
6004 (parallel [(const_int 0)
6009 "pmovsxbd\t{%1, %0|%0, %1}"
6010 [(set_attr "type" "ssemov")
6011 (set_attr "prefix_extra" "1")
6012 (set_attr "mode" "TI")])
;; Anonymous pmovsxbd variant: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6014 (define_insn "*sse4_1_extendv4qiv4si2"
6015 [(set (match_operand:V4SI 0 "register_operand" "=x")
6018 (vec_duplicate:V16QI
6019 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6020 (parallel [(const_int 0)
6025 "pmovsxbd\t{%1, %0|%0, %1}"
6026 [(set_attr "type" "ssemov")
6027 (set_attr "prefix_extra" "1")
6028 (set_attr "mode" "TI")])
;; Named pmovsxbq pattern: V2DI result from a full V16QI register
;; source (operand 1 is register-only).
6030 (define_insn "sse4_1_extendv2qiv2di2"
6031 [(set (match_operand:V2DI 0 "register_operand" "=x")
6034 (match_operand:V16QI 1 "register_operand" "x")
6035 (parallel [(const_int 0)
6038 "pmovsxbq\t{%1, %0|%0, %1}"
6039 [(set_attr "type" "ssemov")
6040 (set_attr "prefix_extra" "1")
6041 (set_attr "mode" "TI")])
;; Anonymous pmovsxbq variant: the source is a V2QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6043 (define_insn "*sse4_1_extendv2qiv2di2"
6044 [(set (match_operand:V2DI 0 "register_operand" "=x")
6047 (vec_duplicate:V16QI
6048 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6049 (parallel [(const_int 0)
6052 "pmovsxbq\t{%1, %0|%0, %1}"
6053 [(set_attr "type" "ssemov")
6054 (set_attr "prefix_extra" "1")
6055 (set_attr "mode" "TI")])
;; Named pmovsxwd pattern: V4SI result from a full V8HI register
;; source (operand 1 is register-only).
6057 (define_insn "sse4_1_extendv4hiv4si2"
6058 [(set (match_operand:V4SI 0 "register_operand" "=x")
6061 (match_operand:V8HI 1 "register_operand" "x")
6062 (parallel [(const_int 0)
6067 "pmovsxwd\t{%1, %0|%0, %1}"
6068 [(set_attr "type" "ssemov")
6069 (set_attr "prefix_extra" "1")
6070 (set_attr "mode" "TI")])
;; Anonymous pmovsxwd variant with a narrow register-or-memory source.
;; The source is V4HI: the V4SI result is built from four HImode
;; elements, and this matches *sse4_1_zero_extendv4hiv4si2.
6072 (define_insn "*sse4_1_extendv4hiv4si2"
6073 [(set (match_operand:V4SI 0 "register_operand" "=x")
6077 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6078 (parallel [(const_int 0)
6083 "pmovsxwd\t{%1, %0|%0, %1}"
6084 [(set_attr "type" "ssemov")
6085 (set_attr "prefix_extra" "1")
6086 (set_attr "mode" "TI")])
;; Named pmovsxwq pattern: V2DI result from a full V8HI register
;; source (operand 1 is register-only).
6088 (define_insn "sse4_1_extendv2hiv2di2"
6089 [(set (match_operand:V2DI 0 "register_operand" "=x")
6092 (match_operand:V8HI 1 "register_operand" "x")
6093 (parallel [(const_int 0)
6096 "pmovsxwq\t{%1, %0|%0, %1}"
6097 [(set_attr "type" "ssemov")
6098 (set_attr "prefix_extra" "1")
6099 (set_attr "mode" "TI")])
;; Anonymous pmovsxwq variant with a narrow register-or-memory source.
;; The source is V2HI: the V2DI result is built from two HImode
;; elements, and this matches *sse4_1_zero_extendv2hiv2di2.
6101 (define_insn "*sse4_1_extendv2hiv2di2"
6102 [(set (match_operand:V2DI 0 "register_operand" "=x")
6106 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6107 (parallel [(const_int 0)
6110 "pmovsxwq\t{%1, %0|%0, %1}"
6111 [(set_attr "type" "ssemov")
6112 (set_attr "prefix_extra" "1")
6113 (set_attr "mode" "TI")])
;; Named pmovsxdq pattern: V2DI result from a full V4SI register
;; source (operand 1 is register-only).
6115 (define_insn "sse4_1_extendv2siv2di2"
6116 [(set (match_operand:V2DI 0 "register_operand" "=x")
6119 (match_operand:V4SI 1 "register_operand" "x")
6120 (parallel [(const_int 0)
6123 "pmovsxdq\t{%1, %0|%0, %1}"
6124 [(set_attr "type" "ssemov")
6125 (set_attr "prefix_extra" "1")
6126 (set_attr "mode" "TI")])
;; Anonymous pmovsxdq variant: the source is a V2SI register-or-memory
;; operand.
6128 (define_insn "*sse4_1_extendv2siv2di2"
6129 [(set (match_operand:V2DI 0 "register_operand" "=x")
6133 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6134 (parallel [(const_int 0)
6137 "pmovsxdq\t{%1, %0|%0, %1}"
6138 [(set_attr "type" "ssemov")
6139 (set_attr "prefix_extra" "1")
6140 (set_attr "mode" "TI")])
;; Named pmovzxbw pattern: V8HI result from a full V16QI register
;; source (operand 1 is register-only).
6142 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6143 [(set (match_operand:V8HI 0 "register_operand" "=x")
6146 (match_operand:V16QI 1 "register_operand" "x")
6147 (parallel [(const_int 0)
6156 "pmovzxbw\t{%1, %0|%0, %1}"
6157 [(set_attr "type" "ssemov")
6158 (set_attr "prefix_extra" "1")
6159 (set_attr "mode" "TI")])
;; Anonymous pmovzxbw variant: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6161 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6162 [(set (match_operand:V8HI 0 "register_operand" "=x")
6165 (vec_duplicate:V16QI
6166 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6167 (parallel [(const_int 0)
6176 "pmovzxbw\t{%1, %0|%0, %1}"
6177 [(set_attr "type" "ssemov")
6178 (set_attr "prefix_extra" "1")
6179 (set_attr "mode" "TI")])
;; Named pmovzxbd pattern: V4SI result from a full V16QI register
;; source (operand 1 is register-only).
6181 (define_insn "sse4_1_zero_extendv4qiv4si2"
6182 [(set (match_operand:V4SI 0 "register_operand" "=x")
6185 (match_operand:V16QI 1 "register_operand" "x")
6186 (parallel [(const_int 0)
6191 "pmovzxbd\t{%1, %0|%0, %1}"
6192 [(set_attr "type" "ssemov")
6193 (set_attr "prefix_extra" "1")
6194 (set_attr "mode" "TI")])
;; Anonymous pmovzxbd variant: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6196 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6197 [(set (match_operand:V4SI 0 "register_operand" "=x")
6200 (vec_duplicate:V16QI
6201 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6202 (parallel [(const_int 0)
6207 "pmovzxbd\t{%1, %0|%0, %1}"
6208 [(set_attr "type" "ssemov")
6209 (set_attr "prefix_extra" "1")
6210 (set_attr "mode" "TI")])
;; Named pmovzxbq pattern: V2DI result from a full V16QI register
;; source (operand 1 is register-only).
6212 (define_insn "sse4_1_zero_extendv2qiv2di2"
6213 [(set (match_operand:V2DI 0 "register_operand" "=x")
6216 (match_operand:V16QI 1 "register_operand" "x")
6217 (parallel [(const_int 0)
6220 "pmovzxbq\t{%1, %0|%0, %1}"
6221 [(set_attr "type" "ssemov")
6222 (set_attr "prefix_extra" "1")
6223 (set_attr "mode" "TI")])
;; Anonymous pmovzxbq variant: the source is a V2QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
6225 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6226 [(set (match_operand:V2DI 0 "register_operand" "=x")
6229 (vec_duplicate:V16QI
6230 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6231 (parallel [(const_int 0)
6234 "pmovzxbq\t{%1, %0|%0, %1}"
6235 [(set_attr "type" "ssemov")
6236 (set_attr "prefix_extra" "1")
6237 (set_attr "mode" "TI")])
;; Named pmovzxwd pattern: V4SI result from a full V8HI register
;; source (operand 1 is register-only).
6239 (define_insn "sse4_1_zero_extendv4hiv4si2"
6240 [(set (match_operand:V4SI 0 "register_operand" "=x")
6243 (match_operand:V8HI 1 "register_operand" "x")
6244 (parallel [(const_int 0)
6249 "pmovzxwd\t{%1, %0|%0, %1}"
6250 [(set_attr "type" "ssemov")
6251 (set_attr "prefix_extra" "1")
6252 (set_attr "mode" "TI")])
;; Anonymous pmovzxwd variant: the source is a V4HI register-or-memory
;; operand.
6254 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6255 [(set (match_operand:V4SI 0 "register_operand" "=x")
6259 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6260 (parallel [(const_int 0)
6265 "pmovzxwd\t{%1, %0|%0, %1}"
6266 [(set_attr "type" "ssemov")
6267 (set_attr "prefix_extra" "1")
6268 (set_attr "mode" "TI")])
;; Named pmovzxwq pattern: V2DI result from a full V8HI register
;; source (operand 1 is register-only).
6270 (define_insn "sse4_1_zero_extendv2hiv2di2"
6271 [(set (match_operand:V2DI 0 "register_operand" "=x")
6274 (match_operand:V8HI 1 "register_operand" "x")
6275 (parallel [(const_int 0)
6278 "pmovzxwq\t{%1, %0|%0, %1}"
6279 [(set_attr "type" "ssemov")
6280 (set_attr "prefix_extra" "1")
6281 (set_attr "mode" "TI")])
;; Anonymous pmovzxwq variant: the source is a V2HI register-or-memory
;; operand.
6283 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6284 [(set (match_operand:V2DI 0 "register_operand" "=x")
6288 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6289 (parallel [(const_int 0)
6292 "pmovzxwq\t{%1, %0|%0, %1}"
6293 [(set_attr "type" "ssemov")
6294 (set_attr "prefix_extra" "1")
6295 (set_attr "mode" "TI")])
;; Named pmovzxdq pattern: V2DI result from a full V4SI register
;; source (operand 1 is register-only).
6297 (define_insn "sse4_1_zero_extendv2siv2di2"
6298 [(set (match_operand:V2DI 0 "register_operand" "=x")
6301 (match_operand:V4SI 1 "register_operand" "x")
6302 (parallel [(const_int 0)
6305 "pmovzxdq\t{%1, %0|%0, %1}"
6306 [(set_attr "type" "ssemov")
6307 (set_attr "prefix_extra" "1")
6308 (set_attr "mode" "TI")])
;; Anonymous pmovzxdq variant: the source is a V2SI register-or-memory
;; operand.
6310 (define_insn "*sse4_1_zero_extendv2siv2di2"
6311 [(set (match_operand:V2DI 0 "register_operand" "=x")
6315 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6316 (parallel [(const_int 0)
6319 "pmovzxdq\t{%1, %0|%0, %1}"
6320 [(set_attr "type" "ssemov")
6321 (set_attr "prefix_extra" "1")
6322 (set_attr "mode" "TI")])
6324 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6325 ;; But it is not really a compare instruction.
;; The only destination is FLAGS_REG in CC mode, set from an unspec of
;; the two V2DI inputs; there is no register output.  Operand 0 must be
;; a register, operand 1 may be a register or memory.
6326 (define_insn "sse4_1_ptest"
6327 [(set (reg:CC FLAGS_REG)
6328 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6329 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6332 "ptest\t{%1, %0|%0, %1}"
6333 [(set_attr "type" "ssecomi")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "mode" "TI")])
;; roundpd on V2DF, modelled as an unspec.  Operand 1 may be a register
;; or memory and is not tied to the destination; operand 2 is an
;; immediate restricted by const_0_to_15_operand.
6337 (define_insn "sse4_1_roundpd"
6338 [(set (match_operand:V2DF 0 "register_operand" "=x")
6339 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6340 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6343 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6344 [(set_attr "type" "ssecvt")
6345 (set_attr "prefix_extra" "1")
6346 (set_attr "mode" "V2DF")])
;; roundps on V4SF, modelled as an unspec.  Operand 1 may be a register
;; or memory and is not tied to the destination; operand 2 is an
;; immediate restricted by const_0_to_15_operand.
6348 (define_insn "sse4_1_roundps"
6349 [(set (match_operand:V4SF 0 "register_operand" "=x")
6350 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6351 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6354 "roundps\t{%2, %1, %0|%0, %1, %2}"
6355 [(set_attr "type" "ssecvt")
6356 (set_attr "prefix_extra" "1")
6357 (set_attr "mode" "V4SF")])
;; roundsd on V2DF.  The unspec takes register operand 2 and the
;; immediate operand 3 (const_0_to_15_operand); operand 1 is a separate
;; input tied to the destination.
6359 (define_insn "sse4_1_roundsd"
6360 [(set (match_operand:V2DF 0 "register_operand" "=x")
6362 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6363 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6365 (match_operand:V2DF 1 "register_operand" "0")
6368 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6369 [(set_attr "type" "ssecvt")
6370 (set_attr "prefix_extra" "1")
6371 (set_attr "mode" "V2DF")])
;; roundss on V4SF.  The unspec takes register operand 2 and the
;; immediate operand 3 (const_0_to_15_operand); operand 1 is a separate
;; input tied to the destination.
6373 (define_insn "sse4_1_roundss"
6374 [(set (match_operand:V4SF 0 "register_operand" "=x")
6376 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6377 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6379 (match_operand:V4SF 1 "register_operand" "0")
6382 "roundss\t{%3, %2, %0|%0, %2, %3}"
6383 [(set_attr "type" "ssecvt")
6384 (set_attr "prefix_extra" "1")
6385 (set_attr "mode" "V4SF")])
6387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6389 ;; Intel SSE4.2 string/text processing instructions
6391 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: the SImode operand 0
;; ("c"), the V16QI operand 1 ("z") and FLAGS_REG.  Inputs are the
;; vector operands 2 and 4, the SImode operands 3 ("a") and 5 ("d"),
;; and the immediate operand 6.  The condition only holds before
;; reload.  The split code uses the absence of REG_UNUSED notes on
;; curr_insn to decide which results are live, and emits
;; gen_sse4_2_pcmpestri, gen_sse4_2_pcmpestrm, or -- when only the flags
;; are live -- gen_sse4_2_pcmpestr_cconly.
6393 (define_insn_and_split "sse4_2_pcmpestr"
6394 [(set (match_operand:SI 0 "register_operand" "=c,c")
6396 [(match_operand:V16QI 2 "register_operand" "x,x")
6397 (match_operand:SI 3 "register_operand" "a,a")
6398 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6399 (match_operand:SI 5 "register_operand" "d,d")
6400 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6402 (set (match_operand:V16QI 1 "register_operand" "=z,z")
6410 (set (reg:CC FLAGS_REG)
6419 && !(reload_completed || reload_in_progress)"
6424 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6425 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6426 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6429 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6430 operands[3], operands[4],
6431 operands[5], operands[6]));
6433 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6434 operands[3], operands[4],
6435 operands[5], operands[6]));
6436 if (flags && !(ecx || xmm0))
6437 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6438 operands[4], operands[5],
6442 [(set_attr "type" "sselog")
6443 (set_attr "prefix_data16" "1")
6444 (set_attr "prefix_extra" "1")
6445 (set_attr "memory" "none,load")
6446 (set_attr "mode" "TI")])
;; pcmpestri: sets the SImode operand 0 ("c") and FLAGS_REG.  Inputs
;; are the vector operands 1 and 3, the SImode operands 2 ("a") and 4
;; ("d"), and the immediate operand 5.  The second alternative takes
;; operand 3 from memory (memory attribute "load").
6448 (define_insn "sse4_2_pcmpestri"
6449 [(set (match_operand:SI 0 "register_operand" "=c,c")
6451 [(match_operand:V16QI 1 "register_operand" "x,x")
6452 (match_operand:SI 2 "register_operand" "a,a")
6453 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6454 (match_operand:SI 4 "register_operand" "d,d")
6455 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6457 (set (reg:CC FLAGS_REG)
6466 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6467 [(set_attr "type" "sselog")
6468 (set_attr "prefix_data16" "1")
6469 (set_attr "prefix_extra" "1")
6470 (set_attr "memory" "none,load")
6471 (set_attr "mode" "TI")])
;; pcmpestrm: sets the V16QI operand 0 ("z") and FLAGS_REG.  Inputs are
;; the vector operands 1 and 3, the SImode operands 2 ("a") and 4
;; ("d"), and the immediate operand 5.  The second alternative takes
;; operand 3 from memory (memory attribute "load").
6473 (define_insn "sse4_2_pcmpestrm"
6474 [(set (match_operand:V16QI 0 "register_operand" "=z,z")
6476 [(match_operand:V16QI 1 "register_operand" "x,x")
6477 (match_operand:SI 2 "register_operand" "a,a")
6478 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6479 (match_operand:SI 4 "register_operand" "d,d")
6480 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6482 (set (reg:CC FLAGS_REG)
6491 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6492 [(set_attr "type" "sselog")
6493 (set_attr "prefix_data16" "1")
6494 (set_attr "prefix_extra" "1")
6495 (set_attr "memory" "none,load")
6496 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the sole set is FLAGS_REG; the other results
;; appear only as scratch clobbers.  There are four alternatives.
;; Alternatives 0 and 1 clobber an SImode scratch in "c" and emit
;; pcmpestri; alternatives 2 and 3 clobber a V16QI scratch in "z" and
;; emit pcmpestrm.  Odd alternatives take operand 2 from memory.
6498 (define_insn "sse4_2_pcmpestr_cconly"
6499 [(set (reg:CC FLAGS_REG)
6501 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6502 (match_operand:SI 1 "register_operand" "a,a,a,a")
6503 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6504 (match_operand:SI 3 "register_operand" "d,d,d,d")
6505 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6507 (clobber (match_scratch:SI 5 "=c,c,X,X"))
6508 (clobber (match_scratch:V16QI 6 "=X,X,z,z"))]
6511 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6512 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6513 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6514 pcmpestrm\t{%4, %2, %0|%0, %2, %4}"
6515 [(set_attr "type" "sselog")
6516 (set_attr "prefix_data16" "1")
6517 (set_attr "prefix_extra" "1")
6518 (set_attr "memory" "none,load,none,load")
6519 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: the SImode operand 0
;; ("c"), the V16QI operand 1 ("z") and FLAGS_REG.  Inputs are the
;; vector operands 2 and 3 and the immediate operand 4.  The condition
;; only holds before reload.  The split code uses the absence of
;; REG_UNUSED notes on curr_insn to decide which results are live, and
;; emits gen_sse4_2_pcmpistri, gen_sse4_2_pcmpistrm, or -- when only the
;; flags are live -- gen_sse4_2_pcmpistr_cconly.
6521 (define_insn_and_split "sse4_2_pcmpistr"
6522 [(set (match_operand:SI 0 "register_operand" "=c,c")
6524 [(match_operand:V16QI 2 "register_operand" "x,x")
6525 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6526 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6528 (set (match_operand:V16QI 1 "register_operand" "=z,z")
6534 (set (reg:CC FLAGS_REG)
6541 && !(reload_completed || reload_in_progress)"
6546 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6547 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6548 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6551 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6552 operands[3], operands[4]));
6554 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6555 operands[3], operands[4]));
6556 if (flags && !(ecx || xmm0))
6557 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6561 [(set_attr "type" "sselog")
6562 (set_attr "prefix_data16" "1")
6563 (set_attr "prefix_extra" "1")
6564 (set_attr "memory" "none,load")
6565 (set_attr "mode" "TI")])
;; pcmpistri: sets the SImode operand 0 ("c") and FLAGS_REG.  Inputs
;; are the vector operands 1 and 2 and the immediate operand 3.  The
;; second alternative takes operand 2 from memory (memory attribute
;; "load").
6567 (define_insn "sse4_2_pcmpistri"
6568 [(set (match_operand:SI 0 "register_operand" "=c,c")
6570 [(match_operand:V16QI 1 "register_operand" "x,x")
6571 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6572 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6574 (set (reg:CC FLAGS_REG)
6581 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6582 [(set_attr "type" "sselog")
6583 (set_attr "prefix_data16" "1")
6584 (set_attr "prefix_extra" "1")
6585 (set_attr "memory" "none,load")
6586 (set_attr "mode" "TI")])
;; pcmpistrm: sets the V16QI operand 0 ("z") and FLAGS_REG.  Inputs are
;; the vector operands 1 and 2 and the immediate operand 3.  The second
;; alternative takes operand 2 from memory (memory attribute "load").
6588 (define_insn "sse4_2_pcmpistrm"
6589 [(set (match_operand:V16QI 0 "register_operand" "=z,z")
6591 [(match_operand:V16QI 1 "register_operand" "x,x")
6592 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6593 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6595 (set (reg:CC FLAGS_REG)
6602 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6603 [(set_attr "type" "sselog")
6604 (set_attr "prefix_data16" "1")
6605 (set_attr "prefix_extra" "1")
6606 (set_attr "memory" "none,load")
6607 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the sole set is FLAGS_REG; the other results
;; appear only as scratch clobbers.  There are four alternatives.
;; Alternatives 0 and 1 clobber an SImode scratch in "c" and emit
;; pcmpistri; alternatives 2 and 3 clobber a V16QI scratch in "z" and
;; emit pcmpistrm.  Odd alternatives take operand 1 from memory.
6609 (define_insn "sse4_2_pcmpistr_cconly"
6610 [(set (reg:CC FLAGS_REG)
6612 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6613 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6614 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6616 (clobber (match_scratch:SI 3 "=c,c,X,X"))
6617 (clobber (match_scratch:V16QI 4 "=X,X,z,z"))]
6620 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6621 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6622 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6623 pcmpistrm\t{%2, %1, %0|%0, %1, %2}"
6624 [(set_attr "type" "sselog")
6625 (set_attr "prefix_data16" "1")
6626 (set_attr "prefix_extra" "1")
6627 (set_attr "memory" "none,load,none,load")
6628 (set_attr "mode" "TI")])