1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; match the element widths in bytes of the modes it lists
;; (V16QI = 1, V8HI = 2, V4SI = 4, V2DI = 8).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander, instantiated once per integer vector mode in SSEMODEI.
;; The visible preparation code hands the operands to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Alternatives:
;;   0: SSE constant ("C") -> register, opcode chosen by
;;      standard_sse_constant_opcode;
;;   1: register or memory -> register;
;;   2: register -> memory.
;; The condition rejects memory-to-memory moves.  For the plain moves the
;; output code emits movaps when the insn's "mode" attribute is V4SF and
;; movdqa otherwise.  The "mode" attribute is computed from optimize_size,
;; TARGET_SSE2 and (for alternative 2) TARGET_SSE_TYPELESS_STORES, with
;; "TI" as the fallback.
;; NOTE(review): the case labels and the first arm of the mode condition
;; are not visible in this listing -- confirm against the full file.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 return standard_sse_constant_opcode (insn, operands[1]);
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
74 return "movdqa\t{%1, %0|%0, %1}";
79 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_string "TI")))])
;; Move expander for V4SF; the visible preparation code hands the
;; operands to ix86_expand_vector_move.
90 (define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
95 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF.  Alternative 0 loads an SSE constant ("C") using
;; the opcode returned by standard_sse_constant_opcode; the other two
;; alternatives (load / register copy, and store) emit movaps.  The "mode"
;; attribute is always V4SF.
;; NOTE(review): the insn condition is not visible in this listing.
99 (define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
104 switch (which_alternative)
107 return standard_sse_constant_opcode (insn, operands[1]);
110 return "movaps\t{%1, %0|%0, %1}";
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at offset 0 and sets operand 2 to the
;; V4SF zero vector; the replacement uses a vec_duplicate of operand 1.
;; NOTE(review): the opening keyword (presumably define_split) and most of
;; the replacement pattern are not visible in this listing.
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
124 (vec_duplicate:V4SF (match_dup 1))
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the visible preparation code hands the
;; operands to ix86_expand_vector_move.
132 (define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
137 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF.  Alternative 0 loads an SSE constant ("C") using
;; standard_sse_constant_opcode; alternatives 1 and 2 are the load /
;; register copy and the store.  Memory-to-memory is rejected by the
;; condition.  The plain moves emit movaps when the "mode" attribute is
;; V4SF and movapd otherwise.  The attribute is V4SF when optimize_size is
;; nonzero, when TARGET_SSE2 is zero, or for alternative 2 under the
;; TARGET_SSE_TYPELESS_STORES test; otherwise it is V2DF.
141 (define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
146 switch (which_alternative)
149 return standard_sse_constant_opcode (insn, operands[1]);
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
155 return "movapd\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
168 (const_string "V4SF")
169 (const_string "V2DF")))])
;; Post-reload rewrite (TARGET_SSE2 only) of a V2DF register load whose
;; source satisfies zero_extended_scalar_load_operand.  The load becomes a
;; vec_concat of the DFmode subreg at offset 0 of operand 1 with a DFmode
;; zero (operand 2).
;; NOTE(review): the opening keyword (presumably define_split) is not
;; visible in this listing.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes a single register
;; operand and hands it to ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; the visible
;; preparation code hands the operands to
;; ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups: V4SF move wrapped in an unspec.  Alternative 0 loads from a
;; register or memory into a register, alternative 1 stores a register to
;; memory; the condition rejects memory-to-memory.
;; The "mode" attribute is V4SF to match the operand mode and the "ps"
;; mnemonic.  It previously said V2DF, which is the value that belongs to
;; the movupd sibling below.
198 (define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
;; movupd: V2DF move wrapped in an unspec, SSE2 only.  Alternative 0 is
;; a load or register copy, alternative 1 a store; memory-to-memory is
;; rejected by the condition.
207 (define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
;; movdqu: V16QI move wrapped in an unspec, SSE2 only.  Alternative 0 is
;; a load or register copy, alternative 1 a store; memory-to-memory is
;; rejected by the condition.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
;; movntps: store of a V4SF SSE register to memory, wrapped in an unspec.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this listing.
225 (define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
;; movntpd: store of a V2DF SSE register to memory, wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf --
;; confirm whether the difference is intended.
234 (define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
;; movntdq: store of a V2DI SSE register to memory, wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf --
;; confirm whether the difference is intended.
243 (define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory, wrapped
;; in an unspec.
;; The "mode" attribute is SI to match the operand mode.  It previously
;; said V2DF, although no vector or floating-point operand is involved.
;; NOTE(review): "type" stays ssecvt, as on the other movnt* patterns
;; except sse_movntv4sf -- confirm whether ssemov is wanted here too.
252 (define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "SI")])
;; lddqu: load of a V16QI value from memory into an SSE register, wrapped
;; in an unspec.  The source must be a memory operand.
261 (define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
272 ;; Parallel single-precision floating point arithmetic
274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all work is done by
;; ix86_expand_fp_absneg_operator, after which expansion is DONE.
276 (define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all work is done by
;; ix86_expand_fp_absneg_operator, after which expansion is DONE.
282 (define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
288 (define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; addps.  Operand 1 is tied to the output and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
295 (define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
;; addss on V4SF operands; the "mode" attribute is SF.
;; NOTE(review): the wrapper around the plus (presumably a vec_merge that
;; keeps the upper elements of operand 1) is not visible in this listing.
304 (define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
316 (define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; subps.  Operand 1 is a register tied to the output ("0", not
;; commutative); operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
323 (define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
;; subss on V4SF operands; the "mode" attribute is SF.
;; NOTE(review): the wrapper around the minus (presumably a vec_merge) and
;; the insn condition are not visible in this listing.
332 (define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
344 (define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; mulps.  Operand 1 is tied to the output and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
351 (define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
;; mulss on V4SF operands; the "mode" attribute is SF.
;; NOTE(review): the wrapper around the mult (presumably a vec_merge) is
;; not visible in this listing.
360 (define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
372 (define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; divps.  Operand 1 is a register tied to the output ("0", not
;; commutative); operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
379 (define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
;; divss on V4SF operands; the "mode" attribute is SF.
;; NOTE(review): the wrapper around the div (presumably a vec_merge) and
;; the insn condition are not visible in this listing.
388 (define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
;; rcpps, represented as UNSPEC_RCP of operand 1 (register or memory).
400 (define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
;; rcpss.  Operand 1 is the source; operand 2 is a register tied to the
;; output ("0").  The "mode" attribute is SF.
;; NOTE(review): the unspec name and the wrapper combining the result with
;; operand 2 are not visible in this listing.
409 (define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
414 (match_operand:V4SF 2 "register_operand" "0")
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
;; rsqrtps, represented as UNSPEC_RSQRT of operand 1 (register or memory).
421 (define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
;; rsqrtss.  Operand 1 is the source; operand 2 is a register tied to the
;; output ("0").  The "mode" attribute is SF.
;; NOTE(review): the unspec name and the wrapper combining the result with
;; operand 2 are not visible in this listing.
430 (define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
435 (match_operand:V4SF 2 "register_operand" "0")
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
;; sqrtps: V4SF square root of operand 1 (register or memory).
;; NOTE(review): the insn condition is not visible in this listing.
442 (define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
;; sqrtss.  Operand 1 is the source; operand 2 is a register tied to the
;; output ("0").  The "mode" attribute is SF.
;; NOTE(review): the wrapper combining the sqrt with operand 2 is not
;; visible in this listing.
450 (define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
461 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462 ;; isn't really correct, as those rtl operators aren't defined when
463 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
465 (define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps, commutative form ("%0" on operand 1).  Only enabled when
;; flag_finite_math_only is set.
476 (define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
;; maxps, non-commutative form: operand 1 must be a register tied to the
;; output ("0").
;; NOTE(review): the insn condition is not visible in this listing.
486 (define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
;; maxss, commutative form ("%0" on operand 1), enabled only when
;; flag_finite_math_only is set.  The "mode" attribute is SF.
;; NOTE(review): the wrapper around the smax (presumably a vec_merge) is
;; not visible in this listing.
495 (define_insn "*sse_vmsmaxv4sf3_finite"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
502 "TARGET_SSE && flag_finite_math_only
503 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
504 "maxss\t{%2, %0|%0, %2}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
;; maxss, non-commutative form: operand 1 must be a register tied to the
;; output ("0").  The "mode" attribute is SF.
;; NOTE(review): the wrapper around the smax and the insn condition are
;; not visible in this listing.
508 (define_insn "sse_vmsmaxv4sf3"
509 [(set (match_operand:V4SF 0 "register_operand" "=x")
511 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
512 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
516 "maxss\t{%2, %0|%0, %2}"
517 [(set_attr "type" "sse")
518 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
520 (define_expand "sminv4sf3"
521 [(set (match_operand:V4SF 0 "register_operand" "")
522 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
523 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
526 if (!flag_finite_math_only)
527 operands[1] = force_reg (V4SFmode, operands[1]);
528 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps, commutative form ("%0" on operand 1).  Only enabled when
;; flag_finite_math_only is set.
531 (define_insn "*sminv4sf3_finite"
532 [(set (match_operand:V4SF 0 "register_operand" "=x")
533 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
534 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
535 "TARGET_SSE && flag_finite_math_only
536 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
537 "minps\t{%2, %0|%0, %2}"
538 [(set_attr "type" "sse")
539 (set_attr "mode" "V4SF")])
;; minps, non-commutative form: operand 1 must be a register tied to the
;; output ("0").
;; NOTE(review): the insn condition is not visible in this listing.
541 (define_insn "*sminv4sf3"
542 [(set (match_operand:V4SF 0 "register_operand" "=x")
543 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
544 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
546 "minps\t{%2, %0|%0, %2}"
547 [(set_attr "type" "sse")
548 (set_attr "mode" "V4SF")])
;; minss, commutative form ("%0" on operand 1), enabled only when
;; flag_finite_math_only is set.  The "mode" attribute is SF.
;; NOTE(review): the wrapper around the smin (presumably a vec_merge) is
;; not visible in this listing.
550 (define_insn "*sse_vmsminv4sf3_finite"
551 [(set (match_operand:V4SF 0 "register_operand" "=x")
553 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
554 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
557 "TARGET_SSE && flag_finite_math_only
558 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
559 "minss\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sse")
561 (set_attr "mode" "SF")])
;; minss, non-commutative form: operand 1 must be a register tied to the
;; output ("0").  The "mode" attribute is SF.
;; NOTE(review): the wrapper around the smin and the insn condition are
;; not visible in this listing.
563 (define_insn "sse_vmsminv4sf3"
564 [(set (match_operand:V4SF 0 "register_operand" "=x")
566 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
567 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
571 "minss\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sse")
573 (set_attr "mode" "SF")])
575 ;; These versions of the min/max patterns implement exactly the operations
576 ;; min = (op1 < op2 ? op1 : op2)
577 ;; max = (!(op1 < op2) ? op1 : op2)
578 ;; Their operands are not commutative, and thus they may be used in the
579 ;; presence of -0.0 and NaN.
;; minps expressed as a two-operand unspec rather than smin.  Operand 1
;; is a register tied to the output; operand 2 may be register or memory.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this listing.
581 (define_insn "*ieee_sminv4sf3"
582 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
587 "minps\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "V4SF")])
;; maxps expressed as a two-operand unspec rather than smax.  Operand 1
;; is a register tied to the output; operand 2 may be register or memory.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this listing.
591 (define_insn "*ieee_smaxv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
593 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
594 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
597 "maxps\t{%2, %0|%0, %2}"
598 [(set_attr "type" "sseadd")
599 (set_attr "mode" "V4SF")])
;; minpd expressed as a two-operand unspec rather than smin.  Operand 1
;; is a register tied to the output; operand 2 may be register or memory.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this listing.
601 (define_insn "*ieee_sminv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
603 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
604 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
607 "minpd\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "V2DF")])
;; maxpd expressed as a two-operand unspec rather than smax.  Operand 1
;; is a register tied to the output; operand 2 may be register or memory.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this listing.
611 (define_insn "*ieee_smaxv2df3"
612 [(set (match_operand:V2DF 0 "register_operand" "=x")
613 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
614 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
617 "maxpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
;; addsubps.  The visible RTL pairs an operation on operands 1 and 2 with
;; (minus op1 op2) on the same operands; operand 1 is tied to the output.
;; NOTE(review): the outer wrapper, the first operator (presumably plus),
;; the element selector and the insn condition are not visible here.
621 (define_insn "sse3_addsubv4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
625 (match_operand:V4SF 1 "register_operand" "0")
626 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
627 (minus:V4SF (match_dup 1) (match_dup 2))
630 "addsubps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
;; haddps.  The RTL is built from scalar vec_selects of elements 0..3 of
;; operand 1 (tied to the output) followed by elements 0..3 of operand 2
;; (register or memory).
;; NOTE(review): the operators combining the selected elements and the
;; insn condition are not visible in this listing.
634 (define_insn "sse3_haddv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
656 "haddps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
;; hsubps.  The RTL is built from scalar vec_selects of elements 0..3 of
;; operand 1 (tied to the output) followed by elements 0..3 of operand 2
;; (register or memory).
;; NOTE(review): the operators combining the selected elements and the
;; insn condition are not visible in this listing.
660 (define_insn "sse3_hsubv4sf3"
661 [(set (match_operand:V4SF 0 "register_operand" "=x")
666 (match_operand:V4SF 1 "register_operand" "0")
667 (parallel [(const_int 0)]))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
670 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
671 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
675 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
676 (parallel [(const_int 0)]))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
679 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
680 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
682 "hsubps\t{%2, %0|%0, %2}"
683 [(set_attr "type" "sseadd")
684 (set_attr "mode" "V4SF")])
;; V4SF sum-reduction expander.  Two strategies are visible: two
;; gen_sse3_haddv4sf3 calls chained through a fresh temporary, and a call
;; to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test selecting between them (presumably TARGET_SSE3)
;; is not visible in this listing.
686 (define_expand "reduc_splus_v4sf"
687 [(match_operand:V4SF 0 "register_operand" "")
688 (match_operand:V4SF 1 "register_operand" "")]
693 rtx tmp = gen_reg_rtx (V4SFmode);
694 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
695 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
698 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF maximum-reduction expander; delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
702 (define_expand "reduc_smax_v4sf"
703 [(match_operand:V4SF 0 "register_operand" "")
704 (match_operand:V4SF 1 "register_operand" "")]
707 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum-reduction expander; delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
711 (define_expand "reduc_smin_v4sf"
712 [(match_operand:V4SF 0 "register_operand" "")
713 (match_operand:V4SF 1 "register_operand" "")]
716 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
722 ;; Parallel single-precision floating point comparisons
724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare: cmp<cond>ps, where the condition text comes from
;; operator 3 (anything accepted by sse_comparison_operator) through the
;; %D3 output modifier.  Operand 1 is tied to the output.
;; NOTE(review): the insn condition is not visible in this listing.
726 (define_insn "sse_maskcmpv4sf3"
727 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operator:V4SF 3 "sse_comparison_operator"
729 [(match_operand:V4SF 1 "register_operand" "0")
730 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
732 "cmp%D3ps\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssecmp")
734 (set_attr "mode" "V4SF")])
;; Scalar compare on SFmode operands: cmp<cond>ss, with the condition
;; taken from operator 3 through %D3.  Operand 1 is tied to the output.
;; NOTE(review): the insn condition is not visible in this listing.
736 (define_insn "sse_maskcmpsf3"
737 [(set (match_operand:SF 0 "register_operand" "=x")
738 (match_operator:SF 3 "sse_comparison_operator"
739 [(match_operand:SF 1 "register_operand" "0")
740 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
742 "cmp%D3ss\t{%2, %0|%0, %2}"
743 [(set_attr "type" "ssecmp")
744 (set_attr "mode" "SF")])
;; cmp<cond>ss on V4SF operands.  Unlike sse_maskcmpv4sf3, operand 2 must
;; be a register ("x"), not memory.  The "mode" attribute is SF.
;; NOTE(review): the wrapper around the comparison (presumably a
;; vec_merge) and the insn condition are not visible in this listing.
746 (define_insn "sse_vmmaskcmpv4sf3"
747 [(set (match_operand:V4SF 0 "register_operand" "=x")
749 (match_operator:V4SF 3 "sse_comparison_operator"
750 [(match_operand:V4SF 1 "register_operand" "0")
751 (match_operand:V4SF 2 "register_operand" "x")])
755 "cmp%D3ss\t{%2, %0|%0, %2}"
756 [(set_attr "type" "ssecmp")
757 (set_attr "mode" "SF")])
;; comiss: sets the flags register in CCFP mode from element 0 of
;; operand 0 and element 0 of operand 1 (register or memory).
;; NOTE(review): the compare/vec_select wrapper lines and the insn
;; condition are not visible in this listing.
759 (define_insn "sse_comi"
760 [(set (reg:CCFP FLAGS_REG)
763 (match_operand:V4SF 0 "register_operand" "x")
764 (parallel [(const_int 0)]))
766 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
767 (parallel [(const_int 0)]))))]
769 "comiss\t{%1, %0|%0, %1}"
770 [(set_attr "type" "ssecomi")
771 (set_attr "mode" "SF")])
;; ucomiss: same shape as sse_comi but the flags register is set in CCFPU
;; mode.  Element 0 of operand 0 is compared with element 0 of operand 1.
;; NOTE(review): the compare/vec_select wrapper lines and the insn
;; condition are not visible in this listing.
773 (define_insn "sse_ucomi"
774 [(set (reg:CCFPU FLAGS_REG)
777 (match_operand:V4SF 0 "register_operand" "x")
778 (parallel [(const_int 0)]))
780 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))))]
783 "ucomiss\t{%1, %0|%0, %1}"
784 [(set_attr "type" "ssecomi")
785 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the values
;; compared; operands 1 and 2 are the two general_operand values chosen
;; between.  Expansion is attempted through ix86_expand_fp_vcond.
;; NOTE(review): the comparison operator line and what happens after the
;; `if' are not visible in this listing.
787 (define_expand "vcondv4sf"
788 [(set (match_operand:V4SF 0 "register_operand" "")
791 [(match_operand:V4SF 4 "nonimmediate_operand" "")
792 (match_operand:V4SF 5 "nonimmediate_operand" "")])
793 (match_operand:V4SF 1 "general_operand" "")
794 (match_operand:V4SF 2 "general_operand" "")))]
797 if (ix86_expand_fp_vcond (operands))
803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
805 ;; Parallel single-precision floating point logical operations
807 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
809 (define_expand "andv4sf3"
810 [(set (match_operand:V4SF 0 "register_operand" "")
811 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
812 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
814 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is tied to the output and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
816 (define_insn "*andv4sf3"
817 [(set (match_operand:V4SF 0 "register_operand" "=x")
818 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
819 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
820 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
821 "andps\t{%2, %0|%0, %2}"
822 [(set_attr "type" "sselog")
823 (set_attr "mode" "V4SF")])
;; andnps: computes (~operand 1) & operand 2.  Operand 1 is a register
;; tied to the output; operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
825 (define_insn "sse_nandv4sf3"
826 [(set (match_operand:V4SF 0 "register_operand" "=x")
827 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
828 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
830 "andnps\t{%2, %0|%0, %2}"
831 [(set_attr "type" "sselog")
832 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
834 (define_expand "iorv4sf3"
835 [(set (match_operand:V4SF 0 "register_operand" "")
836 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
837 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
839 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is tied to the output and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
841 (define_insn "*iorv4sf3"
842 [(set (match_operand:V4SF 0 "register_operand" "=x")
843 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
844 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
845 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
846 "orps\t{%2, %0|%0, %2}"
847 [(set_attr "type" "sselog")
848 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
850 (define_expand "xorv4sf3"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
853 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
855 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is tied to the output and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
857 (define_insn "*xorv4sf3"
858 [(set (match_operand:V4SF 0 "register_operand" "=x")
859 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
860 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
861 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
862 "xorps\t{%2, %0|%0, %2}"
863 [(set_attr "type" "sselog")
864 (set_attr "mode" "V4SF")])
866 ;; Also define scalar versions. These are used for abs, neg, and
867 ;; conditional move. Using subregs into vector modes causes register
868 ;; allocation lossage. These patterns do not allow memory operands
869 ;; because the native instructions read the full 128 bits.
;; Scalar SFmode AND implemented with the packed andps instruction.  Both
;; inputs must be registers; the "mode" attribute is V4SF because the
;; instruction emitted is the packed form.
;; NOTE(review): the insn condition is not visible in this listing.
871 (define_insn "*andsf3"
872 [(set (match_operand:SF 0 "register_operand" "=x")
873 (and:SF (match_operand:SF 1 "register_operand" "0")
874 (match_operand:SF 2 "register_operand" "x")))]
876 "andps\t{%2, %0|%0, %2}"
877 [(set_attr "type" "sselog")
878 (set_attr "mode" "V4SF")])
;; Scalar SFmode (~operand 1) & operand 2 implemented with the packed
;; andnps instruction.  Both inputs must be registers.
;; NOTE(review): the insn condition is not visible in this listing.
880 (define_insn "*nandsf3"
881 [(set (match_operand:SF 0 "register_operand" "=x")
882 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
883 (match_operand:SF 2 "register_operand" "x")))]
885 "andnps\t{%2, %0|%0, %2}"
886 [(set_attr "type" "sselog")
887 (set_attr "mode" "V4SF")])
;; Scalar SFmode OR implemented with the packed orps instruction.  Both
;; inputs must be registers.
;; NOTE(review): the insn condition is not visible in this listing.
889 (define_insn "*iorsf3"
890 [(set (match_operand:SF 0 "register_operand" "=x")
891 (ior:SF (match_operand:SF 1 "register_operand" "0")
892 (match_operand:SF 2 "register_operand" "x")))]
894 "orps\t{%2, %0|%0, %2}"
895 [(set_attr "type" "sselog")
896 (set_attr "mode" "V4SF")])
;; Scalar SFmode XOR implemented with the packed xorps instruction.  Both
;; inputs must be registers.
;; NOTE(review): the insn condition is not visible in this listing.
898 (define_insn "*xorsf3"
899 [(set (match_operand:SF 0 "register_operand" "=x")
900 (xor:SF (match_operand:SF 1 "register_operand" "0")
901 (match_operand:SF 2 "register_operand" "x")))]
903 "xorps\t{%2, %0|%0, %2}"
904 [(set_attr "type" "sselog")
905 (set_attr "mode" "V4SF")])
907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
909 ;; Parallel single-precision floating point conversion operations
911 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("ym") to V2SF and combines it
;; with operand 1, a V4SF register tied to the output.
;; NOTE(review): the wrapper lines (presumably vec_merge/vec_duplicate and
;; the merge mask) and the insn condition are not visible in this listing.
913 (define_insn "sse_cvtpi2ps"
914 [(set (match_operand:V4SF 0 "register_operand" "=x")
917 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
918 (match_operand:V4SF 1 "register_operand" "0")
921 "cvtpi2ps\t{%2, %0|%0, %2}"
922 [(set_attr "type" "ssecvt")
923 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (register or memory) converted through a V4SI
;; unspec, of which elements 0 and 1 form the V2SI result in a "y"
;; register.  The "unit" attribute is mmx.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this listing.
925 (define_insn "sse_cvtps2pi"
926 [(set (match_operand:V2SI 0 "register_operand" "=y")
928 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
930 (parallel [(const_int 0) (const_int 1)])))]
932 "cvtps2pi\t{%1, %0|%0, %1}"
933 [(set_attr "type" "ssecvt")
934 (set_attr "unit" "mmx")
935 (set_attr "mode" "DI")])
;; cvttps2pi: (fix:V4SI operand 1), of which elements 0 and 1 form the
;; V2SI result in a "y" register.  The "unit" attribute is mmx.
;; NOTE(review): "mode" is SF here but DI on the sibling sse_cvtps2pi --
;; confirm which is intended.  The insn condition is not visible here.
937 (define_insn "sse_cvttps2pi"
938 [(set (match_operand:V2SI 0 "register_operand" "=y")
940 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
941 (parallel [(const_int 0) (const_int 1)])))]
943 "cvttps2pi\t{%1, %0|%0, %1}"
944 [(set_attr "type" "ssecvt")
945 (set_attr "unit" "mmx")
946 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF and combines it with
;; operand 1, a V4SF register tied to the output.  Two alternatives:
;; source in a general register ("r") or in memory ("m"); the decode
;; attributes differ per alternative.
;; NOTE(review): the wrapper lines (presumably vec_merge/vec_duplicate)
;; and the insn condition are not visible in this listing.
948 (define_insn "sse_cvtsi2ss"
949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
952 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
953 (match_operand:V4SF 1 "register_operand" "0,0")
956 "cvtsi2ss\t{%2, %0|%0, %2}"
957 [(set_attr "type" "sseicvt")
958 (set_attr "athlon_decode" "vector,double")
959 (set_attr "amdfam10_decode" "vector,double")
960 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-only variant of sse_cvtsi2ss taking a DImode
;; operand 2.
;; NOTE(review): the second alternative's source constraint is "rm" here
;; but "m" in sse_cvtsi2ss, while the decode attributes are the same
;; "vector,double" -- confirm the register case in alternative 1 is
;; intended.  The wrapper lines are not visible in this listing.
962 (define_insn "sse_cvtsi2ssq"
963 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
966 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
967 (match_operand:V4SF 1 "register_operand" "0,0")
969 "TARGET_SSE && TARGET_64BIT"
970 "cvtsi2ssq\t{%2, %0|%0, %2}"
971 [(set_attr "type" "sseicvt")
972 (set_attr "athlon_decode" "vector,double")
973 (set_attr "amdfam10_decode" "vector,double")
974 (set_attr "mode" "SF")])
;; cvtss2si: SImode result from element 0 of the V4SF operand 1 (register
;; or memory alternative), expressed with UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm whether that is intended.
976 (define_insn "sse_cvtss2si"
977 [(set (match_operand:SI 0 "register_operand" "=r,r")
980 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
981 (parallel [(const_int 0)]))]
982 UNSPEC_FIX_NOTRUNC))]
984 "cvtss2si\t{%1, %0|%0, %1}"
985 [(set_attr "type" "sseicvt")
986 (set_attr "athlon_decode" "double,vector")
987 (set_attr "mode" "SI")])
;; cvtss2si taking a scalar SFmode source (register or memory
;; alternative) instead of a V4SF vector; result is SImode, expressed
;; with UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the insn condition is not visible in this listing.
989 (define_insn "sse_cvtss2si_2"
990 [(set (match_operand:SI 0 "register_operand" "=r,r")
991 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
992 UNSPEC_FIX_NOTRUNC))]
994 "cvtss2si\t{%1, %0|%0, %1}"
995 [(set_attr "type" "sseicvt")
996 (set_attr "athlon_decode" "double,vector")
997 (set_attr "amdfam10_decode" "double,double")
998 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit-only variant producing a DImode result from
;; element 0 of the V4SF operand 1, expressed with UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2siq_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm whether that is intended.
1000 (define_insn "sse_cvtss2siq"
1001 [(set (match_operand:DI 0 "register_operand" "=r,r")
1004 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1005 (parallel [(const_int 0)]))]
1006 UNSPEC_FIX_NOTRUNC))]
1007 "TARGET_SSE && TARGET_64BIT"
1008 "cvtss2siq\t{%1, %0|%0, %1}"
1009 [(set_attr "type" "sseicvt")
1010 (set_attr "athlon_decode" "double,vector")
1011 (set_attr "mode" "DI")])
;; cvtss2siq taking a scalar SFmode source (register or memory
;; alternative); 64-bit only, DImode result, expressed with
;; UNSPEC_FIX_NOTRUNC.
1013 (define_insn "sse_cvtss2siq_2"
1014 [(set (match_operand:DI 0 "register_operand" "=r,r")
1015 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1016 UNSPEC_FIX_NOTRUNC))]
1017 "TARGET_SSE && TARGET_64BIT"
1018 "cvtss2siq\t{%1, %0|%0, %1}"
1019 [(set_attr "type" "sseicvt")
1020 (set_attr "athlon_decode" "double,vector")
1021 (set_attr "amdfam10_decode" "double,double")
1022 (set_attr "mode" "DI")])
;; cvttss2si: SImode result from element 0 of the V4SF operand 1
;; (register or memory alternative).
;; NOTE(review): the conversion operator wrapping the vec_select
;; (presumably fix:SI) and the insn condition are not visible here.
1024 (define_insn "sse_cvttss2si"
1025 [(set (match_operand:SI 0 "register_operand" "=r,r")
1028 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1029 (parallel [(const_int 0)]))))]
1031 "cvttss2si\t{%1, %0|%0, %1}"
1032 [(set_attr "type" "sseicvt")
1033 (set_attr "athlon_decode" "double,vector")
1034 (set_attr "amdfam10_decode" "double,double")
1035 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit-only variant producing a DImode result from
;; element 0 of the V4SF operand 1 (register or memory alternative).
;; NOTE(review): the conversion operator wrapping the vec_select
;; (presumably fix:DI) is not visible in this listing.
1037 (define_insn "sse_cvttss2siq"
1038 [(set (match_operand:DI 0 "register_operand" "=r,r")
1041 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1042 (parallel [(const_int 0)]))))]
1043 "TARGET_SSE && TARGET_64BIT"
1044 "cvttss2siq\t{%1, %0|%0, %1}"
1045 [(set_attr "type" "sseicvt")
1046 (set_attr "athlon_decode" "double,vector")
1047 (set_attr "amdfam10_decode" "double,double")
1048 (set_attr "mode" "DI")])
;; cvtdq2ps: converts the V4SI operand 1 (register or memory) to V4SF.
;; The "mode" attribute is V4SF to match the result mode and the "ps"
;; mnemonic.  It previously said V2DF, although no double-precision
;; operand is involved.
;; NOTE(review): the insn condition is not visible in this listing.
1050 (define_insn "sse2_cvtdq2ps"
1051 [(set (match_operand:V4SF 0 "register_operand" "=x")
1052 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1054 "cvtdq2ps\t{%1, %0|%0, %1}"
1055 [(set_attr "type" "ssecvt")
1056 (set_attr "mode" "V4SF")])
;; Emits cvtps2dq: V4SF operand (SSE register or memory) to a V4SI result.
;; The conversion is represented with UNSPEC_FIX_NOTRUNC rather than a plain
;; `fix' RTL code.
1058 (define_insn "sse2_cvtps2dq"
1059 [(set (match_operand:V4SI 0 "register_operand" "=x")
1060 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1061 UNSPEC_FIX_NOTRUNC))]
1063 "cvtps2dq\t{%1, %0|%0, %1}"
1064 [(set_attr "type" "ssecvt")
1065 (set_attr "mode" "TI")])
;; Emits cvttps2dq: `fix' conversion of a V4SF operand (SSE register or
;; memory) to a V4SI result.
1067 (define_insn "sse2_cvttps2dq"
1068 [(set (match_operand:V4SI 0 "register_operand" "=x")
1069 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1071 "cvttps2dq\t{%1, %0|%0, %1}"
1072 [(set_attr "type" "ssecvt")
1073 (set_attr "mode" "TI")])
1075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1077 ;; Parallel single-precision floating point element swizzling
1079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, all with operand 1 tied to the destination:
;;   0: register <- register            movhlps
;;   1: register <- offsettable memory  movlps, operand 2 printed with %H
;;   2: memory   <- register            movhps
;; The insn condition rejects the case where operands 1 and 2 are both in
;; memory.
;; NOTE(review): %H2 presumably addresses the upper half of operand 2 --
;; confirm against the operand printer.
1081 (define_insn "sse_movhlps"
1082 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1085 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1086 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1087 (parallel [(const_int 6)
1091 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1093 movhlps\t{%2, %0|%0, %2}
1094 movlps\t{%H2, %0|%0, %H2}
1095 movhps\t{%2, %0|%0, %2}"
1096 [(set_attr "type" "ssemov")
1097 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives, all with operand 1 tied to the destination:
;;   0: register <- register            movlhps
;;   1: register <- memory              movhps
;;   2: offsettable memory <- register  movlps, destination printed with %H
;; Operand combinations are validated by ix86_binary_operator_ok.
;; NOTE(review): %H0 presumably addresses the upper half of operand 0 --
;; confirm against the operand printer.
1099 (define_insn "sse_movlhps"
1100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1103 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1104 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1105 (parallel [(const_int 0)
1109 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1111 movlhps\t{%2, %0|%0, %2}
1112 movhps\t{%2, %0|%0, %2}
1113 movlps\t{%2, %H0|%H0, %2}"
1114 [(set_attr "type" "ssemov")
1115 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  The result is built from indices 2, 6, 3, 7 of operands
;; 1 and 2; operand 1 is tied to the destination register and operand 2 may
;; be an SSE register or memory.
1117 (define_insn "sse_unpckhps"
1118 [(set (match_operand:V4SF 0 "register_operand" "=x")
1121 (match_operand:V4SF 1 "register_operand" "0")
1122 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1123 (parallel [(const_int 2) (const_int 6)
1124 (const_int 3) (const_int 7)])))]
1126 "unpckhps\t{%2, %0|%0, %2}"
1127 [(set_attr "type" "sselog")
1128 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  The result is built from indices 0, 4, 1, 5 of operands
;; 1 and 2; operand 1 is tied to the destination register and operand 2 may
;; be an SSE register or memory.
1130 (define_insn "sse_unpcklps"
1131 [(set (match_operand:V4SF 0 "register_operand" "=x")
1134 (match_operand:V4SF 1 "register_operand" "0")
1135 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1136 (parallel [(const_int 0) (const_int 4)
1137 (const_int 1) (const_int 5)])))]
1139 "unpcklps\t{%2, %0|%0, %2}"
1140 [(set_attr "type" "sselog")
1141 (set_attr "mode" "V4SF")])
1143 ;; These are modeled with the same vec_concat as the others so that we
1144 ;; capture users of shufps that can use the new instructions.
;; Emits movshdup with a single V4SF source (SSE register or memory) and a
;; register destination.  The selector list starts at index 1.
1145 (define_insn "sse3_movshdup"
1146 [(set (match_operand:V4SF 0 "register_operand" "=x")
1149 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1151 (parallel [(const_int 1)
1156 "movshdup\t{%1, %0|%0, %1}"
1157 [(set_attr "type" "sse")
1158 (set_attr "mode" "V4SF")])
;; Emits movsldup with a single V4SF source (SSE register or memory) and a
;; register destination.  The selector list starts at index 0.
1160 (define_insn "sse3_movsldup"
1161 [(set (match_operand:V4SF 0 "register_operand" "=x")
1164 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1166 (parallel [(const_int 0)
1171 "movsldup\t{%1, %0|%0, %1}"
1172 [(set_attr "type" "sse")
1173 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit selectors that are handed to sse_shufps_1: bits
;; 0-1 and 2-3 are passed unchanged, while bits 4-5 and 6-7 have 4 added so
;; that they fall in the 4..7 range sse_shufps_1 requires for its last two
;; selector operands.
1175 (define_expand "sse_shufps"
1176 [(match_operand:V4SF 0 "register_operand" "")
1177 (match_operand:V4SF 1 "register_operand" "")
1178 (match_operand:V4SF 2 "nonimmediate_operand" "")
1179 (match_operand:SI 3 "const_int_operand" "")]
1182 int mask = INTVAL (operands[3]);
1183 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1184 GEN_INT ((mask >> 0) & 3),
1185 GEN_INT ((mask >> 2) & 3),
1186 GEN_INT (((mask >> 4) & 3) + 4),
1187 GEN_INT (((mask >> 6) & 3) + 4)));
;; The shufps insn proper.  Operands 3 and 4 are selectors in 0..3 and
;; operands 5 and 6 are selectors in 4..7.  The output code folds them back
;; into the 8-bit immediate (subtracting 4 from operands 5 and 6), stores
;; that immediate in operands[3], and prints it as %3.
1191 (define_insn "sse_shufps_1"
1192 [(set (match_operand:V4SF 0 "register_operand" "=x")
1195 (match_operand:V4SF 1 "register_operand" "0")
1196 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1197 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1198 (match_operand 4 "const_0_to_3_operand" "")
1199 (match_operand 5 "const_4_to_7_operand" "")
1200 (match_operand 6 "const_4_to_7_operand" "")])))]
1204 mask |= INTVAL (operands[3]) << 0;
1205 mask |= INTVAL (operands[4]) << 2;
1206 mask |= (INTVAL (operands[5]) - 4) << 4;
1207 mask |= (INTVAL (operands[6]) - 4) << 6;
1208 operands[3] = GEN_INT (mask);
1210 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1212 [(set_attr "type" "sselog")
1213 (set_attr "mode" "V4SF")])
;; Produce a V2SF value from indices 2 and 3 of V4SF operand 1.
;;   0: memory   <- register            movhps
;;   1: register <- register            movhlps
;;   2: register <- offsettable memory  movlps, source printed with %H
;; NOTE(review): %H1 presumably addresses the upper half of operand 1 --
;; confirm against the operand printer.
1215 (define_insn "sse_storehps"
1216 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1218 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1219 (parallel [(const_int 2) (const_int 3)])))]
1222 movhps\t{%1, %0|%0, %1}
1223 movhlps\t{%1, %0|%0, %1}
1224 movlps\t{%H1, %0|%0, %H1}"
1225 [(set_attr "type" "ssemov")
1226 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine indices 0 and 1 of operand 1 (tied to the destination) with the
;; V2SF operand 2.
;;   0: register <- memory              movhps
;;   1: register <- register            movlhps
;;   2: offsettable memory <- register  movlps, destination printed with %H
1228 (define_insn "sse_loadhps"
1229 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1232 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1233 (parallel [(const_int 0) (const_int 1)]))
1234 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1237 movhps\t{%2, %0|%0, %2}
1238 movlhps\t{%2, %0|%0, %2}
1239 movlps\t{%2, %H0|%H0, %2}"
1240 [(set_attr "type" "ssemov")
1241 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produce a V2SF value from indices 0 and 1 of V4SF operand 1.
;;   0: memory   <- register   movlps
;;   1: register <- register   movaps (whole-register copy)
;;   2: register <- memory     movlps
1243 (define_insn "sse_storelps"
1244 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1246 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1247 (parallel [(const_int 0) (const_int 1)])))]
1250 movlps\t{%1, %0|%0, %1}
1251 movaps\t{%1, %0|%0, %1}
1252 movlps\t{%1, %0|%0, %1}"
1253 [(set_attr "type" "ssemov")
1254 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with indices 2 and 3 of V4SF operand 1.
;;   0: operand 2 is tied to the destination, operand 1 is a register;
;;      implemented with shufps and the immediate 0xe4
;;   1: operand 1 tied to the destination, operand 2 in memory; movlps load
;;   2: memory destination, operand 2 in a register; movlps store
1256 (define_insn "sse_loadlps"
1257 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1259 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1261 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1262 (parallel [(const_int 2) (const_int 3)]))))]
1265 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1266 movlps\t{%2, %0|%0, %2}
1267 movlps\t{%2, %0|%0, %2}"
1268 [(set_attr "type" "sselog,ssemov,ssemov")
1269 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 2 is the source printed in the
;; template, operand 1 is tied to the destination register.
1271 (define_insn "sse_movss"
1272 [(set (match_operand:V4SF 0 "register_operand" "=x")
1274 (match_operand:V4SF 2 "register_operand" "x")
1275 (match_operand:V4SF 1 "register_operand" "0")
1278 "movss\t{%2, %0|%0, %2}"
1279 [(set_attr "type" "ssemov")
1280 (set_attr "mode" "SF")])
;; Build a V4SF value from SF operand 1, which is tied to the destination
;; register, by issuing shufps with immediate 0 and the destination as both
;; sources.
1282 (define_insn "*vec_dupv4sf"
1283 [(set (match_operand:V4SF 0 "register_operand" "=x")
1285 (match_operand:SF 1 "register_operand" "0")))]
1287 "shufps\t{$0, %0, %0|%0, %0, 0}"
1288 [(set_attr "type" "sselog1")
1289 (set_attr "mode" "V4SF")])
1291 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1292 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1293 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SF value from two SF operands.  Alternatives 0-1 use SSE
;; registers ("x") and alternatives 2-3 use "*y" registers:
;;   0: unpcklps with operand 1 tied to the destination
;;   1: movss load of operand 1 when operand 2 satisfies "C"
;;   2: punpckldq with operand 1 tied to the destination
;;   3: movd load of operand 1 when operand 2 satisfies "C"
1294 (define_insn "*sse_concatv2sf"
1295 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1297 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1298 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1301 unpcklps\t{%2, %0|%0, %2}
1302 movss\t{%1, %0|%0, %1}
1303 punpckldq\t{%2, %0|%0, %2}
1304 movd\t{%1, %0|%0, %1}"
1305 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1306 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Form a V4SF value from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or from memory
;; (movhps).
1308 (define_insn "*sse_concatv4sf"
1309 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1311 (match_operand:V2SF 1 "register_operand" " 0,0")
1312 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1315 movlhps\t{%2, %0|%0, %2}
1316 movhps\t{%2, %0|%0, %2}"
1317 [(set_attr "type" "ssemov")
1318 (set_attr "mode" "V4SF,V2SF")])
;; Named expander for V4SF vector initialisation; all of the work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument.
1320 (define_expand "vec_initv4sf"
1321 [(match_operand:V4SF 0 "register_operand" "")
1322 (match_operand 1 "" "")]
1325 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set an element of a V4SF value from SF operand 2.  Operand 1 is either
;; tied to the destination ("0") or satisfies "C".  Four alternatives are
;; declared; the templates listed here cover the first three (movss from a
;; register, movss from memory, movd from a general register).
;; NOTE(review): the memory-destination alternative is presumably handled
;; by the split that follows this insn -- confirm.
1329 (define_insn "*vec_setv4sf_0"
1330 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
1333 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1334 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1338 movss\t{%2, %0|%0, %2}
1339 movss\t{%2, %0|%0, %2}
1340 movd\t{%2, %0|%0, %2}
1342 [(set_attr "type" "ssemov")
1343 (set_attr "mode" "SF")])
;; Post-reload split for a V4SF destination in memory with a non-memory SF
;; source: the store is rewritten as a plain SFmode move to offset 0 of the
;; destination address.
1346 [(set (match_operand:V4SF 0 "memory_operand" "")
1349 (match_operand:SF 1 "nonmemory_operand" ""))
1352 "TARGET_SSE && reload_completed"
1355 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Named expander for setting one element of a V4SF register.  Operand 2 is
;; a constant element index; the work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
1359 (define_expand "vec_setv4sf"
1360 [(match_operand:V4SF 0 "register_operand" "")
1361 (match_operand:SF 1 "register_operand" "")
1362 (match_operand 2 "const_int_operand" "")]
1365 ix86_expand_vector_set (false, operands[0], operands[1],
1366 INTVAL (operands[2]));
;; Extract index 0 of a V4SF operand as an SF value.  The insn condition
;; forbids a memory-to-memory form.  After reload it is split into an
;; ordinary move: operand 1 is re-expressed in SFmode, either as a register
;; with the same register number or through gen_lowpart, and then copied to
;; operand 0.
1370 (define_insn_and_split "*vec_extractv4sf_0"
1371 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1373 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1374 (parallel [(const_int 0)])))]
1375 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1377 "&& reload_completed"
1380 rtx op1 = operands[1];
1382 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1384 op1 = gen_lowpart (SFmode, op1);
1385 emit_move_insn (operands[0], op1);
;; Named expander for extracting one SF element from a V4SF register.
;; Operand 2 is a constant element index; the work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
1389 (define_expand "vec_extractv4sf"
1390 [(match_operand:SF 0 "register_operand" "")
1391 (match_operand:V4SF 1 "register_operand" "")
1392 (match_operand 2 "const_int_operand" "")]
1395 ix86_expand_vector_extract (false, operands[0], operands[1],
1396 INTVAL (operands[2]));
1400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1402 ;; Parallel double-precision floating point arithmetic
1404 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  Expansion is handed entirely to
;; ix86_expand_fp_absneg_operator with code NEG; DONE suppresses the
;; default pattern emission.
1406 (define_expand "negv2df2"
1407 [(set (match_operand:V2DF 0 "register_operand" "")
1408 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1410 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  Expansion is handed entirely to
;; ix86_expand_fp_absneg_operator with code ABS; DONE suppresses the
;; default pattern emission.
1412 (define_expand "absv2df2"
1413 [(set (match_operand:V2DF 0 "register_operand" "")
1414 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1416 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Both inputs may be registers or memory; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
1418 (define_expand "addv2df3"
1419 [(set (match_operand:V2DF 0 "register_operand" "")
1420 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1421 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1423 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 is an SSE register or memory.  Requires TARGET_SSE2 and
;; an operand combination accepted by ix86_binary_operator_ok.
1425 (define_insn "*addv2df3"
1426 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1429 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1430 "addpd\t{%2, %0|%0, %2}"
1431 [(set_attr "type" "sseadd")
1432 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 is an SSE register or memory.
;; The mode handed to ix86_binary_operator_ok is V2DFmode, the mode of the
;; operands of this pattern (it previously named V4SFmode, which belongs to
;; the single-precision patterns).
1434 (define_insn "sse2_vmaddv2df3"
1435 [(set (match_operand:V2DF 0 "register_operand" "=x")
1437 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1438 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1441 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1442 "addsd\t{%2, %0|%0, %2}"
1443 [(set_attr "type" "sseadd")
1444 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1446 (define_expand "subv2df3"
1447 [(set (match_operand:V2DF 0 "register_operand" "")
1448 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1449 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1451 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (there is
;; no commutative marker, unlike addpd); operand 2 is an SSE register or
;; memory.
1453 (define_insn "*subv2df3"
1454 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1458 "subpd\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "sseadd")
1460 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands.  Operand 1 is a register tied to the
;; destination; operand 2 is an SSE register or memory.
1462 (define_insn "sse2_vmsubv2df3"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1465 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1466 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1470 "subsd\t{%2, %0|%0, %2}"
1471 [(set_attr "type" "sseadd")
1472 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1474 (define_expand "mulv2df3"
1475 [(set (match_operand:V2DF 0 "register_operand" "")
1476 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1477 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1479 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 is an SSE register or memory.  Requires TARGET_SSE2 and
;; an operand combination accepted by ix86_binary_operator_ok.
1481 (define_insn "*mulv2df3"
1482 [(set (match_operand:V2DF 0 "register_operand" "=x")
1483 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1484 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1485 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1486 "mulpd\t{%2, %0|%0, %2}"
1487 [(set_attr "type" "ssemul")
1488 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 is an SSE register or memory.
1490 (define_insn "sse2_vmmulv2df3"
1491 [(set (match_operand:V2DF 0 "register_operand" "=x")
1493 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1494 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1497 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1498 "mulsd\t{%2, %0|%0, %2}"
1499 [(set_attr "type" "ssemul")
1500 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 (the dividend) must already be a
;; register; operand 2 may be a register or memory.  The operands are
;; adjusted by ix86_fixup_binary_operands_no_copy.
1502 (define_expand "divv2df3"
1503 [(set (match_operand:V2DF 0 "register_operand" "")
1504 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1505 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1507 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2 is an
;; SSE register or memory.
1509 (define_insn "*divv2df3"
1510 [(set (match_operand:V2DF 0 "register_operand" "=x")
1511 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1512 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1514 "divpd\t{%2, %0|%0, %2}"
1515 [(set_attr "type" "ssediv")
1516 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands.  Operand 1 is a register tied to the
;; destination; operand 2 is an SSE register or memory.
1518 (define_insn "sse2_vmdivv2df3"
1519 [(set (match_operand:V2DF 0 "register_operand" "=x")
1521 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1522 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1526 "divsd\t{%2, %0|%0, %2}"
1527 [(set_attr "type" "ssediv")
1528 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of an SSE register or memory operand into an
;; SSE register.
1530 (define_insn "sqrtv2df2"
1531 [(set (match_operand:V2DF 0 "register_operand" "=x")
1532 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1534 "sqrtpd\t{%1, %0|%0, %1}"
1535 [(set_attr "type" "sse")
1536 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands.  Operand 1 is the value whose square root is
;; taken; operand 2 is tied to the destination register.
;; Operand 1 uses nonimmediate_operand so that its predicate agrees with
;; its "xm" constraint and with the other sse2_vm* patterns in this file; a
;; register_operand predicate would keep memory operands from ever matching
;; the "m" alternative directly.
1538 (define_insn "sse2_vmsqrtv2df2"
1539 [(set (match_operand:V2DF 0 "register_operand" "=x")
1541 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1542 (match_operand:V2DF 2 "register_operand" "0")
1545 "sqrtsd\t{%1, %0|%0, %1}"
1546 [(set_attr "type" "sse")
1547 (set_attr "mode" "DF")])
1549 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1550 ;; isn't really correct, as those rtl operators aren't defined when
1551 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF signed-maximum expander.  When flag_finite_math_only is not set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1553 (define_expand "smaxv2df3"
1554 [(set (match_operand:V2DF 0 "register_operand" "")
1555 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1556 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1559 if (!flag_finite_math_only)
1560 operands[1] = force_reg (V2DFmode, operands[1]);
1561 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, finite-math variant: only available when flag_finite_math_only is
;; set, in which case operand 1 is marked commutative ("%0").
1564 (define_insn "*smaxv2df3_finite"
1565 [(set (match_operand:V2DF 0 "register_operand" "=x")
1566 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1567 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1568 "TARGET_SSE2 && flag_finite_math_only
1569 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1570 "maxpd\t{%2, %0|%0, %2}"
1571 [(set_attr "type" "sseadd")
1572 (set_attr "mode" "V2DF")])
;; maxpd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
1574 (define_insn "*smaxv2df3"
1575 [(set (match_operand:V2DF 0 "register_operand" "=x")
1576 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1577 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1579 "maxpd\t{%2, %0|%0, %2}"
1580 [(set_attr "type" "sseadd")
1581 (set_attr "mode" "V2DF")])
;; maxsd, finite-math variant: only available when flag_finite_math_only is
;; set, in which case operand 1 is marked commutative ("%0").
1583 (define_insn "*sse2_vmsmaxv2df3_finite"
1584 [(set (match_operand:V2DF 0 "register_operand" "=x")
1586 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1587 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1590 "TARGET_SSE2 && flag_finite_math_only
1591 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1592 "maxsd\t{%2, %0|%0, %2}"
1593 [(set_attr "type" "sseadd")
1594 (set_attr "mode" "DF")])
;; maxsd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
1596 (define_insn "sse2_vmsmaxv2df3"
1597 [(set (match_operand:V2DF 0 "register_operand" "=x")
1599 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1600 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1604 "maxsd\t{%2, %0|%0, %2}"
1605 [(set_attr "type" "sseadd")
1606 (set_attr "mode" "DF")])
;; V2DF signed-minimum expander.  When flag_finite_math_only is not set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1608 (define_expand "sminv2df3"
1609 [(set (match_operand:V2DF 0 "register_operand" "")
1610 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1611 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1614 if (!flag_finite_math_only)
1615 operands[1] = force_reg (V2DFmode, operands[1]);
1616 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, finite-math variant: only available when flag_finite_math_only is
;; set, in which case operand 1 is marked commutative ("%0").
1619 (define_insn "*sminv2df3_finite"
1620 [(set (match_operand:V2DF 0 "register_operand" "=x")
1621 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1622 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1623 "TARGET_SSE2 && flag_finite_math_only
1624 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1625 "minpd\t{%2, %0|%0, %2}"
1626 [(set_attr "type" "sseadd")
1627 (set_attr "mode" "V2DF")])
;; minpd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
1629 (define_insn "*sminv2df3"
1630 [(set (match_operand:V2DF 0 "register_operand" "=x")
1631 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1632 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1634 "minpd\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "mode" "V2DF")])
;; minsd, finite-math variant: only available when flag_finite_math_only is
;; set, in which case operand 1 is marked commutative ("%0").
1638 (define_insn "*sse2_vmsminv2df3_finite"
1639 [(set (match_operand:V2DF 0 "register_operand" "=x")
1641 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1642 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1645 "TARGET_SSE2 && flag_finite_math_only
1646 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1647 "minsd\t{%2, %0|%0, %2}"
1648 [(set_attr "type" "sseadd")
1649 (set_attr "mode" "DF")])
;; minsd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
1651 (define_insn "sse2_vmsminv2df3"
1652 [(set (match_operand:V2DF 0 "register_operand" "=x")
1654 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1655 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1659 "minsd\t{%2, %0|%0, %2}"
1660 [(set_attr "type" "sseadd")
1661 (set_attr "mode" "DF")])
;; addsubpd.  The pattern refers to operands 1 and 2 twice: once directly
;; and once, through match_dup, in a `minus' of the same two operands.
;; Operand 1 is tied to the destination; operand 2 is an SSE register or
;; memory.
1663 (define_insn "sse3_addsubv2df3"
1664 [(set (match_operand:V2DF 0 "register_operand" "=x")
1667 (match_operand:V2DF 1 "register_operand" "0")
1668 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1669 (minus:V2DF (match_dup 1) (match_dup 2))
1672 "addsubpd\t{%2, %0|%0, %2}"
1673 [(set_attr "type" "sseadd")
1674 (set_attr "mode" "V2DF")])
;; haddpd.  Indices 0 and 1 of operand 1 are paired with each other, and
;; likewise indices 0 and 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 is an SSE register or memory.
1676 (define_insn "sse3_haddv2df3"
1677 [(set (match_operand:V2DF 0 "register_operand" "=x")
1681 (match_operand:V2DF 1 "register_operand" "0")
1682 (parallel [(const_int 0)]))
1683 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1686 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1687 (parallel [(const_int 0)]))
1688 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1690 "haddpd\t{%2, %0|%0, %2}"
1691 [(set_attr "type" "sseadd")
1692 (set_attr "mode" "V2DF")])
;; hsubpd.  Indices 0 and 1 of operand 1 are paired with each other, and
;; likewise indices 0 and 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 is an SSE register or memory.
1694 (define_insn "sse3_hsubv2df3"
1695 [(set (match_operand:V2DF 0 "register_operand" "=x")
1699 (match_operand:V2DF 1 "register_operand" "0")
1700 (parallel [(const_int 0)]))
1701 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1705 (parallel [(const_int 0)]))
1706 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1708 "hsubpd\t{%2, %0|%0, %2}"
1709 [(set_attr "type" "sseadd")
1710 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF register, implemented by emitting
;; sse3_haddv2df3 with operand 1 supplied as both inputs.
1712 (define_expand "reduc_splus_v2df"
1713 [(match_operand:V2DF 0 "register_operand" "")
1714 (match_operand:V2DF 1 "register_operand" "")]
1717 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1721 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1723 ;; Parallel double-precision floating point comparisons
1725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed-double compare.  Operand 3 is the comparison operator (any code
;; accepted by sse_comparison_operator); it is printed with %D3 between
;; "cmp" and "pd" to form the mnemonic.  Operand 1 is tied to the
;; destination.
1727 (define_insn "sse2_maskcmpv2df3"
1728 [(set (match_operand:V2DF 0 "register_operand" "=x")
1729 (match_operator:V2DF 3 "sse_comparison_operator"
1730 [(match_operand:V2DF 1 "register_operand" "0")
1731 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1733 "cmp%D3pd\t{%2, %0|%0, %2}"
1734 [(set_attr "type" "ssecmp")
1735 (set_attr "mode" "V2DF")])
;; Scalar DFmode compare.  Operand 3 is the comparison operator, printed
;; with %D3 between "cmp" and "sd".  Operand 1 is tied to the destination.
1737 (define_insn "sse2_maskcmpdf3"
1738 [(set (match_operand:DF 0 "register_operand" "=x")
1739 (match_operator:DF 3 "sse_comparison_operator"
1740 [(match_operand:DF 1 "register_operand" "0")
1741 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1743 "cmp%D3sd\t{%2, %0|%0, %2}"
1744 [(set_attr "type" "ssecmp")
1745 (set_attr "mode" "DF")])
;; cmp<cond>sd on V2DF operands.  Operand 3 is the comparison operator,
;; printed with %D3.  Operand 1 is tied to the destination.
1747 (define_insn "sse2_vmmaskcmpv2df3"
1748 [(set (match_operand:V2DF 0 "register_operand" "=x")
1750 (match_operator:V2DF 3 "sse_comparison_operator"
1751 [(match_operand:V2DF 1 "register_operand" "0")
1752 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1756 "cmp%D3sd\t{%2, %0|%0, %2}"
1757 [(set_attr "type" "ssecmp")
1758 (set_attr "mode" "DF")])
;; comisd.  Sets FLAGS_REG in CCFPmode from index 0 of operand 0 (an SSE
;; register) and index 0 of operand 1 (an SSE register or memory).
1760 (define_insn "sse2_comi"
1761 [(set (reg:CCFP FLAGS_REG)
1764 (match_operand:V2DF 0 "register_operand" "x")
1765 (parallel [(const_int 0)]))
1767 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1768 (parallel [(const_int 0)]))))]
1770 "comisd\t{%1, %0|%0, %1}"
1771 [(set_attr "type" "ssecomi")
1772 (set_attr "mode" "DF")])
;; ucomisd.  Same operand shape as sse2_comi, but FLAGS_REG is set in
;; CCFPUmode.
1774 (define_insn "sse2_ucomi"
1775 [(set (reg:CCFPU FLAGS_REG)
1778 (match_operand:V2DF 0 "register_operand" "x")
1779 (parallel [(const_int 0)]))
1781 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1782 (parallel [(const_int 0)]))))]
1784 "ucomisd\t{%1, %0|%0, %1}"
1785 [(set_attr "type" "ssecomi")
1786 (set_attr "mode" "DF")])
;; V2DF vector conditional.  Operand 3 is the comparison applied to
;; operands 4 and 5; operands 1 and 2 are the two values chosen between.
;; Expansion is attempted through ix86_expand_fp_vcond, whose result is
;; tested.
1788 (define_expand "vcondv2df"
1789 [(set (match_operand:V2DF 0 "register_operand" "")
1791 (match_operator 3 ""
1792 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1793 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1794 (match_operand:V2DF 1 "general_operand" "")
1795 (match_operand:V2DF 2 "general_operand" "")))]
1798 if (ix86_expand_fp_vcond (operands))
1804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1806 ;; Parallel double-precision floating point logical operations
1808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1810 (define_expand "andv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "")
1812 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1813 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1815 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 is an SSE register or memory.
1817 (define_insn "*andv2df3"
1818 [(set (match_operand:V2DF 0 "register_operand" "=x")
1819 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1820 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1821 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1822 "andpd\t{%2, %0|%0, %2}"
1823 [(set_attr "type" "sselog")
1824 (set_attr "mode" "V2DF")])
;; andnpd: AND of the complement of operand 1 with operand 2.  Operand 1 is
;; a register tied to the destination; operand 2 is an SSE register or
;; memory.
1826 (define_insn "sse2_nandv2df3"
1827 [(set (match_operand:V2DF 0 "register_operand" "=x")
1828 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1829 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1831 "andnpd\t{%2, %0|%0, %2}"
1832 [(set_attr "type" "sselog")
1833 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1835 (define_expand "iorv2df3"
1836 [(set (match_operand:V2DF 0 "register_operand" "")
1837 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1838 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1840 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 is an SSE register or memory.
1842 (define_insn "*iorv2df3"
1843 [(set (match_operand:V2DF 0 "register_operand" "=x")
1844 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1845 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1846 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1847 "orpd\t{%2, %0|%0, %2}"
1848 [(set_attr "type" "sselog")
1849 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1851 (define_expand "xorv2df3"
1852 [(set (match_operand:V2DF 0 "register_operand" "")
1853 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1854 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1856 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 is an SSE register or memory.
1858 (define_insn "*xorv2df3"
1859 [(set (match_operand:V2DF 0 "register_operand" "=x")
1860 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1861 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1862 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1863 "xorpd\t{%2, %0|%0, %2}"
1864 [(set_attr "type" "sselog")
1865 (set_attr "mode" "V2DF")])
1867 ;; Also define scalar versions. These are used for abs, neg, and
1868 ;; conditional move. Using subregs into vector modes causes register
1869 ;; allocation lossage. These patterns do not allow memory operands
1870 ;; because the native instructions read the full 128 bits.
;; Scalar DFmode AND implemented with the packed andpd instruction; both
;; inputs must be registers and the "mode" attribute is V2DF.
1872 (define_insn "*anddf3"
1873 [(set (match_operand:DF 0 "register_operand" "=x")
1874 (and:DF (match_operand:DF 1 "register_operand" "0")
1875 (match_operand:DF 2 "register_operand" "x")))]
1877 "andpd\t{%2, %0|%0, %2}"
1878 [(set_attr "type" "sselog")
1879 (set_attr "mode" "V2DF")])
;; Scalar DFmode AND-NOT (complement of operand 1 ANDed with operand 2)
;; implemented with the packed andnpd instruction; register operands only.
1881 (define_insn "*nanddf3"
1882 [(set (match_operand:DF 0 "register_operand" "=x")
1883 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1884 (match_operand:DF 2 "register_operand" "x")))]
1886 "andnpd\t{%2, %0|%0, %2}"
1887 [(set_attr "type" "sselog")
1888 (set_attr "mode" "V2DF")])
;; Scalar DFmode OR implemented with the packed orpd instruction; register
;; operands only.
1890 (define_insn "*iordf3"
1891 [(set (match_operand:DF 0 "register_operand" "=x")
1892 (ior:DF (match_operand:DF 1 "register_operand" "0")
1893 (match_operand:DF 2 "register_operand" "x")))]
1895 "orpd\t{%2, %0|%0, %2}"
1896 [(set_attr "type" "sselog")
1897 (set_attr "mode" "V2DF")])
;; Scalar DFmode XOR implemented with the packed xorpd instruction;
;; register operands only.
1899 (define_insn "*xordf3"
1900 [(set (match_operand:DF 0 "register_operand" "=x")
1901 (xor:DF (match_operand:DF 1 "register_operand" "0")
1902 (match_operand:DF 2 "register_operand" "x")))]
1904 "xorpd\t{%2, %0|%0, %2}"
1905 [(set_attr "type" "sselog")
1906 (set_attr "mode" "V2DF")])
1908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1910 ;; Parallel double-precision floating point conversion operations
1912 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF.  The source is a "y" register (alternative 0,
;; for which the "unit" attribute is mmx) or memory (alternative 1).
1914 (define_insn "sse2_cvtpi2pd"
1915 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1916 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1918 "cvtpi2pd\t{%1, %0|%0, %1}"
1919 [(set_attr "type" "ssecvt")
1920 (set_attr "unit" "mmx,*")
1921 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (SSE register or memory) to a V2SI result in a "y"
;; register, expressed through UNSPEC_FIX_NOTRUNC.
1923 (define_insn "sse2_cvtpd2pi"
1924 [(set (match_operand:V2SI 0 "register_operand" "=y")
1925 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1926 UNSPEC_FIX_NOTRUNC))]
1928 "cvtpd2pi\t{%1, %0|%0, %1}"
1929 [(set_attr "type" "ssecvt")
1930 (set_attr "unit" "mmx")
1931 (set_attr "mode" "DI")])
;; cvttpd2pi: `fix' conversion of V2DF (SSE register or memory) to a V2SI
;; result in a "y" register.
;; NOTE(review): the "mode" attribute is TI here but DI on sse2_cvtpd2pi,
;; which has the same operand shape -- confirm which is intended.
1933 (define_insn "sse2_cvttpd2pi"
1934 [(set (match_operand:V2SI 0 "register_operand" "=y")
1935 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1937 "cvttpd2pi\t{%1, %0|%0, %1}"
1938 [(set_attr "type" "ssecvt")
1939 (set_attr "unit" "mmx")
1940 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SImode operand 2 (general register or memory) to DF.
;; Operand 1 is a V2DF register tied to the destination.
1942 (define_insn "sse2_cvtsi2sd"
1943 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1946 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1947 (match_operand:V2DF 1 "register_operand" "0,0")
1950 "cvtsi2sd\t{%2, %0|%0, %2}"
1951 [(set_attr "type" "sseicvt")
1952 (set_attr "mode" "DF")
1953 (set_attr "athlon_decode" "double,direct")
1954 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
1956 (define_insn "sse2_cvtsi2sdq"
1957 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1960 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1961 (match_operand:V2DF 1 "register_operand" "0,0")
1963 "TARGET_SSE2 && TARGET_64BIT"
1964 "cvtsi2sdq\t{%2, %0|%0, %2}"
1965 [(set_attr "type" "sseicvt")
1966 (set_attr "mode" "DF")
1967 (set_attr "athlon_decode" "double,direct")
1968 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: SImode result in a general register from index 0 of a V2DF
;; operand (SSE register or memory), expressed through UNSPEC_FIX_NOTRUNC.
;; NOTE(review): sse2_cvtsd2si_2 emits the same instruction but also sets
;; "amdfam10_decode"; this pattern does not -- confirm that is intentional.
1970 (define_insn "sse2_cvtsd2si"
1971 [(set (match_operand:SI 0 "register_operand" "=r,r")
1974 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1975 (parallel [(const_int 0)]))]
1976 UNSPEC_FIX_NOTRUNC))]
1978 "cvtsd2si\t{%1, %0|%0, %1}"
1979 [(set_attr "type" "sseicvt")
1980 (set_attr "athlon_decode" "double,vector")
1981 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a scalar DFmode operand (SSE
;; register or memory) rather than a V2DF vector.
1983 (define_insn "sse2_cvtsd2si_2"
1984 [(set (match_operand:SI 0 "register_operand" "=r,r")
1985 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1986 UNSPEC_FIX_NOTRUNC))]
1988 "cvtsd2si\t{%1, %0|%0, %1}"
1989 [(set_attr "type" "sseicvt")
1990 (set_attr "athlon_decode" "double,vector")
1991 (set_attr "amdfam10_decode" "double,double")
1992 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode counterpart of sse2_cvtsd2si.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): sse2_cvtsd2siq_2 emits the same instruction but also sets
;; "amdfam10_decode"; this pattern does not -- confirm that is intentional.
1994 (define_insn "sse2_cvtsd2siq"
1995 [(set (match_operand:DI 0 "register_operand" "=r,r")
1998 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1999 (parallel [(const_int 0)]))]
2000 UNSPEC_FIX_NOTRUNC))]
2001 "TARGET_SSE2 && TARGET_64BIT"
2002 "cvtsd2siq\t{%1, %0|%0, %1}"
2003 [(set_attr "type" "sseicvt")
2004 (set_attr "athlon_decode" "double,vector")
2005 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a scalar DFmode operand (SSE
;; register or memory).  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2007 (define_insn "sse2_cvtsd2siq_2"
2008 [(set (match_operand:DI 0 "register_operand" "=r,r")
2009 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2010 UNSPEC_FIX_NOTRUNC))]
2011 "TARGET_SSE2 && TARGET_64BIT"
2012 "cvtsd2siq\t{%1, %0|%0, %1}"
2013 [(set_attr "type" "sseicvt")
2014 (set_attr "athlon_decode" "double,vector")
2015 (set_attr "amdfam10_decode" "double,double")
2016 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result in a general register from index 0 of a V2DF
;; operand (SSE register or memory).
2018 (define_insn "sse2_cvttsd2si"
2019 [(set (match_operand:SI 0 "register_operand" "=r,r")
2022 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2023 (parallel [(const_int 0)]))))]
2025 "cvttsd2si\t{%1, %0|%0, %1}"
2026 [(set_attr "type" "sseicvt")
2027 (set_attr "mode" "SI")
2028 (set_attr "athlon_decode" "double,vector")
2029 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode counterpart of sse2_cvttsd2si.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2031 (define_insn "sse2_cvttsd2siq"
2032 [(set (match_operand:DI 0 "register_operand" "=r,r")
2035 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2036 (parallel [(const_int 0)]))))]
2037 "TARGET_SSE2 && TARGET_64BIT"
2038 "cvttsd2siq\t{%1, %0|%0, %1}"
2039 [(set_attr "type" "sseicvt")
2040 (set_attr "mode" "DI")
2041 (set_attr "athlon_decode" "double,vector")
2042 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces a V2DF result from indices 0 and 1 of a V4SI operand
;; (SSE register or memory).
2044 (define_insn "sse2_cvtdq2pd"
2045 [(set (match_operand:V2DF 0 "register_operand" "=x")
2048 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2049 (parallel [(const_int 0) (const_int 1)]))))]
2051 "cvtdq2pd\t{%1, %0|%0, %1}"
2052 [(set_attr "type" "ssecvt")
2053 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  It sets operands[2] to the V2SImode zero
;; constant before the pattern is emitted; the matching insn
;; *sse2_cvtpd2dq requires a const0 operand 2.
2055 (define_expand "sse2_cvtpd2dq"
2056 [(set (match_operand:V4SI 0 "register_operand" "")
2058 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2062 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF source (SSE register or memory) converted to V2SI, with a
;; V2SI zero constant (operand 2) making up the rest of the V4SI result.
2064 (define_insn "*sse2_cvtpd2dq"
2065 [(set (match_operand:V4SI 0 "register_operand" "=x")
2067 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2069 (match_operand:V2SI 2 "const0_operand" "")))]
2071 "cvtpd2dq\t{%1, %0|%0, %1}"
2072 [(set_attr "type" "ssecvt")
2073 (set_attr "mode" "TI")
2074 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  It sets operands[2] to the V2SImode zero
;; constant before the pattern is emitted; the matching insn
;; *sse2_cvttpd2dq requires a const0 operand 2.
2076 (define_expand "sse2_cvttpd2dq"
2077 [(set (match_operand:V4SI 0 "register_operand" "")
2079 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2082 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: `fix' conversion of a V2DF source (SSE register or memory) to
;; V2SI, with a V2SI zero constant (operand 2) making up the rest of the
;; V4SI result.
2084 (define_insn "*sse2_cvttpd2dq"
2085 [(set (match_operand:V4SI 0 "register_operand" "=x")
2087 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2088 (match_operand:V2SI 2 "const0_operand" "")))]
2090 "cvttpd2dq\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "mode" "TI")
2093 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (SSE register or memory).
;; Operand 1 is a V4SF register tied to the destination.
2095 (define_insn "sse2_cvtsd2ss"
2096 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2099 (float_truncate:V2SF
2100 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2101 (match_operand:V4SF 1 "register_operand" "0,0")
2104 "cvtsd2ss\t{%2, %0|%0, %2}"
2105 [(set_attr "type" "ssecvt")
2106 (set_attr "athlon_decode" "vector,double")
2107 (set_attr "amdfam10_decode" "vector,double")
2108 (set_attr "mode" "SF")])
;; cvtss2sd: convert from the V4SF operand 2 into the V2DF destination.
;; Operand 1 is tied to the destination ("0"), so the insn works in place
;; on that register.
2110 (define_insn "sse2_cvtss2sd"
2111 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2115 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2116 (parallel [(const_int 0) (const_int 1)])))
2117 (match_operand:V2DF 1 "register_operand" "0,0")
2120 "cvtss2sd\t{%2, %0|%0, %2}"
2121 [(set_attr "type" "ssecvt")
2122 (set_attr "amdfam10_decode" "vector,double")
2123 (set_attr "mode" "DF")])
;; Named expander for cvtpd2ps (V2DF narrowed with float_truncate to V2SF).
;; The preparation statement supplies a V2SF zero vector as operands[2],
;; which the "*sse2_cvtpd2ps" insn requires via const0_operand.
2125 (define_expand "sse2_cvtpd2ps"
2126 [(set (match_operand:V4SF 0 "register_operand" "")
2128 (float_truncate:V2SF
2129 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2132 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate of a V2DF operand to V2SF, producing a V4SF
;; register; operand 2 must be the V2SF zero constant.
2134 (define_insn "*sse2_cvtpd2ps"
2135 [(set (match_operand:V4SF 0 "register_operand" "=x")
2137 (float_truncate:V2SF
2138 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2139 (match_operand:V2SF 2 "const0_operand" "")))]
2141 "cvtpd2ps\t{%1, %0|%0, %1}"
2142 [(set_attr "type" "ssecvt")
2143 (set_attr "mode" "V4SF")
2144 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: convert elements 0 and 1 of the V4SF operand 1 into a V2DF
;; result in an SSE register.
2146 (define_insn "sse2_cvtps2pd"
2147 [(set (match_operand:V2DF 0 "register_operand" "=x")
2150 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2151 (parallel [(const_int 0) (const_int 1)]))))]
2153 "cvtps2pd\t{%1, %0|%0, %1}"
2154 [(set_attr "type" "ssecvt")
2155 (set_attr "mode" "V2DF")
2156 (set_attr "amdfam10_decode" "direct")])
2158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2160 ;; Parallel double-precision floating point element swizzling
2162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Interleave-high of two V2DF operands.  Three alternatives:
;;   0: register form, unpckhpd with operand 1 tied to the destination;
;;   1: operand 1 in offsettable memory, movlpd loads its high half (%H1);
;;   2: memory destination, movhpd stores from the register operand 1.
;; The condition rejects the case where operands 1 and 2 are both memory.
2164 (define_insn "sse2_unpckhpd"
2165 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2168 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2169 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2170 (parallel [(const_int 1)
2172 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2174 unpckhpd\t{%2, %0|%0, %2}
2175 movlpd\t{%H1, %0|%0, %H1}
2176 movhpd\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "sselog,ssemov,ssemov")
2178 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand.  Alternative 0 (register destination)
;; emits movddup; alternative 1 has an offsettable-memory destination and a
;; register source.  Memory-to-memory is rejected by the condition.
2180 (define_insn "*sse3_movddup"
2181 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2184 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2186 (parallel [(const_int 0)
2188 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2190 movddup\t{%1, %0|%0, %1}
2192 [(set_attr "type" "sselog1,ssemov")
2193 (set_attr "mode" "V2DF")])
;; Post-reload split for a V2DF store whose source is a register: the low
;; DFmode part of operand 1 (same hard register number, viewed as DFmode)
;; is stored twice, at byte offsets 0 and 8 of the memory destination.
2196 [(set (match_operand:V2DF 0 "memory_operand" "")
2199 (match_operand:V2DF 1 "register_operand" "")
2201 (parallel [(const_int 0)
2203 "TARGET_SSE3 && reload_completed"
2206 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2207 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2208 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Interleave-low of two V2DF operands; operand 1 is always tied to the
;; destination.  Three alternatives:
;;   0: register form, unpcklpd;
;;   1: operand 2 in memory, movhpd loads it into the destination;
;;   2: offsettable-memory destination, movlpd stores operand 2 to the
;;      high half of the destination (%H0).
;; The condition rejects the case where operands 1 and 2 are both memory.
2212 (define_insn "sse2_unpcklpd"
2213 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2216 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2217 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2218 (parallel [(const_int 0)
2220 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2222 unpcklpd\t{%2, %0|%0, %2}
2223 movhpd\t{%2, %0|%0, %2}
2224 movlpd\t{%2, %H0|%H0, %2}"
2225 [(set_attr "type" "sselog,ssemov,ssemov")
2226 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd taking the instruction's immediate mask as operand 3
;; (a const_int).  It decodes the mask into explicit element selectors and
;; emits sse2_shufpd_1; bit 1 of the mask becomes selector value 3 or 2.
2228 (define_expand "sse2_shufpd"
2229 [(match_operand:V2DF 0 "register_operand" "")
2230 (match_operand:V2DF 1 "register_operand" "")
2231 (match_operand:V2DF 2 "nonimmediate_operand" "")
2232 (match_operand:SI 3 "const_int_operand" "")]
2235 int mask = INTVAL (operands[3]);
2236 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2238 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors: operand 3 is in the range 0..1
;; and operand 4 in the range 2..3.  The output code re-encodes them into
;; the instruction immediate: bit 0 = operand 3, bit 1 = operand 4 - 2,
;; and overwrites operands[3] with that immediate before printing.
2242 (define_insn "sse2_shufpd_1"
2243 [(set (match_operand:V2DF 0 "register_operand" "=x")
2246 (match_operand:V2DF 1 "register_operand" "0")
2247 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2248 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2249 (match_operand 4 "const_2_to_3_operand" "")])))]
2253 mask = INTVAL (operands[3]);
2254 mask |= (INTVAL (operands[4]) - 2) << 1;
2255 operands[3] = GEN_INT (mask);
2257 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2259 [(set_attr "type" "sselog")
2260 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF operand as a DF value.  Alternative 0 stores
;; it to memory with movhpd; the other two alternatives cover a register
;; source tied to the destination and an offsettable-memory source.
;; Memory-to-memory is rejected by the condition.
2262 (define_insn "sse2_storehpd"
2263 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2265 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2266 (parallel [(const_int 1)])))]
2267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2269 movhpd\t{%1, %0|%0, %1}
2272 [(set_attr "type" "ssemov,sselog1,ssemov")
2273 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF in memory into a DF
;; register becomes a plain DFmode move from byte offset 8 of that memory.
2276 [(set (match_operand:DF 0 "register_operand" "")
2278 (match_operand:V2DF 1 "memory_operand" "")
2279 (parallel [(const_int 1)])))]
2280 "TARGET_SSE2 && reload_completed"
2281 [(set (match_dup 0) (match_dup 1))]
2283 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF operand as a DF value.  Alternative 0 stores
;; it to memory with movlpd; the other two alternatives take a register or
;; memory source into a register.  Memory-to-memory is rejected by the
;; condition.
2286 (define_insn "sse2_storelpd"
2287 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2289 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2290 (parallel [(const_int 0)])))]
2291 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2293 movlpd\t{%1, %0|%0, %1}
2296 [(set_attr "type" "ssemov")
2297 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of a V2DF into a DF register
;; becomes a plain DFmode move.  Operand 1 is re-viewed in DFmode first
;; (gen_rtx_REG with the same register number, or gen_lowpart) and then
;; moved into operand 0.
2300 [(set (match_operand:DF 0 "register_operand" "")
2302 (match_operand:V2DF 1 "nonimmediate_operand" "")
2303 (parallel [(const_int 0)])))]
2304 "TARGET_SSE2 && reload_completed"
2307 rtx op1 = operands[1];
2309 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2311 op1 = gen_lowpart (DFmode, op1);
2312 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of operand 1 and the DF operand 2.
;; Alternatives:
;;   0: operand 2 in memory, movhpd loads it (operand 1 tied to dest);
;;   1: operand 2 in a register, unpcklpd (operand 1 tied to dest);
;;   2: operand 2 tied to dest, operand 1 in another register, shufpd $1;
;;   3: offsettable-memory destination, type "other".
;; The condition rejects the case where operands 1 and 2 are both memory.
;; NOTE(review): in alternative 2 "shufpd $1, %1, %0" takes its low lane
;; from the HIGH lane of the destination and its high lane from the low lane
;; of operand 1, which does not look like {operand 1[0], operand 2} --
;; confirm whether this alternative can ever produce the intended value.
2316 (define_insn "sse2_loadhpd"
2317 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2320 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2321 (parallel [(const_int 0)]))
2322 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2323 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2325 movhpd\t{%2, %0|%0, %2}
2326 unpcklpd\t{%2, %0|%0, %2}
2327 shufpd\t{$1, %1, %0|%0, %1, 1}
2329 [(set_attr "type" "ssemov,sselog,sselog,other")
2330 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split: replacing the high element of a V2DF that lives in
;; memory (its element 0 is kept, via match_dup 0) with a DF register
;; becomes a plain DFmode store at byte offset 8 of that memory.
2333 [(set (match_operand:V2DF 0 "memory_operand" "")
2335 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2336 (match_operand:DF 1 "register_operand" "")))]
2337 "TARGET_SSE2 && reload_completed"
2338 [(set (match_dup 0) (match_dup 1))]
2340 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF whose low element is the DF operand 2 and whose high element
;; is element 1 of operand 1.  Alternatives:
;;   0: operand 1 is the zero vector (C), operand 2 in memory: movsd load;
;;   1: operand 1 tied to dest, operand 2 in memory: movlpd load;
;;   2: operand 1 tied to dest, operand 2 in a register: movsd;
;;   3: operand 2 tied to dest, operand 1 in another register: shufpd $2;
;;   4: operand 2 tied to dest, operand 1 in offsettable memory: movhpd
;;      loads the high half of operand 1 (%H1);
;;   5: memory destination, type "other".
;; The condition rejects the case where operands 1 and 2 are both memory.
;;
;; In alternative 3 the shufpd source must be operand 1: the destination
;; already holds operand 2 in its low lane, and immediate 2 keeps that low
;; lane while taking the high lane from the source.  Using %2 there named
;; the destination itself (operand 2 is tied to operand 0), so the high lane
;; was never replaced.
2343 (define_insn "sse2_loadlpd"
2344 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2346 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2348 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2349 (parallel [(const_int 1)]))))]
2350 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2352 movsd\t{%2, %0|%0, %2}
2353 movlpd\t{%2, %0|%0, %2}
2354 movsd\t{%2, %0|%0, %2}
2355 shufpd\t{$2, %1, %0|%0, %1, 2}
2356 movhpd\t{%H1, %0|%0, %H1}
2358 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2359 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF in memory whose element 1 is kept (via
;; match_dup 0) while a DF register supplies the other element: the insn is
;; rewritten as a plain DFmode store into operand 0 after adjust_address.
;; NOTE(review): the store address uses byte offset 8 although the register
;; operand appears first in the pattern -- confirm the intended offset.
2362 [(set (match_operand:V2DF 0 "memory_operand" "")
2364 (match_operand:DF 1 "register_operand" "")
2365 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2366 "TARGET_SSE2 && reload_completed"
2367 [(set (match_dup 0) (match_dup 1))]
2369 operands[0] = adjust_address (operands[0], DFmode, 8);
2372 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF as DF, done with single-precision moves:
;;   0: memory destination, movhps store;
;;   1: register to register, movhlps;
;;   2: offsettable-memory source, movlps from its high half (%H1).
;; Memory-to-memory is rejected by the condition.
2374 (define_insn "*vec_extractv2df_1_sse"
2375 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2377 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2378 (parallel [(const_int 1)])))]
2379 "!TARGET_SSE2 && TARGET_SSE
2380 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2382 movhps\t{%1, %0|%0, %1}
2383 movhlps\t{%1, %0|%0, %1}
2384 movlps\t{%H1, %0|%0, %H1}"
2385 [(set_attr "type" "ssemov")
2386 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF as DF, done with single-precision moves:
;;   0: memory destination, movlps store;
;;   1: register to register, movaps;
;;   2: memory source, movlps load.
;; Memory-to-memory is rejected by the condition.
2388 (define_insn "*vec_extractv2df_0_sse"
2389 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2391 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2392 (parallel [(const_int 0)])))]
2393 "!TARGET_SSE2 && TARGET_SSE
2394 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2396 movlps\t{%1, %0|%0, %1}
2397 movaps\t{%1, %0|%0, %1}
2398 movlps\t{%1, %0|%0, %1}"
2399 [(set_attr "type" "ssemov")
2400 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merge two V2DF operands: judging by the alternatives, the low element
;; comes from operand 2 and the high element from operand 1.  Alternatives:
;;   0: operand 1 tied to dest, operand 2 in a register: movsd;
;;   1: operand 1 tied to dest, operand 2 in memory: movlpd load;
;;   2: memory destination (tied to operand 1), movlpd store of operand 2;
;;   3: operand 2 tied to dest, operand 1 in another register: shufpd $2;
;;   4: operand 2 tied to dest, operand 1 in offsettable memory: movhps
;;      loads the high half of operand 1 (%H1);
;;   5: offsettable-memory destination (tied to operand 2), movhps stores
;;      operand 1 to the high half of the destination (%H0).
;;
;; In alternative 3 the shufpd source must be operand 1: the destination
;; already holds operand 2, and immediate 2 keeps the destination's low lane
;; while taking the high lane from the source.  Using %2 there named the
;; destination itself (operand 2 is tied to operand 0), so operand 1 was
;; never read.
2402 (define_insn "sse2_movsd"
2403 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2405 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2406 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2410 movsd\t{%2, %0|%0, %2}
2411 movlpd\t{%2, %0|%0, %2}
2412 movlpd\t{%2, %0|%0, %2}
2413 shufpd\t{$2, %1, %0|%0, %1, 2}
2414 movhps\t{%H1, %0|%0, %H1}
2415 movhps\t{%1, %H0|%H0, %1}"
2416 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2417 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; movddup from a DF operand (register or memory) into a V2DF register.
2419 (define_insn "*vec_dupv2df_sse3"
2420 [(set (match_operand:V2DF 0 "register_operand" "=x")
2422 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2424 "movddup\t{%1, %0|%0, %1}"
2425 [(set_attr "type" "sselog1")
2426 (set_attr "mode" "DF")])
;; V2DF result built from a DF register operand that is tied to the
;; destination ("0"), i.e. the operation is performed in place.
;; Presumably the non-SSE3 counterpart of "*vec_dupv2df_sse3" -- verify.
2428 (define_insn "*vec_dupv2df"
2429 [(set (match_operand:V2DF 0 "register_operand" "=x")
2431 (match_operand:DF 1 "register_operand" "0")))]
2434 [(set_attr "type" "sselog1")
2435 (set_attr "mode" "V4SF")])
;; movddup from a DF operand 1 (register or memory) into a V2DF register;
;; only operand 1 appears in the output template.
2437 (define_insn "*vec_concatv2df_sse3"
2438 [(set (match_operand:V2DF 0 "register_operand" "=x")
2440 (match_operand:DF 1 "nonimmediate_operand" "xm")
2443 "movddup\t{%1, %0|%0, %1}"
2444 [(set_attr "type" "sselog1")
2445 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives:
;;   0: both in Y2 registers (operand 1 tied to dest): unpcklpd;
;;   1: operand 2 in memory: movhpd load;
;;   2: operand 1 in memory, operand 2 the zero constant (C): movsd load;
;;   3: "x" registers: movlhps;
;;   4: "x" register with operand 2 in memory: movhps load.
2447 (define_insn "*vec_concatv2df"
2448 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2450 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2451 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2454 unpcklpd\t{%2, %0|%0, %2}
2455 movhpd\t{%2, %0|%0, %2}
2456 movsd\t{%1, %0|%0, %1}
2457 movlhps\t{%2, %0|%0, %2}
2458 movhps\t{%2, %0|%0, %2}"
2459 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2460 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Standard-name expander: hands the V2DF register (operand 0), the DF
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, passing "false" as its first argument.
2462 (define_expand "vec_setv2df"
2463 [(match_operand:V2DF 0 "register_operand" "")
2464 (match_operand:DF 1 "register_operand" "")
2465 (match_operand 2 "const_int_operand" "")]
2468 ix86_expand_vector_set (false, operands[0], operands[1],
2469 INTVAL (operands[2]));
;; Standard-name expander: hands the DF destination (operand 0), the V2DF
;; source (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, passing "false" as its first argument.
2473 (define_expand "vec_extractv2df"
2474 [(match_operand:DF 0 "register_operand" "")
2475 (match_operand:V2DF 1 "register_operand" "")
2476 (match_operand 2 "const_int_operand" "")]
2479 ix86_expand_vector_extract (false, operands[0], operands[1],
2480 INTVAL (operands[2]));
;; Standard-name expander: hands the V2DF destination (operand 0) and the
;; unconstrained initializer operand 1 to ix86_expand_vector_init, passing
;; "false" as its first argument.
2484 (define_expand "vec_initv2df"
2485 [(match_operand:V2DF 0 "register_operand" "")
2486 (match_operand 1 "" "")]
2489 ix86_expand_vector_init (false, operands[0], operands[1]);
2493 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2495 ;; Parallel integral arithmetic
2497 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI integer vector mode.  The
;; preparation statement creates operands[2] as an all-zero vector of the
;; same mode, forced into a register (presumably so the negation can be
;; expressed as 0 - operand 1 -- verify against the full template).
2499 (define_expand "neg<mode>2"
2500 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2503 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2505 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander for every SSEMODEI mode.  Operands are
;; legitimized for a PLUS by ix86_fixup_binary_operands_no_copy before the
;; template is emitted.
2507 (define_expand "add<mode>3"
2508 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2509 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2510 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2512 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: element-wise integer add.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be memory.
2514 (define_insn "*add<mode>3"
2515 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2517 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2518 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2519 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2520 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2521 [(set_attr "type" "sseiadd")
2522 (set_attr "mode" "TI")])
;; padds{b,w}: signed saturating add (SS_PLUS) for V16QI and V8HI.
;; Operand 1 is tied to the destination and marked commutative.
2524 (define_insn "sse2_ssadd<mode>3"
2525 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2527 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2528 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2529 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2530 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2531 [(set_attr "type" "sseiadd")
2532 (set_attr "mode" "TI")])
;; paddus{b,w}: unsigned saturating add (US_PLUS) for V16QI and V8HI.
;; Operand 1 is tied to the destination and marked commutative.
2534 (define_insn "sse2_usadd<mode>3"
2535 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2537 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2538 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2539 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2540 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2541 [(set_attr "type" "sseiadd")
2542 (set_attr "mode" "TI")])
;; Integer vector subtraction expander for every SSEMODEI mode.  Operand 1
;; must already be a register; operands are legitimized for a MINUS by
;; ix86_fixup_binary_operands_no_copy.
2544 (define_expand "sub<mode>3"
2545 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2546 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2547 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2549 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: element-wise integer subtract.  Operand 1 is tied to the
;; destination (not commutative); operand 2 may be memory.
2551 (define_insn "*sub<mode>3"
2552 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2554 (match_operand:SSEMODEI 1 "register_operand" "0")
2555 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2557 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2558 [(set_attr "type" "sseiadd")
2559 (set_attr "mode" "TI")])
;; psubs{b,w}: subtract for V16QI and V8HI using the signed-saturating
;; mnemonic.  Operand 1 is tied to the destination; operand 2 may be memory.
2561 (define_insn "sse2_sssub<mode>3"
2562 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2564 (match_operand:SSEMODE12 1 "register_operand" "0")
2565 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2567 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2568 [(set_attr "type" "sseiadd")
2569 (set_attr "mode" "TI")])
;; psubus{b,w}: subtract for V16QI and V8HI using the unsigned-saturating
;; mnemonic.  Operand 1 is tied to the destination; operand 2 may be memory.
2571 (define_insn "sse2_ussub<mode>3"
2572 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2574 (match_operand:SSEMODE12 1 "register_operand" "0")
2575 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2577 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2578 [(set_attr "type" "sseiadd")
2579 (set_attr "mode" "TI")])
;; V16QI multiply, synthesized because the expansion only has a word
;; (V8HI) multiply to work with: bytes are widened into words with
;; punpck{h,l}bw, multiplied with mulv8hi3, and the low byte of each
;; product is gathered back into byte order by a ladder of punpck{h,l}bw
;; steps.  Twelve V16QI temporaries t[0..11] are allocated up front.
2581 (define_expand "mulv16qi3"
2582 [(set (match_operand:V16QI 0 "register_operand" "")
2583 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2584 (match_operand:V16QI 2 "register_operand" "")))]
2590 for (i = 0; i < 12; ++i)
2591 t[i] = gen_reg_rtx (V16QImode);
2593 /* Unpack data such that we've got a source byte in each low byte of
2594 each word. We don't care what goes into the high byte of each word.
2595 Rather than trying to get zero in there, most convenient is to let
2596 it be a copy of the low byte. */
2597 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2598 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2599 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2600 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2602 /* Multiply words. The end-of-line annotations here give a picture of what
2603 the output of that instruction looks like. Dot means don't care; the
2604 letters are the bytes of the result with A being the most significant. */
2605 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2606 gen_lowpart (V8HImode, t[0]),
2607 gen_lowpart (V8HImode, t[1])));
2608 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2609 gen_lowpart (V8HImode, t[2]),
2610 gen_lowpart (V8HImode, t[3])));
2612 /* Extract the relevant bytes and merge them back together. */
2613 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2614 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2615 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2616 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2617 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2618 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2621 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are legitimized for a MULT by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
2625 (define_expand "mulv8hi3"
2626 [(set (match_operand:V8HI 0 "register_operand" "")
2627 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2628 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2630 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: element-wise V8HI multiply (the result keeps the element mode).
;; Operand 1 is tied to the destination and marked commutative.
2632 (define_insn "*mulv8hi3"
2633 [(set (match_operand:V8HI 0 "register_operand" "=x")
2634 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2635 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2636 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2637 "pmullw\t{%2, %0|%0, %2}"
2638 [(set_attr "type" "sseimul")
2639 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply (matched by the
;; "*smulv8hi3_highpart" insn, which emits pmulhw).  Operands are
;; legitimized for a MULT by ix86_fixup_binary_operands_no_copy.
2641 (define_expand "smulv8hi3_highpart"
2642 [(set (match_operand:V8HI 0 "register_operand" "")
2647 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2649 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2652 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw: V8HI multiply whose result is the high part of each product
;; (signed variant, per the pattern name).  Operand 1 is tied to the
;; destination and marked commutative.
2654 (define_insn "*smulv8hi3_highpart"
2655 [(set (match_operand:V8HI 0 "register_operand" "=x")
2660 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2662 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2664 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2665 "pmulhw\t{%2, %0|%0, %2}"
2666 [(set_attr "type" "sseimul")
2667 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply (matched by the
;; "*umulv8hi3_highpart" insn, which emits pmulhuw).  Operands are
;; legitimized for a MULT by ix86_fixup_binary_operands_no_copy.
2669 (define_expand "umulv8hi3_highpart"
2670 [(set (match_operand:V8HI 0 "register_operand" "")
2675 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2677 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2680 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw: V8HI multiply whose result is the high part of each product
;; (unsigned variant, per the pattern name).  Operand 1 is tied to the
;; destination and marked commutative.
2682 (define_insn "*umulv8hi3_highpart"
2683 [(set (match_operand:V8HI 0 "register_operand" "=x")
2688 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2690 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2692 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2693 "pmulhuw\t{%2, %0|%0, %2}"
2694 [(set_attr "type" "sseimul")
2695 (set_attr "mode" "TI")])
;; pmuludq: multiply elements 0 and 2 of two V4SI operands, producing a
;; V2DI result.  Operand 1 is tied to the destination and marked
;; commutative; operand 2 may be memory.
;; The operand check is made in V4SImode, the mode of the two inputs
;; (it previously named V8HImode, copied from the word-multiply patterns).
2697 (define_insn "sse2_umulv2siv2di3"
2698 [(set (match_operand:V2DI 0 "register_operand" "=x")
2702 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2703 (parallel [(const_int 0) (const_int 2)])))
2706 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2707 (parallel [(const_int 0) (const_int 2)])))))]
2708 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2709 "pmuludq\t{%2, %0|%0, %2}"
2710 [(set_attr "type" "sseimul")
2711 (set_attr "mode" "TI")])
;; pmaddwd: V8HI x V8HI -> V4SI.  The pattern combines one set of V4HI
;; selections starting at element 0 of each operand with another set
;; starting at element 1 and ending at element 7.  Operand 1 is tied to the
;; destination and marked commutative.
2713 (define_insn "sse2_pmaddwd"
2714 [(set (match_operand:V4SI 0 "register_operand" "=x")
2719 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2720 (parallel [(const_int 0)
2726 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2727 (parallel [(const_int 0)
2733 (vec_select:V4HI (match_dup 1)
2734 (parallel [(const_int 1)
2739 (vec_select:V4HI (match_dup 2)
2740 (parallel [(const_int 1)
2743 (const_int 7)]))))))]
2745 "pmaddwd\t{%2, %0|%0, %2}"
2746 [(set_attr "type" "sseiadd")
2747 (set_attr "mode" "TI")])
;; V4SI multiply synthesized from pmuludq (sse2_umulv2siv2di3), which only
;; multiplies elements 0 and 2: the even elements are multiplied directly,
;; the odd elements after a 32-bit whole-register right shift of both
;; inputs, and the 32-bit results are then gathered with pshufd and merged
;; with punpckldq.
2749 (define_expand "mulv4si3"
2750 [(set (match_operand:V4SI 0 "register_operand" "")
2751 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2752 (match_operand:V4SI 2 "register_operand" "")))]
2755 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2761 t1 = gen_reg_rtx (V4SImode);
2762 t2 = gen_reg_rtx (V4SImode);
2763 t3 = gen_reg_rtx (V4SImode);
2764 t4 = gen_reg_rtx (V4SImode);
2765 t5 = gen_reg_rtx (V4SImode);
2766 t6 = gen_reg_rtx (V4SImode);
2767 thirtytwo = GEN_INT (32);
2769 /* Multiply elements 2 and 0. */
2770 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2772 /* Shift both input vectors down one element, so that elements 3 and 1
2773 are now in the slots for elements 2 and 0. For K8, at least, this is
2774 faster than using a shuffle. */
2775 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2776 gen_lowpart (TImode, op1), thirtytwo));
2777 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2778 gen_lowpart (TImode, op2), thirtytwo));
2780 /* Multiply elements 3 and 1. */
2781 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2783 /* Move the results in element 2 down to element 1; we don't care what
2784 goes in elements 2 and 3. */
2785 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2786 const0_rtx, const0_rtx));
2787 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2788 const0_rtx, const0_rtx));
2790 /* Merge the parts back together. */
2791 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from three 32x32->64 pmuludq products per
;; element: low(op1)*low(op2), plus low(op1)*high(op2) and
;; low(op2)*high(op1) each shifted left by 32 bits.  The high halves are
;; brought into the low 32 bits with a logical right shift by 32.
2795 (define_expand "mulv2di3"
2796 [(set (match_operand:V2DI 0 "register_operand" "")
2797 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2798 (match_operand:V2DI 2 "register_operand" "")))]
2801 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2807 t1 = gen_reg_rtx (V2DImode);
2808 t2 = gen_reg_rtx (V2DImode);
2809 t3 = gen_reg_rtx (V2DImode);
2810 t4 = gen_reg_rtx (V2DImode);
2811 t5 = gen_reg_rtx (V2DImode);
2812 t6 = gen_reg_rtx (V2DImode);
2813 thirtytwo = GEN_INT (32);
2815 /* Multiply low parts. */
2816 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2817 gen_lowpart (V4SImode, op2)));
2819 /* Shift input vectors right 32 bits so we can multiply high parts. */
2820 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2821 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2823 /* Multiply high parts by low parts. */
2824 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2825 gen_lowpart (V4SImode, t3)));
2826 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2827 gen_lowpart (V4SImode, t2)));
2829 /* Shift them back. */
2830 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2831 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2833 /* Add the three parts together. */
2834 emit_insn (gen_addv2di3 (t6, t1, t4));
2835 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, high half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits of every product (mulv8hi3) and the signed high 16 bits
;; (smulv8hi3_highpart), then interleaves the high halves of those two
;; vectors into the destination viewed as V8HI.
2839 (define_expand "vec_widen_smult_hi_v8hi"
2840 [(match_operand:V4SI 0 "register_operand" "")
2841 (match_operand:V8HI 1 "register_operand" "")
2842 (match_operand:V8HI 2 "register_operand" "")]
2845 rtx op1, op2, t1, t2, dest;
2849 t1 = gen_reg_rtx (V8HImode);
2850 t2 = gen_reg_rtx (V8HImode);
2851 dest = gen_lowpart (V8HImode, operands[0]);
2853 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2854 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2855 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, low half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits of every product (mulv8hi3) and the signed high 16 bits
;; (smulv8hi3_highpart), then interleaves the low halves of those two
;; vectors into the destination viewed as V8HI.
2859 (define_expand "vec_widen_smult_lo_v8hi"
2860 [(match_operand:V4SI 0 "register_operand" "")
2861 (match_operand:V8HI 1 "register_operand" "")
2862 (match_operand:V8HI 2 "register_operand" "")]
2865 rtx op1, op2, t1, t2, dest;
2869 t1 = gen_reg_rtx (V8HImode);
2870 t2 = gen_reg_rtx (V8HImode);
2871 dest = gen_lowpart (V8HImode, operands[0]);
2873 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2874 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2875 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: V8HI x V8HI -> V4SI.  Computes
;; the low 16 bits of every product (mulv8hi3) and the unsigned high 16
;; bits (umulv8hi3_highpart), then interleaves the high halves of those two
;; vectors into the destination viewed as V8HI.
2879 (define_expand "vec_widen_umult_hi_v8hi"
2880 [(match_operand:V4SI 0 "register_operand" "")
2881 (match_operand:V8HI 1 "register_operand" "")
2882 (match_operand:V8HI 2 "register_operand" "")]
2885 rtx op1, op2, t1, t2, dest;
2889 t1 = gen_reg_rtx (V8HImode);
2890 t2 = gen_reg_rtx (V8HImode);
2891 dest = gen_lowpart (V8HImode, operands[0]);
2893 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2894 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2895 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits of every product (mulv8hi3) and the unsigned high 16 bits
;; (umulv8hi3_highpart), then interleaves the low halves of those two
;; vectors into the destination viewed as V8HI.
2899 (define_expand "vec_widen_umult_lo_v8hi"
2900 [(match_operand:V4SI 0 "register_operand" "")
2901 (match_operand:V8HI 1 "register_operand" "")
2902 (match_operand:V8HI 2 "register_operand" "")]
2905 rtx op1, op2, t1, t2, dest;
2909 t1 = gen_reg_rtx (V8HImode);
2910 t2 = gen_reg_rtx (V8HImode);
2911 dest = gen_lowpart (V8HImode, operands[0]);
2913 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2914 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2915 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (high half) so that its upper elements land in
;; the even slots read by sse2_umulv2siv2di3, which then forms the two
;; 64-bit products.
;; NOTE(review): this is the *signed* pattern name, yet the products are
;; formed with the unsigned multiply (pmuludq), exactly as in
;; vec_widen_umult_hi_v4si -- confirm results for negative inputs.
2919 (define_expand "vec_widen_smult_hi_v4si"
2920 [(match_operand:V2DI 0 "register_operand" "")
2921 (match_operand:V4SI 1 "register_operand" "")
2922 (match_operand:V4SI 2 "register_operand" "")]
2925 rtx op1, op2, t1, t2;
2929 t1 = gen_reg_rtx (V4SImode);
2930 t2 = gen_reg_rtx (V4SImode);
2932 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2933 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2934 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (low half) so that its lower elements land in
;; the even slots read by sse2_umulv2siv2di3, which then forms the two
;; 64-bit products.
;; NOTE(review): this is the *signed* pattern name, yet the products are
;; formed with the unsigned multiply (pmuludq), exactly as in
;; vec_widen_umult_lo_v4si -- confirm results for negative inputs.
2938 (define_expand "vec_widen_smult_lo_v4si"
2939 [(match_operand:V2DI 0 "register_operand" "")
2940 (match_operand:V4SI 1 "register_operand" "")
2941 (match_operand:V4SI 2 "register_operand" "")]
2944 rtx op1, op2, t1, t2;
2948 t1 = gen_reg_rtx (V4SImode);
2949 t2 = gen_reg_rtx (V4SImode);
2951 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2952 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2953 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, high half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (high half) so that its upper elements land
;; in the even slots read by sse2_umulv2siv2di3 (pmuludq), which then forms
;; the two 64-bit products.
2957 (define_expand "vec_widen_umult_hi_v4si"
2958 [(match_operand:V2DI 0 "register_operand" "")
2959 (match_operand:V4SI 1 "register_operand" "")
2960 (match_operand:V4SI 2 "register_operand" "")]
2963 rtx op1, op2, t1, t2;
2967 t1 = gen_reg_rtx (V4SImode);
2968 t2 = gen_reg_rtx (V4SImode);
2970 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2971 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2972 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (low half) so that its lower elements land in
;; the even slots read by sse2_umulv2siv2di3 (pmuludq), which then forms
;; the two 64-bit products.
2976 (define_expand "vec_widen_umult_lo_v4si"
2977 [(match_operand:V2DI 0 "register_operand" "")
2978 (match_operand:V4SI 1 "register_operand" "")
2979 (match_operand:V4SI 2 "register_operand" "")]
2982 rtx op1, op2, t1, t2;
2986 t1 = gen_reg_rtx (V4SImode);
2987 t2 = gen_reg_rtx (V4SImode);
2989 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2990 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2991 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes through a fresh V4SI temporary and
;; is then added to the V4SI accumulator (operand 3).
2995 (define_expand "sdot_prodv8hi"
2996 [(match_operand:V4SI 0 "register_operand" "")
2997 (match_operand:V8HI 1 "nonimmediate_operand" "")
2998 (match_operand:V8HI 2 "nonimmediate_operand" "")
2999 (match_operand:V4SI 3 "register_operand" "")]
3002 rtx t = gen_reg_rtx (V4SImode);
3003 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3004 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step on V4SI inputs with a V2DI accumulator
;; (operand 3).  t1 = products of the even elements (pmuludq) plus the
;; accumulator; t2/t3 = the inputs after a whole-register right shift;
;; t4 = products of the shifted inputs; operand 0 = t1 + t4.
3008 (define_expand "udot_prodv4si"
3009 [(match_operand:V2DI 0 "register_operand" "")
3010 (match_operand:V4SI 1 "register_operand" "")
3011 (match_operand:V4SI 2 "register_operand" "")
3012 (match_operand:V2DI 3 "register_operand" "")]
3017 t1 = gen_reg_rtx (V2DImode);
3018 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3019 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3021 t2 = gen_reg_rtx (V4SImode);
3022 t3 = gen_reg_rtx (V4SImode);
3023 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3024 gen_lowpart (TImode, operands[1]),
3026 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3027 gen_lowpart (TImode, operands[2]),
3030 t4 = gen_reg_rtx (V2DImode);
3031 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3033 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; psra{w,d}: per-element right shift for V8HI and V4SI (arithmetic, per
;; the "ashr" pattern name).  Operand 1 is tied to the destination; the
;; SImode count (operand 2) is an SSE register or an immediate.
3037 (define_insn "ashr<mode>3"
3038 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3040 (match_operand:SSEMODE24 1 "register_operand" "0")
3041 (match_operand:SI 2 "nonmemory_operand" "xi")))]
3043 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3044 [(set_attr "type" "sseishft")
3045 (set_attr "mode" "TI")])
;; psrl{w,d,q}: per-element logical right shift for V8HI, V4SI and V2DI.
;; Operand 1 is tied to the destination; the SImode count (operand 2) is an
;; SSE register or an immediate.
3047 (define_insn "lshr<mode>3"
3048 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3049 (lshiftrt:SSEMODE248
3050 (match_operand:SSEMODE248 1 "register_operand" "0")
3051 (match_operand:SI 2 "nonmemory_operand" "xi")))]
3053 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3054 [(set_attr "type" "sseishft")
3055 (set_attr "mode" "TI")])
;; psll{w,d,q}: per-element left shift for V8HI, V4SI and V2DI.  Operand 1
;; is tied to the destination; the SImode count (operand 2) is an SSE
;; register or an immediate.
3057 (define_insn "ashl<mode>3"
3058 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3060 (match_operand:SSEMODE248 1 "register_operand" "0")
3061 (match_operand:SI 2 "nonmemory_operand" "xi")))]
3063 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3064 [(set_attr "type" "sseishft")
3065 (set_attr "mode" "TI")])
;; pslldq: whole-register (TImode) left shift.  The RTL count (operand 2)
;; is in bits and must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 to obtain the byte count the instruction takes.
3067 (define_insn "sse2_ashlti3"
3068 [(set (match_operand:TI 0 "register_operand" "=x")
3069 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3070 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3073 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3074 return "pslldq\t{%2, %0|%0, %2}";
3076 [(set_attr "type" "sseishft")
3077 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes, expressed as a TImode
;; ashift.  The count is tested against const_0_to_255_mul_8_operand, and
;; operands 0 and 1 are re-viewed as TImode with gen_lowpart.
3079 (define_expand "vec_shl_<mode>"
3080 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3081 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3082 (match_operand:SI 2 "general_operand" "")))]
3085 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3087 operands[0] = gen_lowpart (TImode, operands[0]);
3088 operands[1] = gen_lowpart (TImode, operands[1]);
;; psrldq: whole-register (TImode) logical right shift.  The RTL count
;; (operand 2) is in bits and must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 to obtain the byte count the instruction
;; takes.
3091 (define_insn "sse2_lshrti3"
3092 [(set (match_operand:TI 0 "register_operand" "=x")
3093 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3094 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3097 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3098 return "psrldq\t{%2, %0|%0, %2}";
3100 [(set_attr "type" "sseishft")
3101 (set_attr "mode" "TI")])
;; Whole-vector right shift for the SSEMODEI modes, expressed as a TImode
;; lshiftrt.  The count is tested against const_0_to_255_mul_8_operand, and
;; operands 0 and 1 are re-viewed as TImode with gen_lowpart.
3103 (define_expand "vec_shr_<mode>"
3104 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3105 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3106 (match_operand:SI 2 "general_operand" "")))]
3109 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3111 operands[0] = gen_lowpart (TImode, operands[0]);
3112 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned byte maximum expander.  Operands are legitimized for a UMAX by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
3115 (define_expand "umaxv16qi3"
3116 [(set (match_operand:V16QI 0 "register_operand" "")
3117 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3118 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3120 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; pmaxub: element-wise unsigned maximum of V16QI operands.  Operand 1 is
;; tied to the destination and marked commutative.
3122 (define_insn "*umaxv16qi3"
3123 [(set (match_operand:V16QI 0 "register_operand" "=x")
3124 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3125 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3126 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3127 "pmaxub\t{%2, %0|%0, %2}"
3128 [(set_attr "type" "sseiadd")
3129 (set_attr "mode" "TI")])
;; Signed word maximum expander.  Operands are legitimized for an SMAX by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
3131 (define_expand "smaxv8hi3"
3132 [(set (match_operand:V8HI 0 "register_operand" "")
3133 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3134 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3136 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; pmaxsw: element-wise signed maximum of V8HI operands.  Operand 1 is
;; tied to the destination and marked commutative.
3138 (define_insn "*smaxv8hi3"
3139 [(set (match_operand:V8HI 0 "register_operand" "=x")
3140 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3141 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3142 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3143 "pmaxsw\t{%2, %0|%0, %2}"
3144 [(set_attr "type" "sseiadd")
3145 (set_attr "mode" "TI")])
;; Unsigned word maximum, synthesized as two steps: an unsigned-saturating
;; subtract (operand 1 -us operand 2) followed by adding operand 2 back.
;; The real destination is saved in operands[3]; if the destination is the
;; same rtx as operand 2, the intermediate result goes to a fresh V8HI
;; register instead so operand 2 is still intact for the add.
3147 (define_expand "umaxv8hi3"
3148 [(set (match_operand:V8HI 0 "register_operand" "=x")
3149 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
3150 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3152 (plus:V8HI (match_dup 0) (match_dup 2)))]
3155 operands[3] = operands[0];
3156 if (rtx_equal_p (operands[0], operands[2]))
3157 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for V16QI and V4SI, expanded as a vector conditional via
;; ix86_expand_int_vcond.  The xops array is laid out as: [0] destination,
;; [1] and [2] the two input operands, [3] the comparison
;; (operand 1 GT operand 2), [4] and [5] the operands being compared.
3160 (define_expand "smax<mode>3"
3161 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3162 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3163 (match_operand:SSEMODE14 2 "register_operand" "")))]
3169 xops[0] = operands[0];
3170 xops[1] = operands[1];
3171 xops[2] = operands[2];
3172 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3173 xops[4] = operands[1];
3174 xops[5] = operands[2];
3175 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expanded as a vector conditional via
;; ix86_expand_int_vcond.  The xops array is laid out as: [0] destination,
;; [1] and [2] the two input operands, [3] the unsigned comparison
;; (operand 1 GTU operand 2), [4] and [5] the operands being compared.
3180 (define_expand "umaxv4si3"
3181 [(set (match_operand:V4SI 0 "register_operand" "")
3182 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3183 (match_operand:V4SI 2 "register_operand" "")))]
3189 xops[0] = operands[0];
3190 xops[1] = operands[1];
3191 xops[2] = operands[2];
3192 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3193 xops[4] = operands[1];
3194 xops[5] = operands[2];
3195 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum of two V16QI vectors.  The operands are handed to
;; ix86_fixup_binary_operands_no_copy under the UMIN code, so the fixup
;; agrees with the umin RTL of this pattern and with the UMIN check in
;; the condition of the *uminv16qi3 insn.
3200 (define_expand "uminv16qi3"
3201 [(set (match_operand:V16QI 0 "register_operand" "")
3202 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3203 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3205 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for the umin:V16QI RTL.  Operand 1 is tied to the
;; output register (commutative with operand 2, register or memory);
;; the instruction emitted is pminub.
3207 (define_insn "*uminv16qi3"
3208 [(set (match_operand:V16QI 0 "register_operand" "=x")
3209 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3210 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3211 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3212 "pminub\t{%2, %0|%0, %2}"
3213 [(set_attr "type" "sseiadd")
3214 (set_attr "mode" "TI")])
;; Signed minimum of two V8HI vectors.  The expander only hands its
;; operands to ix86_fixup_binary_operands_no_copy under the SMIN code.
3216 (define_expand "sminv8hi3"
3217 [(set (match_operand:V8HI 0 "register_operand" "")
3218 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3219 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3221 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for the smin:V8HI RTL: operand 1 is tied to the output
;; register (commutative with operand 2, register or memory) and the
;; instruction emitted is pminsw.
3223 (define_insn "*sminv8hi3"
3224 [(set (match_operand:V8HI 0 "register_operand" "=x")
3225 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3226 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3227 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3228 "pminsw\t{%2, %0|%0, %2}"
3229 [(set_attr "type" "sseiadd")
3230 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI and V4SI).  Uses the
;; same GT (operand 1, operand 2) comparison as smax<mode>3 but with the
;; two selected values swapped in xops[1]/xops[2], so the array handed
;; to ix86_expand_int_vcond describes op1 > op2 ? op2 : op1.
3232 (define_expand "smin<mode>3"
3233 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3234 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3235 (match_operand:SSEMODE14 2 "register_operand" "")))]
3241 xops[0] = operands[0];
3242 xops[1] = operands[2];
3243 xops[2] = operands[1];
3244 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3245 xops[4] = operands[1];
3246 xops[5] = operands[2];
3247 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI and V4SI).  The xops[]
;; array handed to ix86_expand_int_vcond describes
;; op1 >u op2 ? op2 : op1, using an unsigned GTU comparison.
3252 (define_expand "umin<mode>3"
3253 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3254 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3255 (match_operand:SSEMODE24 2 "register_operand" "")))]
3261 xops[0] = operands[0];
3262 xops[1] = operands[2];
3263 xops[2] = operands[1];
3264 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3265 xops[4] = operands[1];
3266 xops[5] = operands[2];
3267 ok = ix86_expand_int_vcond (xops);
3272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3274 ;; Parallel integral comparisons
3276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality comparison for the SSEMODE124 modes (V16QI,
;; V8HI, V4SI).  Operand 1 is tied to the output and is commutative with
;; operand 2; the mnemonic is pcmpeq followed by the b/w/d suffix that
;; <ssevecsize> maps the mode to.
3278 (define_insn "sse2_eq<mode>3"
3279 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3281 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3282 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3283 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3284 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3285 [(set_attr "type" "ssecmp")
3286 (set_attr "mode" "TI")])
;; Element-wise comparison for the SSEMODE124 modes that emits pcmpgt
;; with the b/w/d suffix from <ssevecsize>.  Unlike sse2_eq<mode>3 the
;; operands are not commutative: operand 1 must be the register tied to
;; the output, operand 2 may be a register or memory.
3288 (define_insn "sse2_gt<mode>3"
3289 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3291 (match_operand:SSEMODE124 1 "register_operand" "0")
3292 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3294 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3295 [(set_attr "type" "ssecmp")
3296 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124 modes: operand 0 is set
;; to operand 1 where comparison operator 3 applied to operands 4 and 5
;; holds, and to operand 2 otherwise.  The expansion itself is delegated
;; to ix86_expand_int_vcond, whose return value is tested.
3298 (define_expand "vcond<mode>"
3299 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3300 (if_then_else:SSEMODE124
3301 (match_operator 3 ""
3302 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3303 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3304 (match_operand:SSEMODE124 1 "general_operand" "")
3305 (match_operand:SSEMODE124 2 "general_operand" "")))]
3308 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> with an identical RTL template and the
;; same delegation to ix86_expand_int_vcond (by its name, the entry
;; point used for unsigned comparisons -- the comparison code itself
;; arrives through operand 3).
3314 (define_expand "vcondu<mode>"
3315 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3316 (if_then_else:SSEMODE124
3317 (match_operator 3 ""
3318 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3319 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3320 (match_operand:SSEMODE124 1 "general_operand" "")
3321 (match_operand:SSEMODE124 2 "general_operand" "")))]
3324 if (ix86_expand_int_vcond (operands))
3330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3332 ;; Parallel integral logical operations
3334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement for the SSEMODEI modes, expressed as an XOR of
;; operand 1 with operands[2].  The preparation code builds a constant
;; vector with constm1_rtx in each of the mode's elements, forces it
;; into a register and stores that register in operands[2].
3336 (define_expand "one_cmpl<mode>2"
3337 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3338 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3342 int i, n = GET_MODE_NUNITS (<MODE>mode);
3343 rtvec v = rtvec_alloc (n);
3345 for (i = 0; i < n; ++i)
3346 RTVEC_ELT (v, i) = constm1_rtx;
3348 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND for the SSEMODEI modes.  The expander only hands its
;; operands to ix86_fixup_binary_operands_no_copy under the AND code.
3351 (define_expand "and<mode>3"
3352 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3353 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3354 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3356 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matching insn: operand 1 is tied to the output (commutative with
;; operand 2, register or memory); emits pand for every SSEMODEI mode.
3358 (define_insn "*and<mode>3"
3359 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3361 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3362 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3363 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3364 "pand\t{%2, %0|%0, %2}"
3365 [(set_attr "type" "sselog")
3366 (set_attr "mode" "TI")])
;; Combines the complement of operand 1 (a register tied to the output)
;; with operand 2 (register or memory) and emits pandn.  The operands
;; are not commutative, so there is no "%" on operand 1.
3368 (define_insn "sse2_nand<mode>3"
3369 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3371 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3372 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3374 "pandn\t{%2, %0|%0, %2}"
3375 [(set_attr "type" "sselog")
3376 (set_attr "mode" "TI")])
;; Bitwise inclusive OR for the SSEMODEI modes.  The expander only hands
;; its operands to ix86_fixup_binary_operands_no_copy under the IOR code.
3378 (define_expand "ior<mode>3"
3379 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3380 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3381 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3383 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matching insn: operand 1 is tied to the output (commutative with
;; operand 2, register or memory); emits por for every SSEMODEI mode.
3385 (define_insn "*ior<mode>3"
3386 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3388 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3389 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3390 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3391 "por\t{%2, %0|%0, %2}"
3392 [(set_attr "type" "sselog")
3393 (set_attr "mode" "TI")])
;; Bitwise exclusive OR for the SSEMODEI modes.  The expander only hands
;; its operands to ix86_fixup_binary_operands_no_copy under the XOR code.
3395 (define_expand "xor<mode>3"
3396 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3397 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3398 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3400 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matching insn: operand 1 is tied to the output (commutative with
;; operand 2, register or memory); emits pxor for every SSEMODEI mode.
3402 (define_insn "*xor<mode>3"
3403 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3405 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3406 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3407 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3408 "pxor\t{%2, %0|%0, %2}"
3409 [(set_attr "type" "sselog")
3410 (set_attr "mode" "TI")])
3412 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3414 ;; Parallel integral element swizzling
3416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3419 ;; op1 = abcdefghijklmnop
3420 ;; op2 = qrstuvwxyz012345
3421 ;; h1 = aqbrcsdteufvgwhx
3422 ;; l1 = iyjzk0l1m2n3o4p5
3423 ;; h2 = aiqybjrzcks0dlt1
3424 ;; l2 = emu2fnv3gow4hpx5
3425 ;; h3 = aeimquy2bfjnrvz3
3426 ;; l3 = cgkosw04dhlptx15
3427 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI vectors (operands 1 and 2) into the V16QI operand 0.
;; Both inputs are reinterpreted as V16QI with gen_lowpart and pushed
;; through three rounds of high/low byte interleaves into fresh
;; pseudos (h1/l1, h2/l2, h3/l3); a final low interleave of l3 and h3
;; writes the result.
3428 (define_expand "vec_pack_mod_v8hi"
3429 [(match_operand:V16QI 0 "register_operand" "")
3430 (match_operand:V8HI 1 "register_operand" "")
3431 (match_operand:V8HI 2 "register_operand" "")]
3434 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3436 op1 = gen_lowpart (V16QImode, operands[1]);
3437 op2 = gen_lowpart (V16QImode, operands[2]);
3438 h1 = gen_reg_rtx (V16QImode);
3439 l1 = gen_reg_rtx (V16QImode);
3440 h2 = gen_reg_rtx (V16QImode);
3441 l2 = gen_reg_rtx (V16QImode);
3442 h3 = gen_reg_rtx (V16QImode);
3443 l3 = gen_reg_rtx (V16QImode);
3445 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3446 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3447 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3448 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3449 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3450 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3451 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3462 ;; result = bdfhjlnp
;; Packs two V4SI vectors (operands 1 and 2) into the V8HI operand 0.
;; Both inputs are reinterpreted as V8HI with gen_lowpart and pushed
;; through two rounds of high/low word interleaves (h1/l1, h2/l2); a
;; final low interleave of l2 and h2 writes the result.
3463 (define_expand "vec_pack_mod_v4si"
3464 [(match_operand:V8HI 0 "register_operand" "")
3465 (match_operand:V4SI 1 "register_operand" "")
3466 (match_operand:V4SI 2 "register_operand" "")]
3469 rtx op1, op2, h1, l1, h2, l2;
3471 op1 = gen_lowpart (V8HImode, operands[1]);
3472 op2 = gen_lowpart (V8HImode, operands[2]);
3473 h1 = gen_reg_rtx (V8HImode);
3474 l1 = gen_reg_rtx (V8HImode);
3475 h2 = gen_reg_rtx (V8HImode);
3476 l2 = gen_reg_rtx (V8HImode);
3478 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3479 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3480 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3481 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3482 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI vectors (operands 1 and 2) into the V4SI operand 0.
;; Both inputs are reinterpreted as V4SI with gen_lowpart, interleaved
;; once into h1 (high halves) and l1 (low halves), and a final low
;; interleave of l1 and h1 writes the result.
3492 (define_expand "vec_pack_mod_v2di"
3493 [(match_operand:V4SI 0 "register_operand" "")
3494 (match_operand:V2DI 1 "register_operand" "")
3495 (match_operand:V2DI 2 "register_operand" "")]
3498 rtx op1, op2, h1, l1;
3500 op1 = gen_lowpart (V4SImode, operands[1]);
3501 op2 = gen_lowpart (V4SImode, operands[2]);
3502 h1 = gen_reg_rtx (V4SImode);
3503 l1 = gen_reg_rtx (V4SImode);
3505 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3506 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3507 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high bytes of two V16QI vectors: the selector picks
;; elements 8..15 of operand 1 alternating with elements 24..31 (the
;; same positions of operand 2).  The expander simply emits
;; gen_sse2_punpckhbw on its three operands.
3511 (define_expand "vec_interleave_highv16qi"
3512 [(set (match_operand:V16QI 0 "register_operand" "=x")
3515 (match_operand:V16QI 1 "register_operand" "0")
3516 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3517 (parallel [(const_int 8) (const_int 24)
3518 (const_int 9) (const_int 25)
3519 (const_int 10) (const_int 26)
3520 (const_int 11) (const_int 27)
3521 (const_int 12) (const_int 28)
3522 (const_int 13) (const_int 29)
3523 (const_int 14) (const_int 30)
3524 (const_int 15) (const_int 31)])))]
3527 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low bytes of two V16QI vectors: the selector picks
;; elements 0..7 of operand 1 alternating with elements 16..23.  The
;; expander simply emits gen_sse2_punpcklbw on its three operands.
3531 (define_expand "vec_interleave_lowv16qi"
3532 [(set (match_operand:V16QI 0 "register_operand" "=x")
3535 (match_operand:V16QI 1 "register_operand" "0")
3536 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3537 (parallel [(const_int 0) (const_int 16)
3538 (const_int 1) (const_int 17)
3539 (const_int 2) (const_int 18)
3540 (const_int 3) (const_int 19)
3541 (const_int 4) (const_int 20)
3542 (const_int 5) (const_int 21)
3543 (const_int 6) (const_int 22)
3544 (const_int 7) (const_int 23)])))]
3547 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high words of two V8HI vectors: the selector picks
;; elements 4..7 of operand 1 alternating with elements 12..15.  The
;; expander simply emits gen_sse2_punpckhwd on its three operands.
3551 (define_expand "vec_interleave_highv8hi"
3552 [(set (match_operand:V8HI 0 "register_operand" "=x")
3555 (match_operand:V8HI 1 "register_operand" "0")
3556 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3557 (parallel [(const_int 4) (const_int 12)
3558 (const_int 5) (const_int 13)
3559 (const_int 6) (const_int 14)
3560 (const_int 7) (const_int 15)])))]
3563 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low words of two V8HI vectors: the selector picks
;; elements 0..3 of operand 1 alternating with elements 8..11.  The
;; expander simply emits gen_sse2_punpcklwd on its three operands.
3567 (define_expand "vec_interleave_lowv8hi"
3568 [(set (match_operand:V8HI 0 "register_operand" "=x")
3571 (match_operand:V8HI 1 "register_operand" "0")
3572 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3573 (parallel [(const_int 0) (const_int 8)
3574 (const_int 1) (const_int 9)
3575 (const_int 2) (const_int 10)
3576 (const_int 3) (const_int 11)])))]
3579 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high doublewords of two V4SI vectors: the selector
;; picks elements 2, 3 of operand 1 alternating with elements 6, 7.  The
;; expander simply emits gen_sse2_punpckhdq on its three operands.
3583 (define_expand "vec_interleave_highv4si"
3584 [(set (match_operand:V4SI 0 "register_operand" "=x")
3587 (match_operand:V4SI 1 "register_operand" "0")
3588 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3589 (parallel [(const_int 2) (const_int 6)
3590 (const_int 3) (const_int 7)])))]
3593 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low doublewords of two V4SI vectors: the selector
;; picks elements 0, 1 of operand 1 alternating with elements 4, 5.  The
;; expander simply emits gen_sse2_punpckldq on its three operands.
3597 (define_expand "vec_interleave_lowv4si"
3598 [(set (match_operand:V4SI 0 "register_operand" "=x")
3601 (match_operand:V4SI 1 "register_operand" "0")
3602 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3603 (parallel [(const_int 0) (const_int 4)
3604 (const_int 1) (const_int 5)])))]
3607 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high quadwords of two V2DI vectors; the selector
;; starts at element 1.  The expander simply emits gen_sse2_punpckhqdq
;; on its three operands.
3611 (define_expand "vec_interleave_highv2di"
3612 [(set (match_operand:V2DI 0 "register_operand" "=x")
3615 (match_operand:V2DI 1 "register_operand" "0")
3616 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3617 (parallel [(const_int 1)
3621 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low quadwords of two V2DI vectors; the selector
;; starts at element 0.  The expander simply emits gen_sse2_punpcklqdq
;; on its three operands.
3625 (define_expand "vec_interleave_lowv2di"
3626 [(set (match_operand:V2DI 0 "register_operand" "=x")
3629 (match_operand:V2DI 1 "register_operand" "0")
3630 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3631 (parallel [(const_int 0)
3635 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Narrows two V8HI operands into one V16QI result using packsswb.
;; Operand 1 is the register tied to the output; operand 2 may be a
;; register or memory.
3639 (define_insn "sse2_packsswb"
3640 [(set (match_operand:V16QI 0 "register_operand" "=x")
3643 (match_operand:V8HI 1 "register_operand" "0"))
3645 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3647 "packsswb\t{%2, %0|%0, %2}"
3648 [(set_attr "type" "sselog")
3649 (set_attr "mode" "TI")])
;; Narrows two V4SI operands into one V8HI result using packssdw, with
;; the same operand arrangement as sse2_packsswb.
3651 (define_insn "sse2_packssdw"
3652 [(set (match_operand:V8HI 0 "register_operand" "=x")
3655 (match_operand:V4SI 1 "register_operand" "0"))
3657 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3659 "packssdw\t{%2, %0|%0, %2}"
3660 [(set_attr "type" "sselog")
3661 (set_attr "mode" "TI")])
;; Narrows two V8HI operands into one V16QI result using packuswb, with
;; the same operand arrangement as sse2_packsswb.
3663 (define_insn "sse2_packuswb"
3664 [(set (match_operand:V16QI 0 "register_operand" "=x")
3667 (match_operand:V8HI 1 "register_operand" "0"))
3669 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3671 "packuswb\t{%2, %0|%0, %2}"
3672 [(set_attr "type" "sselog")
3673 (set_attr "mode" "TI")])
;; punpckhbw: selects elements 8..15 of operand 1 alternating with
;; elements 24..31 (the high bytes of operand 2).  Operand 1 is tied to
;; the output register; operand 2 may be a register or memory.
3675 (define_insn "sse2_punpckhbw"
3676 [(set (match_operand:V16QI 0 "register_operand" "=x")
3679 (match_operand:V16QI 1 "register_operand" "0")
3680 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3681 (parallel [(const_int 8) (const_int 24)
3682 (const_int 9) (const_int 25)
3683 (const_int 10) (const_int 26)
3684 (const_int 11) (const_int 27)
3685 (const_int 12) (const_int 28)
3686 (const_int 13) (const_int 29)
3687 (const_int 14) (const_int 30)
3688 (const_int 15) (const_int 31)])))]
3690 "punpckhbw\t{%2, %0|%0, %2}"
3691 [(set_attr "type" "sselog")
3692 (set_attr "mode" "TI")])
;; punpcklbw: selects elements 0..7 of operand 1 alternating with
;; elements 16..23 (the low bytes of operand 2).
3694 (define_insn "sse2_punpcklbw"
3695 [(set (match_operand:V16QI 0 "register_operand" "=x")
3698 (match_operand:V16QI 1 "register_operand" "0")
3699 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3700 (parallel [(const_int 0) (const_int 16)
3701 (const_int 1) (const_int 17)
3702 (const_int 2) (const_int 18)
3703 (const_int 3) (const_int 19)
3704 (const_int 4) (const_int 20)
3705 (const_int 5) (const_int 21)
3706 (const_int 6) (const_int 22)
3707 (const_int 7) (const_int 23)])))]
3709 "punpcklbw\t{%2, %0|%0, %2}"
3710 [(set_attr "type" "sselog")
3711 (set_attr "mode" "TI")])
;; punpckhwd: selects elements 4..7 of operand 1 alternating with
;; elements 12..15 (the high words of operand 2).  Operand 1 is tied to
;; the output register; operand 2 may be a register or memory.
3713 (define_insn "sse2_punpckhwd"
3714 [(set (match_operand:V8HI 0 "register_operand" "=x")
3717 (match_operand:V8HI 1 "register_operand" "0")
3718 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3719 (parallel [(const_int 4) (const_int 12)
3720 (const_int 5) (const_int 13)
3721 (const_int 6) (const_int 14)
3722 (const_int 7) (const_int 15)])))]
3724 "punpckhwd\t{%2, %0|%0, %2}"
3725 [(set_attr "type" "sselog")
3726 (set_attr "mode" "TI")])
;; punpcklwd: selects elements 0..3 of operand 1 alternating with
;; elements 8..11 (the low words of operand 2).
3728 (define_insn "sse2_punpcklwd"
3729 [(set (match_operand:V8HI 0 "register_operand" "=x")
3732 (match_operand:V8HI 1 "register_operand" "0")
3733 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3734 (parallel [(const_int 0) (const_int 8)
3735 (const_int 1) (const_int 9)
3736 (const_int 2) (const_int 10)
3737 (const_int 3) (const_int 11)])))]
3739 "punpcklwd\t{%2, %0|%0, %2}"
3740 [(set_attr "type" "sselog")
3741 (set_attr "mode" "TI")])
;; punpckhdq: selects elements 2, 3 of operand 1 alternating with
;; elements 6, 7 (the high doublewords of operand 2).  Operand 1 is tied
;; to the output register; operand 2 may be a register or memory.
3743 (define_insn "sse2_punpckhdq"
3744 [(set (match_operand:V4SI 0 "register_operand" "=x")
3747 (match_operand:V4SI 1 "register_operand" "0")
3748 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3749 (parallel [(const_int 2) (const_int 6)
3750 (const_int 3) (const_int 7)])))]
3752 "punpckhdq\t{%2, %0|%0, %2}"
3753 [(set_attr "type" "sselog")
3754 (set_attr "mode" "TI")])
;; punpckldq: selects elements 0, 1 of operand 1 alternating with
;; elements 4, 5 (the low doublewords of operand 2).
3756 (define_insn "sse2_punpckldq"
3757 [(set (match_operand:V4SI 0 "register_operand" "=x")
3760 (match_operand:V4SI 1 "register_operand" "0")
3761 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3762 (parallel [(const_int 0) (const_int 4)
3763 (const_int 1) (const_int 5)])))]
3765 "punpckldq\t{%2, %0|%0, %2}"
3766 [(set_attr "type" "sselog")
3767 (set_attr "mode" "TI")])
;; punpckhqdq on V2DI operands; the selector starts at element 1.
;; Operand 1 is tied to the output register; operand 2 may be a register
;; or memory.
3769 (define_insn "sse2_punpckhqdq"
3770 [(set (match_operand:V2DI 0 "register_operand" "=x")
3773 (match_operand:V2DI 1 "register_operand" "0")
3774 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3775 (parallel [(const_int 1)
3778 "punpckhqdq\t{%2, %0|%0, %2}"
3779 [(set_attr "type" "sselog")
3780 (set_attr "mode" "TI")])
;; punpcklqdq on V2DI operands; the selector starts at element 0.
3782 (define_insn "sse2_punpcklqdq"
3783 [(set (match_operand:V2DI 0 "register_operand" "=x")
3786 (match_operand:V2DI 1 "register_operand" "0")
3787 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3788 (parallel [(const_int 0)
3791 "punpcklqdq\t{%2, %0|%0, %2}"
3792 [(set_attr "type" "sselog")
3793 (set_attr "mode" "TI")])
;; Insert a 16-bit value into one element of a V8HI vector.  The
;; expander takes the value as an SImode operand 2 and the element
;; index (0..7) as operand 3; its preparation code narrows operand 2 to
;; its HImode low part and rewrites operand 3 as the one-hot mask
;; 1 << index, which is the form the insn below matches.
3795 (define_expand "sse2_pinsrw"
3796 [(set (match_operand:V8HI 0 "register_operand" "")
3799 (match_operand:SI 2 "nonimmediate_operand" ""))
3800 (match_operand:V8HI 1 "register_operand" "")
3801 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3804 operands[2] = gen_lowpart (HImode, operands[2]);
3805 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Matching insn.  Operand 3 is a power-of-two mask (1..128); the output
;; code converts it back to an element index with exact_log2 before
;; printing pinsrw, and prints operand 2 with the %k modifier.
3808 (define_insn "*sse2_pinsrw"
3809 [(set (match_operand:V8HI 0 "register_operand" "=x")
3812 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3813 (match_operand:V8HI 1 "register_operand" "0")
3814 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3817 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3818 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3820 [(set_attr "type" "sselog")
3821 (set_attr "mode" "TI")])
;; Extract one element of a V8HI register (operand 1), selected by the
;; constant index 0..7 in operand 2, into the SImode general register
;; operand 0.  Emits pextrw.
3823 (define_insn "sse2_pextrw"
3824 [(set (match_operand:SI 0 "register_operand" "=r")
3827 (match_operand:V8HI 1 "register_operand" "x")
3828 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3830 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3831 [(set_attr "type" "sselog")
3832 (set_attr "mode" "TI")])
;; Shuffle of a V4SI vector controlled by an integer constant in
;; operand 2.  The expander splits that constant into four 2-bit
;; selectors (bits 0-1, 2-3, 4-5, 6-7) and passes them as separate
;; operands to gen_sse2_pshufd_1.
3834 (define_expand "sse2_pshufd"
3835 [(match_operand:V4SI 0 "register_operand" "")
3836 (match_operand:V4SI 1 "nonimmediate_operand" "")
3837 (match_operand:SI 2 "const_int_operand" "")]
3840 int mask = INTVAL (operands[2]);
3841 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3842 GEN_INT ((mask >> 0) & 3),
3843 GEN_INT ((mask >> 2) & 3),
3844 GEN_INT ((mask >> 4) & 3),
3845 GEN_INT ((mask >> 6) & 3)));
;; Matching insn.  Operands 2..5 are the four element selectors (each
;; 0..3); the output code packs them back into a single immediate, two
;; bits per selector, stores it in operands[2] and prints pshufd.
3849 (define_insn "sse2_pshufd_1"
3850 [(set (match_operand:V4SI 0 "register_operand" "=x")
3852 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3853 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3854 (match_operand 3 "const_0_to_3_operand" "")
3855 (match_operand 4 "const_0_to_3_operand" "")
3856 (match_operand 5 "const_0_to_3_operand" "")])))]
3860 mask |= INTVAL (operands[2]) << 0;
3861 mask |= INTVAL (operands[3]) << 2;
3862 mask |= INTVAL (operands[4]) << 4;
3863 mask |= INTVAL (operands[5]) << 6;
3864 operands[2] = GEN_INT (mask);
3866 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3868 [(set_attr "type" "sselog1")
3869 (set_attr "mode" "TI")])
;; Shuffle on a V8HI vector controlled by an integer constant in
;; operand 2.  The expander splits that constant into four 2-bit
;; selectors (bits 0-1, 2-3, 4-5, 6-7) and passes them as separate
;; operands to gen_sse2_pshuflw_1.
3871 (define_expand "sse2_pshuflw"
3872 [(match_operand:V8HI 0 "register_operand" "")
3873 (match_operand:V8HI 1 "nonimmediate_operand" "")
3874 (match_operand:SI 2 "const_int_operand" "")]
3877 int mask = INTVAL (operands[2]);
3878 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3879 GEN_INT ((mask >> 0) & 3),
3880 GEN_INT ((mask >> 2) & 3),
3881 GEN_INT ((mask >> 4) & 3),
3882 GEN_INT ((mask >> 6) & 3)));
;; Matching insn.  Operands 2..5 are the first four element selectors
;; (each 0..3); the output code packs them back into a single
;; immediate, two bits per selector, stores it in operands[2] and
;; prints pshuflw.
3886 (define_insn "sse2_pshuflw_1"
3887 [(set (match_operand:V8HI 0 "register_operand" "=x")
3889 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3890 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3891 (match_operand 3 "const_0_to_3_operand" "")
3892 (match_operand 4 "const_0_to_3_operand" "")
3893 (match_operand 5 "const_0_to_3_operand" "")
3901 mask |= INTVAL (operands[2]) << 0;
3902 mask |= INTVAL (operands[3]) << 2;
3903 mask |= INTVAL (operands[4]) << 4;
3904 mask |= INTVAL (operands[5]) << 6;
3905 operands[2] = GEN_INT (mask);
3907 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3909 [(set_attr "type" "sselog")
3910 (set_attr "mode" "TI")])
;; Shuffle on a V8HI vector controlled by an integer constant in
;; operand 2.  The expander splits that constant into four 2-bit
;; selectors and adds 4 to each, so the values passed to
;; gen_sse2_pshufhw_1 are element indices in the range 4..7.
3912 (define_expand "sse2_pshufhw"
3913 [(match_operand:V8HI 0 "register_operand" "")
3914 (match_operand:V8HI 1 "nonimmediate_operand" "")
3915 (match_operand:SI 2 "const_int_operand" "")]
3918 int mask = INTVAL (operands[2]);
3919 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3920 GEN_INT (((mask >> 0) & 3) + 4),
3921 GEN_INT (((mask >> 2) & 3) + 4),
3922 GEN_INT (((mask >> 4) & 3) + 4),
3923 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn.  Operands 2..5 are element indices 4..7; the output
;; code subtracts the bias of 4 from each, packs the results into a
;; single immediate (two bits per selector), stores it in operands[2]
;; and prints pshufhw.
3927 (define_insn "sse2_pshufhw_1"
3928 [(set (match_operand:V8HI 0 "register_operand" "=x")
3930 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3931 (parallel [(const_int 0)
3935 (match_operand 2 "const_4_to_7_operand" "")
3936 (match_operand 3 "const_4_to_7_operand" "")
3937 (match_operand 4 "const_4_to_7_operand" "")
3938 (match_operand 5 "const_4_to_7_operand" "")])))]
3942 mask |= (INTVAL (operands[2]) - 4) << 0;
3943 mask |= (INTVAL (operands[3]) - 4) << 2;
3944 mask |= (INTVAL (operands[4]) - 4) << 4;
3945 mask |= (INTVAL (operands[5]) - 4) << 6;
3946 operands[2] = GEN_INT (mask);
3948 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3950 [(set_attr "type" "sselog")
3951 (set_attr "mode" "TI")])
;; Load an SImode value (operand 1) into a V4SI register.  The
;; preparation statement sets operands[2] to the all-zero V4SI constant.
3953 (define_expand "sse2_loadd"
3954 [(set (match_operand:V4SI 0 "register_operand" "")
3957 (match_operand:SI 1 "nonimmediate_operand" ""))
3961 "operands[2] = CONST0_RTX (V4SImode);")
;; Place the SImode operand 2 into a V4SI register, with operand 1
;; being either the zero constant (C) or the destination itself (0).
;; Four alternatives: movd from memory, movd from a general register,
;; movss from memory, and movss from another SSE register when
;; operand 1 is the destination.
3963 (define_insn "sse2_loadld"
3964 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
3967 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
3968 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
3972 movd\t{%2, %0|%0, %2}
3973 movd\t{%2, %0|%0, %2}
3974 movss\t{%2, %0|%0, %2}
3975 movss\t{%2, %0|%0, %2}"
3976 [(set_attr "type" "ssemov")
3977 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register into an SImode destination
;; (memory, SSE register or general register).  After reload the
;; pattern is split into a plain SImode move whose source is the same
;; hard register as operand 1, re-created in SImode from its REGNO.
3979 (define_insn_and_split "sse2_stored"
3980 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
3982 (match_operand:V4SI 1 "register_operand" "x,Yi")
3983 (parallel [(const_int 0)])))]
3986 "&& reload_completed"
3987 [(set (match_dup 0) (match_dup 1))]
3989 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Patterns that store element 0 of a V2DI register into a DImode
;; destination.  sse_storeq is the named expander; the rex64 insn
;; (enabled for TARGET_64BIT && TARGET_SSE) additionally allows a
;; general-register destination, while *sse2_storeq only allows memory
;; or an SSE register.  The last pattern of the group rewrites the
;; extract, once reload has completed, into a plain DImode move whose
;; source is the same hard register as operand 1, re-created in DImode
;; from its REGNO.
3992 (define_expand "sse_storeq"
3993 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3995 (match_operand:V2DI 1 "register_operand" "")
3996 (parallel [(const_int 0)])))]
4000 (define_insn "*sse2_storeq_rex64"
4001 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4003 (match_operand:V2DI 1 "register_operand" "x,Yi")
4004 (parallel [(const_int 0)])))]
4005 "TARGET_64BIT && TARGET_SSE"
4008 (define_insn "*sse2_storeq"
4009 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4011 (match_operand:V2DI 1 "register_operand" "x")
4012 (parallel [(const_int 0)])))]
4017 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4019 (match_operand:V2DI 1 "register_operand" "")
4020 (parallel [(const_int 0)])))]
4021 "TARGET_SSE && reload_completed"
4022 [(set (match_dup 0) (match_dup 1))]
4024 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 (the upper quadword) of a V2DI operand into a
;; DImode destination when SSE2 is available.  Alternatives:
;;   0: SSE register -> memory, using movhps;
;;   1: in place in the SSE register tied to operand 1, using psrldq.
;;      psrldq counts in bytes, so bringing the upper 64-bit element
;;      down to the low quadword requires a shift of 8;
;;   2: offsettable memory -> SSE register, using movq on the high
;;      half (%H1) of the source.
4027 (define_insn "*vec_extractv2di_1_sse2"
4028 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4030 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4031 (parallel [(const_int 1)])))]
4032 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4034 movhps\t{%1, %0|%0, %1}
4035 psrldq\t{$8, %0|%0, 8}
4036 movq\t{%H1, %0|%0, %H1}"
4037 [(set_attr "type" "ssemov,sseishft,ssemov")
4038 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only variant (enabled when TARGET_SSE is set but TARGET_SSE2 is
;; not) of the element-1 extract from a V2DI operand.  Alternatives:
;; movhps to memory, movhlps between SSE registers, and movlps from the
;; high half (%H1) of an offsettable memory source.
4040 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4041 (define_insn "*vec_extractv2di_1_sse"
4042 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4044 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4045 (parallel [(const_int 1)])))]
4046 "!TARGET_SSE2 && TARGET_SSE
4047 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4049 movhps\t{%1, %0|%0, %1}
4050 movhlps\t{%1, %0|%0, %1}
4051 movlps\t{%H1, %0|%0, %H1}"
4052 [(set_attr "type" "ssemov")
4053 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SI register from the SImode register operand 1.  The first
;; alternative uses pshufd with immediate 0; the second requires the
;; source to already be the destination register and uses shufps with
;; immediate 0.
4055 (define_insn "*vec_dupv4si"
4056 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4058 (match_operand:SI 1 "register_operand" " Y2,0")))]
4061 pshufd\t{$0, %1, %0|%0, %1, 0}
4062 shufps\t{$0, %0, %0|%0, %0, 0}"
4063 [(set_attr "type" "sselog1")
4064 (set_attr "mode" "TI,V4SF")])
;; Build a V2DI register from the DImode register operand 1, which in
;; both alternatives must already be the destination register.
4066 (define_insn "*vec_dupv2di"
4067 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4069 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4074 [(set_attr "type" "sselog1,ssemov")
4075 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from two SImode operands.  Alternatives 0 and 1
;; target SSE registers, alternatives 2 and 3 MMX registers.  When
;; operand 2 is a register the two values are merged with punpckldq;
;; when operand 2 is zero (C) operand 1 is simply loaded with movd.
4077 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4078 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4079 ;; alternatives pretty much forces the MMX alternative to be chosen.
4080 (define_insn "*sse2_concatv2si"
4081 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4083 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4084 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4087 punpckldq\t{%2, %0|%0, %2}
4088 movd\t{%1, %0|%0, %1}
4089 punpckldq\t{%2, %0|%0, %2}
4090 movd\t{%1, %0|%0, %1}"
4091 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4092 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of the V2SI construction whose SSE alternatives use the
;; single-precision instructions unpcklps and movss (the latter only
;; from memory); the MMX alternatives are punpckldq and movd as above.
4094 (define_insn "*sse1_concatv2si"
4095 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4097 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4098 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4101 unpcklps\t{%2, %0|%0, %2}
4102 movss\t{%1, %0|%0, %1}
4103 punpckldq\t{%2, %0|%0, %2}
4104 movd\t{%1, %0|%0, %1}"
4105 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4106 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI register from two V2SI halves.  Operand 1 is always
;; tied to the destination; operand 2 is merged in with punpcklqdq,
;; movlhps (register source) or movhps (memory source).
4108 (define_insn "*vec_concatv4si_1"
4109 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4111 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4112 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4115 punpcklqdq\t{%2, %0|%0, %2}
4116 movlhps\t{%2, %0|%0, %2}
4117 movhps\t{%2, %0|%0, %2}"
4118 [(set_attr "type" "sselog,ssemov,ssemov")
4119 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI register from two DImode operands.  Six alternatives:
;;   0: operand 1 from memory with a zero operand 2, via movq;
;;   1: operand 1 from an MMX register with a zero operand 2, via movq2dq;
;;   2-4: operand 1 already in the destination, operand 2 merged in
;;        with punpcklqdq, movlhps or movhps (memory);
;;   5: operand 2 already in the destination, operand 1 loaded from
;;      memory with movlps.
4121 (define_insn "*vec_concatv2di"
4122 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4124 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4125 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4128 movq\t{%1, %0|%0, %1}
4129 movq2dq\t{%1, %0|%0, %1}
4130 punpcklqdq\t{%2, %0|%0, %2}
4131 movlhps\t{%2, %0|%0, %2}
4132 movhps\t{%2, %0|%0, %2}
4133 movlps\t{%1, %0|%0, %1}"
4134 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4135 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Set one element of a V2DI register: operand 1 is the DImode value,
;; operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
4137 (define_expand "vec_setv2di"
4138 [(match_operand:V2DI 0 "register_operand" "")
4139 (match_operand:DI 1 "register_operand" "")
4140 (match_operand 2 "const_int_operand" "")]
4143 ix86_expand_vector_set (false, operands[0], operands[1],
4144 INTVAL (operands[2]));
;; Extract one element of a V2DI register into the DImode operand 0;
;; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
4148 (define_expand "vec_extractv2di"
4149 [(match_operand:DI 0 "register_operand" "")
4150 (match_operand:V2DI 1 "register_operand" "")
4151 (match_operand 2 "const_int_operand" "")]
4154 ix86_expand_vector_extract (false, operands[0], operands[1],
4155 INTVAL (operands[2]));
;; Initialize a V2DI register from operand 1 (no predicate or mode
;; given here).  Delegates to ix86_expand_vector_init with false as its
;; first argument.
4159 (define_expand "vec_initv2di"
4160 [(match_operand:V2DI 0 "register_operand" "")
4161 (match_operand 1 "" "")]
4164 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set one element of a V4SI register: operand 1 is the SImode value,
;; operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
4168 (define_expand "vec_setv4si"
4169 [(match_operand:V4SI 0 "register_operand" "")
4170 (match_operand:SI 1 "register_operand" "")
4171 (match_operand 2 "const_int_operand" "")]
4174 ix86_expand_vector_set (false, operands[0], operands[1],
4175 INTVAL (operands[2]));
;; Extract one element of a V4SI register into the SImode operand 0;
;; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
4179 (define_expand "vec_extractv4si"
4180 [(match_operand:SI 0 "register_operand" "")
4181 (match_operand:V4SI 1 "register_operand" "")
4182 (match_operand 2 "const_int_operand" "")]
4185 ix86_expand_vector_extract (false, operands[0], operands[1],
4186 INTVAL (operands[2]));
;; Initialize a V4SI register from operand 1 (no predicate or mode
;; given here).  Delegates to ix86_expand_vector_init with false as its
;; first argument.
4190 (define_expand "vec_initv4si"
4191 [(match_operand:V4SI 0 "register_operand" "")
4192 (match_operand 1 "" "")]
4195 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set one element of a V8HI register: operand 1 is the HImode value,
;; operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
4199 (define_expand "vec_setv8hi"
4200 [(match_operand:V8HI 0 "register_operand" "")
4201 (match_operand:HI 1 "register_operand" "")
4202 (match_operand 2 "const_int_operand" "")]
4205 ix86_expand_vector_set (false, operands[0], operands[1],
4206 INTVAL (operands[2]));
;; Extract one element of a V8HI register into the HImode operand 0;
;; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
4210 (define_expand "vec_extractv8hi"
4211 [(match_operand:HI 0 "register_operand" "")
4212 (match_operand:V8HI 1 "register_operand" "")
4213 (match_operand 2 "const_int_operand" "")]
4216 ix86_expand_vector_extract (false, operands[0], operands[1],
4217 INTVAL (operands[2]));
;; Initialize a V8HI register from operand 1 (no predicate or mode
;; given here).  Delegates to ix86_expand_vector_init with false as its
;; first argument.
4221 (define_expand "vec_initv8hi"
4222 [(match_operand:V8HI 0 "register_operand" "")
4223 (match_operand 1 "" "")]
4226 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set one element of a V16QI register: operand 1 is the QImode value,
;; operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
4230 (define_expand "vec_setv16qi"
4231 [(match_operand:V16QI 0 "register_operand" "")
4232 (match_operand:QI 1 "register_operand" "")
4233 (match_operand 2 "const_int_operand" "")]
4236 ix86_expand_vector_set (false, operands[0], operands[1],
4237 INTVAL (operands[2]));
;; Extract one element of a V16QI register into the QImode operand 0;
;; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
4241 (define_expand "vec_extractv16qi"
4242 [(match_operand:QI 0 "register_operand" "")
4243 (match_operand:V16QI 1 "register_operand" "")
4244 (match_operand 2 "const_int_operand" "")]
4247 ix86_expand_vector_extract (false, operands[0], operands[1],
4248 INTVAL (operands[2]));
;; Initialize a V16QI register from operand 1 (no predicate or mode
;; given here).  Delegates to ix86_expand_vector_init with false as its
;; first argument.
4252 (define_expand "vec_initv16qi"
4253 [(match_operand:V16QI 0 "register_operand" "")
4254 (match_operand 1 "" "")]
4257 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Widening unpacks from V16QI to V8HI.  All four expanders delegate to
;; ix86_expand_sse_unpack; its second argument is true for the
;; vec_unpacku_* names and false for vec_unpacks_*, and its third
;; argument is true for the *_hi_* names and false for *_lo_*.
4261 (define_expand "vec_unpacku_hi_v16qi"
4262 [(match_operand:V8HI 0 "register_operand" "")
4263 (match_operand:V16QI 1 "register_operand" "")]
4266 ix86_expand_sse_unpack (operands, true, true);
4270 (define_expand "vec_unpacks_hi_v16qi"
4271 [(match_operand:V8HI 0 "register_operand" "")
4272 (match_operand:V16QI 1 "register_operand" "")]
4275 ix86_expand_sse_unpack (operands, false, true);
4279 (define_expand "vec_unpacku_lo_v16qi"
4280 [(match_operand:V8HI 0 "register_operand" "")
4281 (match_operand:V16QI 1 "register_operand" "")]
4284 ix86_expand_sse_unpack (operands, true, false);
4288 (define_expand "vec_unpacks_lo_v16qi"
4289 [(match_operand:V8HI 0 "register_operand" "")
4290 (match_operand:V16QI 1 "register_operand" "")]
4293 ix86_expand_sse_unpack (operands, false, false);
4297 (define_expand "vec_unpacku_hi_v8hi"
4298 [(match_operand:V4SI 0 "register_operand" "")
4299 (match_operand:V8HI 1 "register_operand" "")]
4302 ix86_expand_sse_unpack (operands, true, true);
4306 (define_expand "vec_unpacks_hi_v8hi"
4307 [(match_operand:V4SI 0 "register_operand" "")
4308 (match_operand:V8HI 1 "register_operand" "")]
4311 ix86_expand_sse_unpack (operands, false, true);
4315 (define_expand "vec_unpacku_lo_v8hi"
4316 [(match_operand:V4SI 0 "register_operand" "")
4317 (match_operand:V8HI 1 "register_operand" "")]
4320 ix86_expand_sse_unpack (operands, true, false);
;; Expander "vec_unpacks_lo_v8hi": V8HImode register operand 1 to
;; V4SImode register operand 0.  The work is delegated to
;; ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): the two flags presumably select signed / low half, as
;; the pattern name suggests -- confirm against ix86_expand_sse_unpack.
4324 (define_expand "vec_unpacks_lo_v8hi"
4325 [(match_operand:V4SI 0 "register_operand" "")
4326 (match_operand:V8HI 1 "register_operand" "")]
4329 ix86_expand_sse_unpack (operands, false, false);
;; Expander "vec_unpacku_hi_v4si": V4SImode register operand 1 to
;; V2DImode register operand 0.  The work is delegated to
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the two flags presumably select unsigned / high half, as
;; the pattern name suggests -- confirm against ix86_expand_sse_unpack.
4333 (define_expand "vec_unpacku_hi_v4si"
4334 [(match_operand:V2DI 0 "register_operand" "")
4335 (match_operand:V4SI 1 "register_operand" "")]
4338 ix86_expand_sse_unpack (operands, true, true);
;; Expander "vec_unpacks_hi_v4si": V4SImode register operand 1 to
;; V2DImode register operand 0.  The work is delegated to
;; ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): the two flags presumably select signed / high half, as
;; the pattern name suggests -- confirm against ix86_expand_sse_unpack.
4342 (define_expand "vec_unpacks_hi_v4si"
4343 [(match_operand:V2DI 0 "register_operand" "")
4344 (match_operand:V4SI 1 "register_operand" "")]
4347 ix86_expand_sse_unpack (operands, false, true);
;; Expander "vec_unpacku_lo_v4si": V4SImode register operand 1 to
;; V2DImode register operand 0.  The work is delegated to
;; ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the two flags presumably select unsigned / low half, as
;; the pattern name suggests -- confirm against ix86_expand_sse_unpack.
4351 (define_expand "vec_unpacku_lo_v4si"
4352 [(match_operand:V2DI 0 "register_operand" "")
4353 (match_operand:V4SI 1 "register_operand" "")]
4356 ix86_expand_sse_unpack (operands, true, false);
;; Expander "vec_unpacks_lo_v4si": V4SImode register operand 1 to
;; V2DImode register operand 0.  The work is delegated to
;; ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): the two flags presumably select signed / low half, as
;; the pattern name suggests -- confirm against ix86_expand_sse_unpack.
4360 (define_expand "vec_unpacks_lo_v4si"
4361 [(match_operand:V2DI 0 "register_operand" "")
4362 (match_operand:V4SI 1 "register_operand" "")]
4365 ix86_expand_sse_unpack (operands, false, false);
4369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4373 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn "sse2_uavgv16qi3": emits pavgb.  Operand 0 is an SSE register
;; ("x").  The `%' on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the output and operand 2 may be an SSE register or
;; memory.  The RTL template includes a V16QI const_vector of sixteen 1s.
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for PLUS in V16QImode.
4375 (define_insn "sse2_uavgv16qi3"
4376 [(set (match_operand:V16QI 0 "register_operand" "=x")
4382 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4384 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4385 (const_vector:V16QI [(const_int 1) (const_int 1)
4386 (const_int 1) (const_int 1)
4387 (const_int 1) (const_int 1)
4388 (const_int 1) (const_int 1)
4389 (const_int 1) (const_int 1)
4390 (const_int 1) (const_int 1)
4391 (const_int 1) (const_int 1)
4392 (const_int 1) (const_int 1)]))
4394 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4395 "pavgb\t{%2, %0|%0, %2}"
4396 [(set_attr "type" "sseiadd")
4397 (set_attr "mode" "TI")])
;; Insn "sse2_uavgv8hi3": emits pavgw.  Operand 0 is an SSE register
;; ("x").  The `%' on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is tied to the output and operand 2 may be an SSE register or
;; memory.  The RTL template includes a V8HI const_vector of eight 1s.
;; Enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for PLUS in V8HImode.
4399 (define_insn "sse2_uavgv8hi3"
4400 [(set (match_operand:V8HI 0 "register_operand" "=x")
4406 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4408 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4409 (const_vector:V8HI [(const_int 1) (const_int 1)
4410 (const_int 1) (const_int 1)
4411 (const_int 1) (const_int 1)
4412 (const_int 1) (const_int 1)]))
4414 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4415 "pavgw\t{%2, %0|%0, %2}"
4416 [(set_attr "type" "sseiadd")
4417 (set_attr "mode" "TI")])
;; Insn "sse2_psadbw": emits psadbw.  The operation is modelled as a
;; V2DI unspec rather than spelled out in RTL (see the remark below).
;; Operand 1 (V16QI) is tied to the output register; operand 2 (V16QI) may
;; be an SSE register or memory.
4419 ;; The correct representation for this is absolutely enormous, and
4420 ;; surely not generally useful.
4421 (define_insn "sse2_psadbw"
4422 [(set (match_operand:V2DI 0 "register_operand" "=x")
4423 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4424 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4427 "psadbw\t{%2, %0|%0, %2}"
4428 [(set_attr "type" "sseiadd")
4429 (set_attr "mode" "TI")])
;; Insn "sse_movmskps": emits movmskps.  The SImode result goes to a
;; general register ("r"); the source is a V4SF SSE register wrapped in an
;; SImode unspec.
4431 (define_insn "sse_movmskps"
4432 [(set (match_operand:SI 0 "register_operand" "=r")
4433 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4436 "movmskps\t{%1, %0|%0, %1}"
4437 [(set_attr "type" "ssecvt")
4438 (set_attr "mode" "V4SF")])
;; Insn "sse2_movmskpd": emits movmskpd.  The SImode result goes to a
;; general register ("r"); the source is a V2DF SSE register wrapped in an
;; SImode unspec.
4440 (define_insn "sse2_movmskpd"
4441 [(set (match_operand:SI 0 "register_operand" "=r")
4442 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4445 "movmskpd\t{%1, %0|%0, %1}"
4446 [(set_attr "type" "ssecvt")
4447 (set_attr "mode" "V2DF")])
;; Insn "sse2_pmovmskb": emits pmovmskb.  The SImode result goes to a
;; general register ("r"); the source is a V16QI SSE register wrapped in
;; an SImode unspec.
;; NOTE(review): the "mode" attribute is V2DF although the operand is an
;; integer vector.  This may be deliberate (e.g. to influence attributes
;; derived from "mode") -- confirm before changing it.
4449 (define_insn "sse2_pmovmskb"
4450 [(set (match_operand:SI 0 "register_operand" "=r")
4451 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4454 "pmovmskb\t{%1, %0|%0, %1}"
4455 [(set_attr "type" "ssecvt")
4456 (set_attr "mode" "V2DF")])
;; Expander "sse2_maskmovdqu": operand 0 is a V16QI memory destination;
;; operands 1 and 2 are V16QI registers inside a V16QI unspec.
4458 (define_expand "sse2_maskmovdqu"
4459 [(set (match_operand:V16QI 0 "memory_operand" "")
4460 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4461 (match_operand:V16QI 2 "register_operand" "x")
;; Insn "*sse2_maskmovdqu" (32-bit form, TARGET_SSE2 && !TARGET_64BIT):
;; emits maskmovdqu.  The destination is a V16QI MEM whose address is
;; operand 0, an SImode register constrained to "D".  The same MEM also
;; appears inside the unspec via match_dup, so the old memory contents are
;; an input.  Only operands 1 and 2 are printed; the address register does
;; not appear in the assembler template.
4467 (define_insn "*sse2_maskmovdqu"
4468 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4469 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4470 (match_operand:V16QI 2 "register_operand" "x")
4471 (mem:V16QI (match_dup 0))]
4473 "TARGET_SSE2 && !TARGET_64BIT"
4474 ;; @@@ check ordering of operands in intel/nonintel syntax
4475 "maskmovdqu\t{%2, %1|%1, %2}"
4476 [(set_attr "type" "ssecvt")
4477 (set_attr "mode" "TI")])
;; Insn "*sse2_maskmovdqu_rex64" (64-bit form, TARGET_SSE2 &&
;; TARGET_64BIT): emits maskmovdqu.  Identical in shape to the 32-bit
;; pattern except that the address register, operand 0, is DImode.  The
;; destination MEM also appears inside the unspec via match_dup, so the
;; old memory contents are an input.  Only operands 1 and 2 are printed.
4479 (define_insn "*sse2_maskmovdqu_rex64"
4480 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4481 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4482 (match_operand:V16QI 2 "register_operand" "x")
4483 (mem:V16QI (match_dup 0))]
4485 "TARGET_SSE2 && TARGET_64BIT"
4486 ;; @@@ check ordering of operands in intel/nonintel syntax
4487 "maskmovdqu\t{%2, %1|%1, %2}"
4488 [(set_attr "type" "ssecvt")
4489 (set_attr "mode" "TI")])
;; Insn "sse_ldmxcsr": an unspec_volatile whose only operand is an SImode
;; memory location ("m").  The "memory" attribute is "load".
4491 (define_insn "sse_ldmxcsr"
4492 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
4496 [(set_attr "type" "sse")
4497 (set_attr "memory" "load")])
;; Insn "sse_stmxcsr": sets an SImode memory operand ("=m") from an
;; unspec_volatile tagged UNSPECV_STMXCSR; the unspec's dummy argument is
;; (const_int 0).  The "memory" attribute is "store".
4499 (define_insn "sse_stmxcsr"
4500 [(set (match_operand:SI 0 "memory_operand" "=m")
4501 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
4504 [(set_attr "type" "sse")
4505 (set_attr "memory" "store")])
;; Expander "sse_sfence", enabled for TARGET_SSE || TARGET_3DNOW_A.
;; The preparation code creates operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, marked volatile.  That MEM is then used via
;; match_dup as the argument of the UNSPEC_SFENCE unspec.
4507 (define_expand "sse_sfence"
4509 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4510 "TARGET_SSE || TARGET_3DNOW_A"
4512 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4513 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn "*sse_sfence", enabled for TARGET_SSE || TARGET_3DNOW_A.  Matches
;; a set of BLKmode operand 0 (no predicate, no constraint) to an
;; UNSPEC_SFENCE unspec of that same operand.  The "memory" attribute is
;; "unknown".
4516 (define_insn "*sse_sfence"
4517 [(set (match_operand:BLK 0 "" "")
4518 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4519 "TARGET_SSE || TARGET_3DNOW_A"
4521 [(set_attr "type" "sse")
4522 (set_attr "memory" "unknown")])
;; Insn "sse2_clflush": an unspec_volatile whose only operand is an
;; address (address_operand, constraint "p").  The "memory" attribute is
;; "unknown".
4524 (define_insn "sse2_clflush"
4525 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
4529 [(set_attr "type" "sse")
4530 (set_attr "memory" "unknown")])
;; Expander "sse2_mfence".  The preparation code creates operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, marked
;; volatile.  That MEM is then used via match_dup as the argument of the
;; UNSPEC_MFENCE unspec.
4532 (define_expand "sse2_mfence"
4534 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4537 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4538 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn "*sse2_mfence".  Matches a set of BLKmode operand 0 (no
;; predicate, no constraint) to an UNSPEC_MFENCE unspec of that same
;; operand.  The "memory" attribute is "unknown".
4541 (define_insn "*sse2_mfence"
4542 [(set (match_operand:BLK 0 "" "")
4543 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4546 [(set_attr "type" "sse")
4547 (set_attr "memory" "unknown")])
;; Expander "sse2_lfence".  The preparation code creates operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, marked
;; volatile.  That MEM is then used via match_dup as the argument of the
;; UNSPEC_LFENCE unspec.
4549 (define_expand "sse2_lfence"
4551 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4554 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4555 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn "*sse2_lfence".  Matches a set of BLKmode operand 0 (no
;; predicate, no constraint) to an UNSPEC_LFENCE unspec of that same
;; operand.  The "memory" attribute is "unknown".
4558 (define_insn "*sse2_lfence"
4559 [(set (match_operand:BLK 0 "" "")
4560 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4563 [(set_attr "type" "sse")
4564 (set_attr "memory" "unknown")])
;; Insn "sse3_mwait": an unspec_volatile of two SImode register operands,
;; operand 0 constrained to "a" and operand 1 to "c".  The "length"
;; attribute is fixed at 3.  The remark below explains why SImode operands
;; are sufficient for 64-bit code as well.
4566 (define_insn "sse3_mwait"
4567 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4568 (match_operand:SI 1 "register_operand" "c")]
4571 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
4572 ;; Since 32bit register operands are implicitly zero extended to 64bit,
4573 ;; we only need to set up 32bit registers.
4575 [(set_attr "length" "3")])
;; Insn "sse3_monitor" (32-bit form, TARGET_SSE3 && !TARGET_64BIT):
;; emits monitor with its three operands printed explicitly.  All three
;; are SImode registers, constrained to "a", "c" and "d" respectively.
;; The "length" attribute is fixed at 3.
4577 (define_insn "sse3_monitor"
4578 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4579 (match_operand:SI 1 "register_operand" "c")
4580 (match_operand:SI 2 "register_operand" "d")]
4582 "TARGET_SSE3 && !TARGET_64BIT"
4583 "monitor\t%0, %1, %2"
4584 [(set_attr "length" "3")])
;; Insn "sse3_monitor64" (64-bit form, TARGET_SSE3 && TARGET_64BIT).
;; Operand 0 is a DImode register constrained to "a"; operands 1 and 2
;; stay SImode, constrained to "c" and "d", for the reason given in the
;; remark below.  The "length" attribute is fixed at 3.
4586 (define_insn "sse3_monitor64"
4587 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
4588 (match_operand:SI 1 "register_operand" "c")
4589 (match_operand:SI 2 "register_operand" "d")]
4591 "TARGET_SSE3 && TARGET_64BIT"
4592 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
4593 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4594 ;; zero extended to 64bit, we only need to set up 32bit registers.
4596 [(set_attr "length" "3")])
;; Insn "ssse3_phaddwv8hi3": emits phaddw on SSE registers.  Operand 1
;; (V8HI, tied to the output) and operand 2 (V8HI, SSE register or memory)
;; are each taken apart with vec_select into HImode elements 0..7, listed
;; as adjacent pairs (0,1), (2,3), (4,5), (6,7).  Operand 1's pairs come
;; first, then operand 2's.
4599 (define_insn "ssse3_phaddwv8hi3"
4600 [(set (match_operand:V8HI 0 "register_operand" "=x")
4606 (match_operand:V8HI 1 "register_operand" "0")
4607 (parallel [(const_int 0)]))
4608 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4610 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4611 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4614 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4615 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4617 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4618 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4623 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4624 (parallel [(const_int 0)]))
4625 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4627 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4628 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4631 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4632 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4634 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4635 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4637 "phaddw\t{%2, %0|%0, %2}"
4638 [(set_attr "type" "sseiadd")
4639 (set_attr "mode" "TI")])
;; Insn "ssse3_phaddwv4hi3": emits phaddw on MMX registers ("y").
;; Operand 1 (V4HI, tied to the output) and operand 2 (V4HI, MMX register
;; or memory) are each taken apart with vec_select into HImode elements
;; 0..3, listed as adjacent pairs (0,1), (2,3).  Operand 1's pairs come
;; first, then operand 2's.
4641 (define_insn "ssse3_phaddwv4hi3"
4642 [(set (match_operand:V4HI 0 "register_operand" "=y")
4647 (match_operand:V4HI 1 "register_operand" "0")
4648 (parallel [(const_int 0)]))
4649 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4651 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4652 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4656 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4657 (parallel [(const_int 0)]))
4658 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4660 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4661 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4663 "phaddw\t{%2, %0|%0, %2}"
4664 [(set_attr "type" "sseiadd")
4665 (set_attr "mode" "DI")])
;; Insn "ssse3_phadddv4si3": emits phaddd on SSE registers.  Operand 1
;; (V4SI, tied to the output) and operand 2 (V4SI, SSE register or memory)
;; are each taken apart with vec_select into SImode elements 0..3, listed
;; as adjacent pairs (0,1), (2,3).  Operand 1's pairs come first, then
;; operand 2's.
4667 (define_insn "ssse3_phadddv4si3"
4668 [(set (match_operand:V4SI 0 "register_operand" "=x")
4673 (match_operand:V4SI 1 "register_operand" "0")
4674 (parallel [(const_int 0)]))
4675 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4677 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4678 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4682 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4683 (parallel [(const_int 0)]))
4684 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4686 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4687 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4689 "phaddd\t{%2, %0|%0, %2}"
4690 [(set_attr "type" "sseiadd")
4691 (set_attr "mode" "TI")])
;; Insn "ssse3_phadddv2si3": emits phaddd on MMX registers ("y").
;; Operand 1 (V2SI, tied to the output) and operand 2 (V2SI, MMX register
;; or memory) are each split with vec_select into SImode elements 0 and 1;
;; operand 1's pair comes first, then operand 2's.
4693 (define_insn "ssse3_phadddv2si3"
4694 [(set (match_operand:V2SI 0 "register_operand" "=y")
4698 (match_operand:V2SI 1 "register_operand" "0")
4699 (parallel [(const_int 0)]))
4700 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4703 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4704 (parallel [(const_int 0)]))
4705 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4707 "phaddd\t{%2, %0|%0, %2}"
4708 [(set_attr "type" "sseiadd")
4709 (set_attr "mode" "DI")])
;; Insn "ssse3_phaddswv8hi3": emits phaddsw on SSE registers.  Operand 1
;; (V8HI, tied to the output) and operand 2 (V8HI, SSE register or memory)
;; are each taken apart with vec_select into HImode elements 0..7, listed
;; as adjacent pairs (0,1), (2,3), (4,5), (6,7).  Operand 1's pairs come
;; first, then operand 2's.
4711 (define_insn "ssse3_phaddswv8hi3"
4712 [(set (match_operand:V8HI 0 "register_operand" "=x")
4718 (match_operand:V8HI 1 "register_operand" "0")
4719 (parallel [(const_int 0)]))
4720 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4722 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4723 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4726 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4727 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4729 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4730 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4735 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4736 (parallel [(const_int 0)]))
4737 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4739 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4740 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4743 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4744 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4746 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4747 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4749 "phaddsw\t{%2, %0|%0, %2}"
4750 [(set_attr "type" "sseiadd")
4751 (set_attr "mode" "TI")])
;; Insn "ssse3_phaddswv4hi3": emits phaddsw on MMX registers ("y").
;; Operand 1 (V4HI, tied to the output) and operand 2 (V4HI, MMX register
;; or memory) are each taken apart with vec_select into HImode elements
;; 0..3, listed as adjacent pairs (0,1), (2,3).  Operand 1's pairs come
;; first, then operand 2's.
4753 (define_insn "ssse3_phaddswv4hi3"
4754 [(set (match_operand:V4HI 0 "register_operand" "=y")
4759 (match_operand:V4HI 1 "register_operand" "0")
4760 (parallel [(const_int 0)]))
4761 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4763 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4764 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4768 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4769 (parallel [(const_int 0)]))
4770 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4772 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4773 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4775 "phaddsw\t{%2, %0|%0, %2}"
4776 [(set_attr "type" "sseiadd")
4777 (set_attr "mode" "DI")])
;; Insn "ssse3_phsubwv8hi3": emits phsubw on SSE registers.  Operand 1
;; (V8HI, tied to the output) and operand 2 (V8HI, SSE register or memory)
;; are each taken apart with vec_select into HImode elements 0..7, listed
;; as adjacent pairs (0,1), (2,3), (4,5), (6,7).  Operand 1's pairs come
;; first, then operand 2's.
4779 (define_insn "ssse3_phsubwv8hi3"
4780 [(set (match_operand:V8HI 0 "register_operand" "=x")
4786 (match_operand:V8HI 1 "register_operand" "0")
4787 (parallel [(const_int 0)]))
4788 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4790 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4791 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4794 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4795 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4797 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4798 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4803 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4804 (parallel [(const_int 0)]))
4805 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4807 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4808 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4811 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4812 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4814 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4815 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4817 "phsubw\t{%2, %0|%0, %2}"
4818 [(set_attr "type" "sseiadd")
4819 (set_attr "mode" "TI")])
;; Insn "ssse3_phsubwv4hi3": emits phsubw on MMX registers ("y").
;; Operand 1 (V4HI, tied to the output) and operand 2 (V4HI, MMX register
;; or memory) are each taken apart with vec_select into HImode elements
;; 0..3, listed as adjacent pairs (0,1), (2,3).  Operand 1's pairs come
;; first, then operand 2's.
4821 (define_insn "ssse3_phsubwv4hi3"
4822 [(set (match_operand:V4HI 0 "register_operand" "=y")
4827 (match_operand:V4HI 1 "register_operand" "0")
4828 (parallel [(const_int 0)]))
4829 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4831 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4832 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4836 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4837 (parallel [(const_int 0)]))
4838 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4840 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4841 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4843 "phsubw\t{%2, %0|%0, %2}"
4844 [(set_attr "type" "sseiadd")
4845 (set_attr "mode" "DI")])
;; Insn "ssse3_phsubdv4si3": emits phsubd on SSE registers.  Operand 1
;; (V4SI, tied to the output) and operand 2 (V4SI, SSE register or memory)
;; are each taken apart with vec_select into SImode elements 0..3, listed
;; as adjacent pairs (0,1), (2,3).  Operand 1's pairs come first, then
;; operand 2's.
4847 (define_insn "ssse3_phsubdv4si3"
4848 [(set (match_operand:V4SI 0 "register_operand" "=x")
4853 (match_operand:V4SI 1 "register_operand" "0")
4854 (parallel [(const_int 0)]))
4855 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4857 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4858 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4862 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4863 (parallel [(const_int 0)]))
4864 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4866 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4867 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4869 "phsubd\t{%2, %0|%0, %2}"
4870 [(set_attr "type" "sseiadd")
4871 (set_attr "mode" "TI")])
;; Insn "ssse3_phsubdv2si3": emits phsubd on MMX registers ("y").
;; Operand 1 (V2SI, tied to the output) and operand 2 (V2SI, MMX register
;; or memory) are each split with vec_select into SImode elements 0 and 1;
;; operand 1's pair comes first, then operand 2's.
4873 (define_insn "ssse3_phsubdv2si3"
4874 [(set (match_operand:V2SI 0 "register_operand" "=y")
4878 (match_operand:V2SI 1 "register_operand" "0")
4879 (parallel [(const_int 0)]))
4880 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4883 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4884 (parallel [(const_int 0)]))
4885 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4887 "phsubd\t{%2, %0|%0, %2}"
4888 [(set_attr "type" "sseiadd")
4889 (set_attr "mode" "DI")])
;; Insn "ssse3_phsubswv8hi3": emits phsubsw on SSE registers.  Operand 1
;; (V8HI, tied to the output) and operand 2 (V8HI, SSE register or memory)
;; are each taken apart with vec_select into HImode elements 0..7, listed
;; as adjacent pairs (0,1), (2,3), (4,5), (6,7).  Operand 1's pairs come
;; first, then operand 2's.
4891 (define_insn "ssse3_phsubswv8hi3"
4892 [(set (match_operand:V8HI 0 "register_operand" "=x")
4898 (match_operand:V8HI 1 "register_operand" "0")
4899 (parallel [(const_int 0)]))
4900 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4902 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4903 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4906 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4907 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4909 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4910 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4915 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4916 (parallel [(const_int 0)]))
4917 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4919 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4920 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4923 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4924 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4926 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4927 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4929 "phsubsw\t{%2, %0|%0, %2}"
4930 [(set_attr "type" "sseiadd")
4931 (set_attr "mode" "TI")])
;; Insn "ssse3_phsubswv4hi3": emits phsubsw on MMX registers ("y").
;; Operand 1 (V4HI, tied to the output) and operand 2 (V4HI, MMX register
;; or memory) are each taken apart with vec_select into HImode elements
;; 0..3, listed as adjacent pairs (0,1), (2,3).  Operand 1's pairs come
;; first, then operand 2's.
4933 (define_insn "ssse3_phsubswv4hi3"
4934 [(set (match_operand:V4HI 0 "register_operand" "=y")
4939 (match_operand:V4HI 1 "register_operand" "0")
4940 (parallel [(const_int 0)]))
4941 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4943 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4944 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4948 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4949 (parallel [(const_int 0)]))
4950 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4952 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4953 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4955 "phsubsw\t{%2, %0|%0, %2}"
4956 [(set_attr "type" "sseiadd")
4957 (set_attr "mode" "DI")])
;; Insn "ssse3_pmaddubswv8hi3": emits pmaddubsw on SSE registers, giving
;; a V8HI result from two V16QI inputs.  The `%' on operand 1 marks
;; operands 1 and 2 as commutative; operand 1 is tied to the output and
;; operand 2 may be an SSE register or memory.  Each input is referenced
;; twice: once through a selection whose index list starts at 0, and once
;; (via match_dup) through a V16QI vec_select whose list starts at 1.
4959 (define_insn "ssse3_pmaddubswv8hi3"
4960 [(set (match_operand:V8HI 0 "register_operand" "=x")
4965 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4966 (parallel [(const_int 0)
4976 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
4977 (parallel [(const_int 0)
4987 (vec_select:V16QI (match_dup 1)
4988 (parallel [(const_int 1)
4997 (vec_select:V16QI (match_dup 2)
4998 (parallel [(const_int 1)
5005 (const_int 15)]))))))]
5007 "pmaddubsw\t{%2, %0|%0, %2}"
5008 [(set_attr "type" "sseiadd")
5009 (set_attr "mode" "TI")])
;; Insn "ssse3_pmaddubswv4hi3": emits pmaddubsw on MMX registers ("y"),
;; giving a V4HI result from two V8QI inputs.  The `%' on operand 1 marks
;; operands 1 and 2 as commutative; operand 1 is tied to the output and
;; operand 2 may be an MMX register or memory.  Each input is referenced
;; twice: once through a selection whose index list starts at 0, and once
;; (via match_dup) through a V8QI vec_select whose list starts at 1.
5011 (define_insn "ssse3_pmaddubswv4hi3"
5012 [(set (match_operand:V4HI 0 "register_operand" "=y")
5017 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5018 (parallel [(const_int 0)
5024 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5025 (parallel [(const_int 0)
5031 (vec_select:V8QI (match_dup 1)
5032 (parallel [(const_int 1)
5037 (vec_select:V8QI (match_dup 2)
5038 (parallel [(const_int 1)
5041 (const_int 7)]))))))]
5043 "pmaddubsw\t{%2, %0|%0, %2}"
5044 [(set_attr "type" "sseiadd")
5045 (set_attr "mode" "DI")])
;; Insn "ssse3_pmulhrswv8hi3": emits pmulhrsw on SSE registers.  The `%'
;; on operand 1 marks operands 1 and 2 as commutative; operand 1 is tied
;; to the output and operand 2 may be an SSE register or memory.  The RTL
;; template includes a V8HI const_vector of eight 1s.  Enabled for
;; TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands for MULT
;; in V8HImode.
5047 (define_insn "ssse3_pmulhrswv8hi3"
5048 [(set (match_operand:V8HI 0 "register_operand" "=x")
5055 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5057 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5059 (const_vector:V8HI [(const_int 1) (const_int 1)
5060 (const_int 1) (const_int 1)
5061 (const_int 1) (const_int 1)
5062 (const_int 1) (const_int 1)]))
5064 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5065 "pmulhrsw\t{%2, %0|%0, %2}"
5066 [(set_attr "type" "sseimul")
5067 (set_attr "mode" "TI")])
;; Insn "ssse3_pmulhrswv4hi3": emits pmulhrsw on MMX registers ("y").
;; The `%' on operand 1 marks operands 1 and 2 as commutative; operand 1
;; is tied to the output and operand 2 may be an MMX register or memory.
;; The RTL template includes a V4HI const_vector of four 1s.  Enabled for
;; TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands for MULT
;; in V4HImode.
5069 (define_insn "ssse3_pmulhrswv4hi3"
5070 [(set (match_operand:V4HI 0 "register_operand" "=y")
5077 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5079 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5081 (const_vector:V4HI [(const_int 1) (const_int 1)
5082 (const_int 1) (const_int 1)]))
5084 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5085 "pmulhrsw\t{%2, %0|%0, %2}"
5086 [(set_attr "type" "sseimul")
5087 (set_attr "mode" "DI")])
;; Insn "ssse3_pshufbv16qi3": emits pshufb on SSE registers.  Modelled as
;; a V16QI unspec of operand 1 (tied to the output) and operand 2 (SSE
;; register or memory).  The `;' after the output template only starts an
;; empty comment.
5089 (define_insn "ssse3_pshufbv16qi3"
5090 [(set (match_operand:V16QI 0 "register_operand" "=x")
5091 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5092 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5095 "pshufb\t{%2, %0|%0, %2}";
5096 [(set_attr "type" "sselog1")
5097 (set_attr "mode" "TI")])
;; Insn "ssse3_pshufbv8qi3": emits pshufb on MMX registers ("y").
;; Modelled as a V8QI unspec of operand 1 (tied to the output) and
;; operand 2 (MMX register or memory).  The `;' after the output template
;; only starts an empty comment.
5099 (define_insn "ssse3_pshufbv8qi3"
5100 [(set (match_operand:V8QI 0 "register_operand" "=y")
5101 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5102 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5105 "pshufb\t{%2, %0|%0, %2}";
5106 [(set_attr "type" "sselog1")
5107 (set_attr "mode" "DI")])
;; Insn "ssse3_psign<mode>3", SSE form: instantiated once per mode in
;; SSEMODE124 (V16QI, V8HI, V4SI).  The mnemonic suffix comes from the
;; ssevecsize attribute, giving psignb, psignw and psignd.  Modelled as an
;; unspec of operand 1 (tied to the output) and operand 2 (SSE register or
;; memory).  The `;' after the output template only starts an empty
;; comment.
5109 (define_insn "ssse3_psign<mode>3"
5110 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5111 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5112 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5115 "psign<ssevecsize>\t{%2, %0|%0, %2}";
5116 [(set_attr "type" "sselog1")
5117 (set_attr "mode" "TI")])
;; Insn "ssse3_psign<mode>3", MMX form: instantiated once per mode in
;; MMXMODEI, on MMX registers ("y").  The mnemonic suffix comes from the
;; mmxvecsize attribute.  Modelled as an unspec of operand 1 (tied to the
;; output) and operand 2 (MMX register or memory).  The `;' after the
;; output template only starts an empty comment.
5119 (define_insn "ssse3_psign<mode>3"
5120 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5121 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5122 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5125 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
5126 [(set_attr "type" "sselog1")
5127 (set_attr "mode" "DI")])
;; Insn "ssse3_palignrti": emits palignr on SSE registers, with TImode
;; operands.  Operand 1 is tied to the output, operand 2 may be an SSE
;; register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_mul_8_operand.  The output code replaces operand 3 with
;; INTVAL (operands[3]) / 8 before printing, so the assembler sees one
;; eighth of the RTL value.
;; NOTE(review): presumably the RTL value is a bit count and the
;; instruction wants bytes -- confirm against the predicate definition.
5129 (define_insn "ssse3_palignrti"
5130 [(set (match_operand:TI 0 "register_operand" "=x")
5131 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5132 (match_operand:TI 2 "nonimmediate_operand" "xm")
5133 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5137 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5138 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5140 [(set_attr "type" "sseishft")
5141 (set_attr "mode" "TI")])
;; Insn "ssse3_palignrdi": emits palignr on MMX registers ("y"), with
;; DImode operands.  Operand 1 is tied to the output, operand 2 may be an
;; MMX register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_mul_8_operand.  The output code replaces operand 3 with
;; INTVAL (operands[3]) / 8 before printing, so the assembler sees one
;; eighth of the RTL value.
;; NOTE(review): presumably the RTL value is a bit count and the
;; instruction wants bytes -- confirm against the predicate definition.
5143 (define_insn "ssse3_palignrdi"
5144 [(set (match_operand:DI 0 "register_operand" "=y")
5145 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5146 (match_operand:DI 2 "nonimmediate_operand" "ym")
5147 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5151 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5152 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5154 [(set_attr "type" "sseishft")
5155 (set_attr "mode" "DI")])
;; Insn "abs<mode>2", SSE form: instantiated once per mode in SSEMODE124
;; (V16QI, V8HI, V4SI).  Uses the generic `abs' RTL code rather than an
;; unspec.  The mnemonic suffix comes from the ssevecsize attribute,
;; giving pabsb, pabsw and pabsd.  The source may be an SSE register or
;; memory and is not tied to the destination.  The `;' after the output
;; template only starts an empty comment.
5157 (define_insn "abs<mode>2"
5158 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5159 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5161 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
5162 [(set_attr "type" "sselog1")
5163 (set_attr "mode" "TI")])
;; Insn "abs<mode>2", MMX form: instantiated once per mode in MMXMODEI,
;; on MMX registers ("y").  Uses the generic `abs' RTL code rather than an
;; unspec.  The mnemonic suffix comes from the mmxvecsize attribute.  The
;; source may be an MMX register or memory and is not tied to the
;; destination.  The `;' after the output template only starts an empty
;; comment.
5165 (define_insn "abs<mode>2"
5166 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5167 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5169 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
5170 [(set_attr "type" "sselog1")
5171 (set_attr "mode" "DI")])
5173 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5175 ;; AMD SSE4A instructions
5177 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn "sse4a_vmmovntv2df": emits movntsd.  The destination is a DFmode
;; memory operand; the source, wrapped in a DF unspec, is element 0 of a
;; V2DF SSE register selected with vec_select.
5179 (define_insn "sse4a_vmmovntv2df"
5180 [(set (match_operand:DF 0 "memory_operand" "=m")
5181 (unspec:DF [(vec_select:DF
5182 (match_operand:V2DF 1 "register_operand" "x")
5183 (parallel [(const_int 0)]))]
5186 "movntsd\t{%1, %0|%0, %1}"
5187 [(set_attr "type" "ssemov")
5188 (set_attr "mode" "DF")])
;; Insn "sse4a_movntdf": emits movntsd.  The destination is a DFmode
;; memory operand; the source, wrapped in a DF unspec, is a scalar DFmode
;; value in an SSE register.
5190 (define_insn "sse4a_movntdf"
5191 [(set (match_operand:DF 0 "memory_operand" "=m")
5192 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5195 "movntsd\t{%1, %0|%0, %1}"
5196 [(set_attr "type" "ssemov")
5197 (set_attr "mode" "DF")])
;; Insn "sse4a_vmmovntv4sf": emits movntss.  The destination is an SFmode
;; memory operand; the source, wrapped in an SF unspec, is element 0 of a
;; V4SF SSE register selected with vec_select.
5199 (define_insn "sse4a_vmmovntv4sf"
5200 [(set (match_operand:SF 0 "memory_operand" "=m")
5201 (unspec:SF [(vec_select:SF
5202 (match_operand:V4SF 1 "register_operand" "x")
5203 (parallel [(const_int 0)]))]
5206 "movntss\t{%1, %0|%0, %1}"
5207 [(set_attr "type" "ssemov")
5208 (set_attr "mode" "SF")])
;; Insn "sse4a_movntsf": emits movntss.  The destination is an SFmode
;; memory operand; the source, wrapped in an SF unspec, is a scalar SFmode
;; value in an SSE register.
5210 (define_insn "sse4a_movntsf"
5211 [(set (match_operand:SF 0 "memory_operand" "=m")
5212 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5215 "movntss\t{%1, %0|%0, %1}"
5216 [(set_attr "type" "ssemov")
5217 (set_attr "mode" "SF")])
;; Insn "sse4a_extrqi": immediate form of extrq.  Modelled as a V2DI
;; unspec of operand 1 (tied to the output) and two const_int operands, 2
;; and 3, which have no mode and no constraint.  Both immediates are
;; printed; in the AT&T alternative operand 3 comes first.
5219 (define_insn "sse4a_extrqi"
5220 [(set (match_operand:V2DI 0 "register_operand" "=x")
5221 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5222 (match_operand 2 "const_int_operand" "")
5223 (match_operand 3 "const_int_operand" "")]
5226 "extrq\t{%3, %2, %0|%0, %2, %3}"
5227 [(set_attr "type" "sse")
5228 (set_attr "mode" "TI")])
;; Insn "sse4a_extrq": register form of extrq.  Modelled as a V2DI unspec
;; of operand 1 (V2DI, tied to the output) and operand 2, a V16QI SSE
;; register.
5230 (define_insn "sse4a_extrq"
5231 [(set (match_operand:V2DI 0 "register_operand" "=x")
5232 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5233 (match_operand:V16QI 2 "register_operand" "x")]
5236 "extrq\t{%2, %0|%0, %2}"
5237 [(set_attr "type" "sse")
5238 (set_attr "mode" "TI")])
;; Insn "sse4a_insertqi": immediate form of insertq.  Modelled as a V2DI
;; unspec of operand 1 (tied to the output), operand 2 (a V2DI SSE
;; register) and two const_int operands, 3 and 4, which have no mode and
;; no constraint.  Both immediates are printed; in the AT&T alternative
;; operand 4 comes first.
5240 (define_insn "sse4a_insertqi"
5241 [(set (match_operand:V2DI 0 "register_operand" "=x")
5242 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5243 (match_operand:V2DI 2 "register_operand" "x")
5244 (match_operand 3 "const_int_operand" "")
5245 (match_operand 4 "const_int_operand" "")]
5248 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5249 [(set_attr "type" "sseins")
5250 (set_attr "mode" "TI")])
;; Insn "sse4a_insertq": register form of insertq.  Modelled as a V2DI
;; unspec of operand 1 (tied to the output) and operand 2, a V2DI SSE
;; register.
5252 (define_insn "sse4a_insertq"
5253 [(set (match_operand:V2DI 0 "register_operand" "=x")
5254 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5255 (match_operand:V2DI 2 "register_operand" "x")]
5258 "insertq\t{%2, %0|%0, %2}"
5259 [(set_attr "type" "sseins")
5260 (set_attr "mode" "TI")])