1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Mode macro over the four 16-byte integer vector modes; patterns written
;; with SSEMODEI are instantiated once per listed mode.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name appear to
;; give the element size in bytes of the modes included (1 = V16QI,
;; 2 = V8HI, 4 = V4SI, 8 = V2DI) -- inferred from the lists themselves.
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for V16QI/V8HI/V4SI/V2DI respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the SSEMODEI integer vector modes.  Both operands
;; accept any nonimmediate operand; the preparation code passes the
;; instantiated mode and the operand array to ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Matching insn for SSEMODEI moves.  Constraint alternatives:
;;   0: SSE constant (C) -> SSE register   (type sselog1)
;;   1: SSE register or memory -> register (type ssemov)
;;   2: SSE register -> memory             (type ssemov)
;; The insn condition rejects memory-to-memory moves.
;; The output code switches on which_alternative; it returns either the
;; string from standard_sse_constant_opcode, "movaps" when the insn's mode
;; attribute is V4SF, or "movdqa".
;; The mode attribute is computed from optimize_size, TARGET_SSE2 and (for
;; alternative 2 only) TARGET_SSE_TYPELESS_STORES; "TI" is the fallback.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 return standard_sse_constant_opcode (insn, operands[1]);
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
74 return "movdqa\t{%1, %0|%0, %1}";
79 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_string "TI")))])
;; Move expander for V4SF.  Both operands accept any nonimmediate operand;
;; the preparation code delegates to ix86_expand_vector_move with V4SFmode.
90 (define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
95 ix86_expand_vector_move (V4SFmode, operands);
;; Matching insn for V4SF moves.  Constraint alternatives:
;;   0: SSE constant (C) -> register, 1: register/memory -> register,
;;   2: register -> memory.
;; The output code switches on which_alternative and returns either the
;; string from standard_sse_constant_opcode or "movaps".  The mode
;; attribute is always V4SF.
99 (define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
104 switch (which_alternative)
107 return standard_sse_constant_opcode (insn, operands[1]);
110 return "movaps\t{%1, %0|%0, %1}";
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (condition requires reload_completed) of a V4SF
;; register set from a zero_extended_scalar_load_operand.  The replacement
;; is expressed with (vec_duplicate:V4SF operand 1); the preparation code
;; narrows operand 1 to its SFmode subreg at byte offset 0 and sets
;; operand 2 to the V4SF zero constant.
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
124 (vec_duplicate:V4SF (match_dup 1))
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF.  Both operands accept any nonimmediate operand;
;; the preparation code delegates to ix86_expand_vector_move with V2DFmode.
132 (define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
137 ix86_expand_vector_move (V2DFmode, operands);
;; Matching insn for V2DF moves.  Constraint alternatives:
;;   0: SSE constant (C) -> register, 1: register/memory -> register,
;;   2: register -> memory.
;; The insn condition rejects memory-to-memory moves.
;; The output code switches on which_alternative; it returns either the
;; string from standard_sse_constant_opcode, "movaps" when the insn's mode
;; attribute is V4SF, or "movapd".
;; The mode attribute is V4SF when optimize_size is nonzero, when
;; TARGET_SSE2 is zero, or for alternative 2 under the
;; TARGET_SSE_TYPELESS_STORES test; otherwise it is V2DF.
141 (define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
146 switch (which_alternative)
149 return standard_sse_constant_opcode (insn, operands[1]);
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
155 return "movapd\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
168 (const_string "V4SF")
169 (const_string "V2DF")))])
;; Post-reload rewrite (condition requires TARGET_SSE2 and
;; reload_completed) of a V2DF register set from a
;; zero_extended_scalar_load_operand.  The replacement concatenates
;; operand 1 with operand 2; the preparation code narrows operand 1 to its
;; DFmode subreg at byte offset 0 and sets operand 2 to the DFmode zero
;; constant, so the upper element is zero.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode.  Takes a single register
;; operand and hands it, with the instantiated mode, to ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE vector mode.  The
;; preparation code delegates to ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move wrapped in an unspec, emitted as movups.  Alternatives:
;; register/memory -> register, and register -> memory; the condition
;; rejects memory-to-memory.
;; The mode attribute is V4SF to match the operand mode and the movups
;; encoding; it was previously V2DF, the value used by the V2DF sibling
;; sse2_movupd, which would make attribute-derived properties treat this
;; insn as a packed-double operation.
198 (define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
;; V2DF move wrapped in an unspec, emitted as movupd.  Alternatives:
;; register/memory -> register, and register -> memory; the condition
;; requires TARGET_SSE2 and rejects memory-to-memory.
207 (define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
;; V16QI move wrapped in an unspec, emitted as movdqu.  Alternatives:
;; register/memory -> register, and register -> memory; the condition
;; requires TARGET_SSE2 and rejects memory-to-memory.  Mode attribute TI.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory, wrapped in an unspec and
;; emitted as movntps.  Operand 0 must be memory, operand 1 a register.
225 (define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory, wrapped in an unspec and
;; emitted as movntpd.  Operand 0 must be memory, operand 1 a register.
;; NOTE(review): type is "ssecvt" here but "ssemov" on sse_movntv4sf --
;; confirm the difference is intended.
234 (define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory, wrapped in an unspec and
;; emitted as movntdq.  Operand 0 must be memory, operand 1 a register.
;; Mode attribute TI.
243 (define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, wrapped in an
;; unspec and emitted as movnti.
;; NOTE(review): the mode attribute is V2DF although both operands are
;; SImode -- confirm whether this is intentional.
252 (define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "V2DF")])
;; Load of a V16QI value from memory into an SSE register, wrapped in an
;; unspec and emitted as lddqu.  Operand 1 must be memory.
261 (define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
272 ;; Parallel single-precision floating point arithmetic
274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (called with NEG); DONE suppresses
;; emission of the RTL template itself.
276 (define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (called with ABS); DONE suppresses
;; emission of the RTL template itself.
282 (define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both sources may be register or memory; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; template is emitted (no DONE, so the template is used).
288 (define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add, emitted as addps.  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory.  Valid only when ix86_binary_operator_ok accepts
;; the operand combination.
295 (define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
;; Add on V4SF operands emitted as the scalar instruction addss (mode
;; attribute SF).  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an SSE register or memory.
304 (define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 (the minuend) must be a register
;; since subtraction is not commutative; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
316 (define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract, emitted as subps.  Operand 1 is tied
;; to the destination ("0", not commutative); operand 2 may be an SSE
;; register or memory.
323 (define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
;; Subtract on V4SF operands emitted as the scalar instruction subss (mode
;; attribute SF).  Operand 1 is tied to the destination; operand 2 may be
;; an SSE register or memory.
332 (define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both sources may be register or memory;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
344 (define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply, emitted as mulps.  Operand 1 is tied
;; to the destination and marked commutative ("%0"); operand 2 may be an
;; SSE register or memory.  Valid only when ix86_binary_operator_ok
;; accepts the operand combination.
351 (define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
;; Multiply on V4SF operands emitted as the scalar instruction mulss (mode
;; attribute SF).  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an SSE register or memory.
360 (define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 (the dividend) must be a register
;; since division is not commutative; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
372 (define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed single-precision divide, emitted as divps.  Operand 1 is tied to
;; the destination ("0", not commutative); operand 2 may be an SSE
;; register or memory.
379 (define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
;; Divide on V4SF operands emitted as the scalar instruction divss (mode
;; attribute SF).  Operand 1 is tied to the destination; operand 2 may be
;; an SSE register or memory.
388 (define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
;; Packed reciprocal, represented as UNSPEC_RCP of operand 1 and emitted
;; as rcpps.  The source may be an SSE register or memory and is not tied
;; to the destination.
400 (define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
;; Reciprocal emitted as the scalar instruction rcpss (mode attribute SF).
;; Operand 1 is the source (register or memory); operand 2 is a register
;; tied to the destination ("0").  Only operand 1 appears in the emitted
;; instruction text.
409 (define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
414 (match_operand:V4SF 2 "register_operand" "0")
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
;; Packed reciprocal square root, represented as UNSPEC_RSQRT of operand 1
;; and emitted as rsqrtps.  The source may be an SSE register or memory.
421 (define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
;; Reciprocal square root emitted as the scalar instruction rsqrtss (mode
;; attribute SF).  Operand 1 is the source (register or memory);
;; operand 2 is a register tied to the destination ("0").
430 (define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
435 (match_operand:V4SF 2 "register_operand" "0")
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
;; Packed single-precision square root, emitted as sqrtps.  The source may
;; be an SSE register or memory and is not tied to the destination.
442 (define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
;; Square root emitted as the scalar instruction sqrtss (mode attribute
;; SF).  Operand 1 is the source (register or memory); operand 2 is a
;; register tied to the destination ("0").
450 (define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
461 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462 ;; isn't really correct, as those rtl operators aren't defined when
463 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  When flag_finite_math_only is off,
;; operand 1 is forced into a register first, which matches the
;; register_operand requirement of the non-finite insn (*smaxv4sf3).
;; The operands are then adjusted by ix86_fixup_binary_operands_no_copy.
465 (define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed maximum for finite-math-only code, emitted as maxps.  Because
;; flag_finite_math_only is required, the operands are treated as
;; commutative ("%0" on operand 1); operand 2 may be register or memory.
476 (define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
;; Packed maximum, emitted as maxps, with non-commutative operands:
;; operand 1 must be a register tied to the destination ("0"); operand 2
;; may be register or memory.
486 (define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
;; Maximum emitted as the scalar instruction maxss (mode attribute SF)
;; for finite-math-only code.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be register or memory.
495 (define_insn "*sse_vmsmaxv4sf3_finite"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
502 "TARGET_SSE && flag_finite_math_only
503 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
504 "maxss\t{%2, %0|%0, %2}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
;; Maximum emitted as the scalar instruction maxss (mode attribute SF)
;; with non-commutative operands: operand 1 must be a register tied to
;; the destination ("0"); operand 2 may be register or memory.
508 (define_insn "sse_vmsmaxv4sf3"
509 [(set (match_operand:V4SF 0 "register_operand" "=x")
511 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
512 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
516 "maxss\t{%2, %0|%0, %2}"
517 [(set_attr "type" "sse")
518 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  When flag_finite_math_only is off,
;; operand 1 is forced into a register first, which matches the
;; register_operand requirement of the non-finite insn (*sminv4sf3).
;; The operands are then adjusted by ix86_fixup_binary_operands_no_copy.
520 (define_expand "sminv4sf3"
521 [(set (match_operand:V4SF 0 "register_operand" "")
522 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
523 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
526 if (!flag_finite_math_only)
527 operands[1] = force_reg (V4SFmode, operands[1]);
528 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed minimum for finite-math-only code, emitted as minps.  Because
;; flag_finite_math_only is required, the operands are treated as
;; commutative ("%0" on operand 1); operand 2 may be register or memory.
531 (define_insn "*sminv4sf3_finite"
532 [(set (match_operand:V4SF 0 "register_operand" "=x")
533 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
534 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
535 "TARGET_SSE && flag_finite_math_only
536 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
537 "minps\t{%2, %0|%0, %2}"
538 [(set_attr "type" "sse")
539 (set_attr "mode" "V4SF")])
;; Packed minimum, emitted as minps, with non-commutative operands:
;; operand 1 must be a register tied to the destination ("0"); operand 2
;; may be register or memory.
541 (define_insn "*sminv4sf3"
542 [(set (match_operand:V4SF 0 "register_operand" "=x")
543 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
544 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
546 "minps\t{%2, %0|%0, %2}"
547 [(set_attr "type" "sse")
548 (set_attr "mode" "V4SF")])
;; Minimum emitted as the scalar instruction minss (mode attribute SF)
;; for finite-math-only code.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be register or memory.
550 (define_insn "*sse_vmsminv4sf3_finite"
551 [(set (match_operand:V4SF 0 "register_operand" "=x")
553 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
554 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
557 "TARGET_SSE && flag_finite_math_only
558 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
559 "minss\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sse")
561 (set_attr "mode" "SF")])
;; Minimum emitted as the scalar instruction minss (mode attribute SF)
;; with non-commutative operands: operand 1 must be a register tied to
;; the destination ("0"); operand 2 may be register or memory.
563 (define_insn "sse_vmsminv4sf3"
564 [(set (match_operand:V4SF 0 "register_operand" "=x")
566 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
567 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
571 "minss\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sse")
573 (set_attr "mode" "SF")])
575 ;; These versions of the min/max patterns implement exactly the operations
576 ;; min = (op1 < op2 ? op1 : op2)
577 ;; max = (!(op1 < op2) ? op1 : op2)
578 ;; Their operands are not commutative, and thus they may be used in the
579 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum expressed as an unspec of operands 1 and
;; 2 and emitted as minps.  Operand 1 must be a register tied to the
;; destination ("0"); operand 2 may be register or memory.
581 (define_insn "*ieee_sminv4sf3"
582 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
587 "minps\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum expressed as an unspec of operands 1 and
;; 2 and emitted as maxps.  Operand 1 must be a register tied to the
;; destination ("0"); operand 2 may be register or memory.
591 (define_insn "*ieee_smaxv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
593 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
594 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
597 "maxps\t{%2, %0|%0, %2}"
598 [(set_attr "type" "sseadd")
599 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum expressed as an unspec of operands 1 and
;; 2 and emitted as minpd.  Operand 1 must be a register tied to the
;; destination ("0"); operand 2 may be register or memory.
601 (define_insn "*ieee_sminv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
603 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
604 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
607 "minpd\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum expressed as an unspec of operands 1 and
;; 2 and emitted as maxpd.  Operand 1 must be a register tied to the
;; destination ("0"); operand 2 may be register or memory.
611 (define_insn "*ieee_smaxv2df3"
612 [(set (match_operand:V2DF 0 "register_operand" "=x")
613 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
614 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
617 "maxpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
;; Combined add/subtract on V4SF, emitted as addsubps.  The pattern
;; includes a (minus operand1 operand2) term built with match_dup, so both
;; sub-expressions share the same two operands.  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
621 (define_insn "sse3_addsubv4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
625 (match_operand:V4SF 1 "register_operand" "0")
626 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
627 (minus:V4SF (match_dup 1) (match_dup 2))
630 "addsubps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
;; Horizontal operation on V4SF emitted as haddps.  The pattern picks the
;; SF elements 0, 1, 2 and 3 of operand 1 and then of operand 2 with
;; vec_select.  Operand 1 is a register tied to the destination;
;; operand 2 may be register or memory.
634 (define_insn "sse3_haddv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
656 "haddps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
;; Horizontal operation on V4SF emitted as hsubps.  The pattern picks the
;; SF elements 0, 1, 2 and 3 of operand 1 and then of operand 2 with
;; vec_select.  Operand 1 is a register tied to the destination;
;; operand 2 may be register or memory.
660 (define_insn "sse3_hsubv4sf3"
661 [(set (match_operand:V4SF 0 "register_operand" "=x")
666 (match_operand:V4SF 1 "register_operand" "0")
667 (parallel [(const_int 0)]))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
670 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
671 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
675 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
676 (parallel [(const_int 0)]))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
679 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
680 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
682 "hsubps\t{%2, %0|%0, %2}"
683 [(set_attr "type" "sseadd")
684 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  The preparation code contains two
;; strategies:
;;  - apply sse3_haddv4sf3 twice (operand 1 with itself into a fresh
;;    temporary, then the temporary with itself into operand 0);
;;  - call ix86_expand_reduc_v4sf with gen_addv4sf3 as the combiner.
;; NOTE(review): presumably the first is chosen when SSE3 is available --
;; confirm against the selecting condition.
686 (define_expand "reduc_splus_v4sf"
687 [(match_operand:V4SF 0 "register_operand" "")
688 (match_operand:V4SF 1 "register_operand" "")]
693 rtx tmp = gen_reg_rtx (V4SFmode);
694 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
695 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
698 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
702 (define_expand "reduc_smax_v4sf"
703 [(match_operand:V4SF 0 "register_operand" "")
704 (match_operand:V4SF 1 "register_operand" "")]
707 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
711 (define_expand "reduc_smin_v4sf"
712 [(match_operand:V4SF 0 "register_operand" "")
713 (match_operand:V4SF 1 "register_operand" "")]
716 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
722 ;; Parallel single-precision floating point comparisons
724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare producing a V4SF result, emitted as cmp<cond>ps.  The
;; comparison code is operand 3 (any sse_comparison_operator) and is
;; printed through the %D3 output modifier.  Operand 1 is a register tied
;; to the destination; operand 2 may be register or memory.
726 (define_insn "sse_maskcmpv4sf3"
727 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operator:V4SF 3 "sse_comparison_operator"
729 [(match_operand:V4SF 1 "register_operand" "0")
730 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
732 "cmp%D3ps\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssecmp")
734 (set_attr "mode" "V4SF")])
;; Compare emitted as the scalar instruction cmp<cond>ss (mode attribute
;; SF).  The comparison code is operand 3, printed through %D3.
;; Operand 1 is a register tied to the destination; unlike the packed
;; form, operand 2 is restricted to an SSE register here.
736 (define_insn "sse_vmmaskcmpv4sf3"
737 [(set (match_operand:V4SF 0 "register_operand" "=x")
739 (match_operator:V4SF 3 "sse_comparison_operator"
740 [(match_operand:V4SF 1 "register_operand" "0")
741 (match_operand:V4SF 2 "register_operand" "x")])
745 "cmp%D3ss\t{%2, %0|%0, %2}"
746 [(set_attr "type" "ssecmp")
747 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from element 0 (parallel [0]) of two V4SF
;; operands; emitted as comiss.  Operand 0 must be an SSE register;
;; operand 1 may be register or memory.
749 (define_insn "sse_comi"
750 [(set (reg:CCFP FLAGS_REG)
753 (match_operand:V4SF 0 "register_operand" "x")
754 (parallel [(const_int 0)]))
756 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
757 (parallel [(const_int 0)]))))]
759 "comiss\t{%1, %0|%0, %1}"
760 [(set_attr "type" "ssecomi")
761 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from element 0 (parallel [0]) of two V4SF
;; operands; emitted as ucomiss.  Operand 0 must be an SSE register;
;; operand 1 may be register or memory.
763 (define_insn "sse_ucomi"
764 [(set (reg:CCFPU FLAGS_REG)
767 (match_operand:V4SF 0 "register_operand" "x")
768 (parallel [(const_int 0)]))
770 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
771 (parallel [(const_int 0)]))))]
773 "ucomiss\t{%1, %0|%0, %1}"
774 [(set_attr "type" "ssecomi")
775 (set_attr "mode" "SF")])
;; Vector conditional expander for V4SF.  Operands 4 and 5 are the values
;; being compared; operands 1 and 2 are general operands that follow the
;; comparison in the pattern.  The preparation code tries
;; ix86_expand_fp_vcond and branches on its result.
777 (define_expand "vcondv4sf"
778 [(set (match_operand:V4SF 0 "register_operand" "")
781 [(match_operand:V4SF 4 "nonimmediate_operand" "")
782 (match_operand:V4SF 5 "nonimmediate_operand" "")])
783 (match_operand:V4SF 1 "general_operand" "")
784 (match_operand:V4SF 2 "general_operand" "")))]
787 if (ix86_expand_fp_vcond (operands))
793 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
795 ;; Parallel single-precision floating point logical operations
797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise AND expander for V4SF.  Both sources may be register or
;; memory; operands are adjusted by ix86_fixup_binary_operands_no_copy.
799 (define_expand "andv4sf3"
800 [(set (match_operand:V4SF 0 "register_operand" "")
801 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
802 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
804 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Bitwise AND on V4SF, emitted as andps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be register
;; or memory.  Valid only when ix86_binary_operator_ok accepts the
;; operand combination.
806 (define_insn "*andv4sf3"
807 [(set (match_operand:V4SF 0 "register_operand" "=x")
808 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
809 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
810 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
811 "andps\t{%2, %0|%0, %2}"
812 [(set_attr "type" "sselog")
813 (set_attr "mode" "V4SF")])
;; (NOT operand1) AND operand2 on V4SF, emitted as andnps.  The
;; complemented operand 1 must be a register tied to the destination;
;; operand 2 may be register or memory.
815 (define_insn "sse_nandv4sf3"
816 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
818 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
820 "andnps\t{%2, %0|%0, %2}"
821 [(set_attr "type" "sselog")
822 (set_attr "mode" "V4SF")])
;; Bitwise inclusive-OR expander for V4SF.  Both sources may be register
;; or memory; operands are adjusted by ix86_fixup_binary_operands_no_copy.
824 (define_expand "iorv4sf3"
825 [(set (match_operand:V4SF 0 "register_operand" "")
826 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
827 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
829 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Bitwise inclusive OR on V4SF, emitted as orps.  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be
;; register or memory.  Valid only when ix86_binary_operator_ok accepts
;; the operand combination.
831 (define_insn "*iorv4sf3"
832 [(set (match_operand:V4SF 0 "register_operand" "=x")
833 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
834 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
835 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
836 "orps\t{%2, %0|%0, %2}"
837 [(set_attr "type" "sselog")
838 (set_attr "mode" "V4SF")])
;; Bitwise exclusive-OR expander for V4SF.  Both sources may be register
;; or memory; operands are adjusted by ix86_fixup_binary_operands_no_copy.
840 (define_expand "xorv4sf3"
841 [(set (match_operand:V4SF 0 "register_operand" "")
842 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
843 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
845 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Bitwise exclusive OR on V4SF, emitted as xorps.  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be
;; register or memory.  Valid only when ix86_binary_operator_ok accepts
;; the operand combination.
847 (define_insn "*xorv4sf3"
848 [(set (match_operand:V4SF 0 "register_operand" "=x")
849 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
850 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
851 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
852 "xorps\t{%2, %0|%0, %2}"
853 [(set_attr "type" "sselog")
854 (set_attr "mode" "V4SF")])
856 ;; Also define scalar versions. These are used for abs, neg, and
857 ;; conditional move. Using subregs into vector modes causes register
858 ;; allocation lossage. These patterns do not allow memory operands
859 ;; because the native instructions read the full 128-bits.
;; Scalar SFmode bitwise AND implemented with the full-width andps
;; instruction (mode attribute V4SF).  Both sources must be SSE
;; registers; operand 1 is tied to the destination.
861 (define_insn "*andsf3"
862 [(set (match_operand:SF 0 "register_operand" "=x")
863 (and:SF (match_operand:SF 1 "register_operand" "0")
864 (match_operand:SF 2 "register_operand" "x")))]
866 "andps\t{%2, %0|%0, %2}"
867 [(set_attr "type" "sselog")
868 (set_attr "mode" "V4SF")])
;; Scalar SFmode (NOT operand1) AND operand2 implemented with the
;; full-width andnps instruction (mode attribute V4SF).  Both sources must
;; be SSE registers; the complemented operand 1 is tied to the
;; destination.
870 (define_insn "*nandsf3"
871 [(set (match_operand:SF 0 "register_operand" "=x")
872 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
873 (match_operand:SF 2 "register_operand" "x")))]
875 "andnps\t{%2, %0|%0, %2}"
876 [(set_attr "type" "sselog")
877 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise inclusive OR implemented with the full-width
;; orps instruction (mode attribute V4SF).  Both sources must be SSE
;; registers; operand 1 is tied to the destination.
879 (define_insn "*iorsf3"
880 [(set (match_operand:SF 0 "register_operand" "=x")
881 (ior:SF (match_operand:SF 1 "register_operand" "0")
882 (match_operand:SF 2 "register_operand" "x")))]
884 "orps\t{%2, %0|%0, %2}"
885 [(set_attr "type" "sselog")
886 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise exclusive OR implemented with the full-width
;; xorps instruction (mode attribute V4SF).  Both sources must be SSE
;; registers; operand 1 is tied to the destination.
888 (define_insn "*xorsf3"
889 [(set (match_operand:SF 0 "register_operand" "=x")
890 (xor:SF (match_operand:SF 1 "register_operand" "0")
891 (match_operand:SF 2 "register_operand" "x")))]
893 "xorps\t{%2, %0|%0, %2}"
894 [(set_attr "type" "sselog")
895 (set_attr "mode" "V4SF")])
897 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
899 ;; Parallel single-precision floating point conversion operations
901 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI operand (MMX register "y" or memory) to V2SF with
;; float and combines it with V4SF operand 1, which is tied to the
;; destination; emitted as cvtpi2ps.
903 (define_insn "sse_cvtpi2ps"
904 [(set (match_operand:V4SF 0 "register_operand" "=x")
907 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
908 (match_operand:V4SF 1 "register_operand" "0")
911 "cvtpi2ps\t{%2, %0|%0, %2}"
912 [(set_attr "type" "ssecvt")
913 (set_attr "mode" "V4SF")])
;; Converts a V4SF operand to integers via an unspec:V4SI and keeps
;; elements 0 and 1 as a V2SI result in an MMX register ("y"); emitted as
;; cvtps2pi.  The unit attribute is "mmx".
915 (define_insn "sse_cvtps2pi"
916 [(set (match_operand:V2SI 0 "register_operand" "=y")
918 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
920 (parallel [(const_int 0) (const_int 1)])))]
922 "cvtps2pi\t{%1, %0|%0, %1}"
923 [(set_attr "type" "ssecvt")
924 (set_attr "unit" "mmx")
925 (set_attr "mode" "DI")])
;; Converts a V4SF operand to V4SI with fix and keeps elements 0 and 1 as
;; a V2SI result in an MMX register ("y"); emitted as cvttps2pi.  The unit
;; attribute is "mmx".
;; NOTE(review): mode attribute is "SF" here but "DI" on sse_cvtps2pi --
;; confirm which is intended.
927 (define_insn "sse_cvttps2pi"
928 [(set (match_operand:V2SI 0 "register_operand" "=y")
930 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
931 (parallel [(const_int 0) (const_int 1)])))]
933 "cvttps2pi\t{%1, %0|%0, %1}"
934 [(set_attr "type" "ssecvt")
935 (set_attr "unit" "mmx")
936 (set_attr "mode" "SF")])
;; Converts an SImode integer (general register or memory, one
;; alternative each) to SF and combines it with V4SF operand 1, which is
;; tied to the destination; emitted as cvtsi2ss.  athlon_decode differs
;; per alternative: vector for the register form, double for memory.
938 (define_insn "sse_cvtsi2ss"
939 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
942 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
943 (match_operand:V4SF 1 "register_operand" "0,0")
946 "cvtsi2ss\t{%2, %0|%0, %2}"
947 [(set_attr "type" "sseicvt")
948 (set_attr "athlon_decode" "vector,double")
949 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: converts a DImode integer to SF and
;; combines it with V4SF operand 1 (tied to the destination); emitted as
;; cvtsi2ssq.  Requires TARGET_64BIT.
;; NOTE(review): the second alternative for operand 2 is "rm", whereas
;; sse_cvtsi2ss uses "m"; a register also satisfies "rm", so the
;; athlon_decode "double" entry can apply to a register source -- confirm
;; this is intended.
951 (define_insn "sse_cvtsi2ssq"
952 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
955 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
956 (match_operand:V4SF 1 "register_operand" "0,0")
958 "TARGET_SSE && TARGET_64BIT"
959 "cvtsi2ssq\t{%2, %0|%0, %2}"
960 [(set_attr "type" "sseicvt")
961 (set_attr "athlon_decode" "vector,double")
962 (set_attr "mode" "SF")])
;; Converts element 0 of a V4SF operand (SSE register or memory) to an
;; SImode integer in a general register, modelled as UNSPEC_FIX_NOTRUNC;
;; emitted as cvtss2si.  athlon_decode is double for the register source
;; and vector for the memory source.
964 (define_insn "sse_cvtss2si"
965 [(set (match_operand:SI 0 "register_operand" "=r,r")
968 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
969 (parallel [(const_int 0)]))]
970 UNSPEC_FIX_NOTRUNC))]
972 "cvtss2si\t{%1, %0|%0, %1}"
973 [(set_attr "type" "sseicvt")
974 (set_attr "athlon_decode" "double,vector")
975 (set_attr "mode" "SI")])
;; Same conversion as sse_cvtss2si but taking a scalar SFmode source
;; directly (SSE register or memory) instead of element 0 of a V4SF;
;; modelled as UNSPEC_FIX_NOTRUNC and emitted as cvtss2si.
977 (define_insn "sse_cvtss2si_2"
978 [(set (match_operand:SI 0 "register_operand" "=r,r")
979 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
980 UNSPEC_FIX_NOTRUNC))]
982 "cvtss2si\t{%1, %0|%0, %1}"
983 [(set_attr "type" "sseicvt")
984 (set_attr "athlon_decode" "double,vector")
985 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si: converts element 0 of a V4SF operand
;; to a DImode integer in a general register, modelled as
;; UNSPEC_FIX_NOTRUNC; emitted as cvtss2siq.  Requires TARGET_64BIT.
987 (define_insn "sse_cvtss2siq"
988 [(set (match_operand:DI 0 "register_operand" "=r,r")
991 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
992 (parallel [(const_int 0)]))]
993 UNSPEC_FIX_NOTRUNC))]
994 "TARGET_SSE && TARGET_64BIT"
995 "cvtss2siq\t{%1, %0|%0, %1}"
996 [(set_attr "type" "sseicvt")
997 (set_attr "athlon_decode" "double,vector")
998 (set_attr "mode" "DI")])
;; 64-bit variant of sse_cvtss2si_2: converts a scalar SFmode source (SSE
;; register or memory) to a DImode integer in a general register,
;; modelled as UNSPEC_FIX_NOTRUNC; emitted as cvtss2siq.  Requires
;; TARGET_64BIT.
1000 (define_insn "sse_cvtss2siq_2"
1001 [(set (match_operand:DI 0 "register_operand" "=r,r")
1002 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1003 UNSPEC_FIX_NOTRUNC))]
1004 "TARGET_SSE && TARGET_64BIT"
1005 "cvtss2siq\t{%1, %0|%0, %1}"
1006 [(set_attr "type" "sseicvt")
1007 (set_attr "athlon_decode" "double,vector")
1008 (set_attr "mode" "DI")])
;; Converts element 0 of a V4SF operand (SSE register or memory) to an
;; SImode integer in a general register; emitted as cvttss2si.
;; athlon_decode is double for the register source and vector for the
;; memory source.
1010 (define_insn "sse_cvttss2si"
1011 [(set (match_operand:SI 0 "register_operand" "=r,r")
1014 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1015 (parallel [(const_int 0)]))))]
1017 "cvttss2si\t{%1, %0|%0, %1}"
1018 [(set_attr "type" "sseicvt")
1019 (set_attr "athlon_decode" "double,vector")
1020 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvttss2si: converts element 0 of a V4SF operand
;; to a DImode integer in a general register; emitted as cvttss2siq.
;; Requires TARGET_64BIT.
1022 (define_insn "sse_cvttss2siq"
1023 [(set (match_operand:DI 0 "register_operand" "=r,r")
1026 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1027 (parallel [(const_int 0)]))))]
1028 "TARGET_SSE && TARGET_64BIT"
1029 "cvttss2siq\t{%1, %0|%0, %1}"
1030 [(set_attr "type" "sseicvt")
1031 (set_attr "athlon_decode" "double,vector")
1032 (set_attr "mode" "DI")])
;; Converts a V4SI operand (SSE register or memory) to V4SF with float;
;; emitted as cvtdq2ps.
;; The mode attribute is V4SF to match the result mode and the
;; packed-single instruction; it was previously V2DF, which would make
;; attribute-derived properties treat this insn as a packed-double
;; operation.
1034 (define_insn "sse2_cvtdq2ps"
1035 [(set (match_operand:V4SF 0 "register_operand" "=x")
1036 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1038 "cvtdq2ps\t{%1, %0|%0, %1}"
1039 [(set_attr "type" "ssecvt")
1040 (set_attr "mode" "V4SF")])
;; Converts a V4SF operand (SSE register or memory) to V4SI, modelled as
;; UNSPEC_FIX_NOTRUNC; emitted as cvtps2dq.  Mode attribute TI.
1042 (define_insn "sse2_cvtps2dq"
1043 [(set (match_operand:V4SI 0 "register_operand" "=x")
1044 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1045 UNSPEC_FIX_NOTRUNC))]
1047 "cvtps2dq\t{%1, %0|%0, %1}"
1048 [(set_attr "type" "ssecvt")
1049 (set_attr "mode" "TI")])
;; Converts a V4SF operand (SSE register or memory) to V4SI with fix;
;; emitted as cvttps2dq.  Mode attribute TI.
1051 (define_insn "sse2_cvttps2dq"
1052 [(set (match_operand:V4SI 0 "register_operand" "=x")
1053 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1055 "cvttps2dq\t{%1, %0|%0, %1}"
1056 [(set_attr "type" "ssecvt")
1057 (set_attr "mode" "TI")])
1059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1061 ;; Parallel single-precision floating point element swizzling
1063 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, all with operand 1 tied to operand 0:
;;   register <- register        : movhlps
;;   register <- offsettable mem : movlps using %H2
;;   memory   <- register        : movhps
;; The condition rejects operands 1 and 2 both being memory.
;; NOTE(review): %H2 presumably addresses the upper half of the memory
;; operand -- confirm against the target's operand-printing code.
1065 (define_insn "sse_movhlps"
1066 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1069 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1070 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1071 (parallel [(const_int 6)
1075 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1077 movhlps\t{%2, %0|%0, %2}
1078 movlps\t{%H2, %0|%0, %H2}
1079 movhps\t{%2, %0|%0, %2}"
1080 [(set_attr "type" "ssemov")
1081 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives, all with operand 1 tied to operand 0:
;;   register <- register        : movlhps
;;   register <- memory          : movhps
;;   offsettable mem <- register : movlps storing to %H0
;; Operand validity is delegated to ix86_binary_operator_ok.
1083 (define_insn "sse_movlhps"
1084 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1087 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1088 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1089 (parallel [(const_int 0)
1093 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1095 movlhps\t{%2, %0|%0, %2}
1096 movhps\t{%2, %0|%0, %2}
1097 movlps\t{%2, %H0|%H0, %2}"
1098 [(set_attr "type" "ssemov")
1099 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: selects elements 2, 6, 3, 7 from operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
1101 (define_insn "sse_unpckhps"
1102 [(set (match_operand:V4SF 0 "register_operand" "=x")
1105 (match_operand:V4SF 1 "register_operand" "0")
1106 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1107 (parallel [(const_int 2) (const_int 6)
1108 (const_int 3) (const_int 7)])))]
1110 "unpckhps\t{%2, %0|%0, %2}"
1111 [(set_attr "type" "sselog")
1112 (set_attr "mode" "V4SF")])
;; unpcklps: selects elements 0, 4, 1, 5 from operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
1114 (define_insn "sse_unpcklps"
1115 [(set (match_operand:V4SF 0 "register_operand" "=x")
1118 (match_operand:V4SF 1 "register_operand" "0")
1119 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1120 (parallel [(const_int 0) (const_int 4)
1121 (const_int 1) (const_int 5)])))]
1123 "unpcklps\t{%2, %0|%0, %2}"
1124 [(set_attr "type" "sselog")
1125 (set_attr "mode" "V4SF")])
1127 ;; These are modeled with the same vec_concat as the others so that we
1128 ;; capture users of shufps that can use the new instructions
;; movshdup with a single V4SF source (register or memory).  The element
;; selection starts at index 1; the rest of the selector is not visible
;; in this excerpt.
1129 (define_insn "sse3_movshdup"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1133 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1135 (parallel [(const_int 1)
1140 "movshdup\t{%1, %0|%0, %1}"
1141 [(set_attr "type" "sse")
1142 (set_attr "mode" "V4SF")])
;; movsldup with a single V4SF source (register or memory).  The element
;; selection starts at index 0; the rest of the selector is not visible
;; in this excerpt.
1144 (define_insn "sse3_movsldup"
1145 [(set (match_operand:V4SF 0 "register_operand" "=x")
1148 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1150 (parallel [(const_int 0)
1155 "movsldup\t{%1, %0|%0, %1}"
1156 [(set_attr "type" "sse")
1157 (set_attr "mode" "V4SF")])
;; Expander for shufps with an 8-bit immediate (operand 3).  The mask is
;; split into four 2-bit fields and passed to sse_shufps_1 as element
;; indices: bits 0-1 and 2-3 as 0..3, bits 4-5 and 6-7 as 4..7 (the +4
;; bias on the last two).
1159 (define_expand "sse_shufps"
1160 [(match_operand:V4SF 0 "register_operand" "")
1161 (match_operand:V4SF 1 "register_operand" "")
1162 (match_operand:V4SF 2 "nonimmediate_operand" "")
1163 (match_operand:SI 3 "const_int_operand" "")]
1166 int mask = INTVAL (operands[3]);
1167 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1168 GEN_INT ((mask >> 0) & 3),
1169 GEN_INT ((mask >> 2) & 3),
1170 GEN_INT (((mask >> 4) & 3) + 4),
1171 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selector given as four explicit element indices
;; (operands 3-4 in 0..3, operands 5-6 in 4..7).  The output code packs
;; them back into the 8-bit immediate -- the inverse of the split done by
;; the sse_shufps expander -- and overwrites operands[3] with it so the
;; template can print it as %3.
1175 (define_insn "sse_shufps_1"
1176 [(set (match_operand:V4SF 0 "register_operand" "=x")
1179 (match_operand:V4SF 1 "register_operand" "0")
1180 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1181 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1182 (match_operand 4 "const_0_to_3_operand" "")
1183 (match_operand 5 "const_4_to_7_operand" "")
1184 (match_operand 6 "const_4_to_7_operand" "")])))]
1188 mask |= INTVAL (operands[3]) << 0;
1189 mask |= INTVAL (operands[4]) << 2;
1190 mask |= (INTVAL (operands[5]) - 4) << 4;
1191 mask |= (INTVAL (operands[6]) - 4) << 6;
1192 operands[3] = GEN_INT (mask);
1194 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1196 [(set_attr "type" "sselog")
1197 (set_attr "mode" "V4SF")])
;; Produce a V2SF from elements 2 and 3 of V4SF operand 1:
;;   memory   <- register        : movhps
;;   register <- register        : movhlps
;;   register <- offsettable mem : movlps from %H1
1199 (define_insn "sse_storehps"
1200 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1202 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1203 (parallel [(const_int 2) (const_int 3)])))]
1206 movhps\t{%1, %0|%0, %1}
1207 movhlps\t{%1, %0|%0, %1}
1208 movlps\t{%H1, %0|%0, %H1}"
1209 [(set_attr "type" "ssemov")
1210 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of V4SF operand 1 (tied to the destination)
;; with V2SF operand 2:
;;   register <- memory          : movhps
;;   register <- register        : movlhps
;;   offsettable mem <- register : movlps storing to %H0
1212 (define_insn "sse_loadhps"
1213 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1216 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1217 (parallel [(const_int 0) (const_int 1)]))
1218 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1221 movhps\t{%2, %0|%0, %2}
1222 movlhps\t{%2, %0|%0, %2}
1223 movlps\t{%2, %H0|%H0, %2}"
1224 [(set_attr "type" "ssemov")
1225 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produce a V2SF from elements 0 and 1 of V4SF operand 1:
;;   memory   <- register : movlps
;;   register <- register : movaps (whole-register copy)
;;   register <- memory   : movlps
1227 (define_insn "sse_storelps"
1228 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1230 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1231 (parallel [(const_int 0) (const_int 1)])))]
1234 movlps\t{%1, %0|%0, %1}
1235 movaps\t{%1, %0|%0, %1}
1236 movlps\t{%1, %0|%0, %1}"
1237 [(set_attr "type" "ssemov")
1238 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;;   operand 2 tied to destination, operand 1 in a register : shufps $0xe4
;;   operand 1 tied, operand 2 in memory                    : movlps
;;   memory destination, operand 2 in a register            : movlps
1240 (define_insn "sse_loadlps"
1241 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1243 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1245 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1246 (parallel [(const_int 2) (const_int 3)]))))]
1249 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1250 movlps\t{%2, %0|%0, %2}
1251 movlps\t{%2, %0|%0, %2}"
1252 [(set_attr "type" "sselog,ssemov,ssemov")
1253 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss.  Operand 1 is tied to the destination and
;; operand 2 is the source register printed in the template.
1255 (define_insn "sse_movss"
1256 [(set (match_operand:V4SF 0 "register_operand" "=x")
1258 (match_operand:V4SF 2 "register_operand" "x")
1259 (match_operand:V4SF 1 "register_operand" "0")
1262 "movss\t{%2, %0|%0, %2}"
1263 [(set_attr "type" "ssemov")
1264 (set_attr "mode" "SF")])
;; Build a V4SF from an SF operand that already lives in the destination
;; register (constraint "0") with "shufps $0, %0, %0".
1266 (define_insn "*vec_dupv4sf"
1267 [(set (match_operand:V4SF 0 "register_operand" "=x")
1269 (match_operand:SF 1 "register_operand" "0")))]
1271 "shufps\t{$0, %0, %0|%0, %0, 0}"
1272 [(set_attr "type" "sselog1")
1273 (set_attr "mode" "V4SF")])
1275 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1276 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1277 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands.  Alternatives, in order:
;;   SSE, operand 1 tied, operand 2 in a register : unpcklps
;;   SSE, operand 1 in memory, operand 2 is "C"   : movss
;;   MMX, operand 1 tied, operand 2 in a register : punpckldq
;;   MMX, operand 1 in memory, operand 2 is "C"   : movd
;; Operand 2 must satisfy reg_or_0_operand.
1278 (define_insn "*sse_concatv2sf"
1279 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1281 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1282 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1285 unpcklps\t{%2, %0|%0, %2}
1286 movss\t{%1, %0|%0, %1}
1287 punpckldq\t{%2, %0|%0, %2}
1288 movd\t{%1, %0|%0, %1}"
1289 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1290 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or from memory
;; (movhps).
1292 (define_insn "*sse_concatv4sf"
1293 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1295 (match_operand:V2SF 1 "register_operand" " 0,0")
1296 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1299 movlhps\t{%2, %0|%0, %2}
1300 movhps\t{%2, %0|%0, %2}"
1301 [(set_attr "type" "ssemov")
1302 (set_attr "mode" "V4SF,V2SF")])
;; Expander for V4SF vector initialization.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1304 (define_expand "vec_initv4sf"
1305 [(match_operand:V4SF 0 "register_operand" "")
1306 (match_operand 1 "" "")]
1309 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert SF operand 2 into V4SF operand 1, giving operand 0.  Four
;; alternatives are declared; the first two emit movss and the third
;; movd.  The template for the fourth (memory destination) is not visible
;; in this excerpt.
;; NOTE(review): the split that follows appears to handle that memory
;; case after reload -- confirm.
1313 (define_insn "*vec_setv4sf_0"
1314 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1317 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1318 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1322 movss\t{%2, %0|%0, %2}
1323 movss\t{%2, %0|%0, %2}
1324 movd\t{%2, %0|%0, %2}
1326 [(set_attr "type" "ssemov")
1327 (set_attr "mode" "SF")])
;; After reload (TARGET_SSE && reload_completed), with a V4SF memory
;; destination and a non-memory SF source: rewrite as a plain SFmode
;; store of operand 1 at byte offset 0 of operand 0.
1330 [(set (match_operand:V4SF 0 "memory_operand" "")
1333 (match_operand:SF 1 "nonmemory_operand" ""))
1336 "TARGET_SSE && reload_completed"
1339 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander for setting one V4SF element.  Operand 2 is a constant whose
;; integer value is passed, with the vector and the SF value, to
;; ix86_expand_vector_set.
1343 (define_expand "vec_setv4sf"
1344 [(match_operand:V4SF 0 "register_operand" "")
1345 (match_operand:SF 1 "register_operand" "")
1346 (match_operand 2 "const_int_operand" "")]
1349 ix86_expand_vector_set (false, operands[0], operands[1],
1350 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF value.  Memory-to-memory
;; is rejected by the insn condition.  After reload the insn is split
;; into an ordinary SFmode move from an SFmode view of operand 1
;; (gen_rtx_REG on the same register number, or gen_lowpart; the test
;; choosing between them is not visible in this excerpt).
1354 (define_insn_and_split "*vec_extractv4sf_0"
1355 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1357 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1358 (parallel [(const_int 0)])))]
1359 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1361 "&& reload_completed"
1364 rtx op1 = operands[1];
1366 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1368 op1 = gen_lowpart (SFmode, op1);
1369 emit_move_insn (operands[0], op1);
;; Expander for extracting one V4SF element.  Operand 2 is a constant
;; whose integer value is passed, with the destination and the vector, to
;; ix86_expand_vector_extract.
1373 (define_expand "vec_extractv4sf"
1374 [(match_operand:SF 0 "register_operand" "")
1375 (match_operand:V4SF 1 "register_operand" "")
1376 (match_operand 2 "const_int_operand" "")]
1379 ix86_expand_vector_extract (false, operands[0], operands[1],
1380 INTVAL (operands[2]));
1384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1386 ;; Parallel double-precision floating point arithmetic
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  Expansion is delegated entirely to
;; ix86_expand_fp_absneg_operator (NEG); the RTL template here is not
;; emitted because the preparation code ends with DONE.
1390 (define_expand "negv2df2"
1391 [(set (match_operand:V2DF 0 "register_operand" "")
1392 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1394 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  Expansion is delegated entirely to
;; ix86_expand_fp_absneg_operator (ABS); the RTL template here is not
;; emitted because the preparation code ends with DONE.
1396 (define_expand "absv2df2"
1397 [(set (match_operand:V2DF 0 "register_operand" "")
1398 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1400 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the plus template is emitted.
1402 (define_expand "addv2df3"
1403 [(set (match_operand:V2DF 0 "register_operand" "")
1404 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1405 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1407 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1409 (define_insn "*addv2df3"
1410 [(set (match_operand:V2DF 0 "register_operand" "=x")
1411 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1412 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1413 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1414 "addpd\t{%2, %0|%0, %2}"
1415 [(set_attr "type" "sseadd")
1416 (set_attr "mode" "V2DF")])
;; addsd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
;; The operands are V2DF, so ix86_binary_operator_ok is called with
;; V2DFmode, as in the sibling sse2_vmmulv2df3 pattern.
1418 (define_insn "sse2_vmaddv2df3"
1419 [(set (match_operand:V2DF 0 "register_operand" "=x")
1421 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1422 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1425 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1426 "addsd\t{%2, %0|%0, %2}"
1427 [(set_attr "type" "sseadd")
1428 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the minus template is emitted.
1430 (define_expand "subv2df3"
1431 [(set (match_operand:V2DF 0 "register_operand" "")
1432 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1433 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1435 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no
;; commutative marker); operand 2 may be a register or memory.
1437 (define_insn "*subv2df3"
1438 [(set (match_operand:V2DF 0 "register_operand" "=x")
1439 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1440 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1442 "subpd\t{%2, %0|%0, %2}"
1443 [(set_attr "type" "sseadd")
1444 (set_attr "mode" "V2DF")])
;; subsd.  Operand 1 must be a register tied to the destination;
;; operand 2 may be a register or memory.
1446 (define_insn "sse2_vmsubv2df3"
1447 [(set (match_operand:V2DF 0 "register_operand" "=x")
1449 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1450 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1454 "subsd\t{%2, %0|%0, %2}"
1455 [(set_attr "type" "sseadd")
1456 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the mult template is emitted.
1458 (define_expand "mulv2df3"
1459 [(set (match_operand:V2DF 0 "register_operand" "")
1460 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1461 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1463 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1465 (define_insn "*mulv2df3"
1466 [(set (match_operand:V2DF 0 "register_operand" "=x")
1467 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1468 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1469 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1470 "mulpd\t{%2, %0|%0, %2}"
1471 [(set_attr "type" "ssemul")
1472 (set_attr "mode" "V2DF")])
;; mulsd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1474 (define_insn "sse2_vmmulv2df3"
1475 [(set (match_operand:V2DF 0 "register_operand" "=x")
1477 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1478 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1481 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1482 "mulsd\t{%2, %0|%0, %2}"
1483 [(set_attr "type" "ssemul")
1484 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1
;; must already be a register.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the div template is emitted.
1486 (define_expand "divv2df3"
1487 [(set (match_operand:V2DF 0 "register_operand" "")
1488 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1489 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1491 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 must be a register tied to the destination;
;; operand 2 may be a register or memory.
1493 (define_insn "*divv2df3"
1494 [(set (match_operand:V2DF 0 "register_operand" "=x")
1495 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1496 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1498 "divpd\t{%2, %0|%0, %2}"
1499 [(set_attr "type" "ssediv")
1500 (set_attr "mode" "V2DF")])
;; divsd.  Operand 1 must be a register tied to the destination;
;; operand 2 may be a register or memory.
1502 (define_insn "sse2_vmdivv2df3"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1505 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1506 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1510 "divsd\t{%2, %0|%0, %2}"
1511 [(set_attr "type" "ssediv")
1512 (set_attr "mode" "DF")])
;; sqrtpd: square root of V2DF operand 1 (register or memory).
1514 (define_insn "sqrtv2df2"
1515 [(set (match_operand:V2DF 0 "register_operand" "=x")
1516 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1518 "sqrtpd\t{%1, %0|%0, %1}"
1519 [(set_attr "type" "sse")
1520 (set_attr "mode" "V2DF")])
;; sqrtsd: square root of operand 1, with operand 2 tied to the
;; destination.  Operand 1 uses nonimmediate_operand so that the "m"
;; half of its "xm" constraint can actually match, as in sqrtv2df2.
1522 (define_insn "sse2_vmsqrtv2df2"
1523 [(set (match_operand:V2DF 0 "register_operand" "=x")
1525 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1526 (match_operand:V2DF 2 "register_operand" "0")
1529 "sqrtsd\t{%1, %0|%0, %1}"
1530 [(set_attr "type" "sse")
1531 (set_attr "mode" "DF")])
1533 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1534 ;; isn't really correct, as those rtl operators aren't defined when
1535 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register before the usual binary-operand fixup, which
;; matches the register_operand predicate of the non-finite *smaxv2df3.
1537 (define_expand "smaxv2df3"
1538 [(set (match_operand:V2DF 0 "register_operand" "")
1539 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1540 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1543 if (!flag_finite_math_only)
1544 operands[1] = force_reg (V2DFmode, operands[1]);
1545 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, finite-math variant: enabled only with flag_finite_math_only.
;; Operand 1 is tied to the destination and marked commutative ("%0").
1548 (define_insn "*smaxv2df3_finite"
1549 [(set (match_operand:V2DF 0 "register_operand" "=x")
1550 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1551 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1552 "TARGET_SSE2 && flag_finite_math_only
1553 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1554 "maxpd\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sseadd")
1556 (set_attr "mode" "V2DF")])
;; maxpd, general variant.  Operand 1 must be a register tied to the
;; destination and carries no commutative marker, so operand order is
;; preserved.
1558 (define_insn "*smaxv2df3"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1560 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1561 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1563 "maxpd\t{%2, %0|%0, %2}"
1564 [(set_attr "type" "sseadd")
1565 (set_attr "mode" "V2DF")])
;; maxsd, finite-math variant: enabled only with flag_finite_math_only.
;; Operand 1 is tied to the destination and marked commutative ("%0").
1567 (define_insn "*sse2_vmsmaxv2df3_finite"
1568 [(set (match_operand:V2DF 0 "register_operand" "=x")
1570 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1571 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1574 "TARGET_SSE2 && flag_finite_math_only
1575 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1576 "maxsd\t{%2, %0|%0, %2}"
1577 [(set_attr "type" "sseadd")
1578 (set_attr "mode" "DF")])
;; maxsd, general variant.  Operand 1 must be a register tied to the
;; destination and carries no commutative marker.
1580 (define_insn "sse2_vmsmaxv2df3"
1581 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1588 "maxsd\t{%2, %0|%0, %2}"
1589 [(set_attr "type" "sseadd")
1590 (set_attr "mode" "DF")])
;; V2DF minimum expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register before the usual binary-operand fixup, which
;; matches the register_operand predicate of the non-finite *sminv2df3.
1592 (define_expand "sminv2df3"
1593 [(set (match_operand:V2DF 0 "register_operand" "")
1594 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1595 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1598 if (!flag_finite_math_only)
1599 operands[1] = force_reg (V2DFmode, operands[1]);
1600 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, finite-math variant: enabled only with flag_finite_math_only.
;; Operand 1 is tied to the destination and marked commutative ("%0").
1603 (define_insn "*sminv2df3_finite"
1604 [(set (match_operand:V2DF 0 "register_operand" "=x")
1605 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1606 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1607 "TARGET_SSE2 && flag_finite_math_only
1608 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1609 "minpd\t{%2, %0|%0, %2}"
1610 [(set_attr "type" "sseadd")
1611 (set_attr "mode" "V2DF")])
;; minpd, general variant.  Operand 1 must be a register tied to the
;; destination and carries no commutative marker, so operand order is
;; preserved.
1613 (define_insn "*sminv2df3"
1614 [(set (match_operand:V2DF 0 "register_operand" "=x")
1615 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1616 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1618 "minpd\t{%2, %0|%0, %2}"
1619 [(set_attr "type" "sseadd")
1620 (set_attr "mode" "V2DF")])
;; minsd, finite-math variant: enabled only with flag_finite_math_only.
;; Operand 1 is tied to the destination and marked commutative ("%0").
1622 (define_insn "*sse2_vmsminv2df3_finite"
1623 [(set (match_operand:V2DF 0 "register_operand" "=x")
1625 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1626 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1629 "TARGET_SSE2 && flag_finite_math_only
1630 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1631 "minsd\t{%2, %0|%0, %2}"
1632 [(set_attr "type" "sseadd")
1633 (set_attr "mode" "DF")])
;; minsd, general variant.  Operand 1 must be a register tied to the
;; destination and carries no commutative marker.
1635 (define_insn "sse2_vmsminv2df3"
1636 [(set (match_operand:V2DF 0 "register_operand" "=x")
1638 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1639 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1643 "minsd\t{%2, %0|%0, %2}"
1644 [(set_attr "type" "sseadd")
1645 (set_attr "mode" "DF")])
;; addsubpd.  The pattern combines an expression on operands 1 and 2
;; (its opening line is not visible in this excerpt) with
;; (minus operand1 operand2).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
1647 (define_insn "sse3_addsubv2df3"
1648 [(set (match_operand:V2DF 0 "register_operand" "=x")
1651 (match_operand:V2DF 1 "register_operand" "0")
1652 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1653 (minus:V2DF (match_dup 1) (match_dup 2))
1656 "addsubpd\t{%2, %0|%0, %2}"
1657 [(set_attr "type" "sseadd")
1658 (set_attr "mode" "V2DF")])
;; haddpd.  Each result element pairs element 0 with element 1 of one
;; source: first operand 1 (tied to the destination), then operand 2
;; (register or memory).  The combining rtx codes are on lines not
;; visible in this excerpt.
1660 (define_insn "sse3_haddv2df3"
1661 [(set (match_operand:V2DF 0 "register_operand" "=x")
1665 (match_operand:V2DF 1 "register_operand" "0")
1666 (parallel [(const_int 0)]))
1667 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1670 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1671 (parallel [(const_int 0)]))
1672 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1674 "haddpd\t{%2, %0|%0, %2}"
1675 [(set_attr "type" "sseadd")
1676 (set_attr "mode" "V2DF")])
;; hsubpd.  Same operand layout as sse3_haddv2df3: each result element
;; pairs element 0 with element 1 of one source, first operand 1 (tied to
;; the destination), then operand 2 (register or memory).  The combining
;; rtx codes are on lines not visible in this excerpt.
1678 (define_insn "sse3_hsubv2df3"
1679 [(set (match_operand:V2DF 0 "register_operand" "=x")
1683 (match_operand:V2DF 1 "register_operand" "0")
1684 (parallel [(const_int 0)]))
1685 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1688 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1689 (parallel [(const_int 0)]))
1690 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1692 "hsubpd\t{%2, %0|%0, %2}"
1693 [(set_attr "type" "sseadd")
1694 (set_attr "mode" "V2DF")])
;; V2DF sum-reduction expander: emits sse3_haddv2df3 with operand 1
;; supplied as both sources.
1696 (define_expand "reduc_splus_v2df"
1697 [(match_operand:V2DF 0 "register_operand" "")
1698 (match_operand:V2DF 1 "register_operand" "")]
1701 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707 ;; Parallel double-precision floating point comparisons
1709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>pd.  Operand 3 is the comparison operator (matched by
;; sse_comparison_operator) and is printed through %D3 to form the
;; mnemonic.  Operand 1 is tied to the destination.
1711 (define_insn "sse2_maskcmpv2df3"
1712 [(set (match_operand:V2DF 0 "register_operand" "=x")
1713 (match_operator:V2DF 3 "sse_comparison_operator"
1714 [(match_operand:V2DF 1 "register_operand" "0")
1715 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1717 "cmp%D3pd\t{%2, %0|%0, %2}"
1718 [(set_attr "type" "ssecmp")
1719 (set_attr "mode" "V2DF")])
;; cmp<cond>sd.  Operand 3 is the comparison operator (matched by
;; sse_comparison_operator) and is printed through %D3 to form the
;; mnemonic.  Operand 1 is tied to the destination.
1721 (define_insn "sse2_vmmaskcmpv2df3"
1722 [(set (match_operand:V2DF 0 "register_operand" "=x")
1724 (match_operator:V2DF 3 "sse_comparison_operator"
1725 [(match_operand:V2DF 1 "register_operand" "0")
1726 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1730 "cmp%D3sd\t{%2, %0|%0, %2}"
1731 [(set_attr "type" "ssecmp")
1732 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).
1734 (define_insn "sse2_comi"
1735 [(set (reg:CCFP FLAGS_REG)
1738 (match_operand:V2DF 0 "register_operand" "x")
1739 (parallel [(const_int 0)]))
1741 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1742 (parallel [(const_int 0)]))))]
1744 "comisd\t{%1, %0|%0, %1}"
1745 [(set_attr "type" "ssecomi")
1746 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPU mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).
1748 (define_insn "sse2_ucomi"
1749 [(set (reg:CCFPU FLAGS_REG)
1752 (match_operand:V2DF 0 "register_operand" "x")
1753 (parallel [(const_int 0)]))
1755 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1756 (parallel [(const_int 0)]))))]
1758 "ucomisd\t{%1, %0|%0, %1}"
1759 [(set_attr "type" "ssecomi")
1760 (set_attr "mode" "DF")])
;; V2DF vector conditional expander.  Operand 3 is the comparison applied
;; to operands 4 and 5; operands 1 and 2 are the two values chosen
;; between.  The work is done by ix86_expand_fp_vcond; what happens on
;; each outcome of that call is on lines not visible in this excerpt.
1762 (define_expand "vcondv2df"
1763 [(set (match_operand:V2DF 0 "register_operand" "")
1765 (match_operator 3 ""
1766 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1767 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1768 (match_operand:V2DF 1 "general_operand" "")
1769 (match_operand:V2DF 2 "general_operand" "")))]
1772 if (ix86_expand_fp_vcond (operands))
1778 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1780 ;; Parallel double-precision floating point logical operations
1782 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the and template is emitted.
1784 (define_expand "andv2df3"
1785 [(set (match_operand:V2DF 0 "register_operand" "")
1786 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1787 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1789 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1791 (define_insn "*andv2df3"
1792 [(set (match_operand:V2DF 0 "register_operand" "=x")
1793 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1794 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1795 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1796 "andpd\t{%2, %0|%0, %2}"
1797 [(set_attr "type" "sselog")
1798 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  The complemented operand is
;; the one tied to the destination, so the pattern is not commutative.
1800 (define_insn "sse2_nandv2df3"
1801 [(set (match_operand:V2DF 0 "register_operand" "=x")
1802 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1803 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1805 "andnpd\t{%2, %0|%0, %2}"
1806 [(set_attr "type" "sselog")
1807 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the ior template is emitted.
1809 (define_expand "iorv2df3"
1810 [(set (match_operand:V2DF 0 "register_operand" "")
1811 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1812 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1814 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1816 (define_insn "*iorv2df3"
1817 [(set (match_operand:V2DF 0 "register_operand" "=x")
1818 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1819 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1820 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1821 "orpd\t{%2, %0|%0, %2}"
1822 [(set_attr "type" "sselog")
1823 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the xor template is emitted.
1825 (define_expand "xorv2df3"
1826 [(set (match_operand:V2DF 0 "register_operand" "")
1827 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1828 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1830 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1832 (define_insn "*xorv2df3"
1833 [(set (match_operand:V2DF 0 "register_operand" "=x")
1834 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1835 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1836 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1837 "xorpd\t{%2, %0|%0, %2}"
1838 [(set_attr "type" "sselog")
1839 (set_attr "mode" "V2DF")])
1841 ;; Also define scalar versions. These are used for abs, neg, and
1842 ;; conditional move. Using subregs into vector modes causes register
1843 ;; allocation lossage. These patterns do not allow memory operands
1844 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND implemented with andpd.  Both inputs must be
;; registers; the "mode" attribute is V2DF because the packed
;; instruction is what gets emitted.
1846 (define_insn "*anddf3"
1847 [(set (match_operand:DF 0 "register_operand" "=x")
1848 (and:DF (match_operand:DF 1 "register_operand" "0")
1849 (match_operand:DF 2 "register_operand" "x")))]
1851 "andpd\t{%2, %0|%0, %2}"
1852 [(set_attr "type" "sselog")
1853 (set_attr "mode" "V2DF")])
;; Scalar DF (NOT operand 1) AND operand 2, implemented with andnpd.
;; Both inputs must be registers.
1855 (define_insn "*nanddf3"
1856 [(set (match_operand:DF 0 "register_operand" "=x")
1857 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1858 (match_operand:DF 2 "register_operand" "x")))]
1860 "andnpd\t{%2, %0|%0, %2}"
1861 [(set_attr "type" "sselog")
1862 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR implemented with orpd.  Both inputs must be
;; registers.
1864 (define_insn "*iordf3"
1865 [(set (match_operand:DF 0 "register_operand" "=x")
1866 (ior:DF (match_operand:DF 1 "register_operand" "0")
1867 (match_operand:DF 2 "register_operand" "x")))]
1869 "orpd\t{%2, %0|%0, %2}"
1870 [(set_attr "type" "sselog")
1871 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR implemented with xorpd.  Both inputs must be
;; registers.
1873 (define_insn "*xordf3"
1874 [(set (match_operand:DF 0 "register_operand" "=x")
1875 (xor:DF (match_operand:DF 1 "register_operand" "0")
1876 (match_operand:DF 2 "register_operand" "x")))]
1878 "xorpd\t{%2, %0|%0, %2}"
1879 [(set_attr "type" "sselog")
1880 (set_attr "mode" "V2DF")])
1882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1884 ;; Parallel double-precision floating point conversion operations
1886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert V2SI operand 1 to V2DF.  The source is a "y"
;; register (first alternative, "unit" mmx) or memory (second
;; alternative, default unit).
1888 (define_insn "sse2_cvtpi2pd"
1889 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1890 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1892 "cvtpi2pd\t{%1, %0|%0, %1}"
1893 [(set_attr "type" "ssecvt")
1894 (set_attr "unit" "mmx,*")
1895 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert V2DF operand 1 (register or memory) to V2SI in a
;; "y" register.  Modeled as UNSPEC_FIX_NOTRUNC.
1897 (define_insn "sse2_cvtpd2pi"
1898 [(set (match_operand:V2SI 0 "register_operand" "=y")
1899 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1900 UNSPEC_FIX_NOTRUNC))]
1902 "cvtpd2pi\t{%1, %0|%0, %1}"
1903 [(set_attr "type" "ssecvt")
1904 (set_attr "unit" "mmx")
1905 (set_attr "mode" "DI")])
;; cvttpd2pi: convert V2DF operand 1 (register or memory) to V2SI in a
;; "y" register, modeled as a plain (fix:V2SI ...).
;; NOTE(review): "mode" is TI here, while sse2_cvtpd2pi -- which also
;; produces a V2SI in a "y" register -- uses DI.  Confirm which is
;; intended.
1907 (define_insn "sse2_cvttpd2pi"
1908 [(set (match_operand:V2SI 0 "register_operand" "=y")
1909 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1911 "cvttpd2pi\t{%1, %0|%0, %1}"
1912 [(set_attr "type" "ssecvt")
1913 (set_attr "unit" "mmx")
1914 (set_attr "mode" "TI")])
;; cvtsi2sd: convert SI operand 2 (general register or memory) to DF and
;; combine it with V2DF operand 1, which is tied to the destination.
1916 (define_insn "sse2_cvtsi2sd"
1917 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1920 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1921 (match_operand:V2DF 1 "register_operand" "0,0")
1924 "cvtsi2sd\t{%2, %0|%0, %2}"
1925 [(set_attr "type" "sseicvt")
1926 (set_attr "mode" "DF")
1927 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
1929 (define_insn "sse2_cvtsi2sdq"
1930 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1933 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1934 (match_operand:V2DF 1 "register_operand" "0,0")
1936 "TARGET_SSE2 && TARGET_64BIT"
1937 "cvtsi2sdq\t{%2, %0|%0, %2}"
1938 [(set_attr "type" "sseicvt")
1939 (set_attr "mode" "DF")
1940 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: convert element 0 of V2DF operand 1 (register or memory) to
;; an SI general register.  Modeled as UNSPEC_FIX_NOTRUNC.
1942 (define_insn "sse2_cvtsd2si"
1943 [(set (match_operand:SI 0 "register_operand" "=r,r")
1946 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1947 (parallel [(const_int 0)]))]
1948 UNSPEC_FIX_NOTRUNC))]
1950 "cvtsd2si\t{%1, %0|%0, %1}"
1951 [(set_attr "type" "sseicvt")
1952 (set_attr "athlon_decode" "double,vector")
1953 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a scalar DF operand rather
;; than element 0 of a V2DF.  Emits the same cvtsd2si instruction.
1955 (define_insn "sse2_cvtsd2si_2"
1956 [(set (match_operand:SI 0 "register_operand" "=r,r")
1957 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1958 UNSPEC_FIX_NOTRUNC))]
1960 "cvtsd2si\t{%1, %0|%0, %1}"
1961 [(set_attr "type" "sseicvt")
1962 (set_attr "athlon_decode" "double,vector")
1963 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result counterpart of sse2_cvtsd2si.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
1965 (define_insn "sse2_cvtsd2siq"
1966 [(set (match_operand:DI 0 "register_operand" "=r,r")
1969 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1970 (parallel [(const_int 0)]))]
1971 UNSPEC_FIX_NOTRUNC))]
1972 "TARGET_SSE2 && TARGET_64BIT"
1973 "cvtsd2siq\t{%1, %0|%0, %1}"
1974 [(set_attr "type" "sseicvt")
1975 (set_attr "athlon_decode" "double,vector")
1976 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a scalar DF operand rather
;; than element 0 of a V2DF.  Requires TARGET_SSE2 && TARGET_64BIT.
1978 (define_insn "sse2_cvtsd2siq_2"
1979 [(set (match_operand:DI 0 "register_operand" "=r,r")
1980 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1981 UNSPEC_FIX_NOTRUNC))]
1982 "TARGET_SSE2 && TARGET_64BIT"
1983 "cvtsd2siq\t{%1, %0|%0, %1}"
1984 [(set_attr "type" "sseicvt")
1985 (set_attr "athlon_decode" "double,vector")
1986 (set_attr "mode" "DI")])
;; cvttsd2si: an SI general register is set from element 0 of V2DF
;; operand 1 (register or memory).
1988 (define_insn "sse2_cvttsd2si"
1989 [(set (match_operand:SI 0 "register_operand" "=r,r")
1992 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1993 (parallel [(const_int 0)]))))]
1995 "cvttsd2si\t{%1, %0|%0, %1}"
1996 [(set_attr "type" "sseicvt")
1997 (set_attr "mode" "SI")
1998 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: DImode-result counterpart of sse2_cvttsd2si.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2000 (define_insn "sse2_cvttsd2siq"
2001 [(set (match_operand:DI 0 "register_operand" "=r,r")
2004 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2005 (parallel [(const_int 0)]))))]
2006 "TARGET_SSE2 && TARGET_64BIT"
2007 "cvttsd2siq\t{%1, %0|%0, %1}"
2008 [(set_attr "type" "sseicvt")
2009 (set_attr "mode" "DI")
2010 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: produce a V2DF from elements 0 and 1 of V4SI operand 1
;; (register or memory).
2012 (define_insn "sse2_cvtdq2pd"
2013 [(set (match_operand:V2DF 0 "register_operand" "=x")
2016 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2017 (parallel [(const_int 0) (const_int 1)]))))]
2019 "cvtdq2pd\t{%1, %0|%0, %1}"
2020 [(set_attr "type" "ssecvt")
2021 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  It supplies operand 2 as the V2SImode zero
;; constant, which the *sse2_cvtpd2dq insn requires through its
;; const0_operand predicate.
2023 (define_expand "sse2_cvtpd2dq"
2024 [(set (match_operand:V4SI 0 "register_operand" "")
2026 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2030 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: a V4SI result built from the V2SI conversion of V2DF
;; operand 1 and operand 2, which must be a V2SI zero (const0_operand).
2032 (define_insn "*sse2_cvtpd2dq"
2033 [(set (match_operand:V4SI 0 "register_operand" "=x")
2035 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2037 (match_operand:V2SI 2 "const0_operand" "")))]
2039 "cvtpd2dq\t{%1, %0|%0, %1}"
2040 [(set_attr "type" "ssecvt")
2041 (set_attr "mode" "TI")])
;; Expander for cvttpd2dq.  It supplies operand 2 as the V2SImode zero
;; constant, which the *sse2_cvttpd2dq insn requires through its
;; const0_operand predicate.
2043 (define_expand "sse2_cvttpd2dq"
2044 [(set (match_operand:V4SI 0 "register_operand" "")
2046 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2049 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: a V4SI result built from (fix:V2SI operand 1) and
;; operand 2, which must be a V2SI zero (const0_operand).
2051 (define_insn "*sse2_cvttpd2dq"
2052 [(set (match_operand:V4SI 0 "register_operand" "=x")
2054 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2055 (match_operand:V2SI 2 "const0_operand" "")))]
2057 "cvttpd2dq\t{%1, %0|%0, %1}"
2058 [(set_attr "type" "ssecvt")
2059 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (register or memory),
;; combined with V4SF operand 1, which is tied to the destination.
2061 (define_insn "sse2_cvtsd2ss"
2062 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2065 (float_truncate:V2SF
2066 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2067 (match_operand:V4SF 1 "register_operand" "0,0")
2070 "cvtsd2ss\t{%2, %0|%0, %2}"
2071 [(set_attr "type" "ssecvt")
2072 (set_attr "athlon_decode" "vector,double")
2073 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (register or
;; memory), combined with V2DF operand 1, which is tied to the
;; destination.
2075 (define_insn "sse2_cvtss2sd"
2076 [(set (match_operand:V2DF 0 "register_operand" "=x")
2080 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2081 (parallel [(const_int 0) (const_int 1)])))
2082 (match_operand:V2DF 1 "register_operand" "0")
2085 "cvtss2sd\t{%2, %0|%0, %2}"
2086 [(set_attr "type" "ssecvt")
2087 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  It supplies operand 2 as the V2SFmode zero
;; constant, which the *sse2_cvtpd2ps insn requires through its
;; const0_operand predicate.
;; NOTE(review): operand 1 carries an "xm" constraint, whereas the other
;; define_expands in this file leave constraint strings empty.
2089 (define_expand "sse2_cvtpd2ps"
2090 [(set (match_operand:V4SF 0 "register_operand" "")
2092 (float_truncate:V2SF
2093 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2096 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: a V4SF result built from the float_truncate of V2DF
;; operand 1 and operand 2, which must be a V2SF zero (const0_operand).
2098 (define_insn "*sse2_cvtpd2ps"
2099 [(set (match_operand:V4SF 0 "register_operand" "=x")
2101 (float_truncate:V2SF
2102 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2103 (match_operand:V2SF 2 "const0_operand" "")))]
2105 "cvtpd2ps\t{%1, %0|%0, %1}"
2106 [(set_attr "type" "ssecvt")
2107 (set_attr "mode" "V4SF")])
;; cvtps2pd: produce a V2DF from elements 0 and 1 of V4SF operand 1
;; (register or memory).
2109 (define_insn "sse2_cvtps2pd"
2110 [(set (match_operand:V2DF 0 "register_operand" "=x")
2113 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2114 (parallel [(const_int 0) (const_int 1)]))))]
2116 "cvtps2pd\t{%1, %0|%0, %1}"
2117 [(set_attr "type" "ssecvt")
2118 (set_attr "mode" "V2DF")])
2120 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2122 ;; Parallel double-precision floating point element swizzling
2124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2126 (define_insn "sse2_unpckhpd"
2127 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2130 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2131 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2132 (parallel [(const_int 1)
2134 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2136 unpckhpd\t{%2, %0|%0, %2}
2137 movlpd\t{%H1, %0|%0, %H1}
2138 movhpd\t{%1, %0|%0, %1}"
2139 [(set_attr "type" "sselog,ssemov,ssemov")
2140 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Duplicate element 0 of operand 1 into both elements of operand 0.
;; The RTL concatenates operand 1 with itself (V4DF) and selects
;; elements 0 and 2, i.e. the low double of each copy.
;;
;; Alternative 0 emits movddup (register destination, register or
;; memory source).  Alternative 1 (memory destination, register
;; source) emits "#" and therefore has to be split into other insns.
;;
;; The pattern has only operands 0 and 1, so the mem-to-mem case that
;; must be rejected is operand 0 together with operand 1; there is no
;; operands[2] to test here.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog,ssemov")
   (set_attr "mode" "V2DF")])
2158 [(set (match_operand:V2DF 0 "memory_operand" "")
2161 (match_operand:V2DF 1 "register_operand" "")
2163 (parallel [(const_int 0)
2165 "TARGET_SSE3 && reload_completed"
2168 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2169 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2170 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2174 (define_insn "sse2_unpcklpd"
2175 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2178 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2179 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2180 (parallel [(const_int 0)
2182 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2184 unpcklpd\t{%2, %0|%0, %2}
2185 movhpd\t{%2, %0|%0, %2}
2186 movlpd\t{%2, %H0|%H0, %2}"
2187 [(set_attr "type" "sselog,ssemov,ssemov")
2188 (set_attr "mode" "V2DF,V1DF,V1DF")])
2190 (define_expand "sse2_shufpd"
2191 [(match_operand:V2DF 0 "register_operand" "")
2192 (match_operand:V2DF 1 "register_operand" "")
2193 (match_operand:V2DF 2 "nonimmediate_operand" "")
2194 (match_operand:SI 3 "const_int_operand" "")]
2197 int mask = INTVAL (operands[3]);
2198 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2200 GEN_INT (mask & 2 ? 3 : 2)));
2204 (define_insn "sse2_shufpd_1"
2205 [(set (match_operand:V2DF 0 "register_operand" "=x")
2208 (match_operand:V2DF 1 "register_operand" "0")
2209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2210 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2211 (match_operand 4 "const_2_to_3_operand" "")])))]
2215 mask = INTVAL (operands[3]);
2216 mask |= (INTVAL (operands[4]) - 2) << 1;
2217 operands[3] = GEN_INT (mask);
2219 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2221 [(set_attr "type" "sselog")
2222 (set_attr "mode" "V2DF")])
2224 (define_insn "sse2_storehpd"
2225 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2227 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2228 (parallel [(const_int 1)])))]
2229 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2231 movhpd\t{%1, %0|%0, %1}
2234 [(set_attr "type" "ssemov,sselog1,ssemov")
2235 (set_attr "mode" "V1DF,V2DF,DF")])
2238 [(set (match_operand:DF 0 "register_operand" "")
2240 (match_operand:V2DF 1 "memory_operand" "")
2241 (parallel [(const_int 1)])))]
2242 "TARGET_SSE2 && reload_completed"
2243 [(set (match_dup 0) (match_dup 1))]
2245 operands[1] = adjust_address (operands[1], DFmode, 8);
2248 (define_insn "sse2_storelpd"
2249 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2251 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2252 (parallel [(const_int 0)])))]
2253 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2255 movlpd\t{%1, %0|%0, %1}
2258 [(set_attr "type" "ssemov")
2259 (set_attr "mode" "V1DF,DF,DF")])
2262 [(set (match_operand:DF 0 "register_operand" "")
2264 (match_operand:V2DF 1 "nonimmediate_operand" "")
2265 (parallel [(const_int 0)])))]
2266 "TARGET_SSE2 && reload_completed"
2269 rtx op1 = operands[1];
2271 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2273 op1 = gen_lowpart (DFmode, op1);
2274 emit_move_insn (operands[0], op1);
;; Replace the high element of a V2DF: the result is
;;   { operand 1 element 0, operand 2 }.
;;
;; Alternatives:
;;   0: operand 2 in memory          -> movhpd
;;   1: operand 2 in an SSE register -> unpcklpd
;;   2: destination in memory        -> "#", to be split
;;
;; Operand 1 is tied to the destination in every alternative.  A former
;; alternative tied operand 2 to the destination instead and emitted
;; "shufpd $1, %1, %0"; that cannot implement this pattern, because
;; shufpd always takes the low result element from the destination
;; register, whereas the pattern needs it from operand 1.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand"     " m,x,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov,sselog,other")
   (set_attr "mode" "V1DF,V2DF,DF")])
2295 [(set (match_operand:V2DF 0 "memory_operand" "")
2297 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2298 (match_operand:DF 1 "register_operand" "")))]
2299 "TARGET_SSE2 && reload_completed"
2300 [(set (match_dup 0) (match_dup 1))]
2302 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Replace the low element of a V2DF: the result is
;;   { operand 2, operand 1 element 1 }.
;;
;; Alternatives (operand 0 / operand 2 / operand 1):
;;   0: x / m    / C   movsd   - operand 1 must satisfy constraint C
;;   1: x / m    / 0   movlpd  - load the low half, keep the high half
;;   2: x / x    / 0   movsd   - register-to-register low-half move
;;   3: x / 0    / x   shufpd  - low half already in place (operand 2 is
;;                               the destination); take the high half
;;                               from operand 1
;;   4: x / 0    / o   movhpd  - high half loaded from the upper part
;;                               of the memory operand 1 (%H1)
;;   5: m / x*fr / 0   "#"     - to be split
;;
;; In alternative 3 the shufpd source must be operand 1: operand 2 is
;; the same register as operand 0, so naming it as the source would
;; leave the destination unchanged.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, replacing the low element of a V2DF that lives in
;; memory (the high element being the memory's own element 1) is just
;; a DFmode store of operand 1 into the first eight bytes of the
;; vector.  The store must go to byte offset 0: offset 8 is the high
;; element, which this pattern preserves.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
2334 ;; Not sure these two are ever used, but it doesn't hurt to have
2336 (define_insn "*vec_extractv2df_1_sse"
2337 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2339 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2340 (parallel [(const_int 1)])))]
2341 "!TARGET_SSE2 && TARGET_SSE
2342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2344 movhps\t{%1, %0|%0, %1}
2345 movhlps\t{%1, %0|%0, %1}
2346 movlps\t{%H1, %0|%0, %H1}"
2347 [(set_attr "type" "ssemov")
2348 (set_attr "mode" "V2SF,V4SF,V2SF")])
2350 (define_insn "*vec_extractv2df_0_sse"
2351 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2353 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2354 (parallel [(const_int 0)])))]
2355 "!TARGET_SSE2 && TARGET_SSE
2356 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2358 movlps\t{%1, %0|%0, %1}
2359 movaps\t{%1, %0|%0, %1}
2360 movlps\t{%1, %0|%0, %1}"
2361 [(set_attr "type" "ssemov")
2362 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merge two V2DF vectors: with a vec_merge mask of 1, element 0 of the
;; result comes from operand 2 and element 1 from operand 1.
;;
;; Alternatives (operand 0 / operand 2 / operand 1):
;;   0: x / x / 0   movsd   - low half from register operand 2
;;   1: x / m / 0   movlpd  - low half loaded from memory
;;   2: m / x / 0   movlpd  - low half stored to memory
;;   3: x / 0 / x   shufpd  - low half already in place; high half
;;                            taken from operand 1
;;   4: x / 0 / o   movhps  - high half loaded from the upper part of
;;                            memory operand 1 (%H1)
;;   5: o / 0 / x   movhps  - operand 1 stored to the upper part of the
;;                            destination (%H0)
;;
;; In alternative 3 the shufpd source must be operand 1: operand 2 is
;; the same register as operand 0, so naming it as the source would
;; leave the destination unchanged.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2381 (define_insn "*vec_dupv2df_sse3"
2382 [(set (match_operand:V2DF 0 "register_operand" "=x")
2384 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2386 "movddup\t{%1, %0|%0, %1}"
2387 [(set_attr "type" "sselog1")
2388 (set_attr "mode" "DF")])
2390 (define_insn "*vec_dupv2df"
2391 [(set (match_operand:V2DF 0 "register_operand" "=x")
2393 (match_operand:DF 1 "register_operand" "0")))]
2396 [(set_attr "type" "sselog1")
2397 (set_attr "mode" "V4SF")])
2399 (define_insn "*vec_concatv2df_sse3"
2400 [(set (match_operand:V2DF 0 "register_operand" "=x")
2402 (match_operand:DF 1 "nonimmediate_operand" "xm")
2405 "movddup\t{%1, %0|%0, %1}"
2406 [(set_attr "type" "sselog1")
2407 (set_attr "mode" "DF")])
2409 (define_insn "*vec_concatv2df"
2410 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2412 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2413 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2416 unpcklpd\t{%2, %0|%0, %2}
2417 movhpd\t{%2, %0|%0, %2}
2418 movsd\t{%1, %0|%0, %1}
2419 movlhps\t{%2, %0|%0, %2}
2420 movhps\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2422 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2424 (define_expand "vec_setv2df"
2425 [(match_operand:V2DF 0 "register_operand" "")
2426 (match_operand:DF 1 "register_operand" "")
2427 (match_operand 2 "const_int_operand" "")]
2430 ix86_expand_vector_set (false, operands[0], operands[1],
2431 INTVAL (operands[2]));
2435 (define_expand "vec_extractv2df"
2436 [(match_operand:DF 0 "register_operand" "")
2437 (match_operand:V2DF 1 "register_operand" "")
2438 (match_operand 2 "const_int_operand" "")]
2441 ix86_expand_vector_extract (false, operands[0], operands[1],
2442 INTVAL (operands[2]));
2446 (define_expand "vec_initv2df"
2447 [(match_operand:V2DF 0 "register_operand" "")
2448 (match_operand 1 "" "")]
2451 ix86_expand_vector_init (false, operands[0], operands[1]);
2455 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2457 ;; Parallel integral arithmetic
2459 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2461 (define_expand "neg<mode>2"
2462 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2465 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2467 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2469 (define_expand "add<mode>3"
2470 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2471 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2472 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2474 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2476 (define_insn "*add<mode>3"
2477 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2479 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2480 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2481 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2482 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2483 [(set_attr "type" "sseiadd")
2484 (set_attr "mode" "TI")])
2486 (define_insn "sse2_ssadd<mode>3"
2487 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2489 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2490 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2491 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2492 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2493 [(set_attr "type" "sseiadd")
2494 (set_attr "mode" "TI")])
2496 (define_insn "sse2_usadd<mode>3"
2497 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2499 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2500 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2501 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2502 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseiadd")
2504 (set_attr "mode" "TI")])
2506 (define_expand "sub<mode>3"
2507 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2508 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2509 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2511 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2513 (define_insn "*sub<mode>3"
2514 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2516 (match_operand:SSEMODEI 1 "register_operand" "0")
2517 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2519 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2520 [(set_attr "type" "sseiadd")
2521 (set_attr "mode" "TI")])
2523 (define_insn "sse2_sssub<mode>3"
2524 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2526 (match_operand:SSEMODE12 1 "register_operand" "0")
2527 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2529 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2530 [(set_attr "type" "sseiadd")
2531 (set_attr "mode" "TI")])
2533 (define_insn "sse2_ussub<mode>3"
2534 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2536 (match_operand:SSEMODE12 1 "register_operand" "0")
2537 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2539 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2540 [(set_attr "type" "sseiadd")
2541 (set_attr "mode" "TI")])
2543 (define_expand "mulv16qi3"
2544 [(set (match_operand:V16QI 0 "register_operand" "")
2545 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2546 (match_operand:V16QI 2 "register_operand" "")))]
2552 for (i = 0; i < 12; ++i)
2553 t[i] = gen_reg_rtx (V16QImode);
2555 /* Unpack data such that we've got a source byte in each low byte of
2556 each word. We don't care what goes into the high byte of each word.
2557 Rather than trying to get zero in there, most convenient is to let
2558 it be a copy of the low byte. */
2559 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2560 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2561 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2562 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2564 /* Multiply words. The end-of-line annotations here give a picture of what
2565 the output of that instruction looks like. Dot means don't care; the
2566 letters are the bytes of the result with A being the most significant. */
2567 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2568 gen_lowpart (V8HImode, t[0]),
2569 gen_lowpart (V8HImode, t[1])));
2570 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2571 gen_lowpart (V8HImode, t[2]),
2572 gen_lowpart (V8HImode, t[3])));
2574 /* Extract the relevant bytes and merge them back together. */
2575 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2576 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2577 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2578 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2579 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2580 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2583 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
2587 (define_expand "mulv8hi3"
2588 [(set (match_operand:V8HI 0 "register_operand" "")
2589 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2590 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2592 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2594 (define_insn "*mulv8hi3"
2595 [(set (match_operand:V8HI 0 "register_operand" "=x")
2596 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2597 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2598 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2599 "pmullw\t{%2, %0|%0, %2}"
2600 [(set_attr "type" "sseimul")
2601 (set_attr "mode" "TI")])
2603 (define_insn "sse2_smulv8hi3_highpart"
2604 [(set (match_operand:V8HI 0 "register_operand" "=x")
2609 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2611 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2613 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2614 "pmulhw\t{%2, %0|%0, %2}"
2615 [(set_attr "type" "sseimul")
2616 (set_attr "mode" "TI")])
2618 (define_insn "sse2_umulv8hi3_highpart"
2619 [(set (match_operand:V8HI 0 "register_operand" "=x")
2624 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2626 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2628 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2629 "pmulhuw\t{%2, %0|%0, %2}"
2630 [(set_attr "type" "sseimul")
2631 (set_attr "mode" "TI")])
;; Widening unsigned multiply (pmuludq): elements 0 and 2 of each V4SI
;; input are zero-extended to 64 bits and multiplied, giving a V2DI
;; result.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.
;;
;; The mode passed to ix86_binary_operator_ok names the V4SI inputs
;; rather than V8HImode, which none of this pattern's operands has.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2649 (define_insn "sse2_pmaddwd"
2650 [(set (match_operand:V4SI 0 "register_operand" "=x")
2655 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2656 (parallel [(const_int 0)
2662 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2663 (parallel [(const_int 0)
2669 (vec_select:V4HI (match_dup 1)
2670 (parallel [(const_int 1)
2675 (vec_select:V4HI (match_dup 2)
2676 (parallel [(const_int 1)
2679 (const_int 7)]))))))]
2681 "pmaddwd\t{%2, %0|%0, %2}"
2682 [(set_attr "type" "sseiadd")
2683 (set_attr "mode" "TI")])
2685 (define_expand "mulv4si3"
2686 [(set (match_operand:V4SI 0 "register_operand" "")
2687 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2688 (match_operand:V4SI 2 "register_operand" "")))]
2691 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2697 t1 = gen_reg_rtx (V4SImode);
2698 t2 = gen_reg_rtx (V4SImode);
2699 t3 = gen_reg_rtx (V4SImode);
2700 t4 = gen_reg_rtx (V4SImode);
2701 t5 = gen_reg_rtx (V4SImode);
2702 t6 = gen_reg_rtx (V4SImode);
2703 thirtytwo = GEN_INT (32);
2705 /* Multiply elements 2 and 0. */
2706 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2708 /* Shift both input vectors down one element, so that elements 3 and 1
2709 are now in the slots for elements 2 and 0. For K8, at least, this is
2710 faster than using a shuffle. */
2711 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2712 gen_lowpart (TImode, op1), thirtytwo));
2713 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2714 gen_lowpart (TImode, op2), thirtytwo));
2716 /* Multiply elements 3 and 1. */
2717 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2719 /* Move the results in element 2 down to element 1; we don't care what
2720 goes in elements 2 and 3. */
2721 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2722 const0_rtx, const0_rtx));
2723 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2724 const0_rtx, const0_rtx));
2726 /* Merge the parts back together. */
2727 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, synthesized from 32x32->64 unsigned multiplies.
;; Writing each 64-bit element as hi * 2^32 + lo, the low 64 bits of
;; the product are
;;   lo1 * lo2 + ((lo1 * hi2 + lo2 * hi1) << 32)
;; which is what the sequence below computes; the hi1 * hi2 term would
;; only affect bits above 64 and is not generated.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V2DImode);
  t2 = gen_reg_rtx (V2DImode);
  t3 = gen_reg_rtx (V2DImode);
  t4 = gen_reg_rtx (V2DImode);
  t5 = gen_reg_rtx (V2DImode);
  t6 = gen_reg_rtx (V2DImode);
  thirtytwo = GEN_INT (32);

  /* Multiply low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, op2)));

  /* Shift input vectors right 32 bits so that the high part of each
     element lands in the low-part slot read by the widening multiply.  */
  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

  /* Multiply high parts by low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, t3)));
  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                     gen_lowpart (V4SImode, t2)));

  /* Shift them back.  */
  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

  /* Add the three parts together.  */
  emit_insn (gen_addv2di3 (t6, t1, t4));
  emit_insn (gen_addv2di3 (op0, t6, t5));
  DONE;
})
2775 (define_expand "sdot_prodv8hi"
2776 [(match_operand:V4SI 0 "register_operand" "")
2777 (match_operand:V8HI 1 "nonimmediate_operand" "")
2778 (match_operand:V8HI 2 "nonimmediate_operand" "")
2779 (match_operand:V4SI 3 "register_operand" "")]
2782 rtx t = gen_reg_rtx (V4SImode);
2783 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2784 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
2788 (define_expand "udot_prodv4si"
2789 [(match_operand:V2DI 0 "register_operand" "")
2790 (match_operand:V4SI 1 "register_operand" "")
2791 (match_operand:V4SI 2 "register_operand" "")
2792 (match_operand:V2DI 3 "register_operand" "")]
2797 t1 = gen_reg_rtx (V2DImode);
2798 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2799 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2801 t2 = gen_reg_rtx (V4SImode);
2802 t3 = gen_reg_rtx (V4SImode);
2803 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2804 gen_lowpart (TImode, operands[1]),
2806 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2807 gen_lowpart (TImode, operands[2]),
2810 t4 = gen_reg_rtx (V2DImode);
2811 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2813 emit_insn (gen_addv2di3 (operands[0], t1, t4));
2817 (define_insn "ashr<mode>3"
2818 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2820 (match_operand:SSEMODE24 1 "register_operand" "0")
2821 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2823 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2824 [(set_attr "type" "sseishft")
2825 (set_attr "mode" "TI")])
2827 (define_insn "lshr<mode>3"
2828 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2829 (lshiftrt:SSEMODE248
2830 (match_operand:SSEMODE248 1 "register_operand" "0")
2831 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2833 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2834 [(set_attr "type" "sseishft")
2835 (set_attr "mode" "TI")])
2837 (define_insn "ashl<mode>3"
2838 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2840 (match_operand:SSEMODE248 1 "register_operand" "0")
2841 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2843 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2844 [(set_attr "type" "sseishft")
2845 (set_attr "mode" "TI")])
2847 (define_insn "sse2_ashlti3"
2848 [(set (match_operand:TI 0 "register_operand" "=x")
2849 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2850 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2853 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2854 return "pslldq\t{%2, %0|%0, %2}";
2856 [(set_attr "type" "sseishft")
2857 (set_attr "mode" "TI")])
2859 (define_expand "vec_shl_<mode>"
2860 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2861 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2862 (match_operand:SI 2 "general_operand" "")))]
2865 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2867 operands[0] = gen_lowpart (TImode, operands[0]);
2868 operands[1] = gen_lowpart (TImode, operands[1]);
2871 (define_insn "sse2_lshrti3"
2872 [(set (match_operand:TI 0 "register_operand" "=x")
2873 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2874 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2877 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2878 return "psrldq\t{%2, %0|%0, %2}";
2880 [(set_attr "type" "sseishft")
2881 (set_attr "mode" "TI")])
2883 (define_expand "vec_shr_<mode>"
2884 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2885 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2886 (match_operand:SI 2 "general_operand" "")))]
2889 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2891 operands[0] = gen_lowpart (TImode, operands[0]);
2892 operands[1] = gen_lowpart (TImode, operands[1]);
2895 (define_expand "umaxv16qi3"
2896 [(set (match_operand:V16QI 0 "register_operand" "")
2897 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2898 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2900 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2902 (define_insn "*umaxv16qi3"
2903 [(set (match_operand:V16QI 0 "register_operand" "=x")
2904 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2905 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2906 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2907 "pmaxub\t{%2, %0|%0, %2}"
2908 [(set_attr "type" "sseiadd")
2909 (set_attr "mode" "TI")])
2911 (define_expand "smaxv8hi3"
2912 [(set (match_operand:V8HI 0 "register_operand" "")
2913 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2914 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2916 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2918 (define_insn "*smaxv8hi3"
2919 [(set (match_operand:V8HI 0 "register_operand" "=x")
2920 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2921 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2922 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2923 "pmaxsw\t{%2, %0|%0, %2}"
2924 [(set_attr "type" "sseiadd")
2925 (set_attr "mode" "TI")])
2927 (define_expand "umaxv8hi3"
2928 [(set (match_operand:V8HI 0 "register_operand" "=x")
2929 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2930 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2932 (plus:V8HI (match_dup 0) (match_dup 2)))]
2935 operands[3] = operands[0];
2936 if (rtx_equal_p (operands[0], operands[2]))
2937 operands[0] = gen_reg_rtx (V8HImode);
2940 (define_expand "smax<mode>3"
2941 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2942 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2943 (match_operand:SSEMODE14 2 "register_operand" "")))]
2949 xops[0] = operands[0];
2950 xops[1] = operands[1];
2951 xops[2] = operands[2];
2952 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2953 xops[4] = operands[1];
2954 xops[5] = operands[2];
2955 ok = ix86_expand_int_vcond (xops);
2960 (define_expand "umaxv4si3"
2961 [(set (match_operand:V4SI 0 "register_operand" "")
2962 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2963 (match_operand:V4SI 2 "register_operand" "")))]
2969 xops[0] = operands[0];
2970 xops[1] = operands[1];
2971 xops[2] = operands[2];
2972 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2973 xops[4] = operands[1];
2974 xops[5] = operands[2];
2975 ok = ix86_expand_int_vcond (xops);
;; Unsigned byte-wise minimum of two V16QI vectors.  The expander only
;; fixes up the operands for the two-address form; the rtx code handed
;; to the fixup helper is UMIN, the same code used by this pattern and
;; by the condition of the *uminv16qi3 insn that matches the result.
(define_expand "uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2987 (define_insn "*uminv16qi3"
2988 [(set (match_operand:V16QI 0 "register_operand" "=x")
2989 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2990 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2991 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2992 "pminub\t{%2, %0|%0, %2}"
2993 [(set_attr "type" "sseiadd")
2994 (set_attr "mode" "TI")])
2996 (define_expand "sminv8hi3"
2997 [(set (match_operand:V8HI 0 "register_operand" "")
2998 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2999 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3001 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
3003 (define_insn "*sminv8hi3"
3004 [(set (match_operand:V8HI 0 "register_operand" "=x")
3005 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3006 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3007 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3008 "pminsw\t{%2, %0|%0, %2}"
3009 [(set_attr "type" "sseiadd")
3010 (set_attr "mode" "TI")])
3012 (define_expand "smin<mode>3"
3013 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3014 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3015 (match_operand:SSEMODE14 2 "register_operand" "")))]
3021 xops[0] = operands[0];
3022 xops[1] = operands[2];
3023 xops[2] = operands[1];
3024 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3025 xops[4] = operands[1];
3026 xops[5] = operands[2];
3027 ok = ix86_expand_int_vcond (xops);
3032 (define_expand "umin<mode>3"
3033 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3034 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3035 (match_operand:SSEMODE24 2 "register_operand" "")))]
3041 xops[0] = operands[0];
3042 xops[1] = operands[2];
3043 xops[2] = operands[1];
3044 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3045 xops[4] = operands[1];
3046 xops[5] = operands[2];
3047 ok = ix86_expand_int_vcond (xops);
3052 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3054 ;; Parallel integral comparisons
3056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3058 (define_insn "sse2_eq<mode>3"
3059 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3061 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3062 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3063 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3064 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3065 [(set_attr "type" "ssecmp")
3066 (set_attr "mode" "TI")])
3068 (define_insn "sse2_gt<mode>3"
3069 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3071 (match_operand:SSEMODE124 1 "register_operand" "0")
3072 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3074 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3075 [(set_attr "type" "ssecmp")
3076 (set_attr "mode" "TI")])
3078 (define_expand "vcond<mode>"
3079 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3080 (if_then_else:SSEMODE124
3081 (match_operator 3 ""
3082 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3083 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3084 (match_operand:SSEMODE124 1 "general_operand" "")
3085 (match_operand:SSEMODE124 2 "general_operand" "")))]
3088 if (ix86_expand_int_vcond (operands))
3094 (define_expand "vcondu<mode>"
3095 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3096 (if_then_else:SSEMODE124
3097 (match_operator 3 ""
3098 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3099 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3100 (match_operand:SSEMODE124 1 "general_operand" "")
3101 (match_operand:SSEMODE124 2 "general_operand" "")))]
3104 if (ix86_expand_int_vcond (operands))
3110 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3112 ;; Parallel integral logical operations
3114 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3116 (define_expand "one_cmpl<mode>2"
3117 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3118 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3122 int i, n = GET_MODE_NUNITS (<MODE>mode);
3123 rtvec v = rtvec_alloc (n);
3125 for (i = 0; i < n; ++i)
3126 RTVEC_ELT (v, i) = constm1_rtx;
3128 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
3131 (define_expand "and<mode>3"
3132 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3133 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3134 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3136 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3138 (define_insn "*and<mode>3"
3139 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3141 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3142 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3143 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3144 "pand\t{%2, %0|%0, %2}"
3145 [(set_attr "type" "sselog")
3146 (set_attr "mode" "TI")])
3148 (define_insn "sse2_nand<mode>3"
3149 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3151 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3152 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3154 "pandn\t{%2, %0|%0, %2}"
3155 [(set_attr "type" "sselog")
3156 (set_attr "mode" "TI")])
;; Expander for the bitwise inclusive OR of two SSEMODEI vectors.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
3158 (define_expand "ior<mode>3"
3159 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3160 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3161 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3163 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Insn behind the IOR expander.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0"); operand 2 may be an SSE register or
;; memory.  Requires TARGET_SSE2 and an operand combination accepted by
;; ix86_binary_operator_ok.  Emits por.
3165 (define_insn "*ior<mode>3"
3166 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3168 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3169 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3170 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3171 "por\t{%2, %0|%0, %2}"
3172 [(set_attr "type" "sselog")
3173 (set_attr "mode" "TI")])
;; Expander for the bitwise exclusive OR of two SSEMODEI vectors.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
3175 (define_expand "xor<mode>3"
3176 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3177 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3178 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3180 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Insn behind the XOR expander.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0"); operand 2 may be an SSE register or
;; memory.  Requires TARGET_SSE2 and an operand combination accepted by
;; ix86_binary_operator_ok.  Emits pxor.
3182 (define_insn "*xor<mode>3"
3183 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3185 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3186 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3187 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3188 "pxor\t{%2, %0|%0, %2}"
3189 [(set_attr "type" "sselog")
3190 (set_attr "mode" "TI")])
3192 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3194 ;; Parallel integral element swizzling
3196 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packsswb: narrows two V8HI inputs into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3198 (define_insn "sse2_packsswb"
3199 [(set (match_operand:V16QI 0 "register_operand" "=x")
3202 (match_operand:V8HI 1 "register_operand" "0"))
3204 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3206 "packsswb\t{%2, %0|%0, %2}"
3207 [(set_attr "type" "sselog")
3208 (set_attr "mode" "TI")])
;; packssdw: narrows two V4SI inputs into one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3210 (define_insn "sse2_packssdw"
3211 [(set (match_operand:V8HI 0 "register_operand" "=x")
3214 (match_operand:V4SI 1 "register_operand" "0"))
3216 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3218 "packssdw\t{%2, %0|%0, %2}"
3219 [(set_attr "type" "sselog")
3220 (set_attr "mode" "TI")])
;; packuswb: narrows two V8HI inputs into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
3222 (define_insn "sse2_packuswb"
3223 [(set (match_operand:V16QI 0 "register_operand" "=x")
3226 (match_operand:V8HI 1 "register_operand" "0"))
3228 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3230 "packuswb\t{%2, %0|%0, %2}"
3231 [(set_attr "type" "sselog")
3232 (set_attr "mode" "TI")])
;; punpckhbw: byte interleave of the upper halves.  The selector alternates
;; indices 8..15 with indices 24..31 taken over the two V16QI operands
;; (operand 1 tied to the destination, operand 2 register or memory).
3234 (define_insn "sse2_punpckhbw"
3235 [(set (match_operand:V16QI 0 "register_operand" "=x")
3238 (match_operand:V16QI 1 "register_operand" "0")
3239 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3240 (parallel [(const_int 8) (const_int 24)
3241 (const_int 9) (const_int 25)
3242 (const_int 10) (const_int 26)
3243 (const_int 11) (const_int 27)
3244 (const_int 12) (const_int 28)
3245 (const_int 13) (const_int 29)
3246 (const_int 14) (const_int 30)
3247 (const_int 15) (const_int 31)])))]
3249 "punpckhbw\t{%2, %0|%0, %2}"
3250 [(set_attr "type" "sselog")
3251 (set_attr "mode" "TI")])
;; punpcklbw: byte interleave of the lower halves.  The selector alternates
;; indices 0..7 with indices 16..23 taken over the two V16QI operands
;; (operand 1 tied to the destination, operand 2 register or memory).
3253 (define_insn "sse2_punpcklbw"
3254 [(set (match_operand:V16QI 0 "register_operand" "=x")
3257 (match_operand:V16QI 1 "register_operand" "0")
3258 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3259 (parallel [(const_int 0) (const_int 16)
3260 (const_int 1) (const_int 17)
3261 (const_int 2) (const_int 18)
3262 (const_int 3) (const_int 19)
3263 (const_int 4) (const_int 20)
3264 (const_int 5) (const_int 21)
3265 (const_int 6) (const_int 22)
3266 (const_int 7) (const_int 23)])))]
3268 "punpcklbw\t{%2, %0|%0, %2}"
3269 [(set_attr "type" "sselog")
3270 (set_attr "mode" "TI")])
;; punpckhwd: word interleave of the upper halves.  The selector alternates
;; indices 4..7 with indices 12..15 taken over the two V8HI operands.
3272 (define_insn "sse2_punpckhwd"
3273 [(set (match_operand:V8HI 0 "register_operand" "=x")
3276 (match_operand:V8HI 1 "register_operand" "0")
3277 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3278 (parallel [(const_int 4) (const_int 12)
3279 (const_int 5) (const_int 13)
3280 (const_int 6) (const_int 14)
3281 (const_int 7) (const_int 15)])))]
3283 "punpckhwd\t{%2, %0|%0, %2}"
3284 [(set_attr "type" "sselog")
3285 (set_attr "mode" "TI")])
;; punpcklwd: word interleave of the lower halves.  The selector alternates
;; indices 0..3 with indices 8..11 taken over the two V8HI operands.
3287 (define_insn "sse2_punpcklwd"
3288 [(set (match_operand:V8HI 0 "register_operand" "=x")
3291 (match_operand:V8HI 1 "register_operand" "0")
3292 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3293 (parallel [(const_int 0) (const_int 8)
3294 (const_int 1) (const_int 9)
3295 (const_int 2) (const_int 10)
3296 (const_int 3) (const_int 11)])))]
3298 "punpcklwd\t{%2, %0|%0, %2}"
3299 [(set_attr "type" "sselog")
3300 (set_attr "mode" "TI")])
;; punpckhdq: doubleword interleave of the upper halves.  The selector is
;; 2, 6, 3, 7 taken over the two V4SI operands.
3302 (define_insn "sse2_punpckhdq"
3303 [(set (match_operand:V4SI 0 "register_operand" "=x")
3306 (match_operand:V4SI 1 "register_operand" "0")
3307 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3308 (parallel [(const_int 2) (const_int 6)
3309 (const_int 3) (const_int 7)])))]
3311 "punpckhdq\t{%2, %0|%0, %2}"
3312 [(set_attr "type" "sselog")
3313 (set_attr "mode" "TI")])
;; punpckldq: doubleword interleave of the lower halves.  The selector is
;; 0, 4, 1, 5 taken over the two V4SI operands.
3315 (define_insn "sse2_punpckldq"
3316 [(set (match_operand:V4SI 0 "register_operand" "=x")
3319 (match_operand:V4SI 1 "register_operand" "0")
3320 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3321 (parallel [(const_int 0) (const_int 4)
3322 (const_int 1) (const_int 5)])))]
3324 "punpckldq\t{%2, %0|%0, %2}"
3325 [(set_attr "type" "sselog")
3326 (set_attr "mode" "TI")])
;; punpckhqdq on two V2DI operands; the selector starts with index 1.
;; Operand 1 is tied to the destination, operand 2 may be register or memory.
3328 (define_insn "sse2_punpckhqdq"
3329 [(set (match_operand:V2DI 0 "register_operand" "=x")
3332 (match_operand:V2DI 1 "register_operand" "0")
3333 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3334 (parallel [(const_int 1)
3337 "punpckhqdq\t{%2, %0|%0, %2}"
3338 [(set_attr "type" "sselog")
3339 (set_attr "mode" "TI")])
;; punpcklqdq on two V2DI operands; the selector starts with index 0.
;; Operand 1 is tied to the destination, operand 2 may be register or memory.
3341 (define_insn "sse2_punpcklqdq"
3342 [(set (match_operand:V2DI 0 "register_operand" "=x")
3345 (match_operand:V2DI 1 "register_operand" "0")
3346 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3347 (parallel [(const_int 0)
3350 "punpcklqdq\t{%2, %0|%0, %2}"
3351 [(set_attr "type" "sselog")
3352 (set_attr "mode" "TI")])
;; Expander for the pinsrw intrinsic.  The SImode value in operand 2 is
;; narrowed to HImode with gen_lowpart, and the element index in operand 3
;; (0..7) is rewritten as the one-bit mask 1 << index, which is the form
;; the matching insn's const_pow2_1_to_128_operand predicate expects.
3354 (define_expand "sse2_pinsrw"
3355 [(set (match_operand:V8HI 0 "register_operand" "")
3358 (match_operand:SI 2 "nonimmediate_operand" ""))
3359 (match_operand:V8HI 1 "register_operand" "")
3360 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3363 operands[2] = gen_lowpart (HImode, operands[2]);
3364 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Insn form of pinsrw.  Operand 3 arrives as a power-of-two mask (1..128);
;; the output code converts it back to an element index with exact_log2
;; before printing.  Operand 2 is printed with the %k modifier.
3367 (define_insn "*sse2_pinsrw"
3368 [(set (match_operand:V8HI 0 "register_operand" "=x")
3371 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3372 (match_operand:V8HI 1 "register_operand" "0")
3373 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3376 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3377 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3379 [(set_attr "type" "sselog")
3380 (set_attr "mode" "TI")])
;; pextrw: reads the V8HI element selected by the constant in operand 2
;; (0..7) from an SSE register into an SImode general register.
3382 (define_insn "sse2_pextrw"
3383 [(set (match_operand:SI 0 "register_operand" "=r")
3386 (match_operand:V8HI 1 "register_operand" "x")
3387 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3389 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3390 [(set_attr "type" "sselog")
3391 (set_attr "mode" "TI")])
;; Expander for the pshufd intrinsic.  The 8-bit immediate in operand 2 is
;; split into four 2-bit element selectors (bits 0-1, 2-3, 4-5, 6-7), which
;; are passed as separate constants to gen_sse2_pshufd_1.
3393 (define_expand "sse2_pshufd"
3394 [(match_operand:V4SI 0 "register_operand" "")
3395 (match_operand:V4SI 1 "nonimmediate_operand" "")
3396 (match_operand:SI 2 "const_int_operand" "")]
3399 int mask = INTVAL (operands[2]);
3400 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3401 GEN_INT ((mask >> 0) & 3),
3402 GEN_INT ((mask >> 2) & 3),
3403 GEN_INT ((mask >> 4) & 3),
3404 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshufd with the shuffle given as four separate selectors
;; (operands 2..5, each 0..3).  The output code packs them back into one
;; immediate, two bits per selector, stores it in operands[2] and prints
;; pshufd with that immediate.
3408 (define_insn "sse2_pshufd_1"
3409 [(set (match_operand:V4SI 0 "register_operand" "=x")
3411 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3412 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3413 (match_operand 3 "const_0_to_3_operand" "")
3414 (match_operand 4 "const_0_to_3_operand" "")
3415 (match_operand 5 "const_0_to_3_operand" "")])))]
3419 mask |= INTVAL (operands[2]) << 0;
3420 mask |= INTVAL (operands[3]) << 2;
3421 mask |= INTVAL (operands[4]) << 4;
3422 mask |= INTVAL (operands[5]) << 6;
3423 operands[2] = GEN_INT (mask);
3425 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3427 [(set_attr "type" "sselog1")
3428 (set_attr "mode" "TI")])
;; Expander for the pshuflw intrinsic.  The 8-bit immediate in operand 2 is
;; split into four 2-bit selectors, which are passed as separate constants
;; to gen_sse2_pshuflw_1.
3430 (define_expand "sse2_pshuflw"
3431 [(match_operand:V8HI 0 "register_operand" "")
3432 (match_operand:V8HI 1 "nonimmediate_operand" "")
3433 (match_operand:SI 2 "const_int_operand" "")]
3436 int mask = INTVAL (operands[2]);
3437 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3438 GEN_INT ((mask >> 0) & 3),
3439 GEN_INT ((mask >> 2) & 3),
3440 GEN_INT ((mask >> 4) & 3),
3441 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshuflw.  Operands 2..5 are selectors in the range 0..3 for
;; the first four V8HI elements.  The output code packs them into one
;; immediate, two bits per selector, stores it in operands[2] and prints
;; pshuflw with that immediate.
3445 (define_insn "sse2_pshuflw_1"
3446 [(set (match_operand:V8HI 0 "register_operand" "=x")
3448 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3449 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3450 (match_operand 3 "const_0_to_3_operand" "")
3451 (match_operand 4 "const_0_to_3_operand" "")
3452 (match_operand 5 "const_0_to_3_operand" "")
3460 mask |= INTVAL (operands[2]) << 0;
3461 mask |= INTVAL (operands[3]) << 2;
3462 mask |= INTVAL (operands[4]) << 4;
3463 mask |= INTVAL (operands[5]) << 6;
3464 operands[2] = GEN_INT (mask);
3466 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3468 [(set_attr "type" "sselog")
3469 (set_attr "mode" "TI")])
;; Expander for the pshufhw intrinsic.  The 8-bit immediate in operand 2 is
;; split into four 2-bit selectors; each is biased by 4 so that it lies in
;; the 4..7 range required by sse2_pshufhw_1's const_4_to_7_operand
;; predicates.
3471 (define_expand "sse2_pshufhw"
3472 [(match_operand:V8HI 0 "register_operand" "")
3473 (match_operand:V8HI 1 "nonimmediate_operand" "")
3474 (match_operand:SI 2 "const_int_operand" "")]
3477 int mask = INTVAL (operands[2]);
3478 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3479 GEN_INT (((mask >> 0) & 3) + 4),
3480 GEN_INT (((mask >> 2) & 3) + 4),
3481 GEN_INT (((mask >> 4) & 3) + 4),
3482 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of pshufhw.  Operands 2..5 are selectors in the range 4..7.
;; The output code subtracts the bias of 4 from each, packs the results
;; into one immediate (two bits per selector), stores it in operands[2]
;; and prints pshufhw with that immediate.
3486 (define_insn "sse2_pshufhw_1"
3487 [(set (match_operand:V8HI 0 "register_operand" "=x")
3489 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3490 (parallel [(const_int 0)
3494 (match_operand 2 "const_4_to_7_operand" "")
3495 (match_operand 3 "const_4_to_7_operand" "")
3496 (match_operand 4 "const_4_to_7_operand" "")
3497 (match_operand 5 "const_4_to_7_operand" "")])))]
3501 mask |= (INTVAL (operands[2]) - 4) << 0;
3502 mask |= (INTVAL (operands[3]) - 4) << 2;
3503 mask |= (INTVAL (operands[4]) - 4) << 4;
3504 mask |= (INTVAL (operands[5]) - 4) << 6;
3505 operands[2] = GEN_INT (mask);
3507 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3509 [(set_attr "type" "sselog")
3510 (set_attr "mode" "TI")])
;; Expander that places an SImode value into a V4SI register.  The
;; preparation statement supplies operands[2] as the all-zero V4SI constant.
3512 (define_expand "sse2_loadd"
3513 [(set (match_operand:V4SI 0 "register_operand" "")
3516 (match_operand:SI 1 "nonimmediate_operand" ""))
3520 "operands[2] = CONST0_RTX (V4SImode);")
;; Inserts the SImode operand 2 into a V4SI value whose other contents come
;; from operand 1 (a register or zero).  Three alternatives:
;;   0: movd from memory or a general register, operand 1 constrained to "C";
;;   1: movss from memory, operand 1 constrained to "C";
;;   2: movss from an SSE register, operand 1 tied to the destination.
3522 (define_insn "sse2_loadld"
3523 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3526 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3527 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3531 movd\t{%2, %0|%0, %2}
3532 movss\t{%2, %0|%0, %2}
3533 movss\t{%2, %0|%0, %2}"
3534 [(set_attr "type" "ssemov")
3535 (set_attr "mode" "TI,V4SF,SF")])
;; Stores element 0 of a V4SI register to an SImode destination (memory or
;; SSE register).  Once reload has completed the pattern is split into a
;; plain SImode move: operand 1 is replaced by an SImode REG with the same
;; register number.
3537 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3538 ;; be taken into account, and movdi isn't fully populated even without.
3539 (define_insn_and_split "sse2_stored"
3540 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3542 (match_operand:V4SI 1 "register_operand" "x")
3543 (parallel [(const_int 0)])))]
3546 "&& reload_completed"
3547 [(set (match_dup 0) (match_dup 1))]
3549 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of a V2DI register to a DImode
;; destination.
3552 (define_expand "sse_storeq"
3553 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3555 (match_operand:V2DI 1 "register_operand" "")
3556 (parallel [(const_int 0)])))]
;; Insn for storing element 0 of a V2DI register to a DImode destination
;; (memory or SSE register), followed by its post-reload rewrite.
3560 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3561 ;; be taken into account, and movdi isn't fully populated even without.
3562 (define_insn "*sse2_storeq"
3563 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3565 (match_operand:V2DI 1 "register_operand" "x")
3566 (parallel [(const_int 0)])))]
;; Post-reload rewrite of the same extraction (TARGET_SSE &&
;; reload_completed): it becomes a plain DImode move, with operand 1
;; replaced by a DImode REG that has the same register number.
3571 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3573 (match_operand:V2DI 1 "register_operand" "")
3574 (parallel [(const_int 0)])))]
3575 "TARGET_SSE && reload_completed"
3576 [(set (match_dup 0) (match_dup 1))]
3578 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 (the upper quadword) of a V2DI value into a DImode
;; destination when SSE2 is available.  Memory-to-memory is rejected by the
;; insn condition.  Alternatives:
;;   0: SSE register -> memory, movhps.
;;   1: in place in an SSE register (operand 1 tied to the destination),
;;      psrldq.  psrldq counts bytes, and element 1 of a V2DI starts 8 bytes
;;      up, so the shift count must be 8; a count of 4 would leave bits
;;      32..95 in the low quadword instead of element 1.
;;   2: offsettable memory -> SSE register, movq from %H1 (presumably the
;;      upper eight bytes of the memory operand; confirm against the 'H'
;;      operand modifier in i386.c).
3581 (define_insn "*vec_extractv2di_1_sse2"
3582 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3584 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3585 (parallel [(const_int 1)])))]
3586 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3588 movhps\t{%1, %0|%0, %1}
3589 psrldq\t{$8, %0|%0, 8}
3590 movq\t{%H1, %0|%0, %H1}"
3591 [(set_attr "type" "ssemov,sseishft,ssemov")
3592 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only counterpart (enabled when TARGET_SSE is set and TARGET_SSE2 is
;; not) for extracting element 1 of a V2DI value into a DImode destination.
;; Memory-to-memory is rejected by the insn condition.  Alternatives:
;;   0: SSE register -> memory, movhps;
;;   1: SSE register -> SSE register, movhlps;
;;   2: offsettable memory -> SSE register, movlps from %H1.
3594 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
3595 (define_insn "*vec_extractv2di_1_sse"
3596 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3598 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3599 (parallel [(const_int 1)])))]
3600 "!TARGET_SSE2 && TARGET_SSE
3601 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3603 movhps\t{%1, %0|%0, %1}
3604 movhlps\t{%1, %0|%0, %1}
3605 movlps\t{%H1, %0|%0, %H1}"
3606 [(set_attr "type" "ssemov")
3607 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI value from a single SImode register operand.  Two
;; alternatives, both using shuffle immediate 0:
;;   0: pshufd from the source register into the destination;
;;   1: shufps of the destination with itself (source tied to destination).
3609 (define_insn "*vec_dupv4si"
3610 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3612 (match_operand:SI 1 "register_operand" " Y,0")))]
3615 pshufd\t{$0, %1, %0|%0, %1, 0}
3616 shufps\t{$0, %0, %0|%0, %0, 0}"
3617 [(set_attr "type" "sselog1")
3618 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI value from a single DImode register operand, which is tied
;; to the destination in both alternatives.
3620 (define_insn "*vec_dupv2di"
3621 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3623 (match_operand:DI 1 "register_operand" " 0,0")))]
3628 [(set_attr "type" "sselog1,ssemov")
3629 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from two SImode operands.  Four alternatives:
;;   0: punpckldq with operand 1 tied to the destination ("Y" registers);
;;   1: movd of operand 1 when operand 2 is constrained to "C";
;;   2, 3: the same two shapes using the "y" register class, typed as
;;         mmxcvt / mmxmov and discouraged for register preferencing by
;;         the "*" prefix.
3631 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3632 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3633 ;; alternatives pretty much forces the MMX alternative to be chosen.
3634 (define_insn "*sse2_concatv2si"
3635 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3637 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3638 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3641 punpckldq\t{%2, %0|%0, %2}
3642 movd\t{%1, %0|%0, %1}
3643 punpckldq\t{%2, %0|%0, %2}
3644 movd\t{%1, %0|%0, %1}"
3645 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3646 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of the V2SI concatenation that uses unpcklps / movss for the SSE
;; register alternatives.  Four alternatives:
;;   0: unpcklps with operand 1 tied to the destination;
;;   1: movss of a memory operand 1 when operand 2 is constrained to "C";
;;   2, 3: punpckldq / movd using the "y" register class.
3648 (define_insn "*sse1_concatv2si"
3649 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3651 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3652 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3655 unpcklps\t{%2, %0|%0, %2}
3656 movss\t{%1, %0|%0, %1}
3657 punpckldq\t{%2, %0|%0, %2}
3658 movd\t{%1, %0|%0, %1}"
3659 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3660 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI value from two V2SI halves.  Operand 1 is always tied to
;; the destination; operand 2 supplies the other half.  Alternatives:
;;   0: punpcklqdq with a "Y" register;
;;   1: movlhps with an "x" register;
;;   2: movhps with a memory operand.
3662 (define_insn "*vec_concatv4si_1"
3663 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3665 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3666 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3669 punpcklqdq\t{%2, %0|%0, %2}
3670 movlhps\t{%2, %0|%0, %2}
3671 movhps\t{%2, %0|%0, %2}"
3672 [(set_attr "type" "sselog,ssemov,ssemov")
3673 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI value from two DImode operands.  Six alternatives:
;;   0: movq of a memory operand 1, operand 2 constrained to "C";
;;   1: movq2dq of a "y" register operand 1, operand 2 constrained to "C";
;;   2: punpcklqdq, operand 1 tied to the destination, operand 2 in "Y";
;;   3: movlhps, operand 1 tied to the destination, operand 2 in "x";
;;   4: movhps, operand 1 tied to the destination, operand 2 in memory;
;;   5: movlps of a memory operand 1, operand 2 tied to the destination.
3675 (define_insn "*vec_concatv2di"
3676 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3678 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3679 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3682 movq\t{%1, %0|%0, %1}
3683 movq2dq\t{%1, %0|%0, %1}
3684 punpcklqdq\t{%2, %0|%0, %2}
3685 movlhps\t{%2, %0|%0, %2}
3686 movhps\t{%2, %0|%0, %2}
3687 movlps\t{%1, %0|%0, %1}"
3688 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3689 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one element of a V2DI register: operand 1 is the
;; DImode value and operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument.
3691 (define_expand "vec_setv2di"
3692 [(match_operand:V2DI 0 "register_operand" "")
3693 (match_operand:DI 1 "register_operand" "")
3694 (match_operand 2 "const_int_operand" "")]
3697 ix86_expand_vector_set (false, operands[0], operands[1],
3698 INTVAL (operands[2]));
;; Expander for reading one element of a V2DI register into a DImode
;; register; operand 2 is the constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its first
;; argument.
3702 (define_expand "vec_extractv2di"
3703 [(match_operand:DI 0 "register_operand" "")
3704 (match_operand:V2DI 1 "register_operand" "")
3705 (match_operand 2 "const_int_operand" "")]
3708 ix86_expand_vector_extract (false, operands[0], operands[1],
3709 INTVAL (operands[2]));
;; Expander for initializing a V2DI register from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
3713 (define_expand "vec_initv2di"
3714 [(match_operand:V2DI 0 "register_operand" "")
3715 (match_operand 1 "" "")]
3718 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V4SI register: operand 1 is the
;; SImode value and operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument.
3722 (define_expand "vec_setv4si"
3723 [(match_operand:V4SI 0 "register_operand" "")
3724 (match_operand:SI 1 "register_operand" "")
3725 (match_operand 2 "const_int_operand" "")]
3728 ix86_expand_vector_set (false, operands[0], operands[1],
3729 INTVAL (operands[2]));
;; Expander for reading one element of a V4SI register into an SImode
;; register; operand 2 is the constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its first
;; argument.
3733 (define_expand "vec_extractv4si"
3734 [(match_operand:SI 0 "register_operand" "")
3735 (match_operand:V4SI 1 "register_operand" "")
3736 (match_operand 2 "const_int_operand" "")]
3739 ix86_expand_vector_extract (false, operands[0], operands[1],
3740 INTVAL (operands[2]));
;; Expander for initializing a V4SI register from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
3744 (define_expand "vec_initv4si"
3745 [(match_operand:V4SI 0 "register_operand" "")
3746 (match_operand 1 "" "")]
3749 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V8HI register: operand 1 is the
;; HImode value and operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument.
3753 (define_expand "vec_setv8hi"
3754 [(match_operand:V8HI 0 "register_operand" "")
3755 (match_operand:HI 1 "register_operand" "")
3756 (match_operand 2 "const_int_operand" "")]
3759 ix86_expand_vector_set (false, operands[0], operands[1],
3760 INTVAL (operands[2]));
;; Expander for reading one element of a V8HI register into an HImode
;; register; operand 2 is the constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its first
;; argument.
3764 (define_expand "vec_extractv8hi"
3765 [(match_operand:HI 0 "register_operand" "")
3766 (match_operand:V8HI 1 "register_operand" "")
3767 (match_operand 2 "const_int_operand" "")]
3770 ix86_expand_vector_extract (false, operands[0], operands[1],
3771 INTVAL (operands[2]));
;; Expander for initializing a V8HI register from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
3775 (define_expand "vec_initv8hi"
3776 [(match_operand:V8HI 0 "register_operand" "")
3777 (match_operand 1 "" "")]
3780 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V16QI register: operand 1 is the
;; QImode value and operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument.
3784 (define_expand "vec_setv16qi"
3785 [(match_operand:V16QI 0 "register_operand" "")
3786 (match_operand:QI 1 "register_operand" "")
3787 (match_operand 2 "const_int_operand" "")]
3790 ix86_expand_vector_set (false, operands[0], operands[1],
3791 INTVAL (operands[2]));
;; Expander for reading one element of a V16QI register into a QImode
;; register; operand 2 is the constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with false as its first
;; argument.
3795 (define_expand "vec_extractv16qi"
3796 [(match_operand:QI 0 "register_operand" "")
3797 (match_operand:V16QI 1 "register_operand" "")
3798 (match_operand 2 "const_int_operand" "")]
3801 ix86_expand_vector_extract (false, operands[0], operands[1],
3802 INTVAL (operands[2]));
;; Expander for initializing a V16QI register from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
3806 (define_expand "vec_initv16qi"
3807 [(match_operand:V16QI 0 "register_operand" "")
3808 (match_operand 1 "" "")]
3811 ix86_expand_vector_init (false, operands[0], operands[1]);
3815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb on V16QI operands.  The RTL includes a constant vector of sixteen
;; ones alongside the two inputs.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0"); the condition checks the operands
;; with ix86_binary_operator_ok using PLUS.
3821 (define_insn "sse2_uavgv16qi3"
3822 [(set (match_operand:V16QI 0 "register_operand" "=x")
3828 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3830 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3831 (const_vector:V16QI [(const_int 1) (const_int 1)
3832 (const_int 1) (const_int 1)
3833 (const_int 1) (const_int 1)
3834 (const_int 1) (const_int 1)
3835 (const_int 1) (const_int 1)
3836 (const_int 1) (const_int 1)
3837 (const_int 1) (const_int 1)
3838 (const_int 1) (const_int 1)]))
3840 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3841 "pavgb\t{%2, %0|%0, %2}"
3842 [(set_attr "type" "sseiadd")
3843 (set_attr "mode" "TI")])
;; pavgw on V8HI operands.  The RTL includes a constant vector of eight
;; ones alongside the two inputs.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0"); the condition checks the operands
;; with ix86_binary_operator_ok using PLUS.
3845 (define_insn "sse2_uavgv8hi3"
3846 [(set (match_operand:V8HI 0 "register_operand" "=x")
3852 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3854 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3855 (const_vector:V8HI [(const_int 1) (const_int 1)
3856 (const_int 1) (const_int 1)
3857 (const_int 1) (const_int 1)
3858 (const_int 1) (const_int 1)]))
3860 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3861 "pavgw\t{%2, %0|%0, %2}"
3862 [(set_attr "type" "sseiadd")
3863 (set_attr "mode" "TI")])
;; psadbw: takes two V16QI inputs and produces a V2DI result.  The
;; operation is modelled as an opaque unspec instead of explicit RTL, for
;; the reason given in the note that follows.
3865 ;; The correct representation for this is absolutely enormous, and
3866 ;; surely not generally useful.
3867 (define_insn "sse2_psadbw"
3868 [(set (match_operand:V2DI 0 "register_operand" "=x")
3869 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3870 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3873 "psadbw\t{%2, %0|%0, %2}"
3874 [(set_attr "type" "sseiadd")
3875 (set_attr "mode" "TI")])
;; movmskps: produces an SImode value in a general register from a V4SF
;; SSE register.  Modelled as an unspec.
3877 (define_insn "sse_movmskps"
3878 [(set (match_operand:SI 0 "register_operand" "=r")
3879 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3882 "movmskps\t{%1, %0|%0, %1}"
3883 [(set_attr "type" "ssecvt")
3884 (set_attr "mode" "V4SF")])
;; movmskpd: produces an SImode value in a general register from a V2DF
;; SSE register.  Modelled as an unspec.
3886 (define_insn "sse2_movmskpd"
3887 [(set (match_operand:SI 0 "register_operand" "=r")
3888 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3891 "movmskpd\t{%1, %0|%0, %1}"
3892 [(set_attr "type" "ssecvt")
3893 (set_attr "mode" "V2DF")])
;; pmovmskb: produces an SImode value in a general register from a V16QI
;; SSE register.  Modelled as an unspec.
;; NOTE(review): the mode attribute is V2DF although the input is an
;; integer vector; it matches sse2_movmskpd.  Confirm this is intentional
;; before changing it.
3895 (define_insn "sse2_pmovmskb"
3896 [(set (match_operand:SI 0 "register_operand" "=r")
3897 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3900 "pmovmskb\t{%1, %0|%0, %1}"
3901 [(set_attr "type" "ssecvt")
3902 (set_attr "mode" "V2DF")])
;; Expander for maskmovdqu: operand 0 is a V16QI memory destination and
;; operands 1 and 2 are V16QI registers fed to the unspec.
3904 (define_expand "sse2_maskmovdqu"
3905 [(set (match_operand:V16QI 0 "memory_operand" "")
3906 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3907 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of maskmovdqu.  The destination is the V16QI memory addressed
;; by operand 0, an SImode register constrained to "D".  The previous
;; contents of that memory appear as an input of the unspec (match_dup 0).
;; Only operands 1 and 2 are printed; the address register is not.
3913 (define_insn "*sse2_maskmovdqu"
3914 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3915 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3916 (match_operand:V16QI 2 "register_operand" "x")
3917 (mem:V16QI (match_dup 0))]
3919 "TARGET_SSE2 && !TARGET_64BIT"
3920 ;; @@@ check ordering of operands in intel/nonintel syntax
3921 "maskmovdqu\t{%2, %1|%1, %2}"
3922 [(set_attr "type" "ssecvt")
3923 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu.  Identical to the 32-bit pattern except that
;; the address in operand 0 is a DImode register and the condition requires
;; TARGET_64BIT.
3925 (define_insn "*sse2_maskmovdqu_rex64"
3926 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3927 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3928 (match_operand:V16QI 2 "register_operand" "x")
3929 (mem:V16QI (match_dup 0))]
3931 "TARGET_SSE2 && TARGET_64BIT"
3932 ;; @@@ check ordering of operands in intel/nonintel syntax
3933 "maskmovdqu\t{%2, %1|%1, %2}"
3934 [(set_attr "type" "ssecvt")
3935 (set_attr "mode" "TI")])
;; Volatile unspec that takes an SImode memory operand; the "memory"
;; attribute marks the insn as a load.
3937 (define_insn "sse_ldmxcsr"
3938 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3942 [(set_attr "type" "sse")
3943 (set_attr "memory" "load")])
;; Writes the result of the volatile unspec UNSPECV_STMXCSR to an SImode
;; memory operand; the "memory" attribute marks the insn as a store.
3945 (define_insn "sse_stmxcsr"
3946 [(set (match_operand:SI 0 "memory_operand" "=m")
3947 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3950 [(set_attr "type" "sse")
3951 (set_attr "memory" "store")])
;; Expander for the sfence builtin, available with TARGET_SSE or
;; TARGET_3DNOW_A.  operands[0] is created here: a BLKmode MEM with a
;; SCRATCH address, marked volatile.  The fence is expressed as an
;; UNSPEC_SFENCE over that MEM.
3953 (define_expand "sse_sfence"
3955 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3956 "TARGET_SSE || TARGET_3DNOW_A"
3958 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3959 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn that matches the UNSPEC_SFENCE set over a BLKmode operand; the
;; operand has no predicate or constraint.  The "memory" attribute is
;; "unknown".
3962 (define_insn "*sse_sfence"
3963 [(set (match_operand:BLK 0 "" "")
3964 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3965 "TARGET_SSE || TARGET_3DNOW_A"
3967 [(set_attr "type" "sse")
3968 (set_attr "memory" "unknown")])
;; Volatile unspec that takes an address operand ("p" constraint); the
;; "memory" attribute is "unknown".
3970 (define_insn "sse2_clflush"
3971 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3975 [(set_attr "type" "sse")
3976 (set_attr "memory" "unknown")])
;; Expander for the mfence builtin.  operands[0] is created here: a BLKmode
;; MEM with a SCRATCH address, marked volatile.  The fence is expressed as
;; an UNSPEC_MFENCE over that MEM.
3978 (define_expand "sse2_mfence"
3980 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3983 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3984 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn that matches the UNSPEC_MFENCE set over a BLKmode operand; the
;; operand has no predicate or constraint.  The "memory" attribute is
;; "unknown".
3987 (define_insn "*sse2_mfence"
3988 [(set (match_operand:BLK 0 "" "")
3989 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3992 [(set_attr "type" "sse")
3993 (set_attr "memory" "unknown")])
;; Expander for the lfence builtin.  operands[0] is created here: a BLKmode
;; MEM with a SCRATCH address, marked volatile.  The fence is expressed as
;; an UNSPEC_LFENCE over that MEM.
3995 (define_expand "sse2_lfence"
3997 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4000 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4001 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn that matches the UNSPEC_LFENCE set over a BLKmode operand; the
;; operand has no predicate or constraint.  The "memory" attribute is
;; "unknown".
4004 (define_insn "*sse2_lfence"
4005 [(set (match_operand:BLK 0 "" "")
4006 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4009 [(set_attr "type" "sse")
4010 (set_attr "memory" "unknown")])
;; mwait: volatile unspec with two SImode register inputs, operand 0
;; constrained to "a" and operand 1 to "c".  The insn length is fixed at 3.
4012 (define_insn "sse3_mwait"
4013 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4014 (match_operand:SI 1 "register_operand" "c")]
4017 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
4018 ;; Since 32bit register operands are implicitly zero extended to 64bit,
4019 ;; we only need to set up 32bit registers.
4021 [(set_attr "length" "3")])
;; 32-bit monitor: volatile unspec with three SImode register inputs
;; constrained to "a", "c" and "d".  Enabled for TARGET_SSE3 when not
;; TARGET_64BIT.  All three operands are printed; the length is fixed at 3.
4023 (define_insn "sse3_monitor"
4024 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4025 (match_operand:SI 1 "register_operand" "c")
4026 (match_operand:SI 2 "register_operand" "d")]
4028 "TARGET_SSE3 && !TARGET_64BIT"
4029 "monitor\t%0, %1, %2"
4030 [(set_attr "length" "3")])
;; 64-bit monitor: as sse3_monitor, but operand 0 (constraint "a") is a
;; DImode register, while operands 1 and 2 stay SImode for the reason given
;; in the note inside the pattern.  Enabled for TARGET_SSE3 && TARGET_64BIT.
4032 (define_insn "sse3_monitor64"
4033 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
4034 (match_operand:SI 1 "register_operand" "c")
4035 (match_operand:SI 2 "register_operand" "d")]
4037 "TARGET_SSE3 && TARGET_64BIT"
4038 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
4039 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4040 ;; zero extended to 64bit, we only need to set up 32bit registers.
4042 [(set_attr "length" "3")])
;; phaddw on V8HI operands.  The RTL pairs adjacent HImode elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
4045 (define_insn "ssse3_phaddwv8hi3"
4046 [(set (match_operand:V8HI 0 "register_operand" "=x")
4052 (match_operand:V8HI 1 "register_operand" "0")
4053 (parallel [(const_int 0)]))
4054 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4056 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4057 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4060 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4061 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4063 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4064 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4069 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4070 (parallel [(const_int 0)]))
4071 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4073 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4074 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4077 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4078 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4080 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4081 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4083 "phaddw\t{%2, %0|%0, %2}"
4084 [(set_attr "type" "sseiadd")
4085 (set_attr "mode" "TI")])
;; phaddw on V4HI operands in the "y" register class (mode attribute DI).
;; The RTL pairs elements (0,1) and (2,3) of operand 1 and then of
;; operand 2.  Operand 1 is tied to the destination.
4087 (define_insn "ssse3_phaddwv4hi3"
4088 [(set (match_operand:V4HI 0 "register_operand" "=y")
4093 (match_operand:V4HI 1 "register_operand" "0")
4094 (parallel [(const_int 0)]))
4095 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4097 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4098 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4102 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4103 (parallel [(const_int 0)]))
4104 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4106 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4107 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4109 "phaddw\t{%2, %0|%0, %2}"
4110 [(set_attr "type" "sseiadd")
4111 (set_attr "mode" "DI")])
;; phaddd on V4SI operands.  The RTL pairs SImode elements (0,1) and (2,3)
;; of operand 1 and then of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
4113 (define_insn "ssse3_phadddv4si3"
4114 [(set (match_operand:V4SI 0 "register_operand" "=x")
4119 (match_operand:V4SI 1 "register_operand" "0")
4120 (parallel [(const_int 0)]))
4121 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4123 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4124 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4128 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4129 (parallel [(const_int 0)]))
4130 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4132 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4133 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4135 "phaddd\t{%2, %0|%0, %2}"
4136 [(set_attr "type" "sseiadd")
4137 (set_attr "mode" "TI")])
;; phaddd on V2SI operands in the "y" register class (mode attribute DI).
;; The RTL pairs elements (0,1) of operand 1 and then of operand 2.
;; Operand 1 is tied to the destination.
4139 (define_insn "ssse3_phadddv2si3"
4140 [(set (match_operand:V2SI 0 "register_operand" "=y")
4144 (match_operand:V2SI 1 "register_operand" "0")
4145 (parallel [(const_int 0)]))
4146 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4149 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4150 (parallel [(const_int 0)]))
4151 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4153 "phaddd\t{%2, %0|%0, %2}"
4154 [(set_attr "type" "sseiadd")
4155 (set_attr "mode" "DI")])
;; phaddsw on V8HI operands.  The RTL pairs adjacent HImode elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
4157 (define_insn "ssse3_phaddswv8hi3"
4158 [(set (match_operand:V8HI 0 "register_operand" "=x")
4164 (match_operand:V8HI 1 "register_operand" "0")
4165 (parallel [(const_int 0)]))
4166 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4168 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4169 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4172 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4173 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4175 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4176 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4181 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4182 (parallel [(const_int 0)]))
4183 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4185 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4186 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4189 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4190 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4192 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4193 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4195 "phaddsw\t{%2, %0|%0, %2}"
4196 [(set_attr "type" "sseiadd")
4197 (set_attr "mode" "TI")])
;; phaddsw on V4HI operands in the "y" register class (mode attribute DI).
;; The RTL pairs elements (0,1) and (2,3) of operand 1 and then of
;; operand 2.  Operand 1 is tied to the destination.
4199 (define_insn "ssse3_phaddswv4hi3"
4200 [(set (match_operand:V4HI 0 "register_operand" "=y")
4205 (match_operand:V4HI 1 "register_operand" "0")
4206 (parallel [(const_int 0)]))
4207 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4209 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4210 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4214 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4215 (parallel [(const_int 0)]))
4216 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4218 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4219 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4221 "phaddsw\t{%2, %0|%0, %2}"
4222 [(set_attr "type" "sseiadd")
4223 (set_attr "mode" "DI")])
;; phsubw on V8HI operands.  The RTL pairs adjacent HImode elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
4225 (define_insn "ssse3_phsubwv8hi3"
4226 [(set (match_operand:V8HI 0 "register_operand" "=x")
4232 (match_operand:V8HI 1 "register_operand" "0")
4233 (parallel [(const_int 0)]))
4234 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4236 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4237 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4240 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4241 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4243 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4244 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4249 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4250 (parallel [(const_int 0)]))
4251 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4253 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4254 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4257 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4258 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4260 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4261 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4263 "phsubw\t{%2, %0|%0, %2}"
4264 [(set_attr "type" "sseiadd")
4265 (set_attr "mode" "TI")])
;; phsubw on V4HI operands in the "y" register class (mode attribute DI).
;; The RTL pairs elements (0,1) and (2,3) of operand 1 and then of
;; operand 2.  Operand 1 is tied to the destination.
4267 (define_insn "ssse3_phsubwv4hi3"
4268 [(set (match_operand:V4HI 0 "register_operand" "=y")
4273 (match_operand:V4HI 1 "register_operand" "0")
4274 (parallel [(const_int 0)]))
4275 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4277 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4278 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4282 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4283 (parallel [(const_int 0)]))
4284 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4286 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4287 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4289 "phsubw\t{%2, %0|%0, %2}"
4290 [(set_attr "type" "sseiadd")
4291 (set_attr "mode" "DI")])
;; phsubd on V4SI vectors.
;; Operand 0: V4SI destination register ("=x").
;; Operand 1: V4SI register tied to the destination by the "0" constraint.
;; Operand 2: V4SI register or memory source ("xm").
;; The pattern selects SI elements 0..3 of operand 1 first, then SI
;; elements 0..3 of operand 2, one vec_select per element.
;; NOTE(review): the rtx codes that combine the selected elements are not
;; visible next to these vec_selects; presumably pairwise differences of
;; adjacent elements -- confirm.
4293 (define_insn "ssse3_phsubdv4si3"
4294 [(set (match_operand:V4SI 0 "register_operand" "=x")
4299 (match_operand:V4SI 1 "register_operand" "0")
4300 (parallel [(const_int 0)]))
4301 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4303 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4304 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4308 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4309 (parallel [(const_int 0)]))
4310 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4312 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4313 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
;; AT&T operand order before '|', Intel order after it; operand 1 is not
;; printed because it shares the destination register.
4315 "phsubd\t{%2, %0|%0, %2}"
4316 [(set_attr "type" "sseiadd")
4317 (set_attr "mode" "TI")])
;; phsubd on V2SI vectors, using the "y" register constraint.
;; Operand 0: V2SI destination register ("=y").
;; Operand 1: V2SI register tied to the destination by the "0" constraint.
;; Operand 2: V2SI register or memory source ("ym").
;; The pattern selects SI elements 0 and 1 of operand 1, then SI elements
;; 0 and 1 of operand 2.
;; NOTE(review): the rtx codes that combine the selected elements are not
;; visible next to these vec_selects; presumably one difference per
;; operand -- confirm.
4319 (define_insn "ssse3_phsubdv2si3"
4320 [(set (match_operand:V2SI 0 "register_operand" "=y")
4324 (match_operand:V2SI 1 "register_operand" "0")
4325 (parallel [(const_int 0)]))
4326 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4329 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4330 (parallel [(const_int 0)]))
4331 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
;; AT&T operand order before '|', Intel order after it; operand 1 is not
;; printed because it shares the destination register.
4333 "phsubd\t{%2, %0|%0, %2}"
4334 [(set_attr "type" "sseiadd")
4335 (set_attr "mode" "DI")])
;; phsubsw on V8HI vectors.
;; Operand 0: V8HI destination register ("=x").
;; Operand 1: V8HI register tied to the destination by the "0" constraint.
;; Operand 2: V8HI register or memory source ("xm").
;; The pattern selects HI elements 0..7 of operand 1 first, then HI
;; elements 0..7 of operand 2, one vec_select per element.
;; NOTE(review): the rtx codes that combine the selected elements are not
;; visible next to these vec_selects; judging by the mnemonic they are
;; presumably saturating pairwise differences -- confirm.
4337 (define_insn "ssse3_phsubswv8hi3"
4338 [(set (match_operand:V8HI 0 "register_operand" "=x")
4344 (match_operand:V8HI 1 "register_operand" "0")
4345 (parallel [(const_int 0)]))
4346 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4348 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4349 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4352 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4353 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4355 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4356 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4361 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4362 (parallel [(const_int 0)]))
4363 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4365 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4366 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4369 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4370 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4372 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4373 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
;; AT&T operand order before '|', Intel order after it; operand 1 is not
;; printed because it shares the destination register.
4375 "phsubsw\t{%2, %0|%0, %2}"
4376 [(set_attr "type" "sseiadd")
4377 (set_attr "mode" "TI")])
;; phsubsw on V4HI vectors, using the "y" register constraint.
;; Operand 0: V4HI destination register ("=y").
;; Operand 1: V4HI register tied to the destination by the "0" constraint.
;; Operand 2: V4HI register or memory source ("ym").
;; The pattern selects HI elements 0..3 of operand 1 first, then HI
;; elements 0..3 of operand 2, one vec_select per element.
;; NOTE(review): the rtx codes that combine the selected elements are not
;; visible next to these vec_selects; presumably saturating pairwise
;; differences -- confirm.
4379 (define_insn "ssse3_phsubswv4hi3"
4380 [(set (match_operand:V4HI 0 "register_operand" "=y")
4385 (match_operand:V4HI 1 "register_operand" "0")
4386 (parallel [(const_int 0)]))
4387 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4389 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4390 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4394 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4395 (parallel [(const_int 0)]))
4396 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4398 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4399 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
;; AT&T operand order before '|', Intel order after it; operand 1 is not
;; printed because it shares the destination register.
4401 "phsubsw\t{%2, %0|%0, %2}"
4402 [(set_attr "type" "sseiadd")
4403 (set_attr "mode" "DI")])
;; pmaddubsw with V16QI inputs and a V8HI result.
;; Operand 0: V8HI destination register ("=x").
;; Operand 1: V16QI register tied to the destination by the "0" constraint.
;; Operand 2: V16QI register or memory source ("xm").
;; The pattern takes one vec_select of operand 1 and one of operand 2
;; whose index list starts at element 0, and a second vec_select of each
;; operand whose index list starts at element 1.
;;
;; Operand 1 must NOT be marked commutative ("%"): the pmaddubsw
;; instruction treats the bytes of its destination operand as unsigned
;; and the bytes of its source operand as signed, so exchanging operands
;; 1 and 2 during register allocation would compute a different value.
;; Operand 1 is therefore a plain register tied to the destination, the
;; same shape the other tied-destination SSSE3 patterns use.
4405 (define_insn "ssse3_pmaddubswv8hi3"
4406 [(set (match_operand:V8HI 0 "register_operand" "=x")
4411 (match_operand:V16QI 1 "register_operand" "0")
4412 (parallel [(const_int 0)
4422 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
4423 (parallel [(const_int 0)
4433 (vec_select:V16QI (match_dup 1)
4434 (parallel [(const_int 1)
4443 (vec_select:V16QI (match_dup 2)
4444 (parallel [(const_int 1)
4451 (const_int 15)]))))))]
;; AT&T operand order before '|', Intel order after it; operand 1 is not
;; printed because it shares the destination register.
4453 "pmaddubsw\t{%2, %0|%0, %2}"
4454 [(set_attr "type" "sseiadd")
4455 (set_attr "mode" "TI")])
;; pmaddubsw with V8QI inputs and a V4HI result, using the "y" register
;; constraint.
;; Operand 0: V4HI destination register ("=y").
;; Operand 1: V8QI register tied to the destination by the "0" constraint.
;; Operand 2: V8QI register or memory source ("ym").
;; The pattern takes one vec_select of operand 1 and one of operand 2
;; whose index list starts at element 0, and a second vec_select of each
;; operand whose index list starts at element 1.
;;
;; Operand 1 must NOT be marked commutative ("%"): the pmaddubsw
;; instruction treats the bytes of its destination operand as unsigned
;; and the bytes of its source operand as signed, so exchanging operands
;; 1 and 2 during register allocation would compute a different value.
;; Operand 1 is therefore a plain register tied to the destination, the
;; same shape the other tied-destination SSSE3 patterns use.
4457 (define_insn "ssse3_pmaddubswv4hi3"
4458 [(set (match_operand:V4HI 0 "register_operand" "=y")
4463 (match_operand:V8QI 1 "register_operand" "0")
4464 (parallel [(const_int 0)
4470 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
4471 (parallel [(const_int 0)
4477 (vec_select:V8QI (match_dup 1)
4478 (parallel [(const_int 1)
4483 (vec_select:V8QI (match_dup 2)
4484 (parallel [(const_int 1)
4487 (const_int 7)]))))))]
;; AT&T operand order before '|', Intel order after it; operand 1 is not
;; printed because it shares the destination register.
4489 "pmaddubsw\t{%2, %0|%0, %2}"
4490 [(set_attr "type" "sseiadd")
4491 (set_attr "mode" "DI")])
;; pmulhrsw on V8HI vectors.
;; Operand 0: V8HI destination register ("=x").
;; Operand 1: V8HI register or memory, constraint "%0": tied to the
;;            destination, and the '%' lets it be exchanged with operand 2.
;; Operand 2: V8HI register or memory source ("xm").
;; The pattern also contains a V8HI const_vector whose eight elements are
;; all 1.
;; NOTE(review): that vector of ones is presumably the rounding increment
;; of the instruction; the surrounding rtx codes are not visible next to
;; it -- confirm.
4493 (define_insn "ssse3_pmulhrswv8hi3"
4494 [(set (match_operand:V8HI 0 "register_operand" "=x")
4501 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4503 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4505 (const_vector:V8HI [(const_int 1) (const_int 1)
4506 (const_int 1) (const_int 1)
4507 (const_int 1) (const_int 1)
4508 (const_int 1) (const_int 1)]))
;; Insn condition: requires TARGET_SSSE3 and asks ix86_binary_operator_ok
;; to accept the operand combination as a V8HImode MULT.
4510 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
;; AT&T operand order before '|', Intel order after it.
4511 "pmulhrsw\t{%2, %0|%0, %2}"
4512 [(set_attr "type" "sseimul")
4513 (set_attr "mode" "TI")])
;; pmulhrsw on V4HI vectors, using the "y" register constraint.
;; Operand 0: V4HI destination register ("=y").
;; Operand 1: V4HI register or memory, constraint "%0": tied to the
;;            destination, and the '%' lets it be exchanged with operand 2.
;; Operand 2: V4HI register or memory source ("ym").
;; The pattern also contains a V4HI const_vector whose four elements are
;; all 1.
;; NOTE(review): that vector of ones is presumably the rounding increment
;; of the instruction; the surrounding rtx codes are not visible next to
;; it -- confirm.
4515 (define_insn "ssse3_pmulhrswv4hi3"
4516 [(set (match_operand:V4HI 0 "register_operand" "=y")
4523 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
4525 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
4527 (const_vector:V4HI [(const_int 1) (const_int 1)
4528 (const_int 1) (const_int 1)]))
;; Insn condition: requires TARGET_SSSE3 and asks ix86_binary_operator_ok
;; to accept the operand combination as a V4HImode MULT.
4530 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
;; AT&T operand order before '|', Intel order after it.
4531 "pmulhrsw\t{%2, %0|%0, %2}"
4532 [(set_attr "type" "sseimul")
4533 (set_attr "mode" "DI")])
;; pshufb on V16QI vectors, modelled as an opaque unspec of its two inputs.
;; Operand 0: V16QI destination register ("=x").
;; Operand 1: V16QI register tied to the destination by the "0" constraint.
;; Operand 2: V16QI register or memory source ("xm").
4535 (define_insn "ssse3_pshufbv16qi3"
4536 [(set (match_operand:V16QI 0 "register_operand" "=x")
4537 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4538 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
;; AT&T operand order before '|', Intel order after it.  The ';' that
;; follows the template string only opens an empty comment.
4541 "pshufb\t{%2, %0|%0, %2}";
4542 [(set_attr "type" "sselog1")
4543 (set_attr "mode" "TI")])
;; pshufb on V8QI vectors, using the "y" register constraint; modelled as
;; an opaque unspec of its two inputs.
;; Operand 0: V8QI destination register ("=y").
;; Operand 1: V8QI register tied to the destination by the "0" constraint.
;; Operand 2: V8QI register or memory source ("ym").
4545 (define_insn "ssse3_pshufbv8qi3"
4546 [(set (match_operand:V8QI 0 "register_operand" "=y")
4547 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
4548 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
;; AT&T operand order before '|', Intel order after it.  The ';' that
;; follows the template string only opens an empty comment.
4551 "pshufb\t{%2, %0|%0, %2}";
4552 [(set_attr "type" "sselog1")
4553 (set_attr "mode" "DI")])
;; psign{b,w,d} for each mode in SSEMODE124 (V16QI, V8HI, V4SI); the
;; pattern is an opaque unspec of its two inputs.
;; Operand 0: destination register ("=x").
;; Operand 1: register tied to the destination by the "0" constraint.
;; Operand 2: register or memory source ("xm").
;; <ssevecsize> expands to the b/w/d mnemonic suffix of the chosen mode.
4555 (define_insn "ssse3_psign<mode>3"
4556 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4557 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
4558 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
;; AT&T operand order before '|', Intel order after it.  The ';' that
;; follows the template string only opens an empty comment.
4561 "psign<ssevecsize>\t{%2, %0|%0, %2}";
4562 [(set_attr "type" "sselog1")
4563 (set_attr "mode" "TI")])
;; psign for each mode in MMXMODEI, using the "y" register constraint; the
;; pattern is an opaque unspec of its two inputs.
;; Operand 0: destination register ("=y").
;; Operand 1: register tied to the destination by the "0" constraint.
;; Operand 2: register or memory source ("ym").
;; NOTE(review): MMXMODEI and <mmxvecsize> are defined outside this file
;; section; presumably the 64-bit integer vector modes and their mnemonic
;; suffixes -- confirm against their definitions.
4565 (define_insn "ssse3_psign<mode>3"
4566 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
4567 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
4568 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
;; AT&T operand order before '|', Intel order after it.  The ';' that
;; follows the template string only opens an empty comment.
4571 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
4572 [(set_attr "type" "sselog1")
4573 (set_attr "mode" "DI")])
;; palignr on a TImode value, modelled as an opaque unspec of three inputs.
;; Operand 0: TI destination register ("=x").
;; Operand 1: TI register tied to the destination by the "0" constraint.
;; Operand 2: TI register or memory source ("xm").
;; Operand 3: SI constant accepted by const_0_to_255_mul_8_operand ("n").
;; The output code replaces operand 3 with its value divided by 8 before
;; returning the template, so the printed immediate is one eighth of the
;; value carried in the RTL.
;; NOTE(review): presumably the RTL holds the shift count in bits while
;; the instruction expects bytes -- confirm.
4575 (define_insn "ssse3_palignrti"
4576 [(set (match_operand:TI 0 "register_operand" "=x")
4577 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
4578 (match_operand:TI 2 "nonimmediate_operand" "xm")
4579 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
4583 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
4584 return "palignr\t{%3, %2, %0|%0, %2, %3}";
4586 [(set_attr "type" "sseishft")
4587 (set_attr "mode" "TI")])
;; palignr on a DImode value, using the "y" register constraint; modelled
;; as an opaque unspec of three inputs.
;; Operand 0: DI destination register ("=y").
;; Operand 1: DI register tied to the destination by the "0" constraint.
;; Operand 2: DI register or memory source ("ym").
;; Operand 3: SI constant accepted by const_0_to_255_mul_8_operand ("n").
;; The output code replaces operand 3 with its value divided by 8 before
;; returning the template, so the printed immediate is one eighth of the
;; value carried in the RTL.
;; NOTE(review): presumably the RTL holds the shift count in bits while
;; the instruction expects bytes -- confirm.
4589 (define_insn "ssse3_palignrdi"
4590 [(set (match_operand:DI 0 "register_operand" "=y")
4591 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
4592 (match_operand:DI 2 "nonimmediate_operand" "ym")
4593 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
4597 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
4598 return "palignr\t{%3, %2, %0|%0, %2, %3}";
4600 [(set_attr "type" "sseishft")
4601 (set_attr "mode" "DI")])
;; Element-wise absolute value for each mode in SSEMODE124 (V16QI, V8HI,
;; V4SI), emitted as pabs{b,w,d}.
;; Operand 0: destination register ("=x").
;; Operand 1: register or memory source ("xm"); it is not tied to the
;;            destination, so both operands appear in the template.
;; <ssevecsize> expands to the b/w/d mnemonic suffix of the chosen mode.
4603 (define_insn "abs<mode>2"
4604 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4605 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
;; AT&T operand order before '|', Intel order after it.  The ';' that
;; follows the template string only opens an empty comment.
4607 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
4608 [(set_attr "type" "sselog1")
4609 (set_attr "mode" "TI")])
;; Element-wise absolute value for each mode in MMXMODEI, using the "y"
;; register constraint.
;; Operand 0: destination register ("=y").
;; Operand 1: register or memory source ("ym"); it is not tied to the
;;            destination, so both operands appear in the template.
;; NOTE(review): MMXMODEI and <mmxvecsize> are defined outside this file
;; section; presumably the 64-bit integer vector modes and their mnemonic
;; suffixes -- confirm against their definitions.
4611 (define_insn "abs<mode>2"
4612 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
4613 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
;; AT&T operand order before '|', Intel order after it.  The ';' that
;; follows the template string only opens an empty comment.
4615 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
4616 [(set_attr "type" "sselog1")
4617 (set_attr "mode" "DI")])