1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; appear to name the element sizes in bytes that the subset contains
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; <ssevecsize> yields "b", "w", "d" or "q" for V16QI, V8HI, V4SI and
;; V2DI respectively.
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the integer vector modes in SSEMODEI.  Both operands
;; are accepted as nonimmediate_operand; the visible preparation code
;; hands the work to ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives:
;;   0: load an SSE constant ("C") into a register; the opcode comes from
;;      standard_sse_constant_opcode.
;;   1: register or memory to register.
;;   2: register to memory.
;; Memory-to-memory is rejected by the insn condition.  For the
;; non-constant alternatives the output code emits movaps when the insn's
;; "mode" attribute is V4SF and movdqa otherwise.  The visible part of the
;; mode attribute tests optimize_size, TARGET_SSE2 and, for alternative 2,
;; TARGET_SSE_TYPELESS_STORES, and falls back to TI.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 return standard_sse_constant_opcode (insn, operands[1]);
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
74 return "movdqa\t{%1, %0|%0, %1}";
79 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_string "TI")))])
;; Move expander for V4SF; the visible preparation code delegates to
;; ix86_expand_vector_move.
90 (define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
95 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF.  Alternative 0 loads an SSE constant ("C") using
;; the opcode chosen by standard_sse_constant_opcode; the visible return
;; for the remaining alternatives (reg/mem -> reg, reg -> mem) is movaps.
;; The "mode" attribute is always V4SF.
99 (define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
104 switch (which_alternative)
107 return standard_sse_constant_opcode (insn, operands[1]);
110 return "movaps\t{%1, %0|%0, %1}";
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (the condition requires reload_completed) of a
;; V4SF register set from a zero_extended_scalar_load_operand.  The
;; defining header line is not visible here -- presumably a define_split;
;; confirm against the full file.  The preparation code narrows operand 1
;; to its SFmode subreg at byte offset 0 and makes operand 2 the V4SF
;; zero vector for use by the replacement pattern.
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
124 (vec_duplicate:V4SF (match_dup 1))
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the visible preparation code delegates to
;; ix86_expand_vector_move.
132 (define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
137 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF.  Alternative 0 loads an SSE constant ("C") using
;; standard_sse_constant_opcode; alternatives 1 and 2 are reg/mem -> reg
;; and reg -> mem.  Memory-to-memory is rejected by the insn condition.
;; The output code emits movaps when the insn's "mode" attribute is V4SF
;; and movapd otherwise.  The mode attribute selects V4SF when the visible
;; test (optimize_size, !TARGET_SSE2, or alternative 2 combined with
;; TARGET_SSE_TYPELESS_STORES) holds, and V2DF otherwise.
141 (define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
146 switch (which_alternative)
149 return standard_sse_constant_opcode (insn, operands[1]);
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
155 return "movapd\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
168 (const_string "V4SF")
169 (const_string "V2DF")))])
;; Post-reload rewrite (the condition requires reload_completed) of a
;; V2DF register set from a zero_extended_scalar_load_operand into a
;; vec_concat of operand 1 and operand 2.  The defining header line is not
;; visible here -- presumably a define_split; confirm against the full
;; file.  The preparation code narrows operand 1 to its DFmode subreg at
;; byte offset 0 and makes operand 2 the DFmode zero constant.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE mode.  Takes a single register operand
;; and hands it to ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE mode; the visible
;; preparation code delegates to ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned move of a V4SF value, wrapped in an unspec.  Alternatives:
;; reg/mem -> reg and reg -> mem; the insn condition rejects
;; memory-to-memory.  Emits movups.
;; The "mode" attribute is V4SF so that it agrees with the V4SF operands
;; and with the other single-precision move patterns in this file
;; (*movv4sf_internal, sse_movntv4sf).  V2DF is the mode of the
;; double-precision counterpart sse2_movupd, not of this insn.
198 (define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
;; Unaligned move of a V2DF value, wrapped in an unspec.  Alternatives:
;; reg/mem -> reg and reg -> mem; the insn condition requires TARGET_SSE2
;; and rejects memory-to-memory.  Emits movupd.
207 (define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
;; Unaligned move of a V16QI value, wrapped in an unspec.  Alternatives:
;; reg/mem -> reg and reg -> mem; the insn condition requires TARGET_SSE2
;; and rejects memory-to-memory.  Emits movdqu.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
;; Store of a V4SF register to memory through an unspec; emits movntps.
;; The destination must be memory and the source an SSE register.
225 (define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
;; Store of a V2DF register to memory through an unspec; emits movntpd.
;; NOTE(review): "type" is ssecvt here while the V4SF counterpart
;; sse_movntv4sf uses ssemov -- confirm whether the difference is intended.
234 (define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
;; Store of a V2DI register to memory through an unspec; emits movntdq.
;; NOTE(review): "type" is ssecvt here while sse_movntv4sf uses ssemov --
;; confirm whether the difference is intended.
243 (define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory through an unspec;
;; emits movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode and the source is a general register -- this looks inconsistent;
;; confirm what the scheduling and length attributes expect before
;; changing it.
252 (define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "V2DF")])
;; Load of a V16QI value from memory into an SSE register through an
;; unspec; emits lddqu.  The source must be a memory operand.
261 (define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
272 ;; Parallel single-precision floating point arithmetic
274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  All work is done by
;; ix86_expand_fp_absneg_operator; DONE means the RTL template above is
;; not emitted itself.
276 (define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  All work is done by
;; ix86_expand_fp_absneg_operator; DONE means the RTL template above is
;; not emitted itself.
282 (define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
288 (define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF addition; emits addps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Operand legality is checked by ix86_binary_operator_ok.
295 (define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
;; Scalar-in-vector addition on V4SF operands; emits addss and carries
;; "mode" SF.  Operand 1 is tied to the destination and marked
;; commutative ("%0").  The RTL that wraps the plus is not fully visible
;; here.
304 (define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register
;; (subtraction is not commutative); operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
316 (define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtraction; emits subps.  Operand 1 is a register tied to
;; the destination ("0"); operand 2 may be a register or memory.
323 (define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
;; Scalar-in-vector subtraction on V4SF operands; emits subss and carries
;; "mode" SF.  Operand 1 is a register tied to the destination.  The RTL
;; that wraps the minus is not fully visible here.
332 (define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
344 (define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiplication; emits mulps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand legality is checked
;; by ix86_binary_operator_ok.
351 (define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
;; Scalar-in-vector multiplication on V4SF operands; emits mulss and
;; carries "mode" SF.  Operand 1 is tied to the destination and marked
;; commutative ("%0").  The RTL that wraps the mult is not fully visible
;; here.
360 (define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register
;; (division is not commutative); operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
372 (define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed V4SF division; emits divps.  Operand 1 is a register tied to
;; the destination ("0"); operand 2 may be a register or memory.
379 (define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
;; Scalar-in-vector division on V4SF operands; emits divss and carries
;; "mode" SF.  Operand 1 is a register tied to the destination.  The RTL
;; that wraps the div is not fully visible here.
388 (define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
;; Packed V4SF operation represented as UNSPEC_RCP; emits rcpps.  The
;; source may be a register or memory and is not tied to the destination.
400 (define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
;; Scalar-in-vector form; emits rcpss and carries "mode" SF.  Operand 1
;; is the unspec input (register or memory); operand 2 is a register tied
;; to the destination ("0").  The unspec code and the RTL combining the
;; two operands are not fully visible here.
409 (define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
414 (match_operand:V4SF 2 "register_operand" "0")
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
;; Packed V4SF operation represented as UNSPEC_RSQRT; emits rsqrtps.  The
;; source may be a register or memory and is not tied to the destination.
421 (define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
;; Scalar-in-vector form; emits rsqrtss and carries "mode" SF.  Operand 1
;; is the unspec input (register or memory); operand 2 is a register tied
;; to the destination ("0").  The unspec code and the RTL combining the
;; two operands are not fully visible here.
430 (define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
435 (match_operand:V4SF 2 "register_operand" "0")
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
;; Packed V4SF square root; emits sqrtps.  The source may be a register
;; or memory and is not tied to the destination.
442 (define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
;; Scalar-in-vector square root; emits sqrtss and carries "mode" SF.
;; Operand 1 is the sqrt input (register or memory); operand 2 is a
;; register tied to the destination ("0").  The RTL combining the two
;; operands is not fully visible here.
450 (define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
461 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462 ;; isn't really correct, as those rtl operators aren't defined when
463 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register, matching the register-only operand 1
;; of the non-"_finite" insn *smaxv4sf3.  Operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
465 (define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed V4SF maximum, enabled only under flag_finite_math_only; emits
;; maxps.  In this variant operand 1 is marked commutative ("%0").
476 (define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
;; Packed V4SF maximum without the commutative marker; emits maxps.
;; Operand 1 is a register tied to the destination ("0"), so operand
;; order is preserved.
486 (define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
;; Scalar-in-vector maximum, enabled only under flag_finite_math_only;
;; emits maxss and carries "mode" SF.  Operand 1 is marked commutative
;; ("%0").  The RTL that wraps the smax is not fully visible here.
495 (define_insn "*sse_vmsmaxv4sf3_finite"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
502 "TARGET_SSE && flag_finite_math_only
503 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
504 "maxss\t{%2, %0|%0, %2}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
;; Scalar-in-vector maximum without the commutative marker; emits maxss
;; and carries "mode" SF.  Operand 1 is a register tied to the
;; destination.  The RTL that wraps the smax is not fully visible here.
508 (define_insn "sse_vmsmaxv4sf3"
509 [(set (match_operand:V4SF 0 "register_operand" "=x")
511 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
512 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
516 "maxss\t{%2, %0|%0, %2}"
517 [(set_attr "type" "sse")
518 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register, matching the register-only operand 1
;; of the non-"_finite" insn *sminv4sf3.  Operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
520 (define_expand "sminv4sf3"
521 [(set (match_operand:V4SF 0 "register_operand" "")
522 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
523 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
526 if (!flag_finite_math_only)
527 operands[1] = force_reg (V4SFmode, operands[1]);
528 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed V4SF minimum, enabled only under flag_finite_math_only; emits
;; minps.  In this variant operand 1 is marked commutative ("%0").
531 (define_insn "*sminv4sf3_finite"
532 [(set (match_operand:V4SF 0 "register_operand" "=x")
533 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
534 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
535 "TARGET_SSE && flag_finite_math_only
536 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
537 "minps\t{%2, %0|%0, %2}"
538 [(set_attr "type" "sse")
539 (set_attr "mode" "V4SF")])
;; Packed V4SF minimum without the commutative marker; emits minps.
;; Operand 1 is a register tied to the destination ("0"), so operand
;; order is preserved.
541 (define_insn "*sminv4sf3"
542 [(set (match_operand:V4SF 0 "register_operand" "=x")
543 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
544 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
546 "minps\t{%2, %0|%0, %2}"
547 [(set_attr "type" "sse")
548 (set_attr "mode" "V4SF")])
;; Scalar-in-vector minimum, enabled only under flag_finite_math_only;
;; emits minss and carries "mode" SF.  Operand 1 is marked commutative
;; ("%0").  The RTL that wraps the smin is not fully visible here.
550 (define_insn "*sse_vmsminv4sf3_finite"
551 [(set (match_operand:V4SF 0 "register_operand" "=x")
553 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
554 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
557 "TARGET_SSE && flag_finite_math_only
558 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
559 "minss\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sse")
561 (set_attr "mode" "SF")])
;; Scalar-in-vector minimum without the commutative marker; emits minss
;; and carries "mode" SF.  Operand 1 is a register tied to the
;; destination.  The RTL that wraps the smin is not fully visible here.
563 (define_insn "sse_vmsminv4sf3"
564 [(set (match_operand:V4SF 0 "register_operand" "=x")
566 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
567 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
571 "minss\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sse")
573 (set_attr "mode" "SF")])
575 ;; These versions of the min/max patterns implement exactly the operations
576 ;; min = (op1 < op2 ? op1 : op2)
577 ;; max = (!(op1 < op2) ? op1 : op2)
578 ;; Their operands are not commutative, and thus they may be used in the
579 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum expressed as an unspec of operands 1 and
;; 2 (the unspec code is not visible here); emits minps.  Operand 1 is a
;; register tied to the destination.
581 (define_insn "*ieee_sminv4sf3"
582 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
587 "minps\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum expressed as an unspec of operands 1 and
;; 2 (the unspec code is not visible here); emits maxps.  Operand 1 is a
;; register tied to the destination.
591 (define_insn "*ieee_smaxv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
593 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
594 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
597 "maxps\t{%2, %0|%0, %2}"
598 [(set_attr "type" "sseadd")
599 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum expressed as an unspec of operands 1 and
;; 2 (the unspec code is not visible here); emits minpd.  Operand 1 is a
;; register tied to the destination.
601 (define_insn "*ieee_sminv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
603 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
604 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
607 "minpd\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum expressed as an unspec of operands 1 and
;; 2 (the unspec code is not visible here); emits maxpd.  Operand 1 is a
;; register tied to the destination.
611 (define_insn "*ieee_smaxv2df3"
612 [(set (match_operand:V2DF 0 "register_operand" "=x")
613 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
614 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
617 "maxpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
;; V4SF add/subtract combination; emits addsubps.  The visible RTL pairs
;; an operation on operands 1 and 2 with (minus op1 op2) on the same
;; operands; the outer combining expression and its selector are not
;; visible here.  Operand 1 is a register tied to the destination.
621 (define_insn "sse3_addsubv4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
625 (match_operand:V4SF 1 "register_operand" "0")
626 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
627 (minus:V4SF (match_dup 1) (match_dup 2))
630 "addsubps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation; emits haddps.  The visible RTL selects SF
;; elements 0/1 and 2/3 of operand 1, followed by elements 0/1 and 2/3 of
;; operand 2; the arithmetic and concatenation wrappers around those
;; selections are not visible here.  Operand 1 is a register tied to the
;; destination.
634 (define_insn "sse3_haddv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
656 "haddps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation; emits hsubps.  The visible RTL selects SF
;; elements 0/1 and 2/3 of operand 1, followed by elements 0/1 and 2/3 of
;; operand 2; the arithmetic and concatenation wrappers around those
;; selections are not visible here.  Operand 1 is a register tied to the
;; destination.
660 (define_insn "sse3_hsubv4sf3"
661 [(set (match_operand:V4SF 0 "register_operand" "=x")
666 (match_operand:V4SF 1 "register_operand" "0")
667 (parallel [(const_int 0)]))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
670 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
671 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
675 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
676 (parallel [(const_int 0)]))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
679 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
680 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
682 "hsubps\t{%2, %0|%0, %2}"
683 [(set_attr "type" "sseadd")
684 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two code paths are visible: one
;; applies gen_sse3_haddv4sf3 twice through a fresh temporary register,
;; the other calls ix86_expand_reduc_v4sf with gen_addv4sf3.  The test
;; that chooses between them is not visible here -- presumably SSE3
;; availability; confirm against the full file.
686 (define_expand "reduc_splus_v4sf"
687 [(match_operand:V4SF 0 "register_operand" "")
688 (match_operand:V4SF 1 "register_operand" "")]
693 rtx tmp = gen_reg_rtx (V4SFmode);
694 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
695 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
698 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF; calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining generator.
702 (define_expand "reduc_smax_v4sf"
703 [(match_operand:V4SF 0 "register_operand" "")
704 (match_operand:V4SF 1 "register_operand" "")]
707 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF; calls ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining generator.
711 (define_expand "reduc_smin_v4sf"
712 [(match_operand:V4SF 0 "register_operand" "")
713 (match_operand:V4SF 1 "register_operand" "")]
716 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
722 ;; Parallel single-precision floating point comparisons
724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison.  Operand 3 is any operator accepted by
;; sse_comparison_operator, and the %D3 modifier in the template fills in
;; the condition part of the cmp..ps mnemonic from it.  Operand 1 is a
;; register tied to the destination.
726 (define_insn "sse_maskcmpv4sf3"
727 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operator:V4SF 3 "sse_comparison_operator"
729 [(match_operand:V4SF 1 "register_operand" "0")
730 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
732 "cmp%D3ps\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssecmp")
734 (set_attr "mode" "V4SF")])
;; Scalar-in-vector comparison; the template builds a cmp..ss mnemonic
;; from operator 3 via %D3 and the insn carries "mode" SF.  Unlike the
;; packed form, operand 2 must be a register ("x"), not memory.  The RTL
;; that wraps the comparison is not fully visible here.
736 (define_insn "sse_vmmaskcmpv4sf3"
737 [(set (match_operand:V4SF 0 "register_operand" "=x")
739 (match_operator:V4SF 3 "sse_comparison_operator"
740 [(match_operand:V4SF 1 "register_operand" "0")
741 (match_operand:V4SF 2 "register_operand" "x")])
745 "cmp%D3ss\t{%2, %0|%0, %2}"
746 [(set_attr "type" "ssecmp")
747 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 and element 0
;; of operand 1; emits comiss.  Operand 0 must be a register, operand 1
;; may be a register or memory.  The comparison and vec_select wrappers
;; are not fully visible here.
749 (define_insn "sse_comi"
750 [(set (reg:CCFP FLAGS_REG)
753 (match_operand:V4SF 0 "register_operand" "x")
754 (parallel [(const_int 0)]))
756 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
757 (parallel [(const_int 0)]))))]
759 "comiss\t{%1, %0|%0, %1}"
760 [(set_attr "type" "ssecomi")
761 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and element 0
;; of operand 1; emits ucomiss.  Operand 0 must be a register, operand 1
;; may be a register or memory.  The comparison and vec_select wrappers
;; are not fully visible here.
763 (define_insn "sse_ucomi"
764 [(set (reg:CCFPU FLAGS_REG)
767 (match_operand:V4SF 0 "register_operand" "x")
768 (parallel [(const_int 0)]))
770 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
771 (parallel [(const_int 0)]))))]
773 "ucomiss\t{%1, %0|%0, %1}"
774 [(set_attr "type" "ssecomi")
775 (set_attr "mode" "SF")])
;; Vector conditional expander for V4SF.  Operands 4 and 5 are the
;; compared values; operands 1 and 2 are the two general_operand values
;; selected between.  The expansion is attempted by ix86_expand_fp_vcond;
;; how its result is acted on is not visible here.
777 (define_expand "vcondv4sf"
778 [(set (match_operand:V4SF 0 "register_operand" "")
781 [(match_operand:V4SF 4 "nonimmediate_operand" "")
782 (match_operand:V4SF 5 "nonimmediate_operand" "")])
783 (match_operand:V4SF 1 "general_operand" "")
784 (match_operand:V4SF 2 "general_operand" "")))]
787 if (ix86_expand_fp_vcond (operands))
793 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
795 ;; Parallel single-precision floating point logical operations
797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
799 (define_expand "andv4sf3"
800 [(set (match_operand:V4SF 0 "register_operand" "")
801 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
802 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
804 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed V4SF bitwise AND; emits andps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand legality is checked
;; by ix86_binary_operator_ok.
806 (define_insn "*andv4sf3"
807 [(set (match_operand:V4SF 0 "register_operand" "=x")
808 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
809 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
810 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
811 "andps\t{%2, %0|%0, %2}"
812 [(set_attr "type" "sselog")
813 (set_attr "mode" "V4SF")])
;; Packed V4SF and-not: (and (not op1) op2); emits andnps.  The
;; complemented operand 1 is the register tied to the destination.
815 (define_insn "sse_nandv4sf3"
816 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
818 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
820 "andnps\t{%2, %0|%0, %2}"
821 [(set_attr "type" "sselog")
822 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
824 (define_expand "iorv4sf3"
825 [(set (match_operand:V4SF 0 "register_operand" "")
826 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
827 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
829 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed V4SF bitwise OR; emits orps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand legality is checked
;; by ix86_binary_operator_ok.
831 (define_insn "*iorv4sf3"
832 [(set (match_operand:V4SF 0 "register_operand" "=x")
833 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
834 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
835 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
836 "orps\t{%2, %0|%0, %2}"
837 [(set_attr "type" "sselog")
838 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
840 (define_expand "xorv4sf3"
841 [(set (match_operand:V4SF 0 "register_operand" "")
842 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
843 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
845 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed V4SF bitwise XOR; emits xorps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand legality is checked
;; by ix86_binary_operator_ok.
847 (define_insn "*xorv4sf3"
848 [(set (match_operand:V4SF 0 "register_operand" "=x")
849 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
850 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
851 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
852 "xorps\t{%2, %0|%0, %2}"
853 [(set_attr "type" "sselog")
854 (set_attr "mode" "V4SF")])
856 ;; Also define scalar versions. These are used for abs, neg, and
857 ;; conditional move. Using subregs into vector modes causes register
858 ;; allocation lossage. These patterns do not allow memory operands
859 ;; because the native instructions read the full 128-bits.
;; Bitwise AND on scalar SF values held in SSE registers; emits the
;; full-width andps and therefore carries "mode" V4SF.  Both inputs must
;; be registers.
861 (define_insn "*andsf3"
862 [(set (match_operand:SF 0 "register_operand" "=x")
863 (and:SF (match_operand:SF 1 "register_operand" "0")
864 (match_operand:SF 2 "register_operand" "x")))]
866 "andps\t{%2, %0|%0, %2}"
867 [(set_attr "type" "sselog")
868 (set_attr "mode" "V4SF")])
;; And-not on scalar SF values held in SSE registers: (and (not op1) op2).
;; Emits the full-width andnps and therefore carries "mode" V4SF.  Both
;; inputs must be registers.
870 (define_insn "*nandsf3"
871 [(set (match_operand:SF 0 "register_operand" "=x")
872 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
873 (match_operand:SF 2 "register_operand" "x")))]
875 "andnps\t{%2, %0|%0, %2}"
876 [(set_attr "type" "sselog")
877 (set_attr "mode" "V4SF")])
;; Bitwise OR on scalar SF values held in SSE registers; emits the
;; full-width orps and therefore carries "mode" V4SF.  Both inputs must
;; be registers.
879 (define_insn "*iorsf3"
880 [(set (match_operand:SF 0 "register_operand" "=x")
881 (ior:SF (match_operand:SF 1 "register_operand" "0")
882 (match_operand:SF 2 "register_operand" "x")))]
884 "orps\t{%2, %0|%0, %2}"
885 [(set_attr "type" "sselog")
886 (set_attr "mode" "V4SF")])
;; Bitwise XOR on scalar SF values held in SSE registers; emits the
;; full-width xorps and therefore carries "mode" V4SF.  Both inputs must
;; be registers.
888 (define_insn "*xorsf3"
889 [(set (match_operand:SF 0 "register_operand" "=x")
890 (xor:SF (match_operand:SF 1 "register_operand" "0")
891 (match_operand:SF 2 "register_operand" "x")))]
893 "xorps\t{%2, %0|%0, %2}"
894 [(set_attr "type" "sselog")
895 (set_attr "mode" "V4SF")])
897 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
899 ;; Parallel single-precision floating point conversion operations
901 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI value (MMX register "y" or memory) to V2SF and combines
;; it with operand 1, a V4SF register tied to the destination; emits
;; cvtpi2ps.  The combining RTL is not fully visible here.
903 (define_insn "sse_cvtpi2ps"
904 [(set (match_operand:V4SF 0 "register_operand" "=x")
907 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
908 (match_operand:V4SF 1 "register_operand" "0")
911 "cvtpi2ps\t{%2, %0|%0, %2}"
912 [(set_attr "type" "ssecvt")
913 (set_attr "mode" "V4SF")])
;; Converts a V4SF source to integers through an unspec and keeps
;; elements 0 and 1 as a V2SI result in an MMX register ("y"); emits
;; cvtps2pi.  The unspec code is not visible here.  The insn is tagged
;; with "unit" mmx.
915 (define_insn "sse_cvtps2pi"
916 [(set (match_operand:V2SI 0 "register_operand" "=y")
918 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
920 (parallel [(const_int 0) (const_int 1)])))]
922 "cvtps2pi\t{%1, %0|%0, %1}"
923 [(set_attr "type" "ssecvt")
924 (set_attr "unit" "mmx")
925 (set_attr "mode" "DI")])
;; Converts a V4SF source with the "fix" RTL operator and keeps elements
;; 0 and 1 as a V2SI result in an MMX register ("y"); emits cvttps2pi.
;; NOTE(review): "mode" is SF here while the sibling sse_cvtps2pi uses
;; DI -- confirm which is intended.
927 (define_insn "sse_cvttps2pi"
928 [(set (match_operand:V2SI 0 "register_operand" "=y")
930 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
931 (parallel [(const_int 0) (const_int 1)])))]
933 "cvttps2pi\t{%1, %0|%0, %1}"
934 [(set_attr "type" "ssecvt")
935 (set_attr "unit" "mmx")
936 (set_attr "mode" "SF")])
;; Converts an SImode integer to SF and combines it with operand 1, a V4SF
;; register tied to the destination; emits cvtsi2ss.  Alternative 0 takes
;; the integer from a general register, alternative 1 from memory; the
;; athlon_decode attribute differs between them (vector vs. double).
;; The combining RTL is not fully visible here.
938 (define_insn "sse_cvtsi2ss"
939 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
942 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
943 (match_operand:V4SF 1 "register_operand" "0,0")
946 "cvtsi2ss\t{%2, %0|%0, %2}"
947 [(set_attr "type" "sseicvt")
948 (set_attr "athlon_decode" "vector,double")
949 (set_attr "mode" "SF")])
;; 64-bit variant: converts a DImode integer to SF and combines it with
;; operand 1, a V4SF register tied to the destination; emits cvtsi2ssq.
;; Only enabled for TARGET_64BIT.
;; NOTE(review): the second alternative's constraint is "rm" where the
;; SImode sibling sse_cvtsi2ss uses "m"; with athlon_decode "double" on
;; that alternative, confirm that also allowing a register is intended.
951 (define_insn "sse_cvtsi2ssq"
952 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
955 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
956 (match_operand:V4SF 1 "register_operand" "0,0")
958 "TARGET_SSE && TARGET_64BIT"
959 "cvtsi2ssq\t{%2, %0|%0, %2}"
960 [(set_attr "type" "sseicvt")
961 (set_attr "athlon_decode" "vector,double")
962 (set_attr "mode" "SF")])
;; Converts element 0 of a V4SF source to an SImode general register via
;; UNSPEC_FIX_NOTRUNC; emits cvtss2si.  Alternative 0 reads an SSE
;; register, alternative 1 memory; athlon_decode is double vs. vector.
964 (define_insn "sse_cvtss2si"
965 [(set (match_operand:SI 0 "register_operand" "=r,r")
968 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
969 (parallel [(const_int 0)]))]
970 UNSPEC_FIX_NOTRUNC))]
972 "cvtss2si\t{%1, %0|%0, %1}"
973 [(set_attr "type" "sseicvt")
974 (set_attr "athlon_decode" "double,vector")
975 (set_attr "mode" "SI")])
;; 64-bit variant: converts element 0 of a V4SF source to a DImode general
;; register via UNSPEC_FIX_NOTRUNC; emits cvtss2siq.  Only enabled for
;; TARGET_64BIT.
977 (define_insn "sse_cvtss2siq"
978 [(set (match_operand:DI 0 "register_operand" "=r,r")
981 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
982 (parallel [(const_int 0)]))]
983 UNSPEC_FIX_NOTRUNC))]
984 "TARGET_SSE && TARGET_64BIT"
985 "cvtss2siq\t{%1, %0|%0, %1}"
986 [(set_attr "type" "sseicvt")
987 (set_attr "athlon_decode" "double,vector")
988 (set_attr "mode" "DI")])
;; Converts element 0 of a V4SF source to an SImode general register;
;; emits cvttss2si.  Alternative 0 reads an SSE register, alternative 1
;; memory.  The conversion operator wrapping the element selection is not
;; visible here.
990 (define_insn "sse_cvttss2si"
991 [(set (match_operand:SI 0 "register_operand" "=r,r")
994 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
995 (parallel [(const_int 0)]))))]
997 "cvttss2si\t{%1, %0|%0, %1}"
998 [(set_attr "type" "sseicvt")
999 (set_attr "athlon_decode" "double,vector")
1000 (set_attr "mode" "SI")])
;; 64-bit variant: converts element 0 of a V4SF source to a DImode general
;; register; emits cvttss2siq.  Only enabled for TARGET_64BIT.  The
;; conversion operator wrapping the element selection is not visible here.
1002 (define_insn "sse_cvttss2siq"
1003 [(set (match_operand:DI 0 "register_operand" "=r,r")
1006 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1007 (parallel [(const_int 0)]))))]
1008 "TARGET_SSE && TARGET_64BIT"
1009 "cvttss2siq\t{%1, %0|%0, %1}"
1010 [(set_attr "type" "sseicvt")
1011 (set_attr "athlon_decode" "double,vector")
1012 (set_attr "mode" "DI")])
;; Converts a V4SI source (register or memory) to V4SF with the "float"
;; RTL operator; emits cvtdq2ps.
;; The "mode" attribute is V4SF to match the V4SF result of this
;; single-precision conversion; V2DF does not describe either operand.
1014 (define_insn "sse2_cvtdq2ps"
1015 [(set (match_operand:V4SF 0 "register_operand" "=x")
1016 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1018 "cvtdq2ps\t{%1, %0|%0, %1}"
1019 [(set_attr "type" "ssecvt")
1020 (set_attr "mode" "V4SF")])
;; Converts a V4SF source (register or memory) to V4SI via
;; UNSPEC_FIX_NOTRUNC; emits cvtps2dq.
1022 (define_insn "sse2_cvtps2dq"
1023 [(set (match_operand:V4SI 0 "register_operand" "=x")
1024 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1025 UNSPEC_FIX_NOTRUNC))]
1027 "cvtps2dq\t{%1, %0|%0, %1}"
1028 [(set_attr "type" "ssecvt")
1029 (set_attr "mode" "TI")])
;; Converts a V4SF source (register or memory) to V4SI with the "fix" RTL
;; operator; emits cvttps2dq.
1031 (define_insn "sse2_cvttps2dq"
1032 [(set (match_operand:V4SI 0 "register_operand" "=x")
1033 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1035 "cvttps2dq\t{%1, %0|%0, %1}"
1036 [(set_attr "type" "ssecvt")
1037 (set_attr "mode" "TI")])
1039 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1041 ;; Parallel single-precision floating point element swizzling
1043 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection from operands 1 and 2; the first selected index is 6
;; and the rest of the selector is not visible here.  Operand 1 is tied
;; to the destination in every alternative.  Three alternatives:
;;   0: register source        -> movhlps
;;   1: offsettable memory "o" -> movlps, addressing the source via %H2
;;   2: memory destination     -> movhps store
;; The insn condition rejects operand 1 and operand 2 both being memory.
1045 (define_insn "sse_movhlps"
1046 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1049 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1050 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1051 (parallel [(const_int 6)
1055 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1057 movhlps\t{%2, %0|%0, %2}
1058 movlps\t{%H2, %0|%0, %H2}
1059 movhps\t{%2, %0|%0, %2}"
1060 [(set_attr "type" "ssemov")
1061 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection from operands 1 and 2; the first selected index is 0
;; and the rest of the selector is not visible here.  Operand 1 is tied
;; to the destination in every alternative.  Three alternatives:
;;   0: register source                    -> movlhps
;;   1: memory source                      -> movhps load
;;   2: offsettable memory destination "o" -> movlps store, addressing the
;;      destination via %H0
;; Operand legality is checked by ix86_binary_operator_ok.
1063 (define_insn "sse_movlhps"
1064 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1067 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1068 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1069 (parallel [(const_int 0)
1073 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1075 movlhps\t{%2, %0|%0, %2}
1076 movhps\t{%2, %0|%0, %2}
1077 movlps\t{%2, %H0|%H0, %2}"
1078 [(set_attr "type" "ssemov")
1079 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: operand 1 is tied to the destination, operand 2 may be a
;; register or memory.  The selector list is (2 6 3 7).
1081 (define_insn "sse_unpckhps"
1082 [(set (match_operand:V4SF 0 "register_operand" "=x")
1085 (match_operand:V4SF 1 "register_operand" "0")
1086 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1087 (parallel [(const_int 2) (const_int 6)
1088 (const_int 3) (const_int 7)])))]
1090 "unpckhps\t{%2, %0|%0, %2}"
1091 [(set_attr "type" "sselog")
1092 (set_attr "mode" "V4SF")])
;; unpcklps: operand 1 is tied to the destination, operand 2 may be a
;; register or memory.  The selector list is (0 4 1 5).
1094 (define_insn "sse_unpcklps"
1095 [(set (match_operand:V4SF 0 "register_operand" "=x")
1098 (match_operand:V4SF 1 "register_operand" "0")
1099 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1100 (parallel [(const_int 0) (const_int 4)
1101 (const_int 1) (const_int 5)])))]
1103 "unpcklps\t{%2, %0|%0, %2}"
1104 [(set_attr "type" "sselog")
1105 (set_attr "mode" "V4SF")])
1107 ;; These are modeled with the same vec_concat as the others so that we
1108 ;; capture users of shufps that can use the new instructions
;; movshdup: one V4SF source (SSE register or memory) into an SSE
;; register; the selector list begins with index 1.
1109 (define_insn "sse3_movshdup"
1110 [(set (match_operand:V4SF 0 "register_operand" "=x")
1113 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1115 (parallel [(const_int 1)
1120 "movshdup\t{%1, %0|%0, %1}"
1121 [(set_attr "type" "sse")
1122 (set_attr "mode" "V4SF")])
;; movsldup: one V4SF source (SSE register or memory) into an SSE
;; register; the selector list begins with index 0.
1124 (define_insn "sse3_movsldup"
1125 [(set (match_operand:V4SF 0 "register_operand" "=x")
1128 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1130 (parallel [(const_int 0)
1135 "movsldup\t{%1, %0|%0, %1}"
1136 [(set_attr "type" "sse")
1137 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate mask in operand 3.  The mask is
;; cut into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7); the upper two
;; fields are biased by 4 before all four are handed to sse_shufps_1.
1139 (define_expand "sse_shufps"
1140 [(match_operand:V4SF 0 "register_operand" "")
1141 (match_operand:V4SF 1 "register_operand" "")
1142 (match_operand:V4SF 2 "nonimmediate_operand" "")
1143 (match_operand:SI 3 "const_int_operand" "")]
1146 int mask = INTVAL (operands[3]);
1147 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1148 GEN_INT ((mask >> 0) & 3),
1149 GEN_INT ((mask >> 2) & 3),
1150 GEN_INT (((mask >> 4) & 3) + 4),
1151 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selector spelled out as four constants: operands 3 and
;; 4 must be in 0..3, operands 5 and 6 in 4..7.  The output code packs
;; them back into one immediate (operands 5 and 6 have 4 subtracted, the
;; fields land at bits 0, 2, 4 and 6) and stores it in operands[3] so the
;; template can print it as %3.
1155 (define_insn "sse_shufps_1"
1156 [(set (match_operand:V4SF 0 "register_operand" "=x")
1159 (match_operand:V4SF 1 "register_operand" "0")
1160 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1161 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1162 (match_operand 4 "const_0_to_3_operand" "")
1163 (match_operand 5 "const_4_to_7_operand" "")
1164 (match_operand 6 "const_4_to_7_operand" "")])))]
1168 mask |= INTVAL (operands[3]) << 0;
1169 mask |= INTVAL (operands[4]) << 2;
1170 mask |= (INTVAL (operands[5]) - 4) << 4;
1171 mask |= (INTVAL (operands[6]) - 4) << 6;
1172 operands[3] = GEN_INT (mask);
1174 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1176 [(set_attr "type" "sselog")
1177 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF.  Alternatives, in
;; order: store to memory (movhps), register to register (movhlps),
;; load from offsettable memory through %H1 (movlps).
1179 (define_insn "sse_storehps"
1180 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1182 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1183 (parallel [(const_int 2) (const_int 3)])))]
1186 movhps\t{%1, %0|%0, %1}
1187 movhlps\t{%1, %0|%0, %1}
1188 movlps\t{%H1, %0|%0, %H1}"
1189 [(set_attr "type" "ssemov")
1190 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of operand 1 (tied to the destination) with a
;; V2SF operand 2.  Alternatives, in order: operand 2 in memory (movhps),
;; operand 2 in a register (movlhps), offsettable memory destination
;; written through %H0 (movlps).
1192 (define_insn "sse_loadhps"
1193 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1196 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1197 (parallel [(const_int 0) (const_int 1)]))
1198 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1201 movhps\t{%2, %0|%0, %2}
1202 movlhps\t{%2, %0|%0, %2}
1203 movlps\t{%2, %H0|%H0, %2}"
1204 [(set_attr "type" "ssemov")
1205 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF into a V2SF.  Alternatives, in
;; order: store to memory (movlps), register to register (movaps),
;; load from memory (movlps).
1207 (define_insn "sse_storelps"
1208 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1210 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1211 (parallel [(const_int 0) (const_int 1)])))]
1214 movlps\t{%1, %0|%0, %1}
1215 movaps\t{%1, %0|%0, %1}
1216 movlps\t{%1, %0|%0, %1}"
1217 [(set_attr "type" "ssemov")
1218 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine a V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives, in order: operand 2 tied to the destination and operand 1
;; in a register (shufps with immediate 0xe4), operand 2 in memory
;; (movlps), memory destination with operand 2 in a register (movlps).
1220 (define_insn "sse_loadlps"
1221 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1223 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1225 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1226 (parallel [(const_int 2) (const_int 3)]))))]
1229 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1230 movlps\t{%2, %0|%0, %2}
1231 movlps\t{%2, %0|%0, %2}"
1232 [(set_attr "type" "sselog,ssemov,ssemov")
1233 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 1 is tied to the destination and
;; operand 2 is the register printed as the source.
1235 (define_insn "sse_movss"
1236 [(set (match_operand:V4SF 0 "register_operand" "=x")
1238 (match_operand:V4SF 2 "register_operand" "x")
1239 (match_operand:V4SF 1 "register_operand" "0")
1242 "movss\t{%2, %0|%0, %2}"
1243 [(set_attr "type" "ssemov")
1244 (set_attr "mode" "SF")])
;; V4SF result from an SFmode register operand that is tied to the
;; destination; emitted as shufps of the register with itself and
;; immediate 0.
1246 (define_insn "*vec_dupv4sf"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x")
1249 (match_operand:SF 1 "register_operand" "0")))]
1251 "shufps\t{$0, %0, %0|%0, %0, 0}"
1252 [(set_attr "type" "sselog1")
1253 (set_attr "mode" "V4SF")])
1255 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1256 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1257 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SFmode operands.  Alternatives, in order: SSE
;; registers (unpcklps), SSE load with operand 2 constrained to "C"
;; (movss), MMX registers (punpckldq), MMX load with operand 2 "C" (movd).
1258 (define_insn "*sse_concatv2sf"
1259 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1261 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1262 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1265 unpcklps\t{%2, %0|%0, %2}
1266 movss\t{%1, %0|%0, %1}
1267 punpckldq\t{%2, %0|%0, %2}
1268 movd\t{%1, %0|%0, %1}"
1269 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1270 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  Operand 2 in a register uses movlhps, in memory movhps.
1272 (define_insn "*sse_concatv4sf"
1273 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1275 (match_operand:V2SF 1 "register_operand" " 0,0")
1276 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1279 movlhps\t{%2, %0|%0, %2}
1280 movhps\t{%2, %0|%0, %2}"
1281 [(set_attr "type" "ssemov")
1282 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: delegates to ix86_expand_vector_init with
;; `false' as its first argument.
1284 (define_expand "vec_initv4sf"
1285 [(match_operand:V4SF 0 "register_operand" "")
1286 (match_operand 1 "" "")]
1289 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert SFmode operand 2 into a V4SF whose other input is operand 1
;; (tied to the destination, or constrained to "C").  The first three
;; alternatives print movss, movss and movd; the memory-destination
;; alternative has no instruction text among these lines.
1293 (define_insn "*vec_setv4sf_0"
1294 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1297 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1298 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1302 movss\t{%2, %0|%0, %2}
1303 movss\t{%2, %0|%0, %2}
1304 movd\t{%2, %0|%0, %2}
1306 [(set_attr "type" "ssemov")
1307 (set_attr "mode" "SF")])
;; Post-reload rewrite (condition requires reload_completed) for a V4SF
;; memory destination and a non-memory SFmode operand 1: emits a plain
;; SFmode move to offset 0 of the destination.
1310 [(set (match_operand:V4SF 0 "memory_operand" "")
1313 (match_operand:SF 1 "nonmemory_operand" ""))
1316 "TARGET_SSE && reload_completed"
1319 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Set one SFmode element of a V4SF register; operand 2 is the constant
;; element index.  Delegates to ix86_expand_vector_set.
1323 (define_expand "vec_setv4sf"
1324 [(match_operand:V4SF 0 "register_operand" "")
1325 (match_operand:SF 1 "register_operand" "")
1326 (match_operand 2 "const_int_operand" "")]
1329 ix86_expand_vector_set (false, operands[0], operands[1],
1330 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as SFmode.  Not valid when both operands
;; are memory.  After reload it is split into an ordinary SFmode move
;; from an SFmode view of operand 1 (built with gen_rtx_REG or
;; gen_lowpart).
1334 (define_insn_and_split "*vec_extractv4sf_0"
1335 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1337 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1338 (parallel [(const_int 0)])))]
1339 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1341 "&& reload_completed"
1344 rtx op1 = operands[1];
1346 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1348 op1 = gen_lowpart (SFmode, op1);
1349 emit_move_insn (operands[0], op1);
;; Extract one SFmode element of a V4SF register; operand 2 is the
;; constant element index.  Delegates to ix86_expand_vector_extract.
1353 (define_expand "vec_extractv4sf"
1354 [(match_operand:SF 0 "register_operand" "")
1355 (match_operand:V4SF 1 "register_operand" "")
1356 (match_operand 2 "const_int_operand" "")]
1359 ix86_expand_vector_extract (false, operands[0], operands[1],
1360 INTVAL (operands[2]));
1364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1366 ;; Parallel double-precision floating point arithmetic
1368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation; all the work is done by ix86_expand_fp_absneg_operator,
;; after which the expander finishes with DONE.
1370 (define_expand "negv2df2"
1371 [(set (match_operand:V2DF 0 "register_operand" "")
1372 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1374 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value; all the work is done by
;; ix86_expand_fp_absneg_operator, after which the expander finishes
;; with DONE.
1376 (define_expand "absv2df2"
1377 [(set (match_operand:V2DF 0 "register_operand" "")
1378 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1380 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the plus pattern is then emitted.
1382 (define_expand "addv2df3"
1383 [(set (match_operand:V2DF 0 "register_operand" "")
1384 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1385 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1387 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd: commutative ("%0") V2DF add, operand 2 in a register or memory.
1389 (define_insn "*addv2df3"
1390 [(set (match_operand:V2DF 0 "register_operand" "=x")
1391 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1392 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1393 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1394 "addpd\t{%2, %0|%0, %2}"
1395 [(set_attr "type" "sseadd")
1396 (set_attr "mode" "DF")])
;; addsd on V2DF operands ("vm" form): commutative add of operand 1 (tied
;; to the destination) and operand 2 (register or memory).  The operand
;; check is given V2DFmode, the mode of every operand of this pattern and
;; the mode passed by the other V2DF patterns in this file.
1398 (define_insn "sse2_vmaddv2df3"
1399 [(set (match_operand:V2DF 0 "register_operand" "=x")
1401 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1402 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1405 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1406 "addsd\t{%2, %0|%0, %2}"
1407 [(set_attr "type" "sseadd")
1408 (set_attr "mode" "DF")])
;; V2DF subtraction expander: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the minus pattern is then emitted.
1410 (define_expand "subv2df3"
1411 [(set (match_operand:V2DF 0 "register_operand" "")
1412 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1413 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1415 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd: operand 1 must be the destination register (not commutative),
;; operand 2 may be a register or memory.
1417 (define_insn "*subv2df3"
1418 [(set (match_operand:V2DF 0 "register_operand" "=x")
1419 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1420 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1422 "subpd\t{%2, %0|%0, %2}"
1423 [(set_attr "type" "sseadd")
1424 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands ("vm" form); operand 1 is tied to the
;; destination, operand 2 may be a register or memory.
1426 (define_insn "sse2_vmsubv2df3"
1427 [(set (match_operand:V2DF 0 "register_operand" "=x")
1429 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1430 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1434 "subsd\t{%2, %0|%0, %2}"
1435 [(set_attr "type" "sseadd")
1436 (set_attr "mode" "DF")])
;; V2DF multiplication expander: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the mult pattern is then emitted.
1438 (define_expand "mulv2df3"
1439 [(set (match_operand:V2DF 0 "register_operand" "")
1440 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1441 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1443 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd: commutative ("%0") V2DF multiply, operand 2 in a register or
;; memory.
1445 (define_insn "*mulv2df3"
1446 [(set (match_operand:V2DF 0 "register_operand" "=x")
1447 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1448 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1449 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1450 "mulpd\t{%2, %0|%0, %2}"
1451 [(set_attr "type" "ssemul")
1452 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands ("vm" form); commutative, operand 2 in a
;; register or memory.
1454 (define_insn "sse2_vmmulv2df3"
1455 [(set (match_operand:V2DF 0 "register_operand" "=x")
1457 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1458 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1461 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1462 "mulsd\t{%2, %0|%0, %2}"
1463 [(set_attr "type" "ssemul")
1464 (set_attr "mode" "DF")])
;; V2DF division expander; operand 1 must already be a register.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy.
1466 (define_expand "divv2df3"
1467 [(set (match_operand:V2DF 0 "register_operand" "")
1468 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1469 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1471 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd: operand 1 is tied to the destination, operand 2 may be a
;; register or memory.
1473 (define_insn "*divv2df3"
1474 [(set (match_operand:V2DF 0 "register_operand" "=x")
1475 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1476 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1478 "divpd\t{%2, %0|%0, %2}"
1479 [(set_attr "type" "ssediv")
1480 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands ("vm" form); operand 1 is tied to the
;; destination, operand 2 may be a register or memory.
1482 (define_insn "sse2_vmdivv2df3"
1483 [(set (match_operand:V2DF 0 "register_operand" "=x")
1485 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1486 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1490 "divsd\t{%2, %0|%0, %2}"
1491 [(set_attr "type" "ssediv")
1492 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of a register or memory source.
1494 (define_insn "sqrtv2df2"
1495 [(set (match_operand:V2DF 0 "register_operand" "=x")
1496 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1498 "sqrtpd\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "sse")
1500 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands ("vm" form): square root of operand 1, with
;; operand 2 tied to the destination.  Operand 1 uses
;; nonimmediate_operand so that the "m" half of its "xm" constraint can
;; actually match, as in sqrtv2df2; with register_operand the memory
;; alternative was unreachable.
1502 (define_insn "sse2_vmsqrtv2df2"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1505 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1506 (match_operand:V2DF 2 "register_operand" "0")
1509 "sqrtsd\t{%1, %0|%0, %1}"
1510 [(set_attr "type" "sse")
1511 (set_attr "mode" "DF")])
1513 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1514 ;; isn't really correct, as those rtl operators aren't defined when
1515 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1517 (define_expand "smaxv2df3"
1518 [(set (match_operand:V2DF 0 "register_operand" "")
1519 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1520 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1523 if (!flag_finite_math_only)
1524 operands[1] = force_reg (V2DFmode, operands[1]);
1525 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative ("%0") form; only enabled with
;; flag_finite_math_only.
1528 (define_insn "*smaxv2df3_finite"
1529 [(set (match_operand:V2DF 0 "register_operand" "=x")
1530 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1531 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1532 "TARGET_SSE2 && flag_finite_math_only
1533 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1534 "maxpd\t{%2, %0|%0, %2}"
1535 [(set_attr "type" "sseadd")
1536 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be the destination
;; register (no "%" marker), operand 2 may be a register or memory.
1538 (define_insn "*smaxv2df3"
1539 [(set (match_operand:V2DF 0 "register_operand" "=x")
1540 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1541 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1543 "maxpd\t{%2, %0|%0, %2}"
1544 [(set_attr "type" "sseadd")
1545 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands ("vm" form), commutative; only enabled with
;; flag_finite_math_only.
1547 (define_insn "*sse2_vmsmaxv2df3_finite"
1548 [(set (match_operand:V2DF 0 "register_operand" "=x")
1550 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1551 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1554 "TARGET_SSE2 && flag_finite_math_only
1555 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1556 "maxsd\t{%2, %0|%0, %2}"
1557 [(set_attr "type" "sseadd")
1558 (set_attr "mode" "DF")])
;; maxsd on V2DF operands ("vm" form), non-commutative: operand 1 is
;; tied to the destination, operand 2 may be a register or memory.
1560 (define_insn "sse2_vmsmaxv2df3"
1561 [(set (match_operand:V2DF 0 "register_operand" "=x")
1563 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1564 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1568 "maxsd\t{%2, %0|%0, %2}"
1569 [(set_attr "type" "sseadd")
1570 (set_attr "mode" "DF")])
;; V2DF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1572 (define_expand "sminv2df3"
1573 [(set (match_operand:V2DF 0 "register_operand" "")
1574 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1575 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1578 if (!flag_finite_math_only)
1579 operands[1] = force_reg (V2DFmode, operands[1]);
1580 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative ("%0") form; only enabled with
;; flag_finite_math_only.
1583 (define_insn "*sminv2df3_finite"
1584 [(set (match_operand:V2DF 0 "register_operand" "=x")
1585 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1586 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1587 "TARGET_SSE2 && flag_finite_math_only
1588 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1589 "minpd\t{%2, %0|%0, %2}"
1590 [(set_attr "type" "sseadd")
1591 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be the destination
;; register (no "%" marker), operand 2 may be a register or memory.
1593 (define_insn "*sminv2df3"
1594 [(set (match_operand:V2DF 0 "register_operand" "=x")
1595 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1596 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1598 "minpd\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sseadd")
1600 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands ("vm" form), commutative; only enabled with
;; flag_finite_math_only.
1602 (define_insn "*sse2_vmsminv2df3_finite"
1603 [(set (match_operand:V2DF 0 "register_operand" "=x")
1605 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1606 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1609 "TARGET_SSE2 && flag_finite_math_only
1610 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1611 "minsd\t{%2, %0|%0, %2}"
1612 [(set_attr "type" "sseadd")
1613 (set_attr "mode" "DF")])
;; minsd on V2DF operands ("vm" form), non-commutative: operand 1 is
;; tied to the destination, operand 2 may be a register or memory.
1615 (define_insn "sse2_vmsminv2df3"
1616 [(set (match_operand:V2DF 0 "register_operand" "=x")
1618 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1619 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1623 "minsd\t{%2, %0|%0, %2}"
1624 [(set_attr "type" "sseadd")
1625 (set_attr "mode" "DF")])
;; addsubpd: the pattern combines operands 1 and 2 with a
;; (minus (match_dup 1) (match_dup 2)) of the same two operands.
;; Operand 1 is tied to the destination.
1627 (define_insn "sse3_addsubv2df3"
1628 [(set (match_operand:V2DF 0 "register_operand" "=x")
1631 (match_operand:V2DF 1 "register_operand" "0")
1632 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1633 (minus:V2DF (match_dup 1) (match_dup 2))
1636 "addsubpd\t{%2, %0|%0, %2}"
1637 [(set_attr "type" "sseadd")
1638 (set_attr "mode" "V2DF")])
;; haddpd: each result element pairs element 0 with element 1 of one
;; source operand, first for operand 1 (tied to the destination) and then
;; for operand 2 (register or memory).
1640 (define_insn "sse3_haddv2df3"
1641 [(set (match_operand:V2DF 0 "register_operand" "=x")
1645 (match_operand:V2DF 1 "register_operand" "0")
1646 (parallel [(const_int 0)]))
1647 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1650 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1651 (parallel [(const_int 0)]))
1652 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1654 "haddpd\t{%2, %0|%0, %2}"
1655 [(set_attr "type" "sseadd")
1656 (set_attr "mode" "V2DF")])
;; hsubpd: each result element pairs element 0 with element 1 of one
;; source operand, first for operand 1 (tied to the destination) and then
;; for operand 2 (register or memory).
1658 (define_insn "sse3_hsubv2df3"
1659 [(set (match_operand:V2DF 0 "register_operand" "=x")
1663 (match_operand:V2DF 1 "register_operand" "0")
1664 (parallel [(const_int 0)]))
1665 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1668 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1669 (parallel [(const_int 0)]))
1670 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1672 "hsubpd\t{%2, %0|%0, %2}"
1673 [(set_attr "type" "sseadd")
1674 (set_attr "mode" "V2DF")])
;; V2DF sum reduction: emits sse3_haddv2df3 with operand 1 supplied as
;; both source operands.
1676 (define_expand "reduc_splus_v2df"
1677 [(match_operand:V2DF 0 "register_operand" "")
1678 (match_operand:V2DF 1 "register_operand" "")]
1681 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1685 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1687 ;; Parallel double-precision floating point comparisons
1689 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>pd: operator 3 must satisfy sse_comparison_operator and is
;; printed into the mnemonic with %D3.  Operand 1 is tied to the
;; destination.
1691 (define_insn "sse2_maskcmpv2df3"
1692 [(set (match_operand:V2DF 0 "register_operand" "=x")
1693 (match_operator:V2DF 3 "sse_comparison_operator"
1694 [(match_operand:V2DF 1 "register_operand" "0")
1695 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1697 "cmp%D3pd\t{%2, %0|%0, %2}"
1698 [(set_attr "type" "ssecmp")
1699 (set_attr "mode" "V2DF")])
;; cmp<cond>sd on V2DF operands ("vm" form): operator 3 must satisfy
;; sse_comparison_operator and is printed into the mnemonic with %D3.
1701 (define_insn "sse2_vmmaskcmpv2df3"
1702 [(set (match_operand:V2DF 0 "register_operand" "=x")
1704 (match_operator:V2DF 3 "sse_comparison_operator"
1705 [(match_operand:V2DF 1 "register_operand" "0")
1706 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1710 "cmp%D3sd\t{%2, %0|%0, %2}"
1711 [(set_attr "type" "ssecmp")
1712 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFPmode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).
1714 (define_insn "sse2_comi"
1715 [(set (reg:CCFP FLAGS_REG)
1718 (match_operand:V2DF 0 "register_operand" "x")
1719 (parallel [(const_int 0)]))
1721 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1722 (parallel [(const_int 0)]))))]
1724 "comisd\t{%1, %0|%0, %1}"
1725 [(set_attr "type" "ssecomi")
1726 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPUmode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).
1728 (define_insn "sse2_ucomi"
1729 [(set (reg:CCFPU FLAGS_REG)
1732 (match_operand:V2DF 0 "register_operand" "x")
1733 (parallel [(const_int 0)]))
1735 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1736 (parallel [(const_int 0)]))))]
1738 "ucomisd\t{%1, %0|%0, %1}"
1739 [(set_attr "type" "ssecomi")
1740 (set_attr "mode" "DF")])
;; V2DF vector conditional: operator 3 compares operands 4 and 5, and
;; operands 1 and 2 are the two value operands.  Expansion is attempted
;; through ix86_expand_fp_vcond.
1742 (define_expand "vcondv2df"
1743 [(set (match_operand:V2DF 0 "register_operand" "")
1745 (match_operator 3 ""
1746 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1747 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1748 (match_operand:V2DF 1 "general_operand" "")
1749 (match_operand:V2DF 2 "general_operand" "")))]
1752 if (ix86_expand_fp_vcond (operands))
1758 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1760 ;; Parallel double-precision floating point logical operations
1762 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-and expander: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the and pattern is then emitted.
1764 (define_expand "andv2df3"
1765 [(set (match_operand:V2DF 0 "register_operand" "")
1766 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1767 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1769 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd: commutative ("%0") V2DF and, operand 2 in a register or memory.
1771 (define_insn "*andv2df3"
1772 [(set (match_operand:V2DF 0 "register_operand" "=x")
1773 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1774 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1775 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1776 "andpd\t{%2, %0|%0, %2}"
1777 [(set_attr "type" "sselog")
1778 (set_attr "mode" "V2DF")])
;; andnpd: (and (not operand 1) operand 2).  The complemented operand is
;; the one tied to the destination.
1780 (define_insn "sse2_nandv2df3"
1781 [(set (match_operand:V2DF 0 "register_operand" "=x")
1782 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1783 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1785 "andnpd\t{%2, %0|%0, %2}"
1786 [(set_attr "type" "sselog")
1787 (set_attr "mode" "V2DF")])
;; V2DF bitwise-or expander: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the ior pattern is then emitted.
1789 (define_expand "iorv2df3"
1790 [(set (match_operand:V2DF 0 "register_operand" "")
1791 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1792 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1794 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd: commutative ("%0") V2DF or, operand 2 in a register or memory.
1796 (define_insn "*iorv2df3"
1797 [(set (match_operand:V2DF 0 "register_operand" "=x")
1798 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1799 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1800 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1801 "orpd\t{%2, %0|%0, %2}"
1802 [(set_attr "type" "sselog")
1803 (set_attr "mode" "V2DF")])
;; V2DF bitwise-xor expander: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the xor pattern is then emitted.
1805 (define_expand "xorv2df3"
1806 [(set (match_operand:V2DF 0 "register_operand" "")
1807 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1808 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1810 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd: commutative ("%0") V2DF xor, operand 2 in a register or memory.
1812 (define_insn "*xorv2df3"
1813 [(set (match_operand:V2DF 0 "register_operand" "=x")
1814 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1815 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1816 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1817 "xorpd\t{%2, %0|%0, %2}"
1818 [(set_attr "type" "sselog")
1819 (set_attr "mode" "V2DF")])
1821 ;; Also define scalar versions. These are used for abs, neg, and
1822 ;; conditional move. Using subregs into vector modes causes register
1823 ;; allocation lossage. These patterns do not allow memory operands
1824 ;; because the native instructions read the full 128 bits.
;; Scalar DFmode and, emitted as andpd; both inputs must be SSE
;; registers (no memory alternative).
1826 (define_insn "*anddf3"
1827 [(set (match_operand:DF 0 "register_operand" "=x")
1828 (and:DF (match_operand:DF 1 "register_operand" "0")
1829 (match_operand:DF 2 "register_operand" "x")))]
1831 "andpd\t{%2, %0|%0, %2}"
1832 [(set_attr "type" "sselog")
1833 (set_attr "mode" "V2DF")])
;; Scalar DFmode (and (not operand 1) operand 2), emitted as andnpd; both
;; inputs must be SSE registers.
1835 (define_insn "*nanddf3"
1836 [(set (match_operand:DF 0 "register_operand" "=x")
1837 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1838 (match_operand:DF 2 "register_operand" "x")))]
1840 "andnpd\t{%2, %0|%0, %2}"
1841 [(set_attr "type" "sselog")
1842 (set_attr "mode" "V2DF")])
;; Scalar DFmode or, emitted as orpd; both inputs must be SSE registers.
1844 (define_insn "*iordf3"
1845 [(set (match_operand:DF 0 "register_operand" "=x")
1846 (ior:DF (match_operand:DF 1 "register_operand" "0")
1847 (match_operand:DF 2 "register_operand" "x")))]
1849 "orpd\t{%2, %0|%0, %2}"
1850 [(set_attr "type" "sselog")
1851 (set_attr "mode" "V2DF")])
;; Scalar DFmode xor, emitted as xorpd; both inputs must be SSE registers.
1853 (define_insn "*xordf3"
1854 [(set (match_operand:DF 0 "register_operand" "=x")
1855 (xor:DF (match_operand:DF 1 "register_operand" "0")
1856 (match_operand:DF 2 "register_operand" "x")))]
1858 "xorpd\t{%2, %0|%0, %2}"
1859 [(set_attr "type" "sselog")
1860 (set_attr "mode" "V2DF")])
1862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1864 ;; Parallel double-precision floating point conversion operations
1866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI -> V2DF.  The source is an MMX register ("y") in the
;; first alternative and memory in the second; only the first is marked
;; with unit "mmx".
1868 (define_insn "sse2_cvtpi2pd"
1869 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1870 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1872 "cvtpi2pd\t{%1, %0|%0, %1}"
1873 [(set_attr "type" "ssecvt")
1874 (set_attr "unit" "mmx,*")
1875 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI into an MMX register, expressed as
;; UNSPEC_FIX_NOTRUNC.
1877 (define_insn "sse2_cvtpd2pi"
1878 [(set (match_operand:V2SI 0 "register_operand" "=y")
1879 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1880 UNSPEC_FIX_NOTRUNC))]
1882 "cvtpd2pi\t{%1, %0|%0, %1}"
1883 [(set_attr "type" "ssecvt")
1884 (set_attr "unit" "mmx")
1885 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI into an MMX register, expressed with the
;; `fix' rtx.
;; NOTE(review): "mode" is TI here but DI on sse2_cvtpd2pi; confirm which
;; one is intended.
1887 (define_insn "sse2_cvttpd2pi"
1888 [(set (match_operand:V2SI 0 "register_operand" "=y")
1889 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1891 "cvttpd2pi\t{%1, %0|%0, %1}"
1892 [(set_attr "type" "ssecvt")
1893 (set_attr "unit" "mmx")
1894 (set_attr "mode" "TI")])
;; cvtsi2sd: SImode operand 2 (general register or memory) converted to
;; DF, combined with V2DF operand 1, which is tied to the destination.
1896 (define_insn "sse2_cvtsi2sd"
1897 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1900 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1901 (match_operand:V2DF 1 "register_operand" "0,0")
1904 "cvtsi2sd\t{%2, %0|%0, %2}"
1905 [(set_attr "type" "sseicvt")
1906 (set_attr "mode" "DF")
1907 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: DImode variant of sse2_cvtsi2sd; enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.
1909 (define_insn "sse2_cvtsi2sdq"
1910 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1913 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1914 (match_operand:V2DF 1 "register_operand" "0,0")
1916 "TARGET_SSE2 && TARGET_64BIT"
1917 "cvtsi2sdq\t{%2, %0|%0, %2}"
1918 [(set_attr "type" "sseicvt")
1919 (set_attr "mode" "DF")
1920 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: SImode general-register result from element 0 of a V2DF
;; source (SSE register or memory), expressed as UNSPEC_FIX_NOTRUNC.
1922 (define_insn "sse2_cvtsd2si"
1923 [(set (match_operand:SI 0 "register_operand" "=r,r")
1926 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1927 (parallel [(const_int 0)]))]
1928 UNSPEC_FIX_NOTRUNC))]
1930 "cvtsd2si\t{%1, %0|%0, %1}"
1931 [(set_attr "type" "sseicvt")
1932 (set_attr "athlon_decode" "double,vector")
1933 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode variant of sse2_cvtsd2si; enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.
1935 (define_insn "sse2_cvtsd2siq"
1936 [(set (match_operand:DI 0 "register_operand" "=r,r")
1939 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1940 (parallel [(const_int 0)]))]
1941 UNSPEC_FIX_NOTRUNC))]
1942 "TARGET_SSE2 && TARGET_64BIT"
1943 "cvtsd2siq\t{%1, %0|%0, %1}"
1944 [(set_attr "type" "sseicvt")
1945 (set_attr "athlon_decode" "double,vector")
1946 (set_attr "mode" "DI")])
;; cvttsd2si: SImode general-register result from element 0 of a V2DF
;; source (SSE register or memory).
1948 (define_insn "sse2_cvttsd2si"
1949 [(set (match_operand:SI 0 "register_operand" "=r,r")
1952 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1953 (parallel [(const_int 0)]))))]
1955 "cvttsd2si\t{%1, %0|%0, %1}"
1956 [(set_attr "type" "sseicvt")
1957 (set_attr "mode" "SI")
1958 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: DImode variant of sse2_cvttsd2si; enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.
1960 (define_insn "sse2_cvttsd2siq"
1961 [(set (match_operand:DI 0 "register_operand" "=r,r")
1964 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1965 (parallel [(const_int 0)]))))]
1966 "TARGET_SSE2 && TARGET_64BIT"
1967 "cvttsd2siq\t{%1, %0|%0, %1}"
1968 [(set_attr "type" "sseicvt")
1969 (set_attr "mode" "DI")
1970 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: V2DF result from elements 0 and 1 of a V4SI source (SSE
;; register or memory).
1972 (define_insn "sse2_cvtdq2pd"
1973 [(set (match_operand:V2DF 0 "register_operand" "=x")
1976 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1977 (parallel [(const_int 0) (const_int 1)]))))]
1979 "cvtdq2pd\t{%1, %0|%0, %1}"
1980 [(set_attr "type" "ssecvt")
1981 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: V2DF source, V4SI destination.  The
;; preparation statement sets operand 2 to the V2SImode zero constant.
1983 (define_expand "sse2_cvtpd2dq"
1984 [(set (match_operand:V4SI 0 "register_operand" "")
1986 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1990 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: a V2SI unspec of the V2DF source paired with operand 2,
;; which must satisfy const0_operand, forming the V4SI destination.
1992 (define_insn "*sse2_cvtpd2dq"
1993 [(set (match_operand:V4SI 0 "register_operand" "=x")
1995 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1997 (match_operand:V2SI 2 "const0_operand" "")))]
1999 "cvtpd2dq\t{%1, %0|%0, %1}"
2000 [(set_attr "type" "ssecvt")
2001 (set_attr "mode" "TI")])
;; Expander for cvttpd2dq: V2DF source converted with `fix' to V2SI,
;; V4SI destination.  The preparation statement sets operand 2 to the
;; V2SImode zero constant.
2003 (define_expand "sse2_cvttpd2dq"
2004 [(set (match_operand:V4SI 0 "register_operand" "")
2006 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2009 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: `fix' of the V2DF source to V2SI paired with operand 2,
;; which must satisfy const0_operand, forming the V4SI destination.
2011 (define_insn "*sse2_cvttpd2dq"
2012 [(set (match_operand:V4SI 0 "register_operand" "=x")
2014 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2015 (match_operand:V2SI 2 "const0_operand" "")))]
2017 "cvttpd2dq\t{%1, %0|%0, %1}"
2018 [(set_attr "type" "ssecvt")
2019 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (register or memory),
;; combined with V4SF operand 1, which is tied to the destination.
2021 (define_insn "sse2_cvtsd2ss"
2022 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2025 (float_truncate:V2SF
2026 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2027 (match_operand:V4SF 1 "register_operand" "0,0")
2030 "cvtsd2ss\t{%2, %0|%0, %2}"
2031 [(set_attr "type" "ssecvt")
2032 (set_attr "athlon_decode" "vector,double")
2033 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (register or
;; memory), combined with V2DF operand 1, which is tied to the
;; destination.
2035 (define_insn "sse2_cvtss2sd"
2036 [(set (match_operand:V2DF 0 "register_operand" "=x")
2040 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2041 (parallel [(const_int 0) (const_int 1)])))
2042 (match_operand:V2DF 1 "register_operand" "0")
2045 "cvtss2sd\t{%2, %0|%0, %2}"
2046 [(set_attr "type" "ssecvt")
2047 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: float_truncate of the V2DF source to V2SF,
;; V4SF destination.  The preparation statement sets operand 2 to the
;; V2SFmode zero constant.
2049 (define_expand "sse2_cvtpd2ps"
2050 [(set (match_operand:V4SF 0 "register_operand" "")
2052 (float_truncate:V2SF
2053 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2056 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate of the V2DF source to V2SF paired with
;; operand 2, which must satisfy const0_operand, forming the V4SF
;; destination.
2058 (define_insn "*sse2_cvtpd2ps"
2059 [(set (match_operand:V4SF 0 "register_operand" "=x")
2061 (float_truncate:V2SF
2062 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2063 (match_operand:V2SF 2 "const0_operand" "")))]
2065 "cvtpd2ps\t{%1, %0|%0, %1}"
2066 [(set_attr "type" "ssecvt")
2067 (set_attr "mode" "V4SF")])
;; cvtps2pd: V2DF result from elements 0 and 1 of a V4SF source (SSE
;; register or memory).
2069 (define_insn "sse2_cvtps2pd"
2070 [(set (match_operand:V2DF 0 "register_operand" "=x")
2073 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2074 (parallel [(const_int 0) (const_int 1)]))))]
2076 "cvtps2pd\t{%1, %0|%0, %1}"
2077 [(set_attr "type" "ssecvt")
2078 (set_attr "mode" "V2DF")])
2080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2082 ;; Parallel double-precision floating point element swizzling
2084 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unpckhpd and its memory forms.  Alternatives, in order: both sources
;; in registers (unpckhpd), operand 1 in offsettable memory read through
;; %H1 (movlpd), memory destination storing from operand 1 (movhpd).
;; The condition rejects operands 1 and 2 both being memory.
2086 (define_insn "sse2_unpckhpd"
2087 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2090 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2091 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2092 (parallel [(const_int 1)
2094 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2096 unpckhpd\t{%2, %0|%0, %2}
2097 movlpd\t{%H1, %0|%0, %H1}
2098 movhpd\t{%1, %0|%0, %1}"
2099 [(set_attr "type" "sselog,ssemov,ssemov")
2100 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup: V2DF destination (SSE register, or offsettable memory in the
;; second alternative) from the single V2DF source, operand 1.
;; The condition rejects the memory-to-memory combination of the two
;; operands this pattern declares, operands 0 and 1.  No operand 2 is
;; declared by the visible part of the pattern, so it must not be tested.
2102 (define_insn "*sse3_movddup"
2103 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2106 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2108 (parallel [(const_int 0)
2110 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2112 movddup\t{%1, %0|%0, %1}
2114 [(set_attr "type" "sselog,ssemov")
2115 (set_attr "mode" "V2DF")])
;; Post-reload rewrite (condition requires reload_completed) for a V2DF
;; memory destination and a register source: the source register is
;; viewed as DFmode and stored twice, at byte offsets 0 and 8 of the
;; destination.
2118 [(set (match_operand:V2DF 0 "memory_operand" "")
2121 (match_operand:V2DF 1 "register_operand" "")
2123 (parallel [(const_int 0)
2125 "TARGET_SSE3 && reload_completed"
2128 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2129 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2130 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Interleave-low of two V2DF operands; operand 1 is always tied to the
;; destination.  Three alternatives:
;;   1. register source: unpcklpd;
;;   2. memory source: movhpd loads operand 2;
;;   3. offsettable memory destination: movlpd stores operand 2 to %H0
;;      (the high half of the destination).
;; The condition rejects the case where both inputs are in memory.
2134 (define_insn "sse2_unpcklpd"
2135 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2138 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2139 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2140 (parallel [(const_int 0)
2142 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2144 unpcklpd\t{%2, %0|%0, %2}
2145 movhpd\t{%2, %0|%0, %2}
2146 movlpd\t{%2, %H0|%H0, %2}"
2147 [(set_attr "type" "sselog,ssemov,ssemov")
2148 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd with a constant mask (operand 3).  The mask is
;; decoded into explicit element selectors and passed to sse2_shufpd_1;
;; bit 1 of the mask chooses selector 3 or 2 for the second element.
2150 (define_expand "sse2_shufpd"
2151 [(match_operand:V2DF 0 "register_operand" "")
2152 (match_operand:V2DF 1 "register_operand" "")
2153 (match_operand:V2DF 2 "nonimmediate_operand" "")
2154 (match_operand:SI 3 "const_int_operand" "")]
2157 int mask = INTVAL (operands[3]);
2158 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2160 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit selectors: operand 3 is in 0..1 and operand 4 in
;; 2..3.  At output time the two selectors are re-encoded into the
;; instruction immediate (bit 0 = operand 3, bit 1 = operand 4 - 2), which
;; replaces operands[3] before the template is printed.
2164 (define_insn "sse2_shufpd_1"
2165 [(set (match_operand:V2DF 0 "register_operand" "=x")
2168 (match_operand:V2DF 1 "register_operand" "0")
2169 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2170 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2171 (match_operand 4 "const_2_to_3_operand" "")])))]
2175 mask = INTVAL (operands[3]);
2176 mask |= (INTVAL (operands[4]) - 2) << 1;
2177 operands[3] = GEN_INT (mask);
2179 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2181 [(set_attr "type" "sselog")
2182 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0.  The first
;; alternative (memory destination, register source) emits movhpd; the
;; other two alternatives cover a register destination with the source
;; either tied to it or in offsettable memory.  Memory-to-memory is
;; rejected by the condition.
2184 (define_insn "sse2_storehpd"
2185 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2187 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2188 (parallel [(const_int 1)])))]
2189 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2191 movhpd\t{%1, %0|%0, %1}
2194 [(set_attr "type" "ssemov,sselog1,ssemov")
2195 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF in memory into a DF
;; register becomes a plain DF move from byte offset 8 of that memory.
2198 [(set (match_operand:DF 0 "register_operand" "")
2200 (match_operand:V2DF 1 "memory_operand" "")
2201 (parallel [(const_int 1)])))]
2202 "TARGET_SSE2 && reload_completed"
2203 [(set (match_dup 0) (match_dup 1))]
2205 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0.  The first
;; alternative (memory destination, register source) emits movlpd; the
;; remaining alternatives have a register destination with a register or
;; memory source.  Memory-to-memory is rejected by the condition.
2208 (define_insn "sse2_storelpd"
2209 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2211 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2212 (parallel [(const_int 0)])))]
2213 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2215 movlpd\t{%1, %0|%0, %1}
2218 [(set_attr "type" "ssemov")
2219 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of V2DF operand 1 into a DF
;; register becomes a plain DF move.  The source is rewritten either as the
;; same hard register viewed in DFmode or via gen_lowpart (DFmode, ...).
;; NOTE(review): the test choosing between the two rewrites is not visible
;; in this listing -- presumably register vs. memory source; confirm.
2222 [(set (match_operand:DF 0 "register_operand" "")
2224 (match_operand:V2DF 1 "nonimmediate_operand" "")
2225 (parallel [(const_int 0)])))]
2226 "TARGET_SSE2 && reload_completed"
2229 rtx op1 = operands[1];
2231 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2233 op1 = gen_lowpart (DFmode, op1);
2234 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of operand 1 and the DF operand 2.
;; Alternatives, per the constraint columns:
;;   1. operand 1 tied to destination, operand 2 in memory: movhpd;
;;   2. operand 1 tied to destination, operand 2 in a register: unpcklpd;
;;   3. operand 2 tied to destination, operand 1 in a register: shufpd $1;
;;   4. offsettable memory destination tied to operand 1 (type "other").
;; The condition rejects the case where both inputs are in memory.
;; NOTE(review): alternative 3 looks wrong -- with immediate 1, shufpd takes
;; its low result from the destination's own high element, not from
;; operand 1.  Confirm against the SHUFPD definition and consider removing
;; that alternative.
2238 (define_insn "sse2_loadhpd"
2239 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2242 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2243 (parallel [(const_int 0)]))
2244 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2245 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2247 movhpd\t{%2, %0|%0, %2}
2248 unpcklpd\t{%2, %0|%0, %2}
2249 shufpd\t{$1, %1, %0|%0, %1, 1}
2251 [(set_attr "type" "ssemov,sselog,sselog,other")
2252 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split: replacing the second element of a V2DF in memory
;; (element 0 is re-selected from the same memory, i.e. unchanged) becomes
;; a single DF store of operand 1 at byte offset 8.
2255 [(set (match_operand:V2DF 0 "memory_operand" "")
2257 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2258 (match_operand:DF 1 "register_operand" "")))]
2259 "TARGET_SSE2 && reload_completed"
2260 [(set (match_dup 0) (match_dup 1))]
2262 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF from the DF operand 2 and element 1 of operand 1.
;; Alternatives, per the constraint columns:
;;   1. operand 2 in memory, operand 1 the zero constant ("C"): movsd;
;;   2. operand 2 in memory, operand 1 tied to destination: movlpd;
;;   3. operand 2 in a register, operand 1 tied to destination: movsd;
;;   4. operand 2 tied to destination, operand 1 in a register: shufpd $2;
;;   5. operand 2 tied to destination, operand 1 in offsettable memory:
;;      movhpd loads from %H1 (the high half of operand 1);
;;   6. memory destination tied to operand 1 (type "other").
;; In alternative 4 the shufpd source must be operand 1: operand 2 is the
;; destination register itself, so naming %2 would shuffle the register
;; with itself and leave it unchanged.
2265 (define_insn "sse2_loadlpd"
2266 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2268 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2270 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2271 (parallel [(const_int 1)]))))]
2272 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2274 movsd\t{%2, %0|%0, %2}
2275 movlpd\t{%2, %0|%0, %2}
2276 movsd\t{%2, %0|%0, %2}
2277 shufpd\t{$2, %1, %0|%0, %1, 2}
2278 movhpd\t{%H1, %0|%0, %H1}
2280 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2281 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split: replacing the first element of a V2DF in memory
;; (element 1 is re-selected from the same memory, i.e. unchanged) becomes
;; a single DF store of operand 1.  The new value is the first element, so
;; it is stored at byte offset 0; offset 8 would overwrite the element that
;; is supposed to be preserved.
2284 [(set (match_operand:V2DF 0 "memory_operand" "")
2286 (match_operand:DF 1 "register_operand" "")
2287 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2288 "TARGET_SSE2 && reload_completed"
2289 [(set (match_dup 0) (match_dup 1))]
2291 operands[0] = adjust_address (operands[0], DFmode, 0);
2294 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF using
;; single-precision moves: movhps to memory, movhlps register to register,
;; or movlps loading from %H1 (the high half of an offsettable memory
;; operand).  Memory-to-memory is rejected by the condition.
2296 (define_insn "*vec_extractv2df_1_sse"
2297 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2299 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2300 (parallel [(const_int 1)])))]
2301 "!TARGET_SSE2 && TARGET_SSE
2302 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2304 movhps\t{%1, %0|%0, %1}
2305 movhlps\t{%1, %0|%0, %1}
2306 movlps\t{%H1, %0|%0, %H1}"
2307 [(set_attr "type" "ssemov")
2308 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF using
;; single-precision moves: movlps to memory, movaps register to register,
;; or movlps from memory.  Memory-to-memory is rejected by the condition.
2310 (define_insn "*vec_extractv2df_0_sse"
2311 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2313 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2314 (parallel [(const_int 0)])))]
2315 "!TARGET_SSE2 && TARGET_SSE
2316 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2318 movlps\t{%1, %0|%0, %1}
2319 movaps\t{%1, %0|%0, %1}
2320 movlps\t{%1, %0|%0, %1}"
2321 [(set_attr "type" "ssemov")
2322 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style combination of two V2DF operands.  Alternatives, per the
;; constraint columns:
;;   1. operand 1 tied to destination, operand 2 in a register: movsd;
;;   2. operand 1 tied to destination, operand 2 in memory: movlpd load;
;;   3. memory destination tied to operand 1: movlpd store of operand 2;
;;   4. operand 2 tied to destination, operand 1 in a register: shufpd $2;
;;   5. operand 2 tied to destination, operand 1 in offsettable memory:
;;      movhps loads from %H1;
;;   6. offsettable memory destination tied to operand 2: movhps stores
;;      operand 1 to %H0.
;; In alternative 4 the shufpd source must be operand 1: operand 2 is the
;; destination register itself, so naming %2 would shuffle the register
;; with itself and leave it unchanged.
2324 (define_insn "sse2_movsd"
2325 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2327 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2328 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2332 movsd\t{%2, %0|%0, %2}
2333 movlpd\t{%2, %0|%0, %2}
2334 movlpd\t{%2, %0|%0, %2}
2335 shufpd\t{$2, %1, %0|%0, %1, 2}
2336 movhps\t{%H1, %0|%0, %H1}
2337 movhps\t{%1, %H0|%H0, %1}"
2338 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2339 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Broadcast DF operand 1 (register or memory) into a V2DF register with
;; movddup.
2341 (define_insn "*vec_dupv2df_sse3"
2342 [(set (match_operand:V2DF 0 "register_operand" "=x")
2344 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2346 "movddup\t{%1, %0|%0, %1}"
2347 [(set_attr "type" "sselog1")
2348 (set_attr "mode" "DF")])
;; V2DF duplicate of a DF register operand that is tied to the destination
;; ("0"), so the instruction works in place on one register.
;; NOTE(review): the condition and output template are not visible in this
;; listing.
2350 (define_insn "*vec_dupv2df"
2351 [(set (match_operand:V2DF 0 "register_operand" "=x")
2353 (match_operand:DF 1 "register_operand" "0")))]
2356 [(set_attr "type" "sselog1")
2357 (set_attr "mode" "V4SF")])
;; V2DF built from DF operand 1 (register or memory), emitted as movddup.
;; NOTE(review): the second concat operand is not visible in this listing;
;; since only %1 is printed it is presumably a duplicate of operand 1.
2359 (define_insn "*vec_concatv2df_sse3"
2360 [(set (match_operand:V2DF 0 "register_operand" "=x")
2362 (match_operand:DF 1 "nonimmediate_operand" "xm")
2365 "movddup\t{%1, %0|%0, %1}"
2366 [(set_attr "type" "sselog1")
2367 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives, per the constraints:
;;   1. "Y" registers, operand 1 tied to destination: unpcklpd;
;;   2. operand 2 in memory: movhpd;
;;   3. operand 1 in memory, operand 2 the zero constant ("C"): movsd;
;;   4. "x" registers, operand 1 tied to destination: movlhps;
;;   5. "x" destination, operand 2 in memory: movhps.
2369 (define_insn "*vec_concatv2df"
2370 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2372 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2373 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2376 unpcklpd\t{%2, %0|%0, %2}
2377 movhpd\t{%2, %0|%0, %2}
2378 movsd\t{%1, %0|%0, %1}
2379 movlhps\t{%2, %0|%0, %2}
2380 movhps\t{%2, %0|%0, %2}"
2381 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2382 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Standard-name expander: set one element of a V2DF register.  Operand 0
;; is the vector, operand 1 the DF value and operand 2 the constant element
;; index; all work is delegated to ix86_expand_vector_set.
2384 (define_expand "vec_setv2df"
2385 [(match_operand:V2DF 0 "register_operand" "")
2386 (match_operand:DF 1 "register_operand" "")
2387 (match_operand 2 "const_int_operand" "")]
2390 ix86_expand_vector_set (false, operands[0], operands[1],
2391 INTVAL (operands[2]));
;; Standard-name expander: extract one element of a V2DF register.  Operand
;; 0 is the DF result, operand 1 the vector and operand 2 the constant
;; element index; all work is delegated to ix86_expand_vector_extract.
2395 (define_expand "vec_extractv2df"
2396 [(match_operand:DF 0 "register_operand" "")
2397 (match_operand:V2DF 1 "register_operand" "")
2398 (match_operand 2 "const_int_operand" "")]
2401 ix86_expand_vector_extract (false, operands[0], operands[1],
2402 INTVAL (operands[2]));
;; Standard-name expander: initialise V2DF register operand 0 from operand
;; 1 (unconstrained); all work is delegated to ix86_expand_vector_init.
2406 (define_expand "vec_initv2df"
2407 [(match_operand:V2DF 0 "register_operand" "")
2408 (match_operand 1 "" "")]
2411 ix86_expand_vector_init (false, operands[0], operands[1]);
2415 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2417 ;; Parallel integral arithmetic
2419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for every SSEMODEI mode.  The preparation
;; statement materialises an all-zero vector in a register as operand 2,
;; for use together with operand 1 in the RTL template.
2421 (define_expand "neg<mode>2"
2422 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2425 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2427 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander for every SSEMODEI mode.  Operands are
;; first legitimised by ix86_fixup_binary_operands_no_copy for PLUS.
2429 (define_expand "add<mode>3"
2430 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2431 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2432 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2434 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matcher for integer vector addition: emits padd{b,w,d,q} according to
;; <ssevecsize>.  Operand 1 is commutative and tied to the destination
;; ("%0"); operand 2 may be a register or memory.
2436 (define_insn "*add<mode>3"
2437 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2439 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2440 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2441 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2442 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2443 [(set_attr "type" "sseiadd")
2444 (set_attr "mode" "TI")])
;; SS_PLUS addition for byte and word vectors (SSEMODE12): emits
;; padds{b,w}.  Operand 1 is commutative and tied to the destination.
2446 (define_insn "sse2_ssadd<mode>3"
2447 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2449 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2450 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2451 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2452 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2453 [(set_attr "type" "sseiadd")
2454 (set_attr "mode" "TI")])
;; US_PLUS addition for byte and word vectors (SSEMODE12): emits
;; paddus{b,w}.  Operand 1 is commutative and tied to the destination.
2456 (define_insn "sse2_usadd<mode>3"
2457 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2459 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2460 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2461 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2462 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2463 [(set_attr "type" "sseiadd")
2464 (set_attr "mode" "TI")])
;; Integer vector subtraction expander for every SSEMODEI mode.  Operand 1
;; must already be a register; operands are then legitimised by
;; ix86_fixup_binary_operands_no_copy for MINUS.
2466 (define_expand "sub<mode>3"
2467 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2468 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2469 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2471 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matcher for integer vector subtraction: emits psub{b,w,d,q}.  Operand 1
;; is tied to the destination and is not marked commutative; operand 2 may
;; be a register or memory.
2473 (define_insn "*sub<mode>3"
2474 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2476 (match_operand:SSEMODEI 1 "register_operand" "0")
2477 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2479 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2480 [(set_attr "type" "sseiadd")
2481 (set_attr "mode" "TI")])
;; Byte/word (SSEMODE12) subtraction emitted as psubs{b,w}.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
2483 (define_insn "sse2_sssub<mode>3"
2484 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2486 (match_operand:SSEMODE12 1 "register_operand" "0")
2487 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2489 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2490 [(set_attr "type" "sseiadd")
2491 (set_attr "mode" "TI")])
;; Byte/word (SSEMODE12) subtraction emitted as psubus{b,w}.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
2493 (define_insn "sse2_ussub<mode>3"
2494 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2496 (match_operand:SSEMODE12 1 "register_operand" "0")
2497 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2499 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2500 [(set_attr "type" "sseiadd")
2501 (set_attr "mode" "TI")])
;; V16QI multiplication synthesised from word multiplies.  Twelve V16QI
;; temporaries are allocated; both inputs are unpacked with themselves so
;; each word carries a source byte in its low byte, the halves are
;; multiplied with mulv8hi3, and three rounds of punpckh/punpckl plus a
;; final punpcklbw gather the low result bytes back into byte order.
2503 (define_expand "mulv16qi3"
2504 [(set (match_operand:V16QI 0 "register_operand" "")
2505 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2506 (match_operand:V16QI 2 "register_operand" "")))]
2512 for (i = 0; i < 12; ++i)
2513 t[i] = gen_reg_rtx (V16QImode);
2515 /* Unpack data such that we've got a source byte in each low byte of
2516 each word. We don't care what goes into the high byte of each word.
2517 Rather than trying to get zero in there, most convenient is to let
2518 it be a copy of the low byte. */
2519 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2520 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2521 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2522 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2524 /* Multiply words. The end-of-line annotations here give a picture of what
2525 the output of that instruction looks like. Dot means don't care; the
2526 letters are the bytes of the result with A being the most significant. */
2527 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2528 gen_lowpart (V8HImode, t[0]),
2529 gen_lowpart (V8HImode, t[1])));
2530 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2531 gen_lowpart (V8HImode, t[2]),
2532 gen_lowpart (V8HImode, t[3])));
2534 /* Extract the relevant bytes and merge them back together. */
2535 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2536 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2537 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2538 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2539 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2540 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2543 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiplication expander.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for MULT.
2547 (define_expand "mulv8hi3"
2548 [(set (match_operand:V8HI 0 "register_operand" "")
2549 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2550 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2552 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for V8HI multiplication: emits pmullw.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be a register or memory.
2554 (define_insn "*mulv8hi3"
2555 [(set (match_operand:V8HI 0 "register_operand" "=x")
2556 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2557 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2558 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2559 "pmullw\t{%2, %0|%0, %2}"
2560 [(set_attr "type" "sseimul")
2561 (set_attr "mode" "TI")])
;; V8HI multiply emitted as pmulhw.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be a register or memory.
2563 (define_insn "sse2_smulv8hi3_highpart"
2564 [(set (match_operand:V8HI 0 "register_operand" "=x")
2569 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2571 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2573 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2574 "pmulhw\t{%2, %0|%0, %2}"
2575 [(set_attr "type" "sseimul")
2576 (set_attr "mode" "TI")])
;; V8HI multiply emitted as pmulhuw.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be a register or memory.
2578 (define_insn "sse2_umulv8hi3_highpart"
2579 [(set (match_operand:V8HI 0 "register_operand" "=x")
2584 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2586 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2588 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2589 "pmulhuw\t{%2, %0|%0, %2}"
2590 [(set_attr "type" "sseimul")
2591 (set_attr "mode" "TI")])
;; pmuludq: multiplies elements 0 and 2 of the two V4SI inputs (the
;; selections in the parallels) into a V2DI result.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.  The operands are V4SI, so the operator check is given V4SImode
;; rather than the V8HImode used by the neighbouring word patterns.
2593 (define_insn "sse2_umulv2siv2di3"
2594 [(set (match_operand:V2DI 0 "register_operand" "=x")
2598 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2599 (parallel [(const_int 0) (const_int 2)])))
2602 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2603 (parallel [(const_int 0) (const_int 2)])))))]
2604 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2605 "pmuludq\t{%2, %0|%0, %2}"
2606 [(set_attr "type" "sseimul")
2607 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The RTL selects
;; V4HI sub-vectors of each input twice, once starting at element 0 and
;; once starting at element 1 (ending at 7).  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
2609 (define_insn "sse2_pmaddwd"
2610 [(set (match_operand:V4SI 0 "register_operand" "=x")
2615 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2616 (parallel [(const_int 0)
2622 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2623 (parallel [(const_int 0)
2629 (vec_select:V4HI (match_dup 1)
2630 (parallel [(const_int 1)
2635 (vec_select:V4HI (match_dup 2)
2636 (parallel [(const_int 1)
2639 (const_int 7)]))))))]
2641 "pmaddwd\t{%2, %0|%0, %2}"
2642 [(set_attr "type" "sseiadd")
2643 (set_attr "mode" "TI")])
;; V4SI multiplication synthesised from two pmuludq operations: one on the
;; inputs as given (elements 2 and 0) and one on both inputs shifted down
;; by 32 bits as TImode values (elements 3 and 1).  Each product vector is
;; then compacted with pshufd (selectors 0, 2, 0, 0) and the two are
;; interleaved with punpckldq into operand 0.
2645 (define_expand "mulv4si3"
2646 [(set (match_operand:V4SI 0 "register_operand" "")
2647 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2648 (match_operand:V4SI 2 "register_operand" "")))]
2651 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2657 t1 = gen_reg_rtx (V4SImode);
2658 t2 = gen_reg_rtx (V4SImode);
2659 t3 = gen_reg_rtx (V4SImode);
2660 t4 = gen_reg_rtx (V4SImode);
2661 t5 = gen_reg_rtx (V4SImode);
2662 t6 = gen_reg_rtx (V4SImode);
2663 thirtytwo = GEN_INT (32);
2665 /* Multiply elements 2 and 0. */
2666 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2668 /* Shift both input vectors down one element, so that elements 3 and 1
2669 are now in the slots for elements 2 and 0. For K8, at least, this is
2670 faster than using a shuffle. */
2671 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2672 gen_lowpart (TImode, op1), thirtytwo));
2673 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2674 gen_lowpart (TImode, op2), thirtytwo));
2676 /* Multiply elements 3 and 1. */
2677 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2679 /* Move the results in element 2 down to element 1; we don't care what
2680 goes in elements 2 and 3. */
2681 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2682 const0_rtx, const0_rtx));
2683 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2684 const0_rtx, const0_rtx));
2686 /* Merge the parts back together. */
2687 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiplication synthesised from three pmuludq operations: the low
;; halves multiplied together, plus the two low-by-high cross products.
;; The cross products are shifted left by 32 bits and all three partial
;; results are summed into operand 0.
2691 (define_expand "mulv2di3"
2692 [(set (match_operand:V2DI 0 "register_operand" "")
2693 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2694 (match_operand:V2DI 2 "register_operand" "")))]
2697 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2703 t1 = gen_reg_rtx (V2DImode);
2704 t2 = gen_reg_rtx (V2DImode);
2705 t3 = gen_reg_rtx (V2DImode);
2706 t4 = gen_reg_rtx (V2DImode);
2707 t5 = gen_reg_rtx (V2DImode);
2708 t6 = gen_reg_rtx (V2DImode);
2709 thirtytwo = GEN_INT (32);
2711 /* Multiply low parts. */
2712 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2713 gen_lowpart (V4SImode, op2)));
2715 /* Shift input vectors right 32 bits so we can multiply high parts. */
2716 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2717 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2719 /* Multiply high parts by low parts. */
2720 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2721 gen_lowpart (V4SImode, t3)));
2722 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2723 gen_lowpart (V4SImode, t2)));
2725 /* Shift them back. */
2726 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2727 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2729 /* Add the three parts together. */
2730 emit_insn (gen_addv2di3 (t6, t1, t4));
2731 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Dot-product expander for V8HI inputs with a V4SI accumulator:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2), using a fresh
;; V4SI temporary for the pmaddwd result.
2735 (define_expand "sdot_prodv8hi"
2736 [(match_operand:V4SI 0 "register_operand" "")
2737 (match_operand:V8HI 1 "nonimmediate_operand" "")
2738 (match_operand:V8HI 2 "nonimmediate_operand" "")
2739 (match_operand:V4SI 3 "register_operand" "")]
2742 rtx t = gen_reg_rtx (V4SImode);
2743 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2744 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander for V4SI inputs with a V2DI accumulator.  One
;; pmuludq is applied to the inputs as given and added to operand 3; both
;; inputs are then shifted right as TImode values and multiplied again, and
;; that second product is added to form operand 0.
;; NOTE(review): the shift-count argument lines are not visible in this
;; listing.
2748 (define_expand "udot_prodv4si"
2749 [(match_operand:V2DI 0 "register_operand" "")
2750 (match_operand:V4SI 1 "register_operand" "")
2751 (match_operand:V4SI 2 "register_operand" "")
2752 (match_operand:V2DI 3 "register_operand" "")]
2757 t1 = gen_reg_rtx (V2DImode);
2758 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2759 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2761 t2 = gen_reg_rtx (V4SImode);
2762 t3 = gen_reg_rtx (V4SImode);
2763 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2764 gen_lowpart (TImode, operands[1]),
2766 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2767 gen_lowpart (TImode, operands[2]),
2770 t4 = gen_reg_rtx (V2DImode);
2771 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2773 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element shift for word and dword vectors (SSEMODE24), emitted as
;; psra{w,d}.  Operand 1 is tied to the destination; the SImode count may
;; be an SSE register or an immediate ("xi").
2777 (define_insn "ashr<mode>3"
2778 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2780 (match_operand:SSEMODE24 1 "register_operand" "0")
2781 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2783 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2784 [(set_attr "type" "sseishft")
2785 (set_attr "mode" "TI")])
;; Per-element logical right shift (lshiftrt) for word, dword and qword
;; vectors (SSEMODE248), emitted as psrl{w,d,q}.  Operand 1 is tied to the
;; destination; the SImode count may be an SSE register or an immediate.
2787 (define_insn "lshr<mode>3"
2788 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2789 (lshiftrt:SSEMODE248
2790 (match_operand:SSEMODE248 1 "register_operand" "0")
2791 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2793 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2794 [(set_attr "type" "sseishft")
2795 (set_attr "mode" "TI")])
;; Per-element shift for word, dword and qword vectors (SSEMODE248),
;; emitted as psll{w,d,q}.  Operand 1 is tied to the destination; the
;; SImode count may be an SSE register or an immediate.
2797 (define_insn "ashl<mode>3"
2798 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2800 (match_operand:SSEMODE248 1 "register_operand" "0")
2801 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2803 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2804 [(set_attr "type" "sseishft")
2805 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift via pslldq.  The RTL shift count is
;; in bits and must satisfy const_0_to_255_mul_8_operand; it is divided by
;; 8 at output time to form the instruction's byte count.
2807 (define_insn "sse2_ashlti3"
2808 [(set (match_operand:TI 0 "register_operand" "=x")
2809 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2810 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2813 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2814 return "pslldq\t{%2, %0|%0, %2}";
2816 [(set_attr "type" "sseishft")
2817 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a TImode
;; ashift.  The count is checked against const_0_to_255_mul_8_operand (the
;; action taken when the check fails is not visible in this listing);
;; operands 0 and 1 are then re-viewed as TImode with gen_lowpart.
2819 (define_expand "vec_shl_<mode>"
2820 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2821 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2822 (match_operand:SI 2 "general_operand" "")))]
2825 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2827 operands[0] = gen_lowpart (TImode, operands[0]);
2828 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift via psrldq.  The RTL shift
;; count is in bits and must satisfy const_0_to_255_mul_8_operand; it is
;; divided by 8 at output time to form the instruction's byte count.
2831 (define_insn "sse2_lshrti3"
2832 [(set (match_operand:TI 0 "register_operand" "=x")
2833 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2834 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2837 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2838 return "psrldq\t{%2, %0|%0, %2}";
2840 [(set_attr "type" "sseishft")
2841 (set_attr "mode" "TI")])
;; Whole-vector right shift for every SSEMODEI mode, expressed as a TImode
;; lshiftrt.  The count is checked against const_0_to_255_mul_8_operand
;; (the action taken when the check fails is not visible in this listing);
;; operands 0 and 1 are then re-viewed as TImode with gen_lowpart.
2843 (define_expand "vec_shr_<mode>"
2844 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2845 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2846 (match_operand:SI 2 "general_operand" "")))]
2849 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2851 operands[0] = gen_lowpart (TImode, operands[0]);
2852 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned byte maximum expander.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for UMAX.
2855 (define_expand "umaxv16qi3"
2856 [(set (match_operand:V16QI 0 "register_operand" "")
2857 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2858 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2860 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matcher for unsigned byte maximum: emits pmaxub.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.
2862 (define_insn "*umaxv16qi3"
2863 [(set (match_operand:V16QI 0 "register_operand" "=x")
2864 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2865 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2866 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2867 "pmaxub\t{%2, %0|%0, %2}"
2868 [(set_attr "type" "sseiadd")
2869 (set_attr "mode" "TI")])
;; Signed word maximum expander.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for SMAX.
2871 (define_expand "smaxv8hi3"
2872 [(set (match_operand:V8HI 0 "register_operand" "")
2873 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2874 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2876 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matcher for signed word maximum: emits pmaxsw.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be a register or memory.
2878 (define_insn "*smaxv8hi3"
2879 [(set (match_operand:V8HI 0 "register_operand" "=x")
2880 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2881 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2882 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2883 "pmaxsw\t{%2, %0|%0, %2}"
2884 [(set_attr "type" "sseiadd")
2885 (set_attr "mode" "TI")])
;; Unsigned word maximum built from two steps: a us_minus of operand 2 from
;; operand 1, followed by a plus that adds operand 2 back.
;; The original destination is saved in operands[3]; when the destination
;; is the same rtx as operand 2, a fresh V8HI register takes the
;; intermediate result so operand 2 is still intact for the add.
;; NOTE(review): the destination of the second set is not visible in this
;; listing -- presumably operands[3]; confirm.
2887 (define_expand "umaxv8hi3"
2888 [(set (match_operand:V8HI 0 "register_operand" "=x")
2889 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2890 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2892 (plus:V8HI (match_dup 0) (match_dup 2)))]
2895 operands[3] = operands[0];
2896 if (rtx_equal_p (operands[0], operands[2]))
2897 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for byte and dword vectors (SSEMODE14), expanded as a
;; vector conditional: the xops array is filled in the vcond operand layout
;; (0 = result, 1/2 = the two values, 3 = comparison, 4/5 = its operands)
;; to select operand 1 where operand 1 > operand 2 (GT), else operand 2,
;; and handed to ix86_expand_int_vcond.
2900 (define_expand "smax<mode>3"
2901 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2902 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2903 (match_operand:SSEMODE14 2 "register_operand" "")))]
2909 xops[0] = operands[0];
2910 xops[1] = operands[1];
2911 xops[2] = operands[2];
2912 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2913 xops[4] = operands[1];
2914 xops[5] = operands[2];
2915 ok = ix86_expand_int_vcond (xops);
;; Unsigned dword maximum, expanded as a vector conditional: the xops array
;; is filled in the vcond operand layout to select operand 1 where
;; operand 1 > operand 2 unsigned (GTU), else operand 2, and handed to
;; ix86_expand_int_vcond.
2920 (define_expand "umaxv4si3"
2921 [(set (match_operand:V4SI 0 "register_operand" "")
2922 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2923 (match_operand:V4SI 2 "register_operand" "")))]
2929 xops[0] = operands[0];
2930 xops[1] = operands[1];
2931 xops[2] = operands[2];
2932 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2933 xops[4] = operands[1];
2934 xops[5] = operands[2];
2935 ok = ix86_expand_int_vcond (xops);
;; Unsigned byte minimum expander.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy; the rtx code passed is UMIN so that
;; it agrees with the umin operator in the template and with the UMIN check
;; in the matching *uminv16qi3 insn.
2940 (define_expand "uminv16qi3"
2941 [(set (match_operand:V16QI 0 "register_operand" "")
2942 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2943 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2945 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matcher for unsigned byte minimum: emits pminub.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.
2947 (define_insn "*uminv16qi3"
2948 [(set (match_operand:V16QI 0 "register_operand" "=x")
2949 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2950 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2951 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2952 "pminub\t{%2, %0|%0, %2}"
2953 [(set_attr "type" "sseiadd")
2954 (set_attr "mode" "TI")])
;; Signed word minimum expander.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy for SMIN.
2956 (define_expand "sminv8hi3"
2957 [(set (match_operand:V8HI 0 "register_operand" "")
2958 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2959 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2961 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matcher for signed word minimum: emits pminsw.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be a register or memory.
2963 (define_insn "*sminv8hi3"
2964 [(set (match_operand:V8HI 0 "register_operand" "=x")
2965 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2966 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2967 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2968 "pminsw\t{%2, %0|%0, %2}"
2969 [(set_attr "type" "sseiadd")
2970 (set_attr "mode" "TI")])
;; Signed minimum for byte and dword vectors (SSEMODE14), expanded as a
;; vector conditional.  The comparison is still operand 1 > operand 2 (GT),
;; but the two value slots are swapped relative to the maximum: operand 2
;; is selected where the comparison holds, else operand 1.
2972 (define_expand "smin<mode>3"
2973 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2974 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2975 (match_operand:SSEMODE14 2 "register_operand" "")))]
2981 xops[0] = operands[0];
2982 xops[1] = operands[2];
2983 xops[2] = operands[1];
2984 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2985 xops[4] = operands[1];
2986 xops[5] = operands[2];
2987 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for word and dword vectors (SSEMODE24), expanded as a
;; vector conditional.  The comparison is operand 1 > operand 2 unsigned
;; (GTU); operand 2 is selected where it holds, else operand 1.
2992 (define_expand "umin<mode>3"
2993 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2994 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2995 (match_operand:SSEMODE24 2 "register_operand" "")))]
3001 xops[0] = operands[0];
3002 xops[1] = operands[2];
3003 xops[2] = operands[1];
3004 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3005 xops[4] = operands[1];
3006 xops[5] = operands[2];
3007 ok = ix86_expand_int_vcond (xops);
3012 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3014 ;; Parallel integral comparisons
3016 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for byte, word and dword vectors
;; (SSEMODE124): emits pcmpeq{b,w,d}.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be a register or memory.
3018 (define_insn "sse2_eq<mode>3"
3019 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3021 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3022 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3023 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3024 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3025 [(set_attr "type" "ssecmp")
3026 (set_attr "mode" "TI")])
;; Element-wise compare for byte, word and dword vectors (SSEMODE124):
;; emits pcmpgt{b,w,d}.  Operand 1 is tied to the destination and is not
;; marked commutative; operand 2 may be a register or memory.
3028 (define_insn "sse2_gt<mode>3"
3029 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3031 (match_operand:SSEMODE124 1 "register_operand" "0")
3032 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3034 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3035 [(set_attr "type" "ssecmp")
3036 (set_attr "mode" "TI")])
;; Vector conditional select for SSEMODE124 modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the two arms of the if_then_else.  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is tested.
3038 (define_expand "vcond<mode>"
3039 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3040 (if_then_else:SSEMODE124
3041 (match_operator 3 ""
3042 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3043 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3044 (match_operand:SSEMODE124 1 "general_operand" "")
3045 (match_operand:SSEMODE124 2 "general_operand" "")))]
3048 if (ix86_expand_int_vcond (operands))
;; Vector conditional select under the vcondu<mode> name; the operand
;; layout is the same as vcond<mode> (3 = comparison operator on operands 4
;; and 5, 1 and 2 = the two arms).  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is tested.
3054 (define_expand "vcondu<mode>"
3055 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3056 (if_then_else:SSEMODE124
3057 (match_operator 3 ""
3058 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3059 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3060 (match_operand:SSEMODE124 1 "general_operand" "")
3061 (match_operand:SSEMODE124 2 "general_operand" "")))]
3064 if (ix86_expand_int_vcond (operands))
3070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3072 ;; Parallel integral logical operations
3074 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for every SSEMODEI mode, expressed as an xor.  The
;; preparation code builds a CONST_VECTOR with constm1_rtx in each of the
;; mode's GET_MODE_NUNITS elements and forces it into a register as
;; operand 2.
3076 (define_expand "one_cmpl<mode>2"
3077 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3078 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3082 int i, n = GET_MODE_NUNITS (<MODE>mode);
3083 rtvec v = rtvec_alloc (n);
3085 for (i = 0; i < n; ++i)
3086 RTVEC_ELT (v, i) = constm1_rtx;
3088 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for every SSEMODEI mode.  Operands are legitimised
;; by ix86_fixup_binary_operands_no_copy for AND.
3091 (define_expand "and<mode>3"
3092 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3093 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3094 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3096 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matcher for bitwise AND: emits pand for every SSEMODEI mode.  Operand 1
;; is commutative and tied to the destination; operand 2 may be a register
;; or memory.
3098 (define_insn "*and<mode>3"
3099 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3101 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3102 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3103 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3104 "pand\t{%2, %0|%0, %2}"
3105 [(set_attr "type" "sselog")
3106 (set_attr "mode" "TI")])
;; pandn: the complement (not) of operand 1, which is tied to the
;; destination, combined with operand 2 (register or memory).
3108 (define_insn "sse2_nand<mode>3"
3109 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3111 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3112 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3114 "pandn\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sselog")
3116 (set_attr "mode" "TI")])
;; Bitwise inclusive-OR expander for every SSEMODEI mode.  Operands are
;; legitimised by ix86_fixup_binary_operands_no_copy for IOR.
3118 (define_expand "ior<mode>3"
3119 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3120 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3121 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3123 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matcher for bitwise inclusive OR: emits por for every SSEMODEI mode.
;; Operand 1 is commutative and tied to the destination; operand 2 may be a
;; register or memory.
3125 (define_insn "*ior<mode>3"
3126 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3128 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3129 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3130 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3131 "por\t{%2, %0|%0, %2}"
3132 [(set_attr "type" "sselog")
3133 (set_attr "mode" "TI")])
;; Bitwise exclusive-OR expander for every SSEMODEI mode.  Operands are
;; legitimised by ix86_fixup_binary_operands_no_copy for XOR.
3135 (define_expand "xor<mode>3"
3136 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3137 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3138 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3140 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matcher for bitwise exclusive OR: emits pxor for every SSEMODEI mode.
;; Operand 1 is commutative and tied to the destination; operand 2 may be a
;; register or memory.
3142 (define_insn "*xor<mode>3"
3143 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3145 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3146 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3147 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3148 "pxor\t{%2, %0|%0, %2}"
3149 [(set_attr "type" "sselog")
3150 (set_attr "mode" "TI")])
3152 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3154 ;; Parallel integral element swizzling
3156 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PACKSSWB: combines two V8HI sources into one V16QI result.  Operand 1 is
;; tied to the destination register; operand 2 may be a register or memory.
3158 (define_insn "sse2_packsswb"
3159 [(set (match_operand:V16QI 0 "register_operand" "=x")
3162 (match_operand:V8HI 1 "register_operand" "0"))
3164 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3166 "packsswb\t{%2, %0|%0, %2}"
3167 [(set_attr "type" "sselog")
3168 (set_attr "mode" "TI")])
;; PACKSSDW: combines two V4SI sources into one V8HI result.  Operand 1 is
;; tied to the destination register; operand 2 may be a register or memory.
3170 (define_insn "sse2_packssdw"
3171 [(set (match_operand:V8HI 0 "register_operand" "=x")
3174 (match_operand:V4SI 1 "register_operand" "0"))
3176 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3178 "packssdw\t{%2, %0|%0, %2}"
3179 [(set_attr "type" "sselog")
3180 (set_attr "mode" "TI")])
;; PACKUSWB: combines two V8HI sources into one V16QI result.  Operand 1 is
;; tied to the destination register; operand 2 may be a register or memory.
3182 (define_insn "sse2_packuswb"
3183 [(set (match_operand:V16QI 0 "register_operand" "=x")
3186 (match_operand:V8HI 1 "register_operand" "0"))
3188 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3190 "packuswb\t{%2, %0|%0, %2}"
3191 [(set_attr "type" "sselog")
3192 (set_attr "mode" "TI")])
;; PUNPCKHBW on V16QI.  The selector alternates indices 8..15 with 24..31,
;; i.e. it interleaves the upper eight bytes of operand 1 with the upper
;; eight bytes of operand 2.  Operand 1 is tied to the destination.
3194 (define_insn "sse2_punpckhbw"
3195 [(set (match_operand:V16QI 0 "register_operand" "=x")
3198 (match_operand:V16QI 1 "register_operand" "0")
3199 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3200 (parallel [(const_int 8) (const_int 24)
3201 (const_int 9) (const_int 25)
3202 (const_int 10) (const_int 26)
3203 (const_int 11) (const_int 27)
3204 (const_int 12) (const_int 28)
3205 (const_int 13) (const_int 29)
3206 (const_int 14) (const_int 30)
3207 (const_int 15) (const_int 31)])))]
3209 "punpckhbw\t{%2, %0|%0, %2}"
3210 [(set_attr "type" "sselog")
3211 (set_attr "mode" "TI")])
;; PUNPCKLBW on V16QI.  The selector alternates indices 0..7 with 16..23,
;; i.e. it interleaves the lower eight bytes of operand 1 with the lower
;; eight bytes of operand 2.  Operand 1 is tied to the destination.
3213 (define_insn "sse2_punpcklbw"
3214 [(set (match_operand:V16QI 0 "register_operand" "=x")
3217 (match_operand:V16QI 1 "register_operand" "0")
3218 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3219 (parallel [(const_int 0) (const_int 16)
3220 (const_int 1) (const_int 17)
3221 (const_int 2) (const_int 18)
3222 (const_int 3) (const_int 19)
3223 (const_int 4) (const_int 20)
3224 (const_int 5) (const_int 21)
3225 (const_int 6) (const_int 22)
3226 (const_int 7) (const_int 23)])))]
3228 "punpcklbw\t{%2, %0|%0, %2}"
3229 [(set_attr "type" "sselog")
3230 (set_attr "mode" "TI")])
;; PUNPCKHWD on V8HI.  The selector alternates indices 4..7 with 12..15:
;; the upper four words of operand 1 interleaved with the upper four words
;; of operand 2.  Operand 1 is tied to the destination.
3232 (define_insn "sse2_punpckhwd"
3233 [(set (match_operand:V8HI 0 "register_operand" "=x")
3236 (match_operand:V8HI 1 "register_operand" "0")
3237 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3238 (parallel [(const_int 4) (const_int 12)
3239 (const_int 5) (const_int 13)
3240 (const_int 6) (const_int 14)
3241 (const_int 7) (const_int 15)])))]
3243 "punpckhwd\t{%2, %0|%0, %2}"
3244 [(set_attr "type" "sselog")
3245 (set_attr "mode" "TI")])
;; PUNPCKLWD on V8HI.  The selector alternates indices 0..3 with 8..11:
;; the lower four words of operand 1 interleaved with the lower four words
;; of operand 2.  Operand 1 is tied to the destination.
3247 (define_insn "sse2_punpcklwd"
3248 [(set (match_operand:V8HI 0 "register_operand" "=x")
3251 (match_operand:V8HI 1 "register_operand" "0")
3252 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3253 (parallel [(const_int 0) (const_int 8)
3254 (const_int 1) (const_int 9)
3255 (const_int 2) (const_int 10)
3256 (const_int 3) (const_int 11)])))]
3258 "punpcklwd\t{%2, %0|%0, %2}"
3259 [(set_attr "type" "sselog")
3260 (set_attr "mode" "TI")])
;; PUNPCKHDQ on V4SI.  The selector is 2, 6, 3, 7: the upper two dwords of
;; operand 1 interleaved with the upper two dwords of operand 2.  Operand 1
;; is tied to the destination.
3262 (define_insn "sse2_punpckhdq"
3263 [(set (match_operand:V4SI 0 "register_operand" "=x")
3266 (match_operand:V4SI 1 "register_operand" "0")
3267 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3268 (parallel [(const_int 2) (const_int 6)
3269 (const_int 3) (const_int 7)])))]
3271 "punpckhdq\t{%2, %0|%0, %2}"
3272 [(set_attr "type" "sselog")
3273 (set_attr "mode" "TI")])
;; PUNPCKLDQ on V4SI.  The selector is 0, 4, 1, 5: the lower two dwords of
;; operand 1 interleaved with the lower two dwords of operand 2.  Operand 1
;; is tied to the destination.
3275 (define_insn "sse2_punpckldq"
3276 [(set (match_operand:V4SI 0 "register_operand" "=x")
3279 (match_operand:V4SI 1 "register_operand" "0")
3280 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3281 (parallel [(const_int 0) (const_int 4)
3282 (const_int 1) (const_int 5)])))]
3284 "punpckldq\t{%2, %0|%0, %2}"
3285 [(set_attr "type" "sselog")
3286 (set_attr "mode" "TI")])
;; PUNPCKHQDQ on V2DI.  The selector begins with index 1 (the high quadword
;; of operand 1).  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
3288 (define_insn "sse2_punpckhqdq"
3289 [(set (match_operand:V2DI 0 "register_operand" "=x")
3292 (match_operand:V2DI 1 "register_operand" "0")
3293 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3294 (parallel [(const_int 1)
3297 "punpckhqdq\t{%2, %0|%0, %2}"
3298 [(set_attr "type" "sselog")
3299 (set_attr "mode" "TI")])
;; PUNPCKLQDQ on V2DI.  The selector begins with index 0 (the low quadword
;; of operand 1).  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
3301 (define_insn "sse2_punpcklqdq"
3302 [(set (match_operand:V2DI 0 "register_operand" "=x")
3305 (match_operand:V2DI 1 "register_operand" "0")
3306 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3307 (parallel [(const_int 0)
3310 "punpcklqdq\t{%2, %0|%0, %2}"
3311 [(set_attr "type" "sselog")
3312 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for PINSRW.  Takes the value as SImode
;; (operand 2) and an element index 0..7 (operand 3).  The preparation code
;; narrows operand 2 to HImode with gen_lowpart and replaces the index with
;; the one-hot mask (1 << index), the form the *sse2_pinsrw insn expects.
3314 (define_expand "sse2_pinsrw"
3315 [(set (match_operand:V8HI 0 "register_operand" "")
3318 (match_operand:SI 2 "nonimmediate_operand" ""))
3319 (match_operand:V8HI 1 "register_operand" "")
3320 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3323 operands[2] = gen_lowpart (HImode, operands[2]);
3324 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; PINSRW insn.  Operand 3 is a power-of-two mask (1..128) in the RTL; the
;; output code converts it back to an element index with exact_log2 before
;; printing.  Operand 2 (HImode, register or memory) is printed with the %k
;; modifier; operand 1 is tied to the destination.
3327 (define_insn "*sse2_pinsrw"
3328 [(set (match_operand:V8HI 0 "register_operand" "=x")
3331 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3332 (match_operand:V8HI 1 "register_operand" "0")
3333 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3336 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3337 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3339 [(set_attr "type" "sselog")
3340 (set_attr "mode" "TI")])
;; PEXTRW: selects the word of V8HI register operand 1 named by the
;; immediate operand 2 (0..7) and places it in SImode general register
;; operand 0.
3342 (define_insn "sse2_pextrw"
3343 [(set (match_operand:SI 0 "register_operand" "=r")
3346 (match_operand:V8HI 1 "register_operand" "x")
3347 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3349 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3350 [(set_attr "type" "sselog")
3351 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for PSHUFD.  Splits the 8-bit immediate
;; (operand 2) into four 2-bit element selectors, lowest bits first, and
;; emits sse2_pshufd_1 with them as separate operands.
3353 (define_expand "sse2_pshufd"
3354 [(match_operand:V4SI 0 "register_operand" "")
3355 (match_operand:V4SI 1 "nonimmediate_operand" "")
3356 (match_operand:SI 2 "const_int_operand" "")]
3359 int mask = INTVAL (operands[2]);
3360 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3361 GEN_INT ((mask >> 0) & 3),
3362 GEN_INT ((mask >> 2) & 3),
3363 GEN_INT ((mask >> 4) & 3),
3364 GEN_INT ((mask >> 6) & 3)));
;; PSHUFD insn with the four element selectors (each 0..3) as operands 2-5.
;; The output code packs them back into one immediate, two bits per selector
;; with operand 2 in the lowest bits, and stores it in operands[2] for
;; printing.
3368 (define_insn "sse2_pshufd_1"
3369 [(set (match_operand:V4SI 0 "register_operand" "=x")
3371 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3372 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3373 (match_operand 3 "const_0_to_3_operand" "")
3374 (match_operand 4 "const_0_to_3_operand" "")
3375 (match_operand 5 "const_0_to_3_operand" "")])))]
3379 mask |= INTVAL (operands[2]) << 0;
3380 mask |= INTVAL (operands[3]) << 2;
3381 mask |= INTVAL (operands[4]) << 4;
3382 mask |= INTVAL (operands[5]) << 6;
3383 operands[2] = GEN_INT (mask);
3385 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3387 [(set_attr "type" "sselog1")
3388 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for PSHUFLW.  Splits the 8-bit immediate
;; (operand 2) into four 2-bit selectors, lowest bits first, and emits
;; sse2_pshuflw_1 with them as separate operands.
3390 (define_expand "sse2_pshuflw"
3391 [(match_operand:V8HI 0 "register_operand" "")
3392 (match_operand:V8HI 1 "nonimmediate_operand" "")
3393 (match_operand:SI 2 "const_int_operand" "")]
3396 int mask = INTVAL (operands[2]);
3397 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3398 GEN_INT ((mask >> 0) & 3),
3399 GEN_INT ((mask >> 2) & 3),
3400 GEN_INT ((mask >> 4) & 3),
3401 GEN_INT ((mask >> 6) & 3)));
;; PSHUFLW insn.  Operands 2-5 are the selectors (each 0..3) that open the
;; selection vector.  The output code packs them into one immediate, two
;; bits per selector with operand 2 lowest, and stores it in operands[2]
;; for printing.
3405 (define_insn "sse2_pshuflw_1"
3406 [(set (match_operand:V8HI 0 "register_operand" "=x")
3408 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3409 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3410 (match_operand 3 "const_0_to_3_operand" "")
3411 (match_operand 4 "const_0_to_3_operand" "")
3412 (match_operand 5 "const_0_to_3_operand" "")
3420 mask |= INTVAL (operands[2]) << 0;
3421 mask |= INTVAL (operands[3]) << 2;
3422 mask |= INTVAL (operands[4]) << 4;
3423 mask |= INTVAL (operands[5]) << 6;
3424 operands[2] = GEN_INT (mask);
3426 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3428 [(set_attr "type" "sselog")
3429 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for PSHUFHW.  Splits the 8-bit immediate
;; (operand 2) into four 2-bit selectors and adds 4 to each, so that they
;; fall in the 4..7 range sse2_pshufhw_1 requires.
3431 (define_expand "sse2_pshufhw"
3432 [(match_operand:V8HI 0 "register_operand" "")
3433 (match_operand:V8HI 1 "nonimmediate_operand" "")
3434 (match_operand:SI 2 "const_int_operand" "")]
3437 int mask = INTVAL (operands[2]);
3438 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3439 GEN_INT (((mask >> 0) & 3) + 4),
3440 GEN_INT (((mask >> 2) & 3) + 4),
3441 GEN_INT (((mask >> 4) & 3) + 4),
3442 GEN_INT (((mask >> 6) & 3) + 4)));
;; PSHUFHW insn.  Operands 2-5 are selectors in the range 4..7 that close
;; the selection vector.  The output code subtracts 4 from each, packs them
;; into one immediate (two bits per selector, operand 2 lowest) and stores
;; it in operands[2] for printing.
3446 (define_insn "sse2_pshufhw_1"
3447 [(set (match_operand:V8HI 0 "register_operand" "=x")
3449 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3450 (parallel [(const_int 0)
3454 (match_operand 2 "const_4_to_7_operand" "")
3455 (match_operand 3 "const_4_to_7_operand" "")
3456 (match_operand 4 "const_4_to_7_operand" "")
3457 (match_operand 5 "const_4_to_7_operand" "")])))]
3461 mask |= (INTVAL (operands[2]) - 4) << 0;
3462 mask |= (INTVAL (operands[3]) - 4) << 2;
3463 mask |= (INTVAL (operands[4]) - 4) << 4;
3464 mask |= (INTVAL (operands[5]) - 4) << 6;
3465 operands[2] = GEN_INT (mask);
3467 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3469 [(set_attr "type" "sselog")
3470 (set_attr "mode" "TI")])
;; Intrinsic-facing expander that places an SImode value (operand 1) into a
;; V4SI register.  The preparation statement supplies operand 2 as the
;; all-zero V4SI constant.
3472 (define_expand "sse2_loadd"
3473 [(set (match_operand:V4SI 0 "register_operand" "")
3476 (match_operand:SI 1 "nonimmediate_operand" ""))
3480 "operands[2] = CONST0_RTX (V4SImode);")
;; Inserts SImode operand 2 into a V4SI value whose remaining contents come
;; from operand 1 (a register or zero).  Alternatives:
;;   0: MOVD from memory or a general register, operand 1 is zero;
;;   1: MOVSS from memory, operand 1 is zero;
;;   2: MOVSS from an SSE register, operand 1 tied to the destination.
3482 (define_insn "sse2_loadld"
3483 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3486 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3487 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3491 movd\t{%2, %0|%0, %2}
3492 movss\t{%2, %0|%0, %2}
3493 movss\t{%2, %0|%0, %2}"
3494 [(set_attr "type" "ssemov")
3495 (set_attr "mode" "TI,V4SF,SF")])
3497 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3498 ;; be taken into account, and movdi isn't fully populated even without.
;; Stores element 0 of V4SI register operand 1 into an SImode destination
;; (memory or SSE register).  Once reload has completed it is split into a
;; plain SImode move, with operand 1 re-expressed as an SImode register
;; carrying the same register number.
3499 (define_insn_and_split "sse2_stored"
3500 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3502 (match_operand:V4SI 1 "register_operand" "x")
3503 (parallel [(const_int 0)])))]
3506 "&& reload_completed"
3507 [(set (match_dup 0) (match_dup 1))]
3509 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of V2DI register operand 1 into a DImode
;; destination (register or memory).
3512 (define_expand "sse_storeq"
3513 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3515 (match_operand:V2DI 1 "register_operand" "")
3516 (parallel [(const_int 0)])))]
3520 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3521 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn matching the store of element 0 of a V2DI register to a DImode
;; destination (memory or SSE register).  The pattern that follows it, valid
;; under TARGET_SSE once reload has completed, rewrites the same extract as
;; a plain DImode move, with operand 1 re-expressed as a DImode register
;; carrying the same register number.
3522 (define_insn "*sse2_storeq"
3523 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3525 (match_operand:V2DI 1 "register_operand" "x")
3526 (parallel [(const_int 0)])))]
3531 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3533 (match_operand:V2DI 1 "register_operand" "")
3534 (parallel [(const_int 0)])))]
3535 "TARGET_SSE && reload_completed"
3536 [(set (match_dup 0) (match_dup 1))]
3538 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 (the high quadword) of a V2DI value into a DImode
;; destination when SSE2 is available.  Alternatives:
;;   0: MOVHPS stores the high half of an SSE register to memory;
;;   1: PSRLDQ shifts the register, tied to the destination, right so the
;;      high quadword lands in the low 64 bits.  PSRLDQ counts BYTES, so the
;;      count must be 8; a count of 4 would leave bits 32..95 in the low
;;      quadword rather than element 1;
;;   2: MOVQ loads from an offsettable memory operand, printed with %H1
;;      (presumably the address of the high half -- confirm against the
;;      operand printer).
3541 (define_insn "*vec_extractv2di_1_sse2"
3542 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3544 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3545 (parallel [(const_int 1)])))]
3546 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3548 movhps\t{%1, %0|%0, %1}
3549 psrldq\t{$8, %0|%0, 8}
3550 movq\t{%H1, %0|%0, %H1}"
3551 [(set_attr "type" "ssemov,sseishft,ssemov")
3552 (set_attr "mode" "V2SF,TI,TI")])
3554 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only variant (TARGET_SSE without TARGET_SSE2) of the element-1
;; extract from V2DI; memory-to-memory is rejected.  Alternatives:
;;   0: MOVHPS from an SSE register to memory;
;;   1: MOVHLPS between SSE registers;
;;   2: MOVLPS from an offsettable memory operand, printed with %H1.
3555 (define_insn "*vec_extractv2di_1_sse"
3556 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3558 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3559 (parallel [(const_int 1)])))]
3560 "!TARGET_SSE2 && TARGET_SSE
3561 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3563 movhps\t{%1, %0|%0, %1}
3564 movhlps\t{%1, %0|%0, %1}
3565 movlps\t{%H1, %0|%0, %H1}"
3566 [(set_attr "type" "ssemov")
3567 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; V4SI duplicate of an SImode register.  Alternative 0 emits PSHUFD with
;; immediate 0; alternative 1, where the source is tied to the destination,
;; emits SHUFPS with immediate 0 on the destination itself.
3569 (define_insn "*vec_dupv4si"
3570 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3572 (match_operand:SI 1 "register_operand" " Y,0")))]
3575 pshufd\t{$0, %1, %0|%0, %1, 0}
3576 shufps\t{$0, %0, %0|%0, %0, 0}"
3577 [(set_attr "type" "sselog1")
3578 (set_attr "mode" "TI,V4SF")])
;; V2DI counterpart of *vec_dupv4si with a DImode register source.  In both
;; alternatives operand 1 is tied to the destination; the attributes
;; classify them as sselog1/TI and ssemov/V4SF respectively.
3580 (define_insn "*vec_dupv2di"
3581 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3583 (match_operand:DI 1 "register_operand" " 0,0")))]
3588 [(set_attr "type" "sselog1,ssemov")
3589 (set_attr "mode" "TI,V4SF")])
3591 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3592 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3593 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI from two SImode values.  Alternatives 0-1 use SSE
;; registers, alternatives 2-3 use MMX registers ("*y"):
;;   0, 2: PUNPCKLDQ merges register operand 2 into operand 1, which is tied
;;         to the destination;
;;   1, 3: MOVD loads operand 1 when operand 2 is zero.
3594 (define_insn "*sse2_concatv2si"
3595 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3597 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3598 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3601 punpckldq\t{%2, %0|%0, %2}
3602 movd\t{%1, %0|%0, %1}
3603 punpckldq\t{%2, %0|%0, %2}
3604 movd\t{%1, %0|%0, %1}"
3605 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3606 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of the V2SI concatenation that uses single-precision SSE
;; instructions for the SSE-register alternatives:
;;   0: UNPCKLPS merges register operand 2 into the tied operand 1;
;;   1: MOVSS loads operand 1 from memory when operand 2 is zero;
;;   2: PUNPCKLDQ on MMX registers;
;;   3: MOVD into an MMX register when operand 2 is zero.
3608 (define_insn "*sse1_concatv2si"
3609 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3611 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3612 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3615 unpcklps\t{%2, %0|%0, %2}
3616 movss\t{%1, %0|%0, %1}
3617 punpckldq\t{%2, %0|%0, %2}
3618 movd\t{%1, %0|%0, %1}"
3619 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3620 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI from two V2SI halves; operand 1 is always tied to the
;; destination.  Alternatives:
;;   0: PUNPCKLQDQ with operand 2 in a register;
;;   1: MOVLHPS with operand 2 in an SSE register;
;;   2: MOVHPS with operand 2 in memory.
3622 (define_insn "*vec_concatv4si_1"
3623 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3625 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3626 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3629 punpcklqdq\t{%2, %0|%0, %2}
3630 movlhps\t{%2, %0|%0, %2}
3631 movhps\t{%2, %0|%0, %2}"
3632 [(set_attr "type" "sselog,ssemov,ssemov")
3633 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI from two DImode values.  Alternatives:
;;   0: MOVQ from memory, operand 2 is zero;
;;   1: MOVQ2DQ from an MMX register, operand 2 is zero;
;;   2: PUNPCKLQDQ, operand 1 tied to the destination;
;;   3: MOVLHPS, operand 1 tied, operand 2 in an SSE register;
;;   4: MOVHPS, operand 1 tied, operand 2 in memory;
;;   5: MOVLPS from memory operand 1, operand 2 tied to the destination.
3635 (define_insn "*vec_concatv2di"
3636 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3638 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3639 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3642 movq\t{%1, %0|%0, %1}
3643 movq2dq\t{%1, %0|%0, %1}
3644 punpcklqdq\t{%2, %0|%0, %2}
3645 movlhps\t{%2, %0|%0, %2}
3646 movhps\t{%2, %0|%0, %2}
3647 movlps\t{%1, %0|%0, %1}"
3648 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3649 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Element-set expander for V2DI: hands the vector (operand 0), the new DI
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, with `false' as its first argument.
3651 (define_expand "vec_setv2di"
3652 [(match_operand:V2DI 0 "register_operand" "")
3653 (match_operand:DI 1 "register_operand" "")
3654 (match_operand 2 "const_int_operand" "")]
3657 ix86_expand_vector_set (false, operands[0], operands[1],
3658 INTVAL (operands[2]));
;; Element-extract expander for V2DI: hands the DI destination (operand 0),
;; the vector (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, with `false' as its first argument.
3662 (define_expand "vec_extractv2di"
3663 [(match_operand:DI 0 "register_operand" "")
3664 (match_operand:V2DI 1 "register_operand" "")
3665 (match_operand 2 "const_int_operand" "")]
3668 ix86_expand_vector_extract (false, operands[0], operands[1],
3669 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DI: forwards the destination and the
;; unconstrained initialiser operand to ix86_expand_vector_init, with
;; `false' as its first argument.
3673 (define_expand "vec_initv2di"
3674 [(match_operand:V2DI 0 "register_operand" "")
3675 (match_operand 1 "" "")]
3678 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V4SI: hands the vector (operand 0), the new SI
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, with `false' as its first argument.
3682 (define_expand "vec_setv4si"
3683 [(match_operand:V4SI 0 "register_operand" "")
3684 (match_operand:SI 1 "register_operand" "")
3685 (match_operand 2 "const_int_operand" "")]
3688 ix86_expand_vector_set (false, operands[0], operands[1],
3689 INTVAL (operands[2]));
;; Element-extract expander for V4SI: hands the SI destination (operand 0),
;; the vector (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, with `false' as its first argument.
3693 (define_expand "vec_extractv4si"
3694 [(match_operand:SI 0 "register_operand" "")
3695 (match_operand:V4SI 1 "register_operand" "")
3696 (match_operand 2 "const_int_operand" "")]
3699 ix86_expand_vector_extract (false, operands[0], operands[1],
3700 INTVAL (operands[2]));
;; Vector-initialisation expander for V4SI: forwards the destination and the
;; unconstrained initialiser operand to ix86_expand_vector_init, with
;; `false' as its first argument.
3704 (define_expand "vec_initv4si"
3705 [(match_operand:V4SI 0 "register_operand" "")
3706 (match_operand 1 "" "")]
3709 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V8HI: hands the vector (operand 0), the new HI
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, with `false' as its first argument.
3713 (define_expand "vec_setv8hi"
3714 [(match_operand:V8HI 0 "register_operand" "")
3715 (match_operand:HI 1 "register_operand" "")
3716 (match_operand 2 "const_int_operand" "")]
3719 ix86_expand_vector_set (false, operands[0], operands[1],
3720 INTVAL (operands[2]));
;; Element-extract expander for V8HI: hands the HI destination (operand 0),
;; the vector (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, with `false' as its first argument.
3724 (define_expand "vec_extractv8hi"
3725 [(match_operand:HI 0 "register_operand" "")
3726 (match_operand:V8HI 1 "register_operand" "")
3727 (match_operand 2 "const_int_operand" "")]
3730 ix86_expand_vector_extract (false, operands[0], operands[1],
3731 INTVAL (operands[2]));
;; Vector-initialisation expander for V8HI: forwards the destination and the
;; unconstrained initialiser operand to ix86_expand_vector_init, with
;; `false' as its first argument.
3735 (define_expand "vec_initv8hi"
3736 [(match_operand:V8HI 0 "register_operand" "")
3737 (match_operand 1 "" "")]
3740 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V16QI: hands the vector (operand 0), the new QI
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, with `false' as its first argument.
3744 (define_expand "vec_setv16qi"
3745 [(match_operand:V16QI 0 "register_operand" "")
3746 (match_operand:QI 1 "register_operand" "")
3747 (match_operand 2 "const_int_operand" "")]
3750 ix86_expand_vector_set (false, operands[0], operands[1],
3751 INTVAL (operands[2]));
;; Element-extract expander for V16QI: hands the QI destination (operand 0),
;; the vector (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, with `false' as its first argument.
3755 (define_expand "vec_extractv16qi"
3756 [(match_operand:QI 0 "register_operand" "")
3757 (match_operand:V16QI 1 "register_operand" "")
3758 (match_operand 2 "const_int_operand" "")]
3761 ix86_expand_vector_extract (false, operands[0], operands[1],
3762 INTVAL (operands[2]));
;; Vector-initialisation expander for V16QI: forwards the destination and
;; the unconstrained initialiser operand to ix86_expand_vector_init, with
;; `false' as its first argument.
3766 (define_expand "vec_initv16qi"
3767 [(match_operand:V16QI 0 "register_operand" "")
3768 (match_operand 1 "" "")]
3771 ix86_expand_vector_init (false, operands[0], operands[1]);
3775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PAVGB on V16QI.  Operands 1 and 2 are commutative ("%"), operand 1 is
;; tied to the destination, and the RTL includes an all-ones V16QI constant
;; vector.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.
3781 (define_insn "sse2_uavgv16qi3"
3782 [(set (match_operand:V16QI 0 "register_operand" "=x")
3788 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3790 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3791 (const_vector:V16QI [(const_int 1) (const_int 1)
3792 (const_int 1) (const_int 1)
3793 (const_int 1) (const_int 1)
3794 (const_int 1) (const_int 1)
3795 (const_int 1) (const_int 1)
3796 (const_int 1) (const_int 1)
3797 (const_int 1) (const_int 1)
3798 (const_int 1) (const_int 1)]))
3800 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3801 "pavgb\t{%2, %0|%0, %2}"
3802 [(set_attr "type" "sseiadd")
3803 (set_attr "mode" "TI")])
;; PAVGW on V8HI.  Operands 1 and 2 are commutative ("%"), operand 1 is
;; tied to the destination, and the RTL includes an all-ones V8HI constant
;; vector.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.
3805 (define_insn "sse2_uavgv8hi3"
3806 [(set (match_operand:V8HI 0 "register_operand" "=x")
3812 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3814 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3815 (const_vector:V8HI [(const_int 1) (const_int 1)
3816 (const_int 1) (const_int 1)
3817 (const_int 1) (const_int 1)
3818 (const_int 1) (const_int 1)]))
3820 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3821 "pavgw\t{%2, %0|%0, %2}"
3822 [(set_attr "type" "sseiadd")
3823 (set_attr "mode" "TI")])
3825 ;; The correct representation for this is absolutely enormous, and
3826 ;; surely not generally useful.
;; PSADBW, modelled as an opaque unspec over two V16QI inputs producing a
;; V2DI result.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
3827 (define_insn "sse2_psadbw"
3828 [(set (match_operand:V2DI 0 "register_operand" "=x")
3829 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3830 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3833 "psadbw\t{%2, %0|%0, %2}"
3834 [(set_attr "type" "sseiadd")
3835 (set_attr "mode" "TI")])
;; MOVMSKPS: an unspec producing an SImode general-register result from a
;; V4SF SSE register.
3837 (define_insn "sse_movmskps"
3838 [(set (match_operand:SI 0 "register_operand" "=r")
3839 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3842 "movmskps\t{%1, %0|%0, %1}"
3843 [(set_attr "type" "ssecvt")
3844 (set_attr "mode" "V4SF")])
;; MOVMSKPD: an unspec producing an SImode general-register result from a
;; V2DF SSE register.
3846 (define_insn "sse2_movmskpd"
3847 [(set (match_operand:SI 0 "register_operand" "=r")
3848 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3851 "movmskpd\t{%1, %0|%0, %1}"
3852 [(set_attr "type" "ssecvt")
3853 (set_attr "mode" "V2DF")])
;; PMOVMSKB: an unspec producing an SImode general-register result from a
;; V16QI SSE register.
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector -- confirm whether this is intentional (e.g. for length
;; or prefix computation) or copied from sse2_movmskpd.
3855 (define_insn "sse2_pmovmskb"
3856 [(set (match_operand:SI 0 "register_operand" "=r")
3857 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3860 "pmovmskb\t{%1, %0|%0, %1}"
3861 [(set_attr "type" "ssecvt")
3862 (set_attr "mode" "V2DF")])
;; Intrinsic-facing expander for MASKMOVDQU: a V16QI memory destination set
;; from an unspec whose visible inputs are the two V16QI register operands
;; 1 and 2.
3864 (define_expand "sse2_maskmovdqu"
3865 [(set (match_operand:V16QI 0 "memory_operand" "")
3866 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3867 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit MASKMOVDQU insn.  The destination address is an SImode register
;; restricted by constraint "D"; the unspec also reads the previous contents
;; of that memory, alongside register operands 1 and 2.  Only operands 1 and
;; 2 are printed; the address register is implicit in the template.
3873 (define_insn "*sse2_maskmovdqu"
3874 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3875 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3876 (match_operand:V16QI 2 "register_operand" "x")
3877 (mem:V16QI (match_dup 0))]
3879 "TARGET_SSE2 && !TARGET_64BIT"
3880 ;; @@@ check ordering of operands in intel/nonintel syntax
3881 "maskmovdqu\t{%2, %1|%1, %2}"
3882 [(set_attr "type" "ssecvt")
3883 (set_attr "mode" "TI")])
;; 64-bit MASKMOVDQU insn: the same shape as the 32-bit pattern, but the
;; destination address is a DImode register (constraint "D") and the insn is
;; enabled only for TARGET_64BIT.
3885 (define_insn "*sse2_maskmovdqu_rex64"
3886 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3887 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3888 (match_operand:V16QI 2 "register_operand" "x")
3889 (mem:V16QI (match_dup 0))]
3891 "TARGET_SSE2 && TARGET_64BIT"
3892 ;; @@@ check ordering of operands in intel/nonintel syntax
3893 "maskmovdqu\t{%2, %1|%1, %2}"
3894 [(set_attr "type" "ssecvt")
3895 (set_attr "mode" "TI")])
;; Volatile unspec that takes an SImode memory operand; its "memory"
;; attribute is "load".
3897 (define_insn "sse_ldmxcsr"
3898 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3902 [(set_attr "type" "sse")
3903 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the volatile unspec UNSPECV_STMXCSR;
;; its "memory" attribute is "store".
3905 (define_insn "sse_stmxcsr"
3906 [(set (match_operand:SI 0 "memory_operand" "=m")
3907 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3910 [(set_attr "type" "sse")
3911 (set_attr "memory" "store")])
;; Expander for the store fence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code creates operand 0 as a volatile
;; BLKmode MEM whose address is a SCRATCH, which the UNSPEC_SFENCE unspec
;; then references.
3913 (define_expand "sse_sfence"
3915 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3916 "TARGET_SSE || TARGET_3DNOW_A"
3918 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3919 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the store-fence RTL: a BLKmode operand set from
;; UNSPEC_SFENCE of itself.  Its "memory" attribute is "unknown".
3922 (define_insn "*sse_sfence"
3923 [(set (match_operand:BLK 0 "" "")
3924 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3925 "TARGET_SSE || TARGET_3DNOW_A"
3927 [(set_attr "type" "sse")
3928 (set_attr "memory" "unknown")])
;; Volatile unspec that takes an address operand (constraint "p"); its
;; "memory" attribute is "unknown".
3930 (define_insn "sse2_clflush"
3931 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3935 [(set_attr "type" "sse")
3936 (set_attr "memory" "unknown")])
;; Expander for the memory fence.  The preparation code creates operand 0 as
;; a volatile BLKmode MEM whose address is a SCRATCH, which the
;; UNSPEC_MFENCE unspec then references.
3938 (define_expand "sse2_mfence"
3940 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3943 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3944 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the memory-fence RTL: a BLKmode operand set from
;; UNSPEC_MFENCE of itself.  Its "memory" attribute is "unknown".
3947 (define_insn "*sse2_mfence"
3948 [(set (match_operand:BLK 0 "" "")
3949 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3952 [(set_attr "type" "sse")
3953 (set_attr "memory" "unknown")])
;; Expander for the load fence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a SCRATCH, which the UNSPEC_LFENCE
;; unspec then references.
3955 (define_expand "sse2_lfence"
3957 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3960 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3961 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the load-fence RTL: a BLKmode operand set from
;; UNSPEC_LFENCE of itself.  Its "memory" attribute is "unknown".
3964 (define_insn "*sse2_lfence"
3965 [(set (match_operand:BLK 0 "" "")
3966 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3969 [(set_attr "type" "sse")
3970 (set_attr "memory" "unknown")])
;; MWAIT as a volatile unspec with two SImode register inputs, pinned by
;; constraints "a" and "c".  The same SImode pattern serves 64-bit code, as
;; the note inside the pattern explains.  Length is fixed at 3 bytes.
3972 (define_insn "sse3_mwait"
3973 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3974 (match_operand:SI 1 "register_operand" "c")]
3977 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
3978 ;; Since 32bit register operands are implicitly zero extended to 64bit,
3979 ;; we only need to set up 32bit registers.
3981 [(set_attr "length" "3")])
;; 32-bit MONITOR as a volatile unspec with three SImode register inputs,
;; pinned by constraints "a", "c" and "d".  Enabled for TARGET_SSE3 when not
;; TARGET_64BIT.  Length is fixed at 3 bytes.
3983 (define_insn "sse3_monitor"
3984 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3985 (match_operand:SI 1 "register_operand" "c")
3986 (match_operand:SI 2 "register_operand" "d")]
3988 "TARGET_SSE3 && !TARGET_64BIT"
3989 "monitor\t%0, %1, %2"
3990 [(set_attr "length" "3")])
;; 64-bit MONITOR: operand 0 is a DImode register (constraint "a"), while
;; operands 1 and 2 stay SImode (constraints "c" and "d") for the reason
;; given in the note inside the pattern.  Enabled for TARGET_SSE3 with
;; TARGET_64BIT.  Length is fixed at 3 bytes.
3992 (define_insn "sse3_monitor64"
3993 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
3994 (match_operand:SI 1 "register_operand" "c")
3995 (match_operand:SI 2 "register_operand" "d")]
3997 "TARGET_SSE3 && TARGET_64BIT"
3998 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
3999 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4000 ;; zero extended to 64bit, we only need to set up 32bit registers.
4002 [(set_attr "length" "3")])