1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Mode macros used to instantiate one pattern per listed vector mode.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name appear to
;; give the element sizes in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI), which
;; matches the modes listed for each macro.
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for V16QI, V8HI, V4SI, V2DI respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander, instantiated for every integer vector mode in SSEMODEI.
;; The visible preparation statement hands the operands to
;; ix86_expand_vector_move.  The enable condition and the remainder of the
;; preparation block are not visible in this excerpt.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for SSEMODEI vectors with three alternatives:
;;   0: constraint C source into an SSE register,
;;   1: SSE register or memory into an SSE register,
;;   2: SSE register into memory.
;; The condition rejects memory-to-memory moves.  The output code switches
;; on which_alternative; in each visible branch the single-precision
;; mnemonic (xorps / movaps) is returned when the insn mode attribute is
;; V4SF, otherwise the integer one (pxor / movdqa).  The case labels and
;; parts of the mode attribute cond are not visible in this excerpt; the
;; visible tests involve TARGET_SSE2, optimize_size and
;; TARGET_SSE_TYPELESS_STORES, with TI as the default.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
87 (eq_attr "alternative" "0,1")
89 (ne (symbol_ref "optimize_size")
93 (eq_attr "alternative" "2")
95 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
97 (ne (symbol_ref "optimize_size")
100 (const_string "TI"))]
101 (const_string "TI")))])
;; Move expander for V4SF.  The visible preparation statement calls
;; ix86_expand_vector_move with V4SFmode; the condition and the rest of the
;; preparation block are not visible in this excerpt.
103 (define_expand "movv4sf"
104 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
105 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
108 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF with the same three alternatives as the integer
;; variant (constraint C source, reg/mem to reg, reg to mem).  The two
;; visible output templates are both movaps; the template for the first
;; alternative and the enable condition are not visible in this excerpt.
;; The mode attribute is always V4SF.
112 (define_insn "*movv4sf_internal"
113 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
114 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
118 movaps\t{%1, %0|%0, %1}
119 movaps\t{%1, %0|%0, %1}"
120 [(set_attr "type" "sselog1,ssemov,ssemov")
121 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (condition requires reload_completed) of a V4SF
;; register load whose source satisfies zero_extended_scalar_load_operand.
;; The preparation code narrows operand 1 to its SFmode subreg at offset 0
;; and sets operand 2 to the V4SF zero constant; the replacement pattern
;; uses a vec_duplicate of operand 1.  The defining keyword line and part
;; of the replacement pattern are not visible in this excerpt.
124 [(set (match_operand:V4SF 0 "register_operand" "")
125 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
126 "TARGET_SSE && reload_completed"
129 (vec_duplicate:V4SF (match_dup 1))
133 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
134 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF.  The visible preparation statement calls
;; ix86_expand_vector_move with V2DFmode; the condition and the rest of the
;; preparation block are not visible in this excerpt.
137 (define_expand "movv2df"
138 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
139 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
142 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF; alternatives are constraint C source into a
;; register, reg/mem into a register, and register into memory, with
;; memory-to-memory rejected by the condition.  In each visible branch of
;; the output code the single-precision mnemonic (xorps / movaps) is used
;; when the insn mode attribute is V4SF, otherwise the double-precision one
;; (xorpd / movapd).  The mode attribute is V4SF when TARGET_SSE2 is zero;
;; the remaining arms test optimize_size and TARGET_SSE_TYPELESS_STORES and
;; choose between V4SF and V2DF, defaulting to V2DF.  Case labels and some
;; lines of the cond are not visible in this excerpt.
146 (define_insn "*movv2df_internal"
147 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
148 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
149 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
151 switch (which_alternative)
154 if (get_attr_mode (insn) == MODE_V4SF)
155 return "xorps\t%0, %0";
157 return "xorpd\t%0, %0";
160 if (get_attr_mode (insn) == MODE_V4SF)
161 return "movaps\t{%1, %0|%0, %1}";
163 return "movapd\t{%1, %0|%0, %1}";
168 [(set_attr "type" "sselog1,ssemov,ssemov")
170 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
171 (const_string "V4SF")
172 (eq_attr "alternative" "0,1")
174 (ne (symbol_ref "optimize_size")
176 (const_string "V4SF")
177 (const_string "V2DF"))
178 (eq_attr "alternative" "2")
180 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
182 (ne (symbol_ref "optimize_size")
184 (const_string "V4SF")
185 (const_string "V2DF"))]
186 (const_string "V2DF")))])
;; Post-reload rewrite (condition requires TARGET_SSE2 and
;; reload_completed) of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its DFmode
;; subreg at offset 0, operand 2 becomes the DFmode zero constant, and the
;; result is the vec_concat of the two.  The defining keyword line is not
;; visible in this excerpt.
189 [(set (match_operand:V2DF 0 "register_operand" "")
190 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
191 "TARGET_SSE2 && reload_completed"
192 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
194 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
195 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes one register operand and
;; passes it to ix86_expand_push.  Condition and the rest of the
;; preparation block are not visible in this excerpt.
198 (define_expand "push<mode>1"
199 [(match_operand:SSEMODE 0 "register_operand" "")]
202 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; the visible
;; preparation statement calls ix86_expand_vector_move_misalign.  Condition
;; and the rest of the preparation block are not visible in this excerpt.
206 (define_expand "movmisalign<mode>"
207 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
208 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
211 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as movups, represented as an unspec of the source.
;; Alternatives: reg/mem into register, register into memory; the condition
;; rejects memory-to-memory.  The unspec code line is not visible in this
;; excerpt.
;; The mode attribute is V4SF to match the V4SF operands and the
;; single-precision mnemonic (it previously said V2DF, which belongs to the
;; double-precision sibling sse2_movupd).
215 (define_insn "sse_movups"
216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
217 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
219 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
220 "movups\t{%1, %0|%0, %1}"
221 [(set_attr "type" "ssemov")
222 (set_attr "mode" "V4SF")])
;; V2DF move emitted as movupd, represented as an unspec of the source.
;; Alternatives: reg/mem into register, register into memory; the condition
;; requires TARGET_SSE2 and rejects memory-to-memory.  The unspec code line
;; is not visible in this excerpt.
224 (define_insn "sse2_movupd"
225 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
226 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
228 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
229 "movupd\t{%1, %0|%0, %1}"
230 [(set_attr "type" "ssemov")
231 (set_attr "mode" "V2DF")])
;; V16QI move emitted as movdqu, represented as an unspec of the source.
;; Alternatives: reg/mem into register, register into memory; the condition
;; requires TARGET_SSE2 and rejects memory-to-memory.  The unspec code line
;; is not visible in this excerpt.
233 (define_insn "sse2_movdqu"
234 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
235 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
237 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
238 "movdqu\t{%1, %0|%0, %1}"
239 [(set_attr "type" "ssemov")
240 (set_attr "mode" "TI")])
;; Store of a V4SF register to memory emitted as movntps, represented as an
;; unspec.  The unspec code and the enable condition are not visible in
;; this excerpt.
242 (define_insn "sse_movntv4sf"
243 [(set (match_operand:V4SF 0 "memory_operand" "=m")
244 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
247 "movntps\t{%1, %0|%0, %1}"
248 [(set_attr "type" "ssemov")
249 (set_attr "mode" "V4SF")])
;; Store of a V2DF register to memory emitted as movntpd, represented as an
;; unspec.  The unspec code and the enable condition are not visible in
;; this excerpt.
;; NOTE(review): type is ssecvt here while sse_movntv4sf uses ssemov --
;; confirm whether the difference is intended.
251 (define_insn "sse2_movntv2df"
252 [(set (match_operand:V2DF 0 "memory_operand" "=m")
253 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
256 "movntpd\t{%1, %0|%0, %1}"
257 [(set_attr "type" "ssecvt")
258 (set_attr "mode" "V2DF")])
;; Store of a V2DI register to memory emitted as movntdq, represented as an
;; unspec.  The unspec code and the enable condition are not visible in
;; this excerpt.
260 (define_insn "sse2_movntv2di"
261 [(set (match_operand:V2DI 0 "memory_operand" "=m")
262 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
265 "movntdq\t{%1, %0|%0, %1}"
266 [(set_attr "type" "ssecvt")
267 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint r) to memory emitted as
;; movnti, represented as an unspec.  The unspec code and the enable
;; condition are not visible in this excerpt.
;; The mode attribute is SI to match the SImode operands (it previously
;; said V2DF, although no vector or floating-point operand is involved).
269 (define_insn "sse2_movntsi"
270 [(set (match_operand:SI 0 "memory_operand" "=m")
271 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
274 "movnti\t{%1, %0|%0, %1}"
275 [(set_attr "type" "ssecvt")
276 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register emitted as lddqu,
;; represented as an unspec.  The unspec code and the enable condition are
;; not visible in this excerpt.
278 (define_insn "sse3_lddqu"
279 [(set (match_operand:V16QI 0 "register_operand" "=x")
280 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
283 "lddqu\t{%1, %0|%0, %1}"
284 [(set_attr "type" "ssecvt")
285 (set_attr "mode" "TI")])
287 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
289 ;; Parallel single-precision floating point arithmetic
291 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator with code NEG; DONE means the pattern
;; above is not emitted itself.  The condition line is not visible here.
293 (define_expand "negv4sf2"
294 [(set (match_operand:V4SF 0 "register_operand" "")
295 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
297 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator with code ABS; DONE means the pattern
;; above is not emitted itself.  The condition line is not visible here.
299 (define_expand "absv4sf2"
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
303 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy (code PLUS), then the plus pattern is
;; emitted.  The condition line is not visible in this excerpt.
305 (define_expand "addv4sf3"
306 [(set (match_operand:V4SF 0 "register_operand" "")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
308 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
310 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF add (addps).  Operand 1 is tied to the destination and
;; marked commutative (%); operand 2 may be a register or memory.  Enabled
;; under TARGET_SSE when ix86_binary_operator_ok accepts the operands.
312 (define_insn "*addv4sf3"
313 [(set (match_operand:V4SF 0 "register_operand" "=x")
314 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
315 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
316 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
317 "addps\t{%2, %0|%0, %2}"
318 [(set_attr "type" "sseadd")
319 (set_attr "mode" "V4SF")])
;; Scalar-in-vector add (addss, mode attribute SF) on V4SF operands.  The
;; RTL that wraps the plus expression is not visible in this excerpt, so
;; how the remaining elements are formed cannot be read from here.
321 (define_insn "sse_vmaddv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
324 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
325 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
328 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
329 "addss\t{%2, %0|%0, %2}"
330 [(set_attr "type" "sseadd")
331 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register (subtraction is
;; not commutative); operands go through
;; ix86_fixup_binary_operands_no_copy (code MINUS) before the pattern is
;; emitted.  The condition line is not visible in this excerpt.
333 (define_expand "subv4sf3"
334 [(set (match_operand:V4SF 0 "register_operand" "")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
336 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
338 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtract (subps).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  The enable condition is not
;; visible in this excerpt.
340 (define_insn "*subv4sf3"
341 [(set (match_operand:V4SF 0 "register_operand" "=x")
342 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
343 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
345 "subps\t{%2, %0|%0, %2}"
346 [(set_attr "type" "sseadd")
347 (set_attr "mode" "V4SF")])
;; Scalar-in-vector subtract (subss, mode attribute SF) on V4SF operands.
;; The RTL that wraps the minus expression and the enable condition are not
;; visible in this excerpt.
349 (define_insn "sse_vmsubv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
352 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
353 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
357 "subss\t{%2, %0|%0, %2}"
358 [(set_attr "type" "sseadd")
359 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Operands go through
;; ix86_fixup_binary_operands_no_copy (code MULT) before the mult pattern
;; is emitted.  The condition line is not visible in this excerpt.
361 (define_expand "mulv4sf3"
362 [(set (match_operand:V4SF 0 "register_operand" "")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
364 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
366 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiply (mulps).  Operand 1 is tied to the destination and
;; marked commutative (%); operand 2 may be a register or memory.  Enabled
;; under TARGET_SSE when ix86_binary_operator_ok accepts the operands.
368 (define_insn "*mulv4sf3"
369 [(set (match_operand:V4SF 0 "register_operand" "=x")
370 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
371 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
372 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
373 "mulps\t{%2, %0|%0, %2}"
374 [(set_attr "type" "ssemul")
375 (set_attr "mode" "V4SF")])
;; Scalar-in-vector multiply (mulss, mode attribute SF) on V4SF operands.
;; The RTL that wraps the mult expression is not visible in this excerpt.
377 (define_insn "sse_vmmulv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
380 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
381 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
384 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
385 "mulss\t{%2, %0|%0, %2}"
386 [(set_attr "type" "ssemul")
387 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register; operands go
;; through ix86_fixup_binary_operands_no_copy (code DIV) before the div
;; pattern is emitted.  The condition line is not visible in this excerpt.
389 (define_expand "divv4sf3"
390 [(set (match_operand:V4SF 0 "register_operand" "")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
392 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
394 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed V4SF divide (divps).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  The enable condition is not
;; visible in this excerpt.
396 (define_insn "*divv4sf3"
397 [(set (match_operand:V4SF 0 "register_operand" "=x")
398 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
399 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
401 "divps\t{%2, %0|%0, %2}"
402 [(set_attr "type" "ssediv")
403 (set_attr "mode" "V4SF")])
;; Scalar-in-vector divide (divss, mode attribute SF) on V4SF operands.
;; The RTL that wraps the div expression and the enable condition are not
;; visible in this excerpt.
405 (define_insn "sse_vmdivv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "=x")
408 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
409 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
413 "divss\t{%2, %0|%0, %2}"
414 [(set_attr "type" "ssediv")
415 (set_attr "mode" "SF")])
;; Packed V4SF rcpps, modelled with UNSPEC_RCP on a register-or-memory
;; source.  The line opening the unspec and the enable condition are not
;; visible in this excerpt.
417 (define_insn "sse_rcpv4sf2"
418 [(set (match_operand:V4SF 0 "register_operand" "=x")
420 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
422 "rcpps\t{%1, %0|%0, %1}"
423 [(set_attr "type" "sse")
424 (set_attr "mode" "V4SF")])
;; Scalar-in-vector rcpss (mode attribute SF).  Operand 1 is the source of
;; the unspec; operand 2 is a register tied to the destination.  The unspec
;; code, the wrapping RTL and the enable condition are not visible in this
;; excerpt.
426 (define_insn "sse_vmrcpv4sf2"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
429 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
431 (match_operand:V4SF 2 "register_operand" "0")
434 "rcpss\t{%1, %0|%0, %1}"
435 [(set_attr "type" "sse")
436 (set_attr "mode" "SF")])
;; Packed V4SF rsqrtps, modelled with UNSPEC_RSQRT on a register-or-memory
;; source.  The line opening the unspec and the enable condition are not
;; visible in this excerpt.
438 (define_insn "sse_rsqrtv4sf2"
439 [(set (match_operand:V4SF 0 "register_operand" "=x")
441 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
443 "rsqrtps\t{%1, %0|%0, %1}"
444 [(set_attr "type" "sse")
445 (set_attr "mode" "V4SF")])
;; Scalar-in-vector rsqrtss (mode attribute SF).  Operand 1 is the source
;; of the unspec; operand 2 is a register tied to the destination.  The
;; unspec code, the wrapping RTL and the enable condition are not visible
;; in this excerpt.
447 (define_insn "sse_vmrsqrtv4sf2"
448 [(set (match_operand:V4SF 0 "register_operand" "=x")
450 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
452 (match_operand:V4SF 2 "register_operand" "0")
455 "rsqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
;; Packed V4SF square root (sqrtps) of a register-or-memory source.  The
;; enable condition is not visible in this excerpt.
459 (define_insn "sqrtv4sf2"
460 [(set (match_operand:V4SF 0 "register_operand" "=x")
461 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
463 "sqrtps\t{%1, %0|%0, %1}"
464 [(set_attr "type" "sse")
465 (set_attr "mode" "V4SF")])
;; Scalar-in-vector square root (sqrtss, mode attribute SF).  Operand 1 is
;; the sqrt source; operand 2 is a register tied to the destination.  The
;; wrapping RTL and the enable condition are not visible in this excerpt.
467 (define_insn "sse_vmsqrtv4sf2"
468 [(set (match_operand:V4SF 0 "register_operand" "=x")
470 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
471 (match_operand:V4SF 2 "register_operand" "0")
474 "sqrtss\t{%1, %0|%0, %1}"
475 [(set_attr "type" "sse")
476 (set_attr "mode" "SF")])
478 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
479 ;; isn't really correct, as those rtl operators aren't defined when
480 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is off, operand 1 is
;; forced into a register first, which is what the non-finite insn
;; *smaxv4sf3 requires of operand 1.  The operands then go through
;; ix86_fixup_binary_operands_no_copy (code SMAX).  The condition and the
;; braces of the preparation block are not visible in this excerpt.
482 (define_expand "smaxv4sf3"
483 [(set (match_operand:V4SF 0 "register_operand" "")
484 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
485 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
488 if (!flag_finite_math_only)
489 operands[1] = force_reg (V4SFmode, operands[1]);
490 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed V4SF maximum (maxps) for flag_finite_math_only: operand 1 is
;; marked commutative (%) and tied to the destination.  Requires TARGET_SSE
;; and ix86_binary_operator_ok.
493 (define_insn "*smaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
496 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
497 "TARGET_SSE && flag_finite_math_only
498 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
499 "maxps\t{%2, %0|%0, %2}"
500 [(set_attr "type" "sse")
501 (set_attr "mode" "V4SF")])
;; Packed V4SF maximum (maxps), non-commutative form: operand 1 must be a
;; register tied to the destination and carries no % marker.  The enable
;; condition is not visible in this excerpt.
503 (define_insn "*smaxv4sf3"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
505 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
506 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
508 "maxps\t{%2, %0|%0, %2}"
509 [(set_attr "type" "sse")
510 (set_attr "mode" "V4SF")])
;; Scalar-in-vector maximum (maxss, mode attribute SF) for
;; flag_finite_math_only, with operand 1 marked commutative.  The RTL that
;; wraps the smax expression is not visible in this excerpt.
512 (define_insn "*sse_vmsmaxv4sf3_finite"
513 [(set (match_operand:V4SF 0 "register_operand" "=x")
515 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
516 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
519 "TARGET_SSE && flag_finite_math_only
520 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
521 "maxss\t{%2, %0|%0, %2}"
522 [(set_attr "type" "sse")
523 (set_attr "mode" "SF")])
;; Scalar-in-vector maximum (maxss, mode attribute SF), non-commutative
;; form with operand 1 a register tied to the destination.  The wrapping
;; RTL and the enable condition are not visible in this excerpt.
525 (define_insn "sse_vmsmaxv4sf3"
526 [(set (match_operand:V4SF 0 "register_operand" "=x")
528 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
529 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
533 "maxss\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is off, operand 1 is
;; forced into a register first, which is what the non-finite insn
;; *sminv4sf3 requires of operand 1.  The operands then go through
;; ix86_fixup_binary_operands_no_copy (code SMIN).  The condition and the
;; braces of the preparation block are not visible in this excerpt.
537 (define_expand "sminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "")
539 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
540 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
543 if (!flag_finite_math_only)
544 operands[1] = force_reg (V4SFmode, operands[1]);
545 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed V4SF minimum (minps) for flag_finite_math_only: operand 1 is
;; marked commutative (%) and tied to the destination.  Requires TARGET_SSE
;; and ix86_binary_operator_ok.
548 (define_insn "*sminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
550 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
551 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
552 "TARGET_SSE && flag_finite_math_only
553 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
554 "minps\t{%2, %0|%0, %2}"
555 [(set_attr "type" "sse")
556 (set_attr "mode" "V4SF")])
;; Packed V4SF minimum (minps), non-commutative form: operand 1 must be a
;; register tied to the destination and carries no % marker.  The enable
;; condition is not visible in this excerpt.
558 (define_insn "*sminv4sf3"
559 [(set (match_operand:V4SF 0 "register_operand" "=x")
560 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
561 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
563 "minps\t{%2, %0|%0, %2}"
564 [(set_attr "type" "sse")
565 (set_attr "mode" "V4SF")])
;; Scalar-in-vector minimum (minss, mode attribute SF) for
;; flag_finite_math_only, with operand 1 marked commutative.  The RTL that
;; wraps the smin expression is not visible in this excerpt.
567 (define_insn "*sse_vmsminv4sf3_finite"
568 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
574 "TARGET_SSE && flag_finite_math_only
575 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
576 "minss\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sse")
578 (set_attr "mode" "SF")])
;; Scalar-in-vector minimum (minss, mode attribute SF), non-commutative
;; form with operand 1 a register tied to the destination.  The wrapping
;; RTL and the enable condition are not visible in this excerpt.
580 (define_insn "sse_vmsminv4sf3"
581 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
588 "minss\t{%2, %0|%0, %2}"
589 [(set_attr "type" "sseadd")
590 (set_attr "mode" "SF")])
;; addsubps on V4SF operands.  The visible RTL combines an expression on
;; operands 1 and 2 with the minus of the same two operands; the enclosing
;; operator, the selector constant and the enable condition are not visible
;; in this excerpt.
592 (define_insn "sse3_addsubv4sf3"
593 [(set (match_operand:V4SF 0 "register_operand" "=x")
596 (match_operand:V4SF 1 "register_operand" "0")
597 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
598 (minus:V4SF (match_dup 1) (match_dup 2))
601 "addsubps\t{%2, %0|%0, %2}"
602 [(set_attr "type" "sseadd")
603 (set_attr "mode" "V4SF")])
;; haddps on V4SF operands.  The visible RTL selects SF elements 0, 1, 2
;; and 3 of operand 1 and then of operand 2; the operators that combine and
;; concatenate those elements, and the enable condition, are not visible in
;; this excerpt.
605 (define_insn "sse3_haddv4sf3"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
611 (match_operand:V4SF 1 "register_operand" "0")
612 (parallel [(const_int 0)]))
613 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
615 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
616 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
620 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
621 (parallel [(const_int 0)]))
622 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
624 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
625 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
627 "haddps\t{%2, %0|%0, %2}"
628 [(set_attr "type" "sseadd")
629 (set_attr "mode" "V4SF")])
;; hsubps on V4SF operands.  The visible RTL selects SF elements 0, 1, 2
;; and 3 of operand 1 and then of operand 2; the operators that combine and
;; concatenate those elements, and the enable condition, are not visible in
;; this excerpt.
631 (define_insn "sse3_hsubv4sf3"
632 [(set (match_operand:V4SF 0 "register_operand" "=x")
637 (match_operand:V4SF 1 "register_operand" "0")
638 (parallel [(const_int 0)]))
639 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
641 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
646 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
647 (parallel [(const_int 0)]))
648 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
650 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
653 "hsubps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sseadd")
655 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two code paths are visible: one emits
;; sse3_haddv4sf3 twice (operand 1 with itself into a fresh temporary, then
;; the temporary with itself into operand 0); the other calls
;; ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that chooses between
;; them is not visible in this excerpt -- presumably SSE3 availability,
;; TODO confirm.
657 (define_expand "reduc_plus_v4sf"
658 [(match_operand:V4SF 0 "register_operand" "")
659 (match_operand:V4SF 1 "register_operand" "")]
664 rtx tmp = gen_reg_rtx (V4SFmode);
665 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
666 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
669 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.  The condition and the
;; rest of the preparation block are not visible in this excerpt.
673 (define_expand "reduc_smax_v4sf"
674 [(match_operand:V4SF 0 "register_operand" "")
675 (match_operand:V4SF 1 "register_operand" "")]
678 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.  The condition and the
;; rest of the preparation block are not visible in this excerpt.
682 (define_expand "reduc_smin_v4sf"
683 [(match_operand:V4SF 0 "register_operand" "")
684 (match_operand:V4SF 1 "register_operand" "")]
687 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
693 ;; Parallel single-precision floating point comparisons
695 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison.  Operand 3 is any operator accepted by
;; sse_comparison_operator; it is printed through the %D3 modifier to form
;; the cmp..ps mnemonic.  Operand 1 is tied to the destination.  The enable
;; condition is not visible in this excerpt.
697 (define_insn "sse_maskcmpv4sf3"
698 [(set (match_operand:V4SF 0 "register_operand" "=x")
699 (match_operator:V4SF 3 "sse_comparison_operator"
700 [(match_operand:V4SF 1 "register_operand" "0")
701 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
703 "cmp%D3ps\t{%2, %0|%0, %2}"
704 [(set_attr "type" "ssecmp")
705 (set_attr "mode" "V4SF")])
;; Scalar-in-vector comparison (cmp..ss via %D3, mode attribute SF).
;; Unlike the packed form, operand 2 is restricted to a register here.  The
;; wrapping RTL and the enable condition are not visible in this excerpt.
707 (define_insn "sse_vmmaskcmpv4sf3"
708 [(set (match_operand:V4SF 0 "register_operand" "=x")
710 (match_operator:V4SF 3 "sse_comparison_operator"
711 [(match_operand:V4SF 1 "register_operand" "0")
712 (match_operand:V4SF 2 "register_operand" "x")])
716 "cmp%D3ss\t{%2, %0|%0, %2}"
717 [(set_attr "type" "ssecmp")
718 (set_attr "mode" "SF")])
;; comiss: sets the flags register in CCFP mode from element 0 of operand 0
;; and element 0 of operand 1.  The comparison and vec_select operator
;; lines, and the enable condition, are not visible in this excerpt.
720 (define_insn "sse_comi"
721 [(set (reg:CCFP FLAGS_REG)
724 (match_operand:V4SF 0 "register_operand" "x")
725 (parallel [(const_int 0)]))
727 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
728 (parallel [(const_int 0)]))))]
730 "comiss\t{%1, %0|%0, %1}"
731 [(set_attr "type" "ssecomi")
732 (set_attr "mode" "SF")])
;; ucomiss: sets the flags register in CCFPU mode from element 0 of
;; operand 0 and element 0 of operand 1.  The comparison and vec_select
;; operator lines, and the enable condition, are not visible in this
;; excerpt.
734 (define_insn "sse_ucomi"
735 [(set (reg:CCFPU FLAGS_REG)
738 (match_operand:V4SF 0 "register_operand" "x")
739 (parallel [(const_int 0)]))
741 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
742 (parallel [(const_int 0)]))))]
744 "ucomiss\t{%1, %0|%0, %1}"
745 [(set_attr "type" "ssecomi")
746 (set_attr "mode" "SF")])
;; Vector conditional expander for V4SF.  Operands 4 and 5 are the inputs
;; of a comparison whose operator line is not visible; operands 1 and 2 are
;; general operands that follow it -- presumably the two value arms, TODO
;; confirm.  The preparation code tests the result of ix86_expand_fp_vcond;
;; what happens on either outcome is not visible in this excerpt.
748 (define_expand "vcondv4sf"
749 [(set (match_operand:V4SF 0 "register_operand" "")
752 [(match_operand:V4SF 4 "nonimmediate_operand" "")
753 (match_operand:V4SF 5 "nonimmediate_operand" "")])
754 (match_operand:V4SF 1 "general_operand" "")
755 (match_operand:V4SF 2 "general_operand" "")))]
758 if (ix86_expand_fp_vcond (operands))
764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
766 ;; Parallel single-precision floating point logical operations
768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Operands go through
;; ix86_fixup_binary_operands_no_copy (code AND) before the and pattern is
;; emitted.  The condition line is not visible in this excerpt.
770 (define_expand "andv4sf3"
771 [(set (match_operand:V4SF 0 "register_operand" "")
772 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
773 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
775 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed V4SF bitwise AND (andps).  Operand 1 is tied to the destination
;; and marked commutative (%).  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.
777 (define_insn "*andv4sf3"
778 [(set (match_operand:V4SF 0 "register_operand" "=x")
779 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
780 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
781 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
782 "andps\t{%2, %0|%0, %2}"
783 [(set_attr "type" "sselog")
784 (set_attr "mode" "V4SF")])
;; andnps: computes (NOT operand 1) AND operand 2 on V4SF.  Operand 1 is a
;; register tied to the destination.  The enable condition is not visible
;; in this excerpt.
786 (define_insn "sse_nandv4sf3"
787 [(set (match_operand:V4SF 0 "register_operand" "=x")
788 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
789 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
791 "andnps\t{%2, %0|%0, %2}"
792 [(set_attr "type" "sselog")
793 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Operands go through
;; ix86_fixup_binary_operands_no_copy (code IOR) before the ior pattern is
;; emitted.  The condition line is not visible in this excerpt.
795 (define_expand "iorv4sf3"
796 [(set (match_operand:V4SF 0 "register_operand" "")
797 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
798 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
800 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed V4SF bitwise OR (orps).  Operand 1 is tied to the destination and
;; marked commutative (%).  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.
802 (define_insn "*iorv4sf3"
803 [(set (match_operand:V4SF 0 "register_operand" "=x")
804 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
805 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
806 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
807 "orps\t{%2, %0|%0, %2}"
808 [(set_attr "type" "sselog")
809 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Operands go through
;; ix86_fixup_binary_operands_no_copy (code XOR) before the xor pattern is
;; emitted.  The condition line is not visible in this excerpt.
811 (define_expand "xorv4sf3"
812 [(set (match_operand:V4SF 0 "register_operand" "")
813 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
814 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
816 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed V4SF bitwise XOR (xorps).  Operand 1 is tied to the destination
;; and marked commutative (%).  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.
818 (define_insn "*xorv4sf3"
819 [(set (match_operand:V4SF 0 "register_operand" "=x")
820 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
821 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
822 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
823 "xorps\t{%2, %0|%0, %2}"
824 [(set_attr "type" "sselog")
825 (set_attr "mode" "V4SF")])
827 ;; Also define scalar versions. These are used for abs, neg, and
828 ;; conditional move. Using subregs into vector modes causes register
829 ;; allocation lossage. These patterns do not allow memory operands
830 ;; because the native instructions read the full 128-bits.
;; Scalar SF bitwise AND implemented with the packed andps instruction;
;; both inputs must be registers and the mode attribute is V4SF.  The
;; enable condition is not visible in this excerpt.
832 (define_insn "*andsf3"
833 [(set (match_operand:SF 0 "register_operand" "=x")
834 (and:SF (match_operand:SF 1 "register_operand" "0")
835 (match_operand:SF 2 "register_operand" "x")))]
837 "andps\t{%2, %0|%0, %2}"
838 [(set_attr "type" "sselog")
839 (set_attr "mode" "V4SF")])
;; Scalar SF (NOT operand 1) AND operand 2, implemented with the packed
;; andnps instruction; both inputs must be registers and the mode attribute
;; is V4SF.  The enable condition is not visible in this excerpt.
841 (define_insn "*nandsf3"
842 [(set (match_operand:SF 0 "register_operand" "=x")
843 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
844 (match_operand:SF 2 "register_operand" "x")))]
846 "andnps\t{%2, %0|%0, %2}"
847 [(set_attr "type" "sselog")
848 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise OR implemented with the packed orps instruction; both
;; inputs must be registers and the mode attribute is V4SF.  The enable
;; condition is not visible in this excerpt.
850 (define_insn "*iorsf3"
851 [(set (match_operand:SF 0 "register_operand" "=x")
852 (ior:SF (match_operand:SF 1 "register_operand" "0")
853 (match_operand:SF 2 "register_operand" "x")))]
855 "orps\t{%2, %0|%0, %2}"
856 [(set_attr "type" "sselog")
857 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise XOR implemented with the packed xorps instruction;
;; both inputs must be registers and the mode attribute is V4SF.  The
;; enable condition is not visible in this excerpt.
859 (define_insn "*xorsf3"
860 [(set (match_operand:SF 0 "register_operand" "=x")
861 (xor:SF (match_operand:SF 1 "register_operand" "0")
862 (match_operand:SF 2 "register_operand" "x")))]
864 "xorps\t{%2, %0|%0, %2}"
865 [(set_attr "type" "sselog")
866 (set_attr "mode" "V4SF")])
868 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
870 ;; Parallel single-precision floating point conversion operations
872 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts a V2SI operand (constraint y register or memory) to
;; V2SF with float and combines it with operand 1, a V4SF register tied to
;; the destination.  The RTL that merges the two and the enable condition
;; are not visible in this excerpt.
874 (define_insn "sse_cvtpi2ps"
875 [(set (match_operand:V4SF 0 "register_operand" "=x")
878 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
879 (match_operand:V4SF 1 "register_operand" "0")
882 "cvtpi2ps\t{%2, %0|%0, %2}"
883 [(set_attr "type" "ssecvt")
884 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source converted through a V4SI unspec, of which elements
;; 0 and 1 are kept as the V2SI result in a constraint y register.  The
;; unspec code, the vec_select line and the enable condition are not
;; visible in this excerpt.
886 (define_insn "sse_cvtps2pi"
887 [(set (match_operand:V2SI 0 "register_operand" "=y")
889 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
891 (parallel [(const_int 0) (const_int 1)])))]
893 "cvtps2pi\t{%1, %0|%0, %1}"
894 [(set_attr "type" "ssecvt")
895 (set_attr "unit" "mmx")
896 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF source converted with fix to V4SI, of which elements 0
;; and 1 are kept as the V2SI result in a constraint y register.  The
;; vec_select line and the enable condition are not visible in this
;; excerpt.
;; NOTE(review): the mode attribute is SF here, while the otherwise
;; parallel sse_cvtps2pi uses DI -- confirm which one is intended.
898 (define_insn "sse_cvttps2pi"
899 [(set (match_operand:V2SI 0 "register_operand" "=y")
901 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
902 (parallel [(const_int 0) (const_int 1)])))]
904 "cvttps2pi\t{%1, %0|%0, %1}"
905 [(set_attr "type" "ssecvt")
906 (set_attr "unit" "mmx")
907 (set_attr "mode" "SF")])
;; cvtsi2ss: converts an SImode operand (general register or memory) to SF
;; and combines it with operand 1, a V4SF register tied to the destination.
;; The merging RTL and the enable condition are not visible in this
;; excerpt.  athlon_decode differs per alternative (vector for the register
;; source, double for the memory source).
909 (define_insn "sse_cvtsi2ss"
910 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
913 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
914 (match_operand:V4SF 1 "register_operand" "0,0")
917 "cvtsi2ss\t{%2, %0|%0, %2}"
918 [(set_attr "type" "sseicvt")
919 (set_attr "athlon_decode" "vector,double")
920 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant converting a DImode operand to SF; requires
;; TARGET_SSE and TARGET_64BIT.  The merging RTL is not visible in this
;; excerpt.
;; NOTE(review): the second alternative of operand 2 is rm here but plain m
;; in sse_cvtsi2ss, so a register source can match the alternative whose
;; athlon_decode is double -- confirm whether that is intended.
922 (define_insn "sse_cvtsi2ssq"
923 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
926 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
927 (match_operand:V4SF 1 "register_operand" "0,0")
929 "TARGET_SSE && TARGET_64BIT"
930 "cvtsi2ssq\t{%2, %0|%0, %2}"
931 [(set_attr "type" "sseicvt")
932 (set_attr "athlon_decode" "vector,double")
933 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of a V4SF operand (register or memory) to
;; an SImode general register, modelled with UNSPEC_FIX_NOTRUNC.  The lines
;; opening the unspec and vec_select, and the enable condition, are not
;; visible in this excerpt.
935 (define_insn "sse_cvtss2si"
936 [(set (match_operand:SI 0 "register_operand" "=r,r")
939 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
940 (parallel [(const_int 0)]))]
941 UNSPEC_FIX_NOTRUNC))]
943 "cvtss2si\t{%1, %0|%0, %1}"
944 [(set_attr "type" "sseicvt")
945 (set_attr "athlon_decode" "double,vector")
946 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit variant converting element 0 of a V4SF operand to a
;; DImode general register, modelled with UNSPEC_FIX_NOTRUNC; requires
;; TARGET_SSE and TARGET_64BIT.  The lines opening the unspec and
;; vec_select are not visible in this excerpt.
948 (define_insn "sse_cvtss2siq"
949 [(set (match_operand:DI 0 "register_operand" "=r,r")
952 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
953 (parallel [(const_int 0)]))]
954 UNSPEC_FIX_NOTRUNC))]
955 "TARGET_SSE && TARGET_64BIT"
956 "cvtss2siq\t{%1, %0|%0, %1}"
957 [(set_attr "type" "sseicvt")
958 (set_attr "athlon_decode" "double,vector")
959 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of a V4SF operand (register or memory) to
;; an SImode general register.  The conversion and vec_select operator
;; lines, and the enable condition, are not visible in this excerpt.
961 (define_insn "sse_cvttss2si"
962 [(set (match_operand:SI 0 "register_operand" "=r,r")
965 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
966 (parallel [(const_int 0)]))))]
968 "cvttss2si\t{%1, %0|%0, %1}"
969 [(set_attr "type" "sseicvt")
970 (set_attr "athlon_decode" "double,vector")
971 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit variant converting element 0 of a V4SF operand to a
;; DImode general register; requires TARGET_SSE and TARGET_64BIT.  The
;; conversion and vec_select operator lines are not visible in this
;; excerpt.
973 (define_insn "sse_cvttss2siq"
974 [(set (match_operand:DI 0 "register_operand" "=r,r")
977 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
978 (parallel [(const_int 0)]))))]
979 "TARGET_SSE && TARGET_64BIT"
980 "cvttss2siq\t{%1, %0|%0, %1}"
981 [(set_attr "type" "sseicvt")
982 (set_attr "athlon_decode" "double,vector")
983 (set_attr "mode" "DI")])
;; cvtdq2ps: converts a V4SI operand (register or memory) to V4SF with
;; float.  The enable condition is not visible in this excerpt.
;; The mode attribute is V4SF to match the V4SF result of this
;; single-precision conversion (it previously said V2DF, although no
;; double-precision operand is involved).
985 (define_insn "sse2_cvtdq2ps"
986 [(set (match_operand:V4SF 0 "register_operand" "=x")
987 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
989 "cvtdq2ps\t{%1, %0|%0, %1}"
990 [(set_attr "type" "ssecvt")
991 (set_attr "mode" "V4SF")])
;; cvtps2dq: converts a V4SF operand (register or memory) to V4SI,
;; modelled with UNSPEC_FIX_NOTRUNC.  The enable condition is not visible
;; in this excerpt.
993 (define_insn "sse2_cvtps2dq"
994 [(set (match_operand:V4SI 0 "register_operand" "=x")
995 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
996 UNSPEC_FIX_NOTRUNC))]
998 "cvtps2dq\t{%1, %0|%0, %1}"
999 [(set_attr "type" "ssecvt")
1000 (set_attr "mode" "TI")])
;; cvttps2dq: converts a V4SF operand (register or memory) to V4SI with
;; fix.  The enable condition is not visible in this excerpt.
1002 (define_insn "sse2_cvttps2dq"
1003 [(set (match_operand:V4SI 0 "register_operand" "=x")
1004 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1006 "cvttps2dq\t{%1, %0|%0, %1}"
1007 [(set_attr "type" "ssecvt")
1008 (set_attr "mode" "TI")])
1010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1012 ;; Parallel single-precision floating point element swizzling
1014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection from operands 1 and 2 with three alternatives, emitted
;; as movhlps (register forms), movlps from the %H1 address (operand 1 in
;; offsettable memory) or movhps (destination in memory).  The condition
;; rejects operands 1 and 2 both being memory.  Only the first selection
;; index (6) is visible; the vec_select/vec_concat lines and the remaining
;; indices are not visible in this excerpt.
1016 (define_insn "sse_movhlps"
1017 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1020 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
1021 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
1022 (parallel [(const_int 6)
1026 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1028 movhlps\t{%2, %0|%0, %2}
1029 movlps\t{%H1, %0|%0, %H1}
1030 movhps\t{%1, %0|%0, %1}"
1031 [(set_attr "type" "ssemov")
1032 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection from operands 1 and 2 with three alternatives, emitted
;; as movlhps (register source), movhps (memory source) or movlps to the
;; %H0 address (destination in offsettable memory).  Operand 1 is tied to
;; the destination in every alternative.  Only the first selection index
;; (0) is visible; the vec_select/vec_concat lines and the remaining
;; indices are not visible in this excerpt.
1034 (define_insn "sse_movlhps"
1035 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1038 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1039 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1040 (parallel [(const_int 0)
1044 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1046 movlhps\t{%2, %0|%0, %2}
1047 movhps\t{%2, %0|%0, %2}
1048 movlps\t{%2, %H0|%H0, %2}"
1049 [(set_attr "type" "ssemov")
1050 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: selects indices 2, 6, 3, 7 from operands 1 and 2.  The lines
;; opening the vec_select and the operand combination, and the enable
;; condition, are not visible in this excerpt.
1052 (define_insn "sse_unpckhps"
1053 [(set (match_operand:V4SF 0 "register_operand" "=x")
1056 (match_operand:V4SF 1 "register_operand" "0")
1057 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1058 (parallel [(const_int 2) (const_int 6)
1059 (const_int 3) (const_int 7)])))]
1061 "unpckhps\t{%2, %0|%0, %2}"
1062 [(set_attr "type" "sselog")
1063 (set_attr "mode" "V4SF")])
;; unpcklps: selects indices 0, 4, 1, 5 from operands 1 and 2.  The lines
;; opening the vec_select and the operand combination, and the enable
;; condition, are not visible in this excerpt.
1065 (define_insn "sse_unpcklps"
1066 [(set (match_operand:V4SF 0 "register_operand" "=x")
1069 (match_operand:V4SF 1 "register_operand" "0")
1070 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1071 (parallel [(const_int 0) (const_int 4)
1072 (const_int 1) (const_int 5)])))]
1074 "unpcklps\t{%2, %0|%0, %2}"
1075 [(set_attr "type" "sselog")
1076 (set_attr "mode" "V4SF")])
1078 ;; These are modeled with the same vec_concat as the others so that we
1079 ;; capture users of shufps that can use the new instructions
;; sse3_movshdup: emits movshdup from a register-or-memory V4SF source into
;; an SSE register.  The selector starts at element 1.
1080 (define_insn "sse3_movshdup"
1081 [(set (match_operand:V4SF 0 "register_operand" "=x")
1084 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1086 (parallel [(const_int 1)
1091 "movshdup\t{%1, %0|%0, %1}"
1092 [(set_attr "type" "sse")
1093 (set_attr "mode" "V4SF")])
;; sse3_movsldup: emits movsldup from a register-or-memory V4SF source into
;; an SSE register.  The selector starts at element 0.
1095 (define_insn "sse3_movsldup"
1096 [(set (match_operand:V4SF 0 "register_operand" "=x")
1099 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1101 (parallel [(const_int 0)
1106 "movsldup\t{%1, %0|%0, %1}"
1107 [(set_attr "type" "sse")
1108 (set_attr "mode" "V4SF")])
;; sse_shufps: intrinsic-facing expander taking the shuffle immediate as
;; operand 3.  The mask is split into four 2-bit selectors; the upper two
;; are biased by 4 because sse_shufps_1 requires operands 5 and 6 to be in
;; the range 4..7.  The result is emitted through gen_sse_shufps_1.
1110 (define_expand "sse_shufps"
1111 [(match_operand:V4SF 0 "register_operand" "")
1112 (match_operand:V4SF 1 "register_operand" "")
1113 (match_operand:V4SF 2 "nonimmediate_operand" "")
1114 (match_operand:SI 3 "const_int_operand" "")]
1117 int mask = INTVAL (operands[3]);
1118 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1119 GEN_INT ((mask >> 0) & 3),
1120 GEN_INT ((mask >> 2) & 3),
1121 GEN_INT (((mask >> 4) & 3) + 4),
1122 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_1: shufps with the four element selectors as separate
;; operands (3 and 4 in 0..3, 5 and 6 in 4..7).  The output code packs them
;; back into one immediate, two bits per selector, after removing the bias
;; of 4 from operands 5 and 6.  It then overwrites operands[3] with that
;; immediate so the template can print it as %3.
1126 (define_insn "sse_shufps_1"
1127 [(set (match_operand:V4SF 0 "register_operand" "=x")
1130 (match_operand:V4SF 1 "register_operand" "0")
1131 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1132 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1133 (match_operand 4 "const_0_to_3_operand" "")
1134 (match_operand 5 "const_4_to_7_operand" "")
1135 (match_operand 6 "const_4_to_7_operand" "")])))]
1139 mask |= INTVAL (operands[3]) << 0;
1140 mask |= INTVAL (operands[4]) << 2;
1141 mask |= (INTVAL (operands[5]) - 4) << 4;
1142 mask |= (INTVAL (operands[6]) - 4) << 6;
1143 operands[3] = GEN_INT (mask);
1145 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1147 [(set_attr "type" "sselog")
1148 (set_attr "mode" "V4SF")])
;; sse_storehps: takes elements 2 and 3 of V4SF operand 1 into a V2SF
;; destination.  Alternatives: movhps to memory, movhlps between SSE
;; registers, or movlps from an offsettable memory source through %H1.
1150 (define_insn "sse_storehps"
1151 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1153 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1154 (parallel [(const_int 2) (const_int 3)])))]
1157 movhps\t{%1, %0|%0, %1}
1158 movhlps\t{%1, %0|%0, %1}
1159 movlps\t{%H1, %0|%0, %H1}"
1160 [(set_attr "type" "ssemov")
1161 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps: keeps elements 0 and 1 of operand 1 (tied to the
;; destination) and pairs them with V2SF operand 2.  Alternatives: movhps
;; from memory, movlhps from an SSE register, or movlps through %H0 when the
;; destination is offsettable memory.
1163 (define_insn "sse_loadhps"
1164 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1167 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1168 (parallel [(const_int 0) (const_int 1)]))
1169 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1172 movhps\t{%2, %0|%0, %2}
1173 movlhps\t{%2, %0|%0, %2}
1174 movlps\t{%2, %H0|%H0, %2}"
1175 [(set_attr "type" "ssemov")
1176 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_storelps: takes elements 0 and 1 of V4SF operand 1 into a V2SF
;; destination.  Alternatives: movlps to memory, movaps between SSE
;; registers, or movlps from memory.
1178 (define_insn "sse_storelps"
1179 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1181 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1182 (parallel [(const_int 0) (const_int 1)])))]
1185 movlps\t{%1, %0|%0, %1}
1186 movaps\t{%1, %0|%0, %1}
1187 movlps\t{%1, %0|%0, %1}"
1188 [(set_attr "type" "ssemov")
1189 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps: pairs V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  Alternative 0 (operand 2 tied to the destination) uses shufps
;; with immediate 0xe4 against operand 1.  The other two use movlps, either
;; loading operand 2 from memory or storing it into a memory destination.
1191 (define_insn "sse_loadlps"
1192 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1194 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1196 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1197 (parallel [(const_int 2) (const_int 3)]))))]
1200 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1201 movlps\t{%2, %0|%0, %2}
1202 movlps\t{%2, %0|%0, %2}"
1203 [(set_attr "type" "sselog,ssemov,ssemov")
1204 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movss: emits movss from SSE register operand 2 into the destination,
;; which is tied to operand 1.  Note that operand 2 appears before operand 1
;; in the pattern.
1206 (define_insn "sse_movss"
1207 [(set (match_operand:V4SF 0 "register_operand" "=x")
1209 (match_operand:V4SF 2 "register_operand" "x")
1210 (match_operand:V4SF 1 "register_operand" "0")
1213 "movss\t{%2, %0|%0, %2}"
1214 [(set_attr "type" "ssemov")
1215 (set_attr "mode" "SF")])
;; *vec_dupv4sf: SF operand 1 must already be in the destination register
;; (constraint "0"); the insn then emits shufps with immediate 0 using %0
;; as both source and destination.
1217 (define_insn "*vec_dupv4sf"
1218 [(set (match_operand:V4SF 0 "register_operand" "=x")
1220 (match_operand:SF 1 "register_operand" "0")))]
1222 "shufps\t{$0, %0, %0|%0, %0, 0}"
1223 [(set_attr "type" "sselog1")
1224 (set_attr "mode" "V4SF")])
1226 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1227 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1228 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse_concatv2sf: builds a V2SF from two SF operands.  SSE alternatives:
;; unpcklps with register operand 2 (operand 1 tied to the destination), or
;; movss from memory operand 1 when operand 2 satisfies constraint C.  The
;; two *y alternatives mirror these in MMX registers with punpckldq / movd.
1229 (define_insn "*sse_concatv2sf"
1230 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1232 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1233 (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
1236 unpcklps\t{%2, %0|%0, %2}
1237 movss\t{%1, %0|%0, %1}
1238 punpckldq\t{%2, %0|%0, %2}
1239 movd\t{%1, %0|%0, %1}"
1240 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1241 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *sse_concatv4sf: builds a V4SF from two V2SF halves.  Operand 1 is tied
;; to the destination; operand 2 is added with movlhps (SSE register) or
;; movhps (memory).
1243 (define_insn "*sse_concatv4sf"
1244 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1246 (match_operand:V2SF 1 "register_operand" " 0,0")
1247 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1250 movlhps\t{%2, %0|%0, %2}
1251 movhps\t{%2, %0|%0, %2}"
1252 [(set_attr "type" "ssemov")
1253 (set_attr "mode" "V4SF,V2SF")])
;; vec_initv4sf: expander for initialising V4SF operand 0 from operand 1;
;; all work is delegated to ix86_expand_vector_init, called with false as
;; its first argument.
1255 (define_expand "vec_initv4sf"
1256 [(match_operand:V4SF 0 "register_operand" "")
1257 (match_operand 1 "" "")]
1260 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_setv4sf_0: places SF operand 2 into a V4SF whose remaining contents
;; come from operand 1 (tied to the destination, or matching constraint C).
;; Register destinations use movss (SSE register or memory source) or movd
;; (general register source); the fourth alternative has a memory
;; destination.
1264 (define_insn "*vec_setv4sf_0"
1265 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1268 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1269 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1273 movss\t{%2, %0|%0, %2}
1274 movss\t{%2, %0|%0, %2}
1275 movd\t{%2, %0|%0, %2}
1277 [(set_attr "type" "ssemov")
1278 (set_attr "mode" "SF")])
;; After reload, a non-memory SF value written into a V4SF memory
;; destination is emitted as a plain SFmode move to offset 0 of that
;; memory.
1281 [(set (match_operand:V4SF 0 "memory_operand" "")
1284 (match_operand:SF 1 "nonmemory_operand" ""))
1287 "TARGET_SSE && reload_completed"
1290 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_setv4sf: expander that stores SF operand 1 into V4SF operand 0 at
;; the constant index given by operand 2; delegated to
;; ix86_expand_vector_set, called with false as its first argument.
1294 (define_expand "vec_setv4sf"
1295 [(match_operand:V4SF 0 "register_operand" "")
1296 (match_operand:SF 1 "register_operand" "")
1297 (match_operand 2 "const_int_operand" "")]
1300 ix86_expand_vector_set (false, operands[0], operands[1],
1301 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extracts element 0 of V4SF operand 1 as an SF value.
;; Memory-to-memory is rejected.  After reload the insn is split into an
;; ordinary SFmode move whose source is operand 1 viewed as SF, built either
;; with gen_rtx_REG on the same register number or with gen_lowpart.
1305 (define_insn_and_split "*vec_extractv4sf_0"
1306 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1308 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1309 (parallel [(const_int 0)])))]
1310 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1312 "&& reload_completed"
1315 rtx op1 = operands[1];
1317 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1319 op1 = gen_lowpart (SFmode, op1);
1320 emit_move_insn (operands[0], op1);
;; vec_extractv4sf: expander that reads the element of V4SF operand 1 at
;; the constant index in operand 2 into SF operand 0; delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
1324 (define_expand "vec_extractv4sf"
1325 [(match_operand:SF 0 "register_operand" "")
1326 (match_operand:V4SF 1 "register_operand" "")
1327 (match_operand 2 "const_int_operand" "")]
1330 ix86_expand_vector_extract (false, operands[0], operands[1],
1331 INTVAL (operands[2]));
1335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1337 ;; Parallel double-precision floating point arithmetic
1339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; negv2df2: V2DF negation.  The preparation statement hands the whole
;; expansion to ix86_expand_fp_absneg_operator and finishes with DONE, so
;; the RTL template itself is never emitted.
1341 (define_expand "negv2df2"
1342 [(set (match_operand:V2DF 0 "register_operand" "")
1343 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1345 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; absv2df2: V2DF absolute value.  The preparation statement hands the
;; whole expansion to ix86_expand_fp_absneg_operator and finishes with
;; DONE, so the RTL template itself is never emitted.
1347 (define_expand "absv2df2"
1348 [(set (match_operand:V2DF 0 "register_operand" "")
1349 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1351 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; addv2df3: expander for V2DF addition.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands for PLUS and then
;; emits the plus template.
1353 (define_expand "addv2df3"
1354 [(set (match_operand:V2DF 0 "register_operand" "")
1355 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1356 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1358 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; *addv2df3: packed double add (addpd).  The "%0" constraint marks
;; operands 1 and 2 as commutative and ties operand 1 to the destination;
;; operand 2 may be an SSE register or memory.
1360 (define_insn "*addv2df3"
1361 [(set (match_operand:V2DF 0 "register_operand" "=x")
1362 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1363 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1364 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1365 "addpd\t{%2, %0|%0, %2}"
1366 [(set_attr "type" "sseadd")
1367 (set_attr "mode" "V2DF")])
;; sse2_vmaddv2df3: intrinsic-facing scalar double add (addsd) expressed on
;; V2DF operands.  "%0" marks operands 1 and 2 as commutative and ties
;; operand 1 to the destination.  The operand check is given V2DFmode, the
;; mode of every operand in this pattern, in line with the *addv2df3 and
;; sse2_vmmulv2df3 conditions.
1369 (define_insn "sse2_vmaddv2df3"
1370 [(set (match_operand:V2DF 0 "register_operand" "=x")
1372 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1373 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1376 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1377 "addsd\t{%2, %0|%0, %2}"
1378 [(set_attr "type" "sseadd")
1379 (set_attr "mode" "DF")])
;; subv2df3: expander for V2DF subtraction.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands for MINUS and then
;; emits the minus template.
1381 (define_expand "subv2df3"
1382 [(set (match_operand:V2DF 0 "register_operand" "")
1383 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1384 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1386 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; *subv2df3: packed double subtract (subpd).  Not commutative: operand 1
;; must be a register tied to the destination; operand 2 may be an SSE
;; register or memory.
1388 (define_insn "*subv2df3"
1389 [(set (match_operand:V2DF 0 "register_operand" "=x")
1390 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1391 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1393 "subpd\t{%2, %0|%0, %2}"
1394 [(set_attr "type" "sseadd")
1395 (set_attr "mode" "V2DF")])
;; sse2_vmsubv2df3: intrinsic-facing scalar double subtract (subsd) on V2DF
;; operands; operand 1 is a register tied to the destination.
1397 (define_insn "sse2_vmsubv2df3"
1398 [(set (match_operand:V2DF 0 "register_operand" "=x")
1400 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1401 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1405 "subsd\t{%2, %0|%0, %2}"
1406 [(set_attr "type" "sseadd")
1407 (set_attr "mode" "DF")])
;; mulv2df3: expander for V2DF multiplication.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands for MULT and then
;; emits the mult template.
1409 (define_expand "mulv2df3"
1410 [(set (match_operand:V2DF 0 "register_operand" "")
1411 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1412 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1414 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; *mulv2df3: packed double multiply (mulpd).  "%0" marks operands 1 and 2
;; as commutative and ties operand 1 to the destination.
1416 (define_insn "*mulv2df3"
1417 [(set (match_operand:V2DF 0 "register_operand" "=x")
1418 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1419 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1420 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1421 "mulpd\t{%2, %0|%0, %2}"
1422 [(set_attr "type" "ssemul")
1423 (set_attr "mode" "V2DF")])
;; sse2_vmmulv2df3: intrinsic-facing scalar double multiply (mulsd) on V2DF
;; operands.  "%0" marks operands 1 and 2 as commutative and ties operand 1
;; to the destination.
1425 (define_insn "sse2_vmmulv2df3"
1426 [(set (match_operand:V2DF 0 "register_operand" "=x")
1428 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1429 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1432 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1433 "mulsd\t{%2, %0|%0, %2}"
1434 [(set_attr "type" "ssemul")
1435 (set_attr "mode" "DF")])
;; divv2df3: expander for V2DF division.  Operand 1 must already be a
;; register; ix86_fixup_binary_operands_no_copy is run on the operands for
;; DIV before the div template is emitted.
1437 (define_expand "divv2df3"
1438 [(set (match_operand:V2DF 0 "register_operand" "")
1439 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1440 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1442 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; *divv2df3: packed double divide (divpd).  Operand 1 is a register tied
;; to the destination; operand 2 may be an SSE register or memory.
1444 (define_insn "*divv2df3"
1445 [(set (match_operand:V2DF 0 "register_operand" "=x")
1446 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1447 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1449 "divpd\t{%2, %0|%0, %2}"
1450 [(set_attr "type" "ssediv")
1451 (set_attr "mode" "V2DF")])
;; sse2_vmdivv2df3: intrinsic-facing scalar double divide (divsd) on V2DF
;; operands; operand 1 is a register tied to the destination.
1453 (define_insn "sse2_vmdivv2df3"
1454 [(set (match_operand:V2DF 0 "register_operand" "=x")
1456 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1457 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1461 "divsd\t{%2, %0|%0, %2}"
1462 [(set_attr "type" "ssediv")
1463 (set_attr "mode" "DF")])
;; sqrtv2df2: packed double square root (sqrtpd) of a register-or-memory
;; source into an SSE register.
1465 (define_insn "sqrtv2df2"
1466 [(set (match_operand:V2DF 0 "register_operand" "=x")
1467 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1469 "sqrtpd\t{%1, %0|%0, %1}"
1470 [(set_attr "type" "sse")
1471 (set_attr "mode" "V2DF")])
;; sse2_vmsqrtv2df2: intrinsic-facing scalar double square root (sqrtsd).
;; Operand 1 is the value whose square root is taken; operand 2 is tied to
;; the destination.  The "mode" attribute is DF, as for the other
;; scalar-double patterns in this file (addsd, subsd, mulsd, divsd).
;; NOTE(review): operand 1 uses predicate register_operand although its
;; constraint is "xm", so the memory alternative is unreachable through
;; the predicate; confirm whether nonimmediate_operand was intended.
1473 (define_insn "sse2_vmsqrtv2df2"
1474 [(set (match_operand:V2DF 0 "register_operand" "=x")
1476 (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
1477 (match_operand:V2DF 2 "register_operand" "0")
1480 "sqrtsd\t{%1, %0|%0, %1}"
1481 [(set_attr "type" "sse")
1482 (set_attr "mode" "DF")])
1484 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1485 ;; isn't really correct, as those rtl operators aren't defined when
1486 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smaxv2df3: expander for V2DF maximum.  Unless flag_finite_math_only is
;; set, operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is applied for SMAX.
1488 (define_expand "smaxv2df3"
1489 [(set (match_operand:V2DF 0 "register_operand" "")
1490 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1491 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1494 if (!flag_finite_math_only)
1495 operands[1] = force_reg (V2DFmode, operands[1]);
1496 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; *smaxv2df3_finite: packed double maximum (maxpd), enabled only with
;; flag_finite_math_only.  In that mode the operands are treated as
;; commutative ("%0").
1499 (define_insn "*smaxv2df3_finite"
1500 [(set (match_operand:V2DF 0 "register_operand" "=x")
1501 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1502 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1503 "TARGET_SSE2 && flag_finite_math_only
1504 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1505 "maxpd\t{%2, %0|%0, %2}"
1506 [(set_attr "type" "sseadd")
1507 (set_attr "mode" "V2DF")])
;; *smaxv2df3: packed double maximum (maxpd) in its non-commutative form:
;; operand 1 must be a register tied to the destination, and no "%" marker
;; allows swapping with operand 2.
1509 (define_insn "*smaxv2df3"
1510 [(set (match_operand:V2DF 0 "register_operand" "=x")
1511 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1512 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1514 "maxpd\t{%2, %0|%0, %2}"
1515 [(set_attr "type" "sseadd")
1516 (set_attr "mode" "V2DF")])
;; *sse2_vmsmaxv2df3_finite: scalar double maximum (maxsd) on V2DF
;; operands, enabled only with flag_finite_math_only; operands are
;; commutative ("%0").
1518 (define_insn "*sse2_vmsmaxv2df3_finite"
1519 [(set (match_operand:V2DF 0 "register_operand" "=x")
1521 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1522 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1525 "TARGET_SSE2 && flag_finite_math_only
1526 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1527 "maxsd\t{%2, %0|%0, %2}"
1528 [(set_attr "type" "sseadd")
1529 (set_attr "mode" "DF")])
;; sse2_vmsmaxv2df3: intrinsic-facing scalar double maximum (maxsd) in its
;; non-commutative form; operand 1 is a register tied to the destination.
1531 (define_insn "sse2_vmsmaxv2df3"
1532 [(set (match_operand:V2DF 0 "register_operand" "=x")
1534 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1535 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1539 "maxsd\t{%2, %0|%0, %2}"
1540 [(set_attr "type" "sseadd")
1541 (set_attr "mode" "DF")])
;; sminv2df3: expander for V2DF minimum.  Unless flag_finite_math_only is
;; set, operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is applied for SMIN.
1543 (define_expand "sminv2df3"
1544 [(set (match_operand:V2DF 0 "register_operand" "")
1545 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1546 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1549 if (!flag_finite_math_only)
1550 operands[1] = force_reg (V2DFmode, operands[1]);
1551 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; *sminv2df3_finite: packed double minimum (minpd), enabled only with
;; flag_finite_math_only.  In that mode the operands are treated as
;; commutative ("%0").
1554 (define_insn "*sminv2df3_finite"
1555 [(set (match_operand:V2DF 0 "register_operand" "=x")
1556 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1557 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1558 "TARGET_SSE2 && flag_finite_math_only
1559 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1560 "minpd\t{%2, %0|%0, %2}"
1561 [(set_attr "type" "sseadd")
1562 (set_attr "mode" "V2DF")])
;; *sminv2df3: packed double minimum (minpd) in its non-commutative form:
;; operand 1 must be a register tied to the destination, and no "%" marker
;; allows swapping with operand 2.
1564 (define_insn "*sminv2df3"
1565 [(set (match_operand:V2DF 0 "register_operand" "=x")
1566 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1567 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1569 "minpd\t{%2, %0|%0, %2}"
1570 [(set_attr "type" "sseadd")
1571 (set_attr "mode" "V2DF")])
;; *sse2_vmsminv2df3_finite: scalar double minimum (minsd) on V2DF
;; operands, enabled only with flag_finite_math_only; operands are
;; commutative ("%0").
1573 (define_insn "*sse2_vmsminv2df3_finite"
1574 [(set (match_operand:V2DF 0 "register_operand" "=x")
1576 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1577 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1580 "TARGET_SSE2 && flag_finite_math_only
1581 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1582 "minsd\t{%2, %0|%0, %2}"
1583 [(set_attr "type" "sseadd")
1584 (set_attr "mode" "DF")])
;; sse2_vmsminv2df3: intrinsic-facing scalar double minimum (minsd) in its
;; non-commutative form; operand 1 is a register tied to the destination.
1586 (define_insn "sse2_vmsminv2df3"
1587 [(set (match_operand:V2DF 0 "register_operand" "=x")
1589 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1590 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1594 "minsd\t{%2, %0|%0, %2}"
1595 [(set_attr "type" "sseadd")
1596 (set_attr "mode" "DF")])
;; sse3_addsubv2df3: emits addsubpd.  The pattern reuses operands 1 and 2
;; (via match_dup) in a minus:V2DF arm; operand 1 is a register tied to the
;; destination.
1598 (define_insn "sse3_addsubv2df3"
1599 [(set (match_operand:V2DF 0 "register_operand" "=x")
1602 (match_operand:V2DF 1 "register_operand" "0")
1603 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1604 (minus:V2DF (match_dup 1) (match_dup 2))
1607 "addsubpd\t{%2, %0|%0, %2}"
1608 [(set_attr "type" "sseadd")
1609 (set_attr "mode" "V2DF")])
;; sse3_haddv2df3: emits haddpd.  Each result element combines element 0
;; and element 1 of one input: operand 1 (tied to the destination) for the
;; first, operand 2 (register or memory) for the second.
1611 (define_insn "sse3_haddv2df3"
1612 [(set (match_operand:V2DF 0 "register_operand" "=x")
1616 (match_operand:V2DF 1 "register_operand" "0")
1617 (parallel [(const_int 0)]))
1618 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1621 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1622 (parallel [(const_int 0)]))
1623 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1625 "haddpd\t{%2, %0|%0, %2}"
1626 [(set_attr "type" "sseadd")
1627 (set_attr "mode" "V2DF")])
;; sse3_hsubv2df3: emits hsubpd.  Each result element combines element 0
;; and element 1 of one input: operand 1 (tied to the destination) for the
;; first, operand 2 (register or memory) for the second.
1629 (define_insn "sse3_hsubv2df3"
1630 [(set (match_operand:V2DF 0 "register_operand" "=x")
1634 (match_operand:V2DF 1 "register_operand" "0")
1635 (parallel [(const_int 0)]))
1636 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1639 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1640 (parallel [(const_int 0)]))
1641 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1643 "hsubpd\t{%2, %0|%0, %2}"
1644 [(set_attr "type" "sseadd")
1645 (set_attr "mode" "V2DF")])
1647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1649 ;; Parallel double-precision floating point comparisons
1651 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_maskcmpv2df3: packed double comparison.  Operator 3 must satisfy
;; sse_comparison_operator; the %D3 modifier on it supplies the condition
;; part of the cmp..pd mnemonic.
1653 (define_insn "sse2_maskcmpv2df3"
1654 [(set (match_operand:V2DF 0 "register_operand" "=x")
1655 (match_operator:V2DF 3 "sse_comparison_operator"
1656 [(match_operand:V2DF 1 "register_operand" "0")
1657 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1659 "cmp%D3pd\t{%2, %0|%0, %2}"
1660 [(set_attr "type" "ssecmp")
1661 (set_attr "mode" "V2DF")])
;; sse2_vmmaskcmpv2df3: scalar double comparison.  Operator 3 must satisfy
;; sse_comparison_operator; the %D3 modifier on it supplies the condition
;; part of the cmp..sd mnemonic.
1663 (define_insn "sse2_vmmaskcmpv2df3"
1664 [(set (match_operand:V2DF 0 "register_operand" "=x")
1666 (match_operator:V2DF 3 "sse_comparison_operator"
1667 [(match_operand:V2DF 1 "register_operand" "0")
1668 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1672 "cmp%D3sd\t{%2, %0|%0, %2}"
1673 [(set_attr "type" "ssecmp")
1674 (set_attr "mode" "DF")])
;; sse2_comi: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory); emits
;; comisd.
1676 (define_insn "sse2_comi"
1677 [(set (reg:CCFP FLAGS_REG)
1680 (match_operand:V2DF 0 "register_operand" "x")
1681 (parallel [(const_int 0)]))
1683 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1684 (parallel [(const_int 0)]))))]
1686 "comisd\t{%1, %0|%0, %1}"
1687 [(set_attr "type" "ssecomi")
1688 (set_attr "mode" "DF")])
;; sse2_ucomi: sets FLAGS_REG in CCFPU mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory); emits
;; ucomisd.
1690 (define_insn "sse2_ucomi"
1691 [(set (reg:CCFPU FLAGS_REG)
1694 (match_operand:V2DF 0 "register_operand" "x")
1695 (parallel [(const_int 0)]))
1697 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1698 (parallel [(const_int 0)]))))]
1700 "ucomisd\t{%1, %0|%0, %1}"
1701 [(set_attr "type" "ssecomi")
1702 (set_attr "mode" "DF")])
;; vcondv2df: expander for a V2DF conditional select.  Operator 3 compares
;; operands 4 and 5; operands 1 and 2 are the two candidate values.  The
;; work is done by ix86_expand_fp_vcond, whose return value is tested.
1704 (define_expand "vcondv2df"
1705 [(set (match_operand:V2DF 0 "register_operand" "")
1707 (match_operator 3 ""
1708 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1709 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1710 (match_operand:V2DF 1 "general_operand" "")
1711 (match_operand:V2DF 2 "general_operand" "")))]
1714 if (ix86_expand_fp_vcond (operands))
1720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1722 ;; Parallel double-precision floating point logical operations
1724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; andv2df3: expander for bitwise AND on V2DF.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands for AND and then
;; emits the and template.
1726 (define_expand "andv2df3"
1727 [(set (match_operand:V2DF 0 "register_operand" "")
1728 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1729 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1731 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; *andv2df3: bitwise AND on V2DF (andpd).  "%0" marks operands 1 and 2 as
;; commutative and ties operand 1 to the destination.
1733 (define_insn "*andv2df3"
1734 [(set (match_operand:V2DF 0 "register_operand" "=x")
1735 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1736 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1737 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1738 "andpd\t{%2, %0|%0, %2}"
1739 [(set_attr "type" "sselog")
1740 (set_attr "mode" "V2DF")])
;; sse2_nandv2df3: computes (NOT operand 1) AND operand 2 on V2DF and emits
;; andnpd.  The complemented operand 1 is the one tied to the destination.
1742 (define_insn "sse2_nandv2df3"
1743 [(set (match_operand:V2DF 0 "register_operand" "=x")
1744 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1745 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1747 "andnpd\t{%2, %0|%0, %2}"
1748 [(set_attr "type" "sselog")
1749 (set_attr "mode" "V2DF")])
;; iorv2df3: expander for bitwise OR on V2DF.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands for IOR and then
;; emits the ior template.
1751 (define_expand "iorv2df3"
1752 [(set (match_operand:V2DF 0 "register_operand" "")
1753 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1754 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1756 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; *iorv2df3: bitwise OR on V2DF (orpd).  "%0" marks operands 1 and 2 as
;; commutative and ties operand 1 to the destination.
1758 (define_insn "*iorv2df3"
1759 [(set (match_operand:V2DF 0 "register_operand" "=x")
1760 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1761 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1762 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1763 "orpd\t{%2, %0|%0, %2}"
1764 [(set_attr "type" "sselog")
1765 (set_attr "mode" "V2DF")])
;; xorv2df3: expander for bitwise XOR on V2DF.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands for XOR and then
;; emits the xor template.
1767 (define_expand "xorv2df3"
1768 [(set (match_operand:V2DF 0 "register_operand" "")
1769 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1770 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1772 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; *xorv2df3: bitwise XOR on V2DF (xorpd).  "%0" marks operands 1 and 2 as
;; commutative and ties operand 1 to the destination.
1774 (define_insn "*xorv2df3"
1775 [(set (match_operand:V2DF 0 "register_operand" "=x")
1776 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1777 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1778 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1779 "xorpd\t{%2, %0|%0, %2}"
1780 [(set_attr "type" "sselog")
1781 (set_attr "mode" "V2DF")])
1783 ;; Also define scalar versions. These are used for abs, neg, and
1784 ;; conditional move. Using subregs into vector modes causes register
1785 ;; allocation lossage. These patterns do not allow memory operands
1786 ;; because the native instructions read the full 128-bits.
;; *anddf3: bitwise AND on scalar DF values, implemented with the packed
;; andpd instruction (hence mode V2DF).  Both inputs are register-only.
1788 (define_insn "*anddf3"
1789 [(set (match_operand:DF 0 "register_operand" "=x")
1790 (and:DF (match_operand:DF 1 "register_operand" "0")
1791 (match_operand:DF 2 "register_operand" "x")))]
1793 "andpd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "sselog")
1795 (set_attr "mode" "V2DF")])
;; *nanddf3: (NOT operand 1) AND operand 2 on scalar DF values, implemented
;; with the packed andnpd instruction (hence mode V2DF).  Both inputs are
;; register-only.
1797 (define_insn "*nanddf3"
1798 [(set (match_operand:DF 0 "register_operand" "=x")
1799 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1800 (match_operand:DF 2 "register_operand" "x")))]
1802 "andnpd\t{%2, %0|%0, %2}"
1803 [(set_attr "type" "sselog")
1804 (set_attr "mode" "V2DF")])
;; *iordf3: bitwise OR on scalar DF values, implemented with the packed
;; orpd instruction (hence mode V2DF).  Both inputs are register-only.
1806 (define_insn "*iordf3"
1807 [(set (match_operand:DF 0 "register_operand" "=x")
1808 (ior:DF (match_operand:DF 1 "register_operand" "0")
1809 (match_operand:DF 2 "register_operand" "x")))]
1811 "orpd\t{%2, %0|%0, %2}"
1812 [(set_attr "type" "sselog")
1813 (set_attr "mode" "V2DF")])
;; *xordf3: bitwise XOR on scalar DF values, implemented with the packed
;; xorpd instruction (hence mode V2DF).  Both inputs are register-only.
1815 (define_insn "*xordf3"
1816 [(set (match_operand:DF 0 "register_operand" "=x")
1817 (xor:DF (match_operand:DF 1 "register_operand" "0")
1818 (match_operand:DF 2 "register_operand" "x")))]
1820 "xorpd\t{%2, %0|%0, %2}"
1821 [(set_attr "type" "sselog")
1822 (set_attr "mode" "V2DF")])
1824 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1826 ;; Parallel double-precision floating point conversion operations
1828 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: converts a V2SI source to V2DF (float) with cvtpi2pd.
;; The source is an MMX register ("y") or memory; only the MMX-register
;; alternative is tagged with unit "mmx".
1830 (define_insn "sse2_cvtpi2pd"
1831 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1832 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1834 "cvtpi2pd\t{%1, %0|%0, %1}"
1835 [(set_attr "type" "ssecvt")
1836 (set_attr "unit" "mmx,*")
1837 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: converts V2DF to V2SI in an MMX register with cvtpd2pi.
;; Modelled as UNSPEC_FIX_NOTRUNC rather than the fix rtx code used by the
;; cvttpd2pi pattern.
1839 (define_insn "sse2_cvtpd2pi"
1840 [(set (match_operand:V2SI 0 "register_operand" "=y")
1841 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1842 UNSPEC_FIX_NOTRUNC))]
1844 "cvtpd2pi\t{%1, %0|%0, %1}"
1845 [(set_attr "type" "ssecvt")
1846 (set_attr "unit" "mmx")
1847 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: converts V2DF to V2SI in an MMX register using the fix
;; rtx code; emits cvttpd2pi.
;; NOTE(review): the "mode" attribute here is TI while sse2_cvtpd2pi uses
;; DI; confirm whether the difference is intentional.
1849 (define_insn "sse2_cvttpd2pi"
1850 [(set (match_operand:V2SI 0 "register_operand" "=y")
1851 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1853 "cvttpd2pi\t{%1, %0|%0, %1}"
1854 [(set_attr "type" "ssecvt")
1855 (set_attr "unit" "mmx")
1856 (set_attr "mode" "TI")])
;; sse2_cvtsi2sd: converts SI operand 2 (general register or memory) to DF
;; and places it in a V2DF whose other contents come from operand 1, tied
;; to the destination; emits cvtsi2sd.
1858 (define_insn "sse2_cvtsi2sd"
1859 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1862 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1863 (match_operand:V2DF 1 "register_operand" "0,0")
1866 "cvtsi2sd\t{%2, %0|%0, %2}"
1867 [(set_attr "type" "sseicvt")
1868 (set_attr "mode" "DF")
1869 (set_attr "athlon_decode" "double,direct")])
;; sse2_cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd, available only
;; when TARGET_64BIT; emits cvtsi2sdq.
1871 (define_insn "sse2_cvtsi2sdq"
1872 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1875 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1876 (match_operand:V2DF 1 "register_operand" "0,0")
1878 "TARGET_SSE2 && TARGET_64BIT"
1879 "cvtsi2sdq\t{%2, %0|%0, %2}"
1880 [(set_attr "type" "sseicvt")
1881 (set_attr "mode" "DF")
1882 (set_attr "athlon_decode" "double,direct")])
;; sse2_cvtsd2si: converts element 0 of V2DF operand 1 to an SI general
;; register with cvtsd2si; modelled as UNSPEC_FIX_NOTRUNC.
1884 (define_insn "sse2_cvtsd2si"
1885 [(set (match_operand:SI 0 "register_operand" "=r,r")
1888 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1889 (parallel [(const_int 0)]))]
1890 UNSPEC_FIX_NOTRUNC))]
1892 "cvtsd2si\t{%1, %0|%0, %1}"
1893 [(set_attr "type" "sseicvt")
1894 (set_attr "athlon_decode" "double,vector")
1895 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: DImode-result variant of sse2_cvtsd2si, available only
;; when TARGET_64BIT; emits cvtsd2siq.
1897 (define_insn "sse2_cvtsd2siq"
1898 [(set (match_operand:DI 0 "register_operand" "=r,r")
1901 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1902 (parallel [(const_int 0)]))]
1903 UNSPEC_FIX_NOTRUNC))]
1904 "TARGET_SSE2 && TARGET_64BIT"
1905 "cvtsd2siq\t{%1, %0|%0, %1}"
1906 [(set_attr "type" "sseicvt")
1907 (set_attr "athlon_decode" "double,vector")
1908 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: converts element 0 of V2DF operand 1 to an SI general
;; register with cvttsd2si.
1910 (define_insn "sse2_cvttsd2si"
1911 [(set (match_operand:SI 0 "register_operand" "=r,r")
1914 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1915 (parallel [(const_int 0)]))))]
1917 "cvttsd2si\t{%1, %0|%0, %1}"
1918 [(set_attr "type" "sseicvt")
1919 (set_attr "mode" "SI")
1920 (set_attr "athlon_decode" "double,vector")])
;; sse2_cvttsd2siq: DImode-result variant of sse2_cvttsd2si, available only
;; when TARGET_64BIT; emits cvttsd2siq.
1922 (define_insn "sse2_cvttsd2siq"
1923 [(set (match_operand:DI 0 "register_operand" "=r,r")
1926 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1927 (parallel [(const_int 0)]))))]
1928 "TARGET_SSE2 && TARGET_64BIT"
1929 "cvttsd2siq\t{%1, %0|%0, %1}"
1930 [(set_attr "type" "sseicvt")
1931 (set_attr "mode" "DI")
1932 (set_attr "athlon_decode" "double,vector")])
;; sse2_cvtdq2pd: converts elements 0 and 1 of V4SI operand 1 to a V2DF
;; result with cvtdq2pd.
1934 (define_insn "sse2_cvtdq2pd"
1935 [(set (match_operand:V2DF 0 "register_operand" "=x")
1938 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1939 (parallel [(const_int 0) (const_int 1)]))))]
1941 "cvtdq2pd\t{%1, %0|%0, %1}"
1942 [(set_attr "type" "ssecvt")
1943 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2dq: intrinsic-facing expander.  It sets operands[2] to the
;; V2SI zero constant, which the matching *sse2_cvtpd2dq insn requires via
;; its const0_operand predicate.
1945 (define_expand "sse2_cvtpd2dq"
1946 [(set (match_operand:V4SI 0 "register_operand" "")
1948 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1952 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: emits cvtpd2dq.  The V4SI result pairs the V2SI
;; conversion of operand 1 with operand 2, which must be the zero constant.
1954 (define_insn "*sse2_cvtpd2dq"
1955 [(set (match_operand:V4SI 0 "register_operand" "=x")
1957 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1959 (match_operand:V2SI 2 "const0_operand" "")))]
1961 "cvtpd2dq\t{%1, %0|%0, %1}"
1962 [(set_attr "type" "ssecvt")
1963 (set_attr "mode" "TI")])
;; sse2_cvttpd2dq: intrinsic-facing expander.  It sets operands[2] to the
;; V2SI zero constant, which the matching *sse2_cvttpd2dq insn requires via
;; its const0_operand predicate.
1965 (define_expand "sse2_cvttpd2dq"
1966 [(set (match_operand:V4SI 0 "register_operand" "")
1968 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1971 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvttpd2dq: emits cvttpd2dq.  The V4SI result pairs fix:V2SI of
;; operand 1 with operand 2, which must be the zero constant.
1973 (define_insn "*sse2_cvttpd2dq"
1974 [(set (match_operand:V4SI 0 "register_operand" "=x")
1976 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1977 (match_operand:V2SI 2 "const0_operand" "")))]
1979 "cvttpd2dq\t{%1, %0|%0, %1}"
1980 [(set_attr "type" "ssecvt")
1981 (set_attr "mode" "TI")])
;; sse2_cvtsd2ss: narrows V2DF operand 2 (float_truncate to V2SF) into a
;; V4SF whose other contents come from operand 1, tied to the destination;
;; emits cvtsd2ss.
1983 (define_insn "sse2_cvtsd2ss"
1984 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1987 (float_truncate:V2SF
1988 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1989 (match_operand:V4SF 1 "register_operand" "0,0")
1992 "cvtsd2ss\t{%2, %0|%0, %2}"
1993 [(set_attr "type" "ssecvt")
1994 (set_attr "athlon_decode" "vector,double")
1995 (set_attr "mode" "SF")])
;; sse2_cvtss2sd: takes elements 0 and 1 of V4SF operand 2 and produces a
;; V2DF whose other contents come from operand 1, tied to the destination;
;; emits cvtss2sd.
1997 (define_insn "sse2_cvtss2sd"
1998 [(set (match_operand:V2DF 0 "register_operand" "=x")
2002 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2003 (parallel [(const_int 0) (const_int 1)])))
2004 (match_operand:V2DF 1 "register_operand" "0")
2007 "cvtss2sd\t{%2, %0|%0, %2}"
2008 [(set_attr "type" "ssecvt")
2009 (set_attr "mode" "DF")])
;; sse2_cvtpd2ps: intrinsic-facing expander.  It sets operands[2] to the
;; V2SF zero constant, which the matching *sse2_cvtpd2ps insn requires via
;; its const0_operand predicate.
2011 (define_expand "sse2_cvtpd2ps"
2012 [(set (match_operand:V4SF 0 "register_operand" "")
2014 (float_truncate:V2SF
2015 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2018 "operands[2] = CONST0_RTX (V2SFmode);")
;; *sse2_cvtpd2ps: emits cvtpd2ps.  The V4SF result pairs the V2SF
;; truncation of operand 1 with operand 2, which must be the zero constant.
2020 (define_insn "*sse2_cvtpd2ps"
2021 [(set (match_operand:V4SF 0 "register_operand" "=x")
2023 (float_truncate:V2SF
2024 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2025 (match_operand:V2SF 2 "const0_operand" "")))]
2027 "cvtpd2ps\t{%1, %0|%0, %1}"
2028 [(set_attr "type" "ssecvt")
2029 (set_attr "mode" "V4SF")])
;; sse2_cvtps2pd: converts elements 0 and 1 of V4SF operand 1 to a V2DF
;; result with cvtps2pd.
2031 (define_insn "sse2_cvtps2pd"
2032 [(set (match_operand:V2DF 0 "register_operand" "=x")
2035 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2036 (parallel [(const_int 0) (const_int 1)]))))]
2038 "cvtps2pd\t{%1, %0|%0, %1}"
2039 [(set_attr "type" "ssecvt")
2040 (set_attr "mode" "V2DF")])
2042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2044 ;; Parallel double-precision floating point element swizzling
2046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_unpckhpd: alternative 0 emits unpckhpd with register operand 2;
;; alternative 1 emits movlpd reading memory operand 1 through %H1;
;; alternative 2 emits movhpd storing register operand 1 to memory.  The
;; condition rejects operands 1 and 2 both being memory.
2048 (define_insn "sse2_unpckhpd"
2049 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2052 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2053 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2054 (parallel [(const_int 1)
2056 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2058 unpckhpd\t{%2, %0|%0, %2}
2059 movlpd\t{%H1, %0|%0, %H1}
2060 movhpd\t{%1, %0|%0, %1}"
2061 [(set_attr "type" "sselog,ssemov,ssemov")
2062 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; *sse3_movddup: duplicates a double.  Alternative 0 loads a register from
;; a register or memory source with movddup; alternative 1 has an
;; offsettable memory destination and a register source.  The pattern
;; declares only operands 0 and 1, so the condition rules out the
;; memory-to-memory combination by testing exactly those two.
2064 (define_insn "*sse3_movddup"
2065 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2068 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2070 (parallel [(const_int 0)
2072 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2074 movddup\t{%1, %0|%0, %1}
2076 [(set_attr "type" "sselog,ssemov")
2077 (set_attr "mode" "V2DF")])
;; After reload, when the destination is memory and the source a register,
;; the low DFmode part of the source register is stored twice: once at
;; byte offset 0 and once at byte offset 8 of the destination.
2080 [(set (match_operand:V2DF 0 "memory_operand" "")
2083 (match_operand:V2DF 1 "register_operand" "")
2085 (parallel [(const_int 0)
2087 "TARGET_SSE3 && reload_completed"
2090 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2091 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2092 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; sse2_unpcklpd: operand 1 is always tied to the destination.
;; Alternatives emit unpcklpd (register operand 2), movhpd (memory
;; operand 2), or movlpd writing through %H0 when the destination is
;; offsettable memory.  The condition rejects operands 1 and 2 both being
;; memory.
2096 (define_insn "sse2_unpcklpd"
2097 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2100 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2101 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2102 (parallel [(const_int 0)
2104 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2106 unpcklpd\t{%2, %0|%0, %2}
2107 movhpd\t{%2, %0|%0, %2}
2108 movlpd\t{%2, %H0|%H0, %2}"
2109 [(set_attr "type" "sselog,ssemov,ssemov")
2110 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; sse2_shufpd: intrinsic-facing expander taking the shuffle immediate as
;; operand 3.  Bit 1 of the mask becomes the second selector, encoded as 3
;; or 2 to fit the 2..3 range sse2_shufpd_1 requires for its operand 4.
;; The result is emitted through gen_sse2_shufpd_1.
2112 (define_expand "sse2_shufpd"
2113 [(match_operand:V2DF 0 "register_operand" "")
2114 (match_operand:V2DF 1 "register_operand" "")
2115 (match_operand:V2DF 2 "nonimmediate_operand" "")
2116 (match_operand:SI 3 "const_int_operand" "")]
2119 int mask = INTVAL (operands[3]);
2120 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2122 GEN_INT (mask & 2 ? 3 : 2)));
;; sse2_shufpd_1: shufpd with the two element selectors as separate
;; operands (3 in 0..1, 4 in 2..3).  The output code rebuilds the
;; immediate: operand 3 supplies bit 0, and operand 4 minus its bias of 2
;; supplies bit 1.  It then overwrites operands[3] with the immediate so
;; the template can print it as %3.
2126 (define_insn "sse2_shufpd_1"
2127 [(set (match_operand:V2DF 0 "register_operand" "=x")
2130 (match_operand:V2DF 1 "register_operand" "0")
2131 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2132 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2133 (match_operand 4 "const_2_to_3_operand" "")])))]
2137 mask = INTVAL (operands[3]);
2138 mask |= (INTVAL (operands[4]) - 2) << 1;
2139 operands[3] = GEN_INT (mask);
2141 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2143 [(set_attr "type" "sselog")
2144 (set_attr "mode" "V2DF")])
;; sse2_storehpd: extracts element 1 of V2DF operand 1 as a DF value.
;; Alternative 0 stores to memory with movhpd; the other two alternatives
;; cover a register source tied to the destination and an offsettable
;; memory source.  Memory-to-memory is rejected by the condition.
2146 (define_insn "sse2_storehpd"
2147 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2149 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2150 (parallel [(const_int 1)])))]
2151 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2153 movhpd\t{%1, %0|%0, %1}
2156 [(set_attr "type" "ssemov,sselog1,ssemov")
2157 (set_attr "mode" "V1DF,V2DF,DF")])
2160 [(set (match_operand:DF 0 "register_operand" "")
2162 (match_operand:V2DF 1 "memory_operand" "")
2163 (parallel [(const_int 1)])))]
2164 "TARGET_SSE2 && reload_completed"
2165 [(set (match_dup 0) (match_dup 1))]
2167 operands[1] = adjust_address (operands[1], DFmode, 8);
2170 (define_insn "sse2_storelpd"
2171 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2173 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2174 (parallel [(const_int 0)])))]
2175 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2177 movlpd\t{%1, %0|%0, %1}
2180 [(set_attr "type" "ssemov")
2181 (set_attr "mode" "V1DF,DF,DF")])
2184 [(set (match_operand:DF 0 "register_operand" "")
2186 (match_operand:V2DF 1 "nonimmediate_operand" "")
2187 (parallel [(const_int 0)])))]
2188 "TARGET_SSE2 && reload_completed"
2191 rtx op1 = operands[1];
2193 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2195 op1 = gen_lowpart (DFmode, op1);
2196 emit_move_insn (operands[0], op1);
2200 (define_insn "sse2_loadhpd"
2201 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2204 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2205 (parallel [(const_int 0)]))
2206 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2207 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2209 movhpd\t{%2, %0|%0, %2}
2210 unpcklpd\t{%2, %0|%0, %2}
2211 shufpd\t{$1, %1, %0|%0, %1, 1}
2213 [(set_attr "type" "ssemov,sselog,sselog,other")
2214 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
2217 [(set (match_operand:V2DF 0 "memory_operand" "")
2219 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2220 (match_operand:DF 1 "register_operand" "")))]
2221 "TARGET_SSE2 && reload_completed"
2222 [(set (match_dup 0) (match_dup 1))]
2224 operands[0] = adjust_address (operands[0], DFmode, 8);
2227 (define_insn "sse2_loadlpd"
2228 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2230 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2232 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2233 (parallel [(const_int 1)]))))]
2234 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2236 movsd\t{%2, %0|%0, %2}
2237 movlpd\t{%2, %0|%0, %2}
2238 movsd\t{%2, %0|%0, %2}
2239 shufpd\t{$2, %2, %0|%0, %2, 2}
2240 movhpd\t{%H1, %0|%0, %H1}
2242 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2243 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF memory destination whose element 1 is
;; preserved (it is re-read from the destination itself) while operand 1
;; supplies the other element.  The whole insn reduces to one DFmode
;; store of operand 1 into the destination.
2246 [(set (match_operand:V2DF 0 "memory_operand" "")
2248 (match_operand:DF 1 "register_operand" "")
2249 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2250 "TARGET_SSE2 && reload_completed"
2251 [(set (match_dup 0) (match_dup 1))]
/* Operand 1 is listed ahead of the preserved element 1, i.e. it is
   element 0 of the result, which lives at byte offset 0.  Offset 8 is
   element 1 and must not be overwritten.  */
2253 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Six-alternative V2DF pattern.  In alternatives 0-2 operand 1 is tied
;; to the destination; in alternatives 3-5 operand 2 is.  Every template
;; line wraps its operands in an {att|intel} dialect group, so each
;; alternative has to close its own brace.
2256 (define_insn "sse2_movsd"
2257 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2259 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2260 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2264 movsd\t{%2, %0|%0, %2}
2265 movlpd\t{%2, %0|%0, %2}
2266 movlpd\t{%2, %0|%0, %2}
2267 shufpd\t{$2, %2, %0|%0, %2, 2}
2268 movhps\t{%H1, %0|%0, %H1}
2269 movhps\t{%1, %H0|%H0, %1}"
2270 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2271 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2273 (define_insn "*vec_dupv2df_sse3"
2274 [(set (match_operand:V2DF 0 "register_operand" "=x")
2276 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2278 "movddup\t{%1, %0|%0, %1}"
2279 [(set_attr "type" "sselog1")
2280 (set_attr "mode" "DF")])
2282 (define_insn "*vec_dupv2df"
2283 [(set (match_operand:V2DF 0 "register_operand" "=x")
2285 (match_operand:DF 1 "register_operand" "0")))]
2288 [(set_attr "type" "sselog1")
2289 (set_attr "mode" "V4SF")])
2291 (define_insn "*vec_concatv2df_sse3"
2292 [(set (match_operand:V2DF 0 "register_operand" "=x")
2294 (match_operand:DF 1 "nonimmediate_operand" "xm")
2297 "movddup\t{%1, %0|%0, %1}"
2298 [(set_attr "type" "sselog1")
2299 (set_attr "mode" "DF")])
2301 (define_insn "*vec_concatv2df"
2302 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2304 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2305 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2308 unpcklpd\t{%2, %0|%0, %2}
2309 movhpd\t{%2, %0|%0, %2}
2310 movsd\t{%1, %0|%0, %1}
2311 movlhps\t{%2, %0|%0, %2}
2312 movhps\t{%2, %0|%0, %2}"
2313 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2314 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2316 (define_expand "vec_setv2df"
2317 [(match_operand:V2DF 0 "register_operand" "")
2318 (match_operand:DF 1 "register_operand" "")
2319 (match_operand 2 "const_int_operand" "")]
2322 ix86_expand_vector_set (false, operands[0], operands[1],
2323 INTVAL (operands[2]));
2327 (define_expand "vec_extractv2df"
2328 [(match_operand:DF 0 "register_operand" "")
2329 (match_operand:V2DF 1 "register_operand" "")
2330 (match_operand 2 "const_int_operand" "")]
2333 ix86_expand_vector_extract (false, operands[0], operands[1],
2334 INTVAL (operands[2]));
2338 (define_expand "vec_initv2df"
2339 [(match_operand:V2DF 0 "register_operand" "")
2340 (match_operand 1 "" "")]
2343 ix86_expand_vector_init (false, operands[0], operands[1]);
2347 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2349 ;; Parallel integral arithmetic
2351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2353 (define_expand "neg<mode>2"
2354 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2357 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2359 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2361 (define_expand "add<mode>3"
2362 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2363 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2364 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2366 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2368 (define_insn "*add<mode>3"
2369 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2371 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2372 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2373 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2374 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2375 [(set_attr "type" "sseiadd")
2376 (set_attr "mode" "TI")])
2378 (define_insn "sse2_ssadd<mode>3"
2379 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2381 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2382 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2383 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2384 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2385 [(set_attr "type" "sseiadd")
2386 (set_attr "mode" "TI")])
2388 (define_insn "sse2_usadd<mode>3"
2389 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2391 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2392 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2393 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2394 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2395 [(set_attr "type" "sseiadd")
2396 (set_attr "mode" "TI")])
2398 (define_expand "sub<mode>3"
2399 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2400 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2401 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2403 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2405 (define_insn "*sub<mode>3"
2406 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2408 (match_operand:SSEMODEI 1 "register_operand" "0")
2409 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2411 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2412 [(set_attr "type" "sseiadd")
2413 (set_attr "mode" "TI")])
2415 (define_insn "sse2_sssub<mode>3"
2416 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2418 (match_operand:SSEMODE12 1 "register_operand" "0")
2419 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2421 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2422 [(set_attr "type" "sseiadd")
2423 (set_attr "mode" "TI")])
2425 (define_insn "sse2_ussub<mode>3"
2426 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2428 (match_operand:SSEMODE12 1 "register_operand" "0")
2429 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2431 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2432 [(set_attr "type" "sseiadd")
2433 (set_attr "mode" "TI")])
2435 (define_expand "mulv16qi3"
2436 [(set (match_operand:V16QI 0 "register_operand" "")
2437 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2438 (match_operand:V16QI 2 "register_operand" "")))]
2444 for (i = 0; i < 12; ++i)
2445 t[i] = gen_reg_rtx (V16QImode);
2447 /* Unpack data such that we've got a source byte in each low byte of
2448 each word. We don't care what goes into the high byte of each word.
2449 Rather than trying to get zero in there, most convenient is to let
2450 it be a copy of the low byte. */
2451 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2452 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2453 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2454 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2456 /* Multiply words. The end-of-line annotations here give a picture of what
2457 the output of that instruction looks like. Dot means don't care; the
2458 letters are the bytes of the result with A being the most significant. */
2459 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2460 gen_lowpart (V8HImode, t[0]),
2461 gen_lowpart (V8HImode, t[1])));
2462 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2463 gen_lowpart (V8HImode, t[2]),
2464 gen_lowpart (V8HImode, t[3])));
2466 /* Extract the relevant bytes and merge them back together. */
2467 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2468 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2469 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2470 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2471 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2472 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2475 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
2479 (define_expand "mulv8hi3"
2480 [(set (match_operand:V8HI 0 "register_operand" "")
2481 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2482 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2484 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2486 (define_insn "*mulv8hi3"
2487 [(set (match_operand:V8HI 0 "register_operand" "=x")
2488 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2489 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2490 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2491 "pmullw\t{%2, %0|%0, %2}"
2492 [(set_attr "type" "sseimul")
2493 (set_attr "mode" "TI")])
2495 (define_insn "sse2_smulv8hi3_highpart"
2496 [(set (match_operand:V8HI 0 "register_operand" "=x")
2501 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2503 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2505 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2506 "pmulhw\t{%2, %0|%0, %2}"
2507 [(set_attr "type" "sseimul")
2508 (set_attr "mode" "TI")])
2510 (define_insn "sse2_umulv8hi3_highpart"
2511 [(set (match_operand:V8HI 0 "register_operand" "=x")
2516 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2518 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2520 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2521 "pmulhuw\t{%2, %0|%0, %2}"
2522 [(set_attr "type" "sseimul")
2523 (set_attr "mode" "TI")])
;; pmuludq: takes elements 0 and 2 of each V4SI input and produces a
;; V2DI result.  The insn condition names the mode of the multiplied
;; inputs (V4SI), not V8HI as in the word-multiply patterns.
2525 (define_insn "sse2_umulv2siv2di3"
2526 [(set (match_operand:V2DI 0 "register_operand" "=x")
2530 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2531 (parallel [(const_int 0) (const_int 2)])))
2534 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2535 (parallel [(const_int 0) (const_int 2)])))))]
2536 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2537 "pmuludq\t{%2, %0|%0, %2}"
2538 [(set_attr "type" "sseimul")
2539 (set_attr "mode" "TI")])
2541 (define_insn "sse2_pmaddwd"
2542 [(set (match_operand:V4SI 0 "register_operand" "=x")
2547 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2548 (parallel [(const_int 0)
2554 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2555 (parallel [(const_int 0)
2561 (vec_select:V4HI (match_dup 1)
2562 (parallel [(const_int 1)
2567 (vec_select:V4HI (match_dup 2)
2568 (parallel [(const_int 1)
2571 (const_int 7)]))))))]
2573 "pmaddwd\t{%2, %0|%0, %2}"
2574 [(set_attr "type" "sseiadd")
2575 (set_attr "mode" "TI")])
2577 (define_expand "mulv4si3"
2578 [(set (match_operand:V4SI 0 "register_operand" "")
2579 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2580 (match_operand:V4SI 2 "register_operand" "")))]
2583 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2589 t1 = gen_reg_rtx (V4SImode);
2590 t2 = gen_reg_rtx (V4SImode);
2591 t3 = gen_reg_rtx (V4SImode);
2592 t4 = gen_reg_rtx (V4SImode);
2593 t5 = gen_reg_rtx (V4SImode);
2594 t6 = gen_reg_rtx (V4SImode);
2595 thirtytwo = GEN_INT (32);
2597 /* Multiply elements 2 and 0. */
2598 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2600 /* Shift both input vectors down one element, so that elements 3 and 1
2601 are now in the slots for elements 2 and 0. For K8, at least, this is
2602 faster than using a shuffle. */
2603 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2604 gen_lowpart (TImode, op1), thirtytwo));
2605 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2606 gen_lowpart (TImode, op2), thirtytwo));
2608 /* Multiply elements 3 and 1. */
2609 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2611 /* Move the results in element 2 down to element 1; we don't care what
2612 goes in elements 2 and 3. */
2613 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2614 const0_rtx, const0_rtx));
2615 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2616 const0_rtx, const0_rtx));
2618 /* Merge the parts back together. */
2619 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from sse2_umulv2siv2di3 multiplies, logical
;; shifts and adds: the product of the low parts plus the two
;; high-by-low cross products, each shifted left by 32 bits.
2623 (define_expand "mulv2di3"
2624 [(set (match_operand:V2DI 0 "register_operand" "")
2625 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2626 (match_operand:V2DI 2 "register_operand" "")))]
2629 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2635 t1 = gen_reg_rtx (V2DImode);
2636 t2 = gen_reg_rtx (V2DImode);
2637 t3 = gen_reg_rtx (V2DImode);
2638 t4 = gen_reg_rtx (V2DImode);
2639 t5 = gen_reg_rtx (V2DImode);
2640 t6 = gen_reg_rtx (V2DImode);
2641 thirtytwo = GEN_INT (32);
2643 /* Multiply low parts. */
2644 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2645 gen_lowpart (V4SImode, op2)));
2647 /* Shift input vectors right 32 bits so we can multiply high parts. */
2648 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2649 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2651 /* Multiply high parts by low parts. */
2652 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2653 gen_lowpart (V4SImode, t3)));
2654 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2655 gen_lowpart (V4SImode, t2)));
2657 /* Shift them back. */
2658 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2659 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2661 /* Add the three parts together. */
2662 emit_insn (gen_addv2di3 (t6, t1, t4));
2663 emit_insn (gen_addv2di3 (op0, t6, t5));
2667 (define_insn "ashr<mode>3"
2668 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2670 (match_operand:SSEMODE24 1 "register_operand" "0")
2671 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2673 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2674 [(set_attr "type" "sseishft")
2675 (set_attr "mode" "TI")])
2677 (define_insn "lshr<mode>3"
2678 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2679 (lshiftrt:SSEMODE248
2680 (match_operand:SSEMODE248 1 "register_operand" "0")
2681 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2683 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2684 [(set_attr "type" "sseishft")
2685 (set_attr "mode" "TI")])
2687 (define_insn "ashl<mode>3"
2688 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2690 (match_operand:SSEMODE248 1 "register_operand" "0")
2691 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2693 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2694 [(set_attr "type" "sseishft")
2695 (set_attr "mode" "TI")])
2697 (define_insn "sse2_ashlti3"
2698 [(set (match_operand:TI 0 "register_operand" "=x")
2699 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2700 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2703 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2704 return "pslldq\t{%2, %0|%0, %2}";
2706 [(set_attr "type" "sseishft")
2707 (set_attr "mode" "TI")])
2709 (define_expand "vec_shl_<mode>"
2710 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2711 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2712 (match_operand:SI 2 "general_operand" "")))]
2715 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2717 operands[0] = gen_lowpart (TImode, operands[0]);
2718 operands[1] = gen_lowpart (TImode, operands[1]);
2721 (define_insn "sse2_lshrti3"
2722 [(set (match_operand:TI 0 "register_operand" "=x")
2723 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2724 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2727 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2728 return "psrldq\t{%2, %0|%0, %2}";
2730 [(set_attr "type" "sseishft")
2731 (set_attr "mode" "TI")])
2733 (define_expand "vec_shr_<mode>"
2734 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2735 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2736 (match_operand:SI 2 "general_operand" "")))]
2739 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2741 operands[0] = gen_lowpart (TImode, operands[0]);
2742 operands[1] = gen_lowpart (TImode, operands[1]);
2745 (define_expand "umaxv16qi3"
2746 [(set (match_operand:V16QI 0 "register_operand" "")
2747 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2748 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2750 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2752 (define_insn "*umaxv16qi3"
2753 [(set (match_operand:V16QI 0 "register_operand" "=x")
2754 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2755 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2756 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2757 "pmaxub\t{%2, %0|%0, %2}"
2758 [(set_attr "type" "sseiadd")
2759 (set_attr "mode" "TI")])
2761 (define_expand "smaxv8hi3"
2762 [(set (match_operand:V8HI 0 "register_operand" "")
2763 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2764 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2766 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2768 (define_insn "*smaxv8hi3"
2769 [(set (match_operand:V8HI 0 "register_operand" "=x")
2770 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2771 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2772 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2773 "pmaxsw\t{%2, %0|%0, %2}"
2774 [(set_attr "type" "sseiadd")
2775 (set_attr "mode" "TI")])
2777 (define_expand "umaxv8hi3"
2778 [(set (match_operand:V8HI 0 "register_operand" "=x")
2779 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2780 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2782 (plus:V8HI (match_dup 0) (match_dup 2)))]
2785 operands[3] = operands[0];
2786 if (rtx_equal_p (operands[0], operands[2]))
2787 operands[0] = gen_reg_rtx (V8HImode);
2790 (define_expand "smax<mode>3"
2791 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2792 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2793 (match_operand:SSEMODE14 2 "register_operand" "")))]
2799 xops[0] = operands[0];
2800 xops[1] = operands[1];
2801 xops[2] = operands[2];
2802 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2803 xops[4] = operands[1];
2804 xops[5] = operands[2];
2805 ok = ix86_expand_int_vcond (xops);
2810 (define_expand "umaxv4si3"
2811 [(set (match_operand:V4SI 0 "register_operand" "")
2812 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2813 (match_operand:V4SI 2 "register_operand" "")))]
2819 xops[0] = operands[0];
2820 xops[1] = operands[1];
2821 xops[2] = operands[2];
2822 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2823 xops[4] = operands[1];
2824 xops[5] = operands[2];
2825 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned byte minimum.  The operands are fixed up
;; for the UMIN rtx code, the same code the pattern itself and the
;; matching *uminv16qi3 insn condition use.
2830 (define_expand "uminv16qi3"
2831 [(set (match_operand:V16QI 0 "register_operand" "")
2832 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2833 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2835 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2837 (define_insn "*uminv16qi3"
2838 [(set (match_operand:V16QI 0 "register_operand" "=x")
2839 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2840 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2841 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2842 "pminub\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "sseiadd")
2844 (set_attr "mode" "TI")])
2846 (define_expand "sminv8hi3"
2847 [(set (match_operand:V8HI 0 "register_operand" "")
2848 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2849 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2851 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
2853 (define_insn "*sminv8hi3"
2854 [(set (match_operand:V8HI 0 "register_operand" "=x")
2855 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2856 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2857 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2858 "pminsw\t{%2, %0|%0, %2}"
2859 [(set_attr "type" "sseiadd")
2860 (set_attr "mode" "TI")])
2862 (define_expand "smin<mode>3"
2863 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2864 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2865 (match_operand:SSEMODE14 2 "register_operand" "")))]
2871 xops[0] = operands[0];
2872 xops[1] = operands[2];
2873 xops[2] = operands[1];
2874 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2875 xops[4] = operands[1];
2876 xops[5] = operands[2];
2877 ok = ix86_expand_int_vcond (xops);
2882 (define_expand "umin<mode>3"
2883 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2884 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2885 (match_operand:SSEMODE24 2 "register_operand" "")))]
2891 xops[0] = operands[0];
2892 xops[1] = operands[2];
2893 xops[2] = operands[1];
2894 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2895 xops[4] = operands[1];
2896 xops[5] = operands[2];
2897 ok = ix86_expand_int_vcond (xops);
2902 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2904 ;; Parallel integral comparisons
2906 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2908 (define_insn "sse2_eq<mode>3"
2909 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2911 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2912 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2913 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2914 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2915 [(set_attr "type" "ssecmp")
2916 (set_attr "mode" "TI")])
2918 (define_insn "sse2_gt<mode>3"
2919 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2921 (match_operand:SSEMODE124 1 "register_operand" "0")
2922 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2924 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2925 [(set_attr "type" "ssecmp")
2926 (set_attr "mode" "TI")])
2928 (define_expand "vcond<mode>"
2929 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2930 (if_then_else:SSEMODE124
2931 (match_operator 3 ""
2932 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2933 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2934 (match_operand:SSEMODE124 1 "general_operand" "")
2935 (match_operand:SSEMODE124 2 "general_operand" "")))]
2938 if (ix86_expand_int_vcond (operands))
2944 (define_expand "vcondu<mode>"
2945 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2946 (if_then_else:SSEMODE12
2947 (match_operator 3 ""
2948 [(match_operand:SSEMODE12 4 "nonimmediate_operand" "")
2949 (match_operand:SSEMODE12 5 "nonimmediate_operand" "")])
2950 (match_operand:SSEMODE12 1 "general_operand" "")
2951 (match_operand:SSEMODE12 2 "general_operand" "")))]
2954 if (ix86_expand_int_vcond (operands))
2960 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2962 ;; Parallel integral logical operations
2964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2966 (define_expand "one_cmpl<mode>2"
2967 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2968 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2972 int i, n = GET_MODE_NUNITS (<MODE>mode);
2973 rtvec v = rtvec_alloc (n);
2975 for (i = 0; i < n; ++i)
2976 RTVEC_ELT (v, i) = constm1_rtx;
2978 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
2981 (define_expand "and<mode>3"
2982 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2983 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2984 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2986 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
2988 (define_insn "*and<mode>3"
2989 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2991 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2992 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2993 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
2994 "pand\t{%2, %0|%0, %2}"
2995 [(set_attr "type" "sselog")
2996 (set_attr "mode" "TI")])
2998 (define_insn "sse2_nand<mode>3"
2999 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3001 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3002 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3004 "pandn\t{%2, %0|%0, %2}"
3005 [(set_attr "type" "sselog")
3006 (set_attr "mode" "TI")])
3008 (define_expand "ior<mode>3"
3009 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3010 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3011 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3013 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3015 (define_insn "*ior<mode>3"
3016 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3018 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3019 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3020 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3021 "por\t{%2, %0|%0, %2}"
3022 [(set_attr "type" "sselog")
3023 (set_attr "mode" "TI")])
3025 (define_expand "xor<mode>3"
3026 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3027 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3028 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3030 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3032 (define_insn "*xor<mode>3"
3033 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3035 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3036 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3037 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3038 "pxor\t{%2, %0|%0, %2}"
3039 [(set_attr "type" "sselog")
3040 (set_attr "mode" "TI")])
3042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3044 ;; Parallel integral element swizzling
3046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3048 (define_insn "sse2_packsswb"
3049 [(set (match_operand:V16QI 0 "register_operand" "=x")
3052 (match_operand:V8HI 1 "register_operand" "0"))
3054 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3056 "packsswb\t{%2, %0|%0, %2}"
3057 [(set_attr "type" "sselog")
3058 (set_attr "mode" "TI")])
3060 (define_insn "sse2_packssdw"
3061 [(set (match_operand:V8HI 0 "register_operand" "=x")
3064 (match_operand:V4SI 1 "register_operand" "0"))
3066 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3068 "packssdw\t{%2, %0|%0, %2}"
3069 [(set_attr "type" "sselog")
3070 (set_attr "mode" "TI")])
3072 (define_insn "sse2_packuswb"
3073 [(set (match_operand:V16QI 0 "register_operand" "=x")
3076 (match_operand:V8HI 1 "register_operand" "0"))
3078 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3080 "packuswb\t{%2, %0|%0, %2}"
3081 [(set_attr "type" "sselog")
3082 (set_attr "mode" "TI")])
3084 (define_insn "sse2_punpckhbw"
3085 [(set (match_operand:V16QI 0 "register_operand" "=x")
3088 (match_operand:V16QI 1 "register_operand" "0")
3089 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3090 (parallel [(const_int 8) (const_int 24)
3091 (const_int 9) (const_int 25)
3092 (const_int 10) (const_int 26)
3093 (const_int 11) (const_int 27)
3094 (const_int 12) (const_int 28)
3095 (const_int 13) (const_int 29)
3096 (const_int 14) (const_int 30)
3097 (const_int 15) (const_int 31)])))]
3099 "punpckhbw\t{%2, %0|%0, %2}"
3100 [(set_attr "type" "sselog")
3101 (set_attr "mode" "TI")])
3103 (define_insn "sse2_punpcklbw"
3104 [(set (match_operand:V16QI 0 "register_operand" "=x")
3107 (match_operand:V16QI 1 "register_operand" "0")
3108 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3109 (parallel [(const_int 0) (const_int 16)
3110 (const_int 1) (const_int 17)
3111 (const_int 2) (const_int 18)
3112 (const_int 3) (const_int 19)
3113 (const_int 4) (const_int 20)
3114 (const_int 5) (const_int 21)
3115 (const_int 6) (const_int 22)
3116 (const_int 7) (const_int 23)])))]
3118 "punpcklbw\t{%2, %0|%0, %2}"
3119 [(set_attr "type" "sselog")
3120 (set_attr "mode" "TI")])
3122 (define_insn "sse2_punpckhwd"
3123 [(set (match_operand:V8HI 0 "register_operand" "=x")
3126 (match_operand:V8HI 1 "register_operand" "0")
3127 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3128 (parallel [(const_int 4) (const_int 12)
3129 (const_int 5) (const_int 13)
3130 (const_int 6) (const_int 14)
3131 (const_int 7) (const_int 15)])))]
3133 "punpckhwd\t{%2, %0|%0, %2}"
3134 [(set_attr "type" "sselog")
3135 (set_attr "mode" "TI")])
3137 (define_insn "sse2_punpcklwd"
3138 [(set (match_operand:V8HI 0 "register_operand" "=x")
3141 (match_operand:V8HI 1 "register_operand" "0")
3142 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3143 (parallel [(const_int 0) (const_int 8)
3144 (const_int 1) (const_int 9)
3145 (const_int 2) (const_int 10)
3146 (const_int 3) (const_int 11)])))]
3148 "punpcklwd\t{%2, %0|%0, %2}"
3149 [(set_attr "type" "sselog")
3150 (set_attr "mode" "TI")])
3152 (define_insn "sse2_punpckhdq"
3153 [(set (match_operand:V4SI 0 "register_operand" "=x")
3156 (match_operand:V4SI 1 "register_operand" "0")
3157 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3158 (parallel [(const_int 2) (const_int 6)
3159 (const_int 3) (const_int 7)])))]
3161 "punpckhdq\t{%2, %0|%0, %2}"
3162 [(set_attr "type" "sselog")
3163 (set_attr "mode" "TI")])
;; Named insn "sse2_punpckldq": emits punpckldq on V4SI operands.
;; Operand 1 is tied to the destination; operand 2 is an SSE register
;; or memory.  The selection vector is 0, 4, 1, 5.
3165 (define_insn "sse2_punpckldq"
3166 [(set (match_operand:V4SI 0 "register_operand" "=x")
3169 (match_operand:V4SI 1 "register_operand" "0")
3170 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3171 (parallel [(const_int 0) (const_int 4)
3172 (const_int 1) (const_int 5)])))]
3174 "punpckldq\t{%2, %0|%0, %2}"
3175 [(set_attr "type" "sselog")
3176 (set_attr "mode" "TI")])
;; Named insn "sse2_punpckhqdq": emits punpckhqdq on V2DI operands.
;; Operand 1 is tied to the destination; operand 2 is an SSE register
;; or memory.  The selection vector begins with index 1.
3178 (define_insn "sse2_punpckhqdq"
3179 [(set (match_operand:V2DI 0 "register_operand" "=x")
3182 (match_operand:V2DI 1 "register_operand" "0")
3183 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3184 (parallel [(const_int 1)
3187 "punpckhqdq\t{%2, %0|%0, %2}"
3188 [(set_attr "type" "sselog")
3189 (set_attr "mode" "TI")])
;; Named insn "sse2_punpcklqdq": emits punpcklqdq on V2DI operands.
;; Operand 1 is tied to the destination; operand 2 is an SSE register
;; or memory.  The selection vector begins with index 0.
3191 (define_insn "sse2_punpcklqdq"
3192 [(set (match_operand:V2DI 0 "register_operand" "=x")
3195 (match_operand:V2DI 1 "register_operand" "0")
3196 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3197 (parallel [(const_int 0)
3200 "punpcklqdq\t{%2, %0|%0, %2}"
3201 [(set_attr "type" "sselog")
3202 (set_attr "mode" "TI")])
;; Expander "sse2_pinsrw".  Takes the value to insert as an SImode
;; operand 2 and the element index (0..7) as operand 3.  The preparation
;; code replaces operand 2 by its HImode low part and rewrites the index
;; as the single-bit mask (1 << index), which is the form accepted by
;; the const_pow2_1_to_128_operand predicate of "*sse2_pinsrw".
3204 (define_expand "sse2_pinsrw"
3205 [(set (match_operand:V8HI 0 "register_operand" "")
3208 (match_operand:SI 2 "nonimmediate_operand" ""))
3209 (match_operand:V8HI 1 "register_operand" "")
3210 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3213 operands[2] = gen_lowpart (HImode, operands[2]);
3214 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Matching insn for pinsrw.  Operand 3 is a power-of-two mask
;; (1..128); the output code converts it back to an element index with
;; exact_log2 before printing.  Operand 2 (HImode, general register or
;; memory) is printed with the %k modifier, and operand 1 is tied to the
;; destination register.
3217 (define_insn "*sse2_pinsrw"
3218 [(set (match_operand:V8HI 0 "register_operand" "=x")
3221 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3222 (match_operand:V8HI 1 "register_operand" "0")
3223 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3226 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3227 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3229 [(set_attr "type" "sselog")
3230 (set_attr "mode" "TI")])
;; Named insn "sse2_pextrw": emits pextrw.  Operand 1 is a V8HI SSE
;; register, operand 2 is a constant element index in 0..7, and the
;; result lands in an SImode general register.
3232 (define_insn "sse2_pextrw"
3233 [(set (match_operand:SI 0 "register_operand" "=r")
3236 (match_operand:V8HI 1 "register_operand" "x")
3237 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3239 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3240 [(set_attr "type" "sselog")
3241 (set_attr "mode" "TI")])
;; Expander "sse2_pshufd".  Operand 2 is the packed shuffle immediate.
;; The preparation code splits it into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) and passes each as a separate constant to
;; gen_sse2_pshufd_1.
3243 (define_expand "sse2_pshufd"
3244 [(match_operand:V4SI 0 "register_operand" "")
3245 (match_operand:V4SI 1 "nonimmediate_operand" "")
3246 (match_operand:SI 2 "const_int_operand" "")]
3249 int mask = INTVAL (operands[2]);
3250 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3251 GEN_INT ((mask >> 0) & 3),
3252 GEN_INT ((mask >> 2) & 3),
3253 GEN_INT ((mask >> 4) & 3),
3254 GEN_INT ((mask >> 6) & 3)));
;; Insn "sse2_pshufd_1": pshufd with the four element selectors held as
;; separate operands 2..5, each in 0..3.  The output code packs them
;; back into one immediate (shifts 0, 2, 4, 6), stores it in
;; operands[2] and prints pshufd with that immediate.
3258 (define_insn "sse2_pshufd_1"
3259 [(set (match_operand:V4SI 0 "register_operand" "=x")
3261 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3262 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3263 (match_operand 3 "const_0_to_3_operand" "")
3264 (match_operand 4 "const_0_to_3_operand" "")
3265 (match_operand 5 "const_0_to_3_operand" "")])))]
3269 mask |= INTVAL (operands[2]) << 0;
3270 mask |= INTVAL (operands[3]) << 2;
3271 mask |= INTVAL (operands[4]) << 4;
3272 mask |= INTVAL (operands[5]) << 6;
3273 operands[2] = GEN_INT (mask);
3275 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3277 [(set_attr "type" "sselog1")
3278 (set_attr "mode" "TI")])
;; Expander "sse2_pshuflw".  Splits the packed immediate in operand 2
;; into four 2-bit selectors and hands them to gen_sse2_pshuflw_1.
3280 (define_expand "sse2_pshuflw"
3281 [(match_operand:V8HI 0 "register_operand" "")
3282 (match_operand:V8HI 1 "nonimmediate_operand" "")
3283 (match_operand:SI 2 "const_int_operand" "")]
3286 int mask = INTVAL (operands[2]);
3287 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3288 GEN_INT ((mask >> 0) & 3),
3289 GEN_INT ((mask >> 2) & 3),
3290 GEN_INT ((mask >> 4) & 3),
3291 GEN_INT ((mask >> 6) & 3)));
;; Insn "sse2_pshuflw_1": pshuflw with four selectors (operands 2..5,
;; each in 0..3) at the start of the selection vector.  The output code
;; repacks the selectors into one immediate (shifts 0, 2, 4, 6) in
;; operands[2] and prints pshuflw.
3295 (define_insn "sse2_pshuflw_1"
3296 [(set (match_operand:V8HI 0 "register_operand" "=x")
3298 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3299 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3300 (match_operand 3 "const_0_to_3_operand" "")
3301 (match_operand 4 "const_0_to_3_operand" "")
3302 (match_operand 5 "const_0_to_3_operand" "")
3310 mask |= INTVAL (operands[2]) << 0;
3311 mask |= INTVAL (operands[3]) << 2;
3312 mask |= INTVAL (operands[4]) << 4;
3313 mask |= INTVAL (operands[5]) << 6;
3314 operands[2] = GEN_INT (mask);
3316 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3318 [(set_attr "type" "sselog")
3319 (set_attr "mode" "TI")])
;; Expander "sse2_pshufhw".  Splits the packed immediate in operand 2
;; into four 2-bit selectors and adds 4 to each, so the values passed to
;; gen_sse2_pshufhw_1 fall in 4..7 as its const_4_to_7_operand
;; predicates require.
3321 (define_expand "sse2_pshufhw"
3322 [(match_operand:V8HI 0 "register_operand" "")
3323 (match_operand:V8HI 1 "nonimmediate_operand" "")
3324 (match_operand:SI 2 "const_int_operand" "")]
3327 int mask = INTVAL (operands[2]);
3328 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3329 GEN_INT (((mask >> 0) & 3) + 4),
3330 GEN_INT (((mask >> 2) & 3) + 4),
3331 GEN_INT (((mask >> 4) & 3) + 4),
3332 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn "sse2_pshufhw_1": pshufhw with four selectors (operands 2..5,
;; each in 4..7) at the end of the selection vector, which starts with
;; the fixed index 0.  The output code subtracts 4 from each selector,
;; packs the results into one immediate (shifts 0, 2, 4, 6) in
;; operands[2] and prints pshufhw.
3336 (define_insn "sse2_pshufhw_1"
3337 [(set (match_operand:V8HI 0 "register_operand" "=x")
3339 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3340 (parallel [(const_int 0)
3344 (match_operand 2 "const_4_to_7_operand" "")
3345 (match_operand 3 "const_4_to_7_operand" "")
3346 (match_operand 4 "const_4_to_7_operand" "")
3347 (match_operand 5 "const_4_to_7_operand" "")])))]
3351 mask |= (INTVAL (operands[2]) - 4) << 0;
3352 mask |= (INTVAL (operands[3]) - 4) << 2;
3353 mask |= (INTVAL (operands[4]) - 4) << 4;
3354 mask |= (INTVAL (operands[5]) - 4) << 6;
3355 operands[2] = GEN_INT (mask);
3357 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3359 [(set_attr "type" "sselog")
3360 (set_attr "mode" "TI")])
;; Expander "sse2_loadd".  Builds a V4SI register value from the SImode
;; operand 1; the preparation statement supplies operand 2 as the
;; all-zero V4SI constant (CONST0_RTX).
3362 (define_expand "sse2_loadd"
3363 [(set (match_operand:V4SI 0 "register_operand" "")
3366 (match_operand:SI 1 "nonimmediate_operand" ""))
3370 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn "sse2_loadld".  Three alternatives, all moving the SImode
;; operand 2 into the V4SI destination:
;;   1. movd  - operand 2 in memory or a general register, operand 1 a
;;              "C" constant;
;;   2. movss - operand 2 in memory, operand 1 a "C" constant;
;;   3. movss - operand 2 in an SSE register, operand 1 tied to the
;;              destination.
3372 (define_insn "sse2_loadld"
3373 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3376 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3377 (match_operand:V4SI 1 "vector_move_operand" " C,C,0")
3381 movd\t{%2, %0|%0, %2}
3382 movss\t{%2, %0|%0, %2}
3383 movss\t{%2, %0|%0, %2}"
3384 [(set_attr "type" "ssemov")
3385 (set_attr "mode" "TI,V4SF,SF")])
3387 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3388 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn-and-split "sse2_stored": stores element 0 of a V4SI SSE register
;; to an SImode destination (memory or SSE register).  Once reload has
;; completed it is split into a plain SImode move whose source is the
;; same hard register as operand 1, re-expressed in SImode.
3389 (define_insn_and_split "sse2_stored"
3390 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3392 (match_operand:V4SI 1 "register_operand" "x")
3393 (parallel [(const_int 0)])))]
3396 "&& reload_completed"
3397 [(set (match_dup 0) (match_dup 1))]
3399 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander "sse_storeq": stores element 0 of a V2DI register to a
;; DImode destination (register or memory).
3402 (define_expand "sse_storeq"
3403 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3405 (match_operand:V2DI 1 "register_operand" "")
3406 (parallel [(const_int 0)])))]
3410 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3411 ;; be taken into account, and movdi isn't fully populated even without.
;; Matching insn for the element-0 V2DI -> DImode store; the destination
;; may be memory or an SSE register ("=mx").
3412 (define_insn "*sse2_storeq"
3413 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3415 (match_operand:V2DI 1 "register_operand" "x")
3416 (parallel [(const_int 0)])))]
;; Companion split for the same store: when TARGET_SSE holds and reload
;; has completed, it becomes a plain DImode move from the same hard
;; register as operand 1, re-expressed in DImode.
3421 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3423 (match_operand:V2DI 1 "register_operand" "")
3424 (parallel [(const_int 0)])))]
3425 "TARGET_SSE && reload_completed"
3426 [(set (match_dup 0) (match_dup 1))]
3428 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Anonymous insn "*vec_dupv4si": builds a V4SI from the SImode register
;; operand 1.  Two alternatives:
;;   1. pshufd $0 from operand 1 into the destination (mode TI);
;;   2. shufps $0 of the destination with itself, with operand 1 tied to
;;      the destination (mode V4SF).
3431 (define_insn "*vec_dupv4si"
3432 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3434 (match_operand:SI 1 "register_operand" " Y,0")))]
3437 pshufd\t{$0, %1, %0|%0, %1, 0}
3438 shufps\t{$0, %0, %0|%0, %0, 0}"
3439 [(set_attr "type" "sselog1")
3440 (set_attr "mode" "TI,V4SF")])
;; Anonymous insn "*vec_dupv2di": builds a V2DI from the DImode register
;; operand 1, which is tied to the destination in both alternatives.
;; The alternatives are typed sselog1 (mode TI) and ssemov (mode V4SF).
3442 (define_insn "*vec_dupv2di"
3443 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3445 (match_operand:DI 1 "register_operand" " 0,0")))]
3450 [(set_attr "type" "sselog1,ssemov")
3451 (set_attr "mode" "TI,V4SF")])
3453 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3454 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3455 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Anonymous insn "*sse2_concatv2si": forms a V2SI from two SImode
;; operands.  Four alternatives:
;;   1. punpckldq - "Y" destination, operand 1 tied to it, operand 2 in
;;                  a "Y" register;
;;   2. movd      - "Y" destination, operand 1 from a general register or
;;                  memory, operand 2 the "C" constant;
;;   3. punpckldq - as 1, with "y" registers (type mmxcvt);
;;   4. movd      - as 2, with a "y" destination (type mmxmov).
3456 (define_insn "*sse2_concatv2si"
3457 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3459 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3460 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3463 punpckldq\t{%2, %0|%0, %2}
3464 movd\t{%1, %0|%0, %1}
3465 punpckldq\t{%2, %0|%0, %2}
3466 movd\t{%1, %0|%0, %1}"
3467 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3468 (set_attr "mode" "TI,TI,DI,DI")])
;; Anonymous insn "*sse1_concatv2si": the same V2SI construction as
;; "*sse2_concatv2si", but using "x" registers with unpcklps / movss for
;; the first two alternatives.  The last two alternatives use "y"
;; registers with punpckldq / movd.
3470 (define_insn "*sse1_concatv2si"
3471 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3473 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3474 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3477 unpcklps\t{%2, %0|%0, %2}
3478 movss\t{%1, %0|%0, %1}
3479 punpckldq\t{%2, %0|%0, %2}
3480 movd\t{%1, %0|%0, %1}"
3481 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3482 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Anonymous insn "*vec_concatv4si_1": forms a V4SI from two V2SI
;; operands, with operand 1 always tied to the destination.
;; Alternatives by location of operand 2:
;;   1. "Y" register - punpcklqdq;
;;   2. "x" register - movlhps;
;;   3. memory       - movhps.
3484 (define_insn "*vec_concatv4si_1"
3485 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3487 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3488 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3491 punpcklqdq\t{%2, %0|%0, %2}
3492 movlhps\t{%2, %0|%0, %2}
3493 movhps\t{%2, %0|%0, %2}"
3494 [(set_attr "type" "sselog,ssemov,ssemov")
3495 (set_attr "mode" "TI,V4SF,V2SF")])
;; Anonymous insn "*vec_concatv2di": forms a V2DI from two DImode
;; operands.  Six alternatives:
;;   1. movq       - operand 1 from memory, operand 2 the "C" constant;
;;   2. movq2dq    - operand 1 from a "y" register, operand 2 "C"
;;                   (alternative disparaged with "?");
;;   3. punpcklqdq - operand 1 tied to destination, operand 2 in "Y";
;;   4. movlhps    - operand 1 tied to destination, operand 2 in "x";
;;   5. movhps     - operand 1 tied to destination, operand 2 in memory;
;;   6. movlps     - operand 1 from memory, operand 2 tied to destination.
3497 (define_insn "*vec_concatv2di"
3498 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3500 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3501 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3504 movq\t{%1, %0|%0, %1}
3505 movq2dq\t{%1, %0|%0, %1}
3506 punpcklqdq\t{%2, %0|%0, %2}
3507 movlhps\t{%2, %0|%0, %2}
3508 movhps\t{%2, %0|%0, %2}
3509 movlps\t{%1, %0|%0, %1}"
3510 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3511 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander "vec_setv2di".  Operand 0 is the V2DI vector, operand 1 the
;; DImode value and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with `false' as its first
;; argument.
3513 (define_expand "vec_setv2di"
3514 [(match_operand:V2DI 0 "register_operand" "")
3515 (match_operand:DI 1 "register_operand" "")
3516 (match_operand 2 "const_int_operand" "")]
3519 ix86_expand_vector_set (false, operands[0], operands[1],
3520 INTVAL (operands[2]));
;; Expander "vec_extractv2di".  Operand 0 is the DImode result, operand 1
;; the V2DI vector and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
3524 (define_expand "vec_extractv2di"
3525 [(match_operand:DI 0 "register_operand" "")
3526 (match_operand:V2DI 1 "register_operand" "")
3527 (match_operand 2 "const_int_operand" "")]
3530 ix86_expand_vector_extract (false, operands[0], operands[1],
3531 INTVAL (operands[2]));
;; Expander "vec_initv2di".  Operand 0 is the V2DI destination; operand 1
;; has no predicate or mode and is passed unchanged to
;; ix86_expand_vector_init, called with `false' as its first argument.
3535 (define_expand "vec_initv2di"
3536 [(match_operand:V2DI 0 "register_operand" "")
3537 (match_operand 1 "" "")]
3540 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander "vec_setv4si".  Operand 0 is the V4SI vector, operand 1 the
;; SImode value and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with `false' as its first
;; argument.
3544 (define_expand "vec_setv4si"
3545 [(match_operand:V4SI 0 "register_operand" "")
3546 (match_operand:SI 1 "register_operand" "")
3547 (match_operand 2 "const_int_operand" "")]
3550 ix86_expand_vector_set (false, operands[0], operands[1],
3551 INTVAL (operands[2]));
;; Expander "vec_extractv4si".  Operand 0 is the SImode result, operand 1
;; the V4SI vector and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
3555 (define_expand "vec_extractv4si"
3556 [(match_operand:SI 0 "register_operand" "")
3557 (match_operand:V4SI 1 "register_operand" "")
3558 (match_operand 2 "const_int_operand" "")]
3561 ix86_expand_vector_extract (false, operands[0], operands[1],
3562 INTVAL (operands[2]));
;; Expander "vec_initv4si".  Operand 0 is the V4SI destination; operand 1
;; has no predicate or mode and is passed unchanged to
;; ix86_expand_vector_init, called with `false' as its first argument.
3566 (define_expand "vec_initv4si"
3567 [(match_operand:V4SI 0 "register_operand" "")
3568 (match_operand 1 "" "")]
3571 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander "vec_setv8hi".  Operand 0 is the V8HI vector, operand 1 the
;; HImode value and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with `false' as its first
;; argument.
3575 (define_expand "vec_setv8hi"
3576 [(match_operand:V8HI 0 "register_operand" "")
3577 (match_operand:HI 1 "register_operand" "")
3578 (match_operand 2 "const_int_operand" "")]
3581 ix86_expand_vector_set (false, operands[0], operands[1],
3582 INTVAL (operands[2]));
;; Expander "vec_extractv8hi".  Operand 0 is the HImode result, operand 1
;; the V8HI vector and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
3586 (define_expand "vec_extractv8hi"
3587 [(match_operand:HI 0 "register_operand" "")
3588 (match_operand:V8HI 1 "register_operand" "")
3589 (match_operand 2 "const_int_operand" "")]
3592 ix86_expand_vector_extract (false, operands[0], operands[1],
3593 INTVAL (operands[2]));
;; Expander "vec_initv8hi".  Operand 0 is the V8HI destination; operand 1
;; has no predicate or mode and is passed unchanged to
;; ix86_expand_vector_init, called with `false' as its first argument.
3597 (define_expand "vec_initv8hi"
3598 [(match_operand:V8HI 0 "register_operand" "")
3599 (match_operand 1 "" "")]
3602 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander "vec_setv16qi".  Operand 0 is the V16QI vector, operand 1 the
;; QImode value and operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_set, called with `false' as its first
;; argument.
3606 (define_expand "vec_setv16qi"
3607 [(match_operand:V16QI 0 "register_operand" "")
3608 (match_operand:QI 1 "register_operand" "")
3609 (match_operand 2 "const_int_operand" "")]
3612 ix86_expand_vector_set (false, operands[0], operands[1],
3613 INTVAL (operands[2]));
;; Expander "vec_extractv16qi".  Operand 0 is the QImode result,
;; operand 1 the V16QI vector and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_extract, called with
;; `false' as its first argument.
3617 (define_expand "vec_extractv16qi"
3618 [(match_operand:QI 0 "register_operand" "")
3619 (match_operand:V16QI 1 "register_operand" "")
3620 (match_operand 2 "const_int_operand" "")]
3623 ix86_expand_vector_extract (false, operands[0], operands[1],
3624 INTVAL (operands[2]));
;; Expander "vec_initv16qi".  Operand 0 is the V16QI destination;
;; operand 1 has no predicate or mode and is passed unchanged to
;; ix86_expand_vector_init, called with `false' as its first argument.
3628 (define_expand "vec_initv16qi"
3629 [(match_operand:V16QI 0 "register_operand" "")
3630 (match_operand 1 "" "")]
3633 ix86_expand_vector_init (false, operands[0], operands[1]);
3637 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named insn "sse2_uavgv16qi3": emits pavgb on V16QI operands.
;; Operands 1 and 2 are commutative ("%" on operand 1), with operand 1
;; tied to the destination.  The pattern includes a V16QI constant
;; vector of sixteen 1s (presumably the rounding increment of the
;; average; confirm against the full RTL).  Enabled under TARGET_SSE2
;; when ix86_binary_operator_ok accepts the operands for PLUS.
3643 (define_insn "sse2_uavgv16qi3"
3644 [(set (match_operand:V16QI 0 "register_operand" "=x")
3650 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3652 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3653 (const_vector:V16QI [(const_int 1) (const_int 1)
3654 (const_int 1) (const_int 1)
3655 (const_int 1) (const_int 1)
3656 (const_int 1) (const_int 1)
3657 (const_int 1) (const_int 1)
3658 (const_int 1) (const_int 1)
3659 (const_int 1) (const_int 1)
3660 (const_int 1) (const_int 1)]))
3662 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3663 "pavgb\t{%2, %0|%0, %2}"
3664 [(set_attr "type" "sseiadd")
3665 (set_attr "mode" "TI")])
;; Named insn "sse2_uavgv8hi3": emits pavgw on V8HI operands.
;; Operands 1 and 2 are commutative ("%" on operand 1), with operand 1
;; tied to the destination.  The pattern includes a V8HI constant vector
;; of eight 1s (presumably the rounding increment of the average;
;; confirm against the full RTL).  Enabled under TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
3667 (define_insn "sse2_uavgv8hi3"
3668 [(set (match_operand:V8HI 0 "register_operand" "=x")
3674 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3676 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3677 (const_vector:V8HI [(const_int 1) (const_int 1)
3678 (const_int 1) (const_int 1)
3679 (const_int 1) (const_int 1)
3680 (const_int 1) (const_int 1)]))
3682 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3683 "pavgw\t{%2, %0|%0, %2}"
3684 [(set_attr "type" "sseiadd")
3685 (set_attr "mode" "TI")])
3687 ;; The correct representation for this is absolutely enormous, and
3688 ;; surely not generally useful.
;; Named insn "sse2_psadbw": emits psadbw.  The operation is modelled as
;; an opaque unspec producing V2DI from two V16QI inputs; operand 1 is
;; tied to the destination and operand 2 may be an SSE register or
;; memory.
3689 (define_insn "sse2_psadbw"
3690 [(set (match_operand:V2DI 0 "register_operand" "=x")
3691 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3692 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3695 "psadbw\t{%2, %0|%0, %2}"
3696 [(set_attr "type" "sseiadd")
3697 (set_attr "mode" "TI")])
;; Named insn "sse_movmskps": emits movmskps, modelled as an unspec that
;; takes a V4SF SSE register and yields an SImode general register.
3699 (define_insn "sse_movmskps"
3700 [(set (match_operand:SI 0 "register_operand" "=r")
3701 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3704 "movmskps\t{%1, %0|%0, %1}"
3705 [(set_attr "type" "ssecvt")
3706 (set_attr "mode" "V4SF")])
;; Named insn "sse2_movmskpd": emits movmskpd, modelled as an unspec that
;; takes a V2DF SSE register and yields an SImode general register.
3708 (define_insn "sse2_movmskpd"
3709 [(set (match_operand:SI 0 "register_operand" "=r")
3710 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3713 "movmskpd\t{%1, %0|%0, %1}"
3714 [(set_attr "type" "ssecvt")
3715 (set_attr "mode" "V2DF")])
;; Named insn "sse2_pmovmskb": emits pmovmskb, modelled as an unspec that
;; takes a V16QI SSE register and yields an SImode general register.
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector; confirm whether this is intentional (e.g. relied on
;; by attribute definitions elsewhere) before changing it.
3717 (define_insn "sse2_pmovmskb"
3718 [(set (match_operand:SI 0 "register_operand" "=r")
3719 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3722 "pmovmskb\t{%1, %0|%0, %1}"
3723 [(set_attr "type" "ssecvt")
3724 (set_attr "mode" "V2DF")])
;; Expander "sse2_maskmovdqu": sets a V16QI memory destination from an
;; unspec whose inputs include the two V16QI register operands 1 and 2.
3726 (define_expand "sse2_maskmovdqu"
3727 [(set (match_operand:V16QI 0 "memory_operand" "")
3728 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3729 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is operand 0, an SImode register constrained to "D"; it does
;; not appear in the assembler template, which prints only operands 1
;; and 2.  The previous contents of the destination memory are listed as
;; an unspec input via (match_dup 0).
3735 (define_insn "*sse2_maskmovdqu"
3736 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3737 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3738 (match_operand:V16QI 2 "register_operand" "x")
3739 (mem:V16QI (match_dup 0))]
3741 "TARGET_SSE2 && !TARGET_64BIT"
3742 ;; @@@ check ordering of operands in intel/nonintel syntax
3743 "maskmovdqu\t{%2, %1|%1, %2}"
3744 [(set_attr "type" "ssecvt")
3745 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Identical
;; to the 32-bit pattern except that the address register (operand 0,
;; constraint "D") is DImode.
3747 (define_insn "*sse2_maskmovdqu_rex64"
3748 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3749 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3750 (match_operand:V16QI 2 "register_operand" "x")
3751 (mem:V16QI (match_dup 0))]
3753 "TARGET_SSE2 && TARGET_64BIT"
3754 ;; @@@ check ordering of operands in intel/nonintel syntax
3755 "maskmovdqu\t{%2, %1|%1, %2}"
3756 [(set_attr "type" "ssecvt")
3757 (set_attr "mode" "TI")])
;; Named insn "sse_ldmxcsr": an unspec_volatile taking one SImode memory
;; operand.  Its "memory" attribute is "load".
3759 (define_insn "sse_ldmxcsr"
3760 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3764 [(set_attr "type" "sse")
3765 (set_attr "memory" "load")])
;; Named insn "sse_stmxcsr": sets an SImode memory operand from the
;; volatile unspec UNSPECV_STMXCSR.  Its "memory" attribute is "store".
3767 (define_insn "sse_stmxcsr"
3768 [(set (match_operand:SI 0 "memory_operand" "=m")
3769 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3772 [(set_attr "type" "sse")
3773 (set_attr "memory" "store")])
;; Expander "sse_sfence" (TARGET_SSE || TARGET_3DNOW_A).  The
;; preparation code creates operand 0 as a BLKmode MEM whose address is
;; a SCRATCH and marks it volatile; the pattern uses it as the input of
;; an UNSPEC_SFENCE unspec.
3775 (define_expand "sse_sfence"
3777 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3778 "TARGET_SSE || TARGET_3DNOW_A"
3780 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3781 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the sfence pattern (TARGET_SSE || TARGET_3DNOW_A):
;; a BLKmode operand set from an UNSPEC_SFENCE unspec of itself.  The
;; "memory" attribute is "unknown".
3784 (define_insn "*sse_sfence"
3785 [(set (match_operand:BLK 0 "" "")
3786 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3787 "TARGET_SSE || TARGET_3DNOW_A"
3789 [(set_attr "type" "sse")
3790 (set_attr "memory" "unknown")])
;; Named insn "sse2_clflush": an unspec_volatile whose single operand is
;; an address (predicate address_operand, constraint "p").  The
;; "memory" attribute is "unknown".
3792 (define_insn "sse2_clflush"
3793 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3797 [(set_attr "type" "sse")
3798 (set_attr "memory" "unknown")])
;; Expander "sse2_mfence".  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a SCRATCH and marks it volatile; the
;; pattern uses it as the input of an UNSPEC_MFENCE unspec.
3800 (define_expand "sse2_mfence"
3802 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3805 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3806 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the mfence pattern: a BLKmode operand set from an
;; UNSPEC_MFENCE unspec of itself.  The "memory" attribute is "unknown".
3809 (define_insn "*sse2_mfence"
3810 [(set (match_operand:BLK 0 "" "")
3811 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3814 [(set_attr "type" "sse")
3815 (set_attr "memory" "unknown")])
;; Expander "sse2_lfence".  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a SCRATCH and marks it volatile; the
;; pattern uses it as the input of an UNSPEC_LFENCE unspec.
3817 (define_expand "sse2_lfence"
3819 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3822 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3823 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the lfence pattern: a BLKmode operand set from an
;; UNSPEC_LFENCE unspec of itself.  The "memory" attribute is "unknown".
3826 (define_insn "*sse2_lfence"
3827 [(set (match_operand:BLK 0 "" "")
3828 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3831 [(set_attr "type" "sse")
3832 (set_attr "memory" "unknown")])
;; Named insn "sse3_mwait": an unspec_volatile with two SImode register
;; operands pinned by constraints "a" and "c" respectively.  The insn
;; length is fixed at 3 bytes via the "length" attribute.
3834 (define_insn "sse3_mwait"
3835 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3836 (match_operand:SI 1 "register_operand" "c")]
3840 [(set_attr "length" "3")])
;; Named insn "sse3_monitor": an unspec_volatile with three SImode
;; register operands pinned by constraints "a", "c" and "d"
;; respectively; prints "monitor" followed by the three operands.  The
;; insn length is fixed at 3 bytes via the "length" attribute.
3842 (define_insn "sse3_monitor"
3843 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3844 (match_operand:SI 1 "register_operand" "c")
3845 (match_operand:SI 2 "register_operand" "d")]
3848 "monitor\t%0, %1, %2"
3849 [(set_attr "length" "3")])