1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes above.  The digits in each macro
;; name line up with the element sizes, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for V16QI/V8HI/V4SI/V2DI respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Whole-vector move expander, instantiated for every SSEMODEI mode.
;; The visible preparation statement passes both operands to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI integer vector modes.  Per the constraint
;; strings: alternative 0 takes a "C" constant into an SSE register,
;; alternative 1 copies register-or-memory into a register, and
;; alternative 2 stores a register to memory.  The condition rejects
;; memory-to-memory moves.  The output code returns the single-precision
;; mnemonic (xorps / movaps) when the insn's "mode" attribute is V4SF and
;; the integer one (pxor / movdqa) otherwise.  The "mode" attribute is
;; computed from TARGET_SSE2, optimize_size and
;; TARGET_SSE_TYPELESS_STORES, with "TI" as the final default.
;; NOTE(review): the case labels and some attribute lines are not visible
;; in this listing; confirm which alternative maps to which return.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
87 (eq_attr "alternative" "0,1")
89 (ne (symbol_ref "optimize_size")
93 (eq_attr "alternative" "2")
95 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
97 (ne (symbol_ref "optimize_size")
100 (const_string "TI"))]
101 (const_string "TI")))])
;; V4SF move expander; the visible preparation statement passes the
;; operands to ix86_expand_vector_move with V4SFmode.
103 (define_expand "movv4sf"
104 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
105 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
108 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn with three alternatives: "C" constant to register,
;; register-or-memory to register, and register to memory.  The visible
;; template lines emit movaps; alternative 0 is typed sselog1 and the
;; other two ssemov.
;; NOTE(review): the condition and the first template line are not
;; visible in this listing.
112 (define_insn "*movv4sf_internal"
113 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
114 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
118 movaps\t{%1, %0|%0, %1}
119 movaps\t{%1, %0|%0, %1}"
120 [(set_attr "type" "sselog1,ssemov,ssemov")
121 (set_attr "mode" "V4SF")])
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to an SFmode subreg at offset 0 and sets operand 2 to the
;; V4SF zero constant; the replacement pattern duplicates operand 1 with
;; vec_duplicate.
;; NOTE(review): the opening form and part of the replacement pattern are
;; not visible in this listing.
124 [(set (match_operand:V4SF 0 "register_operand" "")
125 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
126 "TARGET_SSE && reload_completed"
129 (vec_duplicate:V4SF (match_dup 1))
133 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
134 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; the visible preparation statement passes the
;; operands to ix86_expand_vector_move with V2DFmode.
137 (define_expand "movv2df"
138 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
139 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
142 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn with three alternatives: "C" constant to register,
;; register-or-memory to register, and register to memory; the condition
;; rejects memory-to-memory moves.  The output code returns the
;; single-precision mnemonic (xorps / movaps) when the insn's "mode"
;; attribute is V4SF and the double-precision one (xorpd / movapd)
;; otherwise.  The "mode" attribute is V4SF when TARGET_SSE2 is zero;
;; otherwise it is chosen between V4SF and V2DF from optimize_size and
;; TARGET_SSE_TYPELESS_STORES, defaulting to V2DF.
;; NOTE(review): the case labels and some attribute lines are not visible
;; in this listing.
146 (define_insn "*movv2df_internal"
147 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
148 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
149 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
151 switch (which_alternative)
154 if (get_attr_mode (insn) == MODE_V4SF)
155 return "xorps\t%0, %0";
157 return "xorpd\t%0, %0";
160 if (get_attr_mode (insn) == MODE_V4SF)
161 return "movaps\t{%1, %0|%0, %1}";
163 return "movapd\t{%1, %0|%0, %1}";
168 [(set_attr "type" "sselog1,ssemov,ssemov")
170 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
171 (const_string "V4SF")
172 (eq_attr "alternative" "0,1")
174 (ne (symbol_ref "optimize_size")
176 (const_string "V4SF")
177 (const_string "V2DF"))
178 (eq_attr "alternative" "2")
180 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
182 (ne (symbol_ref "optimize_size")
184 (const_string "V4SF")
185 (const_string "V2DF"))]
186 (const_string "V2DF")))])
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The load becomes a vec_concat of
;; the DFmode subreg of operand 1 (offset 0) with a DFmode zero constant.
;; NOTE(review): the opening form is not visible in this listing.
189 [(set (match_operand:V2DF 0 "register_operand" "")
190 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
191 "TARGET_SSE2 && reload_completed"
192 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
194 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
195 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode; the visible preparation
;; statement passes register operand 0 to ix86_expand_push.
198 (define_expand "push<mode>1"
199 [(match_operand:SSEMODE 0 "register_operand" "")]
202 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE vector mode; the visible
;; preparation statement passes the operands to
;; ix86_expand_vector_move_misalign.
206 (define_expand "movmisalign<mode>"
207 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
208 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
211 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move wrapped in an unspec, emitting movups.  Alternatives:
;; register-or-memory to register, and register to memory; the condition
;; rejects memory-to-memory moves.
;; The "mode" attribute is V4SF: both operands are V4SF and movups is the
;; packed single-precision form.  The previous value "V2DF" described a
;; packed double-precision operation, which this pattern never emits.
215 (define_insn "sse_movups"
216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
217 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
219 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
220 "movups\t{%1, %0|%0, %1}"
221 [(set_attr "type" "ssemov")
222 (set_attr "mode" "V4SF")])
;; V2DF move wrapped in an unspec, emitting movupd.  Requires TARGET_SSE2;
;; the condition rejects memory-to-memory moves.
224 (define_insn "sse2_movupd"
225 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
226 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
228 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
229 "movupd\t{%1, %0|%0, %1}"
230 [(set_attr "type" "ssemov")
231 (set_attr "mode" "V2DF")])
;; V16QI move wrapped in an unspec, emitting movdqu.  Requires
;; TARGET_SSE2; the condition rejects memory-to-memory moves.
233 (define_insn "sse2_movdqu"
234 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
235 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
237 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
238 "movdqu\t{%1, %0|%0, %1}"
239 [(set_attr "type" "ssemov")
240 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory, wrapped in an unspec and
;; emitted as movntps.
242 (define_insn "sse_movntv4sf"
243 [(set (match_operand:V4SF 0 "memory_operand" "=m")
244 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
247 "movntps\t{%1, %0|%0, %1}"
248 [(set_attr "type" "ssemov")
249 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory, wrapped in an unspec and
;; emitted as movntpd.
;; NOTE(review): this store is typed "ssecvt" while sse_movntv4sf uses
;; "ssemov" -- confirm whether the difference is intended.
251 (define_insn "sse2_movntv2df"
252 [(set (match_operand:V2DF 0 "memory_operand" "=m")
253 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
256 "movntpd\t{%1, %0|%0, %1}"
257 [(set_attr "type" "ssecvt")
258 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory, wrapped in an unspec and
;; emitted as movntdq.
;; NOTE(review): this store is typed "ssecvt" while sse_movntv4sf uses
;; "ssemov" -- confirm whether the difference is intended.
260 (define_insn "sse2_movntv2di"
261 [(set (match_operand:V2DI 0 "memory_operand" "=m")
262 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
265 "movntdq\t{%1, %0|%0, %1}"
266 [(set_attr "type" "ssecvt")
267 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, wrapped in an
;; unspec and emitted as movnti.
;; The "mode" attribute is SI to match the 32-bit integer operands.  The
;; previous value "V2DF" described a packed double-precision vector
;; operation, which does not fit a general-register store.
269 (define_insn "sse2_movntsi"
270 [(set (match_operand:SI 0 "memory_operand" "=m")
271 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
274 "movnti\t{%1, %0|%0, %1}"
275 [(set_attr "type" "ssecvt")
276 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register, wrapped in an
;; unspec and emitted as lddqu.
278 (define_insn "sse3_lddqu"
279 [(set (match_operand:V16QI 0 "register_operand" "=x")
280 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
283 "lddqu\t{%1, %0|%0, %1}"
284 [(set_attr "type" "ssecvt")
285 (set_attr "mode" "TI")])
287 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
289 ;; Parallel single-precision floating point arithmetic
291 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator with code NEG, after which the expander
;; finishes with DONE.
293 (define_expand "negv4sf2"
294 [(set (match_operand:V4SF 0 "register_operand" "")
295 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
297 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator with code ABS, after which the expander
;; finishes with DONE.
299 (define_expand "absv4sf2"
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
303 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for PLUS on the operands before the
;; (plus ...) pattern is emitted.
305 (define_expand "addv4sf3"
306 [(set (match_operand:V4SF 0 "register_operand" "")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
308 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
310 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF addition, emitted as addps.  Operand 1 is tied to the
;; destination ("0") and marked commutative with operand 2 ("%"); operand 2
;; may be a register or memory.  Gated on ix86_binary_operator_ok.
312 (define_insn "*addv4sf3"
313 [(set (match_operand:V4SF 0 "register_operand" "=x")
314 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
315 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
316 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
317 "addps\t{%2, %0|%0, %2}"
318 [(set_attr "type" "sseadd")
319 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF addition: the template is addss and the "mode"
;; attribute is SF.  Operand 1 is tied to the destination.
;; NOTE(review): the RTL wrapped around the (plus ...) expression is not
;; visible in this listing.
321 (define_insn "sse_vmaddv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
324 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
325 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
328 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
329 "addss\t{%2, %0|%0, %2}"
330 [(set_attr "type" "sseadd")
331 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy for
;; MINUS.
333 (define_expand "subv4sf3"
334 [(set (match_operand:V4SF 0 "register_operand" "")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
336 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
338 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtraction, emitted as subps.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
340 (define_insn "*subv4sf3"
341 [(set (match_operand:V4SF 0 "register_operand" "=x")
342 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
343 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
345 "subps\t{%2, %0|%0, %2}"
346 [(set_attr "type" "sseadd")
347 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF subtraction: the template is subss and the "mode"
;; attribute is SF.  Operand 1 is tied to the destination.
;; NOTE(review): the RTL wrapped around the (minus ...) expression and the
;; condition are not visible in this listing.
349 (define_insn "sse_vmsubv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
352 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
353 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
357 "subss\t{%2, %0|%0, %2}"
358 [(set_attr "type" "sseadd")
359 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MULT.
361 (define_expand "mulv4sf3"
362 [(set (match_operand:V4SF 0 "register_operand" "")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
364 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
366 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiplication, emitted as mulps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); gated on
;; ix86_binary_operator_ok.
368 (define_insn "*mulv4sf3"
369 [(set (match_operand:V4SF 0 "register_operand" "=x")
370 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
371 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
372 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
373 "mulps\t{%2, %0|%0, %2}"
374 [(set_attr "type" "ssemul")
375 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF multiplication: the template is mulss and the "mode"
;; attribute is SF.  Operand 1 is tied to the destination.
;; NOTE(review): the RTL wrapped around the (mult ...) expression is not
;; visible in this listing.
377 (define_insn "sse_vmmulv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
380 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
381 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
384 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
385 "mulss\t{%2, %0|%0, %2}"
386 [(set_attr "type" "ssemul")
387 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register; the preparation
;; statement runs ix86_fixup_binary_operands_no_copy for DIV.
389 (define_expand "divv4sf3"
390 [(set (match_operand:V4SF 0 "register_operand" "")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
392 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
394 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed V4SF division, emitted as divps.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
396 (define_insn "*divv4sf3"
397 [(set (match_operand:V4SF 0 "register_operand" "=x")
398 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
399 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
401 "divps\t{%2, %0|%0, %2}"
402 [(set_attr "type" "ssediv")
403 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF division: the template is divss and the "mode"
;; attribute is SF.  Operand 1 is tied to the destination.
;; NOTE(review): the RTL wrapped around the (div ...) expression and the
;; condition are not visible in this listing.
405 (define_insn "sse_vmdivv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "=x")
408 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
409 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
413 "divss\t{%2, %0|%0, %2}"
414 [(set_attr "type" "ssediv")
415 (set_attr "mode" "SF")])
;; UNSPEC_RCP applied to a V4SF register-or-memory operand, emitted as
;; rcpps.
417 (define_insn "sse_rcpv4sf2"
418 [(set (match_operand:V4SF 0 "register_operand" "=x")
420 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
422 "rcpps\t{%1, %0|%0, %1}"
423 [(set_attr "type" "sse")
424 (set_attr "mode" "V4SF")])
;; Scalar-form counterpart of sse_rcpv4sf2: the template is rcpss and the
;; "mode" attribute is SF.  Operand 2 is a register tied to the
;; destination.
;; NOTE(review): the unspec code and the RTL combining operands 1 and 2
;; are not visible in this listing.
426 (define_insn "sse_vmrcpv4sf2"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
429 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
431 (match_operand:V4SF 2 "register_operand" "0")
434 "rcpss\t{%1, %0|%0, %1}"
435 [(set_attr "type" "sse")
436 (set_attr "mode" "SF")])
;; UNSPEC_RSQRT applied to a V4SF register-or-memory operand, emitted as
;; rsqrtps.
438 (define_insn "sse_rsqrtv4sf2"
439 [(set (match_operand:V4SF 0 "register_operand" "=x")
441 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
443 "rsqrtps\t{%1, %0|%0, %1}"
444 [(set_attr "type" "sse")
445 (set_attr "mode" "V4SF")])
;; Scalar-form counterpart of sse_rsqrtv4sf2: the template is rsqrtss and
;; the "mode" attribute is SF.  Operand 2 is a register tied to the
;; destination.
;; NOTE(review): the unspec code and the RTL combining operands 1 and 2
;; are not visible in this listing.
447 (define_insn "sse_vmrsqrtv4sf2"
448 [(set (match_operand:V4SF 0 "register_operand" "=x")
450 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
452 (match_operand:V4SF 2 "register_operand" "0")
455 "rsqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
;; Packed V4SF square root of a register-or-memory operand, emitted as
;; sqrtps.
459 (define_insn "sqrtv4sf2"
460 [(set (match_operand:V4SF 0 "register_operand" "=x")
461 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
463 "sqrtps\t{%1, %0|%0, %1}"
464 [(set_attr "type" "sse")
465 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF square root: the template is sqrtss and the "mode"
;; attribute is SF.  Operand 2 is a register tied to the destination.
;; NOTE(review): the RTL combining the (sqrt ...) result with operand 2 is
;; not visible in this listing.
467 (define_insn "sse_vmsqrtv4sf2"
468 [(set (match_operand:V4SF 0 "register_operand" "=x")
470 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
471 (match_operand:V4SF 2 "register_operand" "0")
474 "sqrtss\t{%1, %0|%0, %1}"
475 [(set_attr "type" "sse")
476 (set_attr "mode" "SF")])
478 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
479 ;; isn't really correct, as those rtl operators aren't defined when
480 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is off, operand 1 is
;; first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is run for SMAX.
482 (define_expand "smaxv4sf3"
483 [(set (match_operand:V4SF 0 "register_operand" "")
484 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
485 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
488 if (!flag_finite_math_only)
489 operands[1] = force_reg (V4SFmode, operands[1]);
490 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed V4SF maximum, emitted as maxps, for flag_finite_math_only.
;; Operand 1 is marked commutative ("%0"); gated on
;; ix86_binary_operator_ok.
493 (define_insn "*smaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
496 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
497 "TARGET_SSE && flag_finite_math_only
498 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
499 "maxps\t{%2, %0|%0, %2}"
500 [(set_attr "type" "sse")
501 (set_attr "mode" "V4SF")])
;; Packed V4SF maximum, emitted as maxps.  Unlike *smaxv4sf3_finite,
;; operand 1 must be a register and carries no commutative marker.
503 (define_insn "*smaxv4sf3"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
505 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
506 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
508 "maxps\t{%2, %0|%0, %2}"
509 [(set_attr "type" "sse")
510 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF maximum for flag_finite_math_only: the template is
;; maxss and the "mode" attribute is SF.  Operand 1 is marked commutative.
;; NOTE(review): the RTL wrapped around the (smax ...) expression is not
;; visible in this listing.
512 (define_insn "*sse_vmsmaxv4sf3_finite"
513 [(set (match_operand:V4SF 0 "register_operand" "=x")
515 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
516 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
519 "TARGET_SSE && flag_finite_math_only
520 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
521 "maxss\t{%2, %0|%0, %2}"
522 [(set_attr "type" "sse")
523 (set_attr "mode" "SF")])
;; Scalar-form V4SF maximum: the template is maxss and the "mode"
;; attribute is SF.  Operand 1 must be a register, with no commutative
;; marker.
;; NOTE(review): the RTL wrapped around the (smax ...) expression and the
;; condition are not visible in this listing.
525 (define_insn "sse_vmsmaxv4sf3"
526 [(set (match_operand:V4SF 0 "register_operand" "=x")
528 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
529 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
533 "maxss\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is off, operand 1 is
;; first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is run for SMIN.
537 (define_expand "sminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "")
539 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
540 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
543 if (!flag_finite_math_only)
544 operands[1] = force_reg (V4SFmode, operands[1]);
545 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed V4SF minimum, emitted as minps, for flag_finite_math_only.
;; Operand 1 is marked commutative ("%0"); gated on
;; ix86_binary_operator_ok.
548 (define_insn "*sminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
550 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
551 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
552 "TARGET_SSE && flag_finite_math_only
553 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
554 "minps\t{%2, %0|%0, %2}"
555 [(set_attr "type" "sse")
556 (set_attr "mode" "V4SF")])
;; Packed V4SF minimum, emitted as minps.  Unlike *sminv4sf3_finite,
;; operand 1 must be a register and carries no commutative marker.
558 (define_insn "*sminv4sf3"
559 [(set (match_operand:V4SF 0 "register_operand" "=x")
560 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
561 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
563 "minps\t{%2, %0|%0, %2}"
564 [(set_attr "type" "sse")
565 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF minimum for flag_finite_math_only: the template is
;; minss and the "mode" attribute is SF.  Operand 1 is marked commutative.
;; NOTE(review): the RTL wrapped around the (smin ...) expression is not
;; visible in this listing.
567 (define_insn "*sse_vmsminv4sf3_finite"
568 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
574 "TARGET_SSE && flag_finite_math_only
575 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
576 "minss\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sse")
578 (set_attr "mode" "SF")])
;; Scalar-form V4SF minimum: the template is minss and the "mode"
;; attribute is SF.  Operand 1 must be a register, with no commutative
;; marker.
;; NOTE(review): the RTL wrapped around the (smin ...) expression and the
;; condition are not visible in this listing.
580 (define_insn "sse_vmsminv4sf3"
581 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
588 "minss\t{%2, %0|%0, %2}"
589 [(set_attr "type" "sse")
590 (set_attr "mode" "SF")])
592 ;; These versions of the min/max patterns implement exactly the operations
593 ;; min = (op1 < op2 ? op1 : op2)
594 ;; max = (!(op1 < op2) ? op1 : op2)
595 ;; Their operands are not commutative, and thus they may be used in the
596 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum expressed as an unspec over operands 1
;; and 2, emitted as minps.  Operand 1 is a register tied to the
;; destination.
;; NOTE(review): the unspec code and the condition are not visible in this
;; listing.
598 (define_insn "*ieee_sminv4sf3"
599 [(set (match_operand:V4SF 0 "register_operand" "=x")
600 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
601 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
604 "minps\t{%2, %0|%0, %2}"
605 [(set_attr "type" "sseadd")
606 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum expressed as an unspec over operands 1
;; and 2, emitted as maxps.  Operand 1 is a register tied to the
;; destination.
;; NOTE(review): the unspec code and the condition are not visible in this
;; listing.
608 (define_insn "*ieee_smaxv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
611 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
614 "maxps\t{%2, %0|%0, %2}"
615 [(set_attr "type" "sseadd")
616 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum expressed as an unspec over operands 1
;; and 2, emitted as minpd.  Operand 1 is a register tied to the
;; destination.
;; NOTE(review): the unspec code and the condition are not visible in this
;; listing.
618 (define_insn "*ieee_sminv2df3"
619 [(set (match_operand:V2DF 0 "register_operand" "=x")
620 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
621 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
624 "minpd\t{%2, %0|%0, %2}"
625 [(set_attr "type" "sseadd")
626 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum expressed as an unspec over operands 1
;; and 2, emitted as maxpd.  Operand 1 is a register tied to the
;; destination.
;; NOTE(review): the unspec code and the condition are not visible in this
;; listing.
628 (define_insn "*ieee_smaxv2df3"
629 [(set (match_operand:V2DF 0 "register_operand" "=x")
630 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
631 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
634 "maxpd\t{%2, %0|%0, %2}"
635 [(set_attr "type" "sseadd")
636 (set_attr "mode" "V2DF")])
;; V4SF add/subtract combination, emitted as addsubps.  The visible RTL
;; references operands 1 and 2 in one expression and again in a
;; (minus ...) of the same two operands.
;; NOTE(review): the enclosing RTL, its selector and the condition are not
;; visible in this listing.
638 (define_insn "sse3_addsubv4sf3"
639 [(set (match_operand:V4SF 0 "register_operand" "=x")
642 (match_operand:V4SF 1 "register_operand" "0")
643 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
644 (minus:V4SF (match_dup 1) (match_dup 2))
647 "addsubps\t{%2, %0|%0, %2}"
648 [(set_attr "type" "sseadd")
649 (set_attr "mode" "V4SF")])
;; V4SF pattern emitted as haddps.  The visible RTL selects SF elements
;; 0..3 of operand 1 followed by elements 0..3 of operand 2.
;; NOTE(review): the operators combining the selected elements and the
;; condition are not visible in this listing.
651 (define_insn "sse3_haddv4sf3"
652 [(set (match_operand:V4SF 0 "register_operand" "=x")
657 (match_operand:V4SF 1 "register_operand" "0")
658 (parallel [(const_int 0)]))
659 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
661 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
662 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
666 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
667 (parallel [(const_int 0)]))
668 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
670 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
671 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
673 "haddps\t{%2, %0|%0, %2}"
674 [(set_attr "type" "sseadd")
675 (set_attr "mode" "V4SF")])
;; V4SF pattern emitted as hsubps.  The visible RTL selects SF elements
;; 0..3 of operand 1 followed by elements 0..3 of operand 2.
;; NOTE(review): the operators combining the selected elements and the
;; condition are not visible in this listing.
677 (define_insn "sse3_hsubv4sf3"
678 [(set (match_operand:V4SF 0 "register_operand" "=x")
683 (match_operand:V4SF 1 "register_operand" "0")
684 (parallel [(const_int 0)]))
685 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
687 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
688 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
692 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
693 (parallel [(const_int 0)]))
694 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
696 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
697 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
699 "hsubps\t{%2, %0|%0, %2}"
700 [(set_attr "type" "sseadd")
701 (set_attr "mode" "V4SF")])
;; V4SF sum-reduction expander.  Two expansions are visible: one applies
;; gen_sse3_haddv4sf3 twice through a fresh temporary register, the other
;; calls ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test selecting between the two is not visible in this
;; listing.
703 (define_expand "reduc_plus_v4sf"
704 [(match_operand:V4SF 0 "register_operand" "")
705 (match_operand:V4SF 1 "register_operand" "")]
710 rtx tmp = gen_reg_rtx (V4SFmode);
711 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
712 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
715 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF maximum-reduction expander; calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining generator.
719 (define_expand "reduc_smax_v4sf"
720 [(match_operand:V4SF 0 "register_operand" "")
721 (match_operand:V4SF 1 "register_operand" "")]
724 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum-reduction expander; calls ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining generator.
728 (define_expand "reduc_smin_v4sf"
729 [(match_operand:V4SF 0 "register_operand" "")
730 (match_operand:V4SF 1 "register_operand" "")]
733 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
739 ;; Parallel single-precision floating point comparisons
741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison.  Operand 3 is any operator accepted by
;; sse_comparison_operator; the template prints it with %D3 between "cmp"
;; and "ps".  Operand 1 is tied to the destination.
743 (define_insn "sse_maskcmpv4sf3"
744 [(set (match_operand:V4SF 0 "register_operand" "=x")
745 (match_operator:V4SF 3 "sse_comparison_operator"
746 [(match_operand:V4SF 1 "register_operand" "0")
747 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
749 "cmp%D3ps\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssecmp")
751 (set_attr "mode" "V4SF")])
;; Scalar-form V4SF comparison: the template prints operator 3 with %D3
;; between "cmp" and "ss", and the "mode" attribute is SF.  Both inputs
;; must be registers here.
;; NOTE(review): the RTL wrapped around the comparison and the condition
;; are not visible in this listing.
753 (define_insn "sse_vmmaskcmpv4sf3"
754 [(set (match_operand:V4SF 0 "register_operand" "=x")
756 (match_operator:V4SF 3 "sse_comparison_operator"
757 [(match_operand:V4SF 1 "register_operand" "0")
758 (match_operand:V4SF 2 "register_operand" "x")])
762 "cmp%D3ss\t{%2, %0|%0, %2}"
763 [(set_attr "type" "ssecmp")
764 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from V4SF operands 0 and 1, emitted as
;; comiss.  Each operand is paired with a (parallel [(const_int 0)])
;; selector.
;; NOTE(review): the comparison and selection operators and the condition
;; are not visible in this listing.
766 (define_insn "sse_comi"
767 [(set (reg:CCFP FLAGS_REG)
770 (match_operand:V4SF 0 "register_operand" "x")
771 (parallel [(const_int 0)]))
773 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
774 (parallel [(const_int 0)]))))]
776 "comiss\t{%1, %0|%0, %1}"
777 [(set_attr "type" "ssecomi")
778 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from V4SF operands 0 and 1, emitted as
;; ucomiss.  Each operand is paired with a (parallel [(const_int 0)])
;; selector.
;; NOTE(review): the comparison and selection operators and the condition
;; are not visible in this listing.
780 (define_insn "sse_ucomi"
781 [(set (reg:CCFPU FLAGS_REG)
784 (match_operand:V4SF 0 "register_operand" "x")
785 (parallel [(const_int 0)]))
787 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
788 (parallel [(const_int 0)]))))]
790 "ucomiss\t{%1, %0|%0, %1}"
791 [(set_attr "type" "ssecomi")
792 (set_attr "mode" "SF")])
;; V4SF conditional-select expander.  Operands 4 and 5 are the compared
;; values; operands 1 and 2 are general operands following the comparison
;; in the pattern.  The preparation code tests the result of
;; ix86_expand_fp_vcond (operands).
;; NOTE(review): the comparison operator, the condition and what follows
;; the ix86_expand_fp_vcond test are not visible in this listing.
794 (define_expand "vcondv4sf"
795 [(set (match_operand:V4SF 0 "register_operand" "")
798 [(match_operand:V4SF 4 "nonimmediate_operand" "")
799 (match_operand:V4SF 5 "nonimmediate_operand" "")])
800 (match_operand:V4SF 1 "general_operand" "")
801 (match_operand:V4SF 2 "general_operand" "")))]
804 if (ix86_expand_fp_vcond (operands))
810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
812 ;; Parallel single-precision floating point logical operations
814 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for AND.
816 (define_expand "andv4sf3"
817 [(set (match_operand:V4SF 0 "register_operand" "")
818 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
819 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
821 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; V4SF bitwise AND, emitted as andps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); gated on
;; ix86_binary_operator_ok.
823 (define_insn "*andv4sf3"
824 [(set (match_operand:V4SF 0 "register_operand" "=x")
825 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
826 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
827 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
828 "andps\t{%2, %0|%0, %2}"
829 [(set_attr "type" "sselog")
830 (set_attr "mode" "V4SF")])
;; V4SF AND of the complement of operand 1 with operand 2, emitted as
;; andnps.  Operand 1 is a register tied to the destination.
832 (define_insn "sse_nandv4sf3"
833 [(set (match_operand:V4SF 0 "register_operand" "=x")
834 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
835 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
837 "andnps\t{%2, %0|%0, %2}"
838 [(set_attr "type" "sselog")
839 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for IOR.
841 (define_expand "iorv4sf3"
842 [(set (match_operand:V4SF 0 "register_operand" "")
843 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
844 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
846 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; V4SF bitwise OR, emitted as orps.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); gated on ix86_binary_operator_ok.
848 (define_insn "*iorv4sf3"
849 [(set (match_operand:V4SF 0 "register_operand" "=x")
850 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
851 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
852 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
853 "orps\t{%2, %0|%0, %2}"
854 [(set_attr "type" "sselog")
855 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for XOR.
857 (define_expand "xorv4sf3"
858 [(set (match_operand:V4SF 0 "register_operand" "")
859 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
860 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
862 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; V4SF bitwise XOR, emitted as xorps.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); gated on
;; ix86_binary_operator_ok.
864 (define_insn "*xorv4sf3"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
867 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
868 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
869 "xorps\t{%2, %0|%0, %2}"
870 [(set_attr "type" "sselog")
871 (set_attr "mode" "V4SF")])
873 ;; Also define scalar versions. These are used for abs, neg, and
874 ;; conditional move. Using subregs into vector modes causes register
875 ;; allocation lossage. These patterns do not allow memory operands
876 ;; because the native instructions read the full 128-bits.
;; Scalar SF bitwise AND on SSE registers only, emitted as andps; the
;; "mode" attribute is V4SF to match the instruction emitted.
878 (define_insn "*andsf3"
879 [(set (match_operand:SF 0 "register_operand" "=x")
880 (and:SF (match_operand:SF 1 "register_operand" "0")
881 (match_operand:SF 2 "register_operand" "x")))]
883 "andps\t{%2, %0|%0, %2}"
884 [(set_attr "type" "sselog")
885 (set_attr "mode" "V4SF")])
;; Scalar SF AND of the complement of operand 1 with operand 2, on SSE
;; registers only, emitted as andnps.
887 (define_insn "*nandsf3"
888 [(set (match_operand:SF 0 "register_operand" "=x")
889 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
890 (match_operand:SF 2 "register_operand" "x")))]
892 "andnps\t{%2, %0|%0, %2}"
893 [(set_attr "type" "sselog")
894 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise OR on SSE registers only, emitted as orps.
896 (define_insn "*iorsf3"
897 [(set (match_operand:SF 0 "register_operand" "=x")
898 (ior:SF (match_operand:SF 1 "register_operand" "0")
899 (match_operand:SF 2 "register_operand" "x")))]
901 "orps\t{%2, %0|%0, %2}"
902 [(set_attr "type" "sselog")
903 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise XOR on SSE registers only, emitted as xorps.
905 (define_insn "*xorsf3"
906 [(set (match_operand:SF 0 "register_operand" "=x")
907 (xor:SF (match_operand:SF 1 "register_operand" "0")
908 (match_operand:SF 2 "register_operand" "x")))]
910 "xorps\t{%2, %0|%0, %2}"
911 [(set_attr "type" "sselog")
912 (set_attr "mode" "V4SF")])
914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
916 ;; Parallel single-precision floating point conversion operations
918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts V2SI operand 2 (MMX register "y" or memory) to V2SF with
;; (float ...) and combines it with V4SF operand 1, which is tied to the
;; destination; emitted as cvtpi2ps.
;; NOTE(review): the RTL combining the two values and the condition are
;; not visible in this listing.
920 (define_insn "sse_cvtpi2ps"
921 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
925 (match_operand:V4SF 1 "register_operand" "0")
928 "cvtpi2ps\t{%2, %0|%0, %2}"
929 [(set_attr "type" "ssecvt")
930 (set_attr "mode" "V4SF")])
;; Produces a V2SI result in an MMX register ("y") from elements 0 and 1
;; of a V4SI unspec over V4SF operand 1; emitted as cvtps2pi.  The "unit"
;; attribute is set to mmx.
;; NOTE(review): the unspec code, the selection operator and the condition
;; are not visible in this listing.
932 (define_insn "sse_cvtps2pi"
933 [(set (match_operand:V2SI 0 "register_operand" "=y")
935 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
937 (parallel [(const_int 0) (const_int 1)])))]
939 "cvtps2pi\t{%1, %0|%0, %1}"
940 [(set_attr "type" "ssecvt")
941 (set_attr "unit" "mmx")
942 (set_attr "mode" "DI")])
;; Produces a V2SI result in an MMX register ("y") from elements 0 and 1
;; of (fix:V4SI operand 1); emitted as cvttps2pi.  The "unit" attribute is
;; set to mmx.
;; NOTE(review): the "mode" attribute here is "SF" whereas the parallel
;; sse_cvtps2pi pattern uses "DI" -- confirm which is intended.
944 (define_insn "sse_cvttps2pi"
945 [(set (match_operand:V2SI 0 "register_operand" "=y")
947 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
948 (parallel [(const_int 0) (const_int 1)])))]
950 "cvttps2pi\t{%1, %0|%0, %1}"
951 [(set_attr "type" "ssecvt")
952 (set_attr "unit" "mmx")
953 (set_attr "mode" "SF")])
;; Converts SI operand 2 (general register in alternative 0, memory in
;; alternative 1) to SF and combines it with V4SF operand 1, which is
;; tied to the destination; emitted as cvtsi2ss.  athlon_decode is
;; "vector" for the register form and "double" for the memory form.
;; NOTE(review): the RTL combining the two values and the condition are
;; not visible in this listing.
955 (define_insn "sse_cvtsi2ss"
956 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
959 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
960 (match_operand:V4SF 1 "register_operand" "0,0")
963 "cvtsi2ss\t{%2, %0|%0, %2}"
964 [(set_attr "type" "sseicvt")
965 (set_attr "athlon_decode" "vector,double")
966 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: converts DI operand 2 to SF and
;; combines it with V4SF operand 1; emitted as cvtsi2ssq.  Requires
;; TARGET_64BIT.
;; NOTE(review): the second alternative's constraint is "rm" here but "m"
;; in sse_cvtsi2ss -- confirm that the overlap with alternative 0 is
;; intended.
968 (define_insn "sse_cvtsi2ssq"
969 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
972 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
973 (match_operand:V4SF 1 "register_operand" "0,0")
975 "TARGET_SSE && TARGET_64BIT"
976 "cvtsi2ssq\t{%2, %0|%0, %2}"
977 [(set_attr "type" "sseicvt")
978 (set_attr "athlon_decode" "vector,double")
979 (set_attr "mode" "SF")])
;; Produces an SI general-register result via UNSPEC_FIX_NOTRUNC applied
;; to element 0 of V4SF operand 1 (SSE register or memory); emitted as
;; cvtss2si.
;; NOTE(review): the unspec and selection openers and the condition are
;; not visible in this listing.
981 (define_insn "sse_cvtss2si"
982 [(set (match_operand:SI 0 "register_operand" "=r,r")
985 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
986 (parallel [(const_int 0)]))]
987 UNSPEC_FIX_NOTRUNC))]
989 "cvtss2si\t{%1, %0|%0, %1}"
990 [(set_attr "type" "sseicvt")
991 (set_attr "athlon_decode" "double,vector")
992 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si: produces a DI general-register result
;; via UNSPEC_FIX_NOTRUNC applied to element 0 of V4SF operand 1; emitted
;; as cvtss2siq.  Requires TARGET_64BIT.
;; NOTE(review): the unspec and selection openers are not visible in this
;; listing.
994 (define_insn "sse_cvtss2siq"
995 [(set (match_operand:DI 0 "register_operand" "=r,r")
998 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
999 (parallel [(const_int 0)]))]
1000 UNSPEC_FIX_NOTRUNC))]
1001 "TARGET_SSE && TARGET_64BIT"
1002 "cvtss2siq\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "sseicvt")
1004 (set_attr "athlon_decode" "double,vector")
1005 (set_attr "mode" "DI")])
;; Produces an SI general-register result from element 0 of V4SF
;; operand 1 (SSE register or memory); emitted as cvttss2si.
;; NOTE(review): the conversion and selection operators and the condition
;; are not visible in this listing.
1007 (define_insn "sse_cvttss2si"
1008 [(set (match_operand:SI 0 "register_operand" "=r,r")
1011 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1012 (parallel [(const_int 0)]))))]
1014 "cvttss2si\t{%1, %0|%0, %1}"
1015 [(set_attr "type" "sseicvt")
1016 (set_attr "athlon_decode" "double,vector")
1017 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvttss2si: produces a DI general-register result
;; from element 0 of V4SF operand 1; emitted as cvttss2siq.  Requires
;; TARGET_64BIT.
;; NOTE(review): the conversion and selection operators are not visible in
;; this listing.
1019 (define_insn "sse_cvttss2siq"
1020 [(set (match_operand:DI 0 "register_operand" "=r,r")
1023 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1024 (parallel [(const_int 0)]))))]
1025 "TARGET_SSE && TARGET_64BIT"
1026 "cvttss2siq\t{%1, %0|%0, %1}"
1027 [(set_attr "type" "sseicvt")
1028 (set_attr "athlon_decode" "double,vector")
1029 (set_attr "mode" "DI")])
;; Converts a V4SI register-or-memory operand to V4SF with (float ...),
;; emitted as cvtdq2ps.
;; The "mode" attribute is V4SF to match the packed single-precision
;; result.  The previous value "V2DF" described a packed double-precision
;; operation, which this pattern never produces.
1031 (define_insn "sse2_cvtdq2ps"
1032 [(set (match_operand:V4SF 0 "register_operand" "=x")
1033 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1035 "cvtdq2ps\t{%1, %0|%0, %1}"
1036 [(set_attr "type" "ssecvt")
1037 (set_attr "mode" "V4SF")])
;; Produces a V4SI result via UNSPEC_FIX_NOTRUNC applied to V4SF
;; operand 1 (register or memory); emitted as cvtps2dq.
1039 (define_insn "sse2_cvtps2dq"
1040 [(set (match_operand:V4SI 0 "register_operand" "=x")
1041 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1042 UNSPEC_FIX_NOTRUNC))]
1044 "cvtps2dq\t{%1, %0|%0, %1}"
1045 [(set_attr "type" "ssecvt")
1046 (set_attr "mode" "TI")])
;; Produces a V4SI result from (fix:V4SI operand 1), where operand 1 is a
;; V4SF register or memory; emitted as cvttps2dq.
1048 (define_insn "sse2_cvttps2dq"
1049 [(set (match_operand:V4SI 0 "register_operand" "=x")
1050 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1052 "cvttps2dq\t{%1, %0|%0, %1}"
1053 [(set_attr "type" "ssecvt")
1054 (set_attr "mode" "TI")])
1056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1058 ;; Parallel single-precision floating point element swizzling
1060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative V4SF swizzle over operands 1 and 2.  By template
;; line: alternative 0 emits movhlps, alternative 1 emits movlps from the
;; %H1 form of offsettable-memory operand 1, and alternative 2 emits
;; movhps with a memory destination.  The condition rejects the case where
;; operands 1 and 2 are both memory.
;; NOTE(review): the selection RTL is only partly visible in this listing.
1062 (define_insn "sse_movhlps"
1063 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1066 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
1067 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
1068 (parallel [(const_int 6)
1072 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1074 movhlps\t{%2, %0|%0, %2}
1075 movlps\t{%H1, %0|%0, %H1}
1076 movhps\t{%1, %0|%0, %1}"
1077 [(set_attr "type" "ssemov")
1078 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movlhps: element selection over the V4SF operands 1 and 2, with
;; operand 1 always tied to the destination.  Three alternatives:
;;   0: operand 2 in a register  -> movlhps
;;   1: operand 2 in memory      -> movhps load
;;   2: destination is offsettable memory, operand 2 in a register
;;                               -> movlps store to %H0
;; Operand combinations are validated with ix86_binary_operator_ok.
1080 (define_insn "sse_movlhps"
1081 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1084 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1085 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1086 (parallel [(const_int 0)
1090 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1092 movlhps\t{%2, %0|%0, %2}
1093 movhps\t{%2, %0|%0, %2}
1094 movlps\t{%2, %H0|%H0, %2}"
1095 [(set_attr "type" "ssemov")
1096 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_unpckhps: interleave selection that picks element indices
;; 2, 6, 3, 7 across operands 1 and 2.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1098 (define_insn "sse_unpckhps"
1099 [(set (match_operand:V4SF 0 "register_operand" "=x")
1102 (match_operand:V4SF 1 "register_operand" "0")
1103 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1104 (parallel [(const_int 2) (const_int 6)
1105 (const_int 3) (const_int 7)])))]
1107 "unpckhps\t{%2, %0|%0, %2}"
1108 [(set_attr "type" "sselog")
1109 (set_attr "mode" "V4SF")])
;; sse_unpcklps: interleave selection that picks element indices
;; 0, 4, 1, 5 across operands 1 and 2.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1111 (define_insn "sse_unpcklps"
1112 [(set (match_operand:V4SF 0 "register_operand" "=x")
1115 (match_operand:V4SF 1 "register_operand" "0")
1116 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1117 (parallel [(const_int 0) (const_int 4)
1118 (const_int 1) (const_int 5)])))]
1120 "unpcklps\t{%2, %0|%0, %2}"
1121 [(set_attr "type" "sselog")
1122 (set_attr "mode" "V4SF")])
1124 ;; These are modeled with the same vec_concat as the others so that we
1125 ;; capture users of shufps that can use the new instructions
;; sse3_movshdup: single-source V4SF shuffle whose selection list starts
;; with element 1; emitted as movshdup.  The source may be an SSE
;; register or memory and is not tied to the destination.
1126 (define_insn "sse3_movshdup"
1127 [(set (match_operand:V4SF 0 "register_operand" "=x")
1130 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1132 (parallel [(const_int 1)
1137 "movshdup\t{%1, %0|%0, %1}"
1138 [(set_attr "type" "sse")
1139 (set_attr "mode" "V4SF")])
;; sse3_movsldup: single-source V4SF shuffle whose selection list starts
;; with element 0; emitted as movsldup.  The source may be an SSE
;; register or memory and is not tied to the destination.
1141 (define_insn "sse3_movsldup"
1142 [(set (match_operand:V4SF 0 "register_operand" "=x")
1145 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1147 (parallel [(const_int 0)
1152 "movsldup\t{%1, %0|%0, %1}"
1153 [(set_attr "type" "sse")
1154 (set_attr "mode" "V4SF")])
;; sse_shufps: expander taking the shuffle immediate as operand 3.
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7);
;; the upper two fields are biased by 4 before being handed to
;; sse_shufps_1, whose last two selectors must lie in the range 4..7.
1156 (define_expand "sse_shufps"
1157 [(match_operand:V4SF 0 "register_operand" "")
1158 (match_operand:V4SF 1 "register_operand" "")
1159 (match_operand:V4SF 2 "nonimmediate_operand" "")
1160 (match_operand:SI 3 "const_int_operand" "")]
1163 int mask = INTVAL (operands[3]);
1164 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1165 GEN_INT ((mask >> 0) & 3),
1166 GEN_INT ((mask >> 2) & 3),
1167 GEN_INT (((mask >> 4) & 3) + 4),
1168 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_1: the shufps insn with its four element selectors spelled
;; out as operands 3..6 (two in 0..3, two in 4..7).  The output code
;; rebuilds the 8-bit immediate, the inverse of the sse_shufps expander:
;; operands 3 and 4 fill bits 0-1 and 2-3, operands 5 and 6 have the
;; bias of 4 removed and fill bits 4-5 and 6-7.  The immediate is stored
;; back into operands[3] for printing.
1172 (define_insn "sse_shufps_1"
1173 [(set (match_operand:V4SF 0 "register_operand" "=x")
1176 (match_operand:V4SF 1 "register_operand" "0")
1177 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1178 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1179 (match_operand 4 "const_0_to_3_operand" "")
1180 (match_operand 5 "const_4_to_7_operand" "")
1181 (match_operand 6 "const_4_to_7_operand" "")])))]
1185 mask |= INTVAL (operands[3]) << 0;
1186 mask |= INTVAL (operands[4]) << 2;
1187 mask |= (INTVAL (operands[5]) - 4) << 4;
1188 mask |= (INTVAL (operands[6]) - 4) << 6;
1189 operands[3] = GEN_INT (mask);
1191 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1193 [(set_attr "type" "sselog")
1194 (set_attr "mode" "V4SF")])
;; sse_storehps: extract elements 2 and 3 of the V4SF operand 1 into the
;; V2SF operand 0.  Three alternatives:
;;   0: memory destination       -> movhps store
;;   1: register to register     -> movhlps
;;   2: offsettable memory source -> movlps load from %H1
1196 (define_insn "sse_storehps"
1197 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1199 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1200 (parallel [(const_int 2) (const_int 3)])))]
1203 movhps\t{%1, %0|%0, %1}
1204 movhlps\t{%1, %0|%0, %1}
1205 movlps\t{%H1, %0|%0, %H1}"
1206 [(set_attr "type" "ssemov")
1207 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps: pair elements 0 and 1 of the V4SF operand 1 (tied to the
;; destination) with the V2SF operand 2.  Three alternatives:
;;   0: operand 2 in memory      -> movhps load
;;   1: operand 2 in a register  -> movlhps
;;   2: offsettable memory destination -> movlps store to %H0
1209 (define_insn "sse_loadhps"
1210 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1213 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1214 (parallel [(const_int 0) (const_int 1)]))
1215 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1218 movhps\t{%2, %0|%0, %2}
1219 movlhps\t{%2, %0|%0, %2}
1220 movlps\t{%2, %H0|%H0, %2}"
1221 [(set_attr "type" "ssemov")
1222 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_storelps: extract elements 0 and 1 of the V4SF operand 1 into the
;; V2SF operand 0.  Memory on either side uses movlps; the
;; register-to-register alternative copies the whole register with movaps.
1224 (define_insn "sse_storelps"
1225 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1227 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1228 (parallel [(const_int 0) (const_int 1)])))]
1231 movlps\t{%1, %0|%0, %1}
1232 movaps\t{%1, %0|%0, %1}
1233 movlps\t{%1, %0|%0, %1}"
1234 [(set_attr "type" "ssemov")
1235 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps: pair the V2SF operand 2 with elements 2 and 3 of the V4SF
;; operand 1.  Three alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;                               -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied -> movlps load
;;   2: memory destination, operand 2 in a register -> movlps store
1237 (define_insn "sse_loadlps"
1238 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1240 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1242 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1243 (parallel [(const_int 2) (const_int 3)]))))]
1246 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1247 movlps\t{%2, %0|%0, %2}
1248 movlps\t{%2, %0|%0, %2}"
1249 [(set_attr "type" "sselog,ssemov,ssemov")
1250 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; sse_movss: register-only movss.  Operand 1 is tied to the destination
;; and operand 2 supplies the value moved in by the instruction.
1252 (define_insn "sse_movss"
1253 [(set (match_operand:V4SF 0 "register_operand" "=x")
1255 (match_operand:V4SF 2 "register_operand" "x")
1256 (match_operand:V4SF 1 "register_operand" "0")
1259 "movss\t{%2, %0|%0, %2}"
1260 [(set_attr "type" "ssemov")
1261 (set_attr "mode" "SF")])
;; *vec_dupv4sf: build a V4SF from the SF operand 1, which is tied to the
;; destination register.  Emitted as shufps with immediate 0, using the
;; destination as both source operands.
1263 (define_insn "*vec_dupv4sf"
1264 [(set (match_operand:V4SF 0 "register_operand" "=x")
1266 (match_operand:SF 1 "register_operand" "0")))]
1268 "shufps\t{$0, %0, %0|%0, %0, 0}"
1269 [(set_attr "type" "sselog1")
1270 (set_attr "mode" "V4SF")])
1272 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1273 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1274 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *sse_concatv2sf: form a V2SF from the two SF operands 1 and 2.
;; Operand 2 is a register or zero (reg_or_0_operand).  Alternatives:
;;   0: SSE, both in registers   -> unpcklps
;;   1: SSE, operand 1 in memory, operand 2 constraint "C" -> movss load
;;   2: MMX, both in registers   -> punpckldq
;;   3: MMX, operand 1 in memory, operand 2 constraint "C" -> movd load
1275 (define_insn "*sse_concatv2sf"
1276 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1278 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1279 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1282 unpcklps\t{%2, %0|%0, %2}
1283 movss\t{%1, %0|%0, %1}
1284 punpckldq\t{%2, %0|%0, %2}
1285 movd\t{%1, %0|%0, %1}"
1286 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1287 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *sse_concatv4sf: form a V4SF from the two V2SF operands 1 and 2.
;; Operand 1 is tied to the destination.  Operand 2 in a register uses
;; movlhps; operand 2 in memory uses movhps.
1289 (define_insn "*sse_concatv4sf"
1290 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1292 (match_operand:V2SF 1 "register_operand" " 0,0")
1293 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1296 movlhps\t{%2, %0|%0, %2}
1297 movhps\t{%2, %0|%0, %2}"
1298 [(set_attr "type" "ssemov")
1299 (set_attr "mode" "V4SF,V2SF")])
;; vec_initv4sf: standard-name expander for initialising a V4SF register
;; (operand 0) from operand 1.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1301 (define_expand "vec_initv4sf"
1302 [(match_operand:V4SF 0 "register_operand" "")
1303 (match_operand 1 "" "")]
1306 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_setv4sf_0: combine the SF operand 2 with the V4SF operand 1,
;; which is either tied to the destination ("0") or matches constraint
;; "C".  Register destinations use movss (source in an SSE register or
;; memory) or movd (source in a general register).  The final
;; alternative has a memory destination.
1310 (define_insn "*vec_setv4sf_0"
1311 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1314 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1315 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1319 movss\t{%2, %0|%0, %2}
1320 movss\t{%2, %0|%0, %2}
1321 movd\t{%2, %0|%0, %2}
1323 [(set_attr "type" "ssemov")
1324 (set_attr "mode" "SF")])
;; Split, valid only after reload with TARGET_SSE: when the V4SF
;; destination is in memory and the SF source is not, emit a plain
;; SFmode move of operand 1 to offset 0 of the destination.
1327 [(set (match_operand:V4SF 0 "memory_operand" "")
1330 (match_operand:SF 1 "nonmemory_operand" ""))
1333 "TARGET_SSE && reload_completed"
1336 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_setv4sf: standard-name expander that stores the SF operand 1 into
;; the element of the V4SF operand 0 selected by the constant operand 2.
;; Delegates to ix86_expand_vector_set with the element index as an int.
1340 (define_expand "vec_setv4sf"
1341 [(match_operand:V4SF 0 "register_operand" "")
1342 (match_operand:SF 1 "register_operand" "")
1343 (match_operand 2 "const_int_operand" "")]
1346 ix86_expand_vector_set (false, operands[0], operands[1],
1347 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extract element 0 of the V4SF operand 1 into the
;; SF operand 0.  The pattern rejects memory-to-memory and is split after
;; reload: operand 1 is re-expressed in SFmode (gen_rtx_REG on its
;; register number, or gen_lowpart) and an ordinary move is emitted.
1351 (define_insn_and_split "*vec_extractv4sf_0"
1352 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1354 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1355 (parallel [(const_int 0)])))]
1356 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1358 "&& reload_completed"
1361 rtx op1 = operands[1];
1363 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1365 op1 = gen_lowpart (SFmode, op1);
1366 emit_move_insn (operands[0], op1);
;; vec_extractv4sf: standard-name expander that extracts the element of
;; the V4SF operand 1 selected by the constant operand 2 into the SF
;; operand 0.  Delegates to ix86_expand_vector_extract.
1370 (define_expand "vec_extractv4sf"
1371 [(match_operand:SF 0 "register_operand" "")
1372 (match_operand:V4SF 1 "register_operand" "")
1373 (match_operand 2 "const_int_operand" "")]
1376 ix86_expand_vector_extract (false, operands[0], operands[1],
1377 INTVAL (operands[2]));
1381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1383 ;; Parallel double-precision floating point arithmetic
1385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; negv2df2: V2DF negation.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG); DONE suppresses the template.
1387 (define_expand "negv2df2"
1388 [(set (match_operand:V2DF 0 "register_operand" "")
1389 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1391 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; absv2df2: V2DF absolute value.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS); DONE suppresses the template.
1393 (define_expand "absv2df2"
1394 [(set (match_operand:V2DF 0 "register_operand" "")
1395 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1397 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; addv2df3: V2DF addition expander.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by *addv2df3.
1399 (define_expand "addv2df3"
1400 [(set (match_operand:V2DF 0 "register_operand" "")
1401 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1402 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1404 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; *addv2df3: packed double add (addpd).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory.
1406 (define_insn "*addv2df3"
1407 [(set (match_operand:V2DF 0 "register_operand" "=x")
1408 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1409 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1410 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1411 "addpd\t{%2, %0|%0, %2}"
1412 [(set_attr "type" "sseadd")
1413 (set_attr "mode" "V2DF")])
;; sse2_vmaddv2df3: scalar double add (addsd) expressed on V2DF vectors.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; operand 2 may be an SSE register or memory.  The operands are V2DF,
;; so the operand check is made in V2DFmode, matching the other V2DF
;; patterns such as sse2_vmmulv2df3.
1415 (define_insn "sse2_vmaddv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1418 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1419 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1422 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1423 "addsd\t{%2, %0|%0, %2}"
1424 [(set_attr "type" "sseadd")
1425 (set_attr "mode" "DF")])
;; subv2df3: V2DF subtraction expander.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by *subv2df3.
1427 (define_expand "subv2df3"
1428 [(set (match_operand:V2DF 0 "register_operand" "")
1429 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1430 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1432 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; *subv2df3: packed double subtract (subpd).  Not commutative: operand 1
;; must be a register tied to the destination; operand 2 may be an SSE
;; register or memory.
1434 (define_insn "*subv2df3"
1435 [(set (match_operand:V2DF 0 "register_operand" "=x")
1436 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1437 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1439 "subpd\t{%2, %0|%0, %2}"
1440 [(set_attr "type" "sseadd")
1441 (set_attr "mode" "V2DF")])
;; sse2_vmsubv2df3: scalar double subtract (subsd) expressed on V2DF
;; vectors.  Operand 1 is a register tied to the destination; operand 2
;; may be an SSE register or memory.
1443 (define_insn "sse2_vmsubv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1446 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1447 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1451 "subsd\t{%2, %0|%0, %2}"
1452 [(set_attr "type" "sseadd")
1453 (set_attr "mode" "DF")])
;; mulv2df3: V2DF multiplication expander.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by *mulv2df3.
1455 (define_expand "mulv2df3"
1456 [(set (match_operand:V2DF 0 "register_operand" "")
1457 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1458 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1460 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; *mulv2df3: packed double multiply (mulpd).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory.
1462 (define_insn "*mulv2df3"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1464 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1465 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1466 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1467 "mulpd\t{%2, %0|%0, %2}"
1468 [(set_attr "type" "ssemul")
1469 (set_attr "mode" "V2DF")])
;; sse2_vmmulv2df3: scalar double multiply (mulsd) expressed on V2DF
;; vectors.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.
1471 (define_insn "sse2_vmmulv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1474 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1475 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1478 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1479 "mulsd\t{%2, %0|%0, %2}"
1480 [(set_attr "type" "ssemul")
1481 (set_attr "mode" "DF")])
;; divv2df3: V2DF division expander.  Operand 1 must already be a
;; register.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1483 (define_expand "divv2df3"
1484 [(set (match_operand:V2DF 0 "register_operand" "")
1485 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1486 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1488 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; *divv2df3: packed double divide (divpd).  Operand 1 is a register tied
;; to the destination; operand 2 may be an SSE register or memory.
1490 (define_insn "*divv2df3"
1491 [(set (match_operand:V2DF 0 "register_operand" "=x")
1492 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1493 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1495 "divpd\t{%2, %0|%0, %2}"
1496 [(set_attr "type" "ssediv")
1497 (set_attr "mode" "V2DF")])
;; sse2_vmdivv2df3: scalar double divide (divsd) expressed on V2DF
;; vectors.  Operand 1 is a register tied to the destination; operand 2
;; may be an SSE register or memory.
1499 (define_insn "sse2_vmdivv2df3"
1500 [(set (match_operand:V2DF 0 "register_operand" "=x")
1502 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1503 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1507 "divsd\t{%2, %0|%0, %2}"
1508 [(set_attr "type" "ssediv")
1509 (set_attr "mode" "DF")])
;; sqrtv2df2: packed double square root (sqrtpd).  The source may be an
;; SSE register or memory and is not tied to the destination.
1511 (define_insn "sqrtv2df2"
1512 [(set (match_operand:V2DF 0 "register_operand" "=x")
1513 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1515 "sqrtpd\t{%1, %0|%0, %1}"
1516 [(set_attr "type" "sse")
1517 (set_attr "mode" "V2DF")])
;; sse2_vmsqrtv2df2: scalar double square root (sqrtsd) expressed on V2DF
;; vectors.  Operand 1 is the value whose square root is taken and may be
;; an SSE register or memory, so its predicate is nonimmediate_operand to
;; agree with the "xm" constraint.  Operand 2 is tied to the destination.
;; The "mode" attribute is DF, as for the other scalar double patterns.
1519 (define_insn "sse2_vmsqrtv2df2"
1520 [(set (match_operand:V2DF 0 "register_operand" "=x")
1522 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1523 (match_operand:V2DF 2 "register_operand" "0")
1526 "sqrtsd\t{%1, %0|%0, %1}"
1527 [(set_attr "type" "sse")
1528 (set_attr "mode" "DF")])
1530 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1531 ;; isn't really correct, as those rtl operators aren't defined when
1532 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smaxv2df3: V2DF maximum expander.  Unless flag_finite_math_only is
;; set, operand 1 is forced into a register first.  The operands are then
;; passed through ix86_fixup_binary_operands_no_copy.
1534 (define_expand "smaxv2df3"
1535 [(set (match_operand:V2DF 0 "register_operand" "")
1536 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1537 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1540 if (!flag_finite_math_only)
1541 operands[1] = force_reg (V2DFmode, operands[1]);
1542 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; *smaxv2df3_finite: packed double maximum (maxpd), available only under
;; flag_finite_math_only.  In this variant operand 1 is marked
;; commutative ("%0") and may be any nonimmediate operand.
1545 (define_insn "*smaxv2df3_finite"
1546 [(set (match_operand:V2DF 0 "register_operand" "=x")
1547 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1548 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1549 "TARGET_SSE2 && flag_finite_math_only
1550 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1551 "maxpd\t{%2, %0|%0, %2}"
1552 [(set_attr "type" "sseadd")
1553 (set_attr "mode" "V2DF")])
;; *smaxv2df3: packed double maximum (maxpd) without the commutative
;; marker: operand 1 must be a register tied to the destination, so the
;; operand order is preserved.
1555 (define_insn "*smaxv2df3"
1556 [(set (match_operand:V2DF 0 "register_operand" "=x")
1557 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1558 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1560 "maxpd\t{%2, %0|%0, %2}"
1561 [(set_attr "type" "sseadd")
1562 (set_attr "mode" "V2DF")])
;; *sse2_vmsmaxv2df3_finite: scalar double maximum (maxsd), available
;; only under flag_finite_math_only.  Operand 1 is marked commutative
;; ("%0").
1564 (define_insn "*sse2_vmsmaxv2df3_finite"
1565 [(set (match_operand:V2DF 0 "register_operand" "=x")
1567 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1568 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1571 "TARGET_SSE2 && flag_finite_math_only
1572 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1573 "maxsd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sseadd")
1575 (set_attr "mode" "DF")])
;; sse2_vmsmaxv2df3: scalar double maximum (maxsd) without the
;; commutative marker: operand 1 must be a register tied to the
;; destination.
1577 (define_insn "sse2_vmsmaxv2df3"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1580 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1581 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1585 "maxsd\t{%2, %0|%0, %2}"
1586 [(set_attr "type" "sseadd")
1587 (set_attr "mode" "DF")])
;; sminv2df3: V2DF minimum expander.  Unless flag_finite_math_only is
;; set, operand 1 is forced into a register first.  The operands are then
;; passed through ix86_fixup_binary_operands_no_copy.
1589 (define_expand "sminv2df3"
1590 [(set (match_operand:V2DF 0 "register_operand" "")
1591 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1592 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1595 if (!flag_finite_math_only)
1596 operands[1] = force_reg (V2DFmode, operands[1]);
1597 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; *sminv2df3_finite: packed double minimum (minpd), available only under
;; flag_finite_math_only.  In this variant operand 1 is marked
;; commutative ("%0") and may be any nonimmediate operand.
1600 (define_insn "*sminv2df3_finite"
1601 [(set (match_operand:V2DF 0 "register_operand" "=x")
1602 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1603 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1604 "TARGET_SSE2 && flag_finite_math_only
1605 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1606 "minpd\t{%2, %0|%0, %2}"
1607 [(set_attr "type" "sseadd")
1608 (set_attr "mode" "V2DF")])
;; *sminv2df3: packed double minimum (minpd) without the commutative
;; marker: operand 1 must be a register tied to the destination, so the
;; operand order is preserved.
1610 (define_insn "*sminv2df3"
1611 [(set (match_operand:V2DF 0 "register_operand" "=x")
1612 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1613 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1615 "minpd\t{%2, %0|%0, %2}"
1616 [(set_attr "type" "sseadd")
1617 (set_attr "mode" "V2DF")])
;; *sse2_vmsminv2df3_finite: scalar double minimum (minsd), available
;; only under flag_finite_math_only.  Operand 1 is marked commutative
;; ("%0").
1619 (define_insn "*sse2_vmsminv2df3_finite"
1620 [(set (match_operand:V2DF 0 "register_operand" "=x")
1622 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1623 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1626 "TARGET_SSE2 && flag_finite_math_only
1627 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1628 "minsd\t{%2, %0|%0, %2}"
1629 [(set_attr "type" "sseadd")
1630 (set_attr "mode" "DF")])
;; sse2_vmsminv2df3: scalar double minimum (minsd) without the
;; commutative marker: operand 1 must be a register tied to the
;; destination.
1632 (define_insn "sse2_vmsminv2df3"
1633 [(set (match_operand:V2DF 0 "register_operand" "=x")
1635 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1636 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1640 "minsd\t{%2, %0|%0, %2}"
1641 [(set_attr "type" "sseadd")
1642 (set_attr "mode" "DF")])
;; sse3_addsubv2df3: addsubpd.  The pattern combines an operation on
;; operands 1 and 2 with the difference of the same two operands
;; (match_dup 1, match_dup 2).  Operand 1 is tied to the destination.
1644 (define_insn "sse3_addsubv2df3"
1645 [(set (match_operand:V2DF 0 "register_operand" "=x")
1648 (match_operand:V2DF 1 "register_operand" "0")
1649 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1650 (minus:V2DF (match_dup 1) (match_dup 2))
1653 "addsubpd\t{%2, %0|%0, %2}"
1654 [(set_attr "type" "sseadd")
1655 (set_attr "mode" "V2DF")])
;; sse3_haddv2df3: horizontal operation emitted as haddpd.  One result
;; half combines elements 0 and 1 of operand 1 (tied to the destination);
;; the other combines elements 0 and 1 of operand 2 (register or memory).
1657 (define_insn "sse3_haddv2df3"
1658 [(set (match_operand:V2DF 0 "register_operand" "=x")
1662 (match_operand:V2DF 1 "register_operand" "0")
1663 (parallel [(const_int 0)]))
1664 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1667 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1668 (parallel [(const_int 0)]))
1669 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1671 "haddpd\t{%2, %0|%0, %2}"
1672 [(set_attr "type" "sseadd")
1673 (set_attr "mode" "V2DF")])
;; sse3_hsubv2df3: horizontal operation emitted as hsubpd.  One result
;; half combines elements 0 and 1 of operand 1 (tied to the destination);
;; the other combines elements 0 and 1 of operand 2 (register or memory).
1675 (define_insn "sse3_hsubv2df3"
1676 [(set (match_operand:V2DF 0 "register_operand" "=x")
1680 (match_operand:V2DF 1 "register_operand" "0")
1681 (parallel [(const_int 0)]))
1682 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1685 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1686 (parallel [(const_int 0)]))
1687 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1689 "hsubpd\t{%2, %0|%0, %2}"
1690 [(set_attr "type" "sseadd")
1691 (set_attr "mode" "V2DF")])
1693 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1695 ;; Parallel double-precision floating point comparisons
1697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_maskcmpv2df3: packed double comparison producing a V2DF result.
;; Operator 3 must satisfy sse_comparison_operator; its condition is
;; printed into the mnemonic via %D3 (cmp<cond>pd).  Operand 1 is tied to
;; the destination.
1699 (define_insn "sse2_maskcmpv2df3"
1700 [(set (match_operand:V2DF 0 "register_operand" "=x")
1701 (match_operator:V2DF 3 "sse_comparison_operator"
1702 [(match_operand:V2DF 1 "register_operand" "0")
1703 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1705 "cmp%D3pd\t{%2, %0|%0, %2}"
1706 [(set_attr "type" "ssecmp")
1707 (set_attr "mode" "V2DF")])
;; sse2_vmmaskcmpv2df3: scalar double comparison (cmp<cond>sd) expressed
;; on V2DF vectors.  Operator 3 must satisfy sse_comparison_operator and
;; is printed via %D3.  Operand 1 is tied to the destination.
1709 (define_insn "sse2_vmmaskcmpv2df3"
1710 [(set (match_operand:V2DF 0 "register_operand" "=x")
1712 (match_operator:V2DF 3 "sse_comparison_operator"
1713 [(match_operand:V2DF 1 "register_operand" "0")
1714 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1718 "cmp%D3sd\t{%2, %0|%0, %2}"
1719 [(set_attr "type" "ssecmp")
1720 (set_attr "mode" "DF")])
;; sse2_comi: comisd.  Sets the flags register in CCFPmode from element 0
;; of operand 0 (register) and element 0 of operand 1 (register or
;; memory).
1722 (define_insn "sse2_comi"
1723 [(set (reg:CCFP FLAGS_REG)
1726 (match_operand:V2DF 0 "register_operand" "x")
1727 (parallel [(const_int 0)]))
1729 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1730 (parallel [(const_int 0)]))))]
1732 "comisd\t{%1, %0|%0, %1}"
1733 [(set_attr "type" "ssecomi")
1734 (set_attr "mode" "DF")])
;; sse2_ucomi: ucomisd.  Same operand shape as sse2_comi, but the flags
;; register is set in CCFPUmode.
1736 (define_insn "sse2_ucomi"
1737 [(set (reg:CCFPU FLAGS_REG)
1740 (match_operand:V2DF 0 "register_operand" "x")
1741 (parallel [(const_int 0)]))
1743 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1744 (parallel [(const_int 0)]))))]
1746 "ucomisd\t{%1, %0|%0, %1}"
1747 [(set_attr "type" "ssecomi")
1748 (set_attr "mode" "DF")])
;; vcondv2df: vector conditional expander.  Operator 3 compares the V2DF
;; operands 4 and 5; operands 1 and 2 are the two candidate values for
;; the V2DF result in operand 0.  The work is handed to
;; ix86_expand_fp_vcond, whose return value decides how the expansion
;; concludes.
1750 (define_expand "vcondv2df"
1751 [(set (match_operand:V2DF 0 "register_operand" "")
1753 (match_operator 3 ""
1754 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1755 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1756 (match_operand:V2DF 1 "general_operand" "")
1757 (match_operand:V2DF 2 "general_operand" "")))]
1760 if (ix86_expand_fp_vcond (operands))
1766 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1768 ;; Parallel double-precision floating point logical operations
1770 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; andv2df3: bitwise AND expander on V2DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by *andv2df3.
1772 (define_expand "andv2df3"
1773 [(set (match_operand:V2DF 0 "register_operand" "")
1774 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1775 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1777 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; *andv2df3: andpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an SSE register or memory.
1779 (define_insn "*andv2df3"
1780 [(set (match_operand:V2DF 0 "register_operand" "=x")
1781 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1782 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1783 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1784 "andpd\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "sselog")
1786 (set_attr "mode" "V2DF")])
;; sse2_nandv2df3: andnpd, i.e. (NOT operand 1) AND operand 2.  The
;; complemented operand 1 is the register tied to the destination.
1788 (define_insn "sse2_nandv2df3"
1789 [(set (match_operand:V2DF 0 "register_operand" "=x")
1790 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1791 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1793 "andnpd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "sselog")
1795 (set_attr "mode" "V2DF")])
;; iorv2df3: bitwise OR expander on V2DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by *iorv2df3.
1797 (define_expand "iorv2df3"
1798 [(set (match_operand:V2DF 0 "register_operand" "")
1799 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1800 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1802 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; *iorv2df3: orpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an SSE register or memory.
1804 (define_insn "*iorv2df3"
1805 [(set (match_operand:V2DF 0 "register_operand" "=x")
1806 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1807 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1808 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1809 "orpd\t{%2, %0|%0, %2}"
1810 [(set_attr "type" "sselog")
1811 (set_attr "mode" "V2DF")])
;; xorv2df3: bitwise XOR expander on V2DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by *xorv2df3.
1813 (define_expand "xorv2df3"
1814 [(set (match_operand:V2DF 0 "register_operand" "")
1815 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1816 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1818 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; *xorv2df3: xorpd.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be an SSE register or memory.
1820 (define_insn "*xorv2df3"
1821 [(set (match_operand:V2DF 0 "register_operand" "=x")
1822 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1823 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1824 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1825 "xorpd\t{%2, %0|%0, %2}"
1826 [(set_attr "type" "sselog")
1827 (set_attr "mode" "V2DF")])
1829 ;; Also define scalar versions. These are used for abs, neg, and
1830 ;; conditional move. Using subregs into vector modes causes register
1831 ;; allocation lossage. These patterns do not allow memory operands
1832 ;; because the native instructions read the full 128-bits.
;; *anddf3: bitwise AND of two DFmode registers, emitted as the packed
;; andpd.  Both inputs are register-only, and the "mode" attribute is
;; V2DF to reflect the full-width instruction.
1834 (define_insn "*anddf3"
1835 [(set (match_operand:DF 0 "register_operand" "=x")
1836 (and:DF (match_operand:DF 1 "register_operand" "0")
1837 (match_operand:DF 2 "register_operand" "x")))]
1839 "andpd\t{%2, %0|%0, %2}"
1840 [(set_attr "type" "sselog")
1841 (set_attr "mode" "V2DF")])
;; *nanddf3: (NOT operand 1) AND operand 2 on DFmode registers, emitted
;; as the packed andnpd.  Both inputs are register-only.
1843 (define_insn "*nanddf3"
1844 [(set (match_operand:DF 0 "register_operand" "=x")
1845 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1846 (match_operand:DF 2 "register_operand" "x")))]
1848 "andnpd\t{%2, %0|%0, %2}"
1849 [(set_attr "type" "sselog")
1850 (set_attr "mode" "V2DF")])
;; *iordf3: bitwise OR of two DFmode registers, emitted as the packed
;; orpd.  Both inputs are register-only.
1852 (define_insn "*iordf3"
1853 [(set (match_operand:DF 0 "register_operand" "=x")
1854 (ior:DF (match_operand:DF 1 "register_operand" "0")
1855 (match_operand:DF 2 "register_operand" "x")))]
1857 "orpd\t{%2, %0|%0, %2}"
1858 [(set_attr "type" "sselog")
1859 (set_attr "mode" "V2DF")])
;; *xordf3: bitwise XOR of two DFmode registers, emitted as the packed
;; xorpd.  Both inputs are register-only.
1861 (define_insn "*xordf3"
1862 [(set (match_operand:DF 0 "register_operand" "=x")
1863 (xor:DF (match_operand:DF 1 "register_operand" "0")
1864 (match_operand:DF 2 "register_operand" "x")))]
1866 "xorpd\t{%2, %0|%0, %2}"
1867 [(set_attr "type" "sselog")
1868 (set_attr "mode" "V2DF")])
1870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1872 ;; Parallel double-precision floating point conversion operations
1874 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: convert the two integers of the V2SI operand 1 to a
;; V2DF result.  The source is either an MMX register ("y") or memory;
;; the "unit" attribute is "mmx" only for the register alternative.
1876 (define_insn "sse2_cvtpi2pd"
1877 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1878 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1880 "cvtpi2pd\t{%1, %0|%0, %1}"
1881 [(set_attr "type" "ssecvt")
1882 (set_attr "unit" "mmx,*")
1883 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2DF -> V2SI conversion into an MMX register ("y"),
;; represented as an UNSPEC_FIX_NOTRUNC unspec and emitted as cvtpd2pi.
1885 (define_insn "sse2_cvtpd2pi"
1886 [(set (match_operand:V2SI 0 "register_operand" "=y")
1887 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1888 UNSPEC_FIX_NOTRUNC))]
1890 "cvtpd2pi\t{%1, %0|%0, %1}"
1891 [(set_attr "type" "ssecvt")
1892 (set_attr "unit" "mmx")
1893 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: V2DF -> V2SI conversion into an MMX register ("y"),
;; expressed with the `fix' rtx and emitted as cvttpd2pi.  The result is
;; a 64-bit V2SI value, so the "mode" attribute is DI, matching the
;; sse2_cvtpd2pi pattern.
1895 (define_insn "sse2_cvttpd2pi"
1896 [(set (match_operand:V2SI 0 "register_operand" "=y")
1897 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1899 "cvttpd2pi\t{%1, %0|%0, %1}"
1900 [(set_attr "type" "ssecvt")
1901 (set_attr "unit" "mmx")
1902 (set_attr "mode" "DI")])
;; sse2_cvtsi2sd: convert the SImode operand 2 (general register or
;; memory) to DF and combine it with the V2DF operand 1, which is tied to
;; the destination.
1904 (define_insn "sse2_cvtsi2sd"
1905 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1908 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1909 (match_operand:V2DF 1 "register_operand" "0,0")
1912 "cvtsi2sd\t{%2, %0|%0, %2}"
1913 [(set_attr "type" "sseicvt")
1914 (set_attr "mode" "DF")
1915 (set_attr "athlon_decode" "double,direct")])
;; sse2_cvtsi2sdq: as sse2_cvtsi2sd, but the integer source (operand 2)
;; is DImode.  Requires both TARGET_SSE2 and TARGET_64BIT.
1917 (define_insn "sse2_cvtsi2sdq"
1918 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1921 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1922 (match_operand:V2DF 1 "register_operand" "0,0")
1924 "TARGET_SSE2 && TARGET_64BIT"
1925 "cvtsi2sdq\t{%2, %0|%0, %2}"
1926 [(set_attr "type" "sseicvt")
1927 (set_attr "mode" "DF")
1928 (set_attr "athlon_decode" "double,direct")])
;; sse2_cvtsd2si: convert element 0 of the V2DF operand 1 (SSE register
;; or memory) to an SImode general register.  Represented as an
;; UNSPEC_FIX_NOTRUNC unspec and emitted as cvtsd2si.
1930 (define_insn "sse2_cvtsd2si"
1931 [(set (match_operand:SI 0 "register_operand" "=r,r")
1934 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1935 (parallel [(const_int 0)]))]
1936 UNSPEC_FIX_NOTRUNC))]
1938 "cvtsd2si\t{%1, %0|%0, %1}"
1939 [(set_attr "type" "sseicvt")
1940 (set_attr "athlon_decode" "double,vector")
1941 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: as sse2_cvtsd2si, but the result is a DImode general
;; register.  Requires both TARGET_SSE2 and TARGET_64BIT.
1943 (define_insn "sse2_cvtsd2siq"
1944 [(set (match_operand:DI 0 "register_operand" "=r,r")
1947 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1948 (parallel [(const_int 0)]))]
1949 UNSPEC_FIX_NOTRUNC))]
1950 "TARGET_SSE2 && TARGET_64BIT"
1951 "cvtsd2siq\t{%1, %0|%0, %1}"
1952 [(set_attr "type" "sseicvt")
1953 (set_attr "athlon_decode" "double,vector")
1954 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: convert element 0 of the V2DF operand 1 (SSE register
;; or memory) to an SImode general register; emitted as cvttsd2si.
1956 (define_insn "sse2_cvttsd2si"
1957 [(set (match_operand:SI 0 "register_operand" "=r,r")
1960 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1961 (parallel [(const_int 0)]))))]
1963 "cvttsd2si\t{%1, %0|%0, %1}"
1964 [(set_attr "type" "sseicvt")
1965 (set_attr "mode" "SI")
1966 (set_attr "athlon_decode" "double,vector")])
;; sse2_cvttsd2siq: as sse2_cvttsd2si, but the result is a DImode general
;; register.  Requires both TARGET_SSE2 and TARGET_64BIT.
1968 (define_insn "sse2_cvttsd2siq"
1969 [(set (match_operand:DI 0 "register_operand" "=r,r")
1972 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1973 (parallel [(const_int 0)]))))]
1974 "TARGET_SSE2 && TARGET_64BIT"
1975 "cvttsd2siq\t{%1, %0|%0, %1}"
1976 [(set_attr "type" "sseicvt")
1977 (set_attr "mode" "DI")
1978 (set_attr "athlon_decode" "double,vector")])
;; sse2_cvtdq2pd: convert elements 0 and 1 of the V4SI operand 1 (SSE
;; register or memory) into a V2DF result; emitted as cvtdq2pd.
1980 (define_insn "sse2_cvtdq2pd"
1981 [(set (match_operand:V2DF 0 "register_operand" "=x")
1984 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1985 (parallel [(const_int 0) (const_int 1)]))))]
1987 "cvtdq2pd\t{%1, %0|%0, %1}"
1988 [(set_attr "type" "ssecvt")
1989 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2dq: expander for the V2DF -> V4SI conversion.  It supplies
;; operand 2 as the V2SI zero constant expected by *sse2_cvtpd2dq.
1991 (define_expand "sse2_cvtpd2dq"
1992 [(set (match_operand:V4SI 0 "register_operand" "")
1994 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1998 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: cvtpd2dq.  The V4SI result pairs the V2SI conversion
;; of operand 1 with operand 2, which must be the zero constant
;; (const0_operand).
2000 (define_insn "*sse2_cvtpd2dq"
2001 [(set (match_operand:V4SI 0 "register_operand" "=x")
2003 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2005 (match_operand:V2SI 2 "const0_operand" "")))]
2007 "cvtpd2dq\t{%1, %0|%0, %1}"
2008 [(set_attr "type" "ssecvt")
2009 (set_attr "mode" "TI")])
;; sse2_cvttpd2dq: expander for the `fix'-based V2DF -> V4SI conversion.
;; It supplies operand 2 as the V2SI zero constant expected by
;; *sse2_cvttpd2dq.
2011 (define_expand "sse2_cvttpd2dq"
2012 [(set (match_operand:V4SI 0 "register_operand" "")
2014 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2017 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvttpd2dq: cvttpd2dq.  The V4SI result pairs the `fix'
;; conversion of operand 1 with operand 2, which must be the zero
;; constant (const0_operand).
2019 (define_insn "*sse2_cvttpd2dq"
2020 [(set (match_operand:V4SI 0 "register_operand" "=x")
2022 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2023 (match_operand:V2SI 2 "const0_operand" "")))]
2025 "cvttpd2dq\t{%1, %0|%0, %1}"
2026 [(set_attr "type" "ssecvt")
2027 (set_attr "mode" "TI")])
;; sse2_cvtsd2ss: cvtsd2ss.  The V2DF operand 2 (SSE register or memory)
;; is narrowed with float_truncate and combined with the V4SF operand 1,
;; which is tied to the destination.
2029 (define_insn "sse2_cvtsd2ss"
2030 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2033 (float_truncate:V2SF
2034 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2035 (match_operand:V4SF 1 "register_operand" "0,0")
2038 "cvtsd2ss\t{%2, %0|%0, %2}"
2039 [(set_attr "type" "ssecvt")
2040 (set_attr "athlon_decode" "vector,double")
2041 (set_attr "mode" "SF")])
;; sse2_cvtss2sd: cvtss2sd.  Elements 0 and 1 of the V4SF operand 2 (SSE
;; register or memory) are selected for conversion and combined with the
;; V2DF operand 1, which is tied to the destination.
2043 (define_insn "sse2_cvtss2sd"
2044 [(set (match_operand:V2DF 0 "register_operand" "=x")
2048 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2049 (parallel [(const_int 0) (const_int 1)])))
2050 (match_operand:V2DF 1 "register_operand" "0")
2053 "cvtss2sd\t{%2, %0|%0, %2}"
2054 [(set_attr "type" "ssecvt")
2055 (set_attr "mode" "DF")])
;; sse2_cvtpd2ps: expander for the V2DF -> V4SF narrowing conversion.
;; It supplies operand 2 as the V2SF zero constant expected by
;; *sse2_cvtpd2ps.  Like the other expanders in this file, the operand
;; constraints are left empty; constraints belong on the insn pattern.
2057 (define_expand "sse2_cvtpd2ps"
2058 [(set (match_operand:V4SF 0 "register_operand" "")
2060 (float_truncate:V2SF
2061 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2064 "operands[2] = CONST0_RTX (V2SFmode);")
;; *sse2_cvtpd2ps: cvtpd2ps.  The V4SF result pairs the float_truncate of
;; operand 1 with operand 2, which must be the zero constant
;; (const0_operand).
2066 (define_insn "*sse2_cvtpd2ps"
2067 [(set (match_operand:V4SF 0 "register_operand" "=x")
2069 (float_truncate:V2SF
2070 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2071 (match_operand:V2SF 2 "const0_operand" "")))]
2073 "cvtpd2ps\t{%1, %0|%0, %1}"
2074 [(set_attr "type" "ssecvt")
2075 (set_attr "mode" "V4SF")])
;; sse2_cvtps2pd: convert elements 0 and 1 of the V4SF operand 1 (SSE
;; register or memory) into a V2DF result; emitted as cvtps2pd.
2077 (define_insn "sse2_cvtps2pd"
2078 [(set (match_operand:V2DF 0 "register_operand" "=x")
2081 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2082 (parallel [(const_int 0) (const_int 1)]))))]
2084 "cvtps2pd\t{%1, %0|%0, %1}"
2085 [(set_attr "type" "ssecvt")
2086 (set_attr "mode" "V2DF")])
2088 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2090 ;; Parallel double-precision floating point element swizzling
2092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_unpckhpd: element selection over the V2DF operands 1 and 2.
;; Three alternatives:
;;   0: all registers            -> unpckhpd
;;   1: operand 1 is offsettable memory, operand 2 tied to the destination
;;                               -> movlpd load from %H1
;;   2: memory destination, operand 1 in a register -> movhpd store
;; The insn condition rejects operands 1 and 2 both being memory.
2094 (define_insn "sse2_unpckhpd"
2095 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2098 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2099 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2100 (parallel [(const_int 1)
2102 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2104 unpckhpd\t{%2, %0|%0, %2}
2105 movlpd\t{%H1, %0|%0, %H1}
2106 movhpd\t{%1, %0|%0, %1}"
2107 [(set_attr "type" "sselog,ssemov,ssemov")
2108 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; *sse3_movddup: single-source V2DF shuffle emitted as movddup when the
;; destination is a register (source in a register or memory).  The
;; second alternative has an offsettable memory destination with a
;; register source.  The only operands of this pattern are 0
;; (destination) and 1 (source), so the insn condition excludes the
;; memory-to-memory case by testing those two operands.
2110 (define_insn "*sse3_movddup"
2111 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2114 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2116 (parallel [(const_int 0)
2118 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2120 movddup\t{%1, %0|%0, %1}
2122 [(set_attr "type" "sselog,ssemov")
2123 (set_attr "mode" "V2DF")])
;; Split, valid only after reload with TARGET_SSE3: a V2DF shuffle of a
;; register source into a memory destination.  The source register is
;; viewed in DFmode (`low') and stored twice, at byte offsets 0 and 8 of
;; the destination.
2126 [(set (match_operand:V2DF 0 "memory_operand" "")
2129 (match_operand:V2DF 1 "register_operand" "")
2131 (parallel [(const_int 0)
2133 "TARGET_SSE3 && reload_completed"
2136 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2137 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2138 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Low-half unpack of two V2DF operands.  Alternative 0 is the register
;; form (unpcklpd); alternative 1 takes operand 2 from memory (movhpd);
;; alternative 2 has an offsettable-memory destination and stores
;; operand 2 to %H0 with movlpd.  Operands 1 and 2 may not both be memory.
2142 (define_insn "sse2_unpcklpd"
2143 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2146 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2147 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2148 (parallel [(const_int 0)
2150 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2152 unpcklpd\t{%2, %0|%0, %2}
2153 movhpd\t{%2, %0|%0, %2}
2154 movlpd\t{%2, %H0|%H0, %2}"
2155 [(set_attr "type" "sselog,ssemov,ssemov")
2156 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Immediate-mask shufpd expander.  Operand 3 is the integer mask; it is
;; translated into the explicit element selectors taken by
;; sse2_shufpd_1.  Bit 1 of the mask chooses selector 3 or 2 for the
;; final argument.
2158 (define_expand "sse2_shufpd"
2159 [(match_operand:V2DF 0 "register_operand" "")
2160 (match_operand:V2DF 1 "register_operand" "")
2161 (match_operand:V2DF 2 "nonimmediate_operand" "")
2162 (match_operand:SI 3 "const_int_operand" "")]
2165 int mask = INTVAL (operands[3]);
2166 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2168 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors.  Operand 3 (0..1) and
;; operand 4 (2..3) are folded back into the instruction's immediate:
;; operand 3 supplies bit 0 and (operand 4 - 2) supplies bit 1.  The
;; result overwrites operands[3] before the template is returned.
2172 (define_insn "sse2_shufpd_1"
2173 [(set (match_operand:V2DF 0 "register_operand" "=x")
2176 (match_operand:V2DF 1 "register_operand" "0")
2177 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2178 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2179 (match_operand 4 "const_2_to_3_operand" "")])))]
2183 mask = INTVAL (operands[3]);
2184 mask |= (INTVAL (operands[4]) - 2) << 1;
2185 operands[3] = GEN_INT (mask);
2187 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2189 [(set_attr "type" "sselog")
2190 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF operand into a DF destination.  The
;; memory-destination alternative emits movhpd.  Source and destination
;; may not both be memory.
2192 (define_insn "sse2_storehpd"
2193 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2195 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2196 (parallel [(const_int 1)])))]
2197 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2199 movhpd\t{%1, %0|%0, %1}
2202 [(set_attr "type" "ssemov,sselog1,ssemov")
2203 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 from a V2DF in memory becomes a
;; plain DF move from byte offset 8 of that memory operand.
2206 [(set (match_operand:DF 0 "register_operand" "")
2208 (match_operand:V2DF 1 "memory_operand" "")
2209 (parallel [(const_int 1)])))]
2210 "TARGET_SSE2 && reload_completed"
2211 [(set (match_dup 0) (match_dup 1))]
2213 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF operand into a DF destination.  The
;; memory-destination alternative emits movlpd.  Source and destination
;; may not both be memory.
2216 (define_insn "sse2_storelpd"
2217 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2219 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2220 (parallel [(const_int 0)])))]
2221 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2223 movlpd\t{%1, %0|%0, %1}
2226 [(set_attr "type" "ssemov")
2227 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 into a DF register becomes an
;; ordinary DF move.  The V2DF source is re-expressed in DFmode, either
;; as a DFmode register with the same register number or via
;; gen_lowpart.
;; NOTE(review): the lines that choose between those two forms are not
;; visible in this listing.
2230 [(set (match_operand:DF 0 "register_operand" "")
2232 (match_operand:V2DF 1 "nonimmediate_operand" "")
2233 (parallel [(const_int 0)])))]
2234 "TARGET_SSE2 && reload_completed"
2237 rtx op1 = operands[1];
2239 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2241 op1 = gen_lowpart (DFmode, op1);
2242 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of operand 1 and the DF operand 2.
;; Alternatives emit movhpd (operand 2 in memory), unpcklpd, or shufpd
;; with immediate 1; the last alternative has an offsettable-memory
;; destination.  Operands 1 and 2 may not both be memory.
2246 (define_insn "sse2_loadhpd"
2247 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2250 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2251 (parallel [(const_int 0)]))
2252 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2253 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2255 movhpd\t{%2, %0|%0, %2}
2256 unpcklpd\t{%2, %0|%0, %2}
2257 shufpd\t{$1, %1, %0|%0, %1, 1}
2259 [(set_attr "type" "ssemov,sselog,sselog,other")
2260 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; After reload, with a memory destination whose element 0 is kept:
;; only the other half changes, so store the DF register at byte
;; offset 8.
2263 [(set (match_operand:V2DF 0 "memory_operand" "")
2265 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2266 (match_operand:DF 1 "register_operand" "")))]
2267 "TARGET_SSE2 && reload_completed"
2268 [(set (match_dup 0) (match_dup 1))]
2270 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF from the DF operand 2 and element 1 of operand 1.
;; Operand 1 may also be a constant accepted by vector_move_operand
;; (constraint C, first alternative).  The visible templates emit movsd,
;; movlpd, shufpd with immediate 2, or movhpd from %H1.  Operands 1 and 2
;; may not both be memory.
2273 (define_insn "sse2_loadlpd"
2274 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2276 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2278 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2279 (parallel [(const_int 1)]))))]
2280 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2282 movsd\t{%2, %0|%0, %2}
2283 movlpd\t{%2, %0|%0, %2}
2284 movsd\t{%2, %0|%0, %2}
2285 shufpd\t{$2, %2, %0|%0, %2, 2}
2286 movhpd\t{%H1, %0|%0, %H1}
2288 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2289 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, with a memory destination whose element 1 is kept:
;; the DF register replaces the other element, so it is stored at byte
;; offset 0.  Offset 8 (used previously) is the slot of element 1, which
;; this pattern says must be preserved, and is the offset the companion
;; split that keeps element 0 correctly uses.
2292 [(set (match_operand:V2DF 0 "memory_operand" "")
2294 (match_operand:DF 1 "register_operand" "")
2295 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2296 "TARGET_SSE2 && reload_completed"
2297 [(set (match_dup 0) (match_dup 1))]
2299 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Combine two V2DF operands, with six register/memory alternatives.
;; The templates emit movsd, movlpd, shufpd with immediate 2, or movhps
;; (on %H1 or %H0).
;; NOTE(review): the operator line and the insn condition of this
;; pattern are not visible in this listing.
2302 (define_insn "sse2_movsd"
2303 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2305 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2306 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2310 movsd\t{%2, %0|%0, %2}
2311 movlpd\t{%2, %0|%0, %2}
2312 movlpd\t{%2, %0|%0, %2}
2313 shufpd\t{$2, %2, %0|%0, %2, 2}
2314 movhps\t{%H1, %0|%0, %H1}
2315 movhps\t{%1, %H0|%H0, %1}"
2316 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2317 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF register built from a single DF register-or-memory operand
;; using movddup.
2319 (define_insn "*vec_dupv2df_sse3"
2320 [(set (match_operand:V2DF 0 "register_operand" "=x")
2322 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2324 "movddup\t{%1, %0|%0, %1}"
2325 [(set_attr "type" "sselog1")
2326 (set_attr "mode" "DF")])
;; V2DF register built from a DF register operand that is tied to the
;; destination (constraint "0").
;; NOTE(review): the output template line is not visible in this listing.
2328 (define_insn "*vec_dupv2df"
2329 [(set (match_operand:V2DF 0 "register_operand" "=x")
2331 (match_operand:DF 1 "register_operand" "0")))]
2334 [(set_attr "type" "sselog1")
2335 (set_attr "mode" "V4SF")])
;; V2DF register built from DF operand 1 using movddup.
;; NOTE(review): the second half of the pattern is not visible in this
;; listing.
2337 (define_insn "*vec_concatv2df_sse3"
2338 [(set (match_operand:V2DF 0 "register_operand" "=x")
2340 (match_operand:DF 1 "nonimmediate_operand" "xm")
2343 "movddup\t{%1, %0|%0, %1}"
2344 [(set_attr "type" "sselog1")
2345 (set_attr "mode" "DF")])
;; V2DF register built from two DF operands.  Five alternatives:
;; unpcklpd and movhpd when operand 1 is tied to the destination (Y
;; registers), movsd when operand 1 is in memory and operand 2 matches
;; constraint C, and movlhps / movhps for the x-register alternatives.
2347 (define_insn "*vec_concatv2df"
2348 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2350 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2351 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2354 unpcklpd\t{%2, %0|%0, %2}
2355 movhpd\t{%2, %0|%0, %2}
2356 movsd\t{%1, %0|%0, %1}
2357 movlhps\t{%2, %0|%0, %2}
2358 movhps\t{%2, %0|%0, %2}"
2359 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2360 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element-set expander for V2DF.  Operand 2 is the constant element
;; index; the work is delegated to ix86_expand_vector_set, called with
;; `false' as its first argument.
2362 (define_expand "vec_setv2df"
2363 [(match_operand:V2DF 0 "register_operand" "")
2364 (match_operand:DF 1 "register_operand" "")
2365 (match_operand 2 "const_int_operand" "")]
2368 ix86_expand_vector_set (false, operands[0], operands[1],
2369 INTVAL (operands[2]));
;; Element-extract expander for V2DF.  Operand 2 is the constant element
;; index; the work is delegated to ix86_expand_vector_extract, called
;; with `false' as its first argument.
2373 (define_expand "vec_extractv2df"
2374 [(match_operand:DF 0 "register_operand" "")
2375 (match_operand:V2DF 1 "register_operand" "")
2376 (match_operand 2 "const_int_operand" "")]
2379 ix86_expand_vector_extract (false, operands[0], operands[1],
2380 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DF.  Operand 1 is unconstrained
;; and passed straight to ix86_expand_vector_init, called with `false'
;; as its first argument.
2384 (define_expand "vec_initv2df"
2385 [(match_operand:V2DF 0 "register_operand" "")
2386 (match_operand 1 "" "")]
2389 ix86_expand_vector_init (false, operands[0], operands[1]);
2393 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2395 ;; Parallel integral arithmetic
2397 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the SSE integer vector modes.  The preparation
;; statement sets operands[2] to an all-zero vector of the same mode,
;; forced into a register, for the pattern to use.
2399 (define_expand "neg<mode>2"
2400 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2403 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2405 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Vector integer add expander for all SSEMODEI modes.  Operands are
;; first passed through ix86_fixup_binary_operands_no_copy for PLUS.
2407 (define_expand "add<mode>3"
2408 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2409 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2410 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2412 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Vector integer add: emits padd{b,w,d,q} according to the mode.
;; Operand 1 is commutative ("%0") and tied to the destination.
2414 (define_insn "*add<mode>3"
2415 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2417 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2418 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2419 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2420 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sseiadd")
2422 (set_attr "mode" "TI")])
;; Signed saturating add for V16QI/V8HI: emits padds{b,w}.
2424 (define_insn "sse2_ssadd<mode>3"
2425 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2427 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2428 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2429 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2430 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2431 [(set_attr "type" "sseiadd")
2432 (set_attr "mode" "TI")])
;; Unsigned saturating add for V16QI/V8HI: emits paddus{b,w}.
2434 (define_insn "sse2_usadd<mode>3"
2435 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2437 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2438 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2439 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2440 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2441 [(set_attr "type" "sseiadd")
2442 (set_attr "mode" "TI")])
;; Vector integer subtract expander for all SSEMODEI modes.  Operand 1
;; must be a register; operands are passed through
;; ix86_fixup_binary_operands_no_copy for MINUS.
2444 (define_expand "sub<mode>3"
2445 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2446 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2447 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2449 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Vector integer subtract: emits psub{b,w,d,q}.  Operand 1 is tied to
;; the destination and is not marked commutative.
2451 (define_insn "*sub<mode>3"
2452 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2454 (match_operand:SSEMODEI 1 "register_operand" "0")
2455 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2457 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2458 [(set_attr "type" "sseiadd")
2459 (set_attr "mode" "TI")])
;; Signed saturating subtract for V16QI/V8HI: emits psubs{b,w}.
2461 (define_insn "sse2_sssub<mode>3"
2462 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2464 (match_operand:SSEMODE12 1 "register_operand" "0")
2465 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2467 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2468 [(set_attr "type" "sseiadd")
2469 (set_attr "mode" "TI")])
;; Unsigned saturating subtract for V16QI/V8HI: emits psubus{b,w}.
2471 (define_insn "sse2_ussub<mode>3"
2472 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2474 (match_operand:SSEMODE12 1 "register_operand" "0")
2475 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2477 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2478 [(set_attr "type" "sseiadd")
2479 (set_attr "mode" "TI")])
;; V16QI multiply, synthesised from two V8HI multiplies (gen_mulv8hi3):
;; the inputs are byte-unpacked into word lanes, multiplied as words, and
;; the wanted bytes are interleaved back with a chain of
;; punpckhbw/punpcklbw steps.  Twelve V16QI temporaries t[0..11] hold the
;; intermediate values.
2481 (define_expand "mulv16qi3"
2482 [(set (match_operand:V16QI 0 "register_operand" "")
2483 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2484 (match_operand:V16QI 2 "register_operand" "")))]
2490 for (i = 0; i < 12; ++i)
2491 t[i] = gen_reg_rtx (V16QImode);
2493 /* Unpack data such that we've got a source byte in each low byte of
2494 each word. We don't care what goes into the high byte of each word.
2495 Rather than trying to get zero in there, most convenient is to let
2496 it be a copy of the low byte. */
2497 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2498 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2499 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2500 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2502 /* Multiply words. The end-of-line annotations here give a picture of what
2503 the output of that instruction looks like. Dot means don't care; the
2504 letters are the bytes of the result with A being the most significant. */
2505 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2506 gen_lowpart (V8HImode, t[0]),
2507 gen_lowpart (V8HImode, t[1])));
2508 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2509 gen_lowpart (V8HImode, t[2]),
2510 gen_lowpart (V8HImode, t[3])));
2512 /* Extract the relevant bytes and merge them back together. */
2513 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2514 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2515 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2516 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2517 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2518 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2521 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for MULT.
2525 (define_expand "mulv8hi3"
2526 [(set (match_operand:V8HI 0 "register_operand" "")
2527 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2528 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2530 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply: emits pmullw.  Operand 1 is commutative ("%0") and
;; tied to the destination; operand 2 may be a register or memory.
2532 (define_insn "*mulv8hi3"
2533 [(set (match_operand:V8HI 0 "register_operand" "=x")
2534 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2535 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2536 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2537 "pmullw\t{%2, %0|%0, %2}"
2538 [(set_attr "type" "sseimul")
2539 (set_attr "mode" "TI")])
;; Signed high-part multiply of V8HI operands: emits pmulhw.
2541 (define_insn "sse2_smulv8hi3_highpart"
2542 [(set (match_operand:V8HI 0 "register_operand" "=x")
2547 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2549 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2551 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2552 "pmulhw\t{%2, %0|%0, %2}"
2553 [(set_attr "type" "sseimul")
2554 (set_attr "mode" "TI")])
;; Unsigned high-part multiply of V8HI operands: emits pmulhuw.
2556 (define_insn "sse2_umulv8hi3_highpart"
2557 [(set (match_operand:V8HI 0 "register_operand" "=x")
2562 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2564 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2566 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2567 "pmulhuw\t{%2, %0|%0, %2}"
2568 [(set_attr "type" "sseimul")
2569 (set_attr "mode" "TI")])
;; Widening multiply (pmuludq): elements 0 and 2 of each V4SI operand are
;; selected and multiplied, giving a V2DI result.
;; The operands checked by ix86_binary_operator_ok are V4SI, so the mode
;; passed to it is V4SImode; it was V8HImode, apparently copied from the
;; V8HI multiply patterns.
2571 (define_insn "sse2_umulv2siv2di3"
2572 [(set (match_operand:V2DI 0 "register_operand" "=x")
2576 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2577 (parallel [(const_int 0) (const_int 2)])))
2580 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2581 (parallel [(const_int 0) (const_int 2)])))))]
2582 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2583 "pmuludq\t{%2, %0|%0, %2}"
2584 [(set_attr "type" "sseimul")
2585 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The pattern
;; takes V4HI selections of each operand twice, once starting at
;; element 0 and once starting at element 1.
;; NOTE(review): several lines of the pattern, including most selector
;; indices, are not visible in this listing.
2587 (define_insn "sse2_pmaddwd"
2588 [(set (match_operand:V4SI 0 "register_operand" "=x")
2593 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2594 (parallel [(const_int 0)
2600 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2601 (parallel [(const_int 0)
2607 (vec_select:V4HI (match_dup 1)
2608 (parallel [(const_int 1)
2613 (vec_select:V4HI (match_dup 2)
2614 (parallel [(const_int 1)
2617 (const_int 7)]))))))]
2619 "pmaddwd\t{%2, %0|%0, %2}"
2620 [(set_attr "type" "sseiadd")
2621 (set_attr "mode" "TI")])
;; V4SI multiply, synthesised from two pmuludq widening multiplies
;; (gen_sse2_umulv2siv2di3): one on the inputs as given, one on the
;; inputs shifted right by 32 bits as a whole 128-bit value.  The two
;; results are shuffled with pshufd and interleaved with punpckldq to
;; form the final vector.
2623 (define_expand "mulv4si3"
2624 [(set (match_operand:V4SI 0 "register_operand" "")
2625 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2626 (match_operand:V4SI 2 "register_operand" "")))]
2629 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2635 t1 = gen_reg_rtx (V4SImode);
2636 t2 = gen_reg_rtx (V4SImode);
2637 t3 = gen_reg_rtx (V4SImode);
2638 t4 = gen_reg_rtx (V4SImode);
2639 t5 = gen_reg_rtx (V4SImode);
2640 t6 = gen_reg_rtx (V4SImode);
2641 thirtytwo = GEN_INT (32);
2643 /* Multiply elements 2 and 0. */
2644 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2646 /* Shift both input vectors down one element, so that elements 3 and 1
2647 are now in the slots for elements 2 and 0. For K8, at least, this is
2648 faster than using a shuffle. */
2649 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2650 gen_lowpart (TImode, op1), thirtytwo));
2651 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2652 gen_lowpart (TImode, op2), thirtytwo));
2654 /* Multiply elements 3 and 1. */
2655 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2657 /* Move the results in element 2 down to element 1; we don't care what
2658 goes in elements 2 and 3. */
2659 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2660 const0_rtx, const0_rtx));
2661 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2662 const0_rtx, const0_rtx));
2664 /* Merge the parts back together. */
2665 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, synthesised from three pmuludq widening multiplies:
;; low(op1)*low(op2), plus the two cross products low*high shifted left
;; by 32 bits.  The high*high product is never computed.
2669 (define_expand "mulv2di3"
2670 [(set (match_operand:V2DI 0 "register_operand" "")
2671 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2672 (match_operand:V2DI 2 "register_operand" "")))]
2675 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2681 t1 = gen_reg_rtx (V2DImode);
2682 t2 = gen_reg_rtx (V2DImode);
2683 t3 = gen_reg_rtx (V2DImode);
2684 t4 = gen_reg_rtx (V2DImode);
2685 t5 = gen_reg_rtx (V2DImode);
2686 t6 = gen_reg_rtx (V2DImode);
2687 thirtytwo = GEN_INT (32);
2689 /* Multiply low parts. */
2690 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2691 gen_lowpart (V4SImode, op2)));
2693 /* Shift input vectors right 32 bits so we can multiply high parts. */
2694 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2695 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2697 /* Multiply high parts by low parts. */
2698 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2699 gen_lowpart (V4SImode, t3)));
2700 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2701 gen_lowpart (V4SImode, t2)));
2703 /* Shift them back. */
2704 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2705 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2707 /* Add the three parts together. */
2708 emit_insn (gen_addv2di3 (t6, t1, t4));
2709 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Shift of V8HI/V4SI by an SImode count: emits psra{w,d}.  The count
;; may be an SSE register or an immediate.
2713 (define_insn "ashr<mode>3"
2714 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2716 (match_operand:SSEMODE24 1 "register_operand" "0")
2717 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2719 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2720 [(set_attr "type" "sseishft")
2721 (set_attr "mode" "TI")])
;; Logical right shift of V8HI/V4SI/V2DI by an SImode count: emits
;; psrl{w,d,q}.  The count may be an SSE register or an immediate.
2723 (define_insn "lshr<mode>3"
2724 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2725 (lshiftrt:SSEMODE248
2726 (match_operand:SSEMODE248 1 "register_operand" "0")
2727 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2729 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2730 [(set_attr "type" "sseishft")
2731 (set_attr "mode" "TI")])
;; Shift of V8HI/V4SI/V2DI by an SImode count: emits psll{w,d,q}.  The
;; count may be an SSE register or an immediate.
2733 (define_insn "ashl<mode>3"
2734 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2736 (match_operand:SSEMODE248 1 "register_operand" "0")
2737 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2739 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2740 [(set_attr "type" "sseishft")
2741 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift: emits pslldq.  Operand 2 is a bit
;; count accepted by const_0_to_255_mul_8_operand; it is divided by 8
;; before output because the instruction template takes the reduced
;; value.
2743 (define_insn "sse2_ashlti3"
2744 [(set (match_operand:TI 0 "register_operand" "=x")
2745 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2746 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2749 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2750 return "pslldq\t{%2, %0|%0, %2}";
2752 [(set_attr "type" "sseishft")
2753 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSE integer modes, expressed as a
;; TImode ashift.  The shift count is tested against
;; const_0_to_255_mul_8_operand, and operands 0 and 1 are re-viewed as
;; TImode with gen_lowpart.
;; NOTE(review): the action taken when the count test fails is on a line
;; not visible in this listing.
2755 (define_expand "vec_shl_<mode>"
2756 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2757 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2758 (match_operand:SI 2 "general_operand" "")))]
2761 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2763 operands[0] = gen_lowpart (TImode, operands[0]);
2764 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift: emits psrldq.  Operand 2
;; is a bit count accepted by const_0_to_255_mul_8_operand; it is divided
;; by 8 before output because the instruction template takes the reduced
;; value.
2767 (define_insn "sse2_lshrti3"
2768 [(set (match_operand:TI 0 "register_operand" "=x")
2769 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2770 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2773 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2774 return "psrldq\t{%2, %0|%0, %2}";
2776 [(set_attr "type" "sseishft")
2777 (set_attr "mode" "TI")])
;; Whole-vector right shift for the SSE integer modes, expressed as a
;; TImode lshiftrt.  The shift count is tested against
;; const_0_to_255_mul_8_operand, and operands 0 and 1 are re-viewed as
;; TImode with gen_lowpart.
;; NOTE(review): the action taken when the count test fails is on a line
;; not visible in this listing.
2779 (define_expand "vec_shr_<mode>"
2780 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2781 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2782 (match_operand:SI 2 "general_operand" "")))]
2785 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2787 operands[0] = gen_lowpart (TImode, operands[0]);
2788 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned V16QI maximum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for UMAX.
2791 (define_expand "umaxv16qi3"
2792 [(set (match_operand:V16QI 0 "register_operand" "")
2793 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2794 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2796 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned V16QI maximum: emits pmaxub.  Operand 1 is commutative and
;; tied to the destination.
2798 (define_insn "*umaxv16qi3"
2799 [(set (match_operand:V16QI 0 "register_operand" "=x")
2800 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2801 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2802 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2803 "pmaxub\t{%2, %0|%0, %2}"
2804 [(set_attr "type" "sseiadd")
2805 (set_attr "mode" "TI")])
;; Signed V8HI maximum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for SMAX.
2807 (define_expand "smaxv8hi3"
2808 [(set (match_operand:V8HI 0 "register_operand" "")
2809 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2810 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2812 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed V8HI maximum: emits pmaxsw.  Operand 1 is commutative and tied
;; to the destination.
2814 (define_insn "*smaxv8hi3"
2815 [(set (match_operand:V8HI 0 "register_operand" "=x")
2816 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2817 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2818 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2819 "pmaxsw\t{%2, %0|%0, %2}"
2820 [(set_attr "type" "sseiadd")
2821 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum, built from a saturating subtract followed by
;; an add of operand 2: us_minus (op1, op2) + op2.
;; operands[3] remembers the caller's destination.  If that destination
;; is the same rtx as operand 2, the intermediate goes into a fresh
;; temporary so operand 2 is still intact for the add.
2823 (define_expand "umaxv8hi3"
2824 [(set (match_operand:V8HI 0 "register_operand" "=x")
2825 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2826 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2828 (plus:V8HI (match_dup 0) (match_dup 2)))]
2831 operands[3] = operands[0];
2832 if (rtx_equal_p (operands[0], operands[2]))
2833 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for V16QI/V4SI, expressed as a vector conditional and
;; handed to ix86_expand_int_vcond.  The xops array is laid out as
;; destination, first value, second value, comparison, and the two
;; compared operands: here (op1 > op2) with op1 first and op2 second.
2836 (define_expand "smax<mode>3"
2837 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2838 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2839 (match_operand:SSEMODE14 2 "register_operand" "")))]
2845 xops[0] = operands[0];
2846 xops[1] = operands[1];
2847 xops[2] = operands[2];
2848 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2849 xops[4] = operands[1];
2850 xops[5] = operands[2];
2851 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expressed as a vector conditional and handed to
;; ix86_expand_int_vcond: the comparison is unsigned (op1 GTU op2), with
;; op1 as the first value and op2 as the second.
2856 (define_expand "umaxv4si3"
2857 [(set (match_operand:V4SI 0 "register_operand" "")
2858 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2859 (match_operand:V4SI 2 "register_operand" "")))]
2865 xops[0] = operands[0];
2866 xops[1] = operands[1];
2867 xops[2] = operands[2];
2868 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2869 xops[4] = operands[1];
2870 xops[5] = operands[2];
2871 ok = ix86_expand_int_vcond (xops);
;; Unsigned V16QI minimum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with the rtx code of this
;; pattern, UMIN.  It was UMAX, apparently copied from umaxv16qi3, and
;; disagreed with the UMIN used by the matching *uminv16qi3 insn.
2876 (define_expand "uminv16qi3"
2877 [(set (match_operand:V16QI 0 "register_operand" "")
2878 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2879 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2881 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned V16QI minimum: emits pminub.  Operand 1 is commutative and
;; tied to the destination.
2883 (define_insn "*uminv16qi3"
2884 [(set (match_operand:V16QI 0 "register_operand" "=x")
2885 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2887 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2888 "pminub\t{%2, %0|%0, %2}"
2889 [(set_attr "type" "sseiadd")
2890 (set_attr "mode" "TI")])
;; Signed V8HI minimum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for SMIN.
2892 (define_expand "sminv8hi3"
2893 [(set (match_operand:V8HI 0 "register_operand" "")
2894 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2895 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2897 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed V8HI minimum: emits pminsw.  Operand 1 is commutative and tied
;; to the destination.
2899 (define_insn "*sminv8hi3"
2900 [(set (match_operand:V8HI 0 "register_operand" "=x")
2901 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2902 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2903 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2904 "pminsw\t{%2, %0|%0, %2}"
2905 [(set_attr "type" "sseiadd")
2906 (set_attr "mode" "TI")])
;; Signed minimum for V16QI/V4SI, expressed as a vector conditional and
;; handed to ix86_expand_int_vcond.  The comparison is still
;; (op1 > op2), but the two values are swapped relative to the max
;; expanders: op2 is the first value and op1 the second.
2908 (define_expand "smin<mode>3"
2909 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2910 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2911 (match_operand:SSEMODE14 2 "register_operand" "")))]
2917 xops[0] = operands[0];
2918 xops[1] = operands[2];
2919 xops[2] = operands[1];
2920 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2921 xops[4] = operands[1];
2922 xops[5] = operands[2];
2923 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI/V4SI, expressed as a vector conditional and
;; handed to ix86_expand_int_vcond.  The comparison is unsigned
;; (op1 GTU op2), with op2 as the first value and op1 as the second.
2928 (define_expand "umin<mode>3"
2929 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2930 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2931 (match_operand:SSEMODE24 2 "register_operand" "")))]
2937 xops[0] = operands[0];
2938 xops[1] = operands[2];
2939 xops[2] = operands[1];
2940 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2941 xops[4] = operands[1];
2942 xops[5] = operands[2];
2943 ok = ix86_expand_int_vcond (xops);
2948 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2950 ;; Parallel integral comparisons
2952 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for V16QI/V8HI/V4SI: emits
;; pcmpeq{b,w,d}.  Operand 1 is commutative and tied to the destination.
2954 (define_insn "sse2_eq<mode>3"
2955 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2957 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2958 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2959 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2960 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2961 [(set_attr "type" "ssecmp")
2962 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V16QI/V8HI/V4SI: emits
;; pcmpgt{b,w,d}.  Operand 1 must be a register tied to the destination
;; and is not marked commutative.
2964 (define_insn "sse2_gt<mode>3"
2965 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2967 (match_operand:SSEMODE124 1 "register_operand" "0")
2968 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2970 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2971 [(set_attr "type" "ssecmp")
2972 (set_attr "mode" "TI")])
;; Vector conditional select for V16QI/V8HI/V4SI: operand 3 is the
;; comparison of operands 4 and 5, choosing between operands 1 and 2.
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): what happens on each outcome of that call is on lines
;; not visible in this listing.
2974 (define_expand "vcond<mode>"
2975 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2976 (if_then_else:SSEMODE124
2977 (match_operator 3 ""
2978 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2979 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2980 (match_operand:SSEMODE124 1 "general_operand" "")
2981 (match_operand:SSEMODE124 2 "general_operand" "")))]
2984 if (ix86_expand_int_vcond (operands))
;; Named vcondu counterpart of vcond<mode>, with the same operand
;; layout: operand 3 compares operands 4 and 5, choosing between
;; operands 1 and 2.  Expansion is delegated to the same helper,
;; ix86_expand_int_vcond.
;; NOTE(review): what happens on each outcome of that call is on lines
;; not visible in this listing.
2990 (define_expand "vcondu<mode>"
2991 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2992 (if_then_else:SSEMODE124
2993 (match_operator 3 ""
2994 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2995 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2996 (match_operand:SSEMODE124 1 "general_operand" "")
2997 (match_operand:SSEMODE124 2 "general_operand" "")))]
3000 if (ix86_expand_int_vcond (operands))
3006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3008 ;; Parallel integral logical operations
3010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSE integer vector modes, expressed as an
;; xor.  The preparation code builds a constant vector with every
;; element set to constm1_rtx (one entry per vector unit), forces it
;; into a register, and stores it in operands[2].
3012 (define_expand "one_cmpl<mode>2"
3013 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3014 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3018 int i, n = GET_MODE_NUNITS (<MODE>mode);
3019 rtvec v = rtvec_alloc (n);
3021 for (i = 0; i < n; ++i)
3022 RTVEC_ELT (v, i) = constm1_rtx;
3024 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for the SSE integer vector modes.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy for AND.
3027 (define_expand "and<mode>3"
3028 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3029 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3030 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3032 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND of SSE integer vectors: emits pand for every mode.
3034 (define_insn "*and<mode>3"
3035 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3037 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3038 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3039 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3040 "pand\t{%2, %0|%0, %2}"
3041 [(set_attr "type" "sselog")
3042 (set_attr "mode" "TI")])
;; pandn: operand 1 is complemented (the `not' is on operand 1 only) and
;; combined with operand 2.  Operand 1 must be a register tied to the
;; destination.
3044 (define_insn "sse2_nand<mode>3"
3045 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3047 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3048 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3050 "pandn\t{%2, %0|%0, %2}"
3051 [(set_attr "type" "sselog")
3052 (set_attr "mode" "TI")])
;; Bitwise OR expander for the SSE integer vector modes.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy for IOR.
3054 (define_expand "ior<mode>3"
3055 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3056 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3057 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3059 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise OR of SSE integer vectors: emits por for every mode.
3061 (define_insn "*ior<mode>3"
3062 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3064 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3065 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3066 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3067 "por\t{%2, %0|%0, %2}"
3068 [(set_attr "type" "sselog")
3069 (set_attr "mode" "TI")])
;; Bitwise XOR expander for the SSE integer vector modes.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy for XOR.
3071 (define_expand "xor<mode>3"
3072 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3073 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3074 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3076 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise XOR of SSE integer vectors: emits pxor for every mode.
3078 (define_insn "*xor<mode>3"
3079 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3081 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3082 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3083 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3084 "pxor\t{%2, %0|%0, %2}"
3085 [(set_attr "type" "sselog")
3086 (set_attr "mode" "TI")])
3088 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3090 ;; Parallel integral element swizzling
3092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packsswb: two V8HI operands are packed into one V16QI result.
;; Operand 1 is tied to the destination.
3094 (define_insn "sse2_packsswb"
3095 [(set (match_operand:V16QI 0 "register_operand" "=x")
3098 (match_operand:V8HI 1 "register_operand" "0"))
3100 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3102 "packsswb\t{%2, %0|%0, %2}"
3103 [(set_attr "type" "sselog")
3104 (set_attr "mode" "TI")])
;; packssdw: two V4SI operands are packed into one V8HI result.
;; Operand 1 is tied to the destination.
3106 (define_insn "sse2_packssdw"
3107 [(set (match_operand:V8HI 0 "register_operand" "=x")
3110 (match_operand:V4SI 1 "register_operand" "0"))
3112 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3114 "packssdw\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sselog")
3116 (set_attr "mode" "TI")])
;; packuswb: two V8HI operands are packed into one V16QI result.
;; Operand 1 is tied to the destination.
3118 (define_insn "sse2_packuswb"
3119 [(set (match_operand:V16QI 0 "register_operand" "=x")
3122 (match_operand:V8HI 1 "register_operand" "0"))
3124 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3126 "packuswb\t{%2, %0|%0, %2}"
3127 [(set_attr "type" "sselog")
3128 (set_attr "mode" "TI")])
;; punpckhbw: interleave the high eight bytes of operands 1 and 2.
;; The selector alternates indices 8..15 (operand 1) with 24..31
;; (operand 2, counted after operand 1's sixteen elements).
3130 (define_insn "sse2_punpckhbw"
3131 [(set (match_operand:V16QI 0 "register_operand" "=x")
3134 (match_operand:V16QI 1 "register_operand" "0")
3135 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3136 (parallel [(const_int 8) (const_int 24)
3137 (const_int 9) (const_int 25)
3138 (const_int 10) (const_int 26)
3139 (const_int 11) (const_int 27)
3140 (const_int 12) (const_int 28)
3141 (const_int 13) (const_int 29)
3142 (const_int 14) (const_int 30)
3143 (const_int 15) (const_int 31)])))]
3145 "punpckhbw\t{%2, %0|%0, %2}"
3146 [(set_attr "type" "sselog")
3147 (set_attr "mode" "TI")])
;; punpcklbw: interleave the low eight bytes of operands 1 and 2.
;; The selector alternates indices 0..7 (operand 1) with 16..23
;; (operand 2, counted after operand 1's sixteen elements).
3149 (define_insn "sse2_punpcklbw"
3150 [(set (match_operand:V16QI 0 "register_operand" "=x")
3153 (match_operand:V16QI 1 "register_operand" "0")
3154 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3155 (parallel [(const_int 0) (const_int 16)
3156 (const_int 1) (const_int 17)
3157 (const_int 2) (const_int 18)
3158 (const_int 3) (const_int 19)
3159 (const_int 4) (const_int 20)
3160 (const_int 5) (const_int 21)
3161 (const_int 6) (const_int 22)
3162 (const_int 7) (const_int 23)])))]
3164 "punpcklbw\t{%2, %0|%0, %2}"
3165 [(set_attr "type" "sselog")
3166 (set_attr "mode" "TI")])
;; punpckhwd: interleave the high four words of operands 1 and 2.
;; The selector alternates indices 4..7 (operand 1) with 12..15
;; (operand 2).
3168 (define_insn "sse2_punpckhwd"
3169 [(set (match_operand:V8HI 0 "register_operand" "=x")
3172 (match_operand:V8HI 1 "register_operand" "0")
3173 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3174 (parallel [(const_int 4) (const_int 12)
3175 (const_int 5) (const_int 13)
3176 (const_int 6) (const_int 14)
3177 (const_int 7) (const_int 15)])))]
3179 "punpckhwd\t{%2, %0|%0, %2}"
3180 [(set_attr "type" "sselog")
3181 (set_attr "mode" "TI")])
;; Named insn that emits `punpcklwd` on V8HI operands.  The selection
;; list pairs index k with index k+8 for k = 0..3.  Indices 8..15
;; presumably address operand 2 in a concatenation of both inputs (the
;; enclosing RTL operator is not visible in this excerpt).  Operand 1 is
;; tied to the destination.
3183 (define_insn "sse2_punpcklwd"
3184 [(set (match_operand:V8HI 0 "register_operand" "=x")
3187 (match_operand:V8HI 1 "register_operand" "0")
3188 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3189 (parallel [(const_int 0) (const_int 8)
3190 (const_int 1) (const_int 9)
3191 (const_int 2) (const_int 10)
3192 (const_int 3) (const_int 11)])))]
3194 "punpcklwd\t{%2, %0|%0, %2}"
3195 [(set_attr "type" "sselog")
3196 (set_attr "mode" "TI")])
;; Named insn that emits `punpckhdq` on V4SI operands.  The selection
;; list is 2, 6, 3, 7.  Indices 4..7 presumably address operand 2 in a
;; concatenation of both inputs (the enclosing RTL operator is not
;; visible in this excerpt).  Operand 1 is tied to the destination.
3198 (define_insn "sse2_punpckhdq"
3199 [(set (match_operand:V4SI 0 "register_operand" "=x")
3202 (match_operand:V4SI 1 "register_operand" "0")
3203 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3204 (parallel [(const_int 2) (const_int 6)
3205 (const_int 3) (const_int 7)])))]
3207 "punpckhdq\t{%2, %0|%0, %2}"
3208 [(set_attr "type" "sselog")
3209 (set_attr "mode" "TI")])
;; Named insn that emits `punpckldq` on V4SI operands.  The selection
;; list is 0, 4, 1, 5.  Indices 4..7 presumably address operand 2 in a
;; concatenation of both inputs (the enclosing RTL operator is not
;; visible in this excerpt).  Operand 1 is tied to the destination.
3211 (define_insn "sse2_punpckldq"
3212 [(set (match_operand:V4SI 0 "register_operand" "=x")
3215 (match_operand:V4SI 1 "register_operand" "0")
3216 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3217 (parallel [(const_int 0) (const_int 4)
3218 (const_int 1) (const_int 5)])))]
3220 "punpckldq\t{%2, %0|%0, %2}"
3221 [(set_attr "type" "sselog")
3222 (set_attr "mode" "TI")])
;; Named insn that emits `punpckhqdq` on V2DI operands.  The selection
;; list starts at index 1; its remaining entries are not visible in this
;; excerpt.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
3224 (define_insn "sse2_punpckhqdq"
3225 [(set (match_operand:V2DI 0 "register_operand" "=x")
3228 (match_operand:V2DI 1 "register_operand" "0")
3229 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3230 (parallel [(const_int 1)
3233 "punpckhqdq\t{%2, %0|%0, %2}"
3234 [(set_attr "type" "sselog")
3235 (set_attr "mode" "TI")])
;; Named insn that emits `punpcklqdq` on V2DI operands.  The selection
;; list starts at index 0; its remaining entries are not visible in this
;; excerpt.  Operand 1 is tied to the destination; operand 2 may be an
;; SSE register or memory.
3237 (define_insn "sse2_punpcklqdq"
3238 [(set (match_operand:V2DI 0 "register_operand" "=x")
3241 (match_operand:V2DI 1 "register_operand" "0")
3242 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3243 (parallel [(const_int 0)
3246 "punpcklqdq\t{%2, %0|%0, %2}"
3247 [(set_attr "type" "sselog")
3248 (set_attr "mode" "TI")])
;; Expander for inserting a scalar into a V8HI register.  Operand 2
;; arrives as SImode and is narrowed to its HImode low part; operand 3
;; arrives as an element index 0..7 and is rewritten as the one-hot value
;; 1 << index before the pattern is generated.
3250 (define_expand "sse2_pinsrw"
3251 [(set (match_operand:V8HI 0 "register_operand" "")
3254 (match_operand:SI 2 "nonimmediate_operand" ""))
3255 (match_operand:V8HI 1 "register_operand" "")
3256 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3259 operands[2] = gen_lowpart (HImode, operands[2]);
3260 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Matching insn for `pinsrw`.  Operand 3 is a power of two accepted by
;; const_pow2_1_to_128_operand; the output code converts it back to an
;; element index with exact_log2 before printing.  Operand 2 (HImode,
;; register or memory) is printed with the `k` modifier.  Operand 1 is
;; tied to the destination.
3263 (define_insn "*sse2_pinsrw"
3264 [(set (match_operand:V8HI 0 "register_operand" "=x")
3267 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3268 (match_operand:V8HI 1 "register_operand" "0")
3269 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3272 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3273 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3275 [(set_attr "type" "sselog")
3276 (set_attr "mode" "TI")])
;; Named insn that emits `pextrw`.  The result is an SImode general
;; register; operand 1 is a V8HI SSE register and operand 2 is an
;; immediate element index in the range 0..7.
3278 (define_insn "sse2_pextrw"
3279 [(set (match_operand:SI 0 "register_operand" "=r")
3282 (match_operand:V8HI 1 "register_operand" "x")
3283 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3285 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3286 [(set_attr "type" "sselog")
3287 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one constant (operand 2).  It
;; splits that constant into four 2-bit selectors (bits 0-1, 2-3, 4-5 and
;; 6-7) and emits sse2_pshufd_1 with them as separate operands.
3289 (define_expand "sse2_pshufd"
3290 [(match_operand:V4SI 0 "register_operand" "")
3291 (match_operand:V4SI 1 "nonimmediate_operand" "")
3292 (match_operand:SI 2 "const_int_operand" "")]
3295 int mask = INTVAL (operands[2]);
3296 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3297 GEN_INT ((mask >> 0) & 3),
3298 GEN_INT ((mask >> 2) & 3),
3299 GEN_INT ((mask >> 4) & 3),
3300 GEN_INT ((mask >> 6) & 3)));
;; Insn for `pshufd` with the selection spelled out as four element
;; indices (operands 2..5, each 0..3).  The output code packs them back
;; into one immediate, two bits per selector starting at bit 0, and
;; stores the result in operands[2] for printing.  The declaration of
;; `mask` is not visible in this excerpt.
3304 (define_insn "sse2_pshufd_1"
3305 [(set (match_operand:V4SI 0 "register_operand" "=x")
3307 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3308 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3309 (match_operand 3 "const_0_to_3_operand" "")
3310 (match_operand 4 "const_0_to_3_operand" "")
3311 (match_operand 5 "const_0_to_3_operand" "")])))]
3315 mask |= INTVAL (operands[2]) << 0;
3316 mask |= INTVAL (operands[3]) << 2;
3317 mask |= INTVAL (operands[4]) << 4;
3318 mask |= INTVAL (operands[5]) << 6;
3319 operands[2] = GEN_INT (mask);
3321 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3323 [(set_attr "type" "sselog1")
3324 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one constant (operand 2).  It
;; splits that constant into four 2-bit selectors (bits 0-1, 2-3, 4-5 and
;; 6-7) and emits sse2_pshuflw_1 with them as separate operands.
3326 (define_expand "sse2_pshuflw"
3327 [(match_operand:V8HI 0 "register_operand" "")
3328 (match_operand:V8HI 1 "nonimmediate_operand" "")
3329 (match_operand:SI 2 "const_int_operand" "")]
3332 int mask = INTVAL (operands[2]);
3333 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3334 GEN_INT ((mask >> 0) & 3),
3335 GEN_INT ((mask >> 2) & 3),
3336 GEN_INT ((mask >> 4) & 3),
3337 GEN_INT ((mask >> 6) & 3)));
;; Insn for `pshuflw`.  Operands 2..5 are element indices in 0..3 and
;; form the start of the selection list; the rest of the list is not
;; visible in this excerpt.  The output code packs the four selectors
;; into one immediate, two bits each starting at bit 0, and stores it in
;; operands[2] for printing.  The declaration of `mask` is not visible.
3341 (define_insn "sse2_pshuflw_1"
3342 [(set (match_operand:V8HI 0 "register_operand" "=x")
3344 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3345 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3346 (match_operand 3 "const_0_to_3_operand" "")
3347 (match_operand 4 "const_0_to_3_operand" "")
3348 (match_operand 5 "const_0_to_3_operand" "")
3356 mask |= INTVAL (operands[2]) << 0;
3357 mask |= INTVAL (operands[3]) << 2;
3358 mask |= INTVAL (operands[4]) << 4;
3359 mask |= INTVAL (operands[5]) << 6;
3360 operands[2] = GEN_INT (mask);
3362 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3364 [(set_attr "type" "sselog")
3365 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one constant (operand 2).  It
;; splits that constant into four 2-bit selectors and adds 4 to each, so
;; sse2_pshufhw_1 receives element indices in the range 4..7.
3367 (define_expand "sse2_pshufhw"
3368 [(match_operand:V8HI 0 "register_operand" "")
3369 (match_operand:V8HI 1 "nonimmediate_operand" "")
3370 (match_operand:SI 2 "const_int_operand" "")]
3373 int mask = INTVAL (operands[2]);
3374 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3375 GEN_INT (((mask >> 0) & 3) + 4),
3376 GEN_INT (((mask >> 2) & 3) + 4),
3377 GEN_INT (((mask >> 4) & 3) + 4),
3378 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn for `pshufhw`.  The selection list begins with a fixed index 0
;; (further fixed entries are not visible in this excerpt) and ends with
;; operands 2..5, each an element index in 4..7.  The output code
;; subtracts 4 from each selector, packs them two bits apiece starting at
;; bit 0, and stores the immediate in operands[2] for printing.  The
;; declaration of `mask` is not visible.
3382 (define_insn "sse2_pshufhw_1"
3383 [(set (match_operand:V8HI 0 "register_operand" "=x")
3385 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3386 (parallel [(const_int 0)
3390 (match_operand 2 "const_4_to_7_operand" "")
3391 (match_operand 3 "const_4_to_7_operand" "")
3392 (match_operand 4 "const_4_to_7_operand" "")
3393 (match_operand 5 "const_4_to_7_operand" "")])))]
3397 mask |= (INTVAL (operands[2]) - 4) << 0;
3398 mask |= (INTVAL (operands[3]) - 4) << 2;
3399 mask |= (INTVAL (operands[4]) - 4) << 4;
3400 mask |= (INTVAL (operands[5]) - 4) << 6;
3401 operands[2] = GEN_INT (mask);
3403 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3405 [(set_attr "type" "sselog")
3406 (set_attr "mode" "TI")])
;; Expander that places an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operands[2] to the V4SI zero vector;
;; how the pattern uses operand 2 is not visible in this excerpt.
3408 (define_expand "sse2_loadd"
3409 [(set (match_operand:V4SI 0 "register_operand" "")
3412 (match_operand:SI 1 "nonimmediate_operand" ""))
3416 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn that places an SImode value (operand 2) into a V4SI register,
;; with operand 1 either zero or tied to the destination.  Three
;; alternatives: `movd` from memory or a general register with a zero
;; operand 1, `movss` from memory with a zero operand 1, and `movss` from
;; an SSE register with operand 1 tied to the destination.
3418 (define_insn "sse2_loadld"
3419 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3422 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3423 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3427 movd\t{%2, %0|%0, %2}
3428 movss\t{%2, %0|%0, %2}
3429 movss\t{%2, %0|%0, %2}"
3430 [(set_attr "type" "ssemov")
3431 (set_attr "mode" "TI,V4SF,SF")])
3433 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3434 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn-and-split that extracts element 0 of a V4SI register into an
;; SImode destination (memory or SSE register).  Once reload has
;; completed it is replaced by a plain move, with operand 1 re-expressed
;; as an SImode register carrying the same register number.
3435 (define_insn_and_split "sse2_stored"
3436 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3438 (match_operand:V4SI 1 "register_operand" "x")
3439 (parallel [(const_int 0)])))]
3442 "&& reload_completed"
3443 [(set (match_dup 0) (match_dup 1))]
3445 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that extracts element 0 of a V2DI register (operand 1) into a
;; DImode destination (operand 0), which may be a register or memory.
3448 (define_expand "sse_storeq"
3449 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3451 (match_operand:V2DI 1 "register_operand" "")
3452 (parallel [(const_int 0)])))]
3456 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3457 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn that extracts element 0 of a V2DI SSE register into a DImode
;; destination (memory or SSE register).  Its condition and output
;; template are not visible in this excerpt.
3458 (define_insn "*sse2_storeq"
3459 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3461 (match_operand:V2DI 1 "register_operand" "x")
3462 (parallel [(const_int 0)])))]
;; NOTE(review): the lines below carry their own condition and
;; replacement pattern.  They look like a post-reload split of the same
;; extraction whose opening line is not visible in this excerpt --
;; confirm.  The replacement is a plain move, with operand 1 re-expressed
;; as a DImode register carrying the same register number.
3467 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3469 (match_operand:V2DI 1 "register_operand" "")
3470 (parallel [(const_int 0)])))]
3471 "TARGET_SSE && reload_completed"
3472 [(set (match_dup 0) (match_dup 1))]
3474 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Insn that builds a V4SI register from an SImode register (operand 1).
;; Two alternatives: `pshufd` with immediate 0 reading operand 1, or
;; `shufps` with immediate 0 applied to the destination itself when
;; operand 1 is tied to it.
3477 (define_insn "*vec_dupv4si"
3478 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3480 (match_operand:SI 1 "register_operand" " Y,0")))]
3483 pshufd\t{$0, %1, %0|%0, %1, 0}
3484 shufps\t{$0, %0, %0|%0, %0, 0}"
3485 [(set_attr "type" "sselog1")
3486 (set_attr "mode" "TI,V4SF")])
;; Insn that builds a V2DI register from a DImode register (operand 1),
;; which is tied to the destination in both alternatives.  The output
;; templates are not visible in this excerpt; the attributes give types
;; sselog1 / ssemov and modes TI / V4SF for the two alternatives.
3488 (define_insn "*vec_dupv2di"
3489 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3491 (match_operand:DI 1 "register_operand" " 0,0")))]
3496 [(set_attr "type" "sselog1,ssemov")
3497 (set_attr "mode" "TI,V4SF")])
3499 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3500 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3501 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Insn that forms a V2SI value from two SImode operands.  Four
;; alternatives: with operand 1 tied to the destination, `punpckldq`
;; merges operand 2 in; with operand 2 zero ("C"), `movd` loads
;; operand 1.  The first two alternatives use "Y" registers and the last
;; two use "*y" registers (type attributes mmxcvt / mmxmov).
3502 (define_insn "*sse2_concatv2si"
3503 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3505 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3506 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3509 punpckldq\t{%2, %0|%0, %2}
3510 movd\t{%1, %0|%0, %1}
3511 punpckldq\t{%2, %0|%0, %2}
3512 movd\t{%1, %0|%0, %1}"
3513 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3514 (set_attr "mode" "TI,TI,DI,DI")])
;; Insn that forms a V2SI value from two SImode operands, using
;; `unpcklps` / `movss` for the "x" register alternatives and
;; `punpckldq` / `movd` for the "*y" register alternatives.  In the
;; merge alternatives operand 1 is tied to the destination; in the load
;; alternatives operand 2 is zero ("C").
3516 (define_insn "*sse1_concatv2si"
3517 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3519 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3520 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3523 unpcklps\t{%2, %0|%0, %2}
3524 movss\t{%1, %0|%0, %1}
3525 punpckldq\t{%2, %0|%0, %2}
3526 movd\t{%1, %0|%0, %1}"
3527 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3528 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Insn that forms a V4SI value from two V2SI operands.  Operand 1 is
;; always tied to the destination.  Operand 2 is merged in with
;; `punpcklqdq` ("Y" register), `movlhps` ("x" register) or `movhps`
;; (memory).
3530 (define_insn "*vec_concatv4si_1"
3531 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3533 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3534 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3537 punpcklqdq\t{%2, %0|%0, %2}
3538 movlhps\t{%2, %0|%0, %2}
3539 movhps\t{%2, %0|%0, %2}"
3540 [(set_attr "type" "sselog,ssemov,ssemov")
3541 (set_attr "mode" "TI,V4SF,V2SF")])
;; Insn that forms a V2DI value from two DImode operands.  Six
;; alternatives:
;;   1. `movq` from memory, operand 2 zero.
;;   2. `movq2dq` from a "*y" register, operand 2 zero.
;;   3-5. operand 1 tied to the destination and operand 2 merged in with
;;      `punpcklqdq`, `movlhps` or `movhps`.
;;   6. operand 2 tied to the destination and operand 1 loaded from
;;      memory with `movlps`.
3543 (define_insn "*vec_concatv2di"
3544 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3546 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3547 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3550 movq\t{%1, %0|%0, %1}
3551 movq2dq\t{%1, %0|%0, %1}
3552 punpcklqdq\t{%2, %0|%0, %2}
3553 movlhps\t{%2, %0|%0, %2}
3554 movhps\t{%2, %0|%0, %2}
3555 movlps\t{%1, %0|%0, %1}"
3556 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3557 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one element of a V2DI register.  Operand 0 is the
;; vector, operand 1 the DImode value and operand 2 a constant element
;; index.  All work is delegated to ix86_expand_vector_set, called with
;; `false` as its first argument.
3559 (define_expand "vec_setv2di"
3560 [(match_operand:V2DI 0 "register_operand" "")
3561 (match_operand:DI 1 "register_operand" "")
3562 (match_operand 2 "const_int_operand" "")]
3565 ix86_expand_vector_set (false, operands[0], operands[1],
3566 INTVAL (operands[2]));
;; Expander for extracting one element of a V2DI register.  Operand 0 is
;; the DImode result, operand 1 the vector and operand 2 a constant
;; element index.  All work is delegated to ix86_expand_vector_extract,
;; called with `false` as its first argument.
3570 (define_expand "vec_extractv2di"
3571 [(match_operand:DI 0 "register_operand" "")
3572 (match_operand:V2DI 1 "register_operand" "")
3573 (match_operand 2 "const_int_operand" "")]
3576 ix86_expand_vector_extract (false, operands[0], operands[1],
3577 INTVAL (operands[2]));
;; Expander for initializing a V2DI register (operand 0) from operand 1,
;; which has no predicate or mode restriction here.  All work is
;; delegated to ix86_expand_vector_init, called with `false` as its first
;; argument.
3581 (define_expand "vec_initv2di"
3582 [(match_operand:V2DI 0 "register_operand" "")
3583 (match_operand 1 "" "")]
3586 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V4SI register.  Operand 0 is the
;; vector, operand 1 the SImode value and operand 2 a constant element
;; index.  All work is delegated to ix86_expand_vector_set, called with
;; `false` as its first argument.
3590 (define_expand "vec_setv4si"
3591 [(match_operand:V4SI 0 "register_operand" "")
3592 (match_operand:SI 1 "register_operand" "")
3593 (match_operand 2 "const_int_operand" "")]
3596 ix86_expand_vector_set (false, operands[0], operands[1],
3597 INTVAL (operands[2]));
;; Expander for extracting one element of a V4SI register.  Operand 0 is
;; the SImode result, operand 1 the vector and operand 2 a constant
;; element index.  All work is delegated to ix86_expand_vector_extract,
;; called with `false` as its first argument.
3601 (define_expand "vec_extractv4si"
3602 [(match_operand:SI 0 "register_operand" "")
3603 (match_operand:V4SI 1 "register_operand" "")
3604 (match_operand 2 "const_int_operand" "")]
3607 ix86_expand_vector_extract (false, operands[0], operands[1],
3608 INTVAL (operands[2]));
;; Expander for initializing a V4SI register (operand 0) from operand 1,
;; which has no predicate or mode restriction here.  All work is
;; delegated to ix86_expand_vector_init, called with `false` as its first
;; argument.
3612 (define_expand "vec_initv4si"
3613 [(match_operand:V4SI 0 "register_operand" "")
3614 (match_operand 1 "" "")]
3617 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V8HI register.  Operand 0 is the
;; vector, operand 1 the HImode value and operand 2 a constant element
;; index.  All work is delegated to ix86_expand_vector_set, called with
;; `false` as its first argument.
3621 (define_expand "vec_setv8hi"
3622 [(match_operand:V8HI 0 "register_operand" "")
3623 (match_operand:HI 1 "register_operand" "")
3624 (match_operand 2 "const_int_operand" "")]
3627 ix86_expand_vector_set (false, operands[0], operands[1],
3628 INTVAL (operands[2]));
;; Expander for extracting one element of a V8HI register.  Operand 0 is
;; the HImode result, operand 1 the vector and operand 2 a constant
;; element index.  All work is delegated to ix86_expand_vector_extract,
;; called with `false` as its first argument.
3632 (define_expand "vec_extractv8hi"
3633 [(match_operand:HI 0 "register_operand" "")
3634 (match_operand:V8HI 1 "register_operand" "")
3635 (match_operand 2 "const_int_operand" "")]
3638 ix86_expand_vector_extract (false, operands[0], operands[1],
3639 INTVAL (operands[2]));
;; Expander for initializing a V8HI register (operand 0) from operand 1,
;; which has no predicate or mode restriction here.  All work is
;; delegated to ix86_expand_vector_init, called with `false` as its first
;; argument.
3643 (define_expand "vec_initv8hi"
3644 [(match_operand:V8HI 0 "register_operand" "")
3645 (match_operand 1 "" "")]
3648 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V16QI register.  Operand 0 is
;; the vector, operand 1 the QImode value and operand 2 a constant
;; element index.  All work is delegated to ix86_expand_vector_set,
;; called with `false` as its first argument.
3652 (define_expand "vec_setv16qi"
3653 [(match_operand:V16QI 0 "register_operand" "")
3654 (match_operand:QI 1 "register_operand" "")
3655 (match_operand 2 "const_int_operand" "")]
3658 ix86_expand_vector_set (false, operands[0], operands[1],
3659 INTVAL (operands[2]));
;; Expander for extracting one element of a V16QI register.  Operand 0 is
;; the QImode result, operand 1 the vector and operand 2 a constant
;; element index.  All work is delegated to ix86_expand_vector_extract,
;; called with `false` as its first argument.
3663 (define_expand "vec_extractv16qi"
3664 [(match_operand:QI 0 "register_operand" "")
3665 (match_operand:V16QI 1 "register_operand" "")
3666 (match_operand 2 "const_int_operand" "")]
3669 ix86_expand_vector_extract (false, operands[0], operands[1],
3670 INTVAL (operands[2]));
;; Expander for initializing a V16QI register (operand 0) from operand 1,
;; which has no predicate or mode restriction here.  All work is
;; delegated to ix86_expand_vector_init, called with `false` as its first
;; argument.
3674 (define_expand "vec_initv16qi"
3675 [(match_operand:V16QI 0 "register_operand" "")
3676 (match_operand 1 "" "")]
3679 ix86_expand_vector_init (false, operands[0], operands[1]);
3683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named insn that emits `pavgb` on V16QI operands.  Operand 1 carries
;; the "%" constraint modifier, so operands 1 and 2 are commutative.  The
;; pattern contains a constant vector of sixteen ones, presumably the +1
;; rounding term of the average (the enclosing arithmetic RTL is not
;; visible in this excerpt).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
3689 (define_insn "sse2_uavgv16qi3"
3690 [(set (match_operand:V16QI 0 "register_operand" "=x")
3696 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3698 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3699 (const_vector:V16QI [(const_int 1) (const_int 1)
3700 (const_int 1) (const_int 1)
3701 (const_int 1) (const_int 1)
3702 (const_int 1) (const_int 1)
3703 (const_int 1) (const_int 1)
3704 (const_int 1) (const_int 1)
3705 (const_int 1) (const_int 1)
3706 (const_int 1) (const_int 1)]))
3708 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3709 "pavgb\t{%2, %0|%0, %2}"
3710 [(set_attr "type" "sseiadd")
3711 (set_attr "mode" "TI")])
;; Named insn that emits `pavgw` on V8HI operands.  Operand 1 carries the
;; "%" constraint modifier, so operands 1 and 2 are commutative.  The
;; pattern contains a constant vector of eight ones, presumably the +1
;; rounding term of the average (the enclosing arithmetic RTL is not
;; visible in this excerpt).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
3713 (define_insn "sse2_uavgv8hi3"
3714 [(set (match_operand:V8HI 0 "register_operand" "=x")
3720 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3722 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3723 (const_vector:V8HI [(const_int 1) (const_int 1)
3724 (const_int 1) (const_int 1)
3725 (const_int 1) (const_int 1)
3726 (const_int 1) (const_int 1)]))
3728 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3729 "pavgw\t{%2, %0|%0, %2}"
3730 [(set_attr "type" "sseiadd")
3731 (set_attr "mode" "TI")])
3733 ;; The correct representation for this is absolutely enormous, and
3734 ;; surely not generally useful.
;; Named insn that emits `psadbw`, modelled as an opaque unspec: two
;; V16QI inputs produce a V2DI result.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
3735 (define_insn "sse2_psadbw"
3736 [(set (match_operand:V2DI 0 "register_operand" "=x")
3737 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3738 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3741 "psadbw\t{%2, %0|%0, %2}"
3742 [(set_attr "type" "sseiadd")
3743 (set_attr "mode" "TI")])
;; Named insn that emits `movmskps`, modelled as an unspec: a V4SF SSE
;; register (operand 1) produces an SImode general-register result.
3745 (define_insn "sse_movmskps"
3746 [(set (match_operand:SI 0 "register_operand" "=r")
3747 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3750 "movmskps\t{%1, %0|%0, %1}"
3751 [(set_attr "type" "ssecvt")
3752 (set_attr "mode" "V4SF")])
;; Named insn that emits `movmskpd`, modelled as an unspec: a V2DF SSE
;; register (operand 1) produces an SImode general-register result.
3754 (define_insn "sse2_movmskpd"
3755 [(set (match_operand:SI 0 "register_operand" "=r")
3756 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3759 "movmskpd\t{%1, %0|%0, %1}"
3760 [(set_attr "type" "ssecvt")
3761 (set_attr "mode" "V2DF")])
;; Named insn that emits `pmovmskb`, modelled as an unspec: a V16QI SSE
;; register (operand 1) produces an SImode general-register result.
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector -- confirm this is intentional (other attribute
;; computations may depend on it) before changing it.
3763 (define_insn "sse2_pmovmskb"
3764 [(set (match_operand:SI 0 "register_operand" "=r")
3765 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3768 "pmovmskb\t{%1, %0|%0, %1}"
3769 [(set_attr "type" "ssecvt")
3770 (set_attr "mode" "V2DF")])
;; Expander for a masked byte store: the destination (operand 0) is a
;; V16QI memory reference and the source is an unspec over V16QI
;; registers 1 and 2.  The rest of the unspec and the expander's
;; condition are not visible in this excerpt.
3772 (define_expand "sse2_maskmovdqu"
3773 [(set (match_operand:V16QI 0 "memory_operand" "")
3774 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3775 (match_operand:V16QI 2 "register_operand" "x")
;; Insn for `maskmovdqu` when not targeting 64-bit.  The store address is
;; an SImode register constrained to "D".  The unspec also takes the
;; previous contents of that memory (mem of match_dup 0) as an input,
;; alongside V16QI registers 1 and 2.
3781 (define_insn "*sse2_maskmovdqu"
3782 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3783 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3784 (match_operand:V16QI 2 "register_operand" "x")
3785 (mem:V16QI (match_dup 0))]
3787 "TARGET_SSE2 && !TARGET_64BIT"
3788 ;; @@@ check ordering of operands in intel/nonintel syntax
3789 "maskmovdqu\t{%2, %1|%1, %2}"
3790 [(set_attr "type" "ssecvt")
3791 (set_attr "mode" "TI")])
;; Insn for `maskmovdqu` when targeting 64-bit.  The store address is a
;; DImode register constrained to "D".  The unspec also takes the
;; previous contents of that memory (mem of match_dup 0) as an input,
;; alongside V16QI registers 1 and 2.
3793 (define_insn "*sse2_maskmovdqu_rex64"
3794 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3795 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3796 (match_operand:V16QI 2 "register_operand" "x")
3797 (mem:V16QI (match_dup 0))]
3799 "TARGET_SSE2 && TARGET_64BIT"
3800 ;; @@@ check ordering of operands in intel/nonintel syntax
3801 "maskmovdqu\t{%2, %1|%1, %2}"
3802 [(set_attr "type" "ssecvt")
3803 (set_attr "mode" "TI")])
;; Named insn modelled as an unspec_volatile over an SImode memory
;; operand; the "memory" attribute marks it as a load.  The unspec code,
;; condition and output template are not visible in this excerpt.
3805 (define_insn "sse_ldmxcsr"
3806 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3810 [(set_attr "type" "sse")
3811 (set_attr "memory" "load")])
;; Named insn that stores the result of an UNSPECV_STMXCSR
;; unspec_volatile into an SImode memory operand; the "memory" attribute
;; marks it as a store.  The condition and output template are not
;; visible in this excerpt.
3813 (define_insn "sse_stmxcsr"
3814 [(set (match_operand:SI 0 "memory_operand" "=m")
3815 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3818 [(set_attr "type" "sse")
3819 (set_attr "memory" "store")])
;; Expander for the store fence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  It creates operand 0 itself: a BLKmode MEM over a
;; scratch address, marked volatile, which the pattern then uses inside
;; the UNSPEC_SFENCE unspec.
3821 (define_expand "sse_sfence"
3823 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3824 "TARGET_SSE || TARGET_3DNOW_A"
3826 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3827 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the store fence: a BLKmode operand set from an
;; UNSPEC_SFENCE unspec of itself.  Enabled for TARGET_SSE or
;; TARGET_3DNOW_A, with the "memory" attribute set to "unknown".  The
;; output template is not visible in this excerpt.
3830 (define_insn "*sse_sfence"
3831 [(set (match_operand:BLK 0 "" "")
3832 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3833 "TARGET_SSE || TARGET_3DNOW_A"
3835 [(set_attr "type" "sse")
3836 (set_attr "memory" "unknown")])
;; Named insn modelled as an unspec_volatile over an address operand
;; (constraint "p"); the "memory" attribute is "unknown".  The unspec
;; code, condition and output template are not visible in this excerpt.
3838 (define_insn "sse2_clflush"
3839 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3843 [(set_attr "type" "sse")
3844 (set_attr "memory" "unknown")])
;; Expander for the memory fence.  It creates operand 0 itself: a BLKmode
;; MEM over a scratch address, marked volatile, which the pattern then
;; uses inside the UNSPEC_MFENCE unspec.  The condition is not visible in
;; this excerpt.
3846 (define_expand "sse2_mfence"
3848 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3851 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3852 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the memory fence: a BLKmode operand set from an
;; UNSPEC_MFENCE unspec of itself, with the "memory" attribute set to
;; "unknown".  The condition and output template are not visible in this
;; excerpt.
3855 (define_insn "*sse2_mfence"
3856 [(set (match_operand:BLK 0 "" "")
3857 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3860 [(set_attr "type" "sse")
3861 (set_attr "memory" "unknown")])
;; Expander for the load fence.  It creates operand 0 itself: a BLKmode
;; MEM over a scratch address, marked volatile, which the pattern then
;; uses inside the UNSPEC_LFENCE unspec.  The condition is not visible in
;; this excerpt.
3863 (define_expand "sse2_lfence"
3865 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3868 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3869 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the load fence: a BLKmode operand set from an
;; UNSPEC_LFENCE unspec of itself, with the "memory" attribute set to
;; "unknown".  The condition and output template are not visible in this
;; excerpt.
3872 (define_insn "*sse2_lfence"
3873 [(set (match_operand:BLK 0 "" "")
3874 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3877 [(set_attr "type" "sse")
3878 (set_attr "memory" "unknown")])
;; Named insn modelled as an unspec_volatile over two SImode registers,
;; constrained to "a" and "c".  The encoded length is fixed at 3 bytes by
;; the "length" attribute.  The unspec code, condition and output
;; template are not visible in this excerpt.
3880 (define_insn "sse3_mwait"
3881 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3882 (match_operand:SI 1 "register_operand" "c")]
3886 [(set_attr "length" "3")])
;; Named insn that emits `monitor`, modelled as an unspec_volatile over
;; three SImode registers constrained to "a", "c" and "d".  The encoded
;; length is fixed at 3 bytes by the "length" attribute.  The unspec code
;; and condition are not visible in this excerpt.
3888 (define_insn "sse3_monitor"
3889 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3890 (match_operand:SI 1 "register_operand" "c")
3891 (match_operand:SI 2 "register_operand" "d")]
3894 "monitor\t%0, %1, %2"
3895 [(set_attr "length" "3")])