1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; list the element widths, in bytes, of the modes it covers
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte integer vector modes (SSEMODEI).
;; The visible preparation code hands both operands to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Alternatives: a "C" constant into
;; a register, register/memory into a register, and a register into
;; memory; the condition rejects a memory-to-memory move.  The output
;; code picks the "ps" mnemonic (xorps/movaps) when the insn's mode
;; attribute is MODE_V4SF and the integer mnemonic (pxor/movdqa)
;; otherwise.  The mode attribute is computed from optimize_size,
;; TARGET_SSE2 and, for alternative 2 (the store),
;; TARGET_SSE_TYPELESS_STORES; its fallback value is TI.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
85 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
86 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
87 (and (eq_attr "alternative" "2")
88 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
91 (const_string "TI")))])
;; Move expander for V4SF; the visible preparation code delegates to
;; ix86_expand_vector_move.
93 (define_expand "movv4sf"
94 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
95 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
98 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF.  Alternatives: a "C" constant into a register,
;; register/memory into a register, and a register into memory.  The
;; visible templates use movaps, and every alternative has mode V4SF.
102 (define_insn "*movv4sf_internal"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
104 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
108 movaps\t{%1, %0|%0, %1}
109 movaps\t{%1, %0|%0, %1}"
110 [(set_attr "type" "sselog1,ssemov,ssemov")
111 (set_attr "mode" "V4SF")])
;; Split applied after reload (condition: TARGET_SSE && reload_completed)
;; to a V4SF register load whose source matches
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at offset 0 and sets operand 2 to the
;; V4SF zero constant; the replacement RTL uses a vec_duplicate of
;; operand 1.
;; NOTE(review): the opening define line of this pattern is not visible
;; in this excerpt.
114 [(set (match_operand:V4SF 0 "register_operand" "")
115 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
116 "TARGET_SSE && reload_completed"
119 (vec_duplicate:V4SF (match_dup 1))
123 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
124 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the visible preparation code delegates to
;; ix86_expand_vector_move.
127 (define_expand "movv2df"
128 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
129 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
132 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF.  Alternatives: a "C" constant into a register,
;; register/memory into a register, and a register into memory; the
;; condition rejects a memory-to-memory move.  The output code uses
;; xorps/movaps when the insn's mode attribute is MODE_V4SF and
;; xorpd/movapd otherwise.  The mode attribute tests optimize_size,
;; TARGET_SSE2 and, for alternative 2 (the store),
;; TARGET_SSE_TYPELESS_STORES, choosing between V4SF and V2DF.
136 (define_insn "*movv2df_internal"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
138 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
139 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
141 switch (which_alternative)
144 if (get_attr_mode (insn) == MODE_V4SF)
145 return "xorps\t%0, %0";
147 return "xorpd\t%0, %0";
150 if (get_attr_mode (insn) == MODE_V4SF)
151 return "movaps\t{%1, %0|%0, %1}";
153 return "movapd\t{%1, %0|%0, %1}";
158 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
162 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
163 (and (eq_attr "alternative" "2")
164 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
166 (const_string "V4SF")
167 (const_string "V2DF")))])
;; Split applied after reload (condition: TARGET_SSE2 &&
;; reload_completed) to a V2DF register load whose source matches
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its
;; DFmode subreg at offset 0 and operand 2 becomes the DFmode zero
;; constant; the result is the vec_concat of the two.
;; NOTE(review): the opening define line of this pattern is not visible
;; in this excerpt.
170 [(set (match_operand:V2DF 0 "register_operand" "")
171 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
172 "TARGET_SSE2 && reload_completed"
173 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
175 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
176 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every 16-byte SSE vector mode (SSEMODE); the
;; register operand is handed to ix86_expand_push.
179 (define_expand "push<mode>1"
180 [(match_operand:SSEMODE 0 "register_operand" "")]
183 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every 16-byte SSE vector mode (SSEMODE);
;; delegates to ix86_expand_vector_move_misalign.
187 (define_expand "movmisalign<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned move of a V4SF value using movups, represented as an
;; unspec.  Either operand may be memory, but the condition rejects a
;; memory-to-memory move.  The "mode" attribute is V4SF because the insn
;; moves packed single-precision data, like the other V4SF moves in this
;; file (it previously said V2DF).
196 (define_insn "sse_movups"
197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
198 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
200 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
201 "movups\t{%1, %0|%0, %1}"
202 [(set_attr "type" "ssemov")
203 (set_attr "mode" "V4SF")])
;; Unaligned move of a V2DF value using movupd, represented as an
;; unspec.  Requires SSE2; a memory-to-memory move is rejected.
205 (define_insn "sse2_movupd"
206 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
207 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
209 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
210 "movupd\t{%1, %0|%0, %1}"
211 [(set_attr "type" "ssemov")
212 (set_attr "mode" "V2DF")])
;; Unaligned move of a V16QI value using movdqu, represented as an
;; unspec.  Requires SSE2; a memory-to-memory move is rejected.
214 (define_insn "sse2_movdqu"
215 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
216 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movdqu\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory with movntps, represented as
;; an unspec.  The destination must be memory and the source a register.
223 (define_insn "sse_movntv4sf"
224 [(set (match_operand:V4SF 0 "memory_operand" "=m")
225 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228 "movntps\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory with movntpd, represented as
;; an unspec.  The destination must be memory and the source a register.
;; NOTE(review): the type is "ssecvt" here while sse_movntv4sf uses
;; "ssemov" for the equivalent store -- confirm this is intended.
232 (define_insn "sse2_movntv2df"
233 [(set (match_operand:V2DF 0 "memory_operand" "=m")
234 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237 "movntpd\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssecvt")
239 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory with movntdq, represented as
;; an unspec.  The destination must be memory and the source a register.
;; NOTE(review): the type is "ssecvt" here while sse_movntv4sf uses
;; "ssemov" for the equivalent store -- confirm this is intended.
241 (define_insn "sse2_movntv2di"
242 [(set (match_operand:V2DI 0 "memory_operand" "=m")
243 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246 "movntdq\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssecvt")
248 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory with movnti,
;; represented as an unspec.  The "mode" attribute is SI because the
;; insn moves a 32-bit integer and no vector register is involved
;; (it previously said V2DF).
250 (define_insn "sse2_movntsi"
251 [(set (match_operand:SI 0 "memory_operand" "=m")
252 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255 "movnti\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register with lddqu,
;; represented as an unspec.  The source must be a memory operand.
259 (define_insn "sse3_lddqu"
260 [(set (match_operand:V16QI 0 "register_operand" "=x")
261 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264 "lddqu\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
270 ;; Parallel single-precision floating point arithmetic
272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all code generation is done by
;; ix86_expand_fp_absneg_operator with code NEG.
274 (define_expand "negv4sf2"
275 [(set (match_operand:V4SF 0 "register_operand" "")
276 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
278 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all code generation is done by
;; ix86_expand_fp_absneg_operator with code ABS.
280 (define_expand "absv4sf2"
281 [(set (match_operand:V4SF 0 "register_operand" "")
282 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
284 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the plus RTL is emitted.
286 (define_expand "addv4sf3"
287 [(set (match_operand:V4SF 0 "register_operand" "")
288 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
289 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
291 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add (addps).  Operands 1 and 2 are marked
;; commutative ("%"), operand 1 is tied to the output, and the condition
;; requires ix86_binary_operator_ok.
293 (define_insn "*addv4sf3"
294 [(set (match_operand:V4SF 0 "register_operand" "=x")
295 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
297 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
298 "addps\t{%2, %0|%0, %2}"
299 [(set_attr "type" "sseadd")
300 (set_attr "mode" "V4SF")])
;; Scalar add on V4SF operands, emitted as addss (mode attribute SF).
;; Operands 1 and 2 are marked commutative and operand 1 is tied to the
;; output.
302 (define_insn "sse_vmaddv4sf3"
303 [(set (match_operand:V4SF 0 "register_operand" "=x")
305 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
306 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
310 "addss\t{%2, %0|%0, %2}"
311 [(set_attr "type" "sseadd")
312 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register;
;; the operands are adjusted by ix86_fixup_binary_operands_no_copy.
314 (define_expand "subv4sf3"
315 [(set (match_operand:V4SF 0 "register_operand" "")
316 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
317 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
319 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract (subps).  Operand 1 is a register
;; tied to the output; operand 2 may be a register or memory.
321 (define_insn "*subv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
326 "subps\t{%2, %0|%0, %2}"
327 [(set_attr "type" "sseadd")
328 (set_attr "mode" "V4SF")])
;; Scalar subtract on V4SF operands, emitted as subss (mode attribute
;; SF).  Operand 1 is a register tied to the output.
330 (define_insn "sse_vmsubv4sf3"
331 [(set (match_operand:V4SF 0 "register_operand" "=x")
333 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
334 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
338 "subss\t{%2, %0|%0, %2}"
339 [(set_attr "type" "sseadd")
340 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the mult RTL is emitted.
342 (define_expand "mulv4sf3"
343 [(set (match_operand:V4SF 0 "register_operand" "")
344 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
345 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
347 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply (mulps).  Operands 1 and 2 are
;; marked commutative, operand 1 is tied to the output, and the
;; condition requires ix86_binary_operator_ok.
349 (define_insn "*mulv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
353 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
354 "mulps\t{%2, %0|%0, %2}"
355 [(set_attr "type" "ssemul")
356 (set_attr "mode" "V4SF")])
;; Scalar multiply on V4SF operands, emitted as mulss (mode attribute
;; SF).  Operands 1 and 2 are marked commutative and operand 1 is tied
;; to the output.
358 (define_insn "sse_vmmulv4sf3"
359 [(set (match_operand:V4SF 0 "register_operand" "=x")
361 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
362 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
366 "mulss\t{%2, %0|%0, %2}"
367 [(set_attr "type" "ssemul")
368 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
370 (define_expand "divv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "")
372 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
373 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed single-precision divide (divps).  Operand 1 is a register
;; tied to the output; operand 2 may be a register or memory.
377 (define_insn "*divv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
382 "divps\t{%2, %0|%0, %2}"
383 [(set_attr "type" "ssediv")
384 (set_attr "mode" "V4SF")])
;; Scalar divide on V4SF operands, emitted as divss (mode attribute SF).
;; Operand 1 is a register tied to the output.
386 (define_insn "sse_vmdivv4sf3"
387 [(set (match_operand:V4SF 0 "register_operand" "=x")
389 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
394 "divss\t{%2, %0|%0, %2}"
395 [(set_attr "type" "ssediv")
396 (set_attr "mode" "SF")])
;; Packed reciprocal (rcpps) of operand 1, represented with UNSPEC_RCP.
;; The source may be a register or memory.
398 (define_insn "sse_rcpv4sf2"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
401 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
403 "rcpps\t{%1, %0|%0, %1}"
404 [(set_attr "type" "sse")
405 (set_attr "mode" "V4SF")])
;; Scalar reciprocal, emitted as rcpss on operand 1 (mode attribute SF).
;; Operand 2 is a register tied to the output; it does not appear in
;; the assembler template.
407 (define_insn "sse_vmrcpv4sf2"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
412 (match_operand:V4SF 2 "register_operand" "0")
415 "rcpss\t{%1, %0|%0, %1}"
416 [(set_attr "type" "sse")
417 (set_attr "mode" "SF")])
;; Packed reciprocal square root (rsqrtps) of operand 1, represented
;; with UNSPEC_RSQRT.  The source may be a register or memory.
419 (define_insn "sse_rsqrtv4sf2"
420 [(set (match_operand:V4SF 0 "register_operand" "=x")
422 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
424 "rsqrtps\t{%1, %0|%0, %1}"
425 [(set_attr "type" "sse")
426 (set_attr "mode" "V4SF")])
;; Scalar reciprocal square root, emitted as rsqrtss on operand 1 (mode
;; attribute SF).  Operand 2 is a register tied to the output; it does
;; not appear in the assembler template.
428 (define_insn "sse_vmrsqrtv4sf2"
429 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
433 (match_operand:V4SF 2 "register_operand" "0")
436 "rsqrtss\t{%1, %0|%0, %1}"
437 [(set_attr "type" "sse")
438 (set_attr "mode" "SF")])
;; Packed single-precision square root (sqrtps) of operand 1, which may
;; be a register or memory.
440 (define_insn "sqrtv4sf2"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
444 "sqrtps\t{%1, %0|%0, %1}"
445 [(set_attr "type" "sse")
446 (set_attr "mode" "V4SF")])
;; Scalar square root, emitted as sqrtss on operand 1 (mode attribute
;; SF).  Operand 2 is a register tied to the output; it does not appear
;; in the assembler template.
448 (define_insn "sse_vmsqrtv4sf2"
449 [(set (match_operand:V4SF 0 "register_operand" "=x")
451 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
452 (match_operand:V4SF 2 "register_operand" "0")
455 "sqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
459 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
460 ;; isn't really correct, as those rtl operators aren't defined when
461 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
463 (define_expand "smaxv4sf3"
464 [(set (match_operand:V4SF 0 "register_operand" "")
465 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
466 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469 if (!flag_finite_math_only)
470 operands[1] = force_reg (V4SFmode, operands[1]);
471 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed maximum (maxps) for the flag_finite_math_only case.  Operands
;; 1 and 2 are marked commutative and operand 1 is tied to the output.
474 (define_insn "*smaxv4sf3_finite"
475 [(set (match_operand:V4SF 0 "register_operand" "=x")
476 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
477 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
478 "TARGET_SSE && flag_finite_math_only
479 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
480 "maxps\t{%2, %0|%0, %2}"
481 [(set_attr "type" "sse")
482 (set_attr "mode" "V4SF")])
;; Packed maximum (maxps) with a fixed operand order: operand 1 is a
;; register tied to the output and is not marked commutative.
484 (define_insn "*smaxv4sf3"
485 [(set (match_operand:V4SF 0 "register_operand" "=x")
486 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
487 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
489 "maxps\t{%2, %0|%0, %2}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "V4SF")])
;; Scalar maximum (maxss, mode attribute SF) for the
;; flag_finite_math_only case.  Operands 1 and 2 are marked commutative
;; and operand 1 is tied to the output.
493 (define_insn "*sse_vmsmaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
496 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
497 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500 "TARGET_SSE && flag_finite_math_only
501 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
502 "maxss\t{%2, %0|%0, %2}"
503 [(set_attr "type" "sse")
504 (set_attr "mode" "SF")])
;; Scalar maximum (maxss, mode attribute SF) with a fixed operand order:
;; operand 1 is a register tied to the output.
506 (define_insn "sse_vmsmaxv4sf3"
507 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
510 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
514 "maxss\t{%2, %0|%0, %2}"
515 [(set_attr "type" "sse")
516 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
518 (define_expand "sminv4sf3"
519 [(set (match_operand:V4SF 0 "register_operand" "")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
521 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
524 if (!flag_finite_math_only)
525 operands[1] = force_reg (V4SFmode, operands[1]);
526 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed minimum (minps) for the flag_finite_math_only case.  Operands
;; 1 and 2 are marked commutative and operand 1 is tied to the output.
529 (define_insn "*sminv4sf3_finite"
530 [(set (match_operand:V4SF 0 "register_operand" "=x")
531 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
532 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
533 "TARGET_SSE && flag_finite_math_only
534 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
535 "minps\t{%2, %0|%0, %2}"
536 [(set_attr "type" "sse")
537 (set_attr "mode" "V4SF")])
;; Packed minimum (minps) with a fixed operand order: operand 1 is a
;; register tied to the output and is not marked commutative.
539 (define_insn "*sminv4sf3"
540 [(set (match_operand:V4SF 0 "register_operand" "=x")
541 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
542 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
544 "minps\t{%2, %0|%0, %2}"
545 [(set_attr "type" "sse")
546 (set_attr "mode" "V4SF")])
;; Scalar minimum (minss, mode attribute SF) for the
;; flag_finite_math_only case.  Operands 1 and 2 are marked commutative
;; and operand 1 is tied to the output.
548 (define_insn "*sse_vmsminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
551 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
552 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
555 "TARGET_SSE && flag_finite_math_only
556 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
557 "minss\t{%2, %0|%0, %2}"
558 [(set_attr "type" "sse")
559 (set_attr "mode" "SF")])
;; Scalar minimum (minss, mode attribute SF) with a fixed operand order:
;; operand 1 is a register tied to the output.
561 (define_insn "sse_vmsminv4sf3"
562 [(set (match_operand:V4SF 0 "register_operand" "=x")
564 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
565 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
569 "minss\t{%2, %0|%0, %2}"
570 [(set_attr "type" "sse")
571 (set_attr "mode" "SF")])
573 ;; These versions of the min/max patterns implement exactly the operations
574 ;; min = (op1 < op2 ? op1 : op2)
575 ;; max = (!(op1 < op2) ? op1 : op2)
576 ;; Their operands are not commutative, and thus they may be used in the
577 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum, represented as an unspec of operands 1
;; and 2 and emitted as minps.  Operand 1 is a register tied to the
;; output.
579 (define_insn "*ieee_sminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
581 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
582 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
585 "minps\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sseadd")
587 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum, represented as an unspec of operands 1
;; and 2 and emitted as maxps.  Operand 1 is a register tied to the
;; output.
589 (define_insn "*ieee_smaxv4sf3"
590 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
595 "maxps\t{%2, %0|%0, %2}"
596 [(set_attr "type" "sseadd")
597 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum, represented as an unspec of operands 1
;; and 2 and emitted as minpd.  Operand 1 is a register tied to the
;; output.
599 (define_insn "*ieee_sminv2df3"
600 [(set (match_operand:V2DF 0 "register_operand" "=x")
601 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
602 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
605 "minpd\t{%2, %0|%0, %2}"
606 [(set_attr "type" "sseadd")
607 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum, represented as an unspec of operands 1
;; and 2 and emitted as maxpd.  Operand 1 is a register tied to the
;; output.
609 (define_insn "*ieee_smaxv2df3"
610 [(set (match_operand:V2DF 0 "register_operand" "=x")
611 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
612 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
615 "maxpd\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sseadd")
617 (set_attr "mode" "V2DF")])
;; addsubps on V4SF.  Operand 1 is a register tied to the output and
;; operand 2 may be a register or memory; the RTL reuses both operands
;; via match_dup for the minus:V4SF part.
619 (define_insn "sse3_addsubv4sf3"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
623 (match_operand:V4SF 1 "register_operand" "0")
624 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
625 (minus:V4SF (match_dup 1) (match_dup 2))
628 "addsubps\t{%2, %0|%0, %2}"
629 [(set_attr "type" "sseadd")
630 (set_attr "mode" "V4SF")])
;; haddps on V4SF.  The RTL is built from SFmode vec_selects of
;; elements 0..3 of operand 1 (a register tied to the output) and of
;; operand 2 (register or memory).
632 (define_insn "sse3_haddv4sf3"
633 [(set (match_operand:V4SF 0 "register_operand" "=x")
638 (match_operand:V4SF 1 "register_operand" "0")
639 (parallel [(const_int 0)]))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
643 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
648 (parallel [(const_int 0)]))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
652 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
654 "haddps\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "mode" "V4SF")])
;; hsubps on V4SF.  The RTL is built from SFmode vec_selects of
;; elements 0..3 of operand 1 (a register tied to the output) and of
;; operand 2 (register or memory).
658 (define_insn "sse3_hsubv4sf3"
659 [(set (match_operand:V4SF 0 "register_operand" "=x")
664 (match_operand:V4SF 1 "register_operand" "0")
665 (parallel [(const_int 0)]))
666 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
669 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
673 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
674 (parallel [(const_int 0)]))
675 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
678 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
680 "hsubps\t{%2, %0|%0, %2}"
681 [(set_attr "type" "sseadd")
682 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  The visible code holds two
;; strategies: two successive gen_sse3_haddv4sf3 calls through a fresh
;; temporary register, or ix86_expand_reduc_v4sf driven by
;; gen_addv4sf3.  The test that selects between them is on a line not
;; visible in this excerpt.
684 (define_expand "reduc_splus_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
691 rtx tmp = gen_reg_rtx (V4SFmode);
692 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
693 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
696 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
700 (define_expand "reduc_smax_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
709 (define_expand "reduc_smin_v4sf"
710 [(match_operand:V4SF 0 "register_operand" "")
711 (match_operand:V4SF 1 "register_operand" "")]
714 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
718 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720 ;; Parallel single-precision floating point comparisons
722 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed single-precision compare producing a V4SF result.  Operator 3
;; must satisfy sse_comparison_operator and supplies the condition part
;; of the mnemonic through %D3 (cmp<cond>ps).  Operand 1 is a register
;; tied to the output.
724 (define_insn "sse_maskcmpv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
726 (match_operator:V4SF 3 "sse_comparison_operator"
727 [(match_operand:V4SF 1 "register_operand" "0")
728 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
730 "cmp%D3ps\t{%2, %0|%0, %2}"
731 [(set_attr "type" "ssecmp")
732 (set_attr "mode" "V4SF")])
;; Scalar single-precision compare (cmp<cond>ss via %D3, mode attribute
;; SF).  Unlike the packed form, operand 2 must be a register here.
734 (define_insn "sse_vmmaskcmpv4sf3"
735 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 (match_operator:V4SF 3 "sse_comparison_operator"
738 [(match_operand:V4SF 1 "register_operand" "0")
739 (match_operand:V4SF 2 "register_operand" "x")])
743 "cmp%D3ss\t{%2, %0|%0, %2}"
744 [(set_attr "type" "ssecmp")
745 (set_attr "mode" "SF")])
;; comiss: sets the flags register in CCFP mode from element 0 of
;; operand 0 (register) and element 0 of operand 1 (register or memory).
747 (define_insn "sse_comi"
748 [(set (reg:CCFP FLAGS_REG)
751 (match_operand:V4SF 0 "register_operand" "x")
752 (parallel [(const_int 0)]))
754 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
755 (parallel [(const_int 0)]))))]
757 "comiss\t{%1, %0|%0, %1}"
758 [(set_attr "type" "ssecomi")
759 (set_attr "mode" "SF")])
;; ucomiss: sets the flags register in CCFPU mode from element 0 of
;; operand 0 (register) and element 0 of operand 1 (register or memory).
761 (define_insn "sse_ucomi"
762 [(set (reg:CCFPU FLAGS_REG)
765 (match_operand:V4SF 0 "register_operand" "x")
766 (parallel [(const_int 0)]))
768 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
769 (parallel [(const_int 0)]))))]
771 "ucomiss\t{%1, %0|%0, %1}"
772 [(set_attr "type" "ssecomi")
773 (set_attr "mode" "SF")])
;; Vector conditional expander for V4SF.  Operands 4 and 5 are the
;; values being compared; operands 1 and 2 are the two general source
;; values.  Expansion is attempted through ix86_expand_fp_vcond; the
;; code that acts on its result is not visible in this excerpt.
775 (define_expand "vcondv4sf"
776 [(set (match_operand:V4SF 0 "register_operand" "")
779 [(match_operand:V4SF 4 "nonimmediate_operand" "")
780 (match_operand:V4SF 5 "nonimmediate_operand" "")])
781 (match_operand:V4SF 1 "general_operand" "")
782 (match_operand:V4SF 2 "general_operand" "")))]
785 if (ix86_expand_fp_vcond (operands))
791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
793 ;; Parallel single-precision floating point logical operations
795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the and RTL is emitted.
797 (define_expand "andv4sf3"
798 [(set (match_operand:V4SF 0 "register_operand" "")
799 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
800 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Bitwise AND of V4SF values (andps).  Operands 1 and 2 are marked
;; commutative, operand 1 is tied to the output, and the condition
;; requires ix86_binary_operator_ok.
804 (define_insn "*andv4sf3"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
807 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
808 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
809 "andps\t{%2, %0|%0, %2}"
810 [(set_attr "type" "sselog")
811 (set_attr "mode" "V4SF")])
;; andnps: computes (NOT operand 1) AND operand 2 on V4SF values.
;; Operand 1 is a register tied to the output.
813 (define_insn "sse_nandv4sf3"
814 [(set (match_operand:V4SF 0 "register_operand" "=x")
815 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
816 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
818 "andnps\t{%2, %0|%0, %2}"
819 [(set_attr "type" "sselog")
820 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the ior RTL is emitted.
822 (define_expand "iorv4sf3"
823 [(set (match_operand:V4SF 0 "register_operand" "")
824 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
825 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
827 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Bitwise OR of V4SF values (orps).  Operands 1 and 2 are marked
;; commutative, operand 1 is tied to the output, and the condition
;; requires ix86_binary_operator_ok.
829 (define_insn "*iorv4sf3"
830 [(set (match_operand:V4SF 0 "register_operand" "=x")
831 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
832 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
833 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
834 "orps\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the xor RTL is emitted.
838 (define_expand "xorv4sf3"
839 [(set (match_operand:V4SF 0 "register_operand" "")
840 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
841 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
843 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Bitwise XOR of V4SF values (xorps).  Operands 1 and 2 are marked
;; commutative, operand 1 is tied to the output, and the condition
;; requires ix86_binary_operator_ok.
845 (define_insn "*xorv4sf3"
846 [(set (match_operand:V4SF 0 "register_operand" "=x")
847 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
848 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
849 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
850 "xorps\t{%2, %0|%0, %2}"
851 [(set_attr "type" "sselog")
852 (set_attr "mode" "V4SF")])
854 ;; Also define scalar versions. These are used for abs, neg, and
855 ;; conditional move. Using subregs into vector modes causes register
856 ;; allocation lossage. These patterns do not allow memory operands
857 ;; because the native instructions read the full 128-bits.
;; Scalar SFmode bitwise AND, emitted as the full-width andps (mode
;; attribute V4SF).  Both inputs must be registers.
859 (define_insn "*andsf3"
860 [(set (match_operand:SF 0 "register_operand" "=x")
861 (and:SF (match_operand:SF 1 "register_operand" "0")
862 (match_operand:SF 2 "register_operand" "x")))]
864 "andps\t{%2, %0|%0, %2}"
865 [(set_attr "type" "sselog")
866 (set_attr "mode" "V4SF")])
;; Scalar SFmode (NOT operand 1) AND operand 2, emitted as the
;; full-width andnps (mode attribute V4SF).  Both inputs must be
;; registers.
868 (define_insn "*nandsf3"
869 [(set (match_operand:SF 0 "register_operand" "=x")
870 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
871 (match_operand:SF 2 "register_operand" "x")))]
873 "andnps\t{%2, %0|%0, %2}"
874 [(set_attr "type" "sselog")
875 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise OR, emitted as the full-width orps (mode
;; attribute V4SF).  Both inputs must be registers.
877 (define_insn "*iorsf3"
878 [(set (match_operand:SF 0 "register_operand" "=x")
879 (ior:SF (match_operand:SF 1 "register_operand" "0")
880 (match_operand:SF 2 "register_operand" "x")))]
882 "orps\t{%2, %0|%0, %2}"
883 [(set_attr "type" "sselog")
884 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise XOR, emitted as the full-width xorps (mode
;; attribute V4SF).  Both inputs must be registers.
886 (define_insn "*xorsf3"
887 [(set (match_operand:SF 0 "register_operand" "=x")
888 (xor:SF (match_operand:SF 1 "register_operand" "0")
889 (match_operand:SF 2 "register_operand" "x")))]
891 "xorps\t{%2, %0|%0, %2}"
892 [(set_attr "type" "sselog")
893 (set_attr "mode" "V4SF")])
895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;; Parallel single-precision floating point conversion operations
899 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "y" register or
;; memory) to V2SF with float.  Operand 1 is a V4SF register tied to
;; the output.
901 (define_insn "sse_cvtpi2ps"
902 [(set (match_operand:V4SF 0 "register_operand" "=x")
905 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
906 (match_operand:V4SF 1 "register_operand" "0")
909 "cvtpi2ps\t{%2, %0|%0, %2}"
910 [(set_attr "type" "ssecvt")
911 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts operand 1 (V4SF, register or memory) through a
;; V4SI unspec and keeps elements 0 and 1 as the V2SI result in a "y"
;; register.  Tagged unit "mmx".
913 (define_insn "sse_cvtps2pi"
914 [(set (match_operand:V2SI 0 "register_operand" "=y")
916 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
918 (parallel [(const_int 0) (const_int 1)])))]
920 "cvtps2pi\t{%1, %0|%0, %1}"
921 [(set_attr "type" "ssecvt")
922 (set_attr "unit" "mmx")
923 (set_attr "mode" "DI")])
;; cvttps2pi: converts operand 1 (V4SF, register or memory) with
;; fix:V4SI and keeps elements 0 and 1 as the V2SI result in a "y"
;; register.  Tagged unit "mmx".
;; NOTE(review): the mode attribute is "SF" here while sse_cvtps2pi
;; uses "DI" -- confirm which is intended.
925 (define_insn "sse_cvttps2pi"
926 [(set (match_operand:V2SI 0 "register_operand" "=y")
928 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
929 (parallel [(const_int 0) (const_int 1)])))]
931 "cvttps2pi\t{%1, %0|%0, %1}"
932 [(set_attr "type" "ssecvt")
933 (set_attr "unit" "mmx")
934 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 (general register or memory,
;; one alternative each) to SF with float.  Operand 1 is a V4SF
;; register tied to the output.  athlon_decode differs per alternative.
936 (define_insn "sse_cvtsi2ss"
937 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
940 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
941 (match_operand:V4SF 1 "register_operand" "0,0")
944 "cvtsi2ss\t{%2, %0|%0, %2}"
945 [(set_attr "type" "sseicvt")
946 (set_attr "athlon_decode" "vector,double")
947 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-only variant that converts the DImode operand 2 to
;; SF with float.  Operand 1 is a V4SF register tied to the output.
;; NOTE(review): the second alternative of operand 2 is "rm" here while
;; sse_cvtsi2ss uses "m" -- confirm the overlap with the first ("r")
;; alternative is intended.
949 (define_insn "sse_cvtsi2ssq"
950 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
953 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
954 (match_operand:V4SF 1 "register_operand" "0,0")
956 "TARGET_SSE && TARGET_64BIT"
957 "cvtsi2ssq\t{%2, %0|%0, %2}"
958 [(set_attr "type" "sseicvt")
959 (set_attr "athlon_decode" "vector,double")
960 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of operand 1 (V4SF, register or memory)
;; to an SImode general register, represented with UNSPEC_FIX_NOTRUNC.
962 (define_insn "sse_cvtss2si"
963 [(set (match_operand:SI 0 "register_operand" "=r,r")
966 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
967 (parallel [(const_int 0)]))]
968 UNSPEC_FIX_NOTRUNC))]
970 "cvtss2si\t{%1, %0|%0, %1}"
971 [(set_attr "type" "sseicvt")
972 (set_attr "athlon_decode" "double,vector")
973 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit-only variant that converts element 0 of operand 1
;; (V4SF, register or memory) to a DImode general register, represented
;; with UNSPEC_FIX_NOTRUNC.
975 (define_insn "sse_cvtss2siq"
976 [(set (match_operand:DI 0 "register_operand" "=r,r")
979 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
980 (parallel [(const_int 0)]))]
981 UNSPEC_FIX_NOTRUNC))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvtss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of operand 1 (V4SF, register or
;; memory) to an SImode general register.
988 (define_insn "sse_cvttss2si"
989 [(set (match_operand:SI 0 "register_operand" "=r,r")
992 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
993 (parallel [(const_int 0)]))))]
995 "cvttss2si\t{%1, %0|%0, %1}"
996 [(set_attr "type" "sseicvt")
997 (set_attr "athlon_decode" "double,vector")
998 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit-only variant that converts element 0 of operand 1
;; (V4SF, register or memory) to a DImode general register.
1000 (define_insn "sse_cvttss2siq"
1001 [(set (match_operand:DI 0 "register_operand" "=r,r")
1004 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1005 (parallel [(const_int 0)]))))]
1006 "TARGET_SSE && TARGET_64BIT"
1007 "cvttss2siq\t{%1, %0|%0, %1}"
1008 [(set_attr "type" "sseicvt")
1009 (set_attr "athlon_decode" "double,vector")
1010 (set_attr "mode" "DI")])
;; cvtdq2ps: converts the four SImode elements of operand 1 (V4SI,
;; register or memory) to a V4SF result with float.  The "mode"
;; attribute is V4SF because the insn produces packed single-precision
;; data (it previously said V2DF).
1012 (define_insn "sse2_cvtdq2ps"
1013 [(set (match_operand:V4SF 0 "register_operand" "=x")
1014 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1016 "cvtdq2ps\t{%1, %0|%0, %1}"
1017 [(set_attr "type" "ssecvt")
1018 (set_attr "mode" "V4SF")])
;; cvtps2dq: converts operand 1 (V4SF, register or memory) to V4SI,
;; represented with UNSPEC_FIX_NOTRUNC.
1020 (define_insn "sse2_cvtps2dq"
1021 [(set (match_operand:V4SI 0 "register_operand" "=x")
1022 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1023 UNSPEC_FIX_NOTRUNC))]
1025 "cvtps2dq\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "mode" "TI")])
;; cvttps2dq: converts operand 1 (V4SF, register or memory) to V4SI
;; with fix.
1029 (define_insn "sse2_cvttps2dq"
1030 [(set (match_operand:V4SI 0 "register_operand" "=x")
1031 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1033 "cvttps2dq\t{%1, %0|%0, %1}"
1034 [(set_attr "type" "ssecvt")
1035 (set_attr "mode" "TI")])
1037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1039 ;; Parallel single-precision floating point element swizzling
1041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps family with three alternatives.  Register/register emits
;; movhlps; with an offsettable-memory operand 2 it emits movlps on %H2;
;; with a memory destination it emits movhps.  Operand 1 is tied to the
;; output in every alternative, and the condition rejects operands 1
;; and 2 both being memory.
1043 (define_insn "sse_movhlps"
1044 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1047 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1048 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1049 (parallel [(const_int 6)
1053 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1055 movhlps\t{%2, %0|%0, %2}
1056 movlps\t{%H2, %0|%0, %H2}
1057 movhps\t{%2, %0|%0, %2}"
1058 [(set_attr "type" "ssemov")
1059 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps family with three alternatives.  Register/register emits
;; movlhps; with a memory operand 2 it emits movhps; with an
;; offsettable-memory destination it emits movlps to %H0.  Operand 1 is
;; tied to the output in every alternative, and the condition requires
;; ix86_binary_operator_ok.
1061 (define_insn "sse_movlhps"
1062 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1065 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1066 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1067 (parallel [(const_int 0)
1071 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1073 movlhps\t{%2, %0|%0, %2}
1074 movhps\t{%2, %0|%0, %2}
1075 movlps\t{%2, %H0|%H0, %2}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: selects indices 2, 6, 3, 7 from the combination of operand 1
;; (tied to the destination) and operand 2 (register or memory).
1079 (define_insn "sse_unpckhps"
1080 [(set (match_operand:V4SF 0 "register_operand" "=x")
1083 (match_operand:V4SF 1 "register_operand" "0")
1084 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1085 (parallel [(const_int 2) (const_int 6)
1086 (const_int 3) (const_int 7)])))]
1088 "unpckhps\t{%2, %0|%0, %2}"
1089 [(set_attr "type" "sselog")
1090 (set_attr "mode" "V4SF")])
;; unpcklps: selects indices 0, 4, 1, 5 from the combination of operand 1
;; (tied to the destination) and operand 2 (register or memory).
1092 (define_insn "sse_unpcklps"
1093 [(set (match_operand:V4SF 0 "register_operand" "=x")
1096 (match_operand:V4SF 1 "register_operand" "0")
1097 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1098 (parallel [(const_int 0) (const_int 4)
1099 (const_int 1) (const_int 5)])))]
1101 "unpcklps\t{%2, %0|%0, %2}"
1102 [(set_attr "type" "sselog")
1103 (set_attr "mode" "V4SF")])
1105 ;; These are modeled with the same vec_concat as the others so that we
1106 ;; capture users of shufps that can use the new instructions
;; movshdup with a single V4SF source (register or memory); the selection
;; list starts at index 1.
1107 (define_insn "sse3_movshdup"
1108 [(set (match_operand:V4SF 0 "register_operand" "=x")
1111 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1113 (parallel [(const_int 1)
1118 "movshdup\t{%1, %0|%0, %1}"
1119 [(set_attr "type" "sse")
1120 (set_attr "mode" "V4SF")])
;; movsldup with a single V4SF source (register or memory); the selection
;; list starts at index 0.
1122 (define_insn "sse3_movsldup"
1123 [(set (match_operand:V4SF 0 "register_operand" "=x")
1126 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1128 (parallel [(const_int 0)
1133 "movsldup\t{%1, %0|%0, %1}"
1134 [(set_attr "type" "sse")
1135 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The immediate is
;; split into four 2-bit selectors; the two upper selectors are biased by
;; 4 before being handed to gen_sse_shufps_1.
1137 (define_expand "sse_shufps"
1138 [(match_operand:V4SF 0 "register_operand" "")
1139 (match_operand:V4SF 1 "register_operand" "")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "")
1141 (match_operand:SI 3 "const_int_operand" "")]
1144 int mask = INTVAL (operands[3]);
1145 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1146 GEN_INT ((mask >> 0) & 3),
1147 GEN_INT ((mask >> 2) & 3),
1148 GEN_INT (((mask >> 4) & 3) + 4),
1149 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selectors as separate operands: operands 3 and 4 must
;; be in 0..3, operands 5 and 6 in 4..7.  The output code rebuilds the
;; 8-bit immediate (op3 | op4 << 2 | (op5 - 4) << 4 | (op6 - 4) << 6),
;; stores it back into operands[3] and prints it as %3.
1153 (define_insn "sse_shufps_1"
1154 [(set (match_operand:V4SF 0 "register_operand" "=x")
1157 (match_operand:V4SF 1 "register_operand" "0")
1158 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1159 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1160 (match_operand 4 "const_0_to_3_operand" "")
1161 (match_operand 5 "const_4_to_7_operand" "")
1162 (match_operand 6 "const_4_to_7_operand" "")])))]
1166 mask |= INTVAL (operands[3]) << 0;
1167 mask |= INTVAL (operands[4]) << 2;
1168 mask |= (INTVAL (operands[5]) - 4) << 4;
1169 mask |= (INTVAL (operands[6]) - 4) << 6;
1170 operands[3] = GEN_INT (mask);
1172 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1174 [(set_attr "type" "sselog")
1175 (set_attr "mode" "V4SF")])
;; Extracts indices 2 and 3 of a V4SF operand into a V2SF destination:
;;   mem <- reg             movhps
;;   reg <- reg             movhlps
;;   reg <- offsettable mem movlps, reading %H1
1177 (define_insn "sse_storehps"
1178 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1180 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1181 (parallel [(const_int 2) (const_int 3)])))]
1184 movhps\t{%1, %0|%0, %1}
1185 movhlps\t{%1, %0|%0, %1}
1186 movlps\t{%H1, %0|%0, %H1}"
1187 [(set_attr "type" "ssemov")
1188 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines indices 0 and 1 of operand 1 (tied to the destination) with
;; the V2SF operand 2:
;;   reg, op2 in memory     movhps
;;   reg, op2 in register   movlhps
;;   offsettable mem dest   movlps, writing %H0
1190 (define_insn "sse_loadhps"
1191 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1194 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1195 (parallel [(const_int 0) (const_int 1)]))
1196 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1199 movhps\t{%2, %0|%0, %2}
1200 movlhps\t{%2, %0|%0, %2}
1201 movlps\t{%2, %H0|%H0, %2}"
1202 [(set_attr "type" "ssemov")
1203 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts indices 0 and 1 of a V4SF operand into a V2SF destination:
;;   mem <- reg   movlps
;;   reg <- reg   movaps (whole-register copy)
;;   reg <- mem   movlps
1205 (define_insn "sse_storelps"
1206 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1208 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1209 (parallel [(const_int 0) (const_int 1)])))]
1212 movlps\t{%1, %0|%0, %1}
1213 movaps\t{%1, %0|%0, %1}
1214 movlps\t{%1, %0|%0, %1}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines the V2SF operand 2 with indices 2 and 3 of operand 1:
;;   op2 tied to dest, op1 in register   shufps with immediate 0xe4
;;   op1 tied to dest, op2 in memory     movlps load
;;   memory dest, op2 in register        movlps store
1218 (define_insn "sse_loadlps"
1219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1224 (parallel [(const_int 2) (const_int 3)]))))]
1227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1228 movlps\t{%2, %0|%0, %2}
1229 movlps\t{%2, %0|%0, %2}"
1230 [(set_attr "type" "sselog,ssemov,ssemov")
1231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the destination and operand 2
;; is the SSE register printed as the instruction's source.
1233 (define_insn "sse_movss"
1234 [(set (match_operand:V4SF 0 "register_operand" "=x")
1236 (match_operand:V4SF 2 "register_operand" "x")
1237 (match_operand:V4SF 1 "register_operand" "0")
1240 "movss\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "ssemov")
1242 (set_attr "mode" "SF")])
;; The SF input is tied to the destination register; the insn is emitted
;; as shufps of %0 with itself and immediate 0.
1244 (define_insn "*vec_dupv4sf"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1247 (match_operand:SF 1 "register_operand" "0")))]
1249 "shufps\t{$0, %0, %0|%0, %0, 0}"
1250 [(set_attr "type" "sselog1")
1251 (set_attr "mode" "V4SF")])
1253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands; operand 2 is a register or zero
;; (reg_or_0_operand).  Alternatives:
;;   SSE reg, op1 tied, op2 SSE reg   unpcklps
;;   SSE reg, op1 in memory, op2 "C"  movss load
;;   MMX reg, op1 tied, op2 MMX reg   punpckldq
;;   MMX reg, op1 in memory, op2 "C"  movd load
1256 (define_insn "*sse_concatv2sf"
1257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1263 unpcklps\t{%2, %0|%0, %2}
1264 movss\t{%1, %0|%0, %1}
1265 punpckldq\t{%2, %0|%0, %2}
1266 movd\t{%1, %0|%0, %1}"
1267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands, operand 1 tied to the
;; destination: movlhps when operand 2 is a register, movhps when it is
;; in memory.
1270 (define_insn "*sse_concatv4sf"
1271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1273 (match_operand:V2SF 1 "register_operand" " 0,0")
1274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1277 movlhps\t{%2, %0|%0, %2}
1278 movhps\t{%2, %0|%0, %2}"
1279 [(set_attr "type" "ssemov")
1280 (set_attr "mode" "V4SF,V2SF")])
;; Standard-name expander for V4SF initialisation; all of the work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument.
1282 (define_expand "vec_initv4sf"
1283 [(match_operand:V4SF 0 "register_operand" "")
1284 (match_operand 1 "" "")]
1287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts the SF operand 2 into a V4SF whose other input (operand 1) is
;; either tied to the destination ("0") or matches constraint "C".
;; Visible templates: movss from an SSE register, movss from memory, and
;; movd from a general register.  The template for the fourth
;; (memory-destination) alternative is not visible in this listing.
1291 (define_insn "*vec_setv4sf_0"
1292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1300 movss\t{%2, %0|%0, %2}
1301 movss\t{%2, %0|%0, %2}
1302 movd\t{%2, %0|%0, %2}
1304 [(set_attr "type" "ssemov")
1305 (set_attr "mode" "SF")])
;; Post-reload split for a V4SF memory destination and a non-memory SF
;; source: the insn becomes an ordinary SFmode move to offset 0 of the
;; destination.
1308 [(set (match_operand:V4SF 0 "memory_operand" "")
1311 (match_operand:SF 1 "nonmemory_operand" ""))
1314 "TARGET_SSE && reload_completed"
1317 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Standard-name expander for setting one V4SF element; delegated to
;; ix86_expand_vector_set with the constant element index taken from
;; operand 2.
1321 (define_expand "vec_setv4sf"
1322 [(match_operand:V4SF 0 "register_operand" "")
1323 (match_operand:SF 1 "register_operand" "")
1324 (match_operand 2 "const_int_operand" "")]
1327 ix86_expand_vector_set (false, operands[0], operands[1],
1328 INTVAL (operands[2]));
;; Extraction of index 0 of a V4SF into an SF destination.  Source and
;; destination may not both be in memory.  After reload the insn is split
;; into a plain SFmode move whose source is either the same hard register
;; re-typed as SFmode or gen_lowpart (SFmode, op1); the test that chooses
;; between the two is not visible in this listing.
1332 (define_insn_and_split "*vec_extractv4sf_0"
1333 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1335 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1336 (parallel [(const_int 0)])))]
1337 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1339 "&& reload_completed"
1342 rtx op1 = operands[1];
1344 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1346 op1 = gen_lowpart (SFmode, op1);
1347 emit_move_insn (operands[0], op1);
;; Standard-name expander for extracting one V4SF element; delegated to
;; ix86_expand_vector_extract with the constant element index taken from
;; operand 2.
1351 (define_expand "vec_extractv4sf"
1352 [(match_operand:SF 0 "register_operand" "")
1353 (match_operand:V4SF 1 "register_operand" "")
1354 (match_operand 2 "const_int_operand" "")]
1357 ix86_expand_vector_extract (false, operands[0], operands[1],
1358 INTVAL (operands[2]));
1362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1364 ;; Parallel double-precision floating point arithmetic
1366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  The preparation code hands everything to
;; ix86_expand_fp_absneg_operator and ends with DONE, so the RTL template
;; itself is never emitted.
1368 (define_expand "negv2df2"
1369 [(set (match_operand:V2DF 0 "register_operand" "")
1370 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1372 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  The preparation code hands everything to
;; ix86_expand_fp_absneg_operator and ends with DONE, so the RTL template
;; itself is never emitted.
1374 (define_expand "absv2df2"
1375 [(set (match_operand:V2DF 0 "register_operand" "")
1376 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1378 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the (plus ...) template is then
;; emitted.
1380 (define_expand "addv2df3"
1381 [(set (match_operand:V2DF 0 "register_operand" "")
1382 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1383 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1385 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; the insn requires TARGET_SSE2 and ix86_binary_operator_ok.
1387 (define_insn "*addv2df3"
1388 [(set (match_operand:V2DF 0 "register_operand" "=x")
1389 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1390 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1391 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1392 "addpd\t{%2, %0|%0, %2}"
1393 [(set_attr "type" "sseadd")
1394 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands ("mode" attribute DF).  The "%" on operand 1
;; marks operands 1 and 2 as commutative.  The operand check is done in
;; V2DFmode, the mode of this pattern's operands, as in the other V2DF
;; arithmetic patterns.
1396 (define_insn "sse2_vmaddv2df3"
1397 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1403 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1404 "addsd\t{%2, %0|%0, %2}"
1405 [(set_attr "type" "sseadd")
1406 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the (minus ...) template is then
;; emitted.
1408 (define_expand "subv2df3"
1409 [(set (match_operand:V2DF 0 "register_operand" "")
1410 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1411 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1413 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no "%":
;; the operands are not interchangeable); operand 2 may be a register or
;; memory.
1415 (define_insn "*subv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1417 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1420 "subpd\t{%2, %0|%0, %2}"
1421 [(set_attr "type" "sseadd")
1422 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands ("mode" attribute DF).  Operand 1 is a register
;; tied to the destination; operand 2 may be a register or memory.
1424 (define_insn "sse2_vmsubv2df3"
1425 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1432 "subsd\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "sseadd")
1434 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the (mult ...) template is then
;; emitted.
1436 (define_expand "mulv2df3"
1437 [(set (match_operand:V2DF 0 "register_operand" "")
1438 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1439 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1441 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; the insn requires TARGET_SSE2 and ix86_binary_operator_ok.
1443 (define_insn "*mulv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1445 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1446 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1447 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1448 "mulpd\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "ssemul")
1450 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands ("mode" attribute DF).  The "%" on operand 1
;; marks operands 1 and 2 as commutative; the insn requires TARGET_SSE2
;; and ix86_binary_operator_ok.
1452 (define_insn "sse2_vmmulv2df3"
1453 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1459 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1460 "mulsd\t{%2, %0|%0, %2}"
1461 [(set_attr "type" "ssemul")
1462 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must already be a register; the
;; operands are passed through ix86_fixup_binary_operands_no_copy before
;; the (div ...) template is emitted.
1464 (define_expand "divv2df3"
1465 [(set (match_operand:V2DF 0 "register_operand" "")
1466 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1467 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1469 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
1471 (define_insn "*divv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1473 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1474 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1476 "divpd\t{%2, %0|%0, %2}"
1477 [(set_attr "type" "ssediv")
1478 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands ("mode" attribute DF).  Operand 1 is a register
;; tied to the destination; operand 2 may be a register or memory.
1480 (define_insn "sse2_vmdivv2df3"
1481 [(set (match_operand:V2DF 0 "register_operand" "=x")
1483 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1484 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1488 "divsd\t{%2, %0|%0, %2}"
1489 [(set_attr "type" "ssediv")
1490 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of a register or memory operand into an SSE
;; register.
1492 (define_insn "sqrtv2df2"
1493 [(set (match_operand:V2DF 0 "register_operand" "=x")
1494 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1496 "sqrtpd\t{%1, %0|%0, %1}"
1497 [(set_attr "type" "sse")
1498 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands ("mode" attribute DF).  Operand 1 is the
;; square-root input; operand 2 is tied to the destination.  Operand 1
;; uses nonimmediate_operand so that the memory half of its "xm"
;; constraint can actually be matched.
1500 (define_insn "sse2_vmsqrtv2df2"
1501 [(set (match_operand:V2DF 0 "register_operand" "=x")
1503 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1504 (match_operand:V2DF 2 "register_operand" "0")
1507 "sqrtsd\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "sse")
1509 (set_attr "mode" "DF")])
1511 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1512 ;; isn't really correct, as those rtl operators aren't defined when
1513 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF signed-max expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands then go
;; through ix86_fixup_binary_operands_no_copy.
1515 (define_expand "smaxv2df3"
1516 [(set (match_operand:V2DF 0 "register_operand" "")
1517 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1518 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1521 if (!flag_finite_math_only)
1522 operands[1] = force_reg (V2DFmode, operands[1]);
1523 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form ("%" on operand 1).  Only enabled when
;; flag_finite_math_only is set, in addition to TARGET_SSE2 and
;; ix86_binary_operator_ok.
1526 (define_insn "*smaxv2df3_finite"
1527 [(set (match_operand:V2DF 0 "register_operand" "=x")
1528 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1529 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1530 "TARGET_SSE2 && flag_finite_math_only
1531 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1532 "maxpd\t{%2, %0|%0, %2}"
1533 [(set_attr "type" "sseadd")
1534 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination, so the operand order is fixed.
1536 (define_insn "*smaxv2df3"
1537 [(set (match_operand:V2DF 0 "register_operand" "=x")
1538 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1539 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1541 "maxpd\t{%2, %0|%0, %2}"
1542 [(set_attr "type" "sseadd")
1543 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands, commutative form ("%" on operand 1).  Only
;; enabled when flag_finite_math_only is set, in addition to TARGET_SSE2
;; and ix86_binary_operator_ok.
1545 (define_insn "*sse2_vmsmaxv2df3_finite"
1546 [(set (match_operand:V2DF 0 "register_operand" "=x")
1548 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1549 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1552 "TARGET_SSE2 && flag_finite_math_only
1553 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1554 "maxsd\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sseadd")
1556 (set_attr "mode" "DF")])
;; maxsd on V2DF operands, non-commutative form: operand 1 must be a
;; register tied to the destination.
1558 (define_insn "sse2_vmsmaxv2df3"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1561 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1562 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1566 "maxsd\t{%2, %0|%0, %2}"
1567 [(set_attr "type" "sseadd")
1568 (set_attr "mode" "DF")])
;; V2DF signed-min expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands then go
;; through ix86_fixup_binary_operands_no_copy.
1570 (define_expand "sminv2df3"
1571 [(set (match_operand:V2DF 0 "register_operand" "")
1572 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1573 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1576 if (!flag_finite_math_only)
1577 operands[1] = force_reg (V2DFmode, operands[1]);
1578 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form ("%" on operand 1).  Only enabled when
;; flag_finite_math_only is set, in addition to TARGET_SSE2 and
;; ix86_binary_operator_ok.
1581 (define_insn "*sminv2df3_finite"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1585 "TARGET_SSE2 && flag_finite_math_only
1586 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1587 "minpd\t{%2, %0|%0, %2}"
1588 [(set_attr "type" "sseadd")
1589 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination, so the operand order is fixed.
1591 (define_insn "*sminv2df3"
1592 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1596 "minpd\t{%2, %0|%0, %2}"
1597 [(set_attr "type" "sseadd")
1598 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands, commutative form ("%" on operand 1).  Only
;; enabled when flag_finite_math_only is set, in addition to TARGET_SSE2
;; and ix86_binary_operator_ok.
1600 (define_insn "*sse2_vmsminv2df3_finite"
1601 [(set (match_operand:V2DF 0 "register_operand" "=x")
1603 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1604 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1607 "TARGET_SSE2 && flag_finite_math_only
1608 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1609 "minsd\t{%2, %0|%0, %2}"
1610 [(set_attr "type" "sseadd")
1611 (set_attr "mode" "DF")])
;; minsd on V2DF operands, non-commutative form: operand 1 must be a
;; register tied to the destination.
1613 (define_insn "sse2_vmsminv2df3"
1614 [(set (match_operand:V2DF 0 "register_operand" "=x")
1616 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1617 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1621 "minsd\t{%2, %0|%0, %2}"
1622 [(set_attr "type" "sseadd")
1623 (set_attr "mode" "DF")])
;; addsubpd.  Operand 1 is a register tied to the destination and operand
;; 2 a register or memory; the RTL reuses both through match_dup in a
;; (minus:V2DF ...) term.
1625 (define_insn "sse3_addsubv2df3"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (match_operand:V2DF 1 "register_operand" "0")
1630 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1631 (minus:V2DF (match_dup 1) (match_dup 2))
1634 "addsubpd\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "mode" "V2DF")])
;; haddpd.  The RTL pairs index 0 with index 1 of operand 1 (tied to the
;; destination) and, separately, index 0 with index 1 of operand 2
;; (register or memory).
1638 (define_insn "sse3_haddv2df3"
1639 [(set (match_operand:V2DF 0 "register_operand" "=x")
1643 (match_operand:V2DF 1 "register_operand" "0")
1644 (parallel [(const_int 0)]))
1645 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1648 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1649 (parallel [(const_int 0)]))
1650 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1652 "haddpd\t{%2, %0|%0, %2}"
1653 [(set_attr "type" "sseadd")
1654 (set_attr "mode" "V2DF")])
;; hsubpd.  The RTL pairs index 0 with index 1 of operand 1 (tied to the
;; destination) and, separately, index 0 with index 1 of operand 2
;; (register or memory).
1656 (define_insn "sse3_hsubv2df3"
1657 [(set (match_operand:V2DF 0 "register_operand" "=x")
1661 (match_operand:V2DF 1 "register_operand" "0")
1662 (parallel [(const_int 0)]))
1663 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1667 (parallel [(const_int 0)]))
1668 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1670 "hsubpd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "sseadd")
1672 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF: emits sse3_haddv2df3 with operand 1 supplied
;; as both inputs.
1674 (define_expand "reduc_splus_v2df"
1675 [(match_operand:V2DF 0 "register_operand" "")
1676 (match_operand:V2DF 1 "register_operand" "")]
1679 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; Parallel double-precision floating point comparisons
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V2DF compare producing a V2DF result.  Operator 3 must satisfy
;; sse_comparison_operator and is printed through the %D3 modifier to
;; form the cmp<cond>pd mnemonic.
1689 (define_insn "sse2_maskcmpv2df3"
1690 [(set (match_operand:V2DF 0 "register_operand" "=x")
1691 (match_operator:V2DF 3 "sse_comparison_operator"
1692 [(match_operand:V2DF 1 "register_operand" "0")
1693 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1695 "cmp%D3pd\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "ssecmp")
1697 (set_attr "mode" "V2DF")])
;; Scalar ("mode" attribute DF) counterpart of the packed compare:
;; operator 3 must satisfy sse_comparison_operator and is printed through
;; %D3 to form the cmp<cond>sd mnemonic.
1699 (define_insn "sse2_vmmaskcmpv2df3"
1700 [(set (match_operand:V2DF 0 "register_operand" "=x")
1702 (match_operator:V2DF 3 "sse_comparison_operator"
1703 [(match_operand:V2DF 1 "register_operand" "0")
1704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1708 "cmp%D3sd\t{%2, %0|%0, %2}"
1709 [(set_attr "type" "ssecmp")
1710 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from index 0 of operand 0 (SSE
;; register) and index 0 of operand 1 (register or memory).
1712 (define_insn "sse2_comi"
1713 [(set (reg:CCFP FLAGS_REG)
1716 (match_operand:V2DF 0 "register_operand" "x")
1717 (parallel [(const_int 0)]))
1719 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0)]))))]
1722 "comisd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecomi")
1724 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPU mode from index 0 of operand 0 (SSE
;; register) and index 0 of operand 1 (register or memory).
1726 (define_insn "sse2_ucomi"
1727 [(set (reg:CCFPU FLAGS_REG)
1730 (match_operand:V2DF 0 "register_operand" "x")
1731 (parallel [(const_int 0)]))
1733 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1734 (parallel [(const_int 0)]))))]
1736 "ucomisd\t{%1, %0|%0, %1}"
1737 [(set_attr "type" "ssecomi")
1738 (set_attr "mode" "DF")])
;; V2DF vector conditional.  Operator 3 compares operands 4 and 5;
;; operands 1 and 2 are the two candidate values.  Expansion is attempted
;; through ix86_expand_fp_vcond; what happens on its success or failure
;; is not visible in this listing.
1740 (define_expand "vcondv2df"
1741 [(set (match_operand:V2DF 0 "register_operand" "")
1743 (match_operator 3 ""
1744 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1745 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1746 (match_operand:V2DF 1 "general_operand" "")
1747 (match_operand:V2DF 2 "general_operand" "")))]
1750 if (ix86_expand_fp_vcond (operands))
1756 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1758 ;; Parallel double-precision floating point logical operations
1760 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the (and ...) template is then
;; emitted.
1762 (define_expand "andv2df3"
1763 [(set (match_operand:V2DF 0 "register_operand" "")
1764 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1765 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1767 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; the insn requires TARGET_SSE2 and ix86_binary_operator_ok.
1769 (define_insn "*andv2df3"
1770 [(set (match_operand:V2DF 0 "register_operand" "=x")
1771 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1773 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1774 "andpd\t{%2, %0|%0, %2}"
1775 [(set_attr "type" "sselog")
1776 (set_attr "mode" "V2DF")])
;; andnpd: (and (not op1) op2).  The complemented operand 1 is the
;; register tied to the destination; operand 2 may be a register or
;; memory.
1778 (define_insn "sse2_nandv2df3"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1783 "andnpd\t{%2, %0|%0, %2}"
1784 [(set_attr "type" "sselog")
1785 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the (ior ...) template is then
;; emitted.
1787 (define_expand "iorv2df3"
1788 [(set (match_operand:V2DF 0 "register_operand" "")
1789 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1790 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1792 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  The "%" on operand 1 marks operands 1 and 2 as commutative; the
;; insn requires TARGET_SSE2 and ix86_binary_operator_ok.
1794 (define_insn "*iorv2df3"
1795 [(set (match_operand:V2DF 0 "register_operand" "=x")
1796 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1797 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1798 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1799 "orpd\t{%2, %0|%0, %2}"
1800 [(set_attr "type" "sselog")
1801 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the (xor ...) template is then
;; emitted.
1803 (define_expand "xorv2df3"
1804 [(set (match_operand:V2DF 0 "register_operand" "")
1805 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1806 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1808 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  The "%" on operand 1 marks operands 1 and 2 as commutative;
;; the insn requires TARGET_SSE2 and ix86_binary_operator_ok.
1810 (define_insn "*xorv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x")
1812 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1813 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1814 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1815 "xorpd\t{%2, %0|%0, %2}"
1816 [(set_attr "type" "sselog")
1817 (set_attr "mode" "V2DF")])
1819 ;; Also define scalar versions. These are used for abs, neg, and
1820 ;; conditional move. Using subregs into vector modes causes register
1821 ;; allocation lossage. These patterns do not allow memory operands
1822 ;; because the native instructions read the full 128 bits.
;; Scalar DF bitwise AND emitted as the packed andpd.  Both inputs must
;; be registers; operand 1 is tied to the destination.
1824 (define_insn "*anddf3"
1825 [(set (match_operand:DF 0 "register_operand" "=x")
1826 (and:DF (match_operand:DF 1 "register_operand" "0")
1827 (match_operand:DF 2 "register_operand" "x")))]
1829 "andpd\t{%2, %0|%0, %2}"
1830 [(set_attr "type" "sselog")
1831 (set_attr "mode" "V2DF")])
;; Scalar DF (and (not op1) op2) emitted as the packed andnpd.  Both
;; inputs must be registers; the complemented operand 1 is tied to the
;; destination.
1833 (define_insn "*nanddf3"
1834 [(set (match_operand:DF 0 "register_operand" "=x")
1835 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1836 (match_operand:DF 2 "register_operand" "x")))]
1838 "andnpd\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "sselog")
1840 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR emitted as the packed orpd.  Both inputs must be
;; registers; operand 1 is tied to the destination.
1842 (define_insn "*iordf3"
1843 [(set (match_operand:DF 0 "register_operand" "=x")
1844 (ior:DF (match_operand:DF 1 "register_operand" "0")
1845 (match_operand:DF 2 "register_operand" "x")))]
1847 "orpd\t{%2, %0|%0, %2}"
1848 [(set_attr "type" "sselog")
1849 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR emitted as the packed xorpd.  Both inputs must
;; be registers; operand 1 is tied to the destination.
1851 (define_insn "*xordf3"
1852 [(set (match_operand:DF 0 "register_operand" "=x")
1853 (xor:DF (match_operand:DF 1 "register_operand" "0")
1854 (match_operand:DF 2 "register_operand" "x")))]
1856 "xorpd\t{%2, %0|%0, %2}"
1857 [(set_attr "type" "sselog")
1858 (set_attr "mode" "V2DF")])
1860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1862 ;; Parallel double-precision floating point conversion operations
1864 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF) of a V2SI operand held in an MMX register ("y")
;; or in memory.  The "unit" attribute is mmx only for the MMX-register
;; alternative.
1866 (define_insn "sse2_cvtpi2pd"
1867 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1868 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1870 "cvtpi2pd\t{%1, %0|%0, %1}"
1871 [(set_attr "type" "ssecvt")
1872 (set_attr "unit" "mmx,*")
1873 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion represented as UNSPEC_FIX_NOTRUNC;
;; the result goes to an MMX register ("y").
1875 (define_insn "sse2_cvtpd2pi"
1876 [(set (match_operand:V2SI 0 "register_operand" "=y")
1877 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1878 UNSPEC_FIX_NOTRUNC))]
1880 "cvtpd2pi\t{%1, %0|%0, %1}"
1881 [(set_attr "type" "ssecvt")
1882 (set_attr "unit" "mmx")
1883 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI conversion represented with the RTL `fix'
;; operator; the result goes to an MMX register ("y").
;; NOTE(review): "mode" is TI here but DI in sse2_cvtpd2pi -- confirm
;; whether the difference is intentional.
1885 (define_insn "sse2_cvttpd2pi"
1886 [(set (match_operand:V2SI 0 "register_operand" "=y")
1887 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1889 "cvttpd2pi\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "ssecvt")
1891 (set_attr "unit" "mmx")
1892 (set_attr "mode" "TI")])
;; cvtsi2sd: (float:DF) of the SI operand 2 (general register or memory).
;; The V2DF operand 1 is tied to the destination.
1894 (define_insn "sse2_cvtsi2sd"
1895 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1898 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1899 (match_operand:V2DF 1 "register_operand" "0,0")
1902 "cvtsi2sd\t{%2, %0|%0, %2}"
1903 [(set_attr "type" "sseicvt")
1904 (set_attr "mode" "DF")
1905 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: (float:DF) of the DI operand 2 (general register or
;; memory); the V2DF operand 1 is tied to the destination.  Only enabled
;; when both TARGET_SSE2 and TARGET_64BIT hold.
1907 (define_insn "sse2_cvtsi2sdq"
1908 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1911 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1912 (match_operand:V2DF 1 "register_operand" "0,0")
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvtsi2sdq\t{%2, %0|%0, %2}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DF")
1918 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: converts index 0 of a V2DF register or memory operand to an
;; SI general register; represented as UNSPEC_FIX_NOTRUNC.
1920 (define_insn "sse2_cvtsd2si"
1921 [(set (match_operand:SI 0 "register_operand" "=r,r")
1924 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1925 (parallel [(const_int 0)]))]
1926 UNSPEC_FIX_NOTRUNC))]
1928 "cvtsd2si\t{%1, %0|%0, %1}"
1929 [(set_attr "type" "sseicvt")
1930 (set_attr "athlon_decode" "double,vector")
1931 (set_attr "mode" "SI")])
;; cvtsd2siq: converts index 0 of a V2DF register or memory operand to a
;; DI general register; represented as UNSPEC_FIX_NOTRUNC.  Only enabled
;; when both TARGET_SSE2 and TARGET_64BIT hold.
1933 (define_insn "sse2_cvtsd2siq"
1934 [(set (match_operand:DI 0 "register_operand" "=r,r")
1937 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1938 (parallel [(const_int 0)]))]
1939 UNSPEC_FIX_NOTRUNC))]
1940 "TARGET_SSE2 && TARGET_64BIT"
1941 "cvtsd2siq\t{%1, %0|%0, %1}"
1942 [(set_attr "type" "sseicvt")
1943 (set_attr "athlon_decode" "double,vector")
1944 (set_attr "mode" "DI")])
;; cvttsd2si: converts index 0 of a V2DF register or memory operand to an
;; SI general register.
1946 (define_insn "sse2_cvttsd2si"
1947 [(set (match_operand:SI 0 "register_operand" "=r,r")
1950 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1951 (parallel [(const_int 0)]))))]
1953 "cvttsd2si\t{%1, %0|%0, %1}"
1954 [(set_attr "type" "sseicvt")
1955 (set_attr "mode" "SI")
1956 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: converts index 0 of a V2DF register or memory operand to a
;; DI general register.  Only enabled when both TARGET_SSE2 and
;; TARGET_64BIT hold.
1958 (define_insn "sse2_cvttsd2siq"
1959 [(set (match_operand:DI 0 "register_operand" "=r,r")
1962 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1963 (parallel [(const_int 0)]))))]
1964 "TARGET_SSE2 && TARGET_64BIT"
1965 "cvttsd2siq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "sseicvt")
1967 (set_attr "mode" "DI")
1968 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: produces a V2DF from indices 0 and 1 of a V4SI register or
;; memory operand.
1970 (define_insn "sse2_cvtdq2pd"
1971 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1975 (parallel [(const_int 0) (const_int 1)]))))]
1977 "cvtdq2pd\t{%1, %0|%0, %1}"
1978 [(set_attr "type" "ssecvt")
1979 (set_attr "mode" "V2DF")])
;; Expander for the V2DF -> V4SI conversion.  The preparation code sets
;; operand 2 to the V2SI zero constant.
1981 (define_expand "sse2_cvtpd2dq"
1982 [(set (match_operand:V4SI 0 "register_operand" "")
1984 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1988 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq.  The V4SI result combines a V2SI unspec of operand 1 with
;; operand 2, which must satisfy const0_operand.
1990 (define_insn "*sse2_cvtpd2dq"
1991 [(set (match_operand:V4SI 0 "register_operand" "=x")
1993 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1995 (match_operand:V2SI 2 "const0_operand" "")))]
1997 "cvtpd2dq\t{%1, %0|%0, %1}"
1998 [(set_attr "type" "ssecvt")
1999 (set_attr "mode" "TI")])
;; Expander for the (fix:V2SI) V2DF -> V4SI conversion.  The preparation
;; code sets operand 2 to the V2SI zero constant.
2001 (define_expand "sse2_cvttpd2dq"
2002 [(set (match_operand:V4SI 0 "register_operand" "")
2004 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2007 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq.  The V4SI result combines (fix:V2SI op1) with operand 2,
;; which must satisfy const0_operand.
2009 (define_insn "*sse2_cvttpd2dq"
2010 [(set (match_operand:V4SI 0 "register_operand" "=x")
2012 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2013 (match_operand:V2SI 2 "const0_operand" "")))]
2015 "cvttpd2dq\t{%1, %0|%0, %1}"
2016 [(set_attr "type" "ssecvt")
2017 (set_attr "mode" "TI")])
;; cvtsd2ss: (float_truncate:V2SF) of the V2DF operand 2 (register or
;; memory); the V4SF operand 1 is tied to the destination.
2019 (define_insn "sse2_cvtsd2ss"
2020 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2023 (float_truncate:V2SF
2024 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2025 (match_operand:V4SF 1 "register_operand" "0,0")
2028 "cvtsd2ss\t{%2, %0|%0, %2}"
2029 [(set_attr "type" "ssecvt")
2030 (set_attr "athlon_decode" "vector,double")
2031 (set_attr "mode" "SF")])
;; cvtss2sd: the RTL selects indices 0 and 1 of the V4SF operand 2
;; (register or memory); the V2DF operand 1 is tied to the destination.
2033 (define_insn "sse2_cvtss2sd"
2034 [(set (match_operand:V2DF 0 "register_operand" "=x")
2038 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2039 (parallel [(const_int 0) (const_int 1)])))
2040 (match_operand:V2DF 1 "register_operand" "0")
2043 "cvtss2sd\t{%2, %0|%0, %2}"
2044 [(set_attr "type" "ssecvt")
2045 (set_attr "mode" "DF")])
;; Expander for the (float_truncate:V2SF) V2DF -> V4SF conversion.  The
;; preparation code sets operand 2 to the V2SF zero constant.
2047 (define_expand "sse2_cvtpd2ps"
2048 [(set (match_operand:V4SF 0 "register_operand" "")
2050 (float_truncate:V2SF
2051 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2054 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps.  The V4SF result combines (float_truncate:V2SF op1) with
;; operand 2, which must satisfy const0_operand.
2056 (define_insn "*sse2_cvtpd2ps"
2057 [(set (match_operand:V4SF 0 "register_operand" "=x")
2059 (float_truncate:V2SF
2060 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2061 (match_operand:V2SF 2 "const0_operand" "")))]
2063 "cvtpd2ps\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "ssecvt")
2065 (set_attr "mode" "V4SF")])
;; cvtps2pd: produces a V2DF from indices 0 and 1 of a V4SF register or
;; memory operand.
2067 (define_insn "sse2_cvtps2pd"
2068 [(set (match_operand:V2DF 0 "register_operand" "=x")
2071 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2072 (parallel [(const_int 0) (const_int 1)]))))]
2074 "cvtps2pd\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "ssecvt")
2076 (set_attr "mode" "V2DF")])
2078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2080 ;; Parallel double-precision floating point element swizzling
2082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives:
;;   reg, op1 tied, op2 register          unpckhpd
;;   reg, op2 tied, op1 offsettable mem   movlpd, reading %H1
;;   memory dest, op2 tied, op1 register  movhpd store
;; The insn condition rejects operands 1 and 2 both being in memory.
2084 (define_insn "sse2_unpckhpd"
2085 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2088 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2089 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2090 (parallel [(const_int 1)
2092 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2094 unpckhpd\t{%2, %0|%0, %2}
2095 movlpd\t{%H1, %0|%0, %H1}
2096 movhpd\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sselog,ssemov,ssemov")
2098 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup.  Two alternatives: an SSE-register destination with a
;; register or memory source, and an offsettable-memory destination with
;; a register source.  The visible pattern has only operands 0 and 1, so
;; the insn condition rejects the case where those two are both in
;; memory.
2100 (define_insn "*sse3_movddup"
2101 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2104 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2106 (parallel [(const_int 0)
2108 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2110 movddup\t{%1, %0|%0, %1}
2112 [(set_attr "type" "sselog,ssemov")
2113 (set_attr "mode" "V2DF")])
;; Post-reload split for a memory destination and a register source: the
;; source register is re-typed as DFmode and stored twice, at byte
;; offsets 0 and 8 of the destination.
2116 [(set (match_operand:V2DF 0 "memory_operand" "")
2119 (match_operand:V2DF 1 "register_operand" "")
2121 (parallel [(const_int 0)
2123 "TARGET_SSE3 && reload_completed"
2126 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2127 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2128 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Operand 1 is tied to the destination in every alternative.
;;   0: register operand 2, printed as unpcklpd;
;;   1: memory operand 2, loaded with movhpd;
;;   2: offsettable memory destination, register operand 2 stored with
;;      movlpd to %H0.
;; The insn condition rejects operands 1 and 2 both being memory.
2132 (define_insn "sse2_unpcklpd"
2133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2136 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2137 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2138 (parallel [(const_int 0)
2140 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2142 unpcklpd\t{%2, %0|%0, %2}
2143 movhpd\t{%2, %0|%0, %2}
2144 movlpd\t{%2, %H0|%H0, %2}"
2145 [(set_attr "type" "sselog,ssemov,ssemov")
2146 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd taking the immediate as const_int operand 3.  It
;; forwards to sse2_shufpd_1, translating the mask into explicit element
;; selectors; bit 1 of the mask picks selector 3 or 2.
2148 (define_expand "sse2_shufpd"
2149 [(match_operand:V2DF 0 "register_operand" "")
2150 (match_operand:V2DF 1 "register_operand" "")
2151 (match_operand:V2DF 2 "nonimmediate_operand" "")
2152 (match_operand:SI 3 "const_int_operand" "")]
2155 int mask = INTVAL (operands[3]);
2156 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2158 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection written as two element indices: operand 3 must
;; satisfy const_0_to_1_operand and operand 4 const_2_to_3_operand.  The
;; output code folds them back into the instruction immediate (operand 3 as
;; bit 0, operand 4 minus 2 as bit 1) and overwrites operands[3] with that
;; value before printing.
2162 (define_insn "sse2_shufpd_1"
2163 [(set (match_operand:V2DF 0 "register_operand" "=x")
2166 (match_operand:V2DF 1 "register_operand" "0")
2167 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2168 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2169 (match_operand 4 "const_2_to_3_operand" "")])))]
2173 mask = INTVAL (operands[3]);
2174 mask |= (INTVAL (operands[4]) - 2) << 1;
2175 operands[3] = GEN_INT (mask);
2177 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2179 [(set_attr "type" "sselog")
2180 (set_attr "mode" "V2DF")])
;; Extract element 1 of the V2DF operand 1 into a DF destination.
;; Alternative 0 is the store to memory, printed as movhpd; the other two
;; alternatives have register destinations (types sselog1 and ssemov).
;; The insn condition rejects a memory source with a memory destination.
2182 (define_insn "sse2_storehpd"
2183 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2185 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2186 (parallel [(const_int 1)])))]
2187 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2189 movhpd\t{%1, %0|%0, %1}
2192 [(set_attr "type" "ssemov,sselog1,ssemov")
2193 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF memory operand into a
;; DF register becomes a plain DFmode move from byte offset 8 of that memory.
2196 [(set (match_operand:DF 0 "register_operand" "")
2198 (match_operand:V2DF 1 "memory_operand" "")
2199 (parallel [(const_int 1)])))]
2200 "TARGET_SSE2 && reload_completed"
2201 [(set (match_dup 0) (match_dup 1))]
2203 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of the V2DF operand 1 into a DF destination.
;; Alternative 0 is the store to memory, printed as movlpd; the other two
;; alternatives have register destinations.  The insn condition rejects a
;; memory source with a memory destination.
2206 (define_insn "sse2_storelpd"
2207 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2209 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2210 (parallel [(const_int 0)])))]
2211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2213 movlpd\t{%1, %0|%0, %1}
2216 [(set_attr "type" "ssemov")
2217 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split for extracting element 0 of operand 1 into a DF
;; register.  Operand 1 is re-expressed in DFmode, either as a register with
;; the same register number or through gen_lowpart, and then copied with an
;; ordinary move.
2220 [(set (match_operand:DF 0 "register_operand" "")
2222 (match_operand:V2DF 1 "nonimmediate_operand" "")
2223 (parallel [(const_int 0)])))]
2224 "TARGET_SSE2 && reload_completed"
2227 rtx op1 = operands[1];
2229 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2231 op1 = gen_lowpart (DFmode, op1);
2232 emit_move_insn (operands[0], op1);
;; Combine element 0 of the V2DF operand 1 with the DF operand 2.
;;   0: memory operand 2, printed as movhpd;
;;   1: register operand 2, printed as unpcklpd;
;;   2: operand 2 tied to the destination, printed as shufpd $1 with
;;      operand 1;
;;   3: offsettable memory destination (type "other").
;; The insn condition rejects operands 1 and 2 both being memory.
2236 (define_insn "sse2_loadhpd"
2237 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2240 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2241 (parallel [(const_int 0)]))
2242 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2243 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2245 movhpd\t{%2, %0|%0, %2}
2246 unpcklpd\t{%2, %0|%0, %2}
2247 shufpd\t{$1, %1, %0|%0, %1, 1}
2249 [(set_attr "type" "ssemov,sselog,sselog,other")
2250 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split: a V2DF memory that keeps its own element 0 and takes
;; the DF register operand 1 as its other element only needs a DFmode store
;; of operand 1 to byte offset 8 of that memory.
2253 [(set (match_operand:V2DF 0 "memory_operand" "")
2255 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2256 (match_operand:DF 1 "register_operand" "")))]
2257 "TARGET_SSE2 && reload_completed"
2258 [(set (match_dup 0) (match_dup 1))]
2260 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine the DF operand 2 with element 1 of operand 1.  Operand 1 uses
;; vector_move_operand, and alternative 0 accepts constraint "C" for it.
;; The register-destination alternatives are printed as movsd, movlpd,
;; movsd, shufpd $2 and movhpd from %H1; the last alternative has a memory
;; destination (type "other").
;; The insn condition rejects operands 1 and 2 both being memory.
2263 (define_insn "sse2_loadlpd"
2264 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2266 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2268 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2269 (parallel [(const_int 1)]))))]
2270 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2272 movsd\t{%2, %0|%0, %2}
2273 movlpd\t{%2, %0|%0, %2}
2274 movsd\t{%2, %0|%0, %2}
2275 shufpd\t{$2, %2, %0|%0, %2, 2}
2276 movhpd\t{%H1, %0|%0, %H1}
2278 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2279 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split: a V2DF memory that takes the DF register operand 1 as
;; its element 0 and keeps its own element 1 only needs a DFmode store of
;; operand 1.  The new value is element 0, so the store must go to byte
;; offset 0 of the memory; offset 8 is where element 1 lives and storing
;; there would overwrite the element this pattern is meant to preserve.
2282 [(set (match_operand:V2DF 0 "memory_operand" "")
2284 (match_operand:DF 1 "register_operand" "")
2285 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2286 "TARGET_SSE2 && reload_completed"
2287 [(set (match_dup 0) (match_dup 1))]
2289 operands[0] = adjust_address (operands[0], DFmode, 0);
2292 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extraction of element 1 of a V2DF for targets with SSE but without SSE2
;; (see the insn condition), using single-precision moves: movhps for the
;; store to memory, movhlps between registers, and movlps from %H1 for an
;; offsettable memory source.
2294 (define_insn "*vec_extractv2df_1_sse"
2295 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2297 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2298 (parallel [(const_int 1)])))]
2299 "!TARGET_SSE2 && TARGET_SSE
2300 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2302 movhps\t{%1, %0|%0, %1}
2303 movhlps\t{%1, %0|%0, %1}
2304 movlps\t{%H1, %0|%0, %H1}"
2305 [(set_attr "type" "ssemov")
2306 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of element 0 of a V2DF for targets with SSE but without SSE2
;; (see the insn condition): movlps for the store to memory, movaps between
;; registers, and movlps for a memory source.
2308 (define_insn "*vec_extractv2df_0_sse"
2309 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2311 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2312 (parallel [(const_int 0)])))]
2313 "!TARGET_SSE2 && TARGET_SSE
2314 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2316 movlps\t{%1, %0|%0, %1}
2317 movaps\t{%1, %0|%0, %1}
2318 movlps\t{%1, %0|%0, %1}"
2319 [(set_attr "type" "ssemov")
2320 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Six alternatives combining the V2DF operands 2 and 1 into operand 0.
;; In output order they are printed as: movsd (register operand 2), movlpd
;; (memory operand 2), movlpd (memory destination), shufpd $2, movhps from
;; %H1 (offsettable memory operand 1), and movhps to %H0 (offsettable memory
;; destination).
2322 (define_insn "sse2_movsd"
2323 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2325 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2326 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2330 movsd\t{%2, %0|%0, %2}
2331 movlpd\t{%2, %0|%0, %2}
2332 movlpd\t{%2, %0|%0, %2}
2333 shufpd\t{$2, %2, %0|%0, %2, 2}
2334 movhps\t{%H1, %0|%0, %H1}
2335 movhps\t{%1, %H0|%H0, %1}"
2336 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2337 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Printed as movddup: the DF operand 1 (register or memory) is the source
;; and the result is a V2DF register.
2339 (define_insn "*vec_dupv2df_sse3"
2340 [(set (match_operand:V2DF 0 "register_operand" "=x")
2342 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2344 "movddup\t{%1, %0|%0, %1}"
2345 [(set_attr "type" "sselog1")
2346 (set_attr "mode" "DF")])
;; Variant in which the DF source must be a register tied to the V2DF
;; destination (constraint "0").
2348 (define_insn "*vec_dupv2df"
2349 [(set (match_operand:V2DF 0 "register_operand" "=x")
2351 (match_operand:DF 1 "register_operand" "0")))]
2354 [(set_attr "type" "sselog1")
2355 (set_attr "mode" "V4SF")])
;; Printed as movddup with the DF operand 1 (register or memory) as source
;; and a V2DF register as destination.
2357 (define_insn "*vec_concatv2df_sse3"
2358 [(set (match_operand:V2DF 0 "register_operand" "=x")
2360 (match_operand:DF 1 "nonimmediate_operand" "xm")
2363 "movddup\t{%1, %0|%0, %1}"
2364 [(set_attr "type" "sselog1")
2365 (set_attr "mode" "DF")])
;; Build a V2DF from the DF operands 1 and 2.  The first three alternatives
;; use constraint "Y" for the destination and are printed as unpcklpd,
;; movhpd and movsd (the movsd form loads operand 1 from memory while
;; operand 2 matches constraint "C").  The last two use constraint "x" and
;; the single-precision forms movlhps and movhps.
2367 (define_insn "*vec_concatv2df"
2368 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2370 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2371 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2374 unpcklpd\t{%2, %0|%0, %2}
2375 movhpd\t{%2, %0|%0, %2}
2376 movsd\t{%1, %0|%0, %1}
2377 movlhps\t{%2, %0|%0, %2}
2378 movhps\t{%2, %0|%0, %2}"
2379 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2380 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander that hands the work to ix86_expand_vector_set: operand 0 is the
;; V2DF register, operand 1 the DF value, and operand 2 the constant element
;; index (passed as INTVAL).
2382 (define_expand "vec_setv2df"
2383 [(match_operand:V2DF 0 "register_operand" "")
2384 (match_operand:DF 1 "register_operand" "")
2385 (match_operand 2 "const_int_operand" "")]
2388 ix86_expand_vector_set (false, operands[0], operands[1],
2389 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_extract: operand 0 is
;; the DF result register, operand 1 the V2DF source, and operand 2 the
;; constant element index (passed as INTVAL).
2393 (define_expand "vec_extractv2df"
2394 [(match_operand:DF 0 "register_operand" "")
2395 (match_operand:V2DF 1 "register_operand" "")
2396 (match_operand 2 "const_int_operand" "")]
2399 ix86_expand_vector_extract (false, operands[0], operands[1],
2400 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_init with the V2DF
;; destination register (operand 0) and the unconstrained operand 1.
2404 (define_expand "vec_initv2df"
2405 [(match_operand:V2DF 0 "register_operand" "")
2406 (match_operand 1 "" "")]
2409 ix86_expand_vector_init (false, operands[0], operands[1]);
2413 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2415 ;; Parallel integral arithmetic
2417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander.  The preparation statement creates
;; operand 2 as a register holding the all-zero vector of <MODE>mode.
;; NOTE(review): presumably the pattern subtracts operand 1 from that zero
;; register -- confirm against the full RTL template.
2419 (define_expand "neg<mode>2"
2420 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2423 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2425 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for integer vector addition over all SSEMODEI modes.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy with code
;; PLUS before the insn is generated.
2427 (define_expand "add<mode>3"
2428 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2429 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2430 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2432 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matcher for integer vector addition, printed as padd with the element
;; size suffix from ssevecsize (b/w/d/q).  Operand 1 is marked commutative
;; ("%") and tied to the destination; operand 2 may be a register or memory.
2434 (define_insn "*add<mode>3"
2435 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2437 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2438 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2439 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2440 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2441 [(set_attr "type" "sseiadd")
2442 (set_attr "mode" "TI")])
;; Addition checked against code SS_PLUS, for the SSEMODE12 modes (V16QI and
;; V8HI); printed as padds with the b/w suffix.  Operand 1 is commutative
;; and tied to the destination.
2444 (define_insn "sse2_ssadd<mode>3"
2445 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2447 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2448 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2449 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2450 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2451 [(set_attr "type" "sseiadd")
2452 (set_attr "mode" "TI")])
;; Addition checked against code US_PLUS, for the SSEMODE12 modes (V16QI and
;; V8HI); printed as paddus with the b/w suffix.  Operand 1 is commutative
;; and tied to the destination.
2454 (define_insn "sse2_usadd<mode>3"
2455 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2457 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2458 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2459 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2460 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2461 [(set_attr "type" "sseiadd")
2462 (set_attr "mode" "TI")])
;; Named expander for integer vector subtraction over all SSEMODEI modes.
;; Operand 1 must already be a register; the operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MINUS.
2464 (define_expand "sub<mode>3"
2465 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2466 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2467 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2469 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matcher for integer vector subtraction, printed as psub with the element
;; size suffix from ssevecsize.  Operand 1 is a register tied to the
;; destination (no commutative marker); operand 2 may be register or memory.
2471 (define_insn "*sub<mode>3"
2472 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2474 (match_operand:SSEMODEI 1 "register_operand" "0")
2475 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2477 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2478 [(set_attr "type" "sseiadd")
2479 (set_attr "mode" "TI")])
;; Printed as psubs with the b/w suffix, for the SSEMODE12 modes (V16QI and
;; V8HI).  Operand 1 is a register tied to the destination.
2481 (define_insn "sse2_sssub<mode>3"
2482 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2484 (match_operand:SSEMODE12 1 "register_operand" "0")
2485 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2487 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2488 [(set_attr "type" "sseiadd")
2489 (set_attr "mode" "TI")])
;; Printed as psubus with the b/w suffix, for the SSEMODE12 modes (V16QI and
;; V8HI).  Operand 1 is a register tied to the destination.
2491 (define_insn "sse2_ussub<mode>3"
2492 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2494 (match_operand:SSEMODE12 1 "register_operand" "0")
2495 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2497 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2498 [(set_attr "type" "sseiadd")
2499 (set_attr "mode" "TI")])
;; V16QI multiply synthesized from word multiplies.  Twelve V16QI temporaries
;; are allocated; each input is unpacked against itself with
;; punpckhbw/punpcklbw, the halves are multiplied with mulv8hi3 (viewing the
;; temporaries as V8HI through gen_lowpart), and the product bytes are
;; gathered into the result by a chain of punpckhbw/punpcklbw steps.
2501 (define_expand "mulv16qi3"
2502 [(set (match_operand:V16QI 0 "register_operand" "")
2503 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2504 (match_operand:V16QI 2 "register_operand" "")))]
2510 for (i = 0; i < 12; ++i)
2511 t[i] = gen_reg_rtx (V16QImode);
2513 /* Unpack data such that we've got a source byte in each low byte of
2514 each word. We don't care what goes into the high byte of each word.
2515 Rather than trying to get zero in there, most convenient is to let
2516 it be a copy of the low byte. */
2517 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2518 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2519 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2520 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2522 /* Multiply words. The end-of-line annotations here give a picture of what
2523 the output of that instruction looks like. Dot means don't care; the
2524 letters are the bytes of the result with A being the most significant. */
2525 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2526 gen_lowpart (V8HImode, t[0]),
2527 gen_lowpart (V8HImode, t[1])));
2528 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2529 gen_lowpart (V8HImode, t[2]),
2530 gen_lowpart (V8HImode, t[3])));
2532 /* Extract the relevant bytes and merge them back together. */
2533 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2534 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2535 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2536 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2537 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2538 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2541 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Named expander for V8HI multiplication.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT before the insn is
;; generated.
2545 (define_expand "mulv8hi3"
2546 [(set (match_operand:V8HI 0 "register_operand" "")
2547 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2548 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2550 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for V8HI multiplication, printed as pmullw.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be register or
;; memory.
2552 (define_insn "*mulv8hi3"
2553 [(set (match_operand:V8HI 0 "register_operand" "=x")
2554 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2555 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2556 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2557 "pmullw\t{%2, %0|%0, %2}"
2558 [(set_attr "type" "sseimul")
2559 (set_attr "mode" "TI")])
;; V8HI multiply printed as pmulhw.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be register or memory.
2561 (define_insn "sse2_smulv8hi3_highpart"
2562 [(set (match_operand:V8HI 0 "register_operand" "=x")
2567 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2569 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2571 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2572 "pmulhw\t{%2, %0|%0, %2}"
2573 [(set_attr "type" "sseimul")
2574 (set_attr "mode" "TI")])
;; V8HI multiply printed as pmulhuw.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be register or memory.
2576 (define_insn "sse2_umulv8hi3_highpart"
2577 [(set (match_operand:V8HI 0 "register_operand" "=x")
2582 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2584 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2586 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2587 "pmulhuw\t{%2, %0|%0, %2}"
2588 [(set_attr "type" "sseimul")
2589 (set_attr "mode" "TI")])
;; pmuludq: multiplies elements 0 and 2 of the two V4SI operands and writes
;; a V2DI result.  Operand 1 is commutative and tied to the destination;
;; operand 2 may be register or memory.
;; The operands checked by ix86_binary_operator_ok are V4SI, so the mode
;; passed to it is V4SImode (not V8HImode, which belongs to the pmulhw
;; family of patterns).
2591 (define_insn "sse2_umulv2siv2di3"
2592 [(set (match_operand:V2DI 0 "register_operand" "=x")
2596 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2597 (parallel [(const_int 0) (const_int 2)])))
2600 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2601 (parallel [(const_int 0) (const_int 2)])))))]
2602 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2603 "pmuludq\t{%2, %0|%0, %2}"
2604 [(set_attr "type" "sseimul")
2605 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The RTL selects
;; two V4HI halves from each input (one selection starting at element 0, the
;; other running from element 1 to element 7) and combines them.  Operand 1
;; is commutative and tied to the destination.
2607 (define_insn "sse2_pmaddwd"
2608 [(set (match_operand:V4SI 0 "register_operand" "=x")
2613 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2614 (parallel [(const_int 0)
2620 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2621 (parallel [(const_int 0)
2627 (vec_select:V4HI (match_dup 1)
2628 (parallel [(const_int 1)
2633 (vec_select:V4HI (match_dup 2)
2634 (parallel [(const_int 1)
2637 (const_int 7)]))))))]
2639 "pmaddwd\t{%2, %0|%0, %2}"
2640 [(set_attr "type" "sseiadd")
2641 (set_attr "mode" "TI")])
;; V4SI multiply synthesized from two pmuludq operations
;; (sse2_umulv2siv2di3): one on the inputs as given, one on the inputs
;; shifted right by 32 bits as TImode values.  Each product vector is then
;; rearranged with pshufd and the two are interleaved with punpckldq to form
;; the result.
2643 (define_expand "mulv4si3"
2644 [(set (match_operand:V4SI 0 "register_operand" "")
2645 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2646 (match_operand:V4SI 2 "register_operand" "")))]
2649 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2655 t1 = gen_reg_rtx (V4SImode);
2656 t2 = gen_reg_rtx (V4SImode);
2657 t3 = gen_reg_rtx (V4SImode);
2658 t4 = gen_reg_rtx (V4SImode);
2659 t5 = gen_reg_rtx (V4SImode);
2660 t6 = gen_reg_rtx (V4SImode);
2661 thirtytwo = GEN_INT (32);
2663 /* Multiply elements 2 and 0. */
2664 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2666 /* Shift both input vectors down one element, so that elements 3 and 1
2667 are now in the slots for elements 2 and 0. For K8, at least, this is
2668 faster than using a shuffle. */
2669 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2670 gen_lowpart (TImode, op1), thirtytwo));
2671 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2672 gen_lowpart (TImode, op2), thirtytwo));
2674 /* Multiply elements 3 and 1. */
2675 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2677 /* Move the results in element 2 down to element 1; we don't care what
2678 goes in elements 2 and 3. */
2679 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2680 const0_rtx, const0_rtx));
2681 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2682 const0_rtx, const0_rtx));
2684 /* Merge the parts back together. */
2685 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from three pmuludq operations
;; (sse2_umulv2siv2di3): low x low, plus the two cross products of one
;; input with the other input shifted right by 32 bits.  The cross products
;; are shifted left by 32 bits and the three terms are added.
2689 (define_expand "mulv2di3"
2690 [(set (match_operand:V2DI 0 "register_operand" "")
2691 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2692 (match_operand:V2DI 2 "register_operand" "")))]
2695 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2701 t1 = gen_reg_rtx (V2DImode);
2702 t2 = gen_reg_rtx (V2DImode);
2703 t3 = gen_reg_rtx (V2DImode);
2704 t4 = gen_reg_rtx (V2DImode);
2705 t5 = gen_reg_rtx (V2DImode);
2706 t6 = gen_reg_rtx (V2DImode);
2707 thirtytwo = GEN_INT (32);
2709 /* Multiply low parts. */
2710 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2711 gen_lowpart (V4SImode, op2)));
2713 /* Shift input vectors right 32 bits so we can multiply high parts. */
2714 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2715 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2717 /* Multiply high parts by low parts. */
2718 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2719 gen_lowpart (V4SImode, t3)));
2720 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2721 gen_lowpart (V4SImode, t2)));
2723 /* Shift them back. */
2724 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2725 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2727 /* Add the three parts together. */
2728 emit_insn (gen_addv2di3 (t6, t1, t4));
2729 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Dot-product expander: a fresh V4SI temporary receives pmaddwd of operands
;; 1 and 2, and operand 0 is set to operand 3 plus that temporary.
2733 (define_expand "sdot_prodv8hi"
2734 [(match_operand:V4SI 0 "register_operand" "")
2735 (match_operand:V8HI 1 "nonimmediate_operand" "")
2736 (match_operand:V8HI 2 "nonimmediate_operand" "")
2737 (match_operand:V4SI 3 "register_operand" "")]
2740 rtx t = gen_reg_rtx (V4SImode);
2741 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2742 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander built from two pmuludq operations
;; (sse2_umulv2siv2di3).  The first product of operands 1 and 2 is added to
;; the accumulator operand 3; both inputs are then shifted right as TImode
;; values, multiplied again, and that second product is added to form
;; operand 0.
2746 (define_expand "udot_prodv4si"
2747 [(match_operand:V2DI 0 "register_operand" "")
2748 (match_operand:V4SI 1 "register_operand" "")
2749 (match_operand:V4SI 2 "register_operand" "")
2750 (match_operand:V2DI 3 "register_operand" "")]
2755 t1 = gen_reg_rtx (V2DImode);
2756 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2757 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2759 t2 = gen_reg_rtx (V4SImode);
2760 t3 = gen_reg_rtx (V4SImode);
2761 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2762 gen_lowpart (TImode, operands[1]),
2764 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2765 gen_lowpart (TImode, operands[2]),
2768 t4 = gen_reg_rtx (V2DImode);
2769 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2771 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Shift for the SSEMODE24 modes (V8HI and V4SI), printed as psra with the
;; w/d suffix.  Operand 1 is tied to the destination; the SImode count in
;; operand 2 may be a register or an immediate (constraint "xi").
2775 (define_insn "ashr<mode>3"
2776 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2778 (match_operand:SSEMODE24 1 "register_operand" "0")
2779 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2781 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2782 [(set_attr "type" "sseishft")
2783 (set_attr "mode" "TI")])
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI), printed
;; as psrl with the w/d/q suffix.  Operand 1 is tied to the destination; the
;; SImode count in operand 2 may be a register or an immediate.
2785 (define_insn "lshr<mode>3"
2786 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2787 (lshiftrt:SSEMODE248
2788 (match_operand:SSEMODE248 1 "register_operand" "0")
2789 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2791 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2792 [(set_attr "type" "sseishft")
2793 (set_attr "mode" "TI")])
;; Shift for the SSEMODE248 modes (V8HI, V4SI, V2DI), printed as psll with
;; the w/d/q suffix.  Operand 1 is tied to the destination; the SImode count
;; in operand 2 may be a register or an immediate.
2795 (define_insn "ashl<mode>3"
2796 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2798 (match_operand:SSEMODE248 1 "register_operand" "0")
2799 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2801 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2802 [(set_attr "type" "sseishft")
2803 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift, printed as pslldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8 and
;; replaces operands[2] with the quotient before printing.
2805 (define_insn "sse2_ashlti3"
2806 [(set (match_operand:TI 0 "register_operand" "=x")
2807 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2808 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2811 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2812 return "pslldq\t{%2, %0|%0, %2}";
2814 [(set_attr "type" "sseishft")
2815 (set_attr "mode" "TI")])
;; Whole-vector left shift expander for the SSEMODEI modes, expressed as a
;; TImode ashift.  The shift count is tested against
;; const_0_to_255_mul_8_operand, and operands 0 and 1 are re-expressed in
;; TImode with gen_lowpart so the TImode shift insn can match.
2817 (define_expand "vec_shl_<mode>"
2818 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2819 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2820 (match_operand:SI 2 "general_operand" "")))]
2823 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2825 operands[0] = gen_lowpart (TImode, operands[0]);
2826 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift, printed as psrldq.  Operand
;; 2 must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 and replaces operands[2] with the quotient before printing.
2829 (define_insn "sse2_lshrti3"
2830 [(set (match_operand:TI 0 "register_operand" "=x")
2831 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2832 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2835 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2836 return "psrldq\t{%2, %0|%0, %2}";
2838 [(set_attr "type" "sseishft")
2839 (set_attr "mode" "TI")])
;; Whole-vector right shift expander for the SSEMODEI modes, expressed as a
;; TImode lshiftrt.  The shift count is tested against
;; const_0_to_255_mul_8_operand, and operands 0 and 1 are re-expressed in
;; TImode with gen_lowpart so the TImode shift insn can match.
2841 (define_expand "vec_shr_<mode>"
2842 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2843 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2844 (match_operand:SI 2 "general_operand" "")))]
2847 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2849 operands[0] = gen_lowpart (TImode, operands[0]);
2850 operands[1] = gen_lowpart (TImode, operands[1]);
;; Named expander for unsigned V16QI maximum.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy with code UMAX.
2853 (define_expand "umaxv16qi3"
2854 [(set (match_operand:V16QI 0 "register_operand" "")
2855 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2856 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2858 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matcher for unsigned V16QI maximum, printed as pmaxub.  Operand 1 is
;; commutative and tied to the destination.
2860 (define_insn "*umaxv16qi3"
2861 [(set (match_operand:V16QI 0 "register_operand" "=x")
2862 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2863 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2864 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2865 "pmaxub\t{%2, %0|%0, %2}"
2866 [(set_attr "type" "sseiadd")
2867 (set_attr "mode" "TI")])
;; Named expander for signed V8HI maximum.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code SMAX.
2869 (define_expand "smaxv8hi3"
2870 [(set (match_operand:V8HI 0 "register_operand" "")
2871 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2872 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2874 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matcher for signed V8HI maximum, printed as pmaxsw.  Operand 1 is
;; commutative and tied to the destination.
2876 (define_insn "*smaxv8hi3"
2877 [(set (match_operand:V8HI 0 "register_operand" "=x")
2878 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2879 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2880 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2881 "pmaxsw\t{%2, %0|%0, %2}"
2882 [(set_attr "type" "sseiadd")
2883 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum synthesized in two steps: operand 0 receives the
;; us_minus of operands 1 and 2, then operand 2 is added back to operand 0.
;; The preparation code saves the original destination in operands[3] and,
;; when the destination is the same rtx as operand 2, redirects the
;; intermediate result to a fresh V8HI pseudo so operand 2 is not clobbered
;; before the addition.
2885 (define_expand "umaxv8hi3"
2886 [(set (match_operand:V8HI 0 "register_operand" "=x")
2887 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2888 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2890 (plus:V8HI (match_dup 0) (match_dup 2)))]
2893 operands[3] = operands[0];
2894 if (rtx_equal_p (operands[0], operands[2]))
2895 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for the SSEMODE14 modes (V16QI and V4SI), expanded through
;; ix86_expand_int_vcond.  The xops array holds: destination, the two values
;; to choose between (operand 1, then operand 2), the comparison
;; GT (operand 1, operand 2), and the two comparison operands.
2898 (define_expand "smax<mode>3"
2899 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2900 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2901 (match_operand:SSEMODE14 2 "register_operand" "")))]
2907 xops[0] = operands[0];
2908 xops[1] = operands[1];
2909 xops[2] = operands[2];
2910 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2911 xops[4] = operands[1];
2912 xops[5] = operands[2];
2913 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expanded through ix86_expand_int_vcond.  The xops
;; array holds: destination, the two values to choose between (operand 1,
;; then operand 2), the comparison GTU (operand 1, operand 2), and the two
;; comparison operands.
2918 (define_expand "umaxv4si3"
2919 [(set (match_operand:V4SI 0 "register_operand" "")
2920 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2921 (match_operand:V4SI 2 "register_operand" "")))]
2927 xops[0] = operands[0];
2928 xops[1] = operands[1];
2929 xops[2] = operands[2];
2930 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2931 xops[4] = operands[1];
2932 xops[5] = operands[2];
2933 ok = ix86_expand_int_vcond (xops);
;; Named expander for unsigned V16QI minimum.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy with code UMIN, the same code
;; used by the pattern's RTL and by the *uminv16qi3 insn condition.
2938 (define_expand "uminv16qi3"
2939 [(set (match_operand:V16QI 0 "register_operand" "")
2940 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2941 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2943 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matcher for unsigned V16QI minimum, printed as pminub.  Operand 1 is
;; commutative and tied to the destination.
2945 (define_insn "*uminv16qi3"
2946 [(set (match_operand:V16QI 0 "register_operand" "=x")
2947 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2948 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2949 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2950 "pminub\t{%2, %0|%0, %2}"
2951 [(set_attr "type" "sseiadd")
2952 (set_attr "mode" "TI")])
;; Named expander for signed V8HI minimum.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code SMIN.
2954 (define_expand "sminv8hi3"
2955 [(set (match_operand:V8HI 0 "register_operand" "")
2956 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2957 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2959 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matcher for signed V8HI minimum, printed as pminsw.  Operand 1 is
;; commutative and tied to the destination.
2961 (define_insn "*sminv8hi3"
2962 [(set (match_operand:V8HI 0 "register_operand" "=x")
2963 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2964 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2965 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2966 "pminsw\t{%2, %0|%0, %2}"
2967 [(set_attr "type" "sseiadd")
2968 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI and V4SI), expanded through
;; ix86_expand_int_vcond.  The comparison is still GT (operand 1, operand 2),
;; but the two values to choose between are passed in swapped order
;; (operand 2, then operand 1), which turns the selection into a minimum.
2970 (define_expand "smin<mode>3"
2971 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2972 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2973 (match_operand:SSEMODE14 2 "register_operand" "")))]
2979 xops[0] = operands[0];
2980 xops[1] = operands[2];
2981 xops[2] = operands[1];
2982 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2983 xops[4] = operands[1];
2984 xops[5] = operands[2];
2985 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI and V4SI), expanded
;; through ix86_expand_int_vcond.  The comparison is GTU (operand 1,
;; operand 2) and the two values to choose between are passed in swapped
;; order (operand 2, then operand 1), which turns the selection into a
;; minimum.
2990 (define_expand "umin<mode>3"
2991 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2992 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2993 (match_operand:SSEMODE24 2 "register_operand" "")))]
2999 xops[0] = operands[0];
3000 xops[1] = operands[2];
3001 xops[2] = operands[1];
3002 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3003 xops[4] = operands[1];
3004 xops[5] = operands[2];
3005 ok = ix86_expand_int_vcond (xops);
3010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3012 ;; Parallel integral comparisons
3014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), printed as pcmpeq with the b/w/d suffix.  Operand 1 is commutative
;; and tied to the destination.
3016 (define_insn "sse2_eq<mode>3"
3017 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3019 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3020 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3021 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3022 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3023 [(set_attr "type" "ssecmp")
3024 (set_attr "mode" "TI")])
;; Element-wise compare for the SSEMODE124 modes (V16QI, V8HI, V4SI),
;; printed as pcmpgt with the b/w/d suffix.  Operand 1 is a register tied to
;; the destination (no commutative marker).
3026 (define_insn "sse2_gt<mode>3"
3027 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3029 (match_operand:SSEMODE124 1 "register_operand" "0")
3030 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3032 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3033 [(set_attr "type" "ssecmp")
3034 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124 modes: operand 0 receives
;; operand 1 or operand 2 depending on comparison operator 3 applied to
;; operands 4 and 5.  The expansion is delegated to ix86_expand_int_vcond,
;; whose return value decides how the expander finishes.
3036 (define_expand "vcond<mode>"
3037 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3038 (if_then_else:SSEMODE124
3039 (match_operator 3 ""
3040 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3041 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3042 (match_operand:SSEMODE124 1 "general_operand" "")
3043 (match_operand:SSEMODE124 2 "general_operand" "")))]
3046 if (ix86_expand_int_vcond (operands))
;; Same operand layout as vcond<mode>: operand 0 receives operand 1 or
;; operand 2 depending on comparison operator 3 applied to operands 4 and 5.
;; The expansion is delegated to ix86_expand_int_vcond, whose return value
;; decides how the expander finishes.
3052 (define_expand "vcondu<mode>"
3053 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3054 (if_then_else:SSEMODE124
3055 (match_operator 3 ""
3056 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3057 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3058 (match_operand:SSEMODE124 1 "general_operand" "")
3059 (match_operand:SSEMODE124 2 "general_operand" "")))]
3062 if (ix86_expand_int_vcond (operands))
3068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3070 ;; Parallel integral logical operations
3072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expander, expressed as an xor.  The preparation code
;; builds a constant vector with constm1_rtx in every element (one per unit
;; of <MODE>mode), forces it into a register, and stores it in operands[2].
3074 (define_expand "one_cmpl<mode>2"
3075 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3076 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3080 int i, n = GET_MODE_NUNITS (<MODE>mode);
3081 rtvec v = rtvec_alloc (n);
3083 for (i = 0; i < n; ++i)
3084 RTVEC_ELT (v, i) = constm1_rtx;
3086 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND over all SSEMODEI modes.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy with code AND.
3089 (define_expand "and<mode>3"
3090 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3091 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3092 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3094 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matcher for bitwise AND, printed as pand for every SSEMODEI mode.
;; Operand 1 is commutative and tied to the destination.
3096 (define_insn "*and<mode>3"
3097 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3099 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3100 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3101 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3102 "pand\t{%2, %0|%0, %2}"
3103 [(set_attr "type" "sselog")
3104 (set_attr "mode" "TI")])
;; Printed as pandn: operand 1, the register tied to the destination, is
;; the one wrapped in "not"; operand 2 may be register or memory.
3106 (define_insn "sse2_nand<mode>3"
3107 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3109 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3110 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3112 "pandn\t{%2, %0|%0, %2}"
3113 [(set_attr "type" "sselog")
3114 (set_attr "mode" "TI")])
;; Named expander for bitwise OR over all SSEMODEI modes.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy with code IOR.
3116 (define_expand "ior<mode>3"
3117 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3118 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3119 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3121 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matcher for bitwise OR, printed as por for every SSEMODEI mode.
;; Operand 1 is commutative and tied to the destination.
3123 (define_insn "*ior<mode>3"
3124 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3126 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3127 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3128 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3129 "por\t{%2, %0|%0, %2}"
3130 [(set_attr "type" "sselog")
3131 (set_attr "mode" "TI")])
;; Named expander for bitwise XOR over all SSEMODEI modes.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy with code XOR.
3133 (define_expand "xor<mode>3"
3134 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3135 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3136 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3138 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matcher for bitwise XOR, printed as pxor for every SSEMODEI mode.
;; Operand 1 is commutative and tied to the destination.
3140 (define_insn "*xor<mode>3"
3141 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3143 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3144 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3145 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3146 "pxor\t{%2, %0|%0, %2}"
3147 [(set_attr "type" "sselog")
3148 (set_attr "mode" "TI")])
3150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3152 ;; Parallel integral element swizzling
3154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits packsswb: two V8HI inputs are narrowed into one V16QI result.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): the RTL operators wrapping the two inputs are not visible
;; in this listing; the mnemonic suggests signed saturation -- confirm
;; against the full pattern.
3156 (define_insn "sse2_packsswb"
3157 [(set (match_operand:V16QI 0 "register_operand" "=x")
3160 (match_operand:V8HI 1 "register_operand" "0"))
3162 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3164 "packsswb\t{%2, %0|%0, %2}"
3165 [(set_attr "type" "sselog")
3166 (set_attr "mode" "TI")])
;; Emits packssdw: two V4SI inputs are narrowed into one V8HI result.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): the RTL operators wrapping the two inputs are not visible
;; in this listing; the mnemonic suggests signed saturation -- confirm
;; against the full pattern.
3168 (define_insn "sse2_packssdw"
3169 [(set (match_operand:V8HI 0 "register_operand" "=x")
3172 (match_operand:V4SI 1 "register_operand" "0"))
3174 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3176 "packssdw\t{%2, %0|%0, %2}"
3177 [(set_attr "type" "sselog")
3178 (set_attr "mode" "TI")])
;; Emits packuswb: two V8HI inputs are narrowed into one V16QI result.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): the RTL operators wrapping the two inputs are not visible
;; in this listing; the mnemonic suggests unsigned saturation -- confirm
;; against the full pattern.
3180 (define_insn "sse2_packuswb"
3181 [(set (match_operand:V16QI 0 "register_operand" "=x")
3184 (match_operand:V8HI 1 "register_operand" "0"))
3186 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3188 "packuswb\t{%2, %0|%0, %2}"
3189 [(set_attr "type" "sselog")
3190 (set_attr "mode" "TI")])
;; Emits punpckhbw on V16QI.  The selector alternates indices 8..15 with
;; indices 24..31, i.e. it interleaves the upper eight bytes of the two
;; inputs.  Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): indices above 15 presumably address a 32-element
;; concatenation of operands 1 and 2; that line is not visible here.
3192 (define_insn "sse2_punpckhbw"
3193 [(set (match_operand:V16QI 0 "register_operand" "=x")
3196 (match_operand:V16QI 1 "register_operand" "0")
3197 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3198 (parallel [(const_int 8) (const_int 24)
3199 (const_int 9) (const_int 25)
3200 (const_int 10) (const_int 26)
3201 (const_int 11) (const_int 27)
3202 (const_int 12) (const_int 28)
3203 (const_int 13) (const_int 29)
3204 (const_int 14) (const_int 30)
3205 (const_int 15) (const_int 31)])))]
3207 "punpckhbw\t{%2, %0|%0, %2}"
3208 [(set_attr "type" "sselog")
3209 (set_attr "mode" "TI")])
;; Emits punpcklbw on V16QI.  The selector alternates indices 0..7 with
;; indices 16..23, i.e. it interleaves the lower eight bytes of the two
;; inputs.  Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): indices above 15 presumably address a 32-element
;; concatenation of operands 1 and 2; that line is not visible here.
3211 (define_insn "sse2_punpcklbw"
3212 [(set (match_operand:V16QI 0 "register_operand" "=x")
3215 (match_operand:V16QI 1 "register_operand" "0")
3216 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3217 (parallel [(const_int 0) (const_int 16)
3218 (const_int 1) (const_int 17)
3219 (const_int 2) (const_int 18)
3220 (const_int 3) (const_int 19)
3221 (const_int 4) (const_int 20)
3222 (const_int 5) (const_int 21)
3223 (const_int 6) (const_int 22)
3224 (const_int 7) (const_int 23)])))]
3226 "punpcklbw\t{%2, %0|%0, %2}"
3227 [(set_attr "type" "sselog")
3228 (set_attr "mode" "TI")])
;; Emits punpckhwd on V8HI.  The selector alternates indices 4..7 with
;; indices 12..15, i.e. it interleaves the upper four words of the two
;; inputs.  Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): indices above 7 presumably address a 16-element
;; concatenation of operands 1 and 2; that line is not visible here.
3230 (define_insn "sse2_punpckhwd"
3231 [(set (match_operand:V8HI 0 "register_operand" "=x")
3234 (match_operand:V8HI 1 "register_operand" "0")
3235 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3236 (parallel [(const_int 4) (const_int 12)
3237 (const_int 5) (const_int 13)
3238 (const_int 6) (const_int 14)
3239 (const_int 7) (const_int 15)])))]
3241 "punpckhwd\t{%2, %0|%0, %2}"
3242 [(set_attr "type" "sselog")
3243 (set_attr "mode" "TI")])
;; Emits punpcklwd on V8HI.  The selector alternates indices 0..3 with
;; indices 8..11, i.e. it interleaves the lower four words of the two
;; inputs.  Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): indices above 7 presumably address a 16-element
;; concatenation of operands 1 and 2; that line is not visible here.
3245 (define_insn "sse2_punpcklwd"
3246 [(set (match_operand:V8HI 0 "register_operand" "=x")
3249 (match_operand:V8HI 1 "register_operand" "0")
3250 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3251 (parallel [(const_int 0) (const_int 8)
3252 (const_int 1) (const_int 9)
3253 (const_int 2) (const_int 10)
3254 (const_int 3) (const_int 11)])))]
3256 "punpcklwd\t{%2, %0|%0, %2}"
3257 [(set_attr "type" "sselog")
3258 (set_attr "mode" "TI")])
;; Emits punpckhdq on V4SI.  The selector is 2, 6, 3, 7: the upper two
;; doublewords of the two inputs, interleaved.  Operand 1 is tied to the
;; destination; operand 2 may be memory.
;; NOTE(review): indices above 3 presumably address an 8-element
;; concatenation of operands 1 and 2; that line is not visible here.
3260 (define_insn "sse2_punpckhdq"
3261 [(set (match_operand:V4SI 0 "register_operand" "=x")
3264 (match_operand:V4SI 1 "register_operand" "0")
3265 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3266 (parallel [(const_int 2) (const_int 6)
3267 (const_int 3) (const_int 7)])))]
3269 "punpckhdq\t{%2, %0|%0, %2}"
3270 [(set_attr "type" "sselog")
3271 (set_attr "mode" "TI")])
;; Emits punpckldq on V4SI.  The selector is 0, 4, 1, 5: the lower two
;; doublewords of the two inputs, interleaved.  Operand 1 is tied to the
;; destination; operand 2 may be memory.
;; NOTE(review): indices above 3 presumably address an 8-element
;; concatenation of operands 1 and 2; that line is not visible here.
3273 (define_insn "sse2_punpckldq"
3274 [(set (match_operand:V4SI 0 "register_operand" "=x")
3277 (match_operand:V4SI 1 "register_operand" "0")
3278 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3279 (parallel [(const_int 0) (const_int 4)
3280 (const_int 1) (const_int 5)])))]
3282 "punpckldq\t{%2, %0|%0, %2}"
3283 [(set_attr "type" "sselog")
3284 (set_attr "mode" "TI")])
;; Emits punpckhqdq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 may be memory.  The selector starts with index 1.
;; NOTE(review): the rest of the selector and the enable condition fall on
;; lines that are not visible in this listing.
3286 (define_insn "sse2_punpckhqdq"
3287 [(set (match_operand:V2DI 0 "register_operand" "=x")
3290 (match_operand:V2DI 1 "register_operand" "0")
3291 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3292 (parallel [(const_int 1)
3295 "punpckhqdq\t{%2, %0|%0, %2}"
3296 [(set_attr "type" "sselog")
3297 (set_attr "mode" "TI")])
;; Emits punpcklqdq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 may be memory.  The selector starts with index 0.
;; NOTE(review): the rest of the selector and the enable condition fall on
;; lines that are not visible in this listing.
3299 (define_insn "sse2_punpcklqdq"
3300 [(set (match_operand:V2DI 0 "register_operand" "=x")
3303 (match_operand:V2DI 1 "register_operand" "0")
3304 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3305 (parallel [(const_int 0)
3308 "punpcklqdq\t{%2, %0|%0, %2}"
3309 [(set_attr "type" "sselog")
3310 (set_attr "mode" "TI")])
;; Expander for inserting a word into a V8HI vector.  Operand 1 is the
;; vector, operand 2 an SImode value and operand 3 a lane number in 0..7.
;; The preparation code narrows operand 2 to HImode with gen_lowpart and
;; rewrites the lane number N as the one-hot mask 1 << N, which is the
;; form *sse2_pinsrw accepts (const_pow2_1_to_128_operand).
3312 (define_expand "sse2_pinsrw"
3313 [(set (match_operand:V8HI 0 "register_operand" "")
3316 (match_operand:SI 2 "nonimmediate_operand" ""))
3317 (match_operand:V8HI 1 "register_operand" "")
3318 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3321 operands[2] = gen_lowpart (HImode, operands[2]);
3322 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Matcher for pinsrw.  Operand 2 is the HImode value (general register
;; or memory), operand 1 the vector tied to the destination, operand 3 a
;; power of two in 1..128.  The output code turns operand 3 back into a
;; lane number with exact_log2 before printing; operand 2 is printed with
;; the `k` operand modifier.
3325 (define_insn "*sse2_pinsrw"
3326 [(set (match_operand:V8HI 0 "register_operand" "=x")
3329 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3330 (match_operand:V8HI 1 "register_operand" "0")
3331 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3334 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3335 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3337 [(set_attr "type" "sselog")
3338 (set_attr "mode" "TI")])
;; Emits pextrw: the V8HI element chosen by operand 2 (a constant in
;; 0..7) is delivered to an SImode general register.
;; NOTE(review): the operators between the SImode destination and the
;; element selection are not visible in this listing.
3340 (define_insn "sse2_pextrw"
3341 [(set (match_operand:SI 0 "register_operand" "=r")
3344 (match_operand:V8HI 1 "register_operand" "x")
3345 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3347 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3348 [(set_attr "type" "sselog")
3349 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and an integer
;; shuffle immediate.  The immediate is split into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7) which are passed, in that order, as the
;; selector operands of sse2_pshufd_1.
3351 (define_expand "sse2_pshufd"
3352 [(match_operand:V4SI 0 "register_operand" "")
3353 (match_operand:V4SI 1 "nonimmediate_operand" "")
3354 (match_operand:SI 2 "const_int_operand" "")]
3357 int mask = INTVAL (operands[2]);
3358 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3359 GEN_INT ((mask >> 0) & 3),
3360 GEN_INT ((mask >> 2) & 3),
3361 GEN_INT ((mask >> 4) & 3),
3362 GEN_INT ((mask >> 6) & 3)));
;; Emits pshufd.  Operands 2..5 are the four element selectors, each a
;; constant in 0..3; the source may be a register or memory.  The output
;; code packs the selectors back into one immediate, two bits each with
;; operand 2 in the lowest bits, and stores it in operands[2] for
;; printing.
3366 (define_insn "sse2_pshufd_1"
3367 [(set (match_operand:V4SI 0 "register_operand" "=x")
3369 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3370 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3371 (match_operand 3 "const_0_to_3_operand" "")
3372 (match_operand 4 "const_0_to_3_operand" "")
3373 (match_operand 5 "const_0_to_3_operand" "")])))]
3377 mask |= INTVAL (operands[2]) << 0;
3378 mask |= INTVAL (operands[3]) << 2;
3379 mask |= INTVAL (operands[4]) << 4;
3380 mask |= INTVAL (operands[5]) << 6;
3381 operands[2] = GEN_INT (mask);
3383 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3385 [(set_attr "type" "sselog1")
3386 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle immediate.  The immediate is split into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7) which are passed, in that order, as the
;; selector operands of sse2_pshuflw_1.
3388 (define_expand "sse2_pshuflw"
3389 [(match_operand:V8HI 0 "register_operand" "")
3390 (match_operand:V8HI 1 "nonimmediate_operand" "")
3391 (match_operand:SI 2 "const_int_operand" "")]
3394 int mask = INTVAL (operands[2]);
3395 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3396 GEN_INT ((mask >> 0) & 3),
3397 GEN_INT ((mask >> 2) & 3),
3398 GEN_INT ((mask >> 4) & 3),
3399 GEN_INT ((mask >> 6) & 3)));
;; Emits pshuflw.  Operands 2..5 are the first four element selectors,
;; each a constant in 0..3; the source may be a register or memory.  The
;; output code packs them into one immediate, two bits each with operand
;; 2 in the lowest bits, and stores it in operands[2] for printing.
;; NOTE(review): the remaining selector entries of the V8HI permutation
;; are on lines not visible in this listing.
3403 (define_insn "sse2_pshuflw_1"
3404 [(set (match_operand:V8HI 0 "register_operand" "=x")
3406 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3407 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3408 (match_operand 3 "const_0_to_3_operand" "")
3409 (match_operand 4 "const_0_to_3_operand" "")
3410 (match_operand 5 "const_0_to_3_operand" "")
3418 mask |= INTVAL (operands[2]) << 0;
3419 mask |= INTVAL (operands[3]) << 2;
3420 mask |= INTVAL (operands[4]) << 4;
3421 mask |= INTVAL (operands[5]) << 6;
3422 operands[2] = GEN_INT (mask);
3424 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3426 [(set_attr "type" "sselog")
3427 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle immediate.  Each 2-bit field of the immediate is extracted and
;; biased by 4, so the selectors handed to sse2_pshufhw_1 are lane
;; numbers in 4..7.
3429 (define_expand "sse2_pshufhw"
3430 [(match_operand:V8HI 0 "register_operand" "")
3431 (match_operand:V8HI 1 "nonimmediate_operand" "")
3432 (match_operand:SI 2 "const_int_operand" "")]
3435 int mask = INTVAL (operands[2]);
3436 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3437 GEN_INT (((mask >> 0) & 3) + 4),
3438 GEN_INT (((mask >> 2) & 3) + 4),
3439 GEN_INT (((mask >> 4) & 3) + 4),
3440 GEN_INT (((mask >> 6) & 3) + 4)));
;; Emits pshufhw.  Operands 2..5 are element selectors, each a constant
;; in 4..7; the source may be a register or memory.  The output code
;; removes the bias of 4 from each selector and packs the results into
;; one immediate, two bits each with operand 2 in the lowest bits.
;; NOTE(review): only the first fixed selector entry (const_int 0) is
;; visible; the entries between it and operand 2 are on lines not shown.
3444 (define_insn "sse2_pshufhw_1"
3445 [(set (match_operand:V8HI 0 "register_operand" "=x")
3447 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3448 (parallel [(const_int 0)
3452 (match_operand 2 "const_4_to_7_operand" "")
3453 (match_operand 3 "const_4_to_7_operand" "")
3454 (match_operand 4 "const_4_to_7_operand" "")
3455 (match_operand 5 "const_4_to_7_operand" "")])))]
3459 mask |= (INTVAL (operands[2]) - 4) << 0;
3460 mask |= (INTVAL (operands[3]) - 4) << 2;
3461 mask |= (INTVAL (operands[4]) - 4) << 4;
3462 mask |= (INTVAL (operands[5]) - 4) << 6;
3463 operands[2] = GEN_INT (mask);
3465 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3467 [(set_attr "type" "sselog")
3468 (set_attr "mode" "TI")])
;; Expander producing a V4SI register from an SImode value (operand 1,
;; register or memory).  The preparation statement sets operands[2] to
;; the V4SImode zero vector.
;; NOTE(review): the RTL lines that use operand 2 are not visible here.
3470 (define_expand "sse2_loadd"
3471 [(set (match_operand:V4SI 0 "register_operand" "")
3474 (match_operand:SI 1 "nonimmediate_operand" ""))
3478 "operands[2] = CONST0_RTX (V4SImode);")
;; Places an SImode value (operand 2) into a V4SI register, with operand 1
;; supplying the other vector (register or zero).  Three alternatives:
;;   0: movd  -- Y destination, source in memory or a general register,
;;               operand 1 constrained by C;
;;   1: movss -- x destination, source in memory, operand 1 constrained
;;               by C;
;;   2: movss -- x destination, source in an x register, operand 1 tied
;;               to the destination.
;; NOTE(review): the enclosing RTL operators and the enable condition are
;; not visible in this listing.
3480 (define_insn "sse2_loadld"
3481 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3484 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3485 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3489 movd\t{%2, %0|%0, %2}
3490 movss\t{%2, %0|%0, %2}
3491 movss\t{%2, %0|%0, %2}"
3492 [(set_attr "type" "ssemov")
3493 (set_attr "mode" "TI,V4SF,SF")])
3495 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3496 ;; be taken into account, and movdi isn't fully populated even without.
;; Stores element 0 of a V4SI register to an SImode destination (memory
;; or x register).  Once reload has completed the pattern is split into
;; a plain SImode move: operand 1 is replaced by an SImode REG with the
;; same register number.
;; NOTE(review): the insn condition and output template lines are not
;; visible in this listing.
3497 (define_insn_and_split "sse2_stored"
3498 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3500 (match_operand:V4SI 1 "register_operand" "x")
3501 (parallel [(const_int 0)])))]
3504 "&& reload_completed"
3505 [(set (match_dup 0) (match_dup 1))]
3507 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of a V2DI register to a DImode
;; destination (register or memory).
;; NOTE(review): the enable condition and any preparation code are on
;; lines not visible in this listing.
3510 (define_expand "sse_storeq"
3511 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3513 (match_operand:V2DI 1 "register_operand" "")
3514 (parallel [(const_int 0)])))]
3518 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3519 ;; be taken into account, and movdi isn't fully populated even without.
;; Matcher for storing element 0 of a V2DI register to a DImode
;; destination (memory or x register).
;; NOTE(review): the embedded numbering jumps from 3524 to 3529, so the
;; end of this insn and the opening of the following pattern are not
;; visible.  The lines from 3529 on carry the condition
;; TARGET_SSE && reload_completed and rewrite the same element-0 store as
;; a plain move, with operand 1 replaced by a DImode REG of the same
;; register number -- presumably a define_split; confirm against the
;; full file.
3520 (define_insn "*sse2_storeq"
3521 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3523 (match_operand:V2DI 1 "register_operand" "x")
3524 (parallel [(const_int 0)])))]
3529 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3531 (match_operand:V2DI 1 "register_operand" "")
3532 (parallel [(const_int 0)])))]
3533 "TARGET_SSE && reload_completed"
3534 [(set (match_dup 0) (match_dup 1))]
3536 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 (the upper quadword) of a V2DI value into a DImode
;; destination on SSE2 targets; memory-to-memory is excluded by the
;; condition.  Three alternatives:
;;   0: movhps -- register source, memory destination;
;;   1: psrldq -- source tied to the destination register.  psrldq
;;      counts bytes, so the upper quadword reaches the low 64 bits only
;;      with a shift of 8 (a shift of 4 would leave bits 32..95 there);
;;   2: movq   -- offsettable memory source, loaded through %H1
;;      (presumably the upper half of the memory operand -- confirm
;;      against print_operand).
3539 (define_insn "*vec_extractv2di_1_sse2"
3540 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3542 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3543 (parallel [(const_int 1)])))]
3544 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3546 movhps\t{%1, %0|%0, %1}
3547 psrldq\t{$8, %0|%0, 8}
3548 movq\t{%H1, %0|%0, %H1}"
3549 [(set_attr "type" "ssemov,sseishft,ssemov")
3550 (set_attr "mode" "V2SF,TI,TI")])
3552 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Variant of the element-1 extraction used when SSE is available but
;; SSE2 is not; memory-to-memory is excluded by the condition.  Three
;; alternatives:
;;   0: movhps  -- register source, memory destination;
;;   1: movhlps -- register source, register destination;
;;   2: movlps  -- offsettable memory source, loaded through %H1.
3553 (define_insn "*vec_extractv2di_1_sse"
3554 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3556 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3557 (parallel [(const_int 1)])))]
3558 "!TARGET_SSE2 && TARGET_SSE
3559 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3561 movhps\t{%1, %0|%0, %1}
3562 movhlps\t{%1, %0|%0, %1}
3563 movlps\t{%H1, %0|%0, %H1}"
3564 [(set_attr "type" "ssemov")
3565 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI register from an SImode register operand using a shuffle
;; with immediate 0.  Alternative 0 (Y registers) uses pshufd from
;; operand 1; alternative 1 has operand 1 tied to the destination and
;; uses shufps of the destination with itself.
;; NOTE(review): the RTL operator wrapping operand 1 and the enable
;; condition are not visible in this listing.
3567 (define_insn "*vec_dupv4si"
3568 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3570 (match_operand:SI 1 "register_operand" " Y,0")))]
3573 pshufd\t{$0, %1, %0|%0, %1, 0}
3574 shufps\t{$0, %0, %0|%0, %0, 0}"
3575 [(set_attr "type" "sselog1")
3576 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI register from a DImode register operand that is tied to
;; the destination in both alternatives (Y and x register classes).
;; NOTE(review): the RTL operator, the enable condition and both output
;; templates are on lines not visible in this listing.
3578 (define_insn "*vec_dupv2di"
3579 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3581 (match_operand:DI 1 "register_operand" " 0,0")))]
3586 [(set_attr "type" "sselog1,ssemov")
3587 (set_attr "mode" "TI,V4SF")])
3589 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3590 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3591 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI value from two SImode operands.  Four alternatives:
;;   0: punpckldq -- Y registers, operand 1 tied to the destination;
;;   1: movd      -- Y destination, operand 1 in a general register or
;;                   memory, operand 2 constrained by C;
;;   2: punpckldq -- y (MMX) registers, operand 1 tied to the destination;
;;   3: movd      -- y destination, operand 1 in a general register or
;;                   memory, operand 2 constrained by C.
;; NOTE(review): the RTL operator and the enable condition are not
;; visible in this listing.
3592 (define_insn "*sse2_concatv2si"
3593 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3595 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3596 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3599 punpckldq\t{%2, %0|%0, %2}
3600 movd\t{%1, %0|%0, %1}
3601 punpckldq\t{%2, %0|%0, %2}
3602 movd\t{%1, %0|%0, %1}"
3603 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3604 (set_attr "mode" "TI,TI,DI,DI")])
;; Forms a V2SI value from two SImode operands using SSE1-style
;; instructions in the x-register alternatives.  Four alternatives:
;;   0: unpcklps  -- x registers, operand 1 tied to the destination;
;;   1: movss     -- x destination, operand 1 in memory, operand 2
;;                   constrained by C;
;;   2: punpckldq -- y (MMX) registers, operand 1 tied to the destination;
;;   3: movd      -- y destination, operand 1 in a general register or
;;                   memory, operand 2 constrained by C.
;; NOTE(review): the RTL operator and the enable condition are not
;; visible in this listing.
3606 (define_insn "*sse1_concatv2si"
3607 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3609 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3610 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3613 unpcklps\t{%2, %0|%0, %2}
3614 movss\t{%1, %0|%0, %1}
3615 punpckldq\t{%2, %0|%0, %2}
3616 movd\t{%1, %0|%0, %1}"
3617 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3618 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI value from two V2SI operands; operand 1 is tied to the
;; destination in every alternative.  Operand 2 selects the instruction:
;;   0: punpcklqdq -- operand 2 in a Y register;
;;   1: movlhps    -- operand 2 in an x register;
;;   2: movhps     -- operand 2 in memory.
;; NOTE(review): the RTL operator and the enable condition are not
;; visible in this listing.
3620 (define_insn "*vec_concatv4si_1"
3621 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3623 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3624 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3627 punpcklqdq\t{%2, %0|%0, %2}
3628 movlhps\t{%2, %0|%0, %2}
3629 movhps\t{%2, %0|%0, %2}"
3630 [(set_attr "type" "sselog,ssemov,ssemov")
3631 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI value from two DImode operands.  Six alternatives:
;;   0: movq       -- operand 1 from memory, operand 2 constrained by C;
;;   1: movq2dq    -- operand 1 from a y (MMX) register, operand 2
;;                    constrained by C;
;;   2: punpcklqdq -- operand 1 tied to the destination, operand 2 in Y;
;;   3: movlhps    -- operand 1 tied to the destination, operand 2 in x;
;;   4: movhps     -- operand 1 tied to the destination, operand 2 in
;;                    memory;
;;   5: movlps     -- operand 2 tied to the destination, operand 1 in
;;                    memory.
;; NOTE(review): the RTL operator and the enable condition are not
;; visible in this listing.
3633 (define_insn "*vec_concatv2di"
3634 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3636 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3637 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3640 movq\t{%1, %0|%0, %1}
3641 movq2dq\t{%1, %0|%0, %1}
3642 punpcklqdq\t{%2, %0|%0, %2}
3643 movlhps\t{%2, %0|%0, %2}
3644 movhps\t{%2, %0|%0, %2}
3645 movlps\t{%1, %0|%0, %1}"
3646 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3647 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for writing one element of a V2DI register.  Operand 1 is
;; the DImode value and operand 2 the constant element index; the work
;; is delegated to ix86_expand_vector_set, called with `false` as its
;; first argument.
3649 (define_expand "vec_setv2di"
3650 [(match_operand:V2DI 0 "register_operand" "")
3651 (match_operand:DI 1 "register_operand" "")
3652 (match_operand 2 "const_int_operand" "")]
3655 ix86_expand_vector_set (false, operands[0], operands[1],
3656 INTVAL (operands[2]));
;; Expander for reading one element of a V2DI register into a DImode
;; register.  Operand 2 is the constant element index; the work is
;; delegated to ix86_expand_vector_extract, called with `false` as its
;; first argument.
3660 (define_expand "vec_extractv2di"
3661 [(match_operand:DI 0 "register_operand" "")
3662 (match_operand:V2DI 1 "register_operand" "")
3663 (match_operand 2 "const_int_operand" "")]
3666 ix86_expand_vector_extract (false, operands[0], operands[1],
3667 INTVAL (operands[2]));
;; Expander for initialising a V2DI register from operand 1, which has
;; no predicate or mode here.  The work is delegated to
;; ix86_expand_vector_init, called with `false` as its first argument.
3671 (define_expand "vec_initv2di"
3672 [(match_operand:V2DI 0 "register_operand" "")
3673 (match_operand 1 "" "")]
3676 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for writing one element of a V4SI register.  Operand 1 is
;; the SImode value and operand 2 the constant element index; the work
;; is delegated to ix86_expand_vector_set, called with `false` as its
;; first argument.
3680 (define_expand "vec_setv4si"
3681 [(match_operand:V4SI 0 "register_operand" "")
3682 (match_operand:SI 1 "register_operand" "")
3683 (match_operand 2 "const_int_operand" "")]
3686 ix86_expand_vector_set (false, operands[0], operands[1],
3687 INTVAL (operands[2]));
;; Expander for reading one element of a V4SI register into an SImode
;; register.  Operand 2 is the constant element index; the work is
;; delegated to ix86_expand_vector_extract, called with `false` as its
;; first argument.
3691 (define_expand "vec_extractv4si"
3692 [(match_operand:SI 0 "register_operand" "")
3693 (match_operand:V4SI 1 "register_operand" "")
3694 (match_operand 2 "const_int_operand" "")]
3697 ix86_expand_vector_extract (false, operands[0], operands[1],
3698 INTVAL (operands[2]));
;; Expander for initialising a V4SI register from operand 1, which has
;; no predicate or mode here.  The work is delegated to
;; ix86_expand_vector_init, called with `false` as its first argument.
3702 (define_expand "vec_initv4si"
3703 [(match_operand:V4SI 0 "register_operand" "")
3704 (match_operand 1 "" "")]
3707 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for writing one element of a V8HI register.  Operand 1 is
;; the HImode value and operand 2 the constant element index; the work
;; is delegated to ix86_expand_vector_set, called with `false` as its
;; first argument.
3711 (define_expand "vec_setv8hi"
3712 [(match_operand:V8HI 0 "register_operand" "")
3713 (match_operand:HI 1 "register_operand" "")
3714 (match_operand 2 "const_int_operand" "")]
3717 ix86_expand_vector_set (false, operands[0], operands[1],
3718 INTVAL (operands[2]));
;; Expander for reading one element of a V8HI register into an HImode
;; register.  Operand 2 is the constant element index; the work is
;; delegated to ix86_expand_vector_extract, called with `false` as its
;; first argument.
3722 (define_expand "vec_extractv8hi"
3723 [(match_operand:HI 0 "register_operand" "")
3724 (match_operand:V8HI 1 "register_operand" "")
3725 (match_operand 2 "const_int_operand" "")]
3728 ix86_expand_vector_extract (false, operands[0], operands[1],
3729 INTVAL (operands[2]));
;; Expander for initialising a V8HI register from operand 1, which has
;; no predicate or mode here.  The work is delegated to
;; ix86_expand_vector_init, called with `false` as its first argument.
3733 (define_expand "vec_initv8hi"
3734 [(match_operand:V8HI 0 "register_operand" "")
3735 (match_operand 1 "" "")]
3738 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for writing one element of a V16QI register.  Operand 1 is
;; the QImode value and operand 2 the constant element index; the work
;; is delegated to ix86_expand_vector_set, called with `false` as its
;; first argument.
3742 (define_expand "vec_setv16qi"
3743 [(match_operand:V16QI 0 "register_operand" "")
3744 (match_operand:QI 1 "register_operand" "")
3745 (match_operand 2 "const_int_operand" "")]
3748 ix86_expand_vector_set (false, operands[0], operands[1],
3749 INTVAL (operands[2]));
;; Expander for reading one element of a V16QI register into a QImode
;; register.  Operand 2 is the constant element index; the work is
;; delegated to ix86_expand_vector_extract, called with `false` as its
;; first argument.
3753 (define_expand "vec_extractv16qi"
3754 [(match_operand:QI 0 "register_operand" "")
3755 (match_operand:V16QI 1 "register_operand" "")
3756 (match_operand 2 "const_int_operand" "")]
3759 ix86_expand_vector_extract (false, operands[0], operands[1],
3760 INTVAL (operands[2]));
;; Expander for initialising a V16QI register from operand 1, which has
;; no predicate or mode here.  The work is delegated to
;; ix86_expand_vector_init, called with `false` as its first argument.
3764 (define_expand "vec_initv16qi"
3765 [(match_operand:V16QI 0 "register_operand" "")
3766 (match_operand 1 "" "")]
3769 ix86_expand_vector_init (false, operands[0], operands[1]);
3773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits pavgb on V16QI.  Operand 1 is marked commutative (%) and tied to
;; the destination; operand 2 may be memory.  The pattern contains a
;; V16QI constant vector of all ones.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok (checked with code PLUS) accepts the operands.
;; NOTE(review): the operators surrounding the inputs and the constant
;; vector are on lines not visible here; the ones vector is presumably
;; the rounding term of the average -- confirm against the full pattern.
3779 (define_insn "sse2_uavgv16qi3"
3780 [(set (match_operand:V16QI 0 "register_operand" "=x")
3786 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3788 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3789 (const_vector:V16QI [(const_int 1) (const_int 1)
3790 (const_int 1) (const_int 1)
3791 (const_int 1) (const_int 1)
3792 (const_int 1) (const_int 1)
3793 (const_int 1) (const_int 1)
3794 (const_int 1) (const_int 1)
3795 (const_int 1) (const_int 1)
3796 (const_int 1) (const_int 1)]))
3798 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3799 "pavgb\t{%2, %0|%0, %2}"
3800 [(set_attr "type" "sseiadd")
3801 (set_attr "mode" "TI")])
;; Emits pavgw on V8HI.  Operand 1 is marked commutative (%) and tied to
;; the destination; operand 2 may be memory.  The pattern contains a
;; V8HI constant vector of all ones.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok (checked with code PLUS) accepts the operands.
;; NOTE(review): the operators surrounding the inputs and the constant
;; vector are on lines not visible here; the ones vector is presumably
;; the rounding term of the average -- confirm against the full pattern.
3803 (define_insn "sse2_uavgv8hi3"
3804 [(set (match_operand:V8HI 0 "register_operand" "=x")
3810 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3812 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3813 (const_vector:V8HI [(const_int 1) (const_int 1)
3814 (const_int 1) (const_int 1)
3815 (const_int 1) (const_int 1)
3816 (const_int 1) (const_int 1)]))
3818 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3819 "pavgw\t{%2, %0|%0, %2}"
3820 [(set_attr "type" "sseiadd")
3821 (set_attr "mode" "TI")])
3823 ;; The correct representation for this is absolutely enormous, and
3824 ;; surely not generally useful.
;; Emits psadbw.  The operation is modelled as an opaque unspec taking
;; two V16QI inputs and yielding V2DI; operand 1 is tied to the
;; destination and operand 2 may be memory.
;; NOTE(review): the unspec code and the enable condition are on lines
;; not visible in this listing.
3825 (define_insn "sse2_psadbw"
3826 [(set (match_operand:V2DI 0 "register_operand" "=x")
3827 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3828 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3831 "psadbw\t{%2, %0|%0, %2}"
3832 [(set_attr "type" "sseiadd")
3833 (set_attr "mode" "TI")])
;; Emits movmskps: an opaque unspec of a V4SF register producing an
;; SImode result in a general register.
;; NOTE(review): the unspec code and the enable condition are on lines
;; not visible in this listing.
3835 (define_insn "sse_movmskps"
3836 [(set (match_operand:SI 0 "register_operand" "=r")
3837 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3840 "movmskps\t{%1, %0|%0, %1}"
3841 [(set_attr "type" "ssecvt")
3842 (set_attr "mode" "V4SF")])
;; Emits movmskpd: an opaque unspec of a V2DF register producing an
;; SImode result in a general register.
;; NOTE(review): the unspec code and the enable condition are on lines
;; not visible in this listing.
3844 (define_insn "sse2_movmskpd"
3845 [(set (match_operand:SI 0 "register_operand" "=r")
3846 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3849 "movmskpd\t{%1, %0|%0, %1}"
3850 [(set_attr "type" "ssecvt")
3851 (set_attr "mode" "V2DF")])
;; Emits pmovmskb: an opaque unspec of a V16QI register producing an
;; SImode result in a general register.
;; NOTE(review): the mode attribute is V2DF although the input is an
;; integer vector (the same value sse2_movmskpd uses).  It may be
;; deliberate if instruction-length computation keys off this attribute;
;; confirm before changing.  The unspec code and the enable condition are
;; on lines not visible in this listing.
3853 (define_insn "sse2_pmovmskb"
3854 [(set (match_operand:SI 0 "register_operand" "=r")
3855 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3858 "pmovmskb\t{%1, %0|%0, %1}"
3859 [(set_attr "type" "ssecvt")
3860 (set_attr "mode" "V2DF")])
;; Expander for the masked byte store: the destination is a V16QI memory
;; operand and the unspec takes two V16QI register inputs (operands 1
;; and 2).
;; NOTE(review): the remainder of the unspec and the enable condition are
;; on lines not visible in this listing.
3862 (define_expand "sse2_maskmovdqu"
3863 [(set (match_operand:V16QI 0 "memory_operand" "")
3864 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3865 (match_operand:V16QI 2 "register_operand" "x")
;; Matcher for maskmovdqu when TARGET_64BIT is off.  The store address is
;; an SImode register restricted to the D constraint; the unspec reads
;; the two V16QI registers and the previous contents of the same memory
;; location.  Only operands 1 and 2 appear in the output template.
3871 (define_insn "*sse2_maskmovdqu"
3872 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3873 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3874 (match_operand:V16QI 2 "register_operand" "x")
3875 (mem:V16QI (match_dup 0))]
3877 "TARGET_SSE2 && !TARGET_64BIT"
3878 ;; @@@ check ordering of operands in intel/nonintel syntax
3879 "maskmovdqu\t{%2, %1|%1, %2}"
3880 [(set_attr "type" "ssecvt")
3881 (set_attr "mode" "TI")])
;; Matcher for maskmovdqu when TARGET_64BIT is on.  Identical in shape to
;; the 32-bit variant except that the address register (D constraint) is
;; DImode.  Only operands 1 and 2 appear in the output template.
3883 (define_insn "*sse2_maskmovdqu_rex64"
3884 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3885 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3886 (match_operand:V16QI 2 "register_operand" "x")
3887 (mem:V16QI (match_dup 0))]
3889 "TARGET_SSE2 && TARGET_64BIT"
3890 ;; @@@ check ordering of operands in intel/nonintel syntax
3891 "maskmovdqu\t{%2, %1|%1, %2}"
3892 [(set_attr "type" "ssecvt")
3893 (set_attr "mode" "TI")])
;; Volatile unspec consuming an SImode memory operand; the memory
;; attribute marks it as a load.
;; NOTE(review): the unspec code, enable condition and output template
;; are on lines not visible in this listing.
3895 (define_insn "sse_ldmxcsr"
3896 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3900 [(set_attr "type" "sse")
3901 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR; the memory attribute marks it as a store.
;; NOTE(review): the enable condition and output template are on lines
;; not visible in this listing.
3903 (define_insn "sse_stmxcsr"
3904 [(set (match_operand:SI 0 "memory_operand" "=m")
3905 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3908 [(set_attr "type" "sse")
3909 (set_attr "memory" "store")])
;; Expander for the store fence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code creates operand 0 as a BLKmode
;; MEM whose address is a Pmode SCRATCH and marks it volatile; the
;; UNSPEC_SFENCE unspec takes that same operand.
3911 (define_expand "sse_sfence"
3913 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3914 "TARGET_SSE || TARGET_3DNOW_A"
3916 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3917 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the store fence: a BLKmode operand with no predicate is
;; set from an UNSPEC_SFENCE unspec of itself.  Enabled with TARGET_SSE
;; or TARGET_3DNOW_A; the memory attribute is "unknown".
;; NOTE(review): the output template is on a line not visible here.
3920 (define_insn "*sse_sfence"
3921 [(set (match_operand:BLK 0 "" "")
3922 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3923 "TARGET_SSE || TARGET_3DNOW_A"
3925 [(set_attr "type" "sse")
3926 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand (constraint p); the memory
;; attribute is "unknown".
;; NOTE(review): the unspec code, enable condition and output template
;; are on lines not visible in this listing.
3928 (define_insn "sse2_clflush"
3929 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3933 [(set_attr "type" "sse")
3934 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_MFENCE fence.  The preparation code creates
;; operand 0 as a BLKmode MEM whose address is a Pmode SCRATCH and marks
;; it volatile; the unspec takes that same operand.
;; NOTE(review): the enable condition is on a line not visible here.
3936 (define_expand "sse2_mfence"
3938 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3941 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3942 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the UNSPEC_MFENCE fence: a BLKmode operand with no
;; predicate is set from an unspec of itself; the memory attribute is
;; "unknown".
;; NOTE(review): the enable condition and output template are on lines
;; not visible in this listing.
3945 (define_insn "*sse2_mfence"
3946 [(set (match_operand:BLK 0 "" "")
3947 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3950 [(set_attr "type" "sse")
3951 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_LFENCE fence.  The preparation code creates
;; operand 0 as a BLKmode MEM whose address is a Pmode SCRATCH and marks
;; it volatile; the unspec takes that same operand.
;; NOTE(review): the enable condition is on a line not visible here.
3953 (define_expand "sse2_lfence"
3955 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3958 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3959 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the UNSPEC_LFENCE fence: a BLKmode operand with no
;; predicate is set from an unspec of itself; the memory attribute is
;; "unknown".
;; NOTE(review): the enable condition and output template are on lines
;; not visible in this listing.
3962 (define_insn "*sse2_lfence"
3963 [(set (match_operand:BLK 0 "" "")
3964 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3967 [(set_attr "type" "sse")
3968 (set_attr "memory" "unknown")])
;; Volatile unspec taking two SImode register operands, pinned by the
;; `a` and `c` constraints respectively.  The length attribute is fixed
;; at 3.
;; NOTE(review): the unspec code, enable condition and output template
;; are on lines not visible in this listing.
3970 (define_insn "sse3_mwait"
3971 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3972 (match_operand:SI 1 "register_operand" "c")]
3976 [(set_attr "length" "3")])
;; Emits monitor.  A volatile unspec taking three SImode register
;; operands, pinned by the `a`, `c` and `d` constraints respectively;
;; all three are printed in the template.  The length attribute is fixed
;; at 3.
;; NOTE(review): the unspec code and the enable condition are on lines
;; not visible in this listing.
3978 (define_insn "sse3_monitor"
3979 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3980 (match_operand:SI 1 "register_operand" "c")
3981 (match_operand:SI 2 "register_operand" "d")]
3984 "monitor\t%0, %1, %2"
3985 [(set_attr "length" "3")])