1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Mode macro over the four 128-bit integer vector modes.  A pattern written
;; with SSEMODEI is instantiated once for each mode in the list.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
;; Same list as SSEMODEI plus the two floating-point vector modes
;; (V4SF and V2DF).
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; appear to give the element widths in bytes of the modes it lists
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; Per-mode attribute: one-letter element-size suffix (b/w/d/q) for each
;; integer vector mode.
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander instantiated for every SSEMODEI mode.  Both operands are
;; nonimmediate_operand; the expansion is handed to ix86_expand_vector_move
;; with the concrete mode.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Integer vector move insn.  Three alternatives, per the constraints:
;;   0: "C" constant into an SSE register (type sselog1; xorps or pxor),
;;   1: register or memory into an SSE register (type ssemov),
;;   2: SSE register into memory (type ssemov).
;; The insn condition rejects memory-to-memory moves.  Output is chosen on
;; which_alternative and on the insn's "mode" attribute: when that is V4SF
;; the single-precision forms (xorps / movaps) are used, otherwise the
;; integer forms (pxor / movdqa).  The mode attribute selects between V4SF
;; and TI based on optimize_size, TARGET_SSE2 and, for alternative 2,
;; TARGET_SSE_TYPELESS_STORES.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
85 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
86 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
87 (and (eq_attr "alternative" "2")
88 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
91 (const_string "TI")))])
;; V4SF move expander; delegates to ix86_expand_vector_move with V4SFmode.
93 (define_expand "movv4sf"
94 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
95 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
98 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Alternatives per the constraints: "C" constant into a
;; register, register/memory into a register, register into memory.  The
;; two ssemov alternatives emit movaps; the insn mode is always V4SF.
102 (define_insn "*movv4sf_internal"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
104 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
108 movaps\t{%1, %0|%0, %1}
109 movaps\t{%1, %0|%0, %1}"
110 [(set_attr "type" "sselog1,ssemov,ssemov")
111 (set_attr "mode" "V4SF")])
;; Split, valid only after reload with TARGET_SSE: a V4SF register set
;; from a zero_extended_scalar_load_operand.  Operand 1 is narrowed to its
;; SFmode subreg at byte offset 0 and operand 2 is set to the V4SF zero
;; constant; the replacement is built around a vec_duplicate of operand 1.
114 [(set (match_operand:V4SF 0 "register_operand" "")
115 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
116 "TARGET_SSE && reload_completed"
119 (vec_duplicate:V4SF (match_dup 1))
123 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
124 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; delegates to ix86_expand_vector_move with V2DFmode.
127 (define_expand "movv2df"
128 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
129 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
132 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn with the same three-alternative layout as the other
;; vector moves (constant to register, register/memory to register,
;; register to memory); memory-to-memory is rejected by the condition.
;; When the insn's "mode" attribute is V4SF the single-precision encodings
;; (xorps / movaps) are emitted, otherwise xorpd / movapd.  The mode
;; attribute chooses V4SF or V2DF from optimize_size, TARGET_SSE2 and,
;; for alternative 2, TARGET_SSE_TYPELESS_STORES.
136 (define_insn "*movv2df_internal"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
138 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
139 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
141 switch (which_alternative)
144 if (get_attr_mode (insn) == MODE_V4SF)
145 return "xorps\t%0, %0";
147 return "xorpd\t%0, %0";
150 if (get_attr_mode (insn) == MODE_V4SF)
151 return "movaps\t{%1, %0|%0, %1}";
153 return "movapd\t{%1, %0|%0, %1}";
158 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
162 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
163 (and (eq_attr "alternative" "2")
164 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
166 (const_string "V4SF")
167 (const_string "V2DF")))])
;; Split, valid only after reload with TARGET_SSE2: a V2DF register set
;; from a zero_extended_scalar_load_operand becomes a vec_concat of the
;; DFmode subreg of operand 1 (byte offset 0) with a DFmode zero constant.
170 [(set (match_operand:V2DF 0 "register_operand" "")
171 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
172 "TARGET_SSE2 && reload_completed"
173 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
175 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
176 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE mode; takes one register operand and
;; delegates to ix86_expand_push.
179 (define_expand "push<mode>1"
180 [(match_operand:SSEMODE 0 "register_operand" "")]
183 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE mode; delegates to
;; ix86_expand_vector_move_misalign.
187 (define_expand "movmisalign<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move (movups), modelled as an unspec so it is not treated
;; as an ordinary move.  Alternatives: register/memory into a register, or
;; register into memory; memory-to-memory is rejected by the condition.
;; The insn mode is V4SF: every operand of this pattern is V4SF and movups
;; is a packed single-precision instruction.  (It was previously labelled
;; V2DF, which disagreed with the pattern and with sse_movntv4sf.)
;; NOTE(review): attributes derived from "mode" elsewhere in the port
;; (e.g. prefix/length computation) should be re-checked with this change.
196 (define_insn "sse_movups"
197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
198 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
200 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
201 "movups\t{%1, %0|%0, %1}"
202 [(set_attr "type" "ssemov")
203 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move (movupd), modelled as an unspec.  Requires
;; TARGET_SSE2; memory-to-memory is rejected by the condition.
205 (define_insn "sse2_movupd"
206 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
207 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
209 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
210 "movupd\t{%1, %0|%0, %1}"
211 [(set_attr "type" "ssemov")
212 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move (movdqu), modelled as an unspec.  Requires
;; TARGET_SSE2; memory-to-memory is rejected by the condition.
214 (define_insn "sse2_movdqu"
215 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
216 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movdqu\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "TI")])
;; movntps: stores a V4SF SSE register to a memory operand; modelled as an
;; unspec.
223 (define_insn "sse_movntv4sf"
224 [(set (match_operand:V4SF 0 "memory_operand" "=m")
225 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228 "movntps\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V4SF")])
;; movntpd: stores a V2DF SSE register to a memory operand; modelled as an
;; unspec.
232 (define_insn "sse2_movntv2df"
233 [(set (match_operand:V2DF 0 "memory_operand" "=m")
234 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237 "movntpd\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssecvt")
239 (set_attr "mode" "V2DF")])
;; movntdq: stores a V2DI SSE register to a memory operand; modelled as an
;; unspec.
241 (define_insn "sse2_movntv2di"
242 [(set (match_operand:V2DI 0 "memory_operand" "=m")
243 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246 "movntdq\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssecvt")
248 (set_attr "mode" "TI")])
;; movnti: stores an SImode general register ("r") to a memory operand;
;; modelled as an unspec.  The insn mode is SI because both operands are
;; SImode and no vector register is involved.  (It was previously labelled
;; V2DF, which does not describe this instruction.)
;; NOTE(review): attributes derived from "mode" elsewhere in the port
;; (e.g. prefix/length computation) should be re-checked with this change.
250 (define_insn "sse2_movntsi"
251 [(set (match_operand:SI 0 "memory_operand" "=m")
252 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255 "movnti\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "SI")])
;; lddqu: loads a V16QI value from a memory operand into an SSE register;
;; modelled as an unspec.
259 (define_insn "sse3_lddqu"
260 [(set (match_operand:V16QI 0 "register_operand" "=x")
261 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264 "lddqu\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
270 ;; Parallel single-precision floating point arithmetic
272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all work is done by
;; ix86_expand_fp_absneg_operator (NEG), after which expansion is DONE.
274 (define_expand "negv4sf2"
275 [(set (match_operand:V4SF 0 "register_operand" "")
276 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
278 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all work is done by
;; ix86_expand_fp_absneg_operator (ABS), after which expansion is DONE.
280 (define_expand "absv4sf2"
281 [(set (match_operand:V4SF 0 "register_operand" "")
282 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
284 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy and the pattern below is then emitted.
286 (define_expand "addv4sf3"
287 [(set (match_operand:V4SF 0 "register_operand" "")
288 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
289 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
291 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; addps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Matches only when
;; ix86_binary_operator_ok accepts the operand combination.
293 (define_insn "*addv4sf3"
294 [(set (match_operand:V4SF 0 "register_operand" "=x")
295 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
297 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
298 "addps\t{%2, %0|%0, %2}"
299 [(set_attr "type" "sseadd")
300 (set_attr "mode" "V4SF")])
;; addss on V4SF operands; the insn mode is SF.  Operand 1 is tied to the
;; destination and commutative ("%0"); operand 2 may be register or memory.
302 (define_insn "sse_vmaddv4sf3"
303 [(set (match_operand:V4SF 0 "register_operand" "=x")
305 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
306 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
310 "addss\t{%2, %0|%0, %2}"
311 [(set_attr "type" "sseadd")
312 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Unlike the commutative expanders, operand 1
;; must already be a register.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
314 (define_expand "subv4sf3"
315 [(set (match_operand:V4SF 0 "register_operand" "")
316 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
317 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
319 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; subps.  Operand 1 is a register tied to the destination ("0", not
;; commutative); operand 2 may be an SSE register or memory.
321 (define_insn "*subv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
326 "subps\t{%2, %0|%0, %2}"
327 [(set_attr "type" "sseadd")
328 (set_attr "mode" "V4SF")])
;; subss on V4SF operands; the insn mode is SF.  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
330 (define_insn "sse_vmsubv4sf3"
331 [(set (match_operand:V4SF 0 "register_operand" "=x")
333 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
334 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
338 "subss\t{%2, %0|%0, %2}"
339 [(set_attr "type" "sseadd")
340 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
342 (define_expand "mulv4sf3"
343 [(set (match_operand:V4SF 0 "register_operand" "")
344 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
345 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
347 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; mulps.  Operand 1 is tied to the destination and commutative ("%0");
;; operand 2 may be an SSE register or memory.  Matches only when
;; ix86_binary_operator_ok accepts the operand combination.
349 (define_insn "*mulv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
353 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
354 "mulps\t{%2, %0|%0, %2}"
355 [(set_attr "type" "ssemul")
356 (set_attr "mode" "V4SF")])
;; mulss on V4SF operands; the insn mode is SF.  Operand 1 is tied to the
;; destination and commutative ("%0"); operand 2 may be register or memory.
358 (define_insn "sse_vmmulv4sf3"
359 [(set (match_operand:V4SF 0 "register_operand" "=x")
361 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
362 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
366 "mulss\t{%2, %0|%0, %2}"
367 [(set_attr "type" "ssemul")
368 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy.
370 (define_expand "divv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "")
372 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
373 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; divps.  Operand 1 is a register tied to the destination ("0", not
;; commutative); operand 2 may be an SSE register or memory.
377 (define_insn "*divv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
382 "divps\t{%2, %0|%0, %2}"
383 [(set_attr "type" "ssediv")
384 (set_attr "mode" "V4SF")])
;; divss on V4SF operands; the insn mode is SF.  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
386 (define_insn "sse_vmdivv4sf3"
387 [(set (match_operand:V4SF 0 "register_operand" "=x")
389 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
394 "divss\t{%2, %0|%0, %2}"
395 [(set_attr "type" "ssediv")
396 (set_attr "mode" "SF")])
;; rcpps, modelled as UNSPEC_RCP of operand 1 (register or memory).  The
;; destination is not tied to the source.
398 (define_insn "sse_rcpv4sf2"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
401 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
403 "rcpps\t{%1, %0|%0, %1}"
404 [(set_attr "type" "sse")
405 (set_attr "mode" "V4SF")])
;; rcpss; the insn mode is SF.  Operand 1 (register or memory) is the
;; unspec input; operand 2 is a register tied to the destination ("0").
;; Only operand 1 appears in the output template.
407 (define_insn "sse_vmrcpv4sf2"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
412 (match_operand:V4SF 2 "register_operand" "0")
415 "rcpss\t{%1, %0|%0, %1}"
416 [(set_attr "type" "sse")
417 (set_attr "mode" "SF")])
;; rsqrtps, modelled as UNSPEC_RSQRT of operand 1 (register or memory).
419 (define_insn "sse_rsqrtv4sf2"
420 [(set (match_operand:V4SF 0 "register_operand" "=x")
422 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
424 "rsqrtps\t{%1, %0|%0, %1}"
425 [(set_attr "type" "sse")
426 (set_attr "mode" "V4SF")])
;; rsqrtss; the insn mode is SF.  Operand 1 (register or memory) is the
;; unspec input; operand 2 is a register tied to the destination ("0").
428 (define_insn "sse_vmrsqrtv4sf2"
429 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
433 (match_operand:V4SF 2 "register_operand" "0")
436 "rsqrtss\t{%1, %0|%0, %1}"
437 [(set_attr "type" "sse")
438 (set_attr "mode" "SF")])
;; sqrtps: V4SF square root of operand 1 (register or memory).
440 (define_insn "sqrtv4sf2"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
444 "sqrtps\t{%1, %0|%0, %1}"
445 [(set_attr "type" "sse")
446 (set_attr "mode" "V4SF")])
;; sqrtss; the insn mode is SF.  Operand 1 (register or memory) is the sqrt
;; input; operand 2 is a register tied to the destination ("0").
448 (define_insn "sse_vmsqrtv4sf2"
449 [(set (match_operand:V4SF 0 "register_operand" "=x")
451 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
452 (match_operand:V4SF 2 "register_operand" "0")
455 "sqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
459 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
460 ;; isn't really correct, as those rtl operators aren't defined when
461 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register (the non-finite insn below
;; requires a register there); then the usual binary-operand fixup runs.
463 (define_expand "smaxv4sf3"
464 [(set (match_operand:V4SF 0 "register_operand" "")
465 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
466 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469 if (!flag_finite_math_only)
470 operands[1] = force_reg (V4SFmode, operands[1]);
471 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps, finite-math variant: only matches with flag_finite_math_only.
;; Operand 1 is tied to the destination and treated as commutative ("%0").
474 (define_insn "*smaxv4sf3_finite"
475 [(set (match_operand:V4SF 0 "register_operand" "=x")
476 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
477 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
478 "TARGET_SSE && flag_finite_math_only
479 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
480 "maxps\t{%2, %0|%0, %2}"
481 [(set_attr "type" "sse")
482 (set_attr "mode" "V4SF")])
;; maxps, general variant: operand 1 must be a register tied to the
;; destination ("0") and is not marked commutative.
484 (define_insn "*smaxv4sf3"
485 [(set (match_operand:V4SF 0 "register_operand" "=x")
486 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
487 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
489 "maxps\t{%2, %0|%0, %2}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "V4SF")])
;; maxss, finite-math variant (requires flag_finite_math_only); the insn
;; mode is SF.  Operand 1 is tied to the destination and commutative.
493 (define_insn "*sse_vmsmaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
496 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
497 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500 "TARGET_SSE && flag_finite_math_only
501 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
502 "maxss\t{%2, %0|%0, %2}"
503 [(set_attr "type" "sse")
504 (set_attr "mode" "SF")])
;; maxss, general variant; the insn mode is SF.  Operand 1 must be a
;; register tied to the destination and is not marked commutative.
506 (define_insn "sse_vmsmaxv4sf3"
507 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
510 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
514 "maxss\t{%2, %0|%0, %2}"
515 [(set_attr "type" "sse")
516 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register (the non-finite insn below
;; requires a register there); then the usual binary-operand fixup runs.
518 (define_expand "sminv4sf3"
519 [(set (match_operand:V4SF 0 "register_operand" "")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
521 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
524 if (!flag_finite_math_only)
525 operands[1] = force_reg (V4SFmode, operands[1]);
526 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps, finite-math variant: only matches with flag_finite_math_only.
;; Operand 1 is tied to the destination and treated as commutative ("%0").
529 (define_insn "*sminv4sf3_finite"
530 [(set (match_operand:V4SF 0 "register_operand" "=x")
531 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
532 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
533 "TARGET_SSE && flag_finite_math_only
534 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
535 "minps\t{%2, %0|%0, %2}"
536 [(set_attr "type" "sse")
537 (set_attr "mode" "V4SF")])
;; minps, general variant: operand 1 must be a register tied to the
;; destination ("0") and is not marked commutative.
539 (define_insn "*sminv4sf3"
540 [(set (match_operand:V4SF 0 "register_operand" "=x")
541 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
542 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
544 "minps\t{%2, %0|%0, %2}"
545 [(set_attr "type" "sse")
546 (set_attr "mode" "V4SF")])
;; minss, finite-math variant (requires flag_finite_math_only); the insn
;; mode is SF.  Operand 1 is tied to the destination and commutative.
548 (define_insn "*sse_vmsminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
551 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
552 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
555 "TARGET_SSE && flag_finite_math_only
556 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
557 "minss\t{%2, %0|%0, %2}"
558 [(set_attr "type" "sse")
559 (set_attr "mode" "SF")])
;; minss, general variant; the insn mode is SF.  Operand 1 must be a
;; register tied to the destination and is not marked commutative.
561 (define_insn "sse_vmsminv4sf3"
562 [(set (match_operand:V4SF 0 "register_operand" "=x")
564 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
565 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
569 "minss\t{%2, %0|%0, %2}"
570 [(set_attr "type" "sse")
571 (set_attr "mode" "SF")])
573 ;; These versions of the min/max patterns implement exactly the operations
574 ;; min = (op1 < op2 ? op1 : op2)
575 ;; max = (!(op1 < op2) ? op1 : op2)
576 ;; Their operands are not commutative, and thus they may be used in the
577 ;; presence of -0.0 and NaN.
;; minps as an unspec with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 register or memory.
579 (define_insn "*ieee_sminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
581 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
582 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
585 "minps\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sseadd")
587 (set_attr "mode" "V4SF")])
;; maxps as an unspec with a fixed operand order: operand 1 is a register
;; tied to the destination, operand 2 register or memory.
589 (define_insn "*ieee_smaxv4sf3"
590 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
595 "maxps\t{%2, %0|%0, %2}"
596 [(set_attr "type" "sseadd")
597 (set_attr "mode" "V4SF")])
;; minpd as an unspec with a fixed operand order (V2DF counterpart of
;; *ieee_sminv4sf3).
599 (define_insn "*ieee_sminv2df3"
600 [(set (match_operand:V2DF 0 "register_operand" "=x")
601 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
602 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
605 "minpd\t{%2, %0|%0, %2}"
606 [(set_attr "type" "sseadd")
607 (set_attr "mode" "V2DF")])
;; maxpd as an unspec with a fixed operand order (V2DF counterpart of
;; *ieee_smaxv4sf3).
609 (define_insn "*ieee_smaxv2df3"
610 [(set (match_operand:V2DF 0 "register_operand" "=x")
611 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
612 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
615 "maxpd\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sseadd")
617 (set_attr "mode" "V2DF")])
;; addsubps.  The RTL combines an operation on operands 1 and 2 with a
;; minus of the same two operands (match_dup).  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
619 (define_insn "sse3_addsubv4sf3"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
623 (match_operand:V4SF 1 "register_operand" "0")
624 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
625 (minus:V4SF (match_dup 1) (match_dup 2))
628 "addsubps\t{%2, %0|%0, %2}"
629 [(set_attr "type" "sseadd")
630 (set_attr "mode" "V4SF")])
;; haddps.  The RTL selects elements 0..3 of operand 1 and then elements
;; 0..3 of operand 2 as individual SF values.  Operand 1 is a register tied
;; to the destination; operand 2 may be register or memory.
632 (define_insn "sse3_haddv4sf3"
633 [(set (match_operand:V4SF 0 "register_operand" "=x")
638 (match_operand:V4SF 1 "register_operand" "0")
639 (parallel [(const_int 0)]))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
643 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
648 (parallel [(const_int 0)]))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
652 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
654 "haddps\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "mode" "V4SF")])
;; hsubps.  Same element-selection layout as sse3_haddv4sf3: elements 0..3
;; of operand 1 followed by elements 0..3 of operand 2.
658 (define_insn "sse3_hsubv4sf3"
659 [(set (match_operand:V4SF 0 "register_operand" "=x")
664 (match_operand:V4SF 1 "register_operand" "0")
665 (parallel [(const_int 0)]))
666 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
669 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
673 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
674 (parallel [(const_int 0)]))
675 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
678 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
680 "hsubps\t{%2, %0|%0, %2}"
681 [(set_attr "type" "sseadd")
682 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two strategies appear in the body:
;; two chained sse3_haddv4sf3 insns through a fresh temporary register, or
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
684 (define_expand "reduc_splus_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
691 rtx tmp = gen_reg_rtx (V4SFmode);
692 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
693 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
696 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Max-reduction expander for V4SF; delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
700 (define_expand "reduc_smax_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min-reduction expander for V4SF; delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
709 (define_expand "reduc_smin_v4sf"
710 [(match_operand:V4SF 0 "register_operand" "")
711 (match_operand:V4SF 1 "register_operand" "")]
714 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
718 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720 ;; Parallel single-precision floating point comparisons
722 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare (cmp<cond>ps).  Operand 3 is the comparison operator,
;; accepted by sse_comparison_operator and printed with the %D3 modifier.
;; Operand 1 is a register tied to the destination; operand 2 may be
;; register or memory.
724 (define_insn "sse_maskcmpv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
726 (match_operator:V4SF 3 "sse_comparison_operator"
727 [(match_operand:V4SF 1 "register_operand" "0")
728 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
730 "cmp%D3ps\t{%2, %0|%0, %2}"
731 [(set_attr "type" "ssecmp")
732 (set_attr "mode" "V4SF")])
;; Scalar compare (cmp<cond>ss); the insn mode is SF.  Unlike the packed
;; form, operand 2 must be a register here ("x", no memory alternative).
734 (define_insn "sse_vmmaskcmpv4sf3"
735 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 (match_operator:V4SF 3 "sse_comparison_operator"
738 [(match_operand:V4SF 1 "register_operand" "0")
739 (match_operand:V4SF 2 "register_operand" "x")])
743 "cmp%D3ss\t{%2, %0|%0, %2}"
744 [(set_attr "type" "ssecmp")
745 (set_attr "mode" "SF")])
;; comiss: sets FLAGS_REG (mode CCFP) from element 0 of operand 0 (a
;; register) and element 0 of operand 1 (register or memory).
747 (define_insn "sse_comi"
748 [(set (reg:CCFP FLAGS_REG)
751 (match_operand:V4SF 0 "register_operand" "x")
752 (parallel [(const_int 0)]))
754 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
755 (parallel [(const_int 0)]))))]
757 "comiss\t{%1, %0|%0, %1}"
758 [(set_attr "type" "ssecomi")
759 (set_attr "mode" "SF")])
;; ucomiss: same operand layout as sse_comi, but FLAGS_REG is set in mode
;; CCFPU.
761 (define_insn "sse_ucomi"
762 [(set (reg:CCFPU FLAGS_REG)
765 (match_operand:V4SF 0 "register_operand" "x")
766 (parallel [(const_int 0)]))
768 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
769 (parallel [(const_int 0)]))))]
771 "ucomiss\t{%1, %0|%0, %1}"
772 [(set_attr "type" "ssecomi")
773 (set_attr "mode" "SF")])
;; Vector conditional-select expander for V4SF.  Operands 4 and 5 are the
;; values being compared; operands 1 and 2 are the two values to choose
;; between.  Expansion is attempted through ix86_expand_fp_vcond, whose
;; result is tested.
775 (define_expand "vcondv4sf"
776 [(set (match_operand:V4SF 0 "register_operand" "")
779 [(match_operand:V4SF 4 "nonimmediate_operand" "")
780 (match_operand:V4SF 5 "nonimmediate_operand" "")])
781 (match_operand:V4SF 1 "general_operand" "")
782 (match_operand:V4SF 2 "general_operand" "")))]
785 if (ix86_expand_fp_vcond (operands))
791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
793 ;; Parallel single-precision floating point logical operations
795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
797 (define_expand "andv4sf3"
798 [(set (match_operand:V4SF 0 "register_operand" "")
799 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
800 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is tied to the destination and commutative ("%0");
;; operand 2 may be an SSE register or memory.
804 (define_insn "*andv4sf3"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
807 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
808 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
809 "andps\t{%2, %0|%0, %2}"
810 [(set_attr "type" "sselog")
811 (set_attr "mode" "V4SF")])
;; andnps: (NOT operand 1) AND operand 2.  Operand 1 is the register tied
;; to the destination; operand 2 may be register or memory.
813 (define_insn "sse_nandv4sf3"
814 [(set (match_operand:V4SF 0 "register_operand" "=x")
815 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
816 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
818 "andnps\t{%2, %0|%0, %2}"
819 [(set_attr "type" "sselog")
820 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
822 (define_expand "iorv4sf3"
823 [(set (match_operand:V4SF 0 "register_operand" "")
824 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
825 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
827 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is tied to the destination and commutative ("%0");
;; operand 2 may be an SSE register or memory.
829 (define_insn "*iorv4sf3"
830 [(set (match_operand:V4SF 0 "register_operand" "=x")
831 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
832 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
833 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
834 "orps\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
838 (define_expand "xorv4sf3"
839 [(set (match_operand:V4SF 0 "register_operand" "")
840 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
841 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
843 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is tied to the destination and commutative ("%0");
;; operand 2 may be an SSE register or memory.
845 (define_insn "*xorv4sf3"
846 [(set (match_operand:V4SF 0 "register_operand" "=x")
847 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
848 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
849 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
850 "xorps\t{%2, %0|%0, %2}"
851 [(set_attr "type" "sselog")
852 (set_attr "mode" "V4SF")])
854 ;; Also define scalar versions. These are used for abs, neg, and
855 ;; conditional move. Using subregs into vector modes causes register
856 ;; allocation lossage. These patterns do not allow memory operands
857 ;; because the native instructions read the full 128-bits.
;; Scalar SFmode AND, emitted as andps.  Both inputs must be registers;
;; the insn mode is V4SF because the full-width instruction is used.
859 (define_insn "*andsf3"
860 [(set (match_operand:SF 0 "register_operand" "=x")
861 (and:SF (match_operand:SF 1 "register_operand" "0")
862 (match_operand:SF 2 "register_operand" "x")))]
864 "andps\t{%2, %0|%0, %2}"
865 [(set_attr "type" "sselog")
866 (set_attr "mode" "V4SF")])
;; Scalar SFmode (NOT operand 1) AND operand 2, emitted as andnps.  Both
;; inputs must be registers; the insn mode is V4SF.
868 (define_insn "*nandsf3"
869 [(set (match_operand:SF 0 "register_operand" "=x")
870 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
871 (match_operand:SF 2 "register_operand" "x")))]
873 "andnps\t{%2, %0|%0, %2}"
874 [(set_attr "type" "sselog")
875 (set_attr "mode" "V4SF")])
;; Scalar SFmode OR, emitted as orps.  Both inputs must be registers; the
;; insn mode is V4SF.
877 (define_insn "*iorsf3"
878 [(set (match_operand:SF 0 "register_operand" "=x")
879 (ior:SF (match_operand:SF 1 "register_operand" "0")
880 (match_operand:SF 2 "register_operand" "x")))]
882 "orps\t{%2, %0|%0, %2}"
883 [(set_attr "type" "sselog")
884 (set_attr "mode" "V4SF")])
;; Scalar SFmode XOR, emitted as xorps.  Both inputs must be registers;
;; the insn mode is V4SF.
886 (define_insn "*xorsf3"
887 [(set (match_operand:SF 0 "register_operand" "=x")
888 (xor:SF (match_operand:SF 1 "register_operand" "0")
889 (match_operand:SF 2 "register_operand" "x")))]
891 "xorps\t{%2, %0|%0, %2}"
892 [(set_attr "type" "sselog")
893 (set_attr "mode" "V4SF")])
895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;; Parallel single-precision floating point conversion operations
899 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (MMX register "y" or memory) to V2SF.
;; Operand 1 is a V4SF register tied to the destination.
901 (define_insn "sse_cvtpi2ps"
902 [(set (match_operand:V4SF 0 "register_operand" "=x")
905 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
906 (match_operand:V4SF 1 "register_operand" "0")
909 "cvtpi2ps\t{%2, %0|%0, %2}"
910 [(set_attr "type" "ssecvt")
911 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 goes through a V4SI unspec, and elements 0 and
;; 1 of that are selected into the V2SI MMX destination ("y").  The insn is
;; marked as executing in the "mmx" unit.
913 (define_insn "sse_cvtps2pi"
914 [(set (match_operand:V2SI 0 "register_operand" "=y")
916 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
918 (parallel [(const_int 0) (const_int 1)])))]
920 "cvtps2pi\t{%1, %0|%0, %1}"
921 [(set_attr "type" "ssecvt")
922 (set_attr "unit" "mmx")
923 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF operand 1 is converted with fix:V4SI, and elements 0 and
;; 1 are selected into the V2SI MMX destination ("y").
;; NOTE(review): the mode attribute is "SF" here but "DI" on sse_cvtps2pi,
;; which has the same operand shapes -- confirm which is intended.
925 (define_insn "sse_cvttps2pi"
926 [(set (match_operand:V2SI 0 "register_operand" "=y")
928 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
929 (parallel [(const_int 0) (const_int 1)])))]
931 "cvttps2pi\t{%1, %0|%0, %1}"
932 [(set_attr "type" "ssecvt")
933 (set_attr "unit" "mmx")
934 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SImode operand 2 to SF.  Operand 1 is a V4SF register
;; tied to the destination.  Two alternatives (general register / memory)
;; exist so athlon_decode can differ between them.
936 (define_insn "sse_cvtsi2ss"
937 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
940 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
941 (match_operand:V4SF 1 "register_operand" "0,0")
944 "cvtsi2ss\t{%2, %0|%0, %2}"
945 [(set_attr "type" "sseicvt")
946 (set_attr "athlon_decode" "vector,double")
947 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode counterpart of sse_cvtsi2ss; requires TARGET_64BIT.
;; NOTE(review): the second alternative's constraint is "rm" here but "m"
;; in sse_cvtsi2ss, while athlon_decode is "vector,double" in both --
;; confirm whether the register case should reach the second alternative.
949 (define_insn "sse_cvtsi2ssq"
950 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
953 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
954 (match_operand:V4SF 1 "register_operand" "0,0")
956 "TARGET_SSE && TARGET_64BIT"
957 "cvtsi2ssq\t{%2, %0|%0, %2}"
958 [(set_attr "type" "sseicvt")
959 (set_attr "athlon_decode" "vector,double")
960 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of V4SF operand 1 to SImode in a general
;; register, modelled as UNSPEC_FIX_NOTRUNC.  The register and memory
;; source alternatives carry different athlon_decode values.
962 (define_insn "sse_cvtss2si"
963 [(set (match_operand:SI 0 "register_operand" "=r,r")
966 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
967 (parallel [(const_int 0)]))]
968 UNSPEC_FIX_NOTRUNC))]
970 "cvtss2si\t{%1, %0|%0, %1}"
971 [(set_attr "type" "sseicvt")
972 (set_attr "athlon_decode" "double,vector")
973 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result counterpart of sse_cvtss2si; requires
;; TARGET_64BIT.
975 (define_insn "sse_cvtss2siq"
976 [(set (match_operand:DI 0 "register_operand" "=r,r")
979 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
980 (parallel [(const_int 0)]))]
981 UNSPEC_FIX_NOTRUNC))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvtss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 (register or memory) to
;; SImode in a general register.
988 (define_insn "sse_cvttss2si"
989 [(set (match_operand:SI 0 "register_operand" "=r,r")
992 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
993 (parallel [(const_int 0)]))))]
995 "cvttss2si\t{%1, %0|%0, %1}"
996 [(set_attr "type" "sseicvt")
997 (set_attr "athlon_decode" "double,vector")
998 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result counterpart of sse_cvttss2si; requires
;; TARGET_64BIT.
1000 (define_insn "sse_cvttss2siq"
1001 [(set (match_operand:DI 0 "register_operand" "=r,r")
1004 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1005 (parallel [(const_int 0)]))))]
1006 "TARGET_SSE && TARGET_64BIT"
1007 "cvttss2siq\t{%1, %0|%0, %1}"
1008 [(set_attr "type" "sseicvt")
1009 (set_attr "athlon_decode" "double,vector")
1010 (set_attr "mode" "DI")])
;; cvtdq2ps: converts V4SI operand 1 (register or memory) to V4SF.  The
;; insn mode is V4SF, matching the result mode of the pattern.  (It was
;; previously labelled V2DF, although no double-precision data is
;; involved.)
;; NOTE(review): attributes derived from "mode" elsewhere in the port
;; (e.g. prefix/length computation) should be re-checked with this change.
1012 (define_insn "sse2_cvtdq2ps"
1013 [(set (match_operand:V4SF 0 "register_operand" "=x")
1014 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1016 "cvtdq2ps\t{%1, %0|%0, %1}"
1017 [(set_attr "type" "ssecvt")
1018 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand 1 to V4SI, modelled as UNSPEC_FIX_NOTRUNC.
1020 (define_insn "sse2_cvtps2dq"
1021 [(set (match_operand:V4SI 0 "register_operand" "=x")
1022 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1023 UNSPEC_FIX_NOTRUNC))]
1025 "cvtps2dq\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF operand 1 to V4SI, expressed with the fix RTL operator.
1029 (define_insn "sse2_cvttps2dq"
1030 [(set (match_operand:V4SI 0 "register_operand" "=x")
1031 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1033 "cvttps2dq\t{%1, %0|%0, %1}"
1034 [(set_attr "type" "ssecvt")
1035 (set_attr "mode" "TI")])
1037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1039 ;; Parallel single-precision floating point element swizzling
1041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection from operands 1 and 2 (the selector list starts at
;; index 6).  Operand 1 is always tied to the destination.  Three
;; alternatives:
;;   0: register destination, register operand 2     -> movhlps (V4SF)
;;   1: register destination, offsettable memory "o" -> movlps, with
;;      operand 2 printed through the %H modifier     (V2SF)
;;   2: memory destination, register operand 2       -> movhps  (V2SF)
;; The condition forbids operands 1 and 2 both being memory.
1043 (define_insn "sse_movhlps"
1044 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1047 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1048 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1049 (parallel [(const_int 6)
1053 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1055 movhlps\t{%2, %0|%0, %2}
1056 movlps\t{%H2, %0|%0, %H2}
1057 movhps\t{%2, %0|%0, %2}"
1058 [(set_attr "type" "ssemov")
1059 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection from operands 1 and 2 (the selector list starts at
;; index 0).  Operand 1 is always tied to the destination.  Three
;; alternatives:
;;   0: register destination, register operand 2 -> movlhps (V4SF)
;;   1: register destination, memory operand 2   -> movhps  (V2SF)
;;   2: offsettable-memory destination "o", register operand 2 -> movlps,
;;      with operand 0 printed through the %H modifier (V2SF)
;; Matches only when ix86_binary_operator_ok accepts the operands.
1061 (define_insn "sse_movlhps"
1062 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1065 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1066 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1067 (parallel [(const_int 0)
1071 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1073 movlhps\t{%2, %0|%0, %2}
1074 movhps\t{%2, %0|%0, %2}
1075 movlps\t{%2, %H0|%H0, %2}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  The selector picks indices 2, 6, 3, 7 from the pair
;; formed by operand 1 (tied to the destination) and operand 2 (register
;; or memory).
1079 (define_insn "sse_unpckhps"
1080 [(set (match_operand:V4SF 0 "register_operand" "=x")
1083 (match_operand:V4SF 1 "register_operand" "0")
1084 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1085 (parallel [(const_int 2) (const_int 6)
1086 (const_int 3) (const_int 7)])))]
1088 "unpckhps\t{%2, %0|%0, %2}"
1089 [(set_attr "type" "sselog")
1090 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  The selector picks indices 0, 4, 1, 5 from the pair
;; formed by operand 1 (tied to the destination) and operand 2 (register
;; or memory).
1092 (define_insn "sse_unpcklps"
1093 [(set (match_operand:V4SF 0 "register_operand" "=x")
1096 (match_operand:V4SF 1 "register_operand" "0")
1097 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1098 (parallel [(const_int 0) (const_int 4)
1099 (const_int 1) (const_int 5)])))]
1101 "unpcklps\t{%2, %0|%0, %2}"
1102 [(set_attr "type" "sselog")
1103 (set_attr "mode" "V4SF")])
1105 ;; These are modeled with the same vec_concat as the others so that we
1106 ;; capture users of shufps that can use the new instructions
;; Single-source shuffle emitting movshdup; operand 1 may be a register or
;; memory and the selector list starts at index 1.
1107 (define_insn "sse3_movshdup"
1108 [(set (match_operand:V4SF 0 "register_operand" "=x")
1111 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1113 (parallel [(const_int 1)
1118 "movshdup\t{%1, %0|%0, %1}"
1119 [(set_attr "type" "sse")
1120 (set_attr "mode" "V4SF")])
;; Single-source shuffle emitting movsldup; operand 1 may be a register or
;; memory and the selector list starts at index 0.
1122 (define_insn "sse3_movsldup"
1123 [(set (match_operand:V4SF 0 "register_operand" "=x")
1126 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1128 (parallel [(const_int 0)
1133 "movsldup\t{%1, %0|%0, %1}"
1134 [(set_attr "type" "sse")
1135 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as operand 3.  The immediate is
;; cut into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7); the upper two
;; fields are biased by 4, which matches the const_4_to_7_operand
;; predicates of sse_shufps_1, and all four are passed to that insn.
1137 (define_expand "sse_shufps"
1138 [(match_operand:V4SF 0 "register_operand" "")
1139 (match_operand:V4SF 1 "register_operand" "")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "")
1141 (match_operand:SI 3 "const_int_operand" "")]
1144 int mask = INTVAL (operands[3]);
1145 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1146 GEN_INT ((mask >> 0) & 3),
1147 GEN_INT ((mask >> 2) & 3),
1148 GEN_INT (((mask >> 4) & 3) + 4),
1149 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of shufps with the selector split into operands 3..6
;; (two in the range 0..3, two in the range 4..7).  The output code packs
;; them back into one immediate -- removing the bias of 4 from operands 5
;; and 6 -- and overwrites operands[3] with it so that %3 prints it.
1153 (define_insn "sse_shufps_1"
1154 [(set (match_operand:V4SF 0 "register_operand" "=x")
1157 (match_operand:V4SF 1 "register_operand" "0")
1158 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1159 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1160 (match_operand 4 "const_0_to_3_operand" "")
1161 (match_operand 5 "const_4_to_7_operand" "")
1162 (match_operand 6 "const_4_to_7_operand" "")])))]
1166 mask |= INTVAL (operands[3]) << 0;
1167 mask |= INTVAL (operands[4]) << 2;
1168 mask |= (INTVAL (operands[5]) - 4) << 4;
1169 mask |= (INTVAL (operands[6]) - 4) << 6;
1170 operands[3] = GEN_INT (mask);
1172 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1174 [(set_attr "type" "sselog")
1175 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into a V2SF destination:
;;   mem <- reg               movhps
;;   reg <- reg               movhlps
;;   reg <- offsettable mem   movlps, reading through %H1
1177 (define_insn "sse_storehps"
1178 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1180 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1181 (parallel [(const_int 2) (const_int 3)])))]
1184 movhps\t{%1, %0|%0, %1}
1185 movhlps\t{%1, %0|%0, %1}
1186 movlps\t{%H1, %0|%0, %H1}"
1187 [(set_attr "type" "ssemov")
1188 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; V2SF operand 2:
;;   reg, op2 in mem          movhps
;;   reg, op2 in reg          movlhps
;;   offsettable mem dest     movlps, writing through %H0
1190 (define_insn "sse_loadhps"
1191 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1194 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1195 (parallel [(const_int 0) (const_int 1)]))
1196 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1199 movhps\t{%2, %0|%0, %2}
1200 movlhps\t{%2, %0|%0, %2}
1201 movlps\t{%2, %H0|%H0, %2}"
1202 [(set_attr "type" "ssemov")
1203 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; The reg <- reg alternative uses a full movaps; the two alternatives
;; involving memory use movlps.
1205 (define_insn "sse_storelps"
1206 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1208 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1209 (parallel [(const_int 0) (const_int 1)])))]
1212 movlps\t{%1, %0|%0, %1}
1213 movaps\t{%1, %0|%0, %1}
1214 movlps\t{%1, %0|%0, %1}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternative 0 ties operand 2 to the destination and brings operand 1
;; in with shufps $0xe4; alternatives 1 and 2 tie operand 1 to the
;; destination and move operand 2 with movlps (load, resp. store).
1218 (define_insn "sse_loadlps"
1219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1224 (parallel [(const_int 2) (const_int 3)]))))]
1227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1228 movlps\t{%2, %0|%0, %2}
1229 movlps\t{%2, %0|%0, %2}"
1230 [(set_attr "type" "sselog,ssemov,ssemov")
1231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the destination and operand 2
;; is the other V4SF register named in the instruction.
1233 (define_insn "sse_movss"
1234 [(set (match_operand:V4SF 0 "register_operand" "=x")
1236 (match_operand:V4SF 2 "register_operand" "x")
1237 (match_operand:V4SF 1 "register_operand" "0")
1240 "movss\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "ssemov")
1242 (set_attr "mode" "SF")])
;; The SF source (operand 1) is tied to the destination register; the
;; insn is a shufps of that register with itself using immediate 0.
1244 (define_insn "*vec_dupv4sf"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1247 (match_operand:SF 1 "register_operand" "0")))]
1249 "shufps\t{$0, %0, %0|%0, %0, 0}"
1250 [(set_attr "type" "sselog1")
1251 (set_attr "mode" "V4SF")])
1253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands.  Alternatives 0-1 target SSE
;; registers (unpcklps, or movss when operand 2 satisfies "C"); alternatives
;; 2-3 are the corresponding MMX forms (punpckldq, movd).
1256 (define_insn "*sse_concatv2sf"
1257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1263 unpcklps\t{%2, %0|%0, %2}
1264 movss\t{%1, %0|%0, %1}
1265 punpckldq\t{%2, %0|%0, %2}
1266 movd\t{%1, %0|%0, %1}"
1267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  movlhps when operand 2 is a register, movhps when it is
;; in memory.
1270 (define_insn "*sse_concatv4sf"
1271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1273 (match_operand:V2SF 1 "register_operand" " 0,0")
1274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1277 movlhps\t{%2, %0|%0, %2}
1278 movhps\t{%2, %0|%0, %2}"
1279 [(set_attr "type" "ssemov")
1280 (set_attr "mode" "V4SF,V2SF")])
;; Expander for V4SF initialization; all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1282 (define_expand "vec_initv4sf"
1283 [(match_operand:V4SF 0 "register_operand" "")
1284 (match_operand 1 "" "")]
1287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; SF operand 2 is combined with V4SF operand 1.  Four alternatives are
;; declared: movss from a register or from memory, movd from a general
;; register, and a memory-destination alternative whose template is not
;; among the visible lines.
1291 (define_insn "*vec_setv4sf_0"
1292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1300 movss\t{%2, %0|%0, %2}
1301 movss\t{%2, %0|%0, %2}
1302 movd\t{%2, %0|%0, %2}
1304 [(set_attr "type" "ssemov")
1305 (set_attr "mode" "SF")])
;; Post-reload rewrite for a V4SF memory destination: the SF value
;; (operand 1, not in memory) is stored with a plain SFmode move to
;; offset 0 of the destination.
1308 [(set (match_operand:V4SF 0 "memory_operand" "")
1311 (match_operand:SF 1 "nonmemory_operand" ""))
1314 "TARGET_SSE && reload_completed"
1317 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander for setting one element of a V4SF: operand 1 is the SF value,
;; operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set, called with `false' as its first argument.
1321 (define_expand "vec_setv4sf"
1322 [(match_operand:V4SF 0 "register_operand" "")
1323 (match_operand:SF 1 "register_operand" "")
1324 (match_operand 2 "const_int_operand" "")]
1327 ix86_expand_vector_set (false, operands[0], operands[1],
1328 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  After reload this is split into an
;; ordinary SFmode move: operand 1 is re-expressed in SFmode (either as
;; the same hard register or through gen_lowpart) and moved to operand 0.
;; Memory-to-memory is rejected by the insn condition.
1332 (define_insn_and_split "*vec_extractv4sf_0"
1333 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1335 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1336 (parallel [(const_int 0)])))]
1337 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1339 "&& reload_completed"
1342 rtx op1 = operands[1];
1344 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1346 op1 = gen_lowpart (SFmode, op1);
1347 emit_move_insn (operands[0], op1);
;; Expander for extracting one SF element of a V4SF: operand 2 is the
;; constant element index.  Delegates to ix86_expand_vector_extract,
;; called with `false' as its first argument.
1351 (define_expand "vec_extractv4sf"
1352 [(match_operand:SF 0 "register_operand" "")
1353 (match_operand:V4SF 1 "register_operand" "")
1354 (match_operand 2 "const_int_operand" "")]
1357 ix86_expand_vector_extract (false, operands[0], operands[1],
1358 INTVAL (operands[2]));
1362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1364 ;; Parallel double-precision floating point arithmetic
1366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  The preparation code hands everything to
;; ix86_expand_fp_absneg_operator and ends with DONE, so the RTL template
;; below is not emitted directly.
1368 (define_expand "negv2df2"
1369 [(set (match_operand:V2DF 0 "register_operand" "")
1370 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1372 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  The preparation code hands everything to
;; ix86_expand_fp_absneg_operator and ends with DONE, so the RTL template
;; below is not emitted directly.
1374 (define_expand "absv2df2"
1375 [(set (match_operand:V2DF 0 "register_operand" "")
1376 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1378 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
1380 (define_expand "addv2df3"
1381 [(set (match_operand:V2DF 0 "register_operand" "")
1382 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1383 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1385 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; Packed double add (addpd).  Operand 1 is tied to the destination and
;; marked commutative ("%"); operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1387 (define_insn "*addv2df3"
1388 [(set (match_operand:V2DF 0 "register_operand" "=x")
1389 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1390 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1391 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1392 "addpd\t{%2, %0|%0, %2}"
1393 [(set_attr "type" "sseadd")
1394 (set_attr "mode" "V2DF")])
;; Scalar double add on V2DF operands (addsd).  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be a register
;; or memory.  The operands are V2DF, so the mode handed to
;; ix86_binary_operator_ok is V2DFmode, as in sse2_vmmulv2df3 (it used to
;; say V4SFmode).
1396 (define_insn "sse2_vmaddv2df3"
1397 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1403 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1404 "addsd\t{%2, %0|%0, %2}"
1405 [(set_attr "type" "sseadd")
1406 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
1408 (define_expand "subv2df3"
1409 [(set (match_operand:V2DF 0 "register_operand" "")
1410 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1411 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1413 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; Packed double subtract (subpd).  Not commutative: operand 1 must be a
;; register tied to the destination; operand 2 may be register or memory.
1415 (define_insn "*subv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1417 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1420 "subpd\t{%2, %0|%0, %2}"
1421 [(set_attr "type" "sseadd")
1422 (set_attr "mode" "V2DF")])
;; Scalar double subtract on V2DF operands (subsd).  Operand 1 must be a
;; register tied to the destination; operand 2 may be register or memory.
1424 (define_insn "sse2_vmsubv2df3"
1425 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1432 "subsd\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "sseadd")
1434 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
1436 (define_expand "mulv2df3"
1437 [(set (match_operand:V2DF 0 "register_operand" "")
1438 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1439 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1441 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; Packed double multiply (mulpd).  Operand 1 is tied to the destination
;; and marked commutative ("%"); operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1443 (define_insn "*mulv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1445 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1446 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1447 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1448 "mulpd\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "ssemul")
1450 (set_attr "mode" "V2DF")])
;; Scalar double multiply on V2DF operands (mulsd).  Operand 1 is tied to
;; the destination and marked commutative ("%"); operand 2 may be a
;; register or memory.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
1452 (define_insn "sse2_vmmulv2df3"
1453 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1459 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1460 "mulsd\t{%2, %0|%0, %2}"
1461 [(set_attr "type" "ssemul")
1462 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1
;; must already be a register.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1464 (define_expand "divv2df3"
1465 [(set (match_operand:V2DF 0 "register_operand" "")
1466 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1467 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1469 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed double divide (divpd).  Operand 1 must be a register tied to the
;; destination; operand 2 may be register or memory.
1471 (define_insn "*divv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1473 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1474 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1476 "divpd\t{%2, %0|%0, %2}"
1477 [(set_attr "type" "ssediv")
1478 (set_attr "mode" "V2DF")])
;; Scalar double divide on V2DF operands (divsd).  Operand 1 must be a
;; register tied to the destination; operand 2 may be register or memory.
1480 (define_insn "sse2_vmdivv2df3"
1481 [(set (match_operand:V2DF 0 "register_operand" "=x")
1483 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1484 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1488 "divsd\t{%2, %0|%0, %2}"
1489 [(set_attr "type" "ssediv")
1490 (set_attr "mode" "DF")])
;; Packed double square root (sqrtpd); the source may be a register or
;; memory and is not tied to the destination.
1492 (define_insn "sqrtv2df2"
1493 [(set (match_operand:V2DF 0 "register_operand" "=x")
1494 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1496 "sqrtpd\t{%1, %0|%0, %1}"
1497 [(set_attr "type" "sse")
1498 (set_attr "mode" "V2DF")])
;; Scalar double square root on V2DF operands (sqrtsd).  Operand 1 is the
;; value whose square root is taken; operand 2 is tied to the destination.
;; Operand 1 carries the "xm" constraint, so its predicate is
;; nonimmediate_operand, matching the memory-capable source operands of
;; the other sse2_vm* patterns (it used to be register_operand, which
;; rejects the memory operands the constraint allows).
1500 (define_insn "sse2_vmsqrtv2df2"
1501 [(set (match_operand:V2DF 0 "register_operand" "=x")
1503 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1504 (match_operand:V2DF 2 "register_operand" "0")
1507 "sqrtsd\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "sse")
1509 (set_attr "mode" "DF")])
1511 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1512 ;; isn't really correct, as those rtl operators aren't defined when
1513 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the usual binary-operand fixup.
1515 (define_expand "smaxv2df3"
1516 [(set (match_operand:V2DF 0 "register_operand" "")
1517 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1518 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1521 if (!flag_finite_math_only)
1522 operands[1] = force_reg (V2DFmode, operands[1]);
1523 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; Packed double maximum (maxpd), commutative form: operand 1 carries "%".
;; Only available when flag_finite_math_only is set.
1526 (define_insn "*smaxv2df3_finite"
1527 [(set (match_operand:V2DF 0 "register_operand" "=x")
1528 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1529 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1530 "TARGET_SSE2 && flag_finite_math_only
1531 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1532 "maxpd\t{%2, %0|%0, %2}"
1533 [(set_attr "type" "sseadd")
1534 (set_attr "mode" "V2DF")])
;; Packed double maximum (maxpd), non-commutative form: operand 1 must be
;; a register tied to the destination and carries no "%".
1536 (define_insn "*smaxv2df3"
1537 [(set (match_operand:V2DF 0 "register_operand" "=x")
1538 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1539 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1541 "maxpd\t{%2, %0|%0, %2}"
1542 [(set_attr "type" "sseadd")
1543 (set_attr "mode" "V2DF")])
;; Scalar double maximum on V2DF operands (maxsd), commutative form:
;; operand 1 carries "%".  Only available when flag_finite_math_only is set.
1545 (define_insn "*sse2_vmsmaxv2df3_finite"
1546 [(set (match_operand:V2DF 0 "register_operand" "=x")
1548 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1549 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1552 "TARGET_SSE2 && flag_finite_math_only
1553 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1554 "maxsd\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sseadd")
1556 (set_attr "mode" "DF")])
;; Scalar double maximum on V2DF operands (maxsd), non-commutative form:
;; operand 1 must be a register tied to the destination.
1558 (define_insn "sse2_vmsmaxv2df3"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1561 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1562 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1566 "maxsd\t{%2, %0|%0, %2}"
1567 [(set_attr "type" "sseadd")
1568 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the usual binary-operand fixup.
1570 (define_expand "sminv2df3"
1571 [(set (match_operand:V2DF 0 "register_operand" "")
1572 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1573 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1576 if (!flag_finite_math_only)
1577 operands[1] = force_reg (V2DFmode, operands[1]);
1578 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; Packed double minimum (minpd), commutative form: operand 1 carries "%".
;; Only available when flag_finite_math_only is set.
1581 (define_insn "*sminv2df3_finite"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1585 "TARGET_SSE2 && flag_finite_math_only
1586 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1587 "minpd\t{%2, %0|%0, %2}"
1588 [(set_attr "type" "sseadd")
1589 (set_attr "mode" "V2DF")])
;; Packed double minimum (minpd), non-commutative form: operand 1 must be
;; a register tied to the destination and carries no "%".
1591 (define_insn "*sminv2df3"
1592 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1596 "minpd\t{%2, %0|%0, %2}"
1597 [(set_attr "type" "sseadd")
1598 (set_attr "mode" "V2DF")])
;; Scalar double minimum on V2DF operands (minsd), commutative form:
;; operand 1 carries "%".  Only available when flag_finite_math_only is set.
1600 (define_insn "*sse2_vmsminv2df3_finite"
1601 [(set (match_operand:V2DF 0 "register_operand" "=x")
1603 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1604 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1607 "TARGET_SSE2 && flag_finite_math_only
1608 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1609 "minsd\t{%2, %0|%0, %2}"
1610 [(set_attr "type" "sseadd")
1611 (set_attr "mode" "DF")])
;; Scalar double minimum on V2DF operands (minsd), non-commutative form:
;; operand 1 must be a register tied to the destination.
1613 (define_insn "sse2_vmsminv2df3"
1614 [(set (match_operand:V2DF 0 "register_operand" "=x")
1616 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1617 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1621 "minsd\t{%2, %0|%0, %2}"
1622 [(set_attr "type" "sseadd")
1623 (set_attr "mode" "DF")])
;; Emits addsubpd.  The pattern refers to the same two operands twice;
;; the visible part is the (minus op1 op2) arm built from match_dups.
;; Operand 1 is tied to the destination.
1625 (define_insn "sse3_addsubv2df3"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (match_operand:V2DF 1 "register_operand" "0")
1630 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1631 (minus:V2DF (match_dup 1) (match_dup 2))
1634 "addsubpd\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "mode" "V2DF")])
;; Emits haddpd.  Each half of the pattern pairs element 0 with element 1
;; of a single operand: first operand 1 (tied to the destination), then
;; operand 2 (register or memory).
1638 (define_insn "sse3_haddv2df3"
1639 [(set (match_operand:V2DF 0 "register_operand" "=x")
1643 (match_operand:V2DF 1 "register_operand" "0")
1644 (parallel [(const_int 0)]))
1645 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1648 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1649 (parallel [(const_int 0)]))
1650 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1652 "haddpd\t{%2, %0|%0, %2}"
1653 [(set_attr "type" "sseadd")
1654 (set_attr "mode" "V2DF")])
;; Emits hsubpd.  Each half of the pattern pairs element 0 with element 1
;; of a single operand: first operand 1 (tied to the destination), then
;; operand 2 (register or memory).
1656 (define_insn "sse3_hsubv2df3"
1657 [(set (match_operand:V2DF 0 "register_operand" "=x")
1661 (match_operand:V2DF 1 "register_operand" "0")
1662 (parallel [(const_int 0)]))
1663 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1667 (parallel [(const_int 0)]))
1668 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1670 "hsubpd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "sseadd")
1672 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF: implemented by emitting sse3_haddv2df3 with
;; operand 1 supplied as both sources.
1674 (define_expand "reduc_splus_v2df"
1675 [(match_operand:V2DF 0 "register_operand" "")
1676 (match_operand:V2DF 1 "register_operand" "")]
1679 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; Parallel double-precision floating point comparisons
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare producing a V2DF result.  The comparison is
;; operand 3 (any sse_comparison_operator); %D3 inserts its condition name
;; into the cmp..pd mnemonic.  Operand 1 is tied to the destination.
1689 (define_insn "sse2_maskcmpv2df3"
1690 [(set (match_operand:V2DF 0 "register_operand" "=x")
1691 (match_operator:V2DF 3 "sse_comparison_operator"
1692 [(match_operand:V2DF 1 "register_operand" "0")
1693 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1695 "cmp%D3pd\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "ssecmp")
1697 (set_attr "mode" "V2DF")])
;; Scalar double compare on V2DF operands.  The comparison is operand 3
;; (any sse_comparison_operator); %D3 inserts its condition name into the
;; cmp..sd mnemonic.  Operand 1 is tied to the destination.
1699 (define_insn "sse2_vmmaskcmpv2df3"
1700 [(set (match_operand:V2DF 0 "register_operand" "=x")
1702 (match_operator:V2DF 3 "sse_comparison_operator"
1703 [(match_operand:V2DF 1 "register_operand" "0")
1704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1708 "cmp%D3sd\t{%2, %0|%0, %2}"
1709 [(set_attr "type" "ssecmp")
1710 (set_attr "mode" "DF")])
;; Emits comisd: element 0 of operand 0 (register) against element 0 of
;; operand 1 (register or memory); the result is written to FLAGS_REG in
;; CCFP mode.
1712 (define_insn "sse2_comi"
1713 [(set (reg:CCFP FLAGS_REG)
1716 (match_operand:V2DF 0 "register_operand" "x")
1717 (parallel [(const_int 0)]))
1719 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0)]))))]
1722 "comisd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecomi")
1724 (set_attr "mode" "DF")])
;; Emits ucomisd: element 0 of operand 0 (register) against element 0 of
;; operand 1 (register or memory); the result is written to FLAGS_REG in
;; CCFPU mode.
1726 (define_insn "sse2_ucomi"
1727 [(set (reg:CCFPU FLAGS_REG)
1730 (match_operand:V2DF 0 "register_operand" "x")
1731 (parallel [(const_int 0)]))
1733 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1734 (parallel [(const_int 0)]))))]
1736 "ucomisd\t{%1, %0|%0, %1}"
1737 [(set_attr "type" "ssecomi")
1738 (set_attr "mode" "DF")])
;; V2DF conditional expander.  Operand 3 is the comparison, applied to
;; operands 4 and 5; operands 1 and 2 are the two value operands.
;; Expansion is attempted through ix86_expand_fp_vcond; the handling of
;; its result lies outside the visible lines.
1740 (define_expand "vcondv2df"
1741 [(set (match_operand:V2DF 0 "register_operand" "")
1743 (match_operator 3 ""
1744 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1745 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1746 (match_operand:V2DF 1 "general_operand" "")
1747 (match_operand:V2DF 2 "general_operand" "")))]
1750 if (ix86_expand_fp_vcond (operands))
1756 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1758 ;; Parallel double-precision floating point logical operations
1760 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
1762 (define_expand "andv2df3"
1763 [(set (match_operand:V2DF 0 "register_operand" "")
1764 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1765 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1767 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; V2DF bitwise AND (andpd).  Operand 1 is tied to the destination and
;; marked commutative ("%"); operand 2 may be a register or memory.
1769 (define_insn "*andv2df3"
1770 [(set (match_operand:V2DF 0 "register_operand" "=x")
1771 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1773 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1774 "andpd\t{%2, %0|%0, %2}"
1775 [(set_attr "type" "sselog")
1776 (set_attr "mode" "V2DF")])
;; (~op1) & op2 on V2DF, emitted as andnpd.  The complemented operand 1
;; must be a register tied to the destination.
1778 (define_insn "sse2_nandv2df3"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1783 "andnpd\t{%2, %0|%0, %2}"
1784 [(set_attr "type" "sselog")
1785 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
1787 (define_expand "iorv2df3"
1788 [(set (match_operand:V2DF 0 "register_operand" "")
1789 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1790 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1792 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; V2DF bitwise OR (orpd).  Operand 1 is tied to the destination and
;; marked commutative ("%"); operand 2 may be a register or memory.
1794 (define_insn "*iorv2df3"
1795 [(set (match_operand:V2DF 0 "register_operand" "=x")
1796 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1797 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1798 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1799 "orpd\t{%2, %0|%0, %2}"
1800 [(set_attr "type" "sselog")
1801 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
1803 (define_expand "xorv2df3"
1804 [(set (match_operand:V2DF 0 "register_operand" "")
1805 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1806 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1808 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; V2DF bitwise XOR (xorpd).  Operand 1 is tied to the destination and
;; marked commutative ("%"); operand 2 may be a register or memory.
1810 (define_insn "*xorv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x")
1812 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1813 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1814 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1815 "xorpd\t{%2, %0|%0, %2}"
1816 [(set_attr "type" "sselog")
1817 (set_attr "mode" "V2DF")])
1819 ;; Also define scalar versions. These are used for abs, neg, and
1820 ;; conditional move. Using subregs into vector modes causes register
1821 ;; allocation lossage. These patterns do not allow memory operands
1822 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND carried out with the packed andpd instruction;
;; register operands only, "mode" attribute V2DF.
1824 (define_insn "*anddf3"
1825 [(set (match_operand:DF 0 "register_operand" "=x")
1826 (and:DF (match_operand:DF 1 "register_operand" "0")
1827 (match_operand:DF 2 "register_operand" "x")))]
1829 "andpd\t{%2, %0|%0, %2}"
1830 [(set_attr "type" "sselog")
1831 (set_attr "mode" "V2DF")])
;; Scalar DF (~op1) & op2 carried out with the packed andnpd instruction;
;; register operands only, "mode" attribute V2DF.
1833 (define_insn "*nanddf3"
1834 [(set (match_operand:DF 0 "register_operand" "=x")
1835 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1836 (match_operand:DF 2 "register_operand" "x")))]
1838 "andnpd\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "sselog")
1840 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR carried out with the packed orpd instruction;
;; register operands only, "mode" attribute V2DF.
1842 (define_insn "*iordf3"
1843 [(set (match_operand:DF 0 "register_operand" "=x")
1844 (ior:DF (match_operand:DF 1 "register_operand" "0")
1845 (match_operand:DF 2 "register_operand" "x")))]
1847 "orpd\t{%2, %0|%0, %2}"
1848 [(set_attr "type" "sselog")
1849 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR carried out with the packed xorpd instruction;
;; register operands only, "mode" attribute V2DF.
1851 (define_insn "*xordf3"
1852 [(set (match_operand:DF 0 "register_operand" "=x")
1853 (xor:DF (match_operand:DF 1 "register_operand" "0")
1854 (match_operand:DF 2 "register_operand" "x")))]
1856 "xorpd\t{%2, %0|%0, %2}"
1857 [(set_attr "type" "sselog")
1858 (set_attr "mode" "V2DF")])
1860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1862 ;; Parallel double-precision floating point conversion operations
1864 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF conversion (RTL `float'); emits cvtpi2pd.  The source is
;; an MMX register ("y") or memory; only the MMX-register alternative is
;; tagged with unit "mmx".
1866 (define_insn "sse2_cvtpi2pd"
1867 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1868 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1870 "cvtpi2pd\t{%1, %0|%0, %1}"
1871 [(set_attr "type" "ssecvt")
1872 (set_attr "unit" "mmx,*")
1873 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI conversion modeled as UNSPEC_FIX_NOTRUNC; emits cvtpd2pi
;; with an MMX register ("y") destination.
1875 (define_insn "sse2_cvtpd2pi"
1876 [(set (match_operand:V2SI 0 "register_operand" "=y")
1877 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1878 UNSPEC_FIX_NOTRUNC))]
1880 "cvtpd2pi\t{%1, %0|%0, %1}"
1881 [(set_attr "type" "ssecvt")
1882 (set_attr "unit" "mmx")
1883 (set_attr "mode" "DI")])
;; V2DF -> V2SI conversion expressed with the RTL `fix' operator; emits
;; cvttpd2pi with an MMX register ("y") destination.
;; NOTE(review): "mode" is TI here but DI on sse2_cvtpd2pi -- confirm
;; whether the difference is intended.
1885 (define_insn "sse2_cvttpd2pi"
1886 [(set (match_operand:V2SI 0 "register_operand" "=y")
1887 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1889 "cvttpd2pi\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "ssecvt")
1891 (set_attr "unit" "mmx")
1892 (set_attr "mode" "TI")])
;; Emits cvtsi2sd: SI operand 2 (general register or memory) is converted
;; with `float:DF'; V2DF operand 1 is tied to the destination.
1894 (define_insn "sse2_cvtsi2sd"
1895 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1898 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1899 (match_operand:V2DF 1 "register_operand" "0,0")
1902 "cvtsi2sd\t{%2, %0|%0, %2}"
1903 [(set_attr "type" "sseicvt")
1904 (set_attr "mode" "DF")
1905 (set_attr "athlon_decode" "double,direct")])
;; 64-bit variant of sse2_cvtsi2sd: DI operand 2 (general register or
;; memory) is converted with `float:DF'; emits cvtsi2sdq.  Only enabled
;; for TARGET_SSE2 && TARGET_64BIT.
1907 (define_insn "sse2_cvtsi2sdq"
1908 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1911 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1912 (match_operand:V2DF 1 "register_operand" "0,0")
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvtsi2sdq\t{%2, %0|%0, %2}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DF")
1918 (set_attr "athlon_decode" "double,direct")])
;; Emits cvtsd2si: element 0 of V2DF operand 1 (register or memory) is
;; converted under UNSPEC_FIX_NOTRUNC into an SI general register.
1920 (define_insn "sse2_cvtsd2si"
1921 [(set (match_operand:SI 0 "register_operand" "=r,r")
1924 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1925 (parallel [(const_int 0)]))]
1926 UNSPEC_FIX_NOTRUNC))]
1928 "cvtsd2si\t{%1, %0|%0, %1}"
1929 [(set_attr "type" "sseicvt")
1930 (set_attr "athlon_decode" "double,vector")
1931 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvtsd2si: the result is a DI general register
;; and the mnemonic is cvtsd2siq.  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
1933 (define_insn "sse2_cvtsd2siq"
1934 [(set (match_operand:DI 0 "register_operand" "=r,r")
1937 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1938 (parallel [(const_int 0)]))]
1939 UNSPEC_FIX_NOTRUNC))]
1940 "TARGET_SSE2 && TARGET_64BIT"
1941 "cvtsd2siq\t{%1, %0|%0, %1}"
1942 [(set_attr "type" "sseicvt")
1943 (set_attr "athlon_decode" "double,vector")
1944 (set_attr "mode" "DI")])
;; Emits cvttsd2si: element 0 of V2DF operand 1 (register or memory) is
;; converted into an SI general register.
1946 (define_insn "sse2_cvttsd2si"
1947 [(set (match_operand:SI 0 "register_operand" "=r,r")
1950 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1951 (parallel [(const_int 0)]))))]
1953 "cvttsd2si\t{%1, %0|%0, %1}"
1954 [(set_attr "type" "sseicvt")
1955 (set_attr "mode" "SI")
1956 (set_attr "athlon_decode" "double,vector")])
;; 64-bit variant of sse2_cvttsd2si: the result is a DI general register
;; and the mnemonic is cvttsd2siq.  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
1958 (define_insn "sse2_cvttsd2siq"
1959 [(set (match_operand:DI 0 "register_operand" "=r,r")
1962 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1963 (parallel [(const_int 0)]))))]
1964 "TARGET_SSE2 && TARGET_64BIT"
1965 "cvttsd2siq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "sseicvt")
1967 (set_attr "mode" "DI")
1968 (set_attr "athlon_decode" "double,vector")])
;; Emits cvtdq2pd: elements 0 and 1 of V4SI operand 1 (register or
;; memory) are the inputs of the conversion to V2DF.
1970 (define_insn "sse2_cvtdq2pd"
1971 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1975 (parallel [(const_int 0) (const_int 1)]))))]
1977 "cvtdq2pd\t{%1, %0|%0, %1}"
1978 [(set_attr "type" "ssecvt")
1979 (set_attr "mode" "V2DF")])
;; Named entry point for the *sse2_cvtpd2dq insn.  The preparation code
;; sets operand 2 to the V2SI zero constant, which the insn requires via
;; its const0_operand predicate.
1981 (define_expand "sse2_cvtpd2dq"
1982 [(set (match_operand:V4SI 0 "register_operand" "")
1984 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1988 "operands[2] = CONST0_RTX (V2SImode);")
;; Emits cvtpd2dq.  The V4SI result pairs the V2SI conversion of operand 1
;; with operand 2, which must be a zero constant (const0_operand).
1990 (define_insn "*sse2_cvtpd2dq"
1991 [(set (match_operand:V4SI 0 "register_operand" "=x")
1993 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1995 (match_operand:V2SI 2 "const0_operand" "")))]
1997 "cvtpd2dq\t{%1, %0|%0, %1}"
1998 [(set_attr "type" "ssecvt")
1999 (set_attr "mode" "TI")])
;; Named entry point for the *sse2_cvttpd2dq insn.  The preparation code
;; sets operand 2 to the V2SI zero constant, which the insn requires via
;; its const0_operand predicate.
2001 (define_expand "sse2_cvttpd2dq"
2002 [(set (match_operand:V4SI 0 "register_operand" "")
2004 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2007 "operands[2] = CONST0_RTX (V2SImode);")
;; Emits cvttpd2dq.  The V4SI result pairs the `fix' conversion of
;; operand 1 with operand 2, which must be a zero constant (const0_operand).
2009 (define_insn "*sse2_cvttpd2dq"
2010 [(set (match_operand:V4SI 0 "register_operand" "=x")
2012 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2013 (match_operand:V2SI 2 "const0_operand" "")))]
2015 "cvttpd2dq\t{%1, %0|%0, %1}"
2016 [(set_attr "type" "ssecvt")
2017 (set_attr "mode" "TI")])
;; Emits cvtsd2ss: V2DF operand 2 (register or memory) is narrowed with
;; float_truncate; V4SF operand 1 is tied to the destination.
2019 (define_insn "sse2_cvtsd2ss"
2020 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2023 (float_truncate:V2SF
2024 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2025 (match_operand:V4SF 1 "register_operand" "0,0")
2028 "cvtsd2ss\t{%2, %0|%0, %2}"
2029 [(set_attr "type" "ssecvt")
2030 (set_attr "athlon_decode" "vector,double")
2031 (set_attr "mode" "SF")])
;; Emits cvtss2sd: elements 0 and 1 of V4SF operand 2 (register or
;; memory) feed the conversion; V2DF operand 1 is tied to the destination.
2033 (define_insn "sse2_cvtss2sd"
2034 [(set (match_operand:V2DF 0 "register_operand" "=x")
2038 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2039 (parallel [(const_int 0) (const_int 1)])))
2040 (match_operand:V2DF 1 "register_operand" "0")
2043 "cvtss2sd\t{%2, %0|%0, %2}"
2044 [(set_attr "type" "ssecvt")
2045 (set_attr "mode" "DF")])
;; Named entry point for the *sse2_cvtpd2ps insn.  The preparation code
;; sets operand 2 to the V2SF zero constant, which the insn requires via
;; its const0_operand predicate.  Constraints have no effect in a
;; define_expand, so operand 1 carries an empty constraint string like
;; the other expanders in this file; "xm" stays on the insn itself.
2047 (define_expand "sse2_cvtpd2ps"
2048 [(set (match_operand:V4SF 0 "register_operand" "")
2050 (float_truncate:V2SF
2051 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2054 "operands[2] = CONST0_RTX (V2SFmode);")
;; Emits cvtpd2ps.  The V4SF result pairs the float_truncate of V2DF
;; operand 1 with operand 2, which must be a zero constant (const0_operand).
2056 (define_insn "*sse2_cvtpd2ps"
2057 [(set (match_operand:V4SF 0 "register_operand" "=x")
2059 (float_truncate:V2SF
2060 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2061 (match_operand:V2SF 2 "const0_operand" "")))]
2063 "cvtpd2ps\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "ssecvt")
2065 (set_attr "mode" "V4SF")])
;; Emits cvtps2pd: elements 0 and 1 of V4SF operand 1 (register or
;; memory) are the inputs of the conversion to V2DF.
2067 (define_insn "sse2_cvtps2pd"
2068 [(set (match_operand:V2DF 0 "register_operand" "=x")
2071 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2072 (parallel [(const_int 0) (const_int 1)]))))]
2074 "cvtps2pd\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "ssecvt")
2076 (set_attr "mode" "V2DF")])
2078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2080 ;; Parallel double-precision floating point element swizzling
2082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives:
;;   reg, op1 tied            unpckhpd with operand 2
;;   reg, op2 tied            movlpd reading offsettable-memory operand 1
;;                            through %H1
;;   mem, op2 tied            movhpd store of register operand 1
;; The insn is rejected when operands 1 and 2 are both memory.
2084 (define_insn "sse2_unpckhpd"
2085 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2088 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2089 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2090 (parallel [(const_int 1)
2092 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2094 unpckhpd\t{%2, %0|%0, %2}
2095 movlpd\t{%H1, %0|%0, %H1}
2096 movhpd\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sselog,ssemov,ssemov")
2098 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Emits movddup for a register destination; a second alternative takes
;; an offsettable memory destination with a register source.  The pattern
;; has only operands 0 and 1, so the condition rules out the
;; memory-to-memory case by testing those two (it used to test
;; operands[2], which this pattern never sets).
2100 (define_insn "*sse3_movddup"
2101 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2104 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2106 (parallel [(const_int 0)
2108 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2110 movddup\t{%1, %0|%0, %1}
2112 [(set_attr "type" "sselog,ssemov")
2113 (set_attr "mode" "V2DF")])
;; Post-reload rewrite for a memory destination: register operand 1 is
;; re-expressed in DFmode and stored twice, at byte offsets 0 and 8 of
;; the destination.
2116 [(set (match_operand:V2DF 0 "memory_operand" "")
2119 (match_operand:V2DF 1 "register_operand" "")
2121 (parallel [(const_int 0)
2123 "TARGET_SSE3 && reload_completed"
2126 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2127 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2128 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Unpack-low of two V2DF operands; operand 1 is always tied to the
;; destination.  Alternatives:
;;   0: register source, unpcklpd;
;;   1: memory source loaded into the high half with movhpd;
;;   2: offsettable memory destination, operand 2 stored into its high
;;      half (%H0) with movlpd.
2132 (define_insn "sse2_unpcklpd"
2133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2136 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2137 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2138 (parallel [(const_int 0)
2140 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2142 unpcklpd\t{%2, %0|%0, %2}
2143 movhpd\t{%2, %0|%0, %2}
2144 movlpd\t{%2, %H0|%H0, %2}"
2145 [(set_attr "type" "sselog,ssemov,ssemov")
2146 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd with an immediate mask in operand 3.  The mask is
;; decoded into explicit element selectors for sse2_shufpd_1; bit 1 of
;; the mask chooses element index 3 or 2 for the last selector.
2148 (define_expand "sse2_shufpd"
2149 [(match_operand:V2DF 0 "register_operand" "")
2150 (match_operand:V2DF 1 "register_operand" "")
2151 (match_operand:V2DF 2 "nonimmediate_operand" "")
2152 (match_operand:SI 3 "const_int_operand" "")]
2155 int mask = INTVAL (operands[3]);
2156 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2158 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit selectors: operand 3 is in 0..1 and operand 4 is
;; in 2..3.  At output time the two selectors are folded back into the
;; 2-bit immediate (bit 0 = operand 3, bit 1 = operand 4 - 2), which
;; replaces operands[3] for the template.
2162 (define_insn "sse2_shufpd_1"
2163 [(set (match_operand:V2DF 0 "register_operand" "=x")
2166 (match_operand:V2DF 1 "register_operand" "0")
2167 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2168 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2169 (match_operand 4 "const_2_to_3_operand" "")])))]
2173 mask = INTVAL (operands[3]);
2174 mask |= (INTVAL (operands[4]) - 2) << 1;
2175 operands[3] = GEN_INT (mask);
2177 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2179 [(set_attr "type" "sselog")
2180 (set_attr "mode" "V2DF")])
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Alternative 0 stores to memory with movhpd; the other alternatives
;; cover a register source tied to the destination and an offsettable
;; memory source.  Memory-to-memory is rejected by the condition.
2182 (define_insn "sse2_storehpd"
2183 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2185 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2186 (parallel [(const_int 1)])))]
2187 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2189 movhpd\t{%1, %0|%0, %1}
2192 [(set_attr "type" "ssemov,sselog1,ssemov")
2193 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 from a V2DF in memory is just a
;; DFmode load from byte offset 8 of that memory.
2196 [(set (match_operand:DF 0 "register_operand" "")
2198 (match_operand:V2DF 1 "memory_operand" "")
2199 (parallel [(const_int 1)])))]
2200 "TARGET_SSE2 && reload_completed"
2201 [(set (match_dup 0) (match_dup 1))]
2203 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Alternative 0 stores to memory with movlpd; the remaining
;; alternatives have a register destination.  Memory-to-memory is
;; rejected by the condition.
2206 (define_insn "sse2_storelpd"
2207 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2209 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2210 (parallel [(const_int 0)])))]
2211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2213 movlpd\t{%1, %0|%0, %1}
2216 [(set_attr "type" "ssemov")
2217 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 into a register becomes a plain
;; DFmode move: the V2DF source is re-expressed in DFmode (as a DFmode
;; REG with the same register number, or through gen_lowpart) and moved
;; into operand 0.
2220 [(set (match_operand:DF 0 "register_operand" "")
2222 (match_operand:V2DF 1 "nonimmediate_operand" "")
2223 (parallel [(const_int 0)])))]
2224 "TARGET_SSE2 && reload_completed"
2227 rtx op1 = operands[1];
2229 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2231 op1 = gen_lowpart (DFmode, op1);
2232 emit_move_insn (operands[0], op1);
;; Replace the high double of a V2DF: element 0 of operand 1 is kept and
;; DF operand 2 becomes the new high element.  Alternatives:
;;   0: operand 2 in memory, movhpd;
;;   1: operand 2 in a register, unpcklpd;
;;   2: operand 2 tied to the destination, shufpd $1 with operand 1;
;;   3: offsettable memory destination (type "other").
2236 (define_insn "sse2_loadhpd"
2237 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2240 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2241 (parallel [(const_int 0)]))
2242 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2243 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2245 movhpd\t{%2, %0|%0, %2}
2246 unpcklpd\t{%2, %0|%0, %2}
2247 shufpd\t{$1, %1, %0|%0, %1, 1}
2249 [(set_attr "type" "ssemov,sselog,sselog,other")
2250 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; After reload, replacing the high double of a V2DF that lives in
;; memory is a DFmode store of operand 1 to byte offset 8; the low
;; element (selected from the same memory) is left untouched.
2253 [(set (match_operand:V2DF 0 "memory_operand" "")
2255 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2256 (match_operand:DF 1 "register_operand" "")))]
2257 "TARGET_SSE2 && reload_completed"
2258 [(set (match_dup 0) (match_dup 1))]
2260 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Replace the low double of a V2DF: DF operand 2 becomes the new low
;; element and element 1 of operand 1 is kept.  Alternatives:
;;   0: operand 1 is the zero vector ("C"), movsd load from memory;
;;   1: movlpd load from memory into the tied destination;
;;   2: movsd from a register into the tied destination;
;;   3: operand 2 tied to the destination, shufpd $2;
;;   4: operand 1 in offsettable memory, its high half (%H1) loaded
;;      with movhpd;
;;   5: memory destination (type "other").
2263 (define_insn "sse2_loadlpd"
2264 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2266 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2268 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2269 (parallel [(const_int 1)]))))]
2270 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2272 movsd\t{%2, %0|%0, %2}
2273 movlpd\t{%2, %0|%0, %2}
2274 movsd\t{%2, %0|%0, %2}
2275 shufpd\t{$2, %2, %0|%0, %2, 2}
2276 movhpd\t{%H1, %0|%0, %H1}
2278 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2279 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, replacing the low double of a V2DF that lives in memory
;; is a single DFmode store of operand 1.  The pattern keeps element 1
;; of the memory operand, so the store must go to the LOW half, i.e.
;; byte offset 0.  (Offset 8 would overwrite the high element that this
;; pattern is required to preserve.)
2282 [(set (match_operand:V2DF 0 "memory_operand" "")
2284 (match_operand:DF 1 "register_operand" "")
2285 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2286 "TARGET_SSE2 && reload_completed"
2287 [(set (match_dup 0) (match_dup 1))]
2289 operands[0] = adjust_address (operands[0], DFmode, 0);
;; movsd-style merge of two V2DF operands, six alternatives:
;;   0: register to register, movsd;
;;   1: operand 2 in memory, movlpd load;
;;   2: memory destination, movlpd store of operand 2;
;;   3: operand 2 tied to the destination, shufpd $2;
;;   4: operand 1 in offsettable memory, its high half (%H1) loaded
;;      with movhps;
;;   5: offsettable memory destination, operand 1 stored into its high
;;      half (%H0) with movhps.
2292 (define_insn "sse2_movsd"
2293 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2295 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2296 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2300 movsd\t{%2, %0|%0, %2}
2301 movlpd\t{%2, %0|%0, %2}
2302 movlpd\t{%2, %0|%0, %2}
2303 shufpd\t{$2, %2, %0|%0, %2, 2}
2304 movhps\t{%H1, %0|%0, %H1}
2305 movhps\t{%1, %H0|%H0, %1}"
2306 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2307 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Broadcast a DF value (register or memory) into a V2DF register using
;; movddup.
2309 (define_insn "*vec_dupv2df_sse3"
2310 [(set (match_operand:V2DF 0 "register_operand" "=x")
2312 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2314 "movddup\t{%1, %0|%0, %1}"
2315 [(set_attr "type" "sselog1")
2316 (set_attr "mode" "DF")])
;; Broadcast a DF register into a V2DF register; the source must already
;; live in the destination register (constraint "0").
2318 (define_insn "*vec_dupv2df"
2319 [(set (match_operand:V2DF 0 "register_operand" "=x")
2321 (match_operand:DF 1 "register_operand" "0")))]
2324 [(set_attr "type" "sselog1")
2325 (set_attr "mode" "V4SF")])
;; V2DF built from DF operand 1 (register or memory), implemented with a
;; single movddup.
2327 (define_insn "*vec_concatv2df_sse3"
2328 [(set (match_operand:V2DF 0 "register_operand" "=x")
2330 (match_operand:DF 1 "nonimmediate_operand" "xm")
2333 "movddup\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "sselog1")
2335 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives:
;;   0: both in registers, unpcklpd;
;;   1: operand 2 in memory, movhpd;
;;   2: operand 1 in memory with operand 2 the zero constant ("C"),
;;      movsd load;
;;   3: "x" registers, movlhps;
;;   4: "x" register with operand 2 in memory, movhps.
2337 (define_insn "*vec_concatv2df"
2338 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2340 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2341 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2344 unpcklpd\t{%2, %0|%0, %2}
2345 movhpd\t{%2, %0|%0, %2}
2346 movsd\t{%1, %0|%0, %1}
2347 movlhps\t{%2, %0|%0, %2}
2348 movhps\t{%2, %0|%0, %2}"
2349 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2350 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Set one element of a V2DF register: operand 1 is the DF value and
;; operand 2 the constant element index.  All work is delegated to
;; ix86_expand_vector_set.
2352 (define_expand "vec_setv2df"
2353 [(match_operand:V2DF 0 "register_operand" "")
2354 (match_operand:DF 1 "register_operand" "")
2355 (match_operand 2 "const_int_operand" "")]
2358 ix86_expand_vector_set (false, operands[0], operands[1],
2359 INTVAL (operands[2]));
;; Extract one element of a V2DF register into a DF register: operand 2
;; is the constant element index.  All work is delegated to
;; ix86_expand_vector_extract.
2363 (define_expand "vec_extractv2df"
2364 [(match_operand:DF 0 "register_operand" "")
2365 (match_operand:V2DF 1 "register_operand" "")
2366 (match_operand 2 "const_int_operand" "")]
2369 ix86_expand_vector_extract (false, operands[0], operands[1],
2370 INTVAL (operands[2]));
;; Initialise a V2DF register from operand 1 (no predicate is applied to
;; it here); all work is delegated to ix86_expand_vector_init.
2374 (define_expand "vec_initv2df"
2375 [(match_operand:V2DF 0 "register_operand" "")
2376 (match_operand 1 "" "")]
2379 ix86_expand_vector_init (false, operands[0], operands[1]);
2383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2385 ;; Parallel integral arithmetic
2387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate for every SSEMODEI mode.  The preparation
;; statement creates operand 2 as an all-zero vector forced into a
;; register, for use by the expansion pattern.
2389 (define_expand "neg<mode>2"
2390 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2393 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2395 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add for every SSEMODEI mode.  Operands are first
;; legitimised by ix86_fixup_binary_operands_no_copy.
2397 (define_expand "add<mode>3"
2398 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2399 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2400 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2402 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Packed integer add, padd{b,w,d,q} chosen by <ssevecsize>.  Operand 1
;; is commutative ("%") and tied to the destination.
2404 (define_insn "*add<mode>3"
2405 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2407 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2408 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2409 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2410 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2411 [(set_attr "type" "sseiadd")
2412 (set_attr "mode" "TI")])
;; Signed saturating add for byte and word vectors (SSEMODE12),
;; padds{b,w}.  Operand 1 is commutative and tied to the destination.
2414 (define_insn "sse2_ssadd<mode>3"
2415 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2417 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2418 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2419 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2420 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sseiadd")
2422 (set_attr "mode" "TI")])
;; Unsigned saturating add for byte and word vectors (SSEMODE12),
;; paddus{b,w}.  Operand 1 is commutative and tied to the destination.
2424 (define_insn "sse2_usadd<mode>3"
2425 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2427 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2428 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2429 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2430 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2431 [(set_attr "type" "sseiadd")
2432 (set_attr "mode" "TI")])
;; Integer vector subtract for every SSEMODEI mode.  Operand 1 must be a
;; register (subtraction is not commutative); operands are legitimised
;; by ix86_fixup_binary_operands_no_copy.
2434 (define_expand "sub<mode>3"
2435 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2436 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2437 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2439 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Packed integer subtract, psub{b,w,d,q} chosen by <ssevecsize>;
;; operand 1 is tied to the destination.
2441 (define_insn "*sub<mode>3"
2442 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2444 (match_operand:SSEMODEI 1 "register_operand" "0")
2445 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2447 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2448 [(set_attr "type" "sseiadd")
2449 (set_attr "mode" "TI")])
;; Signed saturating subtract for byte and word vectors (SSEMODE12),
;; psubs{b,w}; operand 1 is tied to the destination.
2451 (define_insn "sse2_sssub<mode>3"
2452 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2454 (match_operand:SSEMODE12 1 "register_operand" "0")
2455 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2457 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2458 [(set_attr "type" "sseiadd")
2459 (set_attr "mode" "TI")])
;; Unsigned saturating subtract for byte and word vectors (SSEMODE12),
;; psubus{b,w}; operand 1 is tied to the destination.
2461 (define_insn "sse2_ussub<mode>3"
2462 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2464 (match_operand:SSEMODE12 1 "register_operand" "0")
2465 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2467 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2468 [(set_attr "type" "sseiadd")
2469 (set_attr "mode" "TI")])
;; V16QI multiply, synthesised from word multiplies because no byte
;; multiply pattern is used here.  Twelve V16QI temporaries are
;; allocated.  Each input is widened with punpck{h,l}bw against itself,
;; the halves are multiplied as V8HI via mulv8hi3, and the wanted result
;; bytes are gathered back into byte order by a sequence of
;; punpck{h,l}bw interleaves.
2471 (define_expand "mulv16qi3"
2472 [(set (match_operand:V16QI 0 "register_operand" "")
2473 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2474 (match_operand:V16QI 2 "register_operand" "")))]
2480 for (i = 0; i < 12; ++i)
2481 t[i] = gen_reg_rtx (V16QImode);
2483 /* Unpack data such that we've got a source byte in each low byte of
2484 each word. We don't care what goes into the high byte of each word.
2485 Rather than trying to get zero in there, most convenient is to let
2486 it be a copy of the low byte. */
2487 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2488 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2489 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2490 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2492 /* Multiply words. The end-of-line annotations here give a picture of what
2493 the output of that instruction looks like. Dot means don't care; the
2494 letters are the bytes of the result with A being the most significant. */
2495 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2496 gen_lowpart (V8HImode, t[0]),
2497 gen_lowpart (V8HImode, t[1])));
2498 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2499 gen_lowpart (V8HImode, t[2]),
2500 gen_lowpart (V8HImode, t[3])));
2502 /* Extract the relevant bytes and merge them back together. */
2503 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2504 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2505 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2506 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2507 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2508 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2511 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
2515 (define_expand "mulv8hi3"
2516 [(set (match_operand:V8HI 0 "register_operand" "")
2517 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2518 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2520 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Packed word multiply, pmullw.  Operand 1 is commutative and tied to
;; the destination.
2522 (define_insn "*mulv8hi3"
2523 [(set (match_operand:V8HI 0 "register_operand" "=x")
2524 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2525 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2526 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2527 "pmullw\t{%2, %0|%0, %2}"
2528 [(set_attr "type" "sseimul")
2529 (set_attr "mode" "TI")])
;; Signed word multiply, high part (pmulhw).  Operand 1 is commutative
;; and tied to the destination.
2531 (define_insn "sse2_smulv8hi3_highpart"
2532 [(set (match_operand:V8HI 0 "register_operand" "=x")
2537 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2539 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2541 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2542 "pmulhw\t{%2, %0|%0, %2}"
2543 [(set_attr "type" "sseimul")
2544 (set_attr "mode" "TI")])
;; Unsigned word multiply, high part (pmulhuw).  Operand 1 is
;; commutative and tied to the destination.
2546 (define_insn "sse2_umulv8hi3_highpart"
2547 [(set (match_operand:V8HI 0 "register_operand" "=x")
2552 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2554 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2556 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2557 "pmulhuw\t{%2, %0|%0, %2}"
2558 [(set_attr "type" "sseimul")
2559 (set_attr "mode" "TI")])
;; Widening unsigned multiply (pmuludq): elements 0 and 2 of each V4SI
;; input are selected and multiplied into a V2DI result.  Operand 1 is
;; commutative and tied to the destination.
;; The operand-validity check is given V4SImode, the mode of the inputs
;; this pattern actually takes.
2561 (define_insn "sse2_umulv2siv2di3"
2562 [(set (match_operand:V2DI 0 "register_operand" "=x")
2566 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2567 (parallel [(const_int 0) (const_int 2)])))
2570 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2571 (parallel [(const_int 0) (const_int 2)])))))]
2572 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2573 "pmuludq\t{%2, %0|%0, %2}"
2574 [(set_attr "type" "sseimul")
2575 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The pattern
;; selects two V4HI sub-vectors from each input (one selection list
;; starting at element 0, the other at element 1 and ending at 7) and
;; combines them.  Operand 1 is commutative and tied to the destination.
2577 (define_insn "sse2_pmaddwd"
2578 [(set (match_operand:V4SI 0 "register_operand" "=x")
2583 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2584 (parallel [(const_int 0)
2590 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2591 (parallel [(const_int 0)
2597 (vec_select:V4HI (match_dup 1)
2598 (parallel [(const_int 1)
2603 (vec_select:V4HI (match_dup 2)
2604 (parallel [(const_int 1)
2607 (const_int 7)]))))))]
2609 "pmaddwd\t{%2, %0|%0, %2}"
2610 [(set_attr "type" "sseiadd")
2611 (set_attr "mode" "TI")])
;; V4SI multiply, synthesised from two widening pmuludq multiplies.
;; Elements 0 and 2 are multiplied directly; both inputs are then
;; shifted right by 32 bits as TImode values so that elements 1 and 3
;; move into the even slots and can be multiplied the same way.  The
;; low halves of the two V2DI products are brought together with pshufd
;; and interleaved with punpckldq into the destination.
2613 (define_expand "mulv4si3"
2614 [(set (match_operand:V4SI 0 "register_operand" "")
2615 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2616 (match_operand:V4SI 2 "register_operand" "")))]
2619 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2625 t1 = gen_reg_rtx (V4SImode);
2626 t2 = gen_reg_rtx (V4SImode);
2627 t3 = gen_reg_rtx (V4SImode);
2628 t4 = gen_reg_rtx (V4SImode);
2629 t5 = gen_reg_rtx (V4SImode);
2630 t6 = gen_reg_rtx (V4SImode);
2631 thirtytwo = GEN_INT (32);
2633 /* Multiply elements 2 and 0. */
2634 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2636 /* Shift both input vectors down one element, so that elements 3 and 1
2637 are now in the slots for elements 2 and 0. For K8, at least, this is
2638 faster than using a shuffle. */
2639 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2640 gen_lowpart (TImode, op1), thirtytwo));
2641 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2642 gen_lowpart (TImode, op2), thirtytwo));
2644 /* Multiply elements 3 and 1. */
2645 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2647 /* Move the results in element 2 down to element 1; we don't care what
2648 goes in elements 2 and 3. */
2649 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2650 const0_rtx, const0_rtx));
2651 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2652 const0_rtx, const0_rtx));
2654 /* Merge the parts back together. */
2655 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, synthesised from 32x32->64 pmuludq multiplies.  With
;; each 64-bit element split into a low and a high 32-bit half, the
;; result is lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32).
2659 (define_expand "mulv2di3"
2660 [(set (match_operand:V2DI 0 "register_operand" "")
2661 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2662 (match_operand:V2DI 2 "register_operand" "")))]
2665 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2671 t1 = gen_reg_rtx (V2DImode);
2672 t2 = gen_reg_rtx (V2DImode);
2673 t3 = gen_reg_rtx (V2DImode);
2674 t4 = gen_reg_rtx (V2DImode);
2675 t5 = gen_reg_rtx (V2DImode);
2676 t6 = gen_reg_rtx (V2DImode);
2677 thirtytwo = GEN_INT (32);
2679 /* Multiply low parts. */
2680 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2681 gen_lowpart (V4SImode, op2)));
2683 /* Shift input vectors right 32 bits so the high parts sit in the low
2683 half of each element, where the widening multiply reads them. */
2684 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2685 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2687 /* Multiply high parts by low parts. */
2688 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2689 gen_lowpart (V4SImode, t3)));
2690 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2691 gen_lowpart (V4SImode, t2)));
2693 /* Shift them back. */
2694 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2695 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2697 /* Add the three parts together. */
2698 emit_insn (gen_addv2di3 (t6, t1, t4));
2699 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Arithmetic right shift of word and dword vectors (SSEMODE24),
;; psra{w,d}.  The count (operand 2) is an SSE register or an immediate.
2703 (define_insn "ashr<mode>3"
2704 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2706 (match_operand:SSEMODE24 1 "register_operand" "0")
2707 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2709 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2710 [(set_attr "type" "sseishft")
2711 (set_attr "mode" "TI")])
;; Logical right shift of word, dword and qword vectors (SSEMODE248),
;; psrl{w,d,q}.  The count (operand 2) is an SSE register or an
;; immediate.
2713 (define_insn "lshr<mode>3"
2714 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2715 (lshiftrt:SSEMODE248
2716 (match_operand:SSEMODE248 1 "register_operand" "0")
2717 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2719 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2720 [(set_attr "type" "sseishft")
2721 (set_attr "mode" "TI")])
;; Left shift of word, dword and qword vectors (SSEMODE248),
;; psll{w,d,q}.  The count (operand 2) is an SSE register or an
;; immediate.
2723 (define_insn "ashl<mode>3"
2724 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2726 (match_operand:SSEMODE248 1 "register_operand" "0")
2727 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2729 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2730 [(set_attr "type" "sseishft")
2731 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift, pslldq.  Operand 2 is a bit count
;; accepted by const_0_to_255_mul_8_operand; it is divided by 8 at
;; output time because the instruction takes a byte count.
2733 (define_insn "sse2_ashlti3"
2734 [(set (match_operand:TI 0 "register_operand" "=x")
2735 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2736 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2739 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2740 return "pslldq\t{%2, %0|%0, %2}";
2742 [(set_attr "type" "sseishft")
2743 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a
;; TImode shift.  The count is checked against
;; const_0_to_255_mul_8_operand, then operands 0 and 1 are
;; reinterpreted as TImode.
2745 (define_expand "vec_shl_<mode>"
2746 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2747 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2748 (match_operand:SI 2 "general_operand" "")))]
2751 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2753 operands[0] = gen_lowpart (TImode, operands[0]);
2754 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift, psrldq.  Operand 2 is a
;; bit count accepted by const_0_to_255_mul_8_operand; it is divided by
;; 8 at output time because the instruction takes a byte count.
2757 (define_insn "sse2_lshrti3"
2758 [(set (match_operand:TI 0 "register_operand" "=x")
2759 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2760 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2763 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2764 return "psrldq\t{%2, %0|%0, %2}";
2766 [(set_attr "type" "sseishft")
2767 (set_attr "mode" "TI")])
;; Whole-vector logical right shift for every SSEMODEI mode, expressed
;; as a TImode shift.  The count is checked against
;; const_0_to_255_mul_8_operand, then operands 0 and 1 are
;; reinterpreted as TImode.
2769 (define_expand "vec_shr_<mode>"
2770 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2771 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2772 (match_operand:SI 2 "general_operand" "")))]
2775 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2777 operands[0] = gen_lowpart (TImode, operands[0]);
2778 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned byte maximum expander; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
2781 (define_expand "umaxv16qi3"
2782 [(set (match_operand:V16QI 0 "register_operand" "")
2783 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2784 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2786 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned byte maximum, pmaxub.  Operand 1 is commutative and tied to
;; the destination.
2788 (define_insn "*umaxv16qi3"
2789 [(set (match_operand:V16QI 0 "register_operand" "=x")
2790 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2791 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2792 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2793 "pmaxub\t{%2, %0|%0, %2}"
2794 [(set_attr "type" "sseiadd")
2795 (set_attr "mode" "TI")])
;; Signed word maximum expander; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
2797 (define_expand "smaxv8hi3"
2798 [(set (match_operand:V8HI 0 "register_operand" "")
2799 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2800 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2802 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed word maximum, pmaxsw.  Operand 1 is commutative and tied to
;; the destination.
2804 (define_insn "*smaxv8hi3"
2805 [(set (match_operand:V8HI 0 "register_operand" "=x")
2806 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2807 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2808 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2809 "pmaxsw\t{%2, %0|%0, %2}"
2810 [(set_attr "type" "sseiadd")
2811 (set_attr "mode" "TI")])
;; Unsigned word maximum built in two steps: an unsigned saturating
;; subtract of operand 2 from operand 1, followed by adding operand 2
;; back.  operands[3] remembers the original destination.  If the
;; destination is the same rtx as operand 2, a fresh pseudo is used for
;; the intermediate result so that operand 2 is still intact for the
;; add.
2813 (define_expand "umaxv8hi3"
2814 [(set (match_operand:V8HI 0 "register_operand" "=x")
2815 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2816 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2818 (plus:V8HI (match_dup 0) (match_dup 2)))]
2821 operands[3] = operands[0];
2822 if (rtx_equal_p (operands[0], operands[2]))
2823 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for V16QI and V4SI (SSEMODE14), expanded as a vector
;; conditional: dest = (op1 > op2) ? op1 : op2.  The xops array follows
;; the vcond operand layout (0 dest, 1 true value, 2 false value,
;; 3 comparison, 4 and 5 comparison operands).
2826 (define_expand "smax<mode>3"
2827 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2828 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2829 (match_operand:SSEMODE14 2 "register_operand" "")))]
2835 xops[0] = operands[0];
2836 xops[1] = operands[1];
2837 xops[2] = operands[2];
2838 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2839 xops[4] = operands[1];
2840 xops[5] = operands[2];
2841 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expanded as a vector conditional with an
;; unsigned comparison: dest = (op1 >u op2) ? op1 : op2.  The xops array
;; follows the vcond operand layout (0 dest, 1 true value, 2 false
;; value, 3 comparison, 4 and 5 comparison operands).
2846 (define_expand "umaxv4si3"
2847 [(set (match_operand:V4SI 0 "register_operand" "")
2848 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2849 (match_operand:V4SI 2 "register_operand" "")))]
2855 xops[0] = operands[0];
2856 xops[1] = operands[1];
2857 xops[2] = operands[2];
2858 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2859 xops[4] = operands[1];
2860 xops[5] = operands[2];
2861 ok = ix86_expand_int_vcond (xops);
;; Unsigned byte minimum expander.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy, which is passed UMIN so that the
;; rtx code agrees with the umin pattern here and with the check in the
;; matching *uminv16qi3 insn.
2866 (define_expand "uminv16qi3"
2867 [(set (match_operand:V16QI 0 "register_operand" "")
2868 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2869 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2871 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned byte minimum, pminub.  Operand 1 is commutative and tied to
;; the destination.
2873 (define_insn "*uminv16qi3"
2874 [(set (match_operand:V16QI 0 "register_operand" "=x")
2875 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2876 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2877 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2878 "pminub\t{%2, %0|%0, %2}"
2879 [(set_attr "type" "sseiadd")
2880 (set_attr "mode" "TI")])
;; Signed word minimum expander; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
2882 (define_expand "sminv8hi3"
2883 [(set (match_operand:V8HI 0 "register_operand" "")
2884 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2885 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2887 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed word minimum, pminsw.  Operand 1 is commutative and tied to
;; the destination.
2889 (define_insn "*sminv8hi3"
2890 [(set (match_operand:V8HI 0 "register_operand" "=x")
2891 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2892 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2893 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2894 "pminsw\t{%2, %0|%0, %2}"
2895 [(set_attr "type" "sseiadd")
2896 (set_attr "mode" "TI")])
;; Signed minimum for V16QI and V4SI (SSEMODE14), expanded as a vector
;; conditional with the value arms swapped relative to smax:
;; dest = (op1 > op2) ? op2 : op1.
2898 (define_expand "smin<mode>3"
2899 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2900 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2901 (match_operand:SSEMODE14 2 "register_operand" "")))]
2907 xops[0] = operands[0];
2908 xops[1] = operands[2];
2909 xops[2] = operands[1];
2910 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2911 xops[4] = operands[1];
2912 xops[5] = operands[2];
2913 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI and V4SI (SSEMODE24), expanded as a vector
;; conditional with an unsigned comparison:
;; dest = (op1 >u op2) ? op2 : op1.
2918 (define_expand "umin<mode>3"
2919 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2920 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2921 (match_operand:SSEMODE24 2 "register_operand" "")))]
2927 xops[0] = operands[0];
2928 xops[1] = operands[2];
2929 xops[2] = operands[1];
2930 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2931 xops[4] = operands[1];
2932 xops[5] = operands[2];
2933 ok = ix86_expand_int_vcond (xops);
2938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2940 ;; Parallel integral comparisons
2942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed integer equality compare for byte, word and dword vectors
;; (SSEMODE124), pcmpeq{b,w,d}.  Operand 1 is commutative and tied to
;; the destination.
2944 (define_insn "sse2_eq<mode>3"
2945 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2947 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2948 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2949 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2950 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2951 [(set_attr "type" "ssecmp")
2952 (set_attr "mode" "TI")])
;; Packed integer greater-than compare for byte, word and dword vectors
;; (SSEMODE124), pcmpgt{b,w,d}.  Not commutative: operand 1 must be the
;; register tied to the destination.
2954 (define_insn "sse2_gt<mode>3"
2955 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2957 (match_operand:SSEMODE124 1 "register_operand" "0")
2958 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2960 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2961 [(set_attr "type" "ssecmp")
2962 (set_attr "mode" "TI")])
;; Vector conditional select for SSEMODE124 modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1
;;                                                  : operand 2.
;; Expansion is attempted through ix86_expand_int_vcond.
2964 (define_expand "vcond<mode>"
2965 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2966 (if_then_else:SSEMODE124
2967 (match_operator 3 ""
2968 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2969 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2970 (match_operand:SSEMODE124 1 "general_operand" "")
2971 (match_operand:SSEMODE124 2 "general_operand" "")))]
2974 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for SSEMODE124 modes under the unsigned
;; pattern name, with the same operand layout as vcond<mode>:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1
;;                                                  : operand 2.
;; Expansion is attempted through ix86_expand_int_vcond.
2980 (define_expand "vcondu<mode>"
2981 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2982 (if_then_else:SSEMODE124
2983 (match_operator 3 ""
2984 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2985 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2986 (match_operand:SSEMODE124 1 "general_operand" "")
2987 (match_operand:SSEMODE124 2 "general_operand" "")))]
2990 if (ix86_expand_int_vcond (operands))
2996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2998 ;; Parallel integral logical operations
3000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement for every SSEMODEI mode, expressed as an XOR.  The
;; preparation code builds a constant vector whose elements are all -1
;; (one per vector unit) and forces it into a register as operand 2.
3002 (define_expand "one_cmpl<mode>2"
3003 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3004 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3008 int i, n = GET_MODE_NUNITS (<MODE>mode);
3009 rtvec v = rtvec_alloc (n);
3011 for (i = 0; i < n; ++i)
3012 RTVEC_ELT (v, i) = constm1_rtx;
3014 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for every SSEMODEI mode; operands are
;; legitimised by ix86_fixup_binary_operands_no_copy.
3017 (define_expand "and<mode>3"
3018 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3019 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3020 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3022 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND, pand (the same mnemonic for every element size).
;; Operand 1 is commutative and tied to the destination.
3024 (define_insn "*and<mode>3"
3025 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3027 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3028 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3029 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3030 "pand\t{%2, %0|%0, %2}"
3031 [(set_attr "type" "sselog")
3032 (set_attr "mode" "TI")])
;; pandn: operand 1 (tied to the destination) is complemented before
;; being combined with operand 2.  Not commutative, so operand 1 must be
;; a register.
3034 (define_insn "sse2_nand<mode>3"
3035 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3037 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3038 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3040 "pandn\t{%2, %0|%0, %2}"
3041 [(set_attr "type" "sselog")
3042 (set_attr "mode" "TI")])
;; Bitwise OR expander for every SSEMODEI mode; operands are legitimised
;; by ix86_fixup_binary_operands_no_copy.
3044 (define_expand "ior<mode>3"
3045 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3046 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3047 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3049 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise OR, por.  Operand 1 is commutative and tied to the
;; destination.
3051 (define_insn "*ior<mode>3"
3052 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3054 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3055 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3056 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3057 "por\t{%2, %0|%0, %2}"
3058 [(set_attr "type" "sselog")
3059 (set_attr "mode" "TI")])
;; Bitwise XOR expander for every SSEMODEI mode; operands are
;; legitimised by ix86_fixup_binary_operands_no_copy.
3061 (define_expand "xor<mode>3"
3062 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3063 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3064 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3066 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise XOR, pxor.  Operand 1 is commutative and tied to the
;; destination.
3068 (define_insn "*xor<mode>3"
3069 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3071 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3072 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3073 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3074 "pxor\t{%2, %0|%0, %2}"
3075 [(set_attr "type" "sselog")
3076 (set_attr "mode" "TI")])
3078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3080 ;; Parallel integral element swizzling
3082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packsswb: pack two V8HI inputs into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
3084 (define_insn "sse2_packsswb"
3085 [(set (match_operand:V16QI 0 "register_operand" "=x")
3088 (match_operand:V8HI 1 "register_operand" "0"))
3090 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3092 "packsswb\t{%2, %0|%0, %2}"
3093 [(set_attr "type" "sselog")
3094 (set_attr "mode" "TI")])
;; packssdw: pack two V4SI inputs into one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
3096 (define_insn "sse2_packssdw"
3097 [(set (match_operand:V8HI 0 "register_operand" "=x")
3100 (match_operand:V4SI 1 "register_operand" "0"))
3102 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3104 "packssdw\t{%2, %0|%0, %2}"
3105 [(set_attr "type" "sselog")
3106 (set_attr "mode" "TI")])
;; packuswb: pack two V8HI inputs into one V16QI result.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
3108 (define_insn "sse2_packuswb"
3109 [(set (match_operand:V16QI 0 "register_operand" "=x")
3112 (match_operand:V8HI 1 "register_operand" "0"))
3114 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3116 "packuswb\t{%2, %0|%0, %2}"
3117 [(set_attr "type" "sselog")
3118 (set_attr "mode" "TI")])
;; punpckhbw: interleave the high eight bytes of operands 1 and 2.  The
;; selection list alternates indices 8..15 (operand 1) with 24..31
;; (operand 2).
3120 (define_insn "sse2_punpckhbw"
3121 [(set (match_operand:V16QI 0 "register_operand" "=x")
3124 (match_operand:V16QI 1 "register_operand" "0")
3125 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3126 (parallel [(const_int 8) (const_int 24)
3127 (const_int 9) (const_int 25)
3128 (const_int 10) (const_int 26)
3129 (const_int 11) (const_int 27)
3130 (const_int 12) (const_int 28)
3131 (const_int 13) (const_int 29)
3132 (const_int 14) (const_int 30)
3133 (const_int 15) (const_int 31)])))]
3135 "punpckhbw\t{%2, %0|%0, %2}"
3136 [(set_attr "type" "sselog")
3137 (set_attr "mode" "TI")])
;; punpcklbw: interleave the low eight bytes of operands 1 and 2.  The
;; selection list alternates indices 0..7 (operand 1) with 16..23
;; (operand 2).
3139 (define_insn "sse2_punpcklbw"
3140 [(set (match_operand:V16QI 0 "register_operand" "=x")
3143 (match_operand:V16QI 1 "register_operand" "0")
3144 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3145 (parallel [(const_int 0) (const_int 16)
3146 (const_int 1) (const_int 17)
3147 (const_int 2) (const_int 18)
3148 (const_int 3) (const_int 19)
3149 (const_int 4) (const_int 20)
3150 (const_int 5) (const_int 21)
3151 (const_int 6) (const_int 22)
3152 (const_int 7) (const_int 23)])))]
3154 "punpcklbw\t{%2, %0|%0, %2}"
3155 [(set_attr "type" "sselog")
3156 (set_attr "mode" "TI")])
;; punpckhwd: interleave the high four words of operands 1 and 2.  The
;; selection list alternates indices 4..7 (operand 1) with 12..15
;; (operand 2).
3158 (define_insn "sse2_punpckhwd"
3159 [(set (match_operand:V8HI 0 "register_operand" "=x")
3162 (match_operand:V8HI 1 "register_operand" "0")
3163 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3164 (parallel [(const_int 4) (const_int 12)
3165 (const_int 5) (const_int 13)
3166 (const_int 6) (const_int 14)
3167 (const_int 7) (const_int 15)])))]
3169 "punpckhwd\t{%2, %0|%0, %2}"
3170 [(set_attr "type" "sselog")
3171 (set_attr "mode" "TI")])
;; Interleave the low four words of operands 1 and 2 (punpcklwd).
;; The selector alternates indices 0..3 with 8..11; indices of 8 and
;; above presumably address operand 2 -- verify against the enclosing RTL.
;; Operand 1 is tied to the destination register.
3173 (define_insn "sse2_punpcklwd"
3174 [(set (match_operand:V8HI 0 "register_operand" "=x")
3177 (match_operand:V8HI 1 "register_operand" "0")
3178 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3179 (parallel [(const_int 0) (const_int 8)
3180 (const_int 1) (const_int 9)
3181 (const_int 2) (const_int 10)
3182 (const_int 3) (const_int 11)])))]
3184 "punpcklwd\t{%2, %0|%0, %2}"
3185 [(set_attr "type" "sselog")
3186 (set_attr "mode" "TI")])
;; Interleave the high two doublewords of operands 1 and 2 (punpckhdq).
;; The selector is 2, 6, 3, 7; indices of 4 and above presumably address
;; operand 2 -- verify against the enclosing RTL.
;; Operand 1 is tied to the destination register.
3188 (define_insn "sse2_punpckhdq"
3189 [(set (match_operand:V4SI 0 "register_operand" "=x")
3192 (match_operand:V4SI 1 "register_operand" "0")
3193 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3194 (parallel [(const_int 2) (const_int 6)
3195 (const_int 3) (const_int 7)])))]
3197 "punpckhdq\t{%2, %0|%0, %2}"
3198 [(set_attr "type" "sselog")
3199 (set_attr "mode" "TI")])
;; Interleave the low two doublewords of operands 1 and 2 (punpckldq).
;; The selector is 0, 4, 1, 5; indices of 4 and above presumably address
;; operand 2 -- verify against the enclosing RTL.
;; Operand 1 is tied to the destination register.
3201 (define_insn "sse2_punpckldq"
3202 [(set (match_operand:V4SI 0 "register_operand" "=x")
3205 (match_operand:V4SI 1 "register_operand" "0")
3206 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3207 (parallel [(const_int 0) (const_int 4)
3208 (const_int 1) (const_int 5)])))]
3210 "punpckldq\t{%2, %0|%0, %2}"
3211 [(set_attr "type" "sselog")
3212 (set_attr "mode" "TI")])
;; Combine quadwords of the V2DI operands 1 and 2 by emitting punpckhqdq.
;; The element selector begins with index 1.  Operand 1 is tied to the
;; destination register; operand 2 may be an SSE register or memory.
3214 (define_insn "sse2_punpckhqdq"
3215 [(set (match_operand:V2DI 0 "register_operand" "=x")
3218 (match_operand:V2DI 1 "register_operand" "0")
3219 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3220 (parallel [(const_int 1)
3223 "punpckhqdq\t{%2, %0|%0, %2}"
3224 [(set_attr "type" "sselog")
3225 (set_attr "mode" "TI")])
;; Combine quadwords of the V2DI operands 1 and 2 by emitting punpcklqdq.
;; The element selector begins with index 0.  Operand 1 is tied to the
;; destination register; operand 2 may be an SSE register or memory.
3227 (define_insn "sse2_punpcklqdq"
3228 [(set (match_operand:V2DI 0 "register_operand" "=x")
3231 (match_operand:V2DI 1 "register_operand" "0")
3232 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3233 (parallel [(const_int 0)
3236 "punpcklqdq\t{%2, %0|%0, %2}"
3237 [(set_attr "type" "sselog")
3238 (set_attr "mode" "TI")])
;; Expander for inserting a word into a V8HI vector.
;;   operand 0: V8HI destination register
;;   operand 1: V8HI source vector
;;   operand 2: SImode value to insert; narrowed to HImode via gen_lowpart
;;   operand 3: element index 0..7; rewritten to the one-hot mask
;;              (1 << index), the form accepted by the
;;              const_pow2_1_to_128_operand predicate of *sse2_pinsrw.
3240 (define_expand "sse2_pinsrw"
3241 [(set (match_operand:V8HI 0 "register_operand" "")
3244 (match_operand:SI 2 "nonimmediate_operand" ""))
3245 (match_operand:V8HI 1 "register_operand" "")
3246 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3249 operands[2] = gen_lowpart (HImode, operands[2]);
3250 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Insert the HImode operand 2 (general register or memory) into the V8HI
;; operand 1, which is tied to the destination.  Operand 3 is a
;; power-of-two mask in 1..128; the output code converts it back to the
;; element index with exact_log2 before the pinsrw immediate is printed.
;; Operand 2 is printed with the k modifier.
3253 (define_insn "*sse2_pinsrw"
3254 [(set (match_operand:V8HI 0 "register_operand" "=x")
3257 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3258 (match_operand:V8HI 1 "register_operand" "0")
3259 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3262 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3263 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3265 [(set_attr "type" "sselog")
3266 (set_attr "mode" "TI")])
;; Extract the element of the V8HI register operand 1 selected by the
;; constant operand 2 (0..7) into the SImode general register operand 0,
;; using pextrw.
3268 (define_insn "sse2_pextrw"
3269 [(set (match_operand:SI 0 "register_operand" "=r")
3272 (match_operand:V8HI 1 "register_operand" "x")
3273 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3275 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3276 [(set_attr "type" "sselog")
3277 (set_attr "mode" "TI")])
;; Expander for pshufd taking the combined shuffle immediate.
;;   operand 0: V4SI destination register
;;   operand 1: V4SI source (register or memory)
;;   operand 2: constant integer shuffle mask
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7),
;; each passed as a separate selector to sse2_pshufd_1.
3279 (define_expand "sse2_pshufd"
3280 [(match_operand:V4SI 0 "register_operand" "")
3281 (match_operand:V4SI 1 "nonimmediate_operand" "")
3282 (match_operand:SI 2 "const_int_operand" "")]
3285 int mask = INTVAL (operands[2]);
3286 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3287 GEN_INT ((mask >> 0) & 3),
3288 GEN_INT ((mask >> 2) & 3),
3289 GEN_INT ((mask >> 4) & 3),
3290 GEN_INT ((mask >> 6) & 3)));
;; pshufd with one selector operand per result element.  Operands 2..5
;; are constants in 0..3.  The output code packs them back into a single
;; immediate, two bits each with operand 2 in the lowest bits, and stores
;; it in operands[2] so the template can print it.
3294 (define_insn "sse2_pshufd_1"
3295 [(set (match_operand:V4SI 0 "register_operand" "=x")
3297 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3298 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3299 (match_operand 3 "const_0_to_3_operand" "")
3300 (match_operand 4 "const_0_to_3_operand" "")
3301 (match_operand 5 "const_0_to_3_operand" "")])))]
3305 mask |= INTVAL (operands[2]) << 0;
3306 mask |= INTVAL (operands[3]) << 2;
3307 mask |= INTVAL (operands[4]) << 4;
3308 mask |= INTVAL (operands[5]) << 6;
3309 operands[2] = GEN_INT (mask);
3311 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3313 [(set_attr "type" "sselog1")
3314 (set_attr "mode" "TI")])
;; Expander for pshuflw taking the combined shuffle immediate.
;;   operand 0: V8HI destination register
;;   operand 1: V8HI source (register or memory)
;;   operand 2: constant integer shuffle mask
;; The mask is split into four 2-bit fields, each passed as a separate
;; selector to sse2_pshuflw_1.
3316 (define_expand "sse2_pshuflw"
3317 [(match_operand:V8HI 0 "register_operand" "")
3318 (match_operand:V8HI 1 "nonimmediate_operand" "")
3319 (match_operand:SI 2 "const_int_operand" "")]
3322 int mask = INTVAL (operands[2]);
3323 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3324 GEN_INT ((mask >> 0) & 3),
3325 GEN_INT ((mask >> 2) & 3),
3326 GEN_INT ((mask >> 4) & 3),
3327 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with one selector operand per shuffled word.  Operands 2..5
;; are constants in 0..3.  The output code packs them back into a single
;; immediate, two bits each with operand 2 in the lowest bits, and stores
;; it in operands[2] so the template can print it.
3331 (define_insn "sse2_pshuflw_1"
3332 [(set (match_operand:V8HI 0 "register_operand" "=x")
3334 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3335 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3336 (match_operand 3 "const_0_to_3_operand" "")
3337 (match_operand 4 "const_0_to_3_operand" "")
3338 (match_operand 5 "const_0_to_3_operand" "")
3346 mask |= INTVAL (operands[2]) << 0;
3347 mask |= INTVAL (operands[3]) << 2;
3348 mask |= INTVAL (operands[4]) << 4;
3349 mask |= INTVAL (operands[5]) << 6;
3350 operands[2] = GEN_INT (mask);
3352 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3354 [(set_attr "type" "sselog")
3355 (set_attr "mode" "TI")])
;; Expander for pshufhw taking the combined shuffle immediate.
;;   operand 0: V8HI destination register
;;   operand 1: V8HI source (register or memory)
;;   operand 2: constant integer shuffle mask
;; The mask is split into four 2-bit fields; each is biased by 4 so that
;; it falls in 4..7, the range required by the const_4_to_7_operand
;; selectors of sse2_pshufhw_1.
3357 (define_expand "sse2_pshufhw"
3358 [(match_operand:V8HI 0 "register_operand" "")
3359 (match_operand:V8HI 1 "nonimmediate_operand" "")
3360 (match_operand:SI 2 "const_int_operand" "")]
3363 int mask = INTVAL (operands[2]);
3364 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3365 GEN_INT (((mask >> 0) & 3) + 4),
3366 GEN_INT (((mask >> 2) & 3) + 4),
3367 GEN_INT (((mask >> 4) & 3) + 4),
3368 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with one selector operand per shuffled word.  Operands 2..5
;; are constants in 4..7.  The output code subtracts the bias of 4 from
;; each, packs the results into a single immediate (two bits each, with
;; operand 2 in the lowest bits) and stores it in operands[2] so the
;; template can print it.
3372 (define_insn "sse2_pshufhw_1"
3373 [(set (match_operand:V8HI 0 "register_operand" "=x")
3375 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3376 (parallel [(const_int 0)
3380 (match_operand 2 "const_4_to_7_operand" "")
3381 (match_operand 3 "const_4_to_7_operand" "")
3382 (match_operand 4 "const_4_to_7_operand" "")
3383 (match_operand 5 "const_4_to_7_operand" "")])))]
3387 mask |= (INTVAL (operands[2]) - 4) << 0;
3388 mask |= (INTVAL (operands[3]) - 4) << 2;
3389 mask |= (INTVAL (operands[4]) - 4) << 4;
3390 mask |= (INTVAL (operands[5]) - 4) << 6;
3391 operands[2] = GEN_INT (mask);
3393 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3395 [(set_attr "type" "sselog")
3396 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from the SImode operand 1.  The
;; preparation statement sets operands[2] to the all-zero V4SI constant.
3398 (define_expand "sse2_loadd"
3399 [(set (match_operand:V4SI 0 "register_operand" "")
3402 (match_operand:SI 1 "nonimmediate_operand" ""))
3406 "operands[2] = CONST0_RTX (V4SImode);")
;; Place the SImode operand 2 into a V4SI register whose other contents
;; come from operand 1.  Three alternatives:
;;   0: movd  from memory or a general register, operand 1 is zero (C)
;;   1: movss from memory, operand 1 is zero (C)
;;   2: movss from an SSE register, operand 1 tied to the destination
3408 (define_insn "sse2_loadld"
3409 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3412 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3413 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3417 movd\t{%2, %0|%0, %2}
3418 movss\t{%2, %0|%0, %2}
3419 movss\t{%2, %0|%0, %2}"
3420 [(set_attr "type" "ssemov")
3421 (set_attr "mode" "TI,V4SF,SF")])
;; Store element 0 of the V4SI register operand 1 into the SImode
;; destination (memory or SSE register).  Once reload has completed the
;; pattern is split into a plain SImode move, with operand 1 rewritten as
;; the same hard register viewed in SImode.
3423 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3424 ;; be taken into account, and movdi isn't fully populated even without.
3425 (define_insn_and_split "sse2_stored"
3426 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3428 (match_operand:V4SI 1 "register_operand" "x")
3429 (parallel [(const_int 0)])))]
3432 "&& reload_completed"
3433 [(set (match_dup 0) (match_dup 1))]
3435 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of the V2DI register operand 1 into the
;; DImode destination operand 0 (register or memory).
3438 (define_expand "sse_storeq"
3439 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3441 (match_operand:V2DI 1 "register_operand" "")
3442 (parallel [(const_int 0)])))]
;; Store element 0 of the V2DI register operand 1 into the DImode
;; destination (memory or SSE register).  The second pattern below, guarded
;; by TARGET_SSE && reload_completed, replaces the extract with a plain
;; DImode move, rewriting operand 1 as the same hard register in DImode.
3446 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3447 ;; be taken into account, and movdi isn't fully populated even without.
3448 (define_insn "*sse2_storeq"
3449 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3451 (match_operand:V2DI 1 "register_operand" "x")
3452 (parallel [(const_int 0)])))]
3457 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3459 (match_operand:V2DI 1 "register_operand" "")
3460 (parallel [(const_int 0)])))]
3461 "TARGET_SSE && reload_completed"
3462 [(set (match_dup 0) (match_dup 1))]
3464 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Build a V4SI value from the SImode register operand 1.  Both templates
;; use a shuffle immediate of 0:
;;   0: pshufd from operand 1 into the destination
;;   1: shufps of the destination with itself (operand 1 tied to it)
3467 (define_insn "*vec_dupv4si"
3468 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3470 (match_operand:SI 1 "register_operand" " Y,0")))]
3473 pshufd\t{$0, %1, %0|%0, %1, 0}
3474 shufps\t{$0, %0, %0|%0, %0, 0}"
3475 [(set_attr "type" "sselog1")
3476 (set_attr "mode" "TI,V4SF")])
;; Build a V2DI value from the DImode register operand 1.  Both
;; alternatives tie operand 1 to the destination; the first is typed
;; sselog1 in TI mode, the second ssemov in V4SF mode.
3478 (define_insn "*vec_dupv2di"
3479 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3481 (match_operand:DI 1 "register_operand" " 0,0")))]
3486 [(set_attr "type" "sselog1,ssemov")
3487 (set_attr "mode" "TI,V4SF")])
;; Form a V2SI value from the SImode operands 1 and 2.  Alternatives:
;;   0: punpckldq, SSE register, operand 1 tied to the destination
;;   1: movd from register/memory, operand 2 is zero (C)
;;   2: punpckldq, MMX register (y), operand 1 tied to the destination
;;   3: movd from register/memory into an MMX register, operand 2 is zero
3489 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3490 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3491 ;; alternatives pretty much forces the MMX alternative to be chosen.
3492 (define_insn "*sse2_concatv2si"
3493 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3495 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3496 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3499 punpckldq\t{%2, %0|%0, %2}
3500 movd\t{%1, %0|%0, %1}
3501 punpckldq\t{%2, %0|%0, %2}
3502 movd\t{%1, %0|%0, %1}"
3503 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3504 (set_attr "mode" "TI,TI,DI,DI")])
;; Form a V2SI value from the SImode operands 1 and 2 using single-precision
;; float moves for the SSE register alternatives.  Alternatives:
;;   0: unpcklps, operand 1 tied to the destination
;;   1: movss from memory, operand 2 is zero (C)
;;   2: punpckldq, MMX register (y), operand 1 tied to the destination
;;   3: movd from register/memory into an MMX register, operand 2 is zero
3506 (define_insn "*sse1_concatv2si"
3507 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3509 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3510 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3513 unpcklps\t{%2, %0|%0, %2}
3514 movss\t{%1, %0|%0, %1}
3515 punpckldq\t{%2, %0|%0, %2}
3516 movd\t{%1, %0|%0, %1}"
3517 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3518 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Form a V4SI value from the two V2SI operands; operand 1 is always tied
;; to the destination.  Alternatives:
;;   0: punpcklqdq with operand 2 in a Y-class register
;;   1: movlhps with operand 2 in an SSE register
;;   2: movhps with operand 2 in memory
3520 (define_insn "*vec_concatv4si_1"
3521 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3523 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3524 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3527 punpcklqdq\t{%2, %0|%0, %2}
3528 movlhps\t{%2, %0|%0, %2}
3529 movhps\t{%2, %0|%0, %2}"
3530 [(set_attr "type" "sselog,ssemov,ssemov")
3531 (set_attr "mode" "TI,V4SF,V2SF")])
;; Form a V2DI value from the DImode operands 1 and 2.  Alternatives:
;;   0: movq from memory, operand 2 is zero (C)
;;   1: movq2dq from an MMX register (y), operand 2 is zero; discouraged (?)
;;   2: punpcklqdq, operand 1 tied to the destination
;;   3: movlhps, operand 1 tied to the destination
;;   4: movhps with operand 2 in memory, operand 1 tied to the destination
;;   5: movlps with operand 1 in memory, operand 2 tied to the destination
3533 (define_insn "*vec_concatv2di"
3534 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3536 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3537 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3540 movq\t{%1, %0|%0, %1}
3541 movq2dq\t{%1, %0|%0, %1}
3542 punpcklqdq\t{%2, %0|%0, %2}
3543 movlhps\t{%2, %0|%0, %2}
3544 movhps\t{%2, %0|%0, %2}
3545 movlps\t{%1, %0|%0, %1}"
3546 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3547 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Set one element of a V2DI vector.
;;   operand 0: V2DI vector register to modify
;;   operand 1: DImode register holding the new element value
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_set; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3549 (define_expand "vec_setv2di"
3550 [(match_operand:V2DI 0 "register_operand" "")
3551 (match_operand:DI 1 "register_operand" "")
3552 (match_operand 2 "const_int_operand" "")]
3555 ix86_expand_vector_set (false, operands[0], operands[1],
3556 INTVAL (operands[2]));
;; Extract one element of a V2DI vector.
;;   operand 0: DImode destination register
;;   operand 1: V2DI source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3560 (define_expand "vec_extractv2di"
3561 [(match_operand:DI 0 "register_operand" "")
3562 (match_operand:V2DI 1 "register_operand" "")
3563 (match_operand 2 "const_int_operand" "")]
3566 ix86_expand_vector_extract (false, operands[0], operands[1],
3567 INTVAL (operands[2]));
;; Initialize the V2DI register operand 0 from operand 1, which carries no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init; the leading `false' argument is presumably that
;; helper's MMX-allowed flag -- confirm.
3571 (define_expand "vec_initv2di"
3572 [(match_operand:V2DI 0 "register_operand" "")
3573 (match_operand 1 "" "")]
3576 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set one element of a V4SI vector.
;;   operand 0: V4SI vector register to modify
;;   operand 1: SImode register holding the new element value
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_set; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3580 (define_expand "vec_setv4si"
3581 [(match_operand:V4SI 0 "register_operand" "")
3582 (match_operand:SI 1 "register_operand" "")
3583 (match_operand 2 "const_int_operand" "")]
3586 ix86_expand_vector_set (false, operands[0], operands[1],
3587 INTVAL (operands[2]));
;; Extract one element of a V4SI vector.
;;   operand 0: SImode destination register
;;   operand 1: V4SI source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3591 (define_expand "vec_extractv4si"
3592 [(match_operand:SI 0 "register_operand" "")
3593 (match_operand:V4SI 1 "register_operand" "")
3594 (match_operand 2 "const_int_operand" "")]
3597 ix86_expand_vector_extract (false, operands[0], operands[1],
3598 INTVAL (operands[2]));
;; Initialize the V4SI register operand 0 from operand 1, which carries no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init; the leading `false' argument is presumably that
;; helper's MMX-allowed flag -- confirm.
3602 (define_expand "vec_initv4si"
3603 [(match_operand:V4SI 0 "register_operand" "")
3604 (match_operand 1 "" "")]
3607 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set one element of a V8HI vector.
;;   operand 0: V8HI vector register to modify
;;   operand 1: HImode register holding the new element value
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_set; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3611 (define_expand "vec_setv8hi"
3612 [(match_operand:V8HI 0 "register_operand" "")
3613 (match_operand:HI 1 "register_operand" "")
3614 (match_operand 2 "const_int_operand" "")]
3617 ix86_expand_vector_set (false, operands[0], operands[1],
3618 INTVAL (operands[2]));
;; Extract one element of a V8HI vector.
;;   operand 0: HImode destination register
;;   operand 1: V8HI source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3622 (define_expand "vec_extractv8hi"
3623 [(match_operand:HI 0 "register_operand" "")
3624 (match_operand:V8HI 1 "register_operand" "")
3625 (match_operand 2 "const_int_operand" "")]
3628 ix86_expand_vector_extract (false, operands[0], operands[1],
3629 INTVAL (operands[2]));
;; Initialize the V8HI register operand 0 from operand 1, which carries no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init; the leading `false' argument is presumably that
;; helper's MMX-allowed flag -- confirm.
3633 (define_expand "vec_initv8hi"
3634 [(match_operand:V8HI 0 "register_operand" "")
3635 (match_operand 1 "" "")]
3638 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set one element of a V16QI vector.
;;   operand 0: V16QI vector register to modify
;;   operand 1: QImode register holding the new element value
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_set; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3642 (define_expand "vec_setv16qi"
3643 [(match_operand:V16QI 0 "register_operand" "")
3644 (match_operand:QI 1 "register_operand" "")
3645 (match_operand 2 "const_int_operand" "")]
3648 ix86_expand_vector_set (false, operands[0], operands[1],
3649 INTVAL (operands[2]));
;; Extract one element of a V16QI vector.
;;   operand 0: QImode destination register
;;   operand 1: V16QI source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract; the leading `false'
;; argument is presumably that helper's MMX-allowed flag -- confirm.
3653 (define_expand "vec_extractv16qi"
3654 [(match_operand:QI 0 "register_operand" "")
3655 (match_operand:V16QI 1 "register_operand" "")
3656 (match_operand 2 "const_int_operand" "")]
3659 ix86_expand_vector_extract (false, operands[0], operands[1],
3660 INTVAL (operands[2]));
;; Initialize the V16QI register operand 0 from operand 1, which carries no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init; the leading `false' argument is presumably that
;; helper's MMX-allowed flag -- confirm.
3664 (define_expand "vec_initv16qi"
3665 [(match_operand:V16QI 0 "register_operand" "")
3666 (match_operand 1 "" "")]
3669 ix86_expand_vector_init (false, operands[0], operands[1]);
3673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3677 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Byte-wise average of operands 1 and 2, emitted as pavgb.  The RTL
;; includes a constant vector of sixteen ones.  Operands 1 and 2 are
;; commutative (the % constraint modifier), and the pattern is enabled
;; only when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands as a PLUS in V16QImode.
3679 (define_insn "sse2_uavgv16qi3"
3680 [(set (match_operand:V16QI 0 "register_operand" "=x")
3686 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3688 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3689 (const_vector:V16QI [(const_int 1) (const_int 1)
3690 (const_int 1) (const_int 1)
3691 (const_int 1) (const_int 1)
3692 (const_int 1) (const_int 1)
3693 (const_int 1) (const_int 1)
3694 (const_int 1) (const_int 1)
3695 (const_int 1) (const_int 1)
3696 (const_int 1) (const_int 1)]))
3698 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3699 "pavgb\t{%2, %0|%0, %2}"
3700 [(set_attr "type" "sseiadd")
3701 (set_attr "mode" "TI")])
;; Word-wise average of operands 1 and 2, emitted as pavgw.  The RTL
;; includes a constant vector of eight ones.  Operands 1 and 2 are
;; commutative (the % constraint modifier), and the pattern is enabled
;; only when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands as a PLUS in V8HImode.
3703 (define_insn "sse2_uavgv8hi3"
3704 [(set (match_operand:V8HI 0 "register_operand" "=x")
3710 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3712 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3713 (const_vector:V8HI [(const_int 1) (const_int 1)
3714 (const_int 1) (const_int 1)
3715 (const_int 1) (const_int 1)
3716 (const_int 1) (const_int 1)]))
3718 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3719 "pavgw\t{%2, %0|%0, %2}"
3720 [(set_attr "type" "sseiadd")
3721 (set_attr "mode" "TI")])
;; psadbw on two V16QI inputs producing a V2DI result.  The operation is
;; modelled as an opaque unspec rather than explicit RTL arithmetic.
;; Operand 1 is tied to the destination register.
3723 ;; The correct representation for this is absolutely enormous, and
3724 ;; surely not generally useful.
3725 (define_insn "sse2_psadbw"
3726 [(set (match_operand:V2DI 0 "register_operand" "=x")
3727 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3728 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3731 "psadbw\t{%2, %0|%0, %2}"
3732 [(set_attr "type" "sseiadd")
3733 (set_attr "mode" "TI")])
;; movmskps: produce an SImode value in a general register from the V4SF
;; register operand 1.  Modelled as an unspec.
3735 (define_insn "sse_movmskps"
3736 [(set (match_operand:SI 0 "register_operand" "=r")
3737 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3740 "movmskps\t{%1, %0|%0, %1}"
3741 [(set_attr "type" "ssecvt")
3742 (set_attr "mode" "V4SF")])
;; movmskpd: produce an SImode value in a general register from the V2DF
;; register operand 1.  Modelled as an unspec.
3744 (define_insn "sse2_movmskpd"
3745 [(set (match_operand:SI 0 "register_operand" "=r")
3746 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3749 "movmskpd\t{%1, %0|%0, %1}"
3750 [(set_attr "type" "ssecvt")
3751 (set_attr "mode" "V2DF")])
;; pmovmskb: produce an SImode value in a general register from the V16QI
;; register operand 1.  Modelled as an unspec.
;; NOTE(review): the mode attribute is V2DF although the input is an
;; integer V16QI vector; this looks carried over from sse2_movmskpd --
;; confirm whether an integer mode was intended.
3753 (define_insn "sse2_pmovmskb"
3754 [(set (match_operand:SI 0 "register_operand" "=r")
3755 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3758 "pmovmskb\t{%1, %0|%0, %1}"
3759 [(set_attr "type" "ssecvt")
3760 (set_attr "mode" "V2DF")])
;; Expander for maskmovdqu: sets the V16QI memory operand 0 to an unspec
;; that takes the V16QI register operands 1 and 2 among its inputs.
3762 (define_expand "sse2_maskmovdqu"
3763 [(set (match_operand:V16QI 0 "memory_operand" "")
3764 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3765 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of maskmovdqu (enabled for TARGET_SSE2 && !TARGET_64BIT).
;; The destination address is the SImode operand 0, constrained to class D.
;; The destination memory also appears as an input of the unspec, alongside
;; the two V16QI register operands, so the old memory contents count as
;; read by this insn.
3771 (define_insn "*sse2_maskmovdqu"
3772 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3773 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3774 (match_operand:V16QI 2 "register_operand" "x")
3775 (mem:V16QI (match_dup 0))]
3777 "TARGET_SSE2 && !TARGET_64BIT"
3778 ;; @@@ check ordering of operands in intel/nonintel syntax
3779 "maskmovdqu\t{%2, %1|%1, %2}"
3780 [(set_attr "type" "ssecvt")
3781 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu (enabled for TARGET_SSE2 && TARGET_64BIT).
;; Identical to *sse2_maskmovdqu except that the address operand 0 is
;; DImode.  The destination memory also appears as an input of the unspec.
3783 (define_insn "*sse2_maskmovdqu_rex64"
3784 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3785 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3786 (match_operand:V16QI 2 "register_operand" "x")
3787 (mem:V16QI (match_dup 0))]
3789 "TARGET_SSE2 && TARGET_64BIT"
3790 ;; @@@ check ordering of operands in intel/nonintel syntax
3791 "maskmovdqu\t{%2, %1|%1, %2}"
3792 [(set_attr "type" "ssecvt")
3793 (set_attr "mode" "TI")])
;; ldmxcsr: volatile unspec taking the SImode memory operand 0 as input.
;; The memory attribute is set to load.
3795 (define_insn "sse_ldmxcsr"
3796 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3800 [(set_attr "type" "sse")
3801 (set_attr "memory" "load")])
;; stmxcsr: store the result of the volatile unspec UNSPECV_STMXCSR into
;; the SImode memory operand 0.  The memory attribute is set to store.
3803 (define_insn "sse_stmxcsr"
3804 [(set (match_operand:SI 0 "memory_operand" "=m")
3805 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3808 [(set_attr "type" "sse")
3809 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.
;; Operand 0 is synthesized here as a volatile BLKmode MEM whose address
;; is a Pmode SCRATCH; it is used as the input of the UNSPEC_SFENCE unspec.
3811 (define_expand "sse_sfence"
3813 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3814 "TARGET_SSE || TARGET_3DNOW_A"
3816 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3817 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for sfence: a BLKmode operand 0 is set to an
;; UNSPEC_SFENCE unspec of itself.  Enabled with TARGET_SSE or
;; TARGET_3DNOW_A; the memory attribute is unknown.
3820 (define_insn "*sse_sfence"
3821 [(set (match_operand:BLK 0 "" "")
3822 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3823 "TARGET_SSE || TARGET_3DNOW_A"
3825 [(set_attr "type" "sse")
3826 (set_attr "memory" "unknown")])
;; clflush: volatile unspec whose input, operand 0, is an address
;; (address_operand, constraint p).  The memory attribute is unknown.
3828 (define_insn "sse2_clflush"
3829 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3833 [(set_attr "type" "sse")
3834 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is synthesized here as a volatile
;; BLKmode MEM whose address is a Pmode SCRATCH; it is used as the input of
;; the UNSPEC_MFENCE unspec.
3836 (define_expand "sse2_mfence"
3838 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3841 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3842 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for mfence: a BLKmode operand 0 is set to an
;; UNSPEC_MFENCE unspec of itself.  The memory attribute is unknown.
3845 (define_insn "*sse2_mfence"
3846 [(set (match_operand:BLK 0 "" "")
3847 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3850 [(set_attr "type" "sse")
3851 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is synthesized here as a volatile
;; BLKmode MEM whose address is a Pmode SCRATCH; it is used as the input of
;; the UNSPEC_LFENCE unspec.
3853 (define_expand "sse2_lfence"
3855 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3858 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3859 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for lfence: a BLKmode operand 0 is set to an
;; UNSPEC_LFENCE unspec of itself.  The memory attribute is unknown.
3862 (define_insn "*sse2_lfence"
3863 [(set (match_operand:BLK 0 "" "")
3864 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3867 [(set_attr "type" "sse")
3868 (set_attr "memory" "unknown")])
;; mwait: volatile unspec with two SImode register inputs, operand 0 in
;; register class a and operand 1 in register class c.  The insn length is
;; given explicitly as 3 bytes.
3870 (define_insn "sse3_mwait"
3871 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3872 (match_operand:SI 1 "register_operand" "c")]
3876 [(set_attr "length" "3")])
;; monitor: volatile unspec with three SImode register inputs, operand 0
;; in register class a, operand 1 in class c and operand 2 in class d.
;; All three are printed explicitly in the template.  The insn length is
;; given explicitly as 3 bytes.
3878 (define_insn "sse3_monitor"
3879 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3880 (match_operand:SI 1 "register_operand" "c")
3881 (match_operand:SI 2 "register_operand" "d")]
3884 "monitor\t%0, %1, %2"
3885 [(set_attr "length" "3")])