1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name match
;; the element sizes, in bytes, of the modes it lists (1 = QI, 2 = HI,
;; 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the integer vector modes in SSEMODEI.  The visible
;; preparation statement passes the operands to ix86_expand_vector_move.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives: a "C"-constrained
;; operand into a register, register/memory into a register, and register
;; into memory; the condition rejects memory-to-memory moves.
;; The output code returns xorps/pxor of the destination with itself, or
;; movaps/movdqa, using the "ps" spelling when the insn's mode attribute is
;; V4SF.  The mode attribute itself is computed from TARGET_SSE2,
;; optimize_size and TARGET_SSE_TYPELESS_STORES, with "TI" as the default.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
62 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64 switch (which_alternative)
67 if (get_attr_mode (insn) == MODE_V4SF)
68 return "xorps\t%0, %0";
70 return "pxor\t%0, %0";
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
86 (eq_attr "alternative" "0,1")
88 (ne (symbol_ref "optimize_size")
92 (eq_attr "alternative" "2")
94 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
96 (ne (symbol_ref "optimize_size")
100 (const_string "TI")))])
;; Move expander for V4SF.  The visible preparation statement passes the
;; operands to ix86_expand_vector_move with V4SFmode.
102 (define_expand "movv4sf"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
104 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
107 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF with three alternatives ("C"-constrained operand to
;; register, register/memory to register, register to memory).  The two
;; template lines visible here both emit movaps; the mode attribute is
;; always V4SF.
111 (define_insn "*movv4sf_internal"
112 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
113 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
117 movaps\t{%1, %0|%0, %1}
118 movaps\t{%1, %0|%0, %1}"
119 [(set_attr "type" "sselog1,ssemov,ssemov")
120 (set_attr "mode" "V4SF")])
;; NOTE(review): the line that introduces this pattern is not visible in
;; this copy.
;; Applies after reload (the condition requires reload_completed) to a V4SF
;; register set from a zero_extended_scalar_load_operand.  The preparation
;; code narrows operand 1 to its SFmode subreg at offset 0 and sets
;; operand 2 to the V4SF zero constant; the replacement uses a
;; vec_duplicate of operand 1.
123 [(set (match_operand:V4SF 0 "register_operand" "")
124 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
125 "TARGET_SSE && reload_completed"
128 (vec_duplicate:V4SF (match_dup 1))
132 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
133 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF.  The visible preparation statement passes the
;; operands to ix86_expand_vector_move with V2DFmode.
136 (define_expand "movv2df"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
138 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
141 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF.  Three alternatives ("C"-constrained operand to
;; register, register/memory to register, register to memory); the condition
;; rejects memory-to-memory moves.
;; The output code returns xorps/xorpd of the destination with itself, or
;; movaps/movapd, using the "ps" spelling when the insn's mode attribute is
;; V4SF.  The mode attribute is V4SF when TARGET_SSE2 is zero; otherwise it
;; is chosen per alternative from optimize_size and
;; TARGET_SSE_TYPELESS_STORES, defaulting to V2DF.
145 (define_insn "*movv2df_internal"
146 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
147 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
148 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
150 switch (which_alternative)
153 if (get_attr_mode (insn) == MODE_V4SF)
154 return "xorps\t%0, %0";
156 return "xorpd\t%0, %0";
159 if (get_attr_mode (insn) == MODE_V4SF)
160 return "movaps\t{%1, %0|%0, %1}";
162 return "movapd\t{%1, %0|%0, %1}";
167 [(set_attr "type" "sselog1,ssemov,ssemov")
169 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
170 (const_string "V4SF")
171 (eq_attr "alternative" "0,1")
173 (ne (symbol_ref "optimize_size")
175 (const_string "V4SF")
176 (const_string "V2DF"))
177 (eq_attr "alternative" "2")
179 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
181 (ne (symbol_ref "optimize_size")
183 (const_string "V4SF")
184 (const_string "V2DF"))]
185 (const_string "V2DF")))])
;; NOTE(review): the line that introduces this pattern is not visible in
;; this copy.
;; Applies after reload under TARGET_SSE2 to a V2DF register set from a
;; zero_extended_scalar_load_operand.  The replacement is a vec_concat of
;; operand 1, narrowed to its DFmode subreg at offset 0, with operand 2,
;; which the preparation code sets to the DFmode zero constant.
188 [(set (match_operand:V2DF 0 "register_operand" "")
189 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
190 "TARGET_SSE2 && reload_completed"
191 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
193 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
194 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE.  The visible preparation
;; statement passes register operand 0 to ix86_expand_push.
197 (define_expand "push<mode>1"
198 [(match_operand:SSEMODE 0 "register_operand" "")]
201 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE.  The visible
;; preparation statement passes the operands to
;; ix86_expand_vector_move_misalign.
205 (define_expand "movmisalign<mode>"
206 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
207 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
210 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as movups, wrapped in an unspec.  Two alternatives:
;; register/memory into a register, and register into memory; the condition
;; rejects memory-to-memory moves.
;; The mode attribute is V4SF, matching the V4SF operands and the
;; single-precision movups template.  It previously read "V2DF", which is
;; the value used by the double-precision sse2_movupd pattern.
214 (define_insn "sse_movups"
215 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
216 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movups\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "V4SF")])
;; V2DF move emitted as movupd, wrapped in an unspec.  Requires TARGET_SSE2;
;; two alternatives (register/memory into register, register into memory),
;; with memory-to-memory moves rejected by the condition.
223 (define_insn "sse2_movupd"
224 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
225 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
227 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
228 "movupd\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V2DF")])
;; V16QI move emitted as movdqu, wrapped in an unspec.  Requires TARGET_SSE2;
;; two alternatives (register/memory into register, register into memory),
;; with memory-to-memory moves rejected by the condition.
232 (define_insn "sse2_movdqu"
233 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
234 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
236 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
237 "movdqu\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssemov")
239 (set_attr "mode" "TI")])
;; Store of a V4SF register ("x") to memory, emitted as movntps and wrapped
;; in an unspec.
241 (define_insn "sse_movntv4sf"
242 [(set (match_operand:V4SF 0 "memory_operand" "=m")
243 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
246 "movntps\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssemov")
248 (set_attr "mode" "V4SF")])
;; Store of a V2DF register ("x") to memory, emitted as movntpd and wrapped
;; in an unspec.
;; NOTE(review): the type attribute is "ssecvt" here, whereas sse_movntv4sf
;; uses "ssemov" -- confirm whether the difference is intended.
250 (define_insn "sse2_movntv2df"
251 [(set (match_operand:V2DF 0 "memory_operand" "=m")
252 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
255 "movntpd\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "V2DF")])
;; Store of a V2DI register ("x") to memory, emitted as movntdq and wrapped
;; in an unspec.
;; NOTE(review): the type attribute is "ssecvt" here, whereas sse_movntv4sf
;; uses "ssemov" -- confirm whether the difference is intended.
259 (define_insn "sse2_movntv2di"
260 [(set (match_operand:V2DI 0 "memory_operand" "=m")
261 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
264 "movntdq\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, emitted as movnti
;; and wrapped in an unspec.
;; The mode attribute is SI, matching the SImode operands.  It previously
;; read "V2DF", a vector floating-point mode that does not describe this
;; integer store.
;; NOTE(review): the type attribute stays "ssecvt" as before, whereas
;; sse_movntv4sf uses "ssemov" -- confirm whether the difference is intended.
268 (define_insn "sse2_movntsi"
269 [(set (match_operand:SI 0 "memory_operand" "=m")
270 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
273 "movnti\t{%1, %0|%0, %1}"
274 [(set_attr "type" "ssecvt")
275 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into a register, emitted as lddqu and
;; wrapped in an unspec.
277 (define_insn "sse3_lddqu"
278 [(set (match_operand:V16QI 0 "register_operand" "=x")
279 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
282 "lddqu\t{%1, %0|%0, %1}"
283 [(set_attr "type" "ssecvt")
284 (set_attr "mode" "TI")])
286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
288 ;; Parallel single-precision floating point arithmetic
290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator with code NEG, after which the expander
;; is DONE.
292 (define_expand "negv4sf2"
293 [(set (match_operand:V4SF 0 "register_operand" "")
294 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
296 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator with code ABS, after which the expander
;; is DONE.
298 (define_expand "absv4sf2"
299 [(set (match_operand:V4SF 0 "register_operand" "")
300 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
302 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy for code PLUS; the pattern itself is
;; then emitted (there is no DONE).
304 (define_expand "addv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "")
306 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
307 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
309 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; V4SF addition, emitted as addps.  Operand 1 is tied to the output
;; register ("0") and the leading "%" marks operands 1 and 2 as commutative;
;; operand 2 may be a register or memory ("xm").  Enabled under TARGET_SSE
;; when ix86_binary_operator_ok accepts the operands for PLUS.
311 (define_insn "*addv4sf3"
312 [(set (match_operand:V4SF 0 "register_operand" "=x")
313 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
314 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
315 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
316 "addps\t{%2, %0|%0, %2}"
317 [(set_attr "type" "sseadd")
318 (set_attr "mode" "V4SF")])
;; Addition on V4SF operands emitted as the scalar instruction addss (mode
;; attribute SF).  Operand 1 is tied to the output and marked commutative
;; ("%0").  Enabled under TARGET_SSE when ix86_binary_operator_ok accepts
;; the operands for PLUS.
320 (define_insn "sse_vmaddv4sf3"
321 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
327 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
328 "addss\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register.  The operands
;; are adjusted by ix86_fixup_binary_operands_no_copy for code MINUS before
;; the pattern is emitted.
332 (define_expand "subv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "")
334 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
335 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
337 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; V4SF subtraction, emitted as subps.  Operand 1 is a register tied to the
;; output ("0") with no commutative marker; operand 2 may be a register or
;; memory ("xm").
339 (define_insn "*subv4sf3"
340 [(set (match_operand:V4SF 0 "register_operand" "=x")
341 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
342 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
344 "subps\t{%2, %0|%0, %2}"
345 [(set_attr "type" "sseadd")
346 (set_attr "mode" "V4SF")])
;; Subtraction on V4SF operands emitted as the scalar instruction subss
;; (mode attribute SF).  Operand 1 is a register tied to the output ("0").
348 (define_insn "sse_vmsubv4sf3"
349 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
356 "subss\t{%2, %0|%0, %2}"
357 [(set_attr "type" "sseadd")
358 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for code MULT before the pattern is
;; emitted.
360 (define_expand "mulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "")
362 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
363 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
365 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; V4SF multiplication, emitted as mulps.  Operand 1 is tied to the output
;; and marked commutative ("%0"); operand 2 may be a register or memory
;; ("xm").  Enabled under TARGET_SSE when ix86_binary_operator_ok accepts
;; the operands for MULT.
367 (define_insn "*mulv4sf3"
368 [(set (match_operand:V4SF 0 "register_operand" "=x")
369 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
370 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
371 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
372 "mulps\t{%2, %0|%0, %2}"
373 [(set_attr "type" "ssemul")
374 (set_attr "mode" "V4SF")])
;; Multiplication on V4SF operands emitted as the scalar instruction mulss
;; (mode attribute SF).  Operand 1 is tied to the output and marked
;; commutative ("%0").  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands for MULT.
376 (define_insn "sse_vmmulv4sf3"
377 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
383 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
384 "mulss\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssemul")
386 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register.  The operands are
;; adjusted by ix86_fixup_binary_operands_no_copy for code DIV before the
;; pattern is emitted.
388 (define_expand "divv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "")
390 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
391 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
393 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; V4SF division, emitted as divps.  Operand 1 is a register tied to the
;; output ("0"); operand 2 may be a register or memory ("xm").
395 (define_insn "*divv4sf3"
396 [(set (match_operand:V4SF 0 "register_operand" "=x")
397 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
398 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
400 "divps\t{%2, %0|%0, %2}"
401 [(set_attr "type" "ssediv")
402 (set_attr "mode" "V4SF")])
;; Division on V4SF operands emitted as the scalar instruction divss (mode
;; attribute SF).  Operand 1 is a register tied to the output ("0").
404 (define_insn "sse_vmdivv4sf3"
405 [(set (match_operand:V4SF 0 "register_operand" "=x")
407 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
408 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
412 "divss\t{%2, %0|%0, %2}"
413 [(set_attr "type" "ssediv")
414 (set_attr "mode" "SF")])
;; UNSPEC_RCP applied to a V4SF register-or-memory operand, emitted as
;; rcpps into a register.
416 (define_insn "sse_rcpv4sf2"
417 [(set (match_operand:V4SF 0 "register_operand" "=x")
419 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
421 "rcpps\t{%1, %0|%0, %1}"
422 [(set_attr "type" "sse")
423 (set_attr "mode" "V4SF")])
;; Scalar-form counterpart emitted as rcpss (mode attribute SF).  Operand 1
;; is the register-or-memory source of the unspec; operand 2 is a register
;; tied to the output ("0").
425 (define_insn "sse_vmrcpv4sf2"
426 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
430 (match_operand:V4SF 2 "register_operand" "0")
433 "rcpss\t{%1, %0|%0, %1}"
434 [(set_attr "type" "sse")
435 (set_attr "mode" "SF")])
;; UNSPEC_RSQRT applied to a V4SF register-or-memory operand, emitted as
;; rsqrtps into a register.
437 (define_insn "sse_rsqrtv4sf2"
438 [(set (match_operand:V4SF 0 "register_operand" "=x")
440 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
442 "rsqrtps\t{%1, %0|%0, %1}"
443 [(set_attr "type" "sse")
444 (set_attr "mode" "V4SF")])
;; Scalar-form counterpart emitted as rsqrtss (mode attribute SF).
;; Operand 1 is the register-or-memory source of the unspec; operand 2 is a
;; register tied to the output ("0").
446 (define_insn "sse_vmrsqrtv4sf2"
447 [(set (match_operand:V4SF 0 "register_operand" "=x")
449 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
451 (match_operand:V4SF 2 "register_operand" "0")
454 "rsqrtss\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "SF")])
;; V4SF square root of a register-or-memory operand, emitted as sqrtps into
;; a register.
458 (define_insn "sqrtv4sf2"
459 [(set (match_operand:V4SF 0 "register_operand" "=x")
460 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
462 "sqrtps\t{%1, %0|%0, %1}"
463 [(set_attr "type" "sse")
464 (set_attr "mode" "V4SF")])
;; Scalar-form square root emitted as sqrtss (mode attribute SF).
;; Operand 1 is the register-or-memory source of the sqrt; operand 2 is a
;; register tied to the output ("0").
466 (define_insn "sse_vmsqrtv4sf2"
467 [(set (match_operand:V4SF 0 "register_operand" "=x")
469 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
470 (match_operand:V4SF 2 "register_operand" "0")
473 "sqrtss\t{%1, %0|%0, %1}"
474 [(set_attr "type" "sse")
475 (set_attr "mode" "SF")])
477 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
478 ;; isn't really correct, as those rtl operators aren't defined when
479 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  When flag_finite_math_only is off,
;; operand 1 is forced into a register first; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy for code SMAX.
481 (define_expand "smaxv4sf3"
482 [(set (match_operand:V4SF 0 "register_operand" "")
483 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
484 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
487 if (!flag_finite_math_only)
488 operands[1] = force_reg (V4SFmode, operands[1]);
489 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; V4SF maximum for finite-math-only compilation, emitted as maxps.
;; Operand 1 is tied to the output and marked commutative ("%0").  Enabled
;; under TARGET_SSE with flag_finite_math_only set, when
;; ix86_binary_operator_ok accepts the operands for SMAX.
492 (define_insn "*smaxv4sf3_finite"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
494 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
495 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
496 "TARGET_SSE && flag_finite_math_only
497 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
498 "maxps\t{%2, %0|%0, %2}"
499 [(set_attr "type" "sse")
500 (set_attr "mode" "V4SF")])
;; V4SF maximum, emitted as maxps.  Unlike *smaxv4sf3_finite, operand 1 is
;; a register tied to the output ("0") with no commutative marker.
502 (define_insn "*smaxv4sf3"
503 [(set (match_operand:V4SF 0 "register_operand" "=x")
504 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
505 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
507 "maxps\t{%2, %0|%0, %2}"
508 [(set_attr "type" "sse")
509 (set_attr "mode" "V4SF")])
;; Maximum on V4SF operands for finite-math-only compilation, emitted as
;; the scalar instruction maxss (mode attribute SF).  Operand 1 is tied to
;; the output and marked commutative ("%0").
511 (define_insn "*sse_vmsmaxv4sf3_finite"
512 [(set (match_operand:V4SF 0 "register_operand" "=x")
514 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
515 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
518 "TARGET_SSE && flag_finite_math_only
519 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
520 "maxss\t{%2, %0|%0, %2}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "SF")])
;; Maximum on V4SF operands emitted as the scalar instruction maxss (mode
;; attribute SF).  Operand 1 is a register tied to the output ("0") with no
;; commutative marker.
524 (define_insn "sse_vmsmaxv4sf3"
525 [(set (match_operand:V4SF 0 "register_operand" "=x")
527 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
528 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
532 "maxss\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sse")
534 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  When flag_finite_math_only is off,
;; operand 1 is forced into a register first; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy for code SMIN.
536 (define_expand "sminv4sf3"
537 [(set (match_operand:V4SF 0 "register_operand" "")
538 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
539 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
542 if (!flag_finite_math_only)
543 operands[1] = force_reg (V4SFmode, operands[1]);
544 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; V4SF minimum for finite-math-only compilation, emitted as minps.
;; Operand 1 is tied to the output and marked commutative ("%0").  Enabled
;; under TARGET_SSE with flag_finite_math_only set, when
;; ix86_binary_operator_ok accepts the operands for SMIN.
547 (define_insn "*sminv4sf3_finite"
548 [(set (match_operand:V4SF 0 "register_operand" "=x")
549 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
550 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
551 "TARGET_SSE && flag_finite_math_only
552 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
553 "minps\t{%2, %0|%0, %2}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "V4SF")])
;; V4SF minimum, emitted as minps.  Unlike *sminv4sf3_finite, operand 1 is
;; a register tied to the output ("0") with no commutative marker.
557 (define_insn "*sminv4sf3"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
560 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
562 "minps\t{%2, %0|%0, %2}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "V4SF")])
;; Minimum on V4SF operands for finite-math-only compilation, emitted as
;; the scalar instruction minss (mode attribute SF).  Operand 1 is tied to
;; the output and marked commutative ("%0").
566 (define_insn "*sse_vmsminv4sf3_finite"
567 [(set (match_operand:V4SF 0 "register_operand" "=x")
569 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
570 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
573 "TARGET_SSE && flag_finite_math_only
574 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
575 "minss\t{%2, %0|%0, %2}"
576 [(set_attr "type" "sse")
577 (set_attr "mode" "SF")])
;; Minimum on V4SF operands emitted as the scalar instruction minss (mode
;; attribute SF).  Operand 1 is a register tied to the output ("0") with no
;; commutative marker.
579 (define_insn "sse_vmsminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
583 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
587 "minss\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "SF")])
;; Pattern emitted as addsubps.  Operand 1 is a register tied to the
;; output; operand 2 may be a register or memory.  The visible RTL includes
;; a minus:V4SF of the same two operands (via match_dup).
591 (define_insn "sse3_addsubv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
595 (match_operand:V4SF 1 "register_operand" "0")
596 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
597 (minus:V4SF (match_dup 1) (match_dup 2))
600 "addsubps\t{%2, %0|%0, %2}"
601 [(set_attr "type" "sseadd")
602 (set_attr "mode" "V4SF")])
;; Pattern emitted as haddps.  The visible RTL selects SF elements 0..3 of
;; operand 1 (a register tied to the output) and elements 0..3 of operand 2
;; (register or memory) with vec_select.
604 (define_insn "sse3_haddv4sf3"
605 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (match_operand:V4SF 1 "register_operand" "0")
611 (parallel [(const_int 0)]))
612 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
614 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
615 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
619 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
620 (parallel [(const_int 0)]))
621 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
623 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
624 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
626 "haddps\t{%2, %0|%0, %2}"
627 [(set_attr "type" "sseadd")
628 (set_attr "mode" "V4SF")])
;; Pattern emitted as hsubps.  The visible RTL selects SF elements 0..3 of
;; operand 1 (a register tied to the output) and elements 0..3 of operand 2
;; (register or memory) with vec_select.
630 (define_insn "sse3_hsubv4sf3"
631 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (match_operand:V4SF 1 "register_operand" "0")
637 (parallel [(const_int 0)]))
638 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
641 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
645 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
646 (parallel [(const_int 0)]))
647 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
650 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
652 "hsubps\t{%2, %0|%0, %2}"
653 [(set_attr "type" "sseadd")
654 (set_attr "mode" "V4SF")])
;; V4SF sum-reduction expander.  Two strategies are visible: two chained
;; gen_sse3_haddv4sf3 insns through a fresh temporary register, and a call
;; to ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that selects
;; between them is not visible in this copy.
656 (define_expand "reduc_plus_v4sf"
657 [(match_operand:V4SF 0 "register_operand" "")
658 (match_operand:V4SF 1 "register_operand" "")]
663 rtx tmp = gen_reg_rtx (V4SFmode);
664 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
665 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
668 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF maximum-reduction expander; the visible statement calls
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3.
672 (define_expand "reduc_smax_v4sf"
673 [(match_operand:V4SF 0 "register_operand" "")
674 (match_operand:V4SF 1 "register_operand" "")]
677 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum-reduction expander; the visible statement calls
;; ix86_expand_reduc_v4sf with gen_sminv4sf3.
681 (define_expand "reduc_smin_v4sf"
682 [(match_operand:V4SF 0 "register_operand" "")
683 (match_operand:V4SF 1 "register_operand" "")]
686 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
692 ;; Parallel single-precision floating point comparisons
694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF comparison producing a V4SF result.  Operand 3 is the comparison
;; operator (any code accepted by sse_comparison_operator); the template
;; prints it with %D3 between "cmp" and "ps".  Operand 1 is a register tied
;; to the output; operand 2 may be a register or memory.
696 (define_insn "sse_maskcmpv4sf3"
697 [(set (match_operand:V4SF 0 "register_operand" "=x")
698 (match_operator:V4SF 3 "sse_comparison_operator"
699 [(match_operand:V4SF 1 "register_operand" "0")
700 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
702 "cmp%D3ps\t{%2, %0|%0, %2}"
703 [(set_attr "type" "ssecmp")
704 (set_attr "mode" "V4SF")])
;; Scalar-form comparison on V4SF operands (mode attribute SF); the template
;; prints operator 3 with %D3 between "cmp" and "ss".  Both inputs must be
;; registers here: operand 1 is tied to the output and operand 2 uses "x".
706 (define_insn "sse_vmmaskcmpv4sf3"
707 [(set (match_operand:V4SF 0 "register_operand" "=x")
709 (match_operator:V4SF 3 "sse_comparison_operator"
710 [(match_operand:V4SF 1 "register_operand" "0")
711 (match_operand:V4SF 2 "register_operand" "x")])
715 "cmp%D3ss\t{%2, %0|%0, %2}"
716 [(set_attr "type" "ssecmp")
717 (set_attr "mode" "SF")])
;; Sets the CCFP-mode flags register from element 0 of operand 0 (register)
;; and element 0 of operand 1 (register or memory); emitted as comiss.
719 (define_insn "sse_comi"
720 [(set (reg:CCFP FLAGS_REG)
723 (match_operand:V4SF 0 "register_operand" "x")
724 (parallel [(const_int 0)]))
726 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
727 (parallel [(const_int 0)]))))]
729 "comiss\t{%1, %0|%0, %1}"
730 [(set_attr "type" "ssecomi")
731 (set_attr "mode" "SF")])
;; Sets the CCFPU-mode flags register from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory); emitted as
;; ucomiss.
733 (define_insn "sse_ucomi"
734 [(set (reg:CCFPU FLAGS_REG)
737 (match_operand:V4SF 0 "register_operand" "x")
738 (parallel [(const_int 0)]))
740 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
741 (parallel [(const_int 0)]))))]
743 "ucomiss\t{%1, %0|%0, %1}"
744 [(set_attr "type" "ssecomi")
745 (set_attr "mode" "SF")])
;; V4SF vector-conditional expander.  Operands 4 and 5 are the values being
;; compared, operands 1 and 2 are general operands, and operand 0 is the
;; destination.  The visible code tests the result of
;; ix86_expand_fp_vcond (operands); what follows the test is not visible in
;; this copy.
747 (define_expand "vcondv4sf"
748 [(set (match_operand:V4SF 0 "register_operand" "")
751 [(match_operand:V4SF 4 "nonimmediate_operand" "")
752 (match_operand:V4SF 5 "nonimmediate_operand" "")])
753 (match_operand:V4SF 1 "general_operand" "")
754 (match_operand:V4SF 2 "general_operand" "")))]
757 if (ix86_expand_fp_vcond (operands))
763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
765 ;; Parallel single-precision floating point logical operations
767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for code AND before the pattern is
;; emitted.
769 (define_expand "andv4sf3"
770 [(set (match_operand:V4SF 0 "register_operand" "")
771 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
772 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
774 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; V4SF bitwise AND, emitted as andps.  Operand 1 is tied to the output and
;; marked commutative ("%0"); operand 2 may be a register or memory ("xm").
;; Enabled under TARGET_SSE when ix86_binary_operator_ok accepts the
;; operands for AND.
776 (define_insn "*andv4sf3"
777 [(set (match_operand:V4SF 0 "register_operand" "=x")
778 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
779 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
780 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
781 "andps\t{%2, %0|%0, %2}"
782 [(set_attr "type" "sselog")
783 (set_attr "mode" "V4SF")])
;; V4SF AND of the complement of operand 1 with operand 2, emitted as
;; andnps.  Operand 1 is a register tied to the output ("0"); operand 2 may
;; be a register or memory ("xm").
785 (define_insn "sse_nandv4sf3"
786 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
788 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
790 "andnps\t{%2, %0|%0, %2}"
791 [(set_attr "type" "sselog")
792 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for code IOR before the pattern is
;; emitted.
794 (define_expand "iorv4sf3"
795 [(set (match_operand:V4SF 0 "register_operand" "")
796 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
797 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
799 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; V4SF bitwise OR, emitted as orps.  Operand 1 is tied to the output and
;; marked commutative ("%0"); operand 2 may be a register or memory ("xm").
;; Enabled under TARGET_SSE when ix86_binary_operator_ok accepts the
;; operands for IOR.
801 (define_insn "*iorv4sf3"
802 [(set (match_operand:V4SF 0 "register_operand" "=x")
803 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
804 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
805 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
806 "orps\t{%2, %0|%0, %2}"
807 [(set_attr "type" "sselog")
808 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy for code XOR before the pattern is
;; emitted.
810 (define_expand "xorv4sf3"
811 [(set (match_operand:V4SF 0 "register_operand" "")
812 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
813 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
815 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; V4SF bitwise XOR, emitted as xorps.  Operand 1 is tied to the output and
;; marked commutative ("%0"); operand 2 may be a register or memory ("xm").
;; Enabled under TARGET_SSE when ix86_binary_operator_ok accepts the
;; operands for XOR.
817 (define_insn "*xorv4sf3"
818 [(set (match_operand:V4SF 0 "register_operand" "=x")
819 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
820 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
821 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
822 "xorps\t{%2, %0|%0, %2}"
823 [(set_attr "type" "sselog")
824 (set_attr "mode" "V4SF")])
826 ;; Also define scalar versions. These are used for abs, neg, and
827 ;; conditional move. Using subregs into vector modes causes register
828 ;; allocation lossage. These patterns do not allow memory operands
829 ;; because the native instructions read the full 128 bits.
;; Bitwise AND on SFmode registers, emitted as the packed instruction andps
;; (mode attribute V4SF).  Both inputs are registers; operand 1 is tied to
;; the output.
831 (define_insn "*andsf3"
832 [(set (match_operand:SF 0 "register_operand" "=x")
833 (and:SF (match_operand:SF 1 "register_operand" "0")
834 (match_operand:SF 2 "register_operand" "x")))]
836 "andps\t{%2, %0|%0, %2}"
837 [(set_attr "type" "sselog")
838 (set_attr "mode" "V4SF")])
;; AND of the complement of operand 1 with operand 2 on SFmode registers,
;; emitted as the packed instruction andnps (mode attribute V4SF).  Both
;; inputs are registers; operand 1 is tied to the output.
840 (define_insn "*nandsf3"
841 [(set (match_operand:SF 0 "register_operand" "=x")
842 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
843 (match_operand:SF 2 "register_operand" "x")))]
845 "andnps\t{%2, %0|%0, %2}"
846 [(set_attr "type" "sselog")
847 (set_attr "mode" "V4SF")])
;; Bitwise OR on SFmode registers, emitted as the packed instruction orps
;; (mode attribute V4SF).  Both inputs are registers; operand 1 is tied to
;; the output.
849 (define_insn "*iorsf3"
850 [(set (match_operand:SF 0 "register_operand" "=x")
851 (ior:SF (match_operand:SF 1 "register_operand" "0")
852 (match_operand:SF 2 "register_operand" "x")))]
854 "orps\t{%2, %0|%0, %2}"
855 [(set_attr "type" "sselog")
856 (set_attr "mode" "V4SF")])
;; Bitwise XOR on SFmode registers, emitted as the packed instruction xorps
;; (mode attribute V4SF).  Both inputs are registers; operand 1 is tied to
;; the output.
858 (define_insn "*xorsf3"
859 [(set (match_operand:SF 0 "register_operand" "=x")
860 (xor:SF (match_operand:SF 1 "register_operand" "0")
861 (match_operand:SF 2 "register_operand" "x")))]
863 "xorps\t{%2, %0|%0, %2}"
864 [(set_attr "type" "sselog")
865 (set_attr "mode" "V4SF")])
867 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
869 ;; Parallel single-precision floating point conversion operations
871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversion emitted as cvtpi2ps.  Operand 2 is a V2SI value in a "y"
;; register or memory, converted with float:V2SF; operand 1 is a V4SF
;; register tied to the output.
873 (define_insn "sse_cvtpi2ps"
874 [(set (match_operand:V4SF 0 "register_operand" "=x")
877 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
878 (match_operand:V4SF 1 "register_operand" "0")
881 "cvtpi2ps\t{%2, %0|%0, %2}"
882 [(set_attr "type" "ssecvt")
883 (set_attr "mode" "V4SF")])
;; Conversion emitted as cvtps2pi.  A V4SI unspec of the V4SF source
;; (register or memory) is formed and elements 0 and 1 are selected into a
;; V2SI "y" register.  The unit attribute is "mmx".
885 (define_insn "sse_cvtps2pi"
886 [(set (match_operand:V2SI 0 "register_operand" "=y")
888 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
890 (parallel [(const_int 0) (const_int 1)])))]
892 "cvtps2pi\t{%1, %0|%0, %1}"
893 [(set_attr "type" "ssecvt")
894 (set_attr "unit" "mmx")
895 (set_attr "mode" "DI")])
;; Conversion emitted as cvttps2pi.  fix:V4SI is applied to the V4SF source
;; (register or memory) and elements 0 and 1 are selected into a V2SI "y"
;; register.  The unit attribute is "mmx".
;; NOTE(review): the mode attribute is "SF" here, whereas sse_cvtps2pi uses
;; "DI" -- confirm whether the difference is intended.
897 (define_insn "sse_cvttps2pi"
898 [(set (match_operand:V2SI 0 "register_operand" "=y")
900 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
901 (parallel [(const_int 0) (const_int 1)])))]
903 "cvttps2pi\t{%1, %0|%0, %1}"
904 [(set_attr "type" "ssecvt")
905 (set_attr "unit" "mmx")
906 (set_attr "mode" "SF")])
;; Conversion emitted as cvtsi2ss.  Operand 2 is an SImode value (general
;; register in alternative 0, memory in alternative 1) converted with
;; float:SF; operand 1 is a V4SF register tied to the output.  The
;; athlon_decode attribute differs per alternative.
908 (define_insn "sse_cvtsi2ss"
909 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
912 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
913 (match_operand:V4SF 1 "register_operand" "0,0")
916 "cvtsi2ss\t{%2, %0|%0, %2}"
917 [(set_attr "type" "sseicvt")
918 (set_attr "athlon_decode" "vector,double")
919 (set_attr "mode" "SF")])
;; 64-bit counterpart emitted as cvtsi2ssq; requires TARGET_SSE and
;; TARGET_64BIT.  Operand 2 is a DImode value converted with float:SF;
;; operand 1 is a V4SF register tied to the output.
;; NOTE(review): operand 2's second alternative is "rm" here, whereas
;; sse_cvtsi2ss uses "m" -- confirm whether the difference is intended.
921 (define_insn "sse_cvtsi2ssq"
922 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
925 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
926 (match_operand:V4SF 1 "register_operand" "0,0")
928 "TARGET_SSE && TARGET_64BIT"
929 "cvtsi2ssq\t{%2, %0|%0, %2}"
930 [(set_attr "type" "sseicvt")
931 (set_attr "athlon_decode" "vector,double")
932 (set_attr "mode" "SF")])
;; Conversion emitted as cvtss2si.  Element 0 of the V4SF source (register
;; in alternative 0, memory in alternative 1) goes through
;; UNSPEC_FIX_NOTRUNC into an SImode general register.
934 (define_insn "sse_cvtss2si"
935 [(set (match_operand:SI 0 "register_operand" "=r,r")
938 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
939 (parallel [(const_int 0)]))]
940 UNSPEC_FIX_NOTRUNC))]
942 "cvtss2si\t{%1, %0|%0, %1}"
943 [(set_attr "type" "sseicvt")
944 (set_attr "athlon_decode" "double,vector")
945 (set_attr "mode" "SI")])
;; 64-bit counterpart emitted as cvtss2siq; requires TARGET_SSE and
;; TARGET_64BIT.  Element 0 of the V4SF source goes through
;; UNSPEC_FIX_NOTRUNC into a DImode general register.
947 (define_insn "sse_cvtss2siq"
948 [(set (match_operand:DI 0 "register_operand" "=r,r")
951 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
952 (parallel [(const_int 0)]))]
953 UNSPEC_FIX_NOTRUNC))]
954 "TARGET_SSE && TARGET_64BIT"
955 "cvtss2siq\t{%1, %0|%0, %1}"
956 [(set_attr "type" "sseicvt")
957 (set_attr "athlon_decode" "double,vector")
958 (set_attr "mode" "DI")])
;; Conversion emitted as cvttss2si.  Element 0 of the V4SF source (register
;; in alternative 0, memory in alternative 1) is converted into an SImode
;; general register.
960 (define_insn "sse_cvttss2si"
961 [(set (match_operand:SI 0 "register_operand" "=r,r")
964 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
965 (parallel [(const_int 0)]))))]
967 "cvttss2si\t{%1, %0|%0, %1}"
968 [(set_attr "type" "sseicvt")
969 (set_attr "athlon_decode" "double,vector")
970 (set_attr "mode" "SI")])
;; 64-bit counterpart emitted as cvttss2siq; requires TARGET_SSE and
;; TARGET_64BIT.  Element 0 of the V4SF source is converted into a DImode
;; general register.
972 (define_insn "sse_cvttss2siq"
973 [(set (match_operand:DI 0 "register_operand" "=r,r")
976 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
977 (parallel [(const_int 0)]))))]
978 "TARGET_SSE && TARGET_64BIT"
979 "cvttss2siq\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sseicvt")
981 (set_attr "athlon_decode" "double,vector")
982 (set_attr "mode" "DI")])
;; Conversion of a V4SI value (register or memory) to V4SF with float:V4SF,
;; emitted as cvtdq2ps.
;; The mode attribute is V4SF, matching the single-precision result.  It
;; previously read "V2DF", although no double-precision value is involved
;; in this pattern.
984 (define_insn "sse2_cvtdq2ps"
985 [(set (match_operand:V4SF 0 "register_operand" "=x")
986 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
988 "cvtdq2ps\t{%1, %0|%0, %1}"
989 [(set_attr "type" "ssecvt")
990 (set_attr "mode" "V4SF")])
;; Conversion of a V4SF value (register or memory) to V4SI through
;; UNSPEC_FIX_NOTRUNC, emitted as cvtps2dq.
992 (define_insn "sse2_cvtps2dq"
993 [(set (match_operand:V4SI 0 "register_operand" "=x")
994 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
995 UNSPEC_FIX_NOTRUNC))]
997 "cvtps2dq\t{%1, %0|%0, %1}"
998 [(set_attr "type" "ssecvt")
999 (set_attr "mode" "TI")])
;; Conversion of a V4SF value (register or memory) to V4SI with fix:V4SI,
;; emitted as cvttps2dq.
1001 (define_insn "sse2_cvttps2dq"
1002 [(set (match_operand:V4SI 0 "register_operand" "=x")
1003 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1005 "cvttps2dq\t{%1, %0|%0, %1}"
1006 [(set_attr "type" "ssecvt")
1007 (set_attr "mode" "TI")])
1009 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1011 ;; Parallel single-precision floating point element swizzling
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative pattern whose templates are movhlps, movlps (reading
;; %H1) and movhps.  The destination is a register in the first two
;; alternatives and memory in the third.  The condition rejects the case
;; where operands 1 and 2 are both memory.  The mode attribute is V4SF for
;; the first alternative and V2SF for the other two.
1015 (define_insn "sse_movhlps"
1016 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1019 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
1020 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
1021 (parallel [(const_int 4)
1025 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1027 movhlps\t{%2, %0|%0, %2}
1028 movlps\t{%H1, %0|%0, %H1}
1029 movhps\t{%1, %0|%0, %1}"
1030 [(set_attr "type" "ssemov")
1031 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three-alternative pattern whose templates are movlhps, movhps and movlps
;; (writing %H0).  Operand 1 is tied to the output in every alternative;
;; the destination is a register in the first two alternatives and
;; offsettable memory ("o") in the third.  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands (called with code UNKNOWN).
1033 (define_insn "sse_movlhps"
1034 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1037 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1038 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1039 (parallel [(const_int 0)
1043 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1045 movlhps\t{%2, %0|%0, %2}
1046 movhps\t{%2, %0|%0, %2}
1047 movlps\t{%2, %H0|%H0, %2}"
1048 [(set_attr "type" "ssemov")
1049 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Pattern emitted as unpckhps.  The visible selector picks indices 2, 6, 3
;; and 7, in that order, from operands 1 and 2.  Operand 1 is a register
;; tied to the output; operand 2 may be a register or memory.
1051 (define_insn "sse_unpckhps"
1052 [(set (match_operand:V4SF 0 "register_operand" "=x")
1055 (match_operand:V4SF 1 "register_operand" "0")
1056 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1057 (parallel [(const_int 2) (const_int 6)
1058 (const_int 3) (const_int 7)])))]
1060 "unpckhps\t{%2, %0|%0, %2}"
1061 [(set_attr "type" "sselog")
1062 (set_attr "mode" "V4SF")])
;; Pattern emitted as unpcklps.  The visible selector picks indices 0, 4, 1
;; and 5, in that order, from operands 1 and 2.  Operand 1 is a register
;; tied to the output; operand 2 may be a register or memory.
1064 (define_insn "sse_unpcklps"
1065 [(set (match_operand:V4SF 0 "register_operand" "=x")
1068 (match_operand:V4SF 1 "register_operand" "0")
1069 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1070 (parallel [(const_int 0) (const_int 4)
1071 (const_int 1) (const_int 5)])))]
1073 "unpcklps\t{%2, %0|%0, %2}"
1074 [(set_attr "type" "sselog")
1075 (set_attr "mode" "V4SF")])
1077 ;; These are modeled with the same vec_concat as the others so that we
1078 ;; capture users of shufps that can use the new instructions
;; Single-source pattern emitted as movshdup; operand 1 may be a register
;; or memory.  The visible part of the selector starts at index 1.
1079 (define_insn "sse3_movshdup"
1080 [(set (match_operand:V4SF 0 "register_operand" "=x")
1083 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1085 (parallel [(const_int 1)
1090 "movshdup\t{%1, %0|%0, %1}"
1091 [(set_attr "type" "sse")
1092 (set_attr "mode" "V4SF")])
1094 (define_insn "sse3_movsldup"
1095 [(set (match_operand:V4SF 0 "register_operand" "=x")
1098 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1100 (parallel [(const_int 0)
1105 "movsldup\t{%1, %0|%0, %1}"
1106 [(set_attr "type" "sse")
1107 (set_attr "mode" "V4SF")])
;; Expander for shufps taking the usual 8-bit immediate in operand 3.
;; The immediate is split into four 2-bit element selectors which are
;; passed to sse_shufps_1 as separate constant operands.  The selectors
;; taken from bits 0-1 and 2-3 are passed unchanged (range 0..3); the
;; selectors taken from bits 4-5 and 6-7 are biased by 4 (range 4..7),
;; matching the const_4_to_7_operand predicates that sse_shufps_1 puts
;; on its last two operands.
1109 (define_expand "sse_shufps"
1110 [(match_operand:V4SF 0 "register_operand" "")
1111 (match_operand:V4SF 1 "register_operand" "")
1112 (match_operand:V4SF 2 "nonimmediate_operand" "")
1113 (match_operand:SI 3 "const_int_operand" "")]
1116 int mask = INTVAL (operands[3]);
1117 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1118 GEN_INT ((mask >> 0) & 3),
1119 GEN_INT ((mask >> 2) & 3),
1120 GEN_INT (((mask >> 4) & 3) + 4),
1121 GEN_INT (((mask >> 6) & 3) + 4)));
;; The insn behind sse_shufps.  Operands 3..6 are the four element
;; selectors: operands 3 and 4 must satisfy const_0_to_3_operand,
;; operands 5 and 6 const_4_to_7_operand.  The output code folds them
;; back into a single shufps immediate: operands 3 and 4 go into bits
;; 0-1 and 2-3 unchanged, operands 5 and 6 have the bias of 4 removed
;; and go into bits 4-5 and 6-7.  The rebuilt immediate overwrites
;; operands[3], which is what the %3 in the template prints.
1125 (define_insn "sse_shufps_1"
1126 [(set (match_operand:V4SF 0 "register_operand" "=x")
1129 (match_operand:V4SF 1 "register_operand" "0")
1130 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1131 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1132 (match_operand 4 "const_0_to_3_operand" "")
1133 (match_operand 5 "const_4_to_7_operand" "")
1134 (match_operand 6 "const_4_to_7_operand" "")])))]
1138 mask |= INTVAL (operands[3]) << 0;
1139 mask |= INTVAL (operands[4]) << 2;
1140 mask |= (INTVAL (operands[5]) - 4) << 4;
1141 mask |= (INTVAL (operands[6]) - 4) << 6;
1142 operands[3] = GEN_INT (mask);
1144 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1146 [(set_attr "type" "sselog")
1147 (set_attr "mode" "V4SF")])
1149 (define_insn "sse_storehps"
1150 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1152 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1153 (parallel [(const_int 2) (const_int 3)])))]
1156 movhps\t{%1, %0|%0, %1}
1157 movhlps\t{%1, %0|%0, %1}
1158 movlps\t{%H1, %0|%0, %H1}"
1159 [(set_attr "type" "ssemov")
1160 (set_attr "mode" "V2SF,V4SF,V2SF")])
1162 (define_insn "sse_loadhps"
1163 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1166 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1167 (parallel [(const_int 0) (const_int 1)]))
1168 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1171 movhps\t{%2, %0|%0, %2}
1172 movlhps\t{%2, %0|%0, %2}
1173 movlps\t{%2, %H0|%H0, %2}"
1174 [(set_attr "type" "ssemov")
1175 (set_attr "mode" "V2SF,V4SF,V2SF")])
1177 (define_insn "sse_storelps"
1178 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1180 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1181 (parallel [(const_int 0) (const_int 1)])))]
1184 movlps\t{%1, %0|%0, %1}
1185 movaps\t{%1, %0|%0, %1}
1186 movlps\t{%1, %0|%0, %1}"
1187 [(set_attr "type" "ssemov")
1188 (set_attr "mode" "V2SF,V4SF,V2SF")])
1190 (define_insn "sse_loadlps"
1191 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1193 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1195 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1196 (parallel [(const_int 2) (const_int 3)]))))]
1199 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1200 movlps\t{%2, %0|%0, %2}
1201 movlps\t{%2, %0|%0, %2}"
1202 [(set_attr "type" "sselog,ssemov,ssemov")
1203 (set_attr "mode" "V4SF,V2SF,V2SF")])
1205 (define_insn "sse_movss"
1206 [(set (match_operand:V4SF 0 "register_operand" "=x")
1208 (match_operand:V4SF 2 "register_operand" "x")
1209 (match_operand:V4SF 1 "register_operand" "0")
1212 "movss\t{%2, %0|%0, %2}"
1213 [(set_attr "type" "ssemov")
1214 (set_attr "mode" "SF")])
1216 (define_insn "*vec_dupv4sf"
1217 [(set (match_operand:V4SF 0 "register_operand" "=x")
1219 (match_operand:SF 1 "register_operand" "0")))]
1221 "shufps\t{$0, %0, %0|%0, %0, 0}"
1222 [(set_attr "type" "sselog1")
1223 (set_attr "mode" "V4SF")])
1225 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1226 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1227 ;; alternatives pretty much forces the MMX alternative to be chosen.
1228 (define_insn "*sse_concatv2sf"
1229 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1231 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1232 (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
1235 unpcklps\t{%2, %0|%0, %2}
1236 movss\t{%1, %0|%0, %1}
1237 punpckldq\t{%2, %0|%0, %2}
1238 movd\t{%1, %0|%0, %1}"
1239 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1240 (set_attr "mode" "V4SF,SF,DI,DI")])
1242 (define_insn "*sse_concatv4sf"
1243 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1245 (match_operand:V2SF 1 "register_operand" " 0,0")
1246 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1249 movlhps\t{%2, %0|%0, %2}
1250 movhps\t{%2, %0|%0, %2}"
1251 [(set_attr "type" "ssemov")
1252 (set_attr "mode" "V4SF,V2SF")])
1254 (define_expand "vec_initv4sf"
1255 [(match_operand:V4SF 0 "register_operand" "")
1256 (match_operand 1 "" "")]
1259 ix86_expand_vector_init (false, operands[0], operands[1]);
1263 (define_insn "*vec_setv4sf_0"
1264 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1267 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1268 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1272 movss\t{%2, %0|%0, %2}
1273 movss\t{%2, %0|%0, %2}
1274 movd\t{%2, %0|%0, %2}
1276 [(set_attr "type" "ssemov")
1277 (set_attr "mode" "SF")])
1280 [(set (match_operand:V4SF 0 "memory_operand" "")
1283 (match_operand:SF 1 "nonmemory_operand" ""))
1286 "TARGET_SSE && reload_completed"
1289 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1293 (define_expand "vec_setv4sf"
1294 [(match_operand:V4SF 0 "register_operand" "")
1295 (match_operand:SF 1 "register_operand" "")
1296 (match_operand 2 "const_int_operand" "")]
1299 ix86_expand_vector_set (false, operands[0], operands[1],
1300 INTVAL (operands[2]));
1304 (define_insn_and_split "*vec_extractv4sf_0"
1305 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1307 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1308 (parallel [(const_int 0)])))]
1309 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1311 "&& reload_completed"
1314 rtx op1 = operands[1];
1316 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1318 op1 = gen_lowpart (SFmode, op1);
1319 emit_move_insn (operands[0], op1);
1323 (define_expand "vec_extractv4sf"
1324 [(match_operand:SF 0 "register_operand" "")
1325 (match_operand:V4SF 1 "register_operand" "")
1326 (match_operand 2 "const_int_operand" "")]
1329 ix86_expand_vector_extract (false, operands[0], operands[1],
1330 INTVAL (operands[2]));
1334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1336 ;; Parallel double-precision floating point arithmetic
1338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1340 (define_expand "negv2df2"
1341 [(set (match_operand:V2DF 0 "register_operand" "")
1342 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1344 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1346 (define_expand "absv2df2"
1347 [(set (match_operand:V2DF 0 "register_operand" "")
1348 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1350 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1352 (define_expand "addv2df3"
1353 [(set (match_operand:V2DF 0 "register_operand" "")
1354 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1355 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1357 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; Packed double-precision addition (addpd).  Operand 1 is tied to the
;; destination register by the "0" constraint, and the "%" modifier marks
;; operands 1 and 2 as commutative; operand 2 may be an SSE register or
;; memory.  The pattern is enabled only for TARGET_SSE2 and only when
;; ix86_binary_operator_ok accepts the operand combination.
1359 (define_insn "*addv2df3"
1360 [(set (match_operand:V2DF 0 "register_operand" "=x")
1361 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1362 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1363 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1364 "addpd\t{%2, %0|%0, %2}"
1365 [(set_attr "type" "sseadd")
1366 (set_attr "mode" "V2DF")])
;; Scalar double-precision addition (addsd) expressed on V2DF operands.
;; Operand 1 is tied to the destination register ("0"); operand 2 may be
;; an SSE register or memory.  The insn condition validates the operands
;; as V2DFmode, the mode every operand of this pattern has (it previously
;; named V4SFmode, which matches no operand here and disagreed with the
;; other V2DF arithmetic patterns in this section).
1368 (define_insn "sse2_vmaddv2df3"
1369 [(set (match_operand:V2DF 0 "register_operand" "=x")
1371 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1375 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1376 "addsd\t{%2, %0|%0, %2}"
1377 [(set_attr "type" "sseadd")
1378 (set_attr "mode" "DF")])
1380 (define_expand "subv2df3"
1381 [(set (match_operand:V2DF 0 "register_operand" "")
1382 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1383 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1385 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1387 (define_insn "*subv2df3"
1388 [(set (match_operand:V2DF 0 "register_operand" "=x")
1389 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1390 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1392 "subpd\t{%2, %0|%0, %2}"
1393 [(set_attr "type" "sseadd")
1394 (set_attr "mode" "V2DF")])
1396 (define_insn "sse2_vmsubv2df3"
1397 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1404 "subsd\t{%2, %0|%0, %2}"
1405 [(set_attr "type" "sseadd")
1406 (set_attr "mode" "DF")])
1408 (define_expand "mulv2df3"
1409 [(set (match_operand:V2DF 0 "register_operand" "")
1410 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1411 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1413 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; Packed double-precision multiplication (mulpd).  Operand 1 is tied to
;; the destination register ("0") and is marked commutative with operand
;; 2 by "%"; operand 2 may be an SSE register or memory.  Enabled only
;; for TARGET_SSE2 and when ix86_binary_operator_ok accepts the operands.
1415 (define_insn "*mulv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1417 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1419 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1420 "mulpd\t{%2, %0|%0, %2}"
1421 [(set_attr "type" "ssemul")
1422 (set_attr "mode" "V2DF")])
1424 (define_insn "sse2_vmmulv2df3"
1425 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1431 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1432 "mulsd\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "ssemul")
1434 (set_attr "mode" "DF")])
1436 (define_expand "divv2df3"
1437 [(set (match_operand:V2DF 0 "register_operand" "")
1438 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1439 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1441 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1443 (define_insn "*divv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1445 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1446 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1448 "divpd\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "ssediv")
1450 (set_attr "mode" "V2DF")])
1452 (define_insn "sse2_vmdivv2df3"
1453 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1460 "divsd\t{%2, %0|%0, %2}"
1461 [(set_attr "type" "ssediv")
1462 (set_attr "mode" "DF")])
1464 (define_insn "sqrtv2df2"
1465 [(set (match_operand:V2DF 0 "register_operand" "=x")
1466 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1468 "sqrtpd\t{%1, %0|%0, %1}"
1469 [(set_attr "type" "sse")
1470 (set_attr "mode" "V2DF")])
;; Scalar double-precision square root (sqrtsd) expressed on V2DF
;; operands: operand 1 is the value whose square root is taken, operand
;; 2 is tied to the destination register ("0").  sqrtsd is a
;; double-precision scalar instruction, so the "mode" attribute is DF,
;; in line with the other scalar-double patterns in this section
;; (addsd, subsd, mulsd, divsd, maxsd, minsd); it was previously SF.
1472 (define_insn "sse2_vmsqrtv2df2"
1473 [(set (match_operand:V2DF 0 "register_operand" "=x")
1475 (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
1476 (match_operand:V2DF 2 "register_operand" "0")
1479 "sqrtsd\t{%1, %0|%0, %1}"
1480 [(set_attr "type" "sse")
1481 (set_attr "mode" "DF")])
1483 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1484 ;; isn't really correct, as those rtl operators aren't defined when
1485 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1487 (define_expand "smaxv2df3"
1488 [(set (match_operand:V2DF 0 "register_operand" "")
1489 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1490 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1493 if (!flag_finite_math_only)
1494 operands[1] = force_reg (V2DFmode, operands[1]);
1495 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; Packed double-precision maximum (maxpd), commutative variant.  The
;; "%" modifier lets operands 1 and 2 be swapped; operand 1 is tied to
;; the destination register ("0") and operand 2 may be an SSE register
;; or memory.  This variant is enabled only when flag_finite_math_only
;; is set, in addition to TARGET_SSE2 and ix86_binary_operator_ok
;; accepting the operands.
1498 (define_insn "*smaxv2df3_finite"
1499 [(set (match_operand:V2DF 0 "register_operand" "=x")
1500 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1501 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1502 "TARGET_SSE2 && flag_finite_math_only
1503 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1504 "maxpd\t{%2, %0|%0, %2}"
1505 [(set_attr "type" "sseadd")
1506 (set_attr "mode" "V2DF")])
1508 (define_insn "*smaxv2df3"
1509 [(set (match_operand:V2DF 0 "register_operand" "=x")
1510 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1511 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1513 "maxpd\t{%2, %0|%0, %2}"
1514 [(set_attr "type" "sseadd")
1515 (set_attr "mode" "V2DF")])
1517 (define_insn "*sse2_vmsmaxv2df3_finite"
1518 [(set (match_operand:V2DF 0 "register_operand" "=x")
1520 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1521 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1524 "TARGET_SSE2 && flag_finite_math_only
1525 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1526 "maxsd\t{%2, %0|%0, %2}"
1527 [(set_attr "type" "sseadd")
1528 (set_attr "mode" "DF")])
1530 (define_insn "sse2_vmsmaxv2df3"
1531 [(set (match_operand:V2DF 0 "register_operand" "=x")
1533 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1534 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1538 "maxsd\t{%2, %0|%0, %2}"
1539 [(set_attr "type" "sseadd")
1540 (set_attr "mode" "DF")])
1542 (define_expand "sminv2df3"
1543 [(set (match_operand:V2DF 0 "register_operand" "")
1544 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1545 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1548 if (!flag_finite_math_only)
1549 operands[1] = force_reg (V2DFmode, operands[1]);
1550 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; Packed double-precision minimum (minpd), commutative variant.  The
;; "%" modifier lets operands 1 and 2 be swapped; operand 1 is tied to
;; the destination register ("0") and operand 2 may be an SSE register
;; or memory.  This variant is enabled only when flag_finite_math_only
;; is set, in addition to TARGET_SSE2 and ix86_binary_operator_ok
;; accepting the operands.
1553 (define_insn "*sminv2df3_finite"
1554 [(set (match_operand:V2DF 0 "register_operand" "=x")
1555 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1556 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1557 "TARGET_SSE2 && flag_finite_math_only
1558 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1559 "minpd\t{%2, %0|%0, %2}"
1560 [(set_attr "type" "sseadd")
1561 (set_attr "mode" "V2DF")])
1563 (define_insn "*sminv2df3"
1564 [(set (match_operand:V2DF 0 "register_operand" "=x")
1565 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1566 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1568 "minpd\t{%2, %0|%0, %2}"
1569 [(set_attr "type" "sseadd")
1570 (set_attr "mode" "V2DF")])
1572 (define_insn "*sse2_vmsminv2df3_finite"
1573 [(set (match_operand:V2DF 0 "register_operand" "=x")
1575 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1576 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1579 "TARGET_SSE2 && flag_finite_math_only
1580 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1581 "minsd\t{%2, %0|%0, %2}"
1582 [(set_attr "type" "sseadd")
1583 (set_attr "mode" "DF")])
1585 (define_insn "sse2_vmsminv2df3"
1586 [(set (match_operand:V2DF 0 "register_operand" "=x")
1588 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1589 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1593 "minsd\t{%2, %0|%0, %2}"
1594 [(set_attr "type" "sseadd")
1595 (set_attr "mode" "DF")])
1597 (define_insn "sse3_addsubv2df3"
1598 [(set (match_operand:V2DF 0 "register_operand" "=x")
1601 (match_operand:V2DF 1 "register_operand" "0")
1602 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1603 (minus:V2DF (match_dup 1) (match_dup 2))
1606 "addsubpd\t{%2, %0|%0, %2}"
1607 [(set_attr "type" "sseadd")
1608 (set_attr "mode" "V2DF")])
1610 (define_insn "sse3_haddv2df3"
1611 [(set (match_operand:V2DF 0 "register_operand" "=x")
1615 (match_operand:V2DF 1 "register_operand" "0")
1616 (parallel [(const_int 0)]))
1617 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1620 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1621 (parallel [(const_int 0)]))
1622 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1624 "haddpd\t{%2, %0|%0, %2}"
1625 [(set_attr "type" "sseadd")
1626 (set_attr "mode" "V2DF")])
1628 (define_insn "sse3_hsubv2df3"
1629 [(set (match_operand:V2DF 0 "register_operand" "=x")
1633 (match_operand:V2DF 1 "register_operand" "0")
1634 (parallel [(const_int 0)]))
1635 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1638 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1639 (parallel [(const_int 0)]))
1640 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1642 "hsubpd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "sseadd")
1644 (set_attr "mode" "V2DF")])
1646 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1648 ;; Parallel double-precision floating point comparisons
1650 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1652 (define_insn "sse2_maskcmpv2df3"
1653 [(set (match_operand:V2DF 0 "register_operand" "=x")
1654 (match_operator:V2DF 3 "sse_comparison_operator"
1655 [(match_operand:V2DF 1 "register_operand" "0")
1656 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1658 "cmp%D3pd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "ssecmp")
1660 (set_attr "mode" "V2DF")])
1662 (define_insn "sse2_vmmaskcmpv2df3"
1663 [(set (match_operand:V2DF 0 "register_operand" "=x")
1665 (match_operator:V2DF 3 "sse_comparison_operator"
1666 [(match_operand:V2DF 1 "register_operand" "0")
1667 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1671 "cmp%D3sd\t{%2, %0|%0, %2}"
1672 [(set_attr "type" "ssecmp")
1673 (set_attr "mode" "DF")])
1675 (define_insn "sse2_comi"
1676 [(set (reg:CCFP FLAGS_REG)
1679 (match_operand:V2DF 0 "register_operand" "x")
1680 (parallel [(const_int 0)]))
1682 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1683 (parallel [(const_int 0)]))))]
1685 "comisd\t{%1, %0|%0, %1}"
1686 [(set_attr "type" "ssecomi")
1687 (set_attr "mode" "DF")])
1689 (define_insn "sse2_ucomi"
1690 [(set (reg:CCFPU FLAGS_REG)
1693 (match_operand:V2DF 0 "register_operand" "x")
1694 (parallel [(const_int 0)]))
1696 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1697 (parallel [(const_int 0)]))))]
1699 "ucomisd\t{%1, %0|%0, %1}"
1700 [(set_attr "type" "ssecomi")
1701 (set_attr "mode" "DF")])
1703 (define_expand "vcondv2df"
1704 [(set (match_operand:V2DF 0 "register_operand" "")
1706 (match_operator 3 ""
1707 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1708 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1709 (match_operand:V2DF 1 "general_operand" "")
1710 (match_operand:V2DF 2 "general_operand" "")))]
1713 if (ix86_expand_fp_vcond (operands))
1719 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1721 ;; Parallel double-precision floating point logical operations
1723 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1725 (define_expand "andv2df3"
1726 [(set (match_operand:V2DF 0 "register_operand" "")
1727 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1730 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; Bitwise AND of two V2DF values (andpd).  Operand 1 is tied to the
;; destination register ("0") and is commutative with operand 2 ("%");
;; operand 2 may be an SSE register or memory.  Enabled only for
;; TARGET_SSE2 and when ix86_binary_operator_ok accepts the operands.
1732 (define_insn "*andv2df3"
1733 [(set (match_operand:V2DF 0 "register_operand" "=x")
1734 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1735 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1736 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1737 "andpd\t{%2, %0|%0, %2}"
1738 [(set_attr "type" "sselog")
1739 (set_attr "mode" "V2DF")])
1741 (define_insn "sse2_nandv2df3"
1742 [(set (match_operand:V2DF 0 "register_operand" "=x")
1743 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1744 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1746 "andnpd\t{%2, %0|%0, %2}"
1747 [(set_attr "type" "sselog")
1748 (set_attr "mode" "V2DF")])
1750 (define_expand "iorv2df3"
1751 [(set (match_operand:V2DF 0 "register_operand" "")
1752 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1753 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1755 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; Bitwise inclusive OR of two V2DF values (orpd).  Operand 1 is tied to
;; the destination register ("0") and is commutative with operand 2
;; ("%"); operand 2 may be an SSE register or memory.  Enabled only for
;; TARGET_SSE2 and when ix86_binary_operator_ok accepts the operands.
1757 (define_insn "*iorv2df3"
1758 [(set (match_operand:V2DF 0 "register_operand" "=x")
1759 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1760 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1761 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1762 "orpd\t{%2, %0|%0, %2}"
1763 [(set_attr "type" "sselog")
1764 (set_attr "mode" "V2DF")])
1766 (define_expand "xorv2df3"
1767 [(set (match_operand:V2DF 0 "register_operand" "")
1768 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1769 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1771 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; Bitwise exclusive OR of two V2DF values (xorpd).  Operand 1 is tied
;; to the destination register ("0") and is commutative with operand 2
;; ("%"); operand 2 may be an SSE register or memory.  Enabled only for
;; TARGET_SSE2 and when ix86_binary_operator_ok accepts the operands.
1773 (define_insn "*xorv2df3"
1774 [(set (match_operand:V2DF 0 "register_operand" "=x")
1775 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1776 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1777 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1778 "xorpd\t{%2, %0|%0, %2}"
1779 [(set_attr "type" "sselog")
1780 (set_attr "mode" "V2DF")])
1782 ;; Also define scalar versions. These are used for abs, neg, and
1783 ;; conditional move. Using subregs into vector modes causes register
1784 ;; allocation lossage. These patterns do not allow memory operands
1785 ;; because the native instructions read the full 128-bits.
1787 (define_insn "*anddf3"
1788 [(set (match_operand:DF 0 "register_operand" "=x")
1789 (and:DF (match_operand:DF 1 "register_operand" "0")
1790 (match_operand:DF 2 "register_operand" "x")))]
1792 "andpd\t{%2, %0|%0, %2}"
1793 [(set_attr "type" "sselog")
1794 (set_attr "mode" "V2DF")])
1796 (define_insn "*nanddf3"
1797 [(set (match_operand:DF 0 "register_operand" "=x")
1798 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1799 (match_operand:DF 2 "register_operand" "x")))]
1801 "andnpd\t{%2, %0|%0, %2}"
1802 [(set_attr "type" "sselog")
1803 (set_attr "mode" "V2DF")])
1805 (define_insn "*iordf3"
1806 [(set (match_operand:DF 0 "register_operand" "=x")
1807 (ior:DF (match_operand:DF 1 "register_operand" "0")
1808 (match_operand:DF 2 "register_operand" "x")))]
1810 "orpd\t{%2, %0|%0, %2}"
1811 [(set_attr "type" "sselog")
1812 (set_attr "mode" "V2DF")])
1814 (define_insn "*xordf3"
1815 [(set (match_operand:DF 0 "register_operand" "=x")
1816 (xor:DF (match_operand:DF 1 "register_operand" "0")
1817 (match_operand:DF 2 "register_operand" "x")))]
1819 "xorpd\t{%2, %0|%0, %2}"
1820 [(set_attr "type" "sselog")
1821 (set_attr "mode" "V2DF")])
1823 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1825 ;; Parallel double-precision floating point conversion operations
1827 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1829 (define_insn "sse2_cvtpi2pd"
1830 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1831 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1833 "cvtpi2pd\t{%1, %0|%0, %1}"
1834 [(set_attr "type" "ssecvt")
1835 (set_attr "unit" "mmx,*")
1836 (set_attr "mode" "V2DF")])
1838 (define_insn "sse2_cvtpd2pi"
1839 [(set (match_operand:V2SI 0 "register_operand" "=y")
1840 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1841 UNSPEC_FIX_NOTRUNC))]
1843 "cvtpd2pi\t{%1, %0|%0, %1}"
1844 [(set_attr "type" "ssecvt")
1845 (set_attr "unit" "mmx")
1846 (set_attr "mode" "DI")])
;; Truncating conversion (fix) of a V2DF value, taken from an SSE
;; register or memory, into a V2SI result in a "y"-class register;
;; emits cvttpd2pi and is attributed to the mmx unit.
;; NOTE(review): sse2_cvtpd2pi, which has the same operand shapes and
;; unit, sets the "mode" attribute to DI while this pattern sets TI --
;; confirm whether the difference is intentional.
1848 (define_insn "sse2_cvttpd2pi"
1849 [(set (match_operand:V2SI 0 "register_operand" "=y")
1850 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1852 "cvttpd2pi\t{%1, %0|%0, %1}"
1853 [(set_attr "type" "ssecvt")
1854 (set_attr "unit" "mmx")
1855 (set_attr "mode" "TI")])
1857 (define_insn "sse2_cvtsi2sd"
1858 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1861 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1862 (match_operand:V2DF 1 "register_operand" "0,0")
1865 "cvtsi2sd\t{%2, %0|%0, %2}"
1866 [(set_attr "type" "sseicvt")
1867 (set_attr "mode" "DF")
1868 (set_attr "athlon_decode" "double,direct")])
1870 (define_insn "sse2_cvtsi2sdq"
1871 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1874 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1875 (match_operand:V2DF 1 "register_operand" "0,0")
1877 "TARGET_SSE2 && TARGET_64BIT"
1878 "cvtsi2sdq\t{%2, %0|%0, %2}"
1879 [(set_attr "type" "sseicvt")
1880 (set_attr "mode" "DF")
1881 (set_attr "athlon_decode" "double,direct")])
1883 (define_insn "sse2_cvtsd2si"
1884 [(set (match_operand:SI 0 "register_operand" "=r,r")
1887 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1888 (parallel [(const_int 0)]))]
1889 UNSPEC_FIX_NOTRUNC))]
1891 "cvtsd2si\t{%1, %0|%0, %1}"
1892 [(set_attr "type" "sseicvt")
1893 (set_attr "athlon_decode" "double,vector")
1894 (set_attr "mode" "SI")])
1896 (define_insn "sse2_cvtsd2siq"
1897 [(set (match_operand:DI 0 "register_operand" "=r,r")
1900 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1901 (parallel [(const_int 0)]))]
1902 UNSPEC_FIX_NOTRUNC))]
1903 "TARGET_SSE2 && TARGET_64BIT"
1904 "cvtsd2siq\t{%1, %0|%0, %1}"
1905 [(set_attr "type" "sseicvt")
1906 (set_attr "athlon_decode" "double,vector")
1907 (set_attr "mode" "DI")])
1909 (define_insn "sse2_cvttsd2si"
1910 [(set (match_operand:SI 0 "register_operand" "=r,r")
1913 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1914 (parallel [(const_int 0)]))))]
1916 "cvttsd2si\t{%1, %0|%0, %1}"
1917 [(set_attr "type" "sseicvt")
1918 (set_attr "mode" "SI")
1919 (set_attr "athlon_decode" "double,vector")])
1921 (define_insn "sse2_cvttsd2siq"
1922 [(set (match_operand:DI 0 "register_operand" "=r,r")
1925 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1926 (parallel [(const_int 0)]))))]
1927 "TARGET_SSE2 && TARGET_64BIT"
1928 "cvttsd2siq\t{%1, %0|%0, %1}"
1929 [(set_attr "type" "sseicvt")
1930 (set_attr "mode" "DI")
1931 (set_attr "athlon_decode" "double,vector")])
1933 (define_insn "sse2_cvtdq2pd"
1934 [(set (match_operand:V2DF 0 "register_operand" "=x")
1937 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1938 (parallel [(const_int 0) (const_int 1)]))))]
1940 "cvtdq2pd\t{%1, %0|%0, %1}"
1941 [(set_attr "type" "ssecvt")
1942 (set_attr "mode" "V2DF")])
1944 (define_expand "sse2_cvtpd2dq"
1945 [(set (match_operand:V4SI 0 "register_operand" "")
1947 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1951 "operands[2] = CONST0_RTX (V2SImode);")
1953 (define_insn "*sse2_cvtpd2dq"
1954 [(set (match_operand:V4SI 0 "register_operand" "=x")
1956 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1958 (match_operand:V2SI 2 "const0_operand" "")))]
1960 "cvtpd2dq\t{%1, %0|%0, %1}"
1961 [(set_attr "type" "ssecvt")
1962 (set_attr "mode" "TI")])
1964 (define_expand "sse2_cvttpd2dq"
1965 [(set (match_operand:V4SI 0 "register_operand" "")
1967 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1970 "operands[2] = CONST0_RTX (V2SImode);")
1972 (define_insn "*sse2_cvttpd2dq"
1973 [(set (match_operand:V4SI 0 "register_operand" "=x")
1975 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1976 (match_operand:V2SI 2 "const0_operand" "")))]
1978 "cvttpd2dq\t{%1, %0|%0, %1}"
1979 [(set_attr "type" "ssecvt")
1980 (set_attr "mode" "TI")])
1982 (define_insn "sse2_cvtsd2ss"
1983 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1986 (float_truncate:V2SF
1987 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1988 (match_operand:V4SF 1 "register_operand" "0,0")
1991 "cvtsd2ss\t{%2, %0|%0, %2}"
1992 [(set_attr "type" "ssecvt")
1993 (set_attr "athlon_decode" "vector,double")
1994 (set_attr "mode" "SF")])
1996 (define_insn "sse2_cvtss2sd"
1997 [(set (match_operand:V2DF 0 "register_operand" "=x")
2001 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2002 (parallel [(const_int 0) (const_int 1)])))
2003 (match_operand:V2DF 1 "register_operand" "0")
2006 "cvtss2sd\t{%2, %0|%0, %2}"
2007 [(set_attr "type" "ssecvt")
2008 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: truncates the V2DF input (operand 1) to V2SF
;; and supplies operand 2 as the V2SF zero constant, which is what the
;; const0_operand in *sse2_cvtpd2ps expects.  The constraint string on
;; operand 1 is left empty, as in the sibling expanders sse2_cvtpd2dq
;; and sse2_cvttpd2dq; constraints belong on the matching define_insn.
2010 (define_expand "sse2_cvtpd2ps"
2011 [(set (match_operand:V4SF 0 "register_operand" "")
2013 (float_truncate:V2SF
2014 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2017 "operands[2] = CONST0_RTX (V2SFmode);")
2019 (define_insn "*sse2_cvtpd2ps"
2020 [(set (match_operand:V4SF 0 "register_operand" "=x")
2022 (float_truncate:V2SF
2023 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2024 (match_operand:V2SF 2 "const0_operand" "")))]
2026 "cvtpd2ps\t{%1, %0|%0, %1}"
2027 [(set_attr "type" "ssecvt")
2028 (set_attr "mode" "V4SF")])
2030 (define_insn "sse2_cvtps2pd"
2031 [(set (match_operand:V2DF 0 "register_operand" "=x")
2034 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2035 (parallel [(const_int 0) (const_int 1)]))))]
2037 "cvtps2pd\t{%1, %0|%0, %1}"
2038 [(set_attr "type" "ssecvt")
2039 (set_attr "mode" "V2DF")])
2041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2043 ;; Parallel double-precision floating point element swizzling
2045 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2047 (define_insn "sse2_unpckhpd"
2048 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2051 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2052 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2053 (parallel [(const_int 1)
2055 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2057 unpckhpd\t{%2, %0|%0, %2}
2058 movlpd\t{%H1, %0|%0, %H1}
2059 movhpd\t{%1, %0|%0, %1}"
2060 [(set_attr "type" "sselog,ssemov,ssemov")
2061 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup: operand 1 (SSE register or memory) is the V2DF source and
;; operand 0 the destination, with a second alternative that allows an
;; offsettable memory destination fed from a register.  The insn
;; condition rejects the memory-to-memory combination; it tests operands
;; 0 and 1, the only operands this pattern has (it previously tested
;; operands[2], which the pattern never sets).
2063 (define_insn "*sse3_movddup"
2064 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2067 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2069 (parallel [(const_int 0)
2071 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2073 movddup\t{%1, %0|%0, %1}
2075 [(set_attr "type" "sselog,ssemov")
2076 (set_attr "mode" "V2DF")])
2079 [(set (match_operand:V2DF 0 "memory_operand" "")
2082 (match_operand:V2DF 1 "register_operand" "")
2084 (parallel [(const_int 0)
2086 "TARGET_SSE3 && reload_completed"
2089 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2090 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2091 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2095 (define_insn "sse2_unpcklpd"
2096 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2099 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2100 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2101 (parallel [(const_int 0)
2103 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2105 unpcklpd\t{%2, %0|%0, %2}
2106 movhpd\t{%2, %0|%0, %2}
2107 movlpd\t{%2, %H0|%H0, %2}"
2108 [(set_attr "type" "sselog,ssemov,ssemov")
2109 (set_attr "mode" "V2DF,V1DF,V1DF")])
2111 (define_expand "sse2_shufpd"
2112 [(match_operand:V2DF 0 "register_operand" "")
2113 (match_operand:V2DF 1 "register_operand" "")
2114 (match_operand:V2DF 2 "nonimmediate_operand" "")
2115 (match_operand:SI 3 "const_int_operand" "")]
2118 int mask = INTVAL (operands[3]);
2119 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2121 GEN_INT (mask & 2 ? 3 : 2)));
;; The insn behind sse2_shufpd.  Operand 3 must satisfy
;; const_0_to_1_operand and operand 4 const_2_to_3_operand.  The output
;; code folds them back into a single shufpd immediate: operand 3
;; supplies bit 0 unchanged, operand 4 has its bias of 2 removed and
;; supplies bit 1.  The rebuilt immediate overwrites operands[3], which
;; is what the %3 in the template prints.
2125 (define_insn "sse2_shufpd_1"
2126 [(set (match_operand:V2DF 0 "register_operand" "=x")
2129 (match_operand:V2DF 1 "register_operand" "0")
2130 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2131 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2132 (match_operand 4 "const_2_to_3_operand" "")])))]
2136 mask = INTVAL (operands[3]);
2137 mask |= (INTVAL (operands[4]) - 2) << 1;
2138 operands[3] = GEN_INT (mask);
2140 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2142 [(set_attr "type" "sselog")
2143 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0 (TARGET_SSE2;
;; source and destination may not both be memory).  Three alternatives;
;; the register-to-memory one emits movhpd.
2145 (define_insn "sse2_storehpd"
2146 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2148 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2149 (parallel [(const_int 1)])))]
2150 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2152 movhpd\t{%1, %0|%0, %1}
2155 [(set_attr "type" "ssemov,sselog1,ssemov")
2156 (set_attr "mode" "V1DF,V2DF,DF")])
;; Split applied after reload (TARGET_SSE2): reading element 1 of a V2DF
;; memory operand into a DF register becomes a plain DF move from byte
;; offset 8 of that memory.
2159 [(set (match_operand:DF 0 "register_operand" "")
2161 (match_operand:V2DF 1 "memory_operand" "")
2162 (parallel [(const_int 1)])))]
2163 "TARGET_SSE2 && reload_completed"
2164 [(set (match_dup 0) (match_dup 1))]
2166 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0 (TARGET_SSE2;
;; source and destination may not both be memory).  Three alternatives;
;; the register-to-memory one emits movlpd.
2169 (define_insn "sse2_storelpd"
2170 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2172 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2173 (parallel [(const_int 0)])))]
2174 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2176 movlpd\t{%1, %0|%0, %1}
2179 [(set_attr "type" "ssemov")
2180 (set_attr "mode" "V1DF,DF,DF")])
;; Split applied after reload (TARGET_SSE2): reading element 0 of a V2DF
;; operand into a DF register is rewritten as an ordinary DF move.  The
;; source is re-expressed in DFmode, either as a register with the same
;; register number (gen_rtx_REG) or through gen_lowpart.
2183 [(set (match_operand:DF 0 "register_operand" "")
2185 (match_operand:V2DF 1 "nonimmediate_operand" "")
2186 (parallel [(const_int 0)])))]
2187 "TARGET_SSE2 && reload_completed"
2190 rtx op1 = operands[1];
2192 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2194 op1 = gen_lowpart (DFmode, op1);
2195 emit_move_insn (operands[0], op1);
;; Combine element 0 of V2DF operand 1 with DF operand 2 into V2DF
;; operand 0 (TARGET_SSE2; operands 1 and 2 may not both be memory).
;; Four alternatives: the register-destination ones emit movhpd, unpcklpd
;; and shufpd; the offsettable-memory destination has type "other".
2199 (define_insn "sse2_loadhpd"
2200 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2203 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2204 (parallel [(const_int 0)]))
2205 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2206 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2208 movhpd\t{%2, %0|%0, %2}
2209 unpcklpd\t{%2, %0|%0, %2}
2210 shufpd\t{$1, %1, %0|%0, %1, 1}
2212 [(set_attr "type" "ssemov,sselog,sselog,other")
2213 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Split applied after reload (TARGET_SSE2): the memory destination keeps
;; its own element 0 and takes DF register operand 1 as the other element,
;; so the whole operation is a single DF store at byte offset 8.
2216 [(set (match_operand:V2DF 0 "memory_operand" "")
2218 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2219 (match_operand:DF 1 "register_operand" "")))]
2220 "TARGET_SSE2 && reload_completed"
2221 [(set (match_dup 0) (match_dup 1))]
2223 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine DF operand 2 with element 1 of V2DF operand 1 into V2DF
;; operand 0 (TARGET_SSE2; operands 1 and 2 may not both be memory).
;; Six alternatives: the register-destination ones emit movsd, movlpd,
;; movsd, shufpd and movhpd (the last reading operand 1 through %H1); the
;; memory destination has type "other".  Operand 1 may be the "C"
;; constant in the first alternative.
2226 (define_insn "sse2_loadlpd"
2227 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2229 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2231 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2232 (parallel [(const_int 1)]))))]
2233 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2235 movsd\t{%2, %0|%0, %2}
2236 movlpd\t{%2, %0|%0, %2}
2237 movsd\t{%2, %0|%0, %2}
2238 shufpd\t{$2, %2, %0|%0, %2, 2}
2239 movhpd\t{%H1, %0|%0, %H1}
2241 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2242 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Split applied after reload (TARGET_SSE2): the memory destination keeps
;; its own element 1 and takes DF register operand 1 as the other element,
;; so the whole operation is a single DF store.  The element that is not
;; preserved is element 0, which lives at byte offset 0 of the destination;
;; storing at offset 8 would overwrite the element the pattern keeps.
2245 [(set (match_operand:V2DF 0 "memory_operand" "")
2247 (match_operand:DF 1 "register_operand" "")
2248 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2249 "TARGET_SSE2 && reload_completed"
2250 [(set (match_dup 0) (match_dup 1))]
2252 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Merge pattern behind the movsd intrinsic, on V2DF operands 1 and 2.
;; Six alternatives: movsd, two movlpd forms, shufpd, and two movhps
;; forms that go through the %H modifier on operand 1 or operand 0.
;; Every template needs a balanced {att|intel} dialect group, so the two
;; movhps templates carry their closing brace.
2255 (define_insn "sse2_movsd"
2256 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2258 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2259 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2263 movsd\t{%2, %0|%0, %2}
2264 movlpd\t{%2, %0|%0, %2}
2265 movlpd\t{%2, %0|%0, %2}
2266 shufpd\t{$2, %2, %0|%0, %2, 2}
2267 movhps\t{%H1, %0|%0, %H1}
2268 movhps\t{%1, %H0|%H0, %1}"
2269 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2270 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; movddup form of the V2DF duplicate pattern: DF operand 1 may be a
;; register or memory, and the result goes to an SSE register.
2272 (define_insn "*vec_dupv2df_sse3"
2273 [(set (match_operand:V2DF 0 "register_operand" "=x")
2275 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2277 "movddup\t{%1, %0|%0, %1}"
2278 [(set_attr "type" "sselog1")
2279 (set_attr "mode" "DF")])
;; V2DF duplicate pattern whose DF source must already be in the
;; destination register (constraint "0").  Its "mode" attribute is V4SF.
2281 (define_insn "*vec_dupv2df"
2282 [(set (match_operand:V2DF 0 "register_operand" "=x")
2284 (match_operand:DF 1 "register_operand" "0")))]
2287 [(set_attr "type" "sselog1")
2288 (set_attr "mode" "V4SF")])
;; movddup form of a V2DF concatenation pattern: DF operand 1 may be a
;; register or memory, and the result goes to an SSE register.
2290 (define_insn "*vec_concatv2df_sse3"
2291 [(set (match_operand:V2DF 0 "register_operand" "=x")
2293 (match_operand:DF 1 "nonimmediate_operand" "xm")
2296 "movddup\t{%1, %0|%0, %1}"
2297 [(set_attr "type" "sselog1")
2298 (set_attr "mode" "DF")])
;; Build a V2DF from DF operands 1 and 2.  Five alternatives: unpcklpd,
;; movhpd and movsd use the "Y" register class; movlhps and movhps use
;; "x".  In the movsd alternative operand 1 comes from memory and
;; operand 2 is the "C" constant.
2300 (define_insn "*vec_concatv2df"
2301 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2303 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2304 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2307 unpcklpd\t{%2, %0|%0, %2}
2308 movhpd\t{%2, %0|%0, %2}
2309 movsd\t{%1, %0|%0, %1}
2310 movlhps\t{%2, %0|%0, %2}
2311 movhps\t{%2, %0|%0, %2}"
2312 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2313 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander that sets one element of V2DF operand 0 to DF operand 1.
;; The element index is the constant in operand 2; the work is delegated
;; to ix86_expand_vector_set, called with false as its first argument.
2315 (define_expand "vec_setv2df"
2316 [(match_operand:V2DF 0 "register_operand" "")
2317 (match_operand:DF 1 "register_operand" "")
2318 (match_operand 2 "const_int_operand" "")]
2321 ix86_expand_vector_set (false, operands[0], operands[1],
2322 INTVAL (operands[2]));
;; Expander that extracts one element of V2DF operand 1 into DF operand 0.
;; The element index is the constant in operand 2; the work is delegated
;; to ix86_expand_vector_extract, called with false as its first argument.
2326 (define_expand "vec_extractv2df"
2327 [(match_operand:DF 0 "register_operand" "")
2328 (match_operand:V2DF 1 "register_operand" "")
2329 (match_operand 2 "const_int_operand" "")]
2332 ix86_expand_vector_extract (false, operands[0], operands[1],
2333 INTVAL (operands[2]));
;; Expander that initializes V2DF operand 0 from operand 1 (no mode or
;; predicate given); delegated to ix86_expand_vector_init, called with
;; false as its first argument.
2337 (define_expand "vec_initv2df"
2338 [(match_operand:V2DF 0 "register_operand" "")
2339 (match_operand 1 "" "")]
2342 ix86_expand_vector_init (false, operands[0], operands[1]);
2346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2348 ;; Parallel integral arithmetic
2350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander for every SSEMODEI mode.  The
;; preparation statement forces an all-zero vector of the same mode into
;; a register and records it as operands[2] for use by the pattern.
2352 (define_expand "neg<mode>2"
2353 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2356 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2358 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander for every SSEMODEI mode.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy with code PLUS
;; before the pattern is emitted.
2360 (define_expand "add<mode>3"
2361 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2362 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2363 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2365 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize attribute.
;; Operand 1 is commutative ("%") and tied to the destination; operand 2
;; may be a register or memory.  Requires TARGET_SSE2 and operands
;; accepted by ix86_binary_operator_ok for PLUS.
2367 (define_insn "*add<mode>3"
2368 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2370 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2371 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2372 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2373 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2374 [(set_attr "type" "sseiadd")
2375 (set_attr "mode" "TI")])
;; padds{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; commutative and tied to the destination.  The condition checks the
;; operands with ix86_binary_operator_ok using code SS_PLUS.
2377 (define_insn "sse2_ssadd<mode>3"
2378 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2380 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2381 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2382 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2383 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2384 [(set_attr "type" "sseiadd")
2385 (set_attr "mode" "TI")])
;; paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is
;; commutative and tied to the destination.  The condition checks the
;; operands with ix86_binary_operator_ok using code US_PLUS.
2387 (define_insn "sse2_usadd<mode>3"
2388 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2390 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2391 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2392 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2393 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2394 [(set_attr "type" "sseiadd")
2395 (set_attr "mode" "TI")])
;; Integer vector subtraction expander for every SSEMODEI mode.  Operand 1
;; must be a register; operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MINUS.
2397 (define_expand "sub<mode>3"
2398 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2399 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2400 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2402 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: the mnemonic suffix comes from the ssevecsize attribute.
;; Operand 1 is a register tied to the destination (not commutative);
;; operand 2 may be a register or memory.
2404 (define_insn "*sub<mode>3"
2405 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2407 (match_operand:SSEMODEI 1 "register_operand" "0")
2408 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2410 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2411 [(set_attr "type" "sseiadd")
2412 (set_attr "mode" "TI")])
;; psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
2414 (define_insn "sse2_sssub<mode>3"
2415 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2417 (match_operand:SSEMODE12 1 "register_operand" "0")
2418 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2420 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sseiadd")
2422 (set_attr "mode" "TI")])
;; psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).  Operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
2424 (define_insn "sse2_ussub<mode>3"
2425 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2427 (match_operand:SSEMODE12 1 "register_operand" "0")
2428 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2430 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2431 [(set_attr "type" "sseiadd")
2432 (set_attr "mode" "TI")])
;; V16QI multiply synthesized from word multiplies.  Twelve V16QI
;; temporaries are allocated.  Each input is unpacked with punpckhbw and
;; punpcklbw against itself, the halves are multiplied through
;; gen_mulv8hi3 on V8HImode views, and the wanted bytes are gathered back
;; by three rounds of punpckhbw/punpcklbw plus a final punpcklbw into op0.
2434 (define_expand "mulv16qi3"
2435 [(set (match_operand:V16QI 0 "register_operand" "")
2436 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2437 (match_operand:V16QI 2 "register_operand" "")))]
2443 for (i = 0; i < 12; ++i)
2444 t[i] = gen_reg_rtx (V16QImode);
2446 /* Unpack data such that we've got a source byte in each low byte of
2447 each word. We don't care what goes into the high byte of each word.
2448 Rather than trying to get zero in there, most convenient is to let
2449 it be a copy of the low byte. */
2450 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2451 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2452 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2453 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2455 /* Multiply words. The end-of-line annotations here give a picture of what
2456 the output of that instruction looks like. Dot means don't care; the
2457 letters are the bytes of the result with A being the most significant. */
2458 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2459 gen_lowpart (V8HImode, t[0]),
2460 gen_lowpart (V8HImode, t[1])));
2461 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2462 gen_lowpart (V8HImode, t[2]),
2463 gen_lowpart (V8HImode, t[3])));
2465 /* Extract the relevant bytes and merge them back together. */
2466 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2467 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2468 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2469 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2470 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2471 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2474 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT before the pattern
;; is emitted.
2478 (define_expand "mulv8hi3"
2479 [(set (match_operand:V8HI 0 "register_operand" "")
2480 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2481 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2483 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok for MULT.
2485 (define_insn "*mulv8hi3"
2486 [(set (match_operand:V8HI 0 "register_operand" "=x")
2487 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2488 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2489 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2490 "pmullw\t{%2, %0|%0, %2}"
2491 [(set_attr "type" "sseimul")
2492 (set_attr "mode" "TI")])
;; pmulhw on V8HI operands, producing a V8HI result.  Operand 1 is
;; commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory.
2494 (define_insn "sse2_smulv8hi3_highpart"
2495 [(set (match_operand:V8HI 0 "register_operand" "=x")
2500 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2502 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2504 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2505 "pmulhw\t{%2, %0|%0, %2}"
2506 [(set_attr "type" "sseimul")
2507 (set_attr "mode" "TI")])
;; pmulhuw on V8HI operands, producing a V8HI result.  Operand 1 is
;; commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory.
2509 (define_insn "sse2_umulv8hi3_highpart"
2510 [(set (match_operand:V8HI 0 "register_operand" "=x")
2515 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2517 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2519 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2520 "pmulhuw\t{%2, %0|%0, %2}"
2521 [(set_attr "type" "sseimul")
2522 (set_attr "mode" "TI")])
;; pmuludq: operands 1 and 2 are V4SI, each narrowed by a selector of
;; elements 0 and 2, and the result is V2DI.  Operand 1 is commutative
;; ("%") and tied to the destination.  The operand check names V4SImode,
;; the mode of the operands being multiplied.
2524 (define_insn "sse2_umulv2siv2di3"
2525 [(set (match_operand:V2DI 0 "register_operand" "=x")
2529 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2530 (parallel [(const_int 0) (const_int 2)])))
2533 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2534 (parallel [(const_int 0) (const_int 2)])))))]
2535 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2536 "pmuludq\t{%2, %0|%0, %2}"
2537 [(set_attr "type" "sseimul")
2538 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The pattern
;; selects two V4HI element groups from each operand; one set of
;; selectors starts at element 0, the other starts at element 1 and ends
;; at element 7.  Operand 1 is commutative and tied to the destination.
2540 (define_insn "sse2_pmaddwd"
2541 [(set (match_operand:V4SI 0 "register_operand" "=x")
2546 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2547 (parallel [(const_int 0)
2553 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2554 (parallel [(const_int 0)
2560 (vec_select:V4HI (match_dup 1)
2561 (parallel [(const_int 1)
2566 (vec_select:V4HI (match_dup 2)
2567 (parallel [(const_int 1)
2570 (const_int 7)]))))))]
2572 "pmaddwd\t{%2, %0|%0, %2}"
2573 [(set_attr "type" "sseiadd")
2574 (set_attr "mode" "TI")])
;; V4SI multiply synthesized from two pmuludq multiplies
;; (gen_sse2_umulv2siv2di3).  Elements 0 and 2 are multiplied directly.
;; Elements 1 and 3 are first moved down with a 32-bit TImode right shift
;; of each input, then multiplied.  The two results are shuffled with
;; pshufd and merged into op0 with punpckldq.
2576 (define_expand "mulv4si3"
2577 [(set (match_operand:V4SI 0 "register_operand" "")
2578 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2579 (match_operand:V4SI 2 "register_operand" "")))]
2582 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2588 t1 = gen_reg_rtx (V4SImode);
2589 t2 = gen_reg_rtx (V4SImode);
2590 t3 = gen_reg_rtx (V4SImode);
2591 t4 = gen_reg_rtx (V4SImode);
2592 t5 = gen_reg_rtx (V4SImode);
2593 t6 = gen_reg_rtx (V4SImode);
2594 thirtytwo = GEN_INT (32);
2596 /* Multiply elements 2 and 0. */
2597 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2599 /* Shift both input vectors down one element, so that elements 3 and 1
2600 are now in the slots for elements 2 and 0. For K8, at least, this is
2601 faster than using a shuffle. */
2602 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2603 gen_lowpart (TImode, op1), thirtytwo));
2604 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2605 gen_lowpart (TImode, op2), thirtytwo));
2607 /* Multiply elements 3 and 1. */
2608 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2610 /* Move the results in element 2 down to element 1; we don't care what
2611 goes in elements 2 and 3. */
2612 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2613 const0_rtx, const0_rtx));
2614 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2615 const0_rtx, const0_rtx));
2617 /* Merge the parts back together. */
2618 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from three pmuludq multiplies
;; (gen_sse2_umulv2siv2di3): low*low, plus the two cross products of one
;; input with the other input shifted right by 32.  The cross products
;; are shifted left by 32 and the three parts are summed into op0.
2622 (define_expand "mulv2di3"
2623 [(set (match_operand:V2DI 0 "register_operand" "")
2624 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2625 (match_operand:V2DI 2 "register_operand" "")))]
2628 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2634 t1 = gen_reg_rtx (V2DImode);
2635 t2 = gen_reg_rtx (V2DImode);
2636 t3 = gen_reg_rtx (V2DImode);
2637 t4 = gen_reg_rtx (V2DImode);
2638 t5 = gen_reg_rtx (V2DImode);
2639 t6 = gen_reg_rtx (V2DImode);
2640 thirtytwo = GEN_INT (32);
2642 /* Multiply low parts. */
2643 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2644 gen_lowpart (V4SImode, op2)));
2646 /* Shift input vectors right 32 bits so we can multiply high parts. */
2647 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2648 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2650 /* Multiply high parts by low parts. */
2651 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2652 gen_lowpart (V4SImode, t3)));
2653 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2654 gen_lowpart (V4SImode, t2)));
2656 /* Shift them back. */
2657 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2658 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2660 /* Add the three parts together. */
2661 emit_insn (gen_addv2di3 (t6, t1, t4));
2662 emit_insn (gen_addv2di3 (op0, t6, t5));
;; psra{w,d} for the SSEMODE24 modes (V8HI, V4SI).  Operand 1 is tied to
;; the destination; the SImode shift count in operand 2 may be an SSE
;; register or an immediate.
2666 (define_insn "ashr<mode>3"
2667 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2669 (match_operand:SSEMODE24 1 "register_operand" "0")
2670 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2672 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2673 [(set_attr "type" "sseishft")
2674 (set_attr "mode" "TI")])
;; psrl{w,d,q}: logical right shift for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI).  Operand 1 is tied to the destination; the SImode shift count
;; in operand 2 may be an SSE register or an immediate.
2676 (define_insn "lshr<mode>3"
2677 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2678 (lshiftrt:SSEMODE248
2679 (match_operand:SSEMODE248 1 "register_operand" "0")
2680 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2682 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2683 [(set_attr "type" "sseishft")
2684 (set_attr "mode" "TI")])
;; psll{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).  Operand 1 is
;; tied to the destination; the SImode shift count in operand 2 may be an
;; SSE register or an immediate.
2686 (define_insn "ashl<mode>3"
2687 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2689 (match_operand:SSEMODE248 1 "register_operand" "0")
2690 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2692 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2693 [(set_attr "type" "sseishft")
2694 (set_attr "mode" "TI")])
;; pslldq: left shift of a whole TImode register.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably turning a bit count into the byte count that
;; pslldq takes; confirm against the predicate definition).
2696 (define_insn "sse2_ashlti3"
2697 [(set (match_operand:TI 0 "register_operand" "=x")
2698 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2699 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2702 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2703 return "pslldq\t{%2, %0|%0, %2}";
2705 [(set_attr "type" "sseishft")
2706 (set_attr "mode" "TI")])
;; Whole-vector left shift expander for every SSEMODEI mode.  Operand 2
;; is tested against const_0_to_255_mul_8_operand, and operands 0 and 1
;; are re-expressed in TImode with gen_lowpart so the TImode ashift in
;; the pattern applies to them.
2708 (define_expand "vec_shl_<mode>"
2709 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2710 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2711 (match_operand:SI 2 "general_operand" "")))]
2714 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2716 operands[0] = gen_lowpart (TImode, operands[0]);
2717 operands[1] = gen_lowpart (TImode, operands[1]);
;; psrldq: logical right shift of a whole TImode register.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing (presumably turning a bit count into the byte
;; count that psrldq takes; confirm against the predicate definition).
2720 (define_insn "sse2_lshrti3"
2721 [(set (match_operand:TI 0 "register_operand" "=x")
2722 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2723 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2726 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2727 return "psrldq\t{%2, %0|%0, %2}";
2729 [(set_attr "type" "sseishft")
2730 (set_attr "mode" "TI")])
;; Whole-vector right shift expander for every SSEMODEI mode.  Operand 2
;; is tested against const_0_to_255_mul_8_operand, and operands 0 and 1
;; are re-expressed in TImode with gen_lowpart so the TImode lshiftrt in
;; the pattern applies to them.
2732 (define_expand "vec_shr_<mode>"
2733 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2734 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2735 (match_operand:SI 2 "general_operand" "")))]
2738 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2740 operands[0] = gen_lowpart (TImode, operands[0]);
2741 operands[1] = gen_lowpart (TImode, operands[1]);
;; Signed byte maximum, built as a vector conditional.  xops is laid out
;; like the vcond operands: destination, the two values to choose from
;; (operands 1 and 2), a GT comparison rtx, and the compared operands
;; (operand 1, then operand 2).  ix86_expand_int_vcond is then called
;; with false as its second argument.
2744 (define_expand "smaxv16qi3"
2745 [(set (match_operand:V16QI 0 "register_operand" "")
2746 (smax:V16QI (match_operand:V16QI 1 "register_operand" "")
2747 (match_operand:V16QI 2 "register_operand" "")))]
2753 xops[0] = operands[0];
2754 xops[1] = operands[1];
2755 xops[2] = operands[2];
2756 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2757 xops[4] = operands[1];
2758 xops[5] = operands[2];
2759 ok = ix86_expand_int_vcond (xops, false);
;; Unsigned byte maximum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code UMAX before the pattern
;; is emitted.
2764 (define_expand "umaxv16qi3"
2765 [(set (match_operand:V16QI 0 "register_operand" "")
2766 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2767 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2769 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; pmaxub: unsigned byte maximum.  Operand 1 is commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and operands accepted by ix86_binary_operator_ok
;; for UMAX.
2771 (define_insn "*umaxv16qi3"
2772 [(set (match_operand:V16QI 0 "register_operand" "=x")
2773 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2774 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2775 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2776 "pmaxub\t{%2, %0|%0, %2}"
2777 [(set_attr "type" "sseiadd")
2778 (set_attr "mode" "TI")])
;; Signed word maximum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code SMAX before the pattern
;; is emitted.
2780 (define_expand "smaxv8hi3"
2781 [(set (match_operand:V8HI 0 "register_operand" "")
2782 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2783 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2785 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; pmaxsw: signed word maximum.  Operand 1 is commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok for SMAX.
2787 (define_insn "*smaxv8hi3"
2788 [(set (match_operand:V8HI 0 "register_operand" "=x")
2789 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2790 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2791 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2792 "pmaxsw\t{%2, %0|%0, %2}"
2793 [(set_attr "type" "sseiadd")
2794 (set_attr "mode" "TI")])
;; Unsigned word maximum, built from an unsigned saturating subtract and
;; a vector conditional.  t1 receives operand 2 minus operand 1 from
;; gen_sse2_ussubv8hi3 and t2 is an all-zero vector.  The conditional
;; chooses between operands 1 and 2 on an EQ comparison of t1 with t2,
;; via ix86_expand_int_vcond called with false.
2796 (define_expand "umaxv8hi3"
2797 [(set (match_operand:V8HI 0 "register_operand" "")
2798 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
2799 (match_operand:V8HI 2 "register_operand" "")))]
2802 rtx xops[6], t1, t2;
2805 t1 = gen_reg_rtx (V8HImode);
2806 emit_insn (gen_sse2_ussubv8hi3 (t1, operands[2], operands[1]));
2807 t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
2809 xops[0] = operands[0];
2810 xops[1] = operands[1];
2811 xops[2] = operands[2];
2812 xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
2815 ok = ix86_expand_int_vcond (xops, false);
;; Signed byte minimum, built as a vector conditional.  xops is laid out
;; like the vcond operands: destination, the two values to choose from
;; (operands 1 and 2), a GT comparison rtx, and the compared operands.
;; The compared operands are given as operand 2 then operand 1, the
;; reverse of smaxv16qi3.  ix86_expand_int_vcond is called with false.
2820 (define_expand "sminv16qi3"
2821 [(set (match_operand:V16QI 0 "register_operand" "")
2822 (smin:V16QI (match_operand:V16QI 1 "register_operand" "")
2823 (match_operand:V16QI 2 "register_operand" "")))]
2829 xops[0] = operands[0];
2830 xops[1] = operands[1];
2831 xops[2] = operands[2];
2832 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2833 xops[4] = operands[2];
2834 xops[5] = operands[1];
2835 ok = ix86_expand_int_vcond (xops, false);
;; Unsigned byte minimum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy using the UMIN code, which matches
;; the umin rtx in this pattern and the operand check in *uminv16qi3.
2840 (define_expand "uminv16qi3"
2841 [(set (match_operand:V16QI 0 "register_operand" "")
2842 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2843 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2845 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; pminub: unsigned byte minimum.  Operand 1 is commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and operands accepted by ix86_binary_operator_ok
;; for UMIN.
2847 (define_insn "*uminv16qi3"
2848 [(set (match_operand:V16QI 0 "register_operand" "=x")
2849 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2850 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2851 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2852 "pminub\t{%2, %0|%0, %2}"
2853 [(set_attr "type" "sseiadd")
2854 (set_attr "mode" "TI")])
;; Signed word minimum expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code SMIN before the pattern
;; is emitted.
2856 (define_expand "sminv8hi3"
2857 [(set (match_operand:V8HI 0 "register_operand" "")
2858 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2859 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2861 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; pminsw: signed word minimum.  Operand 1 is commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok for SMIN.
2863 (define_insn "*sminv8hi3"
2864 [(set (match_operand:V8HI 0 "register_operand" "=x")
2865 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2866 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2867 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2868 "pminsw\t{%2, %0|%0, %2}"
2869 [(set_attr "type" "sseiadd")
2870 (set_attr "mode" "TI")])
;; Unsigned word minimum, built from an unsigned saturating subtract and
;; a vector conditional.  t1 receives operand 1 minus operand 2 from
;; gen_sse2_ussubv8hi3 and t2 is an all-zero vector.  The conditional
;; chooses between operands 1 and 2 on an EQ comparison of t1 with t2,
;; via ix86_expand_int_vcond called with false.
2872 (define_expand "uminv8hi3"
2873 [(set (match_operand:V8HI 0 "register_operand" "")
2874 (umin:V8HI (match_operand:V8HI 1 "register_operand" "")
2875 (match_operand:V8HI 2 "register_operand" "")))]
2878 rtx xops[6], t1, t2;
2881 t1 = gen_reg_rtx (V8HImode);
2882 emit_insn (gen_sse2_ussubv8hi3 (t1, operands[1], operands[2]));
2883 t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
2885 xops[0] = operands[0];
2886 xops[1] = operands[1];
2887 xops[2] = operands[2];
2888 xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
2891 ok = ix86_expand_int_vcond (xops, false);
2896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2898 ;; Parallel integral comparisons
2900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pcmpeq{b,w,d} for the SSEMODE124 modes (V16QI, V8HI, V4SI).  Operand 1
;; is commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok for EQ.
2902 (define_insn "sse2_eq<mode>3"
2903 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2905 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2906 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2907 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2908 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2909 [(set_attr "type" "ssecmp")
2910 (set_attr "mode" "TI")])
;; pcmpgt{b,w,d} for the SSEMODE124 modes (V16QI, V8HI, V4SI).  Operand 1
;; is a register tied to the destination (not commutative); operand 2 may
;; be a register or memory.
2912 (define_insn "sse2_gt<mode>3"
2913 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2915 (match_operand:SSEMODE124 1 "register_operand" "0")
2916 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2918 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2919 [(set_attr "type" "ssecmp")
2920 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124 modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the two values of the if_then_else.  The expansion is attempted with
;; ix86_expand_int_vcond, called with false as its second argument.
2922 (define_expand "vcond<mode>"
2923 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2924 (if_then_else:SSEMODE124
2925 (match_operator 3 ""
2926 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2927 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2928 (match_operand:SSEMODE124 1 "general_operand" "")
2929 (match_operand:SSEMODE124 2 "general_operand" "")))]
2932 if (ix86_expand_int_vcond (operands, false))
;; Vector conditional select for the SSEMODE12 modes (V16QI, V8HI), with
;; the same operand layout as vcond<mode>.  The expansion is attempted
;; with ix86_expand_int_vcond, called with true as its second argument
;; (presumably selecting unsigned comparison; confirm in i386.c).
2938 (define_expand "vcondu<mode>"
2939 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2940 (if_then_else:SSEMODE12
2941 (match_operator 3 ""
2942 [(match_operand:SSEMODE12 4 "nonimmediate_operand" "")
2943 (match_operand:SSEMODE12 5 "nonimmediate_operand" "")])
2944 (match_operand:SSEMODE12 1 "general_operand" "")
2945 (match_operand:SSEMODE12 2 "general_operand" "")))]
2948 if (ix86_expand_int_vcond (operands, true))
2954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2956 ;; Parallel integral logical operations
2958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expander for every SSEMODEI mode, expressed as an
;; xor.  The preparation code builds a constant vector with every element
;; set to constm1_rtx (one per unit of the mode), forces it into a
;; register and records it as operands[2].
2960 (define_expand "one_cmpl<mode>2"
2961 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2962 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2966 int i, n = GET_MODE_NUNITS (<MODE>mode);
2967 rtvec v = rtvec_alloc (n);
2969 for (i = 0; i < n; ++i)
2970 RTVEC_ELT (v, i) = constm1_rtx;
2972 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for every SSEMODEI mode.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy with code AND before the
;; pattern is emitted.
2975 (define_expand "and<mode>3"
2976 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2977 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2978 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2980 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; pand for every SSEMODEI mode.  Operand 1 is commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok for AND.
2982 (define_insn "*and<mode>3"
2983 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2985 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2986 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2987 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
2988 "pand\t{%2, %0|%0, %2}"
2989 [(set_attr "type" "sselog")
2990 (set_attr "mode" "TI")])
;; pandn for every SSEMODEI mode.  The pattern applies "not" to operand 1,
;; a register tied to the destination, and combines it with operand 2,
;; which may be a register or memory.
2992 (define_insn "sse2_nand<mode>3"
2993 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2995 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
2996 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2998 "pandn\t{%2, %0|%0, %2}"
2999 [(set_attr "type" "sselog")
3000 (set_attr "mode" "TI")])
;; Bitwise inclusive-OR expander for every SSEMODEI mode.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy with code IOR before
;; the pattern is emitted.
3002 (define_expand "ior<mode>3"
3003 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3004 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3005 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3007 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; por for every SSEMODEI mode.  Operand 1 is commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok for IOR.
3009 (define_insn "*ior<mode>3"
3010 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3012 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3013 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3014 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3015 "por\t{%2, %0|%0, %2}"
3016 [(set_attr "type" "sselog")
3017 (set_attr "mode" "TI")])
;; Bitwise exclusive-OR expander for every SSEMODEI mode.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy with code XOR before
;; the pattern is emitted.
3019 (define_expand "xor<mode>3"
3020 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3021 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3022 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3024 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; pxor for every SSEMODEI mode.  Operand 1 is commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok for XOR.
3026 (define_insn "*xor<mode>3"
3027 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3029 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3030 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3031 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3032 "pxor\t{%2, %0|%0, %2}"
3033 [(set_attr "type" "sselog")
3034 (set_attr "mode" "TI")])
3036 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3038 ;; Parallel integral element swizzling
3040 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packsswb: two V8HI operands produce one V16QI result.  Operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
3042 (define_insn "sse2_packsswb"
3043 [(set (match_operand:V16QI 0 "register_operand" "=x")
3046 (match_operand:V8HI 1 "register_operand" "0"))
3048 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3050 "packsswb\t{%2, %0|%0, %2}"
3051 [(set_attr "type" "sselog")
3052 (set_attr "mode" "TI")])
;; packssdw: two V4SI operands produce one V8HI result.  Operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
3054 (define_insn "sse2_packssdw"
3055 [(set (match_operand:V8HI 0 "register_operand" "=x")
3058 (match_operand:V4SI 1 "register_operand" "0"))
3060 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3062 "packssdw\t{%2, %0|%0, %2}"
3063 [(set_attr "type" "sselog")
3064 (set_attr "mode" "TI")])
;; packuswb: two V8HI operands produce one V16QI result.  Operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
3066 (define_insn "sse2_packuswb"
3067 [(set (match_operand:V16QI 0 "register_operand" "=x")
3070 (match_operand:V8HI 1 "register_operand" "0"))
3072 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3074 "packuswb\t{%2, %0|%0, %2}"
3075 [(set_attr "type" "sselog")
3076 (set_attr "mode" "TI")])
;; punpckhbw: byte interleave of V16QI operands 1 and 2.  The selector
;; alternates indices 8..15 with 24..31 (presumably positions in the
;; 32-element concatenation of the two operands, i.e. the high eight
;; bytes of each).  Operand 1 is tied to the destination.
3078 (define_insn "sse2_punpckhbw"
3079 [(set (match_operand:V16QI 0 "register_operand" "=x")
3082 (match_operand:V16QI 1 "register_operand" "0")
3083 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3084 (parallel [(const_int 8) (const_int 24)
3085 (const_int 9) (const_int 25)
3086 (const_int 10) (const_int 26)
3087 (const_int 11) (const_int 27)
3088 (const_int 12) (const_int 28)
3089 (const_int 13) (const_int 29)
3090 (const_int 14) (const_int 30)
3091 (const_int 15) (const_int 31)])))]
3093 "punpckhbw\t{%2, %0|%0, %2}"
3094 [(set_attr "type" "sselog")
3095 (set_attr "mode" "TI")])
;; punpcklbw: byte interleave of V16QI operands 1 and 2.  The selector
;; alternates indices 0..7 with 16..23 (presumably positions in the
;; 32-element concatenation of the two operands, i.e. the low eight bytes
;; of each).  Operand 1 is tied to the destination.
3097 (define_insn "sse2_punpcklbw"
3098 [(set (match_operand:V16QI 0 "register_operand" "=x")
3101 (match_operand:V16QI 1 "register_operand" "0")
3102 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3103 (parallel [(const_int 0) (const_int 16)
3104 (const_int 1) (const_int 17)
3105 (const_int 2) (const_int 18)
3106 (const_int 3) (const_int 19)
3107 (const_int 4) (const_int 20)
3108 (const_int 5) (const_int 21)
3109 (const_int 6) (const_int 22)
3110 (const_int 7) (const_int 23)])))]
3112 "punpcklbw\t{%2, %0|%0, %2}"
3113 [(set_attr "type" "sselog")
3114 (set_attr "mode" "TI")])
;; punpckhwd: word interleave of V8HI operands 1 and 2.  The selector
;; alternates indices 4..7 with 12..15 (presumably the high four words of
;; each operand in their concatenation).  Operand 1 is tied to the
;; destination.
3116 (define_insn "sse2_punpckhwd"
3117 [(set (match_operand:V8HI 0 "register_operand" "=x")
3120 (match_operand:V8HI 1 "register_operand" "0")
3121 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3122 (parallel [(const_int 4) (const_int 12)
3123 (const_int 5) (const_int 13)
3124 (const_int 6) (const_int 14)
3125 (const_int 7) (const_int 15)])))]
3127 "punpckhwd\t{%2, %0|%0, %2}"
3128 [(set_attr "type" "sselog")
3129 (set_attr "mode" "TI")])
;; punpcklwd: word interleave of V8HI operands 1 and 2.  The selector
;; alternates indices 0..3 with 8..11 (presumably the low four words of
;; each operand in their concatenation).  Operand 1 is tied to the
;; destination.
3131 (define_insn "sse2_punpcklwd"
3132 [(set (match_operand:V8HI 0 "register_operand" "=x")
3135 (match_operand:V8HI 1 "register_operand" "0")
3136 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3137 (parallel [(const_int 0) (const_int 8)
3138 (const_int 1) (const_int 9)
3139 (const_int 2) (const_int 10)
3140 (const_int 3) (const_int 11)])))]
3142 "punpcklwd\t{%2, %0|%0, %2}"
3143 [(set_attr "type" "sselog")
3144 (set_attr "mode" "TI")])
;; punpckhdq: doubleword interleave of V4SI operands 1 and 2.  The
;; selector is 2, 6, 3, 7 (presumably the high two elements of each
;; operand in their concatenation).  Operand 1 is tied to the destination.
3146 (define_insn "sse2_punpckhdq"
3147 [(set (match_operand:V4SI 0 "register_operand" "=x")
3150 (match_operand:V4SI 1 "register_operand" "0")
3151 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3152 (parallel [(const_int 2) (const_int 6)
3153 (const_int 3) (const_int 7)])))]
3155 "punpckhdq\t{%2, %0|%0, %2}"
3156 [(set_attr "type" "sselog")
3157 (set_attr "mode" "TI")])
;; punpckldq: doubleword interleave of V4SI operands 1 and 2.  The
;; selector is 0, 4, 1, 5 (presumably the low two elements of each
;; operand in their concatenation).  Operand 1 is tied to the destination.
3159 (define_insn "sse2_punpckldq"
3160 [(set (match_operand:V4SI 0 "register_operand" "=x")
3163 (match_operand:V4SI 1 "register_operand" "0")
3164 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3165 (parallel [(const_int 0) (const_int 4)
3166 (const_int 1) (const_int 5)])))]
3168 "punpckldq\t{%2, %0|%0, %2}"
3169 [(set_attr "type" "sselog")
3170 (set_attr "mode" "TI")])
;; punpckhqdq: quadword interleave of V2DI operands 1 and 2; the selector
;; begins with element 1.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
3172 (define_insn "sse2_punpckhqdq"
3173 [(set (match_operand:V2DI 0 "register_operand" "=x")
3176 (match_operand:V2DI 1 "register_operand" "0")
3177 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3178 (parallel [(const_int 1)
3181 "punpckhqdq\t{%2, %0|%0, %2}"
3182 [(set_attr "type" "sselog")
3183 (set_attr "mode" "TI")])
;; sse2_punpcklqdq: V2DI pattern that emits punpcklqdq.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.  The visible part of the selector starts with index 0.
;; NOTE(review): the rest of the selector and the insn condition are not
;; visible in this excerpt.
3185 (define_insn "sse2_punpcklqdq"
3186 [(set (match_operand:V2DI 0 "register_operand" "=x")
3189 (match_operand:V2DI 1 "register_operand" "0")
3190 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3191 (parallel [(const_int 0)
3194 "punpcklqdq\t{%2, %0|%0, %2}"
3195 [(set_attr "type" "sselog")
3196 (set_attr "mode" "TI")])
;; sse2_pinsrw expander (named, so reachable from the intrinsics).
;; Takes the value to insert as an SImode operand 2 and an element index
;; in operand 3 (0..7).  The preparation code narrows operand 2 to HImode
;; with gen_lowpart and rewrites the index as the one-hot value
;; 1 << index, which is the form the *sse2_pinsrw insn predicate
;; (const_pow2_1_to_128_operand) accepts.
3198 (define_expand "sse2_pinsrw"
3199 [(set (match_operand:V8HI 0 "register_operand" "")
3202 (match_operand:SI 2 "nonimmediate_operand" ""))
3203 (match_operand:V8HI 1 "register_operand" "")
3204 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3207 operands[2] = gen_lowpart (HImode, operands[2]);
3208 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; *sse2_pinsrw: the matching insn for the pinsrw expander.
;; Operand 2 is the HImode value (general register or memory), operand 1
;; is tied to the destination, and operand 3 is a power-of-two constant.
;; The output code converts operand 3 back to an element index with
;; exact_log2 before printing it as the immediate.
;; NOTE(review): %k2 presumably prints operand 2 with its 32-bit register
;; name -- confirm against the i386 operand-modifier documentation.
3211 (define_insn "*sse2_pinsrw"
3212 [(set (match_operand:V8HI 0 "register_operand" "=x")
3215 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3216 (match_operand:V8HI 1 "register_operand" "0")
3217 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3220 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3221 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3223 [(set_attr "type" "sselog")
3224 (set_attr "mode" "TI")])
;; sse2_pextrw: emits pextrw.  Selects one element of the V8HI register
;; operand 1, chosen by the constant operand 2 (0..7), and places the
;; result in an SImode general register (constraint r).
;; NOTE(review): the RTL between the set and the selector (how the HImode
;; element is widened to SImode) is not visible in this excerpt.
3226 (define_insn "sse2_pextrw"
3227 [(set (match_operand:SI 0 "register_operand" "=r")
3230 (match_operand:V8HI 1 "register_operand" "x")
3231 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3233 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3234 [(set_attr "type" "sselog")
3235 (set_attr "mode" "TI")])
;; sse2_pshufd expander: operand 2 is the packed shuffle immediate.
;; The preparation code splits it into four 2-bit fields (bits 0-1, 2-3,
;; 4-5, 6-7) and passes each as a separate constant to gen_sse2_pshufd_1,
;; so the insn pattern can name every selected element explicitly.
3237 (define_expand "sse2_pshufd"
3238 [(match_operand:V4SI 0 "register_operand" "")
3239 (match_operand:V4SI 1 "nonimmediate_operand" "")
3240 (match_operand:SI 2 "const_int_operand" "")]
3243 int mask = INTVAL (operands[2]);
3244 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3245 GEN_INT ((mask >> 0) & 3),
3246 GEN_INT ((mask >> 2) & 3),
3247 GEN_INT ((mask >> 4) & 3),
3248 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshufd_1: insn form of pshufd with one selector operand per
;; result element (operands 2-5, each constrained to 0..3).
;; The output code packs the four selectors back into a single immediate,
;; two bits each with operand 2 in the lowest bits, stores it in
;; operands[2], and prints pshufd.
;; NOTE(review): the declaration and initial value of mask are not
;; visible in this excerpt.
3252 (define_insn "sse2_pshufd_1"
3253 [(set (match_operand:V4SI 0 "register_operand" "=x")
3255 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3256 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3257 (match_operand 3 "const_0_to_3_operand" "")
3258 (match_operand 4 "const_0_to_3_operand" "")
3259 (match_operand 5 "const_0_to_3_operand" "")])))]
3263 mask |= INTVAL (operands[2]) << 0;
3264 mask |= INTVAL (operands[3]) << 2;
3265 mask |= INTVAL (operands[4]) << 4;
3266 mask |= INTVAL (operands[5]) << 6;
3267 operands[2] = GEN_INT (mask);
3269 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3271 [(set_attr "type" "sselog1")
3272 (set_attr "mode" "TI")])
;; sse2_pshuflw expander: operand 2 is the packed shuffle immediate.
;; The preparation code splits it into four 2-bit fields and hands them
;; to gen_sse2_pshuflw_1 as separate constants in the range 0..3.
3274 (define_expand "sse2_pshuflw"
3275 [(match_operand:V8HI 0 "register_operand" "")
3276 (match_operand:V8HI 1 "nonimmediate_operand" "")
3277 (match_operand:SI 2 "const_int_operand" "")]
3280 int mask = INTVAL (operands[2]);
3281 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3282 GEN_INT ((mask >> 0) & 3),
3283 GEN_INT ((mask >> 2) & 3),
3284 GEN_INT ((mask >> 4) & 3),
3285 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshuflw_1: insn form of pshuflw.  Operands 2-5 (each 0..3) are
;; the first four entries of the selector.
;; The output code packs them into one immediate, two bits each with
;; operand 2 lowest, stores it in operands[2], and prints pshuflw.
;; NOTE(review): the remaining selector entries, the insn condition and
;; the declaration of mask are not visible in this excerpt.
3289 (define_insn "sse2_pshuflw_1"
3290 [(set (match_operand:V8HI 0 "register_operand" "=x")
3292 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3293 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3294 (match_operand 3 "const_0_to_3_operand" "")
3295 (match_operand 4 "const_0_to_3_operand" "")
3296 (match_operand 5 "const_0_to_3_operand" "")
3304 mask |= INTVAL (operands[2]) << 0;
3305 mask |= INTVAL (operands[3]) << 2;
3306 mask |= INTVAL (operands[4]) << 4;
3307 mask |= INTVAL (operands[5]) << 6;
3308 operands[2] = GEN_INT (mask);
3310 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3312 [(set_attr "type" "sselog")
3313 (set_attr "mode" "TI")])
;; sse2_pshufhw expander: operand 2 is the packed shuffle immediate.
;; Each 2-bit field is extracted and biased by 4 before being passed to
;; gen_sse2_pshufhw_1, so the constants fall in 4..7 as required by that
;; insn's const_4_to_7_operand predicates.
3315 (define_expand "sse2_pshufhw"
3316 [(match_operand:V8HI 0 "register_operand" "")
3317 (match_operand:V8HI 1 "nonimmediate_operand" "")
3318 (match_operand:SI 2 "const_int_operand" "")]
3321 int mask = INTVAL (operands[2]);
3322 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3323 GEN_INT (((mask >> 0) & 3) + 4),
3324 GEN_INT (((mask >> 2) & 3) + 4),
3325 GEN_INT (((mask >> 4) & 3) + 4),
3326 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse2_pshufhw_1: insn form of pshufhw.  The selector begins with the
;; fixed index 0 and ends with operands 2-5, each constrained to 4..7.
;; The output code removes the bias of 4 from each operand, packs the
;; results two bits apiece (operand 2 lowest) into one immediate, stores
;; it in operands[2], and prints pshufhw.
;; NOTE(review): the selector entries between index 0 and operand 2, the
;; insn condition and the declaration of mask are not visible here.
3330 (define_insn "sse2_pshufhw_1"
3331 [(set (match_operand:V8HI 0 "register_operand" "=x")
3333 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3334 (parallel [(const_int 0)
3338 (match_operand 2 "const_4_to_7_operand" "")
3339 (match_operand 3 "const_4_to_7_operand" "")
3340 (match_operand 4 "const_4_to_7_operand" "")
3341 (match_operand 5 "const_4_to_7_operand" "")])))]
3345 mask |= (INTVAL (operands[2]) - 4) << 0;
3346 mask |= (INTVAL (operands[3]) - 4) << 2;
3347 mask |= (INTVAL (operands[4]) - 4) << 4;
3348 mask |= (INTVAL (operands[5]) - 4) << 6;
3349 operands[2] = GEN_INT (mask);
3351 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3353 [(set_attr "type" "sselog")
3354 (set_attr "mode" "TI")])
;; sse2_loadd expander: builds a V4SI register from the SImode operand 1.
;; The preparation statement sets operands[2] to the V4SImode zero
;; constant.
;; NOTE(review): the part of the template that consumes operand 2 and the
;; expander condition are not visible in this excerpt.
3356 (define_expand "sse2_loadd"
3357 [(set (match_operand:V4SI 0 "register_operand" "")
3360 (match_operand:SI 1 "nonimmediate_operand" ""))
3364 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld: places the SImode operand 2 into a V4SI register.
;; Three alternatives, in constraint order:
;;   1. movd  - source in memory or a general register, operand 1 is C.
;;   2. movss - source in memory, operand 1 is C.
;;   3. movss - source in an SSE register, operand 1 tied to the output.
;; NOTE(review): C is presumably the all-zero vector constant; the RTL
;; combining operands 1 and 2 is not visible in this excerpt.
3366 (define_insn "sse2_loadld"
3367 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3370 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3371 (match_operand:V4SI 1 "vector_move_operand" " C,C,0")
3375 movd\t{%2, %0|%0, %2}
3376 movss\t{%2, %0|%0, %2}
3377 movss\t{%2, %0|%0, %2}"
3378 [(set_attr "type" "ssemov")
3379 (set_attr "mode" "TI,V4SF,SF")])
;; sse2_stored: stores element 0 of the V4SI register operand 1 into the
;; SImode destination (memory or SSE register, constraint mx).
;; Once reload has completed, the split replaces the pattern with a plain
;; SImode move: operand 1 is rebuilt as an SImode REG with the same
;; register number.
3381 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3382 ;; be taken into account, and movdi isn't fully populated even without.
3383 (define_insn_and_split "sse2_stored"
3384 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3386 (match_operand:V4SI 1 "register_operand" "x")
3387 (parallel [(const_int 0)])))]
3390 "&& reload_completed"
3391 [(set (match_dup 0) (match_dup 1))]
3393 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; sse_storeq expander: DImode destination taken from element 0 of the
;; V2DI register operand 1.
;; NOTE(review): the expander condition and any preparation code are not
;; visible in this excerpt.
3396 (define_expand "sse_storeq"
3397 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3399 (match_operand:V2DI 1 "register_operand" "")
3400 (parallel [(const_int 0)])))]
;; *sse2_storeq: insn matching a store of element 0 of a V2DI register
;; into a DImode destination (memory or SSE register, constraint mx).
3404 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3405 ;; be taken into account, and movdi isn't fully populated even without.
3406 (define_insn "*sse2_storeq"
3407 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3409 (match_operand:V2DI 1 "register_operand" "x")
3410 (parallel [(const_int 0)])))]
;; The same operand shape repeats below without constraints, guarded by
;; TARGET_SSE && reload_completed.  It is rewritten as a plain DImode
;; move, with operand 1 rebuilt as a DImode REG of the same register
;; number.
;; NOTE(review): presumably this is a separate split form whose opening
;; line, like the insn's condition and template above, is not visible in
;; this excerpt -- confirm against the full file.
3415 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3417 (match_operand:V2DI 1 "register_operand" "")
3418 (parallel [(const_int 0)])))]
3419 "TARGET_SSE && reload_completed"
3420 [(set (match_dup 0) (match_dup 1))]
3422 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; *vec_dupv4si: produces a V4SI result from the SImode register
;; operand 1.  Two alternatives:
;;   1. pshufd with immediate 0, reading operand 1 (constraint Y).
;;   2. shufps with immediate 0 on the destination itself; operand 1 is
;;      tied to the output.
;; NOTE(review): the RTL wrapping operand 1 (presumably a duplicate of the
;; scalar, judging by the pattern name) is not visible in this excerpt.
3425 (define_insn "*vec_dupv4si"
3426 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3428 (match_operand:SI 1 "register_operand" " Y,0")))]
3431 pshufd\t{$0, %1, %0|%0, %1, 0}
3432 shufps\t{$0, %0, %0|%0, %0, 0}"
3433 [(set_attr "type" "sselog1")
3434 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di: produces a V2DI result from the DImode register
;; operand 1, which is tied to the output in both alternatives.
;; The attributes classify alternative 1 as sselog1/TI and alternative 2
;; as ssemov/V4SF.
;; NOTE(review): the output templates and insn condition are not visible
;; in this excerpt.
3436 (define_insn "*vec_dupv2di"
3437 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3439 (match_operand:DI 1 "register_operand" " 0,0")))]
3444 [(set_attr "type" "sselog1,ssemov")
3445 (set_attr "mode" "TI,V4SF")])
;; *sse2_concatv2si: forms a V2SI value from the SImode operands 1 and 2.
;; Four alternatives, in constraint order:
;;   1. punpckldq - operand 1 tied to the output, operand 2 in a Y reg.
;;   2. movd      - operand 1 from a general register or memory,
;;                  operand 2 is C.
;;   3. punpckldq - same shape as 1 using y registers.
;;   4. movd      - same shape as 2 using y registers.
;; NOTE(review): y is presumably the MMX register class and C the zero
;; constant, as the comment below suggests -- confirm.
3447 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3448 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3449 ;; alternatives pretty much forces the MMX alternative to be chosen.
3450 (define_insn "*sse2_concatv2si"
3451 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3453 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3454 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3457 punpckldq\t{%2, %0|%0, %2}
3458 movd\t{%1, %0|%0, %1}
3459 punpckldq\t{%2, %0|%0, %2}
3460 movd\t{%1, %0|%0, %1}"
3461 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3462 (set_attr "mode" "TI,TI,DI,DI")])
;; *sse1_concatv2si: forms a V2SI value from the SImode operands 1 and 2
;; using single-precision SSE instructions for the x-register cases.
;; Four alternatives, in constraint order:
;;   1. unpcklps  - operand 1 tied to the output, operand 2 in an x reg.
;;   2. movss     - operand 1 from memory, operand 2 is C.
;;   3. punpckldq - operand 1 tied to the output, operand 2 in a y reg.
;;   4. movd      - operand 1 from a general register or memory,
;;                  operand 2 is C.
;; NOTE(review): the insn condition is not visible in this excerpt.
3464 (define_insn "*sse1_concatv2si"
3465 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3467 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3468 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3471 unpcklps\t{%2, %0|%0, %2}
3472 movss\t{%1, %0|%0, %1}
3473 punpckldq\t{%2, %0|%0, %2}
3474 movd\t{%1, %0|%0, %1}"
3475 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3476 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1: forms a V4SI value from the V2SI operands 1 and 2.
;; Operand 1 is tied to the output in every alternative; operand 2
;; selects the instruction:
;;   1. punpcklqdq - operand 2 in a Y register.
;;   2. movlhps    - operand 2 in an x register.
;;   3. movhps     - operand 2 in memory.
3478 (define_insn "*vec_concatv4si_1"
3479 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3481 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3482 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3485 punpcklqdq\t{%2, %0|%0, %2}
3486 movlhps\t{%2, %0|%0, %2}
3487 movhps\t{%2, %0|%0, %2}"
3488 [(set_attr "type" "sselog,ssemov,ssemov")
3489 (set_attr "mode" "TI,V4SF,V2SF")])
;; *vec_concatv2di: forms a V2DI value from the DImode operands 1 and 2.
;; Six alternatives, in constraint order:
;;   1. movq       - operand 1 from memory, operand 2 is C.
;;   2. movq2dq    - operand 1 from a y register, operand 2 is C
;;                   (alternative disparaged with ?).
;;   3. punpcklqdq - operand 1 tied to the output, operand 2 in a Y reg.
;;   4. movlhps    - operand 1 tied to the output, operand 2 in an x reg.
;;   5. movhps     - operand 1 tied to the output, operand 2 in memory.
;;   6. movlps     - operand 2 tied to the output, operand 1 in memory.
3491 (define_insn "*vec_concatv2di"
3492 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3494 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3495 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3498 movq\t{%1, %0|%0, %1}
3499 movq2dq\t{%1, %0|%0, %1}
3500 punpcklqdq\t{%2, %0|%0, %2}
3501 movlhps\t{%2, %0|%0, %2}
3502 movhps\t{%2, %0|%0, %2}
3503 movlps\t{%1, %0|%0, %1}"
3504 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3505 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_setv2di expander: operand 0 is the V2DI register to update,
;; operand 1 the DImode value and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_set, called with false as
;; its first argument.
3507 (define_expand "vec_setv2di"
3508 [(match_operand:V2DI 0 "register_operand" "")
3509 (match_operand:DI 1 "register_operand" "")
3510 (match_operand 2 "const_int_operand" "")]
3513 ix86_expand_vector_set (false, operands[0], operands[1],
3514 INTVAL (operands[2]));
;; vec_extractv2di expander: operand 0 is the DImode result, operand 1
;; the V2DI source register and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_extract, called with false
;; as its first argument.
3518 (define_expand "vec_extractv2di"
3519 [(match_operand:DI 0 "register_operand" "")
3520 (match_operand:V2DI 1 "register_operand" "")
3521 (match_operand 2 "const_int_operand" "")]
3524 ix86_expand_vector_extract (false, operands[0], operands[1],
3525 INTVAL (operands[2]));
;; vec_initv2di expander: operand 0 is the V2DI register to initialise;
;; operand 1 has no predicate or mode and is passed through unchanged to
;; ix86_expand_vector_init (called with false as its first argument).
3529 (define_expand "vec_initv2di"
3530 [(match_operand:V2DI 0 "register_operand" "")
3531 (match_operand 1 "" "")]
3534 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4si expander: operand 0 is the V4SI register to update,
;; operand 1 the SImode value and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_set, called with false as
;; its first argument.
3538 (define_expand "vec_setv4si"
3539 [(match_operand:V4SI 0 "register_operand" "")
3540 (match_operand:SI 1 "register_operand" "")
3541 (match_operand 2 "const_int_operand" "")]
3544 ix86_expand_vector_set (false, operands[0], operands[1],
3545 INTVAL (operands[2]));
;; vec_extractv4si expander: operand 0 is the SImode result, operand 1
;; the V4SI source register and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_extract, called with false
;; as its first argument.
3549 (define_expand "vec_extractv4si"
3550 [(match_operand:SI 0 "register_operand" "")
3551 (match_operand:V4SI 1 "register_operand" "")
3552 (match_operand 2 "const_int_operand" "")]
3555 ix86_expand_vector_extract (false, operands[0], operands[1],
3556 INTVAL (operands[2]));
;; vec_initv4si expander: operand 0 is the V4SI register to initialise;
;; operand 1 has no predicate or mode and is passed through unchanged to
;; ix86_expand_vector_init (called with false as its first argument).
3560 (define_expand "vec_initv4si"
3561 [(match_operand:V4SI 0 "register_operand" "")
3562 (match_operand 1 "" "")]
3565 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv8hi expander: operand 0 is the V8HI register to update,
;; operand 1 the HImode value and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_set, called with false as
;; its first argument.
3569 (define_expand "vec_setv8hi"
3570 [(match_operand:V8HI 0 "register_operand" "")
3571 (match_operand:HI 1 "register_operand" "")
3572 (match_operand 2 "const_int_operand" "")]
3575 ix86_expand_vector_set (false, operands[0], operands[1],
3576 INTVAL (operands[2]));
;; vec_extractv8hi expander: operand 0 is the HImode result, operand 1
;; the V8HI source register and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_extract, called with false
;; as its first argument.
3580 (define_expand "vec_extractv8hi"
3581 [(match_operand:HI 0 "register_operand" "")
3582 (match_operand:V8HI 1 "register_operand" "")
3583 (match_operand 2 "const_int_operand" "")]
3586 ix86_expand_vector_extract (false, operands[0], operands[1],
3587 INTVAL (operands[2]));
;; vec_initv8hi expander: operand 0 is the V8HI register to initialise;
;; operand 1 has no predicate or mode and is passed through unchanged to
;; ix86_expand_vector_init (called with false as its first argument).
3591 (define_expand "vec_initv8hi"
3592 [(match_operand:V8HI 0 "register_operand" "")
3593 (match_operand 1 "" "")]
3596 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv16qi expander: operand 0 is the V16QI register to update,
;; operand 1 the QImode value and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_set, called with false as
;; its first argument.
3600 (define_expand "vec_setv16qi"
3601 [(match_operand:V16QI 0 "register_operand" "")
3602 (match_operand:QI 1 "register_operand" "")
3603 (match_operand 2 "const_int_operand" "")]
3606 ix86_expand_vector_set (false, operands[0], operands[1],
3607 INTVAL (operands[2]));
;; vec_extractv16qi expander: operand 0 is the QImode result, operand 1
;; the V16QI source register and operand 2 a constant element index.
;; All work is delegated to ix86_expand_vector_extract, called with false
;; as its first argument.
3611 (define_expand "vec_extractv16qi"
3612 [(match_operand:QI 0 "register_operand" "")
3613 (match_operand:V16QI 1 "register_operand" "")
3614 (match_operand 2 "const_int_operand" "")]
3617 ix86_expand_vector_extract (false, operands[0], operands[1],
3618 INTVAL (operands[2]));
;; vec_initv16qi expander: operand 0 is the V16QI register to initialise;
;; operand 1 has no predicate or mode and is passed through unchanged to
;; ix86_expand_vector_init (called with false as its first argument).
3622 (define_expand "vec_initv16qi"
3623 [(match_operand:V16QI 0 "register_operand" "")
3624 (match_operand 1 "" "")]
3627 ix86_expand_vector_init (false, operands[0], operands[1]);
3631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: V16QI pattern that emits pavgb.
;; Operand 1 carries the % modifier (commutative with operand 2) and is
;; tied to the destination; operand 2 may be register or memory.  The
;; pattern includes a V16QI constant vector of sixteen ones.  It is
;; enabled for TARGET_SSE2 when ix86_binary_operator_ok accepts the
;; operands for PLUS.
;; NOTE(review): the arithmetic RTL around the operands is not visible in
;; this excerpt; the vector of ones is presumably the rounding term of the
;; average -- confirm.
3637 (define_insn "sse2_uavgv16qi3"
3638 [(set (match_operand:V16QI 0 "register_operand" "=x")
3644 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3646 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3647 (const_vector:V16QI [(const_int 1) (const_int 1)
3648 (const_int 1) (const_int 1)
3649 (const_int 1) (const_int 1)
3650 (const_int 1) (const_int 1)
3651 (const_int 1) (const_int 1)
3652 (const_int 1) (const_int 1)
3653 (const_int 1) (const_int 1)
3654 (const_int 1) (const_int 1)]))
3656 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3657 "pavgb\t{%2, %0|%0, %2}"
3658 [(set_attr "type" "sseiadd")
3659 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: V8HI pattern that emits pavgw.
;; Operand 1 carries the % modifier (commutative with operand 2) and is
;; tied to the destination; operand 2 may be register or memory.  The
;; pattern includes a V8HI constant vector of eight ones.  It is enabled
;; for TARGET_SSE2 when ix86_binary_operator_ok accepts the operands for
;; PLUS.
;; NOTE(review): the arithmetic RTL around the operands is not visible in
;; this excerpt; the vector of ones is presumably the rounding term of the
;; average -- confirm.
3661 (define_insn "sse2_uavgv8hi3"
3662 [(set (match_operand:V8HI 0 "register_operand" "=x")
3668 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3670 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3671 (const_vector:V8HI [(const_int 1) (const_int 1)
3672 (const_int 1) (const_int 1)
3673 (const_int 1) (const_int 1)
3674 (const_int 1) (const_int 1)]))
3676 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3677 "pavgw\t{%2, %0|%0, %2}"
3678 [(set_attr "type" "sseiadd")
3679 (set_attr "mode" "TI")])
;; sse2_psadbw: emits psadbw.  The operation is modelled as an opaque
;; unspec over the two V16QI inputs producing a V2DI result, rather than
;; as explicit RTL arithmetic (see the remark below).  Operand 1 is tied
;; to the destination; operand 2 may be register or memory.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this excerpt.
3681 ;; The correct representation for this is absolutely enormous, and
3682 ;; surely not generally useful.
3683 (define_insn "sse2_psadbw"
3684 [(set (match_operand:V2DI 0 "register_operand" "=x")
3685 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3686 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3689 "psadbw\t{%2, %0|%0, %2}"
3690 [(set_attr "type" "sseiadd")
3691 (set_attr "mode" "TI")])
;; sse_movmskps: emits movmskps.  An unspec over the V4SF register
;; operand 1 yields an SImode value in a general register (constraint r).
;; NOTE(review): the unspec code and insn condition are not visible in
;; this excerpt.
3693 (define_insn "sse_movmskps"
3694 [(set (match_operand:SI 0 "register_operand" "=r")
3695 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3698 "movmskps\t{%1, %0|%0, %1}"
3699 [(set_attr "type" "ssecvt")
3700 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: emits movmskpd.  An unspec over the V2DF register
;; operand 1 yields an SImode value in a general register (constraint r).
;; NOTE(review): the unspec code and insn condition are not visible in
;; this excerpt.
3702 (define_insn "sse2_movmskpd"
3703 [(set (match_operand:SI 0 "register_operand" "=r")
3704 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3707 "movmskpd\t{%1, %0|%0, %1}"
3708 [(set_attr "type" "ssecvt")
3709 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: emits pmovmskb.  An unspec over the V16QI register
;; operand 1 yields an SImode value in a general register (constraint r).
;; NOTE(review): the mode attribute below is V2DF although the input is
;; an integer V16QI vector; it matches sse2_movmskpd verbatim and looks
;; like a copy-paste leftover (the neighbouring integer patterns use TI).
;; Confirm before changing, since the attribute feeds scheduling and
;; length computation.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this excerpt.
3711 (define_insn "sse2_pmovmskb"
3712 [(set (match_operand:SI 0 "register_operand" "=r")
3713 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3716 "pmovmskb\t{%1, %0|%0, %1}"
3717 [(set_attr "type" "ssecvt")
3718 (set_attr "mode" "V2DF")])
;; sse2_maskmovdqu expander: a V16QI store to the memory operand 0,
;; expressed as an unspec over the V16QI register operands 1 and 2.
;; NOTE(review): the remainder of the unspec, its code and the expander
;; condition are not visible in this excerpt.
3720 (define_expand "sse2_maskmovdqu"
3721 [(set (match_operand:V16QI 0 "memory_operand" "")
3722 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3723 (match_operand:V16QI 2 "register_operand" "x")
;; *sse2_maskmovdqu: 32-bit form (TARGET_SSE2 && !TARGET_64BIT).
;; The destination address is the SImode register operand 0, pinned by
;; constraint D.  The same memory location also appears as an input of
;; the unspec (match_dup 0), so the previous memory contents are treated
;; as read.  Only operands 1 and 2 are printed in the template; the
;; address register is implicit.
;; NOTE(review): D is presumably the (e)di register -- confirm.
3729 (define_insn "*sse2_maskmovdqu"
3730 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3731 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3732 (match_operand:V16QI 2 "register_operand" "x")
3733 (mem:V16QI (match_dup 0))]
3735 "TARGET_SSE2 && !TARGET_64BIT"
3736 ;; @@@ check ordering of operands in intel/nonintel syntax
3737 "maskmovdqu\t{%2, %1|%1, %2}"
3738 [(set_attr "type" "ssecvt")
3739 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (TARGET_SSE2 && TARGET_64BIT).
;; Identical to *sse2_maskmovdqu except that the address register
;; operand 0 is DImode.  The destination memory is also listed as an
;; unspec input (match_dup 0), and only operands 1 and 2 are printed.
;; NOTE(review): D is presumably the (r)di register -- confirm.
3741 (define_insn "*sse2_maskmovdqu_rex64"
3742 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3743 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3744 (match_operand:V16QI 2 "register_operand" "x")
3745 (mem:V16QI (match_dup 0))]
3747 "TARGET_SSE2 && TARGET_64BIT"
3748 ;; @@@ check ordering of operands in intel/nonintel syntax
3749 "maskmovdqu\t{%2, %1|%1, %2}"
3750 [(set_attr "type" "ssecvt")
3751 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile taking the SImode memory operand 0;
;; the memory attribute marks the insn as a load.
;; NOTE(review): the unspec code, insn condition and output template are
;; not visible in this excerpt.
3753 (define_insn "sse_ldmxcsr"
3754 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3758 [(set_attr "type" "sse")
3759 (set_attr "memory" "load")])
;; sse_stmxcsr: sets the SImode memory operand 0 from an unspec_volatile
;; with code UNSPECV_STMXCSR; the memory attribute marks the insn as a
;; store.
;; NOTE(review): the insn condition and output template are not visible
;; in this excerpt.
3761 (define_insn "sse_stmxcsr"
3762 [(set (match_operand:SI 0 "memory_operand" "=m")
3763 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3766 [(set_attr "type" "sse")
3767 (set_attr "memory" "store")])
;; sse_sfence expander (TARGET_SSE || TARGET_3DNOW_A).
;; The fence is modelled as a BLKmode memory location set from an
;; UNSPEC_SFENCE of itself.  The preparation code creates that operand as
;; a BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile.
3769 (define_expand "sse_sfence"
3771 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3772 "TARGET_SSE || TARGET_3DNOW_A"
3774 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3775 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn matching the RTL built by the sse_sfence expander;
;; a BLKmode operand (no predicate) set from UNSPEC_SFENCE of itself.
;; The memory attribute is unknown.
;; NOTE(review): the output template is not visible in this excerpt.
3778 (define_insn "*sse_sfence"
3779 [(set (match_operand:BLK 0 "" "")
3780 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3781 "TARGET_SSE || TARGET_3DNOW_A"
3783 [(set_attr "type" "sse")
3784 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile whose single operand is an address
;; (address_operand, constraint p).  The memory attribute is unknown.
;; NOTE(review): the unspec code, insn condition and output template are
;; not visible in this excerpt.
3786 (define_insn "sse2_clflush"
3787 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3791 [(set_attr "type" "sse")
3792 (set_attr "memory" "unknown")])
;; sse2_mfence expander.  The fence is modelled with UNSPEC_MFENCE applied
;; to a BLKmode memory operand; the preparation code creates that operand
;; as a BLKmode MEM whose address is a Pmode SCRATCH and marks it
;; volatile.
;; NOTE(review): the set destination line and the expander condition are
;; not visible in this excerpt.
3794 (define_expand "sse2_mfence"
3796 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3799 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3800 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn matching a BLKmode operand (no predicate) set from
;; UNSPEC_MFENCE of itself.  The memory attribute is unknown.
;; NOTE(review): the insn condition and output template are not visible
;; in this excerpt.
3803 (define_insn "*sse2_mfence"
3804 [(set (match_operand:BLK 0 "" "")
3805 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3808 [(set_attr "type" "sse")
3809 (set_attr "memory" "unknown")])
;; sse2_lfence expander.  The fence is modelled with UNSPEC_LFENCE applied
;; to a BLKmode memory operand; the preparation code creates that operand
;; as a BLKmode MEM whose address is a Pmode SCRATCH and marks it
;; volatile.
;; NOTE(review): the set destination line and the expander condition are
;; not visible in this excerpt.
3811 (define_expand "sse2_lfence"
3813 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3816 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3817 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn matching a BLKmode operand (no predicate) set from
;; UNSPEC_LFENCE of itself.  The memory attribute is unknown.
;; NOTE(review): the insn condition and output template are not visible
;; in this excerpt.
3820 (define_insn "*sse2_lfence"
3821 [(set (match_operand:BLK 0 "" "")
3822 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3825 [(set_attr "type" "sse")
3826 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile over two SImode register operands,
;; pinned by constraints a (operand 0) and c (operand 1).  The length
;; attribute is fixed at 3.
;; NOTE(review): the unspec code, insn condition and output template are
;; not visible in this excerpt.
3828 (define_insn "sse3_mwait"
3829 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3830 (match_operand:SI 1 "register_operand" "c")]
3834 [(set_attr "length" "3")])
;; sse3_monitor: an unspec_volatile over three SImode register operands,
;; pinned by constraints a, c and d (operands 0, 1 and 2).  The template
;; prints monitor with all three operands in the same order for both
;; assembler dialects.  The length attribute is fixed at 3.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this excerpt.
3836 (define_insn "sse3_monitor"
3837 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3838 (match_operand:SI 1 "register_operand" "c")
3839 (match_operand:SI 2 "register_operand" "d")]
3842 "monitor\t%0, %1, %2"
3843 [(set_attr "length" "3")])