1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 59 Temple Place - Suite 330,
20 ;; Boston, MA 02111-1307, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; appear to list the element widths in bytes that the macro covers
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander instantiated for every integer vector mode in SSEMODEI.
;; The preparation code passes the mode and both operands to
;; ix86_expand_vector_move.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Matching insn for integer vector moves.  Three alternatives:
;;   0: constant accepted by "C" into an SSE register, emitted as a
;;      register self-XOR (xorps or pxor);
;;   1: SSE register or memory into an SSE register;
;;   2: SSE register into memory.
;; The condition rejects memory-to-memory moves.  The output code picks the
;; packed-single mnemonic (xorps/movaps) when the insn's "mode" attribute is
;; V4SF and the integer mnemonic (pxor/movdqa) otherwise.  The "mode"
;; attribute is computed from TARGET_SSE2, optimize_size and
;; TARGET_SSE_TYPELESS_STORES, with TI as the final default.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
62 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64 switch (which_alternative)
67 if (get_attr_mode (insn) == MODE_V4SF)
68 return "xorps\t%0, %0";
70 return "pxor\t%0, %0";
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
86 (eq_attr "alternative" "0,1")
88 (ne (symbol_ref "optimize_size")
92 (eq_attr "alternative" "2")
94 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
96 (ne (symbol_ref "optimize_size")
100 (const_string "TI")))])
;; Move expander for V4SF; the preparation code delegates to
;; ix86_expand_vector_move with V4SFmode.
102 (define_expand "movv4sf"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
104 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
107 ix86_expand_vector_move (V4SFmode, operands);
;; Matching insn for V4SF moves.  The constraints give three alternatives:
;; a "C" constant into an SSE register (type sselog1), register/memory into
;; an SSE register, and an SSE register into memory (both type ssemov).
;; The register/memory alternatives are emitted as movaps; the "mode"
;; attribute is V4SF for every alternative.
111 (define_insn "*movv4sf_internal"
112 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
113 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
117 movaps\t{%1, %0|%0, %1}
118 movaps\t{%1, %0|%0, %1}"
119 [(set_attr "type" "sselog1,ssemov,ssemov")
120 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (the condition requires reload_completed) of a V4SF
;; register load from a zero_extended_scalar_load_operand.  The preparation
;; code narrows operand 1 to its SFmode subreg at byte offset 0 and sets
;; operand 2 to the V4SF zero vector; the replacement duplicates operand 1
;; with vec_duplicate.
;; NOTE(review): the opening line of this construct is not visible in this
;; excerpt; presumably a define_split -- confirm against the full file.
123 [(set (match_operand:V4SF 0 "register_operand" "")
124 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
125 "TARGET_SSE && reload_completed"
128 (vec_duplicate:V4SF (match_dup 1))
132 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
133 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the preparation code delegates to
;; ix86_expand_vector_move with V2DFmode.
136 (define_expand "movv2df"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
138 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
141 ix86_expand_vector_move (V2DFmode, operands);
;; Matching insn for V2DF moves.  Three alternatives: a "C" constant into
;; an SSE register (emitted as a register self-XOR), register/memory into an
;; SSE register, and an SSE register into memory.  Memory-to-memory is
;; rejected by the condition.  The output code uses the packed-single
;; mnemonics (xorps/movaps) when the "mode" attribute is V4SF and the
;; packed-double ones (xorpd/movapd) otherwise.
;; The "mode" attribute selects between V4SF and V2DF based on TARGET_SSE2,
;; the alternative, optimize_size and (for the store alternative)
;; TARGET_SSE_TYPELESS_STORES; V2DF is the final default.
145 (define_insn "*movv2df_internal"
146 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
147 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
148 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
150 switch (which_alternative)
153 if (get_attr_mode (insn) == MODE_V4SF)
154 return "xorps\t%0, %0";
156 return "xorpd\t%0, %0";
159 if (get_attr_mode (insn) == MODE_V4SF)
160 return "movaps\t{%1, %0|%0, %1}";
162 return "movapd\t{%1, %0|%0, %1}";
167 [(set_attr "type" "sselog1,ssemov,ssemov")
169 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
170 (const_string "V4SF")
171 (eq_attr "alternative" "0,1")
173 (ne (symbol_ref "optimize_size")
175 (const_string "V4SF")
176 (const_string "V2DF"))
177 (eq_attr "alternative" "2")
179 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
181 (ne (symbol_ref "optimize_size")
183 (const_string "V4SF")
184 (const_string "V2DF"))]
185 (const_string "V2DF")))])
;; Post-reload rewrite (the condition requires TARGET_SSE2 and
;; reload_completed) of a V2DF register load from a
;; zero_extended_scalar_load_operand.  The replacement sets operand 0 to the
;; vec_concat of operand 1, narrowed to its DFmode subreg at byte offset 0,
;; and operand 2, which the preparation code sets to the DFmode zero.
;; NOTE(review): the opening line of this construct is not visible in this
;; excerpt; presumably a define_split -- confirm against the full file.
188 [(set (match_operand:V2DF 0 "register_operand" "")
189 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
190 "TARGET_SSE2 && reload_completed"
191 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
193 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
194 operands[2] = CONST0_RTX (DFmode);
;; Misaligned-move expander instantiated for every mode in SSEMODE (integer
;; and floating-point 16-byte vectors).  The preparation code delegates to
;; ix86_expand_vector_move_misalign.
197 (define_expand "movmisalign<mode>"
198 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
199 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
202 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move, emitted as movups.  Two alternatives: register or
;; memory into an SSE register, and an SSE register into memory; the
;; condition rejects memory-to-memory.  The source is wrapped in an unspec
;; so the insn is kept distinct from the ordinary aligned move patterns.
;; The "mode" attribute is V4SF, matching the operand mode and the
;; packed-single mnemonic; V2DF is the packed-double mode and belongs to
;; sse2_movupd.
;; NOTE(review): attributes derived from "mode" (e.g. prefix/length
;; computation in i386.md) are presumed to treat V2DF as needing an
;; operand-size prefix that movups does not have -- confirm.
206 (define_insn "sse_movups"
207 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
208 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
210 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
211 "movups\t{%1, %0|%0, %1}"
212 [(set_attr "type" "ssemov")
213 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move, emitted as movupd.  Two alternatives: register or
;; memory into an SSE register, and an SSE register into memory; requires
;; TARGET_SSE2 and rejects memory-to-memory.  The source is wrapped in an
;; unspec.
215 (define_insn "sse2_movupd"
216 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
217 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
219 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
220 "movupd\t{%1, %0|%0, %1}"
221 [(set_attr "type" "ssemov")
222 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move, emitted as movdqu.  Two alternatives: register or
;; memory into an SSE register, and an SSE register into memory; requires
;; TARGET_SSE2 and rejects memory-to-memory.  The source is wrapped in an
;; unspec.
224 (define_insn "sse2_movdqu"
225 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
226 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
228 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
229 "movdqu\t{%1, %0|%0, %1}"
230 [(set_attr "type" "ssemov")
231 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory, emitted as movntps (the
;; non-temporal packed-single store).  The source is wrapped in an unspec.
233 (define_insn "sse_movntv4sf"
234 [(set (match_operand:V4SF 0 "memory_operand" "=m")
235 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
238 "movntps\t{%1, %0|%0, %1}"
239 [(set_attr "type" "ssemov")
240 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory, emitted as movntpd (the
;; non-temporal packed-double store).  The source is wrapped in an unspec.
242 (define_insn "sse2_movntv2df"
243 [(set (match_operand:V2DF 0 "memory_operand" "=m")
244 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
247 "movntpd\t{%1, %0|%0, %1}"
248 [(set_attr "type" "ssecvt")
249 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory, emitted as movntdq (the
;; non-temporal 128-bit integer store).  The source is wrapped in an unspec.
251 (define_insn "sse2_movntv2di"
252 [(set (match_operand:V2DI 0 "memory_operand" "=m")
253 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
256 "movntdq\t{%1, %0|%0, %1}"
257 [(set_attr "type" "ssecvt")
258 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, emitted as movnti
;; (the non-temporal integer store).  The source is wrapped in an unspec.
;; The "mode" attribute is SI because both operands are SImode and no vector
;; register is involved; a packed-double mode does not describe this insn.
;; NOTE(review): attributes derived from "mode" (prefix/length computation,
;; scheduler reservations) are presumed to accept SI for an ssecvt-type
;; insn -- confirm against i386.md and the pipeline descriptions.
260 (define_insn "sse2_movntsi"
261 [(set (match_operand:SI 0 "memory_operand" "=m")
262 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
265 "movnti\t{%1, %0|%0, %1}"
266 [(set_attr "type" "ssecvt")
267 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register, emitted as lddqu.
;; The source must be a memory operand and is wrapped in an unspec.
269 (define_insn "sse3_lddqu"
270 [(set (match_operand:V16QI 0 "register_operand" "=x")
271 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
274 "lddqu\t{%1, %0|%0, %1}"
275 [(set_attr "type" "ssecvt")
276 (set_attr "mode" "TI")])
278 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
280 ;; Parallel single-precision floating point arithmetic
282 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  All work is done by
;; ix86_expand_fp_absneg_operator (called with NEG); DONE means the RTL
;; template above is not emitted itself.
284 (define_expand "negv4sf2"
285 [(set (match_operand:V4SF 0 "register_operand" "")
286 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
288 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  All work is done by
;; ix86_expand_fp_absneg_operator (called with ABS); DONE means the RTL
;; template above is not emitted itself.
290 (define_expand "absv4sf2"
291 [(set (match_operand:V4SF 0 "register_operand" "")
292 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
294 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be register or memory; the
;; preparation code calls ix86_fixup_binary_operands_no_copy on the operands
;; before the template is emitted.
296 (define_expand "addv4sf3"
297 [(set (match_operand:V4SF 0 "register_operand" "")
298 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
299 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
301 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF addition, emitted as addps.  Operand 1 is tied to the
;; destination ("0"); the "%" modifier marks operands 1 and 2 as
;; commutative.  Operand 2 may be an SSE register or memory.  The condition
;; additionally requires ix86_binary_operator_ok.
303 (define_insn "*addv4sf3"
304 [(set (match_operand:V4SF 0 "register_operand" "=x")
305 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
306 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
307 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
308 "addps\t{%2, %0|%0, %2}"
309 [(set_attr "type" "sseadd")
310 (set_attr "mode" "V4SF")])
;; Scalar form of V4SF addition: the operands are full V4SF vectors, but the
;; template emits addss and the "mode" attribute is SF.  Operand 1 is tied
;; to the destination and marked commutative with operand 2.
312 (define_insn "sse_vmaddv4sf3"
313 [(set (match_operand:V4SF 0 "register_operand" "=x")
315 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
316 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
319 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
320 "addss\t{%2, %0|%0, %2}"
321 [(set_attr "type" "sseadd")
322 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 (the minuend) must be a register;
;; operand 2 may be register or memory.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy on the operands.
324 (define_expand "subv4sf3"
325 [(set (match_operand:V4SF 0 "register_operand" "")
326 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
327 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
329 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtraction, emitted as subps.  Operand 1 is tied to the
;; destination; there is no commutative marker, so the operand order is
;; fixed.  Operand 2 may be an SSE register or memory.
331 (define_insn "*subv4sf3"
332 [(set (match_operand:V4SF 0 "register_operand" "=x")
333 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
334 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
336 "subps\t{%2, %0|%0, %2}"
337 [(set_attr "type" "sseadd")
338 (set_attr "mode" "V4SF")])
;; Scalar form of V4SF subtraction: V4SF operands, but the template emits
;; subss and the "mode" attribute is SF.  Operand 1 is tied to the
;; destination.
340 (define_insn "sse_vmsubv4sf3"
341 [(set (match_operand:V4SF 0 "register_operand" "=x")
343 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
344 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
348 "subss\t{%2, %0|%0, %2}"
349 [(set_attr "type" "sseadd")
350 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both inputs may be register or memory;
;; the preparation code calls ix86_fixup_binary_operands_no_copy on the
;; operands.
352 (define_expand "mulv4sf3"
353 [(set (match_operand:V4SF 0 "register_operand" "")
354 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
355 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
357 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiplication, emitted as mulps.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0"); operand 2 may
;; be an SSE register or memory.  The condition additionally requires
;; ix86_binary_operator_ok.
359 (define_insn "*mulv4sf3"
360 [(set (match_operand:V4SF 0 "register_operand" "=x")
361 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
362 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
363 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
364 "mulps\t{%2, %0|%0, %2}"
365 [(set_attr "type" "ssemul")
366 (set_attr "mode" "V4SF")])
;; Scalar form of V4SF multiplication: V4SF operands, but the template emits
;; mulss and the "mode" attribute is SF.  Operand 1 is tied to the
;; destination and marked commutative with operand 2.
368 (define_insn "sse_vmmulv4sf3"
369 [(set (match_operand:V4SF 0 "register_operand" "=x")
371 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
372 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
375 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
376 "mulss\t{%2, %0|%0, %2}"
377 [(set_attr "type" "ssemul")
378 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 (the dividend) must be a register;
;; operand 2 may be register or memory.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy on the operands.
380 (define_expand "divv4sf3"
381 [(set (match_operand:V4SF 0 "register_operand" "")
382 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
383 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
385 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed V4SF division, emitted as divps.  Operand 1 is tied to the
;; destination with a fixed operand order; operand 2 may be an SSE register
;; or memory.
387 (define_insn "*divv4sf3"
388 [(set (match_operand:V4SF 0 "register_operand" "=x")
389 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
392 "divps\t{%2, %0|%0, %2}"
393 [(set_attr "type" "ssediv")
394 (set_attr "mode" "V4SF")])
;; Scalar form of V4SF division: V4SF operands, but the template emits divss
;; and the "mode" attribute is SF.  Operand 1 is tied to the destination.
396 (define_insn "sse_vmdivv4sf3"
397 [(set (match_operand:V4SF 0 "register_operand" "=x")
399 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
400 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
404 "divss\t{%2, %0|%0, %2}"
405 [(set_attr "type" "ssediv")
406 (set_attr "mode" "SF")])
;; Packed reciprocal, modeled as an UNSPEC_RCP of operand 1 and emitted as
;; rcpps.  The input may be an SSE register or memory and is not tied to the
;; destination.
408 (define_insn "sse_rcpv4sf2"
409 [(set (match_operand:V4SF 0 "register_operand" "=x")
411 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
413 "rcpps\t{%1, %0|%0, %1}"
414 [(set_attr "type" "sse")
415 (set_attr "mode" "V4SF")])
;; Scalar form of the reciprocal, emitted as rcpss with "mode" SF.
;; Operand 1 (register or memory) is the unspec input; operand 2 is a
;; register tied to the destination ("0").
417 (define_insn "sse_vmrcpv4sf2"
418 [(set (match_operand:V4SF 0 "register_operand" "=x")
420 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
422 (match_operand:V4SF 2 "register_operand" "0")
425 "rcpss\t{%1, %0|%0, %1}"
426 [(set_attr "type" "sse")
427 (set_attr "mode" "SF")])
;; Packed reciprocal square root, modeled as an UNSPEC_RSQRT of operand 1
;; and emitted as rsqrtps.  The input may be an SSE register or memory.
429 (define_insn "sse_rsqrtv4sf2"
430 [(set (match_operand:V4SF 0 "register_operand" "=x")
432 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
434 "rsqrtps\t{%1, %0|%0, %1}"
435 [(set_attr "type" "sse")
436 (set_attr "mode" "V4SF")])
;; Scalar form of the reciprocal square root, emitted as rsqrtss with
;; "mode" SF.  Operand 1 (register or memory) is the unspec input; operand 2
;; is a register tied to the destination ("0").
438 (define_insn "sse_vmrsqrtv4sf2"
439 [(set (match_operand:V4SF 0 "register_operand" "=x")
441 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
443 (match_operand:V4SF 2 "register_operand" "0")
446 "rsqrtss\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "SF")])
;; Packed V4SF square root, expressed with the sqrt RTL operator and emitted
;; as sqrtps.  The input may be an SSE register or memory.
450 (define_insn "sqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
454 "sqrtps\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "V4SF")])
;; Scalar form of the square root, emitted as sqrtss with "mode" SF.
;; Operand 1 (register or memory) is the sqrt input; operand 2 is a register
;; tied to the destination ("0").
458 (define_insn "sse_vmsqrtv4sf2"
459 [(set (match_operand:V4SF 0 "register_operand" "=x")
461 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
462 (match_operand:V4SF 2 "register_operand" "0")
465 "sqrtss\t{%1, %0|%0, %1}"
466 [(set_attr "type" "sse")
467 (set_attr "mode" "SF")])
469 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
470 ;; isn't really correct, as those rtl operators aren't defined when
471 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is off, operand 1 is
;; first forced into a register, which matches the non-finite insn
;; *smaxv4sf3 (register-only, non-commutative operand 1).  The operands are
;; then passed to ix86_fixup_binary_operands_no_copy.
473 (define_expand "smaxv4sf3"
474 [(set (match_operand:V4SF 0 "register_operand" "")
475 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
476 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
479 if (!flag_finite_math_only)
480 operands[1] = force_reg (V4SFmode, operands[1]);
481 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed V4SF maximum for flag_finite_math_only, emitted as maxps.
;; Operands 1 and 2 are marked commutative ("%0"); the condition also
;; requires ix86_binary_operator_ok.
484 (define_insn "*smaxv4sf3_finite"
485 [(set (match_operand:V4SF 0 "register_operand" "=x")
486 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
487 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
488 "TARGET_SSE && flag_finite_math_only
489 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
490 "maxps\t{%2, %0|%0, %2}"
491 [(set_attr "type" "sse")
492 (set_attr "mode" "V4SF")])
;; Packed V4SF maximum with a fixed operand order, emitted as maxps.
;; Operand 1 must be a register tied to the destination and carries no
;; commutative marker, unlike *smaxv4sf3_finite.
494 (define_insn "*smaxv4sf3"
495 [(set (match_operand:V4SF 0 "register_operand" "=x")
496 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
497 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
499 "maxps\t{%2, %0|%0, %2}"
500 [(set_attr "type" "sse")
501 (set_attr "mode" "V4SF")])
;; Scalar form of the V4SF maximum for flag_finite_math_only, emitted as
;; maxss with "mode" SF.  Operands 1 and 2 are marked commutative; the
;; condition also requires ix86_binary_operator_ok.
503 (define_insn "*sse_vmsmaxv4sf3_finite"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
506 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
507 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
510 "TARGET_SSE && flag_finite_math_only
511 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
512 "maxss\t{%2, %0|%0, %2}"
513 [(set_attr "type" "sse")
514 (set_attr "mode" "SF")])
;; Scalar form of the V4SF maximum with a fixed operand order, emitted as
;; maxss with "mode" SF.  Operand 1 must be a register tied to the
;; destination.
516 (define_insn "sse_vmsmaxv4sf3"
517 [(set (match_operand:V4SF 0 "register_operand" "=x")
519 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
520 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
524 "maxss\t{%2, %0|%0, %2}"
525 [(set_attr "type" "sse")
526 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is off, operand 1 is
;; first forced into a register, which matches the non-finite insn
;; *sminv4sf3 (register-only, non-commutative operand 1).  The operands are
;; then passed to ix86_fixup_binary_operands_no_copy.
528 (define_expand "sminv4sf3"
529 [(set (match_operand:V4SF 0 "register_operand" "")
530 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
531 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
534 if (!flag_finite_math_only)
535 operands[1] = force_reg (V4SFmode, operands[1]);
536 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed V4SF minimum for flag_finite_math_only, emitted as minps.
;; Operands 1 and 2 are marked commutative ("%0"); the condition also
;; requires ix86_binary_operator_ok.
539 (define_insn "*sminv4sf3_finite"
540 [(set (match_operand:V4SF 0 "register_operand" "=x")
541 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
542 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
543 "TARGET_SSE && flag_finite_math_only
544 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
545 "minps\t{%2, %0|%0, %2}"
546 [(set_attr "type" "sse")
547 (set_attr "mode" "V4SF")])
;; Packed V4SF minimum with a fixed operand order, emitted as minps.
;; Operand 1 must be a register tied to the destination and carries no
;; commutative marker, unlike *sminv4sf3_finite.
549 (define_insn "*sminv4sf3"
550 [(set (match_operand:V4SF 0 "register_operand" "=x")
551 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
552 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
554 "minps\t{%2, %0|%0, %2}"
555 [(set_attr "type" "sse")
556 (set_attr "mode" "V4SF")])
;; Scalar form of the V4SF minimum for flag_finite_math_only, emitted as
;; minss with "mode" SF.  Operands 1 and 2 are marked commutative; the
;; condition also requires ix86_binary_operator_ok.
558 (define_insn "*sse_vmsminv4sf3_finite"
559 [(set (match_operand:V4SF 0 "register_operand" "=x")
561 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
562 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
565 "TARGET_SSE && flag_finite_math_only
566 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
567 "minss\t{%2, %0|%0, %2}"
568 [(set_attr "type" "sse")
569 (set_attr "mode" "SF")])
;; Scalar form of the V4SF minimum with a fixed operand order, emitted as
;; minss with "mode" SF.  Operand 1 must be a register tied to the
;; destination.
571 (define_insn "sse_vmsminv4sf3"
572 [(set (match_operand:V4SF 0 "register_operand" "=x")
574 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
575 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
579 "minss\t{%2, %0|%0, %2}"
580 [(set_attr "type" "sseadd")
581 (set_attr "mode" "SF")])
;; SSE3 addsubps.  The pattern combines an operation on operands 1 and 2
;; with a minus:V4SF of the same two operands (reused via match_dup).
;; Operand 1 is tied to the destination; operand 2 may be an SSE register or
;; memory.
583 (define_insn "sse3_addsubv4sf3"
584 [(set (match_operand:V4SF 0 "register_operand" "=x")
587 (match_operand:V4SF 1 "register_operand" "0")
588 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
589 (minus:V4SF (match_dup 1) (match_dup 2))
592 "addsubps\t{%2, %0|%0, %2}"
593 [(set_attr "type" "sseadd")
594 (set_attr "mode" "V4SF")])
;; SSE3 horizontal add, emitted as haddps.  The pattern selects the
;; individual SF elements 0..3 of operand 1 and then of operand 2 with
;; vec_select, pairing elements (0,1) and (2,3) of each input.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
596 (define_insn "sse3_haddv4sf3"
597 [(set (match_operand:V4SF 0 "register_operand" "=x")
602 (match_operand:V4SF 1 "register_operand" "0")
603 (parallel [(const_int 0)]))
604 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
606 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
607 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
611 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
612 (parallel [(const_int 0)]))
613 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
615 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
616 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
618 "haddps\t{%2, %0|%0, %2}"
619 [(set_attr "type" "sseadd")
620 (set_attr "mode" "V4SF")])
;; SSE3 horizontal subtract, emitted as hsubps.  The pattern has the same
;; shape as sse3_haddv4sf3: SF elements 0..3 of operand 1 and then of
;; operand 2 are selected with vec_select and paired as (0,1) and (2,3).
;; Operand 1 is tied to the destination; operand 2 may be an SSE register or
;; memory.
622 (define_insn "sse3_hsubv4sf3"
623 [(set (match_operand:V4SF 0 "register_operand" "=x")
628 (match_operand:V4SF 1 "register_operand" "0")
629 (parallel [(const_int 0)]))
630 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
632 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
633 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
637 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
638 (parallel [(const_int 0)]))
639 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
641 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
642 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
644 "hsubps\t{%2, %0|%0, %2}"
645 [(set_attr "type" "sseadd")
646 (set_attr "mode" "V4SF")])
648 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
650 ;; Parallel single-precision floating point comparisons
652 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison producing a V4SF result.  Operand 3 is the
;; comparison operator itself (any sse_comparison_operator); "%D3" in the
;; template prints it as the condition part of the cmp..ps mnemonic.
;; Operand 1 is tied to the destination; operand 2 may be an SSE register or
;; memory.
654 (define_insn "sse_maskcmpv4sf3"
655 [(set (match_operand:V4SF 0 "register_operand" "=x")
656 (match_operator:V4SF 3 "sse_comparison_operator"
657 [(match_operand:V4SF 1 "register_operand" "0")
658 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
660 "cmp%D3ps\t{%2, %0|%0, %2}"
661 [(set_attr "type" "ssecmp")
662 (set_attr "mode" "V4SF")])
;; Scalar form of the V4SF comparison, emitted as cmp..ss with "mode" SF.
;; Operand 3 is the comparison operator, printed through "%D3".  Unlike the
;; packed form, operand 2 must be an SSE register (no memory alternative).
664 (define_insn "sse_vmmaskcmpv4sf3"
665 [(set (match_operand:V4SF 0 "register_operand" "=x")
667 (match_operator:V4SF 3 "sse_comparison_operator"
668 [(match_operand:V4SF 1 "register_operand" "0")
669 (match_operand:V4SF 2 "register_operand" "x")])
673 "cmp%D3ss\t{%2, %0|%0, %2}"
674 [(set_attr "type" "ssecmp")
675 (set_attr "mode" "SF")])
;; Scalar compare that sets FLAGS_REG in CCFP mode, emitted as comiss.
;; Element 0 is selected from each V4SF operand; operand 0 must be an SSE
;; register and operand 1 may be an SSE register or memory.  Both operands
;; are inputs here -- the only output is the flags register.
677 (define_insn "sse_comi"
678 [(set (reg:CCFP FLAGS_REG)
681 (match_operand:V4SF 0 "register_operand" "x")
682 (parallel [(const_int 0)]))
684 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
685 (parallel [(const_int 0)]))))]
687 "comiss\t{%1, %0|%0, %1}"
688 [(set_attr "type" "ssecomi")
689 (set_attr "mode" "SF")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode, emitted as ucomiss.
;; Same operand shape as sse_comi: element 0 of each V4SF operand, with
;; operand 0 in an SSE register and operand 1 in a register or memory.
691 (define_insn "sse_ucomi"
692 [(set (reg:CCFPU FLAGS_REG)
695 (match_operand:V4SF 0 "register_operand" "x")
696 (parallel [(const_int 0)]))
698 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
699 (parallel [(const_int 0)]))))]
701 "ucomiss\t{%1, %0|%0, %1}"
702 [(set_attr "type" "ssecomi")
703 (set_attr "mode" "SF")])
705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
707 ;; Parallel single-precision floating point logical operations
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Both inputs may be register or memory; the
;; preparation code calls ix86_fixup_binary_operands_no_copy on the
;; operands.
711 (define_expand "andv4sf3"
712 [(set (match_operand:V4SF 0 "register_operand" "")
713 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
714 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
716 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Bitwise AND of two V4SF values, emitted as andps.  Operand 1 is tied to
;; the destination and marked commutative with operand 2 ("%0"); the
;; condition also requires ix86_binary_operator_ok.
718 (define_insn "*andv4sf3"
719 [(set (match_operand:V4SF 0 "register_operand" "=x")
720 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
721 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
722 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
723 "andps\t{%2, %0|%0, %2}"
724 [(set_attr "type" "sselog")
725 (set_attr "mode" "V4SF")])
;; AND-NOT of two V4SF values, emitted as andnps: the complement of
;; operand 1 (tied to the destination) is ANDed with operand 2.  The
;; operation is not commutative, so operand 1 carries no "%" marker.
727 (define_insn "sse_nandv4sf3"
728 [(set (match_operand:V4SF 0 "register_operand" "=x")
729 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
730 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
732 "andnps\t{%2, %0|%0, %2}"
733 [(set_attr "type" "sselog")
734 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Both inputs may be register or memory; the
;; preparation code calls ix86_fixup_binary_operands_no_copy on the
;; operands.
736 (define_expand "iorv4sf3"
737 [(set (match_operand:V4SF 0 "register_operand" "")
738 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
739 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
741 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Bitwise OR of two V4SF values, emitted as orps.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0"); the condition
;; also requires ix86_binary_operator_ok.
743 (define_insn "*iorv4sf3"
744 [(set (match_operand:V4SF 0 "register_operand" "=x")
745 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
746 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
747 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
748 "orps\t{%2, %0|%0, %2}"
749 [(set_attr "type" "sselog")
750 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Both inputs may be register or memory; the
;; preparation code calls ix86_fixup_binary_operands_no_copy on the
;; operands.
752 (define_expand "xorv4sf3"
753 [(set (match_operand:V4SF 0 "register_operand" "")
754 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
755 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
757 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Bitwise XOR of two V4SF values, emitted as xorps.  Operand 1 is tied to
;; the destination and marked commutative with operand 2 ("%0"); the
;; condition also requires ix86_binary_operator_ok.
759 (define_insn "*xorv4sf3"
760 [(set (match_operand:V4SF 0 "register_operand" "=x")
761 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
762 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
763 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
764 "xorps\t{%2, %0|%0, %2}"
765 [(set_attr "type" "sselog")
766 (set_attr "mode" "V4SF")])
768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
770 ;; Parallel single-precision floating point conversion operations
772 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer-to-float conversion, emitted as cvtpi2ps.  Operand 2 is a V2SI
;; value in an MMX register or memory ("ym") converted with float:V2SF;
;; operand 1 is a V4SF SSE register tied to the destination.
774 (define_insn "sse_cvtpi2ps"
775 [(set (match_operand:V4SF 0 "register_operand" "=x")
778 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
779 (match_operand:V4SF 1 "register_operand" "0")
782 "cvtpi2ps\t{%2, %0|%0, %2}"
783 [(set_attr "type" "ssecvt")
784 (set_attr "mode" "V4SF")])
;; Float-to-integer conversion into an MMX register ("=y"), emitted as
;; cvtps2pi.  The conversion of the V4SF input is an unspec producing V4SI,
;; of which elements 0 and 1 are selected for the V2SI result.
786 (define_insn "sse_cvtps2pi"
787 [(set (match_operand:V2SI 0 "register_operand" "=y")
789 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
791 (parallel [(const_int 0) (const_int 1)])))]
793 "cvtps2pi\t{%1, %0|%0, %1}"
794 [(set_attr "type" "ssecvt")
795 (set_attr "mode" "DI")])
;; Float-to-integer conversion into an MMX register ("=y"), emitted as
;; cvttps2pi.  The conversion is expressed with the fix RTL operator on the
;; V4SF input; elements 0 and 1 of the V4SI result are selected.
;; NOTE(review): the "mode" attribute here is SF while the otherwise
;; parallel sse_cvtps2pi uses DI -- confirm whether the difference is
;; intentional.
797 (define_insn "sse_cvttps2pi"
798 [(set (match_operand:V2SI 0 "register_operand" "=y")
800 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
801 (parallel [(const_int 0) (const_int 1)])))]
803 "cvttps2pi\t{%1, %0|%0, %1}"
804 [(set_attr "type" "ssecvt")
805 (set_attr "mode" "SF")])
;; SImode integer to SF conversion, emitted as cvtsi2ss.  Operand 2 is the
;; integer source (alternative 0: general register, alternative 1: memory);
;; operand 1 is a V4SF register tied to the destination.  The athlon_decode
;; attribute differs per alternative (vector for register, double for
;; memory).
807 (define_insn "sse_cvtsi2ss"
808 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
811 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
812 (match_operand:V4SF 1 "register_operand" "0,0")
815 "cvtsi2ss\t{%2, %0|%0, %2}"
816 [(set_attr "type" "sseicvt")
817 (set_attr "athlon_decode" "vector,double")
818 (set_attr "mode" "SF")])
;; DImode integer to SF conversion, emitted as cvtsi2ssq; only available
;; when both TARGET_SSE and TARGET_64BIT hold.  Operand 2 is the integer
;; source; operand 1 is a V4SF register tied to the destination.
;; NOTE(review): the second alternative of operand 2 is "rm" whereas
;; sse_cvtsi2ss uses plain "m"; since athlon_decode is chosen per
;; alternative, confirm that allowing a register there is intended.
820 (define_insn "sse_cvtsi2ssq"
821 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
824 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
825 (match_operand:V4SF 1 "register_operand" "0,0")
827 "TARGET_SSE && TARGET_64BIT"
828 "cvtsi2ssq\t{%2, %0|%0, %2}"
829 [(set_attr "type" "sseicvt")
830 (set_attr "athlon_decode" "vector,double")
831 (set_attr "mode" "SF")])
;; Conversion of element 0 of a V4SF operand to an SImode general register,
;; modeled as UNSPEC_FIX_NOTRUNC and emitted as cvtss2si.  The source is an
;; SSE register (alternative 0) or memory (alternative 1); athlon_decode is
;; double for the register form and vector for the memory form.
833 (define_insn "sse_cvtss2si"
834 [(set (match_operand:SI 0 "register_operand" "=r,r")
837 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
838 (parallel [(const_int 0)]))]
839 UNSPEC_FIX_NOTRUNC))]
841 "cvtss2si\t{%1, %0|%0, %1}"
842 [(set_attr "type" "sseicvt")
843 (set_attr "athlon_decode" "double,vector")
844 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse_cvtss2si: element 0 of a V4SF operand is
;; converted to a DImode general register via UNSPEC_FIX_NOTRUNC and emitted
;; as cvtss2siq.  Requires TARGET_SSE and TARGET_64BIT.
846 (define_insn "sse_cvtss2siq"
847 [(set (match_operand:DI 0 "register_operand" "=r,r")
850 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
851 (parallel [(const_int 0)]))]
852 UNSPEC_FIX_NOTRUNC))]
853 "TARGET_SSE && TARGET_64BIT"
854 "cvtss2siq\t{%1, %0|%0, %1}"
855 [(set_attr "type" "sseicvt")
856 (set_attr "athlon_decode" "double,vector")
857 (set_attr "mode" "DI")])
;; Conversion of element 0 of a V4SF operand to an SImode general register,
;; emitted as cvttss2si.  The source is an SSE register (alternative 0) or
;; memory (alternative 1); athlon_decode is double for the register form and
;; vector for the memory form.
859 (define_insn "sse_cvttss2si"
860 [(set (match_operand:SI 0 "register_operand" "=r,r")
863 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
864 (parallel [(const_int 0)]))))]
866 "cvttss2si\t{%1, %0|%0, %1}"
867 [(set_attr "type" "sseicvt")
868 (set_attr "athlon_decode" "double,vector")
869 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse_cvttss2si: element 0 of a V4SF operand is
;; converted to a DImode general register and emitted as cvttss2siq.
;; Requires TARGET_SSE and TARGET_64BIT.
871 (define_insn "sse_cvttss2siq"
872 [(set (match_operand:DI 0 "register_operand" "=r,r")
875 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
876 (parallel [(const_int 0)]))))]
877 "TARGET_SSE && TARGET_64BIT"
878 "cvttss2siq\t{%1, %0|%0, %1}"
879 [(set_attr "type" "sseicvt")
880 (set_attr "athlon_decode" "double,vector")
881 (set_attr "mode" "DI")])
;; Conversion of four packed 32-bit integers (V4SI, register or memory) to
;; four packed single-precision floats, expressed with float:V4SF and
;; emitted as cvtdq2ps.
;; The "mode" attribute is V4SF, matching the result mode and the
;; packed-single mnemonic; V2DF would describe a packed-double operation.
;; NOTE(review): attributes derived from "mode" (e.g. prefix/length
;; computation in i386.md) are presumed to treat V2DF as needing an
;; operand-size prefix that cvtdq2ps does not have -- confirm.
883 (define_insn "sse2_cvtdq2ps"
884 [(set (match_operand:V4SF 0 "register_operand" "=x")
885 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
887 "cvtdq2ps\t{%1, %0|%0, %1}"
888 [(set_attr "type" "ssecvt")
889 (set_attr "mode" "V4SF")])
;; Conversion of a V4SF value (register or memory) to V4SI, modeled as
;; UNSPEC_FIX_NOTRUNC and emitted as cvtps2dq.
891 (define_insn "sse2_cvtps2dq"
892 [(set (match_operand:V4SI 0 "register_operand" "=x")
893 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
894 UNSPEC_FIX_NOTRUNC))]
896 "cvtps2dq\t{%1, %0|%0, %1}"
897 [(set_attr "type" "ssecvt")
898 (set_attr "mode" "TI")])
;; Conversion of a V4SF value (register or memory) to V4SI, expressed with
;; the fix RTL operator and emitted as cvttps2dq.
900 (define_insn "sse2_cvttps2dq"
901 [(set (match_operand:V4SI 0 "register_operand" "=x")
902 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
904 "cvttps2dq\t{%1, %0|%0, %1}"
905 [(set_attr "type" "ssecvt")
906 (set_attr "mode" "V4SF")])
908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
910 ;; Parallel single-precision floating point element swizzling
912 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection from operands 1 and 2 with three alternatives:
;;   0: register/register, emitted as movhlps;
;;   1: operand 1 in offsettable memory ("o"), emitted as movlps from %H1
;;      (presumably the upper half of that memory operand -- confirm the
;;      meaning of the H print modifier);
;;   2: destination in memory, emitted as movhps from register operand 1.
;; The condition rejects the case where operands 1 and 2 are both memory.
914 (define_insn "sse_movhlps"
915 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
918 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
919 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
920 (parallel [(const_int 4)
924 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
926 movhlps\t{%2, %0|%0, %2}
927 movlps\t{%H1, %0|%0, %H1}
928 movhps\t{%1, %0|%0, %1}"
929 [(set_attr "type" "ssemov")
930 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection from operands 1 and 2 with operand 1 always tied to the
;; destination.  Three alternatives:
;;   0: register/register, emitted as movlhps;
;;   1: operand 2 in memory, emitted as movhps (load);
;;   2: destination in offsettable memory ("o"), emitted as movlps to %H0
;;      (presumably the upper half of the destination -- confirm the meaning
;;      of the H print modifier).
;; The condition requires ix86_binary_operator_ok.
932 (define_insn "sse_movlhps"
933 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
936 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
937 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
938 (parallel [(const_int 0)
942 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
944 movlhps\t{%2, %0|%0, %2}
945 movhps\t{%2, %0|%0, %2}
946 movlps\t{%2, %H0|%H0, %2}"
947 [(set_attr "type" "ssemov")
948 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Interleave of the high halves of operands 1 and 2, emitted as unpckhps.
;; The selection indices 2, 6, 3, 7 refer to the combined operand pair
;; (indices 4..7 presumably address operand 2 -- the combining RTL is not
;; visible in this excerpt).  Operand 1 is tied to the destination.
950 (define_insn "sse_unpckhps"
951 [(set (match_operand:V4SF 0 "register_operand" "=x")
954 (match_operand:V4SF 1 "register_operand" "0")
955 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
956 (parallel [(const_int 2) (const_int 6)
957 (const_int 3) (const_int 7)])))]
959 "unpckhps\t{%2, %0|%0, %2}"
960 [(set_attr "type" "sselog")
961 (set_attr "mode" "V4SF")])
;; Interleave of the low halves of operands 1 and 2, emitted as unpcklps.
;; The selection indices 0, 4, 1, 5 refer to the combined operand pair
;; (indices 4..7 presumably address operand 2 -- the combining RTL is not
;; visible in this excerpt).  Operand 1 is tied to the destination.
963 (define_insn "sse_unpcklps"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
967 (match_operand:V4SF 1 "register_operand" "0")
968 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
969 (parallel [(const_int 0) (const_int 4)
970 (const_int 1) (const_int 5)])))]
972 "unpcklps\t{%2, %0|%0, %2}"
973 [(set_attr "type" "sselog")
974 (set_attr "mode" "V4SF")])
976 ;; These are modeled with the same vec_concat as the others so that we
977 ;; capture users of shufps that can use the new instructions
;; SSE3 movshdup: single-input element shuffle whose selection list starts
;; with element 1.  The input may be an SSE register or memory and is not
;; tied to the destination.
978 (define_insn "sse3_movshdup"
979 [(set (match_operand:V4SF 0 "register_operand" "=x")
982 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
984 (parallel [(const_int 1)
989 "movshdup\t{%1, %0|%0, %1}"
990 [(set_attr "type" "sse")
991 (set_attr "mode" "V4SF")])
;; SSE3 movsldup: single-input element shuffle whose selection list starts
;; with element 0.  The input may be an SSE register or memory and is not
;; tied to the destination.
993 (define_insn "sse3_movsldup"
994 [(set (match_operand:V4SF 0 "register_operand" "=x")
997 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
999 (parallel [(const_int 0)
1004 "movsldup\t{%1, %0|%0, %1}"
1005 [(set_attr "type" "sse")
1006 (set_attr "mode" "V4SF")])
;; Expander for shufps taking the immediate as a single const_int
;; (operand 3).  The mask is split into four 2-bit fields; the upper two
;; fields get 4 added so that they fall in the 4..7 range required by the
;; const_4_to_7_operand predicates of sse_shufps_1, which is emitted with
;; the four resulting selectors.
1008 (define_expand "sse_shufps"
1009 [(match_operand:V4SF 0 "register_operand" "")
1010 (match_operand:V4SF 1 "register_operand" "")
1011 (match_operand:V4SF 2 "nonimmediate_operand" "")
1012 (match_operand:SI 3 "const_int_operand" "")]
1015 int mask = INTVAL (operands[3]);
1016 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1017 GEN_INT ((mask >> 0) & 3),
1018 GEN_INT ((mask >> 2) & 3),
1019 GEN_INT (((mask >> 4) & 3) + 4),
1020 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn for shufps with the selection spelled out as four separate
;; constants: operands 3 and 4 in 0..3, operands 5 and 6 in 4..7.  The
;; output code packs them back into one immediate (2 bits each, after
;; subtracting 4 from operands 5 and 6), stores it in operands[3], and emits
;; shufps.  Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory.
1024 (define_insn "sse_shufps_1"
1025 [(set (match_operand:V4SF 0 "register_operand" "=x")
1028 (match_operand:V4SF 1 "register_operand" "0")
1029 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1030 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1031 (match_operand 4 "const_0_to_3_operand" "")
1032 (match_operand 5 "const_4_to_7_operand" "")
1033 (match_operand 6 "const_4_to_7_operand" "")])))]
1037 mask |= INTVAL (operands[3]) << 0;
1038 mask |= INTVAL (operands[4]) << 2;
1039 mask |= (INTVAL (operands[5]) - 4) << 4;
1040 mask |= (INTVAL (operands[6]) - 4) << 6;
1041 operands[3] = GEN_INT (mask);
1043 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1045 [(set_attr "type" "sselog")
1046 (set_attr "mode" "V4SF")])
;; Extraction of elements 2 and 3 of a V4SF operand into a V2SF
;; destination.  Three alternatives:
;;   0: register to memory, emitted as movhps;
;;   1: register to register, emitted as movhlps;
;;   2: offsettable memory ("o") to register, emitted as movlps from %H1
;;      (presumably the upper half of the memory operand -- confirm the
;;      meaning of the H print modifier).
1048 (define_insn "sse_storehps"
1049 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1051 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1052 (parallel [(const_int 2) (const_int 3)])))]
1055 movhps\t{%1, %0|%0, %1}
1056 movhlps\t{%1, %0|%0, %1}
1057 movlps\t{%H1, %0|%0, %H1}"
1058 [(set_attr "type" "ssemov")
1059 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combination of elements 0 and 1 of operand 1 (always tied to the
;; destination) with the V2SF operand 2.  Three alternatives:
;;   0: operand 2 in memory, emitted as movhps;
;;   1: operand 2 in a register, emitted as movlhps;
;;   2: destination in offsettable memory ("o"), emitted as movlps to %H0
;;      (presumably the upper half of the destination -- confirm the meaning
;;      of the H print modifier).
1061 (define_insn "sse_loadhps"
1062 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1065 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1066 (parallel [(const_int 0) (const_int 1)]))
1067 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1070 movhps\t{%2, %0|%0, %2}
1071 movlhps\t{%2, %0|%0, %2}
1072 movlps\t{%2, %H0|%H0, %2}"
1073 [(set_attr "type" "ssemov")
1074 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extraction of elements 0 and 1 of a V4SF operand into a V2SF
;; destination.  Three alternatives:
;;   0: register to memory, emitted as movlps;
;;   1: register to register, emitted as a full-register movaps;
;;   2: memory to register, emitted as movlps.
1076 (define_insn "sse_storelps"
1077 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1079 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1080 (parallel [(const_int 0) (const_int 1)])))]
1083 movlps\t{%1, %0|%0, %1}
1084 movaps\t{%1, %0|%0, %1}
1085 movlps\t{%1, %0|%0, %1}"
1086 [(set_attr "type" "ssemov")
1087 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; V2SF operand 2 paired with elements 2 and 3 of V4SF operand 1.
;; Alternative 0 has operand 2 tied to the destination and pulls operand 1
;; in with shufps (selector 0xe4); alternatives 1 and 2 have operand 1
;; tied to the destination and move operand 2 with movlps.
1089 (define_insn "sse_loadlps"
1090 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1092 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1094 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1095 (parallel [(const_int 2) (const_int 3)]))))]
1098 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1099 movlps\t{%2, %0|%0, %2}
1100 movlps\t{%2, %0|%0, %2}"
1101 [(set_attr "type" "sselog,ssemov,ssemov")
1102 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss from V4SF register operand 2 into the destination, which is
;; tied to V4SF register operand 1.
1104 (define_insn "sse_movss"
1105 [(set (match_operand:V4SF 0 "register_operand" "=x")
1107 (match_operand:V4SF 2 "register_operand" "x")
1108 (match_operand:V4SF 1 "register_operand" "0")
1111 "movss\t{%2, %0|%0, %2}"
1112 [(set_attr "type" "ssemov")
1113 (set_attr "mode" "SF")])
;; SF operand 1 is tied to the destination register; the template is a
;; shufps of the destination with itself using selector 0.
1115 (define_insn "*vec_dupv4sf"
1116 [(set (match_operand:V4SF 0 "register_operand" "=x")
1118 (match_operand:SF 1 "register_operand" "0")))]
1120 "shufps\t{$0, %0, %0|%0, %0, 0}"
1121 [(set_attr "type" "sselog1")
1122 (set_attr "mode" "V4SF")])
1124 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1125 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1126 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from SF operands 1 and 2 in an SSE (x) or MMX (y)
;; register.  When operand 1 is tied to the destination, operand 2 is
;; interleaved in (unpcklps / punpckldq).  When operand 1 is in memory,
;; operand 2 must satisfy constraint C and a scalar load (movss / movd)
;; of operand 1 is emitted.
1127 (define_insn "*sse_concatv2sf"
1128 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1130 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1131 (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
1134 unpcklps\t{%2, %0|%0, %2}
1135 movss\t{%1, %0|%0, %1}
1136 punpckldq\t{%2, %0|%0, %2}
1137 movd\t{%1, %0|%0, %1}"
1138 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1139 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 is brought in with movlhps (register) or
;; movhps (memory).
1141 (define_insn "*sse_concatv4sf"
1142 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1144 (match_operand:V2SF 1 "register_operand" " 0,0")
1145 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1148 movlhps\t{%2, %0|%0, %2}
1149 movhps\t{%2, %0|%0, %2}"
1150 [(set_attr "type" "ssemov")
1151 (set_attr "mode" "V4SF,V2SF")])
;; Expander: initialisation of V4SF operand 0 from operand 1 is handed
;; to ix86_expand_vector_init (first argument false).
1153 (define_expand "vec_initv4sf"
1154 [(match_operand:V4SF 0 "register_operand" "")
1155 (match_operand 1 "" "")]
1158 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into V4SF operand 0 (element 0, going by the
;; pattern name).  Operand 1 is either tied to the destination or must
;; satisfy constraint C.  movss is used for SSE-register and memory
;; sources, movd for a general-register source.
1162 (define_insn "*vec_setv4sf_0"
1163 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1166 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1167 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1171 movss\t{%2, %0|%0, %2}
1172 movss\t{%2, %0|%0, %2}
1173 movd\t{%2, %0|%0, %2}
1175 [(set_attr "type" "ssemov")
1176 (set_attr "mode" "SF")])
;; After reload: a V4SF memory destination with an SF non-memory source
;; is rewritten as a plain SFmode move to offset 0 of that memory.
1179 [(set (match_operand:V4SF 0 "memory_operand" "")
1182 (match_operand:SF 1 "nonmemory_operand" ""))
1185 "TARGET_SSE && reload_completed"
1188 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander: stores SF operand 1 into V4SF operand 0 at the element index
;; given by constant operand 2, via ix86_expand_vector_set.
1192 (define_expand "vec_setv4sf"
1193 [(match_operand:V4SF 0 "register_operand" "")
1194 (match_operand:SF 1 "register_operand" "")
1195 (match_operand 2 "const_int_operand" "")]
1198 ix86_expand_vector_set (false, operands[0], operands[1],
1199 INTVAL (operands[2]));
;; Element 0 of V4SF operand 1 as an SF value; memory-to-memory is
;; rejected by the insn condition.  After reload the pattern is split
;; into an ordinary SFmode move: operand 1 is re-expressed in SFmode
;; (same register number via gen_rtx_REG, or gen_lowpart) and moved to
;; operand 0.
1203 (define_insn_and_split "*vec_extractv4sf_0"
1204 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1206 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1207 (parallel [(const_int 0)])))]
1208 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1210 "&& reload_completed"
1213 rtx op1 = operands[1];
1215 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1217 op1 = gen_lowpart (SFmode, op1);
1218 emit_move_insn (operands[0], op1);
;; Expander: extracts the element of V4SF operand 1 selected by constant
;; operand 2 into SF operand 0, via ix86_expand_vector_extract.
1222 (define_expand "vec_extractv4sf"
1223 [(match_operand:SF 0 "register_operand" "")
1224 (match_operand:V4SF 1 "register_operand" "")
1225 (match_operand 2 "const_int_operand" "")]
1228 ix86_expand_vector_extract (false, operands[0], operands[1],
1229 INTVAL (operands[2]));
1233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1235 ;; Parallel double-precision floating point arithmetic
1237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander: V2DF negation, emitted entirely by
;; ix86_expand_fp_absneg_operator (NEG); the RTL template is not used
;; because the preparation code ends with DONE.
1239 (define_expand "negv2df2"
1240 [(set (match_operand:V2DF 0 "register_operand" "")
1241 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1243 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; Expander: V2DF absolute value, emitted entirely by
;; ix86_expand_fp_absneg_operator (ABS); the RTL template is not used
;; because the preparation code ends with DONE.
1245 (define_expand "absv2df2"
1246 [(set (match_operand:V2DF 0 "register_operand" "")
1247 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1249 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; Expander: V2DF addition.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1251 (define_expand "addv2df3"
1252 [(set (match_operand:V2DF 0 "register_operand" "")
1253 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1254 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1256 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; Packed double add (addpd).  Operand 1 is tied to the destination and
;; marked commutative with operand 2 (%); operand 2 may be memory.
1258 (define_insn "*addv2df3"
1259 [(set (match_operand:V2DF 0 "register_operand" "=x")
1260 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1261 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1262 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1263 "addpd\t{%2, %0|%0, %2}"
1264 [(set_attr "type" "sseadd")
1265 (set_attr "mode" "V2DF")])
;; Scalar double add (addsd) on V2DF operands.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 (%); operand 2 may
;; be memory.  The operator check is made in V2DFmode, the mode of every
;; operand of this pattern (it previously named V4SFmode).
1267 (define_insn "sse2_vmaddv2df3"
1268 [(set (match_operand:V2DF 0 "register_operand" "=x")
1270 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1271 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1274 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1275 "addsd\t{%2, %0|%0, %2}"
1276 [(set_attr "type" "sseadd")
1277 (set_attr "mode" "DF")])
;; Expander: V2DF subtraction.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1279 (define_expand "subv2df3"
1280 [(set (match_operand:V2DF 0 "register_operand" "")
1281 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1282 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1284 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; Packed double subtract (subpd).  Operand 1 must be a register tied to
;; the destination (no commutative marker); operand 2 may be memory.
1286 (define_insn "*subv2df3"
1287 [(set (match_operand:V2DF 0 "register_operand" "=x")
1288 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1289 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1291 "subpd\t{%2, %0|%0, %2}"
1292 [(set_attr "type" "sseadd")
1293 (set_attr "mode" "V2DF")])
;; Scalar double subtract (subsd) on V2DF operands.  Operand 1 is a
;; register tied to the destination; operand 2 may be memory.
1295 (define_insn "sse2_vmsubv2df3"
1296 [(set (match_operand:V2DF 0 "register_operand" "=x")
1298 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1299 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1303 "subsd\t{%2, %0|%0, %2}"
1304 [(set_attr "type" "sseadd")
1305 (set_attr "mode" "DF")])
;; Expander: V2DF multiplication.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1307 (define_expand "mulv2df3"
1308 [(set (match_operand:V2DF 0 "register_operand" "")
1309 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1310 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1312 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; Packed double multiply (mulpd).  Operand 1 is tied to the destination
;; and marked commutative with operand 2 (%); operand 2 may be memory.
1314 (define_insn "*mulv2df3"
1315 [(set (match_operand:V2DF 0 "register_operand" "=x")
1316 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1317 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1318 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1319 "mulpd\t{%2, %0|%0, %2}"
1320 [(set_attr "type" "ssemul")
1321 (set_attr "mode" "V2DF")])
;; Scalar double multiply (mulsd) on V2DF operands.  Operand 1 is tied to
;; the destination and marked commutative with operand 2 (%).
1323 (define_insn "sse2_vmmulv2df3"
1324 [(set (match_operand:V2DF 0 "register_operand" "=x")
1326 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1327 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1330 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1331 "mulsd\t{%2, %0|%0, %2}"
1332 [(set_attr "type" "ssemul")
1333 (set_attr "mode" "DF")])
;; Expander: V2DF division.  The dividend (operand 1) must already be a
;; register; operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
1335 (define_expand "divv2df3"
1336 [(set (match_operand:V2DF 0 "register_operand" "")
1337 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1338 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1340 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed double divide (divpd).  Operand 1 is a register tied to the
;; destination; operand 2 may be memory.
1342 (define_insn "*divv2df3"
1343 [(set (match_operand:V2DF 0 "register_operand" "=x")
1344 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1345 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1347 "divpd\t{%2, %0|%0, %2}"
1348 [(set_attr "type" "ssediv")
1349 (set_attr "mode" "V2DF")])
;; Scalar double divide (divsd) on V2DF operands.  Operand 1 is a
;; register tied to the destination; operand 2 may be memory.
1351 (define_insn "sse2_vmdivv2df3"
1352 [(set (match_operand:V2DF 0 "register_operand" "=x")
1354 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1355 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1359 "divsd\t{%2, %0|%0, %2}"
1360 [(set_attr "type" "ssediv")
1361 (set_attr "mode" "DF")])
;; Packed double square root (sqrtpd); the source may be a register or
;; memory and is not tied to the destination.
1363 (define_insn "sqrtv2df2"
1364 [(set (match_operand:V2DF 0 "register_operand" "=x")
1365 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1367 "sqrtpd\t{%1, %0|%0, %1}"
1368 [(set_attr "type" "sse")
1369 (set_attr "mode" "V2DF")])
;; Scalar double square root (sqrtsd) of V2DF operand 1; V2DF operand 2
;; is tied to the destination.
;; Operand 1 uses nonimmediate_operand so that the "m" half of its "xm"
;; constraint is reachable (a register_operand predicate would reject
;; memory before the constraint is ever consulted), and the mode
;; attribute is DF to match the double-precision instruction, as in the
;; other sse2_vm* patterns.
1371 (define_insn "sse2_vmsqrtv2df2"
1372 [(set (match_operand:V2DF 0 "register_operand" "=x")
1374 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1375 (match_operand:V2DF 2 "register_operand" "0")
1378 "sqrtsd\t{%1, %0|%0, %1}"
1379 [(set_attr "type" "sse")
1380 (set_attr "mode" "DF")])
1382 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1383 ;; isn't really correct, as those rtl operators aren't defined when
1384 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander: V2DF maximum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1386 (define_expand "smaxv2df3"
1387 [(set (match_operand:V2DF 0 "register_operand" "")
1388 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1389 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1392 if (!flag_finite_math_only)
1393 operands[1] = force_reg (V2DFmode, operands[1]);
1394 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; Packed double maximum (maxpd), enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative with
;; operand 2 (%).
1397 (define_insn "*smaxv2df3_finite"
1398 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1401 "TARGET_SSE2 && flag_finite_math_only
1402 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1403 "maxpd\t{%2, %0|%0, %2}"
1404 [(set_attr "type" "sseadd")
1405 (set_attr "mode" "V2DF")])
;; Packed double maximum (maxpd) in the non-commutative form: operand 1
;; must be a register tied to the destination and carries no % marker.
1407 (define_insn "*smaxv2df3"
1408 [(set (match_operand:V2DF 0 "register_operand" "=x")
1409 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1410 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1412 "maxpd\t{%2, %0|%0, %2}"
1413 [(set_attr "type" "sseadd")
1414 (set_attr "mode" "V2DF")])
;; Scalar double maximum (maxsd), enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative with
;; operand 2 (%).
1416 (define_insn "*sse2_vmsmaxv2df3_finite"
1417 [(set (match_operand:V2DF 0 "register_operand" "=x")
1419 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1420 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1423 "TARGET_SSE2 && flag_finite_math_only
1424 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1425 "maxsd\t{%2, %0|%0, %2}"
1426 [(set_attr "type" "sseadd")
1427 (set_attr "mode" "DF")])
;; Scalar double maximum (maxsd) in the non-commutative form: operand 1
;; must be a register tied to the destination.
1429 (define_insn "sse2_vmsmaxv2df3"
1430 [(set (match_operand:V2DF 0 "register_operand" "=x")
1432 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1433 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1437 "maxsd\t{%2, %0|%0, %2}"
1438 [(set_attr "type" "sseadd")
1439 (set_attr "mode" "DF")])
;; Expander: V2DF minimum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1441 (define_expand "sminv2df3"
1442 [(set (match_operand:V2DF 0 "register_operand" "")
1443 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1444 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1447 if (!flag_finite_math_only)
1448 operands[1] = force_reg (V2DFmode, operands[1]);
1449 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; Packed double minimum (minpd), enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative with
;; operand 2 (%).
1452 (define_insn "*sminv2df3_finite"
1453 [(set (match_operand:V2DF 0 "register_operand" "=x")
1454 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1455 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1456 "TARGET_SSE2 && flag_finite_math_only
1457 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1458 "minpd\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "sseadd")
1460 (set_attr "mode" "V2DF")])
;; Packed double minimum (minpd) in the non-commutative form: operand 1
;; must be a register tied to the destination and carries no % marker.
1462 (define_insn "*sminv2df3"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1464 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1465 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1467 "minpd\t{%2, %0|%0, %2}"
1468 [(set_attr "type" "sseadd")
1469 (set_attr "mode" "V2DF")])
;; Scalar double minimum (minsd), enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative with
;; operand 2 (%).
1471 (define_insn "*sse2_vmsminv2df3_finite"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1474 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1475 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1478 "TARGET_SSE2 && flag_finite_math_only
1479 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1480 "minsd\t{%2, %0|%0, %2}"
1481 [(set_attr "type" "sseadd")
1482 (set_attr "mode" "DF")])
;; Scalar double minimum (minsd) in the non-commutative form: operand 1
;; must be a register tied to the destination.
1484 (define_insn "sse2_vmsminv2df3"
1485 [(set (match_operand:V2DF 0 "register_operand" "=x")
1487 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1488 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1492 "minsd\t{%2, %0|%0, %2}"
1493 [(set_attr "type" "sseadd")
1494 (set_attr "mode" "DF")])
;; addsubpd on V2DF operands 1 and 2.  Operand 1 is tied to the
;; destination; the RTL reuses both operands (match_dup) for the
;; minus:V2DF half of the operation.
1496 (define_insn "sse3_addsubv2df3"
1497 [(set (match_operand:V2DF 0 "register_operand" "=x")
1500 (match_operand:V2DF 1 "register_operand" "0")
1501 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1502 (minus:V2DF (match_dup 1) (match_dup 2))
1505 "addsubpd\t{%2, %0|%0, %2}"
1506 [(set_attr "type" "sseadd")
1507 (set_attr "mode" "V2DF")])
;; haddpd.  The RTL pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2; operand 1 is tied to the
;; destination and operand 2 may be memory.
1509 (define_insn "sse3_haddv2df3"
1510 [(set (match_operand:V2DF 0 "register_operand" "=x")
1514 (match_operand:V2DF 1 "register_operand" "0")
1515 (parallel [(const_int 0)]))
1516 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1519 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1520 (parallel [(const_int 0)]))
1521 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1523 "haddpd\t{%2, %0|%0, %2}"
1524 [(set_attr "type" "sseadd")
1525 (set_attr "mode" "V2DF")])
;; hsubpd.  The RTL pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2; operand 1 is tied to the
;; destination and operand 2 may be memory.
1527 (define_insn "sse3_hsubv2df3"
1528 [(set (match_operand:V2DF 0 "register_operand" "=x")
1532 (match_operand:V2DF 1 "register_operand" "0")
1533 (parallel [(const_int 0)]))
1534 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1537 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1538 (parallel [(const_int 0)]))
1539 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1541 "hsubpd\t{%2, %0|%0, %2}"
1542 [(set_attr "type" "sseadd")
1543 (set_attr "mode" "V2DF")])
1545 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1547 ;; Parallel double-precision floating point comparisons
1549 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare with a V2DF result.  The comparison is
;; operator 3 (sse_comparison_operator); %D3 prints it into the
;; cmp<cond>pd mnemonic.  Operand 1 is tied to the destination.
1551 (define_insn "sse2_maskcmpv2df3"
1552 [(set (match_operand:V2DF 0 "register_operand" "=x")
1553 (match_operator:V2DF 3 "sse_comparison_operator"
1554 [(match_operand:V2DF 1 "register_operand" "0")
1555 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1557 "cmp%D3pd\t{%2, %0|%0, %2}"
1558 [(set_attr "type" "ssecmp")
1559 (set_attr "mode" "V2DF")])
;; Scalar double compare on V2DF operands.  The comparison is operator 3
;; (sse_comparison_operator); %D3 prints it into the cmp<cond>sd
;; mnemonic.  Operand 1 is tied to the destination.
1561 (define_insn "sse2_vmmaskcmpv2df3"
1562 [(set (match_operand:V2DF 0 "register_operand" "=x")
1564 (match_operator:V2DF 3 "sse_comparison_operator"
1565 [(match_operand:V2DF 1 "register_operand" "0")
1566 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1570 "cmp%D3sd\t{%2, %0|%0, %2}"
1571 [(set_attr "type" "ssecmp")
1572 (set_attr "mode" "DF")])
;; comisd: compares element 0 of V2DF operand 0 with element 0 of V2DF
;; operand 1 and sets FLAGS_REG in CCFP mode.
1574 (define_insn "sse2_comi"
1575 [(set (reg:CCFP FLAGS_REG)
1578 (match_operand:V2DF 0 "register_operand" "x")
1579 (parallel [(const_int 0)]))
1581 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1582 (parallel [(const_int 0)]))))]
1584 "comisd\t{%1, %0|%0, %1}"
1585 [(set_attr "type" "ssecomi")
1586 (set_attr "mode" "DF")])
;; ucomisd: compares element 0 of V2DF operand 0 with element 0 of V2DF
;; operand 1 and sets FLAGS_REG in CCFPU mode.
1588 (define_insn "sse2_ucomi"
1589 [(set (reg:CCFPU FLAGS_REG)
1592 (match_operand:V2DF 0 "register_operand" "x")
1593 (parallel [(const_int 0)]))
1595 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1596 (parallel [(const_int 0)]))))]
1598 "ucomisd\t{%1, %0|%0, %1}"
1599 [(set_attr "type" "ssecomi")
1600 (set_attr "mode" "DF")])
1602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1604 ;; Parallel double-precision floating point logical operations
1606 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander: bitwise AND of two V2DF values.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1608 (define_expand "andv2df3"
1609 [(set (match_operand:V2DF 0 "register_operand" "")
1610 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1611 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1613 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; Bitwise AND of two V2DF values (andpd).  Operand 1 is tied to the
;; destination and marked commutative with operand 2 (%).  The operator
;; check is made in V2DFmode, matching the operands and the sibling
;; *iorv2df3 / *xorv2df3 patterns (it previously named V4SFmode).
1615 (define_insn "*andv2df3"
1616 [(set (match_operand:V2DF 0 "register_operand" "=x")
1617 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1618 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1619 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1620 "andpd\t{%2, %0|%0, %2}"
1621 [(set_attr "type" "sselog")
1622 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2 on V2DF values.  Operand 1 is a
;; register tied to the destination; operand 2 may be memory.
1624 (define_insn "sse2_nandv2df3"
1625 [(set (match_operand:V2DF 0 "register_operand" "=x")
1626 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1627 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1629 "andnpd\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "sselog")
1631 (set_attr "mode" "V2DF")])
;; Expander: bitwise OR of two V2DF values.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1633 (define_expand "iorv2df3"
1634 [(set (match_operand:V2DF 0 "register_operand" "")
1635 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1636 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1638 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; Bitwise OR of two V2DF values (orpd).  Operand 1 is tied to the
;; destination and marked commutative with operand 2 (%).
1640 (define_insn "*iorv2df3"
1641 [(set (match_operand:V2DF 0 "register_operand" "=x")
1642 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1643 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1644 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1645 "orpd\t{%2, %0|%0, %2}"
1646 [(set_attr "type" "sselog")
1647 (set_attr "mode" "V2DF")])
;; Expander: bitwise XOR of two V2DF values.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1649 (define_expand "xorv2df3"
1650 [(set (match_operand:V2DF 0 "register_operand" "")
1651 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1652 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1654 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; Bitwise XOR of two V2DF values (xorpd).  Operand 1 is tied to the
;; destination and marked commutative with operand 2 (%).
1656 (define_insn "*xorv2df3"
1657 [(set (match_operand:V2DF 0 "register_operand" "=x")
1658 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1659 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1660 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1661 "xorpd\t{%2, %0|%0, %2}"
1662 [(set_attr "type" "sselog")
1663 (set_attr "mode" "V2DF")])
1665 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1667 ;; Parallel double-precision floating point conversion operations
1669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: converts V2SI operand 1 (MMX register or memory) to V2DF in
;; an SSE register.
1671 (define_insn "sse2_cvtpi2pd"
1672 [(set (match_operand:V2DF 0 "register_operand" "=x")
1673 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
1675 "cvtpi2pd\t{%1, %0|%0, %1}"
1676 [(set_attr "type" "ssecvt")
1677 (set_attr "mode" "V2DF")])
;; cvtpd2pi: converts V2DF operand 1 to V2SI in an MMX register.  The
;; conversion is represented as UNSPEC_FIX_NOTRUNC rather than fix.
1679 (define_insn "sse2_cvtpd2pi"
1680 [(set (match_operand:V2SI 0 "register_operand" "=y")
1681 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1682 UNSPEC_FIX_NOTRUNC))]
1684 "cvtpd2pi\t{%1, %0|%0, %1}"
1685 [(set_attr "type" "ssecvt")
1686 (set_attr "mode" "DI")])
;; cvttpd2pi: converts V2DF operand 1 to V2SI in an MMX register using
;; the fix (truncating) RTL code.
;; NOTE(review): the mode attribute here is TI while sse2_cvtpd2pi uses
;; DI for the same destination class -- confirm which is intended.
1688 (define_insn "sse2_cvttpd2pi"
1689 [(set (match_operand:V2SI 0 "register_operand" "=y")
1690 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1692 "cvttpd2pi\t{%1, %0|%0, %1}"
1693 [(set_attr "type" "ssecvt")
1694 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SI operand 2 (general register or memory) to DF.
;; V2DF operand 1 is tied to the destination.  The athlon_decode
;; attribute differs between the register and memory alternatives.
1696 (define_insn "sse2_cvtsi2sd"
1697 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1700 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1701 (match_operand:V2DF 1 "register_operand" "0,0")
1704 "cvtsi2sd\t{%2, %0|%0, %2}"
1705 [(set_attr "type" "sseicvt")
1706 (set_attr "mode" "DF")
1707 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: converts DI operand 2 (general register or memory) to DF;
;; available only with TARGET_64BIT.  V2DF operand 1 is tied to the
;; destination.
1709 (define_insn "sse2_cvtsi2sdq"
1710 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1713 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1714 (match_operand:V2DF 1 "register_operand" "0,0")
1716 "TARGET_SSE2 && TARGET_64BIT"
1717 "cvtsi2sdq\t{%2, %0|%0, %2}"
1718 [(set_attr "type" "sseicvt")
1719 (set_attr "mode" "DF")
1720 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: converts element 0 of V2DF operand 1 to SI in a general
;; register.  The conversion is represented as UNSPEC_FIX_NOTRUNC.
1722 (define_insn "sse2_cvtsd2si"
1723 [(set (match_operand:SI 0 "register_operand" "=r,r")
1726 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1727 (parallel [(const_int 0)]))]
1728 UNSPEC_FIX_NOTRUNC))]
1730 "cvtsd2si\t{%1, %0|%0, %1}"
1731 [(set_attr "type" "sseicvt")
1732 (set_attr "athlon_decode" "double,vector")
1733 (set_attr "mode" "SI")])
;; cvtsd2siq: converts element 0 of V2DF operand 1 to DI in a general
;; register; available only with TARGET_64BIT.  The conversion is
;; represented as UNSPEC_FIX_NOTRUNC.
1735 (define_insn "sse2_cvtsd2siq"
1736 [(set (match_operand:DI 0 "register_operand" "=r,r")
1739 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1740 (parallel [(const_int 0)]))]
1741 UNSPEC_FIX_NOTRUNC))]
1742 "TARGET_SSE2 && TARGET_64BIT"
1743 "cvtsd2siq\t{%1, %0|%0, %1}"
1744 [(set_attr "type" "sseicvt")
1745 (set_attr "athlon_decode" "double,vector")
1746 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of V2DF operand 1 (SSE register or
;; memory) to SI in a general register.
1748 (define_insn "sse2_cvttsd2si"
1749 [(set (match_operand:SI 0 "register_operand" "=r,r")
1752 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1753 (parallel [(const_int 0)]))))]
1755 "cvttsd2si\t{%1, %0|%0, %1}"
1756 [(set_attr "type" "sseicvt")
1757 (set_attr "mode" "SI")
1758 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: converts element 0 of V2DF operand 1 (SSE register or
;; memory) to DI in a general register; available only with
;; TARGET_64BIT.
1760 (define_insn "sse2_cvttsd2siq"
1761 [(set (match_operand:DI 0 "register_operand" "=r,r")
1764 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1765 (parallel [(const_int 0)]))))]
1766 "TARGET_SSE2 && TARGET_64BIT"
1767 "cvttsd2siq\t{%1, %0|%0, %1}"
1768 [(set_attr "type" "sseicvt")
1769 (set_attr "mode" "DI")
1770 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: converts elements 0 and 1 of V4SI operand 1 to a V2DF
;; result.
1772 (define_insn "sse2_cvtdq2pd"
1773 [(set (match_operand:V2DF 0 "register_operand" "=x")
1776 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1777 (parallel [(const_int 0) (const_int 1)]))))]
1779 "cvtdq2pd\t{%1, %0|%0, %1}"
1780 [(set_attr "type" "ssecvt")
1781 (set_attr "mode" "V2DF")])
;; Expander for the cvtpd2dq intrinsic: sets operand 2 to the V2SI zero
;; vector (CONST0_RTX) before the template is emitted.
1783 (define_expand "sse2_cvtpd2dq"
1784 [(set (match_operand:V4SI 0 "register_operand" "")
1786 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1790 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF operand 1 converted to V2SI (as an unspec) inside a
;; V4SI destination; V2SI operand 2 must be zero (const0_operand).
1792 (define_insn "*sse2_cvtpd2dq"
1793 [(set (match_operand:V4SI 0 "register_operand" "=x")
1795 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1797 (match_operand:V2SI 2 "const0_operand" "")))]
1799 "cvtpd2dq\t{%1, %0|%0, %1}"
1800 [(set_attr "type" "ssecvt")
1801 (set_attr "mode" "TI")])
;; Expander for the cvttpd2dq intrinsic: sets operand 2 to the V2SI zero
;; vector (CONST0_RTX) before the template is emitted.
1803 (define_expand "sse2_cvttpd2dq"
1804 [(set (match_operand:V4SI 0 "register_operand" "")
1806 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1809 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V2DF operand 1 converted to V2SI with fix inside a V4SI
;; destination; V2SI operand 2 must be zero (const0_operand).
1811 (define_insn "*sse2_cvttpd2dq"
1812 [(set (match_operand:V4SI 0 "register_operand" "=x")
1814 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1815 (match_operand:V2SI 2 "const0_operand" "")))]
1817 "cvttpd2dq\t{%1, %0|%0, %1}"
1818 [(set_attr "type" "ssecvt")
1819 (set_attr "mode" "TI")])
;; cvtsd2ss: V2DF operand 2 (register or memory) narrowed with
;; float_truncate:V2SF; V4SF operand 1 is tied to the destination.
1821 (define_insn "sse2_cvtsd2ss"
1822 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1825 (float_truncate:V2SF
1826 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1827 (match_operand:V4SF 1 "register_operand" "0,0")
1830 "cvtsd2ss\t{%2, %0|%0, %2}"
1831 [(set_attr "type" "ssecvt")
1832 (set_attr "athlon_decode" "vector,double")
1833 (set_attr "mode" "SF")])
;; cvtss2sd: the source is elements 0 and 1 of V4SF operand 2; V2DF
;; operand 1 is tied to the destination.
1835 (define_insn "sse2_cvtss2sd"
1836 [(set (match_operand:V2DF 0 "register_operand" "=x")
1840 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1841 (parallel [(const_int 0) (const_int 1)])))
1842 (match_operand:V2DF 1 "register_operand" "0")
1845 "cvtss2sd\t{%2, %0|%0, %2}"
1846 [(set_attr "type" "ssecvt")
1847 (set_attr "mode" "DF")])
;; Expander for the cvtpd2ps intrinsic: sets operand 2 to the V2SF zero
;; vector (CONST0_RTX) before the template is emitted.  The constraint
;; on operand 1 is left empty like in the other expanders in this file;
;; the real constraints live on the *sse2_cvtpd2ps insn.
1849 (define_expand "sse2_cvtpd2ps"
1850 [(set (match_operand:V4SF 0 "register_operand" "")
1852 (float_truncate:V2SF
1853 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1856 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: V2DF operand 1 narrowed with float_truncate:V2SF inside a
;; V4SF destination; V2SF operand 2 must be zero (const0_operand).
1858 (define_insn "*sse2_cvtpd2ps"
1859 [(set (match_operand:V4SF 0 "register_operand" "=x")
1861 (float_truncate:V2SF
1862 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1863 (match_operand:V2SF 2 "const0_operand" "")))]
1865 "cvtpd2ps\t{%1, %0|%0, %1}"
1866 [(set_attr "type" "ssecvt")
1867 (set_attr "mode" "V4SF")])
;; cvtps2pd: converts elements 0 and 1 of V4SF operand 1 to a V2DF
;; result.
1869 (define_insn "sse2_cvtps2pd"
1870 [(set (match_operand:V2DF 0 "register_operand" "=x")
1873 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1874 (parallel [(const_int 0) (const_int 1)]))))]
1876 "cvtps2pd\t{%1, %0|%0, %1}"
1877 [(set_attr "type" "ssecvt")
1878 (set_attr "mode" "V2DF")])
1880 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1882 ;; Parallel double-precision floating point element swizzling
1884 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Selection from V2DF operands 1 and 2 whose first selector is 1.
;; Register form: unpckhpd.  With operand 1 in offsettable memory and
;; operand 2 tied to the destination: movlpd from %H1.  With a memory
;; destination tied to operand 2: movhpd stores from register
;; operand 1.  Both inputs in memory is rejected by the insn condition.
1886 (define_insn "sse2_unpckhpd"
1887 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
1890 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
1891 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
1892 (parallel [(const_int 1)
1894 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1896 unpckhpd\t{%2, %0|%0, %2}
1897 movlpd\t{%H1, %0|%0, %H1}
1898 movhpd\t{%1, %0|%0, %1}"
1899 [(set_attr "type" "sselog,ssemov,ssemov")
1900 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup from V2DF operand 1 into a register, plus a second
;; alternative with a register source and an offsettable memory
;; destination.
;; The insn condition rejects memory-to-memory.  It tests operands 0 and
;; 1, the only operands this pattern defines; it previously tested
;; operands[2], which no match_operand here sets.
1902 (define_insn "*sse3_movddup"
1903 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
1906 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
1908 (parallel [(const_int 0)
1910 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1912 movddup\t{%1, %0|%0, %1}
1914 [(set_attr "type" "sselog,ssemov")
1915 (set_attr "mode" "V2DF")])
;; After reload, with a V2DF memory destination and a register source:
;; the source is viewed as a DF register with the same number and
;; stored twice, to offsets 0 and 8 of the destination.
1918 [(set (match_operand:V2DF 0 "memory_operand" "")
1921 (match_operand:V2DF 1 "register_operand" "")
1923 (parallel [(const_int 0)
1925 "TARGET_SSE3 && reload_completed"
1928 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
1929 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
1930 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Selection from V2DF operands 1 and 2 whose first selector is 0;
;; operand 1 is always tied to the destination.  Register operand 2:
;; unpcklpd.  Memory operand 2: movhpd load.  Offsettable memory
;; destination: movlpd stores operand 2 to %H0.
1934 (define_insn "sse2_unpcklpd"
1935 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
1938 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
1939 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
1940 (parallel [(const_int 0)
1942 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1944 unpcklpd\t{%2, %0|%0, %2}
1945 movhpd\t{%2, %0|%0, %2}
1946 movlpd\t{%2, %H0|%H0, %2}"
1947 [(set_attr "type" "sselog,ssemov,ssemov")
1948 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for the shufpd intrinsic: reads the immediate from
;; operand 3 and hands per-element selectors to sse2_shufpd_1.  Bit 1 of
;; the immediate chooses 3 or 2 for the last selector.
1950 (define_expand "sse2_shufpd"
1951 [(match_operand:V2DF 0 "register_operand" "")
1952 (match_operand:V2DF 1 "register_operand" "")
1953 (match_operand:V2DF 2 "nonimmediate_operand" "")
1954 (match_operand:SI 3 "const_int_operand" "")]
1957 int mask = INTVAL (operands[3]);
1958 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
1960 GEN_INT (mask & 2 ? 3 : 2)));
;; SHUFPD with the selector split into operands 3 (const_0_to_1
;; predicate) and 4 (const_2_to_3 predicate).  The output code rebuilds
;; the immediate as operand 3 | ((operand 4 - 2) << 1), stores it in
;; operands[3] and prints it with the shufpd template.
1964 (define_insn "sse2_shufpd_1"
1965 [(set (match_operand:V2DF 0 "register_operand" "=x")
1968 (match_operand:V2DF 1 "register_operand" "0")
1969 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1970 (parallel [(match_operand 3 "const_0_to_1_operand" "")
1971 (match_operand 4 "const_2_to_3_operand" "")])))]
1975 mask = INTVAL (operands[3]);
1976 mask |= (INTVAL (operands[4]) - 2) << 1;
1977 operands[3] = GEN_INT (mask);
1979 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
1981 [(set_attr "type" "sselog")
1982 (set_attr "mode" "V2DF")])
;; Element 1 of V2DF operand 1 delivered as DF operand 0.  The store to
;; memory uses movhpd; memory-to-memory is rejected by the insn
;; condition.
1984 (define_insn "sse2_storehpd"
1985 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
1987 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
1988 (parallel [(const_int 1)])))]
1989 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1991 movhpd\t{%1, %0|%0, %1}
1994 [(set_attr "type" "ssemov,sselog1,ssemov")
1995 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload: element 1 of a V2DF in memory, read into a DF
;; register, becomes a plain DFmode load from offset 8 of that memory.
1998 [(set (match_operand:DF 0 "register_operand" "")
2000 (match_operand:V2DF 1 "memory_operand" "")
2001 (parallel [(const_int 1)])))]
2002 "TARGET_SSE2 && reload_completed"
2003 [(set (match_dup 0) (match_dup 1))]
2005 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Element 0 of V2DF operand 1 delivered as DF operand 0.  The store to
;; memory uses movlpd; memory-to-memory is rejected by the insn
;; condition.
2008 (define_insn "sse2_storelpd"
2009 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2011 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2012 (parallel [(const_int 0)])))]
2013 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2015 movlpd\t{%1, %0|%0, %1}
2018 [(set_attr "type" "ssemov")
2019 (set_attr "mode" "V1DF,DF,DF")])
;; After reload: element 0 of V2DF operand 1, read into a DF register,
;; becomes an ordinary DFmode move.  Operand 1 is re-expressed in DFmode
;; (same register number via gen_rtx_REG, or gen_lowpart) and moved to
;; operand 0.
2022 [(set (match_operand:DF 0 "register_operand" "")
2024 (match_operand:V2DF 1 "nonimmediate_operand" "")
2025 (parallel [(const_int 0)])))]
2026 "TARGET_SSE2 && reload_completed"
2029 rtx op1 = operands[1];
2031 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2033 op1 = gen_lowpart (DFmode, op1);
2034 emit_move_insn (operands[0], op1);
;; Element 0 of V2DF operand 1 paired with DF operand 2 as the new
;; element 1.  Operand 1 is always tied to the destination: movhpd loads
;; operand 2 from memory, unpcklpd takes it from a register, and the
;; memory-destination alternative is left to a post-reload split.
;;
;; The former alternative with operand 2 tied to the destination
;; ("shufpd $1, %1, %0") has been removed.  shufpd takes its low result
;; element from the destination and its high element from the source, so
;; with immediate 1 it produced {old high element of %0, low element of
;; %1} instead of {low element of %1, operand 2}.  No single shufpd
;; encodes the required result, so reload now copies operand 1 into the
;; destination and uses one of the remaining alternatives.
2038 (define_insn "sse2_loadhpd"
2039 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2042 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2043 (parallel [(const_int 0)]))
2044 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2045 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2047 movhpd\t{%2, %0|%0, %2}
2048 unpcklpd\t{%2, %0|%0, %2}
2051 [(set_attr "type" "ssemov,sselog,other")
2052 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload: replacing element 1 of a V2DF in memory (element 0 is
;; kept via match_dup 0) with DF register operand 1 becomes a plain
;; DFmode store at offset 8 of that memory.
2055 [(set (match_operand:V2DF 0 "memory_operand" "")
2057 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2058 (match_operand:DF 1 "register_operand" "")))]
2059 "TARGET_SSE2 && reload_completed"
2060 [(set (match_dup 0) (match_dup 1))]
2062 operands[0] = adjust_address (operands[0], DFmode, 8);
;; DF operand 2 as the new element 0, paired with element 1 of V2DF
;; operand 1.  Alternatives, in order:
;;   0: operand 1 satisfies constraint C, operand 2 in memory (movsd)
;;   1: operand 1 tied to the destination, operand 2 in memory (movlpd)
;;   2: operand 1 tied to the destination, operand 2 in a register (movsd)
;;   3: operand 2 tied to the destination, operand 1 in a register
;;      (shufpd $2: low element from the destination, high from %1)
;;   4: operand 2 tied to the destination, operand 1 in offsettable
;;      memory (movhpd from %H1)
;;   5: memory destination, left to a post-reload split
;; Alternative 3 names %1 as the shufpd source.  It previously named %2,
;; which is tied to %0 in that alternative, so the instruction shuffled
;; the destination with itself and never read operand 1.
2065 (define_insn "sse2_loadlpd"
2066 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2068 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2070 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2071 (parallel [(const_int 1)]))))]
2072 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2074 movsd\t{%2, %0|%0, %2}
2075 movlpd\t{%2, %0|%0, %2}
2076 movsd\t{%2, %0|%0, %2}
2077 shufpd\t{$2, %1, %0|%0, %1, 2}
2078 movhpd\t{%H1, %0|%0, %H1}
2080 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2081 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload: replacing element 0 of a V2DF in memory (element 1 is
;; kept via match_dup 0) with DF register operand 1 becomes a plain
;; DFmode store.  Element 0 lives at byte offset 0 of the vector, so the
;; store goes there.  Offset 8 is element 1, which this pattern must
;; leave untouched; the store previously targeted offset 8.
2084 [(set (match_operand:V2DF 0 "memory_operand" "")
2086 (match_operand:DF 1 "register_operand" "")
2087 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2088 "TARGET_SSE2 && reload_completed"
2089 [(set (match_dup 0) (match_dup 1))]
2091 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Merge of V2DF operands 2 and 1.  Alternatives, in order:
;;   0-2: operand 1 tied to the destination; operand 2 moved in with
;;        movsd (register) or movlpd (memory source / memory destination)
;;   3:   operand 2 tied to the destination, operand 1 in a register
;;        (shufpd $2: low element from the destination, high from %1)
;;   4:   operand 2 tied to the destination, operand 1 in offsettable
;;        memory (movhps from %H1)
;;   5:   offsettable memory destination tied to operand 2; movhps
;;        stores from register operand 1 to %H0
;; Two fixes relative to the previous text:
;;   * alternative 3 named %2 (tied to %0) as the shufpd source, so
;;     operand 1 was never read; it now names %1.
;;   * the two movhps templates lacked the closing "}" of the
;;     {att|intel} dialect group.
2094 (define_insn "sse2_movsd"
2095 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2097 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2098 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2102 movsd\t{%2, %0|%0, %2}
2103 movlpd\t{%2, %0|%0, %2}
2104 movlpd\t{%2, %0|%0, %2}
2105 shufpd\t{$2, %1, %0|%0, %1, 2}
2106 movhps\t{%H1, %0|%0, %H1}
2107 movhps\t{%1, %H0|%H0, %1}"
2108 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2109 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF built from DF operand 1 (register or memory) with movddup.
2111 (define_insn "*vec_dupv2df_sse3"
2112 [(set (match_operand:V2DF 0 "register_operand" "=x")
2114 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2116 "movddup\t{%1, %0|%0, %1}"
2117 [(set_attr "type" "sselog1")
2118 (set_attr "mode" "DF")])
;; V2DF built from DF register operand 1, which is tied to the
;; destination.
;; NOTE(review): the mode attribute is V4SF although the pattern
;; produces a V2DF -- confirm this is intended.
2120 (define_insn "*vec_dupv2df"
2121 [(set (match_operand:V2DF 0 "register_operand" "=x")
2123 (match_operand:DF 1 "register_operand" "0")))]
2126 [(set_attr "type" "sselog1")
2127 (set_attr "mode" "V4SF")])
;; V2DF built from DF operand 1 (register or memory) with movddup;
;; presumably matches a concatenation of operand 1 with itself, going
;; by the pattern name.
2129 (define_insn "*vec_concatv2df_sse3"
2130 [(set (match_operand:V2DF 0 "register_operand" "=x")
2132 (match_operand:DF 1 "nonimmediate_operand" "xm")
2135 "movddup\t{%1, %0|%0, %1}"
2136 [(set_attr "type" "sselog1")
2137 (set_attr "mode" "DF")])
;; V2DF built from DF operands 1 and 2.  The Y alternatives use
;; unpcklpd (register operand 2), movhpd (memory operand 2), or movsd
;; of a memory operand 1 when operand 2 satisfies constraint C.  The x
;; alternatives use the single-precision forms movlhps (register) and
;; movhps (memory).
2139 (define_insn "*vec_concatv2df"
2140 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2142 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2143 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2146 unpcklpd\t{%2, %0|%0, %2}
2147 movhpd\t{%2, %0|%0, %2}
2148 movsd\t{%1, %0|%0, %1}
2149 movlhps\t{%2, %0|%0, %2}
2150 movhps\t{%2, %0|%0, %2}"
2151 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2152 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element-set expander for V2DF: operand 0 is the vector, operand 1 the DF
;; value, operand 2 the constant element index.  All work is delegated to
;; ix86_expand_vector_set.
2154 (define_expand "vec_setv2df"
2155 [(match_operand:V2DF 0 "register_operand" "")
2156 (match_operand:DF 1 "register_operand" "")
2157 (match_operand 2 "const_int_operand" "")]
2160 ix86_expand_vector_set (false, operands[0], operands[1],
2161 INTVAL (operands[2]));
;; Element-extract expander for V2DF: operand 0 receives the DF element of
;; vector operand 1 selected by the constant index in operand 2.  Delegates
;; to ix86_expand_vector_extract.
2165 (define_expand "vec_extractv2df"
2166 [(match_operand:DF 0 "register_operand" "")
2167 (match_operand:V2DF 1 "register_operand" "")
2168 (match_operand 2 "const_int_operand" "")]
2171 ix86_expand_vector_extract (false, operands[0], operands[1],
2172 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DF: operand 1 (no predicate) carries
;; the initialiser and is handed to ix86_expand_vector_init unchanged.
2176 (define_expand "vec_initv2df"
2177 [(match_operand:V2DF 0 "register_operand" "")
2178 (match_operand 1 "" "")]
2181 ix86_expand_vector_init (false, operands[0], operands[1]);
2185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2187 ;; Parallel integral arithmetic
2189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI mode.  The preparation statement
;; creates operand 2 as an all-zero vector of the same mode, forced into a
;; register, for use by the pattern.
2191 (define_expand "neg<mode>2"
2192 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2195 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2197 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander for SSEMODEI.  Both inputs may be memory
;; at this point; ix86_fixup_binary_operands_no_copy adjusts them for PLUS
;; before the insn pattern is matched.
2199 (define_expand "add<mode>3"
2200 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2201 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2202 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2204 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Integer vector add: emits padd{b,w,d,q}, with the suffix chosen by the
;; ssevecsize mode attribute.  Operand 1 is tied to the destination and is
;; marked commutative with operand 2 ("%0"); operand 2 may be a register or
;; memory.
2206 (define_insn "*add<mode>3"
2207 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2209 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2210 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2211 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2212 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2213 [(set_attr "type" "sseiadd")
2214 (set_attr "mode" "TI")])
;; Signed saturating add for the SSEMODE12 modes (V16QI, V8HI): emits
;; paddsb / paddsw.  The condition checks the operands against SS_PLUS.
2216 (define_insn "sse2_ssadd<mode>3"
2217 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2219 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2220 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2221 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2222 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2223 [(set_attr "type" "sseiadd")
2224 (set_attr "mode" "TI")])
;; Unsigned saturating add for the SSEMODE12 modes (V16QI, V8HI): emits
;; paddusb / paddusw.  The condition checks the operands against US_PLUS.
2226 (define_insn "sse2_usadd<mode>3"
2227 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2229 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2230 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2231 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2232 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2233 [(set_attr "type" "sseiadd")
2234 (set_attr "mode" "TI")])
;; Integer vector subtraction expander for SSEMODEI.  Unlike the addition
;; expander, operand 1 must be a register; operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy for MINUS.
2236 (define_expand "sub<mode>3"
2237 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2238 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2239 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2241 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Integer vector subtract: emits psub{b,w,d,q}.  Operand 1 is tied to the
;; destination register (no commutative marker); operand 2 may be a register
;; or memory.
2243 (define_insn "*sub<mode>3"
2244 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2246 (match_operand:SSEMODEI 1 "register_operand" "0")
2247 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2249 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2250 [(set_attr "type" "sseiadd")
2251 (set_attr "mode" "TI")])
;; Signed saturating subtract for V16QI / V8HI: emits psubsb / psubsw with
;; operand 1 tied to the destination.
2253 (define_insn "sse2_sssub<mode>3"
2254 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2256 (match_operand:SSEMODE12 1 "register_operand" "0")
2257 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2259 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2260 [(set_attr "type" "sseiadd")
2261 (set_attr "mode" "TI")])
;; Unsigned saturating subtract for V16QI / V8HI: emits psubusb / psubusw
;; with operand 1 tied to the destination.
2263 (define_insn "sse2_ussub<mode>3"
2264 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2266 (match_operand:SSEMODE12 1 "register_operand" "0")
2267 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2269 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2270 [(set_attr "type" "sseiadd")
2271 (set_attr "mode" "TI")])
;; V8HI multiplication expander; operands are adjusted for MULT by
;; ix86_fixup_binary_operands_no_copy before the insn pattern is matched.
2273 (define_expand "mulv8hi3"
2274 [(set (match_operand:V8HI 0 "register_operand" "")
2275 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2276 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2278 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply keeping a V8HI result: emits pmullw.  Operands 1 and 2 are
;; commutative ("%0"), with operand 1 tied to the destination.
2280 (define_insn "*mulv8hi3"
2281 [(set (match_operand:V8HI 0 "register_operand" "=x")
2282 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2283 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2284 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2285 "pmullw\t{%2, %0|%0, %2}"
2286 [(set_attr "type" "sseimul")
2287 (set_attr "mode" "TI")])
;; Signed high-part multiply of two V8HI vectors: emits pmulhw.  Operands 1
;; and 2 are commutative, with operand 1 tied to the destination.
2289 (define_insn "sse2_smulv8hi3_highpart"
2290 [(set (match_operand:V8HI 0 "register_operand" "=x")
2295 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2297 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2299 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2300 "pmulhw\t{%2, %0|%0, %2}"
2301 [(set_attr "type" "sseimul")
2302 (set_attr "mode" "TI")])
;; Unsigned high-part multiply of two V8HI vectors: emits pmulhuw.  Operands
;; 1 and 2 are commutative, with operand 1 tied to the destination.
2304 (define_insn "sse2_umulv8hi3_highpart"
2305 [(set (match_operand:V8HI 0 "register_operand" "=x")
2310 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2312 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2314 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2315 "pmulhuw\t{%2, %0|%0, %2}"
2316 [(set_attr "type" "sseimul")
2317 (set_attr "mode" "TI")])
;; Widening multiply producing V2DI from elements 0 and 2 of two V4SI
;; operands: emits pmuludq.  Operands 1 and 2 are commutative, with operand
;; 1 tied to the destination.
;; The operand check is passed V4SImode, the mode of the inputs it
;; validates; it previously named V8HImode, copied from the patterns above.
2319 (define_insn "sse2_umulv2siv2di3"
2320 [(set (match_operand:V2DI 0 "register_operand" "=x")
2324 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2325 (parallel [(const_int 0) (const_int 2)])))
2328 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2329 (parallel [(const_int 0) (const_int 2)])))))]
2330 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2331 "pmuludq\t{%2, %0|%0, %2}"
2332 [(set_attr "type" "sseimul")
2333 (set_attr "mode" "TI")])
;; Multiply-and-add of two V8HI operands into a V4SI result: emits pmaddwd.
;; The pattern takes two V4HI selections from each operand (one selector
;; list starting at index 0, the other at index 1 and ending at 7) and reuses
;; the same operands through match_dup.
2335 (define_insn "sse2_pmaddwd"
2336 [(set (match_operand:V4SI 0 "register_operand" "=x")
2341 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2342 (parallel [(const_int 0)
2348 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2349 (parallel [(const_int 0)
2355 (vec_select:V4HI (match_dup 1)
2356 (parallel [(const_int 1)
2361 (vec_select:V4HI (match_dup 2)
2362 (parallel [(const_int 1)
2365 (const_int 7)]))))))]
2367 "pmaddwd\t{%2, %0|%0, %2}"
2368 [(set_attr "type" "sseiadd")
2369 (set_attr "mode" "TI")])
;; Arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI): emits
;; psraw / psrad.  The SImode count in operand 2 is either an SSE register
;; or an immediate ("xi").
2371 (define_insn "ashr<mode>3"
2372 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2374 (match_operand:SSEMODE24 1 "register_operand" "0")
2375 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2377 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2378 [(set_attr "type" "sseishft")
2379 (set_attr "mode" "TI")])
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; psrlw / psrld / psrlq.  The SImode count in operand 2 is either an SSE
;; register or an immediate ("xi").
2381 (define_insn "lshr<mode>3"
2382 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2383 (lshiftrt:SSEMODE248
2384 (match_operand:SSEMODE248 1 "register_operand" "0")
2385 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2387 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2388 [(set_attr "type" "sseishft")
2389 (set_attr "mode" "TI")])
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; psllw / pslld / psllq.  The SImode count in operand 2 is either an SSE
;; register or an immediate ("xi").
2391 (define_insn "ashl<mode>3"
2392 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2394 (match_operand:SSEMODE248 1 "register_operand" "0")
2395 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2397 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2398 [(set_attr "type" "sseishft")
2399 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift: emits pslldq.  The RTL shift count in
;; operand 2 is a multiple of 8 (predicate const_0_to_255_mul_8_operand); the
;; output code divides it by 8 before printing it as the pslldq immediate.
2401 (define_insn "sse2_ashlti3"
2402 [(set (match_operand:TI 0 "register_operand" "=x")
2403 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2404 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2407 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2408 return "pslldq\t{%2, %0|%0, %2}";
2410 [(set_attr "type" "sseishft")
2411 (set_attr "mode" "TI")])
;; Whole-register (TImode) logical right shift: emits psrldq.  The RTL shift
;; count in operand 2 is a multiple of 8; the output code divides it by 8
;; before printing it as the psrldq immediate.
2413 (define_insn "sse2_lshrti3"
2414 [(set (match_operand:TI 0 "register_operand" "=x")
2415 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2416 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2419 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2420 return "psrldq\t{%2, %0|%0, %2}";
2422 [(set_attr "type" "sseishft")
2423 (set_attr "mode" "TI")])
;; Unsigned byte maximum expander; operands are adjusted for UMAX by
;; ix86_fixup_binary_operands_no_copy.
2425 (define_expand "umaxv16qi3"
2426 [(set (match_operand:V16QI 0 "register_operand" "")
2427 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2428 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2430 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned byte maximum: emits pmaxub.  Operands 1 and 2 are commutative,
;; with operand 1 tied to the destination.
2432 (define_insn "*umaxv16qi3"
2433 [(set (match_operand:V16QI 0 "register_operand" "=x")
2434 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2435 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2436 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2437 "pmaxub\t{%2, %0|%0, %2}"
2438 [(set_attr "type" "sseiadd")
2439 (set_attr "mode" "TI")])
;; Signed word maximum expander; operands are adjusted for SMAX by
;; ix86_fixup_binary_operands_no_copy.
2441 (define_expand "smaxv8hi3"
2442 [(set (match_operand:V8HI 0 "register_operand" "")
2443 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2444 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2446 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed word maximum: emits pmaxsw.  Operands 1 and 2 are commutative,
;; with operand 1 tied to the destination.
2448 (define_insn "*smaxv8hi3"
2449 [(set (match_operand:V8HI 0 "register_operand" "=x")
2450 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2451 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2452 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2453 "pmaxsw\t{%2, %0|%0, %2}"
2454 [(set_attr "type" "sseiadd")
2455 (set_attr "mode" "TI")])
;; Unsigned byte minimum expander.  The operand fixup is told the operation
;; is UMIN, matching the umin RTL in the pattern and the check in the
;; *uminv16qi3 insn; it previously passed UMAX, copied from umaxv16qi3.
2457 (define_expand "uminv16qi3"
2458 [(set (match_operand:V16QI 0 "register_operand" "")
2459 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2460 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2462 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned byte minimum: emits pminub.  Operands 1 and 2 are commutative,
;; with operand 1 tied to the destination.
2464 (define_insn "*uminv16qi3"
2465 [(set (match_operand:V16QI 0 "register_operand" "=x")
2466 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2467 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2468 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2469 "pminub\t{%2, %0|%0, %2}"
2470 [(set_attr "type" "sseiadd")
2471 (set_attr "mode" "TI")])
;; Signed word minimum expander; operands are adjusted for SMIN by
;; ix86_fixup_binary_operands_no_copy.
2473 (define_expand "sminv8hi3"
2474 [(set (match_operand:V8HI 0 "register_operand" "")
2475 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2476 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2478 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed word minimum: emits pminsw.  Operands 1 and 2 are commutative,
;; with operand 1 tied to the destination.
2480 (define_insn "*sminv8hi3"
2481 [(set (match_operand:V8HI 0 "register_operand" "=x")
2482 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2483 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2484 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2485 "pminsw\t{%2, %0|%0, %2}"
2486 [(set_attr "type" "sseiadd")
2487 (set_attr "mode" "TI")])
2489 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2491 ;; Parallel integral comparisons
2493 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI): emits pcmpeq{b,w,d}.  Operands 1 and 2 are commutative ("%0").
2495 (define_insn "sse2_eq<mode>3"
2496 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2498 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2499 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2500 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2501 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2502 [(set_attr "type" "ssecmp")
2503 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V16QI / V8HI / V4SI: emits
;; pcmpgt{b,w,d}.  Not commutative: operand 1 must be a register tied to
;; the destination.
2505 (define_insn "sse2_gt<mode>3"
2506 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2508 (match_operand:SSEMODE124 1 "register_operand" "0")
2509 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2511 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2512 [(set_attr "type" "ssecmp")
2513 (set_attr "mode" "TI")])
2515 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2517 ;; Parallel integral logical operations
2519 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for SSEMODEI, expressed as an xor.  The
;; preparation code builds a constant vector with every element set to
;; constm1_rtx (one per GET_MODE_NUNITS) and forces it into a register as
;; operand 2.
2521 (define_expand "one_cmpl<mode>2"
2522 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2523 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2527 int i, n = GET_MODE_NUNITS (<MODE>mode);
2528 rtvec v = rtvec_alloc (n);
2530 for (i = 0; i < n; ++i)
2531 RTVEC_ELT (v, i) = constm1_rtx;
2533 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for SSEMODEI; operands are adjusted for AND by
;; ix86_fixup_binary_operands_no_copy.
2536 (define_expand "and<mode>3"
2537 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2538 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2539 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2541 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND for any SSEMODEI mode: emits pand, which needs no element-size
;; suffix.  Operands 1 and 2 are commutative ("%0").
2543 (define_insn "*and<mode>3"
2544 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2546 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2547 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2548 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
2549 "pand\t{%2, %0|%0, %2}"
2550 [(set_attr "type" "sselog")
2551 (set_attr "mode" "TI")])
;; AND-NOT for any SSEMODEI mode: emits pandn.  The complemented input is
;; operand 1 (wrapped in "not"), which must be the destination register;
;; operand 2 may be a register or memory.
2553 (define_insn "sse2_nand<mode>3"
2554 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2556 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
2557 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2559 "pandn\t{%2, %0|%0, %2}"
2560 [(set_attr "type" "sselog")
2561 (set_attr "mode" "TI")])
;; Bitwise OR expander for SSEMODEI; operands are adjusted for IOR by
;; ix86_fixup_binary_operands_no_copy.
2563 (define_expand "ior<mode>3"
2564 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2565 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2566 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2568 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise OR for any SSEMODEI mode: emits por.  Operands 1 and 2 are
;; commutative ("%0").
2570 (define_insn "*ior<mode>3"
2571 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2573 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2574 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2575 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
2576 "por\t{%2, %0|%0, %2}"
2577 [(set_attr "type" "sselog")
2578 (set_attr "mode" "TI")])
;; Bitwise XOR expander for SSEMODEI; operands are adjusted for XOR by
;; ix86_fixup_binary_operands_no_copy.
2580 (define_expand "xor<mode>3"
2581 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2582 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2583 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2585 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise XOR for any SSEMODEI mode: emits pxor.  Operands 1 and 2 are
;; commutative ("%0").
2587 (define_insn "*xor<mode>3"
2588 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2590 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2591 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2592 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
2593 "pxor\t{%2, %0|%0, %2}"
2594 [(set_attr "type" "sselog")
2595 (set_attr "mode" "TI")])
2597 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2599 ;; Parallel integral element swizzling
2601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packs two V8HI operands into one V16QI result: emits packsswb.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
2603 (define_insn "sse2_packsswb"
2604 [(set (match_operand:V16QI 0 "register_operand" "=x")
2607 (match_operand:V8HI 1 "register_operand" "0"))
2609 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
2611 "packsswb\t{%2, %0|%0, %2}"
2612 [(set_attr "type" "sselog")
2613 (set_attr "mode" "TI")])
;; Packs two V4SI operands into one V8HI result: emits packssdw.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
2615 (define_insn "sse2_packssdw"
2616 [(set (match_operand:V8HI 0 "register_operand" "=x")
2619 (match_operand:V4SI 1 "register_operand" "0"))
2621 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
2623 "packssdw\t{%2, %0|%0, %2}"
2624 [(set_attr "type" "sselog")
2625 (set_attr "mode" "TI")])
;; Packs two V8HI operands into one V16QI result: emits packuswb.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
2627 (define_insn "sse2_packuswb"
2628 [(set (match_operand:V16QI 0 "register_operand" "=x")
2631 (match_operand:V8HI 1 "register_operand" "0"))
2633 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
2635 "packuswb\t{%2, %0|%0, %2}"
2636 [(set_attr "type" "sselog")
2637 (set_attr "mode" "TI")])
;; High-half byte interleave: emits punpckhbw.  The selector alternates
;; indices 8..15 with 24..31 of the combined operands 1 and 2.
2639 (define_insn "sse2_punpckhbw"
2640 [(set (match_operand:V16QI 0 "register_operand" "=x")
2643 (match_operand:V16QI 1 "register_operand" "0")
2644 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
2645 (parallel [(const_int 8) (const_int 24)
2646 (const_int 9) (const_int 25)
2647 (const_int 10) (const_int 26)
2648 (const_int 11) (const_int 27)
2649 (const_int 12) (const_int 28)
2650 (const_int 13) (const_int 29)
2651 (const_int 14) (const_int 30)
2652 (const_int 15) (const_int 31)])))]
2654 "punpckhbw\t{%2, %0|%0, %2}"
2655 [(set_attr "type" "sselog")
2656 (set_attr "mode" "TI")])
;; Low-half byte interleave: emits punpcklbw.  The selector alternates
;; indices 0..7 with 16..23 of the combined operands 1 and 2.
2658 (define_insn "sse2_punpcklbw"
2659 [(set (match_operand:V16QI 0 "register_operand" "=x")
2662 (match_operand:V16QI 1 "register_operand" "0")
2663 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
2664 (parallel [(const_int 0) (const_int 16)
2665 (const_int 1) (const_int 17)
2666 (const_int 2) (const_int 18)
2667 (const_int 3) (const_int 19)
2668 (const_int 4) (const_int 20)
2669 (const_int 5) (const_int 21)
2670 (const_int 6) (const_int 22)
2671 (const_int 7) (const_int 23)])))]
2673 "punpcklbw\t{%2, %0|%0, %2}"
2674 [(set_attr "type" "sselog")
2675 (set_attr "mode" "TI")])
;; High-half word interleave: emits punpckhwd.  The selector alternates
;; indices 4..7 with 12..15 of the combined operands 1 and 2.
2677 (define_insn "sse2_punpckhwd"
2678 [(set (match_operand:V8HI 0 "register_operand" "=x")
2681 (match_operand:V8HI 1 "register_operand" "0")
2682 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
2683 (parallel [(const_int 4) (const_int 12)
2684 (const_int 5) (const_int 13)
2685 (const_int 6) (const_int 14)
2686 (const_int 7) (const_int 15)])))]
2688 "punpckhwd\t{%2, %0|%0, %2}"
2689 [(set_attr "type" "sselog")
2690 (set_attr "mode" "TI")])
;; Low-half word interleave: emits punpcklwd.  The selector alternates
;; indices 0..3 with 8..11 of the combined operands 1 and 2.
2692 (define_insn "sse2_punpcklwd"
2693 [(set (match_operand:V8HI 0 "register_operand" "=x")
2696 (match_operand:V8HI 1 "register_operand" "0")
2697 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
2698 (parallel [(const_int 0) (const_int 8)
2699 (const_int 1) (const_int 9)
2700 (const_int 2) (const_int 10)
2701 (const_int 3) (const_int 11)])))]
2703 "punpcklwd\t{%2, %0|%0, %2}"
2704 [(set_attr "type" "sselog")
2705 (set_attr "mode" "TI")])
;; High-half doubleword interleave: emits punpckhdq.  The selector is
;; 2, 6, 3, 7 over the combined operands 1 and 2.
2707 (define_insn "sse2_punpckhdq"
2708 [(set (match_operand:V4SI 0 "register_operand" "=x")
2711 (match_operand:V4SI 1 "register_operand" "0")
2712 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
2713 (parallel [(const_int 2) (const_int 6)
2714 (const_int 3) (const_int 7)])))]
2716 "punpckhdq\t{%2, %0|%0, %2}"
2717 [(set_attr "type" "sselog")
2718 (set_attr "mode" "TI")])
;; Low-half doubleword interleave: emits punpckldq.  The selector is
;; 0, 4, 1, 5 over the combined operands 1 and 2.
2720 (define_insn "sse2_punpckldq"
2721 [(set (match_operand:V4SI 0 "register_operand" "=x")
2724 (match_operand:V4SI 1 "register_operand" "0")
2725 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
2726 (parallel [(const_int 0) (const_int 4)
2727 (const_int 1) (const_int 5)])))]
2729 "punpckldq\t{%2, %0|%0, %2}"
2730 [(set_attr "type" "sselog")
2731 (set_attr "mode" "TI")])
;; High quadword interleave of two V2DI operands: emits punpckhqdq.  The
;; selector starts at index 1; operand 1 is tied to the destination.
2733 (define_insn "sse2_punpckhqdq"
2734 [(set (match_operand:V2DI 0 "register_operand" "=x")
2737 (match_operand:V2DI 1 "register_operand" "0")
2738 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
2739 (parallel [(const_int 1)
2742 "punpckhqdq\t{%2, %0|%0, %2}"
2743 [(set_attr "type" "sselog")
2744 (set_attr "mode" "TI")])
;; Low quadword interleave of two V2DI operands: emits punpcklqdq.  The
;; selector starts at index 0; operand 1 is tied to the destination.
2746 (define_insn "sse2_punpcklqdq"
2747 [(set (match_operand:V2DI 0 "register_operand" "=x")
2750 (match_operand:V2DI 1 "register_operand" "0")
2751 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
2752 (parallel [(const_int 0)
2755 "punpcklqdq\t{%2, %0|%0, %2}"
2756 [(set_attr "type" "sselog")
2757 (set_attr "mode" "TI")])
;; Expander for the pinsrw pattern.  It takes the SImode value in operand 2
;; and an element index 0..7 in operand 3, then rewrites them for the insn
;; pattern: operand 2 becomes its HImode low part and operand 3 becomes the
;; one-bit mask (1 << index).
2759 (define_expand "sse2_pinsrw"
2760 [(set (match_operand:V8HI 0 "register_operand" "")
2763 (match_operand:SI 2 "nonimmediate_operand" ""))
2764 (match_operand:V8HI 1 "register_operand" "")
2765 (match_operand:SI 3 "const_0_to_7_operand" "")))]
2768 operands[2] = gen_lowpart (HImode, operands[2]);
2769 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Inserts an HImode value (general register or memory) into a V8HI vector:
;; emits pinsrw.  Operand 3 is a power-of-two mask (1..128) in the RTL; the
;; output code converts it back to an element index with exact_log2 before
;; printing.  Operand 2 is printed with the %k modifier.
2772 (define_insn "*sse2_pinsrw"
2773 [(set (match_operand:V8HI 0 "register_operand" "=x")
2776 (match_operand:HI 2 "nonimmediate_operand" "rm"))
2777 (match_operand:V8HI 1 "register_operand" "0")
2778 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
2781 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
2782 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
2784 [(set_attr "type" "sselog")
2785 (set_attr "mode" "TI")])
;; Extracts the V8HI element selected by the constant index in operand 2
;; (0..7) into an SImode general register: emits pextrw.
2787 (define_insn "sse2_pextrw"
2788 [(set (match_operand:SI 0 "register_operand" "=r")
2791 (match_operand:V8HI 1 "register_operand" "x")
2792 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
2794 "pextrw\t{%2, %1, %0|%0, %1, %2}"
2795 [(set_attr "type" "sselog")
2796 (set_attr "mode" "TI")])
;; Expander for pshufd taking the packed 8-bit shuffle mask in operand 2.
;; It splits the mask into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate operands to sse2_pshufd_1.
2798 (define_expand "sse2_pshufd"
2799 [(match_operand:V4SI 0 "register_operand" "")
2800 (match_operand:V4SI 1 "nonimmediate_operand" "")
2801 (match_operand:SI 2 "const_int_operand" "")]
2804 int mask = INTVAL (operands[2]);
2805 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
2806 GEN_INT ((mask >> 0) & 3),
2807 GEN_INT ((mask >> 2) & 3),
2808 GEN_INT ((mask >> 4) & 3),
2809 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector operand (0..3) per result element.  The
;; output code packs operands 2-5 back into a single immediate (two bits
;; each, operand 2 lowest), stores it in operands[2] and emits pshufd.
2813 (define_insn "sse2_pshufd_1"
2814 [(set (match_operand:V4SI 0 "register_operand" "=x")
2816 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2817 (parallel [(match_operand 2 "const_0_to_3_operand" "")
2818 (match_operand 3 "const_0_to_3_operand" "")
2819 (match_operand 4 "const_0_to_3_operand" "")
2820 (match_operand 5 "const_0_to_3_operand" "")])))]
2824 mask |= INTVAL (operands[2]) << 0;
2825 mask |= INTVAL (operands[3]) << 2;
2826 mask |= INTVAL (operands[4]) << 4;
2827 mask |= INTVAL (operands[5]) << 6;
2828 operands[2] = GEN_INT (mask);
2830 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
2832 [(set_attr "type" "sselog1")
2833 (set_attr "mode" "TI")])
;; Expander for pshuflw taking the packed 8-bit shuffle mask in operand 2.
;; It splits the mask into four 2-bit selectors and passes them as separate
;; operands to sse2_pshuflw_1.
2835 (define_expand "sse2_pshuflw"
2836 [(match_operand:V8HI 0 "register_operand" "")
2837 (match_operand:V8HI 1 "nonimmediate_operand" "")
2838 (match_operand:SI 2 "const_int_operand" "")]
2841 int mask = INTVAL (operands[2]);
2842 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
2843 GEN_INT ((mask >> 0) & 3),
2844 GEN_INT ((mask >> 2) & 3),
2845 GEN_INT ((mask >> 4) & 3),
2846 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selector positions are operands 2-5 (each
;; 0..3).  The output code packs them into one immediate (two bits each,
;; operand 2 lowest), stores it in operands[2] and emits pshuflw.
2850 (define_insn "sse2_pshuflw_1"
2851 [(set (match_operand:V8HI 0 "register_operand" "=x")
2853 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
2854 (parallel [(match_operand 2 "const_0_to_3_operand" "")
2855 (match_operand 3 "const_0_to_3_operand" "")
2856 (match_operand 4 "const_0_to_3_operand" "")
2857 (match_operand 5 "const_0_to_3_operand" "")
2865 mask |= INTVAL (operands[2]) << 0;
2866 mask |= INTVAL (operands[3]) << 2;
2867 mask |= INTVAL (operands[4]) << 4;
2868 mask |= INTVAL (operands[5]) << 6;
2869 operands[2] = GEN_INT (mask);
2871 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
2873 [(set_attr "type" "sselog")
2874 (set_attr "mode" "TI")])
;; Expander for pshufhw taking the packed 8-bit shuffle mask in operand 2.
;; Each 2-bit selector is biased by 4, so the values passed to
;; sse2_pshufhw_1 lie in 4..7.
2876 (define_expand "sse2_pshufhw"
2877 [(match_operand:V8HI 0 "register_operand" "")
2878 (match_operand:V8HI 1 "nonimmediate_operand" "")
2879 (match_operand:SI 2 "const_int_operand" "")]
2882 int mask = INTVAL (operands[2]);
2883 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
2884 GEN_INT (((mask >> 0) & 3) + 4),
2885 GEN_INT (((mask >> 2) & 3) + 4),
2886 GEN_INT (((mask >> 4) & 3) + 4),
2887 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selector list begins with fixed indices (starting at
;; const_int 0) and ends with operands 2-5, each in 4..7.  The output code
;; subtracts the bias of 4 from each, packs the results into one immediate
;; (two bits each, operand 2 lowest) and emits pshufhw.
2891 (define_insn "sse2_pshufhw_1"
2892 [(set (match_operand:V8HI 0 "register_operand" "=x")
2894 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
2895 (parallel [(const_int 0)
2899 (match_operand 2 "const_4_to_7_operand" "")
2900 (match_operand 3 "const_4_to_7_operand" "")
2901 (match_operand 4 "const_4_to_7_operand" "")
2902 (match_operand 5 "const_4_to_7_operand" "")])))]
2906 mask |= (INTVAL (operands[2]) - 4) << 0;
2907 mask |= (INTVAL (operands[3]) - 4) << 2;
2908 mask |= (INTVAL (operands[4]) - 4) << 4;
2909 mask |= (INTVAL (operands[5]) - 4) << 6;
2910 operands[2] = GEN_INT (mask);
2912 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
2914 [(set_attr "type" "sselog")
2915 (set_attr "mode" "TI")])
;; Expander that loads an SImode value into a V4SI register.  The
;; preparation statement supplies operand 2 as the V4SI zero constant.
2917 (define_expand "sse2_loadd"
2918 [(set (match_operand:V4SI 0 "register_operand" "")
2921 (match_operand:SI 1 "nonimmediate_operand" ""))
2925 "operands[2] = CONST0_RTX (V4SImode);")
;; Places the SImode operand 2 into a V4SI register alongside operand 1.
;; Alternative 0 (operand 1 is the "C" constant, source in memory or a
;; general register) emits movd; alternatives 1 and 2 emit movss, from
;; memory with the "C" constant and from an SSE register with operand 1
;; tied to the destination, respectively.
2927 (define_insn "sse2_loadld"
2928 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
2931 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
2932 (match_operand:V4SI 1 "vector_move_operand" " C,C,0")
2936 movd\t{%2, %0|%0, %2}
2937 movss\t{%2, %0|%0, %2}
2938 movss\t{%2, %0|%0, %2}"
2939 [(set_attr "type" "ssemov")
2940 (set_attr "mode" "TI,V4SF,SF")])
2942 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
2943 ;; be taken into account, and movdi isn't fully populated even without.
;; Stores element 0 of a V4SI register to an SImode destination (memory or
;; SSE register).  After reload the insn is split into a plain SImode move
;; by re-expressing operand 1 as an SImode register with the same REGNO.
2944 (define_insn_and_split "sse2_stored"
2945 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
2947 (match_operand:V4SI 1 "register_operand" "x")
2948 (parallel [(const_int 0)])))]
2951 "&& reload_completed"
2952 [(set (match_dup 0) (match_dup 1))]
2954 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of a V2DI register to a DImode
;; destination; the pattern is matched by the *sse2_storeq insn.
2957 (define_expand "sse_storeq"
2958 [(set (match_operand:DI 0 "nonimmediate_operand" "")
2960 (match_operand:V2DI 1 "register_operand" "")
2961 (parallel [(const_int 0)])))]
2965 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
2966 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn matching the store of V2DI element 0 into a DImode memory location
;; or SSE register ("=mx").
2967 (define_insn "*sse2_storeq"
2968 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
2970 (match_operand:V2DI 1 "register_operand" "x")
2971 (parallel [(const_int 0)])))]
;; Post-reload split of the V2DI element-0 store: operand 1 is re-expressed
;; as a DImode register with the same REGNO, turning the insn into a plain
;; DImode move.
2976 [(set (match_operand:DI 0 "nonimmediate_operand" "")
2978 (match_operand:V2DI 1 "register_operand" "")
2979 (parallel [(const_int 0)])))]
2980 "TARGET_SSE && reload_completed"
2981 [(set (match_dup 0) (match_dup 1))]
2983 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Broadcasts an SImode register value into a V4SI register.  Alternative 0
;; emits pshufd with selector 0; alternative 1 (source tied to the
;; destination) emits shufps of the register with itself.
2986 (define_insn "*vec_dupv4si"
2987 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
2989 (match_operand:SI 1 "register_operand" " Y,0")))]
2992 pshufd\t{$0, %1, %0|%0, %1, 0}
2993 shufps\t{$0, %0, %0|%0, %0, 0}"
2994 [(set_attr "type" "sselog1")
2995 (set_attr "mode" "TI,V4SF")])
;; V2DI duplicate of a DImode register value; in both alternatives the
;; source is tied to the destination register.  The two alternatives carry
;; different type/mode attributes (sselog1/TI and ssemov/V4SF).
2997 (define_insn "*vec_dupv2di"
2998 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3000 (match_operand:DI 1 "register_operand" " 0,0")))]
3005 [(set_attr "type" "sselog1,ssemov")
3006 (set_attr "mode" "TI,V4SF")])
3008 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3009 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3010 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI from two SImode operands.  Alternatives 0-1 target "Y"
;; registers and 2-3 target MMX registers ("*y").  When operand 2 is a
;; register the insn is punpckldq; when it is the "C" constant, operand 1 is
;; loaded with movd.
3011 (define_insn "*sse2_concatv2si"
3012 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3014 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3015 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3018 punpckldq\t{%2, %0|%0, %2}
3019 movd\t{%1, %0|%0, %1}
3020 punpckldq\t{%2, %0|%0, %2}
3021 movd\t{%1, %0|%0, %1}"
3022 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3023 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI concatenation variant using "x" registers: the SSE alternatives emit
;; unpcklps (two registers) and movss (memory source with the "C" constant).
;; The MMX alternatives emit punpckldq and movd, as in *sse2_concatv2si.
3025 (define_insn "*sse1_concatv2si"
3026 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3028 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3029 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3032 unpcklps\t{%2, %0|%0, %2}
3033 movss\t{%1, %0|%0, %1}
3034 punpckldq\t{%2, %0|%0, %2}
3035 movd\t{%1, %0|%0, %1}"
3036 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3037 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI from two V2SI halves, with operand 1 already in the
;; destination register.  Alternatives: punpcklqdq ("Y" registers), movlhps
;; (operand 2 in an "x" register) and movhps (operand 2 in memory).
3039 (define_insn "*vec_concatv4si_1"
3040 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3042 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3043 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3046 punpcklqdq\t{%2, %0|%0, %2}
3047 movlhps\t{%2, %0|%0, %2}
3048 movhps\t{%2, %0|%0, %2}"
3049 [(set_attr "type" "sselog,ssemov,ssemov")
3050 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI from two DImode operands.  Six alternatives, in order:
;; movq (memory source, operand 2 is the "C" constant), movq2dq (MMX source,
;; operand 2 is "C"), punpcklqdq, movlhps, movhps, and movlps (operand 1 from
;; memory with operand 2 tied to the destination).
3052 (define_insn "*vec_concatv2di"
3053 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3055 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3056 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3059 movq\t{%1, %0|%0, %1}
3060 movq2dq\t{%1, %0|%0, %1}
3061 punpcklqdq\t{%2, %0|%0, %2}
3062 movlhps\t{%2, %0|%0, %2}
3063 movhps\t{%2, %0|%0, %2}
3064 movlps\t{%1, %0|%0, %1}"
3065 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3066 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Element-set expander for V2DI: stores DImode operand 1 into vector
;; operand 0 at the constant index in operand 2 via ix86_expand_vector_set.
3068 (define_expand "vec_setv2di"
3069 [(match_operand:V2DI 0 "register_operand" "")
3070 (match_operand:DI 1 "register_operand" "")
3071 (match_operand 2 "const_int_operand" "")]
3074 ix86_expand_vector_set (false, operands[0], operands[1],
3075 INTVAL (operands[2]));
;; Element-extract expander for V2DI: operand 0 receives the DImode element
;; of operand 1 at the constant index in operand 2, via
;; ix86_expand_vector_extract.
3079 (define_expand "vec_extractv2di"
3080 [(match_operand:DI 0 "register_operand" "")
3081 (match_operand:V2DI 1 "register_operand" "")
3082 (match_operand 2 "const_int_operand" "")]
3085 ix86_expand_vector_extract (false, operands[0], operands[1],
3086 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DI; operand 1 is passed unchanged to
;; ix86_expand_vector_init.
3090 (define_expand "vec_initv2di"
3091 [(match_operand:V2DI 0 "register_operand" "")
3092 (match_operand 1 "" "")]
3095 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V4SI: stores SImode operand 1 into vector
;; operand 0 at the constant index in operand 2 via ix86_expand_vector_set.
3099 (define_expand "vec_setv4si"
3100 [(match_operand:V4SI 0 "register_operand" "")
3101 (match_operand:SI 1 "register_operand" "")
3102 (match_operand 2 "const_int_operand" "")]
3105 ix86_expand_vector_set (false, operands[0], operands[1],
3106 INTVAL (operands[2]));
;; Element-extract expander for V4SI: operand 0 receives the SImode element
;; of operand 1 at the constant index in operand 2, via
;; ix86_expand_vector_extract.
3110 (define_expand "vec_extractv4si"
3111 [(match_operand:SI 0 "register_operand" "")
3112 (match_operand:V4SI 1 "register_operand" "")
3113 (match_operand 2 "const_int_operand" "")]
3116 ix86_expand_vector_extract (false, operands[0], operands[1],
3117 INTVAL (operands[2]));
;; Vector-initialisation expander for V4SI; operand 1 is passed unchanged to
;; ix86_expand_vector_init.
3121 (define_expand "vec_initv4si"
3122 [(match_operand:V4SI 0 "register_operand" "")
3123 (match_operand 1 "" "")]
3126 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V8HI: stores HImode operand 1 into vector
;; operand 0 at the constant index in operand 2 via ix86_expand_vector_set.
3130 (define_expand "vec_setv8hi"
3131 [(match_operand:V8HI 0 "register_operand" "")
3132 (match_operand:HI 1 "register_operand" "")
3133 (match_operand 2 "const_int_operand" "")]
3136 ix86_expand_vector_set (false, operands[0], operands[1],
3137 INTVAL (operands[2]));
;; Element-extract expander for V8HI: operand 0 receives the HImode element
;; of operand 1 at the constant index in operand 2, via
;; ix86_expand_vector_extract.
3141 (define_expand "vec_extractv8hi"
3142 [(match_operand:HI 0 "register_operand" "")
3143 (match_operand:V8HI 1 "register_operand" "")
3144 (match_operand 2 "const_int_operand" "")]
3147 ix86_expand_vector_extract (false, operands[0], operands[1],
3148 INTVAL (operands[2]));
;; Vector-initialisation expander for V8HI; operand 1 is passed unchanged to
;; ix86_expand_vector_init.
3152 (define_expand "vec_initv8hi"
3153 [(match_operand:V8HI 0 "register_operand" "")
3154 (match_operand 1 "" "")]
3157 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Element-set expander for V16QI: stores QImode operand 1 into vector
;; operand 0 at the constant index in operand 2 via ix86_expand_vector_set.
3161 (define_expand "vec_setv16qi"
3162 [(match_operand:V16QI 0 "register_operand" "")
3163 (match_operand:QI 1 "register_operand" "")
3164 (match_operand 2 "const_int_operand" "")]
3167 ix86_expand_vector_set (false, operands[0], operands[1],
3168 INTVAL (operands[2]));
;; Element-extract expander for V16QI: operand 0 receives the QImode element
;; of operand 1 at the constant index in operand 2, via
;; ix86_expand_vector_extract.
3172 (define_expand "vec_extractv16qi"
3173 [(match_operand:QI 0 "register_operand" "")
3174 (match_operand:V16QI 1 "register_operand" "")
3175 (match_operand 2 "const_int_operand" "")]
3178 ix86_expand_vector_extract (false, operands[0], operands[1],
3179 INTVAL (operands[2]));
;; Vector-initialisation expander for V16QI; operand 1 is passed unchanged
;; to ix86_expand_vector_init.
3183 (define_expand "vec_initv16qi"
3184 [(match_operand:V16QI 0 "register_operand" "")
3185 (match_operand 1 "" "")]
3188 ix86_expand_vector_init (false, operands[0], operands[1]);
3192 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3196 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Byte average of two V16QI operands: emits pavgb.  The RTL includes a
;; constant vector of sixteen 1s; operands 1 and 2 are commutative ("%0").
3198 (define_insn "sse2_uavgv16qi3"
3199 [(set (match_operand:V16QI 0 "register_operand" "=x")
3205 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3207 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3208 (const_vector:V16QI [(const_int 1) (const_int 1)
3209 (const_int 1) (const_int 1)
3210 (const_int 1) (const_int 1)
3211 (const_int 1) (const_int 1)
3212 (const_int 1) (const_int 1)
3213 (const_int 1) (const_int 1)
3214 (const_int 1) (const_int 1)
3215 (const_int 1) (const_int 1)]))
3217 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3218 "pavgb\t{%2, %0|%0, %2}"
3219 [(set_attr "type" "sseiadd")
3220 (set_attr "mode" "TI")])
;; Word average of two V8HI operands: emits pavgw.  The RTL includes a
;; constant vector of eight 1s; operands 1 and 2 are commutative ("%0").
3222 (define_insn "sse2_uavgv8hi3"
3223 [(set (match_operand:V8HI 0 "register_operand" "=x")
3229 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3231 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3232 (const_vector:V8HI [(const_int 1) (const_int 1)
3233 (const_int 1) (const_int 1)
3234 (const_int 1) (const_int 1)
3235 (const_int 1) (const_int 1)]))
3237 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3238 "pavgw\t{%2, %0|%0, %2}"
3239 [(set_attr "type" "sseiadd")
3240 (set_attr "mode" "TI")])
;; PSADBW is modelled as an opaque unspec over its two V16QI inputs with
;; a V2DI result, rather than as explicit arithmetic RTL (see the remark
;; below).  Operand 1 is tied to the destination ("0"); operand 2 accepts
;; the "xm" alternatives.
3242 ;; The correct representation for this is absolutely enormous, and
3243 ;; surely not generally useful.
3244 (define_insn "sse2_psadbw"
3245 [(set (match_operand:V2DI 0 "register_operand" "=x")
3246 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3247 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3250 "psadbw\t{%2, %0|%0, %2}"
3251 [(set_attr "type" "sseiadd")
3252 (set_attr "mode" "TI")])
;; MOVMSKPS: produces an SImode value in a general register ("=r") from a
;; V4SF register operand ("x").  The operation itself is an opaque unspec.
3254 (define_insn "sse_movmskps"
3255 [(set (match_operand:SI 0 "register_operand" "=r")
3256 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3259 "movmskps\t{%1, %0|%0, %1}"
3260 [(set_attr "type" "ssecvt")
3261 (set_attr "mode" "V4SF")])
;; MOVMSKPD: produces an SImode value in a general register ("=r") from a
;; V2DF register operand ("x").  The operation itself is an opaque unspec.
3263 (define_insn "sse2_movmskpd"
3264 [(set (match_operand:SI 0 "register_operand" "=r")
3265 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3268 "movmskpd\t{%1, %0|%0, %1}"
3269 [(set_attr "type" "ssecvt")
3270 (set_attr "mode" "V2DF")])
;; PMOVMSKB: produces an SImode value in a general register ("=r") from a
;; V16QI register operand ("x").  The operation itself is an opaque unspec.
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector (V16QI).  This may be deliberate if other attributes
;; are derived from the mode -- confirm before changing it.
3272 (define_insn "sse2_pmovmskb"
3273 [(set (match_operand:SI 0 "register_operand" "=r")
3274 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3277 "pmovmskb\t{%1, %0|%0, %1}"
3278 [(set_attr "type" "ssecvt")
3279 (set_attr "mode" "V2DF")])
;; Named expander for MASKMOVDQU.  The destination (operand 0) is a V16QI
;; memory operand; operands 1 and 2 are V16QI registers feeding an unspec.
;; NOTE(review): presumably matched afterwards by the *sse2_maskmovdqu
;; insns, which take the address in a register -- confirm.
3281 (define_expand "sse2_maskmovdqu"
3282 [(set (match_operand:V16QI 0 "memory_operand" "")
3283 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3284 (match_operand:V16QI 2 "register_operand" "x")
;; MASKMOVDQU for !TARGET_64BIT.  The store address is an SImode register
;; restricted by the "D" constraint and is not printed in the output
;; template -- only operands 1 and 2 appear there.  The previous contents
;; of the destination, (mem (match_dup 0)), are listed as an input of the
;; unspec, so the store is modelled as depending on the old memory value.
3290 (define_insn "*sse2_maskmovdqu"
3291 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3292 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3293 (match_operand:V16QI 2 "register_operand" "x")
3294 (mem:V16QI (match_dup 0))]
3296 "TARGET_SSE2 && !TARGET_64BIT"
3297 ;; @@@ check ordering of operands in intel/nonintel syntax
3298 "maskmovdqu\t{%2, %1|%1, %2}"
3299 [(set_attr "type" "ssecvt")
3300 (set_attr "mode" "TI")])
;; MASKMOVDQU for TARGET_64BIT.  Same shape as the !TARGET_64BIT variant
;; except that the address register (operand 0, constraint "D") is DImode.
;; The address is not printed in the output template, and the previous
;; contents of the destination, (mem (match_dup 0)), are listed as an
;; input of the unspec.
3302 (define_insn "*sse2_maskmovdqu_rex64"
3303 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3304 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3305 (match_operand:V16QI 2 "register_operand" "x")
3306 (mem:V16QI (match_dup 0))]
3308 "TARGET_SSE2 && TARGET_64BIT"
3309 ;; @@@ check ordering of operands in intel/nonintel syntax
3310 "maskmovdqu\t{%2, %1|%1, %2}"
3311 [(set_attr "type" "ssecvt")
3312 (set_attr "mode" "TI")])
;; LDMXCSR pattern: an unspec_volatile whose only operand is an SImode
;; memory location ("m").  The "memory" attribute marks it as a load.
3314 (define_insn "sse_ldmxcsr"
3315 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3319 [(set_attr "type" "sse")
3320 (set_attr "memory" "load")])
;; STMXCSR pattern: sets an SImode memory location ("=m") from an
;; unspec_volatile (UNSPECV_STMXCSR) that has no real input -- the
;; (const_int 0) is only a placeholder operand.  The "memory" attribute
;; marks it as a store.
3322 (define_insn "sse_stmxcsr"
3323 [(set (match_operand:SI 0 "memory_operand" "=m")
3324 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3327 [(set_attr "type" "sse")
3328 (set_attr "memory" "store")])
;; Named expander for SFENCE, enabled for TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, flagged volatile.  The fence is then
;; expressed as that MEM being set from an UNSPEC_SFENCE of itself.
3330 (define_expand "sse_sfence"
3332 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3333 "TARGET_SSE || TARGET_3DNOW_A"
3335 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3336 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the SFENCE RTL: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_SFENCE of itself.  Enabled under the
;; same TARGET_SSE || TARGET_3DNOW_A condition as the sse_sfence expander;
;; the "memory" attribute is "unknown".
3339 (define_insn "*sse_sfence"
3340 [(set (match_operand:BLK 0 "" "")
3341 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3342 "TARGET_SSE || TARGET_3DNOW_A"
3344 [(set_attr "type" "sse")
3345 (set_attr "memory" "unknown")])
;; CLFLUSH pattern: an unspec_volatile whose only operand is an address
;; (address_operand, constraint "p") rather than a MEM.  The "memory"
;; attribute is "unknown".
3347 (define_insn "sse2_clflush"
3348 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3352 [(set_attr "type" "sse")
3353 (set_attr "memory" "unknown")])
;; Named expander for MFENCE.  The preparation code builds operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, flagged
;; volatile.  The fence is then expressed through an UNSPEC_MFENCE
;; applied to that same MEM.
3355 (define_expand "sse2_mfence"
3357 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3360 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3361 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the MFENCE RTL: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_MFENCE of itself.  The "memory"
;; attribute is "unknown".
3364 (define_insn "*sse2_mfence"
3365 [(set (match_operand:BLK 0 "" "")
3366 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3369 [(set_attr "type" "sse")
3370 (set_attr "memory" "unknown")])
;; Named expander for LFENCE.  The preparation code builds operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, flagged
;; volatile.  The fence is then expressed through an UNSPEC_LFENCE
;; applied to that same MEM.
3372 (define_expand "sse2_lfence"
3374 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3377 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3378 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the LFENCE RTL: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_LFENCE of itself.  The "memory"
;; attribute is "unknown".
3381 (define_insn "*sse2_lfence"
3382 [(set (match_operand:BLK 0 "" "")
3383 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3386 [(set_attr "type" "sse")
3387 (set_attr "memory" "unknown")])
;; MWAIT pattern: an unspec_volatile over two SImode register operands
;; pinned by the "a" and "c" constraints respectively.  The instruction
;; length is given explicitly as 3 bytes.
3389 (define_insn "sse3_mwait"
3390 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3391 (match_operand:SI 1 "register_operand" "c")]
3395 [(set_attr "length" "3")])
;; MONITOR pattern: an unspec_volatile over three SImode register
;; operands pinned by the "a", "c" and "d" constraints respectively.
;; All three operands are printed in the output template, and the
;; instruction length is given explicitly as 3 bytes.
;; NOTE(review): every operand, including operand 0, is SImode --
;; confirm this is also what is wanted when TARGET_64BIT.
3397 (define_insn "sse3_monitor"
3398 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3399 (match_operand:SI 1 "register_operand" "c")
3400 (match_operand:SI 2 "register_operand" "d")]
3403 "monitor\t%0, %1, %2"
3404 [(set_attr "length" "3")])