1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option) any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; match the element widths, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander instantiated for every mode in SSEMODEI; the work is
;; handed to ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives: constant ("C")
;; to register, register/memory to register, and register to memory.
;; The condition rejects memory-to-memory moves.  The output code uses
;; standard_sse_constant_opcode, or movaps when the insn's mode attribute
;; is V4SF, or movdqa.  The mode attribute depends on optimize_size,
;; TARGET_SSE2 and (for alternative 2) TARGET_SSE_TYPELESS_STORES.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 return standard_sse_constant_opcode (insn, operands[1]);
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
74 return "movdqa\t{%1, %0|%0, %1}";
79 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_string "TI")))])
;; V4SF move expander; the work is handed to ix86_expand_vector_move.
90 (define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
95 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Alternatives: constant ("C") to register,
;; register/memory to register, register to memory.  The output code
;; uses standard_sse_constant_opcode or movaps.
99 (define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
104 switch (which_alternative)
107 return standard_sse_constant_opcode (insn, operands[1]);
110 return "movaps\t{%1, %0|%0, %1}";
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
;; Applies after reload (TARGET_SSE && reload_completed) to a V4SF
;; register set from a zero_extended_scalar_load_operand.  Operand 1 is
;; narrowed to SFmode with simplify_gen_subreg and duplicated with
;; vec_duplicate; operand 2 becomes the V4SF zero constant.
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
124 (vec_duplicate:V4SF (match_dup 1))
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; the work is handed to ix86_expand_vector_move.
132 (define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
137 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Alternatives: constant ("C") to register,
;; register/memory to register, register to memory.  The condition
;; rejects memory-to-memory moves.  The output code uses
;; standard_sse_constant_opcode, or movaps when the insn's mode attribute
;; is V4SF, or movapd.  The mode attribute is V4SF or V2DF depending on
;; optimize_size, TARGET_SSE2 and (for alternative 2)
;; TARGET_SSE_TYPELESS_STORES.
141 (define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
146 switch (which_alternative)
149 return standard_sse_constant_opcode (insn, operands[1]);
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
155 return "movapd\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
168 (const_string "V4SF")
169 (const_string "V2DF")))])
;; Applies after reload (TARGET_SSE2 && reload_completed) to a V2DF
;; register set from a zero_extended_scalar_load_operand.  The
;; replacement is a vec_concat of operand 1, narrowed to DFmode with
;; simplify_gen_subreg, and operand 2, the DFmode zero constant.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes a register operand and
;; hands the work to ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; the work is
;; handed to ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups move of a V4SF value wrapped in an unspec.  Alternatives:
;; register/memory to register, and register to memory.  The condition
;; rejects memory-to-memory moves.  The "mode" attribute is V4SF, to
;; match the V4SF operands and the packed-single instruction emitted.
198 (define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
;; movupd move of a V2DF value wrapped in an unspec.  Alternatives:
;; register/memory to register, and register to memory.  Requires
;; TARGET_SSE2; the condition rejects memory-to-memory moves.
207 (define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
;; movdqu move of a V16QI value wrapped in an unspec.  Alternatives:
;; register/memory to register, and register to memory.  Requires
;; TARGET_SSE2; the condition rejects memory-to-memory moves.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
;; movntps store of a V4SF register to memory, wrapped in an unspec.
225 (define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
;; movntpd store of a V2DF register to memory, wrapped in an unspec.
;; NOTE(review): type is "ssecvt" here but "ssemov" on sse_movntv4sf --
;; confirm whether the difference is intended.
234 (define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
;; movntdq store of a V2DI register to memory, wrapped in an unspec.
243 (define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
;; movnti store of an SImode general register ("r") to memory, wrapped
;; in an unspec.  The "mode" attribute is SI because the insn moves a
;; single SImode integer and has no vector operand.
252 (define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "SI")])
;; lddqu load of a V16QI value from memory into a register, wrapped in
;; an unspec.
261 (define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
272 ;; Parallel single-precision floating point arithmetic
274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; the work is handed to
;; ix86_expand_fp_absneg_operator with code NEG.
276 (define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; the work is handed to
;; ix86_expand_fp_absneg_operator with code ABS.
282 (define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
288 (define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; addps insn.  Operand 1 is commutative ("%") and tied to the output;
;; operand 2 may be a register or memory.  Enabled when
;; ix86_binary_operator_ok accepts the operands.
295 (define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
;; addss insn on V4SF operands (mode attribute SF).  Operand 1 is
;; commutative and tied to the output.
304 (define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register; operands
;; are adjusted by ix86_fixup_binary_operands_no_copy.
316 (define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; subps insn.  Operand 1 is a register tied to the output (not
;; commutative); operand 2 may be a register or memory.
323 (define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
;; subss insn on V4SF operands (mode attribute SF).  Operand 1 is a
;; register tied to the output.
332 (define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
;; V4SF multiplication expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
344 (define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; mulps insn.  Operand 1 is commutative ("%") and tied to the output.
;; Enabled when ix86_binary_operator_ok accepts the operands.
351 (define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
;; mulss insn on V4SF operands (mode attribute SF).  Operand 1 is
;; commutative and tied to the output.
360 (define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy.
372 (define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; divps insn.  Operand 1 is a register tied to the output; operand 2
;; may be a register or memory.
379 (define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
;; divss insn on V4SF operands (mode attribute SF).  Operand 1 is a
;; register tied to the output.
388 (define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
396 "divss\t{%1, %0|%0, %1}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
;; rcpps insn; the source operand is wrapped in UNSPEC_RCP.
400 (define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
;; rcpss insn (mode attribute SF).  Operand 1 is the unspec source;
;; operand 2 is a register tied to the output.
409 (define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
414 (match_operand:V4SF 2 "register_operand" "0")
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
;; rsqrtps insn; the source operand is wrapped in UNSPEC_RSQRT.
421 (define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
;; rsqrtss insn (mode attribute SF).  Operand 1 is the unspec source;
;; operand 2 is a register tied to the output.
430 (define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
435 (match_operand:V4SF 2 "register_operand" "0")
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
;; sqrtps insn for a V4SF sqrt; the source may be a register or memory.
442 (define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
;; sqrtss insn (mode attribute SF).  Operand 1 is the sqrt source;
;; operand 2 is a register tied to the output.
450 (define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
461 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462 ;; isn't really correct, as those rtl operators aren't defined when
463 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-max expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy adjusts the operands.
465 (define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps insn used only when flag_finite_math_only is set.  Operand 1
;; is commutative ("%") and tied to the output.
476 (define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
;; maxps insn with a fixed operand order: operand 1 is a register tied
;; to the output and is not marked commutative.
486 (define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
;; maxss insn (mode attribute SF) used only when flag_finite_math_only
;; is set.  Operand 1 is commutative and tied to the output.
495 (define_insn "*sse_vmsmaxv4sf3_finite"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
502 "TARGET_SSE && flag_finite_math_only
503 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
504 "maxss\t{%2, %0|%0, %2}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
;; maxss insn (mode attribute SF) with a fixed operand order: operand 1
;; is a register tied to the output.
508 (define_insn "sse_vmsmaxv4sf3"
509 [(set (match_operand:V4SF 0 "register_operand" "=x")
511 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
512 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
516 "maxss\t{%2, %0|%0, %2}"
517 [(set_attr "type" "sse")
518 (set_attr "mode" "SF")])
;; V4SF signed-min expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy adjusts the operands.
520 (define_expand "sminv4sf3"
521 [(set (match_operand:V4SF 0 "register_operand" "")
522 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
523 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
526 if (!flag_finite_math_only)
527 operands[1] = force_reg (V4SFmode, operands[1]);
528 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps insn used only when flag_finite_math_only is set.  Operand 1
;; is commutative ("%") and tied to the output.
531 (define_insn "*sminv4sf3_finite"
532 [(set (match_operand:V4SF 0 "register_operand" "=x")
533 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
534 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
535 "TARGET_SSE && flag_finite_math_only
536 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
537 "minps\t{%2, %0|%0, %2}"
538 [(set_attr "type" "sse")
539 (set_attr "mode" "V4SF")])
;; minps insn with a fixed operand order: operand 1 is a register tied
;; to the output and is not marked commutative.
541 (define_insn "*sminv4sf3"
542 [(set (match_operand:V4SF 0 "register_operand" "=x")
543 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
544 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
546 "minps\t{%2, %0|%0, %2}"
547 [(set_attr "type" "sse")
548 (set_attr "mode" "V4SF")])
;; minss insn (mode attribute SF) used only when flag_finite_math_only
;; is set.  Operand 1 is commutative and tied to the output.
550 (define_insn "*sse_vmsminv4sf3_finite"
551 [(set (match_operand:V4SF 0 "register_operand" "=x")
553 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
554 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
557 "TARGET_SSE && flag_finite_math_only
558 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
559 "minss\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sse")
561 (set_attr "mode" "SF")])
;; minss insn (mode attribute SF) with a fixed operand order: operand 1
;; is a register tied to the output.
563 (define_insn "sse_vmsminv4sf3"
564 [(set (match_operand:V4SF 0 "register_operand" "=x")
566 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
567 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
571 "minss\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sse")
573 (set_attr "mode" "SF")])
575 ;; These versions of the min/max patterns implement exactly the operations
576 ;; min = (op1 < op2 ? op1 : op2)
577 ;; max = (!(op1 < op2) ? op1 : op2)
578 ;; Their operands are not commutative, and thus they may be used in the
579 ;; presence of -0.0 and NaN.
;; minps insn expressed as an unspec of two V4SF operands.  Operand 1
;; is a register tied to the output, so the operand order is fixed.
581 (define_insn "*ieee_sminv4sf3"
582 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
587 "minps\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "V4SF")])
;; maxps insn expressed as an unspec of two V4SF operands.  Operand 1
;; is a register tied to the output, so the operand order is fixed.
591 (define_insn "*ieee_smaxv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
593 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
594 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
597 "maxps\t{%2, %0|%0, %2}"
598 [(set_attr "type" "sseadd")
599 (set_attr "mode" "V4SF")])
;; minpd insn expressed as an unspec of two V2DF operands.  Operand 1
;; is a register tied to the output, so the operand order is fixed.
601 (define_insn "*ieee_sminv2df3"
602 [(set (match_operand:V2DF 0 "register_operand" "=x")
603 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
604 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
607 "minpd\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "V2DF")])
;; maxpd insn expressed as an unspec of two V2DF operands.  Operand 1
;; is a register tied to the output, so the operand order is fixed.
611 (define_insn "*ieee_smaxv2df3"
612 [(set (match_operand:V2DF 0 "register_operand" "=x")
613 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
614 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
617 "maxpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
;; addsubps insn.  The pattern combines operands 1 and 2 with a
;; (minus op1 op2) term built from match_dup; operand 1 is tied to the
;; output.
621 (define_insn "sse3_addsubv4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
625 (match_operand:V4SF 1 "register_operand" "0")
626 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
627 (minus:V4SF (match_dup 1) (match_dup 2))
630 "addsubps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
;; haddps insn.  The pattern selects SF elements 0..3 of operand 1
;; (tied to the output) and then elements 0..3 of operand 2.
634 (define_insn "sse3_haddv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
656 "haddps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
;; hsubps insn.  The pattern selects SF elements 0..3 of operand 1
;; (tied to the output) and then elements 0..3 of operand 2.
660 (define_insn "sse3_hsubv4sf3"
661 [(set (match_operand:V4SF 0 "register_operand" "=x")
666 (match_operand:V4SF 1 "register_operand" "0")
667 (parallel [(const_int 0)]))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
670 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
671 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
675 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
676 (parallel [(const_int 0)]))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
679 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
680 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
682 "hsubps\t{%2, %0|%0, %2}"
683 [(set_attr "type" "sseadd")
684 (set_attr "mode" "V4SF")])
;; V4SF sum-reduction expander.  The body has two code paths.  One
;; applies sse3_haddv4sf3 twice: operand 1 with itself into a fresh
;; temporary, then the temporary with itself into operand 0.  The other
;; calls ix86_expand_reduc_v4sf with gen_addv4sf3.
686 (define_expand "reduc_splus_v4sf"
687 [(match_operand:V4SF 0 "register_operand" "")
688 (match_operand:V4SF 1 "register_operand" "")]
693 rtx tmp = gen_reg_rtx (V4SFmode);
694 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
695 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
698 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF max-reduction expander; calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3.
702 (define_expand "reduc_smax_v4sf"
703 [(match_operand:V4SF 0 "register_operand" "")
704 (match_operand:V4SF 1 "register_operand" "")]
707 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF min-reduction expander; calls ix86_expand_reduc_v4sf with
;; gen_sminv4sf3.
711 (define_expand "reduc_smin_v4sf"
712 [(match_operand:V4SF 0 "register_operand" "")
713 (match_operand:V4SF 1 "register_operand" "")]
716 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
722 ;; Parallel single-precision floating point comparisons
724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>ps insn.  Operand 3 is any sse_comparison_operator and its
;; condition is printed via %D3; operand 1 is tied to the output.
726 (define_insn "sse_maskcmpv4sf3"
727 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operator:V4SF 3 "sse_comparison_operator"
729 [(match_operand:V4SF 1 "register_operand" "0")
730 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
732 "cmp%D3ps\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssecmp")
734 (set_attr "mode" "V4SF")])
;; cmp<cond>ss insn on SF operands.  Operand 3 is any
;; sse_comparison_operator, printed via %D3; operand 1 is tied to the
;; output.
736 (define_insn "sse_maskcmpsf3"
737 [(set (match_operand:SF 0 "register_operand" "=x")
738 (match_operator:SF 3 "sse_comparison_operator"
739 [(match_operand:SF 1 "register_operand" "0")
740 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
742 "cmp%D3ss\t{%2, %0|%0, %2}"
743 [(set_attr "type" "ssecmp")
744 (set_attr "mode" "SF")])
;; cmp<cond>ss insn on V4SF operands (mode attribute SF).  Both
;; compared operands must be registers; operand 1 is tied to the output.
746 (define_insn "sse_vmmaskcmpv4sf3"
747 [(set (match_operand:V4SF 0 "register_operand" "=x")
749 (match_operator:V4SF 3 "sse_comparison_operator"
750 [(match_operand:V4SF 1 "register_operand" "0")
751 (match_operand:V4SF 2 "register_operand" "x")])
755 "cmp%D3ss\t{%2, %0|%0, %2}"
756 [(set_attr "type" "ssecmp")
757 (set_attr "mode" "SF")])
;; comiss insn.  Sets FLAGS_REG in CCFP mode from element 0 of each of
;; the two V4SF operands.
759 (define_insn "sse_comi"
760 [(set (reg:CCFP FLAGS_REG)
763 (match_operand:V4SF 0 "register_operand" "x")
764 (parallel [(const_int 0)]))
766 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
767 (parallel [(const_int 0)]))))]
769 "comiss\t{%1, %0|%0, %1}"
770 [(set_attr "type" "ssecomi")
771 (set_attr "mode" "SF")])
;; ucomiss insn.  Sets FLAGS_REG in CCFPU mode from element 0 of each of
;; the two V4SF operands.
773 (define_insn "sse_ucomi"
774 [(set (reg:CCFPU FLAGS_REG)
777 (match_operand:V4SF 0 "register_operand" "x")
778 (parallel [(const_int 0)]))
780 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))))]
783 "ucomiss\t{%1, %0|%0, %1}"
784 [(set_attr "type" "ssecomi")
785 (set_attr "mode" "SF")])
;; V4SF vector-conditional expander.  Operands 4 and 5 are the compared
;; values and operands 1 and 2 are general operands; the body branches
;; on the result of ix86_expand_fp_vcond (operands).
787 (define_expand "vcondv4sf"
788 [(set (match_operand:V4SF 0 "register_operand" "")
791 [(match_operand:V4SF 4 "nonimmediate_operand" "")
792 (match_operand:V4SF 5 "nonimmediate_operand" "")])
793 (match_operand:V4SF 1 "general_operand" "")
794 (match_operand:V4SF 2 "general_operand" "")))]
797 if (ix86_expand_fp_vcond (operands))
803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
805 ;; Parallel single-precision floating point logical operations
807 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
809 (define_expand "andv4sf3"
810 [(set (match_operand:V4SF 0 "register_operand" "")
811 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
812 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
814 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps insn.  Operand 1 is commutative ("%") and tied to the output.
;; Enabled when ix86_binary_operator_ok accepts the operands.
816 (define_insn "*andv4sf3"
817 [(set (match_operand:V4SF 0 "register_operand" "=x")
818 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
819 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
820 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
821 "andps\t{%2, %0|%0, %2}"
822 [(set_attr "type" "sselog")
823 (set_attr "mode" "V4SF")])
;; andnps insn: (and (not op1) op2).  Operand 1 is a register tied to
;; the output.
825 (define_insn "sse_nandv4sf3"
826 [(set (match_operand:V4SF 0 "register_operand" "=x")
827 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
828 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
830 "andnps\t{%2, %0|%0, %2}"
831 [(set_attr "type" "sselog")
832 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
834 (define_expand "iorv4sf3"
835 [(set (match_operand:V4SF 0 "register_operand" "")
836 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
837 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
839 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps insn.  Operand 1 is commutative ("%") and tied to the output.
;; Enabled when ix86_binary_operator_ok accepts the operands.
841 (define_insn "*iorv4sf3"
842 [(set (match_operand:V4SF 0 "register_operand" "=x")
843 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
844 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
845 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
846 "orps\t{%2, %0|%0, %2}"
847 [(set_attr "type" "sselog")
848 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
850 (define_expand "xorv4sf3"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
853 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
855 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps insn.  Operand 1 is commutative ("%") and tied to the output.
;; Enabled when ix86_binary_operator_ok accepts the operands.
857 (define_insn "*xorv4sf3"
858 [(set (match_operand:V4SF 0 "register_operand" "=x")
859 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
860 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
861 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
862 "xorps\t{%2, %0|%0, %2}"
863 [(set_attr "type" "sselog")
864 (set_attr "mode" "V4SF")])
866 ;; Also define scalar versions. These are used for abs, neg, and
867 ;; conditional move. Using subregs into vector modes causes register
868 ;; allocation lossage. These patterns do not allow memory operands
869 ;; because the native instructions read the full 128 bits.
;; Scalar SF bitwise AND emitted as andps.  Both inputs must be
;; registers; operand 1 is tied to the output.
871 (define_insn "*andsf3"
872 [(set (match_operand:SF 0 "register_operand" "=x")
873 (and:SF (match_operand:SF 1 "register_operand" "0")
874 (match_operand:SF 2 "register_operand" "x")))]
876 "andps\t{%2, %0|%0, %2}"
877 [(set_attr "type" "sselog")
878 (set_attr "mode" "V4SF")])
;; Scalar SF (and (not op1) op2) emitted as andnps.  Both inputs must be
;; registers; operand 1 is tied to the output.
880 (define_insn "*nandsf3"
881 [(set (match_operand:SF 0 "register_operand" "=x")
882 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
883 (match_operand:SF 2 "register_operand" "x")))]
885 "andnps\t{%2, %0|%0, %2}"
886 [(set_attr "type" "sselog")
887 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise OR emitted as orps.  Both inputs must be
;; registers; operand 1 is tied to the output.
889 (define_insn "*iorsf3"
890 [(set (match_operand:SF 0 "register_operand" "=x")
891 (ior:SF (match_operand:SF 1 "register_operand" "0")
892 (match_operand:SF 2 "register_operand" "x")))]
894 "orps\t{%2, %0|%0, %2}"
895 [(set_attr "type" "sselog")
896 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise XOR emitted as xorps.  Both inputs must be
;; registers; operand 1 is tied to the output.
898 (define_insn "*xorsf3"
899 [(set (match_operand:SF 0 "register_operand" "=x")
900 (xor:SF (match_operand:SF 1 "register_operand" "0")
901 (match_operand:SF 2 "register_operand" "x")))]
903 "xorps\t{%2, %0|%0, %2}"
904 [(set_attr "type" "sselog")
905 (set_attr "mode" "V4SF")])
907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
909 ;; Parallel single-precision floating point conversion operations
911 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps insn.  Operand 2 (V2SI; constraint "ym") is converted with
;; float:V2SF; operand 1 is a V4SF register tied to the output.
913 (define_insn "sse_cvtpi2ps"
914 [(set (match_operand:V4SF 0 "register_operand" "=x")
917 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
918 (match_operand:V4SF 1 "register_operand" "0")
921 "cvtpi2ps\t{%2, %0|%0, %2}"
922 [(set_attr "type" "ssecvt")
923 (set_attr "mode" "V4SF")])
;; cvtps2pi insn.  Takes elements 0 and 1 of a V4SI unspec of the V4SF
;; source and writes a V2SI result to a "y" register.  The unit
;; attribute is mmx.
925 (define_insn "sse_cvtps2pi"
926 [(set (match_operand:V2SI 0 "register_operand" "=y")
928 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
930 (parallel [(const_int 0) (const_int 1)])))]
932 "cvtps2pi\t{%1, %0|%0, %1}"
933 [(set_attr "type" "ssecvt")
934 (set_attr "unit" "mmx")
935 (set_attr "mode" "DI")])
;; cvttps2pi insn.  Takes elements 0 and 1 of (fix:V4SI op1) and writes
;; a V2SI result to a "y" register.  The unit attribute is mmx.
;; NOTE(review): mode is "SF" here but "DI" on sse_cvtps2pi, which has
;; the same operand shapes -- confirm which is intended.
937 (define_insn "sse_cvttps2pi"
938 [(set (match_operand:V2SI 0 "register_operand" "=y")
940 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
941 (parallel [(const_int 0) (const_int 1)])))]
943 "cvttps2pi\t{%1, %0|%0, %1}"
944 [(set_attr "type" "ssecvt")
945 (set_attr "unit" "mmx")
946 (set_attr "mode" "SF")])
;; cvtsi2ss insn.  Operand 2 (SI; register or memory alternative) is
;; converted with float:SF; operand 1 is a V4SF register tied to the
;; output.
948 (define_insn "sse_cvtsi2ss"
949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
952 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
953 (match_operand:V4SF 1 "register_operand" "0,0")
956 "cvtsi2ss\t{%2, %0|%0, %2}"
957 [(set_attr "type" "sseicvt")
958 (set_attr "athlon_decode" "vector,double")
959 (set_attr "mode" "SF")])
;; cvtsi2ssq insn, 64-bit targets only.  Operand 2 (DI) is converted
;; with float:SF; operand 1 is a V4SF register tied to the output.
;; NOTE(review): the second alternative is "rm" here but "m" on
;; sse_cvtsi2ss -- confirm the overlap with alternative 0 is intended.
961 (define_insn "sse_cvtsi2ssq"
962 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
965 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
966 (match_operand:V4SF 1 "register_operand" "0,0")
968 "TARGET_SSE && TARGET_64BIT"
969 "cvtsi2ssq\t{%2, %0|%0, %2}"
970 [(set_attr "type" "sseicvt")
971 (set_attr "athlon_decode" "vector,double")
972 (set_attr "mode" "SF")])
;; cvtss2si insn.  Element 0 of the V4SF source is converted to an SI
;; general register under UNSPEC_FIX_NOTRUNC.
974 (define_insn "sse_cvtss2si"
975 [(set (match_operand:SI 0 "register_operand" "=r,r")
978 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
979 (parallel [(const_int 0)]))]
980 UNSPEC_FIX_NOTRUNC))]
982 "cvtss2si\t{%1, %0|%0, %1}"
983 [(set_attr "type" "sseicvt")
984 (set_attr "athlon_decode" "double,vector")
985 (set_attr "mode" "SI")])
;; cvtss2si insn taking a scalar SF source directly; the result is an
;; SI general register under UNSPEC_FIX_NOTRUNC.
987 (define_insn "sse_cvtss2si_2"
988 [(set (match_operand:SI 0 "register_operand" "=r,r")
989 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
990 UNSPEC_FIX_NOTRUNC))]
992 "cvtss2si\t{%1, %0|%0, %1}"
993 [(set_attr "type" "sseicvt")
994 (set_attr "athlon_decode" "double,vector")
995 (set_attr "mode" "SI")])
;; cvtss2siq insn, 64-bit targets only.  Element 0 of the V4SF source is
;; converted to a DI general register under UNSPEC_FIX_NOTRUNC.
997 (define_insn "sse_cvtss2siq"
998 [(set (match_operand:DI 0 "register_operand" "=r,r")
1001 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1002 (parallel [(const_int 0)]))]
1003 UNSPEC_FIX_NOTRUNC))]
1004 "TARGET_SSE && TARGET_64BIT"
1005 "cvtss2siq\t{%1, %0|%0, %1}"
1006 [(set_attr "type" "sseicvt")
1007 (set_attr "athlon_decode" "double,vector")
1008 (set_attr "mode" "DI")])
;; cvtss2siq insn, 64-bit targets only, taking a scalar SF source
;; directly; the result is a DI general register under
;; UNSPEC_FIX_NOTRUNC.
1010 (define_insn "sse_cvtss2siq_2"
1011 [(set (match_operand:DI 0 "register_operand" "=r,r")
1012 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1013 UNSPEC_FIX_NOTRUNC))]
1014 "TARGET_SSE && TARGET_64BIT"
1015 "cvtss2siq\t{%1, %0|%0, %1}"
1016 [(set_attr "type" "sseicvt")
1017 (set_attr "athlon_decode" "double,vector")
1018 (set_attr "mode" "DI")])
;; cvttss2si insn.  Element 0 of the V4SF source is converted to an SI
;; general register.
1020 (define_insn "sse_cvttss2si"
1021 [(set (match_operand:SI 0 "register_operand" "=r,r")
1024 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1025 (parallel [(const_int 0)]))))]
1027 "cvttss2si\t{%1, %0|%0, %1}"
1028 [(set_attr "type" "sseicvt")
1029 (set_attr "athlon_decode" "double,vector")
1030 (set_attr "mode" "SI")])
;; cvttss2siq insn, 64-bit targets only.  Element 0 of the V4SF source
;; is converted to a DI general register.
1032 (define_insn "sse_cvttss2siq"
1033 [(set (match_operand:DI 0 "register_operand" "=r,r")
1036 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1037 (parallel [(const_int 0)]))))]
1038 "TARGET_SSE && TARGET_64BIT"
1039 "cvttss2siq\t{%1, %0|%0, %1}"
1040 [(set_attr "type" "sseicvt")
1041 (set_attr "athlon_decode" "double,vector")
1042 (set_attr "mode" "DI")])
;; cvtdq2ps insn: (float:V4SF op1) with a V4SI source in a register or
;; memory.  The "mode" attribute is V4SF, matching the packed-single
;; result the instruction produces.
1044 (define_insn "sse2_cvtdq2ps"
1045 [(set (match_operand:V4SF 0 "register_operand" "=x")
1046 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1048 "cvtdq2ps\t{%1, %0|%0, %1}"
1049 [(set_attr "type" "ssecvt")
1050 (set_attr "mode" "V4SF")])
;; cvtps2dq insn.  The V4SF source is converted to V4SI under
;; UNSPEC_FIX_NOTRUNC.
1052 (define_insn "sse2_cvtps2dq"
1053 [(set (match_operand:V4SI 0 "register_operand" "=x")
1054 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1055 UNSPEC_FIX_NOTRUNC))]
1057 "cvtps2dq\t{%1, %0|%0, %1}"
1058 [(set_attr "type" "ssecvt")
1059 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF operand (register or memory) to V4SI using the RTL
;; `fix' operator.
1061 (define_insn "sse2_cvttps2dq"
1062 [(set (match_operand:V4SI 0 "register_operand" "=x")
1063 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1065 "cvttps2dq\t{%1, %0|%0, %1}"
1066 [(set_attr "type" "ssecvt")
1067 (set_attr "mode" "TI")])
1069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1071 ;; Parallel single-precision floating point element swizzling
1073 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combine two V4SF operands; operand 1 is tied to the destination in every
;; alternative.  Three alternatives are provided:
;;   reg <- reg      : movhlps
;;   reg <- "o" mem  : movlps using the %H2 form of operand 2
;;   mem <- reg      : movhps
;; The insn condition rejects the case where operands 1 and 2 are both
;; memory.
1075 (define_insn "sse_movhlps"
1076 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1079 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1080 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1081 (parallel [(const_int 6)
1085 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1087 movhlps\t{%2, %0|%0, %2}
1088 movlps\t{%H2, %0|%0, %H2}
1089 movhps\t{%2, %0|%0, %2}"
1090 [(set_attr "type" "ssemov")
1091 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Combine two V4SF operands; operand 1 is tied to the destination in every
;; alternative.  Three alternatives are provided:
;;   reg <- reg      : movlhps
;;   reg <- mem      : movhps
;;   "o" mem <- reg  : movlps storing to the %H0 form of the destination
;; Operand validity is checked with ix86_binary_operator_ok (code UNKNOWN).
1093 (define_insn "sse_movlhps"
1094 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1097 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1098 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1099 (parallel [(const_int 0)
1103 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1105 movlhps\t{%2, %0|%0, %2}
1106 movhps\t{%2, %0|%0, %2}
1107 movlps\t{%2, %H0|%H0, %2}"
1108 [(set_attr "type" "ssemov")
1109 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: selection indices 2, 6, 3, 7 over operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
1111 (define_insn "sse_unpckhps"
1112 [(set (match_operand:V4SF 0 "register_operand" "=x")
1115 (match_operand:V4SF 1 "register_operand" "0")
1116 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1117 (parallel [(const_int 2) (const_int 6)
1118 (const_int 3) (const_int 7)])))]
1120 "unpckhps\t{%2, %0|%0, %2}"
1121 [(set_attr "type" "sselog")
1122 (set_attr "mode" "V4SF")])
;; unpcklps: selection indices 0, 4, 1, 5 over operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be register or
;; memory.
1124 (define_insn "sse_unpcklps"
1125 [(set (match_operand:V4SF 0 "register_operand" "=x")
1128 (match_operand:V4SF 1 "register_operand" "0")
1129 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1130 (parallel [(const_int 0) (const_int 4)
1131 (const_int 1) (const_int 5)])))]
1133 "unpcklps\t{%2, %0|%0, %2}"
1134 [(set_attr "type" "sselog")
1135 (set_attr "mode" "V4SF")])
1137 ;; These are modeled with the same vec_concat as the others so that we
1138 ;; capture users of shufps that can use the new instructions
;; movshdup: single V4SF source (register or memory), V4SF register
;; destination.  The selection list begins with element 1.
1139 (define_insn "sse3_movshdup"
1140 [(set (match_operand:V4SF 0 "register_operand" "=x")
1143 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1145 (parallel [(const_int 1)
1150 "movshdup\t{%1, %0|%0, %1}"
1151 [(set_attr "type" "sse")
1152 (set_attr "mode" "V4SF")])
;; movsldup: single V4SF source (register or memory), V4SF register
;; destination.  The selection list begins with element 0.
1154 (define_insn "sse3_movsldup"
1155 [(set (match_operand:V4SF 0 "register_operand" "=x")
1158 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1160 (parallel [(const_int 0)
1165 "movsldup\t{%1, %0|%0, %1}"
1166 [(set_attr "type" "sse")
1167 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3 (a const_int).
;; The mask is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7);
;; the two upper fields have 4 added to them.  The four values are passed
;; as separate operands to gen_sse_shufps_1.
1169 (define_expand "sse_shufps"
1170 [(match_operand:V4SF 0 "register_operand" "")
1171 (match_operand:V4SF 1 "register_operand" "")
1172 (match_operand:V4SF 2 "nonimmediate_operand" "")
1173 (match_operand:SI 3 "const_int_operand" "")]
1176 int mask = INTVAL (operands[3]);
1177 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1178 GEN_INT ((mask >> 0) & 3),
1179 GEN_INT ((mask >> 2) & 3),
1180 GEN_INT (((mask >> 4) & 3) + 4),
1181 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four index operands: operands
;; 3 and 4 must be in 0..3, operands 5 and 6 in 4..7.  The output code
;; packs them back into one immediate (subtracting 4 from operands 5 and 6,
;; 2 bits per field), overwrites operands[3] with it, and emits shufps
;; using %3 as the immediate.
1185 (define_insn "sse_shufps_1"
1186 [(set (match_operand:V4SF 0 "register_operand" "=x")
1189 (match_operand:V4SF 1 "register_operand" "0")
1190 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1191 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1192 (match_operand 4 "const_0_to_3_operand" "")
1193 (match_operand 5 "const_4_to_7_operand" "")
1194 (match_operand 6 "const_4_to_7_operand" "")])))]
1198 mask |= INTVAL (operands[3]) << 0;
1199 mask |= INTVAL (operands[4]) << 2;
1200 mask |= (INTVAL (operands[5]) - 4) << 4;
1201 mask |= (INTVAL (operands[6]) - 4) << 6;
1202 operands[3] = GEN_INT (mask);
1204 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1206 [(set_attr "type" "sselog")
1207 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;; Alternatives:
;;   mem <- reg      : movhps
;;   reg <- reg      : movhlps
;;   reg <- "o" mem  : movlps using the %H1 form of the source
1209 (define_insn "sse_storehps"
1210 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1212 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1213 (parallel [(const_int 2) (const_int 3)])))]
1216 movhps\t{%1, %0|%0, %1}
1217 movhlps\t{%1, %0|%0, %1}
1218 movlps\t{%H1, %0|%0, %H1}"
1219 [(set_attr "type" "ssemov")
1220 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SF from elements 0 and 1 of operand 1 (tied to the
;; destination) together with V2SF operand 2.  Alternatives:
;;   reg <- mem      : movhps
;;   reg <- reg      : movlhps
;;   "o" mem <- reg  : movlps storing to the %H0 form of the destination
1222 (define_insn "sse_loadhps"
1223 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1226 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1227 (parallel [(const_int 0) (const_int 1)]))
1228 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1231 movhps\t{%2, %0|%0, %2}
1232 movlhps\t{%2, %0|%0, %2}
1233 movlps\t{%2, %H0|%H0, %2}"
1234 [(set_attr "type" "ssemov")
1235 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; Alternatives:
;;   mem <- reg : movlps
;;   reg <- reg : movaps (whole-register copy)
;;   reg <- mem : movlps
1237 (define_insn "sse_storelps"
1238 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1240 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1241 (parallel [(const_int 0) (const_int 1)])))]
1244 movlps\t{%1, %0|%0, %1}
1245 movaps\t{%1, %0|%0, %1}
1246 movlps\t{%1, %0|%0, %1}"
1247 [(set_attr "type" "ssemov")
1248 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SF from V2SF operand 2 together with elements 2 and 3 of
;; operand 1.  Alternatives:
;;   operand 2 tied to the destination, operand 1 in a register :
;;       shufps with immediate 0xe4
;;   operand 1 tied, operand 2 in memory                        : movlps
;;   destination in memory (operand 1 tied), operand 2 register : movlps
1250 (define_insn "sse_loadlps"
1251 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1253 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1255 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1256 (parallel [(const_int 2) (const_int 3)]))))]
1259 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1260 movlps\t{%2, %0|%0, %2}
1261 movlps\t{%2, %0|%0, %2}"
1262 [(set_attr "type" "sselog,ssemov,ssemov")
1263 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 2 is the source named in the
;; emitted instruction, operand 1 is tied to the destination.
1265 (define_insn "sse_movss"
1266 [(set (match_operand:V4SF 0 "register_operand" "=x")
1268 (match_operand:V4SF 2 "register_operand" "x")
1269 (match_operand:V4SF 1 "register_operand" "0")
1272 "movss\t{%2, %0|%0, %2}"
1273 [(set_attr "type" "ssemov")
1274 (set_attr "mode" "SF")])
;; SF operand 1 must already live in the destination register (constraint
;; "0"); the insn then emits shufps with immediate 0 of that register with
;; itself.
1276 (define_insn "*vec_dupv4sf"
1277 [(set (match_operand:V4SF 0 "register_operand" "=x")
1279 (match_operand:SF 1 "register_operand" "0")))]
1281 "shufps\t{$0, %0, %0|%0, %0, 0}"
1282 [(set_attr "type" "sselog1")
1283 (set_attr "mode" "V4SF")])
1285 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1286 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1287 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SF from two SF operands.  Operand 2 is a register or zero
;; (reg_or_0_operand).  Four alternatives, two typed as SSE and two as MMX:
;;   unpcklps  : operand 1 tied, operand 2 in "x"
;;   movss     : operand 1 from memory, operand 2 is "C"
;;   punpckldq : operand 1 tied, operand 2 in "*y"
;;   movd      : operand 1 from memory, operand 2 is "C"
1288 (define_insn "*sse_concatv2sf"
1289 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1291 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1292 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1295 unpcklps\t{%2, %0|%0, %2}
1296 movss\t{%1, %0|%0, %1}
1297 punpckldq\t{%2, %0|%0, %2}
1298 movd\t{%1, %0|%0, %1}"
1299 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1300 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Form a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or from memory
;; (movhps).
1302 (define_insn "*sse_concatv4sf"
1303 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1305 (match_operand:V2SF 1 "register_operand" " 0,0")
1306 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1309 movlhps\t{%2, %0|%0, %2}
1310 movhps\t{%2, %0|%0, %2}"
1311 [(set_attr "type" "ssemov")
1312 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument,
;; the V4SF destination and the unconstrained operand 1.
1314 (define_expand "vec_initv4sf"
1315 [(match_operand:V4SF 0 "register_operand" "")
1316 (match_operand 1 "" "")]
1319 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert SF operand 2 into a V4SF whose other contents come from operand 1
;; (either tied to the destination, "0", or the constant "C").
;; Four constraint alternatives are declared; the templates visible here
;; are movss, movss and movd for the first three.
1323 (define_insn "*vec_setv4sf_0"
1324 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1327 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1328 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1332 movss\t{%2, %0|%0, %2}
1333 movss\t{%2, %0|%0, %2}
1334 movd\t{%2, %0|%0, %2}
1336 [(set_attr "type" "ssemov")
1337 (set_attr "mode" "SF")])
;; Split applied when TARGET_SSE && reload_completed, for a V4SF memory
;; destination and a non-memory SF operand 1: the replacement is a plain
;; move of operand 1 into the destination re-addressed as SFmode at
;; offset 0.
1340 [(set (match_operand:V4SF 0 "memory_operand" "")
1343 (match_operand:SF 1 "nonmemory_operand" ""))
1346 "TARGET_SSE && reload_completed"
1349 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Set one element of a V4SF register: delegates to ix86_expand_vector_set
;; with `false', the vector, the SF value and the integer value of the
;; const_int operand 2.
1353 (define_expand "vec_setv4sf"
1354 [(match_operand:V4SF 0 "register_operand" "")
1355 (match_operand:SF 1 "register_operand" "")
1356 (match_operand 2 "const_int_operand" "")]
1359 ix86_expand_vector_set (false, operands[0], operands[1],
1360 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination.  The insn is valid
;; unless both operands are memory.  After reload it is split: operand 1 is
;; re-expressed in SFmode (either as a REG with the same register number or
;; via gen_lowpart) and an ordinary move into operand 0 is emitted.
1364 (define_insn_and_split "*vec_extractv4sf_0"
1365 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1367 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1368 (parallel [(const_int 0)])))]
1369 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1371 "&& reload_completed"
1374 rtx op1 = operands[1];
1376 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1378 op1 = gen_lowpart (SFmode, op1);
1379 emit_move_insn (operands[0], op1);
;; Extract one element of a V4SF register into an SF register: delegates to
;; ix86_expand_vector_extract with `false', the destination, the vector and
;; the integer value of the const_int operand 2.
1383 (define_expand "vec_extractv4sf"
1384 [(match_operand:SF 0 "register_operand" "")
1385 (match_operand:V4SF 1 "register_operand" "")
1386 (match_operand 2 "const_int_operand" "")]
1389 ix86_expand_vector_extract (false, operands[0], operands[1],
1390 INTVAL (operands[2]));
1394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1396 ;; Parallel double-precision floating point arithmetic
1398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation: expansion is handled entirely by
;; ix86_expand_fp_absneg_operator (NEG, V2DFmode, ...), followed by DONE.
1400 (define_expand "negv2df2"
1401 [(set (match_operand:V2DF 0 "register_operand" "")
1402 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1404 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value: expansion is handled entirely by
;; ix86_expand_fp_absneg_operator (ABS, V2DFmode, ...), followed by DONE.
1406 (define_expand "absv2df2"
1407 [(set (match_operand:V2DF 0 "register_operand" "")
1408 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1410 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, ...); the template
;; itself is then emitted.
1412 (define_expand "addv2df3"
1413 [(set (match_operand:V2DF 0 "register_operand" "")
1414 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1415 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1417 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd: packed V2DF addition.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1419 (define_insn "*addv2df3"
1420 [(set (match_operand:V2DF 0 "register_operand" "=x")
1421 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1422 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1423 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1424 "addpd\t{%2, %0|%0, %2}"
1425 [(set_attr "type" "sseadd")
1426 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands (attribute mode DF).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be register or
;; memory.  The operand check is made with V2DFmode, matching the mode of
;; the operands and the sibling V2DF patterns (it previously passed
;; V4SFmode, a leftover from the single-precision pattern).
1428 (define_insn "sse2_vmaddv2df3"
1429 [(set (match_operand:V2DF 0 "register_operand" "=x")
1431 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1432 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1435 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1436 "addsd\t{%2, %0|%0, %2}"
1437 [(set_attr "type" "sseadd")
1438 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, ...).
1440 (define_expand "subv2df3"
1441 [(set (match_operand:V2DF 0 "register_operand" "")
1442 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1443 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1445 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd: packed V2DF subtraction.  Operand 1 must be a register tied to
;; the destination (no commutative marker); operand 2 may be register or
;; memory.
1447 (define_insn "*subv2df3"
1448 [(set (match_operand:V2DF 0 "register_operand" "=x")
1449 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1450 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1452 "subpd\t{%2, %0|%0, %2}"
1453 [(set_attr "type" "sseadd")
1454 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands (attribute mode DF).  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
1456 (define_insn "sse2_vmsubv2df3"
1457 [(set (match_operand:V2DF 0 "register_operand" "=x")
1459 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1460 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1464 "subsd\t{%2, %0|%0, %2}"
1465 [(set_attr "type" "sseadd")
1466 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, ...).
1468 (define_expand "mulv2df3"
1469 [(set (match_operand:V2DF 0 "register_operand" "")
1470 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1471 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1473 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd: packed V2DF multiplication.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 may be register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1475 (define_insn "*mulv2df3"
1476 [(set (match_operand:V2DF 0 "register_operand" "=x")
1477 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1478 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1479 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1480 "mulpd\t{%2, %0|%0, %2}"
1481 [(set_attr "type" "ssemul")
1482 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands (attribute mode DF).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be register or
;; memory.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
1484 (define_insn "sse2_vmmulv2df3"
1485 [(set (match_operand:V2DF 0 "register_operand" "=x")
1487 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1488 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1491 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1492 "mulsd\t{%2, %0|%0, %2}"
1493 [(set_attr "type" "ssemul")
1494 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must be a register; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, ...).
1496 (define_expand "divv2df3"
1497 [(set (match_operand:V2DF 0 "register_operand" "")
1498 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1499 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1501 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd: packed V2DF division.  Operand 1 is a register tied to the
;; destination; operand 2 may be register or memory.
1503 (define_insn "*divv2df3"
1504 [(set (match_operand:V2DF 0 "register_operand" "=x")
1505 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1506 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1508 "divpd\t{%2, %0|%0, %2}"
1509 [(set_attr "type" "ssediv")
1510 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands (attribute mode DF).  Operand 1 is a register
;; tied to the destination; operand 2 may be register or memory.
1512 (define_insn "sse2_vmdivv2df3"
1513 [(set (match_operand:V2DF 0 "register_operand" "=x")
1515 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1516 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1520 "divsd\t{%2, %0|%0, %2}"
1521 [(set_attr "type" "ssediv")
1522 (set_attr "mode" "DF")])
;; sqrtpd: packed V2DF square root of a register or memory operand into a
;; register.
1524 (define_insn "sqrtv2df2"
1525 [(set (match_operand:V2DF 0 "register_operand" "=x")
1526 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1528 "sqrtpd\t{%1, %0|%0, %1}"
1529 [(set_attr "type" "sse")
1530 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands (attribute mode DF).  Operand 1 is the value
;; whose square root is taken; operand 2 is tied to the destination.
;; NOTE(review): operand 1 uses the register_operand predicate together
;; with an "xm" constraint, whereas sqrtv2df2 uses nonimmediate_operand --
;; confirm whether the stricter predicate is intentional.
1532 (define_insn "sse2_vmsqrtv2df2"
1533 [(set (match_operand:V2DF 0 "register_operand" "=x")
1535 (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
1536 (match_operand:V2DF 2 "register_operand" "0")
1539 "sqrtsd\t{%1, %0|%0, %1}"
1540 [(set_attr "type" "sse")
1541 (set_attr "mode" "DF")])
1543 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1544 ;; isn't really correct, as those rtl operators aren't defined when
1545 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, ...).
1547 (define_expand "smaxv2df3"
1548 [(set (match_operand:V2DF 0 "register_operand" "")
1549 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1550 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1553 if (!flag_finite_math_only)
1554 operands[1] = force_reg (V2DFmode, operands[1]);
1555 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd variant used only when flag_finite_math_only is set: operand 1 is
;; then marked commutative ("%0") and may be any nonimmediate operand.
;; Also requires TARGET_SSE2 and ix86_binary_operator_ok.
1558 (define_insn "*smaxv2df3_finite"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1560 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1561 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1562 "TARGET_SSE2 && flag_finite_math_only
1563 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1564 "maxpd\t{%2, %0|%0, %2}"
1565 [(set_attr "type" "sseadd")
1566 (set_attr "mode" "V2DF")])
;; maxpd with operands in fixed order: operand 1 is a register tied to the
;; destination (no commutative marker); operand 2 may be register or
;; memory.
1568 (define_insn "*smaxv2df3"
1569 [(set (match_operand:V2DF 0 "register_operand" "=x")
1570 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1571 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1573 "maxpd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sseadd")
1575 (set_attr "mode" "V2DF")])
;; maxsd variant used only when flag_finite_math_only is set: operand 1 is
;; marked commutative ("%0").  Also requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
1577 (define_insn "*sse2_vmsmaxv2df3_finite"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1580 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1581 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1584 "TARGET_SSE2 && flag_finite_math_only
1585 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1586 "maxsd\t{%2, %0|%0, %2}"
1587 [(set_attr "type" "sseadd")
1588 (set_attr "mode" "DF")])
;; maxsd with operands in fixed order: operand 1 is a register tied to the
;; destination; operand 2 may be register or memory.
1590 (define_insn "sse2_vmsmaxv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1598 "maxsd\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sseadd")
1600 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, ...).
1602 (define_expand "sminv2df3"
1603 [(set (match_operand:V2DF 0 "register_operand" "")
1604 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1605 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1608 if (!flag_finite_math_only)
1609 operands[1] = force_reg (V2DFmode, operands[1]);
1610 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd variant used only when flag_finite_math_only is set: operand 1 is
;; then marked commutative ("%0") and may be any nonimmediate operand.
;; Also requires TARGET_SSE2 and ix86_binary_operator_ok.
1613 (define_insn "*sminv2df3_finite"
1614 [(set (match_operand:V2DF 0 "register_operand" "=x")
1615 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1616 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1617 "TARGET_SSE2 && flag_finite_math_only
1618 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1619 "minpd\t{%2, %0|%0, %2}"
1620 [(set_attr "type" "sseadd")
1621 (set_attr "mode" "V2DF")])
;; minpd with operands in fixed order: operand 1 is a register tied to the
;; destination (no commutative marker); operand 2 may be register or
;; memory.
1623 (define_insn "*sminv2df3"
1624 [(set (match_operand:V2DF 0 "register_operand" "=x")
1625 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1626 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1628 "minpd\t{%2, %0|%0, %2}"
1629 [(set_attr "type" "sseadd")
1630 (set_attr "mode" "V2DF")])
;; minsd variant used only when flag_finite_math_only is set: operand 1 is
;; marked commutative ("%0").  Also requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
1632 (define_insn "*sse2_vmsminv2df3_finite"
1633 [(set (match_operand:V2DF 0 "register_operand" "=x")
1635 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1636 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1639 "TARGET_SSE2 && flag_finite_math_only
1640 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1641 "minsd\t{%2, %0|%0, %2}"
1642 [(set_attr "type" "sseadd")
1643 (set_attr "mode" "DF")])
;; minsd with operands in fixed order: operand 1 is a register tied to the
;; destination; operand 2 may be register or memory.
1645 (define_insn "sse2_vmsminv2df3"
1646 [(set (match_operand:V2DF 0 "register_operand" "=x")
1648 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1649 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1653 "minsd\t{%2, %0|%0, %2}"
1654 [(set_attr "type" "sseadd")
1655 (set_attr "mode" "DF")])
;; addsubpd: the pattern refers to operands 1 and 2 twice, the second time
;; through match_dup inside a V2DF `minus'.  Operand 1 is a register tied
;; to the destination; operand 2 may be register or memory.
1657 (define_insn "sse3_addsubv2df3"
1658 [(set (match_operand:V2DF 0 "register_operand" "=x")
1661 (match_operand:V2DF 1 "register_operand" "0")
1662 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1663 (minus:V2DF (match_dup 1) (match_dup 2))
1666 "addsubpd\t{%2, %0|%0, %2}"
1667 [(set_attr "type" "sseadd")
1668 (set_attr "mode" "V2DF")])
;; haddpd: the pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is a register tied to
;; the destination; operand 2 may be register or memory.
1670 (define_insn "sse3_haddv2df3"
1671 [(set (match_operand:V2DF 0 "register_operand" "=x")
1675 (match_operand:V2DF 1 "register_operand" "0")
1676 (parallel [(const_int 0)]))
1677 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1680 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1681 (parallel [(const_int 0)]))
1682 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1684 "haddpd\t{%2, %0|%0, %2}"
1685 [(set_attr "type" "sseadd")
1686 (set_attr "mode" "V2DF")])
;; hsubpd: the pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is a register tied to
;; the destination; operand 2 may be register or memory.
1688 (define_insn "sse3_hsubv2df3"
1689 [(set (match_operand:V2DF 0 "register_operand" "=x")
1693 (match_operand:V2DF 1 "register_operand" "0")
1694 (parallel [(const_int 0)]))
1695 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1698 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1699 (parallel [(const_int 0)]))
1700 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1702 "hsubpd\t{%2, %0|%0, %2}"
1703 [(set_attr "type" "sseadd")
1704 (set_attr "mode" "V2DF")])
;; Expander that emits sse3_haddv2df3 with operand 1 supplied as both
;; source operands.
1706 (define_expand "reduc_splus_v2df"
1707 [(match_operand:V2DF 0 "register_operand" "")
1708 (match_operand:V2DF 1 "register_operand" "")]
1711 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717 ;; Parallel double-precision floating point comparisons
1719 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V2DF comparison producing a V2DF result.  Operator 3 must satisfy
;; sse_comparison_operator and is printed via %D3 to form the cmp..pd
;; mnemonic.  Operand 1 is a register tied to the destination.
1721 (define_insn "sse2_maskcmpv2df3"
1722 [(set (match_operand:V2DF 0 "register_operand" "=x")
1723 (match_operator:V2DF 3 "sse_comparison_operator"
1724 [(match_operand:V2DF 1 "register_operand" "0")
1725 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1727 "cmp%D3pd\t{%2, %0|%0, %2}"
1728 [(set_attr "type" "ssecmp")
1729 (set_attr "mode" "V2DF")])
;; Scalar DF comparison producing a DF result.  Operator 3 must satisfy
;; sse_comparison_operator and is printed via %D3 to form the cmp..sd
;; mnemonic.  Operand 1 is a register tied to the destination.
1731 (define_insn "sse2_maskcmpdf3"
1732 [(set (match_operand:DF 0 "register_operand" "=x")
1733 (match_operator:DF 3 "sse_comparison_operator"
1734 [(match_operand:DF 1 "register_operand" "0")
1735 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1737 "cmp%D3sd\t{%2, %0|%0, %2}"
1738 [(set_attr "type" "ssecmp")
1739 (set_attr "mode" "DF")])
;; cmp..sd applied to V2DF operands (attribute mode DF).  Operator 3 must
;; satisfy sse_comparison_operator and is printed via %D3.  Operand 1 is a
;; register tied to the destination.
1741 (define_insn "sse2_vmmaskcmpv2df3"
1742 [(set (match_operand:V2DF 0 "register_operand" "=x")
1744 (match_operator:V2DF 3 "sse_comparison_operator"
1745 [(match_operand:V2DF 1 "register_operand" "0")
1746 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1750 "cmp%D3sd\t{%2, %0|%0, %2}"
1751 [(set_attr "type" "ssecmp")
1752 (set_attr "mode" "DF")])
;; comisd: sets the flags register in CCFP mode from element 0 of V2DF
;; operand 0 (register) and element 0 of V2DF operand 1 (register or
;; memory).
1754 (define_insn "sse2_comi"
1755 [(set (reg:CCFP FLAGS_REG)
1758 (match_operand:V2DF 0 "register_operand" "x")
1759 (parallel [(const_int 0)]))
1761 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1762 (parallel [(const_int 0)]))))]
1764 "comisd\t{%1, %0|%0, %1}"
1765 [(set_attr "type" "ssecomi")
1766 (set_attr "mode" "DF")])
;; ucomisd: sets the flags register in CCFPU mode from element 0 of V2DF
;; operand 0 (register) and element 0 of V2DF operand 1 (register or
;; memory).
1768 (define_insn "sse2_ucomi"
1769 [(set (reg:CCFPU FLAGS_REG)
1772 (match_operand:V2DF 0 "register_operand" "x")
1773 (parallel [(const_int 0)]))
1775 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1776 (parallel [(const_int 0)]))))]
1778 "ucomisd\t{%1, %0|%0, %1}"
1779 [(set_attr "type" "ssecomi")
1780 (set_attr "mode" "DF")])
;; V2DF vector conditional expander.  Operator 3 (any operator; empty
;; predicate) compares operands 4 and 5; operands 1 and 2 are the two
;; general V2DF values involved in the selection.  The preparation code
;; calls ix86_expand_fp_vcond (operands) and branches on its result.
1782 (define_expand "vcondv2df"
1783 [(set (match_operand:V2DF 0 "register_operand" "")
1785 (match_operator 3 ""
1786 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1787 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1788 (match_operand:V2DF 1 "general_operand" "")
1789 (match_operand:V2DF 2 "general_operand" "")))]
1792 if (ix86_expand_fp_vcond (operands))
1798 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1800 ;; Parallel double-precision floating point logical operations
1802 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (AND, V2DFmode, ...).
1804 (define_expand "andv2df3"
1805 [(set (match_operand:V2DF 0 "register_operand" "")
1806 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1807 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1809 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd: V2DF bitwise AND.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1811 (define_insn "*andv2df3"
1812 [(set (match_operand:V2DF 0 "register_operand" "=x")
1813 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1814 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1815 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1816 "andpd\t{%2, %0|%0, %2}"
1817 [(set_attr "type" "sselog")
1818 (set_attr "mode" "V2DF")])
;; andnpd: computes (NOT operand 1) AND operand 2 on V2DF values.
;; Operand 1 is a register tied to the destination; operand 2 may be
;; register or memory.
1820 (define_insn "sse2_nandv2df3"
1821 [(set (match_operand:V2DF 0 "register_operand" "=x")
1822 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1823 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1825 "andnpd\t{%2, %0|%0, %2}"
1826 [(set_attr "type" "sselog")
1827 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, ...).
1829 (define_expand "iorv2df3"
1830 [(set (match_operand:V2DF 0 "register_operand" "")
1831 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1832 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1834 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd: V2DF bitwise OR.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1836 (define_insn "*iorv2df3"
1837 [(set (match_operand:V2DF 0 "register_operand" "=x")
1838 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1839 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1840 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1841 "orpd\t{%2, %0|%0, %2}"
1842 [(set_attr "type" "sselog")
1843 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, ...).
1845 (define_expand "xorv2df3"
1846 [(set (match_operand:V2DF 0 "register_operand" "")
1847 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1848 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1850 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd: V2DF bitwise XOR.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be register or memory.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
1852 (define_insn "*xorv2df3"
1853 [(set (match_operand:V2DF 0 "register_operand" "=x")
1854 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1855 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1856 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1857 "xorpd\t{%2, %0|%0, %2}"
1858 [(set_attr "type" "sselog")
1859 (set_attr "mode" "V2DF")])
1861 ;; Also define scalar versions. These are used for abs, neg, and
1862 ;; conditional move. Using subregs into vector modes causes register
1863 ;; allocation lossage. These patterns do not allow memory operands
1864 ;; because the native instructions read the full 128 bits.
;; Scalar DF bitwise AND implemented with the packed andpd instruction
;; (hence attribute mode V2DF).  Both inputs must be registers; operand 1
;; is tied to the destination.
1866 (define_insn "*anddf3"
1867 [(set (match_operand:DF 0 "register_operand" "=x")
1868 (and:DF (match_operand:DF 1 "register_operand" "0")
1869 (match_operand:DF 2 "register_operand" "x")))]
1871 "andpd\t{%2, %0|%0, %2}"
1872 [(set_attr "type" "sselog")
1873 (set_attr "mode" "V2DF")])
;; Scalar DF (NOT operand 1) AND operand 2, implemented with the packed
;; andnpd instruction (attribute mode V2DF).  Both inputs must be
;; registers; operand 1 is tied to the destination.
1875 (define_insn "*nanddf3"
1876 [(set (match_operand:DF 0 "register_operand" "=x")
1877 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1878 (match_operand:DF 2 "register_operand" "x")))]
1880 "andnpd\t{%2, %0|%0, %2}"
1881 [(set_attr "type" "sselog")
1882 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR implemented with the packed orpd instruction
;; (attribute mode V2DF).  Both inputs must be registers; operand 1 is
;; tied to the destination.
1884 (define_insn "*iordf3"
1885 [(set (match_operand:DF 0 "register_operand" "=x")
1886 (ior:DF (match_operand:DF 1 "register_operand" "0")
1887 (match_operand:DF 2 "register_operand" "x")))]
1889 "orpd\t{%2, %0|%0, %2}"
1890 [(set_attr "type" "sselog")
1891 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR implemented with the packed xorpd instruction
;; (attribute mode V2DF).  Both inputs must be registers; operand 1 is
;; tied to the destination.
1893 (define_insn "*xordf3"
1894 [(set (match_operand:DF 0 "register_operand" "=x")
1895 (xor:DF (match_operand:DF 1 "register_operand" "0")
1896 (match_operand:DF 2 "register_operand" "x")))]
1898 "xorpd\t{%2, %0|%0, %2}"
1899 [(set_attr "type" "sselog")
1900 (set_attr "mode" "V2DF")])
1902 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1904 ;; Parallel double-precision floating point conversion operations
1906 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert a V2SI operand ("y" register or memory) to V2DF with
;; the RTL `float' operator.  The "unit" attribute is "mmx" for the
;; register alternative only.
1908 (define_insn "sse2_cvtpi2pd"
1909 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1910 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1912 "cvtpi2pd\t{%1, %0|%0, %1}"
1913 [(set_attr "type" "ssecvt")
1914 (set_attr "unit" "mmx,*")
1915 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand (register or memory) to a V2SI result in a "y"
;; register, modelled as UNSPEC_FIX_NOTRUNC.
1917 (define_insn "sse2_cvtpd2pi"
1918 [(set (match_operand:V2SI 0 "register_operand" "=y")
1919 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1920 UNSPEC_FIX_NOTRUNC))]
1922 "cvtpd2pi\t{%1, %0|%0, %1}"
1923 [(set_attr "type" "ssecvt")
1924 (set_attr "unit" "mmx")
1925 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF operand (register or memory) to a V2SI result in a "y"
;; register, using the RTL `fix' operator.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" on the sibling
;; sse2_cvtpd2pi pattern -- confirm which is intended.
1927 (define_insn "sse2_cvttpd2pi"
1928 [(set (match_operand:V2SI 0 "register_operand" "=y")
1929 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1931 "cvttpd2pi\t{%1, %0|%0, %1}"
1932 [(set_attr "type" "ssecvt")
1933 (set_attr "unit" "mmx")
1934 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SImode operand 2 (general register or memory) to DF;
;; V2DF operand 1 is tied to the destination in both alternatives.
1936 (define_insn "sse2_cvtsi2sd"
1937 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1940 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1941 (match_operand:V2DF 1 "register_operand" "0,0")
1944 "cvtsi2sd\t{%2, %0|%0, %2}"
1945 [(set_attr "type" "sseicvt")
1946 (set_attr "mode" "DF")
1947 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd.  Operand 2 is a
;; DI general register or memory; V2DF operand 1 is tied to the
;; destination.  Enabled only when TARGET_SSE2 && TARGET_64BIT.
1949 (define_insn "sse2_cvtsi2sdq"
1950 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1953 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1954 (match_operand:V2DF 1 "register_operand" "0,0")
1956 "TARGET_SSE2 && TARGET_64BIT"
1957 "cvtsi2sdq\t{%2, %0|%0, %2}"
1958 [(set_attr "type" "sseicvt")
1959 (set_attr "mode" "DF")
1960 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: SImode result in a general register from a V2DF source
;; (alternatives "x" / "m") indexed with (const_int 0) and wrapped in
;; UNSPEC_FIX_NOTRUNC.
1962 (define_insn "sse2_cvtsd2si"
1963 [(set (match_operand:SI 0 "register_operand" "=r,r")
1966 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1967 (parallel [(const_int 0)]))]
1968 UNSPEC_FIX_NOTRUNC))]
1970 "cvtsd2si\t{%1, %0|%0, %1}"
1971 [(set_attr "type" "sseicvt")
1972 (set_attr "athlon_decode" "double,vector")
1973 (set_attr "mode" "SI")])
;; Variant of the cvtsd2si pattern whose source is a plain DFmode operand
;; (not a V2DF vector); still expressed as UNSPEC_FIX_NOTRUNC producing SI.
1975 (define_insn "sse2_cvtsd2si_2"
1976 [(set (match_operand:SI 0 "register_operand" "=r,r")
1977 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1978 UNSPEC_FIX_NOTRUNC))]
1980 "cvtsd2si\t{%1, %0|%0, %1}"
1981 [(set_attr "type" "sseicvt")
1982 (set_attr "athlon_decode" "double,vector")
1983 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode result in a general register from a V2DF source
;; indexed with (const_int 0) and wrapped in UNSPEC_FIX_NOTRUNC.
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
1985 (define_insn "sse2_cvtsd2siq"
1986 [(set (match_operand:DI 0 "register_operand" "=r,r")
1989 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1990 (parallel [(const_int 0)]))]
1991 UNSPEC_FIX_NOTRUNC))]
1992 "TARGET_SSE2 && TARGET_64BIT"
1993 "cvtsd2siq\t{%1, %0|%0, %1}"
1994 [(set_attr "type" "sseicvt")
1995 (set_attr "athlon_decode" "double,vector")
1996 (set_attr "mode" "DI")])
;; Variant of the cvtsd2siq pattern whose source is a plain DFmode operand;
;; still expressed as UNSPEC_FIX_NOTRUNC producing DI.
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
1998 (define_insn "sse2_cvtsd2siq_2"
1999 [(set (match_operand:DI 0 "register_operand" "=r,r")
2000 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2001 UNSPEC_FIX_NOTRUNC))]
2002 "TARGET_SSE2 && TARGET_64BIT"
2003 "cvtsd2siq\t{%1, %0|%0, %1}"
2004 [(set_attr "type" "sseicvt")
2005 (set_attr "athlon_decode" "double,vector")
2006 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result in a general register from a V2DF source
;; (alternatives "x" / "m") indexed with (const_int 0).
2008 (define_insn "sse2_cvttsd2si"
2009 [(set (match_operand:SI 0 "register_operand" "=r,r")
2012 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2013 (parallel [(const_int 0)]))))]
2015 "cvttsd2si\t{%1, %0|%0, %1}"
2016 [(set_attr "type" "sseicvt")
2017 (set_attr "mode" "SI")
2018 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: DImode counterpart of sse2_cvttsd2si.  Same V2DF source
;; indexed with (const_int 0); enabled only when
;; TARGET_SSE2 && TARGET_64BIT.
2020 (define_insn "sse2_cvttsd2siq"
2021 [(set (match_operand:DI 0 "register_operand" "=r,r")
2024 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2025 (parallel [(const_int 0)]))))]
2026 "TARGET_SSE2 && TARGET_64BIT"
2027 "cvttsd2siq\t{%1, %0|%0, %1}"
2028 [(set_attr "type" "sseicvt")
2029 (set_attr "mode" "DI")
2030 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: produces a V2DF from elements 0 and 1 of a V4SI operand
;; (register or memory).
2032 (define_insn "sse2_cvtdq2pd"
2033 [(set (match_operand:V2DF 0 "register_operand" "=x")
2036 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2037 (parallel [(const_int 0) (const_int 1)]))))]
2039 "cvtdq2pd\t{%1, %0|%0, %1}"
2040 [(set_attr "type" "ssecvt")
2041 (set_attr "mode" "V2DF")])
;; Expander for the V2DF -> V4SI conversion.  The preparation statement
;; supplies operand 2 as the V2SImode zero constant, which the matching
;; insn pattern expects as a const0_operand.
2043 (define_expand "sse2_cvtpd2dq"
2044 [(set (match_operand:V4SI 0 "register_operand" "")
2046 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2050 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V4SI result built from a V2SI unspec of V2DF operand 1
;; (register or memory) together with operand 2, which must be a V2SI zero
;; constant (const0_operand).
2052 (define_insn "*sse2_cvtpd2dq"
2053 [(set (match_operand:V4SI 0 "register_operand" "=x")
2055 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2057 (match_operand:V2SI 2 "const0_operand" "")))]
2059 "cvtpd2dq\t{%1, %0|%0, %1}"
2060 [(set_attr "type" "ssecvt")
2061 (set_attr "mode" "TI")])
;; Expander for the V2DF -> V4SI conversion expressed with `fix'.
;; The preparation statement supplies operand 2 as the V2SImode zero
;; constant.
2063 (define_expand "sse2_cvttpd2dq"
2064 [(set (match_operand:V4SI 0 "register_operand" "")
2066 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2069 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V4SI result built from (fix:V2SI operand 1) together with
;; operand 2, which must be a V2SI zero constant (const0_operand).
2071 (define_insn "*sse2_cvttpd2dq"
2072 [(set (match_operand:V4SI 0 "register_operand" "=x")
2074 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2075 (match_operand:V2SI 2 "const0_operand" "")))]
2077 "cvttpd2dq\t{%1, %0|%0, %1}"
2078 [(set_attr "type" "ssecvt")
2079 (set_attr "mode" "TI")])
;; cvtsd2ss: V2DF operand 2 (register or memory) goes through
;; float_truncate:V2SF; V4SF operand 1 is tied to the destination in both
;; alternatives.
2081 (define_insn "sse2_cvtsd2ss"
2082 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2085 (float_truncate:V2SF
2086 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2087 (match_operand:V4SF 1 "register_operand" "0,0")
2090 "cvtsd2ss\t{%2, %0|%0, %2}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "athlon_decode" "vector,double")
2093 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (register or memory);
;; V2DF operand 1 is tied to the destination.
2095 (define_insn "sse2_cvtss2sd"
2096 [(set (match_operand:V2DF 0 "register_operand" "=x")
2100 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2101 (parallel [(const_int 0) (const_int 1)])))
2102 (match_operand:V2DF 1 "register_operand" "0")
2105 "cvtss2sd\t{%2, %0|%0, %2}"
2106 [(set_attr "type" "ssecvt")
2107 (set_attr "mode" "DF")])
;; Expander for the V2DF -> V4SF conversion expressed with float_truncate.
;; The preparation statement supplies operand 2 as the V2SFmode zero
;; constant, which the matching insn pattern expects as a const0_operand.
;; The constraint string on operand 1 is left empty, as in every other
;; define_expand in this file; the register/memory constraint belongs on
;; the matching define_insn.
2109 (define_expand "sse2_cvtpd2ps"
2110 [(set (match_operand:V4SF 0 "register_operand" "")
2112 (float_truncate:V2SF
2113 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2116 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for the sse2_cvtpd2ps expander: float_truncate of a V2DF
;; register-or-memory source, with a V2SF zero constant as operand 2.
;; Emits cvtpd2ps.
2118 (define_insn "*sse2_cvtpd2ps"
2119 [(set (match_operand:V4SF 0 "register_operand" "=x")
2121 (float_truncate:V2SF
2122 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2123 (match_operand:V2SF 2 "const0_operand" "")))]
2125 "cvtpd2ps\t{%1, %0|%0, %1}"
2126 [(set_attr "type" "ssecvt")
2127 (set_attr "mode" "V4SF")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand 1 (register or memory)
;; are the conversion input; the result is a V2DF register.
2129 (define_insn "sse2_cvtps2pd"
2130 [(set (match_operand:V2DF 0 "register_operand" "=x")
2133 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2134 (parallel [(const_int 0) (const_int 1)]))))]
2136 "cvtps2pd\t{%1, %0|%0, %1}"
2137 [(set_attr "type" "ssecvt")
2138 (set_attr "mode" "V2DF")])
2140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2142 ;; Parallel double-precision floating point element swizzling
2144 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Interleave-high of two V2DF operands.  Three alternatives:
;;   1. register destination tied to operand 1: unpckhpd with operand 2;
;;   2. register destination tied to operand 2, operand 1 in offsettable
;;      memory: movlpd from the high half (%H1) of operand 1;
;;   3. memory destination tied to operand 2: movhpd store from operand 1.
;; The condition rejects the case where operands 1 and 2 are both memory.
2146 (define_insn "sse2_unpckhpd"
2147 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2150 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2151 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2152 (parallel [(const_int 1)
2154 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2156 unpckhpd\t{%2, %0|%0, %2}
2157 movlpd\t{%H1, %0|%0, %H1}
2158 movhpd\t{%1, %0|%0, %1}"
2159 [(set_attr "type" "sselog,ssemov,ssemov")
2160 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand.  The first alternative (register
;; destination, register-or-memory source) emits movddup; the second
;; alternative has an offsettable memory destination and a register source.
;; The condition rejects memory-to-memory.
2162 (define_insn "*sse3_movddup"
2163 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2166 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2168 (parallel [(const_int 0)
2170 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2172 movddup\t{%1, %0|%0, %1}
2174 [(set_attr "type" "sselog1,ssemov")
2175 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3) for a register source and a V2DF memory
;; destination: view the source hard register in DFmode and store that
;; value twice, at byte offsets 0 and 8 of the destination.
2178 [(set (match_operand:V2DF 0 "memory_operand" "")
2181 (match_operand:V2DF 1 "register_operand" "")
2183 (parallel [(const_int 0)
2185 "TARGET_SSE3 && reload_completed"
2188 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2189 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2190 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Interleave-low of two V2DF operands; operand 1 is tied to the
;; destination in every alternative.  Three alternatives:
;;   1. operand 2 in a register: unpcklpd;
;;   2. operand 2 in memory: movhpd load;
;;   3. offsettable memory destination: movlpd store of operand 2 into the
;;      high half (%H0) of the destination.
2194 (define_insn "sse2_unpcklpd"
2195 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2198 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2199 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2200 (parallel [(const_int 0)
2202 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2204 unpcklpd\t{%2, %0|%0, %2}
2205 movhpd\t{%2, %0|%0, %2}
2206 movlpd\t{%2, %H0|%H0, %2}"
2207 [(set_attr "type" "sselog,ssemov,ssemov")
2208 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for the shufpd intrinsic.  Operand 3 is an SI constant mask;
;; the preparation code decodes it into separate selector operands for
;; sse2_shufpd_1.  Bit 1 of the mask chooses 3 or 2 as the last selector.
2210 (define_expand "sse2_shufpd"
2211 [(match_operand:V2DF 0 "register_operand" "")
2212 (match_operand:V2DF 1 "register_operand" "")
2213 (match_operand:V2DF 2 "nonimmediate_operand" "")
2214 (match_operand:SI 3 "const_int_operand" "")]
2217 int mask = INTVAL (operands[3]);
2218 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2220 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two selector operands:
;; operand 3 is in the range 0..1 and operand 4 in the range 2..3.
;; The output code folds them back into the instruction immediate
;; (bit 0 = operand 3, bit 1 = operand 4 - 2) and stores it in operands[3]
;; before returning the template.
2224 (define_insn "sse2_shufpd_1"
2225 [(set (match_operand:V2DF 0 "register_operand" "=x")
2228 (match_operand:V2DF 1 "register_operand" "0")
2229 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2230 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2231 (match_operand 4 "const_2_to_3_operand" "")])))]
2235 mask = INTVAL (operands[3]);
2236 mask |= (INTVAL (operands[4]) - 2) << 1;
2237 operands[3] = GEN_INT (mask);
2239 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2241 [(set_attr "type" "sselog")
2242 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF (operand 1) as a DF value.  Three
;; alternatives: store to memory with movhpd; register destination tied to
;; the source (type sselog1); and an offsettable memory source with an
;; SSE, x87 or general register destination.  Memory-to-memory is rejected.
2244 (define_insn "sse2_storehpd"
2245 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2247 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2248 (parallel [(const_int 1)])))]
2249 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2251 movhpd\t{%1, %0|%0, %1}
2254 [(set_attr "type" "ssemov,sselog1,ssemov")
2255 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 from a V2DF in memory into a
;; register becomes a plain DF load from byte offset 8 of that memory.
2258 [(set (match_operand:DF 0 "register_operand" "")
2260 (match_operand:V2DF 1 "memory_operand" "")
2261 (parallel [(const_int 1)])))]
2262 "TARGET_SSE2 && reload_completed"
2263 [(set (match_dup 0) (match_dup 1))]
2265 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF (operand 1) as a DF value.  The memory
;; destination alternative emits movlpd; the other two alternatives have a
;; register destination with a register or memory source.
;; Memory-to-memory is rejected.
2268 (define_insn "sse2_storelpd"
2269 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2271 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2272 (parallel [(const_int 0)])))]
2273 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2275 movlpd\t{%1, %0|%0, %1}
2278 [(set_attr "type" "ssemov")
2279 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of a V2DF into a register
;; becomes a plain DF move.  The source is re-expressed in DFmode, either
;; as the same hard register (gen_rtx_REG) or through gen_lowpart; the test
;; that chooses between the two is not visible in this listing.
2282 [(set (match_operand:DF 0 "register_operand" "")
2284 (match_operand:V2DF 1 "nonimmediate_operand" "")
2285 (parallel [(const_int 0)])))]
2286 "TARGET_SSE2 && reload_completed"
2289 rtx op1 = operands[1];
2291 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2293 op1 = gen_lowpart (DFmode, op1);
2294 emit_move_insn (operands[0], op1);
;; Combine element 0 of operand 1 (V2DF) with the DF operand 2.
;; Alternatives, in order: movhpd load from memory; unpcklpd with a
;; register; a shufpd form where operand 2 is tied to the destination; and
;; an offsettable memory destination (type "other").
;; NOTE(review): in the third alternative operand 2 is tied to the
;; destination and operand 1 is a separate register, yet the template is
;; "shufpd $1, %1, %0".  By the documented shufpd semantics that produces
;; {dest[1], op1[0]}, which does not look like {op1[0], op2} -- confirm
;; against the instruction reference, or drop this alternative.
2298 (define_insn "sse2_loadhpd"
2299 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2302 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2303 (parallel [(const_int 0)]))
2304 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2305 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2307 movhpd\t{%2, %0|%0, %2}
2308 unpcklpd\t{%2, %0|%0, %2}
2309 shufpd\t{$1, %1, %0|%0, %1, 1}
2311 [(set_attr "type" "ssemov,sselog,sselog,other")
2312 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for a V2DF memory destination that keeps its own
;; element 0 and receives the DF register in operand 1: the operation
;; reduces to a single DF store at byte offset 8.
2315 [(set (match_operand:V2DF 0 "memory_operand" "")
2317 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2318 (match_operand:DF 1 "register_operand" "")))]
2319 "TARGET_SSE2 && reload_completed"
2320 [(set (match_dup 0) (match_dup 1))]
2322 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine the DF operand 2 with element 1 of operand 1 (V2DF).
;; Alternatives, in order: movsd load with a constant operand 1 ("C");
;; movlpd load; movsd from a register; a shufpd form where operand 2 is
;; tied to the destination; movhpd from the high half (%H1) of an
;; offsettable memory operand 1; and a memory destination (type "other").
;; In the shufpd alternative operand 2 is constrained to "0" and operand 1
;; to "x", so the shuffle source must be operand 1.  Naming operand 2 there
;; would shuffle the destination with itself and never read operand 1.
2325 (define_insn "sse2_loadlpd"
2326 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2328 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2330 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2331 (parallel [(const_int 1)]))))]
2332 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2334 movsd\t{%2, %0|%0, %2}
2335 movlpd\t{%2, %0|%0, %2}
2336 movsd\t{%2, %0|%0, %2}
2337 shufpd\t{$2, %1, %0|%0, %1, 2}
2338 movhpd\t{%H1, %0|%0, %H1}
2340 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2341 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a V2DF memory destination that receives the DF
;; register in operand 1 and keeps its own element 1: the operation reduces
;; to a single DF store.  The store must go to byte offset 0 (element 0);
;; offset 8 would overwrite the element the pattern says is preserved.
;; Compare the sibling split that keeps element 0 and stores at offset 8.
2344 [(set (match_operand:V2DF 0 "memory_operand" "")
2346 (match_operand:DF 1 "register_operand" "")
2347 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2348 "TARGET_SSE2 && reload_completed"
2349 [(set (match_dup 0) (match_dup 1))]
2351 operands[0] = adjust_address (operands[0], DFmode, 0);
2354 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF as DF, using single-precision moves: movhps store to memory,
;; movhlps between registers, or movlps load from the high half (%H1) of an
;; offsettable memory source.  Memory-to-memory is rejected.
2356 (define_insn "*vec_extractv2df_1_sse"
2357 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2359 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2360 (parallel [(const_int 1)])))]
2361 "!TARGET_SSE2 && TARGET_SSE
2362 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2364 movhps\t{%1, %0|%0, %1}
2365 movhlps\t{%1, %0|%0, %1}
2366 movlps\t{%H1, %0|%0, %H1}"
2367 [(set_attr "type" "ssemov")
2368 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF as DF: movlps store to memory, movaps between registers, or movlps
;; load from memory.  Memory-to-memory is rejected.
2370 (define_insn "*vec_extractv2df_0_sse"
2371 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2373 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2374 (parallel [(const_int 0)])))]
2375 "!TARGET_SSE2 && TARGET_SSE
2376 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2378 movlps\t{%1, %0|%0, %1}
2379 movaps\t{%1, %0|%0, %1}
2380 movlps\t{%1, %0|%0, %1}"
2381 [(set_attr "type" "ssemov")
2382 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of two V2DF operands, with six alternatives.
;; In the first three, operand 1 is tied to the destination and operand 2
;; is the source (movsd / movlpd).  In the last three, operand 2 is tied to
;; the destination and operand 1 is the source (shufpd / movhps).
;; In the shufpd alternative operand 2 is constrained to "0" and operand 1
;; to "x", so the shuffle source must be operand 1.  Naming operand 2 there
;; would shuffle the destination with itself and never read operand 1.
2384 (define_insn "sse2_movsd"
2385 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2387 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2388 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2392 movsd\t{%2, %0|%0, %2}
2393 movlpd\t{%2, %0|%0, %2}
2394 movlpd\t{%2, %0|%0, %2}
2395 shufpd\t{$2, %1, %0|%0, %1, 2}
2396 movhps\t{%H1, %0|%0, %H1}
2397 movhps\t{%1, %H0|%H0, %1}"
2398 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2399 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from a single DF operand (register or memory) with movddup.
2401 (define_insn "*vec_dupv2df_sse3"
2402 [(set (match_operand:V2DF 0 "register_operand" "=x")
2404 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2406 "movddup\t{%1, %0|%0, %1}"
2407 [(set_attr "type" "sselog1")
2408 (set_attr "mode" "DF")])
;; Build a V2DF from a single DF register that is tied to the destination.
;; The condition and output template are not visible in this listing.
2410 (define_insn "*vec_dupv2df"
2411 [(set (match_operand:V2DF 0 "register_operand" "=x")
2413 (match_operand:DF 1 "register_operand" "0")))]
2416 [(set_attr "type" "sselog1")
2417 (set_attr "mode" "V4SF")])
;; V2DF construction from DF operand 1 (register or memory) that can be
;; emitted as a single movddup.  The second concatenated operand is not
;; visible in this listing.
2419 (define_insn "*vec_concatv2df_sse3"
2420 [(set (match_operand:V2DF 0 "register_operand" "=x")
2422 (match_operand:DF 1 "nonimmediate_operand" "xm")
2425 "movddup\t{%1, %0|%0, %1}"
2426 [(set_attr "type" "sselog1")
2427 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  The first three alternatives use
;; the "Y" register constraint and double-precision instructions
;; (unpcklpd, movhpd, movsd); the last two use "x" and single-precision
;; moves (movlhps, movhps).  In the movsd alternative operand 2 is the
;; constant "C" and only operand 1 is loaded.
2429 (define_insn "*vec_concatv2df"
2430 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2432 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2433 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2436 unpcklpd\t{%2, %0|%0, %2}
2437 movhpd\t{%2, %0|%0, %2}
2438 movsd\t{%1, %0|%0, %1}
2439 movlhps\t{%2, %0|%0, %2}
2440 movhps\t{%2, %0|%0, %2}"
2441 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2442 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander that sets one element of a V2DF register: operand 1 is the DF
;; value and operand 2 the constant element index.  All work is delegated
;; to ix86_expand_vector_set, called with a first argument of false.
2444 (define_expand "vec_setv2df"
2445 [(match_operand:V2DF 0 "register_operand" "")
2446 (match_operand:DF 1 "register_operand" "")
2447 (match_operand 2 "const_int_operand" "")]
2450 ix86_expand_vector_set (false, operands[0], operands[1],
2451 INTVAL (operands[2]));
;; Expander that extracts one element of a V2DF register into a DF
;; register: operand 2 is the constant element index.  All work is
;; delegated to ix86_expand_vector_extract, called with a first argument of
;; false.
2455 (define_expand "vec_extractv2df"
2456 [(match_operand:DF 0 "register_operand" "")
2457 (match_operand:V2DF 1 "register_operand" "")
2458 (match_operand 2 "const_int_operand" "")]
2461 ix86_expand_vector_extract (false, operands[0], operands[1],
2462 INTVAL (operands[2]));
;; Expander that initializes a V2DF register from operand 1 (no predicate
;; or mode is given for it).  All work is delegated to
;; ix86_expand_vector_init, called with a first argument of false.
2466 (define_expand "vec_initv2df"
2467 [(match_operand:V2DF 0 "register_operand" "")
2468 (match_operand 1 "" "")]
2471 ix86_expand_vector_init (false, operands[0], operands[1]);
2475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2477 ;; Parallel integral arithmetic
2479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every SSEMODEI integer vector mode.  The
;; preparation statement forces an all-zero vector of the same mode into a
;; register as operand 2 -- presumably so the negation is expressed as a
;; subtraction from zero; the RTL operator line is not visible here.
2481 (define_expand "neg<mode>2"
2482 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2485 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2487 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for every SSEMODEI integer vector mode.  Operands are
;; run through ix86_fixup_binary_operands_no_copy with code PLUS before the
;; pattern is emitted.
2489 (define_expand "add<mode>3"
2490 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2491 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2492 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2494 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Integer vector add, emitted as padd plus the element-size suffix from
;; ssevecsize (b/w/d/q).  Operand 1 is marked commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.
2496 (define_insn "*add<mode>3"
2497 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2499 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2500 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2501 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2502 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseiadd")
2504 (set_attr "mode" "TI")])
;; Signed-saturating add (checked with code SS_PLUS) for the byte and word
;; vector modes in SSEMODE12; emitted as paddsb / paddsw.
2506 (define_insn "sse2_ssadd<mode>3"
2507 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2509 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2510 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2511 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2512 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2513 [(set_attr "type" "sseiadd")
2514 (set_attr "mode" "TI")])
;; Unsigned-saturating add (checked with code US_PLUS) for the byte and
;; word vector modes in SSEMODE12; emitted as paddusb / paddusw.
2516 (define_insn "sse2_usadd<mode>3"
2517 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2519 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2520 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2521 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2522 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2523 [(set_attr "type" "sseiadd")
2524 (set_attr "mode" "TI")])
;; Subtraction expander for every SSEMODEI integer vector mode.  Unlike the
;; add expander, operand 1 must already be a register.  Operands are run
;; through ix86_fixup_binary_operands_no_copy with code MINUS.
2526 (define_expand "sub<mode>3"
2527 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2528 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2529 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2531 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Integer vector subtract, emitted as psub plus the element-size suffix.
;; Operand 1 is tied to the destination and is not marked commutative.
2533 (define_insn "*sub<mode>3"
2534 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2536 (match_operand:SSEMODEI 1 "register_operand" "0")
2537 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2539 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2540 [(set_attr "type" "sseiadd")
2541 (set_attr "mode" "TI")])
;; psubsb / psubsw for the byte and word vector modes in SSEMODE12.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
2543 (define_insn "sse2_sssub<mode>3"
2544 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2546 (match_operand:SSEMODE12 1 "register_operand" "0")
2547 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2549 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2550 [(set_attr "type" "sseiadd")
2551 (set_attr "mode" "TI")])
;; psubusb / psubusw for the byte and word vector modes in SSEMODE12.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
2553 (define_insn "sse2_ussub<mode>3"
2554 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2556 (match_operand:SSEMODE12 1 "register_operand" "0")
2557 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2559 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2560 [(set_attr "type" "sseiadd")
2561 (set_attr "mode" "TI")])
;; V16QI multiply, synthesized from word multiplies.  Twelve V16QI
;; temporaries are allocated.  Each input is unpacked (high and low halves)
;; so that every 16-bit word carries one source byte, the halves are
;; multiplied with mulv8hi3, and three rounds of punpckhbw/punpcklbw gather
;; the low byte of every product back into byte order.
2563 (define_expand "mulv16qi3"
2564 [(set (match_operand:V16QI 0 "register_operand" "")
2565 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2566 (match_operand:V16QI 2 "register_operand" "")))]
2572 for (i = 0; i < 12; ++i)
2573 t[i] = gen_reg_rtx (V16QImode);
2575 /* Unpack data such that we've got a source byte in each low byte of
2576 each word. We don't care what goes into the high byte of each word.
2577 Rather than trying to get zero in there, most convenient is to let
2578 it be a copy of the low byte. */
2579 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2580 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2581 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2582 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2584 /* Multiply words. The end-of-line annotations here give a picture of what
2585 the output of that instruction looks like. Dot means don't care; the
2586 letters are the bytes of the result with A being the most significant. */
2587 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2588 gen_lowpart (V8HImode, t[0]),
2589 gen_lowpart (V8HImode, t[1])));
2590 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2591 gen_lowpart (V8HImode, t[2]),
2592 gen_lowpart (V8HImode, t[3])));
2594 /* Extract the relevant bytes and merge them back together. */
2595 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2596 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2597 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2598 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2599 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2600 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2603 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are run through
;; ix86_fixup_binary_operands_no_copy with code MULT before the pattern is
;; emitted.
2607 (define_expand "mulv8hi3"
2608 [(set (match_operand:V8HI 0 "register_operand" "")
2609 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2610 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2612 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply, emitted as pmullw.  Operand 1 is marked commutative
;; ("%") and tied to the destination; operand 2 is register or memory.
2614 (define_insn "*mulv8hi3"
2615 [(set (match_operand:V8HI 0 "register_operand" "=x")
2616 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2617 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2618 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2619 "pmullw\t{%2, %0|%0, %2}"
2620 [(set_attr "type" "sseimul")
2621 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply (matched by the pmulhw
;; insn).  Operands are run through ix86_fixup_binary_operands_no_copy with
;; code MULT.
2623 (define_expand "smulv8hi3_highpart"
2624 [(set (match_operand:V8HI 0 "register_operand" "")
2629 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2631 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2634 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; High-part V8HI multiply emitted as pmulhw.  Operand 1 is marked
;; commutative and tied to the destination; operand 2 is register or
;; memory.
2636 (define_insn "*smulv8hi3_highpart"
2637 [(set (match_operand:V8HI 0 "register_operand" "=x")
2642 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2644 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2646 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2647 "pmulhw\t{%2, %0|%0, %2}"
2648 [(set_attr "type" "sseimul")
2649 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply (matched by the
;; pmulhuw insn).  Operands are run through
;; ix86_fixup_binary_operands_no_copy with code MULT.
2651 (define_expand "umulv8hi3_highpart"
2652 [(set (match_operand:V8HI 0 "register_operand" "")
2657 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2659 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2662 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; High-part V8HI multiply emitted as pmulhuw.  Operand 1 is marked
;; commutative and tied to the destination; operand 2 is register or
;; memory.
2664 (define_insn "*umulv8hi3_highpart"
2665 [(set (match_operand:V8HI 0 "register_operand" "=x")
2670 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2672 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2674 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2675 "pmulhuw\t{%2, %0|%0, %2}"
2676 [(set_attr "type" "sseimul")
2677 (set_attr "mode" "TI")])
;; pmuludq: elements 0 and 2 of each V4SI operand are selected and the
;; result is a V2DI.  Operand 1 is marked commutative and tied to the
;; destination; operand 2 is register or memory.
;; The operands checked by ix86_binary_operator_ok are V4SI, so the mode
;; passed to it is V4SImode (not V8HImode, which belongs to the word
;; multiply patterns).
2679 (define_insn "sse2_umulv2siv2di3"
2680 [(set (match_operand:V2DI 0 "register_operand" "=x")
2684 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2685 (parallel [(const_int 0) (const_int 2)])))
2688 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2689 (parallel [(const_int 0) (const_int 2)])))))]
2690 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2691 "pmuludq\t{%2, %0|%0, %2}"
2692 [(set_attr "type" "sseimul")
2693 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The pattern uses
;; two V4HI selections of each operand, one whose index list starts at
;; element 0 and one that starts at element 1 and ends at element 7.
;; Operand 1 is marked commutative and tied to the destination.
2695 (define_insn "sse2_pmaddwd"
2696 [(set (match_operand:V4SI 0 "register_operand" "=x")
2701 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2702 (parallel [(const_int 0)
2708 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2709 (parallel [(const_int 0)
2715 (vec_select:V4HI (match_dup 1)
2716 (parallel [(const_int 1)
2721 (vec_select:V4HI (match_dup 2)
2722 (parallel [(const_int 1)
2725 (const_int 7)]))))))]
2727 "pmaddwd\t{%2, %0|%0, %2}"
2728 [(set_attr "type" "sseiadd")
2729 (set_attr "mode" "TI")])
;; V4SI multiply, synthesized from two pmuludq multiplies.  Elements 0 and
;; 2 are multiplied directly; both inputs are then shifted right by 32 bits
;; as whole TImode values so elements 1 and 3 land in the even slots and
;; are multiplied the same way.  Two pshufd shuffles and a punpckldq merge
;; the four low products into the result.
2731 (define_expand "mulv4si3"
2732 [(set (match_operand:V4SI 0 "register_operand" "")
2733 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2734 (match_operand:V4SI 2 "register_operand" "")))]
2737 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2743 t1 = gen_reg_rtx (V4SImode);
2744 t2 = gen_reg_rtx (V4SImode);
2745 t3 = gen_reg_rtx (V4SImode);
2746 t4 = gen_reg_rtx (V4SImode);
2747 t5 = gen_reg_rtx (V4SImode);
2748 t6 = gen_reg_rtx (V4SImode);
2749 thirtytwo = GEN_INT (32);
2751 /* Multiply elements 2 and 0. */
2752 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2754 /* Shift both input vectors down one element, so that elements 3 and 1
2755 are now in the slots for elements 2 and 0. For K8, at least, this is
2756 faster than using a shuffle. */
2757 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2758 gen_lowpart (TImode, op1), thirtytwo));
2759 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2760 gen_lowpart (TImode, op2), thirtytwo));
2762 /* Multiply elements 3 and 1. */
2763 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2765 /* Move the results in element 2 down to element 1; we don't care what
2766 goes in elements 2 and 3. */
2767 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2768 const0_rtx, const0_rtx));
2769 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2770 const0_rtx, const0_rtx));
2772 /* Merge the parts back together. */
2773 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, synthesized from three pmuludq multiplies.  With each
;; 64-bit element split into 32-bit halves, the low result is
;; lo1*lo2 + ((lo1*hi2 + lo2*hi1) << 32).  The high halves are brought into
;; the low 32 bits with a logical right shift, multiplied against the other
;; operand's low half, shifted back up by 32 and added to the low product.
2777 (define_expand "mulv2di3"
2778 [(set (match_operand:V2DI 0 "register_operand" "")
2779 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2780 (match_operand:V2DI 2 "register_operand" "")))]
2783 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2789 t1 = gen_reg_rtx (V2DImode);
2790 t2 = gen_reg_rtx (V2DImode);
2791 t3 = gen_reg_rtx (V2DImode);
2792 t4 = gen_reg_rtx (V2DImode);
2793 t5 = gen_reg_rtx (V2DImode);
2794 t6 = gen_reg_rtx (V2DImode);
2795 thirtytwo = GEN_INT (32);
2797 /* Multiply low parts. */
2798 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2799 gen_lowpart (V4SImode, op2)));
2801 /* Shift input vectors right 32 bits so we can multiply high parts. */
2802 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2803 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2805 /* Multiply high parts by low parts. */
2806 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2807 gen_lowpart (V4SImode, t3)));
2808 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2809 gen_lowpart (V4SImode, t2)));
2811 /* Shift them back. */
2812 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2813 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2815 /* Add the three parts together. */
2816 emit_insn (gen_addv2di3 (t6, t1, t4));
2817 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, V8HI to V4SI, "hi" variant.  Computes the low
;; 16 bits of each product (mulv8hi3) and the signed high 16 bits
;; (smulv8hi3_highpart), then combines them with
;; vec_interleave_highv8hi into the destination viewed as V8HI.
2821 (define_expand "vec_widen_smult_hi_v8hi"
2822 [(match_operand:V4SI 0 "register_operand" "")
2823 (match_operand:V8HI 1 "register_operand" "")
2824 (match_operand:V8HI 2 "register_operand" "")]
2827 rtx op1, op2, t1, t2, dest;
2831 t1 = gen_reg_rtx (V8HImode);
2832 t2 = gen_reg_rtx (V8HImode);
2833 dest = gen_lowpart (V8HImode, operands[0]);
2835 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2836 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2837 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, V8HI to V4SI, "lo" variant.  Same low/high
;; product pair as the "hi" variant, combined with
;; vec_interleave_lowv8hi instead.
2841 (define_expand "vec_widen_smult_lo_v8hi"
2842 [(match_operand:V4SI 0 "register_operand" "")
2843 (match_operand:V8HI 1 "register_operand" "")
2844 (match_operand:V8HI 2 "register_operand" "")]
2847 rtx op1, op2, t1, t2, dest;
2851 t1 = gen_reg_rtx (V8HImode);
2852 t2 = gen_reg_rtx (V8HImode);
2853 dest = gen_lowpart (V8HImode, operands[0]);
2855 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2856 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2857 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI to V4SI, "hi" variant.  Computes the
;; low 16 bits of each product (mulv8hi3) and the unsigned high 16 bits
;; (umulv8hi3_highpart), then combines them with
;; vec_interleave_highv8hi into the destination viewed as V8HI.
2861 (define_expand "vec_widen_umult_hi_v8hi"
2862 [(match_operand:V4SI 0 "register_operand" "")
2863 (match_operand:V8HI 1 "register_operand" "")
2864 (match_operand:V8HI 2 "register_operand" "")]
2867 rtx op1, op2, t1, t2, dest;
2871 t1 = gen_reg_rtx (V8HImode);
2872 t2 = gen_reg_rtx (V8HImode);
2873 dest = gen_lowpart (V8HImode, operands[0]);
2875 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2876 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2877 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI to V4SI, "lo" variant.  Same low/high
;; product pair as the "hi" variant, combined with
;; vec_interleave_lowv8hi instead.
2881 (define_expand "vec_widen_umult_lo_v8hi"
2882 [(match_operand:V4SI 0 "register_operand" "")
2883 (match_operand:V8HI 1 "register_operand" "")
2884 (match_operand:V8HI 2 "register_operand" "")]
2887 rtx op1, op2, t1, t2, dest;
2891 t1 = gen_reg_rtx (V8HImode);
2892 t2 = gen_reg_rtx (V8HImode);
2893 dest = gen_lowpart (V8HImode, operands[0]);
2895 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2896 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2897 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, V4SI to V2DI, "hi" variant.  Each input is
;; interleaved with itself (vec_interleave_highv4si) so the wanted elements
;; sit in the slots the multiply reads, then gen_sse2_umulv2siv2di3 is
;; emitted.
;; NOTE(review): this signed variant emits gen_sse2_umulv2siv2di3, the same
;; pmuludq-based multiply the unsigned variant uses.  An unsigned 32x32->64
;; product differs from the signed one when an input is negative --
;; confirm this is intended.
2901 (define_expand "vec_widen_smult_hi_v4si"
2902 [(match_operand:V2DI 0 "register_operand" "")
2903 (match_operand:V4SI 1 "register_operand" "")
2904 (match_operand:V4SI 2 "register_operand" "")]
2907 rtx op1, op2, t1, t2;
2911 t1 = gen_reg_rtx (V4SImode);
2912 t2 = gen_reg_rtx (V4SImode);
2914 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2915 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2916 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, V4SI to V2DI, "lo" variant.  Each input is
;; interleaved with itself (vec_interleave_lowv4si), then
;; gen_sse2_umulv2siv2di3 is emitted.
;; NOTE(review): this signed variant emits gen_sse2_umulv2siv2di3, the same
;; pmuludq-based multiply the unsigned variant uses.  An unsigned 32x32->64
;; product differs from the signed one when an input is negative --
;; confirm this is intended.
2920 (define_expand "vec_widen_smult_lo_v4si"
2921 [(match_operand:V2DI 0 "register_operand" "")
2922 (match_operand:V4SI 1 "register_operand" "")
2923 (match_operand:V4SI 2 "register_operand" "")]
2926 rtx op1, op2, t1, t2;
2930 t1 = gen_reg_rtx (V4SImode);
2931 t2 = gen_reg_rtx (V4SImode);
2933 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2934 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2935 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI to V2DI, "hi" variant.  Each input is
;; interleaved with itself (vec_interleave_highv4si) so the wanted elements
;; sit in the slots the multiply reads, then gen_sse2_umulv2siv2di3 is
;; emitted.
2939 (define_expand "vec_widen_umult_hi_v4si"
2940 [(match_operand:V2DI 0 "register_operand" "")
2941 (match_operand:V4SI 1 "register_operand" "")
2942 (match_operand:V4SI 2 "register_operand" "")]
2945 rtx op1, op2, t1, t2;
2949 t1 = gen_reg_rtx (V4SImode);
2950 t2 = gen_reg_rtx (V4SImode);
2952 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2953 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2954 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI to V2DI, "lo" variant.  Each input is
;; interleaved with itself (vec_interleave_lowv4si), then
;; gen_sse2_umulv2siv2di3 is emitted.
2958 (define_expand "vec_widen_umult_lo_v4si"
2959 [(match_operand:V2DI 0 "register_operand" "")
2960 (match_operand:V4SI 1 "register_operand" "")
2961 (match_operand:V4SI 2 "register_operand" "")]
2964 rtx op1, op2, t1, t2;
2968 t1 = gen_reg_rtx (V4SImode);
2969 t2 = gen_reg_rtx (V4SImode);
2971 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2972 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2973 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product expander: pmaddwd of the two V8HI inputs goes into a
;; fresh V4SI temporary, which is then added to the V4SI accumulator
;; (operand 3) to produce operand 0.
2977 (define_expand "sdot_prodv8hi"
2978 [(match_operand:V4SI 0 "register_operand" "")
2979 (match_operand:V8HI 1 "nonimmediate_operand" "")
2980 (match_operand:V8HI 2 "nonimmediate_operand" "")
2981 (match_operand:V4SI 3 "register_operand" "")]
2984 rtx t = gen_reg_rtx (V4SImode);
2985 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2986 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product expander, V4SI inputs with a V2DI accumulator.
;; The first pmuludq-based multiply of the inputs is added to the
;; accumulator (operand 3).  Both inputs are then shifted right as TImode
;; values, multiplied again, and that second product is added to give
;; operand 0.  The shift count lines are not visible in this listing.
2990 (define_expand "udot_prodv4si"
2991 [(match_operand:V2DI 0 "register_operand" "")
2992 (match_operand:V4SI 1 "register_operand" "")
2993 (match_operand:V4SI 2 "register_operand" "")
2994 (match_operand:V2DI 3 "register_operand" "")]
2999 t1 = gen_reg_rtx (V2DImode);
3000 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3001 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3003 t2 = gen_reg_rtx (V4SImode);
3004 t3 = gen_reg_rtx (V4SImode);
3005 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3006 gen_lowpart (TImode, operands[1]),
3008 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3009 gen_lowpart (TImode, operands[2]),
3012 t4 = gen_reg_rtx (V2DImode);
3013 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3015 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; psraw / psrad for the word and doubleword vector modes in SSEMODE24.
;; The SI shift count (operand 2) may be an SSE register or an immediate.
3019 (define_insn "ashr<mode>3"
3020 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3022 (match_operand:SSEMODE24 1 "register_operand" "0")
3023 (match_operand:SI 2 "nonmemory_operand" "xi")))]
3025 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3026 [(set_attr "type" "sseishft")
3027 (set_attr "mode" "TI")])
;; Logical right shift (psrlw / psrld / psrlq) for the vector modes in
;; SSEMODE248.  The SI shift count (operand 2) may be an SSE register or
;; an immediate.
3029 (define_insn "lshr<mode>3"
3030 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3031 (lshiftrt:SSEMODE248
3032 (match_operand:SSEMODE248 1 "register_operand" "0")
3033 (match_operand:SI 2 "nonmemory_operand" "xi")))]
3035 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3036 [(set_attr "type" "sseishft")
3037 (set_attr "mode" "TI")])
;; psllw / pslld / psllq for the vector modes in SSEMODE248.  The SI shift
;; count (operand 2) may be an SSE register or an immediate.
3039 (define_insn "ashl<mode>3"
3040 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3042 (match_operand:SSEMODE248 1 "register_operand" "0")
3043 (match_operand:SI 2 "nonmemory_operand" "xi")))]
3045 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3046 [(set_attr "type" "sseishft")
3047 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift by a constant bit count that
;; satisfies const_0_to_255_mul_8_operand.  The output code divides the
;; count by 8 before emitting pslldq with it.
3049 (define_insn "sse2_ashlti3"
3050 [(set (match_operand:TI 0 "register_operand" "=x")
3051 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3052 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3055 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3056 return "pslldq\t{%2, %0|%0, %2}";
3058 [(set_attr "type" "sseishft")
3059 (set_attr "mode" "TI")])
;; Whole-vector left shift expander for the SSEMODEI modes.  The count is
;; tested with const_0_to_255_mul_8_operand (the action taken when the test
;; fails is not visible in this listing), then operands 0 and 1 are
;; re-viewed as TImode so the TImode shift pattern applies.
3061 (define_expand "vec_shl_<mode>"
3062 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3063 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3064 (match_operand:SI 2 "general_operand" "")))]
3067 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3069 operands[0] = gen_lowpart (TImode, operands[0]);
3070 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift by a constant bit count
;; that satisfies const_0_to_255_mul_8_operand.  The output code divides
;; the count by 8 before emitting psrldq with it.
3073 (define_insn "sse2_lshrti3"
3074 [(set (match_operand:TI 0 "register_operand" "=x")
3075 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3076 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3079 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3080 return "psrldq\t{%2, %0|%0, %2}";
3082 [(set_attr "type" "sseishft")
3083 (set_attr "mode" "TI")])
;; Whole-vector right shift expander for the SSEMODEI modes.  The count is
;; tested with const_0_to_255_mul_8_operand (the action taken when the test
;; fails is not visible in this listing), then operands 0 and 1 are
;; re-viewed as TImode so the TImode shift pattern applies.
3085 (define_expand "vec_shr_<mode>"
3086 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3087 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3088 (match_operand:SI 2 "general_operand" "")))]
3091 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3093 operands[0] = gen_lowpart (TImode, operands[0]);
3094 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned byte maximum expander.  Operands are run through
;; ix86_fixup_binary_operands_no_copy with code UMAX.
3097 (define_expand "umaxv16qi3"
3098 [(set (match_operand:V16QI 0 "register_operand" "")
3099 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3100 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3102 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned byte maximum, emitted as pmaxub.  Operand 1 is marked
;; commutative and tied to the destination; operand 2 is register or
;; memory.
3104 (define_insn "*umaxv16qi3"
3105 [(set (match_operand:V16QI 0 "register_operand" "=x")
3106 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3107 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3108 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3109 "pmaxub\t{%2, %0|%0, %2}"
3110 [(set_attr "type" "sseiadd")
3111 (set_attr "mode" "TI")])
;; Signed word maximum expander.  Operands are run through
;; ix86_fixup_binary_operands_no_copy with code SMAX.
3113 (define_expand "smaxv8hi3"
3114 [(set (match_operand:V8HI 0 "register_operand" "")
3115 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3116 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3118 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed word maximum, emitted as pmaxsw.  Operand 1 is marked commutative
;; and tied to the destination; operand 2 is register or memory.
3120 (define_insn "*smaxv8hi3"
3121 [(set (match_operand:V8HI 0 "register_operand" "=x")
3122 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3123 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3124 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3125 "pmaxsw\t{%2, %0|%0, %2}"
3126 [(set_attr "type" "sseiadd")
3127 (set_attr "mode" "TI")])
;; Unsigned word maximum, built from two operations: an unsigned-saturating
;; subtract (us_minus) of operand 2 from operand 1, followed by adding
;; operand 2 back to that difference.
;; The preparation code saves the real destination in operands[3]; if the
;; destination is the same rtx as operand 2, a fresh V8HI register receives
;; the intermediate difference so operand 2 is not overwritten before the
;; add.
3129 (define_expand "umaxv8hi3"
3130 [(set (match_operand:V8HI 0 "register_operand" "=x")
3131 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
3132 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3134 (plus:V8HI (match_dup 0) (match_dup 2)))]
3137 operands[3] = operands[0];
3138 if (rtx_equal_p (operands[0], operands[2]))
3139 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for the V16QI and V4SI modes in SSEMODE14, expanded
;; through ix86_expand_int_vcond.  The xops array is filled with: the
;; destination, operand 1, operand 2, a GT comparison of operand 1 against
;; operand 2, and the two compared operands again.  Presumably xops[1] is
;; the value chosen when the comparison holds -- verify against
;; ix86_expand_int_vcond.
3142 (define_expand "smax<mode>3"
3143 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3144 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3145 (match_operand:SSEMODE14 2 "register_operand" "")))]
3151 xops[0] = operands[0];
3152 xops[1] = operands[1];
3153 xops[2] = operands[2];
3154 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3155 xops[4] = operands[1];
3156 xops[5] = operands[2];
3157 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expanded through ix86_expand_int_vcond.  Same
;; xops layout as the signed maximum expander, but the comparison is GTU.
3162 (define_expand "umaxv4si3"
3163 [(set (match_operand:V4SI 0 "register_operand" "")
3164 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3165 (match_operand:V4SI 2 "register_operand" "")))]
3171 xops[0] = operands[0];
3172 xops[1] = operands[1];
3173 xops[2] = operands[2];
3174 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3175 xops[4] = operands[1];
3176 xops[5] = operands[2];
3177 ok = ix86_expand_int_vcond (xops);
;; Unsigned byte minimum expander.  Operands are run through
;; ix86_fixup_binary_operands_no_copy with code UMIN, matching the umin
;; RTL of this pattern and the UMIN check in the *uminv16qi3 insn.
3182 (define_expand "uminv16qi3"
3183 [(set (match_operand:V16QI 0 "register_operand" "")
3184 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3185 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3187 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned byte minimum, emitted as pminub.  Operand 1 is marked
;; commutative and tied to the destination; operand 2 is register or
;; memory.
3189 (define_insn "*uminv16qi3"
3190 [(set (match_operand:V16QI 0 "register_operand" "=x")
3191 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3192 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3193 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3194 "pminub\t{%2, %0|%0, %2}"
3195 [(set_attr "type" "sseiadd")
3196 (set_attr "mode" "TI")])
;; Expander for signed minimum of V8HI vectors.  Inputs may be registers
;; or memory; ix86_fixup_binary_operands_no_copy (SMIN, ...) adjusts the
;; operands before the *sminv8hi3 insn is matched.
3198 (define_expand "sminv8hi3"
3199 [(set (match_operand:V8HI 0 "register_operand" "")
3200 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3201 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3203 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed word-wise minimum, emitted as pminsw.  Operand 1 is tied to
;; the destination and commutative with operand 2 ("%0"); operand 2 may
;; be an SSE register or memory.  Requires TARGET_SSE2.
3205 (define_insn "*sminv8hi3"
3206 [(set (match_operand:V8HI 0 "register_operand" "=x")
3207 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3208 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3209 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3210 "pminsw\t{%2, %0|%0, %2}"
3211 [(set_attr "type" "sseiadd")
3212 (set_attr "mode" "TI")])
;; Expander for signed minimum on the SSEMODE14 modes (V16QI, V4SI).
;; Uses the same GT comparison of operand 1 against operand 2 as the
;; signed maximum expander, but swaps the value arms: xops[1] is
;; operand 2 and xops[2] is operand 1.  ix86_expand_int_vcond emits the
;; actual code.
3214 (define_expand "smin<mode>3"
3215 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3216 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3217 (match_operand:SSEMODE14 2 "register_operand" "")))]
3223 xops[0] = operands[0];
3224 xops[1] = operands[2];
3225 xops[2] = operands[1];
3226 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3227 xops[4] = operands[1];
3228 xops[5] = operands[2];
3229 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).
;; Builds a GTU comparison of operand 1 against operand 2 and passes the
;; value arms swapped (xops[1] = operand 2, xops[2] = operand 1) to
;; ix86_expand_int_vcond.
3234 (define_expand "umin<mode>3"
3235 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3236 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3237 (match_operand:SSEMODE24 2 "register_operand" "")))]
3243 xops[0] = operands[0];
3244 xops[1] = operands[2];
3245 xops[2] = operands[1];
3246 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3247 xops[4] = operands[1];
3248 xops[5] = operands[2];
3249 ok = ix86_expand_int_vcond (xops);
3254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3256 ;; Parallel integral comparisons
3258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), emitted as pcmpeq{b,w,d}; the suffix comes from the ssevecsize
;; mode attribute.  Operands 1 and 2 are commutative ("%0"); operand 2
;; may be a register or memory.  Requires TARGET_SSE2.
3260 (define_insn "sse2_eq<mode>3"
3261 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3263 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3264 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3265 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3266 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3267 [(set_attr "type" "ssecmp")
3268 (set_attr "mode" "TI")])
;; Element-wise compare for the SSEMODE124 modes, emitted as
;; pcmpgt{b,w,d}.  Unlike the equality pattern, operand 1 must be a
;; register tied to the destination ("0", no commutative marker);
;; operand 2 may be a register or memory.
3270 (define_insn "sse2_gt<mode>3"
3271 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3273 (match_operand:SSEMODE124 1 "register_operand" "0")
3274 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3276 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3277 [(set_attr "type" "ssecmp")
3278 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124 modes.
;;   operand 0     destination register
;;   operands 1, 2 the two if_then_else arms (any general operand)
;;   operand 3     comparison operator (empty predicate, so any code)
;;   operands 4, 5 the values being compared
;; Expansion is delegated to ix86_expand_int_vcond, whose result is
;; tested by the preparation code.
3280 (define_expand "vcond<mode>"
3281 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3282 (if_then_else:SSEMODE124
3283 (match_operator 3 ""
3284 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3285 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3286 (match_operand:SSEMODE124 1 "general_operand" "")
3287 (match_operand:SSEMODE124 2 "general_operand" "")))]
3290 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> under the vcondu name, with an identical
;; operand layout (0 = destination, 1/2 = arms, 3 = comparison operator,
;; 4/5 = compared values).  It also delegates to ix86_expand_int_vcond.
;; NOTE(review): presumably the comparison code in operand 3 carries the
;; signedness (e.g. GTU, as built by the unsigned min/max expanders).
3296 (define_expand "vcondu<mode>"
3297 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3298 (if_then_else:SSEMODE124
3299 (match_operator 3 ""
3300 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3301 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3302 (match_operand:SSEMODE124 1 "general_operand" "")
3303 (match_operand:SSEMODE124 2 "general_operand" "")))]
3306 if (ix86_expand_int_vcond (operands))
3312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3314 ;; Parallel integral logical operations
3316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an xor.
;; The preparation code builds a CONST_VECTOR with every element set to
;; constm1_rtx (one entry per GET_MODE_NUNITS of the mode), forces it
;; into a register and stores it in operands[2].
3318 (define_expand "one_cmpl<mode>2"
3319 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3320 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3324 int i, n = GET_MODE_NUNITS (<MODE>mode);
3325 rtvec v = rtvec_alloc (n);
3327 for (i = 0; i < n; ++i)
3328 RTVEC_ELT (v, i) = constm1_rtx;
3330 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the SSEMODEI modes (V16QI, V8HI, V4SI,
;; V2DI).  Inputs may be registers or memory; the operands are adjusted
;; by ix86_fixup_binary_operands_no_copy (AND, ...).
3333 (define_expand "and<mode>3"
3334 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3335 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3336 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3338 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND for any SSEMODEI mode, emitted as pand (the instruction
;; is the same for every element size).  Operands 1 and 2 are
;; commutative ("%0"); operand 2 may be a register or memory.
;; Requires TARGET_SSE2.
3340 (define_insn "*and<mode>3"
3341 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3343 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3344 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3345 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3346 "pand\t{%2, %0|%0, %2}"
3347 [(set_attr "type" "sselog")
3348 (set_attr "mode" "TI")])
;; Combines the complement (not) of operand 1 with operand 2, emitted as
;; pandn.  Operand 1 is the complemented input and must be the register
;; tied to the destination; operand 2 may be a register or memory.  The
;; pattern is not commutative (plain "0" constraint).
3350 (define_insn "sse2_nand<mode>3"
3351 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3353 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3354 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3356 "pandn\t{%2, %0|%0, %2}"
3357 [(set_attr "type" "sselog")
3358 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on the SSEMODEI modes.  Inputs may
;; be registers or memory; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (IOR, ...).
3360 (define_expand "ior<mode>3"
3361 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3362 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3363 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3365 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise inclusive OR for any SSEMODEI mode, emitted as por.
;; Operands 1 and 2 are commutative ("%0"); operand 2 may be a register
;; or memory.  Requires TARGET_SSE2.
3367 (define_insn "*ior<mode>3"
3368 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3370 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3371 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3372 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3373 "por\t{%2, %0|%0, %2}"
3374 [(set_attr "type" "sselog")
3375 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on the SSEMODEI modes.  Inputs may
;; be registers or memory; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (XOR, ...).
3377 (define_expand "xor<mode>3"
3378 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3379 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3380 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3382 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise exclusive OR for any SSEMODEI mode, emitted as pxor.
;; Operands 1 and 2 are commutative ("%0"); operand 2 may be a register
;; or memory.  Requires TARGET_SSE2.
3384 (define_insn "*xor<mode>3"
3385 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3387 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3388 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3389 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3390 "pxor\t{%2, %0|%0, %2}"
3391 [(set_attr "type" "sselog")
3392 (set_attr "mode" "TI")])
3394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3396 ;; Parallel integral element swizzling
3398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3401 ;; op1 = abcdefghijklmnop
3402 ;; op2 = qrstuvwxyz012345
3403 ;; h1 = aqbrcsdteufvgwhx
3404 ;; l1 = iyjzk0l1m2n3o4p5
3405 ;; h2 = aiqybjrzcks0dlt1
3406 ;; l2 = emu2fnv3gow4hpx5
3407 ;; h3 = aeimquy2bfjnrvz3
3408 ;; l3 = cgkosw04dhlptx15
3409 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI inputs into one V16QI result using byte interleaves
;; only.  Both inputs are reinterpreted as V16QI via gen_lowpart and
;; then run through three rounds of high/low interleave (h1/l1, h2/l2,
;; h3/l3); a final low interleave of l3 and h3 writes operand 0.  The
;; letter diagram above this pattern traces each intermediate value.
3410 (define_expand "vec_pack_mod_v8hi"
3411 [(match_operand:V16QI 0 "register_operand" "")
3412 (match_operand:V8HI 1 "register_operand" "")
3413 (match_operand:V8HI 2 "register_operand" "")]
3416 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3418 op1 = gen_lowpart (V16QImode, operands[1]);
3419 op2 = gen_lowpart (V16QImode, operands[2]);
3420 h1 = gen_reg_rtx (V16QImode);
3421 l1 = gen_reg_rtx (V16QImode);
3422 h2 = gen_reg_rtx (V16QImode);
3423 l2 = gen_reg_rtx (V16QImode);
3424 h3 = gen_reg_rtx (V16QImode);
3425 l3 = gen_reg_rtx (V16QImode);
3427 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3428 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3429 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3430 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3431 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3432 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3433 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3444 ;; result = bdfhjlnp
;; Packs two V4SI inputs into one V8HI result using word interleaves.
;; Both inputs are reinterpreted as V8HI via gen_lowpart, then two
;; rounds of high/low interleave (h1/l1, h2/l2) are emitted, and a final
;; low interleave of l2 and h2 writes operand 0.
3445 (define_expand "vec_pack_mod_v4si"
3446 [(match_operand:V8HI 0 "register_operand" "")
3447 (match_operand:V4SI 1 "register_operand" "")
3448 (match_operand:V4SI 2 "register_operand" "")]
3451 rtx op1, op2, h1, l1, h2, l2;
3453 op1 = gen_lowpart (V8HImode, operands[1]);
3454 op2 = gen_lowpart (V8HImode, operands[2]);
3455 h1 = gen_reg_rtx (V8HImode);
3456 l1 = gen_reg_rtx (V8HImode);
3457 h2 = gen_reg_rtx (V8HImode);
3458 l2 = gen_reg_rtx (V8HImode);
3460 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3461 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3462 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3463 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3464 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI inputs into one V4SI result using doubleword
;; interleaves.  Both inputs are reinterpreted as V4SI via gen_lowpart;
;; one high/low interleave round produces h1 and l1, and a final low
;; interleave of l1 and h1 writes operand 0.
3474 (define_expand "vec_pack_mod_v2di"
3475 [(match_operand:V4SI 0 "register_operand" "")
3476 (match_operand:V2DI 1 "register_operand" "")
3477 (match_operand:V2DI 2 "register_operand" "")]
3480 rtx op1, op2, h1, l1;
3482 op1 = gen_lowpart (V4SImode, operands[1]);
3483 op2 = gen_lowpart (V4SImode, operands[2]);
3484 h1 = gen_reg_rtx (V4SImode);
3485 l1 = gen_reg_rtx (V4SImode);
3487 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3488 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3489 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander that interleaves the high halves of two V16QI
;; vectors.  The selection list alternates indices 8..15 with 24..31.
;; The preparation code simply emits the sse2_punpckhbw insn on the
;; same operands.
3493 (define_expand "vec_interleave_highv16qi"
3494 [(set (match_operand:V16QI 0 "register_operand" "=x")
3497 (match_operand:V16QI 1 "register_operand" "0")
3498 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3499 (parallel [(const_int 8) (const_int 24)
3500 (const_int 9) (const_int 25)
3501 (const_int 10) (const_int 26)
3502 (const_int 11) (const_int 27)
3503 (const_int 12) (const_int 28)
3504 (const_int 13) (const_int 29)
3505 (const_int 14) (const_int 30)
3506 (const_int 15) (const_int 31)])))]
3509 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander that interleaves the low halves of two V16QI vectors.
;; The selection list alternates indices 0..7 with 16..23.  The
;; preparation code simply emits the sse2_punpcklbw insn on the same
;; operands.
3513 (define_expand "vec_interleave_lowv16qi"
3514 [(set (match_operand:V16QI 0 "register_operand" "=x")
3517 (match_operand:V16QI 1 "register_operand" "0")
3518 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3519 (parallel [(const_int 0) (const_int 16)
3520 (const_int 1) (const_int 17)
3521 (const_int 2) (const_int 18)
3522 (const_int 3) (const_int 19)
3523 (const_int 4) (const_int 20)
3524 (const_int 5) (const_int 21)
3525 (const_int 6) (const_int 22)
3526 (const_int 7) (const_int 23)])))]
3529 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander that interleaves the high halves of two V8HI vectors
;; (indices 4..7 alternating with 12..15).  It forwards to the
;; sse2_punpckhwd insn.
3533 (define_expand "vec_interleave_highv8hi"
3534 [(set (match_operand:V8HI 0 "register_operand" "=x")
3537 (match_operand:V8HI 1 "register_operand" "0")
3538 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3539 (parallel [(const_int 4) (const_int 12)
3540 (const_int 5) (const_int 13)
3541 (const_int 6) (const_int 14)
3542 (const_int 7) (const_int 15)])))]
3545 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander that interleaves the low halves of two V8HI vectors
;; (indices 0..3 alternating with 8..11).  It forwards to the
;; sse2_punpcklwd insn.
3549 (define_expand "vec_interleave_lowv8hi"
3550 [(set (match_operand:V8HI 0 "register_operand" "=x")
3553 (match_operand:V8HI 1 "register_operand" "0")
3554 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3555 (parallel [(const_int 0) (const_int 8)
3556 (const_int 1) (const_int 9)
3557 (const_int 2) (const_int 10)
3558 (const_int 3) (const_int 11)])))]
3561 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander that interleaves the high halves of two V4SI vectors
;; (indices 2, 6, 3, 7).  It forwards to the sse2_punpckhdq insn.
3565 (define_expand "vec_interleave_highv4si"
3566 [(set (match_operand:V4SI 0 "register_operand" "=x")
3569 (match_operand:V4SI 1 "register_operand" "0")
3570 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3571 (parallel [(const_int 2) (const_int 6)
3572 (const_int 3) (const_int 7)])))]
3575 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander that interleaves the low halves of two V4SI vectors
;; (indices 0, 4, 1, 5).  It forwards to the sse2_punpckldq insn.
3579 (define_expand "vec_interleave_lowv4si"
3580 [(set (match_operand:V4SI 0 "register_operand" "=x")
3583 (match_operand:V4SI 1 "register_operand" "0")
3584 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3585 (parallel [(const_int 0) (const_int 4)
3586 (const_int 1) (const_int 5)])))]
3589 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high-element interleave of two V2DI vectors
;; (the selection list starts at index 1).  It forwards to the
;; sse2_punpckhqdq insn.
3593 (define_expand "vec_interleave_highv2di"
3594 [(set (match_operand:V2DI 0 "register_operand" "=x")
3597 (match_operand:V2DI 1 "register_operand" "0")
3598 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3599 (parallel [(const_int 1)
3603 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-element interleave of two V2DI vectors
;; (the selection list starts at index 0).  It forwards to the
;; sse2_punpcklqdq insn.
3607 (define_expand "vec_interleave_lowv2di"
3608 [(set (match_operand:V2DI 0 "register_operand" "=x")
3611 (match_operand:V2DI 1 "register_operand" "0")
3612 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3613 (parallel [(const_int 0)
3617 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Narrows two V8HI inputs into one V16QI result, emitted as packsswb.
;; Operand 1 is tied to the destination register; operand 2 may be a
;; register or memory.
3621 (define_insn "sse2_packsswb"
3622 [(set (match_operand:V16QI 0 "register_operand" "=x")
3625 (match_operand:V8HI 1 "register_operand" "0"))
3627 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3629 "packsswb\t{%2, %0|%0, %2}"
3630 [(set_attr "type" "sselog")
3631 (set_attr "mode" "TI")])
;; Narrows two V4SI inputs into one V8HI result, emitted as packssdw.
;; Operand 1 is tied to the destination register; operand 2 may be a
;; register or memory.
3633 (define_insn "sse2_packssdw"
3634 [(set (match_operand:V8HI 0 "register_operand" "=x")
3637 (match_operand:V4SI 1 "register_operand" "0"))
3639 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3641 "packssdw\t{%2, %0|%0, %2}"
3642 [(set_attr "type" "sselog")
3643 (set_attr "mode" "TI")])
;; Narrows two V8HI inputs into one V16QI result, emitted as packuswb.
;; Operand 1 is tied to the destination register; operand 2 may be a
;; register or memory.
3645 (define_insn "sse2_packuswb"
3646 [(set (match_operand:V16QI 0 "register_operand" "=x")
3649 (match_operand:V8HI 1 "register_operand" "0"))
3651 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3653 "packuswb\t{%2, %0|%0, %2}"
3654 [(set_attr "type" "sselog")
3655 (set_attr "mode" "TI")])
;; Interleaves the high bytes of two V16QI vectors, emitted as
;; punpckhbw.  The selection list alternates indices 8..15 with 24..31.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
3657 (define_insn "sse2_punpckhbw"
3658 [(set (match_operand:V16QI 0 "register_operand" "=x")
3661 (match_operand:V16QI 1 "register_operand" "0")
3662 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3663 (parallel [(const_int 8) (const_int 24)
3664 (const_int 9) (const_int 25)
3665 (const_int 10) (const_int 26)
3666 (const_int 11) (const_int 27)
3667 (const_int 12) (const_int 28)
3668 (const_int 13) (const_int 29)
3669 (const_int 14) (const_int 30)
3670 (const_int 15) (const_int 31)])))]
3672 "punpckhbw\t{%2, %0|%0, %2}"
3673 [(set_attr "type" "sselog")
3674 (set_attr "mode" "TI")])
;; Interleaves the low bytes of two V16QI vectors, emitted as
;; punpcklbw.  The selection list alternates indices 0..7 with 16..23.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
3676 (define_insn "sse2_punpcklbw"
3677 [(set (match_operand:V16QI 0 "register_operand" "=x")
3680 (match_operand:V16QI 1 "register_operand" "0")
3681 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3682 (parallel [(const_int 0) (const_int 16)
3683 (const_int 1) (const_int 17)
3684 (const_int 2) (const_int 18)
3685 (const_int 3) (const_int 19)
3686 (const_int 4) (const_int 20)
3687 (const_int 5) (const_int 21)
3688 (const_int 6) (const_int 22)
3689 (const_int 7) (const_int 23)])))]
3691 "punpcklbw\t{%2, %0|%0, %2}"
3692 [(set_attr "type" "sselog")
3693 (set_attr "mode" "TI")])
;; Interleaves the high words of two V8HI vectors (indices 4..7
;; alternating with 12..15), emitted as punpckhwd.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
3695 (define_insn "sse2_punpckhwd"
3696 [(set (match_operand:V8HI 0 "register_operand" "=x")
3699 (match_operand:V8HI 1 "register_operand" "0")
3700 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3701 (parallel [(const_int 4) (const_int 12)
3702 (const_int 5) (const_int 13)
3703 (const_int 6) (const_int 14)
3704 (const_int 7) (const_int 15)])))]
3706 "punpckhwd\t{%2, %0|%0, %2}"
3707 [(set_attr "type" "sselog")
3708 (set_attr "mode" "TI")])
;; Interleaves the low words of two V8HI vectors (indices 0..3
;; alternating with 8..11), emitted as punpcklwd.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
3710 (define_insn "sse2_punpcklwd"
3711 [(set (match_operand:V8HI 0 "register_operand" "=x")
3714 (match_operand:V8HI 1 "register_operand" "0")
3715 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3716 (parallel [(const_int 0) (const_int 8)
3717 (const_int 1) (const_int 9)
3718 (const_int 2) (const_int 10)
3719 (const_int 3) (const_int 11)])))]
3721 "punpcklwd\t{%2, %0|%0, %2}"
3722 [(set_attr "type" "sselog")
3723 (set_attr "mode" "TI")])
;; Interleaves the high doublewords of two V4SI vectors (indices 2, 6,
;; 3, 7), emitted as punpckhdq.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
3725 (define_insn "sse2_punpckhdq"
3726 [(set (match_operand:V4SI 0 "register_operand" "=x")
3729 (match_operand:V4SI 1 "register_operand" "0")
3730 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3731 (parallel [(const_int 2) (const_int 6)
3732 (const_int 3) (const_int 7)])))]
3734 "punpckhdq\t{%2, %0|%0, %2}"
3735 [(set_attr "type" "sselog")
3736 (set_attr "mode" "TI")])
;; Interleaves the low doublewords of two V4SI vectors (indices 0, 4,
;; 1, 5), emitted as punpckldq.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
3738 (define_insn "sse2_punpckldq"
3739 [(set (match_operand:V4SI 0 "register_operand" "=x")
3742 (match_operand:V4SI 1 "register_operand" "0")
3743 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3744 (parallel [(const_int 0) (const_int 4)
3745 (const_int 1) (const_int 5)])))]
3747 "punpckldq\t{%2, %0|%0, %2}"
3748 [(set_attr "type" "sselog")
3749 (set_attr "mode" "TI")])
;; High-quadword interleave of two V2DI vectors (the selection list
;; starts at index 1), emitted as punpckhqdq.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3751 (define_insn "sse2_punpckhqdq"
3752 [(set (match_operand:V2DI 0 "register_operand" "=x")
3755 (match_operand:V2DI 1 "register_operand" "0")
3756 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3757 (parallel [(const_int 1)
3760 "punpckhqdq\t{%2, %0|%0, %2}"
3761 [(set_attr "type" "sselog")
3762 (set_attr "mode" "TI")])
;; Low-quadword interleave of two V2DI vectors (the selection list
;; starts at index 0), emitted as punpcklqdq.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3764 (define_insn "sse2_punpcklqdq"
3765 [(set (match_operand:V2DI 0 "register_operand" "=x")
3768 (match_operand:V2DI 1 "register_operand" "0")
3769 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3770 (parallel [(const_int 0)
3773 "punpcklqdq\t{%2, %0|%0, %2}"
3774 [(set_attr "type" "sselog")
3775 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for inserting a word into a V8HI vector.
;;   operand 1  source vector
;;   operand 2  SImode value to insert; narrowed to its HImode low part
;;   operand 3  element index 0..7; rewritten to the one-hot bitmask
;;              (1 << index) that the *sse2_pinsrw insn matches
3777 (define_expand "sse2_pinsrw"
3778 [(set (match_operand:V8HI 0 "register_operand" "")
3781 (match_operand:SI 2 "nonimmediate_operand" ""))
3782 (match_operand:V8HI 1 "register_operand" "")
3783 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3786 operands[2] = gen_lowpart (HImode, operands[2]);
3787 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Insn form of pinsrw.  Operand 3 arrives as a power-of-two mask
;; (const_pow2_1_to_128_operand); the output code converts it back to an
;; element index with exact_log2 before printing.  Operand 2 is an HImode
;; general register or memory and is printed with the %k modifier;
;; operand 1 is tied to the destination.
3790 (define_insn "*sse2_pinsrw"
3791 [(set (match_operand:V8HI 0 "register_operand" "=x")
3794 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3795 (match_operand:V8HI 1 "register_operand" "0")
3796 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3799 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3800 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3802 [(set_attr "type" "sselog")
3803 (set_attr "mode" "TI")])
;; Extracts one element of a V8HI register into an SImode general
;; register, emitted as pextrw.  Operand 2 is the constant element
;; index (0..7).
3805 (define_insn "sse2_pextrw"
3806 [(set (match_operand:SI 0 "register_operand" "=r")
3809 (match_operand:V8HI 1 "register_operand" "x")
3810 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3812 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3813 [(set_attr "type" "sselog")
3814 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshufd.  Operand 2 is the packed
;; immediate; it is split into four 2-bit selectors (bits 0-1, 2-3, 4-5
;; and 6-7) which are passed as separate operands to sse2_pshufd_1.
3816 (define_expand "sse2_pshufd"
3817 [(match_operand:V4SI 0 "register_operand" "")
3818 (match_operand:V4SI 1 "nonimmediate_operand" "")
3819 (match_operand:SI 2 "const_int_operand" "")]
3822 int mask = INTVAL (operands[2]);
3823 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3824 GEN_INT ((mask >> 0) & 3),
3825 GEN_INT ((mask >> 2) & 3),
3826 GEN_INT ((mask >> 4) & 3),
3827 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshufd with the shuffle written as four explicit element
;; selectors (operands 2..5, each 0..3).  The output code repacks them
;; into the 8-bit immediate (2 bits per selector, operand 2 in the low
;; bits), stores that in operands[2] and prints the instruction.  The
;; source may be a register or memory.
3831 (define_insn "sse2_pshufd_1"
3832 [(set (match_operand:V4SI 0 "register_operand" "=x")
3834 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3835 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3836 (match_operand 3 "const_0_to_3_operand" "")
3837 (match_operand 4 "const_0_to_3_operand" "")
3838 (match_operand 5 "const_0_to_3_operand" "")])))]
3842 mask |= INTVAL (operands[2]) << 0;
3843 mask |= INTVAL (operands[3]) << 2;
3844 mask |= INTVAL (operands[4]) << 4;
3845 mask |= INTVAL (operands[5]) << 6;
3846 operands[2] = GEN_INT (mask);
3848 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3850 [(set_attr "type" "sselog1")
3851 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshuflw.  The packed immediate in
;; operand 2 is split into four 2-bit selectors which are passed as
;; separate operands to sse2_pshuflw_1.
3853 (define_expand "sse2_pshuflw"
3854 [(match_operand:V8HI 0 "register_operand" "")
3855 (match_operand:V8HI 1 "nonimmediate_operand" "")
3856 (match_operand:SI 2 "const_int_operand" "")]
3859 int mask = INTVAL (operands[2]);
3860 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3861 GEN_INT ((mask >> 0) & 3),
3862 GEN_INT ((mask >> 2) & 3),
3863 GEN_INT ((mask >> 4) & 3),
3864 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshuflw.  The first four entries of the selection list
;; are variable selectors (operands 2..5, each 0..3); the rest of the
;; list is not visible in this excerpt.  The output code repacks the
;; selectors into the 8-bit immediate, 2 bits each with operand 2 in the
;; low bits, and prints the instruction.
3868 (define_insn "sse2_pshuflw_1"
3869 [(set (match_operand:V8HI 0 "register_operand" "=x")
3871 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3872 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3873 (match_operand 3 "const_0_to_3_operand" "")
3874 (match_operand 4 "const_0_to_3_operand" "")
3875 (match_operand 5 "const_0_to_3_operand" "")
3883 mask |= INTVAL (operands[2]) << 0;
3884 mask |= INTVAL (operands[3]) << 2;
3885 mask |= INTVAL (operands[4]) << 4;
3886 mask |= INTVAL (operands[5]) << 6;
3887 operands[2] = GEN_INT (mask);
3889 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3891 [(set_attr "type" "sselog")
3892 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshufhw.  The packed immediate in
;; operand 2 is split into four 2-bit selectors; each is biased by 4 so
;; that it becomes an element index in the range 4..7, as required by
;; the const_4_to_7_operand predicates of sse2_pshufhw_1.
3894 (define_expand "sse2_pshufhw"
3895 [(match_operand:V8HI 0 "register_operand" "")
3896 (match_operand:V8HI 1 "nonimmediate_operand" "")
3897 (match_operand:SI 2 "const_int_operand" "")]
3900 int mask = INTVAL (operands[2]);
3901 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3902 GEN_INT (((mask >> 0) & 3) + 4),
3903 GEN_INT (((mask >> 2) & 3) + 4),
3904 GEN_INT (((mask >> 4) & 3) + 4),
3905 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of pshufhw.  The selection list begins with fixed indices
;; (starting at 0) and ends with four variable selectors (operands 2..5,
;; each 4..7).  The output code removes the bias of 4 from each
;; selector, repacks them into the 8-bit immediate (2 bits each, operand
;; 2 in the low bits) and prints the instruction.
3909 (define_insn "sse2_pshufhw_1"
3910 [(set (match_operand:V8HI 0 "register_operand" "=x")
3912 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3913 (parallel [(const_int 0)
3917 (match_operand 2 "const_4_to_7_operand" "")
3918 (match_operand 3 "const_4_to_7_operand" "")
3919 (match_operand 4 "const_4_to_7_operand" "")
3920 (match_operand 5 "const_4_to_7_operand" "")])))]
3924 mask |= (INTVAL (operands[2]) - 4) << 0;
3925 mask |= (INTVAL (operands[3]) - 4) << 2;
3926 mask |= (INTVAL (operands[4]) - 4) << 4;
3927 mask |= (INTVAL (operands[5]) - 4) << 6;
3928 operands[2] = GEN_INT (mask);
3930 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3932 [(set_attr "type" "sselog")
3933 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SImode register-or-memory
;; operand.  The preparation statement sets operands[2] to the V4SI
;; zero constant.
;; NOTE(review): presumably operand 2 supplies the zeroed upper
;; elements; the RTL that uses it is not visible in this excerpt.
3935 (define_expand "sse2_loadd"
3936 [(set (match_operand:V4SI 0 "register_operand" "")
3939 (match_operand:SI 1 "nonimmediate_operand" ""))
3943 "operands[2] = CONST0_RTX (V4SImode);")
;; Loads an SImode value (operand 2) into a V4SI register; operand 1 is
;; the vector the value is combined with.  Three alternatives:
;;   0: source in memory or a general register, operand 1 zero -> movd
;;   1: source in memory, operand 1 zero                      -> movss
;;   2: source in an SSE register, operand 1 tied to dest     -> movss
3945 (define_insn "sse2_loadld"
3946 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3949 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3950 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3954 movd\t{%2, %0|%0, %2}
3955 movss\t{%2, %0|%0, %2}
3956 movss\t{%2, %0|%0, %2}"
3957 [(set_attr "type" "ssemov")
3958 (set_attr "mode" "TI,V4SF,SF")])
3960 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3961 ;; be taken into account, and movdi isn't fully populated even without.
;; Stores element 0 of a V4SI register to an SImode destination (memory
;; or SSE register).  After reload the insn is split into a plain
;; SImode move: operand 1 is replaced by an SImode REG with the same
;; register number as the vector source.
3962 (define_insn_and_split "sse2_stored"
3963 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3965 (match_operand:V4SI 1 "register_operand" "x")
3966 (parallel [(const_int 0)])))]
3969 "&& reload_completed"
3970 [(set (match_dup 0) (match_dup 1))]
3972 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of a V2DI register to a DImode
;; register-or-memory destination.  No preparation code is visible in
;; this excerpt.
3975 (define_expand "sse_storeq"
3976 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3978 (match_operand:V2DI 1 "register_operand" "")
3979 (parallel [(const_int 0)])))]
3983 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3984 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn that matches a store of element 0 of a V2DI SSE register to a
;; DImode destination in memory or an SSE register ("=mx").  Its
;; condition and output template are not visible in this excerpt.
3985 (define_insn "*sse2_storeq"
3986 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3988 (match_operand:V2DI 1 "register_operand" "x")
3989 (parallel [(const_int 0)])))]
;; Splitter for the element-0 store of a V2DI register: once reload has
;; completed (and TARGET_SSE holds) the extract becomes a plain DImode
;; move, with operand 1 rewritten as a DImode REG that has the same
;; register number as the vector source.
3994 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3996 (match_operand:V2DI 1 "register_operand" "")
3997 (parallel [(const_int 0)])))]
3998 "TARGET_SSE && reload_completed"
3999 [(set (match_dup 0) (match_dup 1))]
4001 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 (the high quadword) of a V2DI value into a DImode
;; destination when SSE2 is available.  Alternatives:
;;   0: register source, memory destination -> movhps stores the high half
;;   1: source tied to the destination      -> psrldq shifts the register
;;      right by 8 bytes so the high quadword lands in the low quadword
;;      (psrldq counts bytes, so a full 64-bit element needs a count of 8)
;;   2: offsettable memory source           -> movq from the upper half
;;      of the memory operand (%H1)
;; The condition rejects a memory-to-memory combination.
4004 (define_insn "*vec_extractv2di_1_sse2"
4005 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4007 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4008 (parallel [(const_int 1)])))]
4009 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4011 movhps\t{%1, %0|%0, %1}
4012 psrldq\t{$8, %0|%0, 8}
4013 movq\t{%H1, %0|%0, %H1}"
4014 [(set_attr "type" "ssemov,sseishft,ssemov")
4015 (set_attr "mode" "V2SF,TI,TI")])
4017 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only variant (TARGET_SSE without TARGET_SSE2) of the element-1
;; extract from a V2DI value.  Alternatives:
;;   0: register source, memory destination -> movhps
;;   1: register source, register dest      -> movhlps
;;   2: offsettable memory source           -> movlps from the upper half
;;      of the memory operand (%H1)
;; The condition rejects a memory-to-memory combination.
4018 (define_insn "*vec_extractv2di_1_sse"
4019 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4021 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4022 (parallel [(const_int 1)])))]
4023 "!TARGET_SSE2 && TARGET_SSE
4024 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4026 movhps\t{%1, %0|%0, %1}
4027 movhlps\t{%1, %0|%0, %1}
4028 movlps\t{%H1, %0|%0, %H1}"
4029 [(set_attr "type" "ssemov")
4030 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI register from a single SImode register operand.
;; Alternative 0 (Y registers) uses pshufd with immediate 0; alternative
;; 1, where the source is tied to the destination, uses shufps with
;; immediate 0 on the destination itself.
4032 (define_insn "*vec_dupv4si"
4033 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
4035 (match_operand:SI 1 "register_operand" " Y,0")))]
4038 pshufd\t{$0, %1, %0|%0, %1, 0}
4039 shufps\t{$0, %0, %0|%0, %0, 0}"
4040 [(set_attr "type" "sselog1")
4041 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI register from a single DImode register operand, which
;; is tied to the destination in both alternatives.  The output
;; templates are not visible in this excerpt; the attributes classify
;; alternative 0 as sselog1/TI and alternative 1 as ssemov/V4SF.
4043 (define_insn "*vec_dupv2di"
4044 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
4046 (match_operand:DI 1 "register_operand" " 0,0")))]
4051 [(set_attr "type" "sselog1,ssemov")
4052 (set_attr "mode" "TI,V4SF")])
4054 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4055 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4056 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI value from two SImode operands.  Alternatives:
;;   0: SSE register, operand 1 tied to dest, operand 2 in Y reg -> punpckldq
;;   1: SSE register, operand 1 from reg/mem, operand 2 zero     -> movd
;;   2: MMX register, operand 1 tied to dest, operand 2 in y reg -> punpckldq
;;   3: MMX register, operand 1 from reg/mem, operand 2 zero     -> movd
;; The "*" on the y constraints keeps the MMX alternatives out of
;; register-preference decisions.
4057 (define_insn "*sse2_concatv2si"
4058 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
4060 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
4061 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
4064 punpckldq\t{%2, %0|%0, %2}
4065 movd\t{%1, %0|%0, %1}
4066 punpckldq\t{%2, %0|%0, %2}
4067 movd\t{%1, %0|%0, %1}"
4068 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4069 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of the V2SI concatenation that uses SSE1 float-domain
;; instructions for the SSE alternatives.  Alternatives:
;;   0: SSE register, operand 1 tied to dest, operand 2 in x reg -> unpcklps
;;   1: SSE register, operand 1 from memory, operand 2 zero      -> movss
;;   2: MMX register, operand 1 tied to dest, operand 2 in y reg -> punpckldq
;;   3: MMX register, operand 1 from reg/mem, operand 2 zero     -> movd
4071 (define_insn "*sse1_concatv2si"
4072 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4074 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4075 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4078 unpcklps\t{%2, %0|%0, %2}
4079 movss\t{%1, %0|%0, %1}
4080 punpckldq\t{%2, %0|%0, %2}
4081 movd\t{%1, %0|%0, %1}"
4082 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4083 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI value from two V2SI halves.  Operand 1 is always tied
;; to the destination.  Alternatives by location of operand 2:
;;   0: Y register  -> punpcklqdq
;;   1: x register  -> movlhps
;;   2: memory      -> movhps
4085 (define_insn "*vec_concatv4si_1"
4086 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
4088 (match_operand:V2SI 1 "register_operand" " 0,0,0")
4089 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
4092 punpcklqdq\t{%2, %0|%0, %2}
4093 movlhps\t{%2, %0|%0, %2}
4094 movhps\t{%2, %0|%0, %2}"
4095 [(set_attr "type" "sselog,ssemov,ssemov")
4096 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI value from two DImode operands.  Alternatives:
;;   0: operand 1 in memory, operand 2 zero          -> movq
;;   1: operand 1 in an MMX register, operand 2 zero -> movq2dq
;;   2: operand 1 tied to dest, operand 2 in Y reg   -> punpcklqdq
;;   3: operand 1 tied to dest, operand 2 in x reg   -> movlhps
;;   4: operand 1 tied to dest, operand 2 in memory  -> movhps
;;   5: operand 1 in memory, operand 2 tied to dest  -> movlps
;; Alternative 1 carries "?" on the destination, which makes it slightly
;; less preferred.
4098 (define_insn "*vec_concatv2di"
4099 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
4101 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
4102 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
4105 movq\t{%1, %0|%0, %1}
4106 movq2dq\t{%1, %0|%0, %1}
4107 punpcklqdq\t{%2, %0|%0, %2}
4108 movlhps\t{%2, %0|%0, %2}
4109 movhps\t{%2, %0|%0, %2}
4110 movlps\t{%1, %0|%0, %1}"
4111 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4112 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Standard-name expander for setting one element of a V2DI register.
;;   operand 0  vector being modified
;;   operand 1  DImode value to store
;;   operand 2  constant element index
;; The work is done by ix86_expand_vector_set, called with false as its
;; first argument (the meaning of that flag is defined by the helper).
4114 (define_expand "vec_setv2di"
4115 [(match_operand:V2DI 0 "register_operand" "")
4116 (match_operand:DI 1 "register_operand" "")
4117 (match_operand 2 "const_int_operand" "")]
4120 ix86_expand_vector_set (false, operands[0], operands[1],
4121 INTVAL (operands[2]));
;; Standard-name expander for extracting one element of a V2DI register.
;;   operand 0  DImode destination
;;   operand 1  source vector
;;   operand 2  constant element index
;; The work is done by ix86_expand_vector_extract, called with false as
;; its first argument (the meaning of that flag is defined by the helper).
4125 (define_expand "vec_extractv2di"
4126 [(match_operand:DI 0 "register_operand" "")
4127 (match_operand:V2DI 1 "register_operand" "")
4128 (match_operand 2 "const_int_operand" "")]
4131 ix86_expand_vector_extract (false, operands[0], operands[1],
4132 INTVAL (operands[2]));
;; Standard-name expander for initialising a V2DI register.  Operand 1
;; has no predicate or mode and is passed unchanged, together with the
;; destination, to ix86_expand_vector_init (first argument false).
4136 (define_expand "vec_initv2di"
4137 [(match_operand:V2DI 0 "register_operand" "")
4138 (match_operand 1 "" "")]
4141 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-name expander for setting one element of a V4SI register.
;;   operand 0  vector being modified
;;   operand 1  SImode value to store
;;   operand 2  constant element index
;; Delegates to ix86_expand_vector_set with false as the first argument.
4145 (define_expand "vec_setv4si"
4146 [(match_operand:V4SI 0 "register_operand" "")
4147 (match_operand:SI 1 "register_operand" "")
4148 (match_operand 2 "const_int_operand" "")]
4151 ix86_expand_vector_set (false, operands[0], operands[1],
4152 INTVAL (operands[2]));
;; Standard-name expander for extracting one element of a V4SI register.
;;   operand 0  SImode destination
;;   operand 1  source vector
;;   operand 2  constant element index
;; Delegates to ix86_expand_vector_extract with false as the first
;; argument.
4156 (define_expand "vec_extractv4si"
4157 [(match_operand:SI 0 "register_operand" "")
4158 (match_operand:V4SI 1 "register_operand" "")
4159 (match_operand 2 "const_int_operand" "")]
4162 ix86_expand_vector_extract (false, operands[0], operands[1],
4163 INTVAL (operands[2]));
;; Standard-name expander for initialising a V4SI register.  Operand 1
;; has no predicate or mode and is passed unchanged, together with the
;; destination, to ix86_expand_vector_init (first argument false).
4167 (define_expand "vec_initv4si"
4168 [(match_operand:V4SI 0 "register_operand" "")
4169 (match_operand 1 "" "")]
4172 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-name expander for setting one element of a V8HI register.
;;   operand 0  vector being modified
;;   operand 1  HImode value to store
;;   operand 2  constant element index
;; Delegates to ix86_expand_vector_set with false as the first argument.
4176 (define_expand "vec_setv8hi"
4177 [(match_operand:V8HI 0 "register_operand" "")
4178 (match_operand:HI 1 "register_operand" "")
4179 (match_operand 2 "const_int_operand" "")]
4182 ix86_expand_vector_set (false, operands[0], operands[1],
4183 INTVAL (operands[2]));
;; Standard-name expander for extracting one element of a V8HI register.
;;   operand 0  HImode destination
;;   operand 1  source vector
;;   operand 2  constant element index
;; Delegates to ix86_expand_vector_extract with false as the first
;; argument.
4187 (define_expand "vec_extractv8hi"
4188 [(match_operand:HI 0 "register_operand" "")
4189 (match_operand:V8HI 1 "register_operand" "")
4190 (match_operand 2 "const_int_operand" "")]
4193 ix86_expand_vector_extract (false, operands[0], operands[1],
4194 INTVAL (operands[2]));
;; Standard-name expander for initialising a V8HI register.  Operand 1
;; has no predicate or mode and is passed unchanged, together with the
;; destination, to ix86_expand_vector_init (first argument false).
4198 (define_expand "vec_initv8hi"
4199 [(match_operand:V8HI 0 "register_operand" "")
4200 (match_operand 1 "" "")]
4203 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-name expander for setting one element of a V16QI register.
;;   operand 0  vector being modified
;;   operand 1  QImode value to store
;;   operand 2  constant element index
;; Delegates to ix86_expand_vector_set with false as the first argument.
4207 (define_expand "vec_setv16qi"
4208 [(match_operand:V16QI 0 "register_operand" "")
4209 (match_operand:QI 1 "register_operand" "")
4210 (match_operand 2 "const_int_operand" "")]
4213 ix86_expand_vector_set (false, operands[0], operands[1],
4214 INTVAL (operands[2]));
;; Standard-name expander for extracting one element of a V16QI
;; register.
;;   operand 0  QImode destination
;;   operand 1  source vector
;;   operand 2  constant element index
;; Delegates to ix86_expand_vector_extract with false as the first
;; argument.
4218 (define_expand "vec_extractv16qi"
4219 [(match_operand:QI 0 "register_operand" "")
4220 (match_operand:V16QI 1 "register_operand" "")
4221 (match_operand 2 "const_int_operand" "")]
4224 ix86_expand_vector_extract (false, operands[0], operands[1],
4225 INTVAL (operands[2]));
;; Standard-name expander for initialising a V16QI register.  Operand 1
;; has no predicate or mode and is passed unchanged, together with the
;; destination, to ix86_expand_vector_init (first argument false).
4229 (define_expand "vec_initv16qi"
4230 [(match_operand:V16QI 0 "register_operand" "")
4231 (match_operand 1 "" "")]
4234 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Widening unpack from V16QI to V8HI (unsigned, high half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; true, true).
;; NOTE(review): across the sibling expanders the second flag tracks the
;; "u"/"s" in the name and the third tracks "hi"/"lo" -- presumably
;; unsigned_p and high_p; confirm against the helper's definition.
4238 (define_expand "vec_unpacku_hi_v16qi"
4239 [(match_operand:V8HI 0 "register_operand" "")
4240 (match_operand:V16QI 1 "register_operand" "")]
4243 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack from V16QI to V8HI (signed, high half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; false, true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4247 (define_expand "vec_unpacks_hi_v16qi"
4248 [(match_operand:V8HI 0 "register_operand" "")
4249 (match_operand:V16QI 1 "register_operand" "")]
4252 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack from V16QI to V8HI (unsigned, low half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; true, false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4256 (define_expand "vec_unpacku_lo_v16qi"
4257 [(match_operand:V8HI 0 "register_operand" "")
4258 (match_operand:V16QI 1 "register_operand" "")]
4261 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack from V16QI to V8HI (signed, low half according to the
;; pattern name).  Delegates to ix86_expand_sse_unpack (operands, false,
;; false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4265 (define_expand "vec_unpacks_lo_v16qi"
4266 [(match_operand:V8HI 0 "register_operand" "")
4267 (match_operand:V16QI 1 "register_operand" "")]
4270 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack from V8HI to V4SI (unsigned, high half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; true, true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4274 (define_expand "vec_unpacku_hi_v8hi"
4275 [(match_operand:V4SI 0 "register_operand" "")
4276 (match_operand:V8HI 1 "register_operand" "")]
4279 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack from V8HI to V4SI (signed, high half according to the
;; pattern name).  Delegates to ix86_expand_sse_unpack (operands, false,
;; true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4283 (define_expand "vec_unpacks_hi_v8hi"
4284 [(match_operand:V4SI 0 "register_operand" "")
4285 (match_operand:V8HI 1 "register_operand" "")]
4288 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack from V8HI to V4SI (unsigned, low half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; true, false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4292 (define_expand "vec_unpacku_lo_v8hi"
4293 [(match_operand:V4SI 0 "register_operand" "")
4294 (match_operand:V8HI 1 "register_operand" "")]
4297 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack from V8HI to V4SI (signed, low half according to the
;; pattern name).  Delegates to ix86_expand_sse_unpack (operands, false,
;; false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4301 (define_expand "vec_unpacks_lo_v8hi"
4302 [(match_operand:V4SI 0 "register_operand" "")
4303 (match_operand:V8HI 1 "register_operand" "")]
4306 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack from V4SI to V2DI (unsigned, high half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; true, true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4310 (define_expand "vec_unpacku_hi_v4si"
4311 [(match_operand:V2DI 0 "register_operand" "")
4312 (match_operand:V4SI 1 "register_operand" "")]
4315 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack from V4SI to V2DI (signed, high half according to the
;; pattern name).  Delegates to ix86_expand_sse_unpack (operands, false,
;; true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4319 (define_expand "vec_unpacks_hi_v4si"
4320 [(match_operand:V2DI 0 "register_operand" "")
4321 (match_operand:V4SI 1 "register_operand" "")]
4324 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack from V4SI to V2DI (unsigned, low half according to
;; the pattern name).  Delegates to ix86_expand_sse_unpack (operands,
;; true, false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4328 (define_expand "vec_unpacku_lo_v4si"
4329 [(match_operand:V2DI 0 "register_operand" "")
4330 (match_operand:V4SI 1 "register_operand" "")]
4333 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack from V4SI to V2DI (signed, low half according to the
;; pattern name).  Delegates to ix86_expand_sse_unpack (operands, false,
;; false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against the helper's definition.
4337 (define_expand "vec_unpacks_lo_v4si"
4338 [(match_operand:V2DI 0 "register_operand" "")
4339 (match_operand:V4SI 1 "register_operand" "")]
4342 ix86_expand_sse_unpack (operands, false, false);
4346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: V16QI average, emitted as "pavgb".
;;   operand 0 - destination XMM register.
;;   operand 1 - first input, tied to operand 0; the "%" marks operands 1
;;               and 2 as commutative.
;;   operand 2 - second input, XMM register or memory.
;; The sixteen-element vector of ones is part of the RTL expression
;; (presumably the rounding term of the average -- confirm against the
;; complete pattern).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
4352 (define_insn "sse2_uavgv16qi3"
4353 [(set (match_operand:V16QI 0 "register_operand" "=x")
4359 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4361 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4362 (const_vector:V16QI [(const_int 1) (const_int 1)
4363 (const_int 1) (const_int 1)
4364 (const_int 1) (const_int 1)
4365 (const_int 1) (const_int 1)
4366 (const_int 1) (const_int 1)
4367 (const_int 1) (const_int 1)
4368 (const_int 1) (const_int 1)
4369 (const_int 1) (const_int 1)]))
4371 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4372 "pavgb\t{%2, %0|%0, %2}"
4373 [(set_attr "type" "sseiadd")
4374 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: V8HI average, emitted as "pavgw".
;;   operand 0 - destination XMM register.
;;   operand 1 - first input, tied to operand 0; the "%" marks operands 1
;;               and 2 as commutative.
;;   operand 2 - second input, XMM register or memory.
;; The eight-element vector of ones is part of the RTL expression
;; (presumably the rounding term of the average -- confirm against the
;; complete pattern).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
4376 (define_insn "sse2_uavgv8hi3"
4377 [(set (match_operand:V8HI 0 "register_operand" "=x")
4383 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4385 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4386 (const_vector:V8HI [(const_int 1) (const_int 1)
4387 (const_int 1) (const_int 1)
4388 (const_int 1) (const_int 1)
4389 (const_int 1) (const_int 1)]))
4391 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4392 "pavgw\t{%2, %0|%0, %2}"
4393 [(set_attr "type" "sseiadd")
4394 (set_attr "mode" "TI")])
;; sse2_psadbw: emits "psadbw".  The operation is modelled as an opaque
;; unspec rather than as explicit RTL arithmetic (see the remark below).
;;   operand 0 - V2DI destination XMM register.
;;   operand 1 - V16QI input, tied to operand 0.
;;   operand 2 - V16QI input, XMM register or memory.
4396 ;; The correct representation for this is absolutely enormous, and
4397 ;; surely not generally useful.
4398 (define_insn "sse2_psadbw"
4399 [(set (match_operand:V2DI 0 "register_operand" "=x")
4400 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4401 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4404 "psadbw\t{%2, %0|%0, %2}"
4405 [(set_attr "type" "sseiadd")
4406 (set_attr "mode" "TI")])
;; sse_movmskps: emits "movmskps".  Modelled as an unspec that produces an
;; SI value in a general register (operand 0, "=r") from a V4SF XMM
;; register (operand 1).
4408 (define_insn "sse_movmskps"
4409 [(set (match_operand:SI 0 "register_operand" "=r")
4410 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4413 "movmskps\t{%1, %0|%0, %1}"
4414 [(set_attr "type" "ssecvt")
4415 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: emits "movmskpd".  Modelled as an unspec that produces an
;; SI value in a general register (operand 0, "=r") from a V2DF XMM
;; register (operand 1).
4417 (define_insn "sse2_movmskpd"
4418 [(set (match_operand:SI 0 "register_operand" "=r")
4419 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4422 "movmskpd\t{%1, %0|%0, %1}"
4423 [(set_attr "type" "ssecvt")
4424 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: emits "pmovmskb".  Modelled as an unspec that produces an
;; SI value in a general register (operand 0, "=r") from a V16QI XMM
;; register (operand 1).
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector.  This may be deliberate (e.g. to influence prefix or
;; length computation done elsewhere) -- confirm before changing it.
4426 (define_insn "sse2_pmovmskb"
4427 [(set (match_operand:SI 0 "register_operand" "=r")
4428 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4431 "pmovmskb\t{%1, %0|%0, %1}"
4432 [(set_attr "type" "ssecvt")
4433 (set_attr "mode" "V2DF")])
;; sse2_maskmovdqu: named expander for the masked byte store.
;;   operand 0 - V16QI memory destination.
;;   operand 1, operand 2 - V16QI XMM register inputs of the unspec.
;; The matching insns are "*sse2_maskmovdqu" (32-bit) and
;; "*sse2_maskmovdqu_rex64" (64-bit).
4435 (define_expand "sse2_maskmovdqu"
4436 [(set (match_operand:V16QI 0 "memory_operand" "")
4437 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4438 (match_operand:V16QI 2 "register_operand" "x")
;; *sse2_maskmovdqu: 32-bit form (TARGET_SSE2 && !TARGET_64BIT) of the
;; masked store.
;;   operand 0 - SI register holding the destination address; the "D"
;;               constraint pins it to one specific register (the di
;;               register in the i386 constraint set).
;;   operand 1, operand 2 - V16QI XMM registers.
;; The previous contents of the destination, (mem (match_dup 0)), are also
;; an input of the unspec, so the store is not treated as overwriting the
;; whole location.  The address register does not appear in the output
;; template.
4444 (define_insn "*sse2_maskmovdqu"
4445 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4446 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4447 (match_operand:V16QI 2 "register_operand" "x")
4448 (mem:V16QI (match_dup 0))]
4450 "TARGET_SSE2 && !TARGET_64BIT"
4451 ;; @@@ check ordering of operands in intel/nonintel syntax
4452 "maskmovdqu\t{%2, %1|%1, %2}"
4453 [(set_attr "type" "ssecvt")
4454 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (TARGET_SSE2 && TARGET_64BIT) of the
;; masked store.  Identical to "*sse2_maskmovdqu" except that the address
;; register (operand 0, constraint "D") is DImode.
;;   operand 1, operand 2 - V16QI XMM registers.
;; The previous contents of the destination, (mem (match_dup 0)), are also
;; an input of the unspec.  The address register does not appear in the
;; output template.
4456 (define_insn "*sse2_maskmovdqu_rex64"
4457 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4458 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4459 (match_operand:V16QI 2 "register_operand" "x")
4460 (mem:V16QI (match_dup 0))]
4462 "TARGET_SSE2 && TARGET_64BIT"
4463 ;; @@@ check ordering of operands in intel/nonintel syntax
4464 "maskmovdqu\t{%2, %1|%1, %2}"
4465 [(set_attr "type" "ssecvt")
4466 (set_attr "mode" "TI")])
;; sse_ldmxcsr: unspec_volatile whose only operand is an SImode memory
;; location ("m").  Attributes: type "sse", memory "load".
4468 (define_insn "sse_ldmxcsr"
4469 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
4473 [(set_attr "type" "sse")
4474 (set_attr "memory" "load")])
;; sse_stmxcsr: sets an SImode memory location (operand 0, "=m") from the
;; UNSPECV_STMXCSR unspec_volatile; the (const_int 0) is only a
;; placeholder input.  Attributes: type "sse", memory "store".
4476 (define_insn "sse_stmxcsr"
4477 [(set (match_operand:SI 0 "memory_operand" "=m")
4478 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
4481 [(set_attr "type" "sse")
4482 (set_attr "memory" "store")])
;; sse_sfence: named expander, available when TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code creates operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, marked volatile.  The fence is then
;; expressed as the UNSPEC_SFENCE unspec of that MEM.
4484 (define_expand "sse_sfence"
4486 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4487 "TARGET_SSE || TARGET_3DNOW_A"
4489 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4490 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: insn matching a BLKmode operand that is set to the
;; UNSPEC_SFENCE unspec of itself.  Operand 0 has an empty predicate and
;; empty constraint.  Available when TARGET_SSE or TARGET_3DNOW_A; the
;; "memory" attribute is "unknown".
4493 (define_insn "*sse_sfence"
4494 [(set (match_operand:BLK 0 "" "")
4495 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4496 "TARGET_SSE || TARGET_3DNOW_A"
4498 [(set_attr "type" "sse")
4499 (set_attr "memory" "unknown")])
;; sse2_clflush: unspec_volatile whose only operand is an address
;; (predicate address_operand, constraint "p"), not a MEM.  Attributes:
;; type "sse", memory "unknown".
4501 (define_insn "sse2_clflush"
4502 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
4506 [(set_attr "type" "sse")
4507 (set_attr "memory" "unknown")])
;; sse2_mfence: named expander.  The preparation code creates operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, marked volatile.
;; The fence is then expressed as the UNSPEC_MFENCE unspec of that MEM.
4509 (define_expand "sse2_mfence"
4511 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4514 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4515 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: insn matching a BLKmode operand that is set to the
;; UNSPEC_MFENCE unspec of itself.  Operand 0 has an empty predicate and
;; empty constraint; the "memory" attribute is "unknown".
4518 (define_insn "*sse2_mfence"
4519 [(set (match_operand:BLK 0 "" "")
4520 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4523 [(set_attr "type" "sse")
4524 (set_attr "memory" "unknown")])
;; sse2_lfence: named expander.  The preparation code creates operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, marked volatile.
;; The fence is then expressed as the UNSPEC_LFENCE unspec of that MEM.
4526 (define_expand "sse2_lfence"
4528 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4531 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4532 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: insn matching a BLKmode operand that is set to the
;; UNSPEC_LFENCE unspec of itself.  Operand 0 has an empty predicate and
;; empty constraint; the "memory" attribute is "unknown".
4535 (define_insn "*sse2_lfence"
4536 [(set (match_operand:BLK 0 "" "")
4537 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4540 [(set_attr "type" "sse")
4541 (set_attr "memory" "unknown")])
;; sse3_mwait: unspec_volatile taking two SImode register operands that
;; are pinned by constraint: operand 0 to "a" and operand 1 to "c".  The
;; same SImode pattern serves 64-bit targets, for the reason given in the
;; comment below.  The length is fixed at 3 bytes.
4543 (define_insn "sse3_mwait"
4544 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4545 (match_operand:SI 1 "register_operand" "c")]
4548 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
4549 ;; Since 32bit register operands are implicitly zero extended to 64bit,
4550 ;; we only need to set up 32bit registers.
4552 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form (TARGET_SSE3 && !TARGET_64BIT).  An
;; unspec_volatile taking three SImode register operands pinned by
;; constraint to "a", "c" and "d" respectively; all three are printed in
;; the output template.  The length is fixed at 3 bytes.
4554 (define_insn "sse3_monitor"
4555 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4556 (match_operand:SI 1 "register_operand" "c")
4557 (match_operand:SI 2 "register_operand" "d")]
4559 "TARGET_SSE3 && !TARGET_64BIT"
4560 "monitor\t%0, %1, %2"
4561 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form (TARGET_SSE3 && TARGET_64BIT).  Operand 0
;; is DImode in "a"; operands 1 and 2 stay SImode in "c" and "d", for the
;; reason given in the comment below.  The length is fixed at 3 bytes.
4563 (define_insn "sse3_monitor64"
4564 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
4565 (match_operand:SI 1 "register_operand" "c")
4566 (match_operand:SI 2 "register_operand" "d")]
4568 "TARGET_SSE3 && TARGET_64BIT"
4569 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
4570 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4571 ;; zero extended to 64bit, we only need to set up 32bit registers.
4573 [(set_attr "length" "3")])
;; ssse3_phaddwv8hi3: V8HI pattern emitted as "phaddw" on XMM registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;; The RTL takes HImode elements 0..7 of operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of operand 2.
4576 (define_insn "ssse3_phaddwv8hi3"
4577 [(set (match_operand:V8HI 0 "register_operand" "=x")
4583 (match_operand:V8HI 1 "register_operand" "0")
4584 (parallel [(const_int 0)]))
4585 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4587 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4588 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4591 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4592 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4594 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4595 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4600 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4601 (parallel [(const_int 0)]))
4602 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4604 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4605 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4608 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4609 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4611 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4612 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4614 "phaddw\t{%2, %0|%0, %2}"
4615 [(set_attr "type" "sseiadd")
4616 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: V4HI pattern emitted as "phaddw"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;; The RTL takes HImode elements 0..3 of operand 1 in adjacent pairs
;; (0,1) (2,3), followed by the same pairs of operand 2.
4618 (define_insn "ssse3_phaddwv4hi3"
4619 [(set (match_operand:V4HI 0 "register_operand" "=y")
4624 (match_operand:V4HI 1 "register_operand" "0")
4625 (parallel [(const_int 0)]))
4626 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4628 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4629 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4633 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4634 (parallel [(const_int 0)]))
4635 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4637 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4638 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4640 "phaddw\t{%2, %0|%0, %2}"
4641 [(set_attr "type" "sseiadd")
4642 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: V4SI pattern emitted as "phaddd" on XMM registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;; The RTL takes SImode elements 0..3 of operand 1 in adjacent pairs
;; (0,1) (2,3), followed by the same pairs of operand 2.
4644 (define_insn "ssse3_phadddv4si3"
4645 [(set (match_operand:V4SI 0 "register_operand" "=x")
4650 (match_operand:V4SI 1 "register_operand" "0")
4651 (parallel [(const_int 0)]))
4652 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4654 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4655 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4659 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4660 (parallel [(const_int 0)]))
4661 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4663 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4664 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4666 "phaddd\t{%2, %0|%0, %2}"
4667 [(set_attr "type" "sseiadd")
4668 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: V2SI pattern emitted as "phaddd"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;; The RTL pairs SImode elements 0 and 1 of operand 1, then elements 0 and
;; 1 of operand 2.
4670 (define_insn "ssse3_phadddv2si3"
4671 [(set (match_operand:V2SI 0 "register_operand" "=y")
4675 (match_operand:V2SI 1 "register_operand" "0")
4676 (parallel [(const_int 0)]))
4677 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4680 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4681 (parallel [(const_int 0)]))
4682 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4684 "phaddd\t{%2, %0|%0, %2}"
4685 [(set_attr "type" "sseiadd")
4686 (set_attr "mode" "DI")])
;; ssse3_phaddswv8hi3: V8HI pattern emitted as "phaddsw" on XMM registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;; The RTL takes HImode elements 0..7 of operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of operand 2.
4688 (define_insn "ssse3_phaddswv8hi3"
4689 [(set (match_operand:V8HI 0 "register_operand" "=x")
4695 (match_operand:V8HI 1 "register_operand" "0")
4696 (parallel [(const_int 0)]))
4697 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4699 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4700 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4703 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4704 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4706 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4707 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4712 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4713 (parallel [(const_int 0)]))
4714 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4716 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4717 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4720 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4721 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4723 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4724 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4726 "phaddsw\t{%2, %0|%0, %2}"
4727 [(set_attr "type" "sseiadd")
4728 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: V4HI pattern emitted as "phaddsw"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;; The RTL takes HImode elements 0..3 of operand 1 in adjacent pairs
;; (0,1) (2,3), followed by the same pairs of operand 2.
4730 (define_insn "ssse3_phaddswv4hi3"
4731 [(set (match_operand:V4HI 0 "register_operand" "=y")
4736 (match_operand:V4HI 1 "register_operand" "0")
4737 (parallel [(const_int 0)]))
4738 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4740 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4741 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4745 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4746 (parallel [(const_int 0)]))
4747 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4749 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4750 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4752 "phaddsw\t{%2, %0|%0, %2}"
4753 [(set_attr "type" "sseiadd")
4754 (set_attr "mode" "DI")])
;; ssse3_phsubwv8hi3: V8HI pattern emitted as "phsubw" on XMM registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;; The RTL takes HImode elements 0..7 of operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of operand 2.
4756 (define_insn "ssse3_phsubwv8hi3"
4757 [(set (match_operand:V8HI 0 "register_operand" "=x")
4763 (match_operand:V8HI 1 "register_operand" "0")
4764 (parallel [(const_int 0)]))
4765 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4767 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4768 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4771 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4772 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4774 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4775 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4780 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4781 (parallel [(const_int 0)]))
4782 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4784 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4785 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4788 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4789 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4791 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4792 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4794 "phsubw\t{%2, %0|%0, %2}"
4795 [(set_attr "type" "sseiadd")
4796 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: V4HI pattern emitted as "phsubw"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;; The RTL takes HImode elements 0..3 of operand 1 in adjacent pairs
;; (0,1) (2,3), followed by the same pairs of operand 2.
4798 (define_insn "ssse3_phsubwv4hi3"
4799 [(set (match_operand:V4HI 0 "register_operand" "=y")
4804 (match_operand:V4HI 1 "register_operand" "0")
4805 (parallel [(const_int 0)]))
4806 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4808 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4809 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4813 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4814 (parallel [(const_int 0)]))
4815 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4817 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4818 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4820 "phsubw\t{%2, %0|%0, %2}"
4821 [(set_attr "type" "sseiadd")
4822 (set_attr "mode" "DI")])
;; ssse3_phsubdv4si3: V4SI pattern emitted as "phsubd" on XMM registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;; The RTL takes SImode elements 0..3 of operand 1 in adjacent pairs
;; (0,1) (2,3), followed by the same pairs of operand 2.
4824 (define_insn "ssse3_phsubdv4si3"
4825 [(set (match_operand:V4SI 0 "register_operand" "=x")
4830 (match_operand:V4SI 1 "register_operand" "0")
4831 (parallel [(const_int 0)]))
4832 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4834 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4835 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4839 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4840 (parallel [(const_int 0)]))
4841 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4843 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4844 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4846 "phsubd\t{%2, %0|%0, %2}"
4847 [(set_attr "type" "sseiadd")
4848 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: V2SI pattern emitted as "phsubd"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;; The RTL pairs SImode elements 0 and 1 of operand 1, then elements 0 and
;; 1 of operand 2.
4850 (define_insn "ssse3_phsubdv2si3"
4851 [(set (match_operand:V2SI 0 "register_operand" "=y")
4855 (match_operand:V2SI 1 "register_operand" "0")
4856 (parallel [(const_int 0)]))
4857 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4860 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4861 (parallel [(const_int 0)]))
4862 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4864 "phsubd\t{%2, %0|%0, %2}"
4865 [(set_attr "type" "sseiadd")
4866 (set_attr "mode" "DI")])
;; ssse3_phsubswv8hi3: V8HI pattern emitted as "phsubsw" on XMM registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;; The RTL takes HImode elements 0..7 of operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of operand 2.
4868 (define_insn "ssse3_phsubswv8hi3"
4869 [(set (match_operand:V8HI 0 "register_operand" "=x")
4875 (match_operand:V8HI 1 "register_operand" "0")
4876 (parallel [(const_int 0)]))
4877 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4879 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4880 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4883 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4884 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4886 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4887 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4892 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4893 (parallel [(const_int 0)]))
4894 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4896 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4897 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4900 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4901 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4903 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4904 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4906 "phsubsw\t{%2, %0|%0, %2}"
4907 [(set_attr "type" "sseiadd")
4908 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: V4HI pattern emitted as "phsubsw"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;; The RTL takes HImode elements 0..3 of operand 1 in adjacent pairs
;; (0,1) (2,3), followed by the same pairs of operand 2.
4910 (define_insn "ssse3_phsubswv4hi3"
4911 [(set (match_operand:V4HI 0 "register_operand" "=y")
4916 (match_operand:V4HI 1 "register_operand" "0")
4917 (parallel [(const_int 0)]))
4918 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4920 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4921 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4925 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4926 (parallel [(const_int 0)]))
4927 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4929 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4930 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4932 "phsubsw\t{%2, %0|%0, %2}"
4933 [(set_attr "type" "sseiadd")
4934 (set_attr "mode" "DI")])
;; ssse3_pmaddubswv8hi3: emitted as "pmaddubsw" on XMM registers; two
;; V16QI inputs produce a V8HI result.
;;   operand 0 - V8HI destination XMM register.
;;   operand 1 - V16QI input, tied to operand 0 and marked commutative
;;               ("%").
;;   operand 2 - V16QI input, XMM register or memory.
;; Each input is selected twice: once with an index list starting at
;; element 0 and once with a list starting at element 1 (the latter ends
;; at element 15).
4936 (define_insn "ssse3_pmaddubswv8hi3"
4937 [(set (match_operand:V8HI 0 "register_operand" "=x")
4942 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4943 (parallel [(const_int 0)
4953 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
4954 (parallel [(const_int 0)
4964 (vec_select:V16QI (match_dup 1)
4965 (parallel [(const_int 1)
4974 (vec_select:V16QI (match_dup 2)
4975 (parallel [(const_int 1)
4982 (const_int 15)]))))))]
4984 "pmaddubsw\t{%2, %0|%0, %2}"
4985 [(set_attr "type" "sseiadd")
4986 (set_attr "mode" "TI")])
;; ssse3_pmaddubswv4hi3: emitted as "pmaddubsw"; two V8QI inputs produce a
;; V4HI result.  The "y" constraints place the register operands in the
;; MMX register class.
;;   operand 0 - V4HI destination register.
;;   operand 1 - V8QI input, tied to operand 0 and marked commutative
;;               ("%").
;;   operand 2 - V8QI input, register or memory.
;; Each input is selected twice: once with an index list starting at
;; element 0 and once with a list starting at element 1 (the latter ends
;; at element 7).
4988 (define_insn "ssse3_pmaddubswv4hi3"
4989 [(set (match_operand:V4HI 0 "register_operand" "=y")
4994 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
4995 (parallel [(const_int 0)
5001 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5002 (parallel [(const_int 0)
5008 (vec_select:V8QI (match_dup 1)
5009 (parallel [(const_int 1)
5014 (vec_select:V8QI (match_dup 2)
5015 (parallel [(const_int 1)
5018 (const_int 7)]))))))]
5020 "pmaddubsw\t{%2, %0|%0, %2}"
5021 [(set_attr "type" "sseiadd")
5022 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3: V8HI pattern emitted as "pmulhrsw" on XMM
;; registers.
;;   operand 0 - destination XMM register.
;;   operand 1 - input, tied to operand 0 and marked commutative ("%").
;;   operand 2 - input, XMM register or memory.
;; The eight-element vector of ones is part of the RTL expression
;; (presumably the rounding increment -- confirm against the complete
;; pattern).  Enabled for TARGET_SSSE3 when ix86_binary_operator_ok
;; accepts the operands for MULT.
5024 (define_insn "ssse3_pmulhrswv8hi3"
5025 [(set (match_operand:V8HI 0 "register_operand" "=x")
5032 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5034 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5036 (const_vector:V8HI [(const_int 1) (const_int 1)
5037 (const_int 1) (const_int 1)
5038 (const_int 1) (const_int 1)
5039 (const_int 1) (const_int 1)]))
5041 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5042 "pmulhrsw\t{%2, %0|%0, %2}"
5043 [(set_attr "type" "sseimul")
5044 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: V4HI pattern emitted as "pmulhrsw"; the "y"
;; constraints place the register operands in the MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - input, tied to operand 0 and marked commutative ("%").
;;   operand 2 - input, register or memory.
;; The four-element vector of ones is part of the RTL expression
;; (presumably the rounding increment -- confirm against the complete
;; pattern).  Enabled for TARGET_SSSE3 when ix86_binary_operator_ok
;; accepts the operands for MULT.
5046 (define_insn "ssse3_pmulhrswv4hi3"
5047 [(set (match_operand:V4HI 0 "register_operand" "=y")
5054 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5056 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5058 (const_vector:V4HI [(const_int 1) (const_int 1)
5059 (const_int 1) (const_int 1)]))
5061 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5062 "pmulhrsw\t{%2, %0|%0, %2}"
5063 [(set_attr "type" "sseimul")
5064 (set_attr "mode" "DI")])
;; ssse3_pshufbv16qi3: V16QI pattern emitted as "pshufb" on XMM registers,
;; modelled as an opaque unspec.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
5066 (define_insn "ssse3_pshufbv16qi3"
5067 [(set (match_operand:V16QI 0 "register_operand" "=x")
5068 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5069 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5072 "pshufb\t{%2, %0|%0, %2}"
5073 [(set_attr "type" "sselog1")
5074 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: V8QI pattern emitted as "pshufb", modelled as an
;; opaque unspec.  The "y" constraints place the register operands in the
;; MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
5076 (define_insn "ssse3_pshufbv8qi3"
5077 [(set (match_operand:V8QI 0 "register_operand" "=y")
5078 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5079 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5082 "pshufb\t{%2, %0|%0, %2}"
5083 [(set_attr "type" "sselog1")
5084 (set_attr "mode" "DI")])
;; ssse3_psign<mode>3 (XMM form): instantiated for every mode in
;; SSEMODE124 (V16QI, V8HI, V4SI).  The mnemonic suffix comes from the
;; ssevecsize attribute, giving psignb / psignw / psignd.  Modelled as an
;; opaque unspec.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
5086 (define_insn "ssse3_psign<mode>3"
5087 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5088 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5089 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5092 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5093 [(set_attr "type" "sselog1")
5094 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (MMX form): instantiated for every mode in the
;; MMXMODEI macro, with the mnemonic suffix taken from the mmxvecsize
;; attribute (both are defined outside this part of the file).  The "y"
;; constraints place the register operands in the MMX register class.
;; Modelled as an opaque unspec.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
5096 (define_insn "ssse3_psign<mode>3"
5097 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5098 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5099 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5102 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5103 [(set_attr "type" "sselog1")
5104 (set_attr "mode" "DI")])
;; ssse3_palignrti: TImode pattern emitted as "palignr" on XMM registers,
;; modelled as an opaque unspec.
;;   operand 0 - destination XMM register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - XMM register or memory input.
;;   operand 3 - SImode constant accepted by const_0_to_255_mul_8_operand.
;; The output code divides operand 3 by 8 before printing it, so the RTL
;; carries a value eight times the immediate that is emitted (presumably a
;; bit count turned into a byte count -- confirm against the predicate).
5106 (define_insn "ssse3_palignrti"
5107 [(set (match_operand:TI 0 "register_operand" "=x")
5108 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5109 (match_operand:TI 2 "nonimmediate_operand" "xm")
5110 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5114 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5115 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5117 [(set_attr "type" "sseishft")
5118 (set_attr "mode" "TI")])
;; ssse3_palignrdi: DImode pattern emitted as "palignr", modelled as an
;; opaque unspec.  The "y" constraints place the register operands in the
;; MMX register class.
;;   operand 0 - destination register.
;;   operand 1 - register input, tied to operand 0.
;;   operand 2 - register or memory input.
;;   operand 3 - SImode constant accepted by const_0_to_255_mul_8_operand.
;; The output code divides operand 3 by 8 before printing it, so the RTL
;; carries a value eight times the immediate that is emitted (presumably a
;; bit count turned into a byte count -- confirm against the predicate).
5120 (define_insn "ssse3_palignrdi"
5121 [(set (match_operand:DI 0 "register_operand" "=y")
5122 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5123 (match_operand:DI 2 "nonimmediate_operand" "ym")
5124 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5128 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5129 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5131 [(set_attr "type" "sseishft")
5132 (set_attr "mode" "DI")])
;; abs<mode>2 (XMM form): element-wise absolute value, instantiated for
;; every mode in SSEMODE124 (V16QI, V8HI, V4SI).  The mnemonic suffix
;; comes from the ssevecsize attribute, giving pabsb / pabsw / pabsd.
;;   operand 0 - destination XMM register.
;;   operand 1 - XMM register or memory input; it is not tied to the
;;               destination.
5134 (define_insn "abs<mode>2"
5135 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5136 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5138 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5139 [(set_attr "type" "sselog1")
5140 (set_attr "mode" "TI")])
;; abs<mode>2 (MMX form): element-wise absolute value, instantiated for
;; every mode in the MMXMODEI macro, with the mnemonic suffix taken from
;; the mmxvecsize attribute (both are defined outside this part of the
;; file).  The "y" constraints place the register operands in the MMX
;; register class.
;;   operand 0 - destination register.
;;   operand 1 - register or memory input; it is not tied to the
;;               destination.
5142 (define_insn "abs<mode>2"
5143 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5144 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5146 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5147 [(set_attr "type" "sselog1")
5148 (set_attr "mode" "DI")])