X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fsse.md;h=d31aceb5d1bc19c60b504707bfaec5b39a756293;hb=2485795e5deaa82dbcceb6ddf52795971a159f7a;hp=0e0a7d3001359c10165f681f6c21bd20e6876732;hpb=c7412c76a2dcbb428fb862dd64f4629e455844a5;p=pf3gnuchains%2Fgcc-fork.git
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0e0a7d30013..d31aceb5d1b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1,12 +1,12 @@
;; GCC machine description for SSE instructions
-;; Copyright (C) 2005
+;; Copyright (C) 2005, 2006, 2007, 2008
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
@@ -15,28 +15,49 @@
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING. If not, write to
-;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GCC; see the file COPYING3. If not see
+;; .
;; 16 byte integral modes handled by SSE, minus TImode, which gets
;; special-cased for TARGET_64BIT.
-(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
+(define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
;; All 16-byte vector modes handled by SSE
-(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Mix-n-match
-(define_mode_macro SSEMODE12 [V16QI V8HI])
-(define_mode_macro SSEMODE24 [V8HI V4SI])
-(define_mode_macro SSEMODE14 [V16QI V4SI])
-(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
-(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
+(define_mode_iterator SSEMODE12 [V16QI V8HI])
+(define_mode_iterator SSEMODE24 [V8HI V4SI])
+(define_mode_iterator SSEMODE14 [V16QI V4SI])
+(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
+(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
+(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
+(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
+(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
+;; Mapping of the sse5 suffix
+(define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
+ (V4SF "ps") (V2DF "pd")])
+(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
+ (V4SF "ss") (V2DF "sd")])
+(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
+
+;; Mapping of the max integer size for sse5 rotate immediate constraint
+(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
+
+;; Mapping of vector modes back to the scalar modes
+(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
+
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
+
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -49,8 +70,8 @@
;; This is essential for maintaining stable calling conventions.
(define_expand "mov"
- [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
- (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
+ [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (mode, operands);
@@ -58,68 +79,81 @@
})
(define_insn "*mov_internal"
- [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
- (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_SSE
+ && (register_operand (operands[0], mode)
+ || register_operand (operands[1], mode))"
{
switch (which_alternative)
{
case 0:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else
- return "pxor\t%0, %0";
+ return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
- else
- return "movdqa\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ default:
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
default:
gcc_unreachable ();
}
}
[(set_attr "type" "sselog1,ssemov,ssemov")
(set (attr "mode")
- (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
(const_string "V4SF")
-
- (eq_attr "alternative" "0,1")
- (if_then_else
- (ne (symbol_ref "optimize_size")
- (const_int 0))
- (const_string "V4SF")
- (const_string "TI"))
- (eq_attr "alternative" "2")
- (if_then_else
- (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
- (const_int 0))
- (ne (symbol_ref "optimize_size")
- (const_int 0)))
- (const_string "V4SF")
- (const_string "TI"))]
- (const_string "TI")))])
-
-(define_expand "movv4sf"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
- (match_operand:V4SF 1 "nonimmediate_operand" ""))]
- "TARGET_SSE"
+ (eq (const_string "mode") (const_string "V4SFmode"))
+ (const_string "V4SF")
+ (eq (const_string "mode") (const_string "V2DFmode"))
+ (const_string "V2DF")
+ ]
+ (const_string "TI")))])
+
+;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
+;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
+;; from memory, we'd prefer to load the memory directly into the %xmm
+;; register. To facilitate this happy circumstance, this pattern won't
+;; split until after register allocation. If the 64-bit value didn't
+;; come from memory, this is the best we can do. This is much better
+;; than storing %edx:%eax into a stack temporary and loading an %xmm
+;; from there.
+
+(define_insn_and_split "movdi_to_sse"
+ [(parallel
+ [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
+ (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
+ (clobber (match_scratch:V4SI 2 "=&x,X"))])]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
{
- ix86_expand_vector_move (V4SFmode, operands);
- DONE;
+ if (register_operand (operands[1], DImode))
+ {
+ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+ Assemble the 64-bit DImode value in an xmm register. */
+ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
+ }
+ else if (memory_operand (operands[1], DImode))
+ emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
+ else
+ gcc_unreachable ();
})
-(define_insn "*movv4sf_internal"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
- "TARGET_SSE"
- "@
- xorps\t%0, %0
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
-
(define_split
[(set (match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
@@ -134,57 +168,6 @@
operands[2] = CONST0_RTX (V4SFmode);
})
-(define_expand "movv2df"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
- (match_operand:V2DF 1 "nonimmediate_operand" ""))]
- "TARGET_SSE"
-{
- ix86_expand_vector_move (V2DFmode, operands);
- DONE;
-})
-
-(define_insn "*movv2df_internal"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
- switch (which_alternative)
- {
- case 0:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else
- return "xorpd\t%0, %0";
- case 1:
- case 2:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
- else
- return "movapd\t{%1, %0|%0, %1}";
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "sselog1,ssemov,ssemov")
- (set (attr "mode")
- (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
- (const_string "V4SF")
- (eq_attr "alternative" "0,1")
- (if_then_else
- (ne (symbol_ref "optimize_size")
- (const_int 0))
- (const_string "V4SF")
- (const_string "V2DF"))
- (eq_attr "alternative" "2")
- (if_then_else
- (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
- (const_int 0))
- (ne (symbol_ref "optimize_size")
- (const_int 0)))
- (const_string "V4SF")
- (const_string "V2DF"))]
- (const_string "V2DF")))])
-
(define_split
[(set (match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
@@ -212,23 +195,16 @@
DONE;
})
-(define_insn "sse_movups"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "movups\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movupd"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "movupd\t{%1, %0|%0, %1}"
+(define_insn "_movup"
+ [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "SSE_VEC_FLOAT_MODE_P (mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "")])
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
@@ -237,25 +213,18 @@
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
-(define_insn "sse_movntv4sf"
- [(set (match_operand:V4SF 0 "memory_operand" "=m")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
- "TARGET_SSE"
- "movntps\t{%1, %0|%0, %1}"
+(define_insn "_movnt"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "movntp\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse2_movntv2df"
- [(set (match_operand:V2DF 0 "memory_operand" "=m")
- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
- "TARGET_SSE2"
- "movntpd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "")])
(define_insn "sse2_movntv2di"
[(set (match_operand:V2DI 0 "memory_operand" "=m")
@@ -264,6 +233,7 @@
"TARGET_SSE2"
"movntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_movntsi"
@@ -278,146 +248,174 @@
(define_insn "sse3_lddqu"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
- UNSPEC_LDQQU))]
+ UNSPEC_LDDQU))]
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; Parallel single-precision floating point arithmetic
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_expand "negv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
+; Expand patterns for non-temporal stores. At the moment, only those
+; that directly map to insns are defined; it would be possible to
+; define patterns for other modes that would expand to several insns.
-(define_expand "absv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
+(define_expand "storent"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "")
-(define_expand "addv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
+(define_expand "storent"
+ [(set (match_operand:MODEF 0 "memory_operand" "")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "")
-(define_insn "*addv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
- "addps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
+(define_expand "storentv2di"
+ [(set (match_operand:V2DI 0 "memory_operand" "")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "")
-(define_insn "sse_vmaddv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
- "addss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
+(define_expand "storentsi"
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "")
-(define_expand "subv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "*subv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "subps\t{%2, %0|%0, %2}"
+(define_expand "2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (absneg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "ix86_expand_fp_absneg_operator (, mode, operands); DONE;")
+
+(define_expand "3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_insn "*3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)
+ && ix86_binary_operator_ok (, mode, operands)"
+ "p\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_vmsubv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (set_attr "mode" "")])
+
+(define_insn "_vm3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "subss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "s\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_expand "mulv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
-
-(define_insn "*mulv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
- "mulps\t{%2, %0|%0, %2}"
+ (set_attr "mode" "")])
+
+(define_expand "mul3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
+
+(define_insn "*mul3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)
+ && ix86_binary_operator_ok (MULT, mode, operands)"
+ "mulp\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemul")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_vmmulv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (set_attr "mode" "")])
+
+(define_insn "_vmmul3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
- "mulss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "muls\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemul")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "")])
(define_expand "divv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "")
(div:V4SF (match_operand:V4SF 1 "register_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V4SFmode);
+ DONE;
+ }
+})
-(define_insn "*divv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "divps\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssediv")
- (set_attr "mode" "V4SF")])
+(define_expand "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "")
-(define_insn "sse_vmdivv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "_div3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "divp\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "")])
+
+(define_insn "_vmdiv3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "divss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "divs\t{%2, %0|%0, %2}"
[(set_attr "type" "ssediv")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "")])
(define_insn "sse_rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
@@ -435,28 +433,21 @@
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
-(define_insn "sse_rsqrtv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
- "TARGET_SSE"
- "rsqrtps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_vmrsqrtv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
- UNSPEC_RSQRT)
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+(define_expand "sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "rsqrtss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
+ DONE;
+ }
+})
-(define_insn "sqrtv4sf2"
+(define_insn "sse_sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
@@ -464,130 +455,134 @@
[(set_attr "type" "sse")
(set_attr "mode" "V4SF")])
-(define_insn "sse_vmsqrtv4sf2"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
- "TARGET_SSE"
- "sqrtss\t{%1, %0|%0, %1}"
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "sqrtpd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "V2DF")])
-;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
-;; isn't really correct, as those rtl operators aren't defined when
-;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+(define_insn "_vmsqrt2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "0")
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "sqrts\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "")])
-(define_expand "smaxv4sf3"
+(define_expand "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
- (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
{
- if (!flag_finite_math_only)
- operands[1] = force_reg (V4SFmode, operands[1]);
- ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
+ DONE;
})
-(define_insn "*smaxv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
- "maxps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*smaxv4sf3"
+(define_insn "sse_rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
"TARGET_SSE"
- "maxps\t{%2, %0|%0, %2}"
+ "rsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "V4SF")])
-(define_insn "*sse_vmsmaxv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
- "maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
-
-(define_insn "sse_vmsmaxv4sf3"
+(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
- "maxss\t{%2, %0|%0, %2}"
+ "rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
-(define_expand "sminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+
+(define_expand "3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
{
if (!flag_finite_math_only)
- operands[1] = force_reg (V4SFmode, operands[1]);
- ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
+ operands[1] = force_reg (mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (, mode, operands);
})
-(define_insn "*sminv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
- "minps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*sminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "minps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*sse_vmsminv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
-
-(define_insn "sse_vmsminv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "*3_finite"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (, mode, operands)"
+ "p\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "")])
+
+(define_insn "*3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "p\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "")])
+
+(define_insn "_vm3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "minss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "s\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*ieee_smin3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "minp\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "")])
+
+(define_insn "*ieee_smax3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "maxp\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "")])
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -600,64 +595,71 @@
"TARGET_SSE3"
"addsubps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
-(define_insn "sse3_haddv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_concat:V4SF
- (vec_concat:V2SF
- (plus:SF
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "0")
- (parallel [(const_int 0)]))
- (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
- (plus:SF
- (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
- (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
- (vec_concat:V2SF
- (plus:SF
- (vec_select:SF
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)]))
- (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
- (plus:SF
- (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
- (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+(define_insn "sse3_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
"TARGET_SSE3"
- "haddps\t{%2, %0|%0, %2}"
+ "addsubpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2DF")])
-(define_insn "sse3_hsubv4sf3"
+(define_insn "sse3_hv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
(vec_concat:V2SF
- (minus:SF
- (vec_select:SF
+ (plusminus:SF
+ (vec_select:SF
(match_operand:V4SF 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
- (minus:SF
+ (plusminus:SF
(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2SF
- (minus:SF
+ (plusminus:SF
(vec_select:SF
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
- (minus:SF
+ (plusminus:SF
(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSE3"
- "hsubps\t{%2, %0|%0, %2}"
+ "hps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
-(define_expand "reduc_plus_v4sf"
- [(match_operand:V4SF 0 "register_operand" "")
- (match_operand:V4SF 1 "register_operand" "")]
- "TARGET_SSE"
+(define_insn "sse3_hv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "hpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "reduc_splus_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
{
if (TARGET_SSE3)
{
@@ -670,6 +672,15 @@
DONE;
})
+(define_expand "reduc_splus_v2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")]
+ "TARGET_SSE3"
+{
+ emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+ DONE;
+})
+
(define_expand "reduc_smax_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
@@ -690,70 +701,71 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel single-precision floating point comparisons
+;; Parallel floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse_maskcmpv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (match_operator:V4SF 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE"
- "cmp%D3ps\t{%2, %0|%0, %2}"
+(define_insn "_maskcmp3"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
+ (match_operator:SSEMODEF4 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF4 1 "register_operand" "0")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
+ "(SSE_FLOAT_MODE_P (mode) || SSE_VEC_FLOAT_MODE_P (mode))
+ && !TARGET_SSE5"
+ "cmp%D3\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_vmmaskcmpv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (match_operator:V4SF 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "register_operand" "x")])
+ (set_attr "mode" "")])
+
+(define_insn "_vmmaskcmp3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
(match_dup 1)
(const_int 1)))]
- "TARGET_SSE"
- "cmp%D3ss\t{%2, %0|%0, %2}"
+ "SSE_VEC_FLOAT_MODE_P (mode) && !TARGET_SSE5"
+ "cmp%D3s\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "")])
-(define_insn "sse_comi"
+(define_insn "_comi"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
- (vec_select:SF
- (match_operand:V4SF 0 "register_operand" "x")
+ (vec_select:MODEF
+ (match_operand: 0 "register_operand" "x")
(parallel [(const_int 0)]))
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (vec_select:MODEF
+ (match_operand: 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
- "TARGET_SSE"
- "comiss\t{%1, %0|%0, %1}"
+ "SSE_FLOAT_MODE_P (mode)"
+ "comis\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "")])
-(define_insn "sse_ucomi"
+(define_insn "_ucomi"
[(set (reg:CCFPU FLAGS_REG)
(compare:CCFPU
- (vec_select:SF
- (match_operand:V4SF 0 "register_operand" "x")
+ (vec_select:MODEF
+ (match_operand: 0 "register_operand" "x")
(parallel [(const_int 0)]))
- (vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (vec_select:MODEF
+ (match_operand: 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
- "TARGET_SSE"
- "ucomiss\t{%1, %0|%0, %1}"
+ "SSE_FLOAT_MODE_P (mode)"
+ "ucomis\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "")])
-(define_expand "vcondv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (if_then_else:V4SF
+(define_expand "vcond"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (if_then_else:SSEMODEF2P
(match_operator 3 ""
- [(match_operand:V4SF 4 "nonimmediate_operand" "")
- (match_operand:V4SF 5 "nonimmediate_operand" "")])
- (match_operand:V4SF 1 "general_operand" "")
- (match_operand:V4SF 2 "general_operand" "")))]
- "TARGET_SSE"
+ [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODEF2P 1 "general_operand" "")
+ (match_operand:SSEMODEF2P 2 "general_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
{
if (ix86_expand_fp_vcond (operands))
DONE;
@@ -763,107 +775,622 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel single-precision floating point logical operations
+;; Parallel floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "andv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
-
-(define_insn "*andv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
- "andps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "sse_nandv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "andnps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_expand "iorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
-
-(define_insn "*iorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
- "orps\t{%2, %0|%0, %2}"
+(define_insn "_nand3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (and:SSEMODEF2P
+ (not:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "andnp\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_expand "xorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
-
-(define_insn "*xorv4sf3"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
- "xorps\t{%2, %0|%0, %2}"
+ (set_attr "mode" "")])
+
+(define_expand "3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (plogic:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)"
+ "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_insn "*3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (plogic:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (mode)
+ && ix86_binary_operator_ok (, mode, operands)"
+ "p\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "")])
;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes register
;; allocation lossage. These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
-(define_insn "*andsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (and:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "andps\t{%2, %0|%0, %2}"
+(define_insn "*nand3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (mode)"
+ "andnp\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
-
-(define_insn "*nandsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "andnps\t{%2, %0|%0, %2}"
+ (set_attr "mode" "")])
+
+(define_insn "*3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (plogic:MODEF
+ (match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (mode)"
+ "p\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "")])
-(define_insn "*iorsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (ior:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "orps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSE5 floating point multiply/accumulate instructions This includes the
+;; scalar version of the instructions as well as the vector
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "*xorsf3"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (xor:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "x")))]
- "TARGET_SSE"
- "xorps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
+;; combine to generate a multiply/add with two memory references. We then
+;; split this insn, into loading up the destination register with one of the
+;; memory operations. If we don't manage to split the insn, reload will
+;; generate the appropriate moves. The reason this is needed, is that combine
+;; has already folded one of the memory references into both the multiply and
+;; add insns, and it can't generate a new pseudo. I.e.:
+;; (set (reg1) (mem (addr1)))
+;; (set (reg2) (mult (reg1) (mem (addr2))))
+;; (set (reg3) (plus (reg2) (mem (addr3))))
+
+(define_insn "sse5_fmadd4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (plus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Split fmadd with two memory operands into a load and the fmadd.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (plus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, mode);
+ emit_insn (gen_sse5_fmadd4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fmadd
+(define_insn "sse5_vmfmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Floating multiply and subtract
+;; Allow two memory operands the same as fmadd
+(define_insn "sse5_fmsub4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Split fmsub with two memory operands into a load and the fmsub.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, mode);
+ emit_insn (gen_sse5_fmsub4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fmsub
+(define_insn "sse5_vmfmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Floating point negative multiply and add
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
+;; Note operands are out of order to simplify call to ix86_sse5_valid_p
+;; Allow two memory operands to help in optimizing.
+(define_insn "sse5_fnmadd4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (minus:SSEMODEF4
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Split fnmadd with two memory operands into a load and the fnmadd.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (minus:SSEMODEF4
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, mode);
+ emit_insn (gen_sse5_fnmadd4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fnmadd
+(define_insn "sse5_vmfnmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Floating point negative multiply and subtract
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
+;; Allow 2 memory operands to help with optimization
+(define_insn "sse5_fnmsub4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (neg:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Split fnmsub with two memory operands into a load and the fmsub.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (neg:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, mode);
+ emit_insn (gen_sse5_fnmsub4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fnmsub
+(define_insn "sse5_vmfnmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; The same instructions using an UNSPEC to allow the intrinsic to be used
+;; even if the user used -mno-fused-madd
+;; Parallel instructions. During instruction generation, just default
+;; to registers, and let combine later build the appropriate instruction.
+(define_expand "sse5i_fmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fmadd4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+(define_expand "sse5i_fmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fmsub4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
+;; Note operands are out of order to simplify call to ix86_sse5_valid_p
+(define_expand "sse5i_fnmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" "")))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fnmadd4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fnmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
+(define_expand "sse5i_fnmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" ""))
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fnmsub4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fnmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Scalar instructions
+(define_expand "sse5i_vmfmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 1)
+ (const_int 0))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfmadd4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are accepted.
+(define_insn "*sse5i_vmfmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 0)
+ (const_int 0))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+(define_expand "sse5i_vmfmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 0)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfmsub4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_vmfmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+;; Note operands are out of order to simplify call to ix86_sse5_valid_p
+(define_expand "sse5i_vmfnmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" "")))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfnmadd4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_vmfnmadd4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
+
+(define_expand "sse5i_vmfnmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" ""))
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfnmsub4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_vmfnmsub4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -917,6 +1444,7 @@
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse_cvtsi2ssq"
@@ -930,6 +1458,7 @@
"cvtsi2ssq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse_cvtss2si"
@@ -943,6 +1472,19 @@
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "cvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse_cvtss2siq"
@@ -956,6 +1498,19 @@
"cvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvtss2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse_cvttss2si"
@@ -968,6 +1523,8 @@
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse_cvttss2siq"
@@ -980,6 +1537,8 @@
"cvttss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse2_cvtdq2ps"
@@ -988,7 +1547,7 @@
"TARGET_SSE2"
"cvtdq2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V4SF")])
(define_insn "sse2_cvtps2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
@@ -997,6 +1556,7 @@
"TARGET_SSE2"
"cvtps2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_cvttps2dq"
@@ -1005,713 +1565,2241 @@
"TARGET_SSE2"
"cvttps2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; Parallel single-precision floating point element swizzling
+;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "sse_movhlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
- (parallel [(const_int 6)
- (const_int 7)
- (const_int 2)
- (const_int 3)])))]
- "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "@
- movhlps\t{%2, %0|%0, %2}
- movlps\t{%H1, %0|%0, %H1}
- movhps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF,V2SF,V2SF")])
+(define_insn "sse2_cvtpi2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
+ "TARGET_SSE2"
+ "cvtpi2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
+ (set_attr "mode" "V2DF")])
-(define_insn "sse_movlhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 4)
- (const_int 5)])))]
- "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
- "@
- movlhps\t{%2, %0|%0, %2}
- movhps\t{%2, %0|%0, %2}
- movlps\t{%2, %H0|%H0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF,V2SF,V2SF")])
+(define_insn "sse2_cvtpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "DI")])
-(define_insn "sse_unpckhps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (parallel [(const_int 2) (const_int 6)
- (const_int 3) (const_int 7)])))]
- "TARGET_SSE"
- "unpckhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+(define_insn "sse2_cvttpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
-(define_insn "sse_unpcklps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0) (const_int 4)
- (const_int 1) (const_int 5)])))]
- "TARGET_SSE"
- "unpcklps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+(define_insn "sse2_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsi2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
-;; These are modeled with the same vec_concat as the others so that we
-;; capture users of shufps that can use the new instructions
-(define_insn "sse3_movshdup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
- (match_dup 1))
- (parallel [(const_int 1)
- (const_int 1)
- (const_int 7)
- (const_int 7)])))]
- "TARGET_SSE3"
- "movshdup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
+(define_insn "sse2_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
-(define_insn "sse3_movsldup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
- (match_dup 1))
- (parallel [(const_int 0)
- (const_int 0)
- (const_int 6)
- (const_int 6)])))]
- "TARGET_SSE3"
- "movsldup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
- (set_attr "mode" "V4SF")])
+(define_insn "sse2_cvtsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")])
-(define_expand "sse_shufps"
- [(match_operand:V4SF 0 "register_operand" "")
- (match_operand:V4SF 1 "register_operand" "")
- (match_operand:V4SF 2 "nonimmediate_operand" "")
- (match_operand:SI 3 "const_int_operand" "")]
- "TARGET_SSE"
-{
- int mask = INTVAL (operands[3]);
- emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT (((mask >> 4) & 3) + 4),
- GEN_INT (((mask >> 6) & 3) + 4)));
- DONE;
-})
+(define_insn "sse2_cvtsd2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")])
-(define_insn "sse_shufps_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (parallel [(match_operand 3 "const_0_to_3_operand" "")
- (match_operand 4 "const_0_to_3_operand" "")
- (match_operand 5 "const_4_to_7_operand" "")
- (match_operand 6 "const_4_to_7_operand" "")])))]
- "TARGET_SSE"
-{
- int mask = 0;
- mask |= INTVAL (operands[3]) << 0;
- mask |= INTVAL (operands[4]) << 2;
- mask |= (INTVAL (operands[5]) - 4) << 4;
- mask |= (INTVAL (operands[6]) - 4) << 6;
- operands[3] = GEN_INT (mask);
+(define_insn "sse2_cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")])
- return "shufps\t{%3, %2, %0|%0, %2, %3}";
-}
- [(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+(define_insn "sse2_cvtsd2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")])
-(define_insn "sse_storehps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_SSE"
- "@
- movhps\t{%1, %0|%0, %1}
- movhlps\t{%1, %0|%0, %1}
- movlps\t{%H1, %0|%0, %H1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2SF,V4SF,V2SF")])
+(define_insn "sse2_cvttsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "cvttsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "SI")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
-(define_insn "sse_loadhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
- (vec_concat:V4SF
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
- "TARGET_SSE"
- "@
- movhps\t{%2, %0|%0, %2}
- movlhps\t{%2, %0|%0, %2}
- movlps\t{%2, %H0|%H0, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2SF,V4SF,V2SF")])
+(define_insn "sse2_cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
-(define_insn "sse_storelps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_SSE"
- "@
- movlps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}
- movlps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "mode" "V2SF,V4SF,V2SF")])
+(define_insn "sse2_cvtdq2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "cvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
-(define_insn "sse_loadlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
- (vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
- (vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
- (parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_SSE"
- "@
- shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
- movlps\t{%2, %0|%0, %2}
- movlps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog,ssemov,ssemov")
- (set_attr "mode" "V4SF,V2SF,V2SF")])
+(define_expand "sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
-(define_insn "sse_movss"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "*sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "cvtpd2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_expand "sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "cvttpd2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_insn "sse2_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
- (match_operand:V4SF 2 "register_operand" "x")
- (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
(const_int 1)))]
- "TARGET_SSE"
- "movss\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssemov")
+ "TARGET_SSE2"
+ "cvtsd2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
-(define_insn "*vec_dupv4sf"
+(define_insn "sse2_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtss2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "DF")])
+
+(define_expand "sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "register_operand" "0")))]
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SF 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "cvtpd2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "V4SF")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_insn "sse2_cvtps2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "cvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_expand "vec_unpacks_hi_v4sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_dup 2)
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_unpacks_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(set (match_dup 2)
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SFmode);
+ r2 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse_movhlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
"TARGET_SSE"
- "shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "type" "sselog1")
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "sse_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhlps\t{%2, %0|%0, %2}
+ movlps\t{%H2, %0|%0, %H2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_expand "sse_movlhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "sse_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_unpckhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE"
+ "unpckhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_unpcklps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE"
+ "unpcklps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions
+(define_insn "sse3_movshdup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1)
+ (const_int 1)
+ (const_int 7)
+ (const_int 7)])))]
+ "TARGET_SSE3"
+ "movshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_movsldup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 0)
+ (const_int 6)
+ (const_int 6)])))]
+ "TARGET_SSE3"
+ "movsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
-;; ??? In theory we can match memory for the MMX alternative, but allowing
-;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
-;; alternatives pretty much forces the MMX alternative to be chosen.
-(define_insn "*sse_concatv2sf"
- [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
- (vec_concat:V2SF
- (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
- (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
- "TARGET_SSE"
- "@
- unpcklps\t{%2, %0|%0, %2}
- movss\t{%1, %0|%0, %1}
- punpckldq\t{%2, %0|%0, %2}
- movd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
- (set_attr "mode" "V4SF,SF,DI,DI")])
+(define_expand "sse_shufps"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_insn "sse_shufps_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_storehps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "@
+ shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+ movlps\t{%2, %0|%0, %2}
+ movlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_SSE"
+ "shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V4SF")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*sse_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*sse_concatv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " 0,0")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_SSE"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_initv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "vec_setv4sf_0"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+ "insertps\t{%3, %2, %0|%0, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_split
+ [(set (match_operand:V4SF 0 "memory_operand" "")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonmemory_operand" ""))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv4sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "extractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn_and_split "*vec_extract_v4sf_mem"
+ [(set (match_operand:SF 0 "register_operand" "=x*rf")
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+ DONE;
+})
+
+(define_expand "vec_extractv4sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse2_unpckhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_unpckhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpckhpd\t{%2, %0|%0, %2}
+ movlpd\t{%H1, %0|%0, %H1}
+ movhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_insn "*sse3_movddup"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movddup\t{%1, %0|%0, %1}
+ #"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && reload_completed"
+ [(const_int 0)]
+{
+ rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
+ emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+ emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+ DONE;
+})
+
+(define_expand "sse2_unpcklpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_unpcklpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_expand "sse2_shufpd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)));
+ DONE;
+})
+
+(define_insn "sse2_shufpd_1"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "shufpd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhpd\t{%1, %0|%0, %1}
+ unpckhpd\t%0, %0
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov")
+ (set_attr "mode" "V1DF,V2DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = adjust_address (operands[1], DFmode, 8);
+})
+
+(define_insn "sse2_storelpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlpd\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V1DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (DFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (DFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_expand "sse2_loadhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhpd\t{%2, %0|%0, %2}
+ unpcklpd\t{%2, %0|%0, %2}
+ shufpd\t{$1, %1, %0|%0, %1, 1}
+ #"
+ [(set_attr "type" "ssemov,sselog,sselog,other")
+ (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+(define_expand "sse2_loadlpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movsd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhpd\t{%H1, %0|%0, %H1}
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "register_operand" "")
+ (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+;; Not sure these two are ever used, but it doesn't hurt to have
+;; them. -aoliva
+(define_insn "*vec_extractv2df_1_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_extractv2df_0_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhps\t{%H1, %0|%0, %H1}
+ movhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*vec_dupv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE3"
+ "movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DF")])
+
+(define_insn "vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_SSE2"
+ "unpcklpd\t%0, %0"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*vec_concatv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1)))]
+ "TARGET_SSE3"
+ "movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_concatv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
+ (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
+ "TARGET_SSE"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movsd\t{%1, %0|%0, %1}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
+
+(define_expand "vec_setv2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv2df"
+ [(match_operand:DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "neg2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (minus:SSEMODEI
+ (match_dup 2)
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "operands[2] = force_reg (mode, CONST0_RTX (mode));")
+
+(define_expand "3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_insn "*3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (, mode, operands)"
+ "p\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (