;; GCC machine description for SSE instructions
-;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; Codes for the "unspec" enum used by the patterns in this file.  The
;; members are grouped by the ISA extension they serve.
+(define_c_enum "unspec" [
+ ;; SSE (MOVNT: movnt* stores; LOADU/STOREU: movu*/movdqu loads and stores)
+ UNSPEC_MOVNT
+ UNSPEC_LOADU
+ UNSPEC_STOREU
+
+ ;; SSE3
+ UNSPEC_LDDQU
+
+ ;; SSSE3
+ UNSPEC_PSHUFB
+ UNSPEC_PSIGN
+ UNSPEC_PALIGNR
+
+ ;; For SSE4A support
+ UNSPEC_EXTRQI
+ UNSPEC_EXTRQ
+ UNSPEC_INSERTQI
+ UNSPEC_INSERTQ
+
+ ;; For SSE4.1 support
+ UNSPEC_BLENDV
+ UNSPEC_INSERTPS
+ UNSPEC_DP
+ UNSPEC_MOVNTDQA
+ UNSPEC_MPSADBW
+ UNSPEC_PHMINPOSUW
+ UNSPEC_PTEST
+
+ ;; For SSE4.2 support
+ UNSPEC_PCMPESTR
+ UNSPEC_PCMPISTR
+
+ ;; For FMA, FMA4 and XOP support (FMADDSUB is shared by FMA and FMA4)
+ UNSPEC_FMADDSUB
+ UNSPEC_XOP_UNSIGNED_CMP
+ UNSPEC_XOP_TRUEFALSE
+ UNSPEC_XOP_PERMUTE
+ UNSPEC_FRCZ
+
+ ;; For AES support
+ UNSPEC_AESENC
+ UNSPEC_AESENCLAST
+ UNSPEC_AESDEC
+ UNSPEC_AESDECLAST
+ UNSPEC_AESIMC
+ UNSPEC_AESKEYGENASSIST
+
+ ;; For PCLMUL support
+ UNSPEC_PCLMUL
+
+ ;; For AVX support
+ UNSPEC_PCMP
+ UNSPEC_VPERMIL
+ UNSPEC_VPERMIL2
+ UNSPEC_VPERMIL2F128
+ UNSPEC_CAST
+ UNSPEC_VTESTP
+ UNSPEC_VCVTPH2PS
+ UNSPEC_VCVTPS2PH
+
+ ;; For AVX2 support
+ UNSPEC_VPERMSI
+ UNSPEC_VPERMDF
+ UNSPEC_VPERMSF
+ UNSPEC_VPERMTI
+ UNSPEC_GATHER
+ UNSPEC_VSIBADDR
+])
+
;; Codes for the "unspecv" enum; every member carries the UNSPECV_ prefix.
;; NOTE(review): presumably these are the unspec_volatile counterparts of
;; the "unspec" enum -- no user is visible in this chunk, confirm.
+(define_c_enum "unspecv" [
+ UNSPECV_LDMXCSR
+ UNSPECV_STMXCSR
+ UNSPECV_CLFLUSH
+ UNSPECV_MONITOR
+ UNSPECV_MWAIT
+ UNSPECV_VZEROALL
+ UNSPECV_VZEROUPPER
+])
+
;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator V16
[(V32QI "TARGET_AVX") V16QI
;; Mix-n-match
;; AVX256MODE2P covers exactly three modes: V8SI, V8SF and V4DF.
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-
;; Mapping of immediate bits for blend instructions
;; Each entry is 2^N - 1 for N = 8, 4, 4 and 2 respectively, i.e. a mask
;; with the low N bits set.
;; NOTE(review): N presumably is the element count of the mode -- confirm.
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
DONE;
})
-(define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "")
;; movu* load for the VF modes: operand 0 must be a register ("=x") and
;; operand 1 a memory operand ("m"), wrapped in UNSPEC_LOADU.  This named
;; insn takes over the load direction of the removed <sse>_movu expander,
;; which accepted any nonimmediate operands and forced operand 1 into a
;; register when both operands were memory.
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "register_operand" "=x")
(unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "")]
- UNSPEC_MOVU))]
+ [(match_operand:VF 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
"TARGET_SSE"
-{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (<MODE>mode, operands[1]);
-})
+ "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
-(define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
;; movu* store for the VF modes: operand 0 must be memory ("=m") and
;; operand 1 a register ("x"), wrapped in UNSPEC_STOREU.  With the operand
;; predicates fixed per direction, the insn condition no longer needs the
;; explicit "not both MEM" check of the removed two-alternative pattern.
+(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "memory_operand" "=m")
(unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(match_operand:VF 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE"
"%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_expand "<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
- UNSPEC_MOVU))]
;; movdqu load for the VI1 modes: register destination ("=x"), memory
;; source ("m"), wrapped in UNSPEC_LOADU.  Takes over the load direction of
;; the removed <sse2>_movdqu expander.  The "prefix_data16" attribute is
;; "*" when TARGET_AVX is set and "1" otherwise.
+(define_insn "<sse2>_loaddqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "register_operand" "=x")
+ (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
"TARGET_SSE2"
-{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (<MODE>mode, operands[1]);
-})
+ "%vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "*<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "<sse2>_storedqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "memory_operand" "=m")
+ (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE2"
"%vmovdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "sse2_movntsi"
- [(set (match_operand:SI 0 "memory_operand" "=m")
- (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
- UNSPEC_MOVNT))]
;; movnti store from a general register ("r") to memory ("=m"), wrapped in
;; UNSPEC_MOVNT.  The pattern is generalized from SImode to the SWI48
;; iterator, and the "mode" attribute now follows the operand mode instead
;; of the previously hard-coded V2DF.
;; NOTE(review): SWI48 is defined outside this chunk (presumably SI plus
;; DI on 64-bit targets) -- confirm.
+(define_insn "sse2_movnti<mode>"
+ [(set (match_operand:SWI48 0 "memory_operand" "=m")
+ (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
+ UNSPEC_MOVNT))]
"TARGET_SSE2"
"movnti\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_data16" "0")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "<MODE>")])
(define_insn "<sse>_movnt<mode>"
[(set (match_operand:VF 0 "memory_operand" "=m")
;; Modes handled by storent patterns.
;; Each entry carries its own enabling condition: DI needs both TARGET_SSE2
;; and TARGET_64BIT, SF/DF need TARGET_SSE4A, the V4DI/V8SF/V4DF entries
;; need TARGET_AVX, and V4SF is unconditional.
(define_mode_iterator STORENT_MODE
- [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
- (V2DI "TARGET_SSE2")
+ [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
+ (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
+ (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
(V8SF "TARGET_AVX") V4SF
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:DF
- (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
- (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
- (vec_concat:V2DF
- (plusminus:DF
(vec_select:DF
(match_operand:V4DF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
- (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
(plusminus:DF
(vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 floating point multiply/accumulate instructions. This
-;; includes the scalar version of the instructions as well as the
-;; vector.
+;; FMA floating point multiply/accumulate instructions. These include
+;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
-;; combine to generate a multiply/add with two memory references. We then
-;; split this insn, into loading up the destination register with one of the
-;; memory operations. If we don't manage to split the insn, reload will
-;; generate the appropriate moves. The reason this is needed, is that combine
-;; has already folded one of the memory references into both the multiply and
-;; add insns, and it can't generate a new pseudo. I.e.:
-;; (set (reg1) (mem (addr1)))
-;; (set (reg2) (mult (reg1) (mem (addr2))))
-;; (set (reg3) (plus (reg2) (mem (addr3))))
-;;
-;; ??? This is historic, pre-dating the gimple fma transformation.
-;; We could now properly represent that only one memory operand is
-;; allowed and not be penalized during optimization.
-
-;; Intrinsic FMA operations.
;; FMAMODEM lists the same six modes as FMAMODE, but the scalar SF and DF
;; entries are conditional on TARGET_SSE_MATH; the four vector modes are
;; unconditional.
+;; The standard names for scalar FMA are only available with SSE math enabled.
+(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
+ (DF "TARGET_SSE_MATH")
+ V4SF V2DF V8SF V4DF])
-;; The standard names for fma is only available with SSE math enabled.
;; Standard-name expander: operand 0 = fma (operand 1, operand 2, operand 3).
;; Enabled for TARGET_FMA or TARGET_FMA4.  The TARGET_SSE_MATH requirement
;; moves out of the expander condition and into the FMAMODEM iterator, where
;; it applies to the scalar SF/DF modes only.
(define_expand "fma<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (match_operand:FMAMODEM 1 "nonimmediate_operand")
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
+ "TARGET_FMA || TARGET_FMA4")
;; Standard-name expander: like fma<mode>4, but operand 3 (the addend) is
;; wrapped in neg.  Enabled for TARGET_FMA or TARGET_FMA4; the SSE-math
;; restriction for scalar modes comes from the FMAMODEM iterator.
(define_expand "fms<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (match_operand:FMAMODEM 1 "nonimmediate_operand")
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
+ "TARGET_FMA || TARGET_FMA4")
;; Standard-name expander: like fma<mode>4, but operand 1 (a multiplicand)
;; is wrapped in neg.  Enabled for TARGET_FMA or TARGET_FMA4; the SSE-math
;; restriction for scalar modes comes from the FMAMODEM iterator.
(define_expand "fnma<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
+ "TARGET_FMA || TARGET_FMA4")
;; Standard-name expander: like fma<mode>4, but both operand 1 (a
;; multiplicand) and operand 3 (the addend) are wrapped in neg.  Enabled for
;; TARGET_FMA or TARGET_FMA4; the SSE-math restriction for scalar modes
;; comes from the FMAMODEM iterator.
(define_expand "fnms<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
+ "TARGET_FMA || TARGET_FMA4")
+
+;; The builtins for the intrinsics are available whether or not SSE math is enabled.
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4i_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
;; Combined FMA/FMA4 multiply-add insn.  Alternatives 0-2 emit the
;; three-operand vfmadd132/213/231 forms, each with one input tied to the
;; output through a "0" constraint; alternatives 3-4 emit the four-operand
;; vfmadd form.  The "isa" attribute marks alternatives 0-2 as fma and 3-4
;; as fma4.
+(define_insn "*fma_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4"
- "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
;; Combined FMA/FMA4 multiply-subtract insn (operand 3 is negated).
;; Alternatives 0-2 emit the three-operand vfmsub132/213/231 forms with one
;; input tied to the output; alternatives 3-4 emit the four-operand vfmsub
;; form.  The "isa" attribute marks alternatives 0-2 as fma and 3-4 as fma4.
+(define_insn "*fma_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
- "TARGET_FMA4"
- "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
;; Combined FMA/FMA4 negated-multiply-add insn (operand 1 is negated).
;; Alternatives 0-2 emit the three-operand vfnmadd132/213/231 forms with one
;; input tied to the output; alternatives 3-4 emit the four-operand vfnmadd
;; form.  The "isa" attribute marks alternatives 0-2 as fma and 3-4 as fma4.
+(define_insn "*fma_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4"
- "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
;; Combined FMA/FMA4 negated-multiply-subtract insn (operands 1 and 3 are
;; both negated).  Alternatives 0-2 emit the three-operand
;; vfnmsub132/213/231 forms with one input tied to the output; alternatives
;; 3-4 emit the four-operand vfnmsub form.  The "isa" attribute marks
;; alternatives 0-2 as fma and 3-4 as fma4.
+(define_insn "*fma_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
- "TARGET_FMA4"
- "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-;; Scalar versions of the above. Unlike ADDSS et al, these write the
-;; entire destination register, with the high-order elements zeroed.
+;; FMA parallel floating point multiply addsub and subadd operations.
-(define_expand "fma4i_vmfmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand")
- (vec_merge:VF_128
- (fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand")
- (match_operand:VF_128 2 "nonimmediate_operand")
- (match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 4)
- (const_int 1)))]
- "TARGET_FMA4"
-{
- operands[4] = CONST0_RTX (<MODE>mode);
-})
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;; (fma op1 op2 op3)
+;; (fma op1 op2 (neg op3))
+;; (merge-const))
+;;
+;; But this doesn't seem useful in practice.
+
;; Named expander for the addsub operation over the VF modes: the three
;; inputs are wrapped in UNSPEC_FMADDSUB with no preparation code.  Enabled
;; for TARGET_FMA or TARGET_FMA4.
+(define_expand "fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")
+ (match_operand:VF 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4")
+
;; Combined FMA/FMA4 insn matching UNSPEC_FMADDSUB.  Alternatives 0-2 emit
;; the three-operand vfmaddsub132/213/231 forms with one input tied to the
;; output; alternatives 3-4 emit the four-operand vfmaddsub form.  The "isa"
;; attribute marks alternatives 0-2 as fma and 3-4 as fma4.
+(define_insn "*fma_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
;; Combined FMA/FMA4 subadd insn: the same UNSPEC_FMADDSUB as
;; *fma_fmaddsub_<mode>, but with operand 3 wrapped in neg.  Alternatives
;; 0-2 emit the three-operand vfmsubadd132/213/231 forms with one input tied
;; to the output; alternatives 3-4 emit the four-operand vfmsubadd form.
+(define_insn "*fma_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA3 floating point scalar intrinsics. These merge the result with the
+;; high-order elements of the destination register.
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(match_operand:VF_128 1 "nonimmediate_operand")
(match_operand:VF_128 2 "nonimmediate_operand")
(match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 0)
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA")
;; Scalar FMA3 multiply-add on VF_128.  The vec_merge now takes its second
;; input from operand 1 (match_dup 1) instead of operand 0.  Operand 1 is
;; tied to the output ("0") in both remaining alternatives, the commutative
;; "%" marker is gone, and the vfmadd231 alternative (which tied operand 3
;; to the output) is dropped.
;; NOTE(review): presumably "%" had to go because swapping operands 1 and 2
;; would change which register the (match_dup 1) elements come from --
;; confirm.
(define_insn "*fmai_fmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
- (match_dup 0)
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar FMA3 multiply-subtract on VF_128 (operand 3 is negated).  The
;; vec_merge now takes its second input from operand 1 (match_dup 1), which
;; is tied to the output ("0") in both remaining alternatives.  The
;; commutative "%" marker and the vfmsub231 alternative are dropped.
(define_insn "*fmai_fmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
- (match_dup 0)
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar FMA3 negated-multiply-add on VF_128.  Note the operand numbering:
;; the negated multiplicand is operand 2, while operand 1 (the second
;; multiplicand) is tied to the output ("0") and also feeds the vec_merge
;; through (match_dup 1).  The commutative "%" marker and the vfnmadd231
;; alternative are dropped.
(define_insn "*fmai_fnmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
- (match_dup 0)
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar FMA3 negated-multiply-subtract on VF_128 (operand 3 is negated as
;; well).  As in *fmai_fnmadd_<mode>, the negated multiplicand is operand 2,
;; while operand 1 is tied to the output ("0") and feeds the vec_merge
;; through (match_dup 1).  The commutative "%" marker and the vfnmsub231
;; alternative are dropped.
(define_insn "*fmai_fnmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
- (match_dup 0)
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+;; FMA4 floating point scalar intrinsics. These write the
+;; entire destination register, with the high-order elements zeroed.
+
;; Expander for the FMA4 scalar multiply-add on VF_128.  The preparation
;; statement sets operand 4 to CONST0_RTX (<MODE>mode), so the second
;; vec_merge input (match_dup 4) is a zero vector.
+(define_expand "fma4i_vmfmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_FMA4"
+{
+ operands[4] = CONST0_RTX (<MODE>mode);
+})
+
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 Parallel floating point multiply addsub and subadd operations.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; It would be possible to represent these without the UNSPEC as
-;;
-;; (vec_merge
-;; (fma op1 op2 op3)
-;; (fma op1 op2 (neg op3))
-;; (merge-const))
-;;
-;; But this doesn't seem useful in practice.
-
-(define_expand "fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")
- (match_operand:VF 3 "nonimmediate_operand")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4")
-
-(define_insn "*fma4_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma4_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; FMA3 floating point multiply/accumulate instructions.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "*fma_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(match_operand:V2DF 2 "nonimmediate_operand" "")]
"TARGET_SSE2"
{
- rtx r1, r2;
+ rtx tmp0, tmp1;
+
+ if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ {
+ tmp0 = gen_reg_rtx (V4DFmode);
+ tmp1 = force_reg (V2DFmode, operands[1]);
- r1 = gen_reg_rtx (V4SFmode);
- r2 = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+ emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
+ }
+ else
+ {
+ tmp0 = gen_reg_rtx (V4SFmode);
+ tmp1 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
- emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
- emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
+ emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
+ emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
+ }
DONE;
})
{
rtx r1, r2;
- r1 = gen_reg_rtx (V8SImode);
- r2 = gen_reg_rtx (V8SImode);
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
- emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
- emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
- emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
+ emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
+ emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
+ emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
DONE;
})
(match_operand:V2DF 2 "nonimmediate_operand" "")]
"TARGET_SSE2"
{
- rtx r1, r2;
+ rtx tmp0, tmp1;
- r1 = gen_reg_rtx (V4SImode);
- r2 = gen_reg_rtx (V4SImode);
+ if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ {
+ tmp0 = gen_reg_rtx (V4DFmode);
+ tmp1 = force_reg (V2DFmode, operands[1]);
- emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
- emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
- emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
- gen_lowpart (V2DImode, r1),
- gen_lowpart (V2DImode, r2)));
+ emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+ emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
+ }
+ else
+ {
+ tmp0 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
+ emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
+ emit_insn
+ (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, tmp0),
+ gen_lowpart (V2DImode, tmp1)));
+ }
DONE;
})
{
rtx r1, r2;
- r1 = gen_reg_rtx (V8SImode);
- r2 = gen_reg_rtx (V8SImode);
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
- emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
- emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
- emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
+ emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
+ emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
+ emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
DONE;
})
(match_operand:V2DF 2 "nonimmediate_operand" "")]
"TARGET_SSE2"
{
- rtx r1, r2;
+ rtx tmp0, tmp1;
- r1 = gen_reg_rtx (V4SImode);
- r2 = gen_reg_rtx (V4SImode);
+ if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ {
+ tmp0 = gen_reg_rtx (V4DFmode);
+ tmp1 = force_reg (V2DFmode, operands[1]);
- emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
- emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
- emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
- gen_lowpart (V2DImode, r1),
- gen_lowpart (V2DImode, r2)));
+ emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+ emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
+ }
+ else
+ {
+ tmp0 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
+ emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
+ emit_insn
+ (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, tmp0),
+ gen_lowpart (V2DImode, tmp1)));
+ }
DONE;
})
(vec_select:V4SF
(vec_concat:V8SF
(match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
(parallel [(const_int 0)
(const_int 1)
(const_int 4)
(define_insn "sse_loadlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
(vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
(parallel [(const_int 2) (const_int 3)]))))]
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=x,x,x ,x,x,x,x ,x ,m,m ,m")
+ "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
(vec_merge:VI4F_128
(vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
+ " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
(match_operand:VI4F_128 1 "vector_move_operand"
- " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
+ " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
(const_int 1)))]
"TARGET_SSE"
"@
(cond [(eq_attr "alternative" "0,6,7")
(const_string "sselog")
(eq_attr "alternative" "9")
- (const_string "fmov")
- (eq_attr "alternative" "10")
(const_string "imov")
+ (eq_attr "alternative" "10")
+ (const_string "fmov")
]
(const_string "ssemov")))
(set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
- (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_expand "avx_movddup256"
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
- (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed) for a V2DF memory
;; destination whose element 1 is selected from that same memory
;; (match_dup 0).  The set is replaced by a plain move of operand 1 into
;; operand 0, after operand 0 has been re-addressed as a DFmode reference
;; at the byte offset given to adjust_address.
;; NOTE(review): the lines that introduce operand 1 are not shown in this
;; excerpt -- confirm against the full file.
(define_split
[(set (match_operand:V2DF 0 "memory_operand" "")
(vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[0] = adjust_address (operands[0], DFmode, 8);")
+ "operands[0] = adjust_address (operands[0], DFmode, 0);")
(define_insn "sse2_movsd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
[(set_attr "isa" "noavx,sse3")
(set_attr "type" "sselog1")
(set_attr "prefix" "orig,maybe_vex")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V2DF,DF")])
(define_insn "*vec_concatv2df"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
gen_lowpart (mulmode, t[3]))));
/* Extract the even bytes and merge them back together. */
- ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
+ if (<MODE>mode == V16QImode)
+ ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
+ else
+ {
+ /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
+ this can't be normal even extraction, but one where additionally
+ the second and third quarter are swapped. That is even one insn
+ shorter than even extraction. */
+ rtvec v = rtvec_alloc (32);
+ for (i = 0; i < 32; ++i)
+ RTVEC_ELT (v, i)
+ = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
+ t[0] = operands[0];
+ t[1] = t[5];
+ t[2] = t[4];
+ t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
+ ix86_expand_vec_perm_const (t);
+ }
set_unique_reg_note (get_last_insn (), REG_EQUAL,
gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
DONE;
}
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
DONE;
}
(if_then_else:V_256
(match_operator 3 ""
[(match_operand:VI_256 4 "nonimmediate_operand" "")
- (match_operand:VI_256 5 "nonimmediate_operand" "")])
- (match_operand:V_256 1 "general_operand" "")
- (match_operand:V_256 2 "general_operand" "")))]
+ (match_operand:VI_256 5 "general_operand" "")])
+ (match_operand:V_256 1 "" "")
+ (match_operand:V_256 2 "" "")))]
"TARGET_AVX2
&& (GET_MODE_NUNITS (<V_256:MODE>mode)
== GET_MODE_NUNITS (<VI_256:MODE>mode))"
(if_then_else:V_128
(match_operator 3 ""
[(match_operand:VI124_128 4 "nonimmediate_operand" "")
- (match_operand:VI124_128 5 "nonimmediate_operand" "")])
- (match_operand:V_128 1 "general_operand" "")
- (match_operand:V_128 2 "general_operand" "")))]
+ (match_operand:VI124_128 5 "general_operand" "")])
+ (match_operand:V_128 1 "" "")
+ (match_operand:V_128 2 "" "")))]
"TARGET_SSE2
&& (GET_MODE_NUNITS (<V_128:MODE>mode)
== GET_MODE_NUNITS (<VI124_128:MODE>mode))"
(if_then_else:VI8F_128
(match_operator 3 ""
[(match_operand:V2DI 4 "nonimmediate_operand" "")
- (match_operand:V2DI 5 "nonimmediate_operand" "")])
- (match_operand:VI8F_128 1 "general_operand" "")
- (match_operand:VI8F_128 2 "general_operand" "")))]
+ (match_operand:V2DI 5 "general_operand" "")])
+ (match_operand:VI8F_128 1 "" "")
+ (match_operand:VI8F_128 2 "" "")))]
"TARGET_SSE4_2"
{
bool ok = ix86_expand_int_vcond (operands);
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "store")])
-(define_expand "sse_sfence"
- [(set (match_dup 0)
- (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
- "TARGET_SSE || TARGET_3DNOW_A"
-{
- operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
- MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-(define_insn "*sse_sfence"
- [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
- "TARGET_SSE || TARGET_3DNOW_A"
- "sfence"
- [(set_attr "type" "sse")
- (set_attr "length_address" "0")
- (set_attr "atom_sse_attr" "fence")
- (set_attr "memory" "unknown")])
-
(define_insn "sse2_clflush"
[(unspec_volatile [(match_operand 0 "address_operand" "p")]
UNSPECV_CLFLUSH)]
(set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
-(define_expand "sse2_mfence"
- [(set (match_dup 0)
- (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
- "TARGET_SSE2"
-{
- operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
- MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-(define_insn "*sse2_mfence"
- [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
- "TARGET_64BIT || TARGET_SSE2"
- "mfence"
- [(set_attr "type" "sse")
- (set_attr "length_address" "0")
- (set_attr "atom_sse_attr" "fence")
- (set_attr "memory" "unknown")])
-
-(define_expand "sse2_lfence"
- [(set (match_dup 0)
- (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
- "TARGET_SSE2"
-{
- operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
- MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-(define_insn "*sse2_lfence"
- [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
- "TARGET_SSE2"
- "lfence"
- [(set_attr "type" "sse")
- (set_attr "length_address" "0")
- (set_attr "atom_sse_attr" "lfence")
- (set_attr "memory" "unknown")])
(define_insn "sse3_mwait"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
(set_attr "mode" "TI")])
(define_insn "<sse4_1_avx2>_pblendvb"
- [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
+ [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
(unspec:VI1_AVX2
[(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
(match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
;; Expander: round the VF1 vector in operand 1 and convert the result to
;; the integer vector mode <sseintvecmode> in operand 0.
;; Operand 2 is an SImode immediate accepted by const_0_to_15_operand and
;; is forwarded unchanged to the round pattern (presumably the
;; rounding-control immediate -- confirm against the round insn).
;; Enabled by TARGET_ROUND.  The rounded value is held in a fresh
;; <MODE>mode pseudo before the fix_trunc pattern writes operand 0.
+(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_15_operand" "")]
+ "TARGET_ROUND"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
+ operands[2]));
+ emit_insn
+ (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ DONE;
+})
+
;; Expander: round the two VF2 vectors in operands 1 and 2 using the SImode
;; immediate in operand 3 (const_0_to_15_operand), convert to integers and
;; pack both results into operand 0 (mode <ssepackfltmode>).
;; Enabled by TARGET_ROUND.
;;
;; Two strategies are used:
;;  * <MODE>mode == V2DFmode with TARGET_AVX && !TARGET_PREFER_AVX128:
;;    operand 1 is forced into a register, the two inputs are concatenated
;;    into one V4DF pseudo, rounded once with avx_roundpd256 and converted
;;    with fix_truncv4dfv4si2.
;;  * otherwise: each input is rounded separately into its own <MODE>mode
;;    pseudo and the pair is packed by vec_pack_sfix_trunc_<mode>.
+(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "nonimmediate_operand" "")
+ (match_operand:VF2 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_15_operand" "")]
+ "TARGET_ROUND"
+{
+ rtx tmp0, tmp1;
+
+ if (<MODE>mode == V2DFmode
+ && TARGET_AVX && !TARGET_PREFER_AVX128)
+ {
+ rtx tmp2 = gen_reg_rtx (V4DFmode);
+
+ tmp0 = gen_reg_rtx (V4DFmode);
+ tmp1 = force_reg (V2DFmode, operands[1]);
+
+ emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+ emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
+ emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
+ }
+ else
+ {
+ tmp0 = gen_reg_rtx (<MODE>mode);
+ tmp1 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
+ operands[3]));
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
+ operands[3]));
+ emit_insn
+ (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+ }
+ DONE;
+})
+
(define_insn "sse4_1_round<ssescalarmodesuffix>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(define_expand "round<mode>2"
[(set (match_dup 4)
(plus:VF
- (match_operand:VF 1 "nonimmediate_operand" "")
+ (match_operand:VF 1 "register_operand" "")
(match_dup 3)))
(set (match_operand:VF 0 "register_operand" "")
(unspec:VF
operands[5] = GEN_INT (ROUND_TRUNC);
})
;; Expander: apply the round<mode>2 expander to the VF1 register in
;; operand 1 and convert the result to the integer vector mode
;; <sseintvecmode> in operand 0 via the fix_trunc pattern.
;; Only enabled when TARGET_ROUND holds and flag_trapping_math is off.
;; The intermediate rounded value lives in a fresh <MODE>mode pseudo.
+(define_expand "round<mode>2_sfix"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "register_operand" "")]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_round<mode>2 (tmp, operands[1]));
+
+ emit_insn
+ (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ DONE;
+})
+
;; Expander: round the two VF2 registers in operands 1 and 2 with the
;; round<mode>2 expander, convert to integers and pack both results into
;; operand 0 (mode <ssepackfltmode>).
;; Only enabled when TARGET_ROUND holds and flag_trapping_math is off.
;;
;; Two strategies are used:
;;  * <MODE>mode == V2DFmode with TARGET_AVX && !TARGET_PREFER_AVX128:
;;    the two inputs are concatenated into one V4DF pseudo, rounded once
;;    with roundv4df2 and converted with fix_truncv4dfv4si2.
;;  * otherwise: each input is rounded separately into its own <MODE>mode
;;    pseudo and the pair is packed by vec_pack_sfix_trunc_<mode>.
+(define_expand "round<mode>2_vec_pack_sfix"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "register_operand" "")
+ (match_operand:VF2 2 "register_operand" "")]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ rtx tmp0, tmp1;
+
+ if (<MODE>mode == V2DFmode
+ && TARGET_AVX && !TARGET_PREFER_AVX128)
+ {
+ rtx tmp2 = gen_reg_rtx (V4DFmode);
+
+ tmp0 = gen_reg_rtx (V4DFmode);
+ tmp1 = force_reg (V2DFmode, operands[1]);
+
+ emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
+ emit_insn (gen_roundv4df2 (tmp2, tmp0));
+ emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
+ }
+ else
+ {
+ tmp0 = gen_reg_rtx (<MODE>mode);
+ tmp1 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_round<mode>2 (tmp0, operands[1]));
+ emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+
+ emit_insn
+ (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+ }
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.2 string/text processing instructions
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
- (vec_select:V2SI
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)])))
+ (parallel [(const_int 0)
+ (const_int 2)])))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
+ (parallel [(const_int 1)
+ (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (parallel [(const_int 1)
+ (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
+ (parallel [(const_int 0)
+ (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)]))))
+ (parallel [(const_int 0)
+ (const_int 2)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as operands 1/2
-(define_insn_and_split "xop_mulv2div2di3_low"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 1)
- (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 1)
- (const_int 3)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
;; XOP vpmacsdqh (TARGET_XOP): V2DI operand 0 is computed from the
;; sign-extended elements of V4SI operands 1 and 2 selected by the
;; parallel below, together with V2DI operand 3.  With this change the
;; selected elements are 1 and 3 (previously 0 and 2).
;; Operand 1 carries the "%" modifier, i.e. it is commutative with
;; operand 2; only operand 2 may be a memory reference ("xm").
;; NOTE(review): the parentheses that close the second sign_extend imply
;; an enclosing operator between the plus and the first sign_extend that
;; is not shown in this excerpt -- confirm against the full file.
(define_insn "xop_pmacsdqh"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
+ (parallel [(const_int 1)
+ (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (parallel [(const_int 1)
+ (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as either operands[1] or operands[2]
-(define_insn_and_split "xop_mulv2div2di3_high"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 0)
- (const_int 2)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
;; XOP parallel integer multiply/add instructions for the intrinsics
(define_insn "xop_pmacsswd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddbq\t{%1, %0|%0, %1}"
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddubq\t{%1, %0|%0, %1}"
(match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
"TARGET_XOP"
{
- operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+ operands[3]
+ = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
[(set_attr "type" "sseishft")
;; AVX2 vpermd (TARGET_AVX2): V8SI operand 0 is UNSPEC_VPERMSI of V8SI
;; operands 1 and 2.  After this change operand 1 is the one that may
;; live in memory ("xm") and operand 2 must be a register ("x").
;; The output template prints operand 1 first in the AT&T form and last
;; in the Intel form, so the register-or-memory operand occupies the
;; assembler's source slot that accepts memory.
(define_insn "avx2_permvarv8si"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(unspec:V8SI
- [(match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
+ [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
UNSPEC_VPERMSI))]
"TARGET_AVX2"
- "vpermd\t{%2, %1, %0|%0, %1, %2}"
+ "vpermd\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
;; AVX2 vpermps (TARGET_AVX2): V8SF operand 0 is UNSPEC_VPERMSF of
;; operands 1 and 2.  After this change operand 1 is a V8SF value that
;; may live in memory ("xm") and operand 2 is a V8SI register ("x"), so
;; the second operand is an integer vector rather than a float vector
;; (presumably the permutation indices -- confirm against the callers).
;; The output template prints operand 1 first in the AT&T form and last
;; in the Intel form.
(define_insn "avx2_permvarv8sf"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(unspec:V8SF
- [(match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
UNSPEC_VPERMSF))]
"TARGET_AVX2"
- "vpermps\t{%2, %1, %0|%0, %1, %2}"
+ "vpermps\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
&& avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
+ if (mask == 0x12)
+ return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
+ if (mask == 0x20)
+ return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
operands[3] = GEN_INT (mask);
return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
+ [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
(unspec:V48_AVX2
[(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:V48_AVX2 2 "register_operand" "x")
(unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
UNSPEC_VCVTPH2PS)
(parallel [(const_int 0) (const_int 1)
- (const_int 1) (const_int 2)])))]
+ (const_int 2) (const_int 3)])))]
"TARGET_F16C"
"vcvtph2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+
;; Anonymous AVX2 gather pattern (TARGET_AVX2) for the VI4F_256 modes in
;; which only elements 0..3 of the UNSPEC_GATHER result are kept: the
;; destination, operand 0, has mode <VEC_GATHER_SRCDI> and is a
;; vec_select of the VI4F_256-mode unspec.
;; Operand roles as visible in the pattern:
;;   2  - <VEC_GATHER_SRCDI> register tied to the destination ("0")
;;   7  - memory reference accepted by vsib_mem_operator, wrapping an
;;        UNSPEC_VSIBADDR built from operand 3 (vsib_address_operand),
;;        operand 4 (<VEC_GATHER_IDXDI> register) and operand 6
;;        (const1248_operand immediate, presumably the scale)
;;   5  - <VEC_GATHER_SRCDI> register tied to the scratch operand 1 ("1");
;;        NOTE(review): presumably the gather mask -- confirm
;;   1  - early-clobbered VI4F_256 scratch
;; The destination is early-clobber ("=&x").  The template prints
;; operand 5, the memory operator 7 and operand 0.
+(define_insn "*avx2_gatherdi<mode>_3"
+ [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
+ (vec_select:<VEC_GATHER_SRCDI>
+ (unspec:VI4F_256
+ [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
+ (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 3 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
+ (match_operand:SI 6 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
+ UNSPEC_GATHER)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (clobber (match_scratch:VI4F_256 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
;; Anonymous AVX2 gather pattern (TARGET_AVX2) for the VI4F_256 modes,
;; keeping only elements 0..3 of the UNSPEC_GATHER result.  It has the
;; same shape as a gather with a tied source register, except that the
;; first element of the unspec is (pc) instead of a register operand, so
;; the remaining operands are numbered one lower.
;; Operand roles as visible in the pattern:
;;   6  - memory reference accepted by vsib_mem_operator, wrapping an
;;        UNSPEC_VSIBADDR built from operand 2 (vsib_address_operand),
;;        operand 3 (<VEC_GATHER_IDXDI> register) and operand 5
;;        (const1248_operand immediate, presumably the scale)
;;   4  - <VEC_GATHER_SRCDI> register tied to the scratch operand 1 ("1");
;;        NOTE(review): presumably the gather mask -- confirm
;;   1  - early-clobbered VI4F_256 scratch
;; The destination is early-clobber ("=&x").  The template prints
;; operand 4, the memory operator 6 and operand 0.
+(define_insn "*avx2_gatherdi<mode>_4"
+ [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
+ (vec_select:<VEC_GATHER_SRCDI>
+ (unspec:VI4F_256
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
+ UNSPEC_GATHER)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (clobber (match_scratch:VI4F_256 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])