;; GCC machine description for SSE instructions
-;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
(define_c_enum "unspec" [
;; SSE
UNSPEC_MOVNT
- UNSPEC_MOVU
+ UNSPEC_LOADU
+ UNSPEC_STOREU
;; SSE3
UNSPEC_LDDQU
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-
;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
DONE;
})
-(define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "")
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "register_operand" "=x")
(unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "")]
- UNSPEC_MOVU))]
+ [(match_operand:VF 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
"TARGET_SSE"
-{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (<MODE>mode, operands[1]);
-})
+ "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
-(define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "memory_operand" "=m")
(unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(match_operand:VF 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE"
"%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_expand "<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
- UNSPEC_MOVU))]
+(define_insn "<sse2>_loaddqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "register_operand" "=x")
+ (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
"TARGET_SSE2"
-{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (<MODE>mode, operands[1]);
-})
+ "%vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "*<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "<sse2>_storedqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "memory_operand" "=m")
+ (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE2"
"%vmovdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:DF
- (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
- (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
- (vec_concat:V2DF
- (plusminus:DF
(vec_select:DF
(match_operand:V4DF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
- (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
(plusminus:DF
(vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 floating point multiply/accumulate instructions. This
-;; includes the scalar version of the instructions as well as the
-;; vector.
+;; FMA floating point multiply/accumulate instructions. These include
+;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
-;; combine to generate a multiply/add with two memory references. We then
-;; split this insn, into loading up the destination register with one of the
-;; memory operations. If we don't manage to split the insn, reload will
-;; generate the appropriate moves. The reason this is needed, is that combine
-;; has already folded one of the memory references into both the multiply and
-;; add insns, and it can't generate a new pseudo. I.e.:
-;; (set (reg1) (mem (addr1)))
-;; (set (reg2) (mult (reg1) (mem (addr2))))
-;; (set (reg3) (plus (reg2) (mem (addr3))))
-;;
-;; ??? This is historic, pre-dating the gimple fma transformation.
-;; We could now properly represent that only one memory operand is
-;; allowed and not be penalized during optimization.
-
-;; Intrinsic FMA operations.
+;; The standard names for scalar FMA are only available with SSE math enabled.
+(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
+ (DF "TARGET_SSE_MATH")
+ V4SF V2DF V8SF V4DF])
-;; The standard names for fma is only available with SSE math enabled.
(define_expand "fma<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (match_operand:FMAMODEM 1 "nonimmediate_operand")
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
+ "TARGET_FMA || TARGET_FMA4")
(define_expand "fms<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand")
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (match_operand:FMAMODEM 1 "nonimmediate_operand")
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
+ "TARGET_FMA || TARGET_FMA4")
(define_expand "fnma<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
+ "TARGET_FMA || TARGET_FMA4")
(define_expand "fnms<mode>4"
- [(set (match_operand:FMAMODE 0 "register_operand")
- (fma:FMAMODE
- (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
- (match_operand:FMAMODE 2 "nonimmediate_operand")
- (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
- "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
+ [(set (match_operand:FMAMODEM 0 "register_operand")
+ (fma:FMAMODEM
+ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
+ (match_operand:FMAMODEM 2 "nonimmediate_operand")
+ (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
+ "TARGET_FMA || TARGET_FMA4")
+
+;; The builtins for intrinsics are not constrained by SSE math enabled.
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4i_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4"
- "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
- "TARGET_FMA4"
- "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4"
- "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
- "TARGET_FMA4"
- "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-;; Scalar versions of the above. Unlike ADDSS et al, these write the
-;; entire destination register, with the high-order elements zeroed.
+;; FMA parallel floating point multiply addsub and subadd operations.
-(define_expand "fma4i_vmfmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand")
- (vec_merge:VF_128
- (fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand")
- (match_operand:VF_128 2 "nonimmediate_operand")
- (match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 4)
- (const_int 1)))]
- "TARGET_FMA4"
-{
- operands[4] = CONST0_RTX (<MODE>mode);
-})
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;; (fma op1 op2 op3)
+;; (fma op1 op2 (neg op3))
+;; (merge-const))
+;;
+;; But this doesn't seem useful in practice.
+
+(define_expand "fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")
+ (match_operand:VF 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4")
+
+(define_insn "*fma_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA3 floating point scalar intrinsics. These merge result with
+;; high-order elements from the destination register.
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(match_operand:VF_128 1 "nonimmediate_operand")
(match_operand:VF_128 2 "nonimmediate_operand")
(match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 0)
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA")
(define_insn "*fmai_fmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
- (match_dup 0)
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fmai_fmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
- (match_dup 0)
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fmai_fnmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
- (match_dup 0)
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fmai_fnmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
- (match_dup 0)
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+;; FMA4 floating point scalar intrinsics. These write the
+;; entire destination register, with the high-order elements zeroed.
+
+(define_expand "fma4i_vmfmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_FMA4"
+{
+ operands[4] = CONST0_RTX (<MODE>mode);
+})
+
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 Parallel floating point multiply addsub and subadd operations.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; It would be possible to represent these without the UNSPEC as
-;;
-;; (vec_merge
-;; (fma op1 op2 op3)
-;; (fma op1 op2 (neg op3))
-;; (merge-const))
-;;
-;; But this doesn't seem useful in practice.
-
-(define_expand "fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")
- (match_operand:VF 3 "nonimmediate_operand")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4")
-
-(define_insn "*fma4_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma4_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; FMA3 floating point multiply/accumulate instructions.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "*fma_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(vec_select:V4SF
(vec_concat:V8SF
(match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
(parallel [(const_int 0)
(const_int 1)
(const_int 4)
(define_insn "sse_loadlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
(vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
(parallel [(const_int 2) (const_int 3)]))))]
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=x,x,x ,x,x,x,x ,x ,m,m ,m")
+ "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
(vec_merge:VI4F_128
(vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
+ " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
(match_operand:VI4F_128 1 "vector_move_operand"
- " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
+ " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
(const_int 1)))]
"TARGET_SSE"
"@
(cond [(eq_attr "alternative" "0,6,7")
(const_string "sselog")
(eq_attr "alternative" "9")
- (const_string "fmov")
- (eq_attr "alternative" "10")
(const_string "imov")
+ (eq_attr "alternative" "10")
+ (const_string "fmov")
]
(const_string "ssemov")))
(set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
- (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_expand "avx_movddup256"
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
- (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
(define_split
[(set (match_operand:V2DF 0 "memory_operand" "")
(vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[0] = adjust_address (operands[0], DFmode, 8);")
+ "operands[0] = adjust_address (operands[0], DFmode, 0);")
(define_insn "sse2_movsd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
[(set_attr "isa" "noavx,sse3")
(set_attr "type" "sselog1")
(set_attr "prefix" "orig,maybe_vex")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V2DF,DF")])
(define_insn "*vec_concatv2df"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
DONE;
}
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
DONE;
}
(if_then_else:V_256
(match_operator 3 ""
[(match_operand:VI_256 4 "nonimmediate_operand" "")
- (match_operand:VI_256 5 "nonimmediate_operand" "")])
- (match_operand:V_256 1 "general_operand" "")
- (match_operand:V_256 2 "general_operand" "")))]
+ (match_operand:VI_256 5 "general_operand" "")])
+ (match_operand:V_256 1 "" "")
+ (match_operand:V_256 2 "" "")))]
"TARGET_AVX2
&& (GET_MODE_NUNITS (<V_256:MODE>mode)
== GET_MODE_NUNITS (<VI_256:MODE>mode))"
(if_then_else:V_128
(match_operator 3 ""
[(match_operand:VI124_128 4 "nonimmediate_operand" "")
- (match_operand:VI124_128 5 "nonimmediate_operand" "")])
- (match_operand:V_128 1 "general_operand" "")
- (match_operand:V_128 2 "general_operand" "")))]
+ (match_operand:VI124_128 5 "general_operand" "")])
+ (match_operand:V_128 1 "" "")
+ (match_operand:V_128 2 "" "")))]
"TARGET_SSE2
&& (GET_MODE_NUNITS (<V_128:MODE>mode)
== GET_MODE_NUNITS (<VI124_128:MODE>mode))"
(if_then_else:VI8F_128
(match_operator 3 ""
[(match_operand:V2DI 4 "nonimmediate_operand" "")
- (match_operand:V2DI 5 "nonimmediate_operand" "")])
- (match_operand:VI8F_128 1 "general_operand" "")
- (match_operand:VI8F_128 2 "general_operand" "")))]
+ (match_operand:V2DI 5 "general_operand" "")])
+ (match_operand:VI8F_128 1 "" "")
+ (match_operand:VI8F_128 2 "" "")))]
"TARGET_SSE4_2"
{
bool ok = ix86_expand_int_vcond (operands);
(set_attr "mode" "TI")])
(define_insn "<sse4_1_avx2>_pblendvb"
- [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
+ [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
(unspec:VI1_AVX2
[(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
(match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
- (vec_select:V2SI
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)])))
+ (parallel [(const_int 0)
+ (const_int 2)])))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
+ (parallel [(const_int 1)
+ (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (parallel [(const_int 1)
+ (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
+ (parallel [(const_int 0)
+ (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)]))))
+ (parallel [(const_int 0)
+ (const_int 2)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as operands 1/2
-(define_insn_and_split "xop_mulv2div2di3_low"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 1)
- (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 1)
- (const_int 3)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
(define_insn "xop_pmacsdqh"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
+ (parallel [(const_int 1)
+ (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (parallel [(const_int 1)
+ (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as either operands[1] or operands[2]
-(define_insn_and_split "xop_mulv2div2di3_high"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 0)
- (const_int 2)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
;; XOP parallel integer multiply/add instructions for the intrinisics
(define_insn "xop_pmacsswd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddbq\t{%1, %0|%0, %1}"
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddubq\t{%1, %0|%0, %1}"
(match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
"TARGET_XOP"
{
- operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+ operands[3]
+ = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
[(set_attr "type" "sseishft")
(define_insn "avx2_permvarv8si"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(unspec:V8SI
- [(match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
+ [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
UNSPEC_VPERMSI))]
"TARGET_AVX2"
- "vpermd\t{%2, %1, %0|%0, %1, %2}"
+ "vpermd\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(define_insn "avx2_permvarv8sf"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(unspec:V8SF
- [(match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
UNSPEC_VPERMSF))]
"TARGET_AVX2"
- "vpermps\t{%2, %1, %0|%0, %1, %2}"
+ "vpermps\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
+ [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
(unspec:V48_AVX2
[(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:V48_AVX2 2 "register_operand" "x")
(unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
UNSPEC_VCVTPH2PS)
(parallel [(const_int 0) (const_int 1)
- (const_int 1) (const_int 2)])))]
+ (const_int 2) (const_int 3)])))]
"TARGET_F16C"
"vcvtph2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx2_gatherdi<mode>_3"
+ [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
+ (vec_select:<VEC_GATHER_SRCDI>
+ (unspec:VI4F_256
+ [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
+ (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 3 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
+ (match_operand:SI 6 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
+ UNSPEC_GATHER)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (clobber (match_scratch:VI4F_256 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx2_gatherdi<mode>_4"
+ [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
+ (vec_select:<VEC_GATHER_SRCDI>
+ (unspec:VI4F_256
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
+ UNSPEC_GATHER)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (clobber (match_scratch:VI4F_256 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])