(define_c_enum "unspec" [
;; SSE
UNSPEC_MOVNT
- UNSPEC_MOVU
+ UNSPEC_LOADU
+ UNSPEC_STOREU
;; SSE3
UNSPEC_LDDQU
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-
;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
DONE;
})
-(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "register_operand" "=x")
(unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(match_operand:VF 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
+ "TARGET_SSE"
"%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "memory_operand" "=m")
+ (unspec:VF
+ [(match_operand:VF 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE"
+ "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse2>_loaddqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "register_operand" "=x")
+ (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
+ "TARGET_SSE2"
+ "%vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<sse2>_storedqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "memory_operand" "=m")
+ (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE2"
"%vmovdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 floating point multiply/accumulate instructions. This
-;; includes the scalar version of the instructions as well as the
-;; vector.
+;; FMA floating point multiply/accumulate instructions. These include
+;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
-;; combine to generate a multiply/add with two memory references. We then
-;; split this insn, into loading up the destination register with one of the
-;; memory operations. If we don't manage to split the insn, reload will
-;; generate the appropriate moves. The reason this is needed, is that combine
-;; has already folded one of the memory references into both the multiply and
-;; add insns, and it can't generate a new pseudo. I.e.:
-;; (set (reg1) (mem (addr1)))
-;; (set (reg2) (mult (reg1) (mem (addr2))))
-;; (set (reg3) (plus (reg2) (mem (addr3))))
-;;
-;; ??? This is historic, pre-dating the gimple fma transformation.
-;; We could now properly represent that only one memory operand is
-;; allowed and not be penalized during optimization.
-
-;; Intrinsic FMA operations.
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; The standard names for fma is only available with SSE math enabled.
(define_expand "fma<mode>4"
(neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
-;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
+;; The builtin for intrinsics is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4i_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4"
- "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
- "TARGET_FMA4"
- "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
- "TARGET_FMA4"
- "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+(define_insn "*fma_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
- "TARGET_FMA4"
- "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-;; Scalar versions of the above. Unlike ADDSS et al, these write the
-;; entire destination register, with the high-order elements zeroed.
+;; FMA parallel floating point multiply addsub and subadd operations.
-(define_expand "fma4i_vmfmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand")
- (vec_merge:VF_128
- (fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand")
- (match_operand:VF_128 2 "nonimmediate_operand")
- (match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 4)
- (const_int 1)))]
- "TARGET_FMA4"
-{
- operands[4] = CONST0_RTX (<MODE>mode);
-})
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;; (fma op1 op2 op3)
+;; (fma op1 op2 (neg op3))
+;; (merge-const))
+;;
+;; But this doesn't seem useful in practice.
+
+(define_expand "fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")
+ (match_operand:VF 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4")
+
+(define_insn "*fma_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA3 floating point scalar intrinsics. These merge result with
+;; high-order elements from the destination register.
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(match_operand:VF_128 1 "nonimmediate_operand")
(match_operand:VF_128 2 "nonimmediate_operand")
(match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 0)
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA")
(define_insn "*fmai_fmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
- (match_dup 0)
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fmai_fmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
- (match_dup 0)
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fmai_fnmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
- (match_dup 0)
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fmai_fnmsub_<mode>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+ (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
- (match_dup 0)
+ (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+ (match_dup 1)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+;; FMA4 floating point scalar intrinsics. These write the
+;; entire destination register, with the high-order elements zeroed.
+
+(define_expand "fma4i_vmfmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_FMA4"
+{
+ operands[4] = CONST0_RTX (<MODE>mode);
+})
+
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 Parallel floating point multiply addsub and subadd operations.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; It would be possible to represent these without the UNSPEC as
-;;
-;; (vec_merge
-;; (fma op1 op2 op3)
-;; (fma op1 op2 (neg op3))
-;; (merge-const))
-;;
-;; But this doesn't seem useful in practice.
-
-(define_expand "fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")
- (match_operand:VF 3 "nonimmediate_operand")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4")
-
-(define_insn "*fma4_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma4_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; FMA3 floating point multiply/accumulate instructions.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "*fma_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
- (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_expand "avx_movddup256"
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
- (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
(define_split
[(set (match_operand:V2DF 0 "memory_operand" "")
[(set_attr "isa" "noavx,sse3")
(set_attr "type" "sselog1")
(set_attr "prefix" "orig,maybe_vex")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V2DF,DF")])
(define_insn "*vec_concatv2df"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
(set_attr "mode" "TI")])
(define_insn "<sse4_1_avx2>_pblendvb"
- [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
+ [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
(unspec:VI1_AVX2
[(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
(match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddbq\t{%1, %0|%0, %1}"
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddubq\t{%1, %0|%0, %1}"
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
+ [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
(unspec:V48_AVX2
[(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:V48_AVX2 2 "register_operand" "x")