+;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
+;; combine to generate a multiply/add with two memory references.  We then
+;; split this insn into a load of one of the memory operands into the
+;; destination register, followed by the multiply/add itself.  If we don't
+;; manage to split the insn, reload will generate the appropriate moves.
+;; This is needed because combine has already folded one of the memory
+;; references into both the multiply and add insns, and it can't generate
+;; a new pseudo.  I.e.:
+;; (set (reg1) (mem (addr1)))
+;; (set (reg2) (mult (reg1) (mem (addr2))))
+;; (set (reg3) (plus (reg2) (mem (addr3))))
+
+;; Fused multiply/add: operand 0 = (operand 1 * operand 2) + operand 3.
+;; Each constraint alternative ties the destination to operand 1 or to
+;; operand 3 and lets at most one of the remaining inputs be memory.  The
+;; insn condition nevertheless passes 2 as the last argument of
+;; ix86_sse5_valid_op_p (presumably the memory-operand limit -- confirm
+;; against its definition) so that a two-memory form created by combine is
+;; accepted until it is split or reloaded.
+(define_insn "sse5_fmadd<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (plus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split an fmadd that has two memory operands into a load plus the fmadd.
+;; The split applies only when the operands fail the one-memory check, pass
+;; the two-memory check, and the destination register is not mentioned in
+;; any of the three inputs.  ix86_expand_sse5_multiple_memory is called on
+;; the operand array first; the fmadd is then emitted from that array.
+(define_split
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+        (plus:SSEMODEF4
+         (mult:SSEMODEF4
+          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+          (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+  "TARGET_SSE5
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !reg_mentioned_p (operands[0], operands[1])
+   && !reg_mentioned_p (operands[0], operands[2])
+   && !reg_mentioned_p (operands[0], operands[3])"
+  [(const_int 0)]
+{
+  rtx fused_insn;
+  ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+  /* Build the insn only after the operand array has been processed.  */
+  fused_insn = gen_sse5_fmadd<mode>4 (operands[0], operands[1],
+                                      operands[2], operands[3]);
+  emit_insn (fused_insn);
+  DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fmadd.  The vec_merge mask of 1 takes element 0 from
+;; (operand 1 * operand 2) + operand 3 and the other elements from
+;; operand 1.  Operand 1 is tied to the destination in both alternatives,
+;; and only operand 2 or operand 3 may be memory.
+(define_insn "sse5_vmfmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating multiply and subtract:
+;; operand 0 = (operand 1 * operand 2) - operand 3.
+;; Allow two memory operands the same as fmadd: the condition passes 2 as
+;; the last argument of ix86_sse5_valid_op_p, while each constraint
+;; alternative ties the destination to operand 1 or operand 3 and allows at
+;; most one memory input.
+(define_insn "sse5_fmsub<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split an fmsub that has two memory operands into a load plus the fmsub.
+;; The split applies only when the operands fail the one-memory check, pass
+;; the two-memory check, and the destination register is not mentioned in
+;; any of the three inputs.
+(define_split
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+        (minus:SSEMODEF4
+         (mult:SSEMODEF4
+          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+          (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+  "TARGET_SSE5
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !reg_mentioned_p (operands[0], operands[1])
+   && !reg_mentioned_p (operands[0], operands[2])
+   && !reg_mentioned_p (operands[0], operands[3])"
+  [(const_int 0)]
+{
+  rtx fused_insn;
+  ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+  /* Build the insn only after the operand array has been processed.  */
+  fused_insn = gen_sse5_fmsub<mode>4 (operands[0], operands[1],
+                                      operands[2], operands[3]);
+  emit_insn (fused_insn);
+  DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fmsub.  The vec_merge mask of 1 takes element 0 from
+;; (operand 1 * operand 2) - operand 3 and the other elements from
+;; operand 1.  Operand 1 is tied to the destination in both alternatives,
+;; and only operand 2 or operand 3 may be memory.
+(define_insn "sse5_vmfmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating point negative multiply and add
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
+;; Note operands are out of order to simplify the call to
+;; ix86_sse5_valid_op_p: operand 3 (the addend) appears first in the RTL,
+;; but operands 1 and 2 are still the multiplicands.
+;; Allow two memory operands to help in optimizing.
+(define_insn "sse5_fnmadd<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (minus:SSEMODEF4
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split an fnmadd that has two memory operands into a load plus the fnmadd.
+;; The split applies only when the operands fail the one-memory check, pass
+;; the two-memory check, and the destination register is not mentioned in
+;; any of the three inputs.
+(define_split
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+        (minus:SSEMODEF4
+         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
+         (mult:SSEMODEF4
+          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+          (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
+  "TARGET_SSE5
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !reg_mentioned_p (operands[0], operands[1])
+   && !reg_mentioned_p (operands[0], operands[2])
+   && !reg_mentioned_p (operands[0], operands[3])"
+  [(const_int 0)]
+{
+  rtx fused_insn;
+  ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+  /* Build the insn only after the operand array has been processed.  */
+  fused_insn = gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
+                                       operands[2], operands[3]);
+  emit_insn (fused_insn);
+  DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fnmadd.  The vec_merge mask of 1 takes element 0 from
+;; operand 3 - (operand 1 * operand 2) and the other elements from
+;; operand 1.  Operand 1 is tied to the destination in both alternatives,
+;; and only operand 2 or operand 3 may be memory.
+(define_insn "sse5_vmfnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating point negative multiply and subtract
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
+;; Allow 2 memory operands to help with optimization
+;; Unlike the other parallel patterns there are only two constraint
+;; alternatives here, and both tie the negated operand 1 to the destination;
+;; only operand 2 or operand 3 may be memory in a given alternative.
+(define_insn "sse5_fnmsub<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (neg:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+ "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split an fnmsub that has two memory operands into a load plus the fnmsub.
+;; The split applies only when the operands fail the one-memory check, pass
+;; the two-memory check, and the destination register is not mentioned in
+;; any of the three inputs.
+(define_split
+  [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+        (minus:SSEMODEF4
+         (mult:SSEMODEF4
+          (neg:SSEMODEF4
+           (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
+          (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+  "TARGET_SSE5
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !reg_mentioned_p (operands[0], operands[1])
+   && !reg_mentioned_p (operands[0], operands[2])
+   && !reg_mentioned_p (operands[0], operands[3])"
+  [(const_int 0)]
+{
+  rtx fused_insn;
+  ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+  /* Build the insn only after the operand array has been processed.  */
+  fused_insn = gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
+                                       operands[2], operands[3]);
+  emit_insn (fused_insn);
+  DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fnmsub.  The vec_merge mask of 1 takes element 0 from
+;; ((- operand 1) * operand 2) - operand 3 and the other elements from
+;; operand 1.  Operand 1 is tied to the destination in both alternatives,
+;; and only operand 2 or operand 3 may be memory.
+;; There is no splitter for the scalar form and no alternative accepts two
+;; memory inputs, so ix86_sse5_valid_op_p is called with a limit of 1, the
+;; same as the other scalar patterns.
+(define_insn "sse5_vmfnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; The same instructions using an UNSPEC to allow the intrinsic to be used
+;; even if the user used -mno-fused-madd.
+;; Parallel instructions.  During instruction generation, just default
+;; to registers, and let combine later build the appropriate instruction.
+;; Expander for operand 0 = (operand 1 * operand 2) + operand 3.
+(define_expand "sse5i_fmadd<mode>4"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (unspec:SSEMODEF2P
+         [(plus:SSEMODEF2P
+           (mult:SSEMODEF2P
+            (match_operand:SSEMODEF2P 1 "register_operand" "")
+            (match_operand:SSEMODEF2P 2 "register_operand" ""))
+           (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+         UNSPEC_SSE5_INTRINSIC))]
+  "TARGET_SSE5"
+{
+  /* With -mfused-madd, use the plain pattern; otherwise fall out of this
+     block so that the UNSPEC template above is emitted.  */
+  if (TARGET_FUSED_MADD)
+    {
+      rtx fused_insn = gen_sse5_fmadd<mode>4 (operands[0], operands[1],
+                                              operands[2], operands[3]);
+      emit_insn (fused_insn);
+      DONE;
+    }
+})
+
+;; UNSPEC form of the parallel fmadd, usable without TARGET_FUSED_MADD:
+;; operand 0 = (operand 1 * operand 2) + operand 3.
+;; Inputs may be registers or memory, but the condition passes 1 as the
+;; last argument of ix86_sse5_valid_op_p, unlike the 2 used by sse5_fmadd.
+(define_insn "*sse5i_fmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Intrinsic expander for operand 0 = (operand 1 * operand 2) - operand 3.
+;; All operands are registers at expand time.
+(define_expand "sse5i_fmsub<mode>4"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (unspec:SSEMODEF2P
+         [(minus:SSEMODEF2P
+           (mult:SSEMODEF2P
+            (match_operand:SSEMODEF2P 1 "register_operand" "")
+            (match_operand:SSEMODEF2P 2 "register_operand" ""))
+           (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+         UNSPEC_SSE5_INTRINSIC))]
+  "TARGET_SSE5"
+{
+  /* With -mfused-madd, use the plain pattern; otherwise fall out of this
+     block so that the UNSPEC template above is emitted.  */
+  if (TARGET_FUSED_MADD)
+    {
+      rtx fused_insn = gen_sse5_fmsub<mode>4 (operands[0], operands[1],
+                                              operands[2], operands[3]);
+      emit_insn (fused_insn);
+      DONE;
+    }
+})
+
+;; UNSPEC form of the parallel fmsub, usable without TARGET_FUSED_MADD:
+;; operand 0 = (operand 1 * operand 2) - operand 3.
+;; Operand 1 uses nonimmediate_operand, like the fmadd/fnmadd/fnmsub UNSPEC
+;; patterns, so that its "xm" constraint alternative can actually be
+;; matched with a memory operand.  The condition passes 1 as the last
+;; argument of ix86_sse5_valid_op_p.
+(define_insn "*sse5i_fmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
+;; Note operands are out of order to simplify the call to
+;; ix86_sse5_valid_op_p: operand 3 comes first in the RTL, operands 1 and 2
+;; are the multiplicands.
+(define_expand "sse5i_fnmadd<mode>4"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (unspec:SSEMODEF2P
+         [(minus:SSEMODEF2P
+           (match_operand:SSEMODEF2P 3 "register_operand" "")
+           (mult:SSEMODEF2P
+            (match_operand:SSEMODEF2P 1 "register_operand" "")
+            (match_operand:SSEMODEF2P 2 "register_operand" "")))]
+         UNSPEC_SSE5_INTRINSIC))]
+  "TARGET_SSE5"
+{
+  /* With -mfused-madd, use the plain pattern; otherwise fall out of this
+     block so that the UNSPEC template above is emitted.  */
+  if (TARGET_FUSED_MADD)
+    {
+      rtx fused_insn = gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
+                                               operands[2], operands[3]);
+      emit_insn (fused_insn);
+      DONE;
+    }
+})
+
+;; UNSPEC form of the parallel fnmadd, usable without TARGET_FUSED_MADD:
+;; operand 0 = operand 3 - (operand 1 * operand 2).
+;; Inputs may be registers or memory, but the condition passes 1 as the
+;; last argument of ix86_sse5_valid_op_p, unlike the 2 used by sse5_fnmadd.
+(define_insn "*sse5i_fnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
+;; Intrinsic expander; all operands are registers at expand time.
+(define_expand "sse5i_fnmsub<mode>4"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (unspec:SSEMODEF2P
+         [(minus:SSEMODEF2P
+           (mult:SSEMODEF2P
+            (neg:SSEMODEF2P
+             (match_operand:SSEMODEF2P 1 "register_operand" ""))
+            (match_operand:SSEMODEF2P 2 "register_operand" ""))
+           (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+         UNSPEC_SSE5_INTRINSIC))]
+  "TARGET_SSE5"
+{
+  /* With -mfused-madd, use the plain pattern; otherwise fall out of this
+     block so that the UNSPEC template above is emitted.  */
+  if (TARGET_FUSED_MADD)
+    {
+      rtx fused_insn = gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
+                                               operands[2], operands[3]);
+      emit_insn (fused_insn);
+      DONE;
+    }
+})
+
+;; UNSPEC form of the parallel fnmsub, usable without TARGET_FUSED_MADD:
+;; operand 0 = ((- operand 1) * operand 2) - operand 3.
+;; Operand 1 is marked commutative with operand 2 ("%") even though it sits
+;; under the neg; (-a) * b and (-b) * a have the same value.
+;; The condition passes 1 as the last argument of ix86_sse5_valid_op_p.
+(define_insn "*sse5i_fnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Scalar instructions
+;; Intrinsic expander for the scalar fmadd.  The vec_merge mask of 1 takes
+;; element 0 from (operand 1 * operand 2) + operand 3 and the other
+;; elements from operand 1.  The merge source and mask must be identical
+;; in the expander and in the *sse5i_vmfmadd insn below, otherwise the RTL
+;; emitted here is not recognized when -mno-fused-madd is in effect.
+(define_expand "sse5i_vmfmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are accepted.
+;; The merge source is operand 1 (tied to the destination by the "0"
+;; constraint) and the mask is 1, matching the expander above and the
+;; other scalar UNSPEC patterns.
+(define_insn "*sse5i_vmfmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; Intrinsic expander for the scalar fmsub.  The vec_merge mask of 1 takes
+;; element 0 from (operand 1 * operand 2) - operand 3 and the other
+;; elements from operand 1.  The merge source has to be operand 1 (not the
+;; destination) so that the emitted RTL matches the *sse5i_vmfmsub insn,
+;; which uses (match_dup 1).
+(define_expand "sse5i_vmfmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+;; UNSPEC form of the scalar fmsub, usable without TARGET_FUSED_MADD.
+;; The vec_merge mask of 1 takes element 0 from
+;; (operand 1 * operand 2) - operand 3 and the other elements from
+;; operand 1, which is tied to the destination in both alternatives.
+;; Only operand 2 or operand 3 may be memory.
+(define_insn "*sse5i_vmfmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; Intrinsic expander for the scalar fnmadd.
+;; Note operands are out of order to simplify the call to
+;; ix86_sse5_valid_op_p: operand 3 comes first in the RTL, operands 1 and 2
+;; are the multiplicands.  The vec_merge mask of 1 takes element 0 from
+;; operand 3 - (operand 1 * operand 2) and the other elements from
+;; operand 1.
+(define_expand "sse5i_vmfnmadd<mode>4"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (unspec:SSEMODEF2P
+         [(vec_merge:SSEMODEF2P
+           (minus:SSEMODEF2P
+            (match_operand:SSEMODEF2P 3 "register_operand" "")
+            (mult:SSEMODEF2P
+             (match_operand:SSEMODEF2P 1 "register_operand" "")
+             (match_operand:SSEMODEF2P 2 "register_operand" "")))
+           (match_dup 1)
+           (const_int 1))]
+         UNSPEC_SSE5_INTRINSIC))]
+  "TARGET_SSE5"
+{
+  /* With -mfused-madd, use the plain pattern; otherwise fall out of this
+     block so that the UNSPEC template above is emitted.  */
+  if (TARGET_FUSED_MADD)
+    {
+      rtx fused_insn = gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
+                                                 operands[2], operands[3]);
+      emit_insn (fused_insn);
+      DONE;
+    }
+})
+
+;; UNSPEC form of the scalar fnmadd, usable without TARGET_FUSED_MADD.
+;; The vec_merge mask of 1 takes element 0 from
+;; operand 3 - (operand 1 * operand 2) and the other elements from
+;; operand 1, which is tied to the destination in both alternatives.
+;; Only operand 2 or operand 3 may be memory.
+(define_insn "*sse5i_vmfnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; Intrinsic expander for the scalar fnmsub.  The vec_merge mask of 1 takes
+;; element 0 from ((- operand 1) * operand 2) - operand 3 and the other
+;; elements from operand 1.
+(define_expand "sse5i_vmfnmsub<mode>4"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+        (unspec:SSEMODEF2P
+         [(vec_merge:SSEMODEF2P
+           (minus:SSEMODEF2P
+            (mult:SSEMODEF2P
+             (neg:SSEMODEF2P
+              (match_operand:SSEMODEF2P 1 "register_operand" ""))
+             (match_operand:SSEMODEF2P 2 "register_operand" ""))
+            (match_operand:SSEMODEF2P 3 "register_operand" ""))
+           (match_dup 1)
+           (const_int 1))]
+         UNSPEC_SSE5_INTRINSIC))]
+  "TARGET_SSE5"
+{
+  /* With -mfused-madd, use the plain pattern; otherwise fall out of this
+     block so that the UNSPEC template above is emitted.  */
+  if (TARGET_FUSED_MADD)
+    {
+      rtx fused_insn = gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
+                                                 operands[2], operands[3]);
+      emit_insn (fused_insn);
+      DONE;
+    }
+})
+
+;; UNSPEC form of the scalar fnmsub, usable without TARGET_FUSED_MADD.
+;; The vec_merge mask of 1 takes element 0 from
+;; ((- operand 1) * operand 2) - operand 3 and the other elements from
+;; operand 1, which is tied to the destination in both alternatives.
+;; Only operand 2 or operand 3 may be memory.
+(define_insn "*sse5i_vmfnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+ "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])