DONE;
})
-(define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "")]
- UNSPEC_MOVU))]
- "TARGET_SSE"
-{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (<MODE>mode, operands[1]);
-})
-
-(define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
+(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_expand "<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
- UNSPEC_MOVU))]
- "TARGET_SSE2"
-{
- if (MEM_P (operands[0]) && MEM_P (operands[1]))
- operands[1] = force_reg (<MODE>mode, operands[1]);
-})
-
-(define_insn "*<sse2>_movdqu<avxsizesuffix>"
+(define_insn "<sse2>_movdqu<avxsizesuffix>"
[(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:DF
- (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
- (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
- (vec_concat:V2DF
- (plusminus:DF
(vec_select:DF
(match_operand:V4DF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
- (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
(plusminus:DF
(vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
(define_insn "sse_loadlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
(vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
(parallel [(const_int 2) (const_int 3)]))))]
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
DONE;
}
if (TARGET_XOP)
{
+ rtx t3 = gen_reg_rtx (V2DImode);
+
emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
GEN_INT (1), GEN_INT (3)));
- emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ emit_move_insn (t3, CONST0_RTX (V2DImode));
+
+ emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
DONE;
}
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
- (vec_select:V2SI
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)])))
+ (parallel [(const_int 0)
+ (const_int 2)])))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
+ (parallel [(const_int 1)
+ (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (parallel [(const_int 1)
+ (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
+ (parallel [(const_int 0)
+ (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)]))))
+ (parallel [(const_int 0)
+ (const_int 2)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as operands 1/2
-(define_insn_and_split "xop_mulv2div2di3_low"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 1)
- (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 1)
- (const_int 3)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 1)
- (const_int 3)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 1)
- (const_int 3)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
(define_insn "xop_pmacsdqh"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
+ (parallel [(const_int 1)
+ (const_int 3)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (parallel [(const_int 1)
+ (const_int 3)]))))
(match_operand:V2DI 3 "nonimmediate_operand" "x")))]
"TARGET_XOP"
"vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "TI")])
-;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
-;; fake it with a multiply/add. In general, we expect the define_split to
-;; occur before register allocation, so we have to handle the corner case where
-;; the target is the same as either operands[1] or operands[2]
-(define_insn_and_split "xop_mulv2div2di3_high"
- [(set (match_operand:V2DI 0 "register_operand" "=&x")
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "%x")
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm")
- (parallel [(const_int 0)
- (const_int 2)])))))]
- "TARGET_XOP"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (match_dup 3))
- (set (match_dup 0)
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 1)
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2DI
- (vec_select:V2SI
- (match_dup 2)
- (parallel [(const_int 0)
- (const_int 2)]))))
- (match_dup 0)))]
-{
- operands[3] = CONST0_RTX (V2DImode);
-}
- [(set_attr "type" "ssemul")
- (set_attr "mode" "TI")])
-
;; XOP parallel integer multiply/add instructions for the intrinsics
(define_insn "xop_pmacsswd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddbq\t{%1, %0|%0, %1}"
(vec_select:V2QI
(match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
- (const_int 4)])))
+ (const_int 8)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 1)
- (const_int 5)]))))
+ (const_int 9)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 2)
- (const_int 6)])))
+ (const_int 10)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
(parallel [(const_int 3)
- (const_int 7)])))))
+ (const_int 11)])))))
(plus:V2DI
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 8)
+ (parallel [(const_int 4)
(const_int 12)])))
(sign_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 9)
+ (parallel [(const_int 5)
(const_int 13)]))))
(plus:V2DI
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 10)
+ (parallel [(const_int 6)
(const_int 14)])))
(zero_extend:V2DI
(vec_select:V2QI
(match_dup 1)
- (parallel [(const_int 11)
+ (parallel [(const_int 7)
(const_int 15)])))))))]
"TARGET_XOP"
"vphaddubq\t{%1, %0|%0, %1}"
;; avx2_permvarv8si: sets operand 0 (a V8SI register) to an UNSPEC_VPERMSI
;; unspec of operands 1 and 2, emitted as a single vpermd under TARGET_AVX2.
;; The -/+ lines below move the register-or-memory ("xm") slot from operand 2
;; to operand 1, make operand 2 register-only ("x"), and swap %1 and %2 in
;; both halves of the output template so the emitted operand order follows.
(define_insn "avx2_permvarv8si"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(unspec:V8SI
- [(match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
+ [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
UNSPEC_VPERMSI))]
"TARGET_AVX2"
- "vpermd\t{%2, %1, %0|%0, %1, %2}"
+ "vpermd\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
;; avx2_permvarv8sf: sets operand 0 (a V8SF register) to an UNSPEC_VPERMSF
;; unspec of operands 1 and 2, emitted as a single vpermps under TARGET_AVX2.
;; The -/+ lines below move the register-or-memory ("xm") slot from operand 2
;; to operand 1, make operand 2 register-only ("x") and change its mode from
;; V8SF to V8SI, and swap %1 and %2 in both halves of the output template.
;; NOTE(review): operand 2 presumably holds the permutation indices, which
;; would explain the integer mode -- confirm against the vpermps reference.
(define_insn "avx2_permvarv8sf"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(unspec:V8SF
- [(match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
UNSPEC_VPERMSF))]
"TARGET_AVX2"
- "vpermps\t{%2, %1, %0|%0, %1, %2}"
+ "vpermps\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
UNSPEC_VCVTPH2PS)
(parallel [(const_int 0) (const_int 1)
- (const_int 1) (const_int 2)])))]
+ (const_int 2) (const_int 3)])))]
"TARGET_F16C"
"vcvtph2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")