X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fsse.md;h=c2a621774c74e37f780d50a7792cafc3619dc79d;hb=59347d69e81668cb7ab8b6050086c32c2478fa7c;hp=d6b87a15adb79ac156255ffafdf9b397e393e94f;hpb=aa2899f300e401acd7ebad7d5c64e9572be6a8b0;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d6b87a15adb..c2a621774c7 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -582,18 +582,7 @@ DONE; }) -(define_expand "_movu" - [(set (match_operand:VF 0 "nonimmediate_operand" "") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] - "TARGET_SSE" -{ - if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (mode, operands[1]); -}) - -(define_insn "*_movu" +(define_insn "_movu" [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m") (unspec:VF [(match_operand:VF 1 "nonimmediate_operand" "xm,x")] @@ -605,17 +594,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_expand "_movdqu" - [(set (match_operand:VI1 0 "nonimmediate_operand" "") - (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] - "TARGET_SSE2" -{ - if (MEM_P (operands[0]) && MEM_P (operands[1])) - operands[1] = force_reg (mode, operands[1]); -}) - -(define_insn "*_movdqu" +(define_insn "_movdqu" [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m") (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] @@ -1176,14 +1155,14 @@ (parallel [(const_int 0)])) (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) (plusminus:DF - (vec_select:DF (match_dup 1) (parallel [(const_int 2)])) - (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2DF - (plusminus:DF (vec_select:DF (match_operand:V4DF 2 "nonimmediate_operand" "xm") (parallel [(const_int 0)])) - (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))) + (vec_concat:V2DF + (plusminus:DF + (vec_select:DF (match_dup 1) (parallel [(const_int 2)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))) (plusminus:DF (vec_select:DF (match_dup 2) (parallel [(const_int 2)])) (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))] @@ -3765,7 +3744,7 @@ (define_insn "sse_loadlps" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") (vec_concat:V4SF - (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x") + (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x") (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0") (parallel [(const_int 2) (const_int 3)]))))] @@ -5743,11 +5722,15 @@ if (TARGET_XOP) { + rtx t3 = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); - emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + emit_move_insn (t3, CONST0_RTX (V2DImode)); + + emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3)); DONE; } @@ -5772,11 +5755,15 @@ if (TARGET_XOP) { + rtx t3 = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); - emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + emit_move_insn (t3, CONST0_RTX (V2DImode)); + + emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3)); DONE; } @@ -10443,12 +10430,12 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (vec_select:V2SI + (parallel [(const_int 0) + (const_int 2)]))) + (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 0) + (const_int 2)]))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -10462,13 +10449,13 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 1) + (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) + (parallel [(const_int 1) + (const_int 3)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -10482,61 +10469,19 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 0) + (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))) + (parallel [(const_int 0) + (const_int 2)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; We don't have a straight 32-bit parallel multiply and extend on XOP, so -;; fake it with a multiply/add. In general, we expect the define_split to -;; occur before register allocation, so we have to handle the corner case where -;; the target is the same as operands 1/2 -(define_insn_and_split "xop_mulv2div2di3_low" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_XOP" - "#" - "&& reload_completed" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V2DImode); -} - [(set_attr "type" "ssemul") - (set_attr "mode" "TI")]) - (define_insn "xop_pmacsdqh" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI @@ -10544,61 +10489,19 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 1) + (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) + (parallel [(const_int 1) + (const_int 3)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; We don't have a straight 32-bit parallel multiply and extend on XOP, so -;; fake it with a multiply/add. In general, we expect the define_split to -;; occur before register allocation, so we have to handle the corner case where -;; the target is the same as either operands[1] or operands[2] -(define_insn_and_split "xop_mulv2div2di3_high" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))))] - "TARGET_XOP" - "#" - "&& reload_completed" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V2DImode); -} - [(set_attr "type" "ssemul") - (set_attr "mode" "TI")]) - ;; XOP parallel integer multiply/add instructions for the intrinisics (define_insn "xop_pmacsswd" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -10815,45 +10718,45 @@ (vec_select:V2QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) - (const_int 4)]))) + (const_int 8)]))) (sign_extend:V2DI (vec_select:V2QI (match_dup 1) (parallel [(const_int 1) - (const_int 5)])))) + (const_int 9)])))) (plus:V2DI (sign_extend:V2DI (vec_select:V2QI (match_dup 1) (parallel [(const_int 2) - (const_int 6)]))) + (const_int 10)]))) (sign_extend:V2DI (vec_select:V2QI (match_dup 1) (parallel [(const_int 3) - (const_int 7)]))))) + (const_int 11)]))))) (plus:V2DI (plus:V2DI (sign_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 8) + (parallel [(const_int 4) (const_int 12)]))) (sign_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 9) + (parallel [(const_int 5) (const_int 13)])))) (plus:V2DI (sign_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 10) + (parallel [(const_int 6) (const_int 14)]))) (sign_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 11) + (parallel [(const_int 7) (const_int 15)])))))))] "TARGET_XOP" "vphaddbq\t{%1, %0|%0, %1}" @@ -11001,45 +10904,45 @@ (vec_select:V2QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) - (const_int 4)]))) + (const_int 8)]))) (sign_extend:V2DI (vec_select:V2QI (match_dup 1) (parallel [(const_int 1) - (const_int 5)])))) + (const_int 9)])))) (plus:V2DI (zero_extend:V2DI (vec_select:V2QI (match_dup 1) (parallel [(const_int 2) - (const_int 6)]))) + (const_int 10)]))) (zero_extend:V2DI (vec_select:V2QI (match_dup 1) (parallel [(const_int 3) - (const_int 7)]))))) + (const_int 11)]))))) (plus:V2DI (plus:V2DI (zero_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 8) + (parallel [(const_int 4) (const_int 12)]))) (sign_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 9) + (parallel [(const_int 5) (const_int 13)])))) (plus:V2DI (zero_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 10) + (parallel [(const_int 6) (const_int 14)]))) (zero_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 11) + (parallel [(const_int 7) (const_int 15)])))))))] "TARGET_XOP" "vphaddubq\t{%1, %0|%0, %1}" @@ -11880,11 +11783,11 @@ (define_insn "avx2_permvarv8si" [(set (match_operand:V8SI 0 "register_operand" "=x") (unspec:V8SI - [(match_operand:V8SI 1 "register_operand" "x") - (match_operand:V8SI 2 "nonimmediate_operand" "xm")] + [(match_operand:V8SI 1 "nonimmediate_operand" "xm") + (match_operand:V8SI 2 "register_operand" "x")] UNSPEC_VPERMSI))] "TARGET_AVX2" - "vpermd\t{%2, %1, %0|%0, %1, %2}" + "vpermd\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -11905,11 +11808,11 @@ (define_insn "avx2_permvarv8sf" [(set (match_operand:V8SF 0 "register_operand" "=x") (unspec:V8SF - [(match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")] + [(match_operand:V8SF 1 "nonimmediate_operand" "xm") + (match_operand:V8SI 2 "register_operand" "x")] UNSPEC_VPERMSF))] "TARGET_AVX2" - "vpermps\t{%2, %1, %0|%0, %1, %2}" + "vpermps\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -12617,7 +12520,7 @@ (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")] UNSPEC_VCVTPH2PS) (parallel [(const_int 0) (const_int 1) - (const_int 1) (const_int 2)])))] + (const_int 2) (const_int 3)])))] "TARGET_F16C" "vcvtph2ps\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt")