From: jakub Date: Wed, 19 Oct 2011 22:08:21 +0000 (+0000) Subject: * config/i386/i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=25f07ca6d6e54f3de87dfe0cdb49bcf4ce7fd846 * config/i386/i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use d->op1 instead of d->op0 for the second vpshufb. (expand_vec_perm_even_odd_1): For V8SImode fix vpshufd immediates. (ix86_expand_vec_perm_const): If mask indicates two operands are needed, but both are the same and expanding them as d.op0 == d.op1 failed, retry with d.op0 != d.op1. (ix86_expand_vec_perm_builtin): Likewise. Handle sorry printing also for d.nelt == 32. * gcc.dg/torture/vshuf-32.inc: Add interleave permutations. * gcc.dg/torture/vshuf-16.inc: Likewise. * gcc.dg/torture/vshuf-8.inc: Likewise. * gcc.dg/torture/vshuf-4.inc: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180212 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 24cc639258f..394634bc3dd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2011-10-19 Jakub Jelinek + * config/i386/i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use + d->op1 instead of d->op0 for the second vpshufb. + (expand_vec_perm_even_odd_1): For V8SImode fix vpshufd immediates. + (ix86_expand_vec_perm_const): If mask indicates two operands are + needed, but both are the same and expanding them as d.op0 == d.op1 + failed, retry with d.op0 != d.op1. + (ix86_expand_vec_perm_builtin): Likewise. Handle sorry printing + also for d.nelt == 32. + PR middle-end/50754 * cfgexpand.c (expand_debug_expr): Handle WIDEN_LSHIFT_EXPR, ignore VEC_PERM_EXPR. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ec9d39b4830..4af4e5958b7 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -35992,7 +35992,7 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) vperm = force_reg (V32QImode, vperm); h = gen_reg_rtx (V32QImode); - op = gen_lowpart (V32QImode, d->op0); + op = gen_lowpart (V32QImode, d->op1); emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); ior = gen_reg_rtx (V32QImode); @@ -36154,9 +36154,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) /* Swap the 2nd and 3rd position in each lane into { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ emit_insn (gen_avx2_pshufdv3 (t1, t1, - GEN_INT (2 * 2 + 1 * 16 + 3 * 64))); + GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); emit_insn (gen_avx2_pshufdv3 (t2, t2, - GEN_INT (2 * 2 + 1 * 16 + 3 * 64))); + GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); /* Now an vpunpck[lh]qdq will produce { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */ @@ -36498,6 +36498,7 @@ ix86_expand_vec_perm_builtin (tree exp) { struct expand_vec_perm_d d; tree arg0, arg1, arg2; + bool maybe_retry = false; arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); @@ -36543,6 +36544,7 @@ ix86_expand_vec_perm_builtin (tree exp) for (i = 0; i < nelt; ++i) if (d.perm[i] >= nelt) d.perm[i] -= nelt; + maybe_retry = true; } /* FALLTHRU */ @@ -36563,6 +36565,28 @@ ix86_expand_vec_perm_builtin (tree exp) if (ix86_expand_vec_perm_builtin_1 (&d)) return d.target; + /* If the mask says both arguments are needed, but they are the same, + the above tried to expand with d.op0 == d.op1. If that didn't work, + retry with d.op0 != d.op1 as that is what testing has been done with. */ + if (maybe_retry) + { + rtx seq; + bool ok; + + extract_vec_perm_cst (&d, arg2); + d.op1 = gen_reg_rtx (d.vmode); + start_sequence (); + ok = ix86_expand_vec_perm_builtin_1 (&d); + seq = get_insns (); + end_sequence (); + if (ok) + { + emit_move_insn (d.op1, d.op0); + emit_insn (seq); + return d.target; + } + } + /* For compiler generated permutations, we should never got here, because the compiler should also be checking the ok hook. But since this is a builtin the user has access too, so don't abort. */ @@ -36588,6 +36612,19 @@ ix86_expand_vec_perm_builtin (tree exp) d.perm[8], d.perm[9], d.perm[10], d.perm[11], d.perm[12], d.perm[13], d.perm[14], d.perm[15]); break; + case 32: + sorry ("vector permutation " + "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d " + "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)", + d.perm[0], d.perm[1], d.perm[2], d.perm[3], + d.perm[4], d.perm[5], d.perm[6], d.perm[7], + d.perm[8], d.perm[9], d.perm[10], d.perm[11], + d.perm[12], d.perm[13], d.perm[14], d.perm[15], + d.perm[16], d.perm[17], d.perm[18], d.perm[19], + d.perm[20], d.perm[21], d.perm[22], d.perm[23], + d.perm[24], d.perm[25], d.perm[26], d.perm[27], + d.perm[28], d.perm[29], d.perm[30], d.perm[31]); + break; default: gcc_unreachable (); } @@ -36599,6 +36636,7 @@ bool ix86_expand_vec_perm_const (rtx operands[4]) { struct expand_vec_perm_d d; + unsigned char perm[MAX_VECT_LEN]; int i, nelt, which; rtx sel; @@ -36614,6 +36652,7 @@ ix86_expand_vec_perm_const (rtx operands[4]) gcc_assert (GET_CODE (sel) == CONST_VECTOR); gcc_assert (XVECLEN (sel, 0) == nelt); + gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); for (i = which = 0; i < nelt; ++i) { @@ -36622,6 +36661,7 @@ ix86_expand_vec_perm_const (rtx operands[4]) which |= (ei < nelt ? 1 : 2); d.perm[i] = ei; + perm[i] = ei; } switch (which) @@ -36653,7 +36693,32 @@ ix86_expand_vec_perm_const (rtx operands[4]) break; } - return ix86_expand_vec_perm_builtin_1 (&d); + if (ix86_expand_vec_perm_builtin_1 (&d)) + return true; + + /* If the mask says both arguments are needed, but they are the same, + the above tried to expand with d.op0 == d.op1. If that didn't work, + retry with d.op0 != d.op1 as that is what testing has been done with. */ + if (which == 3 && d.op0 == d.op1) + { + rtx seq; + bool ok; + + memcpy (d.perm, perm, sizeof (perm)); + d.op1 = gen_reg_rtx (d.vmode); + start_sequence (); + ok = ix86_expand_vec_perm_builtin_1 (&d); + seq = get_insns (); + end_sequence (); + if (ok) + { + emit_move_insn (d.op1, d.op0); + emit_insn (seq); + return true; + } + } + + return false; } /* Implement targetm.vectorize.builtin_vec_perm_ok. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 23f1e5fff1f..12392074311 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2011-10-19 Jakub Jelinek + + * gcc.dg/torture/vshuf-32.inc: Add interleave permutations. + * gcc.dg/torture/vshuf-16.inc: Likewise. + * gcc.dg/torture/vshuf-8.inc: Likewise. + * gcc.dg/torture/vshuf-4.inc: Likewise. + 2011-10-19 Janus Weil PR fortran/47023 diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-16.inc b/gcc/testsuite/gcc.dg/torture/vshuf-16.inc index 722160377b5..1464774c62f 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-16.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-16.inc @@ -15,7 +15,9 @@ T (11, 15, 9, 14, 10, 8, 12, 13, 11, 7, 1, 2, 0, 4, 5, 6, 3) \ T (12, 2, 5, 24, 23, 17, 22, 20, 21, 12, 14, 13, 8, 6, 20, 10, 18) \ T (13, 23, 11, 15, 9, 0, 14, 8, 12, 10, 13, 19, 11, 2, 26, 24, 30) \ T (14, 25, 5, 17, 1, 9, 15, 21, 7, 28, 2, 18, 13, 30, 14, 10, 4) \ -T (15, 1, 30, 27, 31, 9, 18, 25, 12, 7, 4, 2, 16, 25, 20, 10, 3) +T (15, 1, 30, 27, 31, 9, 18, 25, 12, 7, 4, 2, 16, 25, 20, 10, 3) \ +T (16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30) \ +T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31) #define EXPTESTS \ T (116, 28, 13, 27, 11, 21, 1, 5, 22, 29, 14, 15, 6, 3, 10, 16, 30) \ T (117, 22, 26, 1, 13, 29, 3, 18, 18, 11, 21, 12, 28, 19, 5, 7, 4) \ diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-32.inc b/gcc/testsuite/gcc.dg/torture/vshuf-32.inc index 5cd077b5045..8410e48310d 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-32.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-32.inc @@ -15,7 +15,9 @@ T (11, 13, 40, 7, 33, 51, 21, 59, 46, 47, 32, 9, 34, 63, 35, 41, 17, 24, 1, 8, 2 T (12, 39, 43, 54, 27, 53, 39, 27, 30, 2, 17, 13, 33, 7, 52, 40, 15, 36, 57, 10, 28, 22, 23, 25, 24, 41, 47, 8, 20, 5, 3, 4, 0) \ T (13, 7, 51, 13, 61, 25, 4, 19, 58, 35, 33, 29, 15, 40, 2, 39, 16, 38, 3, 54, 63, 15, 6, 48, 21, 14, 52, 17, 50, 34, 55, 57, 50) \ T (14, 22, 53, 28, 42, 45, 38, 49, 13, 54, 61, 21, 52, 7, 16, 34, 9, 1, 43, 62, 43, 35, 50, 47, 58, 20, 3, 30, 15, 37, 53, 43, 36) \ -T (15, 2, 43, 49, 34, 28, 35, 29, 36, 51, 9, 17, 48, 10, 37, 45, 21, 52, 19, 25, 33, 60, 31, 30, 42, 12, 26, 27, 46, 5, 40, 14, 36) +T (15, 2, 43, 49, 34, 28, 35, 29, 36, 51, 9, 17, 48, 10, 37, 45, 21, 52, 19, 25, 33, 60, 31, 30, 42, 12, 26, 27, 46, 5, 40, 14, 36) \ +T (16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62) \ +T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63) #define EXPTESTS \ T (116, 13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \ T (117, 39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \ diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-4.inc b/gcc/testsuite/gcc.dg/torture/vshuf-4.inc index a7a852b80ab..2044e365dec 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-4.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-4.inc @@ -15,7 +15,9 @@ T (11, 1, 4, 0, 7) \ T (12, 1, 5, 7, 2) \ T (13, 2, 3, 0, 4) \ T (14, 7, 6, 4, 2) \ -T (15, 6, 1, 3, 4) +T (15, 6, 1, 3, 4) \ +T (16, 0, 2, 4, 6) \ +T (17, 1, 3, 5, 7) #define EXPTESTS \ T (116, 1, 2, 4, 3) \ T (117, 7, 3, 3, 0) \ diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-8.inc b/gcc/testsuite/gcc.dg/torture/vshuf-8.inc index cd841c29efe..24db545ef26 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-8.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-8.inc @@ -15,7 +15,9 @@ T (11, 5, 11, 12, 6, 3, 2, 4, 15) \ T (12, 5, 13, 14, 8, 4, 10, 4, 12) \ T (13, 14, 8, 12, 3, 13, 9, 5, 4) \ T (14, 15, 3, 13, 6, 14, 12, 10, 0) \ -T (15, 0, 5, 11, 7, 4, 6, 14, 1) +T (15, 0, 5, 11, 7, 4, 6, 14, 1) \ +T (16, 0, 2, 4, 6, 8, 10, 12, 14) \ +T (17, 1, 3, 5, 7, 9, 11, 13, 15) #define EXPTESTS \ T (116, 9, 3, 9, 4, 7, 0, 0, 6) \ T (117, 4, 14, 12, 8, 9, 6, 0, 10) \