From 6be36710014a580db25d163b86e7dc4f76a49dd9 Mon Sep 17 00:00:00 2001 From: uros Date: Fri, 22 Aug 2008 13:58:52 +0000 Subject: [PATCH] PR target/37184 * config/i386/i386.c (ix86_match_ccmode): Handle CCAmode, CCCmode, CCOmode and CCSmode destination modes. PR target/37191 * config/i386/mmx.md (*vec_extractv2sf_0): Avoid combining registers from different units in a single alternative. (*vec_extractv2sf_1): Ditto. (*vec_extractv2si_0): Ditto. (*vec_extractv2si_1): Ditto. * config/i386/sse.md (sse2_storehpd): Ditto. (sse2_storelpd): Ditto. (sse2_loadhpd): Ditto. (sse2_loadlpd): Ditto. PR target/37197 * config/i386/i386.md (clzsi2_abm): Fix operand 1 constraints. (popcountsi2): Ditto. (clzdi2_abm): Ditto. (popcountdi2): Ditto. (clzhi2_abm): Ditto. (popcounthi2): Ditto. testsuite/ChangeLog: PR target/37184 * gcc.target/i386/pr37184.c: New test. PR target/37191 * gcc.target/i386/pr37191.c: New test. PR target/37197 * gcc.target/i386/pr37197.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@139471 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 38 ++++++++++++++++++---- gcc/config/i386/i386.c | 4 +++ gcc/config/i386/i386.md | 12 +++---- gcc/config/i386/mmx.md | 37 +++++++++++++++------- gcc/config/i386/sse.md | 56 +++++++++++++++++++++------------ gcc/testsuite/ChangeLog | 11 +++++++ gcc/testsuite/gcc.target/i386/pr37184.c | 24 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr37191.c | 43 +++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr37197.c | 7 +++++ 9 files changed, 188 insertions(+), 44 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr37184.c create mode 100644 gcc/testsuite/gcc.target/i386/pr37191.c create mode 100644 gcc/testsuite/gcc.target/i386/pr37197.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 592b28cd5c8..c7e5e5a15b9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2008-08-22 Uros Bizjak + + PR target/37184 + * config/i386/i386.c (ix86_match_ccmode): Handle CCAmode, + CCCmode, CCOmode and 
CCSmode destination modes. + + PR target/37191 + * config/i386/mmx.md (*vec_extractv2sf_0): Avoid combining registers + from different units in a single alternative. + (*vec_extractv2sf_1): Ditto. + (*vec_extractv2si_0): Ditto. + (*vec_extractv2si_1): Ditto. + * config/i386/sse.md (sse2_storehpd): Ditto. + (sse2_storelpd): Ditto. + (sse2_loadhpd): Ditto. + (sse2_loadlpd): Ditto. + + PR target/37197 + * config/i386/i386.md (clzsi2_abm): Fix operand 1 constraints. + (popcountsi2): Ditto. + (clzdi2_abm): Ditto. + (popcountdi2): Ditto. + (clzhi2_abm): Ditto. + (popcounthi2): Ditto. + 2008-08-22 Richard Guenther PR middle-end/36548 @@ -35,9 +60,11 @@ 2008-08-21 Jan Hubicka * cgraph.c (first_cgraph_function_insertion_hook): New variable. - (cgraph_add_function_insertion_hook, cgraph_remove_function_insertion_hook, + (cgraph_add_function_insertion_hook, + cgraph_remove_function_insertion_hook, cgraph_call_function_insertion_hooks): New functions. - * cgraph.h (cgraph_add_function_insertion_hook, cgraph_remove_function_insertion_hook, + * cgraph.h (cgraph_add_function_insertion_hook, + cgraph_remove_function_insertion_hook, cgraph_call_function_insertion_hooks): Declare. * ipa-reference.c (function_insertion_hook_holder): New variable. (check_operand, look_for_address_of): When checking late, do not care @@ -260,9 +287,9 @@ 2008-08-19 Rafael Espindola - * varasm.c (weak_decls): Move earlier in the file. - (assemble_external): Add weak decls to the weak_decls list. - (declare_weak): Don't add decls to the weak_decls list. + * varasm.c (weak_decls): Move earlier in the file. + (assemble_external): Add weak decls to the weak_decls list. + (declare_weak): Don't add decls to the weak_decls list. 2008-08-19 H.J. Lu @@ -390,7 +417,6 @@ (compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if flag_profile_correction is set. ->>>>>>> .r139386 2008-08-18 Richard Sandiford * rtlanal.c (subreg_offset_representable_p): Check HARD_REGNO_MODE_OK. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 13d81476370..b7561c3002a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12794,6 +12794,10 @@ ix86_match_ccmode (rtx insn, enum machine_mode req_mode) if (req_mode == CCZmode) return 0; /* FALLTHRU */ + case CCAmode: + case CCCmode: + case CCOmode: + case CCSmode: case CCZmode: break; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f8d4c7db118..49fde6b428a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -15285,7 +15285,7 @@ (define_insn "clzsi2_abm" [(set (match_operand:SI 0 "register_operand" "=r") - (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_ABM" "lzcnt{l}\t{%1, %0|%0, %1}" @@ -15305,7 +15305,7 @@ (define_insn "popcountsi2" [(set (match_operand:SI 0 "register_operand" "=r") - (popcount:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_POPCNT" "popcnt{l}\t{%1, %0|%0, %1}" @@ -15412,7 +15412,7 @@ (define_insn "clzdi2_abm" [(set (match_operand:DI 0 "register_operand" "=r") - (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_ABM" "lzcnt{q}\t{%1, %0|%0, %1}" @@ -15432,7 +15432,7 @@ (define_insn "popcountdi2" [(set (match_operand:DI 0 "register_operand" "=r") - (popcount:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_POPCNT" "popcnt{q}\t{%1, %0|%0, %1}" @@ -15473,7 +15473,7 @@ (define_insn "clzhi2_abm" [(set (match_operand:HI 0 "register_operand" "=r") - (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))) (clobber 
(reg:CC FLAGS_REG))] "TARGET_ABM" "lzcnt{w}\t{%1, %0|%0, %1}" @@ -15493,7 +15493,7 @@ (define_insn "popcounthi2" [(set (match_operand:HI 0 "register_operand" "=r") - (popcount:HI (match_operand:HI 1 "nonimmediate_operand" ""))) + (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_POPCNT" "popcnt{w}\t{%1, %0|%0, %1}" diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 8e77a30d353..9bc675893a5 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -535,10 +535,12 @@ DONE; }) +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn_and_split "*vec_extractv2sf_0" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,y,m,m,frxy") + [(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r") (vec_select:SF - (match_operand:V2SF 1 "nonimmediate_operand" " x,y,x,y,m") + (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m") (parallel [(const_int 0)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" @@ -554,18 +556,23 @@ DONE; }) +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2sf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,frxy") + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r") (vec_select:SF - (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o") + (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 unpckhps\t%0, %0 + # + # + # #" - [(set_attr "type" "mmxcvt,sselog1,*") - (set_attr "mode" "DI,V4SF,SI")]) + [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov") + (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")]) (define_split [(set (match_operand:SF 0 "register_operand" "") @@ 
-1214,10 +1221,12 @@ DONE; }) +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn_and_split "*vec_extractv2si_0" - [(set (match_operand:SI 0 "nonimmediate_operand" "=x,y,m,m,frxy") + [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " x,y,x,y,m") + (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m") (parallel [(const_int 0)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" @@ -1233,10 +1242,12 @@ DONE; }) +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2si_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Y2,Y2,x,frxy") + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Y2,Y2,x,y,x,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Y2,0,o") + (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Y2,0,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -1244,9 +1255,11 @@ punpckhdq\t%0, %0 pshufd\t{$85, %1, %0|%0, %1, 85} unpckhps\t%0, %0 + # + # #" - [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,*") - (set_attr "mode" "DI,TI,TI,V4SF,SI")]) + [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov") + (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")]) (define_split [(set (match_operand:SI 0 "register_operand" "") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e7e4140597d..72098ec0ad4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2404,9 +2404,9 @@ }) (define_insn_and_split "*vec_extractv4sf_0" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr") + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r") (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m") + 
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m") (parallel [(const_int 0)])))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" @@ -2659,18 +2659,22 @@ [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_storehpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o") + (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o") (parallel [(const_int 1)])))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ movhpd\t{%1, %0|%0, %1} unpckhpd\t%0, %0 + # + # #" - [(set_attr "type" "ssemov,sselog1,ssemov") - (set_attr "mode" "V1DF,V2DF,DF")]) + [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov") + (set_attr "mode" "V1DF,V2DF,DF,DF,DF")]) (define_split [(set (match_operand:DF 0 "register_operand" "") @@ -2683,18 +2687,22 @@ operands[1] = adjust_address (operands[1], DFmode, 8); }) +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_storelpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m") + (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") (parallel [(const_int 0)])))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ movlpd\t{%1, %0|%0, %1} # + # + # #" - [(set_attr "type" "ssemov") - (set_attr "mode" "V1DF,DF,DF")]) + [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") + (set_attr "mode" "V1DF,DF,DF,DF,DF")]) (define_split [(set (match_operand:DF 0 "register_operand" "") @@ -2723,21 +2731,25 @@ "TARGET_SSE2" 
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_loadhpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o") (vec_concat:V2DF (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0") + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0") (parallel [(const_int 0)])) - (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))] + (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ movhpd\t{%2, %0|%0, %2} unpcklpd\t{%2, %0|%0, %2} shufpd\t{$1, %1, %0|%0, %1, 1} + # + # #" - [(set_attr "type" "ssemov,sselog,sselog,other") - (set_attr "mode" "V1DF,V2DF,V2DF,DF")]) + [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov") + (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")]) (define_split [(set (match_operand:V2DF 0 "memory_operand" "") @@ -2760,12 +2772,14 @@ "TARGET_SSE2" "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") +;; Avoid combining registers from different units in a single alternative, +;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_loadlpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m") (vec_concat:V2DF - (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr") + (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r") (vec_select:DF - (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0") + (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0") (parallel [(const_int 1)]))))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ @@ -2774,9 +2788,11 @@ movsd\t{%2, %0|%0, %2} shufpd\t{$2, %2, %0|%0, %2, 2} movhpd\t{%H1, 
%0|%0, %H1} + # + # #" - [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other") - (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")]) + [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov") + (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")]) (define_split [(set (match_operand:V2DF 0 "memory_operand" "") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 061c21962fc..b1dc9c40eae 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2008-08-22 Uros Bizjak + + PR target/37184 + * gcc.target/i386/pr37184.c: New test. + + PR target/37191 + * gcc.target/i386/pr37191.c: New test. + + PR target/37197 + * gcc.target/i386/pr37197.c: New test. + 2008-08-22 Richard Guenther PR middle-end/36548 diff --git a/gcc/testsuite/gcc.target/i386/pr37184.c b/gcc/testsuite/gcc.target/i386/pr37184.c new file mode 100644 index 00000000000..14e11f70795 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr37184.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O1" } */ + +static inline unsigned int +rshift_u_s (unsigned int left, int right) +{ + return left >> right; +} + +unsigned int g_15; + +int func_29 (int p_30) +{ + unsigned int l_31; + unsigned long long int l_35 = 0x7736EAE11771B705LL; + unsigned int l_36 = 0xEDB553A8L; + + l_31 = g_15; + if ((l_31 < + (rshift_u_s ((g_15 - (g_15 >= l_35)), (l_36 <= 1)))) + mod_rhs (1)) + return 1; +} + diff --git a/gcc/testsuite/gcc.target/i386/pr37191.c b/gcc/testsuite/gcc.target/i386/pr37191.c new file mode 100644 index 00000000000..b7b65df12b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr37191.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -mmmx" } */ + +#include <mmintrin.h> +#include <stddef.h> +#include <stdint.h> + +extern const uint64_t ff_bone; + +static inline void transpose4x4(uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride) { + __m64 row0 = _mm_cvtsi32_si64(*(unsigned*)(src + (0 * src_stride))); 
+ __m64 row1 = _mm_cvtsi32_si64(*(unsigned*)(src + (1 * src_stride))); + __m64 row2 = _mm_cvtsi32_si64(*(unsigned*)(src + (2 * src_stride))); + __m64 row3 = _mm_cvtsi32_si64(*(unsigned*)(src + (3 * src_stride))); + __m64 tmp0 = _mm_unpacklo_pi8(row0, row1); + __m64 tmp1 = _mm_unpacklo_pi8(row2, row3); + __m64 row01 = _mm_unpacklo_pi16(tmp0, tmp1); + __m64 row23 = _mm_unpackhi_pi16(tmp0, tmp1); + *((unsigned*)(dst + (0 * dst_stride))) = _mm_cvtsi64_si32(row01); + *((unsigned*)(dst + (1 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row01, row01)); + *((unsigned*)(dst + (2 * dst_stride))) = _mm_cvtsi64_si32(row23); + *((unsigned*)(dst + (3 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row23, row23)); +} + +static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1) +{ + asm volatile( + "" + :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), + "m"(alpha1), "m"(beta1), "m"(ff_bone) + ); +} + +void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) +{ + + uint8_t trans[8*4] __attribute__ ((aligned (8))); + transpose4x4(trans, pix-2, 8, stride); + transpose4x4(trans+4, pix-2+4*stride, 8, stride); + h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); + transpose4x4(pix-2, trans, stride, 8); + transpose4x4(pix-2+4*stride, trans+4, stride, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/pr37197.c b/gcc/testsuite/gcc.target/i386/pr37197.c new file mode 100644 index 00000000000..95565e8027e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr37197.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4" } */ + +int testl (unsigned long *a, int b) +{ + return b ? 1 : __builtin_parityl (*a); +} -- 2.11.0