From ad074595c343413e30d55f4bdbc103760db7060d Mon Sep 17 00:00:00 2001 From: irar Date: Sun, 15 Aug 2010 07:00:32 +0000 Subject: [PATCH] * tree-vect-data-refs.c (vect_setup_realignment): Support realignment in basic blocks. (vect_supportable_dr_alignment): Check alignment for basic blocks. * tree-vect-slp.c (vect_build_slp_tree): Allow different codes for data references. (vect_bb_vectorization_profitable_p): New function. (vect_slp_analyze_bb): Call vect_bb_vectorization_profitable_p() to check if it's worthwhile to vectorize the basic block. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@163260 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 11 +++ gcc/testsuite/ChangeLog | 11 +++ gcc/testsuite/gcc.dg/vect/bb-slp-10.c | 4 +- gcc/testsuite/gcc.dg/vect/bb-slp-2.c | 4 +- gcc/testsuite/gcc.dg/vect/bb-slp-9.c | 3 +- .../vect/costmodel/ppc/costmodel-bb-slp-9a.c | 47 ++++++++++++ .../vect/costmodel/ppc/ppc-costmodel-vect.exp | 5 ++ gcc/tree-vect-data-refs.c | 63 +++++++++------ gcc/tree-vect-slp.c | 89 +++++++++++++++++++++- 9 files changed, 206 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index df2c197faed..9400493ab40 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2010-08-15 Ira Rosen + + * tree-vect-data-refs.c (vect_setup_realignment): Support realignment + in basic blocks. + (vect_supportable_dr_alignment): Check alignment for basic blocks. + * tree-vect-slp.c (vect_build_slp_tree): Allow different codes for + data references. + (vect_bb_vectorization_profitable_p): New function. + (vect_slp_analyze_bb): Call vect_bb_vectorization_profitable_p() to + check if it's worthwhile to vectorize the basic block. 
+ 2010-08-14 Anatoly Sokolov * reload.h (register_move_cost, memory_move_secondary_cost, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2166e7228a2..d8073be7ed8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2010-08-15 Ira Rosen + + * gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c: New test. + * gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp: Run basic block + SLP tests. + * gcc.dg/vect/bb-slp-9.c: Now vectorizable on targets that support + misaligned loads. + * gcc.dg/vect/bb-slp-10.c: Now vectorizable on targets that support + misaligned stores. + * gcc.dg/vect/bb-slp-2.c: Avoid loop vectorization. + 2010-08-14 Mingjie Xing * gcc.dg/vect/fast-math-vect-reduc-8.c: Move diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-10.c b/gcc/testsuite/gcc.dg/vect/bb-slp-10.c index d31ffb47c34..991802fb87c 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-10.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-10.c @@ -50,7 +50,7 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ -/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" } } */ +/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" { xfail vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */ /* { dg-final { cleanup-tree-dump "slp" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-2.c index 1de31986829..4414a989777 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-2.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-2.c @@ -24,8 +24,8 @@ main1 (int dummy) *pout++ = *pin++; /* Avoid loop vectorization. 
*/ - if (dummy == 32) - abort (); + if (dummy) + __asm__ volatile ("" : : : "memory"); } /* check results: */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-9.c b/gcc/testsuite/gcc.dg/vect/bb-slp-9.c index e8fe1507365..40a9f27408c 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-9.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-9.c @@ -47,7 +47,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ -/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" } } */ +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "slp" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c new file mode 100644 index 00000000000..d14ae520c72 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "../../tree-vect.h" + +#define N 16 + +unsigned int out[N]; +unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + +__attribute__ ((noinline)) int +main1 (unsigned int x, unsigned int y) +{ + int i; + unsigned int *pin = &in[1]; + unsigned int *pout = &out[0]; + unsigned int a0, a1, a2, a3; + + /* Misaligned load. */ + *pout++ = *pin++; + *pout++ = *pin++; + *pout++ = *pin++; + *pout++ = *pin++; + + /* Check results. 
*/ + if (out[0] != in[1] + || out[1] != in[2] + || out[2] != in[3] + || out[3] != in[4]) + abort(); + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (2, 3); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail vect_no_align } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp index 7401cca75c8..806260ab61e 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp @@ -57,7 +57,10 @@ if [check_vmx_hw_available] { # Initialize `dg'. dg-init +set VECT_SLP_CFLAGS $DEFAULT_VECTCFLAGS + lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details" +lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details" # Main loop. dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \ @@ -66,6 +69,8 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \ "" $DEFAULT_VECTCFLAGS dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-slp-*.\[cS\]]] \ "" $DEFAULT_VECTCFLAGS +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-bb-slp*.\[cS\]]] \ + "" $VECT_SLP_CFLAGS #### Tests with special options global SAVED_DEFAULT_VECTCFLAGS diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index efd95a78acb..3f63a7fabad 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3467,8 +3467,8 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - edge pe; + struct loop *loop = NULL; + edge pe = NULL; tree scalar_dest = gimple_assign_lhs (stmt); tree vec_dest; gimple inc; @@ -3483,9 
+3483,15 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, gimple_seq stmts = NULL; bool inv_p; bool compute_in_loop = false; - bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); + bool nested_in_vect_loop = false; struct loop *containing_loop = (gimple_bb (stmt))->loop_father; - struct loop *loop_for_initial_load; + struct loop *loop_for_initial_load = NULL; + + if (loop_vinfo) + { + loop = LOOP_VINFO_LOOP (loop_vinfo); + nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); + } gcc_assert (alignment_support_scheme == dr_explicit_realign || alignment_support_scheme == dr_explicit_realign_optimized); @@ -3523,7 +3529,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, or not, which in turn determines if the misalignment is computed inside the inner-loop, or outside LOOP. */ - if (init_addr != NULL_TREE) + if (init_addr != NULL_TREE || !loop_vinfo) { compute_in_loop = true; gcc_assert (alignment_support_scheme == dr_explicit_realign); @@ -3555,6 +3561,9 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, if (at_loop) *at_loop = loop_for_initial_load; + if (loop_for_initial_load) + pe = loop_preheader_edge (loop_for_initial_load); + /* 3. For the case of the optimized realignment, create the first vector load at the loop preheader. 
*/ @@ -3563,7 +3572,6 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, /* Create msq_init = *(floor(p1)) in the loop preheader */ gcc_assert (!compute_in_loop); - pe = loop_preheader_edge (loop_for_initial_load); vec_dest = vect_create_destination_var (scalar_dest, vectype); ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE, &init_addr, &inc, true, &inv_p); @@ -3582,8 +3590,14 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, new_temp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_temp); mark_symbols_for_renaming (new_stmt); - new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); - gcc_assert (!new_bb); + if (pe) + { + new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); + gcc_assert (!new_bb); + } + else + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + msq_init = gimple_assign_lhs (new_stmt); } @@ -3596,16 +3610,19 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, tree builtin_decl; /* Compute INIT_ADDR - the initial addressed accessed by this memref. */ - if (compute_in_loop) - gcc_assert (init_addr); /* already computed by the caller. */ - else + if (!init_addr) { /* Generate the INIT_ADDR computation outside LOOP. */ init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts, NULL_TREE, loop); - pe = loop_preheader_edge (loop); - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + if (loop) + { + pe = loop_preheader_edge (loop); + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + else + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); } builtin_decl = targetm.vectorize.builtin_mask_for_load (); @@ -3979,12 +3996,11 @@ vect_supportable_dr_alignment (struct data_reference *dr, if (aligned_access_p (dr) && !check_aligned_accesses) return dr_aligned; - if (!loop_vinfo) - /* FORNOW: Misaligned accesses are supported only in loops. 
*/ - return dr_unaligned_unsupported; - - vect_loop = LOOP_VINFO_LOOP (loop_vinfo); - nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt); + if (loop_vinfo) + { + vect_loop = LOOP_VINFO_LOOP (loop_vinfo); + nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt); + } /* Possibly unaligned access. */ @@ -4059,9 +4075,10 @@ vect_supportable_dr_alignment (struct data_reference *dr, || targetm.vectorize.builtin_mask_for_load ())) { tree vectype = STMT_VINFO_VECTYPE (stmt_info); - if (nested_in_vect_loop - && (TREE_INT_CST_LOW (DR_STEP (dr)) - != GET_MODE_SIZE (TYPE_MODE (vectype)))) + if ((nested_in_vect_loop + && (TREE_INT_CST_LOW (DR_STEP (dr)) + != GET_MODE_SIZE (TYPE_MODE (vectype)))) + || !loop_vinfo) return dr_explicit_realign; else return dr_explicit_realign_optimized; diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 19967bc6995..7f219d56d89 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -456,7 +456,12 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, && (first_stmt_code != IMAGPART_EXPR || rhs_code != REALPART_EXPR) && (first_stmt_code != REALPART_EXPR - || rhs_code != IMAGPART_EXPR)) + || rhs_code != IMAGPART_EXPR) + && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)) + && (first_stmt_code == ARRAY_REF + || first_stmt_code == INDIRECT_REF + || first_stmt_code == COMPONENT_REF + || first_stmt_code == MEM_REF))) { if (vect_print_dump_info (REPORT_SLP)) { @@ -1509,7 +1514,75 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo) } -/* Cheick if the basic block can be vectorized. */ +/* Check if vectorization of the basic block is profitable. 
*/ + +static bool +vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) +{ + VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo); + slp_instance instance; + int i; + unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0; + unsigned int stmt_cost; + gimple stmt; + gimple_stmt_iterator si; + basic_block bb = BB_VINFO_BB (bb_vinfo); + stmt_vec_info stmt_info = NULL; + tree dummy_type = NULL; + int dummy = 0; + + /* Calculate vector costs. */ + for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) + { + vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance); + vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance); + } + + /* Calculate scalar cost. */ + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + stmt = gsi_stmt (si); + stmt_info = vinfo_for_stmt (stmt); + + if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info) + || !PURE_SLP_STMT (stmt_info)) + continue; + + if (STMT_VINFO_DATA_REF (stmt_info)) + { + if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) + stmt_cost = targetm.vectorize.builtin_vectorization_cost + (scalar_load, dummy_type, dummy); + else + stmt_cost = targetm.vectorize.builtin_vectorization_cost + (scalar_store, dummy_type, dummy); + } + else + stmt_cost = targetm.vectorize.builtin_vectorization_cost + (scalar_stmt, dummy_type, dummy); + + scalar_cost += stmt_cost; + } + + if (vect_print_dump_info (REPORT_COST)) + { + fprintf (vect_dump, "Cost model analysis: \n"); + fprintf (vect_dump, " Vector inside of basic block cost: %d\n", + vec_inside_cost); + fprintf (vect_dump, " Vector outside of basic block cost: %d\n", + vec_outside_cost); + fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost); + } + + /* Vectorization is profitable if its cost is less than the cost of scalar + version. */ + if (vec_outside_cost + vec_inside_cost >= scalar_cost) + return false; + + return true; +} + +/* Check if the basic block can be vectorized. 
*/ bb_vec_info vect_slp_analyze_bb (basic_block bb) @@ -1641,6 +1714,18 @@ vect_slp_analyze_bb (basic_block bb) return NULL; } + /* Cost model: check if the vectorization is worthwhile. */ + if (flag_vect_cost_model + && !vect_bb_vectorization_profitable_p (bb_vinfo)) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) + fprintf (vect_dump, "not vectorized: vectorization is not " + "profitable.\n"); + + destroy_bb_vec_info (bb_vinfo); + return NULL; + } + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Basic block will be vectorized using SLP\n"); -- 2.11.0