X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-vectorizer.c;h=b0aedf74b2c32e42ece5467127fb633f9568901a;hb=ecfefb70ee92eb56b3e3cc52134b06e05a423c42;hp=8c8554193a56bbae5cbe9b17c0d2f0bb7946c356;hpb=053fdd999453feb09cff376ef51ca167dc432205;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 8c8554193a5..b0aedf74b2c 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1,5 +1,5 @@ /* Loop Vectorization - Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. Contributed by Dorit Naishlos This file is part of GCC. @@ -16,8 +16,8 @@ for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ /* Loop Vectorization Pass. @@ -91,7 +91,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA To vectorize stmt S2, the vectorizer first finds the stmt that defines the operand 'b' (S1), and gets the relevant vector def 'vb' from the - vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The + vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The resulting sequence would be: VS1: vb = px[i]; @@ -124,7 +124,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "system.h" #include "coretypes.h" #include "tm.h" -#include "errors.h" #include "ggc.h" #include "tree.h" #include "target.h" @@ -137,7 +136,9 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfgloop.h" #include "cfglayout.h" #include "expr.h" +#include "recog.h" #include "optabs.h" +#include "params.h" #include "toplev.h" #include "tree-chrec.h" #include "tree-data-ref.h" @@ -149,25 +150,22 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA /************************************************************************* Simple Loop Peeling Utilities *************************************************************************/ -static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg - (struct loop *, struct loops *, edge); static void slpeel_update_phis_for_duplicate_loop (struct loop *, struct loop *, bool after); -static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool); +static void slpeel_update_phi_nodes_for_guard1 + (edge, struct loop *, bool, basic_block *, bitmap *); +static void slpeel_update_phi_nodes_for_guard2 + (edge, struct loop *, bool, basic_block *); static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block); -static void allocate_new_names (bitmap); static void rename_use_op (use_operand_p); -static void rename_def_op (def_operand_p, tree); static void rename_variables_in_bb (basic_block); -static void free_new_names (bitmap); static void rename_variables_in_loop (struct loop *); /************************************************************************* General Vectorization Utilities *************************************************************************/ static void vect_set_dump_settings (void); -static bool need_imm_uses_for (tree); /* vect_dump will be set to stderr or dump_file if exist. */ FILE *vect_dump; @@ -176,7 +174,11 @@ FILE *vect_dump; to mark that it's uninitialized. */ enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; +/* Loop location. */ +static LOC vect_loop_location; +/* Bitmap of virtual variables to be renamed. */ +bitmap vect_memsyms_to_rename; /************************************************************************* Simple Loop Peeling Utilities @@ -185,72 +187,25 @@ enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; *************************************************************************/ -/* For each definition in DEFINITIONS this function allocates - new ssa name. */ - -static void -allocate_new_names (bitmap definitions) -{ - unsigned ver; - bitmap_iterator bi; - - EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) - { - tree def = ssa_name (ver); - tree *new_name_ptr = xmalloc (sizeof (tree)); - - bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def); - - *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def)); - SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal; - - SSA_NAME_AUX (def) = new_name_ptr; - } -} - - /* Renames the use *OP_P. */ static void rename_use_op (use_operand_p op_p) { - tree *new_name_ptr; + tree new_name; if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) return; - new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p)); + new_name = get_current_def (USE_FROM_PTR (op_p)); /* Something defined outside of the loop. */ - if (!new_name_ptr) + if (!new_name) return; /* An ordinary ssa name defined in the loop. */ - SET_USE (op_p, *new_name_ptr); -} - - -/* Renames the def *OP_P in statement STMT. */ - -static void -rename_def_op (def_operand_p op_p, tree stmt) -{ - tree *new_name_ptr; - - if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME) - return; - - new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p)); - - /* Something defined outside of the loop. */ - if (!new_name_ptr) - return; - - /* An ordinary ssa name defined in the loop. */ - - SET_DEF (op_p, *new_name_ptr); - SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt; + SET_USE (op_p, new_name); } @@ -262,51 +217,17 @@ rename_variables_in_bb (basic_block bb) tree phi; block_stmt_iterator bsi; tree stmt; - stmt_ann_t ann; - use_optype uses; - vuse_optype vuses; - def_optype defs; - v_may_def_optype v_may_defs; - v_must_def_optype v_must_defs; - unsigned i; + use_operand_p use_p; + ssa_op_iter iter; edge e; edge_iterator ei; struct loop *loop = bb->loop_father; - for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) - rename_def_op (PHI_RESULT_PTR (phi), phi); - for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) { stmt = bsi_stmt (bsi); - get_stmt_operands (stmt); - ann = stmt_ann (stmt); - - uses = USE_OPS (ann); - for (i = 0; i < NUM_USES (uses); i++) - rename_use_op (USE_OP_PTR (uses, i)); - - defs = DEF_OPS (ann); - for (i = 0; i < NUM_DEFS (defs); i++) - rename_def_op (DEF_OP_PTR (defs, i), stmt); - - vuses = VUSE_OPS (ann); - for (i = 0; i < NUM_VUSES (vuses); i++) - rename_use_op (VUSE_OP_PTR (vuses, i)); - - v_may_defs = V_MAY_DEF_OPS (ann); - for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++) - { - rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i)); - rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt); - } - - v_must_defs = V_MUST_DEF_OPS (ann); - for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++) - { - rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i)); - rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt); - } + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES) + rename_use_op (use_p); } FOR_EACH_EDGE (e, ei, bb->succs) @@ -319,27 +240,6 @@ rename_variables_in_bb (basic_block bb) } -/* Releases the structures holding the new ssa names. */ - -static void -free_new_names (bitmap definitions) -{ - unsigned ver; - bitmap_iterator bi; - - EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) - { - tree def = ssa_name (ver); - - if (SSA_NAME_AUX (def)) - { - free (SSA_NAME_AUX (def)); - SSA_NAME_AUX (def) = NULL; - } - } -} - - /* Renames variables in new generated LOOP. */ static void @@ -368,12 +268,12 @@ static void slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, struct loop *new_loop, bool after) { - tree *new_name_ptr, new_ssa_name; + tree new_ssa_name; tree phi_new, phi_orig; tree def; edge orig_loop_latch = loop_latch_edge (orig_loop); edge orig_entry_e = loop_preheader_edge (orig_loop); - edge new_loop_exit_e = new_loop->single_exit; + edge new_loop_exit_e = single_exit (new_loop); edge new_loop_entry_e = loop_preheader_edge (new_loop); edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e); @@ -420,13 +320,15 @@ slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, if (TREE_CODE (def) != SSA_NAME) continue; - new_name_ptr = SSA_NAME_AUX (def); - if (!new_name_ptr) - /* Something defined outside of the loop. */ - continue; + new_ssa_name = get_current_def (def); + if (!new_ssa_name) + { + /* This only happens if there are no definitions + inside the loop. use the phi_result in this case. */ + new_ssa_name = PHI_RESULT (phi_new); + } /* An ordinary ssa name defined in the loop. */ - new_ssa_name = *new_name_ptr; add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop)); /* step 3 (case 1). */ @@ -448,110 +350,403 @@ slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, controls whether LOOP is to be executed. GUARD_EDGE is the edge that originates from the guard-bb, skips LOOP and reaches the (unique) exit bb of LOOP. This loop-exit-bb is an empty bb with one successor. - We denote this bb NEW_MERGE_BB because it had a single predecessor (the - LOOP header) before the guard code was added, and now it became a merge + We denote this bb NEW_MERGE_BB because before the guard code was added + it had a single predecessor (the LOOP header), and now it became a merge point of two paths - the path that ends with the LOOP exit-edge, and the path that ends with GUARD_EDGE. + - NEW_EXIT_BB: New basic block that is added by this function between LOOP + and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis. - This function creates and updates the relevant phi nodes to account for - the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB: - 1. Create phi nodes at NEW_MERGE_BB. - 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted - UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB - was added: + ===> The CFG before the guard-code was added: + LOOP_header_bb: + loop_body + if (exit_loop) goto update_bb + else goto LOOP_header_bb + update_bb: - ===> The CFG before the guard-code was added: + ==> The CFG after the guard-code was added: + guard_bb: + if (LOOP_guard_condition) goto new_merge_bb + else goto LOOP_header_bb LOOP_header_bb: - if (exit_loop) goto update_bb : LOOP_header_bb + loop_body + if (exit_loop_condition) goto new_merge_bb + else goto LOOP_header_bb + new_merge_bb: + goto update_bb update_bb: - ==> The CFG after the guard-code was added: - guard_bb: - if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb + ==> The CFG after this function: + guard_bb: + if (LOOP_guard_condition) goto new_merge_bb + else goto LOOP_header_bb LOOP_header_bb: - if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb + loop_body + if (exit_loop_condition) goto new_exit_bb + else goto LOOP_header_bb + new_exit_bb: new_merge_bb: goto update_bb update_bb: - - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in - UPDATE_BB are loop entry phis, like the phis in the LOOP header, - organized in the same order. - If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are - loop exit phis. - - - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another - "original" loop). FALSE if LOOP is an original loop (not a newly - created copy). The SSA_NAME_AUX fields of the defs in the original - loop are the corresponding new ssa-names used in the new duplicated - loop copy. IS_NEW_LOOP indicates which of the two args of the phi - nodes in UPDATE_BB takes the original ssa-name, and which takes the - new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with - the LOOP-exit-edge takes the new-name, and the phi-arg that is - associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is - FALSE, it's the other way around. + This function: + 1. creates and updates the relevant phi nodes to account for the new + incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves: + 1.1. Create phi nodes at NEW_MERGE_BB. + 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted + UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB + 2. preserves loop-closed-ssa-form by creating the required phi nodes + at the exit of LOOP (i.e, in NEW_EXIT_BB). + + There are two flavors to this function: + + slpeel_update_phi_nodes_for_guard1: + Here the guard controls whether we enter or skip LOOP, where LOOP is a + prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are + for variables that have phis in the loop header. + + slpeel_update_phi_nodes_for_guard2: + Here the guard controls whether we enter or skip LOOP, where LOOP is an + epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are + for variables that have phis in the loop exit. + + I.E., the overall structure is: + + loop1_preheader_bb: + guard1 (goto loop1/merg1_bb) + loop1 + loop1_exit_bb: + guard2 (goto merge1_bb/merge2_bb) + merge1_bb + loop2 + loop2_exit_bb + merge2_bb + next_bb + + slpeel_update_phi_nodes_for_guard1 takes care of creating phis in + loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars + that have phis in loop1->header). + + slpeel_update_phi_nodes_for_guard2 takes care of creating phis in + loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars + that have phis in next_bb). It also adds some of these phis to + loop1_exit_bb. + + slpeel_update_phi_nodes_for_guard1 is always called before + slpeel_update_phi_nodes_for_guard2. They are both needed in order + to create correct data-flow and loop-closed-ssa-form. + + Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables + that change between iterations of a loop (and therefore have a phi-node + at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates + phis for variables that are used out of the loop (and therefore have + loop-closed exit phis). Some variables may be both updated between + iterations and used after the loop. This is why in loop1_exit_bb we + may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1) + and exit phis (created by slpeel_update_phi_nodes_for_guard2). + + - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of + an original loop. i.e., we have: + + orig_loop + guard_bb (goto LOOP/new_merge) + new_loop <-- LOOP + new_exit + new_merge + next_bb + + If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we + have: + + new_loop + guard_bb (goto LOOP/new_merge) + orig_loop <-- LOOP + new_exit + new_merge + next_bb + + The SSA names defined in the original loop have a current + reaching definition that that records the corresponding new + ssa-name used in the new duplicated loop copy. */ +/* Function slpeel_update_phi_nodes_for_guard1 + + Input: + - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. + - DEFS - a bitmap of ssa names to mark new names for which we recorded + information. + + In the context of the overall structure, we have: + + loop1_preheader_bb: + guard1 (goto loop1/merg1_bb) +LOOP-> loop1 + loop1_exit_bb: + guard2 (goto merge1_bb/merge2_bb) + merge1_bb + loop2 + loop2_exit_bb + merge2_bb + next_bb + + For each name updated between loop iterations (i.e - for each name that has + an entry (loop-header) phi in LOOP) we create a new phi in: + 1. merge1_bb (to account for the edge from guard1) + 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form) +*/ + static void -slpeel_update_phi_nodes_for_guard (edge guard_edge, - struct loop *loop, - bool entry_phis, - bool is_new_loop) +slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, + bool is_new_loop, basic_block *new_exit_bb, + bitmap *defs) { - tree orig_phi, new_phi, update_phi; + tree orig_phi, new_phi; + tree update_phi, update_phi2; tree guard_arg, loop_arg; basic_block new_merge_bb = guard_edge->dest; edge e = EDGE_SUCC (new_merge_bb, 0); basic_block update_bb = e->dest; - basic_block orig_bb = (entry_phis ? loop->header : update_bb); + basic_block orig_bb = loop->header; + edge new_exit_e; + tree current_new_name; + tree name; + + /* Create new bb between loop and new_merge_bb. */ + *new_exit_bb = split_edge (single_exit (loop)); + + new_exit_e = EDGE_SUCC (*new_exit_bb, 0); for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb); orig_phi && update_phi; orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi)) { - /* 1. Generate new phi node in NEW_MERGE_BB: */ + /* Virtual phi; Mark it for renaming. We actually want to call + mar_sym_for_renaming, but since all ssa renaming datastructures + are going to be freed before we get to call ssa_upate, we just + record this name for now in a bitmap, and will mark it for + renaming later. */ + name = PHI_RESULT (orig_phi); + if (!is_gimple_reg (SSA_NAME_VAR (name))) + bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name))); + + /** 1. Handle new-merge-point phis **/ + + /* 1.1. Generate new phi node in NEW_MERGE_BB: */ new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), new_merge_bb); - /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge + /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge of LOOP. Set the two phi args in NEW_PHI for these edges: */ - if (entry_phis) + loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0)); + guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop)); + + add_phi_arg (new_phi, loop_arg, new_exit_e); + add_phi_arg (new_phi, guard_arg, guard_edge); + + /* 1.3. Update phi in successor block. */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg + || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); + SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); + update_phi2 = new_phi; + + + /** 2. Handle loop-closed-ssa-form phis **/ + + if (!is_gimple_reg (PHI_RESULT (orig_phi))) + continue; + + /* 2.1. Generate new phi node in NEW_EXIT_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + *new_exit_bb); + + /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ + add_phi_arg (new_phi, loop_arg, single_exit (loop)); + + /* 2.3. Update phi in successor of NEW_EXIT_BB: */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); + SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); + + /* 2.4. Record the newly created name with set_current_def. + We want to find a name such that + name = get_current_def (orig_loop_name) + and to set its current definition as follows: + set_current_def (name, new_phi_name) + + If LOOP is a new loop then loop_arg is already the name we're + looking for. If LOOP is the original loop, then loop_arg is + the orig_loop_name and the relevant name is recorded in its + current reaching definition. */ + if (is_new_loop) + current_new_name = loop_arg; + else + { + current_new_name = get_current_def (loop_arg); + /* current_def is not available only if the variable does not + change inside the loop, in which case we also don't care + about recording a current_def for it because we won't be + trying to create loop-exit-phis for it. */ + if (!current_new_name) + continue; + } + gcc_assert (get_current_def (current_new_name) == NULL_TREE); + + set_current_def (current_new_name, PHI_RESULT (new_phi)); + bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name)); + } + + set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); +} + + +/* Function slpeel_update_phi_nodes_for_guard2 + + Input: + - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. + + In the context of the overall structure, we have: + + loop1_preheader_bb: + guard1 (goto loop1/merg1_bb) + loop1 + loop1_exit_bb: + guard2 (goto merge1_bb/merge2_bb) + merge1_bb +LOOP-> loop2 + loop2_exit_bb + merge2_bb + next_bb + + For each name used out side the loop (i.e - for each name that has an exit + phi in next_bb) we create a new phi in: + 1. merge2_bb (to account for the edge from guard_bb) + 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form) + 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form), + if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1). +*/ + +static void +slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, + bool is_new_loop, basic_block *new_exit_bb) +{ + tree orig_phi, new_phi; + tree update_phi, update_phi2; + tree guard_arg, loop_arg; + basic_block new_merge_bb = guard_edge->dest; + edge e = EDGE_SUCC (new_merge_bb, 0); + basic_block update_bb = e->dest; + edge new_exit_e; + tree orig_def, orig_def_new_name; + tree new_name, new_name2; + tree arg; + + /* Create new bb between loop and new_merge_bb. */ + *new_exit_bb = split_edge (single_exit (loop)); + + new_exit_e = EDGE_SUCC (*new_exit_bb, 0); + + for (update_phi = phi_nodes (update_bb); update_phi; + update_phi = PHI_CHAIN (update_phi)) + { + orig_phi = update_phi; + orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); + /* This loop-closed-phi actually doesn't represent a use + out of the loop - the phi arg is a constant. */ + if (TREE_CODE (orig_def) != SSA_NAME) + continue; + orig_def_new_name = get_current_def (orig_def); + arg = NULL_TREE; + + /** 1. Handle new-merge-point phis **/ + + /* 1.1. Generate new phi node in NEW_MERGE_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + new_merge_bb); + + /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge + of LOOP. Set the two PHI args in NEW_PHI for these edges: */ + new_name = orig_def; + new_name2 = NULL_TREE; + if (orig_def_new_name) + { + new_name = orig_def_new_name; + /* Some variables have both loop-entry-phis and loop-exit-phis. + Such variables were given yet newer names by phis placed in + guard_bb by slpeel_update_phi_nodes_for_guard1. I.e: + new_name2 = get_current_def (get_current_def (orig_name)). */ + new_name2 = get_current_def (new_name); + } + + if (is_new_loop) { - loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, - loop_latch_edge (loop)); - guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, - loop_preheader_edge (loop)); + guard_arg = orig_def; + loop_arg = new_name; } - else /* exit phis */ + else { - tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); - tree *new_name_ptr = SSA_NAME_AUX (orig_def); - tree new_name; - - if (new_name_ptr) - new_name = *new_name_ptr; - else - /* Something defined outside of the loop */ - new_name = orig_def; - - if (is_new_loop) - { - guard_arg = orig_def; - loop_arg = new_name; - } - else - { - guard_arg = new_name; - loop_arg = orig_def; - } + guard_arg = new_name; + loop_arg = orig_def; } - add_phi_arg (new_phi, loop_arg, loop->single_exit); + if (new_name2) + guard_arg = new_name2; + + add_phi_arg (new_phi, loop_arg, new_exit_e); add_phi_arg (new_phi, guard_arg, guard_edge); - /* 3. Update phi in successor block. */ - gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg - || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); + /* 1.3. Update phi in successor block. */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def); SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); + update_phi2 = new_phi; + + + /** 2. Handle loop-closed-ssa-form phis **/ + + /* 2.1. Generate new phi node in NEW_EXIT_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + *new_exit_bb); + + /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ + add_phi_arg (new_phi, loop_arg, single_exit (loop)); + + /* 2.3. Update phi in successor of NEW_EXIT_BB: */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); + SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); + + + /** 3. Handle loop-closed-ssa-form phis for first loop **/ + + /* 3.1. Find the relevant names that need an exit-phi in + GUARD_BB, i.e. names for which + slpeel_update_phi_nodes_for_guard1 had not already created a + phi node. This is the case for names that are used outside + the loop (and therefore need an exit phi) but are not updated + across loop iterations (and therefore don't have a + loop-header-phi). + + slpeel_update_phi_nodes_for_guard1 is responsible for + creating loop-exit phis in GUARD_BB for names that have a + loop-header-phi. When such a phi is created we also record + the new name in its current definition. If this new name + exists, then guard_arg was set to this new name (see 1.2 + above). Therefore, if guard_arg is not this new name, this + is an indication that an exit-phi in GUARD_BB was not yet + created, so we take care of it here. */ + if (guard_arg == new_name2) + continue; + arg = guard_arg; + + /* 3.2. Generate new phi node in GUARD_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + guard_edge->src); + + /* 3.3. GUARD_BB has one incoming edge: */ + gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1); + add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0)); + + /* 3.4. Update phi in successor of GUARD_BB: */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge) + == guard_arg); + SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi)); } set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); @@ -568,12 +763,12 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) { tree indx_before_incr, indx_after_incr, cond_stmt, cond; tree orig_cond; - edge exit_edge = loop->single_exit; + edge exit_edge = single_exit (loop); block_stmt_iterator loop_cond_bsi; block_stmt_iterator incr_bsi; bool insert_after; tree begin_label = tree_block_label (loop->latch); - tree exit_label = tree_block_label (loop->single_exit->dest); + tree exit_label = tree_block_label (single_exit (loop)->dest); tree init = build_int_cst (TREE_TYPE (niters), 0); tree step = build_int_cst (TREE_TYPE (niters), 1); tree then_label; @@ -581,9 +776,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) LOC loop_loc; orig_cond = get_loop_exit_condition (loop); -#ifdef ENABLE_CHECKING gcc_assert (orig_cond); -#endif loop_cond_bsi = bsi_for_stmt (orig_cond); standard_iv_increment_position (loop, &incr_bsi, &insert_after); @@ -608,7 +801,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT); /* Remove old loop exit test: */ - bsi_remove (&loop_cond_bsi); + bsi_remove (&loop_cond_bsi, true); loop_loc = find_loop_location (loop); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -627,8 +820,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) on E which is either the entry or exit of LOOP. */ static struct loop * -slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, - edge e) +slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e) { struct loop *new_loop; basic_block *new_bbs, *bbs; @@ -636,8 +828,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, bool was_imm_dom; basic_block exit_dest; tree phi, phi_arg; + edge exit, new_exit; - at_exit = (e == loop->single_exit); + at_exit = (e == single_exit (loop)); if (!at_exit && e != loop_preheader_edge (loop)) return NULL; @@ -651,28 +844,30 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, } /* Generate new loop structure. */ - new_loop = duplicate_loop (loops, loop, loop->outer); + new_loop = duplicate_loop (loop, loop->outer); if (!new_loop) { free (bbs); return NULL; } - exit_dest = loop->single_exit->dest; + exit_dest = single_exit (loop)->dest; was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, exit_dest) == loop->header ? true : false); - new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes); + new_bbs = XNEWVEC (basic_block, loop->num_nodes); + exit = single_exit (loop); copy_bbs (bbs, loop->num_nodes, new_bbs, - &loop->single_exit, 1, &new_loop->single_exit, NULL); + &exit, 1, &new_exit, NULL, + e->src); /* Duplicating phi args at exit bbs as coming also from exit of duplicated loop. */ for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi)) { - phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit); + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop)); if (phi_arg) { edge new_loop_exit_edge; @@ -754,7 +949,7 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, cond_stmt = build3 (COND_EXPR, void_type_node, cond, then_label, else_label); bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); - /* Add new edge to connect entry block to the second loop. */ + /* Add new edge to connect guard block to the merge/loop-exit block. */ new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); return new_e; @@ -772,12 +967,12 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, bool slpeel_can_duplicate_loop_p (struct loop *loop, edge e) { - edge exit_e = loop->single_exit; + edge exit_e = single_exit (loop); edge entry_e = loop_preheader_edge (loop); tree orig_cond = get_loop_exit_condition (loop); block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); - if (any_marked_for_rewrite_p ()) + if (need_ssa_update_p ()) return false; if (loop->inner @@ -786,7 +981,7 @@ slpeel_can_duplicate_loop_p (struct loop *loop, edge e) || !loop->outer || loop->num_nodes != 2 || !empty_block_p (loop->latch) - || !loop->single_exit + || !single_exit (loop) /* Verify that new loop exit condition can be trivially modified. */ || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi)) || (e != exit_e && e != entry_e)) @@ -800,7 +995,7 @@ void slpeel_verify_cfg_after_peeling (struct loop *first_loop, struct loop *second_loop) { - basic_block loop1_exit_bb = first_loop->single_exit->dest; + basic_block loop1_exit_bb = single_exit (first_loop)->dest; basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src; basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src; @@ -814,7 +1009,7 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop, /* 1. Verify that one of the successors of first_loopt->exit is the preheader of second_loop. */ - /* The preheader of new_loop is expected to have two predessors: + /* The preheader of new_loop is expected to have two predecessors: first_loop->exit and the block that precedes first_loop. */ gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2 @@ -867,9 +1062,10 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop, */ struct loop* -slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, +slpeel_tree_peel_loop_to_edge (struct loop *loop, edge e, tree first_niters, - tree niters, bool update_first_loop_count) + tree niters, bool update_first_loop_count, + unsigned int th) { struct loop *new_loop = NULL, *first_loop, *second_loop; edge skip_e; @@ -878,7 +1074,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, basic_block bb_before_second_loop, bb_after_second_loop; basic_block bb_before_first_loop; basic_block bb_between_loops; - edge exit_e = loop->single_exit; + basic_block new_exit_bb; + edge exit_e = single_exit (loop); LOC loop_loc; if (!slpeel_can_duplicate_loop_p (loop, e)) @@ -905,7 +1102,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, orig_exit_bb: */ - if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e))) + if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e))) { loop_loc = find_loop_location (loop); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -931,8 +1128,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, second_loop = loop; } - definitions = marked_ssa_names (); - allocate_new_names (definitions); + definitions = ssa_names_to_replace (); slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); rename_variables_in_loop (new_loop); @@ -958,16 +1154,17 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, */ bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); - add_bb_to_loop (bb_before_first_loop, first_loop->outer); - bb_before_second_loop = split_edge (first_loop->single_exit); - add_bb_to_loop (bb_before_second_loop, first_loop->outer); + bb_before_second_loop = split_edge (single_exit (first_loop)); pre_condition = - build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node); + fold_build2 (LE_EXPR, boolean_type_node, first_niters, + build_int_cst (TREE_TYPE (first_niters), th)); + skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, bb_before_second_loop, bb_before_first_loop); - slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */, - first_loop == new_loop); + slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop, + first_loop == new_loop, + &new_exit_bb, &definitions); /* 3. Add the guard that controls whether the second loop is executed. @@ -996,29 +1193,23 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, orig_exit_bb: */ - bb_between_loops = split_edge (first_loop->single_exit); - add_bb_to_loop (bb_between_loops, first_loop->outer); - bb_after_second_loop = split_edge (second_loop->single_exit); - add_bb_to_loop (bb_after_second_loop, second_loop->outer); + bb_between_loops = new_exit_bb; + bb_after_second_loop = split_edge (single_exit (second_loop)); - pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters); + pre_condition = + fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters); skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, bb_after_second_loop, bb_before_first_loop); - slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */, - second_loop == new_loop); - - /* Flow loop scan does not update loop->single_exit field. */ - first_loop->single_exit = first_loop->single_exit; - second_loop->single_exit = second_loop->single_exit; + slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop, + second_loop == new_loop, &new_exit_bb); /* 4. Make first-loop iterate FIRST_NITERS times, if requested. */ if (update_first_loop_count) slpeel_make_loop_iterate_ntimes (first_loop, first_niters); - free_new_names (definitions); BITMAP_FREE (definitions); - unmark_all_for_rewrite (); + delete_update_ssa (); return new_loop; } @@ -1042,7 +1233,7 @@ find_loop_location (struct loop *loop) node = get_loop_exit_condition (loop); - if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node) + if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node) && EXPR_FILENAME (node) && EXPR_LINENO (node)) return EXPR_LOC (node); @@ -1057,7 +1248,7 @@ find_loop_location (struct loop *loop) for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { node = bsi_stmt (si); - if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)) + if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node)) return EXPR_LOC (node); } @@ -1127,18 +1318,21 @@ vect_set_dump_settings (void) For vectorization debug dumps. */ bool -vect_print_dump_info (enum verbosity_levels vl, LOC loc) +vect_print_dump_info (enum verbosity_levels vl) { if (vl > vect_verbosity_level) return false; - if (loc == UNKNOWN_LOC) + if (!current_function_decl || !vect_dump) + return false; + + if (vect_loop_location == UNKNOWN_LOC) fprintf (vect_dump, "\n%s:%d: note: ", - DECL_SOURCE_FILE (current_function_decl), - DECL_SOURCE_LINE (current_function_decl)); + DECL_SOURCE_FILE (current_function_decl), + DECL_SOURCE_LINE (current_function_decl)); else - fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc)); - + fprintf (vect_dump, "\n%s:%d: note: ", + LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location)); return true; } @@ -1161,16 +1355,25 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) STMT_VINFO_TYPE (res) = undef_vec_info_type; STMT_VINFO_STMT (res) = stmt; STMT_VINFO_LOOP_VINFO (res) = loop_vinfo; - STMT_VINFO_RELEVANT_P (res) = 0; + STMT_VINFO_RELEVANT (res) = 0; + STMT_VINFO_LIVE_P (res) = false; STMT_VINFO_VECTYPE (res) = NULL; STMT_VINFO_VEC_STMT (res) = NULL; + STMT_VINFO_IN_PATTERN_P (res) = false; + STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL; - STMT_VINFO_MEMTAG (res) = NULL; - STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL; - STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE; - STMT_VINFO_VECT_STEP (res) = NULL_TREE; - STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false; - STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE; + if (TREE_CODE (stmt) == PHI_NODE) + STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; + else + STMT_VINFO_DEF_TYPE (res) = vect_loop_def; + STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5); + DR_GROUP_FIRST_DR (res) = NULL_TREE; + DR_GROUP_NEXT_DR (res) = NULL_TREE; + DR_GROUP_SIZE (res) = 0; + DR_GROUP_STORE_COUNT (res) = 0; + DR_GROUP_GAP (res) = 0; + DR_GROUP_SAME_DR_STMT (res) = NULL_TREE; + DR_GROUP_READ_WRITE_DEPENDENCE (res) = false; return res; } @@ -1197,12 +1400,19 @@ new_loop_vec_info (struct loop *loop) for (i = 0; i < loop->num_nodes; i++) { basic_block bb = bbs[i]; + tree phi; + + for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) + { + stmt_ann_t ann = get_stmt_ann (phi); + set_stmt_info (ann, new_stmt_vec_info (phi, res)); + } + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { tree stmt = bsi_stmt (si); stmt_ann_t ann; - get_stmt_operands (stmt); ann = stmt_ann (stmt); set_stmt_info (ann, new_stmt_vec_info (stmt, res)); } @@ -1213,14 +1423,13 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_EXIT_COND (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_VECTORIZABLE_P (res) = 0; - LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false; + LOOP_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; - VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, - "loop_write_datarefs"); - VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, - "loop_read_datarefs"); + LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10); + LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10); LOOP_VINFO_UNALIGNED_DR (res) = NULL; - LOOP_VINFO_LOC (res) = UNKNOWN_LOC; + LOOP_VINFO_MAY_MISALIGN_STMTS (res) + = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS)); return res; } @@ -1251,50 +1460,60 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo) for (j = 0; j < nbbs; j++) { basic_block bb = bbs[j]; - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + tree phi; + stmt_vec_info stmt_info; + + for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) + { + stmt_ann_t ann = stmt_ann (phi); + + stmt_info = vinfo_for_stmt (phi); + free (stmt_info); + set_stmt_info (ann, NULL); + } + + for (si = bsi_start (bb); !bsi_end_p (si); ) { tree stmt = bsi_stmt (si); stmt_ann_t ann = stmt_ann (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - free (stmt_info); - set_stmt_info (ann, NULL); + + if (stmt_info) + { + /* Check if this is a "pattern stmt" (introduced by the + vectorizer during the pattern recognition pass). */ + bool remove_stmt_p = false; + tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (orig_stmt) + { + stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); + if (orig_stmt_info + && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) + remove_stmt_p = true; + } + + /* Free stmt_vec_info. */ + VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info)); + free (stmt_info); + set_stmt_info (ann, NULL); + + /* Remove dead "pattern stmts". */ + if (remove_stmt_p) + bsi_remove (&si, true); + } + bsi_next (&si); } } free (LOOP_VINFO_BBS (loop_vinfo)); - varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); - varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo)); + free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo)); + free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo)); + VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)); free (loop_vinfo); } -/* Function vect_strip_conversions - - Strip conversions that don't narrow the mode. */ - -tree -vect_strip_conversion (tree expr) -{ - tree to, ti, oprnd0; - - while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR) - { - to = TREE_TYPE (expr); - oprnd0 = TREE_OPERAND (expr, 0); - ti = TREE_TYPE (oprnd0); - - if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti)) - return NULL_TREE; - if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti))) - return NULL_TREE; - - expr = oprnd0; - } - return expr; -} - - /* Function vect_force_dr_alignment_p. Returns whether the alignment of a DECL can be forced to be aligned @@ -1337,7 +1556,7 @@ get_vectype_for_scalar_type (tree scalar_type) int nunits; tree vectype; - if (nbytes == 0) + if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD) return NULL_TREE; /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) @@ -1345,7 +1564,7 @@ get_vectype_for_scalar_type (tree scalar_type) nunits = UNITS_PER_SIMD_WORD / nbytes; vectype = build_vector_type (scalar_type, nunits); - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "get vectype with %d units of type ", nunits); print_generic_expr (vect_dump, scalar_type, TDF_SLIM); @@ -1354,18 +1573,16 @@ get_vectype_for_scalar_type (tree scalar_type) if (!vectype) return NULL_TREE; - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "vectype: "); print_generic_expr (vect_dump, vectype, TDF_SLIM); } - if (!VECTOR_MODE_P (TYPE_MODE (vectype))) + if (!VECTOR_MODE_P (TYPE_MODE (vectype)) + && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) { - /* TODO: tree-complex.c sometimes can parallelize operations - on generic vectors. We can vectorize the loop in that case, - but then we should re-run the lowering pass. */ - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "mode not supported by target."); return NULL_TREE; } @@ -1421,64 +1638,500 @@ vect_supportable_dr_alignment (struct data_reference *dr) in reduction/induction computations). */ bool -vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def) +vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt, + tree *def, enum vect_def_type *dt) { - tree def_stmt; basic_block bb; + stmt_vec_info stmt_vinfo; struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - if (def) - *def = NULL_TREE; - + *def_stmt = NULL_TREE; + *def = NULL_TREE; + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "vect_is_simple_use: operand "); + print_generic_expr (vect_dump, operand, TDF_SLIM); + } + if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) - return true; - + { + *dt = vect_constant_def; + return true; + } + if (TREE_CODE (operand) != SSA_NAME) - return false; - - def_stmt = SSA_NAME_DEF_STMT (operand); - if (def_stmt == NULL_TREE ) { - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "not ssa-name."); + return false; + } + + *def_stmt = SSA_NAME_DEF_STMT (operand); + if (*def_stmt == NULL_TREE ) + { + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "no def_stmt."); return false; } + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "def_stmt: "); + print_generic_expr (vect_dump, *def_stmt, TDF_SLIM); + } + /* empty stmt is expected only in case of a function argument. - (Otherwise - we expect a phi_node or a modify_expr). */ - if (IS_EMPTY_STMT (def_stmt)) + (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT). */ + if (IS_EMPTY_STMT (*def_stmt)) { - tree arg = TREE_OPERAND (def_stmt, 0); + tree arg = TREE_OPERAND (*def_stmt, 0); if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) - return true; - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + *def = operand; + *dt = vect_invariant_def; + return true; + } + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unexpected empty stmt."); + return false; + } + + bb = bb_for_stmt (*def_stmt); + if (!flow_bb_inside_loop_p (loop, bb)) + *dt = vect_invariant_def; + else + { + stmt_vinfo = vinfo_for_stmt (*def_stmt); + *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); + } + + if (*dt == vect_unknown_def_type) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); + return false; + } + + /* stmts inside the loop that have been identified as performing + a reduction operation cannot have uses in the loop. */ + if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduction used in loop."); + return false; + } + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "type of def: %d.",*dt); + + switch (TREE_CODE (*def_stmt)) + { + case PHI_NODE: + *def = PHI_RESULT (*def_stmt); + gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def + || *dt == vect_invariant_def); + break; + + case GIMPLE_MODIFY_STMT: + *def = GIMPLE_STMT_OPERAND (*def_stmt, 0); + gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def); + break; + + default: + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported defining stmt: "); + return false; + } + + return true; +} + + +/* Function supportable_widening_operation + + Check whether an operation represented by the code CODE is a + widening operation that is supported by the target platform in + vector form (i.e., when operating on arguments of type VECTYPE). + + The two kinds of widening operations we currently support are + NOP and WIDEN_MULT. This function checks if these operations + are supported by the target platform either directly (via vector + tree-codes), or via target builtins. + + Output: + - CODE1 and CODE2 are codes of vector operations to be used when + vectorizing the operation, if available. + - DECL1 and DECL2 are decls of target builtin functions to be used + when vectorizing the operation, if available. In this case, + CODE1 and CODE2 are CALL_EXPR. */ + +bool +supportable_widening_operation (enum tree_code code, tree stmt, tree vectype, + tree *decl1, tree *decl2, + enum tree_code *code1, enum tree_code *code2) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + bool ordered_p; + enum machine_mode vec_mode; + enum insn_code icode1, icode2; + optab optab1, optab2; + tree expr = GIMPLE_STMT_OPERAND (stmt, 1); + tree type = TREE_TYPE (expr); + tree wide_vectype = get_vectype_for_scalar_type (type); + enum tree_code c1, c2; + + /* The result of a vectorized widening operation usually requires two vectors + (because the widened results do not fit int one vector). The generated + vector results would normally be expected to be generated in the same + order as in the original scalar computation. i.e. if 8 results are + generated in each vector iteration, they are to be organized as follows: + vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8]. + + However, in the special case that the result of the widening operation is + used in a reduction computation only, the order doesn't matter (because + when vectorizing a reduction we change the order of the computation). + Some targets can take advantage of this and generate more efficient code. + For example, targets like Altivec, that support widen_mult using a sequence + of {mult_even,mult_odd} generate the following vectors: + vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */ + + if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction) + ordered_p = false; + else + ordered_p = true; + + if (!ordered_p + && code == WIDEN_MULT_EXPR + && targetm.vectorize.builtin_mul_widen_even + && targetm.vectorize.builtin_mul_widen_even (vectype) + && targetm.vectorize.builtin_mul_widen_odd + && targetm.vectorize.builtin_mul_widen_odd (vectype)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unordered widening operation detected."); + + *code1 = *code2 = CALL_EXPR; + *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype); + *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype); + return true; + } + + switch (code) + { + case WIDEN_MULT_EXPR: + if (BYTES_BIG_ENDIAN) + { + c1 = VEC_WIDEN_MULT_HI_EXPR; + c2 = VEC_WIDEN_MULT_LO_EXPR; + } + else + { + c2 = VEC_WIDEN_MULT_HI_EXPR; + c1 = VEC_WIDEN_MULT_LO_EXPR; + } + break; + + case NOP_EXPR: + if (BYTES_BIG_ENDIAN) + { + c1 = VEC_UNPACK_HI_EXPR; + c2 = VEC_UNPACK_LO_EXPR; + } + else + { + c2 = VEC_UNPACK_HI_EXPR; + c1 = VEC_UNPACK_LO_EXPR; + } + break; + + default: + gcc_unreachable (); + } + + *code1 = c1; + *code2 = c2; + optab1 = optab_for_tree_code (c1, vectype); + optab2 = optab_for_tree_code (c2, vectype); + + if (!optab1 || !optab2) + return false; + + vec_mode = TYPE_MODE (vectype); + if ((icode1 = optab1->handlers[(int) vec_mode].insn_code) == CODE_FOR_nothing + || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype) + || (icode2 = optab2->handlers[(int) vec_mode].insn_code) + == CODE_FOR_nothing + || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype)) + return false; + + return true; +} + + +/* Function reduction_code_for_scalar_code + + Input: + CODE - tree_code of a reduction operations. + + Output: + REDUC_CODE - the corresponding tree-code to be used to reduce the + vector of partial results into a single scalar result (which + will also reside in a vector). + + Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */ + +bool +reduction_code_for_scalar_code (enum tree_code code, + enum tree_code *reduc_code) +{ + switch (code) + { + case MAX_EXPR: + *reduc_code = REDUC_MAX_EXPR; + return true; + + case MIN_EXPR: + *reduc_code = REDUC_MIN_EXPR; + return true; + + case PLUS_EXPR: + *reduc_code = REDUC_PLUS_EXPR; + return true; + + default: + return false; + } +} + + +/* Function vect_is_simple_reduction + + Detect a cross-iteration def-use cucle that represents a simple + reduction computation. We look for the following pattern: + + loop_header: + a1 = phi < a0, a2 > + a3 = ... + a2 = operation (a3, a1) + + such that: + 1. operation is commutative and associative and it is safe to + change the order of the computation. + 2. no uses for a2 in the loop (a2 is used out of the loop) + 3. no uses of a1 in the loop besides the reduction operation. + + Condition 1 is tested here. + Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */ + +tree +vect_is_simple_reduction (struct loop *loop, tree phi) +{ + edge latch_e = loop_latch_edge (loop); + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); + tree def_stmt, def1, def2; + enum tree_code code; + int op_type; + tree operation, op1, op2; + tree type; + int nloop_uses; + tree name; + imm_use_iterator imm_iter; + use_operand_p use_p; + + name = PHI_RESULT (phi); + nloop_uses = 0; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) + { + tree use_stmt = USE_STMT (use_p); + if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt)) + && vinfo_for_stmt (use_stmt) + && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))) + nloop_uses++; + if (nloop_uses > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduction used in loop."); + return NULL_TREE; + } + } + + if (TREE_CODE (loop_arg) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS)) { - fprintf (vect_dump, "Unexpected empty stmt: "); - print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + fprintf (vect_dump, "reduction: not ssa_name: "); + print_generic_expr (vect_dump, loop_arg, TDF_SLIM); } - return false; + return NULL_TREE; } - /* phi_node inside the loop indicates an induction/reduction pattern. - This is not supported yet. */ - bb = bb_for_stmt (def_stmt); - if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) + def_stmt = SSA_NAME_DEF_STMT (loop_arg); + if (!def_stmt) { - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - fprintf (vect_dump, "reduction/induction - unsupported."); - return false; /* FORNOW: not supported yet. */ + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduction: no def_stmt."); + return NULL_TREE; } - /* Expecting a modify_expr or a phi_node. */ - if (TREE_CODE (def_stmt) == MODIFY_EXPR - || TREE_CODE (def_stmt) == PHI_NODE) + if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT) { - if (def) - *def = def_stmt; - return true; + if (vect_print_dump_info (REPORT_DETAILS)) + print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + return NULL_TREE; } - return false; + name = GIMPLE_STMT_OPERAND (def_stmt, 0); + nloop_uses = 0; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) + { + tree use_stmt = USE_STMT (use_p); + if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt)) + && vinfo_for_stmt (use_stmt) + && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt))) + nloop_uses++; + if (nloop_uses > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "reduction used in loop."); + return NULL_TREE; + } + } + + operation = GIMPLE_STMT_OPERAND (def_stmt, 1); + code = TREE_CODE (operation); + if (!commutative_tree_code (code) || !associative_tree_code (code)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: not commutative/associative: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + op_type = TREE_OPERAND_LENGTH (operation); + if (op_type != binary_op) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: not binary operation: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + op1 = TREE_OPERAND (operation, 0); + op2 = TREE_OPERAND (operation, 1); + if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: uses not ssa_names: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + /* Check that it's ok to change the order of the computation. */ + type = TREE_TYPE (operation); + if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1)) + || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: multiple types: operation type: "); + print_generic_expr (vect_dump, type, TDF_SLIM); + fprintf (vect_dump, ", operands types: "); + print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM); + fprintf (vect_dump, ","); + print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM); + } + return NULL_TREE; + } + + /* CHECKME: check for !flag_finite_math_only too? */ + if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations) + { + /* Changing the order of operations changes the semantics. */ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: unsafe fp math optimization: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) + { + /* Changing the order of operations changes the semantics. */ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: unsafe int math optimization: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + /* reduction is safe. we're dealing with one of the following: + 1) integer arithmetic and no trapv + 2) floating point arithmetic, and special flags permit this optimization. + */ + def1 = SSA_NAME_DEF_STMT (op1); + def2 = SSA_NAME_DEF_STMT (op2); + if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: no defs for operands: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + + /* Check that one def is the reduction def, defined by PHI, + the other def is either defined in the loop by a GIMPLE_MODIFY_STMT, + or it's an induction (defined by some phi node). */ + + if (def2 == phi + && flow_bb_inside_loop_p (loop, bb_for_stmt (def1)) + && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "detected reduction:"); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return def_stmt; + } + else if (def1 == phi + && flow_bb_inside_loop_p (loop, bb_for_stmt (def2)) + && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def)) + { + /* Swap operands (just for simplicity - so that the rest of the code + can assume that the reduction variable is always the last (second) + argument). */ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "detected reduction: need to swap operands:"); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0), + &TREE_OPERAND (operation, 1)); + return def_stmt; + } + else + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "reduction: unknown pattern."); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } } @@ -1493,7 +2146,6 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, { tree init_expr; tree step_expr; - tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); /* When there is no evolution in this loop, the evolution function @@ -1507,10 +2159,9 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, return false; step_expr = evolution_part; - init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, - loop_nb)); + init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "step: "); print_generic_expr (vect_dump, step_expr, TDF_SLIM); @@ -1522,8 +2173,8 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, *step = step_expr; if (TREE_CODE (step_expr) != INTEGER_CST) - { - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "step unknown."); return false; } @@ -1532,83 +2183,68 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, } -/* Function need_imm_uses_for. - - Return whether we ought to include information for 'var' - when calculating immediate uses. For this pass we only want use - information for non-virtual variables. */ - -static bool -need_imm_uses_for (tree var) -{ - return is_gimple_reg (var); -} - - /* Function vectorize_loops. Entry Point to loop vectorization phase. */ -void -vectorize_loops (struct loops *loops) +unsigned +vectorize_loops (void) { - unsigned int i, loops_num; + unsigned int i; unsigned int num_vectorized_loops = 0; + unsigned int vect_loops_num; + loop_iterator li; + struct loop *loop; - /* Fix the verbosity level if not defined explicitly by the user. */ - vect_set_dump_settings (); + vect_loops_num = number_of_loops (); - /* Does the target support SIMD? */ - /* FORNOW: until more sophisticated machine modelling is in place. */ - if (!UNITS_PER_SIMD_WORD) - { - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - fprintf (vect_dump, "vectorizer: target vector size is not defined."); - return; - } + /* Bail out if there are no loops. */ + if (vect_loops_num <= 1) + return 0; -#ifdef ENABLE_CHECKING - verify_loop_closed_ssa (); -#endif + /* Fix the verbosity level if not defined explicitly by the user. */ + vect_set_dump_settings (); - compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for); + /* Allocate the bitmap that records which virtual variables that + need to be renamed. */ + vect_memsyms_to_rename = BITMAP_ALLOC (NULL); /* ----------- Analyze loops. ----------- */ /* If some loop was duplicated, it gets bigger number than all previously defined loops. This fact allows us to run only over initial loops skipping newly generated ones. */ - loops_num = loops->num; - for (i = 1; i < loops_num; i++) + FOR_EACH_LOOP (li, loop, 0) { loop_vec_info loop_vinfo; - struct loop *loop = loops->parray[i]; - - if (!loop) - continue; + vect_loop_location = find_loop_location (loop); loop_vinfo = vect_analyze_loop (loop); loop->aux = loop_vinfo; if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) continue; - vect_transform_loop (loop_vinfo, loops); + vect_transform_loop (loop_vinfo); num_vectorized_loops++; } + vect_loop_location = UNKNOWN_LOC; - if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC)) + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS) + || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS) + && num_vectorized_loops > 0)) fprintf (vect_dump, "vectorized %u loops in function.\n", num_vectorized_loops); /* ----------- Finalize. ----------- */ - free_df (); - for (i = 1; i < loops_num; i++) + BITMAP_FREE (vect_memsyms_to_rename); + + for (i = 1; i < vect_loops_num; i++) { - struct loop *loop = loops->parray[i]; loop_vec_info loop_vinfo; + loop = get_loop (i); if (!loop) continue; loop_vinfo = loop->aux; @@ -1616,7 +2252,71 @@ vectorize_loops (struct loops *loops) loop->aux = NULL; } - rewrite_into_ssa (false); - rewrite_into_loop_closed_ssa (NULL); /* FORNOW */ - bitmap_clear (vars_to_rename); + return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; } + +/* Increase alignment of global arrays to improve vectorization potential. + TODO: + - Consider also structs that have an array field. + - Use ipa analysis to prune arrays that can't be vectorized? + This should involve global alignment analysis and in the future also + array padding. */ + +static unsigned int +increase_alignment (void) +{ + struct varpool_node *vnode; + + /* Increase the alignment of all global arrays for vectorization. */ + for (vnode = varpool_nodes_queue; + vnode; + vnode = vnode->next_needed) + { + tree vectype, decl = vnode->decl; + unsigned int alignment; + + if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) + continue; + vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl))); + if (!vectype) + continue; + alignment = TYPE_ALIGN (vectype); + if (DECL_ALIGN (decl) >= alignment) + continue; + + if (vect_can_force_dr_alignment_p (decl, alignment)) + { + DECL_ALIGN (decl) = TYPE_ALIGN (vectype); + DECL_USER_ALIGN (decl) = 1; + if (dump_file) + { + fprintf (dump_file, "Increasing alignment of decl: "); + print_generic_expr (dump_file, decl, TDF_SLIM); + } + } + } + return 0; +} + +static bool +gate_increase_alignment (void) +{ + return flag_section_anchors && flag_tree_vectorize; +} + +struct tree_opt_pass pass_ipa_increase_alignment = +{ + "increase_alignment", /* name */ + gate_increase_alignment, /* gate */ + increase_alignment, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + 0 /* letter */ +};