X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-vectorizer.c;h=f3551f8528d31648b8bf37197e087ac179a6f146;hb=ffca001431383db73c1c827b614283aaf6a9c832;hp=34fbb9fde798f2ba18fb8b4ddefd60f86fe2ace5;hpb=7016c6128fa54ae4f68077da816fe0744cb8a852;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 34fbb9fde79..f3551f8528d 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1,5 +1,5 @@ /* Loop Vectorization - Copyright (C) 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. Contributed by Dorit Naishlos This file is part of GCC. @@ -57,10 +57,9 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA data: scalars (which are represented by SSA_NAMES), and memory references ("data-refs"). These two types of data require different handling both during analysis and transformation. The types of data-refs that the - vectorizer currently supports are ARRAY_REFS that are one dimensional - arrays which base is an array DECL (not a pointer), and INDIRECT_REFS - through pointers; both array and pointer accesses are required to have a - simple (consecutive) access pattern. + vectorizer currently supports are ARRAY_REFS which base is an array DECL + (not a pointer), and INDIRECT_REFS through pointers; both array and pointer + accesses are required to have a simple (consecutive) access pattern. Analysis phase: =============== @@ -129,7 +128,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "ggc.h" #include "tree.h" #include "target.h" - #include "rtl.h" #include "basic-block.h" #include "diagnostic.h" @@ -140,3270 +138,1679 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfglayout.h" #include "expr.h" #include "optabs.h" +#include "toplev.h" #include "tree-chrec.h" #include "tree-data-ref.h" #include "tree-scalar-evolution.h" +#include "input.h" #include "tree-vectorizer.h" #include "tree-pass.h" -/* Main analysis functions. */ -static loop_vec_info vect_analyze_loop (struct loop *); -static loop_vec_info vect_analyze_loop_form (struct loop *); -static bool vect_analyze_data_refs (loop_vec_info); -static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); -static bool vect_analyze_scalar_cycles (loop_vec_info); -static bool vect_analyze_data_ref_accesses (loop_vec_info); -static bool vect_analyze_data_refs_alignment (loop_vec_info); -static void vect_compute_data_refs_alignment (loop_vec_info); -static bool vect_analyze_operations (loop_vec_info); - -/* Main code transformation functions. */ -static void vect_transform_loop (loop_vec_info, struct loops *); -static void vect_transform_loop_bound (loop_vec_info); -static bool vect_transform_stmt (tree, block_stmt_iterator *); -static bool vectorizable_load (tree, block_stmt_iterator *, tree *); -static bool vectorizable_store (tree, block_stmt_iterator *, tree *); -static bool vectorizable_operation (tree, block_stmt_iterator *, tree *); -static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); -static void vect_align_data_ref (tree); -static void vect_enhance_data_refs_alignment (loop_vec_info); - -/* Utility functions for the analyses. */ -static bool vect_is_simple_use (tree , struct loop *, tree *); -static bool exist_non_indexing_operands_for_use_p (tree, tree); -static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool); -static void vect_mark_relevant (varray_type, tree); -static bool vect_stmt_relevant_p (tree, loop_vec_info); -static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *); -static void vect_compute_data_ref_alignment - (struct data_reference *, loop_vec_info); -static bool vect_analyze_data_ref_access (struct data_reference *); -static bool vect_get_first_index (tree, tree *); -static bool vect_can_force_dr_alignment_p (tree, unsigned int); -static tree vect_get_base_decl_and_bit_offset (tree, tree *); -static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool); - -/* Utility functions for the code transformation. */ -static tree vect_create_destination_var (tree, tree); -static tree vect_create_data_ref (tree, block_stmt_iterator *); -static tree vect_create_index_for_array_ref (tree, block_stmt_iterator *); -static tree get_vectype_for_scalar_type (tree); -static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); -static tree vect_get_vec_def_for_operand (tree, tree); -static tree vect_init_vector (tree, tree); -static void vect_finish_stmt_generation - (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); - -/* Utilities for creation and deletion of vec_info structs. */ -loop_vec_info new_loop_vec_info (struct loop *loop); -void destroy_loop_vec_info (loop_vec_info); -stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop); - -static bool vect_debug_stats (struct loop *loop); -static bool vect_debug_details (struct loop *loop); +/************************************************************************* + Simple Loop Peeling Utilities + *************************************************************************/ +static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg + (struct loop *, struct loops *, edge); +static void slpeel_update_phis_for_duplicate_loop + (struct loop *, struct loop *, bool after); +static void slpeel_update_phi_nodes_for_guard1 + (edge, struct loop *, bool, basic_block *, bitmap *); +static void slpeel_update_phi_nodes_for_guard2 + (edge, struct loop *, bool, basic_block *); +static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block); +static void allocate_new_names (bitmap); +static void rename_use_op (use_operand_p); +static void rename_def_op (def_operand_p, tree); +static void rename_variables_in_bb (basic_block); +static void free_new_names (bitmap); +static void rename_variables_in_loop (struct loop *); -/* Function new_stmt_vec_info. +/************************************************************************* + General Vectorization Utilities + *************************************************************************/ +static void vect_set_dump_settings (void); - Create and initialize a new stmt_vec_info struct for STMT. */ +/* vect_dump will be set to stderr or dump_file if exist. */ +FILE *vect_dump; -stmt_vec_info -new_stmt_vec_info (tree stmt, struct loop *loop) -{ - stmt_vec_info res; - res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); +/* vect_verbosity_level set to an invalid value + to mark that it's uninitialized. */ +enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; - STMT_VINFO_TYPE (res) = undef_vec_info_type; - STMT_VINFO_STMT (res) = stmt; - STMT_VINFO_LOOP (res) = loop; - STMT_VINFO_RELEVANT_P (res) = 0; - STMT_VINFO_VECTYPE (res) = NULL; - STMT_VINFO_VEC_STMT (res) = NULL; - STMT_VINFO_DATA_REF (res) = NULL; - STMT_VINFO_MEMTAG (res) = NULL; - return res; -} + +/************************************************************************* + Simple Loop Peeling Utilities + Utilities to support loop peeling for vectorization purposes. + *************************************************************************/ -/* Function new_loop_vec_info. - Create and initialize a new loop_vec_info struct for LOOP, as well as - stmt_vec_info structs for all the stmts in LOOP. */ +/* For each definition in DEFINITIONS this function allocates + new ssa name. */ -loop_vec_info -new_loop_vec_info (struct loop *loop) +static void +allocate_new_names (bitmap definitions) { - loop_vec_info res; - basic_block *bbs; - block_stmt_iterator si; - unsigned int i; + unsigned ver; + bitmap_iterator bi; - res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); + EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) + { + tree def = ssa_name (ver); + tree *new_name_ptr = xmalloc (sizeof (tree)); - bbs = get_loop_body (loop); + bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def); - /* Create stmt_info for all stmts in the loop. */ - for (i = 0; i < loop->num_nodes; i++) - { - basic_block bb = bbs[i]; - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - tree stmt = bsi_stmt (si); - stmt_ann_t ann; + *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def)); + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal; - get_stmt_operands (stmt); - ann = stmt_ann (stmt); - set_stmt_info (ann, new_stmt_vec_info (stmt, loop)); - } + SSA_NAME_AUX (def) = new_name_ptr; } - - LOOP_VINFO_LOOP (res) = loop; - LOOP_VINFO_BBS (res) = bbs; - LOOP_VINFO_EXIT_COND (res) = NULL; - LOOP_VINFO_NITERS (res) = -1; - LOOP_VINFO_VECTORIZABLE_P (res) = 0; - LOOP_VINFO_VECT_FACTOR (res) = 0; - VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, - "loop_write_datarefs"); - VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, - "loop_read_datarefs"); - return res; } -/* Function destroy_loop_vec_info. - - Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the - stmts in the loop. */ +/* Renames the use *OP_P. */ -void -destroy_loop_vec_info (loop_vec_info loop_vinfo) +static void +rename_use_op (use_operand_p op_p) { - struct loop *loop; - basic_block *bbs; - int nbbs; - block_stmt_iterator si; - int j; + tree *new_name_ptr; - if (!loop_vinfo) + if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) return; - loop = LOOP_VINFO_LOOP (loop_vinfo); - - bbs = LOOP_VINFO_BBS (loop_vinfo); - nbbs = loop->num_nodes; + new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p)); - for (j = 0; j < nbbs; j++) - { - basic_block bb = bbs[j]; - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - tree stmt = bsi_stmt (si); - stmt_ann_t ann = stmt_ann (stmt); - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - free (stmt_info); - set_stmt_info (ann, NULL); - } - } + /* Something defined outside of the loop. */ + if (!new_name_ptr) + return; - free (LOOP_VINFO_BBS (loop_vinfo)); - varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); - varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo)); + /* An ordinary ssa name defined in the loop. */ - free (loop_vinfo); + SET_USE (op_p, *new_name_ptr); } -/* Function debug_loop_stats. - - For vectorization statistics dumps. */ +/* Renames the def *OP_P in statement STMT. */ -static bool -vect_debug_stats (struct loop *loop) +static void +rename_def_op (def_operand_p op_p, tree stmt) { - basic_block bb; - block_stmt_iterator si; - tree node = NULL_TREE; - - if (!dump_file || !(dump_flags & TDF_STATS)) - return false; - - if (!loop) - { - fprintf (dump_file, "\n"); - return true; - } + tree *new_name_ptr; - if (!loop->header) - return false; + if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME) + return; - bb = loop->header; + new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p)); - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - node = bsi_stmt (si); - if (node && EXPR_P (node) && EXPR_LOCUS (node)) - break; - } + /* Something defined outside of the loop. */ + if (!new_name_ptr) + return; - if (node && EXPR_P (node) && EXPR_LOCUS (node) - && EXPR_FILENAME (node) && EXPR_LINENO (node)) - { - fprintf (dump_file, "\nloop at %s:%d: ", - EXPR_FILENAME (node), EXPR_LINENO (node)); - return true; - } + /* An ordinary ssa name defined in the loop. */ - return false; + SET_DEF (op_p, *new_name_ptr); + SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt; } -/* Function debug_loop_details. - - For vectorization debug dumps. */ +/* Renames the variables in basic block BB. */ -static bool -vect_debug_details (struct loop *loop) +static void +rename_variables_in_bb (basic_block bb) { - basic_block bb; - block_stmt_iterator si; - tree node = NULL_TREE; + tree phi; + block_stmt_iterator bsi; + tree stmt; + stmt_ann_t ann; + use_optype uses; + vuse_optype vuses; + def_optype defs; + v_may_def_optype v_may_defs; + v_must_def_optype v_must_defs; + unsigned i; + edge e; + edge_iterator ei; + struct loop *loop = bb->loop_father; - if (!dump_file || !(dump_flags & TDF_DETAILS)) - return false; + for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) + rename_def_op (PHI_RESULT_PTR (phi), phi); - if (!loop) + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) { - fprintf (dump_file, "\n"); - return true; - } + stmt = bsi_stmt (bsi); + get_stmt_operands (stmt); + ann = stmt_ann (stmt); - if (!loop->header) - return false; + uses = USE_OPS (ann); + for (i = 0; i < NUM_USES (uses); i++) + rename_use_op (USE_OP_PTR (uses, i)); - bb = loop->header; + defs = DEF_OPS (ann); + for (i = 0; i < NUM_DEFS (defs); i++) + rename_def_op (DEF_OP_PTR (defs, i), stmt); - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - node = bsi_stmt (si); - if (node && EXPR_P (node) && EXPR_LOCUS (node)) - break; + vuses = VUSE_OPS (ann); + for (i = 0; i < NUM_VUSES (vuses); i++) + rename_use_op (VUSE_OP_PTR (vuses, i)); + + v_may_defs = V_MAY_DEF_OPS (ann); + for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++) + { + rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i)); + rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt); + } + + v_must_defs = V_MUST_DEF_OPS (ann); + for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++) + { + rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i)); + rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt); + } } - if (node && EXPR_P (node) && EXPR_LOCUS (node) - && EXPR_FILENAME (node) && EXPR_LINENO (node)) + FOR_EACH_EDGE (e, ei, bb->succs) { - fprintf (dump_file, "\nloop at %s:%d: ", - EXPR_FILENAME (node), EXPR_LINENO (node)); - return true; + if (!flow_bb_inside_loop_p (loop, e->dest)) + continue; + for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi)) + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e)); } - - return false; } -/* Function vect_get_base_decl_and_bit_offset - - Get the decl from which the data reference REF is based, - and compute the OFFSET from it in bits on the way. - FORNOW: Handle only component-refs that consist of - VAR_DECLs (no ARRAY_REF or INDIRECT_REF). */ -static tree -vect_get_base_decl_and_bit_offset (tree ref, tree *offset) +/* Releases the structures holding the new ssa names. */ + +static void +free_new_names (bitmap definitions) { - tree decl; - if (TREE_CODE (ref) == VAR_DECL) - return ref; + unsigned ver; + bitmap_iterator bi; - if (TREE_CODE (ref) == COMPONENT_REF) + EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi) { - tree this_offset; - tree oprnd0 = TREE_OPERAND (ref, 0); - tree oprnd1 = TREE_OPERAND (ref, 1); - - this_offset = bit_position (oprnd1); - if (!host_integerp (this_offset,1)) - return NULL_TREE; - - decl = vect_get_base_decl_and_bit_offset (oprnd0, offset); + tree def = ssa_name (ver); - if (decl) + if (SSA_NAME_AUX (def)) { - *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1); - - if (!host_integerp (*offset,1) || TREE_OVERFLOW (*offset)) - return NULL_TREE; - - if (vect_debug_details (NULL)) - { - print_generic_expr (dump_file, ref, TDF_SLIM); - fprintf (dump_file, " --> total offset for ref: "); - print_generic_expr (dump_file, *offset, TDF_SLIM); - } + free (SSA_NAME_AUX (def)); + SSA_NAME_AUX (def) = NULL; } - - return decl; } - - /* TODO: extend to handle more cases. */ - return NULL_TREE; -} - - -/* Function vect_force_dr_alignment_p. - - Returns whether the alignment of a DECL can be forced to be aligned - on ALIGNMENT bit boundary. */ - -static bool -vect_can_force_dr_alignment_p (tree decl, unsigned int alignment) -{ - if (TREE_CODE (decl) != VAR_DECL) - return false; - - if (DECL_EXTERNAL (decl)) - return false; - - if (TREE_STATIC (decl)) - return (alignment <= MAX_OFILE_ALIGNMENT); - else - /* This is not 100% correct. The absolute correct stack alignment - is STACK_BOUNDARY. We're supposed to hope, but not assume, that - PREFERRED_STACK_BOUNDARY is honored by all translation units. - However, until someone implements forced stack alignment, SSE - isn't really usable without this. */ - return (alignment <= PREFERRED_STACK_BOUNDARY); } -/* Function vect_get_new_vect_var. +/* Renames variables in new generated LOOP. */ - Returns a name for a new variable. The current naming scheme appends the - prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to - the name of vectorizer generated variables, and appends that to NAME if - provided. */ - -static tree -vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) +static void +rename_variables_in_loop (struct loop *loop) { - const char *prefix; - int prefix_len; - tree new_vect_var; - - if (var_kind == vect_simple_var) - prefix = "vect_"; - else - prefix = "vect_p"; + unsigned i; + basic_block *bbs; - prefix_len = strlen (prefix); + bbs = get_loop_body (loop); - if (name) - new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); - else - new_vect_var = create_tmp_var (type, prefix); + for (i = 0; i < loop->num_nodes; i++) + rename_variables_in_bb (bbs[i]); - return new_vect_var; + free (bbs); } -/* Function create_index_for_array_ref. +/* Update the PHI nodes of NEW_LOOP. - Create (and return) an index variable, along with it's update chain in the - loop. This variable will be used to access a memory location in a vector - operation. + NEW_LOOP is a duplicate of ORIG_LOOP. + AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP: + AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it + executes before it. */ - Input: - STMT: The stmt that contains a memory data-ref. - BSI: The block_stmt_iterator where STMT is. Any new stmts created by this - function can be added here, or in the loop pre-header. - - FORNOW: We are only handling array accesses with step 1. */ - -static tree -vect_create_index_for_array_ref (tree stmt, block_stmt_iterator *bsi) +static void +slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, + struct loop *new_loop, bool after) { - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - struct loop *loop = STMT_VINFO_LOOP (stmt_info); - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - tree expr = DR_REF (dr); - tree access_fn; - tree init, step; - loop_vec_info loop_info = loop->aux; - int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_info); - tree vf; - tree array_first_index; - tree indx_before_incr, indx_after_incr; - int loopnum = loop->num; - bool ok; -#ifdef ENABLE_CHECKING - varray_type access_fns = DR_ACCESS_FNS (dr); - - /* FORNOW: handling only one dimensional arrays. */ - if (VARRAY_ACTIVE_SIZE (access_fns) != 1) - abort (); - - if (!vectorization_factor) - abort (); -#endif - - access_fn = DR_ACCESS_FN (dr, 0); - ok = vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, true) - && vect_get_first_index (expr, &array_first_index); - -#ifdef ENABLE_CHECKING - if (!ok) - abort (); - - /* FORNOW: Handling only constant 'init'. */ - if (TREE_CODE (init) != INTEGER_CST) - abort (); -#endif - - vf = build_int_cst (unsigned_type_node, vectorization_factor); + tree *new_name_ptr, new_ssa_name; + tree phi_new, phi_orig; + tree def; + edge orig_loop_latch = loop_latch_edge (orig_loop); + edge orig_entry_e = loop_preheader_edge (orig_loop); + edge new_loop_exit_e = new_loop->single_exit; + edge new_loop_entry_e = loop_preheader_edge (new_loop); + edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e); - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "int vf = %d",vectorization_factor); - fprintf (dump_file, ", vf:"); - print_generic_expr (dump_file, vf, TDF_SLIM); - fprintf (dump_file, ", init:"); - print_generic_expr (dump_file, init, TDF_SLIM); - fprintf (dump_file, ", array_first_index:"); - print_generic_expr (dump_file, array_first_index, TDF_SLIM); - } + /* + step 1. For each loop-header-phi: + Add the first phi argument for the phi in NEW_LOOP + (the one associated with the entry of NEW_LOOP) + + step 2. For each loop-header-phi: + Add the second phi argument for the phi in NEW_LOOP + (the one associated with the latch of NEW_LOOP) + + step 3. Update the phis in the successor block of NEW_LOOP. + + case 1: NEW_LOOP was placed before ORIG_LOOP: + The successor block of NEW_LOOP is the header of ORIG_LOOP. + Updating the phis in the successor block can therefore be done + along with the scanning of the loop header phis, because the + header blocks of ORIG_LOOP and NEW_LOOP have exactly the same + phi nodes, organized in the same order. + + case 2: NEW_LOOP was placed after ORIG_LOOP: + The successor block of NEW_LOOP is the original exit block of + ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis. + We postpone updating these phis to a later stage (when + loop guards are added). + */ - /* Calculate the 'init' of the new index. - init = (init - array_first_index) / vectorization_factor */ - init = int_const_binop (TRUNC_DIV_EXPR, - int_const_binop (MINUS_EXPR, init, array_first_index, 1), - vf, 1); - /* Calculate the 'step' of the new index. FORNOW: always 1. */ - step = size_one_node; + /* Scan the phis in the headers of the old and new loops + (they are organized in exactly the same order). */ - if (vect_debug_details (NULL)) + for (phi_new = phi_nodes (new_loop->header), + phi_orig = phi_nodes (orig_loop->header); + phi_new && phi_orig; + phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig)) { - fprintf (dump_file, "create iv for ("); - print_generic_expr (dump_file, init, TDF_SLIM); - fprintf (dump_file, ", + ,"); - print_generic_expr (dump_file, step, TDF_SLIM); - fprintf (dump_file, ")"); - } - - create_iv (init, step, NULL_TREE, loop, bsi, false, - &indx_before_incr, &indx_after_incr); - - return indx_before_incr; -} - + /* step 1. */ + def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e); + add_phi_arg (phi_new, def, new_loop_entry_e); -/* Function get_vectype_for_scalar_type. - - Returns the vector type corresponding to SCALAR_TYPE as supported - by the target. */ - -static tree -get_vectype_for_scalar_type (tree scalar_type) -{ - enum machine_mode inner_mode = TYPE_MODE (scalar_type); - int nbytes = GET_MODE_SIZE (inner_mode); - int nunits; + /* step 2. */ + def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch); + if (TREE_CODE (def) != SSA_NAME) + continue; - if (nbytes == 0) - return NULL_TREE; + new_name_ptr = SSA_NAME_AUX (def); + if (!new_name_ptr) + /* Something defined outside of the loop. */ + continue; - /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) - is expected. */ - nunits = UNITS_PER_SIMD_WORD / nbytes; + /* An ordinary ssa name defined in the loop. */ + new_ssa_name = *new_name_ptr; + add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop)); - return build_vector_type (scalar_type, nunits); + /* step 3 (case 1). */ + if (!after) + { + gcc_assert (new_loop_exit_e == orig_entry_e); + SET_PHI_ARG_DEF (phi_orig, + new_loop_exit_e->dest_idx, + new_ssa_name); + } + } } -/* Function vect_align_data_ref. +/* Update PHI nodes for a guard of the LOOP. - Handle mislignment of a memory accesses. - - FORNOW: Can't handle misaligned accesses. - Make sure that the dataref is aligned. */ + Input: + - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that + controls whether LOOP is to be executed. GUARD_EDGE is the edge that + originates from the guard-bb, skips LOOP and reaches the (unique) exit + bb of LOOP. This loop-exit-bb is an empty bb with one successor. + We denote this bb NEW_MERGE_BB because before the guard code was added + it had a single predecessor (the LOOP header), and now it became a merge + point of two paths - the path that ends with the LOOP exit-edge, and + the path that ends with GUARD_EDGE. + - NEW_EXIT_BB: New basic block that is added by this function between LOOP + and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis. + + ===> The CFG before the guard-code was added: + LOOP_header_bb: + loop_body + if (exit_loop) goto update_bb + else goto LOOP_header_bb + update_bb: + + ==> The CFG after the guard-code was added: + guard_bb: + if (LOOP_guard_condition) goto new_merge_bb + else goto LOOP_header_bb + LOOP_header_bb: + loop_body + if (exit_loop_condition) goto new_merge_bb + else goto LOOP_header_bb + new_merge_bb: + goto update_bb + update_bb: + + ==> The CFG after this function: + guard_bb: + if (LOOP_guard_condition) goto new_merge_bb + else goto LOOP_header_bb + LOOP_header_bb: + loop_body + if (exit_loop_condition) goto new_exit_bb + else goto LOOP_header_bb + new_exit_bb: + new_merge_bb: + goto update_bb + update_bb: + + This function: + 1. creates and updates the relevant phi nodes to account for the new + incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves: + 1.1. Create phi nodes at NEW_MERGE_BB. + 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted + UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB + 2. preserves loop-closed-ssa-form by creating the required phi nodes + at the exit of LOOP (i.e, in NEW_EXIT_BB). + + There are two flavors to this function: + + slpeel_update_phi_nodes_for_guard1: + Here the guard controls whether we enter or skip LOOP, where LOOP is a + prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are + for variables that have phis in the loop header. + + slpeel_update_phi_nodes_for_guard2: + Here the guard controls whether we enter or skip LOOP, where LOOP is an + epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are + for variables that have phis in the loop exit. + + I.E., the overall structure is: + + loop1_preheader_bb: + guard1 (goto loop1/merg1_bb) + loop1 + loop1_exit_bb: + guard2 (goto merge1_bb/merge2_bb) + merge1_bb + loop2 + loop2_exit_bb + merge2_bb + next_bb + + slpeel_update_phi_nodes_for_guard1 takes care of creating phis in + loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars + that have phis in loop1->header). + + slpeel_update_phi_nodes_for_guard2 takes care of creating phis in + loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars + that have phis in next_bb). It also adds some of these phis to + loop1_exit_bb. + + slpeel_update_phi_nodes_for_guard1 is always called before + slpeel_update_phi_nodes_for_guard2. They are both needed in order + to create correct data-flow and loop-closed-ssa-form. + + Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables + that change between iterations of a loop (and therefore have a phi-node + at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates + phis for variables that are used out of the loop (and therefore have + loop-closed exit phis). Some variables may be both updated between + iterations and used after the loop. This is why in loop1_exit_bb we + may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1) + and exit phis (created by slpeel_update_phi_nodes_for_guard2). + + - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of + an original loop. i.e., we have: + + orig_loop + guard_bb (goto LOOP/new_merge) + new_loop <-- LOOP + new_exit + new_merge + next_bb + + If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we + have: + + new_loop + guard_bb (goto LOOP/new_merge) + orig_loop <-- LOOP + new_exit + new_merge + next_bb + + The ssa-names defined in the original loop have an SSA_NAME_AUX pointer + that records the corresponding new ssa-name used in the new duplicated + loop copy. + */ + +/* Function slpeel_update_phi_nodes_for_guard1 + + Input: + - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. + - DEFS - a bitmap of ssa names to mark new names for which we recorded + information. + + In the context of the overall structure, we have: + + loop1_preheader_bb: + guard1 (goto loop1/merg1_bb) +LOOP-> loop1 + loop1_exit_bb: + guard2 (goto merge1_bb/merge2_bb) + merge1_bb + loop2 + loop2_exit_bb + merge2_bb + next_bb + + For each name updated between loop iterations (i.e - for each name that has + an entry (loop-header) phi in LOOP) we create a new phi in: + 1. merge1_bb (to account for the edge from guard1) + 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form) +*/ static void -vect_align_data_ref (tree stmt) +slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, + bool is_new_loop, basic_block *new_exit_bb, + bitmap *defs) { - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree orig_phi, new_phi; + tree update_phi, update_phi2; + tree *new_name_ptr, *new_name_ptr2; + tree guard_arg, loop_arg; + basic_block new_merge_bb = guard_edge->dest; + edge e = EDGE_SUCC (new_merge_bb, 0); + basic_block update_bb = e->dest; + basic_block orig_bb = loop->header; + edge new_exit_e; + tree current_new_name; + + /* Create new bb between loop and new_merge_bb. */ + *new_exit_bb = split_edge (loop->single_exit); + add_bb_to_loop (*new_exit_bb, loop->outer); + + new_exit_e = EDGE_SUCC (*new_exit_bb, 0); + + for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb); + orig_phi && update_phi; + orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi)) + { + /** 1. Handle new-merge-point phis **/ + + /* 1.1. Generate new phi node in NEW_MERGE_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + new_merge_bb); + + /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge + of LOOP. Set the two phi args in NEW_PHI for these edges: */ + loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0)); + guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop)); + + add_phi_arg (new_phi, loop_arg, new_exit_e); + add_phi_arg (new_phi, guard_arg, guard_edge); + + /* 1.3. Update phi in successor block. */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg + || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); + SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); + update_phi2 = new_phi; + + + /** 2. Handle loop-closed-ssa-form phis **/ + + /* 2.1. Generate new phi node in NEW_EXIT_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + *new_exit_bb); + + /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ + add_phi_arg (new_phi, loop_arg, loop->single_exit); + + /* 2.3. Update phi in successor of NEW_EXIT_BB: */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); + SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); + + /* 2.4. Record the newly created name in SSA_NAME_AUX. + We want to find a name such that + name = *(SSA_NAME_AUX (orig_loop_name)) + and to set its SSA_NAME_AUX as follows: + *(SSA_NAME_AUX (name)) = new_phi_name + + If LOOP is a new loop then loop_arg is already the name we're + looking for. If LOOP is the original loop, then loop_arg is + the orig_loop_name and the relevant name is recorded in its + SSA_NAME_AUX */ + if (is_new_loop) + current_new_name = loop_arg; + else + { + new_name_ptr = SSA_NAME_AUX (loop_arg); + gcc_assert (new_name_ptr); + current_new_name = *new_name_ptr; + } +#ifdef ENABLE_CHECKING + gcc_assert (! SSA_NAME_AUX (current_new_name)); +#endif - /* FORNOW: can't handle misaligned accesses; - all accesses expected to be aligned. */ - if (!aligned_access_p (dr)) - abort (); -} + new_name_ptr2 = xmalloc (sizeof (tree)); + *new_name_ptr2 = PHI_RESULT (new_phi); + SSA_NAME_AUX (current_new_name) = new_name_ptr2; + bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name)); + } + set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); +} -/* Function vect_create_data_ref. - Create a memory reference expression for vector access, to be used in a - vector load/store stmt. +/* Function slpeel_update_phi_nodes_for_guard2 Input: - STMT: a stmt that references memory. expected to be of the form - MODIFY_EXPR or MODIFY_EXPR . - BSI: block_stmt_iterator where new stmts can be added. - - Output: - 1. Declare a new ptr to vector_type, and have it point to the array base. - For example, for vector of type V8HI: - v8hi *p0; - p0 = (v8hi *)&a; - 2. Create a data-reference based on the new vector pointer p0, and using - a new index variable 'idx'. Return the expression '(*p0)[idx]'. - - FORNOW: handle only aligned and consecutive accesses. */ + - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. + + In the context of the overall structure, we have: + + loop1_preheader_bb: + guard1 (goto loop1/merg1_bb) + loop1 + loop1_exit_bb: + guard2 (goto merge1_bb/merge2_bb) + merge1_bb +LOOP-> loop2 + loop2_exit_bb + merge2_bb + next_bb + + For each name used out side the loop (i.e - for each name that has an exit + phi in next_bb) we create a new phi in: + 1. merge2_bb (to account for the edge from guard_bb) + 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form) + 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form), + if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1). +*/ -static tree -vect_create_data_ref (tree stmt, block_stmt_iterator *bsi) +static void +slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, + bool is_new_loop, basic_block *new_exit_bb) { - tree new_base; - tree data_ref; - tree idx; - tree vec_stmt; - tree new_temp; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - tree vect_ptr_type; - tree vect_ptr; - tree addr_ref; - v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt); - v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt); - vuse_optype vuses = STMT_VUSE_OPS (stmt); - int nvuses, nv_may_defs, nv_must_defs; - int i; - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - tree array_type; - tree base_addr = NULL_TREE; - struct loop *loop = STMT_VINFO_LOOP (stmt_info); - edge pe; - tree tag; - tree addr_expr; - tree scalar_ptr_type; - - /* FORNOW: make sure the data reference is aligned. */ - vect_align_data_ref (stmt); - - addr_ref = DR_BASE_NAME (dr); - - array_type = build_array_type (vectype, 0); - TYPE_ALIGN (array_type) = TYPE_ALIGN (TREE_TYPE (addr_ref)); - vect_ptr_type = build_pointer_type (array_type); - scalar_ptr_type = build_pointer_type (TREE_TYPE (addr_ref)); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "create array_ref of type: "); - print_generic_expr (dump_file, vectype, TDF_SLIM); - } - - /*** create: vectype_array *p; ***/ - vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, - get_name (addr_ref)); - add_referenced_tmp_var (vect_ptr); - -#ifdef ENABLE_CHECKING - if (TREE_CODE (addr_ref) != VAR_DECL - && TREE_CODE (addr_ref) != COMPONENT_REF - && TREE_CODE (addr_ref) != SSA_NAME) - abort (); -#endif - - if (vect_debug_details (NULL)) - { - if (TREE_CODE (addr_ref) == VAR_DECL) - fprintf (dump_file, "vectorizing an array ref: "); - else if (TREE_CODE (addr_ref) == SSA_NAME) - fprintf (dump_file, "vectorizing a pointer ref: "); - else if (TREE_CODE (addr_ref) == COMPONENT_REF) - fprintf (dump_file, "vectorizing a record ref: "); - print_generic_expr (dump_file, addr_ref, TDF_SLIM); - } - - /* Get base address: */ - if (TREE_CODE (addr_ref) == SSA_NAME) - base_addr = addr_ref; - else - base_addr = build_fold_addr_expr (addr_ref); - - /* Handle aliasing: */ - tag = STMT_VINFO_MEMTAG (stmt_info); -#ifdef ENABLE_CHECKING - if (!tag) - abort (); -#endif - get_var_ann (vect_ptr)->type_mem_tag = tag; - - /* Mark for renaming all aliased variables - (i.e, the may-aliases of the type-mem-tag) */ - nvuses = NUM_VUSES (vuses); - nv_may_defs = NUM_V_MAY_DEFS (v_may_defs); - nv_must_defs = NUM_V_MUST_DEFS (v_must_defs); - for (i = 0; i < nvuses; i++) - { - tree use = VUSE_OP (vuses, i); - if (TREE_CODE (use) == SSA_NAME) - bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid); - } - for (i = 0; i < nv_may_defs; i++) - { - tree def = V_MAY_DEF_RESULT (v_may_defs, i); - if (TREE_CODE (def) == SSA_NAME) - bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); - } - for (i = 0; i < nv_must_defs; i++) - { - tree def = V_MUST_DEF_OP (v_must_defs, i); - if (TREE_CODE (def) == SSA_NAME) - bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); - } - - pe = loop_preheader_edge (loop); - - /*** create: p = (vectype *)&a; ***/ - - /* addr_expr = &a */ - addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var, - get_name (addr_ref)); - add_referenced_tmp_var (addr_expr); - vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, base_addr); - new_temp = make_ssa_name (addr_expr, vec_stmt); - TREE_OPERAND (vec_stmt, 0) = new_temp; - bsi_insert_on_edge (pe, vec_stmt); - - /* vect_ptr = (vectype_array *)&a; */ - vec_stmt = fold_convert (vect_ptr_type, new_temp); - vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); - new_temp = make_ssa_name (vect_ptr, vec_stmt); - TREE_OPERAND (vec_stmt, 0) = new_temp; - bsi_insert_on_edge (pe, vec_stmt); - - /*** create data ref: '(*p)[idx]' ***/ - - idx = vect_create_index_for_array_ref (stmt, bsi); - - new_base = build_fold_indirect_ref (new_temp); - data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "created new data-ref: "); - print_generic_expr (dump_file, data_ref, TDF_SLIM); - } - - return data_ref; -} - - -/* Function vect_create_destination_var. - - Create a new temporary of type VECTYPE. */ - -static tree -vect_create_destination_var (tree scalar_dest, tree vectype) -{ - tree vec_dest; - const char *new_name; - -#ifdef ENABLE_CHECKING - if (TREE_CODE (scalar_dest) != SSA_NAME) - abort (); -#endif - - new_name = get_name (scalar_dest); - if (!new_name) - new_name = "var_"; - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); - add_referenced_tmp_var (vec_dest); - - return vec_dest; -} - - -/* Function vect_init_vector. - - Insert a new stmt (INIT_STMT) that initializes a new vector variable with - the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be - used in the vectorization of STMT. */ - -static tree -vect_init_vector (tree stmt, tree vector_var) -{ - stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); - struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); - tree new_var; - tree init_stmt; - tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); - tree vec_oprnd; - edge pe; - tree new_temp; - - new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); - add_referenced_tmp_var (new_var); - - init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); - new_temp = make_ssa_name (new_var, init_stmt); - TREE_OPERAND (init_stmt, 0) = new_temp; - - pe = loop_preheader_edge (loop); - bsi_insert_on_edge (pe, init_stmt); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "created new init_stmt: "); - print_generic_expr (dump_file, init_stmt, TDF_SLIM); - } - - vec_oprnd = TREE_OPERAND (init_stmt, 0); - return vec_oprnd; -} - - -/* Function vect_get_vec_def_for_operand. - - OP is an operand in STMT. This function returns a (vector) def that will be - used in the vectorized stmt for STMT. - - In the case that OP is an SSA_NAME which is defined in the loop, then - STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. - - In case OP is an invariant or constant, a new stmt that creates a vector def - needs to be introduced. */ - -static tree -vect_get_vec_def_for_operand (tree op, tree stmt) -{ - tree vec_oprnd; - tree vec_stmt; - tree def_stmt; - stmt_vec_info def_stmt_info = NULL; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); - int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); - struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo); - basic_block bb; - tree vec_inv; - tree t = NULL_TREE; - tree def; - int i; - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "vect_get_vec_def_for_operand: "); - print_generic_expr (dump_file, op, TDF_SLIM); - } - - /** ===> Case 1: operand is a constant. **/ - - if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST) - { - /* Create 'vect_cst_ = {cst,cst,...,cst}' */ - - tree vec_cst; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); - int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); - tree t = NULL_TREE; - int i; - - /* Build a tree with vector elements. */ - if (vect_debug_details (NULL)) - fprintf (dump_file, "Create vector_cst. nunits = %d", nunits); - - for (i = nunits - 1; i >= 0; --i) - { - t = tree_cons (NULL_TREE, op, t); - } - vec_cst = build_vector (vectype, t); - return vect_init_vector (stmt, vec_cst); - } - -#ifdef ENABLE_CHECKING - if (TREE_CODE (op) != SSA_NAME) - abort (); -#endif - - /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/ - - def_stmt = SSA_NAME_DEF_STMT (op); - def_stmt_info = vinfo_for_stmt (def_stmt); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: "); - print_generic_expr (dump_file, def_stmt, TDF_SLIM); - } - - - /** ==> Case 2.1: operand is defined inside the loop. **/ - - if (def_stmt_info) - { - /* Get the def from the vectorized stmt. */ - - vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); -#ifdef ENABLE_CHECKING - if (!vec_stmt) - abort (); -#endif - vec_oprnd = TREE_OPERAND (vec_stmt, 0); - return vec_oprnd; - } - - - /** ==> Case 2.2: operand is defined by the loop-header phi-node - - it is a reduction/induction. **/ - - bb = bb_for_stmt (def_stmt); - if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "reduction/induction - unsupported."); - abort (); /* FORNOW no support for reduction/induction. */ - } - - - /** ==> Case 2.3: operand is defined outside the loop - - it is a loop invariant. */ - - switch (TREE_CODE (def_stmt)) - { - case PHI_NODE: - def = PHI_RESULT (def_stmt); - break; - case MODIFY_EXPR: - def = TREE_OPERAND (def_stmt, 0); - break; - case NOP_EXPR: - def = TREE_OPERAND (def_stmt, 0); -#ifdef ENABLE_CHECKING - if (!IS_EMPTY_STMT (def_stmt)) - abort (); -#endif - def = op; - break; - default: - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "unsupported defining stmt: "); - print_generic_expr (dump_file, def_stmt, TDF_SLIM); - } - abort (); - } - - /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */ - - if (vect_debug_details (NULL)) - fprintf (dump_file, "Create vector_inv."); - - for (i = nunits - 1; i >= 0; --i) - { - t = tree_cons (NULL_TREE, def, t); - } - - vec_inv = build_constructor (vectype, t); - return vect_init_vector (stmt, vec_inv); -} - - -/* Function vect_finish_stmt_generation. - - Insert a new stmt. */ - -static void -vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) -{ - bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "add new stmt: "); - print_generic_expr (dump_file, vec_stmt, TDF_SLIM); - } - - /* Make sure bsi points to the stmt that is being vectorized. */ - - /* Assumption: any stmts created for the vectorization of smtmt S are - inserted before S. BSI may point to S or some new stmt before it. */ - - while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) - bsi_next (bsi); -#ifdef ENABLE_CHECKING - if (stmt != bsi_stmt (*bsi)) - abort (); -#endif -} - - -/* Function vectorizable_assignment. - - Check if STMT performs an assignment (copy) that can be vectorized. - If VEC_STMT is also passed, vectorize the STMT: create a vectorized - stmt to replace it, put it in VEC_STMT, and insert it at BSI. - Return FALSE if not a vectorizable STMT, TRUE otherwise. */ - -static bool -vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) -{ - tree vec_dest; - tree scalar_dest; - tree op; - tree vec_oprnd; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - struct loop *loop = STMT_VINFO_LOOP (stmt_info); - tree new_temp; - - /* Is vectorizable assignment? */ - - if (TREE_CODE (stmt) != MODIFY_EXPR) - return false; - - scalar_dest = TREE_OPERAND (stmt, 0); - if (TREE_CODE (scalar_dest) != SSA_NAME) - return false; - - op = TREE_OPERAND (stmt, 1); - if (!vect_is_simple_use (op, loop, NULL)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "use not simple."); - return false; - } - - if (!vec_stmt) /* transformation not required. */ - { - STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; - return true; - } - - /** Trasform. **/ - if (vect_debug_details (NULL)) - fprintf (dump_file, "transform assignment."); - - /* Handle def. */ - vec_dest = vect_create_destination_var (scalar_dest, vectype); - - /* Handle use. */ - op = TREE_OPERAND (stmt, 1); - vec_oprnd = vect_get_vec_def_for_operand (op, stmt); - - /* Arguments are ready. create the new vector stmt. */ - *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - TREE_OPERAND (*vec_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); - - return true; -} - - -/* Function vectorizable_operation. - - Check if STMT performs a binary or unary operation that can be vectorized. - If VEC_STMT is also passed, vectorize the STMT: create a vectorized - stmt to replace it, put it in VEC_STMT, and insert it at BSI. - Return FALSE if not a vectorizable STMT, TRUE otherwise. */ - -static bool -vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) -{ - tree vec_dest; - tree scalar_dest; - tree operation; - tree op0, op1 = NULL; - tree vec_oprnd0, vec_oprnd1=NULL; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - struct loop *loop = STMT_VINFO_LOOP (stmt_info); - int i; - enum tree_code code; - enum machine_mode vec_mode; - tree new_temp; - int op_type; - tree op; - optab optab; - - /* Is STMT a vectorizable binary/unary operation? */ - if (TREE_CODE (stmt) != MODIFY_EXPR) - return false; - - if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) - return false; - - operation = TREE_OPERAND (stmt, 1); - code = TREE_CODE (operation); - optab = optab_for_tree_code (code, vectype); - - /* Support only unary or binary operations. */ - op_type = TREE_CODE_LENGTH (code); - if (op_type != unary_op && op_type != binary_op) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type); - return false; - } - - for (i = 0; i < op_type; i++) - { - op = TREE_OPERAND (operation, i); - if (!vect_is_simple_use (op, loop, NULL)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "use not simple."); - return false; - } - } - - /* Supportable by target? */ - if (!optab) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "no optab."); - return false; - } - vec_mode = TYPE_MODE (vectype); - if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "op not supported by target."); - return false; - } - - if (!vec_stmt) /* transformation not required. */ - { - STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; - return true; - } - - /** Trasform. **/ - - if (vect_debug_details (NULL)) - fprintf (dump_file, "transform binary/unary operation."); - - /* Handle def. */ - scalar_dest = TREE_OPERAND (stmt, 0); - vec_dest = vect_create_destination_var (scalar_dest, vectype); - - /* Handle uses. */ - op0 = TREE_OPERAND (operation, 0); - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); - - if (op_type == binary_op) - { - op1 = TREE_OPERAND (operation, 1); - vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); - } - - /* Arguments are ready. create the new vector stmt. */ - - if (op_type == binary_op) - *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, - build2 (code, vectype, vec_oprnd0, vec_oprnd1)); - else - *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, - build1 (code, vectype, vec_oprnd0)); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - TREE_OPERAND (*vec_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); - - return true; -} - - -/* Function vectorizable_store. - - Check if STMT defines a non scalar data-ref (array/pointer/structure) that - can be vectorized. - If VEC_STMT is also passed, vectorize the STMT: create a vectorized - stmt to replace it, put it in VEC_STMT, and insert it at BSI. - Return FALSE if not a vectorizable STMT, TRUE otherwise. */ - -static bool -vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) -{ - tree scalar_dest; - tree data_ref; - tree op; - tree vec_oprnd1; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - struct loop *loop = STMT_VINFO_LOOP (stmt_info); - enum machine_mode vec_mode; - - /* Is vectorizable store? */ - - if (TREE_CODE (stmt) != MODIFY_EXPR) - return false; - - scalar_dest = TREE_OPERAND (stmt, 0); - if (TREE_CODE (scalar_dest) != ARRAY_REF - && TREE_CODE (scalar_dest) != INDIRECT_REF) - return false; - - op = TREE_OPERAND (stmt, 1); - if (!vect_is_simple_use (op, loop, NULL)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "use not simple."); - return false; - } - - vec_mode = TYPE_MODE (vectype); - /* FORNOW. In some cases can vectorize even if data-type not supported - (e.g. - array initialization with 0). */ - if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) - return false; - - if (!STMT_VINFO_DATA_REF (stmt_info)) - return false; - - if (!vec_stmt) /* transformation not required. */ - { - STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; - return true; - } - - /** Trasform. **/ - - if (vect_debug_details (NULL)) - fprintf (dump_file, "transform store"); - - /* Handle use - get the vectorized def from the defining stmt. */ - vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); - - /* Handle def. */ - data_ref = vect_create_data_ref (stmt, bsi); - - /* Arguments are ready. create the new vector stmt. */ - *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); - - return true; -} - - -/* vectorizable_load. - - Check if STMT reads a non scalar data-ref (array/pointer/structure) that - can be vectorized. - If VEC_STMT is also passed, vectorize the STMT: create a vectorized - stmt to replace it, put it in VEC_STMT, and insert it at BSI. - Return FALSE if not a vectorizable STMT, TRUE otherwise. */ - -static bool -vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) -{ - tree scalar_dest; - tree vec_dest = NULL; - tree data_ref = NULL; - tree op; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - tree new_temp; - enum machine_mode vec_mode; - - /* Is vectorizable load? */ - - if (TREE_CODE (stmt) != MODIFY_EXPR) - return false; - - scalar_dest = TREE_OPERAND (stmt, 0); - if (TREE_CODE (scalar_dest) != SSA_NAME) - return false; - - op = TREE_OPERAND (stmt, 1); - if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) - return false; - - if (!STMT_VINFO_DATA_REF (stmt_info)) - return false; - - vec_mode = TYPE_MODE (vectype); - /* FORNOW. In some cases can vectorize even if data-type not supported - (e.g. - data copies). */ - if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) - return false; - - if (!vec_stmt) /* transformation not required. */ - { - STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; - return true; - } - - /** Trasform. **/ - - if (vect_debug_details (NULL)) - fprintf (dump_file, "transform load."); - - /* Handle def. */ - vec_dest = vect_create_destination_var (scalar_dest, vectype); - - /* Handle use. */ - op = TREE_OPERAND (stmt, 1); - data_ref = vect_create_data_ref (stmt, bsi); - - /* Arguments are ready. create the new vector stmt. */ - *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - TREE_OPERAND (*vec_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); - - return true; -} - - -/* Function vect_transform_stmt. - - Create a vectorized stmt to replace STMT, and insert it at BSI. */ - -static bool -vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) -{ - bool is_store = false; - tree vec_stmt = NULL_TREE; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - - switch (STMT_VINFO_TYPE (stmt_info)) - { - case op_vec_info_type: - if (!vectorizable_operation (stmt, bsi, &vec_stmt)) - abort (); - break; - - case assignment_vec_info_type: - if (!vectorizable_assignment (stmt, bsi, &vec_stmt)) - abort (); - break; - - case load_vec_info_type: - if (!vectorizable_load (stmt, bsi, &vec_stmt)) - abort (); - break; - - case store_vec_info_type: - if (!vectorizable_store (stmt, bsi, &vec_stmt)) - abort (); - is_store = true; - break; - default: - if (vect_debug_details (NULL)) - fprintf (dump_file, "stmt not supported."); - abort (); - } - - STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; - - return is_store; -} - - -/* Function vect_transform_loop_bound. - - Create a new exit condition for the loop. */ - -static void -vect_transform_loop_bound (loop_vec_info loop_vinfo) -{ - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - edge exit_edge = loop->single_exit; - block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src); - tree indx_before_incr, indx_after_incr; - tree orig_cond_expr; - HOST_WIDE_INT old_N = 0; - int vf; - tree cond_stmt; - tree new_loop_bound; - tree cond; - tree lb_type; - -#ifdef ENABLE_CHECKING - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - abort (); -#endif - old_N = LOOP_VINFO_NITERS (loop_vinfo); - vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - -#ifdef ENABLE_CHECKING - /* FORNOW: - assuming number-of-iterations divides by the vectorization factor. */ - if (old_N % vf) - abort (); -#endif - - orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo); -#ifdef ENABLE_CHECKING - if (!orig_cond_expr) - abort (); -#endif - if (orig_cond_expr != bsi_stmt (loop_exit_bsi)) - abort (); - - create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, - &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr); - - /* bsi_insert is using BSI_NEW_STMT. We need to bump it back - to point to the exit condition. */ - bsi_next (&loop_exit_bsi); - if (bsi_stmt (loop_exit_bsi) != orig_cond_expr) - abort (); - - /* new loop exit test: */ - lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1)); - new_loop_bound = build_int_cst (lb_type, old_N/vf); - - if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ - cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound); - else /* 'then' edge loops back. */ - cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound); - - cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond, - TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2)); - - bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT); - - /* remove old loop exit test: */ - bsi_remove (&loop_exit_bsi); - - if (vect_debug_details (NULL)) - print_generic_expr (dump_file, cond_stmt, TDF_SLIM); -} - - -/* Function vect_transform_loop. - - The analysis phase has determined that the loop is vectorizable. - Vectorize the loop - created vectorized stmts to replace the scalar - stmts in the loop, and update the loop exit condition. */ - -static void -vect_transform_loop (loop_vec_info loop_vinfo, - struct loops *loops ATTRIBUTE_UNUSED) -{ - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - block_stmt_iterator si; - int i; -#ifdef ENABLE_CHECKING - int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); -#endif - - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - - /* 1) Make sure the loop header has exactly two entries - 2) Make sure we have a preheader basic block. */ - - if (!loop->header->pred->pred_next - || loop->header->pred->pred_next->pred_next) - abort (); - - loop_split_edge_with (loop_preheader_edge (loop), NULL); - - - /* FORNOW: the vectorizer supports only loops which body consist - of one basic block (header + empty latch). When the vectorizer will - support more involved loop forms, the order by which the BBs are - traversed need to be reconsidered. */ - - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - - for (si = bsi_start (bb); !bsi_end_p (si);) - { - tree stmt = bsi_stmt (si); - stmt_vec_info stmt_info; - bool is_store; -#ifdef ENABLE_CHECKING - tree vectype; -#endif - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "------>vectorizing statement: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - stmt_info = vinfo_for_stmt (stmt); -#ifdef ENABLE_CHECKING - if (!stmt_info) - abort (); -#endif - if (!STMT_VINFO_RELEVANT_P (stmt_info)) - { - bsi_next (&si); - continue; - } -#ifdef ENABLE_CHECKING - /* FORNOW: Verify that all stmts operate on the same number of - units and no inner unrolling is necessary. */ - vectype = STMT_VINFO_VECTYPE (stmt_info); - if (GET_MODE_NUNITS (TYPE_MODE (vectype)) != vectorization_factor) - abort (); -#endif - /* -------- vectorize statement ------------ */ - if (vect_debug_details (NULL)) - fprintf (dump_file, "transform statement."); - - is_store = vect_transform_stmt (stmt, &si); - if (is_store) - { - /* free the attached stmt_vec_info and remove the stmt. */ - stmt_ann_t ann = stmt_ann (stmt); - free (stmt_info); - set_stmt_info (ann, NULL); - bsi_remove (&si); - continue; - } - - bsi_next (&si); - } /* stmts in BB */ - } /* BBs in loop */ - - vect_transform_loop_bound (loop_vinfo); - - if (vect_debug_details (loop)) - fprintf (dump_file,"Success! loop vectorized."); - if (vect_debug_stats (loop)) - fprintf (dump_file, "LOOP VECTORIZED."); -} - - -/* Function vect_is_simple_use. - - Input: - LOOP - the loop that is being vectorized. - OPERAND - operand of a stmt in LOOP. - DEF - the defining stmt in case OPERAND is an SSA_NAME. - - Returns whether a stmt with OPERAND can be vectorized. - Supportable operands are constants, loop invariants, and operands that are - defined by the current iteration of the loop. Unsupportable opernads are - those that are defined by a previous iteration of the loop (as is the case - in reduction/induction computations). */ - -static bool -vect_is_simple_use (tree operand, struct loop *loop, tree *def) -{ - tree def_stmt; - basic_block bb; - - if (def) - *def = NULL_TREE; - - if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) - return true; - - if (TREE_CODE (operand) != SSA_NAME) - return false; - - def_stmt = SSA_NAME_DEF_STMT (operand); - if (def_stmt == NULL_TREE ) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "no def_stmt."); - return false; - } - - /* empty stmt is expected only in case of a function argument. - (Otherwise - we expect a phi_node or a modify_expr). */ - if (IS_EMPTY_STMT (def_stmt)) - { - tree arg = TREE_OPERAND (def_stmt, 0); - if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) - return true; - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "Unexpected empty stmt: "); - print_generic_expr (dump_file, def_stmt, TDF_SLIM); - } - return false; - } - - /* phi_node inside the loop indicates an induction/reduction pattern. - This is not supported yet. */ - bb = bb_for_stmt (def_stmt); - if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "reduction/induction - unsupported."); - return false; /* FORNOW: not supported yet. */ - } - - /* Expecting a modify_expr or a phi_node. */ - if (TREE_CODE (def_stmt) == MODIFY_EXPR - || TREE_CODE (def_stmt) == PHI_NODE) - { - if (def) - *def = def_stmt; - return true; - } - - return false; -} - - -/* Function vect_analyze_operations. - - Scan the loop stmts and make sure they are all vectorizable. */ - -static bool -vect_analyze_operations (loop_vec_info loop_vinfo) -{ - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - block_stmt_iterator si; - int vectorization_factor = 0; - int i; - bool ok; - tree scalar_type; - - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - tree stmt = bsi_stmt (si); - int nunits; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype; - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "==> examining statement: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } -#ifdef ENABLE_CHECKING - if (!stmt_info) - abort (); -#endif - /* skip stmts which do not need to be vectorized. - this is expected to include: - - the COND_EXPR which is the loop exit condition - - any LABEL_EXPRs in the loop - - computations that are used only for array indexing or loop - control */ - - if (!STMT_VINFO_RELEVANT_P (stmt_info)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "irrelevant."); - continue; - } - - if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, "not vectorized: vector stmt in loop:"); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } - - if (STMT_VINFO_DATA_REF (stmt_info)) - scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); - else if (TREE_CODE (stmt) == MODIFY_EXPR) - scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); - else - scalar_type = TREE_TYPE (stmt); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "get vectype for scalar type: "); - print_generic_expr (dump_file, scalar_type, TDF_SLIM); - } - - vectype = get_vectype_for_scalar_type (scalar_type); - if (!vectype) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, "not vectorized: unsupported data-type "); - print_generic_expr (dump_file, scalar_type, TDF_SLIM); - } - return false; - } - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "vectype: "); - print_generic_expr (dump_file, vectype, TDF_SLIM); - } - STMT_VINFO_VECTYPE (stmt_info) = vectype; - - ok = (vectorizable_operation (stmt, NULL, NULL) - || vectorizable_assignment (stmt, NULL, NULL) - || vectorizable_load (stmt, NULL, NULL) - || vectorizable_store (stmt, NULL, NULL)); - - if (!ok) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, "not vectorized: stmt not supported: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } - - nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); - if (vect_debug_details (NULL)) - fprintf (dump_file, "nunits = %d", nunits); - - if (vectorization_factor) - { - /* FORNOW: don't allow mixed units. - This restriction will be relaxed in the future. */ - if (nunits != vectorization_factor) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: mixed data-types"); - return false; - } - } - else - vectorization_factor = nunits; - } - } - - /* TODO: Analyze cost. Decide if worth while to vectorize. */ - if (!vectorization_factor) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unsupported data-type"); - return false; - } - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - - /* FORNOW: handle only cases where the loop bound divides by the - vectorization factor. */ - - if (vect_debug_details (NULL)) - fprintf (dump_file, - "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, - vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo)); - - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: Unknown loop bound."); - return false; - } - - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.", - vectorization_factor); - return false; - } - - return true; -} - - -/* Function exist_non_indexing_operands_for_use_p - - USE is one of the uses attached to STMT. Check if USE is - used in STMT for anything other than indexing an array. */ - -static bool -exist_non_indexing_operands_for_use_p (tree use, tree stmt) -{ - tree operand; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - - /* USE corresponds to some operand in STMT. If there is no data - reference in STMT, then any operand that corresponds to USE - is not indexing an array. */ - if (!STMT_VINFO_DATA_REF (stmt_info)) - return true; - - /* STMT has a data_ref. FORNOW this means that its of one of - the following forms: - -1- ARRAY_REF = var - -2- var = ARRAY_REF - (This should have been verified in analyze_data_refs). - - 'var' in the second case corresponds to a def, not a use, - so USE cannot correspond to any operands that are not used - for array indexing. - - Therefore, all we need to check is if STMT falls into the - first case, and whether var corresponds to USE. */ - - if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) - return false; - - operand = TREE_OPERAND (stmt, 1); - - if (TREE_CODE (operand) != SSA_NAME) - return false; - - if (operand == use) - return true; - - return false; -} - - -/* Function vect_is_simple_iv_evolution. - - FORNOW: A simple evolution of an induction variables in the loop is - considered a polynomial evolution with constant step. */ - -static bool -vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, - tree * step, bool strict) -{ - tree init_expr; - tree step_expr; - - tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); - - /* When there is no evolution in this loop, the evolution function - is not "simple". */ - if (evolution_part == NULL_TREE) - return false; - - /* When the evolution is a polynomial of degree >= 2 - the evolution function is not "simple". */ - if (tree_is_chrec (evolution_part)) - return false; - - step_expr = evolution_part; - init_expr = initial_condition (access_fn); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "step: "); - print_generic_expr (dump_file, step_expr, TDF_SLIM); - fprintf (dump_file, ", init: "); - print_generic_expr (dump_file, init_expr, TDF_SLIM); - } - - *init = init_expr; - *step = step_expr; - - if (TREE_CODE (step_expr) != INTEGER_CST) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "step unknown."); - return false; - } - - if (strict) - if (!integer_onep (step_expr)) - { - if (vect_debug_details (NULL)) - print_generic_expr (dump_file, step_expr, TDF_SLIM); - return false; - } - - return true; -} - - -/* Function vect_analyze_scalar_cycles. - - Examine the cross iteration def-use cycles of scalar variables, by - analyzing the loop (scalar) PHIs; verify that the cross iteration def-use - cycles that they represent do not impede vectorization. - - FORNOW: Reduction as in the following loop, is not supported yet: - loop1: - for (i=0; iheader; - tree dummy; - - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - - for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) - { - tree access_fn = NULL; - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "Analyze phi: "); - print_generic_expr (dump_file, phi, TDF_SLIM); - } - - /* Skip virtual phi's. The data dependences that are associated with - virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ - - if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "virtual phi. skip."); - continue; - } - - /* Analyze the evolution function. */ - - /* FORNOW: The only scalar cross-iteration cycles that we allow are - those of loop induction variables; This property is verified here. - - Furthermore, if that induction variable is used in an operation - that needs to be vectorized (i.e, is not solely used to index - arrays and check the exit condition) - we do not support its - vectorization yet. This property is verified in vect_is_simple_use, - during vect_analyze_operations. */ - - access_fn = instantiate_parameters - (loop, - analyze_scalar_evolution (loop, PHI_RESULT (phi))); - - if (!access_fn) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unsupported scalar cycle."); - return false; - } - - if (vect_debug_details (NULL)) + tree orig_phi, new_phi; + tree update_phi, update_phi2; + tree *new_name_ptr, *new_name_ptr2; + tree guard_arg, loop_arg; + basic_block new_merge_bb = guard_edge->dest; + edge e = EDGE_SUCC (new_merge_bb, 0); + basic_block update_bb = e->dest; + edge new_exit_e; + tree orig_def; + tree new_name, new_name2; + tree arg; + + /* Create new bb between loop and new_merge_bb. */ + *new_exit_bb = split_edge (loop->single_exit); + add_bb_to_loop (*new_exit_bb, loop->outer); + + new_exit_e = EDGE_SUCC (*new_exit_bb, 0); + + for (update_phi = phi_nodes (update_bb); update_phi; + update_phi = PHI_CHAIN (update_phi)) + { + orig_phi = update_phi; + orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); + new_name_ptr = SSA_NAME_AUX (orig_def); + arg = NULL_TREE; + + /** 1. Handle new-merge-point phis **/ + + /* 1.1. Generate new phi node in NEW_MERGE_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + new_merge_bb); + + /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge + of LOOP. Set the two phi args in NEW_PHI for these edges: */ + new_name = orig_def; + new_name2 = NULL_TREE; + if (new_name_ptr) { - fprintf (dump_file, "Access function of PHI: "); - print_generic_expr (dump_file, access_fn, TDF_SLIM); + new_name = *new_name_ptr; + new_name_ptr2 = SSA_NAME_AUX (new_name); + if (new_name_ptr2) + /* Some variables have both loop-entry-phis and loop-exit-phis. + Such variables were given yet newer names by phis placed in + guard_bb by slpeel_update_phi_nodes_for_guard1. I.e: + new_name2 = SSA_NAME_AUX (SSA_NAME_AUX (orig_name)). */ + new_name2 = *new_name_ptr2; } - - if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, - &dummy, false)) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unsupported scalar cycle."); - return false; - } - } - - return true; -} - - -/* Function vect_analyze_data_ref_dependence. - - Return TRUE if there (might) exist a dependence between a memory-reference - DRA and a memory-reference DRB. */ - -static bool -vect_analyze_data_ref_dependence (struct data_reference *dra, - struct data_reference *drb, - struct loop *loop) -{ - bool differ_p; - struct data_dependence_relation *ddr; - - if (!array_base_name_differ_p (dra, drb, &differ_p)) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) + + if (is_new_loop) { - fprintf (dump_file, - "not vectorized: can't determine dependence between: "); - print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); - fprintf (dump_file, " and "); - print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); + guard_arg = orig_def; + loop_arg = new_name; } - return true; - } - - if (differ_p) - return false; - - ddr = initialize_data_dependence_relation (dra, drb); - compute_affine_dependence (ddr); - - if (DDR_ARE_DEPENDENT (ddr) == chrec_known) - return false; - - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, - "not vectorized: possible dependence between data-refs "); - print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM); - fprintf (dump_file, " and "); - print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM); - } - - return true; -} - - -/* Function vect_analyze_data_ref_dependences. - - Examine all the data references in the loop, and make sure there do not - exist any data dependences between them. - - TODO: dependences which distance is greater than the vectorization factor - can be ignored. */ - -static bool -vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) -{ - unsigned int i, j; - varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); - varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + else + { + guard_arg = new_name; + loop_arg = orig_def; + } + if (new_name2) + guard_arg = new_name2; + + add_phi_arg (new_phi, loop_arg, new_exit_e); + add_phi_arg (new_phi, guard_arg, guard_edge); - /* Examine store-store (output) dependences. */ + /* 1.3. Update phi in successor block. */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def); + SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); + update_phi2 = new_phi; - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - if (vect_debug_details (NULL)) - fprintf (dump_file, "compare all store-store pairs."); + /** 2. Handle loop-closed-ssa-form phis **/ - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++) - { - for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) - { - struct data_reference *dra = - VARRAY_GENERIC_PTR (loop_write_refs, i); - struct data_reference *drb = - VARRAY_GENERIC_PTR (loop_write_refs, j); - if (vect_analyze_data_ref_dependence (dra, drb, loop)) - return false; - } - } + /* 2.1. Generate new phi node in NEW_EXIT_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + *new_exit_bb); - /* Examine load-store (true/anti) dependences. */ + /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ + add_phi_arg (new_phi, loop_arg, loop->single_exit); - if (vect_debug_details (NULL)) - fprintf (dump_file, "compare all load-store pairs."); + /* 2.3. Update phi in successor of NEW_EXIT_BB: */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); + SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++) - { - for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++) - { - struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i); - struct data_reference *drb = - VARRAY_GENERIC_PTR (loop_write_refs, j); - if (vect_analyze_data_ref_dependence (dra, drb, loop)) - return false; - } - } - return true; -} + /** 3. Handle loop-closed-ssa-form phis for first loop **/ + /* 3.1. Find the relevant names that need an exit-phi in GUARD_BB, i.e. + names for which slpeel_update_phi_nodes_for_guard1 had not already + created a phi node. This is the case for names that are used outside + the loop (and therefore need an exit phi) but are not updated + across loop iterations (and therefore don't have a loop-header-phi). -/* Function vect_get_first_index. + slpeel_update_phi_nodes_for_guard1 is responsible for creating + loop-exit phis in GUARD_BB for names that have a loop-header-phi. When + such a phi is created we also record the new name in SSA_NAME_AUX. If + this new name exists, then guard_arg was set to this new name + (see 1.2 above). Therefore, if guard_arg is not this new name, this is + an indication that an exit-phi in GUARD_BB was not yet created, so we + take care of it here. + */ + if (guard_arg == new_name2) + continue; + arg = guard_arg; - REF is a data reference. - If it is an ARRAY_REF: if its lower bound is simple enough, - put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE. - If it is not an ARRAY_REF: REF has no "first index"; - ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */ + /* 3.2. Generate new phi node in GUARD_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + guard_edge->src); -static bool -vect_get_first_index (tree ref, tree *array_first_index) -{ - tree array_start; + /* 3.3. GUARD_BB has one incoming edge: */ + gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1); + add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0)); - if (TREE_CODE (ref) != ARRAY_REF) - *array_first_index = size_zero_node; - else - { - array_start = array_ref_low_bound (ref); - if (!host_integerp (array_start,0)) - { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "array min val not simple integer cst."); - print_generic_expr (dump_file, array_start, TDF_DETAILS); - } - return false; - } - *array_first_index = array_start; + /* 3.4. Update phi in successor of GUARD_BB: */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge) + == guard_arg); + SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi)); } - return true; + set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); } -/* Function vect_compute_data_ref_alignment - - Compute the misalignment of the data reference DR. +/* Make the LOOP iterate NITERS times. This is done by adding a new IV + that starts at zero, increases by one and its limit is NITERS. - FOR NOW: No analysis is actually performed. Misalignment is calculated - only for trivial cases. TODO. */ + Assumption: the exit-condition of LOOP is the last stmt in the loop. */ -static void -vect_compute_data_ref_alignment (struct data_reference *dr, - loop_vec_info loop_vinfo ATTRIBUTE_UNUSED) +void +slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) { - tree stmt = DR_STMT (dr); - tree ref = DR_REF (dr); - tree vectype; - tree access_fn = DR_ACCESS_FN (dr, 0); /* FORNOW: single access_fn. */ - tree init; - tree scalar_type; - tree misalign; - tree array_first_index; - tree array_base = DR_BASE_NAME (dr); - tree base_decl = NULL_TREE; - tree bit_offset = size_zero_node; - tree offset = size_zero_node; - tree unit_bits = build_int_cst (unsigned_type_node, BITS_PER_UNIT); - tree nunits; - tree alignment; - - if (vect_debug_details (NULL)) - fprintf (dump_file, "vect_compute_data_ref_alignment:"); - - /* Initialize misalignment to unknown. */ - DR_MISALIGNMENT (dr) = -1; - - scalar_type = TREE_TYPE (ref); - vectype = get_vectype_for_scalar_type (scalar_type); - if (!vectype) + tree indx_before_incr, indx_after_incr, cond_stmt, cond; + tree orig_cond; + edge exit_edge = loop->single_exit; + block_stmt_iterator loop_cond_bsi; + block_stmt_iterator incr_bsi; + bool insert_after; + tree begin_label = tree_block_label (loop->latch); + tree exit_label = tree_block_label (loop->single_exit->dest); + tree init = build_int_cst (TREE_TYPE (niters), 0); + tree step = build_int_cst (TREE_TYPE (niters), 1); + tree then_label; + tree else_label; + LOC loop_loc; + + orig_cond = get_loop_exit_condition (loop); +#ifdef ENABLE_CHECKING + gcc_assert (orig_cond); +#endif + loop_cond_bsi = bsi_for_stmt (orig_cond); + + standard_iv_increment_position (loop, &incr_bsi, &insert_after); + create_iv (init, step, NULL_TREE, loop, + &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr); + + if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "no vectype for stmt: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - fprintf (dump_file, "scalar_type: "); - print_generic_expr (dump_file, scalar_type, TDF_DETAILS); - } - return; + cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters); + then_label = build1 (GOTO_EXPR, void_type_node, exit_label); + else_label = build1 (GOTO_EXPR, void_type_node, begin_label); } - - if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (array_base))) < TYPE_ALIGN (vectype)) + else /* 'then' edge loops back. */ { - base_decl = vect_get_base_decl_and_bit_offset (array_base, &bit_offset); - if (!base_decl) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "Unknown alignment for access"); - return; - } - - offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1); - bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1); - if (!integer_zerop (bit_offset)) - { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "bit offset alignment: "); - print_generic_expr (dump_file, bit_offset, TDF_SLIM); - } - return; - } - - if (!base_decl || - (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype) - && !vect_can_force_dr_alignment_p (base_decl, TYPE_ALIGN (vectype)))) - { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "can't force alignment of ref: "); - print_generic_expr (dump_file, array_base, TDF_SLIM); - } - return; - } - - if (DECL_ALIGN (base_decl) < TYPE_ALIGN (vectype)) - { - /* Force the alignment of the decl. - NOTE: This is the only change to the code we make during - the analysis phase, before deciding to vectorize the loop. */ - if (vect_debug_details (NULL)) - fprintf (dump_file, "force alignment"); - DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype); - DECL_USER_ALIGN (base_decl) = TYPE_ALIGN (vectype); - } + cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters); + then_label = build1 (GOTO_EXPR, void_type_node, begin_label); + else_label = build1 (GOTO_EXPR, void_type_node, exit_label); } - /* The misalignement is: - (base_alignment + offset + index_access_fn_init) % alignment. - At this point we already guaranteed that base_alignment == 0, - and computed the offset. - It remains to check the first index accessed. */ + cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond, + then_label, else_label); + bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT); + + /* Remove old loop exit test: */ + bsi_remove (&loop_cond_bsi); - if (!vect_get_first_index (ref, &array_first_index)) + loop_loc = find_loop_location (loop); + if (dump_file && (dump_flags & TDF_DETAILS)) { - if (vect_debug_details (NULL)) - fprintf (dump_file, "no first_index for array."); - return; + if (loop_loc != UNKNOWN_LOC) + fprintf (dump_file, "\nloop at %s:%d: ", + LOC_FILE (loop_loc), LOC_LINE (loop_loc)); + print_generic_expr (dump_file, cond_stmt, TDF_SLIM); } - - /* Check the index of the array_ref. */ - init = initial_condition (access_fn); + loop->nb_iterations = niters; +} - /* FORNOW: In order to simplify the handling of alignment, we make sure - that the first location at which the array is accessed ('init') is on an - 'NUNITS' boundary, since we are assuming here that 'array base' is aligned. - This is too conservative, since we require that - both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of - NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}. - This should be relaxed in the future. */ - if (!init || !host_integerp (init,0)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "init not simple INTEGER_CST."); - return; - } +/* Given LOOP this function generates a new copy of it and puts it + on E which is either the entry or exit of LOOP. */ - /* alignment required, in bytes: */ - alignment = build_int_cst (unsigned_type_node, - TYPE_ALIGN (vectype)/BITS_PER_UNIT); - /* bytes per scalar element: */ - nunits = build_int_cst (unsigned_type_node, - GET_MODE_SIZE (TYPE_MODE (scalar_type))); +static struct loop * +slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, + edge e) +{ + struct loop *new_loop; + basic_block *new_bbs, *bbs; + bool at_exit; + bool was_imm_dom; + basic_block exit_dest; + tree phi, phi_arg; - /* misalign = (offset + (init-array_first_index)*nunits) % alignment */ - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "misalign = ( offset <"); - print_generic_expr (dump_file, offset, TDF_SLIM); - fprintf (dump_file, "> + (init <"); - print_generic_expr (dump_file, init, TDF_SLIM); - fprintf (dump_file, "> - first_indx <"); - print_generic_expr (dump_file, array_first_index, TDF_SLIM); - fprintf (dump_file, ">) * nunits <"); - print_generic_expr (dump_file, nunits, TDF_SLIM); - fprintf (dump_file, ">) mod alignment <"); - print_generic_expr (dump_file, alignment, TDF_SLIM); - fprintf (dump_file, ">"); - } + at_exit = (e == loop->single_exit); + if (!at_exit && e != loop_preheader_edge (loop)) + return NULL; - misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0); - misalign = int_const_binop (MULT_EXPR, misalign, nunits, 0); - misalign = int_const_binop (PLUS_EXPR, misalign, offset, 0); - misalign = int_const_binop (TRUNC_MOD_EXPR, misalign, alignment, 0); + bbs = get_loop_body (loop); - if (vect_debug_details (NULL)) + /* Check whether duplication is possible. */ + if (!can_copy_bbs_p (bbs, loop->num_nodes)) { - fprintf (dump_file, "misalign = "); - print_generic_expr (dump_file, misalign, TDF_SLIM); + free (bbs); + return NULL; } - if (!host_integerp (misalign,1) || TREE_OVERFLOW (misalign)) + /* Generate new loop structure. */ + new_loop = duplicate_loop (loops, loop, loop->outer); + if (!new_loop) { - if (vect_debug_details (NULL)) - fprintf (dump_file, "unexpected misalign value"); - return; + free (bbs); + return NULL; } - DR_MISALIGNMENT (dr) = tree_low_cst (misalign,1); - - if (vect_debug_details (NULL)) - fprintf (dump_file, "misalign = %d",DR_MISALIGNMENT (dr)); -} - + exit_dest = loop->single_exit->dest; + was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, + exit_dest) == loop->header ? + true : false); -/* Function vect_compute_data_refs_alignment + new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes); - Compute the misalignment of data references in the loop. - This pass may take place at function granularity instead of at loop - granularity. + copy_bbs (bbs, loop->num_nodes, new_bbs, + &loop->single_exit, 1, &new_loop->single_exit, NULL); - FOR NOW: No analysis is actually performed. Misalignment is calculated - only for trivial cases. TODO. */ + /* Duplicating phi args at exit bbs as coming + also from exit of duplicated loop. */ + for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi)) + { + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit); + if (phi_arg) + { + edge new_loop_exit_edge; -static void -vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) -{ - varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); - varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); - unsigned int i; + if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) + new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); + else + new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) - { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - vect_compute_data_ref_alignment (dr, loop_vinfo); - } + add_phi_arg (phi, phi_arg, new_loop_exit_edge); + } + } + + if (at_exit) /* Add the loop copy at exit. */ + { + redirect_edge_and_branch_force (e, new_loop->header); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); + if (was_imm_dom) + set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); + } + else /* Add the copy at entry. */ + { + edge new_exit_e; + edge entry_e = loop_preheader_edge (loop); + basic_block preheader = entry_e->src; + + if (!flow_bb_inside_loop_p (new_loop, + EDGE_SUCC (new_loop->header, 0)->dest)) + new_exit_e = EDGE_SUCC (new_loop->header, 0); + else + new_exit_e = EDGE_SUCC (new_loop->header, 1); + + redirect_edge_and_branch_force (new_exit_e, loop->header); + set_immediate_dominator (CDI_DOMINATORS, loop->header, + new_exit_e->src); + + /* We have to add phi args to the loop->header here as coming + from new_exit_e edge. */ + for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) + { + phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); + if (phi_arg) + add_phi_arg (phi, phi_arg, new_exit_e); + } - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) - { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - vect_compute_data_ref_alignment (dr, loop_vinfo); + redirect_edge_and_branch_force (entry_e, new_loop->header); + set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); } -} - -/* Function vect_enhance_data_refs_alignment + free (new_bbs); + free (bbs); - This pass will use loop versioning and loop peeling in order to enhance - the alignment of data references in the loop. + return new_loop; +} - FOR NOW: we assume that whatever versioning/peeling takes place, only the - original loop is to be vectorized; Any other loops that are created by - the transformations performed in this pass - are not supposed to be - vectorized. This restriction will be relaxed. - FOR NOW: No transformation is actually performed. TODO. */ +/* Given the condition statement COND, put it as the last statement + of GUARD_BB; EXIT_BB is the basic block to skip the loop; + Assumes that this is the single exit of the guarded loop. + Returns the skip edge. */ -static void -vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED) +static edge +slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, + basic_block dom_bb) { - /* - This pass will require a cost model to guide it whether to apply peeling - or versioning or a combination of the two. For example, the scheme that - intel uses when given a loop with several memory accesses, is as follows: - choose one memory access ('p') which alignment you want to force by doing - peeling. Then, either (1) generate a loop in which 'p' is aligned and all - other accesses are not necessarily aligned, or (2) use loop versioning to - generate one loop in which all accesses are aligned, and another loop in - which only 'p' is necessarily aligned. - - ("Automatic Intra-Register Vectorization for the Intel Architecture", - Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International - Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) - - Devising a cost model is the most critical aspect of this work. It will - guide us on which access to peel for, whether to use loop versioning, how - many versions to create, etc. The cost model will probably consist of - generic considerations as well as target specific considerations (on - powerpc for example, misaligned stores are more painful than misaligned - loads). - - Here is the general steps involved in alignment enhancements: - - -- original loop, before alignment analysis: - for (i=0; iflags &= ~EDGE_FALLTHRU; + enter_e->flags |= EDGE_FALSE_VALUE; + bsi = bsi_last (guard_bb); + + then_label = build1 (GOTO_EXPR, void_type_node, + tree_block_label (exit_bb)); + else_label = build1 (GOTO_EXPR, void_type_node, + tree_block_label (enter_e->dest)); + cond_stmt = build3 (COND_EXPR, void_type_node, cond, + then_label, else_label); + bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); + /* Add new edge to connect guard block to the merge/loop-exit block. */ + new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); + return new_e; +} - -- After vect_compute_data_refs_alignment: - for (i=0; isingle_exit; + edge entry_e = loop_preheader_edge (loop); + tree orig_cond = get_loop_exit_condition (loop); + block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); + if (any_marked_for_rewrite_p ()) + return false; -/* Function vect_analyze_data_refs_alignment + if (loop->inner + /* All loops have an outer scope; the only case loop->outer is NULL is for + the function itself. */ + || !loop->outer + || loop->num_nodes != 2 + || !empty_block_p (loop->latch) + || !loop->single_exit + /* Verify that new loop exit condition can be trivially modified. */ + || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi)) + || (e != exit_e && e != entry_e)) + return false; - Analyze the alignment of the data-references in the loop. - FOR NOW: Until support for misliagned accesses is in place, only if all - accesses are aligned can the loop be vectorized. This restriction will be - relaxed. */ + return true; +} -static bool -vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) +#ifdef ENABLE_CHECKING +void +slpeel_verify_cfg_after_peeling (struct loop *first_loop, + struct loop *second_loop) { - varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); - varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); - unsigned int i; + basic_block loop1_exit_bb = first_loop->single_exit->dest; + basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src; + basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src; + + /* A guard that controls whether the second_loop is to be executed or skipped + is placed in first_loop->exit. first_loopt->exit therefore has two + successors - one is the preheader of second_loop, and the other is a bb + after second_loop. + */ + gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2); + + /* 1. Verify that one of the successors of first_loopt->exit is the preheader + of second_loop. */ + + /* The preheader of new_loop is expected to have two predessors: + first_loop->exit and the block that precedes first_loop. */ + + gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2 + && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb + && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb) + || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb + && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb))); + + /* Verify that the other successor of first_loopt->exit is after the + second_loop. */ + /* TODO */ +} +#endif - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); +/* Function slpeel_tree_peel_loop_to_edge. + Peel the first (last) iterations of LOOP into a new prolog (epilog) loop + that is placed on the entry (exit) edge E of LOOP. After this transformation + we have two loops one after the other - first-loop iterates FIRST_NITERS + times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. - /* This pass may take place at function granularity instead of at loop - granularity. */ + Input: + - LOOP: the loop to be peeled. + - E: the exit or entry edge of LOOP. + If it is the entry edge, we peel the first iterations of LOOP. In this + case first-loop is LOOP, and second-loop is the newly created loop. + If it is the exit edge, we peel the last iterations of LOOP. In this + case, first-loop is the newly created loop, and second-loop is LOOP. + - NITERS: the number of iterations that LOOP iterates. + - FIRST_NITERS: the number of iterations that the first-loop should iterate. + - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible + for updating the loop bound of the first-loop to FIRST_NITERS. If it + is false, the caller of this function may want to take care of this + (this can be useful if we don't want new stmts added to first-loop). - vect_compute_data_refs_alignment (loop_vinfo); + Output: + The function returns a pointer to the new loop-copy, or NULL if it failed + to perform the transformation. + + The function generates two if-then-else guards: one before the first loop, + and the other before the second loop: + The first guard is: + if (FIRST_NITERS == 0) then skip the first loop, + and go directly to the second loop. + The second guard is: + if (FIRST_NITERS == NITERS) then skip the second loop. + + FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p). + FORNOW the resulting code will not be in loop-closed-ssa form. +*/ +struct loop* +slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, + edge e, tree first_niters, + tree niters, bool update_first_loop_count) +{ + struct loop *new_loop = NULL, *first_loop, *second_loop; + edge skip_e; + tree pre_condition; + bitmap definitions; + basic_block bb_before_second_loop, bb_after_second_loop; + basic_block bb_before_first_loop; + basic_block bb_between_loops; + basic_block new_exit_bb; + edge exit_e = loop->single_exit; + LOC loop_loc; + + if (!slpeel_can_duplicate_loop_p (loop, e)) + return NULL; + + /* We have to initialize cfg_hooks. Then, when calling + cfg_hooks->split_edge, the function tree_split_edge + is actually called and, when calling cfg_hooks->duplicate_block, + the function tree_duplicate_bb is called. */ + tree_register_cfg_hooks (); - /* This pass will use loop versioning and loop peeling in order to enhance - the alignment of data references in the loop. - FOR NOW: we assume that whatever versioning/peeling took place, the - original loop is to be vectorized. Any other loops that were created by - the transformations performed in this pass - are not supposed to be - vectorized. This restriction will be relaxed. */ - vect_enhance_data_refs_alignment (loop_vinfo); + /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP). + Resulting CFG would be: + first_loop: + do { + } while ... - /* Finally, check that loop can be vectorized. - FOR NOW: Until support for misaligned accesses is in place, only if all - accesses are aligned can the loop be vectorized. This restriction will be - relaxed. */ + second_loop: + do { + } while ... - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + orig_exit_bb: + */ + + if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e))) { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - if (!aligned_access_p (dr)) - { - if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) - || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) - fprintf (dump_file, "not vectorized: unaligned store."); - return false; - } + loop_loc = find_loop_location (loop); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (loop_loc != UNKNOWN_LOC) + fprintf (dump_file, "\n%s:%d: note: ", + LOC_FILE (loop_loc), LOC_LINE (loop_loc)); + fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n"); + } + return NULL; } - - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) + + if (e == exit_e) { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - if (!aligned_access_p (dr)) - { - if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) - || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) - fprintf (dump_file, "not vectorized: unaligned load."); - return false; - } + /* NEW_LOOP was placed after LOOP. */ + first_loop = loop; + second_loop = new_loop; + } + else + { + /* NEW_LOOP was placed before LOOP. */ + first_loop = new_loop; + second_loop = loop; } - return true; -} + definitions = marked_ssa_names (); + allocate_new_names (definitions); + slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); + rename_variables_in_loop (new_loop); -/* Function vect_analyze_data_ref_access. + /* 2. Add the guard that controls whether the first loop is executed. + Resulting CFG would be: - Analyze the access pattern of the data-reference DR. For now, a data access - has to consecutive and aligned to be considered vectorizable. */ + bb_before_first_loop: + if (FIRST_NITERS == 0) GOTO bb_before_second_loop + GOTO first-loop -static bool -vect_analyze_data_ref_access (struct data_reference *dr) -{ - varray_type access_fns = DR_ACCESS_FNS (dr); - tree access_fn; - tree init, step; + first_loop: + do { + } while ... - /* FORNOW: handle only one dimensional arrays. - This restriction will be relaxed in the future. */ - if (VARRAY_ACTIVE_SIZE (access_fns) != 1) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "multi dimensional array reference."); - return false; - } - access_fn = DR_ACCESS_FN (dr, 0); + bb_before_second_loop: - if (!vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num, - access_fn, &init, &step, true)) - { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "too complicated access function."); - print_generic_expr (dump_file, access_fn, TDF_SLIM); - } - return false; - } + second_loop: + do { + } while ... - return true; -} + orig_exit_bb: + */ + bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); + add_bb_to_loop (bb_before_first_loop, first_loop->outer); + bb_before_second_loop = split_edge (first_loop->single_exit); + add_bb_to_loop (bb_before_second_loop, first_loop->outer); -/* Function vect_analyze_data_ref_accesses. + pre_condition = + fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node)); + skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, + bb_before_second_loop, bb_before_first_loop); + slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop, + first_loop == new_loop, + &new_exit_bb, &definitions); - Analyze the access pattern of all the data references in the loop. - FORNOW: the only access pattern that is considered vectorizable is a - simple step 1 (consecutive) access. + /* 3. Add the guard that controls whether the second loop is executed. + Resulting CFG would be: - FORNOW: handle only one dimensional arrays, and pointer accesses. */ + bb_before_first_loop: + if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop) + GOTO first-loop -static bool -vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) -{ - unsigned int i; - varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); - varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); + first_loop: + do { + } while ... - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); + bb_between_loops: + if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop) + GOTO bb_before_second_loop - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) - { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - bool ok = vect_analyze_data_ref_access (dr); - if (!ok) - { - if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) - || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) - fprintf (dump_file, "not vectorized: complicated access pattern."); - return false; - } - } + bb_before_second_loop: - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) - { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - bool ok = vect_analyze_data_ref_access (dr); - if (!ok) - { - if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) - || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) - fprintf (dump_file, "not vectorized: complicated access pattern."); - return false; - } - } + second_loop: + do { + } while ... - return true; -} + bb_after_second_loop: + + orig_exit_bb: + */ + bb_between_loops = new_exit_bb; + bb_after_second_loop = split_edge (second_loop->single_exit); + add_bb_to_loop (bb_after_second_loop, second_loop->outer); -/* Function vect_analyze_pointer_ref_access. + pre_condition = + fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters)); + skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, + bb_after_second_loop, bb_before_first_loop); + slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop, + second_loop == new_loop, &new_exit_bb); - Input: - STMT - a stmt that contains a data-ref - MEMREF - a data-ref in STMT, which is an INDIRECT_REF. + /* 4. Make first-loop iterate FIRST_NITERS times, if requested. + */ + if (update_first_loop_count) + slpeel_make_loop_iterate_ntimes (first_loop, first_niters); + + free_new_names (definitions); + BITMAP_FREE (definitions); + unmark_all_for_rewrite (); - If the data-ref access is vectorizable, return a data_reference structure - that represents it (DR). Otherwise - return NULL. */ + return new_loop; +} + +/* Function vect_get_loop_location. -static struct data_reference * -vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read) + Extract the location of the loop in the source code. + If the loop is not well formed for vectorization, an estimated + location is calculated. + Return the loop location if succeed and NULL if not. */ + +LOC +find_loop_location (struct loop *loop) { - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - struct loop *loop = STMT_VINFO_LOOP (stmt_info); - tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0)); - tree init, step; - int step_val; - tree reftype, innertype; - enum machine_mode innermode; - tree indx_access_fn; - int loopnum = loop->num; - struct data_reference *dr; - - if (!access_fn) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: complicated pointer access."); - return NULL; - } + tree node = NULL_TREE; + basic_block bb; + block_stmt_iterator si; - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "Access function of ptr: "); - print_generic_expr (dump_file, access_fn, TDF_SLIM); - } + if (!loop) + return UNKNOWN_LOC; - if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false)) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: pointer access is not simple."); - return NULL; - } - - if (TREE_CODE (init) != SSA_NAME /* FORNOW */ - || !host_integerp (step,0)) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, - "not vectorized: non constant init/step for pointer access."); - return NULL; - } + node = get_loop_exit_condition (loop); - step_val = TREE_INT_CST_LOW (step); + if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node) + && EXPR_FILENAME (node) && EXPR_LINENO (node)) + return EXPR_LOC (node); - reftype = TREE_TYPE (TREE_OPERAND (memref, 0)); - if (TREE_CODE (reftype) != POINTER_TYPE) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unexpected pointer access form."); - return NULL; - } + /* If we got here the loop is probably not "well formed", + try to estimate the loop location */ - reftype = TREE_TYPE (init); - if (TREE_CODE (reftype) != POINTER_TYPE) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unexpected pointer access form."); - return NULL; - } + if (!loop->header) + return UNKNOWN_LOC; - innertype = TREE_TYPE (reftype); - innermode = TYPE_MODE (innertype); - if (GET_MODE_SIZE (innermode) != step_val) - { - /* FORNOW: support only consecutive access */ - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: non consecutive access."); - return NULL; - } + bb = loop->header; - indx_access_fn = - build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node); - if (vect_debug_details (NULL)) + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { - fprintf (dump_file, "Access function of ptr indx: "); - print_generic_expr (dump_file, indx_access_fn, TDF_SLIM); + node = bsi_stmt (si); + if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)) + return EXPR_LOC (node); } - dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read); - return dr; + + return UNKNOWN_LOC; } -/* Function vect_analyze_data_refs. +/************************************************************************* + Vectorization Debug Information. + *************************************************************************/ - Find all the data references in the loop. +/* Function vect_set_verbosity_level. - FORNOW: Handle aligned INDIRECT_REFs and one dimensional ARRAY_REFs - which base is really an array (not a pointer) and which alignment - can be forced. This restriction will be relaxed. */ + Called from toplev.c upon detection of the + -ftree-vectorizer-verbose=N option. */ -static bool -vect_analyze_data_refs (loop_vec_info loop_vinfo) +void +vect_set_verbosity_level (const char *val) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - block_stmt_iterator si; - int j; - struct data_reference *dr; - - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - - for (j = 0; j < nbbs; j++) - { - basic_block bb = bbs[j]; - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - bool is_read = false; - tree stmt = bsi_stmt (si); - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt); - v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt); - vuse_optype vuses = STMT_VUSE_OPS (stmt); - varray_type *datarefs = NULL; - int nvuses, nv_may_defs, nv_must_defs; - tree memref = NULL; - tree array_base; - tree symbl; - - /* Assumption: there exists a data-ref in stmt, if and only if - it has vuses/vdefs. */ - - if (!vuses && !v_may_defs && !v_must_defs) - continue; - - nvuses = NUM_VUSES (vuses); - nv_may_defs = NUM_V_MAY_DEFS (v_may_defs); - nv_must_defs = NUM_V_MUST_DEFS (v_must_defs); - - if (nvuses && (nv_may_defs || nv_must_defs)) - { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "unexpected vdefs and vuses in stmt: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } - - if (TREE_CODE (stmt) != MODIFY_EXPR) - { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "unexpected vops in stmt: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } - - if (vuses) - { - memref = TREE_OPERAND (stmt, 1); - datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo)); - is_read = true; - } - else /* vdefs */ - { - memref = TREE_OPERAND (stmt, 0); - datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); - is_read = false; - } - - if (TREE_CODE (memref) == INDIRECT_REF) - { - dr = vect_analyze_pointer_ref_access (memref, stmt, is_read); - if (! dr) - return false; - symbl = DR_BASE_NAME (dr); - } - else if (TREE_CODE (memref) == ARRAY_REF) - { - tree base; - tree offset = size_zero_node; - array_base = TREE_OPERAND (memref, 0); - - /* FORNOW: make sure that the array is one dimensional. - This restriction will be relaxed in the future. */ - if (TREE_CODE (array_base) == ARRAY_REF) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, - "not vectorized: multi-dimensional array."); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } - - dr = analyze_array (stmt, memref, is_read); - - /* Find the relevant symbol for aliasing purposes. */ - base = DR_BASE_NAME (dr); - switch (TREE_CODE (base)) - { - case VAR_DECL: - symbl = base; - break; - /* FORNOW: Disabled. - case INDIRECT_REF: - symbl = TREE_OPERAND (base, 0); - break; - */ - case COMPONENT_REF: - /* CHECKME: could have recorded more accurate information - - i.e, the actual FIELD_DECL that is being referenced - - but later passes expect VAR_DECL as the nmt. */ - symbl = vect_get_base_decl_and_bit_offset (base, &offset); - if (symbl) - break; - /* fall through */ - default: - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, - "not vectorized: unhandled struct/class field access "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } /* switch */ - } - else - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, "not vectorized: unhandled data ref: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - return false; - } - - /* Find and record the memtag assigned to this data-ref. */ - if (TREE_CODE (symbl) == VAR_DECL) - STMT_VINFO_MEMTAG (stmt_info) = symbl; - else if (TREE_CODE (symbl) == SSA_NAME) - { - tree tag; - symbl = SSA_NAME_VAR (symbl); - tag = get_var_ann (symbl)->type_mem_tag; - if (!tag) - { - tree ptr = TREE_OPERAND (memref, 0); - if (TREE_CODE (ptr) == SSA_NAME) - tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag; - } - if (!tag) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: no memtag for ref."); - return false; - } - STMT_VINFO_MEMTAG (stmt_info) = tag; - } - else - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, "not vectorized: unsupported data-ref: "); - print_generic_expr (dump_file, memref, TDF_SLIM); - } - return false; - } - - VARRAY_PUSH_GENERIC_PTR (*datarefs, dr); - STMT_VINFO_DATA_REF (stmt_info) = dr; - } - } + unsigned int vl; - return true; + vl = atoi (val); + if (vl < MAX_VERBOSITY_LEVEL) + vect_verbosity_level = vl; + else + vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1; } -/* Utility functions used by vect_mark_stmts_to_be_vectorized. */ +/* Function vect_set_dump_settings. -/* Function vect_mark_relevant. - - Mark STMT as "relevant for vectorization" and add it to WORKLIST. */ + Fix the verbosity level of the vectorizer if the + requested level was not set explicitly using the flag + -ftree-vectorizer-verbose=N. + Decide where to print the debugging information (dump_file/stderr). + If the user defined the verbosity level, but there is no dump file, + print to stderr, otherwise print to the dump file. */ static void -vect_mark_relevant (varray_type worklist, tree stmt) +vect_set_dump_settings (void) { - stmt_vec_info stmt_info; - - if (vect_debug_details (NULL)) - fprintf (dump_file, "mark relevant."); - - if (TREE_CODE (stmt) == PHI_NODE) - { - VARRAY_PUSH_TREE (worklist, stmt); - return; - } + vect_dump = dump_file; - stmt_info = vinfo_for_stmt (stmt); - - if (!stmt_info) + /* Check if the verbosity level was defined by the user: */ + if (vect_verbosity_level != MAX_VERBOSITY_LEVEL) { - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "mark relevant: no stmt info!!."); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } + /* If there is no dump file, print to stderr. */ + if (!dump_file) + vect_dump = stderr; return; } - if (STMT_VINFO_RELEVANT_P (stmt_info)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "already marked relevant."); - return; - } + /* User didn't specify verbosity level: */ + if (dump_file && (dump_flags & TDF_DETAILS)) + vect_verbosity_level = REPORT_DETAILS; + else if (dump_file && (dump_flags & TDF_STATS)) + vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS; + else + vect_verbosity_level = REPORT_NONE; - STMT_VINFO_RELEVANT_P (stmt_info) = 1; - VARRAY_PUSH_TREE (worklist, stmt); + gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); } -/* Function vect_stmt_relevant_p. +/* Function debug_loop_details. - Return true if STMT in loop that is represented by LOOP_VINFO is - "relevant for vectorization". + For vectorization debug dumps. */ - A stmt is considered "relevant for vectorization" if: - - it has uses outside the loop. - - it has vdefs (it alters memory). - - control stmts in the loop (except for the exit condition). +bool +vect_print_dump_info (enum verbosity_levels vl, LOC loc) +{ + if (vl > vect_verbosity_level) + return false; - CHECKME: what other side effects would the vectorizer allow? */ + if (loc == UNKNOWN_LOC) + fprintf (vect_dump, "\n%s:%d: note: ", + DECL_SOURCE_FILE (current_function_decl), + DECL_SOURCE_LINE (current_function_decl)); + else + fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc)); -static bool -vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo) -{ - v_may_def_optype v_may_defs; - v_must_def_optype v_must_defs; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - int i; - dataflow_t df; - int num_uses; - /* cond stmt other than loop exit cond. */ - if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) - return true; + return true; +} - /* changing memory. */ - v_may_defs = STMT_V_MAY_DEF_OPS (stmt); - v_must_defs = STMT_V_MUST_DEF_OPS (stmt); - if (v_may_defs || v_must_defs) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs."); - return true; - } - /* uses outside the loop. */ - df = get_immediate_uses (stmt); - num_uses = num_immediate_uses (df); - for (i = 0; i < num_uses; i++) - { - tree use = immediate_use (df, i); - basic_block bb = bb_for_stmt (use); - if (!flow_bb_inside_loop_p (loop, bb)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "vec_stmt_relevant_p: used out of loop."); - return true; - } - } +/************************************************************************* + Vectorization Utilities. + *************************************************************************/ - return false; -} +/* Function new_stmt_vec_info. + Create and initialize a new stmt_vec_info struct for STMT. */ -/* Function vect_mark_stmts_to_be_vectorized. +stmt_vec_info +new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) +{ + stmt_vec_info res; + res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); - Not all stmts in the loop need to be vectorized. For example: + STMT_VINFO_TYPE (res) = undef_vec_info_type; + STMT_VINFO_STMT (res) = stmt; + STMT_VINFO_LOOP_VINFO (res) = loop_vinfo; + STMT_VINFO_RELEVANT_P (res) = 0; + STMT_VINFO_VECTYPE (res) = NULL; + STMT_VINFO_VEC_STMT (res) = NULL; + STMT_VINFO_DATA_REF (res) = NULL; + STMT_VINFO_MEMTAG (res) = NULL; + STMT_VINFO_PTR_INFO (res) = NULL; + STMT_VINFO_SUBVARS (res) = NULL; + STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL; + STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE; + STMT_VINFO_VECT_STEP (res) = NULL_TREE; + STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false; + STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE; - for i... - for j... - 1. T0 = i + j - 2. T1 = a[T0] + return res; +} - 3. j = j + 1 - Stmt 1 and 3 do not need to be vectorized, because loop control and - addressing of vectorized data-refs are handled differently. +/* Function new_loop_vec_info. - This pass detects such stmts. */ + Create and initialize a new loop_vec_info struct for LOOP, as well as + stmt_vec_info structs for all the stmts in LOOP. */ -static bool -vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) +loop_vec_info +new_loop_vec_info (struct loop *loop) { - varray_type worklist; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - unsigned int nbbs = loop->num_nodes; + loop_vec_info res; + basic_block *bbs; block_stmt_iterator si; - tree stmt; - stmt_ann_t ann; unsigned int i; - int j; - use_optype use_ops; - stmt_vec_info stmt_info; - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - - VARRAY_TREE_INIT (worklist, 64, "work list"); + res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); - /* 1. Init worklist. */ + bbs = get_loop_body (loop); - for (i = 0; i < nbbs; i++) + /* Create stmt_info for all stmts in the loop. */ + for (i = 0; i < loop->num_nodes; i++) { basic_block bb = bbs[i]; for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { - stmt = bsi_stmt (si); - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "init: stmt relevant? "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } - - stmt_info = vinfo_for_stmt (stmt); - STMT_VINFO_RELEVANT_P (stmt_info) = 0; + tree stmt = bsi_stmt (si); + stmt_ann_t ann; - if (vect_stmt_relevant_p (stmt, loop_vinfo)) - vect_mark_relevant (worklist, stmt); + get_stmt_operands (stmt); + ann = stmt_ann (stmt); + set_stmt_info (ann, new_stmt_vec_info (stmt, res)); } } + LOOP_VINFO_LOOP (res) = loop; + LOOP_VINFO_BBS (res) = bbs; + LOOP_VINFO_EXIT_COND (res) = NULL; + LOOP_VINFO_NITERS (res) = NULL; + LOOP_VINFO_VECTORIZABLE_P (res) = 0; + LOOP_PEELING_FOR_ALIGNMENT (res) = 0; + LOOP_VINFO_VECT_FACTOR (res) = 0; + VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, + "loop_write_datarefs"); + VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, + "loop_read_datarefs"); + LOOP_VINFO_UNALIGNED_DR (res) = NULL; + LOOP_VINFO_LOC (res) = UNKNOWN_LOC; + + return res; +} - /* 2. Process_worklist */ - while (VARRAY_ACTIVE_SIZE (worklist) > 0) - { - stmt = VARRAY_TOP_TREE (worklist); - VARRAY_POP (worklist); +/* Function destroy_loop_vec_info. + + Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the + stmts in the loop. */ - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "worklist: examine stmt: "); - print_generic_expr (dump_file, stmt, TDF_SLIM); - } +void +destroy_loop_vec_info (loop_vec_info loop_vinfo) +{ + struct loop *loop; + basic_block *bbs; + int nbbs; + block_stmt_iterator si; + int j; - /* Examine the USES in this statement. Mark all the statements which - feed this statement's uses as "relevant", unless the USE is used as - an array index. */ + if (!loop_vinfo) + return; - if (TREE_CODE (stmt) == PHI_NODE) - { - /* follow the def-use chain inside the loop. */ - for (j = 0; j < PHI_NUM_ARGS (stmt); j++) - { - tree arg = PHI_ARG_DEF (stmt, j); - tree def_stmt = NULL_TREE; - basic_block bb; - if (!vect_is_simple_use (arg, loop, &def_stmt)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "worklist: unsupported use."); - varray_clear (worklist); - return false; - } - if (!def_stmt) - continue; - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "worklist: def_stmt: "); - print_generic_expr (dump_file, def_stmt, TDF_SLIM); - } - - bb = bb_for_stmt (def_stmt); - if (flow_bb_inside_loop_p (loop, bb)) - vect_mark_relevant (worklist, def_stmt); - } - } + loop = LOOP_VINFO_LOOP (loop_vinfo); - ann = stmt_ann (stmt); - use_ops = USE_OPS (ann); + bbs = LOOP_VINFO_BBS (loop_vinfo); + nbbs = loop->num_nodes; - for (i = 0; i < NUM_USES (use_ops); i++) + for (j = 0; j < nbbs; j++) + { + basic_block bb = bbs[j]; + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { - tree use = USE_OP (use_ops, i); - - /* We are only interested in uses that need to be vectorized. Uses - that are used for address computation are not considered relevant. - */ - if (exist_non_indexing_operands_for_use_p (use, stmt)) - { - tree def_stmt = NULL_TREE; - basic_block bb; - if (!vect_is_simple_use (use, loop, &def_stmt)) - { - if (vect_debug_details (NULL)) - fprintf (dump_file, "worklist: unsupported use."); - varray_clear (worklist); - return false; - } - - if (!def_stmt) - continue; - - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "worklist: examine use %d: ", i); - print_generic_expr (dump_file, use, TDF_SLIM); - } - - bb = bb_for_stmt (def_stmt); - if (flow_bb_inside_loop_p (loop, bb)) - vect_mark_relevant (worklist, def_stmt); - } + tree stmt = bsi_stmt (si); + stmt_ann_t ann = stmt_ann (stmt); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + free (stmt_info); + set_stmt_info (ann, NULL); } - } /* while worklist */ + } - varray_clear (worklist); - return true; + free (LOOP_VINFO_BBS (loop_vinfo)); + varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo)); + varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo)); + + free (loop_vinfo); } -/* Function vect_get_loop_niters. +/* Function vect_strip_conversions - Determine how many iterations the loop is executed. */ + Strip conversions that don't narrow the mode. */ -static tree -vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations) +tree +vect_strip_conversion (tree expr) { - tree niters; + tree to, ti, oprnd0; + + while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR) + { + to = TREE_TYPE (expr); + oprnd0 = TREE_OPERAND (expr, 0); + ti = TREE_TYPE (oprnd0); + + if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti)) + return NULL_TREE; + if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti))) + return NULL_TREE; + + expr = oprnd0; + } + return expr; +} - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<>\n"); - niters = number_of_iterations_in_loop (loop); +/* Function vect_force_dr_alignment_p. - if (niters != NULL_TREE - && niters != chrec_dont_know - && host_integerp (niters,0)) - { - *number_of_iterations = TREE_INT_CST_LOW (niters); + Returns whether the alignment of a DECL can be forced to be aligned + on ALIGNMENT bit boundary. */ - if (vect_debug_details (NULL)) - fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC, - *number_of_iterations); - } +bool +vect_can_force_dr_alignment_p (tree decl, unsigned int alignment) +{ + if (TREE_CODE (decl) != VAR_DECL) + return false; - return get_loop_exit_condition (loop); -} + if (DECL_EXTERNAL (decl)) + return false; + if (TREE_ASM_WRITTEN (decl)) + return false; -/* Function vect_analyze_loop_form. + if (TREE_STATIC (decl)) + return (alignment <= MAX_OFILE_ALIGNMENT); + else + /* This is not 100% correct. The absolute correct stack alignment + is STACK_BOUNDARY. We're supposed to hope, but not assume, that + PREFERRED_STACK_BOUNDARY is honored by all translation units. + However, until someone implements forced stack alignment, SSE + isn't really usable without this. */ + return (alignment <= PREFERRED_STACK_BOUNDARY); +} - Verify the following restrictions (some may be relaxed in the future): - - it's an inner-most loop - - number of BBs = 2 (which are the loop header and the latch) - - the loop has a pre-header - - the loop has a single entry and exit - - the loop exit condition is simple enough, and the number of iterations - can be analyzed (a countable loop). */ -static loop_vec_info -vect_analyze_loop_form (struct loop *loop) -{ - loop_vec_info loop_vinfo; - tree loop_cond; - HOST_WIDE_INT number_of_iterations = -1; +/* Function get_vectype_for_scalar_type. - if (vect_debug_details (loop)) - fprintf (dump_file, "\n<>\n"); + Returns the vector type corresponding to SCALAR_TYPE as supported + by the target. */ - if (loop->inner - || !loop->single_exit - || loop->num_nodes != 2) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - { - fprintf (dump_file, "not vectorized: bad loop form. "); - if (loop->inner) - fprintf (dump_file, "nested loop."); - else if (!loop->single_exit) - fprintf (dump_file, "multiple exits."); - else if (loop->num_nodes != 2) - fprintf (dump_file, "too many BBs in loop."); - } +tree +get_vectype_for_scalar_type (tree scalar_type) +{ + enum machine_mode inner_mode = TYPE_MODE (scalar_type); + int nbytes = GET_MODE_SIZE (inner_mode); + int nunits; + tree vectype; - return NULL; - } + if (nbytes == 0) + return NULL_TREE; - /* We assume that the loop exit condition is at the end of the loop. i.e, - that the loop is represented as a do-while (with a proper if-guard - before the loop if needed), where the loop header contains all the - executable statements, and the latch is empty. */ - if (!empty_block_p (loop->latch)) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unexpectd loop form."); - return NULL; - } + /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) + is expected. */ + nunits = UNITS_PER_SIMD_WORD / nbytes; - if (empty_block_p (loop->header)) + vectype = build_vector_type (scalar_type, nunits); + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: empty loop."); - return NULL; + fprintf (vect_dump, "get vectype with %d units of type ", nunits); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); } - loop_cond = vect_get_loop_niters (loop, &number_of_iterations); - if (!loop_cond) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: complicated exit condition."); - return NULL; - } + if (!vectype) + return NULL_TREE; - if (number_of_iterations < 0) + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unknown loop bound."); - return NULL; + fprintf (vect_dump, "vectype: "); + print_generic_expr (vect_dump, vectype, TDF_SLIM); } - if (number_of_iterations == 0) /* CHECKME: can this happen? */ + if (!VECTOR_MODE_P (TYPE_MODE (vectype))) { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: number of iterations = 0."); - return NULL; + /* TODO: tree-complex.c sometimes can parallelize operations + on generic vectors. We can vectorize the loop in that case, + but then we should re-run the lowering pass. */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "mode not supported by target."); + return NULL_TREE; } - loop_vinfo = new_loop_vec_info (loop); - LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; - LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; - - return loop_vinfo; + return vectype; } -/* Function vect_analyze_loop. +/* Function vect_supportable_dr_alignment - Apply a set of analyses on LOOP, and create a loop_vec_info struct - for it. The different analyses will record information in the - loop_vec_info struct. */ + Return whether the data reference DR is supported with respect to its + alignment. */ -static loop_vec_info -vect_analyze_loop (struct loop *loop) +enum dr_alignment_support +vect_supportable_dr_alignment (struct data_reference *dr) { - bool ok; - loop_vec_info loop_vinfo; - - if (vect_debug_details (NULL)) - fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n"); + tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); + enum machine_mode mode = (int) TYPE_MODE (vectype); - /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ + if (aligned_access_p (dr)) + return dr_aligned; - loop_vinfo = vect_analyze_loop_form (loop); - if (!loop_vinfo) + /* Possibly unaligned access. */ + + if (DR_IS_READ (dr)) { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad loop form."); - return NULL; + if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing + && (!targetm.vectorize.builtin_mask_for_load + || targetm.vectorize.builtin_mask_for_load ())) + return dr_unaligned_software_pipeline; + + if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing) + /* Can't software pipeline the loads, but can at least do them. */ + return dr_unaligned_supported; } - /* Find all data references in the loop (which correspond to vdefs/vuses) - and analyze their evolution in the loop. + /* Unsupported. */ + return dr_unaligned_unsupported; +} - FORNOW: Handle only simple, one-dimensional, array references, which - alignment can be forced, and aligned pointer-references. */ - ok = vect_analyze_data_refs (loop_vinfo); - if (!ok) - { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad data references."); - destroy_loop_vec_info (loop_vinfo); - return NULL; - } +/* Function vect_is_simple_use. + Input: + LOOP - the loop that is being vectorized. + OPERAND - operand of a stmt in LOOP. + DEF - the defining stmt in case OPERAND is an SSA_NAME. - /* Data-flow analysis to detect stmts that do not need to be vectorized. */ + Returns whether a stmt with OPERAND can be vectorized. + Supportable operands are constants, loop invariants, and operands that are + defined by the current iteration of the loop. Unsupportable operands are + those that are defined by a previous iteration of the loop (as is the case + in reduction/induction computations). */ - ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); - if (!ok) - { - if (vect_debug_details (loop)) - fprintf (dump_file, "unexpected pattern."); - if (vect_debug_details (loop)) - fprintf (dump_file, "not vectorized: unexpected pattern."); - destroy_loop_vec_info (loop_vinfo); - return NULL; - } +bool +vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def) +{ + tree def_stmt; + basic_block bb; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + if (def) + *def = NULL_TREE; + + if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) + return true; - /* Check that all cross-iteration scalar data-flow cycles are OK. - Cross-iteration cycles caused by virtual phis are analyzed separately. */ + if (TREE_CODE (operand) != SSA_NAME) + return false; - ok = vect_analyze_scalar_cycles (loop_vinfo); - if (!ok) + def_stmt = SSA_NAME_DEF_STMT (operand); + if (def_stmt == NULL_TREE ) { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad scalar cycle."); - destroy_loop_vec_info (loop_vinfo); - return NULL; + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "no def_stmt."); + return false; } - - /* Analyze data dependences between the data-refs in the loop. - FORNOW: fail at the first data dependence that we encounter. */ - - ok = vect_analyze_data_ref_dependences (loop_vinfo); - if (!ok) + /* empty stmt is expected only in case of a function argument. + (Otherwise - we expect a phi_node or a modify_expr). */ + if (IS_EMPTY_STMT (def_stmt)) { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad data dependence."); - destroy_loop_vec_info (loop_vinfo); - return NULL; + tree arg = TREE_OPERAND (def_stmt, 0); + if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) + return true; + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "Unexpected empty stmt: "); + print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + } + return false; } - - /* Analyze the access patterns of the data-refs in the loop (consecutive, - complex, etc.). FORNOW: Only handle consecutive access pattern. */ - - ok = vect_analyze_data_ref_accesses (loop_vinfo); - if (!ok) + /* phi_node inside the loop indicates an induction/reduction pattern. + This is not supported yet. */ + bb = bb_for_stmt (def_stmt); + if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb)) { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad data access."); - destroy_loop_vec_info (loop_vinfo); - return NULL; + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "reduction/induction - unsupported."); + return false; /* FORNOW: not supported yet. */ } - - /* Analyze the alignment of the data-refs in the loop. - FORNOW: Only aligned accesses are handled. */ - - ok = vect_analyze_data_refs_alignment (loop_vinfo); - if (!ok) + /* Expecting a modify_expr or a phi_node. */ + if (TREE_CODE (def_stmt) == MODIFY_EXPR + || TREE_CODE (def_stmt) == PHI_NODE) { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad data alignment."); - destroy_loop_vec_info (loop_vinfo); - return NULL; + if (def) + *def = def_stmt; + return true; } + return false; +} - /* Scan all the operations in the loop and make sure they are - vectorizable. */ - ok = vect_analyze_operations (loop_vinfo); - if (!ok) - { - if (vect_debug_details (loop)) - fprintf (dump_file, "bad operation or unsupported loop bound."); - destroy_loop_vec_info (loop_vinfo); - return NULL; - } +/* Function vect_is_simple_iv_evolution. - LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; + FORNOW: A simple evolution of an induction variables in the loop is + considered a polynomial evolution with constant step. */ - return loop_vinfo; -} +bool +vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, + tree * step) +{ + tree init_expr; + tree step_expr; + + tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); + /* When there is no evolution in this loop, the evolution function + is not "simple". */ + if (evolution_part == NULL_TREE) + return false; + + /* When the evolution is a polynomial of degree >= 2 + the evolution function is not "simple". */ + if (tree_is_chrec (evolution_part)) + return false; + + step_expr = evolution_part; + init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, + loop_nb)); -/* Function need_imm_uses_for. + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "step: "); + print_generic_expr (vect_dump, step_expr, TDF_SLIM); + fprintf (vect_dump, ", init: "); + print_generic_expr (vect_dump, init_expr, TDF_SLIM); + } - Return whether we ought to include information for 'var' - when calculating immediate uses. For this pass we only want use - information for non-virtual variables. */ + *init = init_expr; + *step = step_expr; -static bool -need_imm_uses_for (tree var) -{ - return is_gimple_reg (var); + if (TREE_CODE (step_expr) != INTEGER_CST) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "step unknown."); + return false; + } + + return true; } @@ -3417,16 +1824,21 @@ vectorize_loops (struct loops *loops) unsigned int i, loops_num; unsigned int num_vectorized_loops = 0; + /* Fix the verbosity level if not defined explicitly by the user. */ + vect_set_dump_settings (); + /* Does the target support SIMD? */ /* FORNOW: until more sophisticated machine modelling is in place. */ if (!UNITS_PER_SIMD_WORD) { - if (vect_debug_details (NULL)) - fprintf (dump_file, "vectorizer: target vector size is not defined."); + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "vectorizer: target vector size is not defined."); return; } - compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for); +#ifdef ENABLE_CHECKING + verify_loop_closed_ssa (); +#endif /* ----------- Analyze loops. ----------- */ @@ -3452,31 +1864,21 @@ vectorize_loops (struct loops *loops) num_vectorized_loops++; } - if (vect_debug_stats (NULL) || vect_debug_details (NULL)) - fprintf (dump_file, "\nvectorized %u loops in function.\n", + if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC)) + fprintf (vect_dump, "vectorized %u loops in function.\n", num_vectorized_loops); /* ----------- Finalize. ----------- */ - free_df (); for (i = 1; i < loops_num; i++) { struct loop *loop = loops->parray[i]; - loop_vec_info loop_vinfo = loop->aux; + loop_vec_info loop_vinfo; + if (!loop) - continue; + continue; + loop_vinfo = loop->aux; destroy_loop_vec_info (loop_vinfo); loop->aux = NULL; } - - loop_commit_inserts (); - rewrite_into_ssa (false); - if (bitmap_first_set_bit (vars_to_rename) >= 0) - { - /* The rewrite of ssa names may cause violation of loop closed ssa - form invariants. TODO -- avoid these rewrites completely. - Information in virtual phi nodes is sufficient for it. */ - rewrite_into_loop_closed_ssa (); - } - bitmap_clear (vars_to_rename); }