enum dr_alignment_support {
dr_unaligned_unsupported,
dr_unaligned_supported,
- dr_unaligned_software_pipeline,
+ dr_explicit_realign,
+ dr_explicit_realign_optimized,
dr_aligned
};
/* Define type of def-use cross-iteration cycle. */
enum vect_def_type {
- vect_constant_def,
+ vect_constant_def = 1,
vect_invariant_def,
vect_loop_def,
vect_induction_def,
REPORT_DR_DETAILS,
REPORT_BAD_FORM_LOOPS,
REPORT_OUTER_LOOPS,
+ REPORT_SLP,
REPORT_DETAILS,
/* New verbosity levels should be added before this one. */
MAX_VERBOSITY_LEVEL
};
+/************************************************************************
+ SLP
+ ************************************************************************/
+
+/* A computation tree of an SLP instance. Each node corresponds to a group of
+ stmts to be packed in a SIMD stmt. */
+typedef struct _slp_tree {
+ /* Only binary and unary operations are supported. LEFT child corresponds to
+ the first operand and RIGHT child to the second if the operation is
+ binary. */
+ struct _slp_tree *left;
+ struct _slp_tree *right;
+ /* A group of scalar stmts to be vectorized together. */
+ VEC (tree, heap) *stmts;
+ /* Vectorized stmt/s. */
+ VEC (tree, heap) *vec_stmts;
+ /* Number of vector stmts that are created to replace the group of scalar
+ stmts. It is calculated during the transformation phase as the number of
+ scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
+ divided by vector size. */
+ unsigned int vec_stmts_size;
+ /* Vectorization costs associated with SLP node. */
+ struct
+ {
+ int outside_of_loop; /* Statements generated outside loop. */
+ int inside_of_loop; /* Statements generated inside loop. */
+ } cost;
+} *slp_tree;
+
+
+/* SLP instance is a sequence of stmts in a loop that can be packed into
+ SIMD stmts. */
+typedef struct _slp_instance {
+ /* The root of SLP tree. */
+ slp_tree root;
+
+ /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */
+ unsigned int group_size;
+
+ /* The unrolling factor required to vectorized this SLP instance. */
+ unsigned int unrolling_factor;
+
+ /* Vectorization costs associated with SLP instance. */
+ struct
+ {
+ int outside_of_loop; /* Statements generated outside loop. */
+ int inside_of_loop; /* Statements generated inside loop. */
+ } cost;
+} *slp_instance;
+
+DEF_VEC_P(slp_instance);
+DEF_VEC_ALLOC_P(slp_instance, heap);
+
+/* Access Functions. */
+#define SLP_INSTANCE_TREE(S) (S)->root
+#define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size
+#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
+#define SLP_INSTANCE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
+#define SLP_INSTANCE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop
+
+#define SLP_TREE_LEFT(S) (S)->left
+#define SLP_TREE_RIGHT(S) (S)->right
+#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
+#define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
+#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
+#define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
+#define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop
+
/*-----------------------------------------------------------------*/
/* Info on vectorized loops. */
/*-----------------------------------------------------------------*/
/* The loop basic blocks. */
basic_block *bbs;
- /* The loop exit_condition. */
- tree exit_cond;
-
/* Number of iterations. */
tree num_iters;
/* All data dependences in the loop. */
VEC (ddr_p, heap) *ddrs;
+ /* Data Dependence Relations defining address ranges that are candidates
+ for a run-time aliasing check. */
+ VEC (ddr_p, heap) *may_alias_ddrs;
+
/* Statements in the loop that have data references that are candidates for a
runtime (loop versioning) misalignment check. */
VEC(tree,heap) *may_misalign_stmts;
/* The loop location in the source. */
LOC loop_line_number;
+
+ /* All interleaving chains of stores in the loop, represented by the first
+ stmt in the chain. */
+ VEC(tree, heap) *strided_stores;
+
+ /* All SLP instances in the loop. This is a subset of the set of STRIDED_STORES
+ of the loop. */
+ VEC(slp_instance, heap) *slp_instances;
+
+ /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
+ applied to the loop, i.e., no unrolling is needed, this is 1. */
+ unsigned slp_unrolling_factor;
} *loop_vec_info;
/* Access Functions. */
#define LOOP_VINFO_LOOP(L) (L)->loop
#define LOOP_VINFO_BBS(L) (L)->bbs
-#define LOOP_VINFO_EXIT_COND(L) (L)->exit_cond
#define LOOP_VINFO_NITERS(L) (L)->num_iters
#define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
#define LOOP_VINFO_LOC(L) (L)->loop_line_number
+#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
+#define LOOP_VINFO_STRIDED_STORES(L) (L)->strided_stores
+#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
+#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define NITERS_KNOWN_P(n) \
(host_integerp ((n),0) \
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
NITERS_KNOWN_P((L)->num_iters)
+static inline loop_vec_info
+loop_vec_info_for_loop (struct loop *loop)
+{
+ return (loop_vec_info) loop->aux;
+}
+
+static inline bool
+nested_in_vect_loop_p (struct loop *loop, tree stmt)
+{
+ return (loop->inner
+ && (loop->inner == (bb_for_stmt (stmt))->loop_father));
+}
+
/*-----------------------------------------------------------------*/
/* Info on vectorized defs. */
/*-----------------------------------------------------------------*/
induc_vec_info_type,
type_promotion_vec_info_type,
type_demotion_vec_info_type,
- type_conversion_vec_info_type
+ type_conversion_vec_info_type,
+ loop_exit_ctrl_vec_info_type
};
/* Indicates whether/how a variable is used in the loop. */
enum vect_relevant {
vect_unused_in_loop = 0,
+ vect_used_in_outer_by_reduction,
+ vect_used_in_outer,
/* defs that feed computations that end up (only) in a reduction. These
defs may be used by non-reduction stmts, but eventually, any
vect_used_in_loop
};
+/* The type of vectorization that can be applied to the stmt: regular loop-based
+ vectorization; pure SLP - the stmt is a part of SLP instances and does not
+ have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is
+ a part of SLP instance and also must be loop-based vectorized, since it has
+ uses outside SLP sequences.
+
+ In the loop context the meanings of pure and hybrid SLP are slightly
+ different. By saying that pure SLP is applied to the loop, we mean that we
+ exploit only intra-iteration parallelism in the loop; i.e., the loop can be
+ vectorized without doing any conceptual unrolling, cause we don't pack
+ together stmts from different iterations, only within a single iteration.
+ Loop hybrid SLP means that we exploit both intra-iteration and
+ inter-iteration parallelism (e.g., number of elements in the vector is 4
+ and the slp-group-size is 2, in which case we don't have enough parallelism
+ within an iteration, so we obtain the rest of the parallelism from subsequent
+ iterations by unrolling the loop by 2). */
+enum slp_vect_type {
+ loop_vect = 0,
+ pure_slp,
+ hybrid
+};
+
+
typedef struct data_reference *dr_p;
DEF_VEC_P(dr_p);
DEF_VEC_ALLOC_P(dr_p,heap);
data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
at most one such data-ref. **/
- /* Information about the data-ref (access function, etc). */
+ /* Information about the data-ref (access function, etc),
+ relative to the inner-most containing loop. */
struct data_reference *data_ref_info;
+ /* Information about the data-ref relative to this loop
+ nest (the loop that is being considered for vectorization). */
+ tree dr_base_address;
+ tree dr_init;
+ tree dr_offset;
+ tree dr_step;
+ tree dr_aligned_to;
+
/* Stmt is part of some pattern (computation idiom) */
bool in_pattern_p;
int outside_of_loop; /* Statements generated outside loop. */
int inside_of_loop; /* Statements generated inside loop. */
} cost;
+
+ /* Whether the stmt is SLPed, loop-based vectorized, or both. */
+ enum slp_vect_type slp_type;
} *stmt_vec_info;
/* Access Functions. */
#define STMT_VINFO_VECTYPE(S) (S)->vectype
#define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt
#define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
+
+#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
+#define STMT_VINFO_DR_INIT(S) (S)->dr_init
+#define STMT_VINFO_DR_OFFSET(S) (S)->dr_offset
+#define STMT_VINFO_DR_STEP(S) (S)->dr_step
+#define STMT_VINFO_DR_ALIGNED_TO(S) (S)->dr_aligned_to
+
#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define STMT_VINFO_DR_GROUP_GAP(S) (S)->gap
#define STMT_VINFO_DR_GROUP_SAME_DR_STMT(S)(S)->same_dr_stmt
#define STMT_VINFO_DR_GROUP_READ_WRITE_DEPENDENCE(S) (S)->read_write_dep
+#define STMT_VINFO_STRIDED_ACCESS(S) ((S)->first_dr != NULL)
#define DR_GROUP_FIRST_DR(S) (S)->first_dr
#define DR_GROUP_NEXT_DR(S) (S)->next_dr
#define STMT_VINFO_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
#define STMT_VINFO_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop
+#define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid)
+#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
+#define STMT_SLP_TYPE(S) (S)->slp_type
+
/* These are some defines for the initial implementation of the vectorizer's
cost model. These will later be target specific hooks. */
-/* Cost of conditional branch. */
-#ifndef TARG_COND_BRANCH_COST
-#define TARG_COND_BRANCH_COST 3
+/* Cost of conditional taken branch. */
+#ifndef TARG_COND_TAKEN_BRANCH_COST
+#define TARG_COND_TAKEN_BRANCH_COST 3
+#endif
+
+/* Cost of conditional not taken branch. */
+#ifndef TARG_COND_NOT_TAKEN_BRANCH_COST
+#define TARG_COND_NOT_TAKEN_BRANCH_COST 1
#endif
/* Cost of any scalar operation, excluding load and store. */
return false;
}
+static inline bool
+is_loop_header_bb_p (basic_block bb)
+{
+ if (bb == (bb->loop_father)->header)
+ return true;
+ gcc_assert (EDGE_COUNT (bb->preds) == 1);
+ return false;
+}
+
+static inline void
+stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
+ int cost)
+{
+ if (slp_node)
+ SLP_TREE_INSIDE_OF_LOOP_COST (slp_node) = cost;
+ else
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
+}
+
+static inline void
+stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
+ int cost)
+{
+ if (slp_node)
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (slp_node) = cost;
+ else
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
+}
+
+
/*-----------------------------------------------------------------*/
/* Info on data references alignment. */
/*-----------------------------------------------------------------*/
extern bool vect_is_simple_use (tree, loop_vec_info, tree *, tree *,
enum vect_def_type *);
extern bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *);
-extern tree vect_is_simple_reduction (struct loop *, tree);
-extern bool vect_can_force_dr_alignment_p (tree, unsigned int);
+extern tree vect_is_simple_reduction (loop_vec_info, tree);
+extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
extern enum dr_alignment_support vect_supportable_dr_alignment
(struct data_reference *);
extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *);
extern bool supportable_widening_operation (enum tree_code, tree, tree,
tree *, tree *, enum tree_code *, enum tree_code *);
-extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
- enum tree_code *);
+extern bool supportable_narrowing_operation (enum tree_code, const_tree,
+ const_tree, enum tree_code *);
/* Creation and deletion of loop and stmt info structs. */
extern loop_vec_info new_loop_vec_info (struct loop *loop);
-extern void destroy_loop_vec_info (loop_vec_info);
+extern void destroy_loop_vec_info (loop_vec_info, bool);
extern stmt_vec_info new_stmt_vec_info (tree stmt, loop_vec_info);
/** In tree-vect-analyze.c **/
/* Driver for analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *);
+extern void vect_free_slp_tree (slp_tree);
/** In tree-vect-patterns.c **/
/** In tree-vect-transform.c **/
-extern bool vectorizable_load (tree, block_stmt_iterator *, tree *);
-extern bool vectorizable_store (tree, block_stmt_iterator *, tree *);
-extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
+extern bool vectorizable_load (tree, block_stmt_iterator *, tree *, slp_tree);
+extern bool vectorizable_store (tree, block_stmt_iterator *, tree *, slp_tree);
+extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *,
+ slp_tree);
extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_conversion (tree, block_stmt_iterator *,
- tree *);
-extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+ tree *, slp_tree);
+extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *,
+ slp_tree);
extern tree vectorizable_function (tree, tree, tree);
extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_induction (tree, block_stmt_iterator *, tree *);
extern int vect_estimate_min_profitable_iters (loop_vec_info);
+extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
+ slp_tree);
+extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type,
+ slp_tree);
+extern void vect_model_load_cost (stmt_vec_info, int, slp_tree);
/* Driver for transformation stage. */
extern void vect_transform_loop (loop_vec_info);