2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference *, loop_vec_info);
219 static bool vect_analyze_data_ref_access (struct data_reference *);
220 static bool vect_get_first_index (tree, tree *);
221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
222 static struct data_reference * vect_analyze_pointer_ref_access
224 static bool vect_can_advance_ivs_p (struct loop *);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227 static struct data_reference * vect_analyze_pointer_ref_access
229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232 static tree vect_get_ptr_offset (tree, tree, tree *);
233 static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
236 /* Utility functions for the code transformation. */
237 static tree vect_create_destination_var (tree, tree);
238 static tree vect_create_data_ref_ptr
239 (tree, block_stmt_iterator *, tree, tree *, bool);
240 static tree vect_create_index_for_vector_ref
241 (struct loop *, block_stmt_iterator *);
242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
243 static tree get_vectype_for_scalar_type (tree);
244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
245 static tree vect_get_vec_def_for_operand (tree, tree);
246 static tree vect_init_vector (tree, tree);
247 static void vect_finish_stmt_generation
248 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
250 /* Utility function dealing with loop peeling (not peeling itself). */
251 static void vect_generate_tmps_on_preheader
252 (loop_vec_info, tree *, tree *, tree *);
253 static tree vect_build_loop_niters (loop_vec_info);
254 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
255 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
256 static void vect_update_inits_of_dr
257 (struct data_reference *, struct loop *, tree niters);
258 static void vect_update_inits_of_drs (loop_vec_info, tree);
259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
260 static void vect_do_peeling_for_loop_bound
261 (loop_vec_info, tree *, struct loops *);
263 /* Utilities for creation and deletion of vec_info structs. */
264 loop_vec_info new_loop_vec_info (struct loop *loop);
265 void destroy_loop_vec_info (loop_vec_info);
266 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
268 static bool vect_debug_stats (struct loop *loop);
269 static bool vect_debug_details (struct loop *loop);
272 /*************************************************************************
273 Simple Loop Peeling Utilities
275 Utilities to support loop peeling for vectorization purposes.
276 *************************************************************************/
279 /* For each definition in DEFINITIONS this function allocates
283 allocate_new_names (bitmap definitions)
288 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
290 tree def = ssa_name (ver);
291 tree *new_name_ptr = xmalloc (sizeof (tree));
293 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
295 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
296 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
298 SSA_NAME_AUX (def) = new_name_ptr;
303 /* Renames the use *OP_P. */
306 rename_use_op (use_operand_p op_p)
310 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
313 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
315 /* Something defined outside of the loop. */
319 /* An ordinary ssa name defined in the loop. */
321 SET_USE (op_p, *new_name_ptr);
325 /* Renames the def *OP_P in statement STMT. */
328 rename_def_op (def_operand_p op_p, tree stmt)
332 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
335 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
337 /* Something defined outside of the loop. */
341 /* An ordinary ssa name defined in the loop. */
343 SET_DEF (op_p, *new_name_ptr);
344 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
348 /* Renames the variables in basic block BB. */
351 rename_variables_in_bb (basic_block bb)
354 block_stmt_iterator bsi;
360 v_may_def_optype v_may_defs;
361 v_must_def_optype v_must_defs;
365 struct loop *loop = bb->loop_father;
367 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
368 rename_def_op (PHI_RESULT_PTR (phi), phi);
370 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
372 stmt = bsi_stmt (bsi);
373 get_stmt_operands (stmt);
374 ann = stmt_ann (stmt);
376 uses = USE_OPS (ann);
377 for (i = 0; i < NUM_USES (uses); i++)
378 rename_use_op (USE_OP_PTR (uses, i));
380 defs = DEF_OPS (ann);
381 for (i = 0; i < NUM_DEFS (defs); i++)
382 rename_def_op (DEF_OP_PTR (defs, i), stmt);
384 vuses = VUSE_OPS (ann);
385 for (i = 0; i < NUM_VUSES (vuses); i++)
386 rename_use_op (VUSE_OP_PTR (vuses, i));
388 v_may_defs = V_MAY_DEF_OPS (ann);
389 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
391 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
392 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
395 v_must_defs = V_MUST_DEF_OPS (ann);
396 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
398 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
399 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
403 FOR_EACH_EDGE (e, ei, bb->succs)
405 if (!flow_bb_inside_loop_p (loop, e->dest))
407 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
408 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
413 /* Releases the structures holding the new ssa names. */
416 free_new_names (bitmap definitions)
421 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
423 tree def = ssa_name (ver);
425 if (SSA_NAME_AUX (def))
427 free (SSA_NAME_AUX (def));
428 SSA_NAME_AUX (def) = NULL;
434 /* Renames variables in new generated LOOP. */
437 rename_variables_in_loop (struct loop *loop)
442 bbs = get_loop_body (loop);
444 for (i = 0; i < loop->num_nodes; i++)
445 rename_variables_in_bb (bbs[i]);
451 /* Update the PHI nodes of NEW_LOOP.
453 NEW_LOOP is a duplicate of ORIG_LOOP.
454 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
455 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
456 executes before it. */
459 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
460 struct loop *new_loop, bool after)
462 tree *new_name_ptr, new_ssa_name;
463 tree phi_new, phi_orig;
465 edge orig_loop_latch = loop_latch_edge (orig_loop);
466 edge orig_entry_e = loop_preheader_edge (orig_loop);
467 edge new_loop_exit_e = new_loop->exit_edges[0];
468 edge new_loop_entry_e = loop_preheader_edge (new_loop);
469 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
472 step 1. For each loop-header-phi:
473 Add the first phi argument for the phi in NEW_LOOP
474 (the one associated with the entry of NEW_LOOP)
476 step 2. For each loop-header-phi:
477 Add the second phi argument for the phi in NEW_LOOP
478 (the one associated with the latch of NEW_LOOP)
480 step 3. Update the phis in the successor block of NEW_LOOP.
482 case 1: NEW_LOOP was placed before ORIG_LOOP:
483 The successor block of NEW_LOOP is the header of ORIG_LOOP.
484 Updating the phis in the successor block can therefore be done
485 along with the scanning of the loop header phis, because the
486 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
487 phi nodes, organized in the same order.
489 case 2: NEW_LOOP was placed after ORIG_LOOP:
490 The successor block of NEW_LOOP is the original exit block of
491 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
492 We postpone updating these phis to a later stage (when
493 loop guards are added).
497 /* Scan the phis in the headers of the old and new loops
498 (they are organized in exactly the same order). */
500 for (phi_new = phi_nodes (new_loop->header),
501 phi_orig = phi_nodes (orig_loop->header);
503 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
506 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
507 add_phi_arg (phi_new, def, new_loop_entry_e);
510 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
511 if (TREE_CODE (def) != SSA_NAME)
514 new_name_ptr = SSA_NAME_AUX (def);
516 /* Something defined outside of the loop. */
519 /* An ordinary ssa name defined in the loop. */
520 new_ssa_name = *new_name_ptr;
521 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
523 /* step 3 (case 1). */
526 gcc_assert (new_loop_exit_e == orig_entry_e);
527 SET_PHI_ARG_DEF (phi_orig,
528 phi_arg_from_edge (phi_orig, new_loop_exit_e),
535 /* Update PHI nodes for a guard of the LOOP.
538 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
539 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
540 originates from the guard-bb, skips LOOP and reaches the (unique) exit
541 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
542 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
543 LOOP header) before the guard code was added, and now it became a merge
544 point of two paths - the path that ends with the LOOP exit-edge, and
545 the path that ends with GUARD_EDGE.
547 This function creates and updates the relevant phi nodes to account for
548 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
549 1. Create phi nodes at NEW_MERGE_BB.
550 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
551 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
554 ===> The CFG before the guard-code was added:
556 if (exit_loop) goto update_bb : LOOP_header_bb
559 ==> The CFG after the guard-code was added:
561 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
563 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
568 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
569 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
570 organized in the same order.
571 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
574 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
575 "original" loop). FALSE if LOOP is an original loop (not a newly
576 created copy). The SSA_NAME_AUX fields of the defs in the original
577 loop are the corresponding new ssa-names used in the new duplicated
578 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
579 nodes in UPDATE_BB takes the original ssa-name, and which takes the
580 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
581 the LOOP-exit-edge takes the new-name, and the phi-arg that is
582 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
583 FALSE, it's the other way around.
587 slpeel_update_phi_nodes_for_guard (edge guard_edge,
592 tree orig_phi, new_phi, update_phi;
593 tree guard_arg, loop_arg;
594 basic_block new_merge_bb = guard_edge->dest;
595 edge e = EDGE_SUCC (new_merge_bb, 0);
596 basic_block update_bb = e->dest;
597 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
599 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
600 orig_phi && update_phi;
601 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
603 /* 1. Generate new phi node in NEW_MERGE_BB: */
604 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
607 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
608 of LOOP. Set the two phi args in NEW_PHI for these edges: */
611 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
612 EDGE_SUCC (loop->latch, 0));
613 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
617 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
618 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
622 new_name = *new_name_ptr;
624 /* Something defined outside of the loop */
629 guard_arg = orig_def;
634 guard_arg = new_name;
638 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
639 add_phi_arg (new_phi, guard_arg, guard_edge);
641 /* 3. Update phi in successor block. */
642 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
643 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
644 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
645 PHI_RESULT (new_phi));
648 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
652 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
653 that starts at zero, increases by one and its limit is NITERS.
655 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
658 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
660 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
662 edge exit_edge = loop->exit_edges[0];
663 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
664 tree begin_label = tree_block_label (loop->latch);
665 tree exit_label = tree_block_label (loop->single_exit->dest);
666 tree init = build_int_cst (TREE_TYPE (niters), 0);
667 tree step = build_int_cst (TREE_TYPE (niters), 1);
671 orig_cond = get_loop_exit_condition (loop);
672 gcc_assert (orig_cond);
673 create_iv (init, step, NULL_TREE, loop,
674 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
676 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
677 back to the exit condition statement. */
678 bsi_next (&loop_exit_bsi);
679 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
681 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
683 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
684 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
685 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
687 else /* 'then' edge loops back. */
689 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
690 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
691 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
694 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
695 then_label, else_label);
696 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
698 /* Remove old loop exit test: */
699 bsi_remove (&loop_exit_bsi);
701 if (vect_debug_stats (loop) || vect_debug_details (loop))
702 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
704 loop->nb_iterations = niters;
708 /* Given LOOP this function generates a new copy of it and puts it
709 on E which is either the entry or exit of LOOP. */
712 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
715 struct loop *new_loop;
716 basic_block *new_bbs, *bbs;
719 basic_block exit_dest;
722 at_exit = (e == loop->exit_edges[0]);
723 if (!at_exit && e != loop_preheader_edge (loop))
725 if (dump_file && (dump_flags & TDF_DETAILS))
726 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
730 bbs = get_loop_body (loop);
732 /* Check whether duplication is possible. */
733 if (!can_copy_bbs_p (bbs, loop->num_nodes))
735 if (vect_debug_stats (loop) || vect_debug_details (loop))
736 fprintf (dump_file, "Cannot copy basic blocks.\n");
741 /* Generate new loop structure. */
742 new_loop = duplicate_loop (loops, loop, loop->outer);
745 if (vect_debug_stats (loop) || vect_debug_details (loop))
746 fprintf (dump_file, "duplicate_loop returns NULL.\n");
751 exit_dest = loop->exit_edges[0]->dest;
752 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
753 exit_dest) == loop->header ?
756 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
758 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
760 /* Duplicating phi args at exit bbs as coming
761 also from exit of duplicated loop. */
762 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
764 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
767 edge new_loop_exit_edge;
769 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
770 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
772 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
774 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
778 if (at_exit) /* Add the loop copy at exit. */
780 redirect_edge_and_branch_force (e, new_loop->header);
781 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
783 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
785 else /* Add the copy at entry. */
788 edge entry_e = loop_preheader_edge (loop);
789 basic_block preheader = entry_e->src;
791 if (!flow_bb_inside_loop_p (new_loop,
792 EDGE_SUCC (new_loop->header, 0)->dest))
793 new_exit_e = EDGE_SUCC (new_loop->header, 0);
795 new_exit_e = EDGE_SUCC (new_loop->header, 1);
797 redirect_edge_and_branch_force (new_exit_e, loop->header);
798 set_immediate_dominator (CDI_DOMINATORS, loop->header,
801 /* We have to add phi args to the loop->header here as coming
802 from new_exit_e edge. */
803 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
805 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
807 add_phi_arg (phi, phi_arg, new_exit_e);
810 redirect_edge_and_branch_force (entry_e, new_loop->header);
811 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
814 flow_loop_scan (new_loop, LOOP_ALL);
815 flow_loop_scan (loop, LOOP_ALL);
823 /* Given the condition statement COND, put it as the last statement
824 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
825 Assumes that this is the single exit of the guarded loop.
826 Returns the skip edge. */
829 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
832 block_stmt_iterator bsi;
834 tree cond_stmt, then_label, else_label;
836 enter_e = EDGE_SUCC (guard_bb, 0);
837 enter_e->flags &= ~EDGE_FALLTHRU;
838 enter_e->flags |= EDGE_FALSE_VALUE;
839 bsi = bsi_last (guard_bb);
841 then_label = build1 (GOTO_EXPR, void_type_node,
842 tree_block_label (exit_bb));
843 else_label = build1 (GOTO_EXPR, void_type_node,
844 tree_block_label (enter_e->dest));
845 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
846 then_label, else_label);
847 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
848 /* Add new edge to connect entry block to the second loop. */
849 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
850 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
855 /* This function verifies that the following restrictions apply to LOOP:
857 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
858 (3) it is single entry, single exit
859 (4) its exit condition is the last stmt in the header
860 (5) E is the entry/exit edge of LOOP.
864 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
866 edge exit_e = loop->exit_edges [0];
867 edge entry_e = loop_preheader_edge (loop);
868 tree orig_cond = get_loop_exit_condition (loop);
869 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
871 if (any_marked_for_rewrite_p ())
875 /* All loops have an outer scope; the only case loop->outer is NULL is for
876 the function itself. */
878 || loop->num_nodes != 2
879 || !empty_block_p (loop->latch)
880 || loop->num_exits != 1
881 || loop->num_entries != 1
882 /* Verify that new loop exit condition can be trivially modified. */
883 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
884 || (e != exit_e && e != entry_e))
890 #ifdef ENABLE_CHECKING
892 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
893 struct loop *second_loop)
895 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
896 basic_block loop2_entry_bb = second_loop->pre_header;
897 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
899 /* A guard that controls whether the second_loop is to be executed or skipped
900 is placed in first_loop->exit. first_loopt->exit therefore has two
901 successors - one is the preheader of second_loop, and the other is a bb
904 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
907 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
910 /* The preheader of new_loop is expected to have two predessors:
911 first_loop->exit and the block that precedes first_loop. */
913 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
914 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
915 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
916 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
917 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
919 /* Verify that the other successor of first_loopt->exit is after the
925 /* Function slpeel_tree_peel_loop_to_edge.
927 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
928 that is placed on the entry (exit) edge E of LOOP. After this transformation
929 we have two loops one after the other - first-loop iterates FIRST_NITERS
930 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
933 - LOOP: the loop to be peeled.
934 - E: the exit or entry edge of LOOP.
935 If it is the entry edge, we peel the first iterations of LOOP. In this
936 case first-loop is LOOP, and second-loop is the newly created loop.
937 If it is the exit edge, we peel the last iterations of LOOP. In this
938 case, first-loop is the newly created loop, and second-loop is LOOP.
939 - NITERS: the number of iterations that LOOP iterates.
940 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
941 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
942 for updating the loop bound of the first-loop to FIRST_NITERS. If it
943 is false, the caller of this function may want to take care of this
944 (this can be useful if we don't want new stmts added to first-loop).
947 The function returns a pointer to the new loop-copy, or NULL if it failed
948 to perform the transformation.
950 The function generates two if-then-else guards: one before the first loop,
951 and the other before the second loop:
953 if (FIRST_NITERS == 0) then skip the first loop,
954 and go directly to the second loop.
956 if (FIRST_NITERS == NITERS) then skip the second loop.
958 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
959 FORNOW the resulting code will not be in loop-closed-ssa form.
963 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
964 edge e, tree first_niters,
965 tree niters, bool update_first_loop_count)
967 struct loop *new_loop = NULL, *first_loop, *second_loop;
971 basic_block bb_before_second_loop, bb_after_second_loop;
972 basic_block bb_before_first_loop;
973 basic_block bb_between_loops;
974 edge exit_e = loop->exit_edges [0];
976 if (!slpeel_can_duplicate_loop_p (loop, e))
979 /* We have to initialize cfg_hooks. Then, when calling
980 cfg_hooks->split_edge, the function tree_split_edge
981 is actually called and, when calling cfg_hooks->duplicate_block,
982 the function tree_duplicate_bb is called. */
983 tree_register_cfg_hooks ();
986 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
987 Resulting CFG would be:
1000 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1002 if (vect_debug_stats (loop) || vect_debug_details (loop))
1003 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1009 /* NEW_LOOP was placed after LOOP. */
1011 second_loop = new_loop;
1015 /* NEW_LOOP was placed before LOOP. */
1016 first_loop = new_loop;
1020 definitions = marked_ssa_names ();
1021 allocate_new_names (definitions);
1022 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1023 rename_variables_in_loop (new_loop);
1026 /* 2. Add the guard that controls whether the first loop is executed.
1027 Resulting CFG would be:
1029 bb_before_first_loop:
1030 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1037 bb_before_second_loop:
1046 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1047 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1048 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1049 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1050 flow_loop_scan (first_loop, LOOP_ALL);
1051 flow_loop_scan (second_loop, LOOP_ALL);
1054 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1055 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1056 bb_before_second_loop, bb_before_first_loop);
1057 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1058 first_loop == new_loop);
1061 /* 3. Add the guard that controls whether the second loop is executed.
1062 Resulting CFG would be:
1064 bb_before_first_loop:
1065 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1073 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1074 GOTO bb_before_second_loop
1076 bb_before_second_loop:
1082 bb_after_second_loop:
1087 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1088 add_bb_to_loop (bb_between_loops, first_loop->outer);
1089 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1090 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1091 flow_loop_scan (first_loop, LOOP_ALL);
1092 flow_loop_scan (second_loop, LOOP_ALL);
1094 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1095 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1096 bb_after_second_loop, bb_before_first_loop);
1097 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1098 second_loop == new_loop);
1100 /* Flow loop scan does not update loop->single_exit field. */
1101 first_loop->single_exit = first_loop->exit_edges[0];
1102 second_loop->single_exit = second_loop->exit_edges[0];
1104 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1106 if (update_first_loop_count)
1107 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1109 free_new_names (definitions);
1110 BITMAP_XFREE (definitions);
1111 unmark_all_for_rewrite ();
1117 /* Here the proper Vectorizer starts. */
1119 /*************************************************************************
1120 Vectorization Utilities.
1121 *************************************************************************/
1123 /* Function new_stmt_vec_info.
1125 Create and initialize a new stmt_vec_info struct for STMT. */
1128 new_stmt_vec_info (tree stmt, struct loop *loop)
1131 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1133 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1134 STMT_VINFO_STMT (res) = stmt;
1135 STMT_VINFO_LOOP (res) = loop;
1136 STMT_VINFO_RELEVANT_P (res) = 0;
1137 STMT_VINFO_VECTYPE (res) = NULL;
1138 STMT_VINFO_VEC_STMT (res) = NULL;
1139 STMT_VINFO_DATA_REF (res) = NULL;
1140 STMT_VINFO_MEMTAG (res) = NULL;
1141 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1147 /* Function new_loop_vec_info.
1149 Create and initialize a new loop_vec_info struct for LOOP, as well as
1150 stmt_vec_info structs for all the stmts in LOOP. */
1153 new_loop_vec_info (struct loop *loop)
1157 block_stmt_iterator si;
1160 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1162 bbs = get_loop_body (loop);
1164 /* Create stmt_info for all stmts in the loop. */
1165 for (i = 0; i < loop->num_nodes; i++)
1167 basic_block bb = bbs[i];
1168 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1170 tree stmt = bsi_stmt (si);
1173 get_stmt_operands (stmt);
1174 ann = stmt_ann (stmt);
1175 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1179 LOOP_VINFO_LOOP (res) = loop;
1180 LOOP_VINFO_BBS (res) = bbs;
1181 LOOP_VINFO_EXIT_COND (res) = NULL;
1182 LOOP_VINFO_NITERS (res) = NULL;
1183 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1184 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1185 LOOP_VINFO_VECT_FACTOR (res) = 0;
1186 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1187 "loop_write_datarefs");
1188 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1189 "loop_read_datarefs");
1190 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1196 /* Function destroy_loop_vec_info.
1198 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1199 stmts in the loop. */
1202 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1207 block_stmt_iterator si;
1213 loop = LOOP_VINFO_LOOP (loop_vinfo);
1215 bbs = LOOP_VINFO_BBS (loop_vinfo);
1216 nbbs = loop->num_nodes;
1218 for (j = 0; j < nbbs; j++)
1220 basic_block bb = bbs[j];
1221 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1223 tree stmt = bsi_stmt (si);
1224 stmt_ann_t ann = stmt_ann (stmt);
1225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1227 set_stmt_info (ann, NULL);
1231 free (LOOP_VINFO_BBS (loop_vinfo));
1232 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1233 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1239 /* Function debug_loop_stats.
1241 For vectorization statistics dumps. */
1244 vect_debug_stats (struct loop *loop)
1247 block_stmt_iterator si;
1248 tree node = NULL_TREE;
1250 if (!dump_file || !(dump_flags & TDF_STATS))
1255 fprintf (dump_file, "\n");
1264 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1266 node = bsi_stmt (si);
1267 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1271 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1272 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1274 fprintf (dump_file, "\nloop at %s:%d: ",
1275 EXPR_FILENAME (node), EXPR_LINENO (node));
1283 /* Function debug_loop_details.
1285 For vectorization debug dumps. */
1288 vect_debug_details (struct loop *loop)
1291 block_stmt_iterator si;
1292 tree node = NULL_TREE;
1294 if (!dump_file || !(dump_flags & TDF_DETAILS))
1299 fprintf (dump_file, "\n");
1308 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1310 node = bsi_stmt (si);
1311 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1315 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1316 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1318 fprintf (dump_file, "\nloop at %s:%d: ",
1319 EXPR_FILENAME (node), EXPR_LINENO (node));
1327 /* Function vect_get_ptr_offset
1329 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1332 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1333 tree vectype ATTRIBUTE_UNUSED,
1334 tree *offset ATTRIBUTE_UNUSED)
1336 /* TODO: Use alignment information. */
1341 /* Function vect_get_base_and_bit_offset
1343 Return the BASE of the data reference EXPR.
1344 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1345 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1346 bits of 'a.b[i] + 4B' from a.
1349 EXPR - the memory reference that is being analyzed
1350 DR - the data_reference struct of the _original_ memory reference
1351 (Note: DR_REF (DR) is not necessarily EXPR)
1352 VECTYPE - the type that defines the alignment (i.e, we compute
1353 alignment relative to TYPE_ALIGN(VECTYPE))
1356 BASE (returned value) - the base of the data reference EXPR.
1357 E.g, if EXPR is a.b[k].c[i][j] the returned
1359 OFFSET - offset of EXPR from BASE in bits
1360 BASE_ALIGNED_P - indicates if BASE is aligned
1362 If something unexpected is encountered (an unsupported form of data-ref),
1363 or if VECTYPE is given but OFFSET cannot be determined:
1364 then NULL_TREE is returned. */
1367 vect_get_base_and_bit_offset (struct data_reference *dr,
1370 loop_vec_info loop_vinfo,
1372 bool *base_aligned_p)
1374 tree this_offset = size_zero_node;
1375 tree base = NULL_TREE;
1377 tree oprnd0, oprnd1;
1378 struct data_reference *array_dr;
1379 enum tree_code code = TREE_CODE (expr);
1381 *base_aligned_p = false;
1385 /* These cases end the recursion: */
1387 *offset = size_zero_node;
1388 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1389 *base_aligned_p = true;
1396 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1399 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1401 base = vect_get_ptr_offset (expr, vectype, offset);
1403 *base_aligned_p = true;
1407 *base_aligned_p = true;
1408 *offset = size_zero_node;
1414 *offset = int_const_binop (MULT_EXPR, expr,
1415 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1418 /* These cases continue the recursion: */
1420 oprnd0 = TREE_OPERAND (expr, 0);
1421 oprnd1 = TREE_OPERAND (expr, 1);
1423 this_offset = bit_position (oprnd1);
1424 if (vectype && !host_integerp (this_offset, 1))
1430 oprnd0 = TREE_OPERAND (expr, 0);
1435 oprnd0 = TREE_OPERAND (expr, 0);
1440 if (DR_REF (dr) != expr)
1441 /* Build array data_reference struct if the existing DR_REF
1442 doesn't match EXPR. This happens, for example, when the
1443 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1444 contains information on the access of T, not of arr. In order
1445 to continue the analysis, we create a new DR struct that
1446 describes the access of arr.
1448 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1452 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1453 vectype, &this_offset);
1458 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1460 *offset = this_offset;
1461 *base_aligned_p = true;
1468 /* In case we have a PLUS_EXPR of the form
1469 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1470 This is verified in vect_get_symbl_and_dr. */
1471 oprnd0 = TREE_OPERAND (expr, 0);
1472 oprnd1 = TREE_OPERAND (expr, 1);
1474 base = vect_get_base_and_bit_offset
1475 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1476 if (vectype && !base)
1486 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1487 loop_vinfo, offset, base_aligned_p);
1489 if (vectype && base)
1491 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1492 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1495 if (vect_debug_details (NULL))
1497 print_generic_expr (dump_file, expr, TDF_SLIM);
1498 fprintf (dump_file, " --> total offset for ref: ");
1499 print_generic_expr (dump_file, *offset, TDF_SLIM);
1506 /* Function vect_force_dr_alignment_p.
1508 Returns whether the alignment of a DECL can be forced to be aligned
1509 on ALIGNMENT bit boundary. */
1512 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1514 if (TREE_CODE (decl) != VAR_DECL)
1517 if (DECL_EXTERNAL (decl))
1520 if (TREE_ASM_WRITTEN (decl))
1523 if (TREE_STATIC (decl))
1524 return (alignment <= MAX_OFILE_ALIGNMENT);
1526 /* This is not 100% correct. The absolute correct stack alignment
1527 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1528 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1529 However, until someone implements forced stack alignment, SSE
1530 isn't really usable without this. */
1531 return (alignment <= PREFERRED_STACK_BOUNDARY);
1535 /* Function vect_get_new_vect_var.
1537 Returns a name for a new variable. The current naming scheme appends the
1538 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1539 the name of vectorizer generated variables, and appends that to NAME if
1543 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1549 if (var_kind == vect_simple_var)
1554 prefix_len = strlen (prefix);
1557 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1559 new_vect_var = create_tmp_var (type, prefix);
1561 return new_vect_var;
1565 /* Function vect_create_index_for_vector_ref.
1567 Create (and return) an index variable, along with it's update chain in the
1568 loop. This variable will be used to access a memory location in a vector
1572 LOOP: The loop being vectorized.
1573 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1574 function can be added here, or in the loop pre-header.
1577 Return an index that will be used to index a vector array. It is expected
1578 that a pointer to the first vector will be used as the base address for the
1581 FORNOW: we are not trying to be efficient, just creating a new index each
1582 time from scratch. At this time all vector references could use the same
1585 TODO: create only one index to be used by all vector references. Record
1586 the index in the LOOP_VINFO the first time this procedure is called and
1587 return it on subsequent calls. The increment of this index must be placed
1588 just before the conditional expression that ends the single block loop. */
1591 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1594 tree indx_before_incr, indx_after_incr;
1596 /* It is assumed that the base pointer used for vectorized access contains
1597 the address of the first vector. Therefore the index used for vectorized
1598 access must be initialized to zero and incremented by 1. */
1600 init = integer_zero_node;
1601 step = integer_one_node;
1603 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1604 create_iv (init, step, NULL_TREE, loop, bsi, false,
1605 &indx_before_incr, &indx_after_incr);
1607 return indx_before_incr;
1611 /* Function vect_create_addr_base_for_vector_ref.
1613 Create an expression that computes the address of the first memory location
1614 that will be accessed for a data reference.
1617 STMT: The statement containing the data reference.
1618 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1619 OFFSET: Optional. If supplied, it is be added to the initial address.
1622 1. Return an SSA_NAME whose value is the address of the memory location of
1623 the first vector of the data reference.
1624 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1625 these statement(s) which define the returned SSA_NAME.
1627 FORNOW: We are only handling array accesses with step 1. */
1630 vect_create_addr_base_for_vector_ref (tree stmt,
1631 tree *new_stmt_list,
1634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1635 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1636 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1637 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1638 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1639 tree ref = DR_REF (dr);
1640 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1641 tree scalar_type = TREE_TYPE (ref);
1642 tree scalar_ptr_type = build_pointer_type (scalar_type);
1644 tree init_val, step, init_oval;
1646 bool is_ptr_ref, is_array_ref, is_addr_expr;
1651 tree addr_base, addr_expr;
1652 tree dest, new_stmt;
1654 /* Only the access function of the last index is relevant (i_n in
1655 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1656 access_fn = DR_ACCESS_FN (dr, 0);
1657 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1660 init_oval = integer_zero_node;
1662 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1663 && TREE_CODE (data_ref_base) == SSA_NAME;
1664 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1665 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1666 || TREE_CODE (data_ref_base) == PLUS_EXPR
1667 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1668 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1670 /** Create: &(base[init_val])
1672 if data_ref_base is an ARRAY_TYPE:
1673 base = data_ref_base
1675 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1676 base = *((scalar_array *) data_ref_base)
1680 array_base = data_ref_base;
1681 else /* is_ptr_ref or is_addr_expr */
1683 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1684 tree scalar_array_type = build_array_type (scalar_type, 0);
1685 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1686 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1687 add_referenced_tmp_var (array_ptr);
1689 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1690 add_referenced_tmp_var (dest);
1692 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1693 append_to_statement_list_force (new_stmt, new_stmt_list);
1695 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1696 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1697 new_temp = make_ssa_name (array_ptr, vec_stmt);
1698 TREE_OPERAND (vec_stmt, 0) = new_temp;
1699 append_to_statement_list_force (vec_stmt, new_stmt_list);
1702 array_base = build_fold_indirect_ref (new_temp);
1705 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1706 add_referenced_tmp_var (dest);
1707 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1708 append_to_statement_list_force (new_stmt, new_stmt_list);
1712 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1713 add_referenced_tmp_var (tmp);
1714 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1715 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1716 init_val = make_ssa_name (tmp, vec_stmt);
1717 TREE_OPERAND (vec_stmt, 0) = init_val;
1718 append_to_statement_list_force (vec_stmt, new_stmt_list);
1721 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1722 NULL_TREE, NULL_TREE);
1723 addr_base = build_fold_addr_expr (array_ref);
1725 /* addr_expr = addr_base */
1726 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1727 get_name (base_name));
1728 add_referenced_tmp_var (addr_expr);
1729 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1730 new_temp = make_ssa_name (addr_expr, vec_stmt);
1731 TREE_OPERAND (vec_stmt, 0) = new_temp;
1732 append_to_statement_list_force (vec_stmt, new_stmt_list);
1738 /* Function get_vectype_for_scalar_type.
1740 Returns the vector type corresponding to SCALAR_TYPE as supported
1744 get_vectype_for_scalar_type (tree scalar_type)
1746 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1747 int nbytes = GET_MODE_SIZE (inner_mode);
1754 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1756 nunits = UNITS_PER_SIMD_WORD / nbytes;
1758 vectype = build_vector_type (scalar_type, nunits);
1759 if (vect_debug_details (NULL))
1761 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1762 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1768 if (vect_debug_details (NULL))
1770 fprintf (dump_file, "vectype: ");
1771 print_generic_expr (dump_file, vectype, TDF_SLIM);
1774 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1776 /* TODO: tree-complex.c sometimes can parallelize operations
1777 on generic vectors. We can vectorize the loop in that case,
1778 but then we should re-run the lowering pass. */
1779 if (vect_debug_details (NULL))
1780 fprintf (dump_file, "mode not supported by target.");
1788 /* Function vect_align_data_ref.
1790 Handle mislignment of a memory accesses.
1792 FORNOW: Can't handle misaligned accesses.
1793 Make sure that the dataref is aligned. */
1796 vect_align_data_ref (tree stmt)
1798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1799 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1801 /* FORNOW: can't handle misaligned accesses;
1802 all accesses expected to be aligned. */
1803 gcc_assert (aligned_access_p (dr));
1807 /* Function vect_create_data_ref_ptr.
1809 Create a memory reference expression for vector access, to be used in a
1810 vector load/store stmt. The reference is based on a new pointer to vector
1814 1. STMT: a stmt that references memory. Expected to be of the form
1815 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1816 2. BSI: block_stmt_iterator where new stmts can be added.
1817 3. OFFSET (optional): an offset to be added to the initial address accessed
1818 by the data-ref in STMT.
1819 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1820 pointing to the initial address.
1823 1. Declare a new ptr to vector_type, and have it point to the base of the
1824 data reference (initial addressed accessed by the data reference).
1825 For example, for vector of type V8HI, the following code is generated:
1828 vp = (v8hi *)initial_address;
1830 if OFFSET is not supplied:
1831 initial_address = &a[init];
1832 if OFFSET is supplied:
1833 initial_address = &a[init + OFFSET];
1835 Return the initial_address in INITIAL_ADDRESS.
1837 2. Create a data-reference in the loop based on the new vector pointer vp,
1838 and using a new index variable 'idx' as follows:
1842 where if ONLY_INIT is true:
1845 update = idx + vector_type_size
1847 Return the pointer vp'.
1850 FORNOW: handle only aligned and consecutive accesses. */
1853 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1854 tree *initial_address, bool only_init)
1857 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1858 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1859 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1860 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1864 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1865 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1866 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1867 int nvuses, nv_may_defs, nv_must_defs;
1871 tree new_stmt_list = NULL_TREE;
1873 edge pe = loop_preheader_edge (loop);
1879 tree type, tmp, size;
1881 base_name = unshare_expr (DR_BASE_NAME (dr));
1882 if (vect_debug_details (NULL))
1884 tree data_ref_base = base_name;
1885 fprintf (dump_file, "create array_ref of type: ");
1886 print_generic_expr (dump_file, vectype, TDF_SLIM);
1887 if (TREE_CODE (data_ref_base) == VAR_DECL)
1888 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1889 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1890 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1891 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1892 fprintf (dump_file, "vectorizing a record based array ref: ");
1893 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1894 fprintf (dump_file, "vectorizing a pointer ref: ");
1895 print_generic_expr (dump_file, base_name, TDF_SLIM);
1898 /** (1) Create the new vector-pointer variable: **/
1900 vect_ptr_type = build_pointer_type (vectype);
1901 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1902 get_name (base_name));
1903 add_referenced_tmp_var (vect_ptr);
1906 /** (2) Handle aliasing information of the new vector-pointer: **/
1908 tag = STMT_VINFO_MEMTAG (stmt_info);
1910 get_var_ann (vect_ptr)->type_mem_tag = tag;
1912 /* Mark for renaming all aliased variables
1913 (i.e, the may-aliases of the type-mem-tag). */
1914 nvuses = NUM_VUSES (vuses);
1915 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1916 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1917 for (i = 0; i < nvuses; i++)
1919 tree use = VUSE_OP (vuses, i);
1920 if (TREE_CODE (use) == SSA_NAME)
1921 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1923 for (i = 0; i < nv_may_defs; i++)
1925 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1926 if (TREE_CODE (def) == SSA_NAME)
1927 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1929 for (i = 0; i < nv_must_defs; i++)
1931 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1932 if (TREE_CODE (def) == SSA_NAME)
1933 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1937 /** (3) Calculate the initial address the vector-pointer, and set
1938 the vector-pointer to point to it before the loop: **/
1940 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1941 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1943 pe = loop_preheader_edge (loop);
1944 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1945 gcc_assert (!new_bb);
1946 *initial_address = new_temp;
1948 /* Create: p = (vectype *) initial_base */
1949 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1950 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1951 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1952 TREE_OPERAND (vec_stmt, 0) = new_temp;
1953 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1954 gcc_assert (!new_bb);
1955 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1958 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1960 if (only_init) /* No update in loop is required. */
1961 return vect_ptr_init;
1963 idx = vect_create_index_for_vector_ref (loop, bsi);
1965 /* Create: update = idx * vectype_size */
1966 tmp = create_tmp_var (integer_type_node, "update");
1967 add_referenced_tmp_var (tmp);
1968 size = TYPE_SIZE (vect_ptr_type);
1969 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1970 ptr_update = create_tmp_var (type, "update");
1971 add_referenced_tmp_var (ptr_update);
1972 vectype_size = build_int_cst (integer_type_node,
1973 GET_MODE_SIZE (TYPE_MODE (vectype)));
1974 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1975 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
1976 new_temp = make_ssa_name (tmp, vec_stmt);
1977 TREE_OPERAND (vec_stmt, 0) = new_temp;
1978 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1979 vec_stmt = fold_convert (type, new_temp);
1980 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1981 new_temp = make_ssa_name (ptr_update, vec_stmt);
1982 TREE_OPERAND (vec_stmt, 0) = new_temp;
1983 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1985 /* Create: data_ref_ptr = vect_ptr_init + update */
1986 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1987 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1988 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1989 TREE_OPERAND (vec_stmt, 0) = new_temp;
1990 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1991 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1993 return data_ref_ptr;
1997 /* Function vect_create_destination_var.
1999 Create a new temporary of type VECTYPE. */
2002 vect_create_destination_var (tree scalar_dest, tree vectype)
2005 const char *new_name;
2007 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2009 new_name = get_name (scalar_dest);
2012 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2013 add_referenced_tmp_var (vec_dest);
2019 /* Function vect_init_vector.
2021 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2022 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2023 used in the vectorization of STMT. */
2026 vect_init_vector (tree stmt, tree vector_var)
2028 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2029 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2032 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2038 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2039 add_referenced_tmp_var (new_var);
2041 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2042 new_temp = make_ssa_name (new_var, init_stmt);
2043 TREE_OPERAND (init_stmt, 0) = new_temp;
2045 pe = loop_preheader_edge (loop);
2046 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2047 gcc_assert (!new_bb);
2049 if (vect_debug_details (NULL))
2051 fprintf (dump_file, "created new init_stmt: ");
2052 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2055 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2060 /* Function vect_get_vec_def_for_operand.
2062 OP is an operand in STMT. This function returns a (vector) def that will be
2063 used in the vectorized stmt for STMT.
2065 In the case that OP is an SSA_NAME which is defined in the loop, then
2066 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2068 In case OP is an invariant or constant, a new stmt that creates a vector def
2069 needs to be introduced. */
2072 vect_get_vec_def_for_operand (tree op, tree stmt)
2077 stmt_vec_info def_stmt_info = NULL;
2078 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2079 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2080 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2081 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2088 if (vect_debug_details (NULL))
2090 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2091 print_generic_expr (dump_file, op, TDF_SLIM);
2094 /** ===> Case 1: operand is a constant. **/
2096 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2098 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2102 /* Build a tree with vector elements. */
2103 if (vect_debug_details (NULL))
2104 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2106 for (i = nunits - 1; i >= 0; --i)
2108 t = tree_cons (NULL_TREE, op, t);
2110 vec_cst = build_vector (vectype, t);
2111 return vect_init_vector (stmt, vec_cst);
2114 gcc_assert (TREE_CODE (op) == SSA_NAME);
2116 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2118 def_stmt = SSA_NAME_DEF_STMT (op);
2119 def_stmt_info = vinfo_for_stmt (def_stmt);
2121 if (vect_debug_details (NULL))
2123 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2124 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2128 /** ==> Case 2.1: operand is defined inside the loop. **/
2132 /* Get the def from the vectorized stmt. */
2134 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2135 gcc_assert (vec_stmt);
2136 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2141 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2142 it is a reduction/induction. **/
2144 bb = bb_for_stmt (def_stmt);
2145 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2147 if (vect_debug_details (NULL))
2148 fprintf (dump_file, "reduction/induction - unsupported.");
2149 internal_error ("no support for reduction/induction"); /* FORNOW */
2153 /** ==> Case 2.3: operand is defined outside the loop -
2154 it is a loop invariant. */
2156 switch (TREE_CODE (def_stmt))
2159 def = PHI_RESULT (def_stmt);
2162 def = TREE_OPERAND (def_stmt, 0);
2165 def = TREE_OPERAND (def_stmt, 0);
2166 gcc_assert (IS_EMPTY_STMT (def_stmt));
2170 if (vect_debug_details (NULL))
2172 fprintf (dump_file, "unsupported defining stmt: ");
2173 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2175 internal_error ("unsupported defining stmt");
2178 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2180 if (vect_debug_details (NULL))
2181 fprintf (dump_file, "Create vector_inv.");
2183 for (i = nunits - 1; i >= 0; --i)
2185 t = tree_cons (NULL_TREE, def, t);
2188 vec_inv = build_constructor (vectype, t);
2189 return vect_init_vector (stmt, vec_inv);
2193 /* Function vect_finish_stmt_generation.
2195 Insert a new stmt. */
2198 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2200 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2202 if (vect_debug_details (NULL))
2204 fprintf (dump_file, "add new stmt: ");
2205 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2208 /* Make sure bsi points to the stmt that is being vectorized. */
2210 /* Assumption: any stmts created for the vectorization of stmt S were
2211 inserted before S. BSI is expected to point to S or some new stmt before S.
2214 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2216 gcc_assert (stmt == bsi_stmt (*bsi));
2220 /* Function vectorizable_assignment.
2222 Check if STMT performs an assignment (copy) that can be vectorized.
2223 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2224 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2225 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2228 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2235 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2236 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2239 /* Is vectorizable assignment? */
2241 if (TREE_CODE (stmt) != MODIFY_EXPR)
2244 scalar_dest = TREE_OPERAND (stmt, 0);
2245 if (TREE_CODE (scalar_dest) != SSA_NAME)
2248 op = TREE_OPERAND (stmt, 1);
2249 if (!vect_is_simple_use (op, loop, NULL))
2251 if (vect_debug_details (NULL))
2252 fprintf (dump_file, "use not simple.");
2256 if (!vec_stmt) /* transformation not required. */
2258 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2263 if (vect_debug_details (NULL))
2264 fprintf (dump_file, "transform assignment.");
2267 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2270 op = TREE_OPERAND (stmt, 1);
2271 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2273 /* Arguments are ready. create the new vector stmt. */
2274 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2275 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2276 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2277 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2283 /* Function vectorizable_operation.
2285 Check if STMT performs a binary or unary operation that can be vectorized.
2286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2291 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2296 tree op0, op1 = NULL;
2297 tree vec_oprnd0, vec_oprnd1=NULL;
2298 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2299 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2300 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2302 enum tree_code code;
2303 enum machine_mode vec_mode;
2309 /* Is STMT a vectorizable binary/unary operation? */
2310 if (TREE_CODE (stmt) != MODIFY_EXPR)
2313 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2316 operation = TREE_OPERAND (stmt, 1);
2317 code = TREE_CODE (operation);
2318 optab = optab_for_tree_code (code, vectype);
2320 /* Support only unary or binary operations. */
2321 op_type = TREE_CODE_LENGTH (code);
2322 if (op_type != unary_op && op_type != binary_op)
2324 if (vect_debug_details (NULL))
2325 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2329 for (i = 0; i < op_type; i++)
2331 op = TREE_OPERAND (operation, i);
2332 if (!vect_is_simple_use (op, loop, NULL))
2334 if (vect_debug_details (NULL))
2335 fprintf (dump_file, "use not simple.");
2340 /* Supportable by target? */
2343 if (vect_debug_details (NULL))
2344 fprintf (dump_file, "no optab.");
2347 vec_mode = TYPE_MODE (vectype);
2348 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2350 if (vect_debug_details (NULL))
2351 fprintf (dump_file, "op not supported by target.");
2355 if (!vec_stmt) /* transformation not required. */
2357 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2363 if (vect_debug_details (NULL))
2364 fprintf (dump_file, "transform binary/unary operation.");
2367 scalar_dest = TREE_OPERAND (stmt, 0);
2368 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2371 op0 = TREE_OPERAND (operation, 0);
2372 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2374 if (op_type == binary_op)
2376 op1 = TREE_OPERAND (operation, 1);
2377 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2380 /* Arguments are ready. create the new vector stmt. */
2382 if (op_type == binary_op)
2383 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2384 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2386 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2387 build1 (code, vectype, vec_oprnd0));
2388 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2389 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2390 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2396 /* Function vectorizable_store.
2398 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2405 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2412 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2413 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2415 enum machine_mode vec_mode;
2417 enum dr_alignment_support alignment_support_cheme;
2419 /* Is vectorizable store? */
2421 if (TREE_CODE (stmt) != MODIFY_EXPR)
2424 scalar_dest = TREE_OPERAND (stmt, 0);
2425 if (TREE_CODE (scalar_dest) != ARRAY_REF
2426 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2429 op = TREE_OPERAND (stmt, 1);
2430 if (!vect_is_simple_use (op, loop, NULL))
2432 if (vect_debug_details (NULL))
2433 fprintf (dump_file, "use not simple.");
2437 vec_mode = TYPE_MODE (vectype);
2438 /* FORNOW. In some cases can vectorize even if data-type not supported
2439 (e.g. - array initialization with 0). */
2440 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2443 if (!STMT_VINFO_DATA_REF (stmt_info))
2447 if (!vec_stmt) /* transformation not required. */
2449 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2455 if (vect_debug_details (NULL))
2456 fprintf (dump_file, "transform store");
2458 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2459 gcc_assert (alignment_support_cheme);
2460 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2462 /* Handle use - get the vectorized def from the defining stmt. */
2463 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2466 /* FORNOW: make sure the data reference is aligned. */
2467 vect_align_data_ref (stmt);
2468 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2469 data_ref = build_fold_indirect_ref (data_ref);
2471 /* Arguments are ready. create the new vector stmt. */
2472 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2473 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2479 /* vectorizable_load.
2481 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2483 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2484 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2485 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2488 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2491 tree vec_dest = NULL;
2492 tree data_ref = NULL;
2494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2495 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2496 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2503 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2504 edge pe = loop_preheader_edge (loop);
2505 enum dr_alignment_support alignment_support_cheme;
2507 /* Is vectorizable load? */
2509 if (TREE_CODE (stmt) != MODIFY_EXPR)
2512 scalar_dest = TREE_OPERAND (stmt, 0);
2513 if (TREE_CODE (scalar_dest) != SSA_NAME)
2516 op = TREE_OPERAND (stmt, 1);
2517 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2520 if (!STMT_VINFO_DATA_REF (stmt_info))
2523 mode = (int) TYPE_MODE (vectype);
2525 /* FORNOW. In some cases can vectorize even if data-type not supported
2526 (e.g. - data copies). */
2527 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2529 if (vect_debug_details (loop))
2530 fprintf (dump_file, "Aligned load, but unsupported type.");
2534 if (!vec_stmt) /* transformation not required. */
2536 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2542 if (vect_debug_details (NULL))
2543 fprintf (dump_file, "transform load.");
2545 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2546 gcc_assert (alignment_support_cheme);
2548 if (alignment_support_cheme == dr_aligned
2549 || alignment_support_cheme == dr_unaligned_supported)
2560 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2561 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2562 if (aligned_access_p (dr))
2563 data_ref = build_fold_indirect_ref (data_ref);
2566 int mis = DR_MISALIGNMENT (dr);
2567 tree tmis = (mis == -1 ?
2569 build_int_cst (integer_type_node, mis));
2570 tmis = int_const_binop (MULT_EXPR, tmis,
2571 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2572 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2574 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2575 new_temp = make_ssa_name (vec_dest, new_stmt);
2576 TREE_OPERAND (new_stmt, 0) = new_temp;
2577 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2579 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2583 msq_init = *(floor(p1))
2584 p2 = initial_addr + VS - 1;
2585 magic = have_builtin ? builtin_result : initial_address;
2588 p2' = p2 + indx * vectype_size
2590 vec_dest = realign_load (msq, lsq, magic)
2604 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2605 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2606 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2608 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2609 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2610 new_temp = make_ssa_name (vec_dest, new_stmt);
2611 TREE_OPERAND (new_stmt, 0) = new_temp;
2612 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2613 gcc_assert (!new_bb);
2614 msq_init = TREE_OPERAND (new_stmt, 0);
2617 /* <2> Create lsq = *(floor(p2')) in the loop */
2618 offset = build_int_cst (integer_type_node,
2619 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2620 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2621 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2622 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2623 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2624 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2625 new_temp = make_ssa_name (vec_dest, new_stmt);
2626 TREE_OPERAND (new_stmt, 0) = new_temp;
2627 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2628 lsq = TREE_OPERAND (new_stmt, 0);
2632 if (targetm.vectorize.builtin_mask_for_load)
2634 /* Create permutation mask, if required, in loop preheader. */
2636 params = build_tree_list (NULL_TREE, init_addr);
2637 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2638 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2639 new_stmt = build_function_call_expr (builtin_decl, params);
2640 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2641 new_temp = make_ssa_name (vec_dest, new_stmt);
2642 TREE_OPERAND (new_stmt, 0) = new_temp;
2643 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2644 gcc_assert (!new_bb);
2645 magic = TREE_OPERAND (new_stmt, 0);
2647 /* Since we have just created a CALL_EXPR, we may need to
2648 rename call-clobbered variables. */
2649 mark_call_clobbered_vars_to_rename ();
2653 /* Use current address instead of init_addr for reduced reg pressure.
2655 magic = dataref_ptr;
2659 /* <4> Create msq = phi <msq_init, lsq> in loop */
2660 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2661 msq = make_ssa_name (vec_dest, NULL_TREE);
2662 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2663 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2664 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2665 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2668 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2669 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2670 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2671 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2672 new_temp = make_ssa_name (vec_dest, new_stmt);
2673 TREE_OPERAND (new_stmt, 0) = new_temp;
2674 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2679 *vec_stmt = new_stmt;
2684 /* Function vect_supportable_dr_alignment
2686 Return whether the data reference DR is supported with respect to its
2689 static enum dr_alignment_support
2690 vect_supportable_dr_alignment (struct data_reference *dr)
2692 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2693 enum machine_mode mode = (int) TYPE_MODE (vectype);
2695 if (aligned_access_p (dr))
2698 /* Possibly unaligned access. */
2700 if (DR_IS_READ (dr))
2702 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2703 && (!targetm.vectorize.builtin_mask_for_load
2704 || targetm.vectorize.builtin_mask_for_load ()))
2705 return dr_unaligned_software_pipeline;
2707 if (targetm.vectorize.misaligned_mem_ok (mode))
2708 /* Can't software pipeline the loads. */
2709 return dr_unaligned_supported;
2713 return dr_unaligned_unsupported;
2717 /* Function vect_transform_stmt.
2719 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2722 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2724 bool is_store = false;
2725 tree vec_stmt = NULL_TREE;
2726 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2729 switch (STMT_VINFO_TYPE (stmt_info))
2731 case op_vec_info_type:
2732 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2736 case assignment_vec_info_type:
2737 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2741 case load_vec_info_type:
2742 done = vectorizable_load (stmt, bsi, &vec_stmt);
2746 case store_vec_info_type:
2747 done = vectorizable_store (stmt, bsi, &vec_stmt);
2752 if (vect_debug_details (NULL))
2753 fprintf (dump_file, "stmt not supported.");
2757 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2763 /* This function builds ni_name = number of iterations loop executes
2764 on the loop preheader. */
2767 vect_build_loop_niters (loop_vec_info loop_vinfo)
2769 tree ni_name, stmt, var;
2771 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2772 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2774 var = create_tmp_var (TREE_TYPE (ni), "niters");
2775 add_referenced_tmp_var (var);
2776 ni_name = force_gimple_operand (ni, &stmt, false, var);
2778 pe = loop_preheader_edge (loop);
2781 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2782 gcc_assert (!new_bb);
2789 /* This function generates the following statements:
2791 ni_name = number of iterations loop executes
2792 ratio = ni_name / vf
2793 ratio_mult_vf_name = ratio * vf
2795 and places them at the loop preheader edge. */
2798 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2800 tree *ratio_mult_vf_name_ptr,
2801 tree *ratio_name_ptr)
2809 tree ratio_mult_vf_name;
2810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2811 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2812 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2813 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2815 pe = loop_preheader_edge (loop);
2817 /* Generate temporary variable that contains
2818 number of iterations loop executes. */
2820 ni_name = vect_build_loop_niters (loop_vinfo);
2822 /* Create: ratio = ni >> log2(vf) */
2824 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2825 add_referenced_tmp_var (var);
2826 ratio_name = make_ssa_name (var, NULL_TREE);
2827 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2828 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2829 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2831 pe = loop_preheader_edge (loop);
2832 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2833 gcc_assert (!new_bb);
2835 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2837 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2838 add_referenced_tmp_var (var);
2839 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2840 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2841 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2842 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2844 pe = loop_preheader_edge (loop);
2845 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2846 gcc_assert (!new_bb);
2848 *ni_name_ptr = ni_name;
2849 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2850 *ratio_name_ptr = ratio_name;
2856 /* Function vect_update_ivs_after_vectorizer.
2858 "Advance" the induction variables of LOOP to the value they should take
2859 after the execution of LOOP. This is currently necessary because the
2860 vectorizer does not handle induction variables that are used after the
2861 loop. Such a situation occurs when the last iterations of LOOP are
2863 1. We introduced new uses after LOOP for IVs that were not originally used
2864 after LOOP: the IVs of LOOP are now used by an epilog loop.
2865 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2866 times, whereas the loop IVs should be bumped N times.
2869 - LOOP - a loop that is going to be vectorized. The last few iterations
2870 of LOOP were peeled.
2871 - NITERS - the number of iterations that LOOP executes (before it is
2872 vectorized). i.e, the number of times the ivs should be bumped.
2873 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2874 coming out from LOOP on which there are uses of the LOOP ivs
2875 (this is the path from LOOP->exit to epilog_loop->preheader).
2877 The new definitions of the ivs are placed in LOOP->exit.
2878 The phi args associated with the edge UPDATE_E in the bb
2879 UPDATE_E->dest are updated accordingly.
2881 Assumption 1: Like the rest of the vectorizer, this function assumes
2882 a single loop exit that has a single predecessor.
2884 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2885 organized in the same order.
2887 Assumption 3: The access function of the ivs is simple enough (see
2888 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2890 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2891 coming out of LOOP on which the ivs of LOOP are used (this is the path
2892 that leads to the epilog loop; other paths skip the epilog loop). This
2893 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2894 needs to have its phis updated.
2898 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2900 basic_block exit_bb = loop->exit_edges[0]->dest;
2902 basic_block update_bb = update_e->dest;
2904 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2906 /* Make sure there exists a single-predecessor exit bb: */
2907 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2909 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2911 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2913 tree access_fn = NULL;
2914 tree evolution_part;
2917 tree var, stmt, ni, ni_name;
2918 block_stmt_iterator last_bsi;
2920 /* Skip virtual phi's. */
2921 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2923 if (vect_debug_details (NULL))
2924 fprintf (dump_file, "virtual phi. skip.");
2928 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2929 gcc_assert (access_fn);
2931 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2932 gcc_assert (evolution_part != NULL_TREE);
2934 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2935 of degree >= 2 or exponential. */
2936 gcc_assert (!tree_is_chrec (evolution_part));
2938 step_expr = evolution_part;
2939 init_expr = unshare_expr (initial_condition (access_fn));
2941 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2942 build2 (MULT_EXPR, TREE_TYPE (niters),
2943 niters, step_expr), init_expr);
2945 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2946 add_referenced_tmp_var (var);
2948 ni_name = force_gimple_operand (ni, &stmt, false, var);
2950 /* Insert stmt into exit_bb. */
2951 last_bsi = bsi_last (exit_bb);
2953 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2955 /* Fix phi expressions in the successor bb. */
2956 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2957 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2958 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2963 /* Function vect_do_peeling_for_loop_bound
2965 Peel the last iterations of the loop represented by LOOP_VINFO.
2966 The peeled iterations form a new epilog loop. Given that the loop now
2967 iterates NITERS times, the new epilog loop iterates
2968 NITERS % VECTORIZATION_FACTOR times.
2970 The original loop will later be made to iterate
2971 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2974 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2975 struct loops *loops)
2978 tree ni_name, ratio_mult_vf_name;
2979 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2980 struct loop *new_loop;
2982 #ifdef ENABLE_CHECKING
2986 if (vect_debug_details (NULL))
2987 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2989 /* Generate the following variables on the preheader of original loop:
2991 ni_name = number of iteration the original loop executes
2992 ratio = ni_name / vf
2993 ratio_mult_vf_name = ratio * vf */
2994 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2995 &ratio_mult_vf_name, ratio);
2997 /* Update loop info. */
2998 loop->pre_header = loop_preheader_edge (loop)->src;
2999 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3001 #ifdef ENABLE_CHECKING
3002 loop_num = loop->num;
3004 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3005 ratio_mult_vf_name, ni_name, false);
3006 #ifdef ENABLE_CHECKING
3007 gcc_assert (new_loop);
3008 gcc_assert (loop_num == loop->num);
3009 slpeel_verify_cfg_after_peeling (loop, new_loop);
3012 /* A guard that controls whether the new_loop is to be executed or skipped
3013 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3014 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3015 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3016 is on the path where the LOOP IVs are used and need to be updated. */
3018 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3019 update_e = EDGE_PRED (new_loop->pre_header, 0);
3021 update_e = EDGE_PRED (new_loop->pre_header, 1);
3023 /* Update IVs of original loop as if they were advanced
3024 by ratio_mult_vf_name steps. */
3025 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3027 /* After peeling we have to reset scalar evolution analyzer. */
3034 /* Function vect_gen_niters_for_prolog_loop
3036 Set the number of iterations for the loop represented by LOOP_VINFO
3037 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3038 and the misalignment of DR - the first data reference recorded in
3039 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3040 this loop, the data reference DR will refer to an aligned location.
3042 The following computation is generated:
3044 compute address misalignment in bytes:
3045 addr_mis = addr & (vectype_size - 1)
3047 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3049 (elem_size = element type size; an element is the scalar element
3050 whose type is the inner type of the vectype) */
3053 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3055 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3056 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3057 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3059 tree iters, iters_name;
3062 tree dr_stmt = DR_STMT (dr);
3063 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3064 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3065 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3068 tree new_stmts = NULL_TREE;
3070 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3071 tree ptr_type = TREE_TYPE (start_addr);
3072 tree size = TYPE_SIZE (ptr_type);
3073 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3074 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3075 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3076 tree niters_type = TREE_TYPE (loop_niters);
3077 tree elem_size_log =
3078 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3079 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3081 pe = loop_preheader_edge (loop);
3082 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3083 gcc_assert (!new_bb);
3085 /* Create: byte_misalign = addr & (vectype_size - 1) */
3086 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3088 /* Create: elem_misalign = byte_misalign / element_size */
3090 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3092 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3093 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3094 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3095 iters = fold_convert (niters_type, iters);
3097 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3098 /* If the loop bound is known at compile time we already verified that it is
3099 greater than vf; since the misalignment ('iters') is at most vf, there's
3100 no need to generate the MIN_EXPR in this case. */
3101 if (!host_integerp (loop_niters, 0))
3102 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3104 var = create_tmp_var (niters_type, "prolog_loop_niters");
3105 add_referenced_tmp_var (var);
3106 iters_name = force_gimple_operand (iters, &stmt, false, var);
3108 /* Insert stmt on loop preheader edge. */
3109 pe = loop_preheader_edge (loop);
3112 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3113 gcc_assert (!new_bb);
3120 /* Function vect_update_inits_of_dr
3122 NITERS iterations were peeled from LOOP. DR represents a data reference
3123 in LOOP. This function updates the information recorded in DR to
3124 account for the fact that the first NITERS iterations had already been
3125 executed. Specifically, it updates the initial_condition of the
3126 access_function of DR. */
3129 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3132 tree access_fn = DR_ACCESS_FN (dr, 0);
3133 tree init, init_new, step;
3135 step = evolution_part_in_loop_num (access_fn, loop->num);
3136 init = initial_condition (access_fn);
3138 init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3139 build2 (MULT_EXPR, TREE_TYPE (niters),
3140 niters, step), init);
3141 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3147 /* Function vect_update_inits_of_drs
3149 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3150 This function updates the information recorded for the data references in
3151 the loop to account for the fact that the first NITERS iterations had
3152 already been executed. Specifically, it updates the initial_condition of the
3153 access_function of all the data_references in the loop. */
3156 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3159 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3160 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3161 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3163 if (dump_file && (dump_flags & TDF_DETAILS))
3164 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3166 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3168 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3169 vect_update_inits_of_dr (dr, loop, niters);
3172 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3174 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3175 vect_update_inits_of_dr (dr, loop, niters);
3180 /* Function vect_do_peeling_for_alignment
3182 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3183 'niters' is set to the misalignment of one of the data references in the
3184 loop, thereby forcing it to refer to an aligned location at the beginning
3185 of the execution of this loop. The data reference for which we are
3186 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3189 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3191 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3192 tree niters_of_prolog_loop, ni_name;
3194 struct loop *new_loop;
3196 if (vect_debug_details (NULL))
3197 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3199 ni_name = vect_build_loop_niters (loop_vinfo);
3200 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3202 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3204 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3205 niters_of_prolog_loop, ni_name, true);
3206 #ifdef ENABLE_CHECKING
3207 gcc_assert (new_loop);
3208 slpeel_verify_cfg_after_peeling (new_loop, loop);
3211 /* Update number of times loop executes. */
3212 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3213 LOOP_VINFO_NITERS (loop_vinfo) =
3214 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3216 /* Update the init conditions of the access functions of all data refs. */
3217 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3219 /* After peeling we have to reset scalar evolution analyzer. */
3226 /* Function vect_transform_loop.
3228 The analysis phase has determined that the loop is vectorizable.
3229 Vectorize the loop - created vectorized stmts to replace the scalar
3230 stmts in the loop, and update the loop exit condition. */
3233 vect_transform_loop (loop_vec_info loop_vinfo,
3234 struct loops *loops ATTRIBUTE_UNUSED)
3236 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3237 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3238 int nbbs = loop->num_nodes;
3239 block_stmt_iterator si;
3242 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3244 if (vect_debug_details (NULL))
3245 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3248 /* Peel the loop if there are data refs with unknown alignment.
3249 Only one data ref with unknown store is allowed. */
3251 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3252 vect_do_peeling_for_alignment (loop_vinfo, loops);
3254 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3255 compile time constant), or it is a constant that doesn't divide by the
3256 vectorization factor, then an epilog loop needs to be created.
3257 We therefore duplicate the loop: the original loop will be vectorized,
3258 and will compute the first (n/VF) iterations. The second copy of the loop
3259 will remain scalar and will compute the remaining (n%VF) iterations.
3260 (VF is the vectorization factor). */
3262 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3263 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3264 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3265 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3267 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3268 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3270 /* 1) Make sure the loop header has exactly two entries
3271 2) Make sure we have a preheader basic block. */
3273 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3275 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3278 /* FORNOW: the vectorizer supports only loops which body consist
3279 of one basic block (header + empty latch). When the vectorizer will
3280 support more involved loop forms, the order by which the BBs are
3281 traversed need to be reconsidered. */
3283 for (i = 0; i < nbbs; i++)
3285 basic_block bb = bbs[i];
3287 for (si = bsi_start (bb); !bsi_end_p (si);)
3289 tree stmt = bsi_stmt (si);
3290 stmt_vec_info stmt_info;
3293 if (vect_debug_details (NULL))
3295 fprintf (dump_file, "------>vectorizing statement: ");
3296 print_generic_expr (dump_file, stmt, TDF_SLIM);
3298 stmt_info = vinfo_for_stmt (stmt);
3299 gcc_assert (stmt_info);
3300 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3305 #ifdef ENABLE_CHECKING
3306 /* FORNOW: Verify that all stmts operate on the same number of
3307 units and no inner unrolling is necessary. */
3309 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3310 == vectorization_factor);
3312 /* -------- vectorize statement ------------ */
3313 if (vect_debug_details (NULL))
3314 fprintf (dump_file, "transform statement.");
3316 is_store = vect_transform_stmt (stmt, &si);
3319 /* free the attached stmt_vec_info and remove the stmt. */
3320 stmt_ann_t ann = stmt_ann (stmt);
3322 set_stmt_info (ann, NULL);
3331 slpeel_make_loop_iterate_ntimes (loop, ratio);
3333 if (vect_debug_details (loop))
3334 fprintf (dump_file,"Success! loop vectorized.");
3335 if (vect_debug_stats (loop))
3336 fprintf (dump_file, "LOOP VECTORIZED.");
3340 /* Function vect_is_simple_use.
3343 LOOP - the loop that is being vectorized.
3344 OPERAND - operand of a stmt in LOOP.
3345 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3347 Returns whether a stmt with OPERAND can be vectorized.
3348 Supportable operands are constants, loop invariants, and operands that are
3349 defined by the current iteration of the loop. Unsupportable operands are
3350 those that are defined by a previous iteration of the loop (as is the case
3351 in reduction/induction computations). */
3354 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3362 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3365 if (TREE_CODE (operand) != SSA_NAME)
3368 def_stmt = SSA_NAME_DEF_STMT (operand);
3369 if (def_stmt == NULL_TREE )
3371 if (vect_debug_details (NULL))
3372 fprintf (dump_file, "no def_stmt.");
3376 /* empty stmt is expected only in case of a function argument.
3377 (Otherwise - we expect a phi_node or a modify_expr). */
3378 if (IS_EMPTY_STMT (def_stmt))
3380 tree arg = TREE_OPERAND (def_stmt, 0);
3381 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3383 if (vect_debug_details (NULL))
3385 fprintf (dump_file, "Unexpected empty stmt: ");
3386 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3391 /* phi_node inside the loop indicates an induction/reduction pattern.
3392 This is not supported yet. */
3393 bb = bb_for_stmt (def_stmt);
3394 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3396 if (vect_debug_details (NULL))
3397 fprintf (dump_file, "reduction/induction - unsupported.");
3398 return false; /* FORNOW: not supported yet. */
3401 /* Expecting a modify_expr or a phi_node. */
3402 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3403 || TREE_CODE (def_stmt) == PHI_NODE)
3414 /* Function vect_analyze_operations.
3416 Scan the loop stmts and make sure they are all vectorizable. */
3419 vect_analyze_operations (loop_vec_info loop_vinfo)
3421 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3422 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3423 int nbbs = loop->num_nodes;
3424 block_stmt_iterator si;
3425 unsigned int vectorization_factor = 0;
3430 if (vect_debug_details (NULL))
3431 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3433 for (i = 0; i < nbbs; i++)
3435 basic_block bb = bbs[i];
3437 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3439 tree stmt = bsi_stmt (si);
3440 unsigned int nunits;
3441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3444 if (vect_debug_details (NULL))
3446 fprintf (dump_file, "==> examining statement: ");
3447 print_generic_expr (dump_file, stmt, TDF_SLIM);
3450 gcc_assert (stmt_info);
3452 /* skip stmts which do not need to be vectorized.
3453 this is expected to include:
3454 - the COND_EXPR which is the loop exit condition
3455 - any LABEL_EXPRs in the loop
3456 - computations that are used only for array indexing or loop
3459 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3461 if (vect_debug_details (NULL))
3462 fprintf (dump_file, "irrelevant.");
3466 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3468 if (vect_debug_stats (loop) || vect_debug_details (loop))
3470 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3471 print_generic_expr (dump_file, stmt, TDF_SLIM);
3476 if (STMT_VINFO_DATA_REF (stmt_info))
3477 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3478 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3479 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3481 scalar_type = TREE_TYPE (stmt);
3483 if (vect_debug_details (NULL))
3485 fprintf (dump_file, "get vectype for scalar type: ");
3486 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3489 vectype = get_vectype_for_scalar_type (scalar_type);
3492 if (vect_debug_stats (loop) || vect_debug_details (loop))
3494 fprintf (dump_file, "not vectorized: unsupported data-type ");
3495 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3500 if (vect_debug_details (NULL))
3502 fprintf (dump_file, "vectype: ");
3503 print_generic_expr (dump_file, vectype, TDF_SLIM);
3505 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3507 ok = (vectorizable_operation (stmt, NULL, NULL)
3508 || vectorizable_assignment (stmt, NULL, NULL)
3509 || vectorizable_load (stmt, NULL, NULL)
3510 || vectorizable_store (stmt, NULL, NULL));
3514 if (vect_debug_stats (loop) || vect_debug_details (loop))
3516 fprintf (dump_file, "not vectorized: stmt not supported: ");
3517 print_generic_expr (dump_file, stmt, TDF_SLIM);
3522 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3523 if (vect_debug_details (NULL))
3524 fprintf (dump_file, "nunits = %d", nunits);
3526 if (vectorization_factor)
3528 /* FORNOW: don't allow mixed units.
3529 This restriction will be relaxed in the future. */
3530 if (nunits != vectorization_factor)
3532 if (vect_debug_stats (loop) || vect_debug_details (loop))
3533 fprintf (dump_file, "not vectorized: mixed data-types");
3538 vectorization_factor = nunits;
3540 #ifdef ENABLE_CHECKING
3541 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3542 * vectorization_factor == UNITS_PER_SIMD_WORD);
3547 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3549 if (vectorization_factor <= 1)
3551 if (vect_debug_stats (loop) || vect_debug_details (loop))
3552 fprintf (dump_file, "not vectorized: unsupported data-type");
3555 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3557 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3559 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3560 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3562 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3563 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3565 if (vect_debug_stats (loop) || vect_debug_details (loop))
3566 fprintf (dump_file, "not vectorized: iteration count too small.");
3570 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3571 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3573 if (vect_debug_stats (loop) || vect_debug_details (loop))
3574 fprintf (dump_file, "epilog loop required.");
3575 if (!vect_can_advance_ivs_p (loop))
3577 if (vect_debug_stats (loop) || vect_debug_details (loop))
3578 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3581 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3583 if (vect_debug_stats (loop) || vect_debug_details (loop))
3584 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3593 /* Function exist_non_indexing_operands_for_use_p
3595 USE is one of the uses attached to STMT. Check if USE is
3596 used in STMT for anything other than indexing an array. */
3599 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3602 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3604 /* USE corresponds to some operand in STMT. If there is no data
3605 reference in STMT, then any operand that corresponds to USE
3606 is not indexing an array. */
3607 if (!STMT_VINFO_DATA_REF (stmt_info))
3610 /* STMT has a data_ref. FORNOW this means that its of one of
3611 the following forms:
3614 (This should have been verified in analyze_data_refs).
3616 'var' in the second case corresponds to a def, not a use,
3617 so USE cannot correspond to any operands that are not used
3620 Therefore, all we need to check is if STMT falls into the
3621 first case, and whether var corresponds to USE. */
3623 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3626 operand = TREE_OPERAND (stmt, 1);
3628 if (TREE_CODE (operand) != SSA_NAME)
3638 /* Function vect_is_simple_iv_evolution.
3640 FORNOW: A simple evolution of an induction variables in the loop is
3641 considered a polynomial evolution with constant step. */
3644 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3645 tree * step, bool strict)
3650 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3652 /* When there is no evolution in this loop, the evolution function
3654 if (evolution_part == NULL_TREE)
3657 /* When the evolution is a polynomial of degree >= 2
3658 the evolution function is not "simple". */
3659 if (tree_is_chrec (evolution_part))
3662 step_expr = evolution_part;
3663 init_expr = unshare_expr (initial_condition (access_fn));
3665 if (vect_debug_details (NULL))
3667 fprintf (dump_file, "step: ");
3668 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3669 fprintf (dump_file, ", init: ");
3670 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3676 if (TREE_CODE (step_expr) != INTEGER_CST)
3678 if (vect_debug_details (NULL))
3679 fprintf (dump_file, "step unknown.");
3684 if (!integer_onep (step_expr))
3686 if (vect_debug_details (NULL))
3687 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3695 /* Function vect_analyze_scalar_cycles.
3697 Examine the cross iteration def-use cycles of scalar variables, by
3698 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3699 cycles that they represent do not impede vectorization.
3701 FORNOW: Reduction as in the following loop, is not supported yet:
3705 The cross-iteration cycle corresponding to variable 'sum' will be
3706 considered too complicated and will impede vectorization.
3708 FORNOW: Induction as in the following loop, is not supported yet:
3713 However, the following loop *is* vectorizable:
3718 In both loops there exists a def-use cycle for the variable i:
3719 loop: i_2 = PHI (i_0, i_1)
3724 The evolution of the above cycle is considered simple enough,
3725 however, we also check that the cycle does not need to be
3726 vectorized, i.e - we check that the variable that this cycle
3727 defines is only used for array indexing or in stmts that do not
3728 need to be vectorized. This is not the case in loop2, but it
3729 *is* the case in loop3. */
3732 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3735 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3736 basic_block bb = loop->header;
3739 if (vect_debug_details (NULL))
3740 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3742 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3744 tree access_fn = NULL;
3746 if (vect_debug_details (NULL))
3748 fprintf (dump_file, "Analyze phi: ");
3749 print_generic_expr (dump_file, phi, TDF_SLIM);
3752 /* Skip virtual phi's. The data dependences that are associated with
3753 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3755 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3757 if (vect_debug_details (NULL))
3758 fprintf (dump_file, "virtual phi. skip.");
3762 /* Analyze the evolution function. */
3764 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3765 those of loop induction variables; This property is verified here.
3767 Furthermore, if that induction variable is used in an operation
3768 that needs to be vectorized (i.e, is not solely used to index
3769 arrays and check the exit condition) - we do not support its
3770 vectorization yet. This property is verified in vect_is_simple_use,
3771 during vect_analyze_operations. */
3773 access_fn = /* instantiate_parameters
3775 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3779 if (vect_debug_stats (loop) || vect_debug_details (loop))
3780 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3784 if (vect_debug_details (NULL))
3786 fprintf (dump_file, "Access function of PHI: ");
3787 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3790 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3793 if (vect_debug_stats (loop) || vect_debug_details (loop))
3794 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3803 /* Function vect_analyze_data_ref_dependence.
3805 Return TRUE if there (might) exist a dependence between a memory-reference
3806 DRA and a memory-reference DRB. */
3809 vect_analyze_data_ref_dependence (struct data_reference *dra,
3810 struct data_reference *drb,
3814 struct data_dependence_relation *ddr;
3816 if (!array_base_name_differ_p (dra, drb, &differ_p))
3818 if (vect_debug_stats (loop) || vect_debug_details (loop))
3821 "not vectorized: can't determine dependence between: ");
3822 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3823 fprintf (dump_file, " and ");
3824 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3832 ddr = initialize_data_dependence_relation (dra, drb);
3833 compute_affine_dependence (ddr);
3835 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3838 if (vect_debug_stats (loop) || vect_debug_details (loop))
3841 "not vectorized: possible dependence between data-refs ");
3842 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3843 fprintf (dump_file, " and ");
3844 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3851 /* Function vect_analyze_data_ref_dependences.
3853 Examine all the data references in the loop, and make sure there do not
3854 exist any data dependences between them.
3856 TODO: dependences which distance is greater than the vectorization factor
3860 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3863 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3864 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3865 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3867 /* Examine store-store (output) dependences. */
3869 if (vect_debug_details (NULL))
3870 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3872 if (vect_debug_details (NULL))
3873 fprintf (dump_file, "compare all store-store pairs.");
3875 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3877 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3879 struct data_reference *dra =
3880 VARRAY_GENERIC_PTR (loop_write_refs, i);
3881 struct data_reference *drb =
3882 VARRAY_GENERIC_PTR (loop_write_refs, j);
3883 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3888 /* Examine load-store (true/anti) dependences. */
3890 if (vect_debug_details (NULL))
3891 fprintf (dump_file, "compare all load-store pairs.");
3893 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3895 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3897 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3898 struct data_reference *drb =
3899 VARRAY_GENERIC_PTR (loop_write_refs, j);
3900 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3909 /* Function vect_get_first_index.
3911 REF is a data reference.
3912 If it is an ARRAY_REF: if its lower bound is simple enough,
3913 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3914 If it is not an ARRAY_REF: REF has no "first index";
3915 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3918 vect_get_first_index (tree ref, tree *array_first_index)
3922 if (TREE_CODE (ref) != ARRAY_REF)
3923 *array_first_index = size_zero_node;
3926 array_start = array_ref_low_bound (ref);
3927 if (!host_integerp (array_start, 0))
3929 if (vect_debug_details (NULL))
3931 fprintf (dump_file, "array min val not simple integer cst.");
3932 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3936 *array_first_index = array_start;
3943 /* Function vect_compute_array_base_alignment.
3944 A utility function of vect_compute_array_ref_alignment.
3946 Compute the misalignment of ARRAY in bits.
3949 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3950 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3951 if NULL: don't compute misalignment, just return the base of ARRAY.
3952 PREV_DIMENSIONS - initialized to one.
3953 MISALIGNMENT - the computed misalignment in bits.
3956 If VECTYPE is not NULL:
3957 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3958 the base of the array, and put the computed misalignment in MISALIGNMENT.
3960 Return the base of the array.
3962 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3963 a[idx_N]...[idx_2][idx_1] is
3964 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3965 ... + idx_N * dim_0 * ... * dim_N-1}.
3966 (The misalignment of &a is not checked here).
3967 Note, that every term contains dim_0, therefore, if dim_0 is a
3968 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3969 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3970 NUINTS, we can say that the misalignment of the sum is equal to
3971 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3972 we can't determine this array misalignment, and we return
3974 We proceed recursively in this manner, accumulating total misalignment
3975 and the multiplication of previous dimensions for correct misalignment
3979 vect_compute_array_base_alignment (tree array,
3981 tree *prev_dimensions,
3986 tree dimension_size;
3988 tree bits_per_vectype;
3989 tree bits_per_vectype_unit;
3991 /* The 'stop condition' of the recursion. */
3992 if (TREE_CODE (array) != ARRAY_REF)
3996 /* Just get the base decl. */
3997 return vect_compute_array_base_alignment
3998 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4000 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4001 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4004 domain = TYPE_DOMAIN (TREE_TYPE (array));
4006 int_const_binop (PLUS_EXPR,
4007 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4008 TYPE_MIN_VALUE (domain), 1),
4011 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4012 is a multiple of NUNITS:
4014 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4016 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4017 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4018 if (integer_zerop (mis))
4019 /* This array is aligned. Continue just in order to get the base decl. */
4020 return vect_compute_array_base_alignment
4021 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4023 index = TREE_OPERAND (array, 1);
4024 if (!host_integerp (index, 1))
4025 /* The current index is not constant. */
4028 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4030 bits_per_vectype = fold_convert (unsigned_type_node,
4031 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4032 GET_MODE_SIZE (TYPE_MODE (vectype))));
4033 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4034 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4035 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4037 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4041 (*misalignment + index_val * dimension_size * *prev_dimensions)
4045 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4046 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4047 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4048 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4049 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4052 *prev_dimensions = int_const_binop (MULT_EXPR,
4053 *prev_dimensions, dimension_size, 1);
4055 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4061 /* Function vect_compute_data_ref_alignment
4063 Compute the misalignment of the data reference DR.
4066 1. If during the misalignment computation it is found that the data reference
4067 cannot be vectorized then false is returned.
4068 2. DR_MISALIGNMENT (DR) is defined.
4070 FOR NOW: No analysis is actually performed. Misalignment is calculated
4071 only for trivial cases. TODO. */
4074 vect_compute_data_ref_alignment (struct data_reference *dr,
4075 loop_vec_info loop_vinfo)
4077 tree stmt = DR_STMT (dr);
4078 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4079 tree ref = DR_REF (dr);
4082 tree offset = size_zero_node;
4083 tree base, bit_offset, alignment;
4084 tree unit_bits = fold_convert (unsigned_type_node,
4085 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4087 bool base_aligned_p;
4089 if (vect_debug_details (NULL))
4090 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4092 /* Initialize misalignment to unknown. */
4093 DR_MISALIGNMENT (dr) = -1;
4095 scalar_type = TREE_TYPE (ref);
4096 vectype = get_vectype_for_scalar_type (scalar_type);
4099 if (vect_debug_details (NULL))
4101 fprintf (dump_file, "no vectype for stmt: ");
4102 print_generic_expr (dump_file, stmt, TDF_SLIM);
4103 fprintf (dump_file, " scalar_type: ");
4104 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4106 /* It is not possible to vectorize this data reference. */
4109 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4110 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4112 if (TREE_CODE (ref) == ARRAY_REF)
4115 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4117 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4118 loop_vinfo, &bit_offset, &base_aligned_p);
4121 if (vect_debug_details (NULL))
4123 fprintf (dump_file, "Unknown alignment for access: ");
4124 print_generic_expr (dump_file,
4125 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4130 if (!base_aligned_p)
4132 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4134 if (vect_debug_details (NULL))
4136 fprintf (dump_file, "can't force alignment of ref: ");
4137 print_generic_expr (dump_file, ref, TDF_SLIM);
4142 /* Force the alignment of the decl.
4143 NOTE: This is the only change to the code we make during
4144 the analysis phase, before deciding to vectorize the loop. */
4145 if (vect_debug_details (NULL))
4146 fprintf (dump_file, "force alignment");
4147 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4148 DECL_USER_ALIGN (base) = 1;
4151 /* At this point we assume that the base is aligned, and the offset from it
4152 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4153 gcc_assert (base_aligned_p
4154 || (TREE_CODE (base) == VAR_DECL
4155 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4157 /* Convert into bytes. */
4158 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4159 /* Check that there is no remainder in bits. */
4160 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4161 if (!integer_zerop (bit_offset))
4163 if (vect_debug_details (NULL))
4165 fprintf (dump_file, "bit offset alignment: ");
4166 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4171 /* Alignment required, in bytes: */
4172 alignment = fold_convert (unsigned_type_node,
4173 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4175 /* Modulo alignment. */
4176 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4177 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4179 if (vect_debug_details (NULL))
4180 fprintf (dump_file, "unexpected misalign value");
4184 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4186 if (vect_debug_details (NULL))
4187 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4193 /* Function vect_compute_array_ref_alignment
4195 Compute the alignment of an array-ref.
4196 The alignment we compute here is relative to
4197 TYPE_ALIGN(VECTYPE) boundary.
4200 OFFSET - the alignment in bits
4201 Return value - the base of the array-ref. E.g,
4202 if the array-ref is a.b[k].c[i][j] the returned
4207 vect_compute_array_ref_alignment (struct data_reference *dr,
4208 loop_vec_info loop_vinfo,
4212 tree array_first_index = size_zero_node;
4214 tree ref = DR_REF (dr);
4215 tree scalar_type = TREE_TYPE (ref);
4216 tree oprnd0 = TREE_OPERAND (ref, 0);
4217 tree dims = size_one_node;
4218 tree misalign = size_zero_node;
4219 tree next_ref, this_offset = size_zero_node;
4223 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4224 /* The reference is an array without its last index. */
4225 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4228 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4231 /* Alignment is not requested. Just return the base. */
4234 /* Compute alignment. */
4235 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4237 this_offset = misalign;
4239 /* Check the first index accessed. */
4240 if (!vect_get_first_index (ref, &array_first_index))
4242 if (vect_debug_details (NULL))
4243 fprintf (dump_file, "no first_index for array.");
4247 /* Check the index of the array_ref. */
4248 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4249 LOOP_VINFO_LOOP (loop_vinfo)->num);
4251 /* FORNOW: In order to simplify the handling of alignment, we make sure
4252 that the first location at which the array is accessed ('init') is on an
4253 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4254 This is too conservative, since we require that
4255 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4256 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4257 This should be relaxed in the future. */
4259 if (!init || !host_integerp (init, 0))
4261 if (vect_debug_details (NULL))
4262 fprintf (dump_file, "non constant init. ");
4266 /* bytes per scalar element: */
4267 nunits = fold_convert (unsigned_type_node,
4268 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4269 nbits = int_const_binop (MULT_EXPR, nunits,
4270 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4272 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4273 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4274 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4275 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4277 /* TODO: allow negative misalign values. */
4278 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4280 if (vect_debug_details (NULL))
4281 fprintf (dump_file, "unexpected misalign value");
4289 /* Function vect_compute_data_refs_alignment
4291 Compute the misalignment of data references in the loop.
4292 This pass may take place at function granularity instead of at loop
4295 FOR NOW: No analysis is actually performed. Misalignment is calculated
4296 only for trivial cases. TODO. */
4299 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4301 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4302 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4305 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4307 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4308 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4312 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4314 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4315 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4323 /* Function vect_enhance_data_refs_alignment
4325 This pass will use loop versioning and loop peeling in order to enhance
4326 the alignment of data references in the loop.
4328 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4329 original loop is to be vectorized; Any other loops that are created by
4330 the transformations performed in this pass - are not supposed to be
4331 vectorized. This restriction will be relaxed. */
4334 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4336 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4337 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4338 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4342 This pass will require a cost model to guide it whether to apply peeling
4343 or versioning or a combination of the two. For example, the scheme that
4344 intel uses when given a loop with several memory accesses, is as follows:
4345 choose one memory access ('p') which alignment you want to force by doing
4346 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4347 other accesses are not necessarily aligned, or (2) use loop versioning to
4348 generate one loop in which all accesses are aligned, and another loop in
4349 which only 'p' is necessarily aligned.
4351 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4352 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4353 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4355 Devising a cost model is the most critical aspect of this work. It will
4356 guide us on which access to peel for, whether to use loop versioning, how
4357 many versions to create, etc. The cost model will probably consist of
4358 generic considerations as well as target specific considerations (on
4359 powerpc for example, misaligned stores are more painful than misaligned
4362 Here is the general steps involved in alignment enhancements:
4364 -- original loop, before alignment analysis:
4365 for (i=0; i<N; i++){
4366 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4367 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4370 -- After vect_compute_data_refs_alignment:
4371 for (i=0; i<N; i++){
4372 x = q[i]; # DR_MISALIGNMENT(q) = 3
4373 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4376 -- Possibility 1: we do loop versioning:
4378 for (i=0; i<N; i++){ # loop 1A
4379 x = q[i]; # DR_MISALIGNMENT(q) = 3
4380 p[i] = y; # DR_MISALIGNMENT(p) = 0
4384 for (i=0; i<N; i++){ # loop 1B
4385 x = q[i]; # DR_MISALIGNMENT(q) = 3
4386 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4390 -- Possibility 2: we do loop peeling:
4391 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4395 for (i = 3; i < N; i++){ # loop 2A
4396 x = q[i]; # DR_MISALIGNMENT(q) = 0
4397 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4400 -- Possibility 3: combination of loop peeling and versioning:
4401 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4406 for (i = 3; i<N; i++){ # loop 3A
4407 x = q[i]; # DR_MISALIGNMENT(q) = 0
4408 p[i] = y; # DR_MISALIGNMENT(p) = 0
4412 for (i = 3; i<N; i++){ # loop 3B
4413 x = q[i]; # DR_MISALIGNMENT(q) = 0
4414 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4418 These loops are later passed to loop_transform to be vectorized. The
4419 vectorizer will use the alignment information to guide the transformation
4420 (whether to generate regular loads/stores, or with special handling for
4424 /* (1) Peeling to force alignment. */
4426 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4428 + How many accesses will become aligned due to the peeling
4429 - How many accesses will become unaligned due to the peeling,
4430 and the cost of misaligned accesses.
4431 - The cost of peeling (the extra runtime checks, the increase
4434 The scheme we use FORNOW: peel to force the alignment of the first
4435 misaligned store in the loop.
4436 Rationale: misaligned stores are not yet supported.
4438 TODO: Use a better cost model. */
4440 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4442 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4443 if (!aligned_access_p (dr))
4445 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4446 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4451 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4453 if (vect_debug_details (loop))
4454 fprintf (dump_file, "Peeling for alignment will not be applied.");
4458 if (vect_debug_details (loop))
4459 fprintf (dump_file, "Peeling for alignment will be applied.");
4462 /* (1.2) Update the alignment info according to the peeling factor.
4463 If the misalignment of the DR we peel for is M, then the
4464 peeling factor is VF - M, and the misalignment of each access DR_i
4465 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4466 If the misalignment of the DR we peel for is unknown, then the
4467 misalignment of each access DR_i in the loop is also unknown.
4469 FORNOW: set the misalignment of the accesses to unknown even
4470 if the peeling factor is known at compile time.
4472 TODO: - if the peeling factor is known at compile time, use that
4473 when updating the misalignment info of the loop DRs.
4474 - consider accesses that are known to have the same
4475 alignment, even if that alignment is unknown. */
4477 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4479 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4480 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4481 DR_MISALIGNMENT (dr) = 0;
4483 DR_MISALIGNMENT (dr) = -1;
4485 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4487 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4488 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4489 DR_MISALIGNMENT (dr) = 0;
4491 DR_MISALIGNMENT (dr) = -1;
4496 /* Function vect_analyze_data_refs_alignment
4498 Analyze the alignment of the data-references in the loop.
4499 FOR NOW: Until support for misliagned accesses is in place, only if all
4500 accesses are aligned can the loop be vectorized. This restriction will be
4504 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4506 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4507 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4508 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4509 enum dr_alignment_support supportable_dr_alignment;
4512 if (vect_debug_details (NULL))
4513 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4516 /* This pass may take place at function granularity instead of at loop
4519 if (!vect_compute_data_refs_alignment (loop_vinfo))
4521 if (vect_debug_details (loop) || vect_debug_stats (loop))
4523 "not vectorized: can't calculate alignment for data ref.");
4528 /* This pass will decide on using loop versioning and/or loop peeling in
4529 order to enhance the alignment of data references in the loop. */
4531 vect_enhance_data_refs_alignment (loop_vinfo);
4534 /* Finally, check that all the data references in the loop can be
4535 handled with respect to their alignment. */
4537 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4539 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4540 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4541 if (!supportable_dr_alignment)
4543 if (vect_debug_details (loop) || vect_debug_stats (loop))
4544 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4548 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4550 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4551 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4552 if (!supportable_dr_alignment)
4554 if (vect_debug_details (loop) || vect_debug_stats (loop))
4555 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4564 /* Function vect_analyze_data_ref_access.
4566 Analyze the access pattern of the data-reference DR. For now, a data access
4567 has to consecutive and aligned to be considered vectorizable. */
4570 vect_analyze_data_ref_access (struct data_reference *dr)
4572 varray_type access_fns = DR_ACCESS_FNS (dr);
4575 unsigned int dimensions, i;
4577 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4578 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4579 access is contiguous). */
4580 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4582 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4584 access_fn = DR_ACCESS_FN (dr, i);
4586 if (evolution_part_in_loop_num (access_fn,
4587 loop_containing_stmt (DR_STMT (dr))->num))
4589 /* Evolution part is not NULL in this loop (it is neither constant
4591 if (vect_debug_details (NULL))
4594 "not vectorized: complicated multidim. array access.");
4595 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4601 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4602 if (!evolution_function_is_constant_p (access_fn)
4603 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4604 access_fn, &init, &step, true))
4606 if (vect_debug_details (NULL))
4608 fprintf (dump_file, "not vectorized: complicated access function.");
4609 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4618 /* Function vect_analyze_data_ref_accesses.
4620 Analyze the access pattern of all the data references in the loop.
4622 FORNOW: the only access pattern that is considered vectorizable is a
4623 simple step 1 (consecutive) access.
4625 FORNOW: handle only arrays and pointer accesses. */
4628 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4631 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4632 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4634 if (vect_debug_details (NULL))
4635 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4637 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4639 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4640 bool ok = vect_analyze_data_ref_access (dr);
4643 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4644 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4645 fprintf (dump_file, "not vectorized: complicated access pattern.");
4650 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4652 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4653 bool ok = vect_analyze_data_ref_access (dr);
4656 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4657 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4658 fprintf (dump_file, "not vectorized: complicated access pattern.");
4667 /* Function vect_analyze_pointer_ref_access.
4670 STMT - a stmt that contains a data-ref
4671 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4673 If the data-ref access is vectorizable, return a data_reference structure
4674 that represents it (DR). Otherwise - return NULL. */
4676 static struct data_reference *
4677 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4679 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4680 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4681 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4684 tree reftype, innertype;
4685 enum machine_mode innermode;
4686 tree indx_access_fn;
4687 int loopnum = loop->num;
4688 struct data_reference *dr;
4692 if (vect_debug_stats (loop) || vect_debug_details (loop))
4693 fprintf (dump_file, "not vectorized: complicated pointer access.");
4697 if (vect_debug_details (NULL))
4699 fprintf (dump_file, "Access function of ptr: ");
4700 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4703 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4705 if (vect_debug_stats (loop) || vect_debug_details (loop))
4706 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4712 if (!host_integerp (step,0))
4714 if (vect_debug_stats (loop) || vect_debug_details (loop))
4716 "not vectorized: non constant step for pointer access.");
4720 step_val = TREE_INT_CST_LOW (step);
4722 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4723 if (TREE_CODE (reftype) != POINTER_TYPE)
4725 if (vect_debug_stats (loop) || vect_debug_details (loop))
4726 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4730 reftype = TREE_TYPE (init);
4731 if (TREE_CODE (reftype) != POINTER_TYPE)
4733 if (vect_debug_stats (loop) || vect_debug_details (loop))
4734 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4738 innertype = TREE_TYPE (reftype);
4739 innermode = TYPE_MODE (innertype);
4740 if (GET_MODE_SIZE (innermode) != step_val)
4742 /* FORNOW: support only consecutive access */
4743 if (vect_debug_stats (loop) || vect_debug_details (loop))
4744 fprintf (dump_file, "not vectorized: non consecutive access.");
4749 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4750 if (vect_debug_details (NULL))
4752 fprintf (dump_file, "Access function of ptr indx: ");
4753 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4755 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4760 /* Function vect_get_symbl_and_dr.
4762 The function returns SYMBL - the relevant variable for
4763 memory tag (for aliasing purposes).
4764 Also data reference structure DR is created.
4767 MEMREF - data reference in STMT
4768 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4771 DR - data_reference struct for MEMREF
4772 return value - the relevant variable for memory tag (for aliasing purposes).
4777 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4778 loop_vec_info loop_vinfo, struct data_reference **dr)
4780 tree symbl, oprnd0, oprnd1;
4781 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4783 tree array_base, base;
4784 struct data_reference *new_dr;
4785 bool base_aligned_p;
4788 switch (TREE_CODE (memref))
4791 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4795 symbl = DR_BASE_NAME (new_dr);
4796 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4798 switch (TREE_CODE (symbl))
4802 oprnd0 = TREE_OPERAND (symbl, 0);
4803 oprnd1 = TREE_OPERAND (symbl, 1);
4806 /* Only {address_base + offset} expressions are supported,
4807 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4808 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4809 TODO: swap operands if {offset + address_base}. */
4810 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4811 && TREE_CODE (oprnd1) != INTEGER_CST)
4812 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4815 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4818 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4819 loop_vinfo, &new_dr);
4823 /* symbl remains unchanged. */
4827 if (vect_debug_details (NULL))
4829 fprintf (dump_file, "unhandled data ref: ");
4830 print_generic_expr (dump_file, memref, TDF_SLIM);
4831 fprintf (dump_file, " (symbl ");
4832 print_generic_expr (dump_file, symbl, TDF_SLIM);
4833 fprintf (dump_file, ") in stmt ");
4834 print_generic_expr (dump_file, stmt, TDF_SLIM);
4841 offset = size_zero_node;
4843 /* Store the array base in the stmt info.
4844 For one dimensional array ref a[i], the base is a,
4845 for multidimensional a[i1][i2]..[iN], the base is
4846 a[i1][i2]..[iN-1]. */
4847 array_base = TREE_OPERAND (memref, 0);
4848 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4850 new_dr = analyze_array (stmt, memref, is_read);
4853 /* Find the relevant symbol for aliasing purposes. */
4854 base = DR_BASE_NAME (new_dr);
4855 switch (TREE_CODE (base))
4862 symbl = TREE_OPERAND (base, 0);
4866 /* Could have recorded more accurate information -
4867 i.e, the actual FIELD_DECL that is being referenced -
4868 but later passes expect VAR_DECL as the nmt. */
4869 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4870 loop_vinfo, &offset, &base_aligned_p);
4875 if (vect_debug_details (NULL))
4877 fprintf (dump_file, "unhandled struct/class field access ");
4878 print_generic_expr (dump_file, stmt, TDF_SLIM);
4885 if (vect_debug_details (NULL))
4887 fprintf (dump_file, "unhandled data ref: ");
4888 print_generic_expr (dump_file, memref, TDF_SLIM);
4889 fprintf (dump_file, " in stmt ");
4890 print_generic_expr (dump_file, stmt, TDF_SLIM);
4898 /* Function vect_analyze_data_refs.
4900 Find all the data references in the loop.
4902 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4903 which base is really an array (not a pointer) and which alignment
4904 can be forced. This restriction will be relaxed. */
4907 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4909 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4910 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4911 int nbbs = loop->num_nodes;
4912 block_stmt_iterator si;
4914 struct data_reference *dr;
4917 bool base_aligned_p;
4920 if (vect_debug_details (NULL))
4921 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4923 for (j = 0; j < nbbs; j++)
4925 basic_block bb = bbs[j];
4926 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4928 bool is_read = false;
4929 tree stmt = bsi_stmt (si);
4930 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4931 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4932 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4933 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4934 varray_type *datarefs = NULL;
4935 int nvuses, nv_may_defs, nv_must_defs;
4939 /* Assumption: there exists a data-ref in stmt, if and only if
4940 it has vuses/vdefs. */
4942 if (!vuses && !v_may_defs && !v_must_defs)
4945 nvuses = NUM_VUSES (vuses);
4946 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4947 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4949 if (nvuses && (nv_may_defs || nv_must_defs))
4951 if (vect_debug_details (NULL))
4953 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4954 print_generic_expr (dump_file, stmt, TDF_SLIM);
4959 if (TREE_CODE (stmt) != MODIFY_EXPR)
4961 if (vect_debug_details (NULL))
4963 fprintf (dump_file, "unexpected vops in stmt: ");
4964 print_generic_expr (dump_file, stmt, TDF_SLIM);
4971 memref = TREE_OPERAND (stmt, 1);
4972 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4977 memref = TREE_OPERAND (stmt, 0);
4978 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4982 /* Analyze MEMREF. If it is of a supported form, build data_reference
4983 struct for it (DR) and find the relevant symbol for aliasing
4985 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4989 if (vect_debug_stats (loop) || vect_debug_details (loop))
4991 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4992 print_generic_expr (dump_file, stmt, TDF_SLIM);
4997 /* Find and record the memtag assigned to this data-ref. */
4998 switch (TREE_CODE (symbl))
5001 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5005 symbl = SSA_NAME_VAR (symbl);
5006 tag = get_var_ann (symbl)->type_mem_tag;
5009 tree ptr = TREE_OPERAND (memref, 0);
5010 if (TREE_CODE (ptr) == SSA_NAME)
5011 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5015 if (vect_debug_stats (loop) || vect_debug_details (loop))
5016 fprintf (dump_file, "not vectorized: no memtag for ref.");
5019 STMT_VINFO_MEMTAG (stmt_info) = tag;
5023 address_base = TREE_OPERAND (symbl, 0);
5025 switch (TREE_CODE (address_base))
5029 struct data_reference *tmp_dr;
5031 tmp_dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5033 tag = vect_get_base_and_bit_offset
5034 (tmp_dr, DR_BASE_NAME (tmp_dr),
5035 NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5038 if (vect_debug_stats (loop)
5039 || vect_debug_details (loop))
5041 "not vectorized: no memtag for ref.");
5044 STMT_VINFO_MEMTAG (stmt_info) = tag;
5050 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5054 if (vect_debug_stats (loop) || vect_debug_details (loop))
5057 "not vectorized: unhandled address expr: ");
5058 print_generic_expr (dump_file, stmt, TDF_SLIM);
5065 if (vect_debug_stats (loop) || vect_debug_details (loop))
5067 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5068 print_generic_expr (dump_file, memref, TDF_SLIM);
5073 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5074 STMT_VINFO_DATA_REF (stmt_info) = dr;
5082 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5084 /* Function vect_mark_relevant.
5086 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5089 vect_mark_relevant (varray_type worklist, tree stmt)
5091 stmt_vec_info stmt_info;
5093 if (vect_debug_details (NULL))
5094 fprintf (dump_file, "mark relevant.");
5096 if (TREE_CODE (stmt) == PHI_NODE)
5098 VARRAY_PUSH_TREE (worklist, stmt);
5102 stmt_info = vinfo_for_stmt (stmt);
5106 if (vect_debug_details (NULL))
5108 fprintf (dump_file, "mark relevant: no stmt info!!.");
5109 print_generic_expr (dump_file, stmt, TDF_SLIM);
5114 if (STMT_VINFO_RELEVANT_P (stmt_info))
5116 if (vect_debug_details (NULL))
5117 fprintf (dump_file, "already marked relevant.");
5121 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5122 VARRAY_PUSH_TREE (worklist, stmt);
5126 /* Function vect_stmt_relevant_p.
5128 Return true if STMT in loop that is represented by LOOP_VINFO is
5129 "relevant for vectorization".
5131 A stmt is considered "relevant for vectorization" if:
5132 - it has uses outside the loop.
5133 - it has vdefs (it alters memory).
5134 - control stmts in the loop (except for the exit condition).
5136 CHECKME: what other side effects would the vectorizer allow? */
5139 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5141 v_may_def_optype v_may_defs;
5142 v_must_def_optype v_must_defs;
5143 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5148 /* cond stmt other than loop exit cond. */
5149 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5152 /* changing memory. */
5153 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5154 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5155 if (v_may_defs || v_must_defs)
5157 if (vect_debug_details (NULL))
5158 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5162 /* uses outside the loop. */
5163 df = get_immediate_uses (stmt);
5164 num_uses = num_immediate_uses (df);
5165 for (i = 0; i < num_uses; i++)
5167 tree use = immediate_use (df, i);
5168 basic_block bb = bb_for_stmt (use);
5169 if (!flow_bb_inside_loop_p (loop, bb))
5171 if (vect_debug_details (NULL))
5172 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5181 /* Function vect_mark_stmts_to_be_vectorized.
5183 Not all stmts in the loop need to be vectorized. For example:
5192 Stmt 1 and 3 do not need to be vectorized, because loop control and
5193 addressing of vectorized data-refs are handled differently.
5195 This pass detects such stmts. */
5198 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5200 varray_type worklist;
5201 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5202 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5203 unsigned int nbbs = loop->num_nodes;
5204 block_stmt_iterator si;
5210 stmt_vec_info stmt_info;
5212 if (vect_debug_details (NULL))
5213 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5215 VARRAY_TREE_INIT (worklist, 64, "work list");
5217 /* 1. Init worklist. */
5219 for (i = 0; i < nbbs; i++)
5221 basic_block bb = bbs[i];
5222 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5224 stmt = bsi_stmt (si);
5226 if (vect_debug_details (NULL))
5228 fprintf (dump_file, "init: stmt relevant? ");
5229 print_generic_expr (dump_file, stmt, TDF_SLIM);
5232 stmt_info = vinfo_for_stmt (stmt);
5233 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5235 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5236 vect_mark_relevant (worklist, stmt);
5241 /* 2. Process_worklist */
5243 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5245 stmt = VARRAY_TOP_TREE (worklist);
5246 VARRAY_POP (worklist);
5248 if (vect_debug_details (NULL))
5250 fprintf (dump_file, "worklist: examine stmt: ");
5251 print_generic_expr (dump_file, stmt, TDF_SLIM);
5254 /* Examine the USES in this statement. Mark all the statements which
5255 feed this statement's uses as "relevant", unless the USE is used as
5258 if (TREE_CODE (stmt) == PHI_NODE)
5260 /* follow the def-use chain inside the loop. */
5261 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5263 tree arg = PHI_ARG_DEF (stmt, j);
5264 tree def_stmt = NULL_TREE;
5266 if (!vect_is_simple_use (arg, loop, &def_stmt))
5268 if (vect_debug_details (NULL))
5269 fprintf (dump_file, "worklist: unsupported use.");
5270 varray_clear (worklist);
5276 if (vect_debug_details (NULL))
5278 fprintf (dump_file, "worklist: def_stmt: ");
5279 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5282 bb = bb_for_stmt (def_stmt);
5283 if (flow_bb_inside_loop_p (loop, bb))
5284 vect_mark_relevant (worklist, def_stmt);
5288 ann = stmt_ann (stmt);
5289 use_ops = USE_OPS (ann);
5291 for (i = 0; i < NUM_USES (use_ops); i++)
5293 tree use = USE_OP (use_ops, i);
5295 /* We are only interested in uses that need to be vectorized. Uses
5296 that are used for address computation are not considered relevant.
5298 if (exist_non_indexing_operands_for_use_p (use, stmt))
5300 tree def_stmt = NULL_TREE;
5302 if (!vect_is_simple_use (use, loop, &def_stmt))
5304 if (vect_debug_details (NULL))
5305 fprintf (dump_file, "worklist: unsupported use.");
5306 varray_clear (worklist);
5313 if (vect_debug_details (NULL))
5315 fprintf (dump_file, "worklist: examine use %d: ", i);
5316 print_generic_expr (dump_file, use, TDF_SLIM);
5319 bb = bb_for_stmt (def_stmt);
5320 if (flow_bb_inside_loop_p (loop, bb))
5321 vect_mark_relevant (worklist, def_stmt);
5324 } /* while worklist */
5326 varray_clear (worklist);
5331 /* Function vect_can_advance_ivs_p
5333 In case the number of iterations that LOOP iterates in unknown at compile
5334 time, an epilog loop will be generated, and the loop induction variables
5335 (IVs) will be "advanced" to the value they are supposed to take just before
5336 the epilog loop. Here we check that the access function of the loop IVs
5337 and the expression that represents the loop bound are simple enough.
5338 These restrictions will be relaxed in the future. */
5341 vect_can_advance_ivs_p (struct loop *loop)
5343 basic_block bb = loop->header;
5346 /* Analyze phi functions of the loop header. */
5348 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5350 tree access_fn = NULL;
5351 tree evolution_part;
5353 if (vect_debug_details (NULL))
5355 fprintf (dump_file, "Analyze phi: ");
5356 print_generic_expr (dump_file, phi, TDF_SLIM);
5359 /* Skip virtual phi's. The data dependences that are associated with
5360 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5362 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5364 if (vect_debug_details (NULL))
5365 fprintf (dump_file, "virtual phi. skip.");
5369 /* Analyze the evolution function. */
5371 access_fn = instantiate_parameters
5372 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5376 if (vect_debug_details (NULL))
5377 fprintf (dump_file, "No Access function.");
5381 if (vect_debug_details (NULL))
5383 fprintf (dump_file, "Access function of PHI: ");
5384 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5387 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5389 if (evolution_part == NULL_TREE)
5392 /* FORNOW: We do not transform initial conditions of IVs
5393 which evolution functions are a polynomial of degree >= 2. */
5395 if (tree_is_chrec (evolution_part))
5403 /* Function vect_get_loop_niters.
5405 Determine how many iterations the loop is executed.
5406 If an expression that represents the number of iterations
5407 can be constructed, place it in NUMBER_OF_ITERATIONS.
5408 Return the loop exit condition. */
5411 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5415 if (vect_debug_details (NULL))
5416 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5418 niters = number_of_iterations_in_loop (loop);
5420 if (niters != NULL_TREE
5421 && niters != chrec_dont_know)
5423 *number_of_iterations = niters;
5425 if (vect_debug_details (NULL))
5427 fprintf (dump_file, "==> get_loop_niters:" );
5428 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5432 return get_loop_exit_condition (loop);
5436 /* Function vect_analyze_loop_form.
5438 Verify the following restrictions (some may be relaxed in the future):
5439 - it's an inner-most loop
5440 - number of BBs = 2 (which are the loop header and the latch)
5441 - the loop has a pre-header
5442 - the loop has a single entry and exit
5443 - the loop exit condition is simple enough, and the number of iterations
5444 can be analyzed (a countable loop). */
5446 static loop_vec_info
5447 vect_analyze_loop_form (struct loop *loop)
5449 loop_vec_info loop_vinfo;
5451 tree number_of_iterations = NULL;
5452 bool rescan = false;
5454 if (vect_debug_details (loop))
5455 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5458 || !loop->single_exit
5459 || loop->num_nodes != 2
5460 || EDGE_COUNT (loop->header->preds) != 2
5461 || loop->num_entries != 1)
5463 if (vect_debug_stats (loop) || vect_debug_details (loop))
5465 fprintf (dump_file, "not vectorized: bad loop form. ");
5467 fprintf (dump_file, "nested loop.");
5468 else if (!loop->single_exit)
5469 fprintf (dump_file, "multiple exits.");
5470 else if (loop->num_nodes != 2)
5471 fprintf (dump_file, "too many BBs in loop.");
5472 else if (EDGE_COUNT (loop->header->preds) != 2)
5473 fprintf (dump_file, "too many incoming edges.");
5474 else if (loop->num_entries != 1)
5475 fprintf (dump_file, "too many entries.");
5481 /* We assume that the loop exit condition is at the end of the loop. i.e,
5482 that the loop is represented as a do-while (with a proper if-guard
5483 before the loop if needed), where the loop header contains all the
5484 executable statements, and the latch is empty. */
5485 if (!empty_block_p (loop->latch))
5487 if (vect_debug_stats (loop) || vect_debug_details (loop))
5488 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5492 /* Make sure we have a preheader basic block. */
5493 if (!loop->pre_header)
5496 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5499 /* Make sure there exists a single-predecessor exit bb: */
5500 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5503 loop_split_edge_with (loop->exit_edges[0], NULL);
5508 flow_loop_scan (loop, LOOP_ALL);
5509 /* Flow loop scan does not update loop->single_exit field. */
5510 loop->single_exit = loop->exit_edges[0];
5513 if (empty_block_p (loop->header))
5515 if (vect_debug_stats (loop) || vect_debug_details (loop))
5516 fprintf (dump_file, "not vectorized: empty loop.");
5520 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5523 if (vect_debug_stats (loop) || vect_debug_details (loop))
5524 fprintf (dump_file, "not vectorized: complicated exit condition.");
5528 if (!number_of_iterations)
5530 if (vect_debug_stats (loop) || vect_debug_details (loop))
5532 "not vectorized: number of iterations cannot be computed.");
5536 if (chrec_contains_undetermined (number_of_iterations))
5538 if (vect_debug_details (NULL))
5539 fprintf (dump_file, "Infinite number of iterations.");
5543 loop_vinfo = new_loop_vec_info (loop);
5544 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5546 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5548 if (vect_debug_details (loop))
5550 fprintf (dump_file, "loop bound unknown.\n");
5551 fprintf (dump_file, "Symbolic number of iterations is ");
5552 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5556 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5558 if (vect_debug_stats (loop) || vect_debug_details (loop))
5559 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5563 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5569 /* Function vect_analyze_loop.
5571 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5572 for it. The different analyses will record information in the
5573 loop_vec_info struct. */
5575 static loop_vec_info
5576 vect_analyze_loop (struct loop *loop)
5579 loop_vec_info loop_vinfo;
5581 if (vect_debug_details (NULL))
5582 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5584 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5586 loop_vinfo = vect_analyze_loop_form (loop);
5589 if (vect_debug_details (loop))
5590 fprintf (dump_file, "bad loop form.");
5594 /* Find all data references in the loop (which correspond to vdefs/vuses)
5595 and analyze their evolution in the loop.
5597 FORNOW: Handle only simple, array references, which
5598 alignment can be forced, and aligned pointer-references. */
5600 ok = vect_analyze_data_refs (loop_vinfo);
5603 if (vect_debug_details (loop))
5604 fprintf (dump_file, "bad data references.");
5605 destroy_loop_vec_info (loop_vinfo);
5609 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5611 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5614 if (vect_debug_details (loop))
5615 fprintf (dump_file, "unexpected pattern.");
5616 if (vect_debug_details (loop))
5617 fprintf (dump_file, "not vectorized: unexpected pattern.");
5618 destroy_loop_vec_info (loop_vinfo);
5622 /* Check that all cross-iteration scalar data-flow cycles are OK.
5623 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5625 ok = vect_analyze_scalar_cycles (loop_vinfo);
5628 if (vect_debug_details (loop))
5629 fprintf (dump_file, "bad scalar cycle.");
5630 destroy_loop_vec_info (loop_vinfo);
5634 /* Analyze data dependences between the data-refs in the loop.
5635 FORNOW: fail at the first data dependence that we encounter. */
5637 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5640 if (vect_debug_details (loop))
5641 fprintf (dump_file, "bad data dependence.");
5642 destroy_loop_vec_info (loop_vinfo);
5646 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5647 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5649 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5652 if (vect_debug_details (loop))
5653 fprintf (dump_file, "bad data access.");
5654 destroy_loop_vec_info (loop_vinfo);
5658 /* Analyze the alignment of the data-refs in the loop.
5659 FORNOW: Only aligned accesses are handled. */
5661 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5664 if (vect_debug_details (loop))
5665 fprintf (dump_file, "bad data alignment.");
5666 destroy_loop_vec_info (loop_vinfo);
5670 /* Scan all the operations in the loop and make sure they are
5673 ok = vect_analyze_operations (loop_vinfo);
5676 if (vect_debug_details (loop))
5677 fprintf (dump_file, "bad operation or unsupported loop bound.");
5678 destroy_loop_vec_info (loop_vinfo);
5682 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5688 /* Function need_imm_uses_for.
5690 Return whether we ought to include information for 'var'
5691 when calculating immediate uses. For this pass we only want use
5692 information for non-virtual variables. */
5695 need_imm_uses_for (tree var)
5697 return is_gimple_reg (var);
5701 /* Function vectorize_loops.
5703 Entry Point to loop vectorization phase. */
5706 vectorize_loops (struct loops *loops)
5708 unsigned int i, loops_num;
5709 unsigned int num_vectorized_loops = 0;
5711 /* Does the target support SIMD? */
5712 /* FORNOW: until more sophisticated machine modelling is in place. */
5713 if (!UNITS_PER_SIMD_WORD)
5715 if (vect_debug_details (NULL))
5716 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5720 #ifdef ENABLE_CHECKING
5721 verify_loop_closed_ssa ();
5724 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5726 /* ----------- Analyze loops. ----------- */
5728 /* If some loop was duplicated, it gets bigger number
5729 than all previously defined loops. This fact allows us to run
5730 only over initial loops skipping newly generated ones. */
5731 loops_num = loops->num;
5732 for (i = 1; i < loops_num; i++)
5734 loop_vec_info loop_vinfo;
5735 struct loop *loop = loops->parray[i];
5740 loop_vinfo = vect_analyze_loop (loop);
5741 loop->aux = loop_vinfo;
5743 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5746 vect_transform_loop (loop_vinfo, loops);
5747 num_vectorized_loops++;
5750 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5751 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5752 num_vectorized_loops);
5754 /* ----------- Finalize. ----------- */
5757 for (i = 1; i < loops_num; i++)
5759 struct loop *loop = loops->parray[i];
5760 loop_vec_info loop_vinfo;
5764 loop_vinfo = loop->aux;
5765 destroy_loop_vec_info (loop_vinfo);
5769 rewrite_into_ssa (false);
5770 rewrite_into_loop_closed_ssa (); /* FORNOW */
5771 bitmap_clear (vars_to_rename);