2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference *, loop_vec_info);
219 static bool vect_analyze_data_ref_access (struct data_reference *);
220 static bool vect_get_first_index (tree, tree *);
221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
222 static struct data_reference * vect_analyze_pointer_ref_access
224 static bool vect_can_advance_ivs_p (struct loop *);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227 static struct data_reference * vect_analyze_pointer_ref_access
229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232 static tree vect_get_ptr_offset (tree, tree, tree *);
233 static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
236 /* Utility functions for the code transformation. */
237 static tree vect_create_destination_var (tree, tree);
238 static tree vect_create_data_ref_ptr
239 (tree, block_stmt_iterator *, tree, tree *, bool);
240 static tree vect_create_index_for_vector_ref
241 (struct loop *, block_stmt_iterator *);
242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
243 static tree get_vectype_for_scalar_type (tree);
244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
245 static tree vect_get_vec_def_for_operand (tree, tree);
246 static tree vect_init_vector (tree, tree);
247 static void vect_finish_stmt_generation
248 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
250 /* Utility function dealing with loop peeling (not peeling itself). */
251 static void vect_generate_tmps_on_preheader
252 (loop_vec_info, tree *, tree *, tree *);
253 static tree vect_build_loop_niters (loop_vec_info);
254 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
255 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
256 static void vect_update_inits_of_dr
257 (struct data_reference *, struct loop *, tree niters);
258 static void vect_update_inits_of_drs (loop_vec_info, tree);
259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
260 static void vect_do_peeling_for_loop_bound
261 (loop_vec_info, tree *, struct loops *);
263 /* Utilities for creation and deletion of vec_info structs. */
264 loop_vec_info new_loop_vec_info (struct loop *loop);
265 void destroy_loop_vec_info (loop_vec_info);
266 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
268 static bool vect_debug_stats (struct loop *loop);
269 static bool vect_debug_details (struct loop *loop);
272 /*************************************************************************
273 Simple Loop Peeling Utilities
275 Utilities to support loop peeling for vectorization purposes.
276 *************************************************************************/
279 /* For each definition in DEFINITIONS this function allocates
   a fresh ssa name (a duplicate of the original) and records a
   pointer to it in the original name's SSA_NAME_AUX field, for
   later use when renaming the peeled loop copy.  The heap slots
   allocated here are released by free_new_names.  */
283 allocate_new_names (bitmap definitions)
288 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
290 tree def = ssa_name (ver);
/* One heap cell per definition; freed by free_new_names.  */
291 tree *new_name_ptr = xmalloc (sizeof (tree));
293 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
295 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
/* The duplicate must carry the same abnormal-phi property as DEF.  */
296 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
298 SSA_NAME_AUX (def) = new_name_ptr;
303 /* Renames the use *OP_P.  If a duplicate ssa name was recorded for
   the used name in SSA_NAME_AUX (by allocate_new_names), the use is
   replaced with that duplicate; otherwise it is left untouched.  */
306 rename_use_op (use_operand_p op_p)
/* Only SSA_NAME uses participate in the renaming.  */
310 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
313 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
315 /* Something defined outside of the loop. */
319 /* An ordinary ssa name defined in the loop. */
321 SET_USE (op_p, *new_name_ptr);
325 /* Renames the def *OP_P in statement STMT.  Installs the duplicate
   ssa name recorded in SSA_NAME_AUX as the new def and makes STMT the
   defining statement of the new name.  */
328 rename_def_op (def_operand_p op_p, tree stmt)
/* Only SSA_NAME defs participate in the renaming.  */
332 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
335 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
337 /* Something defined outside of the loop. */
341 /* An ordinary ssa name defined in the loop. */
343 SET_DEF (op_p, *new_name_ptr);
/* Keep the name -> defining-stmt link consistent with the new def.  */
344 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
348 /* Renames the variables in basic block BB: phi results, all real and
   virtual operands of each statement, and the phi arguments (in blocks
   outside the loop) that flow in over BB's outgoing edges.  */
351 rename_variables_in_bb (basic_block bb)
354 block_stmt_iterator bsi;
360 v_may_def_optype v_may_defs;
361 v_must_def_optype v_must_defs;
365 struct loop *loop = bb->loop_father;
/* Rename the results of the phi nodes at the top of BB.  */
367 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
368 rename_def_op (PHI_RESULT_PTR (phi), phi);
/* Rename every operand (uses, defs, and all virtual operand kinds)
   of every statement in BB.  */
370 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
372 stmt = bsi_stmt (bsi);
373 get_stmt_operands (stmt);
374 ann = stmt_ann (stmt);
376 uses = USE_OPS (ann);
377 for (i = 0; i < NUM_USES (uses); i++)
378 rename_use_op (USE_OP_PTR (uses, i));
380 defs = DEF_OPS (ann);
381 for (i = 0; i < NUM_DEFS (defs); i++)
382 rename_def_op (DEF_OP_PTR (defs, i), stmt);
384 vuses = VUSE_OPS (ann);
385 for (i = 0; i < NUM_VUSES (vuses); i++)
386 rename_use_op (VUSE_OP_PTR (vuses, i));
/* V_MAY_DEFs have both a use part and a def (result) part.  */
388 v_may_defs = V_MAY_DEF_OPS (ann);
389 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
391 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
392 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
/* Likewise V_MUST_DEFs: a killed use and a result.  */
395 v_must_defs = V_MUST_DEF_OPS (ann);
396 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
398 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
399 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
/* Update the phi arguments coming from BB in successor blocks that
   lie outside the loop.  */
403 FOR_EACH_EDGE (e, ei, bb->succs)
405 if (!flow_bb_inside_loop_p (loop, e->dest))
407 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
408 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
413 /* Releases the structures holding the new ssa names: for every
   definition in DEFINITIONS, frees the heap cell that
   allocate_new_names stored in SSA_NAME_AUX and clears the field.  */
416 free_new_names (bitmap definitions)
421 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
423 tree def = ssa_name (ver);
425 if (SSA_NAME_AUX (def))
427 free (SSA_NAME_AUX (def));
/* Clear the field so no dangling pointer is left behind.  */
428 SSA_NAME_AUX (def) = NULL;
434 /* Renames variables in new generated LOOP by applying
   rename_variables_in_bb to every basic block of the loop body.  */
437 rename_variables_in_loop (struct loop *loop)
442 bbs = get_loop_body (loop);
444 for (i = 0; i < loop->num_nodes; i++)
445 rename_variables_in_bb (bbs[i]);
451 /* Update the PHI nodes of NEW_LOOP.
453 NEW_LOOP is a duplicate of ORIG_LOOP.
454 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
455 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
456 executes before it. */
459 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
460 struct loop *new_loop, bool after)
462 tree *new_name_ptr, new_ssa_name;
463 tree phi_new, phi_orig;
465 edge orig_loop_latch = loop_latch_edge (orig_loop);
466 edge orig_entry_e = loop_preheader_edge (orig_loop);
467 edge new_loop_exit_e = new_loop->exit_edges[0];
468 edge new_loop_entry_e = loop_preheader_edge (new_loop);
/* The edge of ORIG_LOOP whose phi argument feeds NEW_LOOP's entry:
   ORIG_LOOP's latch when NEW_LOOP runs after it, its preheader
   otherwise.  */
469 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
472 step 1. For each loop-header-phi:
473 Add the first phi argument for the phi in NEW_LOOP
474 (the one associated with the entry of NEW_LOOP)
476 step 2. For each loop-header-phi:
477 Add the second phi argument for the phi in NEW_LOOP
478 (the one associated with the latch of NEW_LOOP)
480 step 3. Update the phis in the successor block of NEW_LOOP.
482 case 1: NEW_LOOP was placed before ORIG_LOOP:
483 The successor block of NEW_LOOP is the header of ORIG_LOOP.
484 Updating the phis in the successor block can therefore be done
485 along with the scanning of the loop header phis, because the
486 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
487 phi nodes, organized in the same order.
489 case 2: NEW_LOOP was placed after ORIG_LOOP:
490 The successor block of NEW_LOOP is the original exit block of
491 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
492 We postpone updating these phis to a later stage (when
493 loop guards are added).
497 /* Scan the phis in the headers of the old and new loops
498 (they are organized in exactly the same order). */
500 for (phi_new = phi_nodes (new_loop->header),
501 phi_orig = phi_nodes (orig_loop->header);
503 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
/* step 1: add the entry argument of the new-loop phi.  */
506 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
507 add_phi_arg (phi_new, def, new_loop_entry_e);
/* step 2: add the latch argument.  Names defined inside the loop
   are mapped to their duplicates via SSA_NAME_AUX.  */
510 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
511 if (TREE_CODE (def) != SSA_NAME)
514 new_name_ptr = SSA_NAME_AUX (def);
516 /* Something defined outside of the loop. */
519 /* An ordinary ssa name defined in the loop. */
520 new_ssa_name = *new_name_ptr;
521 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
523 /* step 3 (case 1). */
526 gcc_assert (new_loop_exit_e == orig_entry_e);
527 SET_PHI_ARG_DEF (phi_orig,
528 phi_arg_from_edge (phi_orig, new_loop_exit_e),
535 /* Update PHI nodes for a guard of the LOOP.
538 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
539 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
540 originates from the guard-bb, skips LOOP and reaches the (unique) exit
541 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
542 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
543 LOOP header) before the guard code was added, and now it became a merge
544 point of two paths - the path that ends with the LOOP exit-edge, and
545 the path that ends with GUARD_EDGE.
547 This function creates and updates the relevant phi nodes to account for
548 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
549 1. Create phi nodes at NEW_MERGE_BB.
550 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
551 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
554 ===> The CFG before the guard-code was added:
556 if (exit_loop) goto update_bb : LOOP_header_bb
559 ==> The CFG after the guard-code was added:
561 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
563 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
568 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
569 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
570 organized in the same order.
571 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
574 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
575 "original" loop). FALSE if LOOP is an original loop (not a newly
576 created copy). The SSA_NAME_AUX fields of the defs in the original
577 loop are the corresponding new ssa-names used in the new duplicated
578 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
579 nodes in UPDATE_BB takes the original ssa-name, and which takes the
580 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
581 the LOOP-exit-edge takes the new-name, and the phi-arg that is
582 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
583 FALSE, it's the other way around.
587 slpeel_update_phi_nodes_for_guard (edge guard_edge,
592 tree orig_phi, new_phi, update_phi;
593 tree guard_arg, loop_arg;
594 basic_block new_merge_bb = guard_edge->dest;
/* E: the (single) edge from NEW_MERGE_BB to its successor UPDATE_BB.  */
595 edge e = EDGE_SUCC (new_merge_bb, 0);
596 basic_block update_bb = e->dest;
/* Where to read the phis that drive the update: the loop header for
   entry phis, UPDATE_BB itself otherwise.  */
597 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
599 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
600 orig_phi && update_phi;
601 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
603 /* 1. Generate new phi node in NEW_MERGE_BB: */
604 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
607 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
608 of LOOP. Set the two phi args in NEW_PHI for these edges: */
611 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
612 EDGE_SUCC (loop->latch, 0));
613 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
617 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
618 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
622 new_name = *new_name_ptr;
624 /* Something defined outside of the loop */
629 guard_arg = orig_def;
634 guard_arg = new_name;
638 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
639 add_phi_arg (new_phi, guard_arg, guard_edge);
641 /* 3. Update phi in successor block. */
642 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
643 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
644 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
645 PHI_RESULT (new_phi));
/* The new phis were chained onto NEW_MERGE_BB in reverse; restore the
   original order.  */
648 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
652 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
653 that starts at zero, increases by one and its limit is NITERS.  The
   original exit condition is replaced by a test of the new IV against
   NITERS, and then removed.
655 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
658 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
660 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
662 edge exit_edge = loop->exit_edges[0];
663 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
664 tree begin_label = tree_block_label (loop->latch);
665 tree exit_label = tree_block_label (loop->single_exit->dest);
/* New IV: starts at 0 and is incremented by 1, in NITERS's type.  */
666 tree init = build_int_cst (TREE_TYPE (niters), 0);
667 tree step = build_int_cst (TREE_TYPE (niters), 1);
671 orig_cond = get_loop_exit_condition (loop);
672 gcc_assert (orig_cond);
673 create_iv (init, step, NULL_TREE, loop,
674 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
676 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
677 back to the exit condition statement. */
678 bsi_next (&loop_exit_bsi);
679 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
/* Build the new exit test, oriented the same way as the old one.  */
681 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
683 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
684 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
685 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
687 else /* 'then' edge loops back. */
689 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
690 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
691 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
694 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
695 then_label, else_label);
696 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
698 /* Remove old loop exit test: */
699 bsi_remove (&loop_exit_bsi);
701 if (vect_debug_stats (loop) || vect_debug_details (loop))
702 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
/* Record the now-known iteration count on the loop.  */
704 loop->nb_iterations = niters;
708 /* Given LOOP this function generates a new copy of it and puts it
709 on E which is either the entry or exit of LOOP.  Returns the new
   loop, or NULL if duplication is not possible (E is neither the
   entry nor the exit edge, the blocks cannot be copied, or
   duplicate_loop fails).  */
712 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
715 struct loop *new_loop;
716 basic_block *new_bbs, *bbs;
719 basic_block exit_dest;
/* Only the single exit edge or the preheader edge are accepted.  */
722 at_exit = (e == loop->exit_edges[0]);
723 if (!at_exit && e != loop_preheader_edge (loop))
725 if (dump_file && (dump_flags & TDF_DETAILS))
726 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
730 bbs = get_loop_body (loop);
732 /* Check whether duplication is possible. */
733 if (!can_copy_bbs_p (bbs, loop->num_nodes))
735 if (vect_debug_stats (loop) || vect_debug_details (loop))
736 fprintf (dump_file, "Cannot copy basic blocks.\n");
741 /* Generate new loop structure. */
742 new_loop = duplicate_loop (loops, loop, loop->outer);
745 if (vect_debug_stats (loop) || vect_debug_details (loop))
746 fprintf (dump_file, "duplicate_loop returns NULL.\n");
751 exit_dest = loop->exit_edges[0]->dest;
/* Remember whether the loop header dominated the exit destination,
   so dominators can be fixed up after redirection below.  */
752 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
753 exit_dest) == loop->header ?
756 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
758 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
760 /* Duplicating phi args at exit bbs as coming
761 also from exit of duplicated loop. */
762 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
764 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
767 edge new_loop_exit_edge;
/* The new header's two successors are the latch and the exit;
   pick the one that is not the latch.  */
769 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
770 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
772 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
774 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
778 if (at_exit) /* Add the loop copy at exit. */
780 redirect_edge_and_branch_force (e, new_loop->header);
781 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
783 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
785 else /* Add the copy at entry. */
788 edge entry_e = loop_preheader_edge (loop);
789 basic_block preheader = entry_e->src;
/* Find the exit edge of the new loop: the header successor that
   leaves the loop body.  */
791 if (!flow_bb_inside_loop_p (new_loop,
792 EDGE_SUCC (new_loop->header, 0)->dest))
793 new_exit_e = EDGE_SUCC (new_loop->header, 0);
795 new_exit_e = EDGE_SUCC (new_loop->header, 1);
797 redirect_edge_and_branch_force (new_exit_e, loop->header);
798 set_immediate_dominator (CDI_DOMINATORS, loop->header,
801 /* We have to add phi args to the loop->header here as coming
802 from new_exit_e edge. */
803 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
805 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
807 add_phi_arg (phi, phi_arg, new_exit_e);
810 redirect_edge_and_branch_force (entry_e, new_loop->header);
811 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
/* Refresh the cached loop information (entries/exits) of both loops.  */
814 flow_loop_scan (new_loop, LOOP_ALL);
815 flow_loop_scan (loop, LOOP_ALL);
823 /* Given the condition statement COND, put it as the last statement
824 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
825 Assumes that this is the single exit of the guarded loop.
826 Returns the skip edge (the new true-edge from GUARD_BB to EXIT_BB).  */
829 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
832 block_stmt_iterator bsi;
834 tree cond_stmt, then_label, else_label;
/* The existing fallthrough successor becomes the false ("enter the
   loop") edge of the new conditional.  */
836 enter_e = EDGE_SUCC (guard_bb, 0);
837 enter_e->flags &= ~EDGE_FALLTHRU;
838 enter_e->flags |= EDGE_FALSE_VALUE;
839 bsi = bsi_last (guard_bb);
/* COND true -> skip to EXIT_BB; COND false -> fall into the loop.  */
841 then_label = build1 (GOTO_EXPR, void_type_node,
842 tree_block_label (exit_bb));
843 else_label = build1 (GOTO_EXPR, void_type_node,
844 tree_block_label (enter_e->dest));
845 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
846 then_label, else_label);
847 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
848 /* Add new edge to connect entry block to the second loop. */
849 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
850 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
855 /* This function verifies that the following restrictions apply to LOOP:
857 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
858 (3) it is single entry, single exit
859 (4) its exit condition is the last stmt in the header
860 (5) E is the entry/exit edge of LOOP.
   Returns false when any restriction is violated, or when any ssa
   names are still marked for rewrite.  */
864 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
866 edge exit_e = loop->exit_edges [0];
867 edge entry_e = loop_preheader_edge (loop);
868 tree orig_cond = get_loop_exit_condition (loop);
869 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
/* Pending ssa-rewrite marks would interfere with our own renaming.  */
871 if (any_marked_for_rewrite_p ())
875 /* All loops have an outer scope; the only case loop->outer is NULL is for
876 the function itself. */
878 || loop->num_nodes != 2
879 || !empty_block_p (loop->latch)
880 || loop->num_exits != 1
881 || loop->num_entries != 1
882 /* Verify that new loop exit condition can be trivially modified. */
883 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
884 || (e != exit_e && e != entry_e))
890 #ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling: FIRST_LOOP must be
   followed by SECOND_LOOP, with the guard block between them wired
   as described below.  Compiled only with ENABLE_CHECKING.  */
892 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
893 struct loop *second_loop)
895 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
896 basic_block loop2_entry_bb = second_loop->pre_header;
897 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
899 /* A guard that controls whether the second_loop is to be executed or skipped
900 is placed in first_loop->exit. first_loop->exit therefore has two
901 successors - one is the preheader of second_loop, and the other is a bb
904 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
907 /* 1. Verify that one of the successors of first_loop->exit is the preheader
910 /* The preheader of new_loop is expected to have two predecessors:
911 first_loop->exit and the block that precedes first_loop. */
913 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
914 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
915 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
916 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
917 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
919 /* Verify that the other successor of first_loop->exit is after the
925 /* Function slpeel_tree_peel_loop_to_edge.
927 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
928 that is placed on the entry (exit) edge E of LOOP. After this transformation
929 we have two loops one after the other - first-loop iterates FIRST_NITERS
930 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
933 - LOOP: the loop to be peeled.
934 - E: the exit or entry edge of LOOP.
935 If it is the entry edge, we peel the first iterations of LOOP. In this
936 case first-loop is LOOP, and second-loop is the newly created loop.
937 If it is the exit edge, we peel the last iterations of LOOP. In this
938 case, first-loop is the newly created loop, and second-loop is LOOP.
939 - NITERS: the number of iterations that LOOP iterates.
940 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
941 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
942 for updating the loop bound of the first-loop to FIRST_NITERS. If it
943 is false, the caller of this function may want to take care of this
944 (this can be useful if we don't want new stmts added to first-loop).
947 The function returns a pointer to the new loop-copy, or NULL if it failed
948 to perform the transformation.
950 The function generates two if-then-else guards: one before the first loop,
951 and the other before the second loop:
953 if (FIRST_NITERS == 0) then skip the first loop,
954 and go directly to the second loop.
956 if (FIRST_NITERS == NITERS) then skip the second loop.
958 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
959 FORNOW the resulting code will not be in loop-closed-ssa form.
963 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
964 edge e, tree first_niters,
965 tree niters, bool update_first_loop_count)
967 struct loop *new_loop = NULL, *first_loop, *second_loop;
971 basic_block bb_before_second_loop, bb_after_second_loop;
972 basic_block bb_before_first_loop;
973 basic_block bb_between_loops;
974 edge exit_e = loop->exit_edges [0];
976 if (!slpeel_can_duplicate_loop_p (loop, e))
979 /* We have to initialize cfg_hooks. Then, when calling
980 cfg_hooks->split_edge, the function tree_split_edge
981 is actually called and, when calling cfg_hooks->duplicate_block,
982 the function tree_duplicate_bb is called. */
983 tree_register_cfg_hooks ();
986 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
987 Resulting CFG would be:
1000 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1002 if (vect_debug_stats (loop) || vect_debug_details (loop))
1003 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1009 /* NEW_LOOP was placed after LOOP. */
1011 second_loop = new_loop;
1015 /* NEW_LOOP was placed before LOOP. */
1016 first_loop = new_loop;
1020 definitions = marked_ssa_names ();
1021 allocate_new_names (definitions);
1022 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1023 rename_variables_in_loop (new_loop);
1026 /* 2. Add the guard that controls whether the first loop is executed.
1027 Resulting CFG would be:
1029 bb_before_first_loop:
1030 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1037 bb_before_second_loop:
1046 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1047 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1048 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1049 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1050 flow_loop_scan (first_loop, LOOP_ALL);
1051 flow_loop_scan (second_loop, LOOP_ALL);
1054 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1055 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1056 bb_before_second_loop, bb_before_first_loop);
1057 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1058 first_loop == new_loop);
1061 /* 3. Add the guard that controls whether the second loop is executed.
1062 Resulting CFG would be:
1064 bb_before_first_loop:
1065 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1073 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1074 GOTO bb_before_second_loop
1076 bb_before_second_loop:
1082 bb_after_second_loop:
1087 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1088 add_bb_to_loop (bb_between_loops, first_loop->outer);
1089 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1090 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1091 flow_loop_scan (first_loop, LOOP_ALL);
1092 flow_loop_scan (second_loop, LOOP_ALL);
1094 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1095 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1096 bb_after_second_loop, bb_before_first_loop);
1097 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1098 second_loop == new_loop);
1100 /* Flow loop scan does not update loop->single_exit field. */
1101 first_loop->single_exit = first_loop->exit_edges[0];
1102 second_loop->single_exit = second_loop->exit_edges[0];
1104 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1106 if (update_first_loop_count)
1107 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1109 free_new_names (definitions);
1110 BITMAP_XFREE (definitions);
1111 unmark_all_for_rewrite ();
1117 /* Here the proper Vectorizer starts. */
1119 /*************************************************************************
1120 Vectorization Utilities.
1121 *************************************************************************/
1123 /* Function new_stmt_vec_info.
1125 Create and initialize a new stmt_vec_info struct for STMT. */
/* NOTE(review): lines elided in this extract (return type/braces/return).
   Allocates a zeroed _stmt_vec_info and sets every field to an explicit
   "not yet computed" default; caller presumably frees it via
   destroy_loop_vec_info's set_stmt_info(...) teardown — verify.  */
1128 new_stmt_vec_info (tree stmt, struct loop *loop)
1131 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1133 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1134 STMT_VINFO_STMT (res) = stmt;
1135 STMT_VINFO_LOOP (res) = loop;
1136 STMT_VINFO_RELEVANT_P (res) = 0;
1137 STMT_VINFO_VECTYPE (res) = NULL;
1138 STMT_VINFO_VEC_STMT (res) = NULL;
1139 STMT_VINFO_DATA_REF (res) = NULL;
1140 STMT_VINFO_MEMTAG (res) = NULL;
1141 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1147 /* Function new_loop_vec_info.
1149 Create and initialize a new loop_vec_info struct for LOOP, as well as
1150 stmt_vec_info structs for all the stmts in LOOP. */
/* NOTE(review): extract has elided lines; code preserved verbatim.  */
1153 new_loop_vec_info (struct loop *loop)
1157 block_stmt_iterator si;
1160 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1162 bbs = get_loop_body (loop);
1164 /* Create stmt_info for all stmts in the loop. */
1165 for (i = 0; i < loop->num_nodes; i++)
1167 basic_block bb = bbs[i];
1168 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1170 tree stmt = bsi_stmt (si);
/* Ensure operand info is up to date before attaching per-stmt vect info.  */
1173 get_stmt_operands (stmt);
1174 ann = stmt_ann (stmt);
1175 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
/* Initialize all loop-level fields to "unknown/not yet analyzed".  */
1179 LOOP_VINFO_LOOP (res) = loop;
1180 LOOP_VINFO_BBS (res) = bbs;
1181 LOOP_VINFO_EXIT_COND (res) = NULL;
1182 LOOP_VINFO_NITERS (res) = NULL;
1183 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1184 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1185 LOOP_VINFO_VECT_FACTOR (res) = 0;
1186 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1187 "loop_write_datarefs");
1188 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1189 "loop_read_datarefs");
1190 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1196 /* Function destroy_loop_vec_info.
1198 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1199 stmts in the loop. */
/* NOTE(review): extract has elided lines (e.g. the free of STMT_INFO itself
   is not visible here — verify no leak against full source).  */
1202 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1207 block_stmt_iterator si;
1213 loop = LOOP_VINFO_LOOP (loop_vinfo);
1215 bbs = LOOP_VINFO_BBS (loop_vinfo);
1216 nbbs = loop->num_nodes;
1218 for (j = 0; j < nbbs; j++)
1220 basic_block bb = bbs[j];
1221 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1223 tree stmt = bsi_stmt (si);
1224 stmt_ann_t ann = stmt_ann (stmt);
1225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Detach the per-stmt info from the stmt annotation.  */
1227 set_stmt_info (ann, NULL);
/* Release the loop-level containers allocated by new_loop_vec_info.  */
1231 free (LOOP_VINFO_BBS (loop_vinfo));
1232 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1233 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1239 /* Function debug_loop_stats.
1241 For vectorization statistics dumps. */
/* NOTE(review): extract incomplete.  Returns whether stats dumping is
   enabled; when LOOP has a source location, also prints a "loop at
   file:line" header to dump_file.  */
1244 vect_debug_stats (struct loop *loop)
1247 block_stmt_iterator si;
1248 tree node = NULL_TREE;
/* Gate on -fdump-...-stats: nothing to print without a dump file.  */
1250 if (!dump_file || !(dump_flags & TDF_STATS))
1255 fprintf (dump_file, "\n");
/* Scan the loop header for the first stmt carrying location info.  */
1264 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1266 node = bsi_stmt (si);
1267 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1271 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1272 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1274 fprintf (dump_file, "\nloop at %s:%d: ",
1275 EXPR_FILENAME (node), EXPR_LINENO (node));
1283 /* Function debug_loop_details.
1285 For vectorization debug dumps. */
/* NOTE(review): near-duplicate of vect_debug_stats but gated on
   TDF_DETAILS; a shared helper would remove the duplication (cannot be
   done safely from this elided extract).  */
1288 vect_debug_details (struct loop *loop)
1291 block_stmt_iterator si;
1292 tree node = NULL_TREE;
1294 if (!dump_file || !(dump_flags & TDF_DETAILS))
1299 fprintf (dump_file, "\n");
/* Find the first located stmt so the dump can name the loop's position.  */
1308 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1310 node = bsi_stmt (si);
1311 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1315 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1316 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1318 fprintf (dump_file, "\nloop at %s:%d: ",
1319 EXPR_FILENAME (node), EXPR_LINENO (node));
1327 /* Function vect_get_ptr_offset
1329 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
/* NOTE(review): stub — all parameters ATTRIBUTE_UNUSED; the visible body
   only carries the TODO, so callers currently get no alignment info from
   pointer refs.  */
1332 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1333 tree vectype ATTRIBUTE_UNUSED,
1334 tree *offset ATTRIBUTE_UNUSED)
1336 /* TODO: Use alignment information. */
1341 /* Function vect_get_base_and_bit_offset
1343 Return the BASE of the data reference EXPR.
1344 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1345 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1346 bits of 'a.b[i] + 4B' from a.
1349 EXPR - the memory reference that is being analyzed
1350 DR - the data_reference struct of the _original_ memory reference
1351 (Note: DR_REF (DR) is not necessarily EXPR)
1352 VECTYPE - the type that defines the alignment (i.e, we compute
1353 alignment relative to TYPE_ALIGN(VECTYPE))
1356 BASE (returned value) - the base of the data reference EXPR.
1357 E.g, if EXPR is a.b[k].c[i][j] the returned
1359 OFFSET - offset of EXPR from BASE in bits
1360 BASE_ALIGNED_P - indicates if BASE is aligned
1362 If something unexpected is encountered (an unsupported form of data-ref),
1363 or if VECTYPE is given but OFFSET cannot be determined:
1364 then NULL_TREE is returned. */
/* NOTE(review): recursive walk over EXPR's tree code; the switch labels and
   recursion terminators are partially elided in this extract — code
   preserved verbatim, read against the full source before changing.  */
1367 vect_get_base_and_bit_offset (struct data_reference *dr,
1370 loop_vec_info loop_vinfo,
1372 bool *base_aligned_p)
1374 tree this_offset = size_zero_node;
1375 tree base = NULL_TREE;
1377 tree oprnd0, oprnd1;
1378 struct data_reference *array_dr;
1379 enum tree_code code = TREE_CODE (expr);
1381 *base_aligned_p = false;
1385 /* These cases end the recursion: */
/* A DECL base: alignment is known directly from DECL_ALIGN.  */
1387 *offset = size_zero_node;
1388 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1389 *base_aligned_p = true;
/* SSA_NAME pointer base: fall back to vect_get_ptr_offset when the
   pointed-to type is less aligned than VECTYPE.  */
1396 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1399 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1401 base = vect_get_ptr_offset (expr, vectype, offset);
1403 *base_aligned_p = true;
1407 *base_aligned_p = true;
1408 *offset = size_zero_node;
/* INTEGER_CST case: convert a byte constant to bits.  */
1414 *offset = int_const_binop (MULT_EXPR, expr,
1415 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1418 /* These cases continue the recursion: */
/* COMPONENT_REF: accumulate the field's bit position, require it to be
   a host integer when alignment (VECTYPE) is being tracked.  */
1420 oprnd0 = TREE_OPERAND (expr, 0);
1421 oprnd1 = TREE_OPERAND (expr, 1);
1423 this_offset = bit_position (oprnd1);
1424 if (vectype && !host_integerp (this_offset, 1))
1430 oprnd0 = TREE_OPERAND (expr, 0);
1435 oprnd0 = TREE_OPERAND (expr, 0);
1440 if (DR_REF (dr) != expr)
1441 /* Build array data_reference struct if the existing DR_REF
1442 doesn't match EXPR. This happens, for example, when the
1443 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1444 contains information on the access of T, not of arr. In order
1445 to continue the analysis, we create a new DR struct that
1446 describes the access of arr.
1448 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1452 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1453 vectype, &this_offset);
1458 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1460 *offset = this_offset;
1461 *base_aligned_p = true;
1468 /* In case we have a PLUS_EXPR of the form
1469 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1470 This is verified in vect_get_symbl_and_dr. */
1471 oprnd0 = TREE_OPERAND (expr, 0);
1472 oprnd1 = TREE_OPERAND (expr, 1);
/* First resolve the non-base operand's contribution to the offset.  */
1474 base = vect_get_base_and_bit_offset
1475 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1476 if (vectype && !base)
/* Recurse toward the base; on success fold this level's offset in.  */
1486 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1487 loop_vinfo, offset, base_aligned_p);
1489 if (vectype && base)
1491 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1492 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1495 if (vect_debug_details (NULL))
1497 print_generic_expr (dump_file, expr, TDF_SLIM);
1498 fprintf (dump_file, " --> total offset for ref: ");
1499 print_generic_expr (dump_file, *offset, TDF_SLIM);
1506 /* Function vect_force_dr_alignment_p.
1508 Returns whether the alignment of a DECL can be forced to be aligned
1509 on ALIGNMENT bit boundary. */
/* NOTE(review): the `return false;` bodies of the guard `if`s are elided in
   this extract.  Only plain VAR_DECLs that are not external and not yet
   emitted can be realigned; statics are bounded by the object-file limit,
   locals by the (assumed) preferred stack boundary.  */
1512 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1514 if (TREE_CODE (decl) != VAR_DECL)
1517 if (DECL_EXTERNAL (decl))
1520 if (TREE_ASM_WRITTEN (decl))
1523 if (TREE_STATIC (decl))
1524 return (alignment <= MAX_OFILE_ALIGNMENT);
1526 /* This is not 100% correct. The absolute correct stack alignment
1527 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1528 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1529 However, until someone implements forced stack alignment, SSE
1530 isn't really usable without this. */
1531 return (alignment <= PREFERRED_STACK_BOUNDARY);
1535 /* Function vect_get_new_vect_var.
1537 Returns a name for a new variable. The current naming scheme appends the
1538 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1539 the name of vectorizer generated variables, and appends that to NAME if
/* NOTE(review): the prefix-string assignments are elided here; when NAME is
   NULL the bare prefix alone is used as the temp-var name.  The concat()
   result is not freed in the visible code — check for a leak upstream.  */
1543 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1549 if (var_kind == vect_simple_var)
1554 prefix_len = strlen (prefix);
1557 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1559 new_vect_var = create_tmp_var (type, prefix);
1561 return new_vect_var;
1565 /* Function vect_create_index_for_vector_ref.
1567 Create (and return) an index variable, along with it's update chain in the
1568 loop. This variable will be used to access a memory location in a vector
1572 LOOP: The loop being vectorized.
1573 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1574 function can be added here, or in the loop pre-header.
1577 Return an index that will be used to index a vector array. It is expected
1578 that a pointer to the first vector will be used as the base address for the
1581 FORNOW: we are not trying to be efficient, just creating a new index each
1582 time from scratch. At this time all vector references could use the same
1585 TODO: create only one index to be used by all vector references. Record
1586 the index in the LOOP_VINFO the first time this procedure is called and
1587 return it on subsequent calls. The increment of this index must be placed
1588 just before the conditional expression that ends the single block loop. */
1591 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1594 tree indx_before_incr, indx_after_incr;
1596 /* It is assumed that the base pointer used for vectorized access contains
1597 the address of the first vector. Therefore the index used for vectorized
1598 access must be initialized to zero and incremented by 1. */
1600 init = integer_zero_node;
1601 step = integer_one_node;
1603 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
/* create_iv builds the IV's phi and increment; we return the value valid
   before the increment, i.e. 0, 1, 2, ... per iteration.  */
1604 create_iv (init, step, NULL_TREE, loop, bsi, false,
1605 &indx_before_incr, &indx_after_incr);
1607 return indx_before_incr;
1611 /* Function vect_create_addr_base_for_vector_ref.
1613 Create an expression that computes the address of the first memory location
1614 that will be accessed for a data reference.
1617 STMT: The statement containing the data reference.
1618 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1619 OFFSET: Optional. If supplied, it is be added to the initial address.
1622 1. Return an SSA_NAME whose value is the address of the memory location of
1623 the first vector of the data reference.
1624 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1625 these statement(s) which define the returned SSA_NAME.
1627 FORNOW: We are only handling array accesses with step 1. */
/* NOTE(review): several lines elided (e.g. the `if` guards around the
   is_array_ref and OFFSET branches); code preserved verbatim.  */
1630 vect_create_addr_base_for_vector_ref (tree stmt,
1631 tree *new_stmt_list,
1634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1635 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1636 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1637 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1638 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1639 tree ref = DR_REF (dr);
1640 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1641 tree scalar_type = TREE_TYPE (ref);
1642 tree scalar_ptr_type = build_pointer_type (scalar_type);
1644 tree init_val, step, init_oval;
1646 bool is_ptr_ref, is_array_ref, is_addr_expr;
1651 tree addr_base, addr_expr;
1652 tree dest, new_stmt;
1654 /* Only the access function of the last index is relevant (i_n in
1655 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1656 access_fn = DR_ACCESS_FN (dr, 0);
1657 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
/* Fallback initial value when the evolution is not simple — verify the
   elided guard before relying on this.  */
1660 init_oval = integer_zero_node;
/* Classify the base so we know whether it can be indexed directly or must
   first be cast to a pointer-to-array.  */
1662 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1663 && TREE_CODE (data_ref_base) == SSA_NAME;
1664 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1665 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1666 || TREE_CODE (data_ref_base) == PLUS_EXPR
1667 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1668 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1670 /** Create: &(base[init_val])
1672 if data_ref_base is an ARRAY_TYPE:
1673 base = data_ref_base
1675 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1676 base = *((scalar_array *) data_ref_base)
1680 array_base = data_ref_base;
1681 else /* is_ptr_ref or is_addr_expr */
1683 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1684 tree scalar_array_type = build_array_type (scalar_type, 0);
1685 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1686 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1687 add_referenced_tmp_var (array_ptr);
1689 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1690 add_referenced_tmp_var (dest);
/* Gimplify the base expression; any helper stmts go on NEW_STMT_LIST
   for the caller to insert (see function comment).  */
1692 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1693 append_to_statement_list_force (new_stmt, new_stmt_list);
1695 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1696 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1697 new_temp = make_ssa_name (array_ptr, vec_stmt);
1698 TREE_OPERAND (vec_stmt, 0) = new_temp;
1699 append_to_statement_list_force (vec_stmt, new_stmt_list);
1702 array_base = build_fold_indirect_ref (new_temp);
/* Materialize the initial index value as a gimple operand.  */
1705 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1706 add_referenced_tmp_var (dest);
1707 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1708 append_to_statement_list_force (new_stmt, new_stmt_list);
/* When OFFSET is supplied: init_val += OFFSET (guard elided here).  */
1712 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1713 add_referenced_tmp_var (tmp);
1714 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1715 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1716 init_val = make_ssa_name (tmp, vec_stmt);
1717 TREE_OPERAND (vec_stmt, 0) = init_val;
1718 append_to_statement_list_force (vec_stmt, new_stmt_list);
/* Finally build &base[init_val] and copy it into a fresh SSA pointer.  */
1721 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1722 NULL_TREE, NULL_TREE);
1723 addr_base = build_fold_addr_expr (array_ref);
1725 /* addr_expr = addr_base */
1726 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1727 get_name (base_name));
1728 add_referenced_tmp_var (addr_expr);
1729 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1730 new_temp = make_ssa_name (addr_expr, vec_stmt);
1731 TREE_OPERAND (vec_stmt, 0) = new_temp;
1732 append_to_statement_list_force (vec_stmt, new_stmt_list);
1738 /* Function get_vectype_for_scalar_type.
1740 Returns the vector type corresponding to SCALAR_TYPE as supported
/* NOTE(review): elided lines include the nbytes sanity check and the
   final return; NULL is presumably returned when the target has no vector
   mode for the type — verify against full source.  */
1744 get_vectype_for_scalar_type (tree scalar_type)
1746 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1747 int nbytes = GET_MODE_SIZE (inner_mode);
1754 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
/* Number of scalar elements that fit one SIMD word.  */
1756 nunits = UNITS_PER_SIMD_WORD / nbytes;
1758 vectype = build_vector_type (scalar_type, nunits);
1759 if (vect_debug_details (NULL))
1761 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1762 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1768 if (vect_debug_details (NULL))
1770 fprintf (dump_file, "vectype: ");
1771 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject types the target cannot carry in a real vector mode.  */
1774 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1776 /* TODO: tree-complex.c sometimes can parallelize operations
1777 on generic vectors. We can vectorize the loop in that case,
1778 but then we should re-run the lowering pass. */
1779 if (vect_debug_details (NULL))
1780 fprintf (dump_file, "mode not supported by target.");
1788 /* Function vect_align_data_ref.
1790 Handle mislignment of a memory accesses.
1792 FORNOW: Can't handle misaligned accesses.
1793 Make sure that the dataref is aligned. */
/* NOTE(review): currently assert-only — any misaligned data-ref reaching
   here is an ICE; analysis is expected to have filtered those out.  */
1796 vect_align_data_ref (tree stmt)
1798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1799 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1801 /* FORNOW: can't handle misaligned accesses;
1802 all accesses expected to be aligned. */
1803 gcc_assert (aligned_access_p (dr));
1807 /* Function vect_create_data_ref_ptr.
1809 Create a memory reference expression for vector access, to be used in a
1810 vector load/store stmt. The reference is based on a new pointer to vector
1814 1. STMT: a stmt that references memory. Expected to be of the form
1815 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1816 2. BSI: block_stmt_iterator where new stmts can be added.
1817 3. OFFSET (optional): an offset to be added to the initial address accessed
1818 by the data-ref in STMT.
1819 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1820 pointing to the initial address.
1823 1. Declare a new ptr to vector_type, and have it point to the base of the
1824 data reference (initial addressed accessed by the data reference).
1825 For example, for vector of type V8HI, the following code is generated:
1828 vp = (v8hi *)initial_address;
1830 if OFFSET is not supplied:
1831 initial_address = &a[init];
1832 if OFFSET is supplied:
1833 initial_address = &a[init + OFFSET];
1835 Return the initial_address in INITIAL_ADDRESS.
1837 2. Create a data-reference in the loop based on the new vector pointer vp,
1838 and using a new index variable 'idx' as follows:
1842 where if ONLY_INIT is true:
1845 update = idx + vector_type_size
1847 Return the pointer vp'.
1850 FORNOW: handle only aligned and consecutive accesses. */
/* NOTE(review): extract has elided lines (several local declarations and
   guards are missing); code preserved verbatim.  */
1853 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1854 tree *initial_address, bool only_init)
1857 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1858 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1859 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1860 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1864 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1865 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1866 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1867 int nvuses, nv_may_defs, nv_must_defs;
1871 tree new_stmt_list = NULL_TREE;
1873 edge pe = loop_preheader_edge (loop);
1879 tree type, tmp, size;
1881 base_name = unshare_expr (DR_BASE_NAME (dr));
/* Debug-only block: report what kind of base is being vectorized.  */
1882 if (vect_debug_details (NULL))
1884 tree data_ref_base = base_name;
1885 fprintf (dump_file, "create array_ref of type: ");
1886 print_generic_expr (dump_file, vectype, TDF_SLIM);
1887 if (TREE_CODE (data_ref_base) == VAR_DECL)
1888 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1889 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1890 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1891 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1892 fprintf (dump_file, "vectorizing a record based array ref: ");
1893 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1894 fprintf (dump_file, "vectorizing a pointer ref: ");
1895 print_generic_expr (dump_file, base_name, TDF_SLIM);
1898 /** (1) Create the new vector-pointer variable: **/
1900 vect_ptr_type = build_pointer_type (vectype);
1901 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1902 get_name (base_name));
1903 add_referenced_tmp_var (vect_ptr);
1906 /** (2) Handle aliasing information of the new vector-pointer: **/
/* The new pointer inherits the original access's type memory tag so the
   alias machinery treats their accesses as conflicting.  */
1908 tag = STMT_VINFO_MEMTAG (stmt_info);
1910 get_var_ann (vect_ptr)->type_mem_tag = tag;
1912 /* Mark for renaming all aliased variables
1913 (i.e, the may-aliases of the type-mem-tag). */
1914 nvuses = NUM_VUSES (vuses);
1915 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1916 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1917 for (i = 0; i < nvuses; i++)
1919 tree use = VUSE_OP (vuses, i);
1920 if (TREE_CODE (use) == SSA_NAME)
1921 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1923 for (i = 0; i < nv_may_defs; i++)
1925 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1926 if (TREE_CODE (def) == SSA_NAME)
1927 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1929 for (i = 0; i < nv_must_defs; i++)
1931 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1932 if (TREE_CODE (def) == SSA_NAME)
1933 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1937 /** (3) Calculate the initial address the vector-pointer, and set
1938 the vector-pointer to point to it before the loop: **/
1940 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1941 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
/* Insert the address computation on the preheader edge; the assert checks
   the insertion did not have to split the edge into a new block.  */
1943 pe = loop_preheader_edge (loop);
1944 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1945 gcc_assert (!new_bb);
1946 *initial_address = new_temp;
1948 /* Create: p = (vectype *) initial_base */
1949 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1950 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1951 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1952 TREE_OPERAND (vec_stmt, 0) = new_temp;
1953 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1954 gcc_assert (!new_bb);
1955 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1958 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1960 if (only_init) /* No update in loop is required. */
1961 return vect_ptr_init;
1963 idx = vect_create_index_for_vector_ref (loop, bsi);
1965 /* Create: update = idx * vectype_size */
1966 tmp = create_tmp_var (integer_type_node, "update");
1967 add_referenced_tmp_var (tmp);
/* The byte update is computed in an integer type as wide as the pointer
   (type_for_size of the pointer's TYPE_SIZE), then converted.  */
1968 size = TYPE_SIZE (vect_ptr_type);
1969 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1970 ptr_update = create_tmp_var (type, "update");
1971 add_referenced_tmp_var (ptr_update);
1972 vectype_size = build_int_cst (integer_type_node,
1973 GET_MODE_SIZE (TYPE_MODE (vectype)));
1974 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1975 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
1976 new_temp = make_ssa_name (tmp, vec_stmt);
1977 TREE_OPERAND (vec_stmt, 0) = new_temp;
1978 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1979 vec_stmt = fold_convert (type, new_temp);
1980 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1981 new_temp = make_ssa_name (ptr_update, vec_stmt);
1982 TREE_OPERAND (vec_stmt, 0) = new_temp;
1983 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1985 /* Create: data_ref_ptr = vect_ptr_init + update */
1986 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1987 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1988 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1989 TREE_OPERAND (vec_stmt, 0) = new_temp;
1990 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1991 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1993 return data_ref_ptr;
1997 /* Function vect_create_destination_var.
1999 Create a new temporary of type VECTYPE. */
/* Names the new temp after SCALAR_DEST (which must be an SSA_NAME) and
   registers it as a referenced variable; return is on an elided line.  */
2002 vect_create_destination_var (tree scalar_dest, tree vectype)
2005 const char *new_name;
2007 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2009 new_name = get_name (scalar_dest);
2012 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2013 add_referenced_tmp_var (vec_dest);
2019 /* Function vect_init_vector.
2021 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2022 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2023 used in the vectorization of STMT. */
2026 vect_init_vector (tree stmt, tree vector_var)
2028 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2029 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2032 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2038 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2039 add_referenced_tmp_var (new_var);
2041 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2042 new_temp = make_ssa_name (new_var, init_stmt);
2043 TREE_OPERAND (init_stmt, 0) = new_temp;
/* The init is loop-invariant, so it is hoisted to the preheader; the
   assert checks the edge insertion did not create a new block.  */
2045 pe = loop_preheader_edge (loop);
2046 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2047 gcc_assert (!new_bb);
2049 if (vect_debug_details (NULL))
2051 fprintf (dump_file, "created new init_stmt: ");
2052 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2055 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2060 /* Function vect_get_vec_def_for_operand.
2062 OP is an operand in STMT. This function returns a (vector) def that will be
2063 used in the vectorized stmt for STMT.
2065 In the case that OP is an SSA_NAME which is defined in the loop, then
2066 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2068 In case OP is an invariant or constant, a new stmt that creates a vector def
2069 needs to be introduced. */
/* NOTE(review): switch-case labels and some returns are elided in this
   extract; code preserved verbatim.  */
2072 vect_get_vec_def_for_operand (tree op, tree stmt)
2077 stmt_vec_info def_stmt_info = NULL;
2078 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2079 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2080 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2081 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2088 if (vect_debug_details (NULL))
2090 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2091 print_generic_expr (dump_file, op, TDF_SLIM);
2094 /** ===> Case 1: operand is a constant. **/
2096 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2098 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2102 /* Build a tree with vector elements. */
2103 if (vect_debug_details (NULL))
2104 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
/* Splat the scalar constant into an nunits-element list.  */
2106 for (i = nunits - 1; i >= 0; --i)
2108 t = tree_cons (NULL_TREE, op, t);
2110 vec_cst = build_vector (vectype, t);
2111 return vect_init_vector (stmt, vec_cst);
2114 gcc_assert (TREE_CODE (op) == SSA_NAME);
2116 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2118 def_stmt = SSA_NAME_DEF_STMT (op);
2119 def_stmt_info = vinfo_for_stmt (def_stmt);
2121 if (vect_debug_details (NULL))
2123 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2124 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2128 /** ==> Case 2.1: operand is defined inside the loop. **/
2132 /* Get the def from the vectorized stmt. */
2134 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2135 gcc_assert (vec_stmt);
2136 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2141 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2142 it is a reduction/induction. **/
2144 bb = bb_for_stmt (def_stmt);
2145 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2147 if (vect_debug_details (NULL))
2148 fprintf (dump_file, "reduction/induction - unsupported.");
2149 internal_error ("no support for reduction/induction"); /* FORNOW */
2153 /** ==> Case 2.3: operand is defined outside the loop -
2154 it is a loop invariant. */
/* Extract the invariant scalar value from whatever kind of def it has.  */
2156 switch (TREE_CODE (def_stmt))
2159 def = PHI_RESULT (def_stmt);
2162 def = TREE_OPERAND (def_stmt, 0);
2165 def = TREE_OPERAND (def_stmt, 0);
2166 gcc_assert (IS_EMPTY_STMT (def_stmt));
2170 if (vect_debug_details (NULL))
2172 fprintf (dump_file, "unsupported defining stmt: ");
2173 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2175 internal_error ("unsupported defining stmt");
2178 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2180 if (vect_debug_details (NULL))
2181 fprintf (dump_file, "Create vector_inv.");
2183 for (i = nunits - 1; i >= 0; --i)
2185 t = tree_cons (NULL_TREE, def, t);
2188 vec_inv = build_constructor (vectype, t);
2189 return vect_init_vector (stmt, vec_inv);
2193 /* Function vect_finish_stmt_generation.
2195 Insert a new stmt. */
/* Inserts VEC_STMT before STMT's position, then re-synchronizes BSI so it
   points at STMT again.  */
2198 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2200 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2202 if (vect_debug_details (NULL))
2204 fprintf (dump_file, "add new stmt: ");
2205 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2208 /* Make sure bsi points to the stmt that is being vectorized. */
2210 /* Assumption: any stmts created for the vectorization of stmt S were
2211 inserted before S. BSI is expected to point to S or some new stmt before S.
/* Advance until STMT is found; the assert fires if STMT was not ahead of
   BSI, i.e. the assumption above was violated.  */
2214 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2216 gcc_assert (stmt == bsi_stmt (*bsi));
2220 /* Function vectorizable_assignment.
2222 Check if STMT performs an assignment (copy) that can be vectorized.
2223 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2224 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2225 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): `return false;` bodies of the guards and the final
   `return true;` are elided in this extract.  */
2228 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2235 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2236 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2239 /* Is vectorizable assignment? */
2241 if (TREE_CODE (stmt) != MODIFY_EXPR)
2244 scalar_dest = TREE_OPERAND (stmt, 0);
2245 if (TREE_CODE (scalar_dest) != SSA_NAME)
2248 op = TREE_OPERAND (stmt, 1);
2249 if (!vect_is_simple_use (op, loop, NULL))
2251 if (vect_debug_details (NULL))
2252 fprintf (dump_file, "use not simple.");
/* Analysis-only mode: record the stmt kind and stop before transforming.  */
2256 if (!vec_stmt) /* transformation not required. */
2258 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2263 if (vect_debug_details (NULL))
2264 fprintf (dump_file, "transform assignment.");
2267 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2270 op = TREE_OPERAND (stmt, 1);
2271 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2273 /* Arguments are ready. create the new vector stmt. */
2274 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2275 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2276 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2277 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2283 /* Function vectorizable_operation.
2285 Check if STMT performs a binary or unary operation that can be vectorized.
2286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2291 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2296 tree op0, op1 = NULL;
2297 tree vec_oprnd0, vec_oprnd1=NULL;
2298 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2299 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2300 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2302 enum tree_code code;
2303 enum machine_mode vec_mode;
2309 /* Is STMT a vectorizable binary/unary operation? */
2310 if (TREE_CODE (stmt) != MODIFY_EXPR)
2313 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the scalar tree code to the optab that implements it on vectors.  */
2316 operation = TREE_OPERAND (stmt, 1);
2317 code = TREE_CODE (operation);
2318 optab = optab_for_tree_code (code, vectype);
2320 /* Support only unary or binary operations. */
2321 op_type = TREE_CODE_LENGTH (code);
2322 if (op_type != unary_op && op_type != binary_op)
2324 if (vect_debug_details (NULL))
2325 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* Every operand must be a "simple" use (constant, invariant, or defined
   in the current loop iteration).  */
2329 for (i = 0; i < op_type; i++)
2331 op = TREE_OPERAND (operation, i);
2332 if (!vect_is_simple_use (op, loop, NULL))
2334 if (vect_debug_details (NULL))
2335 fprintf (dump_file, "use not simple.");
2340 /* Supportable by target? */
2343 if (vect_debug_details (NULL))
2344 fprintf (dump_file, "no optab.");
/* Reject the stmt if the target has no insn for this op in vector mode.  */
2347 vec_mode = TYPE_MODE (vectype);
2348 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2350 if (vect_debug_details (NULL))
2351 fprintf (dump_file, "op not supported by target.");
/* Analysis phase: only classify the stmt; no code is generated.  */
2355 if (!vec_stmt) /* transformation not required. */
2357 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
/* Transformation phase.  */
2363 if (vect_debug_details (NULL))
2364 fprintf (dump_file, "transform binary/unary operation.")
2367 scalar_dest = TREE_OPERAND (stmt, 0);
2368 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle uses - fetch (or create) the vectorized defs of the operands.  */
2371 op0 = TREE_OPERAND (operation, 0);
2372 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2374 if (op_type == binary_op)
2376 op1 = TREE_OPERAND (operation, 1);
2377 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2380 /* Arguments are ready. create the new vector stmt. */
2382 if (op_type == binary_op)
2383 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2384 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2386 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2387 build1 (code, vectype, vec_oprnd0));
2388 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2389 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2390 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2396 /* Function vectorizable_store.
2398 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2405 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2412 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2413 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2415 enum machine_mode vec_mode;
2417 enum dr_alignment_support alignment_support_cheme;
2419 /* Is vectorizable store? */
/* A vectorizable store is a MODIFY_EXPR whose LHS is a memory reference
   (array or pointer dereference) and whose RHS is a "simple" use.  */
2421 if (TREE_CODE (stmt) != MODIFY_EXPR)
2424 scalar_dest = TREE_OPERAND (stmt, 0);
2425 if (TREE_CODE (scalar_dest) != ARRAY_REF
2426 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2429 op = TREE_OPERAND (stmt, 1);
2430 if (!vect_is_simple_use (op, loop, NULL))
2432 if (vect_debug_details (NULL))
2433 fprintf (dump_file, "use not simple.");
/* The target must support vector moves in this mode.  */
2437 vec_mode = TYPE_MODE (vectype);
2438 /* FORNOW. In some cases can vectorize even if data-type not supported
2439 (e.g. - array initialization with 0). */
2440 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2443 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Analysis phase: only classify the stmt; no code is generated.  */
2447 if (!vec_stmt) /* transformation not required. */
2449 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
/* Transformation phase.  */
2455 if (vect_debug_details (NULL))
2456 fprintf (dump_file, "transform store");
2458 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2459 gcc_assert (alignment_support_cheme);
/* BUGFIX: was "alignment_support_cheme = dr_aligned" — an assignment inside
   the assert, which is always true (dr_aligned is nonzero) and silently
   clobbers the variable.  Use equality comparison instead.  */
2460 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2462 /* Handle use - get the vectorized def from the defining stmt. */
2463 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2466 /* FORNOW: make sure the data reference is aligned. */
2467 vect_align_data_ref (stmt);
2468 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2469 data_ref = build_fold_indirect_ref (data_ref);
2471 /* Arguments are ready. create the new vector stmt. */
2472 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2473 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2479 /* vectorizable_load.
2481 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2483 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2484 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2485 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2488 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2491 tree vec_dest = NULL;
2492 tree data_ref = NULL;
2494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2495 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2496 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2503 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2504 edge pe = loop_preheader_edge (loop);
2505 enum dr_alignment_support alignment_support_cheme;
2507 /* Is vectorizable load? */
/* A vectorizable load is a MODIFY_EXPR with an SSA-name LHS and a memory
   reference (array or pointer dereference) RHS, with a recorded data-ref.  */
2509 if (TREE_CODE (stmt) != MODIFY_EXPR)
2512 scalar_dest = TREE_OPERAND (stmt, 0);
2513 if (TREE_CODE (scalar_dest) != SSA_NAME)
2516 op = TREE_OPERAND (stmt, 1);
2517 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2520 if (!STMT_VINFO_DATA_REF (stmt_info))
2523 mode = (int) TYPE_MODE (vectype);
2525 /* FORNOW. In some cases can vectorize even if data-type not supported
2526 (e.g. - data copies). */
2527 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2529 if (vect_debug_details (loop))
2530 fprintf (dump_file, "Aligned load, but unsupported type.");
/* Analysis phase: only classify the stmt; no code is generated.  */
2534 if (!vec_stmt) /* transformation not required. */
2536 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
/* Transformation phase.  The code generated depends on how the target
   supports the (mis)alignment of this data-ref.  */
2542 if (vect_debug_details (NULL))
2543 fprintf (dump_file, "transform load.");
2545 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2546 gcc_assert (alignment_support_cheme);
/* Case 1: a plain (possibly misaligned-but-supported) vector load.  */
2548 if (alignment_support_cheme == dr_aligned
2549 || alignment_support_cheme == dr_unaligned_supported)
2560 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2561 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2562 if (aligned_access_p (dr))
2563 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned-but-supported access: express it as MISALIGNED_INDIRECT_REF
   carrying the misalignment in bits (-1 byte-misalignment means unknown).  */
2566 int mis = DR_MISALIGNMENT (dr);
2567 tree tmis = (mis == -1 ?
2569 build_int_cst (integer_type_node, mis));
2570 tmis = int_const_binop (MULT_EXPR, tmis,
2571 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2572 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2574 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2575 new_temp = make_ssa_name (vec_dest, new_stmt);
2576 TREE_OPERAND (new_stmt, 0) = new_temp;
2577 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Case 2: software-pipelined realignment.  Two aligned loads (msq/lsq)
   straddling the desired data are combined with a realign_load; the scheme
   is sketched in the pseudo-code below.  */
2579 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2583 msq_init = *(floor(p1))
2584 p2 = initial_addr + VS - 1;
2585 magic = have_builtin ? builtin_result : initial_address;
2588 p2' = p2 + indx * vectype_size
2590 vec_dest = realign_load (msq, lsq, magic)
2604 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2605 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2606 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2608 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2609 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2610 new_temp = make_ssa_name (vec_dest, new_stmt);
2611 TREE_OPERAND (new_stmt, 0) = new_temp;
/* Preheader insertion must not split the edge (hence the !new_bb assert).  */
2612 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2613 gcc_assert (!new_bb);
2614 msq_init = TREE_OPERAND (new_stmt, 0);
2617 /* <2> Create lsq = *(floor(p2')) in the loop */
/* offset = nunits - 1, so p2 points one vector-size minus one element past p1.  */
2618 offset = build_int_cst (integer_type_node,
2619 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2620 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2621 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2622 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2623 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2624 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2625 new_temp = make_ssa_name (vec_dest, new_stmt);
2626 TREE_OPERAND (new_stmt, 0) = new_temp;
2627 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2628 lsq = TREE_OPERAND (new_stmt, 0);
/* <3> "magic" operand for realign_load: a target permutation mask if the
   target provides one, otherwise the current address.  */
2632 if (targetm.vectorize.builtin_mask_for_load)
2634 /* Create permutation mask, if required, in loop preheader. */
2636 params = build_tree_list (NULL_TREE, init_addr);
2637 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2638 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2639 new_stmt = build_function_call_expr (builtin_decl, params);
2640 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2641 new_temp = make_ssa_name (vec_dest, new_stmt);
2642 TREE_OPERAND (new_stmt, 0) = new_temp;
2643 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2644 gcc_assert (!new_bb);
2645 magic = TREE_OPERAND (new_stmt, 0);
2649 /* Use current address instead of init_addr for reduced reg pressure.
2651 magic = dataref_ptr;
2655 /* <4> Create msq = phi <msq_init, lsq> in loop */
2656 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2657 msq = make_ssa_name (vec_dest, NULL_TREE);
2658 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2659 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2660 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2661 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2664 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2665 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2666 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2667 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt ? vec_dest : vec_dest, new_stmt);
2668 new_temp = make_ssa_name (vec_dest, new_stmt);
2669 TREE_OPERAND (new_stmt, 0) = new_temp;
2670 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2675 *vec_stmt = new_stmt;
2680 /* Function vect_supportable_dr_alignment
2682 Return whether the data reference DR is supported with respect to its
/* Classifies DR as dr_aligned, dr_unaligned_supported (target handles
   misaligned access directly), dr_unaligned_software_pipeline (loads can be
   realigned with two aligned loads + realign_load), or unsupported.  */
2685 static enum dr_alignment_support
2686 vect_supportable_dr_alignment (struct data_reference *dr)
2688 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2689 enum machine_mode mode = (int) TYPE_MODE (vectype);
2691 if (aligned_access_p (dr))
2694 /* Possibly unaligned access. */
/* Reads can use the realign-load scheme when the target has the optab and,
   if it declares a mask builtin, that builtin actually returns one.  */
2696 if (DR_IS_READ (dr))
2698 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2699 && (!targetm.vectorize.builtin_mask_for_load
2700 || targetm.vectorize.builtin_mask_for_load ()))
2701 return dr_unaligned_software_pipeline;
/* Otherwise fall back to direct misaligned access if the target allows it.  */
2703 if (targetm.vectorize.misaligned_mem_ok (mode))
2704 /* Can't software pipeline the loads. */
2705 return dr_unaligned_supported;
2709 return dr_unaligned_unsupported;
2713 /* Function vect_transform_stmt.
2715 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatches on the classification recorded during analysis
   (STMT_VINFO_TYPE) to the matching vectorizable_* routine, records the
   generated vector stmt, and returns whether STMT was a store (the caller
   removes scalar stores after vectorization).  */
2718 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2720 bool is_store = false;
2721 tree vec_stmt = NULL_TREE;
2722 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2725 switch (STMT_VINFO_TYPE (stmt_info))
2727 case op_vec_info_type:
2728 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2732 case assignment_vec_info_type:
2733 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2737 case load_vec_info_type:
2738 done = vectorizable_load (stmt, bsi, &vec_stmt);
2742 case store_vec_info_type:
2743 done = vectorizable_store (stmt, bsi, &vec_stmt);
/* Default case: the analysis phase should never let an unclassified
   stmt reach here.  */
2748 if (vect_debug_details (NULL))
2749 fprintf (dump_file, "stmt not supported.");
2753 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2759 /* This function builds ni_name = number of iterations loop executes
2760 on the loop preheader. */
/* Gimplifies the symbolic iteration count of the loop into a fresh "niters"
   temporary, inserting any needed computation on the preheader edge, and
   returns the SSA name holding the count.  */
2763 vect_build_loop_niters (loop_vec_info loop_vinfo)
2765 tree ni_name, stmt, var;
2767 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Unshare so gimplification does not corrupt the expression stored in
   the loop_vinfo.  */
2768 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2770 var = create_tmp_var (TREE_TYPE (ni), "niters");
2771 add_referenced_tmp_var (var);
2772 ni_name = force_gimple_operand (ni, &stmt, false, var);
/* Insert the computation on the preheader edge; it must not split the
   edge (hence the !new_bb assert).  */
2774 pe = loop_preheader_edge (loop);
2777 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2778 gcc_assert (!new_bb);
2785 /* This function generates the following statements:
2787 ni_name = number of iterations loop executes
2788 ratio = ni_name / vf
2789 ratio_mult_vf_name = ratio * vf
2791 and places them at the loop preheader edge. */
/* VF is a power of two, so the division and multiplication are emitted as
   right/left shifts by log2(vf).  The three SSA names are returned through
   the out parameters.  */
2794 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2796 tree *ratio_mult_vf_name_ptr,
2797 tree *ratio_name_ptr)
2805 tree ratio_mult_vf_name;
2806 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2807 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2808 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2809 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2811 pe = loop_preheader_edge (loop);
2813 /* Generate temporary variable that contains
2814 number of iterations loop executes. */
2816 ni_name = vect_build_loop_niters (loop_vinfo);
2818 /* Create: ratio = ni >> log2(vf) */
2820 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2821 add_referenced_tmp_var (var);
2822 ratio_name = make_ssa_name (var, NULL_TREE);
2823 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2824 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2825 SSA_NAME_DEF_STMT (ratio_name) = stmt;
/* Insert on the preheader edge; must not split the edge.  */
2827 pe = loop_preheader_edge (loop);
2828 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2829 gcc_assert (!new_bb);
2831 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2833 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2834 add_referenced_tmp_var (var);
2835 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2836 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2837 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2838 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2840 pe = loop_preheader_edge (loop);
2841 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2842 gcc_assert (!new_bb);
2844 *ni_name_ptr = ni_name;
2845 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2846 *ratio_name_ptr = ratio_name;
2852 /* Function vect_update_ivs_after_vectorizer.
2854 "Advance" the induction variables of LOOP to the value they should take
2855 after the execution of LOOP. This is currently necessary because the
2856 vectorizer does not handle induction variables that are used after the
2857 loop. Such a situation occurs when the last iterations of LOOP are
2859 1. We introduced new uses after LOOP for IVs that were not originally used
2860 after LOOP: the IVs of LOOP are now used by an epilog loop.
2861 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2862 times, whereas the loop IVs should be bumped N times.
2865 - LOOP - a loop that is going to be vectorized. The last few iterations
2866 of LOOP were peeled.
2867 - NITERS - the number of iterations that LOOP executes (before it is
2868 vectorized). i.e, the number of times the ivs should be bumped.
2869 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2870 coming out from LOOP on which there are uses of the LOOP ivs
2871 (this is the path from LOOP->exit to epilog_loop->preheader).
2873 The new definitions of the ivs are placed in LOOP->exit.
2874 The phi args associated with the edge UPDATE_E in the bb
2875 UPDATE_E->dest are updated accordingly.
2877 Assumption 1: Like the rest of the vectorizer, this function assumes
2878 a single loop exit that has a single predecessor.
2880 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2881 organized in the same order.
2883 Assumption 3: The access function of the ivs is simple enough (see
2884 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2886 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2887 coming out of LOOP on which the ivs of LOOP are used (this is the path
2888 that leads to the epilog loop; other paths skip the epilog loop). This
2889 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2890 needs to have its phis updated.
2894 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2896 basic_block exit_bb = loop->exit_edges[0]->dest;
2898 basic_block update_bb = update_e->dest;
2900 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2902 /* Make sure there exists a single-predecessor exit bb: */
2903 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
/* Walk the header phis and the update_bb phis in lockstep (Assumption 2:
   both lists are in the same order).  */
2905 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2907 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2909 tree access_fn = NULL;
2910 tree evolution_part;
2913 tree var, stmt, ni, ni_name;
2914 block_stmt_iterator last_bsi;
2916 /* Skip virtual phi's. */
2917 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2919 if (vect_debug_details (NULL))
2920 fprintf (dump_file, "virtual phi. skip.");
/* The scalar-evolution analyzer gives the IV's access function; its
   evolution part (the step) must exist and be a simple (non-chrec) step.  */
2924 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2925 gcc_assert (access_fn);
2927 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2928 gcc_assert (evolution_part != NULL_TREE);
2930 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2931 of degree >= 2 or exponential. */
2932 gcc_assert (!tree_is_chrec (evolution_part));
2934 step_expr = evolution_part;
2935 init_expr = unshare_expr (initial_condition (access_fn));
/* Final IV value after NITERS bumps: init + niters * step.  */
2937 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2938 build2 (MULT_EXPR, TREE_TYPE (niters),
2939 niters, step_expr), init_expr);
2941 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2942 add_referenced_tmp_var (var);
2944 ni_name = force_gimple_operand (ni, &stmt, false, var);
2946 /* Insert stmt into exit_bb. */
2947 last_bsi = bsi_last (exit_bb);
2949 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2951 /* Fix phi expressions in the successor bb. */
2952 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2953 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2954 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2959 /* Function vect_do_peeling_for_loop_bound
2961 Peel the last iterations of the loop represented by LOOP_VINFO.
2962 The peeled iterations form a new epilog loop. Given that the loop now
2963 iterates NITERS times, the new epilog loop iterates
2964 NITERS % VECTORIZATION_FACTOR times.
2966 The original loop will later be made to iterate
2967 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2970 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2971 struct loops *loops)
2974 tree ni_name, ratio_mult_vf_name;
2975 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2976 struct loop *new_loop;
2978 #ifdef ENABLE_CHECKING
2982 if (vect_debug_details (NULL))
/* BUGFIX: the dump banner used to print the misspelled, outdated name
   "vect_transtorm_for_unknown_loop_bound"; print this function's name.  */
2983 fprintf (dump_file, "\n<<vect_do_peeling_for_loop_bound>>\n");
2985 /* Generate the following variables on the preheader of original loop:
2987 ni_name = number of iteration the original loop executes
2988 ratio = ni_name / vf
2989 ratio_mult_vf_name = ratio * vf */
2990 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2991 &ratio_mult_vf_name, ratio);
2993 /* Update loop info. */
2994 loop->pre_header = loop_preheader_edge (loop)->src;
2995 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2997 #ifdef ENABLE_CHECKING
2998 loop_num = loop->num;
/* Peel off the scalar epilog loop that runs the remaining n%VF iterations.  */
3000 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3001 ratio_mult_vf_name, ni_name, false);
3002 #ifdef ENABLE_CHECKING
3003 gcc_assert (new_loop);
3004 gcc_assert (loop_num == loop->num);
3005 slpeel_verify_cfg_after_peeling (loop, new_loop);
3008 /* A guard that controls whether the new_loop is to be executed or skipped
3009 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3010 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3011 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3012 is on the path where the LOOP IVs are used and need to be updated. */
3014 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3015 update_e = EDGE_PRED (new_loop->pre_header, 0);
3017 update_e = EDGE_PRED (new_loop->pre_header, 1);
3019 /* Update IVs of original loop as if they were advanced
3020 by ratio_mult_vf_name steps. */
3021 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3023 /* After peeling we have to reset scalar evolution analyzer. */
3030 /* Function vect_gen_niters_for_prolog_loop
3032 Set the number of iterations for the loop represented by LOOP_VINFO
3033 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3034 and the misalignment of DR - the first data reference recorded in
3035 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3036 this loop, the data reference DR will refer to an aligned location.
3038 The following computation is generated:
3040 compute address misalignment in bytes:
3041 addr_mis = addr & (vectype_size - 1)
3043 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3045 (elem_size = element type size; an element is the scalar element
3046 whose type is the inner type of the vectype) */
3049 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3051 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3052 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3053 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3055 tree iters, iters_name;
3058 tree dr_stmt = DR_STMT (dr);
3059 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3061 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3064 tree new_stmts = NULL_TREE;
/* Build the runtime start address of DR; any stmts needed to compute it
   are collected in new_stmts and inserted on the preheader edge below.  */
3066 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3067 tree ptr_type = TREE_TYPE (start_addr);
3068 tree size = TYPE_SIZE (ptr_type);
3069 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3070 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3071 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3072 tree niters_type = TREE_TYPE (loop_niters);
/* elem_size = vectype_align / vf; the shift count is its log2, used to turn
   a byte misalignment into an element misalignment.  */
3073 tree elem_size_log =
3074 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3075 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3077 pe = loop_preheader_edge (loop);
3078 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3079 gcc_assert (!new_bb);
3081 /* Create: byte_misalign = addr & (vectype_size - 1) */
3082 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3084 /* Create: elem_misalign = byte_misalign / element_size */
3086 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3088 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3089 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3090 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3091 iters = fold_convert (niters_type, iters);
3093 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3094 /* If the loop bound is known at compile time we already verified that it is
3095 greater than vf; since the misalignment ('iters') is at most vf, there's
3096 no need to generate the MIN_EXPR in this case. */
3097 if (!host_integerp (loop_niters, 0))
3098 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
/* Gimplify the expression into a named SSA temp.  */
3100 var = create_tmp_var (niters_type, "prolog_loop_niters");
3101 add_referenced_tmp_var (var);
3102 iters_name = force_gimple_operand (iters, &stmt, false, var);
3104 /* Insert stmt on loop preheader edge. */
3105 pe = loop_preheader_edge (loop);
3108 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3109 gcc_assert (!new_bb);
3116 /* Function vect_update_inits_of_dr
3118 NITERS iterations were peeled from LOOP. DR represents a data reference
3119 in LOOP. This function updates the information recorded in DR to
3120 account for the fact that the first NITERS iterations had already been
3121 executed. Specifically, it updates the initial_condition of the
3122 access_function of DR. */
3125 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
/* Only the first access function of DR is updated here.  */
3128 tree access_fn = DR_ACCESS_FN (dr, 0);
3129 tree init, init_new, step;
3131 step = evolution_part_in_loop_num (access_fn, loop->num);
3132 init = initial_condition (access_fn);
/* New initial condition after peeling: init + niters * step.  */
3134 init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3135 build2 (MULT_EXPR, TREE_TYPE (niters),
3136 niters, step), init);
3137 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3143 /* Function vect_update_inits_of_drs
3145 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3146 This function updates the information recorded for the data references in
3147 the loop to account for the fact that the first NITERS iterations had
3148 already been executed. Specifically, it updates the initial_condition of the
3149 access_function of all the data_references in the loop. */
3152 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3155 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3156 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3157 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3159 if (dump_file && (dump_flags & TDF_DETAILS))
/* BUGFIX: the dump banner named the singular helper vect_update_inits_of_dr,
   which makes dump scans attribute this pass-over-all-drs to the wrong
   function; print this function's own name.  */
3160 fprintf (dump_file, "\n<<vect_update_inits_of_drs>>\n");
/* Advance the init condition of every write data-ref, then every read
   data-ref, by NITERS iterations.  */
3162 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3164 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3165 vect_update_inits_of_dr (dr, loop, niters);
3168 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3170 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3171 vect_update_inits_of_dr (dr, loop, niters);
3176 /* Function vect_do_peeling_for_alignment
3178 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3179 'niters' is set to the misalignment of one of the data references in the
3180 loop, thereby forcing it to refer to an aligned location at the beginning
3181 of the execution of this loop. The data reference for which we are
3182 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3185 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3187 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3188 tree niters_of_prolog_loop, ni_name;
3190 struct loop *new_loop;
3192 if (vect_debug_details (NULL))
3193 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
/* Compute (at runtime) how many scalar iterations must run before the
   unaligned data-ref becomes aligned.  */
3195 ni_name = vect_build_loop_niters (loop_vinfo);
3196 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3198 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3200 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3201 niters_of_prolog_loop, ni_name, true);
3202 #ifdef ENABLE_CHECKING
3203 gcc_assert (new_loop);
3204 slpeel_verify_cfg_after_peeling (new_loop, loop);
3207 /* Update number of times loop executes. */
/* The vectorized loop now runs NITERS - prolog_niters iterations.  */
3208 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3209 LOOP_VINFO_NITERS (loop_vinfo) =
3210 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3212 /* Update the init conditions of the access functions of all data refs. */
3213 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3215 /* After peeling we have to reset scalar evolution analyzer. */
3215 /* After peeling we have to reset scalar evolution analyzer. */
3222 /* Function vect_transform_loop.
3224 The analysis phase has determined that the loop is vectorizable.
3225 Vectorize the loop - created vectorized stmts to replace the scalar
3226 stmts in the loop, and update the loop exit condition. */
3229 vect_transform_loop (loop_vec_info loop_vinfo,
3230 struct loops *loops ATTRIBUTE_UNUSED)
3232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3233 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3234 int nbbs = loop->num_nodes;
3235 block_stmt_iterator si;
3238 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3240 if (vect_debug_details (NULL))
3241 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3244 /* Peel the loop if there are data refs with unknown alignment.
3245 Only one data ref with unknown store is allowed. */
3247 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3248 vect_do_peeling_for_alignment (loop_vinfo, loops);
3250 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3251 compile time constant), or it is a constant that doesn't divide by the
3252 vectorization factor, then an epilog loop needs to be created.
3253 We therefore duplicate the loop: the original loop will be vectorized,
3254 and will compute the first (n/VF) iterations. The second copy of the loop
3255 will remain scalar and will compute the remaining (n%VF) iterations.
3256 (VF is the vectorization factor). */
3258 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3259 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3260 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3261 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
/* Otherwise the (known) bound divides VF and RATIO is a simple constant.  */
3263 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3264 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3266 /* 1) Make sure the loop header has exactly two entries
3267 2) Make sure we have a preheader basic block. */
3269 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3271 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3274 /* FORNOW: the vectorizer supports only loops which body consist
3275 of one basic block (header + empty latch). When the vectorizer will
3276 support more involved loop forms, the order by which the BBs are
3277 traversed need to be reconsidered. */
3279 for (i = 0; i < nbbs; i++)
3281 basic_block bb = bbs[i];
3283 for (si = bsi_start (bb); !bsi_end_p (si);)
3285 tree stmt = bsi_stmt (si);
3286 stmt_vec_info stmt_info;
3289 if (vect_debug_details (NULL))
3291 fprintf (dump_file, "------>vectorizing statement: ");
3292 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Stmts the analysis marked irrelevant are left untouched.  */
3294 stmt_info = vinfo_for_stmt (stmt);
3295 gcc_assert (stmt_info);
3296 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3301 #ifdef ENABLE_CHECKING
3302 /* FORNOW: Verify that all stmts operate on the same number of
3303 units and no inner unrolling is necessary. */
3305 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3306 == vectorization_factor);
3308 /* -------- vectorize statement ------------ */
3309 if (vect_debug_details (NULL))
3310 fprintf (dump_file, "transform statement.");
/* Returns true for stores, whose scalar stmt is then removed below.  */
3312 is_store = vect_transform_stmt (stmt, &si);
3315 /* free the attached stmt_vec_info and remove the stmt. */
3316 stmt_ann_t ann = stmt_ann (stmt);
3318 set_stmt_info (ann, NULL);
/* Rewrite the exit condition so the vector loop iterates RATIO times.  */
3327 slpeel_make_loop_iterate_ntimes (loop, ratio);
3329 if (vect_debug_details (loop))
3330 fprintf (dump_file,"Success! loop vectorized.");
3331 if (vect_debug_stats (loop))
3332 fprintf (dump_file, "LOOP VECTORIZED.");
3336 /* Function vect_is_simple_use.
3339 LOOP - the loop that is being vectorized.
3340 OPERAND - operand of a stmt in LOOP.
3341 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3343 Returns whether a stmt with OPERAND can be vectorized.
3344 Supportable operands are constants, loop invariants, and operands that are
3345 defined by the current iteration of the loop. Unsupportable operands are
3346 those that are defined by a previous iteration of the loop (as is the case
3347 in reduction/induction computations). */
/* Check whether OPERAND (used by some stmt in LOOP) is a "simple" use the
   vectorizer can handle: a constant, a loop invariant, or an SSA_NAME defined
   by a non-phi stmt inside the current iteration of LOOP.  Cross-iteration
   defs (induction/reduction phis) are rejected.  DEF receives the defining
   value where applicable — presumably for constants/invariants; TODO confirm
   against callers.  */
3350 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
/* Integer and real constants are trivially vectorizable operands.  */
3358 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3361 if (TREE_CODE (operand) != SSA_NAME)
3364 def_stmt = SSA_NAME_DEF_STMT (operand);
3365 if (def_stmt == NULL_TREE )
3367 if (vect_debug_details (NULL))
3368 fprintf (dump_file, "no def_stmt.");
3372 /* empty stmt is expected only in case of a function argument.
3373 (Otherwise - we expect a phi_node or a modify_expr). */
3374 if (IS_EMPTY_STMT (def_stmt))
3376 tree arg = TREE_OPERAND (def_stmt, 0);
3377 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3379 if (vect_debug_details (NULL))
3381 fprintf (dump_file, "Unexpected empty stmt: ");
3382 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3387 /* phi_node inside the loop indicates an induction/reduction pattern.
3388 This is not supported yet. */
3389 bb = bb_for_stmt (def_stmt);
3390 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3392 if (vect_debug_details (NULL))
3393 fprintf (dump_file, "reduction/induction - unsupported.");
3394 return false; /* FORNOW: not supported yet. */
3397 /* Expecting a modify_expr or a phi_node. */
3398 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3399 || TREE_CODE (def_stmt) == PHI_NODE)
3410 /* Function vect_analyze_operations.
3412 Scan the loop stmts and make sure they are all vectorizable. */
/* Walk every stmt of every basic block in the loop described by LOOP_VINFO,
   verify each relevant stmt is vectorizable, and derive the loop's
   vectorization factor (number of units per vector).  Also checks that an
   epilog loop can be created when the iteration count is unknown or does not
   divide by the factor.  */
3415 vect_analyze_operations (loop_vec_info loop_vinfo)
3417 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3418 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3419 int nbbs = loop->num_nodes;
3420 block_stmt_iterator si;
/* 0 means "not yet determined"; set from the first relevant stmt below.  */
3421 unsigned int vectorization_factor = 0;
3426 if (vect_debug_details (NULL))
3427 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3429 for (i = 0; i < nbbs; i++)
3431 basic_block bb = bbs[i];
3433 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3435 tree stmt = bsi_stmt (si);
3436 unsigned int nunits;
3437 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3440 if (vect_debug_details (NULL))
3442 fprintf (dump_file, "==> examining statement: ");
3443 print_generic_expr (dump_file, stmt, TDF_SLIM);
3446 gcc_assert (stmt_info);
3448 /* skip stmts which do not need to be vectorized.
3449 this is expected to include:
3450 - the COND_EXPR which is the loop exit condition
3451 - any LABEL_EXPRs in the loop
3452 - computations that are used only for array indexing or loop
3455 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3457 if (vect_debug_details (NULL))
3458 fprintf (dump_file, "irrelevant.");
/* A stmt that is already in vector form cannot be re-vectorized.  */
3462 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3464 if (vect_debug_stats (loop) || vect_debug_details (loop))
3466 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3467 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Pick the scalar type whose vector counterpart we need: the data-ref's
   element type if the stmt accesses memory, else the lhs type.  */
3472 if (STMT_VINFO_DATA_REF (stmt_info))
3473 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3474 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3475 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3477 scalar_type = TREE_TYPE (stmt);
3479 if (vect_debug_details (NULL))
3481 fprintf (dump_file, "get vectype for scalar type: ");
3482 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3485 vectype = get_vectype_for_scalar_type (scalar_type);
3488 if (vect_debug_stats (loop) || vect_debug_details (loop))
3490 fprintf (dump_file, "not vectorized: unsupported data-type ");
3491 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3496 if (vect_debug_details (NULL))
3498 fprintf (dump_file, "vectype: ");
3499 print_generic_expr (dump_file, vectype, TDF_SLIM);
3501 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* The stmt must fall into one of the four supported categories.  */
3503 ok = (vectorizable_operation (stmt, NULL, NULL)
3504 || vectorizable_assignment (stmt, NULL, NULL)
3505 || vectorizable_load (stmt, NULL, NULL)
3506 || vectorizable_store (stmt, NULL, NULL));
3510 if (vect_debug_stats (loop) || vect_debug_details (loop))
3512 fprintf (dump_file, "not vectorized: stmt not supported: ");
3513 print_generic_expr (dump_file, stmt, TDF_SLIM);
3518 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3519 if (vect_debug_details (NULL))
3520 fprintf (dump_file, "nunits = %d", nunits);
3522 if (vectorization_factor)
3524 /* FORNOW: don't allow mixed units.
3525 This restriction will be relaxed in the future. */
3526 if (nunits != vectorization_factor)
3528 if (vect_debug_stats (loop) || vect_debug_details (loop))
3529 fprintf (dump_file, "not vectorized: mixed data-types");
3534 vectorization_factor = nunits;
3536 #ifdef ENABLE_CHECKING
/* Sanity check: element size times factor must fill one SIMD word.  */
3537 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3538 * vectorization_factor == UNITS_PER_SIMD_WORD);
3543 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3545 if (vectorization_factor <= 1)
3547 if (vect_debug_stats (loop) || vect_debug_details (loop))
3548 fprintf (dump_file, "not vectorized: unsupported data-type");
3551 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3553 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3555 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3556 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Too few iterations for even one vector iteration.  */
3558 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3559 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3561 if (vect_debug_stats (loop) || vect_debug_details (loop))
3562 fprintf (dump_file, "not vectorized: iteration count too small.");
/* An epilog (scalar remainder) loop is needed; make sure peeling is
   actually possible for this loop.  */
3566 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3567 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3569 if (vect_debug_stats (loop) || vect_debug_details (loop))
3570 fprintf (dump_file, "epilog loop required.");
3571 if (!vect_can_advance_ivs_p (loop))
3573 if (vect_debug_stats (loop) || vect_debug_details (loop))
3574 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3577 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3579 if (vect_debug_stats (loop) || vect_debug_details (loop))
3580 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3589 /* Function exist_non_indexing_operands_for_use_p
3591 USE is one of the uses attached to STMT. Check if USE is
3592 used in STMT for anything other than indexing an array. */
/* Return whether USE appears in STMT for any purpose other than indexing an
   array.  If STMT has no data-ref, every operand is non-indexing; otherwise
   only the rhs of a "var = array_ref" load can be the non-indexing use.  */
3595 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3598 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3600 /* USE corresponds to some operand in STMT. If there is no data
3601 reference in STMT, then any operand that corresponds to USE
3602 is not indexing an array. */
3603 if (!STMT_VINFO_DATA_REF (stmt_info))
3606 /* STMT has a data_ref. FORNOW this means that its of one of
3607 the following forms:
3610 (This should have been verified in analyze_data_refs).
3612 'var' in the second case corresponds to a def, not a use,
3613 so USE cannot correspond to any operands that are not used
3616 Therefore, all we need to check is if STMT falls into the
3617 first case, and whether var corresponds to USE. */
/* An SSA_NAME on the lhs means this is a load, not a store.  */
3619 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3622 operand = TREE_OPERAND (stmt, 1);
3624 if (TREE_CODE (operand) != SSA_NAME)
3634 /* Function vect_is_simple_iv_evolution.
3636 FORNOW: A simple evolution of an induction variables in the loop is
3637 considered a polynomial evolution with constant step. */
/* Determine whether ACCESS_FN describes a simple induction in loop LOOP_NB:
   a polynomial evolution of degree 1 with constant step.  On success the
   initial value and step are returned via INIT and STEP.  STRICT presumably
   additionally requires a unit step (see the integer_onep check) — TODO
   confirm against the elided control flow.  */
3640 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3641 tree * step, bool strict)
3646 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3648 /* When there is no evolution in this loop, the evolution function
3650 if (evolution_part == NULL_TREE)
3653 /* When the evolution is a polynomial of degree >= 2
3654 the evolution function is not "simple". */
3655 if (tree_is_chrec (evolution_part))
3658 step_expr = evolution_part;
3659 init_expr = unshare_expr (initial_condition (access_fn));
3661 if (vect_debug_details (NULL))
3663 fprintf (dump_file, "step: ");
3664 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3665 fprintf (dump_file, ", init: ");
3666 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* Only compile-time-constant integer steps are handled.  */
3672 if (TREE_CODE (step_expr) != INTEGER_CST)
3674 if (vect_debug_details (NULL))
3675 fprintf (dump_file, "step unknown.");
3680 if (!integer_onep (step_expr))
3682 if (vect_debug_details (NULL))
3683 print_generic_expr (dump_file, step_expr, TDF_SLIM)
3691 /* Function vect_analyze_scalar_cycles.
3693 Examine the cross iteration def-use cycles of scalar variables, by
3694 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3695 cycles that they represent do not impede vectorization.
3697 FORNOW: Reduction as in the following loop, is not supported yet:
3701 The cross-iteration cycle corresponding to variable 'sum' will be
3702 considered too complicated and will impede vectorization.
3704 FORNOW: Induction as in the following loop, is not supported yet:
3709 However, the following loop *is* vectorizable:
3714 In both loops there exists a def-use cycle for the variable i:
3715 loop: i_2 = PHI (i_0, i_1)
3720 The evolution of the above cycle is considered simple enough,
3721 however, we also check that the cycle does not need to be
3722 vectorized, i.e - we check that the variable that this cycle
3723 defines is only used for array indexing or in stmts that do not
3724 need to be vectorized. This is not the case in loop2, but it
3725 *is* the case in loop3. */
/* Examine every phi in the loop header and verify that each scalar
   cross-iteration cycle is a simple induction (see the big comment above).
   Virtual (memory) phis are skipped; non-simple evolutions make the loop
   non-vectorizable.  */
3728 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3731 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3732 basic_block bb = loop->header;
3735 if (vect_debug_details (NULL))
3736 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3738 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3740 tree access_fn = NULL;
3742 if (vect_debug_details (NULL))
3744 fprintf (dump_file, "Analyze phi: ");
3745 print_generic_expr (dump_file, phi, TDF_SLIM);
3748 /* Skip virtual phi's. The data dependences that are associated with
3749 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3751 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3753 if (vect_debug_details (NULL))
3754 fprintf (dump_file, "virtual phi. skip.");
3758 /* Analyze the evolution function. */
3760 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3761 those of loop induction variables; This property is verified here.
3763 Furthermore, if that induction variable is used in an operation
3764 that needs to be vectorized (i.e, is not solely used to index
3765 arrays and check the exit condition) - we do not support its
3766 vectorization yet. This property is verified in vect_is_simple_use,
3767 during vect_analyze_operations. */
3769 access_fn = /* instantiate_parameters
3771 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3775 if (vect_debug_stats (loop) || vect_debug_details (loop))
3776 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3780 if (vect_debug_details (NULL))
3782 fprintf (dump_file, "Access function of PHI: ");
3783 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The evolution must be a simple induction (constant step).  */
3786 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3789 if (vect_debug_stats (loop) || vect_debug_details (loop))
3790 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3799 /* Function vect_analyze_data_ref_dependence.
3801 Return TRUE if there (might) exist a dependence between a memory-reference
3802 DRA and a memory-reference DRB. */
/* Return TRUE if a dependence (proven or merely possible) exists between
   memory references DRA and DRB; such a result blocks vectorization.
   Conservative: an undecidable base-name comparison also counts as a
   dependence.  */
3805 vect_analyze_data_ref_dependence (struct data_reference *dra,
3806 struct data_reference *drb,
3810 struct data_dependence_relation *ddr;
/* If we cannot even prove the bases differ, assume a dependence.  */
3812 if (!array_base_name_differ_p (dra, drb, &differ_p))
3814 if (vect_debug_stats (loop) || vect_debug_details (loop))
3817 "not vectorized: can't determine dependence between: ");
3818 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3819 fprintf (dump_file, " and ");
3820 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Same base: ask the dependence analyzer for a precise answer.  */
3828 ddr = initialize_data_dependence_relation (dra, drb);
3829 compute_affine_dependence (ddr);
/* chrec_known here means "no dependence" — only then is the pair safe.  */
3831 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3834 if (vect_debug_stats (loop) || vect_debug_details (loop))
3837 "not vectorized: possible dependence between data-refs ");
3838 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3839 fprintf (dump_file, " and ");
3840 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3847 /* Function vect_analyze_data_ref_dependences.
3849 Examine all the data references in the loop, and make sure there do not
3850 exist any data dependences between them.
3852 TODO: dependences which distance is greater than the vectorization factor
/* Check all write/write and read/write pairs of data-refs in the loop for
   dependences, using vect_analyze_data_ref_dependence on each pair.  Any
   (possible) dependence makes the loop non-vectorizable.  */
3856 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3859 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3860 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3861 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3863 /* Examine store-store (output) dependences. */
3865 if (vect_debug_details (NULL))
3866 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3868 if (vect_debug_details (NULL))
3869 fprintf (dump_file, "compare all store-store pairs.");
/* Unordered pairs: j starts at i + 1.  */
3871 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3873 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3875 struct data_reference *dra =
3876 VARRAY_GENERIC_PTR (loop_write_refs, i);
3877 struct data_reference *drb =
3878 VARRAY_GENERIC_PTR (loop_write_refs, j);
3879 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3884 /* Examine load-store (true/anti) dependences. */
3886 if (vect_debug_details (NULL))
3887 fprintf (dump_file, "compare all load-store pairs.");
/* Full cross product of reads against writes.  */
3889 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3891 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3893 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3894 struct data_reference *drb =
3895 VARRAY_GENERIC_PTR (loop_write_refs, j);
3896 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3905 /* Function vect_get_first_index.
3907 REF is a data reference.
3908 If it is an ARRAY_REF: if its lower bound is simple enough,
3909 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3910 If it is not an ARRAY_REF: REF has no "first index";
3911 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
/* Store REF's lower bound in *ARRAY_FIRST_INDEX if it is a host-integer
   constant; non-ARRAY_REFs get size_zero_node.  Returns FALSE only when the
   lower bound is not a simple integer constant.  */
3914 vect_get_first_index (tree ref, tree *array_first_index)
/* A non-array ref has no first index; report zero.  */
3918 if (TREE_CODE (ref) != ARRAY_REF)
3919 *array_first_index = size_zero_node;
3922 array_start = array_ref_low_bound (ref);
3923 if (!host_integerp (array_start, 0))
3925 if (vect_debug_details (NULL))
3927 fprintf (dump_file, "array min val not simple integer cst.");
3928 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3932 *array_first_index = array_start;
3939 /* Function vect_compute_array_base_alignment.
3940 A utility function of vect_compute_array_ref_alignment.
3942 Compute the misalignment of ARRAY in bits.
3945 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3946 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3947 if NULL: don't compute misalignment, just return the base of ARRAY.
3948 PREV_DIMENSIONS - initialized to one.
3949 MISALIGNMENT - the computed misalignment in bits.
3952 If VECTYPE is not NULL:
3953 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3954 the base of the array, and put the computed misalignment in MISALIGNMENT.
3956 Return the base of the array.
3958 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3959 a[idx_N]...[idx_2][idx_1] is
3960 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3961 ... + idx_N * dim_0 * ... * dim_N-1}.
3962 (The misalignment of &a is not checked here).
3963 Note, that every term contains dim_0, therefore, if dim_0 is a
3964 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3965 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3966 NUINTS, we can say that the misalignment of the sum is equal to
3967 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3968 we can't determine this array misalignment, and we return
3970 We proceed recursively in this manner, accumulating total misalignment
3971 and the multiplication of previous dimensions for correct misalignment
/* Recursively compute the misalignment (in bits, modulo the size of VECTYPE)
   contributed by the index expressions of the multidimensional ARRAY_REF
   ARRAY, accumulating into *MISALIGNMENT and *PREV_DIMENSIONS, and return
   the base decl (see the detailed comment above).  With VECTYPE == NULL only
   the base decl is retrieved.  */
3975 vect_compute_array_base_alignment (tree array,
3977 tree *prev_dimensions,
3982 tree dimension_size;
3984 tree bits_per_vectype;
3985 tree bits_per_vectype_unit;
3987 /* The 'stop condition' of the recursion. */
3988 if (TREE_CODE (array) != ARRAY_REF)
3992 /* Just get the base decl. */
3993 return vect_compute_array_base_alignment
3994 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* Give up if the accumulators are no longer simple host integers.  */
3996 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3997 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
/* dimension_size = max - min + 1 for this dimension's index domain.  */
4000 domain = TYPE_DOMAIN (TREE_TYPE (array));
4002 int_const_binop (PLUS_EXPR,
4003 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4004 TYPE_MIN_VALUE (domain), 1),
4007 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4008 is a multiple of NUNITS:
4010 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4012 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4013 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4014 if (integer_zerop (mis))
4015 /* This array is aligned. Continue just in order to get the base decl. */
4016 return vect_compute_array_base_alignment
4017 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4019 index = TREE_OPERAND (array, 1);
4020 if (!host_integerp (index, 1))
4021 /* The current index is not constant. */
/* Normalize the index so it is relative to the dimension's lower bound.  */
4024 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4026 bits_per_vectype = fold_convert (unsigned_type_node,
4027 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4028 GET_MODE_SIZE (TYPE_MODE (vectype))));
4029 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4030 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4031 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4033 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4037 (*misalignment + index_val * dimension_size * *prev_dimensions)
4041 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4042 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4043 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4044 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4045 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
/* Fold this dimension into the running dimension product for the caller.  */
4048 *prev_dimensions = int_const_binop (MULT_EXPR,
4049 *prev_dimensions, dimension_size, 1);
/* Recurse into the next outer dimension.  */
4051 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4057 /* Function vect_compute_data_ref_alignment
4059 Compute the misalignment of the data reference DR.
4062 1. If during the misalignment computation it is found that the data reference
4063 cannot be vectorized then false is returned.
4064 2. DR_MISALIGNMENT (DR) is defined.
4066 FOR NOW: No analysis is actually performed. Misalignment is calculated
4067 only for trivial cases. TODO. */
/* Compute and record DR_MISALIGNMENT for data reference DR (in bytes,
   relative to the alignment of its vector type).  Returns false when the
   data-ref cannot be vectorized at all; an unknown alignment is recorded as
   -1 rather than failing.  May force the alignment of the base decl.  */
4070 vect_compute_data_ref_alignment (struct data_reference *dr,
4071 loop_vec_info loop_vinfo)
4073 tree stmt = DR_STMT (dr);
4074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4075 tree ref = DR_REF (dr);
4078 tree offset = size_zero_node;
4079 tree base, bit_offset, alignment;
4080 tree unit_bits = fold_convert (unsigned_type_node,
4081 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4083 bool base_aligned_p;
4085 if (vect_debug_details (NULL))
4086 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4088 /* Initialize misalignment to unknown. */
4089 DR_MISALIGNMENT (dr) = -1;
4091 scalar_type = TREE_TYPE (ref);
4092 vectype = get_vectype_for_scalar_type (scalar_type);
4095 if (vect_debug_details (NULL))
4097 fprintf (dump_file, "no vectype for stmt: ");
4098 print_generic_expr (dump_file, stmt, TDF_SLIM);
4099 fprintf (dump_file, " scalar_type: ");
4100 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4102 /* It is not possible to vectorize this data reference. */
4105 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* Only array and indirect refs are expected here (verified earlier).  */
4106 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4108 if (TREE_CODE (ref) == ARRAY_REF)
4111 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4113 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4114 loop_vinfo, &bit_offset, &base_aligned_p);
4117 if (vect_debug_details (NULL))
4119 fprintf (dump_file, "Unknown alignment for access: ");
4120 print_generic_expr (dump_file,
4121 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4126 if (!base_aligned_p)
4128 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4130 if (vect_debug_details (NULL))
4132 fprintf (dump_file, "can't force alignment of ref: ");
4133 print_generic_expr (dump_file, ref, TDF_SLIM);
4138 /* Force the alignment of the decl.
4139 NOTE: This is the only change to the code we make during
4140 the analysis phase, before deciding to vectorize the loop. */
4141 if (vect_debug_details (NULL))
4142 fprintf (dump_file, "force alignment");
4143 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4144 DECL_USER_ALIGN (base) = 1;
4147 /* At this point we assume that the base is aligned, and the offset from it
4148 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4149 gcc_assert (base_aligned_p
4150 || (TREE_CODE (base) == VAR_DECL
4151 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4153 /* Convert into bytes. */
4154 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4155 /* Check that there is no remainder in bits. */
4156 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4157 if (!integer_zerop (bit_offset))
4159 if (vect_debug_details (NULL))
4161 fprintf (dump_file, "bit offset alignment: ");
4162 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4167 /* Alignment required, in bytes: */
4168 alignment = fold_convert (unsigned_type_node,
4169 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4171 /* Modulo alignment. */
4172 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4173 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4175 if (vect_debug_details (NULL))
4176 fprintf (dump_file, "unexpected misalign value");
/* Record the misalignment in bytes.  */
4180 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4182 if (vect_debug_details (NULL))
4183 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4189 /* Function vect_compute_array_ref_alignment
4191 Compute the alignment of an array-ref.
4192 The alignment we compute here is relative to
4193 TYPE_ALIGN(VECTYPE) boundary.
4196 OFFSET - the alignment in bits
4197 Return value - the base of the array-ref. E.g,
4198 if the array-ref is a.b[k].c[i][j] the returned
/* Compute the alignment (in bits, relative to TYPE_ALIGN (VECTYPE)) of the
   array-ref of DR, returning its base (see the comment above).  FORNOW also
   requires the first accessed element ('init') to lie on a NUNITS boundary.  */
4203 vect_compute_array_ref_alignment (struct data_reference *dr,
4204 loop_vec_info loop_vinfo,
4208 tree array_first_index = size_zero_node;
4210 tree ref = DR_REF (dr);
4211 tree scalar_type = TREE_TYPE (ref);
4212 tree oprnd0 = TREE_OPERAND (ref, 0);
4213 tree dims = size_one_node;
4214 tree misalign = size_zero_node;
4215 tree next_ref, this_offset = size_zero_node;
4219 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4220 /* The reference is an array without its last index. */
4221 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4224 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4227 /* Alignment is not requested. Just return the base. */
4230 /* Compute alignment. */
4231 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4233 this_offset = misalign;
4235 /* Check the first index accessed. */
4236 if (!vect_get_first_index (ref, &array_first_index))
4238 if (vect_debug_details (NULL))
4239 fprintf (dump_file, "no first_index for array.");
4243 /* Check the index of the array_ref. */
4244 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4245 LOOP_VINFO_LOOP (loop_vinfo)->num);
4247 /* FORNOW: In order to simplify the handling of alignment, we make sure
4248 that the first location at which the array is accessed ('init') is on an
4249 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4250 This is too conservative, since we require that
4251 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4252 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4253 This should be relaxed in the future. */
4255 if (!init || !host_integerp (init, 0))
4257 if (vect_debug_details (NULL))
4258 fprintf (dump_file, "non constant init. ");
4262 /* bytes per scalar element: */
4263 nunits = fold_convert (unsigned_type_node,
4264 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4265 nbits = int_const_binop (MULT_EXPR, nunits,
4266 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4268 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4269 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4270 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4271 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4273 /* TODO: allow negative misalign values. */
4274 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4276 if (vect_debug_details (NULL))
4277 fprintf (dump_file, "unexpected misalign value");
4285 /* Function vect_compute_data_refs_alignment
4287 Compute the misalignment of data references in the loop.
4288 This pass may take place at function granularity instead of at loop
4291 FOR NOW: No analysis is actually performed. Misalignment is calculated
4292 only for trivial cases. TODO. */
/* Run vect_compute_data_ref_alignment over every write and read data-ref of
   the loop; a single failure makes the whole analysis fail.  */
4295 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4297 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4298 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4301 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4303 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4304 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4308 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4310 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4311 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4319 /* Function vect_enhance_data_refs_alignment
4321 This pass will use loop versioning and loop peeling in order to enhance
4322 the alignment of data references in the loop.
4324 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4325 original loop is to be vectorized; Any other loops that are created by
4326 the transformations performed in this pass - are not supposed to be
4327 vectorized. This restriction will be relaxed. */
/* Decide whether to peel the loop to improve data-ref alignment and update
   the recorded misalignments accordingly.  FORNOW: peels for the first
   misaligned store found, and conservatively resets every other data-ref's
   misalignment to unknown (-1).  See the long strategy comment below.  */
4330 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4332 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4333 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4334 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4338 This pass will require a cost model to guide it whether to apply peeling
4339 or versioning or a combination of the two. For example, the scheme that
4340 intel uses when given a loop with several memory accesses, is as follows:
4341 choose one memory access ('p') which alignment you want to force by doing
4342 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4343 other accesses are not necessarily aligned, or (2) use loop versioning to
4344 generate one loop in which all accesses are aligned, and another loop in
4345 which only 'p' is necessarily aligned.
4347 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4348 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4349 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4351 Devising a cost model is the most critical aspect of this work. It will
4352 guide us on which access to peel for, whether to use loop versioning, how
4353 many versions to create, etc. The cost model will probably consist of
4354 generic considerations as well as target specific considerations (on
4355 powerpc for example, misaligned stores are more painful than misaligned
4358 Here is the general steps involved in alignment enhancements:
4360 -- original loop, before alignment analysis:
4361 for (i=0; i<N; i++){
4362 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4363 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4366 -- After vect_compute_data_refs_alignment:
4367 for (i=0; i<N; i++){
4368 x = q[i]; # DR_MISALIGNMENT(q) = 3
4369 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4372 -- Possibility 1: we do loop versioning:
4374 for (i=0; i<N; i++){ # loop 1A
4375 x = q[i]; # DR_MISALIGNMENT(q) = 3
4376 p[i] = y; # DR_MISALIGNMENT(p) = 0
4380 for (i=0; i<N; i++){ # loop 1B
4381 x = q[i]; # DR_MISALIGNMENT(q) = 3
4382 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4386 -- Possibility 2: we do loop peeling:
4387 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4391 for (i = 3; i < N; i++){ # loop 2A
4392 x = q[i]; # DR_MISALIGNMENT(q) = 0
4393 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4396 -- Possibility 3: combination of loop peeling and versioning:
4397 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4402 for (i = 3; i<N; i++){ # loop 3A
4403 x = q[i]; # DR_MISALIGNMENT(q) = 0
4404 p[i] = y; # DR_MISALIGNMENT(p) = 0
4408 for (i = 3; i<N; i++){ # loop 3B
4409 x = q[i]; # DR_MISALIGNMENT(q) = 0
4410 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4414 These loops are later passed to loop_transform to be vectorized. The
4415 vectorizer will use the alignment information to guide the transformation
4416 (whether to generate regular loads/stores, or with special handling for
4420 /* (1) Peeling to force alignment. */
4422 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4424 + How many accesses will become aligned due to the peeling
4425 - How many accesses will become unaligned due to the peeling,
4426 and the cost of misaligned accesses.
4427 - The cost of peeling (the extra runtime checks, the increase
4430 The scheme we use FORNOW: peel to force the alignment of the first
4431 misaligned store in the loop.
4432 Rationale: misaligned stores are not yet supported.
4434 TODO: Use a better cost model. */
/* Pick the first unaligned write as the data-ref to peel for.  */
4436 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4438 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4439 if (!aligned_access_p (dr))
4441 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4442 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
/* No unaligned store found: nothing to peel for.  */
4447 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4449 if (vect_debug_details (loop))
4450 fprintf (dump_file, "Peeling for alignment will not be applied.");
4454 if (vect_debug_details (loop))
4455 fprintf (dump_file, "Peeling for alignment will be applied.");
4458 /* (1.2) Update the alignment info according to the peeling factor.
4459 If the misalignment of the DR we peel for is M, then the
4460 peeling factor is VF - M, and the misalignment of each access DR_i
4461 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4462 If the misalignment of the DR we peel for is unknown, then the
4463 misalignment of each access DR_i in the loop is also unknown.
4465 FORNOW: set the misalignment of the accesses to unknown even
4466 if the peeling factor is known at compile time.
4468 TODO: - if the peeling factor is known at compile time, use that
4469 when updating the misalignment info of the loop DRs.
4470 - consider accesses that are known to have the same
4471 alignment, even if that alignment is unknown. */
/* The peeled-for DR becomes aligned (0); all other writes become unknown.  */
4473 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4475 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4476 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4477 DR_MISALIGNMENT (dr) = 0;
4479 DR_MISALIGNMENT (dr) = -1;
/* Same update for the read data-refs.  */
4481 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4483 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4484 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4485 DR_MISALIGNMENT (dr) = 0;
4487 DR_MISALIGNMENT (dr) = -1;
4492 /* Function vect_analyze_data_refs_alignment
4494 Analyze the alignment of the data-references in the loop.
4495 FOR NOW: Until support for misaligned accesses is in place, only if all
4496 accesses are aligned can the loop be vectorized. This restriction will be
4500 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4502 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4503 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4504 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4505 enum dr_alignment_support supportable_dr_alignment;
4508 if (vect_debug_details (NULL))
4509 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
/* Step 1: compute the misalignment of every data-ref in the loop; reject
   the loop if any misalignment cannot be calculated.  */
4512 /* This pass may take place at function granularity instead of at loop
4515 if (!vect_compute_data_refs_alignment (loop_vinfo))
4517 if (vect_debug_details (loop) || vect_debug_stats (loop))
4519 "not vectorized: can't calculate alignment for data ref.");
/* Step 2: try to improve alignment (versioning/peeling); this updates the
   recorded misalignment information in the data-refs.  */
4524 /* This pass will decide on using loop versioning and/or loop peeling in
4525 order to enhance the alignment of data references in the loop. */
4527 vect_enhance_data_refs_alignment (loop_vinfo);
/* Step 3: verify that the target supports every read and write at its
   (possibly updated) alignment; otherwise the loop is not vectorizable.  */
4530 /* Finally, check that all the data references in the loop can be
4531 handled with respect to their alignment. */
4533 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4535 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4536 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4537 if (!supportable_dr_alignment)
4539 if (vect_debug_details (loop) || vect_debug_stats (loop))
4540 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
/* Same check for the write data-refs.  */
4544 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4546 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4547 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4548 if (!supportable_dr_alignment)
4550 if (vect_debug_details (loop) || vect_debug_stats (loop))
4551 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4560 /* Function vect_analyze_data_ref_access.
4562 Analyze the access pattern of the data-reference DR. For now, a data access
4563 has to be consecutive and aligned to be considered vectorizable. */
4566 vect_analyze_data_ref_access (struct data_reference *dr)
4568 varray_type access_fns = DR_ACCESS_FNS (dr);
4571 unsigned int dimensions, i;
4573 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4574 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4575 access is contiguous). */
4576 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
/* NOTE: access function 0 corresponds to the innermost (last) dimension,
   so the outer dimensions are indices 1..dimensions-1.  */
4578 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4580 access_fn = DR_ACCESS_FN (dr, i);
4582 if (evolution_part_in_loop_num (access_fn,
4583 loop_containing_stmt (DR_STMT (dr))->num))
4585 /* Evolution part is not NULL in this loop (it is neither constant
4587 if (vect_debug_details (NULL))
4590 "not vectorized: complicated multidim. array access.");
4591 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The innermost dimension must be either loop invariant or a simple
   induction that vect_is_simple_iv_evolution can decompose into
   INIT and STEP.  */
4597 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4598 if (!evolution_function_is_constant_p (access_fn)
4599 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4600 access_fn, &init, &step, true))
4602 if (vect_debug_details (NULL))
4604 fprintf (dump_file, "not vectorized: complicated access function.");
4605 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4614 /* Function vect_analyze_data_ref_accesses.
4616 Analyze the access pattern of all the data references in the loop.
4618 FORNOW: the only access pattern that is considered vectorizable is a
4619 simple step 1 (consecutive) access.
4621 FORNOW: handle only arrays and pointer accesses. */
4624 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4627 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4628 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4630 if (vect_debug_details (NULL))
4631 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Check every write data-ref; a single failure rejects the whole loop.  */
4633 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4635 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4636 bool ok = vect_analyze_data_ref_access (dr);
4639 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4640 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4641 fprintf (dump_file, "not vectorized: complicated access pattern.");
/* Likewise for every read data-ref.  */
4646 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4648 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4649 bool ok = vect_analyze_data_ref_access (dr);
4652 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4653 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4654 fprintf (dump_file, "not vectorized: complicated access pattern.");
4663 /* Function vect_analyze_pointer_ref_access.
4666 STMT - a stmt that contains a data-ref
4667 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4669 If the data-ref access is vectorizable, return a data_reference structure
4670 that represents it (DR). Otherwise - return NULL. */
4672 static struct data_reference *
4673 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4675 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4676 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
/* Scalar evolution of the pointer itself (operand 0 of the INDIRECT_REF).  */
4677 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4680 tree reftype, innertype;
4681 enum machine_mode innermode;
4682 tree indx_access_fn;
4683 int loopnum = loop->num;
4684 struct data_reference *dr;
4688 if (vect_debug_stats (loop) || vect_debug_details (loop))
4689 fprintf (dump_file, "not vectorized: complicated pointer access.");
4693 if (vect_debug_details (NULL))
4695 fprintf (dump_file, "Access function of ptr: ");
4696 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The pointer evolution must be a simple affine IV: INIT + i*STEP.  */
4699 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4701 if (vect_debug_stats (loop) || vect_debug_details (loop))
4702 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* STEP must be a compile-time constant that fits in a HOST_WIDE_INT.  */
4708 if (!host_integerp (step,0))
4710 if (vect_debug_stats (loop) || vect_debug_details (loop))
4712 "not vectorized: non constant step for pointer access.");
4716 step_val = TREE_INT_CST_LOW (step);
4718 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4719 if (TREE_CODE (reftype) != POINTER_TYPE)
4721 if (vect_debug_stats (loop) || vect_debug_details (loop))
4722 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* The initial value of the IV must itself be of pointer type.  */
4726 reftype = TREE_TYPE (init);
4727 if (TREE_CODE (reftype) != POINTER_TYPE)
4729 if (vect_debug_stats (loop) || vect_debug_details (loop))
4730 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* Consecutive access: the step must equal the size of the pointed-to
   element type.  */
4734 innertype = TREE_TYPE (reftype);
4735 innermode = TYPE_MODE (innertype);
4736 if (GET_MODE_SIZE (innermode) != step_val)
4738 /* FORNOW: support only consecutive access */
4739 if (vect_debug_stats (loop) || vect_debug_details (loop))
4740 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Build a canonical {0, +, 1} index access function for the data-ref.  */
4745 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4746 if (vect_debug_details (NULL))
4748 fprintf (dump_file, "Access function of ptr indx: ");
4749 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4751 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4756 /* Function vect_get_symbl_and_dr.
4758 The function returns SYMBL - the relevant variable for
4759 memory tag (for aliasing purposes).
4760 Also data reference structure DR is created.
4763 MEMREF - data reference in STMT
4764 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4767 DR - data_reference struct for MEMREF
4768 return value - the relevant variable for memory tag (for aliasing purposes).
4773 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4774 loop_vec_info loop_vinfo, struct data_reference **dr)
4776 tree symbl, oprnd0, oprnd1;
4777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4779 tree array_base, base;
4780 struct data_reference *new_dr;
4781 bool base_aligned_p;
/* Dispatch on the form of the memory reference.  NOTE(review): the case
   labels are not visible in this listing; presumably INDIRECT_REF and
   ARRAY_REF - confirm against the full source.  */
4784 switch (TREE_CODE (memref))
/* Pointer (INDIRECT_REF) case: analyze the pointer evolution and build
   the data-ref from it.  */
4787 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4791 symbl = DR_BASE_NAME (new_dr);
4792 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
/* Peel the base expression down to the underlying symbol.  */
4794 switch (TREE_CODE (symbl))
4798 oprnd0 = TREE_OPERAND (symbl, 0);
4799 oprnd1 = TREE_OPERAND (symbl, 1);
4802 /* Only {address_base + offset} expressions are supported,
4803 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4804 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4805 TODO: swap operands if {offset + address_base}. */
4806 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4807 && TREE_CODE (oprnd1) != INTEGER_CST)
4808 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
/* Recurse on the pointer operand to find the underlying symbol.  */
4811 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4814 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4815 loop_vinfo, &new_dr);
4819 /* symbl remains unchanged. */
4823 if (vect_debug_details (NULL))
4825 fprintf (dump_file, "unhandled data ref: ");
4826 print_generic_expr (dump_file, memref, TDF_SLIM);
4827 fprintf (dump_file, " (symbl ");
4828 print_generic_expr (dump_file, symbl, TDF_SLIM);
4829 fprintf (dump_file, ") in stmt ");
4830 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Array (ARRAY_REF) case.  */
4837 offset = size_zero_node;
4839 /* Store the array base in the stmt info.
4840 For one dimensional array ref a[i], the base is a,
4841 for multidimensional a[i1][i2]..[iN], the base is
4842 a[i1][i2]..[iN-1]. */
4843 array_base = TREE_OPERAND (memref, 0);
4844 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4846 new_dr = analyze_array (stmt, memref, is_read);
4849 /* Find the relevant symbol for aliasing purposes. */
4850 base = DR_BASE_NAME (new_dr);
4851 switch (TREE_CODE (base))
4858 symbl = TREE_OPERAND (base, 0);
4862 /* Could have recorded more accurate information -
4863 i.e, the actual FIELD_DECL that is being referenced -
4864 but later passes expect VAR_DECL as the nmt. */
4865 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4866 loop_vinfo, &offset, &base_aligned_p);
4871 if (vect_debug_details (NULL))
4873 fprintf (dump_file, "unhandled struct/class field access ");
4874 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Any other MEMREF form is unsupported.  */
4881 if (vect_debug_details (NULL))
4883 fprintf (dump_file, "unhandled data ref: ");
4884 print_generic_expr (dump_file, memref, TDF_SLIM);
4885 fprintf (dump_file, " in stmt ");
4886 print_generic_expr (dump_file, stmt, TDF_SLIM);
4894 /* Function vect_analyze_data_refs.
4896 Find all the data references in the loop.
4898 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4899 which base is really an array (not a pointer) and which alignment
4900 can be forced. This restriction will be relaxed. */
4903 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4905 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4906 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4907 int nbbs = loop->num_nodes;
4908 block_stmt_iterator si;
4910 struct data_reference *dr;
4913 bool base_aligned_p;
4916 if (vect_debug_details (NULL))
4917 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
/* Walk every statement of every basic block in the loop.  */
4919 for (j = 0; j < nbbs; j++)
4921 basic_block bb = bbs[j];
4922 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4924 bool is_read = false;
4925 tree stmt = bsi_stmt (si);
4926 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4927 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4928 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4929 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4930 varray_type *datarefs = NULL;
4931 int nvuses, nv_may_defs, nv_must_defs;
4935 /* Assumption: there exists a data-ref in stmt, if and only if
4936 it has vuses/vdefs. */
4938 if (!vuses && !v_may_defs && !v_must_defs)
4941 nvuses = NUM_VUSES (vuses);
4942 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4943 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A statement that both reads and writes memory is not handled.  */
4945 if (nvuses && (nv_may_defs || nv_must_defs))
4947 if (vect_debug_details (NULL))
4949 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4950 print_generic_expr (dump_file, stmt, TDF_SLIM);
4955 if (TREE_CODE (stmt) != MODIFY_EXPR)
4957 if (vect_debug_details (NULL))
4959 fprintf (dump_file, "unexpected vops in stmt: ");
4960 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* A vuse means the data-ref is on the RHS (a load)...  */
4967 memref = TREE_OPERAND (stmt, 1);
4968 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
/* ...a vdef means it is on the LHS (a store).  */
4973 memref = TREE_OPERAND (stmt, 0);
4974 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4978 /* Analyze MEMREF. If it is of a supported form, build data_reference
4979 struct for it (DR) and find the relevant symbol for aliasing
4981 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4985 if (vect_debug_stats (loop) || vect_debug_details (loop))
4987 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4988 print_generic_expr (dump_file, stmt, TDF_SLIM);
4993 /* Find and record the memtag assigned to this data-ref. */
4994 switch (TREE_CODE (symbl))
/* Plain VAR_DECL: the symbol itself is the memtag.  */
4997 STMT_VINFO_MEMTAG (stmt_info) = symbl;
/* SSA_NAME: use the type memory tag of the underlying variable, falling
   back to the tag of the pointer operand of MEMREF.  */
5001 symbl = SSA_NAME_VAR (symbl);
5002 tag = get_var_ann (symbl)->type_mem_tag;
5005 tree ptr = TREE_OPERAND (memref, 0);
5006 if (TREE_CODE (ptr) == SSA_NAME)
5007 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5011 if (vect_debug_stats (loop) || vect_debug_details (loop))
5012 fprintf (dump_file, "not vectorized: no memtag for ref.");
5015 STMT_VINFO_MEMTAG (stmt_info) = tag;
/* ADDR_EXPR: derive the memtag from the address operand.  */
5019 address_base = TREE_OPERAND (symbl, 0);
5021 switch (TREE_CODE (address_base))
5025 struct data_reference *tmp_dr;
5027 tmp_dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5029 tag = vect_get_base_and_bit_offset
5030 (tmp_dr, DR_BASE_NAME (tmp_dr),
5031 NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5034 if (vect_debug_stats (loop)
5035 || vect_debug_details (loop))
5037 "not vectorized: no memtag for ref.");
5040 STMT_VINFO_MEMTAG (stmt_info) = tag;
5046 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5050 if (vect_debug_stats (loop) || vect_debug_details (loop))
5053 "not vectorized: unhandled address expr: ");
5054 print_generic_expr (dump_file, stmt, TDF_SLIM);
5061 if (vect_debug_stats (loop) || vect_debug_details (loop))
5063 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5064 print_generic_expr (dump_file, memref, TDF_SLIM);
/* Record the data-ref in the appropriate (read or write) varray and in
   the statement's vectorization info.  */
5069 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5070 STMT_VINFO_DATA_REF (stmt_info) = dr;
5078 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5080 /* Function vect_mark_relevant.
5082 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5085 vect_mark_relevant (varray_type worklist, tree stmt)
5087 stmt_vec_info stmt_info;
5089 if (vect_debug_details (NULL))
5090 fprintf (dump_file, "mark relevant.");
/* PHI nodes carry no stmt_vec_info here; they are pushed onto the
   worklist unconditionally, without a relevance flag.  */
5092 if (TREE_CODE (stmt) == PHI_NODE)
5094 VARRAY_PUSH_TREE (worklist, stmt);
5098 stmt_info = vinfo_for_stmt (stmt);
/* Missing stmt info is unexpected for a non-PHI statement.  */
5102 if (vect_debug_details (NULL))
5104 fprintf (dump_file, "mark relevant: no stmt info!!.");
5105 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Already queued once - do not push the stmt a second time.  */
5110 if (STMT_VINFO_RELEVANT_P (stmt_info))
5112 if (vect_debug_details (NULL))
5113 fprintf (dump_file, "already marked relevant.");
5117 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5118 VARRAY_PUSH_TREE (worklist, stmt);
5122 /* Function vect_stmt_relevant_p.
5124 Return true if STMT in loop that is represented by LOOP_VINFO is
5125 "relevant for vectorization".
5127 A stmt is considered "relevant for vectorization" if:
5128 - it has uses outside the loop.
5129 - it has vdefs (it alters memory).
5130 - control stmts in the loop (except for the exit condition).
5132 CHECKME: what other side effects would the vectorizer allow? */
5135 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5137 v_may_def_optype v_may_defs;
5138 v_must_def_optype v_must_defs;
5139 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5144 /* cond stmt other than loop exit cond. */
5145 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5148 /* changing memory. */
5149 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5150 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5151 if (v_may_defs || v_must_defs)
5153 if (vect_debug_details (NULL))
5154 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5158 /* uses outside the loop. */
/* Relies on the immediate-uses info computed in vectorize_loops via
   compute_immediate_uses.  */
5159 df = get_immediate_uses (stmt);
5160 num_uses = num_immediate_uses (df);
5161 for (i = 0; i < num_uses; i++)
5163 tree use = immediate_use (df, i);
5164 basic_block bb = bb_for_stmt (use);
/* A use in a block outside the loop makes STMT live-out, hence relevant.  */
5165 if (!flow_bb_inside_loop_p (loop, bb))
5167 if (vect_debug_details (NULL))
5168 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5177 /* Function vect_mark_stmts_to_be_vectorized.
5179 Not all stmts in the loop need to be vectorized. For example:
5188 Stmt 1 and 3 do not need to be vectorized, because loop control and
5189 addressing of vectorized data-refs are handled differently.
5191 This pass detects such stmts. */
5194 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5196 varray_type worklist;
5197 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5198 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5199 unsigned int nbbs = loop->num_nodes;
5200 block_stmt_iterator si;
5206 stmt_vec_info stmt_info;
5208 if (vect_debug_details (NULL))
5209 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5211 VARRAY_TREE_INIT (worklist, 64, "work list");
5213 /* 1. Init worklist. */
/* Seed the worklist with the statements that are relevant by themselves
   (stores, live-out defs, control stmts).  */
5215 for (i = 0; i < nbbs; i++)
5217 basic_block bb = bbs[i];
5218 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5220 stmt = bsi_stmt (si);
5222 if (vect_debug_details (NULL))
5224 fprintf (dump_file, "init: stmt relevant? ");
5225 print_generic_expr (dump_file, stmt, TDF_SLIM);
5228 stmt_info = vinfo_for_stmt (stmt);
5229 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5231 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5232 vect_mark_relevant (worklist, stmt);
5237 /* 2. Process_worklist */
/* Transitive closure: every in-loop def feeding a relevant stmt becomes
   relevant too.  */
5239 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5241 stmt = VARRAY_TOP_TREE (worklist);
5242 VARRAY_POP (worklist);
5244 if (vect_debug_details (NULL))
5246 fprintf (dump_file, "worklist: examine stmt: ");
5247 print_generic_expr (dump_file, stmt, TDF_SLIM);
5250 /* Examine the USES in this statement. Mark all the statements which
5251 feed this statement's uses as "relevant", unless the USE is used as
5254 if (TREE_CODE (stmt) == PHI_NODE)
5256 /* follow the def-use chain inside the loop. */
5257 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5259 tree arg = PHI_ARG_DEF (stmt, j);
5260 tree def_stmt = NULL_TREE;
/* An unsupported use aborts the whole analysis (returns failure).  */
5262 if (!vect_is_simple_use (arg, loop, &def_stmt))
5264 if (vect_debug_details (NULL))
5265 fprintf (dump_file, "worklist: unsupported use.");
5266 varray_clear (worklist);
5272 if (vect_debug_details (NULL))
5274 fprintf (dump_file, "worklist: def_stmt: ");
5275 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5278 bb = bb_for_stmt (def_stmt);
5279 if (flow_bb_inside_loop_p (loop, bb))
5280 vect_mark_relevant (worklist, def_stmt);
/* Non-PHI statement: walk its scalar use operands.  */
5284 ann = stmt_ann (stmt);
5285 use_ops = USE_OPS (ann);
5287 for (i = 0; i < NUM_USES (use_ops); i++)
5289 tree use = USE_OP (use_ops, i);
5291 /* We are only interested in uses that need to be vectorized. Uses
5292 that are used for address computation are not considered relevant.
5294 if (exist_non_indexing_operands_for_use_p (use, stmt))
5296 tree def_stmt = NULL_TREE;
5298 if (!vect_is_simple_use (use, loop, &def_stmt))
5300 if (vect_debug_details (NULL))
5301 fprintf (dump_file, "worklist: unsupported use.");
5302 varray_clear (worklist);
5309 if (vect_debug_details (NULL))
5311 fprintf (dump_file, "worklist: examine use %d: ", i);
5312 print_generic_expr (dump_file, use, TDF_SLIM);
5315 bb = bb_for_stmt (def_stmt);
5316 if (flow_bb_inside_loop_p (loop, bb))
5317 vect_mark_relevant (worklist, def_stmt);
5320 } /* while worklist */
5322 varray_clear (worklist);
5327 /* Function vect_can_advance_ivs_p
5329 In case the number of iterations that LOOP iterates in unknown at compile
5330 time, an epilog loop will be generated, and the loop induction variables
5331 (IVs) will be "advanced" to the value they are supposed to take just before
5332 the epilog loop. Here we check that the access function of the loop IVs
5333 and the expression that represents the loop bound are simple enough.
5334 These restrictions will be relaxed in the future. */
5337 vect_can_advance_ivs_p (struct loop *loop)
5339 basic_block bb = loop->header;
5342 /* Analyze phi functions of the loop header. */
5344 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5346 tree access_fn = NULL;
5347 tree evolution_part;
5349 if (vect_debug_details (NULL))
5351 fprintf (dump_file, "Analyze phi: ");
5352 print_generic_expr (dump_file, phi, TDF_SLIM);
5355 /* Skip virtual phi's. The data dependences that are associated with
5356 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5358 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5360 if (vect_debug_details (NULL))
5361 fprintf (dump_file, "virtual phi. skip.");
5365 /* Analyze the evolution function. */
5367 access_fn = instantiate_parameters
5368 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
/* No access function means the IV's update cannot be expressed, so it
   cannot be advanced.  */
5372 if (vect_debug_details (NULL))
5373 fprintf (dump_file, "No Access function.");
5377 if (vect_debug_details (NULL))
5379 fprintf (dump_file, "Access function of PHI: ");
5380 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5383 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5385 if (evolution_part == NULL_TREE)
5388 /* FORNOW: We do not transform initial conditions of IVs
5389 which evolution functions are a polynomial of degree >= 2. */
/* A chrec evolution part means degree >= 2 (non-linear IV) - reject.  */
5391 if (tree_is_chrec (evolution_part))
5399 /* Function vect_get_loop_niters.
5401 Determine how many iterations the loop is executed.
5402 If an expression that represents the number of iterations
5403 can be constructed, place it in NUMBER_OF_ITERATIONS.
5404 Return the loop exit condition. */
5407 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5411 if (vect_debug_details (NULL))
5412 fprintf (dump_file, "\n<<get_loop_niters>>\n");
/* Ask the generic loop analysis for an iteration-count expression.  */
5414 niters = number_of_iterations_in_loop (loop);
/* Only record the count when the analysis produced a known expression;
   chrec_dont_know means the count could not be determined.  */
5416 if (niters != NULL_TREE
5417 && niters != chrec_dont_know)
5419 *number_of_iterations = niters;
5421 if (vect_debug_details (NULL))
5423 fprintf (dump_file, "==> get_loop_niters:" );
5424 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
/* The exit condition is returned regardless of whether NITERS was found.  */
5428 return get_loop_exit_condition (loop);
5432 /* Function vect_analyze_loop_form.
5434 Verify the following restrictions (some may be relaxed in the future):
5435 - it's an inner-most loop
5436 - number of BBs = 2 (which are the loop header and the latch)
5437 - the loop has a pre-header
5438 - the loop has a single entry and exit
5439 - the loop exit condition is simple enough, and the number of iterations
5440 can be analyzed (a countable loop). */
5442 static loop_vec_info
5443 vect_analyze_loop_form (struct loop *loop)
5445 loop_vec_info loop_vinfo;
5447 tree number_of_iterations = NULL;
5448 bool rescan = false;
5450 if (vect_debug_details (loop))
5451 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
/* Reject loops with an unsupported CFG shape; each sub-test below is
   re-examined to emit a precise diagnostic.  */
5454 || !loop->single_exit
5455 || loop->num_nodes != 2
5456 || EDGE_COUNT (loop->header->preds) != 2
5457 || loop->num_entries != 1)
5459 if (vect_debug_stats (loop) || vect_debug_details (loop))
5461 fprintf (dump_file, "not vectorized: bad loop form. ");
5463 fprintf (dump_file, "nested loop.");
5464 else if (!loop->single_exit)
5465 fprintf (dump_file, "multiple exits.");
5466 else if (loop->num_nodes != 2)
5467 fprintf (dump_file, "too many BBs in loop.");
5468 else if (EDGE_COUNT (loop->header->preds) != 2)
5469 fprintf (dump_file, "too many incoming edges.");
5470 else if (loop->num_entries != 1)
5471 fprintf (dump_file, "too many entries.");
5477 /* We assume that the loop exit condition is at the end of the loop. i.e,
5478 that the loop is represented as a do-while (with a proper if-guard
5479 before the loop if needed), where the loop header contains all the
5480 executable statements, and the latch is empty. */
5481 if (!empty_block_p (loop->latch))
5483 if (vect_debug_stats (loop) || vect_debug_details (loop))
5484 fprintf (dump_file, "not vectorized: unexpected loop form.");
5488 /* Make sure we have a preheader basic block. */
5489 if (!loop->pre_header)
5492 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5495 /* Make sure there exists a single-predecessor exit bb: */
5496 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5499 loop_split_edge_with (loop->exit_edges[0], NULL);
/* If an edge was split above, the loop structures must be rescanned.  */
5504 flow_loop_scan (loop, LOOP_ALL);
5505 /* Flow loop scan does not update loop->single_exit field. */
5506 loop->single_exit = loop->exit_edges[0];
5509 if (empty_block_p (loop->header))
5511 if (vect_debug_stats (loop) || vect_debug_details (loop))
5512 fprintf (dump_file, "not vectorized: empty loop.");
/* The loop must be countable: a recognizable exit condition and a
   computable iteration count.  */
5516 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5519 if (vect_debug_stats (loop) || vect_debug_details (loop))
5520 fprintf (dump_file, "not vectorized: complicated exit condition.");
5524 if (!number_of_iterations)
5526 if (vect_debug_stats (loop) || vect_debug_details (loop))
5528 "not vectorized: number of iterations cannot be computed.");
5532 if (chrec_contains_undetermined (number_of_iterations))
5534 if (vect_debug_details (NULL))
5535 fprintf (dump_file, "Infinite number of iterations.");
/* All checks passed: create the loop_vec_info and record the results.  */
5539 loop_vinfo = new_loop_vec_info (loop);
5540 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5542 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5544 if (vect_debug_details (loop))
5546 fprintf (dump_file, "loop bound unknown.\n");
5547 fprintf (dump_file, "Symbolic number of iterations is ");
5548 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5552 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5554 if (vect_debug_stats (loop) || vect_debug_details (loop))
5555 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5559 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5565 /* Function vect_analyze_loop.
5567 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5568 for it. The different analyses will record information in the
5569 loop_vec_info struct. */
5571 static loop_vec_info
5572 vect_analyze_loop (struct loop *loop)
5575 loop_vec_info loop_vinfo;
5577 if (vect_debug_details (NULL))
5578 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5580 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5582 loop_vinfo = vect_analyze_loop_form (loop);
5585 if (vect_debug_details (loop))
5586 fprintf (dump_file, "bad loop form.");
5590 /* Find all data references in the loop (which correspond to vdefs/vuses)
5591 and analyze their evolution in the loop.
5593 FORNOW: Handle only simple, array references, which
5594 alignment can be forced, and aligned pointer-references. */
5596 ok = vect_analyze_data_refs (loop_vinfo);
5599 if (vect_debug_details (loop))
5600 fprintf (dump_file, "bad data references.");
5601 destroy_loop_vec_info (loop_vinfo);
5605 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5607 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5610 if (vect_debug_details (loop))
5611 fprintf (dump_file, "unexpected pattern.");
/* NOTE(review): the two dumps below are guarded by the same condition
   and look redundant; the second guard possibly intended
   vect_debug_stats as well - confirm against the full source.  */
5612 if (vect_debug_details (loop))
5613 fprintf (dump_file, "not vectorized: unexpected pattern.");
5614 destroy_loop_vec_info (loop_vinfo);
5618 /* Check that all cross-iteration scalar data-flow cycles are OK.
5619 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5621 ok = vect_analyze_scalar_cycles (loop_vinfo);
5624 if (vect_debug_details (loop))
5625 fprintf (dump_file, "bad scalar cycle.");
5626 destroy_loop_vec_info (loop_vinfo);
5630 /* Analyze data dependences between the data-refs in the loop.
5631 FORNOW: fail at the first data dependence that we encounter. */
5633 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5636 if (vect_debug_details (loop))
5637 fprintf (dump_file, "bad data dependence.");
5638 destroy_loop_vec_info (loop_vinfo);
5642 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5643 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5645 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5648 if (vect_debug_details (loop))
5649 fprintf (dump_file, "bad data access.");
5650 destroy_loop_vec_info (loop_vinfo);
5654 /* Analyze the alignment of the data-refs in the loop.
5655 FORNOW: Only aligned accesses are handled. */
5657 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5660 if (vect_debug_details (loop))
5661 fprintf (dump_file, "bad data alignment.");
5662 destroy_loop_vec_info (loop_vinfo);
5666 /* Scan all the operations in the loop and make sure they are
5669 ok = vect_analyze_operations (loop_vinfo);
5672 if (vect_debug_details (loop))
5673 fprintf (dump_file, "bad operation or unsupported loop bound.");
5674 destroy_loop_vec_info (loop_vinfo);
/* All analyses passed - mark the loop as vectorizable.  */
5678 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5684 /* Function need_imm_uses_for.
5686 Return whether we ought to include information for 'var'
5687 when calculating immediate uses. For this pass we only want use
5688 information for non-virtual variables. */
5691 need_imm_uses_for (tree var)
/* Non-virtual <=> a GIMPLE register; virtual operands are filtered out.  */
5693 return is_gimple_reg (var);
5697 /* Function vectorize_loops.
5699 Entry Point to loop vectorization phase. */
5702 vectorize_loops (struct loops *loops)
5704 unsigned int i, loops_num;
5705 unsigned int num_vectorized_loops = 0;
5707 /* Does the target support SIMD? */
5708 /* FORNOW: until more sophisticated machine modelling is in place. */
5709 if (!UNITS_PER_SIMD_WORD)
5711 if (vect_debug_details (NULL))
5712 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5716 #ifdef ENABLE_CHECKING
5717 verify_loop_closed_ssa ();
/* Immediate-uses info is needed by vect_stmt_relevant_p; restricted to
   non-virtual variables via need_imm_uses_for.  */
5720 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5722 /* ----------- Analyze loops. ----------- */
5724 /* If some loop was duplicated, it gets bigger number
5725 than all previously defined loops. This fact allows us to run
5726 only over initial loops skipping newly generated ones. */
5727 loops_num = loops->num;
5728 for (i = 1; i < loops_num; i++)
5730 loop_vec_info loop_vinfo;
5731 struct loop *loop = loops->parray[i];
/* Analysis result is stashed in loop->aux so it can be destroyed in the
   finalize pass below.  */
5736 loop_vinfo = vect_analyze_loop (loop);
5737 loop->aux = loop_vinfo;
5739 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5742 vect_transform_loop (loop_vinfo, loops);
5743 num_vectorized_loops++;
5746 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5747 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5748 num_vectorized_loops);
5750 /* ----------- Finalize. ----------- */
/* Release the per-loop analysis structures created above.  */
5753 for (i = 1; i < loops_num; i++)
5755 struct loop *loop = loops->parray[i];
5756 loop_vec_info loop_vinfo;
5760 loop_vinfo = loop->aux;
5761 destroy_loop_vec_info (loop_vinfo);
/* Rebuild SSA/loop-closed-SSA form after the transformations.  */
5765 rewrite_into_ssa (false);
5766 rewrite_into_loop_closed_ssa (); /* FORNOW */
5767 bitmap_clear (vars_to_rename);