2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
150 /*************************************************************************
151 Simple Loop Peeling Utilities
152 *************************************************************************/
154 /* Entry point for peeling of simple loops.
155 Peel the first/last iterations of a loop.
156 It can be used outside of the vectorizer for loops that are simple enough
157 (see function documentation). In the vectorizer it is used to peel the
158 last few iterations when the loop bound is unknown or does not evenly
159 divide by the vectorization factor, and to peel the first few iterations
160 to force the alignment of data references in the loop. */
161 struct loop *slpeel_tree_peel_loop_to_edge
162 (struct loop *, struct loops *, edge, tree, tree, bool);
163 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
164 (struct loop *, struct loops *, edge);
165 static void slpeel_update_phis_for_duplicate_loop
166 (struct loop *, struct loop *, bool after);
167 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
168 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
169 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
170 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
171 static void allocate_new_names (bitmap);
172 static void rename_use_op (use_operand_p);
173 static void rename_def_op (def_operand_p, tree);
174 static void rename_variables_in_bb (basic_block);
175 static void free_new_names (bitmap);
176 static void rename_variables_in_loop (struct loop *);
177 #ifdef ENABLE_CHECKING
178 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
182 /*************************************************************************
183 Vectorization Utilities.
184 *************************************************************************/
186 /* Main analysis functions. */
187 static loop_vec_info vect_analyze_loop (struct loop *);
188 static loop_vec_info vect_analyze_loop_form (struct loop *);
189 static bool vect_analyze_data_refs (loop_vec_info);
190 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
191 static bool vect_analyze_scalar_cycles (loop_vec_info);
192 static bool vect_analyze_data_ref_accesses (loop_vec_info);
193 static bool vect_analyze_data_refs_alignment (loop_vec_info);
194 static bool vect_compute_data_refs_alignment (loop_vec_info);
195 static bool vect_analyze_operations (loop_vec_info);
197 /* Main code transformation functions. */
198 static void vect_transform_loop (loop_vec_info, struct loops *);
199 static void vect_transform_loop_bound (loop_vec_info, tree niters);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference *, loop_vec_info);
219 static bool vect_analyze_data_ref_access (struct data_reference *);
220 static bool vect_get_first_index (tree, tree *);
221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
222 static struct data_reference * vect_analyze_pointer_ref_access
224 static bool vect_can_advance_ivs_p (struct loop *);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227 static struct data_reference * vect_analyze_pointer_ref_access
229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232 static tree vect_get_ptr_offset (tree, tree, tree *);
233 static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
236 /* Utility functions for the code transformation. */
237 static tree vect_create_destination_var (tree, tree);
238 static tree vect_create_data_ref_ptr
239 (tree, block_stmt_iterator *, tree, tree *, bool);
240 static tree vect_create_index_for_vector_ref
241 (struct loop *, block_stmt_iterator *);
242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
243 static tree get_vectype_for_scalar_type (tree);
244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
245 static tree vect_get_vec_def_for_operand (tree, tree);
246 static tree vect_init_vector (tree, tree);
247 static tree vect_build_symbol_bound (tree, int, struct loop *);
248 static void vect_finish_stmt_generation
249 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
251 /* Utility function dealing with loop peeling (not peeling itself). */
252 static void vect_generate_tmps_on_preheader
253 (loop_vec_info, tree *, tree *, tree *);
254 static tree vect_build_loop_niters (loop_vec_info);
255 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
256 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
257 static void vect_update_inits_of_dr
258 (struct data_reference *, struct loop *, tree niters);
259 static void vect_update_inits_of_drs (loop_vec_info, tree);
260 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
261 static void vect_do_peeling_for_loop_bound
262 (loop_vec_info, tree *, struct loops *);
264 /* Utilities for creation and deletion of vec_info structs. */
265 loop_vec_info new_loop_vec_info (struct loop *loop);
266 void destroy_loop_vec_info (loop_vec_info);
267 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
269 static bool vect_debug_stats (struct loop *loop);
270 static bool vect_debug_details (struct loop *loop);
273 /*************************************************************************
274 Simple Loop Peeling Utilities
276 Utilities to support loop peeling for vectorization purposes.
277 *************************************************************************/
280 /* For each definition in DEFINITIONS this function allocates
/* Allocates a duplicate ssa name for each definition in DEFINITIONS
   and records a pointer to the duplicate in the original name's
   SSA_NAME_AUX field, so the rename_* routines below can map each
   original name to its copy.  Released later by free_new_names.  */
284 allocate_new_names (bitmap definitions)
289 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
291 tree def = ssa_name (ver);
/* Heap-allocated so the pointer can be stashed in SSA_NAME_AUX.  */
292 tree *new_name_ptr = xmalloc (sizeof (tree));
294 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
296 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
/* Preserve the abnormal-phi property on the duplicate.  */
297 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
299 SSA_NAME_AUX (def) = new_name_ptr;
304 /* Renames the use *OP_P. */
/* If the used name has a duplicate recorded in SSA_NAME_AUX (see
   allocate_new_names), redirect the use to that duplicate; names
   without an AUX entry (defined outside the loop) are left alone.  */
307 rename_use_op (use_operand_p op_p)
311 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
314 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
316 /* Something defined outside of the loop. */
320 /* An ordinary ssa name defined in the loop. */
322 SET_USE (op_p, *new_name_ptr);
326 /* Renames the def *OP_P in statement STMT. */
/* Counterpart of rename_use_op for definitions: replace the defined
   name with its duplicate from SSA_NAME_AUX, if one was recorded.  */
329 rename_def_op (def_operand_p op_p, tree stmt)
333 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
336 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
338 /* Something defined outside of the loop. */
342 /* An ordinary ssa name defined in the loop. */
344 SET_DEF (op_p, *new_name_ptr);
/* The duplicate name is now defined by STMT.  */
345 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
349 /* Renames the variables in basic block BB. */
/* Walks all phis and statements of BB and renames every def and use
   (real and virtual operands alike) via rename_def_op/rename_use_op.  */
352 rename_variables_in_bb (basic_block bb)
355 block_stmt_iterator bsi;
361 v_may_def_optype v_may_defs;
362 v_must_def_optype v_must_defs;
366 struct loop *loop = bb->loop_father;
/* Rename the results of the phis at the top of BB.  */
368 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
369 rename_def_op (PHI_RESULT_PTR (phi), phi);
371 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
373 stmt = bsi_stmt (bsi);
/* Make sure the operand caches consulted below are up to date.  */
374 get_stmt_operands (stmt);
375 ann = stmt_ann (stmt);
/* Real uses and defs.  */
377 uses = USE_OPS (ann);
378 for (i = 0; i < NUM_USES (uses); i++)
379 rename_use_op (USE_OP_PTR (uses, i));
381 defs = DEF_OPS (ann);
382 for (i = 0; i < NUM_DEFS (defs); i++)
383 rename_def_op (DEF_OP_PTR (defs, i), stmt);
/* Virtual operands: VUSEs, V_MAY_DEFs and V_MUST_DEFs.  */
385 vuses = VUSE_OPS (ann);
386 for (i = 0; i < NUM_VUSES (vuses); i++)
387 rename_use_op (VUSE_OP_PTR (vuses, i));
389 v_may_defs = V_MAY_DEF_OPS (ann);
390 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
392 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
393 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
396 v_must_defs = V_MUST_DEF_OPS (ann);
397 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
399 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
400 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
/* Also rename the phi args contributed by BB along edges that leave
   the loop (presumably the loop-closed-ssa exit phis -- the exact
   in/out handling depends on elided lines; confirm vs. full source).  */
404 FOR_EACH_EDGE (e, ei, bb->succs)
406 if (!flow_bb_inside_loop_p (loop, e->dest))
408 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
409 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
414 /* Releases the structures holding the new ssa names. */
/* Frees the per-name pointers installed by allocate_new_names and
   clears the SSA_NAME_AUX fields so no dangling pointers remain.  */
417 free_new_names (bitmap definitions)
422 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
424 tree def = ssa_name (ver);
426 if (SSA_NAME_AUX (def))
428 free (SSA_NAME_AUX (def));
429 SSA_NAME_AUX (def) = NULL;
435 /* Renames variables in new generated LOOP. */
/* Applies rename_variables_in_bb to every basic block of LOOP.
   NOTE(review): get_loop_body allocates BBS; the matching free is not
   visible in this chunk -- confirm it is released.  */
438 rename_variables_in_loop (struct loop *loop)
443 bbs = get_loop_body (loop);
445 for (i = 0; i < loop->num_nodes; i++)
446 rename_variables_in_bb (bbs[i]);
452 /* Update the PHI nodes of NEW_LOOP.
454 NEW_LOOP is a duplicate of ORIG_LOOP.
455 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
456 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
457 executes before it. */
460 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
461 struct loop *new_loop, bool after)
463 tree *new_name_ptr, new_ssa_name;
464 tree phi_new, phi_orig;
466 edge orig_loop_latch = loop_latch_edge (orig_loop);
467 edge orig_entry_e = loop_preheader_edge (orig_loop);
468 edge new_loop_exit_e = new_loop->exit_edges[0];
469 edge new_loop_entry_e = loop_preheader_edge (new_loop);
/* If NEW_LOOP runs after ORIG_LOOP, its entry value is the value
   leaving ORIG_LOOP (the latch arg); otherwise it is ORIG_LOOP's
   original entry value.  */
470 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
473 step 1. For each loop-header-phi:
474 Add the first phi argument for the phi in NEW_LOOP
475 (the one associated with the entry of NEW_LOOP)
477 step 2. For each loop-header-phi:
478 Add the second phi argument for the phi in NEW_LOOP
479 (the one associated with the latch of NEW_LOOP)
481 step 3. Update the phis in the successor block of NEW_LOOP.
483 case 1: NEW_LOOP was placed before ORIG_LOOP:
484 The successor block of NEW_LOOP is the header of ORIG_LOOP.
485 Updating the phis in the successor block can therefore be done
486 along with the scanning of the loop header phis, because the
487 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
488 phi nodes, organized in the same order.
490 case 2: NEW_LOOP was placed after ORIG_LOOP:
491 The successor block of NEW_LOOP is the original exit block of
492 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
493 We postpone updating these phis to a later stage (when
494 loop guards are added).
498 /* Scan the phis in the headers of the old and new loops
499 (they are organized in exactly the same order). */
501 for (phi_new = phi_nodes (new_loop->header),
502 phi_orig = phi_nodes (orig_loop->header);
504 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
/* step 1: set the entry arg of the NEW_LOOP header phi.  */
507 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
508 add_phi_arg (&phi_new, def, new_loop_entry_e);
/* step 2: the latch arg; if the def was duplicated, use the new
   ssa name recorded in SSA_NAME_AUX.  */
511 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
512 if (TREE_CODE (def) != SSA_NAME)
515 new_name_ptr = SSA_NAME_AUX (def);
517 /* Something defined outside of the loop. */
520 /* An ordinary ssa name defined in the loop. */
521 new_ssa_name = *new_name_ptr;
522 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
524 /* step 3 (case 1). */
527 gcc_assert (new_loop_exit_e == orig_entry_e);
528 SET_PHI_ARG_DEF (phi_orig,
529 phi_arg_from_edge (phi_orig, new_loop_exit_e),
536 /* Update PHI nodes for a guard of the LOOP.
539 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
540 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
541 originates from the guard-bb, skips LOOP and reaches the (unique) exit
542 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
543 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
544 LOOP header) before the guard code was added, and now it became a merge
545 point of two paths - the path that ends with the LOOP exit-edge, and
546 the path that ends with GUARD_EDGE.
548 This function creates and updates the relevant phi nodes to account for
549 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
550 1. Create phi nodes at NEW_MERGE_BB.
551 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
552 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
555 ===> The CFG before the guard-code was added:
557 if (exit_loop) goto update_bb : LOOP_header_bb
560 ==> The CFG after the guard-code was added:
562 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
564 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
569 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
570 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
571 organized in the same order.
572 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
575 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
576 "original" loop). FALSE if LOOP is an original loop (not a newly
577 created copy). The SSA_NAME_AUX fields of the defs in the original
578 loop are the corresponding new ssa-names used in the new duplicated
579 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
580 nodes in UPDATE_BB takes the original ssa-name, and which takes the
581 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
582 the LOOP-exit-edge takes the new-name, and the phi-arg that is
583 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
584 FALSE, it's the other way around.
588 slpeel_update_phi_nodes_for_guard (edge guard_edge,
593 tree orig_phi, new_phi, update_phi;
594 tree guard_arg, loop_arg;
595 basic_block new_merge_bb = guard_edge->dest;
596 edge e = EDGE_SUCC (new_merge_bb, 0);
597 basic_block update_bb = e->dest;
/* With ENTRY_PHIS the reference phis live in LOOP's header; otherwise
   they are the loop-closed exit phis in UPDATE_BB itself.  */
598 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
600 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
601 orig_phi && update_phi;
602 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
604 /* 1. Generate new phi node in NEW_MERGE_BB: */
605 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
608 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
609 of LOOP. Set the two phi args in NEW_PHI for these edges: */
612 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
613 EDGE_SUCC (loop->latch, 0));
614 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
/* Pick which incoming value takes the duplicated (new) ssa name,
   per the IS_NEW_LOOP convention documented above.  */
618 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
619 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
623 new_name = *new_name_ptr;
625 /* Something defined outside of the loop */
630 guard_arg = orig_def;
635 guard_arg = new_name;
639 add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
640 add_phi_arg (&new_phi, guard_arg, guard_edge);
642 /* 3. Update phi in successor block. */
643 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
644 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
645 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
646 PHI_RESULT (new_phi));
/* NOTE(review): the new phis appear to be created in reverse order;
   restore source order here.  */
649 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
653 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
654 that starts at zero, increases by one and its limit is NITERS.
656 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
659 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
661 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
663 edge exit_edge = loop->exit_edges[0];
664 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
665 tree begin_label = tree_block_label (loop->latch);
666 tree exit_label = tree_block_label (loop->single_exit->dest);
668 orig_cond = get_loop_exit_condition (loop);
669 gcc_assert (orig_cond);
/* Create the counting IV (0, 1, 2, ...) that drives the new exit test.  */
670 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
671 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
673 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
674 back to the exit condition statement. */
675 bsi_next (&loop_exit_bsi);
676 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
/* Orient the new test (INDX >= NITERS vs. INDX < NITERS) to match
   which arm of the old COND_EXPR leaves the loop.  */
678 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
679 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
680 else /* 'then' edge loops back. */
681 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
683 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
684 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
685 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
686 begin_label, exit_label);
687 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
689 /* Remove old loop exit test: */
690 bsi_remove (&loop_exit_bsi);
692 if (vect_debug_stats (loop) || vect_debug_details (loop))
693 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
/* Record the now-known iteration count on the loop.  */
695 loop->nb_iterations = niters;
699 /* Given LOOP this function generates a new copy of it and puts it
700 on E which is either the entry or exit of LOOP. */
703 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
706 struct loop *new_loop;
707 basic_block *new_bbs, *bbs;
710 basic_block exit_dest;
/* E must be either the single exit edge or the preheader edge of LOOP.  */
713 at_exit = (e == loop->exit_edges[0]);
714 if (!at_exit && e != loop_preheader_edge (loop))
716 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
721 bbs = get_loop_body (loop);
723 /* Check whether duplication is possible. */
724 if (!can_copy_bbs_p (bbs, loop->num_nodes))
726 if (vect_debug_stats (loop) || vect_debug_details (loop))
727 fprintf (dump_file, "Cannot copy basic blocks.\n");
732 /* Generate new loop structure. */
733 new_loop = duplicate_loop (loops, loop, loop->outer);
736 if (vect_debug_stats (loop) || vect_debug_details (loop))
737 fprintf (dump_file, "duplicate_loop returns NULL.\n");
/* Remember whether the exit block was immediately dominated by the
   loop header, so dominators can be fixed up after redirection.  */
742 exit_dest = loop->exit_edges[0]->dest;
743 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
744 exit_dest) == loop->header ?
747 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
749 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
751 /* Duplicating phi args at exit bbs as coming
752 also from exit of duplicated loop. */
753 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
755 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
758 edge new_loop_exit_edge;
/* The copied loop's exit edge is whichever header successor does not
   lead to the latch.  */
760 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
761 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
763 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
765 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
769 if (at_exit) /* Add the loop copy at exit. */
771 redirect_edge_and_branch_force (e, new_loop->header);
772 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
774 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
776 else /* Add the copy at entry. */
779 edge entry_e = loop_preheader_edge (loop);
780 basic_block preheader = entry_e->src;
/* Find the exit edge of the copy: the header successor whose dest
   is outside the new loop.  */
782 if (!flow_bb_inside_loop_p (new_loop,
783 EDGE_SUCC (new_loop->header, 0)->dest))
784 new_exit_e = EDGE_SUCC (new_loop->header, 0);
786 new_exit_e = EDGE_SUCC (new_loop->header, 1);
788 redirect_edge_and_branch_force (new_exit_e, loop->header);
789 set_immediate_dominator (CDI_DOMINATORS, loop->header,
792 /* We have to add phi args to the loop->header here as coming
793 from new_exit_e edge. */
794 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
796 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
798 add_phi_arg (&phi, phi_arg, new_exit_e);
801 redirect_edge_and_branch_force (entry_e, new_loop->header);
802 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
/* Refresh the cached loop data (entry/exit edges etc.) for both loops.  */
805 flow_loop_scan (new_loop, LOOP_ALL);
806 flow_loop_scan (loop, LOOP_ALL);
814 /* Given the condition statement COND, put it as the last statement
815 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
816 Assumes that this is the single exit of the guarded loop.
817 Returns the skip edge. */
820 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
823 block_stmt_iterator bsi;
825 tree cond_stmt, then_label, else_label;
/* The existing fall-through successor of GUARD_BB becomes the 'else'
   (condition false) path that enters the loop.  */
827 enter_e = EDGE_SUCC (guard_bb, 0);
828 enter_e->flags &= ~EDGE_FALLTHRU;
829 enter_e->flags |= EDGE_FALSE_VALUE;
830 bsi = bsi_last (guard_bb);
/* Build "if (COND) goto EXIT_BB; else goto loop" as an explicit
   COND_EXPR with goto arms, and append it to GUARD_BB.  */
832 then_label = build1 (GOTO_EXPR, void_type_node,
833 tree_block_label (exit_bb));
834 else_label = build1 (GOTO_EXPR, void_type_node,
835 tree_block_label (enter_e->dest));
836 cond_stmt = build (COND_EXPR, void_type_node, cond,
837 then_label, else_label);
838 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
839 /* Add new edge to connect entry block to the second loop. */
840 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
841 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
846 /* This function verifies that the following restrictions apply to LOOP:
848 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
849 (3) it is single entry, single exit
850 (4) its exit condition is the last stmt in the header
851 (5) E is the entry/exit edge of LOOP.
855 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
857 edge exit_e = loop->exit_edges [0];
858 edge entry_e = loop_preheader_edge (loop);
859 tree orig_cond = get_loop_exit_condition (loop);
860 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
/* NOTE(review): peeling apparently cannot proceed while ssa names are
   marked for rewrite elsewhere -- confirm the exact interaction.  */
862 if (any_marked_for_rewrite_p ())
866 /* All loops have an outer scope; the only case loop->outer is NULL is for
867 the function itself. */
869 || loop->num_nodes != 2
870 || !empty_block_p (loop->latch)
871 || loop->num_exits != 1
872 || loop->num_entries != 1
873 /* Verify that new loop exit condition can be trivially modified. */
874 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
875 || (e != exit_e && e != entry_e))
881 #ifdef ENABLE_CHECKING
/* Sanity-check (under ENABLE_CHECKING) the CFG produced by loop
   peeling: FIRST_LOOP's guarded exit must feed SECOND_LOOP's
   preheader as described below.  */
883 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
884 struct loop *second_loop)
886 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
887 basic_block loop2_entry_bb = second_loop->pre_header;
888 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
890 /* A guard that controls whether the second_loop is to be executed or skipped
891 is placed in first_loop->exit. first_loop->exit therefore has two
892 successors - one is the preheader of second_loop, and the other is a bb
895 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
898 /* 1. Verify that one of the successors of first_loop->exit is the preheader
901 /* The preheader of new_loop is expected to have two predecessors:
902 first_loop->exit and the block that precedes first_loop. */
904 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
905 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
906 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
907 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
908 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
910 /* Verify that the other successor of first_loop->exit is after the
916 /* Function slpeel_tree_peel_loop_to_edge.
918 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
919 that is placed on the entry (exit) edge E of LOOP. After this transformation
920 we have two loops one after the other - first-loop iterates FIRST_NITERS
921 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
924 - LOOP: the loop to be peeled.
925 - E: the exit or entry edge of LOOP.
926 If it is the entry edge, we peel the first iterations of LOOP. In this
927 case first-loop is LOOP, and second-loop is the newly created loop.
928 If it is the exit edge, we peel the last iterations of LOOP. In this
929 case, first-loop is the newly created loop, and second-loop is LOOP.
930 - NITERS: the number of iterations that LOOP iterates.
931 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
932 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
933 for updating the loop bound of the first-loop to FIRST_NITERS. If it
934 is false, the caller of this function may want to take care of this
935 (this can be useful if we don't want new stmts added to first-loop).
938 The function returns a pointer to the new loop-copy, or NULL if it failed
939 to perform the transformation.
941 The function generates two if-then-else guards: one before the first loop,
942 and the other before the second loop:
944 if (FIRST_NITERS == 0) then skip the first loop,
945 and go directly to the second loop.
947 if (FIRST_NITERS == NITERS) then skip the second loop.
949 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
950 FORNOW the resulting code will not be in loop-closed-ssa form.
/* Peel LOOP: duplicate it so that one copy runs FIRST_NITERS iterations and
   the other runs the remaining NITERS - FIRST_NITERS, with guards around
   both copies.  NOTE(review): this extract is elided — interior lines of the
   original function (braces, early returns, some declarations) are missing,
   so only comments are added here.  */
954 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
955 edge e, tree first_niters,
956 tree niters, bool update_first_loop_count)
958 struct loop *new_loop = NULL, *first_loop, *second_loop;
962 basic_block bb_before_second_loop, bb_after_second_loop;
963 basic_block bb_before_first_loop;
964 basic_block bb_between_loops;
965 edge exit_e = loop->exit_edges [0];
/* Bail out (elided return path) when LOOP is not of the simple shape the
   peeling machinery supports.  */
967 if (!slpeel_can_duplicate_loop_p (loop, e))
970 /* We have to initialize cfg_hooks. Then, when calling
971 cfg_hooks->split_edge, the function tree_split_edge
972 is actually called and, when calling cfg_hooks->duplicate_block,
973 the function tree_duplicate_bb is called. */
974 tree_register_cfg_hooks ();
977 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
978 Resulting CFG would be:
991 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
993 if (vect_debug_stats (loop) || vect_debug_details (loop))
994 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
/* Whether NEW_LOOP became the first or the second copy depends on which
   edge E it was placed on (entry vs. exit of LOOP).  */
1000 /* NEW_LOOP was placed after LOOP. */
1002 second_loop = new_loop;
1006 /* NEW_LOOP was placed before LOOP. */
1007 first_loop = new_loop;
/* Fix up SSA form in the duplicated loop: fresh names for all marked
   definitions, then rename uses inside NEW_LOOP.  */
1011 definitions = marked_ssa_names ();
1012 allocate_new_names (definitions);
1013 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1014 rename_variables_in_loop (new_loop);
1017 /* 2. Add the guard that controls whether the first loop is executed.
1018 Resulting CFG would be:
1020 bb_before_first_loop:
1021 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1028 bb_before_second_loop:
1037 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1038 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1039 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1040 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
/* Re-scan both loops so cached CFG info (headers, exits) is up to date
   after the edge splits above.  */
1041 flow_loop_scan (first_loop, LOOP_ALL);
1042 flow_loop_scan (second_loop, LOOP_ALL);
/* Guard 1: skip the first loop entirely when FIRST_NITERS <= 0.  */
1045 build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1046 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1047 bb_before_second_loop, bb_before_first_loop);
1048 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1049 first_loop == new_loop);
1052 /* 3. Add the guard that controls whether the second loop is executed.
1053 Resulting CFG would be:
1055 bb_before_first_loop:
1056 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1064 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1065 GOTO bb_before_second_loop
1067 bb_before_second_loop:
1073 bb_after_second_loop:
1078 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1079 add_bb_to_loop (bb_between_loops, first_loop->outer);
1080 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1081 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1082 flow_loop_scan (first_loop, LOOP_ALL);
1083 flow_loop_scan (second_loop, LOOP_ALL);
/* Guard 2: skip the second loop when the first loop consumed all NITERS.  */
1085 pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters);
1086 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1087 bb_after_second_loop, bb_before_first_loop);
1088 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1089 second_loop == new_loop);
1091 /* Flow loop scan does not update loop->single_exit field. */
1092 first_loop->single_exit = first_loop->exit_edges[0];
1093 second_loop->single_exit = second_loop->exit_edges[0];
1095 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
/* When the caller passes UPDATE_FIRST_LOOP_COUNT == false it takes
   responsibility for adjusting the first loop's bound itself.  */
1097 if (update_first_loop_count)
1098 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
/* Release the renaming bookkeeping allocated in step 1.  */
1100 free_new_names (definitions);
1101 BITMAP_XFREE (definitions);
1102 unmark_all_for_rewrite ();
1108 /* Here the proper Vectorizer starts. */
1110 /*************************************************************************
1111 Vectorization Utilities.
1112 *************************************************************************/
1114 /* Function new_stmt_vec_info.
1116 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocate a zeroed stmt_vec_info for STMT belonging to LOOP and set every
   field to its explicit "not yet analyzed" default.  NOTE(review): the
   extract is elided — the trailing `return res;`/closing brace of the
   original are not visible here.  */
1119 new_stmt_vec_info (tree stmt, struct loop *loop)
1122 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
/* Defaults: unknown vectorization type, not relevant, no vector type,
   no vectorized stmt, no data reference / alias tag / base yet.  */
1124 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1125 STMT_VINFO_STMT (res) = stmt;
1126 STMT_VINFO_LOOP (res) = loop;
1127 STMT_VINFO_RELEVANT_P (res) = 0;
1128 STMT_VINFO_VECTYPE (res) = NULL;
1129 STMT_VINFO_VEC_STMT (res) = NULL;
1130 STMT_VINFO_DATA_REF (res) = NULL;
1131 STMT_VINFO_MEMTAG (res) = NULL;
1132 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1138 /* Function new_loop_vec_info.
1140 Create and initialize a new loop_vec_info struct for LOOP, as well as
1141 stmt_vec_info structs for all the stmts in LOOP. */
/* Allocate a loop_vec_info for LOOP and attach a fresh stmt_vec_info to
   every statement in every basic block of the loop body.
   NOTE(review): extract is elided; braces/return are not visible.  */
1144 new_loop_vec_info (struct loop *loop)
1148 block_stmt_iterator si;
1151 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1153 bbs = get_loop_body (loop);
1155 /* Create stmt_info for all stmts in the loop. */
1156 for (i = 0; i < loop->num_nodes; i++)
1158 basic_block bb = bbs[i];
1159 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1161 tree stmt = bsi_stmt (si);
/* Make sure operand caches are built before hanging vect info off
   the statement annotation.  */
1164 get_stmt_operands (stmt);
1165 ann = stmt_ann (stmt);
1166 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
/* Initialize the loop-level fields; analysis phases fill these in.  */
1170 LOOP_VINFO_LOOP (res) = loop;
1171 LOOP_VINFO_BBS (res) = bbs;
1172 LOOP_VINFO_EXIT_COND (res) = NULL;
1173 LOOP_VINFO_NITERS (res) = NULL;
1174 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1175 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1176 LOOP_VINFO_VECT_FACTOR (res) = 0;
/* Growable arrays of the loop's write/read data references (initial
   capacity 20 is just a guess, they grow on demand).  */
1177 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1178 "loop_write_datarefs");
1179 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1180 "loop_read_datarefs");
1181 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1187 /* Function destroy_loop_vec_info.
1189 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1190 stmts in the loop. */
/* Release LOOP_VINFO: detach (and presumably free — the freeing lines are
   elided from this extract) the per-statement vec infos, then the basic
   block array and the data-reference varrays.  */
1193 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1198 block_stmt_iterator si;
1204 loop = LOOP_VINFO_LOOP (loop_vinfo);
1206 bbs = LOOP_VINFO_BBS (loop_vinfo);
1207 nbbs = loop->num_nodes;
1209 for (j = 0; j < nbbs; j++)
1211 basic_block bb = bbs[j];
1212 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1214 tree stmt = bsi_stmt (si);
1215 stmt_ann_t ann = stmt_ann (stmt);
1216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Clear the annotation so no dangling pointer to the vec info remains
   on the statement.  */
1218 set_stmt_info (ann, NULL);
1222 free (LOOP_VINFO_BBS (loop_vinfo));
1223 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1224 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1230 /* Function debug_loop_stats.
1232 For vectorization statistics dumps. */
/* Return whether vectorizer statistics dumping is active (dump file open
   with TDF_STATS); when LOOP is given, also print its source location by
   scanning for the first stmt that carries one.  Elided lines hide the
   exact return paths.  */
1235 vect_debug_stats (struct loop *loop)
1238 block_stmt_iterator si;
1239 tree node = NULL_TREE;
/* Dumping disabled: nothing to report (early exit, body elided).  */
1241 if (!dump_file || !(dump_flags & TDF_STATS))
1246 fprintf (dump_file, "\n");
1255 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1257 node = bsi_stmt (si);
1258 if (node && EXPR_P (node) && EXPR_LOCUS (node))
/* Print "loop at FILE:LINE:" when a located stmt was found.  */
1262 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1263 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1265 fprintf (dump_file, "\nloop at %s:%d: ",
1266 EXPR_FILENAME (node), EXPR_LINENO (node));
1274 /* Function debug_loop_details.
1276 For vectorization debug dumps. */
/* Same as vect_debug_stats, but gated on TDF_DETAILS: return whether
   detailed vectorizer dumping is active, printing LOOP's source location
   when one can be found.  Elided lines hide the exact return paths.  */
1279 vect_debug_details (struct loop *loop)
1282 block_stmt_iterator si;
1283 tree node = NULL_TREE;
1285 if (!dump_file || !(dump_flags & TDF_DETAILS))
1290 fprintf (dump_file, "\n");
1299 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1301 node = bsi_stmt (si);
1302 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1306 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1307 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1309 fprintf (dump_file, "\nloop at %s:%d: ",
1310 EXPR_FILENAME (node), EXPR_LINENO (node));
1318 /* Function vect_get_ptr_offset
1320 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
/* Placeholder: meant to compute OFFSET (mod vector alignment, in bits) of
   pointer REF; all parameters are currently unused and the (elided) body
   presumably just fails.  */
1323 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1324 tree vectype ATTRIBUTE_UNUSED,
1325 tree *offset ATTRIBUTE_UNUSED)
1327 /* TODO: Use alignment information. */
1332 /* Function vect_get_base_and_bit_offset
1334 Return the BASE of the data reference EXPR.
1335 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1336 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1337 bits of 'a.b[i] + 4B' from a.
1340 EXPR - the memory reference that is being analyzed
1341 DR - the data_reference struct of the _original_ memory reference
1342 (Note: DR_REF (DR) is not necessarily EXPR)
1343 VECTYPE - the type that defines the alignment (i.e, we compute
1344 alignment relative to TYPE_ALIGN(VECTYPE))
1347 BASE (returned value) - the base of the data reference EXPR.
1348 E.g, if EXPR is a.b[k].c[i][j] the returned
1350 OFFSET - offset of EXPR from BASE in bits
1351 BASE_ALIGNED_P - indicates if BASE is aligned
1353 If something unexpected is encountered (an unsupported form of data-ref),
1354 or if VECTYPE is given but OFFSET cannot be determined:
1355 then NULL_TREE is returned. */
/* Recursively strip EXPR (the data reference being analyzed for DR) down to
   its base object, accumulating the bit offset from that base in *OFFSET
   and recording in *BASE_ALIGNED_P whether the base meets VECTYPE's
   alignment.  NOTE(review): extract is elided — case labels of the switch
   on CODE, braces, and failure returns are missing; comments below mark
   the visible case bodies.  */
1358 vect_get_base_and_bit_offset (struct data_reference *dr,
1361 loop_vec_info loop_vinfo,
1363 bool *base_aligned_p)
1365 tree this_offset = size_zero_node;
1366 tree base = NULL_TREE;
1368 tree oprnd0, oprnd1;
1369 struct data_reference *array_dr;
1370 enum tree_code code = TREE_CODE (expr);
1372 *base_aligned_p = false;
1376 /* These cases end the recursion: */
/* Case: EXPR is a DECL — offset 0, aligned iff the decl's own alignment
   satisfies VECTYPE's.  */
1378 *offset = size_zero_node;
1379 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1380 *base_aligned_p = true;
/* Case: EXPR is an SSA_NAME pointer — consult the pointed-to type's
   alignment, falling back to vect_get_ptr_offset.  */
1387 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1390 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1392 base = vect_get_ptr_offset (expr, vectype, offset);
1394 *base_aligned_p = true;
1398 *base_aligned_p = true;
1399 *offset = size_zero_node;
/* Case: EXPR is an integer constant — convert its byte value to bits.  */
1405 *offset = int_const_binop (MULT_EXPR, expr,
1406 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1409 /* These cases continue the recursion: */
/* Case: COMPONENT_REF — field offset contributes bit_position(field).  */
1411 oprnd0 = TREE_OPERAND (expr, 0);
1412 oprnd1 = TREE_OPERAND (expr, 1);
1414 this_offset = bit_position (oprnd1);
/* A non-constant field offset defeats alignment analysis.  */
1415 if (vectype && !host_integerp (this_offset, 1))
1421 oprnd0 = TREE_OPERAND (expr, 0);
1426 oprnd0 = TREE_OPERAND (expr, 0);
/* Case: ARRAY_REF — may need a fresh DR describing the array access.  */
1431 if (DR_REF (dr) != expr)
1432 /* Build array data_reference struct if the existing DR_REF
1433 doesn't match EXPR. This happens, for example, when the
1434 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1435 contains information on the access of T, not of arr. In order
1436 to continue the analysis, we create a new DR struct that
1437 describes the access of arr.
1439 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1443 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1444 vectype, &this_offset);
1449 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1451 *offset = this_offset;
1452 *base_aligned_p = true;
1459 /* In case we have a PLUS_EXPR of the form
1460 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1461 This is verified in vect_get_symbl_and_dr. */
1462 oprnd0 = TREE_OPERAND (expr, 0);
1463 oprnd1 = TREE_OPERAND (expr, 1);
/* oprnd1 contributes only an offset; recurse to evaluate it.  */
1465 base = vect_get_base_and_bit_offset
1466 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1467 if (vectype && !base)
/* Recurse into the sub-reference that actually carries the base.  */
1477 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1478 loop_vinfo, offset, base_aligned_p);
1480 if (vectype && base)
/* Fold this level's contribution into the running total; overflow or a
   non-constant sum means the offset is unusable.  */
1482 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1483 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1486 if (vect_debug_details (NULL))
1488 print_generic_expr (dump_file, expr, TDF_SLIM);
1489 fprintf (dump_file, " --> total offset for ref: ");
1490 print_generic_expr (dump_file, *offset, TDF_SLIM);
1497 /* Function vect_force_dr_alignment_p.
1499 Returns whether the alignment of a DECL can be forced to be aligned
1500 on ALIGNMENT bit boundary. */
/* Return whether DECL's alignment can be raised to ALIGNMENT bits:
   only plain, non-external VAR_DECLs qualify; statics are limited by
   MAX_OFILE_ALIGNMENT, automatics by the stack boundary.  The `return
   false` lines for the first two checks are elided from this extract.  */
1503 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1505 if (TREE_CODE (decl) != VAR_DECL)
1508 if (DECL_EXTERNAL (decl))
1511 if (TREE_STATIC (decl))
1512 return (alignment <= MAX_OFILE_ALIGNMENT);
1514 /* This is not 100% correct. The absolute correct stack alignment
1515 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1516 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1517 However, until someone implements forced stack alignment, SSE
1518 isn't really usable without this. */
1519 return (alignment <= PREFERRED_STACK_BOUNDARY);
1523 /* Function vect_get_new_vect_var.
1525 Returns a name for a new variable. The current naming scheme appends the
1526 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1527 the name of vectorizer generated variables, and appends that to NAME if
/* Create a new temporary of TYPE named by prefixing NAME (if any) with
   "vect_" or "vect_p" depending on VAR_KIND.  Prefix selection for the
   pointer case is elided from this extract.  */
1531 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1537 if (var_kind == vect_simple_var)
1542 prefix_len = strlen (prefix);
/* With a caller-supplied NAME, concatenate; otherwise use the bare prefix
   (create_tmp_var appends a unique id either way).  */
1545 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1547 new_vect_var = create_tmp_var (type, prefix);
1549 return new_vect_var;
1553 /* Function vect_create_index_for_vector_ref.
1555 Create (and return) an index variable, along with it's update chain in the
1556 loop. This variable will be used to access a memory location in a vector
1560 LOOP: The loop being vectorized.
1561 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1562 function can be added here, or in the loop pre-header.
1565 Return an index that will be used to index a vector array. It is expected
1566 that a pointer to the first vector will be used as the base address for the
1569 FORNOW: we are not trying to be efficient, just creating a new index each
1570 time from scratch. At this time all vector references could use the same
1573 TODO: create only one index to be used by all vector references. Record
1574 the index in the LOOP_VINFO the first time this procedure is called and
1575 return it on subsequent calls. The increment of this index must be placed
1576 just before the conditional expression that ends the single block loop. */
/* Create an induction variable for indexing vector accesses in LOOP:
   starts at 0, increments by 1 each iteration.  Returns the value before
   the increment (the per-iteration index).  */
1579 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1582 tree indx_before_incr, indx_after_incr;
1584 /* It is assumed that the base pointer used for vectorized access contains
1585 the address of the first vector. Therefore the index used for vectorized
1586 access must be initialized to zero and incremented by 1. */
1588 init = integer_zero_node;
1589 step = integer_one_node;
1591 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1592 create_iv (init, step, NULL_TREE, loop, bsi, false,
1593 &indx_before_incr, &indx_after_incr);
1595 return indx_before_incr;
1599 /* Function vect_create_addr_base_for_vector_ref.
1601 Create an expression that computes the address of the first memory location
1602 that will be accessed for a data reference.
1605 STMT: The statement containing the data reference.
1606 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1607 OFFSET: Optional. If supplied, it is be added to the initial address.
1610 1. Return an SSA_NAME whose value is the address of the memory location of
1611 the first vector of the data reference.
1612 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1613 these statement(s) which define the returned SSA_NAME.
1615 FORNOW: We are only handling array accesses with step 1. */
/* Build an expression for the address of the first memory location that
   STMT's data reference will touch, appending any stmts needed to compute
   it to *NEW_STMT_LIST (caller inserts them, typically in the preheader).
   Returns the SSA_NAME holding that address.  NOTE(review): extract is
   elided — some declarations, braces and the final return are missing.  */
1618 vect_create_addr_base_for_vector_ref (tree stmt,
1619 tree *new_stmt_list,
1622 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1623 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1624 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
/* Unshare so later gimplification cannot corrupt the stored trees.  */
1625 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1626 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1627 tree ref = DR_REF (dr);
1628 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1629 tree scalar_type = TREE_TYPE (ref);
1630 tree scalar_ptr_type = build_pointer_type (scalar_type);
1632 tree init_val, step, init_oval;
1634 bool is_ptr_ref, is_array_ref, is_addr_expr;
1639 tree addr_base, addr_expr;
1640 tree dest, new_stmt;
1642 /* Only the access function of the last index is relevant (i_n in
1643 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1644 access_fn = DR_ACCESS_FN (dr, 0);
1645 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
/* Fallback initial value when the evolution could not be analyzed
   (condition elided).  */
1648 init_oval = integer_zero_node;
/* Classify the base: SSA pointer, array object, or address arithmetic.
   Exactly one of the three must hold.  */
1650 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1651 && TREE_CODE (data_ref_base) == SSA_NAME;
1652 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1653 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1654 || TREE_CODE (data_ref_base) == PLUS_EXPR
1655 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1656 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1658 /** Create: &(base[init_val])
1660 if data_ref_base is an ARRAY_TYPE:
1661 base = data_ref_base
1663 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1664 base = *((scalar_array *) data_ref_base)
1668 array_base = data_ref_base;
1669 else /* is_ptr_ref or is_addr_expr */
1671 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1672 tree scalar_array_type = build_array_type (scalar_type, 0);
1673 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1674 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1675 add_referenced_tmp_var (array_ptr);
/* Gimplify the base expression into DEST so it is a valid operand.  */
1677 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1678 add_referenced_tmp_var (dest);
1680 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1681 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Cast the pointer to "pointer to array of scalar" and dereference it so
   the result can be indexed with ARRAY_REF below.  */
1683 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1684 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1685 new_temp = make_ssa_name (array_ptr, vec_stmt);
1686 TREE_OPERAND (vec_stmt, 0) = new_temp;
1687 append_to_statement_list_force (vec_stmt, new_stmt_list);
1690 array_base = build_fold_indirect_ref (new_temp);
/* Gimplify the initial index value.  */
1693 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1694 add_referenced_tmp_var (dest);
1695 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1696 append_to_statement_list_force (new_stmt, new_stmt_list);
/* If the (optional, elided condition) OFFSET was supplied, fold it into
   the initial index: init_val += offset.  */
1700 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1701 add_referenced_tmp_var (tmp);
1702 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1703 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1704 init_val = make_ssa_name (tmp, vec_stmt);
1705 TREE_OPERAND (vec_stmt, 0) = init_val;
1706 append_to_statement_list_force (vec_stmt, new_stmt_list);
/* addr_base = &base[init_val].  */
1709 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1710 NULL_TREE, NULL_TREE);
1711 addr_base = build_fold_addr_expr (array_ref);
1713 /* addr_expr = addr_base */
1714 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1715 get_name (base_name));
1716 add_referenced_tmp_var (addr_expr);
1717 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1718 new_temp = make_ssa_name (addr_expr, vec_stmt);
1719 TREE_OPERAND (vec_stmt, 0) = new_temp;
1720 append_to_statement_list_force (vec_stmt, new_stmt_list);
1726 /* Function get_vectype_for_scalar_type.
1728 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Return the vector type to use for SCALAR_TYPE: UNITS_PER_SIMD_WORD /
   sizeof(scalar) units wide.  Fails (elided return path) when the target
   has no vector mode for that type.  */
1732 get_vectype_for_scalar_type (tree scalar_type)
1734 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1735 int nbytes = GET_MODE_SIZE (inner_mode);
1742 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1744 nunits = UNITS_PER_SIMD_WORD / nbytes;
1746 vectype = build_vector_type (scalar_type, nunits);
1747 if (vect_debug_details (NULL))
1749 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1750 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1756 if (vect_debug_details (NULL))
1758 fprintf (dump_file, "vectype: ");
1759 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject types the target cannot hold in a real vector register.  */
1762 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1764 /* TODO: tree-complex.c sometimes can parallelize operations
1765 on generic vectors. We can vectorize the loop in that case,
1766 but then we should re-run the lowering pass. */
1767 if (vect_debug_details (NULL))
1768 fprintf (dump_file, "mode not supported by target.")
1776 /* Function vect_align_data_ref.
1778 Handle mislignment of a memory accesses.
1780 FORNOW: Can't handle misaligned accesses.
1781 Make sure that the dataref is aligned. */
/* Verify STMT's data reference is aligned; misaligned accesses are not
   handled yet, so this is assert-only (no code is generated).  */
1784 vect_align_data_ref (tree stmt)
1786 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1787 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1789 /* FORNOW: can't handle misaligned accesses;
1790 all accesses expected to be aligned. */
1791 gcc_assert (aligned_access_p (dr));
1795 /* Function vect_create_data_ref_ptr.
1797 Create a memory reference expression for vector access, to be used in a
1798 vector load/store stmt. The reference is based on a new pointer to vector
1802 1. STMT: a stmt that references memory. Expected to be of the form
1803 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1804 2. BSI: block_stmt_iterator where new stmts can be added.
1805 3. OFFSET (optional): an offset to be added to the initial address accessed
1806 by the data-ref in STMT.
1807 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1808 pointing to the initial address.
1811 1. Declare a new ptr to vector_type, and have it point to the base of the
1812 data reference (initial addressed accessed by the data reference).
1813 For example, for vector of type V8HI, the following code is generated:
1816 vp = (v8hi *)initial_address;
1818 if OFFSET is not supplied:
1819 initial_address = &a[init];
1820 if OFFSET is supplied:
1821 initial_address = &a[init + OFFSET];
1823 Return the initial_address in INITIAL_ADDRESS.
1825 2. Create a data-reference in the loop based on the new vector pointer vp,
1826 and using a new index variable 'idx' as follows:
1830 where if ONLY_INIT is true:
1833 update = idx + vector_type_size
1835 Return the pointer vp'.
1838 FORNOW: handle only aligned and consecutive accesses. */
/* Create a pointer-to-vector-type for STMT's data reference: declare it,
   give it the stmt's memory tag for aliasing, initialize it in the loop
   preheader (storing the scalar initial address in *INITIAL_ADDRESS), and
   unless ONLY_INIT, also emit the per-iteration update inside the loop.
   Returns the SSA pointer to use in the vectorized load/store.
   NOTE(review): extract is elided — braces and some declarations are
   missing (fixed original-line-number jumps).  */
1841 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1842 tree *initial_address, bool only_init)
1845 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1846 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1847 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1848 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Virtual operands of STMT — used below to mark aliased vars for SSA
   renaming once the new pointer shares their memory tag.  */
1852 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1853 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1854 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1855 int nvuses, nv_may_defs, nv_must_defs;
1859 tree new_stmt_list = NULL_TREE;
1861 edge pe = loop_preheader_edge (loop);
1868 base_name = unshare_expr (DR_BASE_NAME (dr));
/* Debug-only classification of the base by tree code.  */
1869 if (vect_debug_details (NULL))
1871 tree data_ref_base = base_name;
1872 fprintf (dump_file, "create array_ref of type: ");
1873 print_generic_expr (dump_file, vectype, TDF_SLIM);
1874 if (TREE_CODE (data_ref_base) == VAR_DECL)
1875 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1876 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1877 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1878 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1879 fprintf (dump_file, "vectorizing a record based array ref: ");
1880 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1881 fprintf (dump_file, "vectorizing a pointer ref: ");
1882 print_generic_expr (dump_file, base_name, TDF_SLIM);
1885 /** (1) Create the new vector-pointer variable: **/
1887 vect_ptr_type = build_pointer_type (vectype);
1888 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1889 get_name (base_name));
1890 add_referenced_tmp_var (vect_ptr);
1893 /** (2) Handle aliasing information of the new vector-pointer: **/
1895 tag = STMT_VINFO_MEMTAG (stmt_info);
1897 get_var_ann (vect_ptr)->type_mem_tag = tag;
1899 /* Mark for renaming all aliased variables
1900 (i.e, the may-aliases of the type-mem-tag). */
1901 nvuses = NUM_VUSES (vuses);
1902 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1903 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1904 for (i = 0; i < nvuses; i++)
1906 tree use = VUSE_OP (vuses, i);
1907 if (TREE_CODE (use) == SSA_NAME)
1908 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1910 for (i = 0; i < nv_may_defs; i++)
1912 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1913 if (TREE_CODE (def) == SSA_NAME)
1914 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1916 for (i = 0; i < nv_must_defs; i++)
1918 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1919 if (TREE_CODE (def) == SSA_NAME)
1920 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1924 /** (3) Calculate the initial address the vector-pointer, and set
1925 the vector-pointer to point to it before the loop: **/
1927 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1928 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
/* Insertion on the preheader edge must not split a new block — the
   preheader is expected to be able to absorb the stmts.  */
1930 pe = loop_preheader_edge (loop);
1931 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1932 gcc_assert (!new_bb);
1933 *initial_address = new_temp;
1935 /* Create: p = (vectype *) initial_base */
1936 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1937 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1938 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1939 TREE_OPERAND (vec_stmt, 0) = new_temp;
1940 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1941 gcc_assert (!new_bb);
1942 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1945 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1947 if (only_init) /* No update in loop is required. */
1948 return vect_ptr_init;
1950 idx = vect_create_index_for_vector_ref (loop, bsi);
1952 /* Create: update = idx * vectype_size */
1953 ptr_update = create_tmp_var (integer_type_node, "update");
1954 add_referenced_tmp_var (ptr_update);
1955 vectype_size = build_int_cst (integer_type_node,
1956 GET_MODE_SIZE (TYPE_MODE (vectype)));
1957 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1958 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1959 new_temp = make_ssa_name (ptr_update, vec_stmt);
1960 TREE_OPERAND (vec_stmt, 0) = new_temp;
1961 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1963 /* Create: data_ref_ptr = vect_ptr_init + update */
1964 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1965 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1966 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1967 TREE_OPERAND (vec_stmt, 0) = new_temp;
1968 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1969 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1971 return data_ref_ptr;
1975 /* Function vect_create_destination_var.
1977 Create a new temporary of type VECTYPE. */
/* Create a new vector temporary of VECTYPE to hold the vectorized value of
   SCALAR_DEST (which must be an SSA_NAME), reusing its name for
   readability of dumps.  */
1980 vect_create_destination_var (tree scalar_dest, tree vectype)
1983 const char *new_name;
1985 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1987 new_name = get_name (scalar_dest);
1990 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1991 add_referenced_tmp_var (vec_dest);
1997 /* Function vect_init_vector.
1999 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2000 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2001 used in the vectorization of STMT. */
/* Emit, in the loop preheader, an assignment of VECTOR_VAR into a fresh
   vector temporary, and return the SSA def of that assignment for use in
   the vectorized form of STMT.  */
2004 vect_init_vector (tree stmt, tree vector_var)
2006 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2007 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2010 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2016 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2017 add_referenced_tmp_var (new_var);
2019 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2020 new_temp = make_ssa_name (new_var, init_stmt);
2021 TREE_OPERAND (init_stmt, 0) = new_temp;
/* The preheader edge must absorb the stmt without creating a new block.  */
2023 pe = loop_preheader_edge (loop);
2024 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2025 gcc_assert (!new_bb);
2027 if (vect_debug_details (NULL))
2029 fprintf (dump_file, "created new init_stmt: ");
2030 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2033 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2038 /* Function vect_get_vec_def_for_operand.
2040 OP is an operand in STMT. This function returns a (vector) def that will be
2041 used in the vectorized stmt for STMT.
2043 In the case that OP is an SSA_NAME which is defined in the loop, then
2044 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2046 In case OP is an invariant or constant, a new stmt that creates a vector def
2047 needs to be introduced. */
/* Return the vector def to use for operand OP of STMT's vectorized form:
   constants and loop invariants get a new splatted vector built in the
   preheader; values defined inside the loop reuse the def of the already
   vectorized defining stmt.  NOTE(review): extract is elided — switch case
   labels and some braces are missing.  */
2050 vect_get_vec_def_for_operand (tree op, tree stmt)
2055 stmt_vec_info def_stmt_info = NULL;
2056 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2057 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2058 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2059 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2066 if (vect_debug_details (NULL))
2068 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2069 print_generic_expr (dump_file, op, TDF_SLIM);
2072 /** ===> Case 1: operand is a constant. **/
2074 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2076 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2080 /* Build a tree with vector elements. */
2081 if (vect_debug_details (NULL))
2082 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits)
2084 for (i = nunits - 1; i >= 0; --i)
2086 t = tree_cons (NULL_TREE, op, t);
2088 vec_cst = build_vector (vectype, t);
2089 return vect_init_vector (stmt, vec_cst);
2092 gcc_assert (TREE_CODE (op) == SSA_NAME);
2094 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2096 def_stmt = SSA_NAME_DEF_STMT (op);
2097 def_stmt_info = vinfo_for_stmt (def_stmt);
2099 if (vect_debug_details (NULL))
2101 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2102 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2106 /** ==> Case 2.1: operand is defined inside the loop. **/
2110 /* Get the def from the vectorized stmt. */
2112 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2113 gcc_assert (vec_stmt);
2114 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2119 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2120 it is a reduction/induction. **/
2122 bb = bb_for_stmt (def_stmt);
2123 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2125 if (vect_debug_details (NULL))
2126 fprintf (dump_file, "reduction/induction - unsupported.");
2127 internal_error ("no support for reduction/induction"); /* FORNOW */
2131 /** ==> Case 2.3: operand is defined outside the loop -
2132 it is a loop invariant. */
/* Pick the scalar invariant value out of the defining stmt; case labels
   (PHI_NODE / MODIFY_EXPR / NOP_EXPR, presumably) are elided.  */
2134 switch (TREE_CODE (def_stmt))
2137 def = PHI_RESULT (def_stmt);
2140 def = TREE_OPERAND (def_stmt, 0);
2143 def = TREE_OPERAND (def_stmt, 0);
2144 gcc_assert (IS_EMPTY_STMT (def_stmt));
2148 if (vect_debug_details (NULL))
2150 fprintf (dump_file, "unsupported defining stmt: ");
2151 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2153 internal_error ("unsupported defining stmt");
2156 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2158 if (vect_debug_details (NULL))
2159 fprintf (dump_file, "Create vector_inv.");
2161 for (i = nunits - 1; i >= 0; --i)
2163 t = tree_cons (NULL_TREE, def, t);
/* Invariants need a CONSTRUCTOR (runtime values), unlike constants which
   use build_vector above.  */
2166 vec_inv = build_constructor (vectype, t);
2167 return vect_init_vector (stmt, vec_inv);
2171 /* Function vect_finish_stmt_generation.
2173 Insert a new stmt. */
/* Insert the newly built VEC_STMT before STMT at BSI, then advance BSI back
   to STMT so subsequent code generation keeps a stable anchor.  */
2176 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2178 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2180 if (vect_debug_details (NULL))
2182 fprintf (dump_file, "add new stmt: ");
2183 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2186 /* Make sure bsi points to the stmt that is being vectorized. */
2188 /* Assumption: any stmts created for the vectorization of stmt S were
2189 inserted before S. BSI is expected to point to S or some new stmt before S.
/* Walk forward until BSI lands on STMT; reaching the end first would
   violate the assumption above, hence the assert.  */
2192 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2194 gcc_assert (stmt == bsi_stmt (*bsi));
2198 /* Function vectorizable_assignment.
2200 Check if STMT performs an assignment (copy) that can be vectorized.
2201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (VEC_STMT == NULL) or transform (VEC_STMT != NULL) a scalar copy
   `lhs = rhs`.  Analysis records assignment_vec_info_type; transformation
   builds the vector copy and inserts it at BSI.  Returns false for
   non-vectorizable stmts (return lines are elided in this extract).  */
2206 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2213 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2214 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2217 /* Is vectorizable assignment? */
2219 if (TREE_CODE (stmt) != MODIFY_EXPR)
2222 scalar_dest = TREE_OPERAND (stmt, 0);
2223 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* The RHS must be a "simple use" (constant, invariant, or loop def).  */
2226 op = TREE_OPERAND (stmt, 1);
2227 if (!vect_is_simple_use (op, loop, NULL))
2229 if (vect_debug_details (NULL))
2230 fprintf (dump_file, "use not simple.");
/* Analysis-only call: just record the stmt's vectorization kind.  */
2234 if (!vec_stmt) /* transformation not required. */
2236 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2241 if (vect_debug_details (NULL))
2242 fprintf (dump_file, "transform assignment.");
2245 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2248 op = TREE_OPERAND (stmt, 1);
2249 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2251 /* Arguments are ready. create the new vector stmt. */
2252 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2253 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2254 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2255 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2261 /* Function vectorizable_operation.
2263 Check if STMT performs a binary or unary operation that can be vectorized.
2264 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2265 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2266 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2269 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2274 tree op0, op1 = NULL;
2275 tree vec_oprnd0, vec_oprnd1=NULL;
2276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2277 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2278 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2280 enum tree_code code;
2281 enum machine_mode vec_mode;
2287 /* Is STMT a vectorizable binary/unary operation? */
2288 if (TREE_CODE (stmt) != MODIFY_EXPR)
2291 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the scalar tree code (PLUS_EXPR, NEGATE_EXPR, ...) to the optab
   that implements it on vectors of this type.  */
2294 operation = TREE_OPERAND (stmt, 1);
2295 code = TREE_CODE (operation);
2296 optab = optab_for_tree_code (code, vectype);
2298 /* Support only unary or binary operations. */
2299 op_type = TREE_CODE_LENGTH (code);
2300 if (op_type != unary_op && op_type != binary_op)
2302 if (vect_debug_details (NULL))
2303 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* All operands must be loop-invariant or defined in the current
   iteration (no cross-iteration dependences).  */
2307 for (i = 0; i < op_type; i++)
2309 op = TREE_OPERAND (operation, i);
2310 if (!vect_is_simple_use (op, loop, NULL))
2312 if (vect_debug_details (NULL))
2313 fprintf (dump_file, "use not simple.");
2318 /* Supportable by target? */
2321 if (vect_debug_details (NULL))
2322 fprintf (dump_file, "no optab.");
2325 vec_mode = TYPE_MODE (vectype);
2326 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2328 if (vect_debug_details (NULL))
2329 fprintf (dump_file, "op not supported by target.");
/* Analysis phase only: record the stmt kind and succeed.  */
2333 if (!vec_stmt) /* transformation not required. */
2335 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2341 if (vect_debug_details (NULL))
2342 fprintf (dump_file, "transform binary/unary operation.");
2345 scalar_dest = TREE_OPERAND (stmt, 0);
2346 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Fetch (or create) the vectorized defs for each scalar operand.  */
2349 op0 = TREE_OPERAND (operation, 0);
2350 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2352 if (op_type == binary_op)
2354 op1 = TREE_OPERAND (operation, 1);
2355 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2358 /* Arguments are ready. create the new vector stmt. */
2360 if (op_type == binary_op)
2361 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2362 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2364 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2365 build1 (code, vectype, vec_oprnd0));
2366 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2367 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2368 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2374 /* Function vectorizable_store.
2376 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2378 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2379 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2380 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2383 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2389 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2390 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2391 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2392 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2393 enum machine_mode vec_mode;
2395 enum dr_alignment_support alignment_support_cheme;
2397 /* Is vectorizable store? */
/* Analysis part: the stmt must store a "simple" rhs through an array or
   pointer reference for which a data-ref was recorded.  */
2399 if (TREE_CODE (stmt) != MODIFY_EXPR)
2402 scalar_dest = TREE_OPERAND (stmt, 0);
2403 if (TREE_CODE (scalar_dest) != ARRAY_REF
2404 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2407 op = TREE_OPERAND (stmt, 1);
2408 if (!vect_is_simple_use (op, loop, NULL))
2410 if (vect_debug_details (NULL))
2411 fprintf (dump_file, "use not simple.");
2415 vec_mode = TYPE_MODE (vectype);
2416 /* FORNOW. In some cases can vectorize even if data-type not supported
2417 (e.g. - array initialization with 0). */
2418 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2421 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Analysis phase only: record the stmt kind and succeed.  */
2425 if (!vec_stmt) /* transformation not required. */
2427 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2433 if (vect_debug_details (NULL))
2434 fprintf (dump_file, "transform store");
2436 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2437 gcc_assert (alignment_support_cheme);
/* Fixed: the assert used "=" (assignment), which both always passed and
   clobbered the computed scheme; only aligned stores are supported so
   far, so assert equality.  */
2438 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2440 /* Handle use - get the vectorized def from the defining stmt. */
2441 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2444 /* FORNOW: make sure the data reference is aligned. */
2445 vect_align_data_ref (stmt);
2446 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2447 data_ref = build_fold_indirect_ref (data_ref);
2449 /* Arguments are ready. create the new vector stmt. */
2450 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2451 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2457 /* vectorizable_load.
2459 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2461 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2462 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2463 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2466 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2469 tree vec_dest = NULL;
2470 tree data_ref = NULL;
2472 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2473 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2474 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2481 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2482 edge pe = loop_preheader_edge (loop);
2483 enum dr_alignment_support alignment_support_cheme;
2485 /* Is vectorizable load? */
/* Analysis part: an SSA name assigned from an array or pointer read that
   has a recorded data-ref, and whose vector mode has a move pattern.  */
2487 if (TREE_CODE (stmt) != MODIFY_EXPR)
2490 scalar_dest = TREE_OPERAND (stmt, 0);
2491 if (TREE_CODE (scalar_dest) != SSA_NAME)
2494 op = TREE_OPERAND (stmt, 1);
2495 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2498 if (!STMT_VINFO_DATA_REF (stmt_info))
2501 mode = (int) TYPE_MODE (vectype);
2503 /* FORNOW. In some cases can vectorize even if data-type not supported
2504 (e.g. - data copies). */
2505 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2507 if (vect_debug_details (loop))
2508 fprintf (dump_file, "Aligned load, but unsupported type.");
/* Analysis phase only: record the stmt kind and succeed.  */
2512 if (!vec_stmt) /* transformation not required. */
2514 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2520 if (vect_debug_details (NULL))
2521 fprintf (dump_file, "transform load.");
2523 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2524 gcc_assert (alignment_support_cheme);
/* Case 1: the access is aligned, or the target handles misaligned loads
   directly - emit a single (possibly misaligned) vector load.  */
2526 if (alignment_support_cheme == dr_aligned
2527 || alignment_support_cheme == dr_unaligned_supported)
2538 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2539 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2540 if (aligned_access_p (dr))
2541 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned but supported: wrap the access in MISALIGNED_INDIRECT_REF,
   carrying the known misalignment (in bits; -1 means unknown).  */
2544 int mis = DR_MISALIGNMENT (dr);
2545 tree tmis = (mis == -1 ?
2547 build_int_cst (integer_type_node, mis));
2548 tmis = int_const_binop (MULT_EXPR, tmis,
2549 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2550 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2552 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2553 new_temp = make_ssa_name (vec_dest, new_stmt);
2554 TREE_OPERAND (new_stmt, 0) = new_temp;
2555 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Case 2: software-pipelined realignment - load the two aligned vectors
   that straddle the desired data and merge them with REALIGN_LOAD.  */
2557 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2561 msq_init = *(floor(p1))
2562 p2 = initial_addr + VS - 1;
2563 magic = have_builtin ? builtin_result : initial_address;
2566 p2' = p2 + indx * vectype_size
2568 vec_dest = realign_load (msq, lsq, magic)
2582 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2583 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2584 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2586 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2587 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2588 new_temp = make_ssa_name (vec_dest, new_stmt);
2589 TREE_OPERAND (new_stmt, 0) = new_temp;
2590 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2591 gcc_assert (!new_bb);
2592 msq_init = TREE_OPERAND (new_stmt, 0);
2595 /* <2> Create lsq = *(floor(p2')) in the loop */
/* The second pointer is offset by (nunits - 1) elements so its aligned
   floor is the vector just past the misaligned data.  */
2596 offset = build_int_cst (integer_type_node,
2597 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2598 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2599 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2600 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2601 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2602 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2603 new_temp = make_ssa_name (vec_dest, new_stmt);
2604 TREE_OPERAND (new_stmt, 0) = new_temp;
2605 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2606 lsq = TREE_OPERAND (new_stmt, 0);
/* <3> Compute the "magic" third operand of REALIGN_LOAD: either a
   target-specific permutation mask built in the preheader, or, absent
   the builtin, the current data-ref address itself.  */
2610 if (targetm.vectorize.builtin_mask_for_load)
2612 /* Create permutation mask, if required, in loop preheader. */
2614 params = build_tree_list (NULL_TREE, init_addr);
2615 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2616 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2617 new_stmt = build_function_call_expr (builtin_decl, params);
2618 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
2619 new_temp = make_ssa_name (vec_dest, new_stmt);
2620 TREE_OPERAND (new_stmt, 0) = new_temp;
2621 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2622 gcc_assert (!new_bb);
2623 magic = TREE_OPERAND (new_stmt, 0);
2627 /* Use current address instead of init_addr for reduced reg pressure.
2629 magic = dataref_ptr;
2633 /* <4> Create msq = phi <msq_init, lsq> in loop */
/* The "most significant quadword" rotates: preheader value on entry,
   previous iteration's lsq thereafter.  */
2634 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2635 msq = make_ssa_name (vec_dest, NULL_TREE);
2636 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2637 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2638 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2639 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2642 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2643 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2644 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2645 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2646 new_temp = make_ssa_name (vec_dest, new_stmt);
2647 TREE_OPERAND (new_stmt, 0) = new_temp;
2648 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2653 *vec_stmt = new_stmt;
2658 /* Function vect_supportable_dr_alignment
2660 Return whether the data reference DR is supported with respect to its
/* Returns one of: dr_aligned, dr_unaligned_supported,
   dr_unaligned_software_pipeline, or dr_unaligned_unsupported.  */
2663 static enum dr_alignment_support
2664 vect_supportable_dr_alignment (struct data_reference *dr)
2666 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2667 enum machine_mode mode = (int) TYPE_MODE (vectype);
2669 if (aligned_access_p (dr))
2672 /* Possibly unaligned access. */
/* Misaligned reads can use the realign-load scheme when the target has
   the vec_realign_load pattern (and, if it defines a mask builtin, that
   builtin is actually available).  */
2674 if (DR_IS_READ (dr))
2676 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2677 && (!targetm.vectorize.builtin_mask_for_load
2678 || targetm.vectorize.builtin_mask_for_load ()))
2679 return dr_unaligned_software_pipeline;
/* Otherwise fall back to the target's claim that plain misaligned
   accesses in this mode are OK.  */
2681 if (targetm.vectorize.misaligned_mem_ok (mode))
2682 /* Can't software pipeline the loads. */
2683 return dr_unaligned_supported;
2687 return dr_unaligned_unsupported;
2691 /* Function vect_transform_stmt.
2693 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2696 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2698 bool is_store = false;
2699 tree vec_stmt = NULL_TREE;
2700 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the stmt kind recorded during the analysis phase; each
   vectorizable_* routine both re-checks and performs the transform.  */
2703 switch (STMT_VINFO_TYPE (stmt_info))
2705 case op_vec_info_type:
2706 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2710 case assignment_vec_info_type:
2711 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2715 case load_vec_info_type:
2716 done = vectorizable_load (stmt, bsi, &vec_stmt);
2720 case store_vec_info_type:
2721 done = vectorizable_store (stmt, bsi, &vec_stmt);
2726 if (vect_debug_details (NULL))
2727 fprintf (dump_file, "stmt not supported.");
/* Remember the vector stmt so later uses of STMT's def can find it.  */
2731 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2737 /* This function builds ni_name = number of iterations loop executes
2738 on the loop preheader. */
2741 vect_build_loop_niters (loop_vec_info loop_vinfo)
2743 tree ni_name, stmt, var;
2745 basic_block new_bb = NULL;
2746 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2747 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2749 var = create_tmp_var (TREE_TYPE (ni), "niters");
2750 add_referenced_tmp_var (var);
2751 if (TREE_CODE (ni) == INTEGER_CST)
2753 /* This case is generated when treating a known loop bound
2754 indivisible by VF. Here we cannot use force_gimple_operand. */
2755 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2756 ni_name = make_ssa_name (var, stmt);
2757 TREE_OPERAND (stmt, 0) = ni_name;
/* Symbolic bound: let force_gimple_operand gimplify NI into STMT.  */
2760 ni_name = force_gimple_operand (ni, &stmt, false, var);
/* Materialize the computation on the preheader edge; splitting the edge
   may create a new block that must be registered with the loop tree.  */
2762 pe = loop_preheader_edge (loop);
2764 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2766 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2772 /* This function generates the following statements:
2774 ni_name = number of iterations loop executes
2775 ratio = ni_name / vf
2776 ratio_mult_vf_name = ratio * vf
2778 and places them at the loop preheader edge. */
2781 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2782 tree *ratio_mult_vf_name_p, tree *ratio_p)
2789 tree ratio_mult_vf_name, ratio_mult_vf;
2790 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2791 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2795 /* Generate temporary variable that contains
2796 number of iterations loop executes. */
2798 ni_name = vect_build_loop_niters (loop_vinfo);
2801 vf is a power of 2; then ratio = n >> log2 (vf). */
2802 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2803 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2805 /* Update initial conditions of loop copy. */
2807 /* ratio_mult_vf = ratio * vf;
2808 since vf is a power of 2, ratio_mult_vf = ratio << log2 (vf). */
2810 i = exact_log2 (vf);
2811 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2812 add_referenced_tmp_var (ratio_mult_vf);
2814 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2816 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2817 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2818 ratio, build_int_cst (unsigned_type_node,
2821 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
/* Insert the shift on the preheader edge and keep the loop tree up to
   date if the insertion created a new block.  */
2823 pe = loop_preheader_edge (loop);
2824 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2826 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2828 *ni_name_p = ni_name;
2829 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2836 /* This function generates stmt
2840 and attaches it to preheader of LOOP. */
/* Builds "bnd = N >> log2 (VF)" (i.e. N / VF for power-of-two VF) as a
   new SSA name on LOOP's preheader edge and returns it.  */
2843 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2845 tree var, stmt, var_name;
2850 /* create temporary variable */
2851 var = create_tmp_var (TREE_TYPE (n), "bnd");
2852 add_referenced_tmp_var (var);
2854 var_name = make_ssa_name (var, NULL_TREE);
2856 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2858 i = exact_log2 (vf);
2859 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2860 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2861 n, build_int_cst (unsigned_type_node,i)));
2863 SSA_NAME_DEF_STMT (var_name) = stmt;
2865 pe = loop_preheader_edge (loop);
2866 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2868 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2870 if (vect_debug_details (NULL))
2871 fprintf (dump_file, "New bb on preheader edge was not generated.");
2877 /* Function vect_transform_loop_bound.
2879 Create a new exit condition for the loop. */
2882 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2884 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2885 tree orig_cond_expr;
2886 HOST_WIDE_INT old_N = 0;
2888 tree new_loop_bound;
/* With a symbolic iteration count the new bound is NITERS; with a known
   constant count it is old_N / VF.  */
2892 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2895 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2897 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2899 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2900 #ifdef ENABLE_CHECKING
2901 gcc_assert (orig_cond_expr);
2904 /* new loop exit test: */
/* Use the type of the original bound so the comparison stays
   type-consistent.  */
2905 lb_type = TREE_TYPE (TREE_OPERAND (COND_EXPR_COND (orig_cond_expr), 1));
2908 fold_convert (lb_type, build_int_cst (unsigned_type_node, old_N/vf));
2910 new_loop_bound = niters;
2912 slpeel_make_loop_iterate_ntimes (loop, new_loop_bound);
2916 /* Function vect_update_ivs_after_vectorizer.
2918 "Advance" the induction variables of LOOP to the value they should take
2919 after the execution of LOOP. This is currently necessary because the
2920 vectorizer does not handle induction variables that are used after the
2921 loop. Such a situation occurs when the last iterations of LOOP are
2923 1. We introduced new uses after LOOP for IVs that were not originally used
2924 after LOOP: the IVs of LOOP are now used by an epilog loop.
2925 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2926 times, whereas the loop IVs should be bumped N times.
2929 - LOOP - a loop that is going to be vectorized. The last few iterations
2930 of LOOP were peeled.
2931 - NITERS - the number of iterations that LOOP executes (before it is
2932 vectorized). i.e, the number of times the ivs should be bumped.
2933 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2934 coming out from LOOP on which there are uses of the LOOP ivs
2935 (this is the path from LOOP->exit to epilog_loop->preheader).
2937 The new definitions of the ivs are placed in LOOP->exit.
2938 The phi args associated with the edge UPDATE_E in the bb
2939 UPDATE_E->dest are updated accordingly.
2941 Assumption 1: Like the rest of the vectorizer, this function assumes
2942 a single loop exit that has a single predecessor.
2944 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2945 organized in the same order.
2947 Assumption 3: The access function of the ivs is simple enough (see
2948 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2950 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2951 coming out of LOOP on which the ivs of LOOP are used (this is the path
2952 that leads to the epilog loop; other paths skip the epilog loop). This
2953 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2954 needs to have its phis updated.
2958 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2960 basic_block exit_bb = loop->exit_edges[0]->dest;
2962 basic_block update_bb = update_e->dest;
2964 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2966 /* Make sure there exists a single-predecessor exit bb: */
2967 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
/* Walk the header phis and the update_bb phis in lockstep (Assumption 2
   guarantees they pair up).  */
2969 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2971 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2973 tree access_fn = NULL;
2974 tree evolution_part;
2977 tree var, stmt, ni, ni_name;
2978 block_stmt_iterator last_bsi;
2980 /* Skip virtual phi's. */
2981 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2983 if (vect_debug_details (NULL))
2984 fprintf (dump_file, "virtual phi. skip.");
2988 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2989 gcc_assert (access_fn);
2991 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2992 gcc_assert (evolution_part != NULL_TREE);
2994 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2995 of degree >= 2 or exponential. */
2996 gcc_assert (!tree_is_chrec (evolution_part));
2998 step_expr = evolution_part;
2999 init_expr = unshare_expr (initial_condition (access_fn));
/* Final IV value after NITERS bumps: init + niters * step.  */
3001 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3002 build2 (MULT_EXPR, TREE_TYPE (niters),
3003 niters, step_expr), init_expr);
3005 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3006 add_referenced_tmp_var (var);
3008 ni_name = force_gimple_operand (ni, &stmt, false, var);
3010 /* Insert stmt into exit_bb. */
3011 last_bsi = bsi_last (exit_bb);
3013 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3015 /* Fix phi expressions in the successor bb. */
3016 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3017 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3018 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3023 /* Function vect_do_peeling_for_loop_bound
3025 Peel the last iterations of the loop represented by LOOP_VINFO.
3026 The peeled iterations form a new epilog loop. Given that the loop now
3027 iterates NITERS times, the new epilog loop iterates
3028 NITERS % VECTORIZATION_FACTOR times.
3030 The original loop will later be made to iterate
3031 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3034 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3035 struct loops *loops)
3038 tree ni_name, ratio_mult_vf_name;
3039 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3040 struct loop *new_loop;
3042 #ifdef ENABLE_CHECKING
3046 if (vect_debug_details (NULL))
3047 fprintf (dump_file, "\n<<vect_do_peeling_for_loop_bound>>\n");
3049 /* Generate the following variables on the preheader of original loop:
3051 ni_name = number of iteration the original loop executes
3052 ratio = ni_name / vf
3053 ratio_mult_vf_name = ratio * vf */
3054 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3055 &ratio_mult_vf_name, ratio);
3057 /* Update loop info. */
3058 loop->pre_header = loop_preheader_edge (loop)->src;
3059 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3061 #ifdef ENABLE_CHECKING
3062 loop_num = loop->num;
/* Peel off the last NITERS % VF iterations into a scalar epilog loop;
   the original loop keeps the first ratio * vf iterations.  */
3064 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3065 ratio_mult_vf_name, ni_name, false);
3066 #ifdef ENABLE_CHECKING
3067 gcc_assert (new_loop);
3068 gcc_assert (loop_num == loop->num);
3069 slpeel_verify_cfg_after_peeling (loop, new_loop);
3072 /* A guard that controls whether the new_loop is to be executed or skipped
3073 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3074 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3075 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3076 is on the path where the LOOP IVs are used and need to be updated. */
3078 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3079 update_e = EDGE_PRED (new_loop->pre_header, 0);
3081 update_e = EDGE_PRED (new_loop->pre_header, 1);
3083 /* Update IVs of original loop as if they were advanced
3084 by ratio_mult_vf_name steps. */
3085 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3087 /* After peeling we have to reset scalar evolution analyzer. */
3094 /* Function vect_gen_niters_for_prolog_loop
3096 Set the number of iterations for the loop represented by LOOP_VINFO
3097 to the minimum between NITERS (the original iteration count of the loop)
3098 and the misalignment of DR - the first data reference recorded in
3099 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3100 this loop, the data reference DR will refer to an aligned location. */
3103 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3105 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3106 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3107 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3109 tree iters, iters_name;
3112 tree dr_stmt = DR_STMT (dr);
3113 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3114 tree start_addr, byte_miss_align, elem_miss_align;
3115 int vec_type_align =
3116 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3119 tree new_stmt_list = NULL_TREE;
/* Compute DR's start address in the preheader so its misalignment can be
   computed at run time.  */
3121 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3122 &new_stmt_list, NULL_TREE);
3124 pe = loop_preheader_edge (loop);
3125 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3127 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
/* byte_miss_align = start_addr & (vec_type_align - 1): misaligned bytes.  */
3130 build (BIT_AND_EXPR, integer_type_node, start_addr,
3131 build (MINUS_EXPR, integer_type_node,
3132 build_int_cst (unsigned_type_node,
3133 vec_type_align), integer_one_node));
/* Convert the byte misalignment into a count of vector elements
   (vec_type_align / vf is presumably the element size in bytes - TODO
   confirm against the definition of vf).  */
3134 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3135 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3136 byte_miss_align, tmp1);
/* prolog iterations = (vf - elem_miss_align) & (vf - 1), so an already
   aligned access yields zero peeled iterations.  */
3139 build (BIT_AND_EXPR, integer_type_node,
3140 build (MINUS_EXPR, integer_type_node,
3141 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3142 build (MINUS_EXPR, integer_type_node,
3143 build_int_cst (unsigned_type_node, vf), integer_one_node));
/* Never peel more than the loop actually iterates.  */
3145 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3146 var = create_tmp_var (TREE_TYPE (iters), "iters");
3147 add_referenced_tmp_var (var);
3148 iters_name = force_gimple_operand (iters, &stmt, false, var);
3150 /* Insert stmt on loop preheader edge. */
3151 pe = loop_preheader_edge (loop);
3153 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3155 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3161 /* Function vect_update_inits_of_dr
3163 NITERS iterations were peeled from LOOP. DR represents a data reference
3164 in LOOP. This function updates the information recorded in DR to
3165 account for the fact that the first NITERS iterations had already been
3166 executed. Specifically, it updates the initial_condition of the
3167 access_function of DR. */
3170 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3173 tree access_fn = DR_ACCESS_FN (dr, 0);
3174 tree init, init_new, step;
3176 step = evolution_part_in_loop_num (access_fn, loop->num);
3177 init = initial_condition (access_fn);
/* New initial condition: init + niters * step, i.e. where the access
   stands after NITERS peeled iterations.  */
3179 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3180 build (MULT_EXPR, TREE_TYPE (niters),
3181 niters, step), init);
3182 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3188 /* Function vect_update_inits_of_drs
3190 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3191 This function updates the information recorded for the data references in
3192 the loop to account for the fact that the first NITERS iterations had
3193 already been executed. Specifically, it updates the initial_condition of the
3194 access_function of all the data_references in the loop. */
3197 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3200 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3201 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3202 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3205 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
/* Update every recorded write data-ref, then every read data-ref.  */
3207 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3209 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3210 vect_update_inits_of_dr (dr, loop, niters);
3213 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3215 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3216 vect_update_inits_of_dr (dr, loop, niters);
3221 /* Function vect_do_peeling_for_alignment
3223 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3224 'niters' is set to the misalignment of one of the data references in the
3225 loop, thereby forcing it to refer to an aligned location at the beginning
3226 of the execution of this loop. The data reference for which we are
3227 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3230 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3233 tree niters_of_prolog_loop, ni_name;
3235 struct loop *new_loop;
3237 if (vect_debug_details (NULL))
3238 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
/* Compute, at run time, how many scalar iterations must run before the
   unaligned data-ref becomes aligned.  */
3240 ni_name = vect_build_loop_niters (loop_vinfo);
3241 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3243 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3245 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3246 niters_of_prolog_loop, ni_name, true);
3247 #ifdef ENABLE_CHECKING
3248 gcc_assert (new_loop);
3249 slpeel_verify_cfg_after_peeling (new_loop, loop);
3252 /* Update number of times loop executes. */
3253 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3254 LOOP_VINFO_NITERS (loop_vinfo) =
3255 build (MINUS_EXPR, integer_type_node, n_iters, niters_of_prolog_loop);
3257 /* Update the init conditions of the access functions of all data refs. */
3258 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3260 /* After peeling we have to reset scalar evolution analyzer. */
3267 /* Function vect_transform_loop.
3269 The analysis phase has determined that the loop is vectorizable.
3270 Vectorize the loop - created vectorized stmts to replace the scalar
3271 stmts in the loop, and update the loop exit condition. */
3274 vect_transform_loop (loop_vec_info loop_vinfo,
3275 struct loops *loops ATTRIBUTE_UNUSED)
3277 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3278 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3279 int nbbs = loop->num_nodes;
3280 block_stmt_iterator si;
3283 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3285 if (vect_debug_details (NULL))
3286 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3289 /* Peel the loop if there are data refs with unknown alignment.
3290 Only one data ref with unknown store is allowed. */
3292 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3293 vect_do_peeling_for_alignment (loop_vinfo, loops);
3295 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3296 compile time constant), or it is a constant that doesn't divide by the
3297 vectorization factor, then an epilog loop needs to be created.
3298 We therefore duplicate the loop: the original loop will be vectorized,
3299 and will compute the first (n/VF) iterations. The second copy of the loop
3300 will remain scalar and will compute the remaining (n%VF) iterations.
3301 (VF is the vectorization factor). */
3303 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3304 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3305 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3306 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3308 /* 1) Make sure the loop header has exactly two entries
3309 2) Make sure we have a preheader basic block. */
3311 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3313 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3316 /* FORNOW: the vectorizer supports only loops which body consist
3317 of one basic block (header + empty latch). When the vectorizer will
3318 support more involved loop forms, the order by which the BBs are
3319 traversed need to be reconsidered. */
3321 for (i = 0; i < nbbs; i++)
3323 basic_block bb = bbs[i];
3325 for (si = bsi_start (bb); !bsi_end_p (si);)
3327 tree stmt = bsi_stmt (si);
3328 stmt_vec_info stmt_info;
3331 if (vect_debug_details (NULL))
3333 fprintf (dump_file, "------>vectorizing statement: ");
3334 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Statements the analysis marked irrelevant are left untouched.  */
3336 stmt_info = vinfo_for_stmt (stmt);
3337 gcc_assert (stmt_info);
3338 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3343 #ifdef ENABLE_CHECKING
3344 /* FORNOW: Verify that all stmts operate on the same number of
3345 units and no inner unrolling is necessary. */
3347 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3348 == vectorization_factor);
3350 /* -------- vectorize statement ------------ */
3351 if (vect_debug_details (NULL))
3352 fprintf (dump_file, "transform statement.");
3354 is_store = vect_transform_stmt (stmt, &si);
3357 /* free the attached stmt_vec_info and remove the stmt. */
3358 stmt_ann_t ann = stmt_ann (stmt);
3360 set_stmt_info (ann, NULL);
/* Finally retarget the exit test to the vectorized iteration count.  */
3369 vect_transform_loop_bound (loop_vinfo, ratio);
3371 if (vect_debug_details (loop))
3372 fprintf (dump_file,"Success! loop vectorized.");
3373 if (vect_debug_stats (loop))
3374 fprintf (dump_file, "LOOP VECTORIZED.");
3378 /* Function vect_is_simple_use.
3381 LOOP - the loop that is being vectorized.
3382 OPERAND - operand of a stmt in LOOP.
3383 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3385 Returns whether a stmt with OPERAND can be vectorized.
3386 Supportable operands are constants, loop invariants, and operands that are
3387 defined by the current iteration of the loop. Unsupportable operands are
3388 those that are defined by a previous iteration of the loop (as is the case
3389 in reduction/induction computations). */
/* NOTE(review): the embedded original line numbers below are non-contiguous;
   several statements (returns, braces) were dropped by the extraction, so
   only the checks that are visible here are documented.  */
3392 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
/* Integer/real constants are listed as supportable in the header comment;
   they need no def-stmt lookup.  */
3400 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
/* Non-constant operands must be SSA_NAMEs to be analyzable.  */
3403 if (TREE_CODE (operand) != SSA_NAME)
3406 def_stmt = SSA_NAME_DEF_STMT (operand);
3407 if (def_stmt == NULL_TREE )
3409 if (vect_debug_details (NULL))
3410 fprintf (dump_file, "no def_stmt.");
3414 /* empty stmt is expected only in case of a function argument.
3415 (Otherwise - we expect a phi_node or a modify_expr). */
3416 if (IS_EMPTY_STMT (def_stmt))
3418 tree arg = TREE_OPERAND (def_stmt, 0);
/* A default definition whose underlying value is a constant is acceptable;
   anything else reaching an empty stmt is unexpected and dumped below.  */
3419 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3421 if (vect_debug_details (NULL))
3423 fprintf (dump_file, "Unexpected empty stmt: ");
3424 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3429 /* phi_node inside the loop indicates an induction/reduction pattern.
3430 This is not supported yet. */
3431 bb = bb_for_stmt (def_stmt);
3432 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3434 if (vect_debug_details (NULL))
3435 fprintf (dump_file, "reduction/induction - unsupported.");
3436 return false; /* FORNOW: not supported yet. */
3439 /* Expecting a modify_expr or a phi_node. */
3440 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3441 || TREE_CODE (def_stmt) == PHI_NODE)
3452 /* Function vect_analyze_operations.
3454 Scan the loop stmts and make sure they are all vectorizable. */
/* NOTE(review): original line numbering is non-contiguous — some braces,
   continue/return statements and fprintf continuation lines were dropped
   by the extraction.  Comments below describe only the visible checks.  */
3457 vect_analyze_operations (loop_vec_info loop_vinfo)
3459 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3460 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3461 int nbbs = loop->num_nodes;
3462 block_stmt_iterator si;
3463 int vectorization_factor = 0;
3468 if (vect_debug_details (NULL))
3469 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
/* Walk every stmt in every basic block of the loop.  */
3471 for (i = 0; i < nbbs; i++)
3473 basic_block bb = bbs[i];
3475 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3477 tree stmt = bsi_stmt (si);
3479 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3482 if (vect_debug_details (NULL))
3484 fprintf (dump_file, "==> examining statement: ");
3485 print_generic_expr (dump_file, stmt, TDF_SLIM);
3488 gcc_assert (stmt_info);
3490 /* skip stmts which do not need to be vectorized.
3491 this is expected to include:
3492 - the COND_EXPR which is the loop exit condition
3493 - any LABEL_EXPRs in the loop
3494 - computations that are used only for array indexing or loop
3497 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3499 if (vect_debug_details (NULL))
3500 fprintf (dump_file, "irrelevant.");
/* A stmt that is already in vector form cannot be re-vectorized.  */
3504 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3506 if (vect_debug_stats (loop) || vect_debug_details (loop))
3508 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3509 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Determine the scalar type to build a vectype from: prefer the type of
   the data reference, else the lhs of a MODIFY_EXPR, else the stmt type.  */
3514 if (STMT_VINFO_DATA_REF (stmt_info))
3515 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3516 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3517 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3519 scalar_type = TREE_TYPE (stmt);
3521 if (vect_debug_details (NULL))
3523 fprintf (dump_file, "get vectype for scalar type: ");
3524 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3527 vectype = get_vectype_for_scalar_type (scalar_type);
3530 if (vect_debug_stats (loop) || vect_debug_details (loop))
3532 fprintf (dump_file, "not vectorized: unsupported data-type ");
3533 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3538 if (vect_debug_details (NULL))
3540 fprintf (dump_file, "vectype: ");
3541 print_generic_expr (dump_file, vectype, TDF_SLIM);
3543 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* The stmt must be handled by at least one of the vectorizable_* checks.  */
3545 ok = (vectorizable_operation (stmt, NULL, NULL)
3546 || vectorizable_assignment (stmt, NULL, NULL)
3547 || vectorizable_load (stmt, NULL, NULL)
3548 || vectorizable_store (stmt, NULL, NULL));
3552 if (vect_debug_stats (loop) || vect_debug_details (loop))
3554 fprintf (dump_file, "not vectorized: stmt not supported: ");
3555 print_generic_expr (dump_file, stmt, TDF_SLIM);
3560 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3561 if (vect_debug_details (NULL))
3562 fprintf (dump_file, "nunits = %d", nunits);
/* All stmts in the loop must agree on a single vectorization factor.  */
3564 if (vectorization_factor)
3566 /* FORNOW: don't allow mixed units.
3567 This restriction will be relaxed in the future. */
3568 if (nunits != vectorization_factor)
3570 if (vect_debug_stats (loop) || vect_debug_details (loop))
3571 fprintf (dump_file, "not vectorized: mixed data-types");
3576 vectorization_factor = nunits;
3578 #ifdef ENABLE_CHECKING
3579 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3580 * vectorization_factor == UNITS_PER_SIMD_WORD);
3585 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* A factor of 1 (or 0, i.e. never set) means no useful vector form.  */
3587 if (vectorization_factor <= 1)
3589 if (vect_debug_stats (loop) || vect_debug_details (loop))
3590 fprintf (dump_file, "not vectorized: unsupported data-type");
3593 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3595 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3597 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3598 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* If an epilog loop will be needed (unknown niters, or niters not a
   multiple of the factor), verify that it can actually be created.  */
3600 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3601 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3603 if (vect_debug_stats (loop) || vect_debug_details (loop))
3604 fprintf (dump_file, "epilog loop required.");
3605 if (!vect_can_advance_ivs_p (loop))
3607 if (vect_debug_stats (loop) || vect_debug_details (loop))
3608 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3611 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3613 if (vect_debug_stats (loop) || vect_debug_details (loop))
3614 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3623 /* Function exist_non_indexing_operands_for_use_p
3625 USE is one of the uses attached to STMT. Check if USE is
3626 used in STMT for anything other than indexing an array. */
3629 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3632 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3634 /* USE corresponds to some operand in STMT. If there is no data
3635 reference in STMT, then any operand that corresponds to USE
3636 is not indexing an array. */
3637 if (!STMT_VINFO_DATA_REF (stmt_info))
3640 /* STMT has a data_ref. FORNOW this means that it is one of
3641 the following forms:
3644 (This should have been verified in analyze_data_refs).
3646 'var' in the second case corresponds to a def, not a use,
3647 so USE cannot correspond to any operands that are not used
3650 Therefore, all we need to check is if STMT falls into the
3651 first case, and whether var corresponds to USE. */
/* lhs is an SSA_NAME => the data-ref is on the rhs (a load: var = a[i]).  */
3653 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3656 operand = TREE_OPERAND (stmt, 1);
3658 if (TREE_CODE (operand) != SSA_NAME)
3668 /* Function vect_is_simple_iv_evolution.
3670 FORNOW: A simple evolution of an induction variable in the loop is
3671 considered a polynomial evolution with constant step. */
3674 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3675 tree * step, bool strict)
3680 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3682 /* When there is no evolution in this loop, the evolution function
3684 if (evolution_part == NULL_TREE)
3687 /* When the evolution is a polynomial of degree >= 2
3688 the evolution function is not "simple". */
/* The evolution part being itself a chrec implies degree >= 2.  */
3689 if (tree_is_chrec (evolution_part))
3692 step_expr = evolution_part;
3693 init_expr = unshare_expr (initial_condition (access_fn));
3695 if (vect_debug_details (NULL))
3697 fprintf (dump_file, "step: ");
3698 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3699 fprintf (dump_file, ", init: ");
3700 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* The step must be a compile-time integer constant.  */
3706 if (TREE_CODE (step_expr) != INTEGER_CST)
3708 if (vect_debug_details (NULL))
3709 fprintf (dump_file, "step unknown.");
/* In strict mode (per the STRICT parameter) only a unit step qualifies;
   the handling around this check is on lines not visible in this extract.  */
3714 if (!integer_onep (step_expr))
3716 if (vect_debug_details (NULL))
3717 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3725 /* Function vect_analyze_scalar_cycles.
3727 Examine the cross iteration def-use cycles of scalar variables, by
3728 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3729 cycles that they represent do not impede vectorization.
3731 FORNOW: Reduction as in the following loop, is not supported yet:
3735 The cross-iteration cycle corresponding to variable 'sum' will be
3736 considered too complicated and will impede vectorization.
3738 FORNOW: Induction as in the following loop, is not supported yet:
3743 However, the following loop *is* vectorizable:
3748 In both loops there exists a def-use cycle for the variable i:
3749 loop: i_2 = PHI (i_0, i_1)
3754 The evolution of the above cycle is considered simple enough,
3755 however, we also check that the cycle does not need to be
3756 vectorized, i.e - we check that the variable that this cycle
3757 defines is only used for array indexing or in stmts that do not
3758 need to be vectorized. This is not the case in loop2, but it
3759 *is* the case in loop3. */
3762 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3765 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3766 basic_block bb = loop->header;
3769 if (vect_debug_details (NULL))
3770 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Every cross-iteration scalar cycle is rooted at a PHI in the loop
   header; examine each of them.  */
3772 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3774 tree access_fn = NULL;
3776 if (vect_debug_details (NULL))
3778 fprintf (dump_file, "Analyze phi: ");
3779 print_generic_expr (dump_file, phi, TDF_SLIM);
3782 /* Skip virtual phi's. The data dependences that are associated with
3783 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3785 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3787 if (vect_debug_details (NULL))
3788 fprintf (dump_file, "virtual phi. skip.");
3792 /* Analyze the evolution function. */
3794 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3795 those of loop induction variables; This property is verified here.
3797 Furthermore, if that induction variable is used in an operation
3798 that needs to be vectorized (i.e, is not solely used to index
3799 arrays and check the exit condition) - we do not support its
3800 vectorization yet. This property is verified in vect_is_simple_use,
3801 during vect_analyze_operations. */
3803 access_fn = /* instantiate_parameters
3805 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3809 if (vect_debug_stats (loop) || vect_debug_details (loop))
3810 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3814 if (vect_debug_details (NULL))
3816 fprintf (dump_file, "Access function of PHI: ");
3817 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3820 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3823 if (vect_debug_stats (loop) || vect_debug_details (loop))
3824 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3833 /* Function vect_analyze_data_ref_dependence.
3835 Return TRUE if there (might) exist a dependence between a memory-reference
3836 DRA and a memory-reference DRB. */
3839 vect_analyze_data_ref_dependence (struct data_reference *dra,
3840 struct data_reference *drb,
3844 struct data_dependence_relation *ddr;
/* Cheap test first: distinct base names can never alias; if the bases
   cannot be told apart we must conservatively assume a dependence.  */
3846 if (!array_base_name_differ_p (dra, drb, &differ_p))
3848 if (vect_debug_stats (loop) || vect_debug_details (loop))
3851 "not vectorized: can't determine dependence between: ");
3852 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3853 fprintf (dump_file, " and ");
3854 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Same base: run the affine dependence test.  */
3862 ddr = initialize_data_dependence_relation (dra, drb);
3863 compute_affine_dependence (ddr);
/* NOTE(review): chrec_known here means the relation was resolved as
   independent; the early `return false' after this test sits on a line
   dropped by the extraction — the failure dump below is the fall-through
   "possible dependence" path, not the body of this if.  Verify against
   the full source.  */
3865 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3868 if (vect_debug_stats (loop) || vect_debug_details (loop))
3871 "not vectorized: possible dependence between data-refs ");
3872 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3873 fprintf (dump_file, " and ");
3874 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3881 /* Function vect_analyze_data_ref_dependences.
3883 Examine all the data references in the loop, and make sure there do not
3884 exist any data dependences between them.
3886 TODO: dependences whose distance is greater than the vectorization factor
3890 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3893 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3894 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3897 /* Examine store-store (output) dependences. */
3899 if (vect_debug_details (NULL))
3900 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3902 if (vect_debug_details (NULL))
3903 fprintf (dump_file, "compare all store-store pairs.");
/* Each unordered pair of writes is checked once (j starts at i + 1).  */
3905 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3907 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3909 struct data_reference *dra =
3910 VARRAY_GENERIC_PTR (loop_write_refs, i);
3911 struct data_reference *drb =
3912 VARRAY_GENERIC_PTR (loop_write_refs, j);
3913 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3918 /* Examine load-store (true/anti) dependences. */
3920 if (vect_debug_details (NULL))
3921 fprintf (dump_file, "compare all load-store pairs.");
/* Every read is checked against every write (full cross product).  */
3923 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3925 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3927 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3928 struct data_reference *drb =
3929 VARRAY_GENERIC_PTR (loop_write_refs, j);
3930 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3939 /* Function vect_get_first_index.
3941 REF is a data reference.
3942 If it is an ARRAY_REF: if its lower bound is simple enough,
3943 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3944 If it is not an ARRAY_REF: REF has no "first index";
3945 ARRAY_FIRST_INDEX is set to zero, and the function returns TRUE. */
3948 vect_get_first_index (tree ref, tree *array_first_index)
/* Non-arrays have no lower bound; report zero.  */
3952 if (TREE_CODE (ref) != ARRAY_REF)
3953 *array_first_index = size_zero_node;
3956 array_start = array_ref_low_bound (ref);
/* "Simple enough" means the lower bound fits a signed HOST_WIDE_INT.  */
3957 if (!host_integerp (array_start,0))
3959 if (vect_debug_details (NULL))
3961 fprintf (dump_file, "array min val not simple integer cst.");
3962 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3966 *array_first_index = array_start;
3973 /* Function vect_compute_array_base_alignment.
3974 A utility function of vect_compute_array_ref_alignment.
3976 Compute the misalignment of ARRAY in bits.
3979 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3980 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3981 if NULL: don't compute misalignment, just return the base of ARRAY.
3982 PREV_DIMENSIONS - initialized to one.
3983 MISALIGNMENT - the computed misalignment in bits.
3986 If VECTYPE is not NULL:
3987 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3988 the base of the array, and put the computed misalignment in MISALIGNMENT.
3990 Return the base of the array.
3992 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3993 a[idx_N]...[idx_2][idx_1] is
3994 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3995 ... + idx_N * dim_0 * ... * dim_N-1}.
3996 (The misalignment of &a is not checked here).
3997 Note, that every term contains dim_0, therefore, if dim_0 is a
3998 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3999 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
4000 NUNITS, we can say that the misalignment of the sum is equal to
4001 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
4002 we can't determine this array misalignment, and we return
4004 We proceed recursively in this manner, accumulating total misalignment
4005 and the multiplication of previous dimensions for correct misalignment
4009 vect_compute_array_base_alignment (tree array,
4011 tree *prev_dimensions,
4016 tree dimension_size;
4018 tree bits_per_vectype;
4019 tree bits_per_vectype_unit;
4021 /* The 'stop condition' of the recursion. */
4022 if (TREE_CODE (array) != ARRAY_REF)
4026 /* Just get the base decl. */
4027 return vect_compute_array_base_alignment
4028 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* Accumulated values must stay representable unsigned host integers.  */
4030 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4031 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
/* Number of elements in this dimension: max - min + 1.  */
4034 domain = TYPE_DOMAIN (TREE_TYPE (array));
4036 int_const_binop (PLUS_EXPR,
4037 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4038 TYPE_MIN_VALUE (domain), 1),
4041 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4042 is a multiple of NUNITS:
4044 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4046 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4047 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4048 if (integer_zerop (mis))
4049 /* This array is aligned. Continue just in order to get the base decl. */
4050 return vect_compute_array_base_alignment
4051 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4053 index = TREE_OPERAND (array, 1);
4054 if (!host_integerp (index, 1))
4055 /* The current index is not constant. */
/* Normalize the index relative to the dimension's lower bound.  */
4058 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4060 bits_per_vectype = fold_convert (unsigned_type_node,
4061 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4062 GET_MODE_SIZE (TYPE_MODE (vectype))));
4063 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4064 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4065 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4067 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4071 (*misalignment + index_val * dimension_size * *prev_dimensions)
/* All arithmetic is done on constant trees, modulo the vectype size.  */
4075 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4076 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4077 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4078 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4079 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
/* Carry the product of the dimensions processed so far to the next level.  */
4082 *prev_dimensions = int_const_binop (MULT_EXPR,
4083 *prev_dimensions, dimension_size, 1);
/* Recurse on the next-outer dimension.  */
4085 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4091 /* Function vect_compute_data_ref_alignment
4093 Compute the misalignment of the data reference DR.
4096 1. If during the misalignment computation it is found that the data reference
4097 cannot be vectorized then false is returned.
4098 2. DR_MISALIGNMENT (DR) is defined.
4100 FOR NOW: No analysis is actually performed. Misalignment is calculated
4101 only for trivial cases. TODO. */
4104 vect_compute_data_ref_alignment (struct data_reference *dr,
4105 loop_vec_info loop_vinfo)
4107 tree stmt = DR_STMT (dr);
4108 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4109 tree ref = DR_REF (dr);
4112 tree offset = size_zero_node;
4113 tree base, bit_offset, alignment;
4114 tree unit_bits = fold_convert (unsigned_type_node,
4115 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4117 bool base_aligned_p;
4119 if (vect_debug_details (NULL))
4120 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4122 /* Initialize misalignment to unknown. */
4123 DR_MISALIGNMENT (dr) = -1;
4125 scalar_type = TREE_TYPE (ref);
4126 vectype = get_vectype_for_scalar_type (scalar_type);
/* No vectype for this scalar type => the data-ref is not vectorizable.  */
4129 if (vect_debug_details (NULL))
4131 fprintf (dump_file, "no vectype for stmt: ");
4132 print_generic_expr (dump_file, stmt, TDF_SLIM);
4133 fprintf (dump_file, " scalar_type: ");
4134 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4136 /* It is not possible to vectorize this data reference. */
4139 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4140 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4142 if (TREE_CODE (ref) == ARRAY_REF)
4145 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
/* Resolve the base object and the bit offset of the access from it.  */
4147 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4148 loop_vinfo, &bit_offset, &base_aligned_p);
4151 if (vect_debug_details (NULL))
4153 fprintf (dump_file, "Unknown alignment for access: ");
4154 print_generic_expr (dump_file,
4155 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
/* If the base is not naturally aligned, try to force its alignment.  */
4160 if (!base_aligned_p)
4162 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4164 if (vect_debug_details (NULL))
4166 fprintf (dump_file, "can't force alignment of ref: ");
4167 print_generic_expr (dump_file, ref, TDF_SLIM);
4172 /* Force the alignment of the decl.
4173 NOTE: This is the only change to the code we make during
4174 the analysis phase, before deciding to vectorize the loop. */
4175 if (vect_debug_details (NULL))
4176 fprintf (dump_file, "force alignment");
4177 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4178 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4181 /* At this point we assume that the base is aligned, and the offset from it
4182 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4183 gcc_assert (base_aligned_p
4184 || (TREE_CODE (base) == VAR_DECL
4185 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4187 /* Convert into bytes. */
4188 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4189 /* Check that there is no remainder in bits. */
4190 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
/* A sub-byte offset cannot be expressed as a byte misalignment.  */
4191 if (!integer_zerop (bit_offset))
4193 if (vect_debug_details (NULL))
4195 fprintf (dump_file, "bit offset alignment: ");
4196 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4201 /* Alignment required, in bytes: */
4202 alignment = fold_convert (unsigned_type_node,
4203 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4205 /* Modulo alignment. */
4206 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4207 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4209 if (vect_debug_details (NULL))
4210 fprintf (dump_file, "unexpected misalign value");
/* Record the misalignment (in bytes, modulo the vector alignment).  */
4214 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4216 if (vect_debug_details (NULL))
4217 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4223 /* Function vect_compute_array_ref_alignment
4225 Compute the alignment of an array-ref.
4226 The alignment we compute here is relative to
4227 TYPE_ALIGN(VECTYPE) boundary.
4230 OFFSET - the alignment in bits
4231 Return value - the base of the array-ref. E.g,
4232 if the array-ref is a.b[k].c[i][j] the returned
4237 vect_compute_array_ref_alignment (struct data_reference *dr,
4238 loop_vec_info loop_vinfo,
4242 tree array_first_index = size_zero_node;
4244 tree ref = DR_REF (dr);
4245 tree scalar_type = TREE_TYPE (ref);
4246 tree oprnd0 = TREE_OPERAND (ref, 0);
4247 tree dims = size_one_node;
4248 tree misalign = size_zero_node;
4249 tree next_ref, this_offset = size_zero_node;
/* Compute the misalignment contributed by the enclosing dimensions.  */
4253 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4254 /* The reference is an array without its last index. */
4255 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4258 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4261 /* Alignment is not requested. Just return the base. */
4264 /* Compute alignment. */
4265 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4267 this_offset = misalign;
4269 /* Check the first index accessed. */
4270 if (!vect_get_first_index (ref, &array_first_index))
4272 if (vect_debug_details (NULL))
4273 fprintf (dump_file, "no first_index for array.");
4277 /* Check the index of the array_ref. */
4278 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4279 LOOP_VINFO_LOOP (loop_vinfo)->num);
4281 /* FORNOW: In order to simplify the handling of alignment, we make sure
4282 that the first location at which the array is accessed ('init') is on an
4283 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4284 This is too conservative, since we require that
4285 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4286 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4287 This should be relaxed in the future. */
4289 if (!init || !host_integerp (init, 0))
4291 if (vect_debug_details (NULL))
4292 fprintf (dump_file, "non constant init. ");
4296 /* bytes per scalar element: */
4297 nunits = fold_convert (unsigned_type_node,
4298 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4299 nbits = int_const_binop (MULT_EXPR, nunits,
4300 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4302 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4303 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4304 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4305 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4307 /* TODO: allow negative misalign values. */
4308 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4310 if (vect_debug_details (NULL))
4311 fprintf (dump_file, "unexpected misalign value");
4319 /* Function vect_compute_data_refs_alignment
4321 Compute the misalignment of data references in the loop.
4322 This pass may take place at function granularity instead of at loop
4325 FOR NOW: No analysis is actually performed. Misalignment is calculated
4326 only for trivial cases. TODO. */
4329 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4331 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4332 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* Compute misalignment for every write data-ref, then every read.  */
4335 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4337 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4338 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4342 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4344 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4345 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4353 /* Function vect_enhance_data_refs_alignment
4355 This pass will use loop versioning and loop peeling in order to enhance
4356 the alignment of data references in the loop.
4358 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4359 original loop is to be vectorized; Any other loops that are created by
4360 the transformations performed in this pass - are not supposed to be
4361 vectorized. This restriction will be relaxed. */
4364 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4366 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4367 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4368 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4372 This pass will require a cost model to guide it whether to apply peeling
4373 or versioning or a combination of the two. For example, the scheme that
4374 intel uses when given a loop with several memory accesses, is as follows:
4375 choose one memory access ('p') which alignment you want to force by doing
4376 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4377 other accesses are not necessarily aligned, or (2) use loop versioning to
4378 generate one loop in which all accesses are aligned, and another loop in
4379 which only 'p' is necessarily aligned.
4381 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4382 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
4383 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4385 Devising a cost model is the most critical aspect of this work. It will
4386 guide us on which access to peel for, whether to use loop versioning, how
4387 many versions to create, etc. The cost model will probably consist of
4388 generic considerations as well as target specific considerations (on
4389 powerpc for example, misaligned stores are more painful than misaligned
4392 Here is the general steps involved in alignment enhancements:
4394 -- original loop, before alignment analysis:
4395 for (i=0; i<N; i++){
4396 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4397 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4400 -- After vect_compute_data_refs_alignment:
4401 for (i=0; i<N; i++){
4402 x = q[i]; # DR_MISALIGNMENT(q) = 3
4403 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4406 -- Possibility 1: we do loop versioning:
4408 for (i=0; i<N; i++){ # loop 1A
4409 x = q[i]; # DR_MISALIGNMENT(q) = 3
4410 p[i] = y; # DR_MISALIGNMENT(p) = 0
4414 for (i=0; i<N; i++){ # loop 1B
4415 x = q[i]; # DR_MISALIGNMENT(q) = 3
4416 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4420 -- Possibility 2: we do loop peeling:
4421 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4425 for (i = 3; i < N; i++){ # loop 2A
4426 x = q[i]; # DR_MISALIGNMENT(q) = 0
4427 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4430 -- Possibility 3: combination of loop peeling and versioning:
4431 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4436 for (i = 3; i<N; i++){ # loop 3A
4437 x = q[i]; # DR_MISALIGNMENT(q) = 0
4438 p[i] = y; # DR_MISALIGNMENT(p) = 0
4442 for (i = 3; i<N; i++){ # loop 3B
4443 x = q[i]; # DR_MISALIGNMENT(q) = 0
4444 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4448 These loops are later passed to loop_transform to be vectorized. The
4449 vectorizer will use the alignment information to guide the transformation
4450 (whether to generate regular loads/stores, or with special handling for
4454 /* (1) Peeling to force alignment. */
4456 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4458 + How many accesses will become aligned due to the peeling
4459 - How many accesses will become unaligned due to the peeling,
4460 and the cost of misaligned accesses.
4461 - The cost of peeling (the extra runtime checks, the increase
4464 The scheme we use FORNOW: peel to force the alignment of the first
4465 misaligned store in the loop.
4466 Rationale: misaligned stores are not yet supported.
4468 TODO: Use a better cost model. */
/* Pick the first unaligned write as the data-ref to peel for.  */
4470 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4472 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4473 if (!aligned_access_p (dr))
4475 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4476 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
/* No unaligned store found: nothing to peel for.  */
4481 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4483 if (vect_debug_details (loop))
4484 fprintf (dump_file, "Peeling for alignment will not be applied.");
4488 if (vect_debug_details (loop))
4489 fprintf (dump_file, "Peeling for alignment will be applied.");
4492 /* (1.2) Update the alignment info according to the peeling factor.
4493 If the misalignment of the DR we peel for is M, then the
4494 peeling factor is VF - M, and the misalignment of each access DR_i
4495 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4496 If the misalignment of the DR we peel for is unknown, then the
4497 misalignment of each access DR_i in the loop is also unknown.
4499 FORNOW: set the misalignment of the accesses to unknown even
4500 if the peeling factor is known at compile time.
4502 TODO: - if the peeling factor is known at compile time, use that
4503 when updating the misalignment info of the loop DRs.
4504 - consider accesses that are known to have the same
4505 alignment, even if that alignment is unknown. */
/* The peeled-for DR becomes aligned (0); all other writes and reads are
   conservatively reset to unknown (-1), per the FORNOW note above.  */
4507 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4509 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4510 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4511 DR_MISALIGNMENT (dr) = 0;
4513 DR_MISALIGNMENT (dr) = -1;
4515 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4517 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4518 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4519 DR_MISALIGNMENT (dr) = 0;
4521 DR_MISALIGNMENT (dr) = -1;
4526 /* Function vect_analyze_data_refs_alignment
4528 Analyze the alignment of the data-references in the loop.
4529 FOR NOW: Until support for misaligned accesses is in place, only if all
4530 accesses are aligned can the loop be vectorized. This restriction will be
4534 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
/* NOTE(review): this excerpt is non-contiguous -- original lines are elided
   between the numbered lines (e.g. the return statements and closing braces
   are not visible here).  Presumably returns true iff every data-ref's
   alignment is supportable -- confirm against the full source.  */
4536 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4537 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4538 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4539 enum dr_alignment_support supportable_dr_alignment;
4542 if (vect_debug_details (NULL))
4543 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4546 /* This pass may take place at function granularity instead of at loop
/* Step 1: compute the misalignment of each data-ref in the loop.  */
4549 if (!vect_compute_data_refs_alignment (loop_vinfo))
4551 if (vect_debug_details (loop) || vect_debug_stats (loop))
4553 "not vectorized: can't calculate alignment for data ref.");
4558 /* This pass will decide on using loop versioning and/or loop peeling in
4559 order to enhance the alignment of data references in the loop. */
/* Step 2: try to improve alignment (peeling/versioning decisions).  */
4561 vect_enhance_data_refs_alignment (loop_vinfo);
4564 /* Finally, check that all the data references in the loop can be
4565 handled with respect to their alignment. */
/* Step 3: reject the loop if any read or write remains unsupportable.
   vect_supportable_dr_alignment returns a nonzero enum value when the
   target can handle the (mis)alignment of DR.  */
4567 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4569 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4570 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4571 if (!supportable_dr_alignment)
4573 if (vect_debug_details (loop) || vect_debug_stats (loop))
4574 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4578 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4580 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4581 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4582 if (!supportable_dr_alignment)
4584 if (vect_debug_details (loop) || vect_debug_stats (loop))
4585 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4594 /* Function vect_analyze_data_ref_access.
4596 Analyze the access pattern of the data-reference DR. For now, a data access
4597 has to be consecutive and aligned to be considered vectorizable. */
4600 vect_analyze_data_ref_access (struct data_reference *dr)
/* NOTE(review): non-contiguous excerpt; declarations of access_fn/init/step
   and the return statements are elided between the numbered lines.  */
4602 varray_type access_fns = DR_ACCESS_FNS (dr);
4605 unsigned int dimensions, i;
4607 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4608 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4609 access is contiguous). */
4610 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
/* Dimensions 1..N-1 must not evolve in the containing loop; only the
   innermost (last) dimension may vary with the loop IV.  */
4612 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4614 access_fn = DR_ACCESS_FN (dr, i);
4616 if (evolution_part_in_loop_num (access_fn,
4617 loop_containing_stmt (DR_STMT (dr))->num))
4619 /* Evolution part is not NULL in this loop (it is neither constant
4621 if (vect_debug_details (NULL))
4624 "not vectorized: complicated multidim. array access.");
4625 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4631 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
/* The innermost access function must be either loop-invariant (constant
   evolution) or a simple induction (linear in the loop counter).  */
4632 if (!evolution_function_is_constant_p (access_fn)
4633 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4634 access_fn, &init, &step, true))
4636 if (vect_debug_details (NULL))
4638 fprintf (dump_file, "not vectorized: complicated access function.");
4639 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4648 /* Function vect_analyze_data_ref_accesses.
4650 Analyze the access pattern of all the data references in the loop.
4652 FORNOW: the only access pattern that is considered vectorizable is a
4653 simple step 1 (consecutive) access.
4655 FORNOW: handle only arrays and pointer accesses. */
4658 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
/* Checks every write data-ref, then every read data-ref, via
   vect_analyze_data_ref_access.  NOTE(review): excerpt is non-contiguous;
   the `if (!ok)` guards and return statements are elided here.  */
4661 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4662 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4664 if (vect_debug_details (NULL))
4665 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Writes first.  */
4667 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4669 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4670 bool ok = vect_analyze_data_ref_access (dr);
4673 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4674 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4675 fprintf (dump_file, "not vectorized: complicated access pattern.");
/* Then reads.  */
4680 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4682 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4683 bool ok = vect_analyze_data_ref_access (dr);
4686 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4687 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4688 fprintf (dump_file, "not vectorized: complicated access pattern.");
4697 /* Function vect_analyze_pointer_ref_access.
4700 STMT - a stmt that contains a data-ref
4701 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4703 If the data-ref access is vectorizable, return a data_reference structure
4704 that represents it (DR). Otherwise - return NULL. */
4706 static struct data_reference *
4707 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
/* NOTE(review): non-contiguous excerpt; some guards, declarations and
   `return NULL;` statements are elided between the numbered lines.  */
4709 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4710 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
/* Evolution of the pointer SSA name (operand 0 of the INDIRECT_REF).  */
4711 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4714 tree reftype, innertype;
4715 enum machine_mode innermode;
4716 tree indx_access_fn;
4717 int loopnum = loop->num;
4718 struct data_reference *dr;
4722 if (vect_debug_stats (loop) || vect_debug_details (loop))
4723 fprintf (dump_file, "not vectorized: complicated pointer access.");
4727 if (vect_debug_details (NULL))
4729 fprintf (dump_file, "Access function of ptr: ");
4730 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The pointer must evolve as a simple induction: base INIT plus STEP
   per iteration.  */
4733 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4735 if (vect_debug_stats (loop) || vect_debug_details (loop))
4736 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* STEP must be a compile-time constant that fits in a HOST_WIDE_INT.  */
4742 if (!host_integerp (step,0))
4744 if (vect_debug_stats (loop) || vect_debug_details (loop))
4746 "not vectorized: non constant step for pointer access.");
4750 step_val = TREE_INT_CST_LOW (step);
4752 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4753 if (TREE_CODE (reftype) != POINTER_TYPE)
4755 if (vect_debug_stats (loop) || vect_debug_details (loop))
4756 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4760 reftype = TREE_TYPE (init);
4761 if (TREE_CODE (reftype) != POINTER_TYPE)
4763 if (vect_debug_stats (loop) || vect_debug_details (loop))
4764 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* The step must equal the size of the accessed element, i.e. the access
   is consecutive (stride 1 in elements).  */
4768 innertype = TREE_TYPE (reftype);
4769 innermode = TYPE_MODE (innertype);
4770 if (GET_MODE_SIZE (innermode) != step_val)
4772 /* FORNOW: support only consecutive access */
4773 if (vect_debug_stats (loop) || vect_debug_details (loop))
4774 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Build a canonical {0, +, 1} access function for the pointer index.  */
4779 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4780 if (vect_debug_details (NULL))
4782 fprintf (dump_file, "Access function of ptr indx: ");
4783 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4785 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4790 /* Function vect_get_symbl_and_dr.
4792 The function returns SYMBL - the relevant variable for
4793 memory tag (for aliasing purposes).
4794 Also data reference structure DR is created.
4797 MEMREF - data reference in STMT
4798 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4801 DR - data_reference struct for MEMREF
4802 return value - the relevant variable for memory tag (for aliasing purposes).
4807 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4808 loop_vec_info loop_vinfo, struct data_reference **dr)
/* NOTE(review): non-contiguous excerpt; case labels, early returns and
   closing braces are elided between the numbered lines.  Dispatches on
   TREE_CODE (memref): the first visible arm handles pointer (INDIRECT_REF)
   accesses, the second handles array (ARRAY_REF) accesses -- confirm the
   exact case labels against the full source.  */
4810 tree symbl, oprnd0, oprnd1;
4811 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4813 tree array_base, base;
4814 struct data_reference *new_dr;
4815 bool base_aligned_p;
4818 switch (TREE_CODE (memref))
/* Pointer access: analyze the pointer's evolution and build the DR.  */
4821 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4825 symbl = DR_BASE_NAME (new_dr);
4826 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
/* Peel through the form of the pointer base to find the variable that
   carries the memory tag.  */
4828 switch (TREE_CODE (symbl))
4832 oprnd0 = TREE_OPERAND (symbl, 0);
4833 oprnd1 = TREE_OPERAND (symbl, 1);
4836 /* Only {address_base + offset} expressions are supported,
4837 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4838 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4839 TODO: swap operands if {offset + address_base}. */
4840 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4841 && TREE_CODE (oprnd1) != INTEGER_CST)
4842 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
/* Recurse on the pointer operand to find the underlying symbol.  */
4845 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4848 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4849 loop_vinfo, &new_dr);
4853 /* symbl remains unchanged. */
4857 if (vect_debug_details (NULL))
4859 fprintf (dump_file, "unhandled data ref: ");
4860 print_generic_expr (dump_file, memref, TDF_SLIM);
4861 fprintf (dump_file, " (symbl ");
4862 print_generic_expr (dump_file, symbl, TDF_SLIM);
4863 fprintf (dump_file, ") in stmt ");
4864 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Array access arm.  */
4871 offset = size_zero_node;
4873 /* Store the array base in the stmt info.
4874 For one dimensional array ref a[i], the base is a,
4875 for multidimensional a[i1][i2]..[iN], the base is
4876 a[i1][i2]..[iN-1]. */
4877 array_base = TREE_OPERAND (memref, 0);
4878 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4880 new_dr = analyze_array (stmt, memref, is_read);
4883 /* Find the relevant symbol for aliasing purposes. */
4884 base = DR_BASE_NAME (new_dr);
4885 switch (TREE_CODE (base))
4892 symbl = TREE_OPERAND (base, 0);
4896 /* Could have recorded more accurate information -
4897 i.e, the actual FIELD_DECL that is being referenced -
4898 but later passes expect VAR_DECL as the nmt. */
4899 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4900 loop_vinfo, &offset, &base_aligned_p);
4905 if (vect_debug_details (NULL))
4907 fprintf (dump_file, "unhandled struct/class field access ");
4908 print_generic_expr (dump_file, stmt, TDF_SLIM);
4915 if (vect_debug_details (NULL))
4917 fprintf (dump_file, "unhandled data ref: ");
4918 print_generic_expr (dump_file, memref, TDF_SLIM);
4919 fprintf (dump_file, " in stmt ");
4920 print_generic_expr (dump_file, stmt, TDF_SLIM);
4928 /* Function vect_analyze_data_refs.
4930 Find all the data references in the loop.
4932 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4933 which base is really an array (not a pointer) and which alignment
4934 can be forced. This restriction will be relaxed. */
4937 vect_analyze_data_refs (loop_vec_info loop_vinfo)
/* Walks every stmt of every BB in the loop, identifies stmts that touch
   memory (via their virtual operands), builds a data_reference for each,
   and records the alias memory tag in the stmt_vec_info.
   NOTE(review): non-contiguous excerpt; case labels, braces and the final
   return are elided between the numbered lines.  */
4939 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4940 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4941 int nbbs = loop->num_nodes;
4942 block_stmt_iterator si;
4944 struct data_reference *dr;
4947 bool base_aligned_p;
4950 if (vect_debug_details (NULL))
4951 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4953 for (j = 0; j < nbbs; j++)
4955 basic_block bb = bbs[j];
4956 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4958 bool is_read = false;
4959 tree stmt = bsi_stmt (si);
4960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4961 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4962 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4963 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4964 varray_type *datarefs = NULL;
4965 int nvuses, nv_may_defs, nv_must_defs;
4969 /* Assumption: there exists a data-ref in stmt, if and only if
4970 it has vuses/vdefs. */
4972 if (!vuses && !v_may_defs && !v_must_defs)
/* No virtual operands: no memory access in this stmt; skip it.  */
4975 nvuses = NUM_VUSES (vuses);
4976 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4977 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt that both reads and writes memory is not handled.  */
4979 if (nvuses && (nv_may_defs || nv_must_defs))
4981 if (vect_debug_details (NULL))
4983 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4984 print_generic_expr (dump_file, stmt, TDF_SLIM);
4989 if (TREE_CODE (stmt) != MODIFY_EXPR)
4991 if (vect_debug_details (NULL))
4993 fprintf (dump_file, "unexpected vops in stmt: ");
4994 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Read: the data-ref is the RHS; record it in the reads varray.  */
5001 memref = TREE_OPERAND (stmt, 1);
5002 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
/* Write: the data-ref is the LHS; record it in the writes varray.  */
5007 memref = TREE_OPERAND (stmt, 0);
5008 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5012 /* Analyze MEMREF. If it is of a supported form, build data_reference
5013 struct for it (DR) and find the relevant symbol for aliasing
5015 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
5019 if (vect_debug_stats (loop) || vect_debug_details (loop))
5021 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5022 print_generic_expr (dump_file, stmt, TDF_SLIM);
5027 /* Find and record the memtag assigned to this data-ref. */
5028 switch (TREE_CODE (symbl))
/* Plain decl: the symbol itself is the tag.  */
5031 STMT_VINFO_MEMTAG (stmt_info) = symbl;
/* SSA name: use the type memory tag of the underlying variable.  */
5035 symbl = SSA_NAME_VAR (symbl);
5036 tag = get_var_ann (symbl)->type_mem_tag;
5039 tree ptr = TREE_OPERAND (memref, 0);
5040 if (TREE_CODE (ptr) == SSA_NAME)
5041 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5045 if (vect_debug_stats (loop) || vect_debug_details (loop))
5046 fprintf (dump_file, "not vectorized: no memtag for ref.");
5049 STMT_VINFO_MEMTAG (stmt_info) = tag;
/* ADDR_EXPR-like symbol: inspect what is being addressed.  */
5053 address_base = TREE_OPERAND (symbl, 0);
5055 switch (TREE_CODE (address_base))
5058 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5060 STMT_VINFO_MEMTAG (stmt_info) =
5061 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5062 loop_vinfo, &offset,
5067 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5071 if (vect_debug_stats (loop) || vect_debug_details (loop))
5074 "not vectorized: unhandled address expr: ");
5075 print_generic_expr (dump_file, stmt, TDF_SLIM);
5082 if (vect_debug_stats (loop) || vect_debug_details (loop))
5084 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5085 print_generic_expr (dump_file, memref, TDF_SLIM);
/* Record the new data-ref in the appropriate (read/write) varray.  */
5090 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5091 STMT_VINFO_DATA_REF (stmt_info) = dr;
5099 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5101 /* Function vect_mark_relevant.
5103 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5106 vect_mark_relevant (varray_type worklist, tree stmt)
/* NOTE(review): non-contiguous excerpt; early returns and braces elided.  */
5108 stmt_vec_info stmt_info;
5110 if (vect_debug_details (NULL))
5111 fprintf (dump_file, "mark relevant.");
/* PHI nodes carry no stmt_vec_info here: just queue them.  */
5113 if (TREE_CODE (stmt) == PHI_NODE)
5115 VARRAY_PUSH_TREE (worklist, stmt);
5119 stmt_info = vinfo_for_stmt (stmt);
5123 if (vect_debug_details (NULL))
5125 fprintf (dump_file, "mark relevant: no stmt info!!.");
5126 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Avoid re-queuing: a stmt already marked relevant is processed once.  */
5131 if (STMT_VINFO_RELEVANT_P (stmt_info))
5133 if (vect_debug_details (NULL))
5134 fprintf (dump_file, "already marked relevant.");
5138 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5139 VARRAY_PUSH_TREE (worklist, stmt);
5143 /* Function vect_stmt_relevant_p.
5145 Return true if STMT in loop that is represented by LOOP_VINFO is
5146 "relevant for vectorization".
5148 A stmt is considered "relevant for vectorization" if:
5149 - it has uses outside the loop.
5150 - it has vdefs (it alters memory).
5151 - control stmts in the loop (except for the exit condition).
5153 CHECKME: what other side effects would the vectorizer allow? */
5156 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
/* NOTE(review): non-contiguous excerpt; declarations of df/num_uses/i and
   the return statements are elided between the numbered lines.  */
5158 v_may_def_optype v_may_defs;
5159 v_must_def_optype v_must_defs;
5160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5165 /* cond stmt other than loop exit cond. */
5166 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5169 /* changing memory. */
5170 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5171 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5172 if (v_may_defs || v_must_defs)
5174 if (vect_debug_details (NULL))
5175 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5179 /* uses outside the loop. */
/* Relies on the immediate-uses df information computed in
   vectorize_loops via compute_immediate_uses.  */
5180 df = get_immediate_uses (stmt);
5181 num_uses = num_immediate_uses (df);
5182 for (i = 0; i < num_uses; i++)
5184 tree use = immediate_use (df, i);
5185 basic_block bb = bb_for_stmt (use);
5186 if (!flow_bb_inside_loop_p (loop, bb))
5188 if (vect_debug_details (NULL))
5189 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5198 /* Function vect_mark_stmts_to_be_vectorized.
5200 Not all stmts in the loop need to be vectorized. For example:
5209 Stmt 1 and 3 do not need to be vectorized, because loop control and
5210 addressing of vectorized data-refs are handled differently.
5212 This pass detects such stmts. */
5215 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
/* Worklist algorithm: seed with directly-relevant stmts, then propagate
   relevance backwards along use-def chains inside the loop.
   NOTE(review): non-contiguous excerpt; declarations (i, j, bb, stmt, ann,
   use_ops, phi) and some returns/braces are elided.  */
5217 varray_type worklist;
5218 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5219 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5220 unsigned int nbbs = loop->num_nodes;
5221 block_stmt_iterator si;
5227 stmt_vec_info stmt_info;
5229 if (vect_debug_details (NULL))
5230 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5232 VARRAY_TREE_INIT (worklist, 64, "work list");
5234 /* 1. Init worklist. */
5236 for (i = 0; i < nbbs; i++)
5238 basic_block bb = bbs[i];
5239 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5241 stmt = bsi_stmt (si);
5243 if (vect_debug_details (NULL))
5245 fprintf (dump_file, "init: stmt relevant? ");
5246 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Clear the flag first; vect_mark_relevant sets it when queuing.  */
5249 stmt_info = vinfo_for_stmt (stmt);
5250 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5252 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5253 vect_mark_relevant (worklist, stmt);
5258 /* 2. Process_worklist */
5260 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5262 stmt = VARRAY_TOP_TREE (worklist);
5263 VARRAY_POP (worklist);
5265 if (vect_debug_details (NULL))
5267 fprintf (dump_file, "worklist: examine stmt: ");
5268 print_generic_expr (dump_file, stmt, TDF_SLIM);
5271 /* Examine the USES in this statement. Mark all the statements which
5272 feed this statement's uses as "relevant", unless the USE is used as
5275 if (TREE_CODE (stmt) == PHI_NODE)
5277 /* follow the def-use chain inside the loop. */
5278 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5280 tree arg = PHI_ARG_DEF (stmt, j);
5281 tree def_stmt = NULL_TREE;
/* An unsupported use aborts the whole analysis (cleanup + fail).  */
5283 if (!vect_is_simple_use (arg, loop, &def_stmt))
5285 if (vect_debug_details (NULL))
5286 fprintf (dump_file, "worklist: unsupported use.");
5287 varray_clear (worklist);
5293 if (vect_debug_details (NULL))
5295 fprintf (dump_file, "worklist: def_stmt: ");
5296 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
/* Only defs inside the loop are propagated.  */
5299 bb = bb_for_stmt (def_stmt);
5300 if (flow_bb_inside_loop_p (loop, bb))
5301 vect_mark_relevant (worklist, def_stmt);
/* Non-PHI stmt: walk its scalar use operands.  */
5305 ann = stmt_ann (stmt);
5306 use_ops = USE_OPS (ann);
5308 for (i = 0; i < NUM_USES (use_ops); i++)
5310 tree use = USE_OP (use_ops, i);
5312 /* We are only interested in uses that need to be vectorized. Uses
5313 that are used for address computation are not considered relevant.
5315 if (exist_non_indexing_operands_for_use_p (use, stmt))
5317 tree def_stmt = NULL_TREE;
5319 if (!vect_is_simple_use (use, loop, &def_stmt))
5321 if (vect_debug_details (NULL))
5322 fprintf (dump_file, "worklist: unsupported use.");
5323 varray_clear (worklist);
5330 if (vect_debug_details (NULL))
5332 fprintf (dump_file, "worklist: examine use %d: ", i);
5333 print_generic_expr (dump_file, use, TDF_SLIM);
5336 bb = bb_for_stmt (def_stmt);
5337 if (flow_bb_inside_loop_p (loop, bb))
5338 vect_mark_relevant (worklist, def_stmt);
5341 } /* while worklist */
5343 varray_clear (worklist);
5348 /* Function vect_can_advance_ivs_p
5350 In case the number of iterations that LOOP iterates is unknown at compile
5351 time, an epilog loop will be generated, and the loop induction variables
5352 (IVs) will be "advanced" to the value they are supposed to take just before
5353 the epilog loop. Here we check that the access function of the loop IVs
5354 and the expression that represents the loop bound are simple enough.
5355 These restrictions will be relaxed in the future. */
5358 vect_can_advance_ivs_p (struct loop *loop)
/* NOTE(review): non-contiguous excerpt; the declaration of `phi`, the
   `continue`s and the final return are elided between the numbered lines.  */
5360 basic_block bb = loop->header;
5363 /* Analyze phi functions of the loop header. */
5365 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5367 tree access_fn = NULL;
5368 tree evolution_part;
5370 if (vect_debug_details (NULL))
5372 fprintf (dump_file, "Analyze phi: ");
5373 print_generic_expr (dump_file, phi, TDF_SLIM);
5376 /* Skip virtual phi's. The data dependences that are associated with
5377 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5379 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5381 if (vect_debug_details (NULL))
5382 fprintf (dump_file, "virtual phi. skip.")
5386 /* Analyze the evolution function. */
5388 access_fn = instantiate_parameters
5389 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5393 if (vect_debug_details (NULL))
5394 fprintf (dump_file, "No Access function.");
5398 if (vect_debug_details (NULL))
5400 fprintf (dump_file, "Access function of PHI: ");
5401 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5404 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5406 if (evolution_part == NULL_TREE)
5409 /* FORNOW: We do not transform initial conditions of IVs
5410 which evolution functions are a polynomial of degree >= 2. */
/* A chrec evolution part means degree >= 2: reject.  */
5412 if (tree_is_chrec (evolution_part))
5420 /* Function vect_get_loop_niters.
5422 Determine how many iterations the loop is executed.
5423 If an expression that represents the number of iterations
5424 can be constructed, place it in NUMBER_OF_ITERATIONS.
5425 Return the loop exit condition. */
5428 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
/* NOTE(review): non-contiguous excerpt; the declaration of `niters` and
   braces are elided between the numbered lines.  */
5432 if (vect_debug_details (NULL))
5433 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5435 niters = number_of_iterations_in_loop (loop);
/* Only record the count if the analyzer produced a known expression.  */
5437 if (niters != NULL_TREE
5438 && niters != chrec_dont_know)
5440 *number_of_iterations = niters;
5442 if (vect_debug_details (NULL))
5444 fprintf (dump_file, "==> get_loop_niters:" );
5445 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5449 return get_loop_exit_condition (loop);
5453 /* Function vect_analyze_loop_form.
5455 Verify the following restrictions (some may be relaxed in the future):
5456 - it's an inner-most loop
5457 - number of BBs = 2 (which are the loop header and the latch)
5458 - the loop has a pre-header
5459 - the loop has a single entry and exit
5460 - the loop exit condition is simple enough, and the number of iterations
5461 can be analyzed (a countable loop). */
5463 static loop_vec_info
5464 vect_analyze_loop_form (struct loop *loop)
/* Returns a fresh loop_vec_info on success; on any rejected form the
   function emits a dump message and bails out (returns are elided in this
   non-contiguous excerpt -- presumably NULL; confirm in full source).  */
5466 loop_vec_info loop_vinfo;
5468 tree number_of_iterations = NULL;
5469 bool rescan = false;
5471 if (vect_debug_details (loop))
5472 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
/* Reject non-innermost loops, multiple exits, more than two BBs,
   more than two header predecessors, or multiple entries.  */
5475 || !loop->single_exit
5476 || loop->num_nodes != 2
5477 || EDGE_COUNT (loop->header->preds) != 2
5478 || loop->num_entries != 1)
5480 if (vect_debug_stats (loop) || vect_debug_details (loop))
5482 fprintf (dump_file, "not vectorized: bad loop form. ");
5484 fprintf (dump_file, "nested loop.");
5485 else if (!loop->single_exit)
5486 fprintf (dump_file, "multiple exits.");
5487 else if (loop->num_nodes != 2)
5488 fprintf (dump_file, "too many BBs in loop.");
5489 else if (EDGE_COUNT (loop->header->preds) != 2)
5490 fprintf (dump_file, "too many incoming edges.");
5491 else if (loop->num_entries != 1)
5492 fprintf (dump_file, "too many entries.");
5498 /* We assume that the loop exit condition is at the end of the loop. i.e,
5499 that the loop is represented as a do-while (with a proper if-guard
5500 before the loop if needed), where the loop header contains all the
5501 executable statements, and the latch is empty. */
5502 if (!empty_block_p (loop->latch))
5504 if (vect_debug_stats (loop) || vect_debug_details (loop))
/* Fixed typo in dump message: "unexpectd" -> "unexpected".  */
5505 fprintf (dump_file, "not vectorized: unexpected loop form.");
5509 /* Make sure we have a preheader basic block. */
5510 if (!loop->pre_header)
5513 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5516 /* Make sure there exists a single-predecessor exit bb: */
5517 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5520 loop_split_edge_with (loop->exit_edges[0], NULL);
/* Edges were split above: rescan the loop structure.  */
5525 flow_loop_scan (loop, LOOP_ALL);
5526 /* Flow loop scan does not update loop->single_exit field. */
5527 loop->single_exit = loop->exit_edges[0];
5530 if (empty_block_p (loop->header))
5532 if (vect_debug_stats (loop) || vect_debug_details (loop))
5533 fprintf (dump_file, "not vectorized: empty loop.");
/* The loop must be countable: exit condition analyzable and the
   iteration count expressible.  */
5537 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5540 if (vect_debug_stats (loop) || vect_debug_details (loop))
5541 fprintf (dump_file, "not vectorized: complicated exit condition.");
5545 if (!number_of_iterations)
5547 if (vect_debug_stats (loop) || vect_debug_details (loop))
5549 "not vectorized: number of iterations cannot be computed.");
5553 if (chrec_contains_undetermined (number_of_iterations))
5555 if (vect_debug_details (NULL))
5556 fprintf (dump_file, "Infinite number of iterations.");
5560 loop_vinfo = new_loop_vec_info (loop);
5561 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5563 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5565 if (vect_debug_details (loop))
5567 fprintf (dump_file, "loop bound unknown.\n");
5568 fprintf (dump_file, "Symbolic number of iterations is ");
5569 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5573 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5575 if (vect_debug_stats (loop) || vect_debug_details (loop))
5576 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5580 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5586 /* Function vect_analyze_loop.
5588 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5589 for it. The different analyses will record information in the
5590 loop_vec_info struct. */
5592 static loop_vec_info
5593 vect_analyze_loop (struct loop *loop)
/* Analysis driver: each stage either passes or destroys the loop_vinfo
   and bails out.  NOTE(review): non-contiguous excerpt; declarations,
   `if (!loop_vinfo)` / `if (!ok)` guards, `return NULL;` statements and
   the final `return loop_vinfo;` are elided between the numbered lines.  */
5596 loop_vec_info loop_vinfo;
5598 if (vect_debug_details (NULL))
5599 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5601 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5603 loop_vinfo = vect_analyze_loop_form (loop);
5606 if (vect_debug_details (loop))
5607 fprintf (dump_file, "bad loop form.");
5611 /* Find all data references in the loop (which correspond to vdefs/vuses)
5612 and analyze their evolution in the loop.
5614 FORNOW: Handle only simple, array references, which
5615 alignment can be forced, and aligned pointer-references. */
5617 ok = vect_analyze_data_refs (loop_vinfo);
5620 if (vect_debug_details (loop))
5621 fprintf (dump_file, "bad data references.");
5622 destroy_loop_vec_info (loop_vinfo);
5626 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5628 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5631 if (vect_debug_details (loop))
5632 fprintf (dump_file, "unexpected pattern.");
/* NOTE(review): two back-to-back prints under the same vect_debug_details
   guard for one failure -- looks like an accidental duplicate (other
   stages print a single message); verify against the full source.  */
5633 if (vect_debug_details (loop))
5634 fprintf (dump_file, "not vectorized: unexpected pattern.");
5635 destroy_loop_vec_info (loop_vinfo);
5639 /* Check that all cross-iteration scalar data-flow cycles are OK.
5640 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5642 ok = vect_analyze_scalar_cycles (loop_vinfo);
5645 if (vect_debug_details (loop))
5646 fprintf (dump_file, "bad scalar cycle.");
5647 destroy_loop_vec_info (loop_vinfo);
5651 /* Analyze data dependences between the data-refs in the loop.
5652 FORNOW: fail at the first data dependence that we encounter. */
5654 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5657 if (vect_debug_details (loop))
5658 fprintf (dump_file, "bad data dependence.");
5659 destroy_loop_vec_info (loop_vinfo);
5663 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5664 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5666 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5669 if (vect_debug_details (loop))
5670 fprintf (dump_file, "bad data access.");
5671 destroy_loop_vec_info (loop_vinfo);
5675 /* Analyze the alignment of the data-refs in the loop.
5676 FORNOW: Only aligned accesses are handled. */
5678 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5681 if (vect_debug_details (loop))
5682 fprintf (dump_file, "bad data alignment.");
5683 destroy_loop_vec_info (loop_vinfo);
5687 /* Scan all the operations in the loop and make sure they are
5690 ok = vect_analyze_operations (loop_vinfo);
5693 if (vect_debug_details (loop))
5694 fprintf (dump_file, "bad operation or unsupported loop bound.");
5695 destroy_loop_vec_info (loop_vinfo);
/* All analyses passed: mark the loop as vectorizable.  */
5699 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5705 /* Function need_imm_uses_for.
5707 Return whether we ought to include information for 'var'
5708 when calculating immediate uses. For this pass we only want use
5709 information for non-virtual variables. */
5712 need_imm_uses_for (tree var)
/* Predicate passed to compute_immediate_uses: track only GIMPLE
   registers (scalars), not virtual operands.  */
5714 return is_gimple_reg (var);
5718 /* Function vectorize_loops.
5720 Entry Point to loop vectorization phase. */
5723 vectorize_loops (struct loops *loops)
5725 unsigned int i, loops_num;
5726 unsigned int num_vectorized_loops = 0;
5728 /* Does the target support SIMD? */
5729 /* FORNOW: until more sophisticated machine modelling is in place. */
5730 if (!UNITS_PER_SIMD_WORD)
5732 if (vect_debug_details (NULL))
5733 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5737 #ifdef ENABLE_CHECKING
5738 verify_loop_closed_ssa ();
5741 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5743 /* ----------- Analyze loops. ----------- */
5745 /* If some loop was duplicated, it gets bigger number
5746 than all previously defined loops. This fact allows us to run
5747 only over initial loops skipping newly generated ones. */
5748 loops_num = loops->num;
5749 for (i = 1; i < loops_num; i++)
5751 loop_vec_info loop_vinfo;
5752 struct loop *loop = loops->parray[i];
5757 loop_vinfo = vect_analyze_loop (loop);
5758 loop->aux = loop_vinfo;
5760 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5763 vect_transform_loop (loop_vinfo, loops);
5764 num_vectorized_loops++;
5767 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5768 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5769 num_vectorized_loops);
5771 /* ----------- Finalize. ----------- */
5774 for (i = 1; i < loops_num; i++)
5776 struct loop *loop = loops->parray[i];
5777 loop_vec_info loop_vinfo;
5781 loop_vinfo = loop->aux;
5782 destroy_loop_vec_info (loop_vinfo);
5786 rewrite_into_ssa (false);
5787 rewrite_into_loop_closed_ssa (); /* FORNOW */
5788 bitmap_clear (vars_to_rename);