2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
147 #include "tree-vectorizer.h"
148 #include "tree-pass.h"
149 #include "langhooks.h"
152 /*************************************************************************
153 Simple Loop Peeling Utilities
154 *************************************************************************/
156 /* Entry point for peeling of simple loops.
157 Peel the first/last iterations of a loop.
158 It can be used outside of the vectorizer for loops that are simple enough
159 (see function documentation). In the vectorizer it is used to peel the
160 last few iterations when the loop bound is unknown or does not evenly
161 divide by the vectorization factor, and to peel the first few iterations
162 to force the alignment of data references in the loop. */
163 struct loop *slpeel_tree_peel_loop_to_edge
164 (struct loop *, struct loops *, edge, tree, tree, bool);
165 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
166 (struct loop *, struct loops *, edge);
167 static void slpeel_update_phis_for_duplicate_loop
168 (struct loop *, struct loop *, bool after);
169 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
170 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
171 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
172 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
173 static void allocate_new_names (bitmap);
174 static void rename_use_op (use_operand_p);
175 static void rename_def_op (def_operand_p, tree);
176 static void rename_variables_in_bb (basic_block);
177 static void free_new_names (bitmap);
178 static void rename_variables_in_loop (struct loop *);
179 #ifdef ENABLE_CHECKING
180 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
182 static LOC find_loop_location (struct loop *);
185 /*************************************************************************
186 Vectorization Utilities.
187 *************************************************************************/
189 /* Main analysis functions. */
190 static loop_vec_info vect_analyze_loop (struct loop *);
191 static loop_vec_info vect_analyze_loop_form (struct loop *);
192 static bool vect_analyze_data_refs (loop_vec_info);
193 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
194 static bool vect_analyze_scalar_cycles (loop_vec_info);
195 static bool vect_analyze_data_ref_accesses (loop_vec_info);
196 static bool vect_analyze_data_ref_dependence
197 (struct data_reference *, struct data_reference *, loop_vec_info);
198 static bool vect_analyze_data_ref_dependences (loop_vec_info);
199 static bool vect_analyze_data_refs_alignment (loop_vec_info);
200 static bool vect_compute_data_refs_alignment (loop_vec_info);
201 static bool vect_analyze_operations (loop_vec_info);
203 /* Main code transformation functions. */
204 static void vect_transform_loop (loop_vec_info, struct loops *);
205 static bool vect_transform_stmt (tree, block_stmt_iterator *);
206 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
207 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
208 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
209 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
210 static enum dr_alignment_support vect_supportable_dr_alignment
211 (struct data_reference *);
212 static void vect_align_data_ref (tree);
213 static void vect_enhance_data_refs_alignment (loop_vec_info);
215 /* Utility functions for the analyses. */
216 static bool vect_is_simple_use (tree , loop_vec_info, tree *);
217 static bool exist_non_indexing_operands_for_use_p (tree, tree);
218 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
219 static void vect_mark_relevant (varray_type *, tree);
220 static bool vect_stmt_relevant_p (tree, loop_vec_info);
221 static tree vect_get_loop_niters (struct loop *, tree *);
222 static bool vect_compute_data_ref_alignment (struct data_reference *);
223 static bool vect_analyze_data_ref_access (struct data_reference *);
224 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
225 static struct data_reference * vect_analyze_pointer_ref_access
227 static bool vect_can_advance_ivs_p (loop_vec_info);
228 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
229 loop_vec_info, tree *, tree *, tree *,
231 static struct data_reference * vect_analyze_pointer_ref_access
233 static tree vect_get_ptr_offset (tree, tree, tree *);
234 static tree vect_get_memtag_and_dr
235 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
236 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
238 static tree vect_strip_conversion (tree);
240 /* Utility functions for the code transformation. */
241 static tree vect_create_destination_var (tree, tree);
242 static tree vect_create_data_ref_ptr
243 (tree, block_stmt_iterator *, tree, tree *, bool);
244 static tree vect_create_index_for_vector_ref (loop_vec_info);
245 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
246 static tree get_vectype_for_scalar_type (tree);
247 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
248 static tree vect_get_vec_def_for_operand (tree, tree);
249 static tree vect_init_vector (tree, tree);
250 static void vect_finish_stmt_generation
251 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
253 /* Utility function dealing with loop peeling (not peeling itself). */
254 static void vect_generate_tmps_on_preheader
255 (loop_vec_info, tree *, tree *, tree *);
256 static tree vect_build_loop_niters (loop_vec_info);
257 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
258 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
259 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
260 static void vect_update_inits_of_drs (loop_vec_info, tree);
261 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
262 static void vect_do_peeling_for_loop_bound
263 (loop_vec_info, tree *, struct loops *);
265 /* Utilities for creation and deletion of vec_info structs. */
266 loop_vec_info new_loop_vec_info (struct loop *loop);
267 void destroy_loop_vec_info (loop_vec_info);
268 stmt_vec_info new_stmt_vec_info (tree, loop_vec_info);
270 /*************************************************************************
271 Vectorization Debug Information.
272 *************************************************************************/
274 /* Utilities for output formatting. */
275 static bool vect_debug_stats (LOC);
276 static bool vect_debug_details (LOC);
279 /*************************************************************************
280 Simple Loop Peeling Utilities
282 Utilities to support loop peeling for vectorization purposes.
283 *************************************************************************/
286 /* For each definition in DEFINITIONS this function allocates
290 allocate_new_names (bitmap definitions)
295 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
297 tree def = ssa_name (ver);
298 tree *new_name_ptr = xmalloc (sizeof (tree));
300 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
302 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
303 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
305 SSA_NAME_AUX (def) = new_name_ptr;
310 /* Renames the use *OP_P. */
313 rename_use_op (use_operand_p op_p)
317 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
320 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
322 /* Something defined outside of the loop. */
326 /* An ordinary ssa name defined in the loop. */
328 SET_USE (op_p, *new_name_ptr);
332 /* Renames the def *OP_P in statement STMT. */
335 rename_def_op (def_operand_p op_p, tree stmt)
339 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
342 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
344 /* Something defined outside of the loop. */
348 /* An ordinary ssa name defined in the loop. */
350 SET_DEF (op_p, *new_name_ptr);
351 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
355 /* Renames the variables in basic block BB. */
358 rename_variables_in_bb (basic_block bb)
361 block_stmt_iterator bsi;
367 v_may_def_optype v_may_defs;
368 v_must_def_optype v_must_defs;
372 struct loop *loop = bb->loop_father;
374 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
375 rename_def_op (PHI_RESULT_PTR (phi), phi);
377 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
379 stmt = bsi_stmt (bsi);
380 get_stmt_operands (stmt);
381 ann = stmt_ann (stmt);
383 uses = USE_OPS (ann);
384 for (i = 0; i < NUM_USES (uses); i++)
385 rename_use_op (USE_OP_PTR (uses, i));
387 defs = DEF_OPS (ann);
388 for (i = 0; i < NUM_DEFS (defs); i++)
389 rename_def_op (DEF_OP_PTR (defs, i), stmt);
391 vuses = VUSE_OPS (ann);
392 for (i = 0; i < NUM_VUSES (vuses); i++)
393 rename_use_op (VUSE_OP_PTR (vuses, i));
395 v_may_defs = V_MAY_DEF_OPS (ann);
396 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
398 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
399 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
402 v_must_defs = V_MUST_DEF_OPS (ann);
403 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
405 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
406 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
410 FOR_EACH_EDGE (e, ei, bb->succs)
412 if (!flow_bb_inside_loop_p (loop, e->dest))
414 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
415 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
420 /* Releases the structures holding the new ssa names. */
423 free_new_names (bitmap definitions)
428 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
430 tree def = ssa_name (ver);
432 if (SSA_NAME_AUX (def))
434 free (SSA_NAME_AUX (def));
435 SSA_NAME_AUX (def) = NULL;
441 /* Renames variables in new generated LOOP. */
444 rename_variables_in_loop (struct loop *loop)
449 bbs = get_loop_body (loop);
451 for (i = 0; i < loop->num_nodes; i++)
452 rename_variables_in_bb (bbs[i]);
458 /* Update the PHI nodes of NEW_LOOP.
460 NEW_LOOP is a duplicate of ORIG_LOOP.
461 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
462 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
463 executes before it. */
466 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
467 struct loop *new_loop, bool after)
469 tree *new_name_ptr, new_ssa_name;
470 tree phi_new, phi_orig;
472 edge orig_loop_latch = loop_latch_edge (orig_loop);
473 edge orig_entry_e = loop_preheader_edge (orig_loop);
474 edge new_loop_exit_e = new_loop->exit_edges[0];
475 edge new_loop_entry_e = loop_preheader_edge (new_loop);
476 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
479 step 1. For each loop-header-phi:
480 Add the first phi argument for the phi in NEW_LOOP
481 (the one associated with the entry of NEW_LOOP)
483 step 2. For each loop-header-phi:
484 Add the second phi argument for the phi in NEW_LOOP
485 (the one associated with the latch of NEW_LOOP)
487 step 3. Update the phis in the successor block of NEW_LOOP.
489 case 1: NEW_LOOP was placed before ORIG_LOOP:
490 The successor block of NEW_LOOP is the header of ORIG_LOOP.
491 Updating the phis in the successor block can therefore be done
492 along with the scanning of the loop header phis, because the
493 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
494 phi nodes, organized in the same order.
496 case 2: NEW_LOOP was placed after ORIG_LOOP:
497 The successor block of NEW_LOOP is the original exit block of
498 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
499 We postpone updating these phis to a later stage (when
500 loop guards are added).
504 /* Scan the phis in the headers of the old and new loops
505 (they are organized in exactly the same order). */
507 for (phi_new = phi_nodes (new_loop->header),
508 phi_orig = phi_nodes (orig_loop->header);
510 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
513 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
514 add_phi_arg (phi_new, def, new_loop_entry_e);
517 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
518 if (TREE_CODE (def) != SSA_NAME)
521 new_name_ptr = SSA_NAME_AUX (def);
523 /* Something defined outside of the loop. */
526 /* An ordinary ssa name defined in the loop. */
527 new_ssa_name = *new_name_ptr;
528 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
530 /* step 3 (case 1). */
533 gcc_assert (new_loop_exit_e == orig_entry_e);
534 SET_PHI_ARG_DEF (phi_orig,
535 new_loop_exit_e->dest_idx,
542 /* Update PHI nodes for a guard of the LOOP.
545 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
546 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
547 originates from the guard-bb, skips LOOP and reaches the (unique) exit
548 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
549 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
550 LOOP header) before the guard code was added, and now it became a merge
551 point of two paths - the path that ends with the LOOP exit-edge, and
552 the path that ends with GUARD_EDGE.
554 This function creates and updates the relevant phi nodes to account for
555 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
556 1. Create phi nodes at NEW_MERGE_BB.
557 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
558 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
561 ===> The CFG before the guard-code was added:
563 if (exit_loop) goto update_bb : LOOP_header_bb
566 ==> The CFG after the guard-code was added:
568 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
570 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
575 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
576 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
577 organized in the same order.
578 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
581 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
582 "original" loop). FALSE if LOOP is an original loop (not a newly
583 created copy). The SSA_NAME_AUX fields of the defs in the original
584 loop are the corresponding new ssa-names used in the new duplicated
585 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
586 nodes in UPDATE_BB takes the original ssa-name, and which takes the
587 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
588 the LOOP-exit-edge takes the new-name, and the phi-arg that is
589 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
590 FALSE, it's the other way around.
594 slpeel_update_phi_nodes_for_guard (edge guard_edge,
599 tree orig_phi, new_phi, update_phi;
600 tree guard_arg, loop_arg;
601 basic_block new_merge_bb = guard_edge->dest;
602 edge e = EDGE_SUCC (new_merge_bb, 0);
603 basic_block update_bb = e->dest;
604 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
606 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
607 orig_phi && update_phi;
608 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
610 /* 1. Generate new phi node in NEW_MERGE_BB: */
611 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
614 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
615 of LOOP. Set the two phi args in NEW_PHI for these edges: */
618 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
619 EDGE_SUCC (loop->latch, 0));
620 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
624 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
625 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
629 new_name = *new_name_ptr;
631 /* Something defined outside of the loop */
636 guard_arg = orig_def;
641 guard_arg = new_name;
645 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
646 add_phi_arg (new_phi, guard_arg, guard_edge);
648 /* 3. Update phi in successor block. */
649 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
650 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
651 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
654 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
658 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
659 that starts at zero, increases by one and its limit is NITERS.
661 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
664 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
666 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
668 edge exit_edge = loop->exit_edges[0];
669 block_stmt_iterator loop_cond_bsi;
670 block_stmt_iterator incr_bsi;
672 tree begin_label = tree_block_label (loop->latch);
673 tree exit_label = tree_block_label (loop->single_exit->dest);
674 tree init = build_int_cst (TREE_TYPE (niters), 0);
675 tree step = build_int_cst (TREE_TYPE (niters), 1);
680 orig_cond = get_loop_exit_condition (loop);
681 #ifdef ENABLE_CHECKING
682 gcc_assert (orig_cond);
684 loop_cond_bsi = bsi_for_stmt (orig_cond);
686 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
687 create_iv (init, step, NULL_TREE, loop,
688 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
690 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
692 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
693 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
694 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
696 else /* 'then' edge loops back. */
698 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
699 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
700 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
703 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
704 then_label, else_label);
705 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
707 /* Remove old loop exit test: */
708 bsi_remove (&loop_cond_bsi);
710 loop_loc = find_loop_location (loop);
711 if (vect_debug_details (loop_loc))
712 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
714 loop->nb_iterations = niters;
718 /* Given LOOP this function generates a new copy of it and puts it
719 on E which is either the entry or exit of LOOP. */
722 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
725 struct loop *new_loop;
726 basic_block *new_bbs, *bbs;
729 basic_block exit_dest;
732 at_exit = (e == loop->exit_edges[0]);
733 if (!at_exit && e != loop_preheader_edge (loop))
736 bbs = get_loop_body (loop);
738 /* Check whether duplication is possible. */
739 if (!can_copy_bbs_p (bbs, loop->num_nodes))
745 /* Generate new loop structure. */
746 new_loop = duplicate_loop (loops, loop, loop->outer);
753 exit_dest = loop->exit_edges[0]->dest;
754 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
755 exit_dest) == loop->header ?
758 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
760 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
762 /* Duplicating phi args at exit bbs as coming
763 also from exit of duplicated loop. */
764 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
766 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
769 edge new_loop_exit_edge;
771 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
772 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
774 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
776 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
780 if (at_exit) /* Add the loop copy at exit. */
782 redirect_edge_and_branch_force (e, new_loop->header);
783 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
785 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
787 else /* Add the copy at entry. */
790 edge entry_e = loop_preheader_edge (loop);
791 basic_block preheader = entry_e->src;
793 if (!flow_bb_inside_loop_p (new_loop,
794 EDGE_SUCC (new_loop->header, 0)->dest))
795 new_exit_e = EDGE_SUCC (new_loop->header, 0);
797 new_exit_e = EDGE_SUCC (new_loop->header, 1);
799 redirect_edge_and_branch_force (new_exit_e, loop->header);
800 set_immediate_dominator (CDI_DOMINATORS, loop->header,
803 /* We have to add phi args to the loop->header here as coming
804 from new_exit_e edge. */
805 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
807 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
809 add_phi_arg (phi, phi_arg, new_exit_e);
812 redirect_edge_and_branch_force (entry_e, new_loop->header);
813 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
816 flow_loop_scan (new_loop, LOOP_ALL);
817 flow_loop_scan (loop, LOOP_ALL);
825 /* Given the condition statement COND, put it as the last statement
826 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
827 Assumes that this is the single exit of the guarded loop.
828 Returns the skip edge. */
831 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
834 block_stmt_iterator bsi;
836 tree cond_stmt, then_label, else_label;
838 enter_e = EDGE_SUCC (guard_bb, 0);
839 enter_e->flags &= ~EDGE_FALLTHRU;
840 enter_e->flags |= EDGE_FALSE_VALUE;
841 bsi = bsi_last (guard_bb);
843 then_label = build1 (GOTO_EXPR, void_type_node,
844 tree_block_label (exit_bb));
845 else_label = build1 (GOTO_EXPR, void_type_node,
846 tree_block_label (enter_e->dest));
847 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
848 then_label, else_label);
849 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
850 /* Add new edge to connect entry block to the second loop. */
851 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
852 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
857 /* This function verifies that the following restrictions apply to LOOP:
859 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
860 (3) it is single entry, single exit
861 (4) its exit condition is the last stmt in the header
862 (5) E is the entry/exit edge of LOOP.
866 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
868 edge exit_e = loop->exit_edges [0];
869 edge entry_e = loop_preheader_edge (loop);
870 tree orig_cond = get_loop_exit_condition (loop);
871 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
873 if (any_marked_for_rewrite_p ())
877 /* All loops have an outer scope; the only case loop->outer is NULL is for
878 the function itself. */
880 || loop->num_nodes != 2
881 || !empty_block_p (loop->latch)
882 || loop->num_exits != 1
883 || loop->num_entries != 1
884 /* Verify that new loop exit condition can be trivially modified. */
885 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
886 || (e != exit_e && e != entry_e))
892 #ifdef ENABLE_CHECKING
894 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
895 struct loop *second_loop)
897 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
898 basic_block loop2_entry_bb = second_loop->pre_header;
899 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
901 /* A guard that controls whether the second_loop is to be executed or skipped
902 is placed in first_loop->exit. first_loopt->exit therefore has two
903 successors - one is the preheader of second_loop, and the other is a bb
906 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
909 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
912 /* The preheader of new_loop is expected to have two predessors:
913 first_loop->exit and the block that precedes first_loop. */
915 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
916 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
917 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
918 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
919 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
921 /* Verify that the other successor of first_loopt->exit is after the
927 /* Function slpeel_tree_peel_loop_to_edge.
929 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
930 that is placed on the entry (exit) edge E of LOOP. After this transformation
931 we have two loops one after the other - first-loop iterates FIRST_NITERS
932 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
935 - LOOP: the loop to be peeled.
936 - E: the exit or entry edge of LOOP.
937 If it is the entry edge, we peel the first iterations of LOOP. In this
938 case first-loop is LOOP, and second-loop is the newly created loop.
939 If it is the exit edge, we peel the last iterations of LOOP. In this
940 case, first-loop is the newly created loop, and second-loop is LOOP.
941 - NITERS: the number of iterations that LOOP iterates.
942 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
943 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
944 for updating the loop bound of the first-loop to FIRST_NITERS. If it
945 is false, the caller of this function may want to take care of this
946 (this can be useful if we don't want new stmts added to first-loop).
949 The function returns a pointer to the new loop-copy, or NULL if it failed
950 to perform the transformation.
952 The function generates two if-then-else guards: one before the first loop,
953 and the other before the second loop:
955 if (FIRST_NITERS == 0) then skip the first loop,
956 and go directly to the second loop.
958 if (FIRST_NITERS == NITERS) then skip the second loop.
960 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
961 FORNOW the resulting code will not be in loop-closed-ssa form.
NOTE(review): this is an excerpted listing -- the original file's blank
lines and several structural lines (return type, braces, early returns,
ASCII-art CFG diagrams) are not reproduced here.  The left-hand numbers
are the original file's line numbers.  Confirm against the full file
before editing.
965 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
966 edge e, tree first_niters,
967 tree niters, bool update_first_loop_count)
969 struct loop *new_loop = NULL, *first_loop, *second_loop;
973 basic_block bb_before_second_loop, bb_after_second_loop;
974 basic_block bb_before_first_loop;
975 basic_block bb_between_loops;
976 edge exit_e = loop->exit_edges [0];
979 if (!slpeel_can_duplicate_loop_p (loop, e))
982 /* We have to initialize cfg_hooks. Then, when calling
983 cfg_hooks->split_edge, the function tree_split_edge
984 is actually called and, when calling cfg_hooks->duplicate_block,
985 the function tree_duplicate_bb is called. */
986 tree_register_cfg_hooks ();
/* NOTE(review): whether NEW_LOOP becomes the first or the second loop
   depends on whether E is the loop's entry or exit edge; the branch that
   selects between the two assignments below is in lines not shown.  */
989 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
990 Resulting CFG would be:
1003 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1005 loop_loc = find_loop_location (loop);
1006 if (vect_debug_stats (loop_loc)
1007 || vect_debug_details (loop_loc))
1008 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1014 /* NEW_LOOP was placed after LOOP. */
1016 second_loop = new_loop;
1020 /* NEW_LOOP was placed before LOOP. */
1021 first_loop = new_loop;
1025 definitions = marked_ssa_names ();
1026 allocate_new_names (definitions);
1027 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1028 rename_variables_in_loop (new_loop);
1031 /* 2. Add the guard that controls whether the first loop is executed.
1032 Resulting CFG would be:
1034 bb_before_first_loop:
1035 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1042 bb_before_second_loop:
1051 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1052 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1053 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1054 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1055 flow_loop_scan (first_loop, LOOP_ALL);
1056 flow_loop_scan (second_loop, LOOP_ALL);
/* NOTE(review): the "pre_condition =" assignment target for the build2
   call below sits on a line not visible in this listing.  */
1059 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1060 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1061 bb_before_second_loop, bb_before_first_loop);
1062 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1063 first_loop == new_loop);
1066 /* 3. Add the guard that controls whether the second loop is executed.
1067 Resulting CFG would be:
1069 bb_before_first_loop:
1070 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1078 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1079 GOTO bb_before_second_loop
1081 bb_before_second_loop:
1087 bb_after_second_loop:
1092 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1093 add_bb_to_loop (bb_between_loops, first_loop->outer);
1094 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1095 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1096 flow_loop_scan (first_loop, LOOP_ALL);
1097 flow_loop_scan (second_loop, LOOP_ALL);
1099 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1100 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1101 bb_after_second_loop, bb_before_first_loop);
1102 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1103 second_loop == new_loop);
1105 /* Flow loop scan does not update loop->single_exit field. */
1106 first_loop->single_exit = first_loop->exit_edges[0];
1107 second_loop->single_exit = second_loop->exit_edges[0];
1109 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1111 if (update_first_loop_count)
1112 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
/* Release the SSA-name bookkeeping used while duplicating the loop.
   NOTE(review): the trailing "return new_loop;" and closing brace are in
   lines not shown here.  */
1114 free_new_names (definitions);
1115 BITMAP_XFREE (definitions);
1116 unmark_all_for_rewrite ();
1121 /* Function vect_get_loop_location.
1123 Extract the location of the loop in the source code.
1124 If the loop is not well formed for vectorization, an estimated
1125 location is calculated.
1126 Return the loop location if succeed and NULL if not. */
1129 find_loop_location (struct loop *loop)
1131 tree node = NULL_TREE;
1133 block_stmt_iterator si;
1138 node = get_loop_exit_condition (loop);
1140 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1141 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1142 return EXPR_LOC (node);
1144 /* If we got here the loop is probably not "well formed",
1145 try to estimate the loop location */
1152 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1154 node = bsi_stmt (si);
1155 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1156 return EXPR_LOC (node);
1164 /* Here the proper Vectorizer starts. */
1166 /*************************************************************************
1167 Vectorization Utilities.
1168 *************************************************************************/
1170 /* Function new_stmt_vec_info.
1172 Create and initialize a new stmt_vec_info struct for STMT. */
1175 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1178 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1180 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1181 STMT_VINFO_STMT (res) = stmt;
1182 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1183 STMT_VINFO_RELEVANT_P (res) = 0;
1184 STMT_VINFO_VECTYPE (res) = NULL;
1185 STMT_VINFO_VEC_STMT (res) = NULL;
1186 STMT_VINFO_DATA_REF (res) = NULL;
1187 STMT_VINFO_MEMTAG (res) = NULL;
1188 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1189 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1190 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1191 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1192 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1198 /* Function new_loop_vec_info.
1200 Create and initialize a new loop_vec_info struct for LOOP, as well as
1201 stmt_vec_info structs for all the stmts in LOOP. */
1204 new_loop_vec_info (struct loop *loop)
1208 block_stmt_iterator si;
1211 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1213 bbs = get_loop_body (loop);
1215 /* Create stmt_info for all stmts in the loop. */
1216 for (i = 0; i < loop->num_nodes; i++)
1218 basic_block bb = bbs[i];
1219 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1221 tree stmt = bsi_stmt (si);
1224 get_stmt_operands (stmt);
1225 ann = stmt_ann (stmt);
1226 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1230 LOOP_VINFO_LOOP (res) = loop;
1231 LOOP_VINFO_BBS (res) = bbs;
1232 LOOP_VINFO_EXIT_COND (res) = NULL;
1233 LOOP_VINFO_NITERS (res) = NULL;
1234 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1235 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1236 LOOP_VINFO_VECT_FACTOR (res) = 0;
1237 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1238 "loop_write_datarefs");
1239 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1240 "loop_read_datarefs");
1241 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1242 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1248 /* Function destroy_loop_vec_info.
1250 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1251 stmts in the loop. */
1254 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1259 block_stmt_iterator si;
1265 loop = LOOP_VINFO_LOOP (loop_vinfo);
1267 bbs = LOOP_VINFO_BBS (loop_vinfo);
1268 nbbs = loop->num_nodes;
1270 for (j = 0; j < nbbs; j++)
1272 basic_block bb = bbs[j];
1273 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1275 tree stmt = bsi_stmt (si);
1276 stmt_ann_t ann = stmt_ann (stmt);
1277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1279 set_stmt_info (ann, NULL);
1283 free (LOOP_VINFO_BBS (loop_vinfo));
1284 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1285 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1291 /* Function debug_loop_stats.
1293 For vectorization statistics dumps. */
1296 vect_debug_stats (LOC loc)
1298 if (!dump_file || !(dump_flags & TDF_STATS))
1301 if (loc == UNKNOWN_LOC)
1302 fprintf (dump_file, "\n");
1304 fprintf (dump_file, "\nloop at %s:%d: ",
1305 LOC_FILE (loc), LOC_LINE (loc));
1311 /* Function debug_loop_details.
1313 For vectorization debug dumps. */
1316 vect_debug_details (LOC loc)
1318 if (!dump_file || !(dump_flags & TDF_DETAILS))
1321 if (loc == UNKNOWN_LOC)
1322 fprintf (dump_file, "\n");
1324 fprintf (dump_file, "\nloop at %s:%d: ",
1325 LOC_FILE (loc), LOC_LINE (loc));
1331 /* Function vect_get_ptr_offset
1333 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1336 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1337 tree vectype ATTRIBUTE_UNUSED,
1338 tree *offset ATTRIBUTE_UNUSED)
1340 /* TODO: Use alignment information. */
1345 /* Function vect_strip_conversions
1347 Strip conversions that don't narrow the mode. */
1350 vect_strip_conversion (tree expr)
1352 tree to, ti, oprnd0;
1354 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1356 to = TREE_TYPE (expr);
1357 oprnd0 = TREE_OPERAND (expr, 0);
1358 ti = TREE_TYPE (oprnd0);
1360 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1362 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1371 /* Function vect_analyze_offset_expr
NOTE(review): excerpted listing -- blank lines and several structural
lines (return statements, braces, the "else" keywords pairing the
visible branches, part of the parameter list) are not reproduced here.
The left-hand numbers are the original file's line numbers.
1373 Given an offset expression EXPR received from get_inner_reference, analyze
1374 it and create an expression for INITIAL_OFFSET by substituting the variables
1375 of EXPR with initial_condition of the corresponding access_fn in the loop.
1378 for (j = 3; j < N; j++)
1381 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1382 substituted, since its access_fn in the inner loop is i. 'j' will be
1383 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1386 Compute MISALIGN (the misalignment of the data reference initial access from
1387 its base) if possible. Misalignment can be calculated only if all the
1388 variables can be substituted with constants, or if a variable is multiplied
1389 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1390 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1391 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1392 VECTYPE_ALIGNMENT computation in the caller of this function).
1394 STEP is an evolution of the data reference in this loop in bytes.
1395 In the above example, STEP is C_j.
1397 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1398 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1399 are NULL_TREEs. Otherwise, return TRUE.
1404 vect_analyze_offset_expr (tree expr,
1406 tree vectype_alignment,
1407 tree *initial_offset,
1413 tree left_offset = ssize_int (0);
1414 tree right_offset = ssize_int (0);
1415 tree left_misalign = ssize_int (0);
1416 tree right_misalign = ssize_int (0);
1417 tree left_step = ssize_int (0);
1418 tree right_step = ssize_int (0);
1419 enum tree_code code;
1420 tree init, evolution;
1423 *misalign = NULL_TREE;
1424 *initial_offset = NULL_TREE;
1426 /* Strip conversions that don't narrow the mode. */
1427 expr = vect_strip_conversion (expr);
/* Base case 1: a compile-time constant offset.  It is its own initial
   offset and misalignment; it contributes no per-iteration step.  */
1433 if (TREE_CODE (expr) == INTEGER_CST)
1435 *initial_offset = fold_convert (ssizetype, expr);
1436 *misalign = fold_convert (ssizetype, expr);
1437 *step = ssize_int (0);
1441 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1442 access_fn in the current loop. */
1443 if (SSA_VAR_P (expr))
1445 tree access_fn = analyze_scalar_evolution (loop, expr);
1447 if (access_fn == chrec_dont_know)
1451 init = initial_condition_in_loop_num (access_fn, loop->num);
1452 if (init == expr && !expr_invariant_in_loop_p (loop, init))
1453 /* Not enough information: may be not loop invariant.
1454 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1455 initial_condition is D, but it depends on i - loop's induction
1459 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1460 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1461 /* Evolution is not constant. */
1464 if (TREE_CODE (init) == INTEGER_CST)
1465 *misalign = fold_convert (ssizetype, init);
1467 /* Not constant, misalignment cannot be calculated. */
1468 *misalign = NULL_TREE;
1470 *initial_offset = fold_convert (ssizetype, init);
1472 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
1476 /* Recursive computation. */
1477 if (!BINARY_CLASS_P (expr))
1479 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
1480 if (vect_debug_details (UNKNOWN_LOC))
1482 fprintf (dump_file, "Not binary expression ");
1483 print_generic_expr (dump_file, expr, TDF_SLIM);
/* Recurse on both operands; each recursion fills in the corresponding
   left_*/right_* triple (offset, misalign, step).  */
1487 oprnd0 = TREE_OPERAND (expr, 0);
1488 oprnd1 = TREE_OPERAND (expr, 1);
1490 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1491 &left_misalign, &left_step)
1492 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1493 &right_offset, &right_misalign, &right_step))
1496 /* The type of the operation: plus, minus or mult. */
1497 code = TREE_CODE (expr);
/* NOTE(review): the visible MULT handling below is guarded by a
   "if (code == MULT_EXPR)" test that sits on a line not shown.  */
1501 if (TREE_CODE (right_offset) != INTEGER_CST)
1502 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1504 FORNOW: We don't support such cases. */
1507 /* Strip conversions that don't narrow the mode. */
1508 left_offset = vect_strip_conversion (left_offset);
1511 /* Misalignment computation. */
1512 if (SSA_VAR_P (left_offset))
1514 /* If the left side contains variables that can't be substituted with
1515 constants, we check if the right side is a multiple of ALIGNMENT.
1517 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1518 fold_convert (ssizetype, vectype_alignment))))
1519 *misalign = ssize_int (0);
1521 /* If the remainder is not zero or the right side isn't constant,
1522 we can't compute misalignment. */
1523 *misalign = NULL_TREE;
1527 /* The left operand was successfully substituted with constant. */
1529 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1531 *misalign = size_binop (code, left_misalign, right_misalign);
1533 *misalign = NULL_TREE;
1536 /* Step calculation. */
1537 /* Multiply the step by the right operand. */
1538 *step = size_binop (MULT_EXPR, left_step, right_offset);
/* PLUS/MINUS case: combine the two sub-results component-wise.  */
1543 /* Combine the recursive calculations for step and misalignment. */
1544 *step = size_binop (code, left_step, right_step);
1546 if (left_misalign && right_misalign)
1547 *misalign = size_binop (code, left_misalign, right_misalign);
1549 *misalign = NULL_TREE;
1557 /* Compute offset. */
1558 *initial_offset = fold_convert (ssizetype,
1559 fold (build2 (code, TREE_TYPE (left_offset),
1566 /* Function vect_get_base_and_offset
NOTE(review): excerpted listing -- blank lines, the switch/case labels
this function dispatches on, braces and several statements are not
reproduced here.  The left-hand numbers are original file line numbers.
1568 Return the BASE of the data reference EXPR.
1569 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1571 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1572 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1573 instantiated with initial_conditions of access_functions of variables,
1574 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1576 Function get_inner_reference is used for the above in case of ARRAY_REF and
1580 EXPR - the memory reference that is being analyzed
1581 DR - the data_reference struct of the _original_ memory reference
1582 (Note: DR_REF (DR) is not necessarily EXPR)
1583 VECTYPE - the type that defines the alignment (i.e, we compute
1584 alignment relative to TYPE_ALIGN(VECTYPE))
1587 BASE (returned value) - the base of the data reference EXPR.
1588 E.g, if EXPR is a.b[k].c[i][j] the returned
1590 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1591 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1592 computation is impossible
1593 STEP - evolution of the DR_REF in the loop
1594 BASE_ALIGNED_P - indicates if BASE is aligned
1596 If something unexpected is encountered (an unsupported form of data-ref),
1597 then NULL_TREE is returned. */
/* NOTE(review): the switch over TREE_CODE (expr) that selects among the
   labelled regions below ("end the recursion" vs "continue the
   recursion") is only partially visible; the case labels themselves are
   in lines not shown.  */
1600 vect_get_base_and_offset (struct data_reference *dr,
1603 loop_vec_info loop_vinfo,
1604 tree *initial_offset,
1607 bool *base_aligned_p)
1609 tree this_offset = ssize_int (0);
1610 tree this_misalign = ssize_int (0);
1611 tree this_step = ssize_int (0);
1612 tree base = NULL_TREE;
1614 tree oprnd0, oprnd1;
1615 enum tree_code code = TREE_CODE (expr);
1616 HOST_WIDE_INT pbitsize;
1617 HOST_WIDE_INT pbitpos;
1619 enum machine_mode pmode;
1620 int punsignedp, pvolatilep;
1621 tree bit_pos_in_bytes;
1622 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1624 *base_aligned_p = false;
1628 /* These cases end the recursion: */
1631 *initial_offset = ssize_int (0);
1632 *step = ssize_int (0);
1633 *misalign = ssize_int (0);
1634 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1635 *base_aligned_p = true;
1639 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1642 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1644 base = vect_get_ptr_offset (expr, vectype, misalign);
1646 *base_aligned_p = true;
1650 *base_aligned_p = true;
1651 *misalign = ssize_int (0);
1653 *initial_offset = ssize_int (0);
1654 *step = ssize_int (0);
/* Constant leaf: the constant itself is both offset and misalignment.  */
1658 *initial_offset = fold_convert (ssizetype, expr);
1659 *misalign = fold_convert (ssizetype, expr);
1660 *step = ssize_int (0);
1663 /* These cases continue the recursion: */
1665 oprnd0 = TREE_OPERAND (expr, 0);
1670 oprnd0 = TREE_OPERAND (expr, 0);
1676 oprnd0 = TREE_OPERAND (expr, 0);
1677 oprnd1 = TREE_OPERAND (expr, 1);
1679 /* In case we have a PLUS_EXPR of the form
1680 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1681 This is verified in vect_get_memtag_and_dr. */
1682 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1683 &this_offset, &this_misalign,
1684 &this_step, base_aligned_p);
1685 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1686 this_offset = ssize_int (0);
1689 this_misalign = NULL_TREE;
1691 this_misalign = size_binop (TREE_CODE (expr), ssize_int (0),
1697 if (!handled_component_p (expr))
1698 /* Unsupported expression. */
1701 /* Find the base and the offset from it. */
1702 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1703 &pmode, &punsignedp, &pvolatilep, false);
1708 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1709 &this_offset, &this_misalign,
1712 /* Failed to compute offset or step. */
1714 *initial_offset = NULL_TREE;
1715 *misalign = NULL_TREE;
1719 /* Add bit position to OFFSET and MISALIGN. */
1721 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1722 /* Check that there is no remainder in bits. */
1723 if (pbitpos%BITS_PER_UNIT)
1725 if (vect_debug_details (UNKNOWN_LOC))
1726 fprintf (dump_file, "bit offset alignment.");
1729 this_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes,
1730 fold_convert (ssizetype, this_offset));
1732 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1734 /* Continue the recursion to refine the base (get_inner_reference returns
1735 &a for &a[i], and not a). */
1739 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1740 initial_offset, misalign, step,
1744 /* Combine the results. */
1745 if (this_misalign && *misalign)
1746 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1748 *misalign = NULL_TREE;
1750 *step = size_binop (PLUS_EXPR, *step, this_step);
1752 *initial_offset = size_binop (PLUS_EXPR, *initial_offset, this_offset);
1754 if (vect_debug_details (UNKNOWN_LOC))
1756 print_generic_expr (dump_file, expr, TDF_SLIM);
1757 fprintf (dump_file, "\n --> total offset for ref: ");
1758 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1759 fprintf (dump_file, "\n --> total misalign for ref: ");
1760 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1761 fprintf (dump_file, "\n --> total step for ref: ");
1762 print_generic_expr (dump_file, *step, TDF_SLIM);
1769 /* Function vect_force_dr_alignment_p.
1771 Returns whether the alignment of a DECL can be forced to be aligned
1772 on ALIGNMENT bit boundary. */
1775 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1777 if (TREE_CODE (decl) != VAR_DECL)
1780 if (DECL_EXTERNAL (decl))
1783 if (TREE_ASM_WRITTEN (decl))
1786 if (TREE_STATIC (decl))
1787 return (alignment <= MAX_OFILE_ALIGNMENT);
1789 /* This is not 100% correct. The absolute correct stack alignment
1790 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1791 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1792 However, until someone implements forced stack alignment, SSE
1793 isn't really usable without this. */
1794 return (alignment <= PREFERRED_STACK_BOUNDARY);
1798 /* Function vect_get_new_vect_var.
1800 Returns a name for a new variable. The current naming scheme appends the
1801 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1802 the name of vectorizer generated variables, and appends that to NAME if
1806 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1812 if (var_kind == vect_simple_var)
1817 prefix_len = strlen (prefix);
1820 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1822 new_vect_var = create_tmp_var (type, prefix);
1824 return new_vect_var;
1828 /* Function vect_create_index_for_vector_ref.
1830 Create (and return) an index variable, along with it's update chain in the
1831 loop. This variable will be used to access a memory location in a vector
1835 LOOP: The loop being vectorized.
1836 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1837 function can be added here, or in the loop pre-header.
1840 Return an index that will be used to index a vector array. It is expected
1841 that a pointer to the first vector will be used as the base address for the
1844 FORNOW: we are not trying to be efficient, just creating a new index each
1845 time from scratch. At this time all vector references could use the same
1848 TODO: create only one index to be used by all vector references. Record
1849 the index in the LOOP_VINFO the first time this procedure is called and
1850 return it on subsequent calls. The increment of this index must be placed
1851 just before the conditional expression that ends the single block loop. */
1854 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
1857 block_stmt_iterator incr_bsi;
1859 tree indx_before_incr, indx_after_incr;
1860 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1863 /* It is assumed that the base pointer used for vectorized access contains
1864 the address of the first vector. Therefore the index used for vectorized
1865 access must be initialized to zero and incremented by 1. */
1867 init = integer_zero_node;
1868 step = integer_one_node;
1870 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1871 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
1872 &indx_before_incr, &indx_after_incr);
1873 incr = bsi_stmt (incr_bsi);
1874 get_stmt_operands (incr);
1875 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1877 return indx_before_incr;
1881 /* Function vect_create_addr_base_for_vector_ref.
NOTE(review): excerpted listing -- blank lines and several structural
lines (return type, braces, the if/else structure around the
POINTER_TYPE branch, the "if (offset)" guard, the final return) are not
reproduced.  Left-hand numbers are original file line numbers.
1883 Create an expression that computes the address of the first memory location
1884 that will be accessed for a data reference.
1887 STMT: The statement containing the data reference.
1888 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1889 OFFSET: Optional. If supplied, it is be added to the initial address.
1892 1. Return an SSA_NAME whose value is the address of the memory location of
1893 the first vector of the data reference.
1894 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1895 these statement(s) which define the returned SSA_NAME.
1897 FORNOW: We are only handling array accesses with step 1. */
1900 vect_create_addr_base_for_vector_ref (tree stmt,
1901 tree *new_stmt_list,
1904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1905 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1906 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1907 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1908 tree ref = DR_REF (dr);
1909 tree scalar_type = TREE_TYPE (ref);
1910 tree scalar_ptr_type = build_pointer_type (scalar_type);
1913 tree addr_base, addr_expr;
1914 tree dest, new_stmt;
1915 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
/* Non-pointer base (array/record): take its address.  */
1917 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1918 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1920 /* Add '&' to ref_base. */
1921 data_ref_base = build_fold_addr_expr (data_ref_base);
/* Pointer base: gimplify it and cast to a pointer-to-array-of-scalar,
   so the address arithmetic below is well typed.  */
1924 /* Create '(scalar_type*) base' for pointers. */
1925 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1926 tree scalar_array_type = build_array_type (scalar_type, 0);
1927 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1928 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1929 add_referenced_tmp_var (array_ptr);
1931 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1932 add_referenced_tmp_var (dest);
1933 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1934 append_to_statement_list_force (new_stmt, new_stmt_list);
1936 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1937 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1938 new_temp = make_ssa_name (array_ptr, vec_stmt);
1939 TREE_OPERAND (vec_stmt, 0) = new_temp;
1940 append_to_statement_list_force (vec_stmt, new_stmt_list);
1941 data_ref_base = new_temp;
1944 /* Create base_offset */
1945 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1946 add_referenced_tmp_var (dest);
1947 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1948 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Optional extra OFFSET: scale it by the step (bytes per iteration) and
   fold it into base_offset.  The guarding "if (offset)" is on a line not
   shown in this listing.  */
1952 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1953 add_referenced_tmp_var (tmp);
1954 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1955 STMT_VINFO_VECT_STEP (stmt_info)));
1956 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
1957 base_offset, offset));
1958 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1959 append_to_statement_list_force (new_stmt, new_stmt_list);
1962 /* base + base_offset */
1963 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1966 /* addr_expr = addr_base */
1967 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1968 get_name (base_name));
1969 add_referenced_tmp_var (addr_expr);
1970 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1971 new_temp = make_ssa_name (addr_expr, vec_stmt);
1972 TREE_OPERAND (vec_stmt, 0) = new_temp;
1973 append_to_statement_list_force (vec_stmt, new_stmt_list);
1975 if (vect_debug_details (UNKNOWN_LOC))
1977 fprintf (dump_file, "created ");
1978 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1979 fprintf (dump_file, "\n");
1985 /* Function get_vectype_for_scalar_type.
1987 Returns the vector type corresponding to SCALAR_TYPE as supported
1991 get_vectype_for_scalar_type (tree scalar_type)
1993 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1994 int nbytes = GET_MODE_SIZE (inner_mode);
2001 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
2003 nunits = UNITS_PER_SIMD_WORD / nbytes;
2005 vectype = build_vector_type (scalar_type, nunits);
2006 if (vect_debug_details (UNKNOWN_LOC))
2008 fprintf (dump_file, "get vectype with %d units of type ", nunits);
2009 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2015 if (vect_debug_details (UNKNOWN_LOC))
2017 fprintf (dump_file, "vectype: ");
2018 print_generic_expr (dump_file, vectype, TDF_SLIM);
2021 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2023 /* TODO: tree-complex.c sometimes can parallelize operations
2024 on generic vectors. We can vectorize the loop in that case,
2025 but then we should re-run the lowering pass. */
2026 if (vect_debug_details (UNKNOWN_LOC))
2027 fprintf (dump_file, "mode not supported by target.");
2035 /* Function vect_align_data_ref.
2037 Handle mislignment of a memory accesses.
2039 FORNOW: Can't handle misaligned accesses.
2040 Make sure that the dataref is aligned. */
2043 vect_align_data_ref (tree stmt)
2045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2046 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2048 /* FORNOW: can't handle misaligned accesses;
2049 all accesses expected to be aligned. */
2050 gcc_assert (aligned_access_p (dr));
2054 /* Function vect_create_data_ref_ptr.
NOTE(review): excerpted listing -- blank lines and many structural lines
(return type, braces, several local declarations such as base_name,
vect_ptr, tag, idx, new_temp, vec_stmt, new_bb, ptr_update,
vectype_size, vect_ptr_init, data_ref_ptr, i) are not reproduced here.
Left-hand numbers are original file line numbers.
2056 Create a memory reference expression for vector access, to be used in a
2057 vector load/store stmt. The reference is based on a new pointer to vector
2061 1. STMT: a stmt that references memory. Expected to be of the form
2062 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2063 2. BSI: block_stmt_iterator where new stmts can be added.
2064 3. OFFSET (optional): an offset to be added to the initial address accessed
2065 by the data-ref in STMT.
2066 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2067 pointing to the initial address.
2070 1. Declare a new ptr to vector_type, and have it point to the base of the
2071 data reference (initial addressed accessed by the data reference).
2072 For example, for vector of type V8HI, the following code is generated:
2075 vp = (v8hi *)initial_address;
2077 if OFFSET is not supplied:
2078 initial_address = &a[init];
2079 if OFFSET is supplied:
2080 initial_address = &a[init + OFFSET];
2082 Return the initial_address in INITIAL_ADDRESS.
2084 2. Create a data-reference in the loop based on the new vector pointer vp,
2085 and using a new index variable 'idx' as follows:
2089 where if ONLY_INIT is true:
2092 update = idx + vector_type_size
2094 Return the pointer vp'.
2097 FORNOW: handle only aligned and consecutive accesses. */
2100 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2101 tree *initial_address, bool only_init)
2104 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2105 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2106 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2107 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2108 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2112 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2113 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2114 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2115 int nvuses, nv_may_defs, nv_must_defs;
2119 tree new_stmt_list = NULL_TREE;
2121 edge pe = loop_preheader_edge (loop);
2127 tree type, tmp, size;
2129 base_name = unshare_expr (DR_BASE_NAME (dr));
2130 if (vect_debug_details (UNKNOWN_LOC))
2132 tree data_ref_base = base_name;
2133 fprintf (dump_file, "create array_ref of type: ");
2134 print_generic_expr (dump_file, vectype, TDF_SLIM);
2135 if (TREE_CODE (data_ref_base) == VAR_DECL)
2136 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2137 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2138 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2139 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2140 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2141 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2142 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2143 print_generic_expr (dump_file, base_name, TDF_SLIM);
2146 /** (1) Create the new vector-pointer variable: **/
2148 vect_ptr_type = build_pointer_type (vectype);
2149 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2150 get_name (base_name));
2151 add_referenced_tmp_var (vect_ptr);
2154 /** (2) Handle aliasing information of the new vector-pointer: **/
2156 tag = STMT_VINFO_MEMTAG (stmt_info);
2158 get_var_ann (vect_ptr)->type_mem_tag = tag;
2160 /* Mark for renaming all aliased variables
2161 (i.e, the may-aliases of the type-mem-tag). */
2162 nvuses = NUM_VUSES (vuses);
2163 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2164 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2165 for (i = 0; i < nvuses; i++)
2167 tree use = VUSE_OP (vuses, i);
2168 if (TREE_CODE (use) == SSA_NAME)
2169 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2171 for (i = 0; i < nv_may_defs; i++)
2173 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2174 if (TREE_CODE (def) == SSA_NAME)
2175 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2177 for (i = 0; i < nv_must_defs; i++)
2179 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2180 if (TREE_CODE (def) == SSA_NAME)
2181 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2185 /** (3) Calculate the initial address the vector-pointer, and set
2186 the vector-pointer to point to it before the loop: **/
2188 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2189 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2191 pe = loop_preheader_edge (loop);
2192 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2193 gcc_assert (!new_bb);
2194 *initial_address = new_temp;
2196 /* Create: p = (vectype *) initial_base */
2197 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2198 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2199 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2200 TREE_OPERAND (vec_stmt, 0) = new_temp;
2201 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2202 gcc_assert (!new_bb);
2203 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2206 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2208 if (only_init) /* No update in loop is required. */
2209 return vect_ptr_init;
2211 idx = vect_create_index_for_vector_ref (loop_vinfo);
2213 /* Create: update = idx * vectype_size */
2214 tmp = create_tmp_var (integer_type_node, "update");
2215 add_referenced_tmp_var (tmp);
2216 size = TYPE_SIZE (vect_ptr_type);
2217 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2218 ptr_update = create_tmp_var (type, "update");
2219 add_referenced_tmp_var (ptr_update);
2220 vectype_size = TYPE_SIZE_UNIT (vectype);
2221 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2222 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2223 new_temp = make_ssa_name (tmp, vec_stmt);
2224 TREE_OPERAND (vec_stmt, 0) = new_temp;
2225 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
/* Convert the byte update to pointer-sized integer type before the add.  */
2226 vec_stmt = fold_convert (type, new_temp);
2227 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2228 new_temp = make_ssa_name (ptr_update, vec_stmt);
2229 TREE_OPERAND (vec_stmt, 0) = new_temp;
2230 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2232 /* Create: data_ref_ptr = vect_ptr_init + update */
2233 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2234 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2235 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2236 TREE_OPERAND (vec_stmt, 0) = new_temp;
2237 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2238 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2240 return data_ref_ptr;
2244 /* Function vect_create_destination_var.
2246 Create a new temporary of type VECTYPE. */
/* SCALAR_DEST must be an SSA_NAME; its base name (possibly NULL) is reused
   so the new vector temporary is recognizable in debug dumps.  The new var
   is registered as a referenced var so later SSA bookkeeping sees it. */
2249 vect_create_destination_var (tree scalar_dest, tree vectype)
2252 const char *new_name;
2254 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2256 new_name = get_name (scalar_dest);
2259 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2260 add_referenced_tmp_var (vec_dest);
2266 /* Function vect_init_vector.
2268 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2269 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2270 used in the vectorization of STMT. */
2273 vect_init_vector (tree stmt, tree vector_var)
2275 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2276 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2277 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2280 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2286 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2287 add_referenced_tmp_var (new_var);
2289 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2290 new_temp = make_ssa_name (new_var, init_stmt);
2291 TREE_OPERAND (init_stmt, 0) = new_temp;
/* The init stmt is placed on the preheader edge so the new def dominates
   every use inside the loop.  Inserting on the edge must not have required
   splitting it (that would indicate an unexpected CFG shape).  */
2293 pe = loop_preheader_edge (loop);
2294 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2295 gcc_assert (!new_bb);
2297 if (vect_debug_details (UNKNOWN_LOC))
2299 fprintf (dump_file, "created new init_stmt: ");
2300 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
/* Return the SSA def created above.  */
2303 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2308 /* Function vect_get_vec_def_for_operand.
2310 OP is an operand in STMT. This function returns a (vector) def that will be
2311 used in the vectorized stmt for STMT.
2313 In the case that OP is an SSA_NAME which is defined in the loop, then
2314 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2316 In case OP is an invariant or constant, a new stmt that creates a vector def
2317 needs to be introduced. */
2320 vect_get_vec_def_for_operand (tree op, tree stmt)
2325 stmt_vec_info def_stmt_info = NULL;
2326 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2327 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2328 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2329 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2330 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2337 if (vect_debug_details (UNKNOWN_LOC))
2339 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2340 print_generic_expr (dump_file, op, TDF_SLIM);
2343 /** ===> Case 1: operand is a constant. **/
2345 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2347 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2351 /* Build a tree with vector elements. */
2352 if (vect_debug_details (UNKNOWN_LOC))
2353 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
/* Duplicate the scalar constant NUNITS times into a TREE_LIST, then wrap
   it in a VECTOR_CST and materialize it in the loop preheader.  */
2355 for (i = nunits - 1; i >= 0; --i)
2357 t = tree_cons (NULL_TREE, op, t);
2359 vec_cst = build_vector (vectype, t);
2360 return vect_init_vector (stmt, vec_cst);
2363 gcc_assert (TREE_CODE (op) == SSA_NAME);
2365 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2367 def_stmt = SSA_NAME_DEF_STMT (op);
2368 def_stmt_info = vinfo_for_stmt (def_stmt);
2370 if (vect_debug_details (UNKNOWN_LOC))
2372 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2373 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2377 /** ==> Case 2.1: operand is defined inside the loop. **/
2381 /* Get the def from the vectorized stmt. */
/* The def stmt has already been vectorized (stmts are transformed in
   order), so its vectorized counterpart must exist.  */
2383 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2384 gcc_assert (vec_stmt);
2385 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2390 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2391 it is a reduction/induction. **/
2393 bb = bb_for_stmt (def_stmt);
2394 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2396 if (vect_debug_details (UNKNOWN_LOC))
2397 fprintf (dump_file, "reduction/induction - unsupported.")
2398 internal_error ("no support for reduction/induction"); /* FORNOW */
2402 /** ==> Case 2.3: operand is defined outside the loop -
2403 it is a loop invariant. */
/* Extract the invariant value from the defining stmt (phi result, rhs of
   a modify stmt, or the variable itself for a default def).  */
2405 switch (TREE_CODE (def_stmt))
2408 def = PHI_RESULT (def_stmt);
2411 def = TREE_OPERAND (def_stmt, 0);
2414 def = TREE_OPERAND (def_stmt, 0);
2415 gcc_assert (IS_EMPTY_STMT (def_stmt));
2419 if (vect_debug_details (UNKNOWN_LOC))
2421 fprintf (dump_file, "unsupported defining stmt: ");
2422 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2424 internal_error ("unsupported defining stmt");
2427 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2429 if (vect_debug_details (UNKNOWN_LOC))
2430 fprintf (dump_file, "Create vector_inv.");
2432 for (i = nunits - 1; i >= 0; --i)
2434 t = tree_cons (NULL_TREE, def, t);
/* A CONSTRUCTOR (not a VECTOR_CST) because the elements are not literal
   constants; it is likewise materialized in the preheader.  */
2437 vec_inv = build_constructor (vectype, t);
2438 return vect_init_vector (stmt, vec_inv);
2442 /* Function vect_finish_stmt_generation.
2444 Insert a new stmt. */
/* Insert VEC_STMT just before the scalar stmt STMT that is being
   vectorized (BSI points at STMT), and give VEC_STMT the source location
   of STMT so diagnostics/debug info refer to the original source line.  */
2447 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2449 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2451 if (vect_debug_details (UNKNOWN_LOC))
2453 fprintf (dump_file, "add new stmt: ");
2454 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2457 #ifdef ENABLE_CHECKING
2458 /* Make sure bsi points to the stmt that is being vectorized. */
2459 gcc_assert (stmt == bsi_stmt (*bsi));
2462 #ifdef USE_MAPPED_LOCATION
/* Fixed: SET_EXPR_LOCATION takes a location_t, which EXPR_LOCATION
   provides; EXPR_LOCUS yields a locus pointer and is only correct in the
   non-mapped-location branch below.  */
2463 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2465 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2470 /* Function vectorizable_assignment.
2472 Check if STMT performs an assignment (copy) that can be vectorized.
2473 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2474 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2475 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2478 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2485 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2486 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2489 /* Is vectorizable assignment? */
/* Must be "ssa_name = <simple use>": a MODIFY_EXPR whose lhs is an
   SSA_NAME and whose rhs is a loop-invariant/defined-in-loop operand.  */
2491 if (TREE_CODE (stmt) != MODIFY_EXPR)
2494 scalar_dest = TREE_OPERAND (stmt, 0);
2495 if (TREE_CODE (scalar_dest) != SSA_NAME)
2498 op = TREE_OPERAND (stmt, 1);
2499 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2501 if (vect_debug_details (UNKNOWN_LOC))
2502 fprintf (dump_file, "use not simple.");
/* Analysis phase only: record the stmt kind for the transform phase.  */
2506 if (!vec_stmt) /* transformation not required. */
2508 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2513 if (vect_debug_details (UNKNOWN_LOC))
2514 fprintf (dump_file, "transform assignment.");
2517 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2520 op = TREE_OPERAND (stmt, 1);
2521 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2523 /* Arguments are ready. create the new vector stmt. */
2524 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2525 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2526 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2527 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2533 /* Function vectorizable_operation.
2535 Check if STMT performs a binary or unary operation that can be vectorized.
2536 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2537 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2538 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2541 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2546 tree op0, op1 = NULL;
2547 tree vec_oprnd0, vec_oprnd1=NULL;
2548 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2552 enum tree_code code;
2553 enum machine_mode vec_mode;
2559 /* Is STMT a vectorizable binary/unary operation? */
2560 if (TREE_CODE (stmt) != MODIFY_EXPR)
2563 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the scalar tree code to the optab that would implement it on
   vectors; checked against the target further below.  */
2566 operation = TREE_OPERAND (stmt, 1);
2567 code = TREE_CODE (operation);
2568 optab = optab_for_tree_code (code, vectype);
2570 /* Support only unary or binary operations. */
2571 op_type = TREE_CODE_LENGTH (code);
2572 if (op_type != unary_op && op_type != binary_op)
2574 if (vect_debug_details (UNKNOWN_LOC))
2575 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* All operands must be "simple uses" (constants, invariants, or defs
   the vectorizer can track).  */
2579 for (i = 0; i < op_type; i++)
2581 op = TREE_OPERAND (operation, i);
2582 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2584 if (vect_debug_details (UNKNOWN_LOC))
2585 fprintf (dump_file, "use not simple.");
2590 /* Supportable by target? */
2593 if (vect_debug_details (UNKNOWN_LOC))
2594 fprintf (dump_file, "no optab.");
2597 vec_mode = TYPE_MODE (vectype);
2598 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2600 if (vect_debug_details (UNKNOWN_LOC))
2601 fprintf (dump_file, "op not supported by target.");
/* Analysis phase only: record the stmt kind for the transform phase.  */
2605 if (!vec_stmt) /* transformation not required. */
2607 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2613 if (vect_debug_details (UNKNOWN_LOC))
2614 fprintf (dump_file, "transform binary/unary operation.");
2617 scalar_dest = TREE_OPERAND (stmt, 0);
2618 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2621 op0 = TREE_OPERAND (operation, 0);
2622 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2624 if (op_type == binary_op)
2626 op1 = TREE_OPERAND (operation, 1);
2627 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2630 /* Arguments are ready. create the new vector stmt. */
2632 if (op_type == binary_op)
2633 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2634 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2636 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2637 build1 (code, vectype, vec_oprnd0));
2638 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2639 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2640 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2646 /* Function vectorizable_store.
2648 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2650 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2651 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2652 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2655 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2661 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2662 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2663 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2664 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2665 enum machine_mode vec_mode;
2667 enum dr_alignment_support alignment_support_cheme;
2669 /* Is vectorizable store? */
/* Must be "<array/indirect ref> = <simple use>" with a recorded data-ref.  */
2671 if (TREE_CODE (stmt) != MODIFY_EXPR)
2674 scalar_dest = TREE_OPERAND (stmt, 0);
2675 if (TREE_CODE (scalar_dest) != ARRAY_REF
2676 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2679 op = TREE_OPERAND (stmt, 1);
2680 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2682 if (vect_debug_details (UNKNOWN_LOC))
2683 fprintf (dump_file, "use not simple.");
2687 vec_mode = TYPE_MODE (vectype);
2688 /* FORNOW. In some cases can vectorize even if data-type not supported
2689 (e.g. - array initialization with 0). */
2690 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2693 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Analysis phase only: record the stmt kind for the transform phase.  */
2697 if (!vec_stmt) /* transformation not required. */
2699 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2705 if (vect_debug_details (UNKNOWN_LOC))
2706 fprintf (dump_file, "transform store");
2708 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2709 gcc_assert (alignment_support_cheme);
/* Fixed: use '==' rather than '='.  The assignment overwrote
   alignment_support_cheme with dr_aligned instead of asserting that only
   aligned stores reach the transform phase (the FORNOW restriction).  */
2710 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2712 /* Handle use - get the vectorized def from the defining stmt. */
2713 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2716 /* FORNOW: make sure the data reference is aligned. */
2717 vect_align_data_ref (stmt);
2718 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2719 data_ref = build_fold_indirect_ref (data_ref);
2721 /* Arguments are ready. create the new vector stmt. */
2722 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2723 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2729 /* vectorizable_load.
2731 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2733 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2734 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2735 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2738 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2741 tree vec_dest = NULL;
2742 tree data_ref = NULL;
2744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2745 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2746 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2753 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2754 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2755 edge pe = loop_preheader_edge (loop);
2756 enum dr_alignment_support alignment_support_cheme;
2758 /* Is vectorizable load? */
/* Must be "ssa_name = <array/indirect ref>" with a recorded data-ref.  */
2760 if (TREE_CODE (stmt) != MODIFY_EXPR)
2763 scalar_dest = TREE_OPERAND (stmt, 0);
2764 if (TREE_CODE (scalar_dest) != SSA_NAME)
2767 op = TREE_OPERAND (stmt, 1);
2768 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2771 if (!STMT_VINFO_DATA_REF (stmt_info))
2774 mode = (int) TYPE_MODE (vectype);
2776 /* FORNOW. In some cases can vectorize even if data-type not supported
2777 (e.g. - data copies). */
2778 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2780 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
2781 fprintf (dump_file, "Aligned load, but unsupported type.");
/* Analysis phase only: record the stmt kind for the transform phase.  */
2785 if (!vec_stmt) /* transformation not required. */
2787 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2793 if (vect_debug_details (UNKNOWN_LOC))
2794 fprintf (dump_file, "transform load.");
2796 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2797 gcc_assert (alignment_support_cheme);
/* Simple case: the target can load this ref directly, either aligned or
   via a misaligned-move (MISALIGNED_INDIRECT_REF).  */
2799 if (alignment_support_cheme == dr_aligned
2800 || alignment_support_cheme == dr_unaligned_supported)
2811 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2812 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2813 if (aligned_access_p (dr))
2814 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned case: encode the known misalignment (in bits; -1 means
   unknown, represented here as 0) in a MISALIGNED_INDIRECT_REF.  */
2817 int mis = DR_MISALIGNMENT (dr);
2818 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2819 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2820 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2822 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2823 new_temp = make_ssa_name (vec_dest, new_stmt);
2824 TREE_OPERAND (new_stmt, 0) = new_temp;
2825 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Software-pipelined realignment: load the two aligned vectors that
   straddle the desired (unaligned) data and merge them with
   REALIGN_LOAD_EXPR; see the scheme sketched in the comment below.  */
2827 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2831 msq_init = *(floor(p1))
2832 p2 = initial_addr + VS - 1;
2833 magic = have_builtin ? builtin_result : initial_address;
2836 p2' = p2 + indx * vectype_size
2838 vec_dest = realign_load (msq, lsq, magic)
2852 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2854 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2856 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2857 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2858 new_temp = make_ssa_name (vec_dest, new_stmt);
2859 TREE_OPERAND (new_stmt, 0) = new_temp;
2860 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2861 gcc_assert (!new_bb);
2862 msq_init = TREE_OPERAND (new_stmt, 0);
2865 /* <2> Create lsq = *(floor(p2')) in the loop */
/* OFFSET = nunits - 1, so p2 points one-vector-minus-one-element past p1;
   its aligned floor is the "least significant quadword" of the pair.  */
2866 offset = build_int_cst (integer_type_node,
2867 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2868 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2869 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2870 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2871 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2872 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2873 new_temp = make_ssa_name (vec_dest, new_stmt);
2874 TREE_OPERAND (new_stmt, 0) = new_temp;
2875 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2876 lsq = TREE_OPERAND (new_stmt, 0);
2880 if (targetm.vectorize.builtin_mask_for_load)
2882 /* Create permutation mask, if required, in loop preheader. */
2884 params = build_tree_list (NULL_TREE, init_addr);
2885 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2886 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2887 new_stmt = build_function_call_expr (builtin_decl, params);
2888 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2889 new_temp = make_ssa_name (vec_dest, new_stmt);
2890 TREE_OPERAND (new_stmt, 0) = new_temp;
2891 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2892 gcc_assert (!new_bb);
2893 magic = TREE_OPERAND (new_stmt, 0);
2895 /* Since we have just created a CALL_EXPR, we may need to
2896 rename call-clobbered variables. */
2897 mark_call_clobbered_vars_to_rename ();
2901 /* Use current address instead of init_addr for reduced reg pressure.
2903 magic = dataref_ptr;
2907 /* <4> Create msq = phi <msq_init, lsq> in loop */
/* msq carries the previous iteration's lsq around the loop back-edge.  */
2908 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2909 msq = make_ssa_name (vec_dest, NULL_TREE);
2910 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2911 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2912 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2913 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2916 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2917 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2918 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2919 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2920 new_temp = make_ssa_name (vec_dest, new_stmt);
2921 TREE_OPERAND (new_stmt, 0) = new_temp;
2922 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2927 *vec_stmt = new_stmt;
2932 /* Function vect_supportable_dr_alignment
2934 Return whether the data reference DR is supported with respect to its
/* Decision order: aligned access wins; otherwise reads may use the
   realign-load scheme (if the target has the optab and either no
   mask-for-load builtin is needed or one is available), then a plain
   misaligned move; anything else is unsupported.  */
2937 static enum dr_alignment_support
2938 vect_supportable_dr_alignment (struct data_reference *dr)
2940 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2941 enum machine_mode mode = (int) TYPE_MODE (vectype);
2943 if (aligned_access_p (dr))
2946 /* Possibly unaligned access. */
2948 if (DR_IS_READ (dr))
2950 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2951 && (!targetm.vectorize.builtin_mask_for_load
2952 || targetm.vectorize.builtin_mask_for_load ()))
2953 return dr_unaligned_software_pipeline;
2955 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2956 /* Can't software pipeline the loads, but can at least do them. */
2957 return dr_unaligned_supported;
2961 return dr_unaligned_unsupported;
2965 /* Function vect_transform_stmt.
2967 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatch on the stmt kind recorded during the analysis phase
   (STMT_VINFO_TYPE) and remember the resulting vector stmt so later
   stmts can look up their operands' vectorized defs.  */
2970 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2972 bool is_store = false;
2973 tree vec_stmt = NULL_TREE;
2974 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2977 switch (STMT_VINFO_TYPE (stmt_info))
2979 case op_vec_info_type:
2980 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2984 case assignment_vec_info_type:
2985 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2989 case load_vec_info_type:
2990 done = vectorizable_load (stmt, bsi, &vec_stmt);
2994 case store_vec_info_type:
2995 done = vectorizable_store (stmt, bsi, &vec_stmt);
3000 if (vect_debug_details (UNKNOWN_LOC))
3001 fprintf (dump_file, "stmt not supported.");
3005 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3011 /* This function builds ni_name = number of iterations loop executes
3012 on the loop preheader. */
/* Gimplifies the (unshared) niters expression into a fresh "niters"
   temporary; any stmts the gimplification produced are inserted on the
   preheader edge (which must not need splitting).  */
3015 vect_build_loop_niters (loop_vec_info loop_vinfo)
3017 tree ni_name, stmt, var;
3019 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3020 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3022 var = create_tmp_var (TREE_TYPE (ni), "niters");
3023 add_referenced_tmp_var (var);
3024 ni_name = force_gimple_operand (ni, &stmt, false, var);
3026 pe = loop_preheader_edge (loop);
3029 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3030 gcc_assert (!new_bb);
3037 /* This function generates the following statements:
3039 ni_name = number of iterations loop executes
3040 ratio = ni_name / vf
3041 ratio_mult_vf_name = ratio * vf
3043 and places them at the loop preheader edge. */
/* VF is a power of two (exact_log2 below), so the division and
   multiplication are emitted as right and left shifts by log2(vf).  */
3046 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3048 tree *ratio_mult_vf_name_ptr,
3049 tree *ratio_name_ptr)
3057 tree ratio_mult_vf_name;
3058 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3059 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3060 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3061 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3063 pe = loop_preheader_edge (loop);
3065 /* Generate temporary variable that contains
3066 number of iterations loop executes. */
3068 ni_name = vect_build_loop_niters (loop_vinfo);
3070 /* Create: ratio = ni >> log2(vf) */
3072 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3073 add_referenced_tmp_var (var);
3074 ratio_name = make_ssa_name (var, NULL_TREE);
3075 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3076 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3077 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3079 pe = loop_preheader_edge (loop);
3080 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3081 gcc_assert (!new_bb);
3083 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3085 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3086 add_referenced_tmp_var (var);
3087 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3088 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3089 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3090 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3092 pe = loop_preheader_edge (loop);
3093 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3094 gcc_assert (!new_bb);
/* Return all three names to the caller via the out-parameters.  */
3096 *ni_name_ptr = ni_name;
3097 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3098 *ratio_name_ptr = ratio_name;
3104 /* Function vect_update_ivs_after_vectorizer.
3106 "Advance" the induction variables of LOOP to the value they should take
3107 after the execution of LOOP. This is currently necessary because the
3108 vectorizer does not handle induction variables that are used after the
3109 loop. Such a situation occurs when the last iterations of LOOP are
3111 1. We introduced new uses after LOOP for IVs that were not originally used
3112 after LOOP: the IVs of LOOP are now used by an epilog loop.
3113 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3114 times, whereas the loop IVs should be bumped N times.
3117 - LOOP - a loop that is going to be vectorized. The last few iterations
3118 of LOOP were peeled.
3119 - NITERS - the number of iterations that LOOP executes (before it is
3120 vectorized). i.e, the number of times the ivs should be bumped.
3121 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3122 coming out from LOOP on which there are uses of the LOOP ivs
3123 (this is the path from LOOP->exit to epilog_loop->preheader).
3125 The new definitions of the ivs are placed in LOOP->exit.
3126 The phi args associated with the edge UPDATE_E in the bb
3127 UPDATE_E->dest are updated accordingly.
3129 Assumption 1: Like the rest of the vectorizer, this function assumes
3130 a single loop exit that has a single predecessor.
3132 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3133 organized in the same order.
3135 Assumption 3: The access function of the ivs is simple enough (see
3136 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3138 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3139 coming out of LOOP on which the ivs of LOOP are used (this is the path
3140 that leads to the epilog loop; other paths skip the epilog loop). This
3141 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3142 needs to have its phis updated. */
3146 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
3149 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3150 basic_block exit_bb = loop->exit_edges[0]->dest;
3152 basic_block update_bb = update_e->dest;
3154 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
3156 /* Make sure there exists a single-predecessor exit bb: */
3157 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
/* Walk the header phis and the update_bb phis in lockstep (Assumption 2:
   they are in matching order).  */
3159 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3161 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3163 tree access_fn = NULL;
3164 tree evolution_part;
3167 tree var, stmt, ni, ni_name;
3168 block_stmt_iterator last_bsi;
3170 /* Skip virtual phi's. */
3171 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3173 if (vect_debug_details (UNKNOWN_LOC))
3174 fprintf (dump_file, "virtual phi. skip.");
3178 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3179 gcc_assert (access_fn);
3181 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3182 gcc_assert (evolution_part != NULL_TREE);
3184 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3185 of degree >= 2 or exponential. */
3186 gcc_assert (!tree_is_chrec (evolution_part));
3188 step_expr = evolution_part;
3189 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* Advanced value: ni = init + niters * step.  */
3192 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3193 build2 (MULT_EXPR, TREE_TYPE (niters),
3194 niters, step_expr), init_expr);
3196 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3197 add_referenced_tmp_var (var);
3199 ni_name = force_gimple_operand (ni, &stmt, false, var);
3201 /* Insert stmt into exit_bb. */
3202 last_bsi = bsi_last (exit_bb);
3204 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3206 /* Fix phi expressions in the successor bb. */
/* Sanity check that phi and phi1 really describe the same IV before
   redirecting the UPDATE_E argument to the advanced value.  */
3207 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3208 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3209 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
3214 /* Function vect_do_peeling_for_loop_bound
3216 Peel the last iterations of the loop represented by LOOP_VINFO.
3217 The peeled iterations form a new epilog loop. Given that the loop now
3218 iterates NITERS times, the new epilog loop iterates
3219 NITERS % VECTORIZATION_FACTOR times.
3221 The original loop will later be made to iterate
3222 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3225 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3226 struct loops *loops)
3229 tree ni_name, ratio_mult_vf_name;
3230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3231 struct loop *new_loop;
3233 #ifdef ENABLE_CHECKING
3237 if (vect_debug_details (UNKNOWN_LOC))
/* Fixed typo in the dump marker: "transtorm" -> "transform".  */
3238 fprintf (dump_file, "\n<<vect_transform_for_unknown_loop_bound>>\n");
3240 /* Generate the following variables on the preheader of original loop:
3242 ni_name = number of iteration the original loop executes
3243 ratio = ni_name / vf
3244 ratio_mult_vf_name = ratio * vf */
3245 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3246 &ratio_mult_vf_name, ratio);
3248 /* Update loop info. */
3249 loop->pre_header = loop_preheader_edge (loop)->src;
3250 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3252 #ifdef ENABLE_CHECKING
3253 loop_num = loop->num;
/* Peel off the last NITERS % VF iterations into a new epilog loop; the
   original loop will execute ratio_mult_vf iterations.  */
3255 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3256 ratio_mult_vf_name, ni_name, false);
3257 #ifdef ENABLE_CHECKING
3258 gcc_assert (new_loop);
3259 gcc_assert (loop_num == loop->num);
3260 slpeel_verify_cfg_after_peeling (loop, new_loop);
3263 /* A guard that controls whether the new_loop is to be executed or skipped
3264 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3265 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3266 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3267 is on the path where the LOOP IVs are used and need to be updated. */
3269 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3270 update_e = EDGE_PRED (new_loop->pre_header, 0);
3272 update_e = EDGE_PRED (new_loop->pre_header, 1);
3274 /* Update IVs of original loop as if they were advanced
3275 by ratio_mult_vf_name steps. */
3276 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
3278 /* After peeling we have to reset scalar evolution analyzer. */
3285 /* Function vect_gen_niters_for_prolog_loop
3287 Set the number of iterations for the loop represented by LOOP_VINFO
3288 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3289 and the misalignment of DR - the first data reference recorded in
3290 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3291 this loop, the data reference DR will refer to an aligned location.
3293 The following computation is generated:
3295 compute address misalignment in bytes:
3296 addr_mis = addr & (vectype_size - 1)
3298 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3300 (elem_size = element type size; an element is the scalar element
3301 whose type is the inner type of the vectype) */
3304 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3306 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3307 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3310 tree iters, iters_name;
3313 tree dr_stmt = DR_STMT (dr);
3314 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3315 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3316 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3319 tree new_stmts = NULL_TREE;
/* Compute DR's start address in the preheader (any address-computation
   stmts are collected in new_stmts and inserted on the preheader edge).  */
3321 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3322 tree ptr_type = TREE_TYPE (start_addr);
3323 tree size = TYPE_SIZE (ptr_type);
3324 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3325 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3326 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3327 tree niters_type = TREE_TYPE (loop_niters);
/* elem_size = vectype_align / vf, assumed a power of two so the division
   by element size below can be done as a right shift.  */
3328 tree elem_size_log =
3329 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3330 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3332 pe = loop_preheader_edge (loop);
3333 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3334 gcc_assert (!new_bb);
3336 /* Create: byte_misalign = addr & (vectype_size - 1) */
3337 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3339 /* Create: elem_misalign = byte_misalign / element_size */
3341 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3343 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3344 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3345 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3346 iters = fold_convert (niters_type, iters);
3348 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3349 /* If the loop bound is known at compile time we already verified that it is
3350 greater than vf; since the misalignment ('iters') is at most vf, there's
3351 no need to generate the MIN_EXPR in this case. */
3352 if (TREE_CODE (loop_niters) != INTEGER_CST)
3353 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3355 var = create_tmp_var (niters_type, "prolog_loop_niters");
3356 add_referenced_tmp_var (var);
3357 iters_name = force_gimple_operand (iters, &stmt, false, var);
3359 /* Insert stmt on loop preheader edge. */
3360 pe = loop_preheader_edge (loop);
3363 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3364 gcc_assert (!new_bb);
3371 /* Function vect_update_inits_of_dr
3373 NITERS iterations were peeled from LOOP. DR represents a data reference
3374 in LOOP. This function updates the information recorded in DR to
3375 account for the fact that the first NITERS iterations had already been
3376 executed. Specifically, it updates the OFFSET field of stmt_info. */
3379 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3381 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3382 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
/* Advance the recorded initial offset by NITERS * STEP: the peeled
   iterations each moved the access by one step.  */
3384 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3385 STMT_VINFO_VECT_STEP (stmt_info)));
3386 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3387 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3391 /* Function vect_update_inits_of_drs
3393 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3394 This function updates the information recorded for the data references in
3395 the loop to account for the fact that the first NITERS iterations had
3396 already been executed. Specifically, it updates the initial_condition of the
3397 access_function of all the data_references in the loop. */
3400 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3403 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3404 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
/* Apply the per-data-reference update to every write, then every read.  */
3409 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3411 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3412 vect_update_inits_of_dr (dr, niters);
3415 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3417 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3418 vect_update_inits_of_dr (dr, niters);
3423 /* Function vect_do_peeling_for_alignment
3425 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3426 'niters' is set to the misalignment of one of the data references in the
3427 loop, thereby forcing it to refer to an aligned location at the beginning
3428 of the execution of this loop. The data reference for which we are
3429 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3432 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3434 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3435 tree niters_of_prolog_loop, ni_name;
3437 struct loop *new_loop;
3439 if (vect_debug_details (UNKNOWN_LOC))
3440 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3442 ni_name = vect_build_loop_niters (loop_vinfo);
3443 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3445 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
/* NOTE(review): NEW_LOOP is asserted non-null below, so the assignment
   "new_loop =" presumably prefixes this call on a line not shown here —
   confirm against the full source.  */
3447 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3448 niters_of_prolog_loop, ni_name, true);
3449 #ifdef ENABLE_CHECKING
3450 gcc_assert (new_loop);
3451 slpeel_verify_cfg_after_peeling (new_loop, loop);
3454 /* Update number of times loop executes. */
/* The vectorized loop now runs (original niters - prolog niters) times.  */
3455 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3456 LOOP_VINFO_NITERS (loop_vinfo) =
3457 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3459 /* Update the init conditions of the access functions of all data refs. */
3460 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3462 /* After peeling we have to reset scalar evolution analyzer. */
3469 /* Function vect_transform_loop.
3471 The analysis phase has determined that the loop is vectorizable.
3472 Vectorize the loop - created vectorized stmts to replace the scalar
3473 stmts in the loop, and update the loop exit condition. */
3476 vect_transform_loop (loop_vec_info loop_vinfo,
3477 struct loops *loops ATTRIBUTE_UNUSED)
3479 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3480 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3481 int nbbs = loop->num_nodes;
3482 block_stmt_iterator si;
3485 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3487 if (vect_debug_details (UNKNOWN_LOC))
3488 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3491 /* Peel the loop if there are data refs with unknown alignment.
3492 Only one data ref with unknown store is allowed. */
3494 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3495 vect_do_peeling_for_alignment (loop_vinfo, loops);
3497 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3498 compile time constant), or it is a constant that doesn't divide by the
3499 vectorization factor, then an epilog loop needs to be created.
3500 We therefore duplicate the loop: the original loop will be vectorized,
3501 and will compute the first (n/VF) iterations. The second copy of the loop
3502 will remain scalar and will compute the remaining (n%VF) iterations.
3503 (VF is the vectorization factor). */
3505 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3506 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3507 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3508 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
/* Iteration count is known and divisible by VF: the vector loop executes
   exactly niters/VF iterations, no epilog needed.
   NOTE(review): this looks like the else-branch of the condition above —
   the "else" line itself is not visible here.  */
3510 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3511 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3513 /* 1) Make sure the loop header has exactly two entries
3514 2) Make sure we have a preheader basic block. */
3516 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3518 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3521 /* FORNOW: the vectorizer supports only loops which body consist
3522 of one basic block (header + empty latch). When the vectorizer will
3523 support more involved loop forms, the order by which the BBs are
3524 traversed need to be reconsidered. */
3526 for (i = 0; i < nbbs; i++)
3528 basic_block bb = bbs[i];
/* The iterator is advanced inside the body: vect_transform_stmt may
   remove or replace the statement under SI.  */
3530 for (si = bsi_start (bb); !bsi_end_p (si);)
3532 tree stmt = bsi_stmt (si);
3533 stmt_vec_info stmt_info;
3536 if (vect_debug_details (UNKNOWN_LOC))
3538 fprintf (dump_file, "------>vectorizing statement: ");
3539 print_generic_expr (dump_file, stmt, TDF_SLIM);
3541 stmt_info = vinfo_for_stmt (stmt);
3542 gcc_assert (stmt_info);
3543 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3548 #ifdef ENABLE_CHECKING
3549 /* FORNOW: Verify that all stmts operate on the same number of
3550 units and no inner unrolling is necessary. */
3552 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3553 == vectorization_factor);
3555 /* -------- vectorize statement ------------ */
3556 if (vect_debug_details (UNKNOWN_LOC))
3557 fprintf (dump_file, "transform statement.");
3559 is_store = vect_transform_stmt (stmt, &si);
3562 /* free the attached stmt_vec_info and remove the stmt. */
3563 stmt_ann_t ann = stmt_ann (stmt);
3565 set_stmt_info (ann, NULL);
/* Make the transformed loop iterate RATIO (= n/VF) times.  */
3574 slpeel_make_loop_iterate_ntimes (loop, ratio);
3576 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
3577 fprintf (dump_file,"Success! loop vectorized.");
3578 if (vect_debug_stats (LOOP_LOC (loop_vinfo)))
3579 fprintf (dump_file, "LOOP VECTORIZED.");
3583 /* Function vect_is_simple_use.
3586 LOOP - the loop that is being vectorized.
3587 OPERAND - operand of a stmt in LOOP.
3588 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3590 Returns whether a stmt with OPERAND can be vectorized.
3591 Supportable operands are constants, loop invariants, and operands that are
3592 defined by the current iteration of the loop. Unsupportable operands are
3593 those that are defined by a previous iteration of the loop (as is the case
3594 in reduction/induction computations). */
3597 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
3601 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Integer/real constants are always usable as vector operands.  */
3606 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3609 if (TREE_CODE (operand) != SSA_NAME)
3612 def_stmt = SSA_NAME_DEF_STMT (operand);
3613 if (def_stmt == NULL_TREE )
3615 if (vect_debug_details (UNKNOWN_LOC))
3616 fprintf (dump_file, "no def_stmt.");
3620 /* empty stmt is expected only in case of a function argument.
3621 (Otherwise - we expect a phi_node or a modify_expr). */
3622 if (IS_EMPTY_STMT (def_stmt))
3624 tree arg = TREE_OPERAND (def_stmt, 0);
3625 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3627 if (vect_debug_details (UNKNOWN_LOC))
3629 fprintf (dump_file, "Unexpected empty stmt: ");
3630 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3635 /* phi_node inside the loop indicates an induction/reduction pattern.
3636 This is not supported yet. */
3637 bb = bb_for_stmt (def_stmt);
3638 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3640 if (vect_debug_details (UNKNOWN_LOC))
3641 fprintf (dump_file, "reduction/induction - unsupported.");
3642 return false; /* FORNOW: not supported yet. */
3645 /* Expecting a modify_expr or a phi_node. */
3646 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3647 || TREE_CODE (def_stmt) == PHI_NODE)
3658 /* Function vect_analyze_operations.
3660 Scan the loop stmts and make sure they are all vectorizable. */
3663 vect_analyze_operations (loop_vec_info loop_vinfo)
3665 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3666 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3667 int nbbs = loop->num_nodes;
3668 block_stmt_iterator si;
3669 unsigned int vectorization_factor = 0;
3674 if (vect_debug_details (UNKNOWN_LOC))
3675 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3677 for (i = 0; i < nbbs; i++)
3679 basic_block bb = bbs[i];
3681 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3683 tree stmt = bsi_stmt (si);
3684 unsigned int nunits;
3685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3688 if (vect_debug_details (UNKNOWN_LOC))
3690 fprintf (dump_file, "==> examining statement: ");
3691 print_generic_expr (dump_file, stmt, TDF_SLIM);
3694 gcc_assert (stmt_info);
3696 /* skip stmts which do not need to be vectorized.
3697 this is expected to include:
3698 - the COND_EXPR which is the loop exit condition
3699 - any LABEL_EXPRs in the loop
3700 - computations that are used only for array indexing or loop
3703 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3705 if (vect_debug_details (UNKNOWN_LOC))
3706 fprintf (dump_file, "irrelevant.");
/* Pre-existing vector code in the loop is not handled.  */
3710 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3712 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3713 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3715 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3716 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Determine the scalar type this stmt operates on: the data-ref's
   referenced type if there is one, otherwise the lhs type of a
   MODIFY_EXPR, otherwise the stmt's own type.  */
3721 if (STMT_VINFO_DATA_REF (stmt_info))
3722 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3723 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3724 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3726 scalar_type = TREE_TYPE (stmt);
3728 if (vect_debug_details (UNKNOWN_LOC))
3730 fprintf (dump_file, "get vectype for scalar type: ");
3731 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3734 vectype = get_vectype_for_scalar_type (scalar_type);
3737 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3738 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3740 fprintf (dump_file, "not vectorized: unsupported data-type ");
3741 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3746 if (vect_debug_details (UNKNOWN_LOC))
3748 fprintf (dump_file, "vectype: ");
3749 print_generic_expr (dump_file, vectype, TDF_SLIM);
3751 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* A stmt is vectorizable if at least one of the per-kind handlers
   accepts it (analysis mode: NULL output arguments).  */
3753 ok = (vectorizable_operation (stmt, NULL, NULL)
3754 || vectorizable_assignment (stmt, NULL, NULL)
3755 || vectorizable_load (stmt, NULL, NULL)
3756 || vectorizable_store (stmt, NULL, NULL));
3760 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3761 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3763 fprintf (dump_file, "not vectorized: stmt not supported: ");
3764 print_generic_expr (dump_file, stmt, TDF_SLIM);
3769 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3770 if (vect_debug_details (UNKNOWN_LOC))
3771 fprintf (dump_file, "nunits = %d", nunits);
3773 if (vectorization_factor)
3775 /* FORNOW: don't allow mixed units.
3776 This restriction will be relaxed in the future. */
3777 if (nunits != vectorization_factor)
3779 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3780 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3781 fprintf (dump_file, "not vectorized: mixed data-types");
/* First relevant stmt seen: its unit count fixes the loop's VF.  */
3786 vectorization_factor = nunits;
3788 #ifdef ENABLE_CHECKING
3789 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3790 * vectorization_factor == UNITS_PER_SIMD_WORD);
3795 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* VF of 0 or 1 means no relevant stmt fixed a usable vector width.  */
3797 if (vectorization_factor <= 1)
3799 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3800 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3801 fprintf (dump_file, "not vectorized: unsupported data-type");
3804 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3806 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3807 && vect_debug_details (UNKNOWN_LOC))
3809 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3810 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3812 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3813 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3815 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3816 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3817 fprintf (dump_file, "not vectorized: iteration count too small.");
/* Unknown or non-VF-divisible trip count requires an epilog loop; verify
   that the loop's IVs can be advanced and the loop can be duplicated.  */
3821 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3822 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3824 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3825 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3826 fprintf (dump_file, "epilog loop required.");
3827 if (!vect_can_advance_ivs_p (loop_vinfo))
3829 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3830 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3831 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3834 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3836 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
3837 || vect_debug_details (LOOP_LOC (loop_vinfo)))
3838 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3847 /* Function exist_non_indexing_operands_for_use_p
3849 USE is one of the uses attached to STMT. Check if USE is
3850 used in STMT for anything other than indexing an array. */
3853 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3856 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3858 /* USE corresponds to some operand in STMT. If there is no data
3859 reference in STMT, then any operand that corresponds to USE
3860 is not indexing an array. */
3861 if (!STMT_VINFO_DATA_REF (stmt_info))
3864 /* STMT has a data_ref. FORNOW this means that its of one of
3865 the following forms:
3868 (This should have been verified in analyze_data_refs).
3870 'var' in the second case corresponds to a def, not a use,
3871 so USE cannot correspond to any operands that are not used
3874 Therefore, all we need to check is if STMT falls into the
3875 first case, and whether var corresponds to USE. */
/* A store ("array-ref = var") has an SSA_NAME on the rhs; compare it
   against USE.  An lhs SSA_NAME would be a load, which has no non-index
   use of USE.  */
3877 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3880 operand = TREE_OPERAND (stmt, 1);
3882 if (TREE_CODE (operand) != SSA_NAME)
3892 /* Function vect_is_simple_iv_evolution.
3894 FORNOW: A simple evolution of an induction variables in the loop is
3895 considered a polynomial evolution with constant step. */
3898 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3899 tree * step, bool strict)
3904 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3906 /* When there is no evolution in this loop, the evolution function
3908 if (evolution_part == NULL_TREE)
3911 /* When the evolution is a polynomial of degree >= 2
3912 the evolution function is not "simple". */
3913 if (tree_is_chrec (evolution_part))
3916 step_expr = evolution_part;
3917 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
3919 if (vect_debug_details (UNKNOWN_LOC))
3921 fprintf (dump_file, "step: ");
3922 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3923 fprintf (dump_file, ", init: ");
3924 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* A non-constant step is not a simple evolution.  */
3930 if (TREE_CODE (step_expr) != INTEGER_CST)
3932 if (vect_debug_details (UNKNOWN_LOC))
3933 fprintf (dump_file, "step unknown.");
/* NOTE(review): this check presumably applies only when STRICT is set
   (requiring a unit step) — the guarding condition is on a line not
   visible here; confirm against the full source.  */
3938 if (!integer_onep (step_expr))
3940 if (vect_debug_details (UNKNOWN_LOC))
3941 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3949 /* Function vect_analyze_scalar_cycles.
3951 Examine the cross iteration def-use cycles of scalar variables, by
3952 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3953 cycles that they represent do not impede vectorization.
3955 FORNOW: Reduction as in the following loop, is not supported yet:
3959 The cross-iteration cycle corresponding to variable 'sum' will be
3960 considered too complicated and will impede vectorization.
3962 FORNOW: Induction as in the following loop, is not supported yet:
3967 However, the following loop *is* vectorizable:
3972 In both loops there exists a def-use cycle for the variable i:
3973 loop: i_2 = PHI (i_0, i_1)
3978 The evolution of the above cycle is considered simple enough,
3979 however, we also check that the cycle does not need to be
3980 vectorized, i.e - we check that the variable that this cycle
3981 defines is only used for array indexing or in stmts that do not
3982 need to be vectorized. This is not the case in loop2, but it
3983 *is* the case in loop3. */
3986 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3989 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3990 basic_block bb = loop->header;
3993 if (vect_debug_details (UNKNOWN_LOC))
3994 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Every cross-iteration scalar cycle surfaces as a PHI in the loop
   header; examine each one.  */
3996 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3998 tree access_fn = NULL;
4000 if (vect_debug_details (UNKNOWN_LOC))
4002 fprintf (dump_file, "Analyze phi: ");
4003 print_generic_expr (dump_file, phi, TDF_SLIM);
4006 /* Skip virtual phi's. The data dependences that are associated with
4007 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
4009 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4011 if (vect_debug_details (UNKNOWN_LOC))
4012 fprintf (dump_file, "virtual phi. skip.");
4016 /* Analyze the evolution function. */
4018 /* FORNOW: The only scalar cross-iteration cycles that we allow are
4019 those of loop induction variables; This property is verified here.
4021 Furthermore, if that induction variable is used in an operation
4022 that needs to be vectorized (i.e, is not solely used to index
4023 arrays and check the exit condition) - we do not support its
4024 vectorization yet. This property is verified in vect_is_simple_use,
4025 during vect_analyze_operations. */
4027 access_fn = /* instantiate_parameters
4029 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4033 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4034 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4035 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4039 if (vect_debug_details (UNKNOWN_LOC))
4041 fprintf (dump_file, "Access function of PHI: ");
4042 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4045 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4048 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4049 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4050 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4059 /* Function vect_analyze_data_ref_dependence.
4061 Return TRUE if there (might) exist a dependence between a memory-reference
4062 DRA and a memory-reference DRB. */
4065 vect_analyze_data_ref_dependence (struct data_reference *dra,
4066 struct data_reference *drb,
4067 loop_vec_info loop_vinfo)
4070 struct data_dependence_relation *ddr;
/* If we cannot even prove the two bases differ, conservatively report a
   (possible) dependence.  */
4072 if (!array_base_name_differ_p (dra, drb, &differ_p))
4074 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4075 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4078 "not vectorized: can't determine dependence between: ");
4079 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4080 fprintf (dump_file, " and ");
4081 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4089 ddr = initialize_data_dependence_relation (dra, drb);
4090 compute_affine_dependence (ddr);
/* chrec_known means the relation was fully analyzed (no dependence);
   NOTE(review): the early-return for that case appears to be on lines
   not visible here — confirm against the full source.  */
4092 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4095 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4096 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4099 "not vectorized: possible dependence between data-refs ");
4100 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4101 fprintf (dump_file, " and ");
4102 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4109 /* Function vect_analyze_data_ref_dependences.
4111 Examine all the data references in the loop, and make sure there do not
4112 exist any data dependences between them.
4114 TODO: dependences which distance is greater than the vectorization factor
4118 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4121 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4122 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4124 /* Examine store-store (output) dependences. */
4126 if (vect_debug_details (UNKNOWN_LOC))
4127 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4129 if (vect_debug_details (UNKNOWN_LOC))
4130 fprintf (dump_file, "compare all store-store pairs.");
/* All unordered pairs of writes (j starts at i+1).  */
4132 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4134 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4136 struct data_reference *dra =
4137 VARRAY_GENERIC_PTR (loop_write_refs, i);
4138 struct data_reference *drb =
4139 VARRAY_GENERIC_PTR (loop_write_refs, j);
4140 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4145 /* Examine load-store (true/anti) dependences. */
4147 if (vect_debug_details (UNKNOWN_LOC))
4148 fprintf (dump_file, "compare all load-store pairs.");
/* Every read against every write.  */
4150 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4152 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4154 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4155 struct data_reference *drb =
4156 VARRAY_GENERIC_PTR (loop_write_refs, j);
4157 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4166 /* Function vect_compute_data_ref_alignment
4168 Compute the misalignment of the data reference DR.
4171 1. If during the misalignment computation it is found that the data reference
4172 cannot be vectorized then false is returned.
4173 2. DR_MISALIGNMENT (DR) is defined.
4175 FOR NOW: No analysis is actually performed. Misalignment is calculated
4176 only for trivial cases. TODO. */
4179 vect_compute_data_ref_alignment (struct data_reference *dr)
4181 tree stmt = DR_STMT (dr);
4182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4183 tree ref = DR_REF (dr);
4185 tree base, alignment;
4186 bool base_aligned_p;
4189 if (vect_debug_details (UNKNOWN_LOC))
4190 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4192 /* Initialize misalignment to unknown. */
4193 DR_MISALIGNMENT (dr) = -1;
4195 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4196 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4197 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4198 vectype = STMT_VINFO_VECTYPE (stmt_info);
4202 if (vect_debug_details (UNKNOWN_LOC))
4204 fprintf (dump_file, "Unknown alignment for access: ");
4205 print_generic_expr (dump_file, base, TDF_SLIM);
/* Base not known to be aligned: try to force the decl's alignment up to
   the vector type's requirement.  */
4210 if (!base_aligned_p)
4212 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4214 if (vect_debug_details (UNKNOWN_LOC))
4216 fprintf (dump_file, "can't force alignment of ref: ");
4217 print_generic_expr (dump_file, ref, TDF_SLIM);
4222 /* Force the alignment of the decl.
4223 NOTE: This is the only change to the code we make during
4224 the analysis phase, before deciding to vectorize the loop. */
4225 if (vect_debug_details (UNKNOWN_LOC))
4226 fprintf (dump_file, "force alignment");
4227 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4228 DECL_USER_ALIGN (base) = 1;
4231 /* At this point we assume that the base is aligned. */
4232 gcc_assert (base_aligned_p
4233 || (TREE_CODE (base) == VAR_DECL
4234 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4236 /* Alignment required, in bytes: */
4237 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4239 /* Modulo alignment. */
4240 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4241 if (tree_int_cst_sgn (misalign) < 0)
4243 /* Negative misalignment value. */
4244 if (vect_debug_details (UNKNOWN_LOC))
4245 fprintf (dump_file, "unexpected misalign value");
/* Record the computed misalignment (in bytes) in DR.  */
4249 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4251 if (vect_debug_details (UNKNOWN_LOC))
4252 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4258 /* Function vect_compute_data_refs_alignment
4260 Compute the misalignment of data references in the loop.
4261 This pass may take place at function granularity instead of at loop
4264 FOR NOW: No analysis is actually performed. Misalignment is calculated
4265 only for trivial cases. TODO. */
4268 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4270 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4271 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* Compute misalignment for every write dataref, then every read; a
   failure in any single dataref fails the whole computation.  */
4274 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4276 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4277 if (!vect_compute_data_ref_alignment (dr))
4281 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4283 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4284 if (!vect_compute_data_ref_alignment (dr))
4292 /* Function vect_enhance_data_refs_alignment
4294 This pass will use loop versioning and loop peeling in order to enhance
4295 the alignment of data references in the loop.
4297 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4298 original loop is to be vectorized; Any other loops that are created by
4299 the transformations performed in this pass - are not supposed to be
4300 vectorized. This restriction will be relaxed. */
4303 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4305 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4306 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4310 This pass will require a cost model to guide it whether to apply peeling
4311 or versioning or a combination of the two. For example, the scheme that
4312 intel uses when given a loop with several memory accesses, is as follows:
4313 choose one memory access ('p') which alignment you want to force by doing
4314 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4315 other accesses are not necessarily aligned, or (2) use loop versioning to
4316 generate one loop in which all accesses are aligned, and another loop in
4317 which only 'p' is necessarily aligned.
4319 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4320 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4321 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4323 Devising a cost model is the most critical aspect of this work. It will
4324 guide us on which access to peel for, whether to use loop versioning, how
4325 many versions to create, etc. The cost model will probably consist of
4326 generic considerations as well as target specific considerations (on
4327 powerpc for example, misaligned stores are more painful than misaligned
4330 Here is the general steps involved in alignment enhancements:
4332 -- original loop, before alignment analysis:
4333 for (i=0; i<N; i++){
4334 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4335 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4338 -- After vect_compute_data_refs_alignment:
4339 for (i=0; i<N; i++){
4340 x = q[i]; # DR_MISALIGNMENT(q) = 3
4341 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4344 -- Possibility 1: we do loop versioning:
4346 for (i=0; i<N; i++){ # loop 1A
4347 x = q[i]; # DR_MISALIGNMENT(q) = 3
4348 p[i] = y; # DR_MISALIGNMENT(p) = 0
4352 for (i=0; i<N; i++){ # loop 1B
4353 x = q[i]; # DR_MISALIGNMENT(q) = 3
4354 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4358 -- Possibility 2: we do loop peeling:
4359 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4363 for (i = 3; i < N; i++){ # loop 2A
4364 x = q[i]; # DR_MISALIGNMENT(q) = 0
4365 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4368 -- Possibility 3: combination of loop peeling and versioning:
4369 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4374 for (i = 3; i<N; i++){ # loop 3A
4375 x = q[i]; # DR_MISALIGNMENT(q) = 0
4376 p[i] = y; # DR_MISALIGNMENT(p) = 0
4380 for (i = 3; i<N; i++){ # loop 3B
4381 x = q[i]; # DR_MISALIGNMENT(q) = 0
4382 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4386 These loops are later passed to loop_transform to be vectorized. The
4387 vectorizer will use the alignment information to guide the transformation
4388 (whether to generate regular loads/stores, or with special handling for
4392 /* (1) Peeling to force alignment. */
4394 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4396 + How many accesses will become aligned due to the peeling
4397 - How many accesses will become unaligned due to the peeling,
4398 and the cost of misaligned accesses.
4399 - The cost of peeling (the extra runtime checks, the increase
4402 The scheme we use FORNOW: peel to force the alignment of the first
4403 misaligned store in the loop.
4404 Rationale: misaligned stores are not yet supported.
4406 TODO: Use a better cost model. */
/* Select the first unaligned write as the DR to peel for.  */
4408 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4410 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4411 if (!aligned_access_p (dr))
4413 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4414 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4419 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4421 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
4422 fprintf (dump_file, "Peeling for alignment will not be applied.");
4426 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
4427 fprintf (dump_file, "Peeling for alignment will be applied.");
4430 /* (1.2) Update the alignment info according to the peeling factor.
4431 If the misalignment of the DR we peel for is M, then the
4432 peeling factor is VF - M, and the misalignment of each access DR_i
4433 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4434 If the misalignment of the DR we peel for is unknown, then the
4435 misalignment of each access DR_i in the loop is also unknown.
4437 FORNOW: set the misalignment of the accesses to unknown even
4438 if the peeling factor is known at compile time.
4440 TODO: - if the peeling factor is known at compile time, use that
4441 when updating the misalignment info of the loop DRs.
4442 - consider accesses that are known to have the same
4443 alignment, even if that alignment is unknown. */
/* The peeled-for DR becomes aligned (misalignment 0); all other DRs are
   reset to unknown (-1).  Writes first, then reads.  */
4445 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4447 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4448 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4450 DR_MISALIGNMENT (dr) = 0;
4451 if (vect_debug_details (LOOP_LOC (loop_vinfo))
4452 || vect_debug_stats (LOOP_LOC (loop_vinfo)))
4453 fprintf (dump_file, "Alignment of access forced using peeling.");
4456 DR_MISALIGNMENT (dr) = -1;
4458 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4460 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4461 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4463 DR_MISALIGNMENT (dr) = 0;
4464 if (vect_debug_details (LOOP_LOC (loop_vinfo))
4465 || vect_debug_stats (LOOP_LOC (loop_vinfo)))
4466 fprintf (dump_file, "Alignment of access forced using peeling.");
4469 DR_MISALIGNMENT (dr) = -1;
4474 /* Function vect_analyze_data_refs_alignment
4476 Analyze the alignment of the data-references in the loop.
4477 FOR NOW: Until support for misaligned accesses is in place, only if all
4478 accesses are aligned can the loop be vectorized. This restriction will be
4482 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
/* Read/write data-ref arrays recorded earlier by vect_analyze_data_refs.  */
4484 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4485 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4486 enum dr_alignment_support supportable_dr_alignment;
4489 if (vect_debug_details (UNKNOWN_LOC))
4490 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4493 /* This pass may take place at function granularity instead of at loop
/* Step 1: compute the misalignment of every data-ref in the loop; failure
   here means alignment could not even be calculated, so bail out.  */
4496 if (!vect_compute_data_refs_alignment (loop_vinfo))
4498 if (vect_debug_details (LOOP_LOC (loop_vinfo))
4499 || vect_debug_stats (LOOP_LOC (loop_vinfo)))
4501 "not vectorized: can't calculate alignment for data ref.");
4506 /* This pass will decide on using loop versioning and/or loop peeling in
4507 order to enhance the alignment of data references in the loop. */
4509 vect_enhance_data_refs_alignment (loop_vinfo);
4512 /* Finally, check that all the data references in the loop can be
4513 handled with respect to their alignment. */
/* Step 2a: verify every read is supportable; an unsupportable unaligned
   load rejects the loop.  */
4515 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4517 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4518 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4519 if (!supportable_dr_alignment)
4521 if (vect_debug_details (LOOP_LOC (loop_vinfo))
4522 || vect_debug_stats (LOOP_LOC (loop_vinfo)))
4523 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
/* Supportable but not perfectly aligned: just report it.  */
4526 if (supportable_dr_alignment != dr_aligned
4527 && (vect_debug_details (LOOP_LOC (loop_vinfo))
4528 || vect_debug_stats (LOOP_LOC (loop_vinfo))))
4529 fprintf (dump_file, "Vectorizing an unaligned access.");
/* Step 2b: same check for the writes (stores).  */
4531 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4533 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4534 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4535 if (!supportable_dr_alignment)
4537 if (vect_debug_details (LOOP_LOC (loop_vinfo))
4538 || vect_debug_stats (LOOP_LOC (loop_vinfo)))
4539 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4542 if (supportable_dr_alignment != dr_aligned
4543 && (vect_debug_details (LOOP_LOC (loop_vinfo))
4544 || vect_debug_stats (LOOP_LOC (loop_vinfo))))
4545 fprintf (dump_file, "Vectorizing an unaligned access.");
4552 /* Function vect_analyze_data_ref_access.
4554 Analyze the access pattern of the data-reference DR. For now, a data access
4555 has to be consecutive to be considered vectorizable. */
4558 vect_analyze_data_ref_access (struct data_reference *dr)
4560 tree stmt = DR_STMT (dr);
4561 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* STEP was recorded during data-ref analysis (see vect_get_memtag_and_dr).  */
4562 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4563 tree scalar_type = TREE_TYPE (DR_REF (dr));
/* Consecutive (unit-stride) access means the step equals the size of the
   scalar element type; tree_int_cst_compare is nonzero when they differ.  */
4565 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4567 if (vect_debug_details (UNKNOWN_LOC))
4568 fprintf (dump_file, "not consecutive access");
4575 /* Function vect_analyze_data_ref_accesses.
4577 Analyze the access pattern of all the data references in the loop.
4579 FORNOW: the only access pattern that is considered vectorizable is a
4580 simple step 1 (consecutive) access.
4582 FORNOW: handle only arrays and pointer accesses. */
4585 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4588 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4589 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4591 if (vect_debug_details (UNKNOWN_LOC))
4592 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Check every write data-ref; a single non-consecutive access rejects
   the whole loop.  */
4594 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4596 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4597 bool ok = vect_analyze_data_ref_access (dr);
4600 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4601 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4602 fprintf (dump_file, "not vectorized: complicated access pattern.");
/* Same check for the read data-refs.  */
4607 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4609 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4610 bool ok = vect_analyze_data_ref_access (dr);
4613 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4614 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4615 fprintf (dump_file, "not vectorized: complicated access pattern.");
4624 /* Function vect_analyze_pointer_ref_access.
4627 STMT - a stmt that contains a data-ref
4628 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4630 If the data-ref access is vectorizable, return a data_reference structure
4631 that represents it (DR). Otherwise - return NULL. */
4633 static struct data_reference *
4634 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4638 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Evolution of the pointer (operand 0 of the INDIRECT_REF) in the loop.  */
4639 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4641 tree reftype, innertype;
4642 tree indx_access_fn;
4643 int loopnum = loop->num;
4644 struct data_reference *dr;
4648 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4649 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4650 fprintf (dump_file, "not vectorized: complicated pointer access.");
4654 if (vect_debug_details (UNKNOWN_LOC))
4656 fprintf (dump_file, "Access function of ptr: ");
4657 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Require a simple IV evolution: {init, +, step} in this loop.  */
4660 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4662 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4663 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4664 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* The initial pointer value must not vary inside the loop.  */
4670 if (!expr_invariant_in_loop_p (loop, init))
4672 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4673 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4675 "not vectorized: initial condition is not loop invariant.");
/* The step must be a compile-time constant.  */
4679 if (TREE_CODE (step) != INTEGER_CST)
4681 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4682 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4684 "not vectorized: non constant step for pointer access.");
/* Both the accessed pointer and the init value must have pointer type.  */
4688 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4689 if (TREE_CODE (reftype) != POINTER_TYPE)
4691 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4692 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4693 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4697 reftype = TREE_TYPE (init);
4698 if (TREE_CODE (reftype) != POINTER_TYPE)
4700 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4701 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4702 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* Consecutive access: step must equal the size of the pointed-to type.  */
4706 innertype = TREE_TYPE (reftype);
4707 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4709 /* FORNOW: support only consecutive access */
4710 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
4711 || vect_debug_details (LOOP_LOC (loop_vinfo)))
4712 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Record step and initial offset in the stmt info; these are later combined
   with the values computed by vect_get_base_and_offset (see
   vect_get_memtag_and_dr).  */
4716 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (ssizetype, step);
4717 if (TREE_CODE (init) == PLUS_EXPR
4718 || TREE_CODE (init) == MINUS_EXPR)
4719 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4720 size_binop (TREE_CODE (init), ssize_int (0),
4721 fold_convert (ssizetype, TREE_OPERAND (init, 1)))
4723 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = ssize_int (0);
/* Model the pointer as a unit-stride index: {0, +, 1} in this loop.  */
4726 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4727 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
4729 fprintf (dump_file, "Access function of ptr indx: ");
4730 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4732 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4737 /* Function vect_get_memtag_and_dr.
4739 The function returns the relevant variable for memory tag (for aliasing
4740 purposes). Also data reference structure DR is created.
4742 This function handles three kinds of MEMREF:
4744 It is called from vect_analyze_data_refs with a MEMREF that is either an
4745 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4746 It builds a DR for them using vect_get_base_and_offset, and calls itself
4747 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4748 MEMREF along the way. During the recursive calls, the function may be called
4749 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4750 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4751 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4752 and SSA_NAME (this is category 3 - "recursion stop condition").
4754 When the MEMREF falls into category 1 there is still no data reference struct
4755 (DR) available. It is created by this function, and then, along the
4756 recursion, MEMREF will fall into category 2 or 3, in which case a DR will
4757 have already been created, but the analysis continues to retrieve the MEMTAG.
4760 MEMREF - data reference in STMT
4761 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4764 DR - data_reference struct for MEMREF
4765 return value - the relevant variable for memory tag (for aliasing purposes).
4770 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4771 loop_vec_info loop_vinfo,
4772 tree vectype, struct data_reference **dr)
4774 tree symbl, oprnd0, oprnd1;
4775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4776 tree offset, misalign, step;
4777 tree ref_to_be_analyzed, tag, dr_base;
4778 struct data_reference *new_dr;
4779 bool base_aligned_p;
4783 /* Category 3: recursion stop condition. */
4784 /* (1) A DR already exists. We only need to get the relevant memtag for
4785 MEMREF, the rest of the data was already initialized. */
4787 switch (TREE_CODE (memref))
4789 /* (1.1) Stop condition: find the relevant memtag and return. */
/* SSA_NAME: the memtag is the type memory tag of the underlying variable.  */
4791 symbl = SSA_NAME_VAR (memref);
4792 tag = get_var_ann (symbl)->type_mem_tag;
/* Fall back to the pointer inside the already-built DR, if it is an
   SSA_NAME.  */
4795 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4796 if (TREE_CODE (ptr) == SSA_NAME)
4797 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4801 if (vect_debug_details (UNKNOWN_LOC))
4802 fprintf (dump_file, "not vectorized: no memtag for ref.");
4811 /* Category 2: recursion continues. */
4812 /* (1.2) A recursive call to find the relevant memtag is required. */
/* INDIRECT_REF: peel one level and recurse on the pointer operand.  */
4814 symbl = TREE_OPERAND (memref, 0);
4815 break; /* For recursive call. */
4818 /* Could have recorded more accurate information -
4819 i.e, the actual FIELD_DECL that is being referenced -
4820 but later passes expect VAR_DECL as the nmt. */
/* Recurse on the base recorded by vect_get_base_and_offset.  */
4824 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4825 break; /* For recursive call. */
4829 /* Although DR exists, we have to call the function recursively to
4830 build MEMTAG for such expression. This is handled below. */
4831 oprnd0 = TREE_OPERAND (memref, 0);
4832 oprnd1 = TREE_OPERAND (memref, 1);
4834 STRIP_NOPS (oprnd1);
4835 /* Supported plus/minus expressions are of the form
4836 {address_base + offset}, such that address_base is of type
4837 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4838 or it's not of type POINTER/ARRAY.
4839 TODO: swap operands if {offset + address_base}. */
4840 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4841 && TREE_CODE (oprnd1) != INTEGER_CST)
4842 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4846 break; /* For recursive call. */
4854 /* Category 1: recursion begins. */
4855 /* (2) A DR does not exist yet and must be built, followed by a
4856 recursive call to get the relevant memtag for MEMREF. */
4858 switch (TREE_CODE (memref))
/* INDIRECT_REF: build the DR via pointer-access analysis.  */
4861 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4865 symbl = DR_BASE_NAME (new_dr);
4866 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
/* ARRAY_REF: build the DR via array analysis.  */
4870 new_dr = analyze_array (stmt, memref, is_read);
4872 symbl = DR_BASE_NAME (new_dr);
4873 ref_to_be_analyzed = memref;
4877 /* TODO: Support data-refs of form a[i].p for unions and single
4878 field structures. */
4882 offset = ssize_int (0);
4883 misalign = ssize_int (0);
4884 step = ssize_int (0);
4886 /* Analyze data-ref, find its base, initial offset from the base, step,
4888 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4889 vectype, loop_vinfo, &offset,
4890 &misalign, &step, &base_aligned_p);
4894 /* Initialize information according to above analysis. */
4895 /* Since offset and step of a pointer can be also set in
4896 vect_analyze_pointer_ref_access, we combine the values here. */
4897 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4898 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4899 size_binop (PLUS_EXPR, offset,
4900 STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
4902 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4904 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4905 STMT_VINFO_VECT_STEP (stmt_info) =
4906 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4908 STMT_VINFO_VECT_STEP (stmt_info) = step;
4910 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4911 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4912 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4917 /* Recursive call to retrieve the relevant memtag. */
4918 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4923 /* Function vect_analyze_data_refs.
4925 Find all the data references in the loop.
4927 The general structure of the analysis of data refs in the vectorizer is as
4929 1- vect_analyze_data_refs(loop):
4930 Find and analyze all data-refs in the loop:
4932 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4933 1.1- vect_get_memtag_and_dr(ref):
4934 Analyze ref, and build a DR (data_reference struct) for it;
4935 call vect_get_base_and_offset to compute base, initial_offset,
4936 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4937 ref_stmt.alignment, and ref_stmt.step accordingly.
4938 1.1.1- vect_get_base_and_offset():
4939 Calculate base, initial_offset, step and alignment.
4940 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4941 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4942 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4943 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4945 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4946 which base is really an array (not a pointer) and which alignment
4947 can be forced. This restriction will be relaxed. */
4950 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4953 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4954 int nbbs = loop->num_nodes;
4955 block_stmt_iterator si;
4957 struct data_reference *dr;
4959 if (vect_debug_details (UNKNOWN_LOC))
4960 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
/* Walk every stmt of every basic block in the loop.  */
4962 for (j = 0; j < nbbs; j++)
4964 basic_block bb = bbs[j];
4965 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4967 bool is_read = false;
4968 tree stmt = bsi_stmt (si);
4969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Virtual operands tell us whether the stmt touches memory.  */
4970 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4971 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4972 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4973 varray_type *datarefs = NULL;
4974 int nvuses, nv_may_defs, nv_must_defs;
4977 tree scalar_type, vectype;
4979 /* Assumption: there exists a data-ref in stmt, if and only if
4980 it has vuses/vdefs. */
4982 if (!vuses && !v_may_defs && !v_must_defs)
4985 nvuses = NUM_VUSES (vuses);
4986 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4987 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* FORNOW: a stmt that both reads and writes memory is not handled.  */
4989 if (nvuses && (nv_may_defs || nv_must_defs))
4991 if (vect_debug_details (UNKNOWN_LOC))
4993 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4994 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Memory accesses are expected only inside MODIFY_EXPR assignments.  */
4999 if (TREE_CODE (stmt) != MODIFY_EXPR)
5001 if (vect_debug_details (UNKNOWN_LOC))
5003 fprintf (dump_file, "unexpected vops in stmt: ");
5004 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* A read: memref is the RHS; record it in the loop's read array.
   A write: memref is the LHS; record it in the write array.  */
5011 memref = TREE_OPERAND (stmt, 1);
5012 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5017 memref = TREE_OPERAND (stmt, 0);
5018 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
/* The target must provide a vector type for this scalar type.  */
5022 scalar_type = TREE_TYPE (memref);
5023 vectype = get_vectype_for_scalar_type (scalar_type);
5026 if (vect_debug_details (UNKNOWN_LOC))
5028 fprintf (dump_file, "no vectype for stmt: ");
5029 print_generic_expr (dump_file, stmt, TDF_SLIM);
5030 fprintf (dump_file, " scalar_type: ");
5031 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
5033 /* It is not possible to vectorize this data reference. */
5036 /* Analyze MEMREF. If it is of a supported form, build data_reference
5037 struct for it (DR) and find memtag for aliasing purposes. */
5039 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
5043 if (vect_debug_stats (LOOP_LOC (loop_vinfo))
5044 || vect_debug_details (LOOP_LOC (loop_vinfo)))
5046 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5047 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Success: record memtag, vectype and the DR in the stmt info, and push
   the DR onto the appropriate (read or write) array.  */
5051 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5052 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5053 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5054 STMT_VINFO_DATA_REF (stmt_info) = dr;
5062 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5064 /* Function vect_mark_relevant.
5066 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5069 vect_mark_relevant (varray_type *worklist, tree stmt)
5071 stmt_vec_info stmt_info;
5073 if (vect_debug_details (UNKNOWN_LOC))
5074 fprintf (dump_file, "mark relevant.");
/* PHI nodes have no stmt_vec_info; just queue them.  */
5076 if (TREE_CODE (stmt) == PHI_NODE)
5078 VARRAY_PUSH_TREE (*worklist, stmt);
5082 stmt_info = vinfo_for_stmt (stmt);
5086 if (vect_debug_details (UNKNOWN_LOC))
5088 fprintf (dump_file, "mark relevant: no stmt info!!.");
5089 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Already marked: do not re-queue, to avoid processing it twice.  */
5094 if (STMT_VINFO_RELEVANT_P (stmt_info))
5096 if (vect_debug_details (UNKNOWN_LOC))
5097 fprintf (dump_file, "already marked relevant.")
5101 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5102 VARRAY_PUSH_TREE (*worklist, stmt);
5106 /* Function vect_stmt_relevant_p.
5108 Return true if STMT in loop that is represented by LOOP_VINFO is
5109 "relevant for vectorization".
5111 A stmt is considered "relevant for vectorization" if:
5112 - it has uses outside the loop.
5113 - it has vdefs (it alters memory).
5114 - control stmts in the loop (except for the exit condition).
5116 CHECKME: what other side effects would the vectorizer allow? */
5119 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5121 v_may_def_optype v_may_defs;
5122 v_must_def_optype v_must_defs;
5123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5128 /* cond stmt other than loop exit cond. */
5129 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5132 /* changing memory. */
5133 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5134 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5135 if (v_may_defs || v_must_defs)
5137 if (vect_debug_details (UNKNOWN_LOC))
5138 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5142 /* uses outside the loop. */
/* Walk the immediate uses of STMT's result; any use whose basic block is
   outside the loop makes the stmt relevant.  */
5143 df = get_immediate_uses (stmt);
5144 num_uses = num_immediate_uses (df);
5145 for (i = 0; i < num_uses; i++)
5147 tree use = immediate_use (df, i);
5148 basic_block bb = bb_for_stmt (use);
5149 if (!flow_bb_inside_loop_p (loop, bb))
5151 if (vect_debug_details (UNKNOWN_LOC))
5152 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5161 /* Function vect_mark_stmts_to_be_vectorized.
5163 Not all stmts in the loop need to be vectorized. For example:
5172 Stmt 1 and 3 do not need to be vectorized, because loop control and
5173 addressing of vectorized data-refs are handled differently.
5175 This pass detects such stmts. */
5178 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5180 varray_type worklist;
5181 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5182 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5183 unsigned int nbbs = loop->num_nodes;
5184 block_stmt_iterator si;
5190 stmt_vec_info stmt_info;
5194 if (vect_debug_details (UNKNOWN_LOC))
5195 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
/* Header PHIs that are "relevant" would be cross-iteration scalar cycles
   (reductions/inductions) -- not supported here, so the loop is rejected.  */
5198 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5200 if (vect_debug_details (UNKNOWN_LOC))
5202 fprintf (dump_file, "init: phi relevant? ");
5203 print_generic_expr (dump_file, phi, TDF_SLIM);
5206 if (vect_stmt_relevant_p (phi, loop_vinfo))
5208 if (vect_debug_details (UNKNOWN_LOC))
5209 fprintf (dump_file, "unsupported reduction/induction.");
5214 VARRAY_TREE_INIT (worklist, 64, "work list");
5216 /* 1. Init worklist. */
/* Seed the worklist with every stmt that is relevant by itself
   (see vect_stmt_relevant_p).  */
5218 for (i = 0; i < nbbs; i++)
5221 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5223 stmt = bsi_stmt (si);
5225 if (vect_debug_details (UNKNOWN_LOC))
5227 fprintf (dump_file, "init: stmt relevant? ");
5228 print_generic_expr (dump_file, stmt, TDF_SLIM);
5231 stmt_info = vinfo_for_stmt (stmt);
5232 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5234 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5235 vect_mark_relevant (&worklist, stmt);
5240 /* 2. Process_worklist */
/* Propagate relevance backwards along def-use chains: every in-loop def
   feeding a relevant stmt becomes relevant too.  */
5242 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5244 stmt = VARRAY_TOP_TREE (worklist);
5245 VARRAY_POP (worklist);
5247 if (vect_debug_details (UNKNOWN_LOC))
5249 fprintf (dump_file, "worklist: examine stmt: ");
5250 print_generic_expr (dump_file, stmt, TDF_SLIM);
5253 /* Examine the USES in this statement. Mark all the statements which
5254 feed this statement's uses as "relevant", unless the USE is used as
5257 if (TREE_CODE (stmt) == PHI_NODE)
5259 /* follow the def-use chain inside the loop. */
5260 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5262 tree arg = PHI_ARG_DEF (stmt, j);
5263 tree def_stmt = NULL_TREE;
/* An unsupported use aborts the whole analysis (worklist is cleared).  */
5265 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
5267 if (vect_debug_details (UNKNOWN_LOC))
5268 fprintf (dump_file, "worklist: unsupported use.");
5269 varray_clear (worklist);
5275 if (vect_debug_details (UNKNOWN_LOC))
5277 fprintf (dump_file, "worklist: def_stmt: ");
5278 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
/* Only defs inside the loop are propagated.  */
5281 bb = bb_for_stmt (def_stmt);
5282 if (flow_bb_inside_loop_p (loop, bb))
5283 vect_mark_relevant (&worklist, def_stmt);
5287 ann = stmt_ann (stmt);
5288 use_ops = USE_OPS (ann);
5290 for (i = 0; i < NUM_USES (use_ops); i++)
5292 tree use = USE_OP (use_ops, i);
5294 /* We are only interested in uses that need to be vectorized. Uses
5295 that are used for address computation are not considered relevant.
5297 if (exist_non_indexing_operands_for_use_p (use, stmt))
5299 tree def_stmt = NULL_TREE;
5301 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
5303 if (vect_debug_details (UNKNOWN_LOC))
5304 fprintf (dump_file, "worklist: unsupported use.");
5305 varray_clear (worklist);
5312 if (vect_debug_details (UNKNOWN_LOC))
5314 fprintf (dump_file, "worklist: examine use %d: ", i);
5315 print_generic_expr (dump_file, use, TDF_SLIM);
5318 bb = bb_for_stmt (def_stmt);
5319 if (flow_bb_inside_loop_p (loop, bb))
5320 vect_mark_relevant (&worklist, def_stmt);
5323 } /* while worklist */
5325 varray_clear (worklist);
5330 /* Function vect_can_advance_ivs_p
5332 In case the number of iterations that LOOP iterates is unknown at compile
5333 time, an epilog loop will be generated, and the loop induction variables
5334 (IVs) will be "advanced" to the value they are supposed to take just before
5335 the epilog loop. Here we check that the access function of the loop IVs
5336 and the expression that represents the loop bound are simple enough.
5337 These restrictions will be relaxed in the future. */
5340 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
5342 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5343 basic_block bb = loop->header;
5346 /* Analyze phi functions of the loop header. */
5348 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5350 tree access_fn = NULL;
5351 tree evolution_part;
5353 if (vect_debug_details (UNKNOWN_LOC))
5355 fprintf (dump_file, "Analyze phi: ");
5356 print_generic_expr (dump_file, phi, TDF_SLIM);
5359 /* Skip virtual phi's. The data dependences that are associated with
5360 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5362 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5364 if (vect_debug_details (UNKNOWN_LOC))
5365 fprintf (dump_file, "virtual phi. skip.");
5369 /* Analyze the evolution function. */
5371 access_fn = instantiate_parameters
5372 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5376 if (vect_debug_details (UNKNOWN_LOC))
5377 fprintf (dump_file, "No Access function.");
5381 if (vect_debug_details (UNKNOWN_LOC))
5383 fprintf (dump_file, "Access function of PHI: ");
5384 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Extract this loop's evolution part (the "step" of the chrec).  */
5387 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5389 if (evolution_part == NULL_TREE)
5392 /* FORNOW: We do not transform initial conditions of IVs
5393 which evolution functions are a polynomial of degree >= 2. */
/* A chrec evolution part means a higher-degree polynomial IV -- reject.  */
5395 if (tree_is_chrec (evolution_part))
5403 /* Function vect_get_loop_niters.
5405 Determine how many iterations the loop is executed.
5406 If an expression that represents the number of iterations
5407 can be constructed, place it in NUMBER_OF_ITERATIONS.
5408 Return the loop exit condition. */
5411 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5415 if (vect_debug_details (UNKNOWN_LOC))
5416 fprintf (dump_file, "\n<<get_loop_niters>>\n");
/* Ask the number-of-iterations analyzer; chrec_dont_know means it could
   not determine a niters expression.  */
5418 niters = number_of_iterations_in_loop (loop);
5420 if (niters != NULL_TREE
5421 && niters != chrec_dont_know)
5423 *number_of_iterations = niters;
5425 if (vect_debug_details (UNKNOWN_LOC))
5427 fprintf (dump_file, "==> get_loop_niters:" );
5428 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5432 return get_loop_exit_condition (loop);
5436 /* Function vect_analyze_loop_form.
5438 Verify the following restrictions (some may be relaxed in the future):
5439 - it's an inner-most loop
5440 - number of BBs = 2 (which are the loop header and the latch)
5441 - the loop has a pre-header
5442 - the loop has a single entry and exit
5443 - the loop exit condition is simple enough, and the number of iterations
5444 can be analyzed (a countable loop). */
5446 static loop_vec_info
5447 vect_analyze_loop_form (struct loop *loop)
5449 loop_vec_info loop_vinfo;
5451 tree number_of_iterations = NULL;
5452 bool rescan = false;
5455 loop_loc = find_loop_location (loop);
5457 if (vect_debug_details (loop_loc))
5458 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
/* Reject loops with the wrong shape; the individual messages below report
   which specific test failed.  */
5461 || !loop->single_exit
5462 || loop->num_nodes != 2
5463 || EDGE_COUNT (loop->header->preds) != 2
5464 || loop->num_entries != 1)
5466 if (vect_debug_stats (loop_loc) || vect_debug_details (loop_loc))
5468 fprintf (dump_file, "not vectorized: bad loop form. ");
5470 fprintf (dump_file, "nested loop.");
5471 else if (!loop->single_exit)
5472 fprintf (dump_file, "multiple exits.");
5473 else if (loop->num_nodes != 2)
5474 fprintf (dump_file, "too many BBs in loop.");
5475 else if (EDGE_COUNT (loop->header->preds) != 2)
5476 fprintf (dump_file, "too many incoming edges.");
5477 else if (loop->num_entries != 1)
5478 fprintf (dump_file, "too many entries.");
5484 /* We assume that the loop exit condition is at the end of the loop. i.e,
5485 that the loop is represented as a do-while (with a proper if-guard
5486 before the loop if needed), where the loop header contains all the
5487 executable statements, and the latch is empty. */
/* NOTE(review): "unexpectd" in the message below is a typo in a runtime
   string; left unchanged here (documentation-only edit).  */
5488 if (!empty_block_p (loop->latch))
5490 if (vect_debug_stats (loop_loc) || vect_debug_details (loop_loc))
5491 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5495 /* Make sure we have a preheader basic block. */
5496 if (!loop->pre_header)
5499 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5502 /* Make sure there exists a single-predecessor exit bb: */
5503 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5506 loop_split_edge_with (loop->exit_edges[0], NULL);
/* If edges were split above, rescan the loop so its CFG fields are
   up to date.  */
5511 flow_loop_scan (loop, LOOP_ALL);
5512 /* Flow loop scan does not update loop->single_exit field. */
5513 loop->single_exit = loop->exit_edges[0];
5516 if (empty_block_p (loop->header))
5518 if (vect_debug_stats (loop_loc) || vect_debug_details (loop_loc))
5519 fprintf (dump_file, "not vectorized: empty loop.");
/* The loop must be countable: a recognizable exit condition and a
   computable number of iterations.  */
5523 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5526 if (vect_debug_stats (loop_loc) || vect_debug_details (loop_loc))
5527 fprintf (dump_file, "not vectorized: complicated exit condition.");
5531 if (!number_of_iterations)
5533 if (vect_debug_stats (loop_loc) || vect_debug_details (loop_loc))
5535 "not vectorized: number of iterations cannot be computed.");
5539 if (chrec_contains_undetermined (number_of_iterations))
5541 if (vect_debug_details (loop_loc))
5542 fprintf (dump_file, "Infinite number of iterations.");
/* All form checks passed: build the loop_vec_info and record results.  */
5546 loop_vinfo = new_loop_vec_info (loop);
5547 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5549 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5551 if (vect_debug_details (loop_loc))
5553 fprintf (dump_file, "loop bound unknown.\n");
5554 fprintf (dump_file, "Symbolic number of iterations is ");
5555 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5559 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5561 if (vect_debug_stats (loop_loc) || vect_debug_details (loop_loc))
5562 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5566 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5567 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
5573 /* Function vect_analyze_loop.
5575 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5576 for it. The different analyses will record information in the
5577 loop_vec_info struct. */
5579 static loop_vec_info
5580 vect_analyze_loop (struct loop *loop)
5583 loop_vec_info loop_vinfo;
5585 if (vect_debug_details (UNKNOWN_LOC))
5586 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5588 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
/* Each analysis below is a gate: on failure the loop_vec_info is destroyed
   and the loop is not vectorized.  */
5590 loop_vinfo = vect_analyze_loop_form (loop);
5593 if (vect_debug_details (UNKNOWN_LOC))
5594 fprintf (dump_file, "bad loop form.");
5598 /* Find all data references in the loop (which correspond to vdefs/vuses)
5599 and analyze their evolution in the loop.
5601 FORNOW: Handle only simple, array references, which
5602 alignment can be forced, and aligned pointer-references. */
5604 ok = vect_analyze_data_refs (loop_vinfo);
5607 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5608 fprintf (dump_file, "bad data references.");
5609 destroy_loop_vec_info (loop_vinfo);
5613 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5615 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5618 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5619 fprintf (dump_file, "unexpected pattern.");
5620 if (vect_debug_stats (LOOP_LOC (loop_vinfo)))
5621 fprintf (dump_file, "not vectorized: unexpected pattern.");
5622 destroy_loop_vec_info (loop_vinfo);
5626 /* Check that all cross-iteration scalar data-flow cycles are OK.
5627 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5629 ok = vect_analyze_scalar_cycles (loop_vinfo);
5632 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5633 fprintf (dump_file, "bad scalar cycle.");
5634 destroy_loop_vec_info (loop_vinfo);
5638 /* Analyze data dependences between the data-refs in the loop.
5639 FORNOW: fail at the first data dependence that we encounter. */
5641 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5644 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5645 fprintf (dump_file, "bad data dependence.");
5646 destroy_loop_vec_info (loop_vinfo);
5650 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5651 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5653 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5656 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5657 fprintf (dump_file, "bad data access.");
5658 destroy_loop_vec_info (loop_vinfo);
5662 /* Analyze the alignment of the data-refs in the loop.
5663 FORNOW: Only aligned accesses are handled. */
5665 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5668 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5669 fprintf (dump_file, "bad data alignment.");
5670 destroy_loop_vec_info (loop_vinfo);
5674 /* Scan all the operations in the loop and make sure they are
5677 ok = vect_analyze_operations (loop_vinfo);
5680 if (vect_debug_details (LOOP_LOC (loop_vinfo)))
5681 fprintf (dump_file, "bad operation or unsupported loop bound.");
5682 destroy_loop_vec_info (loop_vinfo);
/* Every analysis succeeded: mark the loop as vectorizable.  */
5686 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5692 /* Function need_imm_uses_for.
5694 Return whether we ought to include information for 'var'
5695 when calculating immediate uses. For this pass we only want use
5696 information for non-virtual variables. */
/* NOTE(review): the return-type line (presumably 'static bool') and the
   function braces are elided from this listing.  Used as the filter
   callback for compute_immediate_uses in vectorize_loops below.  */
5699 need_imm_uses_for (tree var)
/* is_gimple_reg selects non-virtual (register-like) variables.  */
5701 return is_gimple_reg (var);
5705 /* Function vectorize_loops.
5707 Entry Point to loop vectorization phase. */
/* NOTE(review): the return-type line, the function's braces, and the
   closing lines past 'bitmap_clear' are elided from / fall outside this
   listing; 'continue' guards inside the loops (e.g. for NULL entries in
   loops->parray) are likewise not visible.  Comments below describe
   only the visible statements.  */
5710 vectorize_loops (struct loops *loops)
5712 unsigned int i, loops_num;
5713 unsigned int num_vectorized_loops = 0;
5715 /* Does the target support SIMD? */
5716 /* FORNOW: until more sophisticated machine modelling is in place. */
/* Bail out early when the target defines no vector width at all.  */
5717 if (!UNITS_PER_SIMD_WORD)
5719 if (vect_debug_details (UNKNOWN_LOC))
5720 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5724 #ifdef ENABLE_CHECKING
5725 verify_loop_closed_ssa ();
/* Build immediate-use information, restricted to non-virtual variables
   via the need_imm_uses_for filter.  */
5728 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5730 /* ----------- Analyze loops. ----------- */
5732 /* If some loop was duplicated, it gets bigger number
5733 than all previously defined loops. This fact allows us to run
5734 only over initial loops skipping newly generated ones. */
/* Snapshot the loop count before transformation, since
   vect_transform_loop may create new loops (see comment above).  */
5735 loops_num = loops->num;
5736 for (i = 1; i < loops_num; i++)
5738 loop_vec_info loop_vinfo;
5739 struct loop *loop = loops->parray[i];
/* Analyze the loop and stash the result in loop->aux so the
   finalization pass below can free it.  */
5744 loop_vinfo = vect_analyze_loop (loop);
5745 loop->aux = loop_vinfo;
5747 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5750 vect_transform_loop (loop_vinfo, loops);
5751 num_vectorized_loops++;
5754 if (vect_debug_stats (UNKNOWN_LOC) || vect_debug_details (UNKNOWN_LOC))
5755 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5756 num_vectorized_loops);
5758 /* ----------- Finalize. ----------- */
/* Free the loop_vec_info structs recorded in loop->aux during the
   analysis pass above.  */
5761 for (i = 1; i < loops_num; i++)
5763 struct loop *loop = loops->parray[i];
5764 loop_vec_info loop_vinfo;
5768 loop_vinfo = loop->aux;
5769 destroy_loop_vec_info (loop_vinfo);
/* Rebuild SSA form (the transformation introduced new code), restore
   loop-closed SSA, and drop the rename worklist.  */
5773 rewrite_into_ssa (false);
5774 rewrite_into_loop_closed_ssa (); /* FORNOW */
5775 bitmap_clear (vars_to_rename);