2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations whose vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
/* Helpers that maintain the SSA_NAME_AUX-based renaming used while
   duplicating a loop: allocate_new_names creates the duplicate ssa-names,
   the rename_* routines substitute them, free_new_names releases them.  */
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_ref_dependence
195 (struct data_reference *, struct data_reference *, loop_vec_info);
196 static bool vect_analyze_data_ref_dependences (loop_vec_info);
197 static bool vect_analyze_data_refs_alignment (loop_vec_info);
198 static bool vect_compute_data_refs_alignment (loop_vec_info);
199 static bool vect_analyze_operations (loop_vec_info);
201 /* Main code transformation functions. */
202 static void vect_transform_loop (loop_vec_info, struct loops *);
203 static bool vect_transform_stmt (tree, block_stmt_iterator *);
204 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
205 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
206 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
207 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
208 static enum dr_alignment_support vect_supportable_dr_alignment
209 (struct data_reference *);
210 static void vect_align_data_ref (tree);
211 static void vect_enhance_data_refs_alignment (loop_vec_info);
213 /* Utility functions for the analyses. */
214 static bool vect_is_simple_use (tree , loop_vec_info, tree *);
215 static bool exist_non_indexing_operands_for_use_p (tree, tree);
216 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
217 static void vect_mark_relevant (varray_type *, tree);
218 static bool vect_stmt_relevant_p (tree, loop_vec_info);
219 static tree vect_get_loop_niters (struct loop *, tree *);
220 static bool vect_compute_data_ref_alignment (struct data_reference *);
221 static bool vect_analyze_data_ref_access (struct data_reference *);
222 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
223 static struct data_reference * vect_analyze_pointer_ref_access
225 static bool vect_can_advance_ivs_p (loop_vec_info);
226 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
227 loop_vec_info, tree *, tree *, tree *,
/* NOTE(review): vect_analyze_pointer_ref_access is also declared above
   (original line 223) -- this repeats the declaration.  Confirm against
   the full source and remove the duplicate.  */
229 static struct data_reference * vect_analyze_pointer_ref_access
231 static tree vect_get_ptr_offset (tree, tree, tree *);
232 static tree vect_get_memtag_and_dr
233 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
234 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
236 static tree vect_strip_conversion (tree);
238 /* Utility functions for the code transformation. */
239 static tree vect_create_destination_var (tree, tree);
240 static tree vect_create_data_ref_ptr
241 (tree, block_stmt_iterator *, tree, tree *, bool);
242 static tree vect_create_index_for_vector_ref (loop_vec_info);
243 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
244 static tree get_vectype_for_scalar_type (tree);
245 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
246 static tree vect_get_vec_def_for_operand (tree, tree);
247 static tree vect_init_vector (tree, tree);
248 static void vect_finish_stmt_generation
249 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
251 /* Utility function dealing with loop peeling (not peeling itself). */
252 static void vect_generate_tmps_on_preheader
253 (loop_vec_info, tree *, tree *, tree *);
254 static tree vect_build_loop_niters (loop_vec_info);
255 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
256 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
257 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
258 static void vect_update_inits_of_drs (loop_vec_info, tree);
259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
260 static void vect_do_peeling_for_loop_bound
261 (loop_vec_info, tree *, struct loops *);
263 /* Utilities for creation and deletion of vec_info structs. */
264 loop_vec_info new_loop_vec_info (struct loop *loop);
265 void destroy_loop_vec_info (loop_vec_info);
266 stmt_vec_info new_stmt_vec_info (tree, loop_vec_info);
268 static bool vect_debug_stats (struct loop *loop);
269 static bool vect_debug_details (struct loop *loop);
272 /*************************************************************************
273 Simple Loop Peeling Utilities
275 Utilities to support loop peeling for vectorization purposes.
276 *************************************************************************/
279 /* For each definition in DEFINITIONS this function allocates
/* For every SSA version set in the DEFINITIONS bitmap, create a duplicate
   ssa-name and stash a pointer to it in SSA_NAME_AUX (def), so that the
   rename_* routines below can substitute the new name inside the loop
   copy.  The heap cell is released later by free_new_names.  */
283 allocate_new_names (bitmap definitions)
288 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
290 tree def = ssa_name (ver);
/* One heap cell per definition; ownership is handed to SSA_NAME_AUX.
   (xmalloc never returns NULL -- GCC's wrapper aborts on OOM.)  */
291 tree *new_name_ptr = xmalloc (sizeof (tree));
/* The duplicate must carry the same occurs-in-abnormal-phi property
   as the original name.  */
293 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
295 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
296 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
298 SSA_NAME_AUX (def) = new_name_ptr;
303 /* Renames the use *OP_P.  Replaces the ssa-name used at *OP_P with the
   duplicate recorded in SSA_NAME_AUX by allocate_new_names, if any.  */
306 rename_use_op (use_operand_p op_p)
/* Only SSA_NAME uses can carry a replacement; leave anything else alone.  */
310 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
/* The replacement (if any) was stashed by allocate_new_names.  */
313 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
315 /* Something defined outside of the loop -- no duplicate was allocated,
   so there is nothing to rename.  */
319 /* An ordinary ssa name defined in the loop. */
321 SET_USE (op_p, *new_name_ptr);
325 /* Renames the def *OP_P in statement STMT.  Replaces the defined ssa-name
   with its duplicate (from SSA_NAME_AUX) and re-points the duplicate's
   defining statement at STMT.  */
328 rename_def_op (def_operand_p op_p, tree stmt)
332 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
335 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
337 /* Something defined outside of the loop. */
341 /* An ordinary ssa name defined in the loop. */
343 SET_DEF (op_p, *new_name_ptr);
/* The duplicate name must record STMT as its defining statement.  */
344 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
348 /* Renames the variables in basic block BB.  Walks every phi node and
   statement, renaming all defs and uses (real and virtual) via
   rename_def_op/rename_use_op, and finally fixes up phi arguments on
   edges that leave the loop.  */
351 rename_variables_in_bb (basic_block bb)
354 block_stmt_iterator bsi;
360 v_may_def_optype v_may_defs;
361 v_must_def_optype v_must_defs;
365 struct loop *loop = bb->loop_father;
/* Rename the results of the phi nodes at the top of the block.  */
367 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
368 rename_def_op (PHI_RESULT_PTR (phi), phi);
370 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
372 stmt = bsi_stmt (bsi);
/* Make sure the operand cache for STMT is up to date before scanning.  */
373 get_stmt_operands (stmt);
374 ann = stmt_ann (stmt);
/* Real uses and defs.  */
376 uses = USE_OPS (ann);
377 for (i = 0; i < NUM_USES (uses); i++)
378 rename_use_op (USE_OP_PTR (uses, i));
380 defs = DEF_OPS (ann);
381 for (i = 0; i < NUM_DEFS (defs); i++)
382 rename_def_op (DEF_OP_PTR (defs, i), stmt);
/* Virtual operands: VUSEs, V_MAY_DEFs (use + def), V_MUST_DEFs
   (kill + result).  */
384 vuses = VUSE_OPS (ann);
385 for (i = 0; i < NUM_VUSES (vuses); i++)
386 rename_use_op (VUSE_OP_PTR (vuses, i));
388 v_may_defs = V_MAY_DEF_OPS (ann);
389 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
391 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
392 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
395 v_must_defs = V_MUST_DEF_OPS (ann);
396 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
398 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
399 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
/* Phi nodes in successor blocks that lie outside the loop may use names
   defined in BB through the exiting edge -- rename those arguments too.  */
403 FOR_EACH_EDGE (e, ei, bb->succs)
405 if (!flow_bb_inside_loop_p (loop, e->dest))
407 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
408 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
413 /* Releases the structures holding the new ssa names.  Counterpart of
   allocate_new_names: frees each SSA_NAME_AUX cell for the versions set
   in DEFINITIONS.  */
416 free_new_names (bitmap definitions)
421 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
423 tree def = ssa_name (ver);
/* Only names for which allocate_new_names made a cell have AUX set.  */
425 if (SSA_NAME_AUX (def))
427 free (SSA_NAME_AUX (def));
/* Clear AUX so no stale pointer survives into later passes.  */
428 SSA_NAME_AUX (def) = NULL;
434 /* Renames variables in new generated LOOP.  Applies rename_variables_in_bb
   to every basic block of the loop body.  */
437 rename_variables_in_loop (struct loop *loop)
442 bbs = get_loop_body (loop);
/* NOTE(review): get_loop_body allocates BBS; confirm it is freed in the
   lines elided from this extract.  */
444 for (i = 0; i < loop->num_nodes; i++)
445 rename_variables_in_bb (bbs[i]);
451 /* Update the PHI nodes of NEW_LOOP.
453 NEW_LOOP is a duplicate of ORIG_LOOP.
454 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
455 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
456 executes before it. */
459 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
460 struct loop *new_loop, bool after)
462 tree *new_name_ptr, new_ssa_name;
463 tree phi_new, phi_orig;
465 edge orig_loop_latch = loop_latch_edge (orig_loop);
466 edge orig_entry_e = loop_preheader_edge (orig_loop);
467 edge new_loop_exit_e = new_loop->exit_edges[0];
468 edge new_loop_entry_e = loop_preheader_edge (new_loop);
/* When NEW_LOOP runs after ORIG_LOOP its entry values come from
   ORIG_LOOP's latch; otherwise from ORIG_LOOP's original preheader.  */
469 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
472 step 1. For each loop-header-phi:
473 Add the first phi argument for the phi in NEW_LOOP
474 (the one associated with the entry of NEW_LOOP)
476 step 2. For each loop-header-phi:
477 Add the second phi argument for the phi in NEW_LOOP
478 (the one associated with the latch of NEW_LOOP)
480 step 3. Update the phis in the successor block of NEW_LOOP.
482 case 1: NEW_LOOP was placed before ORIG_LOOP:
483 The successor block of NEW_LOOP is the header of ORIG_LOOP.
484 Updating the phis in the successor block can therefore be done
485 along with the scanning of the loop header phis, because the
486 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
487 phi nodes, organized in the same order.
489 case 2: NEW_LOOP was placed after ORIG_LOOP:
490 The successor block of NEW_LOOP is the original exit block of
491 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
492 We postpone updating these phis to a later stage (when
493 loop guards are added).
497 /* Scan the phis in the headers of the old and new loops
498 (they are organized in exactly the same order). */
500 for (phi_new = phi_nodes (new_loop->header),
501 phi_orig = phi_nodes (orig_loop->header);
503 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
/* step 1: the entry argument of the new phi is the value flowing into
   NEW_LOOP (latch value or preheader value of ORIG_LOOP, per AFTER).  */
506 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
507 add_phi_arg (phi_new, def, new_loop_entry_e);
/* step 2: the latch argument is the renamed duplicate of ORIG_LOOP's
   latch value, looked up through SSA_NAME_AUX.  */
510 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
511 if (TREE_CODE (def) != SSA_NAME)
514 new_name_ptr = SSA_NAME_AUX (def);
516 /* Something defined outside of the loop. */
519 /* An ordinary ssa name defined in the loop. */
520 new_ssa_name = *new_name_ptr;
521 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
523 /* step 3 (case 1): NEW_LOOP was placed before ORIG_LOOP, so its exit
   feeds ORIG_LOOP's header phi directly.  */
526 gcc_assert (new_loop_exit_e == orig_entry_e);
527 SET_PHI_ARG_DEF (phi_orig,
528 new_loop_exit_e->dest_idx,
535 /* Update PHI nodes for a guard of the LOOP.
538 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
539 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
540 originates from the guard-bb, skips LOOP and reaches the (unique) exit
541 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
542 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
543 LOOP header) before the guard code was added, and now it became a merge
544 point of two paths - the path that ends with the LOOP exit-edge, and
545 the path that ends with GUARD_EDGE.
547 This function creates and updates the relevant phi nodes to account for
548 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
549 1. Create phi nodes at NEW_MERGE_BB.
550 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
551 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
554 ===> The CFG before the guard-code was added:
556 if (exit_loop) goto update_bb : LOOP_header_bb
559 ==> The CFG after the guard-code was added:
561 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
563 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
568 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
569 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
570 organized in the same order.
571 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
574 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
575 "original" loop). FALSE if LOOP is an original loop (not a newly
576 created copy). The SSA_NAME_AUX fields of the defs in the original
577 loop are the corresponding new ssa-names used in the new duplicated
578 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
579 nodes in UPDATE_BB takes the original ssa-name, and which takes the
580 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
581 the LOOP-exit-edge takes the new-name, and the phi-arg that is
582 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
583 FALSE, it's the other way around.
587 slpeel_update_phi_nodes_for_guard (edge guard_edge,
592 tree orig_phi, new_phi, update_phi;
593 tree guard_arg, loop_arg;
594 basic_block new_merge_bb = guard_edge->dest;
/* NEW_MERGE_BB has a single successor: UPDATE_BB.  */
595 edge e = EDGE_SUCC (new_merge_bb, 0);
596 basic_block update_bb = e->dest;
/* For entry phis the template phis live in the loop header; for exit
   (loop-closed-ssa) phis they live in UPDATE_BB itself.  */
597 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
/* The two chains are walked in lockstep: phis are organized in the same
   order (see ENTRY_PHIS above).  */
599 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
600 orig_phi && update_phi;
601 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
603 /* 1. Generate new phi node in NEW_MERGE_BB: */
604 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
607 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
608 of LOOP. Set the two phi args in NEW_PHI for these edges: */
/* Entry-phi case: the loop-exit value is the latch argument of the
   header phi; the guard value is the loop's entry argument.  */
611 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
612 EDGE_SUCC (loop->latch, 0));
613 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
/* Exit-phi case: pick original vs. renamed name depending on which copy
   of the loop we are guarding (see IS_NEW_LOOP above).  */
617 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
618 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
622 new_name = *new_name_ptr;
624 /* Something defined outside of the loop */
629 guard_arg = orig_def;
634 guard_arg = new_name;
638 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
639 add_phi_arg (new_phi, guard_arg, guard_edge);
641 /* 3. Update phi in successor block. */
642 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
643 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
644 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
/* create_phi_node prepends, so reverse the chain to restore the order
   that matches ORIG_BB's phis.  */
647 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
651 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
652 that starts at zero, increases by one and its limit is NITERS.
654 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
657 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
659 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
661 edge exit_edge = loop->exit_edges[0];
662 block_stmt_iterator loop_cond_bsi;
663 block_stmt_iterator incr_bsi;
665 tree begin_label = tree_block_label (loop->latch);
666 tree exit_label = tree_block_label (loop->single_exit->dest);
/* The new IV counts from 0 to NITERS in steps of 1, in NITERS' type.  */
667 tree init = build_int_cst (TREE_TYPE (niters), 0);
668 tree step = build_int_cst (TREE_TYPE (niters), 1);
/* The old exit test is the insertion point for the new one.  */
672 orig_cond = get_loop_exit_condition (loop);
673 #ifdef ENABLE_CHECKING
674 gcc_assert (orig_cond);
676 loop_cond_bsi = bsi_for_stmt (orig_cond);
678 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
679 create_iv (init, step, NULL_TREE, loop,
680 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
/* Orient the new condition so the exit is taken on the same branch
   (then/else) as in the original test.  */
682 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
684 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
685 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
686 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
688 else /* 'then' edge loops back. */
690 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
691 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
692 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
695 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
696 then_label, else_label);
697 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
699 /* Remove old loop exit test: */
700 bsi_remove (&loop_cond_bsi);
702 if (vect_debug_stats (loop) || vect_debug_details (loop))
703 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
/* Record the (now known) iteration count on the loop.  */
705 loop->nb_iterations = niters;
709 /* Given LOOP this function generates a new copy of it and puts it
710 on E which is either the entry or exit of LOOP.  Returns the new loop
   (or NULL on failure in the elided early-exit paths).  */
713 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
716 struct loop *new_loop;
717 basic_block *new_bbs, *bbs;
720 basic_block exit_dest;
/* E must be either the single exit edge or the preheader edge of LOOP.  */
723 at_exit = (e == loop->exit_edges[0]);
724 if (!at_exit && e != loop_preheader_edge (loop))
727 bbs = get_loop_body (loop);
729 /* Check whether duplication is possible. */
730 if (!can_copy_bbs_p (bbs, loop->num_nodes))
736 /* Generate new loop structure. */
737 new_loop = duplicate_loop (loops, loop, loop->outer);
/* Remember whether the loop header immediately dominated the exit
   destination, so dominance can be restored after redirection.  */
744 exit_dest = loop->exit_edges[0]->dest;
745 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
746 exit_dest) == loop->header ?
/* NOTE(review): NEW_BBS (and BBS) should be freed after copy_bbs; confirm
   in the lines elided from this extract.  */
749 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
751 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
753 /* Duplicating phi args at exit bbs as coming
754 also from exit of duplicated loop. */
755 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
757 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
760 edge new_loop_exit_edge;
/* The copied header has two successors; the one that is not the latch
   is the exit edge of the copy.  */
762 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
763 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
765 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
767 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
771 if (at_exit) /* Add the loop copy at exit. */
773 redirect_edge_and_branch_force (e, new_loop->header);
774 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
776 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
778 else /* Add the copy at entry. */
781 edge entry_e = loop_preheader_edge (loop);
782 basic_block preheader = entry_e->src;
/* Find the exit edge of the copy: the header successor that leaves
   the new loop.  */
784 if (!flow_bb_inside_loop_p (new_loop,
785 EDGE_SUCC (new_loop->header, 0)->dest))
786 new_exit_e = EDGE_SUCC (new_loop->header, 0);
788 new_exit_e = EDGE_SUCC (new_loop->header, 1);
790 redirect_edge_and_branch_force (new_exit_e, loop->header);
791 set_immediate_dominator (CDI_DOMINATORS, loop->header,
794 /* We have to add phi args to the loop->header here as coming
795 from new_exit_e edge. */
796 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
798 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
800 add_phi_arg (phi, phi_arg, new_exit_e);
/* The old preheader now feeds the copy instead of the original loop.  */
803 redirect_edge_and_branch_force (entry_e, new_loop->header);
804 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
/* Refresh the cached entry/exit edge info on both loops.  */
807 flow_loop_scan (new_loop, LOOP_ALL);
808 flow_loop_scan (loop, LOOP_ALL);
816 /* Given the condition statement COND, put it as the last statement
817 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
818 Assumes that this is the single exit of the guarded loop.
819 Returns the skip edge. */
822 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
825 block_stmt_iterator bsi;
827 tree cond_stmt, then_label, else_label;
/* The existing fall-through successor of GUARD_BB becomes the "false"
   (enter-the-loop) branch of the new COND_EXPR.  */
829 enter_e = EDGE_SUCC (guard_bb, 0);
830 enter_e->flags &= ~EDGE_FALLTHRU;
831 enter_e->flags |= EDGE_FALSE_VALUE;
832 bsi = bsi_last (guard_bb);
/* COND true -> jump to EXIT_BB (skip the loop); false -> enter it.  */
834 then_label = build1 (GOTO_EXPR, void_type_node,
835 tree_block_label (exit_bb));
836 else_label = build1 (GOTO_EXPR, void_type_node,
837 tree_block_label (enter_e->dest));
838 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
839 then_label, else_label);
840 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
841 /* Add new edge to connect entry block to the second loop. */
842 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
/* EXIT_BB is now reachable two ways; DOM_BB is its new dominator.  */
843 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
848 /* This function verifies that the following restrictions apply to LOOP:
850 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
851 (3) it is single entry, single exit
852 (4) its exit condition is the last stmt in the header
853 (5) E is the entry/exit edge of LOOP.
   Returns false when any restriction is violated (loop cannot be peeled).  */
857 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
859 edge exit_e = loop->exit_edges [0];
860 edge entry_e = loop_preheader_edge (loop);
861 tree orig_cond = get_loop_exit_condition (loop);
862 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
/* Pending SSA rewrites would invalidate the renaming done here.  */
864 if (any_marked_for_rewrite_p ())
868 /* All loops have an outer scope; the only case loop->outer is NULL is for
869 the function itself. */
871 || loop->num_nodes != 2
872 || !empty_block_p (loop->latch)
873 || loop->num_exits != 1
874 || loop->num_entries != 1
875 /* Verify that new loop exit condition can be trivially modified. */
876 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
877 || (e != exit_e && e != entry_e))
883 #ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by slpeel_tree_peel_loop_to_edge:
   FIRST_LOOP must be followed by SECOND_LOOP with the guard structure
   described below.  Only compiled with checking enabled.  */
885 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
886 struct loop *second_loop)
888 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
889 basic_block loop2_entry_bb = second_loop->pre_header;
890 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
892 /* A guard that controls whether the second_loop is to be executed or skipped
893 is placed in first_loop->exit. first_loop->exit therefore has two
894 successors - one is the preheader of second_loop, and the other is a bb
897 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
900 /* 1. Verify that one of the successors of first_loop->exit is the preheader
903 /* The preheader of new_loop is expected to have two predecessors:
904 first_loop->exit and the block that precedes first_loop. */
906 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
907 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
908 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
909 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
910 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
912 /* Verify that the other successor of first_loop->exit is after the
912 /* Verify that the other successor of first_loopt->exit is after the
918 /* Function slpeel_tree_peel_loop_to_edge.
920 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
921 that is placed on the entry (exit) edge E of LOOP. After this transformation
922 we have two loops one after the other - first-loop iterates FIRST_NITERS
923 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
926 - LOOP: the loop to be peeled.
927 - E: the exit or entry edge of LOOP.
928 If it is the entry edge, we peel the first iterations of LOOP. In this
929 case first-loop is LOOP, and second-loop is the newly created loop.
930 If it is the exit edge, we peel the last iterations of LOOP. In this
931 case, first-loop is the newly created loop, and second-loop is LOOP.
932 - NITERS: the number of iterations that LOOP iterates.
933 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
934 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
935 for updating the loop bound of the first-loop to FIRST_NITERS. If it
936 is false, the caller of this function may want to take care of this
937 (this can be useful if we don't want new stmts added to first-loop).
940 The function returns a pointer to the new loop-copy, or NULL if it failed
941 to perform the transformation.
/* NOTE(review): this region is an elided listing — original line numbers are
   fused into each line and intermediate lines are missing.  Comments added
   below are hedged accordingly; code bytes are untouched.  */
943 The function generates two if-then-else guards: one before the first loop,
944 and the other before the second loop:
946 if (FIRST_NITERS == 0) then skip the first loop,
947 and go directly to the second loop.
949 if (FIRST_NITERS == NITERS) then skip the second loop.
951 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
952 FORNOW the resulting code will not be in loop-closed-ssa form.
956 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
957 edge e, tree first_niters,
958 tree niters, bool update_first_loop_count)
960 struct loop *new_loop = NULL, *first_loop, *second_loop;
964 basic_block bb_before_second_loop, bb_after_second_loop;
965 basic_block bb_before_first_loop;
966 basic_block bb_between_loops;
967 edge exit_e = loop->exit_edges [0];
969 if (!slpeel_can_duplicate_loop_p (loop, e))
972 /* We have to initialize cfg_hooks. Then, when calling
973 cfg_hooks->split_edge, the function tree_split_edge
974 is actually called and, when calling cfg_hooks->duplicate_block,
975 the function tree_duplicate_bb is called. */
976 tree_register_cfg_hooks ();
979 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
980 Resulting CFG would be:
993 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
995 if (vect_debug_stats (loop) || vect_debug_details (loop))
996 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
/* Whether the copy is the first or second loop depends on whether E was the
   exit edge of LOOP (copy placed after) or its entry (copy placed before).  */
1002 /* NEW_LOOP was placed after LOOP. */
1004 second_loop = new_loop;
1008 /* NEW_LOOP was placed before LOOP. */
1009 first_loop = new_loop;
/* Give the duplicated loop fresh SSA names and fix up its PHIs.  */
1013 definitions = marked_ssa_names ();
1014 allocate_new_names (definitions);
1015 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1016 rename_variables_in_loop (new_loop);
1019 /* 2. Add the guard that controls whether the first loop is executed.
1020 Resulting CFG would be:
1022 bb_before_first_loop:
1023 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1030 bb_before_second_loop:
1039 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1040 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1041 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1042 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1043 flow_loop_scan (first_loop, LOOP_ALL);
1044 flow_loop_scan (second_loop, LOOP_ALL);
/* Guard condition: FIRST_NITERS <= 0 skips the first loop.  */
1047 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1048 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1049 bb_before_second_loop, bb_before_first_loop);
1050 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1051 first_loop == new_loop);
1054 /* 3. Add the guard that controls whether the second loop is executed.
1055 Resulting CFG would be:
1057 bb_before_first_loop:
1058 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1066 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1067 GOTO bb_before_second_loop
1069 bb_before_second_loop:
1075 bb_after_second_loop:
1080 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1081 add_bb_to_loop (bb_between_loops, first_loop->outer);
1082 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1083 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1084 flow_loop_scan (first_loop, LOOP_ALL);
1085 flow_loop_scan (second_loop, LOOP_ALL);
/* Guard condition: FIRST_NITERS == NITERS means the first loop consumed all
   iterations, so the second loop is skipped entirely.  */
1087 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1088 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1089 bb_after_second_loop, bb_before_first_loop);
1090 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1091 second_loop == new_loop);
1093 /* Flow loop scan does not update loop->single_exit field. */
1094 first_loop->single_exit = first_loop->exit_edges[0];
1095 second_loop->single_exit = second_loop->exit_edges[0];
1097 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1099 if (update_first_loop_count)
1100 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
/* Release the temporary SSA-name bookkeeping allocated above.  */
1102 free_new_names (definitions);
1103 BITMAP_XFREE (definitions);
1104 unmark_all_for_rewrite ();
1110 /* Here the proper Vectorizer starts. */
1112 /*************************************************************************
1113 Vectorization Utilities.
1114 *************************************************************************/
1116 /* Function new_stmt_vec_info.
1118 Create and initialize a new stmt_vec_info struct for STMT. */
1121 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
/* xcalloc aborts on OOM, so RES is never NULL; it is also zero-filled,
   but every field is still set explicitly below for clarity.  */
1124 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1126 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1127 STMT_VINFO_STMT (res) = stmt;
1128 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1129 STMT_VINFO_RELEVANT_P (res) = 0;
1130 STMT_VINFO_VECTYPE (res) = NULL;
1131 STMT_VINFO_VEC_STMT (res) = NULL;
1132 STMT_VINFO_DATA_REF (res) = NULL;
1133 STMT_VINFO_MEMTAG (res) = NULL;
1134 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1135 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1136 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1137 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1138 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1144 /* Function new_loop_vec_info.
1146 Create and initialize a new loop_vec_info struct for LOOP, as well as
1147 stmt_vec_info structs for all the stmts in LOOP. */
1150 new_loop_vec_info (struct loop *loop)
1154 block_stmt_iterator si;
1157 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
/* get_inner basic blocks of LOOP; ownership of the BBS array is transferred
   to RES (freed later in destroy_loop_vec_info).  */
1159 bbs = get_loop_body (loop);
1161 /* Create stmt_info for all stmts in the loop. */
1162 for (i = 0; i < loop->num_nodes; i++)
1164 basic_block bb = bbs[i];
1165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1167 tree stmt = bsi_stmt (si);
/* Force operand scanning so the stmt annotation exists before we attach
   the per-stmt vectorizer info to it.  */
1170 get_stmt_operands (stmt);
1171 ann = stmt_ann (stmt);
1172 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1176 LOOP_VINFO_LOOP (res) = loop;
1177 LOOP_VINFO_BBS (res) = bbs;
1178 LOOP_VINFO_EXIT_COND (res) = NULL;
1179 LOOP_VINFO_NITERS (res) = NULL;
1180 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1181 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1182 LOOP_VINFO_VECT_FACTOR (res) = 0;
/* Start the read/write data-ref arrays with room for 20 entries each;
   they grow on demand.  */
1183 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1184 "loop_write_datarefs");
1185 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1186 "loop_read_datarefs");
1187 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1193 /* Function destroy_loop_vec_info.
1195 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1196 stmts in the loop. */
1199 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1204 block_stmt_iterator si;
1210 loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 bbs = LOOP_VINFO_BBS (loop_vinfo);
1213 nbbs = loop->num_nodes;
/* Detach (and presumably free — elided lines not visible here) the
   stmt_vec_info attached to every stmt in the loop body.  */
1215 for (j = 0; j < nbbs; j++)
1217 basic_block bb = bbs[j];
1218 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1220 tree stmt = bsi_stmt (si);
1221 stmt_ann_t ann = stmt_ann (stmt);
1222 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1224 set_stmt_info (ann, NULL);
/* Release the BB array allocated by get_loop_body in new_loop_vec_info,
   and the data-ref varrays.  */
1228 free (LOOP_VINFO_BBS (loop_vinfo));
1229 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1230 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1236 /* Function vect_debug_stats.
1238 For vectorization statistics dumps.  Returns false immediately unless
     dumping is enabled with TDF_STATS; otherwise prints the source location
     of LOOP (when available) to DUMP_FILE. */
1241 vect_debug_stats (struct loop *loop)
1244 block_stmt_iterator si;
1245 tree node = NULL_TREE;
1247 if (!dump_file || !(dump_flags & TDF_STATS))
1252 fprintf (dump_file, "\n");
/* Scan stmts for one that carries a source location to report.  */
1261 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1263 node = bsi_stmt (si);
1264 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1268 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1269 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1271 fprintf (dump_file, "\nloop at %s:%d: ",
1272 EXPR_FILENAME (node), EXPR_LINENO (node));
1280 /* Function vect_debug_details.
1282 For vectorization debug dumps.  Same structure as vect_debug_stats, but
     gated on TDF_DETAILS instead of TDF_STATS. */
1285 vect_debug_details (struct loop *loop)
1288 block_stmt_iterator si;
1289 tree node = NULL_TREE;
1291 if (!dump_file || !(dump_flags & TDF_DETAILS))
1296 fprintf (dump_file, "\n");
/* Scan stmts for one that carries a source location to report.  */
1305 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1307 node = bsi_stmt (si);
1308 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1312 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1313 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1315 fprintf (dump_file, "\nloop at %s:%d: ",
1316 EXPR_FILENAME (node), EXPR_LINENO (node));
1324 /* Function vect_get_ptr_offset
1326 Compute the OFFSET modulo vector-type alignment of pointer REF in bits.
     All parameters are currently unused (ATTRIBUTE_UNUSED): this is a stub
     that does not yet use alignment information. */
1329 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1330 tree vectype ATTRIBUTE_UNUSED,
1331 tree *offset ATTRIBUTE_UNUSED)
1333 /* TODO: Use alignment information. */
1338 /* Function vect_strip_conversion
1340 Strip conversions that don't narrow the mode. */
1343 vect_strip_conversion (tree expr)
1345 tree to, ti, oprnd0;
/* Peel NOP/CONVERT wrappers as long as both sides are integral and the
   conversion does not narrow the machine mode; stop (elided return paths)
   otherwise.  */
1347 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1349 to = TREE_TYPE (expr);
1350 oprnd0 = TREE_OPERAND (expr, 0);
1351 ti = TREE_TYPE (oprnd0);
1353 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1355 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1364 /* Function vect_analyze_offset_expr
1366 Given an offset expression EXPR received from get_inner_reference, analyze
1367 it and create an expression for INITIAL_OFFSET by substituting the variables
1368 of EXPR with initial_condition of the corresponding access_fn in the loop.
1371 for (j = 3; j < N; j++)
1374 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1375 substituted, since its access_fn in the inner loop is i. 'j' will be
1376 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1379 Compute MISALIGN (the misalignment of the data reference initial access from
1380 its base) if possible. Misalignment can be calculated only if all the
1381 variables can be substituted with constants, or if a variable is multiplied
1382 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1383 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1384 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1385 VECTYPE_ALIGNMENT computation in the caller of this function).
1387 STEP is an evolution of the data reference in this loop in bytes.
1388 In the above example, STEP is C_j.
1390 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1391 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1392 are NULL_TREEs. Otherwise, return TRUE.
1397 vect_analyze_offset_expr (tree expr,
1399 tree vectype_alignment,
1400 tree *initial_offset,
1406 tree left_offset = ssize_int (0);
1407 tree right_offset = ssize_int (0);
1408 tree left_misalign = ssize_int (0);
1409 tree right_misalign = ssize_int (0);
1410 tree left_step = ssize_int (0);
1411 tree right_step = ssize_int (0);
1412 enum tree_code code;
1413 tree init, evolution;
/* Default outputs to "unknown" so early failure returns are consistent.  */
1416 *misalign = NULL_TREE;
1417 *initial_offset = NULL_TREE;
1419 /* Strip conversions that don't narrow the mode. */
1420 expr = vect_strip_conversion (expr);
/* Case 1: constant offset — offset and misalignment are the constant
   itself, step is zero.  */
1426 if (TREE_CODE (expr) == INTEGER_CST)
1428 *initial_offset = fold_convert (ssizetype, expr);
1429 *misalign = fold_convert (ssizetype, expr);
1430 *step = ssize_int (0);
1434 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1435 access_fn in the current loop. */
1436 if (SSA_VAR_P (expr))
1438 tree access_fn = analyze_scalar_evolution (loop, expr);
1440 if (access_fn == chrec_dont_know)
1444 init = initial_condition_in_loop_num (access_fn, loop->num);
1445 if (init == expr && !expr_invariant_in_loop_p (loop, init))
1446 /* Not enough information: may be not loop invariant.
1447 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1448 initial_condition is D, but it depends on i - loop's induction
1452 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1453 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1454 /* Evolution is not constant. */
1457 if (TREE_CODE (init) == INTEGER_CST)
1458 *misalign = fold_convert (ssizetype, init);
1460 /* Not constant, misalignment cannot be calculated. */
1461 *misalign = NULL_TREE;
1463 *initial_offset = fold_convert (ssizetype, init);
1465 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
1469 /* Recursive computation. */
1470 if (!BINARY_CLASS_P (expr))
1472 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
1473 if (vect_debug_details (NULL))
1475 fprintf (dump_file, "Not binary expression ");
1476 print_generic_expr (dump_file, expr, TDF_SLIM);
/* Analyze both operands recursively; either failing fails the whole
   expression.  */
1480 oprnd0 = TREE_OPERAND (expr, 0);
1481 oprnd1 = TREE_OPERAND (expr, 1);
1483 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1484 &left_misalign, &left_step)
1485 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1486 &right_offset, &right_misalign, &right_step))
1489 /* The type of the operation: plus, minus or mult. */
1490 code = TREE_CODE (expr);
/* The following handles the MULT_EXPR case (elided dispatch not visible):
   a non-constant right operand is unsupported.  */
1494 if (TREE_CODE (right_offset) != INTEGER_CST)
1495 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1497 FORNOW: We don't support such cases. */
1500 /* Strip conversions that don't narrow the mode. */
1501 left_offset = vect_strip_conversion (left_offset);
1504 /* Misalignment computation. */
1505 if (SSA_VAR_P (left_offset))
1507 /* If the left side contains variables that can't be substituted with
1508 constants, we check if the right side is a multiple of ALIGNMENT.
1510 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1511 fold_convert (ssizetype, vectype_alignment))))
1512 *misalign = ssize_int (0);
1514 /* If the remainder is not zero or the right side isn't constant,
1515 we can't compute misalignment. */
1516 *misalign = NULL_TREE;
1520 /* The left operand was successfully substituted with constant. */
1522 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1524 *misalign = size_binop (code, left_misalign, right_misalign);
1526 *misalign = NULL_TREE;
1529 /* Step calculation. */
1530 /* Multiply the step by the right operand. */
1531 *step = size_binop (MULT_EXPR, left_step, right_offset);
/* PLUS/MINUS case (elided dispatch): */
1536 /* Combine the recursive calculations for step and misalignment. */
1537 *step = size_binop (code, left_step, right_step);
1539 if (left_misalign && right_misalign)
1540 *misalign = size_binop (code, left_misalign, right_misalign);
1542 *misalign = NULL_TREE;
1550 /* Compute offset. */
1551 *initial_offset = fold_convert (ssizetype,
1552 fold (build2 (code, TREE_TYPE (left_offset),
1559 /* Function vect_get_base_and_offset
1561 Return the BASE of the data reference EXPR.
1562 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1564 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1565 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1566 instantiated with initial_conditions of access_functions of variables,
1567 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1569 Function get_inner_reference is used for the above in case of ARRAY_REF and
1573 EXPR - the memory reference that is being analyzed
1574 DR - the data_reference struct of the _original_ memory reference
1575 (Note: DR_REF (DR) is not necessarily EXPR)
1576 VECTYPE - the type that defines the alignment (i.e, we compute
1577 alignment relative to TYPE_ALIGN(VECTYPE))
1580 BASE (returned value) - the base of the data reference EXPR.
1581 E.g, if EXPR is a.b[k].c[i][j] the returned
1583 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1584 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1585 computation is impossible
1586 STEP - evolution of the DR_REF in the loop
1587 BASE_ALIGNED_P - indicates if BASE is aligned
1589 If something unexpected is encountered (an unsupported form of data-ref),
1590 then NULL_TREE is returned. */
1593 vect_get_base_and_offset (struct data_reference *dr,
1596 loop_vec_info loop_vinfo,
1597 tree *initial_offset,
1600 bool *base_aligned_p)
1602 tree this_offset = ssize_int (0);
1603 tree this_misalign = ssize_int (0);
1604 tree this_step = ssize_int (0);
1605 tree base = NULL_TREE;
1607 tree oprnd0, oprnd1;
1608 enum tree_code code = TREE_CODE (expr);
1609 HOST_WIDE_INT pbitsize;
1610 HOST_WIDE_INT pbitpos;
1612 enum machine_mode pmode;
1613 int punsignedp, pvolatilep;
1614 tree bit_pos_in_bytes;
1615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1617 *base_aligned_p = false;
/* Dispatch on TREE_CODE (expr); case labels are elided in this listing.  */
1621 /* These cases end the recursion: */
/* Case: a DECL — it is its own base; aligned if its DECL_ALIGN satisfies
   the vector type's alignment.  */
1624 *initial_offset = ssize_int (0);
1625 *step = ssize_int (0);
1626 *misalign = ssize_int (0);
1627 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1628 *base_aligned_p = true;
/* Case: SSA_NAME pointer — must be a pointer; if the pointed-to type is
   under-aligned, fall back to vect_get_ptr_offset.  */
1632 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1635 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1637 base = vect_get_ptr_offset (expr, vectype, misalign);
1639 *base_aligned_p = true;
1643 *base_aligned_p = true;
1644 *misalign = ssize_int (0);
1646 *initial_offset = ssize_int (0);
1647 *step = ssize_int (0);
/* Case: INTEGER_CST — constant address contribution.  */
1651 *initial_offset = fold_convert (ssizetype, expr);
1652 *misalign = fold_convert (ssizetype, expr);
1653 *step = ssize_int (0);
1656 /* These cases continue the recursion: */
1658 oprnd0 = TREE_OPERAND (expr, 0);
1663 oprnd0 = TREE_OPERAND (expr, 0);
1669 oprnd0 = TREE_OPERAND (expr, 0);
1670 oprnd1 = TREE_OPERAND (expr, 1);
1672 /* In case we have a PLUS_EXPR of the form
1673 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1674 This is verified in vect_get_memtag_and_dr. */
1675 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1676 &this_offset, &this_misalign,
1677 &this_step, base_aligned_p);
1678 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1679 this_offset = ssize_int (0);
1682 this_misalign = NULL_TREE;
1684 this_misalign = size_binop (TREE_CODE (expr), ssize_int (0),
/* Default case: any handled component-ref is decomposed with
   get_inner_reference; anything else is unsupported.  */
1690 if (!handled_component_p (expr))
1691 /* Unsupported expression. */
1694 /* Find the base and the offset from it. */
1695 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1696 &pmode, &punsignedp, &pvolatilep, false);
1701 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1702 &this_offset, &this_misalign,
1705 /* Failed to compute offset or step. */
1707 *initial_offset = NULL_TREE;
1708 *misalign = NULL_TREE;
1712 /* Add bit position to OFFSET and MISALIGN. */
1714 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1715 /* Check that there is no remainder in bits. */
1716 if (pbitpos%BITS_PER_UNIT)
1718 if (vect_debug_details (NULL))
1719 fprintf (dump_file, "bit offset alignment.");
1722 this_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes,
1723 fold_convert (ssizetype, this_offset));
1725 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1727 /* Continue the recursion to refine the base (get_inner_reference returns
1728 &a for &a[i], and not a). */
1732 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1733 initial_offset, misalign, step,
1737 /* Combine the results. */
/* MISALIGN is "unknown" (NULL_TREE) if either side is unknown.  */
1738 if (this_misalign && *misalign)
1739 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1741 *misalign = NULL_TREE;
1743 *step = size_binop (PLUS_EXPR, *step, this_step);
1745 *initial_offset = size_binop (PLUS_EXPR, *initial_offset, this_offset);
1747 if (vect_debug_details (NULL))
1749 print_generic_expr (dump_file, expr, TDF_SLIM);
1750 fprintf (dump_file, "\n --> total offset for ref: ");
1751 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1752 fprintf (dump_file, "\n --> total misalign for ref: ");
1753 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1754 fprintf (dump_file, "\n --> total step for ref: ");
1755 print_generic_expr (dump_file, *step, TDF_SLIM);
1762 /* Function vect_can_force_dr_alignment_p.
1764 Returns whether the alignment of a DECL can be forced to be aligned
1765 on ALIGNMENT bit boundary. */
1768 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
/* Only plain variables can be realigned; externals and decls already
   emitted to the assembly stream cannot be changed.  */
1770 if (TREE_CODE (decl) != VAR_DECL)
1773 if (DECL_EXTERNAL (decl))
1776 if (TREE_ASM_WRITTEN (decl))
/* Statics are limited by the object-file section alignment; automatics by
   the stack alignment we can rely on.  */
1779 if (TREE_STATIC (decl))
1780 return (alignment <= MAX_OFILE_ALIGNMENT);
1782 /* This is not 100% correct. The absolute correct stack alignment
1783 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1784 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1785 However, until someone implements forced stack alignment, SSE
1786 isn't really usable without this. */
1787 return (alignment <= PREFERRED_STACK_BOUNDARY);
1791 /* Function vect_get_new_vect_var.
1793 Returns a name for a new variable. The current naming scheme appends the
1794 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1795 the name of vectorizer generated variables, and appends that to NAME if
1799 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1805 if (var_kind == vect_simple_var)
1810 prefix_len = strlen (prefix);
/* NAME may be NULL, in which case the bare prefix is used as the name.  */
1813 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1815 new_vect_var = create_tmp_var (type, prefix);
1817 return new_vect_var;
1821 /* Function vect_create_index_for_vector_ref.
1823 Create (and return) an index variable, along with it's update chain in the
1824 loop. This variable will be used to access a memory location in a vector
1828 LOOP: The loop being vectorized.
1829 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1830 function can be added here, or in the loop pre-header.
1833 Return an index that will be used to index a vector array. It is expected
1834 that a pointer to the first vector will be used as the base address for the
1837 FORNOW: we are not trying to be efficient, just creating a new index each
1838 time from scratch. At this time all vector references could use the same
1841 TODO: create only one index to be used by all vector references. Record
1842 the index in the LOOP_VINFO the first time this procedure is called and
1843 return it on subsequent calls. The increment of this index must be placed
1844 just before the conditional expression that ends the single block loop. */
1847 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
1850 block_stmt_iterator incr_bsi;
1852 tree indx_before_incr, indx_after_incr;
1853 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1856 /* It is assumed that the base pointer used for vectorized access contains
1857 the address of the first vector. Therefore the index used for vectorized
1858 access must be initialized to zero and incremented by 1. */
1860 init = integer_zero_node;
1861 step = integer_one_node;
/* Build the IV {0, +, 1} at the loop's standard increment position and
   attach a stmt_vec_info to the new increment stmt so later phases can
   query it.  */
1863 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1864 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
1865 &indx_before_incr, &indx_after_incr);
1866 incr = bsi_stmt (incr_bsi);
1867 get_stmt_operands (incr);
1868 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1870 return indx_before_incr;
1874 /* Function vect_create_addr_base_for_vector_ref.
1876 Create an expression that computes the address of the first memory location
1877 that will be accessed for a data reference.
1880 STMT: The statement containing the data reference.
1881 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1882 OFFSET: Optional. If supplied, it is be added to the initial address.
1885 1. Return an SSA_NAME whose value is the address of the memory location of
1886 the first vector of the data reference.
1887 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1888 these statement(s) which define the returned SSA_NAME.
1890 FORNOW: We are only handling array accesses with step 1. */
1893 vect_create_addr_base_for_vector_ref (tree stmt,
1894 tree *new_stmt_list,
1897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1898 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1899 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1900 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1901 tree ref = DR_REF (dr);
1902 tree scalar_type = TREE_TYPE (ref);
1903 tree scalar_ptr_type = build_pointer_type (scalar_type);
1906 tree addr_base, addr_expr;
1907 tree dest, new_stmt;
1908 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1910 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1911 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1913 /* Add '&' to ref_base. */
1914 data_ref_base = build_fold_addr_expr (data_ref_base);
1917 /* Create '(scalar_type*) base' for pointers. */
1918 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1919 tree scalar_array_type = build_array_type (scalar_type, 0);
1920 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1921 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1922 add_referenced_tmp_var (array_ptr);
/* Gimplify the base value into DEST and record the defining stmts in
   NEW_STMT_LIST for the caller to insert.  */
1924 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1925 add_referenced_tmp_var (dest);
1926 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1927 append_to_statement_list_force (new_stmt, new_stmt_list);
1929 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1930 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1931 new_temp = make_ssa_name (array_ptr, vec_stmt);
1932 TREE_OPERAND (vec_stmt, 0) = new_temp;
1933 append_to_statement_list_force (vec_stmt, new_stmt_list);
1934 data_ref_base = new_temp;
1937 /* Create base_offset */
1938 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1939 add_referenced_tmp_var (dest);
1940 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1941 append_to_statement_list_force (new_stmt, new_stmt_list);
/* If the caller supplied an extra OFFSET (in elements, presumably — the
   guard and surrounding lines are elided), scale it by the DR step and
   fold it into BASE_OFFSET.  */
1945 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1946 add_referenced_tmp_var (tmp);
1947 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1948 STMT_VINFO_VECT_STEP (stmt_info)));
1949 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
1950 base_offset, offset));
1951 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1952 append_to_statement_list_force (new_stmt, new_stmt_list);
1955 /* base + base_offset */
1956 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1959 /* addr_expr = addr_base */
1960 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1961 get_name (base_name));
1962 add_referenced_tmp_var (addr_expr);
1963 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1964 new_temp = make_ssa_name (addr_expr, vec_stmt);
1965 TREE_OPERAND (vec_stmt, 0) = new_temp;
1966 append_to_statement_list_force (vec_stmt, new_stmt_list);
1968 if (vect_debug_details (NULL))
1970 fprintf (dump_file, "created ");
1971 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1972 fprintf (dump_file, "\n");
1978 /* Function get_vectype_for_scalar_type.
1980 Returns the vector type corresponding to SCALAR_TYPE as supported
1984 get_vectype_for_scalar_type (tree scalar_type)
1986 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1987 int nbytes = GET_MODE_SIZE (inner_mode);
1994 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
/* Number of SCALAR_TYPE elements that fit in one SIMD word.  */
1996 nunits = UNITS_PER_SIMD_WORD / nbytes;
1998 vectype = build_vector_type (scalar_type, nunits);
1999 if (vect_debug_details (NULL))
2001 fprintf (dump_file, "get vectype with %d units of type ", nunits);
2002 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2008 if (vect_debug_details (NULL))
2010 fprintf (dump_file, "vectype: ");
2011 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject vector types the target has no machine mode for.  */
2014 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2016 /* TODO: tree-complex.c sometimes can parallelize operations
2017 on generic vectors. We can vectorize the loop in that case,
2018 but then we should re-run the lowering pass. */
2019 if (vect_debug_details (NULL))
2020 fprintf (dump_file, "mode not supported by target.");
2028 /* Function vect_align_data_ref.
2030 Handle mislignment of a memory accesses.
2032 FORNOW: Can't handle misaligned accesses.
2033 Make sure that the dataref is aligned. */
2036 vect_align_data_ref (tree stmt)
2038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2041 /* FORNOW: can't handle misaligned accesses;
2042 all accesses expected to be aligned.  Analysis should have rejected
     any loop containing a misaligned access before we get here.  */
2043 gcc_assert (aligned_access_p (dr));
2047 /* Function vect_create_data_ref_ptr.
2049 Create a memory reference expression for vector access, to be used in a
2050 vector load/store stmt. The reference is based on a new pointer to vector
2054 1. STMT: a stmt that references memory. Expected to be of the form
2055 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2056 2. BSI: block_stmt_iterator where new stmts can be added.
2057 3. OFFSET (optional): an offset to be added to the initial address accessed
2058 by the data-ref in STMT.
2059 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2060 pointing to the initial address.
2063 1. Declare a new ptr to vector_type, and have it point to the base of the
2064 data reference (initial addressed accessed by the data reference).
2065 For example, for vector of type V8HI, the following code is generated:
2068 vp = (v8hi *)initial_address;
2070 if OFFSET is not supplied:
2071 initial_address = &a[init];
2072 if OFFSET is supplied:
2073 initial_address = &a[init + OFFSET];
2075 Return the initial_address in INITIAL_ADDRESS.
2077 2. Create a data-reference in the loop based on the new vector pointer vp,
2078 and using a new index variable 'idx' as follows:
2082 where if ONLY_INIT is true:
2085 update = idx + vector_type_size
2087 Return the pointer vp'.
2090 FORNOW: handle only aligned and consecutive accesses. */
2093 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2094 tree *initial_address, bool only_init)
2097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2098 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2099 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2100 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2101 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2105 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2106 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2107 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2108 int nvuses, nv_may_defs, nv_must_defs;
2112 tree new_stmt_list = NULL_TREE;
2114 edge pe = loop_preheader_edge (loop);
2120 tree type, tmp, size;
2122 base_name = unshare_expr (DR_BASE_NAME (dr));
2123 if (vect_debug_details (NULL))
2125 tree data_ref_base = base_name;
2126 fprintf (dump_file, "create array_ref of type: ");
2127 print_generic_expr (dump_file, vectype, TDF_SLIM);
2128 if (TREE_CODE (data_ref_base) == VAR_DECL)
2129 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2130 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2131 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2132 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2133 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2134 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2135 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2136 print_generic_expr (dump_file, base_name, TDF_SLIM);
2139 /** (1) Create the new vector-pointer variable: **/
2141 vect_ptr_type = build_pointer_type (vectype);
2142 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2143 get_name (base_name));
2144 add_referenced_tmp_var (vect_ptr);
2147 /** (2) Handle aliasing information of the new vector-pointer: **/
2149 tag = STMT_VINFO_MEMTAG (stmt_info);
2151 get_var_ann (vect_ptr)->type_mem_tag = tag;
2153 /* Mark for renaming all aliased variables
2154 (i.e, the may-aliases of the type-mem-tag). */
2155 nvuses = NUM_VUSES (vuses);
2156 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2157 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2158 for (i = 0; i < nvuses; i++)
2160 tree use = VUSE_OP (vuses, i);
2161 if (TREE_CODE (use) == SSA_NAME)
2162 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2164 for (i = 0; i < nv_may_defs; i++)
2166 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2167 if (TREE_CODE (def) == SSA_NAME)
2168 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2170 for (i = 0; i < nv_must_defs; i++)
2172 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2173 if (TREE_CODE (def) == SSA_NAME)
2174 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2178 /** (3) Calculate the initial address the vector-pointer, and set
2179 the vector-pointer to point to it before the loop: **/
2181 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2182 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2184 pe = loop_preheader_edge (loop);
/* Inserting on the preheader edge must not split it into a new block —
   assert that invariant.  */
2185 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2186 gcc_assert (!new_bb);
2187 *initial_address = new_temp;
2189 /* Create: p = (vectype *) initial_base */
2190 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2191 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2192 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2193 TREE_OPERAND (vec_stmt, 0) = new_temp;
2194 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2195 gcc_assert (!new_bb);
2196 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2199 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2201 if (only_init) /* No update in loop is required. */
2202 return vect_ptr_init;
2204 idx = vect_create_index_for_vector_ref (loop_vinfo);
2206 /* Create: update = idx * vectype_size */
2207 tmp = create_tmp_var (integer_type_node, "update");
2208 add_referenced_tmp_var (tmp);
2209 size = TYPE_SIZE (vect_ptr_type);
2210 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2211 ptr_update = create_tmp_var (type, "update");
2212 add_referenced_tmp_var (ptr_update);
2213 vectype_size = TYPE_SIZE_UNIT (vectype);
2214 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2215 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2216 new_temp = make_ssa_name (tmp, vec_stmt);
2217 TREE_OPERAND (vec_stmt, 0) = new_temp;
2218 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
/* Convert the byte update to a pointer-sized integer before the pointer
   addition below.  */
2219 vec_stmt = fold_convert (type, new_temp);
2220 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2221 new_temp = make_ssa_name (ptr_update, vec_stmt);
2222 TREE_OPERAND (vec_stmt, 0) = new_temp;
2223 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2225 /* Create: data_ref_ptr = vect_ptr_init + update */
2226 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2227 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2228 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2229 TREE_OPERAND (vec_stmt, 0) = new_temp;
2230 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2231 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2233 return data_ref_ptr;
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE to hold the result of the
   vectorized stmt that computes SCALAR_DEST.  The temporary is named
   after SCALAR_DEST when it has a user-visible name, and is registered
   with the referenced-vars machinery.  */

vect_create_destination_var (tree scalar_dest, tree vectype)

  const char *new_name;

  /* Only SSA_NAME destinations are expected here.  */
  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  /* NOTE(review): get_name may return NULL for anonymous SSA names;
     presumably vect_get_new_vect_var copes with that -- confirm.  */
  new_name = get_name (scalar_dest);

  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
  add_referenced_tmp_var (vec_dest);
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
   used in the vectorization of STMT.

   The initialization is inserted on the loop-preheader edge, so it executes
   exactly once before the loop.  */

vect_init_vector (tree stmt, tree vector_var)

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);

  /* Fresh vector variable to hold the constant/invariant value.  */
  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
  add_referenced_tmp_var (new_var);

  /* Build "new_var = VECTOR_VAR" and give it a fresh SSA def as its lhs.  */
  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  TREE_OPERAND (init_stmt, 0) = new_temp;

  /* Insert on the preheader edge; the insertion must not split the edge
     (no new basic block may be created).  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "created new init_stmt: ");
    print_generic_expr (dump_file, init_stmt, TDF_SLIM);

  /* Return the SSA def created above.  */
  vec_oprnd = TREE_OPERAND (init_stmt, 0);
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT. This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced (via vect_init_vector, in the loop preheader).  */

vect_get_vec_def_for_operand (tree op, tree stmt)

  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  /* Number of scalar elements in one vector of VECTYPE.  */
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "vect_get_vec_def_for_operand: ");
    print_generic_expr (dump_file, op, TDF_SLIM);

  /** ===> Case 1: operand is a constant.  **/

  if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
    /* Create 'vect_cst_ = {cst,cst,...,cst}'  */

    /* Build a tree list of NUNITS copies of the constant, then a
       VECTOR_CST from it.  */
    if (vect_debug_details (NULL))
      fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);

    for (i = nunits - 1; i >= 0; --i)
      t = tree_cons (NULL_TREE, op, t);

    vec_cst = build_vector (vectype, t);
    return vect_init_vector (stmt, vec_cst);

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/

  def_stmt = SSA_NAME_DEF_STMT (op);
  def_stmt_info = vinfo_for_stmt (def_stmt);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
    print_generic_expr (dump_file, def_stmt, TDF_SLIM);

  /** ==> Case 2.1: operand is defined inside the loop.  **/

  /* Get the def from the vectorized stmt (already created when the
     defining stmt was transformed).  */
  vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
  gcc_assert (vec_stmt);
  vec_oprnd = TREE_OPERAND (vec_stmt, 0);

  /** ==> Case 2.2: operand is defined by the loop-header phi-node -
      it is a reduction/induction.  **/

  bb = bb_for_stmt (def_stmt);
  if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
    if (vect_debug_details (NULL))
      fprintf (dump_file, "reduction/induction - unsupported.");
    internal_error ("no support for reduction/induction"); /* FORNOW */

  /** ==> Case 2.3: operand is defined outside the loop -
      it is a loop invariant.  */

  /* Extract the invariant value DEF from the defining stmt.  */
  switch (TREE_CODE (def_stmt))
    def = PHI_RESULT (def_stmt);
    def = TREE_OPERAND (def_stmt, 0);
    def = TREE_OPERAND (def_stmt, 0);
    gcc_assert (IS_EMPTY_STMT (def_stmt));

    /* Any other defining stmt is unexpected here.  */
    if (vect_debug_details (NULL))
      fprintf (dump_file, "unsupported defining stmt: ");
      print_generic_expr (dump_file, def_stmt, TDF_SLIM);
    internal_error ("unsupported defining stmt");

  /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "Create vector_inv.");

  for (i = nunits - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  /* A CONSTRUCTOR rather than a VECTOR_CST: the invariant need not be a
     compile-time constant.  */
  vec_inv = build_constructor (vectype, t);
  return vect_init_vector (stmt, vec_inv);
2435 /* Function vect_finish_stmt_generation.
2437 Insert a new stmt. */
2440 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2442 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2444 if (vect_debug_details (NULL))
2446 fprintf (dump_file, "add new stmt: ");
2447 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2450 #ifdef ENABLE_CHECKING
2451 /* Make sure bsi points to the stmt that is being vectorized. */
2452 gcc_assert (stmt == bsi_stmt (*bsi));
2455 #ifdef USE_MAPPED_LOCATION
2456 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCUS (stmt));
2458 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  /* Is vectorizable assignment?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)

  /* The rhs must be a "simple" use: an invariant/constant, or defined by
     a stmt the vectorizer can handle.  */
  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, NULL))
    if (vect_debug_details (NULL))
      fprintf (dump_file, "use not simple.");

  if (!vec_stmt) /* transformation not required.  */
    /* Analysis phase: only record the transform kind for later.  */
    STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  op = TREE_OPERAND (stmt, 1);
  vec_oprnd = vect_get_vec_def_for_operand (op, stmt);

  /* Arguments are ready. create the new vector stmt.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)

  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (TREE_CODE (stmt) != MODIFY_EXPR)

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)

  /* Map the scalar tree code to the optab that implements it on vectors.  */
  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    if (vect_debug_details (NULL))
      fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);

  /* All operands must be "simple" uses (invariant, or defined by a
     vectorizable stmt in the loop).  */
  for (i = 0; i < op_type; i++)
    op = TREE_OPERAND (operation, i);
    if (!vect_is_simple_use (op, loop_vinfo, NULL))
      if (vect_debug_details (NULL))
        fprintf (dump_file, "use not simple.");

  /* Supportable by target?  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "no optab.");

  /* The target must have an insn for this operation in the vector mode.  */
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    if (vect_debug_details (NULL))
      fprintf (dump_file, "op not supported by target.");

  if (!vec_stmt) /* transformation not required.  */
    /* Analysis phase: only record the transform kind for later.  */
    STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    op1 = TREE_OPERAND (operation, 1);
    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                build2 (code, vectype, vec_oprnd0, vec_oprnd1));
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
                build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2639 /* Function vectorizable_store.
2641 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2648 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2655 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2656 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2657 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2658 enum machine_mode vec_mode;
2660 enum dr_alignment_support alignment_support_cheme;
2662 /* Is vectorizable store? */
2664 if (TREE_CODE (stmt) != MODIFY_EXPR)
2667 scalar_dest = TREE_OPERAND (stmt, 0);
2668 if (TREE_CODE (scalar_dest) != ARRAY_REF
2669 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2672 op = TREE_OPERAND (stmt, 1);
2673 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2675 if (vect_debug_details (NULL))
2676 fprintf (dump_file, "use not simple.");
2680 vec_mode = TYPE_MODE (vectype);
2681 /* FORNOW. In some cases can vectorize even if data-type not supported
2682 (e.g. - array initialization with 0). */
2683 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2686 if (!STMT_VINFO_DATA_REF (stmt_info))
2690 if (!vec_stmt) /* transformation not required. */
2692 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2698 if (vect_debug_details (NULL))
2699 fprintf (dump_file, "transform store");
2701 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2702 gcc_assert (alignment_support_cheme);
2703 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2705 /* Handle use - get the vectorized def from the defining stmt. */
2706 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2709 /* FORNOW: make sure the data reference is aligned. */
2710 vect_align_data_ref (stmt);
2711 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2712 data_ref = build_fold_indirect_ref (data_ref);
2714 /* Arguments are ready. create the new vector stmt. */
2715 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2716 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Three strategies, chosen by vect_supportable_dr_alignment:
   aligned load, target-supported misaligned load, or a software-pipelined
   realign-load scheme (two aligned loads combined with REALIGN_LOAD_EXPR).  */

vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)

  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load? */

  if (TREE_CODE (stmt) != MODIFY_EXPR)

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)

  if (!STMT_VINFO_DATA_REF (stmt_info))

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    if (vect_debug_details (loop))
      fprintf (dump_file, "Aligned load, but unsupported type.");

  if (!vec_stmt) /* transformation not required.  */
    /* Analysis phase: only record the transform kind for later.  */
    STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    /* Simple case: a plain (possibly target-supported misaligned)
       vector load through the data-ref pointer.  */
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
    data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
    if (aligned_access_p (dr))
      data_ref = build_fold_indirect_ref (data_ref);
      /* Misaligned but supported: express the misalignment in bits
         (DR_MISALIGNMENT of -1 means unknown, use 0).  */
      int mis = DR_MISALIGNMENT (dr);
      tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
      tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
      data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
    new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
    new_temp = make_ssa_name (vec_dest, new_stmt);
    TREE_OPERAND (new_stmt, 0) = new_temp;
    vect_finish_stmt_generation (stmt, new_stmt, bsi);
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    /* Software-pipelined realign scheme:
          msq_init = *(floor(p1))
          p2 = initial_addr + VS - 1;
          magic = have_builtin ? builtin_result : initial_address;
       and in the loop:
          p2' = p2 + indx * vectype_size
          lsq = *(floor(p2'))
          vec_dest = realign_load (msq, lsq, magic)  */

    /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
    data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
    data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
    new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
    new_temp = make_ssa_name (vec_dest, new_stmt);
    TREE_OPERAND (new_stmt, 0) = new_temp;
    new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
    gcc_assert (!new_bb);
    msq_init = TREE_OPERAND (new_stmt, 0);

    /* <2> Create lsq = *(floor(p2')) in the loop  */
    /* Offset by VS - 1 elements so the second (aligned) load covers the
       high part of the unaligned vector.  */
    offset = build_int_cst (integer_type_node,
                            GET_MODE_NUNITS (TYPE_MODE (vectype)));
    offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
    dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
    data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
    new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
    new_temp = make_ssa_name (vec_dest, new_stmt);
    TREE_OPERAND (new_stmt, 0) = new_temp;
    vect_finish_stmt_generation (stmt, new_stmt, bsi);
    lsq = TREE_OPERAND (new_stmt, 0);

    if (targetm.vectorize.builtin_mask_for_load)
      /* Create permutation mask, if required, in loop preheader.  */
      params = build_tree_list (NULL_TREE, init_addr);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = build_function_call_expr (builtin_decl, params);
      new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      magic = TREE_OPERAND (new_stmt, 0);

      /* Since we have just created a CALL_EXPR, we may need to
         rename call-clobbered variables.  */
      mark_call_clobbered_vars_to_rename ();

    /* Use current address instead of init_addr for reduced reg pressure.
     */
      magic = dataref_ptr;

    /* <4> Create msq = phi <msq_init, lsq> in loop  */
    /* msq carries the previous iteration's high load: msq_init on entry,
       last iteration's lsq on the latch edge.  */
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
    msq = make_ssa_name (vec_dest, NULL_TREE);
    phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
    SSA_NAME_DEF_STMT (msq) = phi_stmt;
    add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
    add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));

    /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
    new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
    new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
    new_temp = make_ssa_name (vec_dest, new_stmt);
    TREE_OPERAND (new_stmt, 0) = new_temp;
    vect_finish_stmt_generation (stmt, new_stmt, bsi);

  *vec_stmt = new_stmt;
/* Function vect_supportable_dr_alignment

   Return whether the data reference DR is supported with respect to its
   alignment: dr_aligned, dr_unaligned_supported,
   dr_unaligned_software_pipeline, or dr_unaligned_unsupported.  */

static enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr)

  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
  enum machine_mode mode = (int) TYPE_MODE (vectype);

  /* Known-aligned accesses are always supported.  */
  if (aligned_access_p (dr))

  /* Possibly unaligned access.  */

  if (DR_IS_READ (dr))
    /* Prefer the realign-load software pipeline when the target has the
       optab and, if it requires a mask builtin, actually provides one.  */
    if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
        && (!targetm.vectorize.builtin_mask_for_load
            || targetm.vectorize.builtin_mask_for_load ()))
      return dr_unaligned_software_pipeline;

    if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
      /* Can't software pipeline the loads, but can at least do them.  */
      return dr_unaligned_supported;

  /* Unsupported.  */
  return dr_unaligned_unsupported;
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.
   Dispatches on the transform kind recorded during analysis
   (STMT_VINFO_TYPE) and stores the new vector stmt in the stmt_info.  */

vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)

  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    case op_vec_info_type:
      done = vectorizable_operation (stmt, bsi, &vec_stmt);

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, bsi, &vec_stmt);

    case load_vec_info_type:
      done = vectorizable_load (stmt, bsi, &vec_stmt);

    case store_vec_info_type:
      done = vectorizable_store (stmt, bsi, &vec_stmt);

    /* default: analysis should have rejected anything else.  */
    if (vect_debug_details (NULL))
      fprintf (dump_file, "stmt not supported.");

  /* Record the vector stmt so later uses of STMT's def can find it.  */
  STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
/* This function builds ni_name = number of iterations loop executes
   on the loop preheader.  Returns the SSA name (or gimple value) holding
   the iteration count; any stmts needed to compute it are inserted on the
   preheader edge.  */

vect_build_loop_niters (loop_vec_info loop_vinfo)

  tree ni_name, stmt, var;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  /* Unshare: the expression is inserted into new stmts below.  */
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));

  var = create_tmp_var (TREE_TYPE (ni), "niters");
  add_referenced_tmp_var (var);
  /* Gimplify NI into VAR; STMT receives the computation sequence.  */
  ni_name = force_gimple_operand (ni, &stmt, false, var);

  pe = loop_preheader_edge (loop);
    /* Inserting on the preheader edge must not create a new bb.  */
    basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
    gcc_assert (!new_bb);
/* This function generates the following statements:

 ni_name = number of iterations loop executes
 ratio = ni_name / vf
 ratio_mult_vf_name = ratio * vf

 and places them at the loop preheader edge.
 VF is assumed to be a power of two, so the division and multiplication
 are emitted as shifts by log2(vf).  */

vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                 tree *ratio_mult_vf_name_ptr,
                                 tree *ratio_name_ptr)

  tree ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));

  pe = loop_preheader_edge (loop);

  /* Generate temporary variable that contains
     number of iterations loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo);

  /* Create: ratio = ni >> log2(vf)  */

  var = create_tmp_var (TREE_TYPE (ni), "bnd");
  add_referenced_tmp_var (var);
  ratio_name = make_ssa_name (var, NULL_TREE);
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
           build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
  SSA_NAME_DEF_STMT (ratio_name) = stmt;

  /* Insertion on the preheader edge must not split it.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  gcc_assert (!new_bb);

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
  add_referenced_tmp_var (var);
  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
           build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  gcc_assert (!new_bb);

  /* Return all three values to the caller.  */
  *ni_name_ptr = ni_name;
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
  *ratio_name_ptr = ratio_name;
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP. This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop. Such a situation occurs when the last iterations of LOOP are

   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

   The new definitions of the ivs are placed in LOOP->exit.
   The phi args associated with the edge UPDATE_E in the bb
   UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p). This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop). This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,

  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->exit_edges[0]->dest;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);

  /* Walk the header phis and the update_bb phis in lockstep
     (Assumption 2 guarantees matching order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))

    tree access_fn = NULL;
    tree evolution_part;
    tree var, stmt, ni, ni_name;
    block_stmt_iterator last_bsi;

    /* Skip virtual phi's.  */
    if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
      if (vect_debug_details (NULL))
        fprintf (dump_file, "virtual phi. skip.");

    access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
    gcc_assert (access_fn);
    unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
    gcc_assert (evolution_part != NULL_TREE);

    /* FORNOW: We do not support IVs whose evolution function is a polynomial
       of degree >= 2 or exponential.  */
    gcc_assert (!tree_is_chrec (evolution_part));

    step_expr = evolution_part;
    init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,

    /* Final value of the iv: init + niters * step.  */
    ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
                 build2 (MULT_EXPR, TREE_TYPE (niters),
                         niters, step_expr), init_expr);

    var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
    add_referenced_tmp_var (var);

    ni_name = force_gimple_operand (ni, &stmt, false, var);

    /* Insert stmt into exit_bb.  */
    last_bsi = bsi_last (exit_bb);
    bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

    /* Fix phi expressions in the successor bb.  */
    gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
                PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
    SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */

vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
                                struct loops *loops)

  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
#ifdef ENABLE_CHECKING

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                   &ratio_mult_vf_name, ratio);

  /* Update loop info.  */
  loop->pre_header = loop_preheader_edge (loop)->src;
  loop->pre_header_edges[0] = loop_preheader_edge (loop);

#ifdef ENABLE_CHECKING
  /* Remember the loop number so we can verify peeling kept it intact.  */
  loop_num = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
                                            ratio_mult_vf_name, ni_name, false);
#ifdef ENABLE_CHECKING
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
  slpeel_verify_cfg_after_peeling (loop, new_loop);

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
    update_e = EDGE_PRED (new_loop->pre_header, 0);
    update_e = EDGE_PRED (new_loop->pre_header, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the first data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
    whose type is the inner type of the vectype)  */

vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)

  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree iters, iters_name;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector alignment in bytes.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree new_stmts = NULL_TREE;
  /* Compute the start address of DR; NEW_STMTS collects the stmts needed.  */
  vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
  tree ptr_type = TREE_TYPE (start_addr);
  tree size = TYPE_SIZE (ptr_type);
  tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
  tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
  tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
  tree niters_type = TREE_TYPE (loop_niters);
  /* log2 of the element size in bytes (vectype_align / vf elements).  */
  tree elem_size_log =
    build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
  tree vf_tree = build_int_cst (unsigned_type_node, vf);

  /* Emit the address computation on the preheader edge.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
  gcc_assert (!new_bb);

  /* Create:  byte_misalign = addr & (vectype_size - 1)  */
  byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

  /* Create:  elem_misalign = byte_misalign / element_size  */
  build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);

  /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
  iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
  iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
  iters = fold_convert (niters_type, iters);

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  pe = loop_preheader_edge (loop);
    basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
    gcc_assert (!new_bb);
3364 /* Function vect_update_inits_of_dr
3366 NITERS iterations were peeled from LOOP. DR represents a data reference
3367 in LOOP. This function updates the information recorded in DR to
3368 account for the fact that the first NITERS iterations had already been
3369 executed. Specifically, it updates the OFFSET field of stmt_info. */
3372 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3374 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3375 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
/* Each peeled iteration advanced the access by one STEP, so the new
   initial offset is: offset + NITERS * STEP.  */
3377 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3378 STMT_VINFO_VECT_STEP (stmt_info)));
3379 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3380 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3384 /* Function vect_update_inits_of_drs
3386 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3387 This function updates the information recorded for the data references in
3388 the loop to account for the fact that the first NITERS iterations had
3389 already been executed. Specifically, it updates the initial_condition of the
3390 access_function of all the data_references in the loop. */
3393 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3396 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3397 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
/* Update every write data-ref, then every read data-ref, via the
   per-DR helper vect_update_inits_of_dr.  */
3402 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3404 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3405 vect_update_inits_of_dr (dr, niters);
3408 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3410 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3411 vect_update_inits_of_dr (dr, niters);
3416 /* Function vect_do_peeling_for_alignment
3418 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3419 'niters' is set to the misalignment of one of the data references in the
3420 loop, thereby forcing it to refer to an aligned location at the beginning
3421 of the execution of this loop. The data reference for which we are
3422 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3425 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3427 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3428 tree niters_of_prolog_loop, ni_name;
3430 struct loop *new_loop;
3432 if (vect_debug_details (NULL))
3433 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
/* Build the symbolic loop bound and from it the prolog iteration count.  */
3435 ni_name = vect_build_loop_niters (loop_vinfo);
3436 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3438 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3440 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3441 niters_of_prolog_loop, ni_name, true);
3442 #ifdef ENABLE_CHECKING
3443 gcc_assert (new_loop);
3444 slpeel_verify_cfg_after_peeling (new_loop, loop);
3447 /* Update number of times loop executes. */
3448 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3449 LOOP_VINFO_NITERS (loop_vinfo) =
3450 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3452 /* Update the init conditions of the access functions of all data refs. */
3453 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3455 /* After peeling we have to reset scalar evolution analyzer. */
3462 /* Function vect_transform_loop.
3464 The analysis phase has determined that the loop is vectorizable.
3465 Vectorize the loop - created vectorized stmts to replace the scalar
3466 stmts in the loop, and update the loop exit condition. */
3469 vect_transform_loop (loop_vec_info loop_vinfo,
3470 struct loops *loops ATTRIBUTE_UNUSED)
3472 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3473 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3474 int nbbs = loop->num_nodes;
3475 block_stmt_iterator si;
3478 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3480 if (vect_debug_details (NULL))
3481 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3484 /* Peel the loop if there are data refs with unknown alignment.
3485 Only one data ref with unknown store is allowed. */
3487 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3488 vect_do_peeling_for_alignment (loop_vinfo, loops);
3490 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3491 compile time constant), or it is a constant that doesn't divide by the
3492 vectorization factor, then an epilog loop needs to be created.
3493 We therefore duplicate the loop: the original loop will be vectorized,
3494 and will compute the first (n/VF) iterations. The second copy of the loop
3495 will remain scalar and will compute the remaining (n%VF) iterations.
3496 (VF is the vectorization factor). */
3498 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3499 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3500 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3501 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
/* Otherwise the constant trip count divides VF evenly and 'ratio' is
   simply niters / VF.  */
3503 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3504 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3506 /* 1) Make sure the loop header has exactly two entries
3507 2) Make sure we have a preheader basic block. */
3509 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3511 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3514 /* FORNOW: the vectorizer supports only loops which body consist
3515 of one basic block (header + empty latch). When the vectorizer will
3516 support more involved loop forms, the order by which the BBs are
3517 traversed need to be reconsidered. */
3519 for (i = 0; i < nbbs; i++)
3521 basic_block bb = bbs[i];
/* NOTE(review): the iterator is advanced inside the body (lines missing
   from this extraction), which is why the for-header has no bsi_next.  */
3523 for (si = bsi_start (bb); !bsi_end_p (si);)
3525 tree stmt = bsi_stmt (si);
3526 stmt_vec_info stmt_info;
3529 if (vect_debug_details (NULL))
3531 fprintf (dump_file, "------>vectorizing statement: ");
3532 print_generic_expr (dump_file, stmt, TDF_SLIM);
3534 stmt_info = vinfo_for_stmt (stmt);
3535 gcc_assert (stmt_info);
3536 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3541 #ifdef ENABLE_CHECKING
3542 /* FORNOW: Verify that all stmts operate on the same number of
3543 units and no inner unrolling is necessary. */
3545 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3546 == vectorization_factor);
3548 /* -------- vectorize statement ------------ */
3549 if (vect_debug_details (NULL))
3550 fprintf (dump_file, "transform statement.");
3552 is_store = vect_transform_stmt (stmt, &si);
3555 /* free the attached stmt_vec_info and remove the stmt. */
3556 stmt_ann_t ann = stmt_ann (stmt);
3558 set_stmt_info (ann, NULL);
/* Update the exit condition so the vectorized loop runs 'ratio' times.  */
3567 slpeel_make_loop_iterate_ntimes (loop, ratio);
3569 if (vect_debug_details (loop))
3570 fprintf (dump_file,"Success! loop vectorized.");
3571 if (vect_debug_stats (loop))
3572 fprintf (dump_file, "LOOP VECTORIZED.");
3576 /* Function vect_is_simple_use.
3579 LOOP - the loop that is being vectorized.
3580 OPERAND - operand of a stmt in LOOP.
3581 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3583 Returns whether a stmt with OPERAND can be vectorized.
3584 Supportable operands are constants, loop invariants, and operands that are
3585 defined by the current iteration of the loop. Unsupportable operands are
3586 those that are defined by a previous iteration of the loop (as is the case
3587 in reduction/induction computations). */
3590 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
3594 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Integer and real constants are always vectorizable operands.  */
3599 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3602 if (TREE_CODE (operand) != SSA_NAME)
3605 def_stmt = SSA_NAME_DEF_STMT (operand);
3606 if (def_stmt == NULL_TREE )
3608 if (vect_debug_details (NULL))
3609 fprintf (dump_file, "no def_stmt.");
3613 /* empty stmt is expected only in case of a function argument.
3614 (Otherwise - we expect a phi_node or a modify_expr). */
3615 if (IS_EMPTY_STMT (def_stmt))
3617 tree arg = TREE_OPERAND (def_stmt, 0);
3618 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3620 if (vect_debug_details (NULL))
3622 fprintf (dump_file, "Unexpected empty stmt: ");
3623 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3628 /* phi_node inside the loop indicates an induction/reduction pattern.
3629 This is not supported yet. */
3630 bb = bb_for_stmt (def_stmt);
3631 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3633 if (vect_debug_details (NULL))
3634 fprintf (dump_file, "reduction/induction - unsupported.");
3635 return false; /* FORNOW: not supported yet. */
3638 /* Expecting a modify_expr or a phi_node. */
3639 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3640 || TREE_CODE (def_stmt) == PHI_NODE)
3651 /* Function vect_analyze_operations.
3653 Scan the loop stmts and make sure they are all vectorizable. */
3656 vect_analyze_operations (loop_vec_info loop_vinfo)
3658 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3659 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3660 int nbbs = loop->num_nodes;
3661 block_stmt_iterator si;
3662 unsigned int vectorization_factor = 0;
3667 if (vect_debug_details (NULL))
3668 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3670 for (i = 0; i < nbbs; i++)
3672 basic_block bb = bbs[i];
3674 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3676 tree stmt = bsi_stmt (si);
3677 unsigned int nunits;
3678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3681 if (vect_debug_details (NULL))
3683 fprintf (dump_file, "==> examining statement: ");
3684 print_generic_expr (dump_file, stmt, TDF_SLIM);
3687 gcc_assert (stmt_info);
3689 /* skip stmts which do not need to be vectorized.
3690 this is expected to include:
3691 - the COND_EXPR which is the loop exit condition
3692 - any LABEL_EXPRs in the loop
3693 - computations that are used only for array indexing or loop
3696 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3698 if (vect_debug_details (NULL))
3699 fprintf (dump_file, "irrelevant.");
/* A statement that already has vector type cannot be (re)vectorized.  */
3703 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3705 if (vect_debug_stats (loop) || vect_debug_details (loop))
3707 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3708 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Determine the scalar type to derive the vector type from: prefer the
   data-ref's referenced type, else the lhs type, else the stmt type.  */
3713 if (STMT_VINFO_DATA_REF (stmt_info))
3714 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3715 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3716 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3718 scalar_type = TREE_TYPE (stmt);
3720 if (vect_debug_details (NULL))
3722 fprintf (dump_file, "get vectype for scalar type: ");
3723 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3726 vectype = get_vectype_for_scalar_type (scalar_type);
3729 if (vect_debug_stats (loop) || vect_debug_details (loop))
3731 fprintf (dump_file, "not vectorized: unsupported data-type ");
3732 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3737 if (vect_debug_details (NULL))
3739 fprintf (dump_file, "vectype: ");
3740 print_generic_expr (dump_file, vectype, TDF_SLIM);
3742 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* The stmt is vectorizable if any one of the handlers accepts it.  */
3744 ok = (vectorizable_operation (stmt, NULL, NULL)
3745 || vectorizable_assignment (stmt, NULL, NULL)
3746 || vectorizable_load (stmt, NULL, NULL)
3747 || vectorizable_store (stmt, NULL, NULL));
3751 if (vect_debug_stats (loop) || vect_debug_details (loop))
3753 fprintf (dump_file, "not vectorized: stmt not supported: ");
3754 print_generic_expr (dump_file, stmt, TDF_SLIM);
3759 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3760 if (vect_debug_details (NULL))
3761 fprintf (dump_file, "nunits = %d", nunits);
3763 if (vectorization_factor)
3765 /* FORNOW: don't allow mixed units.
3766 This restriction will be relaxed in the future. */
3767 if (nunits != vectorization_factor)
3769 if (vect_debug_stats (loop) || vect_debug_details (loop))
3770 fprintf (dump_file, "not vectorized: mixed data-types");
3775 vectorization_factor = nunits;
3777 #ifdef ENABLE_CHECKING
3778 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3779 * vectorization_factor == UNITS_PER_SIMD_WORD);
3784 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3786 if (vectorization_factor <= 1)
3788 if (vect_debug_stats (loop) || vect_debug_details (loop))
3789 fprintf (dump_file, "not vectorized: unsupported data-type")
3792 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3794 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3796 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3797 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* A known trip count smaller than VF means the vector loop would never run.  */
3799 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3800 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3802 if (vect_debug_stats (loop) || vect_debug_details (loop))
3803 fprintf (dump_file, "not vectorized: iteration count too small.");
/* An epilog loop is needed when niters is unknown or not a multiple of VF;
   verify we are able to peel (advance IVs and duplicate the loop).  */
3807 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3808 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3810 if (vect_debug_stats (loop) || vect_debug_details (loop))
3811 fprintf (dump_file, "epilog loop required.");
3812 if (!vect_can_advance_ivs_p (loop_vinfo))
3814 if (vect_debug_stats (loop) || vect_debug_details (loop))
3815 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3818 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3820 if (vect_debug_stats (loop) || vect_debug_details (loop))
3821 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3830 /* Function exist_non_indexing_operands_for_use_p
3832 USE is one of the uses attached to STMT. Check if USE is
3833 used in STMT for anything other than indexing an array. */
3836 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3839 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3841 /* USE corresponds to some operand in STMT. If there is no data
3842 reference in STMT, then any operand that corresponds to USE
3843 is not indexing an array. */
3844 if (!STMT_VINFO_DATA_REF (stmt_info))
3847 /* STMT has a data_ref. FORNOW this means that its of one of
3848 the following forms:
3851 (This should have been verified in analyze_data_refs).
3853 'var' in the second case corresponds to a def, not a use,
3854 so USE cannot correspond to any operands that are not used
3857 Therefore, all we need to check is if STMT falls into the
3858 first case, and whether var corresponds to USE. */
/* lhs is an SSA_NAME => STMT is of the form "var = array_ref" (a load).  */
3860 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3863 operand = TREE_OPERAND (stmt, 1);
3865 if (TREE_CODE (operand) != SSA_NAME)
3875 /* Function vect_is_simple_iv_evolution.
3877 FORNOW: A simple evolution of an induction variables in the loop is
3878 considered a polynomial evolution with constant step. */
3881 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3882 tree * step, bool strict)
3887 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3889 /* When there is no evolution in this loop, the evolution function
3891 if (evolution_part == NULL_TREE)
3894 /* When the evolution is a polynomial of degree >= 2
3895 the evolution function is not "simple". */
3896 if (tree_is_chrec (evolution_part))
3899 step_expr = evolution_part;
3900 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
3902 if (vect_debug_details (NULL))
3904 fprintf (dump_file, "step: ");
3905 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3906 fprintf (dump_file, ", init: ");
3907 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* The step must be a compile-time integer constant to be "simple".  */
3913 if (TREE_CODE (step_expr) != INTEGER_CST)
3915 if (vect_debug_details (NULL))
3916 fprintf (dump_file, "step unknown.");
/* NOTE(review): this check presumably applies only under 'strict'
   (a unit-step requirement) — the guarding lines are missing from
   this extraction; confirm against the full file.  */
3921 if (!integer_onep (step_expr))
3923 if (vect_debug_details (NULL))
3924 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3932 /* Function vect_analyze_scalar_cycles.
3934 Examine the cross iteration def-use cycles of scalar variables, by
3935 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3936 cycles that they represent do not impede vectorization.
3938 FORNOW: Reduction as in the following loop, is not supported yet:
3942 The cross-iteration cycle corresponding to variable 'sum' will be
3943 considered too complicated and will impede vectorization.
3945 FORNOW: Induction as in the following loop, is not supported yet:
3950 However, the following loop *is* vectorizable:
3955 In both loops there exists a def-use cycle for the variable i:
3956 loop: i_2 = PHI (i_0, i_1)
3961 The evolution of the above cycle is considered simple enough,
3962 however, we also check that the cycle does not need to be
3963 vectorized, i.e - we check that the variable that this cycle
3964 defines is only used for array indexing or in stmts that do not
3965 need to be vectorized. This is not the case in loop2, but it
3966 *is* the case in loop3. */
3969 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3973 basic_block bb = loop->header;
3976 if (vect_debug_details (NULL))
3977 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Walk all PHI nodes of the loop header; each represents a potential
   cross-iteration scalar cycle.  */
3979 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3981 tree access_fn = NULL;
3983 if (vect_debug_details (NULL))
3985 fprintf (dump_file, "Analyze phi: ");
3986 print_generic_expr (dump_file, phi, TDF_SLIM);
3989 /* Skip virtual phi's. The data dependences that are associated with
3990 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3992 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3994 if (vect_debug_details (NULL))
3995 fprintf (dump_file, "virtual phi. skip.");
3999 /* Analyze the evolution function. */
4001 /* FORNOW: The only scalar cross-iteration cycles that we allow are
4002 those of loop induction variables; This property is verified here.
4004 Furthermore, if that induction variable is used in an operation
4005 that needs to be vectorized (i.e, is not solely used to index
4006 arrays and check the exit condition) - we do not support its
4007 vectorization yet. This property is verified in vect_is_simple_use,
4008 during vect_analyze_operations. */
4010 access_fn = /* instantiate_parameters
4012 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4016 if (vect_debug_stats (loop) || vect_debug_details (loop))
4017 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4021 if (vect_debug_details (NULL))
4023 fprintf (dump_file, "Access function of PHI: ");
4024 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Reject the loop if the PHI's evolution is not a simple IV.  */
4027 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4030 if (vect_debug_stats (loop) || vect_debug_details (loop))
4031 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4040 /* Function vect_analyze_data_ref_dependence.
4042 Return TRUE if there (might) exist a dependence between a memory-reference
4043 DRA and a memory-reference DRB. */
4046 vect_analyze_data_ref_dependence (struct data_reference *dra,
4047 struct data_reference *drb,
4048 loop_vec_info loop_vinfo)
4051 struct data_dependence_relation *ddr;
4052 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* If we cannot even prove the base names differ, conservatively give up.  */
4054 if (!array_base_name_differ_p (dra, drb, &differ_p))
4056 if (vect_debug_stats (loop) || vect_debug_details (loop))
4059 "not vectorized: can't determine dependence between: ");
4060 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4061 fprintf (dump_file, " and ");
4062 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Run the affine dependence test; chrec_known means "no dependence".  */
4070 ddr = initialize_data_dependence_relation (dra, drb);
4071 compute_affine_dependence (ddr);
4073 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4076 if (vect_debug_stats (loop) || vect_debug_details (loop))
4079 "not vectorized: possible dependence between data-refs ");
4080 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4081 fprintf (dump_file, " and ");
4082 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4089 /* Function vect_analyze_data_ref_dependences.
4091 Examine all the data references in the loop, and make sure there do not
4092 exist any data dependences between them.
4094 TODO: dependences which distance is greater than the vectorization factor
4098 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4101 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4102 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4104 /* Examine store-store (output) dependences. */
4106 if (vect_debug_details (NULL))
4107 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4109 if (vect_debug_details (NULL))
4110 fprintf (dump_file, "compare all store-store pairs.");
/* All unordered pairs of writes (j starts at i+1 to avoid duplicates).  */
4112 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4114 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4116 struct data_reference *dra =
4117 VARRAY_GENERIC_PTR (loop_write_refs, i);
4118 struct data_reference *drb =
4119 VARRAY_GENERIC_PTR (loop_write_refs, j);
4120 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4125 /* Examine load-store (true/anti) dependences. */
4127 if (vect_debug_details (NULL))
4128 fprintf (dump_file, "compare all load-store pairs.");
/* Full cross product of reads against writes.  */
4130 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4132 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4134 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4135 struct data_reference *drb =
4136 VARRAY_GENERIC_PTR (loop_write_refs, j);
4137 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4146 /* Function vect_compute_data_ref_alignment
4148 Compute the misalignment of the data reference DR.
4151 1. If during the misalignment computation it is found that the data reference
4152 cannot be vectorized then false is returned.
4153 2. DR_MISALIGNMENT (DR) is defined.
4155 FOR NOW: No analysis is actually performed. Misalignment is calculated
4156 only for trivial cases. TODO. */
4159 vect_compute_data_ref_alignment (struct data_reference *dr)
4161 tree stmt = DR_STMT (dr);
4162 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4163 tree ref = DR_REF (dr);
4165 tree base, alignment;
4166 bool base_aligned_p;
4169 if (vect_debug_details (NULL))
4170 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4172 /* Initialize misalignment to unknown. */
4173 DR_MISALIGNMENT (dr) = -1;
4175 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4176 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4177 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4178 vectype = STMT_VINFO_VECTYPE (stmt_info);
4182 if (vect_debug_details (NULL))
4184 fprintf (dump_file, "Unknown alignment for access: ");
4185 print_generic_expr (dump_file, base, TDF_SLIM);
/* Base not known to be aligned: try to force the decl's alignment up to
   the vector type's requirement.  */
4190 if (!base_aligned_p)
4192 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4194 if (vect_debug_details (NULL))
4196 fprintf (dump_file, "can't force alignment of ref: ");
4197 print_generic_expr (dump_file, ref, TDF_SLIM);
4202 /* Force the alignment of the decl.
4203 NOTE: This is the only change to the code we make during
4204 the analysis phase, before deciding to vectorize the loop. */
4205 if (vect_debug_details (NULL))
4206 fprintf (dump_file, "force alignment");
4207 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4208 DECL_USER_ALIGN (base) = 1;
4211 /* At this point we assume that the base is aligned. */
4212 gcc_assert (base_aligned_p
4213 || (TREE_CODE (base) == VAR_DECL
4214 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4216 /* Alignment required, in bytes: */
4217 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4219 /* Modulo alignment. */
4220 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4221 if (tree_int_cst_sgn (misalign) < 0)
4223 /* Negative misalignment value. */
4224 if (vect_debug_details (NULL))
4225 fprintf (dump_file, "unexpected misalign value");
/* Record the now-known misalignment (in bytes) on the DR.  */
4229 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4231 if (vect_debug_details (NULL))
4232 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4238 /* Function vect_compute_data_refs_alignment
4240 Compute the misalignment of data references in the loop.
4241 This pass may take place at function granularity instead of at loop
4244 FOR NOW: No analysis is actually performed. Misalignment is calculated
4245 only for trivial cases. TODO. */
4248 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4250 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4251 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* Compute misalignment for every write DR, then every read DR; a single
   failure aborts the whole computation.  */
4254 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4256 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4257 if (!vect_compute_data_ref_alignment (dr))
4261 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4263 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4264 if (!vect_compute_data_ref_alignment (dr))
4272 /* Function vect_enhance_data_refs_alignment
4274 This pass will use loop versioning and loop peeling in order to enhance
4275 the alignment of data references in the loop.
4277 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4278 original loop is to be vectorized; Any other loops that are created by
4279 the transformations performed in this pass - are not supposed to be
4280 vectorized. This restriction will be relaxed. */
4283 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4285 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4286 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4291 This pass will require a cost model to guide it whether to apply peeling
4292 or versioning or a combination of the two. For example, the scheme that
4293 intel uses when given a loop with several memory accesses, is as follows:
4294 choose one memory access ('p') which alignment you want to force by doing
4295 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4296 other accesses are not necessarily aligned, or (2) use loop versioning to
4297 generate one loop in which all accesses are aligned, and another loop in
4298 which only 'p' is necessarily aligned.
4300 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4301 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4302 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4304 Devising a cost model is the most critical aspect of this work. It will
4305 guide us on which access to peel for, whether to use loop versioning, how
4306 many versions to create, etc. The cost model will probably consist of
4307 generic considerations as well as target specific considerations (on
4308 powerpc for example, misaligned stores are more painful than misaligned
4311 Here is the general steps involved in alignment enhancements:
4313 -- original loop, before alignment analysis:
4314 for (i=0; i<N; i++){
4315 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4316 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4319 -- After vect_compute_data_refs_alignment:
4320 for (i=0; i<N; i++){
4321 x = q[i]; # DR_MISALIGNMENT(q) = 3
4322 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4325 -- Possibility 1: we do loop versioning:
4327 for (i=0; i<N; i++){ # loop 1A
4328 x = q[i]; # DR_MISALIGNMENT(q) = 3
4329 p[i] = y; # DR_MISALIGNMENT(p) = 0
4333 for (i=0; i<N; i++){ # loop 1B
4334 x = q[i]; # DR_MISALIGNMENT(q) = 3
4335 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4339 -- Possibility 2: we do loop peeling:
4340 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4344 for (i = 3; i < N; i++){ # loop 2A
4345 x = q[i]; # DR_MISALIGNMENT(q) = 0
4346 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4349 -- Possibility 3: combination of loop peeling and versioning:
4350 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4355 for (i = 3; i<N; i++){ # loop 3A
4356 x = q[i]; # DR_MISALIGNMENT(q) = 0
4357 p[i] = y; # DR_MISALIGNMENT(p) = 0
4361 for (i = 3; i<N; i++){ # loop 3B
4362 x = q[i]; # DR_MISALIGNMENT(q) = 0
4363 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4367 These loops are later passed to loop_transform to be vectorized. The
4368 vectorizer will use the alignment information to guide the transformation
4369 (whether to generate regular loads/stores, or with special handling for
4373 /* (1) Peeling to force alignment. */
4375 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4377 + How many accesses will become aligned due to the peeling
4378 - How many accesses will become unaligned due to the peeling,
4379 and the cost of misaligned accesses.
4380 - The cost of peeling (the extra runtime checks, the increase
4383 The scheme we use FORNOW: peel to force the alignment of the first
4384 misaligned store in the loop.
4385 Rationale: misaligned stores are not yet supported.
4387 TODO: Use a better cost model. */
/* Pick the first misaligned write as the DR to peel for.  */
4389 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4391 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4392 if (!aligned_access_p (dr))
4394 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4395 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4400 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4402 if (vect_debug_details (loop))
4403 fprintf (dump_file, "Peeling for alignment will not be applied.");
4407 if (vect_debug_details (loop))
4408 fprintf (dump_file, "Peeling for alignment will be applied.");
4411 /* (1.2) Update the alignment info according to the peeling factor.
4412 If the misalignment of the DR we peel for is M, then the
4413 peeling factor is VF - M, and the misalignment of each access DR_i
4414 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4415 If the misalignment of the DR we peel for is unknown, then the
4416 misalignment of each access DR_i in the loop is also unknown.
4418 FORNOW: set the misalignment of the accesses to unknown even
4419 if the peeling factor is known at compile time.
4421 TODO: - if the peeling factor is known at compile time, use that
4422 when updating the misalignment info of the loop DRs.
4423 - consider accesses that are known to have the same
4424 alignment, even if that alignment is unknown. */
/* The peeled-for DR becomes aligned (misalignment 0); every other DR's
   misalignment is reset to unknown (-1).  */
4426 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4428 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4429 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4431 DR_MISALIGNMENT (dr) = 0;
4432 if (vect_debug_details (loop) || vect_debug_stats (loop))
4433 fprintf (dump_file, "Alignment of access forced using peeling.");
4436 DR_MISALIGNMENT (dr) = -1;
4438 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4440 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4441 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4443 DR_MISALIGNMENT (dr) = 0;
4444 if (vect_debug_details (loop) || vect_debug_stats (loop))
4445 fprintf (dump_file, "Alignment of access forced using peeling.");
4448 DR_MISALIGNMENT (dr) = -1;
4453 /* Function vect_analyze_data_refs_alignment
4455 Analyze the alignment of the data-references in the loop.
4456 FOR NOW: Until support for misliagned accesses is in place, only if all
4457 accesses are aligned can the loop be vectorized. This restriction will be
4461 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4463 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4464 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4465 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4466 enum dr_alignment_support supportable_dr_alignment;
4469 if (vect_debug_details (NULL))
4470 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4473 /* This pass may take place at function granularity instead of at loop
4476 if (!vect_compute_data_refs_alignment (loop_vinfo))
4478 if (vect_debug_details (loop) || vect_debug_stats (loop))
4480 "not vectorized: can't calculate alignment for data ref.");
4485 /* This pass will decide on using loop versioning and/or loop peeling in
4486 order to enhance the alignment of data references in the loop. */
4488 vect_enhance_data_refs_alignment (loop_vinfo);
4491 /* Finally, check that all the data references in the loop can be
4492 handled with respect to their alignment. */
/* Reads first: every DR must have a supportable alignment scheme.  */
4494 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4496 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4497 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4498 if (!supportable_dr_alignment)
4500 if (vect_debug_details (loop) || vect_debug_stats (loop))
4501 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4504 if (supportable_dr_alignment != dr_aligned
4505 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4506 fprintf (dump_file, "Vectorizing an unaligned access.");
/* Same check for the writes.  */
4508 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4510 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4511 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4512 if (!supportable_dr_alignment)
4514 if (vect_debug_details (loop) || vect_debug_stats (loop))
4515 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4518 if (supportable_dr_alignment != dr_aligned
4519 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4520 fprintf (dump_file, "Vectorizing an unaligned access.");
4527 /* Function vect_analyze_data_ref_access.
4529 Analyze the access pattern of the data-reference DR. For now, a data access
4530 has to be consecutive to be considered vectorizable. */
4533 vect_analyze_data_ref_access (struct data_reference *dr)
4535 tree stmt = DR_STMT (dr);
4536 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4537 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4538 tree scalar_type = TREE_TYPE (DR_REF (dr));
/* Consecutive access: STEP must be present and equal to the size of one
   scalar element (tree_int_cst_compare returns 0 on equality).  */
4540 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4542 if (vect_debug_details (NULL))
4543 fprintf (dump_file, "not consecutive access");
4550 /* Function vect_analyze_data_ref_accesses.
4552 Analyze the access pattern of all the data references in the loop.
4554 FORNOW: the only access pattern that is considered vectorizable is a
4555 simple step 1 (consecutive) access.
4557 FORNOW: handle only arrays and pointer accesses. */
4560 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4563 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4564 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4566 if (vect_debug_details (NULL))
4567 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Check every write data-ref, then every read data-ref; a single
   non-consecutive access pattern rejects the whole loop.  */
4569 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4571 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4572 bool ok = vect_analyze_data_ref_access (dr);
4575 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4576 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4577 fprintf (dump_file, "not vectorized: complicated access pattern.");
4582 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4584 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4585 bool ok = vect_analyze_data_ref_access (dr);
4588 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4589 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4590 fprintf (dump_file, "not vectorized: complicated access pattern.");
4599 /* Function vect_analyze_pointer_ref_access.
4602 STMT - a stmt that contains a data-ref
4603 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4605 If the data-ref access is vectorizable, return a data_reference structure
4606 that represents it (DR). Otherwise - return NULL. */
4608 static struct data_reference *
4609 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4612 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4613 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Evolution of the pointer (operand 0 of the INDIRECT_REF) in LOOP.  */
4614 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4616 tree reftype, innertype;
4617 tree indx_access_fn;
4618 int loopnum = loop->num;
4619 struct data_reference *dr;
4623 if (vect_debug_stats (loop) || vect_debug_details (loop))
4624 fprintf (dump_file, "not vectorized: complicated pointer access.");
4628 if (vect_debug_details (NULL))
4630 fprintf (dump_file, "Access function of ptr: ");
4631 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The pointer must evolve as a simple IV: {init, +, step}.  */
4634 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4636 if (vect_debug_stats (loop) || vect_debug_details (loop))
4637 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* INIT must not vary inside the loop, and STEP must be a known constant.  */
4643 if (!expr_invariant_in_loop_p (loop, init))
4645 if (vect_debug_stats (loop) || vect_debug_details (loop))
4647 "not vectorized: initial condition is not loop invariant.");
4651 if (TREE_CODE (step) != INTEGER_CST)
4653 if (vect_debug_stats (loop) || vect_debug_details (loop))
4655 "not vectorized: non constant step for pointer access.");
4659 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4660 if (TREE_CODE (reftype) != POINTER_TYPE)
4662 if (vect_debug_stats (loop) || vect_debug_details (loop))
4663 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4667 reftype = TREE_TYPE (init);
4668 if (TREE_CODE (reftype) != POINTER_TYPE)
4670 if (vect_debug_stats (loop) || vect_debug_details (loop))
4671 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* Consecutive access: step must equal the size of the pointed-to type
   (tree_int_cst_compare returns 0 on equality).  */
4675 innertype = TREE_TYPE (reftype);
4676 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4678 /* FORNOW: support only consecutive access */
4679 if (vect_debug_stats (loop) || vect_debug_details (loop))
4680 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Record step/offset (in ssizetype) on the stmt-info for later phases.  */
4684 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (ssizetype, step);
4685 if (TREE_CODE (init) == PLUS_EXPR
4686 || TREE_CODE (init) == MINUS_EXPR)
4687 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4688 size_binop (TREE_CODE (init), ssize_int (0),
4689 fold_convert (ssizetype, TREE_OPERAND (init, 1)));
4691 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = ssize_int (0);
/* Build a {0, +, 1} index chrec so the DR looks like a unit-stride array.  */
4694 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4695 if (vect_debug_details (NULL))
4697 fprintf (dump_file, "Access function of ptr indx: ");
4698 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4700 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4705 /* Function vect_get_memtag_and_dr.
4707 The function returns the relevant variable for memory tag (for aliasing
4708 purposes). Also data reference structure DR is created.
4710 This function handles three kinds of MEMREF:
4712 It is called from vect_analyze_data_refs with a MEMREF that is either an
4713 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4714 It builds a DR for them using vect_get_base_and_offset, and calls itself
4715 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4716 MEMREF along the way. During the recursive calls, the function may be called
4717 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4718 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4719 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4720 and SSA_NAME (this is category 3 - "recursion stop condition").
4722 When the MEMREF falls into category 1 there is still no data reference struct
4723 (DR) available. It is created by this function, and then, along the
4724 recursion, MEMREF will fall into category 2 or 3, in which case a DR will
4725 have already been created, but the analysis continues to retrieve the MEMTAG.
4728 MEMREF - data reference in STMT
4729 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4732 DR - data_reference struct for MEMREF
4733 return value - the relevant variable for memory tag (for aliasing purposes).
/* NOTE(review): the listing elides structural lines (braces, case labels,
   returns); comments below annotate only the visible statements.  */
4738 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4739 loop_vec_info loop_vinfo,
4740 tree vectype, struct data_reference **dr)
4742 tree symbl, oprnd0, oprnd1;
4743 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4744 tree offset, misalign, step;
4745 tree ref_to_be_analyzed, tag, dr_base;
4746 struct data_reference *new_dr;
4747 bool base_aligned_p;
4751 /* Category 3: recursion stop condition. */
4752 /* (1) A DR already exists. We only need to get the relevant memtag for
4753 MEMREF, the rest of the data was already initialized. */
4755 switch (TREE_CODE (memref))
4757 /* (1.1) Stop condition: find the relevant memtag and return. */
/* SSA_NAME: the memtag is the type memory tag of the underlying var.  */
4759 symbl = SSA_NAME_VAR (memref);
4760 tag = get_var_ann (symbl)->type_mem_tag;
4763 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4764 if (TREE_CODE (ptr) == SSA_NAME)
4765 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4769 if (vect_debug_details (NULL))
4770 fprintf (dump_file, "not vectorized: no memtag for ref.");
4779 /* Category 2: recursion continues. */
4780 /* (1.2) A recursive call to find the relevant memtag is required. */
/* INDIRECT_REF: peel one level of indirection and recurse on the pointer.  */
4782 symbl = TREE_OPERAND (memref, 0);
4783 break; /* For recursive call. */
4786 /* Could have recorded more accurate information -
4787 i.e, the actual FIELD_DECL that is being referenced -
4788 but later passes expect VAR_DECL as the nmt. */
4792 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4793 break; /* For recursive call. */
4797 /* Although DR exists, we have to call the function recursively to
4798 build MEMTAG for such expression. This is handled below. */
4799 oprnd0 = TREE_OPERAND (memref, 0);
4800 oprnd1 = TREE_OPERAND (memref, 1);
4802 STRIP_NOPS (oprnd1);
4803 /* Supported plus/minus expressions are of the form
4804 {address_base + offset}, such that address_base is of type
4805 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4806 or it's not of type POINTER/ARRAY.
4807 TODO: swap operands if {offset + address_base}. */
4808 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4809 && TREE_CODE (oprnd1) != INTEGER_CST)
4810 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4814 break; /* For recursive call. */
4822 /* Category 1: recursion begins. */
4823 /* (2) A DR does not exist yet and must be built, followed by a
4824 recursive call to get the relevant memtag for MEMREF. */
4826 switch (TREE_CODE (memref))
/* INDIRECT_REF: build the DR via the pointer-access analysis.  */
4829 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4833 symbl = DR_BASE_NAME (new_dr);
4834 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
/* ARRAY_REF: build the DR via the array analyzer.  */
4838 new_dr = analyze_array (stmt, memref, is_read);
4840 symbl = DR_BASE_NAME (new_dr);
4841 ref_to_be_analyzed = memref;
4845 /* TODO: Support data-refs of form a[i].p for unions and single
4846 field structures. */
4850 offset = ssize_int (0);
4851 misalign = ssize_int (0);
4852 step = ssize_int (0);
4854 /* Analyze data-ref, find its base, initial offset from the base, step,
4856 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4857 vectype, loop_vinfo, &offset,
4858 &misalign, &step, &base_aligned_p);
4862 /* Initialize information according to above analysis. */
4863 /* Since offset and step of a pointer can be also set in
4864 vect_analyze_pointer_ref_access, we combine the values here. */
4865 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4866 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4867 size_binop (PLUS_EXPR, offset,
4868 STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4870 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4872 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4873 STMT_VINFO_VECT_STEP (stmt_info) =
4874 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4876 STMT_VINFO_VECT_STEP (stmt_info) = step;
4878 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4879 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4880 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4885 /* Recursive call to retrieve the relevant memtag. */
4886 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4891 /* Function vect_analyze_data_refs.
4893 Find all the data references in the loop.
4895 The general structure of the analysis of data refs in the vectorizer is as
4897 1- vect_analyze_data_refs(loop):
4898 Find and analyze all data-refs in the loop:
4900 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4901 1.1- vect_get_memtag_and_dr(ref):
4902 Analyze ref, and build a DR (data_reference struct) for it;
4903 call vect_get_base_and_offset to compute base, initial_offset,
4904 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4905 ref_stmt.alignment, and ref_stmt.step accordingly.
4906 1.1.1- vect_get_base_and_offset():
4907 Calculate base, initial_offset, step and alignment.
4908 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4909 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4910 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4911 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4913 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4914 which base is really an array (not a pointer) and which alignment
4915 can be forced. This restriction will be relaxed. */
4918 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4920 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4921 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4922 int nbbs = loop->num_nodes;
4923 block_stmt_iterator si;
4925 struct data_reference *dr;
4927 if (vect_debug_details (NULL))
4928 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
/* Walk every stmt in every basic block of the loop.  */
4930 for (j = 0; j < nbbs; j++)
4932 basic_block bb = bbs[j];
4933 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4935 bool is_read = false;
4936 tree stmt = bsi_stmt (si);
4937 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4938 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4939 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4940 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4941 varray_type *datarefs = NULL;
4942 int nvuses, nv_may_defs, nv_must_defs;
4945 tree scalar_type, vectype;
4947 /* Assumption: there exists a data-ref in stmt, if and only if
4948 it has vuses/vdefs. */
4950 if (!vuses && !v_may_defs && !v_must_defs)
4953 nvuses = NUM_VUSES (vuses);
4954 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4955 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt with both vuses and vdefs (e.g. a read-modify-write) is not
   handled -- exactly one side of a MODIFY_EXPR may touch memory.  */
4957 if (nvuses && (nv_may_defs || nv_must_defs))
4959 if (vect_debug_details (NULL))
4961 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4962 print_generic_expr (dump_file, stmt, TDF_SLIM);
4967 if (TREE_CODE (stmt) != MODIFY_EXPR)
4969 if (vect_debug_details (NULL))
4971 fprintf (dump_file, "unexpected vops in stmt: ");
4972 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* vuses => the RHS is the memory read; vdefs => the LHS is the write.  */
4979 memref = TREE_OPERAND (stmt, 1);
4980 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4985 memref = TREE_OPERAND (stmt, 0);
4986 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4990 scalar_type = TREE_TYPE (memref);
4991 vectype = get_vectype_for_scalar_type (scalar_type);
4994 if (vect_debug_details (NULL))
4996 fprintf (dump_file, "no vectype for stmt: ");
4997 print_generic_expr (dump_file, stmt, TDF_SLIM);
4998 fprintf (dump_file, " scalar_type: ");
4999 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
5001 /* It is not possible to vectorize this data reference. */
5004 /* Analyze MEMREF. If it is of a supported form, build data_reference
5005 struct for it (DR) and find memtag for aliasing purposes. */
5007 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
5011 if (vect_debug_stats (loop) || vect_debug_details (loop))
5013 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5014 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Record the results on the stmt-info and in the proper DR varray.  */
5018 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5019 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5020 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5021 STMT_VINFO_DATA_REF (stmt_info) = dr;
5029 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5031 /* Function vect_mark_relevant.
5033 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5036 vect_mark_relevant (varray_type *worklist, tree stmt)
5038 stmt_vec_info stmt_info;
5040 if (vect_debug_details (NULL))
5041 fprintf (dump_file, "mark relevant.");
/* PHI nodes carry no stmt_vec_info here; just queue them.  */
5043 if (TREE_CODE (stmt) == PHI_NODE)
5045 VARRAY_PUSH_TREE (*worklist, stmt);
5049 stmt_info = vinfo_for_stmt (stmt);
5053 if (vect_debug_details (NULL))
5055 fprintf (dump_file, "mark relevant: no stmt info!!.");
5056 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Already marked: avoid pushing the stmt onto the worklist twice.  */
5061 if (STMT_VINFO_RELEVANT_P (stmt_info))
5063 if (vect_debug_details (NULL))
5064 fprintf (dump_file, "already marked relevant.");
5068 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5069 VARRAY_PUSH_TREE (*worklist, stmt);
5073 /* Function vect_stmt_relevant_p.
5075 Return true if STMT in loop that is represented by LOOP_VINFO is
5076 "relevant for vectorization".
5078 A stmt is considered "relevant for vectorization" if:
5079 - it has uses outside the loop.
5080 - it has vdefs (it alters memory).
5081 - control stmts in the loop (except for the exit condition).
5083 CHECKME: what other side effects would the vectorizer allow? */
5086 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5088 v_may_def_optype v_may_defs;
5089 v_must_def_optype v_must_defs;
5090 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5095 /* cond stmt other than loop exit cond. */
5096 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5099 /* changing memory. */
5100 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5101 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5102 if (v_may_defs || v_must_defs)
5104 if (vect_debug_details (NULL))
5105 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5109 /* uses outside the loop. */
/* Scan the immediate uses of STMT; any use in a bb outside the loop
   makes the stmt relevant (its value escapes the loop).  */
5110 df = get_immediate_uses (stmt);
5111 num_uses = num_immediate_uses (df);
5112 for (i = 0; i < num_uses; i++)
5114 tree use = immediate_use (df, i);
5115 basic_block bb = bb_for_stmt (use);
5116 if (!flow_bb_inside_loop_p (loop, bb))
5118 if (vect_debug_details (NULL))
5119 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5128 /* Function vect_mark_stmts_to_be_vectorized.
5130 Not all stmts in the loop need to be vectorized. For example:
5139 Stmt 1 and 3 do not need to be vectorized, because loop control and
5140 addressing of vectorized data-refs are handled differently.
5142 This pass detects such stmts. */
5145 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5147 varray_type worklist;
5148 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5149 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5150 unsigned int nbbs = loop->num_nodes;
5151 block_stmt_iterator si;
5157 stmt_vec_info stmt_info;
5161 if (vect_debug_details (NULL))
5162 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
/* Reject loops with header phis that are themselves "relevant"
   (would be a reduction/induction cycle -- not supported yet).  */
5165 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5167 if (vect_debug_details (NULL))
5169 fprintf (dump_file, "init: phi relevant? ");
5170 print_generic_expr (dump_file, phi, TDF_SLIM);
5173 if (vect_stmt_relevant_p (phi, loop_vinfo))
5175 if (vect_debug_details (NULL))
5176 fprintf (dump_file, "unsupported reduction/induction.");
5181 VARRAY_TREE_INIT (worklist, 64, "work list");
5183 /* 1. Init worklist. */
/* Seed the worklist with every stmt that is relevant by itself.  */
5185 for (i = 0; i < nbbs; i++)
5188 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5190 stmt = bsi_stmt (si);
5192 if (vect_debug_details (NULL))
5194 fprintf (dump_file, "init: stmt relevant? ");
5195 print_generic_expr (dump_file, stmt, TDF_SLIM);
5198 stmt_info = vinfo_for_stmt (stmt);
5199 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5201 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5202 vect_mark_relevant (&worklist, stmt);
5207 /* 2. Process_worklist */
/* Propagate relevance backwards along use-def chains: every in-loop
   def feeding a relevant stmt becomes relevant too.  */
5209 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5211 stmt = VARRAY_TOP_TREE (worklist);
5212 VARRAY_POP (worklist);
5214 if (vect_debug_details (NULL))
5216 fprintf (dump_file, "worklist: examine stmt: ");
5217 print_generic_expr (dump_file, stmt, TDF_SLIM);
5220 /* Examine the USES in this statement. Mark all the statements which
5221 feed this statement's uses as "relevant", unless the USE is used as
5224 if (TREE_CODE (stmt) == PHI_NODE)
5226 /* follow the def-use chain inside the loop. */
5227 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5229 tree arg = PHI_ARG_DEF (stmt, j);
5230 tree def_stmt = NULL_TREE;
5232 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
5234 if (vect_debug_details (NULL))
5235 fprintf (dump_file, "worklist: unsupported use.");
5236 varray_clear (worklist);
5242 if (vect_debug_details (NULL))
5244 fprintf (dump_file, "worklist: def_stmt: ");
5245 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5248 bb = bb_for_stmt (def_stmt);
5249 if (flow_bb_inside_loop_p (loop, bb))
5250 vect_mark_relevant (&worklist, def_stmt);
5254 ann = stmt_ann (stmt);
5255 use_ops = USE_OPS (ann);
5257 for (i = 0; i < NUM_USES (use_ops); i++)
5259 tree use = USE_OP (use_ops, i);
5261 /* We are only interested in uses that need to be vectorized. Uses
5262 that are used for address computation are not considered relevant.
5264 if (exist_non_indexing_operands_for_use_p (use, stmt))
5266 tree def_stmt = NULL_TREE;
5268 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
5270 if (vect_debug_details (NULL))
5271 fprintf (dump_file, "worklist: unsupported use.");
5272 varray_clear (worklist);
5279 if (vect_debug_details (NULL))
5281 fprintf (dump_file, "worklist: examine use %d: ", i);
5282 print_generic_expr (dump_file, use, TDF_SLIM);
5285 bb = bb_for_stmt (def_stmt);
5286 if (flow_bb_inside_loop_p (loop, bb))
5287 vect_mark_relevant (&worklist, def_stmt);
5290 } /* while worklist */
5292 varray_clear (worklist);
5297 /* Function vect_can_advance_ivs_p
5299 In case the number of iterations that LOOP iterates is unknown at compile
5300 time, an epilog loop will be generated, and the loop induction variables
5301 (IVs) will be "advanced" to the value they are supposed to take just before
5302 the epilog loop. Here we check that the access function of the loop IVs
5303 and the expression that represents the loop bound are simple enough.
5304 These restrictions will be relaxed in the future. */
5307 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
5309 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5310 basic_block bb = loop->header;
5313 /* Analyze phi functions of the loop header. */
5315 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5317 tree access_fn = NULL;
5318 tree evolution_part;
5320 if (vect_debug_details (NULL))
5322 fprintf (dump_file, "Analyze phi: ");
5323 print_generic_expr (dump_file, phi, TDF_SLIM);
5326 /* Skip virtual phi's. The data dependences that are associated with
5327 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5329 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5331 if (vect_debug_details (NULL))
5332 fprintf (dump_file, "virtual phi. skip.");
5336 /* Analyze the evolution function. */
5338 access_fn = instantiate_parameters
5339 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5343 if (vect_debug_details (NULL))
5344 fprintf (dump_file, "No Access function.");
5348 if (vect_debug_details (NULL))
5350 fprintf (dump_file, "Access function of PHI: ");
5351 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The IV can be advanced only if its evolution in this loop exists and
   is simple (not itself a chrec, i.e. not degree >= 2).  */
5354 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5356 if (evolution_part == NULL_TREE)
5359 /* FORNOW: We do not transform initial conditions of IVs
5360 which evolution functions are a polynomial of degree >= 2. */
5362 if (tree_is_chrec (evolution_part))
5370 /* Function vect_get_loop_niters.
5372 Determine how many iterations the loop is executed.
5373 If an expression that represents the number of iterations
5374 can be constructed, place it in NUMBER_OF_ITERATIONS.
5375 Return the loop exit condition. */
5378 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5382 if (vect_debug_details (NULL))
5383 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5385 niters = number_of_iterations_in_loop (loop);
/* Only store the count if scev produced a usable expression.  */
5387 if (niters != NULL_TREE
5388 && niters != chrec_dont_know)
5390 *number_of_iterations = niters;
5392 if (vect_debug_details (NULL))
5394 fprintf (dump_file, "==> get_loop_niters:" );
5395 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5399 return get_loop_exit_condition (loop);
5403 /* Function vect_analyze_loop_form.
5405 Verify the following restrictions (some may be relaxed in the future):
5406 - it's an inner-most loop
5407 - number of BBs = 2 (which are the loop header and the latch)
5408 - the loop has a pre-header
5409 - the loop has a single entry and exit
5410 - the loop exit condition is simple enough, and the number of iterations
5411 can be analyzed (a countable loop). */
5413 static loop_vec_info
5414 vect_analyze_loop_form (struct loop *loop)
5416 loop_vec_info loop_vinfo;
5418 tree number_of_iterations = NULL;
5419 bool rescan = false;
5421 if (vect_debug_details (loop))
5422 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
/* CFG restrictions: innermost, single exit, exactly header+latch,
   two header predecessors (preheader + latch), single entry.  */
5425 || !loop->single_exit
5426 || loop->num_nodes != 2
5427 || EDGE_COUNT (loop->header->preds) != 2
5428 || loop->num_entries != 1)
5430 if (vect_debug_stats (loop) || vect_debug_details (loop))
5432 fprintf (dump_file, "not vectorized: bad loop form. ");
5434 fprintf (dump_file, "nested loop.");
5435 else if (!loop->single_exit)
5436 fprintf (dump_file, "multiple exits.");
5437 else if (loop->num_nodes != 2)
5438 fprintf (dump_file, "too many BBs in loop.");
5439 else if (EDGE_COUNT (loop->header->preds) != 2)
5440 fprintf (dump_file, "too many incoming edges.");
5441 else if (loop->num_entries != 1)
5442 fprintf (dump_file, "too many entries.");
5448 /* We assume that the loop exit condition is at the end of the loop. i.e,
5449 that the loop is represented as a do-while (with a proper if-guard
5450 before the loop if needed), where the loop header contains all the
5451 executable statements, and the latch is empty. */
5452 if (!empty_block_p (loop->latch))
5454 if (vect_debug_stats (loop) || vect_debug_details (loop))
5455 fprintf (dump_file, "not vectorized: unexpected loop form.");
5459 /* Make sure we have a preheader basic block. */
5460 if (!loop->pre_header)
5463 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5466 /* Make sure there exists a single-predecessor exit bb: */
5467 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5470 loop_split_edge_with (loop->exit_edges[0], NULL);
/* Edges were split above; rescan so the loop structure is up to date.  */
5475 flow_loop_scan (loop, LOOP_ALL);
5476 /* Flow loop scan does not update loop->single_exit field. */
5477 loop->single_exit = loop->exit_edges[0];
5480 if (empty_block_p (loop->header))
5482 if (vect_debug_stats (loop) || vect_debug_details (loop))
5483 fprintf (dump_file, "not vectorized: empty loop.");
/* The loop must be countable: a computable niters expression and a
   recognizable exit condition.  */
5487 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5490 if (vect_debug_stats (loop) || vect_debug_details (loop))
5491 fprintf (dump_file, "not vectorized: complicated exit condition.");
5495 if (!number_of_iterations)
5497 if (vect_debug_stats (loop) || vect_debug_details (loop))
5499 "not vectorized: number of iterations cannot be computed.");
5503 if (chrec_contains_undetermined (number_of_iterations))
5505 if (vect_debug_details (NULL))
5506 fprintf (dump_file, "Infinite number of iterations.");
/* All checks passed: build the loop_vec_info and record the results.  */
5510 loop_vinfo = new_loop_vec_info (loop);
5511 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5513 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5515 if (vect_debug_details (loop))
5517 fprintf (dump_file, "loop bound unknown.\n");
5518 fprintf (dump_file, "Symbolic number of iterations is ");
5519 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5523 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5525 if (vect_debug_stats (loop) || vect_debug_details (loop))
5526 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5530 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5536 /* Function vect_analyze_loop.
5538 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5539 for it. The different analyses will record information in the
5540 loop_vec_info struct. */
5542 static loop_vec_info
5543 vect_analyze_loop (struct loop *loop)
5546 loop_vec_info loop_vinfo;
5548 if (vect_debug_details (NULL))
5549 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5551 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.) */
5553 loop_vinfo = vect_analyze_loop_form (loop);
5556 if (vect_debug_details (loop))
5557 fprintf (dump_file, "bad loop form.");
5561 /* Find all data references in the loop (which correspond to vdefs/vuses)
5562 and analyze their evolution in the loop.
5564 FORNOW: Handle only simple, array references, which
5565 alignment can be forced, and aligned pointer-references. */
5567 ok = vect_analyze_data_refs (loop_vinfo);
5570 if (vect_debug_details (loop))
5571 fprintf (dump_file, "bad data references.");
5572 destroy_loop_vec_info (loop_vinfo);
5576 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5578 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5581 if (vect_debug_details (loop))
5582 fprintf (dump_file, "unexpected pattern.");
5583 if (vect_debug_details (loop))
5584 fprintf (dump_file, "not vectorized: unexpected pattern.");
5585 destroy_loop_vec_info (loop_vinfo);
5589 /* Check that all cross-iteration scalar data-flow cycles are OK.
5590 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5592 ok = vect_analyze_scalar_cycles (loop_vinfo);
5595 if (vect_debug_details (loop))
5596 fprintf (dump_file, "bad scalar cycle.");
5597 destroy_loop_vec_info (loop_vinfo);
5601 /* Analyze data dependences between the data-refs in the loop.
5602 FORNOW: fail at the first data dependence that we encounter. */
5604 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5607 if (vect_debug_details (loop))
5608 fprintf (dump_file, "bad data dependence.");
5609 destroy_loop_vec_info (loop_vinfo);
5613 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5614 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5616 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5619 if (vect_debug_details (loop))
5620 fprintf (dump_file, "bad data access.");
5621 destroy_loop_vec_info (loop_vinfo);
5625 /* Analyze the alignment of the data-refs in the loop.
5626 FORNOW: Only aligned accesses are handled. */
5628 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5631 if (vect_debug_details (loop))
5632 fprintf (dump_file, "bad data alignment.");
5633 destroy_loop_vec_info (loop_vinfo);
5637 /* Scan all the operations in the loop and make sure they are
5640 ok = vect_analyze_operations (loop_vinfo);
5643 if (vect_debug_details (loop))
5644 fprintf (dump_file, "bad operation or unsupported loop bound.");
5645 destroy_loop_vec_info (loop_vinfo);
/* Every analysis succeeded: mark the loop as vectorizable.  */
5649 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5655 /* Function need_imm_uses_for.
5657 Return whether we ought to include information for 'var'
5658 when calculating immediate uses. For this pass we only want use
5659 information for non-virtual variables. */
5662 need_imm_uses_for (tree var)
5664 return is_gimple_reg (var);
5668 /* Function vectorize_loops.
5670 Entry Point to loop vectorization phase. */
5673 vectorize_loops (struct loops *loops)
5675 unsigned int i, loops_num;
5676 unsigned int num_vectorized_loops = 0;
5678 /* Does the target support SIMD? */
5679 /* FORNOW: until more sophisticated machine modelling is in place. */
5680 if (!UNITS_PER_SIMD_WORD)
5682 if (vect_debug_details (NULL))
5683 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5687 #ifdef ENABLE_CHECKING
5688 verify_loop_closed_ssa ();
5691 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5693 /* ----------- Analyze loops. ----------- */
5695 /* If some loop was duplicated, it gets bigger number
5696 than all previously defined loops. This fact allows us to run
5697 only over initial loops skipping newly generated ones. */
5698 loops_num = loops->num;
5699 for (i = 1; i < loops_num; i++)
5701 loop_vec_info loop_vinfo;
5702 struct loop *loop = loops->parray[i];
5707 loop_vinfo = vect_analyze_loop (loop);
5708 loop->aux = loop_vinfo;
5710 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5713 vect_transform_loop (loop_vinfo, loops);
5714 num_vectorized_loops++;
5717 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5718 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5719 num_vectorized_loops);
5721 /* ----------- Finalize. ----------- */
5724 for (i = 1; i < loops_num; i++)
5726 struct loop *loop = loops->parray[i];
5727 loop_vec_info loop_vinfo;
5731 loop_vinfo = loop->aux;
5732 destroy_loop_vec_info (loop_vinfo);
5736 rewrite_into_ssa (false);
5737 rewrite_into_loop_closed_ssa (); /* FORNOW */
5738 bitmap_clear (vars_to_rename);