2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
150 /*************************************************************************
151 Simple Loop Peeling Utilities
152 *************************************************************************/
154 /* Entry point for peeling of simple loops.
155 Peel the first/last iterations of a loop.
156 It can be used outside of the vectorizer for loops that are simple enough
157 (see function documentation). In the vectorizer it is used to peel the
158 last few iterations when the loop bound is unknown or does not evenly
159 divide by the vectorization factor, and to peel the first few iterations
160 to force the alignment of data references in the loop. */
161 struct loop *slpeel_tree_peel_loop_to_edge
162 (struct loop *, struct loops *, edge, tree, tree, bool);
163 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
164 (struct loop *, struct loops *, edge);
165 static void slpeel_update_phis_for_duplicate_loop
166 (struct loop *, struct loop *, bool after);
167 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
168 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
169 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
170 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
171 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
180 /*************************************************************************
181 Vectorization Utilities.
182 *************************************************************************/
184 /* Main analysis functions. */
185 static loop_vec_info vect_analyze_loop (struct loop *);
186 static loop_vec_info vect_analyze_loop_form (struct loop *);
187 static bool vect_analyze_data_refs (loop_vec_info);
188 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
189 static bool vect_analyze_scalar_cycles (loop_vec_info);
190 static bool vect_analyze_data_ref_accesses (loop_vec_info);
191 static bool vect_analyze_data_refs_alignment (loop_vec_info);
192 static bool vect_compute_data_refs_alignment (loop_vec_info);
193 static bool vect_analyze_operations (loop_vec_info);
195 /* Main code transformation functions. */
196 static void vect_transform_loop (loop_vec_info, struct loops *);
197 static void vect_transform_loop_bound (loop_vec_info, tree niters);
198 static bool vect_transform_stmt (tree, block_stmt_iterator *);
199 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
200 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
201 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
203 static enum dr_alignment_support vect_supportable_dr_alignment
204 (struct data_reference *);
205 static void vect_align_data_ref (tree);
206 static void vect_enhance_data_refs_alignment (loop_vec_info);
208 /* Utility functions for the analyses. */
209 static bool vect_is_simple_use (tree , struct loop *, tree *);
210 static bool exist_non_indexing_operands_for_use_p (tree, tree);
211 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
212 static void vect_mark_relevant (varray_type, tree);
213 static bool vect_stmt_relevant_p (tree, loop_vec_info);
214 static tree vect_get_loop_niters (struct loop *, tree *);
215 static bool vect_compute_data_ref_alignment
216 (struct data_reference *, loop_vec_info);
217 static bool vect_analyze_data_ref_access (struct data_reference *);
218 static bool vect_get_first_index (tree, tree *);
219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
220 static struct data_reference * vect_analyze_pointer_ref_access
222 static bool vect_can_advance_ivs_p (struct loop *);
223 static tree vect_get_base_and_bit_offset
224 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
225 static struct data_reference * vect_analyze_pointer_ref_access
227 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
228 static tree vect_compute_array_ref_alignment
229 (struct data_reference *, loop_vec_info, tree, tree *);
230 static tree vect_get_ptr_offset (tree, tree, tree *);
231 static tree vect_get_symbl_and_dr
232 (tree, tree, bool, loop_vec_info, struct data_reference **);
234 /* Utility functions for the code transformation. */
235 static tree vect_create_destination_var (tree, tree);
236 static tree vect_create_data_ref_ptr
237 (tree, block_stmt_iterator *, tree, tree *, bool);
238 static tree vect_create_index_for_vector_ref
239 (struct loop *, block_stmt_iterator *);
240 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
241 static tree get_vectype_for_scalar_type (tree);
242 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
243 static tree vect_get_vec_def_for_operand (tree, tree);
244 static tree vect_init_vector (tree, tree);
245 static tree vect_build_symbol_bound (tree, int, struct loop *);
246 static void vect_finish_stmt_generation
247 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
249 /* Utility function dealing with loop peeling (not peeling itself). */
250 static void vect_generate_tmps_on_preheader
251 (loop_vec_info, tree *, tree *, tree *);
252 static tree vect_build_loop_niters (loop_vec_info);
253 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
254 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
255 static void vect_update_inits_of_dr
256 (struct data_reference *, struct loop *, tree niters);
257 static void vect_update_inits_of_drs (loop_vec_info, tree);
258 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
259 static void vect_do_peeling_for_loop_bound
260 (loop_vec_info, tree *, struct loops *);
262 /* Utilities for creation and deletion of vec_info structs. */
263 loop_vec_info new_loop_vec_info (struct loop *loop);
264 void destroy_loop_vec_info (loop_vec_info);
265 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
267 static bool vect_debug_stats (struct loop *loop);
268 static bool vect_debug_details (struct loop *loop);
271 /*************************************************************************
272 Simple Loop Peeling Utilities
274 Utilities to support loop peeling for vectorization purposes.
275 *************************************************************************/
278 /* For each definition in DEFINITIONS this function allocates
282 allocate_new_names (bitmap definitions)
287 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
289 tree def = ssa_name (ver);
290 tree *new_name_ptr = xmalloc (sizeof (tree));
292 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
294 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
295 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
297 SSA_NAME_AUX (def) = new_name_ptr;
302 /* Renames the use *OP_P. */
305 rename_use_op (use_operand_p op_p)
309 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
312 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
314 /* Something defined outside of the loop. */
318 /* An ordinary ssa name defined in the loop. */
320 SET_USE (op_p, *new_name_ptr);
324 /* Renames the def *OP_P in statement STMT. */
327 rename_def_op (def_operand_p op_p, tree stmt)
331 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
334 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
336 /* Something defined outside of the loop. */
340 /* An ordinary ssa name defined in the loop. */
342 SET_DEF (op_p, *new_name_ptr);
343 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
347 /* Renames the variables in basic block BB. */
350 rename_variables_in_bb (basic_block bb)
353 block_stmt_iterator bsi;
359 v_may_def_optype v_may_defs;
360 v_must_def_optype v_must_defs;
364 struct loop *loop = bb->loop_father;
366 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
367 rename_def_op (PHI_RESULT_PTR (phi), phi);
369 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
371 stmt = bsi_stmt (bsi);
372 get_stmt_operands (stmt);
373 ann = stmt_ann (stmt);
375 uses = USE_OPS (ann);
376 for (i = 0; i < NUM_USES (uses); i++)
377 rename_use_op (USE_OP_PTR (uses, i));
379 defs = DEF_OPS (ann);
380 for (i = 0; i < NUM_DEFS (defs); i++)
381 rename_def_op (DEF_OP_PTR (defs, i), stmt);
383 vuses = VUSE_OPS (ann);
384 for (i = 0; i < NUM_VUSES (vuses); i++)
385 rename_use_op (VUSE_OP_PTR (vuses, i));
387 v_may_defs = V_MAY_DEF_OPS (ann);
388 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
390 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
391 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
394 v_must_defs = V_MUST_DEF_OPS (ann);
395 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
397 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
398 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
402 FOR_EACH_EDGE (e, ei, bb->succs)
404 if (!flow_bb_inside_loop_p (loop, e->dest))
406 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
407 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
412 /* Releases the structures holding the new ssa names. */
415 free_new_names (bitmap definitions)
420 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
422 tree def = ssa_name (ver);
424 if (SSA_NAME_AUX (def))
426 free (SSA_NAME_AUX (def));
427 SSA_NAME_AUX (def) = NULL;
433 /* Renames variables in new generated LOOP. */
436 rename_variables_in_loop (struct loop *loop)
441 bbs = get_loop_body (loop);
443 for (i = 0; i < loop->num_nodes; i++)
444 rename_variables_in_bb (bbs[i]);
450 /* Update the PHI nodes of NEW_LOOP.
452 NEW_LOOP is a duplicate of ORIG_LOOP.
453 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
454 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
455 executes before it. */
458 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
459 struct loop *new_loop, bool after)
461 tree *new_name_ptr, new_ssa_name;
462 tree phi_new, phi_orig;
464 edge orig_loop_latch = loop_latch_edge (orig_loop);
465 edge orig_entry_e = loop_preheader_edge (orig_loop);
466 edge new_loop_exit_e = new_loop->exit_edges[0];
467 edge new_loop_entry_e = loop_preheader_edge (new_loop);
468 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
471 step 1. For each loop-header-phi:
472 Add the first phi argument for the phi in NEW_LOOP
473 (the one associated with the entry of NEW_LOOP)
475 step 2. For each loop-header-phi:
476 Add the second phi argument for the phi in NEW_LOOP
477 (the one associated with the latch of NEW_LOOP)
479 step 3. Update the phis in the successor block of NEW_LOOP.
481 case 1: NEW_LOOP was placed before ORIG_LOOP:
482 The successor block of NEW_LOOP is the header of ORIG_LOOP.
483 Updating the phis in the successor block can therefore be done
484 along with the scanning of the loop header phis, because the
485 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
486 phi nodes, organized in the same order.
488 case 2: NEW_LOOP was placed after ORIG_LOOP:
489 The successor block of NEW_LOOP is the original exit block of
490 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
491 We postpone updating these phis to a later stage (when
492 loop guards are added).
496 /* Scan the phis in the headers of the old and new loops
497 (they are organized in exactly the same order). */
499 for (phi_new = phi_nodes (new_loop->header),
500 phi_orig = phi_nodes (orig_loop->header);
502 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
505 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
506 add_phi_arg (&phi_new, def, new_loop_entry_e);
509 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
510 if (TREE_CODE (def) != SSA_NAME)
513 new_name_ptr = SSA_NAME_AUX (def);
515 /* Something defined outside of the loop. */
518 /* An ordinary ssa name defined in the loop. */
519 new_ssa_name = *new_name_ptr;
520 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
522 /* step 3 (case 1). */
525 gcc_assert (new_loop_exit_e == orig_entry_e);
526 SET_PHI_ARG_DEF (phi_orig,
527 phi_arg_from_edge (phi_orig, new_loop_exit_e),
534 /* Update PHI nodes for a guard of the LOOP.
537 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
538 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
539 originates from the guard-bb, skips LOOP and reaches the (unique) exit
540 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
541 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
542 LOOP header) before the guard code was added, and now it became a merge
543 point of two paths - the path that ends with the LOOP exit-edge, and
544 the path that ends with GUARD_EDGE.
546 This function creates and updates the relevant phi nodes to account for
547 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
548 1. Create phi nodes at NEW_MERGE_BB.
549 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
550 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
553 ===> The CFG before the guard-code was added:
555 if (exit_loop) goto update_bb : LOOP_header_bb
558 ==> The CFG after the guard-code was added:
560 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
562 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
567 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
568 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
569 organized in the same order.
570 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
573 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
574 "original" loop). FALSE if LOOP is an original loop (not a newly
575 created copy). The SSA_NAME_AUX fields of the defs in the origianl
576 loop are the corresponding new ssa-names used in the new duplicated
577 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
578 nodes in UPDATE_BB takes the original ssa-name, and which takes the
579 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
580 the LOOP-exit-edge takes the new-name, and the phi-arg that is
581 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
582 FALSE, it's the other way around.
586 slpeel_update_phi_nodes_for_guard (edge guard_edge,
591 tree orig_phi, new_phi, update_phi;
592 tree guard_arg, loop_arg;
593 basic_block new_merge_bb = guard_edge->dest;
594 edge e = EDGE_SUCC (new_merge_bb, 0);
595 basic_block update_bb = e->dest;
596 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
598 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
599 orig_phi && update_phi;
600 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
602 /* 1. Generate new phi node in NEW_MERGE_BB: */
603 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
606 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
607 of LOOP. Set the two phi args in NEW_PHI for these edges: */
610 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
611 EDGE_SUCC (loop->latch, 0));
612 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
616 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
617 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
621 new_name = *new_name_ptr;
623 /* Something defined outside of the loop */
628 guard_arg = orig_def;
633 guard_arg = new_name;
637 add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
638 add_phi_arg (&new_phi, guard_arg, guard_edge);
640 /* 3. Update phi in successor block. */
641 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
642 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
643 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
644 PHI_RESULT (new_phi));
647 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
651 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
652 that starts at zero, increases by one and its limit is NITERS.
654 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
657 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
659 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
661 edge exit_edge = loop->exit_edges[0];
662 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
663 tree begin_label = tree_block_label (loop->latch);
664 tree exit_label = tree_block_label (loop->single_exit->dest);
666 orig_cond = get_loop_exit_condition (loop);
667 gcc_assert (orig_cond);
668 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
669 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
671 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
672 back to the exit condition statement. */
673 bsi_next (&loop_exit_bsi);
674 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
676 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
677 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
678 else /* 'then' edge loops back. */
679 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
681 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
682 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
683 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
684 begin_label, exit_label);
685 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
687 /* Remove old loop exit test: */
688 bsi_remove (&loop_exit_bsi);
690 if (vect_debug_stats (loop) || vect_debug_details (loop))
691 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
693 loop->nb_iterations = niters;
697 /* Given LOOP this function generates a new copy of it and puts it
698 on E which is either the entry or exit of LOOP. */
701 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
704 struct loop *new_loop;
705 basic_block *new_bbs, *bbs;
708 basic_block exit_dest;
711 at_exit = (e == loop->exit_edges[0]);
712 if (!at_exit && e != loop_preheader_edge (loop))
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
719 bbs = get_loop_body (loop);
721 /* Check whether duplication is possible. */
722 if (!can_copy_bbs_p (bbs, loop->num_nodes))
724 if (vect_debug_stats (loop) || vect_debug_details (loop))
725 fprintf (dump_file, "Cannot copy basic blocks.\n");
730 /* Generate new loop structure. */
731 new_loop = duplicate_loop (loops, loop, loop->outer);
734 if (vect_debug_stats (loop) || vect_debug_details (loop))
735 fprintf (dump_file, "duplicate_loop returns NULL.\n");
740 exit_dest = loop->exit_edges[0]->dest;
741 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
742 exit_dest) == loop->header ?
745 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
747 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
749 /* Duplicating phi args at exit bbs as coming
750 also from exit of duplicated loop. */
751 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
753 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
756 edge new_loop_exit_edge;
758 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
759 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
761 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
763 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
767 if (at_exit) /* Add the loop copy at exit. */
769 redirect_edge_and_branch_force (e, new_loop->header);
770 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
772 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
774 else /* Add the copy at entry. */
777 edge entry_e = loop_preheader_edge (loop);
778 basic_block preheader = entry_e->src;
780 if (!flow_bb_inside_loop_p (new_loop,
781 EDGE_SUCC (new_loop->header, 0)->dest))
782 new_exit_e = EDGE_SUCC (new_loop->header, 0);
784 new_exit_e = EDGE_SUCC (new_loop->header, 1);
786 redirect_edge_and_branch_force (new_exit_e, loop->header);
787 set_immediate_dominator (CDI_DOMINATORS, loop->header,
790 /* We have to add phi args to the loop->header here as coming
791 from new_exit_e edge. */
792 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
794 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
796 add_phi_arg (&phi, phi_arg, new_exit_e);
799 redirect_edge_and_branch_force (entry_e, new_loop->header);
800 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
803 flow_loop_scan (new_loop, LOOP_ALL);
804 flow_loop_scan (loop, LOOP_ALL);
812 /* Given the condition statement COND, put it as the last statement
813 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
814 Assumes that this is the single exit of the guarded loop.
815 Returns the skip edge. */
818 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
821 block_stmt_iterator bsi;
823 tree cond_stmt, then_label, else_label;
825 enter_e = EDGE_SUCC (guard_bb, 0);
826 enter_e->flags &= ~EDGE_FALLTHRU;
827 enter_e->flags |= EDGE_FALSE_VALUE;
828 bsi = bsi_last (guard_bb);
830 then_label = build1 (GOTO_EXPR, void_type_node,
831 tree_block_label (exit_bb));
832 else_label = build1 (GOTO_EXPR, void_type_node,
833 tree_block_label (enter_e->dest));
834 cond_stmt = build (COND_EXPR, void_type_node, cond,
835 then_label, else_label);
836 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
837 /* Add new edge to connect entry block to the second loop. */
838 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
839 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
844 /* This function verifies that the following restrictions apply to LOOP:
846 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
847 (3) it is single entry, single exit
848 (4) its exit condition is the last stmt in the header
849 (5) E is the entry/exit edge of LOOP.
853 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
855 edge exit_e = loop->exit_edges [0];
856 edge entry_e = loop_preheader_edge (loop);
857 tree orig_cond = get_loop_exit_condition (loop);
858 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
860 if (any_marked_for_rewrite_p ())
864 /* All loops have an outer scope; the only case loop->outer is NULL is for
865 the function itself. */
867 || loop->num_nodes != 2
868 || !empty_block_p (loop->latch)
869 || loop->num_exits != 1
870 || loop->num_entries != 1
871 /* Verify that new loop exit condition can be trivially modified. */
872 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
873 || (e != exit_e && e != entry_e))
881 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
882 struct loop *second_loop)
884 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
885 basic_block loop2_entry_bb = second_loop->pre_header;
886 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
888 /* A guard that controls whether the second_loop is to be executed or skipped
889 is placed in first_loop->exit. first_loopt->exit therefore has two
890 successors - one is the preheader of second_loop, and the other is a bb
893 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
896 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
899 /* The preheader of new_loop is expected to have two predessors:
900 first_loop->exit and the block that precedes first_loop. */
902 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
903 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
904 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
905 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
906 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
908 /* Verify that the other successor of first_loopt->exit is after the
914 /* Function slpeel_tree_peel_loop_to_edge.
916 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
917 that is placed on the entry (exit) edge E of LOOP. After this transformation
918 we have two loops one after the other - first-loop iterates FIRST_NITERS
919 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
922 - LOOP: the loop to be peeled.
923 - E: the exit or entry edge of LOOP.
924 If it is the entry edge, we peel the first iterations of LOOP. In this
925 case first-loop is LOOP, and second-loop is the newly created loop.
926 If it is the exit edge, we peel the last iterations of LOOP. In this
927 case, first-loop is the newly created loop, and second-loop is LOOP.
928 - NITERS: the number of iterations that LOOP iterates.
929 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
930 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responssible
931 for updating the loop bound of the first-loop to FIRST_NITERS. If it
932 is false, the caller of this function may want to take care of this
933 (this can be usefull is we don't want new stmts added to first-loop).
936 The function returns a pointer to the new loop-copy, or NULL if it failed
937 to perform the trabsformation.
939 The function generates two if-then-else guards: one before the first loop,
940 and the other before the second loop:
942 if (FIRST_NITERS == 0) then skip the first loop,
943 and go directly to the second loop.
945 if (FIRST_NITERS == NITERS) then skip the second loop.
947 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
948 FORNOW the resulting code will not be in loop-closed-ssa form.
952 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
953 edge e, tree first_niters,
954 tree niters, bool update_first_loop_count)
956 struct loop *new_loop = NULL, *first_loop, *second_loop;
960 basic_block bb_before_second_loop, bb_after_second_loop;
961 basic_block bb_before_first_loop;
962 basic_block bb_between_loops;
963 edge exit_e = loop->exit_edges [0];
965 if (!slpeel_can_duplicate_loop_p (loop, e))
968 /* We have to initialize cfg_hooks. Then, when calling
969 cfg_hooks->split_edge, the function tree_split_edge
970 is actually called and, when calling cfg_hooks->duplicate_block,
971 the function tree_duplicate_bb is called. */
972 tree_register_cfg_hooks ();
975 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
976 Resulting CFG would be:
989 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
991 if (vect_debug_stats (loop) || vect_debug_details (loop))
992 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
998 /* NEW_LOOP was placed after LOOP. */
1000 second_loop = new_loop;
1004 /* NEW_LOOP was placed before LOOP. */
1005 first_loop = new_loop;
1009 definitions = marked_ssa_names ();
1010 allocate_new_names (definitions);
1011 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1012 rename_variables_in_loop (new_loop);
1015 /* 2. Add the guard that controls whether the first loop is executed.
1016 Resulting CFG would be:
1018 bb_before_first_loop:
1019 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1026 bb_before_second_loop:
1035 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1036 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1037 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1038 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1039 flow_loop_scan (first_loop, LOOP_ALL);
1040 flow_loop_scan (second_loop, LOOP_ALL);
1043 build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1044 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1045 bb_before_second_loop, bb_before_first_loop);
1046 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1047 first_loop == new_loop);
1050 /* 3. Add the guard that controls whether the second loop is executed.
1051 Resulting CFG would be:
1053 bb_before_first_loop:
1054 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1062 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1063 GOTO bb_before_second_loop
1065 bb_before_second_loop:
1071 bb_after_second_loop:
1076 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1077 add_bb_to_loop (bb_between_loops, first_loop->outer);
1078 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1079 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1080 flow_loop_scan (first_loop, LOOP_ALL);
1081 flow_loop_scan (second_loop, LOOP_ALL);
1083 pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters);
1084 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1085 bb_after_second_loop, bb_before_first_loop);
1086 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1087 second_loop == new_loop);
1089 /* Flow loop scan does not update loop->single_exit field. */
1090 first_loop->single_exit = first_loop->exit_edges[0];
1091 second_loop->single_exit = second_loop->exit_edges[0];
1093 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1095 if (update_first_loop_count)
1096 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1098 free_new_names (definitions);
1099 BITMAP_XFREE (definitions);
1100 unmark_all_for_rewrite ();
1106 /* Here the proper Vectorizer starts. */
1108 /*************************************************************************
1109 Vectorization Utilities.
1110 *************************************************************************/
1112 /* Function new_stmt_vec_info.
1114 Create and initialize a new stmt_vec_info struct for STMT. */
1117 new_stmt_vec_info (tree stmt, struct loop *loop)
1120 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1122 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1123 STMT_VINFO_STMT (res) = stmt;
1124 STMT_VINFO_LOOP (res) = loop;
1125 STMT_VINFO_RELEVANT_P (res) = 0;
1126 STMT_VINFO_VECTYPE (res) = NULL;
1127 STMT_VINFO_VEC_STMT (res) = NULL;
1128 STMT_VINFO_DATA_REF (res) = NULL;
1129 STMT_VINFO_MEMTAG (res) = NULL;
1130 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1136 /* Function new_loop_vec_info.
1138 Create and initialize a new loop_vec_info struct for LOOP, as well as
1139 stmt_vec_info structs for all the stmts in LOOP. */
1142 new_loop_vec_info (struct loop *loop)
1146 block_stmt_iterator si;
1149 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1151 bbs = get_loop_body (loop);
1153 /* Create stmt_info for all stmts in the loop. */
1154 for (i = 0; i < loop->num_nodes; i++)
1156 basic_block bb = bbs[i];
1157 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1159 tree stmt = bsi_stmt (si);
1162 get_stmt_operands (stmt);
1163 ann = stmt_ann (stmt);
1164 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1168 LOOP_VINFO_LOOP (res) = loop;
1169 LOOP_VINFO_BBS (res) = bbs;
1170 LOOP_VINFO_EXIT_COND (res) = NULL;
1171 LOOP_VINFO_NITERS (res) = NULL;
1172 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1173 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1174 LOOP_VINFO_VECT_FACTOR (res) = 0;
1175 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1176 "loop_write_datarefs");
1177 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1178 "loop_read_datarefs");
1179 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1185 /* Function destroy_loop_vec_info.
1187 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1188 stmts in the loop. */
1191 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1196 block_stmt_iterator si;
1202 loop = LOOP_VINFO_LOOP (loop_vinfo);
1204 bbs = LOOP_VINFO_BBS (loop_vinfo);
1205 nbbs = loop->num_nodes;
1207 for (j = 0; j < nbbs; j++)
1209 basic_block bb = bbs[j];
1210 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1212 tree stmt = bsi_stmt (si);
1213 stmt_ann_t ann = stmt_ann (stmt);
1214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1216 set_stmt_info (ann, NULL);
1220 free (LOOP_VINFO_BBS (loop_vinfo));
1221 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1222 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1228 /* Function debug_loop_stats.
1230 For vectorization statistics dumps. */
1233 vect_debug_stats (struct loop *loop)
1236 block_stmt_iterator si;
1237 tree node = NULL_TREE;
1239 if (!dump_file || !(dump_flags & TDF_STATS))
1244 fprintf (dump_file, "\n");
1253 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1255 node = bsi_stmt (si);
1256 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1260 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1261 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1263 fprintf (dump_file, "\nloop at %s:%d: ",
1264 EXPR_FILENAME (node), EXPR_LINENO (node));
1272 /* Function debug_loop_details.
1274 For vectorization debug dumps. */
1277 vect_debug_details (struct loop *loop)
1280 block_stmt_iterator si;
1281 tree node = NULL_TREE;
1283 if (!dump_file || !(dump_flags & TDF_DETAILS))
1288 fprintf (dump_file, "\n");
1297 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1299 node = bsi_stmt (si);
1300 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1304 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1305 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1307 fprintf (dump_file, "\nloop at %s:%d: ",
1308 EXPR_FILENAME (node), EXPR_LINENO (node));
1316 /* Function vect_get_ptr_offset
1318 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1321 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1322 tree vectype ATTRIBUTE_UNUSED,
1323 tree *offset ATTRIBUTE_UNUSED)
1325 /* TODO: Use alignment information. */
1330 /* Function vect_get_base_and_bit_offset
1332 Return the BASE of the data reference EXPR.
1333 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1334 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1335 bits of 'a.b[i] + 4B' from a.
1338 EXPR - the memory reference that is being analyzed
1339 DR - the data_reference struct of the _original_ memory reference
1340 (Note: DR_REF (DR) is not necessarily EXPR)
1341 VECTYPE - the type that defines the alignment (i.e, we compute
1342 alignment relative to TYPE_ALIGN(VECTYPE))
1345 BASE (returned value) - the base of the data reference EXPR.
1346 E.g, if EXPR is a.b[k].c[i][j] the returned
1348 OFFSET - offset of EXPR from BASE in bits
1349 BASE_ALIGNED_P - indicates if BASE is aligned
1351 If something unexpected is encountered (an unsupported form of data-ref),
1352 or if VECTYPE is given but OFFSET cannot be determined:
1353 then NULL_TREE is returned. */
1356 vect_get_base_and_bit_offset (struct data_reference *dr,
1359 loop_vec_info loop_vinfo,
1361 bool *base_aligned_p)
1363 tree this_offset = size_zero_node;
1364 tree base = NULL_TREE;
1366 tree oprnd0, oprnd1;
1367 struct data_reference *array_dr;
1368 enum tree_code code = TREE_CODE (expr);
1370 *base_aligned_p = false;
1374 /* These cases end the recursion: */
1376 *offset = size_zero_node;
1377 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1378 *base_aligned_p = true;
1385 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1388 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1390 base = vect_get_ptr_offset (expr, vectype, offset);
1392 *base_aligned_p = true;
1396 *base_aligned_p = true;
1397 *offset = size_zero_node;
1403 *offset = int_const_binop (MULT_EXPR, expr,
1404 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1407 /* These cases continue the recursion: */
1409 oprnd0 = TREE_OPERAND (expr, 0);
1410 oprnd1 = TREE_OPERAND (expr, 1);
1412 this_offset = bit_position (oprnd1);
1413 if (vectype && !host_integerp (this_offset, 1))
1419 oprnd0 = TREE_OPERAND (expr, 0);
1424 oprnd0 = TREE_OPERAND (expr, 0);
1429 if (DR_REF (dr) != expr)
1430 /* Build array data_reference struct if the existing DR_REF
1431 doesn't match EXPR. This happens, for example, when the
1432 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1433 contains information on the access of T, not of arr. In order
1434 to continue the analysis, we create a new DR struct that
1435 describes the access of arr.
1437 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1441 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1442 vectype, &this_offset);
1447 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1449 *offset = this_offset;
1450 *base_aligned_p = true;
1457 /* In case we have a PLUS_EXPR of the form
1458 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1459 This is verified in vect_get_symbl_and_dr. */
1460 oprnd0 = TREE_OPERAND (expr, 0);
1461 oprnd1 = TREE_OPERAND (expr, 1);
1463 base = vect_get_base_and_bit_offset
1464 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1465 if (vectype && !base)
1475 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1476 loop_vinfo, offset, base_aligned_p);
1478 if (vectype && base)
1480 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1481 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1484 if (vect_debug_details (NULL))
1486 print_generic_expr (dump_file, expr, TDF_SLIM);
1487 fprintf (dump_file, " --> total offset for ref: ");
1488 print_generic_expr (dump_file, *offset, TDF_SLIM);
1495 /* Function vect_force_dr_alignment_p.
1497 Returns whether the alignment of a DECL can be forced to be aligned
1498 on ALIGNMENT bit boundary. */
1501 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1503 if (TREE_CODE (decl) != VAR_DECL)
1506 if (DECL_EXTERNAL (decl))
1509 if (TREE_STATIC (decl))
1510 return (alignment <= MAX_OFILE_ALIGNMENT);
1512 /* This is not 100% correct. The absolute correct stack alignment
1513 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1514 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1515 However, until someone implements forced stack alignment, SSE
1516 isn't really usable without this. */
1517 return (alignment <= PREFERRED_STACK_BOUNDARY);
1521 /* Function vect_get_new_vect_var.
1523 Returns a name for a new variable. The current naming scheme appends the
1524 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1525 the name of vectorizer generated variables, and appends that to NAME if
1529 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1535 if (var_kind == vect_simple_var)
1540 prefix_len = strlen (prefix);
1543 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1545 new_vect_var = create_tmp_var (type, prefix);
1547 return new_vect_var;
1551 /* Function vect_create_index_for_vector_ref.
1553 Create (and return) an index variable, along with it's update chain in the
1554 loop. This variable will be used to access a memory location in a vector
1558 LOOP: The loop being vectorized.
1559 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1560 function can be added here, or in the loop pre-header.
1563 Return an index that will be used to index a vector array. It is expected
1564 that a pointer to the first vector will be used as the base address for the
1567 FORNOW: we are not trying to be efficient, just creating a new index each
1568 time from scratch. At this time all vector references could use the same
1571 TODO: create only one index to be used by all vector references. Record
1572 the index in the LOOP_VINFO the first time this procedure is called and
1573 return it on subsequent calls. The increment of this index must be placed
1574 just before the conditional expression that ends the single block loop. */
1577 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1580 tree indx_before_incr, indx_after_incr;
1582 /* It is assumed that the base pointer used for vectorized access contains
1583 the address of the first vector. Therefore the index used for vectorized
1584 access must be initialized to zero and incremented by 1. */
1586 init = integer_zero_node;
1587 step = integer_one_node;
1589 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1590 create_iv (init, step, NULL_TREE, loop, bsi, false,
1591 &indx_before_incr, &indx_after_incr);
1593 return indx_before_incr;
1597 /* Function vect_create_addr_base_for_vector_ref.
1599 Create an expression that computes the address of the first memory location
1600 that will be accessed for a data reference.
1603 STMT: The statement containing the data reference.
1604 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1605 OFFSET: Optional. If supplied, it is be added to the initial address.
1608 1. Return an SSA_NAME whose value is the address of the memory location of
1609 the first vector of the data reference.
1610 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1611 these statement(s) which define the returned SSA_NAME.
1613 FORNOW: We are only handling array accesses with step 1. */
1616 vect_create_addr_base_for_vector_ref (tree stmt,
1617 tree *new_stmt_list,
1620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1621 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1622 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1623 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1624 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1625 tree ref = DR_REF (dr);
1626 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1627 tree scalar_type = TREE_TYPE (ref);
1628 tree scalar_ptr_type = build_pointer_type (scalar_type);
1630 tree init_val, step, init_oval;
1632 bool is_ptr_ref, is_array_ref, is_addr_expr;
1637 tree addr_base, addr_expr;
1638 tree dest, new_stmt;
1640 /* Only the access function of the last index is relevant (i_n in
1641 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1642 access_fn = DR_ACCESS_FN (dr, 0);
1643 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1646 init_oval = integer_zero_node;
1648 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1649 && TREE_CODE (data_ref_base) == SSA_NAME;
1650 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1651 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1652 || TREE_CODE (data_ref_base) == PLUS_EXPR
1653 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1654 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1656 /** Create: &(base[init_val])
1658 if data_ref_base is an ARRAY_TYPE:
1659 base = data_ref_base
1661 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1662 base = *((scalar_array *) data_ref_base)
1666 array_base = data_ref_base;
1667 else /* is_ptr_ref or is_addr_expr */
1669 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1670 tree scalar_array_type = build_array_type (scalar_type, 0);
1671 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1672 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1673 add_referenced_tmp_var (array_ptr);
1675 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1676 add_referenced_tmp_var (dest);
1678 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1679 append_to_statement_list_force (new_stmt, new_stmt_list);
1681 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1682 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1683 new_temp = make_ssa_name (array_ptr, vec_stmt);
1684 TREE_OPERAND (vec_stmt, 0) = new_temp;
1685 append_to_statement_list_force (vec_stmt, new_stmt_list);
1688 array_base = build_fold_indirect_ref (new_temp);
1691 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1692 add_referenced_tmp_var (dest);
1693 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1694 append_to_statement_list_force (new_stmt, new_stmt_list);
1698 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1699 add_referenced_tmp_var (tmp);
1700 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1701 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1702 init_val = make_ssa_name (tmp, vec_stmt);
1703 TREE_OPERAND (vec_stmt, 0) = init_val;
1704 append_to_statement_list_force (vec_stmt, new_stmt_list);
1707 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1708 NULL_TREE, NULL_TREE);
1709 addr_base = build_fold_addr_expr (array_ref);
1711 /* addr_expr = addr_base */
1712 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1713 get_name (base_name));
1714 add_referenced_tmp_var (addr_expr);
1715 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1716 new_temp = make_ssa_name (addr_expr, vec_stmt);
1717 TREE_OPERAND (vec_stmt, 0) = new_temp;
1718 append_to_statement_list_force (vec_stmt, new_stmt_list);
1724 /* Function get_vectype_for_scalar_type.
1726 Returns the vector type corresponding to SCALAR_TYPE as supported
1730 get_vectype_for_scalar_type (tree scalar_type)
1732 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1733 int nbytes = GET_MODE_SIZE (inner_mode);
1740 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1742 nunits = UNITS_PER_SIMD_WORD / nbytes;
1744 vectype = build_vector_type (scalar_type, nunits);
1745 if (vect_debug_details (NULL))
1747 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1748 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1754 if (vect_debug_details (NULL))
1756 fprintf (dump_file, "vectype: ");
1757 print_generic_expr (dump_file, vectype, TDF_SLIM);
1760 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1762 /* TODO: tree-complex.c sometimes can parallelize operations
1763 on generic vectors. We can vectorize the loop in that case,
1764 but then we should re-run the lowering pass. */
1765 if (vect_debug_details (NULL))
1766 fprintf (dump_file, "mode not supported by target.");
1774 /* Function vect_align_data_ref.
1776 Handle mislignment of a memory accesses.
1778 FORNOW: Can't handle misaligned accesses.
1779 Make sure that the dataref is aligned. */
1782 vect_align_data_ref (tree stmt)
1784 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1785 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1787 /* FORNOW: can't handle misaligned accesses;
1788 all accesses expected to be aligned. */
1789 gcc_assert (aligned_access_p (dr));
1793 /* Function vect_create_data_ref_ptr.
1795 Create a memory reference expression for vector access, to be used in a
1796 vector load/store stmt. The reference is based on a new pointer to vector
1800 1. STMT: a stmt that references memory. Expected to be of the form
1801 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1802 2. BSI: block_stmt_iterator where new stmts can be added.
1803 3. OFFSET (optional): an offset to be added to the initial address accessed
1804 by the data-ref in STMT.
1805 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1806 pointing to the initial address.
1809 1. Declare a new ptr to vector_type, and have it point to the base of the
1810 data reference (initial addressed accessed by the data reference).
1811 For example, for vector of type V8HI, the following code is generated:
1814 vp = (v8hi *)initial_address;
1816 if OFFSET is not supplied:
1817 initial_address = &a[init];
1818 if OFFSET is supplied:
1819 initial_address = &a[init + OFFSET];
1821 Return the initial_address in INITIAL_ADDRESS.
1823 2. Create a data-reference in the loop based on the new vector pointer vp,
1824 and using a new index variable 'idx' as follows:
1828 where if ONLY_INIT is true:
1831 update = idx + vector_type_size
1833 Return the pointer vp'.
1836 FORNOW: handle only aligned and consecutive accesses. */
1839 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1840 tree *initial_address, bool only_init)
1843 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1844 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1845 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1846 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1850 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1851 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1852 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1853 int nvuses, nv_may_defs, nv_must_defs;
1857 tree new_stmt_list = NULL_TREE;
1859 edge pe = loop_preheader_edge (loop);
1866 base_name = unshare_expr (DR_BASE_NAME (dr));
1867 if (vect_debug_details (NULL))
1869 tree data_ref_base = base_name;
1870 fprintf (dump_file, "create array_ref of type: ");
1871 print_generic_expr (dump_file, vectype, TDF_SLIM);
1872 if (TREE_CODE (data_ref_base) == VAR_DECL)
1873 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1874 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1875 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1876 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1877 fprintf (dump_file, "vectorizing a record based array ref: ");
1878 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1879 fprintf (dump_file, "vectorizing a pointer ref: ");
1880 print_generic_expr (dump_file, base_name, TDF_SLIM);
1883 /** (1) Create the new vector-pointer variable: **/
1885 vect_ptr_type = build_pointer_type (vectype);
1886 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1887 get_name (base_name));
1888 add_referenced_tmp_var (vect_ptr);
1891 /** (2) Handle aliasing information of the new vector-pointer: **/
1893 tag = STMT_VINFO_MEMTAG (stmt_info);
1895 get_var_ann (vect_ptr)->type_mem_tag = tag;
1897 /* Mark for renaming all aliased variables
1898 (i.e, the may-aliases of the type-mem-tag). */
1899 nvuses = NUM_VUSES (vuses);
1900 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1901 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1902 for (i = 0; i < nvuses; i++)
1904 tree use = VUSE_OP (vuses, i);
1905 if (TREE_CODE (use) == SSA_NAME)
1906 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1908 for (i = 0; i < nv_may_defs; i++)
1910 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1911 if (TREE_CODE (def) == SSA_NAME)
1912 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1914 for (i = 0; i < nv_must_defs; i++)
1916 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1917 if (TREE_CODE (def) == SSA_NAME)
1918 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1922 /** (3) Calculate the initial address the vector-pointer, and set
1923 the vector-pointer to point to it before the loop: **/
1925 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1926 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1928 pe = loop_preheader_edge (loop);
1929 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1930 gcc_assert (!new_bb);
1931 *initial_address = new_temp;
1933 /* Create: p = (vectype *) initial_base */
1934 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1935 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1936 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1937 TREE_OPERAND (vec_stmt, 0) = new_temp;
1938 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1939 gcc_assert (!new_bb);
1940 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1943 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1945 if (only_init) /* No update in loop is required. */
1946 return vect_ptr_init;
1948 idx = vect_create_index_for_vector_ref (loop, bsi);
1950 /* Create: update = idx * vectype_size */
1951 ptr_update = create_tmp_var (integer_type_node, "update");
1952 add_referenced_tmp_var (ptr_update);
1953 vectype_size = build_int_cst (integer_type_node,
1954 GET_MODE_SIZE (TYPE_MODE (vectype)));
1955 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1956 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1957 new_temp = make_ssa_name (ptr_update, vec_stmt);
1958 TREE_OPERAND (vec_stmt, 0) = new_temp;
1959 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1961 /* Create: data_ref_ptr = vect_ptr_init + update */
1962 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1963 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1964 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1965 TREE_OPERAND (vec_stmt, 0) = new_temp;
1966 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1967 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1969 return data_ref_ptr;
1973 /* Function vect_create_destination_var.
1975 Create a new temporary of type VECTYPE. */
1978 vect_create_destination_var (tree scalar_dest, tree vectype)
1981 const char *new_name;
1983 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1985 new_name = get_name (scalar_dest);
1988 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1989 add_referenced_tmp_var (vec_dest);
1995 /* Function vect_init_vector.
1997 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1998 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1999 used in the vectorization of STMT. */
2002 vect_init_vector (tree stmt, tree vector_var)
2004 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2005 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2008 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2014 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2015 add_referenced_tmp_var (new_var);
2017 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2018 new_temp = make_ssa_name (new_var, init_stmt);
2019 TREE_OPERAND (init_stmt, 0) = new_temp;
2021 pe = loop_preheader_edge (loop);
2022 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2023 gcc_assert (!new_bb);
2025 if (vect_debug_details (NULL))
2027 fprintf (dump_file, "created new init_stmt: ");
2028 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2031 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2036 /* Function vect_get_vec_def_for_operand.
2038 OP is an operand in STMT. This function returns a (vector) def that will be
2039 used in the vectorized stmt for STMT.
2041 In the case that OP is an SSA_NAME which is defined in the loop, then
2042 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2044 In case OP is an invariant or constant, a new stmt that creates a vector def
2045 needs to be introduced. */
2048 vect_get_vec_def_for_operand (tree op, tree stmt)
2053 stmt_vec_info def_stmt_info = NULL;
2054 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2055 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2056 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2057 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2064 if (vect_debug_details (NULL))
2066 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2067 print_generic_expr (dump_file, op, TDF_SLIM);
2070 /** ===> Case 1: operand is a constant. **/
2072 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2074 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2078 /* Build a tree with vector elements. */
2079 if (vect_debug_details (NULL))
2080 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2082 for (i = nunits - 1; i >= 0; --i)
2084 t = tree_cons (NULL_TREE, op, t);
2086 vec_cst = build_vector (vectype, t);
2087 return vect_init_vector (stmt, vec_cst);
2090 gcc_assert (TREE_CODE (op) == SSA_NAME);
2092 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2094 def_stmt = SSA_NAME_DEF_STMT (op);
2095 def_stmt_info = vinfo_for_stmt (def_stmt);
2097 if (vect_debug_details (NULL))
2099 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2100 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2104 /** ==> Case 2.1: operand is defined inside the loop. **/
2108 /* Get the def from the vectorized stmt. */
2110 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2111 gcc_assert (vec_stmt);
2112 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2117 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2118 it is a reduction/induction. **/
2120 bb = bb_for_stmt (def_stmt);
2121 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2123 if (vect_debug_details (NULL))
2124 fprintf (dump_file, "reduction/induction - unsupported.");
2125 internal_error ("no support for reduction/induction"); /* FORNOW */
2129 /** ==> Case 2.3: operand is defined outside the loop -
2130 it is a loop invariant. */
2132 switch (TREE_CODE (def_stmt))
2135 def = PHI_RESULT (def_stmt);
2138 def = TREE_OPERAND (def_stmt, 0);
2141 def = TREE_OPERAND (def_stmt, 0);
2142 gcc_assert (IS_EMPTY_STMT (def_stmt));
2146 if (vect_debug_details (NULL))
2148 fprintf (dump_file, "unsupported defining stmt: ");
2149 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2151 internal_error ("unsupported defining stmt");
2154 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2156 if (vect_debug_details (NULL))
2157 fprintf (dump_file, "Create vector_inv.");
2159 for (i = nunits - 1; i >= 0; --i)
2161 t = tree_cons (NULL_TREE, def, t);
2164 vec_inv = build_constructor (vectype, t);
2165 return vect_init_vector (stmt, vec_inv);
2169 /* Function vect_finish_stmt_generation.
2171 Insert a new stmt. */
2174 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2176 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2178 if (vect_debug_details (NULL))
2180 fprintf (dump_file, "add new stmt: ");
2181 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2184 /* Make sure bsi points to the stmt that is being vectorized. */
2186 /* Assumption: any stmts created for the vectorization of stmt S were
2187 inserted before S. BSI is expected to point to S or some new stmt before S.
2190 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2192 gcc_assert (stmt == bsi_stmt (*bsi));
2196 /* Function vectorizable_assignment.
2198 Check if STMT performs an assignment (copy) that can be vectorized.
2199 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2200 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2201 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2204 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2211 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2212 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2215 /* Is vectorizable assignment? */
2217 if (TREE_CODE (stmt) != MODIFY_EXPR)
2220 scalar_dest = TREE_OPERAND (stmt, 0);
2221 if (TREE_CODE (scalar_dest) != SSA_NAME)
2224 op = TREE_OPERAND (stmt, 1);
2225 if (!vect_is_simple_use (op, loop, NULL))
2227 if (vect_debug_details (NULL))
2228 fprintf (dump_file, "use not simple.");
2232 if (!vec_stmt) /* transformation not required. */
2234 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2239 if (vect_debug_details (NULL))
2240 fprintf (dump_file, "transform assignment.");
2243 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2246 op = TREE_OPERAND (stmt, 1);
2247 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2249 /* Arguments are ready. create the new vector stmt. */
2250 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2251 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2252 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2253 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2259 /* Function vectorizable_operation.
2261 Check if STMT performs a binary or unary operation that can be vectorized.
2262 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2263 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2264 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2267 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2272 tree op0, op1 = NULL;
2273 tree vec_oprnd0, vec_oprnd1=NULL;
2274 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2275 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2276 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2278 enum tree_code code;
2279 enum machine_mode vec_mode;
2285 /* Is STMT a vectorizable binary/unary operation? */
2286 if (TREE_CODE (stmt) != MODIFY_EXPR)
2289 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2292 operation = TREE_OPERAND (stmt, 1);
2293 code = TREE_CODE (operation);
2294 optab = optab_for_tree_code (code, vectype);
2296 /* Support only unary or binary operations. */
2297 op_type = TREE_CODE_LENGTH (code);
2298 if (op_type != unary_op && op_type != binary_op)
2300 if (vect_debug_details (NULL))
2301 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2305 for (i = 0; i < op_type; i++)
2307 op = TREE_OPERAND (operation, i);
2308 if (!vect_is_simple_use (op, loop, NULL))
2310 if (vect_debug_details (NULL))
2311 fprintf (dump_file, "use not simple.");
2316 /* Supportable by target? */
2319 if (vect_debug_details (NULL))
2320 fprintf (dump_file, "no optab.");
2323 vec_mode = TYPE_MODE (vectype);
2324 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2326 if (vect_debug_details (NULL))
2327 fprintf (dump_file, "op not supported by target.");
2331 if (!vec_stmt) /* transformation not required. */
2333 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2339 if (vect_debug_details (NULL))
2340 fprintf (dump_file, "transform binary/unary operation.");
2343 scalar_dest = TREE_OPERAND (stmt, 0);
2344 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2347 op0 = TREE_OPERAND (operation, 0);
2348 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2350 if (op_type == binary_op)
2352 op1 = TREE_OPERAND (operation, 1);
2353 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2356 /* Arguments are ready. create the new vector stmt. */
2358 if (op_type == binary_op)
2359 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2360 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2362 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2363 build1 (code, vectype, vec_oprnd0));
2364 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2365 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2366 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2372 /* Function vectorizable_store.
2374 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2376 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2377 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2378 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2381 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2388 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2389 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2390 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2391 enum machine_mode vec_mode;
2393 enum dr_alignment_support alignment_support_cheme;
2395 /* Is vectorizable store? */
2397 if (TREE_CODE (stmt) != MODIFY_EXPR)
2400 scalar_dest = TREE_OPERAND (stmt, 0);
2401 if (TREE_CODE (scalar_dest) != ARRAY_REF
2402 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2405 op = TREE_OPERAND (stmt, 1);
2406 if (!vect_is_simple_use (op, loop, NULL))
2408 if (vect_debug_details (NULL))
2409 fprintf (dump_file, "use not simple.");
2413 vec_mode = TYPE_MODE (vectype);
2414 /* FORNOW. In some cases can vectorize even if data-type not supported
2415 (e.g. - array initialization with 0). */
2416 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2419 if (!STMT_VINFO_DATA_REF (stmt_info))
2423 if (!vec_stmt) /* transformation not required. */
2425 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2431 if (vect_debug_details (NULL))
2432 fprintf (dump_file, "transform store");
2434 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2435 gcc_assert (alignment_support_cheme);
2436 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2438 /* Handle use - get the vectorized def from the defining stmt. */
2439 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2442 /* FORNOW: make sure the data reference is aligned. */
2443 vect_align_data_ref (stmt);
2444 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2445 data_ref = build_fold_indirect_ref (data_ref);
2447 /* Arguments are ready. create the new vector stmt. */
2448 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2449 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2455 /* vectorizable_load.
2457 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2459 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2460 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2461 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2464 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2467 tree vec_dest = NULL;
2468 tree data_ref = NULL;
2470 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2471 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2472 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2480 edge pe = loop_preheader_edge (loop);
2481 enum dr_alignment_support alignment_support_cheme;
2483 /* Is vectorizable load? */
2485 if (TREE_CODE (stmt) != MODIFY_EXPR)
2488 scalar_dest = TREE_OPERAND (stmt, 0);
2489 if (TREE_CODE (scalar_dest) != SSA_NAME)
2492 op = TREE_OPERAND (stmt, 1);
2493 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2496 if (!STMT_VINFO_DATA_REF (stmt_info))
2499 mode = (int) TYPE_MODE (vectype);
2501 /* FORNOW. In some cases can vectorize even if data-type not supported
2502 (e.g. - data copies). */
2503 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2505 if (vect_debug_details (loop))
2506 fprintf (dump_file, "Aligned load, but unsupported type.");
2510 if (!vec_stmt) /* transformation not required. */
2512 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2518 if (vect_debug_details (NULL))
2519 fprintf (dump_file, "transform load.");
2521 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2522 gcc_assert (alignment_support_cheme);
2524 if (alignment_support_cheme == dr_aligned
2525 || alignment_support_cheme == dr_unaligned_supported)
2536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2537 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2538 if (aligned_access_p (dr))
2539 data_ref = build_fold_indirect_ref (data_ref);
2542 int mis = DR_MISALIGNMENT (dr);
2543 tree tmis = (mis == -1 ?
2545 build_int_cst (integer_type_node, mis));
2546 tmis = int_const_binop (MULT_EXPR, tmis,
2547 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2548 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2550 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2551 new_temp = make_ssa_name (vec_dest, new_stmt);
2552 TREE_OPERAND (new_stmt, 0) = new_temp;
2553 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2555 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2559 msq_init = *(floor(p1))
2560 p2 = initial_addr + VS - 1;
2561 magic = have_builtin ? builtin_result : initial_address;
2564 p2' = p2 + indx * vectype_size
2566 vec_dest = realign_load (msq, lsq, magic)
2580 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2581 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2582 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2584 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2585 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2586 new_temp = make_ssa_name (vec_dest, new_stmt);
2587 TREE_OPERAND (new_stmt, 0) = new_temp;
2588 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2589 gcc_assert (!new_bb);
2590 msq_init = TREE_OPERAND (new_stmt, 0);
2593 /* <2> Create lsq = *(floor(p2')) in the loop */
2594 offset = build_int_cst (integer_type_node,
2595 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2596 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2597 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2598 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2599 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2600 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2601 new_temp = make_ssa_name (vec_dest, new_stmt);
2602 TREE_OPERAND (new_stmt, 0) = new_temp;
2603 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2604 lsq = TREE_OPERAND (new_stmt, 0);
2608 if (targetm.vectorize.builtin_mask_for_load)
2610 /* Create permutation mask, if required, in loop preheader. */
2612 params = build_tree_list (NULL_TREE, init_addr);
2613 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2614 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2615 new_stmt = build_function_call_expr (builtin_decl, params);
2616 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2617 new_temp = make_ssa_name (vec_dest, new_stmt);
2618 TREE_OPERAND (new_stmt, 0) = new_temp;
2619 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2620 gcc_assert (!new_bb);
2621 magic = TREE_OPERAND (new_stmt, 0);
2625 /* Use current address instead of init_addr for reduced reg pressure.
2627 magic = dataref_ptr;
2631 /* <4> Create msq = phi <msq_init, lsq> in loop */
2632 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2633 msq = make_ssa_name (vec_dest, NULL_TREE);
2634 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2635 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2636 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2637 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2640 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2641 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2642 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2643 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2644 new_temp = make_ssa_name (vec_dest, new_stmt);
2645 TREE_OPERAND (new_stmt, 0) = new_temp;
2646 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2651 *vec_stmt = new_stmt;
2656 /* Function vect_supportable_dr_alignment
2658 Return whether the data reference DR is supported with respect to its
2661 static enum dr_alignment_support
2662 vect_supportable_dr_alignment (struct data_reference *dr)
2664 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2665 enum machine_mode mode = (int) TYPE_MODE (vectype);
2667 if (aligned_access_p (dr))
2670 /* Possibly unaligned access. */
2672 if (DR_IS_READ (dr))
2674 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2675 && (!targetm.vectorize.builtin_mask_for_load
2676 || targetm.vectorize.builtin_mask_for_load ()))
2677 return dr_unaligned_software_pipeline;
2679 if (targetm.vectorize.misaligned_mem_ok (mode))
2680 /* Can't software pipeline the loads. */
2681 return dr_unaligned_supported;
2685 return dr_unaligned_unsupported;
2689 /* Function vect_transform_stmt.
2691 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2694 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2696 bool is_store = false;
2697 tree vec_stmt = NULL_TREE;
2698 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2701 switch (STMT_VINFO_TYPE (stmt_info))
2703 case op_vec_info_type:
2704 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2708 case assignment_vec_info_type:
2709 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2713 case load_vec_info_type:
2714 done = vectorizable_load (stmt, bsi, &vec_stmt);
2718 case store_vec_info_type:
2719 done = vectorizable_store (stmt, bsi, &vec_stmt);
2724 if (vect_debug_details (NULL))
2725 fprintf (dump_file, "stmt not supported.");
2729 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2735 /* This function builds ni_name = number of iterations loop executes
2736 on the loop preheader. */
2739 vect_build_loop_niters (loop_vec_info loop_vinfo)
2741 tree ni_name, stmt, var;
2743 basic_block new_bb = NULL;
2744 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2745 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2747 var = create_tmp_var (TREE_TYPE (ni), "niters");
2748 add_referenced_tmp_var (var);
2749 if (TREE_CODE (ni) == INTEGER_CST)
2751 /* This case is generated when treating a known loop bound
2752 indivisible by VF. Here we cannot use force_gimple_operand. */
2753 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2754 ni_name = make_ssa_name (var, stmt);
2755 TREE_OPERAND (stmt, 0) = ni_name;
2758 ni_name = force_gimple_operand (ni, &stmt, false, var);
2760 pe = loop_preheader_edge (loop);
2762 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2764 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2770 /* This function generates the following statements:
2772 ni_name = number of iterations loop executes
2773 ratio = ni_name / vf
2774 ratio_mult_vf_name = ratio * vf
2776 and places them at the loop preheader edge. */
2779 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2780 tree *ratio_mult_vf_name_p, tree *ratio_p)
2787 tree ratio_mult_vf_name, ratio_mult_vf;
2788 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2789 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2793 /* Generate temporary variable that contains
2794 number of iterations loop executes. */
2796 ni_name = vect_build_loop_niters (loop_vinfo);
2799 vf is power of 2; then if ratio = = n >> log2 (vf). */
2800 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2801 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2803 /* Update initial conditions of loop copy. */
2805 /* ratio_mult_vf = ratio * vf;
2806 then if ratio_mult_vf = ratio << log2 (vf). */
2808 i = exact_log2 (vf);
2809 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2810 add_referenced_tmp_var (ratio_mult_vf);
2812 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2814 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2815 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2816 ratio, build_int_cst (unsigned_type_node,
2819 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2821 pe = loop_preheader_edge (loop);
2822 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2824 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2826 *ni_name_p = ni_name;
2827 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2834 /* This function generates stmt
2838 and attaches it to preheader of LOOP. */
2841 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2843 tree var, stmt, var_name;
2848 /* create temporary variable */
2849 var = create_tmp_var (TREE_TYPE (n), "bnd");
2850 add_referenced_tmp_var (var);
2852 var_name = make_ssa_name (var, NULL_TREE);
2854 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2856 i = exact_log2 (vf);
2857 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2858 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2859 n, build_int_cst (unsigned_type_node,i)));
2861 SSA_NAME_DEF_STMT (var_name) = stmt;
2863 pe = loop_preheader_edge (loop);
2864 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2866 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2868 if (vect_debug_details (NULL))
2869 fprintf (dump_file, "New bb on preheader edge was not generated.");
2875 /* Function vect_transform_loop_bound.
2877 Create a new exit condition for the loop. */
2880 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2882 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2883 tree orig_cond_expr;
2884 HOST_WIDE_INT old_N = 0;
2886 tree new_loop_bound;
2890 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2893 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2895 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2897 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2898 #ifdef ENABLE_CHECKING
2899 gcc_assert (orig_cond_expr);
2902 /* new loop exit test: */
2903 lb_type = TREE_TYPE (TREE_OPERAND (COND_EXPR_COND (orig_cond_expr), 1));
2906 fold_convert (lb_type, build_int_cst (unsigned_type_node, old_N/vf));
2908 new_loop_bound = niters;
2910 slpeel_make_loop_iterate_ntimes (loop, new_loop_bound);
2914 /* Function vect_update_ivs_after_vectorizer.
2916 "Advance" the induction variables of LOOP to the value they should take
2917 after the execution of LOOP. This is currently necessary because the
2918 vectorizer does not handle induction variables that are used after the
2919 loop. Such a situation occurs when the last iterations of LOOP are
2921 1. We introduced new uses after LOOP for IVs that were not originally used
2922 after LOOP: the IVs of LOOP are now used by an epilog loop.
2923 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2924 times, whereas the loop IVs should be bumped N times.
2927 - LOOP - a loop that is going to be vectorized. The last few iterations
2928 of LOOP were peeled.
2929 - NITERS - the number of iterations that LOOP executes (before it is
2930 vectorized). i.e, the number of times the ivs should be bumped.
2931 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2932 coming out from LOOP on which there are uses of the LOOP ivs
2933 (this is the path from LOOP->exit to epilog_loop->preheader).
2935 The new definitions of the ivs are placed in LOOP->exit.
2936 The phi args associated with the edge UPDATE_E in the bb
2937 UPDATE_E->dest are updated accordingly.
2939 Assumption 1: Like the rest of the vectorizer, this function assumes
2940 a single loop exit that has a single predecessor.
2942 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2943 organized in the same order.
2945 Assumption 3: The access function of the ivs is simple enough (see
2946 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2948 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2949 coming out of LOOP on which the ivs of LOOP are used (this is the path
2950 that leads to the epilog loop; other paths skip the epilog loop). This
2951 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2952 needs to have its phis updated.
2956 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2958 basic_block exit_bb = loop->exit_edges[0]->dest;
2960 basic_block update_bb = update_e->dest;
2962 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2964 /* Make sure there exists a single-predecessor exit bb: */
2965 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2967 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2969 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2971 tree access_fn = NULL;
2972 tree evolution_part;
2975 tree var, stmt, ni, ni_name;
2976 block_stmt_iterator last_bsi;
2978 /* Skip virtual phi's. */
2979 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2981 if (vect_debug_details (NULL))
2982 fprintf (dump_file, "virtual phi. skip.");
2986 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2987 gcc_assert (access_fn);
2989 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2990 gcc_assert (evolution_part != NULL_TREE);
2992 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2993 of degree >= 2 or exponential. */
2994 gcc_assert (!tree_is_chrec (evolution_part));
2996 step_expr = evolution_part;
2997 init_expr = unshare_expr (initial_condition (access_fn));
2999 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3000 build2 (MULT_EXPR, TREE_TYPE (niters),
3001 niters, step_expr), init_expr);
3003 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3004 add_referenced_tmp_var (var);
3006 ni_name = force_gimple_operand (ni, &stmt, false, var);
3008 /* Insert stmt into exit_bb. */
3009 last_bsi = bsi_last (exit_bb);
3011 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3013 /* Fix phi expressions in the successor bb. */
3014 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3015 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3016 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3021 /* Function vect_do_peeling_for_loop_bound
3023 Peel the last iterations of the loop represented by LOOP_VINFO.
3024 The peeled iterations form a new epilog loop. Given that the loop now
3025 iterates NITERS times, the new epilog loop iterates
3026 NITERS % VECTORIZATION_FACTOR times.
3028 The original loop will later be made to iterate
3029 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3032 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3033 struct loops *loops)
3036 tree ni_name, ratio_mult_vf_name;
3037 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3038 struct loop *new_loop;
3040 #ifdef ENABLE_CHECKING
3044 if (vect_debug_details (NULL))
3045 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3047 /* Generate the following variables on the preheader of original loop:
3049 ni_name = number of iteration the original loop executes
3050 ratio = ni_name / vf
3051 ratio_mult_vf_name = ratio * vf */
3052 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3053 &ratio_mult_vf_name, ratio);
3055 /* Update loop info. */
3056 loop->pre_header = loop_preheader_edge (loop)->src;
3057 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3059 #ifdef ENABLE_CHECKING
3060 loop_num = loop->num;
3062 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3063 ratio_mult_vf_name, ni_name, false);
3064 #ifdef ENABLE_CHECKING
3065 gcc_assert (new_loop);
3066 gcc_assert (loop_num == loop->num);
3067 slpeel_verify_cfg_after_peeling (loop, new_loop);
3070 /* A guard that controls whether the new_loop is to be executed or skipped
3071 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3072 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3073 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3074 is on the path where the LOOP IVs are used and need to be updated. */
3076 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3077 update_e = EDGE_PRED (new_loop->pre_header, 0);
3079 update_e = EDGE_PRED (new_loop->pre_header, 1);
3081 /* Update IVs of original loop as if they were advanced
3082 by ratio_mult_vf_name steps. */
3083 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3085 /* After peeling we have to reset scalar evolution analyzer. */
3092 /* Function vect_gen_niters_for_prolog_loop
3094 Set the number of iterations for the loop represented by LOOP_VINFO
3095 to the minimum between NITERS (the original iteration count of the loop)
3096 and the misalignment of DR - the first data reference recorded in
3097 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3098 this loop, the data reference DR will refer to an aligned location. */
3101 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3103 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3104 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3105 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3107 tree iters, iters_name;
3110 tree dr_stmt = DR_STMT (dr);
3111 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3112 tree start_addr, byte_miss_align, elem_miss_align;
3113 int vec_type_align =
3114 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3117 tree new_stmt_list = NULL_TREE;
3119 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3120 &new_stmt_list, NULL_TREE);
3122 pe = loop_preheader_edge (loop);
3123 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3125 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3128 build (BIT_AND_EXPR, integer_type_node, start_addr,
3129 build (MINUS_EXPR, integer_type_node,
3130 build_int_cst (unsigned_type_node,
3131 vec_type_align), integer_one_node));
3132 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3133 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3134 byte_miss_align, tmp1);
3137 build (BIT_AND_EXPR, integer_type_node,
3138 build (MINUS_EXPR, integer_type_node,
3139 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3140 build (MINUS_EXPR, integer_type_node,
3141 build_int_cst (unsigned_type_node, vf), integer_one_node));
3143 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3144 var = create_tmp_var (TREE_TYPE (iters), "iters");
3145 add_referenced_tmp_var (var);
3146 iters_name = force_gimple_operand (iters, &stmt, false, var);
3148 /* Insert stmt on loop preheader edge. */
3149 pe = loop_preheader_edge (loop);
3151 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3153 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3159 /* Function vect_update_inits_of_dr
3161 NITERS iterations were peeled from LOOP. DR represents a data reference
3162 in LOOP. This function updates the information recorded in DR to
3163 account for the fact that the first NITERS iterations had already been
3164 executed. Specifically, it updates the initial_condition of the
3165 access_function of DR. */
3168 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3171 tree access_fn = DR_ACCESS_FN (dr, 0);
3172 tree init, init_new, step;
3174 step = evolution_part_in_loop_num (access_fn, loop->num);
3175 init = initial_condition (access_fn);
3177 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3178 build (MULT_EXPR, TREE_TYPE (niters),
3179 niters, step), init);
3180 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3186 /* Function vect_update_inits_of_drs
3188 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3189 This function updates the information recorded for the data references in
3190 the loop to account for the fact that the first NITERS iterations had
3191 already been executed. Specifically, it updates the initial_condition of the
3192 access_function of all the data_references in the loop. */
3195 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3198 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3199 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3200 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3203 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3205 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3207 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3208 vect_update_inits_of_dr (dr, loop, niters);
3211 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3213 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3214 vect_update_inits_of_dr (dr, loop, niters);
3219 /* Function vect_do_peeling_for_alignment
3221 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3222 'niters' is set to the misalignment of one of the data references in the
3223 loop, thereby forcing it to refer to an aligned location at the beginning
3224 of the execution of this loop. The data reference for which we are
3225 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3228 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3231 tree niters_of_prolog_loop, ni_name;
3233 struct loop *new_loop;
3235 if (vect_debug_details (NULL))
3236 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3238 ni_name = vect_build_loop_niters (loop_vinfo);
3239 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3241 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3243 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3244 niters_of_prolog_loop, ni_name, true);
3245 #ifdef ENABLE_CHECKING
3246 gcc_assert (new_loop);
3247 slpeel_verify_cfg_after_peeling (new_loop, loop);
3250 /* Update number of times loop executes. */
3251 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3252 LOOP_VINFO_NITERS (loop_vinfo) =
3253 build (MINUS_EXPR, integer_type_node, n_iters, niters_of_prolog_loop);
3255 /* Update the init conditions of the access functions of all data refs. */
3256 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3258 /* After peeling we have to reset scalar evolution analyzer. */
3265 /* Function vect_transform_loop.
3267 The analysis phase has determined that the loop is vectorizable.
3268 Vectorize the loop - created vectorized stmts to replace the scalar
3269 stmts in the loop, and update the loop exit condition. */
3272 vect_transform_loop (loop_vec_info loop_vinfo,
3273 struct loops *loops ATTRIBUTE_UNUSED)
3275 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3276 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3277 int nbbs = loop->num_nodes;
3278 block_stmt_iterator si;
3281 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3283 if (vect_debug_details (NULL))
3284 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3287 /* Peel the loop if there are data refs with unknown alignment.
3288 Only one data ref with unknown store is allowed. */
3290 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3291 vect_do_peeling_for_alignment (loop_vinfo, loops);
3293 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3294 compile time constant), or it is a constant that doesn't divide by the
3295 vectorization factor, then an epilog loop needs to be created.
3296 We therefore duplicate the loop: the original loop will be vectorized,
3297 and will compute the first (n/VF) iterations. The second copy of the loop
3298 will remain scalar and will compute the remaining (n%VF) iterations.
3299 (VF is the vectorization factor). */
3301 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3302 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3303 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3304 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3306 /* 1) Make sure the loop header has exactly two entries
3307 2) Make sure we have a preheader basic block. */
3309 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3311 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3314 /* FORNOW: the vectorizer supports only loops which body consist
3315 of one basic block (header + empty latch). When the vectorizer will
3316 support more involved loop forms, the order by which the BBs are
3317 traversed need to be reconsidered. */
3319 for (i = 0; i < nbbs; i++)
3321 basic_block bb = bbs[i];
3323 for (si = bsi_start (bb); !bsi_end_p (si);)
3325 tree stmt = bsi_stmt (si);
3326 stmt_vec_info stmt_info;
3329 if (vect_debug_details (NULL))
3331 fprintf (dump_file, "------>vectorizing statement: ");
3332 print_generic_expr (dump_file, stmt, TDF_SLIM);
3334 stmt_info = vinfo_for_stmt (stmt);
3335 gcc_assert (stmt_info);
3336 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3341 #ifdef ENABLE_CHECKING
3342 /* FORNOW: Verify that all stmts operate on the same number of
3343 units and no inner unrolling is necessary. */
3345 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3346 == vectorization_factor);
3348 /* -------- vectorize statement ------------ */
3349 if (vect_debug_details (NULL))
3350 fprintf (dump_file, "transform statement.");
3352 is_store = vect_transform_stmt (stmt, &si);
3355 /* free the attached stmt_vec_info and remove the stmt. */
3356 stmt_ann_t ann = stmt_ann (stmt);
3358 set_stmt_info (ann, NULL);
3367 vect_transform_loop_bound (loop_vinfo, ratio);
3369 if (vect_debug_details (loop))
3370 fprintf (dump_file,"Success! loop vectorized.");
3371 if (vect_debug_stats (loop))
3372 fprintf (dump_file, "LOOP VECTORIZED.");
3376 /* Function vect_is_simple_use.
3379 LOOP - the loop that is being vectorized.
3380 OPERAND - operand of a stmt in LOOP.
3381 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3383 Returns whether a stmt with OPERAND can be vectorized.
3384 Supportable operands are constants, loop invariants, and operands that are
3385 defined by the current iteration of the loop. Unsupportable operands are
3386 those that are defined by a previous iteration of the loop (as is the case
3387 in reduction/induction computations). */
3390 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3398 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3401 if (TREE_CODE (operand) != SSA_NAME)
3404 def_stmt = SSA_NAME_DEF_STMT (operand);
3405 if (def_stmt == NULL_TREE )
3407 if (vect_debug_details (NULL))
3408 fprintf (dump_file, "no def_stmt.");
3412 /* empty stmt is expected only in case of a function argument.
3413 (Otherwise - we expect a phi_node or a modify_expr). */
3414 if (IS_EMPTY_STMT (def_stmt))
3416 tree arg = TREE_OPERAND (def_stmt, 0);
3417 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3419 if (vect_debug_details (NULL))
3421 fprintf (dump_file, "Unexpected empty stmt: ");
3422 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3427 /* phi_node inside the loop indicates an induction/reduction pattern.
3428 This is not supported yet. */
3429 bb = bb_for_stmt (def_stmt);
3430 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3432 if (vect_debug_details (NULL))
3433 fprintf (dump_file, "reduction/induction - unsupported.");
3434 return false; /* FORNOW: not supported yet. */
3437 /* Expecting a modify_expr or a phi_node. */
3438 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3439 || TREE_CODE (def_stmt) == PHI_NODE)
3450 /* Function vect_analyze_operations.
3452 Scan the loop stmts and make sure they are all vectorizable. */
3455 vect_analyze_operations (loop_vec_info loop_vinfo)
3457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3458 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3459 int nbbs = loop->num_nodes;
3460 block_stmt_iterator si;
3461 int vectorization_factor = 0;
3466 if (vect_debug_details (NULL))
3467 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3469 for (i = 0; i < nbbs; i++)
3471 basic_block bb = bbs[i];
3473 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3475 tree stmt = bsi_stmt (si);
3477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3480 if (vect_debug_details (NULL))
3482 fprintf (dump_file, "==> examining statement: ");
3483 print_generic_expr (dump_file, stmt, TDF_SLIM);
3486 gcc_assert (stmt_info);
3488 /* skip stmts which do not need to be vectorized.
3489 this is expected to include:
3490 - the COND_EXPR which is the loop exit condition
3491 - any LABEL_EXPRs in the loop
3492 - computations that are used only for array indexing or loop
3495 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3497 if (vect_debug_details (NULL))
3498 fprintf (dump_file, "irrelevant.");
3502 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3504 if (vect_debug_stats (loop) || vect_debug_details (loop))
3506 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3507 print_generic_expr (dump_file, stmt, TDF_SLIM);
3512 if (STMT_VINFO_DATA_REF (stmt_info))
3513 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3514 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3515 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3517 scalar_type = TREE_TYPE (stmt);
3519 if (vect_debug_details (NULL))
3521 fprintf (dump_file, "get vectype for scalar type: ");
3522 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3525 vectype = get_vectype_for_scalar_type (scalar_type);
3528 if (vect_debug_stats (loop) || vect_debug_details (loop))
3530 fprintf (dump_file, "not vectorized: unsupported data-type ");
3531 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3536 if (vect_debug_details (NULL))
3538 fprintf (dump_file, "vectype: ");
3539 print_generic_expr (dump_file, vectype, TDF_SLIM);
3541 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3543 ok = (vectorizable_operation (stmt, NULL, NULL)
3544 || vectorizable_assignment (stmt, NULL, NULL)
3545 || vectorizable_load (stmt, NULL, NULL)
3546 || vectorizable_store (stmt, NULL, NULL));
3550 if (vect_debug_stats (loop) || vect_debug_details (loop))
3552 fprintf (dump_file, "not vectorized: stmt not supported: ");
3553 print_generic_expr (dump_file, stmt, TDF_SLIM);
3558 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3559 if (vect_debug_details (NULL))
3560 fprintf (dump_file, "nunits = %d", nunits);
3562 if (vectorization_factor)
3564 /* FORNOW: don't allow mixed units.
3565 This restriction will be relaxed in the future. */
3566 if (nunits != vectorization_factor)
3568 if (vect_debug_stats (loop) || vect_debug_details (loop))
3569 fprintf (dump_file, "not vectorized: mixed data-types");
3574 vectorization_factor = nunits;
3576 #ifdef ENABLE_CHECKING
3577 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3578 * vectorization_factor == UNITS_PER_SIMD_WORD);
3583 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3585 if (vectorization_factor <= 1)
3587 if (vect_debug_stats (loop) || vect_debug_details (loop))
3588 fprintf (dump_file, "not vectorized: unsupported data-type");
3591 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3593 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3595 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3596 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3598 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3599 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3601 if (vect_debug_stats (loop) || vect_debug_details (loop))
3602 fprintf (dump_file, "epilog loop required.");
3603 if (!vect_can_advance_ivs_p (loop))
3605 if (vect_debug_stats (loop) || vect_debug_details (loop))
3606 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3609 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3611 if (vect_debug_stats (loop) || vect_debug_details (loop))
3612 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3621 /* Function exist_non_indexing_operands_for_use_p
3623 USE is one of the uses attached to STMT. Check if USE is
3624 used in STMT for anything other than indexing an array. */
3627 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3632 /* USE corresponds to some operand in STMT. If there is no data
3633 reference in STMT, then any operand that corresponds to USE
3634 is not indexing an array. */
3635 if (!STMT_VINFO_DATA_REF (stmt_info))
3638 /* STMT has a data_ref. FORNOW this means that its of one of
3639 the following forms:
3642 (This should have been verified in analyze_data_refs).
3644 'var' in the second case corresponds to a def, not a use,
3645 so USE cannot correspond to any operands that are not used
3648 Therefore, all we need to check is if STMT falls into the
3649 first case, and whether var corresponds to USE. */
3651 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3654 operand = TREE_OPERAND (stmt, 1);
3656 if (TREE_CODE (operand) != SSA_NAME)
3666 /* Function vect_is_simple_iv_evolution.
3668 FORNOW: A simple evolution of an induction variables in the loop is
3669 considered a polynomial evolution with constant step. */
3672 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3673 tree * step, bool strict)
3678 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3680 /* When there is no evolution in this loop, the evolution function
3682 if (evolution_part == NULL_TREE)
3685 /* When the evolution is a polynomial of degree >= 2
3686 the evolution function is not "simple". */
3687 if (tree_is_chrec (evolution_part))
3690 step_expr = evolution_part;
3691 init_expr = unshare_expr (initial_condition (access_fn));
3693 if (vect_debug_details (NULL))
3695 fprintf (dump_file, "step: ");
3696 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3697 fprintf (dump_file, ", init: ");
3698 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3704 if (TREE_CODE (step_expr) != INTEGER_CST)
3706 if (vect_debug_details (NULL))
3707 fprintf (dump_file, "step unknown.");
3712 if (!integer_onep (step_expr))
3714 if (vect_debug_details (NULL))
3715 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3723 /* Function vect_analyze_scalar_cycles.
3725 Examine the cross iteration def-use cycles of scalar variables, by
3726 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3727 cycles that they represent do not impede vectorization.
3729 FORNOW: Reduction as in the following loop, is not supported yet:
3733 The cross-iteration cycle corresponding to variable 'sum' will be
3734 considered too complicated and will impede vectorization.
3736 FORNOW: Induction as in the following loop, is not supported yet:
3741 However, the following loop *is* vectorizable:
3746 In both loops there exists a def-use cycle for the variable i:
3747 loop: i_2 = PHI (i_0, i_1)
3752 The evolution of the above cycle is considered simple enough,
3753 however, we also check that the cycle does not need to be
3754 vectorized, i.e - we check that the variable that this cycle
3755 defines is only used for array indexing or in stmts that do not
3756 need to be vectorized. This is not the case in loop2, but it
3757 *is* the case in loop3. */
3760 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3763 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3764 basic_block bb = loop->header;
3767 if (vect_debug_details (NULL))
3768 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3770 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3772 tree access_fn = NULL;
3774 if (vect_debug_details (NULL))
3776 fprintf (dump_file, "Analyze phi: ");
3777 print_generic_expr (dump_file, phi, TDF_SLIM);
3780 /* Skip virtual phi's. The data dependences that are associated with
3781 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3783 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3785 if (vect_debug_details (NULL))
3786 fprintf (dump_file, "virtual phi. skip.");
3790 /* Analyze the evolution function. */
3792 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3793 those of loop induction variables; This property is verified here.
3795 Furthermore, if that induction variable is used in an operation
3796 that needs to be vectorized (i.e, is not solely used to index
3797 arrays and check the exit condition) - we do not support its
3798 vectorization yet. This property is verified in vect_is_simple_use,
3799 during vect_analyze_operations. */
3801 access_fn = /* instantiate_parameters
3803 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3807 if (vect_debug_stats (loop) || vect_debug_details (loop))
3808 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3812 if (vect_debug_details (NULL))
3814 fprintf (dump_file, "Access function of PHI: ");
3815 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3818 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3821 if (vect_debug_stats (loop) || vect_debug_details (loop))
3822 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3831 /* Function vect_analyze_data_ref_dependence.
3833 Return TRUE if there (might) exist a dependence between a memory-reference
3834 DRA and a memory-reference DRB. */
3837 vect_analyze_data_ref_dependence (struct data_reference *dra,
3838 struct data_reference *drb,
3842 struct data_dependence_relation *ddr;
3844 if (!array_base_name_differ_p (dra, drb, &differ_p))
3846 if (vect_debug_stats (loop) || vect_debug_details (loop))
3849 "not vectorized: can't determine dependence between: ");
3850 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3851 fprintf (dump_file, " and ");
3852 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3860 ddr = initialize_data_dependence_relation (dra, drb);
3861 compute_affine_dependence (ddr);
3863 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3866 if (vect_debug_stats (loop) || vect_debug_details (loop))
3869 "not vectorized: possible dependence between data-refs ");
3870 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3871 fprintf (dump_file, " and ");
3872 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3879 /* Function vect_analyze_data_ref_dependences.
3881 Examine all the data references in the loop, and make sure there do not
3882 exist any data dependences between them.
3884 TODO: dependences which distance is greater than the vectorization factor
3888 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3891 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3892 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3893 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3895 /* Examine store-store (output) dependences. */
3897 if (vect_debug_details (NULL))
3898 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3900 if (vect_debug_details (NULL))
3901 fprintf (dump_file, "compare all store-store pairs.");
3903 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3905 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3907 struct data_reference *dra =
3908 VARRAY_GENERIC_PTR (loop_write_refs, i);
3909 struct data_reference *drb =
3910 VARRAY_GENERIC_PTR (loop_write_refs, j);
3911 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3916 /* Examine load-store (true/anti) dependences. */
3918 if (vect_debug_details (NULL))
3919 fprintf (dump_file, "compare all load-store pairs.");
3921 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3923 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3925 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3926 struct data_reference *drb =
3927 VARRAY_GENERIC_PTR (loop_write_refs, j);
3928 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3937 /* Function vect_get_first_index.
3939 REF is a data reference.
3940 If it is an ARRAY_REF: if its lower bound is simple enough,
3941 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3942 If it is not an ARRAY_REF: REF has no "first index";
3943 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3946 vect_get_first_index (tree ref, tree *array_first_index)
3950 if (TREE_CODE (ref) != ARRAY_REF)
3951 *array_first_index = size_zero_node;
3954 array_start = array_ref_low_bound (ref);
3955 if (!host_integerp (array_start,0))
3957 if (vect_debug_details (NULL))
3959 fprintf (dump_file, "array min val not simple integer cst.");
3960 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3964 *array_first_index = array_start;
3971 /* Function vect_compute_array_base_alignment.
3972 A utility function of vect_compute_array_ref_alignment.
3974 Compute the misalignment of ARRAY in bits.
3977 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3978 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3979 if NULL: don't compute misalignment, just return the base of ARRAY.
3980 PREV_DIMENSIONS - initialized to one.
3981 MISALIGNMENT - the computed misalignment in bits.
3984 If VECTYPE is not NULL:
3985 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3986 the base of the array, and put the computed misalignment in MISALIGNMENT.
3988 Return the base of the array.
3990 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3991 a[idx_N]...[idx_2][idx_1] is
3992 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3993 ... + idx_N * dim_0 * ... * dim_N-1}.
3994 (The misalignment of &a is not checked here).
3995 Note, that every term contains dim_0, therefore, if dim_0 is a
3996 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3997 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3998 NUINTS, we can say that the misalignment of the sum is equal to
3999 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
4000 we can't determine this array misalignment, and we return
4002 We proceed recursively in this manner, accumulating total misalignment
4003 and the multiplication of previous dimensions for correct misalignment
4007 vect_compute_array_base_alignment (tree array,
4009 tree *prev_dimensions,
4014 tree dimension_size;
4016 tree bits_per_vectype;
4017 tree bits_per_vectype_unit;
4019 /* The 'stop condition' of the recursion. */
4020 if (TREE_CODE (array) != ARRAY_REF)
4024 /* Just get the base decl. */
4025 return vect_compute_array_base_alignment
4026 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4028 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4029 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4032 domain = TYPE_DOMAIN (TREE_TYPE (array));
4034 int_const_binop (PLUS_EXPR,
4035 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4036 TYPE_MIN_VALUE (domain), 1),
4039 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4040 is a multiple of NUNITS:
4042 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4044 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4045 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4046 if (integer_zerop (mis))
4047 /* This array is aligned. Continue just in order to get the base decl. */
4048 return vect_compute_array_base_alignment
4049 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4051 index = TREE_OPERAND (array, 1);
4052 if (!host_integerp (index, 1))
4053 /* The current index is not constant. */
4056 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4058 bits_per_vectype = fold_convert (unsigned_type_node,
4059 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4060 GET_MODE_SIZE (TYPE_MODE (vectype))));
4061 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4062 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4063 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4065 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4069 (*misalignment + index_val * dimension_size * *prev_dimensions)
4073 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4074 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4075 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4076 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4077 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4080 *prev_dimensions = int_const_binop (MULT_EXPR,
4081 *prev_dimensions, dimension_size, 1);
4083 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4089 /* Function vect_compute_data_ref_alignment
4091 Compute the misalignment of the data reference DR.
4094 1. If during the misalignment computation it is found that the data reference
4095 cannot be vectorized then false is returned.
4096 2. DR_MISALIGNMENT (DR) is defined.
4098 FOR NOW: No analysis is actually performed. Misalignment is calculated
4099 only for trivial cases. TODO. */
4102 vect_compute_data_ref_alignment (struct data_reference *dr,
4103 loop_vec_info loop_vinfo)
4105 tree stmt = DR_STMT (dr);
4106 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4107 tree ref = DR_REF (dr);
4110 tree offset = size_zero_node;
4111 tree base, bit_offset, alignment;
4112 tree unit_bits = fold_convert (unsigned_type_node,
4113 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4115 bool base_aligned_p;
4117 if (vect_debug_details (NULL))
4118 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4120 /* Initialize misalignment to unknown. */
4121 DR_MISALIGNMENT (dr) = -1;
4123 scalar_type = TREE_TYPE (ref);
4124 vectype = get_vectype_for_scalar_type (scalar_type);
4127 if (vect_debug_details (NULL))
4129 fprintf (dump_file, "no vectype for stmt: ");
4130 print_generic_expr (dump_file, stmt, TDF_SLIM);
4131 fprintf (dump_file, " scalar_type: ");
4132 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4134 /* It is not possible to vectorize this data reference. */
4137 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4138 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4140 if (TREE_CODE (ref) == ARRAY_REF)
4143 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4145 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4146 loop_vinfo, &bit_offset, &base_aligned_p);
4149 if (vect_debug_details (NULL))
4151 fprintf (dump_file, "Unknown alignment for access: ");
4152 print_generic_expr (dump_file,
4153 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4158 if (!base_aligned_p)
4160 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4162 if (vect_debug_details (NULL))
4164 fprintf (dump_file, "can't force alignment of ref: ");
4165 print_generic_expr (dump_file, ref, TDF_SLIM);
4170 /* Force the alignment of the decl.
4171 NOTE: This is the only change to the code we make during
4172 the analysis phase, before deciding to vectorize the loop. */
4173 if (vect_debug_details (NULL))
4174 fprintf (dump_file, "force alignment");
4175 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4176 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4179 /* At this point we assume that the base is aligned, and the offset from it
4180 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4181 gcc_assert (base_aligned_p
4182 || (TREE_CODE (base) == VAR_DECL
4183 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4185 /* Convert into bytes. */
4186 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4187 /* Check that there is no remainder in bits. */
4188 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4189 if (!integer_zerop (bit_offset))
4191 if (vect_debug_details (NULL))
4193 fprintf (dump_file, "bit offset alignment: ");
4194 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4199 /* Alignment required, in bytes: */
4200 alignment = fold_convert (unsigned_type_node,
4201 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4203 /* Modulo alignment. */
4204 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4205 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4207 if (vect_debug_details (NULL))
4208 fprintf (dump_file, "unexpected misalign value");
4212 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4214 if (vect_debug_details (NULL))
4215 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4221 /* Function vect_compute_array_ref_alignment
4223 Compute the alignment of an array-ref.
4224 The alignment we compute here is relative to
4225 TYPE_ALIGN(VECTYPE) boundary.
4228 OFFSET - the alignment in bits
4229 Return value - the base of the array-ref. E.g,
4230 if the array-ref is a.b[k].c[i][j] the returned
4235 vect_compute_array_ref_alignment (struct data_reference *dr,
4236 loop_vec_info loop_vinfo,
4240 tree array_first_index = size_zero_node;
4242 tree ref = DR_REF (dr);
4243 tree scalar_type = TREE_TYPE (ref);
4244 tree oprnd0 = TREE_OPERAND (ref, 0);
4245 tree dims = size_one_node;
4246 tree misalign = size_zero_node;
4247 tree next_ref, this_offset = size_zero_node;
4251 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4252 /* The reference is an array without its last index. */
4253 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4256 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4259 /* Alignment is not requested. Just return the base. */
4262 /* Compute alignment. */
4263 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4265 this_offset = misalign;
4267 /* Check the first index accessed. */
4268 if (!vect_get_first_index (ref, &array_first_index))
4270 if (vect_debug_details (NULL))
4271 fprintf (dump_file, "no first_index for array.");
4275 /* Check the index of the array_ref. */
4276 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4277 LOOP_VINFO_LOOP (loop_vinfo)->num);
4279 /* FORNOW: In order to simplify the handling of alignment, we make sure
4280 that the first location at which the array is accessed ('init') is on an
4281 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4282 This is too conservative, since we require that
4283 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4284 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4285 This should be relaxed in the future. */
4287 if (!init || !host_integerp (init, 0))
4289 if (vect_debug_details (NULL))
4290 fprintf (dump_file, "non constant init. ");
4294 /* bytes per scalar element: */
4295 nunits = fold_convert (unsigned_type_node,
4296 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4297 nbits = int_const_binop (MULT_EXPR, nunits,
4298 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4300 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4301 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4302 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4303 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4305 /* TODO: allow negative misalign values. */
4306 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4308 if (vect_debug_details (NULL))
4309 fprintf (dump_file, "unexpected misalign value");
4317 /* Function vect_compute_data_refs_alignment
4319 Compute the misalignment of data references in the loop.
4320 This pass may take place at function granularity instead of at loop
4323 FOR NOW: No analysis is actually performed. Misalignment is calculated
4324 only for trivial cases. TODO. */
4327 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4329 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4330 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4333 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4335 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4336 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4340 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4342 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4343 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4351 /* Function vect_enhance_data_refs_alignment
4353 This pass will use loop versioning and loop peeling in order to enhance
4354 the alignment of data references in the loop.
4356 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4357 original loop is to be vectorized; Any other loops that are created by
4358 the transformations performed in this pass - are not supposed to be
4359 vectorized. This restriction will be relaxed. */
4362 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4364 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4365 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4366 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4370 This pass will require a cost model to guide it whether to apply peeling
4371 or versioning or a combination of the two. For example, the scheme that
4372 intel uses when given a loop with several memory accesses, is as follows:
4373 choose one memory access ('p') which alignment you want to force by doing
4374 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4375 other accesses are not necessarily aligned, or (2) use loop versioning to
4376 generate one loop in which all accesses are aligned, and another loop in
4377 which only 'p' is necessarily aligned.
4379 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4380 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4381 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4383 Devising a cost model is the most critical aspect of this work. It will
4384 guide us on which access to peel for, whether to use loop versioning, how
4385 many versions to create, etc. The cost model will probably consist of
4386 generic considerations as well as target specific considerations (on
4387 powerpc for example, misaligned stores are more painful than misaligned
4390 Here is the general steps involved in alignment enhancements:
4392 -- original loop, before alignment analysis:
4393 for (i=0; i<N; i++){
4394 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4395 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4398 -- After vect_compute_data_refs_alignment:
4399 for (i=0; i<N; i++){
4400 x = q[i]; # DR_MISALIGNMENT(q) = 3
4401 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4404 -- Possibility 1: we do loop versioning:
4406 for (i=0; i<N; i++){ # loop 1A
4407 x = q[i]; # DR_MISALIGNMENT(q) = 3
4408 p[i] = y; # DR_MISALIGNMENT(p) = 0
4412 for (i=0; i<N; i++){ # loop 1B
4413 x = q[i]; # DR_MISALIGNMENT(q) = 3
4414 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4418 -- Possibility 2: we do loop peeling:
4419 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4423 for (i = 3; i < N; i++){ # loop 2A
4424 x = q[i]; # DR_MISALIGNMENT(q) = 0
4425 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4428 -- Possibility 3: combination of loop peeling and versioning:
4429 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4434 for (i = 3; i<N; i++){ # loop 3A
4435 x = q[i]; # DR_MISALIGNMENT(q) = 0
4436 p[i] = y; # DR_MISALIGNMENT(p) = 0
4440 for (i = 3; i<N; i++){ # loop 3B
4441 x = q[i]; # DR_MISALIGNMENT(q) = 0
4442 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4446 These loops are later passed to loop_transform to be vectorized. The
4447 vectorizer will use the alignment information to guide the transformation
4448 (whether to generate regular loads/stores, or with special handling for
4452 /* (1) Peeling to force alignment. */
4454 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4456 + How many accesses will become aligned due to the peeling
4457 - How many accesses will become unaligned due to the peeling,
4458 and the cost of misaligned accesses.
4459 - The cost of peeling (the extra runtime checks, the increase
4462 The scheme we use FORNOW: peel to force the alignment of the first
4463 misaligned store in the loop.
4464 Rationale: misaligned stores are not yet supported.
4466 TODO: Use a better cost model. */
4468 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4470 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4471 if (!aligned_access_p (dr))
4473 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4474 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4479 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4481 if (vect_debug_details (loop))
4482 fprintf (dump_file, "Peeling for alignment will not be applied.");
4486 if (vect_debug_details (loop))
4487 fprintf (dump_file, "Peeling for alignment will be applied.");
4490 /* (1.2) Update the alignment info according to the peeling factor.
4491 If the misalignment of the DR we peel for is M, then the
4492 peeling factor is VF - M, and the misalignment of each access DR_i
4493 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4494 If the misalignment of the DR we peel for is unknown, then the
4495 misalignment of each access DR_i in the loop is also unknown.
4497 FORNOW: set the misalignment of the accesses to unknown even
4498 if the peeling factor is known at compile time.
4500 TODO: - if the peeling factor is known at compile time, use that
4501 when updating the misalignment info of the loop DRs.
4502 - consider accesses that are known to have the same
4503 alignment, even if that alignment is unknown. */
4505 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4507 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4508 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4509 DR_MISALIGNMENT (dr) = 0;
4511 DR_MISALIGNMENT (dr) = -1;
4513 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4515 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4516 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4517 DR_MISALIGNMENT (dr) = 0;
4519 DR_MISALIGNMENT (dr) = -1;
4524 /* Function vect_analyze_data_refs_alignment
4526 Analyze the alignment of the data-references in the loop.
4527 FOR NOW: Until support for misliagned accesses is in place, only if all
4528 accesses are aligned can the loop be vectorized. This restriction will be
4532 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4534 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4535 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4536 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4537 enum dr_alignment_support supportable_dr_alignment;
4540 if (vect_debug_details (NULL))
4541 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4544 /* This pass may take place at function granularity instead of at loop
4547 if (!vect_compute_data_refs_alignment (loop_vinfo))
4549 if (vect_debug_details (loop) || vect_debug_stats (loop))
4551 "not vectorized: can't calculate alignment for data ref.");
4556 /* This pass will decide on using loop versioning and/or loop peeling in
4557 order to enhance the alignment of data references in the loop. */
4559 vect_enhance_data_refs_alignment (loop_vinfo);
4562 /* Finally, check that all the data references in the loop can be
4563 handled with respect to their alignment. */
4565 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4567 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4568 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4569 if (!supportable_dr_alignment)
4571 if (vect_debug_details (loop) || vect_debug_stats (loop))
4572 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4576 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4578 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4579 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4580 if (!supportable_dr_alignment)
4582 if (vect_debug_details (loop) || vect_debug_stats (loop))
4583 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4592 /* Function vect_analyze_data_ref_access.
4594 Analyze the access pattern of the data-reference DR. For now, a data access
4595 has to consecutive and aligned to be considered vectorizable. */
4598 vect_analyze_data_ref_access (struct data_reference *dr)
4600 varray_type access_fns = DR_ACCESS_FNS (dr);
4603 unsigned int dimensions, i;
4605 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4606 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4607 access is contiguous). */
4608 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4610 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4612 access_fn = DR_ACCESS_FN (dr, i);
4614 if (evolution_part_in_loop_num (access_fn,
4615 loop_containing_stmt (DR_STMT (dr))->num))
4617 /* Evolution part is not NULL in this loop (it is neither constant
4619 if (vect_debug_details (NULL))
4622 "not vectorized: complicated multidim. array access.");
4623 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4629 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4630 if (!evolution_function_is_constant_p (access_fn)
4631 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4632 access_fn, &init, &step, true))
4634 if (vect_debug_details (NULL))
4636 fprintf (dump_file, "not vectorized: complicated access function.");
4637 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4646 /* Function vect_analyze_data_ref_accesses.
4648 Analyze the access pattern of all the data references in the loop.
4650 FORNOW: the only access pattern that is considered vectorizable is a
4651 simple step 1 (consecutive) access.
4653 FORNOW: handle only arrays and pointer accesses. */
4656 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4659 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4660 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4662 if (vect_debug_details (NULL))
4663 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4665 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4667 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4668 bool ok = vect_analyze_data_ref_access (dr);
4671 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4672 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4673 fprintf (dump_file, "not vectorized: complicated access pattern.");
4678 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4680 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4681 bool ok = vect_analyze_data_ref_access (dr);
4684 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4685 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4686 fprintf (dump_file, "not vectorized: complicated access pattern.");
4695 /* Function vect_analyze_pointer_ref_access.
4698 STMT - a stmt that contains a data-ref
4699 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4701 If the data-ref access is vectorizable, return a data_reference structure
4702 that represents it (DR). Otherwise - return NULL. */
4704 static struct data_reference *
4705 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4707 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4708 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4709 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4712 tree reftype, innertype;
4713 enum machine_mode innermode;
4714 tree indx_access_fn;
4715 int loopnum = loop->num;
4716 struct data_reference *dr;
4720 if (vect_debug_stats (loop) || vect_debug_details (loop))
4721 fprintf (dump_file, "not vectorized: complicated pointer access.");
4725 if (vect_debug_details (NULL))
4727 fprintf (dump_file, "Access function of ptr: ");
4728 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4731 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4733 if (vect_debug_stats (loop) || vect_debug_details (loop))
4734 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4740 if (!host_integerp (step,0))
4742 if (vect_debug_stats (loop) || vect_debug_details (loop))
4744 "not vectorized: non constant step for pointer access.");
4748 step_val = TREE_INT_CST_LOW (step);
4750 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4751 if (TREE_CODE (reftype) != POINTER_TYPE)
4753 if (vect_debug_stats (loop) || vect_debug_details (loop))
4754 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4758 reftype = TREE_TYPE (init);
4759 if (TREE_CODE (reftype) != POINTER_TYPE)
4761 if (vect_debug_stats (loop) || vect_debug_details (loop))
4762 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4766 innertype = TREE_TYPE (reftype);
4767 innermode = TYPE_MODE (innertype);
4768 if (GET_MODE_SIZE (innermode) != step_val)
4770 /* FORNOW: support only consecutive access */
4771 if (vect_debug_stats (loop) || vect_debug_details (loop))
4772 fprintf (dump_file, "not vectorized: non consecutive access.");
4777 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4778 if (vect_debug_details (NULL))
4780 fprintf (dump_file, "Access function of ptr indx: ");
4781 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4783 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4788 /* Function vect_get_symbl_and_dr.
4790 The function returns SYMBL - the relevant variable for
4791 memory tag (for aliasing purposes).
4792 Also data reference structure DR is created.
4795 MEMREF - data reference in STMT
4796 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4799 DR - data_reference struct for MEMREF
4800 return value - the relevant variable for memory tag (for aliasing purposes).
4805 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4806 loop_vec_info loop_vinfo, struct data_reference **dr)
4808 tree symbl, oprnd0, oprnd1;
4809 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4811 tree array_base, base;
4812 struct data_reference *new_dr;
4813 bool base_aligned_p;
4816 switch (TREE_CODE (memref))
4819 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4823 symbl = DR_BASE_NAME (new_dr);
4824 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4826 switch (TREE_CODE (symbl))
4830 oprnd0 = TREE_OPERAND (symbl, 0);
4831 oprnd1 = TREE_OPERAND (symbl, 1);
4834 /* Only {address_base + offset} expressions are supported,
4835 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4836 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4837 TODO: swap operands if {offset + address_base}. */
4838 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4839 && TREE_CODE (oprnd1) != INTEGER_CST)
4840 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4843 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4846 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4847 loop_vinfo, &new_dr);
4851 /* symbl remains unchanged. */
4855 if (vect_debug_details (NULL))
4857 fprintf (dump_file, "unhandled data ref: ");
4858 print_generic_expr (dump_file, memref, TDF_SLIM);
4859 fprintf (dump_file, " (symbl ");
4860 print_generic_expr (dump_file, symbl, TDF_SLIM);
4861 fprintf (dump_file, ") in stmt ");
4862 print_generic_expr (dump_file, stmt, TDF_SLIM);
4869 offset = size_zero_node;
4871 /* Store the array base in the stmt info.
4872 For one dimensional array ref a[i], the base is a,
4873 for multidimensional a[i1][i2]..[iN], the base is
4874 a[i1][i2]..[iN-1]. */
4875 array_base = TREE_OPERAND (memref, 0);
4876 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4878 new_dr = analyze_array (stmt, memref, is_read);
4881 /* Find the relevant symbol for aliasing purposes. */
4882 base = DR_BASE_NAME (new_dr);
4883 switch (TREE_CODE (base))
4890 symbl = TREE_OPERAND (base, 0);
4894 /* Could have recorded more accurate information -
4895 i.e, the actual FIELD_DECL that is being referenced -
4896 but later passes expect VAR_DECL as the nmt. */
4897 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4898 loop_vinfo, &offset, &base_aligned_p);
4903 if (vect_debug_details (NULL))
4905 fprintf (dump_file, "unhandled struct/class field access ");
4906 print_generic_expr (dump_file, stmt, TDF_SLIM);
4913 if (vect_debug_details (NULL))
4915 fprintf (dump_file, "unhandled data ref: ");
4916 print_generic_expr (dump_file, memref, TDF_SLIM);
4917 fprintf (dump_file, " in stmt ");
4918 print_generic_expr (dump_file, stmt, TDF_SLIM);
4926 /* Function vect_analyze_data_refs.
4928 Find all the data references in the loop.
4930 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4931 which base is really an array (not a pointer) and which alignment
4932 can be forced. This restriction will be relaxed. */
4935 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4937 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4938 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4939 int nbbs = loop->num_nodes;
4940 block_stmt_iterator si;
4942 struct data_reference *dr;
4945 bool base_aligned_p;
4948 if (vect_debug_details (NULL))
4949 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4951 for (j = 0; j < nbbs; j++)
4953 basic_block bb = bbs[j];
4954 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4956 bool is_read = false;
4957 tree stmt = bsi_stmt (si);
4958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4959 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4960 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4961 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4962 varray_type *datarefs = NULL;
4963 int nvuses, nv_may_defs, nv_must_defs;
4967 /* Assumption: there exists a data-ref in stmt, if and only if
4968 it has vuses/vdefs. */
4970 if (!vuses && !v_may_defs && !v_must_defs)
4973 nvuses = NUM_VUSES (vuses);
4974 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4975 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4977 if (nvuses && (nv_may_defs || nv_must_defs))
4979 if (vect_debug_details (NULL))
4981 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4982 print_generic_expr (dump_file, stmt, TDF_SLIM);
4987 if (TREE_CODE (stmt) != MODIFY_EXPR)
4989 if (vect_debug_details (NULL))
4991 fprintf (dump_file, "unexpected vops in stmt: ");
4992 print_generic_expr (dump_file, stmt, TDF_SLIM);
4999 memref = TREE_OPERAND (stmt, 1);
5000 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5005 memref = TREE_OPERAND (stmt, 0);
5006 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5010 /* Analyze MEMREF. If it is of a supported form, build data_reference
5011 struct for it (DR) and find the relevant symbol for aliasing
5013 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
5017 if (vect_debug_stats (loop) || vect_debug_details (loop))
5019 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5020 print_generic_expr (dump_file, stmt, TDF_SLIM);
5025 /* Find and record the memtag assigned to this data-ref. */
5026 switch (TREE_CODE (symbl))
5029 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5033 symbl = SSA_NAME_VAR (symbl);
5034 tag = get_var_ann (symbl)->type_mem_tag;
5037 tree ptr = TREE_OPERAND (memref, 0);
5038 if (TREE_CODE (ptr) == SSA_NAME)
5039 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5043 if (vect_debug_stats (loop) || vect_debug_details (loop))
5044 fprintf (dump_file, "not vectorized: no memtag for ref.");
5047 STMT_VINFO_MEMTAG (stmt_info) = tag;
5051 address_base = TREE_OPERAND (symbl, 0);
5053 switch (TREE_CODE (address_base))
5056 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5058 STMT_VINFO_MEMTAG (stmt_info) =
5059 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5060 loop_vinfo, &offset,
5065 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5069 if (vect_debug_stats (loop) || vect_debug_details (loop))
5072 "not vectorized: unhandled address expr: ");
5073 print_generic_expr (dump_file, stmt, TDF_SLIM);
5080 if (vect_debug_stats (loop) || vect_debug_details (loop))
5082 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5083 print_generic_expr (dump_file, memref, TDF_SLIM);
5088 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5089 STMT_VINFO_DATA_REF (stmt_info) = dr;
5097 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5099 /* Function vect_mark_relevant.
5101 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5104 vect_mark_relevant (varray_type worklist, tree stmt)
5106 stmt_vec_info stmt_info;
5108 if (vect_debug_details (NULL))
5109 fprintf (dump_file, "mark relevant.");
5111 if (TREE_CODE (stmt) == PHI_NODE)
5113 VARRAY_PUSH_TREE (worklist, stmt);
5117 stmt_info = vinfo_for_stmt (stmt);
5121 if (vect_debug_details (NULL))
5123 fprintf (dump_file, "mark relevant: no stmt info!!.");
5124 print_generic_expr (dump_file, stmt, TDF_SLIM);
5129 if (STMT_VINFO_RELEVANT_P (stmt_info))
5131 if (vect_debug_details (NULL))
5132 fprintf (dump_file, "already marked relevant.");
5136 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5137 VARRAY_PUSH_TREE (worklist, stmt);
5141 /* Function vect_stmt_relevant_p.
5143 Return true if STMT in loop that is represented by LOOP_VINFO is
5144 "relevant for vectorization".
5146 A stmt is considered "relevant for vectorization" if:
5147 - it has uses outside the loop.
5148 - it has vdefs (it alters memory).
5149 - control stmts in the loop (except for the exit condition).
5151 CHECKME: what other side effects would the vectorizer allow? */
5154 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5156 v_may_def_optype v_may_defs;
5157 v_must_def_optype v_must_defs;
5158 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5163 /* cond stmt other than loop exit cond. */
5164 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5167 /* changing memory. */
5168 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5169 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5170 if (v_may_defs || v_must_defs)
5172 if (vect_debug_details (NULL))
5173 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5177 /* uses outside the loop. */
5178 df = get_immediate_uses (stmt);
5179 num_uses = num_immediate_uses (df);
5180 for (i = 0; i < num_uses; i++)
5182 tree use = immediate_use (df, i);
5183 basic_block bb = bb_for_stmt (use);
5184 if (!flow_bb_inside_loop_p (loop, bb))
5186 if (vect_debug_details (NULL))
5187 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5196 /* Function vect_mark_stmts_to_be_vectorized.
5198 Not all stmts in the loop need to be vectorized. For example:
5207 Stmt 1 and 3 do not need to be vectorized, because loop control and
5208 addressing of vectorized data-refs are handled differently.
5210 This pass detects such stmts. */
5213 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5215 varray_type worklist;
5216 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5217 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5218 unsigned int nbbs = loop->num_nodes;
5219 block_stmt_iterator si;
5225 stmt_vec_info stmt_info;
5227 if (vect_debug_details (NULL))
5228 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5230 VARRAY_TREE_INIT (worklist, 64, "work list");
5232 /* 1. Init worklist. */
5234 for (i = 0; i < nbbs; i++)
5236 basic_block bb = bbs[i];
5237 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5239 stmt = bsi_stmt (si);
5241 if (vect_debug_details (NULL))
5243 fprintf (dump_file, "init: stmt relevant? ");
5244 print_generic_expr (dump_file, stmt, TDF_SLIM);
5247 stmt_info = vinfo_for_stmt (stmt);
5248 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5250 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5251 vect_mark_relevant (worklist, stmt);
5256 /* 2. Process_worklist */
5258 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5260 stmt = VARRAY_TOP_TREE (worklist);
5261 VARRAY_POP (worklist);
5263 if (vect_debug_details (NULL))
5265 fprintf (dump_file, "worklist: examine stmt: ");
5266 print_generic_expr (dump_file, stmt, TDF_SLIM);
5269 /* Examine the USES in this statement. Mark all the statements which
5270 feed this statement's uses as "relevant", unless the USE is used as
5273 if (TREE_CODE (stmt) == PHI_NODE)
5275 /* follow the def-use chain inside the loop. */
5276 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5278 tree arg = PHI_ARG_DEF (stmt, j);
5279 tree def_stmt = NULL_TREE;
5281 if (!vect_is_simple_use (arg, loop, &def_stmt))
5283 if (vect_debug_details (NULL))
5284 fprintf (dump_file, "worklist: unsupported use.");
5285 varray_clear (worklist);
5291 if (vect_debug_details (NULL))
5293 fprintf (dump_file, "worklist: def_stmt: ");
5294 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5297 bb = bb_for_stmt (def_stmt);
5298 if (flow_bb_inside_loop_p (loop, bb))
5299 vect_mark_relevant (worklist, def_stmt);
5303 ann = stmt_ann (stmt);
5304 use_ops = USE_OPS (ann);
5306 for (i = 0; i < NUM_USES (use_ops); i++)
5308 tree use = USE_OP (use_ops, i);
5310 /* We are only interested in uses that need to be vectorized. Uses
5311 that are used for address computation are not considered relevant.
5313 if (exist_non_indexing_operands_for_use_p (use, stmt))
5315 tree def_stmt = NULL_TREE;
5317 if (!vect_is_simple_use (use, loop, &def_stmt))
5319 if (vect_debug_details (NULL))
5320 fprintf (dump_file, "worklist: unsupported use.");
5321 varray_clear (worklist);
5328 if (vect_debug_details (NULL))
5330 fprintf (dump_file, "worklist: examine use %d: ", i);
5331 print_generic_expr (dump_file, use, TDF_SLIM);
5334 bb = bb_for_stmt (def_stmt);
5335 if (flow_bb_inside_loop_p (loop, bb))
5336 vect_mark_relevant (worklist, def_stmt);
5339 } /* while worklist */
5341 varray_clear (worklist);
5346 /* Function vect_can_advance_ivs_p
5348 In case the number of iterations that LOOP iterates in unknown at compile
5349 time, an epilog loop will be generated, and the loop induction variables
5350 (IVs) will be "advanced" to the value they are supposed to take just before
5351 the epilog loop. Here we check that the access function of the loop IVs
5352 and the expression that represents the loop bound are simple enough.
5353 These restrictions will be relaxed in the future. */
5356 vect_can_advance_ivs_p (struct loop *loop)
5358 basic_block bb = loop->header;
5361 /* Analyze phi functions of the loop header. */
5363 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5365 tree access_fn = NULL;
5366 tree evolution_part;
5368 if (vect_debug_details (NULL))
5370 fprintf (dump_file, "Analyze phi: ");
5371 print_generic_expr (dump_file, phi, TDF_SLIM);
5374 /* Skip virtual phi's. The data dependences that are associated with
5375 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5377 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5379 if (vect_debug_details (NULL))
5380 fprintf (dump_file, "virtual phi. skip.");
5384 /* Analyze the evolution function. */
5386 access_fn = instantiate_parameters
5387 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5391 if (vect_debug_details (NULL))
5392 fprintf (dump_file, "No Access function.");
5396 if (vect_debug_details (NULL))
5398 fprintf (dump_file, "Access function of PHI: ");
5399 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5402 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5404 if (evolution_part == NULL_TREE)
5407 /* FORNOW: We do not transform initial conditions of IVs
5408 which evolution functions are a polynomial of degree >= 2. */
5410 if (tree_is_chrec (evolution_part))
5418 /* Function vect_get_loop_niters.
5420 Determine how many iterations the loop is executed.
5421 If an expression that represents the number of iterations
5422 can be constructed, place it in NUMBER_OF_ITERATIONS.
5423 Return the loop exit condition. */
5426 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5430 if (vect_debug_details (NULL))
5431 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5433 niters = number_of_iterations_in_loop (loop);
5435 if (niters != NULL_TREE
5436 && niters != chrec_dont_know)
5438 *number_of_iterations = niters;
5440 if (vect_debug_details (NULL))
5442 fprintf (dump_file, "==> get_loop_niters:" );
5443 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5447 return get_loop_exit_condition (loop);
5451 /* Function vect_analyze_loop_form.
5453 Verify the following restrictions (some may be relaxed in the future):
5454 - it's an inner-most loop
5455 - number of BBs = 2 (which are the loop header and the latch)
5456 - the loop has a pre-header
5457 - the loop has a single entry and exit
5458 - the loop exit condition is simple enough, and the number of iterations
5459 can be analyzed (a countable loop). */
5461 static loop_vec_info
5462 vect_analyze_loop_form (struct loop *loop)
5464 loop_vec_info loop_vinfo;
5466 tree number_of_iterations = NULL;
5467 bool rescan = false;
5469 if (vect_debug_details (loop))
5470 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5473 || !loop->single_exit
5474 || loop->num_nodes != 2
5475 || EDGE_COUNT (loop->header->preds) != 2
5476 || loop->num_entries != 1)
5478 if (vect_debug_stats (loop) || vect_debug_details (loop))
5480 fprintf (dump_file, "not vectorized: bad loop form. ");
5482 fprintf (dump_file, "nested loop.");
5483 else if (!loop->single_exit)
5484 fprintf (dump_file, "multiple exits.");
5485 else if (loop->num_nodes != 2)
5486 fprintf (dump_file, "too many BBs in loop.");
5487 else if (EDGE_COUNT (loop->header->preds) != 2)
5488 fprintf (dump_file, "too many incoming edges.");
5489 else if (loop->num_entries != 1)
5490 fprintf (dump_file, "too many entries.");
5496 /* We assume that the loop exit condition is at the end of the loop. i.e,
5497 that the loop is represented as a do-while (with a proper if-guard
5498 before the loop if needed), where the loop header contains all the
5499 executable statements, and the latch is empty. */
5500 if (!empty_block_p (loop->latch))
5502 if (vect_debug_stats (loop) || vect_debug_details (loop))
5503 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5507 /* Make sure we have a preheader basic block. */
5508 if (!loop->pre_header)
5511 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5514 /* Make sure there exists a single-predecessor exit bb: */
5515 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5518 loop_split_edge_with (loop->exit_edges[0], NULL);
5523 flow_loop_scan (loop, LOOP_ALL);
5524 /* Flow loop scan does not update loop->single_exit field. */
5525 loop->single_exit = loop->exit_edges[0];
5528 if (empty_block_p (loop->header))
5530 if (vect_debug_stats (loop) || vect_debug_details (loop))
5531 fprintf (dump_file, "not vectorized: empty loop.");
5535 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5538 if (vect_debug_stats (loop) || vect_debug_details (loop))
5539 fprintf (dump_file, "not vectorized: complicated exit condition.");
5543 if (!number_of_iterations)
5545 if (vect_debug_stats (loop) || vect_debug_details (loop))
5547 "not vectorized: number of iterations cannot be computed.");
5551 if (chrec_contains_undetermined (number_of_iterations))
5553 if (vect_debug_details (NULL))
5554 fprintf (dump_file, "Infinite number of iterations.");
5558 loop_vinfo = new_loop_vec_info (loop);
5559 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5561 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5563 if (vect_debug_details (loop))
5565 fprintf (dump_file, "loop bound unknown.\n");
5566 fprintf (dump_file, "Symbolic number of iterations is ");
5567 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5571 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5573 if (vect_debug_stats (loop) || vect_debug_details (loop))
5574 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5578 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5584 /* Function vect_analyze_loop.
5586 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5587 for it. The different analyses will record information in the
5588 loop_vec_info struct. */
5590 static loop_vec_info
5591 vect_analyze_loop (struct loop *loop)
5594 loop_vec_info loop_vinfo;
5596 if (vect_debug_details (NULL))
5597 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5599 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5601 loop_vinfo = vect_analyze_loop_form (loop);
5604 if (vect_debug_details (loop))
5605 fprintf (dump_file, "bad loop form.");
5609 /* Find all data references in the loop (which correspond to vdefs/vuses)
5610 and analyze their evolution in the loop.
5612 FORNOW: Handle only simple, array references, which
5613 alignment can be forced, and aligned pointer-references. */
5615 ok = vect_analyze_data_refs (loop_vinfo);
5618 if (vect_debug_details (loop))
5619 fprintf (dump_file, "bad data references.");
5620 destroy_loop_vec_info (loop_vinfo);
5624 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5626 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5629 if (vect_debug_details (loop))
5630 fprintf (dump_file, "unexpected pattern.");
5631 if (vect_debug_details (loop))
5632 fprintf (dump_file, "not vectorized: unexpected pattern.");
5633 destroy_loop_vec_info (loop_vinfo);
5637 /* Check that all cross-iteration scalar data-flow cycles are OK.
5638 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5640 ok = vect_analyze_scalar_cycles (loop_vinfo);
5643 if (vect_debug_details (loop))
5644 fprintf (dump_file, "bad scalar cycle.");
5645 destroy_loop_vec_info (loop_vinfo);
5649 /* Analyze data dependences between the data-refs in the loop.
5650 FORNOW: fail at the first data dependence that we encounter. */
5652 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5655 if (vect_debug_details (loop))
5656 fprintf (dump_file, "bad data dependence.");
5657 destroy_loop_vec_info (loop_vinfo);
5661 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5662 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5664 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5667 if (vect_debug_details (loop))
5668 fprintf (dump_file, "bad data access.");
5669 destroy_loop_vec_info (loop_vinfo);
5673 /* Analyze the alignment of the data-refs in the loop.
5674 FORNOW: Only aligned accesses are handled. */
5676 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5679 if (vect_debug_details (loop))
5680 fprintf (dump_file, "bad data alignment.");
5681 destroy_loop_vec_info (loop_vinfo);
5685 /* Scan all the operations in the loop and make sure they are
5688 ok = vect_analyze_operations (loop_vinfo);
5691 if (vect_debug_details (loop))
5692 fprintf (dump_file, "bad operation or unsupported loop bound.");
5693 destroy_loop_vec_info (loop_vinfo);
5697 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5703 /* Function need_imm_uses_for.
5705 Return whether we ought to include information for 'var'
5706 when calculating immediate uses. For this pass we only want use
5707 information for non-virtual variables. */
5710 need_imm_uses_for (tree var)
5712 return is_gimple_reg (var);
5716 /* Function vectorize_loops.
5718 Entry Point to loop vectorization phase. */
5721 vectorize_loops (struct loops *loops)
5723 unsigned int i, loops_num;
5724 unsigned int num_vectorized_loops = 0;
5726 /* Does the target support SIMD? */
5727 /* FORNOW: until more sophisticated machine modelling is in place. */
5728 if (!UNITS_PER_SIMD_WORD)
5730 if (vect_debug_details (NULL))
5731 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5735 #ifdef ENABLE_CHECKING
5736 verify_loop_closed_ssa ();
5739 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5741 /* ----------- Analyze loops. ----------- */
5743 /* If some loop was duplicated, it gets bigger number
5744 than all previously defined loops. This fact allows us to run
5745 only over initial loops skipping newly generated ones. */
5746 loops_num = loops->num;
5747 for (i = 1; i < loops_num; i++)
5749 loop_vec_info loop_vinfo;
5750 struct loop *loop = loops->parray[i];
5755 loop_vinfo = vect_analyze_loop (loop);
5756 loop->aux = loop_vinfo;
5758 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5761 vect_transform_loop (loop_vinfo, loops);
5762 num_vectorized_loops++;
5765 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5766 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5767 num_vectorized_loops);
5769 /* ----------- Finalize. ----------- */
5772 for (i = 1; i < loops_num; i++)
5774 struct loop *loop = loops->parray[i];
5775 loop_vec_info loop_vinfo;
5779 loop_vinfo = loop->aux;
5780 destroy_loop_vec_info (loop_vinfo);
5784 rewrite_into_ssa (false);
5785 rewrite_into_loop_closed_ssa (); /* FORNOW */
5786 bitmap_clear (vars_to_rename);