2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations whose vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
/* Forward declarations for the peeling machinery defined later in this
   file.  Only slpeel_tree_peel_loop_to_edge is non-static (usable from
   outside the vectorizer); everything else is internal helper code.  */
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
/* Helpers for mapping the ssa names of a duplicated loop body to fresh
   names (see allocate_new_names .. rename_variables_in_loop below).  */
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
/* Compiled only with checking enabled.  NOTE(review): the matching #endif
   is not visible in this excerpt -- confirm it follows the declaration.  */
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
/* Forward declarations for the vectorizer proper: analysis entry points,
   the transformation phase, and assorted utility routines.  */
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type *, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment (struct data_reference *);
218 static bool vect_analyze_data_ref_access (struct data_reference *);
219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
220 static struct data_reference * vect_analyze_pointer_ref_access
222 static bool vect_can_advance_ivs_p (struct loop *);
223 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
224 loop_vec_info, tree *, tree *, tree *,
/* NOTE(review): vect_analyze_pointer_ref_access is declared a second time
   just below (it already appears above) -- one of the two declarations
   should be removed once the full file is available for editing.  */
226 static struct data_reference * vect_analyze_pointer_ref_access
228 static tree vect_get_ptr_offset (tree, tree, tree *);
229 static tree vect_get_memtag_and_dr
230 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
231 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
234 /* Utility functions for the code transformation. */
235 static tree vect_create_destination_var (tree, tree);
236 static tree vect_create_data_ref_ptr
237 (tree, block_stmt_iterator *, tree, tree *, bool);
238 static tree vect_create_index_for_vector_ref
239 (struct loop *, block_stmt_iterator *);
240 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
241 static tree get_vectype_for_scalar_type (tree);
242 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
243 static tree vect_get_vec_def_for_operand (tree, tree);
244 static tree vect_init_vector (tree, tree);
245 static void vect_finish_stmt_generation
246 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
248 /* Utility function dealing with loop peeling (not peeling itself). */
249 static void vect_generate_tmps_on_preheader
250 (loop_vec_info, tree *, tree *, tree *);
251 static tree vect_build_loop_niters (loop_vec_info);
252 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
253 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
254 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
255 static void vect_update_inits_of_drs (loop_vec_info, tree);
256 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
257 static void vect_do_peeling_for_loop_bound
258 (loop_vec_info, tree *, struct loops *);
260 /* Utilities for creation and deletion of vec_info structs. */
261 loop_vec_info new_loop_vec_info (struct loop *loop);
262 void destroy_loop_vec_info (loop_vec_info);
263 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
/* Debug-dump predicates: decide whether stats/details should be dumped
   for LOOP (implementations not visible in this excerpt).  */
265 static bool vect_debug_stats (struct loop *loop);
266 static bool vect_debug_details (struct loop *loop);
269 /*************************************************************************
270 Simple Loop Peeling Utilities
272 Utilities to support loop peeling for vectorization purposes.
273 *************************************************************************/
276 /* For each definition in DEFINITIONS this function allocates
   a duplicate ssa name and records a pointer to it in the original
   name's SSA_NAME_AUX field, so that the rename_* routines below can
   translate old names to new ones in the duplicated loop body.
   The heap cells allocated here are released by free_new_names.
   NOTE(review): several lines of this function (return type, braces,
   and the declarations of VER/BI) were lost from this excerpt.  */
280 allocate_new_names (bitmap definitions)
285 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
287 tree def = ssa_name (ver);
288 tree *new_name_ptr = xmalloc (sizeof (tree));
290 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
/* The copy initially shares the original's defining stmt; the def stmt
   is re-pointed later by rename_def_op.  */
292 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
/* Preserve the abnormal-phi property on the duplicate.  */
293 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
295 SSA_NAME_AUX (def) = new_name_ptr;
300 /* Renames the use *OP_P.
   Replaces a use of an original ssa name with the duplicate recorded in
   SSA_NAME_AUX by allocate_new_names.  Names with no AUX entry (defined
   outside the region being renamed) are left untouched.
   NOTE(review): the early-return statements implied by the checks below
   were lost from this excerpt.  */
303 rename_use_op (use_operand_p op_p)
/* Only ssa-name operands participate in renaming.  */
307 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
310 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
312 /* Something defined outside of the loop. */
316 /* An ordinary ssa name defined in the loop. */
318 SET_USE (op_p, *new_name_ptr);
322 /* Renames the def *OP_P in statement STMT.
   Replaces the defined ssa name with its duplicate (from SSA_NAME_AUX)
   and makes STMT the defining statement of the new name, completing the
   transfer begun in allocate_new_names.
   NOTE(review): the early-return statements implied by the checks below
   were lost from this excerpt.  */
325 rename_def_op (def_operand_p op_p, tree stmt)
329 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
332 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
334 /* Something defined outside of the loop. */
338 /* An ordinary ssa name defined in the loop. */
340 SET_DEF (op_p, *new_name_ptr);
/* The duplicate is now defined by STMT, not by the original's stmt.  */
341 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
345 /* Renames the variables in basic block BB.
   Walks every phi result, and every use/def (real and virtual) of every
   statement in BB, rewriting each through rename_use_op/rename_def_op.
   Finally, phi arguments in successor blocks OUTSIDE the loop that flow
   in along BB's out-edges are renamed as well.
   NOTE(review): braces and several local declarations (stmt, ann, uses,
   defs, vuses, phi, e, ei, i) were lost from this excerpt.  */
348 rename_variables_in_bb (basic_block bb)
351 block_stmt_iterator bsi;
357 v_may_def_optype v_may_defs;
358 v_must_def_optype v_must_defs;
362 struct loop *loop = bb->loop_father;
/* Phi results are defs; rename them first.  */
364 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
365 rename_def_op (PHI_RESULT_PTR (phi), phi);
367 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
369 stmt = bsi_stmt (bsi);
/* Make sure the operand caches below are up to date.  */
370 get_stmt_operands (stmt);
371 ann = stmt_ann (stmt);
373 uses = USE_OPS (ann);
374 for (i = 0; i < NUM_USES (uses); i++)
375 rename_use_op (USE_OP_PTR (uses, i));
377 defs = DEF_OPS (ann);
378 for (i = 0; i < NUM_DEFS (defs); i++)
379 rename_def_op (DEF_OP_PTR (defs, i), stmt);
/* Virtual operands: VUSEs are uses only ...  */
381 vuses = VUSE_OPS (ann);
382 for (i = 0; i < NUM_VUSES (vuses); i++)
383 rename_use_op (VUSE_OP_PTR (vuses, i));
/* ... while V_MAY_DEFs and V_MUST_DEFs carry both a use and a def.  */
385 v_may_defs = V_MAY_DEF_OPS (ann);
386 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
388 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
389 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
392 v_must_defs = V_MUST_DEF_OPS (ann);
393 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
395 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
396 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
/* Values flowing out of the loop appear as phi args in blocks outside
   the loop; rename those incoming args too.  */
400 FOR_EACH_EDGE (e, ei, bb->succs)
402 if (!flow_bb_inside_loop_p (loop, e->dest))
404 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
405 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
410 /* Releases the structures holding the new ssa names.
   Counterpart of allocate_new_names: frees each heap cell stored in the
   SSA_NAME_AUX field of the names in DEFINITIONS and clears the field.
   NOTE(review): braces and the VER/BI declarations were lost from this
   excerpt.  */
413 free_new_names (bitmap definitions)
418 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
420 tree def = ssa_name (ver);
422 if (SSA_NAME_AUX (def))
424 free (SSA_NAME_AUX (def));
/* Clear AUX so later passes do not see a dangling pointer.  */
425 SSA_NAME_AUX (def) = NULL;
431 /* Renames variables in new generated LOOP.
   Simply applies rename_variables_in_bb to every basic block of LOOP.
   NOTE(review): the declarations of I and BBS, and the free of BBS,
   were lost from this excerpt.  */
434 rename_variables_in_loop (struct loop *loop)
439 bbs = get_loop_body (loop);
441 for (i = 0; i < loop->num_nodes; i++)
442 rename_variables_in_bb (bbs[i]);
448 /* Update the PHI nodes of NEW_LOOP.
450 NEW_LOOP is a duplicate of ORIG_LOOP.
451 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
452 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
453 executes before it. */
456 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
457 struct loop *new_loop, bool after)
459 tree *new_name_ptr, new_ssa_name;
460 tree phi_new, phi_orig;
462 edge orig_loop_latch = loop_latch_edge (orig_loop);
463 edge orig_entry_e = loop_preheader_edge (orig_loop);
464 edge new_loop_exit_e = new_loop->exit_edges[0];
465 edge new_loop_entry_e = loop_preheader_edge (new_loop);
/* If NEW_LOOP runs after ORIG_LOOP, its entry value is whatever flowed
   around ORIG_LOOP's latch; otherwise it is ORIG_LOOP's preheader value.  */
466 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
469 step 1. For each loop-header-phi:
470 Add the first phi argument for the phi in NEW_LOOP
471 (the one associated with the entry of NEW_LOOP)
473 step 2. For each loop-header-phi:
474 Add the second phi argument for the phi in NEW_LOOP
475 (the one associated with the latch of NEW_LOOP)
477 step 3. Update the phis in the successor block of NEW_LOOP.
479 case 1: NEW_LOOP was placed before ORIG_LOOP:
480 The successor block of NEW_LOOP is the header of ORIG_LOOP.
481 Updating the phis in the successor block can therefore be done
482 along with the scanning of the loop header phis, because the
483 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
484 phi nodes, organized in the same order.
486 case 2: NEW_LOOP was placed after ORIG_LOOP:
487 The successor block of NEW_LOOP is the original exit block of
488 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
489 We postpone updating these phis to a later stage (when
490 loop guards are added).
494 /* Scan the phis in the headers of the old and new loops
495 (they are organized in exactly the same order). */
497 for (phi_new = phi_nodes (new_loop->header),
498 phi_orig = phi_nodes (orig_loop->header);
500 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
/* step 1: set NEW_LOOP's entry-edge phi argument.  */
503 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
504 add_phi_arg (phi_new, def, new_loop_entry_e);
/* step 2: the latch argument is the renamed copy (via SSA_NAME_AUX)
   of ORIG_LOOP's latch value, when one exists.  */
507 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
508 if (TREE_CODE (def) != SSA_NAME)
511 new_name_ptr = SSA_NAME_AUX (def);
513 /* Something defined outside of the loop. */
516 /* An ordinary ssa name defined in the loop. */
517 new_ssa_name = *new_name_ptr;
518 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
520 /* step 3 (case 1). */
/* NEW_LOOP's exit must feed ORIG_LOOP's entry when it runs first.  */
523 gcc_assert (new_loop_exit_e == orig_entry_e);
524 SET_PHI_ARG_DEF (phi_orig,
525 phi_arg_from_edge (phi_orig, new_loop_exit_e),
532 /* Update PHI nodes for a guard of the LOOP.
535 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
536 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
537 originates from the guard-bb, skips LOOP and reaches the (unique) exit
538 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
539 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
540 LOOP header) before the guard code was added, and now it became a merge
541 point of two paths - the path that ends with the LOOP exit-edge, and
542 the path that ends with GUARD_EDGE.
544 This function creates and updates the relevant phi nodes to account for
545 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
546 1. Create phi nodes at NEW_MERGE_BB.
547 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
548 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
551 ===> The CFG before the guard-code was added:
553 if (exit_loop) goto update_bb : LOOP_header_bb
556 ==> The CFG after the guard-code was added:
558 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
560 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
565 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
566 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
567 organized in the same order.
568 If ENTRY_PHIS is FALSE, this indicates that the phis in UPDATE_BB are
571 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
572 "original" loop). FALSE if LOOP is an original loop (not a newly
573 created copy). The SSA_NAME_AUX fields of the defs in the original
574 loop are the corresponding new ssa-names used in the new duplicated
575 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
576 nodes in UPDATE_BB takes the original ssa-name, and which takes the
577 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
578 the LOOP-exit-edge takes the new-name, and the phi-arg that is
579 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
580 FALSE, it's the other way around.
NOTE(review): the full parameter list of this function and several body
lines (branches selecting between GUARD_ARG candidates) were lost from
this excerpt.  */
584 slpeel_update_phi_nodes_for_guard (edge guard_edge,
589 tree orig_phi, new_phi, update_phi;
590 tree guard_arg, loop_arg;
591 basic_block new_merge_bb = guard_edge->dest;
/* NEW_MERGE_BB has exactly one successor: UPDATE_BB.  */
592 edge e = EDGE_SUCC (new_merge_bb, 0);
593 basic_block update_bb = e->dest;
594 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
596 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
597 orig_phi && update_phi;
598 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
600 /* 1. Generate new phi node in NEW_MERGE_BB: */
601 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
604 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
605 of LOOP. Set the two phi args in NEW_PHI for these edges: */
608 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
609 EDGE_SUCC (loop->latch, 0));
610 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
614 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
615 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
619 new_name = *new_name_ptr;
621 /* Something defined outside of the loop */
/* Which of original/new name goes on GUARD_EDGE depends on IS_NEW_LOOP;
   see the function comment above.  */
626 guard_arg = orig_def;
631 guard_arg = new_name;
635 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
636 add_phi_arg (new_phi, guard_arg, guard_edge);
638 /* 3. Update phi in successor block. */
639 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
640 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
641 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
642 PHI_RESULT (new_phi));
/* Phis were prepended; restore source order.  */
645 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
649 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
650 that starts at zero, increases by one and its limit is NITERS.
652 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
/* NOTE(review): braces and the declarations of ORIG_COND, THEN_LABEL
   and ELSE_LABEL were lost from this excerpt.  */
655 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
657 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
659 edge exit_edge = loop->exit_edges[0];
660 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
661 tree begin_label = tree_block_label (loop->latch);
662 tree exit_label = tree_block_label (loop->single_exit->dest);
663 tree init = build_int_cst (TREE_TYPE (niters), 0);
664 tree step = build_int_cst (TREE_TYPE (niters), 1);
668 orig_cond = get_loop_exit_condition (loop);
669 gcc_assert (orig_cond);
670 create_iv (init, step, NULL_TREE, loop,
671 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
673 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
674 back to the exit condition statement. */
675 bsi_next (&loop_exit_bsi);
676 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
/* Build the replacement COND_EXPR.  Which comparison (GE vs LT) and
   which label is "then" depends on whether the true edge exits.  */
678 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
680 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
681 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
682 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
684 else /* 'then' edge loops back. */
686 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
687 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
688 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
691 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
692 then_label, else_label);
693 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
695 /* Remove old loop exit test: */
696 bsi_remove (&loop_exit_bsi);
698 if (vect_debug_stats (loop) || vect_debug_details (loop))
699 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
/* Record the now-known iteration count on the loop.  */
701 loop->nb_iterations = niters;
705 /* Given LOOP this function generates a new copy of it and puts it
706 on E which is either the entry or exit of LOOP.
   Returns the new loop, or NULL on failure (E is neither entry nor
   exit, blocks cannot be copied, or duplicate_loop fails).
   NOTE(review): braces, several local declarations (at_exit, phi,
   phi_arg, was_imm_dom, new_exit_e, ...), the early 'return NULL'
   statements, the free of BBS/NEW_BBS, and the final 'return new_loop'
   were lost from this excerpt.  */
709 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
712 struct loop *new_loop;
713 basic_block *new_bbs, *bbs;
716 basic_block exit_dest;
719 at_exit = (e == loop->exit_edges[0]);
720 if (!at_exit && e != loop_preheader_edge (loop))
722 if (dump_file && (dump_flags & TDF_DETAILS))
723 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
727 bbs = get_loop_body (loop);
729 /* Check whether duplication is possible. */
730 if (!can_copy_bbs_p (bbs, loop->num_nodes))
732 if (vect_debug_stats (loop) || vect_debug_details (loop))
733 fprintf (dump_file, "Cannot copy basic blocks.\n");
738 /* Generate new loop structure. */
739 new_loop = duplicate_loop (loops, loop, loop->outer);
742 if (vect_debug_stats (loop) || vect_debug_details (loop))
743 fprintf (dump_file, "duplicate_loop returns NULL.\n");
/* Remember whether LOOP's header immediately dominated the exit
   destination, so dominators can be fixed up after redirection.  */
748 exit_dest = loop->exit_edges[0]->dest;
749 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
750 exit_dest) == loop->header ?
753 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
755 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
757 /* Duplicating phi args at exit bbs as coming
758 also from exit of duplicated loop. */
759 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
761 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
764 edge new_loop_exit_edge;
/* The header's non-latch successor is the copied loop's exit edge.  */
766 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
767 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
769 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
771 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
775 if (at_exit) /* Add the loop copy at exit. */
777 redirect_edge_and_branch_force (e, new_loop->header);
778 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
780 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
782 else /* Add the copy at entry. */
785 edge entry_e = loop_preheader_edge (loop);
786 basic_block preheader = entry_e->src;
/* Pick the successor of the copied header that leaves the new loop.  */
788 if (!flow_bb_inside_loop_p (new_loop,
789 EDGE_SUCC (new_loop->header, 0)->dest))
790 new_exit_e = EDGE_SUCC (new_loop->header, 0);
792 new_exit_e = EDGE_SUCC (new_loop->header, 1);
794 redirect_edge_and_branch_force (new_exit_e, loop->header);
795 set_immediate_dominator (CDI_DOMINATORS, loop->header,
798 /* We have to add phi args to the loop->header here as coming
799 from new_exit_e edge. */
800 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
802 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
804 add_phi_arg (phi, phi_arg, new_exit_e);
807 redirect_edge_and_branch_force (entry_e, new_loop->header);
808 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
/* Refresh the cached entry/exit information of both loops.  */
811 flow_loop_scan (new_loop, LOOP_ALL);
812 flow_loop_scan (loop, LOOP_ALL);
820 /* Given the condition statement COND, put it as the last statement
821 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
822 Assumes that this is the single exit of the guarded loop.
823 Returns the skip edge.
   NOTE(review): the DOM_BB parameter, braces, the declarations of
   ENTER_E/NEW_E, and the final 'return new_e' were lost from this
   excerpt.  */
826 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
829 block_stmt_iterator bsi;
831 tree cond_stmt, then_label, else_label;
/* GUARD_BB's existing fallthru successor becomes the 'false'
   (enter-the-loop) edge of the new conditional.  */
833 enter_e = EDGE_SUCC (guard_bb, 0);
834 enter_e->flags &= ~EDGE_FALLTHRU;
835 enter_e->flags |= EDGE_FALSE_VALUE;
836 bsi = bsi_last (guard_bb);
838 then_label = build1 (GOTO_EXPR, void_type_node,
839 tree_block_label (exit_bb));
840 else_label = build1 (GOTO_EXPR, void_type_node,
841 tree_block_label (enter_e->dest));
842 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
843 then_label, else_label);
844 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
845 /* Add new edge to connect entry block to the second loop. */
846 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
847 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
852 /* This function verifies that the following restrictions apply to LOOP:
854 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
855 (3) it is single entry, single exit
856 (4) its exit condition is the last stmt in the header
857 (5) E is the entry/exit edge of LOOP.
   Returns false when any restriction is violated or when any name is
   marked for ssa rewriting; true otherwise.
   NOTE(review): restriction (1), the braces, the '|| !loop->outer'
   term implied by the comment below, and the return statements were
   lost from this excerpt.  */
861 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
863 edge exit_e = loop->exit_edges [0];
864 edge entry_e = loop_preheader_edge (loop);
865 tree orig_cond = get_loop_exit_condition (loop);
866 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
/* Pending ssa updates would be invalidated by duplication.  */
868 if (any_marked_for_rewrite_p ())
872 /* All loops have an outer scope; the only case loop->outer is NULL is for
873 the function itself. */
875 || loop->num_nodes != 2
876 || !empty_block_p (loop->latch)
877 || loop->num_exits != 1
878 || loop->num_entries != 1
879 /* Verify that new loop exit condition can be trivially modified. */
880 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
881 || (e != exit_e && e != entry_e))
887 #ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling: FIRST_LOOP must feed
   SECOND_LOOP through a guarded exit block.  Checking-only code.
   NOTE(review): braces, the 'static void' line, and the trailing
   assertions/#endif were lost from this excerpt.  */
889 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
890 struct loop *second_loop)
892 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
893 basic_block loop2_entry_bb = second_loop->pre_header;
894 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
896 /* A guard that controls whether the second_loop is to be executed or skipped
897 is placed in first_loop->exit. first_loop->exit therefore has two
898 successors - one is the preheader of second_loop, and the other is a bb
901 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
904 /* 1. Verify that one of the successors of first_loop->exit is the preheader
907 /* The preheader of new_loop is expected to have two predecessors:
908 first_loop->exit and the block that precedes first_loop. */
910 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
911 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
912 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
913 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
914 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
916 /* Verify that the other successor of first_loop->exit is after the
922 /* Function slpeel_tree_peel_loop_to_edge.
924 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
925 that is placed on the entry (exit) edge E of LOOP. After this transformation
926 we have two loops one after the other - first-loop iterates FIRST_NITERS
927 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
930 - LOOP: the loop to be peeled.
931 - E: the exit or entry edge of LOOP.
932 If it is the entry edge, we peel the first iterations of LOOP. In this
933 case first-loop is LOOP, and second-loop is the newly created loop.
934 If it is the exit edge, we peel the last iterations of LOOP. In this
935 case, first-loop is the newly created loop, and second-loop is LOOP.
936 - NITERS: the number of iterations that LOOP iterates.
937 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
938 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
939 for updating the loop bound of the first-loop to FIRST_NITERS. If it
940 is false, the caller of this function may want to take care of this
941 (this can be useful if we don't want new stmts added to first-loop).
944 The function returns a pointer to the new loop-copy, or NULL if it failed
945 to perform the transformation.
947 The function generates two if-then-else guards: one before the first loop,
948 and the other before the second loop:
950 if (FIRST_NITERS == 0) then skip the first loop,
951 and go directly to the second loop.
953 if (FIRST_NITERS == NITERS) then skip the second loop.
955 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
956 FORNOW the resulting code will not be in loop-closed-ssa form.
/* Peel off iterations of LOOP by duplicating it onto edge E and guarding
   both copies (contract described in the block comment above).  The peel
   is done in four visible steps: (1) duplicate LOOP on E, (2) guard the
   first loop with FIRST_NITERS == 0, (3) guard the second loop with
   FIRST_NITERS == NITERS, (4) optionally retarget the first loop's trip
   count to FIRST_NITERS.  */
960 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
961 edge e, tree first_niters,
962 tree niters, bool update_first_loop_count)
964 struct loop *new_loop = NULL, *first_loop, *second_loop;
968 basic_block bb_before_second_loop, bb_after_second_loop;
969 basic_block bb_before_first_loop;
970 basic_block bb_between_loops;
971 edge exit_e = loop->exit_edges [0];
/* Bail out early if this loop shape is not supported for peeling.  */
973 if (!slpeel_can_duplicate_loop_p (loop, e))
976 /* We have to initialize cfg_hooks. Then, when calling
977 cfg_hooks->split_edge, the function tree_split_edge
978 is actually called and, when calling cfg_hooks->duplicate_block,
979 the function tree_duplicate_bb is called. */
980 tree_register_cfg_hooks ();
983 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
984 Resulting CFG would be:
997 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
999 if (vect_debug_stats (loop) || vect_debug_details (loop))
1000 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
/* Which copy is "first" depends on whether NEW_LOOP landed on the entry
   or the exit edge of LOOP (E == exit_e distinguishes the two cases).  */
1006 /* NEW_LOOP was placed after LOOP. */
1008 second_loop = new_loop;
1012 /* NEW_LOOP was placed before LOOP. */
1013 first_loop = new_loop;
/* Re-establish SSA form in the copy: fresh names for all duplicated
   definitions, then fix up the phis linking the two loops.  */
1017 definitions = marked_ssa_names ();
1018 allocate_new_names (definitions);
1019 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1020 rename_variables_in_loop (new_loop);
1023 /* 2. Add the guard that controls whether the first loop is executed.
1024 Resulting CFG would be:
1026 bb_before_first_loop:
1027 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1034 bb_before_second_loop:
1043 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1044 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1045 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1046 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
/* Refresh cached loop information (edges, bbs) after the CFG changes.  */
1047 flow_loop_scan (first_loop, LOOP_ALL);
1048 flow_loop_scan (second_loop, LOOP_ALL);
1051 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1052 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1053 bb_before_second_loop, bb_before_first_loop);
1054 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1055 first_loop == new_loop);
1058 /* 3. Add the guard that controls whether the second loop is executed.
1059 Resulting CFG would be:
1061 bb_before_first_loop:
1062 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1070 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1071 GOTO bb_before_second_loop
1073 bb_before_second_loop:
1079 bb_after_second_loop:
1084 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1085 add_bb_to_loop (bb_between_loops, first_loop->outer);
1086 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1087 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1088 flow_loop_scan (first_loop, LOOP_ALL);
1089 flow_loop_scan (second_loop, LOOP_ALL);
1091 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1092 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1093 bb_after_second_loop, bb_before_first_loop);
1094 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1095 second_loop == new_loop);
1097 /* Flow loop scan does not update loop->single_exit field. */
1098 first_loop->single_exit = first_loop->exit_edges[0];
1099 second_loop->single_exit = second_loop->exit_edges[0];
1101 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1103 if (update_first_loop_count)
1104 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
/* Release the SSA-renaming bookkeeping allocated in step (1).  */
1106 free_new_names (definitions);
1107 BITMAP_XFREE (definitions);
1108 unmark_all_for_rewrite ();
1114 /* Here the proper Vectorizer starts. */
1116 /*************************************************************************
1117 Vectorization Utilities.
1118 *************************************************************************/
1120 /* Function new_stmt_vec_info.
1122 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocate a zeroed stmt_vec_info for STMT in LOOP and reset every field
   to its explicit "unknown/not yet computed" value.  */
1125 new_stmt_vec_info (tree stmt, struct loop *loop)
1128 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1130 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1131 STMT_VINFO_STMT (res) = stmt;
1132 STMT_VINFO_LOOP (res) = loop;
1133 STMT_VINFO_RELEVANT_P (res) = 0;
1134 STMT_VINFO_VECTYPE (res) = NULL;
1135 STMT_VINFO_VEC_STMT (res) = NULL;
1136 STMT_VINFO_DATA_REF (res) = NULL;
1137 STMT_VINFO_MEMTAG (res) = NULL;
/* Data-ref analysis results (base, offset, step, alignment) start out
   unknown; they are filled in later by the analysis phase.  */
1138 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1139 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1140 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1141 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1142 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1148 /* Function new_loop_vec_info.
1150 Create and initialize a new loop_vec_info struct for LOOP, as well as
1151 stmt_vec_info structs for all the stmts in LOOP. */
/* Allocate a loop_vec_info for LOOP, attach a fresh stmt_vec_info to
   every statement in the loop body, and initialize all per-loop fields
   to "not yet analyzed" defaults.  */
1154 new_loop_vec_info (struct loop *loop)
1158 block_stmt_iterator si;
1161 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1163 bbs = get_loop_body (loop);
1165 /* Create stmt_info for all stmts in the loop. */
1166 for (i = 0; i < loop->num_nodes; i++)
1168 basic_block bb = bbs[i];
1169 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1171 tree stmt = bsi_stmt (si);
/* Force operand (use/def) info to be up to date before hanging
   vectorizer data off the statement annotation.  */
1174 get_stmt_operands (stmt);
1175 ann = stmt_ann (stmt);
1176 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1180 LOOP_VINFO_LOOP (res) = loop;
1181 LOOP_VINFO_BBS (res) = bbs;
1182 LOOP_VINFO_EXIT_COND (res) = NULL;
1183 LOOP_VINFO_NITERS (res) = NULL;
1184 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1185 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1186 LOOP_VINFO_VECT_FACTOR (res) = 0;
/* Growable arrays for the loop's write/read data references; 20 is just
   the initial capacity, not a limit.  */
1187 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1188 "loop_write_datarefs");
1189 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1190 "loop_read_datarefs");
1191 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1197 /* Function destroy_loop_vec_info.
1199 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1200 stmts in the loop. */
/* Free LOOP_VINFO and detach/free the stmt_vec_info of every statement
   in the loop it describes (inverse of new_loop_vec_info).  */
1203 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1208 block_stmt_iterator si;
1214 loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 bbs = LOOP_VINFO_BBS (loop_vinfo);
1217 nbbs = loop->num_nodes;
1219 for (j = 0; j < nbbs; j++)
1221 basic_block bb = bbs[j];
1222 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1224 tree stmt = bsi_stmt (si);
1225 stmt_ann_t ann = stmt_ann (stmt);
1226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Clear the annotation so no dangling pointer to the freed
   stmt_vec_info survives in the statement.  */
1228 set_stmt_info (ann, NULL);
1232 free (LOOP_VINFO_BBS (loop_vinfo));
1233 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1234 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1240 /* Function debug_loop_stats.
1242 For vectorization statistics dumps. */
/* Predicate used to gate statistics dumps (-fdump-...-stats).  Returns
   nonzero only when a dump file with TDF_STATS is active; as a side
   effect, prints a "loop at FILE:LINE:" header for LOOP when source
   location info is available on a statement in the loop.  */
1245 vect_debug_stats (struct loop *loop)
1248 block_stmt_iterator si;
1249 tree node = NULL_TREE;
/* No dump requested: callers use this as a cheap "should I print?".  */
1251 if (!dump_file || !(dump_flags & TDF_STATS))
1256 fprintf (dump_file, "\n");
/* Scan for a statement that carries a source location to label the
   dump with.  */
1265 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1267 node = bsi_stmt (si);
1268 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1272 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1273 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1275 fprintf (dump_file, "\nloop at %s:%d: ",
1276 EXPR_FILENAME (node), EXPR_LINENO (node));
1284 /* Function debug_loop_details.
1286 For vectorization debug dumps. */
/* Predicate used to gate detailed debug dumps (TDF_DETAILS).  Same
   structure as vect_debug_stats above, differing only in the dump
   flag tested.  */
1289 vect_debug_details (struct loop *loop)
1292 block_stmt_iterator si;
1293 tree node = NULL_TREE;
1295 if (!dump_file || !(dump_flags & TDF_DETAILS))
1300 fprintf (dump_file, "\n");
/* Find a located statement so the dump can name the loop's position.  */
1309 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1311 node = bsi_stmt (si);
1312 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1316 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1317 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1319 fprintf (dump_file, "\nloop at %s:%d: ",
1320 EXPR_FILENAME (node), EXPR_LINENO (node));
1328 /* Function vect_get_ptr_offset
1330 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
/* Placeholder: meant to compute OFFSET (modulo vector alignment, in
   bits) of pointer REF.  All parameters are currently unused; the
   alignment information is not consulted yet (see TODO).  */
1333 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1334 tree vectype ATTRIBUTE_UNUSED,
1335 tree *offset ATTRIBUTE_UNUSED)
1337 /* TODO: Use alignment information. */
1342 /* Function vect_analyze_offset_expr
1344 Given an offset expression EXPR received from get_inner_reference, analyze
1345 it and create an expression for INITIAL_OFFSET by substituting the variables
1346 of EXPR with initial_condition of the corresponding access_fn in the loop.
1349 for (j = 3; j < N; j++)
1352 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1353 substituted, since its access_fn in the inner loop is i. 'j' will be
1354 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1357 Compute MISALIGN (the misalignment of the data reference initial access from
1358 its base) if possible. Misalignment can be calculated only if all the
1359 variables can be substituted with constants, or if a variable is multiplied
1360 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1361 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1362 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1363 VECTYPE_ALIGNMENT computation in the caller of this function).
1365 STEP is an evolution of the data reference in this loop in bytes.
1366 In the above example, STEP is C_j.
1368 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1369 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1370 are NULL_TREEs. Otherwise, return TRUE.
/* Analyze offset expression EXPR (see the block comment above for the
   full contract).  Recurses over PLUS/MINUS/MULT nodes, combining
   INITIAL_OFFSET, MISALIGN and STEP from the two operands; the base
   cases are constants and SSA variables.  On failure all three outputs
   are NULL_TREE.  */
1375 vect_analyze_offset_expr (tree expr,
1377 tree vectype_alignment,
1378 tree *initial_offset,
1384 tree left_offset = size_zero_node;
1385 tree right_offset = size_zero_node;
1386 tree left_misalign = size_zero_node;
1387 tree right_misalign = size_zero_node;
1388 tree left_step = size_zero_node;
1389 tree right_step = size_zero_node;
1390 enum tree_code code;
1391 tree init, evolution, def_stmt;
/* Default the outputs to "unknown"; they are set on each success path.  */
1396 *misalign = NULL_TREE;
1397 *initial_offset = NULL_TREE;
/* 1. Constant: the offset and misalignment are the constant itself and
   the evolution (step) in the loop is zero.  */
1401 if (TREE_CONSTANT (expr))
1403 *initial_offset = fold_convert (sizetype, expr);
1404 *misalign = fold_convert (sizetype, expr);
1405 *step = size_zero_node;
1409 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1410 access_fn in the current loop. */
1411 if (SSA_VAR_P (expr))
1413 tree access_fn = analyze_scalar_evolution (loop, expr);
1415 if (access_fn == chrec_dont_know)
1419 init = initial_condition_in_loop_num (access_fn, loop->num);
1422 def_stmt = SSA_NAME_DEF_STMT (init);
1424 && !IS_EMPTY_STMT (def_stmt)
1425 && flow_bb_inside_loop_p (loop, bb_for_stmt (def_stmt)))
1426 /* Not enough information: may be not loop invariant.
1427 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1428 initial_condition is D, but it depends on i - loop's induction
1433 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1434 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1435 /* Evolution is not constant. */
/* Misalignment is only known when the initial condition is constant.  */
1438 if (TREE_CONSTANT (init))
1439 *misalign = fold_convert (sizetype, init);
1441 /* Not constant, misalignment cannot be calculated. */
1442 *misalign = NULL_TREE;
1444 *initial_offset = fold_convert (sizetype, init);
1446 *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
1450 /* Recursive computation. */
1451 oprnd0 = TREE_OPERAND (expr, 0);
1452 oprnd1 = TREE_OPERAND (expr, 1);
/* Either operand failing propagates failure upward (outputs stay NULL).  */
1454 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1455 &left_misalign, &left_step)
1456 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1457 &right_offset, &right_misalign, &right_step))
1460 /* The type of the operation: plus, minus or mult. */
1461 code = TREE_CODE (expr);
/* NOTE(review): this branch appears to handle MULT_EXPR — the right
   operand must be constant for the product to be analyzable.  */
1465 if (!TREE_CONSTANT (right_offset))
1466 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1468 FORNOW: We don't support such cases. */
1471 /* Misalignment computation. */
1472 if (SSA_VAR_P (left_offset))
1474 /* If the left side contains variable that cannot be substituted with
1475 constant, we check if the right side is a multiple of ALIGNMENT. */
1476 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1477 vectype_alignment)))
1478 *misalign = size_zero_node;
1480 /* If the remainder is not zero or the right side isn't constant, we
1481 can't compute misalignment. */
1482 *misalign = NULL_TREE;
1486 /* The left operand was successfully substituted with constant. */
1488 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1490 *misalign = size_binop (code, left_misalign, right_misalign);
1492 *misalign = NULL_TREE;
1495 /* Step calculation. */
1496 /* Multiply the step by the right operand. */
1497 *step = size_binop (MULT_EXPR, left_step, right_offset);
1502 /* Combine the recursive calculations for step and misalignment. */
1503 *step = size_binop (code, left_step, right_step);
/* Misalignment is known only when both sides contributed a value.  */
1505 if (left_misalign && right_misalign)
1506 *misalign = size_binop (code, left_misalign, right_misalign);
1508 *misalign = NULL_TREE;
1516 /* Compute offset. */
1517 *initial_offset = fold_convert (sizetype,
1518 fold (build2 (code, TREE_TYPE (left_offset),
1525 /* Function vect_get_base_and_offset
1527 Return the BASE of the data reference EXPR.
1528 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1530 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1531 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1532 instantiated with initial_conditions of access_functions of variables,
1533 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1535 Function get_inner_reference is used for the above in case of ARRAY_REF and
1539 EXPR - the memory reference that is being analyzed
1540 DR - the data_reference struct of the _original_ memory reference
1541 (Note: DR_REF (DR) is not necessarily EXPR)
1542 VECTYPE - the type that defines the alignment (i.e, we compute
1543 alignment relative to TYPE_ALIGN(VECTYPE))
1546 BASE (returned value) - the base of the data reference EXPR.
1547 E.g, if EXPR is a.b[k].c[i][j] the returned
1549 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1550 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1551 computation is impossible
1552 STEP - evolution of the DR_REF in the loop
1553 BASE_ALIGNED_P - indicates if BASE is aligned
1555 If something unexpected is encountered (an unsupported form of data-ref),
1556 then NULL_TREE is returned. */
/* Recursively strip EXPR down to its base object, accumulating
   INITIAL_OFFSET, MISALIGN and STEP along the way (full contract in the
   block comment above).  DECLs, pointers and constants terminate the
   recursion; handled components go through get_inner_reference and
   recurse on the inner reference.  Returns the base, or NULL_TREE for
   unsupported forms.  */
1559 vect_get_base_and_offset (struct data_reference *dr,
1562 loop_vec_info loop_vinfo,
1563 tree *initial_offset,
1566 bool *base_aligned_p)
1568 tree this_offset = size_zero_node;
1569 tree this_misalign = size_zero_node;
1570 tree this_step = size_zero_node;
1571 tree base = NULL_TREE;
1573 tree oprnd0, oprnd1;
1574 enum tree_code code = TREE_CODE (expr);
1575 HOST_WIDE_INT pbitsize;
1576 HOST_WIDE_INT pbitpos;
1578 enum machine_mode pmode;
1579 int punsignedp, pvolatilep;
1580 tree bit_pos_in_bytes;
1581 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1583 *base_aligned_p = false;
1587 /* These cases end the recursion: */
/* Case: a DECL — offsets are zero; alignment is read off the decl.  */
1590 *initial_offset = size_zero_node;
1591 *step = size_zero_node;
1592 *misalign = size_zero_node;
1593 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1594 *base_aligned_p = true;
/* Case: an SSA pointer — its pointed-to type must be at least as
   aligned as VECTYPE, otherwise fall back to vect_get_ptr_offset.  */
1598 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1601 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1603 base = vect_get_ptr_offset (expr, vectype, misalign);
1605 *base_aligned_p = true;
1609 *base_aligned_p = true;
1610 *misalign = size_zero_node;
1612 *initial_offset = size_zero_node;
1613 *step = size_zero_node;
/* Case: an integer constant — it IS the offset/misalignment.  */
1617 *initial_offset = fold_convert (sizetype, expr);
1618 *misalign = fold_convert (sizetype, expr);
1619 *step = size_zero_node;
1622 /* These cases continue the recursion: */
1624 oprnd0 = TREE_OPERAND (expr, 0);
1629 oprnd0 = TREE_OPERAND (expr, 0);
1635 oprnd0 = TREE_OPERAND (expr, 0);
1636 oprnd1 = TREE_OPERAND (expr, 1);
1638 /* In case we have a PLUS_EXPR of the form
1639 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1640 This is verified in vect_get_memtag_and_dr. */
1641 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1642 &this_offset, &this_misalign,
1643 &this_step, base_aligned_p);
1644 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1645 this_offset = size_zero_node;
1648 this_misalign = NULL_TREE;
/* Anything that is not a handled component reference (ARRAY_REF,
   COMPONENT_REF, ...) cannot be analyzed further.  */
1654 if (!handled_component_p (expr))
1655 /* Unsupported expression. */
1658 /* Find the base and the offset from it. */
1659 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1660 &pmode, &punsignedp, &pvolatilep)
1665 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1666 &this_offset, &this_misalign,
1669 /* Failed to compute offset or step. */
1671 *initial_offset = NULL_TREE;
1672 *misalign = NULL_TREE;
1676 /* Add bit position to OFFSET and MISALIGN. */
1678 bit_pos_in_bytes = size_int (pbitpos/BITS_PER_UNIT);
1679 /* Check that there is no remainder in bits. */
1680 if (pbitpos%BITS_PER_UNIT)
1682 if (vect_debug_details (NULL))
1683 fprintf (dump_file, "bit offset alignment.");
1686 this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1687 fold_convert (sizetype, this_offset)));
1689 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1691 /* Continue the recursion to refine the base (get_inner_reference returns
1692 &a for &a[i], and not a). */
1696 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1697 initial_offset, misalign, step,
1701 /* Combine the results. */
/* A NULL misalignment from either level means "unknown" overall.  */
1702 if (this_misalign && *misalign)
1703 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1705 *misalign = NULL_TREE;
1707 *step = size_binop (PLUS_EXPR, *step, this_step);
1709 *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1710 *initial_offset, this_offset));
1712 if (vect_debug_details (NULL))
1714 print_generic_expr (dump_file, expr, TDF_SLIM);
1715 fprintf (dump_file, "\n --> total offset for ref: ");
1716 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1717 fprintf (dump_file, "\n --> total misalign for ref: ");
1718 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1719 fprintf (dump_file, "\n --> total step for ref: ");
1720 print_generic_expr (dump_file, *step, TDF_SLIM);
1727 /* Function vect_force_dr_alignment_p.
1729 Returns whether the alignment of a DECL can be forced to be aligned
1730 on ALIGNMENT bit boundary. */
/* Return whether DECL's alignment may be forced up to ALIGNMENT bits.
   Only plain VAR_DECLs qualify; externals and already-emitted decls
   cannot be realigned.  Static storage is limited by the object-file
   alignment; automatics by the stack boundary.  */
1733 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1735 if (TREE_CODE (decl) != VAR_DECL)
/* Alignment of externally-defined objects is out of our control.  */
1738 if (DECL_EXTERNAL (decl))
/* Too late: the decl has already been written to the asm file.  */
1741 if (TREE_ASM_WRITTEN (decl))
1744 if (TREE_STATIC (decl))
1745 return (alignment <= MAX_OFILE_ALIGNMENT);
1747 /* This is not 100% correct. The absolute correct stack alignment
1748 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1749 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1750 However, until someone implements forced stack alignment, SSE
1751 isn't really usable without this. */
1752 return (alignment <= PREFERRED_STACK_BOUNDARY);
1756 /* Function vect_get_new_vect_var.
1758 Returns a name for a new variable. The current naming scheme appends the
1759 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1760 the name of vectorizer generated variables, and appends that to NAME if
/* Create a fresh temporary of TYPE whose name is the VAR_KIND-dependent
   prefix ("vect_" / "vect_p", per the comment above) optionally
   concatenated with NAME.  */
1764 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1770 if (var_kind == vect_simple_var)
1775 prefix_len = strlen (prefix);
/* With no NAME, the bare prefix serves as the variable's name.  */
1778 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1780 new_vect_var = create_tmp_var (type, prefix);
1782 return new_vect_var;
1786 /* Function vect_create_index_for_vector_ref.
1788 Create (and return) an index variable, along with its update chain in the
1789 loop. This variable will be used to access a memory location in a vector
1793 LOOP: The loop being vectorized.
1794 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1795 function can be added here, or in the loop pre-header.
1798 Return an index that will be used to index a vector array. It is expected
1799 that a pointer to the first vector will be used as the base address for the
1802 FORNOW: we are not trying to be efficient, just creating a new index each
1803 time from scratch. At this time all vector references could use the same
1806 TODO: create only one index to be used by all vector references. Record
1807 the index in the LOOP_VINFO the first time this procedure is called and
1808 return it on subsequent calls. The increment of this index must be placed
1809 just before the conditional expression that ends the single block loop. */
/* Build an induction variable starting at 0 and stepping by 1 to index
   vector accesses in LOOP (see header comment for caveats/FORNOW notes);
   new statements go through BSI.  Returns the pre-increment value.  */
1812 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1815 tree indx_before_incr, indx_after_incr;
1817 /* It is assumed that the base pointer used for vectorized access contains
1818 the address of the first vector. Therefore the index used for vectorized
1819 access must be initialized to zero and incremented by 1. */
1821 init = integer_zero_node;
1822 step = integer_one_node;
1824 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1825 create_iv (init, step, NULL_TREE, loop, bsi, false,
1826 &indx_before_incr, &indx_after_incr);
/* Callers index relative to the first vector, so the pre-increment
   value is the one to use inside the loop body.  */
1828 return indx_before_incr;
1832 /* Function vect_create_addr_base_for_vector_ref.
1834 Create an expression that computes the address of the first memory location
1835 that will be accessed for a data reference.
1838 STMT: The statement containing the data reference.
1839 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1840 OFFSET: Optional. If supplied, it is added to the initial address.
1843 1. Return an SSA_NAME whose value is the address of the memory location of
1844 the first vector of the data reference.
1845 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1846 these statement(s) which define the returned SSA_NAME.
1848 FORNOW: We are only handling array accesses with step 1. */
/* Build (as GIMPLE appended to *NEW_STMT_LIST) the address of the first
   memory location STMT's data-ref will access, optionally displaced by
   OFFSET scaled by the data-ref's step.  Returns an SSA_NAME holding the
   address; the caller must insert *NEW_STMT_LIST (see header comment).  */
1851 vect_create_addr_base_for_vector_ref (tree stmt,
1852 tree *new_stmt_list,
1855 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1856 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1857 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1858 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1859 tree ref = DR_REF (dr);
1860 tree scalar_type = TREE_TYPE (ref);
1861 tree scalar_ptr_type = build_pointer_type (scalar_type);
1864 tree addr_base, addr_expr;
1865 tree dest, new_stmt;
1866 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
/* If the base is not already a pointer, take its address.  */
1868 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1869 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1871 /* Add '&' to ref_base. */
1872 data_ref_base = build_fold_addr_expr (data_ref_base);
1875 /* Create '(scalar_type*) base' for pointers. */
1876 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1877 tree scalar_array_type = build_array_type (scalar_type, 0);
1878 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1879 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1880 add_referenced_tmp_var (array_ptr);
/* Gimplify the base expression into DEST before casting it.  */
1882 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1883 add_referenced_tmp_var (dest);
1884 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1885 append_to_statement_list_force (new_stmt, new_stmt_list);
1887 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1888 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1889 new_temp = make_ssa_name (array_ptr, vec_stmt);
1890 TREE_OPERAND (vec_stmt, 0) = new_temp;
1891 append_to_statement_list_force (vec_stmt, new_stmt_list);
1892 data_ref_base = new_temp;
1895 /* Create base_offset */
1896 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1897 add_referenced_tmp_var (dest);
1898 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1899 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Caller-supplied OFFSET is in vector units: scale it by the data-ref
   step before adding it to the base offset.  */
1903 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1904 add_referenced_tmp_var (tmp);
1905 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1906 STMT_VINFO_VECT_STEP (stmt_info)));
1907 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1909 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1910 append_to_statement_list_force (new_stmt, new_stmt_list);
1913 /* base + base_offset */
1914 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1917 /* addr_expr = addr_base */
1918 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1919 get_name (base_name));
1920 add_referenced_tmp_var (addr_expr);
1921 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1922 new_temp = make_ssa_name (addr_expr, vec_stmt);
1923 TREE_OPERAND (vec_stmt, 0) = new_temp;
1924 append_to_statement_list_force (vec_stmt, new_stmt_list);
1926 if (vect_debug_details (NULL))
1928 fprintf (dump_file, "created ");
1929 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1930 fprintf (dump_file, "\n");
1936 /* Function get_vectype_for_scalar_type.
1938 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Map SCALAR_TYPE to the target-supported vector type holding
   UNITS_PER_SIMD_WORD / sizeof(SCALAR_TYPE) elements, or reject it if
   the resulting mode is not a vector mode on this target.  */
1942 get_vectype_for_scalar_type (tree scalar_type)
1944 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1945 int nbytes = GET_MODE_SIZE (inner_mode);
1952 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1954 nunits = UNITS_PER_SIMD_WORD / nbytes;
1956 vectype = build_vector_type (scalar_type, nunits);
1957 if (vect_debug_details (NULL))
1959 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1960 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1966 if (vect_debug_details (NULL))
1968 fprintf (dump_file, "vectype: ");
1969 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* A non-vector mode means the target has no SIMD support for this
   element type; the loop cannot be vectorized with it.  */
1972 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1974 /* TODO: tree-complex.c sometimes can parallelize operations
1975 on generic vectors. We can vectorize the loop in that case,
1976 but then we should re-run the lowering pass. */
1977 if (vect_debug_details (NULL))
1978 fprintf (dump_file, "mode not supported by target.");
1986 /* Function vect_align_data_ref.
1988 Handle misalignment of memory accesses.
1990 FORNOW: Can't handle misaligned accesses.
1991 Make sure that the dataref is aligned. */
/* Assert that STMT's data reference is aligned; misaligned accesses are
   not handled yet (FORNOW), so hitting an unaligned one here is an
   internal error rather than a recoverable condition.  */
1994 vect_align_data_ref (tree stmt)
1996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1999 /* FORNOW: can't handle misaligned accesses;
2000 all accesses expected to be aligned. */
2001 gcc_assert (aligned_access_p (dr));
2005 /* Function vect_create_data_ref_ptr.
2007 Create a memory reference expression for vector access, to be used in a
2008 vector load/store stmt. The reference is based on a new pointer to vector
2012 1. STMT: a stmt that references memory. Expected to be of the form
2013 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2014 2. BSI: block_stmt_iterator where new stmts can be added.
2015 3. OFFSET (optional): an offset to be added to the initial address accessed
2016 by the data-ref in STMT.
2017 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2018 pointing to the initial address.
2021 1. Declare a new ptr to vector_type, and have it point to the base of the
2022 data reference (initial addressed accessed by the data reference).
2023 For example, for vector of type V8HI, the following code is generated:
2026 vp = (v8hi *)initial_address;
2028 if OFFSET is not supplied:
2029 initial_address = &a[init];
2030 if OFFSET is supplied:
2031 initial_address = &a[init + OFFSET];
2033 Return the initial_address in INITIAL_ADDRESS.
2035 2. Create a data-reference in the loop based on the new vector pointer vp,
2036 and using a new index variable 'idx' as follows:
2040 where if ONLY_INIT is true:
2043 update = idx + vector_type_size
2045 Return the pointer vp'.
2048 FORNOW: handle only aligned and consecutive accesses. */
/* Create and initialize a vector pointer for STMT's data reference and,
   unless ONLY_INIT, the code that advances it each iteration (see the
   header comment for the generated-code sketch).  The initial address
   is returned through INITIAL_ADDRESS.  */
2051 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2052 tree *initial_address, bool only_init)
2055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2056 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2057 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2058 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2062 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2063 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2064 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2065 int nvuses, nv_may_defs, nv_must_defs;
2069 tree new_stmt_list = NULL_TREE;
2071 edge pe = loop_preheader_edge (loop);
2077 tree type, tmp, size;
2079 base_name = unshare_expr (DR_BASE_NAME (dr));
/* Dump-only block: classify the base for the debug trace.  */
2080 if (vect_debug_details (NULL))
2082 tree data_ref_base = base_name;
2083 fprintf (dump_file, "create array_ref of type: ");
2084 print_generic_expr (dump_file, vectype, TDF_SLIM);
2085 if (TREE_CODE (data_ref_base) == VAR_DECL)
2086 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2087 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2088 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2089 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2090 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2091 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2092 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2093 print_generic_expr (dump_file, base_name, TDF_SLIM);
2096 /** (1) Create the new vector-pointer variable: **/
2098 vect_ptr_type = build_pointer_type (vectype);
2099 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2100 get_name (base_name));
2101 add_referenced_tmp_var (vect_ptr);
2104 /** (2) Handle aliasing information of the new vector-pointer: **/
2106 tag = STMT_VINFO_MEMTAG (stmt_info);
2108 get_var_ann (vect_ptr)->type_mem_tag = tag;
2110 /* Mark for renaming all aliased variables
2111 (i.e, the may-aliases of the type-mem-tag). */
2112 nvuses = NUM_VUSES (vuses);
2113 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2114 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2115 for (i = 0; i < nvuses; i++)
2117 tree use = VUSE_OP (vuses, i);
2118 if (TREE_CODE (use) == SSA_NAME)
2119 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2121 for (i = 0; i < nv_may_defs; i++)
2123 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2124 if (TREE_CODE (def) == SSA_NAME)
2125 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2127 for (i = 0; i < nv_must_defs; i++)
2129 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2130 if (TREE_CODE (def) == SSA_NAME)
2131 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2135 /** (3) Calculate the initial address the vector-pointer, and set
2136 the vector-pointer to point to it before the loop: **/
2138 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2139 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2141 pe = loop_preheader_edge (loop);
/* Preheader edge insertion must not split a block here.  */
2142 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2143 gcc_assert (!new_bb);
2144 *initial_address = new_temp;
2146 /* Create: p = (vectype *) initial_base */
2147 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2148 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2149 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2150 TREE_OPERAND (vec_stmt, 0) = new_temp;
2151 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2152 gcc_assert (!new_bb);
2153 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2156 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2158 if (only_init) /* No update in loop is required. */
2159 return vect_ptr_init;
2161 idx = vect_create_index_for_vector_ref (loop, bsi);
2163 /* Create: update = idx * vectype_size */
2164 tmp = create_tmp_var (integer_type_node, "update");
2165 add_referenced_tmp_var (tmp);
2166 size = TYPE_SIZE (vect_ptr_type);
2167 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2168 ptr_update = create_tmp_var (type, "update");
2169 add_referenced_tmp_var (ptr_update);
2170 vectype_size = TYPE_SIZE_UNIT (vectype);
2171 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2172 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2173 new_temp = make_ssa_name (tmp, vec_stmt);
2174 TREE_OPERAND (vec_stmt, 0) = new_temp;
2175 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
/* Widen/convert the byte update to a pointer-sized integer type.  */
2176 vec_stmt = fold_convert (type, new_temp);
2177 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2178 new_temp = make_ssa_name (ptr_update, vec_stmt);
2179 TREE_OPERAND (vec_stmt, 0) = new_temp;
2180 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2182 /* Create: data_ref_ptr = vect_ptr_init + update */
2183 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2184 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2185 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2186 TREE_OPERAND (vec_stmt, 0) = new_temp;
2187 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2188 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2190 return data_ref_ptr;
2194 /* Function vect_create_destination_var.
2196 Create a new temporary of type VECTYPE. */
/* NOTE(review): the new vector temp's base name is derived from SCALAR_DEST
   (which must be an SSA_NAME); the temp is registered as referenced so the
   SSA machinery knows about it.  Body is partially elided in this view.  */
2199 vect_create_destination_var (tree scalar_dest, tree vectype)
2202 const char *new_name;
/* get_name requires an SSA_NAME (or decl) to extract a printable name.  */
2204 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2206 new_name = get_name (scalar_dest);
/* Create the new vector variable and make it known as a referenced temp.  */
2209 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2210 add_referenced_tmp_var (vec_dest);
2216 /* Function vect_init_vector.
2218 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2219 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2220 used in the vectorization of STMT. */
2223 vect_init_vector (tree stmt, tree vector_var)
2225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2226 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2229 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Create a fresh vector temp to hold the initialization value.  */
2235 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2236 add_referenced_tmp_var (new_var);
/* Build "new_var = vector_var" and give its LHS a fresh SSA name.  */
2238 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2239 new_temp = make_ssa_name (new_var, init_stmt);
2240 TREE_OPERAND (init_stmt, 0) = new_temp;
/* Insert the init stmt on the loop-preheader edge; the insertion must not
   have required splitting the edge (no new basic block).  */
2242 pe = loop_preheader_edge (loop);
2243 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2244 gcc_assert (!new_bb);
2246 if (vect_debug_details (NULL))
2248 fprintf (dump_file, "created new init_stmt: ");
2249 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
/* Return the SSA def created for the initialization.  */
2252 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2257 /* Function vect_get_vec_def_for_operand.
2259 OP is an operand in STMT. This function returns a (vector) def that will be
2260 used in the vectorized stmt for STMT.
2262 In the case that OP is an SSA_NAME which is defined in the loop, then
2263 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2265 In case OP is an invariant or constant, a new stmt that creates a vector def
2266 needs to be introduced. */
2269 vect_get_vec_def_for_operand (tree op, tree stmt)
2274 stmt_vec_info def_stmt_info = NULL;
2275 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2276 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Number of scalar elements in one vector of this type.  */
2277 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2278 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2285 if (vect_debug_details (NULL))
2287 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2288 print_generic_expr (dump_file, op, TDF_SLIM);
2291 /** ===> Case 1: operand is a constant. **/
2293 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2295 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2299 /* Build a tree with vector elements. */
2300 if (vect_debug_details (NULL))
2301 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
/* Build the element list back-to-front: NUNITS copies of the constant.  */
2303 for (i = nunits - 1; i >= 0; --i)
2305 t = tree_cons (NULL_TREE, op, t);
2307 vec_cst = build_vector (vectype, t);
/* Materialize the splat in the loop preheader and return its def.  */
2308 return vect_init_vector (stmt, vec_cst);
2311 gcc_assert (TREE_CODE (op) == SSA_NAME);
2313 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2315 def_stmt = SSA_NAME_DEF_STMT (op);
2316 def_stmt_info = vinfo_for_stmt (def_stmt);
2318 if (vect_debug_details (NULL))
2320 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2321 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2325 /** ==> Case 2.1: operand is defined inside the loop. **/
2329 /* Get the def from the vectorized stmt. */
/* The defining stmt was vectorized earlier; reuse the vector def it made.  */
2331 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2332 gcc_assert (vec_stmt);
2333 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2338 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2339 it is a reduction/induction. **/
2341 bb = bb_for_stmt (def_stmt);
2342 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2344 if (vect_debug_details (NULL))
2345 fprintf (dump_file, "reduction/induction - unsupported.");
2346 internal_error ("no support for reduction/induction"); /* FORNOW */
2350 /** ==> Case 2.3: operand is defined outside the loop -
2351 it is a loop invariant. */
/* Extract the invariant value according to the kind of defining stmt
   (phi outside the loop, plain assignment, or default def).  */
2353 switch (TREE_CODE (def_stmt))
2356 def = PHI_RESULT (def_stmt);
2359 def = TREE_OPERAND (def_stmt, 0);
2362 def = TREE_OPERAND (def_stmt, 0);
2363 gcc_assert (IS_EMPTY_STMT (def_stmt));
2367 if (vect_debug_details (NULL))
2369 fprintf (dump_file, "unsupported defining stmt: ");
2370 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2372 internal_error ("unsupported defining stmt");
2375 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2377 if (vect_debug_details (NULL))
2378 fprintf (dump_file, "Create vector_inv.");
2380 for (i = nunits - 1; i >= 0; --i)
2382 t = tree_cons (NULL_TREE, def, t);
/* A CONSTRUCTOR (not a VECTOR_CST) because the invariant is not a
   compile-time constant; init it in the preheader.  */
2385 vec_inv = build_constructor (vectype, t);
2386 return vect_init_vector (stmt, vec_inv);
2390 /* Function vect_finish_stmt_generation.
2392 Insert a new stmt. */
/* Inserts VEC_STMT before STMT at BSI, then re-positions BSI back onto
   STMT so the caller's iterator still points at the stmt being
   vectorized.  */
2395 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2397 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2399 if (vect_debug_details (NULL))
2401 fprintf (dump_file, "add new stmt: ");
2402 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2405 /* Make sure bsi points to the stmt that is being vectorized. */
2407 /* Assumption: any stmts created for the vectorization of stmt S were
2408 inserted before S. BSI is expected to point to S or some new stmt before S.
/* Walk forward until BSI reaches STMT; it must be found in this block.  */
2411 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2413 gcc_assert (stmt == bsi_stmt (*bsi));
2417 /* Function vectorizable_assignment.
2419 Check if STMT performs an assignment (copy) that can be vectorized.
2420 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2421 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2422 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2425 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2432 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2433 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2436 /* Is vectorizable assignment? */
/* Only SSA-to-SSA copies of the form "x_1 = y_2" qualify.  */
2438 if (TREE_CODE (stmt) != MODIFY_EXPR)
2441 scalar_dest = TREE_OPERAND (stmt, 0);
2442 if (TREE_CODE (scalar_dest) != SSA_NAME)
2445 op = TREE_OPERAND (stmt, 1);
/* The RHS must be a "simple use": loop-defined SSA name, invariant, or
   constant.  */
2446 if (!vect_is_simple_use (op, loop, NULL))
2448 if (vect_debug_details (NULL))
2449 fprintf (dump_file, "use not simple.")
/* Analysis phase: just record the stmt kind and report success.  */
2453 if (!vec_stmt) /* transformation not required. */
2455 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2460 if (vect_debug_details (NULL))
2461 fprintf (dump_file, "transform assignment.");
/* Handle def.  */
2464 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle use.  */
2467 op = TREE_OPERAND (stmt, 1);
2468 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2470 /* Arguments are ready. create the new vector stmt. */
2471 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2472 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2473 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2474 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2480 /* Function vectorizable_operation.
2482 Check if STMT performs a binary or unary operation that can be vectorized.
2483 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2484 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2485 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2488 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2493 tree op0, op1 = NULL;
2494 tree vec_oprnd0, vec_oprnd1=NULL;
2495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2496 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2497 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2499 enum tree_code code;
2500 enum machine_mode vec_mode;
2506 /* Is STMT a vectorizable binary/unary operation? */
2507 if (TREE_CODE (stmt) != MODIFY_EXPR)
2510 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the scalar tree code (PLUS_EXPR, MULT_EXPR, ...) to the optab that
   implements it on vectors.  */
2513 operation = TREE_OPERAND (stmt, 1);
2514 code = TREE_CODE (operation);
2515 optab = optab_for_tree_code (code, vectype);
2517 /* Support only unary or binary operations. */
2518 op_type = TREE_CODE_LENGTH (code);
2519 if (op_type != unary_op && op_type != binary_op)
2521 if (vect_debug_details (NULL))
2522 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* Each operand must be a "simple use" (loop def, invariant, or const).  */
2526 for (i = 0; i < op_type; i++)
2528 op = TREE_OPERAND (operation, i);
2529 if (!vect_is_simple_use (op, loop, NULL))
2531 if (vect_debug_details (NULL))
2532 fprintf (dump_file, "use not simple.")
2537 /* Supportable by target? */
2540 if (vect_debug_details (NULL))
2541 fprintf (dump_file, "no optab.");
/* Check the target has an insn for this operation in the vector mode.  */
2544 vec_mode = TYPE_MODE (vectype);
2545 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2547 if (vect_debug_details (NULL))
2548 fprintf (dump_file, "op not supported by target.");
/* Analysis phase: record the stmt kind and return.  */
2552 if (!vec_stmt) /* transformation not required. */
2554 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2560 if (vect_debug_details (NULL))
2561 fprintf (dump_file, "transform binary/unary operation.");
/* Handle def.  */
2564 scalar_dest = TREE_OPERAND (stmt, 0);
2565 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle uses: fetch (or create) the vector defs of the operand(s).  */
2568 op0 = TREE_OPERAND (operation, 0);
2569 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2571 if (op_type == binary_op)
2573 op1 = TREE_OPERAND (operation, 1);
2574 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2577 /* Arguments are ready. create the new vector stmt. */
2579 if (op_type == binary_op)
2580 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2581 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2583 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2584 build1 (code, vectype, vec_oprnd0));
2585 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2586 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2587 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2593 /* Function vectorizable_store.
2595 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2597 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2598 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2599 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2602 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2609 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2610 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2611 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2612 enum machine_mode vec_mode;
2614 enum dr_alignment_support alignment_support_cheme;
2616 /* Is vectorizable store? */
/* Must be "memref = x" where memref is an array or indirect reference.  */
2618 if (TREE_CODE (stmt) != MODIFY_EXPR)
2621 scalar_dest = TREE_OPERAND (stmt, 0);
2622 if (TREE_CODE (scalar_dest) != ARRAY_REF
2623 && TREE_CODE (scalar_dest) != INDIRECT_REF)
/* The stored value must be a "simple use" (loop def/invariant/const).  */
2626 op = TREE_OPERAND (stmt, 1);
2627 if (!vect_is_simple_use (op, loop, NULL))
2629 if (vect_debug_details (NULL))
2630 fprintf (dump_file, "use not simple.")
2634 vec_mode = TYPE_MODE (vectype);
2635 /* FORNOW. In some cases can vectorize even if data-type not supported
2636 (e.g. - array initialization with 0). */
2637 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2640 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Analysis phase: record the stmt kind and return.  */
2644 if (!vec_stmt) /* transformation not required. */
2646 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2652 if (vect_debug_details (NULL))
2653 fprintf (dump_file, "transform store");
2655 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2656 gcc_assert (alignment_support_cheme);
/* Fixed: this assert used "=" (assignment), which always passed and
   silently overwrote the variable with dr_aligned.  Only aligned stores
   are handled so far, so assert equality.  */
2657 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2659 /* Handle use - get the vectorized def from the defining stmt. */
2660 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
/* Handle def: build an aligned vector pointer to the store location and
   dereference it.  */
2663 /* FORNOW: make sure the data reference is aligned. */
2664 vect_align_data_ref (stmt);
2665 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2666 data_ref = build_fold_indirect_ref (data_ref);
2668 /* Arguments are ready. create the new vector stmt. */
2669 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2670 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2676 /* vectorizable_load.
2678 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2680 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2681 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2682 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2685 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2688 tree vec_dest = NULL;
2689 tree data_ref = NULL;
2691 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2692 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2693 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2700 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2701 edge pe = loop_preheader_edge (loop);
2702 enum dr_alignment_support alignment_support_cheme;
2704 /* Is vectorizable load? */
/* Must be "x = memref" with an SSA destination and an array/indirect RHS.  */
2706 if (TREE_CODE (stmt) != MODIFY_EXPR)
2709 scalar_dest = TREE_OPERAND (stmt, 0);
2710 if (TREE_CODE (scalar_dest) != SSA_NAME)
2713 op = TREE_OPERAND (stmt, 1);
2714 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2717 if (!STMT_VINFO_DATA_REF (stmt_info))
/* The target must support a vector move in this mode.  */
2720 mode = (int) TYPE_MODE (vectype);
2722 /* FORNOW. In some cases can vectorize even if data-type not supported
2723 (e.g. - data copies). */
2724 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2726 if (vect_debug_details (loop))
2727 fprintf (dump_file, "Aligned load, but unsupported type.");
/* Analysis phase: record the stmt kind and return.  */
2731 if (!vec_stmt) /* transformation not required. */
2733 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2739 if (vect_debug_details (NULL))
2740 fprintf (dump_file, "transform load.");
2742 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2743 gcc_assert (alignment_support_cheme);
/* Simple case: the access is aligned, or the target can do misaligned
   vector loads directly (movmisalign).  */
2745 if (alignment_support_cheme == dr_aligned
2746 || alignment_support_cheme == dr_unaligned_supported)
2757 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2758 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2759 if (aligned_access_p (dr))
2760 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned-but-supported: wrap in MISALIGNED_INDIRECT_REF, recording
   the known misalignment in bits (0 if unknown, i.e. mis == -1).  */
2763 int mis = DR_MISALIGNMENT (dr);
2764 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2765 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2766 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2768 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2769 new_temp = make_ssa_name (vec_dest, new_stmt);
2770 TREE_OPERAND (new_stmt, 0) = new_temp;
2771 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Hard case: software-pipelined realignment.  Load the two aligned
   vectors that straddle the desired (unaligned) data and combine them
   with REALIGN_LOAD, carrying the "most significant quadword" across
   iterations via a loop-header phi.  */
2773 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2777 msq_init = *(floor(p1))
2778 p2 = initial_addr + VS - 1;
2779 magic = have_builtin ? builtin_result : initial_address;
2782 p2' = p2 + indx * vectype_size
2784 vec_dest = realign_load (msq, lsq, magic)
2798 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2799 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2800 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2802 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2803 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2804 new_temp = make_ssa_name (vec_dest, new_stmt);
2805 TREE_OPERAND (new_stmt, 0) = new_temp;
2806 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2807 gcc_assert (!new_bb);
2808 msq_init = TREE_OPERAND (new_stmt, 0);
2811 /* <2> Create lsq = *(floor(p2')) in the loop */
/* OFFSET = VS - 1 vector elements, so p2 points into the next aligned
   vector that contains the tail of the desired data.  */
2812 offset = build_int_cst (integer_type_node,
2813 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2814 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2815 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2816 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2817 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2818 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2819 new_temp = make_ssa_name (vec_dest, new_stmt);
2820 TREE_OPERAND (new_stmt, 0) = new_temp;
2821 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2822 lsq = TREE_OPERAND (new_stmt, 0);
/* <3> Compute the "magic" third operand of REALIGN_LOAD: either a
   target-specific permutation mask, or the (unaligned) address itself.  */
2826 if (targetm.vectorize.builtin_mask_for_load)
2828 /* Create permutation mask, if required, in loop preheader. */
2830 params = build_tree_list (NULL_TREE, init_addr);
2831 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2832 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2833 new_stmt = build_function_call_expr (builtin_decl, params);
2834 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
2835 new_temp = make_ssa_name (vec_dest, new_stmt);
2836 TREE_OPERAND (new_stmt, 0) = new_temp;
2837 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2838 gcc_assert (!new_bb);
2839 magic = TREE_OPERAND (new_stmt, 0);
2841 /* Since we have just created a CALL_EXPR, we may need to
2842 rename call-clobbered variables. */
2843 mark_call_clobbered_vars_to_rename ();
2847 /* Use current address instead of init_addr for reduced reg pressure.
2849 magic = dataref_ptr;
2853 /* <4> Create msq = phi <msq_init, lsq> in loop */
/* Carry the previous iteration's high-part vector around the loop.  */
2854 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2855 msq = make_ssa_name (vec_dest, NULL_TREE);
2856 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2857 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2858 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2859 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2862 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2863 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2864 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2865 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2866 new_temp = make_ssa_name (vec_dest, new_stmt);
2867 TREE_OPERAND (new_stmt, 0) = new_temp;
2868 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2873 *vec_stmt = new_stmt;
2878 /* Function vect_supportable_dr_alignment
2880 Return whether the data reference DR is supported with respect to its
/* Returns one of: dr_aligned, dr_unaligned_software_pipeline,
   dr_unaligned_supported, dr_unaligned_unsupported.  The aligned-access
   early return is elided in this view.  */
2883 static enum dr_alignment_support
2884 vect_supportable_dr_alignment (struct data_reference *dr)
2886 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2887 enum machine_mode mode = (int) TYPE_MODE (vectype);
2889 if (aligned_access_p (dr))
2892 /* Possibly unaligned access. */
/* For reads, prefer the software-pipelined realign scheme when the
   target has vec_realign_load and (if it defines one) a usable
   mask-for-load builtin.  */
2894 if (DR_IS_READ (dr))
2896 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2897 && (!targetm.vectorize.builtin_mask_for_load
2898 || targetm.vectorize.builtin_mask_for_load ()))
2899 return dr_unaligned_software_pipeline;
2901 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2902 /* Can't software pipeline the loads, but can at least do them. */
2903 return dr_unaligned_supported;
/* Unsupported: the caller must peel, version, or give up.  */
2907 return dr_unaligned_unsupported;
2911 /* Function vect_transform_stmt.
2913 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatches on the stmt kind recorded during analysis
   (STMT_VINFO_TYPE) and records the resulting vector stmt so later
   uses can find it via STMT_VINFO_VEC_STMT.  */
2916 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2918 bool is_store = false;
2919 tree vec_stmt = NULL_TREE;
2920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2923 switch (STMT_VINFO_TYPE (stmt_info))
2925 case op_vec_info_type:
2926 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2930 case assignment_vec_info_type:
2931 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2935 case load_vec_info_type:
2936 done = vectorizable_load (stmt, bsi, &vec_stmt);
2940 case store_vec_info_type:
2941 done = vectorizable_store (stmt, bsi, &vec_stmt);
2946 if (vect_debug_details (NULL))
2947 fprintf (dump_file, "stmt not supported.");
/* Remember the vectorized form for uses in later stmts.  */
2951 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2957 /* This function builds ni_name = number of iterations loop executes
2958 on the loop preheader. */
2961 vect_build_loop_niters (loop_vec_info loop_vinfo)
2963 tree ni_name, stmt, var;
2965 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Unshare so gimplification does not corrupt the stored NITERS expr.  */
2966 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2968 var = create_tmp_var (TREE_TYPE (ni), "niters");
2969 add_referenced_tmp_var (var);
/* Gimplify NI into NI_NAME; STMT receives any stmts that were needed.  */
2970 ni_name = force_gimple_operand (ni, &stmt, false, var);
/* Insert those stmts on the preheader edge; must not split the edge.  */
2972 pe = loop_preheader_edge (loop);
2975 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2976 gcc_assert (!new_bb);
2983 /* This function generates the following statements:
2985 ni_name = number of iterations loop executes
2986 ratio = ni_name / vf
2987 ratio_mult_vf_name = ratio * vf
2989 and places them at the loop preheader edge. */
2992 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2994 tree *ratio_mult_vf_name_ptr,
2995 tree *ratio_name_ptr)
3003 tree ratio_mult_vf_name;
3004 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3005 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3006 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* VF is a power of two, so /VF and *VF become shifts by log2(VF).  */
3007 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3009 pe = loop_preheader_edge (loop);
3011 /* Generate temporary variable that contains
3012 number of iterations loop executes. */
3014 ni_name = vect_build_loop_niters (loop_vinfo);
3016 /* Create: ratio = ni >> log2(vf) */
3018 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3019 add_referenced_tmp_var (var);
3020 ratio_name = make_ssa_name (var, NULL_TREE);
3021 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3022 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3023 SSA_NAME_DEF_STMT (ratio_name) = stmt;
/* Insert on the preheader edge; must not require splitting the edge.  */
3025 pe = loop_preheader_edge (loop);
3026 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3027 gcc_assert (!new_bb);
3029 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3031 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3032 add_referenced_tmp_var (var);
3033 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3034 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3035 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3036 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3038 pe = loop_preheader_edge (loop);
3039 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3040 gcc_assert (!new_bb);
/* Return all three names to the caller.  */
3042 *ni_name_ptr = ni_name;
3043 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3044 *ratio_name_ptr = ratio_name;
3050 /* Function vect_update_ivs_after_vectorizer.
3052 "Advance" the induction variables of LOOP to the value they should take
3053 after the execution of LOOP. This is currently necessary because the
3054 vectorizer does not handle induction variables that are used after the
3055 loop. Such a situation occurs when the last iterations of LOOP are
3057 1. We introduced new uses after LOOP for IVs that were not originally used
3058 after LOOP: the IVs of LOOP are now used by an epilog loop.
3059 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3060 times, whereas the loop IVs should be bumped N times.
3063 - LOOP - a loop that is going to be vectorized. The last few iterations
3064 of LOOP were peeled.
3065 - NITERS - the number of iterations that LOOP executes (before it is
3066 vectorized). i.e, the number of times the ivs should be bumped.
3067 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3068 coming out from LOOP on which there are uses of the LOOP ivs
3069 (this is the path from LOOP->exit to epilog_loop->preheader).
3071 The new definitions of the ivs are placed in LOOP->exit.
3072 The phi args associated with the edge UPDATE_E in the bb
3073 UPDATE_E->dest are updated accordingly.
3075 Assumption 1: Like the rest of the vectorizer, this function assumes
3076 a single loop exit that has a single predecessor.
3078 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3079 organized in the same order.
3081 Assumption 3: The access function of the ivs is simple enough (see
3082 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3084 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3085 coming out of LOOP on which the ivs of LOOP are used (this is the path
3086 that leads to the epilog loop; other paths skip the epilog loop). This
3087 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3088 needs to have its phis updated.
3092 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3094 basic_block exit_bb = loop->exit_edges[0]->dest;
3096 basic_block update_bb = update_e->dest;
3098 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3100 /* Make sure there exists a single-predecessor exit bb: */
3101 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
/* Walk header phis and update_bb phis in lockstep (Assumption 2).  */
3103 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3105 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3107 tree access_fn = NULL;
3108 tree evolution_part;
3111 tree var, stmt, ni, ni_name;
3112 block_stmt_iterator last_bsi;
3114 /* Skip virtual phi's. */
3115 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3117 if (vect_debug_details (NULL))
3118 fprintf (dump_file, "virtual phi. skip.");
/* Analyze this IV's evolution: access_fn describes its value as a
   function of the iteration number.  */
3122 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3123 gcc_assert (access_fn);
3125 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3126 gcc_assert (evolution_part != NULL_TREE);
3128 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3129 of degree >= 2 or exponential. */
3130 gcc_assert (!tree_is_chrec (evolution_part));
3132 step_expr = evolution_part;
3133 init_expr = unshare_expr (initial_condition (access_fn));
/* Final value after NITERS iterations: init + niters * step.  */
3135 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3136 build2 (MULT_EXPR, TREE_TYPE (niters),
3137 niters, step_expr), init_expr);
3139 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3140 add_referenced_tmp_var (var);
3142 ni_name = force_gimple_operand (ni, &stmt, false, var);
3144 /* Insert stmt into exit_bb. */
3145 last_bsi = bsi_last (exit_bb);
3147 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3149 /* Fix phi expressions in the successor bb. */
3150 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3151 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3152 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3157 /* Function vect_do_peeling_for_loop_bound
3159 Peel the last iterations of the loop represented by LOOP_VINFO.
3160 The peeled iterations form a new epilog loop. Given that the loop now
3161 iterates NITERS times, the new epilog loop iterates
3162 NITERS % VECTORIZATION_FACTOR times.
3164 The original loop will later be made to iterate
3165 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3168 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3169 struct loops *loops)
3172 tree ni_name, ratio_mult_vf_name;
3173 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3174 struct loop *new_loop;
3176 #ifdef ENABLE_CHECKING
3180 if (vect_debug_details (NULL))
/* Fixed dump message: previously printed the misspelled and unrelated
   name "vect_transtorm_for_unknown_loop_bound".  */
3181 fprintf (dump_file, "\n<<vect_do_peeling_for_loop_bound>>\n");
3183 /* Generate the following variables on the preheader of original loop:
3185 ni_name = number of iteration the original loop executes
3186 ratio = ni_name / vf
3187 ratio_mult_vf_name = ratio * vf */
3188 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3189 &ratio_mult_vf_name, ratio);
3191 /* Update loop info. */
3192 loop->pre_header = loop_preheader_edge (loop)->src;
3193 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3195 #ifdef ENABLE_CHECKING
3196 loop_num = loop->num;
/* Peel off the last NITERS % VF iterations into a scalar epilog loop.  */
3198 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3199 ratio_mult_vf_name, ni_name, false);
3200 #ifdef ENABLE_CHECKING
3201 gcc_assert (new_loop);
3202 gcc_assert (loop_num == loop->num);
3203 slpeel_verify_cfg_after_peeling (loop, new_loop);
3206 /* A guard that controls whether the new_loop is to be executed or skipped
3207 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3208 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3209 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3210 is on the path where the LOOP IVs are used and need to be updated. */
3212 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3213 update_e = EDGE_PRED (new_loop->pre_header, 0);
3215 update_e = EDGE_PRED (new_loop->pre_header, 1);
3217 /* Update IVs of original loop as if they were advanced
3218 by ratio_mult_vf_name steps. */
3219 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3221 /* After peeling we have to reset scalar evolution analyzer. */
3228 /* Function vect_gen_niters_for_prolog_loop
3230 Set the number of iterations for the loop represented by LOOP_VINFO
3231 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3232 and the misalignment of DR - the first data reference recorded in
3233 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3234 this loop, the data reference DR will refer to an aligned location.
3236 The following computation is generated:
3238 compute address misalignment in bytes:
3239 addr_mis = addr & (vectype_size - 1)
3241 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3243 (elem_size = element type size; an element is the scalar element
3244 whose type is the inner type of the vectype) */
3247 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3249 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3250 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3251 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3253 tree iters, iters_name;
3256 tree dr_stmt = DR_STMT (dr);
3257 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3258 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Vector size in bytes; also the alignment we want to reach.  */
3259 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3262 tree new_stmts = NULL_TREE;
/* Compute the runtime start address of DR (stmts go in NEW_STMTS).  */
3264 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3265 tree ptr_type = TREE_TYPE (start_addr);
3266 tree size = TYPE_SIZE (ptr_type);
3267 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3268 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3269 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3270 tree niters_type = TREE_TYPE (loop_niters);
/* log2(element size in bytes) = log2(vector bytes / elements).  */
3271 tree elem_size_log =
3272 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3273 tree vf_tree = build_int_cst (unsigned_type_node, vf);
/* Emit the address computation on the preheader edge.  */
3275 pe = loop_preheader_edge (loop);
3276 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3277 gcc_assert (!new_bb);
3279 /* Create: byte_misalign = addr & (vectype_size - 1) */
3280 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3282 /* Create: elem_misalign = byte_misalign / element_size */
3284 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3286 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3287 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3288 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3289 iters = fold_convert (niters_type, iters);
3291 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3292 /* If the loop bound is known at compile time we already verified that it is
3293 greater than vf; since the misalignment ('iters') is at most vf, there's
3294 no need to generate the MIN_EXPR in this case. */
3295 if (!TREE_CONSTANT (loop_niters))
3296 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3298 var = create_tmp_var (niters_type, "prolog_loop_niters");
3299 add_referenced_tmp_var (var);
3300 iters_name = force_gimple_operand (iters, &stmt, false, var);
3302 /* Insert stmt on loop preheader edge. */
3303 pe = loop_preheader_edge (loop);
3306 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3307 gcc_assert (!new_bb);
3314 /* Function vect_update_inits_of_dr
3316 NITERS iterations were peeled from LOOP. DR represents a data reference
3317 in LOOP. This function updates the information recorded in DR to
3318 account for the fact that the first NITERS iterations had already been
3319 executed. Specifically, it updates the OFFSET field of stmt_info. */
3322 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3324 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3325 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
/* Advance the recorded initial offset by niters * step.  */
3327 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3328 STMT_VINFO_VECT_STEP (stmt_info)));
3329 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3330 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3334 /* Function vect_update_inits_of_drs
3336 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3337 This function updates the information recorded for the data references in
3338 the loop to account for the fact that the first NITERS iterations had
3339 already been executed. Specifically, it updates the initial_condition of the
3340 access_function of all the data_references in the loop. */
3343 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3346 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3347 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3349 if (dump_file && (dump_flags & TDF_DETAILS))
3350 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3352 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3354 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3355 vect_update_inits_of_dr (dr, niters);
3358 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3360 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3361 vect_update_inits_of_dr (dr, niters);
3366 /* Function vect_do_peeling_for_alignment
3368 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3369 'niters' is set to the misalignment of one of the data references in the
3370 loop, thereby forcing it to refer to an aligned location at the beginning
3371 of the execution of this loop. The data reference for which we are
3372 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3375 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3377 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3378 tree niters_of_prolog_loop, ni_name;
3380 struct loop *new_loop;
3382 if (vect_debug_details (NULL))
3383 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3385 ni_name = vect_build_loop_niters (loop_vinfo);
3386 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3388 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3390 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3391 niters_of_prolog_loop, ni_name, true);
3392 #ifdef ENABLE_CHECKING
3393 gcc_assert (new_loop);
3394 slpeel_verify_cfg_after_peeling (new_loop, loop);
3397 /* Update number of times loop executes. */
3398 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3399 LOOP_VINFO_NITERS (loop_vinfo) =
3400 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3402 /* Update the init conditions of the access functions of all data refs. */
3403 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3405 /* After peeling we have to reset scalar evolution analyzer. */
3412 /* Function vect_transform_loop.
3414 The analysis phase has determined that the loop is vectorizable.
3415 Vectorize the loop - created vectorized stmts to replace the scalar
3416 stmts in the loop, and update the loop exit condition. */
/* NOTE(review): this region is a damaged extraction -- the stray leading
   numbers are original source line numbers fused into the text, and
   brace-only lines, local declarations (e.g. 'i', 'ratio', 'is_store'),
   an apparent 'else' before the build_int_cst below, and '#endif' lines
   are missing.  Comments annotate the surviving lines; confirm against
   the pristine file before relying on control flow.  */
3419 vect_transform_loop (loop_vec_info loop_vinfo,
3420 struct loops *loops ATTRIBUTE_UNUSED)
/* Cached loop description: the loop itself, its basic blocks, the block
   count, and the vectorization factor (VF) chosen by the analysis.  */
3422 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3423 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3424 int nbbs = loop->num_nodes;
3425 block_stmt_iterator si;
3428 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3430 if (vect_debug_details (NULL))
3431 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
/* Step 1: peel a scalar prolog to force alignment of the data-ref
   recorded by the analysis phase, when peeling was requested.  */
3434 /* Peel the loop if there are data refs with unknown alignment.
3435 Only one data ref with unknown store is allowed. */
3437 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3438 vect_do_peeling_for_alignment (loop_vinfo, loops);
3440 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3441 compile time constant), or it is a constant that doesn't divide by the
3442 vectorization factor, then an epilog loop needs to be created.
3443 We therefore duplicate the loop: the original loop will be vectorized,
3444 and will compute the first (n/VF) iterations. The second copy of the loop
3445 will remain scalar and will compute the remaining (n%VF) iterations.
3446 (VF is the vectorization factor). */
3448 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3449 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3450 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3451 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
/* NOTE(review): the next statement is presumably the 'else' arm (constant
   trip count divisible by VF: ratio = niters / VF) -- the 'else' keyword
   itself is missing from this extraction.  */
3453 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3454 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3456 /* 1) Make sure the loop header has exactly two entries
3457 2) Make sure we have a preheader basic block. */
3459 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3461 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3464 /* FORNOW: the vectorizer supports only loops which body consist
3465 of one basic block (header + empty latch). When the vectorizer will
3466 support more involved loop forms, the order by which the BBs are
3467 traversed need to be reconsidered. */
/* Step 2: walk every stmt in every block; replace each relevant scalar
   stmt by its vector form.  Note: the inner loop does NOT advance 'si'
   in the for-header -- advancement happens inside (vect_transform_stmt
   and/or missing bsi_next calls), since stores are removed in place.  */
3469 for (i = 0; i < nbbs; i++)
3471 basic_block bb = bbs[i];
3473 for (si = bsi_start (bb); !bsi_end_p (si);)
3475 tree stmt = bsi_stmt (si);
3476 stmt_vec_info stmt_info;
3479 if (vect_debug_details (NULL))
3481 fprintf (dump_file, "------>vectorizing statement: ");
3482 print_generic_expr (dump_file, stmt, TDF_SLIM);
3484 stmt_info = vinfo_for_stmt (stmt);
3485 gcc_assert (stmt_info);
/* Irrelevant stmts (exit condition, indexing computations) are skipped;
   the 'bsi_next; continue;' body of this 'if' is missing here.  */
3486 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3491 #ifdef ENABLE_CHECKING
3492 /* FORNOW: Verify that all stmts operate on the same number of
3493 units and no inner unrolling is necessary. */
/* NOTE(review): the 'gcc_assert' head of this assertion is missing.  */
3495 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3496 == vectorization_factor);
3498 /* -------- vectorize statement ------------ */
3499 if (vect_debug_details (NULL))
3500 fprintf (dump_file, "transform statement.");
/* vect_transform_stmt returns whether STMT was a store; stores are
   fully replaced, so the scalar stmt is removed below.  */
3502 is_store = vect_transform_stmt (stmt, &si);
3505 /* free the attached stmt_vec_info and remove the stmt. */
3506 stmt_ann_t ann = stmt_ann (stmt);
3508 set_stmt_info (ann, NULL);
/* Step 3: the vector loop now executes ratio = n/VF iterations.  */
3517 slpeel_make_loop_iterate_ntimes (loop, ratio);
3519 if (vect_debug_details (loop))
3520 fprintf (dump_file,"Success! loop vectorized.");
3521 if (vect_debug_stats (loop))
3522 fprintf (dump_file, "LOOP VECTORIZED.");
3526 /* Function vect_is_simple_use.
3529 LOOP - the loop that is being vectorized.
3530 OPERAND - operand of a stmt in LOOP.
3531 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3533 Returns whether a stmt with OPERAND can be vectorized.
3534 Supportable operands are constants, loop invariants, and operands that are
3535 defined by the current iteration of the loop. Unsupportable operands are
3536 those that are defined by a previous iteration of the loop (as is the case
3537 in reduction/induction computations). */
3540 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3548 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3551 if (TREE_CODE (operand) != SSA_NAME)
3554 def_stmt = SSA_NAME_DEF_STMT (operand);
3555 if (def_stmt == NULL_TREE )
3557 if (vect_debug_details (NULL))
3558 fprintf (dump_file, "no def_stmt.");
3562 /* empty stmt is expected only in case of a function argument.
3563 (Otherwise - we expect a phi_node or a modify_expr). */
3564 if (IS_EMPTY_STMT (def_stmt))
3566 tree arg = TREE_OPERAND (def_stmt, 0);
3567 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3569 if (vect_debug_details (NULL))
3571 fprintf (dump_file, "Unexpected empty stmt: ");
3572 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3577 /* phi_node inside the loop indicates an induction/reduction pattern.
3578 This is not supported yet. */
3579 bb = bb_for_stmt (def_stmt);
3580 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3582 if (vect_debug_details (NULL))
3583 fprintf (dump_file, "reduction/induction - unsupported.");
3584 return false; /* FORNOW: not supported yet. */
3587 /* Expecting a modify_expr or a phi_node. */
3588 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3589 || TREE_CODE (def_stmt) == PHI_NODE)
3600 /* Function vect_analyze_operations.
3602 Scan the loop stmts and make sure they are all vectorizable. */
/* NOTE(review): damaged extraction -- leading numbers are original line
   numbers; brace-only lines, some declarations (e.g. 'i', 'scalar_type',
   'vectype', 'ok'), 'return false;' statements after the failure dumps,
   '#endif', and the final 'return true;' are missing.  Comments annotate
   the surviving lines only; confirm against the pristine file.  */
3605 vect_analyze_operations (loop_vec_info loop_vinfo)
3607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3609 int nbbs = loop->num_nodes;
3610 block_stmt_iterator si;
/* The common vector width (units per vector) shared by all stmts; 0 means
   "not yet determined".  FORNOW mixed unit counts are rejected.  */
3611 unsigned int vectorization_factor = 0;
3616 if (vect_debug_details (NULL))
3617 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
/* Examine every stmt in every basic block of the loop.  */
3619 for (i = 0; i < nbbs; i++)
3621 basic_block bb = bbs[i];
3623 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3625 tree stmt = bsi_stmt (si);
3626 unsigned int nunits;
3627 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3630 if (vect_debug_details (NULL))
3632 fprintf (dump_file, "==> examining statement: ");
3633 print_generic_expr (dump_file, stmt, TDF_SLIM);
3636 gcc_assert (stmt_info);
3638 /* skip stmts which do not need to be vectorized.
3639 this is expected to include:
3640 - the COND_EXPR which is the loop exit condition
3641 - any LABEL_EXPRs in the loop
3642 - computations that are used only for array indexing or loop
3645 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3647 if (vect_debug_details (NULL))
3648 fprintf (dump_file, "irrelevant.");
/* A stmt that is already in vector form cannot be re-vectorized.  */
3652 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3654 if (vect_debug_stats (loop) || vect_debug_details (loop))
3656 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3657 print_generic_expr (dump_file, stmt, TDF_SLIM)
/* Derive the scalar element type: from the data-ref if the stmt has one,
   else from the lhs of a MODIFY_EXPR, else from the stmt itself.  */
3662 if (STMT_VINFO_DATA_REF (stmt_info))
3663 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3664 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3665 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3667 scalar_type = TREE_TYPE (stmt);
3669 if (vect_debug_details (NULL))
3671 fprintf (dump_file, "get vectype for scalar type: ");
3672 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3675 vectype = get_vectype_for_scalar_type (scalar_type);
3678 if (vect_debug_stats (loop) || vect_debug_details (loop))
3680 fprintf (dump_file, "not vectorized: unsupported data-type ");
3681 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3686 if (vect_debug_details (NULL))
3688 fprintf (dump_file, "vectype: ");
3689 print_generic_expr (dump_file, vectype, TDF_SLIM);
3691 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* The stmt is vectorizable iff one of the per-kind checkers accepts it
   (NULL args = analysis-only mode, no code generation).  */
3693 ok = (vectorizable_operation (stmt, NULL, NULL)
3694 || vectorizable_assignment (stmt, NULL, NULL)
3695 || vectorizable_load (stmt, NULL, NULL)
3696 || vectorizable_store (stmt, NULL, NULL));
3700 if (vect_debug_stats (loop) || vect_debug_details (loop))
3702 fprintf (dump_file, "not vectorized: stmt not supported: ");
3703 print_generic_expr (dump_file, stmt, TDF_SLIM);
3708 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3709 if (vect_debug_details (NULL))
3710 fprintf (dump_file, "nunits = %d", nunits);
3712 if (vectorization_factor)
3714 /* FORNOW: don't allow mixed units.
3715 This restriction will be relaxed in the future. */
3716 if (nunits != vectorization_factor)
3718 if (vect_debug_stats (loop) || vect_debug_details (loop))
3719 fprintf (dump_file, "not vectorized: mixed data-types");
/* First vectorizable stmt fixes the loop's vectorization factor.  */
3724 vectorization_factor = nunits;
3726 #ifdef ENABLE_CHECKING
3727 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3728 * vectorization_factor == UNITS_PER_SIMD_WORD);
3733 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3735 if (vectorization_factor <= 1)
3737 if (vect_debug_stats (loop) || vect_debug_details (loop))
3738 fprintf (dump_file, "not vectorized: unsupported data-type");
3741 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3743 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3745 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3746 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* A known trip count smaller than VF means no full vector iteration.  */
3748 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3749 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3751 if (vect_debug_stats (loop) || vect_debug_details (loop))
3752 fprintf (dump_file, "not vectorized: iteration count too small.");
/* Unknown trip count, or a count not divisible by VF, requires a scalar
   epilog loop -- verify that one can actually be generated.  */
3756 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3757 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3759 if (vect_debug_stats (loop) || vect_debug_details (loop))
3760 fprintf (dump_file, "epilog loop required.");
3761 if (!vect_can_advance_ivs_p (loop))
3763 if (vect_debug_stats (loop) || vect_debug_details (loop))
3764 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3767 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3769 if (vect_debug_stats (loop) || vect_debug_details (loop))
3770 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3779 /* Function exist_non_indexing_operands_for_use_p
3781 USE is one of the uses attached to STMT. Check if USE is
3782 used in STMT for anything other than indexing an array. */
3785 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3790 /* USE corresponds to some operand in STMT. If there is no data
3791 reference in STMT, then any operand that corresponds to USE
3792 is not indexing an array. */
3793 if (!STMT_VINFO_DATA_REF (stmt_info))
3796 /* STMT has a data_ref. FORNOW this means that its of one of
3797 the following forms:
3800 (This should have been verified in analyze_data_refs).
3802 'var' in the second case corresponds to a def, not a use,
3803 so USE cannot correspond to any operands that are not used
3806 Therefore, all we need to check is if STMT falls into the
3807 first case, and whether var corresponds to USE. */
3809 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3812 operand = TREE_OPERAND (stmt, 1);
3814 if (TREE_CODE (operand) != SSA_NAME)
3824 /* Function vect_is_simple_iv_evolution.
3826 FORNOW: A simple evolution of an induction variables in the loop is
3827 considered a polynomial evolution with constant step. */
3830 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3831 tree * step, bool strict)
3836 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3838 /* When there is no evolution in this loop, the evolution function
3840 if (evolution_part == NULL_TREE)
3843 /* When the evolution is a polynomial of degree >= 2
3844 the evolution function is not "simple". */
3845 if (tree_is_chrec (evolution_part))
3848 step_expr = evolution_part;
3849 init_expr = unshare_expr (initial_condition (access_fn));
3851 if (vect_debug_details (NULL))
3853 fprintf (dump_file, "step: ");
3854 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3855 fprintf (dump_file, ", init: ");
3856 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3862 if (TREE_CODE (step_expr) != INTEGER_CST)
3864 if (vect_debug_details (NULL))
3865 fprintf (dump_file, "step unknown.");
3870 if (!integer_onep (step_expr))
3872 if (vect_debug_details (NULL))
3873 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3881 /* Function vect_analyze_scalar_cycles.
3883 Examine the cross iteration def-use cycles of scalar variables, by
3884 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3885 cycles that they represent do not impede vectorization.
3887 FORNOW: Reduction as in the following loop, is not supported yet:
3891 The cross-iteration cycle corresponding to variable 'sum' will be
3892 considered too complicated and will impede vectorization.
3894 FORNOW: Induction as in the following loop, is not supported yet:
3899 However, the following loop *is* vectorizable:
3904 In both loops there exists a def-use cycle for the variable i:
3905 loop: i_2 = PHI (i_0, i_1)
3910 The evolution of the above cycle is considered simple enough,
3911 however, we also check that the cycle does not need to be
3912 vectorized, i.e - we check that the variable that this cycle
3913 defines is only used for array indexing or in stmts that do not
3914 need to be vectorized. This is not the case in loop2, but it
3915 *is* the case in loop3. */
3918 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3921 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3922 basic_block bb = loop->header;
3925 if (vect_debug_details (NULL))
3926 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3928 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3930 tree access_fn = NULL;
3932 if (vect_debug_details (NULL))
3934 fprintf (dump_file, "Analyze phi: ");
3935 print_generic_expr (dump_file, phi, TDF_SLIM);
3938 /* Skip virtual phi's. The data dependences that are associated with
3939 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3941 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3943 if (vect_debug_details (NULL))
3944 fprintf (dump_file, "virtual phi. skip.");
3948 /* Analyze the evolution function. */
3950 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3951 those of loop induction variables; This property is verified here.
3953 Furthermore, if that induction variable is used in an operation
3954 that needs to be vectorized (i.e, is not solely used to index
3955 arrays and check the exit condition) - we do not support its
3956 vectorization yet. This property is verified in vect_is_simple_use,
3957 during vect_analyze_operations. */
3959 access_fn = /* instantiate_parameters
3961 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3965 if (vect_debug_stats (loop) || vect_debug_details (loop))
3966 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3970 if (vect_debug_details (NULL))
3972 fprintf (dump_file, "Access function of PHI: ");
3973 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3976 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3979 if (vect_debug_stats (loop) || vect_debug_details (loop))
3980 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3989 /* Function vect_analyze_data_ref_dependence.
3991 Return TRUE if there (might) exist a dependence between a memory-reference
3992 DRA and a memory-reference DRB. */
3995 vect_analyze_data_ref_dependence (struct data_reference *dra,
3996 struct data_reference *drb,
4000 struct data_dependence_relation *ddr;
4002 if (!array_base_name_differ_p (dra, drb, &differ_p))
4004 if (vect_debug_stats (loop) || vect_debug_details (loop))
4007 "not vectorized: can't determine dependence between: ");
4008 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4009 fprintf (dump_file, " and ");
4010 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4018 ddr = initialize_data_dependence_relation (dra, drb);
4019 compute_affine_dependence (ddr);
4021 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4024 if (vect_debug_stats (loop) || vect_debug_details (loop))
4027 "not vectorized: possible dependence between data-refs ");
4028 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4029 fprintf (dump_file, " and ");
4030 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4037 /* Function vect_analyze_data_ref_dependences.
4039 Examine all the data references in the loop, and make sure there do not
4040 exist any data dependences between them.
4042 TODO: dependences which distance is greater than the vectorization factor
4046 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4049 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4050 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4051 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4053 /* Examine store-store (output) dependences. */
4055 if (vect_debug_details (NULL))
4056 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4058 if (vect_debug_details (NULL))
4059 fprintf (dump_file, "compare all store-store pairs.");
4061 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4063 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4065 struct data_reference *dra =
4066 VARRAY_GENERIC_PTR (loop_write_refs, i);
4067 struct data_reference *drb =
4068 VARRAY_GENERIC_PTR (loop_write_refs, j);
4069 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4074 /* Examine load-store (true/anti) dependences. */
4076 if (vect_debug_details (NULL))
4077 fprintf (dump_file, "compare all load-store pairs.");
4079 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4081 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4083 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4084 struct data_reference *drb =
4085 VARRAY_GENERIC_PTR (loop_write_refs, j);
4086 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4095 /* Function vect_compute_data_ref_alignment
4097 Compute the misalignment of the data reference DR.
4100 1. If during the misalignment computation it is found that the data reference
4101 cannot be vectorized then false is returned.
4102 2. DR_MISALIGNMENT (DR) is defined.
4104 FOR NOW: No analysis is actually performed. Misalignment is calculated
4105 only for trivial cases. TODO. */
4108 vect_compute_data_ref_alignment (struct data_reference *dr)
4110 tree stmt = DR_STMT (dr);
4111 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4112 tree ref = DR_REF (dr);
4114 tree base, alignment;
4115 bool base_aligned_p;
4118 if (vect_debug_details (NULL))
4119 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4121 /* Initialize misalignment to unknown. */
4122 DR_MISALIGNMENT (dr) = -1;
4124 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4125 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4126 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4127 vectype = STMT_VINFO_VECTYPE (stmt_info);
4131 if (vect_debug_details (NULL))
4133 fprintf (dump_file, "Unknown alignment for access: ");
4134 print_generic_expr (dump_file, base, TDF_SLIM);
4139 if (!base_aligned_p)
4141 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4143 if (vect_debug_details (NULL))
4145 fprintf (dump_file, "can't force alignment of ref: ");
4146 print_generic_expr (dump_file, ref, TDF_SLIM);
4151 /* Force the alignment of the decl.
4152 NOTE: This is the only change to the code we make during
4153 the analysis phase, before deciding to vectorize the loop. */
4154 if (vect_debug_details (NULL))
4155 fprintf (dump_file, "force alignment");
4156 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4157 DECL_USER_ALIGN (base) = 1;
4160 /* At this point we assume that the base is aligned. */
4161 gcc_assert (base_aligned_p
4162 || (TREE_CODE (base) == VAR_DECL
4163 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4165 /* Alignment required, in bytes: */
4166 alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4168 /* Modulo alignment. */
4169 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4170 if (tree_int_cst_sgn (misalign) < 0)
4172 /* Negative misalignment value. */
4173 if (vect_debug_details (NULL))
4174 fprintf (dump_file, "unexpected misalign value");
4178 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4180 if (vect_debug_details (NULL))
4181 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4187 /* Function vect_compute_data_refs_alignment
4189 Compute the misalignment of data references in the loop.
4190 This pass may take place at function granularity instead of at loop
4193 FOR NOW: No analysis is actually performed. Misalignment is calculated
4194 only for trivial cases. TODO. */
4197 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4199 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4200 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4203 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4205 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4206 if (!vect_compute_data_ref_alignment (dr))
4210 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4212 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4213 if (!vect_compute_data_ref_alignment (dr))
4221 /* Function vect_enhance_data_refs_alignment
4223 This pass will use loop versioning and loop peeling in order to enhance
4224 the alignment of data references in the loop.
4226 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4227 original loop is to be vectorized; Any other loops that are created by
4228 the transformations performed in this pass - are not supposed to be
4229 vectorized. This restriction will be relaxed. */
4232 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4234 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4235 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4236 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4240 This pass will require a cost model to guide it whether to apply peeling
4241 or versioning or a combination of the two. For example, the scheme that
4242 intel uses when given a loop with several memory accesses, is as follows:
4243 choose one memory access ('p') which alignment you want to force by doing
4244 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4245 other accesses are not necessarily aligned, or (2) use loop versioning to
4246 generate one loop in which all accesses are aligned, and another loop in
4247 which only 'p' is necessarily aligned.
4249 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4250 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4251 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4253 Devising a cost model is the most critical aspect of this work. It will
4254 guide us on which access to peel for, whether to use loop versioning, how
4255 many versions to create, etc. The cost model will probably consist of
4256 generic considerations as well as target specific considerations (on
4257 powerpc for example, misaligned stores are more painful than misaligned
4260 Here is the general steps involved in alignment enhancements:
4262 -- original loop, before alignment analysis:
4263 for (i=0; i<N; i++){
4264 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4265 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4268 -- After vect_compute_data_refs_alignment:
4269 for (i=0; i<N; i++){
4270 x = q[i]; # DR_MISALIGNMENT(q) = 3
4271 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4274 -- Possibility 1: we do loop versioning:
4276 for (i=0; i<N; i++){ # loop 1A
4277 x = q[i]; # DR_MISALIGNMENT(q) = 3
4278 p[i] = y; # DR_MISALIGNMENT(p) = 0
4282 for (i=0; i<N; i++){ # loop 1B
4283 x = q[i]; # DR_MISALIGNMENT(q) = 3
4284 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4288 -- Possibility 2: we do loop peeling:
4289 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4293 for (i = 3; i < N; i++){ # loop 2A
4294 x = q[i]; # DR_MISALIGNMENT(q) = 0
4295 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4298 -- Possibility 3: combination of loop peeling and versioning:
4299 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4304 for (i = 3; i<N; i++){ # loop 3A
4305 x = q[i]; # DR_MISALIGNMENT(q) = 0
4306 p[i] = y; # DR_MISALIGNMENT(p) = 0
4310 for (i = 3; i<N; i++){ # loop 3B
4311 x = q[i]; # DR_MISALIGNMENT(q) = 0
4312 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4316 These loops are later passed to loop_transform to be vectorized. The
4317 vectorizer will use the alignment information to guide the transformation
4318 (whether to generate regular loads/stores, or with special handling for
4322 /* (1) Peeling to force alignment. */
4324 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4326 + How many accesses will become aligned due to the peeling
4327 - How many accesses will become unaligned due to the peeling,
4328 and the cost of misaligned accesses.
4329 - The cost of peeling (the extra runtime checks, the increase
4332 The scheme we use FORNOW: peel to force the alignment of the first
4333 misaligned store in the loop.
4334 Rationale: misaligned stores are not yet supported.
4336 TODO: Use a better cost model. */
4338 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4340 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4341 if (!aligned_access_p (dr))
4343 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4344 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4349 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4351 if (vect_debug_details (loop))
4352 fprintf (dump_file, "Peeling for alignment will not be applied.");
4356 if (vect_debug_details (loop))
4357 fprintf (dump_file, "Peeling for alignment will be applied.");
4360 /* (1.2) Update the alignment info according to the peeling factor.
4361 If the misalignment of the DR we peel for is M, then the
4362 peeling factor is VF - M, and the misalignment of each access DR_i
4363 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4364 If the misalignment of the DR we peel for is unknown, then the
4365 misalignment of each access DR_i in the loop is also unknown.
4367 FORNOW: set the misalignment of the accesses to unknown even
4368 if the peeling factor is known at compile time.
4370 TODO: - if the peeling factor is known at compile time, use that
4371 when updating the misalignment info of the loop DRs.
4372 - consider accesses that are known to have the same
4373 alignment, even if that alignment is unknown. */
4375 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4377 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4378 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4379 DR_MISALIGNMENT (dr) = 0;
4381 DR_MISALIGNMENT (dr) = -1;
4383 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4385 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4386 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4387 DR_MISALIGNMENT (dr) = 0;
4389 DR_MISALIGNMENT (dr) = -1;
4394 /* Function vect_analyze_data_refs_alignment
4396 Analyze the alignment of the data-references in the loop.
4397 FOR NOW: Until support for misliagned accesses is in place, only if all
4398 accesses are aligned can the loop be vectorized. This restriction will be
4402 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4404 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4405 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4406 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4407 enum dr_alignment_support supportable_dr_alignment;
4410 if (vect_debug_details (NULL))
4411 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4414 /* This pass may take place at function granularity instead of at loop
4417 if (!vect_compute_data_refs_alignment (loop_vinfo))
4419 if (vect_debug_details (loop) || vect_debug_stats (loop))
4421 "not vectorized: can't calculate alignment for data ref.");
4426 /* This pass will decide on using loop versioning and/or loop peeling in
4427 order to enhance the alignment of data references in the loop. */
4429 vect_enhance_data_refs_alignment (loop_vinfo);
4432 /* Finally, check that all the data references in the loop can be
4433 handled with respect to their alignment. */
4435 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4437 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4438 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4439 if (!supportable_dr_alignment)
4441 if (vect_debug_details (loop) || vect_debug_stats (loop))
4442 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4446 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4448 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4449 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4450 if (!supportable_dr_alignment)
4452 if (vect_debug_details (loop) || vect_debug_stats (loop))
4453 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4462 /* Function vect_analyze_data_ref_access.
4464 Analyze the access pattern of the data-reference DR. For now, a data access
4465 has to be consecutive to be considered vectorizable. */
4468 vect_analyze_data_ref_access (struct data_reference *dr)
4470 tree stmt = DR_STMT (dr);
4471 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4472 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4473 tree scalar_type = TREE_TYPE (DR_REF (dr));
4475 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4477 if (vect_debug_details (NULL))
4478 fprintf (dump_file, "not consecutive access");
4485 /* Function vect_analyze_data_ref_accesses.
4487 Analyze the access pattern of all the data references in the loop.
4489 FORNOW: the only access pattern that is considered vectorizable is a
4490 simple step 1 (consecutive) access.
4492 FORNOW: handle only arrays and pointer accesses. */
4495 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4498 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4499 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4501 if (vect_debug_details (NULL))
4502 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4504 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4506 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4507 bool ok = vect_analyze_data_ref_access (dr);
4510 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4511 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4512 fprintf (dump_file, "not vectorized: complicated access pattern.");
4517 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4519 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4520 bool ok = vect_analyze_data_ref_access (dr);
4523 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4524 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4525 fprintf (dump_file, "not vectorized: complicated access pattern.");
4534 /* Function vect_analyze_pointer_ref_access.
4537 STMT - a stmt that contains a data-ref
4538 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4540 If the data-ref access is vectorizable, return a data_reference structure
4541 that represents it (DR). Otherwise - return NULL. */
4543 static struct data_reference *
4544 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4546 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4547 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4548 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4550 tree reftype, innertype;
4551 tree indx_access_fn;
4552 int loopnum = loop->num;
4553 struct data_reference *dr;
4557 if (vect_debug_stats (loop) || vect_debug_details (loop))
4558 fprintf (dump_file, "not vectorized: complicated pointer access.");
4562 if (vect_debug_details (NULL))
4564 fprintf (dump_file, "Access function of ptr: ");
4565 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4568 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4570 if (vect_debug_stats (loop) || vect_debug_details (loop))
4571 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4577 if (!TREE_CONSTANT (step))
4579 if (vect_debug_stats (loop) || vect_debug_details (loop))
4581 "not vectorized: non constant step for pointer access.");
4585 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4586 if (TREE_CODE (reftype) != POINTER_TYPE)
4588 if (vect_debug_stats (loop) || vect_debug_details (loop))
4589 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4593 reftype = TREE_TYPE (init);
4594 if (TREE_CODE (reftype) != POINTER_TYPE)
4596 if (vect_debug_stats (loop) || vect_debug_details (loop))
4597 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4601 innertype = TREE_TYPE (reftype);
4602 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4604 /* FORNOW: support only consecutive access */
4605 if (vect_debug_stats (loop) || vect_debug_details (loop))
4606 fprintf (dump_file, "not vectorized: non consecutive access.");
4610 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4611 if (TREE_CODE (init) == PLUS_EXPR
4612 || TREE_CODE (init) == MINUS_EXPR)
4613 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4614 fold (size_binop (TREE_CODE (init), size_zero_node,
4615 fold_convert (sizetype, TREE_OPERAND (init, 1))));
4617 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4620 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4621 if (vect_debug_details (NULL))
4623 fprintf (dump_file, "Access function of ptr indx: ");
4624 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4626 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4631 /* Function vect_get_memtag_and_dr.
4633 The function returns the relevant variable for memory tag (for aliasing
4634 purposes). Also data reference structure DR is created.
4636 This function handles three kinds of MEMREF:
4638 It is called from vect_analyze_data_refs with a MEMREF that is either an
4639 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4640 It builds a DR for them using vect_get_base_and_offset, and calls itself
4641 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4642 MEMREF along the way. During the recursive calls, the function may be called
4643 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4644 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4645 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4646 and SSA_NAME (this is category 3 - "recursion stop condition").
4648 When the MEMREF falls into category 1 there is still no data reference struct
4649 (DR) available. It is created by this function, and then, along the recursion,
4650 MEMREF will fall into category 2 or 3, in which case a DR will have already
4651 been created, but the analysis continues to retrieve the MEMTAG.
4654 MEMREF - data reference in STMT
4655 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4658 DR - data_reference struct for MEMREF
4659 return value - the relevant variable for memory tag (for aliasing purposes).
4664 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4665 loop_vec_info loop_vinfo,
4666 tree vectype, struct data_reference **dr)
4668 tree symbl, oprnd0, oprnd1;
4669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4670 tree offset, misalign, step;
4671 tree ref_to_be_analyzed, tag, dr_base;
4672 struct data_reference *new_dr;
4673 bool base_aligned_p;
4677 /* Category 3: recursion stop condition. */
4678 /* (1) A DR already exists. We only need to get the relevant memtag for
4679 MEMREF, the rest of the data was already initialized. */
4681 switch (TREE_CODE (memref))
4683 /* (1.1) Stop condition: find the relevant memtag and return. */
4685 symbl = SSA_NAME_VAR (memref);
4686 tag = get_var_ann (symbl)->type_mem_tag;
4689 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4690 if (TREE_CODE (ptr) == SSA_NAME)
4691 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4695 if (vect_debug_details (NULL))
4696 fprintf (dump_file, "not vectorized: no memtag for ref.");
4705 /* Category 2: recursion continues. */
4706 /* (1.2) A recursive call to find the relevant memtag is required. */
4708 symbl = TREE_OPERAND (memref, 0);
4709 break; /* For recursive call. */
4712 /* Could have recorded more accurate information -
4713 i.e, the actual FIELD_DECL that is being referenced -
4714 but later passes expect VAR_DECL as the nmt. */
4718 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4719 break; /* For recursive call. */
4723 /* Although DR exists, we have to call the function recursively to
4724 build MEMTAG for such expression. This is handled below. */
4725 oprnd0 = TREE_OPERAND (memref, 0);
4726 oprnd1 = TREE_OPERAND (memref, 1);
4728 STRIP_NOPS (oprnd1);
4729 /* Supported plus/minus expressions are of the form
4730 {address_base + offset}, such that address_base is of type
4731 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4732 or it's not of type POINTER/ARRAY.
4733 TODO: swap operands if {offset + address_base}. */
4734 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4735 && TREE_CODE (oprnd1) != INTEGER_CST)
4736 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4740 break; /* For recursive call. */
4748 /* Category 1: recursion begins. */
4749 /* (2) A DR does not exist yet and must be built, followed by a
4750 recursive call to get the relevant memtag for MEMREF. */
4752 switch (TREE_CODE (memref))
4755 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4759 symbl = DR_BASE_NAME (new_dr);
4760 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4764 new_dr = analyze_array (stmt, memref, is_read);
4766 symbl = DR_BASE_NAME (new_dr);
4767 ref_to_be_analyzed = memref;
4771 /* TODO: Support data-refs of form a[i].p for unions and single
4772 field structures. */
4776 offset = size_zero_node;
4777 misalign = size_zero_node;
4778 step = size_zero_node;
4780 /* Analyze data-ref, find its base, initial offset from the base, step,
4782 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4783 vectype, loop_vinfo, &offset,
4784 &misalign, &step, &base_aligned_p);
4788 /* Initialize information according to above analysis. */
4789 /* Since offset and step of a pointer can be also set in
4790 vect_analyze_pointer_ref_access, we combine the values here. */
4791 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4792 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4793 fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4794 STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4796 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4798 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4799 STMT_VINFO_VECT_STEP (stmt_info) =
4800 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4802 STMT_VINFO_VECT_STEP (stmt_info) = step;
4804 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4805 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4806 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4811 /* Recursive call to retrieve the relevant memtag. */
4812 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4818 /* Function vect_analyze_data_refs.
4820 Find all the data references in the loop.
4822 The general structure of the analysis of data refs in the vectorizer is as
4824 1- vect_analyze_data_refs(loop):
4825 Find and analyze all data-refs in the loop:
4827 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4828 1.1- vect_get_memtag_and_dr(ref):
4829 Analyze ref, and build a DR (data_reference struct) for it;
4830 call vect_get_base_and_offset to compute base, initial_offset,
4831 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4832 ref_stmt.alignment, and ref_stmt.step accordingly.
4833 1.1.1- vect_get_base_and_offset():
4834 Calculate base, initial_offset, step and alignment.
4835 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4836 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4837 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4838 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4840 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4841 which base is really an array (not a pointer) and which alignment
4842 can be forced. This restriction will be relaxed. */
4845 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4847 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4848 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4849 int nbbs = loop->num_nodes;
4850 block_stmt_iterator si;
4852 struct data_reference *dr;
4854 if (vect_debug_details (NULL))
4855 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4857 for (j = 0; j < nbbs; j++)
4859 basic_block bb = bbs[j];
4860 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4862 bool is_read = false;
4863 tree stmt = bsi_stmt (si);
4864 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4865 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4866 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4867 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4868 varray_type *datarefs = NULL;
4869 int nvuses, nv_may_defs, nv_must_defs;
4872 tree scalar_type, vectype;
4874 /* Assumption: there exists a data-ref in stmt, if and only if
4875 it has vuses/vdefs. */
4877 if (!vuses && !v_may_defs && !v_must_defs)
4880 nvuses = NUM_VUSES (vuses);
4881 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4882 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4884 if (nvuses && (nv_may_defs || nv_must_defs))
4886 if (vect_debug_details (NULL))
4888 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4889 print_generic_expr (dump_file, stmt, TDF_SLIM);
4894 if (TREE_CODE (stmt) != MODIFY_EXPR)
4896 if (vect_debug_details (NULL))
4898 fprintf (dump_file, "unexpected vops in stmt: ");
4899 print_generic_expr (dump_file, stmt, TDF_SLIM);
4906 memref = TREE_OPERAND (stmt, 1);
4907 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4912 memref = TREE_OPERAND (stmt, 0);
4913 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4917 scalar_type = TREE_TYPE (memref);
4918 vectype = get_vectype_for_scalar_type (scalar_type);
4921 if (vect_debug_details (NULL))
4923 fprintf (dump_file, "no vectype for stmt: ");
4924 print_generic_expr (dump_file, stmt, TDF_SLIM);
4925 fprintf (dump_file, " scalar_type: ");
4926 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4928 /* It is not possible to vectorize this data reference. */
4931 /* Analyze MEMREF. If it is of a supported form, build data_reference
4932 struct for it (DR) and find memtag for aliasing purposes. */
4934 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4938 if (vect_debug_stats (loop) || vect_debug_details (loop))
4940 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4941 print_generic_expr (dump_file, stmt, TDF_SLIM);
4945 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4946 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4947 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4948 STMT_VINFO_DATA_REF (stmt_info) = dr;
4956 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
4958 /* Function vect_mark_relevant.
4960 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
4963 vect_mark_relevant (varray_type *worklist, tree stmt)
4965 stmt_vec_info stmt_info;
4967 if (vect_debug_details (NULL))
4968 fprintf (dump_file, "mark relevant.");
4970 if (TREE_CODE (stmt) == PHI_NODE)
4972 VARRAY_PUSH_TREE (*worklist, stmt);
4976 stmt_info = vinfo_for_stmt (stmt);
4980 if (vect_debug_details (NULL))
4982 fprintf (dump_file, "mark relevant: no stmt info!!.");
4983 print_generic_expr (dump_file, stmt, TDF_SLIM);
4988 if (STMT_VINFO_RELEVANT_P (stmt_info))
4990 if (vect_debug_details (NULL))
4991 fprintf (dump_file, "already marked relevant.");
4995 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
4996 VARRAY_PUSH_TREE (*worklist, stmt);
5000 /* Function vect_stmt_relevant_p.
5002 Return true if STMT in loop that is represented by LOOP_VINFO is
5003 "relevant for vectorization".
5005 A stmt is considered "relevant for vectorization" if:
5006 - it has uses outside the loop.
5007 - it has vdefs (it alters memory).
5008 - control stmts in the loop (except for the exit condition).
5010 CHECKME: what other side effects would the vectorizer allow? */
5013 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5015 v_may_def_optype v_may_defs;
5016 v_must_def_optype v_must_defs;
5017 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5022 /* cond stmt other than loop exit cond. */
5023 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5026 /* changing memory. */
5027 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5028 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5029 if (v_may_defs || v_must_defs)
5031 if (vect_debug_details (NULL))
5032 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5036 /* uses outside the loop. */
5037 df = get_immediate_uses (stmt);
5038 num_uses = num_immediate_uses (df);
5039 for (i = 0; i < num_uses; i++)
5041 tree use = immediate_use (df, i);
5042 basic_block bb = bb_for_stmt (use);
5043 if (!flow_bb_inside_loop_p (loop, bb))
5045 if (vect_debug_details (NULL))
5046 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5055 /* Function vect_mark_stmts_to_be_vectorized.
5057 Not all stmts in the loop need to be vectorized. For example:
5066 Stmt 1 and 3 do not need to be vectorized, because loop control and
5067 addressing of vectorized data-refs are handled differently.
5069 This pass detects such stmts. */
5072 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5074 varray_type worklist;
5075 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5076 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5077 unsigned int nbbs = loop->num_nodes;
5078 block_stmt_iterator si;
5084 stmt_vec_info stmt_info;
5086 if (vect_debug_details (NULL))
5087 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5089 VARRAY_TREE_INIT (worklist, 64, "work list");
5091 /* 1. Init worklist. */
5093 for (i = 0; i < nbbs; i++)
5095 basic_block bb = bbs[i];
5096 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5098 stmt = bsi_stmt (si);
5100 if (vect_debug_details (NULL))
5102 fprintf (dump_file, "init: stmt relevant? ");
5103 print_generic_expr (dump_file, stmt, TDF_SLIM);
5106 stmt_info = vinfo_for_stmt (stmt);
5107 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5109 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5110 vect_mark_relevant (&worklist, stmt);
5115 /* 2. Process_worklist */
5117 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5119 stmt = VARRAY_TOP_TREE (worklist);
5120 VARRAY_POP (worklist);
5122 if (vect_debug_details (NULL))
5124 fprintf (dump_file, "worklist: examine stmt: ");
5125 print_generic_expr (dump_file, stmt, TDF_SLIM);
5128 /* Examine the USES in this statement. Mark all the statements which
5129 feed this statement's uses as "relevant", unless the USE is used as
5132 if (TREE_CODE (stmt) == PHI_NODE)
5134 /* follow the def-use chain inside the loop. */
5135 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5137 tree arg = PHI_ARG_DEF (stmt, j);
5138 tree def_stmt = NULL_TREE;
5140 if (!vect_is_simple_use (arg, loop, &def_stmt))
5142 if (vect_debug_details (NULL))
5143 fprintf (dump_file, "worklist: unsupported use.");
5144 varray_clear (worklist);
5150 if (vect_debug_details (NULL))
5152 fprintf (dump_file, "worklist: def_stmt: ");
5153 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5156 bb = bb_for_stmt (def_stmt);
5157 if (flow_bb_inside_loop_p (loop, bb))
5158 vect_mark_relevant (&worklist, def_stmt);
5162 ann = stmt_ann (stmt);
5163 use_ops = USE_OPS (ann);
5165 for (i = 0; i < NUM_USES (use_ops); i++)
5167 tree use = USE_OP (use_ops, i);
5169 /* We are only interested in uses that need to be vectorized. Uses
5170 that are used for address computation are not considered relevant.
5172 if (exist_non_indexing_operands_for_use_p (use, stmt))
5174 tree def_stmt = NULL_TREE;
5176 if (!vect_is_simple_use (use, loop, &def_stmt))
5178 if (vect_debug_details (NULL))
5179 fprintf (dump_file, "worklist: unsupported use.");
5180 varray_clear (worklist);
5187 if (vect_debug_details (NULL))
5189 fprintf (dump_file, "worklist: examine use %d: ", i);
5190 print_generic_expr (dump_file, use, TDF_SLIM);
5193 bb = bb_for_stmt (def_stmt);
5194 if (flow_bb_inside_loop_p (loop, bb))
5195 vect_mark_relevant (&worklist, def_stmt);
5198 } /* while worklist */
5200 varray_clear (worklist);
5205 /* Function vect_can_advance_ivs_p
5207 In case the number of iterations that LOOP iterates in unknown at compile
5208 time, an epilog loop will be generated, and the loop induction variables
5209 (IVs) will be "advanced" to the value they are supposed to take just before
5210 the epilog loop. Here we check that the access function of the loop IVs
5211 and the expression that represents the loop bound are simple enough.
5212 These restrictions will be relaxed in the future. */
5215 vect_can_advance_ivs_p (struct loop *loop)
5217 basic_block bb = loop->header;
5220 /* Analyze phi functions of the loop header. */
5222 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5224 tree access_fn = NULL;
5225 tree evolution_part;
5227 if (vect_debug_details (NULL))
5229 fprintf (dump_file, "Analyze phi: ");
5230 print_generic_expr (dump_file, phi, TDF_SLIM);
5233 /* Skip virtual phi's. The data dependences that are associated with
5234 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5236 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5238 if (vect_debug_details (NULL))
5239 fprintf (dump_file, "virtual phi. skip.");
5243 /* Analyze the evolution function. */
5245 access_fn = instantiate_parameters
5246 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5250 if (vect_debug_details (NULL))
5251 fprintf (dump_file, "No Access function.");
5255 if (vect_debug_details (NULL))
5257 fprintf (dump_file, "Access function of PHI: ");
5258 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5261 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5263 if (evolution_part == NULL_TREE)
5266 /* FORNOW: We do not transform initial conditions of IVs
5267 which evolution functions are a polynomial of degree >= 2. */
5269 if (tree_is_chrec (evolution_part))
5277 /* Function vect_get_loop_niters.
5279 Determine how many iterations the loop is executed.
5280 If an expression that represents the number of iterations
5281 can be constructed, place it in NUMBER_OF_ITERATIONS.
5282 Return the loop exit condition. */
5285 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5289 if (vect_debug_details (NULL))
5290 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5292 niters = number_of_iterations_in_loop (loop);
5294 if (niters != NULL_TREE
5295 && niters != chrec_dont_know)
5297 *number_of_iterations = niters;
5299 if (vect_debug_details (NULL))
5301 fprintf (dump_file, "==> get_loop_niters:" );
5302 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5306 return get_loop_exit_condition (loop);
5310 /* Function vect_analyze_loop_form.
5312 Verify the following restrictions (some may be relaxed in the future):
5313 - it's an inner-most loop
5314 - number of BBs = 2 (which are the loop header and the latch)
5315 - the loop has a pre-header
5316 - the loop has a single entry and exit
5317 - the loop exit condition is simple enough, and the number of iterations
5318 can be analyzed (a countable loop). */
5320 static loop_vec_info
5321 vect_analyze_loop_form (struct loop *loop)
5323 loop_vec_info loop_vinfo;
5325 tree number_of_iterations = NULL;
5326 bool rescan = false;
5328 if (vect_debug_details (loop))
5329 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5332 || !loop->single_exit
5333 || loop->num_nodes != 2
5334 || EDGE_COUNT (loop->header->preds) != 2
5335 || loop->num_entries != 1)
5337 if (vect_debug_stats (loop) || vect_debug_details (loop))
5339 fprintf (dump_file, "not vectorized: bad loop form. ");
5341 fprintf (dump_file, "nested loop.");
5342 else if (!loop->single_exit)
5343 fprintf (dump_file, "multiple exits.");
5344 else if (loop->num_nodes != 2)
5345 fprintf (dump_file, "too many BBs in loop.");
5346 else if (EDGE_COUNT (loop->header->preds) != 2)
5347 fprintf (dump_file, "too many incoming edges.");
5348 else if (loop->num_entries != 1)
5349 fprintf (dump_file, "too many entries.");
5355 /* We assume that the loop exit condition is at the end of the loop. i.e,
5356 that the loop is represented as a do-while (with a proper if-guard
5357 before the loop if needed), where the loop header contains all the
5358 executable statements, and the latch is empty. */
5359 if (!empty_block_p (loop->latch))
5361 if (vect_debug_stats (loop) || vect_debug_details (loop))
5362 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5366 /* Make sure we have a preheader basic block. */
5367 if (!loop->pre_header)
5370 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5373 /* Make sure there exists a single-predecessor exit bb: */
5374 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5377 loop_split_edge_with (loop->exit_edges[0], NULL);
5382 flow_loop_scan (loop, LOOP_ALL);
5383 /* Flow loop scan does not update loop->single_exit field. */
5384 loop->single_exit = loop->exit_edges[0];
5387 if (empty_block_p (loop->header))
5389 if (vect_debug_stats (loop) || vect_debug_details (loop))
5390 fprintf (dump_file, "not vectorized: empty loop.");
5394 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5397 if (vect_debug_stats (loop) || vect_debug_details (loop))
5398 fprintf (dump_file, "not vectorized: complicated exit condition.");
5402 if (!number_of_iterations)
5404 if (vect_debug_stats (loop) || vect_debug_details (loop))
5406 "not vectorized: number of iterations cannot be computed.");
5410 if (chrec_contains_undetermined (number_of_iterations))
5412 if (vect_debug_details (NULL))
5413 fprintf (dump_file, "Infinite number of iterations.");
5417 loop_vinfo = new_loop_vec_info (loop);
5418 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5420 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5422 if (vect_debug_details (loop))
5424 fprintf (dump_file, "loop bound unknown.\n");
5425 fprintf (dump_file, "Symbolic number of iterations is ");
5426 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5430 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5432 if (vect_debug_stats (loop) || vect_debug_details (loop))
5433 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5437 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5443 /* Function vect_analyze_loop.
5445 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5446 for it. The different analyses will record information in the
5447 loop_vec_info struct. */
5449 static loop_vec_info
5450 vect_analyze_loop (struct loop *loop)
5453 loop_vec_info loop_vinfo;
5455 if (vect_debug_details (NULL))
5456 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5458 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5460 loop_vinfo = vect_analyze_loop_form (loop);
5463 if (vect_debug_details (loop))
5464 fprintf (dump_file, "bad loop form.");
5468 /* Find all data references in the loop (which correspond to vdefs/vuses)
5469 and analyze their evolution in the loop.
5471 FORNOW: Handle only simple, array references, which
5472 alignment can be forced, and aligned pointer-references. */
5474 ok = vect_analyze_data_refs (loop_vinfo);
5477 if (vect_debug_details (loop))
5478 fprintf (dump_file, "bad data references.");
5479 destroy_loop_vec_info (loop_vinfo);
5483 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5485 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5488 if (vect_debug_details (loop))
5489 fprintf (dump_file, "unexpected pattern.");
5490 if (vect_debug_details (loop))
5491 fprintf (dump_file, "not vectorized: unexpected pattern.");
5492 destroy_loop_vec_info (loop_vinfo);
5496 /* Check that all cross-iteration scalar data-flow cycles are OK.
5497 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5499 ok = vect_analyze_scalar_cycles (loop_vinfo);
5502 if (vect_debug_details (loop))
5503 fprintf (dump_file, "bad scalar cycle.");
5504 destroy_loop_vec_info (loop_vinfo);
5508 /* Analyze data dependences between the data-refs in the loop.
5509 FORNOW: fail at the first data dependence that we encounter. */
5511 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5514 if (vect_debug_details (loop))
5515 fprintf (dump_file, "bad data dependence.");
5516 destroy_loop_vec_info (loop_vinfo);
5520 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5521 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5523 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5526 if (vect_debug_details (loop))
5527 fprintf (dump_file, "bad data access.");
5528 destroy_loop_vec_info (loop_vinfo);
5532 /* Analyze the alignment of the data-refs in the loop.
5533 FORNOW: Only aligned accesses are handled. */
5535 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5538 if (vect_debug_details (loop))
5539 fprintf (dump_file, "bad data alignment.");
5540 destroy_loop_vec_info (loop_vinfo);
5544 /* Scan all the operations in the loop and make sure they are
5547 ok = vect_analyze_operations (loop_vinfo);
5550 if (vect_debug_details (loop))
5551 fprintf (dump_file, "bad operation or unsupported loop bound.");
5552 destroy_loop_vec_info (loop_vinfo);
5556 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5562 /* Function need_imm_uses_for.
5564 Return whether we ought to include information for 'var'
5565 when calculating immediate uses. For this pass we only want use
5566 information for non-virtual variables. */
5569 need_imm_uses_for (tree var)
5571 return is_gimple_reg (var);
5575 /* Function vectorize_loops.
5577 Entry Point to loop vectorization phase. */
5580 vectorize_loops (struct loops *loops)
5582 unsigned int i, loops_num;
5583 unsigned int num_vectorized_loops = 0;
5585 /* Does the target support SIMD? */
5586 /* FORNOW: until more sophisticated machine modelling is in place. */
5587 if (!UNITS_PER_SIMD_WORD)
5589 if (vect_debug_details (NULL))
5590 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5594 #ifdef ENABLE_CHECKING
5595 verify_loop_closed_ssa ();
5598 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5600 /* ----------- Analyze loops. ----------- */
5602 /* If some loop was duplicated, it gets bigger number
5603 than all previously defined loops. This fact allows us to run
5604 only over initial loops skipping newly generated ones. */
5605 loops_num = loops->num;
5606 for (i = 1; i < loops_num; i++)
5608 loop_vec_info loop_vinfo;
5609 struct loop *loop = loops->parray[i];
5614 loop_vinfo = vect_analyze_loop (loop);
5615 loop->aux = loop_vinfo;
5617 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5620 vect_transform_loop (loop_vinfo, loops);
5621 num_vectorized_loops++;
5624 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5625 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5626 num_vectorized_loops);
5628 /* ----------- Finalize. ----------- */
5631 for (i = 1; i < loops_num; i++)
5633 struct loop *loop = loops->parray[i];
5634 loop_vec_info loop_vinfo;
5638 loop_vinfo = loop->aux;
5639 destroy_loop_vec_info (loop_vinfo);
5643 rewrite_into_ssa (false);
5644 rewrite_into_loop_closed_ssa (); /* FORNOW */
5645 bitmap_clear (vars_to_rename);