2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations whose vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type *, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment (struct data_reference *);
218 static bool vect_analyze_data_ref_access (struct data_reference *);
219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
220 static struct data_reference * vect_analyze_pointer_ref_access
222 static bool vect_can_advance_ivs_p (struct loop *);
223 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
224 loop_vec_info, tree *, tree *, tree *,
226 static struct data_reference * vect_analyze_pointer_ref_access
228 static tree vect_get_ptr_offset (tree, tree, tree *);
229 static tree vect_get_memtag_and_dr
230 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
231 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
234 /* Utility functions for the code transformation. */
235 static tree vect_create_destination_var (tree, tree);
236 static tree vect_create_data_ref_ptr
237 (tree, block_stmt_iterator *, tree, tree *, bool);
238 static tree vect_create_index_for_vector_ref
239 (struct loop *, block_stmt_iterator *);
240 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
241 static tree get_vectype_for_scalar_type (tree);
242 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
243 static tree vect_get_vec_def_for_operand (tree, tree);
244 static tree vect_init_vector (tree, tree);
245 static void vect_finish_stmt_generation
246 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
248 /* Utility function dealing with loop peeling (not peeling itself). */
249 static void vect_generate_tmps_on_preheader
250 (loop_vec_info, tree *, tree *, tree *);
251 static tree vect_build_loop_niters (loop_vec_info);
252 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
253 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
254 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
255 static void vect_update_inits_of_drs (loop_vec_info, tree);
256 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
257 static void vect_do_peeling_for_loop_bound
258 (loop_vec_info, tree *, struct loops *);
260 /* Utilities for creation and deletion of vec_info structs. */
261 loop_vec_info new_loop_vec_info (struct loop *loop);
262 void destroy_loop_vec_info (loop_vec_info);
263 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
265 static bool vect_debug_stats (struct loop *loop);
266 static bool vect_debug_details (struct loop *loop);
269 /*************************************************************************
270 Simple Loop Peeling Utilities
272 Utilities to support loop peeling for vectorization purposes.
273 *************************************************************************/
276 /* For each definition in DEFINITIONS this function allocates
280 allocate_new_names (bitmap definitions)
285 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
287 tree def = ssa_name (ver);
288 tree *new_name_ptr = xmalloc (sizeof (tree));
290 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
292 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
293 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
295 SSA_NAME_AUX (def) = new_name_ptr;
300 /* Renames the use *OP_P. */
303 rename_use_op (use_operand_p op_p)
307 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
310 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
312 /* Something defined outside of the loop. */
316 /* An ordinary ssa name defined in the loop. */
318 SET_USE (op_p, *new_name_ptr);
322 /* Renames the def *OP_P in statement STMT. */
325 rename_def_op (def_operand_p op_p, tree stmt)
329 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
332 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
334 /* Something defined outside of the loop. */
338 /* An ordinary ssa name defined in the loop. */
340 SET_DEF (op_p, *new_name_ptr);
341 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
345 /* Renames the variables in basic block BB. */
348 rename_variables_in_bb (basic_block bb)
351 block_stmt_iterator bsi;
357 v_may_def_optype v_may_defs;
358 v_must_def_optype v_must_defs;
362 struct loop *loop = bb->loop_father;
364 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
365 rename_def_op (PHI_RESULT_PTR (phi), phi);
367 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
369 stmt = bsi_stmt (bsi);
370 get_stmt_operands (stmt);
371 ann = stmt_ann (stmt);
373 uses = USE_OPS (ann);
374 for (i = 0; i < NUM_USES (uses); i++)
375 rename_use_op (USE_OP_PTR (uses, i));
377 defs = DEF_OPS (ann);
378 for (i = 0; i < NUM_DEFS (defs); i++)
379 rename_def_op (DEF_OP_PTR (defs, i), stmt);
381 vuses = VUSE_OPS (ann);
382 for (i = 0; i < NUM_VUSES (vuses); i++)
383 rename_use_op (VUSE_OP_PTR (vuses, i));
385 v_may_defs = V_MAY_DEF_OPS (ann);
386 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
388 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
389 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
392 v_must_defs = V_MUST_DEF_OPS (ann);
393 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
395 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
396 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
400 FOR_EACH_EDGE (e, ei, bb->succs)
402 if (!flow_bb_inside_loop_p (loop, e->dest))
404 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
405 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
410 /* Releases the structures holding the new ssa names. */
413 free_new_names (bitmap definitions)
418 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
420 tree def = ssa_name (ver);
422 if (SSA_NAME_AUX (def))
424 free (SSA_NAME_AUX (def));
425 SSA_NAME_AUX (def) = NULL;
431 /* Renames variables in new generated LOOP. */
434 rename_variables_in_loop (struct loop *loop)
439 bbs = get_loop_body (loop);
441 for (i = 0; i < loop->num_nodes; i++)
442 rename_variables_in_bb (bbs[i]);
448 /* Update the PHI nodes of NEW_LOOP.
450 NEW_LOOP is a duplicate of ORIG_LOOP.
451 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
452 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
453 executes before it. */
456 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
457 struct loop *new_loop, bool after)
459 tree *new_name_ptr, new_ssa_name;
460 tree phi_new, phi_orig;
462 edge orig_loop_latch = loop_latch_edge (orig_loop);
463 edge orig_entry_e = loop_preheader_edge (orig_loop);
464 edge new_loop_exit_e = new_loop->exit_edges[0];
465 edge new_loop_entry_e = loop_preheader_edge (new_loop);
466 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
469 step 1. For each loop-header-phi:
470 Add the first phi argument for the phi in NEW_LOOP
471 (the one associated with the entry of NEW_LOOP)
473 step 2. For each loop-header-phi:
474 Add the second phi argument for the phi in NEW_LOOP
475 (the one associated with the latch of NEW_LOOP)
477 step 3. Update the phis in the successor block of NEW_LOOP.
479 case 1: NEW_LOOP was placed before ORIG_LOOP:
480 The successor block of NEW_LOOP is the header of ORIG_LOOP.
481 Updating the phis in the successor block can therefore be done
482 along with the scanning of the loop header phis, because the
483 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
484 phi nodes, organized in the same order.
486 case 2: NEW_LOOP was placed after ORIG_LOOP:
487 The successor block of NEW_LOOP is the original exit block of
488 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
489 We postpone updating these phis to a later stage (when
490 loop guards are added).
494 /* Scan the phis in the headers of the old and new loops
495 (they are organized in exactly the same order). */
497 for (phi_new = phi_nodes (new_loop->header),
498 phi_orig = phi_nodes (orig_loop->header);
500 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
503 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
504 add_phi_arg (phi_new, def, new_loop_entry_e);
507 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
508 if (TREE_CODE (def) != SSA_NAME)
511 new_name_ptr = SSA_NAME_AUX (def);
513 /* Something defined outside of the loop. */
516 /* An ordinary ssa name defined in the loop. */
517 new_ssa_name = *new_name_ptr;
518 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
520 /* step 3 (case 1). */
523 gcc_assert (new_loop_exit_e == orig_entry_e);
524 SET_PHI_ARG_DEF (phi_orig,
525 phi_arg_from_edge (phi_orig, new_loop_exit_e),
532 /* Update PHI nodes for a guard of the LOOP.
535 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
536 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
537 originates from the guard-bb, skips LOOP and reaches the (unique) exit
538 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
539 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
540 LOOP header) before the guard code was added, and now it became a merge
541 point of two paths - the path that ends with the LOOP exit-edge, and
542 the path that ends with GUARD_EDGE.
544 This function creates and updates the relevant phi nodes to account for
545 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
546 1. Create phi nodes at NEW_MERGE_BB.
547 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
548 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
551 ===> The CFG before the guard-code was added:
553 if (exit_loop) goto update_bb : LOOP_header_bb
556 ==> The CFG after the guard-code was added:
558 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
560 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
565 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
566 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
567 organized in the same order.
568 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
571 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
572 "original" loop). FALSE if LOOP is an original loop (not a newly
573 created copy). The SSA_NAME_AUX fields of the defs in the original
574 loop are the corresponding new ssa-names used in the new duplicated
575 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
576 nodes in UPDATE_BB takes the original ssa-name, and which takes the
577 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
578 the LOOP-exit-edge takes the new-name, and the phi-arg that is
579 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
580 FALSE, it's the other way around.
584 slpeel_update_phi_nodes_for_guard (edge guard_edge,
589 tree orig_phi, new_phi, update_phi;
590 tree guard_arg, loop_arg;
591 basic_block new_merge_bb = guard_edge->dest;
592 edge e = EDGE_SUCC (new_merge_bb, 0);
593 basic_block update_bb = e->dest;
594 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
596 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
597 orig_phi && update_phi;
598 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
600 /* 1. Generate new phi node in NEW_MERGE_BB: */
601 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
604 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
605 of LOOP. Set the two phi args in NEW_PHI for these edges: */
608 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
609 EDGE_SUCC (loop->latch, 0));
610 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
614 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
615 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
619 new_name = *new_name_ptr;
621 /* Something defined outside of the loop */
626 guard_arg = orig_def;
631 guard_arg = new_name;
635 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
636 add_phi_arg (new_phi, guard_arg, guard_edge);
638 /* 3. Update phi in successor block. */
639 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
640 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
641 SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
642 PHI_RESULT (new_phi));
645 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
649 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
650 that starts at zero, increases by one and its limit is NITERS.
652 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
655 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
657 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
659 edge exit_edge = loop->exit_edges[0];
660 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
661 tree begin_label = tree_block_label (loop->latch);
662 tree exit_label = tree_block_label (loop->single_exit->dest);
663 tree init = build_int_cst (TREE_TYPE (niters), 0);
664 tree step = build_int_cst (TREE_TYPE (niters), 1);
668 orig_cond = get_loop_exit_condition (loop);
669 gcc_assert (orig_cond);
670 create_iv (init, step, NULL_TREE, loop,
671 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
673 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
674 back to the exit condition statement. */
675 bsi_next (&loop_exit_bsi);
676 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
678 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
680 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
681 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
682 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
684 else /* 'then' edge loops back. */
686 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
687 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
688 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
691 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
692 then_label, else_label);
693 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
695 /* Remove old loop exit test: */
696 bsi_remove (&loop_exit_bsi);
698 if (vect_debug_stats (loop) || vect_debug_details (loop))
699 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
701 loop->nb_iterations = niters;
705 /* Given LOOP this function generates a new copy of it and puts it
706 on E which is either the entry or exit of LOOP. */
709 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
712 struct loop *new_loop;
713 basic_block *new_bbs, *bbs;
716 basic_block exit_dest;
719 at_exit = (e == loop->exit_edges[0]);
720 if (!at_exit && e != loop_preheader_edge (loop))
722 if (dump_file && (dump_flags & TDF_DETAILS))
723 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
727 bbs = get_loop_body (loop);
729 /* Check whether duplication is possible. */
730 if (!can_copy_bbs_p (bbs, loop->num_nodes))
732 if (vect_debug_stats (loop) || vect_debug_details (loop))
733 fprintf (dump_file, "Cannot copy basic blocks.\n");
738 /* Generate new loop structure. */
739 new_loop = duplicate_loop (loops, loop, loop->outer);
742 if (vect_debug_stats (loop) || vect_debug_details (loop))
743 fprintf (dump_file, "duplicate_loop returns NULL.\n");
748 exit_dest = loop->exit_edges[0]->dest;
749 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
750 exit_dest) == loop->header ?
753 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
755 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
757 /* Duplicating phi args at exit bbs as coming
758 also from exit of duplicated loop. */
759 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
761 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
764 edge new_loop_exit_edge;
766 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
767 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
769 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
771 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
775 if (at_exit) /* Add the loop copy at exit. */
777 redirect_edge_and_branch_force (e, new_loop->header);
778 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
780 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
782 else /* Add the copy at entry. */
785 edge entry_e = loop_preheader_edge (loop);
786 basic_block preheader = entry_e->src;
788 if (!flow_bb_inside_loop_p (new_loop,
789 EDGE_SUCC (new_loop->header, 0)->dest))
790 new_exit_e = EDGE_SUCC (new_loop->header, 0);
792 new_exit_e = EDGE_SUCC (new_loop->header, 1);
794 redirect_edge_and_branch_force (new_exit_e, loop->header);
795 set_immediate_dominator (CDI_DOMINATORS, loop->header,
798 /* We have to add phi args to the loop->header here as coming
799 from new_exit_e edge. */
800 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
802 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
804 add_phi_arg (phi, phi_arg, new_exit_e);
807 redirect_edge_and_branch_force (entry_e, new_loop->header);
808 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
811 flow_loop_scan (new_loop, LOOP_ALL);
812 flow_loop_scan (loop, LOOP_ALL);
820 /* Given the condition statement COND, put it as the last statement
821 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
822 Assumes that this is the single exit of the guarded loop.
823 Returns the skip edge. */
826 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
829 block_stmt_iterator bsi;
831 tree cond_stmt, then_label, else_label;
833 enter_e = EDGE_SUCC (guard_bb, 0);
834 enter_e->flags &= ~EDGE_FALLTHRU;
835 enter_e->flags |= EDGE_FALSE_VALUE;
836 bsi = bsi_last (guard_bb);
838 then_label = build1 (GOTO_EXPR, void_type_node,
839 tree_block_label (exit_bb));
840 else_label = build1 (GOTO_EXPR, void_type_node,
841 tree_block_label (enter_e->dest));
842 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
843 then_label, else_label);
844 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
845 /* Add new edge to connect entry block to the second loop. */
846 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
847 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
852 /* This function verifies that the following restrictions apply to LOOP:
854 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
855 (3) it is single entry, single exit
856 (4) its exit condition is the last stmt in the header
857 (5) E is the entry/exit edge of LOOP.
861 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
863 edge exit_e = loop->exit_edges [0];
864 edge entry_e = loop_preheader_edge (loop);
865 tree orig_cond = get_loop_exit_condition (loop);
866 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
868 if (any_marked_for_rewrite_p ())
872 /* All loops have an outer scope; the only case loop->outer is NULL is for
873 the function itself. */
875 || loop->num_nodes != 2
876 || !empty_block_p (loop->latch)
877 || loop->num_exits != 1
878 || loop->num_entries != 1
879 /* Verify that new loop exit condition can be trivially modified. */
880 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
881 || (e != exit_e && e != entry_e))
887 #ifdef ENABLE_CHECKING
889 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
890 struct loop *second_loop)
892 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
893 basic_block loop2_entry_bb = second_loop->pre_header;
894 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
896 /* A guard that controls whether the second_loop is to be executed or skipped
897 is placed in first_loop->exit. first_loopt->exit therefore has two
898 successors - one is the preheader of second_loop, and the other is a bb
901 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
904 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
907 /* The preheader of new_loop is expected to have two predessors:
908 first_loop->exit and the block that precedes first_loop. */
910 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
911 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
912 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
913 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
914 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
916 /* Verify that the other successor of first_loopt->exit is after the
922 /* Function slpeel_tree_peel_loop_to_edge.
924 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
925 that is placed on the entry (exit) edge E of LOOP. After this transformation
926 we have two loops one after the other - first-loop iterates FIRST_NITERS
927 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
930 - LOOP: the loop to be peeled.
931 - E: the exit or entry edge of LOOP.
932 If it is the entry edge, we peel the first iterations of LOOP. In this
933 case first-loop is LOOP, and second-loop is the newly created loop.
934 If it is the exit edge, we peel the last iterations of LOOP. In this
935 case, first-loop is the newly created loop, and second-loop is LOOP.
936 - NITERS: the number of iterations that LOOP iterates.
937 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
938 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
939 for updating the loop bound of the first-loop to FIRST_NITERS. If it
940 is false, the caller of this function may want to take care of this
941 (this can be useful if we don't want new stmts added to first-loop).
944 The function returns a pointer to the new loop-copy, or NULL if it failed
945 to perform the transformation.
947 The function generates two if-then-else guards: one before the first loop,
948 and the other before the second loop:
950 if (FIRST_NITERS == 0) then skip the first loop,
951 and go directly to the second loop.
953 if (FIRST_NITERS == NITERS) then skip the second loop.
955 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
956 FORNOW the resulting code will not be in loop-closed-ssa form.
960 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
961 edge e, tree first_niters,
962 tree niters, bool update_first_loop_count)
964 struct loop *new_loop = NULL, *first_loop, *second_loop;
968 basic_block bb_before_second_loop, bb_after_second_loop;
969 basic_block bb_before_first_loop;
970 basic_block bb_between_loops;
/* FORNOW only single-exit loops are handled, hence the unconditional use
   of exit_edges[0] here and below.  */
971 edge exit_e = loop->exit_edges [0];
973 if (!slpeel_can_duplicate_loop_p (loop, e))
976 /* We have to initialize cfg_hooks. Then, when calling
977 cfg_hooks->split_edge, the function tree_split_edge
978 is actually called and, when calling cfg_hooks->duplicate_block,
979 the function tree_duplicate_bb is called. */
980 tree_register_cfg_hooks ();
983 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
984 Resulting CFG would be:
997 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
999 if (vect_debug_stats (loop) || vect_debug_details (loop))
1000 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1006 /* NEW_LOOP was placed after LOOP. */
1008 second_loop = new_loop;
1012 /* NEW_LOOP was placed before LOOP. */
1013 first_loop = new_loop;
/* Allocate fresh SSA names for the definitions duplicated into NEW_LOOP
   and fix up the phis of both loop copies accordingly.  The bookkeeping
   allocated here is released at the end of the function.  */
1017 definitions = marked_ssa_names ();
1018 allocate_new_names (definitions);
1019 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1020 rename_variables_in_loop (new_loop);
1023 /* 2. Add the guard that controls whether the first loop is executed.
1024 Resulting CFG would be:
1026 bb_before_first_loop:
1027 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1034 bb_before_second_loop:
1043 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1044 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1045 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1046 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1047 flow_loop_scan (first_loop, LOOP_ALL);
1048 flow_loop_scan (second_loop, LOOP_ALL);
/* NOTE(review): the guard condition uses LE_EXPR rather than EQ_EXPR, so
   the first loop is also skipped for a negative FIRST_NITERS -- presumably
   a defensive choice; confirm against the callers.  */
1051 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1052 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1053 bb_before_second_loop, bb_before_first_loop);
1054 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1055 first_loop == new_loop);
1058 /* 3. Add the guard that controls whether the second loop is executed.
1059 Resulting CFG would be:
1061 bb_before_first_loop:
1062 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1070 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1071 GOTO bb_before_second_loop
1073 bb_before_second_loop:
1079 bb_after_second_loop:
1084 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1085 add_bb_to_loop (bb_between_loops, first_loop->outer);
1086 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1087 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1088 flow_loop_scan (first_loop, LOOP_ALL);
1089 flow_loop_scan (second_loop, LOOP_ALL);
1091 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1092 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1093 bb_after_second_loop, bb_before_first_loop);
1094 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1095 second_loop == new_loop);
1097 /* Flow loop scan does not update loop->single_exit field. */
1098 first_loop->single_exit = first_loop->exit_edges[0];
1099 second_loop->single_exit = second_loop->exit_edges[0];
1101 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1103 if (update_first_loop_count)
1104 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
/* Release the temporary SSA-renaming bookkeeping allocated in step 1.  */
1106 free_new_names (definitions);
1107 BITMAP_XFREE (definitions);
1108 unmark_all_for_rewrite ();
1114 /* Here the proper Vectorizer starts. */
1116 /*************************************************************************
1117 Vectorization Utilities.
1118 *************************************************************************/
1120 /* Function new_stmt_vec_info.
1122 Create and initialize a new stmt_vec_info struct for STMT. */
1125 new_stmt_vec_info (tree stmt, struct loop *loop)
/* xcalloc zero-fills the struct; the explicit assignments below spell out
   the intended default for every field, which doubles as documentation of
   the stmt_vec_info contract.  */
1128 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1130 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1131 STMT_VINFO_STMT (res) = stmt;
1132 STMT_VINFO_LOOP (res) = loop;
1133 STMT_VINFO_RELEVANT_P (res) = 0;
1134 STMT_VINFO_VECTYPE (res) = NULL;
1135 STMT_VINFO_VEC_STMT (res) = NULL;
1136 STMT_VINFO_DATA_REF (res) = NULL;
1137 STMT_VINFO_MEMTAG (res) = NULL;
1138 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1139 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1140 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1141 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1142 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1148 /* Function new_loop_vec_info.
1150 Create and initialize a new loop_vec_info struct for LOOP, as well as
1151 stmt_vec_info structs for all the stmts in LOOP. */
1154 new_loop_vec_info (struct loop *loop)
1158 block_stmt_iterator si;
1161 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1163 bbs = get_loop_body (loop);
1165 /* Create stmt_info for all stmts in the loop. */
1166 for (i = 0; i < loop->num_nodes; i++)
1168 basic_block bb = bbs[i];
1169 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1171 tree stmt = bsi_stmt (si);
/* Force the operand cache so the stmt annotation exists before we attach
   vectorizer-specific info to it.  */
1174 get_stmt_operands (stmt);
1175 ann = stmt_ann (stmt);
1176 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
/* Initialize the loop-level fields.  xcalloc has already zeroed the
   struct, so these assignments mainly document the defaults.  */
1180 LOOP_VINFO_LOOP (res) = loop;
1181 LOOP_VINFO_BBS (res) = bbs;
1182 LOOP_VINFO_EXIT_COND (res) = NULL;
1183 LOOP_VINFO_NITERS (res) = NULL;
1184 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1185 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1186 LOOP_VINFO_VECT_FACTOR (res) = 0;
/* 20 is just an initial size hint for the dataref varrays; they grow on
   demand.  */
1187 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1188 "loop_write_datarefs");
1189 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1190 "loop_read_datarefs");
1191 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1197 /* Function destroy_loop_vec_info.
1199 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1200 stmts in the loop. */
1203 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1208 block_stmt_iterator si;
1214 loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 bbs = LOOP_VINFO_BBS (loop_vinfo);
1217 nbbs = loop->num_nodes;
/* Walk every stmt in the loop and release its stmt_vec_info, which was
   attached by new_loop_vec_info.  */
1219 for (j = 0; j < nbbs; j++)
1221 basic_block bb = bbs[j];
1222 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1224 tree stmt = bsi_stmt (si);
1225 stmt_ann_t ann = stmt_ann (stmt);
1226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Clear the annotation's pointer so nothing dangles after STMT_INFO is
   freed.  */
1228 set_stmt_info (ann, NULL);
/* Finally release the loop-level resources themselves.  */
1232 free (LOOP_VINFO_BBS (loop_vinfo));
1233 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1234 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1240 /* Function debug_loop_stats.
1242 For vectorization statistics dumps. */
1245 vect_debug_stats (struct loop *loop)
1248 block_stmt_iterator si;
1249 tree node = NULL_TREE;
/* Nothing to dump unless a dump file is open and the -stats flag is on.  */
1251 if (!dump_file || !(dump_flags & TDF_STATS))
1256 fprintf (dump_file, "\n");
/* Scan for a stmt that carries source-location info so the dump header
   can report where the loop came from.  */
1265 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1267 node = bsi_stmt (si);
1268 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1272 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1273 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1275 fprintf (dump_file, "\nloop at %s:%d: ",
1276 EXPR_FILENAME (node), EXPR_LINENO (node));
1284 /* Function debug_loop_details.
1286 For vectorization debug dumps. */
1289 vect_debug_details (struct loop *loop)
1292 block_stmt_iterator si;
1293 tree node = NULL_TREE;
/* Mirrors vect_debug_stats, but gated on TDF_DETAILS instead of
   TDF_STATS.  */
1295 if (!dump_file || !(dump_flags & TDF_DETAILS))
1300 fprintf (dump_file, "\n");
/* Scan for a stmt that carries source-location info so the dump header
   can report where the loop came from.  */
1309 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1311 node = bsi_stmt (si);
1312 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1316 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1317 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1319 fprintf (dump_file, "\nloop at %s:%d: ",
1320 EXPR_FILENAME (node), EXPR_LINENO (node));
1328 /* Function vect_get_ptr_offset
1330 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1333 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1334 tree vectype ATTRIBUTE_UNUSED,
1335 tree *offset ATTRIBUTE_UNUSED)
/* NOTE(review): all parameters are marked ATTRIBUTE_UNUSED -- this is a
   stub that presumably always reports failure until alignment information
   is wired in; confirm the return value at the call sites.  */
1337 /* TODO: Use alignment information. */
1342 /* Function vect_analyze_offset_expr
1344 Given an offset expression EXPR received from get_inner_reference, analyze
1345 it and create an expression for INITIAL_OFFSET by substituting the variables
1346 of EXPR with initial_condition of the corresponding access_fn in the loop.
1349 for (j = 3; j < N; j++)
1352 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1353 subsituted, since its access_fn in the inner loop is i. 'j' will be
1354 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1357 Compute MISALIGN (the misalignment of the data reference initial access from
1358 its base) if possible. Misalignment can be calculated only if all the
1359 variables can be substitued with constants, or if a variable is multiplied
1360 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1361 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1362 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1363 VECTYPE_ALIGNMENT computation in the caller of this function).
1365 STEP is an evolution of the data reference in this loop in bytes.
1366 In the above example, STEP is C_j.
1368 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1369 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1370 are NULL_TREEs. Otherwise, return TRUE.
1375 vect_analyze_offset_expr (tree expr,
1377 tree vectype_alignment,
1378 tree *initial_offset,
1384 tree left_offset = size_zero_node;
1385 tree right_offset = size_zero_node;
1386 tree left_misalign = size_zero_node;
1387 tree right_misalign = size_zero_node;
1388 tree left_step = size_zero_node;
1389 tree right_step = size_zero_node;
1390 enum tree_code code;
1391 tree init, evolution, def_stmt;
1393 /* Strip conversions that don't narrow the mode. */
1394 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1398 to = TREE_TYPE (expr);
1399 oprnd0 = TREE_OPERAND (expr, 0);
1400 ti = TREE_TYPE (oprnd0);
1402 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1404 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
/* Failure path: all three outputs are cleared together, per the contract
   described in the header comment.  */
1411 *misalign = NULL_TREE;
1412 *initial_offset = NULL_TREE;
/* Case 1: EXPR is a constant.  It contributes directly to both the offset
   and the misalignment, and does not evolve in the loop.  */
1416 if (TREE_CONSTANT (expr))
1418 *initial_offset = fold_convert (sizetype, expr);
1419 *misalign = fold_convert (sizetype, expr);
1420 *step = size_zero_node;
1424 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1425 access_fn in the current loop. */
1426 if (SSA_VAR_P (expr))
1428 tree access_fn = analyze_scalar_evolution (loop, expr);
1430 if (access_fn == chrec_dont_know)
1434 init = initial_condition_in_loop_num (access_fn, loop->num);
1437 def_stmt = SSA_NAME_DEF_STMT (init);
1439 && !IS_EMPTY_STMT (def_stmt)
1440 && flow_bb_inside_loop_p (loop, bb_for_stmt (def_stmt)))
1441 /* Not enough information: may be not loop invariant.
1442 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1443 initial_condition is D, but it depends on i - loop's induction
1448 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1449 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1450 /* Evolution is not constant. */
1453 if (TREE_CONSTANT (init))
1454 *misalign = fold_convert (sizetype, init);
1456 /* Not constant, misalignment cannot be calculated. */
1457 *misalign = NULL_TREE;
1459 *initial_offset = fold_convert (sizetype, init);
1461 *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
1465 /* Recursive computation. */
1466 oprnd0 = TREE_OPERAND (expr, 0);
1467 oprnd1 = TREE_OPERAND (expr, 1);
/* Analyze both operands; a failure in either aborts the whole analysis.  */
1469 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1470 &left_misalign, &left_step)
1471 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1472 &right_offset, &right_misalign, &right_step))
1475 /* The type of the operation: plus, minus or mult. */
1476 code = TREE_CODE (expr);
/* NOTE(review): this branch appears to handle the MULT_EXPR case; the
   dispatch on CODE itself is not visible here -- confirm.  */
1480 if (!TREE_CONSTANT (right_offset))
1481 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1483 FORNOW: We don't support such cases. */
1486 /* Misalignment computation. */
1487 if (SSA_VAR_P (left_offset))
1489 /* If the left side contains variable that cannot be substituted with
1490 constant, we check if the right side is a multiple of ALIGNMENT. */
1491 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1492 vectype_alignment)))
1493 *misalign = size_zero_node;
1495 /* If the remainder is not zero or the right side isn't constant, we
1496 can't compute misalignment. */
1497 *misalign = NULL_TREE;
1501 /* The left operand was successfully substituted with constant. */
1503 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1505 *misalign = size_binop (code, left_misalign, right_misalign);
1507 *misalign = NULL_TREE;
1510 /* Step calculation. */
1511 /* Multiply the step by the right operand. */
1512 *step = size_binop (MULT_EXPR, left_step, right_offset);
1517 /* Combine the recursive calculations for step and misalignment. */
1518 *step = size_binop (code, left_step, right_step);
1520 if (left_misalign && right_misalign)
1521 *misalign = size_binop (code, left_misalign, right_misalign);
1523 *misalign = NULL_TREE;
1531 /* Compute offset. */
1532 *initial_offset = fold_convert (sizetype,
1533 fold (build2 (code, TREE_TYPE (left_offset),
1540 /* Function vect_get_base_and_offset
1542 Return the BASE of the data reference EXPR.
1543 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1545 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1546 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1547 instantiated with initial_conditions of access_functions of variables,
1548 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1550 Function get_inner_reference is used for the above in case of ARRAY_REF and
1554 EXPR - the memory reference that is being analyzed
1555 DR - the data_reference struct of the _original_ memory reference
1556 (Note: DR_REF (DR) is not necessarily EXPR)
1557 VECTYPE - the type that defines the alignment (i.e, we compute
1558 alignment relative to TYPE_ALIGN(VECTYPE))
1561 BASE (returned value) - the base of the data reference EXPR.
1562 E.g, if EXPR is a.b[k].c[i][j] the returned
1564 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1565 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1566 computation is impossible
1567 STEP - evolution of the DR_REF in the loop
1568 BASE_ALIGNED_P - indicates if BASE is aligned
1570 If something unexpected is encountered (an unsupported form of data-ref),
1571 then NULL_TREE is returned. */
1574 vect_get_base_and_offset (struct data_reference *dr,
1577 loop_vec_info loop_vinfo,
1578 tree *initial_offset,
1581 bool *base_aligned_p)
1583 tree this_offset = size_zero_node;
1584 tree this_misalign = size_zero_node;
1585 tree this_step = size_zero_node;
1586 tree base = NULL_TREE;
1588 tree oprnd0, oprnd1;
1589 enum tree_code code = TREE_CODE (expr);
1590 HOST_WIDE_INT pbitsize;
1591 HOST_WIDE_INT pbitpos;
1593 enum machine_mode pmode;
1594 int punsignedp, pvolatilep;
1595 tree bit_pos_in_bytes;
1596 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Assume an unaligned base until proven otherwise below.  */
1598 *base_aligned_p = false;
1602 /* These cases end the recursion: */
/* Decl case: the decl itself is the base; its DECL_ALIGN decides whether
   the base is aligned relative to VECTYPE.  */
1605 *initial_offset = size_zero_node;
1606 *step = size_zero_node;
1607 *misalign = size_zero_node;
1608 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1609 *base_aligned_p = true;
/* Pointer case: alignment is judged from the pointed-to type; when that is
   insufficient, fall back to vect_get_ptr_offset.  */
1613 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1616 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1618 base = vect_get_ptr_offset (expr, vectype, misalign);
1620 *base_aligned_p = true;
1624 *base_aligned_p = true;
1625 *misalign = size_zero_node;
1627 *initial_offset = size_zero_node;
1628 *step = size_zero_node;
/* Constant case: the constant itself is the offset/misalignment.  */
1632 *initial_offset = fold_convert (sizetype, expr);
1633 *misalign = fold_convert (sizetype, expr);
1634 *step = size_zero_node;
1637 /* These cases continue the recursion: */
1639 oprnd0 = TREE_OPERAND (expr, 0);
1644 oprnd0 = TREE_OPERAND (expr, 0);
1650 oprnd0 = TREE_OPERAND (expr, 0);
1651 oprnd1 = TREE_OPERAND (expr, 1);
1653 /* In case we have a PLUS_EXPR of the form
1654 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1655 This is verified in vect_get_memtag_and_dr. */
1656 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1657 &this_offset, &this_misalign,
1658 &this_step, base_aligned_p);
1659 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1660 this_offset = size_zero_node;
1663 this_misalign = NULL_TREE;
1669 if (!handled_component_p (expr))
1670 /* Unsupported expression. */
1673 /* Find the base and the offset from it. */
1674 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1675 &pmode, &punsignedp, &pvolatilep, false);
/* Analyze the variable part of the offset returned by
   get_inner_reference, if any.  */
1680 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1681 &this_offset, &this_misalign,
1684 /* Failed to compute offset or step. */
1686 *initial_offset = NULL_TREE;
1687 *misalign = NULL_TREE;
1691 /* Add bit position to OFFSET and MISALIGN. */
1693 bit_pos_in_bytes = size_int (pbitpos/BITS_PER_UNIT);
1694 /* Check that there is no remainder in bits. */
1695 if (pbitpos%BITS_PER_UNIT)
1697 if (vect_debug_details (NULL))
1698 fprintf (dump_file, "bit offset alignment.")
1701 this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1702 fold_convert (sizetype, this_offset)));
1704 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1706 /* Continue the recursion to refine the base (get_inner_reference returns
1707 &a for &a[i], and not a). */
1711 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1712 initial_offset, misalign, step,
1716 /* Combine the results. */
/* MISALIGN is propagated only when both the recursive result and this
   level's contribution are known; otherwise it collapses to NULL_TREE.  */
1717 if (this_misalign && *misalign)
1718 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1720 *misalign = NULL_TREE;
1722 *step = size_binop (PLUS_EXPR, *step, this_step);
1724 *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1725 *initial_offset, this_offset));
1727 if (vect_debug_details (NULL))
1729 print_generic_expr (dump_file, expr, TDF_SLIM);
1730 fprintf (dump_file, "\n --> total offset for ref: ");
1731 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1732 fprintf (dump_file, "\n --> total misalign for ref: ");
1733 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1734 fprintf (dump_file, "\n --> total step for ref: ");
1735 print_generic_expr (dump_file, *step, TDF_SLIM);
1742 /* Function vect_can_force_dr_alignment_p.
1744 Returns whether the alignment of a DECL can be forced to be aligned
1745 on ALIGNMENT bit boundary. */
1748 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
/* Only plain variables can have their alignment raised here.  */
1750 if (TREE_CODE (decl) != VAR_DECL)
/* Variables defined elsewhere, or already emitted to the assembly stream,
   are out of our control.  */
1753 if (DECL_EXTERNAL (decl))
1756 if (TREE_ASM_WRITTEN (decl))
/* Static storage: bounded by what the object file format supports.
   Automatic storage: bounded by the (preferred) stack boundary.  */
1759 if (TREE_STATIC (decl))
1760 return (alignment <= MAX_OFILE_ALIGNMENT);
1762 /* This is not 100% correct. The absolute correct stack alignment
1763 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1764 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1765 However, until someone implements forced stack alignment, SSE
1766 isn't really usable without this. */
1767 return (alignment <= PREFERRED_STACK_BOUNDARY);
1771 /* Function vect_get_new_vect_var.
1773 Returns a name for a new variable. The current naming scheme appends the
1774 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1775 the name of vectorizer generated variables, and appends that to NAME if
1779 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1785 if (var_kind == vect_simple_var)
1790 prefix_len = strlen (prefix);
/* NOTE(review): concat allocates; the result is handed to create_tmp_var
   and apparently never freed.  That may be fine for compiler-lifetime
   allocations -- confirm against the file's conventions.  */
1793 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
/* NAME was not supplied: use the bare prefix as the variable name.  */
1795 new_vect_var = create_tmp_var (type, prefix);
1797 return new_vect_var;
1801 /* Function vect_create_index_for_vector_ref.
1803 Create (and return) an index variable, along with it's update chain in the
1804 loop. This variable will be used to access a memory location in a vector
1808 LOOP: The loop being vectorized.
1809 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1810 function can be added here, or in the loop pre-header.
1813 Return an index that will be used to index a vector array. It is expected
1814 that a pointer to the first vector will be used as the base address for the
1817 FORNOW: we are not trying to be efficient, just creating a new index each
1818 time from scratch. At this time all vector references could use the same
1821 TODO: create only one index to be used by all vector references. Record
1822 the index in the LOOP_VINFO the first time this procedure is called and
1823 return it on subsequent calls. The increment of this index must be placed
1824 just before the conditional expression that ends the single block loop. */
1827 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1830 tree indx_before_incr, indx_after_incr;
1832 /* It is assumed that the base pointer used for vectorized access contains
1833 the address of the first vector. Therefore the index used for vectorized
1834 access must be initialized to zero and incremented by 1. */
1836 init = integer_zero_node;
1837 step = integer_one_node;
1839 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
/* create_iv builds the induction variable {0, +, 1} and hands back both
   the pre- and post-increment SSA names; we return the pre-increment one.  */
1840 create_iv (init, step, NULL_TREE, loop, bsi, false,
1841 &indx_before_incr, &indx_after_incr);
1843 return indx_before_incr;
1847 /* Function vect_create_addr_base_for_vector_ref.
1849 Create an expression that computes the address of the first memory location
1850 that will be accessed for a data reference.
1853 STMT: The statement containing the data reference.
1854 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1855 OFFSET: Optional. If supplied, it is be added to the initial address.
1858 1. Return an SSA_NAME whose value is the address of the memory location of
1859 the first vector of the data reference.
1860 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1861 these statement(s) which define the returned SSA_NAME.
1863 FORNOW: We are only handling array accesses with step 1. */
1866 vect_create_addr_base_for_vector_ref (tree stmt,
1867 tree *new_stmt_list,
1870 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1871 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1872 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1873 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1874 tree ref = DR_REF (dr);
1875 tree scalar_type = TREE_TYPE (ref);
1876 tree scalar_ptr_type = build_pointer_type (scalar_type);
1879 tree addr_base, addr_expr;
1880 tree dest, new_stmt;
1881 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1883 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1884 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1886 /* Add '&' to ref_base. */
1887 data_ref_base = build_fold_addr_expr (data_ref_base);
1890 /* Create '(scalar_type*) base' for pointers. */
1891 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1892 tree scalar_array_type = build_array_type (scalar_type, 0);
1893 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1894 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1895 add_referenced_tmp_var (array_ptr);
/* Gimplify the base expression; any stmts needed to compute it are
   collected in NEW_STMT_LIST for the caller to insert.  */
1897 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1898 add_referenced_tmp_var (dest);
1899 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1900 append_to_statement_list_force (new_stmt, new_stmt_list);
1902 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1903 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1904 new_temp = make_ssa_name (array_ptr, vec_stmt);
1905 TREE_OPERAND (vec_stmt, 0) = new_temp;
1906 append_to_statement_list_force (vec_stmt, new_stmt_list);
1907 data_ref_base = new_temp;
1910 /* Create base_offset */
1911 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1912 add_referenced_tmp_var (dest);
1913 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1914 append_to_statement_list_force (new_stmt, new_stmt_list);
/* NOTE(review): OFFSET appears to be counted in scalar elements -- it is
   scaled by the DR's step before being folded into BASE_OFFSET.  Confirm
   against the callers.  */
1918 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1919 add_referenced_tmp_var (tmp);
1920 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1921 STMT_VINFO_VECT_STEP (stmt_info)));
1922 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1924 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1925 append_to_statement_list_force (new_stmt, new_stmt_list);
1928 /* base + base_offset */
1929 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1932 /* addr_expr = addr_base */
1933 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1934 get_name (base_name));
1935 add_referenced_tmp_var (addr_expr);
1936 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1937 new_temp = make_ssa_name (addr_expr, vec_stmt);
1938 TREE_OPERAND (vec_stmt, 0) = new_temp;
1939 append_to_statement_list_force (vec_stmt, new_stmt_list);
1941 if (vect_debug_details (NULL))
1943 fprintf (dump_file, "created ");
1944 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1945 fprintf (dump_file, "\n");
1951 /* Function get_vectype_for_scalar_type.
1953 Returns the vector type corresponding to SCALAR_TYPE as supported
1957 get_vectype_for_scalar_type (tree scalar_type)
1959 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1960 int nbytes = GET_MODE_SIZE (inner_mode);
/* NOTE(review): the division below assumes 0 < nbytes <=
   UNITS_PER_SIMD_WORD; presumably a guard precedes it -- confirm.  */
1967 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1969 nunits = UNITS_PER_SIMD_WORD / nbytes;
1971 vectype = build_vector_type (scalar_type, nunits);
1972 if (vect_debug_details (NULL))
1974 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1975 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1981 if (vect_debug_details (NULL))
1983 fprintf (dump_file, "vectype: ");
1984 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject vector types the target has no native vector mode for.  */
1987 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1989 /* TODO: tree-complex.c sometimes can parallelize operations
1990 on generic vectors. We can vectorize the loop in that case,
1991 but then we should re-run the lowering pass. */
1992 if (vect_debug_details (NULL))
1993 fprintf (dump_file, "mode not supported by target.");
2001 /* Function vect_align_data_ref.
2003 Handle misalignment of a memory access.
2005 FORNOW: Can't handle misaligned accesses.
2006 Make sure that the dataref is aligned. */
2009 vect_align_data_ref (tree stmt)
2011 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2012 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2014 /* FORNOW: can't handle misaligned accesses;
2015 all accesses expected to be aligned. */
/* The analysis phase is expected to have rejected (or peeled away)
   misaligned accesses, so a failure here indicates an analysis bug.  */
2016 gcc_assert (aligned_access_p (dr));
2020 /* Function vect_create_data_ref_ptr.
2022 Create a memory reference expression for vector access, to be used in a
2023 vector load/store stmt. The reference is based on a new pointer to vector
2027 1. STMT: a stmt that references memory. Expected to be of the form
2028 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2029 2. BSI: block_stmt_iterator where new stmts can be added.
2030 3. OFFSET (optional): an offset to be added to the initial address accessed
2031 by the data-ref in STMT.
2032 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2033 pointing to the initial address.
2036 1. Declare a new ptr to vector_type, and have it point to the base of the
2037 data reference (initial addressed accessed by the data reference).
2038 For example, for vector of type V8HI, the following code is generated:
2041 vp = (v8hi *)initial_address;
2043 if OFFSET is not supplied:
2044 initial_address = &a[init];
2045 if OFFSET is supplied:
2046 initial_address = &a[init + OFFSET];
2048 Return the initial_address in INITIAL_ADDRESS.
2050 2. Create a data-reference in the loop based on the new vector pointer vp,
2051 and using a new index variable 'idx' as follows:
2055 where if ONLY_INIT is true:
2058 update = idx + vector_type_size
2060 Return the pointer vp'.
2063 FORNOW: handle only aligned and consecutive accesses. */
2066 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2067 tree *initial_address, bool only_init)
2070 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2071 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2072 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2073 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* The virtual operand lists of STMT; used in section (2) below to mark
   all aliased variables for SSA renaming.  */
2077 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2078 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2079 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2080 int nvuses, nv_may_defs, nv_must_defs;
2084 tree new_stmt_list = NULL_TREE;
2086 edge pe = loop_preheader_edge (loop);
2092 tree type, tmp, size;
2094 base_name = unshare_expr (DR_BASE_NAME (dr));
2095 if (vect_debug_details (NULL))
2097 tree data_ref_base = base_name;
2098 fprintf (dump_file, "create array_ref of type: ");
2099 print_generic_expr (dump_file, vectype, TDF_SLIM);
2100 if (TREE_CODE (data_ref_base) == VAR_DECL)
2101 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2102 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2103 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2104 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2105 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2106 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2107 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2108 print_generic_expr (dump_file, base_name, TDF_SLIM);
2111 /** (1) Create the new vector-pointer variable: **/
2113 vect_ptr_type = build_pointer_type (vectype);
2114 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2115 get_name (base_name));
2116 add_referenced_tmp_var (vect_ptr);
2119 /** (2) Handle aliasing information of the new vector-pointer: **/
2121 tag = STMT_VINFO_MEMTAG (stmt_info);
2123 get_var_ann (vect_ptr)->type_mem_tag = tag;
2125 /* Mark for renaming all aliased variables
2126 (i.e, the may-aliases of the type-mem-tag). */
2127 nvuses = NUM_VUSES (vuses);
2128 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2129 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2130 for (i = 0; i < nvuses; i++)
2132 tree use = VUSE_OP (vuses, i);
2133 if (TREE_CODE (use) == SSA_NAME)
2134 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2136 for (i = 0; i < nv_may_defs; i++)
2138 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2139 if (TREE_CODE (def) == SSA_NAME)
2140 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2142 for (i = 0; i < nv_must_defs; i++)
2144 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2145 if (TREE_CODE (def) == SSA_NAME)
2146 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2150 /** (3) Calculate the initial address the vector-pointer, and set
2151 the vector-pointer to point to it before the loop: **/
2153 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2154 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2156 pe = loop_preheader_edge (loop);
/* The preheader edge is expected to accommodate the new stmts without
   creating a new basic block, hence the asserts on NEW_BB below.  */
2157 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2158 gcc_assert (!new_bb);
2159 *initial_address = new_temp;
2161 /* Create: p = (vectype *) initial_base */
2162 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2163 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2164 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2165 TREE_OPERAND (vec_stmt, 0) = new_temp;
2166 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2167 gcc_assert (!new_bb);
2168 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2171 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2173 if (only_init) /* No update in loop is required. */
2174 return vect_ptr_init;
2176 idx = vect_create_index_for_vector_ref (loop, bsi);
2178 /* Create: update = idx * vectype_size */
2179 tmp = create_tmp_var (integer_type_node, "update");
2180 add_referenced_tmp_var (tmp);
/* Pick an integer type as wide as the pointer for the pointer-arithmetic
   temporary below.  */
2181 size = TYPE_SIZE (vect_ptr_type);
2182 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2183 ptr_update = create_tmp_var (type, "update");
2184 add_referenced_tmp_var (ptr_update);
2185 vectype_size = TYPE_SIZE_UNIT (vectype);
2186 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2187 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2188 new_temp = make_ssa_name (tmp, vec_stmt);
2189 TREE_OPERAND (vec_stmt, 0) = new_temp;
2190 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2191 vec_stmt = fold_convert (type, new_temp);
2192 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2193 new_temp = make_ssa_name (ptr_update, vec_stmt);
2194 TREE_OPERAND (vec_stmt, 0) = new_temp;
2195 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2197 /* Create: data_ref_ptr = vect_ptr_init + update */
2198 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2199 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2200 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2201 TREE_OPERAND (vec_stmt, 0) = new_temp;
2202 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2203 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2205 return data_ref_ptr;
2209 /* Function vect_create_destination_var.
2211    Create a new temporary of type VECTYPE. */
/* The new temporary is named after SCALAR_DEST (which must be an SSA_NAME,
   enforced by the assert below) and is registered as a referenced temporary
   so the SSA machinery knows about it.  NOTE(review): this listing elides
   some original lines; the trailing "return vec_dest;" is presumably among
   them — confirm against the full source.  */
2214 vect_create_destination_var (tree scalar_dest, tree vectype)
2217 const char *new_name;
2219 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
/* Reuse the scalar's name (if any) so dumps stay readable.  */
2221 new_name = get_name (scalar_dest);
2224 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2225 add_referenced_tmp_var (vec_dest);
2231 /* Function vect_init_vector.
2233    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2234    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2235    used in the vectorization of STMT. */
/* The init stmt is placed on the loop-preheader edge so the vector value is
   computed exactly once before the loop.  NOTE(review): listing elides some
   original lines (declarations, return).  */
2238 vect_init_vector (tree stmt, tree vector_var)
2240 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2241 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2244 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2250 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2251 add_referenced_tmp_var (new_var);
/* new_var = vector_var, then give the LHS a fresh SSA name.  */
2253 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2254 new_temp = make_ssa_name (new_var, init_stmt);
2255 TREE_OPERAND (init_stmt, 0) = new_temp;
2257 pe = loop_preheader_edge (loop);
2258 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
/* Inserting on the preheader edge must not have required splitting it.  */
2259 gcc_assert (!new_bb);
2261 if (vect_debug_details (NULL))
2263 fprintf (dump_file, "created new init_stmt: ");
2264 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2267 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2272 /* Function vect_get_vec_def_for_operand.
2274    OP is an operand in STMT. This function returns a (vector) def that will be
2275    used in the vectorized stmt for STMT.
2277    In the case that OP is an SSA_NAME which is defined in the loop, then
2278    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2280    In case OP is an invariant or constant, a new stmt that creates a vector def
2281    needs to be introduced. */
/* Three cases are handled below: (1) OP is a constant -> build a VECTOR_CST
   splat; (2.1) OP is defined inside the loop -> reuse the already-vectorized
   def; (2.2) OP is a loop-header PHI (reduction/induction) -> unsupported;
   (2.3) OP is loop-invariant -> build a CONSTRUCTOR splat.  NOTE(review):
   listing elides some original lines (declarations, braces).  */
2284 vect_get_vec_def_for_operand (tree op, tree stmt)
2289 stmt_vec_info def_stmt_info = NULL;
2290 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2291 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2292 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2293 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2300 if (vect_debug_details (NULL))
2302 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2303 print_generic_expr (dump_file, op, TDF_SLIM);
2306 /** ===> Case 1: operand is a constant. **/
2308 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2310 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2314 /* Build a tree with vector elements. */
2315 if (vect_debug_details (NULL))
2316 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits)
2318 for (i = nunits - 1; i >= 0; --i)
2320 t = tree_cons (NULL_TREE, op, t);
2322 vec_cst = build_vector (vectype, t);
2323 return vect_init_vector (stmt, vec_cst);
2326 gcc_assert (TREE_CODE (op) == SSA_NAME);
2328 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2330 def_stmt = SSA_NAME_DEF_STMT (op);
2331 def_stmt_info = vinfo_for_stmt (def_stmt);
2333 if (vect_debug_details (NULL))
2335 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2336 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2340 /** ==> Case 2.1: operand is defined inside the loop. **/
2344 /* Get the def from the vectorized stmt. */
2346 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2347 gcc_assert (vec_stmt);
2348 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2353 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2354 it is a reduction/induction. **/
2356 bb = bb_for_stmt (def_stmt);
2357 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2359 if (vect_debug_details (NULL))
2360 fprintf (dump_file, "reduction/induction - unsupported.");
2361 internal_error ("no support for reduction/induction"); /* FORNOW */
2365 /** ==> Case 2.3: operand is defined outside the loop -
2366 it is a loop invariant. */
/* Extract the invariant value according to the defining stmt's kind
   (PHI result, plain def, or empty stmt for default defs).  */
2368 switch (TREE_CODE (def_stmt))
2371 def = PHI_RESULT (def_stmt);
2374 def = TREE_OPERAND (def_stmt, 0);
2377 def = TREE_OPERAND (def_stmt, 0);
2378 gcc_assert (IS_EMPTY_STMT (def_stmt));
2382 if (vect_debug_details (NULL))
2384 fprintf (dump_file, "unsupported defining stmt: ");
2385 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2387 internal_error ("unsupported defining stmt");
2390 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2392 if (vect_debug_details (NULL))
2393 fprintf (dump_file, "Create vector_inv.");
2395 for (i = nunits - 1; i >= 0; --i)
2397 t = tree_cons (NULL_TREE, def, t);
2400 vec_inv = build_constructor (vectype, t);
2401 return vect_init_vector (stmt, vec_inv);
2405 /* Function vect_finish_stmt_generation.
2407    Insert a new stmt. */
/* Inserts VEC_STMT immediately before the stmt BSI points to, dumps it when
   detailed debugging is on, then re-positions BSI back onto STMT.  */
2410 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2412 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2414 if (vect_debug_details (NULL))
2416 fprintf (dump_file, "add new stmt: ");
2417 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2420 /* Make sure bsi points to the stmt that is being vectorized. */
2422 /* Assumption: any stmts created for the vectorization of stmt S were
2423 inserted before S. BSI is expected to point to S or some new stmt before S.
/* Walk forward until BSI lands on STMT; it must be found (assert below).  */
2426 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2428 gcc_assert (stmt == bsi_stmt (*bsi));
2432 /* Function vectorizable_assignment.
2434    Check if STMT performs an assignment (copy) that can be vectorized.
2435    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2436    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2437    Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analysis part: STMT must be a MODIFY_EXPR whose LHS is an SSA_NAME and
   whose RHS is a "simple use" (see vect_is_simple_use).  Transformation
   part: emit vec_dest = vec_oprnd.  NOTE(review): listing elides some
   original lines (declarations, returns, braces).  */
2440 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2446 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2447 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2448 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2451 /* Is vectorizable assignment? */
2453 if (TREE_CODE (stmt) != MODIFY_EXPR)
2456 scalar_dest = TREE_OPERAND (stmt, 0);
2457 if (TREE_CODE (scalar_dest) != SSA_NAME)
2460 op = TREE_OPERAND (stmt, 1);
2461 if (!vect_is_simple_use (op, loop, NULL))
2463 if (vect_debug_details (NULL))
2464 fprintf (dump_file, "use not simple.");
/* Analysis-only call: just record the stmt kind for the transform phase.  */
2468 if (!vec_stmt) /* transformation not required. */
2470 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2475 if (vect_debug_details (NULL))
2476 fprintf (dump_file, "transform assignment.");
2479 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2482 op = TREE_OPERAND (stmt, 1);
2483 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2485 /* Arguments are ready. create the new vector stmt. */
2486 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2487 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2488 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2489 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2495 /* Function vectorizable_operation.
2497    Check if STMT performs a binary or unary operation that can be vectorized.
2498    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2499    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2500    Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analysis: the RHS tree code must map to an optab supported in the vector
   mode, and every operand must be a simple use.  Transformation: rebuild the
   operation on the vectorized operands.  NOTE(review): listing elides some
   original lines.  */
2503 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2508 tree op0, op1 = NULL;
2509 tree vec_oprnd0, vec_oprnd1=NULL;
2510 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2511 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2512 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2514 enum tree_code code;
2515 enum machine_mode vec_mode;
2521 /* Is STMT a vectorizable binary/unary operation? */
2522 if (TREE_CODE (stmt) != MODIFY_EXPR)
2525 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2528 operation = TREE_OPERAND (stmt, 1);
2529 code = TREE_CODE (operation);
2530 optab = optab_for_tree_code (code, vectype);
2532 /* Support only unary or binary operations. */
2533 op_type = TREE_CODE_LENGTH (code);
2534 if (op_type != unary_op && op_type != binary_op)
2536 if (vect_debug_details (NULL))
2537 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* Every operand must be loop-invariant or defined by a vectorizable stmt.  */
2541 for (i = 0; i < op_type; i++)
2543 op = TREE_OPERAND (operation, i);
2544 if (!vect_is_simple_use (op, loop, NULL))
2546 if (vect_debug_details (NULL))
2547 fprintf (dump_file, "use not simple.");
2552 /* Supportable by target? */
2555 if (vect_debug_details (NULL))
2556 fprintf (dump_file, "no optab.");
2559 vec_mode = TYPE_MODE (vectype);
2560 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2562 if (vect_debug_details (NULL))
2563 fprintf (dump_file, "op not supported by target.");
/* Analysis-only call: just record the stmt kind for the transform phase.  */
2567 if (!vec_stmt) /* transformation not required. */
2569 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2575 if (vect_debug_details (NULL))
2576 fprintf (dump_file, "transform binary/unary operation.");
2579 scalar_dest = TREE_OPERAND (stmt, 0);
2580 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2583 op0 = TREE_OPERAND (operation, 0);
2584 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2586 if (op_type == binary_op)
2588 op1 = TREE_OPERAND (operation, 1);
2589 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2592 /* Arguments are ready. create the new vector stmt. */
2594 if (op_type == binary_op)
2595 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2596 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2598 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2599 build1 (code, vectype, vec_oprnd0));
2600 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2601 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2602 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2608 /* Function vectorizable_store.
2610    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2612    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2613    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2614    Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analysis: STMT must be a MODIFY_EXPR storing through an ARRAY_REF or
   INDIRECT_REF, its RHS must be a simple use, a vector move must exist for
   the vector mode, and a data-ref must have been recorded.  Transformation:
   emit *data_ref_ptr = vec_oprnd1 (aligned stores only, FORNOW).
   NOTE(review): listing elides some original lines.  */
2617 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2624 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2625 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2626 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2627 enum machine_mode vec_mode;
2629 enum dr_alignment_support alignment_support_cheme;
2631 /* Is vectorizable store? */
2633 if (TREE_CODE (stmt) != MODIFY_EXPR)
2636 scalar_dest = TREE_OPERAND (stmt, 0);
2637 if (TREE_CODE (scalar_dest) != ARRAY_REF
2638 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2641 op = TREE_OPERAND (stmt, 1);
2642 if (!vect_is_simple_use (op, loop, NULL))
2644 if (vect_debug_details (NULL))
2645 fprintf (dump_file, "use not simple.");
2649 vec_mode = TYPE_MODE (vectype);
2650 /* FORNOW. In some cases can vectorize even if data-type not supported
2651 (e.g. - array initialization with 0). */
2652 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2655 if (!STMT_VINFO_DATA_REF (stmt_info))
/* Analysis-only call: just record the stmt kind for the transform phase.  */
2659 if (!vec_stmt) /* transformation not required. */
2661 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2667 if (vect_debug_details (NULL))
2668 fprintf (dump_file, "transform store");
2670 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2671 gcc_assert (alignment_support_cheme);
/* Fixed: was "alignment_support_cheme = dr_aligned", an assignment that
   clobbered the computed scheme and made the assert vacuous (it tested the
   constant dr_aligned, not the condition).  Only aligned stores are
   supported FORNOW, so assert equality.  */
2672 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2674 /* Handle use - get the vectorized def from the defining stmt. */
2675 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2678 /* FORNOW: make sure the data reference is aligned. */
2679 vect_align_data_ref (stmt);
2680 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2681 data_ref = build_fold_indirect_ref (data_ref);
2683 /* Arguments are ready. create the new vector stmt. */
2684 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2685 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2691 /* vectorizable_load.
2693    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2695    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2696    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2697    Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Transformation handles two alignment schemes: (a) dr_aligned /
   dr_unaligned_supported -> a single (possibly misaligned) vector load;
   (b) dr_unaligned_software_pipeline -> the realign-load scheme: two
   aligned loads (msq/lsq), a PHI across iterations, and REALIGN_LOAD_EXPR.
   NOTE(review): listing elides many original lines (declarations, braces,
   parts of the scheme-(b) pseudocode comment).  */
2700 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2703 tree vec_dest = NULL;
2704 tree data_ref = NULL;
2706 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2707 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2708 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2715 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2716 edge pe = loop_preheader_edge (loop);
2717 enum dr_alignment_support alignment_support_cheme;
2719 /* Is vectorizable load? */
2721 if (TREE_CODE (stmt) != MODIFY_EXPR)
2724 scalar_dest = TREE_OPERAND (stmt, 0);
2725 if (TREE_CODE (scalar_dest) != SSA_NAME)
2728 op = TREE_OPERAND (stmt, 1);
2729 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2732 if (!STMT_VINFO_DATA_REF (stmt_info))
2735 mode = (int) TYPE_MODE (vectype);
2737 /* FORNOW. In some cases can vectorize even if data-type not supported
2738 (e.g. - data copies). */
2739 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2741 if (vect_debug_details (loop))
2742 fprintf (dump_file, "Aligned load, but unsupported type.");
/* Analysis-only call: just record the stmt kind for the transform phase.  */
2746 if (!vec_stmt) /* transformation not required. */
2748 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2754 if (vect_debug_details (NULL))
2755 fprintf (dump_file, "transform load.");
2757 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2758 gcc_assert (alignment_support_cheme);
2760 if (alignment_support_cheme == dr_aligned
2761 || alignment_support_cheme == dr_unaligned_supported)
/* Scheme (a): plain load; use INDIRECT_REF when provably aligned,
   MISALIGNED_INDIRECT_REF otherwise.  */
2772 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2773 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2774 if (aligned_access_p (dr))
2775 data_ref = build_fold_indirect_ref (data_ref);
2778 int mis = DR_MISALIGNMENT (dr);
2779 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
/* Misalignment is recorded in bytes; the tree node wants bits.  */
2780 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2781 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2783 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2784 new_temp = make_ssa_name (vec_dest, new_stmt);
2785 TREE_OPERAND (new_stmt, 0) = new_temp;
2786 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2788 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2792 msq_init = *(floor(p1))
2793 p2 = initial_addr + VS - 1;
2794 magic = have_builtin ? builtin_result : initial_address;
2797 p2' = p2 + indx * vectype_size
2799 vec_dest = realign_load (msq, lsq, magic)
2813 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2814 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2815 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2817 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2818 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2819 new_temp = make_ssa_name (vec_dest, new_stmt);
2820 TREE_OPERAND (new_stmt, 0) = new_temp;
2821 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2822 gcc_assert (!new_bb);
2823 msq_init = TREE_OPERAND (new_stmt, 0);
2826 /* <2> Create lsq = *(floor(p2')) in the loop */
/* Offset the pointer by VS - 1 elements so floor() lands on the next
   aligned vector.  */
2827 offset = build_int_cst (integer_type_node,
2828 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2829 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2830 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2831 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2832 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2833 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2834 new_temp = make_ssa_name (vec_dest, new_stmt);
2835 TREE_OPERAND (new_stmt, 0) = new_temp;
2836 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2837 lsq = TREE_OPERAND (new_stmt, 0);
2841 if (targetm.vectorize.builtin_mask_for_load)
2843 /* Create permutation mask, if required, in loop preheader. */
2845 params = build_tree_list (NULL_TREE, init_addr);
2846 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2847 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2848 new_stmt = build_function_call_expr (builtin_decl, params);
2849 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2850 new_temp = make_ssa_name (vec_dest, new_stmt);
2851 TREE_OPERAND (new_stmt, 0) = new_temp;
2852 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2853 gcc_assert (!new_bb);
2854 magic = TREE_OPERAND (new_stmt, 0);
2856 /* Since we have just created a CALL_EXPR, we may need to
2857 rename call-clobbered variables. */
2858 mark_call_clobbered_vars_to_rename ();
2862 /* Use current address instead of init_addr for reduced reg pressure.
2864 magic = dataref_ptr;
2868 /* <4> Create msq = phi <msq_init, lsq> in loop */
/* Software-pipeline across iterations: this iteration's high part is the
   previous iteration's low part.  */
2869 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2870 msq = make_ssa_name (vec_dest, NULL_TREE);
2871 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2872 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2873 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2874 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2877 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2878 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2879 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2880 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2881 new_temp = make_ssa_name (vec_dest, new_stmt);
2882 TREE_OPERAND (new_stmt, 0) = new_temp;
2883 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2888 *vec_stmt = new_stmt;
2893 /* Function vect_supportable_dr_alignment
2895    Return whether the data reference DR is supported with respect to its
/* Returns dr_aligned for provably-aligned refs; otherwise, for reads,
   prefers the realign-load software pipeline when the target has
   vec_realign_load (and a usable mask builtin), falling back to
   movmisalign; anything else is dr_unaligned_unsupported.  */
2898 static enum dr_alignment_support
2899 vect_supportable_dr_alignment (struct data_reference *dr)
2901 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2902 enum machine_mode mode = (int) TYPE_MODE (vectype);
2904 if (aligned_access_p (dr))
2907 /* Possibly unaligned access. */
2909 if (DR_IS_READ (dr))
2911 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2912 && (!targetm.vectorize.builtin_mask_for_load
2913 || targetm.vectorize.builtin_mask_for_load ()))
2914 return dr_unaligned_software_pipeline;
2916 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2917 /* Can't software pipeline the loads, but can at least do them. */
2918 return dr_unaligned_supported;
2922 return dr_unaligned_unsupported;
2926 /* Function vect_transform_stmt.
2928    Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatches on the stmt kind recorded during analysis
   (STMT_VINFO_TYPE) to the matching vectorizable_* worker, then records
   the resulting vector stmt.  NOTE(review): the is_store flag is
   presumably set in the elided store case and returned — confirm against
   the full source.  */
2931 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2933 bool is_store = false;
2934 tree vec_stmt = NULL_TREE;
2935 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2938 switch (STMT_VINFO_TYPE (stmt_info))
2940 case op_vec_info_type:
2941 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2945 case assignment_vec_info_type:
2946 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2950 case load_vec_info_type:
2951 done = vectorizable_load (stmt, bsi, &vec_stmt);
2955 case store_vec_info_type:
2956 done = vectorizable_store (stmt, bsi, &vec_stmt);
2961 if (vect_debug_details (NULL))
2962 fprintf (dump_file, "stmt not supported.");
2966 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2972 /* This function builds ni_name = number of iterations loop executes
2973    on the loop preheader. */
/* Gimplifies the (possibly symbolic) LOOP_VINFO_NITERS expression into a
   "niters" temporary and inserts the generated stmts on the preheader
   edge.  NOTE(review): the return of ni_name is presumably in the elided
   trailing lines.  */
2976 vect_build_loop_niters (loop_vec_info loop_vinfo)
2978 tree ni_name, stmt, var;
2980 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Unshare so gimplification does not mutate the stored expression.  */
2981 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2983 var = create_tmp_var (TREE_TYPE (ni), "niters");
2984 add_referenced_tmp_var (var);
2985 ni_name = force_gimple_operand (ni, &stmt, false, var);
2987 pe = loop_preheader_edge (loop);
2990 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2991 gcc_assert (!new_bb);
2998 /* This function generates the following statements:
3000    ni_name = number of iterations loop executes
3001    ratio = ni_name / vf
3002    ratio_mult_vf_name = ratio * vf
3004    and places them at the loop preheader edge. */
/* vf is a power of two (exact_log2 below), so the division and
   multiplication are emitted as right/left shifts by log2(vf).  */
3007 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3009 tree *ratio_mult_vf_name_ptr,
3010 tree *ratio_name_ptr)
3018 tree ratio_mult_vf_name;
3019 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3020 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3021 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3022 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3024 pe = loop_preheader_edge (loop);
3026 /* Generate temporary variable that contains
3027 number of iterations loop executes. */
3029 ni_name = vect_build_loop_niters (loop_vinfo);
3031 /* Create: ratio = ni >> log2(vf) */
3033 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3034 add_referenced_tmp_var (var);
3035 ratio_name = make_ssa_name (var, NULL_TREE);
3036 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3037 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3038 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3040 pe = loop_preheader_edge (loop);
3041 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3042 gcc_assert (!new_bb);
3044 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3046 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3047 add_referenced_tmp_var (var);
3048 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3049 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3050 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3051 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3053 pe = loop_preheader_edge (loop);
3054 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3055 gcc_assert (!new_bb);
/* Return the three names through the out-parameters.  */
3057 *ni_name_ptr = ni_name;
3058 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3059 *ratio_name_ptr = ratio_name;
3065 /* Function vect_update_ivs_after_vectorizer.
3067    "Advance" the induction variables of LOOP to the value they should take
3068    after the execution of LOOP. This is currently necessary because the
3069    vectorizer does not handle induction variables that are used after the
3070    loop. Such a situation occurs when the last iterations of LOOP are
3072    1. We introduced new uses after LOOP for IVs that were not originally used
3073    after LOOP: the IVs of LOOP are now used by an epilog loop.
3074    2. LOOP is going to be vectorized; this means that it will iterate N/VF
3075    times, whereas the loop IVs should be bumped N times.
3078    - LOOP - a loop that is going to be vectorized. The last few iterations
3079    of LOOP were peeled.
3080    - NITERS - the number of iterations that LOOP executes (before it is
3081    vectorized). i.e, the number of times the ivs should be bumped.
3082    - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3083    coming out from LOOP on which there are uses of the LOOP ivs
3084    (this is the path from LOOP->exit to epilog_loop->preheader).
3086    The new definitions of the ivs are placed in LOOP->exit.
3087    The phi args associated with the edge UPDATE_E in the bb
3088    UPDATE_E->dest are updated accordingly.
3090    Assumption 1: Like the rest of the vectorizer, this function assumes
3091    a single loop exit that has a single predecessor.
3093    Assumption 2: The phi nodes in the LOOP header and in update_bb are
3094    organized in the same order.
3096    Assumption 3: The access function of the ivs is simple enough (see
3097    vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3099    Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3100    coming out of LOOP on which the ivs of LOOP are used (this is the path
3101    that leads to the epilog loop; other paths skip the epilog loop). This
3102    path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3103    needs to have its phis updated.
3107 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3109 basic_block exit_bb = loop->exit_edges[0]->dest;
3111 basic_block update_bb = update_e->dest;
3113 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3115 /* Make sure there exists a single-predecessor exit bb: */
3116 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
/* Walk header phis and update_bb phis in lockstep (Assumption 2).  */
3118 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3120 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3122 tree access_fn = NULL;
3123 tree evolution_part;
3126 tree var, stmt, ni, ni_name;
3127 block_stmt_iterator last_bsi;
3129 /* Skip virtual phi's. */
3130 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3132 if (vect_debug_details (NULL))
3133 fprintf (dump_file, "virtual phi. skip.");
3137 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3138 gcc_assert (access_fn);
3140 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3141 gcc_assert (evolution_part != NULL_TREE);
3143 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3144 of degree >= 2 or exponential. */
3145 gcc_assert (!tree_is_chrec (evolution_part));
3147 step_expr = evolution_part;
3148 init_expr = unshare_expr (initial_condition (access_fn));
/* ni = init + niters * step: the iv's value after NITERS bumps.  */
3150 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3151 build2 (MULT_EXPR, TREE_TYPE (niters),
3152 niters, step_expr), init_expr);
3154 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3155 add_referenced_tmp_var (var);
3157 ni_name = force_gimple_operand (ni, &stmt, false, var);
3159 /* Insert stmt into exit_bb. */
3160 last_bsi = bsi_last (exit_bb);
3162 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3164 /* Fix phi expressions in the successor bb. */
3165 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3166 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3167 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3172 /* Function vect_do_peeling_for_loop_bound
3174    Peel the last iterations of the loop represented by LOOP_VINFO.
3175    The peeled iterations form a new epilog loop. Given that the loop now
3176    iterates NITERS times, the new epilog loop iterates
3177    NITERS % VECTORIZATION_FACTOR times.
3179    The original loop will later be made to iterate
3180    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3183 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3184 struct loops *loops)
3187 tree ni_name, ratio_mult_vf_name;
3188 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3189 struct loop *new_loop;
3191 #ifdef ENABLE_CHECKING
3195 if (vect_debug_details (NULL))
/* NOTE(review): "transtorm" is a typo for "transform" in this debug
   message; left as-is here since it is runtime output.  */
3196 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3198 /* Generate the following variables on the preheader of original loop:
3200 ni_name = number of iteration the original loop executes
3201 ratio = ni_name / vf
3202 ratio_mult_vf_name = ratio * vf */
3203 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3204 &ratio_mult_vf_name, ratio);
3206 /* Update loop info. */
3207 loop->pre_header = loop_preheader_edge (loop)->src;
3208 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3210 #ifdef ENABLE_CHECKING
3211 loop_num = loop->num;
3213 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3214 ratio_mult_vf_name, ni_name, false);
3215 #ifdef ENABLE_CHECKING
3216 gcc_assert (new_loop);
3217 gcc_assert (loop_num == loop->num);
3218 slpeel_verify_cfg_after_peeling (loop, new_loop);
3221 /* A guard that controls whether the new_loop is to be executed or skipped
3222 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3223 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3224 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3225 is on the path where the LOOP IVs are used and need to be updated. */
3227 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3228 update_e = EDGE_PRED (new_loop->pre_header, 0);
3230 update_e = EDGE_PRED (new_loop->pre_header, 1);
3232 /* Update IVs of original loop as if they were advanced
3233 by ratio_mult_vf_name steps. */
3234 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3236 /* After peeling we have to reset scalar evolution analyzer. */
3243 /* Function vect_gen_niters_for_prolog_loop
3245    Set the number of iterations for the loop represented by LOOP_VINFO
3246    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3247    and the misalignment of DR - the first data reference recorded in
3248    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3249    this loop, the data reference DR will refer to an aligned location.
3251    The following computation is generated:
3253    compute address misalignment in bytes:
3254    addr_mis = addr & (vectype_size - 1)
3256    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3258    (elem_size = element type size; an element is the scalar element
3259    whose type is the inner type of the vectype) */
3262 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3264 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3265 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3266 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3268 tree iters, iters_name;
3271 tree dr_stmt = DR_STMT (dr);
3272 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3273 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3274 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3277 tree new_stmts = NULL_TREE;
/* NOTE(review): the assignment target of this call (start_addr) is on an
   elided line of the original.  */
3279 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3280 tree ptr_type = TREE_TYPE (start_addr);
3281 tree size = TYPE_SIZE (ptr_type);
3282 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3283 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3284 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3285 tree niters_type = TREE_TYPE (loop_niters);
3286 tree elem_size_log =
3287 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3288 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3290 pe = loop_preheader_edge (loop);
3291 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3292 gcc_assert (!new_bb);
3294 /* Create: byte_misalign = addr & (vectype_size - 1) */
3295 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3297 /* Create: elem_misalign = byte_misalign / element_size */
3299 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3301 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3302 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3303 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3304 iters = fold_convert (niters_type, iters);
3306 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3307 /* If the loop bound is known at compile time we already verified that it is
3308 greater than vf; since the misalignment ('iters') is at most vf, there's
3309 no need to generate the MIN_EXPR in this case. */
3310 if (!TREE_CONSTANT (loop_niters))
3311 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3313 var = create_tmp_var (niters_type, "prolog_loop_niters");
3314 add_referenced_tmp_var (var);
3315 iters_name = force_gimple_operand (iters, &stmt, false, var);
3317 /* Insert stmt on loop preheader edge. */
3318 pe = loop_preheader_edge (loop);
3321 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3322 gcc_assert (!new_bb);
3329 /* Function vect_update_inits_of_dr
3331    NITERS iterations were peeled from LOOP. DR represents a data reference
3332    in LOOP. This function updates the information recorded in DR to
3333    account for the fact that the first NITERS iterations had already been
3334    executed. Specifically, it updates the OFFSET field of stmt_info. */
3337 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3339 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3340 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
/* offset += niters * step: skip the peeled iterations' accesses.  */
3342 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3343 STMT_VINFO_VECT_STEP (stmt_info)));
3344 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3345 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3349 /* Function vect_update_inits_of_drs
3351 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3352 This function updates the information recorded for the data references in
3353 the loop to account for the fact that the first NITERS iterations had
3354 already been executed. Specifically, it updates the initial_condition of the
3355 access_function of all the data_references in the loop. */
3358 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3361 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3362 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3364 if (dump_file && (dump_flags & TDF_DETAILS))
3365 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3367 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3369 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3370 vect_update_inits_of_dr (dr, niters);
3373 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3375 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3376 vect_update_inits_of_dr (dr, niters);
3381 /* Function vect_do_peeling_for_alignment
3383 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3384 'niters' is set to the misalignment of one of the data references in the
3385 loop, thereby forcing it to refer to an aligned location at the beginning
3386 of the execution of this loop. The data reference for which we are
3387 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3390 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3392 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3393 tree niters_of_prolog_loop, ni_name;
3395 struct loop *new_loop;
3397 if (vect_debug_details (NULL))
3398 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
   /* Build an SSA name holding the loop's iteration count, then derive the
      number of iterations the scalar prolog must execute so that the
      unaligned data-ref becomes aligned.  */
3400 ni_name = vect_build_loop_niters (loop_vinfo);
3401 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3403 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
   /* NOTE(review): the result of this call is presumably assigned to
      new_loop on a line not visible in this extract -- the gcc_assert
      below reads new_loop; confirm against the full source.  */
3405 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3406 niters_of_prolog_loop, ni_name, true);
3407 #ifdef ENABLE_CHECKING
3408 gcc_assert (new_loop);
3409 slpeel_verify_cfg_after_peeling (new_loop, loop);
3412 /* Update number of times loop executes: the vectorized loop now runs the
   original count minus the peeled prolog iterations. */
3413 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3414 LOOP_VINFO_NITERS (loop_vinfo) =
3415 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3417 /* Update the init conditions of the access functions of all data refs. */
3418 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3420 /* After peeling we have to reset scalar evolution analyzer. */
3427 /* Function vect_transform_loop.
3429 The analysis phase has determined that the loop is vectorizable.
3430 Vectorize the loop - created vectorized stmts to replace the scalar
3431 stmts in the loop, and update the loop exit condition. */
3434 vect_transform_loop (loop_vec_info loop_vinfo,
3435 struct loops *loops ATTRIBUTE_UNUSED)
3437 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3438 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3439 int nbbs = loop->num_nodes;
3440 block_stmt_iterator si;
3443 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3445 if (vect_debug_details (NULL))
3446 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3449 /* Peel the loop if there are data refs with unknown alignment.
3450 Only one data ref with unknown store is allowed. */
3452 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3453 vect_do_peeling_for_alignment (loop_vinfo, loops);
3455 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3456 compile time constant), or it is a constant that doesn't divide by the
3457 vectorization factor, then an epilog loop needs to be created.
3458 We therefore duplicate the loop: the original loop will be vectorized,
3459 and will compute the first (n/VF) iterations. The second copy of the loop
3460 will remain scalar and will compute the remaining (n%VF) iterations.
3461 (VF is the vectorization factor). */
3463 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3464 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3465 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3466 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
   /* NOTE(review): this ratio computation looks like the 'else' arm of the
      condition above (an 'else' line appears to be missing from this
      extract) -- it reads LOOP_VINFO_INT_NITERS, which is only meaningful
      when the iteration count is a compile-time constant.  Confirm.  */
3468 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3469 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3471 /* 1) Make sure the loop header has exactly two entries
3472 2) Make sure we have a preheader basic block. */
3474 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3476 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3479 /* FORNOW: the vectorizer supports only loops which body consist
3480 of one basic block (header + empty latch). When the vectorizer will
3481 support more involved loop forms, the order by which the BBs are
3482 traversed need to be reconsidered. */
3484 for (i = 0; i < nbbs; i++)
3486 basic_block bb = bbs[i];
   /* The iterator is advanced manually: vect_transform_stmt may remove
      the scalar stmt (e.g. for stores), which repositions SI.  */
3488 for (si = bsi_start (bb); !bsi_end_p (si);)
3490 tree stmt = bsi_stmt (si);
3491 stmt_vec_info stmt_info;
3494 if (vect_debug_details (NULL))
3496 fprintf (dump_file, "------>vectorizing statement: ");
3497 print_generic_expr (dump_file, stmt, TDF_SLIM);
3499 stmt_info = vinfo_for_stmt (stmt);
3500 gcc_assert (stmt_info);
   /* Stmts that analysis marked irrelevant (e.g. indexing computations)
      are left as-is.  */
3501 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3506 #ifdef ENABLE_CHECKING
3507 /* FORNOW: Verify that all stmts operate on the same number of
3508 units and no inner unrolling is necessary. */
3510 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3511 == vectorization_factor);
3513 /* -------- vectorize statement ------------ */
3514 if (vect_debug_details (NULL))
3515 fprintf (dump_file, "transform statement.");
3517 is_store = vect_transform_stmt (stmt, &si);
3520 /* free the attached stmt_vec_info and remove the stmt. */
3521 stmt_ann_t ann = stmt_ann (stmt);
3523 set_stmt_info (ann, NULL);
   /* The vector loop iterates ratio = n / VF times.  */
3532 slpeel_make_loop_iterate_ntimes (loop, ratio);
3534 if (vect_debug_details (loop))
3535 fprintf (dump_file,"Success! loop vectorized.");
3536 if (vect_debug_stats (loop))
3537 fprintf (dump_file, "LOOP VECTORIZED.");
3541 /* Function vect_is_simple_use.
3544 LOOP - the loop that is being vectorized.
3545 OPERAND - operand of a stmt in LOOP.
3546 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3548 Returns whether a stmt with OPERAND can be vectorized.
3549 Supportable operands are constants, loop invariants, and operands that are
3550 defined by the current iteration of the loop. Unsupportable operands are
3551 those that are defined by a previous iteration of the loop (as is the case
3552 in reduction/induction computations). */
3555 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
   /* Constants are always supportable.  (The 'return true' for this case
      appears on a line not visible in this extract.)  */
3563 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
   /* Anything else that is not an SSA_NAME is not a simple use.  */
3566 if (TREE_CODE (operand) != SSA_NAME)
3569 def_stmt = SSA_NAME_DEF_STMT (operand);
3570 if (def_stmt == NULL_TREE )
3572 if (vect_debug_details (NULL))
3573 fprintf (dump_file, "no def_stmt.");
3577 /* empty stmt is expected only in case of a function argument.
3578 (Otherwise - we expect a phi_node or a modify_expr). */
3579 if (IS_EMPTY_STMT (def_stmt))
3581 tree arg = TREE_OPERAND (def_stmt, 0);
3582 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3584 if (vect_debug_details (NULL))
3586 fprintf (dump_file, "Unexpected empty stmt: ");
3587 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3592 /* phi_node inside the loop indicates an induction/reduction pattern.
3593 This is not supported yet. */
3594 bb = bb_for_stmt (def_stmt);
3595 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3597 if (vect_debug_details (NULL))
3598 fprintf (dump_file, "reduction/induction - unsupported.")
3599 return false; /* FORNOW: not supported yet. */
3602 /* Expecting a modify_expr or a phi_node (a phi outside the loop is a
3603 loop invariant). */
3603 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3604 || TREE_CODE (def_stmt) == PHI_NODE)
3615 /* Function vect_analyze_operations.
3617 Scan the loop stmts and make sure they are all vectorizable.
   Also determines the loop's vectorization factor (the common number of
   vector units of all stmts), and checks that the iteration count and
   induction variables permit (epilog) peeling when it is needed. */
3620 vect_analyze_operations (loop_vec_info loop_vinfo)
3622 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3623 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3624 int nbbs = loop->num_nodes;
3625 block_stmt_iterator si;
3626 unsigned int vectorization_factor = 0;
3631 if (vect_debug_details (NULL))
3632 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3634 for (i = 0; i < nbbs; i++)
3636 basic_block bb = bbs[i];
3638 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3640 tree stmt = bsi_stmt (si);
3641 unsigned int nunits;
3642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3645 if (vect_debug_details (NULL))
3647 fprintf (dump_file, "==> examining statement: ");
3648 print_generic_expr (dump_file, stmt, TDF_SLIM);
3651 gcc_assert (stmt_info);
3653 /* skip stmts which do not need to be vectorized.
3654 this is expected to include:
3655 - the COND_EXPR which is the loop exit condition
3656 - any LABEL_EXPRs in the loop
3657 - computations that are used only for array indexing or loop
3660 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3662 if (vect_debug_details (NULL))
3663 fprintf (dump_file, "irrelevant.");
   /* A stmt that is already in vector form cannot be re-vectorized.  */
3667 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3669 if (vect_debug_stats (loop) || vect_debug_details (loop))
3671 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3672 print_generic_expr (dump_file, stmt, TDF_SLIM);
   /* Determine the scalar type of the stmt, preferring the type of its
      memory reference when it has one.  */
3677 if (STMT_VINFO_DATA_REF (stmt_info))
3678 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3679 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3680 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3682 scalar_type = TREE_TYPE (stmt);
3684 if (vect_debug_details (NULL))
3686 fprintf (dump_file, "get vectype for scalar type: ");
3687 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3690 vectype = get_vectype_for_scalar_type (scalar_type);
3693 if (vect_debug_stats (loop) || vect_debug_details (loop))
3695 fprintf (dump_file, "not vectorized: unsupported data-type ");
3696 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3701 if (vect_debug_details (NULL))
3703 fprintf (dump_file, "vectype: ");
3704 print_generic_expr (dump_file, vectype, TDF_SLIM);
3706 STMT_VINFO_VECTYPE (stmt_info) = vectype;
   /* The stmt must be handled by at least one of the per-kind
      vectorizability checks.  */
3708 ok = (vectorizable_operation (stmt, NULL, NULL)
3709 || vectorizable_assignment (stmt, NULL, NULL)
3710 || vectorizable_load (stmt, NULL, NULL)
3711 || vectorizable_store (stmt, NULL, NULL));
3715 if (vect_debug_stats (loop) || vect_debug_details (loop))
3717 fprintf (dump_file, "not vectorized: stmt not supported: ");
3718 print_generic_expr (dump_file, stmt, TDF_SLIM);
3723 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3724 if (vect_debug_details (NULL))
3725 fprintf (dump_file, "nunits = %d", nunits);
3727 if (vectorization_factor)
3729 /* FORNOW: don't allow mixed units.
3730 This restriction will be relaxed in the future. */
3731 if (nunits != vectorization_factor)
3733 if (vect_debug_stats (loop) || vect_debug_details (loop))
3734 fprintf (dump_file, "not vectorized: mixed data-types");
   /* First relevant stmt seen: its unit count fixes the loop's VF.  */
3739 vectorization_factor = nunits;
3741 #ifdef ENABLE_CHECKING
3742 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3743 * vectorization_factor == UNITS_PER_SIMD_WORD);
3748 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3750 if (vectorization_factor <= 1)
3752 if (vect_debug_stats (loop) || vect_debug_details (loop))
3753 fprintf (dump_file, "not vectorized: unsupported data-type");
3756 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3758 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3760 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3761 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3763 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3764 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3766 if (vect_debug_stats (loop) || vect_debug_details (loop))
3767 fprintf (dump_file, "not vectorized: iteration count too small.");
   /* An epilog loop is needed when niters is unknown or not a multiple of
      VF; verify peeling is possible in that case.  */
3771 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3772 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3774 if (vect_debug_stats (loop) || vect_debug_details (loop))
3775 fprintf (dump_file, "epilog loop required.");
3776 if (!vect_can_advance_ivs_p (loop))
3778 if (vect_debug_stats (loop) || vect_debug_details (loop))
3779 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3782 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3784 if (vect_debug_stats (loop) || vect_debug_details (loop))
3785 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3794 /* Function exist_non_indexing_operands_for_use_p
3796 USE is one of the uses attached to STMT. Check if USE is
3797 used in STMT for anything other than indexing an array. */
3800 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3803 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3805 /* USE corresponds to some operand in STMT. If there is no data
3806 reference in STMT, then any operand that corresponds to USE
3807 is not indexing an array. */
3808 if (!STMT_VINFO_DATA_REF (stmt_info))
3811 /* STMT has a data_ref. FORNOW this means that its of one of
3812 the following forms:
3815 (This should have been verified in analyze_data_refs).
3817 'var' in the second case corresponds to a def, not a use,
3818 so USE cannot correspond to any operands that are not used
3821 Therefore, all we need to check is if STMT falls into the
3822 first case, and whether var corresponds to USE. */
   /* If the LHS is an SSA_NAME, STMT is of the form 'var = array_ref'
      (the load case); otherwise it is the store case.  */
3824 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3827 operand = TREE_OPERAND (stmt, 1);
3829 if (TREE_CODE (operand) != SSA_NAME)
3839 /* Function vect_is_simple_iv_evolution.
3841 FORNOW: A simple evolution of an induction variables in the loop is
3842 considered a polynomial evolution with constant step.
   INIT and STEP receive the initial condition and the step of the
   evolution.  When STRICT is set, the step must additionally be 1. */
3845 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3846 tree * step, bool strict)
3851 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3853 /* When there is no evolution in this loop, the evolution function
3855 if (evolution_part == NULL_TREE)
3858 /* When the evolution is a polynomial of degree >= 2
3859 the evolution function is not "simple". */
3860 if (tree_is_chrec (evolution_part))
3863 step_expr = evolution_part;
3864 init_expr = unshare_expr (initial_condition (access_fn));
3866 if (vect_debug_details (NULL))
3868 fprintf (dump_file, "step: ");
3869 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3870 fprintf (dump_file, ", init: ");
3871 print_generic_expr (dump_file, init_expr, TDF_SLIM);
   /* A non-constant step is not supported.  */
3877 if (TREE_CODE (step_expr) != INTEGER_CST)
3879 if (vect_debug_details (NULL))
3880 fprintf (dump_file, "step unknown.");
   /* NOTE(review): this check presumably runs only under STRICT (the
      guarding condition is on a line not visible here) -- confirm.  */
3885 if (!integer_onep (step_expr))
3887 if (vect_debug_details (NULL))
3888 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3896 /* Function vect_analyze_scalar_cycles.
3898 Examine the cross iteration def-use cycles of scalar variables, by
3899 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3900 cycles that they represent do not impede vectorization.
3902 FORNOW: Reduction as in the following loop, is not supported yet:
3906 The cross-iteration cycle corresponding to variable 'sum' will be
3907 considered too complicated and will impede vectorization.
3909 FORNOW: Induction as in the following loop, is not supported yet:
3914 However, the following loop *is* vectorizable:
3919 In both loops there exists a def-use cycle for the variable i:
3920 loop: i_2 = PHI (i_0, i_1)
3925 The evolution of the above cycle is considered simple enough,
3926 however, we also check that the cycle does not need to be
3927 vectorized, i.e - we check that the variable that this cycle
3928 defines is only used for array indexing or in stmts that do not
3929 need to be vectorized. This is not the case in loop2, but it
3930 *is* the case in loop3. */
3933 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3936 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3937 basic_block bb = loop->header;
3940 if (vect_debug_details (NULL))
3941 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
   /* Walk all PHIs in the loop header; each represents a potential
      cross-iteration cycle.  */
3943 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3945 tree access_fn = NULL;
3947 if (vect_debug_details (NULL))
3949 fprintf (dump_file, "Analyze phi: ");
3950 print_generic_expr (dump_file, phi, TDF_SLIM);
3953 /* Skip virtual phi's. The data dependences that are associated with
3954 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3956 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3958 if (vect_debug_details (NULL))
3959 fprintf (dump_file, "virtual phi. skip.");
3963 /* Analyze the evolution function. */
3965 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3966 those of loop induction variables; This property is verified here.
3968 Furthermore, if that induction variable is used in an operation
3969 that needs to be vectorized (i.e, is not solely used to index
3970 arrays and check the exit condition) - we do not support its
3971 vectorization yet. This property is verified in vect_is_simple_use,
3972 during vect_analyze_operations. */
3974 access_fn = /* instantiate_parameters
3976 analyze_scalar_evolution (loop, PHI_RESULT (phi));
   /* No access function means the cycle cannot be characterized.  */
3980 if (vect_debug_stats (loop) || vect_debug_details (loop))
3981 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3985 if (vect_debug_details (NULL))
3987 fprintf (dump_file, "Access function of PHI: ");
3988 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3991 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3994 if (vect_debug_stats (loop) || vect_debug_details (loop))
3995 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4004 /* Function vect_analyze_data_ref_dependence.
4006 Return TRUE if there (might) exist a dependence between a memory-reference
4007 DRA and a memory-reference DRB. */
4010 vect_analyze_data_ref_dependence (struct data_reference *dra,
4011 struct data_reference *drb,
4015 struct data_dependence_relation *ddr;
   /* If we cannot even prove the base objects differ, conservatively
      report a (possible) dependence.  */
4017 if (!array_base_name_differ_p (dra, drb, &differ_p))
4019 if (vect_debug_stats (loop) || vect_debug_details (loop))
4022 "not vectorized: can't determine dependence between: ");
4023 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4024 fprintf (dump_file, " and ");
4025 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4033 ddr = initialize_data_dependence_relation (dra, drb);
4034 compute_affine_dependence (ddr);
   /* chrec_known means the analysis proved there is NO dependence.
      NOTE(review): the early 'return false' for that case appears to be
      on a line not visible in this extract; the dump below belongs to
      the fall-through (possible dependence) path -- confirm.  */
4036 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4039 if (vect_debug_stats (loop) || vect_debug_details (loop))
4042 "not vectorized: possible dependence between data-refs ");
4043 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4044 fprintf (dump_file, " and ");
4045 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4052 /* Function vect_analyze_data_ref_dependences.
4054 Examine all the data references in the loop, and make sure there do not
4055 exist any data dependences between them.
4057 TODO: dependences which distance is greater than the vectorization factor
   can be ignored. */
4061 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4064 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4065 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4068 /* Examine store-store (output) dependences: every unordered pair of
   writes. */
4070 if (vect_debug_details (NULL))
4071 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4073 if (vect_debug_details (NULL))
4074 fprintf (dump_file, "compare all store-store pairs.");
4076 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4078 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4080 struct data_reference *dra =
4081 VARRAY_GENERIC_PTR (loop_write_refs, i);
4082 struct data_reference *drb =
4083 VARRAY_GENERIC_PTR (loop_write_refs, j);
4084 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4089 /* Examine load-store (true/anti) dependences: every (read, write)
   pair. */
4091 if (vect_debug_details (NULL))
4092 fprintf (dump_file, "compare all load-store pairs.");
4094 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4096 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4098 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4099 struct data_reference *drb =
4100 VARRAY_GENERIC_PTR (loop_write_refs, j);
4101 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4110 /* Function vect_compute_data_ref_alignment
4112 Compute the misalignment of the data reference DR.
4115 1. If during the misalignment computation it is found that the data reference
4116 cannot be vectorized then false is returned.
4117 2. DR_MISALIGNMENT (DR) is defined.
4119 FOR NOW: No analysis is actually performed. Misalignment is calculated
4120 only for trivial cases. TODO. */
4123 vect_compute_data_ref_alignment (struct data_reference *dr)
4125 tree stmt = DR_STMT (dr);
4126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4127 tree ref = DR_REF (dr);
4129 tree base, alignment;
4130 bool base_aligned_p;
4133 if (vect_debug_details (NULL))
4134 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4136 /* Initialize misalignment to unknown (-1 is the 'unknown' sentinel). */
4137 DR_MISALIGNMENT (dr) = -1;
   /* These were computed earlier and cached on the stmt_info.  */
4139 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4140 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4141 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4142 vectype = STMT_VINFO_VECTYPE (stmt_info);
4146 if (vect_debug_details (NULL))
4148 fprintf (dump_file, "Unknown alignment for access: ");
4149 print_generic_expr (dump_file, base, TDF_SLIM);
   /* The base is not known to be aligned: try to force its alignment to
      that of the vector type.  */
4154 if (!base_aligned_p)
4156 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4158 if (vect_debug_details (NULL))
4160 fprintf (dump_file, "can't force alignment of ref: ");
4161 print_generic_expr (dump_file, ref, TDF_SLIM);
4166 /* Force the alignment of the decl.
4167 NOTE: This is the only change to the code we make during
4168 the analysis phase, before deciding to vectorize the loop. */
4169 if (vect_debug_details (NULL))
4170 fprintf (dump_file, "force alignment")
4171 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4172 DECL_USER_ALIGN (base) = 1;
4175 /* At this point we assume that the base is aligned. */
4176 gcc_assert (base_aligned_p
4177 || (TREE_CODE (base) == VAR_DECL
4178 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4180 /* Alignment required, in bytes: */
4181 alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4183 /* Modulo alignment: reduce the misalignment to [0, alignment). */
4184 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4185 if (tree_int_cst_sgn (misalign) < 0)
4187 /* Negative misalignment value. */
4188 if (vect_debug_details (NULL))
4189 fprintf (dump_file, "unexpected misalign value");
4193 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4195 if (vect_debug_details (NULL))
4196 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4202 /* Function vect_compute_data_refs_alignment
4204 Compute the misalignment of data references in the loop.
4205 This pass may take place at function granularity instead of at loop
4208 FOR NOW: No analysis is actually performed. Misalignment is calculated
4209 only for trivial cases. TODO. */
4212 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4214 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4215 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4218 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4220 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4221 if (!vect_compute_data_ref_alignment (dr))
4225 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4227 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4228 if (!vect_compute_data_ref_alignment (dr))
4236 /* Function vect_enhance_data_refs_alignment
4238 This pass will use loop versioning and loop peeling in order to enhance
4239 the alignment of data references in the loop.
4241 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4242 original loop is to be vectorized; Any other loops that are created by
4243 the transformations performed in this pass - are not supposed to be
4244 vectorized. This restriction will be relaxed. */
4247 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4249 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4250 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4251 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   /* Design discussion (TODO):
4255 This pass will require a cost model to guide it whether to apply peeling
4256 or versioning or a combination of the two. For example, the scheme that
4257 intel uses when given a loop with several memory accesses, is as follows:
4258 choose one memory access ('p') which alignment you want to force by doing
4259 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4260 other accesses are not necessarily aligned, or (2) use loop versioning to
4261 generate one loop in which all accesses are aligned, and another loop in
4262 which only 'p' is necessarily aligned.
4264 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4265 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4266 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4268 Devising a cost model is the most critical aspect of this work. It will
4269 guide us on which access to peel for, whether to use loop versioning, how
4270 many versions to create, etc. The cost model will probably consist of
4271 generic considerations as well as target specific considerations (on
4272 powerpc for example, misaligned stores are more painful than misaligned
   loads).
4275 Here is the general steps involved in alignment enhancements:
4277 -- original loop, before alignment analysis:
4278 for (i=0; i<N; i++){
4279 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4280 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4283 -- After vect_compute_data_refs_alignment:
4284 for (i=0; i<N; i++){
4285 x = q[i]; # DR_MISALIGNMENT(q) = 3
4286 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4289 -- Possibility 1: we do loop versioning:
4291 for (i=0; i<N; i++){ # loop 1A
4292 x = q[i]; # DR_MISALIGNMENT(q) = 3
4293 p[i] = y; # DR_MISALIGNMENT(p) = 0
4297 for (i=0; i<N; i++){ # loop 1B
4298 x = q[i]; # DR_MISALIGNMENT(q) = 3
4299 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4303 -- Possibility 2: we do loop peeling:
4304 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4308 for (i = 3; i < N; i++){ # loop 2A
4309 x = q[i]; # DR_MISALIGNMENT(q) = 0
4310 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4313 -- Possibility 3: combination of loop peeling and versioning:
4314 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4319 for (i = 3; i<N; i++){ # loop 3A
4320 x = q[i]; # DR_MISALIGNMENT(q) = 0
4321 p[i] = y; # DR_MISALIGNMENT(p) = 0
4325 for (i = 3; i<N; i++){ # loop 3B
4326 x = q[i]; # DR_MISALIGNMENT(q) = 0
4327 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4331 These loops are later passed to loop_transform to be vectorized. The
4332 vectorizer will use the alignment information to guide the transformation
4333 (whether to generate regular loads/stores, or with special handling for
   misalignment). */
4337 /* (1) Peeling to force alignment. */
4339 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4341 + How many accesses will become aligned due to the peeling
4342 - How many accesses will become unaligned due to the peeling,
4343 and the cost of misaligned accesses.
4344 - The cost of peeling (the extra runtime checks, the increase
   in code size).
4347 The scheme we use FORNOW: peel to force the alignment of the first
4348 misaligned store in the loop.
4349 Rationale: misaligned stores are not yet supported.
4351 TODO: Use a better cost model. */
4353 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4355 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4356 if (!aligned_access_p (dr))
4358 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4359 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
   /* No misaligned store was found: no peeling needed.  */
4364 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4366 if (vect_debug_details (loop))
4367 fprintf (dump_file, "Peeling for alignment will not be applied.");
4371 if (vect_debug_details (loop))
4372 fprintf (dump_file, "Peeling for alignment will be applied.");
4375 /* (1.2) Update the alignment info according to the peeling factor.
4376 If the misalignment of the DR we peel for is M, then the
4377 peeling factor is VF - M, and the misalignment of each access DR_i
4378 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4379 If the misalignment of the DR we peel for is unknown, then the
4380 misalignment of each access DR_i in the loop is also unknown.
4382 FORNOW: set the misalignment of the accesses to unknown even
4383 if the peeling factor is known at compile time.
4385 TODO: - if the peeling factor is known at compile time, use that
4386 when updating the misalignment info of the loop DRs.
4387 - consider accesses that are known to have the same
4388 alignment, even if that alignment is unknown. */
4390 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4392 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4393 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4394 DR_MISALIGNMENT (dr) = 0;
4396 DR_MISALIGNMENT (dr) = -1;
4398 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4400 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4401 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4402 DR_MISALIGNMENT (dr) = 0;
4404 DR_MISALIGNMENT (dr) = -1;
4409 /* Function vect_analyze_data_refs_alignment
4411 Analyze the alignment of the data-references in the loop.
4412 FOR NOW: Until support for misaligned accesses is in place, only if all
4413 accesses are aligned can the loop be vectorized. This restriction will be
   relaxed. */
4417 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4419 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4420 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4421 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4422 enum dr_alignment_support supportable_dr_alignment;
4425 if (vect_debug_details (NULL))
4426 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4429 /* Step 1: compute the misalignment of each data-ref.  This pass may take
   place at function granularity instead of at loop granularity. */
4432 if (!vect_compute_data_refs_alignment (loop_vinfo))
4434 if (vect_debug_details (loop) || vect_debug_stats (loop))
4436 "not vectorized: can't calculate alignment for data ref.");
4441 /* Step 2: decide on using loop versioning and/or loop peeling in
4442 order to enhance the alignment of data references in the loop. */
4444 vect_enhance_data_refs_alignment (loop_vinfo);
4447 /* Step 3: finally, check that all the data references in the loop can be
4448 handled with respect to their alignment. */
4450 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4452 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4453 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4454 if (!supportable_dr_alignment)
4456 if (vect_debug_details (loop) || vect_debug_stats (loop))
4457 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4461 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4463 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4464 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4465 if (!supportable_dr_alignment)
4467 if (vect_debug_details (loop) || vect_debug_stats (loop))
4468 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4477 /* Function vect_analyze_data_ref_access.
4479 Analyze the access pattern of the data-reference DR. For now, a data access
4480 has to consecutive to be considered vectorizable. */
4483 vect_analyze_data_ref_access (struct data_reference *dr)
4485 tree stmt = DR_STMT (dr);
4486 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4487 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4488 tree scalar_type = TREE_TYPE (DR_REF (dr));
4490 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4492 if (vect_debug_details (NULL))
4493 fprintf (dump_file, "not consecutive access");
4500 /* Function vect_analyze_data_ref_accesses.
4502 Analyze the access pattern of all the data references in the loop.
4504 FORNOW: the only access pattern that is considered vectorizable is a
4505 simple step 1 (consecutive) access.
4507 FORNOW: handle only arrays and pointer accesses. */
4510 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4513 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4514 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4516 if (vect_debug_details (NULL))
4517 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4519 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4521 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4522 bool ok = vect_analyze_data_ref_access (dr);
4525 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4526 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4527 fprintf (dump_file, "not vectorized: complicated access pattern.");
4532 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4534 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4535 bool ok = vect_analyze_data_ref_access (dr);
4538 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4539 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4540 fprintf (dump_file, "not vectorized: complicated access pattern.");
4549 /* Function vect_analyze_pointer_ref_access.
4552 STMT - a stmt that contains a data-ref
4553 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4555 If the data-ref access is vectorizable, return a data_reference structure
4556 that represents it (DR). Otherwise - return NULL. */
4558 static struct data_reference *
4559 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4561 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4562 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4563 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4565 tree reftype, innertype;
4566 tree indx_access_fn;
4567 int loopnum = loop->num;
4568 struct data_reference *dr;
4572 if (vect_debug_stats (loop) || vect_debug_details (loop))
4573 fprintf (dump_file, "not vectorized: complicated pointer access.");
4577 if (vect_debug_details (NULL))
4579 fprintf (dump_file, "Access function of ptr: ");
4580 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4583 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4585 if (vect_debug_stats (loop) || vect_debug_details (loop))
4586 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4592 if (!TREE_CONSTANT (step))
4594 if (vect_debug_stats (loop) || vect_debug_details (loop))
4596 "not vectorized: non constant step for pointer access.");
4600 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4601 if (TREE_CODE (reftype) != POINTER_TYPE)
4603 if (vect_debug_stats (loop) || vect_debug_details (loop))
4604 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4608 reftype = TREE_TYPE (init);
4609 if (TREE_CODE (reftype) != POINTER_TYPE)
4611 if (vect_debug_stats (loop) || vect_debug_details (loop))
4612 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4616 innertype = TREE_TYPE (reftype);
4617 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4619 /* FORNOW: support only consecutive access */
4620 if (vect_debug_stats (loop) || vect_debug_details (loop))
4621 fprintf (dump_file, "not vectorized: non consecutive access.");
4625 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4626 if (TREE_CODE (init) == PLUS_EXPR
4627 || TREE_CODE (init) == MINUS_EXPR)
4628 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4629 fold (size_binop (TREE_CODE (init), size_zero_node,
4630 fold_convert (sizetype, TREE_OPERAND (init, 1))));
4632 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4635 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4636 if (vect_debug_details (NULL))
4638 fprintf (dump_file, "Access function of ptr indx: ");
4639 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4641 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4646 /* Function vect_get_memtag_and_dr.
4648 The function returns the relevant variable for memory tag (for aliasing
4649 purposes). Also data reference structure DR is created.
4651 This function handles three kinds of MEMREF:
4653 It is called from vect_analyze_data_refs with a MEMREF that is either an
4654 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4655 It builds a DR for them using vect_get_base_and_offset, and calls itself
4656 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4657 MEMREF along the way. During the recursive calls, the function may be called
4658 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4659 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4660 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4661 and SSA_NAME (this is category 3 - "recursion stop condition").
4663 When the MEMREF falls into category 1 there is still no data reference struct
4664 (DR) available. It is created by this function, and then, along the recursion,
4665 MEMREF will fall into category 2 or 3, in which case a DR will have already
4666 been created, but the analysis continues to retrieve the MEMTAG.
4669 MEMREF - data reference in STMT
4670 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4673 DR - data_reference struct for MEMREF
4674 return value - the relevant variable for memory tag (for aliasing purposes).
4679 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4680 loop_vec_info loop_vinfo,
4681 tree vectype, struct data_reference **dr)
4683 tree symbl, oprnd0, oprnd1;
4684 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4685 tree offset, misalign, step;
4686 tree ref_to_be_analyzed, tag, dr_base;
4687 struct data_reference *new_dr;
4688 bool base_aligned_p;
4692 /* Category 3: recursion stop condition. */
4693 /* (1) A DR already exists. We only need to get the relevant memtag for
4694 MEMREF, the rest of the data was already initialized. */
4696 switch (TREE_CODE (memref))
4698 /* (1.1) Stop condition: find the relevant memtag and return. */
4700 symbl = SSA_NAME_VAR (memref);
4701 tag = get_var_ann (symbl)->type_mem_tag;
4704 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4705 if (TREE_CODE (ptr) == SSA_NAME)
4706 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4710 if (vect_debug_details (NULL))
4711 fprintf (dump_file, "not vectorized: no memtag for ref.");
4720 /* Category 2: recursion continues. */
4721 /* (1.2) A recursive call to find the relevant memtag is required. */
4723 symbl = TREE_OPERAND (memref, 0);
4724 break; /* For recursive call. */
4727 /* Could have recorded more accurate information -
4728 i.e, the actual FIELD_DECL that is being referenced -
4729 but later passes expect VAR_DECL as the nmt. */
4733 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4734 break; /* For recursive call. */
4738 /* Although DR exists, we have to call the function recursively to
4739 build MEMTAG for such expression. This is handled below. */
4740 oprnd0 = TREE_OPERAND (memref, 0);
4741 oprnd1 = TREE_OPERAND (memref, 1);
4743 STRIP_NOPS (oprnd1);
4744 /* Supported plus/minus expressions are of the form
4745 {address_base + offset}, such that address_base is of type
4746 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4747 or it's not of type POINTER/ARRAY.
4748 TODO: swap operands if {offset + address_base}. */
4749 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4750 && TREE_CODE (oprnd1) != INTEGER_CST)
4751 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4755 break; /* For recursive call. */
4763 /* Category 1: recursion begins. */
4764 /* (2) A DR does not exist yet and must be built, followed by a
4765 recursive call to get the relevant memtag for MEMREF. */
4767 switch (TREE_CODE (memref))
4770 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4774 symbl = DR_BASE_NAME (new_dr);
4775 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4779 new_dr = analyze_array (stmt, memref, is_read);
4781 symbl = DR_BASE_NAME (new_dr);
4782 ref_to_be_analyzed = memref;
4786 /* TODO: Support data-refs of form a[i].p for unions and single
4787 field structures. */
4791 offset = size_zero_node;
4792 misalign = size_zero_node;
4793 step = size_zero_node;
4795 /* Analyze data-ref, find its base, initial offset from the base, step,
4797 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4798 vectype, loop_vinfo, &offset,
4799 &misalign, &step, &base_aligned_p);
4803 /* Initialize information according to above analysis. */
4804 /* Since offset and step of a pointer can be also set in
4805 vect_analyze_pointer_ref_access, we combine the values here. */
4806 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4807 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4808 fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4809 STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4811 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4813 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4814 STMT_VINFO_VECT_STEP (stmt_info) =
4815 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4817 STMT_VINFO_VECT_STEP (stmt_info) = step;
4819 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4820 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4821 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4826 /* Recursive call to retrieve the relevant memtag. */
4827 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4833 /* Function vect_analyze_data_refs.
4835 Find all the data references in the loop.
4837 The general structure of the analysis of data refs in the vectorizer is as
4839 1- vect_analyze_data_refs(loop):
4840 Find and analyze all data-refs in the loop:
4842 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4843 1.1- vect_get_memtag_and_dr(ref):
4844 Analyze ref, and build a DR (data_referece struct) for it;
4845 call vect_get_base_and_offset to compute base, initial_offset,
4846 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4847 ref_stmt.alignment, and ref_stmt.step accordingly.
4848 1.1.1- vect_get_base_and_offset():
4849 Calculate base, initial_offset, step and alignment.
4850 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4851 2- vect_analyze_dependences(): apply dependece testing using ref_stmt.DR
4852 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4853 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4855 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4856 which base is really an array (not a pointer) and which alignment
4857 can be forced. This restriction will be relaxed. */
4860 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4862 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4863 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4864 int nbbs = loop->num_nodes;
4865 block_stmt_iterator si;
4867 struct data_reference *dr;
4869 if (vect_debug_details (NULL))
4870 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4872 for (j = 0; j < nbbs; j++)
4874 basic_block bb = bbs[j];
4875 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4877 bool is_read = false;
4878 tree stmt = bsi_stmt (si);
4879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4880 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4881 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4882 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4883 varray_type *datarefs = NULL;
4884 int nvuses, nv_may_defs, nv_must_defs;
4887 tree scalar_type, vectype;
4889 /* Assumption: there exists a data-ref in stmt, if and only if
4890 it has vuses/vdefs. */
4892 if (!vuses && !v_may_defs && !v_must_defs)
4895 nvuses = NUM_VUSES (vuses);
4896 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4897 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4899 if (nvuses && (nv_may_defs || nv_must_defs))
4901 if (vect_debug_details (NULL))
4903 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4904 print_generic_expr (dump_file, stmt, TDF_SLIM);
4909 if (TREE_CODE (stmt) != MODIFY_EXPR)
4911 if (vect_debug_details (NULL))
4913 fprintf (dump_file, "unexpected vops in stmt: ");
4914 print_generic_expr (dump_file, stmt, TDF_SLIM);
4921 memref = TREE_OPERAND (stmt, 1);
4922 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4927 memref = TREE_OPERAND (stmt, 0);
4928 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4932 scalar_type = TREE_TYPE (memref);
4933 vectype = get_vectype_for_scalar_type (scalar_type);
4936 if (vect_debug_details (NULL))
4938 fprintf (dump_file, "no vectype for stmt: ");
4939 print_generic_expr (dump_file, stmt, TDF_SLIM);
4940 fprintf (dump_file, " scalar_type: ");
4941 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4943 /* It is not possible to vectorize this data reference. */
4946 /* Analyze MEMREF. If it is of a supported form, build data_reference
4947 struct for it (DR) and find memtag for aliasing purposes. */
4949 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4953 if (vect_debug_stats (loop) || vect_debug_details (loop))
4955 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4956 print_generic_expr (dump_file, stmt, TDF_SLIM);
4960 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4961 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4962 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4963 STMT_VINFO_DATA_REF (stmt_info) = dr;
4971 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
4973 /* Function vect_mark_relevant.
4975 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
4978 vect_mark_relevant (varray_type *worklist, tree stmt)
4980 stmt_vec_info stmt_info;
4982 if (vect_debug_details (NULL))
4983 fprintf (dump_file, "mark relevant.");
4985 if (TREE_CODE (stmt) == PHI_NODE)
4987 VARRAY_PUSH_TREE (*worklist, stmt);
4991 stmt_info = vinfo_for_stmt (stmt);
4995 if (vect_debug_details (NULL))
4997 fprintf (dump_file, "mark relevant: no stmt info!!.");
4998 print_generic_expr (dump_file, stmt, TDF_SLIM);
5003 if (STMT_VINFO_RELEVANT_P (stmt_info))
5005 if (vect_debug_details (NULL))
5006 fprintf (dump_file, "already marked relevant.");
5010 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5011 VARRAY_PUSH_TREE (*worklist, stmt);
5015 /* Function vect_stmt_relevant_p.
5017 Return true if STMT in loop that is represented by LOOP_VINFO is
5018 "relevant for vectorization".
5020 A stmt is considered "relevant for vectorization" if:
5021 - it has uses outside the loop.
5022 - it has vdefs (it alters memory).
5023 - control stmts in the loop (except for the exit condition).
5025 CHECKME: what other side effects would the vectorizer allow? */
5028 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5030 v_may_def_optype v_may_defs;
5031 v_must_def_optype v_must_defs;
5032 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5037 /* cond stmt other than loop exit cond. */
5038 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5041 /* changing memory. */
5042 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5043 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5044 if (v_may_defs || v_must_defs)
5046 if (vect_debug_details (NULL))
5047 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5051 /* uses outside the loop. */
5052 df = get_immediate_uses (stmt);
5053 num_uses = num_immediate_uses (df);
5054 for (i = 0; i < num_uses; i++)
5056 tree use = immediate_use (df, i);
5057 basic_block bb = bb_for_stmt (use);
5058 if (!flow_bb_inside_loop_p (loop, bb))
5060 if (vect_debug_details (NULL))
5061 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5070 /* Function vect_mark_stmts_to_be_vectorized.
5072 Not all stmts in the loop need to be vectorized. For example:
5081 Stmt 1 and 3 do not need to be vectorized, because loop control and
5082 addressing of vectorized data-refs are handled differently.
5084 This pass detects such stmts. */
5087 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5089 varray_type worklist;
5090 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5091 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5092 unsigned int nbbs = loop->num_nodes;
5093 block_stmt_iterator si;
5099 stmt_vec_info stmt_info;
5101 if (vect_debug_details (NULL))
5102 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5104 VARRAY_TREE_INIT (worklist, 64, "work list");
5106 /* 1. Init worklist. */
5108 for (i = 0; i < nbbs; i++)
5110 basic_block bb = bbs[i];
5111 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5113 stmt = bsi_stmt (si);
5115 if (vect_debug_details (NULL))
5117 fprintf (dump_file, "init: stmt relevant? ");
5118 print_generic_expr (dump_file, stmt, TDF_SLIM);
5121 stmt_info = vinfo_for_stmt (stmt);
5122 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5124 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5125 vect_mark_relevant (&worklist, stmt);
5130 /* 2. Process_worklist */
5132 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5134 stmt = VARRAY_TOP_TREE (worklist);
5135 VARRAY_POP (worklist);
5137 if (vect_debug_details (NULL))
5139 fprintf (dump_file, "worklist: examine stmt: ");
5140 print_generic_expr (dump_file, stmt, TDF_SLIM);
5143 /* Examine the USES in this statement. Mark all the statements which
5144 feed this statement's uses as "relevant", unless the USE is used as
5147 if (TREE_CODE (stmt) == PHI_NODE)
5149 /* follow the def-use chain inside the loop. */
5150 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5152 tree arg = PHI_ARG_DEF (stmt, j);
5153 tree def_stmt = NULL_TREE;
5155 if (!vect_is_simple_use (arg, loop, &def_stmt))
5157 if (vect_debug_details (NULL))
5158 fprintf (dump_file, "worklist: unsupported use.");
5159 varray_clear (worklist);
5165 if (vect_debug_details (NULL))
5167 fprintf (dump_file, "worklist: def_stmt: ");
5168 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5171 bb = bb_for_stmt (def_stmt);
5172 if (flow_bb_inside_loop_p (loop, bb))
5173 vect_mark_relevant (&worklist, def_stmt);
5177 ann = stmt_ann (stmt);
5178 use_ops = USE_OPS (ann);
5180 for (i = 0; i < NUM_USES (use_ops); i++)
5182 tree use = USE_OP (use_ops, i);
5184 /* We are only interested in uses that need to be vectorized. Uses
5185 that are used for address computation are not considered relevant.
5187 if (exist_non_indexing_operands_for_use_p (use, stmt))
5189 tree def_stmt = NULL_TREE;
5191 if (!vect_is_simple_use (use, loop, &def_stmt))
5193 if (vect_debug_details (NULL))
5194 fprintf (dump_file, "worklist: unsupported use.");
5195 varray_clear (worklist);
5202 if (vect_debug_details (NULL))
5204 fprintf (dump_file, "worklist: examine use %d: ", i);
5205 print_generic_expr (dump_file, use, TDF_SLIM);
5208 bb = bb_for_stmt (def_stmt);
5209 if (flow_bb_inside_loop_p (loop, bb))
5210 vect_mark_relevant (&worklist, def_stmt);
5213 } /* while worklist */
5215 varray_clear (worklist);
5220 /* Function vect_can_advance_ivs_p
5222 In case the number of iterations that LOOP iterates in unknown at compile
5223 time, an epilog loop will be generated, and the loop induction variables
5224 (IVs) will be "advanced" to the value they are supposed to take just before
5225 the epilog loop. Here we check that the access function of the loop IVs
5226 and the expression that represents the loop bound are simple enough.
5227 These restrictions will be relaxed in the future. */
5230 vect_can_advance_ivs_p (struct loop *loop)
5232 basic_block bb = loop->header;
5235 /* Analyze phi functions of the loop header. */
5237 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5239 tree access_fn = NULL;
5240 tree evolution_part;
5242 if (vect_debug_details (NULL))
5244 fprintf (dump_file, "Analyze phi: ");
5245 print_generic_expr (dump_file, phi, TDF_SLIM);
5248 /* Skip virtual phi's. The data dependences that are associated with
5249 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5251 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5253 if (vect_debug_details (NULL))
5254 fprintf (dump_file, "virtual phi. skip.");
5258 /* Analyze the evolution function. */
5260 access_fn = instantiate_parameters
5261 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5265 if (vect_debug_details (NULL))
5266 fprintf (dump_file, "No Access function.");
5270 if (vect_debug_details (NULL))
5272 fprintf (dump_file, "Access function of PHI: ");
5273 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5276 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5278 if (evolution_part == NULL_TREE)
5281 /* FORNOW: We do not transform initial conditions of IVs
5282 which evolution functions are a polynomial of degree >= 2. */
5284 if (tree_is_chrec (evolution_part))
5292 /* Function vect_get_loop_niters.
5294 Determine how many iterations the loop is executed.
5295 If an expression that represents the number of iterations
5296 can be constructed, place it in NUMBER_OF_ITERATIONS.
5297 Return the loop exit condition. */
5300 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5304 if (vect_debug_details (NULL))
5305 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5307 niters = number_of_iterations_in_loop (loop);
5309 if (niters != NULL_TREE
5310 && niters != chrec_dont_know)
5312 *number_of_iterations = niters;
5314 if (vect_debug_details (NULL))
5316 fprintf (dump_file, "==> get_loop_niters:" );
5317 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5321 return get_loop_exit_condition (loop);
5325 /* Function vect_analyze_loop_form.
5327 Verify the following restrictions (some may be relaxed in the future):
5328 - it's an inner-most loop
5329 - number of BBs = 2 (which are the loop header and the latch)
5330 - the loop has a pre-header
5331 - the loop has a single entry and exit
5332 - the loop exit condition is simple enough, and the number of iterations
5333 can be analyzed (a countable loop). */
5335 static loop_vec_info
5336 vect_analyze_loop_form (struct loop *loop)
5338 loop_vec_info loop_vinfo;
5340 tree number_of_iterations = NULL;
5341 bool rescan = false;
5343 if (vect_debug_details (loop))
5344 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5347 || !loop->single_exit
5348 || loop->num_nodes != 2
5349 || EDGE_COUNT (loop->header->preds) != 2
5350 || loop->num_entries != 1)
5352 if (vect_debug_stats (loop) || vect_debug_details (loop))
5354 fprintf (dump_file, "not vectorized: bad loop form. ");
5356 fprintf (dump_file, "nested loop.");
5357 else if (!loop->single_exit)
5358 fprintf (dump_file, "multiple exits.");
5359 else if (loop->num_nodes != 2)
5360 fprintf (dump_file, "too many BBs in loop.");
5361 else if (EDGE_COUNT (loop->header->preds) != 2)
5362 fprintf (dump_file, "too many incoming edges.");
5363 else if (loop->num_entries != 1)
5364 fprintf (dump_file, "too many entries.");
5370 /* We assume that the loop exit condition is at the end of the loop. i.e,
5371 that the loop is represented as a do-while (with a proper if-guard
5372 before the loop if needed), where the loop header contains all the
5373 executable statements, and the latch is empty. */
5374 if (!empty_block_p (loop->latch))
5376 if (vect_debug_stats (loop) || vect_debug_details (loop))
5377 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5381 /* Make sure we have a preheader basic block. */
5382 if (!loop->pre_header)
5385 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5388 /* Make sure there exists a single-predecessor exit bb: */
5389 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5392 loop_split_edge_with (loop->exit_edges[0], NULL);
5397 flow_loop_scan (loop, LOOP_ALL);
5398 /* Flow loop scan does not update loop->single_exit field. */
5399 loop->single_exit = loop->exit_edges[0];
5402 if (empty_block_p (loop->header))
5404 if (vect_debug_stats (loop) || vect_debug_details (loop))
5405 fprintf (dump_file, "not vectorized: empty loop.");
5409 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5412 if (vect_debug_stats (loop) || vect_debug_details (loop))
5413 fprintf (dump_file, "not vectorized: complicated exit condition.");
5417 if (!number_of_iterations)
5419 if (vect_debug_stats (loop) || vect_debug_details (loop))
5421 "not vectorized: number of iterations cannot be computed.");
5425 if (chrec_contains_undetermined (number_of_iterations))
5427 if (vect_debug_details (NULL))
5428 fprintf (dump_file, "Infinite number of iterations.");
5432 loop_vinfo = new_loop_vec_info (loop);
5433 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5435 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5437 if (vect_debug_details (loop))
5439 fprintf (dump_file, "loop bound unknown.\n");
5440 fprintf (dump_file, "Symbolic number of iterations is ");
5441 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5445 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5447 if (vect_debug_stats (loop) || vect_debug_details (loop))
5448 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5452 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5458 /* Function vect_analyze_loop.
5460 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5461 for it. The different analyses will record information in the
5462 loop_vec_info struct. */
5464 static loop_vec_info
5465 vect_analyze_loop (struct loop *loop)
5468 loop_vec_info loop_vinfo;
5470 if (vect_debug_details (NULL))
5471 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5473 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5475 loop_vinfo = vect_analyze_loop_form (loop);
5478 if (vect_debug_details (loop))
5479 fprintf (dump_file, "bad loop form.");
5483 /* Find all data references in the loop (which correspond to vdefs/vuses)
5484 and analyze their evolution in the loop.
5486 FORNOW: Handle only simple, array references, which
5487 alignment can be forced, and aligned pointer-references. */
5489 ok = vect_analyze_data_refs (loop_vinfo);
5492 if (vect_debug_details (loop))
5493 fprintf (dump_file, "bad data references.");
5494 destroy_loop_vec_info (loop_vinfo);
5498 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5500 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5503 if (vect_debug_details (loop))
5504 fprintf (dump_file, "unexpected pattern.");
5505 if (vect_debug_details (loop))
5506 fprintf (dump_file, "not vectorized: unexpected pattern.");
5507 destroy_loop_vec_info (loop_vinfo);
5511 /* Check that all cross-iteration scalar data-flow cycles are OK.
5512 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5514 ok = vect_analyze_scalar_cycles (loop_vinfo);
5517 if (vect_debug_details (loop))
5518 fprintf (dump_file, "bad scalar cycle.");
5519 destroy_loop_vec_info (loop_vinfo);
5523 /* Analyze data dependences between the data-refs in the loop.
5524 FORNOW: fail at the first data dependence that we encounter. */
5526 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5529 if (vect_debug_details (loop))
5530 fprintf (dump_file, "bad data dependence.");
5531 destroy_loop_vec_info (loop_vinfo);
5535 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5536 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5538 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5541 if (vect_debug_details (loop))
5542 fprintf (dump_file, "bad data access.");
5543 destroy_loop_vec_info (loop_vinfo);
5547 /* Analyze the alignment of the data-refs in the loop.
5548 FORNOW: Only aligned accesses are handled. */
5550 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5553 if (vect_debug_details (loop))
5554 fprintf (dump_file, "bad data alignment.");
5555 destroy_loop_vec_info (loop_vinfo);
5559 /* Scan all the operations in the loop and make sure they are
5562 ok = vect_analyze_operations (loop_vinfo);
5565 if (vect_debug_details (loop))
5566 fprintf (dump_file, "bad operation or unsupported loop bound.");
5567 destroy_loop_vec_info (loop_vinfo);
5571 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5577 /* Function need_imm_uses_for.
5579 Return whether we ought to include information for 'var'
5580 when calculating immediate uses. For this pass we only want use
5581 information for non-virtual variables. */
5584 need_imm_uses_for (tree var)
5586 return is_gimple_reg (var);
5590 /* Function vectorize_loops.
5592 Entry Point to loop vectorization phase. */
5595 vectorize_loops (struct loops *loops)
5597 unsigned int i, loops_num;
5598 unsigned int num_vectorized_loops = 0;
5600 /* Does the target support SIMD? */
5601 /* FORNOW: until more sophisticated machine modelling is in place. */
5602 if (!UNITS_PER_SIMD_WORD)
5604 if (vect_debug_details (NULL))
5605 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5609 #ifdef ENABLE_CHECKING
5610 verify_loop_closed_ssa ();
5613 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5615 /* ----------- Analyze loops. ----------- */
5617 /* If some loop was duplicated, it gets bigger number
5618 than all previously defined loops. This fact allows us to run
5619 only over initial loops skipping newly generated ones. */
5620 loops_num = loops->num;
5621 for (i = 1; i < loops_num; i++)
5623 loop_vec_info loop_vinfo;
5624 struct loop *loop = loops->parray[i];
5629 loop_vinfo = vect_analyze_loop (loop);
5630 loop->aux = loop_vinfo;
5632 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5635 vect_transform_loop (loop_vinfo, loops);
5636 num_vectorized_loops++;
5639 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5640 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5641 num_vectorized_loops);
5643 /* ----------- Finalize. ----------- */
5646 for (i = 1; i < loops_num; i++)
5648 struct loop *loop = loops->parray[i];
5649 loop_vec_info loop_vinfo;
5653 loop_vinfo = loop->aux;
5654 destroy_loop_vec_info (loop_vinfo);
5658 rewrite_into_ssa (false);
5659 rewrite_into_loop_closed_ssa (); /* FORNOW */
5660 bitmap_clear (vars_to_rename);